From patchwork Tue Dec 19 11:14:28 2017
X-Patchwork-Submitter: Anatoly Burakov
X-Patchwork-Id: 32452
From: Anatoly Burakov
To: dev@dpdk.org
Cc: andras.kovacs@ericsson.com, laszlo.vadkeri@ericsson.com,
 keith.wiles@intel.com, benjamin.walker@intel.com,
 bruce.richardson@intel.com, thomas@monjalon.net
Date: Tue, 19 Dec 2017 11:14:28 +0000
Message-Id: <3aa7c38c79a039a6131b89e790418319c86a6ce2.1513681966.git.anatoly.burakov@intel.com>
Subject: [dpdk-dev] [RFC v2 01/23] eal: move get_virtual_area out of
 linuxapp eal_memory.c

Move get_virtual_area out of linuxapp EAL memory and make it common to
the EAL, so that other code can reserve virtual areas as well.

Signed-off-by: Anatoly Burakov
---
 lib/librte_eal/common/eal_common_memory.c | 70 ++++++++++++++++++++++++++++++
 lib/librte_eal/common/eal_private.h       | 29 +++++++++++++
 lib/librte_eal/linuxapp/eal/eal_memory.c  | 71 ++-----------------------------
 3 files changed, 102 insertions(+), 68 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index fc6c44d..96570a7 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -31,6 +31,8 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <errno.h>
+#include <string.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
@@ -49,6 +51,74 @@
 #include "eal_internal_cfg.h"
 
 /*
+ * Try to mmap *size bytes of anonymous memory. If successful, return the
+ * pointer to the mmap'd area and keep *size unmodified. Else, retry
+ * with a smaller zone: decrease *size by page_sz until it reaches
+ * 0. In that case, return NULL. Note: this function returns an address
+ * that is aligned to page_sz.
+ */
+
+static uint64_t baseaddr_offset;
+
+void *
+eal_get_virtual_area(void *requested_addr, uint64_t *size,
+		uint64_t page_sz, int flags)
+{
+	bool addr_is_hint, allow_shrink;
+	void *addr;
+
+	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%" PRIx64 " bytes\n", *size);
+
+	addr_is_hint = (flags & EAL_VIRTUAL_AREA_ADDR_IS_HINT) > 0;
+	allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0;
+
+	if (requested_addr == NULL && internal_config.base_virtaddr != 0) {
+		requested_addr = (void *) (internal_config.base_virtaddr +
+				baseaddr_offset);
+		addr_is_hint = true;
+	}
+
+	do {
+		/* TODO: we may not necessarily be using memory mapped by this
+		 * function for hugepage mapping, so... HUGETLB flag? */
+
+		addr = mmap(requested_addr,
+				(*size) + page_sz, PROT_READ,
+#ifdef RTE_ARCH_PPC_64
+				MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+#else
+				MAP_PRIVATE | MAP_ANONYMOUS,
+#endif
+				-1, 0);
+		if (addr == MAP_FAILED && allow_shrink)
+			*size -= page_sz;
+	} while (allow_shrink && addr == MAP_FAILED && *size > 0);
+
+	if (addr == MAP_FAILED) {
+		RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
+			strerror(errno));
+		return NULL;
+	} else if (requested_addr != NULL && !addr_is_hint &&
+			addr != requested_addr) {
+		RTE_LOG(ERR, EAL, "Cannot get a virtual area at requested address: %p\n",
+			requested_addr);
+		munmap(addr, (*size) + page_sz);
+		return NULL;
+	}
+
+	/* align addr to page size boundary */
+	addr = RTE_PTR_ALIGN(addr, page_sz);
+
+	RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%" PRIx64 ")\n",
+		addr, *size);
+
+	baseaddr_offset += *size;
+
+	return addr;
+}
+
+
+/*
  * Return a pointer to a read-only table of struct rte_physmem_desc
  * elements, containing the layout of all addressable physical
  * memory. The last element of the table contains a NULL address.
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 462226f..5d57fc1 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -34,6 +34,7 @@
 #ifndef _EAL_PRIVATE_H_
 #define _EAL_PRIVATE_H_
 
+#include <stdint.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <rte_dev.h>
@@ -224,4 +225,32 @@
  */
 struct rte_bus *rte_bus_find_by_device_name(const char *str);
 
+#define EAL_VIRTUAL_AREA_ADDR_IS_HINT 0x1
+/**< don't fail if the exact requested address cannot be obtained */
+#define EAL_VIRTUAL_AREA_ALLOW_SHRINK 0x2
+/**< try getting smaller sized (decrement by page size) virtual areas
+ * if an area of the requested size cannot be obtained */
+
+/**
+ * Get virtual area of specified size from the OS.
+ *
+ * This function is private to the EAL.
+ *
+ * @param requested_addr
+ *   Address at which to request the address space.
+ * @param size
+ *   Size of requested area (may shrink with EAL_VIRTUAL_AREA_ALLOW_SHRINK).
+ * @param page_sz
+ *   Page size on which to align the requested virtual area.
+ * @param flags
+ *   EAL_VIRTUAL_AREA_* flags.
+ *
+ * @return
+ *   Virtual area address if successful.
+ *   NULL if unsuccessful.
+ */
+void *
+eal_get_virtual_area(void *requested_addr, uint64_t *size,
+		uint64_t page_sz, int flags);
+
 #endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 16a181c..dd18d98 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -86,8 +86,6 @@
 * zone as well as a physical contiguous zone.
 */
 
-static uint64_t baseaddr_offset;
-
 static bool phys_addrs_available = true;
 
 #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
@@ -250,71 +248,6 @@ aslr_enabled(void)
 }
 
-/*
- * Try to mmap *size bytes in /dev/zero. If it is successful, return the
- * pointer to the mmap'd area and keep *size unmodified. Else, retry
- * with a smaller zone: decrease *size by hugepage_sz until it reaches
- * 0. In this case, return NULL. Note: this function returns an address
- * which is a multiple of hugepage size.
- */
-static void *
-get_virtual_area(size_t *size, size_t hugepage_sz)
-{
-	void *addr;
-	int fd;
-	long aligned_addr;
-
-	if (internal_config.base_virtaddr != 0) {
-		addr = (void*) (uintptr_t) (internal_config.base_virtaddr +
-				baseaddr_offset);
-	}
-	else addr = NULL;
-
-	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
-
-	fd = open("/dev/zero", O_RDONLY);
-	if (fd < 0){
-		RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n");
-		return NULL;
-	}
-	do {
-		addr = mmap(addr,
-				(*size) + hugepage_sz, PROT_READ,
-#ifdef RTE_ARCH_PPC_64
-				MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-#else
-				MAP_PRIVATE,
-#endif
-				fd, 0);
-		if (addr == MAP_FAILED)
-			*size -= hugepage_sz;
-	} while (addr == MAP_FAILED && *size > 0);
-
-	if (addr == MAP_FAILED) {
-		close(fd);
-		RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
-			strerror(errno));
-		return NULL;
-	}
-
-	munmap(addr, (*size) + hugepage_sz);
-	close(fd);
-
-	/* align addr to a huge page size boundary */
-	aligned_addr = (long)addr;
-	aligned_addr += (hugepage_sz - 1);
-	aligned_addr &= (~(hugepage_sz - 1));
-	addr = (void *)(aligned_addr);
-
-	RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
-		addr, *size);
-
-	/* increment offset */
-	baseaddr_offset += *size;
-
-	return addr;
-}
-
 static sigjmp_buf huge_jmpenv;
 
 static void huge_sigbus_handler(int signo __rte_unused)
 {
@@ -463,7 +396,9 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
 			/* get the biggest virtual memory area up to
 			 * vma_len. If it fails, vma_addr is NULL, so
 			 * let the kernel provide the address. */
-			vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
+			vma_addr = eal_get_virtual_area(NULL, &vma_len,
+					hpi->hugepage_sz,
+					EAL_VIRTUAL_AREA_ALLOW_SHRINK);
 			if (vma_addr == NULL)
 				vma_len = hugepage_sz;
 		}
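
For illustration, here is a minimal sketch of how other EAL-internal code
might call the new function once it is exposed through eal_private.h. The
helper name, sizes and log strings below are hypothetical and not part of
this patch:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#include "eal_private.h"

/* Reserve up to 64 MB of virtual address space, aligned to (and
 * shrinkable in steps of) 2 MB pages. ADDR_IS_HINT makes any requested
 * base address a hint rather than a hard requirement. */
static void *
reserve_va_example(void)
{
	uint64_t size = 64 * 1024 * 1024;	/* may be reduced on return */
	uint64_t page_sz = 2 * 1024 * 1024;
	void *va;

	va = eal_get_virtual_area(NULL, &size, page_sz,
			EAL_VIRTUAL_AREA_ADDR_IS_HINT |
			EAL_VIRTUAL_AREA_ALLOW_SHRINK);
	if (va == NULL) {
		printf("could not reserve a virtual area\n");
		return NULL;
	}
	printf("reserved 0x%" PRIx64 " bytes at %p\n", size, va);
	return va;
}

Note that, unlike the old get_virtual_area(), which munmap'd the probed
region before returning, the new function leaves the area mapped
(PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS) on success, so a caller that
wants to back it with real pages has to map over it or unmap it first.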