From patchwork Tue Dec 19 11:14:47 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Burakov, Anatoly" X-Patchwork-Id: 32472 Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 5F5431B208; Tue, 19 Dec 2017 12:15:31 +0100 (CET) Received: from mga03.intel.com (mga03.intel.com [134.134.136.65]) by dpdk.org (Postfix) with ESMTP id 18F171B01E for ; Tue, 19 Dec 2017 12:14:56 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga005.fm.intel.com ([10.253.24.32]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 19 Dec 2017 03:14:55 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.45,426,1508828400"; d="scan'208";a="188049082" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by fmsmga005.fm.intel.com with ESMTP; 19 Dec 2017 03:14:54 -0800 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id vBJBErag003147; Tue, 19 Dec 2017 11:14:53 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id vBJBErW3010327; Tue, 19 Dec 2017 11:14:53 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id vBJBErBC010319; Tue, 19 Dec 2017 11:14:53 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: andras.kovacs@ericsson.com, laszlo.vadkeri@ericsson.com, keith.wiles@intel.com, benjamin.walker@intel.com, bruce.richardson@intel.com, thomas@monjalon.net Date: Tue, 19 Dec 2017 11:14:47 +0000 Message-Id: <69a29e4ac2822d0c4b1f6c599b428977b2b25505.1513681966.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [RFC v2 20/23] eal: make memzones use rte_fbarray X-BeenThere: dev@dpdk.org 
X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" We greatly expand memzone list, and it makes some operations faster. Plus, it's there, so we might as well use it. As part of this commit, a potential memory leak is fixed (when we allocate a memzone but there's no room in config, we don't free it back), and there's a compile fix for ENA driver. Signed-off-by: Anatoly Burakov --- config/common_base | 2 +- drivers/net/ena/ena_ethdev.c | 10 +- lib/librte_eal/common/eal_common_memzone.c | 168 ++++++++++++++++------ lib/librte_eal/common/include/rte_eal_memconfig.h | 4 +- 4 files changed, 137 insertions(+), 47 deletions(-) diff --git a/config/common_base b/config/common_base index 9730d4c..cce464d 100644 --- a/config/common_base +++ b/config/common_base @@ -92,7 +92,7 @@ CONFIG_RTE_MAX_LCORE=128 CONFIG_RTE_MAX_NUMA_NODES=8 CONFIG_RTE_MAX_MEMSEG_LISTS=16 CONFIG_RTE_MAX_MEMSEG_PER_LIST=32768 -CONFIG_RTE_MAX_MEMZONE=2560 +CONFIG_RTE_MAX_MEMZONE=32768 CONFIG_RTE_MAX_TAILQ=32 CONFIG_RTE_ENABLE_ASSERT=n CONFIG_RTE_LOG_LEVEL=RTE_LOG_INFO diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c index 22db895..aa37cad 100644 --- a/drivers/net/ena/ena_ethdev.c +++ b/drivers/net/ena/ena_ethdev.c @@ -249,11 +249,15 @@ static const struct eth_dev_ops ena_dev_ops = { static inline int ena_cpu_to_node(int cpu) { struct rte_config *config = rte_eal_get_configuration(); + const struct rte_fbarray *arr = &config->mem_config->memzones; + const struct rte_memzone *mz; - if (likely(cpu < RTE_MAX_MEMZONE)) - return config->mem_config->memzone[cpu].socket_id; + if (unlikely(cpu >= RTE_MAX_MEMZONE)) + return NUMA_NO_NODE; - return NUMA_NO_NODE; + mz = rte_fbarray_get(arr, cpu); + + return mz->socket_id; } static inline void ena_rx_mbuf_prepare(struct rte_mbuf *mbuf, diff --git a/lib/librte_eal/common/eal_common_memzone.c 
b/lib/librte_eal/common/eal_common_memzone.c index a9a4bef..58a4f25 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -58,20 +58,23 @@ static inline const struct rte_memzone * memzone_lookup_thread_unsafe(const char *name) { const struct rte_mem_config *mcfg; + const struct rte_fbarray *arr; const struct rte_memzone *mz; - unsigned i = 0; + int i = 0; /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; /* * the algorithm is not optimal (linear), but there are few * zones and this function should be called at init only */ - for (i = 0; i < RTE_MAX_MEMZONE; i++) { - mz = &mcfg->memzone[i]; - if (mz->addr != NULL && !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE)) - return &mcfg->memzone[i]; + while ((i = rte_fbarray_find_next_used(arr, i)) >= 0) { + mz = rte_fbarray_get(arr, i++); + if (mz->addr != NULL && + !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE)) + return mz; } return NULL; @@ -81,17 +84,44 @@ static inline struct rte_memzone * get_next_free_memzone(void) { struct rte_mem_config *mcfg; - unsigned i = 0; + struct rte_fbarray *arr; + int i = 0; /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; + + i = rte_fbarray_find_next_free(arr, 0); + if (i < 0) { + /* no space in config, so try expanding the list */ + int old_len = arr->len; + int new_len = old_len * 2; + new_len = RTE_MIN(new_len, arr->capacity); + + if (old_len == new_len) { + /* can't expand, the list is full */ + RTE_LOG(ERR, EAL, "%s(): no space in memzone list\n", + __func__); + return NULL; + } - for (i = 0; i < RTE_MAX_MEMZONE; i++) { - if (mcfg->memzone[i].addr == NULL) - return &mcfg->memzone[i]; - } + if (rte_fbarray_resize(arr, new_len)) { + RTE_LOG(ERR, EAL, "%s(): can't resize memzone list\n", + __func__); + return NULL; + } - return NULL; + /* ensure we have free space */ + i = rte_fbarray_find_next_free(arr, 
old_len); + + if (i < 0) { + RTE_LOG(ERR, EAL, "%s(): Cannot find room in config!\n", + __func__); + return NULL; + } + } + rte_fbarray_set_used(arr, i, true); + return rte_fbarray_get(arr, i); } /* This function will return the greatest free block if a heap has been @@ -132,14 +162,16 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, { struct rte_memzone *mz; struct rte_mem_config *mcfg; + struct rte_fbarray *arr; size_t requested_len; int socket; /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; /* no more room in config */ - if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) { + if (arr->count >= arr->capacity) { RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__); rte_errno = ENOSPC; return NULL; @@ -231,19 +263,19 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, return NULL; } - const struct malloc_elem *elem = malloc_elem_from_data(mz_addr); + struct malloc_elem *elem = malloc_elem_from_data(mz_addr); /* fill the zone in config */ mz = get_next_free_memzone(); if (mz == NULL) { - RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room " - "in config!\n", __func__); + RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room in config!\n", + __func__); rte_errno = ENOSPC; + malloc_heap_free(elem); return NULL; } - mcfg->memzone_cnt++; snprintf(mz->name, sizeof(mz->name), "%s", name); mz->iova = rte_malloc_virt2iova(mz_addr); mz->addr = mz_addr; @@ -356,6 +388,8 @@ int rte_memzone_free(const struct rte_memzone *mz) { struct rte_mem_config *mcfg; + struct rte_fbarray *arr; + struct rte_memzone *found_mz; int ret = 0; void *addr; unsigned idx; @@ -364,21 +398,22 @@ rte_memzone_free(const struct rte_memzone *mz) return -EINVAL; mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; rte_rwlock_write_lock(&mcfg->mlock); - idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone); - idx = idx / sizeof(struct rte_memzone); + idx = 
rte_fbarray_find_idx(arr, mz); + found_mz = rte_fbarray_get(arr, idx); - addr = mcfg->memzone[idx].addr; + addr = found_mz->addr; if (addr == NULL) ret = -EINVAL; - else if (mcfg->memzone_cnt == 0) { + else if (arr->count == 0) { rte_panic("%s(): memzone address not NULL but memzone_cnt is 0!\n", __func__); } else { - memset(&mcfg->memzone[idx], 0, sizeof(mcfg->memzone[idx])); - mcfg->memzone_cnt--; + memset(found_mz, 0, sizeof(*found_mz)); + rte_fbarray_set_used(arr, idx, false); } rte_rwlock_write_unlock(&mcfg->mlock); @@ -412,25 +447,71 @@ rte_memzone_lookup(const char *name) void rte_memzone_dump(FILE *f) { + struct rte_fbarray *arr; struct rte_mem_config *mcfg; - unsigned i = 0; + int i = 0; /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; + arr = &mcfg->memzones; rte_rwlock_read_lock(&mcfg->mlock); /* dump all zones */ - for (i=0; i<RTE_MAX_MEMZONE; i++) { - if (mcfg->memzone[i].addr == NULL) - break; - fprintf(f, "Zone %u: name:<%s>, IO:0x%"PRIx64", len:0x%zx" + while ((i = rte_fbarray_find_next_used(arr, i)) >= 0) { + void *cur_addr, *mz_end; + struct rte_memzone *mz; + struct rte_memseg_list *msl = NULL; + struct rte_memseg *ms; + int msl_idx, ms_idx; + + mz = rte_fbarray_get(arr, i); + + /* + * memzones can span multiple physical pages, so dump addresses + * of all physical pages this memzone spans.
+ */ + + fprintf(f, "Zone %u: name:<%s>, len:0x%zx" ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i, - mcfg->memzone[i].name, - mcfg->memzone[i].iova, - mcfg->memzone[i].len, - mcfg->memzone[i].addr, - mcfg->memzone[i].socket_id, - mcfg->memzone[i].flags); + mz->name, + mz->len, + mz->addr, + mz->socket_id, + mz->flags); + + /* get pointer to appropriate memseg list */ + for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) { + if (mcfg->memsegs[msl_idx].hugepage_sz != mz->hugepage_sz) + continue; + if (mcfg->memsegs[msl_idx].socket_id != mz->socket_id) + continue; + msl = &mcfg->memsegs[msl_idx]; + break; + } + if (!msl) { + RTE_LOG(DEBUG, EAL, "Skipping bad memzone\n"); + continue; + } + + cur_addr = RTE_PTR_ALIGN_FLOOR(mz->addr, mz->hugepage_sz); + mz_end = RTE_PTR_ADD(cur_addr, mz->len); + + ms_idx = RTE_PTR_DIFF(mz->addr, msl->base_va) / + msl->hugepage_sz; + ms = rte_fbarray_get(&msl->memseg_arr, ms_idx); + + fprintf(f, "physical pages used:\n"); + do { + fprintf(f, " addr: %p iova: 0x%" PRIx64 " len: 0x%" PRIx64 " len: 0x%" PRIx64 "\n", + cur_addr, ms->iova, ms->len, ms->hugepage_sz); + + /* advance VA to next page */ + cur_addr = RTE_PTR_ADD(cur_addr, ms->hugepage_sz); + + /* memzones occupy contiguous segments */ + ++ms; + } while (cur_addr < mz_end); + i++; } rte_rwlock_read_unlock(&mcfg->mlock); } @@ -459,9 +540,11 @@ rte_eal_memzone_init(void) rte_rwlock_write_lock(&mcfg->mlock); - /* delete all zones */ - mcfg->memzone_cnt = 0; - memset(mcfg->memzone, 0, sizeof(mcfg->memzone)); + if (rte_fbarray_alloc(&mcfg->memzones, "memzone", 256, + RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) { + RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n"); + return -1; + } rte_rwlock_write_unlock(&mcfg->mlock); @@ -473,14 +556,19 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *), void *arg) { struct rte_mem_config *mcfg; - unsigned i; + struct rte_fbarray *arr; + int i; mcfg = rte_eal_get_configuration()->mem_config; + arr = 
&mcfg->memzones; + + i = 0; rte_rwlock_read_lock(&mcfg->mlock); - for (i=0; i<RTE_MAX_MEMZONE; i++) { - if (mcfg->memzone[i].addr != NULL) - (*func)(&mcfg->memzone[i], arg); + while ((i = rte_fbarray_find_next_used(arr, i)) > 0) { + struct rte_memzone *mz = rte_fbarray_get(arr, i); + (*func)(mz, arg); + i++; } rte_rwlock_read_unlock(&mcfg->mlock); } diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h index c9b57a4..8f4cc34 100644 --- a/lib/librte_eal/common/include/rte_eal_memconfig.h +++ b/lib/librte_eal/common/include/rte_eal_memconfig.h @@ -86,10 +86,8 @@ struct rte_mem_config { rte_rwlock_t qlock; /**< used for tailq operation for thread safe. */ rte_rwlock_t mplock; /**< only used by mempool LIB for thread-safe. */ - uint32_t memzone_cnt; /**< Number of allocated memzones */ - /* memory segments and zones */ - struct rte_memzone memzone[RTE_MAX_MEMZONE]; /**< Memzone descriptors. */ + struct rte_fbarray memzones; /**< Memzone descriptors. */ struct rte_memseg_list memsegs[RTE_MAX_MEMSEG_LISTS]; /**< list of dynamic arrays holding memsegs */