From patchwork Mon Oct 31 11:26:32 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: =?utf-8?q?Morten_Br=C3=B8rup?= X-Patchwork-Id: 119362 Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id A655AA00C5; Mon, 31 Oct 2022 12:26:44 +0100 (CET) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 4000440A79; Mon, 31 Oct 2022 12:26:41 +0100 (CET) Received: from smartserver.smartsharesystems.com (smartserver.smartsharesystems.com [77.243.40.215]) by mails.dpdk.org (Postfix) with ESMTP id 749A840223 for ; Mon, 31 Oct 2022 12:26:38 +0100 (CET) Received: from dkrd2.smartsharesys.local ([192.168.4.12]) by smartserver.smartsharesystems.com with Microsoft SMTPSVC(6.0.3790.4675); Mon, 31 Oct 2022 12:26:35 +0100 From: =?utf-8?q?Morten_Br=C3=B8rup?= To: olivier.matz@6wind.com, andrew.rybchenko@oktetlabs.ru, stephen@networkplumber.org, jerinj@marvell.com, bruce.richardson@intel.com Cc: thomas@monjalon.net, dev@dpdk.org, =?utf-8?q?Morten_Br=C3=B8rup?= Subject: [PATCH v2 1/3] mempool: split statistics from debug Date: Mon, 31 Oct 2022 12:26:32 +0100 Message-Id: <20221031112634.18329-1-mb@smartsharesystems.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20221030115445.2115-1-mb@smartsharesystems.com> References: <20221030115445.2115-1-mb@smartsharesystems.com> MIME-Version: 1.0 X-OriginalArrivalTime: 31 Oct 2022 11:26:35.0951 (UTC) FILETIME=[A2D9C3F0:01D8ED1B] X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Split statistics from debug, to make mempool statistics available without the performance cost of continuously validating the cookies in the mempool elements. 
mempool_perf_autotest shows the following change in rate_persec. When enabling mempool debug without this patch: -28.1 % and -74.0 %, respectively without and with cache. When enabling mempool stats (but not debug) with this patch: -5.8 % and -21.2 %, respectively without and with cache. v2: * Fix checkpatch warning: Use C style comments in rte_config.h, not C++ style. * Do not rename the rte_mempool_debug_stats structure. Signed-off-by: Morten Brørup --- config/rte_config.h | 2 ++ lib/mempool/rte_mempool.c | 6 +++--- lib/mempool/rte_mempool.h | 6 +++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/config/rte_config.h b/config/rte_config.h index ae56a86394..3c4876d434 100644 --- a/config/rte_config.h +++ b/config/rte_config.h @@ -47,6 +47,8 @@ /* mempool defines */ #define RTE_MEMPOOL_CACHE_MAX_SIZE 512 +/* RTE_LIBRTE_MEMPOOL_STATS is not set */ +/* RTE_LIBRTE_MEMPOOL_DEBUG is not set */ /* mbuf defines */ #define RTE_MBUF_DEFAULT_MEMPOOL_OPS "ring_mp_mc" diff --git a/lib/mempool/rte_mempool.c b/lib/mempool/rte_mempool.c index 21c94a2b9f..62d1ce764e 100644 --- a/lib/mempool/rte_mempool.c +++ b/lib/mempool/rte_mempool.c @@ -818,7 +818,7 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size, RTE_CACHE_LINE_MASK) != 0); RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) & RTE_CACHE_LINE_MASK) != 0); -#ifdef RTE_LIBRTE_MEMPOOL_DEBUG +#ifdef RTE_LIBRTE_MEMPOOL_STATS RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) & RTE_CACHE_LINE_MASK) != 0); RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, stats) & @@ -1221,7 +1221,7 @@ rte_mempool_audit(struct rte_mempool *mp) void rte_mempool_dump(FILE *f, struct rte_mempool *mp) { -#ifdef RTE_LIBRTE_MEMPOOL_DEBUG +#ifdef RTE_LIBRTE_MEMPOOL_STATS struct rte_mempool_info info; struct rte_mempool_debug_stats sum; unsigned lcore_id; @@ -1269,7 +1269,7 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) fprintf(f, " common_pool_count=%u\n", common_count); /* sum and dump statistics */ 
-#ifdef RTE_LIBRTE_MEMPOOL_DEBUG +#ifdef RTE_LIBRTE_MEMPOOL_STATS rte_mempool_ops_get_info(mp, &info); memset(&sum, 0, sizeof(sum)); for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h index 3725a72951..9c4bf5549f 100644 --- a/lib/mempool/rte_mempool.h +++ b/lib/mempool/rte_mempool.h @@ -56,7 +56,7 @@ extern "C" { #define RTE_MEMPOOL_HEADER_COOKIE2 0xf2eef2eedadd2e55ULL /**< Header cookie. */ #define RTE_MEMPOOL_TRAILER_COOKIE 0xadd2e55badbadbadULL /**< Trailer cookie.*/ -#ifdef RTE_LIBRTE_MEMPOOL_DEBUG +#ifdef RTE_LIBRTE_MEMPOOL_STATS /** * A structure that stores the mempool statistics (per-lcore). * Note: Cache stats (put_cache_bulk/objs, get_cache_bulk/objs) are not @@ -237,7 +237,7 @@ struct rte_mempool { uint32_t nb_mem_chunks; /**< Number of memory chunks */ struct rte_mempool_memhdr_list mem_list; /**< List of memory chunks */ -#ifdef RTE_LIBRTE_MEMPOOL_DEBUG +#ifdef RTE_LIBRTE_MEMPOOL_STATS /** Per-lcore statistics. */ struct rte_mempool_debug_stats stats[RTE_MAX_LCORE]; #endif @@ -302,7 +302,7 @@ struct rte_mempool { * @param n * Number to add to the object-oriented statistics. 
*/ -#ifdef RTE_LIBRTE_MEMPOOL_DEBUG +#ifdef RTE_LIBRTE_MEMPOOL_STATS #define RTE_MEMPOOL_STAT_ADD(mp, name, n) do { \ unsigned __lcore_id = rte_lcore_id(); \ if (__lcore_id < RTE_MAX_LCORE) { \ From patchwork Mon Oct 31 11:26:34 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: =?utf-8?q?Morten_Br=C3=B8rup?= X-Patchwork-Id: 119363 Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 0FBF7A00C5; Mon, 31 Oct 2022 12:26:50 +0100 (CET) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 2C22440693; Mon, 31 Oct 2022 12:26:42 +0100 (CET) Received: from smartserver.smartsharesystems.com (smartserver.smartsharesystems.com [77.243.40.215]) by mails.dpdk.org (Postfix) with ESMTP id 976EB40151 for ; Mon, 31 Oct 2022 12:26:38 +0100 (CET) Received: from dkrd2.smartsharesys.local ([192.168.4.12]) by smartserver.smartsharesystems.com with Microsoft SMTPSVC(6.0.3790.4675); Mon, 31 Oct 2022 12:26:37 +0100 From: =?utf-8?q?Morten_Br=C3=B8rup?= To: olivier.matz@6wind.com, andrew.rybchenko@oktetlabs.ru, stephen@networkplumber.org, jerinj@marvell.com, bruce.richardson@intel.com Cc: thomas@monjalon.net, dev@dpdk.org, =?utf-8?q?Morten_Br=C3=B8rup?= Subject: [PATCH v2 3/3] mempool: use cache for frequently updated statistics Date: Mon, 31 Oct 2022 12:26:34 +0100 Message-Id: <20221031112634.18329-3-mb@smartsharesystems.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20221031112634.18329-1-mb@smartsharesystems.com> References: <20221030115445.2115-1-mb@smartsharesystems.com> <20221031112634.18329-1-mb@smartsharesystems.com> MIME-Version: 1.0 X-OriginalArrivalTime: 31 Oct 2022 11:26:37.0592 (UTC) FILETIME=[A3D42980:01D8ED1B] X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions 
List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org When built with statistics enabled (RTE_LIBRTE_MEMPOOL_STATS defined), the performance of mempools with caches is improved as follows. When accessing objects in the mempool, either the put_bulk and put_objs or the get_success_bulk and get_success_objs statistics counters are likely to be incremented. By adding an alternative set of these counters to the mempool cache structure, accessing the dedicated statistics structure is avoided in the likely cases where these counters are incremented. The trick here is that the cache line holding the mempool cache structure is accessed anyway, in order to access the 'len' or 'flushthresh' fields. Updating some statistics counters in the same cache line has lower performance cost than accessing the statistics counters in the dedicated statistics structure, which resides in another cache line. mempool_perf_autotest with this patch shows the following change in rate_persec. Compared to only splitting statistics from debug: +1.5 % and +14.4 %, respectively without and with cache. Compared to not enabling mempool stats: -4.4 % and -9.9 %, respectively without and with cache. v2: * Move the statistics counters into a stats structure. Signed-off-by: Morten Brørup --- lib/mempool/rte_mempool.c | 9 +++++ lib/mempool/rte_mempool.h | 73 ++++++++++++++++++++++++++++++++------- 2 files changed, 69 insertions(+), 13 deletions(-) diff --git a/lib/mempool/rte_mempool.c b/lib/mempool/rte_mempool.c index e6208125e0..a18e39af04 100644 --- a/lib/mempool/rte_mempool.c +++ b/lib/mempool/rte_mempool.c @@ -1286,6 +1286,15 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp) sum.get_success_blks += mp->stats[lcore_id].get_success_blks; sum.get_fail_blks += mp->stats[lcore_id].get_fail_blks; } + if (mp->cache_size != 0) { + /* Add the statistics stored in the mempool caches. 
*/ + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + sum.put_bulk += mp->local_cache[lcore_id].stats.put_bulk; + sum.put_objs += mp->local_cache[lcore_id].stats.put_objs; + sum.get_success_bulk += mp->local_cache[lcore_id].stats.get_success_bulk; + sum.get_success_objs += mp->local_cache[lcore_id].stats.get_success_objs; + } + } fprintf(f, " stats:\n"); fprintf(f, " put_bulk=%"PRIu64"\n", sum.put_bulk); fprintf(f, " put_objs=%"PRIu64"\n", sum.put_objs); diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h index 16e7e62e3c..5806e75609 100644 --- a/lib/mempool/rte_mempool.h +++ b/lib/mempool/rte_mempool.h @@ -86,6 +86,21 @@ struct rte_mempool_cache { uint32_t size; /**< Size of the cache */ uint32_t flushthresh; /**< Threshold before we flush excess elements */ uint32_t len; /**< Current cache count */ + uint32_t unused0; +#ifdef RTE_LIBRTE_MEMPOOL_STATS + /* + * Alternative location for the most frequently updated mempool statistics (per-lcore), + * providing faster update access when using a mempool cache. + */ + struct { + uint64_t put_bulk; /**< Number of puts. */ + uint64_t put_objs; /**< Number of objects successfully put. */ + uint64_t get_success_bulk; /**< Successful allocation number. */ + uint64_t get_success_objs; /**< Objects successfully allocated. */ + } stats; /**< Statistics */ +#else + uint64_t unused1[4]; +#endif /** * Cache objects * @@ -296,14 +311,14 @@ struct rte_mempool { | RTE_MEMPOOL_F_NO_IOVA_CONTIG \ ) /** - * @internal When debug is enabled, store some statistics. + * @internal When stats is enabled, store some statistics. * * @param mp * Pointer to the memory pool. * @param name * Name of the statistics field to increment in the memory pool. * @param n - * Number to add to the object-oriented statistics. + * Number to add to the statistics. 
*/ #ifdef RTE_LIBRTE_MEMPOOL_STATS #define RTE_MEMPOOL_STAT_ADD(mp, name, n) do { \ @@ -312,6 +327,23 @@ struct rte_mempool { #else #define RTE_MEMPOOL_STAT_ADD(mp, name, n) do {} while (0) #endif +/** + * @internal When stats is enabled, store some statistics. + * + * @param cache + * Pointer to the memory pool cache. + * @param name + * Name of the statistics field to increment in the memory pool cache. + * @param n + * Number to add to the statistics. + */ +#ifdef RTE_LIBRTE_MEMPOOL_STATS +#define RTE_MEMPOOL_CACHE_STAT_ADD(cache, name, n) do { \ + (cache)->stats.name += n; \ + } while (0) +#else +#define RTE_MEMPOOL_CACHE_STAT_ADD(cache, name, n) do {} while (0) +#endif /** * @internal Calculate the size of the mempool header. @@ -1327,13 +1359,17 @@ rte_mempool_do_generic_put(struct rte_mempool *mp, void * const *obj_table, { void **cache_objs; + /* No cache provided */ + if (unlikely(cache == NULL)) + goto driver_enqueue; + /* increment stat now, adding in mempool always success */ - RTE_MEMPOOL_STAT_ADD(mp, put_bulk, 1); - RTE_MEMPOOL_STAT_ADD(mp, put_objs, n); + RTE_MEMPOOL_CACHE_STAT_ADD(cache, put_bulk, 1); + RTE_MEMPOOL_CACHE_STAT_ADD(cache, put_objs, n); - /* No cache provided or the request itself is too big for the cache */ - if (unlikely(cache == NULL || n > cache->flushthresh)) - goto driver_enqueue; + /* The request itself is too big for the cache */ + if (unlikely(n > cache->flushthresh)) + goto driver_enqueue_stats_incremented; /* * The cache follows the following algorithm: @@ -1358,6 +1394,12 @@ rte_mempool_do_generic_put(struct rte_mempool *mp, void * const *obj_table, driver_enqueue: + /* increment stat now, adding in mempool always success */ + RTE_MEMPOOL_STAT_ADD(mp, put_bulk, 1); + RTE_MEMPOOL_STAT_ADD(mp, put_objs, n); + +driver_enqueue_stats_incremented: + /* push objects to the backend */ rte_mempool_ops_enqueue_bulk(mp, obj_table, n); } @@ -1464,8 +1506,8 @@ rte_mempool_do_generic_get(struct rte_mempool *mp, void **obj_table, if 
(remaining == 0) { /* The entire request is satisfied from the cache. */ - RTE_MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); - RTE_MEMPOOL_STAT_ADD(mp, get_success_objs, n); + RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1); + RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n); return 0; } @@ -1494,8 +1536,8 @@ rte_mempool_do_generic_get(struct rte_mempool *mp, void **obj_table, cache->len = cache->size; - RTE_MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); - RTE_MEMPOOL_STAT_ADD(mp, get_success_objs, n); + RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1); + RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n); return 0; @@ -1517,8 +1559,13 @@ rte_mempool_do_generic_get(struct rte_mempool *mp, void **obj_table, RTE_MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1); RTE_MEMPOOL_STAT_ADD(mp, get_fail_objs, n); } else { - RTE_MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); - RTE_MEMPOOL_STAT_ADD(mp, get_success_objs, n); + if (likely(cache != NULL)) { + RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1); + RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n); + } else { + RTE_MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); + RTE_MEMPOOL_STAT_ADD(mp, get_success_objs, n); + } } return ret;