From patchwork Mon Nov  7 23:15:36 2022
X-Patchwork-Submitter: Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
X-Patchwork-Id: 119532
X-Patchwork-Delegate: thomas@monjalon.net
From: Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
To: olivier.matz@6wind.com, andrew.rybchenko@oktetlabs.ru
Cc: honnappa.nagarahalli@arm.com, ruifeng.wang@arm.com,
 bruce.richardson@intel.com, mb@smartsharesystems.com, dev@dpdk.org,
 nd@arm.com, Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
Subject: [RFC] mempool: add API to return pointer to free space on per-core cache
Date: Mon,  7 Nov 2022 23:15:36 +0000
Message-Id: <20221107231536.1135652-1-kamalakshitha.aligeri@arm.com>
X-Mailer: git-send-email 2.25.1

Expose a pointer to the free space in the per-core cache, so that a PMD
can copy objects directly into the cache without any temporary storage.

Signed-off-by: Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
---
Pending work:
1. Internal review needs to be done.
2. Make the same change in i40e_tx_free_bufs_avx512.
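
For context, a minimal usage sketch of the new API from a caller's point
of view (illustrative only: put_burst() and its parameter names are
hypothetical, not part of this patch, and the sketch assumes the patch
below is applied):

#include <rte_memcpy.h>
#include <rte_mempool.h>

/* Return a burst of object pointers to the mempool, copying them
 * straight into the per-core cache when space can be reserved there.
 */
static void
put_burst(struct rte_mempool *mp, void **objs, unsigned int n)
{
	void **cache_objs = rte_mempool_get_cache(mp, n);

	if (cache_objs != NULL)
		/* Space was reserved in the per-core cache; copy directly. */
		rte_memcpy(cache_objs, objs, n * sizeof(void *));
	else
		/* No cache, or n is too big for it; use the backend. */
		rte_mempool_ops_enqueue_bulk(mp, objs, n);
}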

 app/test/test_mempool.c                 | 146 ++++++++++++++++++++++++
 drivers/net/i40e/i40e_rxtx_vec_common.h |  23 +++-
 lib/mempool/rte_mempool.h               |  34 ++++++
 3 files changed, 198 insertions(+), 5 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index 8e493eda47..889d8f5d5c 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -187,6 +187,148 @@ test_mempool_basic(struct rte_mempool *mp, int use_external_cache)
 	return ret;
 }
 
+/* basic tests (done on one core) */
+static int
+test_mempool_get_cache(struct rte_mempool *mp, int use_external_cache)
+{
+	uint32_t *objnum;
+	void **objtable;
+	void *obj, *obj2;
+	char *obj_data;
+	int ret = 0;
+	unsigned i, j;
+	int offset;
+	struct rte_mempool_cache *cache;
+	void **cache_objs;
+
+	if (use_external_cache) {
+		/* Create a user-owned mempool cache. */
+		cache = rte_mempool_cache_create(RTE_MEMPOOL_CACHE_MAX_SIZE,
+						 SOCKET_ID_ANY);
+		if (cache == NULL)
+			RET_ERR();
+	} else {
+		/* May be NULL if cache is disabled. */
+		cache = rte_mempool_default_cache(mp, rte_lcore_id());
+	}
+
+	/* dump the mempool status */
+	rte_mempool_dump(stdout, mp);
+
+	printf("get an object\n");
+	if (rte_mempool_generic_get(mp, &obj, 1, cache) < 0)
+		GOTO_ERR(ret, out);
+	rte_mempool_dump(stdout, mp);
+
+	/* tests that improve coverage */
+	printf("get object count\n");
+	/* We have to count the extra caches, one in this case. */
+	offset = use_external_cache ? 1 * cache->len : 0;
+	if (rte_mempool_avail_count(mp) + offset != MEMPOOL_SIZE - 1)
+		GOTO_ERR(ret, out);
+
+	printf("get private data\n");
+	if (rte_mempool_get_priv(mp) != (char *)mp +
+			RTE_MEMPOOL_HEADER_SIZE(mp, mp->cache_size))
+		GOTO_ERR(ret, out);
+
+#ifndef RTE_EXEC_ENV_FREEBSD /* rte_mem_virt2iova() not supported on bsd */
+	printf("get physical address of an object\n");
+	if (rte_mempool_virt2iova(obj) != rte_mem_virt2iova(obj))
+		GOTO_ERR(ret, out);
+#endif
+
+	printf("put the object back\n");
+	cache_objs = rte_mempool_get_cache(mp, 1);
+	if (cache_objs != NULL)
+		rte_memcpy(cache_objs, &obj, sizeof(void *));
+	else
+		rte_mempool_ops_enqueue_bulk(mp, &obj, 1);
+	rte_mempool_dump(stdout, mp);
+
+	printf("get 2 objects\n");
+	if (rte_mempool_generic_get(mp, &obj, 1, cache) < 0)
+		GOTO_ERR(ret, out);
+	if (rte_mempool_generic_get(mp, &obj2, 1, cache) < 0) {
+		rte_mempool_generic_put(mp, &obj, 1, cache);
+		GOTO_ERR(ret, out);
+	}
+	rte_mempool_dump(stdout, mp);
+
+	printf("put the objects back\n");
+	cache_objs = rte_mempool_get_cache(mp, 1);
+	if (cache_objs != NULL)
+		rte_memcpy(cache_objs, &obj, sizeof(void *));
+	else
+		rte_mempool_ops_enqueue_bulk(mp, &obj, 1);
+	cache_objs = rte_mempool_get_cache(mp, 1);
+	if (cache_objs != NULL)
+		rte_memcpy(cache_objs, &obj2, sizeof(void *));
+	else
+		rte_mempool_ops_enqueue_bulk(mp, &obj2, 1);
+	rte_mempool_dump(stdout, mp);
+
+	/*
+	 * get many objects: we cannot get them all because the cache
+	 * on other cores may not be empty.
+	 */
+	objtable = malloc(MEMPOOL_SIZE * sizeof(void *));
+	if (objtable == NULL)
+		GOTO_ERR(ret, out);
+
+	for (i = 0; i < MEMPOOL_SIZE; i++) {
+		if (rte_mempool_generic_get(mp, &objtable[i], 1, cache) < 0)
+			break;
+	}
+
+	/*
+	 * for each object, check that its content was not modified,
+	 * and put objects back in pool
+	 */
+	cache_objs = rte_mempool_get_cache(mp, i);
+	if (cache_objs != NULL) {
+		while (i--) {
+			obj = objtable[i];
+			obj_data = obj;
+			objnum = obj;
+			if (*objnum > MEMPOOL_SIZE) {
+				printf("bad object number(%d)\n", *objnum);
+				ret = -1;
+				break;
+			}
+			for (j = sizeof(*objnum); j < mp->elt_size; j++) {
+				if (obj_data[j] != 0)
+					ret = -1;
+			}
+
+			rte_memcpy(&cache_objs[i], &objtable[i], sizeof(void *));
+		}
+	} else {
+		rte_mempool_ops_enqueue_bulk(mp, objtable, i);
+	}
+
+	free(objtable);
+	if (ret == -1)
+		printf("objects were modified!\n");
+
+out:
+	if (use_external_cache) {
+		rte_mempool_cache_flush(cache, mp);
+		rte_mempool_cache_free(cache);
+	}
+
+	return ret;
+}
+
 static int
 test_mempool_creation_with_exceeded_cache_size(void)
 {
 	struct rte_mempool *mp_cov;
@@ -986,6 +1128,10 @@ test_mempool(void)
 	if (test_mempool_basic(mp_cache, 0) < 0)
 		GOTO_ERR(ret, err);
 
+	/* basic tests with get cache */
+	if (test_mempool_get_cache(mp_cache, 0) < 0)
+		GOTO_ERR(ret, err);
+
 	/* basic tests with user-owned cache */
 	if (test_mempool_basic(mp_nocache, 1) < 0)
 		GOTO_ERR(ret, err);
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index fe1a6ec75e..9626df455b 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -99,14 +99,27 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
 	 *  tx_next_dd - (tx_rs_thresh-1)
 	 */
 	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+	struct rte_mempool *mp = txep[0].mbuf->pool;
 
 	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
-		for (i = 0; i < n; i++) {
-			free[i] = txep[i].mbuf;
-			/* no need to reset txep[i].mbuf in vector path */
+		void **cache_objs;
+
+		cache_objs = rte_mempool_get_cache(mp, n);
+		if (cache_objs != NULL) {
+			for (i = 0; i < n; i++) {
+				/* no need to reset txep[i].mbuf in vector path */
+				rte_memcpy(&cache_objs[i], &txep->mbuf,
+					   sizeof(txep->mbuf));
+				txep++;
+			}
+		} else {
+			for (i = 0; i < n; i++) {
+				free[i] = txep->mbuf;
+				txep++;
+			}
+			rte_mempool_ops_enqueue_bulk(mp, (void **)free, n);
 		}
-		rte_mempool_put_bulk(free[0]->pool, (void **)free, n);
 		goto done;
 	}
 
 	m = rte_pktmbuf_prefree_seg(txep[0].mbuf);
diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h
index 1f5707f46a..6a079302f3 100644
--- a/lib/mempool/rte_mempool.h
+++ b/lib/mempool/rte_mempool.h
@@ -1360,6 +1360,40 @@ rte_mempool_do_generic_put(struct rte_mempool *mp, void * const *obj_table,
 	rte_mempool_ops_enqueue_bulk(mp, obj_table, n);
 }
 
+/**
+ * @internal Return a pointer to free space in the per-core cache;
+ * used internally.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param n
+ *   The number of objects to store back in the mempool, must be strictly
+ *   positive.
+ * @return
+ *   A pointer to free space in the per-core cache, into which the caller
+ *   must copy n object pointers, or NULL if there is no cache or the
+ *   request is too big for it; in that case the caller must enqueue the
+ *   objects to the mempool backend itself.
+ */
+static __rte_always_inline void **
+rte_mempool_get_cache(struct rte_mempool *mp, unsigned int n)
+{
+	void **cache_objs;
+
+	/* Increment stats now; putting objects back always succeeds. */
+	RTE_MEMPOOL_STAT_ADD(mp, put_bulk, 1);
+	RTE_MEMPOOL_STAT_ADD(mp, put_objs, n);
+
+	struct rte_mempool_cache *cache = rte_mempool_default_cache(mp,
+							rte_lcore_id());
+
+	/* No cache provided, or the request itself is too big for the cache */
+	if (unlikely(cache == NULL || n > cache->flushthresh))
+		return NULL;
+
+	if (cache->len + n <= cache->flushthresh) {
+		/* The new objects fit after the current cache contents. */
+		cache_objs = &cache->objs[cache->len];
+		cache->len += n;
+	} else {
+		/* Flush the current contents to the backend, then hand out
+		 * the start of the cache for the new objects.
+		 */
+		cache_objs = &cache->objs[0];
+		rte_mempool_ops_enqueue_bulk(mp, cache_objs, cache->len);
+		cache->len = n;
+	}
+
+	return cache_objs;
+}
+
 /**
  * Put several objects back in the mempool.
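
A worked example of the cache arithmetic in rte_mempool_get_cache(),
assuming the default configuration in which flushthresh is 1.5 times the
cache size (e.g. cache size 512, flushthresh 768); the numbers are
illustrative only:

- cache->len = 700, n = 32: 700 + 32 <= 768, so the caller's pointers go
  to &cache->objs[700] and cache->len becomes 732.
- cache->len = 750, n = 32: 750 + 32 > 768, so the 750 cached pointers
  are flushed to the backend first, the new ones go to &cache->objs[0],
  and cache->len becomes 32.
- n = 800 > flushthresh: the function returns NULL and the caller must
  enqueue all 800 pointers to the backend itself.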