[5/6] mempool/cnxk: add cn10k batch enqueue/dequeue support

Message ID 20210305162149.2196166-6-asekhar@marvell.com (mailing list archive)
State Changes Requested, archived
Delegated to: Jerin Jacob
Series Add Marvell CNXK mempool driver

Checks

Context        Check     Description
ci/checkpatch  warning   coding style issues

Commit Message

Ashwin Sekhar T K March 5, 2021, 4:21 p.m. UTC
  Add support for asynchronous batch enqueue/dequeue
of pointers to/from the NPA pool.
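
For readers of the archive, a minimal sketch (illustration only, not part of
this patch) of how a pool reaches these ops through the standard mempool API.
The function name make_cn10k_pool() and the pool geometry are assumptions for
the example:

#include <rte_memory.h>
#include <rte_mempool.h>

static struct rte_mempool *
make_cn10k_pool(void)
{
	struct rte_mempool *mp;

	/* Pool geometry (8192 objects of 128 B, 512-deep per-lcore cache)
	 * is arbitrary and only for illustration.
	 */
	mp = rte_mempool_create_empty("example_pool", 8192, 128, 512, 0,
				      SOCKET_ID_ANY, 0);
	if (mp == NULL)
		return NULL;

	/* Dispatch enqueue/dequeue/get_count to the cn10k ops below. */
	if (rte_mempool_set_ops_byname(mp, "cn10k_mempool_ops", NULL) != 0 ||
	    rte_mempool_populate_default(mp) < 0) {
		rte_mempool_free(mp);
		return NULL;
	}

	return mp;
}

On cn10k the ops are also installed as the platform default via
rte_mbuf_set_platform_mempool_ops() in the last hunk of this patch, so mbuf
pools pick them up without an explicit rte_mempool_set_ops_byname() call.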

Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
---
 drivers/mempool/cnxk/cn10k_mempool_ops.c | 258 ++++++++++++++++++++++-
 drivers/mempool/cnxk/cnxk_mempool.c      |  19 +-
 drivers/mempool/cnxk/cnxk_mempool.h      |   3 +-
 drivers/mempool/cnxk/cnxk_mempool_ops.c  |  28 +++
 4 files changed, 287 insertions(+), 21 deletions(-)
  

Comments

Jerin Jacob March 28, 2021, 9:22 a.m. UTC | #1
On Fri, Mar 5, 2021 at 11:44 PM Ashwin Sekhar T K <asekhar@marvell.com> wrote:
>
> Add support for asynchronous batch enqueue/dequeue
> of pointers from NPA pool.

Please split the enq and deq into separate patches. The rest looks good to me.


>
> Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
> ---
  

Patch

diff --git a/drivers/mempool/cnxk/cn10k_mempool_ops.c b/drivers/mempool/cnxk/cn10k_mempool_ops.c
index fc7592fd94..131abc0723 100644
--- a/drivers/mempool/cnxk/cn10k_mempool_ops.c
+++ b/drivers/mempool/cnxk/cn10k_mempool_ops.c
@@ -7,11 +7,239 @@ 
 #include "roc_api.h"
 #include "cnxk_mempool.h"
 
+#define BATCH_ALLOC_SZ ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS
+
+enum batch_op_status {
+	BATCH_ALLOC_OP_NOT_ISSUED = 0,
+	BATCH_ALLOC_OP_ISSUED = 1,
+	BATCH_ALLOC_OP_DONE
+};
+
+struct batch_op_mem {
+	unsigned int sz;
+	enum batch_op_status status;
+	uint64_t objs[BATCH_ALLOC_SZ] __rte_aligned(ROC_ALIGN);
+};
+
+struct batch_op_data {
+	uint64_t lmt_addr;
+	struct batch_op_mem mem[RTE_MAX_LCORE] __rte_aligned(ROC_ALIGN);
+};
+
+static struct batch_op_data **batch_op_data;
+
+#define BATCH_OP_DATA_GET(pool_id)                                             \
+	batch_op_data[roc_npa_aura_handle_to_aura(pool_id)]
+
+#define BATCH_OP_DATA_SET(pool_id, op_data)                                    \
+	do {                                                                   \
+		uint64_t aura = roc_npa_aura_handle_to_aura(pool_id);          \
+		batch_op_data[aura] = op_data;                                 \
+	} while (0)
+
+int
+cn10k_mempool_lf_init(void)
+{
+	unsigned int maxpools, sz;
+
+	maxpools = roc_idev_npa_maxpools_get();
+	sz = maxpools * sizeof(uintptr_t);
+
+	batch_op_data = rte_zmalloc(NULL, sz, ROC_ALIGN);
+	if (!batch_op_data)
+		return -1;
+
+	return 0;
+}
+
+void
+cn10k_mempool_lf_fini(void)
+{
+	if (!batch_op_data)
+		return;
+
+	rte_free(batch_op_data);
+	batch_op_data = NULL;
+}
+
+static int
+batch_op_init(struct rte_mempool *mp)
+{
+	struct batch_op_data *op_data;
+	int i;
+
+	RTE_ASSERT(BATCH_OP_DATA_GET(mp->pool_id) == NULL);
+	op_data = rte_zmalloc(NULL, sizeof(struct batch_op_data), ROC_ALIGN);
+	if (op_data == NULL)
+		return -1;
+
+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		op_data->mem[i].sz = 0;
+		op_data->mem[i].status = BATCH_ALLOC_OP_NOT_ISSUED;
+	}
+
+	op_data->lmt_addr = roc_idev_lmt_base_addr_get();
+	BATCH_OP_DATA_SET(mp->pool_id, op_data);
+
+	return 0;
+}
+
+static void
+batch_op_fini(struct rte_mempool *mp)
+{
+	struct batch_op_data *op_data;
+	int i;
+
+	op_data = BATCH_OP_DATA_GET(mp->pool_id);
+
+	rte_wmb();
+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		struct batch_op_mem *mem = &op_data->mem[i];
+
+		if (mem->status == BATCH_ALLOC_OP_ISSUED) {
+			mem->sz = roc_npa_aura_batch_alloc_extract(
+				mem->objs, mem->objs, BATCH_ALLOC_SZ);
+			mem->status = BATCH_ALLOC_OP_DONE;
+		}
+		if (mem->status == BATCH_ALLOC_OP_DONE) {
+			roc_npa_aura_op_bulk_free(mp->pool_id, mem->objs,
+						  mem->sz, 1);
+			mem->status = BATCH_ALLOC_OP_NOT_ISSUED;
+		}
+	}
+
+	rte_free(op_data);
+	BATCH_OP_DATA_SET(mp->pool_id, NULL);
+}
+
+static int __rte_hot
+cn10k_mempool_enq(struct rte_mempool *mp, void *const *obj_table,
+		  unsigned int n)
+{
+	const uint64_t *ptr = (const uint64_t *)obj_table;
+	uint64_t lmt_addr = 0, lmt_id = 0;
+	struct batch_op_data *op_data;
+
+	/* Ensure mbuf init changes are written before the free pointers
+	 * are enqueued to the stack.
+	 */
+	rte_io_wmb();
+
+	if (n == 1) {
+		roc_npa_aura_op_free(mp->pool_id, 1, ptr[0]);
+		return 0;
+	}
+
+	op_data = BATCH_OP_DATA_GET(mp->pool_id);
+	lmt_addr = op_data->lmt_addr;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+	roc_npa_aura_op_batch_free(mp->pool_id, ptr, n, 1, lmt_addr, lmt_id);
+
+	return 0;
+}
+
+static int __rte_hot
+cn10k_mempool_deq(struct rte_mempool *mp, void **obj_table, unsigned int n)
+{
+	struct batch_op_data *op_data;
+	struct batch_op_mem *mem;
+	unsigned int count = 0;
+	int tid, rc, retry;
+	bool loop = true;
+
+	op_data = BATCH_OP_DATA_GET(mp->pool_id);
+	tid = rte_lcore_id();
+	mem = &op_data->mem[tid];
+
+	/* Issue batch alloc */
+	if (mem->status == BATCH_ALLOC_OP_NOT_ISSUED) {
+		rc = roc_npa_aura_batch_alloc_issue(mp->pool_id, mem->objs,
+						    BATCH_ALLOC_SZ, 0, 1);
+		/* If issue fails, try falling back to default alloc */
+		if (unlikely(rc))
+			return cnxk_mempool_deq(mp, obj_table, n);
+		mem->status = BATCH_ALLOC_OP_ISSUED;
+	}
+
+	retry = 4;
+	while (loop) {
+		unsigned int cur_sz;
+
+		if (mem->status == BATCH_ALLOC_OP_ISSUED) {
+			mem->sz = roc_npa_aura_batch_alloc_extract(
+				mem->objs, mem->objs, BATCH_ALLOC_SZ);
+
+			/* If partial alloc reduce the retry count */
+			retry -= (mem->sz != BATCH_ALLOC_SZ);
+			/* Break the loop if retry count exhausted */
+			loop = !!retry;
+			mem->status = BATCH_ALLOC_OP_DONE;
+		}
+
+		cur_sz = n - count;
+		if (cur_sz > mem->sz)
+			cur_sz = mem->sz;
+
+		/* Dequeue the pointers */
+		memcpy(&obj_table[count], &mem->objs[mem->sz - cur_sz],
+		       cur_sz * sizeof(uintptr_t));
+		mem->sz -= cur_sz;
+		count += cur_sz;
+
+		/* Break loop if the required pointers have been dequeued */
+		loop &= (count != n);
+
+		/* Issue next batch alloc if pointers are exhausted */
+		if (mem->sz == 0) {
+			rc = roc_npa_aura_batch_alloc_issue(
+				mp->pool_id, mem->objs, BATCH_ALLOC_SZ, 0, 1);
+			/* Break loop if issue failed and set status */
+			loop &= !rc;
+			mem->status = !rc;
+		}
+	}
+
+	if (unlikely(count != n)) {
+		/* No partial alloc allowed. Free up allocated pointers */
+		cn10k_mempool_enq(mp, obj_table, count);
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+static unsigned int
+cn10k_mempool_get_count(const struct rte_mempool *mp)
+{
+	struct batch_op_data *op_data;
+	unsigned int count = 0;
+	int i;
+
+	op_data = BATCH_OP_DATA_GET(mp->pool_id);
+
+	rte_wmb();
+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		struct batch_op_mem *mem = &op_data->mem[i];
+
+		if (mem->status == BATCH_ALLOC_OP_ISSUED)
+			count += roc_npa_aura_batch_alloc_count(mem->objs,
+								BATCH_ALLOC_SZ);
+
+		if (mem->status == BATCH_ALLOC_OP_DONE)
+			count += mem->sz;
+	}
+
+	count += cnxk_mempool_get_count(mp);
+
+	return count;
+}
+
 static int
 cn10k_mempool_alloc(struct rte_mempool *mp)
 {
 	uint32_t block_size;
 	size_t padding;
+	int rc;
 
 	block_size = mp->elt_size + mp->header_size + mp->trailer_size;
 	/* Align header size to ROC_ALIGN */
@@ -29,16 +257,36 @@  cn10k_mempool_alloc(struct rte_mempool *mp)
 		block_size += padding;
 	}
 
-	return cnxk_mempool_alloc(mp);
+	rc = cnxk_mempool_alloc(mp);
+	if (rc)
+		return rc;
+
+	rc = batch_op_init(mp);
+	if (rc) {
+		plt_err("Failed to init batch alloc mem rc=%d", rc);
+		goto error;
+	}
+
+	return 0;
+error:
+	cnxk_mempool_free(mp);
+	return rc;
+}
+
+static void
+cn10k_mempool_free(struct rte_mempool *mp)
+{
+	batch_op_fini(mp);
+	cnxk_mempool_free(mp);
 }
 
 static struct rte_mempool_ops cn10k_mempool_ops = {
 	.name = "cn10k_mempool_ops",
 	.alloc = cn10k_mempool_alloc,
-	.free = cnxk_mempool_free,
-	.enqueue = cnxk_mempool_enq,
-	.dequeue = cnxk_mempool_deq,
-	.get_count = cnxk_mempool_get_count,
+	.free = cn10k_mempool_free,
+	.enqueue = cn10k_mempool_enq,
+	.dequeue = cn10k_mempool_deq,
+	.get_count = cn10k_mempool_get_count,
 	.calc_mem_size = cnxk_mempool_calc_mem_size,
 	.populate = cnxk_mempool_populate,
 };
diff --git a/drivers/mempool/cnxk/cnxk_mempool.c b/drivers/mempool/cnxk/cnxk_mempool.c
index c24497a6e5..1bbe384fe7 100644
--- a/drivers/mempool/cnxk/cnxk_mempool.c
+++ b/drivers/mempool/cnxk/cnxk_mempool.c
@@ -14,14 +14,11 @@ 
 #include <rte_pci.h>
 
 #include "roc_api.h"
-#include "cnxk_mempool.h"
 
 #define CNXK_NPA_DEV_NAME	 RTE_STR(cnxk_npa_dev_)
 #define CNXK_NPA_DEV_NAME_LEN	 (sizeof(CNXK_NPA_DEV_NAME) + PCI_PRI_STR_SIZE)
 #define CNXK_NPA_MAX_POOLS_PARAM "max_pools"
 
-uintptr_t *cnxk_mempool_internal_data;
-
 static inline uint32_t
 npa_aura_size_to_u32(uint8_t val)
 {
@@ -82,33 +79,25 @@  static int
 npa_init(struct rte_pci_device *pci_dev)
 {
 	char name[CNXK_NPA_DEV_NAME_LEN];
-	size_t idata_offset, idata_sz;
 	const struct rte_memzone *mz;
 	struct roc_npa *dev;
-	int rc, maxpools;
+	int rc;
 
 	rc = plt_init();
 	if (rc < 0)
 		goto error;
 
-	maxpools = parse_aura_size(pci_dev->device.devargs);
-	/* Add the space for per-pool internal data pointers to memzone len */
-	idata_offset = RTE_ALIGN_CEIL(sizeof(*dev), ROC_ALIGN);
-	idata_sz = maxpools * sizeof(uintptr_t);
-
 	rc = -ENOMEM;
 	mz = rte_memzone_reserve_aligned(npa_dev_to_name(pci_dev, name),
-					 idata_offset + idata_sz, SOCKET_ID_ANY,
-					 0, RTE_CACHE_LINE_SIZE);
+					 sizeof(*dev), SOCKET_ID_ANY, 0,
+					 RTE_CACHE_LINE_SIZE);
 	if (mz == NULL)
 		goto error;
 
 	dev = mz->addr;
 	dev->pci_dev = pci_dev;
-	cnxk_mempool_internal_data = (uintptr_t *)(mz->addr_64 + idata_offset);
-	memset(cnxk_mempool_internal_data, 0, idata_sz);
 
-	roc_idev_npa_maxpools_set(maxpools);
+	roc_idev_npa_maxpools_set(parse_aura_size(pci_dev->device.devargs));
 	rc = roc_npa_dev_init(dev);
 	if (rc)
 		goto mz_free;
diff --git a/drivers/mempool/cnxk/cnxk_mempool.h b/drivers/mempool/cnxk/cnxk_mempool.h
index 8f226f861c..6e54346e6a 100644
--- a/drivers/mempool/cnxk/cnxk_mempool.h
+++ b/drivers/mempool/cnxk/cnxk_mempool.h
@@ -23,6 +23,7 @@  int __rte_hot cnxk_mempool_enq(struct rte_mempool *mp, void *const *obj_table,
 int __rte_hot cnxk_mempool_deq(struct rte_mempool *mp, void **obj_table,
 			       unsigned int n);
 
-extern uintptr_t *cnxk_mempool_internal_data;
+int cn10k_mempool_lf_init(void);
+void cn10k_mempool_lf_fini(void);
 
 #endif
diff --git a/drivers/mempool/cnxk/cnxk_mempool_ops.c b/drivers/mempool/cnxk/cnxk_mempool_ops.c
index 29a4c12208..18f125c7ac 100644
--- a/drivers/mempool/cnxk/cnxk_mempool_ops.c
+++ b/drivers/mempool/cnxk/cnxk_mempool_ops.c
@@ -2,6 +2,7 @@ 
  * Copyright(C) 2021 Marvell.
  */
 
+#include <rte_mbuf_pool_ops.h>
 #include <rte_mempool.h>
 
 #include "roc_api.h"
@@ -171,3 +172,30 @@  cnxk_mempool_populate(struct rte_mempool *mp, unsigned int max_objs,
 		mp, RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ, max_objs, vaddr, iova,
 		len, obj_cb, obj_cb_arg);
 }
+
+static int
+cnxk_mempool_lf_init(void)
+{
+	int rc = 0;
+
+	if (roc_model_is_cn10k()) {
+		rte_mbuf_set_platform_mempool_ops("cn10k_mempool_ops");
+		rc = cn10k_mempool_lf_init();
+	} else {
+		rte_mbuf_set_platform_mempool_ops("cn9k_mempool_ops");
+	}
+	return rc;
+}
+
+static void
+cnxk_mempool_lf_fini(void)
+{
+	if (roc_model_is_cn10k())
+		cn10k_mempool_lf_fini();
+}
+
+RTE_INIT(cnxk_mempool_ops_init)
+{
+	roc_npa_lf_init_cb_register(cnxk_mempool_lf_init);
+	roc_npa_lf_fini_cb_register(cnxk_mempool_lf_fini);
+}
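
Illustration only (not part of the patch): with the ops above selected, the
fast path is exercised through rte_mempool_get_bulk()/rte_mempool_put_bulk().
The function name pool_burst_example() and the burst size of 32 are
assumptions for the example; the comments note where cn10k_mempool_deq() and
cn10k_mempool_enq() come into play.

#include <errno.h>
#include <rte_common.h>
#include <rte_mempool.h>

static int
pool_burst_example(struct rte_mempool *mp)
{
	void *objs[32];

	/* A per-lcore cache miss lands in cn10k_mempool_deq(): the first
	 * miss issues an NPA batch alloc for this lcore, and later calls
	 * extract the completed results from the lcore's batch buffer,
	 * which is the asynchronous part of this patch.
	 */
	if (rte_mempool_get_bulk(mp, objs, RTE_DIM(objs)) != 0)
		return -ENOENT;

	/* ... use the objects ... */

	/* A cache flush lands in cn10k_mempool_enq(), which returns the
	 * pointers to the aura with a single batched (LMT-based) free.
	 */
	rte_mempool_put_bulk(mp, objs, RTE_DIM(objs));
	return 0;
}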