diff mbox series

[v2,08/11] mempool/cnxk: add batch op init

Message ID 20210403141751.215926-8-asekhar@marvell.com (mailing list archive)
State Changes Requested
Delegated to: Jerin Jacob
Headers show
Series [v2,01/11] mempool/cnxk: add build infra and doc | expand

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Ashwin Sekhar T K April 3, 2021, 2:17 p.m. UTC
Marvell CN10k mempool supports batch enqueue/dequeue which can
dequeue up to 512 pointers and enqueue up to 15 pointers using
a single instruction.

These batch operations require DMA memory to hold the pointers being
enqueued/dequeued. This patch adds the initialization of that DMA memory.

Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
---
 doc/guides/mempool/cnxk.rst              |   5 +
 drivers/mempool/cnxk/cn10k_mempool_ops.c | 122 ++++++++++++++++++++++-
 drivers/mempool/cnxk/cnxk_mempool.h      |   3 +
 drivers/mempool/cnxk/cnxk_mempool_ops.c  |  13 ++-
 4 files changed, 138 insertions(+), 5 deletions(-)

Comments

Jerin Jacob April 3, 2021, 2:34 p.m. UTC | #1
On Sat, Apr 3, 2021 at 7:49 PM Ashwin Sekhar T K <asekhar@marvell.com> wrote:
>
> Marvell CN10k mempool supports batch enqueue/dequeue which can
> dequeue up to 512 pointers and enqueue up to 15 pointers using
> a single instruction.
>
> These batch operations require a DMA memory to enqueue/dequeue
> pointers. This patch adds the initialization of this DMA memory.
>
> Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
> ---
>  doc/guides/mempool/cnxk.rst              |   5 +
>  drivers/mempool/cnxk/cn10k_mempool_ops.c | 122 ++++++++++++++++++++++-
>  drivers/mempool/cnxk/cnxk_mempool.h      |   3 +
>  drivers/mempool/cnxk/cnxk_mempool_ops.c  |  13 ++-
>  4 files changed, 138 insertions(+), 5 deletions(-)
>
> +
> +static struct batch_op_data **batch_op_data;

Please remove the global variable, as it will break multi-process support.

> +
> +#define BATCH_OP_DATA_GET(pool_id)                                             \
> +       batch_op_data[roc_npa_aura_handle_to_aura(pool_id)]
> +
> +#define BATCH_OP_DATA_SET(pool_id, op_data)                                    \
> +       do {                                                                   \
> +               uint64_t aura = roc_npa_aura_handle_to_aura(pool_id);          \
> +               batch_op_data[aura] = op_data;                                 \
> +       } while (0)
> +

Please check whether this can be made a static inline function, provided there is NO performance cost.
diff mbox series

Patch

diff --git a/doc/guides/mempool/cnxk.rst b/doc/guides/mempool/cnxk.rst
index 783368e690..286ee29003 100644
--- a/doc/guides/mempool/cnxk.rst
+++ b/doc/guides/mempool/cnxk.rst
@@ -25,6 +25,11 @@  CN9k NPA supports:
 
 - Burst alloc of up to 32 pointers.
 
+CN10k NPA supports:
+
+- Batch dequeue of up to 512 pointers with a single instruction.
+- Batch enqueue of up to 15 pointers with a single instruction.
+
 Prerequisites and Compilation procedure
 ---------------------------------------
 
diff --git a/drivers/mempool/cnxk/cn10k_mempool_ops.c b/drivers/mempool/cnxk/cn10k_mempool_ops.c
index 9b63789006..d34041528a 100644
--- a/drivers/mempool/cnxk/cn10k_mempool_ops.c
+++ b/drivers/mempool/cnxk/cn10k_mempool_ops.c
@@ -7,11 +7,117 @@ 
 #include "roc_api.h"
 #include "cnxk_mempool.h"
 
+#define BATCH_ALLOC_SZ ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS
+
+enum batch_op_status {
+	BATCH_ALLOC_OP_NOT_ISSUED = 0,
+	BATCH_ALLOC_OP_ISSUED = 1,
+	BATCH_ALLOC_OP_DONE
+};
+
+struct batch_op_mem {
+	unsigned int sz;
+	enum batch_op_status status;
+	uint64_t objs[BATCH_ALLOC_SZ] __rte_aligned(ROC_ALIGN);
+};
+
+struct batch_op_data {
+	uint64_t lmt_addr;
+	struct batch_op_mem mem[RTE_MAX_LCORE] __rte_aligned(ROC_ALIGN);
+};
+
+static struct batch_op_data **batch_op_data;
+
+#define BATCH_OP_DATA_GET(pool_id)                                             \
+	batch_op_data[roc_npa_aura_handle_to_aura(pool_id)]
+
+#define BATCH_OP_DATA_SET(pool_id, op_data)                                    \
+	do {                                                                   \
+		uint64_t aura = roc_npa_aura_handle_to_aura(pool_id);          \
+		batch_op_data[aura] = op_data;                                 \
+	} while (0)
+
+int
+cn10k_mempool_lf_init(void)
+{
+	unsigned int maxpools, sz;
+
+	maxpools = roc_idev_npa_maxpools_get();
+	sz = maxpools * sizeof(struct batch_op_data *);
+
+	batch_op_data = rte_zmalloc(NULL, sz, ROC_ALIGN);
+	if (!batch_op_data)
+		return -1;
+
+	return 0;
+}
+
+void
+cn10k_mempool_lf_fini(void)
+{
+	if (!batch_op_data)
+		return;
+
+	rte_free(batch_op_data);
+	batch_op_data = NULL;
+}
+
+static int
+batch_op_init(struct rte_mempool *mp)
+{
+	struct batch_op_data *op_data;
+	int i;
+
+	RTE_ASSERT(BATCH_OP_DATA_GET(mp->pool_id) == NULL);
+	op_data = rte_zmalloc(NULL, sizeof(struct batch_op_data), ROC_ALIGN);
+	if (op_data == NULL)
+		return -1;
+
+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		op_data->mem[i].sz = 0;
+		op_data->mem[i].status = BATCH_ALLOC_OP_NOT_ISSUED;
+	}
+
+	op_data->lmt_addr = roc_idev_lmt_base_addr_get();
+	BATCH_OP_DATA_SET(mp->pool_id, op_data);
+
+	return 0;
+}
+
+static void
+batch_op_fini(struct rte_mempool *mp)
+{
+	struct batch_op_data *op_data;
+	int i;
+
+	op_data = BATCH_OP_DATA_GET(mp->pool_id);
+
+	rte_wmb();
+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		struct batch_op_mem *mem = &op_data->mem[i];
+
+		if (mem->status == BATCH_ALLOC_OP_ISSUED) {
+			mem->sz = roc_npa_aura_batch_alloc_extract(
+				mem->objs, mem->objs, BATCH_ALLOC_SZ);
+			mem->status = BATCH_ALLOC_OP_DONE;
+		}
+		if (mem->status == BATCH_ALLOC_OP_DONE) {
+			roc_npa_aura_op_bulk_free(mp->pool_id, mem->objs,
+						  mem->sz, 1);
+			mem->status = BATCH_ALLOC_OP_NOT_ISSUED;
+		}
+	}
+
+	rte_free(op_data);
+	BATCH_OP_DATA_SET(mp->pool_id, NULL);
+}
+
 static int
 cn10k_mempool_alloc(struct rte_mempool *mp)
 {
 	uint32_t block_size;
 	size_t padding;
+	int rc;
 
 	block_size = mp->elt_size + mp->header_size + mp->trailer_size;
 	/* Align header size to ROC_ALIGN */
@@ -29,12 +135,26 @@  cn10k_mempool_alloc(struct rte_mempool *mp)
 		block_size += padding;
 	}
 
-	return cnxk_mempool_alloc(mp);
+	rc = cnxk_mempool_alloc(mp);
+	if (rc)
+		return rc;
+
+	rc = batch_op_init(mp);
+	if (rc) {
+		plt_err("Failed to init batch alloc mem rc=%d", rc);
+		goto error;
+	}
+
+	return 0;
+error:
+	cnxk_mempool_free(mp);
+	return rc;
 }
 
 static void
 cn10k_mempool_free(struct rte_mempool *mp)
 {
+	batch_op_fini(mp);
 	cnxk_mempool_free(mp);
 }
 
diff --git a/drivers/mempool/cnxk/cnxk_mempool.h b/drivers/mempool/cnxk/cnxk_mempool.h
index 099b7f6998..6e54346e6a 100644
--- a/drivers/mempool/cnxk/cnxk_mempool.h
+++ b/drivers/mempool/cnxk/cnxk_mempool.h
@@ -23,4 +23,7 @@  int __rte_hot cnxk_mempool_enq(struct rte_mempool *mp, void *const *obj_table,
 int __rte_hot cnxk_mempool_deq(struct rte_mempool *mp, void **obj_table,
 			       unsigned int n);
 
+int cn10k_mempool_lf_init(void);
+void cn10k_mempool_lf_fini(void);
+
 #endif
diff --git a/drivers/mempool/cnxk/cnxk_mempool_ops.c b/drivers/mempool/cnxk/cnxk_mempool_ops.c
index 0ec131a475..389c3622fd 100644
--- a/drivers/mempool/cnxk/cnxk_mempool_ops.c
+++ b/drivers/mempool/cnxk/cnxk_mempool_ops.c
@@ -174,17 +174,22 @@  cnxk_mempool_populate(struct rte_mempool *mp, unsigned int max_objs,
 static int
 cnxk_mempool_lf_init(void)
 {
-	if (roc_model_is_cn9k())
+	int rc = 0;
+
+	if (roc_model_is_cn9k()) {
 		rte_mbuf_set_platform_mempool_ops("cn9k_mempool_ops");
-	else if (roc_model_is_cn10k())
+	} else if (roc_model_is_cn10k()) {
 		rte_mbuf_set_platform_mempool_ops("cn10k_mempool_ops");
-
-	return 0;
+		rc = cn10k_mempool_lf_init();
+	}
+	return rc;
 }
 
 static void
 cnxk_mempool_lf_fini(void)
 {
+	if (roc_model_is_cn10k())
+		cn10k_mempool_lf_fini();
 }
 
 RTE_INIT(cnxk_mempool_ops_init)