[v2,08/11] mempool/cnxk: add batch op init
Checks
Commit Message
Marvell CN10k mempool supports batch enqueue/dequeue which can
dequeue up to 512 pointers and enqueue up to 15 pointers using
a single instruction.
These batch operations require a DMA memory to enqueue/dequeue
pointers. This patch adds the initialization of this DMA memory.
Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
---
doc/guides/mempool/cnxk.rst | 5 +
drivers/mempool/cnxk/cn10k_mempool_ops.c | 122 ++++++++++++++++++++++-
drivers/mempool/cnxk/cnxk_mempool.h | 3 +
drivers/mempool/cnxk/cnxk_mempool_ops.c | 13 ++-
4 files changed, 138 insertions(+), 5 deletions(-)
Comments
On Sat, Apr 3, 2021 at 7:49 PM Ashwin Sekhar T K <asekhar@marvell.com> wrote:
>
> Marvell CN10k mempool supports batch enqueue/dequeue which can
> dequeue up to 512 pointers and enqueue up to 15 pointers using
> a single instruction.
>
> These batch operations require a DMA memory to enqueue/dequeue
> pointers. This patch adds the initialization of this DMA memory.
>
> Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
> ---
> doc/guides/mempool/cnxk.rst | 5 +
> drivers/mempool/cnxk/cn10k_mempool_ops.c | 122 ++++++++++++++++++++++-
> drivers/mempool/cnxk/cnxk_mempool.h | 3 +
> drivers/mempool/cnxk/cnxk_mempool_ops.c | 13 ++-
> 4 files changed, 138 insertions(+), 5 deletions(-)
>
> +
> +static struct batch_op_data **batch_op_data;
Please remove the global variable, as it will break multi-process support.
> +
> +#define BATCH_OP_DATA_GET(pool_id) \
> + batch_op_data[roc_npa_aura_handle_to_aura(pool_id)]
> +
> +#define BATCH_OP_DATA_SET(pool_id, op_data) \
> + do { \
> + uint64_t aura = roc_npa_aura_handle_to_aura(pool_id); \
> + batch_op_data[aura] = op_data; \
> + } while (0)
> +
Please check whether this can be made a static inline function, provided there is NO performance cost.
@@ -25,6 +25,11 @@ CN9k NPA supports:
- Burst alloc of up to 32 pointers.
+CN10k NPA supports:
+
+- Batch dequeue of up to 512 pointers with single instruction.
+- Batch enqueue of up to 15 pointers with single instruction.
+
Prerequisites and Compilation procedure
---------------------------------------
@@ -7,11 +7,117 @@
#include "roc_api.h"
#include "cnxk_mempool.h"
+#define BATCH_ALLOC_SZ ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS
+
+enum batch_op_status {
+ BATCH_ALLOC_OP_NOT_ISSUED = 0,
+ BATCH_ALLOC_OP_ISSUED = 1,
+ BATCH_ALLOC_OP_DONE
+};
+
+struct batch_op_mem {
+ unsigned int sz;
+ enum batch_op_status status;
+ uint64_t objs[BATCH_ALLOC_SZ] __rte_aligned(ROC_ALIGN);
+};
+
+struct batch_op_data {
+ uint64_t lmt_addr;
+ struct batch_op_mem mem[RTE_MAX_LCORE] __rte_aligned(ROC_ALIGN);
+};
+
+static struct batch_op_data **batch_op_data;
+
+#define BATCH_OP_DATA_GET(pool_id) \
+ batch_op_data[roc_npa_aura_handle_to_aura(pool_id)]
+
+#define BATCH_OP_DATA_SET(pool_id, op_data) \
+ do { \
+ uint64_t aura = roc_npa_aura_handle_to_aura(pool_id); \
+ batch_op_data[aura] = op_data; \
+ } while (0)
+
+int
+cn10k_mempool_lf_init(void)
+{
+ unsigned int maxpools, sz;
+
+ maxpools = roc_idev_npa_maxpools_get();
+ sz = maxpools * sizeof(struct batch_op_data *);
+
+ batch_op_data = rte_zmalloc(NULL, sz, ROC_ALIGN);
+ if (!batch_op_data)
+ return -1;
+
+ return 0;
+}
+
+void
+cn10k_mempool_lf_fini(void)
+{
+ if (!batch_op_data)
+ return;
+
+ rte_free(batch_op_data);
+ batch_op_data = NULL;
+}
+
+static int
+batch_op_init(struct rte_mempool *mp)
+{
+ struct batch_op_data *op_data;
+ int i;
+
+ RTE_ASSERT(BATCH_OP_DATA_GET(mp->pool_id) == NULL);
+ op_data = rte_zmalloc(NULL, sizeof(struct batch_op_data), ROC_ALIGN);
+ if (op_data == NULL)
+ return -1;
+
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ op_data->mem[i].sz = 0;
+ op_data->mem[i].status = BATCH_ALLOC_OP_NOT_ISSUED;
+ }
+
+ op_data->lmt_addr = roc_idev_lmt_base_addr_get();
+ BATCH_OP_DATA_SET(mp->pool_id, op_data);
+
+ return 0;
+}
+
+static void
+batch_op_fini(struct rte_mempool *mp)
+{
+ struct batch_op_data *op_data;
+ int i;
+
+ op_data = BATCH_OP_DATA_GET(mp->pool_id);
+
+ rte_wmb();
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ struct batch_op_mem *mem = &op_data->mem[i];
+
+ if (mem->status == BATCH_ALLOC_OP_ISSUED) {
+ mem->sz = roc_npa_aura_batch_alloc_extract(
+ mem->objs, mem->objs, BATCH_ALLOC_SZ);
+ mem->status = BATCH_ALLOC_OP_DONE;
+ }
+ if (mem->status == BATCH_ALLOC_OP_DONE) {
+ roc_npa_aura_op_bulk_free(mp->pool_id, mem->objs,
+ mem->sz, 1);
+ mem->status = BATCH_ALLOC_OP_NOT_ISSUED;
+ }
+ }
+
+ rte_free(op_data);
+ BATCH_OP_DATA_SET(mp->pool_id, NULL);
+}
+
static int
cn10k_mempool_alloc(struct rte_mempool *mp)
{
uint32_t block_size;
size_t padding;
+ int rc;
block_size = mp->elt_size + mp->header_size + mp->trailer_size;
/* Align header size to ROC_ALIGN */
@@ -29,12 +135,26 @@ cn10k_mempool_alloc(struct rte_mempool *mp)
block_size += padding;
}
- return cnxk_mempool_alloc(mp);
+ rc = cnxk_mempool_alloc(mp);
+ if (rc)
+ return rc;
+
+ rc = batch_op_init(mp);
+ if (rc) {
+ plt_err("Failed to init batch alloc mem rc=%d", rc);
+ goto error;
+ }
+
+ return 0;
+error:
+ cnxk_mempool_free(mp);
+ return rc;
}
static void
cn10k_mempool_free(struct rte_mempool *mp)
{
+ batch_op_fini(mp);
cnxk_mempool_free(mp);
}
@@ -23,4 +23,7 @@ int __rte_hot cnxk_mempool_enq(struct rte_mempool *mp, void *const *obj_table,
int __rte_hot cnxk_mempool_deq(struct rte_mempool *mp, void **obj_table,
unsigned int n);
+int cn10k_mempool_lf_init(void);
+void cn10k_mempool_lf_fini(void);
+
#endif
@@ -174,17 +174,22 @@ cnxk_mempool_populate(struct rte_mempool *mp, unsigned int max_objs,
static int
cnxk_mempool_lf_init(void)
{
- if (roc_model_is_cn9k())
+ int rc = 0;
+
+ if (roc_model_is_cn9k()) {
rte_mbuf_set_platform_mempool_ops("cn9k_mempool_ops");
- else if (roc_model_is_cn10k())
+ } else if (roc_model_is_cn10k()) {
rte_mbuf_set_platform_mempool_ops("cn10k_mempool_ops");
-
- return 0;
+ rc = cn10k_mempool_lf_init();
+ }
+ return rc;
}
static void
cnxk_mempool_lf_fini(void)
{
+ if (roc_model_is_cn10k())
+ cn10k_mempool_lf_fini();
}
RTE_INIT(cnxk_mempool_ops_init)