@@ -7,11 +7,239 @@
#include "roc_api.h"
#include "cnxk_mempool.h"
+#define BATCH_ALLOC_SZ ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS
+
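+/* Lifecycle of a per-lcore batch alloc: not yet issued, issued to
+ * hardware (result pending in objs[]), or done (objs[] holds valid
+ * object pointers).
+ */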
+enum batch_op_status {
+ BATCH_ALLOC_OP_NOT_ISSUED = 0,
+ BATCH_ALLOC_OP_ISSUED = 1,
+ BATCH_ALLOC_OP_DONE
+};
+
+struct batch_op_mem {
+ unsigned int sz;
+ enum batch_op_status status;
+ uint64_t objs[BATCH_ALLOC_SZ] __rte_aligned(ROC_ALIGN);
+};
+
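+/* Per-pool batch op data with a private scratch area per lcore, so
+ * threads can issue and extract batch allocs without locking.
+ */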
+struct batch_op_data {
+ uint64_t lmt_addr;
+ struct batch_op_mem mem[RTE_MAX_LCORE] __rte_aligned(ROC_ALIGN);
+};
+
+static struct batch_op_data **batch_op_data;
+
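+/* The table is indexed by aura, which is unique per pool */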
+#define BATCH_OP_DATA_GET(pool_id) \
+ batch_op_data[roc_npa_aura_handle_to_aura(pool_id)]
+
+#define BATCH_OP_DATA_SET(pool_id, op_data) \
+ do { \
+ uint64_t aura = roc_npa_aura_handle_to_aura(pool_id); \
+ batch_op_data[aura] = op_data; \
+ } while (0)
+
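+/* Allocate a table with one slot per possible pool; slots are
+ * populated lazily by batch_op_init() when a pool is created.
+ */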
+int
+cn10k_mempool_lf_init(void)
+{
+ unsigned int maxpools, sz;
+
+ maxpools = roc_idev_npa_maxpools_get();
+ sz = maxpools * sizeof(struct batch_op_data *);
+
+ batch_op_data = rte_zmalloc(NULL, sz, ROC_ALIGN);
+ if (!batch_op_data)
+ return -1;
+
+ return 0;
+}
+
+void
+cn10k_mempool_lf_fini(void)
+{
+ if (!batch_op_data)
+ return;
+
+ rte_free(batch_op_data);
+ batch_op_data = NULL;
+}
+
+static int
+batch_op_init(struct rte_mempool *mp)
+{
+ struct batch_op_data *op_data;
+ int i;
+
+ RTE_ASSERT(BATCH_OP_DATA_GET(mp->pool_id) == NULL);
+ op_data = rte_zmalloc(NULL, sizeof(struct batch_op_data), ROC_ALIGN);
+ if (op_data == NULL)
+ return -1;
+
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ op_data->mem[i].sz = 0;
+ op_data->mem[i].status = BATCH_ALLOC_OP_NOT_ISSUED;
+ }
+
+ op_data->lmt_addr = roc_idev_lmt_base_addr_get();
+ BATCH_OP_DATA_SET(mp->pool_id, op_data);
+
+ return 0;
+}
+
+static void
+batch_op_fini(struct rte_mempool *mp)
+{
+ struct batch_op_data *op_data;
+ int i;
+
+ op_data = BATCH_OP_DATA_GET(mp->pool_id);
+
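+ /* Extract any in-flight batch alloc and return the pointers to
+ * the pool before releasing the op data.
+ */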
+ rte_wmb();
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ struct batch_op_mem *mem = &op_data->mem[i];
+
+ if (mem->status == BATCH_ALLOC_OP_ISSUED) {
+ mem->sz = roc_npa_aura_batch_alloc_extract(
+ mem->objs, mem->objs, BATCH_ALLOC_SZ);
+ mem->status = BATCH_ALLOC_OP_DONE;
+ }
+ if (mem->status == BATCH_ALLOC_OP_DONE) {
+ roc_npa_aura_op_bulk_free(mp->pool_id, mem->objs,
+ mem->sz, 1);
+ mem->status = BATCH_ALLOC_OP_NOT_ISSUED;
+ }
+ }
+
+ rte_free(op_data);
+ BATCH_OP_DATA_SET(mp->pool_id, NULL);
+}
+
+static int __rte_hot
+cn10k_mempool_enq(struct rte_mempool *mp, void *const *obj_table,
+ unsigned int n)
+{
+ const uint64_t *ptr = (const uint64_t *)obj_table;
+ uint64_t lmt_addr = 0, lmt_id = 0;
+ struct batch_op_data *op_data;
+
+ /* Ensure mbuf init changes are written before the free pointers
+ * are enqueued to the stack.
+ */
+ rte_io_wmb();
+
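+ /* A single pointer can be freed with the plain aura op; only
+ * bursts go through the LMT batch free path.
+ */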
+ if (n == 1) {
+ roc_npa_aura_op_free(mp->pool_id, 1, ptr[0]);
+ return 0;
+ }
+
+ op_data = BATCH_OP_DATA_GET(mp->pool_id);
+ lmt_addr = op_data->lmt_addr;
+ ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+ roc_npa_aura_op_batch_free(mp->pool_id, ptr, n, 1, lmt_addr, lmt_id);
+
+ return 0;
+}
+
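+/* Dequeue works roughly as: issue a batch alloc, extract the result,
+ * copy out what is needed, and reissue when the scratch area runs
+ * dry. Each thread only touches its own mem[lcore] slot.
+ */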
+static int __rte_hot
+cn10k_mempool_deq(struct rte_mempool *mp, void **obj_table, unsigned int n)
+{
+ struct batch_op_data *op_data;
+ struct batch_op_mem *mem;
+ unsigned int count = 0;
+ int tid, rc, retry;
+ bool loop = true;
+
+ op_data = BATCH_OP_DATA_GET(mp->pool_id);
+ tid = rte_lcore_id();
+ mem = &op_data->mem[tid];
+
+ /* Issue batch alloc */
+ if (mem->status == BATCH_ALLOC_OP_NOT_ISSUED) {
+ rc = roc_npa_aura_batch_alloc_issue(mp->pool_id, mem->objs,
+ BATCH_ALLOC_SZ, 0, 1);
+ /* If issue fails, fall back to the default dequeue */
+ if (unlikely(rc))
+ return cnxk_mempool_deq(mp, obj_table, n);
+ mem->status = BATCH_ALLOC_OP_ISSUED;
+ }
+
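+ /* Bounded retry budget: a partial extract burns one retry (see
+ * below), so a draining pool cannot stall the caller.
+ */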
+ retry = 4;
+ while (loop) {
+ unsigned int cur_sz;
+
+ if (mem->status == BATCH_ALLOC_OP_ISSUED) {
+ mem->sz = roc_npa_aura_batch_alloc_extract(
+ mem->objs, mem->objs, BATCH_ALLOC_SZ);
+
+ /* If the alloc was partial, reduce the retry count */
+ retry -= (mem->sz != BATCH_ALLOC_SZ);
+ /* Break the loop if retry count exhausted */
+ loop = !!retry;
+ mem->status = BATCH_ALLOC_OP_DONE;
+ }
+
+ cur_sz = n - count;
+ if (cur_sz > mem->sz)
+ cur_sz = mem->sz;
+
+ /* Dequeue pointers from the tail of objs[] so leftover
+ * pointers stay packed at the start of the array
+ */
+ memcpy(&obj_table[count], &mem->objs[mem->sz - cur_sz],
+ cur_sz * sizeof(uintptr_t));
+ mem->sz -= cur_sz;
+ count += cur_sz;
+
+ /* Break loop if the required pointers have been dequeued */
+ loop &= (count != n);
+
+ /* Issue next batch alloc if pointers are exhausted */
+ if (mem->sz == 0) {
+ rc = roc_npa_aura_batch_alloc_issue(
+ mp->pool_id, mem->objs, BATCH_ALLOC_SZ, 0, 1);
+ /* Break the loop if issue failed, else mark it issued */
+ loop &= !rc;
+ mem->status = rc ? BATCH_ALLOC_OP_NOT_ISSUED :
+ BATCH_ALLOC_OP_ISSUED;
+ }
+ }
+
+ if (unlikely(count != n)) {
+ /* No partial alloc allowed. Free up allocated pointers */
+ cn10k_mempool_enq(mp, obj_table, count);
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
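+/* The object count must include pointers still parked in each
+ * lcore's batch memory, both in-flight and already extracted.
+ */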
+static unsigned int
+cn10k_mempool_get_count(const struct rte_mempool *mp)
+{
+ struct batch_op_data *op_data;
+ unsigned int count = 0;
+ int i;
+
+ op_data = BATCH_OP_DATA_GET(mp->pool_id);
+
+ rte_wmb();
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ struct batch_op_mem *mem = &op_data->mem[i];
+
+ if (mem->status == BATCH_ALLOC_OP_ISSUED)
+ count += roc_npa_aura_batch_alloc_count(mem->objs,
+ BATCH_ALLOC_SZ);
+
+ if (mem->status == BATCH_ALLOC_OP_DONE)
+ count += mem->sz;
+ }
+
+ count += cnxk_mempool_get_count(mp);
+
+ return count;
+}
+
static int
cn10k_mempool_alloc(struct rte_mempool *mp)
{
uint32_t block_size;
size_t padding;
+ int rc;
block_size = mp->elt_size + mp->header_size + mp->trailer_size;
/* Align header size to ROC_ALIGN */
@@ -29,16 +257,36 @@ cn10k_mempool_alloc(struct rte_mempool *mp)
block_size += padding;
}
- return cnxk_mempool_alloc(mp);
+ rc = cnxk_mempool_alloc(mp);
+ if (rc)
+ return rc;
+
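+ /* mp->pool_id is valid only after cnxk_mempool_alloc(), so the
+ * batch op data is set up afterwards.
+ */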
+ rc = batch_op_init(mp);
+ if (rc) {
+ plt_err("Failed to init batch alloc mem rc=%d", rc);
+ goto error;
+ }
+
+ return 0;
+error:
+ cnxk_mempool_free(mp);
+ return rc;
+}
+
+static void
+cn10k_mempool_free(struct rte_mempool *mp)
+{
+ batch_op_fini(mp);
+ cnxk_mempool_free(mp);
}
static struct rte_mempool_ops cn10k_mempool_ops = {
.name = "cn10k_mempool_ops",
.alloc = cn10k_mempool_alloc,
- .free = cnxk_mempool_free,
- .enqueue = cnxk_mempool_enq,
- .dequeue = cnxk_mempool_deq,
- .get_count = cnxk_mempool_get_count,
+ .free = cn10k_mempool_free,
+ .enqueue = cn10k_mempool_enq,
+ .dequeue = cn10k_mempool_deq,
+ .get_count = cn10k_mempool_get_count,
.calc_mem_size = cnxk_mempool_calc_mem_size,
.populate = cnxk_mempool_populate,
};
@@ -14,14 +14,11 @@
#include <rte_pci.h>
#include "roc_api.h"
-#include "cnxk_mempool.h"
#define CNXK_NPA_DEV_NAME RTE_STR(cnxk_npa_dev_)
#define CNXK_NPA_DEV_NAME_LEN (sizeof(CNXK_NPA_DEV_NAME) + PCI_PRI_STR_SIZE)
#define CNXK_NPA_MAX_POOLS_PARAM "max_pools"
-uintptr_t *cnxk_mempool_internal_data;
-
static inline uint32_t
npa_aura_size_to_u32(uint8_t val)
{
@@ -82,33 +79,25 @@ static int
npa_init(struct rte_pci_device *pci_dev)
{
char name[CNXK_NPA_DEV_NAME_LEN];
- size_t idata_offset, idata_sz;
const struct rte_memzone *mz;
struct roc_npa *dev;
- int rc, maxpools;
+ int rc;
rc = plt_init();
if (rc < 0)
goto error;
- maxpools = parse_aura_size(pci_dev->device.devargs);
- /* Add the space for per-pool internal data pointers to memzone len */
- idata_offset = RTE_ALIGN_CEIL(sizeof(*dev), ROC_ALIGN);
- idata_sz = maxpools * sizeof(uintptr_t);
-
rc = -ENOMEM;
mz = rte_memzone_reserve_aligned(npa_dev_to_name(pci_dev, name),
- idata_offset + idata_sz, SOCKET_ID_ANY,
- 0, RTE_CACHE_LINE_SIZE);
+ sizeof(*dev), SOCKET_ID_ANY, 0,
+ RTE_CACHE_LINE_SIZE);
if (mz == NULL)
goto error;
dev = mz->addr;
dev->pci_dev = pci_dev;
- cnxk_mempool_internal_data = (uintptr_t *)(mz->addr_64 + idata_offset);
- memset(cnxk_mempool_internal_data, 0, idata_sz);
- roc_idev_npa_maxpools_set(maxpools);
+ roc_idev_npa_maxpools_set(parse_aura_size(pci_dev->device.devargs));
rc = roc_npa_dev_init(dev);
if (rc)
goto mz_free;
@@ -23,6 +23,7 @@ int __rte_hot cnxk_mempool_enq(struct rte_mempool *mp, void *const *obj_table,
int __rte_hot cnxk_mempool_deq(struct rte_mempool *mp, void **obj_table,
unsigned int n);
-extern uintptr_t *cnxk_mempool_internal_data;
+int cn10k_mempool_lf_init(void);
+void cn10k_mempool_lf_fini(void);
#endif
@@ -2,6 +2,7 @@
* Copyright(C) 2021 Marvell.
*/
+#include <rte_mbuf_pool_ops.h>
#include <rte_mempool.h>
#include "roc_api.h"
@@ -171,3 +172,30 @@ cnxk_mempool_populate(struct rte_mempool *mp, unsigned int max_objs,
mp, RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ, max_objs, vaddr, iova,
len, obj_cb, obj_cb_arg);
}
+
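+/* NPA LF init/fini hooks: pick the platform mempool ops and, on
+ * cn10k, set up the batch op data table.
+ */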
+static int
+cnxk_mempool_lf_init(void)
+{
+ int rc = 0;
+
+ if (roc_model_is_cn10k()) {
+ rte_mbuf_set_platform_mempool_ops("cn10k_mempool_ops");
+ rc = cn10k_mempool_lf_init();
+ } else {
+ rte_mbuf_set_platform_mempool_ops("cn9k_mempool_ops");
+ }
+ return rc;
+}
+
+static void
+cnxk_mempool_lf_fini(void)
+{
+ if (roc_model_is_cn10k())
+ cn10k_mempool_lf_fini();
+}
+
+RTE_INIT(cnxk_mempool_ops_init)
+{
+ roc_npa_lf_init_cb_register(cnxk_mempool_lf_init);
+ roc_npa_lf_fini_cb_register(cnxk_mempool_lf_fini);
+}