[v3,20/22] net/mlx5: support index pool non-local core operations

Message ID 20210702061816.10454-21-suanmingm@nvidia.com (mailing list archive)
State Superseded, archived
Delegated to: Raslan Darawsheh
Series net/mlx5: insertion rate optimization

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Suanming Mou July 2, 2021, 6:18 a.m. UTC
  This commit adds support for index pool operations from non-local
cores (threads without a valid lcore index) by using an extra cache
slot protected by a spinlock.
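
For reference, all three per-lcore cache entry points (get, malloc,
free) are wrapped with the same pattern. The sketch below uses a
hypothetical helper name and mirrors mlx5_ipool_get_cache from the
diff: a thread without an lcore index falls back to the shared slot
at index RTE_MAX_LCORE, serialized by pool->nlcore_lock.

  #include <rte_branch_prediction.h>
  #include <rte_lcore.h>
  #include <rte_spinlock.h>

  /* Hypothetical wrapper illustrating the non-local core fallback. */
  static void *
  nlcore_safe_get(struct mlx5_indexed_pool *pool, uint32_t idx)
  {
  	void *entry;
  	int cidx;

  	cidx = rte_lcore_index(rte_lcore_id());
  	if (unlikely(cidx == -1)) {
  		/* No lcore index: use the extra shared cache slot. */
  		cidx = RTE_MAX_LCORE;
  		rte_spinlock_lock(&pool->nlcore_lock);
  	}
  	entry = _mlx5_ipool_get_cache(pool, cidx, idx);
  	if (unlikely(cidx == RTE_MAX_LCORE))
  		rte_spinlock_unlock(&pool->nlcore_lock);
  	return entry;
  }

Threads running on an lcore keep the existing lock-free per-core cache
path; only non-lcore threads pay the spinlock cost.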

Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/net/mlx5/mlx5_utils.c | 75 +++++++++++++++++++++++++----------
 drivers/net/mlx5/mlx5_utils.h |  3 +-
 2 files changed, 56 insertions(+), 22 deletions(-)
  

Patch

diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c
index 94abe79860..c34d6d62a8 100644
--- a/drivers/net/mlx5/mlx5_utils.c
+++ b/drivers/net/mlx5/mlx5_utils.c
@@ -114,6 +114,7 @@  mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg)
 			mlx5_trunk_idx_offset_get(pool, TRUNK_MAX_IDX + 1);
 	if (!cfg->per_core_cache)
 		pool->free_list = TRUNK_INVALID;
+	rte_spinlock_init(&pool->nlcore_lock);
 	return pool;
 }
 
@@ -354,20 +355,14 @@  mlx5_ipool_allocate_from_global(struct mlx5_indexed_pool *pool, int cidx)
 }
 
 static void *
-mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
+_mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
 {
 	struct mlx5_indexed_trunk *trunk;
 	struct mlx5_indexed_cache *lc;
 	uint32_t trunk_idx;
 	uint32_t entry_idx;
-	int cidx;
 
 	MLX5_ASSERT(idx);
-	cidx = rte_lcore_index(rte_lcore_id());
-	if (unlikely(cidx == -1)) {
-		rte_errno = ENOTSUP;
-		return NULL;
-	}
 	lc = mlx5_ipool_update_global_cache(pool, cidx);
 	idx -= 1;
 	trunk_idx = mlx5_trunk_idx_get(pool, idx);
@@ -378,15 +373,27 @@  mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
 }
 
 static void *
-mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, uint32_t *idx)
+mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
 {
+	void *entry;
 	int cidx;
 
 	cidx = rte_lcore_index(rte_lcore_id());
 	if (unlikely(cidx == -1)) {
-		rte_errno = ENOTSUP;
-		return NULL;
+		cidx = RTE_MAX_LCORE;
+		rte_spinlock_lock(&pool->nlcore_lock);
 	}
+	entry = _mlx5_ipool_get_cache(pool, cidx, idx);
+	if (unlikely(cidx == RTE_MAX_LCORE))
+		rte_spinlock_unlock(&pool->nlcore_lock);
+	return entry;
+}
+
+
+static void *
+_mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, int cidx,
+			 uint32_t *idx)
+{
 	if (unlikely(!pool->cache[cidx])) {
 		pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
 			sizeof(struct mlx5_ipool_per_lcore) +
@@ -399,29 +406,40 @@  mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, uint32_t *idx)
 	} else if (pool->cache[cidx]->len) {
 		pool->cache[cidx]->len--;
 		*idx = pool->cache[cidx]->idx[pool->cache[cidx]->len];
-		return mlx5_ipool_get_cache(pool, *idx);
+		return _mlx5_ipool_get_cache(pool, cidx, *idx);
 	}
 	/* Not enough idx in global cache. Keep fetching from global. */
 	*idx = mlx5_ipool_allocate_from_global(pool, cidx);
 	if (unlikely(!(*idx)))
 		return NULL;
-	return mlx5_ipool_get_cache(pool, *idx);
+	return _mlx5_ipool_get_cache(pool, cidx, *idx);
 }
 
-static void
-mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
+static void *
+mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, uint32_t *idx)
 {
+	void *entry;
 	int cidx;
+
+	cidx = rte_lcore_index(rte_lcore_id());
+	if (unlikely(cidx == -1)) {
+		cidx = RTE_MAX_LCORE;
+		rte_spinlock_lock(&pool->nlcore_lock);
+	}
+	entry = _mlx5_ipool_malloc_cache(pool, cidx, idx);
+	if (unlikely(cidx == RTE_MAX_LCORE))
+		rte_spinlock_unlock(&pool->nlcore_lock);
+	return entry;
+}
+
+static void
+_mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
+{
 	struct mlx5_ipool_per_lcore *ilc;
 	struct mlx5_indexed_cache *gc, *olc = NULL;
 	uint32_t reclaim_num = 0;
 
 	MLX5_ASSERT(idx);
-	cidx = rte_lcore_index(rte_lcore_id());
-	if (unlikely(cidx == -1)) {
-		rte_errno = ENOTSUP;
-		return;
-	}
 	/*
 	 * When index was allocated on core A but freed on core B. In this
 	 * case check if local cache on core B was allocated before.
@@ -464,6 +482,21 @@  mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
 	pool->cache[cidx]->len++;
 }
 
+static void
+mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
+{
+	int cidx;
+
+	cidx = rte_lcore_index(rte_lcore_id());
+	if (unlikely(cidx == -1)) {
+		cidx = RTE_MAX_LCORE;
+		rte_spinlock_lock(&pool->nlcore_lock);
+	}
+	_mlx5_ipool_free_cache(pool, cidx, idx);
+	if (unlikely(cidx == RTE_MAX_LCORE))
+		rte_spinlock_unlock(&pool->nlcore_lock);
+}
+
 void *
 mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
 {
@@ -643,7 +676,7 @@  mlx5_ipool_destroy(struct mlx5_indexed_pool *pool)
 	MLX5_ASSERT(pool);
 	mlx5_ipool_lock(pool);
 	if (pool->cfg.per_core_cache) {
-		for (i = 0; i < RTE_MAX_LCORE; i++) {
+		for (i = 0; i <= RTE_MAX_LCORE; i++) {
 			/*
 			 * Free only old global cache. Pool gc will be
 			 * freed at last.
@@ -712,7 +745,7 @@  mlx5_ipool_flush_cache(struct mlx5_indexed_pool *pool)
 	for (i = 0; i < gc->len; i++)
 		rte_bitmap_clear(ibmp, gc->idx[i] - 1);
 	/* Clear core cache. */
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
+	for (i = 0; i < RTE_MAX_LCORE + 1; i++) {
 		struct mlx5_ipool_per_lcore *ilc = pool->cache[i];
 
 		if (!ilc)
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index 7d9b64c877..060c52f022 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -248,6 +248,7 @@  struct mlx5_ipool_per_lcore {
 struct mlx5_indexed_pool {
 	struct mlx5_indexed_pool_config cfg; /* Indexed pool configuration. */
 	rte_spinlock_t rsz_lock; /* Pool lock for multiple thread usage. */
+	rte_spinlock_t nlcore_lock;
 	/* Dim of trunk pointer array. */
 	union {
 		struct {
@@ -259,7 +260,7 @@  struct mlx5_indexed_pool {
 		struct {
 			struct mlx5_indexed_cache *gc;
 			/* Global cache. */
-			struct mlx5_ipool_per_lcore *cache[RTE_MAX_LCORE];
+			struct mlx5_ipool_per_lcore *cache[RTE_MAX_LCORE + 1];
 			/* Local cache. */
 			struct rte_bitmap *ibmp;
 			void *bmp_mem;