diff mbox series

[v4,08/26] net/mlx5: add per lcore cache to the list utility

Message ID 20210706133257.3353-9-suanmingm@nvidia.com (mailing list archive)
State Superseded, archived
Delegated to: Raslan Darawsheh
Headers show
Series net/mlx5: insertion rate optimization | expand

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Suanming Mou July 6, 2021, 1:32 p.m. UTC
From: Matan Azrad <matan@nvidia.com>

When an mlx5 list object is accessed by multiple cores, the list lock
counter is written by all the cores all the time, which increases cache
misses in the memory caches.

In addition, when one thread accesses the list for an add/remove/lookup
operation, all the other threads coming to do an operation in the list
are stuck waiting for the lock.

Add per lcore cache to allow thread manipulations to be lockless when
the list objects are mostly reused.

Synchronization with atomic operations should be done in order to
allow threads to unregister an entry from other thread cache.

Signed-off-by: Matan Azrad <matan@nvidia.com>
Acked-by: Suanming Mou <suanmingm@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c   |  58 ++++----
 drivers/net/mlx5/mlx5.h            |   1 +
 drivers/net/mlx5/mlx5_flow.h       |  21 ++-
 drivers/net/mlx5/mlx5_flow_dv.c    | 181 +++++++++++++++++++++++-
 drivers/net/mlx5/mlx5_rx.h         |   5 +
 drivers/net/mlx5/mlx5_rxq.c        |  71 +++++++---
 drivers/net/mlx5/mlx5_utils.c      | 214 ++++++++++++++++++-----------
 drivers/net/mlx5/mlx5_utils.h      |  30 ++--
 drivers/net/mlx5/windows/mlx5_os.c |   5 +-
 9 files changed, 451 insertions(+), 135 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 57b0a1c57f..d7119dd561 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -272,30 +272,38 @@  mlx5_alloc_shared_dr(struct mlx5_priv *priv)
 		goto error;
 	/* The resources below are only valid with DV support. */
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
-	/* Init port id action mlx5 list. */
+	/* Init port id action list. */
 	snprintf(s, sizeof(s), "%s_port_id_action_list", sh->ibdev_name);
-	mlx5_list_create(&sh->port_id_action_list, s, 0, sh,
-			     flow_dv_port_id_create_cb,
-			     flow_dv_port_id_match_cb,
-			     flow_dv_port_id_remove_cb);
-	/* Init push vlan action mlx5 list. */
+	mlx5_list_create(&sh->port_id_action_list, s, sh,
+			 flow_dv_port_id_create_cb,
+			 flow_dv_port_id_match_cb,
+			 flow_dv_port_id_remove_cb,
+			 flow_dv_port_id_clone_cb,
+			 flow_dv_port_id_clone_free_cb);
+	/* Init push vlan action list. */
 	snprintf(s, sizeof(s), "%s_push_vlan_action_list", sh->ibdev_name);
-	mlx5_list_create(&sh->push_vlan_action_list, s, 0, sh,
-			     flow_dv_push_vlan_create_cb,
-			     flow_dv_push_vlan_match_cb,
-			     flow_dv_push_vlan_remove_cb);
-	/* Init sample action mlx5 list. */
+	mlx5_list_create(&sh->push_vlan_action_list, s, sh,
+			 flow_dv_push_vlan_create_cb,
+			 flow_dv_push_vlan_match_cb,
+			 flow_dv_push_vlan_remove_cb,
+			 flow_dv_push_vlan_clone_cb,
+			 flow_dv_push_vlan_clone_free_cb);
+	/* Init sample action list. */
 	snprintf(s, sizeof(s), "%s_sample_action_list", sh->ibdev_name);
-	mlx5_list_create(&sh->sample_action_list, s, 0, sh,
-			     flow_dv_sample_create_cb,
-			     flow_dv_sample_match_cb,
-			     flow_dv_sample_remove_cb);
-	/* Init dest array action mlx5 list. */
+	mlx5_list_create(&sh->sample_action_list, s, sh,
+			 flow_dv_sample_create_cb,
+			 flow_dv_sample_match_cb,
+			 flow_dv_sample_remove_cb,
+			 flow_dv_sample_clone_cb,
+			 flow_dv_sample_clone_free_cb);
+	/* Init dest array action list. */
 	snprintf(s, sizeof(s), "%s_dest_array_list", sh->ibdev_name);
-	mlx5_list_create(&sh->dest_array_list, s, 0, sh,
-			     flow_dv_dest_array_create_cb,
-			     flow_dv_dest_array_match_cb,
-			     flow_dv_dest_array_remove_cb);
+	mlx5_list_create(&sh->dest_array_list, s, sh,
+			 flow_dv_dest_array_create_cb,
+			 flow_dv_dest_array_match_cb,
+			 flow_dv_dest_array_remove_cb,
+			 flow_dv_dest_array_clone_cb,
+			 flow_dv_dest_array_clone_free_cb);
 	/* Create tags hash list table. */
 	snprintf(s, sizeof(s), "%s_tags", sh->ibdev_name);
 	sh->tag_table = mlx5_hlist_create(s, MLX5_TAGS_HLIST_ARRAY_SIZE, 0,
@@ -1704,10 +1712,12 @@  mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			err = ENOTSUP;
 			goto error;
 	}
-	mlx5_list_create(&priv->hrxqs, "hrxq", 0, eth_dev,
-			     mlx5_hrxq_create_cb,
-			     mlx5_hrxq_match_cb,
-			     mlx5_hrxq_remove_cb);
+	mlx5_list_create(&priv->hrxqs, "hrxq", eth_dev, mlx5_hrxq_create_cb,
+			 mlx5_hrxq_match_cb,
+			 mlx5_hrxq_remove_cb,
+			 mlx5_hrxq_clone_cb,
+			 mlx5_hrxq_clone_free_cb);
+	rte_rwlock_init(&priv->ind_tbls_lock);
 	/* Query availability of metadata reg_c's. */
 	err = mlx5_flow_discover_mreg_c(eth_dev);
 	if (err < 0) {
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index bf1fbb530b..740695ea5c 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1365,6 +1365,7 @@  struct mlx5_priv {
 	/* Indirection tables. */
 	LIST_HEAD(ind_tables, mlx5_ind_table_obj) ind_tbls;
 	/* Pointer to next element. */
+	rte_rwlock_t ind_tbls_lock;
 	uint32_t refcnt; /**< Reference counter. */
 	/**< Verbs modify header action object. */
 	uint8_t ft_type; /**< Flow table type, Rx or Tx. */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 4dec703366..ce363355c1 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -1634,7 +1634,11 @@  struct mlx5_list_entry *flow_dv_port_id_create_cb(struct mlx5_list *list,
 						  void *cb_ctx);
 void flow_dv_port_id_remove_cb(struct mlx5_list *list,
 			       struct mlx5_list_entry *entry);
-
+struct mlx5_list_entry *flow_dv_port_id_clone_cb(struct mlx5_list *list,
+				struct mlx5_list_entry *entry __rte_unused,
+				 void *cb_ctx);
+void flow_dv_port_id_clone_free_cb(struct mlx5_list *list,
+				struct mlx5_list_entry *entry __rte_unused);
 int flow_dv_push_vlan_match_cb(struct mlx5_list *list,
 			       struct mlx5_list_entry *entry, void *cb_ctx);
 struct mlx5_list_entry *flow_dv_push_vlan_create_cb(struct mlx5_list *list,
@@ -1642,6 +1646,11 @@  struct mlx5_list_entry *flow_dv_push_vlan_create_cb(struct mlx5_list *list,
 						  void *cb_ctx);
 void flow_dv_push_vlan_remove_cb(struct mlx5_list *list,
 				 struct mlx5_list_entry *entry);
+struct mlx5_list_entry *flow_dv_push_vlan_clone_cb
+				(struct mlx5_list *list,
+				 struct mlx5_list_entry *entry, void *cb_ctx);
+void flow_dv_push_vlan_clone_free_cb(struct mlx5_list *list,
+				 struct mlx5_list_entry *entry);
 
 int flow_dv_sample_match_cb(struct mlx5_list *list,
 			    struct mlx5_list_entry *entry, void *cb_ctx);
@@ -1650,6 +1659,11 @@  struct mlx5_list_entry *flow_dv_sample_create_cb(struct mlx5_list *list,
 						 void *cb_ctx);
 void flow_dv_sample_remove_cb(struct mlx5_list *list,
 			      struct mlx5_list_entry *entry);
+struct mlx5_list_entry *flow_dv_sample_clone_cb
+				(struct mlx5_list *list,
+				 struct mlx5_list_entry *entry, void *cb_ctx);
+void flow_dv_sample_clone_free_cb(struct mlx5_list *list,
+			      struct mlx5_list_entry *entry);
 
 int flow_dv_dest_array_match_cb(struct mlx5_list *list,
 				struct mlx5_list_entry *entry, void *cb_ctx);
@@ -1658,6 +1672,11 @@  struct mlx5_list_entry *flow_dv_dest_array_create_cb(struct mlx5_list *list,
 						  void *cb_ctx);
 void flow_dv_dest_array_remove_cb(struct mlx5_list *list,
 				  struct mlx5_list_entry *entry);
+struct mlx5_list_entry *flow_dv_dest_array_clone_cb
+				(struct mlx5_list *list,
+				 struct mlx5_list_entry *entry, void *cb_ctx);
+void flow_dv_dest_array_clone_free_cb(struct mlx5_list *list,
+				  struct mlx5_list_entry *entry);
 struct mlx5_aso_age_action *flow_aso_age_get_by_idx(struct rte_eth_dev *dev,
 						    uint32_t age_idx);
 int flow_dev_geneve_tlv_option_resource_register(struct rte_eth_dev *dev,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index d19b41c20a..10aa557fee 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -3820,6 +3820,39 @@  flow_dv_port_id_create_cb(struct mlx5_list *list,
 	return &resource->entry;
 }
 
+struct mlx5_list_entry *
+flow_dv_port_id_clone_cb(struct mlx5_list *list,
+			  struct mlx5_list_entry *entry __rte_unused,
+			  void *cb_ctx)
+{
+	struct mlx5_dev_ctx_shared *sh = list->ctx;
+	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
+	struct mlx5_flow_dv_port_id_action_resource *resource;
+	uint32_t idx;
+
+	resource = mlx5_ipool_zmalloc(sh->ipool[MLX5_IPOOL_PORT_ID], &idx);
+	if (!resource) {
+		rte_flow_error_set(ctx->error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				   "cannot allocate port_id action memory");
+		return NULL;
+	}
+	memcpy(resource, entry, sizeof(*resource));
+	resource->idx = idx;
+	return &resource->entry;
+}
+
+void
+flow_dv_port_id_clone_free_cb(struct mlx5_list *list,
+			  struct mlx5_list_entry *entry)
+{
+	struct mlx5_dev_ctx_shared *sh = list->ctx;
+	struct mlx5_flow_dv_port_id_action_resource *resource =
+			container_of(entry, typeof(*resource), entry);
+
+	mlx5_ipool_free(sh->ipool[MLX5_IPOOL_PORT_ID], resource->idx);
+}
+
 /**
  * Find existing table port ID resource or create and register a new one.
  *
@@ -3912,6 +3945,39 @@  flow_dv_push_vlan_create_cb(struct mlx5_list *list,
 	return &resource->entry;
 }
 
+struct mlx5_list_entry *
+flow_dv_push_vlan_clone_cb(struct mlx5_list *list,
+			  struct mlx5_list_entry *entry __rte_unused,
+			  void *cb_ctx)
+{
+	struct mlx5_dev_ctx_shared *sh = list->ctx;
+	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
+	struct mlx5_flow_dv_push_vlan_action_resource *resource;
+	uint32_t idx;
+
+	resource = mlx5_ipool_zmalloc(sh->ipool[MLX5_IPOOL_PUSH_VLAN], &idx);
+	if (!resource) {
+		rte_flow_error_set(ctx->error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				   "cannot allocate push_vlan action memory");
+		return NULL;
+	}
+	memcpy(resource, entry, sizeof(*resource));
+	resource->idx = idx;
+	return &resource->entry;
+}
+
+void
+flow_dv_push_vlan_clone_free_cb(struct mlx5_list *list,
+			    struct mlx5_list_entry *entry)
+{
+	struct mlx5_dev_ctx_shared *sh = list->ctx;
+	struct mlx5_flow_dv_push_vlan_action_resource *resource =
+			container_of(entry, typeof(*resource), entry);
+
+	mlx5_ipool_free(sh->ipool[MLX5_IPOOL_PUSH_VLAN], resource->idx);
+}
+
 /**
  * Find existing push vlan resource or create and register a new one.
  *
@@ -9885,6 +9951,36 @@  flow_dv_matcher_enable(uint32_t *match_criteria)
 	return match_criteria_enable;
 }
 
+static struct mlx5_list_entry *
+flow_dv_matcher_clone_cb(struct mlx5_list *list __rte_unused,
+			 struct mlx5_list_entry *entry, void *cb_ctx)
+{
+	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
+	struct mlx5_flow_dv_matcher *ref = ctx->data;
+	struct mlx5_flow_tbl_data_entry *tbl = container_of(ref->tbl,
+							    typeof(*tbl), tbl);
+	struct mlx5_flow_dv_matcher *resource = mlx5_malloc(MLX5_MEM_ANY,
+							    sizeof(*resource),
+							    0, SOCKET_ID_ANY);
+
+	if (!resource) {
+		rte_flow_error_set(ctx->error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				   "cannot create matcher");
+		return NULL;
+	}
+	memcpy(resource, entry, sizeof(*resource));
+	resource->tbl = &tbl->tbl;
+	return &resource->entry;
+}
+
+static void
+flow_dv_matcher_clone_free_cb(struct mlx5_list *list __rte_unused,
+			     struct mlx5_list_entry *entry)
+{
+	mlx5_free(entry);
+}
+
 struct mlx5_hlist_entry *
 flow_dv_tbl_create_cb(struct mlx5_hlist *list, uint64_t key64, void *cb_ctx)
 {
@@ -9951,10 +10047,12 @@  flow_dv_tbl_create_cb(struct mlx5_hlist *list, uint64_t key64, void *cb_ctx)
 	MKSTR(matcher_name, "%s_%s_%u_%u_matcher_list",
 	      key.is_fdb ? "FDB" : "NIC", key.is_egress ? "egress" : "ingress",
 	      key.level, key.id);
-	mlx5_list_create(&tbl_data->matchers, matcher_name, 0, sh,
+	mlx5_list_create(&tbl_data->matchers, matcher_name, sh,
 			 flow_dv_matcher_create_cb,
 			 flow_dv_matcher_match_cb,
-			 flow_dv_matcher_remove_cb);
+			 flow_dv_matcher_remove_cb,
+			 flow_dv_matcher_clone_cb,
+			 flow_dv_matcher_clone_free_cb);
 	return &tbl_data->entry;
 }
 
@@ -10742,6 +10840,45 @@  flow_dv_sample_create_cb(struct mlx5_list *list __rte_unused,
 
 }
 
+struct mlx5_list_entry *
+flow_dv_sample_clone_cb(struct mlx5_list *list __rte_unused,
+			 struct mlx5_list_entry *entry __rte_unused,
+			 void *cb_ctx)
+{
+	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
+	struct rte_eth_dev *dev = ctx->dev;
+	struct mlx5_flow_dv_sample_resource *resource;
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+	uint32_t idx = 0;
+
+	resource = mlx5_ipool_zmalloc(sh->ipool[MLX5_IPOOL_SAMPLE], &idx);
+	if (!resource) {
+		rte_flow_error_set(ctx->error, ENOMEM,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "cannot allocate resource memory");
+		return NULL;
+	}
+	memcpy(resource, entry, sizeof(*resource));
+	resource->idx = idx;
+	resource->dev = dev;
+	return &resource->entry;
+}
+
+void
+flow_dv_sample_clone_free_cb(struct mlx5_list *list __rte_unused,
+			 struct mlx5_list_entry *entry)
+{
+	struct mlx5_flow_dv_sample_resource *resource =
+			container_of(entry, typeof(*resource), entry);
+	struct rte_eth_dev *dev = resource->dev;
+	struct mlx5_priv *priv = dev->data->dev_private;
+
+	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_SAMPLE],
+			resource->idx);
+}
+
 /**
  * Find existing sample resource or create and register a new one.
  *
@@ -10917,6 +11054,46 @@  flow_dv_dest_array_create_cb(struct mlx5_list *list __rte_unused,
 	return NULL;
 }
 
+struct mlx5_list_entry *
+flow_dv_dest_array_clone_cb(struct mlx5_list *list __rte_unused,
+			 struct mlx5_list_entry *entry __rte_unused,
+			 void *cb_ctx)
+{
+	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
+	struct rte_eth_dev *dev = ctx->dev;
+	struct mlx5_flow_dv_dest_array_resource *resource;
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+	uint32_t res_idx = 0;
+	struct rte_flow_error *error = ctx->error;
+
+	resource = mlx5_ipool_zmalloc(sh->ipool[MLX5_IPOOL_DEST_ARRAY],
+				      &res_idx);
+	if (!resource) {
+		rte_flow_error_set(error, ENOMEM,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "cannot allocate dest-array memory");
+		return NULL;
+	}
+	memcpy(resource, entry, sizeof(*resource));
+	resource->idx = res_idx;
+	resource->dev = dev;
+	return &resource->entry;
+}
+
+void
+flow_dv_dest_array_clone_free_cb(struct mlx5_list *list __rte_unused,
+			     struct mlx5_list_entry *entry)
+{
+	struct mlx5_flow_dv_dest_array_resource *resource =
+			container_of(entry, typeof(*resource), entry);
+	struct rte_eth_dev *dev = resource->dev;
+	struct mlx5_priv *priv = dev->data->dev_private;
+
+	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_DEST_ARRAY], resource->idx);
+}
+
 /**
  * Find existing destination array resource or create and register a new one.
  *
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 3dcc71d51d..5450ddd388 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -229,6 +229,11 @@  int mlx5_hrxq_match_cb(struct mlx5_list *list,
 		       void *cb_ctx);
 void mlx5_hrxq_remove_cb(struct mlx5_list *list,
 			 struct mlx5_list_entry *entry);
+struct mlx5_list_entry *mlx5_hrxq_clone_cb(struct mlx5_list *list,
+					   struct mlx5_list_entry *entry,
+					   void *cb_ctx __rte_unused);
+void mlx5_hrxq_clone_free_cb(struct mlx5_list *list,
+			     struct mlx5_list_entry *entry);
 uint32_t mlx5_hrxq_get(struct rte_eth_dev *dev,
 		       struct mlx5_flow_rss_desc *rss_desc);
 int mlx5_hrxq_release(struct rte_eth_dev *dev, uint32_t hxrq_idx);
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 8395332507..f8769da8dc 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1857,20 +1857,18 @@  mlx5_ind_table_obj_get(struct rte_eth_dev *dev, const uint16_t *queues,
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_ind_table_obj *ind_tbl;
 
+	rte_rwlock_read_lock(&priv->ind_tbls_lock);
 	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
 		if ((ind_tbl->queues_n == queues_n) &&
 		    (memcmp(ind_tbl->queues, queues,
 			    ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
-		     == 0))
+		     == 0)) {
+			__atomic_fetch_add(&ind_tbl->refcnt, 1,
+					   __ATOMIC_RELAXED);
 			break;
+		}
 	}
-	if (ind_tbl) {
-		unsigned int i;
-
-		__atomic_fetch_add(&ind_tbl->refcnt, 1, __ATOMIC_RELAXED);
-		for (i = 0; i != ind_tbl->queues_n; ++i)
-			mlx5_rxq_get(dev, ind_tbl->queues[i]);
-	}
+	rte_rwlock_read_unlock(&priv->ind_tbls_lock);
 	return ind_tbl;
 }
 
@@ -1893,19 +1891,20 @@  mlx5_ind_table_obj_release(struct rte_eth_dev *dev,
 			   bool standalone)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	unsigned int i;
+	unsigned int i, ret;
 
-	if (__atomic_sub_fetch(&ind_tbl->refcnt, 1, __ATOMIC_RELAXED) == 0)
-		priv->obj_ops.ind_table_destroy(ind_tbl);
+	rte_rwlock_write_lock(&priv->ind_tbls_lock);
+	ret = __atomic_sub_fetch(&ind_tbl->refcnt, 1, __ATOMIC_RELAXED);
+	if (!ret && !standalone)
+		LIST_REMOVE(ind_tbl, next);
+	rte_rwlock_write_unlock(&priv->ind_tbls_lock);
+	if (ret)
+		return 1;
+	priv->obj_ops.ind_table_destroy(ind_tbl);
 	for (i = 0; i != ind_tbl->queues_n; ++i)
 		claim_nonzero(mlx5_rxq_release(dev, ind_tbl->queues[i]));
-	if (__atomic_load_n(&ind_tbl->refcnt, __ATOMIC_RELAXED) == 0) {
-		if (!standalone)
-			LIST_REMOVE(ind_tbl, next);
-		mlx5_free(ind_tbl);
-		return 0;
-	}
-	return 1;
+	mlx5_free(ind_tbl);
+	return 0;
 }
 
 /**
@@ -1924,12 +1923,14 @@  mlx5_ind_table_obj_verify(struct rte_eth_dev *dev)
 	struct mlx5_ind_table_obj *ind_tbl;
 	int ret = 0;
 
+	rte_rwlock_read_lock(&priv->ind_tbls_lock);
 	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
 		DRV_LOG(DEBUG,
 			"port %u indirection table obj %p still referenced",
 			dev->data->port_id, (void *)ind_tbl);
 		++ret;
 	}
+	rte_rwlock_read_unlock(&priv->ind_tbls_lock);
 	return ret;
 }
 
@@ -2015,8 +2016,11 @@  mlx5_ind_table_obj_new(struct rte_eth_dev *dev, const uint16_t *queues,
 		mlx5_free(ind_tbl);
 		return NULL;
 	}
-	if (!standalone)
+	if (!standalone) {
+		rte_rwlock_write_lock(&priv->ind_tbls_lock);
 		LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
+		rte_rwlock_write_unlock(&priv->ind_tbls_lock);
+	}
 	return ind_tbl;
 }
 
@@ -2328,6 +2332,35 @@  mlx5_hrxq_create_cb(struct mlx5_list *list,
 	return hrxq ? &hrxq->entry : NULL;
 }
 
+struct mlx5_list_entry *
+mlx5_hrxq_clone_cb(struct mlx5_list *list,
+		    struct mlx5_list_entry *entry,
+		    void *cb_ctx __rte_unused)
+{
+	struct rte_eth_dev *dev = list->ctx;
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_hrxq *hrxq;
+	uint32_t hrxq_idx = 0;
+
+	hrxq = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_HRXQ], &hrxq_idx);
+	if (!hrxq)
+		return NULL;
+	memcpy(hrxq, entry, sizeof(*hrxq) + MLX5_RSS_HASH_KEY_LEN);
+	hrxq->idx = hrxq_idx;
+	return &hrxq->entry;
+}
+
+void
+mlx5_hrxq_clone_free_cb(struct mlx5_list *list,
+		    struct mlx5_list_entry *entry)
+{
+	struct rte_eth_dev *dev = list->ctx;
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_hrxq *hrxq = container_of(entry, typeof(*hrxq), entry);
+
+	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq->idx);
+}
+
 /**
  * Get an Rx Hash queue.
  *
diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c
index 4536ca807d..f505caed4e 100644
--- a/drivers/net/mlx5/mlx5_utils.c
+++ b/drivers/net/mlx5/mlx5_utils.c
@@ -9,57 +9,68 @@ 
 #include "mlx5_utils.h"
 
 
-/********************* MLX5 list ************************/
-
-static struct mlx5_list_entry *
-mlx5_list_default_create_cb(struct mlx5_list *list,
-			     struct mlx5_list_entry *entry __rte_unused,
-			     void *ctx __rte_unused)
-{
-	return mlx5_malloc(MLX5_MEM_ZERO, list->entry_sz, 0, SOCKET_ID_ANY);
-}
-
-static void
-mlx5_list_default_remove_cb(struct mlx5_list *list __rte_unused,
-			     struct mlx5_list_entry *entry)
-{
-	mlx5_free(entry);
-}
+/********************* mlx5 list ************************/
 
 int
-mlx5_list_create(struct mlx5_list *list, const char *name,
-		     uint32_t entry_size, void *ctx,
-		     mlx5_list_create_cb cb_create,
-		     mlx5_list_match_cb cb_match,
-		     mlx5_list_remove_cb cb_remove)
+mlx5_list_create(struct mlx5_list *list, const char *name, void *ctx,
+		 mlx5_list_create_cb cb_create,
+		 mlx5_list_match_cb cb_match,
+		 mlx5_list_remove_cb cb_remove,
+		 mlx5_list_clone_cb cb_clone,
+		 mlx5_list_clone_free_cb cb_clone_free)
 {
+	int i;
+
 	MLX5_ASSERT(list);
-	if (!cb_match || (!cb_create ^ !cb_remove))
+	if (!cb_match || !cb_create || !cb_remove || !cb_clone ||
+	    !cb_clone_free)
 		return -1;
 	if (name)
 		snprintf(list->name, sizeof(list->name), "%s", name);
-	list->entry_sz = entry_size;
 	list->ctx = ctx;
-	list->cb_create = cb_create ? cb_create : mlx5_list_default_create_cb;
+	list->cb_create = cb_create;
 	list->cb_match = cb_match;
-	list->cb_remove = cb_remove ? cb_remove : mlx5_list_default_remove_cb;
+	list->cb_remove = cb_remove;
+	list->cb_clone = cb_clone;
+	list->cb_clone_free = cb_clone_free;
 	rte_rwlock_init(&list->lock);
 	DRV_LOG(DEBUG, "mlx5 list %s initialized.", list->name);
-	LIST_INIT(&list->head);
+	for (i = 0; i <= RTE_MAX_LCORE; i++)
+		LIST_INIT(&list->cache[i].h);
 	return 0;
 }
 
 static struct mlx5_list_entry *
-__list_lookup(struct mlx5_list *list, void *ctx, bool reuse)
+__list_lookup(struct mlx5_list *list, int lcore_index, void *ctx, bool reuse)
 {
-	struct mlx5_list_entry *entry;
-
-	LIST_FOREACH(entry, &list->head, next) {
-		if (list->cb_match(list, entry, ctx))
+	struct mlx5_list_entry *entry = LIST_FIRST(&list->cache[lcore_index].h);
+	uint32_t ret;
+
+	while (entry != NULL) {
+		struct mlx5_list_entry *nentry = LIST_NEXT(entry, next);
+
+		if (list->cb_match(list, entry, ctx)) {
+			if (lcore_index < RTE_MAX_LCORE) {
+				ret = __atomic_load_n(&entry->ref_cnt,
+						      __ATOMIC_ACQUIRE);
+				if (ret == 0) {
+					LIST_REMOVE(entry, next);
+					list->cb_clone_free(list, entry);
+				}
+			}
+			entry = nentry;
 			continue;
+		}
 		if (reuse) {
-			__atomic_add_fetch(&entry->ref_cnt, 1,
-					   __ATOMIC_RELAXED);
+			ret = __atomic_add_fetch(&entry->ref_cnt, 1,
+						 __ATOMIC_ACQUIRE);
+			if (ret == 1u) {
+				/* Entry was invalid before, free it. */
+				LIST_REMOVE(entry, next);
+				list->cb_clone_free(list, entry);
+				entry = nentry;
+				continue;
+			}
 			DRV_LOG(DEBUG, "mlx5 list %s entry %p ref++: %u.",
 				list->name, (void *)entry, entry->ref_cnt);
 		}
@@ -68,96 +79,141 @@  __list_lookup(struct mlx5_list *list, void *ctx, bool reuse)
 	return entry;
 }
 
-static struct mlx5_list_entry *
-list_lookup(struct mlx5_list *list, void *ctx, bool reuse)
+struct mlx5_list_entry *
+mlx5_list_lookup(struct mlx5_list *list, void *ctx)
 {
-	struct mlx5_list_entry *entry;
+	struct mlx5_list_entry *entry = NULL;
+	int i;
 
 	rte_rwlock_read_lock(&list->lock);
-	entry = __list_lookup(list, ctx, reuse);
+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		entry = __list_lookup(list, i, ctx, false);
+		if (entry)
+			break;
+	}
 	rte_rwlock_read_unlock(&list->lock);
 	return entry;
 }
 
-struct mlx5_list_entry *
-mlx5_list_lookup(struct mlx5_list *list, void *ctx)
+static struct mlx5_list_entry *
+mlx5_list_cache_insert(struct mlx5_list *list, int lcore_index,
+		       struct mlx5_list_entry *gentry, void *ctx)
 {
-	return list_lookup(list, ctx, false);
+	struct mlx5_list_entry *lentry = list->cb_clone(list, gentry, ctx);
+
+	if (!lentry)
+		return NULL;
+	lentry->ref_cnt = 1u;
+	lentry->gentry = gentry;
+	LIST_INSERT_HEAD(&list->cache[lcore_index].h, lentry, next);
+	return lentry;
 }
 
 struct mlx5_list_entry *
 mlx5_list_register(struct mlx5_list *list, void *ctx)
 {
-	struct mlx5_list_entry *entry;
+	struct mlx5_list_entry *entry, *lentry;
 	uint32_t prev_gen_cnt = 0;
+	int lcore_index = rte_lcore_index(rte_lcore_id());
 
 	MLX5_ASSERT(list);
-	prev_gen_cnt = __atomic_load_n(&list->gen_cnt, __ATOMIC_ACQUIRE);
+	MLX5_ASSERT(lcore_index < RTE_MAX_LCORE);
+	if (unlikely(lcore_index == -1)) {
+		rte_errno = ENOTSUP;
+		return NULL;
+	}
+	/* Lookup in local cache. */
+	lentry = __list_lookup(list, lcore_index, ctx, true);
+	if (lentry)
+		return lentry;
 	/* Lookup with read lock, reuse if found. */
-	entry = list_lookup(list, ctx, true);
-	if (entry)
-		return entry;
+	rte_rwlock_read_lock(&list->lock);
+	entry = __list_lookup(list, RTE_MAX_LCORE, ctx, true);
+	if (entry == NULL) {
+		prev_gen_cnt = __atomic_load_n(&list->gen_cnt,
+					       __ATOMIC_ACQUIRE);
+		rte_rwlock_read_unlock(&list->lock);
+	} else {
+		rte_rwlock_read_unlock(&list->lock);
+		return mlx5_list_cache_insert(list, lcore_index, entry, ctx);
+	}
 	/* Not found, append with write lock - block read from other threads. */
 	rte_rwlock_write_lock(&list->lock);
 	/* If list changed by other threads before lock, search again. */
 	if (prev_gen_cnt != __atomic_load_n(&list->gen_cnt, __ATOMIC_ACQUIRE)) {
 		/* Lookup and reuse w/o read lock. */
-		entry = __list_lookup(list, ctx, true);
-		if (entry)
-			goto done;
+		entry = __list_lookup(list, RTE_MAX_LCORE, ctx, true);
+		if (entry) {
+			rte_rwlock_write_unlock(&list->lock);
+			return mlx5_list_cache_insert(list, lcore_index, entry,
+						      ctx);
+		}
 	}
 	entry = list->cb_create(list, entry, ctx);
-	if (!entry) {
-		DRV_LOG(ERR, "Failed to init mlx5 list %s entry %p.",
-			list->name, (void *)entry);
-		goto done;
+	if (entry) {
+		lentry = mlx5_list_cache_insert(list, lcore_index, entry, ctx);
+		if (!lentry) {
+			list->cb_remove(list, entry);
+		} else {
+			entry->ref_cnt = 1u;
+			LIST_INSERT_HEAD(&list->cache[RTE_MAX_LCORE].h, entry,
+					 next);
+			__atomic_add_fetch(&list->gen_cnt, 1, __ATOMIC_RELEASE);
+			__atomic_add_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
+			DRV_LOG(DEBUG, "mlx5 list %s entry %p new: %u.",
+				list->name, (void *)entry, entry->ref_cnt);
+		}
+
 	}
-	entry->ref_cnt = 1;
-	LIST_INSERT_HEAD(&list->head, entry, next);
-	__atomic_add_fetch(&list->gen_cnt, 1, __ATOMIC_RELEASE);
-	__atomic_add_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
-	DRV_LOG(DEBUG, "mlx5 list %s entry %p new: %u.",
-		list->name, (void *)entry, entry->ref_cnt);
-done:
 	rte_rwlock_write_unlock(&list->lock);
-	return entry;
+	return lentry;
 }
 
 int
 mlx5_list_unregister(struct mlx5_list *list,
 		      struct mlx5_list_entry *entry)
 {
+	struct mlx5_list_entry *gentry = entry->gentry;
+
+	if (__atomic_sub_fetch(&entry->ref_cnt, 1, __ATOMIC_ACQUIRE) != 0)
+		return 1;
+	if (__atomic_sub_fetch(&gentry->ref_cnt, 1, __ATOMIC_ACQUIRE) != 0)
+		return 1;
 	rte_rwlock_write_lock(&list->lock);
-	MLX5_ASSERT(entry && entry->next.le_prev);
-	DRV_LOG(DEBUG, "mlx5 list %s entry %p ref--: %u.",
-		list->name, (void *)entry, entry->ref_cnt);
-	if (--entry->ref_cnt) {
+	if (__atomic_load_n(&gentry->ref_cnt, __ATOMIC_ACQUIRE) == 0) {
+		__atomic_add_fetch(&list->gen_cnt, 1, __ATOMIC_ACQUIRE);
+		__atomic_sub_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
+		LIST_REMOVE(gentry, next);
+		list->cb_remove(list, gentry);
 		rte_rwlock_write_unlock(&list->lock);
-		return 1;
+		DRV_LOG(DEBUG, "mlx5 list %s entry %p removed.",
+			list->name, (void *)gentry);
+		return 0;
 	}
-	__atomic_add_fetch(&list->gen_cnt, 1, __ATOMIC_ACQUIRE);
-	__atomic_sub_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
-	LIST_REMOVE(entry, next);
-	list->cb_remove(list, entry);
 	rte_rwlock_write_unlock(&list->lock);
-	DRV_LOG(DEBUG, "mlx5 list %s entry %p removed.",
-		list->name, (void *)entry);
-	return 0;
+	return 1;
 }
 
 void
 mlx5_list_destroy(struct mlx5_list *list)
 {
 	struct mlx5_list_entry *entry;
+	int i;
 
 	MLX5_ASSERT(list);
-	/* no LIST_FOREACH_SAFE, using while instead */
-	while (!LIST_EMPTY(&list->head)) {
-		entry = LIST_FIRST(&list->head);
-		LIST_REMOVE(entry, next);
-		list->cb_remove(list, entry);
-		DRV_LOG(DEBUG, "mlx5 list %s entry %p destroyed.",
-			list->name, (void *)entry);
+	for (i = 0; i <= RTE_MAX_LCORE; i++) {
+		while (!LIST_EMPTY(&list->cache[i].h)) {
+			entry = LIST_FIRST(&list->cache[i].h);
+			LIST_REMOVE(entry, next);
+			if (i == RTE_MAX_LCORE) {
+				list->cb_remove(list, entry);
+				DRV_LOG(DEBUG, "mlx5 list %s entry %p "
+					"destroyed.", list->name,
+					(void *)entry);
+			} else {
+				list->cb_clone_free(list, entry);
+			}
+		}
 	}
 	memset(list, 0, sizeof(*list));
 }
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index cfb3cb6180..9e3fe0cb85 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -310,9 +310,14 @@  struct mlx5_list;
  */
 struct mlx5_list_entry {
 	LIST_ENTRY(mlx5_list_entry) next; /* Entry pointers in the list. */
-	uint32_t ref_cnt; /* Reference count. */
+	uint32_t ref_cnt; /* 0 means, entry is invalid. */
+	struct mlx5_list_entry *gentry;
 };
 
+struct mlx5_list_cache {
+	LIST_HEAD(mlx5_list_head, mlx5_list_entry) h;
+} __rte_cache_aligned;
+
 /**
  * Type of callback function for entry removal.
  *
@@ -340,6 +345,13 @@  typedef void (*mlx5_list_remove_cb)(struct mlx5_list *list,
 typedef int (*mlx5_list_match_cb)(struct mlx5_list *list,
 				   struct mlx5_list_entry *entry, void *ctx);
 
+typedef struct mlx5_list_entry *(*mlx5_list_clone_cb)
+				 (struct mlx5_list *list,
+				  struct mlx5_list_entry *entry, void *ctx);
+
+typedef void (*mlx5_list_clone_free_cb)(struct mlx5_list *list,
+					 struct mlx5_list_entry *entry);
+
 /**
  * Type of function for user defined mlx5 list entry creation.
  *
@@ -376,15 +388,17 @@  typedef struct mlx5_list_entry *(*mlx5_list_create_cb)
  */
 struct mlx5_list {
 	char name[MLX5_NAME_SIZE]; /**< Name of the mlx5 list. */
-	uint32_t entry_sz; /**< Entry size, 0: use create callback. */
-	rte_rwlock_t lock; /* read/write lock. */
 	uint32_t gen_cnt; /* List modification will update generation count. */
 	uint32_t count; /* number of entries in list. */
 	void *ctx; /* user objects target to callback. */
+	rte_rwlock_t lock; /* read/write lock. */
 	mlx5_list_create_cb cb_create; /**< entry create callback. */
 	mlx5_list_match_cb cb_match; /**< entry match callback. */
 	mlx5_list_remove_cb cb_remove; /**< entry remove callback. */
-	LIST_HEAD(mlx5_list_head, mlx5_list_entry) head;
+	mlx5_list_clone_cb cb_clone; /**< entry clone callback. */
+	mlx5_list_clone_free_cb cb_clone_free;
+	struct mlx5_list_cache cache[RTE_MAX_LCORE + 1];
+	/* Lcore cache, last index is the global cache. */
 };
 
 /**
@@ -394,8 +408,6 @@  struct mlx5_list {
  *   Pointer to the hast list table.
  * @param name
  *   Name of the mlx5 list.
- * @param entry_size
- *   Entry size to allocate, 0 to allocate by creation callback.
  * @param ctx
  *   Pointer to the list context data.
  * @param cb_create
@@ -408,10 +420,12 @@  struct mlx5_list {
  *   0 on success, otherwise failure.
  */
 int mlx5_list_create(struct mlx5_list *list,
-			 const char *name, uint32_t entry_size, void *ctx,
+			 const char *name, void *ctx,
 			 mlx5_list_create_cb cb_create,
 			 mlx5_list_match_cb cb_match,
-			 mlx5_list_remove_cb cb_remove);
+			 mlx5_list_remove_cb cb_remove,
+			 mlx5_list_clone_cb cb_clone,
+			 mlx5_list_clone_free_cb cb_clone_free);
 
 /**
  * Search an entry matching the key.
diff --git a/drivers/net/mlx5/windows/mlx5_os.c b/drivers/net/mlx5/windows/mlx5_os.c
index bcf72dc6db..8ced98f0dc 100644
--- a/drivers/net/mlx5/windows/mlx5_os.c
+++ b/drivers/net/mlx5/windows/mlx5_os.c
@@ -610,9 +610,10 @@  mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			err = ENOTSUP;
 			goto error;
 	}
-	mlx5_list_create(&priv->hrxqs, "hrxq", 0, eth_dev,
+	mlx5_list_create(&priv->hrxqs, "hrxq", eth_dev,
 		mlx5_hrxq_create_cb, mlx5_hrxq_match_cb,
-		mlx5_hrxq_remove_cb);
+		mlx5_hrxq_remove_cb, mlx5_hrxq_clone_cb,
+		mlx5_hrxq_clone_free_cb);
 	/* Query availability of metadata reg_c's. */
 	err = mlx5_flow_discover_mreg_c(eth_dev);
 	if (err < 0) {