[v2,8/8] net/mlx5: optimize counter extend memory

Message ID 1603162949-150001-9-git-send-email-suanmingm@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Headers
Series net/mlx5: make counter thread safe |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Suanming Mou Oct. 20, 2020, 3:02 a.m. UTC
  Counter extend memory was allocated for non-batch counters to save the
extra DevX object. Currently, for a non-batch counter, which does not
support aging, the entry in the generic counter struct is used only while
the counter sits in the free list, and the bytes field in the struct is
used only while the counter is allocated and in use.

In this case, the DevX object can be saved in the generic counter struct:
in a union with the entry memory while the counter is allocated, and in a
union with the bytes field while the counter is free.
The pool type is also no longer needed: non-fallback mode only has generic
counters and aging counters, so a single bit indicating whether the pool
is aged is enough.

This eliminates the counter extend info struct and saves memory.

Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c   | 22 +++++++++-----
 drivers/net/mlx5/mlx5.c            | 18 ++++++-----
 drivers/net/mlx5/mlx5.h            | 60 ++++++++++++++++++------------------
 drivers/net/mlx5/mlx5_flow.c       |  6 ++--
 drivers/net/mlx5/mlx5_flow_dv.c    | 38 +++++++++++------------
 drivers/net/mlx5/mlx5_flow_verbs.c | 62 +++++++++++++++++---------------------
 6 files changed, 100 insertions(+), 106 deletions(-)
  

Patch

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 6e33b2b..457008e 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -573,24 +573,30 @@ 
 {
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+	bool fallback;
 
-	/* If devx is not supported or not DV mode, counters are not working. */
-	if (!priv->config.devx || !priv->config.dv_flow_en)
-		return;
 #ifndef HAVE_IBV_DEVX_ASYNC
-	priv->counter_fallback = 1;
+	fallback = true;
 #else
-	priv->counter_fallback = 0;
-	if (!priv->config.hca_attr.flow_counters_dump ||
+	fallback = false;
+	if (!priv->config.devx || !priv->config.dv_flow_en ||
+	    !priv->config.hca_attr.flow_counters_dump ||
 	    !(priv->config.hca_attr.flow_counter_bulk_alloc_bitmap & 0x4) ||
 	    (mlx5_flow_dv_discover_counter_offset_support(dev) == -ENOTSUP))
-		priv->counter_fallback = 1;
+		fallback = true;
 #endif
-	if (priv->counter_fallback)
+	if (fallback)
 		DRV_LOG(INFO, "Use fall-back DV counter management. Flow "
 			"counter dump:%d, bulk_alloc_bitmap:0x%hhx.",
 			priv->config.hca_attr.flow_counters_dump,
 			priv->config.hca_attr.flow_counter_bulk_alloc_bitmap);
+	/* Initialize fallback mode only on the port that initializes sh. */
+	if (sh->refcnt == 1)
+		sh->cmng.counter_fallback = fallback;
+	else if (fallback != sh->cmng.counter_fallback)
+		DRV_LOG(WARNING, "Port %d in sh has different fallback mode "
+			"with others:%d.", PORT_ID(priv), fallback);
 #endif
 }
 
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index e805723..e4ce9a9 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -535,23 +535,25 @@  struct mlx5_flow_id_pool *
 	if (sh->cmng.pools) {
 		struct mlx5_flow_counter_pool *pool;
 		uint16_t n_valid = sh->cmng.n_valid;
+		bool fallback = sh->cmng.counter_fallback;
 
 		for (i = 0; i < n_valid; ++i) {
 			pool = sh->cmng.pools[i];
-			if (!IS_EXT_POOL(pool) && pool->min_dcs)
+			if (!fallback && pool->min_dcs)
 				claim_zero(mlx5_devx_cmd_destroy
 							       (pool->min_dcs));
 			for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
-				if (MLX5_POOL_GET_CNT(pool, j)->action)
+				struct mlx5_flow_counter *cnt =
+						MLX5_POOL_GET_CNT(pool, j);
+
+				if (cnt->action)
 					claim_zero
 					 (mlx5_glue->destroy_flow_action
-					  (MLX5_POOL_GET_CNT
-					  (pool, j)->action));
-				if (IS_EXT_POOL(pool) && MLX5_GET_POOL_CNT_EXT
-				    (pool, j)->dcs)
+					  (cnt->action));
+				if (fallback && MLX5_POOL_GET_CNT
+				    (pool, j)->dcs_when_free)
 					claim_zero(mlx5_devx_cmd_destroy
-						   (MLX5_GET_POOL_CNT_EXT
-						    (pool, j)->dcs));
+						   (cnt->dcs_when_free));
 			}
 			mlx5_free(pool);
 		}
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 9638ab2..fa69c66 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -277,18 +277,11 @@  struct mlx5_drop {
 #define IS_BATCH_CNT(cnt) (((cnt) & (MLX5_CNT_SHARED_OFFSET - 1)) >= \
 			   MLX5_CNT_BATCH_OFFSET)
 #define MLX5_CNT_SIZE (sizeof(struct mlx5_flow_counter))
-#define MLX5_CNTEXT_SIZE (sizeof(struct mlx5_flow_counter_ext))
 #define MLX5_AGE_SIZE (sizeof(struct mlx5_age_param))
-#define MLX5_CNT_POOL_TYPE_EXT (1 << 0)
-#define MLX5_CNT_POOL_TYPE_AGE (1 << 1)
-
-#define IS_EXT_POOL(pool) (((pool)->type) & MLX5_CNT_POOL_TYPE_EXT)
-#define IS_AGE_POOL(pool) (((pool)->type) & MLX5_CNT_POOL_TYPE_AGE)
 
 #define MLX5_CNT_LEN(pool) \
 	(MLX5_CNT_SIZE + \
-	(IS_AGE_POOL(pool) ? MLX5_AGE_SIZE : 0) + \
-	(IS_EXT_POOL(pool) ? MLX5_CNTEXT_SIZE : 0))
+	((pool)->is_aged ? MLX5_AGE_SIZE : 0))
 #define MLX5_POOL_GET_CNT(pool, index) \
 	((struct mlx5_flow_counter *) \
 	((uint8_t *)((pool) + 1) + (index) * (MLX5_CNT_LEN(pool))))
@@ -303,12 +296,6 @@  struct mlx5_drop {
  */
 #define MLX5_MAKE_CNT_IDX(pi, offset) \
 	((pi) * MLX5_COUNTERS_PER_POOL + (offset) + 1)
-#define MLX5_CNT_TO_CNT_EXT(pool, cnt) \
-	((struct mlx5_flow_counter_ext *)\
-	((uint8_t *)((cnt) + 1) + \
-	(IS_AGE_POOL(pool) ? MLX5_AGE_SIZE : 0)))
-#define MLX5_GET_POOL_CNT_EXT(pool, offset) \
-	MLX5_CNT_TO_CNT_EXT(pool, MLX5_POOL_GET_CNT((pool), (offset)))
 #define MLX5_CNT_TO_AGE(cnt) \
 	((struct mlx5_age_param *)((cnt) + 1))
 /*
@@ -368,30 +355,41 @@  struct mlx5_flow_counter {
 		 * to the aging list. For shared counter, only when it is
 		 * released, the TAILQ entry memory will be used, at that
 		 * time, shared memory is not used anymore.
+		 *
+		 * Similarly for the non-batch counter dcs: since it doesn't
+		 * support aging, the entry memory is not used while the
+		 * counter is allocated. As the bytes memory is used only
+		 * when the counter is allocated, and the entry memory is
+		 * used only when the counter is free, the dcs pointer can
+		 * be saved in these two different places at different
+		 * stages. This eliminates the individual counter extend
+		 * struct.
 		 */
 		TAILQ_ENTRY(mlx5_flow_counter) next;
 		/**< Pointer to the next flow counter structure. */
-		struct mlx5_flow_counter_shared shared_info;
-		/**< Shared counter information. */
+		struct {
+			struct mlx5_flow_counter_shared shared_info;
+			/**< Shared counter information. */
+			void *dcs_when_active;
+			/*
+			 * For non-batch mode, the dcs will be saved
+			 * here when the counter is allocated.
+			 */
+		};
 	};
 	union {
 		uint64_t hits; /**< Reset value of hits packets. */
 		struct mlx5_flow_counter_pool *pool; /**< Counter pool. */
 	};
-	uint64_t bytes; /**< Reset value of bytes. */
-	void *action; /**< Pointer to the dv action. */
-};
-
-/* Extend counters information for none batch fallback counters. */
-struct mlx5_flow_counter_ext {
 	union {
-#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
-		struct ibv_counter_set *cs;
-#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
-		struct ibv_counters *cs;
-#endif
-		struct mlx5_devx_obj *dcs; /**< Counter Devx object. */
+		uint64_t bytes; /**< Reset value of bytes. */
+		void *dcs_when_free;
+		/*
+		 * For non-batch mode, the dcs will be saved here
+		 * when the counter is free.
+		 */
 	};
+	void *action; /**< Pointer to the dv action. */
 };
 
 TAILQ_HEAD(mlx5_counters, mlx5_flow_counter);
@@ -407,8 +405,8 @@  struct mlx5_flow_counter_pool {
 	/* The devx object of the minimum counter ID. */
 	uint64_t time_of_last_age_check;
 	/* System time (from rte_rdtsc()) read in the last aging check. */
-	uint32_t index:29; /* Pool index in container. */
-	uint32_t type:2; /* Memory type behind the counter array. */
+	uint32_t index:30; /* Pool index in container. */
+	uint32_t is_aged:1; /* Pool with aging counter. */
 	volatile uint32_t query_gen:1; /* Query round. */
 	rte_spinlock_t sl; /* The pool lock. */
 	rte_spinlock_t csl; /* The pool counter free list lock. */
@@ -454,6 +452,7 @@  struct mlx5_flow_counter_mng {
 	uint16_t pool_index;
 	uint8_t query_thread_on;
 	bool relaxed_ordering;
+	bool counter_fallback; /* Use counter fallback management. */
 	LIST_HEAD(mem_mngs, mlx5_counter_stats_mem_mng) mem_mngs;
 	LIST_HEAD(stat_raws, mlx5_counter_stats_raw) free_stat_raws;
 };
@@ -826,7 +825,6 @@  struct mlx5_priv {
 	unsigned int master:1; /* Device is a E-Switch master. */
 	unsigned int dr_shared:1; /* DV/DR data is shared. */
 	unsigned int txpp_en:1; /* Tx packet pacing enabled. */
-	unsigned int counter_fallback:1; /* Use counter fallback management. */
 	unsigned int mtr_en:1; /* Whether support meter. */
 	unsigned int mtr_reg_share:1; /* Whether support meter REG_C share. */
 	unsigned int sampler_en:1; /* Whether support sampler. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index c79d02e..22fb4ee 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -6876,14 +6876,14 @@  struct mlx5_meter_domains_infos *
 	uint8_t query_gen = pool->query_gen ^ 1;
 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
 	enum mlx5_counter_type cnt_type =
-		IS_AGE_POOL(pool) ? MLX5_COUNTER_TYPE_AGE :
-				    MLX5_COUNTER_TYPE_ORIGIN;
+		pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
+				MLX5_COUNTER_TYPE_ORIGIN;
 
 	if (unlikely(status)) {
 		raw_to_free = pool->raw_hw;
 	} else {
 		raw_to_free = pool->raw;
-		if (IS_AGE_POOL(pool))
+		if (pool->is_aged)
 			mlx5_flow_aging_check(sh, pool);
 		rte_spinlock_lock(&pool->sl);
 		pool->raw = pool->raw_hw;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index d302a83..49d9636 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -4170,7 +4170,7 @@  struct field_modify_info modify_tcp[] = {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	const struct rte_flow_action_age *age = action->conf;
 
-	if (!priv->config.devx || priv->counter_fallback)
+	if (!priv->config.devx || priv->sh->cmng.counter_fallback)
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 					  NULL,
@@ -4739,16 +4739,13 @@  struct field_modify_info modify_tcp[] = {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_counter_pool *pool = NULL;
 	struct mlx5_flow_counter *cnt;
-	struct mlx5_flow_counter_ext *cnt_ext = NULL;
 	int offset;
 
 	cnt = flow_dv_counter_get_by_idx(dev, counter, &pool);
 	MLX5_ASSERT(pool);
-	if (priv->counter_fallback) {
-		cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
-		return mlx5_devx_cmd_flow_counter_query(cnt_ext->dcs, 0,
+	if (priv->sh->cmng.counter_fallback)
+		return mlx5_devx_cmd_flow_counter_query(cnt->dcs_when_active, 0,
 					0, pkts, bytes, 0, NULL, NULL, 0);
-	}
 	rte_spinlock_lock(&pool->sl);
 	if (!pool->raw) {
 		*pkts = 0;
@@ -4784,11 +4781,10 @@  struct field_modify_info modify_tcp[] = {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_counter_pool *pool;
 	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
-	uint32_t fallback = priv->counter_fallback;
+	bool fallback = priv->sh->cmng.counter_fallback;
 	uint32_t size = sizeof(*pool);
 
 	size += MLX5_COUNTERS_PER_POOL * MLX5_CNT_SIZE;
-	size += (!fallback ? 0 : MLX5_COUNTERS_PER_POOL * MLX5_CNTEXT_SIZE);
 	size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * MLX5_AGE_SIZE);
 	pool = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY);
 	if (!pool) {
@@ -4796,8 +4792,7 @@  struct field_modify_info modify_tcp[] = {
 		return NULL;
 	}
 	pool->raw = NULL;
-	pool->type = 0;
-	pool->type |= (!age ? 0 :  MLX5_CNT_POOL_TYPE_AGE);
+	pool->is_aged = !!age;
 	pool->query_gen = 0;
 	pool->min_dcs = dcs;
 	rte_spinlock_init(&pool->sl);
@@ -4822,7 +4817,6 @@  struct field_modify_info modify_tcp[] = {
 		if (base > cmng->max_id)
 			cmng->max_id = base + MLX5_COUNTERS_PER_POOL - 1;
 		cmng->last_pool_idx = pool->index;
-		pool->type |= MLX5_CNT_POOL_TYPE_EXT;
 	}
 	rte_spinlock_unlock(&cmng->pool_update_sl);
 	return pool;
@@ -4855,7 +4849,7 @@  struct field_modify_info modify_tcp[] = {
 	struct mlx5_flow_counter *cnt;
 	enum mlx5_counter_type cnt_type =
 			age ? MLX5_COUNTER_TYPE_AGE : MLX5_COUNTER_TYPE_ORIGIN;
-	uint32_t fallback = priv->counter_fallback;
+	bool fallback = priv->sh->cmng.counter_fallback;
 	uint32_t i;
 
 	if (fallback) {
@@ -4874,7 +4868,7 @@  struct field_modify_info modify_tcp[] = {
 		i = dcs->id % MLX5_COUNTERS_PER_POOL;
 		cnt = MLX5_POOL_GET_CNT(pool, i);
 		cnt->pool = pool;
-		MLX5_GET_POOL_CNT_EXT(pool, i)->dcs = dcs;
+		cnt->dcs_when_free = dcs;
 		*cnt_free = cnt;
 		return pool;
 	}
@@ -4919,8 +4913,7 @@  struct field_modify_info modify_tcp[] = {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_counter_pool *pool = NULL;
 	struct mlx5_flow_counter *cnt_free = NULL;
-	struct mlx5_flow_counter_ext *cnt_ext = NULL;
-	uint32_t fallback = priv->counter_fallback;
+	bool fallback = priv->sh->cmng.counter_fallback;
 	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
 	enum mlx5_counter_type cnt_type =
 			age ? MLX5_COUNTER_TYPE_AGE : MLX5_COUNTER_TYPE_ORIGIN;
@@ -4940,7 +4933,7 @@  struct field_modify_info modify_tcp[] = {
 		goto err;
 	pool = cnt_free->pool;
 	if (fallback)
-		cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free);
+		cnt_free->dcs_when_active = cnt_free->dcs_when_free;
 	/* Create a DV counter action only in the first time usage. */
 	if (!cnt_free->action) {
 		uint16_t offset;
@@ -4952,7 +4945,7 @@  struct field_modify_info modify_tcp[] = {
 			dcs = pool->min_dcs;
 		} else {
 			offset = 0;
-			dcs = cnt_ext->dcs;
+			dcs = cnt_free->dcs_when_free;
 		}
 		ret = mlx5_flow_os_create_flow_action_count(dcs->obj, offset,
 							    &cnt_free->action);
@@ -4974,6 +4967,8 @@  struct field_modify_info modify_tcp[] = {
 err:
 	if (cnt_free) {
 		cnt_free->pool = pool;
+		if (fallback)
+			cnt_free->dcs_when_free = cnt_free->dcs_when_active;
 		rte_spinlock_lock(&cmng->csl[cnt_type]);
 		TAILQ_INSERT_TAIL(&cmng->counters[cnt_type], cnt_free, next);
 		rte_spinlock_unlock(&cmng->csl[cnt_type]);
@@ -5117,7 +5112,7 @@  struct field_modify_info modify_tcp[] = {
 	if (IS_SHARED_CNT(counter) &&
 	    mlx5_l3t_clear_entry(priv->sh->cnt_id_tbl, cnt->shared_info.id))
 		return;
-	if (IS_AGE_POOL(pool))
+	if (pool->is_aged)
 		flow_dv_counter_remove_from_age(dev, counter, cnt);
 	cnt->pool = pool;
 	/*
@@ -5130,13 +5125,14 @@  struct field_modify_info modify_tcp[] = {
 	 * function both operate with the different list.
 	 *
 	 */
-	if (!priv->counter_fallback) {
+	if (!priv->sh->cmng.counter_fallback) {
 		rte_spinlock_lock(&pool->csl);
 		TAILQ_INSERT_TAIL(&pool->counters[pool->query_gen], cnt, next);
 		rte_spinlock_unlock(&pool->csl);
 	} else {
-		cnt_type = IS_AGE_POOL(pool) ? MLX5_COUNTER_TYPE_AGE :
-					       MLX5_COUNTER_TYPE_ORIGIN;
+		cnt->dcs_when_free = cnt->dcs_when_active;
+		cnt_type = pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
+					   MLX5_COUNTER_TYPE_ORIGIN;
 		rte_spinlock_lock(&priv->sh->cmng.csl[cnt_type]);
 		TAILQ_INSERT_TAIL(&priv->sh->cmng.counters[cnt_type],
 				  cnt, next);
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 0bb17b5..710622c 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -186,16 +186,16 @@ 
  */
 static int
 flow_verbs_counter_create(struct rte_eth_dev *dev,
-			  struct mlx5_flow_counter_ext *counter)
+			  struct mlx5_flow_counter *counter)
 {
 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct ibv_context *ctx = priv->sh->ctx;
 	struct ibv_counter_set_init_attr init = {
-			 .counter_set_id = counter->id};
+			 .counter_set_id = counter->shared_info.id};
 
-	counter->cs = mlx5_glue->create_counter_set(ctx, &init);
-	if (!counter->cs) {
+	counter->dcs_when_free = mlx5_glue->create_counter_set(ctx, &init);
+	if (!counter->dcs_when_free) {
 		rte_errno = ENOTSUP;
 		return -ENOTSUP;
 	}
@@ -208,23 +208,23 @@ 
 	int ret;
 
 	memset(&attach, 0, sizeof(attach));
-	counter->cs = mlx5_glue->create_counters(ctx, &init);
-	if (!counter->cs) {
+	counter->dcs_when_free = mlx5_glue->create_counters(ctx, &init);
+	if (!counter->dcs_when_free) {
 		rte_errno = ENOTSUP;
 		return -ENOTSUP;
 	}
 	attach.counter_desc = IBV_COUNTER_PACKETS;
 	attach.index = 0;
-	ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
+	ret = mlx5_glue->attach_counters(counter->dcs_when_free, &attach, NULL);
 	if (!ret) {
 		attach.counter_desc = IBV_COUNTER_BYTES;
 		attach.index = 1;
 		ret = mlx5_glue->attach_counters
-					(counter->cs, &attach, NULL);
+					(counter->dcs_when_free, &attach, NULL);
 	}
 	if (ret) {
-		claim_zero(mlx5_glue->destroy_counters(counter->cs));
-		counter->cs = NULL;
+		claim_zero(mlx5_glue->destroy_counters(counter->dcs_when_free));
+		counter->dcs_when_free = NULL;
 		rte_errno = ret;
 		return -ret;
 	}
@@ -256,7 +256,6 @@ 
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
 	struct mlx5_flow_counter_pool *pool = NULL;
-	struct mlx5_flow_counter_ext *cnt_ext = NULL;
 	struct mlx5_flow_counter *cnt = NULL;
 	union mlx5_l3t_data data;
 	uint32_t n_valid = cmng->n_valid;
@@ -297,12 +296,10 @@ 
 			cmng->n += MLX5_CNT_CONTAINER_RESIZE;
 		}
 		/* Allocate memory for new pool*/
-		size = sizeof(*pool) + (sizeof(*cnt_ext) + sizeof(*cnt)) *
-		       MLX5_COUNTERS_PER_POOL;
+		size = sizeof(*pool) + sizeof(*cnt) * MLX5_COUNTERS_PER_POOL;
 		pool = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY);
 		if (!pool)
 			return 0;
-		pool->type |= MLX5_CNT_POOL_TYPE_EXT;
 		for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
 			cnt = MLX5_POOL_GET_CNT(pool, i);
 			TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
@@ -312,6 +309,7 @@ 
 		pool_idx = n_valid;
 		cmng->n_valid++;
 	}
+	TAILQ_REMOVE(&pool->counters[0], cnt, next);
 	i = MLX5_CNT_ARRAY_IDX(pool, cnt);
 	cnt_idx = MLX5_MAKE_CNT_IDX(pool_idx, i);
 	if (shared) {
@@ -321,15 +319,15 @@ 
 		cnt->shared_info.id = id;
 		cnt_idx |= MLX5_CNT_SHARED_OFFSET;
 	}
-	cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i);
-	cnt->hits = 0;
-	cnt->bytes = 0;
 	/* Create counter with Verbs. */
-	ret = flow_verbs_counter_create(dev, cnt_ext);
+	ret = flow_verbs_counter_create(dev, cnt);
 	if (!ret) {
-		TAILQ_REMOVE(&pool->counters[0], cnt, next);
+		cnt->dcs_when_active = cnt->dcs_when_free;
+		cnt->hits = 0;
+		cnt->bytes = 0;
 		return cnt_idx;
 	}
+	TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
 	/* Some error occurred in Verbs library. */
 	rte_errno = -ret;
 	return 0;
@@ -349,21 +347,18 @@ 
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_counter_pool *pool;
 	struct mlx5_flow_counter *cnt;
-	struct mlx5_flow_counter_ext *cnt_ext;
 
 	cnt = flow_verbs_counter_get_by_idx(dev, counter, &pool);
 	if (IS_SHARED_CNT(counter) &&
 	    mlx5_l3t_clear_entry(priv->sh->cnt_id_tbl, cnt->shared_info.id))
 		return;
-	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
-	claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs));
-	cnt_ext->cs = NULL;
+	claim_zero(mlx5_glue->destroy_counter_set
+			((struct ibv_counter_set *)cnt->dcs_when_active));
 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
-	claim_zero(mlx5_glue->destroy_counters(cnt_ext->cs));
-	cnt_ext->cs = NULL;
+	claim_zero(mlx5_glue->destroy_counters
+				((struct ibv_counters *)cnt->dcs_when_active));
 #endif
-	(void)cnt_ext;
 	TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
 }
 
@@ -384,13 +379,12 @@ 
 		struct mlx5_flow_counter_pool *pool;
 		struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx
 						(dev, flow->counter, &pool);
-		struct mlx5_flow_counter_ext *cnt_ext = MLX5_CNT_TO_CNT_EXT
-						(pool, cnt);
 		struct rte_flow_query_count *qc = data;
 		uint64_t counters[2] = {0, 0};
 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
 		struct ibv_query_counter_set_attr query_cs_attr = {
-			.cs = cnt_ext->cs,
+			.dcs_when_free = (struct ibv_counter_set *)
+						cnt->dcs_when_active,
 			.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
 		};
 		struct ibv_counter_set_data query_out = {
@@ -401,7 +395,7 @@ 
 						       &query_out);
 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
 		int err = mlx5_glue->query_counters
-			       (cnt_ext->cs, counters,
+			((struct ibv_counters *)cnt->dcs_when_active, counters,
 				RTE_DIM(counters),
 				IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
 #endif
@@ -1188,7 +1182,6 @@ 
 	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
 	struct mlx5_flow_counter_pool *pool;
 	struct mlx5_flow_counter *cnt = NULL;
-	struct mlx5_flow_counter_ext *cnt_ext;
 	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
 	struct ibv_flow_spec_counter_action counter = {
 		.type = IBV_FLOW_SPEC_ACTION_COUNT,
@@ -1208,13 +1201,12 @@ 
 	}
 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
 	cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
-	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
-	counter.counter_set_handle = cnt_ext->cs->handle;
+	counter.counter_set_handle =
+		((struct ibv_counter_set *)cnt->dcs_when_active)->handle;
 	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
 	cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
-	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
-	counter.counters = cnt_ext->cs;
+	counter.counters = (struct ibv_counters *)cnt->dcs_when_active;
 	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
 #endif
 	return 0;