net/mlx5: fix counter query during port close

Message ID 20231109080751.1311817-1-suanmingm@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Headers
Series net/mlx5: fix counter query during port close |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/github-robot: build success github build: passed
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-compile-arm64-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS

Commit Message

Suanming Mou Nov. 9, 2023, 8:07 a.m. UTC
  Currently, the counter query service thread queries all the ports
which belongs to the same sh. In case one of the ports is closing
the query may still be proceeded.

This commit adds the pool list in shared context to manage the pool
for avoiding query the port during port close.

Fixes: 4d368e1da3a4 ("net/mlx5: support flow counter action for HWS")

Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/net/mlx5/mlx5.c         |  3 +++
 drivers/net/mlx5/mlx5.h         |  2 ++
 drivers/net/mlx5/mlx5_hws_cnt.c | 36 ++++++++++++++++++++++-----------
 drivers/net/mlx5/mlx5_hws_cnt.h |  2 ++
 4 files changed, 31 insertions(+), 12 deletions(-)
  

Comments

Raslan Darawsheh Nov. 12, 2023, 2:25 p.m. UTC | #1
Hi,

> -----Original Message-----
> From: Suanming Mou <suanmingm@nvidia.com>
> Sent: Thursday, November 9, 2023 10:08 AM
> To: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>
> Cc: dev@dpdk.org; Raslan Darawsheh <rasland@nvidia.com>
> Subject: [PATCH] net/mlx5: fix counter query during port close
> 
> Currently, the counter query service thread queries all the ports
> which belongs to the same sh. In case one of the ports is closing
> the query may still be proceeded.
> 
> This commit adds the pool list in shared context to manage the pool
> for avoiding query the port during port close.
> 
> Fixes: 4d368e1da3a4 ("net/mlx5: support flow counter action for HWS")
> 
> Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
> Acked-by: Matan Azrad <matan@nvidia.com>

Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh
  

Patch

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 2cf21a1921..d6cb0d1c8a 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1814,6 +1814,9 @@  mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
 	LIST_INSERT_HEAD(&mlx5_dev_ctx_list, sh, next);
 	rte_spinlock_init(&sh->geneve_tlv_opt_sl);
 	mlx5_init_shared_dev_registers(sh);
+	/* Init counter pool list header and lock. */
+	LIST_INIT(&sh->hws_cpool_list);
+	rte_spinlock_init(&sh->cpool_lock);
 exit:
 	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
 	return sh;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ee13ad6db2..f5eacb2c67 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1521,6 +1521,8 @@  struct mlx5_dev_ctx_shared {
 	uint32_t host_shaper_rate:8;
 	uint32_t lwm_triggered:1;
 	struct mlx5_hws_cnt_svc_mng *cnt_svc;
+	rte_spinlock_t cpool_lock;
+	LIST_HEAD(hws_cpool_list, mlx5_hws_cnt_pool) hws_cpool_list; /* Count pool list. */
 	struct mlx5_dev_registers registers;
 	struct mlx5_dev_shared_port port[]; /* per device port data array. */
 };
diff --git a/drivers/net/mlx5/mlx5_hws_cnt.c b/drivers/net/mlx5/mlx5_hws_cnt.c
index f556a9fbcc..a3bea94811 100644
--- a/drivers/net/mlx5/mlx5_hws_cnt.c
+++ b/drivers/net/mlx5/mlx5_hws_cnt.c
@@ -294,26 +294,25 @@  mlx5_hws_cnt_svc(void *opaque)
 		(struct mlx5_dev_ctx_shared *)opaque;
 	uint64_t interval =
 		(uint64_t)sh->cnt_svc->query_interval * (US_PER_S / MS_PER_S);
-	uint16_t port_id;
+	struct mlx5_hws_cnt_pool *hws_cpool;
 	uint64_t start_cycle, query_cycle = 0;
 	uint64_t query_us;
 	uint64_t sleep_us;
 
 	while (sh->cnt_svc->svc_running != 0) {
+		if (rte_spinlock_trylock(&sh->cpool_lock) == 0)
+			continue;
 		start_cycle = rte_rdtsc();
-		MLX5_ETH_FOREACH_DEV(port_id, sh->cdev->dev) {
-			struct mlx5_priv *opriv =
-				rte_eth_devices[port_id].data->dev_private;
-			if (opriv != NULL &&
-			    opriv->sh == sh &&
-			    opriv->hws_cpool != NULL) {
-				__mlx5_hws_cnt_svc(sh, opriv->hws_cpool);
-				if (opriv->hws_age_req)
-					mlx5_hws_aging_check(opriv,
-							     opriv->hws_cpool);
-			}
+		/* 200ms for 16M counters. */
+		LIST_FOREACH(hws_cpool, &sh->hws_cpool_list, next) {
+			struct mlx5_priv *opriv = hws_cpool->priv;
+
+			__mlx5_hws_cnt_svc(sh, hws_cpool);
+			if (opriv->hws_age_req)
+				mlx5_hws_aging_check(opriv, hws_cpool);
 		}
 		query_cycle = rte_rdtsc() - start_cycle;
+		rte_spinlock_unlock(&sh->cpool_lock);
 		query_us = query_cycle / (rte_get_timer_hz() / US_PER_S);
 		sleep_us = interval - query_us;
 		if (interval > query_us)
@@ -665,6 +664,10 @@  mlx5_hws_cnt_pool_create(struct rte_eth_dev *dev,
 	if (ret != 0)
 		goto error;
 	priv->sh->cnt_svc->refcnt++;
+	cpool->priv = priv;
+	rte_spinlock_lock(&priv->sh->cpool_lock);
+	LIST_INSERT_HEAD(&priv->sh->hws_cpool_list, cpool, next);
+	rte_spinlock_unlock(&priv->sh->cpool_lock);
 	return cpool;
 error:
 	mlx5_hws_cnt_pool_destroy(priv->sh, cpool);
@@ -677,6 +680,13 @@  mlx5_hws_cnt_pool_destroy(struct mlx5_dev_ctx_shared *sh,
 {
 	if (cpool == NULL)
 		return;
+	/*
+	 * 16M counter consumes 200ms to finish the query.
+	 * Maybe blocked for at most 200ms here.
+	 */
+	rte_spinlock_lock(&sh->cpool_lock);
+	LIST_REMOVE(cpool, next);
+	rte_spinlock_unlock(&sh->cpool_lock);
 	if (cpool->cfg.host_cpool == NULL) {
 		if (--sh->cnt_svc->refcnt == 0)
 			mlx5_hws_cnt_svc_deinit(sh);
@@ -1244,11 +1254,13 @@  mlx5_hws_age_pool_destroy(struct mlx5_priv *priv)
 {
 	struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
 
+	rte_spinlock_lock(&priv->sh->cpool_lock);
 	MLX5_ASSERT(priv->hws_age_req);
 	mlx5_hws_age_info_destroy(priv);
 	mlx5_ipool_destroy(age_info->ages_ipool);
 	age_info->ages_ipool = NULL;
 	priv->hws_age_req = 0;
+	rte_spinlock_unlock(&priv->sh->cpool_lock);
 }
 
 #endif
diff --git a/drivers/net/mlx5/mlx5_hws_cnt.h b/drivers/net/mlx5/mlx5_hws_cnt.h
index dcd5cec020..585b5a83ad 100644
--- a/drivers/net/mlx5/mlx5_hws_cnt.h
+++ b/drivers/net/mlx5/mlx5_hws_cnt.h
@@ -98,6 +98,7 @@  struct mlx5_hws_cnt_pool_caches {
 };
 
 struct mlx5_hws_cnt_pool {
+	LIST_ENTRY(mlx5_hws_cnt_pool) next;
 	struct mlx5_hws_cnt_pool_cfg cfg __rte_cache_aligned;
 	struct mlx5_hws_cnt_dcs_mng dcs_mng __rte_cache_aligned;
 	uint32_t query_gen __rte_cache_aligned;
@@ -108,6 +109,7 @@  struct mlx5_hws_cnt_pool {
 	struct rte_ring *wait_reset_list;
 	struct mlx5_hws_cnt_pool_caches *cache;
 	uint64_t time_of_last_age_check;
+	struct mlx5_priv *priv;
 } __rte_cache_aligned;
 
 /* HWS AGE status. */