net/mlx5: fix counter query during port close
Checks
Commit Message
Currently, the counter query service thread queries all the ports
that belong to the same sh. If one of the ports is closing,
the query may still proceed on that port.
This commit adds a counter pool list to the shared context to manage
the pools, so that a port is not queried while it is being closed.
Fixes: 4d368e1da3a4 ("net/mlx5: support flow counter action for HWS")
Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
drivers/net/mlx5/mlx5.c | 3 +++
drivers/net/mlx5/mlx5.h | 2 ++
drivers/net/mlx5/mlx5_hws_cnt.c | 36 ++++++++++++++++++++++-----------
drivers/net/mlx5/mlx5_hws_cnt.h | 2 ++
4 files changed, 31 insertions(+), 12 deletions(-)
Comments
Hi,
> -----Original Message-----
> From: Suanming Mou <suanmingm@nvidia.com>
> Sent: Thursday, November 9, 2023 10:08 AM
> To: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>
> Cc: dev@dpdk.org; Raslan Darawsheh <rasland@nvidia.com>
> Subject: [PATCH] net/mlx5: fix counter query during port close
>
> Currently, the counter query service thread queries all the ports
> which belongs to the same sh. In case one of the ports is closing
> the query may still be proceeded.
>
> This commit adds the pool list in shared context to manage the pool
> for avoiding query the port during port close.
>
> Fixes: 4d368e1da3a4 ("net/mlx5: support flow counter action for HWS")
>
> Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
> Acked-by: Matan Azrad <matan@nvidia.com>
Patch applied to next-net-mlx,
Kindest regards,
Raslan Darawsheh
@@ -1814,6 +1814,9 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
LIST_INSERT_HEAD(&mlx5_dev_ctx_list, sh, next);
rte_spinlock_init(&sh->geneve_tlv_opt_sl);
mlx5_init_shared_dev_registers(sh);
+ /* Init counter pool list header and lock. */
+ LIST_INIT(&sh->hws_cpool_list);
+ rte_spinlock_init(&sh->cpool_lock);
exit:
pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
return sh;
@@ -1521,6 +1521,8 @@ struct mlx5_dev_ctx_shared {
uint32_t host_shaper_rate:8;
uint32_t lwm_triggered:1;
struct mlx5_hws_cnt_svc_mng *cnt_svc;
+ rte_spinlock_t cpool_lock;
+ LIST_HEAD(hws_cpool_list, mlx5_hws_cnt_pool) hws_cpool_list; /* Count pool list. */
struct mlx5_dev_registers registers;
struct mlx5_dev_shared_port port[]; /* per device port data array. */
};
@@ -294,26 +294,25 @@ mlx5_hws_cnt_svc(void *opaque)
(struct mlx5_dev_ctx_shared *)opaque;
uint64_t interval =
(uint64_t)sh->cnt_svc->query_interval * (US_PER_S / MS_PER_S);
- uint16_t port_id;
+ struct mlx5_hws_cnt_pool *hws_cpool;
uint64_t start_cycle, query_cycle = 0;
uint64_t query_us;
uint64_t sleep_us;
while (sh->cnt_svc->svc_running != 0) {
+ if (rte_spinlock_trylock(&sh->cpool_lock) == 0)
+ continue;
start_cycle = rte_rdtsc();
- MLX5_ETH_FOREACH_DEV(port_id, sh->cdev->dev) {
- struct mlx5_priv *opriv =
- rte_eth_devices[port_id].data->dev_private;
- if (opriv != NULL &&
- opriv->sh == sh &&
- opriv->hws_cpool != NULL) {
- __mlx5_hws_cnt_svc(sh, opriv->hws_cpool);
- if (opriv->hws_age_req)
- mlx5_hws_aging_check(opriv,
- opriv->hws_cpool);
- }
+ /* 200ms for 16M counters. */
+ LIST_FOREACH(hws_cpool, &sh->hws_cpool_list, next) {
+ struct mlx5_priv *opriv = hws_cpool->priv;
+
+ __mlx5_hws_cnt_svc(sh, hws_cpool);
+ if (opriv->hws_age_req)
+ mlx5_hws_aging_check(opriv, hws_cpool);
}
query_cycle = rte_rdtsc() - start_cycle;
+ rte_spinlock_unlock(&sh->cpool_lock);
query_us = query_cycle / (rte_get_timer_hz() / US_PER_S);
sleep_us = interval - query_us;
if (interval > query_us)
@@ -665,6 +664,10 @@ mlx5_hws_cnt_pool_create(struct rte_eth_dev *dev,
if (ret != 0)
goto error;
priv->sh->cnt_svc->refcnt++;
+ cpool->priv = priv;
+ rte_spinlock_lock(&priv->sh->cpool_lock);
+ LIST_INSERT_HEAD(&priv->sh->hws_cpool_list, cpool, next);
+ rte_spinlock_unlock(&priv->sh->cpool_lock);
return cpool;
error:
mlx5_hws_cnt_pool_destroy(priv->sh, cpool);
@@ -677,6 +680,13 @@ mlx5_hws_cnt_pool_destroy(struct mlx5_dev_ctx_shared *sh,
{
if (cpool == NULL)
return;
+ /*
+ * 16M counter consumes 200ms to finish the query.
+ * Maybe blocked for at most 200ms here.
+ */
+ rte_spinlock_lock(&sh->cpool_lock);
+ LIST_REMOVE(cpool, next);
+ rte_spinlock_unlock(&sh->cpool_lock);
if (cpool->cfg.host_cpool == NULL) {
if (--sh->cnt_svc->refcnt == 0)
mlx5_hws_cnt_svc_deinit(sh);
@@ -1244,11 +1254,13 @@ mlx5_hws_age_pool_destroy(struct mlx5_priv *priv)
{
struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
+ rte_spinlock_lock(&priv->sh->cpool_lock);
MLX5_ASSERT(priv->hws_age_req);
mlx5_hws_age_info_destroy(priv);
mlx5_ipool_destroy(age_info->ages_ipool);
age_info->ages_ipool = NULL;
priv->hws_age_req = 0;
+ rte_spinlock_unlock(&priv->sh->cpool_lock);
}
#endif
@@ -98,6 +98,7 @@ struct mlx5_hws_cnt_pool_caches {
};
struct mlx5_hws_cnt_pool {
+ LIST_ENTRY(mlx5_hws_cnt_pool) next;
struct mlx5_hws_cnt_pool_cfg cfg __rte_cache_aligned;
struct mlx5_hws_cnt_dcs_mng dcs_mng __rte_cache_aligned;
uint32_t query_gen __rte_cache_aligned;
@@ -108,6 +109,7 @@ struct mlx5_hws_cnt_pool {
struct rte_ring *wait_reset_list;
struct mlx5_hws_cnt_pool_caches *cache;
uint64_t time_of_last_age_check;
+ struct mlx5_priv *priv;
} __rte_cache_aligned;
/* HWS AGE status. */