[1/4] net/mlx5: fix Rx queue release

Message ID 1602743893-345348-1-git-send-email-matan@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Headers
Series [1/4] net/mlx5: fix Rx queue release |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Matan Azrad Oct. 15, 2020, 6:38 a.m. UTC
  The HW objects of the Rx queue is created/destroyed in the device
start\stop stage while the ethdev configurations for the Rx queue
starts from the rx_queue_setup stage.
The PMD should save all the last configurations it got from the ethdev
and to apply them to the device in the dev_start operation.

Wrongly, last code added to mitigate the reference counters didn't take
into account the above rule and combined the configurations and HW
objects to be created\destroyed together.

This causes to memory leak and other memory issues.

Make sure the HW object is released in stop operation when there is no
any reference to it while the configurations stay saved.

Fixes: 24e4b650badc ("net/mlx5: mitigate Rx queue reference counters")

Signed-off-by: Matan Azrad <matan@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/net/mlx5/mlx5_rxq.c  | 23 +++++++++++++----------
 drivers/net/mlx5/mlx5_rxtx.h |  2 +-
 2 files changed, 14 insertions(+), 11 deletions(-)
  

Comments

Raslan Darawsheh Oct. 18, 2020, 11:58 a.m. UTC | #1
Hi,

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Matan Azrad
> Sent: Thursday, October 15, 2020 9:38 AM
> To: dev@dpdk.org
> Cc: Slava Ovsiienko <viacheslavo@nvidia.com>
> Subject: [dpdk-dev] [PATCH 1/4] net/mlx5: fix Rx queue release
> 
> The HW objects of the Rx queue is created/destroyed in the device
> start\stop stage while the ethdev configurations for the Rx queue
> starts from the rx_queue_setup stage.
> The PMD should save all the last configurations it got from the ethdev
> and to apply them to the device in the dev_start operation.
> 
> Wrongly, last code added to mitigate the reference counters didn't take
> into account the above rule and combined the configurations and HW
> objects to be created\destroyed together.
> 
> This causes to memory leak and other memory issues.
> 
> Make sure the HW object is released in stop operation when there is no
> any reference to it while the configurations stay saved.
> 
> Fixes: 24e4b650badc ("net/mlx5: mitigate Rx queue reference counters")
> 
> Signed-off-by: Matan Azrad <matan@nvidia.com>
> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> ---
>  drivers/net/mlx5/mlx5_rxq.c  | 23 +++++++++++++----------
>  drivers/net/mlx5/mlx5_rxtx.h |  2 +-
>  2 files changed, 14 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
> index f1d8373..e1783ba 100644
> --- a/drivers/net/mlx5/mlx5_rxq.c
> +++ b/drivers/net/mlx5/mlx5_rxq.c
> @@ -447,7 +447,8 @@
>  		return -rte_errno;
>  	}
>  	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
> -	return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
> +	return (__atomic_load_n(&rxq_ctrl->refcnt, __ATOMIC_RELAXED)
> == 1);
> +
>  }
> 
>  /* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */
> @@ -1541,7 +1542,7 @@ struct mlx5_rxq_ctrl *
>  	tmpl->rxq.uar_lock_cq = &priv->sh->uar_lock_cq;
>  #endif
>  	tmpl->rxq.idx = idx;
> -	rte_atomic32_inc(&tmpl->refcnt);
> +	__atomic_add_fetch(&tmpl->refcnt, 1, __ATOMIC_RELAXED);
>  	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
>  	return tmpl;
>  error:
> @@ -1588,7 +1589,7 @@ struct mlx5_rxq_ctrl *
>  	tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 };
>  	tmpl->hairpin_conf = *hairpin_conf;
>  	tmpl->rxq.idx = idx;
> -	rte_atomic32_inc(&tmpl->refcnt);
> +	__atomic_add_fetch(&tmpl->refcnt, 1, __ATOMIC_RELAXED);
>  	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
>  	return tmpl;
>  }
> @@ -1613,7 +1614,7 @@ struct mlx5_rxq_ctrl *
> 
>  	if (rxq_data) {
>  		rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
> -		rte_atomic32_inc(&rxq_ctrl->refcnt);
> +		__atomic_add_fetch(&rxq_ctrl->refcnt, 1,
> __ATOMIC_RELAXED);
>  	}
>  	return rxq_ctrl;
>  }
> @@ -1638,7 +1639,7 @@ struct mlx5_rxq_ctrl *
>  	if (!(*priv->rxqs)[idx])
>  		return 0;
>  	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
> -	if (!rte_atomic32_dec_and_test(&rxq_ctrl->refcnt))
> +	if (__atomic_sub_fetch(&rxq_ctrl->refcnt, 1, __ATOMIC_RELAXED) >
> 1)
>  		return 1;
>  	if (rxq_ctrl->obj) {
>  		priv->obj_ops.rxq_obj_release(rxq_ctrl->obj);
> @@ -1646,13 +1647,15 @@ struct mlx5_rxq_ctrl *
>  		mlx5_free(rxq_ctrl->obj);
>  		rxq_ctrl->obj = NULL;
>  	}
> -	if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
> -		mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
> +	if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD)
>  		rxq_free_elts(rxq_ctrl);
> +	if (!__atomic_load_n(&rxq_ctrl->refcnt, __ATOMIC_RELAXED)) {
> +		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD)
> +			mlx5_mr_btree_free(&rxq_ctrl-
> >rxq.mr_ctrl.cache_bh);
> +		LIST_REMOVE(rxq_ctrl, next);
> +		mlx5_free(rxq_ctrl);
> +		(*priv->rxqs)[idx] = NULL;
>  	}
> -	LIST_REMOVE(rxq_ctrl, next);
> -	mlx5_free(rxq_ctrl);
> -	(*priv->rxqs)[idx] = NULL;
>  	return 0;
>  }
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index 674296e..c3734e3 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -165,7 +165,7 @@ enum mlx5_rxq_type {
>  struct mlx5_rxq_ctrl {
>  	struct mlx5_rxq_data rxq; /* Data path structure. */
>  	LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
> -	rte_atomic32_t refcnt; /* Reference counter. */
> +	uint32_t refcnt; /* Reference counter. */
>  	struct mlx5_rxq_obj *obj; /* Verbs/DevX elements. */
>  	struct mlx5_priv *priv; /* Back pointer to private data. */
>  	enum mlx5_rxq_type type; /* Rxq type. */
> --
> 1.8.3.1

Series applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh
  

Patch

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index f1d8373..e1783ba 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -447,7 +447,8 @@ 
 		return -rte_errno;
 	}
 	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
-	return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
+	return (__atomic_load_n(&rxq_ctrl->refcnt, __ATOMIC_RELAXED) == 1);
+
 }
 
 /* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */
@@ -1541,7 +1542,7 @@  struct mlx5_rxq_ctrl *
 	tmpl->rxq.uar_lock_cq = &priv->sh->uar_lock_cq;
 #endif
 	tmpl->rxq.idx = idx;
-	rte_atomic32_inc(&tmpl->refcnt);
+	__atomic_add_fetch(&tmpl->refcnt, 1, __ATOMIC_RELAXED);
 	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
 	return tmpl;
 error:
@@ -1588,7 +1589,7 @@  struct mlx5_rxq_ctrl *
 	tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 };
 	tmpl->hairpin_conf = *hairpin_conf;
 	tmpl->rxq.idx = idx;
-	rte_atomic32_inc(&tmpl->refcnt);
+	__atomic_add_fetch(&tmpl->refcnt, 1, __ATOMIC_RELAXED);
 	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
 	return tmpl;
 }
@@ -1613,7 +1614,7 @@  struct mlx5_rxq_ctrl *
 
 	if (rxq_data) {
 		rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
-		rte_atomic32_inc(&rxq_ctrl->refcnt);
+		__atomic_add_fetch(&rxq_ctrl->refcnt, 1, __ATOMIC_RELAXED);
 	}
 	return rxq_ctrl;
 }
@@ -1638,7 +1639,7 @@  struct mlx5_rxq_ctrl *
 	if (!(*priv->rxqs)[idx])
 		return 0;
 	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
-	if (!rte_atomic32_dec_and_test(&rxq_ctrl->refcnt))
+	if (__atomic_sub_fetch(&rxq_ctrl->refcnt, 1, __ATOMIC_RELAXED) > 1)
 		return 1;
 	if (rxq_ctrl->obj) {
 		priv->obj_ops.rxq_obj_release(rxq_ctrl->obj);
@@ -1646,13 +1647,15 @@  struct mlx5_rxq_ctrl *
 		mlx5_free(rxq_ctrl->obj);
 		rxq_ctrl->obj = NULL;
 	}
-	if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
-		mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
+	if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD)
 		rxq_free_elts(rxq_ctrl);
+	if (!__atomic_load_n(&rxq_ctrl->refcnt, __ATOMIC_RELAXED)) {
+		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD)
+			mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
+		LIST_REMOVE(rxq_ctrl, next);
+		mlx5_free(rxq_ctrl);
+		(*priv->rxqs)[idx] = NULL;
 	}
-	LIST_REMOVE(rxq_ctrl, next);
-	mlx5_free(rxq_ctrl);
-	(*priv->rxqs)[idx] = NULL;
 	return 0;
 }
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 674296e..c3734e3 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -165,7 +165,7 @@  enum mlx5_rxq_type {
 struct mlx5_rxq_ctrl {
 	struct mlx5_rxq_data rxq; /* Data path structure. */
 	LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
-	rte_atomic32_t refcnt; /* Reference counter. */
+	uint32_t refcnt; /* Reference counter. */
 	struct mlx5_rxq_obj *obj; /* Verbs/DevX elements. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
 	enum mlx5_rxq_type type; /* Rxq type. */