net/mlx5: reduce txq completion index memory loads

Message ID 1584372899-26732-1-git-send-email-akozyrev@mellanox.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Headers
Series net/mlx5: reduce txq completion index memory loads |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-testing success Testing PASS
ci/Intel-compilation success Compilation OK

Commit Message

Alexander Kozyrev March 16, 2020, 3:34 p.m. UTC
  The mlx5_tx_handle_completion() function uses a non-optimal check to
decide whether the doorbell needs to be rung. Advancing a copy of the txq
consumer index and comparing this copy with the initial value causes
unnecessary memory loads and hurts performance. It is better to
use a simple boolean variable for this purpose. That eliminates
the excessive memory operations on the txq consumer index
and restores the performance of Tx completions.

Fixes: 1fd9af0 ("net/mlx5: update Tx error handling routine")
Cc: stable@dpdk.org

Signed-off-by: Alexander Kozyrev <akozyrev@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)
  

Comments

Raslan Darawsheh March 17, 2020, 1:10 p.m. UTC | #1
Hi,

> -----Original Message-----
> From: Alexander Kozyrev <akozyrev@mellanox.com>
> Sent: Monday, March 16, 2020 5:35 PM
> To: dev@dpdk.org
> Cc: Raslan Darawsheh <rasland@mellanox.com>; Matan Azrad
> <matan@mellanox.com>; Slava Ovsiienko <viacheslavo@mellanox.com>;
> stable@dpdk.org
> Subject: [PATCH] net/mlx5: reduce txq completion index memory loads
> 
> There is a non-optimal check if doorbell is needed present in the
> mlx5_tx_handle_completion() function. Advancing a copy of the txq
> consumer index and checking this copy with initial value causes
> unnecessary memory loads and hurts the performance. It is better to
> have a simple small boolean variable for this purpose. That allows
> to eliminate all the excessive memory operations with the txq consumer
> index and restore the performance of the tx completions.
> 
> Fixes: 1fd9af0 ("net/mlx5: update Tx error handling routine")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Alexander Kozyrev <akozyrev@mellanox.com>
> Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5_rxtx.c | 29 +++++++++++++----------------
>  1 file changed, 13 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index 5ac63da..f3bf763 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -2160,7 +2160,7 @@ enum mlx5_txcmp_code {
>  {
>  	unsigned int count = MLX5_TX_COMP_MAX_CQE;
>  	volatile struct mlx5_cqe *last_cqe = NULL;
> -	uint16_t ci = txq->cq_ci;
> +	bool ring_doorbell = false;
>  	int ret;
> 
>  	static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value");
> @@ -2168,8 +2168,8 @@ enum mlx5_txcmp_code {
>  	do {
>  		volatile struct mlx5_cqe *cqe;
> 
> -		cqe = &txq->cqes[ci & txq->cqe_m];
> -		ret = check_cqe(cqe, txq->cqe_s, ci);
> +		cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
> +		ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
>  		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
>  			if (likely(ret != MLX5_CQE_STATUS_ERR)) {
>  				/* No new CQEs in completion queue. */
> @@ -2183,7 +2183,6 @@ enum mlx5_txcmp_code {
>  			 * here, before we might perform SQ reset.
>  			 */
>  			rte_wmb();
> -			txq->cq_ci = ci;
>  			ret = mlx5_tx_error_cqe_handle
>  				(txq, (volatile struct mlx5_err_cqe *)cqe);
>  			if (unlikely(ret < 0)) {
> @@ -2199,16 +2198,18 @@ enum mlx5_txcmp_code {
>  			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
>  			 * The send queue is supposed to be empty.
>  			 */
> -			++ci;
> -			txq->cq_pi = ci;
> +			ring_doorbell = true;
> +			++txq->cq_ci;
> +			txq->cq_pi = txq->cq_ci;
>  			last_cqe = NULL;
>  			continue;
>  		}
>  		/* Normal transmit completion. */
> -		MLX5_ASSERT(ci != txq->cq_pi);
> -		MLX5_ASSERT((txq->fcqs[ci & txq->cqe_m] >> 16) ==
> +		MLX5_ASSERT(txq->cq_ci != txq->cq_pi);
> +		MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) ==
>  			    cqe->wqe_counter);
> -		++ci;
> +		ring_doorbell = true;
> +		++txq->cq_ci;
>  		last_cqe = cqe;
>  		/*
>  		 * We have to restrict the amount of processed CQEs
> @@ -2221,14 +2222,10 @@ enum mlx5_txcmp_code {
>  		if (likely(--count == 0))
>  			break;
>  	} while (true);
> -	if (likely(ci != txq->cq_ci)) {
> -		/*
> -		 * Update completion queue consuming index
> -		 * and ring doorbell to notify hardware.
> -		 */
> +	if (likely(ring_doorbell)) {
> +		/* Ring doorbell to notify hardware. */
>  		rte_compiler_barrier();
> -		txq->cq_ci = ci;
> -		*txq->cq_db = rte_cpu_to_be_32(ci);
> +		*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
>  		mlx5_tx_comp_flush(txq, last_cqe, olx);
>  	}
>  }
> --
> 1.8.3.1


Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh
  

Patch

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 5ac63da..f3bf763 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -2160,7 +2160,7 @@  enum mlx5_txcmp_code {
 {
 	unsigned int count = MLX5_TX_COMP_MAX_CQE;
 	volatile struct mlx5_cqe *last_cqe = NULL;
-	uint16_t ci = txq->cq_ci;
+	bool ring_doorbell = false;
 	int ret;
 
 	static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value");
@@ -2168,8 +2168,8 @@  enum mlx5_txcmp_code {
 	do {
 		volatile struct mlx5_cqe *cqe;
 
-		cqe = &txq->cqes[ci & txq->cqe_m];
-		ret = check_cqe(cqe, txq->cqe_s, ci);
+		cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
+		ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
 			if (likely(ret != MLX5_CQE_STATUS_ERR)) {
 				/* No new CQEs in completion queue. */
@@ -2183,7 +2183,6 @@  enum mlx5_txcmp_code {
 			 * here, before we might perform SQ reset.
 			 */
 			rte_wmb();
-			txq->cq_ci = ci;
 			ret = mlx5_tx_error_cqe_handle
 				(txq, (volatile struct mlx5_err_cqe *)cqe);
 			if (unlikely(ret < 0)) {
@@ -2199,16 +2198,18 @@  enum mlx5_txcmp_code {
 			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
 			 * The send queue is supposed to be empty.
 			 */
-			++ci;
-			txq->cq_pi = ci;
+			ring_doorbell = true;
+			++txq->cq_ci;
+			txq->cq_pi = txq->cq_ci;
 			last_cqe = NULL;
 			continue;
 		}
 		/* Normal transmit completion. */
-		MLX5_ASSERT(ci != txq->cq_pi);
-		MLX5_ASSERT((txq->fcqs[ci & txq->cqe_m] >> 16) ==
+		MLX5_ASSERT(txq->cq_ci != txq->cq_pi);
+		MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) ==
 			    cqe->wqe_counter);
-		++ci;
+		ring_doorbell = true;
+		++txq->cq_ci;
 		last_cqe = cqe;
 		/*
 		 * We have to restrict the amount of processed CQEs
@@ -2221,14 +2222,10 @@  enum mlx5_txcmp_code {
 		if (likely(--count == 0))
 			break;
 	} while (true);
-	if (likely(ci != txq->cq_ci)) {
-		/*
-		 * Update completion queue consuming index
-		 * and ring doorbell to notify hardware.
-		 */
+	if (likely(ring_doorbell)) {
+		/* Ring doorbell to notify hardware. */
 		rte_compiler_barrier();
-		txq->cq_ci = ci;
-		*txq->cq_db = rte_cpu_to_be_32(ci);
+		*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
 		mlx5_tx_comp_flush(txq, last_cqe, olx);
 	}
 }