net/mlx5: fix aging queue doorbell ringing

Message ID b69e3eebf09c80a032e8f2c5a97aed468d8c0976.1604399810.git.dekelp@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Series: net/mlx5: fix aging queue doorbell ringing

Checks

Context                      Check    Description
ci/checkpatch                success  coding style OK
ci/Intel-compilation         success  Compilation OK
ci/iol-broadcom-Functional   fail     Functional Testing issues
ci/iol-testing               success  Testing PASS
ci/iol-intel-Functional      fail     Functional Testing issues
ci/iol-intel-Performance     success  Performance Testing PASS
ci/iol-mellanox-Performance  success  Performance Testing PASS

Commit Message

Dekel Peled Nov. 3, 2020, 10:38 a.m. UTC
A recent patch introduced a new SQ for ASO flow hit management.
This SQ uses two WQEBBs for each WQE.
The SQ producer index is 16 bits wide.

The enqueue loop posts new WQEs to the ASO SQ, using the WQE index for
SQ management.
This 16-bit index, multiplied by 2, was also wrongly used for SQ
doorbell ringing.
The multiplication caused the SW index wrap-around to go out of sync
with the hardware index, causing the queue to get stuck.

This patch separates WQE index management from doorbell index
management: for each WQE index increment of 1, the doorbell index is
incremented by 2.

Fixes: 18c88cf29c29 ("net/mlx5: support flow hit action for aging")

Signed-off-by: Dekel Peled <dekelp@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/common/mlx5/mlx5_prm.h   | 21 +++++++++++++------
 drivers/net/mlx5/mlx5.h          |  3 ++-
 drivers/net/mlx5/mlx5_flow_age.c | 36 ++++++++++++++++++--------------
 3 files changed, 37 insertions(+), 23 deletions(-)
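
For context, below is a minimal standalone sketch (plain C, hypothetical names
and a made-up ring size; not the driver code) of the index scheme the patch
switches to: head and tail manage the ring in WQE units, while pi advances by
two per WQE, so the value written at doorbell time is already in WQEBB units
and needs no scaling.

/*
 * Minimal sketch of the index scheme after the fix (not driver code):
 * head/tail count WQEs for software ring management, while pi counts
 * WQEBBs (two per WQE) and is what gets written at doorbell time.
 */
#include <stdint.h>
#include <stdio.h>

#define LOG_DESC	10			/* hypothetical ring size */
#define RING_SIZE	(1u << LOG_DESC)	/* ring entries, in WQE units */
#define RING_MASK	(RING_SIZE - 1)

struct aso_sq_sketch {
	uint16_t pi;	/* doorbell counter, WQEBB units */
	uint32_t head;	/* producer index, WQE units */
	uint32_t tail;	/* consumer index, WQE units */
};

/* Post up to n WQEs; returns how many were actually posted. */
static unsigned int post_wqes(struct aso_sq_sketch *sq, unsigned int n)
{
	unsigned int room = RING_SIZE - (uint16_t)(sq->head - sq->tail);
	unsigned int todo = n < room ? n : room;

	for (unsigned int i = 0; i < todo; i++) {
		unsigned int slot = sq->head & RING_MASK;

		(void)slot;	/* the real code fills wqes[slot] here */
		sq->pi += 2;	/* each WQE occupies two WQEBBs */
		sq->head++;	/* one ring entry per WQE */
	}
	/* Doorbell value is sq->pi as-is; no scaling at ring time. */
	printf("ring doorbell with %u (posted %u WQEs)\n",
	       (unsigned int)sq->pi, todo);
	return todo;
}

int main(void)
{
	struct aso_sq_sketch sq = {0};

	post_wqes(&sq, 3);	/* doorbell value 6 */
	sq.tail += 3;		/* pretend completions arrived */
	post_wqes(&sq, 2);	/* doorbell value 10 */
	return 0;
}

Keeping pi 16 bits wide (as the patch does in struct mlx5_aso_sq) lines up
with the 16-bit wqe_index field documented in the new mlx5_prm.h comment.
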
  

Comments

Raslan Darawsheh Nov. 8, 2020, 9:11 a.m. UTC | #1
Hi,

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Dekel Peled
> Sent: Tuesday, November 3, 2020 12:38 PM
> To: Slava Ovsiienko <viacheslavo@nvidia.com>; Shahaf Shuler
> <shahafs@nvidia.com>; Matan Azrad <matan@nvidia.com>
> Cc: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] net/mlx5: fix aging queue doorbell ringing
> 
> A recent patch introduced a new SQ for ASO flow hit management.
> This SQ uses two WQEBBs for each WQE.
> The SQ producer index is 16 bits wide.
> 
> The enqueue loop posts new WQEs to the ASO SQ, using the WQE index for
> SQ management.
> This 16-bit index, multiplied by 2, was also wrongly used for SQ
> doorbell ringing.
> The multiplication caused the SW index wrap-around to go out of sync
> with the hardware index, causing the queue to get stuck.
> 
> This patch separates WQE index management from doorbell index
> management: for each WQE index increment of 1, the doorbell index is
> incremented by 2.
> 
> Fixes: 18c88cf29c29 ("net/mlx5: support flow hit action for aging")
> 
> Signed-off-by: Dekel Peled <dekelp@nvidia.com>
> Acked-by: Matan Azrad <matan@nvidia.com>
> ---
>  drivers/common/mlx5/mlx5_prm.h   | 21 +++++++++++++------
>  drivers/net/mlx5/mlx5.h          |  3 ++-
>  drivers/net/mlx5/mlx5_flow_age.c | 36 ++++++++++++++++++--------------
>  3 files changed, 37 insertions(+), 23 deletions(-)
> 

Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh
  

Patch

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 10f9b18d1b..58d180486e 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -293,6 +293,15 @@  struct mlx5_wqe_cseg {
 	uint32_t misc;
 } __rte_packed __rte_aligned(MLX5_WSEG_SIZE);
 
+/*
+ * WQE CSEG opcode field size is 32 bits, divided:
+ * Bits 31:24 OPC_MOD
+ * Bits 23:8 wqe_index
+ * Bits 7:0 OPCODE
+ */
+#define WQE_CSEG_OPC_MOD_OFFSET		24
+#define WQE_CSEG_WQE_INDEX_OFFSET	 8
+
 /* Header of data segment. Minimal size Data Segment */
 struct mlx5_wqe_dseg {
 	uint32_t bcount;
@@ -2359,12 +2368,12 @@  struct mlx5_ifc_create_flow_hit_aso_in_bits {
 	struct mlx5_ifc_flow_hit_aso_bits flow_hit_aso;
 };
 
-enum mlx5_access_aso_op_mod {
-	ASO_OP_MOD_IPSEC = 0x0,
-	ASO_OP_MOD_CONNECTION_TRACKING = 0x1,
-	ASO_OP_MOD_POLICER = 0x2,
-	ASO_OP_MOD_RACE_AVOIDANCE = 0x3,
-	ASO_OP_MOD_FLOW_HIT = 0x4,
+enum mlx5_access_aso_opc_mod {
+	ASO_OPC_MOD_IPSEC = 0x0,
+	ASO_OPC_MOD_CONNECTION_TRACKING = 0x1,
+	ASO_OPC_MOD_POLICER = 0x2,
+	ASO_OPC_MOD_RACE_AVOIDANCE = 0x3,
+	ASO_OPC_MOD_FLOW_HIT = 0x4,
 };
 
 #define ASO_CSEG_DATA_MASK_MODE_OFFSET	30
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 63d263384b..83beee3610 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -516,7 +516,8 @@  struct mlx5_aso_sq {
 	volatile uint64_t *uar_addr;
 	struct mlx5_aso_devx_mr mr;
 	uint16_t pi;
-	uint16_t ci;
+	uint32_t head;
+	uint32_t tail;
 	uint32_t sqn;
 	struct mlx5_aso_sq_elem elts[1 << MLX5_ASO_QUEUE_LOG_DESC];
 	uint16_t next; /* Pool index of the next pool to query. */
diff --git a/drivers/net/mlx5/mlx5_flow_age.c b/drivers/net/mlx5/mlx5_flow_age.c
index 0b7fa46e2a..829094d9cf 100644
--- a/drivers/net/mlx5/mlx5_flow_age.c
+++ b/drivers/net/mlx5/mlx5_flow_age.c
@@ -321,8 +321,9 @@  mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket,
 		rte_errno  = ENOMEM;
 		goto error;
 	}
-	sq->ci = 0;
 	sq->pi = 0;
+	sq->head = 0;
+	sq->tail = 0;
 	sq->sqn = sq->sq->id;
 	sq->db_rec = RTE_PTR_ADD(sq->umem_buf, (uintptr_t)(wq_attr->dbr_addr));
 	sq->uar_addr = (volatile uint64_t *)((uint8_t *)sq->uar_obj->base_addr +
@@ -382,20 +383,20 @@  mlx5_aso_sq_enqueue_burst(struct mlx5_aso_age_mng *mng, uint16_t n)
 	uint16_t size = 1 << sq->log_desc_n;
 	uint16_t mask = size - 1;
 	uint16_t max;
-	uint16_t start_pi = sq->pi;
+	uint16_t start_head = sq->head;
 
-	max = RTE_MIN(size - (uint16_t)(sq->pi - sq->ci), n - sq->next);
+	max = RTE_MIN(size - (uint16_t)(sq->head - sq->tail), n - sq->next);
 	if (unlikely(!max))
 		return 0;
-	sq->elts[start_pi & mask].burst_size = max;
+	sq->elts[start_head & mask].burst_size = max;
 	do {
-		wqe = &sq->wqes[sq->pi & mask];
-		rte_prefetch0(&sq->wqes[(sq->pi + 1) & mask]);
+		wqe = &sq->wqes[sq->head & mask];
+		rte_prefetch0(&sq->wqes[(sq->head + 1) & mask]);
 		/* Fill next WQE. */
 		rte_spinlock_lock(&mng->resize_sl);
 		pool = mng->pools[sq->next];
 		rte_spinlock_unlock(&mng->resize_sl);
-		sq->elts[sq->pi & mask].pool = pool;
+		sq->elts[sq->head & mask].pool = pool;
 		wqe->general_cseg.misc =
 				rte_cpu_to_be_32(((struct mlx5_devx_obj *)
 						 (pool->flow_hit_aso_obj))->id);
@@ -403,20 +404,23 @@  mlx5_aso_sq_enqueue_burst(struct mlx5_aso_age_mng *mng, uint16_t n)
 							 MLX5_COMP_MODE_OFFSET);
 		wqe->general_cseg.opcode = rte_cpu_to_be_32
 						(MLX5_OPCODE_ACCESS_ASO |
-						 ASO_OP_MOD_FLOW_HIT << 24 |
-						 sq->pi << 9);
-		sq->pi++;
+						 (ASO_OPC_MOD_FLOW_HIT <<
+						  WQE_CSEG_OPC_MOD_OFFSET) |
+						 (sq->pi <<
+						  WQE_CSEG_WQE_INDEX_OFFSET));
+		sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
+		sq->head++;
 		sq->next++;
 		max--;
 	} while (max);
 	wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
 							 MLX5_COMP_MODE_OFFSET);
 	rte_io_wmb();
-	sq->db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi << 1);
+	sq->db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
 	rte_wmb();
 	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH.*/
 	rte_wmb();
-	return sq->elts[start_pi & mask].burst_size;
+	return sq->elts[start_head & mask].burst_size;
 }
 
 /**
@@ -483,7 +487,7 @@  mlx5_aso_age_action_update(struct mlx5_dev_ctx_shared *sh, uint16_t n)
 	uint16_t i;
 
 	for (i = 0; i < n; ++i) {
-		uint16_t idx = (sq->ci + i) & mask;
+		uint16_t idx = (sq->tail + i) & mask;
 		struct mlx5_aso_age_pool *pool = sq->elts[idx].pool;
 		uint64_t diff = curr - pool->time_of_last_age_check;
 		uint64_t *addr = sq->mr.buf;
@@ -559,7 +563,7 @@  mlx5_aso_completion_handle(struct mlx5_dev_ctx_shared *sh)
 	const unsigned int mask = cq_size - 1;
 	uint32_t idx;
 	uint32_t next_idx = cq->cq_ci & mask;
-	const uint16_t max = (uint16_t)(sq->pi - sq->ci);
+	const uint16_t max = (uint16_t)(sq->head - sq->tail);
 	uint16_t i = 0;
 	int ret;
 	if (unlikely(!max))
@@ -580,13 +584,13 @@  mlx5_aso_completion_handle(struct mlx5_dev_ctx_shared *sh)
 				break;
 			mlx5_aso_cqe_err_handle(sq);
 		} else {
-			i += sq->elts[(sq->ci + i) & mask].burst_size;
+			i += sq->elts[(sq->tail + i) & mask].burst_size;
 		}
 		cq->cq_ci++;
 	} while (1);
 	if (likely(i)) {
 		mlx5_aso_age_action_update(sh, i);
-		sq->ci += i;
+		sq->tail += i;
 		rte_io_wmb();
 		cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
 	}
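
As a quick sanity check of the control-segment layout documented in the new
mlx5_prm.h comment (bits 31:24 OPC_MOD, bits 23:8 wqe_index, bits 7:0 OPCODE),
the small standalone program below composes the opcode dword the same way the
fixed enqueue loop does. The opcode value 0x2d is a placeholder rather than a
value taken from the PRM; 0x4 matches ASO_OPC_MOD_FLOW_HIT from the patch.

/*
 * Standalone check of the control-segment opcode dword layout described
 * in the patch: bits 31:24 OPC_MOD, bits 23:8 wqe_index, bits 7:0 OPCODE.
 * Values below are placeholders, not taken from the PRM.
 */
#include <stdint.h>
#include <stdio.h>

#define WQE_CSEG_OPC_MOD_OFFSET		24
#define WQE_CSEG_WQE_INDEX_OFFSET	8

static uint32_t cseg_opcode(uint8_t opcode, uint8_t opc_mod, uint16_t wqe_index)
{
	/* Compose the 32-bit opcode field from its three sub-fields. */
	return (uint32_t)opcode |
	       ((uint32_t)opc_mod << WQE_CSEG_OPC_MOD_OFFSET) |
	       ((uint32_t)wqe_index << WQE_CSEG_WQE_INDEX_OFFSET);
}

int main(void)
{
	/* pi counts WQEBBs after the fix, so it is used unscaled here. */
	uint16_t pi = 0x8002;	/* arbitrary example value */
	uint32_t dw = cseg_opcode(0x2d /* placeholder opcode */, 0x4, pi);

	printf("opcode dword: 0x%08x\n", dw);
	printf("  OPC_MOD   = 0x%02x\n", dw >> WQE_CSEG_OPC_MOD_OFFSET);
	printf("  wqe_index = 0x%04x\n",
	       (dw >> WQE_CSEG_WQE_INDEX_OFFSET) & 0xffff);
	printf("  OPCODE    = 0x%02x\n", dw & 0xff);
	return 0;
}

One arithmetic point visible in the diff: a 16-bit index shifted left by
exactly 8 bits always stays within bits 23:8, whereas the previous
sq->pi << 9 could carry bit 15 of the index into bit 24 (the OPC_MOD byte)
once the index crossed 0x8000.
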