net/mlx5: ignore non-critical syndromes for Rx queue

Message ID 20230127032243.3990099-1-akozyrev@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Series: net/mlx5: ignore non-critical syndromes for Rx queue

Checks

Context                   Check    Description
ci/checkpatch             success  coding style OK
ci/loongarch-compilation  warning  apply patch failure
ci/iol-testing            warning  apply patch failure
ci/Intel-compilation      warning  apply issues

Commit Message

Alexander Kozyrev Jan. 27, 2023, 3:22 a.m. UTC
  For non-fatal syndromes like LOCAL_LENGTH_ERR, an Rx queue reset
shouldn't be triggered. The Rx queue can continue with the next packets
without any recovery. Only three syndromes warrant an Rx queue reset:
LOCAL_QP_OP_ERR, LOCAL_PROT_ERR and WR_FLUSH_ERR.
Do not initiate an Rx queue reset in any other case.
Skip all non-critical error CQEs and continue with packet processing.

Fixes: 88c0733535 ("net/mlx5: extend Rx completion with error handling")
Cc: stable@dpdk.org

Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
---
 drivers/net/mlx5/mlx5_rx.c       | 123 ++++++++++++++++++++++++-------
 drivers/net/mlx5/mlx5_rx.h       |   5 +-
 drivers/net/mlx5/mlx5_rxtx_vec.c |   3 +-
 3 files changed, 102 insertions(+), 29 deletions(-)
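
For readers skimming the diff below: the heart of the change is a syndrome
test on every error CQE. Only a critical syndrome drives the queue into
MLX5_RXQ_ERR_STATE_NEED_RESET; anything else is dropped, counted via
skip_cnt, and Rx resumes. A minimal editorial sketch, not part of the patch,
assuming the MLX5_CQE_SYNDROME_* values from the driver's PRM definitions:

#include <stdbool.h>
#include <stdint.h>

/* Classify an error-CQE syndrome the way the new
 * MLX5_RXQ_ERR_STATE_NO_ERROR path does. */
static inline bool
mlx5_syndrome_is_critical(uint8_t syndrome)
{
	switch (syndrome) {
	case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR: /* QP operation error */
	case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:  /* memory protection error */
	case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:    /* WR flushed, queue in error */
		return true;  /* warrants an Rx queue reset */
	default:
		/* e.g. LOCAL_LENGTH_ERR: drop the packet, keep the queue */
		return false; /* skip the CQE and continue */
	}
}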
  

Comments

Matan Azrad Feb. 6, 2023, 3:05 p.m. UTC | #1
From: Alexander Kozyrev
> For non-fatal syndromes like LOCAL_LENGTH_ERR, an Rx queue reset
> shouldn't be triggered. The Rx queue can continue with the next packets
> without any recovery. Only three syndromes warrant an Rx queue reset:
> LOCAL_QP_OP_ERR, LOCAL_PROT_ERR and WR_FLUSH_ERR.
> Do not initiate an Rx queue reset in any other case.
> Skip all non-critical error CQEs and continue with packet processing.
> 
> Fixes: 88c0733535 ("net/mlx5: extend Rx completion with error handling")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
  
Raslan Darawsheh Feb. 12, 2023, 1:37 p.m. UTC | #2
Hi,

> -----Original Message-----
> From: Alexander Kozyrev <akozyrev@nvidia.com>
> Sent: Friday, January 27, 2023 5:23 AM
> To: dev@dpdk.org
> Cc: stable@dpdk.org; Raslan Darawsheh <rasland@nvidia.com>; Matan
> Azrad <matan@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> Subject: [PATCH] net/mlx5: ignore non-critical syndromes for Rx queue
> 
> For non-fatal syndromes like LOCAL_LENGTH_ERR, an Rx queue reset
> shouldn't be triggered. The Rx queue can continue with the next packets
> without any recovery. Only three syndromes warrant an Rx queue reset:
> LOCAL_QP_OP_ERR, LOCAL_PROT_ERR and WR_FLUSH_ERR.
> Do not initiate an Rx queue reset in any other case.
> Skip all non-critical error CQEs and continue with packet processing.
> 
> Fixes: 88c0733535 ("net/mlx5: extend Rx completion with error handling")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>

Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh
  

Patch

diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index 7612d15f01..99a08ef5f1 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -39,7 +39,8 @@  rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 
 static __rte_always_inline int
 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
-		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);
+		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe,
+		 uint16_t *skip_cnt, bool mprq);
 
 static __rte_always_inline uint32_t
 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);
@@ -408,10 +409,14 @@  mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
 	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 }
 
+#define MLX5_ERROR_CQE_MASK 0x40000000
 /* Must be negative. */
-#define MLX5_ERROR_CQE_RET (-1)
+#define MLX5_REGULAR_ERROR_CQE_RET (-5)
+#define MLX5_CRITICAL_ERROR_CQE_RET (-4)
 /* Must not be negative. */
 #define MLX5_RECOVERY_ERROR_RET 0
+#define MLX5_RECOVERY_IGNORE_RET 1
+#define MLX5_RECOVERY_COMPLETED_RET 2
 
 /**
  * Handle a Rx error.
@@ -429,10 +434,14 @@  mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
  *   Number of CQEs to check for an error.
  *
  * @return
- *   MLX5_RECOVERY_ERROR_RET in case of recovery error, otherwise the CQE status.
+ *   MLX5_RECOVERY_ERROR_RET in case of recovery error,
+ *   MLX5_RECOVERY_IGNORE_RET in case of non-critical error syndrome,
+ *   MLX5_RECOVERY_COMPLETED_RET in case of recovery is completed,
+ *   otherwise the CQE status after ignored error syndrome or queue reset.
  */
 int
-mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
+mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
+		   uint16_t err_n, uint16_t *skip_cnt)
 {
 	const uint16_t cqe_n = 1 << rxq->cqe_n;
 	const uint16_t cqe_mask = cqe_n - 1;
@@ -447,14 +456,35 @@  mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
 		.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec) & cqe_mask],
 	};
 	struct mlx5_mp_arg_queue_state_modify sm;
+	bool critical_syndrome = false;
 	int ret, i;
 
 	switch (rxq->err_state) {
+	case MLX5_RXQ_ERR_STATE_IGNORE:
+		ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci - vec);
+		if (ret != MLX5_CQE_STATUS_ERR) {
+			rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+			return ret;
+		}
+		/* Fall-through */
 	case MLX5_RXQ_ERR_STATE_NO_ERROR:
 		for (i = 0; i < (int)err_n; i++) {
 			u.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec - i) & cqe_mask];
-			if (MLX5_CQE_OPCODE(u.cqe->op_own) == MLX5_CQE_RESP_ERR)
+			if (MLX5_CQE_OPCODE(u.cqe->op_own) == MLX5_CQE_RESP_ERR) {
+				if (u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
+				    u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
+				    u.err_cqe->syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR)
+					critical_syndrome = true;
 				break;
+			}
+		}
+		if (!critical_syndrome) {
+			if (rxq->err_state == MLX5_RXQ_ERR_STATE_NO_ERROR) {
+				*skip_cnt = 0;
+				if (i == err_n)
+					rxq->err_state = MLX5_RXQ_ERR_STATE_IGNORE;
+			}
+			return MLX5_RECOVERY_IGNORE_RET;
 		}
 		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
 		/* Fall-through */
@@ -546,6 +576,7 @@  mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
 			}
 			mlx5_rxq_initialize(rxq);
 			rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+			return MLX5_RECOVERY_COMPLETED_RET;
 		}
 		return ret;
 	default:
@@ -565,19 +596,24 @@  mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
  * @param[out] mcqe
  *   Store pointer to mini-CQE if compressed. Otherwise, the pointer is not
  *   written.
- *
+ * @param[out] skip_cnt
+ *   Number of packets skipped due to recoverable errors.
+ * @param mprq
+ *   Indication if it is called from MPRQ.
  * @return
- *   0 in case of empty CQE, MLX5_ERROR_CQE_RET in case of error CQE,
- *   otherwise the packet size in regular RxQ, and striding byte
- *   count format in mprq case.
+ *   0 in case of empty CQE, MLX5_REGULAR_ERROR_CQE_RET in case of error CQE,
+ *   MLX5_CRITICAL_ERROR_CQE_RET in case of error CQE lead to Rx queue reset,
+ *   otherwise the packet size in regular RxQ,
+ *   and striding byte count format in mprq case.
  */
 static inline int
 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
-		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe)
+		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe,
+		 uint16_t *skip_cnt, bool mprq)
 {
 	struct rxq_zip *zip = &rxq->zip;
 	uint16_t cqe_n = cqe_cnt + 1;
-	int len;
+	int len = 0, ret = 0;
 	uint16_t idx, end;
 
 	do {
@@ -626,7 +662,6 @@  mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 		 * compressed.
 		 */
 		} else {
-			int ret;
 			int8_t op_own;
 			uint32_t cq_ci;
 
@@ -634,10 +669,12 @@  mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 			if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
 				if (unlikely(ret == MLX5_CQE_STATUS_ERR ||
 					     rxq->err_state)) {
-					ret = mlx5_rx_err_handle(rxq, 0, 1);
-					if (ret == MLX5_CQE_STATUS_HW_OWN ||
-					    ret == MLX5_RECOVERY_ERROR_RET)
-						return MLX5_ERROR_CQE_RET;
+					ret = mlx5_rx_err_handle(rxq, 0, 1, skip_cnt);
+					if (ret == MLX5_CQE_STATUS_HW_OWN)
+						return MLX5_ERROR_CQE_MASK;
+					if (ret == MLX5_RECOVERY_ERROR_RET ||
+						ret == MLX5_RECOVERY_COMPLETED_RET)
+						return MLX5_CRITICAL_ERROR_CQE_RET;
 				} else {
 					return 0;
 				}
@@ -690,8 +727,15 @@  mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 			}
 		}
 		if (unlikely(rxq->err_state)) {
+			if (rxq->err_state == MLX5_RXQ_ERR_STATE_IGNORE &&
+			    ret == MLX5_CQE_STATUS_SW_OWN) {
+				rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+				return len & MLX5_ERROR_CQE_MASK;
+			}
 			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
 			++rxq->stats.idropped;
+			(*skip_cnt) += mprq ? (len & MLX5_MPRQ_STRIDE_NUM_MASK) >>
+				MLX5_MPRQ_STRIDE_NUM_SHIFT : 1;
 		} else {
 			return len;
 		}
@@ -843,6 +887,7 @@  mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	int len = 0; /* keep its value across iterations. */
 
 	while (pkts_n) {
+		uint16_t skip_cnt;
 		unsigned int idx = rq_ci & wqe_cnt;
 		volatile struct mlx5_wqe_data_seg *wqe =
 			&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
@@ -881,11 +926,24 @@  mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		}
 		if (!pkt) {
 			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
-			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);
-			if (len <= 0) {
-				rte_mbuf_raw_free(rep);
-				if (unlikely(len == MLX5_ERROR_CQE_RET))
+			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe, &skip_cnt, false);
+			if (unlikely(len & MLX5_ERROR_CQE_MASK)) {
+				if (len == MLX5_CRITICAL_ERROR_CQE_RET) {
+					rte_mbuf_raw_free(rep);
 					rq_ci = rxq->rq_ci << sges_n;
+					break;
+				}
+				rq_ci >>= sges_n;
+				rq_ci += skip_cnt;
+				rq_ci <<= sges_n;
+				idx = rq_ci & wqe_cnt;
+				wqe = &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
+				seg = (*rxq->elts)[idx];
+				cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
+				len = len & ~MLX5_ERROR_CQE_MASK;
+			}
+			if (len == 0) {
+				rte_mbuf_raw_free(rep);
 				break;
 			}
 			pkt = seg;
@@ -1095,6 +1153,7 @@  mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		uint16_t strd_cnt;
 		uint16_t strd_idx;
 		uint32_t byte_cnt;
+		uint16_t skip_cnt;
 		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
 		enum mlx5_rqx_code rxq_code;
 
@@ -1107,14 +1166,26 @@  mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
 		}
 		cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
-		ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
+		ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe, &skip_cnt, true);
+		if (unlikely(ret & MLX5_ERROR_CQE_MASK)) {
+			if (ret == MLX5_CRITICAL_ERROR_CQE_RET) {
+				rq_ci = rxq->rq_ci;
+				consumed_strd = rxq->consumed_strd;
+				break;
+			}
+			consumed_strd += skip_cnt;
+			while (consumed_strd >= strd_n) {
+				/* Replace WQE if the buffer is still in use. */
+				mprq_buf_replace(rxq, rq_ci & wq_mask);
+				/* Advance to the next WQE. */
+				consumed_strd -= strd_n;
+				++rq_ci;
+				buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
+			}
+			cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
+		}
 		if (ret == 0)
 			break;
-		if (unlikely(ret == MLX5_ERROR_CQE_RET)) {
-			rq_ci = rxq->rq_ci;
-			consumed_strd = rxq->consumed_strd;
-			break;
-		}
 		byte_cnt = ret;
 		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
 		MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 4ba53ebc48..6b42e27c89 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -62,6 +62,7 @@  enum mlx5_rxq_err_state {
 	MLX5_RXQ_ERR_STATE_NO_ERROR = 0,
 	MLX5_RXQ_ERR_STATE_NEED_RESET,
 	MLX5_RXQ_ERR_STATE_NEED_READY,
+	MLX5_RXQ_ERR_STATE_IGNORE,
 };
 
 enum mlx5_rqx_code {
@@ -286,8 +287,8 @@  int mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hxrq_idx,
 
 uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
 void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq);
-__rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq,
-				      uint8_t vec, uint16_t err_n);
+__rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
+				      uint16_t err_n, uint16_t *skip_cnt);
 void mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf);
 uint16_t mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts,
 			    uint16_t pkts_n);
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index c6be2be763..667475a93e 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -51,6 +51,7 @@  rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
 			 uint16_t pkts_n)
 {
 	uint16_t n = 0;
+	uint16_t skip_cnt;
 	unsigned int i;
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	uint32_t err_bytes = 0;
@@ -74,7 +75,7 @@  rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
 	rxq->stats.ipackets -= (pkts_n - n);
 	rxq->stats.ibytes -= err_bytes;
 #endif
-	mlx5_rx_err_handle(rxq, 1, pkts_n);
+	mlx5_rx_err_handle(rxq, 1, pkts_n, &skip_cnt);
 	return n;
 }
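
A closing note on the return-value encoding (editorial, not from the thread):
mlx5_rx_poll_len() now folds the error indication into its integer return.
MLX5_ERROR_CQE_MASK is bit 30, which is set in the two's-complement form of
the small negative sentinels (-4, -5) but can never be set in a valid packet
length, so callers separate the cases with a single mask test, then clear the
bit and, for non-critical errors, advance rq_ci by skip_cnt packets (rescaled
through sges_n, or by the consumed stride count under MPRQ). A compilable
sketch of that dispatch, using only the constants defined in the patch:

#include <stdio.h>

#define MLX5_ERROR_CQE_MASK         0x40000000
#define MLX5_REGULAR_ERROR_CQE_RET  (-5)
#define MLX5_CRITICAL_ERROR_CQE_RET (-4)

int main(void)
{
	/* Sample poll results: valid lengths and both error sentinels. */
	int samples[] = { 0, 64, 1500, MLX5_REGULAR_ERROR_CQE_RET,
			  MLX5_CRITICAL_ERROR_CQE_RET };
	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		int len = samples[i];
		if (len & MLX5_ERROR_CQE_MASK) {
			/* Bit 30 set: an error path, never a real length. */
			if (len == MLX5_CRITICAL_ERROR_CQE_RET)
				printf("%d -> critical error: reset the Rx queue\n", len);
			else
				printf("%d -> non-critical error: skip and continue\n", len);
		} else {
			printf("%d -> %s (length %d)\n", len,
			       len ? "packet" : "empty CQE", len);
		}
	}
	return 0;
}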