[v4,1/2] net/iavf: add diagnostic support in TX path

Message ID 20231227101655.799560-1-mingjinx.ye@intel.com (mailing list archive)
State Superseded, archived
Headers
Series [v4,1/2] net/iavf: add diagnostic support in TX path |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK
ci/intel-Functional success Functional PASS
ci/intel-Testing success Testing PASS

Commit Message

Mingjin Ye Dec. 27, 2023, 10:16 a.m. UTC
  The only way to enable diagnostics for TX paths is to modify the
application source code. Making it difficult to diagnose faults.

In this patch, the devarg option "mbuf_check" is introduced and the
parameters are configured to enable the corresponding diagnostics.

supported cases: mbuf, size, segment, offload, strict.
 1. mbuf: check for corrupted mbuf.
 2. size: check min/max packet length according to hw spec.
 3. segment: check number of mbuf segments not exceed hw limitation.
 4. offload: check any unsupported offload flag.
 5. strict: check protocol headers.

parameter format: mbuf_check=[mbuf,<case1>,<case2>]
eg: dpdk-testpmd -a 0000:81:01.0,mbuf_check=[mbuf,size] -- -i

Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
---
v2: Remove call chain.
---
v3: Optimisation implementation.
---
v4: Fix Windows os compilation error.
---
 drivers/net/iavf/iavf.h        |  25 +++++-
 drivers/net/iavf/iavf_ethdev.c |  70 +++++++++++++++++
 drivers/net/iavf/iavf_rxtx.c   | 138 ++++++++++++++++++++++++++++++++-
 drivers/net/iavf/iavf_rxtx.h   |   5 ++
 4 files changed, 234 insertions(+), 4 deletions(-)
  

Comments

Qi Zhang Dec. 27, 2023, 11:30 a.m. UTC | #1
> -----Original Message-----
> From: Mingjin Ye <mingjinx.ye@intel.com>
> Sent: Wednesday, December 27, 2023 6:17 PM
> To: dev@dpdk.org
> Cc: Yang, Qiming <qiming.yang@intel.com>; Ye, MingjinX
> <mingjinx.ye@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>; Xing, Beilei
> <beilei.xing@intel.com>
> Subject: [PATCH v4 1/2] net/iavf: add diagnostic support in TX path
> 
> The only way to enable diagnostics for TX paths is to modify the application
> source code. Making it difficult to diagnose faults.
> 
> In this patch, the devarg option "mbuf_check" is introduced and the
> parameters are configured to enable the corresponding diagnostics.

Can you separate this patch into two?

1. introduce the tx burst type and the ops array, this actually fixed the multi-process issue, and should be backported into LTS.
2. add mbuf check for tx diagnose 

Btw, please also update the document , so user can know how to use the new devarg.
  

Patch

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 10868f2c30..b81329bb56 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -113,9 +113,14 @@  struct iavf_ipsec_crypto_stats {
 	} ierrors;
 };
 
+struct iavf_mbuf_stats {
+	uint64_t tx_pkt_errors;
+};
+
 struct iavf_eth_xstats {
 	struct virtchnl_eth_stats eth_stats;
 	struct iavf_ipsec_crypto_stats ips_stats;
+	struct iavf_mbuf_stats mbuf_stats;
 };
 
 /* Structure that defines a VSI, associated with a adapter. */
@@ -309,10 +314,27 @@  struct iavf_devargs {
 	uint32_t watchdog_period;
 	int auto_reset;
 	int no_poll_on_link_down;
+	int mbuf_check;
 };
 
 struct iavf_security_ctx;
 
+#define IAVF_MBUF_CHECK_F_TX_MBUF        (1ULL << 0)
+#define IAVF_MBUF_CHECK_F_TX_SIZE        (1ULL << 1)
+#define IAVF_MBUF_CHECK_F_TX_SEGMENT     (1ULL << 2)
+#define IAVF_MBUF_CHECK_F_TX_OFFLOAD     (1ULL << 3)
+#define IAVF_MBUF_CHECK_F_TX_STRICT      (1ULL << 4)
+
+enum iavf_tx_pkt_burst_type {
+	IAVF_PKT_BURST_DEFAULT		= 0,
+	IAVF_PKT_BURST_VEC		= 1,
+	IAVF_PKT_BURST_VEC_AVX2		= 2,
+	IAVF_PKT_BURST_VEC_AVX2_OFFLOAD	= 3,
+	IAVF_PKT_BURST_VEC_AVX512	= 4,
+	IAVF_PKT_BURST_VEC_AVX512_OFFLOAD	= 5,
+	IAVF_PKT_BURST_VEC_AVX512_CTX_OFFLOAD	= 6,
+};
+
 /* Structure to store private data for each VF instance. */
 struct iavf_adapter {
 	struct iavf_hw hw;
@@ -329,7 +351,8 @@  struct iavf_adapter {
 	bool closed;
 	bool no_poll;
 	eth_rx_burst_t rx_pkt_burst;
-	eth_tx_burst_t tx_pkt_burst;
+	enum iavf_tx_pkt_burst_type tx_burst_type;
+	uint64_t mc_flags; /* mbuf check flags. */
 	uint16_t fdir_ref_cnt;
 	struct iavf_devargs devargs;
 };
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index d1edb0dd5c..5398d2783f 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -13,6 +13,7 @@ 
 #include <inttypes.h>
 #include <rte_byteorder.h>
 #include <rte_common.h>
+#include <rte_os_shim.h>
 
 #include <rte_interrupts.h>
 #include <rte_debug.h>
@@ -39,6 +40,8 @@ 
 #define IAVF_RESET_WATCHDOG_ARG    "watchdog_period"
 #define IAVF_ENABLE_AUTO_RESET_ARG "auto_reset"
 #define IAVF_NO_POLL_ON_LINK_DOWN_ARG "no-poll-on-link-down"
+#define IAVF_MBUF_CHECK_ARG       "mbuf_check"
+
 uint64_t iavf_timestamp_dynflag;
 int iavf_timestamp_dynfield_offset = -1;
 
@@ -48,6 +51,7 @@  static const char * const iavf_valid_args[] = {
 	IAVF_RESET_WATCHDOG_ARG,
 	IAVF_ENABLE_AUTO_RESET_ARG,
 	IAVF_NO_POLL_ON_LINK_DOWN_ARG,
+	IAVF_MBUF_CHECK_ARG,
 	NULL
 };
 
@@ -174,6 +178,7 @@  static const struct rte_iavf_xstats_name_off rte_iavf_stats_strings[] = {
 	{"tx_broadcast_packets", _OFF_OF(eth_stats.tx_broadcast)},
 	{"tx_dropped_packets", _OFF_OF(eth_stats.tx_discards)},
 	{"tx_error_packets", _OFF_OF(eth_stats.tx_errors)},
+	{"tx_mbuf_error_packets", _OFF_OF(mbuf_stats.tx_pkt_errors)},
 
 	{"inline_ipsec_crypto_ipackets", _OFF_OF(ips_stats.icount)},
 	{"inline_ipsec_crypto_ibytes", _OFF_OF(ips_stats.ibytes)},
@@ -1881,6 +1886,8 @@  static int iavf_dev_xstats_get(struct rte_eth_dev *dev,
 {
 	int ret;
 	unsigned int i;
+	struct iavf_tx_queue *txq;
+	uint64_t mbuf_errors = 0;
 	struct iavf_adapter *adapter =
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
@@ -1904,6 +1911,15 @@  static int iavf_dev_xstats_get(struct rte_eth_dev *dev,
 	if (iavf_ipsec_crypto_supported(adapter))
 		iavf_dev_update_ipsec_xstats(dev, &iavf_xtats.ips_stats);
 
+	if (adapter->devargs.mbuf_check) {
+		for (i = 0; i < dev->data->nb_tx_queues; i++) {
+			txq = dev->data->tx_queues[i];
+			mbuf_errors += __atomic_load_n(&txq->mbuf_errors,
+				__ATOMIC_RELAXED);
+		}
+		iavf_xtats.mbuf_stats.tx_pkt_errors = mbuf_errors;
+	}
+
 	/* loop over xstats array and values from pstats */
 	for (i = 0; i < IAVF_NB_XSTATS; i++) {
 		xstats[i].id = i;
@@ -2286,6 +2302,52 @@  iavf_parse_watchdog_period(__rte_unused const char *key, const char *value, void
 	return 0;
 }
 
+static int
+iavf_parse_mbuf_check(__rte_unused const char *key, const char *value, void *args)
+{
+	char *cur;
+	char *tmp;
+	int str_len;
+	int valid_len;
+
+	int ret = 0;
+	uint64_t *mc_flags = args;
+	char *str2 = strdup(value);
+	if (str2 == NULL)
+		return -1;
+
+	str_len = strlen(str2);
+	if (str2[0] == '[' && str2[str_len - 1] == ']') {
+		if (str_len < 3) {
+			ret = -1;
+			goto mdd_end;
+		}
+		valid_len = str_len - 2;
+		memmove(str2, str2 + 1, valid_len);
+		memset(str2 + valid_len, '\0', 2);
+	}
+	cur = strtok_r(str2, ",", &tmp);
+	while (cur != NULL) {
+		if (!strcmp(cur, "mbuf"))
+			*mc_flags |= IAVF_MBUF_CHECK_F_TX_MBUF;
+		else if (!strcmp(cur, "size"))
+			*mc_flags |= IAVF_MBUF_CHECK_F_TX_SIZE;
+		else if (!strcmp(cur, "segment"))
+			*mc_flags |= IAVF_MBUF_CHECK_F_TX_SEGMENT;
+		else if (!strcmp(cur, "offload"))
+			*mc_flags |= IAVF_MBUF_CHECK_F_TX_OFFLOAD;
+		else if (!strcmp(cur, "strict"))
+			*mc_flags |= IAVF_MBUF_CHECK_F_TX_STRICT;
+		else
+			PMD_DRV_LOG(ERR, "Unsupported mdd check type: %s", cur);
+		cur = strtok_r(NULL, ",", &tmp);
+	}
+
+mdd_end:
+	free(str2);
+	return ret;
+}
+
 static int iavf_parse_devargs(struct rte_eth_dev *dev)
 {
 	struct iavf_adapter *ad =
@@ -2340,6 +2402,14 @@  static int iavf_parse_devargs(struct rte_eth_dev *dev)
 		goto bail;
 	}
 
+	ret = rte_kvargs_process(kvlist, IAVF_MBUF_CHECK_ARG,
+				 &iavf_parse_mbuf_check, &ad->mc_flags);
+	if (ret)
+		goto bail;
+
+	if (ad->mc_flags)
+		ad->devargs.mbuf_check = 1;
+
 	ret = rte_kvargs_process(kvlist, IAVF_ENABLE_AUTO_RESET_ARG,
 				 &parse_bool, &ad->devargs.auto_reset);
 	if (ret)
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index f19aa14646..7fa84e9f0e 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -425,6 +425,23 @@  struct iavf_txq_ops iavf_txq_release_mbufs_ops[] = {
 
 };
 
+static const
+struct iavf_tx_burst_ops iavf_tx_pkt_burst_ops[] = {
+	[IAVF_PKT_BURST_DEFAULT].tx_pkt_burst = iavf_xmit_pkts,
+#ifdef RTE_ARCH_X86
+	[IAVF_PKT_BURST_VEC].tx_pkt_burst = iavf_xmit_pkts_vec,
+	[IAVF_PKT_BURST_VEC_AVX2].tx_pkt_burst = iavf_xmit_pkts_vec_avx2,
+	[IAVF_PKT_BURST_VEC_AVX2_OFFLOAD].tx_pkt_burst = iavf_xmit_pkts_vec_avx2_offload,
+#ifdef CC_AVX512_SUPPORT
+	[IAVF_PKT_BURST_VEC_AVX512].tx_pkt_burst = iavf_xmit_pkts_vec_avx512,
+	[IAVF_PKT_BURST_VEC_AVX512_OFFLOAD].tx_pkt_burst =
+		iavf_xmit_pkts_vec_avx512_offload,
+	[IAVF_PKT_BURST_VEC_AVX512_CTX_OFFLOAD].tx_pkt_burst =
+		iavf_xmit_pkts_vec_avx512_ctx_offload,
+#endif
+#endif
+};
+
 static inline void
 iavf_rxd_to_pkt_fields_by_comms_ovs(__rte_unused struct iavf_rx_queue *rxq,
 				    struct rte_mbuf *mb,
@@ -3629,6 +3646,103 @@  iavf_check_mbuf(struct rte_mbuf *m)
 	return check_ether_type(&info, m);
 }
 
+/* Tx mbuf check */
+static uint16_t
+iavf_xmit_pkts_check(void *tx_queue, struct rte_mbuf **tx_pkts,
+	      uint16_t nb_pkts)
+{
+	uint16_t idx;
+	uint64_t ol_flags;
+	struct rte_mbuf *mb;
+	uint16_t good_pkts = nb_pkts;
+	const char *reason = NULL;
+	bool pkt_error = false;
+	struct iavf_tx_queue *txq = tx_queue;
+	struct iavf_adapter *adapter = txq->vsi->adapter;
+	enum iavf_tx_pkt_burst_type tx_burst_type =
+		txq->vsi->adapter->tx_burst_type;
+
+	for (idx = 0; idx < nb_pkts; idx++) {
+		mb = tx_pkts[idx];
+		ol_flags = mb->ol_flags;
+
+		if ((adapter->mc_flags & IAVF_MBUF_CHECK_F_TX_MBUF) &&
+			(rte_mbuf_check(mb, 1, &reason) != 0)) {
+			PMD_TX_LOG(ERR, "INVALID mbuf: %s\n", reason);
+			pkt_error = true;
+			break;
+		}
+
+		if ((adapter->mc_flags & IAVF_MBUF_CHECK_F_TX_SIZE) &&
+			(mb->data_len < IAVF_TX_MIN_PKT_LEN ||
+			mb->data_len > adapter->vf.max_pkt_len)) {
+			PMD_TX_LOG(ERR, "INVALID mbuf: data_len (%u) is out "
+			"of range, reasonable range (%d - %u)\n", mb->data_len,
+			IAVF_TX_MIN_PKT_LEN, adapter->vf.max_pkt_len);
+			pkt_error = true;
+			break;
+		}
+
+		if (adapter->mc_flags & IAVF_MBUF_CHECK_F_TX_SEGMENT) {
+			/* Check condition for nb_segs > IAVF_TX_MAX_MTU_SEG. */
+			if (!(ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG))) {
+				if (mb->nb_segs > IAVF_TX_MAX_MTU_SEG) {
+					PMD_TX_LOG(ERR, "INVALID mbuf: nb_segs (%d) exceeds "
+					"HW limit, maximum allowed value is %d\n", mb->nb_segs,
+					IAVF_TX_MAX_MTU_SEG);
+					pkt_error = true;
+					break;
+				}
+			} else if ((mb->tso_segsz < IAVF_MIN_TSO_MSS) ||
+				(mb->tso_segsz > IAVF_MAX_TSO_MSS)) {
+				/* MSS outside the range are considered malicious */
+				PMD_TX_LOG(ERR, "INVALID mbuf: tso_segsz (%u) is out "
+				"of range, reasonable range (%d - %u)\n", mb->tso_segsz,
+				IAVF_MIN_TSO_MSS, IAVF_MAX_TSO_MSS);
+				pkt_error = true;
+				break;
+			} else if (mb->nb_segs > txq->nb_tx_desc) {
+				PMD_TX_LOG(ERR, "INVALID mbuf: nb_segs out "
+				"of ring length\n");
+				pkt_error = true;
+				break;
+			}
+		}
+
+		if (adapter->mc_flags & IAVF_MBUF_CHECK_F_TX_OFFLOAD) {
+			if (ol_flags & IAVF_TX_OFFLOAD_NOTSUP_MASK) {
+				PMD_TX_LOG(ERR, "INVALID mbuf: TX offload "
+				"is not supported\n");
+				pkt_error = true;
+				break;
+			}
+
+			if (!rte_validate_tx_offload(mb)) {
+				PMD_TX_LOG(ERR, "INVALID mbuf: TX offload "
+				"setup error\n");
+				pkt_error = true;
+				break;
+			}
+		}
+
+		if (adapter->mc_flags & IAVF_MBUF_CHECK_F_TX_STRICT &&
+			iavf_check_mbuf(mb)) {
+			pkt_error = true;
+			break;
+		}
+	}
+
+	if (pkt_error) {
+		__atomic_fetch_add(&txq->mbuf_errors, 1, __ATOMIC_RELAXED);
+		good_pkts = idx;
+		if (good_pkts == 0)
+			return 0;
+	}
+
+	return iavf_tx_pkt_burst_ops[tx_burst_type].tx_pkt_burst(tx_queue,
+								tx_pkts, good_pkts);
+}
+
 /* TX prep functions */
 uint16_t
 iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
@@ -3724,10 +3838,13 @@  iavf_xmit_pkts_no_poll(void *tx_queue, struct rte_mbuf **tx_pkts,
 				uint16_t nb_pkts)
 {
 	struct iavf_tx_queue *txq = tx_queue;
+	enum iavf_tx_pkt_burst_type tx_burst_type =
+		txq->vsi->adapter->tx_burst_type;
+
 	if (!txq->vsi || txq->vsi->adapter->no_poll)
 		return 0;
 
-	return txq->vsi->adapter->tx_pkt_burst(tx_queue,
+	return iavf_tx_pkt_burst_ops[tx_burst_type].tx_pkt_burst(tx_queue,
 								tx_pkts, nb_pkts);
 }
 
@@ -3975,6 +4092,8 @@  iavf_set_tx_function(struct rte_eth_dev *dev)
 {
 	struct iavf_adapter *adapter =
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	enum iavf_tx_pkt_burst_type tx_burst_type;
+	int mbuf_check = adapter->devargs.mbuf_check;
 	int no_poll_on_link_down = adapter->devargs.no_poll_on_link_down;
 #ifdef RTE_ARCH_X86
 	struct iavf_tx_queue *txq;
@@ -4011,10 +4130,12 @@  iavf_set_tx_function(struct rte_eth_dev *dev)
 			PMD_DRV_LOG(DEBUG, "Using Vector Tx (port %d).",
 				    dev->data->port_id);
 			dev->tx_pkt_burst = iavf_xmit_pkts_vec;
+			tx_burst_type = IAVF_PKT_BURST_VEC;
 		}
 		if (use_avx2) {
 			if (check_ret == IAVF_VECTOR_PATH) {
 				dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx2;
+				tx_burst_type = IAVF_PKT_BURST_VEC_AVX2;
 				PMD_DRV_LOG(DEBUG, "Using AVX2 Vector Tx (port %d).",
 					    dev->data->port_id);
 			} else if (check_ret == IAVF_VECTOR_CTX_OFFLOAD_PATH) {
@@ -4023,6 +4144,7 @@  iavf_set_tx_function(struct rte_eth_dev *dev)
 				goto normal;
 			} else {
 				dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx2_offload;
+				tx_burst_type = IAVF_PKT_BURST_VEC_AVX2_OFFLOAD;
 				dev->tx_pkt_prepare = iavf_prep_pkts;
 				PMD_DRV_LOG(DEBUG, "Using AVX2 OFFLOAD Vector Tx (port %d).",
 					    dev->data->port_id);
@@ -4032,15 +4154,18 @@  iavf_set_tx_function(struct rte_eth_dev *dev)
 		if (use_avx512) {
 			if (check_ret == IAVF_VECTOR_PATH) {
 				dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512;
+				tx_burst_type = IAVF_PKT_BURST_VEC_AVX512;
 				PMD_DRV_LOG(DEBUG, "Using AVX512 Vector Tx (port %d).",
 					    dev->data->port_id);
 			} else if (check_ret == IAVF_VECTOR_OFFLOAD_PATH) {
 				dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512_offload;
+				tx_burst_type = IAVF_PKT_BURST_VEC_AVX512_OFFLOAD;
 				dev->tx_pkt_prepare = iavf_prep_pkts;
 				PMD_DRV_LOG(DEBUG, "Using AVX512 OFFLOAD Vector Tx (port %d).",
 					    dev->data->port_id);
 			} else {
 				dev->tx_pkt_burst = iavf_xmit_pkts_vec_avx512_ctx_offload;
+				tx_burst_type = IAVF_PKT_BURST_VEC_AVX512_CTX_OFFLOAD;
 				dev->tx_pkt_prepare = iavf_prep_pkts;
 				PMD_DRV_LOG(DEBUG, "Using AVX512 CONTEXT OFFLOAD Vector Tx (port %d).",
 					    dev->data->port_id);
@@ -4063,8 +4188,11 @@  iavf_set_tx_function(struct rte_eth_dev *dev)
 		}
 
 		if (no_poll_on_link_down) {
-			adapter->tx_pkt_burst = dev->tx_pkt_burst;
+			adapter->tx_burst_type = tx_burst_type;
 			dev->tx_pkt_burst = iavf_xmit_pkts_no_poll;
+		} else if (mbuf_check) {
+			adapter->tx_burst_type = tx_burst_type;
+			dev->tx_pkt_burst = iavf_xmit_pkts_check;
 		}
 		return;
 	}
@@ -4074,11 +4202,15 @@  iavf_set_tx_function(struct rte_eth_dev *dev)
 	PMD_DRV_LOG(DEBUG, "Using Basic Tx callback (port=%d).",
 		    dev->data->port_id);
 	dev->tx_pkt_burst = iavf_xmit_pkts;
+	tx_burst_type = IAVF_PKT_BURST_DEFAULT;
 	dev->tx_pkt_prepare = iavf_prep_pkts;
 
 	if (no_poll_on_link_down) {
-		adapter->tx_pkt_burst = dev->tx_pkt_burst;
+		adapter->tx_burst_type = tx_burst_type;
 		dev->tx_pkt_burst = iavf_xmit_pkts_no_poll;
+	} else if (mbuf_check) {
+		adapter->tx_burst_type = tx_burst_type;
+		dev->tx_pkt_burst = iavf_xmit_pkts_check;
 	}
 }
 
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index f432f9d956..a4128ff7a3 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -203,6 +203,9 @@  struct iavf_txq_ops {
 	void (*release_mbufs)(struct iavf_tx_queue *txq);
 };
 
+struct iavf_tx_burst_ops {
+	eth_tx_burst_t tx_pkt_burst;
+};
 
 struct iavf_rx_queue_stats {
 	uint64_t reserved;
@@ -297,6 +300,8 @@  struct iavf_tx_queue {
 	uint16_t next_rs;              /* next to check DD,  for VPMD */
 	uint16_t ipsec_crypto_pkt_md_offset;
 
+	uint64_t mbuf_errors;
+
 	bool q_set;                    /* if rx queue has been configured */
 	bool tx_deferred_start;        /* don't start this queue in dev start */
 	const struct iavf_txq_ops *ops;