[v4,1/6] net/cnxk: add multi seg Rx vector routine

Message ID 20210628194144.637-1-pbhagavatula@marvell.com (mailing list archive)
State Superseded, archived
Headers
Series [v4,1/6] net/cnxk: add multi seg Rx vector routine |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-testing warning apply patch failure

Commit Message

Pavan Nikhilesh Bhagavatula June 28, 2021, 7:41 p.m. UTC
  From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add multi-segment Rx vector routine: form the primary mbufs using the
vector path, then switch to the scalar path when extracting segments.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 v4 Changes:
 - Split patches for easier merge.
 - Rebase on dpdk-next-net-mrvl.
 v3 Changes:
 - Spell check.

 drivers/net/cnxk/cn10k_rx.c          | 31 +++++++++++------
 drivers/net/cnxk/cn10k_rx.h          | 51 +++++++++++++++++++++-------
 drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++
 drivers/net/cnxk/cn9k_rx.c           | 31 +++++++++++------
 drivers/net/cnxk/cn9k_rx.h           | 51 +++++++++++++++++++++-------
 drivers/net/cnxk/cn9k_rx_vec_mseg.c  | 18 ++++++++++
 drivers/net/cnxk/meson.build         |  2 ++
 7 files changed, 157 insertions(+), 44 deletions(-)
 create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c
 create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c

--
2.17.1
  

Patch

diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c
index 5c956c06b..3a9fd7130 100644
--- a/drivers/net/cnxk/cn10k_rx.c
+++ b/drivers/net/cnxk/cn10k_rx.c
@@ -29,6 +29,8 @@  pick_rx_func(struct rte_eth_dev *eth_dev,
 		[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
 		[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
 		[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
+
+	rte_atomic_thread_fence(__ATOMIC_RELEASE);
 }

 void
@@ -60,20 +62,29 @@  cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
 #undef R
 	};

-	/* For PTP enabled, scalar rx function should be chosen as most of the
-	 * PTP apps are implemented to rx burst 1 pkt.
-	 */
-	if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
-		pick_rx_func(eth_dev, nix_eth_rx_burst);
-	else
-		pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
+	const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
+	[f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name,

-	if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
-		pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};

 	/* Copy multi seg version with no offload for tear down sequence */
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
 		dev->rx_pkt_burst_no_offload =
 			nix_eth_rx_burst_mseg[0][0][0][0][0][0];
-	rte_mb();
+
+	/* For PTP enabled, scalar rx function should be chosen as most of the
+	 * PTP apps are implemented to rx burst 1 pkt.
+	 */
+	if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+		if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+			return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+		return pick_rx_func(eth_dev, nix_eth_rx_burst);
+	}
+
+	if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+		return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
+	return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
 }
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index 1cc37cbaa..5926ff7f4 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -119,8 +119,15 @@  nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,

 	sg = *(const uint64_t *)(rx + 1);
 	nb_segs = (sg >> 48) & 0x3;
-	mbuf->nb_segs = nb_segs;
+
+	if (nb_segs == 1) {
+		mbuf->next = NULL;
+		return;
+	}
+
+	mbuf->pkt_len = rx->pkt_lenm1 + 1;
 	mbuf->data_len = sg & 0xFFFF;
+	mbuf->nb_segs = nb_segs;
 	sg = sg >> 16;

 	eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1));
@@ -195,15 +202,14 @@  cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
 		ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf);

 	mbuf->ol_flags = ol_flags;
-	*(uint64_t *)(&mbuf->rearm_data) = val;
 	mbuf->pkt_len = len;
+	mbuf->data_len = len;
+	*(uint64_t *)(&mbuf->rearm_data) = val;

-	if (flag & NIX_RX_MULTI_SEG_F) {
+	if (flag & NIX_RX_MULTI_SEG_F)
 		nix_cqe_xtract_mseg(rx, mbuf, val);
-	} else {
-		mbuf->data_len = len;
+	else
 		mbuf->next = NULL;
-	}
 }

 static inline uint16_t
@@ -481,16 +487,34 @@  cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
 		vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
 		vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);

-		/* Update that no more segments */
-		mbuf0->next = NULL;
-		mbuf1->next = NULL;
-		mbuf2->next = NULL;
-		mbuf3->next = NULL;
-
 		/* Store the mbufs to rx_pkts */
 		vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
 		vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);

+		if (flags & NIX_RX_MULTI_SEG_F) {
+			/* Multi segment is enabled, build mseg list for
+			 * individual mbufs in scalar mode.
+			 */
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+					    (cq0 + CQE_SZ(0) + 8), mbuf0,
+					    mbuf_initializer);
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+					    (cq0 + CQE_SZ(1) + 8), mbuf1,
+					    mbuf_initializer);
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+					    (cq0 + CQE_SZ(2) + 8), mbuf2,
+					    mbuf_initializer);
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+					    (cq0 + CQE_SZ(3) + 8), mbuf3,
+					    mbuf_initializer);
+		} else {
+			/* Update that no more segments */
+			mbuf0->next = NULL;
+			mbuf1->next = NULL;
+			mbuf2->next = NULL;
+			mbuf3->next = NULL;
+		}
+
 		/* Prefetch mbufs */
 		roc_prefetch_store_keep(mbuf0);
 		roc_prefetch_store_keep(mbuf1);
@@ -645,6 +669,9 @@  R(vlan_ts_mark_cksum_ptype_rss,	1, 1, 1, 1, 1, 1,			       \
 		void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
 									       \
 	uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name(      \
+		void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
+									       \
+	uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
 		void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);

 NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
new file mode 100644
index 000000000..04d1e46c8
--- /dev/null
+++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
@@ -0,0 +1,17 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_ethdev.h"
+#include "cn10k_rx.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
+	uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
+		void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts)      \
+	{                                                                      \
+		return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts,     \
+					  (flags) | NIX_RX_MULTI_SEG_F);       \
+	}
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c
index 0acedd0a1..d293d4eac 100644
--- a/drivers/net/cnxk/cn9k_rx.c
+++ b/drivers/net/cnxk/cn9k_rx.c
@@ -29,6 +29,8 @@  pick_rx_func(struct rte_eth_dev *eth_dev,
 		[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
 		[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
 		[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
+
+	rte_atomic_thread_fence(__ATOMIC_RELEASE);
 }

 void
@@ -60,20 +62,29 @@  cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
 #undef R
 	};

-	/* For PTP enabled, scalar rx function should be chosen as most of the
-	 * PTP apps are implemented to rx burst 1 pkt.
-	 */
-	if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
-		pick_rx_func(eth_dev, nix_eth_rx_burst);
-	else
-		pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
+	const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
+	[f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name,

-	if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
-		pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};

 	/* Copy multi seg version with no offload for tear down sequence */
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
 		dev->rx_pkt_burst_no_offload =
 			nix_eth_rx_burst_mseg[0][0][0][0][0][0];
-	rte_mb();
+
+	/* For PTP enabled, scalar rx function should be chosen as most of the
+	 * PTP apps are implemented to rx burst 1 pkt.
+	 */
+	if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+		if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+			return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+		return pick_rx_func(eth_dev, nix_eth_rx_burst);
+	}
+
+	if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+		return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
+	return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
 }
diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
index 10ef5c690..5ae9e8195 100644
--- a/drivers/net/cnxk/cn9k_rx.h
+++ b/drivers/net/cnxk/cn9k_rx.h
@@ -120,8 +120,15 @@  nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,

 	sg = *(const uint64_t *)(rx + 1);
 	nb_segs = (sg >> 48) & 0x3;
-	mbuf->nb_segs = nb_segs;
+
+	if (nb_segs == 1) {
+		mbuf->next = NULL;
+		return;
+	}
+
+	mbuf->pkt_len = rx->pkt_lenm1 + 1;
 	mbuf->data_len = sg & 0xFFFF;
+	mbuf->nb_segs = nb_segs;
 	sg = sg >> 16;

 	eol = ((const rte_iova_t *)(rx + 1) +
@@ -198,15 +205,14 @@  cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
 			nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf);

 	mbuf->ol_flags = ol_flags;
-	*(uint64_t *)(&mbuf->rearm_data) = val;
 	mbuf->pkt_len = len;
+	mbuf->data_len = len;
+	*(uint64_t *)(&mbuf->rearm_data) = val;

-	if (flag & NIX_RX_MULTI_SEG_F) {
+	if (flag & NIX_RX_MULTI_SEG_F)
 		nix_cqe_xtract_mseg(rx, mbuf, val);
-	} else {
-		mbuf->data_len = len;
+	else
 		mbuf->next = NULL;
-	}
 }

 static inline uint16_t
@@ -484,16 +490,34 @@  cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
 		vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
 		vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);

-		/* Update that no more segments */
-		mbuf0->next = NULL;
-		mbuf1->next = NULL;
-		mbuf2->next = NULL;
-		mbuf3->next = NULL;
-
 		/* Store the mbufs to rx_pkts */
 		vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
 		vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);

+		if (flags & NIX_RX_MULTI_SEG_F) {
+			/* Multi segment is enabled, build mseg list for
+			 * individual mbufs in scalar mode.
+			 */
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+					    (cq0 + CQE_SZ(0) + 8), mbuf0,
+					    mbuf_initializer);
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+					    (cq0 + CQE_SZ(1) + 8), mbuf1,
+					    mbuf_initializer);
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+					    (cq0 + CQE_SZ(2) + 8), mbuf2,
+					    mbuf_initializer);
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+					    (cq0 + CQE_SZ(3) + 8), mbuf3,
+					    mbuf_initializer);
+		} else {
+			/* Update that no more segments */
+			mbuf0->next = NULL;
+			mbuf1->next = NULL;
+			mbuf2->next = NULL;
+			mbuf3->next = NULL;
+		}
+
 		/* Prefetch mbufs */
 		roc_prefetch_store_keep(mbuf0);
 		roc_prefetch_store_keep(mbuf1);
@@ -647,6 +671,9 @@  R(vlan_ts_mark_cksum_ptype_rss,	1, 1, 1, 1, 1, 1,			       \
 		void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
 									       \
 	uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name(       \
+		void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
+									       \
+	uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name(  \
 		void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);

 NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
new file mode 100644
index 000000000..e46d8a474
--- /dev/null
+++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
@@ -0,0 +1,18 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_ethdev.h"
+#include "cn9k_rx.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
+	uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name(  \
+		void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts)      \
+	{                                                                      \
+		return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts,      \
+						 (flags) |                     \
+							 NIX_RX_MULTI_SEG_F);  \
+	}
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
index 2071d0dcb..aa8c7253f 100644
--- a/drivers/net/cnxk/meson.build
+++ b/drivers/net/cnxk/meson.build
@@ -23,6 +23,7 @@  sources += files('cn9k_ethdev.c',
 		 'cn9k_rx.c',
 		 'cn9k_rx_mseg.c',
 		 'cn9k_rx_vec.c',
+		 'cn9k_rx_vec_mseg.c',
 		 'cn9k_tx.c',
 		 'cn9k_tx_mseg.c',
 		 'cn9k_tx_vec.c')
@@ -32,6 +33,7 @@  sources += files('cn10k_ethdev.c',
 		 'cn10k_rx.c',
 		 'cn10k_rx_mseg.c',
 		 'cn10k_rx_vec.c',
+		 'cn10k_rx_vec_mseg.c',
 		 'cn10k_tx.c',
 		 'cn10k_tx_mseg.c',
 		 'cn10k_tx_vec.c')