[v3,4/5] net/ice: fix vector rx burst for ice

Message ID 20200909063636.60205-5-jia.guo@intel.com (mailing list archive)
State Changes Requested, archived
Delegated to: Qi Zhang
Headers
Series fix vector rx burst for PMDs |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Guo, Jia Sept. 9, 2020, 6:36 a.m. UTC
  The limitation of burst size in vector rx was removed, since it should
retrieve as much received packets as possible. And also the scattered
receive path should use a wrapper function to achieve the goal of
burst maximizing. And do some code cleaning for vector rx path.

Signed-off-by: Jeff Guo <jia.guo@intel.com>
---
 drivers/net/ice/ice_rxtx.h          |  1 +
 drivers/net/ice/ice_rxtx_vec_avx2.c | 23 ++++++------
 drivers/net/ice/ice_rxtx_vec_sse.c  | 56 +++++++++++++++++++----------
 3 files changed, 49 insertions(+), 31 deletions(-)
  

Comments

Yingya Han Sept. 15, 2020, 7:10 a.m. UTC | #1
Tested-by: Yingya Han <yingyax.han@intel.com>

-----Original Message-----
From: dev <dev-bounces@dpdk.org> On Behalf Of Jeff Guo
Sent: Wednesday, September 9, 2020 2:37 PM
To: Yang, Qiming <qiming.yang@intel.com>; Xing, Beilei <beilei.xing@intel.com>; Zhao1, Wei <wei.zhao1@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>
Cc: Richardson, Bruce <bruce.richardson@intel.com>; dev@dpdk.org; Guo, Jia <jia.guo@intel.com>; Zhang, Helin <helin.zhang@intel.com>; mb@smartsharesystems.com; Yigit, Ferruh <ferruh.yigit@intel.com>; Wang, Haiyue <haiyue.wang@intel.com>; stephen@networkplumber.org; barbette@kth.se
Subject: [dpdk-dev] [PATCH v3 4/5] net/ice: fix vector rx burst for ice

The limitation of burst size in vector rx was removed, since it should retrieve as much received packets as possible. And also the scattered receive path should use a wrapper function to achieve the goal of burst maximizing. And do some code cleaning for vector rx path.

Signed-off-by: Jeff Guo <jia.guo@intel.com>
---
 drivers/net/ice/ice_rxtx.h          |  1 +
 drivers/net/ice/ice_rxtx_vec_avx2.c | 23 ++++++------  drivers/net/ice/ice_rxtx_vec_sse.c  | 56 +++++++++++++++++++----------
 3 files changed, 49 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h index 2fdcfb7d0..3ef5f300d 100644
--- a/drivers/net/ice/ice_rxtx.h
+++ b/drivers/net/ice/ice_rxtx.h
@@ -35,6 +35,7 @@
 #define ICE_MAX_RX_BURST            ICE_RXQ_REARM_THRESH
 #define ICE_TX_MAX_FREE_BUF_SZ      64
 #define ICE_DESCS_PER_LOOP          4
+#define ICE_DESCS_PER_LOOP_AVX	    8
 
 #define ICE_FDIR_PKT_LEN	512
 
diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c
index be50677c2..843e4f32a 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -29,7 +29,7 @@ ice_rxq_rearm(struct ice_rx_queue *rxq)
 			__m128i dma_addr0;
 
 			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
+			for (i = 0; i < ICE_DESCS_PER_LOOP_AVX; i++) {
 				rxep[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128((__m128i *)&rxdp[i].read,
 						dma_addr0);
@@ -132,12 +132,17 @@ ice_rxq_rearm(struct ice_rx_queue *rxq)
 	ICE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);  }
 
+/**
+ * vPMD raw receive routine, only accept(nb_pkts >= 
+ICE_DESCS_PER_LOOP_AVX)
+ *
+ * Notice:
+ * - nb_pkts < ICE_DESCS_PER_LOOP_AVX, just return no packet
+ * - floor align nb_pkts to a ICE_DESCS_PER_LOOP_AVX power-of-two  */
 static inline uint16_t
 _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts, uint8_t *split_packet)  { -#define ICE_DESCS_PER_LOOP_AVX 8
-
 	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
@@ -603,10 +608,6 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	return received;
 }
 
-/**
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- */
 uint16_t
 ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts)
@@ -616,8 +617,6 @@ ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 /**
  * vPMD receive routine that reassembles single burst of 32 scattered packets
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
  */
 static uint16_t
 ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, @@ -626,6 +625,9 @@ ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 	struct ice_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
+	/* split_flags only can support max of ICE_VPMD_RX_BURST */
+	nb_pkts = RTE_MIN(nb_pkts, ICE_VPMD_RX_BURST);
+
 	/* get some new buffers */
 	uint16_t nb_bufs = _ice_recv_raw_pkts_vec_avx2(rxq, rx_pkts, nb_pkts,
 						       split_flags);
@@ -657,9 +659,6 @@ ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 /**
  * vPMD receive routine that reassembles scattered packets.
- * Main receive routine that can handle arbitrary burst sizes
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 ice_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c
index 382ef31f3..c03e24092 100644
--- a/drivers/net/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/ice/ice_rxtx_vec_sse.c
@@ -205,10 +205,11 @@ ice_rx_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,  }
 
 /**
+ * vPMD raw receive routine, only accept(nb_pkts >= ICE_DESCS_PER_LOOP)
+ *
  * Notice:
  * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
- *   numbers of DD bits
+ * - floor align nb_pkts to a ICE_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
 _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, @@ -264,9 +265,6 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
 						 0x0000000200000002LL);
 
-	/* nb_pkts shall be less equal than ICE_MAX_RX_BURST */
-	nb_pkts = RTE_MIN(nb_pkts, ICE_MAX_RX_BURST);
-
 	/* nb_pkts has to be floor-aligned to ICE_DESCS_PER_LOOP */
 	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, ICE_DESCS_PER_LOOP);
 
@@ -441,12 +439,6 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	return nb_pkts_recd;
 }
 
-/**
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
- *   numbers of DD bits
- */
 uint16_t
 ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		  uint16_t nb_pkts)
@@ -454,19 +446,19 @@ ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return _ice_recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);  }
 
-/* vPMD receive routine that reassembles scattered packets
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
- *   numbers of DD bits
+/**
+ * vPMD receive routine that reassembles single burst of 32 scattered 
+packets
  */
-uint16_t
-ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
-			    uint16_t nb_pkts)
+static uint16_t
+ice_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+			     uint16_t nb_pkts)
 {
 	struct ice_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
+	/* split_flags only can support max of ICE_VPMD_RX_BURST */
+	nb_pkts = RTE_MIN(nb_pkts, ICE_VPMD_RX_BURST);
+
 	/* get some new buffers */
 	uint16_t nb_bufs = _ice_recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
 						  split_flags);
@@ -496,6 +488,32 @@ ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 					     &split_flags[i]);
 }
 
+/**
+ * vPMD receive routine that reassembles scattered packets.
+ */
+uint16_t
+ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+			    uint16_t nb_pkts)
+{
+	uint16_t retval = 0;
+
+	while (nb_pkts > ICE_VPMD_RX_BURST) {
+		uint16_t burst;
+
+		burst = ice_recv_scattered_burst_vec(rx_queue,
+						     rx_pkts + retval,
+						     ICE_VPMD_RX_BURST);
+		retval += burst;
+		nb_pkts -= burst;
+		if (burst < ICE_VPMD_RX_BURST)
+			return retval;
+	}
+
+	return retval + ice_recv_scattered_burst_vec(rx_queue,
+						     rx_pkts + retval,
+						     nb_pkts);
+}
+
 static inline void
 ice_vtx1(volatile struct ice_tx_desc *txdp, struct rte_mbuf *pkt,
 	 uint64_t flags)
--
2.20.1
  

Patch

diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h
index 2fdcfb7d0..3ef5f300d 100644
--- a/drivers/net/ice/ice_rxtx.h
+++ b/drivers/net/ice/ice_rxtx.h
@@ -35,6 +35,7 @@ 
 #define ICE_MAX_RX_BURST            ICE_RXQ_REARM_THRESH
 #define ICE_TX_MAX_FREE_BUF_SZ      64
 #define ICE_DESCS_PER_LOOP          4
+#define ICE_DESCS_PER_LOOP_AVX	    8
 
 #define ICE_FDIR_PKT_LEN	512
 
diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c
index be50677c2..843e4f32a 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -29,7 +29,7 @@  ice_rxq_rearm(struct ice_rx_queue *rxq)
 			__m128i dma_addr0;
 
 			dma_addr0 = _mm_setzero_si128();
-			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
+			for (i = 0; i < ICE_DESCS_PER_LOOP_AVX; i++) {
 				rxep[i].mbuf = &rxq->fake_mbuf;
 				_mm_store_si128((__m128i *)&rxdp[i].read,
 						dma_addr0);
@@ -132,12 +132,17 @@  ice_rxq_rearm(struct ice_rx_queue *rxq)
 	ICE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
 }
 
+/**
+ * vPMD raw receive routine, only accept(nb_pkts >= ICE_DESCS_PER_LOOP_AVX)
+ *
+ * Notice:
+ * - nb_pkts < ICE_DESCS_PER_LOOP_AVX, just return no packet
+ * - floor align nb_pkts to a ICE_DESCS_PER_LOOP_AVX power-of-two
+ */
 static inline uint16_t
 _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts, uint8_t *split_packet)
 {
-#define ICE_DESCS_PER_LOOP_AVX 8
-
 	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
 			0, rxq->mbuf_initializer);
@@ -603,10 +608,6 @@  _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	return received;
 }
 
-/**
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- */
 uint16_t
 ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts)
@@ -616,8 +617,6 @@  ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 /**
  * vPMD receive routine that reassembles single burst of 32 scattered packets
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
  */
 static uint16_t
 ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -626,6 +625,9 @@  ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 	struct ice_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
+	/* split_flags only can support max of ICE_VPMD_RX_BURST */
+	nb_pkts = RTE_MIN(nb_pkts, ICE_VPMD_RX_BURST);
+
 	/* get some new buffers */
 	uint16_t nb_bufs = _ice_recv_raw_pkts_vec_avx2(rxq, rx_pkts, nb_pkts,
 						       split_flags);
@@ -657,9 +659,6 @@  ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 /**
  * vPMD receive routine that reassembles scattered packets.
- * Main receive routine that can handle arbitrary burst sizes
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 ice_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c
index 382ef31f3..c03e24092 100644
--- a/drivers/net/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/ice/ice_rxtx_vec_sse.c
@@ -205,10 +205,11 @@  ice_rx_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
 }
 
 /**
+ * vPMD raw receive routine, only accept(nb_pkts >= ICE_DESCS_PER_LOOP)
+ *
  * Notice:
  * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
- *   numbers of DD bits
+ * - floor align nb_pkts to a ICE_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
 _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
@@ -264,9 +265,6 @@  _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
 						 0x0000000200000002LL);
 
-	/* nb_pkts shall be less equal than ICE_MAX_RX_BURST */
-	nb_pkts = RTE_MIN(nb_pkts, ICE_MAX_RX_BURST);
-
 	/* nb_pkts has to be floor-aligned to ICE_DESCS_PER_LOOP */
 	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, ICE_DESCS_PER_LOOP);
 
@@ -441,12 +439,6 @@  _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	return nb_pkts_recd;
 }
 
-/**
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
- *   numbers of DD bits
- */
 uint16_t
 ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		  uint16_t nb_pkts)
@@ -454,19 +446,19 @@  ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return _ice_recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
-/* vPMD receive routine that reassembles scattered packets
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
- *   numbers of DD bits
+/**
+ * vPMD receive routine that reassembles single burst of 32 scattered packets
  */
-uint16_t
-ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
-			    uint16_t nb_pkts)
+static uint16_t
+ice_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+			     uint16_t nb_pkts)
 {
 	struct ice_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
 
+	/* split_flags only can support max of ICE_VPMD_RX_BURST */
+	nb_pkts = RTE_MIN(nb_pkts, ICE_VPMD_RX_BURST);
+
 	/* get some new buffers */
 	uint16_t nb_bufs = _ice_recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
 						  split_flags);
@@ -496,6 +488,32 @@  ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 					     &split_flags[i]);
 }
 
+/**
+ * vPMD receive routine that reassembles scattered packets.
+ */
+uint16_t
+ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+			    uint16_t nb_pkts)
+{
+	uint16_t retval = 0;
+
+	while (nb_pkts > ICE_VPMD_RX_BURST) {
+		uint16_t burst;
+
+		burst = ice_recv_scattered_burst_vec(rx_queue,
+						     rx_pkts + retval,
+						     ICE_VPMD_RX_BURST);
+		retval += burst;
+		nb_pkts -= burst;
+		if (burst < ICE_VPMD_RX_BURST)
+			return retval;
+	}
+
+	return retval + ice_recv_scattered_burst_vec(rx_queue,
+						     rx_pkts + retval,
+						     nb_pkts);
+}
+
 static inline void
 ice_vtx1(volatile struct ice_tx_desc *txdp, struct rte_mbuf *pkt,
 	 uint64_t flags)