diff mbox series

[v5,5/5] net/iavf: fix vector rx burst for iavf

Message ID 20201016094431.96889-6-jia.guo@intel.com (mailing list archive)
State Accepted, archived
Delegated to: Qi Zhang
Headers show
Series fix vector rx burst for PMDs | expand

Checks

Context Check Description
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-testing fail Testing issues
ci/iol-intel-Performance success Performance Testing PASS
ci/travis-robot success Travis build: passed
ci/Intel-compilation success Compilation OK
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/checkpatch success coding style OK

Commit Message

Guo, Jia Oct. 16, 2020, 9:44 a.m. UTC
The limitation of burst size in vector rx was removed, since it should
retrieve as many received packets as possible. Also, the scattered
receive path should use a wrapper function to achieve the goal of
maximizing the burst.

Bugzilla ID: 516
Fixes: 319c421f3890 ("net/avf: enable SSE Rx Tx")
Fixes: 1162f5a0ef31 ("net/iavf: support flexible Rx descriptor in SSE path")
Fixes: 5b6e8859081d ("net/iavf: support flexible Rx descriptor in AVX path")

Signed-off-by: Jeff Guo <jia.guo@intel.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 103 ++++++++++++++++++++-------
 1 file changed, 78 insertions(+), 25 deletions(-)

Comments

Ling, WeiX Oct. 23, 2020, 5:09 a.m. UTC | #1
Tested-by: Ling, Wei <weix.ling@intel.com>

Regards,
Ling Wei

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Jeff Guo
> Sent: Friday, October 16, 2020 05:45 PM
> To: Wu, Jingjing <jingjing.wu@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>; Xing, Beilei <beilei.xing@intel.com>; Wang, Haiyue
> <haiyue.wang@intel.com>; Yang, Qiming <qiming.yang@intel.com>
> Cc: dev@dpdk.org; Yigit, Ferruh <ferruh.yigit@intel.com>;
> mb@smartsharesystems.com; stephen@networkplumber.org;
> barbette@kth.se; Feifei.wang2@arm.com; Richardson, Bruce
> <bruce.richardson@intel.com>; Guo, Jia <jia.guo@intel.com>; Zhang, Helin
> <helin.zhang@intel.com>
> Subject: [dpdk-dev] [PATCH v5 5/5] net/iavf: fix vector rx burst for iavf
> 
> The limitation of burst size in vector rx was removed, since it should retrieve
> as many received packets as possible. Also, the scattered receive path
> should use a wrapper function to achieve the goal of maximizing the burst.
> 
> Bugzilla ID: 516
> Fixes: 319c421f3890 ("net/avf: enable SSE Rx Tx")
> Fixes: 1162f5a0ef31 ("net/iavf: support flexible Rx descriptor in SSE path")
> Fixes: 5b6e8859081d ("net/iavf: support flexible Rx descriptor in AVX path")
> 
> Signed-off-by: Jeff Guo <jia.guo@intel.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---
>  drivers/net/iavf/iavf_rxtx_vec_sse.c | 103 ++++++++++++++++++++-------
>  1 file changed, 78 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
> index 85c5bd4af0..11acaa029e 100644
> --- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
> +++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
> @@ -379,10 +379,12 @@ flex_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
>  	rx_pkts[3]->packet_type = type_table[_mm_extract_epi16(ptype_all, 7)];
>  }
> 
> -/* Notice:
> +/**
> + * vPMD raw receive routine, only accept(nb_pkts >= IAVF_VPMD_DESCS_PER_LOOP)
> + *
> + * Notice:
>   * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
> - * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
> - *   numbers of DD bits
> + * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
>   */
>  static inline uint16_t
>  _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
> @@ -413,9 +415,6 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>  			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
>  	__m128i dd_check, eop_check;
> 
> -	/* nb_pkts shall be less equal than IAVF_VPMD_RX_MAX_BURST */
> -	nb_pkts = RTE_MIN(nb_pkts, IAVF_VPMD_RX_MAX_BURST);
> -
>  	/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP */
>  	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP);
> 
> @@ -627,10 +626,13 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>  	return nb_pkts_recd;
>  }
> 
> -/* Notice:
> +/**
> + * vPMD raw receive routine for flex RxD,
> + * only accept(nb_pkts >= IAVF_VPMD_DESCS_PER_LOOP)
> + *
> + * Notice:
>   * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
> - * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
> - *   numbers of DD bits
> + * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
>   */
>  static inline uint16_t
>  _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
> @@ -688,9 +690,6 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
>  	const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
>  						 0x0000000200000002LL);
> 
> -	/* nb_pkts shall be less equal than IAVF_VPMD_RX_MAX_BURST */
> -	nb_pkts = RTE_MIN(nb_pkts, IAVF_VPMD_RX_MAX_BURST);
> -
>  	/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP */
>  	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP);
> 
> @@ -945,15 +944,15 @@ iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
>  	return _recv_raw_pkts_vec_flex_rxd(rx_queue, rx_pkts, nb_pkts, NULL);
>  }
> 
> -/* vPMD receive routine that reassembles scattered packets
> +/**
> + * vPMD receive routine that reassembles single burst of 32 scattered packets
> + *
>   * Notice:
>   * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
> - * - nb_pkts > VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
> - *   numbers of DD bits
>   */
> -uint16_t
> -iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> -			    uint16_t nb_pkts)
> +static uint16_t
> +iavf_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> +			      uint16_t nb_pkts)
>  {
>  	struct iavf_rx_queue *rxq = rx_queue;
>  	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
> @@ -986,16 +985,43 @@ iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>  		&split_flags[i]);
>  }
> 
> -/* vPMD receive routine that reassembles scattered packets for flex RxD
> +/**
> + * vPMD receive routine that reassembles scattered packets.
> + */
> +uint16_t
> +iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> +			     uint16_t nb_pkts)
> +{
> +	uint16_t retval = 0;
> +
> +	while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
> +		uint16_t burst;
> +
> +		burst = iavf_recv_scattered_burst_vec(rx_queue,
> +						      rx_pkts + retval,
> +						      IAVF_VPMD_RX_MAX_BURST);
> +		retval += burst;
> +		nb_pkts -= burst;
> +		if (burst < IAVF_VPMD_RX_MAX_BURST)
> +			return retval;
> +	}
> +
> +	return retval + iavf_recv_scattered_burst_vec(rx_queue,
> +						      rx_pkts + retval,
> +						      nb_pkts);
> +}
> +
> +/**
> + * vPMD receive routine that reassembles single burst of 32 scattered packets
> + * for flex RxD
> + *
>   * Notice:
>   * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
> - * - nb_pkts > VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
> - *   numbers of DD bits
>   */
> -uint16_t
> -iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
> -				      struct rte_mbuf **rx_pkts,
> -				      uint16_t nb_pkts)
> +static uint16_t
> +iavf_recv_scattered_burst_vec_flex_rxd(void *rx_queue,
> +				       struct rte_mbuf **rx_pkts,
> +				       uint16_t nb_pkts)
>  {
>  	struct iavf_rx_queue *rxq = rx_queue;
>  	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
> @@ -1028,6 +1054,33 @@ iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
>  		&split_flags[i]);
>  }
> 
> +/**
> + * vPMD receive routine that reassembles scattered packets for flex RxD
> + */
> +uint16_t
> +iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
> +				      struct rte_mbuf **rx_pkts,
> +				      uint16_t nb_pkts)
> +{
> +	uint16_t retval = 0;
> +
> +	while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
> +		uint16_t burst;
> +
> +		burst = iavf_recv_scattered_burst_vec_flex_rxd(rx_queue,
> +						rx_pkts + retval,
> +						IAVF_VPMD_RX_MAX_BURST);
> +		retval += burst;
> +		nb_pkts -= burst;
> +		if (burst < IAVF_VPMD_RX_MAX_BURST)
> +			return retval;
> +	}
> +
> +	return retval + iavf_recv_scattered_burst_vec_flex_rxd(rx_queue,
> +						      rx_pkts + retval,
> +						      nb_pkts);
> +}
> +
>  static inline void
>  vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
>  {
> --
> 2.20.1
diff mbox series

Patch

diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 85c5bd4af0..11acaa029e 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -379,10 +379,12 @@  flex_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
 	rx_pkts[3]->packet_type = type_table[_mm_extract_epi16(ptype_all, 7)];
 }
 
-/* Notice:
+/**
+ * vPMD raw receive routine, only accept(nb_pkts >= IAVF_VPMD_DESCS_PER_LOOP)
+ *
+ * Notice:
  * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
- *   numbers of DD bits
+ * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
 _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
@@ -413,9 +415,6 @@  _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
 	__m128i dd_check, eop_check;
 
-	/* nb_pkts shall be less equal than IAVF_VPMD_RX_MAX_BURST */
-	nb_pkts = RTE_MIN(nb_pkts, IAVF_VPMD_RX_MAX_BURST);
-
 	/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP */
 	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP);
 
@@ -627,10 +626,13 @@  _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	return nb_pkts_recd;
 }
 
-/* Notice:
+/**
+ * vPMD raw receive routine for flex RxD,
+ * only accept(nb_pkts >= IAVF_VPMD_DESCS_PER_LOOP)
+ *
+ * Notice:
  * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
- *   numbers of DD bits
+ * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
 _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
@@ -688,9 +690,6 @@  _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
 						 0x0000000200000002LL);
 
-	/* nb_pkts shall be less equal than IAVF_VPMD_RX_MAX_BURST */
-	nb_pkts = RTE_MIN(nb_pkts, IAVF_VPMD_RX_MAX_BURST);
-
 	/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP */
 	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP);
 
@@ -945,15 +944,15 @@  iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return _recv_raw_pkts_vec_flex_rxd(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
-/* vPMD receive routine that reassembles scattered packets
+/**
+ * vPMD receive routine that reassembles single burst of 32 scattered packets
+ *
  * Notice:
  * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
- *   numbers of DD bits
  */
-uint16_t
-iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
-			    uint16_t nb_pkts)
+static uint16_t
+iavf_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+			      uint16_t nb_pkts)
 {
 	struct iavf_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
@@ -986,16 +985,43 @@  iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		&split_flags[i]);
 }
 
-/* vPMD receive routine that reassembles scattered packets for flex RxD
+/**
+ * vPMD receive routine that reassembles scattered packets.
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+			     uint16_t nb_pkts)
+{
+	uint16_t retval = 0;
+
+	while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
+		uint16_t burst;
+
+		burst = iavf_recv_scattered_burst_vec(rx_queue,
+						      rx_pkts + retval,
+						      IAVF_VPMD_RX_MAX_BURST);
+		retval += burst;
+		nb_pkts -= burst;
+		if (burst < IAVF_VPMD_RX_MAX_BURST)
+			return retval;
+	}
+
+	return retval + iavf_recv_scattered_burst_vec(rx_queue,
+						      rx_pkts + retval,
+						      nb_pkts);
+}
+
+/**
+ * vPMD receive routine that reassembles single burst of 32 scattered packets
+ * for flex RxD
+ *
  * Notice:
  * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
- *   numbers of DD bits
  */
-uint16_t
-iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
-				      struct rte_mbuf **rx_pkts,
-				      uint16_t nb_pkts)
+static uint16_t
+iavf_recv_scattered_burst_vec_flex_rxd(void *rx_queue,
+				       struct rte_mbuf **rx_pkts,
+				       uint16_t nb_pkts)
 {
 	struct iavf_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
@@ -1028,6 +1054,33 @@  iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
 		&split_flags[i]);
 }
 
+/**
+ * vPMD receive routine that reassembles scattered packets for flex RxD
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
+				      struct rte_mbuf **rx_pkts,
+				      uint16_t nb_pkts)
+{
+	uint16_t retval = 0;
+
+	while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
+		uint16_t burst;
+
+		burst = iavf_recv_scattered_burst_vec_flex_rxd(rx_queue,
+						rx_pkts + retval,
+						IAVF_VPMD_RX_MAX_BURST);
+		retval += burst;
+		nb_pkts -= burst;
+		if (burst < IAVF_VPMD_RX_MAX_BURST)
+			return retval;
+	}
+
+	return retval + iavf_recv_scattered_burst_vec_flex_rxd(rx_queue,
+						      rx_pkts + retval,
+						      nb_pkts);
+}
+
 static inline void
 vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
 {