[v2,3/5] net/ice: maximize vector rx burst for ice

Message ID 20200827101008.76906-4-jia.guo@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Qi Zhang
Headers
Series maximize vector rx burst for PMDs |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Guo, Jia Aug. 27, 2020, 10:10 a.m. UTC
  The limitation on burst size in the vector rx path is removed, since the
driver should retrieve as many received packets as possible. The scattered
receive path now uses a wrapper function to achieve the same goal of
maximizing the burst.

Signed-off-by: Jeff Guo <jia.guo@intel.com>
---
 drivers/net/ice/ice_rxtx_vec_avx2.c | 11 +------
 drivers/net/ice/ice_rxtx_vec_sse.c  | 49 ++++++++++++++++++++---------
 2 files changed, 35 insertions(+), 25 deletions(-)
  

Comments

Qi Zhang Aug. 31, 2020, 4:41 a.m. UTC | #1
> -----Original Message-----
> From: Guo, Jia <jia.guo@intel.com>
> Sent: Thursday, August 27, 2020 6:10 PM
> To: Yang, Qiming <qiming.yang@intel.com>; Xing, Beilei
> <beilei.xing@intel.com>; Zhao1, Wei <wei.zhao1@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>
> Cc: Richardson, Bruce <bruce.richardson@intel.com>; dev@dpdk.org; Guo, Jia
> <jia.guo@intel.com>; Zhang, Helin <helin.zhang@intel.com>;
> mb@smartsharesystems.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
> barbette@kth.se
> Subject: [PATCH v2 3/5] net/ice: maximize vector rx burst for ice
> 
> The limitation on burst size in the vector rx path is removed, since the driver
> should retrieve as many received packets as possible. The scattered receive path
> now uses a wrapper function to achieve the same goal of maximizing the burst.
> 
> Signed-off-by: Jeff Guo <jia.guo@intel.com>
> ---
>  drivers/net/ice/ice_rxtx_vec_avx2.c | 11 +------
> drivers/net/ice/ice_rxtx_vec_sse.c  | 49 ++++++++++++++++++++---------
>  2 files changed, 35 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c
> b/drivers/net/ice/ice_rxtx_vec_avx2.c
> index be50677c2..b7e624fda 100644
> --- a/drivers/net/ice/ice_rxtx_vec_avx2.c
> +++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
> @@ -603,10 +603,6 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue
> *rxq, struct rte_mbuf **rx_pkts,
>  	return received;
>  }
> 
> -/**
> - * Notice:
> - * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
> - */

Actually, this is a question for all the patches in the patchset.
Why do we remove the above comment? I think the patch should only target the case when nb_pkts > ICE_VPMD_RX_BURST.
For the small packet number case, nothing has changed, right?


>  uint16_t
>  ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
>  		       uint16_t nb_pkts)
> @@ -615,9 +611,7 @@ ice_recv_pkts_vec_avx2(void *rx_queue, struct
> rte_mbuf **rx_pkts,  }
> 
>  /**
> - * vPMD receive routine that reassembles single burst of 32 scattered packets
> - * Notice:
> - * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
> + * vPMD receive routine that reassembles scattered packets
>   */
>  static uint16_t
>  ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf
> **rx_pkts, @@ -657,9 +651,6 @@ ice_recv_scattered_burst_vec_avx2(void
> *rx_queue, struct rte_mbuf **rx_pkts,
> 
>  /**
>   * vPMD receive routine that reassembles scattered packets.
> - * Main receive routine that can handle arbitrary burst sizes
> - * Notice:
> - * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
>   */
>  uint16_t
>  ice_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
> diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c
> b/drivers/net/ice/ice_rxtx_vec_sse.c
> index 382ef31f3..25ae368cc 100644
> --- a/drivers/net/ice/ice_rxtx_vec_sse.c
> +++ b/drivers/net/ice/ice_rxtx_vec_sse.c
> @@ -205,10 +205,11 @@ ice_rx_desc_to_ptype_v(__m128i descs[4], struct
> rte_mbuf **rx_pkts,  }
> 
>  /**
> + * vPMD raw receive routine, only accept(nb_pkts >= ICE_DESCS_PER_LOOP)
> + *
>   * Notice:
>   * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
> - * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
> - *   numbers of DD bits
> + * - floor align nb_pkts to a ICE_DESCS_PER_LOOP power-of-two
>   */
>  static inline uint16_t
>  _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
> @@ -264,9 +265,6 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq,
> struct rte_mbuf **rx_pkts,
>  	const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
>  						 0x0000000200000002LL);
> 
> -	/* nb_pkts shall be less equal than ICE_MAX_RX_BURST */
> -	nb_pkts = RTE_MIN(nb_pkts, ICE_MAX_RX_BURST);
> -
>  	/* nb_pkts has to be floor-aligned to ICE_DESCS_PER_LOOP */
>  	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, ICE_DESCS_PER_LOOP);
> 
> @@ -444,8 +442,6 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq,
> struct rte_mbuf **rx_pkts,
>  /**
>   * Notice:
>   * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
> - * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
> - *   numbers of DD bits
>   */
>  uint16_t
>  ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, @@ -454,15
> +450,12 @@ ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>  	return _ice_recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);  }
> 
> -/* vPMD receive routine that reassembles scattered packets
> - * Notice:
> - * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
> - * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
> - *   numbers of DD bits
> +/**
> + * vPMD receive routine that reassembles scattered packets
>   */
> -uint16_t
> -ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> -			    uint16_t nb_pkts)
> +static uint16_t
> +ice_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> +			     uint16_t nb_pkts)
>  {
>  	struct ice_rx_queue *rxq = rx_queue;
>  	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0}; @@ -496,6 +489,32 @@
> ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>  					     &split_flags[i]);
>  }
> 
> +/**
> + * vPMD receive routine that reassembles scattered packets.
> + */
> +uint16_t
> +ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> +			    uint16_t nb_pkts)
> +{
> +	uint16_t retval = 0;
> +
> +	while (nb_pkts > ICE_VPMD_RX_BURST) {
> +		uint16_t burst;
> +
> +		burst = ice_recv_scattered_burst_vec(rx_queue,
> +						     rx_pkts + retval,
> +						     ICE_VPMD_RX_BURST);
> +		retval += burst;
> +		nb_pkts -= burst;
> +		if (burst < ICE_VPMD_RX_BURST)
> +			return retval;
> +	}
> +
> +	return retval + ice_recv_scattered_burst_vec(rx_queue,
> +						     rx_pkts + retval,
> +						     nb_pkts);
> +}
> +
>  static inline void
>  ice_vtx1(volatile struct ice_tx_desc *txdp, struct rte_mbuf *pkt,
>  	 uint64_t flags)
> --
> 2.20.1
  
Guo, Jia Aug. 31, 2020, 5:24 a.m. UTC | #2
hi, qi

On 8/31/2020 12:41 PM, Zhang, Qi Z wrote:
>
>> -----Original Message-----
>> From: Guo, Jia <jia.guo@intel.com>
>> Sent: Thursday, August 27, 2020 6:10 PM
>> To: Yang, Qiming <qiming.yang@intel.com>; Xing, Beilei
>> <beilei.xing@intel.com>; Zhao1, Wei <wei.zhao1@intel.com>; Zhang, Qi Z
>> <qi.z.zhang@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>
>> Cc: Richardson, Bruce <bruce.richardson@intel.com>; dev@dpdk.org; Guo, Jia
>> <jia.guo@intel.com>; Zhang, Helin <helin.zhang@intel.com>;
>> mb@smartsharesystems.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
>> barbette@kth.se
>> Subject: [PATCH v2 3/5] net/ice: maximize vector rx burst for ice
>>
>> The limitation on burst size in the vector rx path is removed, since the driver
>> should retrieve as many received packets as possible. The scattered receive path
>> now uses a wrapper function to achieve the same goal of maximizing the burst.
>>
>> Signed-off-by: Jeff Guo <jia.guo@intel.com>
>> ---
>>   drivers/net/ice/ice_rxtx_vec_avx2.c | 11 +------
>> drivers/net/ice/ice_rxtx_vec_sse.c  | 49 ++++++++++++++++++++---------
>>   2 files changed, 35 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c
>> b/drivers/net/ice/ice_rxtx_vec_avx2.c
>> index be50677c2..b7e624fda 100644
>> --- a/drivers/net/ice/ice_rxtx_vec_avx2.c
>> +++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
>> @@ -603,10 +603,6 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue
>> *rxq, struct rte_mbuf **rx_pkts,
>>   	return received;
>>   }
>>
>> -/**
>> - * Notice:
>> - * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
>> - */
> Actually, this is a question for all the patches in the patchset.
> Why do we remove the above comment? I think the patch should only target the case when nb_pkts > ICE_VPMD_RX_BURST.
> For the small packet number case, nothing has changed, right?


This is just a doc cleanup: there is no need to
duplicate the doc on these layered helper functions when it is not
reflected in their code.

And yes, nothing has changed for the small packet number case.


>
>>   uint16_t
>>   ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
>>   		       uint16_t nb_pkts)
>> @@ -615,9 +611,7 @@ ice_recv_pkts_vec_avx2(void *rx_queue, struct
>> rte_mbuf **rx_pkts,  }
>>
>>   /**
>> - * vPMD receive routine that reassembles single burst of 32 scattered packets
>> - * Notice:
>> - * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
>> + * vPMD receive routine that reassembles scattered packets
>>    */
>>   static uint16_t
>>   ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf
>> **rx_pkts, @@ -657,9 +651,6 @@ ice_recv_scattered_burst_vec_avx2(void
>> *rx_queue, struct rte_mbuf **rx_pkts,
>>
>>   /**
>>    * vPMD receive routine that reassembles scattered packets.
>> - * Main receive routine that can handle arbitrary burst sizes
>> - * Notice:
>> - * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
>>    */
>>   uint16_t
>>   ice_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
>> diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c
>> b/drivers/net/ice/ice_rxtx_vec_sse.c
>> index 382ef31f3..25ae368cc 100644
>> --- a/drivers/net/ice/ice_rxtx_vec_sse.c
>> +++ b/drivers/net/ice/ice_rxtx_vec_sse.c
>> @@ -205,10 +205,11 @@ ice_rx_desc_to_ptype_v(__m128i descs[4], struct
>> rte_mbuf **rx_pkts,  }
>>
>>   /**
>> + * vPMD raw receive routine, only accept(nb_pkts >= ICE_DESCS_PER_LOOP)
>> + *
>>    * Notice:
>>    * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
>> - * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
>> - *   numbers of DD bits
>> + * - floor align nb_pkts to a ICE_DESCS_PER_LOOP power-of-two
>>    */
>>   static inline uint16_t
>>   _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>> @@ -264,9 +265,6 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq,
>> struct rte_mbuf **rx_pkts,
>>   	const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
>>   						 0x0000000200000002LL);
>>
>> -	/* nb_pkts shall be less equal than ICE_MAX_RX_BURST */
>> -	nb_pkts = RTE_MIN(nb_pkts, ICE_MAX_RX_BURST);
>> -
>>   	/* nb_pkts has to be floor-aligned to ICE_DESCS_PER_LOOP */
>>   	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, ICE_DESCS_PER_LOOP);
>>
>> @@ -444,8 +442,6 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq,
>> struct rte_mbuf **rx_pkts,
>>   /**
>>    * Notice:
>>    * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
>> - * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
>> - *   numbers of DD bits
>>    */
>>   uint16_t
>>   ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, @@ -454,15
>> +450,12 @@ ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>>   	return _ice_recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);  }
>>
>> -/* vPMD receive routine that reassembles scattered packets
>> - * Notice:
>> - * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
>> - * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
>> - *   numbers of DD bits
>> +/**
>> + * vPMD receive routine that reassembles scattered packets
>>    */
>> -uint16_t
>> -ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>> -			    uint16_t nb_pkts)
>> +static uint16_t
>> +ice_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>> +			     uint16_t nb_pkts)
>>   {
>>   	struct ice_rx_queue *rxq = rx_queue;
>>   	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0}; @@ -496,6 +489,32 @@
>> ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>>   					     &split_flags[i]);
>>   }
>>
>> +/**
>> + * vPMD receive routine that reassembles scattered packets.
>> + */
>> +uint16_t
>> +ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>> +			    uint16_t nb_pkts)
>> +{
>> +	uint16_t retval = 0;
>> +
>> +	while (nb_pkts > ICE_VPMD_RX_BURST) {
>> +		uint16_t burst;
>> +
>> +		burst = ice_recv_scattered_burst_vec(rx_queue,
>> +						     rx_pkts + retval,
>> +						     ICE_VPMD_RX_BURST);
>> +		retval += burst;
>> +		nb_pkts -= burst;
>> +		if (burst < ICE_VPMD_RX_BURST)
>> +			return retval;
>> +	}
>> +
>> +	return retval + ice_recv_scattered_burst_vec(rx_queue,
>> +						     rx_pkts + retval,
>> +						     nb_pkts);
>> +}
>> +
>>   static inline void
>>   ice_vtx1(volatile struct ice_tx_desc *txdp, struct rte_mbuf *pkt,
>>   	 uint64_t flags)
>> --
>> 2.20.1
  

Patch

diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c
index be50677c2..b7e624fda 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -603,10 +603,6 @@  _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	return received;
 }
 
-/**
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- */
 uint16_t
 ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts)
@@ -615,9 +611,7 @@  ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 }
 
 /**
- * vPMD receive routine that reassembles single burst of 32 scattered packets
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
+ * vPMD receive routine that reassembles scattered packets
  */
 static uint16_t
 ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -657,9 +651,6 @@  ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 /**
  * vPMD receive routine that reassembles scattered packets.
- * Main receive routine that can handle arbitrary burst sizes
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
  */
 uint16_t
 ice_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c
index 382ef31f3..25ae368cc 100644
--- a/drivers/net/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/ice/ice_rxtx_vec_sse.c
@@ -205,10 +205,11 @@  ice_rx_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
 }
 
 /**
+ * vPMD raw receive routine, only accept(nb_pkts >= ICE_DESCS_PER_LOOP)
+ *
  * Notice:
  * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
- *   numbers of DD bits
+ * - floor align nb_pkts to a ICE_DESCS_PER_LOOP power-of-two
  */
 static inline uint16_t
 _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
@@ -264,9 +265,6 @@  _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
 						 0x0000000200000002LL);
 
-	/* nb_pkts shall be less equal than ICE_MAX_RX_BURST */
-	nb_pkts = RTE_MIN(nb_pkts, ICE_MAX_RX_BURST);
-
 	/* nb_pkts has to be floor-aligned to ICE_DESCS_PER_LOOP */
 	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, ICE_DESCS_PER_LOOP);
 
@@ -444,8 +442,6 @@  _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 /**
  * Notice:
  * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
- *   numbers of DD bits
  */
 uint16_t
 ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -454,15 +450,12 @@  ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return _ice_recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
-/* vPMD receive routine that reassembles scattered packets
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
- *   numbers of DD bits
+/**
+ * vPMD receive routine that reassembles scattered packets
  */
-uint16_t
-ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
-			    uint16_t nb_pkts)
+static uint16_t
+ice_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+			     uint16_t nb_pkts)
 {
 	struct ice_rx_queue *rxq = rx_queue;
 	uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
@@ -496,6 +489,32 @@  ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 					     &split_flags[i]);
 }
 
+/**
+ * vPMD receive routine that reassembles scattered packets.
+ */
+uint16_t
+ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+			    uint16_t nb_pkts)
+{
+	uint16_t retval = 0;
+
+	while (nb_pkts > ICE_VPMD_RX_BURST) {
+		uint16_t burst;
+
+		burst = ice_recv_scattered_burst_vec(rx_queue,
+						     rx_pkts + retval,
+						     ICE_VPMD_RX_BURST);
+		retval += burst;
+		nb_pkts -= burst;
+		if (burst < ICE_VPMD_RX_BURST)
+			return retval;
+	}
+
+	return retval + ice_recv_scattered_burst_vec(rx_queue,
+						     rx_pkts + retval,
+						     nb_pkts);
+}
+
 static inline void
 ice_vtx1(volatile struct ice_tx_desc *txdp, struct rte_mbuf *pkt,
 	 uint64_t flags)