[v5,5/5] net/iavf: fix vector rx burst for iavf
Commit Message
The limitation on burst size in vector Rx is removed, since the routine
should retrieve as many received packets as possible. The scattered
receive path now uses a wrapper function that loops over the maximum
vector burst size, so larger requests are serviced in full as well.
Bugzilla ID: 516
Fixes: 319c421f3890 ("net/avf: enable SSE Rx Tx")
Fixes: 1162f5a0ef31 ("net/iavf: support flexible Rx descriptor in SSE path")
Fixes: 5b6e8859081d ("net/iavf: support flexible Rx descriptor in AVX path")
Signed-off-by: Jeff Guo <jia.guo@intel.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
drivers/net/iavf/iavf_rxtx_vec_sse.c | 103 ++++++++++++++++++++-------
1 file changed, 78 insertions(+), 25 deletions(-)
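
In essence, the scattered Rx entry points become thin wrappers that call the
existing 32-packet burst routine in a loop until the request is satisfied or
the ring runs dry. A minimal sketch of that wrapper pattern follows; the names
(recv_scattered_burst, recv_scattered_pkts, VPMD_RX_MAX_BURST) are
illustrative placeholders, not the exact driver code -- see the diff below for
the real functions.

    #include <stdint.h>

    struct rte_mbuf;                 /* opaque here; the real type is in rte_mbuf.h */

    #define VPMD_RX_MAX_BURST 32     /* one vector burst, as IAVF_VPMD_RX_MAX_BURST */

    /* Burst routine handling at most 32 packets per call (stands in for
     * iavf_recv_scattered_burst_vec in the patch). */
    static uint16_t recv_scattered_burst(void *rxq, struct rte_mbuf **rx_pkts,
                                         uint16_t nb_pkts);

    static uint16_t
    recv_scattered_pkts(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
    {
        uint16_t retval = 0;

        /* Serve large requests in 32-packet chunks instead of silently
         * capping them at 32. */
        while (nb_pkts > VPMD_RX_MAX_BURST) {
            uint16_t burst = recv_scattered_burst(rxq, rx_pkts + retval,
                                                  VPMD_RX_MAX_BURST);

            retval += burst;
            nb_pkts -= burst;
            /* A short burst means no more packets are ready; stop early. */
            if (burst < VPMD_RX_MAX_BURST)
                return retval;
        }

        /* Remaining tail of at most 32 packets. */
        return retval + recv_scattered_burst(rxq, rx_pkts + retval, nb_pkts);
    }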
Comments
Tested-by: Ling, Wei <weix.ling@intel.com>
Regards,
Ling Wei
@@ -379,10 +379,12 @@ flex_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
rx_pkts[3]->packet_type = type_table[_mm_extract_epi16(ptype_all, 7)];
}
-/* Notice:
+/**
+ * vPMD raw receive routine, only accept(nb_pkts >= IAVF_VPMD_DESCS_PER_LOOP)
+ *
+ * Notice:
* - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
- * numbers of DD bits
+ * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
*/
static inline uint16_t
_recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
@@ -413,9 +415,6 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
__m128i dd_check, eop_check;
- /* nb_pkts shall be less equal than IAVF_VPMD_RX_MAX_BURST */
- nb_pkts = RTE_MIN(nb_pkts, IAVF_VPMD_RX_MAX_BURST);
-
/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP */
nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP);
@@ -627,10 +626,13 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
return nb_pkts_recd;
}
-/* Notice:
+/**
+ * vPMD raw receive routine for flex RxD,
+ * only accept(nb_pkts >= IAVF_VPMD_DESCS_PER_LOOP)
+ *
+ * Notice:
* - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
- * numbers of DD bits
+ * - floor align nb_pkts to a IAVF_VPMD_DESCS_PER_LOOP power-of-two
*/
static inline uint16_t
_recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
@@ -688,9 +690,6 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
0x0000000200000002LL);
- /* nb_pkts shall be less equal than IAVF_VPMD_RX_MAX_BURST */
- nb_pkts = RTE_MIN(nb_pkts, IAVF_VPMD_RX_MAX_BURST);
-
/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP */
nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP);
@@ -945,15 +944,15 @@ iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
return _recv_raw_pkts_vec_flex_rxd(rx_queue, rx_pkts, nb_pkts, NULL);
}
-/* vPMD receive routine that reassembles scattered packets
+/**
+ * vPMD receive routine that reassembles single burst of 32 scattered packets
+ *
* Notice:
* - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
- * numbers of DD bits
*/
-uint16_t
-iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t nb_pkts)
+static uint16_t
+iavf_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
{
struct iavf_rx_queue *rxq = rx_queue;
uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
@@ -986,16 +985,43 @@ iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
&split_flags[i]);
}
-/* vPMD receive routine that reassembles scattered packets for flex RxD
+/**
+ * vPMD receive routine that reassembles scattered packets.
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ uint16_t retval = 0;
+
+ while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
+ uint16_t burst;
+
+ burst = iavf_recv_scattered_burst_vec(rx_queue,
+ rx_pkts + retval,
+ IAVF_VPMD_RX_MAX_BURST);
+ retval += burst;
+ nb_pkts -= burst;
+ if (burst < IAVF_VPMD_RX_MAX_BURST)
+ return retval;
+ }
+
+ return retval + iavf_recv_scattered_burst_vec(rx_queue,
+ rx_pkts + retval,
+ nb_pkts);
+}
+
+/**
+ * vPMD receive routine that reassembles single burst of 32 scattered packets
+ * for flex RxD
+ *
* Notice:
* - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
- * numbers of DD bits
*/
-uint16_t
-iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
- struct rte_mbuf **rx_pkts,
- uint16_t nb_pkts)
+static uint16_t
+iavf_recv_scattered_burst_vec_flex_rxd(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
{
struct iavf_rx_queue *rxq = rx_queue;
uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
@@ -1028,6 +1054,33 @@ iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
&split_flags[i]);
}
+/**
+ * vPMD receive routine that reassembles scattered packets for flex RxD
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ uint16_t retval = 0;
+
+ while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
+ uint16_t burst;
+
+ burst = iavf_recv_scattered_burst_vec_flex_rxd(rx_queue,
+ rx_pkts + retval,
+ IAVF_VPMD_RX_MAX_BURST);
+ retval += burst;
+ nb_pkts -= burst;
+ if (burst < IAVF_VPMD_RX_MAX_BURST)
+ return retval;
+ }
+
+ return retval + iavf_recv_scattered_burst_vec_flex_rxd(rx_queue,
+ rx_pkts + retval,
+ nb_pkts);
+}
+
static inline void
vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
{