[v2,6/6] net/hns3: fix vector Rx burst can't exceed 32

Message ID 1619603593-23928-7-git-send-email-humin29@huawei.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Series: optimization and bugfix for hns3 PMD

Checks

Context                      Check    Description
ci/checkpatch                success  coding style OK
ci/github-robot              success  github build: passed
ci/iol-intel-Functional      success  Functional Testing PASS
ci/iol-intel-Performance     success  Performance Testing PASS
ci/iol-testing               success  Testing PASS
ci/iol-abi-testing           success  Testing PASS
ci/iol-mellanox-Performance  success  Performance Testing PASS
ci/Intel-compilation         success  Compilation OK
ci/intel-Testing             success  Testing PASS

Commit Message

humin (Q) April 28, 2021, 9:53 a.m. UTC
  From: Chengwen Feng <fengchengwen@huawei.com>

Currently, the driver uses the macro HNS3_DEFAULT_RX_BURST, whose value
is 32, to limit the vector Rx burst size; as a result, the burst size
cannot exceed 32.

This patch fixes the problem by supporting burst sizes larger than 32:
oversized requests are now served in chunks of at most
HNS3_DEFAULT_RX_BURST packets. Also adjust HNS3_DEFAULT_RX_BURST to 64,
as it performs better than 32.

Fixes: a3d4f4d291d7 ("net/hns3: support NEON Rx")
Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx")
Cc: stable@dpdk.org

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
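For reference, the core of the fix is a chunking loop: when the caller asks
for more than HNS3_DEFAULT_RX_BURST packets, the receive routine invokes the
single-burst helper repeatedly, rearming descriptors between chunks, instead
of silently truncating the request. The macro stays capped at 64 by the
existing #error check, matching the 64-bit pkt_err_mask (one bit per packet).
A minimal sketch of the loop structure, detached from the driver internals
(recv_burst() is a hypothetical stand-in for hns3_recv_burst_vec();
error-mask handling is omitted):

#include <stdint.h>

#define CHUNK 64	/* stands in for HNS3_DEFAULT_RX_BURST */

/* Hypothetical single-chunk receive: returns at most n (n <= CHUNK)
 * packets, fewer once the descriptor ring runs dry. */
uint16_t recv_burst(void *rxq, void **pkts, uint16_t n);

/* Chunked receive mirroring the loop added below: call the burst
 * helper until the request is satisfied or the ring is drained. */
static uint16_t
recv_pkts(void *rxq, void **pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx = 0;

	while (nb_pkts > 0) {
		uint16_t n = nb_pkts < CHUNK ? nb_pkts : CHUNK;
		uint16_t ret = recv_burst(rxq, &pkts[nb_rx], n);

		nb_rx += ret;
		nb_pkts -= ret;
		if (ret < n)	/* short read: nothing more to fetch now */
			break;
	}
	return nb_rx;
}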
 drivers/net/hns3/hns3_rxtx.h         |  2 +-
 drivers/net/hns3/hns3_rxtx_vec.c     | 41 +++++++++++++++++++++++++++++-------
 drivers/net/hns3/hns3_rxtx_vec_sve.c | 37 ++++++++++++++++++++++++++------
 3 files changed, 65 insertions(+), 15 deletions(-)
  

Patch

diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h
index 1e2e994..ba24e00 100644
--- a/drivers/net/hns3/hns3_rxtx.h
+++ b/drivers/net/hns3/hns3_rxtx.h
@@ -20,7 +20,7 @@ 
 #define HNS3_DEFAULT_TX_RS_THRESH	32
 #define HNS3_TX_FAST_FREE_AHEAD		64
 
-#define HNS3_DEFAULT_RX_BURST		32
+#define HNS3_DEFAULT_RX_BURST		64
 #if (HNS3_DEFAULT_RX_BURST > 64)
 #error "PMD HNS3: HNS3_DEFAULT_RX_BURST must <= 64\n"
 #endif
diff --git a/drivers/net/hns3/hns3_rxtx_vec.c b/drivers/net/hns3/hns3_rxtx_vec.c
index dc1e1ae..66d8904 100644
--- a/drivers/net/hns3/hns3_rxtx_vec.c
+++ b/drivers/net/hns3/hns3_rxtx_vec.c
@@ -108,14 +108,13 @@  hns3_recv_pkts_vec(void *__restrict rx_queue,
 {
 	struct hns3_rx_queue *rxq = rx_queue;
 	struct hns3_desc *rxdp = &rxq->rx_ring[rxq->next_to_use];
-	uint64_t bd_err_mask;  /* bit mask indicate whick pkts is error */
+	uint64_t pkt_err_mask;  /* bit mask indicating which pkts have errors */
 	uint16_t nb_rx;
 
-	nb_pkts = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);
-	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_DEFAULT_DESCS_PER_LOOP);
-
 	rte_prefetch_non_temporal(rxdp);
 
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_DEFAULT_DESCS_PER_LOOP);
+
 	if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
 		hns3_rxq_rearm_mbuf(rxq);
 
@@ -128,10 +127,36 @@  hns3_recv_pkts_vec(void *__restrict rx_queue,
 	rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 2].mbuf);
 	rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 3].mbuf);
 
-	bd_err_mask = 0;
-	nb_rx = hns3_recv_burst_vec(rxq, rx_pkts, nb_pkts, &bd_err_mask);
-	if (unlikely(bd_err_mask))
-		nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, bd_err_mask);
+	if (likely(nb_pkts <= HNS3_DEFAULT_RX_BURST)) {
+		pkt_err_mask = 0;
+		nb_rx = hns3_recv_burst_vec(rxq, rx_pkts, nb_pkts,
+					    &pkt_err_mask);
+		if (unlikely(pkt_err_mask > 0))
+			nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx,
+							pkt_err_mask);
+		return nb_rx;
+	}
+
+	nb_rx = 0;
+	while (nb_pkts > 0) {
+		uint16_t ret, n;
+
+		n = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);
+		pkt_err_mask = 0;
+		ret = hns3_recv_burst_vec(rxq, &rx_pkts[nb_rx], n,
+					  &pkt_err_mask);
+		nb_pkts -= ret;
+		if (unlikely(pkt_err_mask > 0))
+			nb_rx += hns3_rx_reassemble_pkts(&rx_pkts[nb_rx], ret,
+							 pkt_err_mask);
+		else
+			nb_rx += ret;
+		if (ret < n)
+			break;
+
+		if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
+			hns3_rxq_rearm_mbuf(rxq);
+	}
 
 	return nb_rx;
 }
diff --git a/drivers/net/hns3/hns3_rxtx_vec_sve.c b/drivers/net/hns3/hns3_rxtx_vec_sve.c
index ef6c875..44e5293 100644
--- a/drivers/net/hns3/hns3_rxtx_vec_sve.c
+++ b/drivers/net/hns3/hns3_rxtx_vec_sve.c
@@ -292,12 +292,11 @@  hns3_recv_pkts_vec_sve(void *__restrict rx_queue,
 {
 	struct hns3_rx_queue *rxq = rx_queue;
 	struct hns3_desc *rxdp = &rxq->rx_ring[rxq->next_to_use];
-	uint64_t bd_err_mask;  /* bit mask indicate whick pkts is error */
+	uint64_t pkt_err_mask;  /* bit mask indicating which pkts have errors */
 	uint16_t nb_rx;
 
 	rte_prefetch_non_temporal(rxdp);
 
-	nb_pkts = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);
 	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_SVE_DEFAULT_DESCS_PER_LOOP);
 
 	if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
@@ -309,10 +308,36 @@  hns3_recv_pkts_vec_sve(void *__restrict rx_queue,
 
 	hns3_rx_prefetch_mbuf_sve(&rxq->sw_ring[rxq->next_to_use]);
 
-	bd_err_mask = 0;
-	nb_rx = hns3_recv_burst_vec_sve(rxq, rx_pkts, nb_pkts, &bd_err_mask);
-	if (unlikely(bd_err_mask))
-		nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, bd_err_mask);
+	if (likely(nb_pkts <= HNS3_DEFAULT_RX_BURST)) {
+		pkt_err_mask = 0;
+		nb_rx = hns3_recv_burst_vec_sve(rxq, rx_pkts, nb_pkts,
+						&pkt_err_mask);
+		if (unlikely(pkt_err_mask > 0))
+			nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx,
+							pkt_err_mask);
+		return nb_rx;
+	}
+
+	nb_rx = 0;
+	while (nb_pkts > 0) {
+		uint16_t ret, n;
+
+		n = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);
+		pkt_err_mask = 0;
+		ret = hns3_recv_burst_vec_sve(rxq, &rx_pkts[nb_rx], n,
+					      &pkt_err_mask);
+		nb_pkts -= ret;
+		if (unlikely(pkt_err_mask > 0))
+			nb_rx += hns3_rx_reassemble_pkts(&rx_pkts[nb_rx], ret,
+							 pkt_err_mask);
+		else
+			nb_rx += ret;
+		if (ret < n)
+			break;
+
+		if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
+			hns3_rxq_rearm_mbuf_sve(rxq);
+	}
 
 	return nb_rx;
 }
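
For callers, the visible effect is that a single rte_eth_rx_burst() call on
the hns3 vector path can now return more than 32 packets. A small usage
sketch (port_id/queue_id are placeholders; the usual EAL and port setup is
assumed to have been done):

#include <rte_ethdev.h>
#include <rte_mbuf.h>

#define BURST 128	/* deliberately above the old 32-packet cap */

static void
poll_rx(uint16_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *pkts[BURST];
	uint16_t nb_rx, i;

	/* Previously the vector Rx path returned at most 32 packets per
	 * call regardless of BURST; with the chunking fix one call can
	 * fill the whole array when enough descriptors are ready. */
	nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts, BURST);

	for (i = 0; i < nb_rx; i++)
		rte_pktmbuf_free(pkts[i]);	/* real apps process packets here */
}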