[RESEND,13/13] net/hns3: revert optimize Tx performance

Message ID 20220905085939.22236-14-liudongdong3@huawei.com (mailing list archive)
State Accepted, archived
Delegated to: Andrew Rybchenko
Series: some bugfixes and clean code for hns3

Checks

Context                 Check    Description
ci/checkpatch           success  coding style OK
ci/Intel-compilation    success  Compilation OK
ci/github-robot: build  success  github build: passed
ci/intel-Testing        success  Testing PASS

Commit Message

Dongdong Liu Sept. 5, 2022, 8:59 a.m. UTC
  From: Chengwen Feng <fengchengwen@huawei.com>

Tx performance deteriorates with larger packet sizes and larger
bursts. Optimizing for these scenarios may take a long time, so this
commit reverts
commit 0b77e8f3d364 ("net/hns3: optimize Tx performance").

Fixes: 0b77e8f3d364 ("net/hns3: optimize Tx performance")
Cc: stable@dpdk.org

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
---
 drivers/net/hns3/hns3_rxtx.c | 115 ++++++++++++++++++-----------------
 1 file changed, 60 insertions(+), 55 deletions(-)
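
For reviewers skimming the diff: the sketch below is a minimal, self-contained
illustration in plain C of the cleanup strategy this revert restores, namely
walking the Tx ring one descriptor at a time and freeing every mbuf whose VLD
bit the hardware has already cleared, instead of freeing only whole
tx_rs_thresh-sized batches as the reverted commit did. The fake_* types,
RING_SIZE, and the malloc()/free() calls are simplified stand-ins, not hns3
driver code.

/*
 * Simplified illustration (not the driver code itself) of per-descriptor
 * Tx cleanup, loosely mirroring the restored hns3_tx_free_useless_buffer().
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

#define RING_SIZE 8

struct fake_desc  { bool vld; };    /* stand-in for the HNS3_TXD_VLD_B bit */
struct fake_entry { void *mbuf; };  /* stand-in for struct hns3_entry      */

struct fake_txq {
	struct fake_desc  ring[RING_SIZE];
	struct fake_entry sw_ring[RING_SIZE];
	uint16_t next_to_clean;
	uint16_t next_to_use;
	uint16_t tx_bd_ready;
};

/* Walk from next_to_clean and free every buffer the hardware has finished
 * with (VLD cleared), stopping at next_to_use or the first busy descriptor. */
static void free_done_buffers(struct fake_txq *q)
{
	uint16_t clean = q->next_to_clean;

	while (!q->ring[clean].vld && clean != q->next_to_use) {
		free(q->sw_ring[clean].mbuf);  /* rte_pktmbuf_free_seg() in the driver */
		q->sw_ring[clean].mbuf = NULL;
		q->tx_bd_ready++;
		clean = (uint16_t)((clean + 1) % RING_SIZE);
	}
	q->next_to_clean = clean;
}

int main(void)
{
	struct fake_txq q = { .next_to_use = 3 };

	/* Pretend three packets were transmitted and completed by hardware. */
	for (int i = 0; i < 3; i++)
		q.sw_ring[i].mbuf = malloc(16);

	free_done_buffers(&q);  /* frees all three completed buffers */
	return 0;
}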
  

Patch

diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c
index 1b1dc6b8e4..ca61b441df 100644
--- a/drivers/net/hns3/hns3_rxtx.c
+++ b/drivers/net/hns3/hns3_rxtx.c
@@ -3072,51 +3072,40 @@  hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
 	return 0;
 }
 
-static int
+static void
 hns3_tx_free_useless_buffer(struct hns3_tx_queue *txq)
 {
 	uint16_t tx_next_clean = txq->next_to_clean;
-	uint16_t tx_next_use = txq->next_to_use;
-	struct hns3_entry *tx_entry = &txq->sw_ring[tx_next_clean];
+	uint16_t tx_next_use   = txq->next_to_use;
+	uint16_t tx_bd_ready   = txq->tx_bd_ready;
+	uint16_t tx_bd_max     = txq->nb_tx_desc;
+	struct hns3_entry *tx_bak_pkt = &txq->sw_ring[tx_next_clean];
 	struct hns3_desc *desc = &txq->tx_ring[tx_next_clean];
-	uint16_t i;
-
-	if (tx_next_use >= tx_next_clean &&
-	    tx_next_use < tx_next_clean + txq->tx_rs_thresh)
-		return -1;
+	struct rte_mbuf *mbuf;
 
-	/*
-	 * All mbufs can be released only when the VLD bits of all
-	 * descriptors in a batch are cleared.
-	 */
-	for (i = 0; i < txq->tx_rs_thresh; i++) {
-		if (desc[i].tx.tp_fe_sc_vld_ra_ri &
-			rte_le_to_cpu_16(BIT(HNS3_TXD_VLD_B)))
-			return -1;
-	}
+	while ((!(desc->tx.tp_fe_sc_vld_ra_ri &
+		rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))) &&
+		tx_next_use != tx_next_clean) {
+		mbuf = tx_bak_pkt->mbuf;
+		if (mbuf) {
+			rte_pktmbuf_free_seg(mbuf);
+			tx_bak_pkt->mbuf = NULL;
+		}
 
-	for (i = 0; i < txq->tx_rs_thresh; i++) {
-		rte_pktmbuf_free_seg(tx_entry[i].mbuf);
-		tx_entry[i].mbuf = NULL;
+		desc++;
+		tx_bak_pkt++;
+		tx_next_clean++;
+		tx_bd_ready++;
+
+		if (tx_next_clean >= tx_bd_max) {
+			tx_next_clean = 0;
+			desc = txq->tx_ring;
+			tx_bak_pkt = txq->sw_ring;
+		}
 	}
 
-	/* Update numbers of available descriptor due to buffer freed */
-	txq->tx_bd_ready += txq->tx_rs_thresh;
-	txq->next_to_clean += txq->tx_rs_thresh;
-	if (txq->next_to_clean >= txq->nb_tx_desc)
-		txq->next_to_clean = 0;
-
-	return 0;
-}
-
-static inline int
-hns3_tx_free_required_buffer(struct hns3_tx_queue *txq, uint16_t required_bds)
-{
-	while (required_bds > txq->tx_bd_ready) {
-		if (hns3_tx_free_useless_buffer(txq) != 0)
-			return -1;
-	}
-	return 0;
+	txq->next_to_clean = tx_next_clean;
+	txq->tx_bd_ready   = tx_bd_ready;
 }
 
 int
@@ -4159,8 +4148,7 @@  hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	uint16_t nb_tx;
 	uint16_t i;
 
-	if (txq->tx_bd_ready < txq->tx_free_thresh)
-		(void)hns3_tx_free_useless_buffer(txq);
+	hns3_tx_free_useless_buffer(txq);
 
 	tx_next_use   = txq->next_to_use;
 	tx_bd_max     = txq->nb_tx_desc;
@@ -4175,14 +4163,10 @@  hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		nb_buf = tx_pkt->nb_segs;
 
 		if (nb_buf > txq->tx_bd_ready) {
-			/* Try to release the required MBUF, but avoid releasing
-			 * all MBUFs, otherwise, the MBUFs will be released for
-			 * a long time and may cause jitter.
-			 */
-			if (hns3_tx_free_required_buffer(txq, nb_buf) != 0) {
-				txq->dfx_stats.queue_full_cnt++;
-				goto end_of_tx;
-			}
+			txq->dfx_stats.queue_full_cnt++;
+			if (nb_tx == 0)
+				return 0;
+			goto end_of_tx;
 		}
 
 		/*
@@ -4598,22 +4582,43 @@  hns3_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 static int
 hns3_tx_done_cleanup_full(struct hns3_tx_queue *txq, uint32_t free_cnt)
 {
-	uint16_t round_cnt;
+	uint16_t next_to_clean = txq->next_to_clean;
+	uint16_t next_to_use   = txq->next_to_use;
+	uint16_t tx_bd_ready   = txq->tx_bd_ready;
+	struct hns3_entry *tx_pkt = &txq->sw_ring[next_to_clean];
+	struct hns3_desc *desc = &txq->tx_ring[next_to_clean];
 	uint32_t idx;
 
 	if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
 		free_cnt = txq->nb_tx_desc;
 
-	if (txq->tx_rs_thresh == 0)
-		return 0;
-
-	round_cnt = rounddown(free_cnt, txq->tx_rs_thresh);
-	for (idx = 0; idx < round_cnt; idx += txq->tx_rs_thresh) {
-		if (hns3_tx_free_useless_buffer(txq) != 0)
+	for (idx = 0; idx < free_cnt; idx++) {
+		if (next_to_clean == next_to_use)
+			break;
+		if (desc->tx.tp_fe_sc_vld_ra_ri &
+		    rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))
 			break;
+		if (tx_pkt->mbuf != NULL) {
+			rte_pktmbuf_free_seg(tx_pkt->mbuf);
+			tx_pkt->mbuf = NULL;
+		}
+		next_to_clean++;
+		tx_bd_ready++;
+		tx_pkt++;
+		desc++;
+		if (next_to_clean == txq->nb_tx_desc) {
+			tx_pkt = txq->sw_ring;
+			desc = txq->tx_ring;
+			next_to_clean = 0;
+		}
+	}
+
+	if (idx > 0) {
+		txq->next_to_clean = next_to_clean;
+		txq->tx_bd_ready = tx_bd_ready;
 	}
 
-	return idx;
+	return (int)idx;
 }
 
 int