From patchwork Mon Sep 5 08:59:39 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dongdong Liu X-Patchwork-Id: 115882 X-Patchwork-Delegate: andrew.rybchenko@oktetlabs.ru Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id E37E3A054A; Mon, 5 Sep 2022 11:02:32 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 7D66A42BB7; Mon, 5 Sep 2022 11:01:19 +0200 (CEST) Received: from szxga03-in.huawei.com (szxga03-in.huawei.com [45.249.212.189]) by mails.dpdk.org (Postfix) with ESMTP id E525F42B74; Mon, 5 Sep 2022 11:01:16 +0200 (CEST) Received: from kwepemi500017.china.huawei.com (unknown [172.30.72.57]) by szxga03-in.huawei.com (SkyGuard) with ESMTP id 4MLjBL6DMFzrSFy; Mon, 5 Sep 2022 16:59:22 +0800 (CST) Received: from localhost.localdomain (10.28.79.22) by kwepemi500017.china.huawei.com (7.221.188.110) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id 15.1.2375.24; Mon, 5 Sep 2022 17:01:14 +0800 From: Dongdong Liu To: , , , , , , CC: , , , Subject: [PATCH RESEND 13/13] net/hns3: revert optimize Tx performance Date: Mon, 5 Sep 2022 16:59:39 +0800 Message-ID: <20220905085939.22236-14-liudongdong3@huawei.com> X-Mailer: git-send-email 2.22.0 In-Reply-To: <20220905085939.22236-1-liudongdong3@huawei.com> References: <20220905085939.22236-1-liudongdong3@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.28.79.22] X-ClientProxiedBy: dggems701-chm.china.huawei.com (10.3.19.178) To kwepemi500017.china.huawei.com (7.221.188.110) X-CFilter-Loop: Reflected X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org 
From: Chengwen Feng The Tx performance deteriorates with larger packet sizes and larger bursts. It may take a long time to optimize for these scenarios, so this commit reverts commit 0b77e8f3d364 ("net/hns3: optimize Tx performance"). Fixes: 0b77e8f3d364 ("net/hns3: optimize Tx performance") Cc: stable@dpdk.org Signed-off-by: Chengwen Feng Signed-off-by: Dongdong Liu --- drivers/net/hns3/hns3_rxtx.c | 115 ++++++++++++++++++----------------- 1 file changed, 60 insertions(+), 55 deletions(-) diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c index 1b1dc6b8e4..ca61b441df 100644 --- a/drivers/net/hns3/hns3_rxtx.c +++ b/drivers/net/hns3/hns3_rxtx.c @@ -3072,51 +3072,40 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, return 0; } -static int +static void hns3_tx_free_useless_buffer(struct hns3_tx_queue *txq) { uint16_t tx_next_clean = txq->next_to_clean; - uint16_t tx_next_use = txq->next_to_use; - struct hns3_entry *tx_entry = &txq->sw_ring[tx_next_clean]; + uint16_t tx_next_use = txq->next_to_use; + uint16_t tx_bd_ready = txq->tx_bd_ready; + uint16_t tx_bd_max = txq->nb_tx_desc; + struct hns3_entry *tx_bak_pkt = &txq->sw_ring[tx_next_clean]; struct hns3_desc *desc = &txq->tx_ring[tx_next_clean]; - uint16_t i; - - if (tx_next_use >= tx_next_clean && - tx_next_use < tx_next_clean + txq->tx_rs_thresh) - return -1; + struct rte_mbuf *mbuf; - /* - * All mbufs can be released only when the VLD bits of all - * descriptors in a batch are cleared. 
- */ - for (i = 0; i < txq->tx_rs_thresh; i++) { - if (desc[i].tx.tp_fe_sc_vld_ra_ri & - rte_le_to_cpu_16(BIT(HNS3_TXD_VLD_B))) - return -1; - } + while ((!(desc->tx.tp_fe_sc_vld_ra_ri & + rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))) && + tx_next_use != tx_next_clean) { + mbuf = tx_bak_pkt->mbuf; + if (mbuf) { + rte_pktmbuf_free_seg(mbuf); + tx_bak_pkt->mbuf = NULL; + } - for (i = 0; i < txq->tx_rs_thresh; i++) { - rte_pktmbuf_free_seg(tx_entry[i].mbuf); - tx_entry[i].mbuf = NULL; + desc++; + tx_bak_pkt++; + tx_next_clean++; + tx_bd_ready++; + + if (tx_next_clean >= tx_bd_max) { + tx_next_clean = 0; + desc = txq->tx_ring; + tx_bak_pkt = txq->sw_ring; + } } - /* Update numbers of available descriptor due to buffer freed */ - txq->tx_bd_ready += txq->tx_rs_thresh; - txq->next_to_clean += txq->tx_rs_thresh; - if (txq->next_to_clean >= txq->nb_tx_desc) - txq->next_to_clean = 0; - - return 0; -} - -static inline int -hns3_tx_free_required_buffer(struct hns3_tx_queue *txq, uint16_t required_bds) -{ - while (required_bds > txq->tx_bd_ready) { - if (hns3_tx_free_useless_buffer(txq) != 0) - return -1; - } - return 0; + txq->next_to_clean = tx_next_clean; + txq->tx_bd_ready = tx_bd_ready; } int @@ -4159,8 +4148,7 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) uint16_t nb_tx; uint16_t i; - if (txq->tx_bd_ready < txq->tx_free_thresh) - (void)hns3_tx_free_useless_buffer(txq); + hns3_tx_free_useless_buffer(txq); tx_next_use = txq->next_to_use; tx_bd_max = txq->nb_tx_desc; @@ -4175,14 +4163,10 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) nb_buf = tx_pkt->nb_segs; if (nb_buf > txq->tx_bd_ready) { - /* Try to release the required MBUF, but avoid releasing - * all MBUFs, otherwise, the MBUFs will be released for - * a long time and may cause jitter. 
- */ - if (hns3_tx_free_required_buffer(txq, nb_buf) != 0) { - txq->dfx_stats.queue_full_cnt++; - goto end_of_tx; - } + txq->dfx_stats.queue_full_cnt++; + if (nb_tx == 0) + return 0; + goto end_of_tx; } /* @@ -4598,22 +4582,43 @@ hns3_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id) static int hns3_tx_done_cleanup_full(struct hns3_tx_queue *txq, uint32_t free_cnt) { - uint16_t round_cnt; + uint16_t next_to_clean = txq->next_to_clean; + uint16_t next_to_use = txq->next_to_use; + uint16_t tx_bd_ready = txq->tx_bd_ready; + struct hns3_entry *tx_pkt = &txq->sw_ring[next_to_clean]; + struct hns3_desc *desc = &txq->tx_ring[next_to_clean]; uint32_t idx; if (free_cnt == 0 || free_cnt > txq->nb_tx_desc) free_cnt = txq->nb_tx_desc; - if (txq->tx_rs_thresh == 0) - return 0; - - round_cnt = rounddown(free_cnt, txq->tx_rs_thresh); - for (idx = 0; idx < round_cnt; idx += txq->tx_rs_thresh) { - if (hns3_tx_free_useless_buffer(txq) != 0) + for (idx = 0; idx < free_cnt; idx++) { + if (next_to_clean == next_to_use) + break; + if (desc->tx.tp_fe_sc_vld_ra_ri & + rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B))) break; + if (tx_pkt->mbuf != NULL) { + rte_pktmbuf_free_seg(tx_pkt->mbuf); + tx_pkt->mbuf = NULL; + } + next_to_clean++; + tx_bd_ready++; + tx_pkt++; + desc++; + if (next_to_clean == txq->nb_tx_desc) { + tx_pkt = txq->sw_ring; + desc = txq->tx_ring; + next_to_clean = 0; + } + } + + if (idx > 0) { + txq->next_to_clean = next_to_clean; + txq->tx_bd_ready = tx_bd_ready; } - return idx; + return (int)idx; } int