[v2,1/2] net/hns3: optimized Tx performance by mbuf fast free
Checks
Commit Message
From: Chengwen Feng <fengchengwen@huawei.com>
Currently the vector and simple xmit algorithm don't support multi_segs,
so if Tx offload support MBUF_FAST_FREE, driver could invoke
rte_mempool_put_bulk() to free Tx mbufs in this situation.
In the testpmd single core MAC forwarding scenario, the performance is
improved by 8% at 64B on Kunpeng920 platform.
Cc: stable@dpdk.org
Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
doc/guides/nics/features/hns3.ini | 1 +
drivers/net/hns3/hns3_rxtx.c | 11 +++++++++++
drivers/net/hns3/hns3_rxtx.h | 2 ++
drivers/net/hns3/hns3_rxtx_vec.h | 9 +++++++++
4 files changed, 23 insertions(+)
@@ -12,6 +12,7 @@ Queue start/stop = Y
Runtime Rx queue setup = Y
Runtime Tx queue setup = Y
Burst mode info = Y
+Fast mbuf free = Y
Free Tx mbuf on demand = Y
MTU update = Y
Scattered Rx = Y
@@ -3059,6 +3059,8 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
txq->min_tx_pkt_len = hw->min_tx_pkt_len;
txq->tso_mode = hw->tso_mode;
txq->udp_cksum_mode = hw->udp_cksum_mode;
+ txq->mbuf_fast_free_en = !!(dev->data->dev_conf.txmode.offloads &
+ RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE);
memset(&txq->basic_stats, 0, sizeof(struct hns3_tx_basic_stats));
memset(&txq->dfx_stats, 0, sizeof(struct hns3_tx_dfx_stats));
@@ -3991,6 +3993,14 @@ hns3_tx_free_buffer_simple(struct hns3_tx_queue *txq)
tx_entry = &txq->sw_ring[txq->next_to_clean];
+ if (txq->mbuf_fast_free_en) {
+ rte_mempool_put_bulk(tx_entry->mbuf->pool,
+ (void **)tx_entry, txq->tx_rs_thresh);
+ for (i = 0; i < txq->tx_rs_thresh; i++)
+ tx_entry[i].mbuf = NULL;
+ goto update_field;
+ }
+
for (i = 0; i < txq->tx_rs_thresh; i++)
rte_prefetch0((tx_entry + i)->mbuf);
for (i = 0; i < txq->tx_rs_thresh; i++, tx_entry++) {
@@ -3998,6 +4008,7 @@ hns3_tx_free_buffer_simple(struct hns3_tx_queue *txq)
tx_entry->mbuf = NULL;
}
+update_field:
txq->next_to_clean = (tx_next_clean + 1) % txq->nb_tx_desc;
txq->tx_bd_ready += txq->tx_rs_thresh;
}
@@ -495,6 +495,8 @@ struct hns3_tx_queue {
* this point.
*/
uint16_t pvid_sw_shift_en:1;
+ /* check whether the mbuf fast free offload is enabled */
+ uint16_t mbuf_fast_free_en:1;
/*
* For better performance in tx datapath, releasing mbuf in batches is
@@ -18,6 +18,14 @@ hns3_tx_bulk_free_buffers(struct hns3_tx_queue *txq)
int i;
tx_entry = &txq->sw_ring[txq->next_to_clean];
+ if (txq->mbuf_fast_free_en) {
+ rte_mempool_put_bulk(tx_entry->mbuf->pool, (void **)tx_entry,
+ txq->tx_rs_thresh);
+ for (i = 0; i < txq->tx_rs_thresh; i++)
+ tx_entry[i].mbuf = NULL;
+ goto update_field;
+ }
+
for (i = 0; i < txq->tx_rs_thresh; i++, tx_entry++) {
m = rte_pktmbuf_prefree_seg(tx_entry->mbuf);
tx_entry->mbuf = NULL;
@@ -36,6 +44,7 @@ hns3_tx_bulk_free_buffers(struct hns3_tx_queue *txq)
if (nb_free)
rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+update_field:
/* Update numbers of available descriptor due to buffer freed */
txq->tx_bd_ready += txq->tx_rs_thresh;
txq->next_to_clean += txq->tx_rs_thresh;