net/nfp: use Tx ring pointer write back

Message ID 20240401065622.106253-1-chaoyong.he@corigine.com (mailing list archive)
State Accepted, archived
Delegated to: Ferruh Yigit
Headers
Series net/nfp: use Tx ring pointer write back

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/github-robot: build success github build: passed
ci/intel-Functional success Functional PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-compile-arm64-testing success Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS

Commit Message

Chaoyong He April 1, 2024, 6:56 a.m. UTC
From: Long Wu <long.wu@corigine.com>

This follows the mainline Linux kernel commit
0dcf7f500b0a (nfp: use TX ring pointer write back) by Jakub Kicinski.

This speeds up Tx completions because the driver no longer reads the
ring pointer from device memory over PCIe; instead it reads a copy that
the device writes back to host memory via DMA. It works on CoreNIC
firmware with NFDk.

Signed-off-by: Long Wu <long.wu@corigine.com>
Reviewed-by: Chaoyong He <chaoyong.he@corigine.com>
---
 drivers/net/nfp/nfdk/nfp_nfdk_dp.c |  7 ++++++
 drivers/net/nfp/nfp_ethdev.c       | 16 +++++++++++++-
 drivers/net/nfp/nfp_net_common.c   | 34 ++++++++++++++++++++++++++++++
 drivers/net/nfp/nfp_net_common.h   |  5 +++++
 drivers/net/nfp/nfp_rxtx.c         | 28 +++++++++++++++++++++++-
 drivers/net/nfp/nfp_rxtx.h         |  6 ++++++
 6 files changed, 94 insertions(+), 2 deletions(-)
  

Comments

Ferruh Yigit April 18, 2024, 10:14 p.m. UTC | #1
On 4/1/2024 7:56 AM, Chaoyong He wrote:
> From: Long Wu <long.wu@corigine.com>
> 
> This follows the mainline Linux kernel commit
> 0dcf7f500b0a (nfp: use TX ring pointer write back) by Jakub Kicinski.
> 
> This will speed up Tx completions, because we avoid a read from
> device memory (replacing PCIe read with DMA read), and it works on
> CoreNIC firmware with NFDk.
> 
> Signed-off-by: Long Wu <long.wu@corigine.com>
> Reviewed-by: Chaoyong He <chaoyong.he@corigine.com>
>

Applied to dpdk-next-net/main, thanks.
  

Patch

diff --git a/drivers/net/nfp/nfdk/nfp_nfdk_dp.c b/drivers/net/nfp/nfdk/nfp_nfdk_dp.c
index 1911736e2b..41cdfd3a40 100644
--- a/drivers/net/nfp/nfdk/nfp_nfdk_dp.c
+++ b/drivers/net/nfp/nfdk/nfp_nfdk_dp.c
@@ -544,6 +544,13 @@  nfp_net_nfdk_tx_queue_setup(struct rte_eth_dev *dev,
 		return -ENOMEM;
 	}
 
+	if (hw->txrwb_mz != NULL) {
+		txq->txrwb = (uint64_t *)hw->txrwb_mz->addr + queue_idx;
+		txq->txrwb_dma = (uint64_t)hw->txrwb_mz->iova +
+				queue_idx * sizeof(uint64_t);
+		nn_cfg_writeq(&hw->super, NFP_NET_CFG_TXR_WB_ADDR(queue_idx), txq->txrwb_dma);
+	}
+
 	nfp_net_reset_tx_queue(txq);
 
 	dev->data->tx_queues[queue_idx] = txq;
diff --git a/drivers/net/nfp/nfp_ethdev.c b/drivers/net/nfp/nfp_ethdev.c
index 568de1d024..b711e15b9f 100644
--- a/drivers/net/nfp/nfp_ethdev.c
+++ b/drivers/net/nfp/nfp_ethdev.c
@@ -360,6 +360,9 @@  nfp_net_start(struct rte_eth_dev *dev)
 	if ((hw->cap & NFP_NET_CFG_CTRL_RINGCFG) != 0)
 		new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
 
+	if ((hw->cap & NFP_NET_CFG_CTRL_TXRWB) != 0)
+		new_ctrl |= NFP_NET_CFG_CTRL_TXRWB;
+
 	if (nfp_reconfig(hw, new_ctrl, update) != 0)
 		return -EIO;
 
@@ -577,6 +580,8 @@  nfp_net_uninit(struct rte_eth_dev *eth_dev)
 		nfp_net_flow_priv_uninit(net_hw->pf_dev, net_hw->idx);
 
 	rte_free(net_hw->eth_xstats_base);
+	if ((net_hw->super.cap & NFP_NET_CFG_CTRL_TXRWB) != 0)
+		nfp_net_txrwb_free(eth_dev);
 	nfp_ipsec_uninit(eth_dev);
 	if (net_hw->mac_stats_area != NULL)
 		nfp_cpp_area_release_free(net_hw->mac_stats_area);
@@ -987,6 +992,12 @@  nfp_net_init(struct rte_eth_dev *eth_dev)
 		goto xstats_free;
 	}
 
+	if ((hw->cap & NFP_NET_CFG_CTRL_TXRWB) != 0) {
+		err = nfp_net_txrwb_alloc(eth_dev);
+		if (err != 0)
+			goto xstats_free;
+	}
+
 	nfp_net_pf_read_mac(app_fw_nic, port);
 	nfp_write_mac(hw, &hw->mac_addr.addr_bytes[0]);
 
@@ -1025,12 +1036,15 @@  nfp_net_init(struct rte_eth_dev *eth_dev)
 		err = nfp_net_flow_priv_init(pf_dev, port);
 		if (err != 0) {
 			PMD_INIT_LOG(ERR, "Init net flow priv failed");
-			goto xstats_free;
+			goto txrwb_free;
 		}
 	}
 
 	return 0;
 
+txrwb_free:
+	if ((hw->cap & NFP_NET_CFG_CTRL_TXRWB) != 0)
+		nfp_net_txrwb_free(eth_dev);
 xstats_free:
 	rte_free(net_hw->eth_xstats_base);
 ipsec_exit:
diff --git a/drivers/net/nfp/nfp_net_common.c b/drivers/net/nfp/nfp_net_common.c
index c6889949ff..da1a7e7be1 100644
--- a/drivers/net/nfp/nfp_net_common.c
+++ b/drivers/net/nfp/nfp_net_common.c
@@ -2039,6 +2039,40 @@  nfp_net_check_dma_mask(struct nfp_net_hw *hw,
 	return 0;
 }
 
+int
+nfp_net_txrwb_alloc(struct rte_eth_dev *eth_dev)
+{
+	struct nfp_net_hw *net_hw;
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+
+	net_hw = nfp_net_get_hw(eth_dev);
+	snprintf(mz_name, sizeof(mz_name), "%s_TXRWB", eth_dev->data->name);
+	net_hw->txrwb_mz = rte_memzone_reserve_aligned(mz_name,
+			net_hw->max_tx_queues * sizeof(uint64_t),
+			rte_socket_id(),
+			RTE_MEMZONE_IOVA_CONTIG, RTE_CACHE_LINE_SIZE);
+	if (net_hw->txrwb_mz == NULL) {
+		PMD_INIT_LOG(ERR, "Failed to alloc %s for TX ring write back",
+				mz_name);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void
+nfp_net_txrwb_free(struct rte_eth_dev *eth_dev)
+{
+	struct nfp_net_hw *net_hw;
+
+	net_hw = nfp_net_get_hw(eth_dev);
+	if (net_hw->txrwb_mz == NULL)
+		return;
+
+	rte_memzone_free(net_hw->txrwb_mz);
+	net_hw->txrwb_mz = NULL;
+}
+
 void
 nfp_net_cfg_read_version(struct nfp_net_hw *hw)
 {
diff --git a/drivers/net/nfp/nfp_net_common.h b/drivers/net/nfp/nfp_net_common.h
index 49a5a84044..8066e77e6f 100644
--- a/drivers/net/nfp/nfp_net_common.h
+++ b/drivers/net/nfp/nfp_net_common.h
@@ -168,6 +168,9 @@  struct nfp_net_hw {
 	/** Backpointer to the eth_dev of this port */
 	struct rte_eth_dev *eth_dev;
 
+	/** TX pointer ring write back memzone */
+	const struct rte_memzone *txrwb_mz;
+
 	/** Info from the firmware */
 	struct nfp_net_fw_ver ver;
 	uint32_t max_mtu;
@@ -321,6 +324,8 @@  int nfp_net_fec_set(struct rte_eth_dev *dev,
 		uint32_t fec_capa);
 void nfp_net_get_fw_version(struct nfp_net_hw *hw,
 		uint32_t *fw_version);
+int nfp_net_txrwb_alloc(struct rte_eth_dev *eth_dev);
+void nfp_net_txrwb_free(struct rte_eth_dev *eth_dev);
 
 #define NFP_PRIV_TO_APP_FW_NIC(app_fw_priv)\
 	((struct nfp_app_fw_nic *)app_fw_priv)
diff --git a/drivers/net/nfp/nfp_rxtx.c b/drivers/net/nfp/nfp_rxtx.c
index 1aee3ecb3f..f9c4636688 100644
--- a/drivers/net/nfp/nfp_rxtx.c
+++ b/drivers/net/nfp/nfp_rxtx.c
@@ -695,6 +695,26 @@  nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
 	return 0;
 }
 
+static inline uint32_t
+nfp_net_read_tx_free_qcp(struct nfp_net_txq *txq)
+{
+	/*
+	 * If TX ring pointer write back is not supported, do a PCIe read.
+	 * Otherwise read qcp value from write back dma address.
+	 */
+	if (txq->txrwb == NULL)
+		return nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR);
+
+	/*
+	 * In most cases the TX count is a power of two and the costly modulus
+	 * operation can be substituted with a subtraction and an AND operation.
+	 */
+	if (rte_is_power_of_2(txq->tx_count) == 1)
+		return (*txq->txrwb) & (txq->tx_count - 1);
+	else
+		return (*txq->txrwb) % txq->tx_count;
+}
+
 /**
  * Check for descriptors with a complete status
  *
@@ -714,7 +734,7 @@  nfp_net_tx_free_bufs(struct nfp_net_txq *txq)
 			" status", txq->qidx);
 
 	/* Work out how many packets have been sent */
-	qcp_rd_p = nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR);
+	qcp_rd_p = nfp_net_read_tx_free_qcp(txq);
 
 	if (qcp_rd_p == txq->rd_p) {
 		PMD_TX_LOG(DEBUG, "queue %hu: It seems harrier is not sending "
@@ -761,9 +781,13 @@  void
 nfp_net_tx_queue_release(struct rte_eth_dev *dev,
 		uint16_t queue_idx)
 {
+	struct nfp_net_hw *net_hw;
 	struct nfp_net_txq *txq = dev->data->tx_queues[queue_idx];
 
 	if (txq != NULL) {
+		net_hw = nfp_net_get_hw(dev);
+		if (net_hw->txrwb_mz != NULL)
+			nn_cfg_writeq(&net_hw->super, NFP_NET_CFG_TXR_WB_ADDR(queue_idx), 0);
 		nfp_net_tx_queue_release_mbufs(txq);
 		rte_eth_dma_zone_free(dev, "tx_ring", queue_idx);
 		rte_free(txq->txbufs);
@@ -777,6 +801,8 @@  nfp_net_reset_tx_queue(struct nfp_net_txq *txq)
 	nfp_net_tx_queue_release_mbufs(txq);
 	txq->wr_p = 0;
 	txq->rd_p = 0;
+	if (txq->txrwb != NULL)
+		*txq->txrwb = 0;
 }
 
 int
diff --git a/drivers/net/nfp/nfp_rxtx.h b/drivers/net/nfp/nfp_rxtx.h
index 6ecabc232c..f463b9cf75 100644
--- a/drivers/net/nfp/nfp_rxtx.h
+++ b/drivers/net/nfp/nfp_rxtx.h
@@ -77,6 +77,12 @@  struct nfp_net_txq {
 	 * in a cache line.
 	 */
 	uint64_t dma;
+
+	/** TX pointer ring write back area (indexed by queue id) */
+	uint64_t *txrwb;
+
+	/** TX pointer ring write back area DMA address */
+	uint64_t txrwb_dma;
 } __rte_aligned(64);
 
 /* RX and freelist descriptor format */