From: Long Wu <long.wu@corigine.com>
This follows the mainline Linux kernel commit 0dcf7f500b0a ("nfp:
use TX ring pointer write back") by Jakub Kicinski.

This speeds up Tx completions because the driver avoids a read from
device memory: the PCIe register read is replaced by a read of a
host-memory location that the device keeps updated via DMA write
back. The feature works on CoreNIC firmware with the NFDk datapath.
Signed-off-by: Long Wu <long.wu@corigine.com>
Reviewed-by: Chaoyong He <chaoyong.he@corigine.com>
---
 drivers/net/nfp/nfdk/nfp_nfdk_dp.c |  7 ++++++
 drivers/net/nfp/nfp_ethdev.c       | 16 +++++++++++++-
 drivers/net/nfp/nfp_net_common.c   | 34 ++++++++++++++++++++++++++++++
 drivers/net/nfp/nfp_net_common.h   |  5 +++++
 drivers/net/nfp/nfp_rxtx.c         | 28 +++++++++++++++++++++++-
 drivers/net/nfp/nfp_rxtx.h         |  6 ++++++
 6 files changed, 94 insertions(+), 2 deletions(-)
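
Note for reviewers (not part of the patch): a minimal standalone
sketch of the host side of the write back scheme. The `txrwb` array,
the `ring_index()` helper, and the example values below are
illustrative stand-ins only; in the patch the per-queue slots live in
an IOVA-contiguous memzone (nfp_net_txrwb_alloc()) and the index
derivation is done by nfp_net_read_tx_free_qcp().

#include <stdint.h>
#include <stdio.h>

/*
 * One 8-byte write back slot per TX queue. The device DMAs its TX
 * read pointer into the slot, so the host polls its own memory
 * instead of issuing a PCIe read to the queue controller.
 */
static uint64_t txrwb[8];

/*
 * Mirror of the patch's index derivation: mask when the ring size
 * is a power of two, fall back to modulus otherwise.
 */
static uint32_t
ring_index(uint64_t wb, uint32_t tx_count)
{
	if ((tx_count & (tx_count - 1)) == 0) /* power of two */
		return (uint32_t)(wb & (tx_count - 1));

	return (uint32_t)(wb % tx_count);
}

int
main(void)
{
	/* Pretend the device wrote back 1027 for queue 0 on a
	 * 512-descriptor ring: 1027 & 511 == 1027 % 512 == 3. */
	txrwb[0] = 1027;
	printf("qcp read pointer: %u\n", ring_index(txrwb[0], 512));

	return 0;
}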
diff --git a/drivers/net/nfp/nfdk/nfp_nfdk_dp.c b/drivers/net/nfp/nfdk/nfp_nfdk_dp.c
--- a/drivers/net/nfp/nfdk/nfp_nfdk_dp.c
+++ b/drivers/net/nfp/nfdk/nfp_nfdk_dp.c
@@ -544,6 +544,13 @@ nfp_net_nfdk_tx_queue_setup(struct rte_eth_dev *dev,
 		return -ENOMEM;
 	}
 
+	if (hw->txrwb_mz != NULL) {
+		txq->txrwb = (uint64_t *)hw->txrwb_mz->addr + queue_idx;
+		txq->txrwb_dma = (uint64_t)hw->txrwb_mz->iova +
+				queue_idx * sizeof(uint64_t);
+		nn_cfg_writeq(&hw->super, NFP_NET_CFG_TXR_WB_ADDR(queue_idx), txq->txrwb_dma);
+	}
+
 	nfp_net_reset_tx_queue(txq);
 
 	dev->data->tx_queues[queue_idx] = txq;
diff --git a/drivers/net/nfp/nfp_ethdev.c b/drivers/net/nfp/nfp_ethdev.c
--- a/drivers/net/nfp/nfp_ethdev.c
+++ b/drivers/net/nfp/nfp_ethdev.c
@@ -360,6 +360,9 @@ nfp_net_start(struct rte_eth_dev *dev)
 	if ((hw->cap & NFP_NET_CFG_CTRL_RINGCFG) != 0)
 		new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
 
+	if ((hw->cap & NFP_NET_CFG_CTRL_TXRWB) != 0)
+		new_ctrl |= NFP_NET_CFG_CTRL_TXRWB;
+
 	if (nfp_reconfig(hw, new_ctrl, update) != 0)
 		return -EIO;
 
@@ -577,6 +580,8 @@ nfp_net_uninit(struct rte_eth_dev *eth_dev)
 		nfp_net_flow_priv_uninit(net_hw->pf_dev, net_hw->idx);
 
 	rte_free(net_hw->eth_xstats_base);
+	if ((net_hw->super.cap & NFP_NET_CFG_CTRL_TXRWB) != 0)
+		nfp_net_txrwb_free(eth_dev);
 	nfp_ipsec_uninit(eth_dev);
 	if (net_hw->mac_stats_area != NULL)
 		nfp_cpp_area_release_free(net_hw->mac_stats_area);
@@ -987,6 +992,12 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
 		goto xstats_free;
 	}
 
+	if ((hw->cap & NFP_NET_CFG_CTRL_TXRWB) != 0) {
+		err = nfp_net_txrwb_alloc(eth_dev);
+		if (err != 0)
+			goto xstats_free;
+	}
+
 	nfp_net_pf_read_mac(app_fw_nic, port);
 	nfp_write_mac(hw, &hw->mac_addr.addr_bytes[0]);
 
@@ -1025,12 +1036,15 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
 		err = nfp_net_flow_priv_init(pf_dev, port);
 		if (err != 0) {
 			PMD_INIT_LOG(ERR, "Init net flow priv failed");
-			goto xstats_free;
+			goto txrwb_free;
 		}
 	}
 
 	return 0;
 
+txrwb_free:
+	if ((hw->cap & NFP_NET_CFG_CTRL_TXRWB) != 0)
+		nfp_net_txrwb_free(eth_dev);
 xstats_free:
 	rte_free(net_hw->eth_xstats_base);
 ipsec_exit:
diff --git a/drivers/net/nfp/nfp_net_common.c b/drivers/net/nfp/nfp_net_common.c
--- a/drivers/net/nfp/nfp_net_common.c
+++ b/drivers/net/nfp/nfp_net_common.c
@@ -2039,6 +2039,40 @@ nfp_net_check_dma_mask(struct nfp_net_hw *hw,
 	return 0;
 }
 
+int
+nfp_net_txrwb_alloc(struct rte_eth_dev *eth_dev)
+{
+	struct nfp_net_hw *net_hw;
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+
+	net_hw = nfp_net_get_hw(eth_dev);
+	snprintf(mz_name, sizeof(mz_name), "%s_TXRWB", eth_dev->data->name);
+	net_hw->txrwb_mz = rte_memzone_reserve_aligned(mz_name,
+			net_hw->max_tx_queues * sizeof(uint64_t),
+			rte_socket_id(),
+			RTE_MEMZONE_IOVA_CONTIG, RTE_CACHE_LINE_SIZE);
+	if (net_hw->txrwb_mz == NULL) {
+		PMD_INIT_LOG(ERR, "Failed to alloc %s for TX ring write back",
+				mz_name);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void
+nfp_net_txrwb_free(struct rte_eth_dev *eth_dev)
+{
+	struct nfp_net_hw *net_hw;
+
+	net_hw = nfp_net_get_hw(eth_dev);
+	if (net_hw->txrwb_mz == NULL)
+		return;
+
+	rte_memzone_free(net_hw->txrwb_mz);
+	net_hw->txrwb_mz = NULL;
+}
+
 void
 nfp_net_cfg_read_version(struct nfp_net_hw *hw)
 {
diff --git a/drivers/net/nfp/nfp_net_common.h b/drivers/net/nfp/nfp_net_common.h
--- a/drivers/net/nfp/nfp_net_common.h
+++ b/drivers/net/nfp/nfp_net_common.h
@@ -168,6 +168,9 @@ struct nfp_net_hw {
 	/** Backpointer to the eth_dev of this port */
 	struct rte_eth_dev *eth_dev;
 
+	/** TX ring pointer write back memzone */
+	const struct rte_memzone *txrwb_mz;
+
 	/** Info from the firmware */
 	struct nfp_net_fw_ver ver;
 	uint32_t max_mtu;
@@ -321,6 +324,8 @@ int nfp_net_fec_set(struct rte_eth_dev *dev,
 		uint32_t fec_capa);
 void nfp_net_get_fw_version(struct nfp_net_hw *hw,
 		uint32_t *fw_version);
+int nfp_net_txrwb_alloc(struct rte_eth_dev *eth_dev);
+void nfp_net_txrwb_free(struct rte_eth_dev *eth_dev);
 
 #define NFP_PRIV_TO_APP_FW_NIC(app_fw_priv)\
 	((struct nfp_app_fw_nic *)app_fw_priv)
diff --git a/drivers/net/nfp/nfp_rxtx.c b/drivers/net/nfp/nfp_rxtx.c
--- a/drivers/net/nfp/nfp_rxtx.c
+++ b/drivers/net/nfp/nfp_rxtx.c
@@ -695,6 +695,26 @@ nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
 	return 0;
 }
 
+static inline uint32_t
+nfp_net_read_tx_free_qcp(struct nfp_net_txq *txq)
+{
+	/*
+	 * If TX ring pointer write back is not supported, do a PCIe read.
+	 * Otherwise, read the QCP value from the write back DMA address.
+	 */
+	if (txq->txrwb == NULL)
+		return nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR);
+
+	/*
+	 * In most cases the TX count is a power of two and the costly modulus
+	 * operation can be substituted with a subtraction and an AND operation.
+	 */
+	if (rte_is_power_of_2(txq->tx_count) == 1)
+		return (*txq->txrwb) & (txq->tx_count - 1);
+	else
+		return (*txq->txrwb) % txq->tx_count;
+}
+
 /**
  * Check for descriptors with a complete status
  *
@@ -714,7 +734,7 @@ nfp_net_tx_free_bufs(struct nfp_net_txq *txq)
 			" status", txq->qidx);
 
 	/* Work out how many packets have been sent */
-	qcp_rd_p = nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR);
+	qcp_rd_p = nfp_net_read_tx_free_qcp(txq);
 
 	if (qcp_rd_p == txq->rd_p) {
 		PMD_TX_LOG(DEBUG, "queue %hu: It seems harrier is not sending "
@@ -761,9 +781,13 @@ void
 nfp_net_tx_queue_release(struct rte_eth_dev *dev,
 		uint16_t queue_idx)
 {
+	struct nfp_net_hw *net_hw;
 	struct nfp_net_txq *txq = dev->data->tx_queues[queue_idx];
 
 	if (txq != NULL) {
+		net_hw = nfp_net_get_hw(dev);
+		if (net_hw->txrwb_mz != NULL)
+			nn_cfg_writeq(&net_hw->super, NFP_NET_CFG_TXR_WB_ADDR(queue_idx), 0);
 		nfp_net_tx_queue_release_mbufs(txq);
 		rte_eth_dma_zone_free(dev, "tx_ring", queue_idx);
 		rte_free(txq->txbufs);
@@ -777,6 +801,8 @@ nfp_net_reset_tx_queue(struct nfp_net_txq *txq)
 	nfp_net_tx_queue_release_mbufs(txq);
 	txq->wr_p = 0;
 	txq->rd_p = 0;
+	if (txq->txrwb != NULL)
+		*txq->txrwb = 0;
 }
 
 int
diff --git a/drivers/net/nfp/nfp_rxtx.h b/drivers/net/nfp/nfp_rxtx.h
--- a/drivers/net/nfp/nfp_rxtx.h
+++ b/drivers/net/nfp/nfp_rxtx.h
@@ -77,6 +77,12 @@ struct nfp_net_txq {
 	 * in a cache line.
 	 */
 	uint64_t dma;
+
+	/** TX ring pointer write back area (indexed by queue id) */
+	uint64_t *txrwb;
+
+	/** TX ring pointer write back area DMA address */
+	uint64_t txrwb_dma;
 } __rte_aligned(64);
 
 /* RX and freelist descriptor format */