diff mbox series

[v6,15/19] net/ngbe: add simple Tx flow

Message ID 20210617110005.4132926-16-jiawenwu@trustnetic.com (mailing list archive)
State Changes Requested, archived
Delegated to: Andrew Rybchenko
Headers show
Series net: ngbe PMD | expand

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Jiawen Wu June 17, 2021, 11 a.m. UTC
Initialize device with the simplest transmit functions.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
---
 drivers/net/ngbe/ngbe_ethdev.c |   1 +
 drivers/net/ngbe/ngbe_ethdev.h |   3 +
 drivers/net/ngbe/ngbe_rxtx.c   | 228 +++++++++++++++++++++++++++++++++
 drivers/net/ngbe/ngbe_rxtx.h   |  27 ++++
 4 files changed, 259 insertions(+)

Comments

Andrew Rybchenko July 2, 2021, 4:45 p.m. UTC | #1
On 6/17/21 2:00 PM, Jiawen Wu wrote:
> Initialize device with the simplest transmit functions.
> 
> Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>

This patch cannot be tested until device start-up is implemented,
so it should come after the corresponding patches in the series.

[snip]

> +uint16_t
> +ngbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
> +		       uint16_t nb_pkts)
> +{
> +	uint16_t nb_tx;
> +
> +	/* Try to transmit at least chunks of TX_MAX_BURST pkts */
> +	if (likely(nb_pkts <= RTE_PMD_NGBE_TX_MAX_BURST))
> +		return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
> +
> +	/* transmit more than the max burst, in chunks of TX_MAX_BURST */
> +	nb_tx = 0;
> +	while (nb_pkts) {

Compare against 0 explicitly, i.e. while (nb_pkts != 0)

> +		uint16_t ret, n;
> +
> +		n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_NGBE_TX_MAX_BURST);
> +		ret = tx_xmit_pkts(tx_queue, &tx_pkts[nb_tx], n);
> +		nb_tx = (uint16_t)(nb_tx + ret);
> +		nb_pkts = (uint16_t)(nb_pkts - ret);
> +		if (ret < n)
> +			break;
> +	}
> +
> +	return nb_tx;
> +}
> +
>  /*********************************************************************
>   *
>   *  Rx functions
diff mbox series

Patch

diff --git a/drivers/net/ngbe/ngbe_ethdev.c b/drivers/net/ngbe/ngbe_ethdev.c
index 269186acc0..6b4d5ac65b 100644
--- a/drivers/net/ngbe/ngbe_ethdev.c
+++ b/drivers/net/ngbe/ngbe_ethdev.c
@@ -111,6 +111,7 @@  eth_ngbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused)
 
 	eth_dev->dev_ops = &ngbe_eth_dev_ops;
 	eth_dev->rx_pkt_burst = &ngbe_recv_pkts;
+	eth_dev->tx_pkt_burst = &ngbe_xmit_pkts_simple;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
diff --git a/drivers/net/ngbe/ngbe_ethdev.h b/drivers/net/ngbe/ngbe_ethdev.h
index 8fb7c8a19b..c52cac2ca1 100644
--- a/drivers/net/ngbe/ngbe_ethdev.h
+++ b/drivers/net/ngbe/ngbe_ethdev.h
@@ -75,6 +75,9 @@  int  ngbe_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
 uint16_t ngbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts);
 
+uint16_t ngbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+		uint16_t nb_pkts);
+
 int
 ngbe_dev_link_update_share(struct rte_eth_dev *dev,
 		int wait_to_complete);
diff --git a/drivers/net/ngbe/ngbe_rxtx.c b/drivers/net/ngbe/ngbe_rxtx.c
index f97fceaf7c..6dde996659 100644
--- a/drivers/net/ngbe/ngbe_rxtx.c
+++ b/drivers/net/ngbe/ngbe_rxtx.c
@@ -20,6 +20,234 @@ 
  */
 #define rte_ngbe_prefetch(p)   rte_prefetch0(p)
 
+/*********************************************************************
+ *
+ *  Tx functions
+ *
+ **********************************************************************/
+
+/*
+ * Check for descriptors with their DD bit set and free mbufs.
+ * Return the total number of buffers freed.
+ */
+static __rte_always_inline int
+ngbe_tx_free_bufs(struct ngbe_tx_queue *txq)
+{
+	struct ngbe_tx_entry *txep;
+	uint32_t status;
+	int i, nb_free = 0;
+	struct rte_mbuf *m, *free[RTE_NGBE_TX_MAX_FREE_BUF_SZ];
+
+	/* check DD bit on threshold descriptor */
+	status = txq->tx_ring[txq->tx_next_dd].dw3;
+	if (!(status & rte_cpu_to_le_32(NGBE_TXD_DD))) {
+		if (txq->nb_tx_free >> 1 < txq->tx_free_thresh)
+			ngbe_set32_masked(txq->tdc_reg_addr,
+				NGBE_TXCFG_FLUSH, NGBE_TXCFG_FLUSH);
+		return 0;
+	}
+
+	/*
+	 * first buffer to free from S/W ring is at index
+	 * tx_next_dd - (tx_free_thresh-1)
+	 */
+	txep = &txq->sw_ring[txq->tx_next_dd - (txq->tx_free_thresh - 1)];
+	for (i = 0; i < txq->tx_free_thresh; ++i, ++txep) {
+		/* free buffers one at a time */
+		m = rte_pktmbuf_prefree_seg(txep->mbuf);
+		txep->mbuf = NULL;
+
+		if (unlikely(m == NULL))
+			continue;
+
+		if (nb_free >= RTE_NGBE_TX_MAX_FREE_BUF_SZ ||
+		    (nb_free > 0 && m->pool != free[0]->pool)) {
+			rte_mempool_put_bulk(free[0]->pool,
+					     (void **)free, nb_free);
+			nb_free = 0;
+		}
+
+		free[nb_free++] = m;
+	}
+
+	if (nb_free > 0)
+		rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+
+	/* buffers were freed, update counters */
+	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_free_thresh);
+	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_free_thresh);
+	if (txq->tx_next_dd >= txq->nb_tx_desc)
+		txq->tx_next_dd = (uint16_t)(txq->tx_free_thresh - 1);
+
+	return txq->tx_free_thresh;
+}
+
+/* Populate 4 descriptors with data from 4 mbufs */
+static inline void
+tx4(volatile struct ngbe_tx_desc *txdp, struct rte_mbuf **pkts)
+{
+	uint64_t buf_dma_addr;
+	uint32_t pkt_len;
+	int i;
+
+	for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
+		buf_dma_addr = rte_mbuf_data_iova(*pkts);
+		pkt_len = (*pkts)->data_len;
+
+		/* write data to descriptor */
+		txdp->qw0 = rte_cpu_to_le_64(buf_dma_addr);
+		txdp->dw2 = cpu_to_le32(NGBE_TXD_FLAGS |
+					NGBE_TXD_DATLEN(pkt_len));
+		txdp->dw3 = cpu_to_le32(NGBE_TXD_PAYLEN(pkt_len));
+
+		rte_prefetch0(&(*pkts)->pool);
+	}
+}
+
+/* Populate 1 descriptor with data from 1 mbuf */
+static inline void
+tx1(volatile struct ngbe_tx_desc *txdp, struct rte_mbuf **pkts)
+{
+	uint64_t buf_dma_addr;
+	uint32_t pkt_len;
+
+	buf_dma_addr = rte_mbuf_data_iova(*pkts);
+	pkt_len = (*pkts)->data_len;
+
+	/* write data to descriptor */
+	txdp->qw0 = cpu_to_le64(buf_dma_addr);
+	txdp->dw2 = cpu_to_le32(NGBE_TXD_FLAGS |
+				NGBE_TXD_DATLEN(pkt_len));
+	txdp->dw3 = cpu_to_le32(NGBE_TXD_PAYLEN(pkt_len));
+
+	rte_prefetch0(&(*pkts)->pool);
+}
+
+/*
+ * Fill H/W descriptor ring with mbuf data.
+ * Copy mbuf pointers to the S/W ring.
+ */
+static inline void
+ngbe_tx_fill_hw_ring(struct ngbe_tx_queue *txq, struct rte_mbuf **pkts,
+		      uint16_t nb_pkts)
+{
+	volatile struct ngbe_tx_desc *txdp = &txq->tx_ring[txq->tx_tail];
+	struct ngbe_tx_entry *txep = &txq->sw_ring[txq->tx_tail];
+	const int N_PER_LOOP = 4;
+	const int N_PER_LOOP_MASK = N_PER_LOOP - 1;
+	int mainpart, leftover;
+	int i, j;
+
+	/*
+	 * Process most of the packets in chunks of N pkts.  Any
+	 * leftover packets will get processed one at a time.
+	 */
+	mainpart = (nb_pkts & ((uint32_t)~N_PER_LOOP_MASK));
+	leftover = (nb_pkts & ((uint32_t)N_PER_LOOP_MASK));
+	for (i = 0; i < mainpart; i += N_PER_LOOP) {
+		/* Copy N mbuf pointers to the S/W ring */
+		for (j = 0; j < N_PER_LOOP; ++j)
+			(txep + i + j)->mbuf = *(pkts + i + j);
+		tx4(txdp + i, pkts + i);
+	}
+
+	if (unlikely(leftover > 0)) {
+		for (i = 0; i < leftover; ++i) {
+			(txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
+			tx1(txdp + mainpart + i, pkts + mainpart + i);
+		}
+	}
+}
+
+static inline uint16_t
+tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+	     uint16_t nb_pkts)
+{
+	struct ngbe_tx_queue *txq = (struct ngbe_tx_queue *)tx_queue;
+	uint16_t n = 0;
+
+	/*
+	 * Begin scanning the H/W ring for done descriptors when the
+	 * number of available descriptors drops below tx_free_thresh.
+	 * For each done descriptor, free the associated buffer.
+	 */
+	if (txq->nb_tx_free < txq->tx_free_thresh)
+		ngbe_tx_free_bufs(txq);
+
+	/* Only use descriptors that are available */
+	nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	/* Use exactly nb_pkts descriptors */
+	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
+
+	/*
+	 * At this point, we know there are enough descriptors in the
+	 * ring to transmit all the packets.  This assumes that each
+	 * mbuf contains a single segment, and that no new offloads
+	 * are expected, which would require a new context descriptor.
+	 */
+
+	/*
+	 * See if we're going to wrap-around. If so, handle the top
+	 * of the descriptor ring first, then do the bottom.  If not,
+	 * the processing looks just like the "bottom" part anyway...
+	 */
+	if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
+		n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
+		ngbe_tx_fill_hw_ring(txq, tx_pkts, n);
+		txq->tx_tail = 0;
+	}
+
+	/* Fill H/W descriptor ring with mbuf data */
+	ngbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
+	txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
+
+	/*
+	 * Check for wrap-around. This would only happen if we used
+	 * up to the last descriptor in the ring, no more, no less.
+	 */
+	if (txq->tx_tail >= txq->nb_tx_desc)
+		txq->tx_tail = 0;
+
+	PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
+		   (uint16_t)txq->port_id, (uint16_t)txq->queue_id,
+		   (uint16_t)txq->tx_tail, (uint16_t)nb_pkts);
+
+	/* update tail pointer */
+	rte_wmb();
+	ngbe_set32_relaxed(txq->tdt_reg_addr, txq->tx_tail);
+
+	return nb_pkts;
+}
+
+uint16_t
+ngbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+		       uint16_t nb_pkts)
+{
+	uint16_t nb_tx;
+
+	/* Try to transmit at least chunks of TX_MAX_BURST pkts */
+	if (likely(nb_pkts <= RTE_PMD_NGBE_TX_MAX_BURST))
+		return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
+
+	/* transmit more than the max burst, in chunks of TX_MAX_BURST */
+	nb_tx = 0;
+	while (nb_pkts) {
+		uint16_t ret, n;
+
+		n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_NGBE_TX_MAX_BURST);
+		ret = tx_xmit_pkts(tx_queue, &tx_pkts[nb_tx], n);
+		nb_tx = (uint16_t)(nb_tx + ret);
+		nb_pkts = (uint16_t)(nb_pkts - ret);
+		if (ret < n)
+			break;
+	}
+
+	return nb_tx;
+}
+
 /*********************************************************************
  *
  *  Rx functions
diff --git a/drivers/net/ngbe/ngbe_rxtx.h b/drivers/net/ngbe/ngbe_rxtx.h
index 1c8fd76f12..616b41a300 100644
--- a/drivers/net/ngbe/ngbe_rxtx.h
+++ b/drivers/net/ngbe/ngbe_rxtx.h
@@ -147,7 +147,34 @@  struct ngbe_tx_desc {
 	rte_le32_t dw3; /* r.olinfo_status, w.status      */
 };
 
+/* @ngbe_tx_desc.dw2 */
+#define NGBE_TXD_DATLEN(v)        ((0xFFFF & (v))) /* data buffer length */
+#define NGBE_TXD_1588             ((0x1) << 19) /* IEEE1588 time stamp */
+#define NGBE_TXD_DATA             ((0x0) << 20) /* data descriptor */
+#define NGBE_TXD_EOP              ((0x1) << 24) /* End of Packet */
+#define NGBE_TXD_FCS              ((0x1) << 25) /* Insert FCS */
+#define NGBE_TXD_LINKSEC          ((0x1) << 26) /* Insert LinkSec */
+#define NGBE_TXD_ECU              ((0x1) << 28) /* forward to ECU */
+#define NGBE_TXD_CNTAG            ((0x1) << 29) /* insert CN tag */
+#define NGBE_TXD_VLE              ((0x1) << 30) /* insert VLAN tag */
+#define NGBE_TXD_TSE              ((0x1) << 31) /* transmit segmentation */
+
+#define NGBE_TXD_FLAGS (NGBE_TXD_FCS | NGBE_TXD_EOP)
+
+/* @ngbe_tx_desc.dw3 */
+#define NGBE_TXD_DD_UNUSED        NGBE_TXD_DD
+#define NGBE_TXD_IDX_UNUSED(v)    NGBE_TXD_IDX(v)
+#define NGBE_TXD_CC               ((0x1) << 7) /* check context */
+#define NGBE_TXD_IPSEC            ((0x1) << 8) /* request ipsec offload */
+#define NGBE_TXD_L4CS             ((0x1) << 9) /* insert TCP/UDP/SCTP csum */
+#define NGBE_TXD_IPCS             ((0x1) << 10) /* insert IPv4 csum */
+#define NGBE_TXD_EIPCS            ((0x1) << 11) /* insert outer IP csum */
+#define NGBE_TXD_MNGFLT           ((0x1) << 12) /* enable management filter */
+#define NGBE_TXD_PAYLEN(v)        ((0x7FFFF & (v)) << 13) /* payload length */
+
+#define RTE_PMD_NGBE_TX_MAX_BURST 32
 #define RTE_PMD_NGBE_RX_MAX_BURST 32
+#define RTE_NGBE_TX_MAX_FREE_BUF_SZ 64
 
 #define RX_RING_SZ ((NGBE_RING_DESC_MAX + RTE_PMD_NGBE_RX_MAX_BURST) * \
 		    sizeof(struct ngbe_rx_desc))