diff mbox series

[15/44] net/cnxk: add Tx multi-segment version for cn9k

Message ID 20210306153404.10781-16-ndabilpuram@marvell.com (mailing list archive)
State New
Delegated to: Jerin Jacob
Headers show
Series Marvell CNXK Ethdev Driver | expand

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Nithin Dabilpuram March 6, 2021, 3:33 p.m. UTC
Add Tx burst multi-segment version for CN9K.

Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/net/cnxk/cn9k_tx.c     |  70 +++++++++++++++++++++++++++
 drivers/net/cnxk/cn9k_tx.h     | 105 +++++++++++++++++++++++++++++++++++++++++
 drivers/net/cnxk/cnxk_ethdev.h |   4 ++
 3 files changed, 179 insertions(+)
diff mbox series

Patch

diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c
index 06e9618..a474eb5 100644
--- a/drivers/net/cnxk/cn9k_tx.c
+++ b/drivers/net/cnxk/cn9k_tx.c
@@ -55,6 +55,44 @@  nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
 	return pkts;
 }
 
+static __rte_always_inline uint16_t
+nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
+		   uint64_t *cmd, const uint16_t flags)
+{ /* Multi-seg Tx burst: prepares and submits each of the pkts mbufs in turn */
+	struct cn9k_eth_txq *txq = tx_queue;
+	uint64_t i;
+	const rte_iova_t io_addr = txq->io_addr;
+	void *lmt_addr = txq->lmt_addr;
+	uint16_t segdw; /* Value returned by cn9k_nix_prepare_mseg() per packet */
+
+	NIX_XMIT_FC_OR_RETURN(txq, pkts); /* NOTE(review): defined elsewhere; may return early */
+
+	roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));
+
+	/* For TSO, do the per-packet header writes before the barrier below */
+	if (flags & NIX_TX_OFFLOAD_TSO_F) {
+		for (i = 0; i < pkts; i++)
+			cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
+	}
+
+	/* Let's commit any changes in the packet here, as no further changes
+	 * to the packet will be made unless no-fast-free is enabled.
+	 */
+	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
+		rte_io_wmb();
+
+	for (i = 0; i < pkts; i++) { /* Build the descriptor in cmd, then submit it */
+		cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags);
+		segdw = cn9k_nix_prepare_mseg(tx_pkts[i], cmd, flags);
+		cn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
+	}
+
+	/* Deduct this burst from the cached count in txq->fc_cache_pkts */
+	txq->fc_cache_pkts -= pkts;
+
+	return pkts;
+}
+
 #define T(name, f4, f3, f2, f1, f0, sz, flags)				       \
 	static uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_##name(    \
 		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
@@ -71,6 +109,25 @@  nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
 NIX_TX_FASTPATH_MODES
 #undef T
 
+#define T(name, f4, f3, f2, f1, f0, sz, flags)				       \
+	static uint16_t __rte_noinline __rte_hot                               \
+		cn9k_nix_xmit_pkts_mseg_##name(void *tx_queue,                 \
+					       struct rte_mbuf **tx_pkts,      \
+					       uint16_t pkts)                  \
+	{                                                                      \
+		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
+		/* cmd is on-stack scratch handed to nix_xmit_pkts_mseg */     \
+		/* TSO needs inner L3/L4 csum offload; else send nothing */    \
+		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&			       \
+		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))		       \
+			return 0;                                              \
+		return nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd,        \
+					  (flags) | NIX_TX_MULTI_SEG_F);       \
+	}
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
 static inline void
 pick_tx_func(struct rte_eth_dev *eth_dev,
 	     const eth_tx_burst_t tx_burst[2][2][2][2][2])
@@ -89,6 +146,8 @@  pick_tx_func(struct rte_eth_dev *eth_dev,
 void
 cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
 {
+	struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);
+
 	const eth_tx_burst_t nix_eth_tx_burst[2][2][2][2][2] = {
 #define T(name, f4, f3, f2, f1, f0, sz, flags)					\
 	[f4][f3][f2][f1][f0] = cn9k_nix_xmit_pkts_##name,
@@ -97,7 +156,18 @@  cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
 #undef T
 	};
 
+	const eth_tx_burst_t nix_eth_tx_burst_mseg[2][2][2][2][2] = {
+#define T(name, f4, f3, f2, f1, f0, sz, flags)					\
+	[f4][f3][f2][f1][f0] = cn9k_nix_xmit_pkts_mseg_##name,
+
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
 	pick_tx_func(eth_dev, nix_eth_tx_burst);
 
+	if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+		pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
+
 	rte_mb();
 }
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index 5f915e8..d653b3c 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -294,6 +294,111 @@  cn9k_nix_xmit_submit_lmt_release(const rte_iova_t io_addr)
 	return roc_lmt_submit_ldeorl(io_addr);
 }
 
+static __rte_always_inline uint16_t
+cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
+{ /* Fill cmd's SG area from mbuf chain m; returns cmd size in 2-dword units */
+	struct nix_send_hdr_s *send_hdr;
+	union nix_send_sg_s *sg;
+	struct rte_mbuf *m_next;
+	uint64_t *slist, sg_u;
+	uint64_t nb_segs;
+	uint64_t segdw;
+	uint8_t off, i;
+
+	send_hdr = (struct nix_send_hdr_s *)cmd;
+	send_hdr->w0.total = m->pkt_len;
+	send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
+
+	if (flags & NIX_TX_NEED_EXT_HDR)
+		off = 2; /* SG area starts two dwords later in cmd */
+	else
+		off = 0;
+
+	sg = (union nix_send_sg_s *)&cmd[2 + off];
+	/* Clear sg->u before use, keeping only its top 6 bits */
+	sg->u &= 0xFC00000000000000;
+	sg_u = sg->u;
+	slist = &cmd[3 + off];
+
+	i = 0; /* Index of the segment within the current SG subdesc (0..2) */
+	nb_segs = m->nb_segs;
+
+	/* Fill mbuf segments: one address dword per segment, lengths in sg_u */
+	do {
+		m_next = m->next;
+		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4)); /* bit offset 16 * i */
+		*slist = rte_mbuf_data_iova(m);
+		/* Set invert df if buffer is not to be freed by H/W */
+		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
+			/* Commit changes to mbuf */
+			rte_io_wmb();
+		}
+		/* Mark mempool object as "put" since it is freed by NIX */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+		if (!(sg_u & (1ULL << (i + 55))))
+			__mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+		rte_io_wmb();
+#endif
+		slist++;
+		i++;
+		nb_segs--;
+		if (i > 2 && nb_segs) { /* Three segments filled and more remain */
+			i = 0;
+			/* Next SG subdesc: its header dword keeps only the top 6 bits */
+			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
+			sg->u = sg_u;
+			sg->segs = 3;
+			sg = (union nix_send_sg_s *)slist;
+			sg_u = sg->u;
+			slist++;
+		}
+		m = m_next;
+	} while (nb_segs);
+
+	sg->u = sg_u;
+	sg->segs = i; /* Segment count of the last (possibly partial) subdesc */
+	segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off]; /* 64-bit words written */
+	/* Convert the word count to 2-dword (128-bit) units, rounding up */
+	segdw = (segdw >> 1) + (segdw & 0x1);
+	/* Add the units ahead of the SG area: 1, plus 1 more when off == 2 */
+	segdw += (off >> 1) + 1;
+	send_hdr->w0.sizem1 = segdw - 1; /* Field holds the size minus one */
+
+	return segdw;
+}
+
+static __rte_always_inline void
+cn9k_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw)
+{ /* Calls roc_lmt_mov_seg() only; no roc_lmt_submit_*() is issued here */
+	roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
+}
+
+static __rte_always_inline void
+cn9k_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, rte_iova_t io_addr,
+		       uint16_t segdw)
+{ /* Submit one prepared descriptor (cmd, segdw) via lmt_addr and io_addr */
+	uint64_t lmt_status;
+
+	do { /* Redo the copy and the submit for as long as the status reads 0 */
+		roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
+		lmt_status = roc_lmt_submit_ldeor(io_addr);
+	} while (lmt_status == 0);
+}
+
+static __rte_always_inline void
+cn9k_nix_xmit_mseg_one_release(uint64_t *cmd, void *lmt_addr,
+			       rte_iova_t io_addr, uint16_t segdw)
+{ /* Same copy + submit loop as cn9k_nix_xmit_mseg_one(), after a barrier */
+	uint64_t lmt_status;
+
+	rte_io_wmb(); /* I/O write barrier ahead of the first copy + submit */
+	do { /* Redo the copy and the submit for as long as the status reads 0 */
+		roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
+		lmt_status = roc_lmt_submit_ldeor(io_addr);
+	} while (lmt_status == 0);
+}
+
 #define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
 #define VLAN_F	     NIX_TX_OFFLOAD_VLAN_QINQ_F
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 8facafc..2f31cba 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -46,6 +46,10 @@ 
 #define CNXK_NIX_TX_NB_SEG_MAX 9
 #endif
 
+#define CNXK_NIX_TX_MSEG_SG_DWORDS                                             \
+	((RTE_ALIGN_MUL_CEIL(CNXK_NIX_TX_NB_SEG_MAX, 3) / 3) +                 \
+	 CNXK_NIX_TX_NB_SEG_MAX) /* ceil(max segs / 3) plus one per segment */
+
 #define CNXK_NIX_RSS_L3_L4_SRC_DST                                             \
 	(ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY | ETH_RSS_L4_SRC_ONLY |     \
 	 ETH_RSS_L4_DST_ONLY)