[8/8] net/octeontx: support Rx Tx checksum offload

Message ID 1584351224-23500-9-git-send-email-hkalra@marvell.com (mailing list archive)
State Accepted, archived
Delegated to: Jerin Jacob
Headers
Series add new features to octeontx PMD |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/travis-robot success Travis build: passed
ci/Intel-compilation fail Compilation issues

Commit Message

Harman Kalra March 16, 2020, 9:33 a.m. UTC
  This patch implements Rx/Tx checksum offload. When a packet
with a wrong checksum (inner/outer L3/L4) is received, the driver
reports which layer has the bad checksum, and the checksum is
corrected on the Tx side if hardware checksum offload is enabled.

Signed-off-by: Harman Kalra <hkalra@marvell.com>
---
 doc/guides/nics/features/octeontx.ini  |   4 +
 drivers/net/octeontx/octeontx_ethdev.c |  22 ++-
 drivers/net/octeontx/octeontx_ethdev.h |  21 ++-
 drivers/net/octeontx/octeontx_rxtx.c   |  10 +-
 drivers/net/octeontx/octeontx_rxtx.h   | 194 +++++++++++++++++++++++--
 5 files changed, 227 insertions(+), 24 deletions(-)
  

Patch

diff --git a/doc/guides/nics/features/octeontx.ini b/doc/guides/nics/features/octeontx.ini
index 6049c1c43..8a95c216c 100644
--- a/doc/guides/nics/features/octeontx.ini
+++ b/doc/guides/nics/features/octeontx.ini
@@ -17,6 +17,10 @@  Unicast MAC filter   = Y
 VLAN filter          = Y
 VLAN offload         = P
 CRC offload          = Y
+L3 checksum offload  = Y
+L4 checksum offload  = Y
+Inner L3 checksum    = Y
+Inner L4 checksum    = Y
 Packet type parsing  = Y
 Flow control         = Y
 Basic stats          = Y
diff --git a/drivers/net/octeontx/octeontx_ethdev.c b/drivers/net/octeontx/octeontx_ethdev.c
index 191869683..ea3b278a1 100644
--- a/drivers/net/octeontx/octeontx_ethdev.c
+++ b/drivers/net/octeontx/octeontx_ethdev.c
@@ -370,6 +370,16 @@  octeontx_tx_offload_flags(struct rte_eth_dev *eth_dev)
 	struct octeontx_nic *nic = octeontx_pmd_priv(eth_dev);
 	uint16_t flags = 0;
 
+	if (nic->tx_offloads & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM ||
+	    nic->tx_offloads & DEV_TX_OFFLOAD_OUTER_UDP_CKSUM)
+		flags |= OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F;
+
+	if (nic->tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM ||
+	    nic->tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM ||
+	    nic->tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM ||
+	    nic->tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM)
+		flags |= OCCTX_TX_OFFLOAD_L3_L4_CSUM_F;
+
 	if (!(nic->tx_offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE))
 		flags |= OCCTX_TX_OFFLOAD_MBUF_NOFF_F;
 
@@ -383,13 +393,15 @@  static uint16_t
 octeontx_rx_offload_flags(struct rte_eth_dev *eth_dev)
 {
 	struct octeontx_nic *nic = octeontx_pmd_priv(eth_dev);
-	struct rte_eth_dev_data *data = eth_dev->data;
-	struct rte_eth_conf *conf = &data->dev_conf;
-	struct rte_eth_rxmode *rxmode = &conf->rxmode;
 	uint16_t flags = 0;
 
-	if (rxmode->mq_mode == ETH_MQ_RX_RSS)
-		flags |= OCCTX_RX_OFFLOAD_RSS_F;
+	if (nic->rx_offloads & (DEV_RX_OFFLOAD_TCP_CKSUM |
+			 DEV_RX_OFFLOAD_UDP_CKSUM))
+		flags |= OCCTX_RX_OFFLOAD_CSUM_F;
+
+	if (nic->rx_offloads & (DEV_RX_OFFLOAD_IPV4_CKSUM |
+				DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM))
+		flags |= OCCTX_RX_OFFLOAD_CSUM_F;
 
 	if (nic->rx_offloads & DEV_RX_OFFLOAD_SCATTER) {
 		flags |= OCCTX_RX_MULTI_SEG_F;
diff --git a/drivers/net/octeontx/octeontx_ethdev.h b/drivers/net/octeontx/octeontx_ethdev.h
index dc53b53be..7246fb6d1 100644
--- a/drivers/net/octeontx/octeontx_ethdev.h
+++ b/drivers/net/octeontx/octeontx_ethdev.h
@@ -53,13 +53,24 @@ 
 
 #define OCCTX_MAX_MTU		(OCCTX_MAX_FRS - OCCTX_L2_OVERHEAD)
 
-#define OCTEONTX_RX_OFFLOADS		(DEV_RX_OFFLOAD_CHECKSUM     | \
-					 DEV_RX_OFFLOAD_SCATTER	     | \
-					 DEV_RX_OFFLOAD_JUMBO_FRAME  | \
+#define OCTEONTX_RX_OFFLOADS		(				   \
+					 DEV_RX_OFFLOAD_CHECKSUM	 | \
+					 DEV_RX_OFFLOAD_SCTP_CKSUM       | \
+					 DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM | \
+					 DEV_RX_OFFLOAD_SCATTER	         | \
+					 DEV_RX_OFFLOAD_SCATTER		 | \
+					 DEV_RX_OFFLOAD_JUMBO_FRAME	 | \
 					 DEV_RX_OFFLOAD_VLAN_FILTER)
 
-#define OCTEONTX_TX_OFFLOADS		(DEV_TX_OFFLOAD_MT_LOCKFREE    |  \
-					 DEV_TX_OFFLOAD_MBUF_FAST_FREE |  \
+#define OCTEONTX_TX_OFFLOADS		(				   \
+					 DEV_TX_OFFLOAD_MBUF_FAST_FREE	 | \
+					 DEV_TX_OFFLOAD_MT_LOCKFREE	 | \
+					 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | \
+					 DEV_TX_OFFLOAD_OUTER_UDP_CKSUM	 | \
+					 DEV_TX_OFFLOAD_IPV4_CKSUM	 | \
+					 DEV_TX_OFFLOAD_TCP_CKSUM	 | \
+					 DEV_TX_OFFLOAD_UDP_CKSUM	 | \
+					 DEV_TX_OFFLOAD_SCTP_CKSUM	 | \
 					 DEV_TX_OFFLOAD_MULTI_SEGS)
 
 static inline struct octeontx_nic *
diff --git a/drivers/net/octeontx/octeontx_rxtx.c b/drivers/net/octeontx/octeontx_rxtx.c
index c817f7179..d2453ba26 100644
--- a/drivers/net/octeontx/octeontx_rxtx.c
+++ b/drivers/net/octeontx/octeontx_rxtx.c
@@ -41,7 +41,7 @@  octeontx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	return count; /* return number of pkts received */
 }
 
-#define T(name, f1, f0, sz, flags)					\
+#define T(name, f3, f2, f1, f0, sz, flags)				\
 static uint16_t __rte_noinline	__hot					\
 octeontx_xmit_pkts_ ##name(void *tx_queue,				\
 			struct rte_mbuf **tx_pkts, uint16_t pkts)	\
@@ -60,9 +60,9 @@  octeontx_set_tx_function(struct rte_eth_dev *dev)
 {
 	struct octeontx_nic *nic = octeontx_pmd_priv(dev);
 
-	const eth_tx_burst_t tx_burst_func[2][2] = {
-#define T(name, f1, f0, sz, flags)			\
-	[f1][f0] =  octeontx_xmit_pkts_ ##name,
+	const eth_tx_burst_t tx_burst_func[2][2][2][2] = {
+#define T(name, f3, f2, f1, f0, sz, flags)			\
+	[f3][f2][f1][f0] =  octeontx_xmit_pkts_ ##name,
 
 OCCTX_TX_FASTPATH_MODES
 #undef T
@@ -70,5 +70,7 @@  OCCTX_TX_FASTPATH_MODES
 
 	dev->tx_pkt_burst = tx_burst_func
 		[!!(nic->tx_offload_flags & OCCTX_TX_OFFLOAD_MBUF_NOFF_F)]
+		[!!(nic->tx_offload_flags & OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+		[!!(nic->tx_offload_flags & OCCTX_TX_OFFLOAD_L3_L4_CSUM_F)]
 		[!!(nic->tx_offload_flags & OCCTX_TX_MULTI_SEG_F)];
 }
diff --git a/drivers/net/octeontx/octeontx_rxtx.h b/drivers/net/octeontx/octeontx_rxtx.h
index cc044dd79..acc1f5cb8 100644
--- a/drivers/net/octeontx/octeontx_rxtx.h
+++ b/drivers/net/octeontx/octeontx_rxtx.h
@@ -18,17 +18,65 @@ 
 #define BIT(nr) (1UL << (nr))
 
 #define OCCTX_RX_OFFLOAD_NONE		(0)
-#define OCCTX_RX_OFFLOAD_RSS_F          BIT(0)
-#define OCCTX_RX_VLAN_FLTR_F            BIT(1)
-#define OCCTX_RX_MULTI_SEG_F		BIT(15)
+#define OCCTX_RX_MULTI_SEG_F		BIT(0)
+#define OCCTX_RX_OFFLOAD_CSUM_F         BIT(1)
+#define OCCTX_RX_VLAN_FLTR_F            BIT(2)
 
 #define OCCTX_TX_OFFLOAD_NONE		(0)
+#define OCCTX_TX_MULTI_SEG_F		BIT(0)
+#define OCCTX_TX_OFFLOAD_L3_L4_CSUM_F	BIT(1)
+#define OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F	BIT(2)
 #define OCCTX_TX_OFFLOAD_MBUF_NOFF_F	BIT(3)
 
-#define OCCTX_TX_MULTI_SEG_F		BIT(15)
 /* Packet type table */
 #define PTYPE_SIZE	OCCTX_PKI_LTYPE_LAST
 
+/* octeontx send header sub descriptor structure */
+RTE_STD_C11
+union octeontx_send_hdr_w0_u {
+	uint64_t u;
+	struct {
+		uint64_t total   : 16;
+		uint64_t markptr : 8;
+		uint64_t l3ptr   : 8;
+		uint64_t l4ptr   : 8;
+		uint64_t ii	 : 1;
+		uint64_t shp_dis : 1;
+		uint64_t ckle    : 1;
+		uint64_t cklf    : 2;
+		uint64_t ckl3    : 1;
+		uint64_t ckl4    : 2;
+		uint64_t p	 : 1;
+		uint64_t format	 : 7;
+		uint64_t tstamp  : 1;
+		uint64_t tso_eom : 1;
+		uint64_t df	 : 1;
+		uint64_t tso	 : 1;
+		uint64_t n2	 : 1;
+		uint64_t scntn1	 : 3;
+	};
+};
+
+RTE_STD_C11
+union octeontx_send_hdr_w1_u {
+	uint64_t u;
+	struct {
+		uint64_t tso_mss : 14;
+		uint64_t shp_ra  : 2;
+		uint64_t tso_sb  : 8;
+		uint64_t leptr   : 8;
+		uint64_t lfptr   : 8;
+		uint64_t shp_chg : 9;
+		uint64_t tso_fn  : 7;
+		uint64_t l2len   : 8;
+	};
+};
+
+struct octeontx_send_hdr_s {
+	union octeontx_send_hdr_w0_u w0;
+	union octeontx_send_hdr_w1_u w1;
+};
+
 static const uint32_t __rte_cache_aligned
 ptype_table[PTYPE_SIZE][PTYPE_SIZE][PTYPE_SIZE] = {
 	[LC_NONE][LE_NONE][LF_NONE] = RTE_PTYPE_UNKNOWN,
@@ -182,6 +230,90 @@  octeontx_prefree_seg(struct rte_mbuf *m)
 	return 1;
 }
 
+static __rte_always_inline void
+octeontx_tx_checksum_offload(uint64_t *cmd_buf, const uint16_t flags,
+			     struct rte_mbuf *m)
+{
+	struct octeontx_send_hdr_s *send_hdr =
+				(struct octeontx_send_hdr_s *)cmd_buf;
+	uint64_t ol_flags = m->ol_flags;
+
+	/* PKO Checksum L4 Algorithm Enumeration
+	 * 0x0 - No checksum
+	 * 0x1 - UDP L4 checksum
+	 * 0x2 - TCP L4 checksum
+	 * 0x3 - SCTP L4 checksum
+	 */
+	const uint8_t csum = (!(((ol_flags ^ PKT_TX_UDP_CKSUM) >> 52) & 0x3) +
+		      (!(((ol_flags ^ PKT_TX_TCP_CKSUM) >> 52) & 0x3) * 2) +
+		      (!(((ol_flags ^ PKT_TX_SCTP_CKSUM) >> 52) & 0x3) * 3));
+
+	const uint8_t is_tunnel_parsed = (!!(ol_flags & PKT_TX_TUNNEL_GTP) ||
+				      !!(ol_flags & PKT_TX_TUNNEL_VXLAN_GPE) ||
+				      !!(ol_flags & PKT_TX_TUNNEL_VXLAN) ||
+				      !!(ol_flags & PKT_TX_TUNNEL_GRE) ||
+				      !!(ol_flags & PKT_TX_TUNNEL_GENEVE) ||
+				      !!(ol_flags & PKT_TX_TUNNEL_IP) ||
+				      !!(ol_flags & PKT_TX_TUNNEL_IPIP));
+
+	const uint8_t csum_outer = (!!(ol_flags & PKT_TX_OUTER_UDP_CKSUM) ||
+				    !!(ol_flags & PKT_TX_TUNNEL_UDP));
+	const uint8_t outer_l2_len = m->outer_l2_len;
+	const uint8_t l2_len = m->l2_len;
+
+	if ((flags & OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
+	    (flags & OCCTX_TX_OFFLOAD_L3_L4_CSUM_F)) {
+		if (is_tunnel_parsed) {
+			/* Outer L3 */
+			send_hdr->w0.l3ptr = outer_l2_len;
+			send_hdr->w0.l4ptr = outer_l2_len + m->outer_l3_len;
+			/* Set ckl3 for PKO to calculate IPV4 header checksum */
+			send_hdr->w0.ckl3 = !!(ol_flags & PKT_TX_OUTER_IPV4);
+
+			/* Outer L4 */
+			send_hdr->w0.ckl4 = csum_outer;
+
+			/* Inner L3 */
+			send_hdr->w1.leptr = send_hdr->w0.l4ptr + l2_len;
+			send_hdr->w1.lfptr = send_hdr->w1.leptr + m->l3_len;
+			/* Set ckle for PKO to calculate inner IPV4 header
+			 * checksum.
+			 */
+			send_hdr->w0.ckle = !!(ol_flags & PKT_TX_IPV4);
+
+			/* Inner L4 */
+			send_hdr->w0.cklf = csum;
+		} else {
+			/* Inner L3 */
+			send_hdr->w0.l3ptr = l2_len;
+			send_hdr->w0.l4ptr = l2_len + m->l3_len;
+			/* Set ckl3 for PKO to calculate IPV4 header checksum */
+			send_hdr->w0.ckl3 = !!(ol_flags & PKT_TX_IPV4);
+
+			/* Inner L4 */
+			send_hdr->w0.ckl4 = csum;
+		}
+	} else if (flags & OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
+		/* Outer L3 */
+		send_hdr->w0.l3ptr = outer_l2_len;
+		send_hdr->w0.l4ptr = outer_l2_len + m->outer_l3_len;
+		/* Set ckl3 for PKO to calculate IPV4 header checksum */
+		send_hdr->w0.ckl3 = !!(ol_flags & PKT_TX_OUTER_IPV4);
+
+		/* Outer L4 */
+		send_hdr->w0.ckl4 = csum_outer;
+	} else if (flags & OCCTX_TX_OFFLOAD_L3_L4_CSUM_F) {
+		/* Inner L3 */
+		send_hdr->w0.l3ptr = l2_len;
+		send_hdr->w0.l4ptr = l2_len + m->l3_len;
+		/* Set ckl3 for PKO to calculate IPV4 header checksum */
+		send_hdr->w0.ckl3 = !!(ol_flags & PKT_TX_IPV4);
+
+		/* Inner L4 */
+		send_hdr->w0.ckl4 = csum;
+	}
+}
+
 static __rte_always_inline uint16_t
 __octeontx_xmit_prepare(struct rte_mbuf *tx_pkt, uint64_t *cmd_buf,
 			const uint16_t flag)
@@ -192,6 +324,11 @@  __octeontx_xmit_prepare(struct rte_mbuf *tx_pkt, uint64_t *cmd_buf,
 	cmd_buf[nb_desc++] = tx_pkt->data_len & 0xffff;
 	cmd_buf[nb_desc++] = 0x0;
 
+	/* Enable tx checksum offload */
+	if ((flag & OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F) ||
+	    (flag & OCCTX_TX_OFFLOAD_L3_L4_CSUM_F))
+		octeontx_tx_checksum_offload(cmd_buf, flag, tx_pkt);
+
 	/* SEND_HDR[DF] bit controls if buffer is to be freed or
 	 * not, as SG_DESC[I] and SEND_HDR[II] are clear.
 	 */
@@ -230,6 +367,11 @@  __octeontx_xmit_mseg_prepare(struct rte_mbuf *tx_pkt, uint64_t *cmd_buf,
 	cmd_buf[nb_desc++] = tx_pkt->pkt_len & 0xffff;
 	cmd_buf[nb_desc++] = 0x0;
 
+	/* Enable tx checksum offload */
+	if ((flag & OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F) ||
+	    (flag & OCCTX_TX_OFFLOAD_L3_L4_CSUM_F))
+		octeontx_tx_checksum_offload(cmd_buf, flag, tx_pkt);
+
 	do {
 		m_next = tx_pkt->next;
 		/* To handle case where mbufs belong to diff pools, like
@@ -305,13 +447,45 @@  __octeontx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t
 octeontx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
 
+#define L3L4CSUM_F   OCCTX_TX_OFFLOAD_L3_L4_CSUM_F
+#define OL3OL4CSUM_F OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F
 #define NOFF_F       OCCTX_TX_OFFLOAD_MBUF_NOFF_F
 #define MULT_F       OCCTX_TX_MULTI_SEG_F
-/* [NOFF] [MULTI_SEG] */
-#define OCCTX_TX_FASTPATH_MODES						      \
-T(no_offload,				0, 0,	4,   OCCTX_TX_OFFLOAD_NONE)   \
-T(mseg,					0, 1,	14,  MULT_F)		      \
-T(noff,					1, 0,	4,   NOFF_F)		      \
-T(noff_mseg,				1, 1,	14,  NOFF_F | MULT_F)
+
+/* [NOFF] [OL3OL4CSUM_F] [L3L4CSUM_F] [MULTI_SEG] */
+#define OCCTX_TX_FASTPATH_MODES						       \
+T(no_offload,				0, 0, 0, 0,	4,		       \
+					OCCTX_TX_OFFLOAD_NONE)		       \
+T(mseg,					0, 0, 0, 1,	14,		       \
+					MULT_F)			               \
+T(l3l4csum,				0, 0, 1, 0,     4,		       \
+					L3L4CSUM_F)			       \
+T(l3l4csum_mseg,			0, 0, 1, 1,	14,		       \
+					L3L4CSUM_F | MULT_F)		       \
+T(ol3ol4csum,				0, 1, 0, 0,	4,		       \
+					OL3OL4CSUM_F)			       \
+T(ol3l4csum_mseg,			0, 1, 0, 1,	14,		       \
+					OL3OL4CSUM_F | MULT_F)	               \
+T(ol3l4csum_l3l4csum,			0, 1, 1, 0,     4,		       \
+					OL3OL4CSUM_F | L3L4CSUM_F)	       \
+T(ol3l4csum_l3l4csum_mseg,		0, 1, 1, 1,	14,		       \
+					OL3OL4CSUM_F | L3L4CSUM_F | MULT_F)    \
+T(noff,					1, 0, 0, 0,     4,		       \
+					NOFF_F)				       \
+T(noff_mseg,				1, 0, 0, 1,	14,		       \
+					NOFF_F | MULT_F)	               \
+T(noff_l3l4csum,			1, 0, 1, 0,     4,		       \
+					NOFF_F | L3L4CSUM_F)		       \
+T(noff_l3l4csum_mseg,			1, 0, 1, 1,	14,		       \
+					NOFF_F | L3L4CSUM_F | MULT_F)	       \
+T(noff_ol3ol4csum,			1, 1, 0, 0,	4,		       \
+					NOFF_F | OL3OL4CSUM_F)		       \
+T(noff_ol3ol4csum_mseg,			1, 1, 0, 1,	14,		       \
+					NOFF_F | OL3OL4CSUM_F | MULT_F)	       \
+T(noff_ol3ol4csum_l3l4csum,		1, 1, 1, 0,     4,		       \
+					NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)    \
+T(noff_ol3ol4csum_l3l4csum_mseg,	1, 1, 1, 1,	14,		       \
+					NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F |   \
+					MULT_F)
 
  #endif /* __OCTEONTX_RXTX_H__ */