[08/12] net/bnxt: use table-based packet type translation
diff mbox series

Message ID 20200909155302.28656-9-lance.richardson@broadcom.com
State Accepted
Delegated to: Ajit Khaparde
Headers show
Series
  • net/bnxt: vector PMD improvements
Related show

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Lance Richardson Sept. 9, 2020, 3:53 p.m. UTC
Use table-based method for translating receive packet descriptor
flags into rte_mbuf packet type values.

Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
---
 drivers/net/bnxt/bnxt_rxr.c           | 127 ++++++++++++++++----------
 drivers/net/bnxt/bnxt_rxr.h           |   2 +
 drivers/net/bnxt/bnxt_rxtx_vec_neon.c |  88 ++++++------------
 drivers/net/bnxt/bnxt_rxtx_vec_sse.c  |  81 +++++-----------
 4 files changed, 134 insertions(+), 164 deletions(-)

Patch
diff mbox series

diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
index 5673e2b50f..a882dd20be 100644
--- a/drivers/net/bnxt/bnxt_rxr.c
+++ b/drivers/net/bnxt/bnxt_rxr.c
@@ -322,62 +322,88 @@  static inline struct rte_mbuf *bnxt_tpa_end(
 	return mbuf;
 }
 
-static uint32_t
-bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1)
+uint32_t bnxt_ptype_table[BNXT_PTYPE_TBL_DIM] __rte_cache_aligned;
+
+static void __rte_cold
+bnxt_init_ptype_table(void)
 {
-	uint32_t l3, pkt_type = 0;
-	uint32_t t_ipcs = 0, ip6 = 0, vlan = 0;
-	uint32_t flags_type;
-
-	vlan = !!(rxcmp1->flags2 &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN));
-	pkt_type |= vlan ? RTE_PTYPE_L2_ETHER_VLAN : RTE_PTYPE_L2_ETHER;
-
-	t_ipcs = !!(rxcmp1->flags2 &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC));
-	ip6 = !!(rxcmp1->flags2 &
-		 rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_IP_TYPE));
-
-	flags_type = rxcmp->flags_type &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
-
-	if (!t_ipcs && !ip6)
-		l3 = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
-	else if (!t_ipcs && ip6)
-		l3 = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
-	else if (t_ipcs && !ip6)
-		l3 = RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
-	else
-		l3 = RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
+	uint32_t *pt = bnxt_ptype_table;
+	static bool initialized;
+	int ip6, tun, type;
+	uint32_t l3;
+	int i;
 
-	switch (flags_type) {
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_ICMP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_ICMP;
-		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_ICMP;
-		break;
+	if (initialized)
+		return;
 
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_TCP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_TCP;
+	for (i = 0; i < BNXT_PTYPE_TBL_DIM; i++) {
+		if (i & (RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN >> 2))
+			pt[i] = RTE_PTYPE_L2_ETHER_VLAN;
 		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_TCP;
-		break;
-
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_UDP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_UDP;
+			pt[i] = RTE_PTYPE_L2_ETHER;
+
+		ip6 = i & (RX_PKT_CMPL_FLAGS2_IP_TYPE >> 7);
+		tun = i & (RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC >> 2);
+		type = (i & 0x38) << 9;
+
+		if (!tun && !ip6)
+			l3 = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
+		else if (!tun && ip6)
+			l3 = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
+		else if (tun && !ip6)
+			l3 = RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
 		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_UDP;
-		break;
-
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_IP):
-		pkt_type |= l3;
-		break;
+			l3 = RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
+
+		switch (type) {
+		case RX_PKT_CMPL_FLAGS_ITYPE_ICMP:
+			if (tun)
+				pt[i] |= l3 | RTE_PTYPE_INNER_L4_ICMP;
+			else
+				pt[i] |= l3 | RTE_PTYPE_L4_ICMP;
+			break;
+		case RX_PKT_CMPL_FLAGS_ITYPE_TCP:
+			if (tun)
+				pt[i] |= l3 | RTE_PTYPE_INNER_L4_TCP;
+			else
+				pt[i] |= l3 | RTE_PTYPE_L4_TCP;
+			break;
+		case RX_PKT_CMPL_FLAGS_ITYPE_UDP:
+			if (tun)
+				pt[i] |= l3 | RTE_PTYPE_INNER_L4_UDP;
+			else
+				pt[i] |= l3 | RTE_PTYPE_L4_UDP;
+			break;
+		case RX_PKT_CMPL_FLAGS_ITYPE_IP:
+			pt[i] |= l3;
+			break;
+		}
 	}
+	initialized = true;
+}
+
+static uint32_t
+bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1)
+{
+	uint32_t flags_type, flags2;
+	uint8_t index;
 
-	return pkt_type;
+	flags_type = rte_le_to_cpu_16(rxcmp->flags_type);
+	flags2 = rte_le_to_cpu_32(rxcmp1->flags2);
+
+	/*
+	 * Index format:
+	 *     bit 0: RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC
+	 *     bit 1: RX_CMPL_FLAGS2_IP_TYPE
+	 *     bit 2: RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN
+	 *     bits 3-6: RX_PKT_CMPL_FLAGS_ITYPE
+	 */
+	index = ((flags_type & RX_PKT_CMPL_FLAGS_ITYPE_MASK) >> 9) |
+		((flags2 & (RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
+			   RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC)) >> 2) |
+		((flags2 & RX_PKT_CMPL_FLAGS2_IP_TYPE) >> 7);
+
+	return bnxt_ptype_table[index];
 }
 
 #ifdef RTE_LIBRTE_IEEE1588
@@ -1046,6 +1072,9 @@  int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq)
 	unsigned int i;
 	uint16_t size;
 
+	/* Initialize packet type table. */
+	bnxt_init_ptype_table();
+
 	size = rte_pktmbuf_data_room_size(rxq->mb_pool) - RTE_PKTMBUF_HEADROOM;
 	size = RTE_MIN(BNXT_MAX_PKT_LEN, size);
 
diff --git a/drivers/net/bnxt/bnxt_rxr.h b/drivers/net/bnxt/bnxt_rxr.h
index 5b9b5f3108..0e21c8f900 100644
--- a/drivers/net/bnxt/bnxt_rxr.h
+++ b/drivers/net/bnxt/bnxt_rxr.h
@@ -238,4 +238,6 @@  void bnxt_set_mark_in_mbuf(struct bnxt *bp,
 #define BNXT_CFA_META_EEM_TCAM_SHIFT		31
 #define BNXT_CFA_META_EM_TEST(x) ((x) >> BNXT_CFA_META_EEM_TCAM_SHIFT)
 
+#define BNXT_PTYPE_TBL_DIM	128
+extern uint32_t bnxt_ptype_table[BNXT_PTYPE_TBL_DIM];
 #endif
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
index 7f3eabcda1..fade67ec8e 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
@@ -93,61 +93,27 @@  bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
 }
 
 static uint32_t
-bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1)
+bnxt_parse_pkt_type(uint32x4_t mm_rxcmp, uint32x4_t mm_rxcmp1)
 {
-	uint32_t l3, pkt_type = 0;
-	uint32_t t_ipcs = 0, ip6 = 0, vlan = 0;
-	uint32_t flags_type;
-
-	vlan = !!(rxcmp1->flags2 &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN));
-	pkt_type |= vlan ? RTE_PTYPE_L2_ETHER_VLAN : RTE_PTYPE_L2_ETHER;
-
-	t_ipcs = !!(rxcmp1->flags2 &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC));
-	ip6 = !!(rxcmp1->flags2 &
-		 rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_IP_TYPE));
-
-	flags_type = rxcmp->flags_type &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
-
-	if (!t_ipcs && !ip6)
-		l3 = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
-	else if (!t_ipcs && ip6)
-		l3 = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
-	else if (t_ipcs && !ip6)
-		l3 = RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
-	else
-		l3 = RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
-
-	switch (flags_type) {
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_ICMP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_ICMP;
-		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_ICMP;
-		break;
+	uint32_t flags_type, flags2;
+	uint8_t index;
 
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_TCP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_TCP;
-		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_TCP;
-		break;
+	flags_type = vgetq_lane_u32(mm_rxcmp, 0);
+	flags2 = (uint16_t)vgetq_lane_u32(mm_rxcmp1, 0);
 
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_UDP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_UDP;
-		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_UDP;
-		break;
-
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_IP):
-		pkt_type |= l3;
-		break;
-	}
+	/*
+	 * Index format:
+	 *     bit 0: RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC
+	 *     bit 1: RX_CMPL_FLAGS2_IP_TYPE
+	 *     bit 2: RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN
+	 *     bits 3-6: RX_PKT_CMPL_FLAGS_ITYPE
+	 */
+	index = ((flags_type & RX_PKT_CMPL_FLAGS_ITYPE_MASK) >> 9) |
+		((flags2 & (RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
+			   RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC)) >> 2) |
+		((flags2 & RX_PKT_CMPL_FLAGS2_IP_TYPE) >> 7);
 
-	return pkt_type;
+	return bnxt_ptype_table[index];
 }
 
 static void
@@ -234,10 +200,12 @@  bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 	/* Handle RX burst request */
 	for (i = 0; i < nb_pkts; i++) {
+		uint32x4_t mm_rxcmp, mm_rxcmp1;
 		struct rx_pkt_cmpl_hi *rxcmp1;
 		struct rte_mbuf *mbuf;
-		uint64x2_t mm_rxcmp;
-		uint8x16_t pkt_mb;
+		uint32x4_t pkt_mb;
+		uint8x16_t tmp;
+		uint32_t ptype;
 
 		cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
 
@@ -247,6 +215,8 @@  bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		if (!CMP_VALID(rxcmp1, raw_cons + 1, cpr->cp_ring_struct))
 			break;
 
+		mm_rxcmp = vld1q_u32((uint32_t *)rxcmp);
+		mm_rxcmp1 = vld1q_u32((uint32_t *)rxcmp);
 		raw_cons += 2;
 		cons = rxcmp->opaque;
 
@@ -258,10 +228,12 @@  bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		vst1q_u64((uint64_t *)&mbuf->rearm_data, mbuf_init);
 
 		/* Set mbuf pkt_len, data_len, and rss_hash fields. */
-		mm_rxcmp = vld1q_u64((uint64_t *)rxcmp);
-		pkt_mb = vqtbl1q_u8(vreinterpretq_u8_u64(mm_rxcmp), shuf_msk);
-		vst1q_u64((uint64_t *)&mbuf->rx_descriptor_fields1,
-			  vreinterpretq_u64_u8(pkt_mb));
+		tmp = vqtbl1q_u8(vreinterpretq_u8_u32(mm_rxcmp), shuf_msk);
+		pkt_mb = vreinterpretq_u32_u8(tmp);
+		ptype = bnxt_parse_pkt_type(mm_rxcmp, mm_rxcmp1);
+		pkt_mb = vsetq_lane_u32(ptype, pkt_mb, 0);
+
+		vst1q_u32((uint32_t *)&mbuf->rx_descriptor_fields1, pkt_mb);
 
 		rte_compiler_barrier();
 
@@ -279,8 +251,6 @@  bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		}
 
 		bnxt_parse_csum(mbuf, rxcmp1);
-		mbuf->packet_type = bnxt_parse_pkt_type(rxcmp, rxcmp1);
-
 		rx_pkts[nb_rx_pkts++] = mbuf;
 	}
 
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
index eced74e4e3..69ffbe4cc9 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
@@ -96,62 +96,28 @@  bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
 	rxq->rxrearm_nb -= nb;
 }
 
-static uint32_t
-bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1)
+static __m128i
+bnxt_parse_pkt_type(__m128i mm_rxcmp, __m128i mm_rxcmp1)
 {
-	uint32_t l3, pkt_type = 0;
-	uint32_t t_ipcs = 0, ip6 = 0, vlan = 0;
-	uint32_t flags_type;
-
-	vlan = !!(rxcmp1->flags2 &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN));
-	pkt_type |= vlan ? RTE_PTYPE_L2_ETHER_VLAN : RTE_PTYPE_L2_ETHER;
-
-	t_ipcs = !!(rxcmp1->flags2 &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC));
-	ip6 = !!(rxcmp1->flags2 &
-		 rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_IP_TYPE));
-
-	flags_type = rxcmp->flags_type &
-		rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
-
-	if (!t_ipcs && !ip6)
-		l3 = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
-	else if (!t_ipcs && ip6)
-		l3 = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
-	else if (t_ipcs && !ip6)
-		l3 = RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
-	else
-		l3 = RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
-
-	switch (flags_type) {
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_ICMP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_ICMP;
-		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_ICMP;
-		break;
-
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_TCP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_TCP;
-		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_TCP;
-		break;
+	uint32_t flags_type, flags2;
+	uint8_t index;
 
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_UDP):
-		if (!t_ipcs)
-			pkt_type |= l3 | RTE_PTYPE_L4_UDP;
-		else
-			pkt_type |= l3 | RTE_PTYPE_INNER_L4_UDP;
-		break;
+	flags_type = _mm_extract_epi16(mm_rxcmp, 0);
+	flags2 = _mm_extract_epi32(mm_rxcmp1, 0);
 
-	case RTE_LE32(RX_PKT_CMPL_FLAGS_ITYPE_IP):
-		pkt_type |= l3;
-		break;
-	}
+	/*
+	 * Index format:
+	 *     bit 0: RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC
+	 *     bit 1: RX_CMPL_FLAGS2_IP_TYPE
+	 *     bit 2: RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN
+	 *     bits 3-6: RX_PKT_CMPL_FLAGS_ITYPE
+	 */
+	index = ((flags_type & RX_PKT_CMPL_FLAGS_ITYPE_MASK) >> 9) |
+		((flags2 & (RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
+			   RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC)) >> 2) |
+		((flags2 & RX_PKT_CMPL_FLAGS2_IP_TYPE) >> 7);
 
-	return pkt_type;
+	return _mm_set_epi32(0, 0, 0, bnxt_ptype_table[index]);
 }
 
 static void
@@ -242,7 +208,7 @@  bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	for (i = 0; i < nb_pkts; i++) {
 		struct rx_pkt_cmpl_hi *rxcmp1;
 		struct rte_mbuf *mbuf;
-		__m128i mm_rxcmp, pkt_mb;
+		__m128i mm_rxcmp, mm_rxcmp1, pkt_mb, ptype;
 
 		cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
 
@@ -252,6 +218,9 @@  bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		if (!CMP_VALID(rxcmp1, raw_cons + 1, cpr->cp_ring_struct))
 			break;
 
+		mm_rxcmp = _mm_load_si128((__m128i *)rxcmp);
+		mm_rxcmp1 = _mm_load_si128((__m128i *)rxcmp1);
+
 		raw_cons += 2;
 		cons = rxcmp->opaque;
 
@@ -263,8 +232,10 @@  bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		_mm_store_si128((__m128i *)&mbuf->rearm_data, mbuf_init);
 
 		/* Set mbuf pkt_len, data_len, and rss_hash fields. */
-		mm_rxcmp = _mm_load_si128((__m128i *)rxcmp);
 		pkt_mb = _mm_shuffle_epi8(mm_rxcmp, shuf_msk);
+		ptype = bnxt_parse_pkt_type(mm_rxcmp, mm_rxcmp1);
+		pkt_mb = _mm_blend_epi16(pkt_mb, ptype, 0x3);
+
 		_mm_storeu_si128((void *)&mbuf->rx_descriptor_fields1, pkt_mb);
 
 		rte_compiler_barrier();
@@ -283,8 +254,6 @@  bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		}
 
 		bnxt_parse_csum(mbuf, rxcmp1);
-		mbuf->packet_type = bnxt_parse_pkt_type(rxcmp, rxcmp1);
-
 		rx_pkts[nb_rx_pkts++] = mbuf;
 	}