diff mbox

[dpdk-dev,v4,05/16] fm10k: add 2 functions to parse pkt_type and offload flag

Message ID 1446110173-13330-6-git-send-email-jing.d.chen@intel.com (mailing list archive)
State Changes Requested, archived
Headers show

Commit Message

Chen, Jing D Oct. 29, 2015, 9:16 a.m. UTC
From: "Chen Jing D(Mark)" <jing.d.chen@intel.com>

Add 2 functions that use SSE instructions to parse RX descriptors
and fill in pkt_type and ol_flags in the mbuf.

Signed-off-by: Chen Jing D(Mark) <jing.d.chen@intel.com>
---
 drivers/net/fm10k/fm10k_rxtx_vec.c |  127 ++++++++++++++++++++++++++++++++++++
 1 files changed, 127 insertions(+), 0 deletions(-)
diff mbox

Patch

diff --git a/drivers/net/fm10k/fm10k_rxtx_vec.c b/drivers/net/fm10k/fm10k_rxtx_vec.c
index 6c21f15..88c9536 100644
--- a/drivers/net/fm10k/fm10k_rxtx_vec.c
+++ b/drivers/net/fm10k/fm10k_rxtx_vec.c
@@ -44,6 +44,133 @@ 
 #pragma GCC diagnostic ignored "-Wcast-qual"
 #endif
 
+/* Handling the offload flags (olflags) field takes computation
+ * time when receiving packets. Therefore we provide a flag to disable
+ * the processing of the olflags field when it is not needed. This
+ * gives improved performance, at the cost of losing the offload info
+ * in the received packet.
+ */
+#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
+
+/* VLAN-present flag shift: fm10k_desc_to_olflags_v() shifts 16-bit word 4
+ * of each descriptor right by this amount before masking it with
+ * PKT_RX_VLAN_PKT.
+ */
+#define VP_SHIFT     (2)
+/* L3 type shift: the L3 type field (mask 0x0070, bits 4-6) is shifted right
+ * by this amount to form a 0-7 lookup index.
+ */
+#define L3TYPE_SHIFT     (4)
+/* L4 type shift: the L4 type field (mask 0x0380, bits 7-9) is shifted right
+ * by this amount to form a 0-7 lookup index.
+ */
+#define L4TYPE_SHIFT     (7)
+/* Fill in ol_flags for four received packets at once using SSE.
+ *
+ * descs:   four RX descriptors, one 128-bit vector per packet.
+ * rx_pkts: the four mbufs that go with descs[0..3]; only ol_flags is written,
+ *          and it is overwritten rather than ORed into.
+ *
+ * The unpack steps gather the same 16-bit word of all four descriptors into
+ * the low 64 bits of one register, packet 0 in the lowest lane. For each
+ * packet two pieces of the descriptor are used:
+ *  - the low 4 bits of 16-bit word 0 (rss type) index a 16-entry byte table
+ *    that yields PKT_RX_RSS_HASH or 0;
+ *  - 16-bit word 4, shifted right by VP_SHIFT and masked with
+ *    PKT_RX_VLAN_PKT, yields the VLAN flag.
+ * Both results are ORed together; each packet gets a 16-bit value.
+ */
+static inline void
+fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
+{
+	__m128i ptype0, ptype1, vtag0, vtag1;
+	union {
+		uint16_t e[4];
+		uint64_t dword;
+	} vol;
+
+	const __m128i pkttype_msk = _mm_set_epi16(
+			0x0000, 0x0000, 0x0000, 0x0000,
+			PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT,
+			PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT);
+
+	/* mask everything except rss type: keep the low 4 bits of the four
+	 * low 16-bit lanes and zero the four high lanes
+	 */
+	const __m128i rsstype_msk = _mm_set_epi16(
+			0x0000, 0x0000, 0x0000, 0x0000,
+			0x000F, 0x000F, 0x000F, 0x000F);
+
+	/* map rss type to rss hash flag: byte i of this table is the flag for
+	 * rss type i (_mm_set_epi8 lists byte 15 first), so types 1, 2, 3, 5,
+	 * 7 and 8 give PKT_RX_RSS_HASH and every other type gives 0. The high
+	 * byte of each masked lane is 0 and therefore selects table byte 0,
+	 * which is 0.
+	 */
+	const __m128i rss_flags = _mm_set_epi8(0, 0, 0, 0,
+			0, 0, 0, PKT_RX_RSS_HASH,
+			PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, 0,
+			PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0);
+
+	ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]);
+	ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]);
+	vtag0 = _mm_unpackhi_epi16(descs[0], descs[1]);
+	vtag1 = _mm_unpackhi_epi16(descs[2], descs[3]);
+
+	ptype0 = _mm_unpacklo_epi32(ptype0, ptype1);
+	ptype0 = _mm_and_si128(ptype0, rsstype_msk);
+	ptype0 = _mm_shuffle_epi8(rss_flags, ptype0);
+
+	vtag1 = _mm_unpacklo_epi32(vtag0, vtag1);
+	vtag1 = _mm_srli_epi16(vtag1, VP_SHIFT);
+	vtag1 = _mm_and_si128(vtag1, pkttype_msk);
+
+	vtag1 = _mm_or_si128(ptype0, vtag1);
+	vol.dword = _mm_cvtsi128_si64(vtag1); /* low 64 bits only */
+
+	rx_pkts[0]->ol_flags = vol.e[0];
+	rx_pkts[1]->ol_flags = vol.e[1];
+	rx_pkts[2]->ol_flags = vol.e[2];
+	rx_pkts[3]->ol_flags = vol.e[3];
+}
+/* Fill in packet_type for four received packets at once using SSE.
+ *
+ * descs:   four RX descriptors, one 128-bit vector per packet.
+ * rx_pkts: the four mbufs that go with descs[0..3]; only packet_type is
+ *          written, and it is assigned the 16-bit result outright.
+ *
+ * 16-bit word 0 of every descriptor is gathered into the low 64 bits of one
+ * register (packet 0 in the lowest lane). Bits 4-6 are looked up in an L3
+ * table and bits 7-9 in an L4/tunnel table; the L3 result lands in the low
+ * byte and the L4/tunnel result in the high byte of each packet's value.
+ */
+static inline void
+fm10k_desc_to_pktype_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
+{
+	__m128i l3l4type0, l3l4type1, l3type, l4type;
+	union {
+		uint16_t e[4];
+		uint64_t dword;
+	} vol;
+
+	/* L3 pkt type mask: bit 4 to bit 6 of each of the four low lanes */
+	const __m128i l3type_msk = _mm_set_epi16(
+			0x0000, 0x0000, 0x0000, 0x0000,
+			0x0070, 0x0070, 0x0070, 0x0070);
+
+	/* L4 pkt type mask: bit 7 to bit 9 of each of the four low lanes */
+	const __m128i l4type_msk = _mm_set_epi16(
+			0x0000, 0x0000, 0x0000, 0x0000,
+			0x0380, 0x0380, 0x0380, 0x0380);
+
+	/* convert RRC l3 type to mbuf format: byte i of this table is the
+	 * value for l3 type i (_mm_set_epi8 lists byte 15 first), i.e.
+	 * 1 -> RTE_PTYPE_L3_IPV4, 2 -> RTE_PTYPE_L3_IPV4_EXT,
+	 * 3 -> RTE_PTYPE_L3_IPV6, 4 -> RTE_PTYPE_L3_IPV6_EXT, others -> 0
+	 */
+	const __m128i l3type_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+			0, 0, 0, RTE_PTYPE_L3_IPV6_EXT,
+			RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV4_EXT,
+			RTE_PTYPE_L3_IPV4, 0);
+
+	/* Convert RRC l4 type to mbuf format. Each table entry is the
+	 * RTE_PTYPE value shifted right by 8 bits so that it fits into the
+	 * 8-bit table element; the lookup result is shifted left by 8 bits
+	 * again further down. Index: 1 -> TCP, 2 -> UDP, 3 -> GRE,
+	 * 4 -> VXLAN, 5 -> NVGRE, 6 -> GENEVE, 0 and 7 -> 0.
+	 */
+	const __m128i l4type_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0,
+			RTE_PTYPE_TUNNEL_GENEVE >> 8,
+			RTE_PTYPE_TUNNEL_NVGRE >> 8,
+			RTE_PTYPE_TUNNEL_VXLAN >> 8,
+			RTE_PTYPE_TUNNEL_GRE >> 8,
+			RTE_PTYPE_L4_UDP >> 8,
+			RTE_PTYPE_L4_TCP >> 8,
+			0);
+
+	l3l4type0 = _mm_unpacklo_epi16(descs[0], descs[1]);
+	l3l4type1 = _mm_unpacklo_epi16(descs[2], descs[3]);
+	l3l4type0 = _mm_unpacklo_epi32(l3l4type0, l3l4type1);
+
+	l3type = _mm_and_si128(l3l4type0, l3type_msk);
+	l4type = _mm_and_si128(l3l4type0, l4type_msk);
+
+	l3type = _mm_srli_epi16(l3type, L3TYPE_SHIFT);
+	l4type = _mm_srli_epi16(l4type, L4TYPE_SHIFT);
+
+	l3type = _mm_shuffle_epi8(l3type_flags, l3type);
+	/* l4type_flags entries were stored shifted right by 8 bits; after the
+	 * lookup they are shifted left by 8 bits to restore the real value
+	 */
+	l4type = _mm_shuffle_epi8(l4type_flags, l4type);
+
+	l4type = _mm_slli_epi16(l4type, 8);
+	l3l4type0 = _mm_or_si128(l3type, l4type);
+	vol.dword = _mm_cvtsi128_si64(l3l4type0); /* low 64 bits only */
+
+	rx_pkts[0]->packet_type = vol.e[0];
+	rx_pkts[1]->packet_type = vol.e[1];
+	rx_pkts[2]->packet_type = vol.e[2];
+	rx_pkts[3]->packet_type = vol.e[3];
+}
+#else /* !RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE: both helpers become no-ops */
+#define fm10k_desc_to_olflags_v(desc, rx_pkts) do {} while (0)
+#define fm10k_desc_to_pktype_v(desc, rx_pkts) do {} while (0)
+#endif /* RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE */
+
 int __attribute__((cold))
 fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq)
 {