[v6,1/9] mbuf: new function to generate raw Tx offload value

Message ID: 20190402083444.24755-2-konstantin.ananyev@intel.com
State: Accepted, archived
Delegated to: Akhil Goyal
Series: Few small improvements for ipsec library

Checks

Context                          Check    Description
ci/checkpatch                    success  coding style OK
ci/Intel-compilation             success  Compilation OK
ci/mellanox-Performance-Testing  success  Performance Testing PASS
ci/intel-Performance-Testing     success  Performance Testing PASS

Commit Message

Ananyev, Konstantin April 2, 2019, 8:34 a.m. UTC
  Operations that set or update bit-fields often cause compilers
to generate suboptimal code.
To help avoid such situations for the tx_offload fields:
introduce a new enum for the tx_offload bit-field lengths and offsets,
and a new function to generate a raw tx_offload value.
Add a new test case for the introduced function to the mbuf unit tests.

Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Acked-by: Akhil Goyal <akhil.goyal@nxp.com>
---
 app/test/test_mbuf.c       | 101 ++++++++++++++++++++++++++++++++++++-
 lib/librte_mbuf/rte_mbuf.h | 101 ++++++++++++++++++++++++++++++++++---
 2 files changed, 194 insertions(+), 8 deletions(-)
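
For illustration, a minimal sketch of the intended usage; the function
names and header lengths below are arbitrary examples, not part of the
patch:

	#include <rte_mbuf.h>

	/* Before: each bit-field assignment is a separate
	 * read-modify-write on the same 64-bit tx_offload word.
	 */
	static void
	fill_offload_bitfields(struct rte_mbuf *m)
	{
		m->l2_len = 14;       /* Ethernet header */
		m->l3_len = 20;       /* IPv4 header, no options */
		m->l4_len = 8;        /* UDP header */
		m->tso_segsz = 0;
		m->outer_l3_len = 0;
		m->outer_l2_len = 0;
	}

	/* After: build the whole 64-bit value once and store it
	 * with a single write.
	 */
	static void
	fill_offload_raw(struct rte_mbuf *m)
	{
		m->tx_offload = rte_mbuf_tx_offload(14, 20, 8, 0, 0, 0, 0);
	}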
  

Comments

Olivier Matz April 2, 2019, 8:49 a.m. UTC | #1
On Tue, Apr 02, 2019 at 09:34:36AM +0100, Konstantin Ananyev wrote:
> Operations that set or update bit-fields often cause compilers
> to generate suboptimal code.
> To help avoid such situations for the tx_offload fields:
> introduce a new enum for the tx_offload bit-field lengths and offsets,
> and a new function to generate a raw tx_offload value.
> Add a new test case for the introduced function to the mbuf unit tests.
> 
> Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> Acked-by: Akhil Goyal <akhil.goyal@nxp.com>

Acked-by: Olivier Matz <olivier.matz@6wind.com>

Thanks
  

Patch

diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index 9e82a20be..030385ec5 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -27,6 +27,7 @@ 
 #include <rte_mbuf.h>
 #include <rte_random.h>
 #include <rte_cycles.h>
+#include <rte_malloc.h>
 
 #include "test.h"
 
@@ -1028,6 +1029,99 @@  test_mbuf_linearize_check(struct rte_mempool *pktmbuf_pool)
 	return 0;
 }
 
+/*
+ * Helper function for test_tx_offload
+ */
+static inline void
+set_tx_offload(struct rte_mbuf *mb, uint64_t il2, uint64_t il3, uint64_t il4,
+	uint64_t tso, uint64_t ol3, uint64_t ol2)
+{
+	mb->l2_len = il2;
+	mb->l3_len = il3;
+	mb->l4_len = il4;
+	mb->tso_segsz = tso;
+	mb->outer_l3_len = ol3;
+	mb->outer_l2_len = ol2;
+}
+
+static int
+test_tx_offload(void)
+{
+	struct rte_mbuf *mb;
+	uint64_t tm, v1, v2;
+	size_t sz;
+	uint32_t i;
+
+	static volatile struct {
+		uint16_t l2;
+		uint16_t l3;
+		uint16_t l4;
+		uint16_t tso;
+	} txof;
+
+	const uint32_t num = 0x10000;
+
+	txof.l2 = rte_rand() % (1 << RTE_MBUF_L2_LEN_BITS);
+	txof.l3 = rte_rand() % (1 << RTE_MBUF_L3_LEN_BITS);
+	txof.l4 = rte_rand() % (1 << RTE_MBUF_L4_LEN_BITS);
+	txof.tso = rte_rand() % (1 << RTE_MBUF_TSO_SEGSZ_BITS);
+
+	printf("%s started, tx_offload = {\n"
+		"\tl2_len=%#hx,\n"
+		"\tl3_len=%#hx,\n"
+		"\tl4_len=%#hx,\n"
+		"\ttso_segsz=%#hx,\n"
+		"\touter_l3_len=%#x,\n"
+		"\touter_l2_len=%#x,\n"
+		"};\n",
+		__func__,
+		txof.l2, txof.l3, txof.l4, txof.tso, txof.l3, txof.l2);
+
+	sz = sizeof(*mb) * num;
+	mb = rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE);
+	if (mb == NULL) {
+		printf("%s failed, out of memory\n", __func__);
+		return -ENOMEM;
+	}
+
+	memset(mb, 0, sz);
+	tm = rte_rdtsc_precise();
+
+	for (i = 0; i != num; i++)
+		set_tx_offload(mb + i, txof.l2, txof.l3, txof.l4,
+			txof.tso, txof.l3, txof.l2);
+
+	tm = rte_rdtsc_precise() - tm;
+	printf("%s set tx_offload by bit-fields: %u iterations, %"
+		PRIu64 " cycles, %#Lf cycles/iter\n",
+		__func__, num, tm, (long double)tm / num);
+
+	v1 = mb[rte_rand() % num].tx_offload;
+
+	memset(mb, 0, sz);
+	tm = rte_rdtsc_precise();
+
+	for (i = 0; i != num; i++)
+		mb[i].tx_offload = rte_mbuf_tx_offload(txof.l2, txof.l3,
+			txof.l4, txof.tso, txof.l3, txof.l2, 0);
+
+	tm = rte_rdtsc_precise() - tm;
+	printf("%s set raw tx_offload: %u iterations, %"
+		PRIu64 " cycles, %#Lf cycles/iter\n",
+		__func__, num, tm, (long double)tm / num);
+
+	v2 = mb[rte_rand() % num].tx_offload;
+
+	rte_free(mb);
+
+	printf("%s finished\n"
+		"expected tx_offload value: 0x%" PRIx64 ";\n"
+		"rte_mbuf_tx_offload value: 0x%" PRIx64 ";\n",
+		__func__, v1, v2);
+
+	return (v1 == v2) ? 0 : -EINVAL;
+}
+
 static int
 test_mbuf(void)
 {
@@ -1126,8 +1220,13 @@  test_mbuf(void)
 		printf("test_mbuf_linearize_check() failed\n");
 		goto err;
 	}
-	ret = 0;
 
+	if (test_tx_offload() < 0) {
+		printf("test_tx_offload() failed\n");
+		goto err;
+	}
+
+	ret = 0;
 err:
 	rte_mempool_free(pktmbuf_pool);
 	rte_mempool_free(pktmbuf_pool2);
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index d961ccaf6..d7439e889 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -40,6 +40,7 @@ 
 #include <rte_atomic.h>
 #include <rte_prefetch.h>
 #include <rte_branch_prediction.h>
+#include <rte_byteorder.h>
 #include <rte_mbuf_ptype.h>
 
 #ifdef __cplusplus
@@ -479,6 +480,50 @@  struct rte_mbuf_sched {
 	uint16_t reserved;   /**< Reserved. */
 }; /**< Hierarchical scheduler */
 
+/**
+ * Enum for the tx_offload bit-field lengths and offsets.
+ * Defines the layout of the rte_mbuf tx_offload field.
+ */
+enum {
+	RTE_MBUF_L2_LEN_BITS = 7,
+	RTE_MBUF_L3_LEN_BITS = 9,
+	RTE_MBUF_L4_LEN_BITS = 8,
+	RTE_MBUF_TSO_SEGSZ_BITS = 16,
+	RTE_MBUF_OUTL3_LEN_BITS = 9,
+	RTE_MBUF_OUTL2_LEN_BITS = 7,
+	RTE_MBUF_TXOFLD_UNUSED_BITS = sizeof(uint64_t) * CHAR_BIT -
+		RTE_MBUF_L2_LEN_BITS -
+		RTE_MBUF_L3_LEN_BITS -
+		RTE_MBUF_L4_LEN_BITS -
+		RTE_MBUF_TSO_SEGSZ_BITS -
+		RTE_MBUF_OUTL3_LEN_BITS -
+		RTE_MBUF_OUTL2_LEN_BITS,
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+	RTE_MBUF_L2_LEN_OFS =
+		sizeof(uint64_t) * CHAR_BIT - RTE_MBUF_L2_LEN_BITS,
+	RTE_MBUF_L3_LEN_OFS = RTE_MBUF_L2_LEN_OFS - RTE_MBUF_L3_LEN_BITS,
+	RTE_MBUF_L4_LEN_OFS = RTE_MBUF_L3_LEN_OFS - RTE_MBUF_L4_LEN_BITS,
+	RTE_MBUF_TSO_SEGSZ_OFS = RTE_MBUF_L4_LEN_OFS - RTE_MBUF_TSO_SEGSZ_BITS,
+	RTE_MBUF_OUTL3_LEN_OFS =
+		RTE_MBUF_TSO_SEGSZ_OFS - RTE_MBUF_OUTL3_LEN_BITS,
+	RTE_MBUF_OUTL2_LEN_OFS =
+		RTE_MBUF_OUTL3_LEN_OFS - RTE_MBUF_OUTL2_LEN_BITS,
+	RTE_MBUF_TXOFLD_UNUSED_OFS =
+		RTE_MBUF_OUTL2_LEN_OFS - RTE_MBUF_TXOFLD_UNUSED_BITS,
+#else
+	RTE_MBUF_L2_LEN_OFS = 0,
+	RTE_MBUF_L3_LEN_OFS = RTE_MBUF_L2_LEN_OFS + RTE_MBUF_L2_LEN_BITS,
+	RTE_MBUF_L4_LEN_OFS = RTE_MBUF_L3_LEN_OFS + RTE_MBUF_L3_LEN_BITS,
+	RTE_MBUF_TSO_SEGSZ_OFS = RTE_MBUF_L4_LEN_OFS + RTE_MBUF_L4_LEN_BITS,
+	RTE_MBUF_OUTL3_LEN_OFS =
+		RTE_MBUF_TSO_SEGSZ_OFS + RTE_MBUF_TSO_SEGSZ_BITS,
+	RTE_MBUF_OUTL2_LEN_OFS =
+		RTE_MBUF_OUTL3_LEN_OFS + RTE_MBUF_OUTL3_LEN_BITS,
+	RTE_MBUF_TXOFLD_UNUSED_OFS =
+		RTE_MBUF_OUTL2_LEN_OFS + RTE_MBUF_OUTL2_LEN_BITS,
+#endif
+};
+
 /**
  * The generic rte_mbuf, containing a packet mbuf.
  */
@@ -640,19 +685,24 @@  struct rte_mbuf {
 		uint64_t tx_offload;       /**< combined for easy fetch */
 		__extension__
 		struct {
-			uint64_t l2_len:7;
+			uint64_t l2_len:RTE_MBUF_L2_LEN_BITS;
 			/**< L2 (MAC) Header Length for non-tunneling pkt.
 			 * Outer_L4_len + ... + Inner_L2_len for tunneling pkt.
 			 */
-			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
-			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
-			uint64_t tso_segsz:16; /**< TCP TSO segment size */
+			uint64_t l3_len:RTE_MBUF_L3_LEN_BITS;
+			/**< L3 (IP) Header Length. */
+			uint64_t l4_len:RTE_MBUF_L4_LEN_BITS;
+			/**< L4 (TCP/UDP) Header Length. */
+			uint64_t tso_segsz:RTE_MBUF_TSO_SEGSZ_BITS;
+			/**< TCP TSO segment size */
 
 			/* fields for TX offloading of tunnels */
-			uint64_t outer_l3_len:9; /**< Outer L3 (IP) Hdr Length. */
-			uint64_t outer_l2_len:7; /**< Outer L2 (MAC) Hdr Length. */
+			uint64_t outer_l3_len:RTE_MBUF_OUTL3_LEN_BITS;
+			/**< Outer L3 (IP) Hdr Length. */
+			uint64_t outer_l2_len:RTE_MBUF_OUTL2_LEN_BITS;
+			/**< Outer L2 (MAC) Hdr Length. */
 
-			/* uint64_t unused:8; */
+			/* uint64_t unused:RTE_MBUF_TXOFLD_UNUSED_BITS; */
 		};
 	};
 
@@ -2243,6 +2293,43 @@  static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail
 	return 0;
 }
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: This API may change without prior notice.
+ *
+ * Generate a raw tx_offload value from the given input values.
+ * Note that it is the caller's responsibility to make sure that the
+ * input parameters don't exceed the maximum bit-field values.
+ * @param il2
+ *   l2_len value.
+ * @param il3
+ *   l3_len value.
+ * @param il4
+ *   l4_len value.
+ * @param tso
+ *   tso_segsz value.
+ * @param ol3
+ *   outer_l3_len value.
+ * @param ol2
+ *   outer_l2_len value.
+ * @param unused
+ *   unused value.
+ * @return
+ *   raw tx_offload value.
+ */
+static __rte_always_inline uint64_t
+rte_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso,
+	uint64_t ol3, uint64_t ol2, uint64_t unused)
+{
+	return il2 << RTE_MBUF_L2_LEN_OFS |
+		il3 << RTE_MBUF_L3_LEN_OFS |
+		il4 << RTE_MBUF_L4_LEN_OFS |
+		tso << RTE_MBUF_TSO_SEGSZ_OFS |
+		ol3 << RTE_MBUF_OUTL3_LEN_OFS |
+		ol2 << RTE_MBUF_OUTL2_LEN_OFS |
+		unused << RTE_MBUF_TXOFLD_UNUSED_OFS;
+}
+
 /**
  * Validate general requirements for Tx offload in mbuf.
  *