[dpdk-dev,v4,2/6] lib: add distributor vector flow matching

Message ID 1483948248-91364-3-git-send-email-david.hunt@intel.com (mailing list archive)
State Superseded, archived
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel compilation success Compilation OK

Commit Message

Hunt, David Jan. 9, 2017, 7:50 a.m. UTC
  Signed-off-by: David Hunt <david.hunt@intel.com>
---
 lib/librte_distributor/Makefile                    |   4 +
 lib/librte_distributor/rte_distributor_burst.c     |  11 +-
 lib/librte_distributor/rte_distributor_match_sse.c | 113 +++++++++++++++++++++
 lib/librte_distributor/rte_distributor_priv.h      |   6 ++
 4 files changed, 133 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_distributor/rte_distributor_match_sse.c
  

Comments

Bruce Richardson Jan. 13, 2017, 3:26 p.m. UTC | #1
On Mon, Jan 09, 2017 at 07:50:44AM +0000, David Hunt wrote:
> Signed-off-by: David Hunt <david.hunt@intel.com>
> ---
>  lib/librte_distributor/Makefile                    |   4 +
>  lib/librte_distributor/rte_distributor_burst.c     |  11 +-
>  lib/librte_distributor/rte_distributor_match_sse.c | 113 +++++++++++++++++++++
>  lib/librte_distributor/rte_distributor_priv.h      |   6 ++
>  4 files changed, 133 insertions(+), 1 deletion(-)
>  create mode 100644 lib/librte_distributor/rte_distributor_match_sse.c
> 
> diff --git a/lib/librte_distributor/Makefile b/lib/librte_distributor/Makefile
> index 2acc54d..a725aaf 100644
> --- a/lib/librte_distributor/Makefile
> +++ b/lib/librte_distributor/Makefile
> @@ -44,6 +44,10 @@ LIBABIVER := 1
>  # all source are stored in SRCS-y
>  SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) := rte_distributor.c
>  SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_burst.c
> +ifeq ($(CONFIG_RTE_ARCH_X86),y)
> +SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_match_sse.c
> +endif
> +
>  

I believe some of the intrinsics used in the vector code are SSE4.2
instructions, so you need to pass that flag for the compilation for e.g.
the "default" target for packaging into distros.

>  # install this header file
>  SYMLINK-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR)-include := rte_distributor.h
> diff --git a/lib/librte_distributor/rte_distributor_burst.c b/lib/librte_distributor/rte_distributor_burst.c
> index ae7cf9d..35044c4 100644
> --- a/lib/librte_distributor/rte_distributor_burst.c
> +++ b/lib/librte_distributor/rte_distributor_burst.c
> @@ -352,6 +352,9 @@ rte_distributor_process_burst(struct rte_distributor_burst *d,
>  		}
>  
>  		switch (d->dist_match_fn) {
> +		case RTE_DIST_MATCH_VECTOR:
> +			find_match_vec(d, &flows[0], &matches[0]);
> +			break;
>  		default:
>  			find_match_scalar(d, &flows[0], &matches[0]);
>  		}
> @@ -538,7 +541,13 @@ rte_distributor_create_burst(const char *name,
>  	snprintf(d->name, sizeof(d->name), "%s", name);
>  	d->num_workers = num_workers;
>  
> -	d->dist_match_fn = RTE_DIST_MATCH_SCALAR;
> +#if defined(RTE_ARCH_X86)
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2)) {
> +		d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
> +	} else {
> +#endif
> +		d->dist_match_fn = RTE_DIST_MATCH_SCALAR;
> +	}
>  

Two issues here:
1) the check needs to be for SSE4.2, not SSE2 [minimum for DPDK on x86
is SSE3 anyway, so no need for any checks for SSE2]
2) The closing brace should be ifdefed out to fix compilation on non-x86
platforms. A simpler/better solution might actually be to remove the
braces since only a single line is involved in each branch.

Regards,
/Bruce
  
Bruce Richardson Jan. 16, 2017, 4:40 p.m. UTC | #2
On Mon, Jan 09, 2017 at 07:50:44AM +0000, David Hunt wrote:
> Signed-off-by: David Hunt <david.hunt@intel.com>
> ---
>  lib/librte_distributor/Makefile                    |   4 +
>  lib/librte_distributor/rte_distributor_burst.c     |  11 +-
>  lib/librte_distributor/rte_distributor_match_sse.c | 113 +++++++++++++++++++++
>  lib/librte_distributor/rte_distributor_priv.h      |   6 ++
>  4 files changed, 133 insertions(+), 1 deletion(-)
>  create mode 100644 lib/librte_distributor/rte_distributor_match_sse.c
> 
> diff --git a/lib/librte_distributor/Makefile b/lib/librte_distributor/Makefile
> index 2acc54d..a725aaf 100644
> --- a/lib/librte_distributor/Makefile
> +++ b/lib/librte_distributor/Makefile
> @@ -44,6 +44,10 @@ LIBABIVER := 1
>  # all source are stored in SRCS-y
>  SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) := rte_distributor.c
>  SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_burst.c
> +ifeq ($(CONFIG_RTE_ARCH_X86),y)
> +SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_match_sse.c
> +endif
> +
>  
>  # install this header file
>  SYMLINK-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR)-include := rte_distributor.h
> diff --git a/lib/librte_distributor/rte_distributor_burst.c b/lib/librte_distributor/rte_distributor_burst.c
> index ae7cf9d..35044c4 100644
> --- a/lib/librte_distributor/rte_distributor_burst.c
> +++ b/lib/librte_distributor/rte_distributor_burst.c
> @@ -352,6 +352,9 @@ rte_distributor_process_burst(struct rte_distributor_burst *d,
>  		}
>  
>  		switch (d->dist_match_fn) {
> +		case RTE_DIST_MATCH_VECTOR:
> +			find_match_vec(d, &flows[0], &matches[0]);
> +			break;
>  		default:
>  			find_match_scalar(d, &flows[0], &matches[0]);
>  		}

Will link not fail on non-x86 platforms due to find_match_vec not having
any implementation on those platforms?

/Bruce
  
Hunt, David Jan. 19, 2017, 12:11 p.m. UTC | #3
On 16/1/2017 4:40 PM, Bruce Richardson wrote:
> On Mon, Jan 09, 2017 at 07:50:44AM +0000, David Hunt wrote:
>> --- a/lib/librte_distributor/rte_distributor_burst.c
>> +++ b/lib/librte_distributor/rte_distributor_burst.c
>> @@ -352,6 +352,9 @@ rte_distributor_process_burst(struct rte_distributor_burst *d,
>>   		}
>>   
>>   		switch (d->dist_match_fn) {
>> +		case RTE_DIST_MATCH_VECTOR:
>> +			find_match_vec(d, &flows[0], &matches[0]);
>> +			break;
>>   		default:
>>   			find_match_scalar(d, &flows[0], &matches[0]);
>>   		}
> Will link not fail on non-x86 platforms due to find_match_vec not having
> any implementation on those platforms?
>
> /Bruce

I've added a fallback find_match_vec in rte_distributor_match_generic.c 
that calls find_match_scalar.

Rgds,
Dave.
  
Hunt, David Jan. 19, 2017, 2:59 p.m. UTC | #4
On 13/1/2017 3:26 PM, Bruce Richardson wrote:
> On Mon, Jan 09, 2017 at 07:50:44AM +0000, David Hunt wrote:
>> Signed-off-by: David Hunt <david.hunt@intel.com>
>> ---
>>   lib/librte_distributor/Makefile                    |   4 +
>>   lib/librte_distributor/rte_distributor_burst.c     |  11 +-
>>   lib/librte_distributor/rte_distributor_match_sse.c | 113 +++++++++++++++++++++
>>   lib/librte_distributor/rte_distributor_priv.h      |   6 ++
>>   4 files changed, 133 insertions(+), 1 deletion(-)
>>   create mode 100644 lib/librte_distributor/rte_distributor_match_sse.c
>>
>> diff --git a/lib/librte_distributor/Makefile b/lib/librte_distributor/Makefile
>> index 2acc54d..a725aaf 100644
>> --- a/lib/librte_distributor/Makefile
>> +++ b/lib/librte_distributor/Makefile
>> @@ -44,6 +44,10 @@ LIBABIVER := 1
>>   # all source are stored in SRCS-y
>>   SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) := rte_distributor.c
>>   SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_burst.c
>> +ifeq ($(CONFIG_RTE_ARCH_X86),y)
>> +SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_match_sse.c
>> +endif
>> +
>>   
> I believe some of the intrinsics used in the vector code are SSE4.2
> instructions, so you need to pass that flag for the compilation for e.g.
> the "default" target for packaging into distros.
>
>>   # install this header file
>>   SYMLINK-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR)-include := rte_distributor.h
>> diff --git a/lib/librte_distributor/rte_distributor_burst.c b/lib/librte_distributor/rte_distributor_burst.c
>> index ae7cf9d..35044c4 100644
>> --- a/lib/librte_distributor/rte_distributor_burst.c
>> +++ b/lib/librte_distributor/rte_distributor_burst.c
>> @@ -352,6 +352,9 @@ rte_distributor_process_burst(struct rte_distributor_burst *d,
>>   		}
>>   
>>   		switch (d->dist_match_fn) {
>> +		case RTE_DIST_MATCH_VECTOR:
>> +			find_match_vec(d, &flows[0], &matches[0]);
>> +			break;
>>   		default:
>>   			find_match_scalar(d, &flows[0], &matches[0]);
>>   		}
>> @@ -538,7 +541,13 @@ rte_distributor_create_burst(const char *name,
>>   	snprintf(d->name, sizeof(d->name), "%s", name);
>>   	d->num_workers = num_workers;
>>   
>> -	d->dist_match_fn = RTE_DIST_MATCH_SCALAR;
>> +#if defined(RTE_ARCH_X86)
>> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2)) {
>> +		d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
>> +	} else {
>> +#endif
>> +		d->dist_match_fn = RTE_DIST_MATCH_SCALAR;
>> +	}
>>   
> Two issues here:
> 1) the check needs to be for SSE4.2, not SSE2 [minimum for DPDK on x86
> is SSE3 anyway, so no need for any checks for SSE2]
> 2) The closing brace should be ifdefed out to fix compilation on non-x86
> platforms. A simpler/better solution might actually be to remove the
> braces since only a single line is involved in each branch.
>
> Regards,
> /Bruce


Will be resolved up in next revision.

Thanks,
Dave.
  

Patch

diff --git a/lib/librte_distributor/Makefile b/lib/librte_distributor/Makefile
index 2acc54d..a725aaf 100644
--- a/lib/librte_distributor/Makefile
+++ b/lib/librte_distributor/Makefile
@@ -44,6 +44,10 @@  LIBABIVER := 1
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) := rte_distributor.c
 SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_burst.c
+ifeq ($(CONFIG_RTE_ARCH_X86),y)
+SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_match_sse.c
+endif
+
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR)-include := rte_distributor.h
diff --git a/lib/librte_distributor/rte_distributor_burst.c b/lib/librte_distributor/rte_distributor_burst.c
index ae7cf9d..35044c4 100644
--- a/lib/librte_distributor/rte_distributor_burst.c
+++ b/lib/librte_distributor/rte_distributor_burst.c
@@ -352,6 +352,9 @@  rte_distributor_process_burst(struct rte_distributor_burst *d,
 		}
 
 		switch (d->dist_match_fn) {
+		case RTE_DIST_MATCH_VECTOR:
+			find_match_vec(d, &flows[0], &matches[0]);
+			break;
 		default:
 			find_match_scalar(d, &flows[0], &matches[0]);
 		}
@@ -538,7 +541,13 @@  rte_distributor_create_burst(const char *name,
 	snprintf(d->name, sizeof(d->name), "%s", name);
 	d->num_workers = num_workers;
 
-	d->dist_match_fn = RTE_DIST_MATCH_SCALAR;
+#if defined(RTE_ARCH_X86)
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2)) {
+		d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
+	} else {
+#endif
+		d->dist_match_fn = RTE_DIST_MATCH_SCALAR;
+	}
 
 	/*
 	 * Set up the backog tags so they're pointing at the second cache
diff --git a/lib/librte_distributor/rte_distributor_match_sse.c b/lib/librte_distributor/rte_distributor_match_sse.c
new file mode 100644
index 0000000..78641f5
--- /dev/null
+++ b/lib/librte_distributor/rte_distributor_match_sse.c
@@ -0,0 +1,113 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include "rte_distributor_priv.h"
+#include "rte_distributor_burst.h"
+#include "smmintrin.h"
+
+
+void
+find_match_vec(struct rte_distributor_burst *d,
+			uint16_t *data_ptr,
+			uint16_t *output_ptr)
+{
+	/* Setup */
+	__m128i incoming_fids;
+	__m128i inflight_fids;
+	__m128i preflight_fids;
+	__m128i wkr;
+	__m128i mask1;
+	__m128i mask2;
+	__m128i output;
+	struct rte_distributor_backlog *bl;
+	uint16_t i;
+
+	/*
+	 * Function overview:
+	 * 2. Loop through all worker ID's
+	 *  2a. Load the current inflights for that worker into an xmm reg
+	 *  2b. Load the current backlog for that worker into an xmm reg
+	 *  2c. use cmpestrm to intersect flow_ids with backlog and inflights
+	 *  2d. Add any matches to the output
+	 * 3. Write the output xmm (matching worker ids).
+	 */
+
+
+	output = _mm_set1_epi16(0);
+	incoming_fids = _mm_load_si128((__m128i *)data_ptr);
+
+	for (i = 0; i < d->num_workers; i++) {
+		bl = &d->backlog[i];
+
+		inflight_fids =
+			_mm_load_si128((__m128i *)&(d->in_flight_tags[i]));
+		preflight_fids =
+			_mm_load_si128((__m128i *)(bl->tags));
+
+		/*
+		 * Any incoming_fid that exists anywhere in inflight_fids will
+		 * have 0xffff in same position of the mask as the incoming fid
+		 * Example (shortened to bytes for brevity):
+		 * incoming_fids   0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08
+		 * inflight_fids   0x03 0x05 0x07 0x00 0x00 0x00 0x00 0x00
+		 * mask            0x00 0x00 0xff 0x00 0xff 0x00 0xff 0x00
+		 */
+
+		mask1 = _mm_cmpestrm(inflight_fids, 8, incoming_fids, 8,
+			_SIDD_UWORD_OPS |
+			_SIDD_CMP_EQUAL_ANY |
+			_SIDD_UNIT_MASK);
+		mask2 = _mm_cmpestrm(preflight_fids, 8, incoming_fids, 8,
+			_SIDD_UWORD_OPS |
+			_SIDD_CMP_EQUAL_ANY |
+			_SIDD_UNIT_MASK);
+
+		mask1 = _mm_or_si128(mask1, mask2);
+		/*
+		 * Now mask contains 0xffff where there's a match.
+		 * Next we need to store the worker_id in the relevant position
+		 * in the output.
+		 */
+
+		wkr = _mm_set1_epi16(i+1);
+		mask1 = _mm_and_si128(mask1, wkr);
+		output = _mm_or_si128(mask1, output);
+	}
+
+	/*
+	 * At this stage, the output 128-bit contains 8 16-bit values, with
+	 * each non-zero value containing the worker ID on which the
+	 * corresponding flow is pinned to.
+	 */
+	_mm_store_si128((__m128i *)output_ptr, output);
+}
diff --git a/lib/librte_distributor/rte_distributor_priv.h b/lib/librte_distributor/rte_distributor_priv.h
index 833855f..cc2c478 100644
--- a/lib/librte_distributor/rte_distributor_priv.h
+++ b/lib/librte_distributor/rte_distributor_priv.h
@@ -155,6 +155,7 @@  struct rte_distributor {
 /* All different signature compare functions */
 enum rte_distributor_match_function {
 	RTE_DIST_MATCH_SCALAR = 0,
+	RTE_DIST_MATCH_VECTOR,
 	RTE_DIST_MATCH_NUM
 };
 
@@ -182,6 +183,11 @@  struct rte_distributor_burst {
 	enum rte_distributor_match_function dist_match_fn;
 };
 
+void
+find_match_vec(struct rte_distributor_burst *d,
+			uint16_t *data_ptr,
+			uint16_t *output_ptr);
+
 #ifdef __cplusplus
 }
 #endif