[v4,07/14] acl: add infrastructure to support AVX512 classify

Message ID 20201006150316.5776-8-konstantin.ananyev@intel.com (mailing list archive)
State Accepted, archived
Delegated to: David Marchand
Headers
Series acl: introduce AVX512 classify methods |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Ananyev, Konstantin Oct. 6, 2020, 3:03 p.m. UTC
  Add the changes necessary to support the new AVX512-specific ACL classify
algorithm:
 - changes in meson.build to check that the build tools
   (compiler, assembler, etc.) properly support AVX512.
 - run-time checks to make sure the target platform supports AVX512.
 - dummy rte_acl_classify_avx512x16() and rte_acl_classify_avx512x32()
   for targets where the AVX512 implementation can't be properly supported.

Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
---
 config/x86/meson.build          |  3 ++-
 lib/librte_acl/acl.h            |  8 ++++++
 lib/librte_acl/acl_run_avx512.c | 29 ++++++++++++++++++++
 lib/librte_acl/meson.build      | 48 +++++++++++++++++++++++++++++++++
 lib/librte_acl/rte_acl.c        | 42 +++++++++++++++++++++++++++++
 lib/librte_acl/rte_acl.h        |  2 ++
 6 files changed, 131 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_acl/acl_run_avx512.c
  

Comments

David Marchand Oct. 13, 2020, 7:17 p.m. UTC | #1
On Tue, Oct 6, 2020 at 5:11 PM Konstantin Ananyev
<konstantin.ananyev@intel.com> wrote:
> diff --git a/config/x86/meson.build b/config/x86/meson.build
> index fea4d54035..724e69f4c4 100644
> --- a/config/x86/meson.build
> +++ b/config/x86/meson.build
> @@ -22,7 +22,8 @@ foreach f:base_flags
>  endforeach
>
>  optional_flags = ['AES', 'PCLMUL',
> -               'AVX', 'AVX2', 'AVX512F',
> +               'AVX', 'AVX2',
> +               'AVX512F', 'AVX512VL', 'AVX512CD', 'AVX512BW',
>                 'RDRND', 'RDSEED']

Rebasing on current main and resolving the conflict with the net crc patches.

I am for sorting this alphabetically as the current order seems chaotic.
The diff in this patch against origin/main would be:

-optional_flags = ['AES', 'PCLMUL',
-        'AVX', 'AVX2', 'AVX512F',
-        'RDRND', 'RDSEED',
-        'AVX512BW', 'AVX512DQ',
-        'AVX512VL', 'VPCLMULQDQ']
+optional_flags = [
+        'AES',
+        'AVX',
+        'AVX2',
+        'AVX512BW',
+        'AVX512CD',
+        'AVX512DQ',
+        'AVX512F',
+        'AVX512VL',
+        'PCLMUL',
+        'RDRND',
+        'RDSEED',
+        'VPCLMULQDQ',
+]

Objection?


>  foreach f:optional_flags
>         if cc.get_define('__@0@__'.format(f), args: machine_args) == '1'

Thanks.
  
Ananyev, Konstantin Oct. 13, 2020, 10:26 p.m. UTC | #2
> On Tue, Oct 6, 2020 at 5:11 PM Konstantin Ananyev
> <konstantin.ananyev@intel.com> wrote:
> > diff --git a/config/x86/meson.build b/config/x86/meson.build
> > index fea4d54035..724e69f4c4 100644
> > --- a/config/x86/meson.build
> > +++ b/config/x86/meson.build
> > @@ -22,7 +22,8 @@ foreach f:base_flags
> >  endforeach
> >
> >  optional_flags = ['AES', 'PCLMUL',
> > -               'AVX', 'AVX2', 'AVX512F',
> > +               'AVX', 'AVX2',
> > +               'AVX512F', 'AVX512VL', 'AVX512CD', 'AVX512BW',
> >                 'RDRND', 'RDSEED']
> 
> Rebasing on current main and resolving the conflict with the net crc patches.
> 
> I am for sorting this alphabetically as the current order seems chaotic.
> The diff in this patch against origin/main would be:
> 
> -optional_flags = ['AES', 'PCLMUL',
> -        'AVX', 'AVX2', 'AVX512F',
> -        'RDRND', 'RDSEED',
> -        'AVX512BW', 'AVX512DQ',
> -        'AVX512VL', 'VPCLMULQDQ']
> +optional_flags = [
> +        'AES',
> +        'AVX',
> +        'AVX2',
> +        'AVX512BW',
> +        'AVX512CD',
> +        'AVX512DQ',
> +        'AVX512F',
> +        'AVX512VL',
> +        'PCLMUL',
> +        'RDRND',
> +        'RDSEED',
> +        'VPCLMULQDQ',
> +]
> 
> Objection?

None 😊
Thanks
Konstantin

> 
> 
> >  foreach f:optional_flags
> >         if cc.get_define('__@0@__'.format(f), args: machine_args) == '1'
> 
> Thanks.
> 
> 
> --
> David Marchand
  

Patch

diff --git a/config/x86/meson.build b/config/x86/meson.build
index fea4d54035..724e69f4c4 100644
--- a/config/x86/meson.build
+++ b/config/x86/meson.build
@@ -22,7 +22,8 @@  foreach f:base_flags
 endforeach
 
 optional_flags = ['AES', 'PCLMUL',
-		'AVX', 'AVX2', 'AVX512F',
+		'AVX', 'AVX2',
+		'AVX512F', 'AVX512VL', 'AVX512CD', 'AVX512BW',
 		'RDRND', 'RDSEED']
 foreach f:optional_flags
 	if cc.get_define('__@0@__'.format(f), args: machine_args) == '1'
diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h
index 39d45a0c2b..543ce55659 100644
--- a/lib/librte_acl/acl.h
+++ b/lib/librte_acl/acl.h
@@ -201,6 +201,14 @@  int
 rte_acl_classify_avx2(const struct rte_acl_ctx *ctx, const uint8_t **data,
 	uint32_t *results, uint32_t num, uint32_t categories);
 
+int
+rte_acl_classify_avx512x16(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories);
+
+int
+rte_acl_classify_avx512x32(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories);
+
 int
 rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data,
 	uint32_t *results, uint32_t num, uint32_t categories);
diff --git a/lib/librte_acl/acl_run_avx512.c b/lib/librte_acl/acl_run_avx512.c
new file mode 100644
index 0000000000..1817f88b29
--- /dev/null
+++ b/lib/librte_acl/acl_run_avx512.c
@@ -0,0 +1,29 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include "acl_run_sse.h"
+
+int
+rte_acl_classify_avx512x16(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories)
+{
+	if (num >= MAX_SEARCHES_SSE8)
+		return search_sse_8(ctx, data, results, num, categories);
+	if (num >= MAX_SEARCHES_SSE4)
+		return search_sse_4(ctx, data, results, num, categories);
+
+	return rte_acl_classify_scalar(ctx, data, results, num, categories);
+}
+
+int
+rte_acl_classify_avx512x32(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories)
+{
+	if (num >= MAX_SEARCHES_SSE8)
+		return search_sse_8(ctx, data, results, num, categories);
+	if (num >= MAX_SEARCHES_SSE4)
+		return search_sse_4(ctx, data, results, num, categories);
+
+	return rte_acl_classify_scalar(ctx, data, results, num, categories);
+}
diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build
index b31a3f798e..a3c7c398d0 100644
--- a/lib/librte_acl/meson.build
+++ b/lib/librte_acl/meson.build
@@ -27,6 +27,54 @@  if dpdk_conf.has('RTE_ARCH_X86')
 		cflags += '-DCC_AVX2_SUPPORT'
 	endif
 
+	# compile AVX512 version if:
+	# we are building 64-bit binary AND binutils can generate proper code
+
+	if dpdk_conf.has('RTE_ARCH_X86_64') and binutils_ok.returncode() == 0
+
+		# compile AVX512 version if either:
+		# a. we have AVX512 supported in minimum instruction set
+		#    baseline
+		# b. it's not minimum instruction set, but supported by
+		#    compiler
+		#
+		# in former case, just add avx512 C file to files list
+		# in latter case, compile c file to static lib, using correct
+		# compiler flags, and then have the .o file from static lib
+		# linked into main lib.
+
+		# check if all required flags already enabled (variant a).
+		acl_avx512_flags = ['__AVX512F__', '__AVX512VL__',
+			'__AVX512CD__', '__AVX512BW__']
+
+		acl_avx512_on = true
+		foreach f:acl_avx512_flags
+
+			if cc.get_define(f, args: machine_args) == ''
+				acl_avx512_on = false
+			endif
+		endforeach
+
+		if acl_avx512_on == true
+
+			sources += files('acl_run_avx512.c')
+			cflags += '-DCC_AVX512_SUPPORT'
+
+		elif cc.has_multi_arguments('-mavx512f', '-mavx512vl',
+					'-mavx512cd', '-mavx512bw')
+
+			avx512_tmplib = static_library('avx512_tmp',
+				'acl_run_avx512.c',
+				dependencies: static_rte_eal,
+				c_args: cflags +
+					['-mavx512f', '-mavx512vl',
+					 '-mavx512cd', '-mavx512bw'])
+			objs += avx512_tmplib.extract_objects(
+					'acl_run_avx512.c')
+			cflags += '-DCC_AVX512_SUPPORT'
+		endif
+	endif
+
 elif dpdk_conf.has('RTE_ARCH_ARM') or dpdk_conf.has('RTE_ARCH_ARM64')
 	cflags += '-flax-vector-conversions'
 	sources += files('acl_run_neon.c')
diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
index 863549a38b..1154f35107 100644
--- a/lib/librte_acl/rte_acl.c
+++ b/lib/librte_acl/rte_acl.c
@@ -16,6 +16,32 @@  static struct rte_tailq_elem rte_acl_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_acl_tailq)
 
+#ifndef CC_AVX512_SUPPORT
+/*
+ * If the compiler doesn't support AVX512 instructions,
+ * then the dummy one would be used instead for AVX512 classify method.
+ */
+int
+rte_acl_classify_avx512x16(__rte_unused const struct rte_acl_ctx *ctx,
+	__rte_unused const uint8_t **data,
+	__rte_unused uint32_t *results,
+	__rte_unused uint32_t num,
+	__rte_unused uint32_t categories)
+{
+	return -ENOTSUP;
+}
+
+int
+rte_acl_classify_avx512x32(__rte_unused const struct rte_acl_ctx *ctx,
+	__rte_unused const uint8_t **data,
+	__rte_unused uint32_t *results,
+	__rte_unused uint32_t num,
+	__rte_unused uint32_t categories)
+{
+	return -ENOTSUP;
+}
+#endif
+
 #ifndef CC_AVX2_SUPPORT
 /*
  * If the compiler doesn't support AVX2 instructions,
@@ -77,6 +103,8 @@  static const rte_acl_classify_t classify_fns[] = {
 	[RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2,
 	[RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon,
 	[RTE_ACL_CLASSIFY_ALTIVEC] = rte_acl_classify_altivec,
+	[RTE_ACL_CLASSIFY_AVX512X16] = rte_acl_classify_avx512x16,
+	[RTE_ACL_CLASSIFY_AVX512X32] = rte_acl_classify_avx512x32,
 };
 
 /*
@@ -126,6 +154,18 @@  acl_check_alg_ppc(enum rte_acl_classify_alg alg)
 static int
 acl_check_alg_x86(enum rte_acl_classify_alg alg)
 {
+	if (alg == RTE_ACL_CLASSIFY_AVX512X16 ||
+			alg == RTE_ACL_CLASSIFY_AVX512X32) {
+#ifdef CC_AVX512_SUPPORT
+		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
+			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) &&
+			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512CD) &&
+			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW))
+			return 0;
+#endif
+		return -ENOTSUP;
+	}
+
 	if (alg == RTE_ACL_CLASSIFY_AVX2) {
 #ifdef CC_AVX2_SUPPORT
 		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
@@ -159,6 +199,8 @@  acl_check_alg(enum rte_acl_classify_alg alg)
 		return acl_check_alg_arm(alg);
 	case RTE_ACL_CLASSIFY_ALTIVEC:
 		return acl_check_alg_ppc(alg);
+	case RTE_ACL_CLASSIFY_AVX512X32:
+	case RTE_ACL_CLASSIFY_AVX512X16:
 	case RTE_ACL_CLASSIFY_AVX2:
 	case RTE_ACL_CLASSIFY_SSE:
 		return acl_check_alg_x86(alg);
diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h
index 3999f15ded..1bfed00743 100644
--- a/lib/librte_acl/rte_acl.h
+++ b/lib/librte_acl/rte_acl.h
@@ -241,6 +241,8 @@  enum rte_acl_classify_alg {
 	RTE_ACL_CLASSIFY_AVX2 = 3,    /**< requires AVX2 support. */
 	RTE_ACL_CLASSIFY_NEON = 4,    /**< requires NEON support. */
 	RTE_ACL_CLASSIFY_ALTIVEC = 5,    /**< requires ALTIVEC support. */
+	RTE_ACL_CLASSIFY_AVX512X16 = 6,  /**< requires AVX512 support. */
+	RTE_ACL_CLASSIFY_AVX512X32 = 7,  /**< requires AVX512 support. */
 };
 
 /**