@@ -23,7 +23,8 @@ foreach f:base_flags
endforeach
optional_flags = ['AES', 'PCLMUL',
- 'AVX', 'AVX2', 'AVX512F',
+ 'AVX', 'AVX2',
+ 'AVX512F', 'AVX512VL', 'AVX512CD', 'AVX512BW',
'RDRND', 'RDSEED']
foreach f:optional_flags
if cc.get_define('__@0@__'.format(f), args: machine_args) == '1'
@@ -58,6 +58,32 @@ ifeq ($(CC_AVX2_SUPPORT), 1)
CFLAGS_rte_acl.o += -DCC_AVX2_SUPPORT
endif
+# compile AVX512 version if:
+# we are building a 64-bit binary AND binutils can generate proper code
+ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
+ # BINUTIL_OK is set to 1 only when binutils-avx512-check.sh exits with 0.
+ BINUTIL_OK=$(shell AS=as; \
+ $(RTE_SDK)/buildtools/binutils-avx512-check.sh && \
+ echo 1)
+ ifeq ($(BINUTIL_OK), 1)
+
+ # If the compiler supports the AVX512 F, VL, CD and BW extensions,
+ # then add support for the AVX512 classify method. The probe counts
+ # "AVX512" lines in the -dM -E macro dump; exactly 4 are required.
+ CC_AVX512_FLAGS=$(shell $(CC) \
+ -mavx512f -mavx512vl -mavx512cd -mavx512bw \
+ -dM -E - </dev/null 2>&1 | grep AVX512 | wc -l)
+ ifeq ($(CC_AVX512_FLAGS), 4)
+ CFLAGS_acl_run_avx512.o += -mavx512f
+ CFLAGS_acl_run_avx512.o += -mavx512vl
+ CFLAGS_acl_run_avx512.o += -mavx512cd
+ CFLAGS_acl_run_avx512.o += -mavx512bw
+ SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_avx512.c
+ CFLAGS_rte_acl.o += -DCC_AVX512_SUPPORT
+ endif
+ endif
+endif
+
# install this header file
SYMLINK-$(CONFIG_RTE_LIBRTE_ACL)-include := rte_acl_osdep.h
SYMLINK-$(CONFIG_RTE_LIBRTE_ACL)-include += rte_acl.h
@@ -201,6 +201,10 @@ int
rte_acl_classify_avx2(const struct rte_acl_ctx *ctx, const uint8_t **data,
uint32_t *results, uint32_t num, uint32_t categories);
+int
+rte_acl_classify_avx512(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t num, uint32_t categories);
+
int
rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data,
uint32_t *results, uint32_t num, uint32_t categories);
new file mode 100644
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include "acl_run_sse.h"
+
+int
+rte_acl_classify_avx512(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t num, uint32_t categories)
+{
+ if (num >= MAX_SEARCHES_SSE8)
+ return search_sse_8(ctx, data, results, num, categories);
+ if (num >= MAX_SEARCHES_SSE4)
+ return search_sse_4(ctx, data, results, num, categories);
+ /* num is below both thresholds: fall back to the scalar classifier */
+ return rte_acl_classify_scalar(ctx, data, results, num, categories);
+}
@@ -27,6 +27,45 @@ if dpdk_conf.has('RTE_ARCH_X86')
cflags += '-DCC_AVX2_SUPPORT'
endif
+ # compile AVX512 version only if:
+ # we are building a 64-bit binary AND binutils can generate proper code
+ # (binutils_ok is defined outside this hunk; return code 0 means OK)
+ if dpdk_conf.has('RTE_ARCH_X86_64') and binutils_ok.returncode() == 0
+
+ # compile AVX512 version if either:
+ # a. AVX512F, VL, CD and BW are all in the minimum instruction
+ # set baseline (RTE_MACHINE_CPUFLAG_* present in dpdk_conf)
+ # b. they are not in the baseline, but the compiler accepts
+ # the matching -mavx512* options
+ #
+ # in the former case, just add the avx512 C file to the sources list
+ # in the latter case, compile the C file to a static lib, using the
+ # extra compiler flags, and then have the .o file from that static
+ # lib linked into the main lib.
+
+ if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX512F') and \
+ dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX512VL') and \
+ dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX512CD') and \
+ dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX512BW')
+
+ sources += files('acl_run_avx512.c')
+ cflags += '-DCC_AVX512_SUPPORT'
+
+ elif cc.has_multi_arguments('-mavx512f', '-mavx512vl',
+ '-mavx512cd', '-mavx512bw')
+ # only acl_run_avx512.c is compiled with the extra -mavx512* options
+ avx512_tmplib = static_library('avx512_tmp',
+ 'acl_run_avx512.c',
+ dependencies: static_rte_eal,
+ c_args: cflags +
+ ['-mavx512f', '-mavx512vl',
+ '-mavx512cd', '-mavx512bw'])
+ objs += avx512_tmplib.extract_objects(
+ 'acl_run_avx512.c')
+ cflags += '-DCC_AVX512_SUPPORT'
+ endif
+ endif
+
elif dpdk_conf.has('RTE_ARCH_ARM') or dpdk_conf.has('RTE_ARCH_ARM64')
cflags += '-flax-vector-conversions'
sources += files('acl_run_neon.c')
@@ -16,6 +16,22 @@ static struct rte_tailq_elem rte_acl_tailq = {
};
EAL_REGISTER_TAILQ(rte_acl_tailq)
+#ifndef CC_AVX512_SUPPORT
+/*
+ * Stub compiled when CC_AVX512_SUPPORT is not defined: it provides the
+ * rte_acl_classify_avx512 symbol, ignores its arguments, returns -ENOTSUP.
+ */
+int
+rte_acl_classify_avx512(__rte_unused const struct rte_acl_ctx *ctx,
+ __rte_unused const uint8_t **data,
+ __rte_unused uint32_t *results,
+ __rte_unused uint32_t num,
+ __rte_unused uint32_t categories)
+{
+ return -ENOTSUP;
+}
+#endif
+
#ifndef CC_AVX2_SUPPORT
/*
* If the compiler doesn't support AVX2 instructions,
@@ -77,6 +93,7 @@ static const rte_acl_classify_t classify_fns[] = {
[RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2,
[RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon,
[RTE_ACL_CLASSIFY_ALTIVEC] = rte_acl_classify_altivec,
+ [RTE_ACL_CLASSIFY_AVX512] = rte_acl_classify_avx512,
};
/* by default, use always available scalar code path. */
@@ -241,6 +241,7 @@ enum rte_acl_classify_alg {
RTE_ACL_CLASSIFY_AVX2 = 3, /**< requires AVX2 support. */
RTE_ACL_CLASSIFY_NEON = 4, /**< requires NEON support. */
RTE_ACL_CLASSIFY_ALTIVEC = 5, /**< requires ALTIVEC support. */
+ RTE_ACL_CLASSIFY_AVX512 = 6, /**< requires AVX512 support. */
};
/**