[v6,18/18] acl: add checks for max SIMD bitwidth
Checks
Commit Message
When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path. These checks are added in the check alg helper functions.
Cc: Konstantin Ananyev <konstantin.ananyev@intel.com>
Signed-off-by: Ciara Power <ciara.power@intel.com>
---
lib/librte_acl/rte_acl.c | 27 +++++++++++++++++++++------
1 file changed, 21 insertions(+), 6 deletions(-)
Comments
> When choosing a vector path to take, an extra condition must be
> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> path. These checks are added in the check alg helper functions.
>
> Cc: Konstantin Ananyev <konstantin.ananyev@intel.com>
>
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
> lib/librte_acl/rte_acl.c | 27 +++++++++++++++++++++------
> 1 file changed, 21 insertions(+), 6 deletions(-)
I think we need to update PG for ACL (section " Classification methods")
to reflect these changes.
And probably doxygen comments for rte_acl_set_ctx_classify() too.
> diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
> index 7c2f60b2d6..4ec6c982c9 100644
> --- a/lib/librte_acl/rte_acl.c
> +++ b/lib/librte_acl/rte_acl.c
> @@ -16,6 +16,8 @@ static struct rte_tailq_elem rte_acl_tailq = {
> };
> EAL_REGISTER_TAILQ(rte_acl_tailq)
>
> +uint16_t max_simd_bitwidth;
> +
> #ifndef CC_AVX512_SUPPORT
> /*
> * If the compiler doesn't support AVX512 instructions,
> @@ -114,9 +116,13 @@ acl_check_alg_arm(enum rte_acl_classify_alg alg)
> {
> if (alg == RTE_ACL_CLASSIFY_NEON) {
> #if defined(RTE_ARCH_ARM64)
> - return 0;
> + if (max_simd_bitwidth >= RTE_SIMD_128)
All these are control path functions.
So no point to have local copy of max_simd_bitwidth.
Here and in all other places within that file we can just call
rte_get_max_simd_bitwidth() straightway.
> + return 0;
> + else
> + return -ENOTSUP;
> #elif defined(RTE_ARCH_ARM)
> - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
> + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) &&
> + max_simd_bitwidth >= RTE_SIMD_128)
> return 0;
> return -ENOTSUP;
> #else
> @@ -136,7 +142,10 @@ acl_check_alg_ppc(enum rte_acl_classify_alg alg)
> {
> if (alg == RTE_ACL_CLASSIFY_ALTIVEC) {
> #if defined(RTE_ARCH_PPC_64)
> - return 0;
> + if (max_simd_bitwidth >= RTE_SIMD_128)
> + return 0;
> + else
> + return -ENOTSUP;
> #else
> return -ENOTSUP;
> #endif
> @@ -158,7 +167,8 @@ acl_check_alg_x86(enum rte_acl_classify_alg alg)
> if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
> rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) &&
> rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512CD) &&
> - rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW))
> + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) &&
> + max_simd_bitwidth >= RTE_SIMD_512)
That's not exactly correct, as we have two algs (256 and 512 bit-width) for avx512.
So we have to check different max-simd valued for different algorithms.
Something like that:
static int
acl_check_avx512_cpu_flags(void)
{
return (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) &&
rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512CD) &&
rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW));
}
static int
acl_check_alg_x86(enum rte_acl_classify_alg alg)
{
if (alg == RTE_ACL_CLASSIFY_AVX512X32) {
#ifdef CC_AVX512_SUPPORT
if (acl_check_avx512_cpu_flags) != 0 &&
rte_get_max_simd_bitwidth() >= RTE_SIMD_512)
return 0;
#endif
return -ENOTSUP;
if (alg == RTE_ACL_CLASSIFY_AVX512X16) {
#ifdef CC_AVX512_SUPPORT
if (acl_check_avx512_cpu_flags) != 0 &&
rte_get_max_simd_bitwidth() >= RTE_SIMD_256)
return 0;
#endif
return -ENOTSUP;
if (alg == RTE_ACL_CLASSIFY_AVX2) {
....
> return 0;
> #endif
> return -ENOTSUP;
> @@ -166,7 +176,8 @@ acl_check_alg_x86(enum rte_acl_classify_alg alg)
>
> if (alg == RTE_ACL_CLASSIFY_AVX2) {
> #ifdef CC_AVX2_SUPPORT
> - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
> + max_simd_bitwidth >= RTE_SIMD_256)
> return 0;
> #endif
> return -ENOTSUP;
> @@ -174,7 +185,8 @@ acl_check_alg_x86(enum rte_acl_classify_alg alg)
>
> if (alg == RTE_ACL_CLASSIFY_SSE) {
> #ifdef RTE_ARCH_X86
> - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1))
> + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1) &&
> + max_simd_bitwidth >= RTE_SIMD_128)
> return 0;
> #endif
> return -ENOTSUP;
> @@ -406,6 +418,9 @@ rte_acl_create(const struct rte_acl_param *param)
> TAILQ_INSERT_TAIL(acl_list, te, next);
> }
>
> + if (max_simd_bitwidth == 0)
> + max_simd_bitwidth = rte_get_max_simd_bitwidth();
> +
> exit:
> rte_mcfg_tailq_write_unlock();
> return ctx;
> --
> 2.22.0
@@ -16,6 +16,8 @@ static struct rte_tailq_elem rte_acl_tailq = {
};
EAL_REGISTER_TAILQ(rte_acl_tailq)
+uint16_t max_simd_bitwidth;
+
#ifndef CC_AVX512_SUPPORT
/*
* If the compiler doesn't support AVX512 instructions,
@@ -114,9 +116,13 @@ acl_check_alg_arm(enum rte_acl_classify_alg alg)
{
if (alg == RTE_ACL_CLASSIFY_NEON) {
#if defined(RTE_ARCH_ARM64)
- return 0;
+ if (max_simd_bitwidth >= RTE_SIMD_128)
+ return 0;
+ else
+ return -ENOTSUP;
#elif defined(RTE_ARCH_ARM)
- if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) &&
+ max_simd_bitwidth >= RTE_SIMD_128)
return 0;
return -ENOTSUP;
#else
@@ -136,7 +142,10 @@ acl_check_alg_ppc(enum rte_acl_classify_alg alg)
{
if (alg == RTE_ACL_CLASSIFY_ALTIVEC) {
#if defined(RTE_ARCH_PPC_64)
- return 0;
+ if (max_simd_bitwidth >= RTE_SIMD_128)
+ return 0;
+ else
+ return -ENOTSUP;
#else
return -ENOTSUP;
#endif
@@ -158,7 +167,8 @@ acl_check_alg_x86(enum rte_acl_classify_alg alg)
if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) &&
rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512CD) &&
- rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW))
+ rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) &&
+ max_simd_bitwidth >= RTE_SIMD_512)
return 0;
#endif
return -ENOTSUP;
@@ -166,7 +176,8 @@ acl_check_alg_x86(enum rte_acl_classify_alg alg)
if (alg == RTE_ACL_CLASSIFY_AVX2) {
#ifdef CC_AVX2_SUPPORT
- if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+ max_simd_bitwidth >= RTE_SIMD_256)
return 0;
#endif
return -ENOTSUP;
@@ -174,7 +185,8 @@ acl_check_alg_x86(enum rte_acl_classify_alg alg)
if (alg == RTE_ACL_CLASSIFY_SSE) {
#ifdef RTE_ARCH_X86
- if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1))
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1) &&
+ max_simd_bitwidth >= RTE_SIMD_128)
return 0;
#endif
return -ENOTSUP;
@@ -406,6 +418,9 @@ rte_acl_create(const struct rte_acl_param *param)
TAILQ_INSERT_TAIL(acl_list, te, next);
}
+ if (max_simd_bitwidth == 0)
+ max_simd_bitwidth = rte_get_max_simd_bitwidth();
+
exit:
rte_mcfg_tailq_write_unlock();
return ctx;