[v6,18/18] acl: add checks for max SIMD bitwidth

Message ID 20201015103814.253636-19-ciara.power@intel.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series add max SIMD bitwidth to EAL |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-intel-Performance success Performance Testing PASS
ci/travis-robot warning Travis build: failed
ci/iol-mellanox-Performance success Performance Testing PASS

Commit Message

Power, Ciara Oct. 15, 2020, 10:38 a.m. UTC
  When choosing a vector path to take, an extra condition must be
satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
path. These checks are added in the check alg helper functions.

Cc: Konstantin Ananyev <konstantin.ananyev@intel.com>

Signed-off-by: Ciara Power <ciara.power@intel.com>
---
 lib/librte_acl/rte_acl.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)
  

Comments

Ananyev, Konstantin Oct. 15, 2020, 12:31 p.m. UTC | #1
> When choosing a vector path to take, an extra condition must be
> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> path. These checks are added in the check alg helper functions.
> 
> Cc: Konstantin Ananyev <konstantin.ananyev@intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> ---
>  lib/librte_acl/rte_acl.c | 27 +++++++++++++++++++++------
>  1 file changed, 21 insertions(+), 6 deletions(-)

I think we need to update PG for ACL (section " Classification methods")
to reflect these changes. 
And probably doxygen comments for rte_acl_set_ctx_classify() too.
 
> diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
> index 7c2f60b2d6..4ec6c982c9 100644
> --- a/lib/librte_acl/rte_acl.c
> +++ b/lib/librte_acl/rte_acl.c
> @@ -16,6 +16,8 @@ static struct rte_tailq_elem rte_acl_tailq = {
>  };
>  EAL_REGISTER_TAILQ(rte_acl_tailq)
> 
> +uint16_t max_simd_bitwidth;
> +
>  #ifndef CC_AVX512_SUPPORT
>  /*
>   * If the compiler doesn't support AVX512 instructions,
> @@ -114,9 +116,13 @@ acl_check_alg_arm(enum rte_acl_classify_alg alg)
>  {
>  	if (alg == RTE_ACL_CLASSIFY_NEON) {
>  #if defined(RTE_ARCH_ARM64)
> -		return 0;
> +		if (max_simd_bitwidth >= RTE_SIMD_128)

All these are control path functions.
So no point to have local copy of max_simd_bitwidth.
Here and in all other places within that file we can just call
rte_get_max_simd_bitwidth() straightway.

> +			return 0;
> +		else
> +			return -ENOTSUP;
>  #elif defined(RTE_ARCH_ARM)
> -		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
> +		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) &&
> +				max_simd_bitwidth >= RTE_SIMD_128)
>  			return 0;
>  		return -ENOTSUP;
>  #else
> @@ -136,7 +142,10 @@ acl_check_alg_ppc(enum rte_acl_classify_alg alg)
>  {
>  	if (alg == RTE_ACL_CLASSIFY_ALTIVEC) {
>  #if defined(RTE_ARCH_PPC_64)
> -		return 0;
> +		if (max_simd_bitwidth >= RTE_SIMD_128)
> +			return 0;
> +		else
> +			return -ENOTSUP;
>  #else
>  		return -ENOTSUP;
>  #endif
> @@ -158,7 +167,8 @@ acl_check_alg_x86(enum rte_acl_classify_alg alg)
>  		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
>  			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) &&
>  			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512CD) &&
> -			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW))
> +			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) &&
> +			max_simd_bitwidth >= RTE_SIMD_512)


That's not exactly correct, as we have two algs (256 and 512 bit-width) for avx512.
So we have to check different max-simd valued for different algorithms.
Something like that:

static int
acl_check_avx512_cpu_flags(void)
{
	return (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
                        rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) &&
                        rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512CD) &&
                        rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW));
 }                      


static int
acl_check_alg_x86(enum rte_acl_classify_alg alg)
{
        if (alg == RTE_ACL_CLASSIFY_AVX512X32) {
#ifdef CC_AVX512_SUPPORT
	if (acl_check_avx512_cpu_flags) != 0 &&
			rte_get_max_simd_bitwidth() >=  RTE_SIMD_512)
		return 0;
#endif
	return -ENOTSUP;

        if (alg == RTE_ACL_CLASSIFY_AVX512X16) {
#ifdef CC_AVX512_SUPPORT
	if (acl_check_avx512_cpu_flags) != 0 &&
			rte_get_max_simd_bitwidth() >=  RTE_SIMD_256)
		return 0;
#endif
	return -ENOTSUP;

	if (alg == RTE_ACL_CLASSIFY_AVX2) {	
		....

>  			return 0;
>  #endif
>  		return -ENOTSUP;
> @@ -166,7 +176,8 @@ acl_check_alg_x86(enum rte_acl_classify_alg alg)
> 
>  	if (alg == RTE_ACL_CLASSIFY_AVX2) {
>  #ifdef CC_AVX2_SUPPORT
> -		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> +		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
> +				max_simd_bitwidth >= RTE_SIMD_256)
>  			return 0;
>  #endif
>  		return -ENOTSUP;
> @@ -174,7 +185,8 @@ acl_check_alg_x86(enum rte_acl_classify_alg alg)
> 
>  	if (alg == RTE_ACL_CLASSIFY_SSE) {
>  #ifdef RTE_ARCH_X86
> -		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1))
> +		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1) &&
> +				max_simd_bitwidth >= RTE_SIMD_128)
>  			return 0;
>  #endif
>  		return -ENOTSUP;
> @@ -406,6 +418,9 @@ rte_acl_create(const struct rte_acl_param *param)
>  		TAILQ_INSERT_TAIL(acl_list, te, next);
>  	}
> 
> +	if (max_simd_bitwidth == 0)
> +		max_simd_bitwidth = rte_get_max_simd_bitwidth();
> +
>  exit:
>  	rte_mcfg_tailq_write_unlock();
>  	return ctx;
> --
> 2.22.0
  

Patch

diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
index 7c2f60b2d6..4ec6c982c9 100644
--- a/lib/librte_acl/rte_acl.c
+++ b/lib/librte_acl/rte_acl.c
@@ -16,6 +16,8 @@  static struct rte_tailq_elem rte_acl_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_acl_tailq)
 
+uint16_t max_simd_bitwidth;
+
 #ifndef CC_AVX512_SUPPORT
 /*
  * If the compiler doesn't support AVX512 instructions,
@@ -114,9 +116,13 @@  acl_check_alg_arm(enum rte_acl_classify_alg alg)
 {
 	if (alg == RTE_ACL_CLASSIFY_NEON) {
 #if defined(RTE_ARCH_ARM64)
-		return 0;
+		if (max_simd_bitwidth >= RTE_SIMD_128)
+			return 0;
+		else
+			return -ENOTSUP;
 #elif defined(RTE_ARCH_ARM)
-		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
+		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) &&
+				max_simd_bitwidth >= RTE_SIMD_128)
 			return 0;
 		return -ENOTSUP;
 #else
@@ -136,7 +142,10 @@  acl_check_alg_ppc(enum rte_acl_classify_alg alg)
 {
 	if (alg == RTE_ACL_CLASSIFY_ALTIVEC) {
 #if defined(RTE_ARCH_PPC_64)
-		return 0;
+		if (max_simd_bitwidth >= RTE_SIMD_128)
+			return 0;
+		else
+			return -ENOTSUP;
 #else
 		return -ENOTSUP;
 #endif
@@ -158,7 +167,8 @@  acl_check_alg_x86(enum rte_acl_classify_alg alg)
 		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
 			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) &&
 			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512CD) &&
-			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW))
+			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) &&
+			max_simd_bitwidth >= RTE_SIMD_512)
 			return 0;
 #endif
 		return -ENOTSUP;
@@ -166,7 +176,8 @@  acl_check_alg_x86(enum rte_acl_classify_alg alg)
 
 	if (alg == RTE_ACL_CLASSIFY_AVX2) {
 #ifdef CC_AVX2_SUPPORT
-		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+				max_simd_bitwidth >= RTE_SIMD_256)
 			return 0;
 #endif
 		return -ENOTSUP;
@@ -174,7 +185,8 @@  acl_check_alg_x86(enum rte_acl_classify_alg alg)
 
 	if (alg == RTE_ACL_CLASSIFY_SSE) {
 #ifdef RTE_ARCH_X86
-		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1))
+		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1) &&
+				max_simd_bitwidth >= RTE_SIMD_128)
 			return 0;
 #endif
 		return -ENOTSUP;
@@ -406,6 +418,9 @@  rte_acl_create(const struct rte_acl_param *param)
 		TAILQ_INSERT_TAIL(acl_list, te, next);
 	}
 
+	if (max_simd_bitwidth == 0)
+		max_simd_bitwidth = rte_get_max_simd_bitwidth();
+
 exit:
 	rte_mcfg_tailq_write_unlock();
 	return ctx;