[dpdk-dev,2/4] eal/acl: enable acl for armv7-a

Message ID 1448995276-9599-3-git-send-email-jianbo.liu@linaro.org (mailing list archive)
State Superseded, archived
Headers

Commit Message

Jianbo Liu Dec. 1, 2015, 6:41 p.m. UTC
  Implement vqtbl1q_u8 intrinsic function, which is not support in armv7-a.

Signed-off-by: Jianbo Liu <jianbo.liu@linaro.org>
---
 config/defconfig_arm-armv7a-linuxapp-gcc          |  1 -
 lib/librte_acl/Makefile                           |  2 +-
 lib/librte_acl/rte_acl.c                          |  2 +-
 lib/librte_eal/common/include/arch/arm/rte_vect.h | 23 +++++++++++++++++++++++
 4 files changed, 25 insertions(+), 3 deletions(-)
  

Comments

Jerin Jacob Dec. 1, 2015, 2:43 p.m. UTC | #1
On Tue, Dec 01, 2015 at 01:41:14PM -0500, Jianbo Liu wrote:
> Implement vqtbl1q_u8 intrinsic function, which is not support in armv7-a.
> 
> Signed-off-by: Jianbo Liu <jianbo.liu@linaro.org>
> ---
>  config/defconfig_arm-armv7a-linuxapp-gcc          |  1 -
>  lib/librte_acl/Makefile                           |  2 +-
>  lib/librte_acl/rte_acl.c                          |  2 +-
>  lib/librte_eal/common/include/arch/arm/rte_vect.h | 23 +++++++++++++++++++++++
>  4 files changed, 25 insertions(+), 3 deletions(-)
> 
> diff --git a/config/defconfig_arm-armv7a-linuxapp-gcc b/config/defconfig_arm-armv7a-linuxapp-gcc
> index 9924ff9..cbebd64 100644
> --- a/config/defconfig_arm-armv7a-linuxapp-gcc
> +++ b/config/defconfig_arm-armv7a-linuxapp-gcc
> @@ -53,7 +53,6 @@ CONFIG_RTE_LIBRTE_KNI=n
>  CONFIG_RTE_EAL_IGB_UIO=n
>  
>  # fails to compile on ARM
> -CONFIG_RTE_LIBRTE_ACL=n
>  CONFIG_RTE_LIBRTE_LPM=n
>  CONFIG_RTE_LIBRTE_TABLE=n
>  CONFIG_RTE_LIBRTE_PIPELINE=n
> diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile
> index 897237d..2e394c9 100644
> --- a/lib/librte_acl/Makefile
> +++ b/lib/librte_acl/Makefile
> @@ -49,7 +49,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_bld.c
>  SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_gen.c
>  SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_scalar.c
>  
> -ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
> +ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),)
>  SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_neon.c
>  CFLAGS_acl_run_neon.o += -flax-vector-conversions -Wno-maybe-uninitialized
>  else
> diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
> index e2fdebd..339aace 100644
> --- a/lib/librte_acl/rte_acl.c
> +++ b/lib/librte_acl/rte_acl.c
> @@ -114,7 +114,7 @@ rte_acl_init(void)
>  {
>  	enum rte_acl_classify_alg alg = RTE_ACL_CLASSIFY_DEFAULT;
>  
> -#ifdef RTE_ARCH_ARM64
> +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
>  	alg =  RTE_ACL_CLASSIFY_NEON;

I believe SIMD is optional in armv7. If true, select alg as
RTE_ACL_CLASSIFY_NEON only when cpufeature NEON enabled.

>  #else
>  #ifdef CC_AVX2_SUPPORT
> diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
> index 21cdb4d..a33c054 100644
> --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
> +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
> @@ -53,6 +53,29 @@ typedef union rte_xmm {
>  	double   pd[XMM_SIZE / sizeof(double)];
>  } __attribute__((aligned(16))) rte_xmm_t;
>  
> +#ifdef RTE_ARCH_ARM
> +/* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */
> +static __inline uint8x16_t
> +vqtbl1q_u8(uint8x16_t a, uint8x16_t b)
> +{
> +	uint8_t i, pos;
> +	rte_xmm_t rte_a, rte_b, rte_ret;
> +
> +	vst1q_u8(rte_a.u8, a);
> +	vst1q_u8(rte_b.u8, b);
> +
> +	for (i = 0; i < 16; i++) {
> +		pos = rte_b.u8[i];
> +		if (pos < 16)
> +			rte_ret.u8[i] = rte_a.u8[pos];
> +		else
> +			rte_ret.u8[i] = 0;
> +	}
> +
> +	return vld1q_u8(rte_ret.u8);
> +}
> +#endif
> +
>  #ifdef __cplusplus
>  }
>  #endif
> -- 
> 1.8.3.1
>
  
Jan Viktorin Dec. 1, 2015, 2:46 p.m. UTC | #2
On Tue, 1 Dec 2015 20:13:49 +0530
Jerin Jacob <jerin.jacob@caviumnetworks.com> wrote:

> >  	enum rte_acl_classify_alg alg = RTE_ACL_CLASSIFY_DEFAULT;
> >  
> > -#ifdef RTE_ARCH_ARM64
> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> >  	alg =  RTE_ACL_CLASSIFY_NEON;  
> 
> I believe SIMD is optional in armv7. If true, select alg as
> RTE_ACL_CLASSIFY_NEON only when cpufeature NEON enabled.

Yes. Or, probably, we can be happy with

#if defined(__ARM_NEON_FP)
...
#endif

as it is currently done in rte_memcpy_32.h.

Regards
Jan
  
Jianbo Liu Dec. 2, 2015, 6:14 a.m. UTC | #3
On 1 December 2015 at 22:46, Jan Viktorin <viktorin@rehivetech.com> wrote:
> On Tue, 1 Dec 2015 20:13:49 +0530
> Jerin Jacob <jerin.jacob@caviumnetworks.com> wrote:
>
>> >     enum rte_acl_classify_alg alg = RTE_ACL_CLASSIFY_DEFAULT;
>> >
>> > -#ifdef RTE_ARCH_ARM64
>> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
>> >     alg =  RTE_ACL_CLASSIFY_NEON;
>>
>> I believe SIMD is optional in armv7. If true, select alg as
>> RTE_ACL_CLASSIFY_NEON only when cpufeature NEON enabled.
>
> Yes. Or, probably, we can be happy with
>
> #if defined(__ARM_NEON_FP)
> ...
> #endif
>
> as it is currently done in rte_memcpy_32.h.
>
> Regards
> Jan

Athough optional for armv7, I believe there is NEON in most of the
popular armv7a chips.
Anyway, I will add the checking...

Thanks!
  

Patch

diff --git a/config/defconfig_arm-armv7a-linuxapp-gcc b/config/defconfig_arm-armv7a-linuxapp-gcc
index 9924ff9..cbebd64 100644
--- a/config/defconfig_arm-armv7a-linuxapp-gcc
+++ b/config/defconfig_arm-armv7a-linuxapp-gcc
@@ -53,7 +53,6 @@  CONFIG_RTE_LIBRTE_KNI=n
 CONFIG_RTE_EAL_IGB_UIO=n
 
 # fails to compile on ARM
-CONFIG_RTE_LIBRTE_ACL=n
 CONFIG_RTE_LIBRTE_LPM=n
 CONFIG_RTE_LIBRTE_TABLE=n
 CONFIG_RTE_LIBRTE_PIPELINE=n
diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile
index 897237d..2e394c9 100644
--- a/lib/librte_acl/Makefile
+++ b/lib/librte_acl/Makefile
@@ -49,7 +49,7 @@  SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_bld.c
 SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_gen.c
 SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_scalar.c
 
-ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
+ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),)
 SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_neon.c
 CFLAGS_acl_run_neon.o += -flax-vector-conversions -Wno-maybe-uninitialized
 else
diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
index e2fdebd..339aace 100644
--- a/lib/librte_acl/rte_acl.c
+++ b/lib/librte_acl/rte_acl.c
@@ -114,7 +114,7 @@  rte_acl_init(void)
 {
 	enum rte_acl_classify_alg alg = RTE_ACL_CLASSIFY_DEFAULT;
 
-#ifdef RTE_ARCH_ARM64
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
 	alg =  RTE_ACL_CLASSIFY_NEON;
 #else
 #ifdef CC_AVX2_SUPPORT
diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
index 21cdb4d..a33c054 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -53,6 +53,29 @@  typedef union rte_xmm {
 	double   pd[XMM_SIZE / sizeof(double)];
 } __attribute__((aligned(16))) rte_xmm_t;
 
+#ifdef RTE_ARCH_ARM
+/* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */
+static __inline uint8x16_t
+vqtbl1q_u8(uint8x16_t a, uint8x16_t b)
+{
+	uint8_t i, pos;
+	rte_xmm_t rte_a, rte_b, rte_ret;
+
+	vst1q_u8(rte_a.u8, a);
+	vst1q_u8(rte_b.u8, b);
+
+	for (i = 0; i < 16; i++) {
+		pos = rte_b.u8[i];
+		if (pos < 16)
+			rte_ret.u8[i] = rte_a.u8[pos];
+		else
+			rte_ret.u8[i] = 0;
+	}
+
+	return vld1q_u8(rte_ret.u8);
+}
+#endif
+
 #ifdef __cplusplus
 }
 #endif