[dpdk-dev,1/3] eal: introduce rte_vect_* abstractions

Message ID 1448904253-12929-2-git-send-email-jerin.jacob@caviumnetworks.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Jerin Jacob Nov. 30, 2015, 5:24 p.m. UTC
introduce rte_vect_* abstractions to remove SSE/AVX specific
code in the common code(i.e the test applications)

The patch does not provide any functional change for IA, the goal is to
have infrastructure to reuse the common vector-based test code across
all the architectures.

Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
 lib/librte_eal/common/include/arch/arm/rte_vect.h | 17 ++++++++++++++++-
 lib/librte_eal/common/include/arch/x86/rte_vect.h |  8 ++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)
  

Comments

Jan Viktorin Dec. 2, 2015, 1:43 p.m. UTC | #1
On Mon, 30 Nov 2015 22:54:11 +0530
Jerin Jacob <jerin.jacob@caviumnetworks.com> wrote:

> introduce rte_vect_* abstractions to remove SSE/AVX specific
> code in the common code(i.e the test applications)
> 
> The patch does not provide any functional change for IA, the goal is to

Does IA mean Intel Architecture?

> have infrastructure to reuse the common vector-based test code across
> all the architectures.
> 
> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> ---
>  lib/librte_eal/common/include/arch/arm/rte_vect.h | 17 ++++++++++++++++-
>  lib/librte_eal/common/include/arch/x86/rte_vect.h |  8 ++++++++
>  2 files changed, 24 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
> index 21cdb4d..d300951 100644
> --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
> +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
> @@ -33,13 +33,14 @@
>  #ifndef _RTE_VECT_ARM_H_
>  #define _RTE_VECT_ARM_H_
>  
> -#include "arm_neon.h"
> +#include <arm_neon.h>
>  
>  #ifdef __cplusplus
>  extern "C" {
>  #endif
>  
>  typedef int32x4_t xmm_t;
> +typedef int32x4_t __m128i;

As Jianbo pointed out recently, the __m128i type should be refactored in
a general rte_vect API too. If we do something like

#if SSE
typedef __m128i rte_128i;
#elif NEON
typedef int32x4_y rte_128i;
#endif

does it make somebody angry? I am afraid that it will influence a lot of
code. However, from the ABI point of view, it is OK, isn't it?

>  
>  #define	XMM_SIZE	(sizeof(xmm_t))
>  #define	XMM_MASK	(XMM_SIZE - 1)
> @@ -53,6 +54,20 @@ typedef union rte_xmm {
>  	double   pd[XMM_SIZE / sizeof(double)];
>  } __attribute__((aligned(16))) rte_xmm_t;
>  
> +/* rte_vect_* abstraction implementation using NEON */
> +
> +/* loads the __m128i value from address p(does not need to be 16-byte aligned)*/
> +#define rte_vect_loadu_sil128(p) vld1q_s32((const int32_t *)p)
> +
> +/* sets the 4 signed 32-bit integer values and returns the __m128i variable */
> +static inline __m128i  __attribute__((always_inline))
> +rte_vect_set_epi32(int i3, int i2, int i1, int i0)
> +{
> +	int32_t data[4] = {i0, i1, i2, i3};
> +
> +	return vld1q_s32(data);
> +}
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h
> index b698797..91c6523 100644
> --- a/lib/librte_eal/common/include/arch/x86/rte_vect.h
> +++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h
> @@ -125,6 +125,14 @@ typedef union rte_ymm {
>  })
>  #endif /* (defined(__ICC) && __ICC < 1210) */
>  
> +/* rte_vect_* abstraction implementation using SSE */
> +
> +/* loads the __m128i value from address p(does not need to be 16-byte aligned)*/
> +#define rte_vect_loadu_sil128(p) _mm_loadu_si128(p)
> +
> +/* sets the 4 signed 32-bit integer values and returns the __m128i variable */
> +#define rte_vect_set_epi32(i3, i2, i1, i0) _mm_set_epi32(i3, i2, i1, i0)
> +
>  #ifdef __cplusplus
>  }
>  #endif

I like this approach. It is a question whether to inherit names from
SSE. However, why to reinvent the wheel...

We probably need other people to give their ideas about such
generalization of the API.

I think, there should be an autotest of the rte_vect API. Is it
possible to create one?

Regards
Jan
  
Jerin Jacob Dec. 2, 2015, 2:51 p.m. UTC | #2
On Wed, Dec 02, 2015 at 02:43:34PM +0100, Jan Viktorin wrote:
> On Mon, 30 Nov 2015 22:54:11 +0530
> Jerin Jacob <jerin.jacob@caviumnetworks.com> wrote:
> 
> > introduce rte_vect_* abstractions to remove SSE/AVX specific
> > code in the common code(i.e the test applications)
> > 
> > The patch does not provide any functional change for IA, the goal is to
> 
> Does IA mean Intel Architecture?

Yes.

> 
> > have infrastructure to reuse the common vector-based test code across
> > all the architectures.
> > 
> > Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> > ---
> >  lib/librte_eal/common/include/arch/arm/rte_vect.h | 17 ++++++++++++++++-
> >  lib/librte_eal/common/include/arch/x86/rte_vect.h |  8 ++++++++
> >  2 files changed, 24 insertions(+), 1 deletion(-)
> > 
> > diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
> > index 21cdb4d..d300951 100644
> > --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
> > +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
> > @@ -33,13 +33,14 @@
> >  #ifndef _RTE_VECT_ARM_H_
> >  #define _RTE_VECT_ARM_H_
> >  
> > -#include "arm_neon.h"
> > +#include <arm_neon.h>
> >  
> >  #ifdef __cplusplus
> >  extern "C" {
> >  #endif
> >  
> >  typedef int32x4_t xmm_t;
> > +typedef int32x4_t __m128i;
> 
> As Jianbo pointed out recently, the __m128i type should be refactored in
> a general rte_vect API too. If we do something like
> 
> #if SSE
> typedef __m128i rte_128i;
> #elif NEON
> typedef int32x4_y rte_128i;
> #endif
> 
> does it make somebody angry? I am afraid that it will influence a lot of
> code. However, from the ABI point of view, it is OK, isn't it?
> 
> >  
> >  #define	XMM_SIZE	(sizeof(xmm_t))
> >  #define	XMM_MASK	(XMM_SIZE - 1)
> > @@ -53,6 +54,20 @@ typedef union rte_xmm {
> >  	double   pd[XMM_SIZE / sizeof(double)];
> >  } __attribute__((aligned(16))) rte_xmm_t;
> >  
> > +/* rte_vect_* abstraction implementation using NEON */
> > +
> > +/* loads the __m128i value from address p(does not need to be 16-byte aligned)*/
> > +#define rte_vect_loadu_sil128(p) vld1q_s32((const int32_t *)p)
> > +
> > +/* sets the 4 signed 32-bit integer values and returns the __m128i variable */
> > +static inline __m128i  __attribute__((always_inline))
> > +rte_vect_set_epi32(int i3, int i2, int i1, int i0)
> > +{
> > +	int32_t data[4] = {i0, i1, i2, i3};
> > +
> > +	return vld1q_s32(data);
> > +}
> > +
> >  #ifdef __cplusplus
> >  }
> >  #endif
> > diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h
> > index b698797..91c6523 100644
> > --- a/lib/librte_eal/common/include/arch/x86/rte_vect.h
> > +++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h
> > @@ -125,6 +125,14 @@ typedef union rte_ymm {
> >  })
> >  #endif /* (defined(__ICC) && __ICC < 1210) */
> >  
> > +/* rte_vect_* abstraction implementation using SSE */
> > +
> > +/* loads the __m128i value from address p(does not need to be 16-byte aligned)*/
> > +#define rte_vect_loadu_sil128(p) _mm_loadu_si128(p)
> > +
> > +/* sets the 4 signed 32-bit integer values and returns the __m128i variable */
> > +#define rte_vect_set_epi32(i3, i2, i1, i0) _mm_set_epi32(i3, i2, i1, i0)
> > +
> >  #ifdef __cplusplus
> >  }
> >  #endif
> 
> I like this approach. It is a question whether to inherit names from
> SSE. However, why to reinvent the wheel...
> 
> We probably need other people to give their ideas about such
> generalization of the API.

Yes, I would like get the feedback from other people.

ret_vect_* abstraction only for the common code (i.e test code) which
typically used to call the SIMD DPDK API's across the architecture.

> 
> I think, there should be an autotest of the rte_vect API. Is it
> possible to create one?

Yes

> 
> Regards
> Jan
> 
> -- 
>    Jan Viktorin                  E-mail: Viktorin@RehiveTech.com
>    System Architect              Web:    www.RehiveTech.com
>    RehiveTech
>    Brno, Czech Republic
  

Patch

diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
index 21cdb4d..d300951 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -33,13 +33,14 @@ 
 #ifndef _RTE_VECT_ARM_H_
 #define _RTE_VECT_ARM_H_
 
-#include "arm_neon.h"
+#include <arm_neon.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 typedef int32x4_t xmm_t;
+typedef int32x4_t __m128i;
 
 #define	XMM_SIZE	(sizeof(xmm_t))
 #define	XMM_MASK	(XMM_SIZE - 1)
@@ -53,6 +54,20 @@  typedef union rte_xmm {
 	double   pd[XMM_SIZE / sizeof(double)];
 } __attribute__((aligned(16))) rte_xmm_t;
 
+/* rte_vect_* abstraction implementation using NEON */
+
+/* loads the __m128i value from address p(does not need to be 16-byte aligned)*/
+#define rte_vect_loadu_sil128(p) vld1q_s32((const int32_t *)p)
+
+/* sets the 4 signed 32-bit integer values and returns the __m128i variable */
+static inline __m128i  __attribute__((always_inline))
+rte_vect_set_epi32(int i3, int i2, int i1, int i0)
+{
+	int32_t data[4] = {i0, i1, i2, i3};
+
+	return vld1q_s32(data);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h
index b698797..91c6523 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h
@@ -125,6 +125,14 @@  typedef union rte_ymm {
 })
 #endif /* (defined(__ICC) && __ICC < 1210) */
 
+/* rte_vect_* abstraction implementation using SSE */
+
+/* loads the __m128i value from address p(does not need to be 16-byte aligned)*/
+#define rte_vect_loadu_sil128(p) _mm_loadu_si128(p)
+
+/* sets the 4 signed 32-bit integer values and returns the __m128i variable */
+#define rte_vect_set_epi32(i3, i2, i1, i0) _mm_set_epi32(i3, i2, i1, i0)
+
 #ifdef __cplusplus
 }
 #endif