diff mbox series

eal: use C11 alignas instead of GCC attribute aligned

Message ID	1700069997-4399-2-git-send-email-roretzla@linux.microsoft.com (mailing list archive)
State	Superseded, archived
Delegated to:	Thomas Monjalon
Headers	From: Tyler Retzlaff <roretzla@linux.microsoft.com> To: dev@dpdk.org Cc: =?utf-8?q?Mattias_R=C3=B6nnblom?= <mattias.ronnblom@ericsson.com>, Anatoly Burakov <anatoly.burakov@intel.com>, Bruce Richardson <bruce.richardson@intel.com>, David Christensen <drc@linux.vnet.ibm.com>, Harry van Haaren <harry.van.haaren@intel.com>, Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>, Min Zhou <zhoumin@loongson.cn>, Ruifeng Wang <ruifeng.wang@arm.com>, Stanislaw Kardach <kda@semihalf.com>, Tyler Retzlaff <roretzla@linux.microsoft.com> Subject: [PATCH] eal: use C11 alignas instead of GCC attribute aligned Date: Wed, 15 Nov 2023 09:39:57 -0800 Message-Id: <1700069997-4399-2-git-send-email-roretzla@linux.microsoft.com> In-Reply-To: <1700069997-4399-1-git-send-email-roretzla@linux.microsoft.com> References: <1700069997-4399-1-git-send-email-roretzla@linux.microsoft.com> Precedence: list Errors-To: dev-bounces@dpdk.org
Series	eal: use C11 alignas instead of GCC attribute aligned \| eal: use C11 alignas instead of GCC attribute aligned

Checks

Context	Check	Description
ci/checkpatch	success	coding style OK
ci/loongarch-compilation	success	Compilation OK
ci/loongarch-unit-testing	success	Unit Testing PASS
ci/Intel-compilation	success	Compilation OK
ci/intel-Testing	success	Testing PASS
ci/iol-intel-Performance	success	Performance Testing PASS
ci/github-robot: build	success	github build: passed
ci/iol-intel-Functional	success	Functional Testing PASS
ci/iol-mellanox-Performance	success	Performance Testing PASS
ci/iol-compile-amd64-testing	fail	Testing issues
ci/iol-sample-apps-testing	success	Testing PASS
ci/iol-unit-amd64-testing	success	Testing PASS
ci/iol-unit-arm64-testing	success	Testing PASS
ci/iol-compile-arm64-testing	success	Testing PASS
ci/iol-broadcom-Functional	success	Functional Testing PASS
ci/iol-broadcom-Performance	success	Performance Testing PASS
ci/intel-Functional	success	Functional PASS

Commit Message

Tyler Retzlaff Nov. 15, 2023, 5:39 p.m. UTC

  Now that we have enabled C11 replace the use of __rte_cache_aligned
and __rte_aligned(n) with alignas(RTE_CACHE_LINE_SIZE) and
__rte_aligned(n) respectively.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/arm/include/rte_vect.h       | 4 +++-
 lib/eal/common/malloc_elem.h         | 4 +++-
 lib/eal/common/malloc_heap.h         | 4 +++-
 lib/eal/common/rte_keepalive.c       | 4 +++-
 lib/eal/common/rte_random.c          | 5 ++++-
 lib/eal/common/rte_service.c         | 7 +++++--
 lib/eal/include/generic/rte_atomic.h | 4 +++-
 lib/eal/loongarch/include/rte_vect.h | 7 +++++--
 lib/eal/ppc/include/rte_vect.h       | 5 ++++-
 lib/eal/riscv/include/rte_vect.h     | 4 +++-
 lib/eal/x86/include/rte_vect.h       | 4 +++-
 lib/eal/x86/rte_power_intrinsics.c   | 8 ++++++--
 12 files changed, 45 insertions(+), 15 deletions(-)

Comments

Bruce Richardson Nov. 15, 2023, 6:13 p.m. UTC | #1

On Wed, Nov 15, 2023 at 09:39:57AM -0800, Tyler Retzlaff wrote:
> Now that we have enabled C11 replace the use of __rte_cache_aligned
> and __rte_aligned(n) with alignas(RTE_CACHE_LINE_SIZE) and
> __rte_aligned(n) respectively.

alignas(n)

> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  lib/eal/arm/include/rte_vect.h       | 4 +++-
>  lib/eal/common/malloc_elem.h         | 4 +++-
>  lib/eal/common/malloc_heap.h         | 4 +++-
>  lib/eal/common/rte_keepalive.c       | 4 +++-
>  lib/eal/common/rte_random.c          | 5 ++++-
>  lib/eal/common/rte_service.c         | 7 +++++--
>  lib/eal/include/generic/rte_atomic.h | 4 +++-
>  lib/eal/loongarch/include/rte_vect.h | 7 +++++--
>  lib/eal/ppc/include/rte_vect.h       | 5 ++++-
>  lib/eal/riscv/include/rte_vect.h     | 4 +++-
>  lib/eal/x86/include/rte_vect.h       | 4 +++-
>  lib/eal/x86/rte_power_intrinsics.c   | 8 ++++++--
>  12 files changed, 45 insertions(+), 15 deletions(-)
> 
> diff --git a/lib/eal/arm/include/rte_vect.h b/lib/eal/arm/include/rte_vect.h
> index 8cfe4bd..c7a3b2e 100644
> --- a/lib/eal/arm/include/rte_vect.h
> +++ b/lib/eal/arm/include/rte_vect.h
> @@ -5,6 +5,7 @@
>  #ifndef _RTE_VECT_ARM_H_
>  #define _RTE_VECT_ARM_H_
>  
> +#include <stdalign.h>
>  #include <stdint.h>
>  #include "generic/rte_vect.h"
>  #include "rte_debug.h"
> @@ -25,13 +26,14 @@
>  #define	XMM_MASK	(XMM_SIZE - 1)
>  
>  typedef union rte_xmm {
> +	alignas(16)
>  	xmm_t    x;
>  	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];

This may seem minor but I really don't like the indentation style used for
these alignas statements. To a casual glance they look like elements in the
struct. The previous macros were nice is that it was hard to mistake them
for anything other than additional info on the struct.

Couple of suggestions:
1. Put them on the same line as the definition of the first element. The
   downside is that we lose the (as here) implication that it's the struct
   being aligned more than just the first element.
2. Alternatively, how about putting the alignas on the same line as the
   struct/union e.g.

	struct rte_xyz {   alignas(16)
		...
	}

   In this case, or perhaps generally, perhaps we want to define
rte_aliases with underscores for these alignas to further visually separate
them.

Thoughts?

/Bruce

Tyler Retzlaff Nov. 15, 2023, 6:27 p.m. UTC | #2

On Wed, Nov 15, 2023 at 06:13:55PM +0000, Bruce Richardson wrote:
> On Wed, Nov 15, 2023 at 09:39:57AM -0800, Tyler Retzlaff wrote:
> > Now that we have enabled C11 replace the use of __rte_cache_aligned
> > and __rte_aligned(n) with alignas(RTE_CACHE_LINE_SIZE) and
> > __rte_aligned(n) respectively.
> 
> alignas(n)
> 
> > 
> > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > ---
> >  lib/eal/arm/include/rte_vect.h       | 4 +++-
> >  lib/eal/common/malloc_elem.h         | 4 +++-
> >  lib/eal/common/malloc_heap.h         | 4 +++-
> >  lib/eal/common/rte_keepalive.c       | 4 +++-
> >  lib/eal/common/rte_random.c          | 5 ++++-
> >  lib/eal/common/rte_service.c         | 7 +++++--
> >  lib/eal/include/generic/rte_atomic.h | 4 +++-
> >  lib/eal/loongarch/include/rte_vect.h | 7 +++++--
> >  lib/eal/ppc/include/rte_vect.h       | 5 ++++-
> >  lib/eal/riscv/include/rte_vect.h     | 4 +++-
> >  lib/eal/x86/include/rte_vect.h       | 4 +++-
> >  lib/eal/x86/rte_power_intrinsics.c   | 8 ++++++--
> >  12 files changed, 45 insertions(+), 15 deletions(-)
> > 
> > diff --git a/lib/eal/arm/include/rte_vect.h b/lib/eal/arm/include/rte_vect.h
> > index 8cfe4bd..c7a3b2e 100644
> > --- a/lib/eal/arm/include/rte_vect.h
> > +++ b/lib/eal/arm/include/rte_vect.h
> > @@ -5,6 +5,7 @@
> >  #ifndef _RTE_VECT_ARM_H_
> >  #define _RTE_VECT_ARM_H_
> >  
> > +#include <stdalign.h>
> >  #include <stdint.h>
> >  #include "generic/rte_vect.h"
> >  #include "rte_debug.h"
> > @@ -25,13 +26,14 @@
> >  #define	XMM_MASK	(XMM_SIZE - 1)
> >  
> >  typedef union rte_xmm {
> > +	alignas(16)
> >  	xmm_t    x;
> >  	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
> 
> This may seem minor but I really don't like the indentation style used for
> these alignas statements. To a casual glance they look like elements in the
> struct. The previous macros were nice is that it was hard to mistake them
> for anything other than additional info on the struct.
> 

i'm open to whatever indentation style people choose. though as you have
pointed out it might be important to be clear that the alignas is being
applied to the first member.

> Couple of suggestions:
> 1. Put them on the same line as the definition of the first element. The
>    downside is that we lose the (as here) implication that it's the struct
>    being aligned more than just the first element.

i'd be inclined to place it on the same line so we don't end up with
confusion about what it is being applied to.

> 2. Alternatively, how about putting the alignas on the same line as the
>    struct/union e.g.
> 
> 	struct rte_xyz {   alignas(16)
> 		...
> 	}

for this option what happens if there are more fields in the same
struct? for the first field do we do this and then for other fields we
do (1)?

> 
>    In this case, or perhaps generally, perhaps we want to define
> rte_aliases with underscores for these alignas to further visually separate
> them.

i worry if hidden behind a macro people will continue to assume that the
syntactic placement continues to be permitted anywhere
__attribute__((__aligned__(a)) can go which is not the case. maybe the
expansion raising a compiler error is enough though? not sure.

> 
> Thoughts?
> 
> /Bruce

Morten Brørup Nov. 15, 2023, 8:08 p.m. UTC | #3

> From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> Sent: Wednesday, 15 November 2023 18.40
> 
> Now that we have enabled C11 replace the use of __rte_cache_aligned
> and __rte_aligned(n) with alignas(RTE_CACHE_LINE_SIZE) and
> __rte_aligned(n) respectively.
> 

[...]

>  typedef union rte_xmm {
> +	alignas(16)
>  	xmm_t    x;
>  	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
>  	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
>  	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
>  	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
>  	double   pd[XMM_SIZE / sizeof(double)];
> -} __rte_aligned(16) rte_xmm_t;
> +} rte_xmm_t;

Your patch message should mention that C11 doesn't allow alignas() being applied to the declarations of struct/union types, so it is applied to the first field in the struct/union, which has the same effect.

Someone unfamiliar with alignas() would expect:

-typedef union rte_xmm {
+typedef alignas(16) union rte_xmm {
[...]
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;

[...]

>  #ifndef RTE_VECT_RISCV_H
>  #define RTE_VECT_RISCV_H
> 
> +#include <stdalign.h>
>  #include <stdint.h>
>  #include "generic/rte_vect.h"
>  #include "rte_common.h"
> @@ -23,13 +24,14 @@
>  #define XMM_MASK	(XMM_SIZE - 1)
> 
>  typedef union rte_xmm {
> +	alignas(16) /* !! NOTE !! changed to 16 it looks like this was a
> bug? */
>  	xmm_t		x;
>  	uint8_t		u8[XMM_SIZE / sizeof(uint8_t)];
>  	uint16_t	u16[XMM_SIZE / sizeof(uint16_t)];
>  	uint32_t	u32[XMM_SIZE / sizeof(uint32_t)];
>  	uint64_t	u64[XMM_SIZE / sizeof(uint64_t)];
>  	double		pd[XMM_SIZE / sizeof(double)];
> -} __rte_aligned(8) rte_xmm_t;
> +} rte_xmm_t;

Yes, this looks very much like a bug.
Even if a RISC-V CPU could handle alignment like that, it might interact with other software/hardware expecting type-sized alignment, i.e. 16-byte alignment, so partially using 8-byte alignment would cause bugs.

It should be a separate patch with a Fixes tag.

We need to urgently decide if this bug should live on in DPDK 23.11, or if the fix should be included although we are very late in the release process.

Stanislaw, what do you think?

Furthermore, I wonder if it can be backported to stable, and to what extent backporting it would break the ABI/API.

Tyler Retzlaff Nov. 15, 2023, 9:03 p.m. UTC | #4

On Wed, Nov 15, 2023 at 09:08:05PM +0100, Morten Brørup wrote:
> > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > Sent: Wednesday, 15 November 2023 18.40
> > 
> > Now that we have enabled C11 replace the use of __rte_cache_aligned
> > and __rte_aligned(n) with alignas(RTE_CACHE_LINE_SIZE) and
> > __rte_aligned(n) respectively.
> > 
> 
> [...]
> 
> >  typedef union rte_xmm {
> > +	alignas(16)
> >  	xmm_t    x;
> >  	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
> >  	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
> >  	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
> >  	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
> >  	double   pd[XMM_SIZE / sizeof(double)];
> > -} __rte_aligned(16) rte_xmm_t;
> > +} rte_xmm_t;
> 
> Your patch message should mention that C11 doesn't allow alignas() being applied to the declarations of struct/union types, so it is applied to the first field in the struct/union, which has the same effect.

no problem, will add a note.

> 
> Someone unfamiliar with alignas() would expect:
> 
> -typedef union rte_xmm {
> +typedef alignas(16) union rte_xmm {
> [...]
> -} __rte_aligned(16) rte_xmm_t;
> +} rte_xmm_t;
> 
> [...]
> 
> >  #ifndef RTE_VECT_RISCV_H
> >  #define RTE_VECT_RISCV_H
> > 
> > +#include <stdalign.h>
> >  #include <stdint.h>
> >  #include "generic/rte_vect.h"
> >  #include "rte_common.h"
> > @@ -23,13 +24,14 @@
> >  #define XMM_MASK	(XMM_SIZE - 1)
> > 
> >  typedef union rte_xmm {
> > +	alignas(16) /* !! NOTE !! changed to 16 it looks like this was a
> > bug? */
> >  	xmm_t		x;
> >  	uint8_t		u8[XMM_SIZE / sizeof(uint8_t)];
> >  	uint16_t	u16[XMM_SIZE / sizeof(uint16_t)];
> >  	uint32_t	u32[XMM_SIZE / sizeof(uint32_t)];
> >  	uint64_t	u64[XMM_SIZE / sizeof(uint64_t)];
> >  	double		pd[XMM_SIZE / sizeof(double)];
> > -} __rte_aligned(8) rte_xmm_t;
> > +} rte_xmm_t;
> 
> Yes, this looks very much like a bug.
> Even if a RISC-V CPU could handle alignment like that, it might interact with other software/hardware expecting type-sized alignment, i.e. 16-byte alignment, so partially using 8-byte alignment would cause bugs.
> 
> It should be a separate patch with a Fixes tag.

i'll submit a patch/fix for this so it is available and others can
discuss if it should or shouldn't be merged for 23.11.

> 
> We need to urgently decide if this bug should live on in DPDK 23.11, or if the fix should be included although we are very late in the release process.
> 
> Stanislaw, what do you think?
> 
> Furthermore, I wonder if it can be backported to stable, and to what extent backporting it would break the ABI/API.
>

Stanislaw Kardach Nov. 15, 2023, 10:43 p.m. UTC | #5

On Wed, Nov 15, 2023 at 10:03 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> On Wed, Nov 15, 2023 at 09:08:05PM +0100, Morten Brørup wrote:
> > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > Sent: Wednesday, 15 November 2023 18.40
> > >
> > > Now that we have enabled C11 replace the use of __rte_cache_aligned
> > > and __rte_aligned(n) with alignas(RTE_CACHE_LINE_SIZE) and
> > > __rte_aligned(n) respectively.
> > >
> >
> > [...]
> >
> > >  typedef union rte_xmm {
> > > +   alignas(16)
> > >     xmm_t    x;
> > >     uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
> > >     uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
> > >     uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
> > >     uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
> > >     double   pd[XMM_SIZE / sizeof(double)];
> > > -} __rte_aligned(16) rte_xmm_t;
> > > +} rte_xmm_t;
> >
> > Your patch message should mention that C11 doesn't allow alignas() being applied to the declarations of struct/union types, so it is applied to the first field in the struct/union, which has the same effect.
>
> no problem, will add a note.
>
> >
> > Someone unfamiliar with alignas() would expect:
> >
> > -typedef union rte_xmm {
> > +typedef alignas(16) union rte_xmm {
> > [...]
> > -} __rte_aligned(16) rte_xmm_t;
> > +} rte_xmm_t;
> >
> > [...]
> >
> > >  #ifndef RTE_VECT_RISCV_H
> > >  #define RTE_VECT_RISCV_H
> > >
> > > +#include <stdalign.h>
> > >  #include <stdint.h>
> > >  #include "generic/rte_vect.h"
> > >  #include "rte_common.h"
> > > @@ -23,13 +24,14 @@
> > >  #define XMM_MASK   (XMM_SIZE - 1)
> > >
> > >  typedef union rte_xmm {
> > > +   alignas(16) /* !! NOTE !! changed to 16 it looks like this was a
> > > bug? */
> > >     xmm_t           x;
> > >     uint8_t         u8[XMM_SIZE / sizeof(uint8_t)];
> > >     uint16_t        u16[XMM_SIZE / sizeof(uint16_t)];
> > >     uint32_t        u32[XMM_SIZE / sizeof(uint32_t)];
> > >     uint64_t        u64[XMM_SIZE / sizeof(uint64_t)];
> > >     double          pd[XMM_SIZE / sizeof(double)];
> > > -} __rte_aligned(8) rte_xmm_t;
> > > +} rte_xmm_t;
> >
> > Yes, this looks very much like a bug.
> > Even if a RISC-V CPU could handle alignment like that, it might interact with other software/hardware expecting type-sized alignment, i.e. 16-byte alignment, so partially using 8-byte alignment would cause bugs.
> >
> > It should be a separate patch with a Fixes tag.
>
> i'll submit a patch/fix for this so it is available and others can
> discuss if it should or shouldn't be merged for 23.11.
It is definitely a bug. Good catch. Since we did not have vector
extensions on our bring-up board, all xmm_t handling was essentially
scalar.
>
> >
> > We need to urgently decide if this bug should live on in DPDK 23.11, or if the fix should be included although we are very late in the release process.
> >
> > Stanislaw, what do you think?
> >
> > Furthermore, I wonder if it can be backported to stable, and to what extent backporting it would break the ABI/API.
> >

Mattias Rönnblom Nov. 16, 2023, 10:12 a.m. UTC | #6

On 2023-11-15 18:39, Tyler Retzlaff wrote:
> Now that we have enabled C11 replace the use of __rte_cache_aligned
> and __rte_aligned(n) with alignas(RTE_CACHE_LINE_SIZE) and
> __rte_aligned(n) respectively.
> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>   lib/eal/arm/include/rte_vect.h       | 4 +++-
>   lib/eal/common/malloc_elem.h         | 4 +++-
>   lib/eal/common/malloc_heap.h         | 4 +++-
>   lib/eal/common/rte_keepalive.c       | 4 +++-
>   lib/eal/common/rte_random.c          | 5 ++++-
>   lib/eal/common/rte_service.c         | 7 +++++--
>   lib/eal/include/generic/rte_atomic.h | 4 +++-
>   lib/eal/loongarch/include/rte_vect.h | 7 +++++--
>   lib/eal/ppc/include/rte_vect.h       | 5 ++++-
>   lib/eal/riscv/include/rte_vect.h     | 4 +++-
>   lib/eal/x86/include/rte_vect.h       | 4 +++-
>   lib/eal/x86/rte_power_intrinsics.c   | 8 ++++++--
>   12 files changed, 45 insertions(+), 15 deletions(-)
> 
> diff --git a/lib/eal/arm/include/rte_vect.h b/lib/eal/arm/include/rte_vect.h
> index 8cfe4bd..c7a3b2e 100644
> --- a/lib/eal/arm/include/rte_vect.h
> +++ b/lib/eal/arm/include/rte_vect.h
> @@ -5,6 +5,7 @@
>   #ifndef _RTE_VECT_ARM_H_
>   #define _RTE_VECT_ARM_H_
>   
> +#include <stdalign.h>
>   #include <stdint.h>
>   #include "generic/rte_vect.h"
>   #include "rte_debug.h"
> @@ -25,13 +26,14 @@
>   #define	XMM_MASK	(XMM_SIZE - 1)
>   
>   typedef union rte_xmm {
> +	alignas(16)
>   	xmm_t    x >   	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
>   	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
>   	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
>   	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
>   	double   pd[XMM_SIZE / sizeof(double)];
> -} __rte_aligned(16) rte_xmm_t;
> +} rte_xmm_t;
>   
>   #if defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32)
>   /* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */
> diff --git a/lib/eal/common/malloc_elem.h b/lib/eal/common/malloc_elem.h
> index 952ce73..c2c336e 100644
> --- a/lib/eal/common/malloc_elem.h
> +++ b/lib/eal/common/malloc_elem.h
> @@ -5,6 +5,7 @@
>   #ifndef MALLOC_ELEM_H_
>   #define MALLOC_ELEM_H_
>   
> +#include <stdalign.h>
>   #include <stdbool.h>
>   
>   #include <rte_common.h>
> @@ -21,6 +22,7 @@ enum elem_state {
>   };
>   
>   struct malloc_elem {
> +	alignas(RTE_CACHE_LINE_SIZE)
>   	struct malloc_heap *heap;
>   	struct malloc_elem *volatile prev;
>   	/**< points to prev elem in memseg */
> @@ -48,7 +50,7 @@ struct malloc_elem {
>   	size_t user_size;
>   	uint64_t asan_cookie[2]; /* must be next to header_cookie */
>   #endif
> -} __rte_cache_aligned;
> +};
>   
>   static const unsigned int MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem);
>   
> diff --git a/lib/eal/common/malloc_heap.h b/lib/eal/common/malloc_heap.h
> index 8f3ab57..a724bfb 100644
> --- a/lib/eal/common/malloc_heap.h
> +++ b/lib/eal/common/malloc_heap.h
> @@ -5,6 +5,7 @@
>   #ifndef MALLOC_HEAP_H_
>   #define MALLOC_HEAP_H_
>   
> +#include <stdalign.h>
>   #include <stdbool.h>
>   #include <sys/queue.h>
>   
> @@ -22,6 +23,7 @@
>    * Structure to hold malloc heap
>    */
>   struct malloc_heap {
> +	alignas(RTE_CACHE_LINE_SIZE)
>   	rte_spinlock_t lock;
>   	LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
>   	struct malloc_elem *volatile first;
> @@ -31,7 +33,7 @@ struct malloc_heap {
>   	unsigned int socket_id;
>   	size_t total_size;
>   	char name[RTE_HEAP_NAME_MAX_LEN];
> -} __rte_cache_aligned;
> +};
>   
>   void *
>   malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags,
> diff --git a/lib/eal/common/rte_keepalive.c b/lib/eal/common/rte_keepalive.c
> index e0494b2..67a898d 100644
> --- a/lib/eal/common/rte_keepalive.c
> +++ b/lib/eal/common/rte_keepalive.c
> @@ -3,6 +3,7 @@
>    */
>   
>   #include <inttypes.h>
> +#include <stdalign.h>
>   
>   #include <rte_common.h>
>   #include <rte_cycles.h>
> @@ -17,7 +18,8 @@ struct rte_keepalive {
>   		/*
>   		 * Each element must be cache aligned to prevent false sharing.
>   		 */
> -		enum rte_keepalive_state core_state __rte_cache_aligned;
> +		alignas(RTE_CACHE_LINE_SIZE)
> +		enum rte_keepalive_state core_state;
>   	} live_data[RTE_KEEPALIVE_MAXCORES];
>   
>   	/** Last-seen-alive timestamps */
> diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
> index 7709b8f..c04917e 100644
> --- a/lib/eal/common/rte_random.c
> +++ b/lib/eal/common/rte_random.c
> @@ -2,6 +2,8 @@
>    * Copyright(c) 2019 Ericsson AB
>    */
>   
> +#include <stdalign.h>
> +
>   #ifdef __RDSEED__
>   #include <x86intrin.h>
>   #endif
> @@ -14,13 +16,14 @@
>   #include <rte_random.h>
>   
>   struct rte_rand_state {
> +	alignas(RTE_CACHE_LINE_SIZE)
>   	uint64_t z1;

Formatting convention question: the alignas(n) and the field shouldn't 
be on the same line?

It could be useful to have a macro, so it would be:

RTE_CACHE_ALIGNAS uint64_t z1;

...which is horter than:

alignas(RTE_CACHE_LINE_SIZE) uint64_t z1;

and by tomorrow, it will feel as natural and obvious as the open-coded 
version.

I don't know. Just some thoughts.

>   	uint64_t z2;
>   	uint64_t z3;
>   	uint64_t z4;
>   	uint64_t z5;
>   	RTE_CACHE_GUARD;
> -} __rte_cache_aligned;
> +};
>   
>   /* One instance each for every lcore id-equipped thread, and one
>    * additional instance to be shared by all others threads (i.e., all
> diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
> index e183d2e..861ae31 100644
> --- a/lib/eal/common/rte_service.c
> +++ b/lib/eal/common/rte_service.c
> @@ -2,6 +2,7 @@
>    * Copyright(c) 2017 Intel Corporation
>    */
>   
> +#include <stdalign.h>
>   #include <stdio.h>
>   #include <inttypes.h>
>   #include <string.h>
> @@ -33,6 +34,7 @@
>   
>   /* internal representation of a service */
>   struct rte_service_spec_impl {
> +	alignas(RTE_CACHE_LINE_SIZE)
>   	/* public part of the struct */
>   	struct rte_service_spec spec;
>   
> @@ -53,7 +55,7 @@ struct rte_service_spec_impl {
>   	 * on currently.
>   	 */
>   	RTE_ATOMIC(uint32_t) num_mapped_cores;
> -} __rte_cache_aligned;
> +};
>   
>   struct service_stats {
>   	RTE_ATOMIC(uint64_t) calls;
> @@ -62,6 +64,7 @@ struct service_stats {
>   
>   /* the internal values of a service core */
>   struct core_state {
> +	alignas(RTE_CACHE_LINE_SIZE)
>   	/* map of services IDs are run on this core */
>   	uint64_t service_mask;
>   	RTE_ATOMIC(uint8_t) runstate; /* running or stopped */
> @@ -71,7 +74,7 @@ struct core_state {
>   	RTE_ATOMIC(uint64_t) loops;
>   	RTE_ATOMIC(uint64_t) cycles;
>   	struct service_stats service_stats[RTE_SERVICE_NUM_MAX];
> -} __rte_cache_aligned;
> +};
>   
>   static uint32_t rte_service_count;
>   static struct rte_service_spec_impl *rte_services;
> diff --git a/lib/eal/include/generic/rte_atomic.h b/lib/eal/include/generic/rte_atomic.h
> index 0e639da..bc9213c 100644
> --- a/lib/eal/include/generic/rte_atomic.h
> +++ b/lib/eal/include/generic/rte_atomic.h
> @@ -12,6 +12,7 @@
>    * This file defines a generic API for atomic operations.
>    */
>   
> +#include <stdalign.h>
>   #include <stdint.h>
>   
>   #include <rte_common.h>
> @@ -1096,6 +1097,7 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
>    */
>   typedef struct {
>   	union {
> +		alignas(16)
>   		uint64_t val[2];
>   #ifdef RTE_ARCH_64
>   #ifndef RTE_TOOLCHAIN_MSVC
> @@ -1103,7 +1105,7 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
>   #endif
>   #endif
>   	};
> -} __rte_aligned(16) rte_int128_t;
> +} rte_int128_t;
>   
>   #ifdef __DOXYGEN__
>   
> diff --git a/lib/eal/loongarch/include/rte_vect.h b/lib/eal/loongarch/include/rte_vect.h
> index 1546515..856d87b 100644
> --- a/lib/eal/loongarch/include/rte_vect.h
> +++ b/lib/eal/loongarch/include/rte_vect.h
> @@ -5,6 +5,7 @@
>   #ifndef RTE_VECT_LOONGARCH_H
>   #define RTE_VECT_LOONGARCH_H
>   
> +#include <stdalign.h>
>   #include <stdint.h>
>   #include "generic/rte_vect.h"
>   #include "rte_common.h"
> @@ -16,6 +17,7 @@
>   #define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
>   
>   typedef union xmm {
> +	alignas(16)
>   	int8_t   i8[16];
>   	int16_t  i16[8];
>   	int32_t  i32[4];
> @@ -25,19 +27,20 @@
>   	uint32_t u32[4];
>   	uint64_t u64[2];
>   	double   pd[2];
> -} __rte_aligned(16) xmm_t;
> +} xmm_t;
>   
>   #define XMM_SIZE        (sizeof(xmm_t))
>   #define XMM_MASK        (XMM_SIZE - 1)
>   
>   typedef union rte_xmm {
> +	alignas(16)
>   	xmm_t	 x;
>   	uint8_t	 u8[XMM_SIZE / sizeof(uint8_t)];
>   	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
>   	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
>   	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
>   	double   pd[XMM_SIZE / sizeof(double)];
> -} __rte_aligned(16) rte_xmm_t;
> +} rte_xmm_t;
>   
>   static inline xmm_t
>   vect_load_128(void *p)
> diff --git a/lib/eal/ppc/include/rte_vect.h b/lib/eal/ppc/include/rte_vect.h
> index a5f009b..e6702a4 100644
> --- a/lib/eal/ppc/include/rte_vect.h
> +++ b/lib/eal/ppc/include/rte_vect.h
> @@ -6,6 +6,8 @@
>   #ifndef _RTE_VECT_PPC_64_H_
>   #define _RTE_VECT_PPC_64_H_
>   
> +#include <stdalign.h>
> +
>   #include "rte_altivec.h"
>   
>   #include "generic/rte_vect.h"
> @@ -23,13 +25,14 @@
>   #define	XMM_MASK	(XMM_SIZE - 1)
>   
>   typedef union rte_xmm {
> +	alignas(16)
>   	xmm_t    x;
>   	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
>   	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
>   	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
>   	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
>   	double   pd[XMM_SIZE / sizeof(double)];
> -} __rte_aligned(16) rte_xmm_t;
> +} rte_xmm_t;
>   
>   #ifdef __cplusplus
>   }
> diff --git a/lib/eal/riscv/include/rte_vect.h b/lib/eal/riscv/include/rte_vect.h
> index 2f97f43..32d4386 100644
> --- a/lib/eal/riscv/include/rte_vect.h
> +++ b/lib/eal/riscv/include/rte_vect.h
> @@ -7,6 +7,7 @@
>   #ifndef RTE_VECT_RISCV_H
>   #define RTE_VECT_RISCV_H
>   
> +#include <stdalign.h>
>   #include <stdint.h>
>   #include "generic/rte_vect.h"
>   #include "rte_common.h"
> @@ -23,13 +24,14 @@
>   #define XMM_MASK	(XMM_SIZE - 1)
>   
>   typedef union rte_xmm {
> +	alignas(16) /* !! NOTE !! changed to 16 it looks like this was a bug? */
>   	xmm_t		x;
>   	uint8_t		u8[XMM_SIZE / sizeof(uint8_t)];
>   	uint16_t	u16[XMM_SIZE / sizeof(uint16_t)];
>   	uint32_t	u32[XMM_SIZE / sizeof(uint32_t)];
>   	uint64_t	u64[XMM_SIZE / sizeof(uint64_t)];
>   	double		pd[XMM_SIZE / sizeof(double)];
> -} __rte_aligned(8) rte_xmm_t;
> +} rte_xmm_t;
>   
>   static inline xmm_t
>   vect_load_128(void *p)
> diff --git a/lib/eal/x86/include/rte_vect.h b/lib/eal/x86/include/rte_vect.h
> index 560f9e4..2e5669d 100644
> --- a/lib/eal/x86/include/rte_vect.h
> +++ b/lib/eal/x86/include/rte_vect.h
> @@ -11,6 +11,7 @@
>    * RTE SSE/AVX related header.
>    */
>   
> +#include <stdalign.h>
>   #include <stdint.h>
>   #include <rte_config.h>
>   #include <rte_common.h>
> @@ -92,6 +93,7 @@
>   #define RTE_X86_ZMM_MASK	(RTE_X86_ZMM_SIZE - 1)
>   
>   typedef union __rte_x86_zmm {
> +	alignas(RTE_X86_ZMM_SIZE)
>   	__m512i	 z;
>   	ymm_t    y[RTE_X86_ZMM_SIZE / sizeof(ymm_t)];
>   	xmm_t    x[RTE_X86_ZMM_SIZE / sizeof(xmm_t)];
> @@ -100,7 +102,7 @@
>   	uint32_t u32[RTE_X86_ZMM_SIZE / sizeof(uint32_t)];
>   	uint64_t u64[RTE_X86_ZMM_SIZE / sizeof(uint64_t)];
>   	double   pd[RTE_X86_ZMM_SIZE / sizeof(double)];
> -} __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm_t;
> +} __rte_x86_zmm_t;
>   
>   #endif /* __AVX512F__ */
>   
> diff --git a/lib/eal/x86/rte_power_intrinsics.c b/lib/eal/x86/rte_power_intrinsics.c
> index 532a2e6..5636543 100644
> --- a/lib/eal/x86/rte_power_intrinsics.c
> +++ b/lib/eal/x86/rte_power_intrinsics.c
> @@ -2,6 +2,8 @@
>    * Copyright(c) 2020 Intel Corporation
>    */
>   
> +#include <stdalign.h>
> +
>   #include <rte_common.h>
>   #include <rte_lcore.h>
>   #include <rte_rtm.h>
> @@ -13,9 +15,10 @@
>    * Per-lcore structure holding current status of C0.2 sleeps.
>    */
>   static struct power_wait_status {
> +	alignas(RTE_CACHE_LINE_SIZE)
>   	rte_spinlock_t lock;
>   	volatile void *monitor_addr; /**< NULL if not currently sleeping */
> -} __rte_cache_aligned wait_status[RTE_MAX_LCORE];
> +} wait_status[RTE_MAX_LCORE];
>   
>   /*
>    * This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state.
> @@ -86,9 +89,10 @@ static void amd_mwaitx(const uint64_t timeout)
>   }
>   
>   static struct {
> +	alignas(RTE_CACHE_LINE_SIZE)
>   	void (*mmonitor)(volatile void *addr);
>   	void (*mwait)(const uint64_t timeout);
> -} __rte_cache_aligned power_monitor_ops;
> +} power_monitor_ops;
>   
>   static inline void
>   __umwait_wakeup(volatile void *addr)

diff mbox series

Patch

diff --git a/lib/eal/arm/include/rte_vect.h b/lib/eal/arm/include/rte_vect.h
index 8cfe4bd..c7a3b2e 100644
--- a/lib/eal/arm/include/rte_vect.h
+++ b/lib/eal/arm/include/rte_vect.h
@@ -5,6 +5,7 @@ 
 #ifndef _RTE_VECT_ARM_H_
 #define _RTE_VECT_ARM_H_
 
+#include <stdalign.h>
 #include <stdint.h>
 #include "generic/rte_vect.h"
 #include "rte_debug.h"
@@ -25,13 +26,14 @@ 
 #define	XMM_MASK	(XMM_SIZE - 1)
 
 typedef union rte_xmm {
+	alignas(16)
 	xmm_t    x;
 	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
 	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
 	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
 	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
 	double   pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
 
 #if defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32)
 /* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */
diff --git a/lib/eal/common/malloc_elem.h b/lib/eal/common/malloc_elem.h
index 952ce73..c2c336e 100644
--- a/lib/eal/common/malloc_elem.h
+++ b/lib/eal/common/malloc_elem.h
@@ -5,6 +5,7 @@ 
 #ifndef MALLOC_ELEM_H_
 #define MALLOC_ELEM_H_
 
+#include <stdalign.h>
 #include <stdbool.h>
 
 #include <rte_common.h>
@@ -21,6 +22,7 @@  enum elem_state {
 };
 
 struct malloc_elem {
+	alignas(RTE_CACHE_LINE_SIZE)
 	struct malloc_heap *heap;
 	struct malloc_elem *volatile prev;
 	/**< points to prev elem in memseg */
@@ -48,7 +50,7 @@  struct malloc_elem {
 	size_t user_size;
 	uint64_t asan_cookie[2]; /* must be next to header_cookie */
 #endif
-} __rte_cache_aligned;
+};
 
 static const unsigned int MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem);
 
diff --git a/lib/eal/common/malloc_heap.h b/lib/eal/common/malloc_heap.h
index 8f3ab57..a724bfb 100644
--- a/lib/eal/common/malloc_heap.h
+++ b/lib/eal/common/malloc_heap.h
@@ -5,6 +5,7 @@ 
 #ifndef MALLOC_HEAP_H_
 #define MALLOC_HEAP_H_
 
+#include <stdalign.h>
 #include <stdbool.h>
 #include <sys/queue.h>
 
@@ -22,6 +23,7 @@ 
  * Structure to hold malloc heap
  */
 struct malloc_heap {
+	alignas(RTE_CACHE_LINE_SIZE)
 	rte_spinlock_t lock;
 	LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
 	struct malloc_elem *volatile first;
@@ -31,7 +33,7 @@  struct malloc_heap {
 	unsigned int socket_id;
 	size_t total_size;
 	char name[RTE_HEAP_NAME_MAX_LEN];
-} __rte_cache_aligned;
+};
 
 void *
 malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags,
diff --git a/lib/eal/common/rte_keepalive.c b/lib/eal/common/rte_keepalive.c
index e0494b2..67a898d 100644
--- a/lib/eal/common/rte_keepalive.c
+++ b/lib/eal/common/rte_keepalive.c
@@ -3,6 +3,7 @@ 
  */
 
 #include <inttypes.h>
+#include <stdalign.h>
 
 #include <rte_common.h>
 #include <rte_cycles.h>
@@ -17,7 +18,8 @@  struct rte_keepalive {
 		/*
 		 * Each element must be cache aligned to prevent false sharing.
 		 */
-		enum rte_keepalive_state core_state __rte_cache_aligned;
+		alignas(RTE_CACHE_LINE_SIZE)
+		enum rte_keepalive_state core_state;
 	} live_data[RTE_KEEPALIVE_MAXCORES];
 
 	/** Last-seen-alive timestamps */
diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
index 7709b8f..c04917e 100644
--- a/lib/eal/common/rte_random.c
+++ b/lib/eal/common/rte_random.c
@@ -2,6 +2,8 @@ 
  * Copyright(c) 2019 Ericsson AB
  */
 
+#include <stdalign.h>
+
 #ifdef __RDSEED__
 #include <x86intrin.h>
 #endif
@@ -14,13 +16,14 @@ 
 #include <rte_random.h>
 
 struct rte_rand_state {
+	alignas(RTE_CACHE_LINE_SIZE)
 	uint64_t z1;
 	uint64_t z2;
 	uint64_t z3;
 	uint64_t z4;
 	uint64_t z5;
 	RTE_CACHE_GUARD;
-} __rte_cache_aligned;
+};
 
 /* One instance each for every lcore id-equipped thread, and one
  * additional instance to be shared by all others threads (i.e., all
diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
index e183d2e..861ae31 100644
--- a/lib/eal/common/rte_service.c
+++ b/lib/eal/common/rte_service.c
@@ -2,6 +2,7 @@ 
  * Copyright(c) 2017 Intel Corporation
  */
 
+#include <stdalign.h>
 #include <stdio.h>
 #include <inttypes.h>
 #include <string.h>
@@ -33,6 +34,7 @@ 
 
 /* internal representation of a service */
 struct rte_service_spec_impl {
+	alignas(RTE_CACHE_LINE_SIZE)
 	/* public part of the struct */
 	struct rte_service_spec spec;
 
@@ -53,7 +55,7 @@  struct rte_service_spec_impl {
 	 * on currently.
 	 */
 	RTE_ATOMIC(uint32_t) num_mapped_cores;
-} __rte_cache_aligned;
+};
 
 struct service_stats {
 	RTE_ATOMIC(uint64_t) calls;
@@ -62,6 +64,7 @@  struct service_stats {
 
 /* the internal values of a service core */
 struct core_state {
+	alignas(RTE_CACHE_LINE_SIZE)
 	/* map of services IDs are run on this core */
 	uint64_t service_mask;
 	RTE_ATOMIC(uint8_t) runstate; /* running or stopped */
@@ -71,7 +74,7 @@  struct core_state {
 	RTE_ATOMIC(uint64_t) loops;
 	RTE_ATOMIC(uint64_t) cycles;
 	struct service_stats service_stats[RTE_SERVICE_NUM_MAX];
-} __rte_cache_aligned;
+};
 
 static uint32_t rte_service_count;
 static struct rte_service_spec_impl *rte_services;
diff --git a/lib/eal/include/generic/rte_atomic.h b/lib/eal/include/generic/rte_atomic.h
index 0e639da..bc9213c 100644
--- a/lib/eal/include/generic/rte_atomic.h
+++ b/lib/eal/include/generic/rte_atomic.h
@@ -12,6 +12,7 @@ 
  * This file defines a generic API for atomic operations.
  */
 
+#include <stdalign.h>
 #include <stdint.h>
 
 #include <rte_common.h>
@@ -1096,6 +1097,7 @@  static inline void rte_atomic64_clear(rte_atomic64_t *v)
  */
 typedef struct {
 	union {
+		alignas(16)
 		uint64_t val[2];
 #ifdef RTE_ARCH_64
 #ifndef RTE_TOOLCHAIN_MSVC
@@ -1103,7 +1105,7 @@  static inline void rte_atomic64_clear(rte_atomic64_t *v)
 #endif
 #endif
 	};
-} __rte_aligned(16) rte_int128_t;
+} rte_int128_t;
 
 #ifdef __DOXYGEN__
 
diff --git a/lib/eal/loongarch/include/rte_vect.h b/lib/eal/loongarch/include/rte_vect.h
index 1546515..856d87b 100644
--- a/lib/eal/loongarch/include/rte_vect.h
+++ b/lib/eal/loongarch/include/rte_vect.h
@@ -5,6 +5,7 @@ 
 #ifndef RTE_VECT_LOONGARCH_H
 #define RTE_VECT_LOONGARCH_H
 
+#include <stdalign.h>
 #include <stdint.h>
 #include "generic/rte_vect.h"
 #include "rte_common.h"
@@ -16,6 +17,7 @@ 
 #define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
 
 typedef union xmm {
+	alignas(16)
 	int8_t   i8[16];
 	int16_t  i16[8];
 	int32_t  i32[4];
@@ -25,19 +27,20 @@ 
 	uint32_t u32[4];
 	uint64_t u64[2];
 	double   pd[2];
-} __rte_aligned(16) xmm_t;
+} xmm_t;
 
 #define XMM_SIZE        (sizeof(xmm_t))
 #define XMM_MASK        (XMM_SIZE - 1)
 
 typedef union rte_xmm {
+	alignas(16)
 	xmm_t	 x;
 	uint8_t	 u8[XMM_SIZE / sizeof(uint8_t)];
 	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
 	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
 	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
 	double   pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
 
 static inline xmm_t
 vect_load_128(void *p)
diff --git a/lib/eal/ppc/include/rte_vect.h b/lib/eal/ppc/include/rte_vect.h
index a5f009b..e6702a4 100644
--- a/lib/eal/ppc/include/rte_vect.h
+++ b/lib/eal/ppc/include/rte_vect.h
@@ -6,6 +6,8 @@ 
 #ifndef _RTE_VECT_PPC_64_H_
 #define _RTE_VECT_PPC_64_H_
 
+#include <stdalign.h>
+
 #include "rte_altivec.h"
 
 #include "generic/rte_vect.h"
@@ -23,13 +25,14 @@ 
 #define	XMM_MASK	(XMM_SIZE - 1)
 
 typedef union rte_xmm {
+	alignas(16)
 	xmm_t    x;
 	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
 	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
 	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
 	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
 	double   pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
 
 #ifdef __cplusplus
 }
diff --git a/lib/eal/riscv/include/rte_vect.h b/lib/eal/riscv/include/rte_vect.h
index 2f97f43..32d4386 100644
--- a/lib/eal/riscv/include/rte_vect.h
+++ b/lib/eal/riscv/include/rte_vect.h
@@ -7,6 +7,7 @@ 
 #ifndef RTE_VECT_RISCV_H
 #define RTE_VECT_RISCV_H
 
+#include <stdalign.h>
 #include <stdint.h>
 #include "generic/rte_vect.h"
 #include "rte_common.h"
@@ -23,13 +24,14 @@ 
 #define XMM_MASK	(XMM_SIZE - 1)
 
 typedef union rte_xmm {
+	alignas(16) /* !! NOTE !! changed to 16 it looks like this was a bug? */
 	xmm_t		x;
 	uint8_t		u8[XMM_SIZE / sizeof(uint8_t)];
 	uint16_t	u16[XMM_SIZE / sizeof(uint16_t)];
 	uint32_t	u32[XMM_SIZE / sizeof(uint32_t)];
 	uint64_t	u64[XMM_SIZE / sizeof(uint64_t)];
 	double		pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(8) rte_xmm_t;
+} rte_xmm_t;
 
 static inline xmm_t
 vect_load_128(void *p)
diff --git a/lib/eal/x86/include/rte_vect.h b/lib/eal/x86/include/rte_vect.h
index 560f9e4..2e5669d 100644
--- a/lib/eal/x86/include/rte_vect.h
+++ b/lib/eal/x86/include/rte_vect.h
@@ -11,6 +11,7 @@ 
  * RTE SSE/AVX related header.
  */
 
+#include <stdalign.h>
 #include <stdint.h>
 #include <rte_config.h>
 #include <rte_common.h>
@@ -92,6 +93,7 @@ 
 #define RTE_X86_ZMM_MASK	(RTE_X86_ZMM_SIZE - 1)
 
 typedef union __rte_x86_zmm {
+	alignas(RTE_X86_ZMM_SIZE)
 	__m512i	 z;
 	ymm_t    y[RTE_X86_ZMM_SIZE / sizeof(ymm_t)];
 	xmm_t    x[RTE_X86_ZMM_SIZE / sizeof(xmm_t)];
@@ -100,7 +102,7 @@ 
 	uint32_t u32[RTE_X86_ZMM_SIZE / sizeof(uint32_t)];
 	uint64_t u64[RTE_X86_ZMM_SIZE / sizeof(uint64_t)];
 	double   pd[RTE_X86_ZMM_SIZE / sizeof(double)];
-} __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm_t;
+} __rte_x86_zmm_t;
 
 #endif /* __AVX512F__ */
 
diff --git a/lib/eal/x86/rte_power_intrinsics.c b/lib/eal/x86/rte_power_intrinsics.c
index 532a2e6..5636543 100644
--- a/lib/eal/x86/rte_power_intrinsics.c
+++ b/lib/eal/x86/rte_power_intrinsics.c
@@ -2,6 +2,8 @@ 
  * Copyright(c) 2020 Intel Corporation
  */
 
+#include <stdalign.h>
+
 #include <rte_common.h>
 #include <rte_lcore.h>
 #include <rte_rtm.h>
@@ -13,9 +15,10 @@ 
  * Per-lcore structure holding current status of C0.2 sleeps.
  */
 static struct power_wait_status {
+	alignas(RTE_CACHE_LINE_SIZE)
 	rte_spinlock_t lock;
 	volatile void *monitor_addr; /**< NULL if not currently sleeping */
-} __rte_cache_aligned wait_status[RTE_MAX_LCORE];
+} wait_status[RTE_MAX_LCORE];
 
 /*
  * This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state.
@@ -86,9 +89,10 @@  static void amd_mwaitx(const uint64_t timeout)
 }
 
 static struct {
+	alignas(RTE_CACHE_LINE_SIZE)
 	void (*mmonitor)(volatile void *addr);
 	void (*mwait)(const uint64_t timeout);
-} __rte_cache_aligned power_monitor_ops;
+} power_monitor_ops;
 
 static inline void
 __umwait_wakeup(volatile void *addr)