[v2,2/6] eal: adapt EAL to present rte optional atomics API

Message ID 1691775136-6460-3-git-send-email-roretzla@linux.microsoft.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Series RFC optional rte optional stdatomics API

Checks

Context         Check      Description
ci/checkpatch   warning    coding style issues

Commit Message

Tyler Retzlaff Aug. 11, 2023, 5:32 p.m. UTC
  Adapt the EAL public headers to use rte optional atomics API instead of
directly using and exposing toolchain specific atomic builtin intrinsics.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 app/test/test_mcslock.c                |  6 ++--
 lib/eal/arm/include/rte_atomic_32.h    |  4 +--
 lib/eal/arm/include/rte_atomic_64.h    | 36 +++++++++++------------
 lib/eal/arm/include/rte_pause_64.h     | 26 ++++++++--------
 lib/eal/arm/rte_power_intrinsics.c     |  8 ++---
 lib/eal/common/eal_common_trace.c      | 16 +++++-----
 lib/eal/include/generic/rte_atomic.h   | 50 +++++++++++++++----------------
 lib/eal/include/generic/rte_pause.h    | 38 ++++++++++++------------
 lib/eal/include/generic/rte_rwlock.h   | 47 +++++++++++++++--------------
 lib/eal/include/generic/rte_spinlock.h | 19 ++++++------
 lib/eal/include/rte_mcslock.h          | 50 +++++++++++++++----------------
 lib/eal/include/rte_pflock.h           | 24 ++++++++-------
 lib/eal/include/rte_seqcount.h         | 18 ++++++------
 lib/eal/include/rte_ticketlock.h       | 42 +++++++++++++-------------
 lib/eal/include/rte_trace_point.h      |  4 +--
 lib/eal/loongarch/include/rte_atomic.h |  4 +--
 lib/eal/ppc/include/rte_atomic.h       | 54 +++++++++++++++++-----------------
 lib/eal/riscv/include/rte_atomic.h     |  4 +--
 lib/eal/x86/include/rte_atomic.h       |  8 ++---
 lib/eal/x86/include/rte_spinlock.h     |  2 +-
 lib/eal/x86/rte_power_intrinsics.c     |  6 ++--
 21 files changed, 237 insertions(+), 229 deletions(-)
  

Comments

Morten Brørup Aug. 14, 2023, 8 a.m. UTC | #1
> From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> Sent: Friday, 11 August 2023 19.32
> 
> Adapt the EAL public headers to use rte optional atomics API instead of
> directly using and exposing toolchain specific atomic builtin intrinsics.
> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---

[...]

> --- a/app/test/test_mcslock.c
> +++ b/app/test/test_mcslock.c
> @@ -36,9 +36,9 @@
>   *   lock multiple times.
>   */
> 
> -rte_mcslock_t *p_ml;
> -rte_mcslock_t *p_ml_try;
> -rte_mcslock_t *p_ml_perf;
> +rte_mcslock_t * __rte_atomic p_ml;
> +rte_mcslock_t * __rte_atomic p_ml_try;
> +rte_mcslock_t * __rte_atomic p_ml_perf;

Although this looks weird, it is pointers themselves, not the structures, that are used atomically. So it is correct.
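
For illustration, a minimal standalone sketch of that distinction (type and function names here are placeholders, not from the patch):

  #include <stdatomic.h>

  struct node { struct node *next; };

  struct node * _Atomic head;     /* the pointer object itself is atomic */
  /* _Atomic struct node head;       would instead make the whole struct atomic */

  void publish(struct node *n)
  {
          /* store the pointer atomically; the pointed-to struct is untouched */
          atomic_store_explicit(&head, n, memory_order_release);
  }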

> diff --git a/lib/eal/include/generic/rte_pause.h
> b/lib/eal/include/generic/rte_pause.h
> index bebfa95..c816e7d 100644
> --- a/lib/eal/include/generic/rte_pause.h
> +++ b/lib/eal/include/generic/rte_pause.h
> @@ -36,13 +36,13 @@
>   *  A 16-bit expected value to be in the memory location.
>   * @param memorder
>   *  Two different memory orders that can be specified:
> - *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + *  rte_memory_order_acquire and rte_memory_order_relaxed. These map to
>   *  C++11 memory orders with the same names, see the C++11 standard or
>   *  the GCC wiki on atomic synchronization for detailed definition.

Delete the last part of the description, starting at "These map to...".

>   */
>  static __rte_always_inline void
>  rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> -		int memorder);
> +		rte_memory_order memorder);
> 
>  /**
>   * Wait for *addr to be updated with a 32-bit expected value, with a relaxed
> @@ -54,13 +54,13 @@
>   *  A 32-bit expected value to be in the memory location.
>   * @param memorder
>   *  Two different memory orders that can be specified:
> - *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + *  rte_memory_order_acquire and rte_memory_order_relaxed. These map to
>   *  C++11 memory orders with the same names, see the C++11 standard or
>   *  the GCC wiki on atomic synchronization for detailed definition.

Delete the last part of the description, starting at "These map to...".

>   */
>  static __rte_always_inline void
>  rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
> -		int memorder);
> +		rte_memory_order memorder);
> 
>  /**
>   * Wait for *addr to be updated with a 64-bit expected value, with a relaxed
> @@ -72,42 +72,42 @@
>   *  A 64-bit expected value to be in the memory location.
>   * @param memorder
>   *  Two different memory orders that can be specified:
> - *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + *  rte_memory_order_acquire and rte_memory_order_relaxed. These map to
>   *  C++11 memory orders with the same names, see the C++11 standard or
>   *  the GCC wiki on atomic synchronization for detailed definition.

Delete the last part of the description, starting at "These map to...".

>   */
>  static __rte_always_inline void
>  rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
> -		int memorder);
> +		rte_memory_order memorder);

[...]

> @@ -125,16 +125,16 @@
>   *  An expected value to be in the memory location.
>   * @param memorder
>   *  Two different memory orders that can be specified:
> - *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + *  rte_memory_order_acquire and rte_memory_order_relaxed. These map to
>   *  C++11 memory orders with the same names, see the C++11 standard or
>   *  the GCC wiki on atomic synchronization for detailed definition.

Delete the last part of the description, starting at "These map to...".

There might be more similar comments that need removal; I haven't tried searching.

>   */
>  #define RTE_WAIT_UNTIL_MASKED(addr, mask, cond, expected, memorder) do { \

[...]

> --- a/lib/eal/include/generic/rte_spinlock.h
> +++ b/lib/eal/include/generic/rte_spinlock.h
> @@ -29,7 +29,7 @@
>   * The rte_spinlock_t type.
>   */
>  typedef struct __rte_lockable {
> -	volatile int locked; /**< lock status 0 = unlocked, 1 = locked */
> +	volatile int __rte_atomic locked; /**< lock status 0 = unlocked, 1 =
> locked */

I think __rte_atomic should be before the type:
	volatile __rte_atomic int locked; /**< lock status [...]
Alternatively (just mentioning it, I know we don't use this form):
	volatile __rte_atomic(int) locked; /**< lock status [...]

Thinking of where you would put "const" might help.

Maybe your order is also correct, so it is a matter of preference.

The DPDK coding style guidelines don't mention where to place "const", but looking at the code, it seems to use "const unsigned int" and "const char *".
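
For comparison, both placements are accepted by the compiler and mean the same thing; the difference is purely where the qualifier sits relative to the type (small illustrative snippet, not from the patch):

  const unsigned int a = 1;   /* qualifier before the type (the style seen in the code base) */
  unsigned int const b = 1;   /* qualifier after the type, formally equivalent */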

>  } rte_spinlock_t;
> 
>  /**

[...]

> --- a/lib/eal/include/rte_mcslock.h
> +++ b/lib/eal/include/rte_mcslock.h
> @@ -33,8 +33,8 @@
>   * The rte_mcslock_t type.
>   */
>  typedef struct rte_mcslock {
> -	struct rte_mcslock *next;
> -	int locked; /* 1 if the queue locked, 0 otherwise */
> +	struct rte_mcslock * __rte_atomic next;

Correct, the pointer is atomic, not the struct.

> +	int __rte_atomic locked; /* 1 if the queue locked, 0 otherwise */

Again, I think __rte_atomic should be before the type:
	__rte_atomic int locked; /* 1 if the queue locked, 0 otherwise */

>  } rte_mcslock_t;
> 

[...]

> @@ -101,34 +101,34 @@
>   *   A pointer to the node of MCS lock passed in rte_mcslock_lock.
>   */
>  static inline void
> -rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me)
> +rte_mcslock_unlock(rte_mcslock_t * __rte_atomic *msl, rte_mcslock_t *
> __rte_atomic me)
>  {
>  	/* Check if there are more nodes in the queue. */
> -	if (likely(__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)) {
> +	if (likely(rte_atomic_load_explicit(&me->next, rte_memory_order_relaxed)
> == NULL)) {
>  		/* No, last member in the queue. */
> -		rte_mcslock_t *save_me = __atomic_load_n(&me, __ATOMIC_RELAXED);
> +		rte_mcslock_t *save_me = rte_atomic_load_explicit(&me,
> rte_memory_order_relaxed);
> 
>  		/* Release the lock by setting it to NULL */
> -		if (likely(__atomic_compare_exchange_n(msl, &save_me, NULL, 0,
> -				__ATOMIC_RELEASE, __ATOMIC_RELAXED)))
> +		if (likely(rte_atomic_compare_exchange_strong_explicit(msl,
> &save_me, NULL,
> +				rte_memory_order_release,
> rte_memory_order_relaxed)))
>  			return;
> 
>  		/* Speculative execution would be allowed to read in the
>  		 * while-loop first. This has the potential to cause a
>  		 * deadlock. Need a load barrier.
>  		 */
> -		__atomic_thread_fence(__ATOMIC_ACQUIRE);
> +		__rte_atomic_thread_fence(rte_memory_order_acquire);
>  		/* More nodes added to the queue by other CPUs.
>  		 * Wait until the next pointer is set.
>  		 */
> -		uintptr_t *next;
> -		next = (uintptr_t *)&me->next;
> +		uintptr_t __rte_atomic *next;
> +		next = (uintptr_t __rte_atomic *)&me->next;

This way around, I think:
		__rte_atomic uintptr_t *next;
		next = (__rte_atomic uintptr_t *)&me->next;

[...]

> --- a/lib/eal/include/rte_pflock.h
> +++ b/lib/eal/include/rte_pflock.h
> @@ -41,8 +41,8 @@
>   */
>  struct rte_pflock {
>  	struct {
> -		uint16_t in;
> -		uint16_t out;
> +		uint16_t __rte_atomic in;
> +		uint16_t __rte_atomic out;

Again, I think __rte_atomic should be before the type:
		__rte_atomic uint16_t in;
		__rte_atomic uint16_t out;

>  	} rd, wr;
>  };

[...]

> --- a/lib/eal/include/rte_seqcount.h
> +++ b/lib/eal/include/rte_seqcount.h
> @@ -32,7 +32,7 @@
>   * The RTE seqcount type.
>   */
>  typedef struct {
> -	uint32_t sn; /**< A sequence number for the protected data. */
> +	uint32_t __rte_atomic sn; /**< A sequence number for the protected data.
> */

Again, I think __rte_atomic should be before the type:
	__rte_atomic uint32_t sn; /**< A sequence [...]

>  } rte_seqcount_t;

[...]

> --- a/lib/eal/include/rte_ticketlock.h
> +++ b/lib/eal/include/rte_ticketlock.h
> @@ -30,10 +30,10 @@
>   * The rte_ticketlock_t type.
>   */
>  typedef union {
> -	uint32_t tickets;
> +	uint32_t __rte_atomic tickets;
>  	struct {
> -		uint16_t current;
> -		uint16_t next;
> +		uint16_t __rte_atomic current;
> +		uint16_t __rte_atomic next;

Again, I think __rte_atomic should be before the type:
		__rte_atomic uint16_t current;
		__rte_atomic uint16_t next;

>  	} s;
>  } rte_ticketlock_t;



> @@ -127,7 +129,7 @@
> 
>  typedef struct {
>  	rte_ticketlock_t tl; /**< the actual ticketlock */
> -	int user; /**< core id using lock, TICKET_LOCK_INVALID_ID for unused */
> +	int __rte_atomic user; /**< core id using lock, TICKET_LOCK_INVALID_ID
> for unused */

Again, I think __rte_atomic should be before the type:
	__rte_atomic int user; /**< core id [...]

>  	unsigned int count; /**< count of time this lock has been called */
>  } rte_ticketlock_recursive_t;

[...]

> --- a/lib/eal/include/rte_trace_point.h
> +++ b/lib/eal/include/rte_trace_point.h
> @@ -33,7 +33,7 @@
>  #include <rte_stdatomic.h>
> 
>  /** The tracepoint object. */
> -typedef uint64_t rte_trace_point_t;
> +typedef uint64_t __rte_atomic rte_trace_point_t;

Again, I think __rte_atomic should be before the type:
typedef __rte_atomic uint64_t rte_trace_point_t;

[...]

At the risk of having gone "speed blind" by all the search-replaces along the way...

Reviewed-by: Morten Brørup <mb@smartsharesystems.com>
  
Tyler Retzlaff Aug. 14, 2023, 5:47 p.m. UTC | #2
On Mon, Aug 14, 2023 at 10:00:49AM +0200, Morten Brørup wrote:
> > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > Sent: Friday, 11 August 2023 19.32
> > 
> > Adapt the EAL public headers to use rte optional atomics API instead of
> > directly using and exposing toolchain specific atomic builtin intrinsics.
> > 
> > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > ---
> 
> [...]
> 

will fix the comments identified.

> 
> [...]
> 
> > --- a/lib/eal/include/generic/rte_spinlock.h
> > +++ b/lib/eal/include/generic/rte_spinlock.h
> > @@ -29,7 +29,7 @@
> >   * The rte_spinlock_t type.
> >   */
> >  typedef struct __rte_lockable {
> > -	volatile int locked; /**< lock status 0 = unlocked, 1 = locked */
> > +	volatile int __rte_atomic locked; /**< lock status 0 = unlocked, 1 =
> > locked */
> 
> I think __rte_atomic should be before the type:
> 	volatile __rte_atomic int locked; /**< lock status [...]
> Alternatively (just mentioning it, I know we don't use this form):
> 	volatile __rte_atomic(int) locked; /**< lock status [...]
> 
> Thinking of where you would put "const" might help.
> 
> Maybe your order is also correct, so it is a matter of preference.

so for me what you suggest is the canonical convention for c, and i did
initially try to make the change with this convention, but ran into
trouble when using the keyword as a type specifier in a context where
the type was incomplete.

the rte_mcslock is a good example for illustration.

  // original struct
  typedef struct rte_mcslock {
    struct rte_mcslock *next;
    ...
  };

  it simply doesn't work / won't compile (at least with clang) which is
  what drove me to use the less-often used syntax.

  typedef struct rte_mcslock {
    _Atomic struct rte_mcslock *next;
    ...
  };

  In file included from ../app/test/test_mcslock.c:19:
  ..\lib\eal\include\rte_mcslock.h:36:2: error: _Atomic cannot be applied
  to incomplete type 'struct rte_mcslock'
	  _Atomic struct rte_mcslock *next;
	  ^
  ..\lib\eal\include\rte_mcslock.h:35:16: note: definition of 'struct
  rte_mcslock' is not complete until the closing '}'
  typedef struct rte_mcslock {
		 ^
  1 error generated.

so i ended up choosing to use a single syntax by convention consistently
rather than using one for the exceptional case and one everywhere else.

i think (based on our other thread of discussion) i would recommend we
adopt and require the use of the _Atomic(T) macro to disambiguate; it
also has the advantage of not being churned later when we can do c++23.

  // using macro
  typedef struct rte_mcslock {
    _Atomic(struct rte_mcslock *) next;
    ...
  };

this makes it much easier to know at a glance whether the specified type is the T
or the T *; similarly, in parameter lists it becomes clearer too.

e.g.
void foo(int *v)

whether it is void foo(_Atomic(int) *v) or void foo(_Atomic(int *) v) becomes
much clearer without having to do mental gymnastics.
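
a tiny sketch of the two prototypes being contrasted (illustrative names only):

  void foo(_Atomic(int) *v);   /* v points to an atomic int            */
  void bar(_Atomic(int *) v);  /* v itself is an atomic pointer to int */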

so i propose we retain

  #define __rte_atomic _Atomic

  allow it to be used in contexts where we need a type-qualifier.
  note:
    most of the cases where _Atomic is used as a type-qualifier are
    a red flag that we are sensitive to an implementation detail
    of the compiler. in time i hope most of these will go away as we
    remove deprecated rte_atomic_xx apis.

but also introduce the following macro

  #define RTE_ATOMIC(type) _Atomic(type)
  require it to be used in the contexts where we are using it as a type-specifier.

if folks agree with this please reply back positively and i'll update
the series. feel free to propose alternate names or whatever, but sooner
than later so i don't have to churn things too much :)
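
for concreteness, a sketch of how the proposal could look on the standard
atomics path (the exact definitions are to be confirmed in the next revision
of the series):

  #define __rte_atomic _Atomic             /* type-qualifier contexts */
  #define RTE_ATOMIC(type) _Atomic(type)   /* type-specifier contexts */

  /* the mcslock fields from the example above would then read: */
  typedef struct rte_mcslock {
          RTE_ATOMIC(struct rte_mcslock *) next;
          int __rte_atomic locked; /* 1 if the queue locked, 0 otherwise */
  } rte_mcslock_t;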

thanks!

> 
> The DPDK coding style guidelines doesn't mention where to place "const", but looking at the code, it seems to use "const unsigned int" and "const char *".

we probably should document it as a convention and most likely we should
adopt what is already in use more commonly.

> 
> >  } rte_spinlock_t;
> > 
> >  /**
> 
> [...]
> 
> > --- a/lib/eal/include/rte_mcslock.h
> > +++ b/lib/eal/include/rte_mcslock.h
> > @@ -33,8 +33,8 @@
> >   * The rte_mcslock_t type.
> >   */
> >  typedef struct rte_mcslock {
> > -	struct rte_mcslock *next;
> > -	int locked; /* 1 if the queue locked, 0 otherwise */
> > +	struct rte_mcslock * __rte_atomic next;
> 
> Correct, the pointer is atomic, not the struct.
> 
> > +	int __rte_atomic locked; /* 1 if the queue locked, 0 otherwise */
> 
> Again, I think __rte_atomic should be before the type:
> 	__rte_atomic int locked; /* 1 if the queue locked, 0 otherwise */
> 
> >  } rte_mcslock_t;
> > 
> 
> [...]
> 
> > @@ -101,34 +101,34 @@
> >   *   A pointer to the node of MCS lock passed in rte_mcslock_lock.
> >   */
> >  static inline void
> > -rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me)
> > +rte_mcslock_unlock(rte_mcslock_t * __rte_atomic *msl, rte_mcslock_t *
> > __rte_atomic me)
> >  {
> >  	/* Check if there are more nodes in the queue. */
> > -	if (likely(__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)) {
> > +	if (likely(rte_atomic_load_explicit(&me->next, rte_memory_order_relaxed)
> > == NULL)) {
> >  		/* No, last member in the queue. */
> > -		rte_mcslock_t *save_me = __atomic_load_n(&me, __ATOMIC_RELAXED);
> > +		rte_mcslock_t *save_me = rte_atomic_load_explicit(&me,
> > rte_memory_order_relaxed);
> > 
> >  		/* Release the lock by setting it to NULL */
> > -		if (likely(__atomic_compare_exchange_n(msl, &save_me, NULL, 0,
> > -				__ATOMIC_RELEASE, __ATOMIC_RELAXED)))
> > +		if (likely(rte_atomic_compare_exchange_strong_explicit(msl,
> > &save_me, NULL,
> > +				rte_memory_order_release,
> > rte_memory_order_relaxed)))
> >  			return;
> > 
> >  		/* Speculative execution would be allowed to read in the
> >  		 * while-loop first. This has the potential to cause a
> >  		 * deadlock. Need a load barrier.
> >  		 */
> > -		__atomic_thread_fence(__ATOMIC_ACQUIRE);
> > +		__rte_atomic_thread_fence(rte_memory_order_acquire);
> >  		/* More nodes added to the queue by other CPUs.
> >  		 * Wait until the next pointer is set.
> >  		 */
> > -		uintptr_t *next;
> > -		next = (uintptr_t *)&me->next;
> > +		uintptr_t __rte_atomic *next;
> > +		next = (uintptr_t __rte_atomic *)&me->next;
> 
> This way around, I think:
> 		__rte_atomic uintptr_t *next;
> 		next = (__rte_atomic uintptr_t *)&me->next;
> 
> [...]
> 
> > --- a/lib/eal/include/rte_pflock.h
> > +++ b/lib/eal/include/rte_pflock.h
> > @@ -41,8 +41,8 @@
> >   */
> >  struct rte_pflock {
> >  	struct {
> > -		uint16_t in;
> > -		uint16_t out;
> > +		uint16_t __rte_atomic in;
> > +		uint16_t __rte_atomic out;
> 
> Again, I think __rte_atomic should be before the type:
> 		__rte_atomic uint16_t in;
> 		__rte_atomic uint16_t out;
> 
> >  	} rd, wr;
> >  };
> 
> [...]
> 
> > --- a/lib/eal/include/rte_seqcount.h
> > +++ b/lib/eal/include/rte_seqcount.h
> > @@ -32,7 +32,7 @@
> >   * The RTE seqcount type.
> >   */
> >  typedef struct {
> > -	uint32_t sn; /**< A sequence number for the protected data. */
> > +	uint32_t __rte_atomic sn; /**< A sequence number for the protected data.
> > */
> 
> Again, I think __rte_atomic should be before the type:
> 	__rte_atomic uint32_t sn; /**< A sequence [...]
> 
> >  } rte_seqcount_t;
> 
> [...]
> 
> > --- a/lib/eal/include/rte_ticketlock.h
> > +++ b/lib/eal/include/rte_ticketlock.h
> > @@ -30,10 +30,10 @@
> >   * The rte_ticketlock_t type.
> >   */
> >  typedef union {
> > -	uint32_t tickets;
> > +	uint32_t __rte_atomic tickets;
> >  	struct {
> > -		uint16_t current;
> > -		uint16_t next;
> > +		uint16_t __rte_atomic current;
> > +		uint16_t __rte_atomic next;
> 
> Again, I think __rte_atomic should be before the type:
> 		__rte_atomic uint16_t current;
> 		__rte_atomic uint16_t next;
> 
> >  	} s;
> >  } rte_ticketlock_t;
> 
> 
> 
> > @@ -127,7 +129,7 @@
> > 
> >  typedef struct {
> >  	rte_ticketlock_t tl; /**< the actual ticketlock */
> > -	int user; /**< core id using lock, TICKET_LOCK_INVALID_ID for unused */
> > +	int __rte_atomic user; /**< core id using lock, TICKET_LOCK_INVALID_ID
> > for unused */
> 
> Again, I think __rte_atomic should be before the type:
> 	__rte_atomic int user; /**< core id [...]
> 
> >  	unsigned int count; /**< count of time this lock has been called */
> >  } rte_ticketlock_recursive_t;
> 
> [...]
> 
> > --- a/lib/eal/include/rte_trace_point.h
> > +++ b/lib/eal/include/rte_trace_point.h
> > @@ -33,7 +33,7 @@
> >  #include <rte_stdatomic.h>
> > 
> >  /** The tracepoint object. */
> > -typedef uint64_t rte_trace_point_t;
> > +typedef uint64_t __rte_atomic rte_trace_point_t;
> 
> Again, I think __rte_atomic should be before the type:
> typedef __rte_atomic uint64_t rte_trace_point_t;
> 
> [...]
> 
> At the risk of having gone "speed blind" by all the search-replaces along the way...
> 
> Reviewed-by: Morten Brørup <mb@smartsharesystems.com>
>
  
Morten Brørup Aug. 16, 2023, 8:13 p.m. UTC | #3
> From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> Sent: Monday, 14 August 2023 19.47
> 
> On Mon, Aug 14, 2023 at 10:00:49AM +0200, Morten Brørup wrote:
> > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > Sent: Friday, 11 August 2023 19.32
> > >
> > > Adapt the EAL public headers to use rte optional atomics API instead
> of
> > > directly using and exposing toolchain specific atomic builtin
> intrinsics.
> > >
> > > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > > ---
> >
> > [...]
> >
> 
> will fix the comments identified.
> 
> >
> > [...]
> >
> > > --- a/lib/eal/include/generic/rte_spinlock.h
> > > +++ b/lib/eal/include/generic/rte_spinlock.h
> > > @@ -29,7 +29,7 @@
> > >   * The rte_spinlock_t type.
> > >   */
> > >  typedef struct __rte_lockable {
> > > -	volatile int locked; /**< lock status 0 = unlocked, 1 = locked */
> > > +	volatile int __rte_atomic locked; /**< lock status 0 = unlocked, 1
> =
> > > locked */
> >
> > I think __rte_atomic should be before the type:
> > 	volatile __rte_atomic int locked; /**< lock status [...]
> > Alternatively (just mentioning it, I know we don't use this form):
> > 	volatile __rte_atomic(int) locked; /**< lock status [...]
> >
> > Thinking of where you would put "const" might help.

Regarding "const", I use the mental trick of reading from right-to-left when pointers are involved, e.g.:

const int * * const x;
----5---- 4 3 --2-- 1

x(1) is a const(2) pointer(3) to a pointer(4) to a const int(5).

And yes, treating "const int" as one word is cheating... formally it should be "int" "const", i.e. the reverse order; but that is not the convention, so I have learned to accept it.
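
As a compilable version of the same reading exercise (variable names are illustrative):

  const int v = 42;
  const int *p = &v;
  const int * * const x = &p;  /* x: const pointer -> pointer -> const int */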

> >
> > Maybe your order is also correct, so it is a matter of preference.
> 
> so for me what you suggest is the canonical convention for c and i did
> initially try to make the change with this convention but ran into
> trouble when using the keyword in a context used as a type specifier
> and the type was incomplete.
> 
> the rte_mcslock is a good example for illustration.
> 
>   // original struct
>   typedef struct rte_mcslock {
>     struct rte_mcslock *next;
>     ...
>   };
> 
>   it simply doesn't work / won't compile (at least with clang) which is
>   what drove me to use the less-often used syntax.
> 
>   typedef struct rte_mcslock {
>     _Atomic struct rte_mcslock *next;
>     ...
>   };
> 
>   In file included from ../app/test/test_mcslock.c:19:
>   ..\lib\eal\include\rte_mcslock.h:36:2: error: _Atomic cannot be
> applied
>   to incomplete type 'struct rte_mcslock'
> 	  _Atomic struct rte_mcslock *next;
> 	  ^
>   ..\lib\eal\include\rte_mcslock.h:35:16: note: definition of 'struct
>   rte_mcslock' is not complete until the closing '}'
>   typedef struct rte_mcslock {
> 		 ^
>   1 error generated.
> 
> so i ended up choosing to use a single syntax by convention consistently
> rather than using one for the exceptional case and one everywhere else.
> 
> i think (based on our other thread of discussion) i would recommend we
> use adopt and require the use of the _Atomic(T) macro to disambiguate it
> also has the advantage of not being churned later when we can do c++23.
> 
>   // using macro
>   typedef struct rte_mcslock {
>     _Atomic(struct rte_mcslock *) next;

This makes it an atomic pointer. Your example above tried making the struct rte_mcslock atomic. Probably what you wanted was:
  typedef struct rte_mcslock {
    struct rte_mcslock * _Atomic next;
    ...
  };

Like "const", the convention should be putting it before any type, but after the "*" for pointers.

I suppose clang doesn't accept applying _Atomic to incomplete types, regardless of where you put it... I.e. this should also fail, I guess:
  typedef struct rte_mcslock {
    struct rte_mcslock _Atomic * next;
    ...
  };
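
To make the suggested convention concrete, a small sketch (hypothetical struct name, not from the patch) of the placement that does compile, since the qualifier applies to the pointer type rather than the incomplete struct:

  #include <stdint.h>

  struct example {
          _Atomic uint16_t in;            /* qualifier before a non-pointer type          */
          struct example * _Atomic next;  /* but after the '*' when the pointer is atomic */
  };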

>     ...
>   };
> 
> this is much easier at a glance to know when the specified type is the T
> or the T * similarly in parameter lists it becomes more clear too.
> 
> e.g.
> void foo(int *v)
> 
> that it is either void foo(_Atomic(int) *v) or void foo(_Atomic(int *)
> v) becomes
> much clearer without having to do mental gymnastics.

The same could be said about making "const" clearer:
void foo(const(int) * v) instead of void foo(const int * v), and
void foo(const(int *) v) instead of void foo(int * const v).

Luckily, we don't need toolchain specific handling of "const", so let's just leave that the way it is. :-)

> 
> so i propose we retain
> 
>   #define __rte_atomic _Atomic
> 
>   allow it to be used in contexts where we need a type-qualifier.
>   note:
>     most of the cases where _Atomic is used as a type-qualifier it
>     is a red flag that we are sensitive to an implementation detail
>     of the compiler. in time i hope most of these will go away as we
>     remove deprecated rte_atomic_xx apis.
> 
> but also introduce the following macro
> 
>   #define RTE_ATOMIC(type) _Atomic(type)
>   require it be used in the contexts that we are using it as a type-
> specifier.
> 
> if folks agree with this please reply back positively and i'll update
> the series. feel free to propose alternate names or whatever, but sooner
> than later so i don't have to churn things too much :)

+1 to Tyler's updated proposal, with macro names as suggested.

If anyone disagrees, please speak up soon!

If in doubt, please read https://en.cppreference.com/w/c/language/atomic carefully. It says:
(1) _Atomic(type-name) (since C11): Use as a type specifier; this designates a new atomic type.
(2) _Atomic type-name (since C11): Use as a type qualifier; this designates the atomic version of type-name. In this role, it may be mixed with const, volatile, and restrict, although unlike other qualifiers, the atomic version of type-name may have a different size, alignment, and object representation.

NB: I hadn't noticed this before, otherwise I had probably suggested using _Atomic(T) earlier on. We learn something new every day. :-)
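
In code, the two roles from that page look like this (minimal sketch):

  _Atomic(int) a;           /* (1) type specifier: designates a new atomic type       */
  volatile _Atomic int b;   /* (2) type qualifier: atomic int, mixed with volatile    */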

> 
> thanks!

Sorry about the late response, Tyler. Other work prevented me from setting aside coherent time to review your updated proposal.

> 
> >
> > The DPDK coding style guidelines doesn't mention where to place
> "const", but looking at the code, it seems to use "const unsigned int"
> and "const char *".
> 
> we probably should document it as a convention and most likely we should
> adopt what is already in use more commonly.

+1, but not as part of this series. :-)
  
Tyler Retzlaff Aug. 16, 2023, 8:32 p.m. UTC | #4
On Wed, Aug 16, 2023 at 10:13:22PM +0200, Morten Brørup wrote:
> > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > Sent: Monday, 14 August 2023 19.47
> > 
> > On Mon, Aug 14, 2023 at 10:00:49AM +0200, Morten Brørup wrote:
> > > > From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> > > > Sent: Friday, 11 August 2023 19.32
> > > >
> > > > Adapt the EAL public headers to use rte optional atomics API instead
> > of
> > > > directly using and exposing toolchain specific atomic builtin
> > intrinsics.
> > > >
> > > > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > > > ---
> > >
> > > [...]
> > >
> > 
> > will fix the comments identified.
> > 
> > >
> > > [...]
> > >
> > > > --- a/lib/eal/include/generic/rte_spinlock.h
> > > > +++ b/lib/eal/include/generic/rte_spinlock.h
> > > > @@ -29,7 +29,7 @@
> > > >   * The rte_spinlock_t type.
> > > >   */
> > > >  typedef struct __rte_lockable {
> > > > -	volatile int locked; /**< lock status 0 = unlocked, 1 = locked */
> > > > +	volatile int __rte_atomic locked; /**< lock status 0 = unlocked, 1
> > =
> > > > locked */
> > >
> > > I think __rte_atomic should be before the type:
> > > 	volatile __rte_atomic int locked; /**< lock status [...]
> > > Alternatively (just mentioning it, I know we don't use this form):
> > > 	volatile __rte_atomic(int) locked; /**< lock status [...]
> > >
> > > Thinking of where you would put "const" might help.
> 
> Regarding "const", I use the mental trick of reading from right-to-left when pointers are involved, e.g.:
> 
> const int * * const x;
> ----5---- 4 3 --2-- 1

yes, i'm very familiar with where it can appear in the syntax and how it
is applied. but it's always good to have someone summarize it like this for
the discussion.

> 
> x(1) is a const(2) pointer(3) to a pointer(4) to a const int(5).
> 
> And yes, treating "const int" as one word is cheating... formally it should be "int" "const", i.e. the reverse order; but that is not the convention, so I have learned to accept it.

that is more often the convention in c++, but i agree that in c people
conventionally put the const first.

> 
> > >
> > > Maybe your order is also correct, so it is a matter of preference.
> > 
> > so for me what you suggest is the canonical convention for c and i did
> > initially try to make the change with this convention but ran into
> > trouble when using the keyword in a context used as a type specifier
> > and the type was incomplete.
> > 
> > the rte_mcslock is a good example for illustration.
> > 
> >   // original struct
> >   typedef struct rte_mcslock {
> >     struct rte_mcslock *next;
> >     ...
> >   };
> > 
> >   it simply doesn't work / won't compile (at least with clang) which is
> >   what drove me to use the less-often used syntax.
> > 
> >   typedef struct rte_mcslock {
> >     _Atomic struct rte_mcslock *next;
> >     ...
> >   };
> > 
> >   In file included from ../app/test/test_mcslock.c:19:
> >   ..\lib\eal\include\rte_mcslock.h:36:2: error: _Atomic cannot be
> > applied
> >   to incomplete type 'struct rte_mcslock'
> > 	  _Atomic struct rte_mcslock *next;
> > 	  ^
> >   ..\lib\eal\include\rte_mcslock.h:35:16: note: definition of 'struct
> >   rte_mcslock' is not complete until the closing '}'
> >   typedef struct rte_mcslock {
> > 		 ^
> >   1 error generated.
> > 
> > so i ended up choosing to use a single syntax by convention consistently
> > rather than using one for the exceptional case and one everywhere else.
> > 
> > i think (based on our other thread of discussion) i would recommend we
> > use adopt and require the use of the _Atomic(T) macro to disambiguate it
> > also has the advantage of not being churned later when we can do c++23.
> > 
> >   // using macro
> >   typedef struct rte_mcslock {
> >     _Atomic(struct rte_mcslock *) next;
> 
> This makes it an atomic pointer. Your example above tried making the struct rts_mcslock atomic. Probably what you wanted was:
>   typedef struct rte_mcslock {
>     struct rte_mcslock * _Atomic next;
>     ...
>   };

this is what my v2 of the patch had. but following your const example,
where you indicated you preferred the equivalent of `const T' over `T const`,
i was trying to illustrate that if you substitute T = struct foo * the
compiler can't disambiguate between the type and a pointer to the type and
produces an error.

> 
> Like "const", the convention should be putting it before any type, but after the "*" for pointers.

i see, thank you for this clarification.  I had not understood that you
were suggesting that for pointer types specifically i should use one
placement and for non-pointer types i should use another.

> 
> I suppose clang doesn't accept applying _Atomic to incomplete types, regardless where you put it... I.e. this should also fail, I guess:
>   typedef struct rte_mcslock {
>     struct rte_mcslock _Atomic * next;
>     ...
>   };

actually I think for C11 atomics you can do this, because you can declare
an entire struct object to be atomic.  However, since we need to intersect
with what the non-C11 gcc builtin atomics support, we would not be able to
make struct objects atomic, as gcc only lets you do atomic things with
integer and pointer types.

> 
> >     ...
> >   };
> > 
> > this is much easier at a glance to know when the specified type is the T
> > or the T * similarly in parameter lists it becomes more clear too.
> > 
> > e.g.
> > void foo(int *v)
> > 
> > that it is either void foo(_Atomic(int) *v) or void foo(_Atomic(int *)
> > v) becomes
> > much clearer without having to do mental gymnastics.
> 
> The same could be said about making "const" clearer:
> void foo(const(int) * v) instead of void foo(const int * v), and
> void foo(const(int *) v) instead of void foo(int * const v).
> 
> Luckily, we don't need toolchain specific handling of "const", so let's just leave that the way it is. :-)
> 
> > 
> > so i propose we retain
> > 
> >   #define __rte_atomic _Atomic
> > 
> >   allow it to be used in contexts where we need a type-qualifier.
> >   note:
> >     most of the cases where _Atomic is used as a type-qualifier it
> >     is a red flag that we are sensitive to an implementation detail
> >     of the compiler. in time i hope most of these will go away as we
> >     remove deprecated rte_atomic_xx apis.
> > 
> > but also introduce the following macro
> > 
> >   #define RTE_ATOMIC(type) _Atomic(type)
> >   require it be used in the contexts that we are using it as a type-
> > specifier.
> > 
> > if folks agree with this please reply back positively and i'll update
> > the series. feel free to propose alternate names or whatever, but sooner
> > than later so i don't have to churn things too much :)
> 
> +1 to Tyler's updated proposal, with macro names as suggested.

yeah, I think it really helps clarify the pointer vs regular type
specification by whacking the ( ) around what we are talking about
instead of using positioning of _Atomic in two different places.

> 
> If anyone disagrees, please speak up soon!
> 
> If in doubt, please read https://en.cppreference.com/w/c/language/atomic carefully. It says:
> (1) _Atomic(type-name) (since C11): Use as a type specifier; this designates a new atomic type.
> (2) _Atomic type-name (since C11): Use as a type qualifier; this designates the atomic version of type-name. In this role, it may be mixed with const, volatile, and restrict, although unlike other qualifiers, the atomic version of type-name may have a different size, alignment, and object representation.
> 
> NB: I hadn't noticed this before, otherwise I had probably suggested using _Atomic(T) earlier on. We learn something new every day. :-)

yeah, i knew about this which is why i was being really careful about
'qualification' vs 'specification' in my mails.

> 
> > 
> > thanks!
> 
> Sorry about the late response, Tyler. Other work prevented me from setting aside coherent time to review your updated proposal.

meh it's okay, based on the other thread i kind of guessed you might
agree with using _Atomic(T), so i just submitted a new version an hour
ago with the changes. i hope it meets your approval. one thing i'm kind
of edgy about is the actual macro name itself, RTE_ATOMIC(type); it seems
kinda ugly, so if someone has an opinion there i'm open to it.

> 
> > 
> > >
> > > The DPDK coding style guidelines doesn't mention where to place
> > "const", but looking at the code, it seems to use "const unsigned int"
> > and "const char *".
> > 
> > we probably should document it as a convention and most likely we should
> > adopt what is already in use more commonly.
> 
> +1, but not as part of this series. :-)

i'll look into doing it once we get this series merged.

thanks!
  

Patch

diff --git a/app/test/test_mcslock.c b/app/test/test_mcslock.c
index 52e45e7..cc25970 100644
--- a/app/test/test_mcslock.c
+++ b/app/test/test_mcslock.c
@@ -36,9 +36,9 @@ 
  *   lock multiple times.
  */
 
-rte_mcslock_t *p_ml;
-rte_mcslock_t *p_ml_try;
-rte_mcslock_t *p_ml_perf;
+rte_mcslock_t * __rte_atomic p_ml;
+rte_mcslock_t * __rte_atomic p_ml_try;
+rte_mcslock_t * __rte_atomic p_ml_perf;
 
 static unsigned int count;
 
diff --git a/lib/eal/arm/include/rte_atomic_32.h b/lib/eal/arm/include/rte_atomic_32.h
index c00ab78..62fc337 100644
--- a/lib/eal/arm/include/rte_atomic_32.h
+++ b/lib/eal/arm/include/rte_atomic_32.h
@@ -34,9 +34,9 @@ 
 #define rte_io_rmb() rte_rmb()
 
 static __rte_always_inline void
-rte_atomic_thread_fence(int memorder)
+rte_atomic_thread_fence(rte_memory_order memorder)
 {
-	__atomic_thread_fence(memorder);
+	__rte_atomic_thread_fence(memorder);
 }
 
 #ifdef __cplusplus
diff --git a/lib/eal/arm/include/rte_atomic_64.h b/lib/eal/arm/include/rte_atomic_64.h
index 6047911..75d8ba6 100644
--- a/lib/eal/arm/include/rte_atomic_64.h
+++ b/lib/eal/arm/include/rte_atomic_64.h
@@ -38,9 +38,9 @@ 
 #define rte_io_rmb() rte_rmb()
 
 static __rte_always_inline void
-rte_atomic_thread_fence(int memorder)
+rte_atomic_thread_fence(rte_memory_order memorder)
 {
-	__atomic_thread_fence(memorder);
+	__rte_atomic_thread_fence(memorder);
 }
 
 /*------------------------ 128 bit atomic operations -------------------------*/
@@ -107,33 +107,33 @@ 
 	 */
 	RTE_SET_USED(failure);
 	/* Find invalid memory order */
-	RTE_ASSERT(success == __ATOMIC_RELAXED ||
-		success == __ATOMIC_ACQUIRE ||
-		success == __ATOMIC_RELEASE ||
-		success == __ATOMIC_ACQ_REL ||
-		success == __ATOMIC_SEQ_CST);
+	RTE_ASSERT(success == rte_memory_order_relaxed ||
+		success == rte_memory_order_acquire ||
+		success == rte_memory_order_release ||
+		success == rte_memory_order_acq_rel ||
+		success == rte_memory_order_seq_cst);
 
 	rte_int128_t expected = *exp;
 	rte_int128_t desired = *src;
 	rte_int128_t old;
 
 #if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS)
-	if (success == __ATOMIC_RELAXED)
+	if (success == rte_memory_order_relaxed)
 		__cas_128_relaxed(dst, exp, desired);
-	else if (success == __ATOMIC_ACQUIRE)
+	else if (success == rte_memory_order_acquire)
 		__cas_128_acquire(dst, exp, desired);
-	else if (success == __ATOMIC_RELEASE)
+	else if (success == rte_memory_order_release)
 		__cas_128_release(dst, exp, desired);
 	else
 		__cas_128_acq_rel(dst, exp, desired);
 	old = *exp;
 #else
-#define __HAS_ACQ(mo) ((mo) != __ATOMIC_RELAXED && (mo) != __ATOMIC_RELEASE)
-#define __HAS_RLS(mo) ((mo) == __ATOMIC_RELEASE || (mo) == __ATOMIC_ACQ_REL || \
-		(mo) == __ATOMIC_SEQ_CST)
+#define __HAS_ACQ(mo) ((mo) != rte_memory_order_relaxed && (mo) != rte_memory_order_release)
+#define __HAS_RLS(mo) ((mo) == rte_memory_order_release || (mo) == rte_memory_order_acq_rel || \
+		(mo) == rte_memory_order_seq_cst)
 
-	int ldx_mo = __HAS_ACQ(success) ? __ATOMIC_ACQUIRE : __ATOMIC_RELAXED;
-	int stx_mo = __HAS_RLS(success) ? __ATOMIC_RELEASE : __ATOMIC_RELAXED;
+	int ldx_mo = __HAS_ACQ(success) ? rte_memory_order_acquire : rte_memory_order_relaxed;
+	int stx_mo = __HAS_RLS(success) ? rte_memory_order_release : rte_memory_order_relaxed;
 
 #undef __HAS_ACQ
 #undef __HAS_RLS
@@ -153,7 +153,7 @@ 
 		: "Q" (src->val[0])       \
 		: "memory"); }
 
-		if (ldx_mo == __ATOMIC_RELAXED)
+		if (ldx_mo == rte_memory_order_relaxed)
 			__LOAD_128("ldxp", dst, old)
 		else
 			__LOAD_128("ldaxp", dst, old)
@@ -170,7 +170,7 @@ 
 		: "memory"); }
 
 		if (likely(old.int128 == expected.int128)) {
-			if (stx_mo == __ATOMIC_RELAXED)
+			if (stx_mo == rte_memory_order_relaxed)
 				__STORE_128("stxp", dst, desired, ret)
 			else
 				__STORE_128("stlxp", dst, desired, ret)
@@ -181,7 +181,7 @@ 
 			 * needs to be stored back to ensure it was read
 			 * atomically.
 			 */
-			if (stx_mo == __ATOMIC_RELAXED)
+			if (stx_mo == rte_memory_order_relaxed)
 				__STORE_128("stxp", dst, old, ret)
 			else
 				__STORE_128("stlxp", dst, old, ret)
diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index 5f70e97..d4daafc 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -41,7 +41,7 @@  static inline void rte_pause(void)
  * implicitly to exit WFE.
  */
 #define __RTE_ARM_LOAD_EXC_8(src, dst, memorder) {       \
-	if (memorder == __ATOMIC_RELAXED) {               \
+	if (memorder == rte_memory_order_relaxed) {               \
 		asm volatile("ldxrb %w[tmp], [%x[addr]]"  \
 			: [tmp] "=&r" (dst)               \
 			: [addr] "r" (src)                \
@@ -60,7 +60,7 @@  static inline void rte_pause(void)
  * implicitly to exit WFE.
  */
 #define __RTE_ARM_LOAD_EXC_16(src, dst, memorder) {       \
-	if (memorder == __ATOMIC_RELAXED) {               \
+	if (memorder == rte_memory_order_relaxed) {               \
 		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
 			: [tmp] "=&r" (dst)               \
 			: [addr] "r" (src)                \
@@ -79,7 +79,7 @@  static inline void rte_pause(void)
  * implicitly to exit WFE.
  */
 #define __RTE_ARM_LOAD_EXC_32(src, dst, memorder) {      \
-	if (memorder == __ATOMIC_RELAXED) {              \
+	if (memorder == rte_memory_order_relaxed) {              \
 		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
 			: [tmp] "=&r" (dst)              \
 			: [addr] "r" (src)               \
@@ -98,7 +98,7 @@  static inline void rte_pause(void)
  * implicitly to exit WFE.
  */
 #define __RTE_ARM_LOAD_EXC_64(src, dst, memorder) {      \
-	if (memorder == __ATOMIC_RELAXED) {              \
+	if (memorder == rte_memory_order_relaxed) {              \
 		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
 			: [tmp] "=&r" (dst)              \
 			: [addr] "r" (src)               \
@@ -118,7 +118,7 @@  static inline void rte_pause(void)
  */
 #define __RTE_ARM_LOAD_EXC_128(src, dst, memorder) {                    \
 	volatile rte_int128_t *dst_128 = (volatile rte_int128_t *)&dst; \
-	if (memorder == __ATOMIC_RELAXED) {                             \
+	if (memorder == rte_memory_order_relaxed) {                             \
 		asm volatile("ldxp %x[tmp0], %x[tmp1], [%x[addr]]"      \
 			: [tmp0] "=&r" (dst_128->val[0]),               \
 			  [tmp1] "=&r" (dst_128->val[1])                \
@@ -153,8 +153,8 @@  static inline void rte_pause(void)
 {
 	uint16_t value;
 
-	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
-		memorder != __ATOMIC_RELAXED);
+	RTE_BUILD_BUG_ON(memorder != rte_memory_order_acquire &&
+		memorder != rte_memory_order_relaxed);
 
 	__RTE_ARM_LOAD_EXC_16(addr, value, memorder)
 	if (value != expected) {
@@ -172,8 +172,8 @@  static inline void rte_pause(void)
 {
 	uint32_t value;
 
-	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
-		memorder != __ATOMIC_RELAXED);
+	RTE_BUILD_BUG_ON(memorder != rte_memory_order_acquire &&
+		memorder != rte_memory_order_relaxed);
 
 	__RTE_ARM_LOAD_EXC_32(addr, value, memorder)
 	if (value != expected) {
@@ -191,8 +191,8 @@  static inline void rte_pause(void)
 {
 	uint64_t value;
 
-	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
-		memorder != __ATOMIC_RELAXED);
+	RTE_BUILD_BUG_ON(memorder != rte_memory_order_acquire &&
+		memorder != rte_memory_order_relaxed);
 
 	__RTE_ARM_LOAD_EXC_64(addr, value, memorder)
 	if (value != expected) {
@@ -206,8 +206,8 @@  static inline void rte_pause(void)
 
 #define RTE_WAIT_UNTIL_MASKED(addr, mask, cond, expected, memorder) do {  \
 	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                \
-	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                  \
-		memorder != __ATOMIC_RELAXED);                            \
+	RTE_BUILD_BUG_ON(memorder != rte_memory_order_acquire &&                  \
+		memorder != rte_memory_order_relaxed);                            \
 	const uint32_t size = sizeof(*(addr)) << 3;                       \
 	typeof(*(addr)) expected_value = (expected);                      \
 	typeof(*(addr)) value;                                            \
diff --git a/lib/eal/arm/rte_power_intrinsics.c b/lib/eal/arm/rte_power_intrinsics.c
index 77b96e4..f54cf59 100644
--- a/lib/eal/arm/rte_power_intrinsics.c
+++ b/lib/eal/arm/rte_power_intrinsics.c
@@ -33,19 +33,19 @@ 
 
 	switch (pmc->size) {
 	case sizeof(uint8_t):
-		__RTE_ARM_LOAD_EXC_8(pmc->addr, cur_value, __ATOMIC_RELAXED)
+		__RTE_ARM_LOAD_EXC_8(pmc->addr, cur_value, rte_memory_order_relaxed)
 		__RTE_ARM_WFE()
 		break;
 	case sizeof(uint16_t):
-		__RTE_ARM_LOAD_EXC_16(pmc->addr, cur_value, __ATOMIC_RELAXED)
+		__RTE_ARM_LOAD_EXC_16(pmc->addr, cur_value, rte_memory_order_relaxed)
 		__RTE_ARM_WFE()
 		break;
 	case sizeof(uint32_t):
-		__RTE_ARM_LOAD_EXC_32(pmc->addr, cur_value, __ATOMIC_RELAXED)
+		__RTE_ARM_LOAD_EXC_32(pmc->addr, cur_value, rte_memory_order_relaxed)
 		__RTE_ARM_WFE()
 		break;
 	case sizeof(uint64_t):
-		__RTE_ARM_LOAD_EXC_64(pmc->addr, cur_value, __ATOMIC_RELAXED)
+		__RTE_ARM_LOAD_EXC_64(pmc->addr, cur_value, rte_memory_order_relaxed)
 		__RTE_ARM_WFE()
 		break;
 	default:
diff --git a/lib/eal/common/eal_common_trace.c b/lib/eal/common/eal_common_trace.c
index cb980af..c6628dd 100644
--- a/lib/eal/common/eal_common_trace.c
+++ b/lib/eal/common/eal_common_trace.c
@@ -103,11 +103,11 @@  struct trace_point_head *
 trace_mode_set(rte_trace_point_t *t, enum rte_trace_mode mode)
 {
 	if (mode == RTE_TRACE_MODE_OVERWRITE)
-		__atomic_fetch_and(t, ~__RTE_TRACE_FIELD_ENABLE_DISCARD,
-			__ATOMIC_RELEASE);
+		rte_atomic_fetch_and_explicit(t, ~__RTE_TRACE_FIELD_ENABLE_DISCARD,
+			rte_memory_order_release);
 	else
-		__atomic_fetch_or(t, __RTE_TRACE_FIELD_ENABLE_DISCARD,
-			__ATOMIC_RELEASE);
+		rte_atomic_fetch_or_explicit(t, __RTE_TRACE_FIELD_ENABLE_DISCARD,
+			rte_memory_order_release);
 }
 
 void
@@ -141,7 +141,7 @@  rte_trace_mode rte_trace_mode_get(void)
 	if (trace_point_is_invalid(t))
 		return false;
 
-	val = __atomic_load_n(t, __ATOMIC_ACQUIRE);
+	val = rte_atomic_load_explicit(t, rte_memory_order_acquire);
 	return (val & __RTE_TRACE_FIELD_ENABLE_MASK) != 0;
 }
 
@@ -153,7 +153,8 @@  rte_trace_mode rte_trace_mode_get(void)
 	if (trace_point_is_invalid(t))
 		return -ERANGE;
 
-	prev = __atomic_fetch_or(t, __RTE_TRACE_FIELD_ENABLE_MASK, __ATOMIC_RELEASE);
+	prev = rte_atomic_fetch_or_explicit(t, __RTE_TRACE_FIELD_ENABLE_MASK,
+	    rte_memory_order_release);
 	if ((prev & __RTE_TRACE_FIELD_ENABLE_MASK) == 0)
 		__atomic_fetch_add(&trace.status, 1, __ATOMIC_RELEASE);
 	return 0;
@@ -167,7 +168,8 @@  rte_trace_mode rte_trace_mode_get(void)
 	if (trace_point_is_invalid(t))
 		return -ERANGE;
 
-	prev = __atomic_fetch_and(t, ~__RTE_TRACE_FIELD_ENABLE_MASK, __ATOMIC_RELEASE);
+	prev = rte_atomic_fetch_and_explicit(t, ~__RTE_TRACE_FIELD_ENABLE_MASK,
+	    rte_memory_order_release);
 	if ((prev & __RTE_TRACE_FIELD_ENABLE_MASK) != 0)
 		__atomic_fetch_sub(&trace.status, 1, __ATOMIC_RELEASE);
 	return 0;
diff --git a/lib/eal/include/generic/rte_atomic.h b/lib/eal/include/generic/rte_atomic.h
index efd29eb..f6c4b3e 100644
--- a/lib/eal/include/generic/rte_atomic.h
+++ b/lib/eal/include/generic/rte_atomic.h
@@ -63,7 +63,7 @@ 
  *  but has different syntax and memory ordering semantic. Hence
  *  deprecated for the simplicity of memory ordering semantics in use.
  *
- *  rte_atomic_thread_fence(__ATOMIC_ACQ_REL) should be used instead.
+ *  rte_atomic_thread_fence(rte_memory_order_acq_rel) should be used instead.
  */
 static inline void rte_smp_mb(void);
 
@@ -80,7 +80,7 @@ 
  *  but has different syntax and memory ordering semantic. Hence
  *  deprecated for the simplicity of memory ordering semantics in use.
  *
- *  rte_atomic_thread_fence(__ATOMIC_RELEASE) should be used instead.
+ *  rte_atomic_thread_fence(rte_memory_order_release) should be used instead.
  *  The fence also guarantees LOAD operations that precede the call
  *  are globally visible across the lcores before the STORE operations
  *  that follows it.
@@ -100,7 +100,7 @@ 
  *  but has different syntax and memory ordering semantic. Hence
  *  deprecated for the simplicity of memory ordering semantics in use.
  *
- *  rte_atomic_thread_fence(__ATOMIC_ACQUIRE) should be used instead.
+ *  rte_atomic_thread_fence(rte_memory_order_acquire) should be used instead.
  *  The fence also guarantees LOAD operations that precede the call
  *  are globally visible across the lcores before the STORE operations
  *  that follows it.
@@ -154,7 +154,7 @@ 
 /**
  * Synchronization fence between threads based on the specified memory order.
  */
-static inline void rte_atomic_thread_fence(int memorder);
+static inline void rte_atomic_thread_fence(rte_memory_order memorder);
 
 /*------------------------- 16 bit atomic operations -------------------------*/
 
@@ -207,7 +207,7 @@ 
 static inline uint16_t
 rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
 {
-	return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
+	return rte_atomic_exchange_explicit(dst, val, rte_memory_order_seq_cst);
 }
 #endif
 
@@ -274,7 +274,7 @@ 
 static inline void
 rte_atomic16_add(rte_atomic16_t *v, int16_t inc)
 {
-	__atomic_fetch_add(&v->cnt, inc, __ATOMIC_SEQ_CST);
+	rte_atomic_fetch_add_explicit(&v->cnt, inc, rte_memory_order_seq_cst);
 }
 
 /**
@@ -288,7 +288,7 @@ 
 static inline void
 rte_atomic16_sub(rte_atomic16_t *v, int16_t dec)
 {
-	__atomic_fetch_sub(&v->cnt, dec, __ATOMIC_SEQ_CST);
+	rte_atomic_fetch_sub_explicit(&v->cnt, dec, rte_memory_order_seq_cst);
 }
 
 /**
@@ -341,7 +341,7 @@ 
 static inline int16_t
 rte_atomic16_add_return(rte_atomic16_t *v, int16_t inc)
 {
-	return __atomic_fetch_add(&v->cnt, inc, __ATOMIC_SEQ_CST) + inc;
+	return rte_atomic_fetch_add_explicit(&v->cnt, inc, rte_memory_order_seq_cst) + inc;
 }
 
 /**
@@ -361,7 +361,7 @@ 
 static inline int16_t
 rte_atomic16_sub_return(rte_atomic16_t *v, int16_t dec)
 {
-	return __atomic_fetch_sub(&v->cnt, dec, __ATOMIC_SEQ_CST) - dec;
+	return rte_atomic_fetch_sub_explicit(&v->cnt, dec, rte_memory_order_seq_cst) - dec;
 }
 
 /**
@@ -380,7 +380,7 @@ 
 #ifdef RTE_FORCE_INTRINSICS
 static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
 {
-	return __atomic_fetch_add(&v->cnt, 1, __ATOMIC_SEQ_CST) + 1 == 0;
+	return rte_atomic_fetch_add_explicit(&v->cnt, 1, rte_memory_order_seq_cst) + 1 == 0;
 }
 #endif
 
@@ -400,7 +400,7 @@  static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
 #ifdef RTE_FORCE_INTRINSICS
 static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
 {
-	return __atomic_fetch_sub(&v->cnt, 1, __ATOMIC_SEQ_CST) - 1 == 0;
+	return rte_atomic_fetch_sub_explicit(&v->cnt, 1, rte_memory_order_seq_cst) - 1 == 0;
 }
 #endif
 
@@ -486,7 +486,7 @@  static inline void rte_atomic16_clear(rte_atomic16_t *v)
 static inline uint32_t
 rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
 {
-	return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
+	return rte_atomic_exchange_explicit(dst, val, rte_memory_order_seq_cst);
 }
 #endif
 
@@ -553,7 +553,7 @@  static inline void rte_atomic16_clear(rte_atomic16_t *v)
 static inline void
 rte_atomic32_add(rte_atomic32_t *v, int32_t inc)
 {
-	__atomic_fetch_add(&v->cnt, inc, __ATOMIC_SEQ_CST);
+	rte_atomic_fetch_add_explicit(&v->cnt, inc, rte_memory_order_seq_cst);
 }
 
 /**
@@ -567,7 +567,7 @@  static inline void rte_atomic16_clear(rte_atomic16_t *v)
 static inline void
 rte_atomic32_sub(rte_atomic32_t *v, int32_t dec)
 {
-	__atomic_fetch_sub(&v->cnt, dec, __ATOMIC_SEQ_CST);
+	rte_atomic_fetch_sub_explicit(&v->cnt, dec, rte_memory_order_seq_cst);
 }
 
 /**
@@ -620,7 +620,7 @@  static inline void rte_atomic16_clear(rte_atomic16_t *v)
 static inline int32_t
 rte_atomic32_add_return(rte_atomic32_t *v, int32_t inc)
 {
-	return __atomic_fetch_add(&v->cnt, inc, __ATOMIC_SEQ_CST) + inc;
+	return rte_atomic_fetch_add_explicit(&v->cnt, inc, rte_memory_order_seq_cst) + inc;
 }
 
 /**
@@ -640,7 +640,7 @@  static inline void rte_atomic16_clear(rte_atomic16_t *v)
 static inline int32_t
 rte_atomic32_sub_return(rte_atomic32_t *v, int32_t dec)
 {
-	return __atomic_fetch_sub(&v->cnt, dec, __ATOMIC_SEQ_CST) - dec;
+	return rte_atomic_fetch_sub_explicit(&v->cnt, dec, rte_memory_order_seq_cst) - dec;
 }
 
 /**
@@ -659,7 +659,7 @@  static inline void rte_atomic16_clear(rte_atomic16_t *v)
 #ifdef RTE_FORCE_INTRINSICS
 static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
 {
-	return __atomic_fetch_add(&v->cnt, 1, __ATOMIC_SEQ_CST) + 1 == 0;
+	return rte_atomic_fetch_add_explicit(&v->cnt, 1, rte_memory_order_seq_cst) + 1 == 0;
 }
 #endif
 
@@ -679,7 +679,7 @@  static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
 #ifdef RTE_FORCE_INTRINSICS
 static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
 {
-	return __atomic_fetch_sub(&v->cnt, 1, __ATOMIC_SEQ_CST) - 1 == 0;
+	return rte_atomic_fetch_sub_explicit(&v->cnt, 1, rte_memory_order_seq_cst) - 1 == 0;
 }
 #endif
 
@@ -764,7 +764,7 @@  static inline void rte_atomic32_clear(rte_atomic32_t *v)
 static inline uint64_t
 rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val)
 {
-	return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
+	return rte_atomic_exchange_explicit(dst, val, rte_memory_order_seq_cst);
 }
 #endif
 
@@ -885,7 +885,7 @@  static inline void rte_atomic32_clear(rte_atomic32_t *v)
 static inline void
 rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
 {
-	__atomic_fetch_add(&v->cnt, inc, __ATOMIC_SEQ_CST);
+	rte_atomic_fetch_add_explicit(&v->cnt, inc, rte_memory_order_seq_cst);
 }
 #endif
 
@@ -904,7 +904,7 @@  static inline void rte_atomic32_clear(rte_atomic32_t *v)
 static inline void
 rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
 {
-	__atomic_fetch_sub(&v->cnt, dec, __ATOMIC_SEQ_CST);
+	rte_atomic_fetch_sub_explicit(&v->cnt, dec, rte_memory_order_seq_cst);
 }
 #endif
 
@@ -962,7 +962,7 @@  static inline void rte_atomic32_clear(rte_atomic32_t *v)
 static inline int64_t
 rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
 {
-	return __atomic_fetch_add(&v->cnt, inc, __ATOMIC_SEQ_CST) + inc;
+	return rte_atomic_fetch_add_explicit(&v->cnt, inc, rte_memory_order_seq_cst) + inc;
 }
 #endif
 
@@ -986,7 +986,7 @@  static inline void rte_atomic32_clear(rte_atomic32_t *v)
 static inline int64_t
 rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
 {
-	return __atomic_fetch_sub(&v->cnt, dec, __ATOMIC_SEQ_CST) - dec;
+	return rte_atomic_fetch_sub_explicit(&v->cnt, dec, rte_memory_order_seq_cst) - dec;
 }
 #endif
 
@@ -1117,8 +1117,8 @@  static inline void rte_atomic64_clear(rte_atomic64_t *v)
  *   stronger) model.
  * @param failure
  *   If unsuccessful, the operation's memory behavior conforms to this (or a
- *   stronger) model. This argument cannot be __ATOMIC_RELEASE,
- *   __ATOMIC_ACQ_REL, or a stronger model than success.
+ *   stronger) model. This argument cannot be rte_memory_order_release,
+ *   rte_memory_order_acq_rel, or a stronger model than success.
  * @return
  *   Non-zero on success; 0 on failure.
  */
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index bebfa95..c816e7d 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -36,13 +36,13 @@ 
  *  A 16-bit expected value to be in the memory location.
  * @param memorder
  *  Two different memory orders that can be specified:
- *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  rte_memory_order_acquire and rte_memory_order_relaxed. These map to
  *  C++11 memory orders with the same names, see the C++11 standard or
  *  the GCC wiki on atomic synchronization for detailed definition.
  */
 static __rte_always_inline void
 rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-		int memorder);
+		rte_memory_order memorder);
 
 /**
  * Wait for *addr to be updated with a 32-bit expected value, with a relaxed
@@ -54,13 +54,13 @@ 
  *  A 32-bit expected value to be in the memory location.
  * @param memorder
  *  Two different memory orders that can be specified:
- *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  rte_memory_order_acquire and rte_memory_order_relaxed. These map to
  *  C++11 memory orders with the same names, see the C++11 standard or
  *  the GCC wiki on atomic synchronization for detailed definition.
  */
 static __rte_always_inline void
 rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
-		int memorder);
+		rte_memory_order memorder);
 
 /**
  * Wait for *addr to be updated with a 64-bit expected value, with a relaxed
@@ -72,42 +72,42 @@ 
  *  A 64-bit expected value to be in the memory location.
  * @param memorder
  *  Two different memory orders that can be specified:
- *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  rte_memory_order_acquire and rte_memory_order_relaxed. These map to
  *  C++11 memory orders with the same names, see the C++11 standard or
  *  the GCC wiki on atomic synchronization for detailed definition.
  */
 static __rte_always_inline void
 rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
-		int memorder);
+		rte_memory_order memorder);
 
 #ifndef RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED
 static __rte_always_inline void
 rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-		int memorder)
+		rte_memory_order memorder)
 {
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+	assert(memorder == rte_memory_order_acquire || memorder == rte_memory_order_relaxed);
 
-	while (__atomic_load_n(addr, memorder) != expected)
+	while (rte_atomic_load_explicit(addr, memorder) != expected)
 		rte_pause();
 }
 
 static __rte_always_inline void
 rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
-		int memorder)
+		rte_memory_order memorder)
 {
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+	assert(memorder == rte_memory_order_acquire || memorder == rte_memory_order_relaxed);
 
-	while (__atomic_load_n(addr, memorder) != expected)
+	while (rte_atomic_load_explicit(addr, memorder) != expected)
 		rte_pause();
 }
 
 static __rte_always_inline void
 rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
-		int memorder)
+		rte_memory_order memorder)
 {
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+	assert(memorder == rte_memory_order_acquire || memorder == rte_memory_order_relaxed);
 
-	while (__atomic_load_n(addr, memorder) != expected)
+	while (rte_atomic_load_explicit(addr, memorder) != expected)
 		rte_pause();
 }
 
@@ -125,16 +125,16 @@ 
  *  An expected value to be in the memory location.
  * @param memorder
  *  Two different memory orders that can be specified:
- *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  rte_memory_order_acquire and rte_memory_order_relaxed. These map to
  *  C++11 memory orders with the same names, see the C++11 standard or
  *  the GCC wiki on atomic synchronization for detailed definition.
  */
 #define RTE_WAIT_UNTIL_MASKED(addr, mask, cond, expected, memorder) do { \
 	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));               \
-	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                 \
-		memorder != __ATOMIC_RELAXED);                           \
+	RTE_BUILD_BUG_ON((memorder) != rte_memory_order_acquire &&       \
+		(memorder) != rte_memory_order_relaxed);                 \
 	typeof(*(addr)) expected_value = (expected);                     \
-	while (!((__atomic_load_n((addr), (memorder)) & (mask)) cond     \
+	while (!((rte_atomic_load_explicit((addr), (memorder)) & (mask)) cond     \
 			expected_value))                                 \
 		rte_pause();                                             \
 } while (0)
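
For context, a minimal sketch of the wait helpers taking the rte_memory_order-typed argument (illustrative only; ready_flag and the consumer functions are hypothetical, and only acquire or relaxed ordering is accepted, as asserted above):

#include <stdint.h>
#include <rte_pause.h>
#include <rte_stdatomic.h>

static uint32_t __rte_atomic ready_flag;	/* 0 = not ready, 1 = ready */

static void
consumer_wait(void)
{
	/* only rte_memory_order_acquire or rte_memory_order_relaxed are valid here */
	rte_wait_until_equal_32((volatile uint32_t *)(uintptr_t)&ready_flag, 1,
			rte_memory_order_acquire);
}

static void
consumer_wait_masked(void)
{
	/* spin until bit 0 of the flag becomes non-zero */
	RTE_WAIT_UNTIL_MASKED(&ready_flag, 0x1, !=, 0, rte_memory_order_acquire);
}
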
diff --git a/lib/eal/include/generic/rte_rwlock.h b/lib/eal/include/generic/rte_rwlock.h
index 24ebec6..176775f 100644
--- a/lib/eal/include/generic/rte_rwlock.h
+++ b/lib/eal/include/generic/rte_rwlock.h
@@ -58,7 +58,7 @@ 
 #define RTE_RWLOCK_READ	 0x4	/* Reader increment */
 
 typedef struct __rte_lockable {
-	int32_t cnt;
+	int32_t __rte_atomic cnt;
 } rte_rwlock_t;
 
 /**
@@ -93,21 +93,21 @@ 
 
 	while (1) {
 		/* Wait while writer is present or pending */
-		while (__atomic_load_n(&rwl->cnt, __ATOMIC_RELAXED)
+		while (rte_atomic_load_explicit(&rwl->cnt, rte_memory_order_relaxed)
 		       & RTE_RWLOCK_MASK)
 			rte_pause();
 
 		/* Try to get read lock */
-		x = __atomic_fetch_add(&rwl->cnt, RTE_RWLOCK_READ,
-				       __ATOMIC_ACQUIRE) + RTE_RWLOCK_READ;
+		x = rte_atomic_fetch_add_explicit(&rwl->cnt, RTE_RWLOCK_READ,
+				       rte_memory_order_acquire) + RTE_RWLOCK_READ;
 
 		/* If no writer, then acquire was successful */
 		if (likely(!(x & RTE_RWLOCK_MASK)))
 			return;
 
 		/* Lost race with writer, backout the change. */
-		__atomic_fetch_sub(&rwl->cnt, RTE_RWLOCK_READ,
-				   __ATOMIC_RELAXED);
+		rte_atomic_fetch_sub_explicit(&rwl->cnt, RTE_RWLOCK_READ,
+				   rte_memory_order_relaxed);
 	}
 }
 
@@ -128,20 +128,20 @@ 
 {
 	int32_t x;
 
-	x = __atomic_load_n(&rwl->cnt, __ATOMIC_RELAXED);
+	x = rte_atomic_load_explicit(&rwl->cnt, rte_memory_order_relaxed);
 
 	/* fail if write lock is held or writer is pending */
 	if (x & RTE_RWLOCK_MASK)
 		return -EBUSY;
 
 	/* Try to get read lock */
-	x = __atomic_fetch_add(&rwl->cnt, RTE_RWLOCK_READ,
-			       __ATOMIC_ACQUIRE) + RTE_RWLOCK_READ;
+	x = rte_atomic_fetch_add_explicit(&rwl->cnt, RTE_RWLOCK_READ,
+			       rte_memory_order_acquire) + RTE_RWLOCK_READ;
 
 	/* Back out if writer raced in */
 	if (unlikely(x & RTE_RWLOCK_MASK)) {
-		__atomic_fetch_sub(&rwl->cnt, RTE_RWLOCK_READ,
-				   __ATOMIC_RELEASE);
+		rte_atomic_fetch_sub_explicit(&rwl->cnt, RTE_RWLOCK_READ,
+				   rte_memory_order_release);
 
 		return -EBUSY;
 	}
@@ -159,7 +159,7 @@ 
 	__rte_unlock_function(rwl)
 	__rte_no_thread_safety_analysis
 {
-	__atomic_fetch_sub(&rwl->cnt, RTE_RWLOCK_READ, __ATOMIC_RELEASE);
+	rte_atomic_fetch_sub_explicit(&rwl->cnt, RTE_RWLOCK_READ, rte_memory_order_release);
 }
 
 /**
@@ -179,10 +179,10 @@ 
 {
 	int32_t x;
 
-	x = __atomic_load_n(&rwl->cnt, __ATOMIC_RELAXED);
+	x = rte_atomic_load_explicit(&rwl->cnt, rte_memory_order_relaxed);
 	if (x < RTE_RWLOCK_WRITE &&
-	    __atomic_compare_exchange_n(&rwl->cnt, &x, x + RTE_RWLOCK_WRITE,
-					1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+	    rte_atomic_compare_exchange_weak_explicit(&rwl->cnt, &x, x + RTE_RWLOCK_WRITE,
+					rte_memory_order_acquire, rte_memory_order_relaxed))
 		return 0;
 	else
 		return -EBUSY;
@@ -202,22 +202,25 @@ 
 	int32_t x;
 
 	while (1) {
-		x = __atomic_load_n(&rwl->cnt, __ATOMIC_RELAXED);
+		x = rte_atomic_load_explicit(&rwl->cnt, rte_memory_order_relaxed);
 
 		/* No readers or writers? */
 		if (likely(x < RTE_RWLOCK_WRITE)) {
 			/* Turn off RTE_RWLOCK_WAIT, turn on RTE_RWLOCK_WRITE */
-			if (__atomic_compare_exchange_n(&rwl->cnt, &x, RTE_RWLOCK_WRITE, 1,
-							__ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+			if (rte_atomic_compare_exchange_weak_explicit(
+				&rwl->cnt, &x, RTE_RWLOCK_WRITE,
+				rte_memory_order_acquire, rte_memory_order_relaxed))
 				return;
 		}
 
 		/* Turn on writer wait bit */
 		if (!(x & RTE_RWLOCK_WAIT))
-			__atomic_fetch_or(&rwl->cnt, RTE_RWLOCK_WAIT, __ATOMIC_RELAXED);
+			rte_atomic_fetch_or_explicit(&rwl->cnt, RTE_RWLOCK_WAIT,
+			    rte_memory_order_relaxed);
 
 		/* Wait until no readers before trying again */
-		while (__atomic_load_n(&rwl->cnt, __ATOMIC_RELAXED) > RTE_RWLOCK_WAIT)
+		while (rte_atomic_load_explicit(&rwl->cnt,
+		    rte_memory_order_relaxed) > RTE_RWLOCK_WAIT)
 			rte_pause();
 
 	}
@@ -234,7 +237,7 @@ 
 	__rte_unlock_function(rwl)
 	__rte_no_thread_safety_analysis
 {
-	__atomic_fetch_sub(&rwl->cnt, RTE_RWLOCK_WRITE, __ATOMIC_RELEASE);
+	rte_atomic_fetch_sub_explicit(&rwl->cnt, RTE_RWLOCK_WRITE, rte_memory_order_release);
 }
 
 /**
@@ -248,7 +251,7 @@ 
 static inline int
 rte_rwlock_write_is_locked(rte_rwlock_t *rwl)
 {
-	if (__atomic_load_n(&rwl->cnt, __ATOMIC_RELAXED) & RTE_RWLOCK_WRITE)
+	if (rte_atomic_load_explicit(&rwl->cnt, rte_memory_order_relaxed) & RTE_RWLOCK_WRITE)
 		return 1;
 
 	return 0;
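
For context, a small usage sketch of the rwlock whose counter now carries the __rte_atomic qualifier (illustrative only; RTE_RWLOCK_INITIALIZER and the table example are assumptions, not taken from this patch):

#include <rte_rwlock.h>

static rte_rwlock_t table_lock = RTE_RWLOCK_INITIALIZER;	/* assumed initializer */
static int table_entries;

static int
table_read(void)
{
	int v;

	rte_rwlock_read_lock(&table_lock);	/* shared: many readers may hold it */
	v = table_entries;
	rte_rwlock_read_unlock(&table_lock);
	return v;
}

static void
table_update(int v)
{
	rte_rwlock_write_lock(&table_lock);	/* exclusive: waits out readers */
	table_entries = v;
	rte_rwlock_write_unlock(&table_lock);
}
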
diff --git a/lib/eal/include/generic/rte_spinlock.h b/lib/eal/include/generic/rte_spinlock.h
index e18f0cd..274616a 100644
--- a/lib/eal/include/generic/rte_spinlock.h
+++ b/lib/eal/include/generic/rte_spinlock.h
@@ -29,7 +29,7 @@ 
  * The rte_spinlock_t type.
  */
 typedef struct __rte_lockable {
-	volatile int locked; /**< lock status 0 = unlocked, 1 = locked */
+	volatile int __rte_atomic locked; /**< lock status 0 = unlocked, 1 = locked */
 } rte_spinlock_t;
 
 /**
@@ -66,10 +66,10 @@ 
 {
 	int exp = 0;
 
-	while (!__atomic_compare_exchange_n(&sl->locked, &exp, 1, 0,
-				__ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
-		rte_wait_until_equal_32((volatile uint32_t *)&sl->locked,
-			       0, __ATOMIC_RELAXED);
+	while (!rte_atomic_compare_exchange_strong_explicit(&sl->locked, &exp, 1,
+				rte_memory_order_acquire, rte_memory_order_relaxed)) {
+		rte_wait_until_equal_32((volatile uint32_t *)(uintptr_t)&sl->locked,
+			       0, rte_memory_order_relaxed);
 		exp = 0;
 	}
 }
@@ -90,7 +90,7 @@ 
 rte_spinlock_unlock(rte_spinlock_t *sl)
 	__rte_no_thread_safety_analysis
 {
-	__atomic_store_n(&sl->locked, 0, __ATOMIC_RELEASE);
+	rte_atomic_store_explicit(&sl->locked, 0, rte_memory_order_release);
 }
 #endif
 
@@ -113,9 +113,8 @@ 
 	__rte_no_thread_safety_analysis
 {
 	int exp = 0;
-	return __atomic_compare_exchange_n(&sl->locked, &exp, 1,
-				0, /* disallow spurious failure */
-				__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
+	return rte_atomic_compare_exchange_strong_explicit(&sl->locked, &exp, 1,
+				rte_memory_order_acquire, rte_memory_order_relaxed);
 }
 #endif
 
@@ -129,7 +128,7 @@ 
  */
 static inline int rte_spinlock_is_locked (rte_spinlock_t *sl)
 {
-	return __atomic_load_n(&sl->locked, __ATOMIC_ACQUIRE);
+	return rte_atomic_load_explicit(&sl->locked, rte_memory_order_acquire);
 }
 
 /**
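
For context, a small usage sketch of the spinlock after the qualifier change (illustrative only; RTE_SPINLOCK_INITIALIZER and the stats example are assumptions):

#include <stdint.h>
#include <rte_spinlock.h>

static rte_spinlock_t stats_lock = RTE_SPINLOCK_INITIALIZER;
static uint64_t stats_counter;

static void
stats_bump(void)
{
	rte_spinlock_lock(&stats_lock);
	stats_counter++;
	rte_spinlock_unlock(&stats_lock);
}

static int
stats_try_bump(void)
{
	if (rte_spinlock_trylock(&stats_lock) == 0)
		return 0;	/* lock busy, caller may retry */
	stats_counter++;
	rte_spinlock_unlock(&stats_lock);
	return 1;
}
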
diff --git a/lib/eal/include/rte_mcslock.h b/lib/eal/include/rte_mcslock.h
index 18e63eb..229c8e2 100644
--- a/lib/eal/include/rte_mcslock.h
+++ b/lib/eal/include/rte_mcslock.h
@@ -33,8 +33,8 @@ 
  * The rte_mcslock_t type.
  */
 typedef struct rte_mcslock {
-	struct rte_mcslock *next;
-	int locked; /* 1 if the queue locked, 0 otherwise */
+	struct rte_mcslock * __rte_atomic next;
+	int __rte_atomic locked; /* 1 if the queue locked, 0 otherwise */
 } rte_mcslock_t;
 
 /**
@@ -49,13 +49,13 @@ 
  *   lock should use its 'own node'.
  */
 static inline void
-rte_mcslock_lock(rte_mcslock_t **msl, rte_mcslock_t *me)
+rte_mcslock_lock(rte_mcslock_t * __rte_atomic *msl, rte_mcslock_t *me)
 {
 	rte_mcslock_t *prev;
 
 	/* Init me node */
-	__atomic_store_n(&me->locked, 1, __ATOMIC_RELAXED);
-	__atomic_store_n(&me->next, NULL, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&me->locked, 1, rte_memory_order_relaxed);
+	rte_atomic_store_explicit(&me->next, NULL, rte_memory_order_relaxed);
 
 	/* If the queue is empty, the exchange operation is enough to acquire
 	 * the lock. Hence, the exchange operation requires acquire semantics.
@@ -63,7 +63,7 @@ 
 	 * visible to other CPUs/threads. Hence, the exchange operation requires
 	 * release semantics as well.
 	 */
-	prev = __atomic_exchange_n(msl, me, __ATOMIC_ACQ_REL);
+	prev = rte_atomic_exchange_explicit(msl, me, rte_memory_order_acq_rel);
 	if (likely(prev == NULL)) {
 		/* Queue was empty, no further action required,
 		 * proceed with lock taken.
@@ -77,19 +77,19 @@ 
 	 * strong as a release fence and is not sufficient to enforce the
 	 * desired order here.
 	 */
-	__atomic_store_n(&prev->next, me, __ATOMIC_RELEASE);
+	rte_atomic_store_explicit(&prev->next, me, rte_memory_order_release);
 
 	/* The while-load of me->locked should not move above the previous
 	 * store to prev->next. Otherwise it will cause a deadlock. Need a
 	 * store-load barrier.
 	 */
-	__atomic_thread_fence(__ATOMIC_ACQ_REL);
+	__rte_atomic_thread_fence(rte_memory_order_acq_rel);
 	/* If the lock has already been acquired, it first atomically
 	 * places the node at the end of the queue and then proceeds
 	 * to spin on me->locked until the previous lock holder resets
 	 * the me->locked using mcslock_unlock().
 	 */
-	rte_wait_until_equal_32((uint32_t *)&me->locked, 0, __ATOMIC_ACQUIRE);
+	rte_wait_until_equal_32((uint32_t *)(uintptr_t)&me->locked, 0, rte_memory_order_acquire);
 }
 
 /**
@@ -101,34 +101,34 @@ 
  *   A pointer to the node of MCS lock passed in rte_mcslock_lock.
  */
 static inline void
-rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me)
+rte_mcslock_unlock(rte_mcslock_t * __rte_atomic *msl, rte_mcslock_t * __rte_atomic me)
 {
 	/* Check if there are more nodes in the queue. */
-	if (likely(__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)) {
+	if (likely(rte_atomic_load_explicit(&me->next, rte_memory_order_relaxed) == NULL)) {
 		/* No, last member in the queue. */
-		rte_mcslock_t *save_me = __atomic_load_n(&me, __ATOMIC_RELAXED);
+		rte_mcslock_t *save_me = rte_atomic_load_explicit(&me, rte_memory_order_relaxed);
 
 		/* Release the lock by setting it to NULL */
-		if (likely(__atomic_compare_exchange_n(msl, &save_me, NULL, 0,
-				__ATOMIC_RELEASE, __ATOMIC_RELAXED)))
+		if (likely(rte_atomic_compare_exchange_strong_explicit(msl, &save_me, NULL,
+				rte_memory_order_release, rte_memory_order_relaxed)))
 			return;
 
 		/* Speculative execution would be allowed to read in the
 		 * while-loop first. This has the potential to cause a
 		 * deadlock. Need a load barrier.
 		 */
-		__atomic_thread_fence(__ATOMIC_ACQUIRE);
+		__rte_atomic_thread_fence(rte_memory_order_acquire);
 		/* More nodes added to the queue by other CPUs.
 		 * Wait until the next pointer is set.
 		 */
-		uintptr_t *next;
-		next = (uintptr_t *)&me->next;
+		uintptr_t __rte_atomic *next;
+		next = (uintptr_t __rte_atomic *)&me->next;
 		RTE_WAIT_UNTIL_MASKED(next, UINTPTR_MAX, !=, 0,
-			__ATOMIC_RELAXED);
+			rte_memory_order_relaxed);
 	}
 
 	/* Pass lock to next waiter. */
-	__atomic_store_n(&me->next->locked, 0, __ATOMIC_RELEASE);
+	rte_atomic_store_explicit(&me->next->locked, 0, rte_memory_order_release);
 }
 
 /**
@@ -142,10 +142,10 @@ 
  *   1 if the lock is successfully taken; 0 otherwise.
  */
 static inline int
-rte_mcslock_trylock(rte_mcslock_t **msl, rte_mcslock_t *me)
+rte_mcslock_trylock(rte_mcslock_t * __rte_atomic *msl, rte_mcslock_t *me)
 {
 	/* Init me node */
-	__atomic_store_n(&me->next, NULL, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&me->next, NULL, rte_memory_order_relaxed);
 
 	/* Try to lock */
 	rte_mcslock_t *expected = NULL;
@@ -156,8 +156,8 @@ 
 	 * is visible to other CPUs/threads. Hence, the compare-exchange
 	 * operation requires release semantics as well.
 	 */
-	return __atomic_compare_exchange_n(msl, &expected, me, 0,
-			__ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
+	return rte_atomic_compare_exchange_strong_explicit(msl, &expected, me,
+			rte_memory_order_acq_rel, rte_memory_order_relaxed);
 }
 
 /**
@@ -169,9 +169,9 @@ 
  *   1 if the lock is currently taken; 0 otherwise.
  */
 static inline int
-rte_mcslock_is_locked(rte_mcslock_t *msl)
+rte_mcslock_is_locked(rte_mcslock_t * __rte_atomic msl)
 {
-	return (__atomic_load_n(&msl, __ATOMIC_RELAXED) != NULL);
+	return (rte_atomic_load_explicit(&msl, rte_memory_order_relaxed) != NULL);
 }
 
 #ifdef __cplusplus
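
For context, a minimal sketch of MCS lock usage with the queue head now being an atomic pointer (illustrative only; work_lock and do_exclusive_work() are hypothetical):

#include <rte_mcslock.h>

static rte_mcslock_t * __rte_atomic work_lock;	/* NULL = unlocked */

static void
do_exclusive_work(void)
{
	rte_mcslock_t me;	/* per-invocation queue node supplied by the caller */

	rte_mcslock_lock(&work_lock, &me);
	/* ... critical section ... */
	rte_mcslock_unlock(&work_lock, &me);
}

The node must remain valid until rte_mcslock_unlock() returns, which a stack node used within the same function satisfies.
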
diff --git a/lib/eal/include/rte_pflock.h b/lib/eal/include/rte_pflock.h
index 790be71..a2375b3 100644
--- a/lib/eal/include/rte_pflock.h
+++ b/lib/eal/include/rte_pflock.h
@@ -41,8 +41,8 @@ 
  */
 struct rte_pflock {
 	struct {
-		uint16_t in;
-		uint16_t out;
+		uint16_t __rte_atomic in;
+		uint16_t __rte_atomic out;
 	} rd, wr;
 };
 typedef struct rte_pflock rte_pflock_t;
@@ -117,14 +117,14 @@  struct rte_pflock {
 	 * If no writer is present, then the operation has completed
 	 * successfully.
 	 */
-	w = __atomic_fetch_add(&pf->rd.in, RTE_PFLOCK_RINC, __ATOMIC_ACQUIRE)
+	w = rte_atomic_fetch_add_explicit(&pf->rd.in, RTE_PFLOCK_RINC, rte_memory_order_acquire)
 		& RTE_PFLOCK_WBITS;
 	if (w == 0)
 		return;
 
 	/* Wait for current write phase to complete. */
 	RTE_WAIT_UNTIL_MASKED(&pf->rd.in, RTE_PFLOCK_WBITS, !=, w,
-		__ATOMIC_ACQUIRE);
+		rte_memory_order_acquire);
 }
 
 /**
@@ -140,7 +140,7 @@  struct rte_pflock {
 static inline void
 rte_pflock_read_unlock(rte_pflock_t *pf)
 {
-	__atomic_fetch_add(&pf->rd.out, RTE_PFLOCK_RINC, __ATOMIC_RELEASE);
+	rte_atomic_fetch_add_explicit(&pf->rd.out, RTE_PFLOCK_RINC, rte_memory_order_release);
 }
 
 /**
@@ -161,8 +161,9 @@  struct rte_pflock {
 	/* Acquire ownership of write-phase.
 	 * This is same as rte_ticketlock_lock().
 	 */
-	ticket = __atomic_fetch_add(&pf->wr.in, 1, __ATOMIC_RELAXED);
-	rte_wait_until_equal_16(&pf->wr.out, ticket, __ATOMIC_ACQUIRE);
+	ticket = rte_atomic_fetch_add_explicit(&pf->wr.in, 1, rte_memory_order_relaxed);
+	rte_wait_until_equal_16((uint16_t *)(uintptr_t)&pf->wr.out, ticket,
+	    rte_memory_order_acquire);
 
 	/*
 	 * Acquire ticket on read-side in order to allow them
@@ -173,10 +174,11 @@  struct rte_pflock {
 	 * speculatively.
 	 */
 	w = RTE_PFLOCK_PRES | (ticket & RTE_PFLOCK_PHID);
-	ticket = __atomic_fetch_add(&pf->rd.in, w, __ATOMIC_RELAXED);
+	ticket = rte_atomic_fetch_add_explicit(&pf->rd.in, w, rte_memory_order_relaxed);
 
 	/* Wait for any pending readers to flush. */
-	rte_wait_until_equal_16(&pf->rd.out, ticket, __ATOMIC_ACQUIRE);
+	rte_wait_until_equal_16((uint16_t *)(uintptr_t)&pf->rd.out, ticket,
+	    rte_memory_order_acquire);
 }
 
 /**
@@ -193,10 +195,10 @@  struct rte_pflock {
 rte_pflock_write_unlock(rte_pflock_t *pf)
 {
 	/* Migrate from write phase to read phase. */
-	__atomic_fetch_and(&pf->rd.in, RTE_PFLOCK_LSB, __ATOMIC_RELEASE);
+	rte_atomic_fetch_and_explicit(&pf->rd.in, RTE_PFLOCK_LSB, rte_memory_order_release);
 
 	/* Allow other writers to continue. */
-	__atomic_fetch_add(&pf->wr.out, 1, __ATOMIC_RELEASE);
+	rte_atomic_fetch_add_explicit(&pf->wr.out, 1, rte_memory_order_release);
 }
 
 #ifdef __cplusplus
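
For context, a small phase-fair lock usage sketch (illustrative only; RTE_PFLOCK_INITIALIZER and the cfg example are assumptions, not taken from this patch):

#include <rte_pflock.h>

static rte_pflock_t cfg_lock = RTE_PFLOCK_INITIALIZER;	/* assumed initializer */
static int cfg_value;

static int
cfg_read(void)
{
	int v;

	rte_pflock_read_lock(&cfg_lock);	/* joins the current read phase */
	v = cfg_value;
	rte_pflock_read_unlock(&cfg_lock);
	return v;
}

static void
cfg_write(int v)
{
	rte_pflock_write_lock(&cfg_lock);	/* takes a write-phase ticket, drains readers */
	cfg_value = v;
	rte_pflock_write_unlock(&cfg_lock);
}
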
diff --git a/lib/eal/include/rte_seqcount.h b/lib/eal/include/rte_seqcount.h
index 098af26..a658178 100644
--- a/lib/eal/include/rte_seqcount.h
+++ b/lib/eal/include/rte_seqcount.h
@@ -32,7 +32,7 @@ 
  * The RTE seqcount type.
  */
 typedef struct {
-	uint32_t sn; /**< A sequence number for the protected data. */
+	uint32_t __rte_atomic sn; /**< A sequence number for the protected data. */
 } rte_seqcount_t;
 
 /**
@@ -106,11 +106,11 @@ 
 static inline uint32_t
 rte_seqcount_read_begin(const rte_seqcount_t *seqcount)
 {
-	/* __ATOMIC_ACQUIRE to prevent loads after (in program order)
+	/* rte_memory_order_acquire to prevent loads after (in program order)
 	 * from happening before the sn load. Synchronizes-with the
 	 * store release in rte_seqcount_write_end().
 	 */
-	return __atomic_load_n(&seqcount->sn, __ATOMIC_ACQUIRE);
+	return rte_atomic_load_explicit(&seqcount->sn, rte_memory_order_acquire);
 }
 
 /**
@@ -161,9 +161,9 @@ 
 		return true;
 
 	/* make sure the data loads happens before the sn load */
-	rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+	rte_atomic_thread_fence(rte_memory_order_acquire);
 
-	end_sn = __atomic_load_n(&seqcount->sn, __ATOMIC_RELAXED);
+	end_sn = rte_atomic_load_explicit(&seqcount->sn, rte_memory_order_relaxed);
 
 	/* A writer incremented the sequence number during this read
 	 * critical section.
@@ -205,12 +205,12 @@ 
 
 	sn = seqcount->sn + 1;
 
-	__atomic_store_n(&seqcount->sn, sn, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&seqcount->sn, sn, rte_memory_order_relaxed);
 
-	/* __ATOMIC_RELEASE to prevent stores after (in program order)
+	/* rte_memory_order_release to prevent stores after (in program order)
 	 * from happening before the sn store.
 	 */
-	rte_atomic_thread_fence(__ATOMIC_RELEASE);
+	rte_atomic_thread_fence(rte_memory_order_release);
 }
 
 /**
@@ -237,7 +237,7 @@ 
 	sn = seqcount->sn + 1;
 
 	/* Synchronizes-with the load acquire in rte_seqcount_read_begin(). */
-	__atomic_store_n(&seqcount->sn, sn, __ATOMIC_RELEASE);
+	rte_atomic_store_explicit(&seqcount->sn, sn, rte_memory_order_release);
 }
 
 #ifdef __cplusplus
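
For context, the usual single-writer/retrying-reader seqcount pattern with the converted type (illustrative only; RTE_SEQCOUNT_INITIALIZER and the a/b payload are assumptions; writers still need external serialization):

#include <stdint.h>
#include <rte_seqcount.h>

static rte_seqcount_t sc = RTE_SEQCOUNT_INITIALIZER;	/* assumed initializer */
static uint64_t a, b;	/* protected data that must be read consistently */

static void
writer_update(uint64_t x, uint64_t y)
{
	rte_seqcount_write_begin(&sc);
	a = x;
	b = y;
	rte_seqcount_write_end(&sc);
}

static uint64_t
reader_sum(void)
{
	uint32_t sn;
	uint64_t x, y;

	do {
		sn = rte_seqcount_read_begin(&sc);
		x = a;
		y = b;
	} while (rte_seqcount_read_retry(&sc, sn));	/* retry if a writer interfered */

	return x + y;
}
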
diff --git a/lib/eal/include/rte_ticketlock.h b/lib/eal/include/rte_ticketlock.h
index e22d119..d816650 100644
--- a/lib/eal/include/rte_ticketlock.h
+++ b/lib/eal/include/rte_ticketlock.h
@@ -30,10 +30,10 @@ 
  * The rte_ticketlock_t type.
  */
 typedef union {
-	uint32_t tickets;
+	uint32_t __rte_atomic tickets;
 	struct {
-		uint16_t current;
-		uint16_t next;
+		uint16_t __rte_atomic current;
+		uint16_t __rte_atomic next;
 	} s;
 } rte_ticketlock_t;
 
@@ -51,7 +51,7 @@ 
 static inline void
 rte_ticketlock_init(rte_ticketlock_t *tl)
 {
-	__atomic_store_n(&tl->tickets, 0, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&tl->tickets, 0, rte_memory_order_relaxed);
 }
 
 /**
@@ -63,8 +63,9 @@ 
 static inline void
 rte_ticketlock_lock(rte_ticketlock_t *tl)
 {
-	uint16_t me = __atomic_fetch_add(&tl->s.next, 1, __ATOMIC_RELAXED);
-	rte_wait_until_equal_16(&tl->s.current, me, __ATOMIC_ACQUIRE);
+	uint16_t me = rte_atomic_fetch_add_explicit(&tl->s.next, 1, rte_memory_order_relaxed);
+	rte_wait_until_equal_16((uint16_t *)(uintptr_t)&tl->s.current, me,
+	    rte_memory_order_acquire);
 }
 
 /**
@@ -76,8 +77,8 @@ 
 static inline void
 rte_ticketlock_unlock(rte_ticketlock_t *tl)
 {
-	uint16_t i = __atomic_load_n(&tl->s.current, __ATOMIC_RELAXED);
-	__atomic_store_n(&tl->s.current, i + 1, __ATOMIC_RELEASE);
+	uint16_t i = rte_atomic_load_explicit(&tl->s.current, rte_memory_order_relaxed);
+	rte_atomic_store_explicit(&tl->s.current, i + 1, rte_memory_order_release);
 }
 
 /**
@@ -92,12 +93,13 @@ 
 rte_ticketlock_trylock(rte_ticketlock_t *tl)
 {
 	rte_ticketlock_t oldl, newl;
-	oldl.tickets = __atomic_load_n(&tl->tickets, __ATOMIC_RELAXED);
+	oldl.tickets = rte_atomic_load_explicit(&tl->tickets, rte_memory_order_relaxed);
 	newl.tickets = oldl.tickets;
 	newl.s.next++;
 	if (oldl.s.next == oldl.s.current) {
-		if (__atomic_compare_exchange_n(&tl->tickets, &oldl.tickets,
-		    newl.tickets, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+		if (rte_atomic_compare_exchange_strong_explicit(&tl->tickets,
+		    (uint32_t *)(uintptr_t)&oldl.tickets,
+		    newl.tickets, rte_memory_order_acquire, rte_memory_order_relaxed))
 			return 1;
 	}
 
@@ -116,7 +118,7 @@ 
 rte_ticketlock_is_locked(rte_ticketlock_t *tl)
 {
 	rte_ticketlock_t tic;
-	tic.tickets = __atomic_load_n(&tl->tickets, __ATOMIC_ACQUIRE);
+	tic.tickets = rte_atomic_load_explicit(&tl->tickets, rte_memory_order_acquire);
 	return (tic.s.current != tic.s.next);
 }
 
@@ -127,7 +129,7 @@ 
 
 typedef struct {
 	rte_ticketlock_t tl; /**< the actual ticketlock */
-	int user; /**< core id using lock, TICKET_LOCK_INVALID_ID for unused */
+	int __rte_atomic user; /**< core id using lock, TICKET_LOCK_INVALID_ID for unused */
 	unsigned int count; /**< count of time this lock has been called */
 } rte_ticketlock_recursive_t;
 
@@ -147,7 +149,7 @@ 
 rte_ticketlock_recursive_init(rte_ticketlock_recursive_t *tlr)
 {
 	rte_ticketlock_init(&tlr->tl);
-	__atomic_store_n(&tlr->user, TICKET_LOCK_INVALID_ID, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&tlr->user, TICKET_LOCK_INVALID_ID, rte_memory_order_relaxed);
 	tlr->count = 0;
 }
 
@@ -162,9 +164,9 @@ 
 {
 	int id = rte_gettid();
 
-	if (__atomic_load_n(&tlr->user, __ATOMIC_RELAXED) != id) {
+	if (rte_atomic_load_explicit(&tlr->user, rte_memory_order_relaxed) != id) {
 		rte_ticketlock_lock(&tlr->tl);
-		__atomic_store_n(&tlr->user, id, __ATOMIC_RELAXED);
+		rte_atomic_store_explicit(&tlr->user, id, rte_memory_order_relaxed);
 	}
 	tlr->count++;
 }
@@ -179,8 +181,8 @@ 
 rte_ticketlock_recursive_unlock(rte_ticketlock_recursive_t *tlr)
 {
 	if (--(tlr->count) == 0) {
-		__atomic_store_n(&tlr->user, TICKET_LOCK_INVALID_ID,
-				 __ATOMIC_RELAXED);
+		rte_atomic_store_explicit(&tlr->user, TICKET_LOCK_INVALID_ID,
+				 rte_memory_order_relaxed);
 		rte_ticketlock_unlock(&tlr->tl);
 	}
 }
@@ -198,10 +200,10 @@ 
 {
 	int id = rte_gettid();
 
-	if (__atomic_load_n(&tlr->user, __ATOMIC_RELAXED) != id) {
+	if (rte_atomic_load_explicit(&tlr->user, rte_memory_order_relaxed) != id) {
 		if (rte_ticketlock_trylock(&tlr->tl) == 0)
 			return 0;
-		__atomic_store_n(&tlr->user, id, __ATOMIC_RELAXED);
+		rte_atomic_store_explicit(&tlr->user, id, rte_memory_order_relaxed);
 	}
 	tlr->count++;
 	return 1;
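
For context, a minimal ticket lock usage sketch (illustrative only; the queue example is hypothetical):

#include <rte_ticketlock.h>

static rte_ticketlock_t q_lock;
static unsigned int q_depth;

static void
q_setup(void)
{
	rte_ticketlock_init(&q_lock);
}

static void
q_push(void)
{
	rte_ticketlock_lock(&q_lock);	/* FIFO: waiters acquire in ticket order */
	q_depth++;
	rte_ticketlock_unlock(&q_lock);
}
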
diff --git a/lib/eal/include/rte_trace_point.h b/lib/eal/include/rte_trace_point.h
index d587591..e682109 100644
--- a/lib/eal/include/rte_trace_point.h
+++ b/lib/eal/include/rte_trace_point.h
@@ -33,7 +33,7 @@ 
 #include <rte_stdatomic.h>
 
 /** The tracepoint object. */
-typedef uint64_t rte_trace_point_t;
+typedef uint64_t __rte_atomic rte_trace_point_t;
 
 /**
  * Macro to define the tracepoint arguments in RTE_TRACE_POINT macro.
@@ -359,7 +359,7 @@  struct __rte_trace_header {
 #define __rte_trace_point_emit_header_generic(t) \
 void *mem; \
 do { \
-	const uint64_t val = __atomic_load_n(t, __ATOMIC_ACQUIRE); \
+	const uint64_t val = rte_atomic_load_explicit(t, rte_memory_order_acquire); \
 	if (likely(!(val & __RTE_TRACE_FIELD_ENABLE_MASK))) \
 		return; \
 	mem = __rte_trace_mem_get(val); \
diff --git a/lib/eal/loongarch/include/rte_atomic.h b/lib/eal/loongarch/include/rte_atomic.h
index 3c82845..0510b8f 100644
--- a/lib/eal/loongarch/include/rte_atomic.h
+++ b/lib/eal/loongarch/include/rte_atomic.h
@@ -35,9 +35,9 @@ 
 #define rte_io_rmb()	rte_mb()
 
 static __rte_always_inline void
-rte_atomic_thread_fence(int memorder)
+rte_atomic_thread_fence(rte_memory_order memorder)
 {
-	__atomic_thread_fence(memorder);
+	__rte_atomic_thread_fence(memorder);
 }
 
 #ifdef __cplusplus
diff --git a/lib/eal/ppc/include/rte_atomic.h b/lib/eal/ppc/include/rte_atomic.h
index ec8d8a2..7382412 100644
--- a/lib/eal/ppc/include/rte_atomic.h
+++ b/lib/eal/ppc/include/rte_atomic.h
@@ -38,9 +38,9 @@ 
 #define rte_io_rmb() rte_rmb()
 
 static __rte_always_inline void
-rte_atomic_thread_fence(int memorder)
+rte_atomic_thread_fence(rte_memory_order memorder)
 {
-	__atomic_thread_fence(memorder);
+	__rte_atomic_thread_fence(memorder);
 }
 
 /*------------------------- 16 bit atomic operations -------------------------*/
@@ -48,8 +48,8 @@ 
 static inline int
 rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
 {
-	return __atomic_compare_exchange(dst, &exp, &src, 0, __ATOMIC_ACQUIRE,
-		__ATOMIC_ACQUIRE) ? 1 : 0;
+	return __atomic_compare_exchange(dst, &exp, &src, 0, rte_memory_order_acquire,
+		rte_memory_order_acquire) ? 1 : 0;
 }
 
 static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
@@ -60,29 +60,29 @@  static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
 static inline void
 rte_atomic16_inc(rte_atomic16_t *v)
 {
-	__atomic_fetch_add(&v->cnt, 1, __ATOMIC_ACQUIRE);
+	rte_atomic_fetch_add_explicit(&v->cnt, 1, rte_memory_order_acquire);
 }
 
 static inline void
 rte_atomic16_dec(rte_atomic16_t *v)
 {
-	__atomic_fetch_sub(&v->cnt, 1, __ATOMIC_ACQUIRE);
+	rte_atomic_fetch_sub_explicit(&v->cnt, 1, rte_memory_order_acquire);
 }
 
 static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
 {
-	return __atomic_fetch_add(&v->cnt, 1, __ATOMIC_ACQUIRE) + 1 == 0;
+	return rte_atomic_fetch_add_explicit(&v->cnt, 1, rte_memory_order_acquire) + 1 == 0;
 }
 
 static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
 {
-	return __atomic_fetch_sub(&v->cnt, 1, __ATOMIC_ACQUIRE) - 1 == 0;
+	return rte_atomic_fetch_sub_explicit(&v->cnt, 1, rte_memory_order_acquire) - 1 == 0;
 }
 
 static inline uint16_t
 rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
 {
-	return __atomic_exchange_2(dst, val, __ATOMIC_SEQ_CST);
+	return __atomic_exchange_2(dst, val, rte_memory_order_seq_cst);
 }
 
 /*------------------------- 32 bit atomic operations -------------------------*/
@@ -90,8 +90,8 @@  static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
 static inline int
 rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
 {
-	return __atomic_compare_exchange(dst, &exp, &src, 0, __ATOMIC_ACQUIRE,
-		__ATOMIC_ACQUIRE) ? 1 : 0;
+	return __atomic_compare_exchange(dst, &exp, &src, 0, rte_memory_order_acquire,
+		rte_memory_order_acquire) ? 1 : 0;
 }
 
 static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
@@ -102,29 +102,29 @@  static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
 static inline void
 rte_atomic32_inc(rte_atomic32_t *v)
 {
-	__atomic_fetch_add(&v->cnt, 1, __ATOMIC_ACQUIRE);
+	rte_atomic_fetch_add_explicit(&v->cnt, 1, rte_memory_order_acquire);
 }
 
 static inline void
 rte_atomic32_dec(rte_atomic32_t *v)
 {
-	__atomic_fetch_sub(&v->cnt, 1, __ATOMIC_ACQUIRE);
+	rte_atomic_fetch_sub_explicit(&v->cnt, 1, rte_memory_order_acquire);
 }
 
 static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
 {
-	return __atomic_fetch_add(&v->cnt, 1, __ATOMIC_ACQUIRE) + 1 == 0;
+	return rte_atomic_fetch_add_explicit(&v->cnt, 1, rte_memory_order_acquire) + 1 == 0;
 }
 
 static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
 {
-	return __atomic_fetch_sub(&v->cnt, 1, __ATOMIC_ACQUIRE) - 1 == 0;
+	return rte_atomic_fetch_sub_explicit(&v->cnt, 1, rte_memory_order_acquire) - 1 == 0;
 }
 
 static inline uint32_t
 rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
 {
-	return __atomic_exchange_4(dst, val, __ATOMIC_SEQ_CST);
+	return __atomic_exchange_4(dst, val, rte_memory_order_seq_cst);
 }
 
 /*------------------------- 64 bit atomic operations -------------------------*/
@@ -132,8 +132,8 @@  static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
 static inline int
 rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
 {
-	return __atomic_compare_exchange(dst, &exp, &src, 0, __ATOMIC_ACQUIRE,
-		__ATOMIC_ACQUIRE) ? 1 : 0;
+	return __atomic_compare_exchange(dst, &exp, &src, 0, rte_memory_order_acquire,
+		rte_memory_order_acquire) ? 1 : 0;
 }
 
 static inline void
@@ -157,47 +157,47 @@  static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
 static inline void
 rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
 {
-	__atomic_fetch_add(&v->cnt, inc, __ATOMIC_ACQUIRE);
+	rte_atomic_fetch_add_explicit(&v->cnt, inc, rte_memory_order_acquire);
 }
 
 static inline void
 rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
 {
-	__atomic_fetch_sub(&v->cnt, dec, __ATOMIC_ACQUIRE);
+	rte_atomic_fetch_sub_explicit(&v->cnt, dec, rte_memory_order_acquire);
 }
 
 static inline void
 rte_atomic64_inc(rte_atomic64_t *v)
 {
-	__atomic_fetch_add(&v->cnt, 1, __ATOMIC_ACQUIRE);
+	rte_atomic_fetch_add_explicit(&v->cnt, 1, rte_memory_order_acquire);
 }
 
 static inline void
 rte_atomic64_dec(rte_atomic64_t *v)
 {
-	__atomic_fetch_sub(&v->cnt, 1, __ATOMIC_ACQUIRE);
+	rte_atomic_fetch_sub_explicit(&v->cnt, 1, rte_memory_order_acquire);
 }
 
 static inline int64_t
 rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
 {
-	return __atomic_fetch_add(&v->cnt, inc, __ATOMIC_ACQUIRE) + inc;
+	return rte_atomic_fetch_add_explicit(&v->cnt, inc, rte_memory_order_acquire) + inc;
 }
 
 static inline int64_t
 rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
 {
-	return __atomic_fetch_sub(&v->cnt, dec, __ATOMIC_ACQUIRE) - dec;
+	return rte_atomic_fetch_sub_explicit(&v->cnt, dec, rte_memory_order_acquire) - dec;
 }
 
 static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v)
 {
-	return __atomic_fetch_add(&v->cnt, 1, __ATOMIC_ACQUIRE) + 1 == 0;
+	return rte_atomic_fetch_add_explicit(&v->cnt, 1, rte_memory_order_acquire) + 1 == 0;
 }
 
 static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v)
 {
-	return __atomic_fetch_sub(&v->cnt, 1, __ATOMIC_ACQUIRE) - 1 == 0;
+	return rte_atomic_fetch_sub_explicit(&v->cnt, 1, rte_memory_order_acquire) - 1 == 0;
 }
 
 static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
@@ -213,7 +213,7 @@  static inline void rte_atomic64_clear(rte_atomic64_t *v)
 static inline uint64_t
 rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val)
 {
-	return __atomic_exchange_8(dst, val, __ATOMIC_SEQ_CST);
+	return __atomic_exchange_8(dst, val, rte_memory_order_seq_cst);
 }
 
 #endif
diff --git a/lib/eal/riscv/include/rte_atomic.h b/lib/eal/riscv/include/rte_atomic.h
index 4b4633c..2603bc9 100644
--- a/lib/eal/riscv/include/rte_atomic.h
+++ b/lib/eal/riscv/include/rte_atomic.h
@@ -40,9 +40,9 @@ 
 #define rte_io_rmb()	asm volatile("fence ir, ir" : : : "memory")
 
 static __rte_always_inline void
-rte_atomic_thread_fence(int memorder)
+rte_atomic_thread_fence(rte_memory_order memorder)
 {
-	__atomic_thread_fence(memorder);
+	__rte_atomic_thread_fence(memorder);
 }
 
 #ifdef __cplusplus
diff --git a/lib/eal/x86/include/rte_atomic.h b/lib/eal/x86/include/rte_atomic.h
index f2ee1a9..3b3a9a4 100644
--- a/lib/eal/x86/include/rte_atomic.h
+++ b/lib/eal/x86/include/rte_atomic.h
@@ -82,17 +82,17 @@ 
 /**
  * Synchronization fence between threads based on the specified memory order.
  *
- * On x86 the __atomic_thread_fence(__ATOMIC_SEQ_CST) generates full 'mfence'
+ * On x86 the __rte_atomic_thread_fence(rte_memory_order_seq_cst) generates full 'mfence'
  * which is quite expensive. The optimized implementation of rte_smp_mb is
  * used instead.
  */
 static __rte_always_inline void
-rte_atomic_thread_fence(int memorder)
+rte_atomic_thread_fence(rte_memory_order memorder)
 {
-	if (memorder == __ATOMIC_SEQ_CST)
+	if (memorder == rte_memory_order_seq_cst)
 		rte_smp_mb();
 	else
-		__atomic_thread_fence(memorder);
+		__rte_atomic_thread_fence(memorder);
 }
 
 /*------------------------- 16 bit atomic operations -------------------------*/
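
For context, a minimal sketch of a caller passing the enum-typed memory order to the fence (illustrative only; the publish example is hypothetical):

#include <stdint.h>
#include <rte_atomic.h>
#include <rte_stdatomic.h>

static uint32_t __rte_atomic flag;
static uint32_t payload;

static void
publish(uint32_t v)
{
	payload = v;
	/* order the payload store before the flag store */
	rte_atomic_thread_fence(rte_memory_order_release);
	rte_atomic_store_explicit(&flag, 1, rte_memory_order_relaxed);
}

static void
full_barrier(void)
{
	/* seq_cst takes the optimized rte_smp_mb() path on x86 */
	rte_atomic_thread_fence(rte_memory_order_seq_cst);
}
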
diff --git a/lib/eal/x86/include/rte_spinlock.h b/lib/eal/x86/include/rte_spinlock.h
index 0b20ddf..c76218a 100644
--- a/lib/eal/x86/include/rte_spinlock.h
+++ b/lib/eal/x86/include/rte_spinlock.h
@@ -78,7 +78,7 @@  static inline int rte_tm_supported(void)
 }
 
 static inline int
-rte_try_tm(volatile int *lock)
+rte_try_tm(volatile int __rte_atomic *lock)
 {
 	int i, retries;
 
diff --git a/lib/eal/x86/rte_power_intrinsics.c b/lib/eal/x86/rte_power_intrinsics.c
index f749da9..cf70e33 100644
--- a/lib/eal/x86/rte_power_intrinsics.c
+++ b/lib/eal/x86/rte_power_intrinsics.c
@@ -23,9 +23,9 @@ 
 	uint64_t val;
 
 	/* trigger a write but don't change the value */
-	val = __atomic_load_n((volatile uint64_t *)addr, __ATOMIC_RELAXED);
-	__atomic_compare_exchange_n((volatile uint64_t *)addr, &val, val, 0,
-			__ATOMIC_RELAXED, __ATOMIC_RELAXED);
+	val = rte_atomic_load_explicit((volatile uint64_t *)addr, rte_memory_order_relaxed);
+	rte_atomic_compare_exchange_strong_explicit((volatile uint64_t *)addr, &val, val,
+			rte_memory_order_relaxed, rte_memory_order_relaxed);
 }
 
 static bool wait_supported;
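
For context, the same dummy-write idiom expressed as a standalone helper with the rte_ atomics API (illustrative only; touch_monitored_word() is hypothetical):

#include <stdint.h>
#include <rte_stdatomic.h>

static inline void
touch_monitored_word(volatile uint64_t __rte_atomic *addr)
{
	uint64_t val = rte_atomic_load_explicit(addr, rte_memory_order_relaxed);

	/* success or failure both leave *addr unchanged; only the write matters */
	rte_atomic_compare_exchange_strong_explicit(addr, &val, val,
			rte_memory_order_relaxed, rte_memory_order_relaxed);
}
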