[v7,1/5] eal: add new definitions for wait scheme

Message ID 20211028065640.139655-2-feifei.wang2@arm.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Series: add new definitions for wait scheme

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Feifei Wang Oct. 28, 2021, 6:56 a.m. UTC
  Introduce macros as a generic interface for address monitoring.
For the different sizes, encapsulate '__LOAD_EXC_16', '__LOAD_EXC_32'
and '__LOAD_EXC_64' into a new macro '__LOAD_EXC'.

Furthermore, to prevent a compilation warning on Arm:
----------------------------------------------
'warning: implicit declaration of function ...'
----------------------------------------------
delete the 'undef' constructs for '__LOAD_EXC_xx', '__SEVL' and '__WFE',
and add an '__RTE_ARM' prefix to these macros to fix the namespace.

This is needed because the original macros are undefined at the end of
the file; if the new macro 'rte_wait_event' expanded to them in other
files, they would be treated as 'not defined'.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/arm/include/rte_pause_64.h  | 166 ++++++++++++++++------------
 lib/eal/include/generic/rte_pause.h |  28 +++++
 2 files changed, 122 insertions(+), 72 deletions(-)
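To make the 'implicit declaration' failure mode concrete, here is a
minimal sketch of the problem the commit message describes (the file
names and the trimmed-down macro bodies are illustrative, not the
actual header contents):

/* rte_pause_64.h, old layout: the helper is private to this header. */
#define __LOAD_EXC_16(src, dst, memorder) \
	/* ... inline asm ... */
/* ... used by the rte_wait_until_equal_*() inline functions ... */
#undef __LOAD_EXC_16	/* the helper no longer exists past this point */

/* A later macro can still name the helper, because macros expand at
 * the use site, not at the definition site: */
#define rte_wait_event(addr, value, memorder) \
	__LOAD_EXC_16(addr, value, memorder)

/* some_other_file.c */
rte_wait_event(&flags, value, __ATOMIC_RELAXED);
/* At this expansion point __LOAD_EXC_16 is no longer a macro, so the
 * compiler parses it as a call to an undeclared function:
 * 'warning: implicit declaration of function ...'. */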
  

Comments

Jerin Jacob Oct. 28, 2021, 7:15 a.m. UTC | #1
On Thu, Oct 28, 2021 at 12:26 PM Feifei Wang <feifei.wang2@arm.com> wrote:
>
> [...]
> ---

> +static __rte_always_inline void
> +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> +               int memorder)
> +{
> +       uint16_t value;
> +
> +       assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);

Assert is not good in the library, Why not RTE_BUILD_BUG_ON here
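(For context: RTE_BUILD_BUG_ON(), from rte_common.h, rejects a constant
false condition at compile time instead of aborting at run time. A
minimal sketch of the difference; the macro wrapper is hypothetical,
shown only so the expansion sees the caller's literal argument:)

#include <assert.h>
#include <rte_common.h>

/* Runtime check: fires only when the code actually executes, and is
 * compiled out entirely with -DNDEBUG. */
static inline void
check_memorder_runtime(int memorder)
{
	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
}

/* Compile-time check: an invalid constant argument breaks the build,
 * so a bad caller never makes it into a binary. */
#define CHECK_MEMORDER_BUILDTIME(memorder)                 \
	RTE_BUILD_BUG_ON((memorder) != __ATOMIC_ACQUIRE && \
			 (memorder) != __ATOMIC_RELAXED)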


> +
> +       __RTE_ARM_LOAD_EXC_16(addr, value, memorder)
>         if (value != expected) {
> -               __SEVL()
> +                __RTE_ARM_SEVL()
>                 do {
> -                       __WFE()
> -                       __LOAD_EXC_16(addr, value, memorder)
> +                       __RTE_ARM_WFE()
> +                       __RTE_ARM_LOAD_EXC_16(addr, value, memorder)
>                 } while (value != expected);
>         }
> -#undef __LOAD_EXC_16
>  }
>
> [...]
>  static __rte_always_inline void
> @@ -115,38 +142,33 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>
>         assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);

remove assert and change to BUILD_BUG_ON

>
> [...]
> +#define rte_wait_event(addr, mask, cond, expected, memorder)              \
> +do {                                                                      \
> +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                \
> +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                  \
> +                               memorder != __ATOMIC_RELAXED);            \
> +       uint32_t size = sizeof(*(addr)) << 3;

Add const

> +       typeof(*(addr)) expected_value = (expected);                      \
> +       typeof(*(addr)) value = 0;

Why zero assignment
> +       __RTE_ARM_LOAD_EXC((addr), value, memorder, size)                 \

Assert is not good in the library, Why not RTE_BUILD_BUG_ON here


> +       if ((value & (mask)) cond expected_value) {                       \
> +               __RTE_ARM_SEVL()                                          \
> +               do {                                                      \
> +                       __RTE_ARM_WFE()                                   \
> +                       __RTE_ARM_LOAD_EXC((addr), value, memorder, size) \

if the address is the type of __int128_t. This logic will fail? Could
you add 128bit support too and
remove the assert from __RTE_ARM_LOAD_EXC


> +               } while ((value & (mask)) cond expected_value);           \
> +       }                                                                 \
> +} while (0)
>
> [...]
  
Feifei Wang Oct. 28, 2021, 7:40 a.m. UTC | #2
> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Thursday, October 28, 2021 3:16 PM
> To: Feifei Wang <Feifei.Wang2@arm.com>
> Cc: Ruifeng Wang <Ruifeng.Wang@arm.com>; dpdk-dev <dev@dpdk.org>;
> nd <nd@arm.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> Stephen Hemminger <stephen@networkplumber.org>; David Marchand
> <david.marchand@redhat.com>; thomas@monjalon.net; Mattias Rönnblom
> <mattias.ronnblom@ericsson.com>
> Subject: Re: [PATCH v7 1/5] eal: add new definitions for wait scheme
> 
> On Thu, Oct 28, 2021 at 12:26 PM Feifei Wang <feifei.wang2@arm.com>
> wrote:
> >
> > [...]
> > ---
> 
> > +static __rte_always_inline void
> > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > +               int memorder)
> > +{
> > +       uint16_t value;
> > +
> > +       assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > + __ATOMIC_RELAXED);
> 
> Assert is not good in the library, Why not RTE_BUILD_BUG_ON here
[Feifei] This line is original code and has nothing to do with this patch;
I can change it in the next version.
> 
> 
> > [...]
> >  static __rte_always_inline void
> > @@ -115,38 +142,33 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> >
> >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> 
> remove assert and change to BUILD_BUG_ON
[Feifei] OK
> 
> >
> > [...]
> > +#define rte_wait_event(addr, mask, cond, expected, memorder)              \
> > +do {                                                                      \
> > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                \
> > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
> \
> > +                               memorder != __ATOMIC_RELAXED);            \
> > +       uint32_t size = sizeof(*(addr)) << 3;
> 
> Add const
[Feifei] OK. 
> > +       typeof(*(addr)) expected_value = (expected);                      \
> > +       typeof(*(addr)) value = 0;
> 
> Why zero assignment
I will delete this initialization.
> > +       __RTE_ARM_LOAD_EXC((addr), value, memorder, size)                 \
> 
> Assert is not good in the library, Why not RTE_BUILD_BUG_ON here
[Feifei] For __RTE_ARM_LOAD_EXC, 'size' is not known until the code is running.
So 'size' cannot be checked at compile time and BUILD_BUG_ON doesn't work here.
> 
> 
> > +       if ((value & (mask)) cond expected_value) {                       \
> > +               __RTE_ARM_SEVL()                                          \
> > +               do {                                                      \
> > +                       __RTE_ARM_WFE()                                   \
> > +                       __RTE_ARM_LOAD_EXC((addr), value, memorder,
> > + size) \
> 
> if the address is the type of __int128_t. This logic will fail? Could you add
> 128bit support too and remove the assert from __RTE_ARM_LOAD_EXC
[Feifei] There is no 128-bit case in the library yet. If a 128-bit case turns
up, we can add a 128-bit path here. Now there is an assert check in
__RTE_ARM_LOAD_EXC to check whether the size is '16/32/64'.
> 
> 
> > [...]
  
Jerin Jacob Oct. 28, 2021, 7:51 a.m. UTC | #3
On Thu, Oct 28, 2021 at 1:11 PM Feifei Wang <Feifei.Wang2@arm.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Jerin Jacob <jerinjacobk@gmail.com>
> > Sent: Thursday, October 28, 2021 3:16 PM
> > To: Feifei Wang <Feifei.Wang2@arm.com>
> > Cc: Ruifeng Wang <Ruifeng.Wang@arm.com>; dpdk-dev <dev@dpdk.org>;
> > nd <nd@arm.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> > Stephen Hemminger <stephen@networkplumber.org>; David Marchand
> > <david.marchand@redhat.com>; thomas@monjalon.net; Mattias Rönnblom
> > <mattias.ronnblom@ericsson.com>
> > Subject: Re: [PATCH v7 1/5] eal: add new definitions for wait scheme
> >
> > [...]
> >
> > > +       __RTE_ARM_LOAD_EXC((addr), value, memorder, size)                 \
> >
> > Assert is not good in the library, Why not RTE_BUILD_BUG_ON here
> [Feifei] For __RTE_ARM_LOAD_EXC, 'size' is not known until the code is running.
> So 'size' cannot be checked at compile time and BUILD_BUG_ON doesn't work here.

The value of 'uint32_t size = sizeof(*(addr)) << 3' is known at compile
time, as sizeof is a compile-time operator.
So I think BUILD_BUG_ON is fine.
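
(A sketch of what that build-time width check could look like, keeping
the test a constant expression by using sizeof directly; the macro name
is illustrative:)

#define __RTE_ARM_CHECK_SIZE(addr)                       \
	RTE_BUILD_BUG_ON((sizeof(*(addr)) << 3) != 16 && \
			 (sizeof(*(addr)) << 3) != 32 && \
			 (sizeof(*(addr)) << 3) != 64)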



> >
> >
> > > +       if ((value & (mask)) cond expected_value) {                       \
> > > +               __RTE_ARM_SEVL()                                          \
> > > +               do {                                                      \
> > > +                       __RTE_ARM_WFE()                                   \
> > > +                       __RTE_ARM_LOAD_EXC((addr), value, memorder,
> > > + size) \
> >
> > if the address is the type of __int128_t. This logic will fail? Could you add
> > 128bit support too and remove the assert from __RTE_ARM_LOAD_EXC
> [Feifei] There is no 128-bit case in the library yet. If a 128-bit case turns
> up, we can add a 128-bit path here. Now there is an assert check in
> __RTE_ARM_LOAD_EXC to check whether the size is '16/32/64'.

The API expects only "addr" without any type, so the application can use
128-bit too.

Worst case, for now we can fall back to __atomic_load_n() for size 128;
we don't want to break applications while using this API. Or add support
for 128 bits in the code.
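
One possible shape for that fallback, purely as a sketch: the 128-bit
branch below is hypothetical, and since a plain __atomic_load_n() does
not arm the exclusive monitor, a real WFE-based wait would more likely
want an ldxp/ldaxp-based __RTE_ARM_LOAD_EXC_128 instead.

#define __RTE_ARM_LOAD_EXC(src, dst, memorder, size) {              \
	if (size == 16)                                             \
		__RTE_ARM_LOAD_EXC_16(src, dst, memorder)           \
	else if (size == 32)                                        \
		__RTE_ARM_LOAD_EXC_32(src, dst, memorder)           \
	else if (size == 64)                                        \
		__RTE_ARM_LOAD_EXC_64(src, dst, memorder)           \
	else if (size == 128)                                       \
		/* fallback: plain atomic load, no monitor armed */ \
		dst = __atomic_load_n((src), (memorder));           \
}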


> >
> > [...]
  
Feifei Wang Oct. 28, 2021, 9:27 a.m. UTC | #4
> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Thursday, October 28, 2021 3:51 PM
> To: Feifei Wang <Feifei.Wang2@arm.com>
> Cc: Ruifeng Wang <Ruifeng.Wang@arm.com>; dpdk-dev <dev@dpdk.org>;
> nd <nd@arm.com>; Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> Stephen Hemminger <stephen@networkplumber.org>; David Marchand
> <david.marchand@redhat.com>; thomas@monjalon.net; Mattias Rönnblom
> <mattias.ronnblom@ericsson.com>
> Subject: Re: [PATCH v7 1/5] eal: add new definitions for wait scheme
> 
> On Thu, Oct 28, 2021 at 1:11 PM Feifei Wang <Feifei.Wang2@arm.com>
> wrote:
> >
> > [...]
> >
> > > > +       __RTE_ARM_LOAD_EXC((addr), value, memorder, size)                 \
> > >
> > > Assert is not good in the library, Why not RTE_BUILD_BUG_ON here
> > [Feifei] For __RTE_ARM_LOAD_EXC, 'size' is not known until the code is running.
> > So 'size' cannot be checked at compile time and BUILD_BUG_ON doesn't
> > work here.
> 
> The value of 'uint32_t size = sizeof(*(addr)) << 3' is known at compile
> time, as sizeof is a compile-time operator.
> So I think BUILD_BUG_ON is fine.
[Feifei] You are right. I tried it with BUILD_BUG_ON; it is OK to check 'size'.
>
> > >
> > > [...]
> > >
> > > if the address is the type of __int128_t. This logic will fail?
> > > Could you add 128bit support too and remove the assert from
> > > __RTE_ARM_LOAD_EXC
> > [Feifei] There is no 128-bit case in the library yet. If a 128-bit case
> > turns up, we can add a 128-bit path here. Now there is an assert check in
> > __RTE_ARM_LOAD_EXC to check whether the size is '16/32/64'.
>
> The API expects only "addr" without any type, so the application can use
> 128-bit too.
>
> Worst case, for now we can fall back to __atomic_load_n() for size 128;
> we don't want to break applications while using this API. Or add support
> for 128 bits in the code.
> 
[Feifei] All right, I will try to add a 128-bit load in the next version.
>
> [...]
  
Ananyev, Konstantin Oct. 28, 2021, 1:14 p.m. UTC | #5
> 
> diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
> [...]
> +#define rte_wait_event(addr, mask, cond, expected, memorder)                       \
> +do {                                                                               \
> +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                         \
> +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                           \
> +				memorder != __ATOMIC_RELAXED);                     \
> +	typeof(*(addr)) expected_value = (expected);                               \
> +	while ((__atomic_load_n((addr), (memorder)) & (mask)) cond expected_value) \
> +		rte_pause();                                                       \
> +} while (0)
>  #endif
> 
>  #endif /* _RTE_PAUSE_H_ */
> --

From generic/x86 perspective:
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

  

Patch

diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index e87d10b8cc..d547226a8d 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -26,26 +26,18 @@  static inline void rte_pause(void)
 #ifdef RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED
 
 /* Send an event to quit WFE. */
-#define __SEVL() { asm volatile("sevl" : : : "memory"); }
+#define __RTE_ARM_SEVL() { asm volatile("sevl" : : : "memory"); }
 
 /* Put processor into low power WFE(Wait For Event) state. */
-#define __WFE() { asm volatile("wfe" : : : "memory"); }
+#define __RTE_ARM_WFE() { asm volatile("wfe" : : : "memory"); }
 
-static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-		int memorder)
-{
-	uint16_t value;
-
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
-
-	/*
-	 * Atomic exclusive load from addr, it returns the 16-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_16(src, dst, memorder) {               \
+/*
+ * Atomic exclusive load from addr: it returns the 16-bit content of
+ * *addr while making it 'monitored'. When it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __RTE_ARM_LOAD_EXC_16(src, dst, memorder) {       \
 	if (memorder == __ATOMIC_RELAXED) {               \
 		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
 			: [tmp] "=&r" (dst)               \
@@ -58,15 +50,70 @@  rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
 			: "memory");                      \
 	} }
 
-	__LOAD_EXC_16(addr, value, memorder)
+/*
+ * Atomic exclusive load from addr: it returns the 32-bit content of
+ * *addr while making it 'monitored'. When it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __RTE_ARM_LOAD_EXC_32(src, dst, memorder) {      \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+/*
+ * Atomic exclusive load from addr: it returns the 64-bit content of
+ * *addr while making it 'monitored'. When it is written by someone
+ * else, the 'monitored' state is cleared and an event is generated
+ * implicitly to exit WFE.
+ */
+#define __RTE_ARM_LOAD_EXC_64(src, dst, memorder) {      \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+#define __RTE_ARM_LOAD_EXC(src, dst, memorder, size) {    \
+	assert(size == 16 || size == 32 || size == 64);   \
+	if (size == 16)                                   \
+		__RTE_ARM_LOAD_EXC_16(src, dst, memorder) \
+	else if (size == 32)                              \
+		__RTE_ARM_LOAD_EXC_32(src, dst, memorder) \
+	else if (size == 64)                              \
+		__RTE_ARM_LOAD_EXC_64(src, dst, memorder) \
+}
+
+static __rte_always_inline void
+rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
+		int memorder)
+{
+	uint16_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+	__RTE_ARM_LOAD_EXC_16(addr, value, memorder)
 	if (value != expected) {
-		__SEVL()
+		__RTE_ARM_SEVL()
 		do {
-			__WFE()
-			__LOAD_EXC_16(addr, value, memorder)
+			__RTE_ARM_WFE()
+			__RTE_ARM_LOAD_EXC_16(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_16
 }
 
 static __rte_always_inline void
@@ -77,34 +124,14 @@  rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 32-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_32(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
-	__LOAD_EXC_32(addr, value, memorder)
+	__RTE_ARM_LOAD_EXC_32(addr, value, memorder)
 	if (value != expected) {
-		__SEVL()
+		__RTE_ARM_SEVL()
 		do {
-			__WFE()
-			__LOAD_EXC_32(addr, value, memorder)
+			__RTE_ARM_WFE()
+			__RTE_ARM_LOAD_EXC_32(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_32
 }
 
 static __rte_always_inline void
@@ -115,38 +142,33 @@  rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 64-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_64(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
-	__LOAD_EXC_64(addr, value, memorder)
+	__RTE_ARM_LOAD_EXC_64(addr, value, memorder)
 	if (value != expected) {
-		__SEVL()
+		__RTE_ARM_SEVL()
 		do {
-			__WFE()
-			__LOAD_EXC_64(addr, value, memorder)
+			__RTE_ARM_WFE()
+			__RTE_ARM_LOAD_EXC_64(addr, value, memorder)
 		} while (value != expected);
 	}
 }
-#undef __LOAD_EXC_64
 
-#undef __SEVL
-#undef __WFE
+#define rte_wait_event(addr, mask, cond, expected, memorder)              \
+do {                                                                      \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                \
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                  \
+				memorder != __ATOMIC_RELAXED);            \
+	uint32_t size = sizeof(*(addr)) << 3;                             \
+	typeof(*(addr)) expected_value = (expected);                      \
+	typeof(*(addr)) value = 0;                                        \
+	__RTE_ARM_LOAD_EXC((addr), value, memorder, size)                 \
+	if ((value & (mask)) cond expected_value) {                       \
+		__RTE_ARM_SEVL()                                          \
+		do {                                                      \
+			__RTE_ARM_WFE()                                   \
+			__RTE_ARM_LOAD_EXC((addr), value, memorder, size) \
+		} while ((value & (mask)) cond expected_value);           \
+	}                                                                 \
+} while (0)
 
 #endif
 
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index 668ee4a184..d0c5b5a415 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -111,6 +111,34 @@  rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 	while (__atomic_load_n(addr, memorder) != expected)
 		rte_pause();
 }
+
+/*
+ * Wait until *addr breaks the condition, with a relaxed memory
+ * ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ *  A mask of value bits in interest.
+ * @param cond
+ *  A symbol representing the condition.
+ * @param expected
+ *  An expected value to be in the memory location.
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ */
+#define rte_wait_event(addr, mask, cond, expected, memorder)                       \
+do {                                                                               \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                         \
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                           \
+				memorder != __ATOMIC_RELAXED);                     \
+	typeof(*(addr)) expected_value = (expected);                               \
+	while ((__atomic_load_n((addr), (memorder)) & (mask)) cond expected_value) \
+		rte_pause();                                                       \
+} while (0)
 #endif
 
 #endif /* _RTE_PAUSE_H_ */
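
For reference, a minimal usage sketch of the new macro as defined above
(the function and flag layout are illustrative): rte_wait_event() spins
while '(*addr & mask) cond expected' holds, so the condition describes
the state being waited out of.

#include <rte_pause.h>

/* Wait until another core sets bit 0 of *flags, i.e. spin while
 * (*flags & 0x1) == 0. On aarch64 this resolves to the WFE-based loop
 * above; on other targets, to the generic rte_pause() polling loop. */
static void
wait_for_start(volatile uint32_t *flags)
{
	rte_wait_event(flags, 0x1, ==, 0, __ATOMIC_ACQUIRE);
}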