[v4,1/5] eal: add new definitions for wait scheme

Message ID 20211020084523.1309177-2-feifei.wang2@arm.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Series: add new definitions for wait scheme

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Feifei Wang Oct. 20, 2021, 8:45 a.m. UTC
  Introduce macros as generic interface for address monitoring.

Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/eal/arm/include/rte_pause_64.h  | 126 ++++++++++++++++------------
 lib/eal/include/generic/rte_pause.h |  32 +++++++
 2 files changed, 104 insertions(+), 54 deletions(-)
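
For context, a minimal usage sketch of the proposed macro (the variable and mask values below are purely illustrative, not taken from the patch):

/* Hypothetical example: spin while (flags & 0xffff) == 0, i.e. return
 * once the low 16 bits become non-zero, with relaxed ordering; the
 * trailing 64 selects the 64-bit exclusive-load variant on aarch64.
 */
volatile uint64_t flags;
rte_wait_event(&flags, 0xffff, 0, ==, __ATOMIC_RELAXED, 64);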
  

Comments

Ananyev, Konstantin Oct. 21, 2021, 4:24 p.m. UTC | #1
> Introduce macros as generic interface for address monitoring.
> 
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  lib/eal/arm/include/rte_pause_64.h  | 126 ++++++++++++++++------------
>  lib/eal/include/generic/rte_pause.h |  32 +++++++
>  2 files changed, 104 insertions(+), 54 deletions(-)
> 
> diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
> index e87d10b8cc..23954c2de2 100644
> --- a/lib/eal/arm/include/rte_pause_64.h
> +++ b/lib/eal/arm/include/rte_pause_64.h
> @@ -31,20 +31,12 @@ static inline void rte_pause(void)
>  /* Put processor into low power WFE(Wait For Event) state. */
>  #define __WFE() { asm volatile("wfe" : : : "memory"); }
> 
> -static __rte_always_inline void
> -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> -		int memorder)
> -{
> -	uint16_t value;
> -
> -	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> -
> -	/*
> -	 * Atomic exclusive load from addr, it returns the 16-bit content of
> -	 * *addr while making it 'monitored',when it is written by someone
> -	 * else, the 'monitored' state is cleared and a event is generated
> -	 * implicitly to exit WFE.
> -	 */
> +/*
> + * Atomic exclusive load from addr, it returns the 16-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
>  #define __LOAD_EXC_16(src, dst, memorder) {               \
>  	if (memorder == __ATOMIC_RELAXED) {               \
>  		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
> @@ -58,6 +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
>  			: "memory");                      \
>  	} }
> 
> +/*
> + * Atomic exclusive load from addr, it returns the 32-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
> +#define __LOAD_EXC_32(src, dst, memorder) {              \
> +	if (memorder == __ATOMIC_RELAXED) {              \
> +		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> +			: [tmp] "=&r" (dst)              \
> +			: [addr] "r"(src)                \
> +			: "memory");                     \
> +	} else {                                         \
> +		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> +			: [tmp] "=&r" (dst)              \
> +			: [addr] "r"(src)                \
> +			: "memory");                     \
> +	} }
> +
> +/*
> + * Atomic exclusive load from addr, it returns the 64-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
> +#define __LOAD_EXC_64(src, dst, memorder) {              \
> +	if (memorder == __ATOMIC_RELAXED) {              \
> +		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> +			: [tmp] "=&r" (dst)              \
> +			: [addr] "r"(src)                \
> +			: "memory");                     \
> +	} else {                                         \
> +		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> +			: [tmp] "=&r" (dst)              \
> +			: [addr] "r"(src)                \
> +			: "memory");                     \
> +	} }
> +
> +static __rte_always_inline void
> +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> +		int memorder)
> +{
> +	uint16_t value;
> +
> +	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> +
>  	__LOAD_EXC_16(addr, value, memorder)
>  	if (value != expected) {
>  		__SEVL()
> @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
>  			__LOAD_EXC_16(addr, value, memorder)
>  		} while (value != expected);
>  	}
> -#undef __LOAD_EXC_16
>  }
> 
>  static __rte_always_inline void
> @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
> 
>  	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> 
> -	/*
> -	 * Atomic exclusive load from addr, it returns the 32-bit content of
> -	 * *addr while making it 'monitored',when it is written by someone
> -	 * else, the 'monitored' state is cleared and a event is generated
> -	 * implicitly to exit WFE.
> -	 */
> -#define __LOAD_EXC_32(src, dst, memorder) {              \
> -	if (memorder == __ATOMIC_RELAXED) {              \
> -		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> -			: [tmp] "=&r" (dst)              \
> -			: [addr] "r"(src)                \
> -			: "memory");                     \
> -	} else {                                         \
> -		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> -			: [tmp] "=&r" (dst)              \
> -			: [addr] "r"(src)                \
> -			: "memory");                     \
> -	} }
> -
>  	__LOAD_EXC_32(addr, value, memorder)
>  	if (value != expected) {
>  		__SEVL()
> @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
>  			__LOAD_EXC_32(addr, value, memorder)
>  		} while (value != expected);
>  	}
> -#undef __LOAD_EXC_32
>  }
> 
>  static __rte_always_inline void
> @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
> 
>  	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> 
> -	/*
> -	 * Atomic exclusive load from addr, it returns the 64-bit content of
> -	 * *addr while making it 'monitored',when it is written by someone
> -	 * else, the 'monitored' state is cleared and a event is generated
> -	 * implicitly to exit WFE.
> -	 */
> -#define __LOAD_EXC_64(src, dst, memorder) {              \
> -	if (memorder == __ATOMIC_RELAXED) {              \
> -		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> -			: [tmp] "=&r" (dst)              \
> -			: [addr] "r"(src)                \
> -			: "memory");                     \
> -	} else {                                         \
> -		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> -			: [tmp] "=&r" (dst)              \
> -			: [addr] "r"(src)                \
> -			: "memory");                     \
> -	} }
> -
>  	__LOAD_EXC_64(addr, value, memorder)
>  	if (value != expected) {
>  		__SEVL()
> @@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>  		} while (value != expected);
>  	}
>  }
> +
> +#define rte_wait_event(addr, mask, expected, cond, memorder, size) \
> +do {                                                               \
> +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \
> +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
> +	memorder != __ATOMIC_RELAXED);                             \
> +	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
> +	uint##size_t value;                                        \
> +	__LOAD_EXC_##size(addr, value, memorder)                   \
> +	if ((value & mask) cond expected) {		           \
> +		__SEVL()                                           \
> +		do {                                               \
> +			__WFE()                                    \
> +			__LOAD_EXC_##size(addr, value, memorder)   \
> +		} while ((value & mask) cond expected);            \
> +	}                                                          \
> +} while (0)
> +
> +#undef __LOAD_EXC_16
> +#undef __LOAD_EXC_32
>  #undef __LOAD_EXC_64
> 
>  #undef __SEVL
> diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
> index 668ee4a184..20a5d2a9fd 100644
> --- a/lib/eal/include/generic/rte_pause.h
> +++ b/lib/eal/include/generic/rte_pause.h
> @@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>  	while (__atomic_load_n(addr, memorder) != expected)
>  		rte_pause();
>  }
> +
> +/*
> + * Wait until *addr breaks the condition, with a relaxed memory
> + * ordering model meaning the loads around this API can be reordered.
> + *
> + * @param addr
> + *  A pointer to the memory location.
> + * @param mask
> + *  A mask of value bits in interest.
> + * @param expected
> + *  A 16-bit expected value to be in the memory location.
> + * @param cond
> + *  A symbol representing the condition (==, !=).
> + * @param memorder
> + *  Two different memory orders that can be specified:
> + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + *  C++11 memory orders with the same names, see the C++11 standard or
> + *  the GCC wiki on atomic synchronization for detailed definition.
> + * @param size
> + * The bit size of *addr:
> + * It is used for arm architecture to choose load instructions,
> + * and the optional value is 16, 32 and 64.
> + */
> +#define rte_wait_event(addr, mask, expected, cond, memorder, size)     \
> +do {                                                                   \
> +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
> +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&               \
> +				memorder != __ATOMIC_RELAXED);         \
> +	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \

I don't really understand why you need 'size' passed as a parameter.
Can't it be:
size_t size = sizeof(*(addr));
And then:
RTE_BUILD_BUG_ON(size != sizeof(uint16_t) && size != sizeof(uint32_t) && size != sizeof(uint64_t));  
?

> +	while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> +		rte_pause();                                           \

Just to repeat my own comment from the previous version's review:
put () around macro parameters in the macro body.
It will save you from a lot of unexpected trouble.
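
For illustration, a sketch of the generic version with both suggestions applied, i.e. the width derived from sizeof(*(addr)) instead of a 'size' parameter and the parameters parenthesized in the body (this only sketches the comments above, it is not the final patch):

#define rte_wait_event(addr, mask, expected, cond, memorder)                  \
do {                                                                           \
	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                     \
	RTE_BUILD_BUG_ON((memorder) != __ATOMIC_ACQUIRE &&                     \
				(memorder) != __ATOMIC_RELAXED);               \
	RTE_BUILD_BUG_ON(sizeof(*(addr)) != sizeof(uint16_t) &&                \
				sizeof(*(addr)) != sizeof(uint32_t) &&         \
				sizeof(*(addr)) != sizeof(uint64_t));          \
	while ((__atomic_load_n((addr), (memorder)) & (mask)) cond (expected)) \
		rte_pause();                                                    \
} while (0)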

> +} while (0)
>  #endif
> 
>  #endif /* _RTE_PAUSE_H_ */
> --
> 2.25.1
  
Jerin Jacob Oct. 22, 2021, 12:10 a.m. UTC | #2
On Wed, Oct 20, 2021 at 2:16 PM Feifei Wang <feifei.wang2@arm.com> wrote:
>
> Introduce macros as generic interface for address monitoring.
>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  lib/eal/arm/include/rte_pause_64.h  | 126 ++++++++++++++++------------
>  lib/eal/include/generic/rte_pause.h |  32 +++++++
>  2 files changed, 104 insertions(+), 54 deletions(-)
>
> diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
> index e87d10b8cc..23954c2de2 100644
> --- a/lib/eal/arm/include/rte_pause_64.h
> +++ b/lib/eal/arm/include/rte_pause_64.h
> @@ -31,20 +31,12 @@ static inline void rte_pause(void)
>  /* Put processor into low power WFE(Wait For Event) state. */
>  #define __WFE() { asm volatile("wfe" : : : "memory"); }
>
> -static __rte_always_inline void
> -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> -               int memorder)
> -{
> -       uint16_t value;
> -
> -       assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> -
> -       /*
> -        * Atomic exclusive load from addr, it returns the 16-bit content of
> -        * *addr while making it 'monitored',when it is written by someone
> -        * else, the 'monitored' state is cleared and a event is generated

a event -> an event, in all occurrences.

> -        * implicitly to exit WFE.
> -        */
> +/*
> + * Atomic exclusive load from addr, it returns the 16-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
>  #define __LOAD_EXC_16(src, dst, memorder) {               \
>         if (memorder == __ATOMIC_RELAXED) {               \
>                 asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
> @@ -58,6 +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
>                         : "memory");                      \
>         } }
>
> +/*
> + * Atomic exclusive load from addr, it returns the 32-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
> +#define __LOAD_EXC_32(src, dst, memorder) {              \
> +       if (memorder == __ATOMIC_RELAXED) {              \
> +               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> +                       : [tmp] "=&r" (dst)              \
> +                       : [addr] "r"(src)                \
> +                       : "memory");                     \
> +       } else {                                         \
> +               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> +                       : [tmp] "=&r" (dst)              \
> +                       : [addr] "r"(src)                \
> +                       : "memory");                     \
> +       } }
> +
> +/*
> + * Atomic exclusive load from addr, it returns the 64-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
> +#define __LOAD_EXC_64(src, dst, memorder) {              \
> +       if (memorder == __ATOMIC_RELAXED) {              \
> +               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> +                       : [tmp] "=&r" (dst)              \
> +                       : [addr] "r"(src)                \
> +                       : "memory");                     \
> +       } else {                                         \
> +               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> +                       : [tmp] "=&r" (dst)              \
> +                       : [addr] "r"(src)                \
> +                       : "memory");                     \
> +       } }
> +
> +static __rte_always_inline void
> +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> +               int memorder)
> +{
> +       uint16_t value;
> +
> +       assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> +
>         __LOAD_EXC_16(addr, value, memorder)
>         if (value != expected) {
>                 __SEVL()
> @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
>                         __LOAD_EXC_16(addr, value, memorder)
>                 } while (value != expected);
>         }
> -#undef __LOAD_EXC_16
>  }
>
>  static __rte_always_inline void
> @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
>
>         assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
>
> -       /*
> -        * Atomic exclusive load from addr, it returns the 32-bit content of
> -        * *addr while making it 'monitored',when it is written by someone
> -        * else, the 'monitored' state is cleared and a event is generated
> -        * implicitly to exit WFE.
> -        */
> -#define __LOAD_EXC_32(src, dst, memorder) {              \
> -       if (memorder == __ATOMIC_RELAXED) {              \
> -               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> -                       : [tmp] "=&r" (dst)              \
> -                       : [addr] "r"(src)                \
> -                       : "memory");                     \
> -       } else {                                         \
> -               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> -                       : [tmp] "=&r" (dst)              \
> -                       : [addr] "r"(src)                \
> -                       : "memory");                     \
> -       } }
> -
>         __LOAD_EXC_32(addr, value, memorder)
>         if (value != expected) {
>                 __SEVL()
> @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
>                         __LOAD_EXC_32(addr, value, memorder)
>                 } while (value != expected);
>         }
> -#undef __LOAD_EXC_32
>  }
>
>  static __rte_always_inline void
> @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>
>         assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
>
> -       /*
> -        * Atomic exclusive load from addr, it returns the 64-bit content of
> -        * *addr while making it 'monitored',when it is written by someone
> -        * else, the 'monitored' state is cleared and a event is generated
> -        * implicitly to exit WFE.
> -        */
> -#define __LOAD_EXC_64(src, dst, memorder) {              \
> -       if (memorder == __ATOMIC_RELAXED) {              \
> -               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> -                       : [tmp] "=&r" (dst)              \
> -                       : [addr] "r"(src)                \
> -                       : "memory");                     \
> -       } else {                                         \
> -               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> -                       : [tmp] "=&r" (dst)              \
> -                       : [addr] "r"(src)                \
> -                       : "memory");                     \
> -       } }
> -
>         __LOAD_EXC_64(addr, value, memorder)
>         if (value != expected) {
>                 __SEVL()
> @@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>                 } while (value != expected);
>         }
>  }
> +
> +#define rte_wait_event(addr, mask, expected, cond, memorder, size) \

I think it is better to swap "cond" and "expected" positions to get
better readability.

 rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, 0, !=,
__ATOMIC_RELAXED, 64);

Vs

 rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, !=, 0,
__ATOMIC_RELAXED, 64);

> +do {                                                               \

Any reason to not make an inline function instead of macro?

> +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \

Shouldn't we add a __builtin_constant_p(size) check?

> +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
> +       memorder != __ATOMIC_RELAXED);                             \
> +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
> +       uint##size_t value;


                               \
> +       __LOAD_EXC_##size(addr, value, memorder)                   \
> +       if ((value & mask) cond expected) {                        \
> +               __SEVL()                                           \
> +               do {                                               \
> +                       __WFE()                                    \
> +                       __LOAD_EXC_##size(addr, value, memorder)   \
> +               } while ((value & mask) cond expected);            \
> +       }                                                          \
> +} while (0)
> +
> +#undef __LOAD_EXC_16
> +#undef __LOAD_EXC_32
>  #undef __LOAD_EXC_64
>
>  #undef __SEVL
> diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
> index 668ee4a184..20a5d2a9fd 100644
> --- a/lib/eal/include/generic/rte_pause.h
> +++ b/lib/eal/include/generic/rte_pause.h
> @@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>         while (__atomic_load_n(addr, memorder) != expected)
>                 rte_pause();
>  }
> +
> +/*
> + * Wait until *addr breaks the condition, with a relaxed memory
> + * ordering model meaning the loads around this API can be reordered.
> + *
> + * @param addr
> + *  A pointer to the memory location.
> + * @param mask
> + *  A mask of value bits in interest.
> + * @param expected
> + *  A 16-bit expected value to be in the memory location.
> + * @param cond
> + *  A symbol representing the condition (==, !=).
> + * @param memorder
> + *  Two different memory orders that can be specified:
> + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + *  C++11 memory orders with the same names, see the C++11 standard or
> + *  the GCC wiki on atomic synchronization for detailed definition.
> + * @param size
> + * The bit size of *addr:
> + * It is used for arm architecture to choose load instructions,
> + * and the optional value is 16, 32 and 64.
> + */
> +#define rte_wait_event(addr, mask, expected, cond, memorder, size)     \
> +do {                                                                   \
> +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
> +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&               \
> +                               memorder != __ATOMIC_RELAXED);         \
> +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \
> +       while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> +               rte_pause();                                           \
> +} while (0)
>  #endif
>
>  #endif /* _RTE_PAUSE_H_ */
> --
> 2.25.1
>
  
Feifei Wang Oct. 25, 2021, 9:20 a.m. UTC | #3
> -----Original Message-----
> From: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> Sent: Friday, October 22, 2021 12:25 AM
> To: Feifei Wang <Feifei.Wang2@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>
> Subject: RE: [PATCH v4 1/5] eal: add new definitions for wait scheme
> 
> > Introduce macros as generic interface for address monitoring.
> >
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> >  lib/eal/arm/include/rte_pause_64.h  | 126
> > ++++++++++++++++------------  lib/eal/include/generic/rte_pause.h |
> > 32 +++++++
> >  2 files changed, 104 insertions(+), 54 deletions(-)
> >
> > diff --git a/lib/eal/arm/include/rte_pause_64.h
> > b/lib/eal/arm/include/rte_pause_64.h
> > index e87d10b8cc..23954c2de2 100644
> > --- a/lib/eal/arm/include/rte_pause_64.h
> > +++ b/lib/eal/arm/include/rte_pause_64.h
> > @@ -31,20 +31,12 @@ static inline void rte_pause(void)
> >  /* Put processor into low power WFE(Wait For Event) state. */
> > #define __WFE() { asm volatile("wfe" : : : "memory"); }
> >
> > -static __rte_always_inline void
> > -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > -		int memorder)
> > -{
> > -	uint16_t value;
> > -
> > -	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> __ATOMIC_RELAXED);
> > -
> > -	/*
> > -	 * Atomic exclusive load from addr, it returns the 16-bit content of
> > -	 * *addr while making it 'monitored',when it is written by someone
> > -	 * else, the 'monitored' state is cleared and a event is generated
> > -	 * implicitly to exit WFE.
> > -	 */
> > +/*
> > + * Atomic exclusive load from addr, it returns the 16-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and a event is generated
> > + * implicitly to exit WFE.
> > + */
> >  #define __LOAD_EXC_16(src, dst, memorder) {               \
> >  	if (memorder == __ATOMIC_RELAXED) {               \
> >  		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \ @@ -58,6 +50,52
> @@
> > rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> >  			: "memory");                      \
> >  	} }
> >
> > +/*
> > + * Atomic exclusive load from addr, it returns the 32-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and a event is generated
> > + * implicitly to exit WFE.
> > + */
> > +#define __LOAD_EXC_32(src, dst, memorder) {              \
> > +	if (memorder == __ATOMIC_RELAXED) {              \
> > +		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > +			: [tmp] "=&r" (dst)              \
> > +			: [addr] "r"(src)                \
> > +			: "memory");                     \
> > +	} else {                                         \
> > +		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > +			: [tmp] "=&r" (dst)              \
> > +			: [addr] "r"(src)                \
> > +			: "memory");                     \
> > +	} }
> > +
> > +/*
> > + * Atomic exclusive load from addr, it returns the 64-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and a event is generated
> > + * implicitly to exit WFE.
> > + */
> > +#define __LOAD_EXC_64(src, dst, memorder) {              \
> > +	if (memorder == __ATOMIC_RELAXED) {              \
> > +		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > +			: [tmp] "=&r" (dst)              \
> > +			: [addr] "r"(src)                \
> > +			: "memory");                     \
> > +	} else {                                         \
> > +		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > +			: [tmp] "=&r" (dst)              \
> > +			: [addr] "r"(src)                \
> > +			: "memory");                     \
> > +	} }
> > +
> > +static __rte_always_inline void
> > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > +		int memorder)
> > +{
> > +	uint16_t value;
> > +
> > +	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > +__ATOMIC_RELAXED);
> > +
> >  	__LOAD_EXC_16(addr, value, memorder)
> >  	if (value != expected) {
> >  		__SEVL()
> > @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr,
> uint16_t expected,
> >  			__LOAD_EXC_16(addr, value, memorder)
> >  		} while (value != expected);
> >  	}
> > -#undef __LOAD_EXC_16
> >  }
> >
> >  static __rte_always_inline void
> > @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > uint32_t expected,
> >
> >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> >
> > -	/*
> > -	 * Atomic exclusive load from addr, it returns the 32-bit content of
> > -	 * *addr while making it 'monitored',when it is written by someone
> > -	 * else, the 'monitored' state is cleared and a event is generated
> > -	 * implicitly to exit WFE.
> > -	 */
> > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > -	if (memorder == __ATOMIC_RELAXED) {              \
> > -		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > -			: [tmp] "=&r" (dst)              \
> > -			: [addr] "r"(src)                \
> > -			: "memory");                     \
> > -	} else {                                         \
> > -		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > -			: [tmp] "=&r" (dst)              \
> > -			: [addr] "r"(src)                \
> > -			: "memory");                     \
> > -	} }
> > -
> >  	__LOAD_EXC_32(addr, value, memorder)
> >  	if (value != expected) {
> >  		__SEVL()
> > @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> uint32_t expected,
> >  			__LOAD_EXC_32(addr, value, memorder)
> >  		} while (value != expected);
> >  	}
> > -#undef __LOAD_EXC_32
> >  }
> >
> >  static __rte_always_inline void
> > @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> >
> >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> >
> > -	/*
> > -	 * Atomic exclusive load from addr, it returns the 64-bit content of
> > -	 * *addr while making it 'monitored',when it is written by someone
> > -	 * else, the 'monitored' state is cleared and a event is generated
> > -	 * implicitly to exit WFE.
> > -	 */
> > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > -	if (memorder == __ATOMIC_RELAXED) {              \
> > -		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > -			: [tmp] "=&r" (dst)              \
> > -			: [addr] "r"(src)                \
> > -			: "memory");                     \
> > -	} else {                                         \
> > -		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > -			: [tmp] "=&r" (dst)              \
> > -			: [addr] "r"(src)                \
> > -			: "memory");                     \
> > -	} }
> > -
> >  	__LOAD_EXC_64(addr, value, memorder)
> >  	if (value != expected) {
> >  		__SEVL()
> > @@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> uint64_t expected,
> >  		} while (value != expected);
> >  	}
> >  }
> > +
> > +#define rte_wait_event(addr, mask, expected, cond, memorder, size) \
> > +do {                                                               \
> > +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \
> > +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
> > +	memorder != __ATOMIC_RELAXED);                             \
> > +	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
> > +	uint##size_t value;                                        \
> > +	__LOAD_EXC_##size(addr, value, memorder)                   \
> > +	if ((value & mask) cond expected) {		           \
> > +		__SEVL()                                           \
> > +		do {                                               \
> > +			__WFE()                                    \
> > +			__LOAD_EXC_##size(addr, value, memorder)   \
> > +		} while ((value & mask) cond expected);            \
> > +	}                                                          \
> > +} while (0)
> > +
> > +#undef __LOAD_EXC_16
> > +#undef __LOAD_EXC_32
> >  #undef __LOAD_EXC_64
> >
> >  #undef __SEVL
> > diff --git a/lib/eal/include/generic/rte_pause.h
> > b/lib/eal/include/generic/rte_pause.h
> > index 668ee4a184..20a5d2a9fd 100644
> > --- a/lib/eal/include/generic/rte_pause.h
> > +++ b/lib/eal/include/generic/rte_pause.h
> > @@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> uint64_t expected,
> >  	while (__atomic_load_n(addr, memorder) != expected)
> >  		rte_pause();
> >  }
> > +
> > +/*
> > + * Wait until *addr breaks the condition, with a relaxed memory
> > + * ordering model meaning the loads around this API can be reordered.
> > + *
> > + * @param addr
> > + *  A pointer to the memory location.
> > + * @param mask
> > + *  A mask of value bits in interest.
> > + * @param expected
> > + *  A 16-bit expected value to be in the memory location.
> > + * @param cond
> > + *  A symbol representing the condition (==, !=).
> > + * @param memorder
> > + *  Two different memory orders that can be specified:
> > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > + *  C++11 memory orders with the same names, see the C++11 standard
> > +or
> > + *  the GCC wiki on atomic synchronization for detailed definition.
> > + * @param size
> > + * The bit size of *addr:
> > + * It is used for arm architecture to choose load instructions,
> > + * and the optional value is 16, 32 and 64.
> > + */
> > +#define rte_wait_event(addr, mask, expected, cond, memorder, size)     \
> > +do {                                                                   \
> > +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
> > +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&               \
> > +				memorder != __ATOMIC_RELAXED);         \
> > +	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \
> 
> I don't really understand why you need 'size' passed as a parameter.
> Can't it be:
> size_t size = sizeof(*(addr));
> And then:
> RTE_BUILD_BUG_ON(size != sizeof(uint16_t) && size != sizeof(uint32_t) &&
> size != sizeof(uint64_t)); ?
> 
> > +	while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> > +		rte_pause();                                           \
> 
> Just to repeat my own comment from the previous version's review:
> put () around macro parameters in the macro body.
> It will save you from a lot of unexpected trouble.

Sorry, I didn't catch the point.
In this version I wanted to use '__LOAD_EXC_##size' to select the load instruction, so I passed
'size' as a parameter. In the next version, I will update this as:

#define __LOAD_EXC(src, dst, memorder, size) {    \
	if (size == 16)                               \
		__LOAD_EXC_16(src, dst, memorder)     \
	else if (size == 32)                          \
		__LOAD_EXC_32(src, dst, memorder)     \
	else if (size == 64)                          \
		__LOAD_EXC_64(src, dst, memorder)     \
}

#define rte_wait_event(addr, mask, cond, expected, memorder)    \
do {                                                            \
	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));      \
	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&        \
				memorder != __ATOMIC_RELAXED);  \
	uint32_t size = sizeof(*addr) << 3;                     \
	typeof(*addr) value = 0;                                \
	__LOAD_EXC(addr, value, memorder, size)                 \
	if ((value & mask) cond expected) {                     \
		__SEVL()                                        \
		do {                                            \
			__WFE()                                 \
			__LOAD_EXC(addr, value, memorder, size) \
		} while ((value & mask) cond expected);         \
	}                                                       \
} while (0)
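
A hypothetical call with this revised signature would then carry no explicit size (the variable here is only illustrative):

/* Spin while (flags & 0xffff) == 0; the load width now follows from
 * the pointed-to type (uint64_t in this sketch), so no size argument.
 */
volatile uint64_t flags;
rte_wait_event(&flags, 0xffff, ==, 0, __ATOMIC_RELAXED);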
  
Feifei Wang Oct. 25, 2021, 9:30 a.m. UTC | #4
> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Friday, October 22, 2021 8:10 AM
> To: Feifei Wang <Feifei.Wang2@arm.com>
> Cc: Ruifeng Wang <Ruifeng.Wang@arm.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; dpdk-dev <dev@dpdk.org>; nd
> <nd@arm.com>
> Subject: Re: [dpdk-dev] [PATCH v4 1/5] eal: add new definitions for wait scheme
> 
> On Wed, Oct 20, 2021 at 2:16 PM Feifei Wang <feifei.wang2@arm.com>
> wrote:
> >
> > Introduce macros as generic interface for address monitoring.
> >
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> >  lib/eal/arm/include/rte_pause_64.h  | 126
> > ++++++++++++++++------------  lib/eal/include/generic/rte_pause.h |
> > 32 +++++++
> >  2 files changed, 104 insertions(+), 54 deletions(-)
> >
> > diff --git a/lib/eal/arm/include/rte_pause_64.h
> > b/lib/eal/arm/include/rte_pause_64.h
> > index e87d10b8cc..23954c2de2 100644
> > --- a/lib/eal/arm/include/rte_pause_64.h
> > +++ b/lib/eal/arm/include/rte_pause_64.h
> > @@ -31,20 +31,12 @@ static inline void rte_pause(void)
> >  /* Put processor into low power WFE(Wait For Event) state. */
> > #define __WFE() { asm volatile("wfe" : : : "memory"); }
> >
> > -static __rte_always_inline void
> > -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > -               int memorder)
> > -{
> > -       uint16_t value;
> > -
> > -       assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> __ATOMIC_RELAXED);
> > -
> > -       /*
> > -        * Atomic exclusive load from addr, it returns the 16-bit content of
> > -        * *addr while making it 'monitored',when it is written by someone
> > -        * else, the 'monitored' state is cleared and a event is generated
> 
> a event -> an event, in all occurrences.
> 
> > -        * implicitly to exit WFE.
> > -        */
> > +/*
> > + * Atomic exclusive load from addr, it returns the 16-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and a event is generated
> > + * implicitly to exit WFE.
> > + */
> >  #define __LOAD_EXC_16(src, dst, memorder) {               \
> >         if (memorder == __ATOMIC_RELAXED) {               \
> >                 asm volatile("ldxrh %w[tmp], [%x[addr]]"  \ @@ -58,6
> > +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t
> expected,
> >                         : "memory");                      \
> >         } }
> >
> > +/*
> > + * Atomic exclusive load from addr, it returns the 32-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and a event is generated
> > + * implicitly to exit WFE.
> > + */
> > +#define __LOAD_EXC_32(src, dst, memorder) {              \
> > +       if (memorder == __ATOMIC_RELAXED) {              \
> > +               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > +                       : [tmp] "=&r" (dst)              \
> > +                       : [addr] "r"(src)                \
> > +                       : "memory");                     \
> > +       } else {                                         \
> > +               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > +                       : [tmp] "=&r" (dst)              \
> > +                       : [addr] "r"(src)                \
> > +                       : "memory");                     \
> > +       } }
> > +
> > +/*
> > + * Atomic exclusive load from addr, it returns the 64-bit content of
> > + * *addr while making it 'monitored', when it is written by someone
> > + * else, the 'monitored' state is cleared and a event is generated
> > + * implicitly to exit WFE.
> > + */
> > +#define __LOAD_EXC_64(src, dst, memorder) {              \
> > +       if (memorder == __ATOMIC_RELAXED) {              \
> > +               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > +                       : [tmp] "=&r" (dst)              \
> > +                       : [addr] "r"(src)                \
> > +                       : "memory");                     \
> > +       } else {                                         \
> > +               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > +                       : [tmp] "=&r" (dst)              \
> > +                       : [addr] "r"(src)                \
> > +                       : "memory");                     \
> > +       } }
> > +
> > +static __rte_always_inline void
> > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > +               int memorder)
> > +{
> > +       uint16_t value;
> > +
> > +       assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > + __ATOMIC_RELAXED);
> > +
> >         __LOAD_EXC_16(addr, value, memorder)
> >         if (value != expected) {
> >                 __SEVL()
> > @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr,
> uint16_t expected,
> >                         __LOAD_EXC_16(addr, value, memorder)
> >                 } while (value != expected);
> >         }
> > -#undef __LOAD_EXC_16
> >  }
> >
> >  static __rte_always_inline void
> > @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > uint32_t expected,
> >
> >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> >
> > -       /*
> > -        * Atomic exclusive load from addr, it returns the 32-bit content of
> > -        * *addr while making it 'monitored',when it is written by someone
> > -        * else, the 'monitored' state is cleared and a event is generated
> > -        * implicitly to exit WFE.
> > -        */
> > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > -       if (memorder == __ATOMIC_RELAXED) {              \
> > -               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > -                       : [tmp] "=&r" (dst)              \
> > -                       : [addr] "r"(src)                \
> > -                       : "memory");                     \
> > -       } else {                                         \
> > -               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > -                       : [tmp] "=&r" (dst)              \
> > -                       : [addr] "r"(src)                \
> > -                       : "memory");                     \
> > -       } }
> > -
> >         __LOAD_EXC_32(addr, value, memorder)
> >         if (value != expected) {
> >                 __SEVL()
> > @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> uint32_t expected,
> >                         __LOAD_EXC_32(addr, value, memorder)
> >                 } while (value != expected);
> >         }
> > -#undef __LOAD_EXC_32
> >  }
> >
> >  static __rte_always_inline void
> > @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> >
> >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> >
> > -       /*
> > -        * Atomic exclusive load from addr, it returns the 64-bit content of
> > -        * *addr while making it 'monitored',when it is written by someone
> > -        * else, the 'monitored' state is cleared and a event is generated
> > -        * implicitly to exit WFE.
> > -        */
> > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > -       if (memorder == __ATOMIC_RELAXED) {              \
> > -               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > -                       : [tmp] "=&r" (dst)              \
> > -                       : [addr] "r"(src)                \
> > -                       : "memory");                     \
> > -       } else {                                         \
> > -               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > -                       : [tmp] "=&r" (dst)              \
> > -                       : [addr] "r"(src)                \
> > -                       : "memory");                     \
> > -       } }
> > -
> >         __LOAD_EXC_64(addr, value, memorder)
> >         if (value != expected) {
> >                 __SEVL()
> > @@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> uint64_t expected,
> >                 } while (value != expected);
> >         }
> >  }
> > +
> > +#define rte_wait_event(addr, mask, expected, cond, memorder, size) \
> 
> I think it is better to swap "cond" and "expected" positions to get better
> readability.
Thanks for the comment, it is better than before; I will update it in the next version.
> 
>  rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, 0, !=,
> __ATOMIC_RELAXED, 64);
> 
> Vs
> 
>  rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, !=, 0,
> __ATOMIC_RELAXED, 64);
> 
> > +do {                                                               \
> 
> Any reason to not make an inline function instead of macro?
Because an inline function would mean many new APIs for the different cases, as the sketch below
illustrates. Instead, we referred to the Linux 'wait_event' code as an example. Please see the first version and its discussion:
http://patches.dpdk.org/project/dpdk/cover/20210902053253.3017858-1-feifei.wang2@arm.com/
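
To illustrate the point (a sketch, not code from the thread): a C inline function cannot take the comparison operator as an argument, so a function-based interface would need one helper per width and per condition, for example:

static inline void
rte_wait_while_masked_eq_32(volatile uint32_t *addr, uint32_t mask,
		uint32_t expected, int memorder)
{
	/* hypothetical helper covering only the 32-bit '==' case */
	while ((__atomic_load_n(addr, memorder) & mask) == expected)
		rte_pause();
}

and similarly for '!=' and the 16/64-bit widths, whereas the macro covers all of these with a single definition.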
> 
> > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \
> 
> Shouldn't we add a __builtin_constant_p(size) check?

Please see the discussion with Konstantin.
'size' will no longer be a parameter, so there is no need to check it with RTE_BUILD_BUG_ON.
> 
> > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
> > +       memorder != __ATOMIC_RELAXED);                             \
> > +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
> > +       uint##size_t value;
> 
> 
>                                \
> > +       __LOAD_EXC_##size(addr, value, memorder)                   \
> > +       if ((value & mask) cond expected) {                        \
> > +               __SEVL()                                           \
> > +               do {                                               \
> > +                       __WFE()                                    \
> > +                       __LOAD_EXC_##size(addr, value, memorder)   \
> > +               } while ((value & mask) cond expected);            \
> > +       }                                                          \
> > +} while (0)
> > +
> > +#undef __LOAD_EXC_16
> > +#undef __LOAD_EXC_32
> >  #undef __LOAD_EXC_64
> >
> >  #undef __SEVL
> > diff --git a/lib/eal/include/generic/rte_pause.h
> > b/lib/eal/include/generic/rte_pause.h
> > index 668ee4a184..20a5d2a9fd 100644
> > --- a/lib/eal/include/generic/rte_pause.h
> > +++ b/lib/eal/include/generic/rte_pause.h
> > @@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> uint64_t expected,
> >         while (__atomic_load_n(addr, memorder) != expected)
> >                 rte_pause();
> >  }
> > +
> > +/*
> > + * Wait until *addr breaks the condition, with a relaxed memory
> > + * ordering model meaning the loads around this API can be reordered.
> > + *
> > + * @param addr
> > + *  A pointer to the memory location.
> > + * @param mask
> > + *  A mask of value bits in interest.
> > + * @param expected
> > + *  A 16-bit expected value to be in the memory location.
> > + * @param cond
> > + *  A symbol representing the condition (==, !=).
> > + * @param memorder
> > + *  Two different memory orders that can be specified:
> > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > + *  C++11 memory orders with the same names, see the C++11 standard
> > +or
> > + *  the GCC wiki on atomic synchronization for detailed definition.
> > + * @param size
> > + * The bit size of *addr:
> > + * It is used for arm architecture to choose load instructions,
> > + * and the optional value is 16, 32 and 64.
> > + */
> > +#define rte_wait_event(addr, mask, expected, cond, memorder, size)     \
> > +do {                                                                   \
> > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
> > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&               \
> > +                               memorder != __ATOMIC_RELAXED);         \
> > +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \
> > +       while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> > +               rte_pause();                                           \
> > +} while (0)
> >  #endif
> >
> >  #endif /* _RTE_PAUSE_H_ */
> > --
> > 2.25.1
> >
  
Jerin Jacob Oct. 25, 2021, 9:43 a.m. UTC | #5
On Mon, Oct 25, 2021 at 3:01 PM Feifei Wang <Feifei.Wang2@arm.com> wrote:
>
> > -----Original Message-----
> > From: Jerin Jacob <jerinjacobk@gmail.com>
> > Sent: Friday, October 22, 2021 8:10 AM
> > To: Feifei Wang <Feifei.Wang2@arm.com>
> > Cc: Ruifeng Wang <Ruifeng.Wang@arm.com>; Ananyev, Konstantin
> > <konstantin.ananyev@intel.com>; dpdk-dev <dev@dpdk.org>; nd
> > <nd@arm.com>
> > Subject: Re: [dpdk-dev] [PATCH v4 1/5] eal: add new definitions for wait scheme
> >
> > On Wed, Oct 20, 2021 at 2:16 PM Feifei Wang <feifei.wang2@arm.com>
> > wrote:
> > >
> > > Introduce macros as generic interface for address monitoring.
> > >
> > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > ---
> > >  lib/eal/arm/include/rte_pause_64.h  | 126
> > > ++++++++++++++++------------  lib/eal/include/generic/rte_pause.h |
> > > 32 +++++++
> > >  2 files changed, 104 insertions(+), 54 deletions(-)
> > >
> > > diff --git a/lib/eal/arm/include/rte_pause_64.h
> > > b/lib/eal/arm/include/rte_pause_64.h
> > > index e87d10b8cc..23954c2de2 100644
> > > --- a/lib/eal/arm/include/rte_pause_64.h
> > > +++ b/lib/eal/arm/include/rte_pause_64.h
> > > @@ -31,20 +31,12 @@ static inline void rte_pause(void)
> > >  /* Put processor into low power WFE(Wait For Event) state. */
> > > #define __WFE() { asm volatile("wfe" : : : "memory"); }
> > >
> > > -static __rte_always_inline void
> > > -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > -               int memorder)
> > > -{
> > > -       uint16_t value;
> > > -
> > > -       assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> > > -
> > > -       /*
> > > -        * Atomic exclusive load from addr, it returns the 16-bit content of
> > > -        * *addr while making it 'monitored',when it is written by someone
> > > -        * else, the 'monitored' state is cleared and a event is generated
> >
> > a event -> an event, in all occurrences.
> >
> > > -        * implicitly to exit WFE.
> > > -        */
> > > +/*
> > > + * Atomic exclusive load from addr, it returns the 16-bit content of
> > > + * *addr while making it 'monitored', when it is written by someone
> > > + * else, the 'monitored' state is cleared and a event is generated
> > > + * implicitly to exit WFE.
> > > + */
> > >  #define __LOAD_EXC_16(src, dst, memorder) {               \
> > >         if (memorder == __ATOMIC_RELAXED) {               \
> > >                 asm volatile("ldxrh %w[tmp], [%x[addr]]"  \ @@ -58,6
> > > +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t
> > expected,
> > >                         : "memory");                      \
> > >         } }
> > >
> > > +/*
> > > + * Atomic exclusive load from addr, it returns the 32-bit content of
> > > + * *addr while making it 'monitored', when it is written by someone
> > > + * else, the 'monitored' state is cleared and a event is generated
> > > + * implicitly to exit WFE.
> > > + */
> > > +#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > +       if (memorder == __ATOMIC_RELAXED) {              \
> > > +               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > +                       : [tmp] "=&r" (dst)              \
> > > +                       : [addr] "r"(src)                \
> > > +                       : "memory");                     \
> > > +       } else {                                         \
> > > +               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > +                       : [tmp] "=&r" (dst)              \
> > > +                       : [addr] "r"(src)                \
> > > +                       : "memory");                     \
> > > +       } }
> > > +
> > > +/*
> > > + * Atomic exclusive load from addr, it returns the 64-bit content of
> > > + * *addr while making it 'monitored', when it is written by someone
> > > + * else, the 'monitored' state is cleared and a event is generated
> > > + * implicitly to exit WFE.
> > > + */
> > > +#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > +       if (memorder == __ATOMIC_RELAXED) {              \
> > > +               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > +                       : [tmp] "=&r" (dst)              \
> > > +                       : [addr] "r"(src)                \
> > > +                       : "memory");                     \
> > > +       } else {                                         \
> > > +               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > +                       : [tmp] "=&r" (dst)              \
> > > +                       : [addr] "r"(src)                \
> > > +                       : "memory");                     \
> > > +       } }
> > > +
> > > +static __rte_always_inline void
> > > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > +               int memorder)
> > > +{
> > > +       uint16_t value;
> > > +
> > > +       assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > + __ATOMIC_RELAXED);
> > > +
> > >         __LOAD_EXC_16(addr, value, memorder)
> > >         if (value != expected) {
> > >                 __SEVL()
> > > @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr,
> > uint16_t expected,
> > >                         __LOAD_EXC_16(addr, value, memorder)
> > >                 } while (value != expected);
> > >         }
> > > -#undef __LOAD_EXC_16
> > >  }
> > >
> > >  static __rte_always_inline void
> > > @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > > uint32_t expected,
> > >
> > >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > >
> > > -       /*
> > > -        * Atomic exclusive load from addr, it returns the 32-bit content of
> > > -        * *addr while making it 'monitored',when it is written by someone
> > > -        * else, the 'monitored' state is cleared and a event is generated
> > > -        * implicitly to exit WFE.
> > > -        */
> > > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > -       if (memorder == __ATOMIC_RELAXED) {              \
> > > -               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > -                       : [tmp] "=&r" (dst)              \
> > > -                       : [addr] "r"(src)                \
> > > -                       : "memory");                     \
> > > -       } else {                                         \
> > > -               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > -                       : [tmp] "=&r" (dst)              \
> > > -                       : [addr] "r"(src)                \
> > > -                       : "memory");                     \
> > > -       } }
> > > -
> > >         __LOAD_EXC_32(addr, value, memorder)
> > >         if (value != expected) {
> > >                 __SEVL()
> > > @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > uint32_t expected,
> > >                         __LOAD_EXC_32(addr, value, memorder)
> > >                 } while (value != expected);
> > >         }
> > > -#undef __LOAD_EXC_32
> > >  }
> > >
> > >  static __rte_always_inline void
> > > @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > > uint64_t expected,
> > >
> > >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > >
> > > -       /*
> > > -        * Atomic exclusive load from addr, it returns the 64-bit content of
> > > -        * *addr while making it 'monitored',when it is written by someone
> > > -        * else, the 'monitored' state is cleared and a event is generated
> > > -        * implicitly to exit WFE.
> > > -        */
> > > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > -       if (memorder == __ATOMIC_RELAXED) {              \
> > > -               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > -                       : [tmp] "=&r" (dst)              \
> > > -                       : [addr] "r"(src)                \
> > > -                       : "memory");                     \
> > > -       } else {                                         \
> > > -               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > -                       : [tmp] "=&r" (dst)              \
> > > -                       : [addr] "r"(src)                \
> > > -                       : "memory");                     \
> > > -       } }
> > > -
> > >         __LOAD_EXC_64(addr, value, memorder)
> > >         if (value != expected) {
> > >                 __SEVL()
> > > @@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> > >                 } while (value != expected);
> > >         }
> > >  }
> > > +
> > > +#define rte_wait_event(addr, mask, expected, cond, memorder, size) \
> >
> > I think it is better to swap "cond" and "expected" positions to get better
> > readability.
> Thanks for the comments, it is better than before and I will update in the next version.
> >
> >  rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, 0, !=,
> > __ATOMIC_RELAXED, 64);
> >
> > Vs
> >
> >  rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, !=, 0,
> > __ATOMIC_RELAXED, 64);
> >
> > > +do {                                                               \
> >
> > Any reason to not make an inline function instead of macro?
> Because there were many new APIs for different cases. And then we refer to
> Linux 'wait_event' code for an example. Please see the first version and its discussion:
> http://patches.dpdk.org/project/dpdk/cover/20210902053253.3017858-1-feifei.wang2@arm.com/


OK.


> >
> > > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \
> >
> > Shouldn't we add a __builtin_constant_p(size) check?
>
> Please see the discussion with Konstantin.
> 'size' will not be as a parameter and then it is unnecessary to check it with build_bug.

Makes sense to remove 'size'. My comment applied only in case 'size'
still had to be passed as a parameter.

> >
> > > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
> > > +       memorder != __ATOMIC_RELAXED);                             \
> > > +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
> > > +       uint##size_t value;                                        \
> > > +       __LOAD_EXC_##size(addr, value, memorder)                   \
> > > +       if ((value & mask) cond expected) {                        \
> > > +               __SEVL()                                           \
> > > +               do {                                               \
> > > +                       __WFE()                                    \
> > > +                       __LOAD_EXC_##size(addr, value, memorder)   \
> > > +               } while ((value & mask) cond expected);            \
> > > +       }                                                          \
> > > +} while (0)
> > > +
> > > +#undef __LOAD_EXC_16
> > > +#undef __LOAD_EXC_32
> > >  #undef __LOAD_EXC_64
> > >
> > >  #undef __SEVL
> > > diff --git a/lib/eal/include/generic/rte_pause.h
> > > b/lib/eal/include/generic/rte_pause.h
> > > index 668ee4a184..20a5d2a9fd 100644
> > > --- a/lib/eal/include/generic/rte_pause.h
> > > +++ b/lib/eal/include/generic/rte_pause.h
> > > @@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> > >         while (__atomic_load_n(addr, memorder) != expected)
> > >                 rte_pause();
> > >  }
> > > +
> > > +/*
> > > + * Wait until *addr breaks the condition, with a relaxed memory
> > > + * ordering model meaning the loads around this API can be reordered.
> > > + *
> > > + * @param addr
> > > + *  A pointer to the memory location.
> > > + * @param mask
> > > + *  A mask of value bits in interest.
> > > + * @param expected
> > > + *  A 16-bit expected value to be in the memory location.
> > > + * @param cond
> > > + *  A symbol representing the condition (==, !=).
> > > + * @param memorder
> > > + *  Two different memory orders that can be specified:
> > > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > > + *  C++11 memory orders with the same names, see the C++11 standard
> > > +or
> > > + *  the GCC wiki on atomic synchronization for detailed definition.
> > > + * @param size
> > > + * The bit size of *addr:
> > > + * It is used for arm architecture to choose load instructions,
> > > + * and the optional value is 16, 32 and 64.
> > > + */
> > > +#define rte_wait_event(addr, mask, expected, cond, memorder, size)     \
> > > +do {                                                                   \
> > > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
> > > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&               \
> > > +                               memorder != __ATOMIC_RELAXED);         \
> > > +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \
> > > +       while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> > > +               rte_pause();                                           \
> > > +} while (0)
> > >  #endif
> > >
> > >  #endif /* _RTE_PAUSE_H_ */
> > > --
> > > 2.25.1
> > >
  
Ananyev, Konstantin Oct. 25, 2021, 2:28 p.m. UTC | #6
> > > Introduce macros as generic interface for address monitoring.
> > >
> > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > ---
> > >  lib/eal/arm/include/rte_pause_64.h  | 126
> > > ++++++++++++++++------------  lib/eal/include/generic/rte_pause.h |
> > > 32 +++++++
> > >  2 files changed, 104 insertions(+), 54 deletions(-)
> > >
> > > diff --git a/lib/eal/arm/include/rte_pause_64.h
> > > b/lib/eal/arm/include/rte_pause_64.h
> > > index e87d10b8cc..23954c2de2 100644
> > > --- a/lib/eal/arm/include/rte_pause_64.h
> > > +++ b/lib/eal/arm/include/rte_pause_64.h
> > > @@ -31,20 +31,12 @@ static inline void rte_pause(void)
> > >  /* Put processor into low power WFE(Wait For Event) state. */
> > > #define __WFE() { asm volatile("wfe" : : : "memory"); }
> > >
> > > -static __rte_always_inline void
> > > -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > -		int memorder)
> > > -{
> > > -	uint16_t value;
> > > -
> > > -	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > __ATOMIC_RELAXED);
> > > -
> > > -	/*
> > > -	 * Atomic exclusive load from addr, it returns the 16-bit content of
> > > -	 * *addr while making it 'monitored',when it is written by someone
> > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > -	 * implicitly to exit WFE.
> > > -	 */
> > > +/*
> > > + * Atomic exclusive load from addr, it returns the 16-bit content of
> > > + * *addr while making it 'monitored', when it is written by someone
> > > + * else, the 'monitored' state is cleared and a event is generated
> > > + * implicitly to exit WFE.
> > > + */
> > >  #define __LOAD_EXC_16(src, dst, memorder) {               \
> > >  	if (memorder == __ATOMIC_RELAXED) {               \
> > >  		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \ @@ -58,6 +50,52
> > @@
> > > rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > >  			: "memory");                      \
> > >  	} }
> > >
> > > +/*
> > > + * Atomic exclusive load from addr, it returns the 32-bit content of
> > > + * *addr while making it 'monitored', when it is written by someone
> > > + * else, the 'monitored' state is cleared and a event is generated
> > > + * implicitly to exit WFE.
> > > + */
> > > +#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > +	if (memorder == __ATOMIC_RELAXED) {              \
> > > +		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > +			: [tmp] "=&r" (dst)              \
> > > +			: [addr] "r"(src)                \
> > > +			: "memory");                     \
> > > +	} else {                                         \
> > > +		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > +			: [tmp] "=&r" (dst)              \
> > > +			: [addr] "r"(src)                \
> > > +			: "memory");                     \
> > > +	} }
> > > +
> > > +/*
> > > + * Atomic exclusive load from addr, it returns the 64-bit content of
> > > + * *addr while making it 'monitored', when it is written by someone
> > > + * else, the 'monitored' state is cleared and a event is generated
> > > + * implicitly to exit WFE.
> > > + */
> > > +#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > +	if (memorder == __ATOMIC_RELAXED) {              \
> > > +		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > +			: [tmp] "=&r" (dst)              \
> > > +			: [addr] "r"(src)                \
> > > +			: "memory");                     \
> > > +	} else {                                         \
> > > +		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > +			: [tmp] "=&r" (dst)              \
> > > +			: [addr] "r"(src)                \
> > > +			: "memory");                     \
> > > +	} }
> > > +
> > > +static __rte_always_inline void
> > > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > +		int memorder)
> > > +{
> > > +	uint16_t value;
> > > +
> > > +	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > +__ATOMIC_RELAXED);
> > > +
> > >  	__LOAD_EXC_16(addr, value, memorder)
> > >  	if (value != expected) {
> > >  		__SEVL()
> > > @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr,
> > uint16_t expected,
> > >  			__LOAD_EXC_16(addr, value, memorder)
> > >  		} while (value != expected);
> > >  	}
> > > -#undef __LOAD_EXC_16
> > >  }
> > >
> > >  static __rte_always_inline void
> > > @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > > uint32_t expected,
> > >
> > >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > >
> > > -	/*
> > > -	 * Atomic exclusive load from addr, it returns the 32-bit content of
> > > -	 * *addr while making it 'monitored',when it is written by someone
> > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > -	 * implicitly to exit WFE.
> > > -	 */
> > > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > -	if (memorder == __ATOMIC_RELAXED) {              \
> > > -		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > -			: [tmp] "=&r" (dst)              \
> > > -			: [addr] "r"(src)                \
> > > -			: "memory");                     \
> > > -	} else {                                         \
> > > -		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > -			: [tmp] "=&r" (dst)              \
> > > -			: [addr] "r"(src)                \
> > > -			: "memory");                     \
> > > -	} }
> > > -
> > >  	__LOAD_EXC_32(addr, value, memorder)
> > >  	if (value != expected) {
> > >  		__SEVL()
> > > @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr,
> > uint32_t expected,
> > >  			__LOAD_EXC_32(addr, value, memorder)
> > >  		} while (value != expected);
> > >  	}
> > > -#undef __LOAD_EXC_32
> > >  }
> > >
> > >  static __rte_always_inline void
> > > @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > > uint64_t expected,
> > >
> > >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > >
> > > -	/*
> > > -	 * Atomic exclusive load from addr, it returns the 64-bit content of
> > > -	 * *addr while making it 'monitored',when it is written by someone
> > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > -	 * implicitly to exit WFE.
> > > -	 */
> > > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > -	if (memorder == __ATOMIC_RELAXED) {              \
> > > -		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > -			: [tmp] "=&r" (dst)              \
> > > -			: [addr] "r"(src)                \
> > > -			: "memory");                     \
> > > -	} else {                                         \
> > > -		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > -			: [tmp] "=&r" (dst)              \
> > > -			: [addr] "r"(src)                \
> > > -			: "memory");                     \
> > > -	} }
> > > -
> > >  	__LOAD_EXC_64(addr, value, memorder)
> > >  	if (value != expected) {
> > >  		__SEVL()
> > > @@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> > >  		} while (value != expected);
> > >  	}
> > >  }
> > > +
> > > +#define rte_wait_event(addr, mask, expected, cond, memorder, size) \
> > > +do {                                                               \
> > > +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \
> > > +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
> > > +	memorder != __ATOMIC_RELAXED);                             \
> > > +	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
> > > +	uint##size_t value;                                        \
> > > +	__LOAD_EXC_##size(addr, value, memorder)                   \
> > > +	if ((value & mask) cond expected) {		           \
> > > +		__SEVL()                                           \
> > > +		do {                                               \
> > > +			__WFE()                                    \
> > > +			__LOAD_EXC_##size(addr, value, memorder)   \
> > > +		} while ((value & mask) cond expected);            \
> > > +	}                                                          \
> > > +} while (0)
> > > +
> > > +#undef __LOAD_EXC_16
> > > +#undef __LOAD_EXC_32
> > >  #undef __LOAD_EXC_64
> > >
> > >  #undef __SEVL
> > > diff --git a/lib/eal/include/generic/rte_pause.h
> > > b/lib/eal/include/generic/rte_pause.h
> > > index 668ee4a184..20a5d2a9fd 100644
> > > --- a/lib/eal/include/generic/rte_pause.h
> > > +++ b/lib/eal/include/generic/rte_pause.h
> > > @@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> > >  	while (__atomic_load_n(addr, memorder) != expected)
> > >  		rte_pause();
> > >  }
> > > +
> > > +/*
> > > + * Wait until *addr breaks the condition, with a relaxed memory
> > > + * ordering model meaning the loads around this API can be reordered.
> > > + *
> > > + * @param addr
> > > + *  A pointer to the memory location.
> > > + * @param mask
> > > + *  A mask of value bits in interest.
> > > + * @param expected
> > > + *  A 16-bit expected value to be in the memory location.
> > > + * @param cond
> > > + *  A symbol representing the condition (==, !=).
> > > + * @param memorder
> > > + *  Two different memory orders that can be specified:
> > > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > > + *  C++11 memory orders with the same names, see the C++11 standard
> > > +or
> > > + *  the GCC wiki on atomic synchronization for detailed definition.
> > > + * @param size
> > > + * The bit size of *addr:
> > > + * It is used for arm architecture to choose load instructions,
> > > + * and the optional value is 16, 32 and 64.
> > > + */
> > > +#define rte_wait_event(addr, mask, expected, cond, memorder, size)     \
> > > +do {                                                                   \
> > > +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
> > > +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&               \
> > > +				memorder != __ATOMIC_RELAXED);         \
> > > +	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \
> >
> > I don't really understand why you need 'size' passed as a parameter.
> > Can't it be:
> > size_t size = sizeof(*(addr));
> > And then:
> > RTE_BUILD_BUG_ON(size != sizeof(uint16_t) && size != sizeof(uint32_t) &&
> > size != sizeof(uint64_t)); ?
> >
> > > +	while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> > > +		rte_pause();                                           \
> >
> > Just to repeat my own comment from the previous version's review:
> > put () around macro parameters in the macro body.
> > It will save you from a lot of unexpected trouble.
> 
> Sorry, I didn't catch the point.
> In this version, I first wanted to use '__LOAD_EXC_##size' to choose the load instruction, so I used 'size'
> as a parameter. In the next version, I will update this as:
> 
> #define __LOAD_EXC(src, dst, memorder, size) {    \
> 	if (size == 16)                               \
> 		__LOAD_EXC_16(src, dst, memorder)     \
> 	else if (size == 32)                          \
> 		__LOAD_EXC_32(src, dst, memorder)     \
> 	else if (size == 64)                          \
> 		__LOAD_EXC_64(src, dst, memorder)     \
> }
> 
> #define rte_wait_event(addr, mask, cond, expected, memorder)    \
> do {                                                            \
> 	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));      \
> 	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&        \
> 				memorder != __ATOMIC_RELAXED);  \
> 	uint32_t size = sizeof(*addr) << 3;                     \
> 	typeof(*addr) value = 0;                                \
> 	__LOAD_EXC(addr, value, memorder, size)                 \
> 	if ((value & mask) cond expected) {                     \
> 		__SEVL()                                        \
> 		do {                                            \
> 			__WFE()                                 \
> 			__LOAD_EXC(addr, value, memorder, size) \
> 		} while ((value & mask) cond expected);         \
> 	}                                                       \
> } while (0)

Sorry, I probably wasn't clear enough.
I meant: use '(' ')' around macro arguments (to avoid unexpected side effects from operator precedence and associativity):
uint32_t size = sizeof(*(addr)) ...;
...
if ((value & (mask)) cond (expected))
...
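
(Purely as an illustration of the precedence problem being referred to here; the macro names below are made up and not part of the patch.)

/* Unparenthesized use of 'mask' in the macro body: */
#define BAD_TEST(val, mask)   ((val) & mask)
/* Parenthesized, as suggested above: */
#define GOOD_TEST(val, mask)  ((val) & (mask))

/*
 * With mask passed as the expression 0x1 | 0x2:
 *   BAD_TEST(v, 0x1 | 0x2)  expands to ((v) & 0x1 | 0x2), which groups as
 *   ((v & 0x1) | 0x2) because '&' binds tighter than '|', so the result is
 *   always non-zero instead of the intended (v) & 0x3.
 *   GOOD_TEST(v, 0x1 | 0x2) expands to ((v) & (0x1 | 0x2)) as intended.
 */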
  
Feifei Wang Oct. 26, 2021, 1:08 a.m. UTC | #7
> -----Original Message-----
> From: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> Sent: Monday, October 25, 2021 10:29 PM
> To: Feifei Wang <Feifei.Wang2@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v4 1/5] eal: add new definitions for wait scheme
> 
> 
> > > > Introduce macros as generic interface for address monitoring.
> > > >
> > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > ---
> > > >  lib/eal/arm/include/rte_pause_64.h  | 126 ++++++++++++++++------------
> > > >  lib/eal/include/generic/rte_pause.h |  32 +++++++
> > > >  2 files changed, 104 insertions(+), 54 deletions(-)
> > > >
> > > > diff --git a/lib/eal/arm/include/rte_pause_64.h
> > > > b/lib/eal/arm/include/rte_pause_64.h
> > > > index e87d10b8cc..23954c2de2 100644
> > > > --- a/lib/eal/arm/include/rte_pause_64.h
> > > > +++ b/lib/eal/arm/include/rte_pause_64.h
> > > > @@ -31,20 +31,12 @@ static inline void rte_pause(void)
> > > >  /* Put processor into low power WFE(Wait For Event) state. */
> > > > #define __WFE() { asm volatile("wfe" : : : "memory"); }
> > > >
> > > > -static __rte_always_inline void
> > > > -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > > -		int memorder)
> > > > -{
> > > > -	uint16_t value;
> > > > -
> > > > -	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > > > -
> > > > -	/*
> > > > -	 * Atomic exclusive load from addr, it returns the 16-bit content of
> > > > -	 * *addr while making it 'monitored',when it is written by someone
> > > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > > -	 * implicitly to exit WFE.
> > > > -	 */
> > > > +/*
> > > > + * Atomic exclusive load from addr, it returns the 16-bit content
> > > > +of
> > > > + * *addr while making it 'monitored', when it is written by
> > > > +someone
> > > > + * else, the 'monitored' state is cleared and a event is
> > > > +generated
> > > > + * implicitly to exit WFE.
> > > > + */
> > > >  #define __LOAD_EXC_16(src, dst, memorder) {               \
> > > >  	if (memorder == __ATOMIC_RELAXED) {               \
> > > >  		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \ @@ -58,6 +50,52
> > > @@
> > > > rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > >  			: "memory");                      \
> > > >  	} }
> > > >
> > > > +/*
> > > > + * Atomic exclusive load from addr, it returns the 32-bit content
> > > > +of
> > > > + * *addr while making it 'monitored', when it is written by
> > > > +someone
> > > > + * else, the 'monitored' state is cleared and a event is
> > > > +generated
> > > > + * implicitly to exit WFE.
> > > > + */
> > > > +#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > > +	if (memorder == __ATOMIC_RELAXED) {              \
> > > > +		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > > +			: [tmp] "=&r" (dst)              \
> > > > +			: [addr] "r"(src)                \
> > > > +			: "memory");                     \
> > > > +	} else {                                         \
> > > > +		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > > +			: [tmp] "=&r" (dst)              \
> > > > +			: [addr] "r"(src)                \
> > > > +			: "memory");                     \
> > > > +	} }
> > > > +
> > > > +/*
> > > > + * Atomic exclusive load from addr, it returns the 64-bit content
> > > > +of
> > > > + * *addr while making it 'monitored', when it is written by
> > > > +someone
> > > > + * else, the 'monitored' state is cleared and a event is
> > > > +generated
> > > > + * implicitly to exit WFE.
> > > > + */
> > > > +#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > > +	if (memorder == __ATOMIC_RELAXED) {              \
> > > > +		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > > +			: [tmp] "=&r" (dst)              \
> > > > +			: [addr] "r"(src)                \
> > > > +			: "memory");                     \
> > > > +	} else {                                         \
> > > > +		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > > +			: [tmp] "=&r" (dst)              \
> > > > +			: [addr] "r"(src)                \
> > > > +			: "memory");                     \
> > > > +	} }
> > > > +
> > > > +static __rte_always_inline void
> > > > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > > +		int memorder)
> > > > +{
> > > > +	uint16_t value;
> > > > +
> > > > +	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > +__ATOMIC_RELAXED);
> > > > +
> > > >  	__LOAD_EXC_16(addr, value, memorder)
> > > >  	if (value != expected) {
> > > >  		__SEVL()
> > > > @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t
> > > > *addr,
> > > uint16_t expected,
> > > >  			__LOAD_EXC_16(addr, value, memorder)
> > > >  		} while (value != expected);
> > > >  	}
> > > > -#undef __LOAD_EXC_16
> > > >  }
> > > >
> > > >  static __rte_always_inline void
> > > > @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t
> > > > *addr, uint32_t expected,
> > > >
> > > >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > __ATOMIC_RELAXED);
> > > >
> > > > -	/*
> > > > -	 * Atomic exclusive load from addr, it returns the 32-bit content of
> > > > -	 * *addr while making it 'monitored',when it is written by someone
> > > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > > -	 * implicitly to exit WFE.
> > > > -	 */
> > > > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > > -	if (memorder == __ATOMIC_RELAXED) {              \
> > > > -		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > > -			: [tmp] "=&r" (dst)              \
> > > > -			: [addr] "r"(src)                \
> > > > -			: "memory");                     \
> > > > -	} else {                                         \
> > > > -		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > > -			: [tmp] "=&r" (dst)              \
> > > > -			: [addr] "r"(src)                \
> > > > -			: "memory");                     \
> > > > -	} }
> > > > -
> > > >  	__LOAD_EXC_32(addr, value, memorder)
> > > >  	if (value != expected) {
> > > >  		__SEVL()
> > > > @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t
> > > > *addr,
> > > uint32_t expected,
> > > >  			__LOAD_EXC_32(addr, value, memorder)
> > > >  		} while (value != expected);
> > > >  	}
> > > > -#undef __LOAD_EXC_32
> > > >  }
> > > >
> > > >  static __rte_always_inline void
> > > > @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr, uint64_t expected,
> > > >
> > > >  	assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > __ATOMIC_RELAXED);
> > > >
> > > > -	/*
> > > > -	 * Atomic exclusive load from addr, it returns the 64-bit content of
> > > > -	 * *addr while making it 'monitored',when it is written by someone
> > > > -	 * else, the 'monitored' state is cleared and a event is generated
> > > > -	 * implicitly to exit WFE.
> > > > -	 */
> > > > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > > -	if (memorder == __ATOMIC_RELAXED) {              \
> > > > -		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > > -			: [tmp] "=&r" (dst)              \
> > > > -			: [addr] "r"(src)                \
> > > > -			: "memory");                     \
> > > > -	} else {                                         \
> > > > -		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > > -			: [tmp] "=&r" (dst)              \
> > > > -			: [addr] "r"(src)                \
> > > > -			: "memory");                     \
> > > > -	} }
> > > > -
> > > >  	__LOAD_EXC_64(addr, value, memorder)
> > > >  	if (value != expected) {
> > > >  		__SEVL()
> > > > @@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr,
> > > uint64_t expected,
> > > >  		} while (value != expected);
> > > >  	}
> > > >  }
> > > > +
> > > > +#define rte_wait_event(addr, mask, expected, cond, memorder, size) \
> > > > +do {                                                               \
> > > > +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \
> > > > +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
> > > > +	memorder != __ATOMIC_RELAXED);                             \
> > > > +	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
> > > > +	uint##size_t value;                                        \
> > > > +	__LOAD_EXC_##size(addr, value, memorder)                   \
> > > > +	if ((value & mask) cond expected) {		           \
> > > > +		__SEVL()                                           \
> > > > +		do {                                               \
> > > > +			__WFE()                                    \
> > > > +			__LOAD_EXC_##size(addr, value, memorder)   \
> > > > +		} while ((value & mask) cond expected);            \
> > > > +	}                                                          \
> > > > +} while (0)
> > > > +
> > > > +#undef __LOAD_EXC_16
> > > > +#undef __LOAD_EXC_32
> > > >  #undef __LOAD_EXC_64
> > > >
> > > >  #undef __SEVL
> > > > diff --git a/lib/eal/include/generic/rte_pause.h
> > > > b/lib/eal/include/generic/rte_pause.h
> > > > index 668ee4a184..20a5d2a9fd 100644
> > > > --- a/lib/eal/include/generic/rte_pause.h
> > > > +++ b/lib/eal/include/generic/rte_pause.h
> > > > @@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr,
> > > uint64_t expected,
> > > >  	while (__atomic_load_n(addr, memorder) != expected)
> > > >  		rte_pause();
> > > >  }
> > > > +
> > > > +/*
> > > > + * Wait until *addr breaks the condition, with a relaxed memory
> > > > + * ordering model meaning the loads around this API can be reordered.
> > > > + *
> > > > + * @param addr
> > > > + *  A pointer to the memory location.
> > > > + * @param mask
> > > > + *  A mask of value bits in interest.
> > > > + * @param expected
> > > > + *  A 16-bit expected value to be in the memory location.
> > > > + * @param cond
> > > > + *  A symbol representing the condition (==, !=).
> > > > + * @param memorder
> > > > + *  Two different memory orders that can be specified:
> > > > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > > > + *  C++11 memory orders with the same names, see the C++11
> > > > +standard or
> > > > + *  the GCC wiki on atomic synchronization for detailed definition.
> > > > + * @param size
> > > > + * The bit size of *addr:
> > > > + * It is used for arm architecture to choose load instructions,
> > > > + * and the optional value is 16, 32 and 64.
> > > > + */
> > > > +#define rte_wait_event(addr, mask, expected, cond, memorder, size)
> \
> > > > +do {                                                                   \
> > > > +	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
> > > > +	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&               \
> > > > +				memorder != __ATOMIC_RELAXED);         \
> > > > +	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \
> > >
> > > I don't' really understand why you do need 'size' passed as parameter.
> > > Can't it be:
> > > size_t size = sizeof(*(addr));
> > > And then:
> > > RTE_BUILD_BUG_ON(size != sizeof(uint16_t) && size !=
> > > sizeof(uint32_t) && size != sizeof(uint64_t)); ?
> > >
> > > > +	while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> > > > +		rte_pause();                                           \
> > >
> > > Just to repeat my own comment from previous version review:
> > > put () around macro parameters in the macro body.
> > > Will save from a lot of unexpected troubles.
> >
> > Sorry I didn't catch the point.
> > In this version, I firstly want to use '__LOAD_EXC_##size' to choose ,
> > so I use size as a parameter.  And in the next version, I will update
> > this as:
> >
> > #define __LOAD_EXC(src, dst, memorder, size) {    \
> > 	if (size == 16)                               \
> > 		__LOAD_EXC_16(src, dst, memorder)     \
> > 	else if (size == 32)                          \
> > 		__LOAD_EXC_32(src, dst, memorder)     \
> > 	else if (size == 64)                          \
> > 		__LOAD_EXC_64(src, dst, memorder)     \
> > }
> >
> > #define rte_wait_event(addr, mask, cond, expected, memorder)    \
> > do {                                                            \
> > 	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));      \
> > 	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&        \
> > 				memorder != __ATOMIC_RELAXED);  \
> > 	uint32_t size = sizeof(*addr) << 3;                     \
> > 	typeof(*addr) value = 0;                                \
> > 	__LOAD_EXC(addr, value, memorder, size)                 \
> > 	if ((value & mask) cond expected) {                     \
> > 		__SEVL()                                        \
> > 		do {                                            \
> > 			__WFE()                                 \
> > 			__LOAD_EXC(addr, value, memorder, size) \
> > 		} while ((value & mask) cond expected);         \
> > 	}                                                       \
> > } while (0)
> 
> Sorry, I probably wasn't clear enough.
> I meant use '(' ')' around  macro arguments (to avoid un-predicted side-effects
> with operands associativity):
> uint32_t size = sizeof(*(addr)) ...;
> ...
> if ((value & (mask)) cond (expected))
> ...
That's OK. So in the next version, I will change the following (a sketch of the combined result follows below):
1. 'size' will no longer be passed as a parameter
2. I will add '()' around macro arguments
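
A rough sketch of how the generic-path macro might look with both changes applied; this is only an illustration of the direction agreed above, not the actual v5 code, and the final argument order and names may differ:

#define rte_wait_event(addr, mask, cond, expected, memorder)                  \
do {                                                                          \
	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                    \
	RTE_BUILD_BUG_ON((memorder) != __ATOMIC_ACQUIRE &&                    \
				(memorder) != __ATOMIC_RELAXED);               \
	RTE_BUILD_BUG_ON(sizeof(*(addr)) != sizeof(uint16_t) &&               \
				sizeof(*(addr)) != sizeof(uint32_t) &&         \
				sizeof(*(addr)) != sizeof(uint64_t));          \
	while ((__atomic_load_n((addr), (memorder)) & (mask)) cond (expected)) \
		rte_pause();                                                   \
} while (0)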
> 
> 
> 
> 
>
  
Feifei Wang Oct. 26, 2021, 1:11 a.m. UTC | #8
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Jerin Jacob
> Sent: Monday, October 25, 2021 5:44 PM
> To: Feifei Wang <Feifei.Wang2@arm.com>
> Cc: Ruifeng Wang <Ruifeng.Wang@arm.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; dpdk-dev <dev@dpdk.org>; nd
> <nd@arm.com>
> Subject: Re: [dpdk-dev] [PATCH v4 1/5] eal: add new definitions for wait scheme
> 
> On Mon, Oct 25, 2021 at 3:01 PM Feifei Wang <Feifei.Wang2@arm.com>
> wrote:
> >
> > > -----Original Message-----
> > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > Sent: Friday, October 22, 2021 8:10 AM
> > > To: Feifei Wang <Feifei.Wang2@arm.com>
> > > Cc: Ruifeng Wang <Ruifeng.Wang@arm.com>; Ananyev, Konstantin
> > > <konstantin.ananyev@intel.com>; dpdk-dev <dev@dpdk.org>; nd
> > > <nd@arm.com>
> > > Subject: Re: [dpdk-dev] [PATCH v4 1/5] eal: add new definitions for wait
> > > scheme
> > >
> > > On Wed, Oct 20, 2021 at 2:16 PM Feifei Wang <feifei.wang2@arm.com>
> > > wrote:
> > > >
> > > > Introduce macros as generic interface for address monitoring.
> > > >
> > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > ---
> > > >  lib/eal/arm/include/rte_pause_64.h  | 126 ++++++++++++++++------------
> > > >  lib/eal/include/generic/rte_pause.h |  32 +++++++
> > > >  2 files changed, 104 insertions(+), 54 deletions(-)
> > > >
> > > > diff --git a/lib/eal/arm/include/rte_pause_64.h
> > > > b/lib/eal/arm/include/rte_pause_64.h
> > > > index e87d10b8cc..23954c2de2 100644
> > > > --- a/lib/eal/arm/include/rte_pause_64.h
> > > > +++ b/lib/eal/arm/include/rte_pause_64.h
> > > > @@ -31,20 +31,12 @@ static inline void rte_pause(void)
> > > >  /* Put processor into low power WFE(Wait For Event) state. */
> > > > #define __WFE() { asm volatile("wfe" : : : "memory"); }
> > > >
> > > > -static __rte_always_inline void
> > > > -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > > -               int memorder)
> > > > -{
> > > > -       uint16_t value;
> > > > -
> > > > -       assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > __ATOMIC_RELAXED);
> > > > -
> > > > -       /*
> > > > -        * Atomic exclusive load from addr, it returns the 16-bit content of
> > > > -        * *addr while making it 'monitored',when it is written by someone
> > > > -        * else, the 'monitored' state is cleared and a event is generated
> > >
> > > a event -> an event, in all occurrences.
> > >
> > > > -        * implicitly to exit WFE.
> > > > -        */
> > > > +/*
> > > > + * Atomic exclusive load from addr, it returns the 16-bit content
> > > > +of
> > > > + * *addr while making it 'monitored', when it is written by
> > > > +someone
> > > > + * else, the 'monitored' state is cleared and a event is
> > > > +generated
> > > > + * implicitly to exit WFE.
> > > > + */
> > > >  #define __LOAD_EXC_16(src, dst, memorder) {               \
> > > >         if (memorder == __ATOMIC_RELAXED) {               \
> > > >                 asm volatile("ldxrh %w[tmp], [%x[addr]]"  \ @@
> > > > -58,6
> > > > +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr,
> > > > +uint16_t
> > > expected,
> > > >                         : "memory");                      \
> > > >         } }
> > > >
> > > > +/*
> > > > + * Atomic exclusive load from addr, it returns the 32-bit content
> > > > +of
> > > > + * *addr while making it 'monitored', when it is written by
> > > > +someone
> > > > + * else, the 'monitored' state is cleared and a event is
> > > > +generated
> > > > + * implicitly to exit WFE.
> > > > + */
> > > > +#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > > +       if (memorder == __ATOMIC_RELAXED) {              \
> > > > +               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > > +                       : [tmp] "=&r" (dst)              \
> > > > +                       : [addr] "r"(src)                \
> > > > +                       : "memory");                     \
> > > > +       } else {                                         \
> > > > +               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > > +                       : [tmp] "=&r" (dst)              \
> > > > +                       : [addr] "r"(src)                \
> > > > +                       : "memory");                     \
> > > > +       } }
> > > > +
> > > > +/*
> > > > + * Atomic exclusive load from addr, it returns the 64-bit content
> > > > +of
> > > > + * *addr while making it 'monitored', when it is written by
> > > > +someone
> > > > + * else, the 'monitored' state is cleared and a event is
> > > > +generated
> > > > + * implicitly to exit WFE.
> > > > + */
> > > > +#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > > +       if (memorder == __ATOMIC_RELAXED) {              \
> > > > +               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > > +                       : [tmp] "=&r" (dst)              \
> > > > +                       : [addr] "r"(src)                \
> > > > +                       : "memory");                     \
> > > > +       } else {                                         \
> > > > +               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > > +                       : [tmp] "=&r" (dst)              \
> > > > +                       : [addr] "r"(src)                \
> > > > +                       : "memory");                     \
> > > > +       } }
> > > > +
> > > > +static __rte_always_inline void
> > > > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> > > > +               int memorder)
> > > > +{
> > > > +       uint16_t value;
> > > > +
> > > > +       assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > + __ATOMIC_RELAXED);
> > > > +
> > > >         __LOAD_EXC_16(addr, value, memorder)
> > > >         if (value != expected) {
> > > >                 __SEVL()
> > > > @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t
> > > > *addr,
> > > uint16_t expected,
> > > >                         __LOAD_EXC_16(addr, value, memorder)
> > > >                 } while (value != expected);
> > > >         }
> > > > -#undef __LOAD_EXC_16
> > > >  }
> > > >
> > > >  static __rte_always_inline void
> > > > @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t
> > > > *addr, uint32_t expected,
> > > >
> > > >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > __ATOMIC_RELAXED);
> > > >
> > > > -       /*
> > > > -        * Atomic exclusive load from addr, it returns the 32-bit content of
> > > > -        * *addr while making it 'monitored',when it is written by someone
> > > > -        * else, the 'monitored' state is cleared and a event is generated
> > > > -        * implicitly to exit WFE.
> > > > -        */
> > > > -#define __LOAD_EXC_32(src, dst, memorder) {              \
> > > > -       if (memorder == __ATOMIC_RELAXED) {              \
> > > > -               asm volatile("ldxr %w[tmp], [%x[addr]]"  \
> > > > -                       : [tmp] "=&r" (dst)              \
> > > > -                       : [addr] "r"(src)                \
> > > > -                       : "memory");                     \
> > > > -       } else {                                         \
> > > > -               asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> > > > -                       : [tmp] "=&r" (dst)              \
> > > > -                       : [addr] "r"(src)                \
> > > > -                       : "memory");                     \
> > > > -       } }
> > > > -
> > > >         __LOAD_EXC_32(addr, value, memorder)
> > > >         if (value != expected) {
> > > >                 __SEVL()
> > > > @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t
> > > > *addr,
> > > uint32_t expected,
> > > >                         __LOAD_EXC_32(addr, value, memorder)
> > > >                 } while (value != expected);
> > > >         }
> > > > -#undef __LOAD_EXC_32
> > > >  }
> > > >
> > > >  static __rte_always_inline void
> > > > @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr, uint64_t expected,
> > > >
> > > >         assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > __ATOMIC_RELAXED);
> > > >
> > > > -       /*
> > > > -        * Atomic exclusive load from addr, it returns the 64-bit content of
> > > > -        * *addr while making it 'monitored',when it is written by someone
> > > > -        * else, the 'monitored' state is cleared and a event is generated
> > > > -        * implicitly to exit WFE.
> > > > -        */
> > > > -#define __LOAD_EXC_64(src, dst, memorder) {              \
> > > > -       if (memorder == __ATOMIC_RELAXED) {              \
> > > > -               asm volatile("ldxr %x[tmp], [%x[addr]]"  \
> > > > -                       : [tmp] "=&r" (dst)              \
> > > > -                       : [addr] "r"(src)                \
> > > > -                       : "memory");                     \
> > > > -       } else {                                         \
> > > > -               asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> > > > -                       : [tmp] "=&r" (dst)              \
> > > > -                       : [addr] "r"(src)                \
> > > > -                       : "memory");                     \
> > > > -       } }
> > > > -
> > > >         __LOAD_EXC_64(addr, value, memorder)
> > > >         if (value != expected) {
> > > >                 __SEVL()
> > > > @@ -143,6 +141,26 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr,
> > > uint64_t expected,
> > > >                 } while (value != expected);
> > > >         }
> > > >  }
> > > > +
> > > > +#define rte_wait_event(addr, mask, expected, cond, memorder,
> > > > +size) \
> > >
> > > I think it is better to swap "cond" and "expected" positions to get
> > > better readability.
> > Thanks for the comments, it is better than before and I will update in the
> next version.
> > >
> > >  rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, 0, !=,
> > > __ATOMIC_RELAXED, 64);
> > >
> > > Vs
> > >
> > >  rte_wait_event(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, !=, 0,
> > > __ATOMIC_RELAXED, 64);
> > >
> > > > +do {                                                               \
> > >
> > > Any reason to not make an inline function instead of macro?
> > Because there were many new APIs for different cases. And then we
> > refer to Linux 'wait_event' code for an example. Please see the first version
> and its discussion:
> > http://patches.dpdk.org/project/dpdk/cover/20210902053253.3017858-1-
> fe
> > ifei.wang2@arm.com/
> 
> 
> OK.
> 
> 
> > >
> > > > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \
> > >
> > > Shouldn't we add a __builtin_constant_p(size) check as well?
> >
> > Please see the discussion with Konstantin.
> > 'size' will no longer be a parameter, so it is unnecessary to check it with
> > RTE_BUILD_BUG_ON.
> 
> Makes sense to remove 'size'. My comment was more in the direction of whether
> 'size' needs to be passed at all.
That's OK. Thanks very much for your valuable comments.
> 
> > >
> > > > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
> > > > +       memorder != __ATOMIC_RELAXED);                             \
> > > > +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
> > > > +       uint##size_t value;
> > >
> > >
> > >                                \
> > > > +       __LOAD_EXC_##size(addr, value, memorder)                   \
> > > > +       if ((value & mask) cond expected) {                        \
> > > > +               __SEVL()                                           \
> > > > +               do {                                               \
> > > > +                       __WFE()                                    \
> > > > +                       __LOAD_EXC_##size(addr, value, memorder)   \
> > > > +               } while ((value & mask) cond expected);            \
> > > > +       }                                                          \
> > > > +} while (0)
> > > > +
> > > > +#undef __LOAD_EXC_16
> > > > +#undef __LOAD_EXC_32
> > > >  #undef __LOAD_EXC_64
> > > >
> > > >  #undef __SEVL
> > > > diff --git a/lib/eal/include/generic/rte_pause.h
> > > > b/lib/eal/include/generic/rte_pause.h
> > > > index 668ee4a184..20a5d2a9fd 100644
> > > > --- a/lib/eal/include/generic/rte_pause.h
> > > > +++ b/lib/eal/include/generic/rte_pause.h
> > > > @@ -111,6 +111,38 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr,
> > > uint64_t expected,
> > > >         while (__atomic_load_n(addr, memorder) != expected)
> > > >                 rte_pause();
> > > >  }
> > > > +
> > > > +/*
> > > > + * Wait until *addr breaks the condition, with a relaxed memory
> > > > + * ordering model meaning the loads around this API can be reordered.
> > > > + *
> > > > + * @param addr
> > > > + *  A pointer to the memory location.
> > > > + * @param mask
> > > > + *  A mask of value bits in interest.
> > > > + * @param expected
> > > > + *  A 16-bit expected value to be in the memory location.
> > > > + * @param cond
> > > > + *  A symbol representing the condition (==, !=).
> > > > + * @param memorder
> > > > + *  Two different memory orders that can be specified:
> > > > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > > > + *  C++11 memory orders with the same names, see the C++11
> > > > +standard or
> > > > + *  the GCC wiki on atomic synchronization for detailed definition.
> > > > + * @param size
> > > > + * The bit size of *addr:
> > > > + * It is used for arm architecture to choose load instructions,
> > > > + * and the optional value is 16, 32 and 64.
> > > > + */
> > > > +#define rte_wait_event(addr, mask, expected, cond, memorder, size)
> \
> > > > +do {                                                                   \
> > > > +       RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
> > > > +       RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
> \
> > > > +                               memorder != __ATOMIC_RELAXED);         \
> > > > +       RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \
> > > > +       while ((__atomic_load_n(addr, memorder) & mask) cond expected)
> \
> > > > +               rte_pause();                                           \
> > > > +} while (0)
> > > >  #endif
> > > >
> > > >  #endif /* _RTE_PAUSE_H_ */
> > > > --
> > > > 2.25.1
> > > >
  

Patch

diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index e87d10b8cc..23954c2de2 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -31,20 +31,12 @@  static inline void rte_pause(void)
 /* Put processor into low power WFE(Wait For Event) state. */
 #define __WFE() { asm volatile("wfe" : : : "memory"); }
 
-static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-		int memorder)
-{
-	uint16_t value;
-
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
-
-	/*
-	 * Atomic exclusive load from addr, it returns the 16-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
+/*
+ * Atomic exclusive load from addr, it returns the 16-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
 #define __LOAD_EXC_16(src, dst, memorder) {               \
 	if (memorder == __ATOMIC_RELAXED) {               \
 		asm volatile("ldxrh %w[tmp], [%x[addr]]"  \
@@ -58,6 +50,52 @@  rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
 			: "memory");                      \
 	} }
 
+/*
+ * Atomic exclusive load from addr, it returns the 32-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_32(src, dst, memorder) {              \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+/*
+ * Atomic exclusive load from addr, it returns the 64-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_64(src, dst, memorder) {              \
+	if (memorder == __ATOMIC_RELAXED) {              \
+		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} else {                                         \
+		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
+			: [tmp] "=&r" (dst)              \
+			: [addr] "r"(src)                \
+			: "memory");                     \
+	} }
+
+static __rte_always_inline void
+rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
+		int memorder)
+{
+	uint16_t value;
+
+	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
 	__LOAD_EXC_16(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -66,7 +104,6 @@  rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
 			__LOAD_EXC_16(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_16
 }
 
 static __rte_always_inline void
@@ -77,25 +114,6 @@  rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 32-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_32(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %w[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %w[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
 	__LOAD_EXC_32(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -104,7 +122,6 @@  rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
 			__LOAD_EXC_32(addr, value, memorder)
 		} while (value != expected);
 	}
-#undef __LOAD_EXC_32
 }
 
 static __rte_always_inline void
@@ -115,25 +132,6 @@  rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 
 	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
 
-	/*
-	 * Atomic exclusive load from addr, it returns the 64-bit content of
-	 * *addr while making it 'monitored',when it is written by someone
-	 * else, the 'monitored' state is cleared and a event is generated
-	 * implicitly to exit WFE.
-	 */
-#define __LOAD_EXC_64(src, dst, memorder) {              \
-	if (memorder == __ATOMIC_RELAXED) {              \
-		asm volatile("ldxr %x[tmp], [%x[addr]]"  \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} else {                                         \
-		asm volatile("ldaxr %x[tmp], [%x[addr]]" \
-			: [tmp] "=&r" (dst)              \
-			: [addr] "r"(src)                \
-			: "memory");                     \
-	} }
-
 	__LOAD_EXC_64(addr, value, memorder)
 	if (value != expected) {
 		__SEVL()
@@ -143,6 +141,26 @@  rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 		} while (value != expected);
 	}
 }
+
+#define rte_wait_event(addr, mask, expected, cond, memorder, size) \
+do {                                                               \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));         \
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&           \
+	memorder != __ATOMIC_RELAXED);                             \
+	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);  \
+	uint##size_t value;                                        \
+	__LOAD_EXC_##size(addr, value, memorder)                   \
+	if ((value & mask) cond expected) {		           \
+		__SEVL()                                           \
+		do {                                               \
+			__WFE()                                    \
+			__LOAD_EXC_##size(addr, value, memorder)   \
+		} while ((value & mask) cond expected);            \
+	}                                                          \
+} while (0)
+
+#undef __LOAD_EXC_16
+#undef __LOAD_EXC_32
 #undef __LOAD_EXC_64
 
 #undef __SEVL
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index 668ee4a184..20a5d2a9fd 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -111,6 +111,38 @@  rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
 	while (__atomic_load_n(addr, memorder) != expected)
 		rte_pause();
 }
+
+/*
+ * Wait until *addr breaks the condition, with a relaxed memory
+ * ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ *  A pointer to the memory location.
+ * @param mask
+ *  A mask of value bits in interest.
+ * @param expected
+ *  A 16-bit expected value to be in the memory location.
+ * @param cond
+ *  A symbol representing the condition (==, !=).
+ * @param memorder
+ *  Two different memory orders that can be specified:
+ *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  C++11 memory orders with the same names, see the C++11 standard or
+ *  the GCC wiki on atomic synchronization for detailed definition.
+ * @param size
+ * The bit size of *addr:
+ * It is used for arm architecture to choose load instructions,
+ * and the optional value is 16, 32 and 64.
+ */
+#define rte_wait_event(addr, mask, expected, cond, memorder, size)     \
+do {                                                                   \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));             \
+	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&               \
+				memorder != __ATOMIC_RELAXED);         \
+	RTE_BUILD_BUG_ON(size != 16 && size != 32 && size != 64);      \
+	while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
+		rte_pause();                                           \
+} while (0)
 #endif
 
 #endif /* _RTE_PAUSE_H_ */