[RFC,v3,1/5] eal: add new definitions for wait scheme
Checks
Commit Message
Introduce macros as generic interface for address monitoring.
Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
lib/eal/arm/include/rte_pause_64.h | 151 ++++++++++++++++++----------
lib/eal/include/generic/rte_pause.h | 78 ++++++++++++++
2 files changed, 175 insertions(+), 54 deletions(-)
Comments
> Introduce macros as generic interface for address monitoring.
>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
> lib/eal/arm/include/rte_pause_64.h | 151 ++++++++++++++++++----------
> lib/eal/include/generic/rte_pause.h | 78 ++++++++++++++
> 2 files changed, 175 insertions(+), 54 deletions(-)
>
> diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
> index e87d10b8cc..205510e044 100644
> --- a/lib/eal/arm/include/rte_pause_64.h
> +++ b/lib/eal/arm/include/rte_pause_64.h
> @@ -31,20 +31,12 @@ static inline void rte_pause(void)
> /* Put processor into low power WFE(Wait For Event) state. */
> #define __WFE() { asm volatile("wfe" : : : "memory"); }
>
> -static __rte_always_inline void
> -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> - int memorder)
> -{
> - uint16_t value;
> -
> - assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> -
> - /*
> - * Atomic exclusive load from addr, it returns the 16-bit content of
> - * *addr while making it 'monitored',when it is written by someone
> - * else, the 'monitored' state is cleared and a event is generated
> - * implicitly to exit WFE.
> - */
> +/*
> + * Atomic exclusive load from addr, it returns the 16-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
> #define __LOAD_EXC_16(src, dst, memorder) { \
> if (memorder == __ATOMIC_RELAXED) { \
> asm volatile("ldxrh %w[tmp], [%x[addr]]" \
> @@ -58,6 +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> : "memory"); \
> } }
>
> +/*
> + * Atomic exclusive load from addr, it returns the 32-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
> +#define __LOAD_EXC_32(src, dst, memorder) { \
> + if (memorder == __ATOMIC_RELAXED) { \
> + asm volatile("ldxr %w[tmp], [%x[addr]]" \
> + : [tmp] "=&r" (dst) \
> + : [addr] "r"(src) \
> + : "memory"); \
> + } else { \
> + asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> + : [tmp] "=&r" (dst) \
> + : [addr] "r"(src) \
> + : "memory"); \
> + } }
> +
> +/*
> + * Atomic exclusive load from addr, it returns the 64-bit content of
> + * *addr while making it 'monitored', when it is written by someone
> + * else, the 'monitored' state is cleared and a event is generated
> + * implicitly to exit WFE.
> + */
> +#define __LOAD_EXC_64(src, dst, memorder) { \
> + if (memorder == __ATOMIC_RELAXED) { \
> + asm volatile("ldxr %x[tmp], [%x[addr]]" \
> + : [tmp] "=&r" (dst) \
> + : [addr] "r"(src) \
> + : "memory"); \
> + } else { \
> + asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> + : [tmp] "=&r" (dst) \
> + : [addr] "r"(src) \
> + : "memory"); \
> + } }
> +
> +static __rte_always_inline void
> +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> + int memorder)
> +{
> + uint16_t value;
> +
> + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
> +
> __LOAD_EXC_16(addr, value, memorder)
> if (value != expected) {
> __SEVL()
> @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
> __LOAD_EXC_16(addr, value, memorder)
> } while (value != expected);
> }
> -#undef __LOAD_EXC_16
> }
>
> static __rte_always_inline void
> @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
>
> assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
>
> - /*
> - * Atomic exclusive load from addr, it returns the 32-bit content of
> - * *addr while making it 'monitored',when it is written by someone
> - * else, the 'monitored' state is cleared and a event is generated
> - * implicitly to exit WFE.
> - */
> -#define __LOAD_EXC_32(src, dst, memorder) { \
> - if (memorder == __ATOMIC_RELAXED) { \
> - asm volatile("ldxr %w[tmp], [%x[addr]]" \
> - : [tmp] "=&r" (dst) \
> - : [addr] "r"(src) \
> - : "memory"); \
> - } else { \
> - asm volatile("ldaxr %w[tmp], [%x[addr]]" \
> - : [tmp] "=&r" (dst) \
> - : [addr] "r"(src) \
> - : "memory"); \
> - } }
> -
> __LOAD_EXC_32(addr, value, memorder)
> if (value != expected) {
> __SEVL()
> @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
> __LOAD_EXC_32(addr, value, memorder)
> } while (value != expected);
> }
> -#undef __LOAD_EXC_32
> }
>
> static __rte_always_inline void
> @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
>
> assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
>
> - /*
> - * Atomic exclusive load from addr, it returns the 64-bit content of
> - * *addr while making it 'monitored',when it is written by someone
> - * else, the 'monitored' state is cleared and a event is generated
> - * implicitly to exit WFE.
> - */
> -#define __LOAD_EXC_64(src, dst, memorder) { \
> - if (memorder == __ATOMIC_RELAXED) { \
> - asm volatile("ldxr %x[tmp], [%x[addr]]" \
> - : [tmp] "=&r" (dst) \
> - : [addr] "r"(src) \
> - : "memory"); \
> - } else { \
> - asm volatile("ldaxr %x[tmp], [%x[addr]]" \
> - : [tmp] "=&r" (dst) \
> - : [addr] "r"(src) \
> - : "memory"); \
> - } }
> -
> __LOAD_EXC_64(addr, value, memorder)
> if (value != expected) {
> __SEVL()
> @@ -143,6 +141,51 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
> } while (value != expected);
> }
> }
> +
> +#define rte_wait_event_16(addr, mask, expected, cond, memorder) \
> +do { \
> + uint16_t value \
> + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
> + __LOAD_EXC_16(addr, value, memorder) \
> + if ((value & mask) cond expected) { \
> + __SEVL() \
> + do { \
> + __WFE() \
> + __LOAD_EXC_16(addr, value, memorder) \
> + } while ((value & mask) cond expected); \
> + } \
> +} while (0)
> +
> +#define rte_wait_event_32(addr, mask, expected, cond, memorder) \
> +do { \
> + uint32_t value \
> + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
> + __LOAD_EXC_32(addr, value, memorder) \
> + if ((value & mask) op expected) { \
> + __SEVL() \
> + do { \
> + __WFE() \
> + __LOAD_EXC_32(addr, value, memorder) \
> + } while ((value & mask) cond expected); \
> + } \
> +} while (0)
> +
> +#define rte_wait_event_64(addr, mask, expected, cond, memorder) \
> +do { \
> + uint64_t value \
> + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
> + __LOAD_EXC_64(addr, value, memorder) \
> + if ((value & mask) cond expected) { \
> + __SEVL() \
> + do { \
> + __WFE() \
> + __LOAD_EXC_64(addr, value, memorder) \
> + } while ((value & mask) cond expected); \
> + } \
> +} while (0)
> +
> +#undef __LOAD_EXC_16
> +#undef __LOAD_EXC_32
> #undef __LOAD_EXC_64
>
> #undef __SEVL
> diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
> index 668ee4a184..4e32107eca 100644
> --- a/lib/eal/include/generic/rte_pause.h
> +++ b/lib/eal/include/generic/rte_pause.h
> @@ -111,6 +111,84 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
> while (__atomic_load_n(addr, memorder) != expected)
> rte_pause();
> }
> +
> +/*
> + * Wait until a 16-bit *addr breaks the condition, with a relaxed memory
> + * ordering model meaning the loads around this API can be reordered.
> + *
> + * @param addr
> + * A pointer to the memory location.
> + * @param mask
> + * A mask of value bits in interest
> + * @param expected
> + * A 16-bit expected value to be in the memory location.
> + * @param cond
> + * A symbol representing the condition (==, !=).
> + * @param memorder
> + * Two different memory orders that can be specified:
> + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + * C++11 memory orders with the same names, see the C++11 standard or
> + * the GCC wiki on atomic synchronization for detailed definition.
> + */
Hmm, so now we have 2 APIs doing similar thing:
rte_wait_until_equal_n() and rte_wait_event_n().
Can we probably unite them somehow?
At least make rte_wait_until_equal_n() to use rte_wait_event_n() underneath.
> +#define rte_wait_event_16(addr, mask, expected, cond, memorder) \
> +do { \
> + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
And why user is not allowed to use __ATOMIC_SEQ_CST here?
BTW, if we expect memorder to always be a constant, might be better BUILD_BUG_ON()?
> + \
> + while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> + rte_pause(); \
> +} while (0)
Two thoughts with these macros:
1. It is a goof practise to put () around macro parameters in the macro body.
Will save from a lot of unexpected troubles.
2. I think these 3 macros can be united into one.
Something like:
#define rte_wait_event(addr, mask, expected, cond, memorder) do {\
typeof (*(addr)) val = __atomic_load_n((addr), (memorder)); \
if ((val & (typeof(val))(mask)) cond (typeof(val))(expected)) \
break; \
rte_pause(); \
} while (1);
> +
> +/*
> + * Wait until a 32-bit *addr breaks the condition, with a relaxed memory
> + * ordering model meaning the loads around this API can be reordered.
> + *
> + * @param addr
> + * A pointer to the memory location.
> + * @param mask
> + * A mask of value bits in interest.
> + * @param expected
> + * A 32-bit expected value to be in the memory location.
> + * @param cond
> + * A symbol representing the condition (==, !=).
> + * @param memorder
> + * Two different memory orders that can be specified:
> + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + * C++11 memory orders with the same names, see the C++11 standard or
> + * the GCC wiki on atomic synchronization for detailed definition.
> + */
> +#define rte_wait_event_32(addr, mask, expected, cond, memorder) \
> +do { \
> + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
> + \
> + while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> + rte_pause(); \
> +} while (0)
> +
> +/*
> + * Wait until a 64-bit *addr breaks the condition, with a relaxed memory
> + * ordering model meaning the loads around this API can be reordered.
> + *
> + * @param addr
> + * A pointer to the memory location.
> + * @param mask
> + * A mask of value bits in interest
> + * @param expected
> + * A 64-bit expected value to be in the memory location.
> + * @param cond
> + * A symbol representing the condition (==, !=).
> + * @param memorder
> + * Two different memory orders that can be specified:
> + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + * C++11 memory orders with the same names, see the C++11 standard or
> + * the GCC wiki on atomic synchronization for detailed definition.
> + */
> +#define rte_wait_event_64(addr, mask, expected, cond, memorder) \
> +do { \
> + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
> + \
> + while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> + rte_pause(); \
> +} while (0)
> #endif
>
> #endif /* _RTE_PAUSE_H_ */
> --
> 2.25.1
> -----邮件原件-----
> 发件人: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> 发送时间: Friday, October 8, 2021 12:19 AM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>
> 主题: RE: [dpdk-dev] [RFC PATCH v3 1/5] eal: add new definitions for wait
> scheme
[snip]
> > diff --git a/lib/eal/include/generic/rte_pause.h
> > b/lib/eal/include/generic/rte_pause.h
> > index 668ee4a184..4e32107eca 100644
> > --- a/lib/eal/include/generic/rte_pause.h
> > +++ b/lib/eal/include/generic/rte_pause.h
> > @@ -111,6 +111,84 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> uint64_t expected,
> > while (__atomic_load_n(addr, memorder) != expected)
> > rte_pause();
> > }
> > +
> > +/*
> > + * Wait until a 16-bit *addr breaks the condition, with a relaxed
> > +memory
> > + * ordering model meaning the loads around this API can be reordered.
> > + *
> > + * @param addr
> > + * A pointer to the memory location.
> > + * @param mask
> > + * A mask of value bits in interest
> > + * @param expected
> > + * A 16-bit expected value to be in the memory location.
> > + * @param cond
> > + * A symbol representing the condition (==, !=).
> > + * @param memorder
> > + * Two different memory orders that can be specified:
> > + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > + * C++11 memory orders with the same names, see the C++11 standard
> > +or
> > + * the GCC wiki on atomic synchronization for detailed definition.
> > + */
>
> Hmm, so now we have 2 APIs doing similar thing:
> rte_wait_until_equal_n() and rte_wait_event_n().
> Can we probably unite them somehow?
> At least make rte_wait_until_equal_n() to use rte_wait_event_n() underneath.
>
You are right. We plan to change rte_wait_until_equal API after this new scheme
is achieved. And then, we will merge wait_unil into wait_event definition in the next new
patch series.
> > +#define rte_wait_event_16(addr, mask, expected, cond, memorder)
> \
> > +do { \
> > + assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > +__ATOMIC_RELAXED); \
>
> And why user is not allowed to use __ATOMIC_SEQ_CST here?
Actually this is just a load operation, and acquire here is enough to make sure 'load
addr value' can be before other operations.
> BTW, if we expect memorder to always be a constant, might be better
> BUILD_BUG_ON()?
If I understand correctly, you means we can replace 'assert' by 'build_bug_on':
RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder !=__ATOMIC_RELAXED);
>
> > + \
> > + while ((__atomic_load_n(addr, memorder) & mask) cond expected)
> \
> > + rte_pause(); \
> > +} while (0)
>
> Two thoughts with these macros:
> 1. It is a goof practise to put () around macro parameters in the macro body.
> Will save from a lot of unexpected troubles.
> 2. I think these 3 macros can be united into one.
> Something like:
>
> #define rte_wait_event(addr, mask, expected, cond, memorder) do {\
> typeof (*(addr)) val = __atomic_load_n((addr), (memorder)); \
> if ((val & (typeof(val))(mask)) cond (typeof(val))(expected)) \
> break; \
> rte_pause(); \
> } while (1);
For this point, I think it is due to different size need to use different assembly instructions
in arm architecture. For example,
load 16 bits instruction is "ldxrh %w[tmp], [%x[addr]"
load 32 bits instruction is " ldxr %w[tmp], [%x[addr]"
load 64 bits instruction is " ldxr %x[tmp], [%x[addr] "
And for consistency, we also use 3 APIs in generic path.
>
>
> > +
> > +/*
> > + * Wait until a 32-bit *addr breaks the condition, with a relaxed
> > +memory
> > + * ordering model meaning the loads around this API can be reordered.
> > + *
> > + * @param addr
> > + * A pointer to the memory location.
> > + * @param mask
> > + * A mask of value bits in interest.
> > + * @param expected
> > + * A 32-bit expected value to be in the memory location.
> > + * @param cond
> > + * A symbol representing the condition (==, !=).
> > + * @param memorder
> > + * Two different memory orders that can be specified:
> > + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > + * C++11 memory orders with the same names, see the C++11 standard
> > +or
> > + * the GCC wiki on atomic synchronization for detailed definition.
> > + */
> > +#define rte_wait_event_32(addr, mask, expected, cond, memorder)
> \
> > +do { \
> > + assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> __ATOMIC_RELAXED); \
> > + \
> > + while ((__atomic_load_n(addr, memorder) & mask) cond expected)
> \
> > + rte_pause(); \
> > +} while (0)
> > +
> > +/*
> > + * Wait until a 64-bit *addr breaks the condition, with a relaxed
> > +memory
> > + * ordering model meaning the loads around this API can be reordered.
> > + *
> > + * @param addr
> > + * A pointer to the memory location.
> > + * @param mask
> > + * A mask of value bits in interest
> > + * @param expected
> > + * A 64-bit expected value to be in the memory location.
> > + * @param cond
> > + * A symbol representing the condition (==, !=).
> > + * @param memorder
> > + * Two different memory orders that can be specified:
> > + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > + * C++11 memory orders with the same names, see the C++11 standard
> > +or
> > + * the GCC wiki on atomic synchronization for detailed definition.
> > + */
> > +#define rte_wait_event_64(addr, mask, expected, cond, memorder)
> \
> > +do { \
> > + assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> __ATOMIC_RELAXED); \
> > + \
> > + while ((__atomic_load_n(addr, memorder) & mask) cond expected)
> \
> > + rte_pause(); \
> > +} while (0)
> > #endif
> >
> > #endif /* _RTE_PAUSE_H_ */
> > --
> > 2.25.1
>
> [snip]
>
> > > diff --git a/lib/eal/include/generic/rte_pause.h
> > > b/lib/eal/include/generic/rte_pause.h
> > > index 668ee4a184..4e32107eca 100644
> > > --- a/lib/eal/include/generic/rte_pause.h
> > > +++ b/lib/eal/include/generic/rte_pause.h
> > > @@ -111,6 +111,84 @@ rte_wait_until_equal_64(volatile uint64_t *addr,
> > uint64_t expected,
> > > while (__atomic_load_n(addr, memorder) != expected)
> > > rte_pause();
> > > }
> > > +
> > > +/*
> > > + * Wait until a 16-bit *addr breaks the condition, with a relaxed
> > > +memory
> > > + * ordering model meaning the loads around this API can be reordered.
> > > + *
> > > + * @param addr
> > > + * A pointer to the memory location.
> > > + * @param mask
> > > + * A mask of value bits in interest
> > > + * @param expected
> > > + * A 16-bit expected value to be in the memory location.
> > > + * @param cond
> > > + * A symbol representing the condition (==, !=).
> > > + * @param memorder
> > > + * Two different memory orders that can be specified:
> > > + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > > + * C++11 memory orders with the same names, see the C++11 standard
> > > +or
> > > + * the GCC wiki on atomic synchronization for detailed definition.
> > > + */
> >
> > Hmm, so now we have 2 APIs doing similar thing:
> > rte_wait_until_equal_n() and rte_wait_event_n().
> > Can we probably unite them somehow?
> > At least make rte_wait_until_equal_n() to use rte_wait_event_n() underneath.
> >
> You are right. We plan to change rte_wait_until_equal API after this new scheme
> is achieved. And then, we will merge wait_unil into wait_event definition in the next new
> patch series.
>
> > > +#define rte_wait_event_16(addr, mask, expected, cond, memorder)
> > \
> > > +do { \
> > > + assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > +__ATOMIC_RELAXED); \
> >
> > And why user is not allowed to use __ATOMIC_SEQ_CST here?
> Actually this is just a load operation, and acquire here is enough to make sure 'load
> addr value' can be before other operations.
>
> > BTW, if we expect memorder to always be a constant, might be better
> > BUILD_BUG_ON()?
> If I understand correctly, you means we can replace 'assert' by 'build_bug_on':
> RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder !=__ATOMIC_RELAXED);
Yes, that was my thought.
In that case I think we should be able to catch wrong memorder at compilation stage.
>
> >
> > > + \
> > > + while ((__atomic_load_n(addr, memorder) & mask) cond expected)
> > \
> > > + rte_pause(); \
> > > +} while (0)
> >
> > Two thoughts with these macros:
> > 1. It is a goof practise to put () around macro parameters in the macro body.
> > Will save from a lot of unexpected troubles.
> > 2. I think these 3 macros can be united into one.
> > Something like:
> >
> > #define rte_wait_event(addr, mask, expected, cond, memorder) do {\
> > typeof (*(addr)) val = __atomic_load_n((addr), (memorder)); \
> > if ((val & (typeof(val))(mask)) cond (typeof(val))(expected)) \
> > break; \
> > rte_pause(); \
> > } while (1);
> For this point, I think it is due to different size need to use different assembly instructions
> in arm architecture. For example,
> load 16 bits instruction is "ldxrh %w[tmp], [%x[addr]"
> load 32 bits instruction is " ldxr %w[tmp], [%x[addr]"
> load 64 bits instruction is " ldxr %x[tmp], [%x[addr] "
Ok, but it could be then something like that for arm specific code:
if (sizeof(val) == sizeof(uint16_t)) \
__LOAD_EXC_16(...); \
else if (sizeof(val) == sizeof(uint32_t)) \
__LOAD_EXC_32(...); \
else if (sizeof(val) == sizeof(uint64_t)) \
__LOAD_EXC_64(...); \
...
> And for consistency, we also use 3 APIs in generic path.
Honestly, even one multi-line macro doesn't look nice.
Having 3 identical ones looks even worse.
On Wed, 13 Oct 2021 15:03:56 +0000
"Ananyev, Konstantin" <konstantin.ananyev@intel.com> wrote:
> > addr value' can be before other operations.
> >
> > > BTW, if we expect memorder to always be a constant, might be better
> > > BUILD_BUG_ON()?
> > If I understand correctly, you means we can replace 'assert' by 'build_bug_on':
> > RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder !=__ATOMIC_RELAXED);
>
> Yes, that was my thought.
> In that case I think we should be able to catch wrong memorder at compilation stage.
Maybe:
RTE_BUILD_BUG_ON(!_constant_p(memorder));
RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder !=__ATOMIC_RELAXED);
> -----邮件原件-----
> 发件人: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> 发送时间: Wednesday, October 13, 2021 11:04 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; nd <nd@arm.com>
> 主题: RE: [dpdk-dev] [RFC PATCH v3 1/5] eal: add new definitions for wait
> scheme
>
> >
> > [snip]
> >
> > > > diff --git a/lib/eal/include/generic/rte_pause.h
> > > > b/lib/eal/include/generic/rte_pause.h
> > > > index 668ee4a184..4e32107eca 100644
> > > > --- a/lib/eal/include/generic/rte_pause.h
> > > > +++ b/lib/eal/include/generic/rte_pause.h
> > > > @@ -111,6 +111,84 @@ rte_wait_until_equal_64(volatile uint64_t
> > > > *addr,
> > > uint64_t expected,
> > > > while (__atomic_load_n(addr, memorder) != expected)
> > > > rte_pause();
> > > > }
> > > > +
> > > > +/*
> > > > + * Wait until a 16-bit *addr breaks the condition, with a relaxed
> > > > +memory
> > > > + * ordering model meaning the loads around this API can be reordered.
> > > > + *
> > > > + * @param addr
> > > > + * A pointer to the memory location.
> > > > + * @param mask
> > > > + * A mask of value bits in interest
> > > > + * @param expected
> > > > + * A 16-bit expected value to be in the memory location.
> > > > + * @param cond
> > > > + * A symbol representing the condition (==, !=).
> > > > + * @param memorder
> > > > + * Two different memory orders that can be specified:
> > > > + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > > > + * C++11 memory orders with the same names, see the C++11
> > > > +standard or
> > > > + * the GCC wiki on atomic synchronization for detailed definition.
> > > > + */
> > >
> > > Hmm, so now we have 2 APIs doing similar thing:
> > > rte_wait_until_equal_n() and rte_wait_event_n().
> > > Can we probably unite them somehow?
> > > At least make rte_wait_until_equal_n() to use rte_wait_event_n()
> underneath.
> > >
> > You are right. We plan to change rte_wait_until_equal API after this
> > new scheme is achieved. And then, we will merge wait_unil into
> > wait_event definition in the next new patch series.
> >
> > > > +#define rte_wait_event_16(addr, mask, expected, cond, memorder)
> > > \
> > > > +do {
> \
> > > > + assert(memorder == __ATOMIC_ACQUIRE || memorder ==
> > > > +__ATOMIC_RELAXED); \
> > >
> > > And why user is not allowed to use __ATOMIC_SEQ_CST here?
> > Actually this is just a load operation, and acquire here is enough to
> > make sure 'load addr value' can be before other operations.
> >
> > > BTW, if we expect memorder to always be a constant, might be better
> > > BUILD_BUG_ON()?
> > If I understand correctly, you means we can replace 'assert' by
> 'build_bug_on':
> > RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder
> > !=__ATOMIC_RELAXED);
>
> Yes, that was my thought.
> In that case I think we should be able to catch wrong memorder at compilation
> stage.
>
> >
> > >
> > > > + \
> > > > + while ((__atomic_load_n(addr, memorder) & mask) cond expected)
> > > \
> > > > + rte_pause(); \
> > > > +} while (0)
> > >
> > > Two thoughts with these macros:
> > > 1. It is a goof practise to put () around macro parameters in the macro
> body.
> > > Will save from a lot of unexpected troubles.
> > > 2. I think these 3 macros can be united into one.
> > > Something like:
> > >
> > > #define rte_wait_event(addr, mask, expected, cond, memorder) do {\
> > > typeof (*(addr)) val = __atomic_load_n((addr), (memorder)); \
> > > if ((val & (typeof(val))(mask)) cond (typeof(val))(expected)) \
> > > break; \
> > > rte_pause(); \
> > > } while (1);
> > For this point, I think it is due to different size need to use
> > different assembly instructions in arm architecture. For example, load
> > 16 bits instruction is "ldxrh %w[tmp], [%x[addr]"
> > load 32 bits instruction is " ldxr %w[tmp], [%x[addr]"
> > load 64 bits instruction is " ldxr %x[tmp], [%x[addr] "
>
> Ok, but it could be then something like that for arm specific code:
> if (sizeof(val) == sizeof(uint16_t)) \
> __LOAD_EXC_16(...); \
> else if (sizeof(val) == sizeof(uint32_t)) \
> __LOAD_EXC_32(...); \
> else if (sizeof(val) == sizeof(uint64_t)) \
> __LOAD_EXC_64(...); \
> ...
>
I thinks we should use "addr" as judgement:
rte_wait_event(addr, mask, expected, cond, memorder)
if (sizeof(*addr)) == sizeof(uint16_t)
uint16_t value \
assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
__LOAD_EXC_16(addr, value, memorder) \
if ((value & mask) cond expected) { \
__SEVL() \
do { \
__WFE() \
__LOAD_EXC_16(addr, value, memorder) \
} while ((value & mask) cond expected); \
}
if (sizeof(*addr)) == sizeof(uint32_t)
..........
if (sizeof(*addr)) == sizeof(uint64_t)
...........
> > And for consistency, we also use 3 APIs in generic path.
> Honestly, even one multi-line macro doesn't look nice.
> Having 3 identical ones looks even worse.
> -----邮件原件-----
> 发件人: Stephen Hemminger <stephen@networkplumber.org>
> 发送时间: Thursday, October 14, 2021 1:00 AM
> 收件人: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> 抄送: Feifei Wang <Feifei.Wang2@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; dev@dpdk.org; nd <nd@arm.com>
> 主题: Re: [dpdk-dev] [RFC PATCH v3 1/5] eal: add new definitions for wait
> scheme
>
> On Wed, 13 Oct 2021 15:03:56 +0000
> "Ananyev, Konstantin" <konstantin.ananyev@intel.com> wrote:
>
> > > addr value' can be before other operations.
> > >
> > > > BTW, if we expect memorder to always be a constant, might be
> > > > better BUILD_BUG_ON()?
> > > If I understand correctly, you means we can replace 'assert' by
> 'build_bug_on':
> > > RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder
> > > !=__ATOMIC_RELAXED);
> >
> > Yes, that was my thought.
> > In that case I think we should be able to catch wrong memorder at
> compilation stage.
>
> Maybe:
> RTE_BUILD_BUG_ON(!_constant_p(memorder));
> RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
> memorder !=__ATOMIC_RELAXED);
>
Thanks for your comments. One question for this, I do not know why we should check if memorder is a constant?
Is it to check whether memorder has been assigned or NULL?
@@ -31,20 +31,12 @@ static inline void rte_pause(void)
/* Put processor into low power WFE(Wait For Event) state. */
#define __WFE() { asm volatile("wfe" : : : "memory"); }
-static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
- int memorder)
-{
- uint16_t value;
-
- assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
-
- /*
- * Atomic exclusive load from addr, it returns the 16-bit content of
- * *addr while making it 'monitored',when it is written by someone
- * else, the 'monitored' state is cleared and a event is generated
- * implicitly to exit WFE.
- */
+/*
+ * Atomic exclusive load from addr, it returns the 16-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
#define __LOAD_EXC_16(src, dst, memorder) { \
if (memorder == __ATOMIC_RELAXED) { \
asm volatile("ldxrh %w[tmp], [%x[addr]]" \
@@ -58,6 +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
: "memory"); \
} }
+/*
+ * Atomic exclusive load from addr, it returns the 32-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_32(src, dst, memorder) { \
+ if (memorder == __ATOMIC_RELAXED) { \
+ asm volatile("ldxr %w[tmp], [%x[addr]]" \
+ : [tmp] "=&r" (dst) \
+ : [addr] "r"(src) \
+ : "memory"); \
+ } else { \
+ asm volatile("ldaxr %w[tmp], [%x[addr]]" \
+ : [tmp] "=&r" (dst) \
+ : [addr] "r"(src) \
+ : "memory"); \
+ } }
+
+/*
+ * Atomic exclusive load from addr, it returns the 64-bit content of
+ * *addr while making it 'monitored', when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_64(src, dst, memorder) { \
+ if (memorder == __ATOMIC_RELAXED) { \
+ asm volatile("ldxr %x[tmp], [%x[addr]]" \
+ : [tmp] "=&r" (dst) \
+ : [addr] "r"(src) \
+ : "memory"); \
+ } else { \
+ asm volatile("ldaxr %x[tmp], [%x[addr]]" \
+ : [tmp] "=&r" (dst) \
+ : [addr] "r"(src) \
+ : "memory"); \
+ } }
+
+static __rte_always_inline void
+rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
+ int memorder)
+{
+ uint16_t value;
+
+ assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
__LOAD_EXC_16(addr, value, memorder)
if (value != expected) {
__SEVL()
@@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
__LOAD_EXC_16(addr, value, memorder)
} while (value != expected);
}
-#undef __LOAD_EXC_16
}
static __rte_always_inline void
@@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
- /*
- * Atomic exclusive load from addr, it returns the 32-bit content of
- * *addr while making it 'monitored',when it is written by someone
- * else, the 'monitored' state is cleared and a event is generated
- * implicitly to exit WFE.
- */
-#define __LOAD_EXC_32(src, dst, memorder) { \
- if (memorder == __ATOMIC_RELAXED) { \
- asm volatile("ldxr %w[tmp], [%x[addr]]" \
- : [tmp] "=&r" (dst) \
- : [addr] "r"(src) \
- : "memory"); \
- } else { \
- asm volatile("ldaxr %w[tmp], [%x[addr]]" \
- : [tmp] "=&r" (dst) \
- : [addr] "r"(src) \
- : "memory"); \
- } }
-
__LOAD_EXC_32(addr, value, memorder)
if (value != expected) {
__SEVL()
@@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
__LOAD_EXC_32(addr, value, memorder)
} while (value != expected);
}
-#undef __LOAD_EXC_32
}
static __rte_always_inline void
@@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
- /*
- * Atomic exclusive load from addr, it returns the 64-bit content of
- * *addr while making it 'monitored',when it is written by someone
- * else, the 'monitored' state is cleared and a event is generated
- * implicitly to exit WFE.
- */
-#define __LOAD_EXC_64(src, dst, memorder) { \
- if (memorder == __ATOMIC_RELAXED) { \
- asm volatile("ldxr %x[tmp], [%x[addr]]" \
- : [tmp] "=&r" (dst) \
- : [addr] "r"(src) \
- : "memory"); \
- } else { \
- asm volatile("ldaxr %x[tmp], [%x[addr]]" \
- : [tmp] "=&r" (dst) \
- : [addr] "r"(src) \
- : "memory"); \
- } }
-
__LOAD_EXC_64(addr, value, memorder)
if (value != expected) {
__SEVL()
@@ -143,6 +141,51 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
} while (value != expected);
}
}
+
+#define rte_wait_event_16(addr, mask, expected, cond, memorder) \
+do { \
+ uint16_t value \
+ assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
+ __LOAD_EXC_16(addr, value, memorder) \
+ if ((value & mask) cond expected) { \
+ __SEVL() \
+ do { \
+ __WFE() \
+ __LOAD_EXC_16(addr, value, memorder) \
+ } while ((value & mask) cond expected); \
+ } \
+} while (0)
+
+#define rte_wait_event_32(addr, mask, expected, cond, memorder) \
+do { \
+ uint32_t value \
+ assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
+ __LOAD_EXC_32(addr, value, memorder) \
+ if ((value & mask) op expected) { \
+ __SEVL() \
+ do { \
+ __WFE() \
+ __LOAD_EXC_32(addr, value, memorder) \
+ } while ((value & mask) cond expected); \
+ } \
+} while (0)
+
+#define rte_wait_event_64(addr, mask, expected, cond, memorder) \
+do { \
+ uint64_t value \
+ assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
+ __LOAD_EXC_64(addr, value, memorder) \
+ if ((value & mask) cond expected) { \
+ __SEVL() \
+ do { \
+ __WFE() \
+ __LOAD_EXC_64(addr, value, memorder) \
+ } while ((value & mask) cond expected); \
+ } \
+} while (0)
+
+#undef __LOAD_EXC_16
+#undef __LOAD_EXC_32
#undef __LOAD_EXC_64
#undef __SEVL
@@ -111,6 +111,84 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
while (__atomic_load_n(addr, memorder) != expected)
rte_pause();
}
+
+/*
+ * Wait until a 16-bit *addr breaks the condition, with a relaxed memory
+ * ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ * A pointer to the memory location.
+ * @param mask
+ * A mask of value bits in interest
+ * @param expected
+ * A 16-bit expected value to be in the memory location.
+ * @param cond
+ * A symbol representing the condition (==, !=).
+ * @param memorder
+ * Two different memory orders that can be specified:
+ * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ * C++11 memory orders with the same names, see the C++11 standard or
+ * the GCC wiki on atomic synchronization for detailed definition.
+ */
+#define rte_wait_event_16(addr, mask, expected, cond, memorder) \
+do { \
+ assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
+ \
+ while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
+ rte_pause(); \
+} while (0)
+
+/*
+ * Wait until a 32-bit *addr breaks the condition, with a relaxed memory
+ * ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ * A pointer to the memory location.
+ * @param mask
+ * A mask of value bits in interest.
+ * @param expected
+ * A 32-bit expected value to be in the memory location.
+ * @param cond
+ * A symbol representing the condition (==, !=).
+ * @param memorder
+ * Two different memory orders that can be specified:
+ * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ * C++11 memory orders with the same names, see the C++11 standard or
+ * the GCC wiki on atomic synchronization for detailed definition.
+ */
+#define rte_wait_event_32(addr, mask, expected, cond, memorder) \
+do { \
+ assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
+ \
+ while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
+ rte_pause(); \
+} while (0)
+
+/*
+ * Wait until a 64-bit *addr breaks the condition, with a relaxed memory
+ * ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ * A pointer to the memory location.
+ * @param mask
+ * A mask of value bits in interest
+ * @param expected
+ * A 64-bit expected value to be in the memory location.
+ * @param cond
+ * A symbol representing the condition (==, !=).
+ * @param memorder
+ * Two different memory orders that can be specified:
+ * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ * C++11 memory orders with the same names, see the C++11 standard or
+ * the GCC wiki on atomic synchronization for detailed definition.
+ */
+#define rte_wait_event_64(addr, mask, expected, cond, memorder) \
+do { \
+ assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
+ \
+ while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
+ rte_pause(); \
+} while (0)
#endif
#endif /* _RTE_PAUSE_H_ */