Message ID | 20210926063302.1541193-2-feifei.wang2@arm.com (mailing list archive)
---|---
State | Superseded, archived
Delegated to | David Marchand
Series | add new definitions for wait scheme

Context | Check | Description
---|---|---
ci/checkpatch | success | coding style OK
> Introduce macros as generic interface for address monitoring. > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > --- > lib/eal/arm/include/rte_pause_64.h | 151 ++++++++++++++++++---------- > lib/eal/include/generic/rte_pause.h | 78 ++++++++++++++ > 2 files changed, 175 insertions(+), 54 deletions(-) > > diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h > index e87d10b8cc..205510e044 100644 > --- a/lib/eal/arm/include/rte_pause_64.h > +++ b/lib/eal/arm/include/rte_pause_64.h > @@ -31,20 +31,12 @@ static inline void rte_pause(void) > /* Put processor into low power WFE(Wait For Event) state. */ > #define __WFE() { asm volatile("wfe" : : : "memory"); } > > -static __rte_always_inline void > -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected, > - int memorder) > -{ > - uint16_t value; > - > - assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); > - > - /* > - * Atomic exclusive load from addr, it returns the 16-bit content of > - * *addr while making it 'monitored',when it is written by someone > - * else, the 'monitored' state is cleared and a event is generated > - * implicitly to exit WFE. > - */ > +/* > + * Atomic exclusive load from addr, it returns the 16-bit content of > + * *addr while making it 'monitored', when it is written by someone > + * else, the 'monitored' state is cleared and a event is generated > + * implicitly to exit WFE. > + */ > #define __LOAD_EXC_16(src, dst, memorder) { \ > if (memorder == __ATOMIC_RELAXED) { \ > asm volatile("ldxrh %w[tmp], [%x[addr]]" \ > @@ -58,6 +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected, > : "memory"); \ > } } > > +/* > + * Atomic exclusive load from addr, it returns the 32-bit content of > + * *addr while making it 'monitored', when it is written by someone > + * else, the 'monitored' state is cleared and a event is generated > + * implicitly to exit WFE. > + */ > +#define __LOAD_EXC_32(src, dst, memorder) { \ > + if (memorder == __ATOMIC_RELAXED) { \ > + asm volatile("ldxr %w[tmp], [%x[addr]]" \ > + : [tmp] "=&r" (dst) \ > + : [addr] "r"(src) \ > + : "memory"); \ > + } else { \ > + asm volatile("ldaxr %w[tmp], [%x[addr]]" \ > + : [tmp] "=&r" (dst) \ > + : [addr] "r"(src) \ > + : "memory"); \ > + } } > + > +/* > + * Atomic exclusive load from addr, it returns the 64-bit content of > + * *addr while making it 'monitored', when it is written by someone > + * else, the 'monitored' state is cleared and a event is generated > + * implicitly to exit WFE. 
> + */ > +#define __LOAD_EXC_64(src, dst, memorder) { \ > + if (memorder == __ATOMIC_RELAXED) { \ > + asm volatile("ldxr %x[tmp], [%x[addr]]" \ > + : [tmp] "=&r" (dst) \ > + : [addr] "r"(src) \ > + : "memory"); \ > + } else { \ > + asm volatile("ldaxr %x[tmp], [%x[addr]]" \ > + : [tmp] "=&r" (dst) \ > + : [addr] "r"(src) \ > + : "memory"); \ > + } } > + > +static __rte_always_inline void > +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected, > + int memorder) > +{ > + uint16_t value; > + > + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); > + > __LOAD_EXC_16(addr, value, memorder) > if (value != expected) { > __SEVL() > @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected, > __LOAD_EXC_16(addr, value, memorder) > } while (value != expected); > } > -#undef __LOAD_EXC_16 > } > > static __rte_always_inline void > @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected, > > assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); > > - /* > - * Atomic exclusive load from addr, it returns the 32-bit content of > - * *addr while making it 'monitored',when it is written by someone > - * else, the 'monitored' state is cleared and a event is generated > - * implicitly to exit WFE. > - */ > -#define __LOAD_EXC_32(src, dst, memorder) { \ > - if (memorder == __ATOMIC_RELAXED) { \ > - asm volatile("ldxr %w[tmp], [%x[addr]]" \ > - : [tmp] "=&r" (dst) \ > - : [addr] "r"(src) \ > - : "memory"); \ > - } else { \ > - asm volatile("ldaxr %w[tmp], [%x[addr]]" \ > - : [tmp] "=&r" (dst) \ > - : [addr] "r"(src) \ > - : "memory"); \ > - } } > - > __LOAD_EXC_32(addr, value, memorder) > if (value != expected) { > __SEVL() > @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected, > __LOAD_EXC_32(addr, value, memorder) > } while (value != expected); > } > -#undef __LOAD_EXC_32 > } > > static __rte_always_inline void > @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected, > > assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); > > - /* > - * Atomic exclusive load from addr, it returns the 64-bit content of > - * *addr while making it 'monitored',when it is written by someone > - * else, the 'monitored' state is cleared and a event is generated > - * implicitly to exit WFE. 
> - */ > -#define __LOAD_EXC_64(src, dst, memorder) { \ > - if (memorder == __ATOMIC_RELAXED) { \ > - asm volatile("ldxr %x[tmp], [%x[addr]]" \ > - : [tmp] "=&r" (dst) \ > - : [addr] "r"(src) \ > - : "memory"); \ > - } else { \ > - asm volatile("ldaxr %x[tmp], [%x[addr]]" \ > - : [tmp] "=&r" (dst) \ > - : [addr] "r"(src) \ > - : "memory"); \ > - } } > - > __LOAD_EXC_64(addr, value, memorder) > if (value != expected) { > __SEVL() > @@ -143,6 +141,51 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected, > } while (value != expected); > } > } > + > +#define rte_wait_event_16(addr, mask, expected, cond, memorder) \ > +do { \ > + uint16_t value \ > + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \ > + __LOAD_EXC_16(addr, value, memorder) \ > + if ((value & mask) cond expected) { \ > + __SEVL() \ > + do { \ > + __WFE() \ > + __LOAD_EXC_16(addr, value, memorder) \ > + } while ((value & mask) cond expected); \ > + } \ > +} while (0) > + > +#define rte_wait_event_32(addr, mask, expected, cond, memorder) \ > +do { \ > + uint32_t value \ > + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \ > + __LOAD_EXC_32(addr, value, memorder) \ > + if ((value & mask) op expected) { \ > + __SEVL() \ > + do { \ > + __WFE() \ > + __LOAD_EXC_32(addr, value, memorder) \ > + } while ((value & mask) cond expected); \ > + } \ > +} while (0) > + > +#define rte_wait_event_64(addr, mask, expected, cond, memorder) \ > +do { \ > + uint64_t value \ > + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \ > + __LOAD_EXC_64(addr, value, memorder) \ > + if ((value & mask) cond expected) { \ > + __SEVL() \ > + do { \ > + __WFE() \ > + __LOAD_EXC_64(addr, value, memorder) \ > + } while ((value & mask) cond expected); \ > + } \ > +} while (0) > + > +#undef __LOAD_EXC_16 > +#undef __LOAD_EXC_32 > #undef __LOAD_EXC_64 > > #undef __SEVL > diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h > index 668ee4a184..4e32107eca 100644 > --- a/lib/eal/include/generic/rte_pause.h > +++ b/lib/eal/include/generic/rte_pause.h > @@ -111,6 +111,84 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected, > while (__atomic_load_n(addr, memorder) != expected) > rte_pause(); > } > + > +/* > + * Wait until a 16-bit *addr breaks the condition, with a relaxed memory > + * ordering model meaning the loads around this API can be reordered. > + * > + * @param addr > + * A pointer to the memory location. > + * @param mask > + * A mask of value bits in interest > + * @param expected > + * A 16-bit expected value to be in the memory location. > + * @param cond > + * A symbol representing the condition (==, !=). > + * @param memorder > + * Two different memory orders that can be specified: > + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to > + * C++11 memory orders with the same names, see the C++11 standard or > + * the GCC wiki on atomic synchronization for detailed definition. > + */ Hmm, so now we have 2 APIs doing similar thing: rte_wait_until_equal_n() and rte_wait_event_n(). Can we probably unite them somehow? At least make rte_wait_until_equal_n() to use rte_wait_event_n() underneath. > +#define rte_wait_event_16(addr, mask, expected, cond, memorder) \ > +do { \ > + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \ And why user is not allowed to use __ATOMIC_SEQ_CST here? BTW, if we expect memorder to always be a constant, might be better BUILD_BUG_ON()? 
> +									\
> +	while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> +		rte_pause(); \
> +} while (0)

Two thoughts with these macros:
1. It is good practice to put () around macro parameters in the macro body.
It will save you from a lot of unexpected trouble.
2. I think these 3 macros can be united into one.
Something like:

#define rte_wait_event(addr, mask, expected, cond, memorder) do {\
	typeof (*(addr)) val = __atomic_load_n((addr), (memorder)); \
	if ((val & (typeof(val))(mask)) cond (typeof(val))(expected)) \
		break; \
	rte_pause(); \
} while (1);

> +
> +/*
> + * Wait until a 32-bit *addr breaks the condition, with a relaxed memory
> + * ordering model meaning the loads around this API can be reordered.
> + *
> + * @param addr
> + *  A pointer to the memory location.
> + * @param mask
> + *  A mask of value bits in interest.
> + * @param expected
> + *  A 32-bit expected value to be in the memory location.
> + * @param cond
> + *  A symbol representing the condition (==, !=).
> + * @param memorder
> + *  Two different memory orders that can be specified:
> + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + *  C++11 memory orders with the same names, see the C++11 standard or
> + *  the GCC wiki on atomic synchronization for detailed definition.
> + */
> +#define rte_wait_event_32(addr, mask, expected, cond, memorder) \
> +do { \
> +	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
> +									\
> +	while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> +		rte_pause(); \
> +} while (0)
> +
> +/*
> + * Wait until a 64-bit *addr breaks the condition, with a relaxed memory
> + * ordering model meaning the loads around this API can be reordered.
> + *
> + * @param addr
> + *  A pointer to the memory location.
> + * @param mask
> + *  A mask of value bits in interest
> + * @param expected
> + *  A 64-bit expected value to be in the memory location.
> + * @param cond
> + *  A symbol representing the condition (==, !=).
> + * @param memorder
> + *  Two different memory orders that can be specified:
> + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> + *  C++11 memory orders with the same names, see the C++11 standard or
> + *  the GCC wiki on atomic synchronization for detailed definition.
> + */
> +#define rte_wait_event_64(addr, mask, expected, cond, memorder) \
> +do { \
> +	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
> +									\
> +	while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> +		rte_pause(); \
> +} while (0)
>  #endif
> 
>  #endif /* _RTE_PAUSE_H_ */
> --
> 2.25.1
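For reference, below is a minimal, self-contained sketch of the single unified macro suggested in the review above, using only the generic __atomic_load_n() path (no Arm SEVL/WFE). The rte_pause() stub and the main() driver are illustrative assumptions, not part of the patch. Note also that this variant exits as soon as the condition becomes true, whereas the RFC's rte_wait_event_*() macros spin while the condition holds.

	#include <stdint.h>
	#include <stdio.h>

	/* Stand-in for DPDK's rte_pause(); on x86 it would emit a PAUSE hint. */
	static inline void rte_pause(void) { }

	/*
	 * One macro for all operand sizes: typeof() picks up the width of *addr,
	 * so separate 16/32/64-bit variants are not needed on the generic path.
	 */
	#define rte_wait_event(addr, mask, expected, cond, memorder) do {     \
		typeof(*(addr)) val = __atomic_load_n((addr), (memorder));    \
		if ((val & (typeof(val))(mask)) cond (typeof(val))(expected)) \
			break;                                                 \
		rte_pause();                                                   \
	} while (1)

	int main(void)
	{
		uint32_t flags = 0;	/* normally written by another thread */

		/* Exit as soon as (flags & 0x1) == 0; true immediately here,
		 * in real use another thread would clear the bit. */
		rte_wait_event(&flags, 0x1, 0, ==, __ATOMIC_ACQUIRE);
		printf("bit 0 clear: flags=%u\n", (unsigned)flags);
		return 0;
	}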
> -----Original Message-----
> From: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> Sent: Friday, October 8, 2021 12:19 AM
> To: Feifei Wang <Feifei.Wang2@arm.com>; Ruifeng Wang <Ruifeng.Wang@arm.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [RFC PATCH v3 1/5] eal: add new definitions for wait scheme

[snip]

> > diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
> > index 668ee4a184..4e32107eca 100644
> > --- a/lib/eal/include/generic/rte_pause.h
> > +++ b/lib/eal/include/generic/rte_pause.h
> > @@ -111,6 +111,84 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
> >  	while (__atomic_load_n(addr, memorder) != expected)
> >  		rte_pause();
> >  }
> > +
> > +/*
> > + * Wait until a 16-bit *addr breaks the condition, with a relaxed memory
> > + * ordering model meaning the loads around this API can be reordered.
> > + *
> > + * @param addr
> > + *  A pointer to the memory location.
> > + * @param mask
> > + *  A mask of value bits in interest
> > + * @param expected
> > + *  A 16-bit expected value to be in the memory location.
> > + * @param cond
> > + *  A symbol representing the condition (==, !=).
> > + * @param memorder
> > + *  Two different memory orders that can be specified:
> > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > + *  C++11 memory orders with the same names, see the C++11 standard or
> > + *  the GCC wiki on atomic synchronization for detailed definition.
> > + */
>
> Hmm, so now we have 2 APIs doing similar thing:
> rte_wait_until_equal_n() and rte_wait_event_n().
> Can we probably unite them somehow?
> At least make rte_wait_until_equal_n() to use rte_wait_event_n() underneath.
>
You are right. We plan to change the rte_wait_until_equal API after this new
scheme is achieved. And then, we will merge wait_until into the wait_event
definition in the next patch series.

> > +#define rte_wait_event_16(addr, mask, expected, cond, memorder) \
> > +do { \
> > +	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
>
> And why user is not allowed to use __ATOMIC_SEQ_CST here?
Actually this is just a load operation, and acquire here is enough to make sure
the 'load addr value' happens before other operations.

> BTW, if we expect memorder to always be a constant, might be better
> BUILD_BUG_ON()?
If I understand correctly, you mean we can replace 'assert' by 'build_bug_on':
RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder != __ATOMIC_RELAXED);

> > +									\
> > +	while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> > +		rte_pause(); \
> > +} while (0)
>
> Two thoughts with these macros:
> 1. It is good practice to put () around macro parameters in the macro body.
> It will save you from a lot of unexpected trouble.
> 2. I think these 3 macros can be united into one.
> Something like:
>
> #define rte_wait_event(addr, mask, expected, cond, memorder) do {\
> 	typeof (*(addr)) val = __atomic_load_n((addr), (memorder)); \
> 	if ((val & (typeof(val))(mask)) cond (typeof(val))(expected)) \
> 		break; \
> 	rte_pause(); \
> } while (1);
For this point, I think it is because different sizes need different assembly
instructions in the Arm architecture. For example,
load 16 bits instruction is "ldxrh %w[tmp], [%x[addr]"
load 32 bits instruction is "ldxr %w[tmp], [%x[addr]"
load 64 bits instruction is "ldxr %x[tmp], [%x[addr]"
And for consistency, we also use 3 APIs in generic path.
> > > > + > > +/* > > + * Wait until a 32-bit *addr breaks the condition, with a relaxed > > +memory > > + * ordering model meaning the loads around this API can be reordered. > > + * > > + * @param addr > > + * A pointer to the memory location. > > + * @param mask > > + * A mask of value bits in interest. > > + * @param expected > > + * A 32-bit expected value to be in the memory location. > > + * @param cond > > + * A symbol representing the condition (==, !=). > > + * @param memorder > > + * Two different memory orders that can be specified: > > + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to > > + * C++11 memory orders with the same names, see the C++11 standard > > +or > > + * the GCC wiki on atomic synchronization for detailed definition. > > + */ > > +#define rte_wait_event_32(addr, mask, expected, cond, memorder) > \ > > +do { \ > > + assert(memorder == __ATOMIC_ACQUIRE || memorder == > __ATOMIC_RELAXED); \ > > + \ > > + while ((__atomic_load_n(addr, memorder) & mask) cond expected) > \ > > + rte_pause(); \ > > +} while (0) > > + > > +/* > > + * Wait until a 64-bit *addr breaks the condition, with a relaxed > > +memory > > + * ordering model meaning the loads around this API can be reordered. > > + * > > + * @param addr > > + * A pointer to the memory location. > > + * @param mask > > + * A mask of value bits in interest > > + * @param expected > > + * A 64-bit expected value to be in the memory location. > > + * @param cond > > + * A symbol representing the condition (==, !=). > > + * @param memorder > > + * Two different memory orders that can be specified: > > + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to > > + * C++11 memory orders with the same names, see the C++11 standard > > +or > > + * the GCC wiki on atomic synchronization for detailed definition. > > + */ > > +#define rte_wait_event_64(addr, mask, expected, cond, memorder) > \ > > +do { \ > > + assert(memorder == __ATOMIC_ACQUIRE || memorder == > __ATOMIC_RELAXED); \ > > + \ > > + while ((__atomic_load_n(addr, memorder) & mask) cond expected) > \ > > + rte_pause(); \ > > +} while (0) > > #endif > > > > #endif /* _RTE_PAUSE_H_ */ > > -- > > 2.25.1
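As an aside on the memory-order point argued above (acquire is enough on the waiting side), here is a small, plain-pthread demo, not DPDK code: the consumer's acquire load of the flag pairs with the producer's release store, so the later read of the payload is ordered correctly.

	#include <pthread.h>
	#include <stdint.h>
	#include <stdio.h>

	static uint32_t payload;
	static uint32_t ready;

	static void *producer(void *arg)
	{
		(void)arg;
		payload = 42;                                  /* plain store */
		__atomic_store_n(&ready, 1, __ATOMIC_RELEASE); /* publish */
		return NULL;
	}

	int main(void)
	{
		pthread_t t;
		pthread_create(&t, NULL, producer, NULL);

		/* Same shape as rte_wait_event_32(&ready, 1, 0, ==, __ATOMIC_ACQUIRE):
		 * spin while (ready & 1) == 0. */
		while ((__atomic_load_n(&ready, __ATOMIC_ACQUIRE) & 1) == 0)
			;                                      /* busy-wait */

		printf("payload=%u\n", (unsigned)payload);     /* guaranteed 42 */
		pthread_join(t, NULL);
		return 0;
	}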
> > [snip] > > > > diff --git a/lib/eal/include/generic/rte_pause.h > > > b/lib/eal/include/generic/rte_pause.h > > > index 668ee4a184..4e32107eca 100644 > > > --- a/lib/eal/include/generic/rte_pause.h > > > +++ b/lib/eal/include/generic/rte_pause.h > > > @@ -111,6 +111,84 @@ rte_wait_until_equal_64(volatile uint64_t *addr, > > uint64_t expected, > > > while (__atomic_load_n(addr, memorder) != expected) > > > rte_pause(); > > > } > > > + > > > +/* > > > + * Wait until a 16-bit *addr breaks the condition, with a relaxed > > > +memory > > > + * ordering model meaning the loads around this API can be reordered. > > > + * > > > + * @param addr > > > + * A pointer to the memory location. > > > + * @param mask > > > + * A mask of value bits in interest > > > + * @param expected > > > + * A 16-bit expected value to be in the memory location. > > > + * @param cond > > > + * A symbol representing the condition (==, !=). > > > + * @param memorder > > > + * Two different memory orders that can be specified: > > > + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to > > > + * C++11 memory orders with the same names, see the C++11 standard > > > +or > > > + * the GCC wiki on atomic synchronization for detailed definition. > > > + */ > > > > Hmm, so now we have 2 APIs doing similar thing: > > rte_wait_until_equal_n() and rte_wait_event_n(). > > Can we probably unite them somehow? > > At least make rte_wait_until_equal_n() to use rte_wait_event_n() underneath. > > > You are right. We plan to change rte_wait_until_equal API after this new scheme > is achieved. And then, we will merge wait_unil into wait_event definition in the next new > patch series. > > > > +#define rte_wait_event_16(addr, mask, expected, cond, memorder) > > \ > > > +do { \ > > > + assert(memorder == __ATOMIC_ACQUIRE || memorder == > > > +__ATOMIC_RELAXED); \ > > > > And why user is not allowed to use __ATOMIC_SEQ_CST here? > Actually this is just a load operation, and acquire here is enough to make sure 'load > addr value' can be before other operations. > > > BTW, if we expect memorder to always be a constant, might be better > > BUILD_BUG_ON()? > If I understand correctly, you means we can replace 'assert' by 'build_bug_on': > RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder !=__ATOMIC_RELAXED); Yes, that was my thought. In that case I think we should be able to catch wrong memorder at compilation stage. > > > > > > + \ > > > + while ((__atomic_load_n(addr, memorder) & mask) cond expected) > > \ > > > + rte_pause(); \ > > > +} while (0) > > > > Two thoughts with these macros: > > 1. It is a goof practise to put () around macro parameters in the macro body. > > Will save from a lot of unexpected troubles. > > 2. I think these 3 macros can be united into one. > > Something like: > > > > #define rte_wait_event(addr, mask, expected, cond, memorder) do {\ > > typeof (*(addr)) val = __atomic_load_n((addr), (memorder)); \ > > if ((val & (typeof(val))(mask)) cond (typeof(val))(expected)) \ > > break; \ > > rte_pause(); \ > > } while (1); > For this point, I think it is due to different size need to use different assembly instructions > in arm architecture. 
> For example,
> load 16 bits instruction is "ldxrh %w[tmp], [%x[addr]"
> load 32 bits instruction is "ldxr %w[tmp], [%x[addr]"
> load 64 bits instruction is "ldxr %x[tmp], [%x[addr]"

Ok, but it could then be something like that for the arm-specific code:

	if (sizeof(val) == sizeof(uint16_t)) \
		__LOAD_EXC_16(...); \
	else if (sizeof(val) == sizeof(uint32_t)) \
		__LOAD_EXC_32(...); \
	else if (sizeof(val) == sizeof(uint64_t)) \
		__LOAD_EXC_64(...); \
	...

> And for consistency, we also use 3 APIs in generic path.

Honestly, even one multi-line macro doesn't look nice.
Having 3 identical ones looks even worse.
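For illustration, a rough sketch of that dispatch, assuming the __LOAD_EXC_16/32/64 macros from the patch above are in scope (AArch64 only, untested):

	/*
	 * Width dispatch as suggested above: pick the exclusive-load variant
	 * from the size of the destination variable, so a single wait macro
	 * can serve 16-, 32- and 64-bit operands.
	 */
	#define __LOAD_EXC(src, dst, memorder) {            \
		if (sizeof(dst) == sizeof(uint16_t))        \
			__LOAD_EXC_16(src, dst, memorder)   \
		else if (sizeof(dst) == sizeof(uint32_t))   \
			__LOAD_EXC_32(src, dst, memorder)   \
		else if (sizeof(dst) == sizeof(uint64_t))   \
			__LOAD_EXC_64(src, dst, memorder)   \
	}

One caveat of the sizeof() approach is that every branch is still compiled, so the unused asm variants must tolerate the "wrong" operand width; passing an explicit 16/32/64 size argument instead of relying on sizeof() would be one way around that.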
On Wed, 13 Oct 2021 15:03:56 +0000
"Ananyev, Konstantin" <konstantin.ananyev@intel.com> wrote:

> > addr value' can be before other operations.
> >
> > > BTW, if we expect memorder to always be a constant, might be better
> > > BUILD_BUG_ON()?
> > If I understand correctly, you mean we can replace 'assert' by 'build_bug_on':
> > RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder != __ATOMIC_RELAXED);
>
> Yes, that was my thought.
> In that case I think we should be able to catch wrong memorder at compilation stage.

Maybe:
	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));
	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
		memorder != __ATOMIC_RELAXED);
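A self-contained illustration of that idea follows; BUILD_BUG_ON here is a local stand-in for DPDK's RTE_BUILD_BUG_ON() from rte_common.h, and the wait_check_memorder() wrapper name is made up for the example.

	#include <stdint.h>

	/* Local stand-in for DPDK's RTE_BUILD_BUG_ON(). */
	#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2 * !!(condition)]))

	/*
	 * Compile-time check of the memory-order argument: reject a
	 * non-constant memorder and anything other than ACQUIRE/RELAXED.
	 */
	#define wait_check_memorder(memorder) do {                        \
		BUILD_BUG_ON(!__builtin_constant_p(memorder));            \
		BUILD_BUG_ON((memorder) != __ATOMIC_ACQUIRE &&            \
			     (memorder) != __ATOMIC_RELAXED);             \
	} while (0)

	int main(void)
	{
		wait_check_memorder(__ATOMIC_ACQUIRE);   /* compiles */
		/* wait_check_memorder(__ATOMIC_SEQ_CST); would fail to compile */
		return 0;
	}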
> -----Original Message-----
> From: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> Sent: Wednesday, October 13, 2021 11:04 PM
> To: Feifei Wang <Feifei.Wang2@arm.com>; Ruifeng Wang <Ruifeng.Wang@arm.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [RFC PATCH v3 1/5] eal: add new definitions for wait scheme
>
> > [snip]
> >
> > > > diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
> > > > index 668ee4a184..4e32107eca 100644
> > > > --- a/lib/eal/include/generic/rte_pause.h
> > > > +++ b/lib/eal/include/generic/rte_pause.h
> > > > @@ -111,6 +111,84 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
> > > >  	while (__atomic_load_n(addr, memorder) != expected)
> > > >  		rte_pause();
> > > >  }
> > > > +
> > > > +/*
> > > > + * Wait until a 16-bit *addr breaks the condition, with a relaxed memory
> > > > + * ordering model meaning the loads around this API can be reordered.
> > > > + *
> > > > + * @param addr
> > > > + *  A pointer to the memory location.
> > > > + * @param mask
> > > > + *  A mask of value bits in interest
> > > > + * @param expected
> > > > + *  A 16-bit expected value to be in the memory location.
> > > > + * @param cond
> > > > + *  A symbol representing the condition (==, !=).
> > > > + * @param memorder
> > > > + *  Two different memory orders that can be specified:
> > > > + *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
> > > > + *  C++11 memory orders with the same names, see the C++11 standard or
> > > > + *  the GCC wiki on atomic synchronization for detailed definition.
> > > > + */
> > >
> > > Hmm, so now we have 2 APIs doing similar thing:
> > > rte_wait_until_equal_n() and rte_wait_event_n().
> > > Can we probably unite them somehow?
> > > At least make rte_wait_until_equal_n() to use rte_wait_event_n() underneath.
> > >
> > You are right. We plan to change the rte_wait_until_equal API after this
> > new scheme is achieved. And then, we will merge wait_until into the
> > wait_event definition in the next patch series.
> >
> > > > +#define rte_wait_event_16(addr, mask, expected, cond, memorder) \
> > > > +do { \
> > > > +	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
> > >
> > > And why user is not allowed to use __ATOMIC_SEQ_CST here?
> > Actually this is just a load operation, and acquire here is enough to
> > make sure the 'load addr value' happens before other operations.
> >
> > > BTW, if we expect memorder to always be a constant, might be better
> > > BUILD_BUG_ON()?
> > If I understand correctly, you mean we can replace 'assert' by 'build_bug_on':
> > RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder != __ATOMIC_RELAXED);
>
> Yes, that was my thought.
> In that case I think we should be able to catch wrong memorder at compilation stage.
>
> > > > +									\
> > > > +	while ((__atomic_load_n(addr, memorder) & mask) cond expected) \
> > > > +		rte_pause(); \
> > > > +} while (0)
> > >
> > > Two thoughts with these macros:
> > > 1. It is good practice to put () around macro parameters in the macro body.
> > > It will save you from a lot of unexpected trouble.
> > > 2. I think these 3 macros can be united into one.
> > > Something like:
> > >
> > > #define rte_wait_event(addr, mask, expected, cond, memorder) do {\
> > > 	typeof (*(addr)) val = __atomic_load_n((addr), (memorder)); \
> > > 	if ((val & (typeof(val))(mask)) cond (typeof(val))(expected)) \
> > > 		break; \
> > > 	rte_pause(); \
> > > } while (1);
> > For this point, I think it is because different sizes need
> > different assembly instructions in the Arm architecture. For example,
> > load 16 bits instruction is "ldxrh %w[tmp], [%x[addr]"
> > load 32 bits instruction is "ldxr %w[tmp], [%x[addr]"
> > load 64 bits instruction is "ldxr %x[tmp], [%x[addr]"
>
> Ok, but it could then be something like that for the arm-specific code:
> 	if (sizeof(val) == sizeof(uint16_t)) \
> 		__LOAD_EXC_16(...); \
> 	else if (sizeof(val) == sizeof(uint32_t)) \
> 		__LOAD_EXC_32(...); \
> 	else if (sizeof(val) == sizeof(uint64_t)) \
> 		__LOAD_EXC_64(...); \
> 	...
>
I think we should use "addr" as the judgement:

rte_wait_event(addr, mask, expected, cond, memorder)

if (sizeof(*addr) == sizeof(uint16_t))
	uint16_t value \
	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \
	__LOAD_EXC_16(addr, value, memorder) \
	if ((value & mask) cond expected) { \
		__SEVL() \
		do { \
			__WFE() \
			__LOAD_EXC_16(addr, value, memorder) \
		} while ((value & mask) cond expected); \
	}
if (sizeof(*addr) == sizeof(uint32_t))
	..........
if (sizeof(*addr) == sizeof(uint64_t))
	...........

> > And for consistency, we also use 3 APIs in generic path.
> Honestly, even one multi-line macro doesn't look nice.
> Having 3 identical ones looks even worse.
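Putting the two ideas together, a rough, untested sketch of a single Arm-side wait macro could look like the following. It assumes the __LOAD_EXC_16/32/64, __SEVL() and __WFE() macros from the patch above are in scope (AArch64 only), and the rte_wait_event name simply follows the discussion; the 32/64-bit branches are omitted for brevity.

	#define rte_wait_event(addr, mask, expected, cond, memorder)            \
	do {                                                                     \
		typeof(*(addr)) value;                                           \
		assert(memorder == __ATOMIC_ACQUIRE ||                           \
		       memorder == __ATOMIC_RELAXED);                            \
		if (sizeof(*(addr)) == sizeof(uint16_t)) {                       \
			__LOAD_EXC_16(addr, value, memorder)                     \
			if ((value & (mask)) cond (expected)) {                  \
				__SEVL()                                         \
				do {                                             \
					__WFE()                                  \
					__LOAD_EXC_16(addr, value, memorder)     \
				} while ((value & (mask)) cond (expected));      \
			}                                                        \
		}                                                                \
		/* ...analogous branches using __LOAD_EXC_32/__LOAD_EXC_64... */ \
	} while (0)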
> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Thursday, October 14, 2021 1:00 AM
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> Cc: Feifei Wang <Feifei.Wang2@arm.com>; Ruifeng Wang <Ruifeng.Wang@arm.com>; dev@dpdk.org; nd <nd@arm.com>
> Subject: Re: [dpdk-dev] [RFC PATCH v3 1/5] eal: add new definitions for wait scheme
>
> On Wed, 13 Oct 2021 15:03:56 +0000
> "Ananyev, Konstantin" <konstantin.ananyev@intel.com> wrote:
>
> > > addr value' can be before other operations.
> > >
> > > > BTW, if we expect memorder to always be a constant, might be
> > > > better BUILD_BUG_ON()?
> > > If I understand correctly, you mean we can replace 'assert' by 'build_bug_on':
> > > RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE && memorder != __ATOMIC_RELAXED);
> >
> > Yes, that was my thought.
> > In that case I think we should be able to catch wrong memorder at compilation stage.
>
> Maybe:
> 	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));
> 	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
> 		memorder != __ATOMIC_RELAXED);
>
Thanks for your comments. One question on this: I do not understand why we should
check that memorder is a constant. Is it to check whether memorder has been
assigned, or is NULL?
diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h index e87d10b8cc..205510e044 100644 --- a/lib/eal/arm/include/rte_pause_64.h +++ b/lib/eal/arm/include/rte_pause_64.h @@ -31,20 +31,12 @@ static inline void rte_pause(void) /* Put processor into low power WFE(Wait For Event) state. */ #define __WFE() { asm volatile("wfe" : : : "memory"); } -static __rte_always_inline void -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected, - int memorder) -{ - uint16_t value; - - assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); - - /* - * Atomic exclusive load from addr, it returns the 16-bit content of - * *addr while making it 'monitored',when it is written by someone - * else, the 'monitored' state is cleared and a event is generated - * implicitly to exit WFE. - */ +/* + * Atomic exclusive load from addr, it returns the 16-bit content of + * *addr while making it 'monitored', when it is written by someone + * else, the 'monitored' state is cleared and a event is generated + * implicitly to exit WFE. + */ #define __LOAD_EXC_16(src, dst, memorder) { \ if (memorder == __ATOMIC_RELAXED) { \ asm volatile("ldxrh %w[tmp], [%x[addr]]" \ @@ -58,6 +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected, : "memory"); \ } } +/* + * Atomic exclusive load from addr, it returns the 32-bit content of + * *addr while making it 'monitored', when it is written by someone + * else, the 'monitored' state is cleared and a event is generated + * implicitly to exit WFE. + */ +#define __LOAD_EXC_32(src, dst, memorder) { \ + if (memorder == __ATOMIC_RELAXED) { \ + asm volatile("ldxr %w[tmp], [%x[addr]]" \ + : [tmp] "=&r" (dst) \ + : [addr] "r"(src) \ + : "memory"); \ + } else { \ + asm volatile("ldaxr %w[tmp], [%x[addr]]" \ + : [tmp] "=&r" (dst) \ + : [addr] "r"(src) \ + : "memory"); \ + } } + +/* + * Atomic exclusive load from addr, it returns the 64-bit content of + * *addr while making it 'monitored', when it is written by someone + * else, the 'monitored' state is cleared and a event is generated + * implicitly to exit WFE. + */ +#define __LOAD_EXC_64(src, dst, memorder) { \ + if (memorder == __ATOMIC_RELAXED) { \ + asm volatile("ldxr %x[tmp], [%x[addr]]" \ + : [tmp] "=&r" (dst) \ + : [addr] "r"(src) \ + : "memory"); \ + } else { \ + asm volatile("ldaxr %x[tmp], [%x[addr]]" \ + : [tmp] "=&r" (dst) \ + : [addr] "r"(src) \ + : "memory"); \ + } } + +static __rte_always_inline void +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected, + int memorder) +{ + uint16_t value; + + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); + __LOAD_EXC_16(addr, value, memorder) if (value != expected) { __SEVL() @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected, __LOAD_EXC_16(addr, value, memorder) } while (value != expected); } -#undef __LOAD_EXC_16 } static __rte_always_inline void @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected, assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); - /* - * Atomic exclusive load from addr, it returns the 32-bit content of - * *addr while making it 'monitored',when it is written by someone - * else, the 'monitored' state is cleared and a event is generated - * implicitly to exit WFE. 
- */ -#define __LOAD_EXC_32(src, dst, memorder) { \ - if (memorder == __ATOMIC_RELAXED) { \ - asm volatile("ldxr %w[tmp], [%x[addr]]" \ - : [tmp] "=&r" (dst) \ - : [addr] "r"(src) \ - : "memory"); \ - } else { \ - asm volatile("ldaxr %w[tmp], [%x[addr]]" \ - : [tmp] "=&r" (dst) \ - : [addr] "r"(src) \ - : "memory"); \ - } } - __LOAD_EXC_32(addr, value, memorder) if (value != expected) { __SEVL() @@ -104,7 +122,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected, __LOAD_EXC_32(addr, value, memorder) } while (value != expected); } -#undef __LOAD_EXC_32 } static __rte_always_inline void @@ -115,25 +132,6 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected, assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); - /* - * Atomic exclusive load from addr, it returns the 64-bit content of - * *addr while making it 'monitored',when it is written by someone - * else, the 'monitored' state is cleared and a event is generated - * implicitly to exit WFE. - */ -#define __LOAD_EXC_64(src, dst, memorder) { \ - if (memorder == __ATOMIC_RELAXED) { \ - asm volatile("ldxr %x[tmp], [%x[addr]]" \ - : [tmp] "=&r" (dst) \ - : [addr] "r"(src) \ - : "memory"); \ - } else { \ - asm volatile("ldaxr %x[tmp], [%x[addr]]" \ - : [tmp] "=&r" (dst) \ - : [addr] "r"(src) \ - : "memory"); \ - } } - __LOAD_EXC_64(addr, value, memorder) if (value != expected) { __SEVL() @@ -143,6 +141,51 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected, } while (value != expected); } } + +#define rte_wait_event_16(addr, mask, expected, cond, memorder) \ +do { \ + uint16_t value \ + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \ + __LOAD_EXC_16(addr, value, memorder) \ + if ((value & mask) cond expected) { \ + __SEVL() \ + do { \ + __WFE() \ + __LOAD_EXC_16(addr, value, memorder) \ + } while ((value & mask) cond expected); \ + } \ +} while (0) + +#define rte_wait_event_32(addr, mask, expected, cond, memorder) \ +do { \ + uint32_t value \ + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \ + __LOAD_EXC_32(addr, value, memorder) \ + if ((value & mask) op expected) { \ + __SEVL() \ + do { \ + __WFE() \ + __LOAD_EXC_32(addr, value, memorder) \ + } while ((value & mask) cond expected); \ + } \ +} while (0) + +#define rte_wait_event_64(addr, mask, expected, cond, memorder) \ +do { \ + uint64_t value \ + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \ + __LOAD_EXC_64(addr, value, memorder) \ + if ((value & mask) cond expected) { \ + __SEVL() \ + do { \ + __WFE() \ + __LOAD_EXC_64(addr, value, memorder) \ + } while ((value & mask) cond expected); \ + } \ +} while (0) + +#undef __LOAD_EXC_16 +#undef __LOAD_EXC_32 #undef __LOAD_EXC_64 #undef __SEVL diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h index 668ee4a184..4e32107eca 100644 --- a/lib/eal/include/generic/rte_pause.h +++ b/lib/eal/include/generic/rte_pause.h @@ -111,6 +111,84 @@ rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected, while (__atomic_load_n(addr, memorder) != expected) rte_pause(); } + +/* + * Wait until a 16-bit *addr breaks the condition, with a relaxed memory + * ordering model meaning the loads around this API can be reordered. + * + * @param addr + * A pointer to the memory location. + * @param mask + * A mask of value bits in interest + * @param expected + * A 16-bit expected value to be in the memory location. + * @param cond + * A symbol representing the condition (==, !=). 
+ * @param memorder + * Two different memory orders that can be specified: + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to + * C++11 memory orders with the same names, see the C++11 standard or + * the GCC wiki on atomic synchronization for detailed definition. + */ +#define rte_wait_event_16(addr, mask, expected, cond, memorder) \ +do { \ + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \ + \ + while ((__atomic_load_n(addr, memorder) & mask) cond expected) \ + rte_pause(); \ +} while (0) + +/* + * Wait until a 32-bit *addr breaks the condition, with a relaxed memory + * ordering model meaning the loads around this API can be reordered. + * + * @param addr + * A pointer to the memory location. + * @param mask + * A mask of value bits in interest. + * @param expected + * A 32-bit expected value to be in the memory location. + * @param cond + * A symbol representing the condition (==, !=). + * @param memorder + * Two different memory orders that can be specified: + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to + * C++11 memory orders with the same names, see the C++11 standard or + * the GCC wiki on atomic synchronization for detailed definition. + */ +#define rte_wait_event_32(addr, mask, expected, cond, memorder) \ +do { \ + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \ + \ + while ((__atomic_load_n(addr, memorder) & mask) cond expected) \ + rte_pause(); \ +} while (0) + +/* + * Wait until a 64-bit *addr breaks the condition, with a relaxed memory + * ordering model meaning the loads around this API can be reordered. + * + * @param addr + * A pointer to the memory location. + * @param mask + * A mask of value bits in interest + * @param expected + * A 64-bit expected value to be in the memory location. + * @param cond + * A symbol representing the condition (==, !=). + * @param memorder + * Two different memory orders that can be specified: + * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to + * C++11 memory orders with the same names, see the C++11 standard or + * the GCC wiki on atomic synchronization for detailed definition. + */ +#define rte_wait_event_64(addr, mask, expected, cond, memorder) \ +do { \ + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); \ + \ + while ((__atomic_load_n(addr, memorder) & mask) cond expected) \ + rte_pause(); \ +} while (0) #endif #endif /* _RTE_PAUSE_H_ */
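For context, a hypothetical caller of the new generic macro from this RFC revision might look like the sketch below; the helper and the RING_BUSY flag are made up for illustration, only the rte_wait_event_32() macro comes from the patch. The condition argument expresses the state to keep waiting in, so mask 0x1 with cond != and expected 0 spins until the BUSY bit is cleared.

	#include <assert.h>
	#include <stdint.h>

	#include <rte_pause.h>

	#define RING_BUSY 0x1u

	/* Block until the BUSY bit in *status is cleared by another thread
	 * or by hardware (example helper, not part of the patch). */
	static inline void
	wait_ring_idle(volatile uint32_t *status)
	{
		/* Spin while (*status & RING_BUSY) != 0, i.e. until BUSY clears. */
		rte_wait_event_32(status, RING_BUSY, 0, !=, __ATOMIC_ACQUIRE);
	}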