[v9,3/3] eal/atomic: add wrapper for C11 atomic thread fence
Checks
Commit Message
Provide a wrapper for __atomic_thread_fence builtins to support
optimized code for __ATOMIC_SEQ_CST memory order for x86 platforms.
Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Ola Liljedahl <Ola.Liljedahl@arm.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
lib/librte_eal/arm/include/rte_atomic_32.h | 6 ++++++
lib/librte_eal/arm/include/rte_atomic_64.h | 6 ++++++
lib/librte_eal/include/generic/rte_atomic.h | 6 ++++++
lib/librte_eal/ppc/include/rte_atomic.h | 6 ++++++
lib/librte_eal/x86/include/rte_atomic.h | 17 +++++++++++++++++
5 files changed, 41 insertions(+)
Comments
17/07/2020 07:08, Phil Yang:
> Provide a wrapper for __atomic_thread_fence builtins to support
> optimized code for __ATOMIC_SEQ_CST memory order for x86 platforms.
Logically, it should be patch 2.
So you can recommend using this wrapper in the explanation
of the actual patch 2.
Thomas Monjalon <thomas@monjalon.net> writes:
> > Provide a wrapper for __atomic_thread_fence builtins to support
> > optimized code for __ATOMIC_SEQ_CST memory order for x86 platforms.
>
> Logically, it should be patch 2.
> So you can recommend using this wrapper in the explanation
> of the actual patch 2.
Indeed. Thanks.
Will change it in v10.
@@ -37,6 +37,12 @@ extern "C" {
#define rte_cio_rmb() rte_rmb()
+static __rte_always_inline void
+rte_atomic_thread_fence(int mo)
+{
+ __atomic_thread_fence(mo);
+}
+
#ifdef __cplusplus
}
#endif
@@ -41,6 +41,12 @@ extern "C" {
#define rte_cio_rmb() rte_rmb()
+static __rte_always_inline void
+rte_atomic_thread_fence(int mo)
+{
+ __atomic_thread_fence(mo);
+}
+
/*------------------------ 128 bit atomic operations -------------------------*/
#if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS)
@@ -158,6 +158,12 @@ static inline void rte_cio_rmb(void);
asm volatile ("" : : : "memory"); \
} while(0)
+/**
+ * Synchronization fence between threads based on the specified
+ * memory order.
+ */
+static inline void rte_atomic_thread_fence(int mo);
+
/*------------------------- 16 bit atomic operations -------------------------*/
/**
@@ -40,6 +40,12 @@ extern "C" {
#define rte_cio_rmb() rte_rmb()
+static __rte_always_inline void
+rte_atomic_thread_fence(int mo)
+{
+ __atomic_thread_fence(mo);
+}
+
/*------------------------- 16 bit atomic operations -------------------------*/
/* To be compatible with Power7, use GCC built-in functions for 16 bit
* operations */
@@ -83,6 +83,23 @@ rte_smp_mb(void)
#define rte_cio_rmb() rte_compiler_barrier()
+/**
+ * Synchronization fence between threads based on the specified
+ * memory order.
+ *
+ * On x86 the __atomic_thread_fence(__ATOMIC_SEQ_CST) generates
+ * full 'mfence' which is quite expensive. The optimized
+ * implementation of rte_smp_mb is used instead.
+ */
+static __rte_always_inline void
+rte_atomic_thread_fence(int mo)
+{
+ if (mo == __ATOMIC_SEQ_CST)
+ rte_smp_mb();
+ else
+ __atomic_thread_fence(mo);
+}
+
/*------------------------- 16 bit atomic operations -------------------------*/
#ifndef RTE_FORCE_INTRINSICS