[RFC,1/5] eal: add the APIs to wait until equal

Message ID 1561911676-37718-2-git-send-email-gavin.hu@arm.com (mailing list archive)
State Superseded, archived
Headers
Series use WFE for locks and ring on aarch64 |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Gavin Hu June 30, 2019, 4:21 p.m. UTC
  The rte_wait_until_equal_xxx APIs abstract the functionality of 'polling
for a memory location to become equal to a given value'.

Signed-off-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Steve Capper <steve.capper@arm.com>
Reviewed-by: Ola Liljedahl <ola.liljedahl@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 .../common/include/arch/arm/rte_pause_64.h         | 143 +++++++++++++++++++++
 lib/librte_eal/common/include/generic/rte_pause.h  |  20 +++
 2 files changed, 163 insertions(+)
  

Comments

Stephen Hemminger June 30, 2019, 8:27 p.m. UTC | #1
On Mon,  1 Jul 2019 00:21:12 +0800
Gavin Hu <gavin.hu@arm.com> wrote:

> +#ifdef RTE_USE_WFE
> +#define rte_wait_until_equal_relaxed(addr, expected) do {\
> +		typeof(*addr) tmp;  \
> +		if (__builtin_constant_p((expected))) \
> +			do { \
> +				if (sizeof(*(addr)) == 16)\
> +					asm volatile(  \
> +						"sevl\n"  \
> +						"1:	 wfe\n"  \
> +						"ldxrh  %w0, %1\n"  \
> +						"cmp	%w0, %w2\n"  \
> +						"bne	1b\n"  \
> +						: "=&r"(tmp)  \
> +						: "Q"(*addr), "i"(expected)  \
> +						: "cc", "memory");  \
> +				else if (sizeof(*(addr)) == 32)\
> +					asm volatile(  \
> +						"sevl\n"  \
> +						"1:	 wfe\n"  \
> +						"ldxr  %w0, %1\n"  \
> +						"cmp	%w0, %w2\n"  \
> +						"bne	1b\n"  \
> +						: "=&r"(tmp)  \
> +						: "Q"(*addr), "i"(expected)  \
> +						: "cc", "memory");  \
> +				else if (sizeof(*(addr)) == 64)\
> +					asm volatile(  \
> +						"sevl\n"  \
> +						"1:	 wfe\n"  \
> +						"ldxr  %x0, %1\n"  \
> +						"cmp	%x0, %x2\n"  \
> +						"bne	1b\n"  \
> +						: "=&r" (tmp)  \
> +						: "Q"(*addr), "i"(expected)  \
> +						: "cc", "memory"); \
> +			} while (0); \
> +		else \
> +			do { \
> +				if (sizeof(*(addr)) == 16)\
> +					asm volatile(  \
> +						"sevl\n"  \
> +						"1:	 wfe\n"  \
> +						"ldxrh  %w0, %1\n"  \
> +						"cmp	%w0, %w2\n"  \
> +						"bne	1b\n"  \
> +						: "=&r"(tmp)  \
> +						: "Q"(*addr), "r"(expected)  \
> +						: "cc", "memory");  \
> +				else if (sizeof(*(addr)) == 32)\
> +					asm volatile(  \
> +						"sevl\n"  \
> +						"1:	 wfe\n"  \
> +						"ldxr  %w0, %1\n"  \
> +						"cmp	%w0, %w2\n"  \
> +						"bne	1b\n"  \
> +						: "=&r"(tmp)  \
> +						: "Q"(*addr), "r"(expected)  \
> +						: "cc", "memory");  \
> +				else if (sizeof(*(addr)) == 64)\
> +					asm volatile(  \
> +						"sevl\n"  \
> +						"1:	 wfe\n"  \
> +						"ldxr  %x0, %1\n"  \
> +						"cmp	%x0, %x2\n"  \
> +						"bne	1b\n"  \
> +						: "=&r" (tmp)  \
> +						: "Q"(*addr), "r"(expected)  \
> +						: "cc", "memory");  \
> +		} while (0); \
> +} while (0)

That is a hot mess.
Macro's are harder to maintain and offer no benefit over inline functions.
  
Gavin Hu July 1, 2019, 7:16 a.m. UTC | #2
Hi Stephen,

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Monday, July 1, 2019 4:28 AM
> To: Gavin Hu (Arm Technology China) <Gavin.Hu@arm.com>
> Cc: dev@dpdk.org; thomas@monjalon.net; jerinj@marvell.com;
> hemant.agrawal@nxp.com; bruce.richardson@intel.com;
> chaozhu@linux.vnet.ibm.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: Re: [dpdk-dev] [RFC 1/5] eal: add the APIs to wait until equal
> 
> On Mon,  1 Jul 2019 00:21:12 +0800
> Gavin Hu <gavin.hu@arm.com> wrote:
> 
> > +#ifdef RTE_USE_WFE
> > +#define rte_wait_until_equal_relaxed(addr, expected) do {\
> > +		typeof(*addr) tmp;  \
> > +		if (__builtin_constant_p((expected))) \
> > +			do { \
> > +				if (sizeof(*(addr)) == 16)\
> > +					asm volatile(  \
> > +						"sevl\n"  \
> > +						"1:	 wfe\n"  \
> > +						"ldxrh  %w0, %1\n"  \
> > +						"cmp	%w0, %w2\n"  \
> > +						"bne	1b\n"  \
> > +						: "=&r"(tmp)  \
> > +						: "Q"(*addr), "i"(expected)  \
> > +						: "cc", "memory");  \
> > +				else if (sizeof(*(addr)) == 32)\
> > +					asm volatile(  \
> > +						"sevl\n"  \
> > +						"1:	 wfe\n"  \
> > +						"ldxr  %w0, %1\n"  \
> > +						"cmp	%w0, %w2\n"  \
> > +						"bne	1b\n"  \
> > +						: "=&r"(tmp)  \
> > +						: "Q"(*addr), "i"(expected)  \
> > +						: "cc", "memory");  \
> > +				else if (sizeof(*(addr)) == 64)\
> > +					asm volatile(  \
> > +						"sevl\n"  \
> > +						"1:	 wfe\n"  \
> > +						"ldxr  %x0, %1\n"  \
> > +						"cmp	%x0, %x2\n"  \
> > +						"bne	1b\n"  \
> > +						: "=&r" (tmp)  \
> > +						: "Q"(*addr), "i"(expected)  \
> > +						: "cc", "memory"); \
> > +			} while (0); \
> > +		else \
> > +			do { \
> > +				if (sizeof(*(addr)) == 16)\
> > +					asm volatile(  \
> > +						"sevl\n"  \
> > +						"1:	 wfe\n"  \
> > +						"ldxrh  %w0, %1\n"  \
> > +						"cmp	%w0, %w2\n"  \
> > +						"bne	1b\n"  \
> > +						: "=&r"(tmp)  \
> > +						: "Q"(*addr), "r"(expected)  \
> > +						: "cc", "memory");  \
> > +				else if (sizeof(*(addr)) == 32)\
> > +					asm volatile(  \
> > +						"sevl\n"  \
> > +						"1:	 wfe\n"  \
> > +						"ldxr  %w0, %1\n"  \
> > +						"cmp	%w0, %w2\n"  \
> > +						"bne	1b\n"  \
> > +						: "=&r"(tmp)  \
> > +						: "Q"(*addr), "r"(expected)  \
> > +						: "cc", "memory");  \
> > +				else if (sizeof(*(addr)) == 64)\
> > +					asm volatile(  \
> > +						"sevl\n"  \
> > +						"1:	 wfe\n"  \
> > +						"ldxr  %x0, %1\n"  \
> > +						"cmp	%x0, %x2\n"  \
> > +						"bne	1b\n"  \
> > +						: "=&r" (tmp)  \
> > +						: "Q"(*addr), "r"(expected)  \
> > +						: "cc", "memory");  \
> > +		} while (0); \
> > +} while (0)
> 
> That is a hot mess.
> Macro's are harder to maintain and offer no benefit over inline functions.
During our internal review, I ever used C11 _Generic to generalize the API to take different types of arguments. 
That makes the API look much simpler and better, but it poses a hard requirement for C11 and gcc 4.9+.
That means, Gaetan's patch, as shown below, has to be reverted, otherwise there are compiling errors.
https://gcc.gnu.org/wiki/C11Status 
$ git show ea7726a6
commit ea7726a6ee4b2b63313c4a198522a8dcea70c13d
Author: Gaetan Rivet <gaetan.rivet@6wind.com>
Date:   Thu Jul 20 14:27:53 2017 +0200

    net/failsafe: fix build on FreeBSD 10 with GCC 4.8

diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index 32aaaa2..d516d36 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -50,7 +50,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_flow.c
 # No exported include files

 # Basic CFLAGS:
-CFLAGS += -std=c11 -Wextra
+CFLAGS += -std=gnu99 -Wextra
  
Thomas Monjalon July 1, 2019, 7:43 a.m. UTC | #3
01/07/2019 09:16, Gavin Hu (Arm Technology China):
> From: Stephen Hemminger <stephen@networkplumber.org>
> > Gavin Hu <gavin.hu@arm.com> wrote:
> > 
> > > +#ifdef RTE_USE_WFE
> > > +#define rte_wait_until_equal_relaxed(addr, expected) do {\
[...]
> > That is a hot mess.
> > Macro's are harder to maintain and offer no benefit over inline functions.
> 
> During our internal review, I ever used C11 _Generic to generalize the API to take different types of arguments. 

Gavin, the question is about macros versus functions.
Please, could you convert it into an inline function?
  
Pavan Nikhilesh Bhagavatula July 1, 2019, 9:58 a.m. UTC | #4
Hi Gavin,

>-----Original Message-----
>From: dev <dev-bounces@dpdk.org> On Behalf Of Gavin Hu
>Sent: Sunday, June 30, 2019 9:51 PM
>To: dev@dpdk.org
>Cc: thomas@monjalon.net; Jerin Jacob Kollanukkaran
><jerinj@marvell.com>; hemant.agrawal@nxp.com;
>bruce.richardson@intel.com; chaozhu@linux.vnet.ibm.com;
>Honnappa.Nagarahalli@arm.com; nd@arm.com; gavin.hu@arm.com
>Subject: [dpdk-dev] [RFC 1/5] eal: add the APIs to wait until equal
>
>The rte_wait_until_equal_xxx APIs abstract the functionality of 'polling
>for a memory location to become equal to a given value'.
>
>Signed-off-by: Gavin Hu <gavin.hu@arm.com>
>Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>Reviewed-by: Steve Capper <steve.capper@arm.com>
>Reviewed-by: Ola Liljedahl <ola.liljedahl@arm.com>
>Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
>---
> .../common/include/arch/arm/rte_pause_64.h         | 143
>+++++++++++++++++++++
> lib/librte_eal/common/include/generic/rte_pause.h  |  20 +++
> 2 files changed, 163 insertions(+)
>
>diff --git a/lib/librte_eal/common/include/arch/arm/rte_pause_64.h
>b/lib/librte_eal/common/include/arch/arm/rte_pause_64.h
>index 93895d3..0095da6 100644
>--- a/lib/librte_eal/common/include/arch/arm/rte_pause_64.h
>+++ b/lib/librte_eal/common/include/arch/arm/rte_pause_64.h
>@@ -17,6 +17,149 @@ static inline void rte_pause(void)
> 	asm volatile("yield" ::: "memory");
> }
>
>+#ifdef RTE_USE_WFE
>+#define rte_wait_until_equal_relaxed(addr, expected) do {\
>+		typeof(*addr) tmp;  \
>+		if (__builtin_constant_p((expected))) \
>+			do { \
>+				if (sizeof(*(addr)) == 16)\
>+					asm volatile(  \
>+						"sevl\n"  \
>+						"1:	 wfe\n"  \
>+						"ldxrh  %w0, %1\n"  \
>+						"cmp	%w0, %w2\n"  \
>+						"bne	1b\n"  \
>+						: "=&r"(tmp)  \
>+						: "Q"(*addr),
>"i"(expected)  \
>+						: "cc", "memory");  \

Can we have early exit here i.e. instead of going directly to wfe can we first check the condition and then fallthrough?
Something like:
		asm volatile("	ldxrh	%w0 %1	\n"
			        "    cmp	%w0 %w2	\n"
			        "	b.eq  	2:		\n"
			        "1: wfe			\n"
			        "    ldxrh	%w0, %1	\n"  
			        "    cmp	%w0, %w2	\n"  
			        "    b.ne	1b		\n"  
			        "2:				\n"
			        :::);

Regards,
Pavan.
  
Gavin Hu July 2, 2019, 2:07 p.m. UTC | #5
Hi Thomas,
> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Monday, July 1, 2019 3:43 PM
> To: Gavin Hu (Arm Technology China) <Gavin.Hu@arm.com>
> Cc: Stephen Hemminger <stephen@networkplumber.org>; dev@dpdk.org;
> jerinj@marvell.com; hemant.agrawal@nxp.com;
> bruce.richardson@intel.com; chaozhu@linux.vnet.ibm.com; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>;
> gaetan.rivet@6wind.com
> Subject: Re: [dpdk-dev] [RFC 1/5] eal: add the APIs to wait until equal
> 
> 01/07/2019 09:16, Gavin Hu (Arm Technology China):
> > From: Stephen Hemminger <stephen@networkplumber.org>
> > > Gavin Hu <gavin.hu@arm.com> wrote:
> > >
> > > > +#ifdef RTE_USE_WFE
> > > > +#define rte_wait_until_equal_relaxed(addr, expected) do {\
> [...]
> > > That is a hot mess.
> > > Macro's are harder to maintain and offer no benefit over inline functions.
> >
> > During our internal review, I ever used C11 _Generic to generalize the API
> to take different types of arguments.
> 
> Gavin, the question is about macros versus functions.
> Please, could you convert it into an inline function?
Sure, I will do it in next version.
  
Gavin Hu July 2, 2019, 2:08 p.m. UTC | #6
Hi Pavan,

> -----Original Message-----
> From: Pavan Nikhilesh Bhagavatula <pbhagavatula@marvell.com>
> Sent: Monday, July 1, 2019 5:59 PM
> To: Gavin Hu (Arm Technology China) <Gavin.Hu@arm.com>; dev@dpdk.org
> Cc: thomas@monjalon.net; jerinj@marvell.com; hemant.agrawal@nxp.com;
> bruce.richardson@intel.com; chaozhu@linux.vnet.ibm.com; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [RFC 1/5] eal: add the APIs to wait until equal
> 
> Hi Gavin,
> 
> >-----Original Message-----
> >From: dev <dev-bounces@dpdk.org> On Behalf Of Gavin Hu
> >Sent: Sunday, June 30, 2019 9:51 PM
> >To: dev@dpdk.org
> >Cc: thomas@monjalon.net; Jerin Jacob Kollanukkaran
> ><jerinj@marvell.com>; hemant.agrawal@nxp.com;
> >bruce.richardson@intel.com; chaozhu@linux.vnet.ibm.com;
> >Honnappa.Nagarahalli@arm.com; nd@arm.com; gavin.hu@arm.com
> >Subject: [dpdk-dev] [RFC 1/5] eal: add the APIs to wait until equal
> >
> >The rte_wait_until_equal_xxx APIs abstract the functionality of 'polling
> >for a memory location to become equal to a given value'.
> >
> >Signed-off-by: Gavin Hu <gavin.hu@arm.com>
> >Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> >Reviewed-by: Steve Capper <steve.capper@arm.com>
> >Reviewed-by: Ola Liljedahl <ola.liljedahl@arm.com>
> >Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> >---
> > .../common/include/arch/arm/rte_pause_64.h         | 143
> >+++++++++++++++++++++
> > lib/librte_eal/common/include/generic/rte_pause.h  |  20 +++
> > 2 files changed, 163 insertions(+)
> >
> >diff --git a/lib/librte_eal/common/include/arch/arm/rte_pause_64.h
> >b/lib/librte_eal/common/include/arch/arm/rte_pause_64.h
> >index 93895d3..0095da6 100644
> >--- a/lib/librte_eal/common/include/arch/arm/rte_pause_64.h
> >+++ b/lib/librte_eal/common/include/arch/arm/rte_pause_64.h
> >@@ -17,6 +17,149 @@ static inline void rte_pause(void)
> > 	asm volatile("yield" ::: "memory");
> > }
> >
> >+#ifdef RTE_USE_WFE
> >+#define rte_wait_until_equal_relaxed(addr, expected) do {\
> >+		typeof(*addr) tmp;  \
> >+		if (__builtin_constant_p((expected))) \
> >+			do { \
> >+				if (sizeof(*(addr)) == 16)\
> >+					asm volatile(  \
> >+						"sevl\n"  \
> >+						"1:	 wfe\n"  \
> >+						"ldxrh  %w0, %1\n"  \
> >+						"cmp	%w0, %w2\n"  \
> >+						"bne	1b\n"  \
> >+						: "=&r"(tmp)  \
> >+						: "Q"(*addr),
> >"i"(expected)  \
> >+						: "cc", "memory");  \
> 
> Can we have early exit here i.e. instead of going directly to wfe can we first
> check the condition and then fallthrough?
> Something like:
> 		asm volatile("	ldxrh	%w0 %1	\n"
> 			        "    cmp	%w0 %w2	\n"
> 			        "	b.eq  	2:		\n"
> 			        "1: wfe			\n"
> 			        "    ldxrh	%w0, %1	\n"
> 			        "    cmp	%w0, %w2	\n"
> 			        "    b.ne	1b		\n"
> 			        "2:				\n"
> 			        :::);
> 
> Regards,
> Pavan.
Ok, I will do it in next version.
  

Patch

diff --git a/lib/librte_eal/common/include/arch/arm/rte_pause_64.h b/lib/librte_eal/common/include/arch/arm/rte_pause_64.h
index 93895d3..0095da6 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_pause_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_pause_64.h
@@ -17,6 +17,149 @@  static inline void rte_pause(void)
 	asm volatile("yield" ::: "memory");
 }
 
+#ifdef RTE_USE_WFE
+#define rte_wait_until_equal_relaxed(addr, expected) do {\
+		typeof(*addr) tmp;  \
+		if (__builtin_constant_p((expected))) \
+			do { \
+				if (sizeof(*(addr)) == 16)\
+					asm volatile(  \
+						"sevl\n"  \
+						"1:	 wfe\n"  \
+						"ldxrh  %w0, %1\n"  \
+						"cmp	%w0, %w2\n"  \
+						"bne	1b\n"  \
+						: "=&r"(tmp)  \
+						: "Q"(*addr), "i"(expected)  \
+						: "cc", "memory");  \
+				else if (sizeof(*(addr)) == 32)\
+					asm volatile(  \
+						"sevl\n"  \
+						"1:	 wfe\n"  \
+						"ldxr  %w0, %1\n"  \
+						"cmp	%w0, %w2\n"  \
+						"bne	1b\n"  \
+						: "=&r"(tmp)  \
+						: "Q"(*addr), "i"(expected)  \
+						: "cc", "memory");  \
+				else if (sizeof(*(addr)) == 64)\
+					asm volatile(  \
+						"sevl\n"  \
+						"1:	 wfe\n"  \
+						"ldxr  %x0, %1\n"  \
+						"cmp	%x0, %x2\n"  \
+						"bne	1b\n"  \
+						: "=&r" (tmp)  \
+						: "Q"(*addr), "i"(expected)  \
+						: "cc", "memory"); \
+			} while (0); \
+		else \
+			do { \
+				if (sizeof(*(addr)) == 16)\
+					asm volatile(  \
+						"sevl\n"  \
+						"1:	 wfe\n"  \
+						"ldxrh  %w0, %1\n"  \
+						"cmp	%w0, %w2\n"  \
+						"bne	1b\n"  \
+						: "=&r"(tmp)  \
+						: "Q"(*addr), "r"(expected)  \
+						: "cc", "memory");  \
+				else if (sizeof(*(addr)) == 32)\
+					asm volatile(  \
+						"sevl\n"  \
+						"1:	 wfe\n"  \
+						"ldxr  %w0, %1\n"  \
+						"cmp	%w0, %w2\n"  \
+						"bne	1b\n"  \
+						: "=&r"(tmp)  \
+						: "Q"(*addr), "r"(expected)  \
+						: "cc", "memory");  \
+				else if (sizeof(*(addr)) == 64)\
+					asm volatile(  \
+						"sevl\n"  \
+						"1:	 wfe\n"  \
+						"ldxr  %x0, %1\n"  \
+						"cmp	%x0, %x2\n"  \
+						"bne	1b\n"  \
+						: "=&r" (tmp)  \
+						: "Q"(*addr), "r"(expected)  \
+						: "cc", "memory");  \
+		} while (0); \
+} while (0)
+
+#define rte_wait_until_equal_acquire(addr, expected) do {\
+		typeof(*addr) tmp;  \
+		if (__builtin_constant_p((expected))) \
+			do { \
+				if (sizeof(*(addr)) == 16)\
+					asm volatile(  \
+						"sevl\n"  \
+						"1:	 wfe\n"  \
+						"ldaxrh  %w0, %1\n"  \
+						"cmp	%w0, %w2\n"  \
+						"bne	1b\n"  \
+						: "=&r"(tmp)  \
+						: "Q"(*addr), "i"(expected)  \
+						: "cc", "memory");  \
+				else if (sizeof(*(addr)) == 32)\
+					asm volatile(  \
+						"sevl\n"  \
+						"1:	 wfe\n"  \
+						"ldaxr  %w0, %1\n"  \
+						"cmp	%w0, %w2\n"  \
+						"bne	1b\n"  \
+						: "=&r"(tmp)  \
+						: "Q"(*addr), "i"(expected)  \
+						: "cc", "memory");  \
+				else if (sizeof(*(addr)) == 64)\
+					asm volatile(  \
+						"sevl\n"  \
+						"1:	 wfe\n"  \
+						"ldaxr  %x0, %1\n"  \
+						"cmp	%x0, %x2\n"  \
+						"bne	1b\n"  \
+						: "=&r" (tmp)  \
+						: "Q"(*addr), "i"(expected)  \
+						: "cc", "memory"); \
+			} while (0); \
+		else \
+			do { \
+				if (sizeof(*(addr)) == 16)\
+					asm volatile(  \
+						"sevl\n"  \
+						"1:	 wfe\n"  \
+						"ldaxrh  %w0, %1\n"  \
+						"cmp	%w0, %w2\n"  \
+						"bne	1b\n"  \
+						: "=&r"(tmp)  \
+						: "Q"(*addr), "r"(expected)  \
+						: "cc", "memory");  \
+				else if (sizeof(*(addr)) == 32)\
+					asm volatile(  \
+						"sevl\n"  \
+						"1:	 wfe\n"  \
+						"ldaxr  %w0, %1\n"  \
+						"cmp	%w0, %w2\n"  \
+						"bne	1b\n"  \
+						: "=&r"(tmp)  \
+						: "Q"(*addr), "r"(expected)  \
+						: "cc", "memory");  \
+				else if (sizeof(*(addr)) == 64)\
+					asm volatile(  \
+						"sevl\n"  \
+						"1:	 wfe\n"  \
+						"ldaxr  %x0, %1\n"  \
+						"cmp	%x0, %x2\n"  \
+						"bne	1b\n"  \
+						: "=&r" (tmp)  \
+						: "Q"(*addr), "r"(expected)  \
+						: "cc", "memory");  \
+		} while (0); \
+} while (0)
+
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/generic/rte_pause.h b/lib/librte_eal/common/include/generic/rte_pause.h
index 52bd4db..c115b61 100644
--- a/lib/librte_eal/common/include/generic/rte_pause.h
+++ b/lib/librte_eal/common/include/generic/rte_pause.h
@@ -20,4 +20,24 @@ 
  */
 static inline void rte_pause(void);
 
+#if !defined(RTE_USE_WFE)
+#define rte_wait_until_equal_relaxed(addr, expected) do {\
+		rte_pause();	\
+	} while (*(addr) != (expected))
+
+#ifdef RTE_USE_C11_MEM_MODEL
+#define rte_wait_until_equal_acquire(addr, expected) do {\
+		rte_pause();	\
+	} while (__atomic_load_n((addr), __ATOMIC_ACQUIRE) != (expected))
+#else
+#define rte_wait_until_equal_acquire(addr, expected) do {\
+		do {\
+			rte_pause(); \
+		} while (*(addr) != (expected)); \
+		rte_smp_rmb(); \
+	} while (0)
+#endif
+#endif
+
+
 #endif /* _RTE_PAUSE_H_ */