[RFC,4/5] spinlock: use wfe to reduce contention on aarch64

Message ID: 1561911676-37718-5-git-send-email-gavin.hu@arm.com (mailing list archive)
State: Superseded, archived
Series: use WFE for locks and ring on aarch64

Checks

Context              Check    Description
ci/checkpatch        success  coding style OK
ci/Intel-compilation success  Compilation OK

Commit Message

Gavin Hu June 30, 2019, 4:21 p.m. UTC
When acquiring a spinlock, cores repeatedly poll the lock variable.
This polling is replaced by the rte_wait_until_equal API.

A 20% performance gain was measured by running spinlock_autotest on 14
isolated cores of a ThunderX2 platform.

Signed-off-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Steve Capper <steve.capper@arm.com>
Reviewed-by: Ola Liljedahl <ola.liljedahl@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 .../common/include/arch/arm/rte_spinlock.h         | 25 ++++++++++++++++++++++
 .../common/include/generic/rte_spinlock.h          |  2 +-
 2 files changed, 26 insertions(+), 1 deletion(-)
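
For readers who want the before/after at C level, here is an editorial sketch of
what the change amounts to. It is not part of the patch: the
rte_wait_until_equal_32() name and signature follow the API as it later appears
in rte_pause.h and may differ from the variant introduced earlier in this RFC
series, and in this revision the aarch64 wait is open-coded in assembly (see the
patch below) rather than calling the API.

/* Editorial sketch only -- assumes rte_spinlock.h and rte_pause.h are included.
 * spinlock_lock_polling() roughly mirrors the existing generic implementation;
 * spinlock_lock_wfe_sketch() shows the wait-until-equal idea, with the
 * rte_wait_until_equal_32() signature assumed, not quoted from this RFC.
 */
static inline void
spinlock_lock_polling(rte_spinlock_t *sl)
{
	int exp = 0;

	while (!__atomic_compare_exchange_n(&sl->locked, &exp, 1, 0,
				__ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
		/* Busy-poll: every iteration hits the contended cache line. */
		while (__atomic_load_n(&sl->locked, __ATOMIC_RELAXED))
			rte_pause();
		exp = 0;
	}
}

static inline void
spinlock_lock_wfe_sketch(rte_spinlock_t *sl)
{
	int exp = 0;

	while (!__atomic_compare_exchange_n(&sl->locked, &exp, 1, 0,
				__ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
		/* Wait for the lock word to read 0; on aarch64 this can be
		 * backed by WFE, so the core sleeps instead of spinning.
		 */
		rte_wait_until_equal_32((volatile uint32_t *)&sl->locked, 0,
				__ATOMIC_RELAXED);
		exp = 0;
	}
}

The benefit comes from the waiting core parking in WFE until the lock holder's
store wakes it up, which reduces coherence traffic on the lock's cache line; the
20% gain quoted above was measured on that contended path.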
  

Patch

diff --git a/lib/librte_eal/common/include/arch/arm/rte_spinlock.h b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h
index 1a6916b..b7e8521 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_spinlock.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h
@@ -16,6 +16,31 @@  extern "C" {
 #include <rte_common.h>
 #include "generic/rte_spinlock.h"
 
+/* armv7a does support WFE, but an explicit wake-up signal using SEV is
+ * required (must be preceded by DSB to drain the store buffer) and
+ * this is less performant, so keep armv7a implementation unchanged.
+ */
+#if defined(RTE_USE_WFE) && defined(RTE_ARCH_ARM64)
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+	unsigned int tmp;
+	/* http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.
+	 * faqs/ka16809.html
+	 */
+	asm volatile(
+		"sevl\n"
+		"1:	 wfe\n"
+		"2:	 ldaxr   %w0, %1\n"
+		"cbnz   %w0, 1b\n"
+		"stxr   %w0, %w2, %1\n"
+		"cbnz   %w0, 2b\n"
+		: "=&r" (tmp), "+Q"(sl->locked)
+		: "r" (1)
+		: "cc", "memory");
+}
+#endif
+
 static inline int rte_tm_supported(void)
 {
 	return 0;
diff --git a/lib/librte_eal/common/include/generic/rte_spinlock.h b/lib/librte_eal/common/include/generic/rte_spinlock.h
index 87ae7a4..cf4f15b 100644
--- a/lib/librte_eal/common/include/generic/rte_spinlock.h
+++ b/lib/librte_eal/common/include/generic/rte_spinlock.h
@@ -57,7 +57,7 @@  rte_spinlock_init(rte_spinlock_t *sl)
 static inline void
 rte_spinlock_lock(rte_spinlock_t *sl);
 
-#ifdef RTE_FORCE_INTRINSICS
+#if defined(RTE_FORCE_INTRINSICS) && !defined(RTE_USE_WFE)
 static inline void
 rte_spinlock_lock(rte_spinlock_t *sl)
 {
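
As a closing note on the new assembly: the sequence relies on the armv8
exclusive monitor rather than an explicit SEV from the unlocker. A rough
C-level paraphrase of its control flow is sketched below; the WFE/exclusive-
monitor interplay cannot be expressed with portable __atomic builtins (which is
exactly why the patch uses inline assembly), so treat this purely as
documentation, with the function name being illustrative.

/* Editorial paraphrase of the control flow of the inline assembly in the
 * aarch64 rte_spinlock_lock() above; illustrative only, not a drop-in
 * replacement, since plain builtins cannot arm the exclusive monitor that
 * WFE sleeps on.
 */
static inline void
spinlock_lock_wfe_flow(rte_spinlock_t *sl)
{
	int old;

	/* "sevl" pre-arms the local event register so that the first "wfe"
	 * at label 1 falls straight through to the load.
	 */
	for (;;) {
		/* Label 1, "wfe": if the lock was seen held, the core sleeps
		 * here; the LDAXR below armed the exclusive monitor, so the
		 * holder's store to sl->locked generates the wake-up event,
		 * with no explicit SEV needed on armv8.
		 */

		/* Label 2, "ldaxr": load-acquire the lock word. */
		old = __atomic_load_n(&sl->locked, __ATOMIC_ACQUIRE);
		if (old != 0)
			continue;	/* "cbnz %w0, 1b": still held, back to wfe */

		/* "stxr" + "cbnz %w0, 2b": try to store 1; if exclusivity was
		 * lost, retry from the load, not from the wfe.
		 */
		if (__atomic_compare_exchange_n(&sl->locked, &old, 1, 0,
					__ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
			return;
	}
}

The second hunk compiles out the generic __atomic-based rte_spinlock_lock()
when RTE_USE_WFE is defined, so the arm-specific WFE definition is the only
one emitted; on armv7 and on other architectures the generic path is unchanged.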