From patchwork Tue Jun 2 13:11:31 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Roman Dementiev X-Patchwork-Id: 5085 Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [IPv6:::1]) by dpdk.org (Postfix) with ESMTP id EEDE3C320; Tue, 2 Jun 2015 15:11:43 +0200 (CEST) Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by dpdk.org (Postfix) with ESMTP id AB46CC2DC for ; Tue, 2 Jun 2015 15:11:41 +0200 (CEST) Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga101.fm.intel.com with ESMTP; 02 Jun 2015 06:11:41 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.13,539,1427785200"; d="scan'208";a="580631547" Received: from obarbu-mobl.ger.corp.intel.com (HELO localhost) ([10.252.44.135]) by orsmga003.jf.intel.com with ESMTP; 02 Jun 2015 06:11:41 -0700 From: Roman Dementiev To: dev@dpdk.org Date: Tue, 2 Jun 2015 15:11:31 +0200 Message-Id: <1433250693-23644-2-git-send-email-roman.dementiev@intel.com> X-Mailer: git-send-email 1.9.5.msysgit.0 In-Reply-To: <1433250693-23644-1-git-send-email-roman.dementiev@intel.com> References: <1433250693-23644-1-git-send-email-roman.dementiev@intel.com> Subject: [dpdk-dev] [PATCH 1/3] spinlock: add support for HTM lock elision for x86 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This patch adds methods that use hardware memory transactions (HTM) on fast-path for spinlocks (a.k.a. lock elision). Here the methods are implemented for x86 using Restricted Transactional Memory instructions (Intel(r) Transactional Synchronization Extensions). The implementation fall-backs to the normal spinlock if HTM is not available or memory transactions fail. This is not a replacement for all spinlock usages since not all critical sections protected by spinlocks are friendly to HTM. Signed-off-by: Roman Dementiev --- .../common/include/arch/ppc_64/rte_spinlock.h | 41 ++++++++ lib/librte_eal/common/include/arch/x86/rte_rtm.h | 73 ++++++++++++++ .../common/include/arch/x86/rte_spinlock.h | 107 +++++++++++++++++++++ .../common/include/generic/rte_spinlock.h | 75 +++++++++++++++ 4 files changed, 296 insertions(+) create mode 100644 lib/librte_eal/common/include/arch/x86/rte_rtm.h diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h index cf8b81a..3336435 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h @@ -66,6 +66,47 @@ rte_spinlock_trylock(rte_spinlock_t *sl) #endif +static inline int rte_tm_supported(void) +{ + return 0; +} + +static inline void +rte_spinlock_lock_tm(rte_spinlock_t *sl) +{ + rte_spinlock_lock(sl); /* fall-back */ +} + +static inline int +rte_spinlock_trylock_tm(rte_spinlock_t *sl) +{ + return rte_spinlock_trylock(sl); +} + +static inline void +rte_spinlock_unlock_tm(rte_spinlock_t *sl) +{ + rte_spinlock_unlock(sl); +} + +static inline void +rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_recursive_lock(slr); /* fall-back */ +} + +static inline void +rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_recursive_unlock(slr); +} + +static inline int +rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr) +{ + return rte_spinlock_recursive_trylock(slr); +} + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/arch/x86/rte_rtm.h b/lib/librte_eal/common/include/arch/x86/rte_rtm.h new file mode 100644 index 0000000..d935641 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_rtm.h @@ -0,0 +1,73 @@ +#ifndef _RTE_RTM_H_ +#define _RTE_RTM_H_ 1 + +/* + * Copyright (c) 2012,2013 Intel Corporation + * Author: Andi Kleen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that: (1) source code distributions + * retain the above copyright notice and this paragraph in its entirety, (2) + * distributions including binary code include the above copyright notice and + * this paragraph in its entirety in the documentation or other materials + * provided with the distribution + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +/* Official RTM intrinsics interface matching gcc/icc, but works + on older gcc compatible compilers and binutils. */ + + +#ifdef __cplusplus +extern "C" { +#endif + + +#define RTE_XBEGIN_STARTED (~0u) +#define RTE_XABORT_EXPLICIT (1 << 0) +#define RTE_XABORT_RETRY (1 << 1) +#define RTE_XABORT_CONFLICT (1 << 2) +#define RTE_XABORT_CAPACITY (1 << 3) +#define RTE_XABORT_DEBUG (1 << 4) +#define RTE_XABORT_NESTED (1 << 5) +#define RTE_XABORT_CODE(x) (((x) >> 24) & 0xff) + +static __attribute__((__always_inline__)) inline +unsigned int rte_xbegin(void) +{ + unsigned int ret = RTE_XBEGIN_STARTED; + + asm volatile(".byte 0xc7,0xf8 ; .long 0" : "+a" (ret) :: "memory"); + return ret; +} + +static __attribute__((__always_inline__)) inline +void rte_xend(void) +{ + asm volatile(".byte 0x0f,0x01,0xd5" ::: "memory"); +} + +static __attribute__((__always_inline__)) inline +void rte_xabort(const unsigned int status) +{ + asm volatile(".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory"); +} + +static __attribute__((__always_inline__)) inline +int rte_xtest(void) +{ + unsigned char out; + + asm volatile(".byte 0x0f,0x01,0xd6 ; setnz %0" : + "=r" (out) :: "memory"); + return out; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RTM_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h index 54fba95..136f25a 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h +++ b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h @@ -39,6 +39,13 @@ extern "C" { #endif #include "generic/rte_spinlock.h" +#include "rte_rtm.h" +#include "rte_cpuflags.h" +#include "rte_branch_prediction.h" +#include + +#define RTE_RTM_MAX_RETRIES (10) +#define RTE_XABORT_LOCK_BUSY (0xff) #ifndef RTE_FORCE_INTRINSICS static inline void @@ -87,6 +94,106 @@ rte_spinlock_trylock (rte_spinlock_t *sl) } #endif +static uint8_t rtm_supported; /* cache the flag to avoid the overhead + of the rte_cpu_get_flag_enabled function */ + +static inline void __attribute__((constructor)) +rte_rtm_init(void) +{ + rtm_supported = rte_cpu_get_flag_enabled(RTE_CPUFLAG_RTM); +} + +static inline int rte_tm_supported(void) +{ + return rtm_supported; +} + +static inline int +rte_try_tm(volatile int *lock) +{ + if (!rtm_supported) + return 0; + + int retries = RTE_RTM_MAX_RETRIES; + + while (likely(retries--)) { + + unsigned int status = rte_xbegin(); + + if (likely(RTE_XBEGIN_STARTED == status)) { + if (unlikely(*lock)) + rte_xabort(RTE_XABORT_LOCK_BUSY); + else + return 1; + } + while (*lock) + rte_pause(); + + if ((status & RTE_XABORT_EXPLICIT) && + (RTE_XABORT_CODE(status) == RTE_XABORT_LOCK_BUSY)) + continue; + + if ((status & RTE_XABORT_RETRY) == 0) /* do not retry */ + break; + } + return 0; +} + +static inline void +rte_spinlock_lock_tm(rte_spinlock_t *sl) +{ + if (likely(rte_try_tm(&sl->locked))) + return; + + rte_spinlock_lock(sl); /* fall-back */ +} + +static inline int +rte_spinlock_trylock_tm(rte_spinlock_t *sl) +{ + if (likely(rte_try_tm(&sl->locked))) + return 1; + + return rte_spinlock_trylock(sl); +} + +static inline void +rte_spinlock_unlock_tm(rte_spinlock_t *sl) +{ + if (unlikely(sl->locked)) + rte_spinlock_unlock(sl); + else + rte_xend(); +} + +static inline void +rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr) +{ + if (likely(rte_try_tm(&slr->sl.locked))) + return; + + rte_spinlock_recursive_lock(slr); /* fall-back */ +} + +static inline void +rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr) +{ + if (unlikely(slr->sl.locked)) + rte_spinlock_recursive_unlock(slr); + else + rte_xend(); +} + +static inline int +rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr) +{ + if (likely(rte_try_tm(&slr->sl.locked))) + return 1; + + return rte_spinlock_recursive_trylock(slr); +} + + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/generic/rte_spinlock.h b/lib/librte_eal/common/include/generic/rte_spinlock.h index c7fb0df..ddb79bf 100644 --- a/lib/librte_eal/common/include/generic/rte_spinlock.h +++ b/lib/librte_eal/common/include/generic/rte_spinlock.h @@ -145,6 +145,47 @@ static inline int rte_spinlock_is_locked (rte_spinlock_t *sl) } /** + * Test if hardware transactional memory (lock elision) is supported + * + * @return + * 1 if the hardware transactional memory is supported; 0 otherwise. + */ +static inline int rte_tm_supported(void); + +/** + * Try to execute critical section in a hardware memory transaction, + * if it fails or not available take the spinlock. + * + * @param sl + * A pointer to the spinlock. + */ +static inline void +rte_spinlock_lock_tm(rte_spinlock_t *sl); + +/** + * Commit hardware memory transaction or release the spinlock if + * the spinlock is used as a fall-back + * + * @param sl + * A pointer to the spinlock. + */ +static inline void +rte_spinlock_unlock_tm(rte_spinlock_t *sl); + +/** + * Try to execute critical section in a hardware memory transaction, + * if it fails or not available try to take the lock. + * + * @param sl + * A pointer to the spinlock. + * @return + * 1 if the hardware memory transaction is successfully started + * or lock is successfully taken; 0 otherwise. + */ +static inline int +rte_spinlock_trylock_tm(rte_spinlock_t *sl); + +/** * The rte_spinlock_recursive_t type. */ typedef struct { @@ -223,4 +264,38 @@ static inline int rte_spinlock_recursive_trylock(rte_spinlock_recursive_t *slr) return 1; } + +/** + * Try to execute critical section in a hardware memory transaction, + * if it fails or not available take the recursive spinlocks + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_lock_tm( + rte_spinlock_recursive_t *slr); + +/** + * Commit hardware memory transaction or release the recursive spinlock + * if the recursive spinlock is used as a fall-back + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_unlock_tm( + rte_spinlock_recursive_t *slr); + +/** + * Try to execute critical section in a hardware memory transaction, + * if it fails or not available try to take the recursive lock + * + * @param slr + * A pointer to the recursive spinlock. + * @return + * 1 if the hardware memory transaction is successfully started + * or lock is successfully taken; 0 otherwise. + */ +static inline int rte_spinlock_recursive_trylock_tm( + rte_spinlock_recursive_t *slr); + #endif /* _RTE_SPINLOCK_H_ */