From patchwork Mon Feb 24 06:42:19 2020
X-Patchwork-Submitter: Phil Yang
X-Patchwork-Id: 65998
X-Patchwork-Delegate: thomas@monjalon.net
From: Phil Yang
To: rsanford@akamai.com, erik.g.carrillo@intel.com, dev@dpdk.org
Cc: david.marchand@redhat.com, anatoly.burakov@intel.com, thomas@monjalon.net,
 jerinj@marvell.com, hemant.agrawal@nxp.com, Honnappa.Nagarahalli@arm.com,
 gavin.hu@arm.com, phil.yang@arm.com, nd@arm.com
Date: Mon, 24 Feb 2020 14:42:19 +0800
Message-Id: <1582526539-14360-2-git-send-email-phil.yang@arm.com>
X-Mailer: git-send-email 2.7.4
In-Reply-To: <1582526539-14360-1-git-send-email-phil.yang@arm.com>
References: <1582526539-14360-1-git-send-email-phil.yang@arm.com>
Subject: [dpdk-dev] [PATCH 2/2] lib/timer: relax barrier for status update

Volatile has no ordering semantics. The rte_timer structure defines the
timer status as a volatile variable and relies on the rte_r/wmb barriers
to guarantee inter-thread visibility.

This patch replaces the volatile accesses with C11 atomic operations and
one-way barriers, avoiding the performance penalty of the full barriers.

According to the timer_perf_autotest benchmark results, this patch
improves timer append performance by 10%~16%, timer reset performance by
3%~20%, and timer callback scheduling performance by 45% on aarch64, with
no performance loss on x86.
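
The change follows one pattern throughout: a volatile store published
behind an explicit rte_wmb() becomes a single release store, and
rte_atomic32_cmpset() (a full barrier) becomes a compare-exchange with
acquire ordering on success and relaxed ordering on failure. The
standalone sketch below models that before/after pattern; the names
(status_word, payload, publish, claim) are illustrative only, not the
actual rte_timer API:

/* Minimal sketch of the barrier relaxation applied by this patch.
 * Compiles on its own with GCC/Clang; not DPDK code.
 */
#include <stdint.h>

uint32_t status_word;	/* stands in for tim->status.u32 */
uint64_t payload;	/* stands in for expire/period/f/arg */

/* Before: payload = data; rte_wmb(); status_word = s; (volatile store)
 * After: the ordering is attached to the store itself, so only a
 * one-way (release) barrier is emitted.
 */
void publish(uint32_t s, uint64_t data)
{
	payload = data;
	/* The payload write above cannot be reordered past this
	 * store; a reader that acquire-loads status_word sees it.
	 */
	__atomic_store_n(&status_word, s, __ATOMIC_RELEASE);
}

/* Before: rte_atomic32_cmpset(&status_word, expected, s), a full
 * barrier. After: acquire on success orders the accesses that follow
 * the CAS after it; relaxed on failure, because a failed CAS only
 * leads to a retry.
 */
int claim(uint32_t expected, uint32_t s)
{
	return __atomic_compare_exchange_n(&status_word, &expected, s,
					   0 /* strong */,
					   __ATOMIC_ACQUIRE,
					   __ATOMIC_RELAXED);
}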
Suggested-by: Honnappa Nagarahalli
Signed-off-by: Phil Yang
Reviewed-by: Gavin Hu
---
 lib/librte_timer/rte_timer.c | 90 +++++++++++++++++++++++++++++++-------------
 lib/librte_timer/rte_timer.h |  2 +-
 2 files changed, 65 insertions(+), 27 deletions(-)

diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
index 269e921..be0262d 100644
--- a/lib/librte_timer/rte_timer.c
+++ b/lib/librte_timer/rte_timer.c
@@ -10,7 +10,6 @@
 #include
 #include

-#include <rte_atomic.h>
 #include
 #include
 #include
@@ -218,7 +217,7 @@ rte_timer_init(struct rte_timer *tim)

 	status.state = RTE_TIMER_STOP;
 	status.owner = RTE_TIMER_NO_OWNER;
-	tim->status.u32 = status.u32;
+	__atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELAXED);
 }

 /*
@@ -239,9 +238,9 @@ timer_set_config_state(struct rte_timer *tim,
 	/* wait that the timer is in correct status before update,
 	 * and mark it as being configured */
-	while (success == 0) {
-		prev_status.u32 = tim->status.u32;
+	prev_status.u32 = __atomic_load_n(&tim->status.u32, __ATOMIC_RELAXED);

+	while (success == 0) {
 		/* timer is running on another core
 		 * or ready to run on local core, exit
 		 */
@@ -258,9 +257,20 @@ timer_set_config_state(struct rte_timer *tim,
 		 * mark it atomically as being configured */
 		status.state = RTE_TIMER_CONFIG;
 		status.owner = (int16_t)lcore_id;
-		success = rte_atomic32_cmpset(&tim->status.u32,
-					      prev_status.u32,
-					      status.u32);
+		/* If the status is observed as RTE_TIMER_CONFIG earlier,
+		 * that will not cause any issues, because the pattern is
+		 * to read the status and then read the other members.
+		 * One of the callers of timer_set_config_state
+		 * (__rte_timer_reset) sets the other members of the
+		 * structure (period, expire, f, arg); we want those
+		 * changes to be observed after our change to status.
+		 * So we need __ATOMIC_ACQUIRE here.
+		 */
+		success = __atomic_compare_exchange_n(&tim->status.u32,
+					      &prev_status.u32,
+					      status.u32, 0,
+					      __ATOMIC_ACQUIRE,
+					      __ATOMIC_RELAXED);
 	}

 	ret_prev_status->u32 = prev_status.u32;
@@ -279,20 +289,27 @@ timer_set_running_state(struct rte_timer *tim)
 	/* wait that the timer is in correct status before update,
 	 * and mark it as running */
-	while (success == 0) {
-		prev_status.u32 = tim->status.u32;
+	prev_status.u32 = __atomic_load_n(&tim->status.u32, __ATOMIC_RELAXED);

+	while (success == 0) {
 		/* timer is not pending anymore */
 		if (prev_status.state != RTE_TIMER_PENDING)
 			return -1;

 		/* here, we know that timer is stopped or pending,
-		 * mark it atomically as being configured */
+		 * mark it atomically as being running
+		 */
 		status.state = RTE_TIMER_RUNNING;
 		status.owner = (int16_t)lcore_id;
-		success = rte_atomic32_cmpset(&tim->status.u32,
-					      prev_status.u32,
-					      status.u32);
+		/* The RUNNING state acts as a locked state: if the
+		 * timer is in the RUNNING state, its state cannot be
+		 * changed by other threads, so we should use ACQUIRE here.
+		 */
+		success = __atomic_compare_exchange_n(&tim->status.u32,
+					      &prev_status.u32,
+					      status.u32, 0,
+					      __ATOMIC_ACQUIRE,
+					      __ATOMIC_RELAXED);
 	}

 	return 0;
@@ -520,10 +537,12 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
 	/* update state: as we are in CONFIG state, only us can modify
 	 * the state so we don't need to use cmpset() here */
-	rte_wmb();
 	status.state = RTE_TIMER_PENDING;
 	status.owner = (int16_t)tim_lcore;
-	tim->status.u32 = status.u32;
+	/* The "RELEASE" ordering guarantees that the memory operations
+	 * above the status update are observed before the update by all
+	 * threads.
+	 */
+	__atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELEASE);

 	if (tim_lcore != lcore_id || !local_is_locked)
 		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
@@ -600,10 +619,12 @@ __rte_timer_stop(struct rte_timer *tim, int local_is_locked,
 	}

 	/* mark timer as stopped */
-	rte_wmb();
 	status.state = RTE_TIMER_STOP;
 	status.owner = RTE_TIMER_NO_OWNER;
-	tim->status.u32 = status.u32;
+	/* The "RELEASE" ordering guarantees that the memory operations
+	 * above the status update are observed before the update by all
+	 * threads.
+	 */
+	__atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELEASE);

 	return 0;
 }
@@ -637,7 +658,8 @@ rte_timer_stop_sync(struct rte_timer *tim)
 int
 rte_timer_pending(struct rte_timer *tim)
 {
-	return tim->status.state == RTE_TIMER_PENDING;
+	return __atomic_load_n(&tim->status.state,
+				__ATOMIC_RELAXED) == RTE_TIMER_PENDING;
 }

 /* must be called periodically, run all timer that expired */
@@ -739,8 +761,12 @@ __rte_timer_manage(struct rte_timer_data *timer_data)
 			/* remove from done list and mark timer as stopped */
 			status.state = RTE_TIMER_STOP;
 			status.owner = RTE_TIMER_NO_OWNER;
-			rte_wmb();
-			tim->status.u32 = status.u32;
+			/* The "RELEASE" ordering guarantees that the memory
+			 * operations above the status update are observed
+			 * before the update by all threads.
+			 */
+			__atomic_store_n(&tim->status.u32, status.u32,
+				__ATOMIC_RELEASE);
 		}
 		else {
 			/* keep it in list and mark timer as pending */
@@ -748,8 +774,12 @@
 			status.state = RTE_TIMER_PENDING;
 			__TIMER_STAT_ADD(priv_timer, pending, 1);
 			status.owner = (int16_t)lcore_id;
-			rte_wmb();
-			tim->status.u32 = status.u32;
+			/* The "RELEASE" ordering guarantees that the memory
+			 * operations above the status update are observed
+			 * before the update by all threads.
+			 */
+			__atomic_store_n(&tim->status.u32, status.u32,
+				__ATOMIC_RELEASE);
 			__rte_timer_reset(tim, tim->expire + tim->period,
 				tim->period, lcore_id, tim->f, tim->arg, 1,
 				timer_data);
@@ -919,8 +949,12 @@ rte_timer_alt_manage(uint32_t timer_data_id,
 			/* remove from done list and mark timer as stopped */
 			status.state = RTE_TIMER_STOP;
 			status.owner = RTE_TIMER_NO_OWNER;
-			rte_wmb();
-			tim->status.u32 = status.u32;
+			/* The "RELEASE" ordering guarantees that the memory
+			 * operations above the status update are observed
+			 * before the update by all threads.
+			 */
+			__atomic_store_n(&tim->status.u32, status.u32,
+				__ATOMIC_RELEASE);
 		} else {
 			/* keep it in list and mark timer as pending */
 			rte_spinlock_lock(
@@ -928,8 +962,12 @@ rte_timer_alt_manage(uint32_t timer_data_id,
 			status.state = RTE_TIMER_PENDING;
 			__TIMER_STAT_ADD(data->priv_timer, pending, 1);
 			status.owner = (int16_t)this_lcore;
-			rte_wmb();
-			tim->status.u32 = status.u32;
+			/* The "RELEASE" ordering guarantees that the memory
+			 * operations above the status update are observed
+			 * before the update by all threads.
+			 */
+			__atomic_store_n(&tim->status.u32, status.u32,
+				__ATOMIC_RELEASE);
 			__rte_timer_reset(tim, tim->expire + tim->period,
 				tim->period, this_lcore, tim->f, tim->arg, 1,
 				data);

diff --git a/lib/librte_timer/rte_timer.h b/lib/librte_timer/rte_timer.h
index c6b3d45..df533fa 100644
--- a/lib/librte_timer/rte_timer.h
+++ b/lib/librte_timer/rte_timer.h
@@ -101,7 +101,7 @@ struct rte_timer
 	uint64_t expire;       /**< Time when timer expire. */
 	struct rte_timer *sl_next[MAX_SKIPLIST_DEPTH];
-	volatile union rte_timer_status status; /**< Status of timer. */
+	union rte_timer_status status; /**< Status of timer. */
 	uint64_t period;       /**< Period of timer (0 if not periodic). */
 	rte_timer_cb_t f;      /**< Callback function. */
 	void *arg;             /**< Argument to callback function. */
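
For reviewers who want to see the handshake in isolation: the release
store in __rte_timer_reset pairs with the acquire CAS in
timer_set_config_state, which is the standard message-passing pattern.
The self-contained program below demonstrates that pairing; every name
in it (status, payload, producer, consumer, the state constants) is
hypothetical and only models the DPDK code. Build with
"gcc -O2 -pthread"; it prints the payload published by the producer.

#include <inttypes.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define STOP    0u
#define PENDING 1u
#define CONFIG  2u

static uint32_t status = STOP;	/* models tim->status.u32 */
static uint64_t payload;	/* models expire/period/f/arg */

/* Models __rte_timer_reset: write the payload, then publish it with a
 * release store to the status word.
 */
static void *producer(void *arg)
{
	(void)arg;
	payload = 42;	/* plain write, ordered by the release below */
	__atomic_store_n(&status, PENDING, __ATOMIC_RELEASE);
	return NULL;
}

/* Models timer_set_config_state: claim the timer with an acquire CAS,
 * then read the payload, which is guaranteed to be the value written
 * before the matching release store.
 */
static void *consumer(void *arg)
{
	(void)arg;
	uint32_t expected;

	do {
		expected = PENDING;
		/* Acquire on success orders the payload read below after
		 * the CAS; relaxed on failure, since we only retry.
		 */
	} while (!__atomic_compare_exchange_n(&status, &expected, CONFIG,
					      0, __ATOMIC_ACQUIRE,
					      __ATOMIC_RELAXED));

	printf("claimed timer, payload = %" PRIu64 "\n", payload);
	return NULL;
}

int main(void)
{
	pthread_t p, c;

	pthread_create(&p, NULL, producer, NULL);
	pthread_create(&c, NULL, consumer, NULL);
	pthread_join(p, NULL);
	pthread_join(c, NULL);
	return 0;
}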