
[v2,09/10] service: optimize with c11 one-way barrier

Message ID 1583999071-22872-10-git-send-email-phil.yang@arm.com (mailing list archive)
State Superseded, archived
Series: generic rte atomic APIs deprecate proposal

Checks

Context Check Description
ci/Intel-compilation fail Compilation issues
ci/checkpatch success coding style OK

Commit Message

Phil Yang March 12, 2020, 7:44 a.m. UTC
num_mapped_cores and execute_lock are synchronized with the rte_atomic_XX
APIs, which imply a full barrier (DMB) on aarch64. This patch optimizes
them with C11 atomics using one-way barriers.

Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 lib/librte_eal/common/rte_service.c | 50 ++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 15 deletions(-)
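
For reference, here is a minimal standalone sketch (not part of the
patch) of the one-way-barrier try-lock pattern the hunks below apply to
execute_lock. It assumes GCC-style __atomic builtins; the variable and
helper names are illustrative. On aarch64 the acquire CAS and the
release store compile to load-acquire/store-release instructions (e.g.
LDAXR/STLR) instead of a full DMB: later accesses cannot be hoisted
above the acquire and earlier accesses cannot sink below the release,
while reordering in the opposite directions remains allowed, which is
what makes the barriers one-way.

#include <stdbool.h>
#include <stdint.h>

static uint32_t execute_lock; /* 0 = free, 1 = held */

static bool
service_try_lock(void)
{
	uint32_t expected = 0;
	/* ACQUIRE on success: the critical section cannot be hoisted
	 * above a successful CAS.
	 */
	return __atomic_compare_exchange_n(&execute_lock, &expected, 1,
			0 /* strong CAS */,
			__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}

static void
service_unlock(void)
{
	/* RELEASE: the critical section cannot sink below this store;
	 * pairs with the ACQUIRE above.
	 */
	__atomic_store_n(&execute_lock, 0, __ATOMIC_RELEASE);
}

A strong CAS (weak flag 0) matches the patch: there is no retry loop, so
a spurious failure would wrongly return -EBUSY.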

Patch

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 670f5a9..96a59b6 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -42,7 +42,7 @@  struct rte_service_spec_impl {
 	 * running this service callback. When not set, a core may take the
 	 * lock and then run the service callback.
 	 */
-	rte_atomic32_t execute_lock;
+	uint32_t execute_lock;
 
 	/* API set/get-able variables */
 	int8_t app_runstate;
@@ -54,7 +54,7 @@  struct rte_service_spec_impl {
 	 * It does not indicate the number of cores the service is running
 	 * on currently.
 	 */
-	rte_atomic32_t num_mapped_cores;
+	int32_t num_mapped_cores;
 	uint64_t calls;
 	uint64_t cycles_spent;
 } __rte_cache_aligned;
@@ -329,7 +329,8 @@  rte_service_runstate_get(uint32_t id)
 	rte_smp_rmb();
 
 	int check_disabled = !(s->internal_flags & SERVICE_F_START_CHECK);
-	int lcore_mapped = (rte_atomic32_read(&s->num_mapped_cores) > 0);
+	int lcore_mapped = (__atomic_load_n(&s->num_mapped_cores,
+					    __ATOMIC_RELAXED) > 0);
 
 	return (s->app_runstate == RUNSTATE_RUNNING) &&
 		(s->comp_runstate == RUNSTATE_RUNNING) &&
@@ -372,11 +373,20 @@  service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 	cs->service_active_on_lcore[i] = 1;
 
 	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
-		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
+		uint32_t expected = 0;
+		/* ACQUIRE ordering here prevents the callback from
+		 * being hoisted above the acquisition of
+		 * execute_lock.
+		 */
+		if (!__atomic_compare_exchange_n(&s->execute_lock, &expected, 1,
+			    0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
 			return -EBUSY;
 
 		service_runner_do_callback(s, cs, i);
-		rte_atomic32_clear(&s->execute_lock);
+		/* RELEASE ordering here pairs with the ACQUIRE above
+		 * to achieve lock semantics.
+		 */
+		__atomic_store_n(&s->execute_lock, 0, __ATOMIC_RELEASE);
 	} else
 		service_runner_do_callback(s, cs, i);
 
@@ -412,11 +422,11 @@  rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 	/* Increment num_mapped_cores to indicate that the service
 	 * is running on a core.
 	 */
-	rte_atomic32_inc(&s->num_mapped_cores);
+	__atomic_add_fetch(&s->num_mapped_cores, 1, __ATOMIC_ACQUIRE);
 
 	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
 
-	rte_atomic32_dec(&s->num_mapped_cores);
+	__atomic_sub_fetch(&s->num_mapped_cores, 1, __ATOMIC_RELEASE);
 
 	return ret;
 }
@@ -549,24 +559,32 @@  service_update(uint32_t sid, uint32_t lcore,
 
 	uint64_t sid_mask = UINT64_C(1) << sid;
 	if (set) {
-		uint64_t lcore_mapped = lcore_states[lcore].service_mask &
-			sid_mask;
+		/* When multiple threads update the same lcore's
+		 * service map concurrently, e.g. a set followed by a
+		 * clear, unsynchronized reads of service_mask can
+		 * leave num_mapped_cores inconsistent. ACQUIRE
+		 * ordering here pairs with the RELEASE ordering on the
+		 * counter updates below to synchronize service_mask.
+		 */
+		uint64_t lcore_mapped = __atomic_load_n(
+					&lcore_states[lcore].service_mask,
+					__ATOMIC_ACQUIRE) & sid_mask;
 
 		if (*set && !lcore_mapped) {
 			lcore_states[lcore].service_mask |= sid_mask;
-			rte_atomic32_inc(&rte_services[sid].num_mapped_cores);
+			__atomic_add_fetch(&rte_services[sid].num_mapped_cores,
+					    1, __ATOMIC_RELEASE);
 		}
 		if (!*set && lcore_mapped) {
 			lcore_states[lcore].service_mask &= ~(sid_mask);
-			rte_atomic32_dec(&rte_services[sid].num_mapped_cores);
+			__atomic_sub_fetch(&rte_services[sid].num_mapped_cores,
+					    1, __ATOMIC_RELEASE);
 		}
 	}
 
 	if (enabled)
 		*enabled = !!(lcore_states[lcore].service_mask & (sid_mask));
 
-	rte_smp_wmb();
-
 	return 0;
 }
 
@@ -622,7 +640,8 @@  rte_service_lcore_reset_all(void)
 		}
 	}
 	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++)
-		rte_atomic32_set(&rte_services[i].num_mapped_cores, 0);
+		__atomic_store_n(&rte_services[i].num_mapped_cores, 0,
+				    __ATOMIC_RELAXED);
 
 	rte_smp_wmb();
 
@@ -705,7 +724,8 @@  rte_service_lcore_stop(uint32_t lcore)
 		int32_t enabled = service_mask & (UINT64_C(1) << i);
 		int32_t service_running = rte_service_runstate_get(i);
 		int32_t only_core = (1 ==
-			rte_atomic32_read(&rte_services[i].num_mapped_cores));
+			__atomic_load_n(&rte_services[i].num_mapped_cores,
+					__ATOMIC_RELAXED));
 
 		/* if the core is mapped, and the service is running, and this
 		 * is the only core that is mapped, the service would cease to
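
Similarly, a standalone sketch (not part of the patch) of the
num_mapped_cores accounting used by rte_service_run_iter_on_app_lcore()
above; it again assumes GCC-style __atomic builtins, and the names are
illustrative. The ACQUIRE increment and the RELEASE decrement bracket
the callback so that a nonzero count covers any in-flight run, while
readers that only need a point-in-time answer can use a relaxed load.

#include <stdint.h>

static int32_t num_mapped_cores;

static void
run_on_app_lcore(void (*callback)(void))
{
	/* ACQUIRE: the callback cannot be hoisted above the
	 * increment, so the count is visibly nonzero while it runs.
	 */
	__atomic_add_fetch(&num_mapped_cores, 1, __ATOMIC_ACQUIRE);
	callback();
	/* RELEASE: the callback cannot sink below the decrement. */
	__atomic_sub_fetch(&num_mapped_cores, 1, __ATOMIC_RELEASE);
}

static int
service_lcore_is_mapped(void)
{
	/* Point-in-time check; relaxed suffices, as in
	 * rte_service_runstate_get() above.
	 */
	return __atomic_load_n(&num_mapped_cores, __ATOMIC_RELAXED) > 0;
}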