diff mbox series

[RFC] pflock: add implementation of phase-fair locks

Message ID 20210212013838.312623-1-sthemmin@microsoft.com (mailing list archive)
State Superseded
Delegated to: Thomas Monjalon
Headers show
Series [RFC] pflock: add implementation of phase-fair locks | expand

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/intel-Testing success Testing PASS
ci/Intel-compilation fail Compilation issues

Commit Message

Stephen Hemminger Feb. 12, 2021, 1:38 a.m. UTC
This is a new type of reader-writer lock that provides better fairness
guarantees. According to the research it will be better for real
time applications such as DPDK.

A similar implementation is Concurrency Kit available in FreeBSD.

For information see:
   "Reader-Writer Synchronization for Shared-Memory Multiprocessor
    Real-Time Systems",
    http://www.cs.unc.edu/~anderson/papers/ecrts09b.pdf

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
Note: This is better than earlier ticket variant proposal so dropping
that one. The fairness properties are better.

 app/test/meson.build                        |   6 +
 app/test/test_pflock.c                      | 220 ++++++++++++++++
 lib/librte_eal/arm/include/meson.build      |   1 +
 lib/librte_eal/arm/include/rte_pflock.h     |  18 ++
 lib/librte_eal/include/generic/rte_pflock.h | 272 ++++++++++++++++++++
 lib/librte_eal/ppc/include/meson.build      |   1 +
 lib/librte_eal/ppc/include/rte_pflock.h     |  16 ++
 lib/librte_eal/x86/include/meson.build      |   1 +
 lib/librte_eal/x86/include/rte_pflock.h     |  18 ++
 9 files changed, 553 insertions(+)
 create mode 100644 app/test/test_pflock.c
 create mode 100644 lib/librte_eal/arm/include/rte_pflock.h
 create mode 100644 lib/librte_eal/include/generic/rte_pflock.h
 create mode 100644 lib/librte_eal/ppc/include/rte_pflock.h
 create mode 100644 lib/librte_eal/x86/include/rte_pflock.h
diff mbox series

Patch

diff --git a/app/test/meson.build b/app/test/meson.build
index 94fd39fecb82..d5cf0ba701e9 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -89,6 +89,7 @@  test_sources = files('commands.c',
 	'test_mcslock.c',
 	'test_mp_secondary.c',
 	'test_per_lcore.c',
+	'test_pflock.c',
 	'test_pmd_perf.c',
 	'test_power.c',
 	'test_power_cpufreq.c',
@@ -227,6 +228,11 @@  fast_tests = [
         ['meter_autotest', true],
         ['multiprocess_autotest', false],
         ['per_lcore_autotest', true],
+        ['pflock_autotest', true],
+        ['pflock_test1_autotest', true],
+        ['pflock_rda_autotest', true],
+        ['pflock_rds_wrm_autotest', true],
+        ['pflock_rde_wro_autotest', true],
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
diff --git a/app/test/test_pflock.c b/app/test/test_pflock.c
new file mode 100644
index 000000000000..b6c5d2f8afde
--- /dev/null
+++ b/app/test/test_pflock.c
@@ -0,0 +1,220 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <sys/queue.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_pause.h>
+#include <rte_pflock.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_cycles.h>
+
+#include "test.h"
+
+/*
+ * phase fair lock test
+ * ===========
+ * Provides UT for phase fair lock API.
+ * Main concern is on functional testing, but also provides some
+ * performance measurements.
+ * Obviously for proper testing need to be executed with more than one lcore.
+ */
+
+#define ITER_NUM	0x80
+
+#define TEST_SEC	5
+
+static rte_pflock_t sl;
+static rte_pflock_t sl_tab[RTE_MAX_LCORE];
+static uint32_t synchro;
+
+enum {
+	LC_TYPE_RDLOCK,
+	LC_TYPE_WRLOCK,
+};
+
+static int
+test_pflock_per_core(__rte_unused void *arg)
+{
+	rte_pflock_write_lock(&sl);
+	printf("Global write lock taken on core %u\n", rte_lcore_id());
+	rte_pflock_write_unlock(&sl);
+
+	rte_pflock_write_lock(&sl_tab[rte_lcore_id()]);
+	printf("Hello from core %u !\n", rte_lcore_id());
+	rte_pflock_write_unlock(&sl_tab[rte_lcore_id()]);
+
+	rte_pflock_read_lock(&sl);
+	printf("Global read lock taken on core %u\n", rte_lcore_id());
+	rte_delay_ms(100);
+	printf("Release global read lock on core %u\n", rte_lcore_id());
+	rte_pflock_read_unlock(&sl);
+
+	return 0;
+}
+
+static rte_pflock_t lk = RTE_PFLOCK_INITIALIZER;
+static volatile uint64_t pflock_data;
+static uint64_t time_count[RTE_MAX_LCORE] = {0};
+
+#define MAX_LOOP 10000
+#define TEST_PFLOCK_DEBUG 0
+
+static int
+load_loop_fn(__rte_unused void *arg)
+{
+	uint64_t time_diff = 0, begin;
+	uint64_t hz = rte_get_timer_hz();
+	uint64_t lcount = 0;
+	const unsigned int lcore = rte_lcore_id();
+
+	/* wait synchro for workers */
+	if (lcore != rte_get_main_lcore())
+		rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+
+	begin = rte_rdtsc_precise();
+	while (lcount < MAX_LOOP) {
+		rte_pflock_write_lock(&lk);
+		++pflock_data;
+		rte_pflock_write_unlock(&lk);
+
+		rte_pflock_read_lock(&lk);
+		if (TEST_PFLOCK_DEBUG && !(lcount % 100))
+			printf("Core [%u] pflock_data = %"PRIu64"\n",
+				lcore, pflock_data);
+		rte_pflock_read_unlock(&lk);
+
+		lcount++;
+		/* delay to make lock duty cycle slightly realistic */
+		rte_pause();
+	}
+
+	time_diff = rte_rdtsc_precise() - begin;
+	time_count[lcore] = time_diff * 1000000 / hz;
+	return 0;
+}
+
+static int
+test_pflock_perf(void)
+{
+	unsigned int i;
+	uint64_t total = 0;
+
+	printf("\nPhase fair test on %u cores...\n", rte_lcore_count());
+
+	/* clear synchro and start workers */
+	synchro = 0;
+	if (rte_eal_mp_remote_launch(load_loop_fn, NULL, SKIP_MAIN) < 0)
+		return -1;
+
+	/* start synchro and launch test on main */
+	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+	load_loop_fn(NULL);
+
+	rte_eal_mp_wait_lcore();
+
+	RTE_LCORE_FOREACH(i) {
+		printf("Core [%u] cost time = %"PRIu64" us\n",
+			i, time_count[i]);
+		total += time_count[i];
+	}
+
+	printf("Total cost time = %"PRIu64" us\n", total);
+	memset(time_count, 0, sizeof(time_count));
+
+	return 0;
+}
+
+/*
+ * - There is a global pflock and a table of pflocks (one per lcore).
+ *
+ * - The test function takes all of these locks and launches the
+ *   ``test_pflock_per_core()`` function on each core (except the main).
+ *
+ *   - The function takes the global write lock, display something,
+ *     then releases the global lock.
+ *   - Then, it takes the per-lcore write lock, display something, and
+ *     releases the per-core lock.
+ *   - Finally, a read lock is taken during 100 ms, then released.
+ *
+ * - The main function unlocks the per-lcore locks sequentially and
+ *   waits between each lock. This triggers the display of a message
+ *   for each core, in the correct order.
+ *
+ *   Then, it tries to take the global write lock and display the last
+ *   message. The autotest script checks that the message order is correct.
+ */
+static int
+pflock_test1(void)
+{
+	int i;
+
+	rte_pflock_init(&sl);
+	for (i = 0; i < RTE_MAX_LCORE; i++)
+		rte_pflock_init(&sl_tab[i]);
+
+	rte_pflock_write_lock(&sl);
+
+	RTE_LCORE_FOREACH_WORKER(i) {
+		rte_pflock_write_lock(&sl_tab[i]);
+		rte_eal_remote_launch(test_pflock_per_core, NULL, i);
+	}
+
+	rte_pflock_write_unlock(&sl);
+
+	RTE_LCORE_FOREACH_WORKER(i) {
+		rte_pflock_write_unlock(&sl_tab[i]);
+		rte_delay_ms(100);
+	}
+
+	rte_pflock_write_lock(&sl);
+	/* this message should be the last message of test */
+	printf("Global write lock taken on main core %u\n", rte_lcore_id());
+	rte_pflock_write_unlock(&sl);
+
+	rte_eal_mp_wait_lcore();
+
+	if (test_pflock_perf() < 0)
+		return -1;
+
+	return 0;
+}
+
+static int
+test_pflock(void)
+{
+	uint32_t i;
+	int32_t rc, ret;
+
+	static const struct {
+		const char *name;
+		int (*ftst)(void);
+	} test[] = {
+		{
+			.name = "pflock_test1",
+			.ftst = pflock_test1,
+		},
+	};
+
+	ret = 0;
+	for (i = 0; i != RTE_DIM(test); i++) {
+		printf("starting test %s;\n", test[i].name);
+		rc = test[i].ftst();
+		printf("test %s completed with status %d\n", test[i].name, rc);
+		ret |= rc;
+	}
+
+	return ret;
+}
+
+REGISTER_TEST_COMMAND(pflock_autotest, test_pflock);
diff --git a/lib/librte_eal/arm/include/meson.build b/lib/librte_eal/arm/include/meson.build
index 770766de1a34..2c3cff61bed6 100644
--- a/lib/librte_eal/arm/include/meson.build
+++ b/lib/librte_eal/arm/include/meson.build
@@ -21,6 +21,7 @@  arch_headers = files(
 	'rte_pause_32.h',
 	'rte_pause_64.h',
 	'rte_pause.h',
+	'rte_pflock.h',
 	'rte_power_intrinsics.h',
 	'rte_prefetch_32.h',
 	'rte_prefetch_64.h',
diff --git a/lib/librte_eal/arm/include/rte_pflock.h b/lib/librte_eal/arm/include/rte_pflock.h
new file mode 100644
index 000000000000..bb9934eec469
--- /dev/null
+++ b/lib/librte_eal/arm/include/rte_pflock.h
@@ -0,0 +1,18 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Microsoft Corporation
+ */
+
+#ifndef _RTE_PFLOCK_ARM_H_
+#define _RTE_PFLOCK_ARM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_pflock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PFLOCK_ARM_H_ */
diff --git a/lib/librte_eal/include/generic/rte_pflock.h b/lib/librte_eal/include/generic/rte_pflock.h
new file mode 100644
index 000000000000..afa4edeb2830
--- /dev/null
+++ b/lib/librte_eal/include/generic/rte_pflock.h
@@ -0,0 +1,272 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Microsoft Corp.
+ * Copyright 2011-2015 Samy Al Bahra.
+ * All rights reserved.
+ */
+
+#ifndef _RTE_PFLOCK_H_
+#define _RTE_PFLOCK_H_
+
+/**
+ * @file
+ *
+ * Phase-fair locks
+ *
+ * This file defines an API for Phase Fair reader writer locks,
+ * which is a variant of typical reader-writer locks that prevent
+ * starvation. In this type of lock, readers and writers alternate.
+ * This significantly reduces the worst-case blocking for readers and writers.
+ *
+ * This is an implementation derived from FreeBSD
+ * based on the work described in:
+ *    Brandenburg, B. and Anderson, J. 2010. Spin-Based
+ *    Reader-Writer Synchronization for Multiprocessor Real-Time Systems
+ *
+ * All locks must be initialised before use, and only initialised once.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include <rte_pause.h>
+
+/**
+ * The rte_pflock_t type.
+ */
+struct rte_pflock {
+	union rte_pflock_ticket {
+		uint32_t tickets;
+		struct {
+			uint16_t in;
+			uint16_t out;
+		};
+	} rd, wr;
+};
+typedef struct rte_pflock rte_pflock_t;
+
+/**
+ * Constants used to map the bits in reader counter
+ *
+ * +-----------------+-+-+
+ * |     Readers     |W|P|
+ * |                 |R|H|
+ * +-----------------+-+-+
+ */
+#define RTE_PFLOCK_LSB   0xFFFFFFF0
+#define RTE_PFLOCK_RINC  0x100		/* Reader increment value. */
+#define RTE_PFLOCK_WBITS 0x3		/* Writer bits in reader. */
+#define RTE_PFLOCK_PRES  0x2		/* Writer present bit. */
+#define RTE_PFLOCK_PHID  0x1		/* Phase ID bit. */
+
+/**
+ * A static pflock initializer.
+ */
+#define RTE_PFLOCK_INITIALIZER {  }
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Initialize the pflock to an unlocked state.
+ *
+ * @param pf
+ *   A pointer to the pflock.
+ */
+__rte_experimental
+static inline void
+rte_pflock_init(struct rte_pflock *pf)
+{
+	pf->rd.tickets = 0;
+	pf->wr.tickets = 0;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Take a pflock for read.
+ *
+ * @param pf
+ *   A pointer to a pflock structure.
+ */
+__rte_experimental
+static inline void
+rte_pflock_read_lock(rte_pflock_t *pf)
+{
+	uint32_t w;
+
+	/*
+	 * If no writer is present, then the operation has completed
+	 * successfully.
+	 */
+	w = __atomic_fetch_add(&pf->rd.in, RTE_PFLOCK_RINC, __ATOMIC_ACQ_REL) & RTE_PFLOCK_WBITS;
+	if (w == 0)
+		return;
+
+	/* Wait for current write phase to complete. */
+	while ((__atomic_load_n(&pf->rd.in, __ATOMIC_ACQUIRE) & RTE_PFLOCK_WBITS) == w)
+		rte_pause();
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Release a pflock locked for reading.
+ *
+ * @param pf
+ *   A pointer to the pflock structure.
+ */
+__rte_experimental
+static inline void
+rte_pflock_read_unlock(rte_pflock_t *pf)
+{
+	__atomic_fetch_add(&pf->rd.out, RTE_PFLOCK_RINC, __ATOMIC_RELEASE);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Try to take a pflock for reading
+ *
+ * @param pf
+ *   A pointer to a pflock structure.
+ * @return
+ *   - zero if the lock is successfully taken
+ *   - -EBUSY if lock could not be acquired for reading because a
+ *     writer holds the lock
+ */
+__rte_experimental
+static inline int
+rte_pflock_read_trylock(rte_pflock_t *pf)
+{
+	union rte_pflock_ticket old, new;
+
+	/* Get current state of the lock */
+	old.tickets = __atomic_load_n(&pf->rd.tickets, __ATOMIC_RELAXED);
+
+	/* loop until writer shows up */
+	while ((old.in & RTE_PFLOCK_WBITS) == 0) {
+		new.out = old.out;
+		new.in = old.in + RTE_PFLOCK_RINC;
+		if (__atomic_compare_exchange_n(&pf->rd.tickets, &old.tickets, new.tickets,
+						0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED))
+			return 0;	/* got it */
+
+		/* either new reader got in (so retry) or a writer */
+	}
+
+	/* If writer is present then we are busy */
+	return -EBUSY;
+}
+	
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Take the pflock for write.
+ *
+ * @param p
+ *   A pointer to the ticketlock.
+ */
+__rte_experimental
+static inline void
+rte_pflock_write_lock(rte_pflock_t *pf)
+{
+	uint16_t ticket;
+
+	/* Acquire ownership of write-phase. */
+	ticket = __atomic_fetch_add(&pf->wr.in, 1, __ATOMIC_ACQUIRE);
+	rte_wait_until_equal_16(&pf->wr.out, ticket, __ATOMIC_RELAXED);
+
+	/*
+	 * Acquire ticket on read-side in order to allow them
+	 * to flush. Indicates to any incoming reader that a
+	 * write-phase is pending.
+	 *
+	 * Need ACQUIRE to prevent speculative execution of the wait loop
+	 */
+	ticket = __atomic_fetch_add(&pf->rd.in,
+				    (ticket & RTE_PFLOCK_PHID) | RTE_PFLOCK_PRES,
+				    __ATOMIC_ACQUIRE);
+
+	/* Wait for any pending readers to flush. */
+	rte_wait_until_equal_16(&pf->rd.out, ticket, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Release a pflock held for writing.
+ *
+ * @param pf
+ *   A pointer to a pflock structure.
+ */
+__rte_experimental
+static inline void
+rte_pflock_write_unlock(rte_pflock_t *pf)
+{
+	/* Migrate from write phase to read phase. */
+	__atomic_fetch_and(&pf->rd.in, RTE_PFLOCK_LSB, __ATOMIC_RELEASE);
+
+	/* Allow other writers to continue. */
+	__atomic_fetch_add(&pf->wr.out, 1, __ATOMIC_RELEASE);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Try to take the pflock for write.
+ *
+ * @param pf
+ *   A pointer to the pflock.
+ * @return
+ *   - zero if the lock is successfully taken
+ *   - -EBUSY if lock could not be acquired for writing because
+ *     another writer holds the lock
+ */
+__rte_experimental
+static inline int
+rte_pflock_write_trylock(rte_pflock_t *pf)
+{
+	union rte_pflock_ticket old, new;
+	uint16_t ticket;
+	
+	/* Get current state of the lock */
+	old.tickets = __atomic_load_n(&pf->wr.tickets, __ATOMIC_RELAXED);
+	new.out = old.out;
+	new.in  = old.in + 1;
+	ticket = new.in;
+
+	/* if writer is already present then too busy */
+	if (old.out != new.in ||
+	    !__atomic_compare_exchange_n(&pf->wr.tickets, &old.tickets, new.tickets,
+					 0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED))
+		return -EBUSY; /* another writer is present already */
+		
+	/*
+	 * We now own the write phase, but still need to tell
+	 * readers and wait for them.
+	 *
+	 * Need ACQUIRE semantics to avoid speculative execution of wait loop
+	 */
+	ticket  = __atomic_fetch_add(&pf->rd.in,
+				 (ticket & RTE_PFLOCK_PHID) | RTE_PFLOCK_PRES,
+				 __ATOMIC_ACQUIRE);
+
+	/* Wait for any pending readers to flush. */
+	rte_wait_until_equal_16(&pf->rd.out, ticket, __ATOMIC_RELAXED);
+	return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_PFLOCK_H */
diff --git a/lib/librte_eal/ppc/include/meson.build b/lib/librte_eal/ppc/include/meson.build
index dae40ede546e..7692a531ccba 100644
--- a/lib/librte_eal/ppc/include/meson.build
+++ b/lib/librte_eal/ppc/include/meson.build
@@ -11,6 +11,7 @@  arch_headers = files(
 	'rte_mcslock.h',
 	'rte_memcpy.h',
 	'rte_pause.h',
+	'rte_pflock.h',
 	'rte_power_intrinsics.h',
 	'rte_prefetch.h',
 	'rte_rwlock.h',
diff --git a/lib/librte_eal/ppc/include/rte_pflock.h b/lib/librte_eal/ppc/include/rte_pflock.h
new file mode 100644
index 000000000000..e7b875ac56a8
--- /dev/null
+++ b/lib/librte_eal/ppc/include/rte_pflock.h
@@ -0,0 +1,16 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ */
+#ifndef _RTE_PFLOCK_PPC_64_H_
+#define _RTE_PFLOCK_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_pflock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PFLOCK_PPC_64_H_ */
diff --git a/lib/librte_eal/x86/include/meson.build b/lib/librte_eal/x86/include/meson.build
index 549cc21a42ed..39222cf724be 100644
--- a/lib/librte_eal/x86/include/meson.build
+++ b/lib/librte_eal/x86/include/meson.build
@@ -14,6 +14,7 @@  arch_headers = files(
 	'rte_mcslock.h',
 	'rte_memcpy.h',
 	'rte_pause.h',
+	'rte_pflock.h',
 	'rte_power_intrinsics.h',
 	'rte_prefetch.h',
 	'rte_rtm.h',
diff --git a/lib/librte_eal/x86/include/rte_pflock.h b/lib/librte_eal/x86/include/rte_pflock.h
new file mode 100644
index 000000000000..c2d876062c08
--- /dev/null
+++ b/lib/librte_eal/x86/include/rte_pflock.h
@@ -0,0 +1,18 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Microsoft Corporation
+ */
+
+#ifndef _RTE_PFLOCK_X86_64_H_
+#define _RTE_PFLOCK_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_pflock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PFLOCK_X86_64_H_ */