<snip>
>
> This is a new type of reader-writer lock that provides better fairness
> guarantees which better suited for typical DPDK applications.
> A pflock has two ticket pools, one for readers and one for writers.
>
> Phase fair reader writer locks ensure that neither reader nor writer will be
> starved. Neither reader or writer are preferred, they execute in alternating
> phases. All operations of the same type (reader or writer) that acquire the
> lock are handled in FIFO order. Write operations are exclusive, and multiple
> read operations can be run together (until a write arrives).
>
> A similar implementation is in Concurrency Kit package in FreeBSD.
> For more information see:
> "Reader-Writer Synchronization for Shared-Memory Multiprocessor
> Real-Time Systems",
> http://www.cs.unc.edu/~anderson/papers/ecrts09b.pdf
>
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Looks good.
Acked-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
One question below
> ---
> v5 - cleanup typos in the lock code comments
> minor revision to unit test.
> Note: the unit test is intentionally the same as other locking tests.
>
> app/test/meson.build | 2 +
> app/test/test_pflock.c | 197 +++++++++++++++++++
> lib/librte_eal/arm/include/meson.build | 1 +
> lib/librte_eal/arm/include/rte_pflock.h | 18 ++
> lib/librte_eal/include/generic/rte_pflock.h | 205 ++++++++++++++++++++
> lib/librte_eal/ppc/include/meson.build | 1 +
> lib/librte_eal/ppc/include/rte_pflock.h | 17 ++
> lib/librte_eal/x86/include/meson.build | 1 +
> lib/librte_eal/x86/include/rte_pflock.h | 18 ++
> 9 files changed, 460 insertions(+)
> create mode 100644 app/test/test_pflock.c create mode 100644
> lib/librte_eal/arm/include/rte_pflock.h
> create mode 100644 lib/librte_eal/include/generic/rte_pflock.h
> create mode 100644 lib/librte_eal/ppc/include/rte_pflock.h
> create mode 100644 lib/librte_eal/x86/include/rte_pflock.h
>
<snip>
> diff --git a/app/test/test_pflock.c b/app/test/test_pflock.c new file mode
> 100644 index 000000000000..6922bbc2f813
> --- /dev/null
> +++ b/app/test/test_pflock.c
> @@ -0,0 +1,197 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2021 Microsoft Corporation */
> +
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +#include <unistd.h>
> +#include <sys/queue.h>
> +#include <string.h>
> +
> +#include <rte_common.h>
> +#include <rte_memory.h>
> +#include <rte_per_lcore.h>
> +#include <rte_launch.h>
> +#include <rte_pause.h>
> +#include <rte_pflock.h>
> +#include <rte_eal.h>
> +#include <rte_lcore.h>
> +#include <rte_cycles.h>
> +
> +#include "test.h"
> +
<snip>
> +
> +/*
> + * - There is a global pflock and a table of pflocks (one per lcore).
> + *
> + * - The test function takes all of these locks and launches the
> + * ``test_pflock_per_core()`` function on each core (except the main).
> + *
> + * - The function takes the global write lock, display something,
> + * then releases the global lock.
> + * - Then, it takes the per-lcore write lock, display something, and
> + * releases the per-core lock.
> + * - Finally, a read lock is taken during 100 ms, then released.
> + *
> + * - The main function unlocks the per-lcore locks sequentially and
> + * waits between each lock. This triggers the display of a message
> + * for each core, in the correct order.
> + *
> + * Then, it tries to take the global write lock and display the last
> + * message. The autotest script checks that the message order is correct.
How does the autotest script does this?
> + */
> +static int
> +test_pflock(void)
> +{
> + int i;
> +
> + rte_pflock_init(&sl);
> + for (i = 0; i < RTE_MAX_LCORE; i++)
> + rte_pflock_init(&sl_tab[i]);
> +
> + rte_pflock_write_lock(&sl);
> +
> + RTE_LCORE_FOREACH_WORKER(i) {
> + rte_pflock_write_lock(&sl_tab[i]);
> + rte_eal_remote_launch(test_pflock_per_core, NULL, i);
> + }
> +
> + rte_pflock_write_unlock(&sl);
> +
> + RTE_LCORE_FOREACH_WORKER(i) {
> + rte_pflock_write_unlock(&sl_tab[i]);
> + rte_delay_ms(100);
> + }
> +
> + rte_pflock_write_lock(&sl);
> + /* this message should be the last message of test */
> + printf("Global write lock taken on main core %u\n", rte_lcore_id());
> + rte_pflock_write_unlock(&sl);
> +
> + rte_eal_mp_wait_lcore();
> +
> + if (test_pflock_perf() < 0)
> + return -1;
> +
> + return 0;
> +}
> +
> +REGISTER_TEST_COMMAND(pflock_autotest, test_pflock);
<snip>
@@ -90,6 +90,7 @@ test_sources = files('commands.c',
'test_mcslock.c',
'test_mp_secondary.c',
'test_per_lcore.c',
+ 'test_pflock.c',
'test_pmd_perf.c',
'test_power.c',
'test_power_cpufreq.c',
@@ -228,6 +229,7 @@ fast_tests = [
['meter_autotest', true],
['multiprocess_autotest', false],
['per_lcore_autotest', true],
+ ['pflock_autotest', true],
['prefetch_autotest', true],
['rcu_qsbr_autotest', true],
['red_autotest', true],
new file mode 100644
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Microsoft Corporation
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <sys/queue.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_pause.h>
+#include <rte_pflock.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_cycles.h>
+
+#include "test.h"
+
+/*
+ * phase fair lock test
+ * ===========
+ * Provides UT for phase fair lock API.
+ * Main concern is on functional testing, but also provides some
+ * performance measurements.
+ * Obviously for proper testing need to be executed with more than one lcore.
+ */
+
+static rte_pflock_t sl;
+static rte_pflock_t sl_tab[RTE_MAX_LCORE];
+static uint32_t synchro;
+
+static int
+test_pflock_per_core(__rte_unused void *arg)
+{
+ rte_pflock_write_lock(&sl);
+ printf("Global write lock taken on core %u\n", rte_lcore_id());
+ rte_pflock_write_unlock(&sl);
+
+ rte_pflock_write_lock(&sl_tab[rte_lcore_id()]);
+ printf("Hello from core %u !\n", rte_lcore_id());
+ rte_pflock_write_unlock(&sl_tab[rte_lcore_id()]);
+
+ rte_pflock_read_lock(&sl);
+ printf("Global read lock taken on core %u\n", rte_lcore_id());
+ rte_delay_ms(100);
+ printf("Release global read lock on core %u\n", rte_lcore_id());
+ rte_pflock_read_unlock(&sl);
+
+ return 0;
+}
+
+static rte_pflock_t lk = RTE_PFLOCK_INITIALIZER;
+static uint64_t time_count[RTE_MAX_LCORE] = {0};
+
+#define MAX_LOOP 10000
+
+static int
+load_loop_fn(void *arg)
+{
+ uint64_t time_diff = 0, begin;
+ uint64_t hz = rte_get_timer_hz();
+ uint64_t lcount = 0;
+ const int use_lock = *(int *)arg;
+ const unsigned int lcore = rte_lcore_id();
+
+ /* wait synchro for workers */
+ if (lcore != rte_get_main_lcore())
+ rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+
+ begin = rte_rdtsc_precise();
+ while (lcount < MAX_LOOP) {
+ if (use_lock)
+ rte_pflock_write_lock(&lk);
+ lcount++;
+ if (use_lock)
+ rte_pflock_write_unlock(&lk);
+
+ if (use_lock) {
+ rte_pflock_read_lock(&lk);
+ rte_pflock_read_unlock(&lk);
+ }
+ }
+
+ time_diff = rte_rdtsc_precise() - begin;
+ time_count[lcore] = time_diff * 1000000 / hz;
+ return 0;
+}
+
+static int
+test_pflock_perf(void)
+{
+ unsigned int i;
+ int lock = 0;
+ uint64_t total = 0;
+ const unsigned int lcore = rte_lcore_id();
+
+ printf("\nTest with no lock on single core...\n");
+ __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+ load_loop_fn(&lock);
+ printf("Core [%u] Cost Time = %"PRIu64" us\n",
+ lcore, time_count[lcore]);
+ memset(time_count, 0, sizeof(time_count));
+
+ printf("\nTest with phase fair lock on single core...\n");
+ lock = 1;
+ __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+ load_loop_fn(&lock);
+ printf("Core [%u] Cost Time = %"PRIu64" us\n",
+ lcore, time_count[lcore]);
+ memset(time_count, 0, sizeof(time_count));
+
+ printf("\nPhase fair test on %u cores...\n", rte_lcore_count());
+
+ /* clear synchro and start workers */
+ __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+ if (rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MAIN) < 0)
+ return -1;
+
+ /* start synchro and launch test on main */
+ __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+ load_loop_fn(&lock);
+
+ rte_eal_mp_wait_lcore();
+
+ RTE_LCORE_FOREACH(i) {
+ printf("Core [%u] cost time = %"PRIu64" us\n",
+ i, time_count[i]);
+ total += time_count[i];
+ }
+
+ printf("Total cost time = %"PRIu64" us\n", total);
+ memset(time_count, 0, sizeof(time_count));
+
+ return 0;
+}
+
+/*
+ * - There is a global pflock and a table of pflocks (one per lcore).
+ *
+ * - The test function takes all of these locks and launches the
+ * ``test_pflock_per_core()`` function on each core (except the main).
+ *
+ * - The function takes the global write lock, display something,
+ * then releases the global lock.
+ * - Then, it takes the per-lcore write lock, display something, and
+ * releases the per-core lock.
+ * - Finally, a read lock is taken during 100 ms, then released.
+ *
+ * - The main function unlocks the per-lcore locks sequentially and
+ * waits between each lock. This triggers the display of a message
+ * for each core, in the correct order.
+ *
+ * Then, it tries to take the global write lock and display the last
+ * message. The autotest script checks that the message order is correct.
+ */
+static int
+test_pflock(void)
+{
+ int i;
+
+ rte_pflock_init(&sl);
+ for (i = 0; i < RTE_MAX_LCORE; i++)
+ rte_pflock_init(&sl_tab[i]);
+
+ rte_pflock_write_lock(&sl);
+
+ RTE_LCORE_FOREACH_WORKER(i) {
+ rte_pflock_write_lock(&sl_tab[i]);
+ rte_eal_remote_launch(test_pflock_per_core, NULL, i);
+ }
+
+ rte_pflock_write_unlock(&sl);
+
+ RTE_LCORE_FOREACH_WORKER(i) {
+ rte_pflock_write_unlock(&sl_tab[i]);
+ rte_delay_ms(100);
+ }
+
+ rte_pflock_write_lock(&sl);
+ /* this message should be the last message of test */
+ printf("Global write lock taken on main core %u\n", rte_lcore_id());
+ rte_pflock_write_unlock(&sl);
+
+ rte_eal_mp_wait_lcore();
+
+ if (test_pflock_perf() < 0)
+ return -1;
+
+ return 0;
+}
+
+REGISTER_TEST_COMMAND(pflock_autotest, test_pflock);
@@ -21,6 +21,7 @@ arch_headers = files(
'rte_pause_32.h',
'rte_pause_64.h',
'rte_pause.h',
+ 'rte_pflock.h',
'rte_power_intrinsics.h',
'rte_prefetch_32.h',
'rte_prefetch_64.h',
new file mode 100644
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Microsoft Corporation
+ */
+
+#ifndef _RTE_PFLOCK_ARM_H_
+#define _RTE_PFLOCK_ARM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_pflock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PFLOCK_ARM_H_ */
new file mode 100644
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Derived from Concurrency Kit
+ * Copyright 2011-2015 Samy Al Bahra.
+ */
+
+#ifndef _RTE_PFLOCK_H_
+#define _RTE_PFLOCK_H_
+
+/**
+ * @file
+ *
+ * Phase-fair locks
+ *
+ * This file defines an API for Phase Fair reader writer locks,
+ * which is a variant of typical reader-writer locks that prevent
+ * starvation. In this type of lock, readers and writers alternate.
+ * This significantly reduces the worst-case blocking for readers and writers.
+ *
+ * This is an implementation derived from FreeBSD
+ * based on the work described in:
+ * Brandenburg, B. and Anderson, J. 2010. Spin-Based
+ * Reader-Writer Synchronization for Multiprocessor Real-Time Systems
+ *
+ * All locks must be initialised before use, and only initialised once.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include <rte_pause.h>
+
+/**
+ * The rte_pflock_t type.
+ */
+struct rte_pflock {
+ struct {
+ uint16_t in;
+ uint16_t out;
+ } rd, wr;
+};
+typedef struct rte_pflock rte_pflock_t;
+
+/*
+ * Allocation of bits to reader
+ *
+ * 15 4 3 2 1 0
+ * +-------------------+---+-+-+
+ * | rin: reads issued |x|x| | |
+ * +-------------------+---+-+-+
+ * ^ ^
+ * | |
+ * PRES: writer present ----/ |
+ * PHID: writer phase id -----/
+ *
+ * 15 4 3 2 1 0
+ * +------------------+------+
+ * |rout:read complete|unused|
+ * +------------------+------+
+ *
+ * The maximum number of readers is 4095
+ */
+
+/* Constants used to map the bits in reader counter */
+#define RTE_PFLOCK_WBITS 0x3 /* Writer bits in reader. */
+#define RTE_PFLOCK_PRES 0x2 /* Writer present bit. */
+#define RTE_PFLOCK_PHID 0x1 /* Phase ID bit. */
+#define RTE_PFLOCK_LSB 0xFFF0 /* reader bits. */
+#define RTE_PFLOCK_RINC 0x10 /* Reader increment. */
+
+/**
+ * A static pflock initializer.
+ */
+#define RTE_PFLOCK_INITIALIZER { }
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Initialize the pflock to an unlocked state.
+ *
+ * @param pf
+ * A pointer to the pflock.
+ */
+__rte_experimental
+static inline void
+rte_pflock_init(struct rte_pflock *pf)
+{
+ pf->rd.in = 0;
+ pf->rd.out = 0;
+ pf->wr.in = 0;
+ pf->wr.out = 0;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Take a pflock for read.
+ *
+ * @param pf
+ * A pointer to a pflock structure.
+ */
+__rte_experimental
+static inline void
+rte_pflock_read_lock(rte_pflock_t *pf)
+{
+ uint16_t w;
+
+ /*
+ * If no writer is present, then the operation has completed
+ * successfully.
+ */
+ w = __atomic_fetch_add(&pf->rd.in, RTE_PFLOCK_RINC, __ATOMIC_ACQUIRE)
+ & RTE_PFLOCK_WBITS;
+ if (w == 0)
+ return;
+
+ /* Wait for current write phase to complete. */
+ while ((__atomic_load_n(&pf->rd.in, __ATOMIC_ACQUIRE)
+ & RTE_PFLOCK_WBITS) == w)
+ rte_pause();
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Release a pflock locked for reading.
+ *
+ * @param pf
+ * A pointer to the pflock structure.
+ */
+__rte_experimental
+static inline void
+rte_pflock_read_unlock(rte_pflock_t *pf)
+{
+ __atomic_fetch_add(&pf->rd.out, RTE_PFLOCK_RINC, __ATOMIC_RELEASE);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Take the pflock for write.
+ *
+ * @param pf
+ * A pointer to the pflock structure.
+ */
+__rte_experimental
+static inline void
+rte_pflock_write_lock(rte_pflock_t *pf)
+{
+ uint16_t ticket, w;
+
+ /* Acquire ownership of write-phase.
+ * This is same as rte_tickelock_lock().
+ */
+ ticket = __atomic_fetch_add(&pf->wr.in, 1, __ATOMIC_RELAXED);
+ rte_wait_until_equal_16(&pf->wr.out, ticket, __ATOMIC_ACQUIRE);
+
+ /*
+ * Acquire ticket on read-side in order to allow them
+ * to flush. Indicates to any incoming reader that a
+ * write-phase is pending.
+ *
+ * The load of rd.out in wait loop could be executed
+ * speculatively.
+ */
+ w = RTE_PFLOCK_PRES | (ticket & RTE_PFLOCK_PHID);
+ ticket = __atomic_fetch_add(&pf->rd.in, w, __ATOMIC_RELAXED);
+
+ /* Wait for any pending readers to flush. */
+ rte_wait_until_equal_16(&pf->rd.out, ticket, __ATOMIC_ACQUIRE);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Release a pflock held for writing.
+ *
+ * @param pf
+ * A pointer to a pflock structure.
+ */
+__rte_experimental
+static inline void
+rte_pflock_write_unlock(rte_pflock_t *pf)
+{
+ /* Migrate from write phase to read phase. */
+ __atomic_fetch_and(&pf->rd.in, RTE_PFLOCK_LSB, __ATOMIC_RELEASE);
+
+ /* Allow other writers to continue. */
+ __atomic_fetch_add(&pf->wr.out, 1, __ATOMIC_RELEASE);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_PFLOCK_H */
@@ -11,6 +11,7 @@ arch_headers = files(
'rte_mcslock.h',
'rte_memcpy.h',
'rte_pause.h',
+ 'rte_pflock.h',
'rte_power_intrinsics.h',
'rte_prefetch.h',
'rte_rwlock.h',
new file mode 100644
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Microsoft Corporation
+ */
+#ifndef _RTE_PFLOCK_PPC_64_H_
+#define _RTE_PFLOCK_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_pflock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PFLOCK_PPC_64_H_ */
@@ -10,6 +10,7 @@ arch_headers = files(
'rte_mcslock.h',
'rte_memcpy.h',
'rte_pause.h',
+ 'rte_pflock.h',
'rte_power_intrinsics.h',
'rte_prefetch.h',
'rte_rtm.h',
new file mode 100644
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Microsoft Corporation
+ */
+
+#ifndef _RTE_PFLOCK_X86_64_H_
+#define _RTE_PFLOCK_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_pflock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PFLOCK_X86_64_H_ */