[RFC] eal: add fair reader writer lock
diff mbox series

Message ID 20210112060524.409412-1-stephen@networkplumber.org
State Superseded
Delegated to: Thomas Monjalon
Headers show
Series
  • [RFC] eal: add fair reader writer lock
Related show

Checks

Context Check Description
ci/intel-Testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/checkpatch warning coding style issues

Commit Message

Stephen Hemminger Jan. 12, 2021, 6:05 a.m. UTC
Implement a fair reader/writer lock based on the current DPDK ticket lock.
This lock type acts like rte_rwlock but, like the ticket lock, it
is fair for multiple writers.  Writers have full priority over
readers: a stream of readers will not starve incoming writers,
which is a serious bug in the existing rte_rwlock.

The tests are just a clone of the existing rte_rwlock tests with the test
and function names changed. So the new fair lock should be a drop-in
replacement for most users.


Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 app/test/autotest_data.py                     |   6 +
 app/test/meson.build                          |   5 +
 app/test/test_fair_rwlock.c                   | 555 ++++++++++++++++++
 doc/api/doxy-api-index.md                     |   1 +
 lib/librte_eal/arm/include/meson.build        |   1 +
 lib/librte_eal/arm/include/rte_fair_rwlock.h  |  22 +
 .../include/generic/rte_fair_rwlock.h         | 203 +++++++
 lib/librte_eal/include/meson.build            |   1 +
 lib/librte_eal/ppc/include/meson.build        |   1 +
 lib/librte_eal/ppc/include/rte_fair_rwlock.h  |  18 +
 lib/librte_eal/x86/include/meson.build        |   1 +
 lib/librte_eal/x86/include/rte_fair_rwlock.h  |  18 +
 12 files changed, 832 insertions(+)
 create mode 100644 app/test/test_fair_rwlock.c
 create mode 100644 lib/librte_eal/arm/include/rte_fair_rwlock.h
 create mode 100644 lib/librte_eal/include/generic/rte_fair_rwlock.h
 create mode 100644 lib/librte_eal/ppc/include/rte_fair_rwlock.h
 create mode 100644 lib/librte_eal/x86/include/rte_fair_rwlock.h

Patch
diff mbox series

diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 097638941f19..d7e970d1cde0 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -63,6 +63,12 @@ 
         "Func":    rwlock_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Fair read/write lock autotest",
+        "Command": "fair_rwlock_autotest",
+        "Func":    rwlock_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Lcores autotest",
         "Command": "lcores_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index 94fd39fecb82..388547ade3ff 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -46,6 +46,7 @@  test_sources = files('commands.c',
 	'test_event_timer_adapter.c',
 	'test_eventdev.c',
 	'test_external_mem.c',
+	'test_fair_rwlock.c',
 	'test_fbarray.c',
 	'test_fib.c',
 	'test_fib_perf.c',
@@ -206,6 +207,10 @@  fast_tests = [
         ['errno_autotest', true],
         ['ethdev_link_status', true],
         ['event_ring_autotest', true],
+        ['fair_rwlock_test1_autotest', true],
+        ['fair_rwlock_rda_autotest', true],
+        ['fair_rwlock_rds_wrm_autotest', true],
+        ['fair_rwlock_rde_wro_autotest', true],
         ['fib_autotest', true],
         ['fib6_autotest', true],
         ['func_reentrancy_autotest', false],
diff --git a/app/test/test_fair_rwlock.c b/app/test/test_fair_rwlock.c
new file mode 100644
index 000000000000..b3da49e2ad3b
--- /dev/null
+++ b/app/test/test_fair_rwlock.c
@@ -0,0 +1,555 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <sys/queue.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_fair_rwlock.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_cycles.h>
+
+#include "test.h"
+
+/*
+ * fair rwlock test
+ * ===========
+ * Provides UT for rte_fair_rwlock API.
+ * Main concern is on functional testing, but also provides some
+ * performance measurements.
+ * For meaningful results the test must be run with more than one lcore.
+ */
+
+/* number of trylock attempts made between two elapsed-time checks */
+#define ITER_NUM	0x80
+
+/* multiplied by the TSC frequency to obtain the trylock test duration */
+#define TEST_SEC	5
+
+/* global lock shared by every lcore in rwlock_test1 */
+static rte_fair_rwlock_t sl;
+/* one lock per lcore, indexed by lcore id, used in rwlock_test1 */
+static rte_fair_rwlock_t sl_tab[RTE_MAX_LCORE];
+/* start flag: workers of the perf test spin until it becomes non-zero */
+static rte_atomic32_t synchro;
+
+/* role recorded by each lcore taking part in the trylock tests */
+enum {
+	LC_TYPE_RDLOCK,
+	LC_TYPE_WRLOCK,
+};
+
+/*
+ * Shared state of the trylock tests: the lock under test, the test
+ * duration and the payload that the lock is supposed to protect.
+ */
+static struct {
+	/* lock exercised through the read/write trylock API */
+	rte_fair_rwlock_t lock;
+	/* duration limit, set by try_test_reset() to TEST_SEC * rte_get_tsc_hz() */
+	uint64_t tick;
+	/*
+	 * Payload: writers fill and clear it byte by byte (u8 view),
+	 * readers verify it word by word (u64 view) and expect all zeroes.
+	 */
+	volatile union {
+		uint8_t u8[RTE_CACHE_LINE_SIZE];
+		uint64_t u64[RTE_CACHE_LINE_SIZE / sizeof(uint64_t)];
+	} data;
+} __rte_cache_aligned try_rwlock_data;
+
+/* Per-lcore result and statistics of one trylock test run. */
+struct try_rwlock_lcore {
+	/* final status of the lcore loop (0 on success) */
+	int32_t rc;
+	/* LC_TYPE_RDLOCK or LC_TYPE_WRLOCK */
+	int32_t type;
+	struct {
+		/* timer cycles spent in the test loop */
+		uint64_t tick;
+		/* trylock attempts that returned -EBUSY */
+		uint64_t fail;
+		/* trylock attempts that took the lock and passed the data check */
+		uint64_t success;
+	} stat;
+} __rte_cache_aligned;
+
+/* one slot per lcore, indexed by lcore id */
+static struct try_rwlock_lcore try_lcore_data[RTE_MAX_LCORE];
+
+/*
+ * Worker body of rwlock_test1: take and release the global write lock,
+ * then this lcore's own write lock, then hold the global read lock for
+ * 100 ms. A message is printed at every step.
+ */
+static int
+test_rwlock_per_core(__rte_unused void *arg)
+{
+	const unsigned int self = rte_lcore_id();
+	rte_fair_rwlock_t *own = &sl_tab[self];
+
+	/* step 1: global lock, exclusive */
+	rte_fair_rwlock_write_lock(&sl);
+	printf("Global write lock taken on core %u\n", self);
+	rte_fair_rwlock_write_unlock(&sl);
+
+	/* step 2: per-lcore lock, exclusive */
+	rte_fair_rwlock_write_lock(own);
+	printf("Hello from core %u !\n", self);
+	rte_fair_rwlock_write_unlock(own);
+
+	/* step 3: global lock, shared, held for a while */
+	rte_fair_rwlock_read_lock(&sl);
+	printf("Global read lock taken on core %u\n", self);
+	rte_delay_ms(100);
+	printf("Release global read lock on core %u\n", self);
+	rte_fair_rwlock_read_unlock(&sl);
+
+	return 0;
+}
+
+/* lock exercised by the perf test (load_loop_fn) */
+static rte_fair_rwlock_t lk = RTE_FAIR_RWLOCK_INITIALIZER;
+/* counter incremented under the write lock by the perf test */
+static volatile uint64_t rwlock_data;
+/* per-lcore duration of the perf loop, in microseconds */
+static uint64_t time_count[RTE_MAX_LCORE] = {0};
+
+/* iterations of the perf loop executed by every lcore */
+#define MAX_LOOP 10000
+/* set to non-zero to print rwlock_data every 100 iterations */
+#define TEST_RWLOCK_DEBUG 0
+
+/*
+ * Perf-test body run on every lcore: MAX_LOOP rounds of a write-locked
+ * increment followed by a read-locked section. The elapsed time is
+ * stored, in microseconds, in time_count[] for the calling lcore.
+ */
+static int
+load_loop_fn(__rte_unused void *arg)
+{
+	const uint64_t timer_hz = rte_get_timer_hz();
+	const unsigned int lcore = rte_lcore_id();
+	uint64_t start, elapsed;
+	uint64_t round;
+
+	/* workers are held back until the main lcore raises synchro */
+	if (lcore != rte_get_main_lcore()) {
+		while (rte_atomic32_read(&synchro) == 0)
+			;
+	}
+
+	start = rte_rdtsc_precise();
+	for (round = 0; round < MAX_LOOP; round++) {
+		rte_fair_rwlock_write_lock(&lk);
+		++rwlock_data;
+		rte_fair_rwlock_write_unlock(&lk);
+
+		rte_fair_rwlock_read_lock(&lk);
+		if (TEST_RWLOCK_DEBUG && !(round % 100))
+			printf("Core [%u] rwlock_data = %"PRIu64"\n",
+				lcore, rwlock_data);
+		rte_fair_rwlock_read_unlock(&lk);
+
+		/* delay to make lock duty cycle slightly realistic */
+		rte_pause();
+	}
+	elapsed = rte_rdtsc_precise() - start;
+
+	/* cycles -> microseconds */
+	time_count[lcore] = elapsed * 1000000 / timer_hz;
+	return 0;
+}
+
+/*
+ * Run load_loop_fn() on all lcores at once and print the time each one
+ * needed, plus the total.
+ *
+ * @return 0 on success, -1 when the workers could not be launched.
+ */
+static int
+test_rwlock_perf(void)
+{
+	unsigned int lcore_id;
+	uint64_t sum_us = 0;
+
+	printf("\nRwlock Perf Test on %u cores...\n", rte_lcore_count());
+
+	/* keep the workers waiting while they are being launched */
+	rte_atomic32_set(&synchro, 0);
+	if (rte_eal_mp_remote_launch(load_loop_fn, NULL, SKIP_MAIN) < 0)
+		return -1;
+
+	/* release the workers, then run the same load on the main lcore */
+	rte_atomic32_set(&synchro, 1);
+	load_loop_fn(NULL);
+
+	rte_eal_mp_wait_lcore();
+
+	RTE_LCORE_FOREACH(lcore_id) {
+		const uint64_t us = time_count[lcore_id];
+
+		printf("Core [%u] cost time = %"PRIu64" us\n",
+			lcore_id, us);
+		sum_us += us;
+	}
+
+	printf("Total cost time = %"PRIu64" us\n", sum_us);
+
+	/* leave a clean table behind for the next run */
+	memset(time_count, 0, sizeof(time_count));
+
+	return 0;
+}
+
+/*
+ * - There is a global rwlock and a table of rwlocks (one per lcore).
+ *
+ * - The test function takes all of these locks and launches the
+ *   ``test_rwlock_per_core()`` function on each core (except the main).
+ *
+ *   - The function takes the global write lock, display something,
+ *     then releases the global lock.
+ *   - Then, it takes the per-lcore write lock, display something, and
+ *     releases the per-core lock.
+ *   - Finally, a read lock is taken during 100 ms, then released.
+ *
+ * - The main function unlocks the per-lcore locks sequentially and
+ *   waits between each lock. This triggers the display of a message
+ *   for each core, in the correct order.
+ *
+ *   Then, it tries to take the global write lock and display the last
+ *   message. The autotest script checks that the message order is correct.
+ */
+static int
+rwlock_test1(void)
+{
+	int lcore;
+
+	rte_fair_rwlock_init(&sl);
+	for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++)
+		rte_fair_rwlock_init(&sl_tab[lcore]);
+
+	/* workers are launched while every lock is write-held by main */
+	rte_fair_rwlock_write_lock(&sl);
+	RTE_LCORE_FOREACH_WORKER(lcore) {
+		rte_fair_rwlock_write_lock(&sl_tab[lcore]);
+		rte_eal_remote_launch(test_rwlock_per_core, NULL, lcore);
+	}
+	rte_fair_rwlock_write_unlock(&sl);
+
+	/* let the workers through one at a time */
+	RTE_LCORE_FOREACH_WORKER(lcore) {
+		rte_fair_rwlock_write_unlock(&sl_tab[lcore]);
+		rte_delay_ms(100);
+	}
+
+	rte_fair_rwlock_write_lock(&sl);
+	/* this message should be the last message of test */
+	printf("Global write lock taken on main core %u\n", rte_lcore_id());
+	rte_fair_rwlock_write_unlock(&sl);
+
+	rte_eal_mp_wait_lcore();
+
+	return (test_rwlock_perf() < 0) ? -1 : 0;
+}
+
+/*
+ * One read attempt: try to take the shared lock and, when that works,
+ * verify that every 64-bit word of the payload is zero.
+ *
+ * @return 0 on success, the trylock error when the lock was not taken,
+ *   -EFAULT when the payload was found modified.
+ */
+static int
+try_read(uint32_t lc)
+{
+	const uint32_t nwords = RTE_DIM(try_rwlock_data.data.u64);
+	int32_t status;
+	uint32_t w;
+
+	status = rte_fair_rwlock_read_trylock(&try_rwlock_data.lock);
+	if (status != 0)
+		return status;
+
+	for (w = 0; w < nwords; w++) {
+		if (try_rwlock_data.data.u64[w] == 0)
+			continue;
+
+		/* a writer was active while we held the read lock */
+		printf("%s(%u) error: unexpected data pattern\n",
+			__func__, lc);
+		rte_memdump(stdout, NULL,
+			(void *)(uintptr_t)&try_rwlock_data.data,
+			sizeof(try_rwlock_data.data));
+		status = -EFAULT;
+		break;
+	}
+
+	rte_fair_rwlock_read_unlock(&try_rwlock_data.lock);
+	return status;
+}
+
+/*
+ * One write attempt: try to take the exclusive lock and, when that
+ * works, fill the payload with a per-lcore byte value and then clear it
+ * again, checking at every byte that nobody else touched it.
+ *
+ * Returns 0 on success, the trylock error when the lock was not taken,
+ * -EFAULT when an unexpected byte value was found.
+ */
+static int
+try_write(uint32_t lc)
+{
+	int32_t rc;
+	uint32_t i, v;
+
+	/* non-zero marker byte derived from the lcore id */
+	v = RTE_MAX(lc % UINT8_MAX, 1U);
+
+	rc = rte_fair_rwlock_write_trylock(&try_rwlock_data.lock);
+	if (rc != 0)
+		return rc;
+
+	/* update by bytes in reverse order */
+	for (i = RTE_DIM(try_rwlock_data.data.u8); i-- != 0; ) {
+
+		/* race condition occurred, lock doesn't work properly */
+		if (try_rwlock_data.data.u8[i] != 0) {
+			printf("%s:%d(%u) error: unexpected data pattern\n",
+				__func__, __LINE__, lc);
+			rte_memdump(stdout, NULL,
+				(void *)(uintptr_t)&try_rwlock_data.data,
+				sizeof(try_rwlock_data.data));
+			rc = -EFAULT;
+			break;
+		}
+
+		try_rwlock_data.data.u8[i] = v;
+	}
+
+	/* restore by bytes in reverse order */
+	for (i = RTE_DIM(try_rwlock_data.data.u8); i-- != 0; ) {
+
+		/* race condition occurred, lock doesn't work properly */
+		if (try_rwlock_data.data.u8[i] != v) {
+			printf("%s:%d(%u) error: unexpected data pattern\n",
+				__func__, __LINE__, lc);
+			rte_memdump(stdout, NULL,
+				(void *)(uintptr_t)&try_rwlock_data.data,
+				sizeof(try_rwlock_data.data));
+			rc = -EFAULT;
+			break;
+		}
+
+		try_rwlock_data.data.u8[i] = 0;
+	}
+
+	rte_fair_rwlock_write_unlock(&try_rwlock_data.lock);
+	return rc;
+}
+
+/*
+ * Reader loop of the trylock tests. Calls try_read() in batches of
+ * ITER_NUM until the configured duration has elapsed or a hard error
+ * (anything other than 0 / -EBUSY) is reported, and records the outcome
+ * in this lcore's try_lcore_data[] slot.
+ */
+static int
+try_read_lcore(__rte_unused void *data)
+{
+	const uint32_t self = rte_lcore_id();
+	struct try_rwlock_lcore *const me = &try_lcore_data[self];
+	const uint64_t budget = try_rwlock_data.tick;
+	uint64_t start, elapsed;
+	uint32_t n;
+	int32_t rc;
+
+	me->type = LC_TYPE_RDLOCK;
+	start = rte_get_timer_cycles();
+
+	do {
+		for (n = 0; n != ITER_NUM; n++) {
+			rc = try_read(self);
+
+			/* anything but "taken" or "busy" ends the test */
+			if (rc != 0 && rc != -EBUSY)
+				break;
+
+			if (rc == 0)
+				me->stat.success++;
+			else
+				me->stat.fail++;
+			rc = 0;
+		}
+		elapsed = rte_get_timer_cycles() - start;
+	} while (rc == 0 && elapsed < budget);
+
+	me->rc = rc;
+	me->stat.tick = elapsed;
+	return rc;
+}
+
+/*
+ * Writer loop of the trylock tests. Calls try_write() in batches of
+ * ITER_NUM until the configured duration has elapsed or a hard error
+ * (anything other than 0 / -EBUSY) is reported, and records the outcome
+ * in this lcore's try_lcore_data[] slot.
+ */
+static int
+try_write_lcore(__rte_unused void *data)
+{
+	const uint32_t self = rte_lcore_id();
+	struct try_rwlock_lcore *const me = &try_lcore_data[self];
+	const uint64_t budget = try_rwlock_data.tick;
+	uint64_t start, elapsed;
+	uint32_t n;
+	int32_t rc;
+
+	me->type = LC_TYPE_WRLOCK;
+	start = rte_get_timer_cycles();
+
+	do {
+		for (n = 0; n != ITER_NUM; n++) {
+			rc = try_write(self);
+
+			/* anything but "taken" or "busy" ends the test */
+			if (rc != 0 && rc != -EBUSY)
+				break;
+
+			if (rc == 0)
+				me->stat.success++;
+			else
+				me->stat.fail++;
+			rc = 0;
+		}
+		elapsed = rte_get_timer_cycles() - start;
+	} while (rc == 0 && elapsed < budget);
+
+	me->rc = rc;
+	me->stat.tick = elapsed;
+	return rc;
+}
+
+/*
+ * Print one statistics record. The two ratios that divide by the
+ * success or fail counter use at least 1 as divisor.
+ *
+ * @param tlc  record to print
+ * @param lc   number shown as the array index in the output
+ */
+static void
+print_try_lcore_stats(const struct try_rwlock_lcore *tlc, uint32_t lc)
+{
+	const uint64_t nfail = tlc->stat.fail;
+	const uint64_t nsucc = tlc->stat.success;
+	const uint64_t cycles = tlc->stat.tick;
+	const uint64_t fail_div = RTE_MAX(nfail, 1ULL);
+	const uint64_t succ_div = RTE_MAX(nsucc, 1ULL);
+	const char *kind;
+	long double per_op, per_success, ratio;
+
+	kind = (tlc->type == LC_TYPE_RDLOCK) ? "RDLOCK" : "WRLOCK";
+	per_op = (long double)cycles / (nfail + nsucc);
+	per_success = (long double)cycles / succ_div;
+	ratio = (long double)nsucc / fail_div;
+
+	printf("try_lcore_data[%u]={\n"
+		"\trc=%d,\n"
+		"\ttype=%s,\n"
+		"\tfail=%" PRIu64 ",\n"
+		"\tsuccess=%" PRIu64 ",\n"
+		"\tcycles=%" PRIu64 ",\n"
+		"\tcycles/op=%#Lf,\n"
+		"\tcycles/success=%#Lf,\n"
+		"\tsuccess/fail=%#Lf,\n"
+		"};\n",
+		lc, tlc->rc, kind,
+		nfail, nsucc, cycles,
+		per_op, per_success, ratio);
+}
+
+/* Add the counters of one lcore record (lc) to an aggregate (tlc). */
+static void
+collect_try_lcore_stats(struct try_rwlock_lcore *tlc,
+	const struct try_rwlock_lcore *lc)
+{
+	tlc->stat.success = tlc->stat.success + lc->stat.success;
+	tlc->stat.fail = tlc->stat.fail + lc->stat.fail;
+	tlc->stat.tick = tlc->stat.tick + lc->stat.tick;
+}
+
+/*
+ * Evaluate a finished trylock test:
+ *  - OR together the status of every lcore
+ *  - sum the counters per role (reader / writer)
+ *  - when no lcore failed, print per-lcore and aggregated statistics
+ *
+ * Returns the combined status, 0 when every lcore succeeded.
+ */
+static int
+process_try_lcore_stats(void)
+{
+	struct try_rwlock_lcore readers, writers;
+	uint32_t nreaders = 0, nwriters = 0;
+	uint32_t lc;
+	int32_t status = 0;
+
+	memset(&readers, 0, sizeof(readers));
+	memset(&writers, 0, sizeof(writers));
+	readers.type = LC_TYPE_RDLOCK;
+	writers.type = LC_TYPE_WRLOCK;
+
+	RTE_LCORE_FOREACH(lc) {
+		const struct try_rwlock_lcore *cur = &try_lcore_data[lc];
+
+		status |= cur->rc;
+		if (cur->type == LC_TYPE_RDLOCK) {
+			collect_try_lcore_stats(&readers, cur);
+			nreaders++;
+		} else {
+			collect_try_lcore_stats(&writers, cur);
+			nwriters++;
+		}
+	}
+
+	/* statistics are only shown for a clean run */
+	if (status != 0)
+		return status;
+
+	RTE_LCORE_FOREACH(lc)
+		print_try_lcore_stats(&try_lcore_data[lc], lc);
+
+	if (nreaders != 0) {
+		printf("aggregated stats for %u RDLOCK cores:\n", nreaders);
+		print_try_lcore_stats(&readers, nreaders);
+	}
+
+	if (nwriters != 0) {
+		printf("aggregated stats for %u WRLOCK cores:\n", nwriters);
+		print_try_lcore_stats(&writers, nwriters);
+	}
+
+	return status;
+}
+
+/*
+ * Clear the shared trylock state and the per-lcore results, then set
+ * the test duration to TEST_SEC seconds worth of TSC cycles.
+ */
+static void
+try_test_reset(void)
+{
+	const uint64_t duration = TEST_SEC * rte_get_tsc_hz();
+
+	memset(&try_rwlock_data, 0, sizeof(try_rwlock_data));
+	memset(&try_lcore_data, 0, sizeof(try_lcore_data));
+	try_rwlock_data.tick = duration;
+}
+
+/* readers only: every lcore, main included, runs the reader loop */
+static int
+try_rwlock_test_rda(void)
+{
+	int status;
+
+	try_test_reset();
+
+	/* start the reader loop on all available lcores and wait for them */
+	rte_eal_mp_remote_launch(try_read_lcore, NULL, CALL_MAIN);
+	rte_eal_mp_wait_lcore();
+
+	status = process_try_lcore_stats();
+	return status;
+}
+
+/* worker lcores run the reader loop, the main lcore is the only writer */
+static int
+try_rwlock_test_rds_wrm(void)
+{
+	int status;
+
+	try_test_reset();
+
+	/* readers on the workers, writer loop inline on main */
+	rte_eal_mp_remote_launch(try_read_lcore, NULL, SKIP_MAIN);
+	try_write_lcore(NULL);
+	rte_eal_mp_wait_lcore();
+
+	status = process_try_lcore_stats();
+	return status;
+}
+
+/*
+ * Mixed load: the main lcore and the even-numbered workers run the
+ * reader loop, the odd-numbered workers run the writer loop.
+ */
+static int
+try_rwlock_test_rde_wro(void)
+{
+	const uint32_t main_lcore = rte_get_main_lcore();
+	uint32_t lc;
+
+	try_test_reset();
+
+	RTE_LCORE_FOREACH(lc) {
+		/* main runs its reader loop inline, below */
+		if (lc == main_lcore)
+			continue;
+
+		rte_eal_remote_launch(
+			(lc & 1) != 0 ? try_write_lcore : try_read_lcore,
+			NULL, lc);
+	}
+
+	try_read_lcore(NULL);
+	rte_eal_mp_wait_lcore();
+
+	return process_try_lcore_stats();
+}
+
+/*
+ * Run every fair rwlock sub-test in sequence and report each result.
+ * Returns the OR of all sub-test statuses, 0 when all of them passed.
+ */
+static int
+test_rwlock(void)
+{
+	static const struct {
+		const char *name;
+		int (*ftst)(void);
+	} cases[] = {
+		{ "rwlock_test1", rwlock_test1 },
+		{ "try_rwlock_test_rda", try_rwlock_test_rda },
+		{ "try_rwlock_test_rds_wrm", try_rwlock_test_rds_wrm },
+		{ "try_rwlock_test_rde_wro", try_rwlock_test_rde_wro },
+	};
+	const uint32_t ncases = RTE_DIM(cases);
+	int32_t overall = 0;
+	uint32_t idx;
+
+	for (idx = 0; idx < ncases; idx++) {
+		const char *label = cases[idx].name;
+		int32_t status;
+
+		printf("starting test %s;\n", label);
+		status = cases[idx].ftst();
+		printf("test %s completed with status %d\n", label, status);
+		overall |= status;
+	}
+
+	return overall;
+}
+
+/* complete suite: runs all sub-tests through test_rwlock() */
+REGISTER_TEST_COMMAND(fair_rwlock_autotest, test_rwlock);
+
+/* subtests used in meson for CI */
+REGISTER_TEST_COMMAND(fair_rwlock_test1_autotest, rwlock_test1);
+REGISTER_TEST_COMMAND(fair_rwlock_rda_autotest, try_rwlock_test_rda);
+REGISTER_TEST_COMMAND(fair_rwlock_rds_wrm_autotest, try_rwlock_test_rds_wrm);
+REGISTER_TEST_COMMAND(fair_rwlock_rde_wro_autotest, try_rwlock_test_rde_wro);
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index 748514e24316..988a00c8532d 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -76,6 +76,7 @@  The public API headers are grouped by topics:
   [rwlock]             (@ref rte_rwlock.h),
   [spinlock]           (@ref rte_spinlock.h),
   [ticketlock]         (@ref rte_ticketlock.h),
+  [fair rwlock]        (@ref rte_fair_rwlock.h),
   [RCU]                (@ref rte_rcu_qsbr.h)
 
 - **CPU arch**:
diff --git a/lib/librte_eal/arm/include/meson.build b/lib/librte_eal/arm/include/meson.build
index 770766de1a34..e88aaa1dcd40 100644
--- a/lib/librte_eal/arm/include/meson.build
+++ b/lib/librte_eal/arm/include/meson.build
@@ -12,6 +12,7 @@  arch_headers = files(
 	'rte_cycles_32.h',
 	'rte_cycles_64.h',
 	'rte_cycles.h',
+	'rte_fair_rwlock.h',
 	'rte_io_64.h',
 	'rte_io.h',
 	'rte_mcslock.h',
diff --git a/lib/librte_eal/arm/include/rte_fair_rwlock.h b/lib/librte_eal/arm/include/rte_fair_rwlock.h
new file mode 100644
index 000000000000..73d8a1c17583
--- /dev/null
+++ b/lib/librte_eal/arm/include/rte_fair_rwlock.h
@@ -0,0 +1,22 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Microsoft Corporation
+ */
+
+#ifndef _RTE_FAIR_RWLOCK_ARM_H_
+#define _RTE_FAIR_RWLOCK_ARM_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+#  error Platform must be built with RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_fair_rwlock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_FAIR_RWLOCK_ARM_H_ */
diff --git a/lib/librte_eal/include/generic/rte_fair_rwlock.h b/lib/librte_eal/include/generic/rte_fair_rwlock.h
new file mode 100644
index 000000000000..b2e9c4d92afc
--- /dev/null
+++ b/lib/librte_eal/include/generic/rte_fair_rwlock.h
@@ -0,0 +1,203 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Microsoft Corporation
+ */
+
+#ifndef _RTE_FAIR_RWLOCK_H_
+#define _RTE_FAIR_RWLOCK_H_
+
+/**
+ * @file
+ *
+ * RTE Fair Read-Write Locks
+ *
+ * This file defines an API for fair read-write locks.
+ * This lock type acts like rwlock but provides fairness:
+ * writers are served first come, first served.
+ *
+ * All locks must be initialised before use, and only initialised once.
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_ticketlock.h>
+
+/**
+ * The rte_fair_rwlock_t type.
+ *
+ * Writers serialise on the ticket lock; readers are tracked by a counter.
+ */
+typedef struct {
+        /* held by a writer for its whole critical section; readers that
+         * find it locked take it briefly to queue up behind the writers
+         */
+        rte_ticketlock_t writer;
+        /* number of read references currently taken on the lock */
+        uint16_t readers;
+} rte_fair_rwlock_t;
+
+/**
+ * A static fair_rwlock initializer.
+ */
+#define RTE_FAIR_RWLOCK_INITIALIZER { RTE_TICKETLOCK_INITIALIZER, 0 }
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Put a fair_rwlock into the unlocked state: no reader reference is
+ * held and the writer ticket lock is free.
+ *
+ * @param rwl
+ *   A pointer to the fair_rwlock structure.
+ */
+__rte_experimental
+static inline void
+rte_fair_rwlock_init(rte_fair_rwlock_t *rwl)
+{
+	rwl->readers = 0;
+	rte_ticketlock_init(&rwl->writer);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Take a read lock. Loop until the lock is held.
+ *
+ * The fast path only takes a reader reference. When the writer ticket
+ * lock is found locked, the reference is dropped again and the caller
+ * queues on the ticket lock before re-taking its reference.
+ *
+ * @param rwl
+ *   A pointer to a fair_rwlock structure.
+ */
+__rte_experimental
+static inline void
+rte_fair_rwlock_read_lock(rte_fair_rwlock_t *rwl)
+{
+	/* Optimistically, grab a reference */
+	__atomic_fetch_add(&rwl->readers, 1, __ATOMIC_ACQUIRE);
+
+	/*
+	 * The reference must be visible to other cores before the writer
+	 * ticket is sampled. Acquire ordering on the increment does not
+	 * stop the following load from passing the store half of the
+	 * read-modify-write, so a full fence is required here.
+	 */
+	__atomic_thread_fence(__ATOMIC_SEQ_CST);
+
+	/* If a writer raced in and got the lock, we need to back out. */
+	if (rte_ticketlock_is_locked(&rwl->writer)) {
+		/* Drop our reference so the Writer can continue */
+		__atomic_sub_fetch(&rwl->readers, 1, __ATOMIC_RELEASE);
+
+		/* Wait for the Writer to finish then get our new reference */
+		rte_ticketlock_lock(&rwl->writer);
+		__atomic_add_fetch(&rwl->readers, 1, __ATOMIC_ACQUIRE);
+		rte_ticketlock_unlock(&rwl->writer);
+	}
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Try to take a read lock without waiting.
+ *
+ * @param rwl
+ *   A pointer to a fair_rwlock structure.
+ *
+ * @return
+ *   - zero if the lock is successfully taken
+ *   - -EBUSY if lock could not be acquired for reading because the
+ *     writer ticket lock was found locked (a writer holds or is taking
+ *     the lock, or a blocked reader is re-taking its reference)
+ */
+__rte_experimental
+static inline int
+rte_fair_rwlock_read_trylock(rte_fair_rwlock_t *rwl)
+{
+	/* Get our reference count */
+	__atomic_fetch_add(&rwl->readers, 1, __ATOMIC_ACQUIRE);
+
+	/*
+	 * Make the reference visible before sampling the writer ticket;
+	 * acquire ordering alone does not order the store half of the
+	 * increment against the following load.
+	 */
+	__atomic_thread_fence(__ATOMIC_SEQ_CST);
+
+	/* If a writer has the lock, we need to back out. */
+	if (rte_ticketlock_is_locked(&rwl->writer)) {
+		__atomic_sub_fetch(&rwl->readers, 1, __ATOMIC_RELEASE);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Release a read lock by dropping the caller's reader reference.
+ *
+ * @param rwl
+ *   A pointer to the fair_rwlock structure.
+ */
+__rte_experimental
+static inline void
+rte_fair_rwlock_read_unlock(rte_fair_rwlock_t *rwl)
+{
+	/* release: the reads of the critical section complete first */
+	__atomic_sub_fetch(&rwl->readers, 1, __ATOMIC_RELEASE);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Take a write lock. Loop until the lock is held.
+ *
+ * The caller first queues on the writer ticket lock, then waits until
+ * the reader count has dropped to zero.
+ *
+ * @param rwl
+ *   A pointer to a fair_rwlock structure.
+ */
+__rte_experimental
+static inline void
+rte_fair_rwlock_write_lock(rte_fair_rwlock_t *rwl)
+{
+	/* Wait for our turn to be writer */
+	rte_ticketlock_lock(&rwl->writer);
+
+	/*
+	 * Taking the ticket must be visible to other cores before the
+	 * reader count is sampled, otherwise a reader and this writer
+	 * can each miss the other's update and both enter.
+	 */
+	__atomic_thread_fence(__ATOMIC_SEQ_CST);
+
+	/*
+	 * Wait for all outstanding readers. Acquire pairs with the
+	 * release decrement done when a reader drops its reference, so
+	 * the readers' accesses are complete before we start writing.
+	 */
+	rte_wait_until_equal_16(&rwl->readers, 0, __ATOMIC_ACQUIRE);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Try to take a write lock without waiting.
+ *
+ * @param rwl
+ *   A pointer to a fair_rwlock structure.
+ * @return
+ *   - zero if the lock is successfully taken
+ *   - -EBUSY if lock could not be acquired for writing because
+ *     it was already locked for reading or writing
+ */
+__rte_experimental
+static inline int
+rte_fair_rwlock_write_trylock(rte_fair_rwlock_t *rwl)
+{
+	if (!rte_ticketlock_trylock(&rwl->writer))
+		return -EBUSY;
+
+	/*
+	 * Taking the ticket must be visible to other cores before the
+	 * reader count is sampled.
+	 */
+	__atomic_thread_fence(__ATOMIC_SEQ_CST);
+
+	/*
+	 * Acquire pairs with the release decrement done when a reader
+	 * drops its reference.
+	 */
+	if (__atomic_load_n(&rwl->readers, __ATOMIC_ACQUIRE) != 0) {
+		/* readers are active: give the ticket back */
+		rte_ticketlock_unlock(&rwl->writer);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Release a write lock.
+ *
+ * Only the writer ticket lock is released; the reader count is not
+ * touched.
+ *
+ * @param rwl
+ *   A pointer to a fair_rwlock structure.
+ */
+__rte_experimental
+static inline void
+rte_fair_rwlock_write_unlock(rte_fair_rwlock_t *rwl)
+{
+        /* Unblock readers */
+        rte_ticketlock_unlock(&rwl->writer);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_FAIR_RWLOCK_H_ */
diff --git a/lib/librte_eal/include/meson.build b/lib/librte_eal/include/meson.build
index dc007084ff22..2ba6f3be9e6b 100644
--- a/lib/librte_eal/include/meson.build
+++ b/lib/librte_eal/include/meson.build
@@ -56,6 +56,7 @@  generic_headers = files(
 	'generic/rte_cpuflags.h',
 	'generic/rte_cycles.h',
 	'generic/rte_io.h',
+	'generic/rte_fair_rwlock.h',
 	'generic/rte_mcslock.h',
 	'generic/rte_memcpy.h',
 	'generic/rte_pause.h',
diff --git a/lib/librte_eal/ppc/include/meson.build b/lib/librte_eal/ppc/include/meson.build
index dae40ede546e..47c7dfaa4efe 100644
--- a/lib/librte_eal/ppc/include/meson.build
+++ b/lib/librte_eal/ppc/include/meson.build
@@ -7,6 +7,7 @@  arch_headers = files(
 	'rte_byteorder.h',
 	'rte_cpuflags.h',
 	'rte_cycles.h',
+	'rte_fair_rwlock.h',
 	'rte_io.h',
 	'rte_mcslock.h',
 	'rte_memcpy.h',
diff --git a/lib/librte_eal/ppc/include/rte_fair_rwlock.h b/lib/librte_eal/ppc/include/rte_fair_rwlock.h
new file mode 100644
index 000000000000..bf22251a28c5
--- /dev/null
+++ b/lib/librte_eal/ppc/include/rte_fair_rwlock.h
@@ -0,0 +1,18 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Microsoft Corporation
+ */
+
+#ifndef _RTE_FAIR_RWLOCK_PPC_64_H_
+#define _RTE_FAIR_RWLOCK_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_fair_rwlock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_FAIR_RWLOCK_PPC_64_H_ */
diff --git a/lib/librte_eal/x86/include/meson.build b/lib/librte_eal/x86/include/meson.build
index 549cc21a42ed..a348d51c97a8 100644
--- a/lib/librte_eal/x86/include/meson.build
+++ b/lib/librte_eal/x86/include/meson.build
@@ -10,6 +10,7 @@  arch_headers = files(
 	'rte_byteorder.h',
 	'rte_cpuflags.h',
 	'rte_cycles.h',
+	'rte_fair_rwlock.h',
 	'rte_io.h',
 	'rte_mcslock.h',
 	'rte_memcpy.h',
diff --git a/lib/librte_eal/x86/include/rte_fair_rwlock.h b/lib/librte_eal/x86/include/rte_fair_rwlock.h
new file mode 100644
index 000000000000..cde08885062e
--- /dev/null
+++ b/lib/librte_eal/x86/include/rte_fair_rwlock.h
@@ -0,0 +1,18 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Microsoft Corporation
+ */
+
+#ifndef _RTE_FAIR_RWLOCK_X86_64_H_
+#define _RTE_FAIR_RWLOCK_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_fair_rwlock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_FAIR_RWLOCK_X86_64_H_ */