[2/2] eal/windows: implement alarm API

Message ID 20200911002207.31813-3-dmitry.kozliuk@gmail.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series eal/windows: implement alarms |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Dmitry Kozlyuk Sept. 11, 2020, 12:22 a.m. UTC
  Implementation is based on waitable timers Win32 API. When timer is set,
a callback and its argument are supplied to the OS, while timer handle
is stored in EAL alarm list. When timer expires, OS wakes up the
interrupt thread and runs the callback. Upon completion it removes the
alarm.

Waitable timers must be set from the thread their callback will run in,
eal_intr_thread_schedule() provides a way to schedule asyncronuous code
execution in the interrupt thread. Alarm module builds synchronous timer
setup on top of it.

Windows alarms are not a type of DPDK interrupt handle and do not
interact with interrupt module beyond executing in the same thread.

Signed-off-by: Dmitry Kozlyuk <dmitry.kozliuk@gmail.com>
---
 lib/librte_eal/rte_eal_exports.def |   2 +
 lib/librte_eal/windows/eal_alarm.c | 219 +++++++++++++++++++++++++++++
 lib/librte_eal/windows/meson.build |   1 +
 3 files changed, 222 insertions(+)
 create mode 100644 lib/librte_eal/windows/eal_alarm.c
  

Comments

Khoa To Sept. 21, 2020, 7:08 p.m. UTC | #1
Hi Dmitry,

Since all alarm callbacks are scheduled on the interrupt thread, looks like this implementation of rte_eal_alarm_set() would create a head-of-line blocking issue, as the callbacks are serialized one after the other.
Is that correct?  If so, while this does not violate the API contract, it seems undesirable.

I looked at the Linux implementation of alarm, and although I am not very familiar with Linux system, it seems to have similar implementation, with the same head-of-line blocking issue.

I'm wondering why in both Linux and this Windows implementation, we don't spawn a new thread to service each alarm independently.
And, regardless of the Linux implementation, should we consider another implementation that avoids the head-of-line blocking issue?

Khoa.

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Dmitry Kozlyuk
> Sent: Thursday, September 10, 2020 5:22 PM
> To: dev@dpdk.org
> Cc: Dmitry Kozlyuk <dmitry.kozliuk@gmail.com>; Narcisa Ana Maria Vasile <navasile@linux.microsoft.com>; Dmitry Malloy
> (MESHCHANINOV) <dmitrym@microsoft.com>; Pallavi Kadam <pallavi.kadam@intel.com>
> Subject: [EXTERNAL] [dpdk-dev] [PATCH 2/2] eal/windows: implement alarm API
> 
> Implementation is based on waitable timers Win32 API. When timer is set,
> a callback and its argument are supplied to the OS, while timer handle
> is stored in EAL alarm list. When timer expires, OS wakes up the
> interrupt thread and runs the callback. Upon completion it removes the
> alarm.
> 
> Waitable timers must be set from the thread their callback will run in,
> eal_intr_thread_schedule() provides a way to schedule asyncronuous code
> execution in the interrupt thread. Alarm module builds synchronous timer
> setup on top of it.
> 
> Windows alarms are not a type of DPDK interrupt handle and do not
> interact with interrupt module beyond executing in the same thread.
> 
> Signed-off-by: Dmitry Kozlyuk <dmitry.kozliuk@gmail.com>
> ---
>  lib/librte_eal/rte_eal_exports.def |   2 +
>  lib/librte_eal/windows/eal_alarm.c | 219 +++++++++++++++++++++++++++++
>  lib/librte_eal/windows/meson.build |   1 +
>  3 files changed, 222 insertions(+)
>  create mode 100644 lib/librte_eal/windows/eal_alarm.c
> 
> diff --git a/lib/librte_eal/rte_eal_exports.def b/lib/librte_eal/rte_eal_exports.def
> index 9baca0110..b6abb5ae1 100644
> --- a/lib/librte_eal/rte_eal_exports.def
> +++ b/lib/librte_eal/rte_eal_exports.def
> @@ -14,6 +14,8 @@ EXPORTS
>  	rte_devargs_insert
>  	rte_devargs_next
>  	rte_devargs_remove
> +	rte_eal_alarm_set
> +	rte_eal_alarm_cancel
>  	rte_eal_get_configuration
>  	rte_eal_has_hugepages
>  	rte_eal_has_pci
> diff --git a/lib/librte_eal/windows/eal_alarm.c b/lib/librte_eal/windows/eal_alarm.c
> new file mode 100644
> index 000000000..3b262793a
> --- /dev/null
> +++ b/lib/librte_eal/windows/eal_alarm.c
> @@ -0,0 +1,219 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright (c) 2020 Dmitry Kozlyuk
> + */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +#include <rte_alarm.h>
> +#include <rte_spinlock.h>
> +
> +#include <rte_eal_trace.h>
> +
> +#include "eal_windows.h"
> +
> +enum alarm_state {
> +	ALARM_ARMED,
> +	ALARM_TRIGGERED,
> +	ALARM_CANCELLED
> +};
> +
> +struct alarm_entry {
> +	LIST_ENTRY(alarm_entry) next;
> +	rte_eal_alarm_callback cb_fn;
> +	void *cb_arg;
> +	HANDLE timer;
> +	atomic_uint state;
> +};
> +
> +static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
> +
> +static rte_spinlock_t alarm_lock = RTE_SPINLOCK_INITIALIZER;
> +
> +static int intr_thread_exec(void (*func)(void *arg), void *arg);
> +
> +static void
> +alarm_remove_unsafe(struct alarm_entry *ap)
> +{
> +	LIST_REMOVE(ap, next);
> +	CloseHandle(ap->timer);
> +	free(ap);
> +}
> +
> +static void
> +alarm_callback(void *arg, DWORD low __rte_unused, DWORD high __rte_unused)
> +{
> +	struct alarm_entry *ap = arg;
> +	unsigned int state = ALARM_ARMED;
> +
> +	if (!atomic_compare_exchange_strong(
> +			&ap->state, &state, ALARM_TRIGGERED))
> +		return;
> +
> +	ap->cb_fn(ap->cb_arg);
> +
> +	rte_spinlock_lock(&alarm_lock);
> +	alarm_remove_unsafe(ap);
> +	rte_spinlock_unlock(&alarm_lock);
> +}
> +
> +struct alarm_task {
> +	struct alarm_entry *entry;
> +	LARGE_INTEGER deadline;
> +	int ret;
> +};
> +
> +static void
> +alarm_set(void *arg)
> +{
> +	struct alarm_task *task = arg;
> +
> +	BOOL ret = SetWaitableTimer(
> +		task->entry->timer, &task->deadline,
> +		0, alarm_callback, task->entry, FALSE);
> +	task->ret = ret ? 0 : (-1);
> +}
> +
> +int
> +rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
> +{
> +	struct alarm_entry *ap;
> +	HANDLE timer;
> +	FILETIME ft;
> +	struct alarm_task task;
> +	int ret;
> +
> +	/* Calculate deadline ASAP, unit of measure = 100ns. */
> +	GetSystemTimePreciseAsFileTime(&ft);
> +	task.deadline.LowPart = ft.dwLowDateTime;
> +	task.deadline.HighPart = ft.dwHighDateTime;
> +	task.deadline.QuadPart += 10 * us;
> +
> +	ap = calloc(1, sizeof(*ap));
> +	if (ap == NULL) {
> +		RTE_LOG(ERR, EAL, "Cannot allocate alarm entry\n");
> +		ret = -ENOMEM;
> +		goto exit;
> +	}
> +
> +	timer = CreateWaitableTimer(NULL, FALSE, NULL);
> +	if (timer == NULL) {
> +		RTE_LOG_WIN32_ERR("CreateWaitableTimer()");
> +		ret = -EINVAL;
> +		goto fail;
> +	}
> +
> +	ap->timer = timer;
> +	ap->cb_fn = cb_fn;
> +	ap->cb_arg = cb_arg;
> +	task.entry = ap;
> +
> +	/* Waitable timer must be set in the same thread that will
> +	 * do an alertable wait for the alarm to trigger, that is,
> +	 * in the interrupt thread. Setting can fail, so do it synchronously.
> +	 */
> +	ret = intr_thread_exec(alarm_set, &task);
> +	if (ret < 0) {
> +		RTE_LOG(ERR, EAL, "Cannot setup alarm in interrupt thread\n");
> +		goto fail;
> +	}
> +
> +	ret = task.ret;
> +	if (ret < 0)
> +		goto fail;
> +
> +	rte_spinlock_lock(&alarm_lock);
> +	LIST_INSERT_HEAD(&alarm_list, ap, next);
> +	rte_spinlock_unlock(&alarm_lock);
> +
> +	goto exit;
> +
> +fail:
> +	if (timer != NULL)
> +		CloseHandle(timer);
> +	if (ap != NULL)
> +		free(ap);
> +
> +exit:
> +	rte_eal_trace_alarm_set(us, cb_fn, cb_arg, ret);
> +	return ret;
> +}
> +
> +static bool
> +alarm_matches(const struct alarm_entry *ap,
> +	rte_eal_alarm_callback cb_fn, void *cb_arg)
> +{
> +	bool any_arg = cb_arg == (void *)(-1);
> +	return (ap->cb_fn == cb_fn) && (any_arg || ap->cb_arg == cb_arg);
> +}
> +
> +int
> +rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
> +{
> +	struct alarm_entry *ap;
> +	unsigned int state;
> +	int removed;
> +	bool executing;
> +
> +	removed = 0;
> +	do {
> +		executing = false;
> +
> +		rte_spinlock_lock(&alarm_lock);
> +
> +		LIST_FOREACH(ap, &alarm_list, next) {
> +			if (!alarm_matches(ap, cb_fn, cb_arg))
> +				continue;
> +
> +			state = ALARM_ARMED;
> +			if (atomic_compare_exchange_strong(
> +					&ap->state, &state, ALARM_CANCELLED)) {
> +				alarm_remove_unsafe(ap);
> +				removed++;
> +			} else if (state == ALARM_TRIGGERED)
> +				executing = true;
> +		}
> +
> +		rte_spinlock_unlock(&alarm_lock);
> +	} while (executing);
> +
> +	rte_eal_trace_alarm_cancel(cb_fn, cb_arg, removed);
> +	return removed;
> +}
> +
> +struct intr_task {
> +	void (*func)(void *arg);
> +	void *arg;
> +	rte_spinlock_t lock; /* unlocked at task completion */
> +};
> +
> +static void
> +intr_thread_entry(void *arg)
> +{
> +	struct intr_task *task = arg;
> +	task->func(task->arg);
> +	rte_spinlock_unlock(&task->lock);
> +}
> +
> +static int
> +intr_thread_exec(void (*func)(void *arg), void *arg)
> +{
> +	struct intr_task task;
> +	int ret;
> +
> +	task.func = func;
> +	task.arg = arg;
> +	rte_spinlock_init(&task.lock);
> +
> +	/* Make timers more precise by synchronizing in userspace. */
> +	rte_spinlock_lock(&task.lock);
> +	ret = eal_intr_thread_schedule(intr_thread_entry, &task);
> +	if (ret < 0) {
> +		RTE_LOG(ERR, EAL, "Cannot schedule task to interrupt thread\n");
> +		return -EINVAL;
> +	}
> +
> +	/* Wait for the task to complete. */
> +	rte_spinlock_lock(&task.lock);
> +	return 0;
> +}
> diff --git a/lib/librte_eal/windows/meson.build b/lib/librte_eal/windows/meson.build
> index b690bc6b0..3b2faf29e 100644
> --- a/lib/librte_eal/windows/meson.build
> +++ b/lib/librte_eal/windows/meson.build
> @@ -5,6 +5,7 @@ subdir('include')
> 
>  sources += files(
>  	'eal.c',
> +	'eal_alarm.c',
>  	'eal_debug.c',
>  	'eal_file.c',
>  	'eal_hugepages.c',
> --
> 2.25.4
  
Dmitry Kozlyuk Sept. 24, 2020, 9:38 p.m. UTC | #2
Hi Khoa,

Disclaimer: my experience with interrupts and alarms is limited.

> Since all alarm callbacks are scheduled on the interrupt thread, looks like this implementation of rte_eal_alarm_set() would create a head-of-line blocking issue, as the callbacks are serialized one after the other.
> Is that correct?  If so, while this does not violate the API contract, it seems undesirable.

Yes, the issues is valid, however, probably irrelevant. In DPDK, alarms
are considered on par with interrupts. Much like kernel ISRs, DPDK interrupt
callbacks are supposed to be short. When they need to do more work, there are
facilities to schedule deferred work (again, somewhat like NT kernel DPC):
rte_ctrl_thread_create().

> I'm wondering why in both Linux and this Windows implementation, we don't spawn a new thread to service each alarm independently.

These threads would need cores to be scheduled on. If we set affinity to the
primary lcore, head-of-line issue is back. If we leave that to the OS, they
may land on data-plane cores with a context switch.

> And, regardless of the Linux implementation, should we consider another implementation that avoids the head-of-line blocking issue?

I don't think so. Looking at PMD code, timers are mostly used during
initialization or reconfiguration.
  
Khoa To Sept. 25, 2020, 2:19 a.m. UTC | #3
> -----Original Message-----
> From: Dmitry Kozlyuk <dmitry.kozliuk@gmail.com>
> Sent: Thursday, September 24, 2020 2:39 PM
> To: Khoa To <khot@microsoft.com>
> Cc: dev@dpdk.org; Narcisa Ana Maria Vasile
> <navasile@linux.microsoft.com>; Dmitry Malloy (MESHCHANINOV)
> <dmitrym@microsoft.com>; Pallavi Kadam <pallavi.kadam@intel.com>
> Subject: Re: [EXTERNAL] [dpdk-dev] [PATCH 2/2] eal/windows: implement
> alarm API
> 
> Hi Khoa,
> 
> Disclaimer: my experience with interrupts and alarms is limited.
> 
> > Since all alarm callbacks are scheduled on the interrupt thread, looks like
> this implementation of rte_eal_alarm_set() would create a head-of-line
> blocking issue, as the callbacks are serialized one after the other.
> > Is that correct?  If so, while this does not violate the API contract, it seems
> undesirable.
> 
> Yes, the issues is valid, however, probably irrelevant. In DPDK, alarms
> are considered on par with interrupts. Much like kernel ISRs, DPDK interrupt
> callbacks are supposed to be short. When they need to do more work, there
> are
> facilities to schedule deferred work (again, somewhat like NT kernel DPC):
> rte_ctrl_thread_create().

Thank you for clarification on the usage model.

> > I'm wondering why in both Linux and this Windows implementation, we
> don't spawn a new thread to service each alarm independently.
> 
> These threads would need cores to be scheduled on. If we set affinity to the
> primary lcore, head-of-line issue is back. If we leave that to the OS, they
> may land on data-plane cores with a context switch.
> 
> > And, regardless of the Linux implementation, should we consider another
> implementation that avoids the head-of-line blocking issue?
> 
> I don't think so. Looking at PMD code, timers are mostly used during
> initialization or reconfiguration.

Sounds good. The implementation looks good. 
I'll take another pass at the code for any additional feedbacks.
  

Patch

diff --git a/lib/librte_eal/rte_eal_exports.def b/lib/librte_eal/rte_eal_exports.def
index 9baca0110..b6abb5ae1 100644
--- a/lib/librte_eal/rte_eal_exports.def
+++ b/lib/librte_eal/rte_eal_exports.def
@@ -14,6 +14,8 @@  EXPORTS
 	rte_devargs_insert
 	rte_devargs_next
 	rte_devargs_remove
+	rte_eal_alarm_set
+	rte_eal_alarm_cancel
 	rte_eal_get_configuration
 	rte_eal_has_hugepages
 	rte_eal_has_pci
diff --git a/lib/librte_eal/windows/eal_alarm.c b/lib/librte_eal/windows/eal_alarm.c
new file mode 100644
index 000000000..3b262793a
--- /dev/null
+++ b/lib/librte_eal/windows/eal_alarm.c
@@ -0,0 +1,219 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2020 Dmitry Kozlyuk
+ */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#include <rte_alarm.h>
+#include <rte_spinlock.h>
+
+#include <rte_eal_trace.h>
+
+#include "eal_windows.h"
+
+enum alarm_state {
+	ALARM_ARMED,
+	ALARM_TRIGGERED,
+	ALARM_CANCELLED
+};
+
+struct alarm_entry {
+	LIST_ENTRY(alarm_entry) next;
+	rte_eal_alarm_callback cb_fn;
+	void *cb_arg;
+	HANDLE timer;
+	atomic_uint state;
+};
+
+static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
+
+static rte_spinlock_t alarm_lock = RTE_SPINLOCK_INITIALIZER;
+
+static int intr_thread_exec(void (*func)(void *arg), void *arg);
+
+static void
+alarm_remove_unsafe(struct alarm_entry *ap)
+{
+	LIST_REMOVE(ap, next);
+	CloseHandle(ap->timer);
+	free(ap);
+}
+
+static void
+alarm_callback(void *arg, DWORD low __rte_unused, DWORD high __rte_unused)
+{
+	struct alarm_entry *ap = arg;
+	unsigned int state = ALARM_ARMED;
+
+	if (!atomic_compare_exchange_strong(
+			&ap->state, &state, ALARM_TRIGGERED))
+		return;
+
+	ap->cb_fn(ap->cb_arg);
+
+	rte_spinlock_lock(&alarm_lock);
+	alarm_remove_unsafe(ap);
+	rte_spinlock_unlock(&alarm_lock);
+}
+
+struct alarm_task {
+	struct alarm_entry *entry;
+	LARGE_INTEGER deadline;
+	int ret;
+};
+
+static void
+alarm_set(void *arg)
+{
+	struct alarm_task *task = arg;
+
+	BOOL ret = SetWaitableTimer(
+		task->entry->timer, &task->deadline,
+		0, alarm_callback, task->entry, FALSE);
+	task->ret = ret ? 0 : (-1);
+}
+
+int
+rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+	struct alarm_entry *ap;
+	HANDLE timer;
+	FILETIME ft;
+	struct alarm_task task;
+	int ret;
+
+	/* Calculate deadline ASAP, unit of measure = 100ns. */
+	GetSystemTimePreciseAsFileTime(&ft);
+	task.deadline.LowPart = ft.dwLowDateTime;
+	task.deadline.HighPart = ft.dwHighDateTime;
+	task.deadline.QuadPart += 10 * us;
+
+	ap = calloc(1, sizeof(*ap));
+	if (ap == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot allocate alarm entry\n");
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	timer = CreateWaitableTimer(NULL, FALSE, NULL);
+	if (timer == NULL) {
+		RTE_LOG_WIN32_ERR("CreateWaitableTimer()");
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	ap->timer = timer;
+	ap->cb_fn = cb_fn;
+	ap->cb_arg = cb_arg;
+	task.entry = ap;
+
+	/* Waitable timer must be set in the same thread that will
+	 * do an alertable wait for the alarm to trigger, that is,
+	 * in the interrupt thread. Setting can fail, so do it synchronously.
+	 */
+	ret = intr_thread_exec(alarm_set, &task);
+	if (ret < 0) {
+		RTE_LOG(ERR, EAL, "Cannot setup alarm in interrupt thread\n");
+		goto fail;
+	}
+
+	ret = task.ret;
+	if (ret < 0)
+		goto fail;
+
+	rte_spinlock_lock(&alarm_lock);
+	LIST_INSERT_HEAD(&alarm_list, ap, next);
+	rte_spinlock_unlock(&alarm_lock);
+
+	goto exit;
+
+fail:
+	if (timer != NULL)
+		CloseHandle(timer);
+	if (ap != NULL)
+		free(ap);
+
+exit:
+	rte_eal_trace_alarm_set(us, cb_fn, cb_arg, ret);
+	return ret;
+}
+
+static bool
+alarm_matches(const struct alarm_entry *ap,
+	rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+	bool any_arg = cb_arg == (void *)(-1);
+	return (ap->cb_fn == cb_fn) && (any_arg || ap->cb_arg == cb_arg);
+}
+
+int
+rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+	struct alarm_entry *ap;
+	unsigned int state;
+	int removed;
+	bool executing;
+
+	removed = 0;
+	do {
+		executing = false;
+
+		rte_spinlock_lock(&alarm_lock);
+
+		LIST_FOREACH(ap, &alarm_list, next) {
+			if (!alarm_matches(ap, cb_fn, cb_arg))
+				continue;
+
+			state = ALARM_ARMED;
+			if (atomic_compare_exchange_strong(
+					&ap->state, &state, ALARM_CANCELLED)) {
+				alarm_remove_unsafe(ap);
+				removed++;
+			} else if (state == ALARM_TRIGGERED)
+				executing = true;
+		}
+
+		rte_spinlock_unlock(&alarm_lock);
+	} while (executing);
+
+	rte_eal_trace_alarm_cancel(cb_fn, cb_arg, removed);
+	return removed;
+}
+
+struct intr_task {
+	void (*func)(void *arg);
+	void *arg;
+	rte_spinlock_t lock; /* unlocked at task completion */
+};
+
+static void
+intr_thread_entry(void *arg)
+{
+	struct intr_task *task = arg;
+	task->func(task->arg);
+	rte_spinlock_unlock(&task->lock);
+}
+
+static int
+intr_thread_exec(void (*func)(void *arg), void *arg)
+{
+	struct intr_task task;
+	int ret;
+
+	task.func = func;
+	task.arg = arg;
+	rte_spinlock_init(&task.lock);
+
+	/* Make timers more precise by synchronizing in userspace. */
+	rte_spinlock_lock(&task.lock);
+	ret = eal_intr_thread_schedule(intr_thread_entry, &task);
+	if (ret < 0) {
+		RTE_LOG(ERR, EAL, "Cannot schedule task to interrupt thread\n");
+		return -EINVAL;
+	}
+
+	/* Wait for the task to complete. */
+	rte_spinlock_lock(&task.lock);
+	return 0;
+}
diff --git a/lib/librte_eal/windows/meson.build b/lib/librte_eal/windows/meson.build
index b690bc6b0..3b2faf29e 100644
--- a/lib/librte_eal/windows/meson.build
+++ b/lib/librte_eal/windows/meson.build
@@ -5,6 +5,7 @@  subdir('include')
 
 sources += files(
 	'eal.c',
+	'eal_alarm.c',
 	'eal_debug.c',
 	'eal_file.c',
 	'eal_hugepages.c',