[dpdk-dev,v4,1/3] rte: add keep alive functionality
Commit Message
Adds functions for detecting and reporting the live-ness of LCores,
the primary requirement of which is minimal overheads for the
core(s) being checked. Core failures are notified via an application
defined callback.
Signed-off-by: Remy Horton <remy.horton@intel.com>
---
lib/librte_eal/bsdapp/eal/Makefile | 1 +
lib/librte_eal/bsdapp/eal/rte_eal_version.map | 6 +-
lib/librte_eal/common/Makefile | 2 +-
lib/librte_eal/common/include/rte_keepalive.h | 146 ++++++++++++++++++++++++
lib/librte_eal/common/rte_keepalive.c | 124 ++++++++++++++++++++
lib/librte_eal/linuxapp/eal/Makefile | 1 +
lib/librte_eal/linuxapp/eal/rte_eal_version.map | 6 +-
7 files changed, 283 insertions(+), 3 deletions(-)
create mode 100644 lib/librte_eal/common/include/rte_keepalive.h
create mode 100644 lib/librte_eal/common/rte_keepalive.c
Comments
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Remy Horton
> Sent: Thursday, November 5, 2015 11:33 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v4 1/3] rte: add keep alive functionality
>
> Adds functions for detecting and reporting the live-ness of LCores, the primary
> requirement of which is minimal overheads for the
> core(s) being checked. Core failures are notified via an application defined
> callback.
>
> Signed-off-by: Remy Horton <remy.horton@intel.com>
> ---
Acked-by: Maryam Tahhan <maryam.tahhan@intel.com>
Hi,
2015-11-05 11:32, Remy Horton:
> +/**
> + * @param *ptr_timer Triggering timer (unused)
> + * @param *ptr_data Data pointer (keepalive structure)
> + */
> +void rte_keepalive_dispatch_pings(void *ptr_timer, void *ptr_data);
There is no description for this function.
Why ptr_timer is unused?
> +#ifdef KEEPALIVE_DEBUG_MSGS
> +static void
> +print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core)
> +{
> + printf("%sLast seen %" PRId64 "ms ago.\n",
> + msg,
> + ((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
> + / rte_get_tsc_hz()
> + );
> +}
> +#else
> +static void
> +print_trace(__attribute__((unused)) const char *msg,
> + __attribute__((unused)) struct rte_keepalive *keepcfg,
> + __attribute__((unused)) int idx_core)
> +{
> +}
> +#endif
This function will never be tested and do not use rte_log.
Please remove it and use the logging functions.
On 10/11/2015 14:02, Thomas Monjalon wrote:
> Why ptr_timer is unused?
Use as rte_timer callback requires the parameter be present, but
responsibility for setting this up is delegated to the application.
..Remy
In general, try not to introduce new thinks and avoid extra code.
Also Linux has more robust mechanism in the watchdog timers, why not use that?
Could you use RTE_PER_LCORE some how?
> +#ifdef KEEPALIVE_DEBUG_MSGS
Any #ifdef must have a config option to enable it.
> +static void
> +print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core)
> +{
> + printf("%sLast seen %" PRId64 "ms ago.\n",
> + msg,
> + ((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
> + / rte_get_tsc_hz()
> + );
> +}
> +#else
> +static void
> +print_trace(__attribute__((unused)) const char *msg,
> + __attribute__((unused)) struct rte_keepalive *keepcfg,
> + __attribute__((unused)) int idx_core)
> +{
> +}
> +#endif
Agree with others don't introduce not logging macros.
> +void
> +rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
Use __rte_unused
> + void *ptr_data)
> +{
> + struct rte_keepalive *keepcfg = (struct rte_keepalive *)ptr_data;
Cast of void * is unnecessary in C.
> + int idx_core;
> +
> + for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
> + if (keepcfg->active_cores[idx_core] == 0)
> + continue;
> + switch (keepcfg->state_flags[idx_core]) {
My personal preference, prefer blank line after continue.
> + case ALIVE: /* Alive */
> + keepcfg->state_flags[idx_core] = 0;
> + keepcfg->last_alive[idx_core] = rte_rdtsc();
> + break;
> + case MISSING: /* MIA */
> + print_trace("Core MIA. ", keepcfg, idx_core);
> + keepcfg->state_flags[idx_core] = 2;
> + break;
> + case DEAD: /* Dead */
> + keepcfg->state_flags[idx_core] = 3;
> + print_trace("Core died. ", keepcfg, idx_core);
> + if (keepcfg->callback)
> + keepcfg->callback(
> + keepcfg->callback_data,
> + idx_core
> + );
> + break;
> + case GONE: /* Buried */
> + break;
> + }
> + }
> +}
> +
> +
> +struct rte_keepalive *
> +rte_keepalive_create(rte_keepalive_failure_callback_t callback,
> + void *data)
> +{
> + int idx_core;
> + struct rte_keepalive *keepcfg;
> +
> + keepcfg = malloc(sizeof(struct rte_keepalive));
Why not use rte_zmalloc()?
> + if (keepcfg != NULL) {
> + for (idx_core = 0;
> + idx_core < RTE_KEEPALIVE_MAXCORES;
> + idx_core++) {
> + keepcfg->state_flags[idx_core] = 0;
> + keepcfg->active_cores[idx_core] = 0;
> + }
> + keepcfg->callback = callback;
> + keepcfg->callback_data = data;
> + keepcfg->tsc_initial = rte_rdtsc();
> + keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000;
> + }
> + return keepcfg;
> +}
> +
> +
> +void
> +rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
> +{
> + if (id_core < RTE_KEEPALIVE_MAXCORES)
> + keepcfg->active_cores[id_core] = 1;
> +}
2015-11-05 11:32, Remy Horton:
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_keepalive.h
Please add it in doxygen configuration and index.
> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> @@ -133,5 +133,9 @@ DPDK_2.2 {
> global:
>
> rte_intr_cap_multiple;
> + rte_keepalive_create;
> + rte_keepalive_dispatch_pings;
> + rte_keepalive_register_core;
> + rte_keepalive_mark_alive;
Please keep alphabetical order.
On 11/11/2015 16:28, Stephen Hemminger wrote:
> Also Linux has more robust mechanism in the watchdog timers, why not use that?
Keepalive was made with tight intra-process reporting intervals of
5-10ms in mind, whereas Linux watchdog appears to be an inter-process
system that operates with granularity of seconds.
> Could you use RTE_PER_LCORE some how?
Possibly, although not sure what it gains..
>> + struct rte_keepalive *keepcfg = (struct rte_keepalive *)ptr_data;
> Cast of void * is unnecessary in C.
Old habbits die hard.. :)
v+1 on way.
..Remy
@@ -80,6 +80,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_thread.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_malloc.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_elem.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_keepalive.c
CFLAGS_eal.o := -D_GNU_SOURCE
#CFLAGS_eal_thread.o := -D_GNU_SOURCE
@@ -130,5 +130,9 @@ DPDK_2.2 {
global:
rte_intr_cap_multiple;
+ rte_keepalive_create;
+ rte_keepalive_dispatch_pings;
+ rte_keepalive_register_core;
+ rte_keepalive_mark_alive;
-} DPDK_2.1;
\ No newline at end of file
+} DPDK_2.1;
@@ -40,7 +40,7 @@ INC += rte_string_fns.h rte_version.h
INC += rte_eal_memconfig.h rte_malloc_heap.h
INC += rte_hexdump.h rte_devargs.h rte_dev.h
INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
-INC += rte_malloc.h
+INC += rte_malloc.h rte_keepalive.h
ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
INC += rte_warnings.h
new file mode 100644
@@ -0,0 +1,146 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2015 Intel Shannon Ltd. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file rte_keepalive.h
+ * DPDK RTE LCore Keepalive Monitor.
+ *
+ **/
+
+#ifndef _KEEPALIVE_H_
+#define _KEEPALIVE_H_
+
+#include <rte_memory.h>
+
+#ifndef RTE_KEEPALIVE_MAXCORES
+/**
+ * Number of cores to track.
+ * @note Must be larger than the highest core id. */
+#define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE
+#endif
+
+
+/**
+ * Keepalive failure callback.
+ *
+ * Receives a data pointer passed to rte_keepalive_create() and the id of the
+ * failed core.
+ */
+typedef void (*rte_keepalive_failure_callback_t)(
+ void *data,
+ const int id_core);
+
+
+/**
+ * Keepalive state structure.
+ * @internal
+ */
+struct rte_keepalive {
+ /** Core Liveness. */
+
+ enum {
+ ALIVE = 1,
+ MISSING = 0,
+ DEAD = 2,
+ GONE = 3
+ } __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES];
+
+ /** Last-seen-alive timestamps */
+ uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];
+
+ /**
+ * Cores to check.
+ * Indexed by core id, non-zero if the core should be checked.
+ */
+ uint8_t active_cores[RTE_KEEPALIVE_MAXCORES];
+
+ /** Dead core handler. */
+ rte_keepalive_failure_callback_t callback;
+
+ /**
+ * Dead core handler app data.
+ * Pointer is passed to dead core handler.
+ */
+ void *callback_data;
+ uint64_t tsc_initial;
+ uint64_t tsc_mhz;
+};
+
+
+/**
+ * Initialise keepalive sub-system.
+ * @param callback
+ * Function called upon detection of a dead core.
+ * @param data
+ * Data pointer to be passed to function callback.
+ * @return
+ * Keepalive structure success, NULL on failure.
+ */
+struct rte_keepalive *rte_keepalive_create(
+ rte_keepalive_failure_callback_t callback,
+ void *data);
+
+
+/**
+ * @param *ptr_timer Triggering timer (unused)
+ * @param *ptr_data Data pointer (keepalive structure)
+ */
+void rte_keepalive_dispatch_pings(void *ptr_timer, void *ptr_data);
+
+
+/**
+ * Registers a core for keepalive checks.
+ * @param *keepcfg
+ * Keepalive structure pointer
+ * @param id_core
+ * ID number of core to register.
+ */
+void rte_keepalive_register_core(struct rte_keepalive *keepcfg,
+ const int id_core);
+
+
+/**
+ * Per-core keepalive check.
+ * @param *keepcfg
+ * Keepalive structure pointer
+ *
+ * This function needs to be called from within the main process loop of
+ * the LCore to be checked.
+ */
+static inline void
+rte_keepalive_mark_alive(struct rte_keepalive *keepcfg)
+{
+ keepcfg->state_flags[rte_lcore_id()] = 1;
+}
+
+
+#endif /* _KEEPALIVE_H_ */
new file mode 100644
@@ -0,0 +1,124 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2015 Intel Shannon Ltd. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <inttypes.h>
+
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_keepalive.h>
+
+#ifdef KEEPALIVE_DEBUG_MSGS
+static void
+print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core)
+{
+ printf("%sLast seen %" PRId64 "ms ago.\n",
+ msg,
+ ((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
+ / rte_get_tsc_hz()
+ );
+}
+#else
+static void
+print_trace(__attribute__((unused)) const char *msg,
+ __attribute__((unused)) struct rte_keepalive *keepcfg,
+ __attribute__((unused)) int idx_core)
+{
+}
+#endif
+
+
+
+void
+rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
+ void *ptr_data)
+{
+ struct rte_keepalive *keepcfg = (struct rte_keepalive *)ptr_data;
+ int idx_core;
+
+ for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
+ if (keepcfg->active_cores[idx_core] == 0)
+ continue;
+ switch (keepcfg->state_flags[idx_core]) {
+ case ALIVE: /* Alive */
+ keepcfg->state_flags[idx_core] = 0;
+ keepcfg->last_alive[idx_core] = rte_rdtsc();
+ break;
+ case MISSING: /* MIA */
+ print_trace("Core MIA. ", keepcfg, idx_core);
+ keepcfg->state_flags[idx_core] = 2;
+ break;
+ case DEAD: /* Dead */
+ keepcfg->state_flags[idx_core] = 3;
+ print_trace("Core died. ", keepcfg, idx_core);
+ if (keepcfg->callback)
+ keepcfg->callback(
+ keepcfg->callback_data,
+ idx_core
+ );
+ break;
+ case GONE: /* Buried */
+ break;
+ }
+ }
+}
+
+
+struct rte_keepalive *
+rte_keepalive_create(rte_keepalive_failure_callback_t callback,
+ void *data)
+{
+ int idx_core;
+ struct rte_keepalive *keepcfg;
+
+ keepcfg = malloc(sizeof(struct rte_keepalive));
+ if (keepcfg != NULL) {
+ for (idx_core = 0;
+ idx_core < RTE_KEEPALIVE_MAXCORES;
+ idx_core++) {
+ keepcfg->state_flags[idx_core] = 0;
+ keepcfg->active_cores[idx_core] = 0;
+ }
+ keepcfg->callback = callback;
+ keepcfg->callback_data = data;
+ keepcfg->tsc_initial = rte_rdtsc();
+ keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000;
+ }
+ return keepcfg;
+}
+
+
+void
+rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
+{
+ if (id_core < RTE_KEEPALIVE_MAXCORES)
+ keepcfg->active_cores[id_core] = 1;
+}
@@ -90,6 +90,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_thread.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_malloc.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_elem.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_keepalive.c
CFLAGS_eal.o := -D_GNU_SOURCE
CFLAGS_eal_interrupts.o := -D_GNU_SOURCE
@@ -133,5 +133,9 @@ DPDK_2.2 {
global:
rte_intr_cap_multiple;
+ rte_keepalive_create;
+ rte_keepalive_dispatch_pings;
+ rte_keepalive_register_core;
+ rte_keepalive_mark_alive;
-} DPDK_2.1;
\ No newline at end of file
+} DPDK_2.1;