[dpdk-dev,v2,1/3] rte: add keep alive functionality
Commit Message
Adds functions for detecting and reporting the live-ness of LCores,
the primary requirement of which is minimal overheads for the
core(s) being checked. Core failures are notified via an application
defined callback.
Signed-off-by: Remy Horton <remy.horton@intel.com>
---
lib/librte_eal/bsdapp/eal/Makefile | 1 +
lib/librte_eal/common/Makefile | 2 +-
lib/librte_eal/common/include/rte_keepalive.h | 140 ++++++++++++++++++++++++++
lib/librte_eal/common/rte_keepalive.c | 122 ++++++++++++++++++++++
lib/librte_eal/linuxapp/eal/Makefile | 1 +
5 files changed, 265 insertions(+), 1 deletion(-)
create mode 100644 lib/librte_eal/common/include/rte_keepalive.h
create mode 100644 lib/librte_eal/common/rte_keepalive.c
Comments
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Remy Horton
> Sent: Wednesday, September 30, 2015 10:05 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v2 1/3] rte: add keep alive functionality
>
> Adds functions for detecting and reporting the live-ness of LCores, the
> primary requirement of which is minimal overheads for the
> core(s) being checked. Core failures are notified via an application defined
> callback.
>
> Signed-off-by: Remy Horton <remy.horton@intel.com>
> ---
> lib/librte_eal/bsdapp/eal/Makefile | 1 +
> lib/librte_eal/common/Makefile | 2 +-
> lib/librte_eal/common/include/rte_keepalive.h | 140
> ++++++++++++++++++++++++++
> lib/librte_eal/common/rte_keepalive.c | 122
> ++++++++++++++++++++++
> lib/librte_eal/linuxapp/eal/Makefile | 1 +
> 5 files changed, 265 insertions(+), 1 deletion(-) create mode 100644
> lib/librte_eal/common/include/rte_keepalive.h
> create mode 100644 lib/librte_eal/common/rte_keepalive.c
>
> diff --git a/lib/librte_eal/bsdapp/eal/Makefile
> b/lib/librte_eal/bsdapp/eal/Makefile
> index a49dcec..65b293f 100644
> --- a/lib/librte_eal/bsdapp/eal/Makefile
> +++ b/lib/librte_eal/bsdapp/eal/Makefile
> @@ -80,6 +80,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) +=
> eal_common_thread.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_malloc.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_elem.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_heap.c
> +SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_keepalive.c
>
> CFLAGS_eal.o := -D_GNU_SOURCE
> #CFLAGS_eal_thread.o := -D_GNU_SOURCE
> diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
> index 0c43d6a..7f1757a 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -40,7 +40,7 @@ INC += rte_string_fns.h rte_version.h INC +=
> rte_eal_memconfig.h rte_malloc_heap.h INC += rte_hexdump.h
> rte_devargs.h rte_dev.h INC += rte_pci_dev_feature_defs.h
> rte_pci_dev_features.h -INC += rte_malloc.h
> +INC += rte_malloc.h rte_keepalive.h
>
> ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
> INC += rte_warnings.h
> diff --git a/lib/librte_eal/common/include/rte_keepalive.h
> b/lib/librte_eal/common/include/rte_keepalive.h
> new file mode 100644
> index 0000000..d67bf4b
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_keepalive.h
> @@ -0,0 +1,140 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright 2015 Intel Shannon Ltd. All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +/**
> + * @file keepalive.h
> + * DPDK RTE LCore Keepalive Monitor.
> + *
> + **/
> +
> +#ifndef _KEEPALIVE_H_
> +#define _KEEPALIVE_H_
> +
> +#include <rte_memory.h>
> +
> +#ifndef RTE_KEEPALIVE_MAXCORES
> +/**
> + * Number of cores to track.
> + * @note Must be larger than the highest core id. */ #define
> +RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE #endif
> +
> +
> +/**
> + * Keepalive failure callback.
> + *
> + * Receives a data pointer passed to rte_keepalive_create() and the id
> +of the
> + * failed core.
> + */
> +typedef void (*rte_keepalive_failure_callback_t)(
> + void *data,
> + const int id_core);
> +
> +
> +/**
> + * Keepalive state structure.
> + * @internal
> + */
> +struct rte_keepalive {
> + /** Core Liveness. */
> + uint32_t __rte_cache_aligned
> state_flags[RTE_KEEPALIVE_MAXCORES];
> +
> + /** Last-seen-alive timestamps */
> + uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];
> +
> + /**
> + * Cores to check.
> + * Indexed by core id, non-zero if the core should be checked.
> + */
> + uint8_t active_cores[RTE_KEEPALIVE_MAXCORES];
> +
> + /** Dead core handler. */
> + rte_keepalive_failure_callback_t callback;
> +
> + /**
> + * Dead core handler app data.
> + * Pointer is passed to dead core handler.
> + */
> + void *callback_data;
> + uint64_t tsc_initial;
> + uint64_t tsc_mhz;
> +};
> +
> +
> +/**
> + * Initialise keepalive sub-system.
> + * @param callback
> + * Function called upon detection of a dead core.
> + * @param data
> + * Data pointer to be passed to function callback.
> + * @return
> + * Keepalive structure success, NULL on failure.
> + */
> +struct rte_keepalive *rte_keepalive_create(
> + rte_keepalive_failure_callback_t callback,
> + void *data);
> +
> +
> +/**
> + * @param *ptr_timer Triggering timer (unused)
> + * @param *ptr_data Data pointer (keepalive structure) */ void
> +rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
> + void *ptr_data);
> +
> +
> +/**
> + * Registers a core for keepalive checks.
> + * @param *keepcfg
> + * Keepalive structure pointer
> + * @param id_core
> + * ID number of core to register.
> + */
> +void rte_keepalive_register_core(struct rte_keepalive *keepcfg, const
> +int id_core);
> +
> +
> +/**
> + * Per-core keepalive check.
> + * @param *keepcfg
> + * Keepalive structure pointer
> + *
> + * This function needs to be called from within the main process loop
> +of
> + * the LCore to be checked.
> + */
> +static inline void
> +rte_keepalive_mark_alive(struct rte_keepalive *keepcfg) {
> + keepcfg->state_flags[rte_lcore_id()] = 1; }
> +
> +
> +#endif /* _KEEPALIVE_H_ */
> diff --git a/lib/librte_eal/common/rte_keepalive.c
> b/lib/librte_eal/common/rte_keepalive.c
> new file mode 100644
> index 0000000..cbdd801
> --- /dev/null
> +++ b/lib/librte_eal/common/rte_keepalive.c
> @@ -0,0 +1,122 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright 2015 Intel Shannon Ltd. All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +#include <inttypes.h>
> +
> +#include <rte_cycles.h>
> +#include <rte_lcore.h>
> +#include <rte_keepalive.h>
> +
> +#ifdef KEEPALIVE_DEBUG_MSGS
> +static void
> +print_trace(const char *msg, struct rte_keepalive *keepcfg, int
> +idx_core) {
> + printf("%sLast seen %" PRId64 "ms ago.\n",
> + msg,
> + ((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
> + / rte_get_tsc_hz()
> + );
Hi Remy
Looks great overall, should this be an RTE_LOG message rather than a printf?
BR
Maryam
> +}
> +#else
> +static void
> +print_trace(__attribute__((unused)) const char *msg,
> + __attribute__((unused)) struct rte_keepalive *keepcfg,
> + __attribute__((unused)) int idx_core)
> +{
> +}
> +#endif
> +
> +
> +
> +void
> +rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
> + void *ptr_data)
> +{
> + struct rte_keepalive *keepcfg = (struct rte_keepalive *)ptr_data;
> + int idx_core;
> +
> + for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES;
> idx_core++) {
> + if (keepcfg->active_cores[idx_core] == 0)
> + continue;
> + switch (keepcfg->state_flags[idx_core]) {
> + case 1: /* Alive */
> + keepcfg->state_flags[idx_core] = 0;
> + keepcfg->last_alive[idx_core] = rte_rdtsc();
> + break;
> + case 0: /* MIA */
> + print_trace("Core MIA. ", keepcfg, idx_core);
> + keepcfg->state_flags[idx_core] = 2;
> + break;
> + case 2: /* Dead */
> + keepcfg->state_flags[idx_core] = 3;
> + print_trace("Core died. ", keepcfg, idx_core);
> + if (keepcfg->callback)
> + keepcfg->callback(
> + keepcfg->callback_data,
> + idx_core
> + );
> + break;
> + case 3: /* Buried */
> + break;
> + }
> + }
> +}
> +
> +
> +struct rte_keepalive *
> +rte_keepalive_create(rte_keepalive_failure_callback_t callback,
> + void *data)
> +{
> + int idx_core;
> + struct rte_keepalive *keepcfg;
> +
> + keepcfg = malloc(sizeof(struct rte_keepalive));
> + if (keepcfg != NULL) {
> + for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES;
> idx_core++) {
> + keepcfg->state_flags[idx_core] = 0;
> + keepcfg->active_cores[idx_core] = 0;
> + }
> + keepcfg->callback = callback;
> + keepcfg->callback_data = data;
> + keepcfg->tsc_initial = rte_rdtsc();
> + keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000;
> + }
> + return keepcfg;
> +}
> +
> +
> +void
> +rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int
> +id_core) {
> + if (id_core < RTE_KEEPALIVE_MAXCORES)
> + keepcfg->active_cores[id_core] = 1;
> +}
> diff --git a/lib/librte_eal/linuxapp/eal/Makefile
> b/lib/librte_eal/linuxapp/eal/Makefile
> index d62196e..05a44d7 100644
> --- a/lib/librte_eal/linuxapp/eal/Makefile
> +++ b/lib/librte_eal/linuxapp/eal/Makefile
> @@ -90,6 +90,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) +=
> eal_common_thread.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_malloc.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_elem.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_heap.c
> +SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_keepalive.c
>
> CFLAGS_eal.o := -D_GNU_SOURCE
> CFLAGS_eal_interrupts.o := -D_GNU_SOURCE
> --
> 1.9.3
Hi Remy, I have few questions inline.
—
Regards,
++Keith Wiles
Intel Corporation
On 9/30/15, 6:04 AM, "dev on behalf of Remy Horton" <dev-bounces@dpdk.org on behalf of remy.horton@intel.com> wrote:
>Adds functions for detecting and reporting the live-ness of LCores,
>the primary requirement of which is minimal overheads for the
>core(s) being checked. Core failures are notified via an application
>defined callback.
>
>Signed-off-by: Remy Horton <remy.horton@intel.com>
>---
> lib/librte_eal/bsdapp/eal/Makefile | 1 +
> lib/librte_eal/common/Makefile | 2 +-
> lib/librte_eal/common/include/rte_keepalive.h | 140 ++++++++++++++++++++++++++
> lib/librte_eal/common/rte_keepalive.c | 122 ++++++++++++++++++++++
> lib/librte_eal/linuxapp/eal/Makefile | 1 +
> 5 files changed, 265 insertions(+), 1 deletion(-)
> create mode 100644 lib/librte_eal/common/include/rte_keepalive.h
> create mode 100644 lib/librte_eal/common/rte_keepalive.c
>
>diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
>index a49dcec..65b293f 100644
>--- a/lib/librte_eal/bsdapp/eal/Makefile
>+++ b/lib/librte_eal/bsdapp/eal/Makefile
>@@ -80,6 +80,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_thread.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_malloc.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_elem.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_heap.c
>+SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_keepalive.c
>
> CFLAGS_eal.o := -D_GNU_SOURCE
> #CFLAGS_eal_thread.o := -D_GNU_SOURCE
>diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
>index 0c43d6a..7f1757a 100644
>--- a/lib/librte_eal/common/Makefile
>+++ b/lib/librte_eal/common/Makefile
>@@ -40,7 +40,7 @@ INC += rte_string_fns.h rte_version.h
> INC += rte_eal_memconfig.h rte_malloc_heap.h
> INC += rte_hexdump.h rte_devargs.h rte_dev.h
> INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
>-INC += rte_malloc.h
>+INC += rte_malloc.h rte_keepalive.h
>
> ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
> INC += rte_warnings.h
>diff --git a/lib/librte_eal/common/include/rte_keepalive.h b/lib/librte_eal/common/include/rte_keepalive.h
>new file mode 100644
>index 0000000..d67bf4b
>--- /dev/null
>+++ b/lib/librte_eal/common/include/rte_keepalive.h
>@@ -0,0 +1,140 @@
>+/*-
>+ * BSD LICENSE
>+ *
>+ * Copyright 2015 Intel Shannon Ltd. All rights reserved.
>+ *
>+ * Redistribution and use in source and binary forms, with or without
>+ * modification, are permitted provided that the following conditions
>+ * are met:
>+ *
>+ * * Redistributions of source code must retain the above copyright
>+ * notice, this list of conditions and the following disclaimer.
>+ * * Redistributions in binary form must reproduce the above copyright
>+ * notice, this list of conditions and the following disclaimer in
>+ * the documentation and/or other materials provided with the
>+ * distribution.
>+ * * Neither the name of Intel Corporation nor the names of its
>+ * contributors may be used to endorse or promote products derived
>+ * from this software without specific prior written permission.
>+ *
>+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>+ */
>+
>+/**
>+ * @file keepalive.h
>+ * DPDK RTE LCore Keepalive Monitor.
>+ *
>+ **/
>+
>+#ifndef _KEEPALIVE_H_
>+#define _KEEPALIVE_H_
>+
>+#include <rte_memory.h>
>+
>+#ifndef RTE_KEEPALIVE_MAXCORES
>+/**
>+ * Number of cores to track.
>+ * @note Must be larger than the highest core id. */
>+#define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE
>+#endif
>+
>+
>+/**
>+ * Keepalive failure callback.
>+ *
>+ * Receives a data pointer passed to rte_keepalive_create() and the id of the
>+ * failed core.
>+ */
>+typedef void (*rte_keepalive_failure_callback_t)(
>+ void *data,
>+ const int id_core);
>+
>+
>+/**
>+ * Keepalive state structure.
>+ * @internal
>+ */
>+struct rte_keepalive {
>+ /** Core Liveness. */
>+ uint32_t __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES];
Normally I see the __rte_cache_aligned at the end of the line before the ‘;’ did you have a reason to have it here? If not then I would move it to the end to look the same as the others. I did a quick grop in the code and that is the normal location.
My next question is why not align the whole, which would do the same thing. I did not check the compiler output, but I was thinking it would possible leave gaps in the structure for bytes we can not use normally, but maybe that is not a problem.
Next it appears the state_flags is only being set to 0-3, which means it does not need to be a uint43_t, but could be a uint8_t, correct?
Also setting the value 0-3 could be turned into a enum with real names so the debuggers could print them out instead of these numbers.
>+
>+ /** Last-seen-alive timestamps */
>+ uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];
>+
>+ /**
>+ * Cores to check.
>+ * Indexed by core id, non-zero if the core should be checked.
>+ */
>+ uint8_t active_cores[RTE_KEEPALIVE_MAXCORES];
>+
>+ /** Dead core handler. */
>+ rte_keepalive_failure_callback_t callback;
>+
>+ /**
>+ * Dead core handler app data.
>+ * Pointer is passed to dead core handler.
>+ */
>+ void *callback_data;
>+ uint64_t tsc_initial;
>+ uint64_t tsc_mhz;
>+};
>+
>+
>+/**
>+ * Initialise keepalive sub-system.
>+ * @param callback
>+ * Function called upon detection of a dead core.
>+ * @param data
>+ * Data pointer to be passed to function callback.
>+ * @return
>+ * Keepalive structure success, NULL on failure.
>+ */
>+struct rte_keepalive *rte_keepalive_create(
>+ rte_keepalive_failure_callback_t callback,
>+ void *data);
>+
>+
>+/**
>+ * @param *ptr_timer Triggering timer (unused)
>+ * @param *ptr_data Data pointer (keepalive structure)
>+ */
>+void rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
>+ void *ptr_data);
Normally I do not see the unused attribute on the declare of a function, does this help the compiler at all? I do not believe it does, so I would remove it from here as you already have it in the function.
>+
>+
>+/**
>+ * Registers a core for keepalive checks.
>+ * @param *keepcfg
>+ * Keepalive structure pointer
>+ * @param id_core
>+ * ID number of core to register.
>+ */
>+void rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core);
>+
>+
>+/**
>+ * Per-core keepalive check.
>+ * @param *keepcfg
>+ * Keepalive structure pointer
>+ *
>+ * This function needs to be called from within the main process loop of
>+ * the LCore to be checked.
>+ */
>+static inline void
>+rte_keepalive_mark_alive(struct rte_keepalive *keepcfg)
>+{
>+ keepcfg->state_flags[rte_lcore_id()] = 1;
>+}
>+
>+
>+#endif /* _KEEPALIVE_H_ */
>diff --git a/lib/librte_eal/common/rte_keepalive.c b/lib/librte_eal/common/rte_keepalive.c
>new file mode 100644
>index 0000000..cbdd801
>--- /dev/null
>+++ b/lib/librte_eal/common/rte_keepalive.c
>@@ -0,0 +1,122 @@
>+/*-
>+ * BSD LICENSE
>+ *
>+ * Copyright 2015 Intel Shannon Ltd. All rights reserved.
>+ *
>+ * Redistribution and use in source and binary forms, with or without
>+ * modification, are permitted provided that the following conditions
>+ * are met:
>+ *
>+ * * Redistributions of source code must retain the above copyright
>+ * notice, this list of conditions and the following disclaimer.
>+ * * Redistributions in binary form must reproduce the above copyright
>+ * notice, this list of conditions and the following disclaimer in
>+ * the documentation and/or other materials provided with the
>+ * distribution.
>+ * * Neither the name of Intel Corporation nor the names of its
>+ * contributors may be used to endorse or promote products derived
>+ * from this software without specific prior written permission.
>+ *
>+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>+ */
>+
>+#include <inttypes.h>
>+
>+#include <rte_cycles.h>
>+#include <rte_lcore.h>
>+#include <rte_keepalive.h>
>+
>+#ifdef KEEPALIVE_DEBUG_MSGS
>+static void
>+print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core)
>+{
>+ printf("%sLast seen %" PRId64 "ms ago.\n",
>+ msg,
>+ ((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
>+ / rte_get_tsc_hz()
>+ );
>+}
>+#else
>+static void
>+print_trace(__attribute__((unused)) const char *msg,
>+ __attribute__((unused)) struct rte_keepalive *keepcfg,
>+ __attribute__((unused)) int idx_core)
>+{
>+}
>+#endif
>+
>+
>+
>+void
>+rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
>+ void *ptr_data)
>+{
>+ struct rte_keepalive *keepcfg = (struct rte_keepalive *)ptr_data;
>+ int idx_core;
>+
>+ for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
>+ if (keepcfg->active_cores[idx_core] == 0)
>+ continue;
>+ switch (keepcfg->state_flags[idx_core]) {
>+ case 1: /* Alive */
>+ keepcfg->state_flags[idx_core] = 0;
>+ keepcfg->last_alive[idx_core] = rte_rdtsc();
>+ break;
>+ case 0: /* MIA */
>+ print_trace("Core MIA. ", keepcfg, idx_core);
>+ keepcfg->state_flags[idx_core] = 2;
>+ break;
>+ case 2: /* Dead */
>+ keepcfg->state_flags[idx_core] = 3;
>+ print_trace("Core died. ", keepcfg, idx_core);
>+ if (keepcfg->callback)
>+ keepcfg->callback(
>+ keepcfg->callback_data,
>+ idx_core
>+ );
>+ break;
>+ case 3: /* Buried */
>+ break;
>+ }
>+ }
>+}
>+
>+
>+struct rte_keepalive *
>+rte_keepalive_create(rte_keepalive_failure_callback_t callback,
>+ void *data)
>+{
>+ int idx_core;
>+ struct rte_keepalive *keepcfg;
>+
>+ keepcfg = malloc(sizeof(struct rte_keepalive));
>+ if (keepcfg != NULL) {
>+ for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
>+ keepcfg->state_flags[idx_core] = 0;
>+ keepcfg->active_cores[idx_core] = 0;
>+ }
Could you have done a calloc then you do not need the for loop to zero stuff?
>+ keepcfg->callback = callback;
>+ keepcfg->callback_data = data;
>+ keepcfg->tsc_initial = rte_rdtsc();
>+ keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000;
>+ }
>+ return keepcfg;
>+}
>+
>+
>+void
>+rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
>+{
>+ if (id_core < RTE_KEEPALIVE_MAXCORES)
>+ keepcfg->active_cores[id_core] = 1;
>+}
>diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
>index d62196e..05a44d7 100644
>--- a/lib/librte_eal/linuxapp/eal/Makefile
>+++ b/lib/librte_eal/linuxapp/eal/Makefile
>@@ -90,6 +90,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_thread.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_malloc.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_elem.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_heap.c
>+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_keepalive.c
>
> CFLAGS_eal.o := -D_GNU_SOURCE
> CFLAGS_eal_interrupts.o := -D_GNU_SOURCE
>--
>1.9.3
>
'noon,
On 23/10/2015 15:27, Wiles, Keith wrote:
>> + uint32_t __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES];
> Normally I see the __rte_cache_aligned at the end of the line before
> the ‘;’ did you have a reason to have it here? If not then I would
> move it to the end to look the same as the others. I did a quick grop
> in the code and that is the normal location.
>
> My next question is why not align the whole, which would do the same
> thing. I did not check the compiler output, but I was thinking it
> would possible leave gaps in the structure for bytes we can not use
> normally, but maybe that is not a problem.
Each element of state_flags is assigned to a different LCore, so they
have to be individually cache-aligned. The gaps it leaves behind are
unavoidable.
> Next it appears the state_flags is only being set to 0-3, which means
> it does not need to be a uint43_t, but could be a uint8_t, correct?
Yes, but since it all needs to be cache aligned anyway, wouldn't
actually gain anything.
>> + keepcfg = malloc(sizeof(struct rte_keepalive));
>> + if (keepcfg != NULL) {
>> + for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
>> + keepcfg->state_flags[idx_core] = 0;
>> + keepcfg->active_cores[idx_core] = 0;
>> + }
>
> Could you have done a calloc then you do not need the for loop to zero stuff?
Could do. It was written this way because the function originally took a
structure rather than allocate one.
..Remy
@@ -80,6 +80,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_thread.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_malloc.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_elem.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_keepalive.c
CFLAGS_eal.o := -D_GNU_SOURCE
#CFLAGS_eal_thread.o := -D_GNU_SOURCE
@@ -40,7 +40,7 @@ INC += rte_string_fns.h rte_version.h
INC += rte_eal_memconfig.h rte_malloc_heap.h
INC += rte_hexdump.h rte_devargs.h rte_dev.h
INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
-INC += rte_malloc.h
+INC += rte_malloc.h rte_keepalive.h
ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
INC += rte_warnings.h
new file mode 100644
@@ -0,0 +1,140 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2015 Intel Shannon Ltd. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file keepalive.h
+ * DPDK RTE LCore Keepalive Monitor.
+ *
+ **/
+
+#ifndef _KEEPALIVE_H_
+#define _KEEPALIVE_H_
+
+#include <rte_memory.h>
+
+#ifndef RTE_KEEPALIVE_MAXCORES
+/**
+ * Number of cores to track.
+ * @note Must be larger than the highest core id. */
+#define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE
+#endif
+
+
+/**
+ * Keepalive failure callback.
+ *
+ * Receives a data pointer passed to rte_keepalive_create() and the id of the
+ * failed core.
+ */
+typedef void (*rte_keepalive_failure_callback_t)(
+ void *data,
+ const int id_core);
+
+
+/**
+ * Keepalive state structure.
+ * @internal
+ */
+struct rte_keepalive {
+ /** Core Liveness. */
+ uint32_t __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES];
+
+ /** Last-seen-alive timestamps */
+ uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];
+
+ /**
+ * Cores to check.
+ * Indexed by core id, non-zero if the core should be checked.
+ */
+ uint8_t active_cores[RTE_KEEPALIVE_MAXCORES];
+
+ /** Dead core handler. */
+ rte_keepalive_failure_callback_t callback;
+
+ /**
+ * Dead core handler app data.
+ * Pointer is passed to dead core handler.
+ */
+ void *callback_data;
+ uint64_t tsc_initial;
+ uint64_t tsc_mhz;
+};
+
+
+/**
+ * Initialise keepalive sub-system.
+ * @param callback
+ * Function called upon detection of a dead core.
+ * @param data
+ * Data pointer to be passed to function callback.
+ * @return
+ * Keepalive structure success, NULL on failure.
+ */
+struct rte_keepalive *rte_keepalive_create(
+ rte_keepalive_failure_callback_t callback,
+ void *data);
+
+
+/**
+ * @param *ptr_timer Triggering timer (unused)
+ * @param *ptr_data Data pointer (keepalive structure)
+ */
+void rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
+ void *ptr_data);
+
+
+/**
+ * Registers a core for keepalive checks.
+ * @param *keepcfg
+ * Keepalive structure pointer
+ * @param id_core
+ * ID number of core to register.
+ */
+void rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core);
+
+
+/**
+ * Per-core keepalive check.
+ * @param *keepcfg
+ * Keepalive structure pointer
+ *
+ * This function needs to be called from within the main process loop of
+ * the LCore to be checked.
+ */
+static inline void
+rte_keepalive_mark_alive(struct rte_keepalive *keepcfg)
+{
+ keepcfg->state_flags[rte_lcore_id()] = 1;
+}
+
+
+#endif /* _KEEPALIVE_H_ */
new file mode 100644
@@ -0,0 +1,122 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2015 Intel Shannon Ltd. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <inttypes.h>
+
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_keepalive.h>
+
+#ifdef KEEPALIVE_DEBUG_MSGS
+static void
+print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core)
+{
+ printf("%sLast seen %" PRId64 "ms ago.\n",
+ msg,
+ ((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
+ / rte_get_tsc_hz()
+ );
+}
+#else
+static void
+print_trace(__attribute__((unused)) const char *msg,
+ __attribute__((unused)) struct rte_keepalive *keepcfg,
+ __attribute__((unused)) int idx_core)
+{
+}
+#endif
+
+
+
+void
+rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
+ void *ptr_data)
+{
+ struct rte_keepalive *keepcfg = (struct rte_keepalive *)ptr_data;
+ int idx_core;
+
+ for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
+ if (keepcfg->active_cores[idx_core] == 0)
+ continue;
+ switch (keepcfg->state_flags[idx_core]) {
+ case 1: /* Alive */
+ keepcfg->state_flags[idx_core] = 0;
+ keepcfg->last_alive[idx_core] = rte_rdtsc();
+ break;
+ case 0: /* MIA */
+ print_trace("Core MIA. ", keepcfg, idx_core);
+ keepcfg->state_flags[idx_core] = 2;
+ break;
+ case 2: /* Dead */
+ keepcfg->state_flags[idx_core] = 3;
+ print_trace("Core died. ", keepcfg, idx_core);
+ if (keepcfg->callback)
+ keepcfg->callback(
+ keepcfg->callback_data,
+ idx_core
+ );
+ break;
+ case 3: /* Buried */
+ break;
+ }
+ }
+}
+
+
+struct rte_keepalive *
+rte_keepalive_create(rte_keepalive_failure_callback_t callback,
+ void *data)
+{
+ int idx_core;
+ struct rte_keepalive *keepcfg;
+
+ keepcfg = malloc(sizeof(struct rte_keepalive));
+ if (keepcfg != NULL) {
+ for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
+ keepcfg->state_flags[idx_core] = 0;
+ keepcfg->active_cores[idx_core] = 0;
+ }
+ keepcfg->callback = callback;
+ keepcfg->callback_data = data;
+ keepcfg->tsc_initial = rte_rdtsc();
+ keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000;
+ }
+ return keepcfg;
+}
+
+
+void
+rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
+{
+ if (id_core < RTE_KEEPALIVE_MAXCORES)
+ keepcfg->active_cores[id_core] = 1;
+}
@@ -90,6 +90,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_thread.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_malloc.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_elem.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_keepalive.c
CFLAGS_eal.o := -D_GNU_SOURCE
CFLAGS_eal_interrupts.o := -D_GNU_SOURCE