[dpdk-dev,v2,1/3] rte: add keep alive functionality

Message ID 1443603881-4700-2-git-send-email-remy.horton@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Remy Horton Sept. 30, 2015, 9:04 a.m. UTC
  Adds functions for detecting and reporting the live-ness of LCores,
the primary requirement of which is minimal overheads for the
core(s) being checked. Core failures are notified via an application
defined callback.

Signed-off-by: Remy Horton <remy.horton@intel.com>
---
 lib/librte_eal/bsdapp/eal/Makefile            |   1 +
 lib/librte_eal/common/Makefile                |   2 +-
 lib/librte_eal/common/include/rte_keepalive.h | 140 ++++++++++++++++++++++++++
 lib/librte_eal/common/rte_keepalive.c         | 122 ++++++++++++++++++++++
 lib/librte_eal/linuxapp/eal/Makefile          |   1 +
 5 files changed, 265 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_eal/common/include/rte_keepalive.h
 create mode 100644 lib/librte_eal/common/rte_keepalive.c
  

Comments

Tahhan, Maryam Oct. 23, 2015, 11:40 a.m. UTC | #1
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Remy Horton
> Sent: Wednesday, September 30, 2015 10:05 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v2 1/3] rte: add keep alive functionality
> 
> Adds functions for detecting and reporting the live-ness of LCores, the
> primary requirement of which is minimal overheads for the
> core(s) being checked. Core failures are notified via an application defined
> callback.
> 
> Signed-off-by: Remy Horton <remy.horton@intel.com>
> ---
>  lib/librte_eal/bsdapp/eal/Makefile            |   1 +
>  lib/librte_eal/common/Makefile                |   2 +-
>  lib/librte_eal/common/include/rte_keepalive.h | 140
> ++++++++++++++++++++++++++
>  lib/librte_eal/common/rte_keepalive.c         | 122
> ++++++++++++++++++++++
>  lib/librte_eal/linuxapp/eal/Makefile          |   1 +
>  5 files changed, 265 insertions(+), 1 deletion(-)  create mode 100644
> lib/librte_eal/common/include/rte_keepalive.h
>  create mode 100644 lib/librte_eal/common/rte_keepalive.c
> 
> diff --git a/lib/librte_eal/bsdapp/eal/Makefile
> b/lib/librte_eal/bsdapp/eal/Makefile
> index a49dcec..65b293f 100644
> --- a/lib/librte_eal/bsdapp/eal/Makefile
> +++ b/lib/librte_eal/bsdapp/eal/Makefile
> @@ -80,6 +80,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) +=
> eal_common_thread.c
>  SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_malloc.c
>  SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_elem.c
>  SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_heap.c
> +SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_keepalive.c
> 
>  CFLAGS_eal.o := -D_GNU_SOURCE
>  #CFLAGS_eal_thread.o := -D_GNU_SOURCE
> diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
> index 0c43d6a..7f1757a 100644
> --- a/lib/librte_eal/common/Makefile
> +++ b/lib/librte_eal/common/Makefile
> @@ -40,7 +40,7 @@ INC += rte_string_fns.h rte_version.h  INC +=
> rte_eal_memconfig.h rte_malloc_heap.h  INC += rte_hexdump.h
> rte_devargs.h rte_dev.h  INC += rte_pci_dev_feature_defs.h
> rte_pci_dev_features.h -INC += rte_malloc.h
> +INC += rte_malloc.h rte_keepalive.h
> 
>  ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
>  INC += rte_warnings.h
> diff --git a/lib/librte_eal/common/include/rte_keepalive.h
> b/lib/librte_eal/common/include/rte_keepalive.h
> new file mode 100644
> index 0000000..d67bf4b
> --- /dev/null
> +++ b/lib/librte_eal/common/include/rte_keepalive.h
> @@ -0,0 +1,140 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright 2015 Intel Shannon Ltd. All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +/**
> + * @file keepalive.h
> + * DPDK RTE LCore Keepalive Monitor.
> + *
> + **/
> +
> +#ifndef _KEEPALIVE_H_
> +#define _KEEPALIVE_H_
> +
> +#include <rte_memory.h>
> +
> +#ifndef RTE_KEEPALIVE_MAXCORES
> +/**
> + * Number of cores to track.
> + * @note Must be larger than the highest core id. */ #define
> +RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE #endif
> +
> +
> +/**
> + * Keepalive failure callback.
> + *
> + *  Receives a data pointer passed to rte_keepalive_create() and the id
> +of the
> + *  failed core.
> + */
> +typedef void (*rte_keepalive_failure_callback_t)(
> +	void *data,
> +	const int id_core);
> +
> +
> +/**
> + * Keepalive state structure.
> + * @internal
> + */
> +struct rte_keepalive {
> +	/** Core Liveness. */
> +	uint32_t __rte_cache_aligned
> state_flags[RTE_KEEPALIVE_MAXCORES];
> +
> +	/** Last-seen-alive timestamps */
> +	uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];
> +
> +	/**
> +	 * Cores to check.
> +	 * Indexed by core id, non-zero if the core should be checked.
> +	 */
> +	uint8_t active_cores[RTE_KEEPALIVE_MAXCORES];
> +
> +	/** Dead core handler. */
> +	rte_keepalive_failure_callback_t callback;
> +
> +	/**
> +	 * Dead core handler app data.
> +	 * Pointer is passed to dead core handler.
> +	 */
> +	void *callback_data;
> +	uint64_t tsc_initial;
> +	uint64_t tsc_mhz;
> +};
> +
> +
> +/**
> + * Initialise keepalive sub-system.
> + * @param callback
> + *   Function called upon detection of a dead core.
> + * @param data
> + *   Data pointer to be passed to function callback.
> + * @return
> + *   Keepalive structure success, NULL on failure.
> + */
> +struct rte_keepalive *rte_keepalive_create(
> +	rte_keepalive_failure_callback_t callback,
> +	void *data);
> +
> +
> +/**
> + * @param *ptr_timer Triggering timer (unused)
> + * @param *ptr_data  Data pointer (keepalive structure)  */ void
> +rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
> +	void *ptr_data);
> +
> +
> +/**
> + * Registers a core for keepalive checks.
> + * @param *keepcfg
> + *   Keepalive structure pointer
> + * @param id_core
> + *   ID number of core to register.
> + */
> +void rte_keepalive_register_core(struct rte_keepalive *keepcfg, const
> +int id_core);
> +
> +
> +/**
> + * Per-core keepalive check.
> + * @param *keepcfg
> + *   Keepalive structure pointer
> + *
> + * This function needs to be called from within the main process loop
> +of
> + * the LCore to be checked.
> + */
> +static inline void
> +rte_keepalive_mark_alive(struct rte_keepalive *keepcfg) {
> +	keepcfg->state_flags[rte_lcore_id()] = 1; }
> +
> +
> +#endif /* _KEEPALIVE_H_ */
> diff --git a/lib/librte_eal/common/rte_keepalive.c
> b/lib/librte_eal/common/rte_keepalive.c
> new file mode 100644
> index 0000000..cbdd801
> --- /dev/null
> +++ b/lib/librte_eal/common/rte_keepalive.c
> @@ -0,0 +1,122 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright 2015 Intel Shannon Ltd. All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +#include <inttypes.h>
> +
> +#include <rte_cycles.h>
> +#include <rte_lcore.h>
> +#include <rte_keepalive.h>
> +
> +#ifdef KEEPALIVE_DEBUG_MSGS
> +static void
> +print_trace(const char *msg, struct rte_keepalive *keepcfg, int
> +idx_core) {
> +	printf("%sLast seen %" PRId64  "ms ago.\n",
> +		msg,
> +		((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
> +		/ rte_get_tsc_hz()
> +	      );

Hi Remy
Looks great overall, should this be an RTE_LOG message rather than a printf?

BR 
Maryam
> +}
> +#else
> +static void
> +print_trace(__attribute__((unused)) const char *msg,
> +	__attribute__((unused)) struct rte_keepalive *keepcfg,
> +	__attribute__((unused)) int idx_core)
> +{
> +}
> +#endif
> +
> +
> +
> +void
> +rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
> +	void *ptr_data)
> +{
> +	struct rte_keepalive *keepcfg = (struct rte_keepalive *)ptr_data;
> +	int idx_core;
> +
> +	for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES;
> idx_core++) {
> +		if (keepcfg->active_cores[idx_core] == 0)
> +			continue;
> +		switch (keepcfg->state_flags[idx_core]) {
> +		case 1: /* Alive */
> +			keepcfg->state_flags[idx_core] = 0;
> +			keepcfg->last_alive[idx_core] = rte_rdtsc();
> +			break;
> +		case 0: /* MIA */
> +			print_trace("Core MIA. ", keepcfg, idx_core);
> +			keepcfg->state_flags[idx_core] = 2;
> +			break;
> +		case 2: /* Dead */
> +			keepcfg->state_flags[idx_core] = 3;
> +			print_trace("Core died. ", keepcfg, idx_core);
> +			if (keepcfg->callback)
> +				keepcfg->callback(
> +					keepcfg->callback_data,
> +					idx_core
> +					);
> +			break;
> +		case 3: /* Buried */
> +			break;
> +		}
> +	}
> +}
> +
> +
> +struct rte_keepalive *
> +rte_keepalive_create(rte_keepalive_failure_callback_t callback,
> +	void *data)
> +{
> +	int idx_core;
> +	struct rte_keepalive *keepcfg;
> +
> +	keepcfg = malloc(sizeof(struct rte_keepalive));
> +	if (keepcfg != NULL) {
> +		for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES;
> idx_core++) {
> +			keepcfg->state_flags[idx_core] = 0;
> +			keepcfg->active_cores[idx_core] = 0;
> +		}
> +		keepcfg->callback = callback;
> +		keepcfg->callback_data = data;
> +		keepcfg->tsc_initial = rte_rdtsc();
> +		keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000;
> +	}
> +	return keepcfg;
> +}
> +
> +
> +void
> +rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int
> +id_core) {
> +	if (id_core < RTE_KEEPALIVE_MAXCORES)
> +		keepcfg->active_cores[id_core] = 1;
> +}
> diff --git a/lib/librte_eal/linuxapp/eal/Makefile
> b/lib/librte_eal/linuxapp/eal/Makefile
> index d62196e..05a44d7 100644
> --- a/lib/librte_eal/linuxapp/eal/Makefile
> +++ b/lib/librte_eal/linuxapp/eal/Makefile
> @@ -90,6 +90,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) +=
> eal_common_thread.c
>  SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_malloc.c
>  SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_elem.c
>  SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_heap.c
> +SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_keepalive.c
> 
>  CFLAGS_eal.o := -D_GNU_SOURCE
>  CFLAGS_eal_interrupts.o := -D_GNU_SOURCE
> --
> 1.9.3
  
Wiles, Keith Oct. 23, 2015, 2:27 p.m. UTC | #2
Hi Remy, I have few questions inline.
— 
Regards,
++Keith Wiles

Intel Corporation







On 9/30/15, 6:04 AM, "dev on behalf of Remy Horton" <dev-bounces@dpdk.org on behalf of remy.horton@intel.com> wrote:

>Adds functions for detecting and reporting the live-ness of LCores,

>the primary requirement of which is minimal overheads for the

>core(s) being checked. Core failures are notified via an application

>defined callback.

>

>Signed-off-by: Remy Horton <remy.horton@intel.com>

>---

> lib/librte_eal/bsdapp/eal/Makefile            |   1 +

> lib/librte_eal/common/Makefile                |   2 +-

> lib/librte_eal/common/include/rte_keepalive.h | 140 ++++++++++++++++++++++++++

> lib/librte_eal/common/rte_keepalive.c         | 122 ++++++++++++++++++++++

> lib/librte_eal/linuxapp/eal/Makefile          |   1 +

> 5 files changed, 265 insertions(+), 1 deletion(-)

> create mode 100644 lib/librte_eal/common/include/rte_keepalive.h

> create mode 100644 lib/librte_eal/common/rte_keepalive.c

>

>diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile

>index a49dcec..65b293f 100644

>--- a/lib/librte_eal/bsdapp/eal/Makefile

>+++ b/lib/librte_eal/bsdapp/eal/Makefile

>@@ -80,6 +80,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_thread.c

> SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_malloc.c

> SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_elem.c

> SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_heap.c

>+SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_keepalive.c

> 

> CFLAGS_eal.o := -D_GNU_SOURCE

> #CFLAGS_eal_thread.o := -D_GNU_SOURCE

>diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile

>index 0c43d6a..7f1757a 100644

>--- a/lib/librte_eal/common/Makefile

>+++ b/lib/librte_eal/common/Makefile

>@@ -40,7 +40,7 @@ INC += rte_string_fns.h rte_version.h

> INC += rte_eal_memconfig.h rte_malloc_heap.h

> INC += rte_hexdump.h rte_devargs.h rte_dev.h

> INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h

>-INC += rte_malloc.h

>+INC += rte_malloc.h rte_keepalive.h

> 

> ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)

> INC += rte_warnings.h

>diff --git a/lib/librte_eal/common/include/rte_keepalive.h b/lib/librte_eal/common/include/rte_keepalive.h

>new file mode 100644

>index 0000000..d67bf4b

>--- /dev/null

>+++ b/lib/librte_eal/common/include/rte_keepalive.h

>@@ -0,0 +1,140 @@

>+/*-

>+ *   BSD LICENSE

>+ *

>+ *   Copyright 2015 Intel Shannon Ltd. All rights reserved.

>+ *

>+ *   Redistribution and use in source and binary forms, with or without

>+ *   modification, are permitted provided that the following conditions

>+ *   are met:

>+ *

>+ *     * Redistributions of source code must retain the above copyright

>+ *       notice, this list of conditions and the following disclaimer.

>+ *     * Redistributions in binary form must reproduce the above copyright

>+ *       notice, this list of conditions and the following disclaimer in

>+ *       the documentation and/or other materials provided with the

>+ *       distribution.

>+ *     * Neither the name of Intel Corporation nor the names of its

>+ *       contributors may be used to endorse or promote products derived

>+ *       from this software without specific prior written permission.

>+ *

>+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

>+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

>+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

>+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

>+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

>+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

>+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

>+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

>+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

>+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

>+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

>+ */

>+

>+/**

>+ * @file keepalive.h

>+ * DPDK RTE LCore Keepalive Monitor.

>+ *

>+ **/

>+

>+#ifndef _KEEPALIVE_H_

>+#define _KEEPALIVE_H_

>+

>+#include <rte_memory.h>

>+

>+#ifndef RTE_KEEPALIVE_MAXCORES

>+/**

>+ * Number of cores to track.

>+ * @note Must be larger than the highest core id. */

>+#define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE

>+#endif

>+

>+

>+/**

>+ * Keepalive failure callback.

>+ *

>+ *  Receives a data pointer passed to rte_keepalive_create() and the id of the

>+ *  failed core.

>+ */

>+typedef void (*rte_keepalive_failure_callback_t)(

>+	void *data,

>+	const int id_core);

>+

>+

>+/**

>+ * Keepalive state structure.

>+ * @internal

>+ */

>+struct rte_keepalive {

>+	/** Core Liveness. */

>+	uint32_t __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES];


Normally I see the __rte_cache_aligned at the end of the line before the ‘;’ did you have a reason to have it here? If not then I would move it to the end to look the same as the others. I did a quick grop in the code and that is the normal location.

My next question is why not align the whole, which would do the same thing. I did not check the compiler output, but I was thinking it would possible leave gaps in the structure for bytes we can not use normally, but maybe that is not a problem.

Next it appears the state_flags is only being set to 0-3, which means it does not need to be a uint43_t, but could be a uint8_t, correct?

Also setting the value 0-3 could be turned into a enum with real names so the debuggers could print them out instead of these numbers.

>+

>+	/** Last-seen-alive timestamps */

>+	uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];

>+

>+	/**

>+	 * Cores to check.

>+	 * Indexed by core id, non-zero if the core should be checked.

>+	 */

>+	uint8_t active_cores[RTE_KEEPALIVE_MAXCORES];

>+

>+	/** Dead core handler. */

>+	rte_keepalive_failure_callback_t callback;

>+

>+	/**

>+	 * Dead core handler app data.

>+	 * Pointer is passed to dead core handler.

>+	 */

>+	void *callback_data;

>+	uint64_t tsc_initial;

>+	uint64_t tsc_mhz;

>+};

>+

>+

>+/**

>+ * Initialise keepalive sub-system.

>+ * @param callback

>+ *   Function called upon detection of a dead core.

>+ * @param data

>+ *   Data pointer to be passed to function callback.

>+ * @return

>+ *   Keepalive structure success, NULL on failure.

>+ */

>+struct rte_keepalive *rte_keepalive_create(

>+	rte_keepalive_failure_callback_t callback,

>+	void *data);

>+

>+

>+/**

>+ * @param *ptr_timer Triggering timer (unused)

>+ * @param *ptr_data  Data pointer (keepalive structure)

>+ */

>+void rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,

>+	void *ptr_data);


Normally I do not see the unused attribute on the declare of a function, does this help the compiler at all? I do not believe it does, so I would remove it from here as you already have it in the function.
>+

>+

>+/**

>+ * Registers a core for keepalive checks.

>+ * @param *keepcfg

>+ *   Keepalive structure pointer

>+ * @param id_core

>+ *   ID number of core to register.

>+ */

>+void rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core);

>+

>+

>+/**

>+ * Per-core keepalive check.

>+ * @param *keepcfg

>+ *   Keepalive structure pointer

>+ *

>+ * This function needs to be called from within the main process loop of

>+ * the LCore to be checked.

>+ */

>+static inline void

>+rte_keepalive_mark_alive(struct rte_keepalive *keepcfg)

>+{

>+	keepcfg->state_flags[rte_lcore_id()] = 1;

>+}

>+

>+

>+#endif /* _KEEPALIVE_H_ */

>diff --git a/lib/librte_eal/common/rte_keepalive.c b/lib/librte_eal/common/rte_keepalive.c

>new file mode 100644

>index 0000000..cbdd801

>--- /dev/null

>+++ b/lib/librte_eal/common/rte_keepalive.c

>@@ -0,0 +1,122 @@

>+/*-

>+ *   BSD LICENSE

>+ *

>+ *   Copyright 2015 Intel Shannon Ltd. All rights reserved.

>+ *

>+ *   Redistribution and use in source and binary forms, with or without

>+ *   modification, are permitted provided that the following conditions

>+ *   are met:

>+ *

>+ *     * Redistributions of source code must retain the above copyright

>+ *       notice, this list of conditions and the following disclaimer.

>+ *     * Redistributions in binary form must reproduce the above copyright

>+ *       notice, this list of conditions and the following disclaimer in

>+ *       the documentation and/or other materials provided with the

>+ *       distribution.

>+ *     * Neither the name of Intel Corporation nor the names of its

>+ *       contributors may be used to endorse or promote products derived

>+ *       from this software without specific prior written permission.

>+ *

>+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

>+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

>+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

>+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

>+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

>+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

>+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

>+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

>+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

>+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

>+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

>+ */

>+

>+#include <inttypes.h>

>+

>+#include <rte_cycles.h>

>+#include <rte_lcore.h>

>+#include <rte_keepalive.h>

>+

>+#ifdef KEEPALIVE_DEBUG_MSGS

>+static void

>+print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core)

>+{

>+	printf("%sLast seen %" PRId64  "ms ago.\n",

>+		msg,

>+		((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)

>+		/ rte_get_tsc_hz()

>+	      );

>+}

>+#else

>+static void

>+print_trace(__attribute__((unused)) const char *msg,

>+	__attribute__((unused)) struct rte_keepalive *keepcfg,

>+	__attribute__((unused)) int idx_core)

>+{

>+}

>+#endif

>+

>+

>+

>+void

>+rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,

>+	void *ptr_data)

>+{

>+	struct rte_keepalive *keepcfg = (struct rte_keepalive *)ptr_data;

>+	int idx_core;

>+

>+	for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {

>+		if (keepcfg->active_cores[idx_core] == 0)

>+			continue;

>+		switch (keepcfg->state_flags[idx_core]) {

>+		case 1: /* Alive */

>+			keepcfg->state_flags[idx_core] = 0;

>+			keepcfg->last_alive[idx_core] = rte_rdtsc();

>+			break;

>+		case 0: /* MIA */

>+			print_trace("Core MIA. ", keepcfg, idx_core);

>+			keepcfg->state_flags[idx_core] = 2;

>+			break;

>+		case 2: /* Dead */

>+			keepcfg->state_flags[idx_core] = 3;

>+			print_trace("Core died. ", keepcfg, idx_core);

>+			if (keepcfg->callback)

>+				keepcfg->callback(

>+					keepcfg->callback_data,

>+					idx_core

>+					);

>+			break;

>+		case 3: /* Buried */

>+			break;

>+		}

>+	}

>+}

>+

>+

>+struct rte_keepalive *

>+rte_keepalive_create(rte_keepalive_failure_callback_t callback,

>+	void *data)

>+{

>+	int idx_core;

>+	struct rte_keepalive *keepcfg;

>+

>+	keepcfg = malloc(sizeof(struct rte_keepalive));

>+	if (keepcfg != NULL) {

>+		for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {

>+			keepcfg->state_flags[idx_core] = 0;

>+			keepcfg->active_cores[idx_core] = 0;

>+		}


Could you have done a calloc then you do not need the for loop to zero stuff?
>+		keepcfg->callback = callback;

>+		keepcfg->callback_data = data;

>+		keepcfg->tsc_initial = rte_rdtsc();

>+		keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000;

>+	}

>+	return keepcfg;

>+}

>+

>+

>+void

>+rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)

>+{

>+	if (id_core < RTE_KEEPALIVE_MAXCORES)

>+		keepcfg->active_cores[id_core] = 1;

>+}

>diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile

>index d62196e..05a44d7 100644

>--- a/lib/librte_eal/linuxapp/eal/Makefile

>+++ b/lib/librte_eal/linuxapp/eal/Makefile

>@@ -90,6 +90,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_thread.c

> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_malloc.c

> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_elem.c

> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_heap.c

>+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_keepalive.c

> 

> CFLAGS_eal.o := -D_GNU_SOURCE

> CFLAGS_eal_interrupts.o := -D_GNU_SOURCE

>-- 

>1.9.3

>
  
Remy Horton Oct. 26, 2015, 4:36 p.m. UTC | #3
'noon,

On 23/10/2015 15:27, Wiles, Keith wrote:
>> +	uint32_t __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES];
> Normally I see the __rte_cache_aligned at the end of the line before
> the ‘;’ did you have a reason to have it here? If not then I would
> move it to the end to look the same as the others. I did a quick grop
> in the code and that is the normal location.
>
> My next question is why not align the whole, which would do the same
> thing. I did not check the compiler output, but I was thinking it
> would possible leave gaps in the structure for bytes we can not use
> normally, but maybe that is not a problem.

Each element of state_flags is assigned to a different LCore, so they 
have to be individually cache-aligned. The gaps it leaves behind are 
unavoidable.


> Next it appears the state_flags is only being set to 0-3, which means
> it does not need to be a uint43_t, but could be a uint8_t, correct?

Yes, but since it all needs to be cache aligned anyway, wouldn't 
actually gain anything.


>> +	keepcfg = malloc(sizeof(struct rte_keepalive));
>> +	if (keepcfg != NULL) {
>> +		for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
>> +			keepcfg->state_flags[idx_core] = 0;
>> +			keepcfg->active_cores[idx_core] = 0;
>> +		}
>
> Could you have done a calloc then you do not need the for loop to zero stuff?

Could do. It was written this way because the function originally took a 
structure rather than allocate one.


..Remy
  

Patch

diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index a49dcec..65b293f 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -80,6 +80,7 @@  SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_thread.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_malloc.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_elem.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_keepalive.c
 
 CFLAGS_eal.o := -D_GNU_SOURCE
 #CFLAGS_eal_thread.o := -D_GNU_SOURCE
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index 0c43d6a..7f1757a 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -40,7 +40,7 @@  INC += rte_string_fns.h rte_version.h
 INC += rte_eal_memconfig.h rte_malloc_heap.h
 INC += rte_hexdump.h rte_devargs.h rte_dev.h
 INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
-INC += rte_malloc.h
+INC += rte_malloc.h rte_keepalive.h
 
 ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
 INC += rte_warnings.h
diff --git a/lib/librte_eal/common/include/rte_keepalive.h b/lib/librte_eal/common/include/rte_keepalive.h
new file mode 100644
index 0000000..d67bf4b
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_keepalive.h
@@ -0,0 +1,140 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2015 Intel Shannon Ltd. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file keepalive.h
+ * DPDK RTE LCore Keepalive Monitor.
+ *
+ **/
+
+#ifndef _KEEPALIVE_H_
+#define _KEEPALIVE_H_
+
+#include <rte_memory.h>
+
+#ifndef RTE_KEEPALIVE_MAXCORES
+/**
+ * Number of cores to track.
+ * @note Must be larger than the highest core id. */
+#define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE
+#endif
+
+
+/**
+ * Keepalive failure callback.
+ *
+ *  Receives a data pointer passed to rte_keepalive_create() and the id of the
+ *  failed core.
+ */
+typedef void (*rte_keepalive_failure_callback_t)(
+	void *data,
+	const int id_core);
+
+
+/**
+ * Keepalive state structure.
+ * @internal
+ */
+struct rte_keepalive {
+	/** Core Liveness. */
+	uint32_t __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES];
+
+	/** Last-seen-alive timestamps */
+	uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];
+
+	/**
+	 * Cores to check.
+	 * Indexed by core id, non-zero if the core should be checked.
+	 */
+	uint8_t active_cores[RTE_KEEPALIVE_MAXCORES];
+
+	/** Dead core handler. */
+	rte_keepalive_failure_callback_t callback;
+
+	/**
+	 * Dead core handler app data.
+	 * Pointer is passed to dead core handler.
+	 */
+	void *callback_data;
+	uint64_t tsc_initial;
+	uint64_t tsc_mhz;
+};
+
+
+/**
+ * Initialise keepalive sub-system.
+ * @param callback
+ *   Function called upon detection of a dead core.
+ * @param data
+ *   Data pointer to be passed to function callback.
+ * @return
+ *   Keepalive structure success, NULL on failure.
+ */
+struct rte_keepalive *rte_keepalive_create(
+	rte_keepalive_failure_callback_t callback,
+	void *data);
+
+
+/**
+ * @param *ptr_timer Triggering timer (unused)
+ * @param *ptr_data  Data pointer (keepalive structure)
+ */
+void rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
+	void *ptr_data);
+
+
+/**
+ * Registers a core for keepalive checks.
+ * @param *keepcfg
+ *   Keepalive structure pointer
+ * @param id_core
+ *   ID number of core to register.
+ */
+void rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core);
+
+
+/**
+ * Per-core keepalive check.
+ * @param *keepcfg
+ *   Keepalive structure pointer
+ *
+ * This function needs to be called from within the main process loop of
+ * the LCore to be checked.
+ */
+static inline void
+rte_keepalive_mark_alive(struct rte_keepalive *keepcfg)
+{
+	keepcfg->state_flags[rte_lcore_id()] = 1;
+}
+
+
+#endif /* _KEEPALIVE_H_ */
diff --git a/lib/librte_eal/common/rte_keepalive.c b/lib/librte_eal/common/rte_keepalive.c
new file mode 100644
index 0000000..cbdd801
--- /dev/null
+++ b/lib/librte_eal/common/rte_keepalive.c
@@ -0,0 +1,122 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2015 Intel Shannon Ltd. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <inttypes.h>
+
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_keepalive.h>
+
+#ifdef KEEPALIVE_DEBUG_MSGS
+static void
+print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core)
+{
+	printf("%sLast seen %" PRId64  "ms ago.\n",
+		msg,
+		((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
+		/ rte_get_tsc_hz()
+	      );
+}
+#else
+static void
+print_trace(__attribute__((unused)) const char *msg,
+	__attribute__((unused)) struct rte_keepalive *keepcfg,
+	__attribute__((unused)) int idx_core)
+{
+}
+#endif
+
+
+
+void
+rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
+	void *ptr_data)
+{
+	struct rte_keepalive *keepcfg = (struct rte_keepalive *)ptr_data;
+	int idx_core;
+
+	for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
+		if (keepcfg->active_cores[idx_core] == 0)
+			continue;
+		switch (keepcfg->state_flags[idx_core]) {
+		case 1: /* Alive */
+			keepcfg->state_flags[idx_core] = 0;
+			keepcfg->last_alive[idx_core] = rte_rdtsc();
+			break;
+		case 0: /* MIA */
+			print_trace("Core MIA. ", keepcfg, idx_core);
+			keepcfg->state_flags[idx_core] = 2;
+			break;
+		case 2: /* Dead */
+			keepcfg->state_flags[idx_core] = 3;
+			print_trace("Core died. ", keepcfg, idx_core);
+			if (keepcfg->callback)
+				keepcfg->callback(
+					keepcfg->callback_data,
+					idx_core
+					);
+			break;
+		case 3: /* Buried */
+			break;
+		}
+	}
+}
+
+
+struct rte_keepalive *
+rte_keepalive_create(rte_keepalive_failure_callback_t callback,
+	void *data)
+{
+	int idx_core;
+	struct rte_keepalive *keepcfg;
+
+	keepcfg = malloc(sizeof(struct rte_keepalive));
+	if (keepcfg != NULL) {
+		for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
+			keepcfg->state_flags[idx_core] = 0;
+			keepcfg->active_cores[idx_core] = 0;
+		}
+		keepcfg->callback = callback;
+		keepcfg->callback_data = data;
+		keepcfg->tsc_initial = rte_rdtsc();
+		keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000;
+	}
+	return keepcfg;
+}
+
+
+void
+rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
+{
+	if (id_core < RTE_KEEPALIVE_MAXCORES)
+		keepcfg->active_cores[id_core] = 1;
+}
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index d62196e..05a44d7 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -90,6 +90,7 @@  SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_thread.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_malloc.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_elem.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_keepalive.c
 
 CFLAGS_eal.o := -D_GNU_SOURCE
 CFLAGS_eal_interrupts.o := -D_GNU_SOURCE