[v2,2/6] eal: oops handling API implementation
Checks
Commit Message
From: Jerin Jacob <jerinj@marvell.com>
Implement the base oops handling APIs.
Signed-off-by: Jerin Jacob <jerinj@marvell.com>
---
lib/eal/unix/eal_oops.c | 176 ++++++++++++++++++++++++++++++++++++++--
1 file changed, 169 insertions(+), 7 deletions(-)
Comments
On Tue, 17 Aug 2021 08:57:19 +0530
<jerinj@marvell.com> wrote:
> +#define oops_print(...) rte_log(RTE_LOG_ERR, RTE_LOGTYPE_EAL, __VA_ARGS__)
It is problematic to call rte_log from a signal handler.
The malloc pool maybe corrupted and rte_log can call functions that
use malloc.
Even rte_dump_stack() is unsafe from these signals.
> +
> +static int oops_signals[] = {SIGSEGV, SIGBUS, SIGILL, SIGABRT, SIGFPE, SIGSYS};
Should be constant.
> +
> +struct oops_signal {
> + int sig;
Redundant, you defined the oops_signals above.
> + bool enabled;
Redundant, you can just compare with action.
> + struct sigaction sa;
> +};
> +
> +static struct oops_signal signals_db[RTE_DIM(oops_signals)];
> +
> +static void
> +back_trace_dump(ucontext_t *context)
> +{
> + RTE_SET_USED(context);
> +
> + rte_dump_stack();
> +}
rte_dump_stack() is not safe in signal handler:
Recommend backtrace_symbols_fd ??
Better yet use libunwind
> +static void
> +siginfo_dump(int sig, siginfo_t *info)
> +{
> + oops_print("PID: %" PRIdMAX "\n", (intmax_t)getpid());
> +
> + if (info == NULL)
> + return;
> + if (sig != info->si_signo)
> + oops_print("Invalid signal info\n");
> +
> + oops_print("Signal number: %d\n", info->si_signo);
> + oops_print("Fault address: %p\n", info->si_addr);
> +}
> +
> +static void
> +mem32_dump(void *ptr)
Should be const
> +{
> + uint32_t *p = ptr;
> + int i;
> +
> + for (i = 0; i < 16; i++)
> + oops_print("%p: 0x%x\n", p + i, rte_be_to_cpu_32(p[i]));
> +}
Why reinvent hexdump?
> +
> +static void
> +stack_dump_header(void)
> +{
> + oops_print("Stack dump:\n");
> + oops_print("----------\n");
> +}
> +
> +static void
> +code_dump_header(void)
> +{
> + oops_print("Code dump:\n");
> + oops_print("----------\n");
> +}
> +
> +static void
> +stack_code_dump(void *stack, void *code)
> +{
> + if (stack == NULL || code == NULL)
> + return;
> +
> + oops_print("\n");
> + stack_dump_header();
> + mem32_dump(stack);
> + oops_print("\n");
> +
> + code_dump_header();
> + mem32_dump(code);
> + oops_print("\n");
> +}
> +static void
> +archinfo_dump(ucontext_t *uc)
> {
> - RTE_SET_USED(sig);
> - RTE_SET_USED(info);
> RTE_SET_USED(uc);
>
> + stack_code_dump(NULL, NULL);
> +}
> +
> +static void
> +default_signal_handler_invoke(int sig)
> +{
> + unsigned int idx;
> +
> + for (idx = 0; idx < RTE_DIM(oops_signals); idx++) {
> + /* Skip disabled signals */
> + if (signals_db[idx].sig != sig)
> + continue;
> + if (!signals_db[idx].enabled)
> + continue;
> + /* Replace with stored handler */
> + sigaction(sig, &signals_db[idx].sa, NULL);
> + kill(getpid(), sig);
If you use SA_RESETHAND, you don't need this stuff.
> + }
> +}
> +
> +void
> +rte_oops_decode(int sig, siginfo_t *info, ucontext_t *uc)
> +{
> + oops_print("Signal info:\n");
> + oops_print("------------\n");
> + siginfo_dump(sig, info);
> + oops_print("\n");
> +
> + oops_print("Backtrace:\n");
> + oops_print("----------\n");
> + back_trace_dump(uc);
> + oops_print("\n");
> +
> + oops_print("Arch info:\n");
> + oops_print("----------\n");
> + if (uc)
> + archinfo_dump(uc);
> +}
> +
> +static void
> +eal_oops_handler(int sig, siginfo_t *info, void *ctx)
> +{
> + ucontext_t *uc = ctx;
> +
> + rte_oops_decode(sig, info, uc);
> + default_signal_handler_invoke(sig);
If you use SA_RESETHAND, then just doing raise(sig) here.
> }
>
> int
> rte_oops_signals_enabled(int *signals)
Why is this necessary and exported?
> {
> - RTE_SET_USED(signals);
> + int count = 0, sig[RTE_OOPS_SIGNALS_MAX];
> + unsigned int idx = 0;
>
> - return 0;
> + for (idx = 0; idx < RTE_DIM(oops_signals); idx++) {
> + if (signals_db[idx].enabled) {
> + sig[count] = signals_db[idx].sig;
> + count++;
> + }
> + }
> + if (signals)
> + memcpy(signals, sig, sizeof(*signals) * count);
> +
> + return count;
> }
>
> int
> eal_oops_init(void)
> {
> - return 0;
> + unsigned int idx, rc = 0;
> + struct sigaction sa;
> +
> + RTE_BUILD_BUG_ON(RTE_DIM(oops_signals) > RTE_OOPS_SIGNALS_MAX);
> +
> + sigemptyset(&sa.sa_mask);
> + sa.sa_sigaction = &eal_oops_handler;
> + sa.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK;
> +
> + for (idx = 0; idx < RTE_DIM(oops_signals); idx++) {
> + signals_db[idx].sig = oops_signals[idx];
> + /* Get exiting sigaction */
> + rc = sigaction(signals_db[idx].sig, NULL, &signals_db[idx].sa);
> + if (rc)
> + continue;
> + /* Replace with oops handler */
> + rc = sigaction(signals_db[idx].sig, &sa, NULL);
> + if (rc)
> + continue;
> + signals_db[idx].enabled = true;
> + }
> + return rc;
> }
>
> void
> eal_oops_fini(void)
> {
> + unsigned int idx;
> +
> + for (idx = 0; idx < RTE_DIM(oops_signals); idx++) {
> + if (!signals_db[idx].enabled)
> + continue;
> + /* Replace with stored handler */
> + sigaction(signals_db[idx].sig, &signals_db[idx].sa, NULL);
> + }
> }
On Tue, Aug 17, 2021 at 9:22 AM Stephen Hemminger
<stephen@networkplumber.org> wrote:
>
> On Tue, 17 Aug 2021 08:57:19 +0530
> <jerinj@marvell.com> wrote:
>
> > +#define oops_print(...) rte_log(RTE_LOG_ERR, RTE_LOGTYPE_EAL, __VA_ARGS__)
>
> It is problematic to call rte_log from a signal handler.
> The malloc pool maybe corrupted and rte_log can call functions that
> use malloc.
OK. What to use instead, fprint(stderr, ...)?
>
> Even rte_dump_stack() is unsafe from these signals.
OK
>
> > +
> > +static int oops_signals[] = {SIGSEGV, SIGBUS, SIGILL, SIGABRT, SIGFPE, SIGSYS};
>
> Should be constant.
Ack
>
> > +
> > +struct oops_signal {
> > + int sig;
>
> Redundant, you defined the oops_signals above.
Ack.
>
> > + bool enabled;
>
> Redundant, you can just compare with action.
Anyway, we need to database to hold the sigactions. This makes clean
to implement rte_oops_signals_enabled().
Also != SIG_DFL is not enabled.
>
> > + struct sigaction sa;
> > +};
> > +
> > +static struct oops_signal signals_db[RTE_DIM(oops_signals)];
> > +
> > +static void
> > +back_trace_dump(ucontext_t *context)
> > +{
> > + RTE_SET_USED(context);
> > +
> > + rte_dump_stack();
> > +}
>
> rte_dump_stack() is not safe in signal handler:
>
> Recommend backtrace_symbols_fd ??
>
> Better yet use libunwind
libunwind is an optional dependency. You can see in the next patch,
back_trace_dump() will be implemented with libunwind based stack unwind,
if the dependency is met.
>
> > +static void
> > +siginfo_dump(int sig, siginfo_t *info)
> > +{
> > + oops_print("PID: %" PRIdMAX "\n", (intmax_t)getpid());
> > +
> > + if (info == NULL)
> > + return;
> > + if (sig != info->si_signo)
> > + oops_print("Invalid signal info\n");
> > +
> > + oops_print("Signal number: %d\n", info->si_signo);
> > + oops_print("Fault address: %p\n", info->si_addr);
> > +}
> > +
> > +static void
> > +mem32_dump(void *ptr)
>
> Should be const
Ack.
>
> > +{
> > + uint32_t *p = ptr;
> > + int i;
> > +
> > + for (i = 0; i < 16; i++)
> > + oops_print("%p: 0x%x\n", p + i, rte_be_to_cpu_32(p[i]));
> > +}
>
> Why reinvent hexdump?
Make sense. I can change to hexdump, But, it will use rte_log. Shouldn't we use
fprint(stderr,..) variant.
>
> > +
> > +static void
> > +stack_dump_header(void)
> > +{
> > + oops_print("Stack dump:\n");
> > + oops_print("----------\n");
> > +}
> > +
> > +static void
> > +code_dump_header(void)
> > +{
> > + oops_print("Code dump:\n");
> > + oops_print("----------\n");
> > +}
> > +
> > +static void
> > +stack_code_dump(void *stack, void *code)
> > +{
> > + if (stack == NULL || code == NULL)
> > + return;
> > +
> > + oops_print("\n");
> > + stack_dump_header();
> > + mem32_dump(stack);
> > + oops_print("\n");
> > +
> > + code_dump_header();
> > + mem32_dump(code);
> > + oops_print("\n");
> > +}
> > +static void
> > +archinfo_dump(ucontext_t *uc)
> > {
> > - RTE_SET_USED(sig);
> > - RTE_SET_USED(info);
> > RTE_SET_USED(uc);
> >
> > + stack_code_dump(NULL, NULL);
> > +}
> > +
> > +static void
> > +default_signal_handler_invoke(int sig)
> > +{
> > + unsigned int idx;
> > +
> > + for (idx = 0; idx < RTE_DIM(oops_signals); idx++) {
> > + /* Skip disabled signals */
> > + if (signals_db[idx].sig != sig)
> > + continue;
> > + if (!signals_db[idx].enabled)
> > + continue;
> > + /* Replace with stored handler */
> > + sigaction(sig, &signals_db[idx].sa, NULL);
> > + kill(getpid(), sig);
>
> If you use SA_RESETHAND, you don't need this stuff.
As mentioned in other 1/6 email reply, This is NOT the case where
SIG_DFL handler
called from eal oops handler, instead, it will be calling the signal
handler which
is registered prior to rte_eal_init() which is stored local database.
>
> > + }
> > +}
> > +
> > +void
> > +rte_oops_decode(int sig, siginfo_t *info, ucontext_t *uc)
> > +{
> > + oops_print("Signal info:\n");
> > + oops_print("------------\n");
> > + siginfo_dump(sig, info);
> > + oops_print("\n");
> > +
> > + oops_print("Backtrace:\n");
> > + oops_print("----------\n");
> > + back_trace_dump(uc);
> > + oops_print("\n");
> > +
> > + oops_print("Arch info:\n");
> > + oops_print("----------\n");
> > + if (uc)
> > + archinfo_dump(uc);
> > +}
> > +
> > +static void
> > +eal_oops_handler(int sig, siginfo_t *info, void *ctx)
> > +{
> > + ucontext_t *uc = ctx;
> > +
> > + rte_oops_decode(sig, info, uc);
> > + default_signal_handler_invoke(sig);
>
> If you use SA_RESETHAND, then just doing raise(sig) here.
> > }
> >
> > int
> > rte_oops_signals_enabled(int *signals)
>
> Why is this necessary and exported?
Explained in 1/6 email reply.
@@ -2,35 +2,197 @@
* Copyright(C) 2021 Marvell.
*/
+#include <inttypes.h>
+#include <signal.h>
+#include <ucontext.h>
+#include <unistd.h>
+#include <rte_byteorder.h>
+#include <rte_debug.h>
+#include <rte_log.h>
#include <rte_oops.h>
#include "eal_private.h"
-void
-rte_oops_decode(int sig, siginfo_t *info, ucontext_t *uc)
+#define oops_print(...) rte_log(RTE_LOG_ERR, RTE_LOGTYPE_EAL, __VA_ARGS__)
+
+static int oops_signals[] = {SIGSEGV, SIGBUS, SIGILL, SIGABRT, SIGFPE, SIGSYS};
+
+struct oops_signal {
+ int sig;
+ bool enabled;
+ struct sigaction sa;
+};
+
+static struct oops_signal signals_db[RTE_DIM(oops_signals)];
+
+static void
+back_trace_dump(ucontext_t *context)
+{
+ RTE_SET_USED(context);
+
+ rte_dump_stack();
+}
+static void
+siginfo_dump(int sig, siginfo_t *info)
+{
+ oops_print("PID: %" PRIdMAX "\n", (intmax_t)getpid());
+
+ if (info == NULL)
+ return;
+ if (sig != info->si_signo)
+ oops_print("Invalid signal info\n");
+
+ oops_print("Signal number: %d\n", info->si_signo);
+ oops_print("Fault address: %p\n", info->si_addr);
+}
+
+static void
+mem32_dump(void *ptr)
+{
+ uint32_t *p = ptr;
+ int i;
+
+ for (i = 0; i < 16; i++)
+ oops_print("%p: 0x%x\n", p + i, rte_be_to_cpu_32(p[i]));
+}
+
+static void
+stack_dump_header(void)
+{
+ oops_print("Stack dump:\n");
+ oops_print("----------\n");
+}
+
+static void
+code_dump_header(void)
+{
+ oops_print("Code dump:\n");
+ oops_print("----------\n");
+}
+
+static void
+stack_code_dump(void *stack, void *code)
+{
+ if (stack == NULL || code == NULL)
+ return;
+
+ oops_print("\n");
+ stack_dump_header();
+ mem32_dump(stack);
+ oops_print("\n");
+
+ code_dump_header();
+ mem32_dump(code);
+ oops_print("\n");
+}
+static void
+archinfo_dump(ucontext_t *uc)
{
- RTE_SET_USED(sig);
- RTE_SET_USED(info);
RTE_SET_USED(uc);
+ stack_code_dump(NULL, NULL);
+}
+
+static void
+default_signal_handler_invoke(int sig)
+{
+ unsigned int idx;
+
+ for (idx = 0; idx < RTE_DIM(oops_signals); idx++) {
+ /* Skip disabled signals */
+ if (signals_db[idx].sig != sig)
+ continue;
+ if (!signals_db[idx].enabled)
+ continue;
+ /* Replace with stored handler */
+ sigaction(sig, &signals_db[idx].sa, NULL);
+ kill(getpid(), sig);
+ }
+}
+
+void
+rte_oops_decode(int sig, siginfo_t *info, ucontext_t *uc)
+{
+ oops_print("Signal info:\n");
+ oops_print("------------\n");
+ siginfo_dump(sig, info);
+ oops_print("\n");
+
+ oops_print("Backtrace:\n");
+ oops_print("----------\n");
+ back_trace_dump(uc);
+ oops_print("\n");
+
+ oops_print("Arch info:\n");
+ oops_print("----------\n");
+ if (uc)
+ archinfo_dump(uc);
+}
+
+static void
+eal_oops_handler(int sig, siginfo_t *info, void *ctx)
+{
+ ucontext_t *uc = ctx;
+
+ rte_oops_decode(sig, info, uc);
+ default_signal_handler_invoke(sig);
}
int
rte_oops_signals_enabled(int *signals)
{
- RTE_SET_USED(signals);
+ int count = 0, sig[RTE_OOPS_SIGNALS_MAX];
+ unsigned int idx = 0;
- return 0;
+ for (idx = 0; idx < RTE_DIM(oops_signals); idx++) {
+ if (signals_db[idx].enabled) {
+ sig[count] = signals_db[idx].sig;
+ count++;
+ }
+ }
+ if (signals)
+ memcpy(signals, sig, sizeof(*signals) * count);
+
+ return count;
}
int
eal_oops_init(void)
{
- return 0;
+ unsigned int idx, rc = 0;
+ struct sigaction sa;
+
+ RTE_BUILD_BUG_ON(RTE_DIM(oops_signals) > RTE_OOPS_SIGNALS_MAX);
+
+ sigemptyset(&sa.sa_mask);
+ sa.sa_sigaction = &eal_oops_handler;
+ sa.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK;
+
+ for (idx = 0; idx < RTE_DIM(oops_signals); idx++) {
+ signals_db[idx].sig = oops_signals[idx];
+ /* Get exiting sigaction */
+ rc = sigaction(signals_db[idx].sig, NULL, &signals_db[idx].sa);
+ if (rc)
+ continue;
+ /* Replace with oops handler */
+ rc = sigaction(signals_db[idx].sig, &sa, NULL);
+ if (rc)
+ continue;
+ signals_db[idx].enabled = true;
+ }
+ return rc;
}
void
eal_oops_fini(void)
{
+ unsigned int idx;
+
+ for (idx = 0; idx < RTE_DIM(oops_signals); idx++) {
+ if (!signals_db[idx].enabled)
+ continue;
+ /* Replace with stored handler */
+ sigaction(signals_db[idx].sig, &signals_db[idx].sa, NULL);
+ }
}