[v2] lib/eal: fix segfaults due to thread exit order
Checks
Commit Message
From: Zhichao Zeng <zhichaox.zeng@intel.com>
The eal-intr-thread is not closed before memory cleanup in the
process of exiting. There is a small probability that when the
eal-intr-thread is about to use some pointers, the memory were
just cleaned, which cause the segment fault error caught by ASan.
This patch close the eal-intr-thread before memory cleanup when
exiting to avoid segment fault.
Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>
---
v2: add the same API for FreeBSD
---
lib/eal/common/eal_private.h | 7 +++++++
lib/eal/freebsd/eal.c | 1 +
lib/eal/freebsd/eal_interrupts.c | 12 ++++++++++++
lib/eal/linux/eal.c | 1 +
lib/eal/linux/eal_interrupts.c | 12 ++++++++++++
5 files changed, 33 insertions(+)
Comments
On Mon, May 23, 2022 at 5:17 AM <zhichaox.zeng@intel.com> wrote:
>
> From: Zhichao Zeng <zhichaox.zeng@intel.com>
>
> The eal-intr-thread is not closed before memory cleanup in the
> process of exiting. There is a small probability that when the
> eal-intr-thread is about to use some pointers, the memory were
> just cleaned, which cause the segment fault error caught by ASan.
>
> This patch close the eal-intr-thread before memory cleanup when
> exiting to avoid segment fault.
This breaks the debug_autotest unit test.
It results in a segfault in a forked process executing
rte_exit()/rte_eal_cleanup().
That's probably because intr_thread thread does not exist in the forked process.
On Mon, May 23, 2022 at 2:10 PM David Marchand
<david.marchand@redhat.com> wrote:
>
> On Mon, May 23, 2022 at 5:17 AM <zhichaox.zeng@intel.com> wrote:
> >
> > From: Zhichao Zeng <zhichaox.zeng@intel.com>
> >
> > The eal-intr-thread is not closed before memory cleanup in the
> > process of exiting. There is a small probability that when the
> > eal-intr-thread is about to use some pointers, the memory were
> > just cleaned, which cause the segment fault error caught by ASan.
> >
> > This patch close the eal-intr-thread before memory cleanup when
> > exiting to avoid segment fault.
>
> This breaks the debug_autotest unit test.
> It results in a segfault in a forked process executing
> rte_exit()/rte_eal_cleanup().
>
> That's probably because intr_thread thread does not exist in the forked process.
Reading fork() manual:
* The child process is created with a single thread—the one
that called fork(). The entire virtual address space of the parent is
replicated in the child, including the states of mutexes, condi‐
tion variables, and other pthreads objects; the use of
pthread_atfork(3) may be helpful for dealing with problems that this
can cause.
We may need a check like diff below.
But then, debug_autotest code seems dangerous, because it does exactly
what the added check wants to warn about.
Opinions?
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 1ef263434a..1e6fd01d5d 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -857,12 +857,25 @@ is_iommu_enabled(void)
return n > 2;
}
+static uint32_t run_once;
+
+static void warn_parent(void)
+{
+ RTE_LOG(WARNING, EAL, "fork() was called, DPDK won't work in the child "
+ "process unless it calls rte_eal_init()\n");
+}
+
+static void scratch_child(void)
+{
+ /* Scratch run_once so that a call to rte_eal_cleanup won't crash... */
+ __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
+}
+
/* Launch threads, called at application init(). */
int
rte_eal_init(int argc, char **argv)
{
int i, fctret, ret;
- static uint32_t run_once;
uint32_t has_run = 0;
const char *p;
static char logid[PATH_MAX];
@@ -1228,6 +1241,8 @@ rte_eal_init(int argc, char **argv)
eal_mcfg_complete();
+ pthread_atfork(NULL, warn_parent, scratch_child);
+
return fctret;
}
@@ -1257,6 +1272,9 @@ rte_eal_cleanup(void)
struct internal_config *internal_conf =
eal_get_internal_configuration();
+ if (__atomic_load_n(&run_once, __ATOMIC_RELAXED) == 0)
+ return 0;
+
if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
internal_conf->hugepage_file.unlink_existing)
rte_memseg_walk(mark_freeable, NULL);
@@ -152,6 +152,13 @@ int rte_eal_tailqs_init(void);
*/
int rte_eal_intr_init(void);
+/**
+ * Destroy interrupt handling thread.
+ *
+ * This function is private to EAL.
+ */
+void rte_eal_intr_destroy(void);
+
/**
* Close the default log stream
*
@@ -893,6 +893,7 @@ rte_eal_cleanup(void)
eal_get_internal_configuration();
rte_service_finalize();
rte_mp_channel_cleanup();
+ rte_eal_intr_destroy();
/* after this point, any DPDK pointers will become dangling */
rte_eal_memory_detach();
rte_eal_alarm_cleanup();
@@ -648,6 +648,18 @@ rte_eal_intr_init(void)
return ret;
}
+void
+rte_eal_intr_destroy(void)
+{
+ /* cancel the host thread to wait/handle the interrupt */
+ pthread_cancel(intr_thread);
+ pthread_join(intr_thread, NULL);
+
+ /* close kqueue */
+ close(kq);
+ kq = -1;
+}
+
int
rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
int epfd, int op, unsigned int vec, void *data)
@@ -1266,6 +1266,7 @@ rte_eal_cleanup(void)
vfio_mp_sync_cleanup();
#endif
rte_mp_channel_cleanup();
+ rte_eal_intr_destroy();
/* after this point, any DPDK pointers will become dangling */
rte_eal_memory_detach();
eal_mp_dev_hotplug_cleanup();
@@ -1199,6 +1199,18 @@ rte_eal_intr_init(void)
return ret;
}
+void
+rte_eal_intr_destroy(void)
+{
+ /* cancel the host thread to wait/handle the interrupt */
+ pthread_cancel(intr_thread);
+ pthread_join(intr_thread, NULL);
+
+ /* close the pipe used by epoll */
+ close(intr_pipe.writefd);
+ close(intr_pipe.readfd);
+}
+
static void
eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
{