[1/7] vhost: fix VDUSE device destruction failure
Checks
Commit Message
VDUSE_DESTROY_DEVICE ioctl can fail because the device's
chardev is not released despite close syscall having been
called. It happens because the events handler thread is
still polling the file descriptor.
fdset_pipe_notify() is not enough because it does not
ensure the notification has been handled by the event
thread, it just returns once the notification is sent.
To fix this, this patch introduces a synchronization
mechanism based on pthread's condition, so that
fdset_pipe_notify() only returns once the pipe's read
callback has been executed.
Fixes: 51d018fdac4e ("vhost: add VDUSE events handler")
Cc: stable@dpdk.org
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
lib/vhost/fd_man.c | 21 ++++++++++++++++++---
lib/vhost/fd_man.h | 5 +++++
2 files changed, 23 insertions(+), 3 deletions(-)
Comments
Hey Maxime,
On Thu, Feb 29, 2024 at 1:25 PM Maxime Coquelin
<maxime.coquelin@redhat.com> wrote:
>
> VDUSE_DESTROY_DEVICE ioctl can fail because the device's
> chardev is not released despite close syscall having been
> called. It happens because the events handler thread is
> still polling the file descriptor.
>
> fdset_pipe_notify() is not enough because it does not
> ensure the notification has been handled by the event
> thread, it just returns once the notification is sent.
>
> To fix this, this patch introduces a synchronization
> mechanism based on pthread's condition, so that
> fdset_pipe_notify() only returns once the pipe's read
> callback has been executed.
>
> Fixes: 51d018fdac4e ("vhost: add VDUSE events handler")
This looks to be a generic issue in the fd_man code.
In practice, VDUSE only seems to be affected, so I am ok with this Fixes: tag.
> Cc: stable@dpdk.org
>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
> lib/vhost/fd_man.c | 21 ++++++++++++++++++---
> lib/vhost/fd_man.h | 5 +++++
> 2 files changed, 23 insertions(+), 3 deletions(-)
>
> diff --git a/lib/vhost/fd_man.c b/lib/vhost/fd_man.c
> index 79a8d2c006..42ce059039 100644
> --- a/lib/vhost/fd_man.c
> +++ b/lib/vhost/fd_man.c
> @@ -309,10 +309,11 @@ fdset_event_dispatch(void *arg)
> }
>
> static void
> -fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
> +fdset_pipe_read_cb(int readfd, void *dat,
> int *remove __rte_unused)
> {
> char charbuf[16];
> + struct fdset *fdset = dat;
> int r = read(readfd, charbuf, sizeof(charbuf));
> /*
> * Just an optimization, we don't care if read() failed
> @@ -320,6 +321,11 @@ fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
> * compiler happy
> */
> RTE_SET_USED(r);
> +
> + pthread_mutex_lock(&fdset->sync_mutex);
> + fdset->sync = true;
> + pthread_cond_broadcast(&fdset->sync_cond);
> + pthread_mutex_unlock(&fdset->sync_mutex);
> }
>
> void
> @@ -342,7 +348,7 @@ fdset_pipe_init(struct fdset *fdset)
> }
>
> ret = fdset_add(fdset, fdset->u.readfd,
> - fdset_pipe_read_cb, NULL, NULL);
> + fdset_pipe_read_cb, NULL, fdset);
>
> if (ret < 0) {
> VHOST_FDMAN_LOG(ERR,
> @@ -359,7 +365,12 @@ fdset_pipe_init(struct fdset *fdset)
> void
> fdset_pipe_notify(struct fdset *fdset)
> {
> - int r = write(fdset->u.writefd, "1", 1);
> + int r;
> +
> + pthread_mutex_lock(&fdset->sync_mutex);
> +
> + fdset->sync = false;
> + r = write(fdset->u.writefd, "1", 1);
> /*
> * Just an optimization, we don't care if write() failed
> * so ignore explicitly its return value to make the
> @@ -367,4 +378,8 @@ fdset_pipe_notify(struct fdset *fdset)
> */
> RTE_SET_USED(r);
>
> + while (!fdset->sync)
> + pthread_cond_wait(&fdset->sync_cond, &fdset->sync_mutex);
> +
> + pthread_mutex_unlock(&fdset->sync_mutex);
> }
> diff --git a/lib/vhost/fd_man.h b/lib/vhost/fd_man.h
> index 6315904c8e..cc19937612 100644
> --- a/lib/vhost/fd_man.h
> +++ b/lib/vhost/fd_man.h
> @@ -6,6 +6,7 @@
> #define _FD_MAN_H_
> #include <pthread.h>
> #include <poll.h>
> +#include <stdbool.h>
>
> #define MAX_FDS 1024
>
> @@ -35,6 +36,10 @@ struct fdset {
> int writefd;
> };
> } u;
> +
> + pthread_mutex_t sync_mutex;
> + pthread_cond_t sync_cond;
> + bool sync;
We should explicitly initialise those in
https://git.dpdk.org/dpdk/tree/lib/vhost/socket.c#n91 and
https://git.dpdk.org/dpdk/tree/lib/vhost/vduse.c#n34.
The rest looks acceptable to me.
@@ -309,10 +309,11 @@ fdset_event_dispatch(void *arg)
}
static void
-fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
+fdset_pipe_read_cb(int readfd, void *dat,
int *remove __rte_unused)
{
char charbuf[16];
+ struct fdset *fdset = dat;
int r = read(readfd, charbuf, sizeof(charbuf));
/*
* Just an optimization, we don't care if read() failed
@@ -320,6 +321,11 @@ fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
* compiler happy
*/
RTE_SET_USED(r);
+
+ pthread_mutex_lock(&fdset->sync_mutex);
+ fdset->sync = true;
+ pthread_cond_broadcast(&fdset->sync_cond);
+ pthread_mutex_unlock(&fdset->sync_mutex);
}
void
@@ -342,7 +348,7 @@ fdset_pipe_init(struct fdset *fdset)
}
ret = fdset_add(fdset, fdset->u.readfd,
- fdset_pipe_read_cb, NULL, NULL);
+ fdset_pipe_read_cb, NULL, fdset);
if (ret < 0) {
VHOST_FDMAN_LOG(ERR,
@@ -359,7 +365,12 @@ fdset_pipe_init(struct fdset *fdset)
void
fdset_pipe_notify(struct fdset *fdset)
{
- int r = write(fdset->u.writefd, "1", 1);
+ int r;
+
+ pthread_mutex_lock(&fdset->sync_mutex);
+
+ fdset->sync = false;
+ r = write(fdset->u.writefd, "1", 1);
/*
* Just an optimization, we don't care if write() failed
* so ignore explicitly its return value to make the
@@ -367,4 +378,8 @@ fdset_pipe_notify(struct fdset *fdset)
*/
RTE_SET_USED(r);
+ while (!fdset->sync)
+ pthread_cond_wait(&fdset->sync_cond, &fdset->sync_mutex);
+
+ pthread_mutex_unlock(&fdset->sync_mutex);
}
@@ -6,6 +6,7 @@
#define _FD_MAN_H_
#include <pthread.h>
#include <poll.h>
+#include <stdbool.h>
#define MAX_FDS 1024
@@ -35,6 +36,10 @@ struct fdset {
int writefd;
};
} u;
+
+ pthread_mutex_t sync_mutex;
+ pthread_cond_t sync_cond;
+ bool sync;
};