[1/7] vhost: fix VDUSE device destruction failure

Message ID 20240229122502.2572343-2-maxime.coquelin@redhat.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Headers
Series vhost: FD manager improvements |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Maxime Coquelin Feb. 29, 2024, 12:24 p.m. UTC
  VDUSE_DESTROY_DEVICE ioctl can fail because the device's
chardev is not released despite close syscall having been
called. It happens because the events handler thread is
still polling the file descriptor.

fdset_pipe_notify() is not enough because it does not
ensure the notification has been handled by the event
thread, it just returns once the notification is sent.

To fix this, this patch introduces a synchronization
mechanism based on pthread's condition, so that
fdset_pipe_notify() only returns once the pipe's read
callback has been executed.

Fixes: 51d018fdac4e ("vhost: add VDUSE events handler")
Cc: stable@dpdk.org

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/vhost/fd_man.c | 21 ++++++++++++++++++---
 lib/vhost/fd_man.h |  5 +++++
 2 files changed, 23 insertions(+), 3 deletions(-)
  

Comments

David Marchand Feb. 29, 2024, 1:31 p.m. UTC | #1
Hey Maxime,

On Thu, Feb 29, 2024 at 1:25 PM Maxime Coquelin
<maxime.coquelin@redhat.com> wrote:
>
> VDUSE_DESTROY_DEVICE ioctl can fail because the device's
> chardev is not released despite close syscall having been
> called. It happens because the events handler thread is
> still polling the file descriptor.
>
> fdset_pipe_notify() is not enough because it does not
> ensure the notification has been handled by the event
> thread, it just returns once the notification is sent.
>
> To fix this, this patch introduces a synchronization
> mechanism based on pthread's condition, so that
> fdset_pipe_notify() only returns once the pipe's read
> callback has been executed.
>
> Fixes: 51d018fdac4e ("vhost: add VDUSE events handler")

This looks to be a generic issue in the fd_man code.
In practice, VDUSE only seems to be affected, so I am ok with this Fixes: tag.


> Cc: stable@dpdk.org
>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  lib/vhost/fd_man.c | 21 ++++++++++++++++++---
>  lib/vhost/fd_man.h |  5 +++++
>  2 files changed, 23 insertions(+), 3 deletions(-)
>
> diff --git a/lib/vhost/fd_man.c b/lib/vhost/fd_man.c
> index 79a8d2c006..42ce059039 100644
> --- a/lib/vhost/fd_man.c
> +++ b/lib/vhost/fd_man.c
> @@ -309,10 +309,11 @@ fdset_event_dispatch(void *arg)
>  }
>
>  static void
> -fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
> +fdset_pipe_read_cb(int readfd, void *dat,
>                    int *remove __rte_unused)
>  {
>         char charbuf[16];
> +       struct fdset *fdset = dat;
>         int r = read(readfd, charbuf, sizeof(charbuf));
>         /*
>          * Just an optimization, we don't care if read() failed
> @@ -320,6 +321,11 @@ fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
>          * compiler happy
>          */
>         RTE_SET_USED(r);
> +
> +       pthread_mutex_lock(&fdset->sync_mutex);
> +       fdset->sync = true;
> +       pthread_cond_broadcast(&fdset->sync_cond);
> +       pthread_mutex_unlock(&fdset->sync_mutex);
>  }
>
>  void
> @@ -342,7 +348,7 @@ fdset_pipe_init(struct fdset *fdset)
>         }
>
>         ret = fdset_add(fdset, fdset->u.readfd,
> -                       fdset_pipe_read_cb, NULL, NULL);
> +                       fdset_pipe_read_cb, NULL, fdset);
>
>         if (ret < 0) {
>                 VHOST_FDMAN_LOG(ERR,
> @@ -359,7 +365,12 @@ fdset_pipe_init(struct fdset *fdset)
>  void
>  fdset_pipe_notify(struct fdset *fdset)
>  {
> -       int r = write(fdset->u.writefd, "1", 1);
> +       int r;
> +
> +       pthread_mutex_lock(&fdset->sync_mutex);
> +
> +       fdset->sync = false;
> +       r = write(fdset->u.writefd, "1", 1);
>         /*
>          * Just an optimization, we don't care if write() failed
>          * so ignore explicitly its return value to make the
> @@ -367,4 +378,8 @@ fdset_pipe_notify(struct fdset *fdset)
>          */
>         RTE_SET_USED(r);
>
> +       while (!fdset->sync)
> +               pthread_cond_wait(&fdset->sync_cond, &fdset->sync_mutex);
> +
> +       pthread_mutex_unlock(&fdset->sync_mutex);
>  }
> diff --git a/lib/vhost/fd_man.h b/lib/vhost/fd_man.h
> index 6315904c8e..cc19937612 100644
> --- a/lib/vhost/fd_man.h
> +++ b/lib/vhost/fd_man.h
> @@ -6,6 +6,7 @@
>  #define _FD_MAN_H_
>  #include <pthread.h>
>  #include <poll.h>
> +#include <stdbool.h>
>
>  #define MAX_FDS 1024
>
> @@ -35,6 +36,10 @@ struct fdset {
>                         int writefd;
>                 };
>         } u;
> +
> +       pthread_mutex_t sync_mutex;
> +       pthread_cond_t sync_cond;
> +       bool sync;

We should explicitly initialise those in
https://git.dpdk.org/dpdk/tree/lib/vhost/socket.c#n91 and
https://git.dpdk.org/dpdk/tree/lib/vhost/vduse.c#n34.
The rest looks acceptable to me.
  

Patch

diff --git a/lib/vhost/fd_man.c b/lib/vhost/fd_man.c
index 79a8d2c006..42ce059039 100644
--- a/lib/vhost/fd_man.c
+++ b/lib/vhost/fd_man.c
@@ -309,10 +309,11 @@  fdset_event_dispatch(void *arg)
 }
 
 static void
-fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
+fdset_pipe_read_cb(int readfd, void *dat,
 		   int *remove __rte_unused)
 {
 	char charbuf[16];
+	struct fdset *fdset = dat;
 	int r = read(readfd, charbuf, sizeof(charbuf));
 	/*
 	 * Just an optimization, we don't care if read() failed
@@ -320,6 +321,11 @@  fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
 	 * compiler happy
 	 */
 	RTE_SET_USED(r);
+
+	pthread_mutex_lock(&fdset->sync_mutex);
+	fdset->sync = true;
+	pthread_cond_broadcast(&fdset->sync_cond);
+	pthread_mutex_unlock(&fdset->sync_mutex);
 }
 
 void
@@ -342,7 +348,7 @@  fdset_pipe_init(struct fdset *fdset)
 	}
 
 	ret = fdset_add(fdset, fdset->u.readfd,
-			fdset_pipe_read_cb, NULL, NULL);
+			fdset_pipe_read_cb, NULL, fdset);
 
 	if (ret < 0) {
 		VHOST_FDMAN_LOG(ERR,
@@ -359,7 +365,12 @@  fdset_pipe_init(struct fdset *fdset)
 void
 fdset_pipe_notify(struct fdset *fdset)
 {
-	int r = write(fdset->u.writefd, "1", 1);
+	int r;
+
+	pthread_mutex_lock(&fdset->sync_mutex);
+
+	fdset->sync = false;
+	r = write(fdset->u.writefd, "1", 1);
 	/*
 	 * Just an optimization, we don't care if write() failed
 	 * so ignore explicitly its return value to make the
@@ -367,4 +378,8 @@  fdset_pipe_notify(struct fdset *fdset)
 	 */
 	RTE_SET_USED(r);
 
+	while (!fdset->sync)
+		pthread_cond_wait(&fdset->sync_cond, &fdset->sync_mutex);
+
+	pthread_mutex_unlock(&fdset->sync_mutex);
 }
diff --git a/lib/vhost/fd_man.h b/lib/vhost/fd_man.h
index 6315904c8e..cc19937612 100644
--- a/lib/vhost/fd_man.h
+++ b/lib/vhost/fd_man.h
@@ -6,6 +6,7 @@ 
 #define _FD_MAN_H_
 #include <pthread.h>
 #include <poll.h>
+#include <stdbool.h>
 
 #define MAX_FDS 1024
 
@@ -35,6 +36,10 @@  struct fdset {
 			int writefd;
 		};
 	} u;
+
+	pthread_mutex_t sync_mutex;
+	pthread_cond_t sync_cond;
+	bool sync;
 };