[16/28] vhost: move postcopy live migration code

Message ID 1560957293-17294-17-git-send-email-ndragazis@arrikto.com (mailing list archive)
State RFC, archived
Delegated to: Maxime Coquelin
Headers
Series vhost: add virtio-vhost-user transport |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Nikos Dragazis June 19, 2019, 3:14 p.m. UTC
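  Postcopy live migration is an AF_UNIX-bound feature due to the
userfaultfd mechanism (the userfault fd is passed to the master as a
file descriptor in the reply message). Therefore, this patch moves the
relevant code from vhost_user.c to trans_af_unix.c, moves the
postcopy_ufd and postcopy_listening state from struct virtio_net to
struct vhost_user_connection, and exposes this functionality via three
new transport-specific operations: set_postcopy_advise,
set_postcopy_listen and set_postcopy_end. Any other vhost-user transport
could potentially support this feature by implementing these
operations.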

Signed-off-by: Nikos Dragazis <ndragazis@arrikto.com>
---
 lib/librte_vhost/trans_af_unix.c | 94 ++++++++++++++++++++++++++++++++++++++--
 lib/librte_vhost/vhost.c         |  1 -
 lib/librte_vhost/vhost.h         | 41 ++++++++++++++++--
 lib/librte_vhost/vhost_user.c    | 61 ++------------------------
 4 files changed, 131 insertions(+), 66 deletions(-)
  

Patch

diff --git a/lib/librte_vhost/trans_af_unix.c b/lib/librte_vhost/trans_af_unix.c
index a451880..4ccf9a7 100644
--- a/lib/librte_vhost/trans_af_unix.c
+++ b/lib/librte_vhost/trans_af_unix.c
@@ -10,6 +10,7 @@ 
 #include <sys/un.h>
 #include <sys/types.h>
 #include <sys/ioctl.h>
+#include <sys/syscall.h>
 #ifdef RTE_LIBRTE_VHOST_POSTCOPY
 #include <linux/userfaultfd.h>
 #endif
@@ -39,6 +40,9 @@  struct vhost_user_connection {
 	int slave_req_fd;
 	rte_spinlock_t slave_req_lock;
 
+	int postcopy_ufd;
+	int postcopy_listening;
+
 	TAILQ_ENTRY(vhost_user_connection) next;
 };
 
@@ -261,6 +265,7 @@  vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
 	conn->slave_req_fd = -1;
 	conn->vsocket = vsocket;
 	rte_spinlock_init(&conn->slave_req_lock);
+	conn->postcopy_ufd = -1;
 
 	size = strnlen(vsocket->path, PATH_MAX);
 	vhost_set_ifname(dev->vid, vsocket->path, size);
@@ -772,6 +777,13 @@  af_unix_cleanup_device(struct virtio_net *dev, int destroy __rte_unused)
 		close(conn->slave_req_fd);
 		conn->slave_req_fd = -1;
 	}
+
+	if (conn->postcopy_ufd >= 0) {
+		close(conn->postcopy_ufd);
+		conn->postcopy_ufd = -1;
+	}
+
+	conn->postcopy_listening = 0;
 }
 
 static int
@@ -866,7 +878,7 @@  af_unix_map_mem_regions(struct virtio_net *dev, struct VhostUserMsg *msg)
 			alignment,
 			mmap_offset);
 
-		if (dev->postcopy_listening) {
+		if (conn->postcopy_listening) {
 			/*
 			 * We haven't a better way right now than sharing
 			 * DPDK's virtual address with Qemu, so that Qemu can
@@ -877,7 +889,7 @@  af_unix_map_mem_regions(struct virtio_net *dev, struct VhostUserMsg *msg)
 		}
 	}
 
-	if (dev->postcopy_listening) {
+	if (conn->postcopy_listening) {
 		/* Send the addresses back to qemu */
 		msg->fd_num = 0;
 		/* Send reply */
@@ -918,11 +930,11 @@  af_unix_map_mem_regions(struct virtio_net *dev, struct VhostUserMsg *msg)
 			reg_struct.range.len = reg->mmap_size;
 			reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
 
-			if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER,
+			if (ioctl(conn->postcopy_ufd, UFFDIO_REGISTER,
 						&reg_struct)) {
 				RTE_LOG(ERR, VHOST_CONFIG,
 					"Failed to register ufd for region %d: (ufd = %d) %s\n",
-					i, dev->postcopy_ufd,
+					i, conn->postcopy_ufd,
 					strerror(errno));
 				return -1;
 			}
@@ -990,6 +1002,77 @@  af_unix_set_log_base(struct virtio_net *dev, const struct VhostUserMsg *msg)
 	return 0;
 }
 
+static int
+af_unix_set_postcopy_advise(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+	struct vhost_user_connection *conn =
+		container_of(dev, struct vhost_user_connection, device);
+#ifdef RTE_LIBRTE_VHOST_POSTCOPY
+	struct uffdio_api api_struct;
+
+	conn->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+
+	if (conn->postcopy_ufd == -1) {
+		RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n",
+			strerror(errno));
+		return RTE_VHOST_MSG_RESULT_ERR;
+	}
+	api_struct.api = UFFD_API;
+	api_struct.features = 0;
+	if (ioctl(conn->postcopy_ufd, UFFDIO_API, &api_struct)) {
+		RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n",
+			strerror(errno));
+		close(conn->postcopy_ufd);
+		conn->postcopy_ufd = -1;
+		return RTE_VHOST_MSG_RESULT_ERR;
+	}
+	msg->fds[0] = conn->postcopy_ufd;
+	msg->fd_num = 1;
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+#else
+	conn->postcopy_ufd = -1;
+	msg->fd_num = 0;
+
+	return RTE_VHOST_MSG_RESULT_ERR;
+#endif
+}
+
+static int
+af_unix_set_postcopy_listen(struct virtio_net *dev)
+{
+	struct vhost_user_connection *conn =
+		container_of(dev, struct vhost_user_connection, device);
+
+	if (dev->mem && dev->mem->nregions) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Regions already registered at postcopy-listen\n");
+		return RTE_VHOST_MSG_RESULT_ERR;
+	}
+	conn->postcopy_listening = 1;
+
+	return RTE_VHOST_MSG_RESULT_OK;
+}
+
+static int
+af_unix_set_postcopy_end(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+	struct vhost_user_connection *conn =
+		container_of(dev, struct vhost_user_connection, device);
+
+	conn->postcopy_listening = 0;
+	if (conn->postcopy_ufd >= 0) {
+		close(conn->postcopy_ufd);
+		conn->postcopy_ufd = -1;
+	}
+
+	msg->payload.u64 = 0;
+	msg->size = sizeof(msg->payload.u64);
+	msg->fd_num = 0;
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
 const struct vhost_transport_ops af_unix_trans_ops = {
 	.socket_size = sizeof(struct af_unix_socket),
 	.device_size = sizeof(struct vhost_user_connection),
@@ -1005,4 +1088,7 @@  const struct vhost_transport_ops af_unix_trans_ops = {
 	.map_mem_regions = af_unix_map_mem_regions,
 	.unmap_mem_regions = af_unix_unmap_mem_regions,
 	.set_log_base = af_unix_set_log_base,
+	.set_postcopy_advise = af_unix_set_postcopy_advise,
+	.set_postcopy_listen = af_unix_set_postcopy_listen,
+	.set_postcopy_end = af_unix_set_postcopy_end,
 };
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 5b16390..91a286d 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -512,7 +512,6 @@  vhost_new_device(const struct vhost_transport_ops *trans_ops)
 	dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
 	dev->trans_ops = trans_ops;
 	dev->vdpa_dev_id = -1;
-	dev->postcopy_ufd = -1;
 
 	return dev;
 }
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index b15d223..f5d6dc8 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -450,6 +450,44 @@  struct vhost_transport_ops {
 	 */
 	int (*set_log_base)(struct virtio_net *dev,
 			    const struct VhostUserMsg *msg);
+
+	/**
+	 * Create a userfault fd and pass it to the master in the reply.
+	 *
+	 * @param dev
+	 *  vhost device
+	 * @param msg
+	 *  message
+	 * @return
+	 *  RTE_VHOST_MSG_RESULT_REPLY on success,
+	 *  RTE_VHOST_MSG_RESULT_ERR on failure
+	 */
+	int (*set_postcopy_advise)(struct virtio_net *dev,
+				   struct VhostUserMsg *msg);
+
+	/**
+	 * Change live migration mode (entering postcopy mode).
+	 *
+	 * @param dev
+	 *  vhost device
+	 * @return
+	 *  RTE_VHOST_MSG_RESULT_OK on success,
+	 *  RTE_VHOST_MSG_RESULT_ERR on failure
+	 */
+	int (*set_postcopy_listen)(struct virtio_net *dev);
+
+	/**
+	 * Register completion of postcopy live migration.
+	 *
+	 * @param dev
+	 *  vhost device
+	 * @param msg
+	 *  message
+	 * @return
+	 *  RTE_VHOST_MSG_RESULT_REPLY
+	 */
+	int (*set_postcopy_end)(struct virtio_net *dev,
+				struct VhostUserMsg *msg);
 };
 
 /** The traditional AF_UNIX vhost-user protocol transport. */
@@ -492,9 +530,6 @@  struct virtio_net {
 	uint32_t		max_guest_pages;
 	struct guest_page       *guest_pages;
 
-	int			postcopy_ufd;
-	int			postcopy_listening;
-
 	/*
 	 * Device id to identify a specific backend device.
 	 * It's set to -1 for the default software implementation.
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index d3c9c5f..29c99e7 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -29,14 +29,10 @@ 
 #include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <sys/syscall.h>
 #include <assert.h>
 #ifdef RTE_LIBRTE_VHOST_NUMA
 #include <numaif.h>
 #endif
-#ifdef RTE_LIBRTE_VHOST_POSTCOPY
-#include <linux/userfaultfd.h>
-#endif
 
 #include <rte_common.h>
 #include <rte_malloc.h>
@@ -136,13 +132,6 @@  vhost_backend_cleanup(struct virtio_net *dev)
 
 	free(dev->guest_pages);
 	dev->guest_pages = NULL;
-
-	if (dev->postcopy_ufd >= 0) {
-		close(dev->postcopy_ufd);
-		dev->postcopy_ufd = -1;
-	}
-
-	dev->postcopy_listening = 0;
 }
 
 /*
@@ -1471,35 +1460,8 @@  vhost_user_set_postcopy_advise(struct virtio_net **pdev,
 			struct VhostUserMsg *msg)
 {
 	struct virtio_net *dev = *pdev;
-#ifdef RTE_LIBRTE_VHOST_POSTCOPY
-	struct uffdio_api api_struct;
-
-	dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
-
-	if (dev->postcopy_ufd == -1) {
-		RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n",
-			strerror(errno));
-		return RTE_VHOST_MSG_RESULT_ERR;
-	}
-	api_struct.api = UFFD_API;
-	api_struct.features = 0;
-	if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
-		RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n",
-			strerror(errno));
-		close(dev->postcopy_ufd);
-		dev->postcopy_ufd = -1;
-		return RTE_VHOST_MSG_RESULT_ERR;
-	}
-	msg->fds[0] = dev->postcopy_ufd;
-	msg->fd_num = 1;
-
-	return RTE_VHOST_MSG_RESULT_REPLY;
-#else
-	dev->postcopy_ufd = -1;
-	msg->fd_num = 0;
 
-	return RTE_VHOST_MSG_RESULT_ERR;
-#endif
+	return dev->trans_ops->set_postcopy_advise(dev, msg);
 }
 
 static int
@@ -1508,14 +1470,7 @@  vhost_user_set_postcopy_listen(struct virtio_net **pdev,
 {
 	struct virtio_net *dev = *pdev;
 
-	if (dev->mem && dev->mem->nregions) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"Regions already registered at postcopy-listen\n");
-		return RTE_VHOST_MSG_RESULT_ERR;
-	}
-	dev->postcopy_listening = 1;
-
-	return RTE_VHOST_MSG_RESULT_OK;
+	return dev->trans_ops->set_postcopy_listen(dev);
 }
 
 static int
@@ -1523,17 +1478,7 @@  vhost_user_postcopy_end(struct virtio_net **pdev, struct VhostUserMsg *msg)
 {
 	struct virtio_net *dev = *pdev;
 
-	dev->postcopy_listening = 0;
-	if (dev->postcopy_ufd >= 0) {
-		close(dev->postcopy_ufd);
-		dev->postcopy_ufd = -1;
-	}
-
-	msg->payload.u64 = 0;
-	msg->size = sizeof(msg->payload.u64);
-	msg->fd_num = 0;
-
-	return RTE_VHOST_MSG_RESULT_REPLY;
+	return dev->trans_ops->set_postcopy_end(dev, msg);
 }
 
 typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,