From patchwork Wed Jun 19 15:14:41 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Nikos Dragazis X-Patchwork-Id: 54970 X-Patchwork-Delegate: maxime.coquelin@redhat.com Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id C537E1C407; Wed, 19 Jun 2019 17:16:29 +0200 (CEST) Received: from mx0.arrikto.com (mx0.arrikto.com [212.71.252.59]) by dpdk.org (Postfix) with ESMTP id 40E591C393 for ; Wed, 19 Jun 2019 17:15:43 +0200 (CEST) Received: from troi.prod.arr (mail.arr [10.99.0.5]) by mx0.arrikto.com (Postfix) with ESMTP id 129AE182014; Wed, 19 Jun 2019 18:15:43 +0300 (EEST) Received: from localhost.localdomain (unknown [10.89.50.133]) by troi.prod.arr (Postfix) with ESMTPSA id 5689F2B2; Wed, 19 Jun 2019 18:15:42 +0300 (EEST) From: Nikos Dragazis To: dev@dpdk.org Cc: Maxime Coquelin , Tiwei Bie , Zhihong Wang , Stefan Hajnoczi , Wei Wang , Stojaczyk Dariusz , Vangelis Koukis Date: Wed, 19 Jun 2019 18:14:41 +0300 Message-Id: <1560957293-17294-17-git-send-email-ndragazis@arrikto.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1560957293-17294-1-git-send-email-ndragazis@arrikto.com> References: <1560957293-17294-1-git-send-email-ndragazis@arrikto.com> Subject: [dpdk-dev] [PATCH 16/28] vhost: move postcopy live migration code X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Postcopy live migration is an AF_UNIX-bound feature due to the userfaultfd mechanism. Therefore, this patch moves the relevant code from vhost_user.c to trans_af_unix.c and exposes this functionality via transport-specific functions. 
Any other vhost-user transport could potentially implement this feature by implementing these transport-specific functions. Signed-off-by: Nikos Dragazis --- lib/librte_vhost/trans_af_unix.c | 94 ++++++++++++++++++++++++++++++++++++++-- lib/librte_vhost/vhost.c | 1 - lib/librte_vhost/vhost.h | 41 ++++++++++++++++-- lib/librte_vhost/vhost_user.c | 61 ++------------------------ 4 files changed, 131 insertions(+), 66 deletions(-) diff --git a/lib/librte_vhost/trans_af_unix.c b/lib/librte_vhost/trans_af_unix.c index a451880..4ccf9a7 100644 --- a/lib/librte_vhost/trans_af_unix.c +++ b/lib/librte_vhost/trans_af_unix.c @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef RTE_LIBRTE_VHOST_POSTCOPY #include #endif @@ -39,6 +40,9 @@ struct vhost_user_connection { int slave_req_fd; rte_spinlock_t slave_req_lock; + int postcopy_ufd; + int postcopy_listening; + TAILQ_ENTRY(vhost_user_connection) next; }; @@ -261,6 +265,7 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) conn->slave_req_fd = -1; conn->vsocket = vsocket; rte_spinlock_init(&conn->slave_req_lock); + conn->postcopy_ufd = -1; size = strnlen(vsocket->path, PATH_MAX); vhost_set_ifname(dev->vid, vsocket->path, size); @@ -772,6 +777,13 @@ af_unix_cleanup_device(struct virtio_net *dev, int destroy __rte_unused) close(conn->slave_req_fd); conn->slave_req_fd = -1; } + + if (conn->postcopy_ufd >= 0) { + close(conn->postcopy_ufd); + conn->postcopy_ufd = -1; + } + + conn->postcopy_listening = 0; } static int @@ -866,7 +878,7 @@ af_unix_map_mem_regions(struct virtio_net *dev, struct VhostUserMsg *msg) alignment, mmap_offset); - if (dev->postcopy_listening) { + if (conn->postcopy_listening) { /* * We haven't a better way right now than sharing * DPDK's virtual address with Qemu, so that Qemu can @@ -877,7 +889,7 @@ af_unix_map_mem_regions(struct virtio_net *dev, struct VhostUserMsg *msg) } } - if (dev->postcopy_listening) { + if (conn->postcopy_listening) { /* Send the addresses back to qemu */ 
msg->fd_num = 0; /* Send reply */ @@ -918,11 +930,11 @@ af_unix_map_mem_regions(struct virtio_net *dev, struct VhostUserMsg *msg) reg_struct.range.len = reg->mmap_size; reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; - if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER, + if (ioctl(conn->postcopy_ufd, UFFDIO_REGISTER, &reg_struct)) { RTE_LOG(ERR, VHOST_CONFIG, "Failed to register ufd for region %d: (ufd = %d) %s\n", - i, dev->postcopy_ufd, + i, conn->postcopy_ufd, strerror(errno)); return -1; } @@ -990,6 +1002,77 @@ af_unix_set_log_base(struct virtio_net *dev, const struct VhostUserMsg *msg) return 0; } +static int +af_unix_set_postcopy_advise(struct virtio_net *dev, struct VhostUserMsg *msg) +{ + struct vhost_user_connection *conn = + container_of(dev, struct vhost_user_connection, device); +#ifdef RTE_LIBRTE_VHOST_POSTCOPY + struct uffdio_api api_struct; + + conn->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + + if (conn->postcopy_ufd == -1) { + RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n", + strerror(errno)); + return RTE_VHOST_MSG_RESULT_ERR; + } + api_struct.api = UFFD_API; + api_struct.features = 0; + if (ioctl(conn->postcopy_ufd, UFFDIO_API, &api_struct)) { + RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n", + strerror(errno)); + close(conn->postcopy_ufd); + conn->postcopy_ufd = -1; + return RTE_VHOST_MSG_RESULT_ERR; + } + msg->fds[0] = conn->postcopy_ufd; + msg->fd_num = 1; + + return RTE_VHOST_MSG_RESULT_REPLY; +#else + conn->postcopy_ufd = -1; + msg->fd_num = 0; + + return RTE_VHOST_MSG_RESULT_ERR; +#endif +} + +static int +af_unix_set_postcopy_listen(struct virtio_net *dev) +{ + struct vhost_user_connection *conn = + container_of(dev, struct vhost_user_connection, device); + + if (dev->mem && dev->mem->nregions) { + RTE_LOG(ERR, VHOST_CONFIG, + "Regions already registered at postcopy-listen\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } + conn->postcopy_listening = 1; + + return RTE_VHOST_MSG_RESULT_OK; +} + +static 
int +af_unix_set_postcopy_end(struct virtio_net *dev, struct VhostUserMsg *msg) +{ + struct vhost_user_connection *conn = + container_of(dev, struct vhost_user_connection, device); + + conn->postcopy_listening = 0; + if (conn->postcopy_ufd >= 0) { + close(conn->postcopy_ufd); + conn->postcopy_ufd = -1; + } + + msg->payload.u64 = 0; + msg->size = sizeof(msg->payload.u64); + msg->fd_num = 0; + + return RTE_VHOST_MSG_RESULT_REPLY; +} + const struct vhost_transport_ops af_unix_trans_ops = { .socket_size = sizeof(struct af_unix_socket), .device_size = sizeof(struct vhost_user_connection), @@ -1005,4 +1088,7 @@ const struct vhost_transport_ops af_unix_trans_ops = { .map_mem_regions = af_unix_map_mem_regions, .unmap_mem_regions = af_unix_unmap_mem_regions, .set_log_base = af_unix_set_log_base, + .set_postcopy_advise = af_unix_set_postcopy_advise, + .set_postcopy_listen = af_unix_set_postcopy_listen, + .set_postcopy_end = af_unix_set_postcopy_end, }; diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index 5b16390..91a286d 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -512,7 +512,6 @@ vhost_new_device(const struct vhost_transport_ops *trans_ops) dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET; dev->trans_ops = trans_ops; dev->vdpa_dev_id = -1; - dev->postcopy_ufd = -1; return dev; } diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index b15d223..f5d6dc8 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -450,6 +450,44 @@ struct vhost_transport_ops { */ int (*set_log_base)(struct virtio_net *dev, const struct VhostUserMsg *msg); + + /** + * Register a userfault fd and send it to master. + * + * @param dev + * vhost device + * @param msg + * message + * @return + * RTE_VHOST_MSG_RESULT_REPLY on success, + * RTE_VHOST_MSG_RESULT_ERR on failure + */ + int (*set_postcopy_advise)(struct virtio_net *dev, + struct VhostUserMsg *msg); + + /** + * Change live migration mode (entering postcopy mode). 
+ * + * @param dev + * vhost device + * @return + * RTE_VHOST_MSG_RESULT_OK on success, + * RTE_VHOST_MSG_RESULT_ERR on failure + */ + int (*set_postcopy_listen)(struct virtio_net *dev); + + /** + * Register completion of postcopy live migration. + * + * @param dev + * vhost device + * @param msg + * message + * @return + * RTE_VHOST_MSG_RESULT_REPLY + */ + int (*set_postcopy_end)(struct virtio_net *dev, + struct VhostUserMsg *msg); }; /** The traditional AF_UNIX vhost-user protocol transport. */ @@ -492,9 +530,6 @@ struct virtio_net { uint32_t max_guest_pages; struct guest_page *guest_pages; - int postcopy_ufd; - int postcopy_listening; - /* * Device id to identify a specific backend device. * It's set to -1 for the default software implementation. diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index d3c9c5f..29c99e7 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -29,14 +29,10 @@ #include #include #include -#include #include #ifdef RTE_LIBRTE_VHOST_NUMA #include #endif -#ifdef RTE_LIBRTE_VHOST_POSTCOPY -#include -#endif #include #include @@ -136,13 +132,6 @@ vhost_backend_cleanup(struct virtio_net *dev) free(dev->guest_pages); dev->guest_pages = NULL; - - if (dev->postcopy_ufd >= 0) { - close(dev->postcopy_ufd); - dev->postcopy_ufd = -1; - } - - dev->postcopy_listening = 0; } /* @@ -1471,35 +1460,8 @@ vhost_user_set_postcopy_advise(struct virtio_net **pdev, struct VhostUserMsg *msg) { struct virtio_net *dev = *pdev; -#ifdef RTE_LIBRTE_VHOST_POSTCOPY - struct uffdio_api api_struct; - - dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); - - if (dev->postcopy_ufd == -1) { - RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n", - strerror(errno)); - return RTE_VHOST_MSG_RESULT_ERR; - } - api_struct.api = UFFD_API; - api_struct.features = 0; - if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) { - RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n", - 
strerror(errno)); - close(dev->postcopy_ufd); - dev->postcopy_ufd = -1; - return RTE_VHOST_MSG_RESULT_ERR; - } - msg->fds[0] = dev->postcopy_ufd; - msg->fd_num = 1; - - return RTE_VHOST_MSG_RESULT_REPLY; -#else - dev->postcopy_ufd = -1; - msg->fd_num = 0; - return RTE_VHOST_MSG_RESULT_ERR; -#endif + return dev->trans_ops->set_postcopy_advise(dev, msg); } static int @@ -1508,14 +1470,7 @@ vhost_user_set_postcopy_listen(struct virtio_net **pdev, { struct virtio_net *dev = *pdev; - if (dev->mem && dev->mem->nregions) { - RTE_LOG(ERR, VHOST_CONFIG, - "Regions already registered at postcopy-listen\n"); - return RTE_VHOST_MSG_RESULT_ERR; - } - dev->postcopy_listening = 1; - - return RTE_VHOST_MSG_RESULT_OK; + return dev->trans_ops->set_postcopy_listen(dev); } static int @@ -1523,17 +1478,7 @@ vhost_user_postcopy_end(struct virtio_net **pdev, struct VhostUserMsg *msg) { struct virtio_net *dev = *pdev; - dev->postcopy_listening = 0; - if (dev->postcopy_ufd >= 0) { - close(dev->postcopy_ufd); - dev->postcopy_ufd = -1; - } - - msg->payload.u64 = 0; - msg->size = sizeof(msg->payload.u64); - msg->fd_num = 0; - - return RTE_VHOST_MSG_RESULT_REPLY; + return dev->trans_ops->set_postcopy_end(dev, msg); } typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,