From patchwork Wed Dec 10 21:37:51 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Huawei Xie X-Patchwork-Id: 1935 Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [IPv6:::1]) by dpdk.org (Postfix) with ESMTP id 3E8A67F8C; Wed, 10 Dec 2014 22:39:05 +0100 (CET) Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id A7C168051 for ; Wed, 10 Dec 2014 22:38:59 +0100 (CET) Received: from orsmga003.jf.intel.com ([10.7.209.27]) by orsmga102.jf.intel.com with ESMTP; 10 Dec 2014 13:37:09 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.04,691,1406617200"; d="scan'208";a="496881343" Received: from shvmail01.sh.intel.com ([10.239.29.42]) by orsmga003.jf.intel.com with ESMTP; 10 Dec 2014 13:34:44 -0800 Received: from shecgisg003.sh.intel.com (shecgisg003.sh.intel.com [10.239.29.90]) by shvmail01.sh.intel.com with ESMTP id sBALcU3v000884; Thu, 11 Dec 2014 05:38:30 +0800 Received: from shecgisg003.sh.intel.com (localhost [127.0.0.1]) by shecgisg003.sh.intel.com (8.13.6/8.13.6/SuSE Linux 0.8) with ESMTP id sBALcRd5013996; Thu, 11 Dec 2014 05:38:29 +0800 Received: (from hxie5@localhost) by shecgisg003.sh.intel.com (8.13.6/8.13.6/Submit) id sBALcRir013992; Thu, 11 Dec 2014 05:38:27 +0800 From: Huawei Xie To: dev@dpdk.org Date: Thu, 11 Dec 2014 05:37:51 +0800 Message-Id: <1418247477-13920-7-git-send-email-huawei.xie@intel.com> X-Mailer: git-send-email 1.7.4.1 In-Reply-To: <1418247477-13920-1-git-send-email-huawei.xie@intel.com> References: <1418247477-13920-1-git-send-email-huawei.xie@intel.com> Cc: haifeng.lin@intel.com Subject: [dpdk-dev] [PATCH RFC v2 06/12] lib/librte_vhost: cuse_set_memory_table X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: 
dev-bounces@dpdk.org Sender: "dev" cuse_set_memory_table Signed-off-by: Huawei Xie --- lib/librte_vhost/Makefile | 2 +- lib/librte_vhost/vhost-net.h | 4 +- lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 7 +- lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 85 +++++++ lib/librte_vhost/vhost_cuse/virtio-net-cdev.h | 45 ++++ lib/librte_vhost/virtio-net.c | 306 +------------------------- 6 files changed, 144 insertions(+), 305 deletions(-) create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile index 0b2f08f..e0d0ef6 100644 --- a/lib/librte_vhost/Makefile +++ b/lib/librte_vhost/Makefile @@ -37,7 +37,7 @@ LIB = librte_vhost.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse LDFLAGS += -lfuse # all source are stored in SRCS-y -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c virtio-net.c vhost_rxtx.c +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h index 03a5c57..f7e96fd 100644 --- a/lib/librte_vhost/vhost-net.h +++ b/lib/librte_vhost/vhost-net.h @@ -41,6 +41,8 @@ #include +#define VHOST_MEMORY_MAX_NREGIONS 8 + /* Macros for printing using RTE_LOG */ #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 #define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1 @@ -92,7 +94,7 @@ struct vhost_net_device_ops { int (*get_features)(struct vhost_device_ctx, uint64_t *); int (*set_features)(struct vhost_device_ctx, uint64_t *); - int (*set_mem_table)(struct vhost_device_ctx, const void *, uint32_t); + int (*set_mem_table)(struct vhost_device_ctx, const struct virtio_memory_regions *, uint32_t nregions); int (*set_vring_num)(struct vhost_device_ctx, struct vhost_vring_state *); int (*set_vring_addr)(struct vhost_device_ctx, struct 
vhost_vring_addr *); diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c index 9424452..3422795 100644 --- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c +++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c @@ -46,6 +46,7 @@ #include #include +#include "virtio-net-cdev.h" #include "vhost-net.h" #include "eventfd_link/eventfd_link.h" @@ -60,7 +61,7 @@ static const char default_cdev[] = "vhost-net"; static const char eventfd_cdev[] = "/dev/eventfd-link"; static struct fuse_session *session; -static struct vhost_net_device_ops const *ops; +struct vhost_net_device_ops const *ops; /* * Returns vhost_device_ctx from given fuse_req_t. The index is populated later @@ -291,8 +292,8 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg, break; default: - result = ops->set_mem_table(ctx, - in_buf, mem_temp.nregions); + result = cuse_set_mem_table(ctx, in_buf, + mem_temp.nregions); if (result) fuse_reply_err(req, EINVAL); else diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c index 58ac3dd..edcbc10 100644 --- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c @@ -47,7 +47,11 @@ #include +#include "rte_virtio_net.h" #include "vhost-net.h" +#include "virtio-net-cdev.h" + +extern struct vhost_net_device_ops const *ops; /* Line size for reading maps file. 
*/ static const uint32_t BUFSIZE = PATH_MAX; @@ -253,3 +257,84 @@ host_memory_map(pid_t pid, uint64_t addr, return 0; } + +int +cuse_set_mem_table(struct vhost_device_ctx ctx, + const struct vhost_memory *mem_regions_addr, uint32_t nregions) +{ + uint64_t size = offsetof(struct vhost_memory, regions); + uint32_t idx, valid_regions; + struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS]; + struct vhost_memory_region *mem_regions = (void *)(uintptr_t) + ((uint64_t)(uintptr_t)mem_regions_addr + size); + uint64_t base_address = 0, mapped_address, mapped_size; + + for (idx = 0; idx < nregions; idx++) { + regions[idx].guest_phys_address = + mem_regions[idx].guest_phys_addr; + regions[idx].guest_phys_address_end = + regions[idx].guest_phys_address + + mem_regions[idx].memory_size; + regions[idx].memory_size = + mem_regions[idx].memory_size; + regions[idx].userspace_address = + mem_regions[idx].userspace_addr; + + LOG_DEBUG(VHOST_CONFIG, + "REGION: %u - GPA: %p - QVA: %p - SIZE (%"PRIu64")\n", + idx, + (void *)(uintptr_t)regions[idx].guest_phys_address, + (void *)(uintptr_t)regions[idx].userspace_address, + regions[idx].memory_size); + + /*set the base address mapping*/ + if (regions[idx].guest_phys_address == 0x0) { + base_address = + regions[idx].userspace_address; + /* Map VM memory file */ + if (host_memory_map(ctx.pid, base_address, + &mapped_address, &mapped_size) != 0) { + return -1; + } + } + } + + /* Check that we have a valid base address. 
*/ + if (base_address == 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to find base address of qemu memory file.\n"); + return -1; + } + + valid_regions = nregions; + for (idx = 0; idx < nregions; idx++) { + if ((regions[idx].userspace_address < base_address) || + (regions[idx].userspace_address > + (base_address + mapped_size))) + valid_regions--; + } + + if (valid_regions != nregions) { + valid_regions = 0; + for (idx = nregions; 0 != idx--; ) { + if ((regions[idx].userspace_address < base_address) || + (regions[idx].userspace_address > + (base_address + mapped_size))) { + memmove(&regions[idx], &regions[idx + 1], + sizeof(struct virtio_memory_regions) * + valid_regions); + } else + valid_regions++; + } + } + + for (idx = 0; idx < valid_regions; idx++) { + regions[idx].address_offset = + mapped_address - base_address + + regions[idx].userspace_address - + regions[idx].guest_phys_address; + } + + ops->set_mem_table(ctx, &regions[0], valid_regions); + return 0; +} diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h new file mode 100644 index 0000000..5ee81b1 --- /dev/null +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h @@ -0,0 +1,45 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _VIRTIO_NET_CDEV_H +#define _VIRTIO_NET_CDEV_H + +#include +#include + +#include "vhost-net.h" + +int +cuse_set_mem_table(struct vhost_device_ctx ctx, + const struct vhost_memory *mem_regions_addr, uint32_t nregions); + +#endif diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c index d1b1aaa..f81e459 100644 --- a/lib/librte_vhost/virtio-net.c +++ b/lib/librte_vhost/virtio-net.c @@ -31,8 +31,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include -#include #include #include #include @@ -67,26 +65,6 @@ static struct virtio_net_config_ll *ll_root; (1ULL << VIRTIO_NET_F_CTRL_RX)) static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES; -/* Line size for reading maps file. */ -static const uint32_t BUFSIZE = PATH_MAX; - -/* Size of prot char array in procmap. */ -#define PROT_SZ 5 - -/* Number of elements in procmap struct. */ -#define PROCMAP_SZ 8 - -/* Structure containing information gathered from maps file. 
*/ -struct procmap { - uint64_t va_start; /* Start virtual address in file. */ - uint64_t len; /* Size of file. */ - uint64_t pgoff; /* Not used. */ - uint32_t maj; /* Not used. */ - uint32_t min; /* Not used. */ - uint32_t ino; /* Not used. */ - char prot[PROT_SZ]; /* Not used. */ - char fname[PATH_MAX]; /* File name. */ -}; /* * Converts QEMU virtual address to Vhost virtual address. This function is @@ -113,191 +91,6 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va) return vhost_va; } -/* - * Locate the file containing QEMU's memory space and - * map it to our address space. - */ -static int -host_memory_map(struct virtio_net *dev, struct virtio_memory *mem, - pid_t pid, uint64_t addr) -{ - struct dirent *dptr = NULL; - struct procmap procmap; - DIR *dp = NULL; - int fd; - int i; - char memfile[PATH_MAX]; - char mapfile[PATH_MAX]; - char procdir[PATH_MAX]; - char resolved_path[PATH_MAX]; - char *path = NULL; - FILE *fmap; - void *map; - uint8_t found = 0; - char line[BUFSIZE]; - char dlm[] = "- : "; - char *str, *sp, *in[PROCMAP_SZ]; - char *end = NULL; - - /* Path where mem files are located. */ - snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid); - /* Maps file used to locate mem file. */ - snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid); - - fmap = fopen(mapfile, "r"); - if (fmap == NULL) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") Failed to open maps file for pid %d\n", - dev->device_fh, pid); - return -1; - } - - /* Read through maps file until we find out base_address. */ - while (fgets(line, BUFSIZE, fmap) != 0) { - str = line; - errno = 0; - /* Split line into fields. */ - for (i = 0; i < PROCMAP_SZ; i++) { - in[i] = strtok_r(str, &dlm[i], &sp); - if ((in[i] == NULL) || (errno != 0)) { - fclose(fmap); - return -1; - } - str = NULL; - } - - /* Convert/Copy each field as needed. 
*/ - procmap.va_start = strtoull(in[0], &end, 16); - if ((in[0] == '\0') || (end == NULL) || (*end != '\0') || - (errno != 0)) { - fclose(fmap); - return -1; - } - - procmap.len = strtoull(in[1], &end, 16); - if ((in[1] == '\0') || (end == NULL) || (*end != '\0') || - (errno != 0)) { - fclose(fmap); - return -1; - } - - procmap.pgoff = strtoull(in[3], &end, 16); - if ((in[3] == '\0') || (end == NULL) || (*end != '\0') || - (errno != 0)) { - fclose(fmap); - return -1; - } - - procmap.maj = strtoul(in[4], &end, 16); - if ((in[4] == '\0') || (end == NULL) || (*end != '\0') || - (errno != 0)) { - fclose(fmap); - return -1; - } - - procmap.min = strtoul(in[5], &end, 16); - if ((in[5] == '\0') || (end == NULL) || (*end != '\0') || - (errno != 0)) { - fclose(fmap); - return -1; - } - - procmap.ino = strtoul(in[6], &end, 16); - if ((in[6] == '\0') || (end == NULL) || (*end != '\0') || - (errno != 0)) { - fclose(fmap); - return -1; - } - - memcpy(&procmap.prot, in[2], PROT_SZ); - memcpy(&procmap.fname, in[7], PATH_MAX); - - if (procmap.va_start == addr) { - procmap.len = procmap.len - procmap.va_start; - found = 1; - break; - } - } - fclose(fmap); - - if (!found) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") Failed to find memory file in pid %d maps file\n", - dev->device_fh, pid); - return -1; - } - - /* Find the guest memory file among the process fds. */ - dp = opendir(procdir); - if (dp == NULL) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") Cannot open pid %d process directory\n", - dev->device_fh, pid); - return -1; - } - - found = 0; - - /* Read the fd directory contents. 
*/ - while (NULL != (dptr = readdir(dp))) { - snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s", - pid, dptr->d_name); - path = realpath(memfile, resolved_path); - if ((path == NULL) && (strlen(resolved_path) == 0)) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") Failed to resolve fd directory\n", - dev->device_fh); - closedir(dp); - return -1; - } - if (strncmp(resolved_path, procmap.fname, - strnlen(procmap.fname, PATH_MAX)) == 0) { - found = 1; - break; - } - } - - closedir(dp); - - if (found == 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") Failed to find memory file for pid %d\n", - dev->device_fh, pid); - return -1; - } - /* Open the shared memory file and map the memory into this process. */ - fd = open(memfile, O_RDWR); - - if (fd == -1) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") Failed to open %s for pid %d\n", - dev->device_fh, memfile, pid); - return -1; - } - - map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE, - MAP_POPULATE|MAP_SHARED, fd, 0); - close(fd); - - if (map == MAP_FAILED) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") Error mapping the file %s for pid %d\n", - dev->device_fh, memfile, pid); - return -1; - } - - /* Store the memory address and size in the device data structure */ - mem->mapped_address = (uint64_t)(uintptr_t)map; - mem->mapped_size = procmap.len; - - LOG_DEBUG(VHOST_CONFIG, - "(%"PRIu64") Mem File: %s->%s - Size: %llu - VA: %p\n", - dev->device_fh, - memfile, resolved_path, - (unsigned long long)mem->mapped_size, map); - - return 0; -} /* * Retrieves an entry from the devices configuration linked list. @@ -646,14 +439,12 @@ set_features(struct vhost_device_ctx ctx, uint64_t *pu) * This includes storing offsets used to translate buffer addresses. 
*/ static int -set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr, - uint32_t nregions) +set_mem_table(struct vhost_device_ctx ctx, + const struct virtio_memory_regions *regions, uint32_t nregions) { struct virtio_net *dev; - struct vhost_memory_region *mem_regions; struct virtio_memory *mem; - uint64_t size = offsetof(struct vhost_memory, regions); - uint32_t regionidx, valid_regions; + uint32_t regionidx; dev = get_device(ctx); if (dev == NULL) @@ -677,104 +468,19 @@ set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr, mem->nregions = nregions; - mem_regions = (void *)(uintptr_t) - ((uint64_t)(uintptr_t)mem_regions_addr + size); - for (regionidx = 0; regionidx < mem->nregions; regionidx++) { /* Populate the region structure for each region. */ - mem->regions[regionidx].guest_phys_address = - mem_regions[regionidx].guest_phys_addr; - mem->regions[regionidx].guest_phys_address_end = - mem->regions[regionidx].guest_phys_address + - mem_regions[regionidx].memory_size; - mem->regions[regionidx].memory_size = - mem_regions[regionidx].memory_size; - mem->regions[regionidx].userspace_address = - mem_regions[regionidx].userspace_addr; - - LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n", dev->device_fh, - regionidx, - (void *)(uintptr_t)mem->regions[regionidx].guest_phys_address, - (void *)(uintptr_t)mem->regions[regionidx].userspace_address, - mem->regions[regionidx].memory_size); - - /*set the base address mapping*/ + mem->regions[regionidx] = regions[regionidx]; if (mem->regions[regionidx].guest_phys_address == 0x0) { mem->base_address = mem->regions[regionidx].userspace_address; - /* Map VM memory file */ - if (host_memory_map(dev, mem, ctx.pid, - mem->base_address) != 0) { - free(mem); - return -1; - } + mem->mapped_address = + mem->regions[regionidx].address_offset; } } - /* Check that we have a valid base address. 
*/ - if (mem->base_address == 0) { - RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Failed to find base address of qemu memory file.\n", dev->device_fh); - free(mem); - return -1; - } - - /* - * Check if all of our regions have valid mappings. - * Usually one does not exist in the QEMU memory file. - */ - valid_regions = mem->nregions; - for (regionidx = 0; regionidx < mem->nregions; regionidx++) { - if ((mem->regions[regionidx].userspace_address < - mem->base_address) || - (mem->regions[regionidx].userspace_address > - (mem->base_address + mem->mapped_size))) - valid_regions--; - } - - /* - * If a region does not have a valid mapping, - * we rebuild our memory struct to contain only valid entries. - */ - if (valid_regions != mem->nregions) { - LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") Not all memory regions exist in the QEMU mem file. Re-populating mem structure\n", - dev->device_fh); - - /* - * Re-populate the memory structure with only valid regions. - * Invalid regions are over-written with memmove. - */ - valid_regions = 0; - - for (regionidx = mem->nregions; 0 != regionidx--;) { - if ((mem->regions[regionidx].userspace_address < - mem->base_address) || - (mem->regions[regionidx].userspace_address > - (mem->base_address + mem->mapped_size))) { - memmove(&mem->regions[regionidx], - &mem->regions[regionidx + 1], - sizeof(struct virtio_memory_regions) * - valid_regions); - } else { - valid_regions++; - } - } - } - mem->nregions = valid_regions; dev->mem = mem; - /* - * Calculate the address offset for each region. - * This offset is used to identify the vhost virtual address - * corresponding to a QEMU guest physical address. - */ - for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) { - dev->mem->regions[regionidx].address_offset = - dev->mem->regions[regionidx].userspace_address - - dev->mem->base_address + - dev->mem->mapped_address - - dev->mem->regions[regionidx].guest_phys_address; - - } return 0; }