get:
Show a patch.

patch:
Partially update a patch (only the fields supplied are changed).

put:
Update a patch (full replacement of its writable fields).

GET /api/patches/1284/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 1284,
    "url": "https://patches.dpdk.org/api/patches/1284/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/1416014087-22499-1-git-send-email-huawei.xie@intel.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<1416014087-22499-1-git-send-email-huawei.xie@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/1416014087-22499-1-git-send-email-huawei.xie@intel.com",
    "date": "2014-11-15T01:14:47",
    "name": "[dpdk-dev,RFC] lib/librte_vhost: vhost-user",
    "commit_ref": null,
    "pull_url": null,
    "state": "rfc",
    "archived": true,
    "hash": "9d6a9e12d850adad000f011b9aad054287416e45",
    "submitter": {
        "id": 16,
        "url": "https://patches.dpdk.org/api/people/16/?format=api",
        "name": "Huawei Xie",
        "email": "huawei.xie@intel.com"
    },
    "delegate": null,
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/1416014087-22499-1-git-send-email-huawei.xie@intel.com/mbox/",
    "series": [],
    "comments": "https://patches.dpdk.org/api/patches/1284/comments/",
    "check": "pending",
    "checks": "https://patches.dpdk.org/api/patches/1284/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [IPv6:::1])\n\tby dpdk.org (Postfix) with ESMTP id 9ADF168AF;\n\tSat, 15 Nov 2014 02:05:45 +0100 (CET)",
            "from mga01.intel.com (mga01.intel.com [192.55.52.88])\n\tby dpdk.org (Postfix) with ESMTP id AA9696891\n\tfor <dev@dpdk.org>; Sat, 15 Nov 2014 02:05:17 +0100 (CET)",
            "from fmsmga002.fm.intel.com ([10.253.24.26])\n\tby fmsmga101.fm.intel.com with ESMTP; 14 Nov 2014 17:15:14 -0800",
            "from shvmail01.sh.intel.com ([10.239.29.42])\n\tby fmsmga002.fm.intel.com with ESMTP; 14 Nov 2014 17:14:55 -0800",
            "from shecgisg003.sh.intel.com (shecgisg003.sh.intel.com\n\t[10.239.29.90])\n\tby shvmail01.sh.intel.com with ESMTP id sAF1ErTL020571;\n\tSat, 15 Nov 2014 09:14:53 +0800",
            "from shecgisg003.sh.intel.com (localhost [127.0.0.1])\n\tby shecgisg003.sh.intel.com (8.13.6/8.13.6/SuSE Linux 0.8) with ESMTP\n\tid sAF1EorT022533; Sat, 15 Nov 2014 09:14:52 +0800",
            "(from hxie5@localhost)\n\tby shecgisg003.sh.intel.com (8.13.6/8.13.6/Submit) id sAF1EmkP022529; \n\tSat, 15 Nov 2014 09:14:48 +0800"
        ],
        "X-ExtLoop1": "1",
        "X-IronPort-AV": "E=Sophos;i=\"5.07,388,1413270000\"; d=\"scan'208\";a=\"632269585\"",
        "From": "Huawei Xie <huawei.xie@intel.com>",
        "To": "dev@dpdk.org",
        "Date": "Sat, 15 Nov 2014 09:14:47 +0800",
        "Message-Id": "<1416014087-22499-1-git-send-email-huawei.xie@intel.com>",
        "X-Mailer": "git-send-email 1.7.4.1",
        "Subject": "[dpdk-dev] [PATCH RFC] lib/librte_vhost: vhost-user",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "patches and discussions about DPDK <dev.dpdk.org>",
        "List-Unsubscribe": "<http://dpdk.org/ml/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<http://dpdk.org/ml/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "implement socket server\nfd event dispatch mechanism\nvhost sock  message handling\nmemory map for each region\nVHOST_USER_SET_VRING_KICK_FD as the indicator that vring is available\nVHOST_USER_GET_VRING_BASE as the message that vring should be released\n  \nThe message flow between vhost-user and vhost-cuse is kindof different,\nwhich makes virtio-net common message handler layer difficult and complicated to handle\nboth cases in new_device/destroy_device/memory map/resource cleanup.\n\nWill only leave the most common messag handling in virtio-net, and move the\ncontrol logic to cuse/fuse layer.  \n\n\nSigned-off-by: Huawei Xie <huawei.xie@intel.com>\n---\n lib/librte_vhost/Makefile                     |  14 +-\n lib/librte_vhost/eventfd_link/eventfd_link.c  |  27 +-\n lib/librte_vhost/eventfd_link/eventfd_link.h  |  48 +-\n lib/librte_vhost/libvirt/qemu-wrap.py         | 367 ---------------\n lib/librte_vhost/rte_virtio_net.h             | 106 ++---\n lib/librte_vhost/vhost-cuse/vhost-net-cdev.c  | 436 ++++++++++++++++++\n lib/librte_vhost/vhost-cuse/virtio-net-cdev.c | 314 +++++++++++++\n lib/librte_vhost/vhost-cuse/virtio-net-cdev.h |  43 ++\n lib/librte_vhost/vhost-net-cdev.c             | 389 ----------------\n lib/librte_vhost/vhost-net-cdev.h             | 113 -----\n lib/librte_vhost/vhost-user/fd_man.c          | 158 +++++++\n lib/librte_vhost/vhost-user/fd_man.h          |  31 ++\n lib/librte_vhost/vhost-user/vhost-net-user.c  | 417 +++++++++++++++++\n lib/librte_vhost/vhost-user/vhost-net-user.h  |  74 +++\n lib/librte_vhost/vhost-user/virtio-net-user.c | 208 +++++++++\n lib/librte_vhost/vhost-user/virtio-net-user.h |  11 +\n lib/librte_vhost/vhost_rxtx.c                 | 625 ++++----------------------\n lib/librte_vhost/virtio-net.c                 | 450 ++++---------------\n 18 files changed, 1939 insertions(+), 1892 deletions(-)\n delete mode 100755 lib/librte_vhost/libvirt/qemu-wrap.py\n create mode 100644 
lib/librte_vhost/vhost-cuse/vhost-net-cdev.c\n create mode 100644 lib/librte_vhost/vhost-cuse/virtio-net-cdev.c\n create mode 100644 lib/librte_vhost/vhost-cuse/virtio-net-cdev.h\n delete mode 100644 lib/librte_vhost/vhost-net-cdev.c\n delete mode 100644 lib/librte_vhost/vhost-net-cdev.h\n create mode 100644 lib/librte_vhost/vhost-user/fd_man.c\n create mode 100644 lib/librte_vhost/vhost-user/fd_man.h\n create mode 100644 lib/librte_vhost/vhost-user/vhost-net-user.c\n create mode 100644 lib/librte_vhost/vhost-user/vhost-net-user.h\n create mode 100644 lib/librte_vhost/vhost-user/virtio-net-user.c\n create mode 100644 lib/librte_vhost/vhost-user/virtio-net-user.h",
    "diff": "diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile\nindex c008d64..cb4e172 100644\n--- a/lib/librte_vhost/Makefile\n+++ b/lib/librte_vhost/Makefile\n@@ -34,17 +34,19 @@ include $(RTE_SDK)/mk/rte.vars.mk\n # library name\n LIB = librte_vhost.a\n \n-CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse\n+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I. -I vhost-user -I vhost-cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse\n LDFLAGS += -lfuse\n # all source are stored in SRCS-y\n-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c vhost_rxtx.c\n+#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-cuse/vhost-net-cdev.c vhost-cuse/virtio-net-cdev.c\n+\n+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-user/fd_man.c vhost-user/vhost-net-user.c vhost-user/virtio-net-user.c\n+\n+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += virtio-net.c vhost_rxtx.c\n \n # install includes\n SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h\n \n-# dependencies\n-DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal\n-DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_ether\n-DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_mbuf\n+# this lib needs eal\n+DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal lib/librte_mbuf\n \n include $(RTE_SDK)/mk/rte.lib.mk\ndiff --git a/lib/librte_vhost/eventfd_link/eventfd_link.c b/lib/librte_vhost/eventfd_link/eventfd_link.c\nindex 7755dd6..4c9b628 100644\n--- a/lib/librte_vhost/eventfd_link/eventfd_link.c\n+++ b/lib/librte_vhost/eventfd_link/eventfd_link.c\n@@ -13,8 +13,7 @@\n  *   General Public License for more details.\n  *\n  *   You should have received a copy of the GNU General Public License\n- *   along with this program; if not, write to the Free Software\n- *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.\n+ *   along with this program; If not, see <http://www.gnu.org/licenses/>.\n  *   The full GNU General Public License is included in this distribution\n  *   in the file called 
LICENSE.GPL.\n  *\n@@ -78,8 +77,7 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)\n \n \tswitch (ioctl) {\n \tcase EVENTFD_COPY:\n-\t\tif (copy_from_user(&eventfd_copy, argp,\n-\t\t\tsizeof(struct eventfd_copy)))\n+\t\tif (copy_from_user(&eventfd_copy, argp, sizeof(struct eventfd_copy)))\n \t\t\treturn -EFAULT;\n \n \t\t/*\n@@ -88,28 +86,28 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)\n \t\ttask_target =\n \t\t\tpid_task(find_vpid(eventfd_copy.target_pid), PIDTYPE_PID);\n \t\tif (task_target == NULL) {\n-\t\t\tpr_debug(\"Failed to get mem ctx for target pid\\n\");\n+\t\t\tprintk(KERN_DEBUG \"Failed to get mem ctx for target pid\\n\");\n \t\t\treturn -EFAULT;\n \t\t}\n \n \t\tfiles = get_files_struct(current);\n \t\tif (files == NULL) {\n-\t\t\tpr_debug(\"Failed to get files struct\\n\");\n+\t\t\tprintk(KERN_DEBUG \"Failed to get files struct\\n\");\n \t\t\treturn -EFAULT;\n \t\t}\n \n \t\trcu_read_lock();\n \t\tfile = fcheck_files(files, eventfd_copy.source_fd);\n \t\tif (file) {\n-\t\t\tif (file->f_mode & FMODE_PATH ||\n-\t\t\t\t!atomic_long_inc_not_zero(&file->f_count))\n+\t\t\tif (file->f_mode & FMODE_PATH\n+\t\t\t\t|| !atomic_long_inc_not_zero(&file->f_count))\n \t\t\t\tfile = NULL;\n \t\t}\n \t\trcu_read_unlock();\n \t\tput_files_struct(files);\n \n \t\tif (file == NULL) {\n-\t\t\tpr_debug(\"Failed to get file from source pid\\n\");\n+\t\t\tprintk(KERN_DEBUG \"Failed to get file from source pid\\n\");\n \t\t\treturn 0;\n \t\t}\n \n@@ -128,25 +126,26 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)\n \n \t\tfiles = get_files_struct(task_target);\n \t\tif (files == NULL) {\n-\t\t\tpr_debug(\"Failed to get files struct\\n\");\n+\t\t\tprintk(KERN_DEBUG \"Failed to get files struct\\n\");\n \t\t\treturn -EFAULT;\n \t\t}\n \n \t\trcu_read_lock();\n \t\tfile = fcheck_files(files, eventfd_copy.target_fd);\n \t\tif (file) {\n-\t\t\tif (file->f_mode & FMODE_PATH 
||\n-\t\t\t\t!atomic_long_inc_not_zero(&file->f_count))\n-\t\t\t\t\tfile = NULL;\n+\t\t\tif (file->f_mode & FMODE_PATH\n+\t\t\t\t|| !atomic_long_inc_not_zero(&file->f_count))\n+\t\t\t\tfile = NULL;\n \t\t}\n \t\trcu_read_unlock();\n \t\tput_files_struct(files);\n \n \t\tif (file == NULL) {\n-\t\t\tpr_debug(\"Failed to get file from target pid\\n\");\n+\t\t\tprintk(KERN_DEBUG \"Failed to get file from target pid\\n\");\n \t\t\treturn 0;\n \t\t}\n \n+\n \t\t/*\n \t\t * Install the file struct from the target process into the\n \t\t * file desciptor of the source process,\ndiff --git a/lib/librte_vhost/eventfd_link/eventfd_link.h b/lib/librte_vhost/eventfd_link/eventfd_link.h\nindex ea619ec..38052e2 100644\n--- a/lib/librte_vhost/eventfd_link/eventfd_link.h\n+++ b/lib/librte_vhost/eventfd_link/eventfd_link.h\n@@ -1,7 +1,4 @@\n /*-\n- *  This file is provided under a dual BSD/GPLv2 license.  When using or\n- *  redistributing this file, you may do so under either license.\n- *\n  * GPL LICENSE SUMMARY\n  *\n  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.\n@@ -16,61 +13,28 @@\n  *   General Public License for more details.\n  *\n  *   You should have received a copy of the GNU General Public License\n- *   along with this program; if not, write to the Free Software\n- *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.\n+ *   along with this program; If not, see <http://www.gnu.org/licenses/>.\n  *   The full GNU General Public License is included in this distribution\n  *   in the file called LICENSE.GPL.\n  *\n  *   Contact Information:\n  *   Intel Corporation\n- *\n- * BSD LICENSE\n- *\n- *   Copyright(c) 2010-2014 Intel Corporation. 
All rights reserved.\n- *   All rights reserved.\n- *\n- *   Redistribution and use in source and binary forms, with or without\n- *   modification, are permitted provided that the following conditions\n- *   are met:\n- *\n- *   Redistributions of source code must retain the above copyright\n- *   notice, this list of conditions and the following disclaimer.\n- *   Redistributions in binary form must reproduce the above copyright\n- *   notice, this list of conditions and the following disclaimer in\n- *   the documentation and/or other materials provided with the\n- *   distribution.\n- *   Neither the name of Intel Corporation nor the names of its\n- *   contributors may be used to endorse or promote products derived\n- *   from this software without specific prior written permission.\n- *\n- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n- *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n- *   A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT\n- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n- *\n  */\n \n #ifndef _EVENTFD_LINK_H_\n #define _EVENTFD_LINK_H_\n \n /*\n- * ioctl to copy an fd entry in calling process to an fd in a target process\n+ *\tioctl to copy an fd entry in calling process to an fd in a target process\n  */\n #define EVENTFD_COPY 1\n \n /*\n- * arguements for the EVENTFD_COPY ioctl\n+ *\targuements for the EVENTFD_COPY ioctl\n  */\n struct eventfd_copy {\n-\tunsigned target_fd; /* fd in the target pid */\n-\tunsigned source_fd; /* fd in the calling pid */\n-\tpid_t target_pid; /* pid of the target pid */\n+\tunsigned target_fd; /**< fd in the target pid */\n+\tunsigned source_fd; /**< fd in the calling pid */\n+\tpid_t target_pid;   /**< pid of the target pid */\n };\n #endif /* _EVENTFD_LINK_H_ */\ndiff --git a/lib/librte_vhost/libvirt/qemu-wrap.py b/lib/librte_vhost/libvirt/qemu-wrap.py\ndeleted file mode 100755\nindex e2d68a0..0000000\n--- a/lib/librte_vhost/libvirt/qemu-wrap.py\n+++ /dev/null\n@@ -1,367 +0,0 @@\n-#!/usr/bin/python\n-#/*\n-# *   BSD LICENSE\n-# *\n-# *   Copyright(c) 2010-2014 Intel Corporation. 
All rights reserved.\n-# *   All rights reserved.\n-# *\n-# *   Redistribution and use in source and binary forms, with or without\n-# *   modification, are permitted provided that the following conditions\n-# *   are met:\n-# *\n-# *     * Redistributions of source code must retain the above copyright\n-# *       notice, this list of conditions and the following disclaimer.\n-# *     * Redistributions in binary form must reproduce the above copyright\n-# *       notice, this list of conditions and the following disclaimer in\n-# *       the documentation and/or other materials provided with the\n-# *       distribution.\n-# *     * Neither the name of Intel Corporation nor the names of its\n-# *       contributors may be used to endorse or promote products derived\n-# *       from this software without specific prior written permission.\n-# *\n-# *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n-# *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n-# *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n-# *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n-# *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n-# *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n-# *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n-# *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n-# *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n-# *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n-# *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n-# */\n-\n-#####################################################################\n-# This script is designed to modify the call to the QEMU emulator\n-# to support userspace vhost when starting a guest machine through\n-# libvirt with vhost enabled. 
The steps to enable this are as follows\n-# and should be run as root:\n-#\n-# 1. Place this script in a libvirtd's binary search PATH ($PATH)\n-#    A good location would be in the same directory that the QEMU\n-#    binary is located\n-#\n-# 2. Ensure that the script has the same owner/group and file\n-#    permissions as the QEMU binary\n-#\n-# 3. Update the VM xml file using \"virsh edit VM.xml\"\n-#\n-#    3.a) Set the VM to use the launch script\n-#\n-#    \tSet the emulator path contained in the\n-#\t\t<emulator><emulator/> tags\n-#\n-#    \te.g replace <emulator>/usr/bin/qemu-kvm<emulator/>\n-#        with    <emulator>/usr/bin/qemu-wrap.py<emulator/>\n-#\n-#\t 3.b) Set the VM's device's to use vhost-net offload\n-#\n-#\t\t<interface type=\"network\">\n-#       \t<model type=\"virtio\"/>\n-#       \t<driver name=\"vhost\"/>\n-#\t\t<interface/>\n-#\n-# 4. Enable libvirt to access our userpace device file by adding it to\n-#    controllers cgroup for libvirtd using the following steps\n-#\n-#   4.a) In /etc/libvirt/qemu.conf add/edit the following lines:\n-#         1) cgroup_controllers = [ ... \"devices\", ... ]\n-#\t\t  2) clear_emulator_capabilities = 0\n-#         3) user = \"root\"\n-#         4) group = \"root\"\n-#         5) cgroup_device_acl = [\n-#                \"/dev/null\", \"/dev/full\", \"/dev/zero\",\n-#                \"/dev/random\", \"/dev/urandom\",\n-#                \"/dev/ptmx\", \"/dev/kvm\", \"/dev/kqemu\",\n-#                \"/dev/rtc\", \"/dev/hpet\", \"/dev/net/tun\",\n-#                \"/dev/<devbase-name>-<index>\",\n-#            ]\n-#\n-#   4.b) Disable SELinux or set to permissive mode\n-#\n-#   4.c) Mount cgroup device controller\n-#        \"mkdir /dev/cgroup\"\n-#        \"mount -t cgroup none /dev/cgroup -o devices\"\n-#\n-#   4.d) Set hugetlbfs_mount variable - ( Optional )\n-#        VMs using userspace vhost must use hugepage backed\n-#        memory. 
This can be enabled in the libvirt XML\n-#        config by adding a memory backing section to the\n-#        XML config e.g.\n-#             <memoryBacking>\n-#             <hugepages/>\n-#             </memoryBacking>\n-#        This memory backing section should be added after the\n-#        <memory> and <currentMemory> sections. This will add\n-#        flags \"-mem-prealloc -mem-path <path>\" to the QEMU\n-#        command line. The hugetlbfs_mount variable can be used\n-#        to override the default <path> passed through by libvirt.\n-#\n-#        if \"-mem-prealloc\" or \"-mem-path <path>\" are not passed\n-#        through and a vhost device is detected then these options will\n-#        be automatically added by this script. This script will detect\n-#        the system hugetlbfs mount point to be used for <path>. The\n-#        default <path> for this script can be overidden by the\n-#        hugetlbfs_dir variable in the configuration section of this script.\n-#\n-#\n-#   4.e) Restart the libvirtd system process\n-#        e.g. 
on Fedora \"systemctl restart libvirtd.service\"\n-#\n-#\n-#   4.f) Edit the Configuration Parameters section of this script\n-#        to point to the correct emulator location and set any\n-#        addition options\n-#\n-# The script modifies the libvirtd Qemu call by modifying/adding\n-# options based on the configuration parameters below.\n-# NOTE:\n-#     emul_path and us_vhost_path must be set\n-#     All other parameters are optional\n-#####################################################################\n-\n-\n-#############################################\n-# Configuration Parameters\n-#############################################\n-#Path to QEMU binary\n-emul_path = \"/usr/local/bin/qemu-system-x86_64\"\n-\n-#Path to userspace vhost device file\n-# This filename should match the --dev-basename --dev-index parameters of\n-# the command used to launch the userspace vhost sample application e.g.\n-# if the sample app lauch command is:\n-#    ./build/vhost-switch ..... --dev-basename usvhost --dev-index 1\n-# then this variable should be set to:\n-#   us_vhost_path = \"/dev/usvhost-1\"\n-us_vhost_path = \"/dev/usvhost-1\"\n-\n-#List of additional user defined emulation options. 
These options will\n-#be added to all Qemu calls\n-emul_opts_user = []\n-\n-#List of additional user defined emulation options for vhost only.\n-#These options will only be added to vhost enabled guests\n-emul_opts_user_vhost = []\n-\n-#For all VHOST enabled VMs, the VM memory is preallocated from hugetlbfs\n-# Set this variable to one to enable this option for all VMs\n-use_huge_all = 0\n-\n-#Instead of autodetecting, override the hugetlbfs directory by setting\n-#this variable\n-hugetlbfs_dir = \"\"\n-\n-#############################################\n-\n-\n-#############################################\n-# ****** Do Not Modify Below this Line ******\n-#############################################\n-\n-import sys, os, subprocess\n-\n-\n-#List of open userspace vhost file descriptors\n-fd_list = []\n-\n-#additional virtio device flags when using userspace vhost\n-vhost_flags = [ \"csum=off\",\n-                \"gso=off\",\n-                \"guest_tso4=off\",\n-                \"guest_tso6=off\",\n-                \"guest_ecn=off\"\n-              ]\n-\n-\n-#############################################\n-# Find the system hugefile mount point.\n-# Note:\n-# if multiple hugetlbfs mount points exist\n-# then the first one found will be used\n-#############################################\n-def find_huge_mount():\n-\n-    if (len(hugetlbfs_dir)):\n-        return hugetlbfs_dir\n-\n-    huge_mount = \"\"\n-\n-    if (os.access(\"/proc/mounts\", os.F_OK)):\n-        f = open(\"/proc/mounts\", \"r\")\n-        line = f.readline()\n-        while line:\n-            line_split = line.split(\" \")\n-            if line_split[2] == 'hugetlbfs':\n-                huge_mount = line_split[1]\n-                break\n-            line = f.readline()\n-    else:\n-        print \"/proc/mounts not found\"\n-        exit (1)\n-\n-    f.close\n-    if len(huge_mount) == 0:\n-        print \"Failed to find hugetlbfs mount point\"\n-        exit (1)\n-\n-    return 
huge_mount\n-\n-\n-#############################################\n-# Get a userspace Vhost file descriptor\n-#############################################\n-def get_vhost_fd():\n-\n-    if (os.access(us_vhost_path, os.F_OK)):\n-        fd = os.open( us_vhost_path, os.O_RDWR)\n-    else:\n-        print (\"US-Vhost file %s not found\" %us_vhost_path)\n-        exit (1)\n-\n-    return fd\n-\n-\n-#############################################\n-# Check for vhostfd. if found then replace\n-# with our own vhost fd and append any vhost\n-# flags onto the end\n-#############################################\n-def modify_netdev_arg(arg):\n-\t\n-    global fd_list\n-    vhost_in_use = 0\n-    s = ''\n-    new_opts = []\n-    netdev_opts = arg.split(\",\")\n-\n-    for opt in netdev_opts:\n-        #check if vhost is used\n-        if \"vhost\" == opt[:5]:\n-            vhost_in_use = 1\n-        else:\n-            new_opts.append(opt)\n-\n-    #if using vhost append vhost options\n-    if vhost_in_use == 1:\n-        #append vhost on option\n-        new_opts.append('vhost=on')\n-        #append vhostfd ption\n-        new_fd = get_vhost_fd()\n-        new_opts.append('vhostfd=' + str(new_fd))\n-        fd_list.append(new_fd)\n-\n-    #concatenate all options\n-    for opt in new_opts:\n-        if len(s) > 0:\n-\t\t\ts+=','\n-\n-        s+=opt\n-\n-    return s\t\n-\n-\n-#############################################\n-# Main\n-#############################################\n-def main():\n-\n-    global fd_list\n-    global vhost_in_use\n-    new_args = []\n-    num_cmd_args = len(sys.argv)\n-    emul_call = ''\n-    mem_prealloc_set = 0\n-    mem_path_set = 0\n-    num = 0;\n-\n-    #parse the parameters\n-    while (num < num_cmd_args):\n-        arg = sys.argv[num]\n-\n-\t\t#Check netdev +1 parameter for vhostfd\n-        if arg == '-netdev':\n-            num_vhost_devs = len(fd_list)\n-            new_args.append(arg)\n-\n-            num+=1\n-            arg = 
sys.argv[num]\n-            mod_arg = modify_netdev_arg(arg)\n-            new_args.append(mod_arg)\n-\n-            #append vhost flags if this is a vhost device\n-            # and -device is the next arg\n-            # i.e -device -opt1,-opt2,...,-opt3,%vhost\n-            if (num_vhost_devs < len(fd_list)):\n-                num+=1\n-                arg = sys.argv[num]\n-                if arg == '-device':\n-                    new_args.append(arg)\n-                    num+=1\n-                    new_arg = sys.argv[num]\n-                    for flag in vhost_flags:\n-                        new_arg = ''.join([new_arg,',',flag])\n-                    new_args.append(new_arg)\n-                else:\n-                    new_args.append(arg)\n-        elif arg == '-mem-prealloc':\n-            mem_prealloc_set = 1\n-            new_args.append(arg)\n-        elif arg == '-mem-path':\n-            mem_path_set = 1\n-            new_args.append(arg)\n-\n-        else:\n-            new_args.append(arg)\n-\n-        num+=1\n-\n-    #Set Qemu binary location\n-    emul_call+=emul_path\n-    emul_call+=\" \"\n-\n-    #Add prealloc mem options if using vhost and not already added\n-    if ((len(fd_list) > 0) and (mem_prealloc_set == 0)):\n-        emul_call += \"-mem-prealloc \"\n-\n-    #Add mempath mem options if using vhost and not already added\n-    if ((len(fd_list) > 0) and (mem_path_set == 0)):\n-        #Detect and add hugetlbfs mount point\n-        mp = find_huge_mount()\n-        mp = \"\".join([\"-mem-path \", mp])\n-        emul_call += mp\n-        emul_call += \" \"\n-\n-\n-    #add user options\n-    for opt in emul_opts_user:\n-        emul_call += opt\n-        emul_call += \" \"\n-\n-    #Add add user vhost only options\n-    if len(fd_list) > 0:\n-        for opt in emul_opts_user_vhost:\n-            emul_call += opt\n-            emul_call += \" \"\n-\n-    #Add updated libvirt options\n-    iter_args = iter(new_args)\n-    #skip 1st arg 
i.e. call to this script\n-    next(iter_args)\n-    for arg in iter_args:\n-        emul_call+=str(arg)\n-        emul_call+= \" \"\n-\n-    #Call QEMU\n-    subprocess.call(emul_call, shell=True)\n-\n-\n-    #Close usvhost files\n-    for fd in fd_list:\n-        os.close(fd)\n-\n-\n-if __name__ == \"__main__\":\n-    main()\n-\ndiff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h\nindex 00b1328..7a05dab 100644\n--- a/lib/librte_vhost/rte_virtio_net.h\n+++ b/lib/librte_vhost/rte_virtio_net.h\n@@ -34,11 +34,6 @@\n #ifndef _VIRTIO_NET_H_\n #define _VIRTIO_NET_H_\n \n-/**\n- * @file\n- * Interface to vhost net\n- */\n-\n #include <stdint.h>\n #include <linux/virtio_ring.h>\n #include <linux/virtio_net.h>\n@@ -48,66 +43,38 @@\n #include <rte_mempool.h>\n #include <rte_mbuf.h>\n \n-/* Used to indicate that the device is running on a data core */\n-#define VIRTIO_DEV_RUNNING 1\n-\n-/* Backend value set by guest. */\n-#define VIRTIO_DEV_STOPPED -1\n-\n+#define VIRTIO_DEV_RUNNING 1  /**< Used to indicate that the device is running on a data core. */\n+#define VIRTIO_DEV_STOPPED -1 /**< Backend value set by guest. */\n \n /* Enum for virtqueue management. */\n enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};\n \n-#define BUF_VECTOR_MAX 256\n-\n-/**\n- * Structure contains buffer address, length and descriptor index\n- * from vring to do scatter RX.\n- */\n-struct buf_vector {\n-\tuint64_t buf_addr;\n-\tuint32_t buf_len;\n-\tuint32_t desc_idx;\n-};\n-\n /**\n  * Structure contains variables relevant to RX/TX virtqueues.\n  */\n struct vhost_virtqueue {\n-\tstruct vring_desc\t*desc;\t\t\t/**< Virtqueue descriptor ring. */\n-\tstruct vring_avail\t*avail;\t\t\t/**< Virtqueue available ring. */\n-\tstruct vring_used\t*used;\t\t\t/**< Virtqueue used ring. */\n-\tuint32_t\t\tsize;\t\t\t/**< Size of descriptor ring. */\n-\tuint32_t\t\tbackend;\t\t/**< Backend value to determine if device should started/stopped. 
*/\n-\tuint16_t\t\tvhost_hlen;\t\t/**< Vhost header length (varies depending on RX merge buffers. */\n-\tvolatile uint16_t\tlast_used_idx;\t\t/**< Last index used on the available ring */\n-\tvolatile uint16_t\tlast_used_idx_res;\t/**< Used for multiple devices reserving buffers. */\n-\teventfd_t\t\tcallfd;\t\t\t/**< Currently unused as polling mode is enabled. */\n-\teventfd_t\t\tkickfd;\t\t\t/**< Used to notify the guest (trigger interrupt). */\n-\tstruct buf_vector\tbuf_vec[BUF_VECTOR_MAX];\t/**< for scatter RX. */\n-} __rte_cache_aligned;\n-\n-/**\n- * Device structure contains all configuration information relating to the device.\n- */\n-struct virtio_net {\n-\tstruct vhost_virtqueue\t*virtqueue[VIRTIO_QNUM];\t/**< Contains all virtqueue information. */\n-\tstruct virtio_memory\t*mem;\t\t/**< QEMU memory and memory region information. */\n-\tuint64_t\t\tfeatures;\t/**< Negotiated feature set. */\n-\tuint64_t\t\tdevice_fh;\t/**< device identifier. */\n-\tuint32_t\t\tflags;\t\t/**< Device flags. Only used to check if device is running on data core. */\n-\tvoid\t\t\t*priv;\t\t/**< private context */\n+\tstruct vring_desc    *desc;             /**< descriptor ring. */\n+\tstruct vring_avail   *avail;            /**< available ring. */\n+\tstruct vring_used    *used;             /**< used ring. */\n+\tuint32_t             size;              /**< Size of descriptor ring. */\n+\tuint32_t             backend;           /**< Backend value to determine if device should be started/stopped. */\n+\tuint16_t             vhost_hlen;        /**< Vhost header length (varies depending on RX merge buffers. */\n+\tvolatile uint16_t    last_used_idx;     /**< Last index used on the available ring. */\n+\tvolatile uint16_t    last_used_idx_res; /**< Used for multiple devices reserving buffers. */\n+\teventfd_t            callfd;            /**< Currently unused as polling mode is enabled. 
*/\n+\teventfd_t            kickfd;            /**< Used to notify the guest (trigger interrupt). */\n } __rte_cache_aligned;\n \n /**\n- * Information relating to memory regions including offsets to addresses in QEMUs memory file.\n+ * Information relating to memory regions including offsets to\n+ * addresses in QEMUs memory file.\n  */\n struct virtio_memory_regions {\n-\tuint64_t\tguest_phys_address;\t/**< Base guest physical address of region. */\n-\tuint64_t\tguest_phys_address_end;\t/**< End guest physical address of region. */\n-\tuint64_t\tmemory_size;\t\t/**< Size of region. */\n-\tuint64_t\tuserspace_address;\t/**< Base userspace address of region. */\n-\tuint64_t\taddress_offset;\t\t/**< Offset of region for address translation. */\n+\tuint64_t    guest_phys_address;     /**< Base guest physical address of region. */\n+\tuint64_t    guest_phys_address_end; /**< End guest physical address of region. */\n+\tuint64_t    memory_size;            /**< Size of region. */\n+\tuint64_t    userspace_address;      /**< Base userspace address of region. */\n+\tuint64_t    address_offset;         /**< Offset of region for address translation. */\n };\n \n \n@@ -115,21 +82,34 @@ struct virtio_memory_regions {\n  * Memory structure includes region and mapping information.\n  */\n struct virtio_memory {\n-\tuint64_t\tbase_address;\t/**< Base QEMU userspace address of the memory file. */\n-\tuint64_t\tmapped_address;\t/**< Mapped address of memory file base in our applications memory space. */\n-\tuint64_t\tmapped_size;\t/**< Total size of memory file. */\n-\tuint32_t\tnregions;\t/**< Number of memory regions. */\n+\tuint64_t    base_address;    /**< Base QEMU userspace address of the memory file. */\n+\tuint64_t    mapped_address;  /**< Mapped address of memory file base in our applications memory space. */\n+\tuint64_t    mapped_size;     /**< Total size of memory file. */\n+\tuint32_t    nregions;        /**< Number of memory regions. 
*/\n \tstruct virtio_memory_regions      regions[0]; /**< Memory region information. */\n };\n \n /**\n+ * Device structure contains all configuration information relating to the device.\n+ */\n+struct virtio_net {\n+\tstruct vhost_virtqueue  *virtqueue[VIRTIO_QNUM]; /**< Contains all virtqueue information. */\n+\tstruct virtio_memory    *mem;                    /**< QEMU memory and memory region information. */\n+\tuint64_t features;    /**< Negotiated feature set. */\n+\tuint64_t device_fh;   /**< Device identifier. */\n+\tuint32_t flags;       /**< Device flags. Only used to check if device is running on data core. */\n+\tvoid     *priv;\n+} __rte_cache_aligned;\n+\n+/**\n  * Device operations to add/remove device.\n  */\n struct virtio_net_device_ops {\n-\tint (*new_device)(struct virtio_net *);\t/**< Add device. */\n-\tvoid (*destroy_device)(volatile struct virtio_net *);\t/**< Remove device. */\n+\tint (*new_device)(struct virtio_net *); /**< Add device. */\n+\tvoid (*destroy_device)(struct virtio_net *); /**< Remove device. */\n };\n \n+\n static inline uint16_t __attribute__((always_inline))\n rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id)\n {\n@@ -179,7 +159,7 @@ int rte_vhost_driver_register(const char *dev_name);\n \n /* Register callbacks. */\n int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const);\n-/* Start vhost driver session blocking loop. 
*/\n+\n int rte_vhost_driver_session_start(void);\n \n /**\n@@ -192,8 +172,8 @@ int rte_vhost_driver_session_start(void);\n  * @return\n  *  num of packets enqueued\n  */\n-uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,\n-\tstruct rte_mbuf **pkts, uint16_t count);\n+uint32_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,\n+\tstruct rte_mbuf **pkts, uint32_t count);\n \n /**\n  * This function gets guest buffers from the virtio device TX virtqueue,\n@@ -206,7 +186,7 @@ uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,\n  * @return\n  *  num of packets dequeued\n  */\n-uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,\n-\tstruct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);\n+uint32_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,\n+\tstruct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count);\n \n #endif /* _VIRTIO_NET_H_ */\ndiff --git a/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c\nnew file mode 100644\nindex 0000000..4671643\n--- /dev/null\n+++ b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c\n@@ -0,0 +1,436 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright(c) 2010-2014 Intel Corporation. 
All rights reserved.\n+ *   All rights reserved.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the name of Intel Corporation nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+#include <stdint.h>\n+#include <fuse/cuse_lowlevel.h>\n+#include <linux/limits.h>\n+#include <linux/vhost.h>\n+#include <linux/virtio_net.h>\n+#include <string.h>\n+#include <unistd.h>\n+#include <sys/ioctl.h>\n+\n+#include <rte_ethdev.h>\n+#include <rte_log.h>\n+#include <rte_string_fns.h>\n+#include <rte_virtio_net.h>\n+\n+#include \"virtio-net-cdev.h\"\n+#include \"vhost-net.h\"\n+#include \"eventfd_link/eventfd_link.h\"\n+\n+#define FUSE_OPT_DUMMY \"\\0\\0\"\n+#define FUSE_OPT_FORE  \"-f\\0\\0\"\n+#define FUSE_OPT_NOMULTI \"-s\\0\\0\"\n+\n+static const uint32_t default_major = 231;\n+static const uint32_t default_minor = 1;\n+static const char cuse_device_name[] = \"/dev/cuse\";\n+static const char default_cdev[] = \"vhost-net\";\n+static const char eventfd_cdev[] = \"/dev/eventfd-link\";\n+\n+static struct fuse_session *session;\n+const struct vhost_net_device_ops const *ops;\n+\n+/*\n+ * Returns vhost_device_ctx from given fuse_req_t. 
The index is populated later\n+ * when the device is added to the device linked list.\n+ */\n+static struct vhost_device_ctx\n+fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)\n+{\n+\tstruct vhost_device_ctx ctx;\n+\tstruct fuse_ctx const *const req_ctx = fuse_req_ctx(req);\n+\n+\tctx.pid = req_ctx->pid;\n+\tctx.fh = fi->fh;\n+\n+\treturn ctx;\n+}\n+\n+/*\n+ * When the device is created in QEMU it gets initialised here and\n+ * added to the device linked list.\n+ */\n+static void\n+vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)\n+{\n+\tstruct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);\n+\tint err = 0;\n+\n+\terr = ops->new_device(ctx);\n+\tif (err == -1) {\n+\t\tfuse_reply_err(req, EPERM);\n+\t\treturn;\n+\t}\n+\n+\tfi->fh = err;\n+\n+\tRTE_LOG(INFO, VHOST_CONFIG,\n+\t\t\"(%\"PRIu64\") Device configuration started\\n\", fi->fh);\n+\tfuse_reply_open(req, fi);\n+}\n+\n+/*\n+ * When QEMU is shutdown or killed the device gets released.\n+ */\n+static void\n+vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)\n+{\n+\tint err = 0;\n+\tstruct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);\n+\n+\tops->destroy_device(ctx);\n+\tRTE_LOG(INFO, VHOST_CONFIG, \"(%\"PRIu64\") Device released\\n\", ctx.fh);\n+\tfuse_reply_err(req, err);\n+}\n+\n+/*\n+ * Boilerplate code for CUSE IOCTL\n+ * Implicit arguments: ctx, req, result.\n+ */\n+#define VHOST_IOCTL(func) do {\t\\\n+\tresult = (func)(ctx);\t\\\n+\tfuse_reply_ioctl(req, result, NULL, 0);\t\\\n+} while (0)\n+\n+/*\n+ * Boilerplate IOCTL RETRY\n+ * Implicit arguments: req.\n+ */\n+#define VHOST_IOCTL_RETRY(size_r, size_w) do {\t\\\n+\tstruct iovec iov_r = { arg, (size_r) };\t\\\n+\tstruct iovec iov_w = { arg, (size_w) };\t\\\n+\tfuse_reply_ioctl_retry(req, &iov_r,\t\\\n+\t\t(size_r) ? 1 : 0, &iov_w, (size_w) ? 
1 : 0);\\\n+} while (0)\n+\n+/*\n+ * Boilerplate code for CUSE Read IOCTL\n+ * Implicit arguments: ctx, req, result, in_bufsz, in_buf.\n+ */\n+#define VHOST_IOCTL_R(type, var, func) do {\t\\\n+\tif (!in_bufsz) {\t\t\t\\\n+\t\tVHOST_IOCTL_RETRY(sizeof(type), 0);\\\n+\t} else {\t\\\n+\t\t(var) = *(const type*)in_buf;\t\\\n+\t\tresult = func(ctx, &(var));\t\\\n+\t\tfuse_reply_ioctl(req, result, NULL, 0);\\\n+\t}\t\\\n+} while (0)\n+\n+/*\n+ * Boilerplate code for CUSE Write IOCTL\n+ * Implicit arguments: ctx, req, result, out_bufsz.\n+ */\n+#define VHOST_IOCTL_W(type, var, func) do {\t\\\n+\tif (!out_bufsz) {\t\t\t\\\n+\t\tVHOST_IOCTL_RETRY(0, sizeof(type));\\\n+\t} else {\t\\\n+\t\tresult = (func)(ctx, &(var));\\\n+\t\tfuse_reply_ioctl(req, result, &(var), sizeof(type));\\\n+\t} \\\n+} while (0)\n+\n+/*\n+ * Boilerplate code for CUSE Read/Write IOCTL\n+ * Implicit arguments: ctx, req, result, in_bufsz, in_buf.\n+ */\n+#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do {\t\\\n+\tif (!in_bufsz) {\t\\\n+\t\tVHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\\\n+\t} else {\t\\\n+\t\t(var1) = *(const type1*) (in_buf);\t\\\n+\t\tresult = (func)(ctx, (var1), &(var2));\t\\\n+\t\tfuse_reply_ioctl(req, result, &(var2), sizeof(type2));\\\n+\t} \\\n+} while (0)\n+\n+/*\n+ * This function uses the eventfd_link kernel module to copy an eventfd file\n+ * descriptor provided by QEMU in to our process space.\n+ */\n+static int\n+eventfd_copy(int target_fd, int target_pid)\n+{\n+\tint eventfd_link, ret;\n+\tstruct eventfd_copy eventfd_copy;\n+\tint fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);\n+\n+\tif (fd == -1)\n+\t\treturn -1;\n+\n+\t/* Open the character device to the kernel module. */\n+\t/* TODO: check this earlier rather than fail until VM boots! 
*/\n+\teventfd_link = open(eventfd_cdev, O_RDWR);\n+\tif (eventfd_link < 0) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\"eventfd_link module is not loaded\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\teventfd_copy.source_fd = fd;\n+\teventfd_copy.target_fd = target_fd;\n+\teventfd_copy.target_pid = target_pid;\n+\t/* Call the IOCTL to copy the eventfd. */\n+\tret = ioctl(eventfd_link, EVENTFD_COPY, &eventfd_copy);\n+\tclose(eventfd_link);\n+\n+\tif (ret < 0) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\"EVENTFD_COPY ioctl failed\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\treturn fd;\n+}\n+\n+/*\n+ * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on\n+ * the type of IOCTL a buffer is requested to read or to write. This\n+ * request is handled by FUSE and the buffer is then given to CUSE.\n+ */\n+static void\n+vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,\n+\t\tstruct fuse_file_info *fi, __rte_unused unsigned flags,\n+\t\tconst void *in_buf, size_t in_bufsz, size_t out_bufsz)\n+{\n+\tstruct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);\n+\tstruct vhost_vring_file file;\n+\tstruct vhost_vring_state state;\n+\tstruct vhost_vring_addr addr;\n+\tuint64_t features;\n+\tuint32_t index;\n+\tint result = 0;\n+\n+\tswitch (cmd) {\n+\tcase VHOST_NET_SET_BACKEND:\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_NET_SET_BACKEND\\n\", ctx.fh);\n+\t\tVHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);\n+\t\tbreak;\n+\n+\tcase VHOST_GET_FEATURES:\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_GET_FEATURES\\n\", ctx.fh);\n+\t\tVHOST_IOCTL_W(uint64_t, features, ops->get_features);\n+\t\tbreak;\n+\n+\tcase VHOST_SET_FEATURES:\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_SET_FEATURES\\n\", ctx.fh);\n+\t\tVHOST_IOCTL_R(uint64_t, features, ops->set_features);\n+\t\tbreak;\n+\n+\tcase VHOST_RESET_OWNER:\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_RESET_OWNER\\n\", 
ctx.fh);\n+\t\tVHOST_IOCTL(ops->reset_owner);\n+\t\tbreak;\n+\n+\tcase VHOST_SET_OWNER:\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_SET_OWNER\\n\", ctx.fh);\n+\t\tVHOST_IOCTL(ops->set_owner);\n+\t\tbreak;\n+\n+\tcase VHOST_SET_MEM_TABLE:\n+\t\t/*TODO fix race condition.*/\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_SET_MEM_TABLE\\n\", ctx.fh);\n+\t\tstatic struct vhost_memory mem_temp;\n+\t\tswitch (in_bufsz) {\n+\t\tcase 0:\n+\t\t\tVHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);\n+\t\t\tbreak;\n+\n+\t\tcase sizeof(struct vhost_memory):\n+\t\t\tmem_temp = *(const struct vhost_memory *) in_buf;\n+\n+\t\t\tif (mem_temp.nregions > 0) {\n+\t\t\t\tVHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +\n+\t\t\t\t\t(sizeof(struct vhost_memory_region) *\n+\t\t\t\t\t\tmem_temp.nregions), 0);\n+\t\t\t} else {\n+\t\t\t\tresult = -1;\n+\t\t\t\tfuse_reply_ioctl(req, result, NULL, 0);\n+\t\t\t}\n+\t\t\tbreak;\n+\n+\t\tdefault:\n+\t\t\tresult = cuse_set_mem_table(ctx, in_buf,\n+\t\t\t\tmem_temp.nregions);\n+\t\t\tif (result)\n+\t\t\t\tfuse_reply_err(req, EINVAL);\n+\t\t\telse\n+\t\t\t\tfuse_reply_ioctl(req, result, NULL, 0);\n+\t\t}\n+\t\tbreak;\n+\n+\tcase VHOST_SET_VRING_NUM:\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_SET_VRING_NUM\\n\", ctx.fh);\n+\t\tVHOST_IOCTL_R(struct vhost_vring_state, state, ops->set_vring_num);\n+\t\tbreak;\n+\n+\tcase VHOST_SET_VRING_BASE:\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_SET_VRING_BASE\\n\", ctx.fh);\n+\t\tVHOST_IOCTL_R(struct vhost_vring_state, state, ops->set_vring_base);\n+\t\tbreak;\n+\n+\tcase VHOST_GET_VRING_BASE:\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_GET_VRING_BASE\\n\", ctx.fh);\n+\t\tVHOST_IOCTL_RW(uint32_t, index,\n+\t\t\tstruct vhost_vring_state, state, ops->get_vring_base);\n+\t\tbreak;\n+\n+\tcase VHOST_SET_VRING_ADDR:\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_SET_VRING_ADDR\\n\", 
ctx.fh);\n+\t\tVHOST_IOCTL_R(struct vhost_vring_addr, addr, ops->set_vring_addr);\n+\t\tbreak;\n+\n+\tcase VHOST_SET_VRING_KICK:\n+\tcase VHOST_SET_VRING_CALL:\n+\t\tif (!in_buf) {\n+                \tVHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0);\n+\t\t} else {\n+\t\t\tint fd;\n+\t\t\tfile = *(const struct vhost_vring_file *)in_buf;\n+\t\t\tLOG_DEBUG(VHOST_CONFIG, \n+\t\t\t\t\"kick/call idx:%d fd:%d\\n\", file.index, file.fd);\n+\t\t\tif ((fd = eventfd_copy(file.fd, ctx.pid)) < 0){\n+\t\t\t\tfuse_reply_ioctl(req, -1, NULL, 0);\n+\t\t\t}\n+\t\t\tfile.fd = fd;\n+\t\t\tif (cmd == VHOST_SET_VRING_KICK) {\n+\t\t\t\tVHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_vring_call);\n+\t\t\t}\n+\t\t\telse { \n+\t\t\t\tVHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_vring_kick);\n+\t\t\t}\n+\t\t}\n+\t\tbreak;\n+\n+\tdefault:\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") IOCTL: DOESN NOT EXIST\\n\", ctx.fh);\n+\t\tresult = -1;\n+\t\tfuse_reply_ioctl(req, result, NULL, 0);\n+\t}\n+\n+\tif (result < 0)\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") IOCTL: FAIL\\n\", ctx.fh);\n+\telse\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") IOCTL: SUCCESS\\n\", ctx.fh);\n+}\n+\n+/*\n+ * Structure handling open, release and ioctl function pointers is populated.\n+ */\n+static const struct cuse_lowlevel_ops vhost_net_ops = {\n+\t.open\t\t= vhost_net_open,\n+\t.release\t= vhost_net_release,\n+\t.ioctl\t\t= vhost_net_ioctl,\n+};\n+\n+/*\n+ * cuse_info is populated and used to register the cuse device.\n+ * vhost_net_device_ops are also passed when the device is registered in app.\n+ */\n+int\n+rte_vhost_driver_register(const char *dev_name)\n+{\n+\tstruct cuse_info cuse_info;\n+\tchar device_name[PATH_MAX] = \"\";\n+\tchar char_device_name[PATH_MAX] = \"\";\n+\tconst char *device_argv[] = { device_name };\n+\n+\tchar fuse_opt_dummy[] = FUSE_OPT_DUMMY;\n+\tchar fuse_opt_fore[] = FUSE_OPT_FORE;\n+\tchar fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;\n+\tchar 
*fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};\n+\n+\tif (access(cuse_device_name, R_OK | W_OK) < 0) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\"char device %s can't be accessed, maybe not exist\\n\",\n+\t\t\tcuse_device_name);\n+\t\treturn -1;\n+\t}\n+\n+\t/*\n+\t * The device name is created. This is passed to QEMU so that it can\n+\t * register the device with our application.\n+\t */\n+\tsnprintf(device_name, PATH_MAX, \"DEVNAME=%s\", dev_name);\n+\tsnprintf(char_device_name, PATH_MAX, \"/dev/%s\", dev_name);\n+\n+\t/* Check if device already exists. */\n+\tif (access(char_device_name, F_OK) != -1) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\"char device %s already exists\\n\", char_device_name);\n+\t\treturn -1;\n+\t}\n+\n+\tmemset(&cuse_info, 0, sizeof(cuse_info));\n+\tcuse_info.dev_major = default_major;\n+\tcuse_info.dev_minor = default_minor;\n+\tcuse_info.dev_info_argc = 1;\n+\tcuse_info.dev_info_argv = device_argv;\n+\tcuse_info.flags = CUSE_UNRESTRICTED_IOCTL;\n+\n+\tops = get_virtio_net_callbacks();\n+\n+\tsession = cuse_lowlevel_setup(3, fuse_argv,\n+\t\t\t&cuse_info, &vhost_net_ops, 0, NULL);\n+\tif (session == NULL)\n+\t\treturn -1;\n+\n+\treturn 0;\n+}\n+\n+/**\n+ * The CUSE session is launched allowing the application to receive open,\n+ * release and ioctl calls.\n+ */\n+int\n+rte_vhost_driver_session_start(void)\n+{\n+\tfuse_session_loop(session);\n+\n+\treturn 0;\n+}\ndiff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c\nnew file mode 100644\nindex 0000000..5c16aa5\n--- /dev/null\n+++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c\n@@ -0,0 +1,314 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright(c) 2010-2014 Intel Corporation. 
All rights reserved.\n+ *   All rights reserved.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the name of Intel Corporation nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+#include <stdint.h>\n+#include <dirent.h>\n+#include <linux/vhost.h>\n+#include <linux/virtio_net.h>\n+#include <fuse/cuse_lowlevel.h>\n+#include <stddef.h>\n+#include <string.h>\n+#include <stdlib.h>\n+#include <sys/eventfd.h>\n+#include <sys/mman.h>\n+#include <sys/types.h>\n+#include <unistd.h>\n+#include <errno.h>\n+\n+#include <rte_log.h>\n+\n+#include \"vhost-net.h\"\n+#include \"virtio-net-cdev.h\"\n+\n+extern struct vhost_net_device_ops const *ops;\n+\n+/* Line size for reading maps file. */\n+static const uint32_t BUFSIZE = PATH_MAX;\n+\n+/* Size of prot char array in procmap. */\n+#define PROT_SZ 5\n+\n+/* Number of elements in procmap struct. */\n+#define PROCMAP_SZ 8\n+\n+/* Structure containing information gathered from maps file. */\n+struct procmap {\n+\tuint64_t va_start;\t/* Start virtual address in file. */\n+\tuint64_t len;\t\t/* Size of file. */\n+\tuint64_t pgoff;\t\t/* Not used. */\n+\tuint32_t maj;\t\t/* Not used. */\n+\tuint32_t min;\t\t/* Not used. */\n+\tuint32_t ino;\t\t/* Not used. */\n+\tchar prot[PROT_SZ];\t/* Not used. */\n+\tchar fname[PATH_MAX];\t/* File name. 
*/\n+};\n+\n+/*\n+ * Locate the file containing QEMU's memory space and\n+ * map it to our address space.\n+ */\n+static int\n+host_memory_map(pid_t pid, uint64_t addr,\n+\tuint64_t *mapped_address, uint64_t *mapped_size)\n+{\n+\tstruct dirent *dptr = NULL;\n+\tstruct procmap procmap;\n+\tDIR *dp = NULL;\n+\tint fd;\n+\tint i;\n+\tchar memfile[PATH_MAX];\n+\tchar mapfile[PATH_MAX];\n+\tchar procdir[PATH_MAX];\n+\tchar resolved_path[PATH_MAX];\n+\tFILE *fmap;\n+\tvoid *map;\n+\tuint8_t found = 0;\n+\tchar line[BUFSIZE];\n+\tchar dlm[] = \"-   :   \";\n+\tchar *str, *sp, *in[PROCMAP_SZ];\n+\tchar *end = NULL;\n+\n+\t/* Path where mem files are located. */\n+\tsnprintf(procdir, PATH_MAX, \"/proc/%u/fd/\", pid);\n+\t/* Maps file used to locate mem file. */\n+\tsnprintf(mapfile, PATH_MAX, \"/proc/%u/maps\", pid);\n+\n+\tfmap = fopen(mapfile, \"r\");\n+\tif (fmap == NULL) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\"Failed to open maps file for pid %d\\n\", pid);\n+\t\treturn -1;\n+\t}\n+\n+\t/* Read through maps file until we find out base_address. */\n+\twhile (fgets(line, BUFSIZE, fmap) != 0) {\n+\t\tstr = line;\n+\t\terrno = 0;\n+\t\t/* Split line in to fields. */\n+\t\tfor (i = 0; i < PROCMAP_SZ; i++) {\n+\t\t\tin[i] = strtok_r(str, &dlm[i], &sp);\n+\t\t\tif ((in[i] == NULL) || (errno != 0)) {\n+\t\t\t\tfclose(fmap);\n+\t\t\t\treturn -1;\n+\t\t\t}\n+\t\t\tstr = NULL;\n+\t\t}\n+\n+\t\t/* Convert/Copy each field as needed. 
*/\n+\t\tprocmap.va_start = strtoull(in[0], &end, 16);\n+\t\tif ((in[0] == '\\0') || (end == NULL) || (*end != '\\0') ||\n+\t\t\t(errno != 0)) {\n+\t\t\tfclose(fmap);\n+\t\t\treturn -1;\n+\t\t}\n+\n+\t\tprocmap.len = strtoull(in[1], &end, 16);\n+\t\tif ((in[1] == '\\0') || (end == NULL) || (*end != '\\0') ||\n+\t\t\t(errno != 0)) {\n+\t\t\tfclose(fmap);\n+\t\t\treturn -1;\n+\t\t}\n+\n+\t\tprocmap.pgoff = strtoull(in[3], &end, 16);\n+\t\tif ((in[3] == '\\0') || (end == NULL) || (*end != '\\0') ||\n+\t\t\t(errno != 0)) {\n+\t\t\tfclose(fmap);\n+\t\t\treturn -1;\n+\t\t}\n+\n+\t\tprocmap.maj = strtoul(in[4], &end, 16);\n+\t\tif ((in[4] == '\\0') || (end == NULL) || (*end != '\\0') ||\n+\t\t\t(errno != 0)) {\n+\t\t\tfclose(fmap);\n+\t\t\treturn -1;\n+\t\t}\n+\n+\t\tprocmap.min = strtoul(in[5], &end, 16);\n+\t\tif ((in[5] == '\\0') || (end == NULL) || (*end != '\\0') ||\n+\t\t\t(errno != 0)) {\n+\t\t\tfclose(fmap);\n+\t\t\treturn -1;\n+\t\t}\n+\n+\t\tprocmap.ino = strtoul(in[6], &end, 16);\n+\t\tif ((in[6] == '\\0') || (end == NULL) || (*end != '\\0') ||\n+\t\t\t(errno != 0)) {\n+\t\t\tfclose(fmap);\n+\t\t\treturn -1;\n+\t\t}\n+\n+\t\tmemcpy(&procmap.prot, in[2], PROT_SZ);\n+\t\tmemcpy(&procmap.fname, in[7], PATH_MAX);\n+\n+\t\tif (procmap.va_start == addr) {\n+\t\t\tprocmap.len = procmap.len - procmap.va_start;\n+\t\t\tfound = 1;\n+\t\t\tbreak;\n+\t\t}\n+\t}\n+\tfclose(fmap);\n+\n+\tif (!found) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\"Failed to find memory file in pid %d maps file\\n\", pid);\n+\t\treturn -1;\n+\t}\n+\n+\t/* Find the guest memory file among the process fds. */\n+\tdp = opendir(procdir);\n+\tif (dp == NULL) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\"Cannot open pid %d process directory\\n\",\n+\t\t\tpid);\n+\t\treturn -1;\n+\n+\t}\n+\n+\tfound = 0;\n+\n+\t/* Read the fd directory contents. 
*/\n+\twhile (NULL != (dptr = readdir(dp))) {\n+\t\tsnprintf(memfile, PATH_MAX, \"/proc/%u/fd/%s\",\n+\t\t\t\tpid, dptr->d_name);\n+\t\trealpath(memfile, resolved_path);\n+\t\tif (resolved_path == NULL) {\n+\t\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\t\"Failed to resolve fd directory\\n\");\n+\t\t\tclosedir(dp);\n+\t\t\treturn -1;\n+\t\t}\n+\t\tif (strncmp(resolved_path, procmap.fname,\n+\t\t\tstrnlen(procmap.fname, PATH_MAX)) == 0) {\n+\t\t\tfound = 1;\n+\t\t\tbreak;\n+\t\t}\n+\t}\n+\n+\tclosedir(dp);\n+\n+\tif (found == 0) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\"Failed to find memory file for pid %d\\n\",\n+\t\t\tpid);\n+\t\treturn -1;\n+\t}\n+\t/* Open the shared memory file and map the memory into this process. */\n+\tfd = open(memfile, O_RDWR);\n+\n+\tif (fd == -1) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\"Failed to open %s for pid %d\\n\",\n+\t\t\tmemfile, pid);\n+\t\treturn -1;\n+\t}\n+\n+\tmap = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE ,\n+\t\t\tMAP_POPULATE|MAP_SHARED, fd, 0);\n+\tclose(fd);\n+\n+\tif (map == MAP_FAILED) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\"Error mapping the file %s for pid %d\\n\",\n+\t\t\tmemfile, pid);\n+\t\treturn -1;\n+\t}\n+\n+\t/* Store the memory address and size in the device data structure */\n+\t*mapped_address = (uint64_t)(uintptr_t)map;\n+\t*mapped_size = procmap.len;\n+\n+\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\"Mem File: %s->%s - Size: %llu - VA: %p\\n\",\n+\t\tmemfile, resolved_path,\n+\t\t(unsigned long long)mapped_size, map);\n+\n+\treturn 0;\n+}\n+\n+int\n+cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory *mem_regions_addr,\n+\tuint32_t nregions)\n+{\n+\tuint64_t size = offsetof(struct vhost_memory, regions);\n+\tuint32_t idx;\n+\tstruct virtio_memory_regions regions[8]; /* VHOST_MAX_MEMORY_REGIONS */\n+\tstruct vhost_memory_region *mem_regions = (void *)(uintptr_t)\n+\t\t\t((uint64_t)(uintptr_t)mem_regions_addr + size);\n+\tuint64_t base_address = 0, mapped_address, 
mapped_size;\n+\n+\tfor (idx = 0; idx < nregions; idx++) {\n+\t\tregions[idx].guest_phys_address =\n+\t\t\tmem_regions[idx].guest_phys_addr;\n+\t\tregions[idx].guest_phys_address_end =\n+\t\t\tregions[idx].guest_phys_address +\n+\t\t\tmem_regions[idx].memory_size;\n+\t\tregions[idx].memory_size =\n+\t\t\tmem_regions[idx].memory_size;\n+\t\tregions[idx].userspace_address =\n+\t\t\tmem_regions[idx].userspace_addr;\n+\n+\t\tLOG_DEBUG(VHOST_CONFIG, \"REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%\"PRIu64\")\\n\",\n+\t\t\tidx,\n+\t\t\t(void *)(uintptr_t)regions[idx].guest_phys_address,\n+\t\t\t(void *)(uintptr_t)regions[idx].userspace_address,\n+\t\t\tregions[idx].memory_size);\n+\n+\t\t/*set the base address mapping*/\n+\t\tif (regions[idx].guest_phys_address == 0x0) {\n+\t\t\tbase_address =\n+\t\t\t\tregions[idx].userspace_address;\n+\t\t\t/* Map VM memory file */\n+\t\t\tif (host_memory_map(ctx.pid, base_address, \n+\t\t\t\t&mapped_address, &mapped_size) != 0) {\n+\t\t\t\treturn -1;\n+\t\t\t}\n+\t\t}\n+\t}\n+\n+\t/* Check that we have a valid base address. */\n+\tif (base_address == 0) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG, \n+\t\t\t\"Failed to find base address of qemu memory file.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tfor (idx = 0; idx < nregions; idx++) {\n+\t\tregions[idx].address_offset = \n+\t\t\tmapped_address - base_address +\n+\t\t\tregions[idx].userspace_address -\n+\t\t\tregions[idx].guest_phys_address;\n+\t}\n+\t\n+\tops->set_mem_table(ctx, &regions[0], nregions);\n+\treturn 0;\n+}\ndiff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h\nnew file mode 100644\nindex 0000000..6f98ce8\n--- /dev/null\n+++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h\n@@ -0,0 +1,43 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright(c) 2010-2014 Intel Corporation. 
All rights reserved.\n+ *   All rights reserved.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the name of Intel Corporation nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+#ifndef _VIRTIO_NET_CDEV_H\n+#define _VIRTIO_NET_CDEV_H\n+#include <stdint.h>\n+\n+#include \"vhost-net.h\"\n+\n+int\n+cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory *mem_regions_addr,\n+\tuint32_t nregions);\n+\n+#endif\ndiff --git a/lib/librte_vhost/vhost-net-cdev.c b/lib/librte_vhost/vhost-net-cdev.c\ndeleted file mode 100644\nindex 57c76cb..0000000\n--- a/lib/librte_vhost/vhost-net-cdev.c\n+++ /dev/null\n@@ -1,389 +0,0 @@\n-/*-\n- *   BSD LICENSE\n- *\n- *   Copyright(c) 2010-2014 Intel Corporation. 
All rights reserved.\n- *   All rights reserved.\n- *\n- *   Redistribution and use in source and binary forms, with or without\n- *   modification, are permitted provided that the following conditions\n- *   are met:\n- *\n- *     * Redistributions of source code must retain the above copyright\n- *       notice, this list of conditions and the following disclaimer.\n- *     * Redistributions in binary form must reproduce the above copyright\n- *       notice, this list of conditions and the following disclaimer in\n- *       the documentation and/or other materials provided with the\n- *       distribution.\n- *     * Neither the name of Intel Corporation nor the names of its\n- *       contributors may be used to endorse or promote products derived\n- *       from this software without specific prior written permission.\n- *\n- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n- *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n- *   A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT\n- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n- */\n-\n-#include <errno.h>\n-#include <fuse/cuse_lowlevel.h>\n-#include <linux/limits.h>\n-#include <linux/vhost.h>\n-#include <stdint.h>\n-#include <string.h>\n-#include <unistd.h>\n-\n-#include <rte_ethdev.h>\n-#include <rte_log.h>\n-#include <rte_string_fns.h>\n-#include <rte_virtio_net.h>\n-\n-#include \"vhost-net-cdev.h\"\n-\n-#define FUSE_OPT_DUMMY \"\\0\\0\"\n-#define FUSE_OPT_FORE  \"-f\\0\\0\"\n-#define FUSE_OPT_NOMULTI \"-s\\0\\0\"\n-\n-static const uint32_t default_major = 231;\n-static const uint32_t default_minor = 1;\n-static const char cuse_device_name[] = \"/dev/cuse\";\n-static const char default_cdev[] = \"vhost-net\";\n-\n-static struct fuse_session *session;\n-static struct vhost_net_device_ops const *ops;\n-\n-/*\n- * Returns vhost_device_ctx from given fuse_req_t. 
The index is populated later\n- * when the device is added to the device linked list.\n- */\n-static struct vhost_device_ctx\n-fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)\n-{\n-\tstruct vhost_device_ctx ctx;\n-\tstruct fuse_ctx const *const req_ctx = fuse_req_ctx(req);\n-\n-\tctx.pid = req_ctx->pid;\n-\tctx.fh = fi->fh;\n-\n-\treturn ctx;\n-}\n-\n-/*\n- * When the device is created in QEMU it gets initialised here and\n- * added to the device linked list.\n- */\n-static void\n-vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)\n-{\n-\tstruct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);\n-\tint err = 0;\n-\n-\terr = ops->new_device(ctx);\n-\tif (err == -1) {\n-\t\tfuse_reply_err(req, EPERM);\n-\t\treturn;\n-\t}\n-\n-\tfi->fh = err;\n-\n-\tRTE_LOG(INFO, VHOST_CONFIG,\n-\t\t\"(%\"PRIu64\") Device configuration started\\n\", fi->fh);\n-\tfuse_reply_open(req, fi);\n-}\n-\n-/*\n- * When QEMU is shutdown or killed the device gets released.\n- */\n-static void\n-vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)\n-{\n-\tint err = 0;\n-\tstruct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);\n-\n-\tops->destroy_device(ctx);\n-\tRTE_LOG(INFO, VHOST_CONFIG, \"(%\"PRIu64\") Device released\\n\", ctx.fh);\n-\tfuse_reply_err(req, err);\n-}\n-\n-/*\n- * Boilerplate code for CUSE IOCTL\n- * Implicit arguments: ctx, req, result.\n- */\n-#define VHOST_IOCTL(func) do {\t\\\n-\tresult = (func)(ctx);\t\\\n-\tfuse_reply_ioctl(req, result, NULL, 0);\t\\\n-} while (0)\n-\n-/*\n- * Boilerplate IOCTL RETRY\n- * Implicit arguments: req.\n- */\n-#define VHOST_IOCTL_RETRY(size_r, size_w) do {\t\\\n-\tstruct iovec iov_r = { arg, (size_r) };\t\\\n-\tstruct iovec iov_w = { arg, (size_w) };\t\\\n-\tfuse_reply_ioctl_retry(req, &iov_r,\t\\\n-\t\t(size_r) ? 1 : 0, &iov_w, (size_w) ? 
1 : 0);\\\n-} while (0)\n-\n-/*\n- * Boilerplate code for CUSE Read IOCTL\n- * Implicit arguments: ctx, req, result, in_bufsz, in_buf.\n- */\n-#define VHOST_IOCTL_R(type, var, func) do {\t\\\n-\tif (!in_bufsz) {\t\\\n-\t\tVHOST_IOCTL_RETRY(sizeof(type), 0);\\\n-\t} else {\t\\\n-\t\t(var) = *(const type*)in_buf;\t\\\n-\t\tresult = func(ctx, &(var));\t\\\n-\t\tfuse_reply_ioctl(req, result, NULL, 0);\\\n-\t}\t\\\n-} while (0)\n-\n-/*\n- * Boilerplate code for CUSE Write IOCTL\n- * Implicit arguments: ctx, req, result, out_bufsz.\n- */\n-#define VHOST_IOCTL_W(type, var, func) do {\t\\\n-\tif (!out_bufsz) {\t\\\n-\t\tVHOST_IOCTL_RETRY(0, sizeof(type));\\\n-\t} else {\t\\\n-\t\tresult = (func)(ctx, &(var));\\\n-\t\tfuse_reply_ioctl(req, result, &(var), sizeof(type));\\\n-\t} \\\n-} while (0)\n-\n-/*\n- * Boilerplate code for CUSE Read/Write IOCTL\n- * Implicit arguments: ctx, req, result, in_bufsz, in_buf.\n- */\n-#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do {\t\\\n-\tif (!in_bufsz) {\t\\\n-\t\tVHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\\\n-\t} else {\t\\\n-\t\t(var1) = *(const type1*) (in_buf);\t\\\n-\t\tresult = (func)(ctx, (var1), &(var2));\t\\\n-\t\tfuse_reply_ioctl(req, result, &(var2), sizeof(type2));\\\n-\t}\t\\\n-} while (0)\n-\n-/*\n- * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type\n- * of IOCTL a buffer is requested to read or to write. 
This request is handled\n- * by FUSE and the buffer is then given to CUSE.\n- */\n-static void\n-vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,\n-\t\tstruct fuse_file_info *fi, __rte_unused unsigned flags,\n-\t\tconst void *in_buf, size_t in_bufsz, size_t out_bufsz)\n-{\n-\tstruct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);\n-\tstruct vhost_vring_file file;\n-\tstruct vhost_vring_state state;\n-\tstruct vhost_vring_addr addr;\n-\tuint64_t features;\n-\tuint32_t index;\n-\tint result = 0;\n-\n-\tswitch (cmd) {\n-\tcase VHOST_NET_SET_BACKEND:\n-\t\tLOG_DEBUG(VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_NET_SET_BACKEND\\n\", ctx.fh);\n-\t\tVHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);\n-\t\tbreak;\n-\n-\tcase VHOST_GET_FEATURES:\n-\t\tLOG_DEBUG(VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_GET_FEATURES\\n\", ctx.fh);\n-\t\tVHOST_IOCTL_W(uint64_t, features, ops->get_features);\n-\t\tbreak;\n-\n-\tcase VHOST_SET_FEATURES:\n-\t\tLOG_DEBUG(VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_SET_FEATURES\\n\", ctx.fh);\n-\t\tVHOST_IOCTL_R(uint64_t, features, ops->set_features);\n-\t\tbreak;\n-\n-\tcase VHOST_RESET_OWNER:\n-\t\tLOG_DEBUG(VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_RESET_OWNER\\n\", ctx.fh);\n-\t\tVHOST_IOCTL(ops->reset_owner);\n-\t\tbreak;\n-\n-\tcase VHOST_SET_OWNER:\n-\t\tLOG_DEBUG(VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_SET_OWNER\\n\", ctx.fh);\n-\t\tVHOST_IOCTL(ops->set_owner);\n-\t\tbreak;\n-\n-\tcase VHOST_SET_MEM_TABLE:\n-\t\t/*TODO fix race condition.*/\n-\t\tLOG_DEBUG(VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_SET_MEM_TABLE\\n\", ctx.fh);\n-\t\tstatic struct vhost_memory mem_temp;\n-\n-\t\tswitch (in_bufsz) {\n-\t\tcase 0:\n-\t\t\tVHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);\n-\t\t\tbreak;\n-\n-\t\tcase sizeof(struct vhost_memory):\n-\t\t\tmem_temp = *(const struct vhost_memory *) in_buf;\n-\n-\t\t\tif (mem_temp.nregions > 0) {\n-\t\t\t\tVHOST_IOCTL_RETRY(sizeof(struct 
vhost_memory) +\n-\t\t\t\t\t(sizeof(struct vhost_memory_region) *\n-\t\t\t\t\t\tmem_temp.nregions), 0);\n-\t\t\t} else {\n-\t\t\t\tresult = -1;\n-\t\t\t\tfuse_reply_ioctl(req, result, NULL, 0);\n-\t\t\t}\n-\t\t\tbreak;\n-\n-\t\tdefault:\n-\t\t\tresult = ops->set_mem_table(ctx,\n-\t\t\t\t\tin_buf, mem_temp.nregions);\n-\t\t\tif (result)\n-\t\t\t\tfuse_reply_err(req, EINVAL);\n-\t\t\telse\n-\t\t\t\tfuse_reply_ioctl(req, result, NULL, 0);\n-\t\t}\n-\t\tbreak;\n-\n-\tcase VHOST_SET_VRING_NUM:\n-\t\tLOG_DEBUG(VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_SET_VRING_NUM\\n\", ctx.fh);\n-\t\tVHOST_IOCTL_R(struct vhost_vring_state, state,\n-\t\t\tops->set_vring_num);\n-\t\tbreak;\n-\n-\tcase VHOST_SET_VRING_BASE:\n-\t\tLOG_DEBUG(VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_SET_VRING_BASE\\n\", ctx.fh);\n-\t\tVHOST_IOCTL_R(struct vhost_vring_state, state,\n-\t\t\tops->set_vring_base);\n-\t\tbreak;\n-\n-\tcase VHOST_GET_VRING_BASE:\n-\t\tLOG_DEBUG(VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_GET_VRING_BASE\\n\", ctx.fh);\n-\t\tVHOST_IOCTL_RW(uint32_t, index,\n-\t\t\tstruct vhost_vring_state, state, ops->get_vring_base);\n-\t\tbreak;\n-\n-\tcase VHOST_SET_VRING_ADDR:\n-\t\tLOG_DEBUG(VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_SET_VRING_ADDR\\n\", ctx.fh);\n-\t\tVHOST_IOCTL_R(struct vhost_vring_addr, addr,\n-\t\t\tops->set_vring_addr);\n-\t\tbreak;\n-\n-\tcase VHOST_SET_VRING_KICK:\n-\t\tLOG_DEBUG(VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_SET_VRING_KICK\\n\", ctx.fh);\n-\t\tVHOST_IOCTL_R(struct vhost_vring_file, file,\n-\t\t\tops->set_vring_kick);\n-\t\tbreak;\n-\n-\tcase VHOST_SET_VRING_CALL:\n-\t\tLOG_DEBUG(VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") IOCTL: VHOST_SET_VRING_CALL\\n\", ctx.fh);\n-\t\tVHOST_IOCTL_R(struct vhost_vring_file, file,\n-\t\t\tops->set_vring_call);\n-\t\tbreak;\n-\n-\tdefault:\n-\t\tRTE_LOG(ERR, VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") IOCTL: DOESN NOT EXIST\\n\", ctx.fh);\n-\t\tresult = -1;\n-\t\tfuse_reply_ioctl(req, result, 
NULL, 0);\n-\t}\n-\n-\tif (result < 0)\n-\t\tLOG_DEBUG(VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") IOCTL: FAIL\\n\", ctx.fh);\n-\telse\n-\t\tLOG_DEBUG(VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") IOCTL: SUCCESS\\n\", ctx.fh);\n-}\n-\n-/*\n- * Structure handling open, release and ioctl function pointers is populated.\n- */\n-static const struct cuse_lowlevel_ops vhost_net_ops = {\n-\t.open\t\t= vhost_net_open,\n-\t.release\t= vhost_net_release,\n-\t.ioctl\t\t= vhost_net_ioctl,\n-};\n-\n-/*\n- * cuse_info is populated and used to register the cuse device.\n- * vhost_net_device_ops are also passed when the device is registered in app.\n- */\n-int\n-rte_vhost_driver_register(const char *dev_name)\n-{\n-\tstruct cuse_info cuse_info;\n-\tchar device_name[PATH_MAX] = \"\";\n-\tchar char_device_name[PATH_MAX] = \"\";\n-\tconst char *device_argv[] = { device_name };\n-\n-\tchar fuse_opt_dummy[] = FUSE_OPT_DUMMY;\n-\tchar fuse_opt_fore[] = FUSE_OPT_FORE;\n-\tchar fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;\n-\tchar *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};\n-\n-\tif (access(cuse_device_name, R_OK | W_OK) < 0) {\n-\t\tRTE_LOG(ERR, VHOST_CONFIG,\n-\t\t\t\"char device %s can't be accessed, maybe not exist\\n\",\n-\t\t\tcuse_device_name);\n-\t\treturn -1;\n-\t}\n-\n-\t/*\n-\t * The device name is created. This is passed to QEMU so that it can\n-\t * register the device with our application.\n-\t */\n-\tsnprintf(device_name, PATH_MAX, \"DEVNAME=%s\", dev_name);\n-\tsnprintf(char_device_name, PATH_MAX, \"/dev/%s\", dev_name);\n-\n-\t/* Check if device already exists. 
*/\n-\tif (access(char_device_name, F_OK) != -1) {\n-\t\tRTE_LOG(ERR, VHOST_CONFIG,\n-\t\t\t\"char device %s already exists\\n\", char_device_name);\n-\t\treturn -1;\n-\t}\n-\n-\tmemset(&cuse_info, 0, sizeof(cuse_info));\n-\tcuse_info.dev_major = default_major;\n-\tcuse_info.dev_minor = default_minor;\n-\tcuse_info.dev_info_argc = 1;\n-\tcuse_info.dev_info_argv = device_argv;\n-\tcuse_info.flags = CUSE_UNRESTRICTED_IOCTL;\n-\n-\tops = get_virtio_net_callbacks();\n-\n-\tsession = cuse_lowlevel_setup(3, fuse_argv,\n-\t\t\t&cuse_info, &vhost_net_ops, 0, NULL);\n-\tif (session == NULL)\n-\t\treturn -1;\n-\n-\treturn 0;\n-}\n-\n-/**\n- * The CUSE session is launched allowing the application to receive open,\n- * release and ioctl calls.\n- */\n-int\n-rte_vhost_driver_session_start(void)\n-{\n-\tfuse_session_loop(session);\n-\n-\treturn 0;\n-}\ndiff --git a/lib/librte_vhost/vhost-net-cdev.h b/lib/librte_vhost/vhost-net-cdev.h\ndeleted file mode 100644\nindex 03a5c57..0000000\n--- a/lib/librte_vhost/vhost-net-cdev.h\n+++ /dev/null\n@@ -1,113 +0,0 @@\n-/*-\n- *   BSD LICENSE\n- *\n- *   Copyright(c) 2010-2014 Intel Corporation. 
All rights reserved.\n- *   All rights reserved.\n- *\n- *   Redistribution and use in source and binary forms, with or without\n- *   modification, are permitted provided that the following conditions\n- *   are met:\n- *\n- *     * Redistributions of source code must retain the above copyright\n- *       notice, this list of conditions and the following disclaimer.\n- *     * Redistributions in binary form must reproduce the above copyright\n- *       notice, this list of conditions and the following disclaimer in\n- *       the documentation and/or other materials provided with the\n- *       distribution.\n- *     * Neither the name of Intel Corporation nor the names of its\n- *       contributors may be used to endorse or promote products derived\n- *       from this software without specific prior written permission.\n- *\n- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n- *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n- *   A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT\n- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n- */\n-\n-#ifndef _VHOST_NET_CDEV_H_\n-#define _VHOST_NET_CDEV_H_\n-#include <stdint.h>\n-#include <stdio.h>\n-#include <sys/types.h>\n-#include <unistd.h>\n-#include <linux/vhost.h>\n-\n-#include <rte_log.h>\n-\n-/* Macros for printing using RTE_LOG */\n-#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1\n-#define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1\n-\n-#ifdef RTE_LIBRTE_VHOST_DEBUG\n-#define VHOST_MAX_PRINT_BUFF 6072\n-#define LOG_LEVEL RTE_LOG_DEBUG\n-#define LOG_DEBUG(log_type, fmt, args...) 
RTE_LOG(DEBUG, log_type, fmt, ##args)\n-#define PRINT_PACKET(device, addr, size, header) do { \\\n-\tchar *pkt_addr = (char *)(addr); \\\n-\tunsigned int index; \\\n-\tchar packet[VHOST_MAX_PRINT_BUFF]; \\\n-\t\\\n-\tif ((header)) \\\n-\t\tsnprintf(packet, VHOST_MAX_PRINT_BUFF, \"(%\"PRIu64\") Header size %d: \", (device->device_fh), (size)); \\\n-\telse \\\n-\t\tsnprintf(packet, VHOST_MAX_PRINT_BUFF, \"(%\"PRIu64\") Packet size %d: \", (device->device_fh), (size)); \\\n-\tfor (index = 0; index < (size); index++) { \\\n-\t\tsnprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \\\n-\t\t\t\"%02hhx \", pkt_addr[index]); \\\n-\t} \\\n-\tsnprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \"\\n\"); \\\n-\t\\\n-\tLOG_DEBUG(VHOST_DATA, \"%s\", packet); \\\n-} while (0)\n-#else\n-#define LOG_LEVEL RTE_LOG_INFO\n-#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)\n-#define PRINT_PACKET(device, addr, size, header) do {} while (0)\n-#endif\n-\n-\n-/*\n- * Structure used to identify device context.\n- */\n-struct vhost_device_ctx {\n-\tpid_t\t\tpid;\t/* PID of process calling the IOCTL. */\n-\tuint64_t\tfh;\t/* Populated with fi->fh to track the device index. */\n-};\n-\n-/*\n- * Structure contains function pointers to be defined in virtio-net.c. 
These\n- * functions are called in CUSE context and are used to configure devices.\n- */\n-struct vhost_net_device_ops {\n-\tint (*new_device)(struct vhost_device_ctx);\n-\tvoid (*destroy_device)(struct vhost_device_ctx);\n-\n-\tint (*get_features)(struct vhost_device_ctx, uint64_t *);\n-\tint (*set_features)(struct vhost_device_ctx, uint64_t *);\n-\n-\tint (*set_mem_table)(struct vhost_device_ctx, const void *, uint32_t);\n-\n-\tint (*set_vring_num)(struct vhost_device_ctx, struct vhost_vring_state *);\n-\tint (*set_vring_addr)(struct vhost_device_ctx, struct vhost_vring_addr *);\n-\tint (*set_vring_base)(struct vhost_device_ctx, struct vhost_vring_state *);\n-\tint (*get_vring_base)(struct vhost_device_ctx, uint32_t, struct vhost_vring_state *);\n-\n-\tint (*set_vring_kick)(struct vhost_device_ctx, struct vhost_vring_file *);\n-\tint (*set_vring_call)(struct vhost_device_ctx, struct vhost_vring_file *);\n-\n-\tint (*set_backend)(struct vhost_device_ctx, struct vhost_vring_file *);\n-\n-\tint (*set_owner)(struct vhost_device_ctx);\n-\tint (*reset_owner)(struct vhost_device_ctx);\n-};\n-\n-\n-struct vhost_net_device_ops const *get_virtio_net_callbacks(void);\n-#endif /* _VHOST_NET_CDEV_H_ */\ndiff --git a/lib/librte_vhost/vhost-user/fd_man.c b/lib/librte_vhost/vhost-user/fd_man.c\nnew file mode 100644\nindex 0000000..c7fd3f2\n--- /dev/null\n+++ b/lib/librte_vhost/vhost-user/fd_man.c\n@@ -0,0 +1,158 @@\n+#include <stdint.h>\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <sys/socket.h>\n+#include <sys/select.h>\n+#include <sys/time.h>\n+#include <sys/types.h>\n+#include <unistd.h>\n+\n+#include <rte_log.h>\n+\n+#include \"fd_man.h\"\n+\n+/**\n+ * Returns the index in the fdset for a fd.\n+ * If fd is -1, it means to search for a free entry.\n+ * @return\n+ *   Index for the fd, or -1 if fd isn't in the fdset.\n+ */\n+static int\n+fdset_find_fd(struct fdset *pfdset, int fd)\n+{\n+\tint i;\n+\n+\tfor (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; 
i++);\n+\t\t\n+\treturn i ==  pfdset->num ? -1 : i;\n+}\n+\n+static int\n+fdset_find_free_slot(struct fdset *pfdset)\n+{\n+\treturn fdset_find_fd(pfdset, -1);\n+\n+}\n+\n+static void\n+fdset_add_fd(struct fdset  *pfdset, int idx, int fd, fd_cb rcb, \n+\t\tfd_cb wcb, uint64_t dat)\n+{\n+\tstruct fdentry *pfdentry = &pfdset->fd[idx];\n+\n+\tpfdentry->fd = fd;\n+\tpfdentry->rcb = rcb;\n+\tpfdentry->wcb = wcb;\n+\tpfdentry->dat = dat;\n+}\n+\n+/**\n+ * Fill the read/write fdset with the fds in the fdset.\n+ * @return\n+ *  the maximum fds filled in the read/write fd_set.\n+ */\n+static int\n+fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)\n+{\n+\tstruct fdentry *pfdentry;\n+\tint i, maxfds = -1;\n+\tint num = MAX_FDS;\n+\n+\tfor (i = 0; i < num ; i++) {\n+\t\tpfdentry = &pfdset->fd[i];\n+\t\tif (pfdentry->fd != -1) {\n+\t\t\tint added = 0;\n+\t\t\tif (pfdentry->rcb && rfset) {\n+\t\t\t\tFD_SET(pfdentry->fd, rfset);\n+\t\t\t\tadded = 1;\n+\t\t\t}\n+\t\t\tif (pfdentry->wcb && wfset) {\n+\t\t\t\tFD_SET(pfdentry->fd, wfset);\n+\t\t\t\tadded = 1;\n+\t\t\t}\n+\t\t\tif (added)\n+\t\t\t\tmaxfds = pfdentry->fd < maxfds ?\n+\t\t\t\t\tmaxfds : pfdentry->fd;\n+\t\t}\n+\t}\n+\treturn maxfds;\n+}\n+\n+void\n+fdset_init(struct fdset *pfdset)\n+{\n+\tint i;\n+\n+\tfor (i = 0; i < MAX_FDS; i++)\n+\t\tpfdset->fd[i].fd = -1;\n+\tpfdset->num = MAX_FDS;\n+\n+}\n+\n+/**\n+ * Register the fd in the fdset with its read/write handler and context.\n+ */\n+int\n+fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)\n+{\n+\tint i;\n+\n+\tif (fd == -1)\n+\t\treturn -1;\n+\n+\t/* Find a free slot in the list. 
*/\n+\ti = fdset_find_free_slot(pfdset);\n+\tif (i == -1)\n+\t\treturn -2;\n+\n+\tfdset_add_fd(pfdset, i, fd, rcb, wcb, dat);\n+\n+\treturn 0;\n+}\n+\n+/**\n+ *  Unregister the fd from the fdset.\n+ */\n+void\n+fdset_del(struct fdset *pfdset, int fd)\n+{\n+\tint i;\n+\n+\ti = fdset_find_fd(pfdset, fd);\n+\tif (i != -1) {\n+\t\tpfdset->fd[i].fd = -1;\n+\t}\n+}\n+\n+\n+void\n+fdset_event_dispatch(struct fdset *pfdset)\n+{\n+\tfd_set rfds,wfds;\n+\tint i, maxfds;\n+\tstruct fdentry *pfdentry;\n+\tint num = MAX_FDS;\n+\n+\tif (pfdset == NULL)\n+\t\treturn;\n+\twhile (1) {\n+\t\tFD_ZERO(&rfds);\n+\t\tFD_ZERO(&wfds);\n+\t\tmaxfds = fdset_fill(&rfds, &wfds, pfdset);\n+\t\t/* fd management runs in one thread */\n+\t\tif (maxfds == -1) {\n+\t\t\treturn;\n+\t\t}\n+\n+\t\tselect(maxfds + 1, &rfds, &wfds, NULL, NULL);\n+\n+\t\tfor (i = 0; i < num; i++) {\n+\t\t\tpfdentry = &pfdset->fd[i];\n+\t\t\tif (FD_ISSET(pfdentry->fd, &rfds)) \n+\t\t\t\tpfdentry->rcb(pfdentry->fd, pfdentry->dat);\n+\t\t\tif (FD_ISSET(pfdentry->fd, &wfds))\n+\t\t\t\tpfdentry->wcb(pfdentry->fd, pfdentry->dat);\n+\t\t}\n+\t\t\n+\t}\n+}\ndiff --git a/lib/librte_vhost/vhost-user/fd_man.h b/lib/librte_vhost/vhost-user/fd_man.h\nnew file mode 100644\nindex 0000000..57cc81d\n--- /dev/null\n+++ b/lib/librte_vhost/vhost-user/fd_man.h\n@@ -0,0 +1,31 @@\n+#ifndef _FD_MAN_H_\n+#define _FD_MAN_H_\n+#include <stdint.h>\n+\n+#define MAX_FDS 1024\n+\n+typedef void (*fd_cb)(int fd, uint64_t dat);\n+\n+struct fdentry {\n+\tint fd; /* -1 indicates this entry is empty */\n+\tfd_cb rcb; /* callback when this fd is readable. 
*/\n+\tfd_cb wcb; /* callback when this fd is writeable.*/\n+\tuint64_t dat;\t/* fd context */\n+};\n+\n+struct fdset {\n+\tstruct fdentry fd[MAX_FDS];\n+\tint num;\t\n+};\n+\n+\n+void fdset_init(struct fdset *pfdset);\n+\n+int fdset_add(struct fdset *pfdset, int fd, fd_cb rcb,\n+\tfd_cb wcb, uint64_t ctx);\n+\n+void fdset_del(struct fdset *pfdset, int fd);\n+\n+void fdset_event_dispatch(struct fdset *pfdset);\n+\n+#endif\ndiff --git a/lib/librte_vhost/vhost-user/vhost-net-user.c b/lib/librte_vhost/vhost-user/vhost-net-user.c\nnew file mode 100644\nindex 0000000..34450f4\n--- /dev/null\n+++ b/lib/librte_vhost/vhost-user/vhost-net-user.c\n@@ -0,0 +1,417 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.\n+ *   All rights reserved.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the name of Intel Corporation nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+#include <stdint.h>\n+#include <stdio.h>\n+#include <limits.h>\n+#include <stdlib.h>\n+#include <unistd.h>\n+#include <string.h>\n+#include <sys/types.h>\n+#include <sys/socket.h>\n+#include <sys/un.h>\n+#include <errno.h>\n+\n+#include <rte_log.h>\n+#include <rte_virtio_net.h>\n+\n+#include \"fd_man.h\"\n+#include \"vhost-net-user.h\"\n+#include \"vhost-net.h\"\n+#include \"virtio-net-user.h\"\n+\n+static void vserver_new_vq_conn(int fd, uint64_t data);\n+static void vserver_message_handler(int fd, uint64_t dat);\n+const struct vhost_net_device_ops *ops;\n+\n+static struct vhost_server *g_vhost_server;\n+\n+static const char *vhost_message_str[VHOST_USER_MAX] =\n+{\n+\t[VHOST_USER_NONE] = \"VHOST_USER_NONE\",\n+\t[VHOST_USER_GET_FEATURES] = \"VHOST_USER_GET_FEATURES\",\n+\t[VHOST_USER_SET_FEATURES] = \"VHOST_USER_SET_FEATURES\",\n+\t[VHOST_USER_SET_OWNER] = \"VHOST_USER_SET_OWNER\",\n+\t[VHOST_USER_RESET_OWNER] = \"VHOST_USER_RESET_OWNER\",\n+\t[VHOST_USER_SET_MEM_TABLE] = \"VHOST_USER_SET_MEM_TABLE\",\n+\t[VHOST_USER_SET_LOG_BASE] = \"VHOST_USER_SET_LOG_BASE\",\n+\t[VHOST_USER_SET_LOG_FD] = \"VHOST_USER_SET_LOG_FD\",\n+\t[VHOST_USER_SET_VRING_NUM] = \"VHOST_USER_SET_VRING_NUM\",\n+\t[VHOST_USER_SET_VRING_ADDR] = \"VHOST_USER_SET_VRING_ADDR\",\n+\t[VHOST_USER_SET_VRING_BASE] = \"VHOST_USER_SET_VRING_BASE\",\n+\t[VHOST_USER_GET_VRING_BASE] = 
\"VHOST_USER_GET_VRING_BASE\",\n+\t[VHOST_USER_SET_VRING_KICK] = \"VHOST_USER_SET_VRING_KICK\",\n+\t[VHOST_USER_SET_VRING_CALL] = \"VHOST_USER_SET_VRING_CALL\",\n+\t[VHOST_USER_SET_VRING_ERR]  = \"VHOST_USER_SET_VRING_ERR\"\n+};\n+\n+/**\n+ * Create a unix domain socket and bind to path.\n+ * @return\n+ *  socket fd or -1 on failure\n+ */\n+static int\n+uds_socket(const char *path)\n+{\n+\tstruct sockaddr_un un;\n+\tint sockfd;\n+\tint ret;\n+\n+\tif (path == NULL)\n+\t\treturn -1;\n+\n+\tsockfd = socket(AF_UNIX, SOCK_STREAM, 0);\n+\tif (sockfd < 0)\n+\t\treturn -1;\n+\tRTE_LOG(INFO, VHOST_CONFIG, \"socket created, fd:%d\\n\", sockfd);\n+\n+\tmemset(&un, 0, sizeof(un));\n+\tun.sun_family = AF_UNIX;\n+\tsnprintf(un.sun_path, sizeof(un.sun_path), \"%s\", path);\n+\tret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));\n+\tif (ret == -1)\n+\t\tgoto err;\n+\tRTE_LOG(INFO, VHOST_CONFIG, \"bind to %s\\n\", path);\n+\n+\tret = listen(sockfd, 1);\n+\tif (ret == -1)\n+\t\tgoto err;\n+\t\n+\treturn sockfd;\n+\n+err:\n+\tclose(sockfd);\n+\treturn -1;\n+}\n+\n+\n+/* return bytes# of read */\n+static int\n+read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)\n+{\n+\n+\tstruct iovec  iov;\n+\tstruct msghdr msgh = { 0 };\n+\tsize_t fdsize = fd_num * sizeof(int);\n+\tchar control[CMSG_SPACE(fdsize)];\n+\tstruct cmsghdr *cmsg;\n+\tint ret;\n+\n+\tiov.iov_base = buf;\n+\tiov.iov_len  = buflen;\n+\t\n+\tmsgh.msg_iov = &iov;\n+\tmsgh.msg_iovlen = 1;\n+\tmsgh.msg_control = control;\n+\tmsgh.msg_controllen = sizeof(control);\n+\n+\tret = recvmsg(sockfd, &msgh, 0);\n+\tif (ret <= 0) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG, \"%s failed\\n\", __func__);\n+\t\treturn ret;\n+\t}\n+\t/* ret == buflen */\n+\tif (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG, \"%s failed\\n\", __func__);\n+\t\treturn -1;\n+\t}\n+\n+\tfor (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;\n+\t\tcmsg = CMSG_NXTHDR(&msgh, cmsg)) {\n+\t\tif ( (cmsg->cmsg_level == SOL_SOCKET) 
&& \n+\t\t\t(cmsg->cmsg_type == SCM_RIGHTS)) {\n+\t\t\tmemcpy(fds, CMSG_DATA(cmsg), fdsize);\n+\t\t\tbreak;\n+\t\t}\n+\t}\n+\treturn ret;\n+}\n+\n+static int\n+read_vhost_message(int sockfd, struct VhostUserMsg *msg)\n+{\n+\tint ret;\n+\n+\tret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE, \n+\t\tmsg->fds, VHOST_MEMORY_MAX_NREGIONS);\n+\tif (ret <= 0)\n+\t\treturn ret;\n+\n+\tif (msg->size) {\n+\t\tif (msg->size > sizeof(msg->payload)) {\n+\t\t\tRTE_LOG(ERR, VHOST_CONFIG, \n+\t\t\t\t\"%s: invalid size:%d\\n\", __func__, msg->size);\n+\t\t\treturn -1;\n+\t\t}\n+\t\tret = read(sockfd, &msg->payload, msg->size);\n+\t\tif (ret == 0)\n+\t\t\treturn 0;\n+\t\tif (ret != (int)msg->size) {\n+\t\t\tprintf(\"read control message failed\\n\");\n+\t\t\treturn -1;\n+\t\t}\n+\t}\n+\n+\treturn ret; \n+}\n+\n+static int\n+send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)\n+{\n+\n+\tstruct iovec iov;\n+\tstruct msghdr msgh = { 0 };\n+\tsize_t fdsize = fd_num * sizeof(int);\n+\tchar control[CMSG_SPACE(fdsize)];\n+\tstruct cmsghdr *cmsg;\n+\tint ret;\n+\n+\tiov.iov_base = buf;\n+\tiov.iov_len = buflen;\n+\tmsgh.msg_iov = &iov;\n+\tmsgh.msg_iovlen = 1;\n+\t\n+\tif (fds && fd_num > 0) {\n+\t\tmsgh.msg_control = control;\n+\t\tmsgh.msg_controllen = sizeof(control);\n+\t\tcmsg = CMSG_FIRSTHDR(&msgh);\n+\t\tcmsg->cmsg_len = CMSG_LEN(fdsize);\n+\t\tcmsg->cmsg_level = SOL_SOCKET;\n+\t\tcmsg->cmsg_type = SCM_RIGHTS;\n+\t\tmemcpy(CMSG_DATA(cmsg), fds, fdsize);\n+\t} else {\n+\t\tmsgh.msg_control = NULL;\n+\t\tmsgh.msg_controllen = 0;\n+\t}\n+\n+\tdo {\n+\t\tret = sendmsg(sockfd, &msgh, 0);\n+\t} while (ret < 0 && errno == EINTR);\n+\n+\tif (ret < 0) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,  \"sendmsg error\\n\");\n+\t\treturn -1;\n+\t}\n+\t\n+\treturn 0;\n+}\n+\n+static int\n+send_vhost_message(int sockfd, struct VhostUserMsg *msg)\n+{\n+\tint ret;\n+\n+\tmsg->flags &= ~VHOST_USER_VERSION_MASK;\n+        msg->flags |= VHOST_USER_VERSION;\n+        msg->flags |= 
VHOST_USER_REPLY_MASK;\t\n+\n+\tret = send_fd_message(sockfd, (char *)msg, \n+\t\tVHOST_USER_HDR_SIZE + msg->size, NULL, 0);\n+\t\n+\treturn ret;\n+}\n+\n+/* call back when there is new connection.  */\n+static void\n+vserver_new_vq_conn(int fd, uint64_t dat)\n+{\n+\tstruct vhost_server *vserver = (void *)(uintptr_t)dat;\n+\tint conn_fd;\n+\tuint32_t fh;\n+\tstruct vhost_device_ctx vdev_ctx = { 0 };\n+\n+\tconn_fd = accept(fd, NULL, NULL);\n+\tRTE_LOG(INFO, VHOST_CONFIG, \n+\t\t\"%s: new connection is %d\\n\", __func__, conn_fd);\n+\tif (conn_fd < 0)\n+\t\treturn;\n+\n+\tfh = ops->new_device(vdev_ctx);\n+\tRTE_LOG(INFO, VHOST_CONFIG, \"new device, handle is %d\\n\", fh);\n+\n+\tfdset_add(&vserver->fdset, \n+\t\tconn_fd, vserver_message_handler, NULL, fh);\t\n+}\n+\n+/* callback when there is message on the connfd */\n+static void\n+vserver_message_handler(int connfd, uint64_t dat)\n+{\n+\tstruct vhost_device_ctx ctx;\n+\tuint32_t fh = (uint32_t)dat;\n+\tstruct VhostUserMsg msg;\n+\tuint64_t features;\n+\tint ret;\n+\n+\tctx.fh = fh;\n+\tret = read_vhost_message(connfd, &msg);\n+\tif (ret < 0) {\n+\t\tprintf(\"vhost read message failed\\n\");\n+\t\n+\t\t/*TODO: cleanup */\n+\t\tclose(connfd);\n+\t\tfdset_del(&g_vhost_server->fdset, connfd);\n+\t\tops->destroy_device(ctx);\n+\n+\t\treturn;\n+\t} else if (ret == 0) {\n+\t\t/*TODO: cleanup */\n+\t\tRTE_LOG(INFO, VHOST_CONFIG, \n+\t\t\t\"vhost peer closed\\n\");\n+\t\tclose(connfd);\n+\t\tfdset_del(&g_vhost_server->fdset, connfd);\n+\t\tops->destroy_device(ctx);\n+\n+\t\treturn;\n+\t}\n+\tif (msg.request > VHOST_USER_MAX) {\n+\t\t/*TODO: cleanup */\n+\t\tRTE_LOG(INFO, VHOST_CONFIG, \n+\t\t\t\"vhost read incorrect message\\n\");\n+\t\tclose(connfd);\n+\t\tfdset_del(&g_vhost_server->fdset, connfd);\n+\n+\t\treturn;\n+\t}\n+\n+\tRTE_LOG(INFO, VHOST_CONFIG, \"read message %s\\n\",\n+\t\tvhost_message_str[msg.request]);\n+\tswitch (msg.request) {\n+\tcase VHOST_USER_GET_FEATURES:\n+\t\tret = ops->get_features(ctx, 
&features);\n+\t\tmsg.payload.u64 = ret;\n+\t\tmsg.size = sizeof(msg.payload.u64);\n+\t\tsend_vhost_message(connfd, &msg);\n+\t\tbreak;\n+\tcase VHOST_USER_SET_FEATURES:\n+\t\tops->set_features(ctx, &features);\n+\t\tbreak;\n+\n+\tcase VHOST_USER_SET_OWNER:\n+\t\tops->set_owner(ctx);\n+\t\tbreak;\n+\tcase VHOST_USER_RESET_OWNER:\n+\t\tops->reset_owner(ctx);\n+\t\tbreak;\n+\n+\tcase VHOST_USER_SET_MEM_TABLE:\n+\t\tuser_set_mem_table(ctx, &msg);\n+\t\tbreak;\n+\n+\tcase VHOST_USER_SET_LOG_BASE:\n+\tcase VHOST_USER_SET_LOG_FD:\n+\t\tRTE_LOG(INFO, VHOST_CONFIG, \"not implemented.\\n\");\n+\t\tbreak;\n+\n+\tcase VHOST_USER_SET_VRING_NUM:\n+\t\tops->set_vring_num(ctx, &msg.payload.state);\n+\t\tbreak;\n+\tcase VHOST_USER_SET_VRING_ADDR:\n+\t\tops->set_vring_addr(ctx, &msg.payload.addr);\n+\t\tbreak;\n+\tcase VHOST_USER_SET_VRING_BASE:\n+\t\tops->set_vring_base(ctx, &msg.payload.state);\n+\t\tbreak;\n+\n+\tcase VHOST_USER_GET_VRING_BASE:\n+\t\tret = ops->get_vring_base(ctx, msg.payload.state.index,\n+\t\t\t&msg.payload.state);\n+\t\tmsg.size = sizeof(msg.payload.state);\n+\t\tsend_vhost_message(connfd, &msg);\n+\t\tbreak;\n+\n+\tcase VHOST_USER_SET_VRING_KICK:\n+\t\tuser_set_vring_kick(ctx, &msg);\n+\t\tbreak;\n+\tcase VHOST_USER_SET_VRING_CALL:\n+\t\tuser_set_vring_call(ctx, &msg);\n+\t\tbreak;\n+\n+\tcase VHOST_USER_SET_VRING_ERR:\n+\t\tRTE_LOG(INFO, VHOST_CONFIG, \"not implemented\\n\");\n+\t\tbreak;\n+\n+\tdefault:\n+\t\tbreak;\n+\t\n+\t}\n+}\n+\n+\n+/**\n+ * Creates and initialise the vhost server.\n+ */\n+int\n+rte_vhost_driver_register(const char *path)\n+{\n+\n+\tstruct vhost_server *vserver;\n+\n+\tif (g_vhost_server != NULL)\n+\t\treturn -1;\n+\n+\tvserver = calloc(sizeof(struct vhost_server), 1);\n+\t/*TODO: all allocation is through DPDK memory allocation */\n+\tif (vserver == NULL)\n+\t\treturn -1;\n+\n+\tfdset_init(&vserver->fdset);\n+\n+\tunlink(path);\n+\n+\tvserver->listenfd = uds_socket(path);\n+\tif (vserver->listenfd < 0) 
{\n+\t\tfree(vserver);\n+\t\treturn -1;\n+\t}\n+\tvserver->path = path;\n+\n+\tfdset_add(&vserver->fdset, vserver->listenfd,\n+\t\t\tvserver_new_vq_conn, NULL,\n+\t\t\t(uint64_t)(uintptr_t)vserver);\n+\n+\tops = get_virtio_net_callbacks();\n+\n+\tg_vhost_server = vserver;\n+\n+\treturn 0;\n+}\n+\n+\n+int\n+rte_vhost_driver_session_start(void)\n+{\n+\tfdset_event_dispatch(&g_vhost_server->fdset);\n+\treturn 0;\n+}\n+\ndiff --git a/lib/librte_vhost/vhost-user/vhost-net-user.h b/lib/librte_vhost/vhost-user/vhost-net-user.h\nnew file mode 100644\nindex 0000000..c9df9fa\n--- /dev/null\n+++ b/lib/librte_vhost/vhost-user/vhost-net-user.h\n@@ -0,0 +1,74 @@\n+#ifndef _VHOST_NET_USER_H\n+#define _VHOST_NET_USER_H\n+#include <stdint.h>\n+#include <linux/vhost.h>\n+\n+#include \"fd_man.h\"\n+\n+struct vhost_server {\n+\tconst char *path; /**< The path the uds is bind to. */\n+\tint listenfd;     /**< The listener sockfd. */\n+\tstruct fdset fdset; /**< The fd list this vhost server manages. */\n+};\n+\n+/*********** FROM hw/virtio/vhost-user.c *************************************/\n+\n+#define VHOST_MEMORY_MAX_NREGIONS    8\n+\n+typedef enum VhostUserRequest {\n+    VHOST_USER_NONE = 0,\n+    VHOST_USER_GET_FEATURES = 1,\n+    VHOST_USER_SET_FEATURES = 2,\n+    VHOST_USER_SET_OWNER = 3,\n+    VHOST_USER_RESET_OWNER = 4,\n+    VHOST_USER_SET_MEM_TABLE = 5,\n+    VHOST_USER_SET_LOG_BASE = 6,\n+    VHOST_USER_SET_LOG_FD = 7,\n+    VHOST_USER_SET_VRING_NUM = 8,\n+    VHOST_USER_SET_VRING_ADDR = 9,\n+    VHOST_USER_SET_VRING_BASE = 10,\n+    VHOST_USER_GET_VRING_BASE = 11,\n+    VHOST_USER_SET_VRING_KICK = 12,\n+    VHOST_USER_SET_VRING_CALL = 13,\n+    VHOST_USER_SET_VRING_ERR = 14,\n+    VHOST_USER_MAX\n+} VhostUserRequest;\n+\n+typedef struct VhostUserMemoryRegion {\n+    uint64_t guest_phys_addr;\n+    uint64_t memory_size;\n+    uint64_t userspace_addr;\n+    uint64_t mmap_offset;\n+} VhostUserMemoryRegion;\n+\n+typedef struct VhostUserMemory {\n+    uint32_t nregions;\n+    
uint32_t padding;\n+    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];\n+} VhostUserMemory;\n+\n+typedef struct VhostUserMsg {\n+    VhostUserRequest request;\n+\n+#define VHOST_USER_VERSION_MASK     (0x3)\n+#define VHOST_USER_REPLY_MASK       (0x1 << 2)\n+    uint32_t flags;\n+    uint32_t size; /* the following payload size */\n+    union {\n+#define VHOST_USER_VRING_IDX_MASK   (0xff)\n+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)\n+        uint64_t u64;\n+        struct vhost_vring_state state;\n+        struct vhost_vring_addr addr;\n+        VhostUserMemory memory;\n+    } payload;\n+     int fds[VHOST_MEMORY_MAX_NREGIONS];\n+} __attribute__((packed)) VhostUserMsg;\n+\n+#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)\n+\n+/* The version of the protocol we support */\n+#define VHOST_USER_VERSION    (0x1)\n+\n+/*****************************************************************************/\n+#endif\ndiff --git a/lib/librte_vhost/vhost-user/virtio-net-user.c b/lib/librte_vhost/vhost-user/virtio-net-user.c\nnew file mode 100644\nindex 0000000..f38e6cc\n--- /dev/null\n+++ b/lib/librte_vhost/vhost-user/virtio-net-user.c\n@@ -0,0 +1,208 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright(c) 2010-2014 Intel Corporation. 
All rights reserved.\n+ *   All rights reserved.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the name of Intel Corporation nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+#include <stdint.h>\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <unistd.h>\n+#include <sys/mman.h>\n+\n+#include <rte_log.h>\n+\n+#include \"virtio-net-user.h\"\n+#include \"vhost-net-user.h\"\n+#include \"vhost-net.h\"\n+\n+extern const struct vhost_net_device_ops *ops;\n+\n+#if 0\n+int\n+user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)\n+{\n+\tunsigned int idx;\n+\tstruct VhostUserMemory memory = pmsg->payload.memory;\n+\tstruct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];\n+\tuint64_t mapped_address, base_address = 0, mem_size = 0;\n+\n+\tfor (idx = 0; idx < memory.nregions; idx++) {\n+\t\tif (memory.regions[idx].guest_phys_addr == 0)\n+\t\t\tbase_address = memory.regions[idx].userspace_addr;\n+\t}\n+\tif (base_address == 0) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\"couldn't find the mem region whose gpa is 0.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tfor (idx = 0; idx < memory.nregions;  idx++) {\n+\t\tuint64_t size = memory.regions[idx].userspace_addr - \n+\t\t\tbase_address + memory.regions[idx].memory_size;\n+\t\tif (mem_size < size)\n+\t\t\tmem_size = size;\n+\t}\n+\n+\t/*\n+\t * here we assume qemu will map only one file for memory allocation,\n+\t * we only use fds[0] with offset 0.\n+\t */\n+\tmapped_address = (uint64_t)(uintptr_t)mmap(NULL, mem_size, \n+\t\tPROT_READ | PROT_WRITE, MAP_SHARED, pmsg->fds[0], 0);\n+\n+\tif (mapped_address == 
(uint64_t)(uintptr_t)MAP_FAILED) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG, \" mmap qemu guest failed.\\n\");\n+\t\treturn -1;\n+\t}\n+\t\t\t\n+\tfor (idx = 0; idx < memory.nregions; idx++) {\n+\t\tregions[idx].guest_phys_address = \n+\t\t\tmemory.regions[idx].guest_phys_addr;\n+\t\tregions[idx].guest_phys_address_end = \n+\t\t\tmemory.regions[idx].guest_phys_addr +\n+\t\t\tmemory.regions[idx].memory_size;\n+\t\tregions[idx].memory_size = memory.regions[idx].memory_size;\n+\t\tregions[idx].userspace_address = \n+\t\t\tmemory.regions[idx].userspace_addr;\n+\n+\t\tregions[idx].address_offset = mapped_address - base_address + \n+\t\t\tregions[idx].userspace_address -\n+\t\t\tregions[idx].guest_phys_address;\n+\t\tLOG_DEBUG(VHOST_CONFIG, \n+\t\t\t\"REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%\"PRIu64\")\\n\",\n+\t\t\tidx,\n+\t\t\t(void *)(uintptr_t)regions[idx].guest_phys_address,\n+\t\t\t(void *)(uintptr_t)regions[idx].userspace_address,\n+\t\t\t regions[idx].memory_size);\n+\t}\n+\tops->set_mem_table(ctx, regions, memory.nregions);\n+\treturn 0;\n+}\n+\n+#else\n+\n+int\n+user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)\n+{\n+\tunsigned int idx;\n+\tstruct VhostUserMemory memory = pmsg->payload.memory;\n+\tstruct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];\n+\tuint64_t mapped_address, base_address = 0;\n+\n+\tfor (idx = 0; idx < memory.nregions; idx++) {\n+\t\tif (memory.regions[idx].guest_phys_addr == 0)\n+\t\t\tbase_address = memory.regions[idx].userspace_addr;\n+\t}\n+\tif (base_address == 0) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\"couldn't find the mem region whose gpa is 0.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\n+\tfor (idx = 0; idx < memory.nregions; idx++) {\n+\t\tregions[idx].guest_phys_address = \n+\t\t\tmemory.regions[idx].guest_phys_addr;\n+\t\tregions[idx].guest_phys_address_end = \n+\t\t\tmemory.regions[idx].guest_phys_addr +\n+\t\t\tmemory.regions[idx].memory_size;\n+\t\tregions[idx].memory_size = 
memory.regions[idx].memory_size;\n+\t\tregions[idx].userspace_address = \n+\t\t\tmemory.regions[idx].userspace_addr;\n+/*\n+\t\tmapped_address = (uint64_t)(uintptr_t)mmap(NULL, \n+\t\t\tregions[idx].memory_size, \n+\t\t\tPROT_READ | PROT_WRITE, MAP_SHARED, \n+\t\t\tpmsg->fds[idx], \n+\t\t\tmemory.regions[idx].mmap_offset);\n+*/\n+\n+/* This is ugly */\n+\t\tmapped_address = (uint64_t)(uintptr_t)mmap(NULL, \n+\t\t\tregions[idx].memory_size +\n+\t\t\t\tmemory.regions[idx].mmap_offset, \n+\t\t\tPROT_READ | PROT_WRITE, MAP_SHARED, \n+\t\t\tpmsg->fds[idx], \n+\t\t\t0);\n+\t\tprintf(\"mapped to %p\\n\", (void *)mapped_address);\n+\n+\t\tif (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {\n+\t\t\tRTE_LOG(ERR, VHOST_CONFIG, \" mmap qemu guest failed.\\n\");\n+\t\t\treturn -1;\n+\t\t}\n+\n+//\t\tprintf(\"ret=%d\\n\", munmap((void *)mapped_address, (regions[idx].memory_size + memory.regions[idx].mmap_offset + 0x3FFFFFFF) & ~0x3FFFFFFF));\n+//\t\tprintf(\"unaligned ret=%d\\n\", munmap((void *)mapped_address, (regions[idx].memory_size + memory.regions[idx].mmap_offset )  ));\n+\t\tmapped_address +=  memory.regions[idx].mmap_offset;\n+\n+\t\tregions[idx].address_offset = mapped_address -\n+\t\t\tregions[idx].guest_phys_address;\n+\t\tLOG_DEBUG(VHOST_CONFIG, \n+\t\t\t\"REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%\"PRIu64\")\\n\",\n+\t\t\tidx,\n+\t\t\t(void *)(uintptr_t)regions[idx].guest_phys_address,\n+\t\t\t(void *)(uintptr_t)regions[idx].userspace_address,\n+\t\t\t regions[idx].memory_size);\n+\t}\n+\tops->set_mem_table(ctx, regions, memory.nregions);\n+\treturn 0;\n+}\n+\n+\n+\n+\n+#endif\n+\n+\n+void\n+user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)\n+{\n+\tstruct vhost_vring_file file;\n+\n+\tfile.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;\n+\tfile.fd = pmsg->fds[0];\n+\tRTE_LOG(INFO, VHOST_CONFIG, \n+\t\t\"vring call idx:%d file:%d\\n\", file.index, file.fd);\n+\tops->set_vring_call(ctx, 
&file);\n+}\n+\n+\n+void\n+user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)\n+{\n+\tstruct vhost_vring_file file;\n+\n+\tfile.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;\n+\tfile.fd = pmsg->fds[0];\n+\tRTE_LOG(INFO, VHOST_CONFIG, \n+\t\t\"vring kick idx:%d file:%d\\n\", file.index, file.fd);\n+\tops->set_vring_kick(ctx, &file);\n+}\ndiff --git a/lib/librte_vhost/vhost-user/virtio-net-user.h b/lib/librte_vhost/vhost-user/virtio-net-user.h\nnew file mode 100644\nindex 0000000..0969376\n--- /dev/null\n+++ b/lib/librte_vhost/vhost-user/virtio-net-user.h\n@@ -0,0 +1,11 @@\n+#ifndef _VIRTIO_NET_USER_H\n+#define _VIRTIO_NET_USER_H\n+\n+#include \"vhost-net.h\"\n+#include \"vhost-net-user.h\"\n+\n+int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);\n+void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);\n+void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);\n+\n+#endif\ndiff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c\nindex ccfd82f..8ff0301 100644\n--- a/lib/librte_vhost/vhost_rxtx.c\n+++ b/lib/librte_vhost/vhost_rxtx.c\n@@ -38,19 +38,14 @@\n #include <rte_memcpy.h>\n #include <rte_virtio_net.h>\n \n-#include \"vhost-net-cdev.h\"\n+#include \"vhost-net.h\"\n \n-#define MAX_PKT_BURST 32\n+#define VHOST_MAX_PKT_BURST 64\n+#define VHOST_MAX_MRG_PKT_BURST 64\n \n-/**\n- * This function adds buffers to the virtio devices RX virtqueue. Buffers can\n- * be received from the physical port or from another virtio device. A packet\n- * count is returned to indicate the number of packets that are succesfully\n- * added to the RX queue. 
This function works when mergeable is disabled.\n- */\n-static inline uint32_t __attribute__((always_inline))\n-virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,\n-\tstruct rte_mbuf **pkts, uint32_t count)\n+\n+uint32_t\n+rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count)\n {\n \tstruct vhost_virtqueue *vq;\n \tstruct vring_desc *desc;\n@@ -59,26 +54,23 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,\n \tstruct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};\n \tuint64_t buff_addr = 0;\n \tuint64_t buff_hdr_addr = 0;\n-\tuint32_t head[MAX_PKT_BURST], packet_len = 0;\n+\tuint32_t head[VHOST_MAX_PKT_BURST], packet_len = 0;\n \tuint32_t head_idx, packet_success = 0;\n+\tuint32_t mergeable, mrg_count = 0;\n \tuint16_t avail_idx, res_cur_idx;\n \tuint16_t res_base_idx, res_end_idx;\n \tuint16_t free_entries;\n \tuint8_t success = 0;\n \n-\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") virtio_dev_rx()\\n\", dev->device_fh);\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") %s()\\n\", dev->device_fh, __func__);\n \tif (unlikely(queue_id != VIRTIO_RXQ)) {\n \t\tLOG_DEBUG(VHOST_DATA, \"mq isn't supported in this version.\\n\");\n \t\treturn 0;\n \t}\n \n \tvq = dev->virtqueue[VIRTIO_RXQ];\n-\tcount = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;\n-\n-\t/*\n-\t * As many data cores may want access to available buffers,\n-\t * they need to be reserved.\n-\t */\n+\tcount = (count > VHOST_MAX_PKT_BURST) ? VHOST_MAX_PKT_BURST : count;\n+\t/* As many data cores may want access to available buffers, they need to be reserved. */\n \tdo {\n \t\tres_base_idx = vq->last_used_idx_res;\n \t\tavail_idx = *((volatile uint16_t *)&vq->avail->idx);\n@@ -93,21 +85,25 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,\n \n \t\tres_end_idx = res_base_idx + count;\n \t\t/* vq->last_used_idx_res is atomically updated. */\n-\t\t/* TODO: Allow to disable cmpset if no concurrency in application. 
*/\n+\t\t/* TODO: Allow to disable cmpset if no concurrency in application */\n \t\tsuccess = rte_atomic16_cmpset(&vq->last_used_idx_res,\n \t\t\t\tres_base_idx, res_end_idx);\n+\t\t/* If there is contention here and failed, try again. */\n \t} while (unlikely(success == 0));\n \tres_cur_idx = res_base_idx;\n \tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") Current Index %d| End Index %d\\n\",\n-\t\t\tdev->device_fh, res_cur_idx, res_end_idx);\n+\t\t\tdev->device_fh,\n+\t\t\tres_cur_idx, res_end_idx);\n \n \t/* Prefetch available ring to retrieve indexes. */\n \trte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);\n \n+\t/* Check if the VIRTIO_NET_F_MRG_RXBUF feature is enabled. */\n+\tmergeable = dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF);\n+\n \t/* Retrieve all of the head indexes first to avoid caching issues. */\n \tfor (head_idx = 0; head_idx < count; head_idx++)\n-\t\thead[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &\n-\t\t\t\t\t(vq->size - 1)];\n+\t\thead[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)];\n \n \t/*Prefetch descriptor index. */\n \trte_prefetch0(&vq->desc[head[packet_success]]);\n@@ -123,46 +119,57 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,\n \t\t/* Prefetch buffer address. */\n \t\trte_prefetch0((void *)(uintptr_t)buff_addr);\n \n-\t\t/* Copy virtio_hdr to packet and increment buffer address */\n-\t\tbuff_hdr_addr = buff_addr;\n-\t\tpacket_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;\n-\n-\t\t/*\n-\t\t * If the descriptors are chained the header and data are\n-\t\t * placed in separate buffers.\n-\t\t */\n-\t\tif (desc->flags & VRING_DESC_F_NEXT) {\n-\t\t\tdesc->len = vq->vhost_hlen;\n-\t\t\tdesc = &vq->desc[desc->next];\n-\t\t\t/* Buffer address translation. 
*/\n-\t\t\tbuff_addr = gpa_to_vva(dev, desc->addr);\n-\t\t\tdesc->len = rte_pktmbuf_data_len(buff);\n+\t\tif (mergeable && (mrg_count != 0)) {\n+\t\t\tdesc->len = packet_len = rte_pktmbuf_data_len(buff);\n \t\t} else {\n-\t\t\tbuff_addr += vq->vhost_hlen;\n-\t\t\tdesc->len = packet_len;\n+\t\t\t/* Copy virtio_hdr to packet and increment buffer address */\n+\t\t\tbuff_hdr_addr = buff_addr;\n+\t\t\tpacket_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;\n+\n+\t\t\t/*\n+\t\t\t * If the descriptors are chained the header and data are placed in\n+\t\t\t * separate buffers.\n+\t\t\t */\n+\t\t\tif (desc->flags & VRING_DESC_F_NEXT) {\n+\t\t\t\tdesc->len = vq->vhost_hlen;\n+\t\t\t\tdesc = &vq->desc[desc->next];\n+\t\t\t\t/* Buffer address translation. */\n+\t\t\t\tbuff_addr = gpa_to_vva(dev, desc->addr);\n+\t\t\t\tdesc->len = rte_pktmbuf_data_len(buff);\n+\t\t\t} else {\n+\t\t\t\tbuff_addr += vq->vhost_hlen;\n+\t\t\t\tdesc->len = packet_len;\n+\t\t\t}\n \t\t}\n \n+\t\tVHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, rte_pktmbuf_data_len(buff), 0);\n+\n \t\t/* Update used ring with desc information */\n-\t\tvq->used->ring[res_cur_idx & (vq->size - 1)].id =\n-\t\t\t\t\t\t\thead[packet_success];\n+\t\tvq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];\n \t\tvq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;\n \n \t\t/* Copy mbuf data to buffer */\n-\t\t/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */\n-\t\trte_memcpy((void *)(uintptr_t)buff_addr,\n-\t\t\trte_pktmbuf_mtod(buff, const void *),\n-\t\t\trte_pktmbuf_data_len(buff));\n-\t\tPRINT_PACKET(dev, (uintptr_t)buff_addr,\n-\t\t\trte_pktmbuf_data_len(buff), 0);\n+\t\t/* TODO fixme for sg mbuf and the case that desc couldn't hold the mbuf data */\n+\t\trte_memcpy((void *)(uintptr_t)buff_addr, (const void *)buff->pkt.data, rte_pktmbuf_data_len(buff));\n \n \t\tres_cur_idx++;\n \t\tpacket_success++;\n \n-\t\trte_memcpy((void *)(uintptr_t)buff_hdr_addr,\n-\t\t\t(const void 
*)&virtio_hdr, vq->vhost_hlen);\n-\n-\t\tPRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);\n-\n+\t\t/* If mergeable is disabled then a header is required per buffer. */\n+\t\tif (!mergeable) {\n+\t\t\trte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen);\n+\t\t\tVHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);\n+\t\t} else {\n+\t\t\tmrg_count++;\n+\t\t\t/* Merge buffer can only handle so many buffers at a time. Tell the guest if this limit is reached. */\n+\t\t\tif ((mrg_count == VHOST_MAX_MRG_PKT_BURST) || (res_cur_idx == res_end_idx)) {\n+\t\t\t\tvirtio_hdr.num_buffers = mrg_count;\n+\t\t\t\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") RX: Num merge buffers %d\\n\", dev->device_fh, virtio_hdr.num_buffers);\n+\t\t\t\trte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen);\n+\t\t\t\tVHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);\n+\t\t\t\tmrg_count = 0;\n+\t\t\t}\n+\t\t}\n \t\tif (res_cur_idx < res_end_idx) {\n \t\t\t/* Prefetch descriptor index. */\n \t\t\trte_prefetch0(&vq->desc[head[packet_success]]);\n@@ -184,357 +191,18 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,\n \treturn count;\n }\n \n-static inline uint32_t __attribute__((always_inline))\n-copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,\n-\tuint16_t res_end_idx, struct rte_mbuf *pkt)\n-{\n-\tuint32_t vec_idx = 0;\n-\tuint32_t entry_success = 0;\n-\tstruct vhost_virtqueue *vq;\n-\t/* The virtio_hdr is initialised to 0. 
*/\n-\tstruct virtio_net_hdr_mrg_rxbuf virtio_hdr = {\n-\t\t{0, 0, 0, 0, 0, 0}, 0};\n-\tuint16_t cur_idx = res_base_idx;\n-\tuint64_t vb_addr = 0;\n-\tuint64_t vb_hdr_addr = 0;\n-\tuint32_t seg_offset = 0;\n-\tuint32_t vb_offset = 0;\n-\tuint32_t seg_avail;\n-\tuint32_t vb_avail;\n-\tuint32_t cpy_len, entry_len;\n-\n-\tif (pkt == NULL)\n-\t\treturn 0;\n-\n-\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") Current Index %d| \"\n-\t\t\"End Index %d\\n\",\n-\t\tdev->device_fh, cur_idx, res_end_idx);\n-\n-\t/*\n-\t * Convert from gpa to vva\n-\t * (guest physical addr -> vhost virtual addr)\n-\t */\n-\tvq = dev->virtqueue[VIRTIO_RXQ];\n-\tvb_addr =\n-\t\tgpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);\n-\tvb_hdr_addr = vb_addr;\n-\n-\t/* Prefetch buffer address. */\n-\trte_prefetch0((void *)(uintptr_t)vb_addr);\n-\n-\tvirtio_hdr.num_buffers = res_end_idx - res_base_idx;\n-\n-\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") RX: Num merge buffers %d\\n\",\n-\t\tdev->device_fh, virtio_hdr.num_buffers);\n \n-\trte_memcpy((void *)(uintptr_t)vb_hdr_addr,\n-\t\t(const void *)&virtio_hdr, vq->vhost_hlen);\n-\n-\tPRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1);\n-\n-\tseg_avail = rte_pktmbuf_data_len(pkt);\n-\tvb_offset = vq->vhost_hlen;\n-\tvb_avail =\n-\t\tvq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;\n-\n-\tentry_len = vq->vhost_hlen;\n-\n-\tif (vb_avail == 0) {\n-\t\tuint32_t desc_idx =\n-\t\t\tvq->buf_vec[vec_idx].desc_idx;\n-\t\tvq->desc[desc_idx].len = vq->vhost_hlen;\n-\n-\t\tif ((vq->desc[desc_idx].flags\n-\t\t\t& VRING_DESC_F_NEXT) == 0) {\n-\t\t\t/* Update used ring with desc information */\n-\t\t\tvq->used->ring[cur_idx & (vq->size - 1)].id\n-\t\t\t\t= vq->buf_vec[vec_idx].desc_idx;\n-\t\t\tvq->used->ring[cur_idx & (vq->size - 1)].len\n-\t\t\t\t= entry_len;\n-\n-\t\t\tentry_len = 0;\n-\t\t\tcur_idx++;\n-\t\t\tentry_success++;\n-\t\t}\n-\n-\t\tvec_idx++;\n-\t\tvb_addr =\n-\t\t\tgpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);\n-\n-\t\t/* Prefetch buffer address. 
*/\n-\t\trte_prefetch0((void *)(uintptr_t)vb_addr);\n-\t\tvb_offset = 0;\n-\t\tvb_avail = vq->buf_vec[vec_idx].buf_len;\n-\t}\n-\n-\tcpy_len = RTE_MIN(vb_avail, seg_avail);\n-\n-\twhile (cpy_len > 0) {\n-\t\t/* Copy mbuf data to vring buffer */\n-\t\trte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset),\n-\t\t\t(const void *)(rte_pktmbuf_mtod(pkt, char*) + seg_offset),\n-\t\t\tcpy_len);\n-\n-\t\tPRINT_PACKET(dev,\n-\t\t\t(uintptr_t)(vb_addr + vb_offset),\n-\t\t\tcpy_len, 0);\n-\n-\t\tseg_offset += cpy_len;\n-\t\tvb_offset += cpy_len;\n-\t\tseg_avail -= cpy_len;\n-\t\tvb_avail -= cpy_len;\n-\t\tentry_len += cpy_len;\n-\n-\t\tif (seg_avail != 0) {\n-\t\t\t/*\n-\t\t\t * The virtio buffer in this vring\n-\t\t\t * entry reach to its end.\n-\t\t\t * But the segment doesn't complete.\n-\t\t\t */\n-\t\t\tif ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &\n-\t\t\t\tVRING_DESC_F_NEXT) == 0) {\n-\t\t\t\t/* Update used ring with desc information */\n-\t\t\t\tvq->used->ring[cur_idx & (vq->size - 1)].id\n-\t\t\t\t\t= vq->buf_vec[vec_idx].desc_idx;\n-\t\t\t\tvq->used->ring[cur_idx & (vq->size - 1)].len\n-\t\t\t\t\t= entry_len;\n-\t\t\t\tentry_len = 0;\n-\t\t\t\tcur_idx++;\n-\t\t\t\tentry_success++;\n-\t\t\t}\n-\n-\t\t\tvec_idx++;\n-\t\t\tvb_addr = gpa_to_vva(dev,\n-\t\t\t\tvq->buf_vec[vec_idx].buf_addr);\n-\t\t\tvb_offset = 0;\n-\t\t\tvb_avail = vq->buf_vec[vec_idx].buf_len;\n-\t\t\tcpy_len = RTE_MIN(vb_avail, seg_avail);\n-\t\t} else {\n-\t\t\t/*\n-\t\t\t * This current segment complete, need continue to\n-\t\t\t * check if the whole packet complete or not.\n-\t\t\t */\n-\t\t\tpkt = pkt->next;\n-\t\t\tif (pkt != NULL) {\n-\t\t\t\t/*\n-\t\t\t\t * There are more segments.\n-\t\t\t\t */\n-\t\t\t\tif (vb_avail == 0) {\n-\t\t\t\t\t/*\n-\t\t\t\t\t * This current buffer from vring is\n-\t\t\t\t\t * used up, need fetch next buffer\n-\t\t\t\t\t * from buf_vec.\n-\t\t\t\t\t */\n-\t\t\t\t\tuint32_t desc_idx =\n-\t\t\t\t\t\tvq->buf_vec[vec_idx].desc_idx;\n-\t\t\t\t\tvq->desc[desc_idx].len 
= vb_offset;\n-\n-\t\t\t\t\tif ((vq->desc[desc_idx].flags &\n-\t\t\t\t\t\tVRING_DESC_F_NEXT) == 0) {\n-\t\t\t\t\t\tuint16_t wrapped_idx =\n-\t\t\t\t\t\t\tcur_idx & (vq->size - 1);\n-\t\t\t\t\t\t/*\n-\t\t\t\t\t\t * Update used ring with the\n-\t\t\t\t\t\t * descriptor information\n-\t\t\t\t\t\t */\n-\t\t\t\t\t\tvq->used->ring[wrapped_idx].id\n-\t\t\t\t\t\t\t= desc_idx;\n-\t\t\t\t\t\tvq->used->ring[wrapped_idx].len\n-\t\t\t\t\t\t\t= entry_len;\n-\t\t\t\t\t\tentry_success++;\n-\t\t\t\t\t\tentry_len = 0;\n-\t\t\t\t\t\tcur_idx++;\n-\t\t\t\t\t}\n-\n-\t\t\t\t\t/* Get next buffer from buf_vec. */\n-\t\t\t\t\tvec_idx++;\n-\t\t\t\t\tvb_addr = gpa_to_vva(dev,\n-\t\t\t\t\t\tvq->buf_vec[vec_idx].buf_addr);\n-\t\t\t\t\tvb_avail =\n-\t\t\t\t\t\tvq->buf_vec[vec_idx].buf_len;\n-\t\t\t\t\tvb_offset = 0;\n-\t\t\t\t}\n-\n-\t\t\t\tseg_offset = 0;\n-\t\t\t\tseg_avail = rte_pktmbuf_data_len(pkt);\n-\t\t\t\tcpy_len = RTE_MIN(vb_avail, seg_avail);\n-\t\t\t} else {\n-\t\t\t\t/*\n-\t\t\t\t * This whole packet completes.\n-\t\t\t\t */\n-\t\t\t\tuint32_t desc_idx =\n-\t\t\t\t\tvq->buf_vec[vec_idx].desc_idx;\n-\t\t\t\tvq->desc[desc_idx].len = vb_offset;\n-\n-\t\t\t\twhile (vq->desc[desc_idx].flags &\n-\t\t\t\t\tVRING_DESC_F_NEXT) {\n-\t\t\t\t\tdesc_idx = vq->desc[desc_idx].next;\n-\t\t\t\t\t vq->desc[desc_idx].len = 0;\n-\t\t\t\t}\n-\n-\t\t\t\t/* Update used ring with desc information */\n-\t\t\t\tvq->used->ring[cur_idx & (vq->size - 1)].id\n-\t\t\t\t\t= vq->buf_vec[vec_idx].desc_idx;\n-\t\t\t\tvq->used->ring[cur_idx & (vq->size - 1)].len\n-\t\t\t\t\t= entry_len;\n-\t\t\t\tentry_len = 0;\n-\t\t\t\tcur_idx++;\n-\t\t\t\tentry_success++;\n-\t\t\t\tseg_avail = 0;\n-\t\t\t\tcpy_len = RTE_MIN(vb_avail, seg_avail);\n-\t\t\t}\n-\t\t}\n-\t}\n-\n-\treturn entry_success;\n-}\n-\n-/*\n- * This function works for mergeable RX.\n- */\n-static inline uint32_t __attribute__((always_inline))\n-virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,\n-\tstruct rte_mbuf **pkts, uint32_t 
count)\n+uint32_t\n+rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count)\n {\n-\tstruct vhost_virtqueue *vq;\n-\tuint32_t pkt_idx = 0, entry_success = 0;\n-\tuint16_t avail_idx, res_cur_idx;\n-\tuint16_t res_base_idx, res_end_idx;\n-\tuint8_t success = 0;\n-\n-\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") virtio_dev_merge_rx()\\n\",\n-\t\tdev->device_fh);\n-\tif (unlikely(queue_id != VIRTIO_RXQ)) {\n-\t\tLOG_DEBUG(VHOST_DATA, \"mq isn't supported in this version.\\n\");\n-\t}\n-\n-\tvq = dev->virtqueue[VIRTIO_RXQ];\n-\tcount = RTE_MIN((uint32_t)MAX_PKT_BURST, count);\n-\n-\tif (count == 0)\n-\t\treturn 0;\n-\n-\tfor (pkt_idx = 0; pkt_idx < count; pkt_idx++) {\n-\t\tuint32_t secure_len = 0;\n-\t\tuint16_t need_cnt;\n-\t\tuint32_t vec_idx = 0;\n-\t\tuint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;\n-\t\tuint16_t i, id;\n-\n-\t\tdo {\n-\t\t\t/*\n-\t\t\t * As many data cores may want access to available\n-\t\t\t * buffers, they need to be reserved.\n-\t\t\t */\n-\t\t\tres_base_idx = vq->last_used_idx_res;\n-\t\t\tres_cur_idx = res_base_idx;\n-\n-\t\t\tdo {\n-\t\t\t\tavail_idx = *((volatile uint16_t *)&vq->avail->idx);\n-\t\t\t\tif (unlikely(res_cur_idx == avail_idx)) {\n-\t\t\t\t\tLOG_DEBUG(VHOST_DATA,\n-\t\t\t\t\t\t\"(%\"PRIu64\") Failed \"\n-\t\t\t\t\t\t\"to get enough desc from \"\n-\t\t\t\t\t\t\"vring\\n\",\n-\t\t\t\t\t\tdev->device_fh);\n-\t\t\t\t\treturn pkt_idx;\n-\t\t\t\t} else {\n-\t\t\t\t\tuint16_t wrapped_idx =\n-\t\t\t\t\t\t(res_cur_idx) & (vq->size - 1);\n-\t\t\t\t\tuint32_t idx =\n-\t\t\t\t\t\tvq->avail->ring[wrapped_idx];\n-\t\t\t\t\tuint8_t next_desc;\n-\n-\t\t\t\t\tdo {\n-\t\t\t\t\t\tnext_desc = 0;\n-\t\t\t\t\t\tsecure_len += vq->desc[idx].len;\n-\t\t\t\t\t\tif (vq->desc[idx].flags &\n-\t\t\t\t\t\t\tVRING_DESC_F_NEXT) {\n-\t\t\t\t\t\t\tidx = vq->desc[idx].next;\n-\t\t\t\t\t\t\tnext_desc = 1;\n-\t\t\t\t\t\t}\n-\t\t\t\t\t} while 
(next_desc);\n-\n-\t\t\t\t\tres_cur_idx++;\n-\t\t\t\t}\n-\t\t\t} while (pkt_len > secure_len);\n-\n-\t\t\t/* vq->last_used_idx_res is atomically updated. */\n-\t\t\tsuccess = rte_atomic16_cmpset(&vq->last_used_idx_res,\n-\t\t\t\t\t\t\tres_base_idx,\n-\t\t\t\t\t\t\tres_cur_idx);\n-\t\t} while (success == 0);\n-\n-\t\tid = res_base_idx;\n-\t\tneed_cnt = res_cur_idx - res_base_idx;\n-\n-\t\tfor (i = 0; i < need_cnt; i++, id++) {\n-\t\t\tuint16_t wrapped_idx = id & (vq->size - 1);\n-\t\t\tuint32_t idx = vq->avail->ring[wrapped_idx];\n-\t\t\tuint8_t next_desc;\n-\t\t\tdo {\n-\t\t\t\tnext_desc = 0;\n-\t\t\t\tvq->buf_vec[vec_idx].buf_addr =\n-\t\t\t\t\tvq->desc[idx].addr;\n-\t\t\t\tvq->buf_vec[vec_idx].buf_len =\n-\t\t\t\t\tvq->desc[idx].len;\n-\t\t\t\tvq->buf_vec[vec_idx].desc_idx = idx;\n-\t\t\t\tvec_idx++;\n-\n-\t\t\t\tif (vq->desc[idx].flags & VRING_DESC_F_NEXT) {\n-\t\t\t\t\tidx = vq->desc[idx].next;\n-\t\t\t\t\tnext_desc = 1;\n-\t\t\t\t}\n-\t\t\t} while (next_desc);\n-\t\t}\n-\n-\t\tres_end_idx = res_cur_idx;\n-\n-\t\tentry_success = copy_from_mbuf_to_vring(dev, res_base_idx,\n-\t\t\tres_end_idx, pkts[pkt_idx]);\n-\n-\t\trte_compiler_barrier();\n-\n-\t\t/*\n-\t\t * Wait until it's our turn to add our buffer\n-\t\t * to the used ring.\n-\t\t */\n-\t\twhile (unlikely(vq->last_used_idx != res_base_idx))\n-\t\t\trte_pause();\n-\n-\t\t*(volatile uint16_t *)&vq->used->idx += entry_success;\n-\t\tvq->last_used_idx = res_end_idx;\n-\n-\t\t/* Kick the guest if necessary. 
*/\n-\t\tif (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))\n-\t\t\teventfd_write((int)vq->kickfd, 1);\n-\t}\n-\n-\treturn count;\n-}\n-\n-uint16_t\n-rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,\n-\tstruct rte_mbuf **pkts, uint16_t count)\n-{\n-\tif (unlikely(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)))\n-\t\treturn virtio_dev_merge_rx(dev, queue_id, pkts, count);\n-\telse\n-\t\treturn virtio_dev_rx(dev, queue_id, pkts, count);\n-}\n-\n-uint16_t\n-rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,\n-\tstruct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)\n-{\n-\tstruct rte_mbuf *m, *prev;\n+\tstruct rte_mbuf *mbuf;\n \tstruct vhost_virtqueue *vq;\n \tstruct vring_desc *desc;\n-\tuint64_t vb_addr = 0;\n-\tuint32_t head[MAX_PKT_BURST];\n+\tuint64_t buff_addr = 0;\n+\tuint32_t head[VHOST_MAX_PKT_BURST];\n \tuint32_t used_idx;\n \tuint32_t i;\n-\tuint16_t free_entries, entry_success = 0;\n+\tuint16_t free_entries, packet_success = 0;\n \tuint16_t avail_idx;\n \n \tif (unlikely(queue_id != VIRTIO_TXQ)) {\n@@ -549,8 +217,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,\n \tif (vq->last_used_idx == avail_idx)\n \t\treturn 0;\n \n-\tLOG_DEBUG(VHOST_DATA, \"%s (%\"PRIu64\")\\n\", __func__,\n-\t\tdev->device_fh);\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") %s(%d->%d)\\n\", \n+\t\tdev->device_fh, __func__, vq->last_used_idx, avail_idx);\n \n \t/* Prefetch available ring to retrieve head indexes. */\n \trte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);\n@@ -558,173 +226,68 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,\n \t/*get the number of free entries in the ring*/\n \tfree_entries = (avail_idx - vq->last_used_idx);\n \n-\tfree_entries = RTE_MIN(free_entries, count);\n+\tif (free_entries > count)\n+\t\tfree_entries = count;\n \t/* Limit to MAX_PKT_BURST. 
*/\n-\tfree_entries = RTE_MIN(free_entries, MAX_PKT_BURST);\n+\tif (free_entries > VHOST_MAX_PKT_BURST)\n+\t\tfree_entries = VHOST_MAX_PKT_BURST;\n \n-\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") Buffers available %d\\n\",\n-\t\t\tdev->device_fh, free_entries);\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") Buffers available %d\\n\", dev->device_fh, free_entries);\n \t/* Retrieve all of the head indexes first to avoid caching issues. */\n \tfor (i = 0; i < free_entries; i++)\n \t\thead[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];\n \n \t/* Prefetch descriptor index. */\n-\trte_prefetch0(&vq->desc[head[entry_success]]);\n+\trte_prefetch0(&vq->desc[head[packet_success]]);\n \trte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);\n \n-\twhile (entry_success < free_entries) {\n-\t\tuint32_t vb_avail, vb_offset;\n-\t\tuint32_t seg_avail, seg_offset;\n-\t\tuint32_t cpy_len;\n-\t\tuint32_t seg_num = 0;\n-\t\tstruct rte_mbuf *cur;\n-\t\tuint8_t alloc_err = 0;\n-\n-\t\tdesc = &vq->desc[head[entry_success]];\n+\twhile (packet_success < free_entries) {\n+\t\tdesc = &vq->desc[head[packet_success]];\n \n \t\t/* Discard first buffer as it is the virtio header */\n \t\tdesc = &vq->desc[desc->next];\n \n \t\t/* Buffer address translation. */\n-\t\tvb_addr = gpa_to_vva(dev, desc->addr);\n+\t\tbuff_addr = gpa_to_vva(dev, desc->addr);\n \t\t/* Prefetch buffer address. */\n-\t\trte_prefetch0((void *)(uintptr_t)vb_addr);\n+\t\trte_prefetch0((void *)(uintptr_t)buff_addr);\n \n \t\tused_idx = vq->last_used_idx & (vq->size - 1);\n \n-\t\tif (entry_success < (free_entries - 1)) {\n+\t\tif (packet_success < (free_entries - 1)) {\n \t\t\t/* Prefetch descriptor index. */\n-\t\t\trte_prefetch0(&vq->desc[head[entry_success+1]]);\n+\t\t\trte_prefetch0(&vq->desc[head[packet_success+1]]);\n \t\t\trte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]);\n \t\t}\n \n \t\t/* Update used index buffer information. 
*/\n-\t\tvq->used->ring[used_idx].id = head[entry_success];\n+\t\tvq->used->ring[used_idx].id = head[packet_success];\n \t\tvq->used->ring[used_idx].len = 0;\n \n-\t\tvb_offset = 0;\n-\t\tvb_avail = desc->len;\n-\t\t/* Allocate an mbuf and populate the structure. */\n-\t\tm = rte_pktmbuf_alloc(mbuf_pool);\n-\t\tif (unlikely(m == NULL)) {\n-\t\t\tRTE_LOG(ERR, VHOST_DATA,\n-\t\t\t\t\"Failed to allocate memory for mbuf.\\n\");\n-\t\t\treturn entry_success;\n+\t\tmbuf = rte_pktmbuf_alloc(mbuf_pool);\n+\t\tif (unlikely(mbuf == NULL)) {\n+\t\t\tRTE_LOG(ERR, VHOST_DATA, \"Failed to allocate memory for mbuf.\\n\");\n+\t\t\treturn packet_success;\n \t\t}\n-\t\tseg_offset = 0;\n-\t\tseg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;\n-\t\tcpy_len = RTE_MIN(vb_avail, seg_avail);\n-\n-\t\tPRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);\n-\n-\t\tseg_num++;\n-\t\tcur = m;\n-\t\tprev = m;\n-\t\twhile (cpy_len != 0) {\n-\t\t\trte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + seg_offset),\n-\t\t\t\t(void *)((uintptr_t)(vb_addr + vb_offset)),\n-\t\t\t\tcpy_len);\n-\n-\t\t\tseg_offset += cpy_len;\n-\t\t\tvb_offset += cpy_len;\n-\t\t\tvb_avail -= cpy_len;\n-\t\t\tseg_avail -= cpy_len;\n-\n-\t\t\tif (vb_avail != 0) {\n-\t\t\t\t/*\n-\t\t\t\t * The segment reachs to its end,\n-\t\t\t\t * while the virtio buffer in TX vring has\n-\t\t\t\t * more data to be copied.\n-\t\t\t\t */\n-\t\t\t\tcur->data_len = seg_offset;\n-\t\t\t\tm->pkt_len += seg_offset;\n-\t\t\t\t/* Allocate mbuf and populate the structure. 
*/\n-\t\t\t\tcur = rte_pktmbuf_alloc(mbuf_pool);\n-\t\t\t\tif (unlikely(cur == NULL)) {\n-\t\t\t\t\tRTE_LOG(ERR, VHOST_DATA, \"Failed to \"\n-\t\t\t\t\t\t\"allocate memory for mbuf.\\n\");\n-\t\t\t\t\trte_pktmbuf_free(m);\n-\t\t\t\t\talloc_err = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\t}\n-\n-\t\t\t\tseg_num++;\n-\t\t\t\tprev->next = cur;\n-\t\t\t\tprev = cur;\n-\t\t\t\tseg_offset = 0;\n-\t\t\t\tseg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;\n-\t\t\t} else {\n-\t\t\t\tif (desc->flags & VRING_DESC_F_NEXT) {\n-\t\t\t\t\t/*\n-\t\t\t\t\t * There are more virtio buffers in\n-\t\t\t\t\t * same vring entry need to be copied.\n-\t\t\t\t\t */\n-\t\t\t\t\tif (seg_avail == 0) {\n-\t\t\t\t\t\t/*\n-\t\t\t\t\t\t * The current segment hasn't\n-\t\t\t\t\t\t * room to accomodate more\n-\t\t\t\t\t\t * data.\n-\t\t\t\t\t\t */\n-\t\t\t\t\t\tcur->data_len = seg_offset;\n-\t\t\t\t\t\tm->pkt_len += seg_offset;\n-\t\t\t\t\t\t/*\n-\t\t\t\t\t\t * Allocate an mbuf and\n-\t\t\t\t\t\t * populate the structure.\n-\t\t\t\t\t\t */\n-\t\t\t\t\t\tcur = rte_pktmbuf_alloc(mbuf_pool);\n-\t\t\t\t\t\tif (unlikely(cur == NULL)) {\n-\t\t\t\t\t\t\tRTE_LOG(ERR,\n-\t\t\t\t\t\t\t\tVHOST_DATA,\n-\t\t\t\t\t\t\t\t\"Failed to \"\n-\t\t\t\t\t\t\t\t\"allocate memory \"\n-\t\t\t\t\t\t\t\t\"for mbuf\\n\");\n-\t\t\t\t\t\t\trte_pktmbuf_free(m);\n-\t\t\t\t\t\t\talloc_err = 1;\n-\t\t\t\t\t\t\tbreak;\n-\t\t\t\t\t\t}\n-\t\t\t\t\t\tseg_num++;\n-\t\t\t\t\t\tprev->next = cur;\n-\t\t\t\t\t\tprev = cur;\n-\t\t\t\t\t\tseg_offset = 0;\n-\t\t\t\t\t\tseg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;\n-\t\t\t\t\t}\n-\n-\t\t\t\t\tdesc = &vq->desc[desc->next];\n-\n-\t\t\t\t\t/* Buffer address translation. */\n-\t\t\t\t\tvb_addr = gpa_to_vva(dev, desc->addr);\n-\t\t\t\t\t/* Prefetch buffer address. 
*/\n-\t\t\t\t\trte_prefetch0((void *)(uintptr_t)vb_addr);\n-\t\t\t\t\tvb_offset = 0;\n-\t\t\t\t\tvb_avail = desc->len;\n-\n-\t\t\t\t\tPRINT_PACKET(dev, (uintptr_t)vb_addr,\n-\t\t\t\t\t\tdesc->len, 0);\n-\t\t\t\t} else {\n-\t\t\t\t\t/* The whole packet completes. */\n-\t\t\t\t\tcur->data_len = seg_offset;\n-\t\t\t\t\tm->pkt_len += seg_offset;\n-\t\t\t\t\tvb_avail = 0;\n-\t\t\t\t}\n-\t\t\t}\n+\t\tmbuf->pkt.data_len = desc->len;\n+\t\tmbuf->pkt.pkt_len  = mbuf->pkt.data_len;\n \n-\t\t\tcpy_len = RTE_MIN(vb_avail, seg_avail);\n-\t\t}\n+\t\trte_memcpy((void *) mbuf->pkt.data,\n+\t\t\t(const void *) buff_addr, mbuf->pkt.data_len);\n \n-\t\tif (unlikely(alloc_err == 1))\n-\t\t\tbreak;\n+\t\tpkts[packet_success] = mbuf;\n \n-\t\tm->nb_segs = seg_num;\n+\t\tVHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0);\n \n-\t\tpkts[entry_success] = m;\n \t\tvq->last_used_idx++;\n-\t\tentry_success++;\n+\t\tpacket_success++;\n \t}\n \n \trte_compiler_barrier();\n-\tvq->used->idx += entry_success;\n+\tvq->used->idx += packet_success;\n \t/* Kick guest if required. 
*/\n \tif (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))\n \t\teventfd_write((int)vq->kickfd, 1);\n-\treturn entry_success;\n+\n+\treturn packet_success;\n }\ndiff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c\nindex 852b6d1..516e743 100644\n--- a/lib/librte_vhost/virtio-net.c\n+++ b/lib/librte_vhost/virtio-net.c\n@@ -31,17 +31,14 @@\n  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n  */\n \n-#include <dirent.h>\n-#include <fuse/cuse_lowlevel.h>\n #include <linux/vhost.h>\n #include <linux/virtio_net.h>\n #include <stddef.h>\n #include <stdint.h>\n #include <stdlib.h>\n-#include <sys/eventfd.h>\n-#include <sys/ioctl.h>\n #include <sys/mman.h>\n #include <unistd.h>\n+#include <assert.h>\n \n #include <rte_ethdev.h>\n #include <rte_log.h>\n@@ -49,10 +46,8 @@\n #include <rte_memory.h>\n #include <rte_virtio_net.h>\n \n-#include \"vhost-net-cdev.h\"\n-#include \"eventfd_link/eventfd_link.h\"\n-\n-/*\n+#include \"vhost-net.h\"\n+/**\n  * Device linked list structure for configuration.\n  */\n struct virtio_net_config_ll {\n@@ -60,38 +55,15 @@ struct virtio_net_config_ll {\n \tstruct virtio_net_config_ll *next;\t/* Next dev on linked list.*/\n };\n \n-const char eventfd_cdev[] = \"/dev/eventfd-link\";\n-\n-/* device ops to add/remove device to/from data core. */\n+/* device ops to add/remove device to data core. */\n static struct virtio_net_device_ops const *notify_ops;\n-/* root address of the linked list of managed virtio devices */\n+/* root address of the linked list in the configuration core. */\n static struct virtio_net_config_ll *ll_root;\n \n /* Features supported by this lib. */\n-#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \\\n-\t\t\t\t  (1ULL << VIRTIO_NET_F_CTRL_RX))\n+#define VHOST_SUPPORTED_FEATURES (1ULL << VIRTIO_NET_F_MRG_RXBUF)\n static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;\n \n-/* Line size for reading maps file. 
*/\n-static const uint32_t BUFSIZE = PATH_MAX;\n-\n-/* Size of prot char array in procmap. */\n-#define PROT_SZ 5\n-\n-/* Number of elements in procmap struct. */\n-#define PROCMAP_SZ 8\n-\n-/* Structure containing information gathered from maps file. */\n-struct procmap {\n-\tuint64_t va_start;\t/* Start virtual address in file. */\n-\tuint64_t len;\t\t/* Size of file. */\n-\tuint64_t pgoff;\t\t/* Not used. */\n-\tuint32_t maj;\t\t/* Not used. */\n-\tuint32_t min;\t\t/* Not used. */\n-\tuint32_t ino;\t\t/* Not used. */\n-\tchar prot[PROT_SZ];\t/* Not used. */\n-\tchar fname[PATH_MAX];\t/* File name. */\n-};\n \n /*\n  * Converts QEMU virtual address to Vhost virtual address. This function is\n@@ -110,199 +82,15 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)\n \t\tif ((qemu_va >= region->userspace_address) &&\n \t\t\t(qemu_va <= region->userspace_address +\n \t\t\tregion->memory_size)) {\n-\t\t\tvhost_va = dev->mem->mapped_address + qemu_va -\n-\t\t\t\t\tdev->mem->base_address;\n+\t\t\tvhost_va = qemu_va +  region->guest_phys_address + \n+\t\t\t\tregion->address_offset -\n+\t\t\t\tregion->userspace_address;\n \t\t\tbreak;\n \t\t}\n \t}\n \treturn vhost_va;\n }\n \n-/*\n- * Locate the file containing QEMU's memory space and\n- * map it to our address space.\n- */\n-static int\n-host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,\n-\tpid_t pid, uint64_t addr)\n-{\n-\tstruct dirent *dptr = NULL;\n-\tstruct procmap procmap;\n-\tDIR *dp = NULL;\n-\tint fd;\n-\tint i;\n-\tchar memfile[PATH_MAX];\n-\tchar mapfile[PATH_MAX];\n-\tchar procdir[PATH_MAX];\n-\tchar resolved_path[PATH_MAX];\n-\tchar *path = NULL;\n-\tFILE *fmap;\n-\tvoid *map;\n-\tuint8_t found = 0;\n-\tchar line[BUFSIZE];\n-\tchar dlm[] = \"-   :   \";\n-\tchar *str, *sp, *in[PROCMAP_SZ];\n-\tchar *end = NULL;\n-\n-\t/* Path where mem files are located. */\n-\tsnprintf(procdir, PATH_MAX, \"/proc/%u/fd/\", pid);\n-\t/* Maps file used to locate mem file. 
*/\n-\tsnprintf(mapfile, PATH_MAX, \"/proc/%u/maps\", pid);\n-\n-\tfmap = fopen(mapfile, \"r\");\n-\tif (fmap == NULL) {\n-\t\tRTE_LOG(ERR, VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") Failed to open maps file for pid %d\\n\",\n-\t\t\tdev->device_fh, pid);\n-\t\treturn -1;\n-\t}\n-\n-\t/* Read through maps file until we find out base_address. */\n-\twhile (fgets(line, BUFSIZE, fmap) != 0) {\n-\t\tstr = line;\n-\t\terrno = 0;\n-\t\t/* Split line into fields. */\n-\t\tfor (i = 0; i < PROCMAP_SZ; i++) {\n-\t\t\tin[i] = strtok_r(str, &dlm[i], &sp);\n-\t\t\tif ((in[i] == NULL) || (errno != 0)) {\n-\t\t\t\tfclose(fmap);\n-\t\t\t\treturn -1;\n-\t\t\t}\n-\t\t\tstr = NULL;\n-\t\t}\n-\n-\t\t/* Convert/Copy each field as needed. */\n-\t\tprocmap.va_start = strtoull(in[0], &end, 16);\n-\t\tif ((in[0] == '\\0') || (end == NULL) || (*end != '\\0') ||\n-\t\t\t(errno != 0)) {\n-\t\t\tfclose(fmap);\n-\t\t\treturn -1;\n-\t\t}\n-\n-\t\tprocmap.len = strtoull(in[1], &end, 16);\n-\t\tif ((in[1] == '\\0') || (end == NULL) || (*end != '\\0') ||\n-\t\t\t(errno != 0)) {\n-\t\t\tfclose(fmap);\n-\t\t\treturn -1;\n-\t\t}\n-\n-\t\tprocmap.pgoff = strtoull(in[3], &end, 16);\n-\t\tif ((in[3] == '\\0') || (end == NULL) || (*end != '\\0') ||\n-\t\t\t(errno != 0)) {\n-\t\t\tfclose(fmap);\n-\t\t\treturn -1;\n-\t\t}\n-\n-\t\tprocmap.maj = strtoul(in[4], &end, 16);\n-\t\tif ((in[4] == '\\0') || (end == NULL) || (*end != '\\0') ||\n-\t\t\t(errno != 0)) {\n-\t\t\tfclose(fmap);\n-\t\t\treturn -1;\n-\t\t}\n-\n-\t\tprocmap.min = strtoul(in[5], &end, 16);\n-\t\tif ((in[5] == '\\0') || (end == NULL) || (*end != '\\0') ||\n-\t\t\t(errno != 0)) {\n-\t\t\tfclose(fmap);\n-\t\t\treturn -1;\n-\t\t}\n-\n-\t\tprocmap.ino = strtoul(in[6], &end, 16);\n-\t\tif ((in[6] == '\\0') || (end == NULL) || (*end != '\\0') ||\n-\t\t\t(errno != 0)) {\n-\t\t\tfclose(fmap);\n-\t\t\treturn -1;\n-\t\t}\n-\n-\t\tmemcpy(&procmap.prot, in[2], PROT_SZ);\n-\t\tmemcpy(&procmap.fname, in[7], PATH_MAX);\n-\n-\t\tif (procmap.va_start == addr) 
{\n-\t\t\tprocmap.len = procmap.len - procmap.va_start;\n-\t\t\tfound = 1;\n-\t\t\tbreak;\n-\t\t}\n-\t}\n-\tfclose(fmap);\n-\n-\tif (!found) {\n-\t\tRTE_LOG(ERR, VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") Failed to find memory file in pid %d maps file\\n\",\n-\t\t\tdev->device_fh, pid);\n-\t\treturn -1;\n-\t}\n-\n-\t/* Find the guest memory file among the process fds. */\n-\tdp = opendir(procdir);\n-\tif (dp == NULL) {\n-\t\tRTE_LOG(ERR, VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") Cannot open pid %d process directory\\n\",\n-\t\t\tdev->device_fh, pid);\n-\t\treturn -1;\n-\t}\n-\n-\tfound = 0;\n-\n-\t/* Read the fd directory contents. */\n-\twhile (NULL != (dptr = readdir(dp))) {\n-\t\tsnprintf(memfile, PATH_MAX, \"/proc/%u/fd/%s\",\n-\t\t\t\tpid, dptr->d_name);\n-\t\tpath = realpath(memfile, resolved_path);\n-\t\tif ((path == NULL) && (strlen(resolved_path) == 0)) {\n-\t\t\tRTE_LOG(ERR, VHOST_CONFIG,\n-\t\t\t\t\"(%\"PRIu64\") Failed to resolve fd directory\\n\",\n-\t\t\t\tdev->device_fh);\n-\t\t\tclosedir(dp);\n-\t\t\treturn -1;\n-\t\t}\n-\t\tif (strncmp(resolved_path, procmap.fname,\n-\t\t\tstrnlen(procmap.fname, PATH_MAX)) == 0) {\n-\t\t\tfound = 1;\n-\t\t\tbreak;\n-\t\t}\n-\t}\n-\n-\tclosedir(dp);\n-\n-\tif (found == 0) {\n-\t\tRTE_LOG(ERR, VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") Failed to find memory file for pid %d\\n\",\n-\t\t\tdev->device_fh, pid);\n-\t\treturn -1;\n-\t}\n-\t/* Open the shared memory file and map the memory into this process. 
*/\n-\tfd = open(memfile, O_RDWR);\n-\n-\tif (fd == -1) {\n-\t\tRTE_LOG(ERR, VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") Failed to open %s for pid %d\\n\",\n-\t\t\tdev->device_fh, memfile, pid);\n-\t\treturn -1;\n-\t}\n-\n-\tmap = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,\n-\t\tMAP_POPULATE|MAP_SHARED, fd, 0);\n-\tclose(fd);\n-\n-\tif (map == MAP_FAILED) {\n-\t\tRTE_LOG(ERR, VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") Error mapping the file %s for pid %d\\n\",\n-\t\t\tdev->device_fh, memfile, pid);\n-\t\treturn -1;\n-\t}\n-\n-\t/* Store the memory address and size in the device data structure */\n-\tmem->mapped_address = (uint64_t)(uintptr_t)map;\n-\tmem->mapped_size = procmap.len;\n-\n-\tLOG_DEBUG(VHOST_CONFIG,\n-\t\t\"(%\"PRIu64\") Mem File: %s->%s - Size: %llu - VA: %p\\n\",\n-\t\tdev->device_fh,\n-\t\tmemfile, resolved_path,\n-\t\t(unsigned long long)mem->mapped_size, map);\n-\n-\treturn 0;\n-}\n \n /*\n  * Retrieves an entry from the devices configuration linked list.\n@@ -376,7 +164,7 @@ add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)\n \t}\n \n }\n-\n+/*TODO dpdk alloc/free if possible */\n /*\n  * Unmap any memory, close any file descriptors and\n  * free any memory owned by a device.\n@@ -389,16 +177,17 @@ cleanup_device(struct virtio_net *dev)\n \t\tmunmap((void *)(uintptr_t)dev->mem->mapped_address,\n \t\t\t(size_t)dev->mem->mapped_size);\n \t\tfree(dev->mem);\n+\t\tdev->mem = NULL;\n \t}\n \n \t/* Close any event notifiers opened by device. 
*/\n-\tif (dev->virtqueue[VIRTIO_RXQ]->callfd)\n+\tif (dev->virtqueue[VIRTIO_RXQ]->callfd > 0)\n \t\tclose((int)dev->virtqueue[VIRTIO_RXQ]->callfd);\n-\tif (dev->virtqueue[VIRTIO_RXQ]->kickfd)\n+\tif (dev->virtqueue[VIRTIO_RXQ]->kickfd > 0)\n \t\tclose((int)dev->virtqueue[VIRTIO_RXQ]->kickfd);\n-\tif (dev->virtqueue[VIRTIO_TXQ]->callfd)\n+\tif (dev->virtqueue[VIRTIO_TXQ]->callfd > 0)\n \t\tclose((int)dev->virtqueue[VIRTIO_TXQ]->callfd);\n-\tif (dev->virtqueue[VIRTIO_TXQ]->kickfd)\n+\tif (dev->virtqueue[VIRTIO_TXQ]->kickfd > 0)\n \t\tclose((int)dev->virtqueue[VIRTIO_TXQ]->kickfd);\n }\n \n@@ -522,8 +311,8 @@ new_device(struct vhost_device_ctx ctx)\n }\n \n /*\n- * Function is called from the CUSE release function. This function will\n- * cleanup the device and remove it from device configuration linked list.\n+ * Function is called from the CUSE release function. This function will cleanup\n+ * the device and remove it from device configuration linked list.\n  */\n static void\n destroy_device(struct vhost_device_ctx ctx)\n@@ -569,6 +358,7 @@ set_owner(struct vhost_device_ctx ctx)\n \t\treturn -1;\n \n \treturn 0;\n+\t/* TODO check ctx.fh is meaningfull here */\n }\n \n /*\n@@ -651,14 +441,12 @@ set_features(struct vhost_device_ctx ctx, uint64_t *pu)\n  * This includes storing offsets used to translate buffer addresses.\n  */\n static int\n-set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,\n-\tuint32_t nregions)\n+set_mem_table(struct vhost_device_ctx ctx,\n+\tconst struct virtio_memory_regions *regions, uint32_t nregions)\n {\n \tstruct virtio_net *dev;\n-\tstruct vhost_memory_region *mem_regions;\n \tstruct virtio_memory *mem;\n-\tuint64_t size = offsetof(struct vhost_memory, regions);\n-\tuint32_t regionidx, valid_regions;\n+\tuint32_t regionidx;\n \n \tdev = get_device(ctx);\n \tif (dev == NULL)\n@@ -682,107 +470,24 @@ set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,\n \n \tmem->nregions = nregions;\n 
\n-\tmem_regions = (void *)(uintptr_t)\n-\t\t\t((uint64_t)(uintptr_t)mem_regions_addr + size);\n-\n \tfor (regionidx = 0; regionidx < mem->nregions; regionidx++) {\n \t\t/* Populate the region structure for each region. */\n-\t\tmem->regions[regionidx].guest_phys_address =\n-\t\t\tmem_regions[regionidx].guest_phys_addr;\n-\t\tmem->regions[regionidx].guest_phys_address_end =\n-\t\t\tmem->regions[regionidx].guest_phys_address +\n-\t\t\tmem_regions[regionidx].memory_size;\n-\t\tmem->regions[regionidx].memory_size =\n-\t\t\tmem_regions[regionidx].memory_size;\n-\t\tmem->regions[regionidx].userspace_address =\n-\t\t\tmem_regions[regionidx].userspace_addr;\n-\n-\t\tLOG_DEBUG(VHOST_CONFIG, \"(%\"PRIu64\") REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%\"PRIu64\")\\n\", dev->device_fh,\n-\t\t\tregionidx,\n-\t\t\t(void *)(uintptr_t)mem->regions[regionidx].guest_phys_address,\n-\t\t\t(void *)(uintptr_t)mem->regions[regionidx].userspace_address,\n-\t\t\tmem->regions[regionidx].memory_size);\n-\n-\t\t/*set the base address mapping*/\n+\t\tmem->regions[regionidx] = regions[regionidx];\n \t\tif (mem->regions[regionidx].guest_phys_address == 0x0) {\n \t\t\tmem->base_address =\n \t\t\t\tmem->regions[regionidx].userspace_address;\n-\t\t\t/* Map VM memory file */\n-\t\t\tif (host_memory_map(dev, mem, ctx.pid,\n-\t\t\t\tmem->base_address) != 0) {\n-\t\t\t\tfree(mem);\n-\t\t\t\treturn -1;\n-\t\t\t}\n+\t\t\tmem->mapped_address = \n+\t\t\t\tmem->regions[regionidx].address_offset;\n \t\t}\n \t}\n \n-\t/* Check that we have a valid base address. 
*/\n-\tif (mem->base_address == 0) {\n-\t\tRTE_LOG(ERR, VHOST_CONFIG, \"(%\"PRIu64\") Failed to find base address of qemu memory file.\\n\", dev->device_fh);\n-\t\tfree(mem);\n-\t\treturn -1;\n-\t}\n-\n-\t/*\n-\t * Check if all of our regions have valid mappings.\n-\t * Usually one does not exist in the QEMU memory file.\n-\t */\n-\tvalid_regions = mem->nregions;\n-\tfor (regionidx = 0; regionidx < mem->nregions; regionidx++) {\n-\t\tif ((mem->regions[regionidx].userspace_address <\n-\t\t\tmem->base_address) ||\n-\t\t\t(mem->regions[regionidx].userspace_address >\n-\t\t\t(mem->base_address + mem->mapped_size)))\n-\t\t\t\tvalid_regions--;\n-\t}\n-\n-\t/*\n-\t * If a region does not have a valid mapping,\n-\t * we rebuild our memory struct to contain only valid entries.\n-\t */\n-\tif (valid_regions != mem->nregions) {\n-\t\tLOG_DEBUG(VHOST_CONFIG, \"(%\"PRIu64\") Not all memory regions exist in the QEMU mem file. Re-populating mem structure\\n\",\n-\t\t\tdev->device_fh);\n-\n-\t\t/*\n-\t\t * Re-populate the memory structure with only valid regions.\n-\t\t * Invalid regions are over-written with memmove.\n-\t\t */\n-\t\tvalid_regions = 0;\n-\n-\t\tfor (regionidx = mem->nregions; 0 != regionidx--;) {\n-\t\t\tif ((mem->regions[regionidx].userspace_address <\n-\t\t\t\tmem->base_address) ||\n-\t\t\t\t(mem->regions[regionidx].userspace_address >\n-\t\t\t\t(mem->base_address + mem->mapped_size))) {\n-\t\t\t\tmemmove(&mem->regions[regionidx],\n-\t\t\t\t\t&mem->regions[regionidx + 1],\n-\t\t\t\t\tsizeof(struct virtio_memory_regions) *\n-\t\t\t\t\t\tvalid_regions);\n-\t\t\t} else {\n-\t\t\t\tvalid_regions++;\n-\t\t\t}\n-\t\t}\n-\t}\n-\tmem->nregions = valid_regions;\n+\t/*TODO addback the logic that remove invalid memory regions */\n \tdev->mem = mem;\n \n-\t/*\n-\t * Calculate the address offset for each region.\n-\t * This offset is used to identify the vhost virtual address\n-\t * corresponding to a QEMU guest physical address.\n-\t */\n-\tfor (regionidx = 0; regionidx < 
dev->mem->nregions; regionidx++) {\n-\t\tdev->mem->regions[regionidx].address_offset =\n-\t\t\tdev->mem->regions[regionidx].userspace_address -\n-\t\t\t\tdev->mem->base_address +\n-\t\t\t\tdev->mem->mapped_address -\n-\t\t\t\tdev->mem->regions[regionidx].guest_phys_address;\n-\n-\t}\n \treturn 0;\n }\n \n+\n /*\n  * Called from CUSE IOCTL: VHOST_SET_VRING_NUM\n  * The virtio device sends us the size of the descriptor ring.\n@@ -896,38 +601,62 @@ get_vring_base(struct vhost_device_ctx ctx, uint32_t index,\n \t/* State->index refers to the queue index. The txq is 1, rxq is 0. */\n \tstate->num = dev->virtqueue[state->index]->last_used_idx;\n \n-\treturn 0;\n-}\n+\tif (dev->flags & VIRTIO_DEV_RUNNING) {\n+\t\tRTE_LOG(INFO, VHOST_CONFIG, \n+\t\t\t\"get_vring_base message is for release\\n\");\n+\t\tnotify_ops->destroy_device(dev);\n+\t\t/*\n+\t\t * sync call.\n+\t\t * when it returns, it means it si removed from data core.\n+\t\t */\n+\t}\n+\t/* TODO fix all munmap */\n+\tif (dev->mem) {\n+\t\tmunmap((void *)(uintptr_t)dev->mem->mapped_address,\n+\t\t\t(size_t)dev->mem->mapped_size);\n+\t\tfree(dev->mem);\n+\t\tdev->mem = NULL;\n+\t}\n \n-/*\n- * This function uses the eventfd_link kernel module to copy an eventfd file\n- * descriptor provided by QEMU in to our process space.\n- */\n-static int\n-eventfd_copy(struct virtio_net *dev, struct eventfd_copy *eventfd_copy)\n-{\n-\tint eventfd_link, ret;\n \n-\t/* Open the character device to the kernel module. 
*/\n-\teventfd_link = open(eventfd_cdev, O_RDWR);\n-\tif (eventfd_link < 0) {\n-\t\tRTE_LOG(ERR, VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") eventfd_link module is not loaded\\n\",\n-\t\t\tdev->device_fh);\n-\t\treturn -1;\n-\t}\n+\tif (dev->virtqueue[VIRTIO_RXQ]->callfd > 0)\n+\t\tclose((int)dev->virtqueue[VIRTIO_RXQ]->callfd);\n+\tdev->virtqueue[VIRTIO_RXQ]->callfd = -1;\n+\tif (dev->virtqueue[VIRTIO_TXQ]->callfd > 0)\n+\t\tclose((int)dev->virtqueue[VIRTIO_TXQ]->callfd);\n+\tdev->virtqueue[VIRTIO_TXQ]->callfd = -1;\n+\t/* We don't cleanup callfd here as we willn't get CALLFD again */\n+\t\n+\tdev->virtqueue[VIRTIO_RXQ]->desc = NULL;\n+\tdev->virtqueue[VIRTIO_RXQ]->avail = NULL;\n+\tdev->virtqueue[VIRTIO_RXQ]->used = NULL;\n+\tdev->virtqueue[VIRTIO_RXQ]->last_used_idx = 0;\n+\tdev->virtqueue[VIRTIO_RXQ]->last_used_idx_res = 0;\n+\n+\tdev->virtqueue[VIRTIO_TXQ]->desc = NULL;\n+\tdev->virtqueue[VIRTIO_TXQ]->avail = NULL;\n+\tdev->virtqueue[VIRTIO_TXQ]->used = NULL;\n+\tdev->virtqueue[VIRTIO_TXQ]->last_used_idx = 0;\n+\tdev->virtqueue[VIRTIO_TXQ]->last_used_idx_res = 0;\n \n-\t/* Call the IOCTL to copy the eventfd. 
*/\n-\tret = ioctl(eventfd_link, EVENTFD_COPY, eventfd_copy);\n-\tclose(eventfd_link);\n \n-\tif (ret < 0) {\n-\t\tRTE_LOG(ERR, VHOST_CONFIG,\n-\t\t\t\"(%\"PRIu64\") EVENTFD_COPY ioctl failed\\n\",\n-\t\t\tdev->device_fh);\n-\t\treturn -1;\n-\t}\n+\treturn 0;\n+}\n \n+static int\n+virtio_is_ready(struct virtio_net *dev, int index)\n+{\n+\tstruct vhost_virtqueue *vq1, *vq2;\n+\t/* mq support in future.*/\n+\tvq1 = dev->virtqueue[index];\n+\tvq2 = dev->virtqueue[index ^ 1];\n+\tif (vq1 && vq2 && vq1->desc && vq2->desc && \n+\t\t(vq1->kickfd > 0) && (vq1->callfd > 0) &&\n+\t\t(vq2->kickfd > 0) && (vq2->callfd > 0)) {\n+\t\tLOG_DEBUG(VHOST_CONFIG, \"virtio is ready for processing.\\n\");\n+\t\treturn 1;\n+\t}\n+\tLOG_DEBUG(VHOST_CONFIG, \"virtio isn't ready for processing.\\n\");\n \treturn 0;\n }\n \n@@ -940,7 +669,6 @@ static int\n set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)\n {\n \tstruct virtio_net *dev;\n-\tstruct eventfd_copy\teventfd_kick;\n \tstruct vhost_virtqueue *vq;\n \n \tdev = get_device(ctx);\n@@ -953,14 +681,7 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)\n \tif (vq->kickfd)\n \t\tclose((int)vq->kickfd);\n \n-\t/* Populate the eventfd_copy structure and call eventfd_copy. 
*/\n-\tvq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);\n-\teventfd_kick.source_fd = vq->kickfd;\n-\teventfd_kick.target_fd = file->fd;\n-\teventfd_kick.target_pid = ctx.pid;\n-\n-\tif (eventfd_copy(dev, &eventfd_kick))\n-\t\treturn -1;\n+\tvq->kickfd = file->fd;\n \n \treturn 0;\n }\n@@ -974,7 +695,6 @@ static int\n set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)\n {\n \tstruct virtio_net *dev;\n-\tstruct eventfd_copy eventfd_call;\n \tstruct vhost_virtqueue *vq;\n \n \tdev = get_device(ctx);\n@@ -986,16 +706,11 @@ set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)\n \n \tif (vq->callfd)\n \t\tclose((int)vq->callfd);\n+\tvq->callfd = file->fd;\n \n-\t/* Populate the eventfd_copy structure and call eventfd_copy. */\n-\tvq->callfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);\n-\teventfd_call.source_fd = vq->callfd;\n-\teventfd_call.target_fd = file->fd;\n-\teventfd_call.target_pid = ctx.pid;\n-\n-\tif (eventfd_copy(dev, &eventfd_call))\n-\t\treturn -1;\n-\n+\tif (virtio_is_ready(dev, file->index) &&\n+\t\t!(dev->flags & VIRTIO_DEV_RUNNING))\n+\t\t\tnotify_ops->new_device(dev);\n \treturn 0;\n }\n \n@@ -1024,6 +739,7 @@ set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file)\n \t * If the device isn't already running and both backend fds are set,\n \t * we add the device.\n \t */\n+\tLOG_DEBUG(VHOST_CONFIG, \"%s %d\\n\", __func__, file->fd);\n \tif (!(dev->flags & VIRTIO_DEV_RUNNING)) {\n \t\tif (((int)dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED) &&\n \t\t\t((int)dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED))\n",
    "prefixes": [
        "dpdk-dev",
        "RFC"
    ]
}