Show a patch.

GET /api/patches/296/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 296,
    "url": "https://patches.dpdk.org/api/patches/296/?format=api",
    "web_url": "https://patches.dpdk.org/patch/296/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk"
    },
    "msgid": "<1409648131-4301-4-git-send-email-huawei.xie@intel.com>",
    "date": "2014-09-02T08:55:31",
    "name": "[dpdk-dev,3/3] examples/vhost: vhost example based on vhost library.",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "945c3b69ddc40344dfcf010feefbc2c0ff4b74ca",
    "submitter": {
        "id": 16,
        "url": "https://patches.dpdk.org/api/people/16/?format=api",
        "name": "Huawei Xie",
        "email": "huawei.xie@intel.com"
    },
    "delegate": null,
    "mbox": "https://patches.dpdk.org/patch/296/mbox/",
    "series": [],
    "comments": "https://patches.dpdk.org/api/patches/296/comments/",
    "check": "pending",
    "checks": "https://patches.dpdk.org/api/patches/296/checks/",
    "tags": {},
    "headers": {
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "Return-Path": "<hxie5@shecgisg003.sh.intel.com>",
        "Message-Id": "<1409648131-4301-4-git-send-email-huawei.xie@intel.com>",
        "X-Mailer": "git-send-email 1.7.0.7",
        "To": "dev@dpdk.org",
        "List-Subscribe": "<http://dpdk.org/ml/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "X-BeenThere": "dev@dpdk.org",
        "Received": [
            "from mga11.intel.com (mga11.intel.com [192.55.52.93])\n\tby dpdk.org (Postfix) with ESMTP id 49E735947\n\tfor <dev@dpdk.org>; Tue,  2 Sep 2014 10:51:30 +0200 (CEST)",
            "from azsmga001.ch.intel.com ([10.2.17.19])\n\tby fmsmga102.fm.intel.com with ESMTP; 02 Sep 2014 01:55:59 -0700",
            "from shvmail01.sh.intel.com ([10.239.29.42])\n\tby azsmga001.ch.intel.com with ESMTP; 02 Sep 2014 01:55:54 -0700",
            "from shecgisg003.sh.intel.com (shecgisg003.sh.intel.com\n\t[10.239.29.90])\n\tby shvmail01.sh.intel.com with ESMTP id s828tpui027263;\n\tTue, 2 Sep 2014 16:55:51 +0800",
            "from shecgisg003.sh.intel.com (localhost [127.0.0.1])\n\tby shecgisg003.sh.intel.com (8.13.6/8.13.6/SuSE Linux 0.8) with ESMTP\n\tid s828tnnT004431; Tue, 2 Sep 2014 16:55:51 +0800",
            "(from hxie5@localhost)\n\tby shecgisg003.sh.intel.com (8.13.6/8.13.6/Submit) id s828tnOD004427; \n\tTue, 2 Sep 2014 16:55:49 +0800"
        ],
        "Subject": "[dpdk-dev] [PATCH 3/3] examples/vhost: vhost example based on vhost\n\tlibrary.",
        "List-Unsubscribe": "<http://dpdk.org/ml/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Id": "patches and discussions about DPDK <dev.dpdk.org>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "In-Reply-To": "<1409648131-4301-1-git-send-email-huawei.xie@intel.com>",
        "X-ExtLoop1": "1",
        "Precedence": "list",
        "From": "Huawei Xie <huawei.xie@intel.com>",
        "X-IronPort-AV": "E=Sophos;i=\"5.04,447,1406617200\"; d=\"scan'208\";a=\"474586242\"",
        "References": "<1409648131-4301-1-git-send-email-huawei.xie@intel.com>",
        "X-List-Received-Date": "Tue, 02 Sep 2014 08:51:45 -0000",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "Date": "Tue,  2 Sep 2014 16:55:31 +0800",
        "X-Mailman-Version": "2.1.15"
    },
    "content": "Signed-off-by: Huawei Xie <huawei.xie@intel.com>\nAcked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>\n---\n examples/vhost/Makefile             |   52 +\n examples/vhost/libvirt/qemu-wrap.py |  366 ++++\n examples/vhost/main.c               | 3145 +++++++++++++++++++++++++++++++++++\n examples/vhost/main.h               |  109 ++\n 4 files changed, 3672 insertions(+)\n create mode 100644 examples/vhost/Makefile\n create mode 100644 examples/vhost/libvirt/qemu-wrap.py\n create mode 100644 examples/vhost/main.c\n create mode 100644 examples/vhost/main.h",
    "diff": "diff --git a/examples/vhost/Makefile b/examples/vhost/Makefile\nnew file mode 100644\nindex 0000000..a4d4fb0\n--- /dev/null\n+++ b/examples/vhost/Makefile\n@@ -0,0 +1,52 @@\n+#   BSD LICENSE\n+#\n+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.\n+#   All rights reserved.\n+#\n+#   Redistribution and use in source and binary forms, with or without\n+#   modification, are permitted provided that the following conditions\n+#   are met:\n+#\n+#     * Redistributions of source code must retain the above copyright\n+#       notice, this list of conditions and the following disclaimer.\n+#     * Redistributions in binary form must reproduce the above copyright\n+#       notice, this list of conditions and the following disclaimer in\n+#       the documentation and/or other materials provided with the\n+#       distribution.\n+#     * Neither the name of Intel Corporation nor the names of its\n+#       contributors may be used to endorse or promote products derived\n+#       from this software without specific prior written permission.\n+#\n+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+#   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+\n+ifeq ($(RTE_SDK),)\n+$(error \"Please define RTE_SDK environment variable\")\n+endif\n+\n+# Default target, can be overriden by command line or environment\n+RTE_TARGET ?= x86_64-native-linuxapp-gcc\n+\n+include $(RTE_SDK)/mk/rte.vars.mk\n+\n+# binary name\n+APP = vhost-switch\n+\n+# all source are stored in SRCS-y\n+#SRCS-y := cusedrv.c loopback-userspace.c\n+SRCS-y := main.c\n+\n+CFLAGS += -O2 -I/usr/local/include -D_FILE_OFFSET_BITS=64 -Wno-unused-parameter\n+CFLAGS += $(WERROR_FLAGS)\n+LDFLAGS += -lfuse\n+\n+include $(RTE_SDK)/mk/rte.extapp.mk\ndiff --git a/examples/vhost/libvirt/qemu-wrap.py b/examples/vhost/libvirt/qemu-wrap.py\nnew file mode 100644\nindex 0000000..8d820be\n--- /dev/null\n+++ b/examples/vhost/libvirt/qemu-wrap.py\n@@ -0,0 +1,366 @@\n+#!/usr/bin/python\n+#/*\n+# *   BSD LICENSE\n+# *\n+# *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.\n+# *   All rights reserved.\n+# *\n+# *   Redistribution and use in source and binary forms, with or without\n+# *   modification, are permitted provided that the following conditions\n+# *   are met:\n+# *\n+# *     * Redistributions of source code must retain the above copyright\n+# *       notice, this list of conditions and the following disclaimer.\n+# *     * Redistributions in binary form must reproduce the above copyright\n+# *       notice, this list of conditions and the following disclaimer in\n+# *       the documentation and/or other materials provided with the\n+# *       distribution.\n+# *     * Neither the name of Intel Corporation nor the names of its\n+# *       contributors may be used to endorse or promote products derived\n+# *       from this software without specific prior written permission.\n+# *\n+# *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+# *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+# *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+# *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n+# *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+# *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+# *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+# *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+# *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+# *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+# *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+# */\n+\n+#####################################################################\n+# This script is designed to modify the call to the QEMU emulator\n+# to support userspace vhost when starting a guest machine through\n+# libvirt with vhost enabled. The steps to enable this are as follows\n+# and should be run as root:\n+#\n+# 1. Place this script in a libvirtd's binary search PATH ($PATH)\n+#    A good location would be in the same directory that the QEMU\n+#    binary is located\n+#\n+# 2. Ensure that the script has the same owner/group and file\n+#    permissions as the QEMU binary\n+#\n+# 3. Update the VM xml file using \"virsh edit VM.xml\"\n+#\n+#    3.a) Set the VM to use the launch script\n+#\n+#    \tSet the emulator path contained in the\n+#\t\t<emulator><emulator/> tags\n+#\n+#    \te.g replace <emulator>/usr/bin/qemu-kvm<emulator/>\n+#        with    <emulator>/usr/bin/qemu-wrap.py<emulator/>\n+#\n+#\t 3.b) Set the VM's device's to use vhost-net offload\n+#\n+#\t\t<interface type=\"network\">\n+#       \t<model type=\"virtio\"/>\n+#       \t<driver name=\"vhost\"/>\n+#\t\t<interface/>\n+#\n+# 4. Enable libvirt to access our userpace device file by adding it to\n+#    controllers cgroup for libvirtd using the following steps\n+#\n+#   4.a) In /etc/libvirt/qemu.conf add/edit the following lines:\n+#         1) cgroup_controllers = [ ... \"devices\", ... ]\n+#\t\t  2) clear_emulator_capabilities = 0\n+#         3) user = \"root\"\n+#         4) group = \"root\"\n+#         5) cgroup_device_acl = [\n+#                \"/dev/null\", \"/dev/full\", \"/dev/zero\",\n+#                \"/dev/random\", \"/dev/urandom\",\n+#                \"/dev/ptmx\", \"/dev/kvm\", \"/dev/kqemu\",\n+#                \"/dev/rtc\", \"/dev/hpet\", \"/dev/net/tun\",\n+#                \"/dev/<devbase-name>-<index>\",\n+#            ]\n+#\n+#   4.b) Disable SELinux or set to permissive mode\n+#\n+#   4.c) Mount cgroup device controller\n+#        \"mkdir /dev/cgroup\"\n+#        \"mount -t cgroup none /dev/cgroup -o devices\"\n+#\n+#   4.d) Set hugetlbfs_mount variable - ( Optional )\n+#        VMs using userspace vhost must use hugepage backed\n+#        memory. This can be enabled in the libvirt XML\n+#        config by adding a memory backing section to the\n+#        XML config e.g.\n+#             <memoryBacking>\n+#             <hugepages/>\n+#             </memoryBacking>\n+#        This memory backing section should be added after the\n+#        <memory> and <currentMemory> sections. This will add\n+#        flags \"-mem-prealloc -mem-path <path>\" to the QEMU\n+#        command line. The hugetlbfs_mount variable can be used\n+#        to override the default <path> passed through by libvirt.\n+#\n+#        if \"-mem-prealloc\" or \"-mem-path <path>\" are not passed\n+#        through and a vhost device is detected then these options will\n+#        be automatically added by this script. This script will detect\n+#        the system hugetlbfs mount point to be used for <path>. The\n+#        default <path> for this script can be overidden by the\n+#        hugetlbfs_dir variable in the configuration section of this script.\n+#\n+#\n+#   4.e) Restart the libvirtd system process\n+#        e.g. on Fedora \"systemctl restart libvirtd.service\"\n+#\n+#\n+#   4.f) Edit the Configuration Parameters section of this script\n+#        to point to the correct emulator location and set any\n+#        addition options\n+#\n+# The script modifies the libvirtd Qemu call by modifying/adding\n+# options based on the configuration parameters below.\n+# NOTE:\n+#     emul_path and us_vhost_path must be set\n+#     All other parameters are optional\n+#####################################################################\n+\n+\n+#############################################\n+# Configuration Parameters\n+#############################################\n+#Path to QEMU binary\n+emul_path = \"/usr/local/bin/qemu-system-x86_64\"\n+\n+#Path to userspace vhost device file\n+# This filename should match the --dev-basename --dev-index parameters of\n+# the command used to launch the userspace vhost sample application e.g.\n+# if the sample app lauch command is:\n+#    ./build/vhost-switch ..... --dev-basename usvhost --dev-index 1\n+# then this variable should be set to:\n+#   us_vhost_path = \"/dev/usvhost-1\"\n+us_vhost_path = \"/dev/usvhost-1\"\n+\n+#List of additional user defined emulation options. These options will\n+#be added to all Qemu calls\n+emul_opts_user = []\n+\n+#List of additional user defined emulation options for vhost only.\n+#These options will only be added to vhost enabled guests\n+emul_opts_user_vhost = []\n+\n+#For all VHOST enabled VMs, the VM memory is preallocated from hugetlbfs\n+# Set this variable to one to enable this option for all VMs\n+use_huge_all = 0\n+\n+#Instead of autodetecting, override the hugetlbfs directory by setting\n+#this variable\n+hugetlbfs_dir = \"\"\n+\n+#############################################\n+\n+\n+#############################################\n+# ****** Do Not Modify Below this Line ******\n+#############################################\n+\n+import sys, os, subprocess\n+\n+\n+#List of open userspace vhost file descriptors\n+fd_list = []\n+\n+#additional virtio device flags when using userspace vhost\n+vhost_flags = [ \"csum=off\",\n+                \"gso=off\",\n+                \"guest_tso4=off\",\n+                \"guest_tso6=off\",\n+                \"guest_ecn=off\"\n+              ]\n+\n+\n+#############################################\n+# Find the system hugefile mount point.\n+# Note:\n+# if multiple hugetlbfs mount points exist\n+# then the first one found will be used\n+#############################################\n+def find_huge_mount():\n+\n+    if (len(hugetlbfs_dir)):\n+        return hugetlbfs_dir\n+\n+    huge_mount = \"\"\n+\n+    if (os.access(\"/proc/mounts\", os.F_OK)):\n+        f = open(\"/proc/mounts\", \"r\")\n+        line = f.readline()\n+        while line:\n+            line_split = line.split(\" \")\n+            if line_split[2] == 'hugetlbfs':\n+                huge_mount = line_split[1]\n+                break\n+            line = f.readline()\n+    else:\n+        print \"/proc/mounts not found\"\n+        exit (1)\n+\n+    f.close\n+    if len(huge_mount) == 0:\n+        print \"Failed to find hugetlbfs mount point\"\n+        exit (1)\n+\n+    return huge_mount\n+\n+\n+#############################################\n+# Get a userspace Vhost file descriptor\n+#############################################\n+def get_vhost_fd():\n+\n+    if (os.access(us_vhost_path, os.F_OK)):\n+        fd = os.open( us_vhost_path, os.O_RDWR)\n+    else:\n+        print (\"US-Vhost file %s not found\" %us_vhost_path)\n+        exit (1)\n+\n+    return fd\n+\n+\n+#############################################\n+# Check for vhostfd. if found then replace\n+# with our own vhost fd and append any vhost\n+# flags onto the end\n+#############################################\n+def modify_netdev_arg(arg):\n+\n+    global fd_list\n+    vhost_in_use = 0\n+    s = ''\n+    new_opts = []\n+    netdev_opts = arg.split(\",\")\n+\n+    for opt in netdev_opts:\n+        #check if vhost is used\n+        if \"vhost\" == opt[:5]:\n+            vhost_in_use = 1\n+        else:\n+            new_opts.append(opt)\n+\n+    #if using vhost append vhost options\n+    if vhost_in_use == 1:\n+        #append vhost on option\n+        new_opts.append('vhost=on')\n+        #append vhostfd ption\n+        new_fd = get_vhost_fd()\n+        new_opts.append('vhostfd=' + str(new_fd))\n+        fd_list.append(new_fd)\n+\n+    #concatenate all options\n+    for opt in new_opts:\n+        if len(s) > 0:\n+\t\t\ts+=','\n+\n+        s+=opt\n+\n+    return s\n+\n+\n+#############################################\n+# Main\n+#############################################\n+def main():\n+\n+    global fd_list\n+    global vhost_in_use\n+    new_args = []\n+    num_cmd_args = len(sys.argv)\n+    emul_call = ''\n+    mem_prealloc_set = 0\n+    mem_path_set = 0\n+    num = 0;\n+\n+    #parse the parameters\n+    while (num < num_cmd_args):\n+        arg = sys.argv[num]\n+\n+\t\t#Check netdev +1 parameter for vhostfd\n+        if arg == '-netdev':\n+            num_vhost_devs = len(fd_list)\n+            new_args.append(arg)\n+\n+            num+=1\n+            arg = sys.argv[num]\n+            mod_arg = modify_netdev_arg(arg)\n+            new_args.append(mod_arg)\n+\n+            #append vhost flags if this is a vhost device\n+            # and -device is the next arg\n+            # i.e -device -opt1,-opt2,...,-opt3,%vhost\n+            if (num_vhost_devs < len(fd_list)):\n+                num+=1\n+                arg = sys.argv[num]\n+                if arg == '-device':\n+                    new_args.append(arg)\n+                    num+=1\n+                    new_arg = sys.argv[num]\n+                    for flag in vhost_flags:\n+                        new_arg = ''.join([new_arg,',',flag])\n+                    new_args.append(new_arg)\n+                else:\n+                    new_args.append(arg)\n+        elif arg == '-mem-prealloc':\n+            mem_prealloc_set = 1\n+            new_args.append(arg)\n+        elif arg == '-mem-path':\n+            mem_path_set = 1\n+            new_args.append(arg)\n+\n+        else:\n+            new_args.append(arg)\n+\n+        num+=1\n+\n+    #Set Qemu binary location\n+    emul_call+=emul_path\n+    emul_call+=\" \"\n+\n+    #Add prealloc mem options if using vhost and not already added\n+    if ((len(fd_list) > 0) and (mem_prealloc_set == 0)):\n+        emul_call += \"-mem-prealloc \"\n+\n+    #Add mempath mem options if using vhost and not already added\n+    if ((len(fd_list) > 0) and (mem_path_set == 0)):\n+        #Detect and add hugetlbfs mount point\n+        mp = find_huge_mount()\n+        mp = \"\".join([\"-mem-path \", mp])\n+        emul_call += mp\n+        emul_call += \" \"\n+\n+\n+    #add user options\n+    for opt in emul_opts_user:\n+        emul_call += opt\n+        emul_call += \" \"\n+\n+    #Add add user vhost only options\n+    if len(fd_list) > 0:\n+        for opt in emul_opts_user_vhost:\n+            emul_call += opt\n+            emul_call += \" \"\n+\n+    #Add updated libvirt options\n+    iter_args = iter(new_args)\n+    #skip 1st arg i.e. call to this script\n+    next(iter_args)\n+    for arg in iter_args:\n+        emul_call+=str(arg)\n+        emul_call+= \" \"\n+\n+    #Call QEMU\n+    subprocess.call(emul_call, shell=True)\n+\n+\n+    #Close usvhost files\n+    for fd in fd_list:\n+        os.close(fd)\n+\n+\n+if __name__ == \"__main__\":\n+    main()\ndiff --git a/examples/vhost/main.c b/examples/vhost/main.c\nnew file mode 100644\nindex 0000000..1fa3391\n--- /dev/null\n+++ b/examples/vhost/main.c\n@@ -0,0 +1,3145 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.\n+ *   All rights reserved.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the name of Intel Corporation nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+#include <arpa/inet.h>\n+#include <getopt.h>\n+#include <linux/if_ether.h>\n+#include <linux/if_vlan.h>\n+#include <linux/virtio_net.h>\n+#include <linux/virtio_ring.h>\n+#include <signal.h>\n+#include <stdint.h>\n+#include <sys/eventfd.h>\n+#include <sys/param.h>\n+#include <unistd.h>\n+\n+#include <rte_atomic.h>\n+#include <rte_cycles.h>\n+#include <rte_ethdev.h>\n+#include <rte_log.h>\n+#include <rte_string_fns.h>\n+#include <rte_malloc.h>\n+#include <rte_virtio_net.h>\n+\n+#include \"main.h\"\n+\n+#define MAX_QUEUES 128\n+\n+/* #define VHOST_DEBUG */\n+\n+/* Macros for printing using RTE_LOG */\n+#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1\n+#define RTE_LOGTYPE_VHOST_PORT   RTE_LOGTYPE_USER2\n+#define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER3\n+\n+\n+#ifdef VHOST_DEBUG\n+#define LOG_LEVEL RTE_LOG_DEBUG\n+#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)\n+#else\n+#define LOG_LEVEL RTE_LOG_INFO\n+#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)\n+#endif\n+\n+\n+/* the maximum number of external ports supported */\n+#define MAX_SUP_PORTS 1\n+\n+/*\n+ * Calculate the number of buffers needed per port\n+ */\n+#define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) + \\\n+\t\t\t(num_switching_cores * MAX_PKT_BURST) + \\\n+\t\t\t(num_switching_cores * RTE_TEST_TX_DESC_DEFAULT) + \\\n+\t\t\t(num_switching_cores * MBUF_CACHE_SIZE))\n+\n+#define MBUF_CACHE_SIZE 128\n+#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)\n+\n+/*\n+ * No frame data buffer allocated from host are required for zero copy\n+ * implementation, guest will allocate the frame data buffer, and vhost\n+ * directly use it.\n+ */\n+#define VIRTIO_DESCRIPTOR_LEN_ZCP 1518\n+#define MBUF_SIZE_ZCP (VIRTIO_DESCRIPTOR_LEN_ZCP + sizeof(struct rte_mbuf) \\\n+\t+ RTE_PKTMBUF_HEADROOM)\n+#define MBUF_CACHE_SIZE_ZCP 0\n+\n+/*\n+ * RX and TX Prefetch, Host, and Write-back threshold values should be\n+ * carefully set for optimal performance. Consult the network\n+ * controller's datasheet and supporting DPDK documentation for guidance\n+ * on how these parameters should be set.\n+ */\n+#define RX_PTHRESH 8 /* Default values of RX prefetch threshold reg. */\n+#define RX_HTHRESH 8 /* Default values of RX host threshold reg. */\n+#define RX_WTHRESH 4 /* Default values of RX write-back threshold reg. */\n+\n+/*\n+ * These default values are optimized for use with the Intel(R) 82599 10 GbE\n+ * Controller and the DPDK ixgbe PMD. Consider using other values for other\n+ * network controllers and/or network drivers.\n+ */\n+#define TX_PTHRESH 36 /* Default values of TX prefetch threshold reg. */\n+#define TX_HTHRESH 0  /* Default values of TX host threshold reg. */\n+#define TX_WTHRESH 0  /* Default values of TX write-back threshold reg. */\n+\n+#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */\n+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */\n+\n+#define BURST_RX_WAIT_US 15 /* Defines how long we wait between retries on RX */\n+#define BURST_RX_RETRIES 4  /* Number of retries on RX. */\n+\n+/* State of virtio device. */\n+#define DEVICE_MAC_LEARNING 0\n+#define DEVICE_RX           1\n+#define DEVICE_SAFE_REMOVE  2\n+\n+/* Config_core_flag status definitions. */\n+#define REQUEST_DEV_REMOVAL 1\n+#define ACK_DEV_REMOVAL 0\n+\n+/* Configurable number of RX/TX ring descriptors */\n+#define RTE_TEST_RX_DESC_DEFAULT 1024\n+#define RTE_TEST_TX_DESC_DEFAULT 512\n+\n+/*\n+ * Need refine these 2 macros for legacy and DPDK based front end:\n+ * Max vring avail descriptor/entries from guest - MAX_PKT_BURST\n+ * And then adjust power 2.\n+ */\n+/*\n+ * For legacy front end, 128 descriptors,\n+ * half for virtio header, another half for mbuf.\n+ */\n+#define RTE_TEST_RX_DESC_DEFAULT_ZCP 32   /* legacy: 32, DPDK virt FE: 128. */\n+#define RTE_TEST_TX_DESC_DEFAULT_ZCP 64   /* legacy: 64, DPDK virt FE: 64.  */\n+\n+/* Get first 4 bytes in mbuf headroom. */\n+#define MBUF_HEADROOM_UINT32(mbuf) (*(uint32_t *)((uint8_t *)(mbuf) \\\n+\t\t+ sizeof(struct rte_mbuf)))\n+\n+/* true if x is a power of 2 */\n+#define POWEROF2(x) ((((x)-1) & (x)) == 0)\n+\n+#define INVALID_PORT_ID 0xFF\n+\n+/* Max number of devices. Limited by vmdq. */\n+#define MAX_DEVICES 64\n+\n+/* Size of buffers used for snprintfs. */\n+#define MAX_PRINT_BUFF 6072\n+\n+/* Maximum character device basename size. */\n+#define MAX_BASENAME_SZ 10\n+\n+/* Maximum long option length for option parsing. */\n+#define MAX_LONG_OPT_SZ 64\n+\n+/* Used to compare MAC addresses. */\n+#define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL\n+\n+/* Number of descriptors per cacheline. */\n+#define DESC_PER_CACHELINE (CACHE_LINE_SIZE / sizeof(struct vring_desc))\n+\n+/* mask of enabled ports */\n+static uint32_t enabled_port_mask;\n+\n+/*Number of switching cores enabled*/\n+static uint32_t num_switching_cores;\n+\n+/* number of devices/queues to support*/\n+static uint32_t num_queues;\n+static uint32_t num_devices;\n+\n+/*\n+ * Enable zero copy, pkts buffer will directly dma to hw descriptor,\n+ * disabled on default.\n+ */\n+static uint32_t zero_copy;\n+static int mergeable;\n+\n+\n+/* number of descriptors to apply*/\n+static uint32_t num_rx_descriptor = RTE_TEST_RX_DESC_DEFAULT_ZCP;\n+static uint32_t num_tx_descriptor = RTE_TEST_TX_DESC_DEFAULT_ZCP;\n+\n+/* max ring descriptor, ixgbe, i40e, e1000 all are 4096. */\n+#define MAX_RING_DESC 4096\n+\n+struct vpool {\n+\tstruct rte_mempool *pool;\n+\tstruct rte_ring *ring;\n+\tuint32_t buf_size;\n+} vpool_array[MAX_QUEUES + MAX_QUEUES];\n+\n+/*\n+ * Enable VM2VM communications. If this is disabled then the MAC address\n+ * compare is skipped.\n+ */\n+enum vm2vm_type {\n+\tVM2VM_DISABLED = 0,\n+\tVM2VM_SOFTWARE = 1,\n+\tVM2VM_HARDWARE = 2,\n+\tVM2VM_LAST\n+};\n+static enum vm2vm_type vm2vm_mode = VM2VM_SOFTWARE;\n+\n+/* The type of host physical address translated from guest physical address. */\n+enum hpa_type {\n+\tPHYS_ADDR_CONTINUOUS = 0,\n+\tPHYS_ADDR_CROSS_SUBREG = 1,\n+\tPHYS_ADDR_INVALID = 2,\n+\tPHYS_ADDR_LAST\n+};\n+\n+/* Enable stats. */\n+static uint32_t enable_stats;\n+/* Enable retries on RX. */\n+static uint32_t enable_retry;\n+/* Specify timeout (in useconds) between retries on RX. */\n+static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;\n+/* Specify the number of retries on RX. */\n+static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;\n+\n+/* Character device basename. Can be set by user. */\n+static char dev_basename[MAX_BASENAME_SZ] = \"vhost-net\";\n+\n+/* Default configuration for rx and tx thresholds etc. */\n+static struct rte_eth_rxconf rx_conf_default = {\n+\t.rx_thresh = {\n+\t\t.pthresh = RX_PTHRESH,\n+\t\t.hthresh = RX_HTHRESH,\n+\t\t.wthresh = RX_WTHRESH,\n+\t},\n+\t.rx_drop_en = 1,\n+};\n+\n+/*\n+ * These default values are optimized for use with the Intel(R) 82599 10 GbE\n+ * Controller and the DPDK ixgbe/igb PMD. Consider using other values for other\n+ * network controllers and/or network drivers.\n+ */\n+static struct rte_eth_txconf tx_conf_default = {\n+\t.tx_thresh = {\n+\t\t.pthresh = TX_PTHRESH,\n+\t\t.hthresh = TX_HTHRESH,\n+\t\t.wthresh = TX_WTHRESH,\n+\t},\n+\t.tx_free_thresh = 0, /* Use PMD default values */\n+\t.tx_rs_thresh = 0, /* Use PMD default values */\n+};\n+\n+/* empty vmdq configuration structure. Filled in programatically */\n+static struct rte_eth_conf vmdq_conf_default = {\n+\t.rxmode = {\n+\t\t.mq_mode        = ETH_MQ_RX_VMDQ_ONLY,\n+\t\t.split_hdr_size = 0,\n+\t\t.header_split   = 0, /**< Header Split disabled */\n+\t\t.hw_ip_checksum = 0, /**< IP checksum offload disabled */\n+\t\t.hw_vlan_filter = 0, /**< VLAN filtering disabled */\n+\t\t/*\n+\t\t * It is necessary for 1G NIC such as I350,\n+\t\t * this fixes bug of ipv4 forwarding in guest can't\n+\t\t * forward pakets from one virtio dev to another virtio dev.\n+\t\t */\n+\t\t.hw_vlan_strip  = 1, /**< VLAN strip enabled. */\n+\t\t.jumbo_frame    = 0, /**< Jumbo Frame Support disabled */\n+\t\t.hw_strip_crc   = 0, /**< CRC stripped by hardware */\n+\t},\n+\n+\t.txmode = {\n+\t\t.mq_mode = ETH_MQ_TX_NONE,\n+\t},\n+\t.rx_adv_conf = {\n+\t\t/*\n+\t\t * should be overridden separately in code with\n+\t\t * appropriate values\n+\t\t */\n+\t\t.vmdq_rx_conf = {\n+\t\t\t.nb_queue_pools = ETH_8_POOLS,\n+\t\t\t.enable_default_pool = 0,\n+\t\t\t.default_pool = 0,\n+\t\t\t.nb_pool_maps = 0,\n+\t\t\t.pool_map = {{0, 0},},\n+\t\t},\n+\t},\n+};\n+\n+static unsigned lcore_ids[RTE_MAX_LCORE];\n+static uint8_t ports[RTE_MAX_ETHPORTS];\n+static unsigned num_ports; /**< The number of ports specified in command line */\n+\n+static const uint16_t external_pkt_default_vlan_tag = 2000;\n+const uint16_t vlan_tags[] = {\n+\t1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007,\n+\t1008, 1009, 1010, 1011,\t1012, 1013, 1014, 1015,\n+\t1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,\n+\t1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031,\n+\t1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039,\n+\t1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,\n+\t1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,\n+\t1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,\n+};\n+\n+/* ethernet addresses of ports */\n+static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];\n+\n+/* heads for the main used and free linked lists for the data path. */\n+static struct virtio_net_data_ll *ll_root_used;\n+static struct virtio_net_data_ll *ll_root_free;\n+\n+/*\n+ * Array of data core structures containing information on individual core\n+ * linked lists.\n+ */\n+static struct lcore_info lcore_info[RTE_MAX_LCORE];\n+\n+/* Used for queueing bursts of TX packets. */\n+struct mbuf_table {\n+\tunsigned len;\n+\tunsigned txq_id;\n+\tstruct rte_mbuf *m_table[MAX_PKT_BURST];\n+};\n+\n+/* TX queue for each data core. */\n+struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];\n+\n+/* TX queue fori each virtio device for zero copy. */\n+struct mbuf_table tx_queue_zcp[MAX_QUEUES];\n+\n+/* Vlan header struct used to insert vlan tags on TX. */\n+struct vlan_ethhdr {\n+\tunsigned char   h_dest[ETH_ALEN];\n+\tunsigned char   h_source[ETH_ALEN];\n+\t__be16          h_vlan_proto;\n+\t__be16          h_vlan_TCI;\n+\t__be16          h_vlan_encapsulated_proto;\n+};\n+\n+/* IPv4 Header */\n+struct ipv4_hdr {\n+\tuint8_t  version_ihl;\t\t/**< version and header length */\n+\tuint8_t  type_of_service;\t/**< type of service */\n+\tuint16_t total_length;\t\t/**< length of packet */\n+\tuint16_t packet_id;\t\t/**< packet ID */\n+\tuint16_t fragment_offset;\t/**< fragmentation offset */\n+\tuint8_t  time_to_live;\t\t/**< time to live */\n+\tuint8_t  next_proto_id;\t\t/**< protocol ID */\n+\tuint16_t hdr_checksum;\t\t/**< header checksum */\n+\tuint32_t src_addr;\t\t/**< source address */\n+\tuint32_t dst_addr;\t\t/**< destination address */\n+} __attribute__((__packed__));\n+\n+/* Header lengths. */\n+#define VLAN_HLEN       4\n+#define VLAN_ETH_HLEN   18\n+\n+/* Per-device statistics struct */\n+struct device_statistics {\n+\tuint64_t tx_total;\n+\trte_atomic64_t rx_total_atomic;\n+\tuint64_t rx_total;\n+\tuint64_t tx;\n+\trte_atomic64_t rx_atomic;\n+\tuint64_t rx;\n+} __rte_cache_aligned;\n+struct device_statistics dev_statistics[MAX_DEVICES];\n+\n+/*\n+ * Builds up the correct configuration for VMDQ VLAN pool map\n+ * according to the pool & queue limits.\n+ */\n+static inline int\n+get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)\n+{\n+\tstruct rte_eth_vmdq_rx_conf conf;\n+\tunsigned i;\n+\n+\tmemset(&conf, 0, sizeof(conf));\n+\tconf.nb_queue_pools = (enum rte_eth_nb_pools)num_devices;\n+\tconf.nb_pool_maps = num_devices;\n+\tconf.enable_loop_back =\n+\t\tvmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back;\n+\n+\tfor (i = 0; i < conf.nb_pool_maps; i++) {\n+\t\tconf.pool_map[i].vlan_id = vlan_tags[i];\n+\t\tconf.pool_map[i].pools = (1UL << i);\n+\t}\n+\n+\t*eth_conf = vmdq_conf_default;\n+\teth_conf->rx_adv_conf.vmdq_rx_conf = conf;\n+\treturn 0;\n+}\n+\n+/*\n+ * Validate the device number according to the max pool number gotten form\n+ * dev_info. If the device number is invalid, give the error message and\n+ * return -1. Each device must have its own pool.\n+ */\n+static inline int\n+validate_num_devices(uint32_t max_nb_devices)\n+{\n+\tif (num_devices > max_nb_devices) {\n+\t\tRTE_LOG(ERR, VHOST_PORT, \"invalid number of devices\\n\");\n+\t\treturn -1;\n+\t}\n+\treturn 0;\n+}\n+\n+/*\n+ * Initialises a given port using global settings and with the rx buffers\n+ * coming from the mbuf_pool passed as parameter\n+ */\n+static inline int\n+port_init(uint8_t port)\n+{\n+\tstruct rte_eth_dev_info dev_info;\n+\tstruct rte_eth_conf port_conf;\n+\tuint16_t rx_rings, tx_rings;\n+\tuint16_t rx_ring_size, tx_ring_size;\n+\tint retval;\n+\tuint16_t q;\n+\n+\trte_eth_dev_info_get(port, &dev_info);\n+\n+\t/*\n+\t * Configure the number of supported virtio devices based on VMDQ\n+\t * limits\n+\t */\n+\tnum_devices = dev_info.max_vmdq_pools;\n+\tnum_queues = dev_info.max_rx_queues;\n+\n+\tif (zero_copy) {\n+\t\trx_ring_size = num_rx_descriptor;\n+\t\ttx_ring_size = num_tx_descriptor;\n+\t\ttx_rings = dev_info.max_tx_queues;\n+\t} else {\n+\t\trx_ring_size = RTE_TEST_RX_DESC_DEFAULT;\n+\t\ttx_ring_size = RTE_TEST_TX_DESC_DEFAULT;\n+\t\ttx_rings = (uint16_t)rte_lcore_count();\n+\t}\n+\n+\tretval = validate_num_devices(MAX_DEVICES);\n+\tif (retval < 0)\n+\t\treturn retval;\n+\n+\t/* Get port configuration. */\n+\tretval = get_eth_conf(&port_conf, num_devices);\n+\tif (retval < 0)\n+\t\treturn retval;\n+\n+\tif (port >= rte_eth_dev_count())\n+\t\treturn -1;\n+\n+\trx_rings = (uint16_t)num_queues,\n+\t/* Configure ethernet device. */\n+\tretval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);\n+\tif (retval != 0)\n+\t\treturn retval;\n+\n+\t/* Setup the queues. */\n+\tfor (q = 0; q < rx_rings; q++) {\n+\t\tretval = rte_eth_rx_queue_setup(port, q, rx_ring_size,\n+\t\t\t\t\t\trte_eth_dev_socket_id(port),\n+\t\t\t\t\t\t&rx_conf_default,\n+\t\t\t\t\t\tvpool_array[q].pool);\n+\t\tif (retval < 0)\n+\t\t\treturn retval;\n+\t}\n+\tfor (q = 0; q < tx_rings; q++) {\n+\t\tretval = rte_eth_tx_queue_setup(port, q, tx_ring_size,\n+\t\t\t\t\t\trte_eth_dev_socket_id(port),\n+\t\t\t\t\t\t&tx_conf_default);\n+\t\tif (retval < 0)\n+\t\t\treturn retval;\n+\t}\n+\n+\t/* Start the device. */\n+\tretval  = rte_eth_dev_start(port);\n+\tif (retval < 0) {\n+\t\tRTE_LOG(ERR, VHOST_DATA, \"Failed to start the device.\\n\");\n+\t\treturn retval;\n+\t}\n+\n+\trte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);\n+\tRTE_LOG(INFO, VHOST_PORT, \"Max virtio devices supported: %u\\n\",\n+\t\tnum_devices);\n+\tRTE_LOG(INFO, VHOST_PORT, \"Port %u MAC: %02\"PRIx8\" %02\"PRIx8\" %02\"PRIx8\n+\t\t\t\" %02\"PRIx8\" %02\"PRIx8\" %02\"PRIx8\"\\n\",\n+\t\t\t(unsigned)port,\n+\t\t\tvmdq_ports_eth_addr[port].addr_bytes[0],\n+\t\t\tvmdq_ports_eth_addr[port].addr_bytes[1],\n+\t\t\tvmdq_ports_eth_addr[port].addr_bytes[2],\n+\t\t\tvmdq_ports_eth_addr[port].addr_bytes[3],\n+\t\t\tvmdq_ports_eth_addr[port].addr_bytes[4],\n+\t\t\tvmdq_ports_eth_addr[port].addr_bytes[5]);\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * Set character device basename.\n+ */\n+static int\n+us_vhost_parse_basename(const char *q_arg)\n+{\n+\t/* parse number string */\n+\n+\tif (strnlen(q_arg, MAX_BASENAME_SZ) > MAX_BASENAME_SZ)\n+\t\treturn -1;\n+\telse\n+\t\tsnprintf((char *)&dev_basename, MAX_BASENAME_SZ, \"%s\", q_arg);\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * Parse the portmask provided at run time.\n+ */\n+static int\n+parse_portmask(const char *portmask)\n+{\n+\tchar *end = NULL;\n+\tunsigned long pm;\n+\n+\terrno = 0;\n+\n+\t/* parse hexadecimal string */\n+\tpm = strtoul(portmask, &end, 16);\n+\tif ((portmask[0] == '\\0') || (end == NULL) || (*end != '\\0') ||\n+\t\t(errno != 0))\n+\t\treturn -1;\n+\n+\tif (pm == 0)\n+\t\treturn -1;\n+\n+\treturn pm;\n+\n+}\n+\n+/*\n+ * Parse num options at run time.\n+ */\n+static int\n+parse_num_opt(const char *q_arg, uint32_t max_valid_value)\n+{\n+\tchar *end = NULL;\n+\tunsigned long num;\n+\n+\terrno = 0;\n+\n+\t/* parse unsigned int string */\n+\tnum = strtoul(q_arg, &end, 10);\n+\tif ((q_arg[0] == '\\0') || (end == NULL) || (*end != '\\0') ||\n+\t\t(errno != 0))\n+\t\treturn -1;\n+\n+\tif (num > max_valid_value)\n+\t\treturn -1;\n+\n+\treturn num;\n+\n+}\n+\n+/*\n+ * Display usage\n+ */\n+static void\n+us_vhost_usage(const char *prgname)\n+{\n+\tRTE_LOG(INFO, VHOST_CONFIG, \"%s [EAL options] -- -p PORTMASK\\n\"\n+\t\"\t\t--vm2vm [0|1|2]\\n\"\n+\t\"\t\t--rx_retry [0|1] --mergeable [0|1] --stats [0-N]\\n\"\n+\t\"\t\t--dev-basename <name> --dev-index [0-N]\\n\"\n+\t\"\t\t--nb-devices ND\\n\"\n+\t\"\t\t-p PORTMASK: Set mask for ports to be used by application\\n\"\n+\t\"\t\t--vm2vm [0|1|2]: disable/software(default)/hardware vm2vm comms\\n\"\n+\t\"\t\t--rx-retry [0|1]: disable/enable(default) retries on rx. Enable retry if destintation queue is full\\n\"\n+\t\"\t\t--rx-retry-delay [0-N]: timeout(in usecond) between retries on RX. This makes effect only if retries on rx enabled\\n\"\n+\t\"\t\t--rx-retry-num [0-N]: the number of retries on rx. This makes effect only if retries on rx enabled\\n\"\n+\t\"\t\t--mergeable [0|1]: disable(default)/enable RX mergeable buffers\\n\"\n+\t\"\t\t--stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\\n\"\n+\t\"\t\t--dev-basename: The basename to be used for the character device.\\n\"\n+\t\"\t\t--dev-index [0-N]: Defaults to zero if not used. Index is appended to basename.\\n\"\n+\t\"\t\t--zero-copy [0|1]: disable(default)/enable rx/tx \"\n+\t\t\t\"zero copy\\n\"\n+\t\"\t\t--rx-desc-num [0-N]: the number of descriptors on rx, \"\n+\t\t\t\"used only when zero copy is enabled.\\n\"\n+\t\"\t\t--tx-desc-num [0-N]: the number of descriptors on tx, \"\n+\t\t\t\"used only when zero copy is enabled.\\n\",\n+\t       prgname);\n+}\n+\n+/*\n+ * Parse the arguments given in the command line of the application.\n+ */\n+static int\n+us_vhost_parse_args(int argc, char **argv)\n+{\n+\tint opt, ret;\n+\tint option_index;\n+\tunsigned i;\n+\tconst char *prgname = argv[0];\n+\tstatic struct option long_option[] = {\n+\t\t{\"vm2vm\", required_argument, NULL, 0},\n+\t\t{\"rx-retry\", required_argument, NULL, 0},\n+\t\t{\"rx-retry-delay\", required_argument, NULL, 0},\n+\t\t{\"rx-retry-num\", required_argument, NULL, 0},\n+\t\t{\"mergeable\", required_argument, NULL, 0},\n+\t\t{\"stats\", required_argument, NULL, 0},\n+\t\t{\"dev-basename\", required_argument, NULL, 0},\n+\t\t{\"dev-index\", required_argument, NULL, 0},\n+\t\t{\"zero-copy\", required_argument, NULL, 0},\n+\t\t{\"rx-desc-num\", required_argument, NULL, 0},\n+\t\t{\"tx-desc-num\", required_argument, NULL, 0},\n+\t\t{NULL, 0, 0, 0},\n+\t};\n+\n+\t/* Parse command line */\n+\twhile ((opt = getopt_long(argc, argv, \"p:\", long_option, &option_index))\n+\t\t!= EOF) {\n+\t\tswitch (opt) {\n+\t\t/* Portmask */\n+\t\tcase 'p':\n+\t\t\tenabled_port_mask = parse_portmask(optarg);\n+\t\t\tif (enabled_port_mask == 0) {\n+\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG,\n+\t\t\t\t\t\"Invalid portmask\\n\");\n+\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\treturn -1;\n+\t\t\t}\n+\t\t\tbreak;\n+\n+\t\tcase 0:\n+\t\t\t/* Enable/disable vm2vm comms. */\n+\t\t\tif (!strncmp(long_option[option_index].name, \"vm2vm\",\n+\t\t\t\tMAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, (VM2VM_LAST - 1));\n+\t\t\t\tif (ret == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG,\n+\t\t\t\t\t\t\"Invalid argument for \"\n+\t\t\t\t\t\t\"vm2vm [0|1|2]\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else {\n+\t\t\t\t\tvm2vm_mode = (enum vm2vm_type)ret;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Enable/disable retries on RX. */\n+\t\t\tif (!strncmp(long_option[option_index].name, \"rx-retry\",\n+\t\t\t\tMAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, 1);\n+\t\t\t\tif (ret == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG,\n+\t\t\t\t\t\t\"Invalid argument for \"\n+\t\t\t\t\t\t\"rx-retry [0|1]\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else {\n+\t\t\t\t\tenable_retry = ret;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Specify the retries delay time (in useconds) on RX */\n+\t\t\tif (!strncmp(long_option[option_index].name,\n+\t\t\t\t\"rx-retry-delay\", MAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, INT32_MAX);\n+\t\t\t\tif (ret == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG, \"Invalid \"\n+\t\t\t\t\t\"argument for rx-retry-delay [0-N]\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else {\n+\t\t\t\t\tburst_rx_delay_time = ret;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Specify the retries number on RX. */\n+\t\t\tif (!strncmp(long_option[option_index].name,\n+\t\t\t\t\"rx-retry-num\", MAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, INT32_MAX);\n+\t\t\t\tif (ret == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG, \"Invalid \"\n+\t\t\t\t\t\"argument for rx-retry-num [0-N]\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else {\n+\t\t\t\t\tburst_rx_retry_num = ret;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Enable/disable RX mergeable buffers. */\n+\t\t\tif (!strncmp(long_option[option_index].name, \"mergeable\",\n+\t\t\t\tMAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, 1);\n+\t\t\t\tif (ret == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG, \"Invalid \"\n+\t\t\t\t\t\"argument for mergeable [0|1]\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else\n+\t\t\t\t\tmergeable = !!ret;\n+\t\t\t}\n+\n+\t\t\t/* Enable/disable stats. */\n+\t\t\tif (!strncmp(long_option[option_index].name, \"stats\",\n+\t\t\t\tMAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, INT32_MAX);\n+\t\t\t\tif (ret == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG, \"Invalid \"\n+\t\t\t\t\t\"argument for stats [0..N]\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else {\n+\t\t\t\t\tenable_stats = ret;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Set character device basename. */\n+\t\t\tif (!strncmp(long_option[option_index].name,\n+\t\t\t\t\"dev-basename\", MAX_LONG_OPT_SZ)) {\n+\t\t\t\tif (us_vhost_parse_basename(optarg) == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG, \"Invalid \"\n+\t\t\t\t\t\"argument for character device basename \"\n+\t\t\t\t\t\"(Max %d characters)\\n\",\n+\t\t\t\t\tMAX_BASENAME_SZ);\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Enable/disable rx/tx zero copy. */\n+\t\t\tif (!strncmp(long_option[option_index].name,\n+\t\t\t\t\"zero-copy\", MAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, 1);\n+\t\t\t\tif (ret == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG,\n+\t\t\t\t\t\t\"Invalid argument\"\n+\t\t\t\t\t\t\" for zero-copy [0|1]\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else\n+\t\t\t\t\tzero_copy = ret;\n+\n+\t\t\t\tif (zero_copy) {\n+#ifdef RTE_MBUF_SCATTER_GATHER\n+\t\t\t\t\tRTE_LOG(ERR, VHOST_CONFIG, \"Before \"\n+\t\t\t\t\t\"running zero copy vhost APP, please \"\n+\t\t\t\t\t\"disable RTE_MBUF_SCATTER_GATHER \"\n+\t\t\t\t\t\"in config file and then rebuild DPDK \"\n+\t\t\t\t\t\"core lib!\\n\"\n+\t\t\t\t\t\"Otherwise please disable zero copy \"\n+\t\t\t\t\t\"flag in command line!\\n\");\n+\t\t\t\t\treturn -1;\n+#endif\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Specify the descriptor number on RX. */\n+\t\t\tif (!strncmp(long_option[option_index].name,\n+\t\t\t\t\"rx-desc-num\", MAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, MAX_RING_DESC);\n+\t\t\t\tif ((ret == -1) || (!POWEROF2(ret))) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG,\n+\t\t\t\t\t\"Invalid argument for rx-desc-num[0-N],\"\n+\t\t\t\t\t\"power of 2 required.\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else {\n+\t\t\t\t\tnum_rx_descriptor = ret;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Specify the descriptor number on TX. */\n+\t\t\tif (!strncmp(long_option[option_index].name,\n+\t\t\t\t\"tx-desc-num\", MAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, MAX_RING_DESC);\n+\t\t\t\tif ((ret == -1) || (!POWEROF2(ret))) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG,\n+\t\t\t\t\t\"Invalid argument for tx-desc-num [0-N],\"\n+\t\t\t\t\t\"power of 2 required.\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else {\n+\t\t\t\t\tnum_tx_descriptor = ret;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\tbreak;\n+\n+\t\t\t/* Invalid option - print options. */\n+\t\tdefault:\n+\t\t\tus_vhost_usage(prgname);\n+\t\t\treturn -1;\n+\t\t}\n+\t}\n+\n+\tfor (i = 0; i < RTE_MAX_ETHPORTS; i++) {\n+\t\tif (enabled_port_mask & (1 << i))\n+\t\t\tports[num_ports++] = (uint8_t)i;\n+\t}\n+\n+\tif ((num_ports ==  0) || (num_ports > MAX_SUP_PORTS)) {\n+\t\tRTE_LOG(INFO, VHOST_PORT, \"Current enabled port number is %u,\"\n+\t\t\t\"but only %u port can be enabled\\n\", num_ports,\n+\t\t\tMAX_SUP_PORTS);\n+\t\treturn -1;\n+\t}\n+\n+\tif ((zero_copy == 1) && (vm2vm_mode == VM2VM_SOFTWARE)) {\n+\t\tRTE_LOG(INFO, VHOST_PORT,\n+\t\t\t\"Vhost zero copy doesn't support software vm2vm, \"\n+\t\t\t\"please specify 'vm2vm 2' to use hardware vm2vm.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * Update the global var NUM_PORTS and array PORTS according to\n+ * system ports number and return valid ports number.\n+ */\n+static unsigned check_ports_num(unsigned nb_ports)\n+{\n+\tunsigned valid_num_ports = num_ports;\n+\tunsigned portid;\n+\n+\tif (num_ports > nb_ports) {\n+\t\tRTE_LOG(INFO, VHOST_PORT, \"\\nSpecified port number(%u) exceeds \"\n+\t\t\t\"total system port number(%u)\\n\", num_ports, nb_ports);\n+\t\tnum_ports = nb_ports;\n+\t}\n+\n+\tfor (portid = 0; portid < num_ports; portid++) {\n+\t\tif (ports[portid] >= nb_ports) {\n+\t\t\tRTE_LOG(INFO, VHOST_PORT, \"\\nSpecified port ID(%u) \"\n+\t\t\t\t\"exceeds max system port ID(%u)\\n\",\n+\t\t\t\tports[portid], (nb_ports - 1));\n+\t\t\tports[portid] = INVALID_PORT_ID;\n+\t\t\tvalid_num_ports--;\n+\t\t}\n+\t}\n+\treturn valid_num_ports;\n+}\n+\n+/*\n+ * Macro to print out packet contents. Wrapped in debug define so that the\n+ * data path is not effected when debug is disabled.\n+ */\n+#ifdef VHOST_DEBUG\n+#define PRINT_PACKET(device, addr, size, header) do { \\\n+\tchar *pkt_addr = (char *)(addr); \\\n+\tunsigned int index; \\\n+\tchar packet[MAX_PRINT_BUFF]; \\\n+\tif ((header)) \\\n+\t\tsnprintf(packet, MAX_PRINT_BUFF, \"(%\"PRIu64\") Header size \" \\\n+\t\t\"%d: \", (device->device_fh), (size)); \\\n+\telse \\\n+\t\tsnprintf(packet, MAX_PRINT_BUFF, \"(%\"PRIu64\") Packet size \" \\\n+\t\t\"%d: \", (device->device_fh), (size)); \\\n+\tfor (index = 0; index < (size); index++) { \\\n+\t\tsnprintf(packet + strnlen(packet, MAX_PRINT_BUFF), \\\n+\t\t\tMAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF), \\\n+\t\t\t\"%02hhx \", pkt_addr[index]); \\\n+\t} \\\n+\tsnprintf(packet + strnlen(packet, MAX_PRINT_BUFF), \\\n+\t\tMAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF), \"\\n\"); \\\n+\tLOG_DEBUG(VHOST_DATA, \"%s\", packet); \\\n+} while (0)\n+#else\n+#define PRINT_PACKET(device, addr, size, header) do {} while (0)\n+#endif\n+\n+/*\n+ * Function to convert guest physical addresses to vhost physical addresses.\n+ * This is used to convert virtio buffer addresses.\n+ */\n+static inline uint64_t __attribute__((always_inline))\n+gpa_to_hpa(struct vhost_dev  *vdev, uint64_t guest_pa,\n+\tuint32_t buf_len, enum hpa_type *addr_type)\n+{\n+\tstruct virtio_memory_regions_hpa *region;\n+\tuint32_t regionidx;\n+\tuint64_t vhost_pa = 0;\n+\n+\t*addr_type = PHYS_ADDR_INVALID;\n+\n+\tfor (regionidx = 0; regionidx < vdev->nregions_hpa; regionidx++) {\n+\t\tregion = &vdev->regions_hpa[regionidx];\n+\t\tif ((guest_pa >= region->guest_phys_address) &&\n+\t\t\t(guest_pa <= region->guest_phys_address_end)) {\n+\t\t\tvhost_pa = region->host_phys_addr_offset + guest_pa;\n+\t\t\tif (likely((guest_pa + buf_len - 1)\n+\t\t\t\t<= region->guest_phys_address_end))\n+\t\t\t\t*addr_type = PHYS_ADDR_CONTINUOUS;\n+\t\t\telse\n+\t\t\t\t*addr_type = PHYS_ADDR_CROSS_SUBREG;\n+\t\t\tbreak;\n+\t\t}\n+\t}\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") GPA %p| HPA %p\\n\",\n+\t\tdev->device_fh, (void *)(uintptr_t)guest_pa,\n+\t\t(void *)(uintptr_t)vhost_pa);\n+\n+\treturn vhost_pa;\n+}\n+\n+/*\n+ * Compares a packet destination MAC address to a device MAC address.\n+ */\n+static inline int __attribute__((always_inline))\n+ether_addr_cmp(struct ether_addr *ea, struct ether_addr *eb)\n+{\n+\treturn (((*(uint64_t *)ea ^ *(uint64_t *)eb) & MAC_ADDR_CMP) == 0);\n+}\n+\n+/*\n+ * This function learns the MAC address of the device and registers this along\n+ * with a vlan tag to a VMDQ.\n+ */\n+static int\n+link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m)\n+{\n+\tstruct ether_hdr *pkt_hdr;\n+\tstruct virtio_net_data_ll *dev_ll;\n+\tstruct virtio_net *dev = vdev->dev;\n+\tint i, ret;\n+\n+\t/* Learn MAC address of guest device from packet */\n+\tpkt_hdr = (struct ether_hdr *)m->pkt.data;\n+\n+\tdev_ll = ll_root_used;\n+\n+\twhile (dev_ll != NULL) {\n+\t\tif (ether_addr_cmp(&(pkt_hdr->s_addr),\n+\t\t\t&dev_ll->vdev->mac_address)) {\n+\t\t\tRTE_LOG(INFO, VHOST_DATA, \"(%\"PRIu64\") WARNING: This \"\n+\t\t\t\"device is using an existing MAC address and has not \"\n+\t\t\t\"been registered.\\n\", dev->device_fh);\n+\t\t\treturn -1;\n+\t\t}\n+\t\tdev_ll = dev_ll->next;\n+\t}\n+\n+\tfor (i = 0; i < ETHER_ADDR_LEN; i++)\n+\t\tvdev->mac_address.addr_bytes[i] = pkt_hdr->s_addr.addr_bytes[i];\n+\n+\t/* vlan_tag currently uses the device_id. */\n+\tvdev->vlan_tag = vlan_tags[dev->device_fh];\n+\n+\t/* Print out VMDQ registration info. */\n+\tRTE_LOG(INFO, VHOST_DATA, \"(%\"PRIu64\") MAC_ADDRESS %02x:%02x:%02x:%02x:\"\n+\t\t\"%02x:%02x and VLAN_TAG %d registered\\n\",\n+\t\tdev->device_fh,\n+\t\tvdev->mac_address.addr_bytes[0],\n+\t\tvdev->mac_address.addr_bytes[1],\n+\t\tvdev->mac_address.addr_bytes[2],\n+\t\tvdev->mac_address.addr_bytes[3],\n+\t\tvdev->mac_address.addr_bytes[4],\n+\t\tvdev->mac_address.addr_bytes[5],\n+\t\tvdev->vlan_tag);\n+\n+\t/* Register the MAC address on the pool specified by device_fh. */\n+\tret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address,\n+\t\t(uint32_t)dev->device_fh);\n+\tif (ret)\n+\t\tRTE_LOG(ERR, VHOST_DATA, \"(%\"PRIu64\") Failed to add device \"\n+\t\t\t\"MAC address to VMDQ\\n\", dev->device_fh);\n+\n+\t/* Enable stripping of the vlan tag as we handle routing. */\n+\trte_eth_dev_set_vlan_strip_on_queue(ports[0],\n+\t\t(uint16_t)vdev->vmdq_rx_q, 1);\n+\n+\t/* Set device as ready for RX. */\n+\tvdev->ready = DEVICE_RX;\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding\n+ * buffers to the RX queue before disabling RX on the device.\n+ */\n+static inline void\n+unlink_vmdq(struct vhost_dev *vdev)\n+{\n+\tunsigned i = 0;\n+\tunsigned rx_count;\n+\tstruct rte_mbuf *pkts_burst[MAX_PKT_BURST];\n+\n+\tif (vdev->ready == DEVICE_RX) {\n+\t\t/*clear MAC and VLAN settings*/\n+\t\trte_eth_dev_mac_addr_remove(ports[0], &vdev->mac_address);\n+\t\tfor (i = 0; i < 6; i++)\n+\t\t\tvdev->mac_address.addr_bytes[i] = 0;\n+\n+\t\tvdev->vlan_tag = 0;\n+\n+\t\t/*Clear out the receive buffers*/\n+\t\trx_count = rte_eth_rx_burst(ports[0], (uint16_t)vdev->vmdq_rx_q,\n+\t\t\t\tpkts_burst, MAX_PKT_BURST);\n+\t\twhile (rx_count) {\n+\t\t\tfor (i = 0; i < rx_count; i++)\n+\t\t\t\trte_pktmbuf_free(pkts_burst[i]);\n+\n+\t\t\trx_count = rte_eth_rx_burst(ports[0],\n+\t\t\t\t\t(uint16_t)vdev->vmdq_rx_q, pkts_burst,\n+\t\t\t\t\tMAX_PKT_BURST);\n+\t\t}\n+\n+\t\tvdev->ready = DEVICE_MAC_LEARNING;\n+\t}\n+}\n+\n+/*\n+ * Check if the packet destination MAC address is for a local device. If so\n+ * put the packet on that devices RX queue. If not then return.\n+ */\n+static inline int __attribute__((always_inline))\n+virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)\n+{\n+\tstruct virtio_net_data_ll *dev_ll;\n+\tstruct ether_hdr *pkt_hdr = (struct ether_hdr *)m->pkt.data;\n+\tuint64_t ret = 0;\n+\tstruct virtio_net *dev = vdev->dev;\n+\tstruct virtio_net *tdev; /* destination virito device */\n+\n+\t/* Get the used devices list*/\n+\tdev_ll = ll_root_used;\n+\n+\twhile (dev_ll != NULL) {\n+\t\tif ((dev_ll->vdev->ready == DEVICE_RX) &&\n+\t\t\tether_addr_cmp(&(pkt_hdr->d_addr),\n+\t\t\t\t&dev_ll->vdev->mac_address)) {\n+\n+\t\t\tif (dev_ll->vdev->dev->device_fh == dev->device_fh) {\n+\t\t\t\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") TX: Source \"\n+\t\t\t\t\"and destination MAC addresses are the same. \"\n+\t\t\t\t\"Dropping packet.\\n\", dev->device_fh);\n+\t\t\t\treturn 0;\n+\t\t\t}\n+\t\t\ttdev = dev_ll->vdev->dev;\n+\n+\t\t\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") TX: MAC address is \"\n+\t\t\t\t\"local\\n\", tdev->device_fh);\n+\n+\t\t\tif (unlikely(dev_ll->vdev->remove)) {\n+\t\t\t\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") Device is \"\n+\t\t\t\t\"marked for removal\\n\", tdev->device_fh);\n+\t\t\t} else {\n+\t\t\t\t/* Send the packet to the local virtio device*/\n+\t\t\t\tret = rte_vhost_enqueue_burst(tdev, VIRTIO_RXQ,\n+\t\t\t\t\t&m, 1);\n+\t\t\t\tif (enable_stats) {\n+\t\t\t\t\trte_atomic64_add(\n+\t\t\t\t\t&dev_statistics[tdev->device_fh].rx_total_atomic,\n+\t\t\t\t\t1);\n+\t\t\t\t\trte_atomic64_add(\n+\t\t\t\t\t&dev_statistics[tdev->device_fh].rx_atomic,\n+\t\t\t\t\tret);\n+\t\t\t\t\tdev_statistics[tdev->device_fh].tx_total++;\n+\t\t\t\t\tdev_statistics[tdev->device_fh].tx += ret;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\treturn 0;\n+\t\t}\n+\t\tdev_ll = dev_ll->next;\n+\t}\n+\n+\treturn -1;\n+}\n+\n+/*\n+ * This function routes the TX packet to the correct interface. This may be a\n+ * local device or the physical port.\n+ */\n+static inline void __attribute__((always_inline))\n+virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m,\n+\t\tstruct rte_mempool *mbuf_pool, uint16_t vlan_tag) {\n+\tstruct mbuf_table *tx_q;\n+\tstruct rte_mbuf **m_table;\n+\tunsigned len, ret, offset = 0;\n+\tconst uint16_t lcore_id = rte_lcore_id();\n+\tstruct virtio_net_data_ll *dev_ll = ll_root_used;\n+\tstruct ether_hdr *pkt_hdr = (struct ether_hdr *)m->pkt.data;\n+\tstruct virtio_net *dev = vdev->dev;\n+\n+\t/* Check if destination is local VM*/\n+\tif (vm2vm_mode == VM2VM_SOFTWARE && (virtio_tx_local(vdev, m) == 0)) {\n+\t\trte_pktmbuf_free(m);\n+\t\treturn;\n+\t}\n+\n+\tif (vm2vm_mode == VM2VM_HARDWARE) {\n+\t\twhile (dev_ll != NULL) {\n+\t\t\tif ((dev_ll->vdev->ready == DEVICE_RX)\n+\t\t\t\t&& ether_addr_cmp(&(pkt_hdr->d_addr),\n+\t\t\t\t&dev_ll->vdev->mac_address)) {\n+\t\t\t\t/*\n+\t\t\t\t * Drop the packet if the TX packet is\n+\t\t\t\t * destined for the TX device.\n+\t\t\t\t */\n+\t\t\t\tif (dev_ll->vdev->dev->device_fh ==\n+\t\t\t\t\tdev->device_fh) {\n+\t\t\t\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\t\t\"(%\"PRIu64\") TX: Source and destination \"\n+\t\t\t\t\t\"MAC addresses are the same. Dropping \"\n+\t\t\t\t\t\"packet.\\n\",\n+\t\t\t\t\tdev_ll->vdev->device_fh);\n+\t\t\t\t\trte_pktmbuf_free(m);\n+\t\t\t\t\treturn;\n+\t\t\t\t}\n+\t\t\t\toffset = 4;\n+\t\t\t\tvlan_tag =\n+\t\t\t\t(uint16_t)\n+\t\t\t\tvlan_tags[(uint16_t)dev_ll->vdev->dev->device_fh];\n+\n+\t\t\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\t\"(%\"PRIu64\") TX: pkt to local VM device id:\"\n+\t\t\t\t\"(%\"PRIu64\") vlan tag: %d.\\n\",\n+\t\t\t\tdev->device_fh, dev_ll->vdev->dev->device_fh,\n+\t\t\t\tvlan_tag);\n+\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t\tdev_ll = dev_ll->next;\n+\t\t}\n+\t}\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") TX: MAC address is external\\n\",\n+\t\tdev->device_fh);\n+\n+\t/*Add packet to the port tx queue*/\n+\ttx_q = &lcore_tx_queue[lcore_id];\n+\tlen = tx_q->len;\n+\n+\tm->ol_flags = PKT_TX_VLAN_PKT;\n+\tm->pkt.data_len += offset;\n+\tm->pkt.pkt_len = m->pkt.data_len;\n+\tm->pkt.vlan_macip.f.vlan_tci = vlan_tag;\n+\n+\ttx_q->m_table[len] = m;\n+\tlen++;\n+\tif (enable_stats) {\n+\t\tdev_statistics[dev->device_fh].tx_total++;\n+\t\tdev_statistics[dev->device_fh].tx++;\n+\t}\n+\n+\tif (unlikely(len == MAX_PKT_BURST)) {\n+\t\tm_table = (struct rte_mbuf **)tx_q->m_table;\n+\t\tret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id,\n+\t\t\t\tm_table, (uint16_t) len);\n+\t\t/*\n+\t\t * Free any buffers not handled by TX and update the port\n+\t\t * stats.\n+\t\t */\n+\t\tif (unlikely(ret < len)) {\n+\t\t\tdo {\n+\t\t\t\trte_pktmbuf_free(m_table[ret]);\n+\t\t\t} while (++ret < len);\n+\t\t}\n+\n+\t\tlen = 0;\n+\t}\n+\n+\ttx_q->len = len;\n+\treturn;\n+}\n+/*\n+ * This function is called by each data core. It handles all RX/TX registered\n+ * with the core. For TX the specific lcore linked list is used. For RX, MAC\n+ * addresses are compared with all devices in the main linked list.\n+ */\n+static int\n+switch_worker(__attribute__((unused)) void *arg)\n+{\n+\tstruct rte_mempool *mbuf_pool = arg;\n+\tstruct virtio_net *dev = NULL;\n+\tstruct vhost_dev *vdev = NULL;\n+\tstruct rte_mbuf *pkts_burst[MAX_PKT_BURST];\n+\tstruct virtio_net_data_ll *dev_ll;\n+\tstruct mbuf_table *tx_q;\n+\tvolatile struct lcore_ll_info *lcore_ll;\n+\tconst uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /\n+\t\t\t\t\tUS_PER_S * BURST_TX_DRAIN_US;\n+\tuint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;\n+\tunsigned ret, i;\n+\tconst uint16_t lcore_id = rte_lcore_id();\n+\tconst uint16_t num_cores = (uint16_t)rte_lcore_count();\n+\tuint16_t rx_count = 0;\n+\tuint16_t tx_count;\n+\tuint32_t retry = 0;\n+\n+\tRTE_LOG(INFO, VHOST_DATA, \"Procesing on Core %u started\\n\", lcore_id);\n+\tlcore_ll = lcore_info[lcore_id].lcore_ll;\n+\tprev_tsc = 0;\n+\n+\ttx_q = &lcore_tx_queue[lcore_id];\n+\tfor (i = 0; i < num_cores; i++) {\n+\t\tif (lcore_ids[i] == lcore_id) {\n+\t\t\ttx_q->txq_id = i;\n+\t\t\tbreak;\n+\t\t}\n+\t}\n+\n+\twhile (1) {\n+\t\tcur_tsc = rte_rdtsc();\n+\t\t/*\n+\t\t * TX burst queue drain\n+\t\t */\n+\t\tdiff_tsc = cur_tsc - prev_tsc;\n+\t\tif (unlikely(diff_tsc > drain_tsc)) {\n+\n+\t\t\tif (tx_q->len) {\n+\t\t\t\tLOG_DEBUG(VHOST_DATA, \"TX queue drained after \"\n+\t\t\t\t\t\"timeout with burst size %u\\n\", tx_q->len);\n+\n+\t\t\t\t/*Tx any packets in the queue*/\n+\t\t\t\tret = rte_eth_tx_burst(ports[0],\n+\t\t\t\t\t(uint16_t)tx_q->txq_id,\n+\t\t\t\t\t(struct rte_mbuf **)tx_q->m_table,\n+\t\t\t\t\t(uint16_t)tx_q->len);\n+\t\t\t\tif (unlikely(ret < tx_q->len)) {\n+\t\t\t\t\tdo {\n+\t\t\t\t\t\trte_pktmbuf_free(tx_q->m_table[ret]);\n+\t\t\t\t\t} while (++ret < tx_q->len);\n+\t\t\t\t}\n+\n+\t\t\t\ttx_q->len = 0;\n+\t\t\t}\n+\n+\t\t\tprev_tsc = cur_tsc;\n+\n+\t\t}\n+\n+\t\trte_prefetch0(lcore_ll->ll_root_used);\n+\t\t/*\n+\t\t * Inform the configuration core that we have exited the linked\n+\t\t * list and that no devices are in use if requested.\n+\t\t */\n+\t\tif (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL)\n+\t\t\tlcore_ll->dev_removal_flag =  ACK_DEV_REMOVAL;\n+\n+\t\t/* Process devices */\n+\t\tdev_ll = lcore_ll->ll_root_used;\n+\n+\t\twhile (likely(dev_ll != NULL)) {\n+\t\t\tvdev = dev_ll->vdev;\n+\t\t\tdev = vdev->dev;\n+\n+\t\t\tif (unlikely(vdev->remove)) {\n+\t\t\t\tdev_ll = dev_ll->next;\n+\t\t\t\tunlink_vmdq(vdev);\n+\t\t\t\tvdev->ready = DEVICE_SAFE_REMOVE;\n+\t\t\t\tcontinue;\n+\t\t\t}\n+\t\t\tif (likely(vdev->ready == DEVICE_RX)) {\n+\t\t\t\t/* Handle guest RX */\n+\t\t\t\trx_count = rte_eth_rx_burst(ports[0],\n+\t\t\t\t\t\tvdev->vmdq_rx_q, pkts_burst,\n+\t\t\t\t\t\tMAX_PKT_BURST);\n+\n+\t\t\t\tif (rx_count) {\n+\t\t\t\t\t/*\n+\t\t\t\t\t * Retry is enabled and the queue is\n+\t\t\t\t\t * full then we wait and retry to avoid\n+\t\t\t\t\t * packet loss. Here MAX_PKT_BURST must\n+\t\t\t\t\t * be less than virtio queue size.\n+\t\t\t\t\t */\n+\t\t\t\t\tif (enable_retry && unlikely(rx_count >\n+\t\t\t\t\t\trte_vring_available_entries(dev, VIRTIO_RXQ))) {\n+\t\t\t\t\t\tfor (retry = 0;\n+\t\t\t\t\t\t\tretry < burst_rx_retry_num;\n+\t\t\t\t\t\t\tretry++) {\n+\t\t\t\t\t\t\trte_delay_us(burst_rx_delay_time);\n+\t\t\t\t\t\t\tif (rx_count <= rte_vring_available_entries(dev, VIRTIO_RXQ))\n+\t\t\t\t\t\t\t\tbreak;\n+\t\t\t\t\t\t}\n+\t\t\t\t\t}\n+\t\t\t\t\tret_count = rte_vhost_enqueue_burst(dev,\n+\t\t\t\t\tVIRTIO_RXQ, pkts_burst, rx_count);\n+\t\t\t\t\tif (enable_stats) {\n+\t\t\t\t\t\trte_atomic64_add(\n+\t\t\t\t\t\t&dev_statistics[dev_ll->vdev->dev->device_fh].rx_total_atomic,\n+\t\t\t\t\t\trx_count);\n+\t\t\t\t\t\trte_atomic64_add(\n+\t\t\t\t\t\t&dev_statistics[dev_ll->vdev->dev->device_fh].rx_atomic, ret_count);\n+\t\t\t\t\t}\n+\t\t\t\t\twhile (likely(rx_count)) {\n+\t\t\t\t\t\trx_count--;\n+\t\t\t\t\t\trte_pktmbuf_free_seg(pkts_burst[rx_count]);\n+\t\t\t\t\t}\n+\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\tif (!vdev->remove) {\n+\t\t\t\t/* Handle guest TX*/\n+\t\t\t\ttx_count = rte_vhost_dequeue_burst(dev,\n+\t\t\t\t\t\tVIRTIO_TXQ, mbuf_pool,\n+\t\t\t\t\t\tpkts_burst, MAX_PKT_BURST);\n+\t\t\t\t/*\n+\t\t\t\t * If this is the first received packet we need\n+\t\t\t\t * to learn the MAC and setup VMDQ.\n+\t\t\t\t */\n+\t\t\t\tif (unlikely(vdev->ready == DEVICE_MAC_LEARNING)\n+\t\t\t\t\t&& tx_count) {\n+\t\t\t\t\tif (vdev->remove || (link_vmdq(vdev,\n+\t\t\t\t\t\tpkts_burst[0]) == -1)) {\n+\t\t\t\t\t\twhile (tx_count--)\n+\t\t\t\t\t\t\trte_pktmbuf_free(pkts_burst[tx_count]);\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t\twhile (tx_count)\n+\t\t\t\t\tvirtio_tx_route(vdev,\n+\t\t\t\t\t\tpkts_burst[--tx_count],\n+\t\t\t\t\t\tmbuf_pool,\n+\t\t\t\t\t\t(uint16_t)dev->device_fh);\n+\t\t\t}\n+\n+\t\t\t/*move to the next device in the list*/\n+\t\t\tdev_ll = dev_ll->next;\n+\t\t}\n+\t}\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * This function gets available ring number for zero copy rx.\n+ * Only one thread will call this funciton for a paticular virtio device,\n+ * so, it is designed as non-thread-safe function.\n+ */\n+static inline uint32_t __attribute__((always_inline))\n+get_available_ring_num_zcp(struct virtio_net *dev)\n+{\n+\tstruct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_RXQ];\n+\tuint16_t avail_idx;\n+\n+\tavail_idx = *((volatile uint16_t *)&vq->avail->idx);\n+\treturn (uint32_t)(avail_idx - vq->last_used_idx_res);\n+}\n+\n+/*\n+ * This function gets available ring index for zero copy rx,\n+ * it will retry 'burst_rx_retry_num' times till it get enough ring index.\n+ * Only one thread will call this funciton for a paticular virtio device,\n+ * so, it is designed as non-thread-safe function.\n+ */\n+static inline uint32_t __attribute__((always_inline))\n+get_available_ring_index_zcp(struct virtio_net *dev,\n+\tuint16_t *res_base_idx, uint32_t count)\n+{\n+\tstruct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_RXQ];\n+\tuint16_t avail_idx;\n+\tuint32_t retry = 0;\n+\tuint16_t free_entries;\n+\n+\t*res_base_idx = vq->last_used_idx_res;\n+\tavail_idx = *((volatile uint16_t *)&vq->avail->idx);\n+\tfree_entries = (avail_idx - *res_base_idx);\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") in get_available_ring_index_zcp: \"\n+\t\t\t\"avail idx: %d, \"\n+\t\t\t\"res base idx:%d, free entries:%d\\n\",\n+\t\t\tdev->device_fh, avail_idx, *res_base_idx,\n+\t\t\tfree_entries);\n+\n+\t/*\n+\t * If retry is enabled and the queue is full then we wait\n+\t * and retry to avoid packet loss.\n+\t */\n+\tif (enable_retry && unlikely(count > free_entries)) {\n+\t\tfor (retry = 0; retry < burst_rx_retry_num; retry++) {\n+\t\t\trte_delay_us(burst_rx_delay_time);\n+\t\t\tavail_idx = *((volatile uint16_t *)&vq->avail->idx);\n+\t\t\tfree_entries = (avail_idx - *res_base_idx);\n+\t\t\tif (count <= free_entries)\n+\t\t\t\tbreak;\n+\t\t}\n+\t}\n+\n+\t/*check that we have enough buffers*/\n+\tif (unlikely(count > free_entries))\n+\t\tcount = free_entries;\n+\n+\tif (unlikely(count == 0)) {\n+\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\"(%\"PRIu64\") Fail in get_available_ring_index_zcp: \"\n+\t\t\t\"avail idx: %d, res base idx:%d, free entries:%d\\n\",\n+\t\t\tdev->device_fh, avail_idx,\n+\t\t\t*res_base_idx, free_entries);\n+\t\treturn 0;\n+\t}\n+\n+\tvq->last_used_idx_res = *res_base_idx + count;\n+\n+\treturn count;\n+}\n+\n+/*\n+ * This function put descriptor back to used list.\n+ */\n+static inline void __attribute__((always_inline))\n+put_desc_to_used_list_zcp(struct vhost_virtqueue *vq, uint16_t desc_idx)\n+{\n+\tuint16_t res_cur_idx = vq->last_used_idx;\n+\tvq->used->ring[res_cur_idx & (vq->size - 1)].id = (uint32_t)desc_idx;\n+\tvq->used->ring[res_cur_idx & (vq->size - 1)].len = 0;\n+\trte_compiler_barrier();\n+\t*(volatile uint16_t *)&vq->used->idx += 1;\n+\tvq->last_used_idx += 1;\n+\n+\t/* Kick the guest if necessary. */\n+\tif (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))\n+\t\teventfd_write((int)vq->kickfd, 1);\n+}\n+\n+/*\n+ * This function get available descriptor from vitio vring and un-attached mbuf\n+ * from vpool->ring, and then attach them together. It needs adjust the offset\n+ * for buff_addr and phys_addr accroding to PMD implementation, otherwise the\n+ * frame data may be put to wrong location in mbuf.\n+ */\n+static inline void __attribute__((always_inline))\n+attach_rxmbuf_zcp(struct virtio_net *dev)\n+{\n+\tuint16_t res_base_idx, desc_idx;\n+\tuint64_t buff_addr, phys_addr;\n+\tstruct vhost_virtqueue *vq;\n+\tstruct vring_desc *desc;\n+\tstruct rte_mbuf *mbuf = NULL;\n+\tstruct vpool *vpool;\n+\tenum hpa_type addr_type;\n+\tstruct vhost_dev *vdev = (struct vhost_dev *)dev->priv;\n+\n+\tvpool = &vpool_array[vdev->vmdq_rx_q];\n+\tvq = dev->virtqueue[VIRTIO_RXQ];\n+\n+\tdo {\n+\t\tif (unlikely(get_available_ring_index_zcp(vdev->dev,\n+\t\t\t\t&res_base_idx, 1) != 1))\n+\t\t\treturn;\n+\t\tdesc_idx = vq->avail->ring[(res_base_idx) & (vq->size - 1)];\n+\n+\t\tdesc = &vq->desc[desc_idx];\n+\t\tif (desc->flags & VRING_DESC_F_NEXT) {\n+\t\t\tdesc = &vq->desc[desc->next];\n+\t\t\tbuff_addr = gpa_to_vva(dev, desc->addr);\n+\t\t\tphys_addr = gpa_to_hpa(vdev, desc->addr, desc->len,\n+\t\t\t\t\t&addr_type);\n+\t\t} else {\n+\t\t\tbuff_addr = gpa_to_vva(dev,\n+\t\t\t\t\tdesc->addr + vq->vhost_hlen);\n+\t\t\tphys_addr = gpa_to_hpa(vdev,\n+\t\t\t\t\tdesc->addr + vq->vhost_hlen,\n+\t\t\t\t\tdesc->len, &addr_type);\n+\t\t}\n+\n+\t\tif (unlikely(addr_type == PHYS_ADDR_INVALID)) {\n+\t\t\tRTE_LOG(ERR, VHOST_DATA, \"(%\"PRIu64\") Invalid frame \"\n+\t\t\t\t\"buffer address found when attaching RX frame \"\n+\t\t\t\t\"buffer address!\\n\", dev->device_fh);\n+\t\t\tput_desc_to_used_list_zcp(vq, desc_idx);\n+\t\t\tcontinue;\n+\t\t}\n+\n+\t\t/*\n+\t\t * Check if the frame buffer address from guest crosses\n+\t\t * sub-region or not.\n+\t\t */\n+\t\tif (unlikely(addr_type == PHYS_ADDR_CROSS_SUBREG)) {\n+\t\t\tRTE_LOG(ERR, VHOST_DATA,\n+\t\t\t\t\"(%\"PRIu64\") Frame buffer address cross \"\n+\t\t\t\t\"sub-regioin found when attaching RX frame \"\n+\t\t\t\t\"buffer address!\\n\",\n+\t\t\t\tdev->device_fh);\n+\t\t\tput_desc_to_used_list_zcp(vq, desc_idx);\n+\t\t\tcontinue;\n+\t\t}\n+\t} while (unlikely(phys_addr == 0));\n+\n+\trte_ring_sc_dequeue(vpool->ring, (void **)&mbuf);\n+\tif (unlikely(mbuf == NULL)) {\n+\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\"(%\"PRIu64\") in attach_rxmbuf_zcp: \"\n+\t\t\t\"ring_sc_dequeue fail.\\n\",\n+\t\t\tdev->device_fh);\n+\t\tput_desc_to_used_list_zcp(vq, desc_idx);\n+\t\treturn;\n+\t}\n+\n+\tif (unlikely(vpool->buf_size > desc->len)) {\n+\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\"(%\"PRIu64\") in attach_rxmbuf_zcp: frame buffer \"\n+\t\t\t\"length(%d) of descriptor idx: %d less than room \"\n+\t\t\t\"size required: %d\\n\",\n+\t\t\tdev->device_fh, desc->len, desc_idx, vpool->buf_size);\n+\t\tput_desc_to_used_list_zcp(vq, desc_idx);\n+\t\trte_ring_sp_enqueue(vpool->ring, (void *)mbuf);\n+\t\treturn;\n+\t}\n+\n+\tmbuf->buf_addr = (void *)(uintptr_t)(buff_addr - RTE_PKTMBUF_HEADROOM);\n+\tmbuf->pkt.data = (void *)(uintptr_t)(buff_addr);\n+\tmbuf->buf_physaddr = phys_addr - RTE_PKTMBUF_HEADROOM;\n+\tmbuf->pkt.data_len = desc->len;\n+\tMBUF_HEADROOM_UINT32(mbuf) = (uint32_t)desc_idx;\n+\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in attach_rxmbuf_zcp: res base idx:%d, \"\n+\t\t\"descriptor idx:%d\\n\",\n+\t\tdev->device_fh, res_base_idx, desc_idx);\n+\n+\t__rte_mbuf_raw_free(mbuf);\n+\n+\treturn;\n+}\n+\n+/*\n+ * Detach an attched packet mbuf -\n+ *  - restore original mbuf address and length values.\n+ *  - reset pktmbuf data and data_len to their default values.\n+ *  All other fields of the given packet mbuf will be left intact.\n+ *\n+ * @param m\n+ *   The attached packet mbuf.\n+ */\n+static inline void\n+pktmbuf_detach_zcp(struct rte_mbuf *m)\n+{\n+\tconst struct rte_mempool *mp = m->pool;\n+\tvoid *buf = RTE_MBUF_TO_BADDR(m);\n+\tuint32_t buf_ofs;\n+\tuint32_t buf_len = mp->elt_size - sizeof(*m);\n+\tm->buf_physaddr = rte_mempool_virt2phy(mp, m) + sizeof(*m);\n+\n+\tm->buf_addr = buf;\n+\tm->buf_len = (uint16_t)buf_len;\n+\n+\tbuf_ofs = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ?\n+\t\t\tRTE_PKTMBUF_HEADROOM : m->buf_len;\n+\tm->pkt.data = (char *) m->buf_addr + buf_ofs;\n+\n+\tm->pkt.data_len = 0;\n+}\n+\n+/*\n+ * This function is called after packets have been transimited. It fetchs mbuf\n+ * from vpool->pool, detached it and put into vpool->ring. It also update the\n+ * used index and kick the guest if necessary.\n+ */\n+static inline uint32_t __attribute__((always_inline))\n+txmbuf_clean_zcp(struct virtio_net *dev, struct vpool *vpool)\n+{\n+\tstruct rte_mbuf *mbuf;\n+\tstruct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_TXQ];\n+\tuint32_t used_idx = vq->last_used_idx & (vq->size - 1);\n+\tuint32_t index = 0;\n+\tuint32_t mbuf_count = rte_mempool_count(vpool->pool);\n+\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in txmbuf_clean_zcp: mbuf count in mempool before \"\n+\t\t\"clean is: %d\\n\",\n+\t\tdev->device_fh, mbuf_count);\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in txmbuf_clean_zcp: mbuf count in  ring before \"\n+\t\t\"clean  is : %d\\n\",\n+\t\tdev->device_fh, rte_ring_count(vpool->ring));\n+\n+\tfor (index = 0; index < mbuf_count; index++) {\n+\t\tmbuf = __rte_mbuf_raw_alloc(vpool->pool);\n+\t\tif (likely(RTE_MBUF_INDIRECT(mbuf)))\n+\t\t\tpktmbuf_detach_zcp(mbuf);\n+\t\trte_ring_sp_enqueue(vpool->ring, mbuf);\n+\n+\t\t/* Update used index buffer information. */\n+\t\tvq->used->ring[used_idx].id = MBUF_HEADROOM_UINT32(mbuf);\n+\t\tvq->used->ring[used_idx].len = 0;\n+\n+\t\tused_idx = (used_idx + 1) & (vq->size - 1);\n+\t}\n+\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in txmbuf_clean_zcp: mbuf count in mempool after \"\n+\t\t\"clean is: %d\\n\",\n+\t\tdev->device_fh, rte_mempool_count(vpool->pool));\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in txmbuf_clean_zcp: mbuf count in  ring after \"\n+\t\t\"clean  is : %d\\n\",\n+\t\tdev->device_fh, rte_ring_count(vpool->ring));\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in txmbuf_clean_zcp: before updated \"\n+\t\t\"vq->last_used_idx:%d\\n\",\n+\t\tdev->device_fh, vq->last_used_idx);\n+\n+\tvq->last_used_idx += mbuf_count;\n+\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in txmbuf_clean_zcp: after updated \"\n+\t\t\"vq->last_used_idx:%d\\n\",\n+\t\tdev->device_fh, vq->last_used_idx);\n+\n+\trte_compiler_barrier();\n+\n+\t*(volatile uint16_t *)&vq->used->idx += mbuf_count;\n+\n+\t/* Kick guest if required. */\n+\tif (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))\n+\t\teventfd_write((int)vq->kickfd, 1);\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * This function is called when a virtio device is destroy.\n+ * It fetchs mbuf from vpool->pool, and detached it, and put into vpool->ring.\n+ */\n+static void\n+mbuf_destroy_zcp(struct vpool *vpool)\n+{\n+\tstruct rte_mbuf *mbuf = NULL;\n+\tuint32_t index, mbuf_count = rte_mempool_count(vpool->pool);\n+\n+\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\"in mbuf_destroy_zcp: mbuf count in mempool before \"\n+\t\t\"mbuf_destroy_zcp is: %d\\n\",\n+\t\tmbuf_count);\n+\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\"in mbuf_destroy_zcp: mbuf count in  ring before \"\n+\t\t\"mbuf_destroy_zcp  is : %d\\n\",\n+\t\trte_ring_count(vpool->ring));\n+\n+\tfor (index = 0; index < mbuf_count; index++) {\n+\t\tmbuf = __rte_mbuf_raw_alloc(vpool->pool);\n+\t\tif (likely(mbuf != NULL)) {\n+\t\t\tif (likely(RTE_MBUF_INDIRECT(mbuf)))\n+\t\t\t\tpktmbuf_detach_zcp(mbuf);\n+\t\t\trte_ring_sp_enqueue(vpool->ring, (void *)mbuf);\n+\t\t}\n+\t}\n+\n+\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\"in mbuf_destroy_zcp: mbuf count in mempool after \"\n+\t\t\"mbuf_destroy_zcp is: %d\\n\",\n+\t\trte_mempool_count(vpool->pool));\n+\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\"in mbuf_destroy_zcp: mbuf count in ring after \"\n+\t\t\"mbuf_destroy_zcp is : %d\\n\",\n+\t\trte_ring_count(vpool->ring));\n+}\n+\n+/*\n+ * This function update the use flag and counter.\n+ */\n+static inline uint32_t __attribute__((always_inline))\n+virtio_dev_rx_zcp(struct virtio_net *dev, struct rte_mbuf **pkts,\n+\tuint32_t count)\n+{\n+\tstruct vhost_virtqueue *vq;\n+\tstruct vring_desc *desc;\n+\tstruct rte_mbuf *buff;\n+\t/* The virtio_hdr is initialised to 0. */\n+\tstruct virtio_net_hdr_mrg_rxbuf virtio_hdr\n+\t\t= {{0, 0, 0, 0, 0, 0}, 0};\n+\tuint64_t buff_hdr_addr = 0;\n+\tuint32_t head[MAX_PKT_BURST], packet_len = 0;\n+\tuint32_t head_idx, packet_success = 0;\n+\tuint16_t res_cur_idx;\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") virtio_dev_rx()\\n\", dev->device_fh);\n+\n+\tif (count == 0)\n+\t\treturn 0;\n+\n+\tvq = dev->virtqueue[VIRTIO_RXQ];\n+\tcount = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;\n+\n+\tres_cur_idx = vq->last_used_idx;\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") Current Index %d| End Index %d\\n\",\n+\t\tdev->device_fh, res_cur_idx, res_cur_idx + count);\n+\n+\t/* Retrieve all of the head indexes first to avoid caching issues. */\n+\tfor (head_idx = 0; head_idx < count; head_idx++)\n+\t\thead[head_idx] = MBUF_HEADROOM_UINT32(pkts[head_idx]);\n+\n+\t/*Prefetch descriptor index. */\n+\trte_prefetch0(&vq->desc[head[packet_success]]);\n+\n+\twhile (packet_success != count) {\n+\t\t/* Get descriptor from available ring */\n+\t\tdesc = &vq->desc[head[packet_success]];\n+\n+\t\tbuff = pkts[packet_success];\n+\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\"(%\"PRIu64\") in dev_rx_zcp: update the used idx for \"\n+\t\t\t\"pkt[%d] descriptor idx: %d\\n\",\n+\t\t\tdev->device_fh, packet_success,\n+\t\t\tMBUF_HEADROOM_UINT32(buff));\n+\n+\t\tPRINT_PACKET(dev,\n+\t\t\t(uintptr_t)(((uint64_t)(uintptr_t)buff->buf_addr)\n+\t\t\t+ RTE_PKTMBUF_HEADROOM),\n+\t\t\trte_pktmbuf_data_len(buff), 0);\n+\n+\t\t/* Buffer address translation for virtio header. */\n+\t\t/* Could we buffer the translation for data in mbuf headroom */\n+\t\tbuff_hdr_addr = gpa_to_vva(dev, desc->addr);\n+\t\tpacket_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;\n+\n+\t\t/*\n+\t\t * If the descriptors are chained the header and data are\n+\t\t * placed in separate buffers.\n+\t\t */\n+\t\tif (desc->flags & VRING_DESC_F_NEXT) {\n+\t\t\tdesc->len = vq->vhost_hlen;\n+\t\t\tdesc = &vq->desc[desc->next];\n+\t\t\tdesc->len = rte_pktmbuf_data_len(buff);\n+\t\t} else {\n+\t\t\tdesc->len = packet_len;\n+\t\t}\n+\n+\t\t/* Update used ring with desc information */\n+\t\tvq->used->ring[res_cur_idx & (vq->size - 1)].id\n+\t\t\t= head[packet_success];\n+\t\tvq->used->ring[res_cur_idx & (vq->size - 1)].len\n+\t\t\t= packet_len;\n+\t\tres_cur_idx++;\n+\t\tpacket_success++;\n+\n+\t\t/* A header is required per buffer. */\n+\t\trte_memcpy((void *)(uintptr_t)buff_hdr_addr,\n+\t\t\t(const void *)&virtio_hdr, vq->vhost_hlen);\n+\n+\t\tPRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);\n+\n+\t\tif (likely(packet_success < count)) {\n+\t\t\t/* Prefetch descriptor index. */\n+\t\t\trte_prefetch0(&vq->desc[head[packet_success]]);\n+\t\t}\n+\t}\n+\n+\trte_compiler_barrier();\n+\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in dev_rx_zcp: before update used idx: \"\n+\t\t\"vq.last_used_idx: %d, vq->used->idx: %d\\n\",\n+\t\tdev->device_fh, vq->last_used_idx, vq->used->idx);\n+\n+\t*(volatile uint16_t *)&vq->used->idx += count;\n+\tvq->last_used_idx += count;\n+\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in dev_rx_zcp: after  update used idx: \"\n+\t\t\"vq.last_used_idx: %d, vq->used->idx: %d\\n\",\n+\t\tdev->device_fh, vq->last_used_idx, vq->used->idx);\n+\n+\t/* Kick the guest if necessary. */\n+\tif (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))\n+\t\teventfd_write((int)vq->kickfd, 1);\n+\n+\treturn count;\n+}\n+\n+/*\n+ * This function routes the TX packet to the correct interface.\n+ * This may be a local device or the physical port.\n+ */\n+static inline void __attribute__((always_inline))\n+virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m,\n+\tuint32_t desc_idx, uint8_t need_copy)\n+{\n+\tstruct mbuf_table *tx_q;\n+\tstruct rte_mbuf **m_table;\n+\tstruct rte_mbuf *mbuf = NULL;\n+\tunsigned len, ret, offset = 0;\n+\tstruct vpool *vpool;\n+\tstruct virtio_net_data_ll *dev_ll = ll_root_used;\n+\tstruct ether_hdr *pkt_hdr = (struct ether_hdr *)m->pkt.data;\n+\tuint16_t vlan_tag = (uint16_t)vlan_tags[(uint16_t)dev->device_fh];\n+\tuint16_t vmdq_rx_q = ((struct vhost_dev *)dev->priv)->vmdq_rx_q;\n+\n+\t/*Add packet to the port tx queue*/\n+\ttx_q = &tx_queue_zcp[vmdq_rx_q];\n+\tlen = tx_q->len;\n+\n+\t/* Allocate an mbuf and populate the structure. */\n+\tvpool = &vpool_array[MAX_QUEUES + vmdq_rx_q];\n+\trte_ring_sc_dequeue(vpool->ring, (void **)&mbuf);\n+\tif (unlikely(mbuf == NULL)) {\n+\t\tstruct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_TXQ];\n+\t\tRTE_LOG(ERR, VHOST_DATA,\n+\t\t\t\"(%\"PRIu64\") Failed to allocate memory for mbuf.\\n\",\n+\t\t\tdev->device_fh);\n+\t\tput_desc_to_used_list_zcp(vq, desc_idx);\n+\t\treturn;\n+\t}\n+\n+\tif (vm2vm_mode == VM2VM_HARDWARE) {\n+\t\t/* Avoid using a vlan tag from any vm for external pkt, such as\n+\t\t * vlan_tags[dev->device_fh], oterwise, it conflicts when pool\n+\t\t * selection, MAC address determines it as an external pkt\n+\t\t * which should go to network, while vlan tag determine it as\n+\t\t * a vm2vm pkt should forward to another vm. Hardware confuse\n+\t\t * such a ambiguous situation, so pkt will lost.\n+\t\t */\n+\t\tvlan_tag = external_pkt_default_vlan_tag;\n+\t\twhile (dev_ll != NULL) {\n+\t\t\tif (likely(dev_ll->vdev->ready == DEVICE_RX) &&\n+\t\t\t\tether_addr_cmp(&(pkt_hdr->d_addr),\n+\t\t\t\t&dev_ll->vdev->mac_address)) {\n+\n+\t\t\t\t/*\n+\t\t\t\t * Drop the packet if the TX packet is destined\n+\t\t\t\t * for the TX device.\n+\t\t\t\t */\n+\t\t\t\tif (unlikely(dev_ll->vdev->dev->device_fh\n+\t\t\t\t\t== dev->device_fh)) {\n+\t\t\t\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\t\t\"(%\"PRIu64\") TX: Source and destination\"\n+\t\t\t\t\t\"MAC addresses are the same. Dropping \"\n+\t\t\t\t\t\"packet.\\n\",\n+\t\t\t\t\tdev_ll->dev->device_fh);\n+\t\t\t\t\tMBUF_HEADROOM_UINT32(mbuf)\n+\t\t\t\t\t\t= (uint32_t)desc_idx;\n+\t\t\t\t\t__rte_mbuf_raw_free(mbuf);\n+\t\t\t\t\treturn;\n+\t\t\t\t}\n+\n+\t\t\t\t/*\n+\t\t\t\t * Packet length offset 4 bytes for HW vlan\n+\t\t\t\t * strip when L2 switch back.\n+\t\t\t\t */\n+\t\t\t\toffset = 4;\n+\t\t\t\tvlan_tag =\n+\t\t\t\t(uint16_t)\n+\t\t\t\tvlan_tags[(uint16_t)dev_ll->vdev->dev->device_fh];\n+\n+\t\t\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\t\"(%\"PRIu64\") TX: pkt to local VM device id:\"\n+\t\t\t\t\"(%\"PRIu64\") vlan tag: %d.\\n\",\n+\t\t\t\tdev->device_fh, dev_ll->dev->device_fh,\n+\t\t\t\tvlan_tag);\n+\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t\tdev_ll = dev_ll->next;\n+\t\t}\n+\t}\n+\n+\tmbuf->pkt.nb_segs = m->pkt.nb_segs;\n+\tmbuf->pkt.next = m->pkt.next;\n+\tmbuf->pkt.data_len = m->pkt.data_len + offset;\n+\tmbuf->pkt.pkt_len = mbuf->pkt.data_len;\n+\tif (unlikely(need_copy)) {\n+\t\t/* Copy the packet contents to the mbuf. */\n+\t\trte_memcpy((void *)((uint8_t *)mbuf->pkt.data),\n+\t\t\t(const void *) ((uint8_t *)m->pkt.data),\n+\t\t\tm->pkt.data_len);\n+\t} else {\n+\t\tmbuf->pkt.data = m->pkt.data;\n+\t\tmbuf->buf_physaddr = m->buf_physaddr;\n+\t\tmbuf->buf_addr = m->buf_addr;\n+\t}\n+\tmbuf->ol_flags = PKT_TX_VLAN_PKT;\n+\tmbuf->pkt.vlan_macip.f.vlan_tci = vlan_tag;\n+\tmbuf->pkt.vlan_macip.f.l2_len = sizeof(struct ether_hdr);\n+\tmbuf->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr);\n+\tMBUF_HEADROOM_UINT32(mbuf) = (uint32_t)desc_idx;\n+\n+\ttx_q->m_table[len] = mbuf;\n+\tlen++;\n+\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in tx_route_zcp: pkt: nb_seg: %d, next:%s\\n\",\n+\t\tdev->device_fh,\n+\t\tmbuf->pkt.nb_segs,\n+\t\t(mbuf->pkt.next == NULL) ? \"null\" : \"non-null\");\n+\n+\tif (enable_stats) {\n+\t\tdev_statistics[dev->device_fh].tx_total++;\n+\t\tdev_statistics[dev->device_fh].tx++;\n+\t}\n+\n+\tif (unlikely(len == MAX_PKT_BURST)) {\n+\t\tm_table = (struct rte_mbuf **)tx_q->m_table;\n+\t\tret = rte_eth_tx_burst(ports[0],\n+\t\t\t(uint16_t)tx_q->txq_id, m_table, (uint16_t) len);\n+\n+\t\t/*\n+\t\t * Free any buffers not handled by TX and update\n+\t\t * the port stats.\n+\t\t */\n+\t\tif (unlikely(ret < len)) {\n+\t\t\tdo {\n+\t\t\t\trte_pktmbuf_free(m_table[ret]);\n+\t\t\t} while (++ret < len);\n+\t\t}\n+\n+\t\tlen = 0;\n+\t\ttxmbuf_clean_zcp(dev, vpool);\n+\t}\n+\n+\ttx_q->len = len;\n+\n+\treturn;\n+}\n+\n+/*\n+ * This function TX all available packets in virtio TX queue for one\n+ * virtio-net device. If it is first packet, it learns MAC address and\n+ * setup VMDQ.\n+ */\n+static inline void __attribute__((always_inline))\n+virtio_dev_tx_zcp(struct virtio_net *dev)\n+{\n+\tstruct rte_mbuf m;\n+\tstruct vring_desc *desc;\n+\tuint64_t buff_addr = 0, phys_addr;\n+\tuint32_t head[MAX_PKT_BURST];\n+\tuint32_t i;\n+\tuint16_t free_entries, packet_success = 0;\n+\tuint16_t avail_idx;\n+\tuint8_t need_copy = 0;\n+\tenum hpa_type addr_type;\n+\tstruct vhost_dev *vdev = (struct vhost_dev *)dev->priv;\n+\tstruct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_TXQ];\n+\n+\tavail_idx =  *((volatile uint16_t *)&vq->avail->idx);\n+\n+\t/* If there are no available buffers then return. */\n+\tif (vq->last_used_idx_res == avail_idx)\n+\t\treturn;\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") virtio_dev_tx()\\n\", dev->device_fh);\n+\n+\t/* Prefetch available ring to retrieve head indexes. */\n+\trte_prefetch0(&vq->avail->ring[vq->last_used_idx_res & (vq->size - 1)]);\n+\n+\t/* Get the number of free entries in the ring */\n+\tfree_entries = (avail_idx - vq->last_used_idx_res);\n+\n+\t/* Limit to MAX_PKT_BURST. */\n+\tfree_entries\n+\t\t= (free_entries > MAX_PKT_BURST) ? MAX_PKT_BURST : free_entries;\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") Buffers available %d\\n\",\n+\t\tdev->device_fh, free_entries);\n+\n+\t/* Retrieve all of the head indexes first to avoid caching issues. */\n+\tfor (i = 0; i < free_entries; i++)\n+\t\thead[i]\n+\t\t\t= vq->avail->ring[(vq->last_used_idx_res + i)\n+\t\t\t& (vq->size - 1)];\n+\n+\tvq->last_used_idx_res += free_entries;\n+\n+\t/* Prefetch descriptor index. */\n+\trte_prefetch0(&vq->desc[head[packet_success]]);\n+\trte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);\n+\n+\twhile (packet_success < free_entries) {\n+\t\tdesc = &vq->desc[head[packet_success]];\n+\n+\t\t/* Discard first buffer as it is the virtio header */\n+\t\tdesc = &vq->desc[desc->next];\n+\n+\t\t/* Buffer address translation. */\n+\t\tbuff_addr = gpa_to_vva(dev, desc->addr);\n+\t\tphys_addr = gpa_to_hpa(vdev, desc->addr, desc->len, &addr_type);\n+\n+\t\tif (likely(packet_success < (free_entries - 1)))\n+\t\t\t/* Prefetch descriptor index. */\n+\t\t\trte_prefetch0(&vq->desc[head[packet_success + 1]]);\n+\n+\t\tif (unlikely(addr_type == PHYS_ADDR_INVALID)) {\n+\t\t\tRTE_LOG(ERR, VHOST_DATA,\n+\t\t\t\t\"(%\"PRIu64\") Invalid frame buffer address found\"\n+\t\t\t\t\"when TX packets!\\n\",\n+\t\t\t\tdev->device_fh);\n+\t\t\tpacket_success++;\n+\t\t\tcontinue;\n+\t\t}\n+\n+\t\t/* Prefetch buffer address. */\n+\t\trte_prefetch0((void *)(uintptr_t)buff_addr);\n+\n+\t\t/*\n+\t\t * Setup dummy mbuf. This is copied to a real mbuf if\n+\t\t * transmitted out the physical port.\n+\t\t */\n+\t\tm.pkt.data_len = desc->len;\n+\t\tm.pkt.nb_segs = 1;\n+\t\tm.pkt.next = NULL;\n+\t\tm.pkt.data = (void *)(uintptr_t)buff_addr;\n+\t\tm.buf_addr = m.pkt.data;\n+\t\tm.buf_physaddr = phys_addr;\n+\n+\t\t/*\n+\t\t * Check if the frame buffer address from guest crosses\n+\t\t * sub-region or not.\n+\t\t */\n+\t\tif (unlikely(addr_type == PHYS_ADDR_CROSS_SUBREG)) {\n+\t\t\tRTE_LOG(ERR, VHOST_DATA,\n+\t\t\t\t\"(%\"PRIu64\") Frame buffer address cross \"\n+\t\t\t\t\"sub-regioin found when attaching TX frame \"\n+\t\t\t\t\"buffer address!\\n\",\n+\t\t\t\tdev->device_fh);\n+\t\t\tneed_copy = 1;\n+\t\t} else\n+\t\t\tneed_copy = 0;\n+\n+\t\tPRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0);\n+\n+\t\t/*\n+\t\t * If this is the first received packet we need to learn\n+\t\t * the MAC and setup VMDQ\n+\t\t */\n+\t\tif (unlikely(vdev->ready == DEVICE_MAC_LEARNING)) {\n+\t\t\tif (vdev->remove || (link_vmdq(vdev, &m) == -1)) {\n+\t\t\t\t/*\n+\t\t\t\t * Discard frame if device is scheduled for\n+\t\t\t\t * removal or a duplicate MAC address is found.\n+\t\t\t\t */\n+\t\t\t\tpacket_success += free_entries;\n+\t\t\t\tvq->last_used_idx += packet_success;\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t}\n+\n+\t\tvirtio_tx_route_zcp(dev, &m, head[packet_success], need_copy);\n+\t\tpacket_success++;\n+\t}\n+}\n+\n+/*\n+ * This function is called by each data core. It handles all RX/TX registered\n+ * with the core. For TX the specific lcore linked list is used. For RX, MAC\n+ * addresses are compared with all devices in the main linked list.\n+ */\n+static int\n+switch_worker_zcp(__attribute__((unused)) void *arg)\n+{\n+\tstruct virtio_net *dev = NULL;\n+\tstruct vhost_dev  *vdev = NULL;\n+\tstruct rte_mbuf *pkts_burst[MAX_PKT_BURST];\n+\tstruct virtio_net_data_ll *dev_ll;\n+\tstruct mbuf_table *tx_q;\n+\tvolatile struct lcore_ll_info *lcore_ll;\n+\tconst uint64_t drain_tsc\n+\t\t= (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S\n+\t\t* BURST_TX_DRAIN_US;\n+\tuint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;\n+\tunsigned ret;\n+\tconst uint16_t lcore_id = rte_lcore_id();\n+\tuint16_t count_in_ring, rx_count = 0;\n+\n+\tRTE_LOG(INFO, VHOST_DATA, \"Procesing on Core %u started\\n\", lcore_id);\n+\n+\tlcore_ll = lcore_info[lcore_id].lcore_ll;\n+\tprev_tsc = 0;\n+\n+\twhile (1) {\n+\t\tcur_tsc = rte_rdtsc();\n+\n+\t\t/* TX burst queue drain */\n+\t\tdiff_tsc = cur_tsc - prev_tsc;\n+\t\tif (unlikely(diff_tsc > drain_tsc)) {\n+\t\t\t/*\n+\t\t\t * Get mbuf from vpool.pool and detach mbuf and\n+\t\t\t * put back into vpool.ring.\n+\t\t\t */\n+\t\t\tdev_ll = lcore_ll->ll_root_used;\n+\t\t\twhile ((dev_ll != NULL) && (dev_ll->vdev != NULL)) {\n+\t\t\t\t/* Get virtio device ID */\n+\t\t\t\tvdev = dev_ll->vdev;\n+\t\t\t\tdev = vdev->dev;\n+\n+\t\t\t\tif (likely(!vdev->remove)) {\n+\t\t\t\t\ttx_q = &tx_queue_zcp[(uint16_t)vdev->vmdq_rx_q];\n+\t\t\t\t\tif (tx_q->len) {\n+\t\t\t\t\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\t\t\t\"TX queue drained after timeout\"\n+\t\t\t\t\t\t\" with burst size %u\\n\",\n+\t\t\t\t\t\ttx_q->len);\n+\n+\t\t\t\t\t\t/*\n+\t\t\t\t\t\t * Tx any packets in the queue\n+\t\t\t\t\t\t */\n+\t\t\t\t\t\tret = rte_eth_tx_burst(\n+\t\t\t\t\t\t\tports[0],\n+\t\t\t\t\t\t\t(uint16_t)tx_q->txq_id,\n+\t\t\t\t\t\t\t(struct rte_mbuf **)\n+\t\t\t\t\t\t\ttx_q->m_table,\n+\t\t\t\t\t\t\t(uint16_t)tx_q->len);\n+\t\t\t\t\t\tif (unlikely(ret < tx_q->len)) {\n+\t\t\t\t\t\t\tdo {\n+\t\t\t\t\t\t\t\trte_pktmbuf_free(\n+\t\t\t\t\t\t\t\t\ttx_q->m_table[ret]);\n+\t\t\t\t\t\t\t} while (++ret < tx_q->len);\n+\t\t\t\t\t\t}\n+\t\t\t\t\t\ttx_q->len = 0;\n+\n+\t\t\t\t\t\ttxmbuf_clean_zcp(dev,\n+\t\t\t\t\t\t\t&vpool_array[MAX_QUEUES + vdev->vmdq_rx_q]);\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t\tdev_ll = dev_ll->next;\n+\t\t\t}\n+\t\t\tprev_tsc = cur_tsc;\n+\t\t}\n+\n+\t\trte_prefetch0(lcore_ll->ll_root_used);\n+\n+\t\t/*\n+\t\t * Inform the configuration core that we have exited the linked\n+\t\t * list and that no devices are in use if requested.\n+\t\t */\n+\t\tif (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL)\n+\t\t\tlcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;\n+\n+\t\t/* Process devices */\n+\t\tdev_ll = lcore_ll->ll_root_used;\n+\n+\t\twhile ((dev_ll != NULL) && (dev_ll->vdev != NULL)) {\n+\t\t\tvdev = dev_ll->vdev;\n+\t\t\tdev  = vdev->dev;\n+\t\t\tif (unlikely(vdev->remove)) {\n+\t\t\t\tdev_ll = dev_ll->next;\n+\t\t\t\tunlink_vmdq(vdev);\n+\t\t\t\tvdev->ready = DEVICE_SAFE_REMOVE;\n+\t\t\t\tcontinue;\n+\t\t\t}\n+\n+\t\t\tif (likely(vdev->ready == DEVICE_RX)) {\n+\t\t\t\tuint32_t index = vdev->vmdq_rx_q;\n+\t\t\t\tuint16_t i;\n+\t\t\t\tcount_in_ring\n+\t\t\t\t= rte_ring_count(vpool_array[index].ring);\n+\t\t\t\tuint16_t free_entries\n+\t\t\t\t= (uint16_t)get_available_ring_num_zcp(dev);\n+\n+\t\t\t\t/*\n+\t\t\t\t * Attach all mbufs in vpool.ring and put back\n+\t\t\t\t * into vpool.pool.\n+\t\t\t\t */\n+\t\t\t\tfor (i = 0;\n+\t\t\t\ti < RTE_MIN(free_entries,\n+\t\t\t\tRTE_MIN(count_in_ring, MAX_PKT_BURST));\n+\t\t\t\ti++)\n+\t\t\t\t\tattach_rxmbuf_zcp(dev);\n+\n+\t\t\t\t/* Handle guest RX */\n+\t\t\t\trx_count = rte_eth_rx_burst(ports[0],\n+\t\t\t\t\tvdev->vmdq_rx_q, pkts_burst,\n+\t\t\t\t\tMAX_PKT_BURST);\n+\n+\t\t\t\tif (rx_count) {\n+\t\t\t\t\tret_count = virtio_dev_rx_zcp(dev,\n+\t\t\t\t\t\t\tpkts_burst, rx_count);\n+\t\t\t\t\tif (enable_stats) {\n+\t\t\t\t\t\tdev_statistics[dev->device_fh].rx_total\n+\t\t\t\t\t\t\t+= rx_count;\n+\t\t\t\t\t\tdev_statistics[dev->device_fh].rx\n+\t\t\t\t\t\t\t+= ret_count;\n+\t\t\t\t\t}\n+\t\t\t\t\twhile (likely(rx_count)) {\n+\t\t\t\t\t\trx_count--;\n+\t\t\t\t\t\tpktmbuf_detach_zcp(\n+\t\t\t\t\t\t\tpkts_burst[rx_count]);\n+\t\t\t\t\t\trte_ring_sp_enqueue(\n+\t\t\t\t\t\t\tvpool_array[index].ring,\n+\t\t\t\t\t\t\t(void *)pkts_burst[rx_count]);\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\tif (likely(!vdev->remove))\n+\t\t\t\t/* Handle guest TX */\n+\t\t\t\tvirtio_dev_tx_zcp(dev);\n+\n+\t\t\t/* Move to the next device in the list */\n+\t\t\tdev_ll = dev_ll->next;\n+\t\t}\n+\t}\n+\n+\treturn 0;\n+}\n+\n+\n+/*\n+ * Add an entry to a used linked list. A free entry must first be found\n+ * in the free linked list using get_data_ll_free_entry();\n+ */\n+static void\n+add_data_ll_entry(struct virtio_net_data_ll **ll_root_addr,\n+\tstruct virtio_net_data_ll *ll_dev)\n+{\n+\tstruct virtio_net_data_ll *ll = *ll_root_addr;\n+\n+\t/* Set next as NULL and use a compiler barrier to avoid reordering. */\n+\tll_dev->next = NULL;\n+\trte_compiler_barrier();\n+\n+\t/* If ll == NULL then this is the first device. */\n+\tif (ll) {\n+\t\t/* Increment to the tail of the linked list. */\n+\t\twhile ((ll->next != NULL))\n+\t\t\tll = ll->next;\n+\n+\t\tll->next = ll_dev;\n+\t} else {\n+\t\t*ll_root_addr = ll_dev;\n+\t}\n+}\n+\n+/*\n+ * Remove an entry from a used linked list. The entry must then be added to\n+ * the free linked list using put_data_ll_free_entry().\n+ */\n+static void\n+rm_data_ll_entry(struct virtio_net_data_ll **ll_root_addr,\n+\tstruct virtio_net_data_ll *ll_dev,\n+\tstruct virtio_net_data_ll *ll_dev_last)\n+{\n+\tstruct virtio_net_data_ll *ll = *ll_root_addr;\n+\n+\tif (unlikely((ll == NULL) || (ll_dev == NULL)))\n+\t\treturn;\n+\n+\tif (ll_dev == ll)\n+\t\t*ll_root_addr = ll_dev->next;\n+\telse\n+\t\tif (likely(ll_dev_last != NULL))\n+\t\t\tll_dev_last->next = ll_dev->next;\n+\t\telse\n+\t\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\t\"Remove entry form ll failed.\\n\");\n+}\n+\n+/*\n+ * Find and return an entry from the free linked list.\n+ */\n+static struct virtio_net_data_ll *\n+get_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr)\n+{\n+\tstruct virtio_net_data_ll *ll_free = *ll_root_addr;\n+\tstruct virtio_net_data_ll *ll_dev;\n+\n+\tif (ll_free == NULL)\n+\t\treturn NULL;\n+\n+\tll_dev = ll_free;\n+\t*ll_root_addr = ll_free->next;\n+\n+\treturn ll_dev;\n+}\n+\n+/*\n+ * Place an entry back on to the free linked list.\n+ */\n+static void\n+put_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr,\n+\tstruct virtio_net_data_ll *ll_dev)\n+{\n+\tstruct virtio_net_data_ll *ll_free = *ll_root_addr;\n+\n+\tif (ll_dev == NULL)\n+\t\treturn;\n+\n+\tll_dev->next = ll_free;\n+\t*ll_root_addr = ll_dev;\n+}\n+\n+/*\n+ * Creates a linked list of a given size.\n+ */\n+static struct virtio_net_data_ll *\n+alloc_data_ll(uint32_t size)\n+{\n+\tstruct virtio_net_data_ll *ll_new;\n+\tuint32_t i;\n+\n+\t/* Malloc and then chain the linked list. */\n+\tll_new = rte_zmalloc(\"vhost ll\",\n+\t\t\tsize * sizeof(struct virtio_net_data_ll), 0);\n+\tif (ll_new == NULL) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\"Failed to allocate memory for ll_new.\\n\");\n+\t\treturn NULL;\n+\t}\n+\n+\tfor (i = 0; i < size - 1; i++) {\n+\t\tll_new[i].vdev = NULL;\n+\t\tll_new[i].next = &ll_new[i+1];\n+\t}\n+\tll_new[i].next = NULL;\n+\n+\treturn ll_new;\n+}\n+\n+/*\n+ * Create the main linked list along with each individual cores linked list.\n+ * A used and a free list are created to manage entries.\n+ */\n+static int\n+init_data_ll(void)\n+{\n+\tint lcore;\n+\n+\tRTE_LCORE_FOREACH_SLAVE(lcore) {\n+\t\tlcore_info[lcore].lcore_ll = rte_zmalloc(\"vhost lcore_ll_info\",\n+\t\t\t\t\tsizeof(struct lcore_ll_info), 0);\n+\t\tif (lcore_info[lcore].lcore_ll == NULL) {\n+\t\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\t\"Failed to allocate memory for lcore_ll.\\n\");\n+\t\t\treturn -1;\n+\t\t}\n+\n+\t\tlcore_info[lcore].lcore_ll->device_num = 0;\n+\t\tlcore_info[lcore].lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;\n+\t\tlcore_info[lcore].lcore_ll->ll_root_used = NULL;\n+\t\tif (num_devices % num_switching_cores)\n+\t\t\tlcore_info[lcore].lcore_ll->ll_root_free =\n+\t\t\talloc_data_ll((num_devices / num_switching_cores) + 1);\n+\t\telse\n+\t\t\tlcore_info[lcore].lcore_ll->ll_root_free =\n+\t\t\talloc_data_ll(num_devices / num_switching_cores);\n+\t}\n+\n+\t/* Allocate devices up to a maximum of MAX_DEVICES. */\n+\tll_root_free = alloc_data_ll(MIN((num_devices), MAX_DEVICES));\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * Remove a device from the specific data core linked list and from the main\n+ * linked list. Synchonization occurs through the use of the lcore\n+ * dev_removal_flag. Device is made volatile here to avoid re-ordering\n+ * of dev->remove=1 which can cause an infinite loop in the rte_pause loop.\n+ */\n+static void\n+destroy_device(struct virtio_net *dev)\n+{\n+\tstruct virtio_net_data_ll *ll_lcore_dev_cur;\n+\tstruct virtio_net_data_ll *ll_main_dev_cur;\n+\tstruct virtio_net_data_ll *ll_lcore_dev_last = NULL;\n+\tstruct virtio_net_data_ll *ll_main_dev_last = NULL;\n+\tstruct vhost_dev *vdev;\n+\tint lcore;\n+\n+\tdev->flags &= ~VIRTIO_DEV_RUNNING;\n+\n+\tvdev = (struct vhost_dev *)dev->priv;\n+\t/* Set the remove flag. */\n+\tvdev->remove = 1;\n+\twhile (vdev->ready != DEVICE_SAFE_REMOVE)\n+\t\trte_pause();\n+\n+\t/* Search for entry to be removed from lcore ll */\n+\tll_lcore_dev_cur = lcore_info[vdev->coreid].lcore_ll->ll_root_used;\n+\twhile (ll_lcore_dev_cur != NULL) {\n+\t\tif (ll_lcore_dev_cur->vdev == vdev) {\n+\t\t\tbreak;\n+\t\t} else {\n+\t\t\tll_lcore_dev_last = ll_lcore_dev_cur;\n+\t\t\tll_lcore_dev_cur = ll_lcore_dev_cur->next;\n+\t\t}\n+\t}\n+\n+\tif (ll_lcore_dev_cur == NULL) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") Failed to find the dev to be destroy.\\n\",\n+\t\t\tdev->device_fh);\n+\t\treturn;\n+\t}\n+\n+\t/* Search for entry to be removed from main ll */\n+\tll_main_dev_cur = ll_root_used;\n+\tll_main_dev_last = NULL;\n+\twhile (ll_main_dev_cur != NULL) {\n+\t\tif (ll_main_dev_cur->vdev == vdev) {\n+\t\t\tbreak;\n+\t\t} else {\n+\t\t\tll_main_dev_last = ll_main_dev_cur;\n+\t\t\tll_main_dev_cur = ll_main_dev_cur->next;\n+\t\t}\n+\t}\n+\n+\t/* Remove entries from the lcore and main ll. */\n+\trm_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used,\n+\t\tll_lcore_dev_cur, ll_lcore_dev_last);\n+\trm_data_ll_entry(&ll_root_used, ll_main_dev_cur, ll_main_dev_last);\n+\n+\t/* Set the dev_removal_flag on each lcore. */\n+\tRTE_LCORE_FOREACH_SLAVE(lcore) {\n+\t\tlcore_info[lcore].lcore_ll->dev_removal_flag =\n+\t\t\tREQUEST_DEV_REMOVAL;\n+\t}\n+\n+\t/*\n+\t * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL we\n+\t * can be sure that they can no longer access the device removed from\n+\t * the linked lists and that the devices are no longer in use.\n+\t */\n+\tRTE_LCORE_FOREACH_SLAVE(lcore) {\n+\t\twhile (lcore_info[lcore].lcore_ll->dev_removal_flag !=\n+\t\t\tACK_DEV_REMOVAL)\n+\t\t\trte_pause();\n+\t}\n+\n+\t/* Add the entries back to the lcore and main free ll.*/\n+\tput_data_ll_free_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_free,\n+\t\t\t\tll_lcore_dev_cur);\n+\tput_data_ll_free_entry(&ll_root_free, ll_main_dev_cur);\n+\n+\t/* Decrement number of device on the lcore. */\n+\tlcore_info[vdev->coreid].lcore_ll->device_num--;\n+\n+\tRTE_LOG(INFO, VHOST_DATA,\n+\t\t\"(%\"PRIu64\") Device has been removed from data core\\n\",\n+\t\tdev->device_fh);\n+\n+\tif (zero_copy) {\n+\t\tstruct vpool *vpool = &vpool_array[vdev->vmdq_rx_q];\n+\n+\t\t/* Stop the RX queue. */\n+\t\tif (rte_eth_dev_rx_queue_stop(ports[0], vdev->vmdq_rx_q) != 0) {\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"(%\"PRIu64\") In destroy_device: Failed to stop \"\n+\t\t\t\t\"rx queue:%d\\n\",\n+\t\t\t\tdev->device_fh,\n+\t\t\t\tdev->vmdq_rx_q);\n+\t\t}\n+\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") in destroy_device: Start put mbuf in \"\n+\t\t\t\"mempool back to ring for RX queue: %d\\n\",\n+\t\t\tdev->device_fh, dev->vmdq_rx_q);\n+\n+\t\tmbuf_destroy_zcp(vpool);\n+\n+\t\t/* Stop the TX queue. */\n+\t\tif (rte_eth_dev_tx_queue_stop(ports[0], vdev->vmdq_rx_q) != 0) {\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"(%\"PRIu64\") In destroy_device: Failed to \"\n+\t\t\t\t\"stop tx queue:%d\\n\",\n+\t\t\t\tdev->device_fh, dev->vmdq_rx_q);\n+\t\t}\n+\n+\t\tvpool = &vpool_array[vdev->vmdq_rx_q + MAX_QUEUES];\n+\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") destroy_device: Start put mbuf in mempool \"\n+\t\t\t\"back to ring for TX queue: %d, dev:(%\"PRIu64\")\\n\",\n+\t\t\tdev->device_fh, (vdev->vmdq_rx_q + MAX_QUEUES),\n+\t\t\tdev->device_fh);\n+\n+\t\tmbuf_destroy_zcp(vpool);\n+\t\trte_free(vdev->regions_hpa);\n+\t}\n+\trte_free(vdev);\n+\n+}\n+\n+/*\n+ * Calculate the region count of physical continous regions for one particular\n+ * region of whose vhost virtual address is continous. The particular region\n+ * start from vva_start, with size of 'size' in argument.\n+ */\n+static uint32_t\n+check_hpa_regions(uint64_t vva_start, uint64_t size)\n+{\n+\tuint32_t i, nregions = 0, page_size = getpagesize();\n+\tuint64_t cur_phys_addr = 0, next_phys_addr = 0;\n+\tif (vva_start % page_size) {\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"in check_countinous: vva start(%p) mod page_size(%d) \"\n+\t\t\t\"has remainder\\n\",\n+\t\t\t(void *)(uintptr_t)vva_start, page_size);\n+\t\treturn 0;\n+\t}\n+\tif (size % page_size) {\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"in check_countinous: \"\n+\t\t\t\"size((%\"PRIu64\")) mod page_size(%d) has remainder\\n\",\n+\t\t\tsize, page_size);\n+\t\treturn 0;\n+\t}\n+\tfor (i = 0; i < size - page_size; i = i + page_size) {\n+\t\tcur_phys_addr\n+\t\t\t= rte_mem_virt2phy((void *)(uintptr_t)(vva_start + i));\n+\t\tnext_phys_addr = rte_mem_virt2phy(\n+\t\t\t(void *)(uintptr_t)(vva_start + i + page_size));\n+\t\tif ((cur_phys_addr + page_size) != next_phys_addr) {\n+\t\t\t++nregions;\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"in check_continuous: hva addr:(%p) is not \"\n+\t\t\t\t\"continuous with hva addr:(%p), diff:%d\\n\",\n+\t\t\t\t(void *)(uintptr_t)(vva_start + (uint64_t)i),\n+\t\t\t\t(void *)(uintptr_t)(vva_start + (uint64_t)i\n+\t\t\t\t+ page_size), page_size);\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"in check_continuous: hpa addr:(%p) is not \"\n+\t\t\t\t\"continuous with hpa addr:(%p), \"\n+\t\t\t\t\"diff:(%\"PRIu64\")\\n\",\n+\t\t\t\t(void *)(uintptr_t)cur_phys_addr,\n+\t\t\t\t(void *)(uintptr_t)next_phys_addr,\n+\t\t\t\t(next_phys_addr-cur_phys_addr));\n+\t\t}\n+\t}\n+\treturn nregions;\n+}\n+\n+/*\n+ * Divide each region whose vhost virtual address is continous into a few\n+ * sub-regions, make sure the physical address within each sub-region are\n+ * continous. And fill offset(to GPA) and size etc. information of each\n+ * sub-region into regions_hpa.\n+ */\n+static uint32_t\n+fill_hpa_memory_regions(struct virtio_memory_regions_hpa *mem_region_hpa,\n+\t\t\tstruct virtio_memory *virtio_memory)\n+{\n+\tuint32_t regionidx, regionidx_hpa = 0, i, k, page_size = getpagesize();\n+\tuint64_t cur_phys_addr = 0, next_phys_addr = 0, vva_start;\n+\n+\tif (mem_region_hpa == NULL)\n+\t\treturn 0;\n+\n+\tfor (regionidx = 0; regionidx < virtio_memory->nregions; regionidx++) {\n+\t\tvva_start = virtio_memory->regions[regionidx].guest_phys_address +\n+\t\t\tvirtio_memory->regions[regionidx].address_offset;\n+\t\tmem_region_hpa[regionidx_hpa].guest_phys_address =\n+\t\t\tvirtio_memory->regions[regionidx].guest_phys_address;\n+\t\tmem_region_hpa[regionidx_hpa].host_phys_addr_offset =\n+\t\t\trte_mem_virt2phy((void *)(uintptr_t)(vva_start)) -\n+\t\t\tmem_region_hpa[regionidx_hpa].guest_phys_address;\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"in fill_hpa_regions: guest phys addr start[%d]:(%p)\\n\",\n+\t\t\tregionidx_hpa,\n+\t\t\t(void *)(uintptr_t)\n+\t\t\t(mem_region_hpa[regionidx_hpa].guest_phys_address));\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"in fill_hpa_regions: host  phys addr start[%d]:(%p)\\n\",\n+\t\t\tregionidx_hpa,\n+\t\t\t(void *)(uintptr_t)\n+\t\t\t(mem_region_hpa[regionidx_hpa].host_phys_addr_offset));\n+\t\tfor (i = 0, k = 0;\n+\t\t\ti < virtio_memory->regions[regionidx].memory_size -\n+\t\t\t\tpage_size;\n+\t\t\ti += page_size) {\n+\t\t\tcur_phys_addr = rte_mem_virt2phy(\n+\t\t\t\t\t(void *)(uintptr_t)(vva_start + i));\n+\t\t\tnext_phys_addr = rte_mem_virt2phy(\n+\t\t\t\t\t(void *)(uintptr_t)(vva_start +\n+\t\t\t\t\ti + page_size));\n+\t\t\tif ((cur_phys_addr + page_size) != next_phys_addr) {\n+\t\t\t\tmem_region_hpa[regionidx_hpa].guest_phys_address_end =\n+\t\t\t\t\tmem_region_hpa[regionidx_hpa].guest_phys_address +\n+\t\t\t\t\tk + page_size;\n+\t\t\t\tmem_region_hpa[regionidx_hpa].memory_size\n+\t\t\t\t\t= k + page_size;\n+\t\t\t\tLOG_DEBUG(VHOST_CONFIG, \"in fill_hpa_regions: \"\n+\t\t\t\t\t\"guest phys addr end  [%d]:(%p)\\n\",\n+\t\t\t\t\tregionidx_hpa,\n+\t\t\t\t\t(void *)(uintptr_t)\n+\t\t\t\t\t(mem_region_hpa[regionidx_hpa].guest_phys_address_end));\n+\t\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\t\"in fill_hpa_regions: guest phys addr \"\n+\t\t\t\t\t\"size [%d]:(%p)\\n\",\n+\t\t\t\t\tregionidx_hpa,\n+\t\t\t\t\t(void *)(uintptr_t)\n+\t\t\t\t\t(mem_region_hpa[regionidx_hpa].memory_size));\n+\t\t\t\tmem_region_hpa[regionidx_hpa + 1].guest_phys_address\n+\t\t\t\t\t= mem_region_hpa[regionidx_hpa].guest_phys_address_end;\n+\t\t\t\t++regionidx_hpa;\n+\t\t\t\tmem_region_hpa[regionidx_hpa].host_phys_addr_offset =\n+\t\t\t\t\tnext_phys_addr -\n+\t\t\t\t\tmem_region_hpa[regionidx_hpa].guest_phys_address;\n+\t\t\t\tLOG_DEBUG(VHOST_CONFIG, \"in fill_hpa_regions: \"\n+\t\t\t\t\t\"guest phys addr start[%d]:(%p)\\n\",\n+\t\t\t\t\tregionidx_hpa,\n+\t\t\t\t\t(void *)(uintptr_t)\n+\t\t\t\t\t(mem_region_hpa[regionidx_hpa].guest_phys_address));\n+\t\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\t\"in fill_hpa_regions: host  phys addr \"\n+\t\t\t\t\t\"start[%d]:(%p)\\n\",\n+\t\t\t\t\tregionidx_hpa,\n+\t\t\t\t\t(void *)(uintptr_t)\n+\t\t\t\t\t(mem_region_hpa[regionidx_hpa].host_phys_addr_offset));\n+\t\t\t\tk = 0;\n+\t\t\t} else {\n+\t\t\t\tk += page_size;\n+\t\t\t}\n+\t\t}\n+\t\tmem_region_hpa[regionidx_hpa].guest_phys_address_end\n+\t\t\t= mem_region_hpa[regionidx_hpa].guest_phys_address\n+\t\t\t+ k + page_size;\n+\t\tmem_region_hpa[regionidx_hpa].memory_size = k + page_size;\n+\t\tLOG_DEBUG(VHOST_CONFIG, \"in fill_hpa_regions: guest phys addr \"\n+\t\t\t\"end [%d]:(%p)\\n\", regionidx_hpa,\n+\t\t\t(void *)(uintptr_t)\n+\t\t\t(mem_region_hpa[regionidx_hpa].guest_phys_address_end));\n+\t\tLOG_DEBUG(VHOST_CONFIG, \"in fill_hpa_regions: guest phys addr \"\n+\t\t\t\"size [%d]:(%p)\\n\", regionidx_hpa,\n+\t\t\t(void *)(uintptr_t)\n+\t\t\t(mem_region_hpa[regionidx_hpa].memory_size));\n+\t\t++regionidx_hpa;\n+\t}\n+\treturn regionidx_hpa;\n+}\n+\n+/*\n+ * A new device is added to a data core. First the device is added to the main\n+ * linked list and then allocated to a specific data core.\n+ */\n+static int\n+new_device(struct virtio_net *dev)\n+{\n+\tstruct virtio_net_data_ll *ll_dev;\n+\tint lcore, core_add = 0;\n+\tuint32_t device_num_min = num_devices;\n+\tstruct vhost_dev *vdev;\n+\tuint32_t regionidx;\n+\n+\tvdev = rte_zmalloc(\"vhost device\", sizeof(*vdev), CACHE_LINE_SIZE);\n+\tif (vdev == NULL) {\n+\t\tRTE_LOG(INFO, VHOST_DATA,\n+\t\t\"(%\"PRIu64\") Couldn't allocate memory for vhost dev\\n\",\n+\t\t\tdev->device_fh);\n+\t\treturn -1;\n+\t}\n+\tvdev->dev = dev;\n+\tdev->priv = vdev;\n+\n+\tif (zero_copy) {\n+\t\tvdev->nregions_hpa = dev->mem->nregions;\n+\t\tfor (regionidx = 0; regionidx < dev->mem->nregions;\n+\t\t\tregionidx++) {\n+\t\t\tvdev->nregions_hpa\n+\t\t\t\t+= check_hpa_regions(\n+\t\t\t\t\tdev->mem->regions[regionidx].guest_phys_address +\n+\t\t\t\t\tdev->mem->regions[regionidx].address_offset,\n+\t\t\t\t\tdev->mem->regions[regionidx].memory_size);\n+\n+\t\t}\n+\n+\t\tvdev->regions_hpa = (struct virtio_memory_regions_hpa *)\n+\t\t\trte_zmalloc(\"vhost hpa region\",\n+\t\t\tsizeof(struct virtio_memory_regions_hpa) *\n+\t\t\tvdev->nregions_hpa, CACHE_LINE_SIZE);\n+\t\tif (vdev->regions_hpa == NULL) {\n+\t\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\t\"Cannot allocate memory for hpa region\\n\");\n+\t\t\trte_free(vdev);\n+\t\t\treturn -1;\n+\t\t}\n+\n+\n+\t\tif (fill_hpa_memory_regions(\n+\t\t\tvdev->regions_hpa, dev->mem) !=\n+\t\t\tvdev->nregions_hpa) {\n+\n+\t\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\t\"hpa memory regions number mismatch: \"\n+\t\t\t\t\"[%d]\\n\", vdev->nregions_hpa);\n+\t\t\trte_free(vdev->regions_hpa);\n+\t\t\trte_free(vdev);\n+\t\t\treturn -1;\n+\t\t}\n+\t}\n+\n+\n+\t/* Add device to main linked list */\n+\tll_dev = get_data_ll_free_entry(&ll_root_free);\n+\tif (ll_dev == NULL) {\n+\t\tRTE_LOG(INFO, VHOST_DATA,\n+\t\t\t\"(%\"PRIu64\") No free entry found in linked list. \"\n+\t\t\t\"Device limit of %d devices per core has been \"\n+\t\t\t\"reached\\n\",\n+\t\t\tdev->device_fh, num_devices);\n+\t\tif (vdev->regions_hpa)\n+\t\t\trte_free(vdev->regions_hpa);\n+\t\trte_free(vdev);\n+\t\treturn -1;\n+\t}\n+\tll_dev->vdev = vdev;\n+\tadd_data_ll_entry(&ll_root_used, ll_dev);\n+\n+\t/* Allocate the vmdq receive queue for this virtio device */\n+\tvdev->vmdq_rx_q = dev->device_fh *  (num_queues / num_devices);\n+\n+\tif (zero_copy) {\n+\t\tuint16_t vmdq_rx_q = vdev->vmdq_rx_q;\n+\t\tuint32_t count_in_ring, i;\n+\t\tstruct mbuf_table *tx_q;\n+\n+\t\tcount_in_ring = rte_ring_count(vpool_array[vmdq_rx_q].ring);\n+\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") in new_device: mbuf count in mempool \"\n+\t\t\t\"before attach is: %d\\n\",\n+\t\t\tdev->device_fh,\n+\t\t\trte_mempool_count(vpool_array[vmdq_rx_q].pool));\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") in new_device: mbuf count in  ring \"\n+\t\t\t\"before attach  is : %d\\n\",\n+\t\t\tdev->device_fh, count_in_ring);\n+\n+\n+\t\t/*\n+\t\t * Attach all mbufs in vpool.ring and put back into vpool.pool.\n+\t\t */\n+\t\tfor (i = 0; i < count_in_ring; i++)\n+\t\t\tattach_rxmbuf_zcp(dev);\n+\n+\t\tLOG_DEBUG(VHOST_CONFIG, \"(%\"PRIu64\") in new_device: \"\n+\t\t\t\"mbuf count in mempool after attach is: %d\\n\",\n+\t\t\tdev->device_fh,\n+\t\t\trte_mempool_count(vpool_array[vmdq_rx_q].pool));\n+\t\tLOG_DEBUG(VHOST_CONFIG, \"(%\"PRIu64\") in new_device: \"\n+\t\t\t\"mbuf count in ring after attach  is : %d\\n\",\n+\t\t\tdev->device_fh,\n+\t\t\trte_ring_count(vpool_array[vmdq_rx_q].ring));\n+\n+\t\ttx_q = &tx_queue_zcp[vmdq_rx_q];\n+\t\ttx_q->txq_id = vmdq_rx_q;\n+\n+\t\tif (rte_eth_dev_tx_queue_start(ports[0], vmdq_rx_q) != 0) {\n+\t\t\tstruct vpool *vpool = &vpool_array[vmdq_rx_q];\n+\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"(%\"PRIu64\") In new_device: Failed to start \"\n+\t\t\t\t\"tx queue:%d\\n\",\n+\t\t\t\tdev->device_fh, vmdq_rx_q);\n+\n+\t\t\tmbuf_destroy_zcp(vpool);\n+\t\t\trte_free(vdev->regions_hpa);\n+\t\t\trte_free(vdev);\n+\t\t\treturn -1;\n+\t\t}\n+\n+\t\tif (rte_eth_dev_rx_queue_start(ports[0], vmdq_rx_q) != 0) {\n+\t\t\tstruct vpool *vpool = &vpool_array[vmdq_rx_q];\n+\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"(%\"PRIu64\") In new_device: Failed to start \"\n+\t\t\t\t\"rx queue:%d\\n\",\n+\t\t\t\tdev->device_fh, vmdq_rx_q);\n+\n+\t\t\t/* Stop the TX queue. */\n+\t\t\tif (rte_eth_dev_tx_queue_stop(ports[0],\n+\t\t\t\tvmdq_rx_q) != 0) {\n+\t\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\t\"(%\"PRIu64\") In new_device: Failed to \"\n+\t\t\t\t\t\"stop tx queue:%d\\n\",\n+\t\t\t\t\tdev->device_fh, vmdq_rx_q);\n+\t\t\t}\n+\n+\t\t\tmbuf_destroy_zcp(vpool);\n+\t\t\trte_free(vdev->regions_hpa);\n+\t\t\trte_free(vdev);\n+\t\t\treturn -1;\n+\t\t}\n+\t}\n+\n+\t/* Reset ready flag*/\n+\tvdev->ready = DEVICE_MAC_LEARNING;\n+\tvdev->remove = 0;\n+\n+\t/* Find a suitable lcore to add the device. */\n+\tRTE_LCORE_FOREACH_SLAVE(lcore) {\n+\t\tif (lcore_info[lcore].lcore_ll->device_num < device_num_min) {\n+\t\t\tdevice_num_min = lcore_info[lcore].lcore_ll->device_num;\n+\t\t\tcore_add = lcore;\n+\t\t}\n+\t}\n+\t/* Add device to per core linked list */\n+\tll_dev =\n+\tget_data_ll_free_entry(&lcore_info[core_add].lcore_ll->ll_root_free);\n+\tif (ll_dev == NULL) {\n+\t\tRTE_LOG(INFO, VHOST_DATA,\n+\t\t\"(%\"PRIu64\") Failed to add device to data core\\n\",\n+\t\tdev->device_fh);\n+\t\tvdev->ready = DEVICE_SAFE_REMOVE;\n+\t\tdestroy_device(dev);\n+\t\tif (vdev->regions_hpa)\n+\t\t\trte_free(vdev->regions_hpa);\n+\t\trte_free(vdev);\n+\t\treturn -1;\n+\t}\n+\tll_dev->vdev = vdev;\n+\tvdev->coreid = core_add;\n+\n+\n+\n+\tadd_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used,\n+\t\t\tll_dev);\n+\n+\t/* Initialize device stats */\n+\tmemset(&dev_statistics[dev->device_fh], 0,\n+\t\tsizeof(struct device_statistics));\n+\n+\t/* Disable notifications. */\n+\trte_vhost_enable_guest_notification(dev, VIRTIO_RXQ, 0);\n+\trte_vhost_enable_guest_notification(dev, VIRTIO_TXQ, 0);\n+\tlcore_info[vdev->coreid].lcore_ll->device_num++;\n+\tdev->flags |= VIRTIO_DEV_RUNNING;\n+\n+\tRTE_LOG(INFO, VHOST_DATA,\n+\t\t\"(%\"PRIu64\") Device has been added to data core %d\\n\",\n+\t\tdev->device_fh, vdev->coreid);\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * These callback allow devices to be added to the data core when configuration\n+ * has been fully complete.\n+ */\n+static const struct virtio_net_device_ops virtio_net_device_ops = {\n+\t.new_device =  new_device,\n+\t.destroy_device = destroy_device,\n+};\n+\n+/*\n+ * This is a thread will wake up after a period to print stats if the user has\n+ * enabled them.\n+ */\n+static void\n+print_stats(void)\n+{\n+\tstruct virtio_net_data_ll *dev_ll;\n+\tuint64_t tx_dropped, rx_dropped;\n+\tuint64_t tx, tx_total, rx, rx_total;\n+\tuint32_t device_fh;\n+\tconst char clr[] = { 27, '[', '2', 'J', '\\0' };\n+\tconst char top_left[] = { 27, '[', '1', ';', '1', 'H', '\\0' };\n+\n+\twhile (1) {\n+\t\tsleep(enable_stats);\n+\n+\t\t/*  Clear screen and move to top left */\n+\t\tprintf(\"%s%s\", clr, top_left);\n+\n+\t\tprintf(\"\\nDevice statistics \"\n+\t\t\t\"====================================\");\n+\n+\t\tdev_ll = ll_root_used;\n+\t\twhile (dev_ll != NULL) {\n+\t\t\tdevice_fh = (uint32_t)dev_ll->vdev->dev->device_fh;\n+\t\t\ttx_total = dev_statistics[device_fh].tx_total;\n+\t\t\ttx = dev_statistics[device_fh].tx;\n+\t\t\ttx_dropped = tx_total - tx;\n+\t\t\tif (zero_copy == 0) {\n+\t\t\t\trx_total = rte_atomic64_read(\n+\t\t\t\t\t&dev_statistics[device_fh].rx_total_atomic);\n+\t\t\t\trx = rte_atomic64_read(\n+\t\t\t\t\t&dev_statistics[device_fh].rx_atomic);\n+\t\t\t} else {\n+\t\t\t\trx_total = dev_statistics[device_fh].rx_total;\n+\t\t\t\trx = dev_statistics[device_fh].rx;\n+\t\t\t}\n+\t\t\trx_dropped = rx_total - rx;\n+\n+\t\t\tprintf(\"\\nStatistics for device %\"PRIu32\" \"\n+\t\t\t\t\"------------------------------\\n\"\n+\t\t\t\t\t\"TX total:       %\"PRIu64\"\\n\"\n+\t\t\t\t\t\"TX dropped:     %\"PRIu64\"\\n\"\n+\t\t\t\t\t\"TX successful:  %\"PRIu64\"\\n\"\n+\t\t\t\t\t\"RX total:       %\"PRIu64\"\\n\"\n+\t\t\t\t\t\"RX dropped:     %\"PRIu64\"\\n\"\n+\t\t\t\t\t\"RX successful:  %\"PRIu64\"\",\n+\t\t\t\t\tdevice_fh,\n+\t\t\t\t\ttx_total,\n+\t\t\t\t\ttx_dropped,\n+\t\t\t\t\ttx,\n+\t\t\t\t\trx_total,\n+\t\t\t\t\trx_dropped,\n+\t\t\t\t\trx);\n+\n+\t\t\tdev_ll = dev_ll->next;\n+\t\t}\n+\t\tprintf(\"\\n======================================================\\n\");\n+\t}\n+}\n+\n+static void\n+setup_mempool_tbl(int socket, uint32_t index, char *pool_name,\n+\tchar *ring_name, uint32_t nb_mbuf)\n+{\n+\tuint16_t roomsize = VIRTIO_DESCRIPTOR_LEN_ZCP + RTE_PKTMBUF_HEADROOM;\n+\tvpool_array[index].pool\n+\t\t= rte_mempool_create(pool_name, nb_mbuf, MBUF_SIZE_ZCP,\n+\t\tMBUF_CACHE_SIZE_ZCP, sizeof(struct rte_pktmbuf_pool_private),\n+\t\trte_pktmbuf_pool_init, (void *)(uintptr_t)roomsize,\n+\t\trte_pktmbuf_init, NULL, socket, 0);\n+\tif (vpool_array[index].pool != NULL) {\n+\t\tvpool_array[index].ring\n+\t\t\t= rte_ring_create(ring_name,\n+\t\t\t\trte_align32pow2(nb_mbuf + 1),\n+\t\t\t\tsocket, RING_F_SP_ENQ | RING_F_SC_DEQ);\n+\t\tif (likely(vpool_array[index].ring != NULL)) {\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"in setup_mempool_tbl: mbuf count in \"\n+\t\t\t\t\"mempool is: %d\\n\",\n+\t\t\t\trte_mempool_count(vpool_array[index].pool));\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"in setup_mempool_tbl: mbuf count in \"\n+\t\t\t\t\"ring   is: %d\\n\",\n+\t\t\t\trte_ring_count(vpool_array[index].ring));\n+\t\t} else {\n+\t\t\trte_exit(EXIT_FAILURE, \"ring_create(%s) failed\",\n+\t\t\t\tring_name);\n+\t\t}\n+\n+\t\t/* Need consider head room. */\n+\t\tvpool_array[index].buf_size = roomsize - RTE_PKTMBUF_HEADROOM;\n+\t} else {\n+\t\trte_exit(EXIT_FAILURE, \"mempool_create(%s) failed\", pool_name);\n+\t}\n+}\n+\n+/*\n+ * Main function, does initialisation and calls the per-lcore functions.\n+ * The CUSE device is also registered here to handle the IOCTLs.\n+ */\n+int\n+MAIN(int argc, char *argv[])\n+{\n+\tstruct rte_mempool *mbuf_pool = NULL;\n+\tunsigned lcore_id, core_id = 0;\n+\tunsigned nb_ports, valid_num_ports;\n+\tint ret;\n+\tuint8_t portid, queue_id = 0;\n+\tstatic pthread_t tid;\n+\n+\t/* init EAL */\n+\tret = rte_eal_init(argc, argv);\n+\tif (ret < 0)\n+\t\trte_exit(EXIT_FAILURE, \"Error with EAL initialization\\n\");\n+\targc -= ret;\n+\targv += ret;\n+\n+\t/* parse app arguments */\n+\tret = us_vhost_parse_args(argc, argv);\n+\tif (ret < 0)\n+\t\trte_exit(EXIT_FAILURE, \"Invalid argument\\n\");\n+\n+\tif (rte_eal_pci_probe() != 0)\n+\t\trte_exit(EXIT_FAILURE,\n+\t\t\t\"Error with NIC driver initialization\\n\");\n+\n+\tfor (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)\n+\t\tif (rte_lcore_is_enabled(lcore_id))\n+\t\t\tlcore_ids[core_id++] = lcore_id;\n+\n+\tif (rte_lcore_count() > RTE_MAX_LCORE)\n+\t\trte_exit(EXIT_FAILURE, \"Not enough cores\\n\");\n+\n+\t/*set the number of swithcing cores available*/\n+\tnum_switching_cores = rte_lcore_count()-1;\n+\n+\t/* Get the number of physical ports. */\n+\tnb_ports = rte_eth_dev_count();\n+\tif (nb_ports > RTE_MAX_ETHPORTS)\n+\t\tnb_ports = RTE_MAX_ETHPORTS;\n+\n+\t/*\n+\t * Update the global var NUM_PORTS and global array PORTS\n+\t * and get value of var VALID_NUM_PORTS according to system ports number\n+\t */\n+\tvalid_num_ports = check_ports_num(nb_ports);\n+\n+\tif ((valid_num_ports ==  0) || (valid_num_ports > MAX_SUP_PORTS)) {\n+\t\tRTE_LOG(INFO, VHOST_PORT, \"Current enabled port number is %u,\"\n+\t\t\t\"but only %u port can be enabled\\n\",\n+\t\t\tnum_ports, MAX_SUP_PORTS);\n+\t\treturn -1;\n+\t}\n+\n+\tif (zero_copy == 0) {\n+\t\t/* Create the mbuf pool. */\n+\t\tmbuf_pool = rte_mempool_create(\n+\t\t\t\t\"MBUF_POOL\",\n+\t\t\t\tNUM_MBUFS_PER_PORT\n+\t\t\t\t* valid_num_ports,\n+\t\t\t\tMBUF_SIZE, MBUF_CACHE_SIZE,\n+\t\t\t\tsizeof(struct rte_pktmbuf_pool_private),\n+\t\t\t\trte_pktmbuf_pool_init, NULL,\n+\t\t\t\trte_pktmbuf_init, NULL,\n+\t\t\t\trte_socket_id(), 0);\n+\t\tif (mbuf_pool == NULL)\n+\t\t\trte_exit(EXIT_FAILURE, \"Cannot create mbuf pool\\n\");\n+\n+\t\tfor (queue_id = 0; queue_id < MAX_QUEUES + 1; queue_id++)\n+\t\t\tvpool_array[queue_id].pool = mbuf_pool;\n+\n+\t\tif (vm2vm_mode == VM2VM_HARDWARE) {\n+\t\t\t/* Enable VT loop back to let L2 switch to do it. */\n+\t\t\tvmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1;\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"Enable loop back for L2 switch in vmdq.\\n\");\n+\t\t}\n+\t} else {\n+\t\tuint32_t nb_mbuf;\n+\t\tchar pool_name[RTE_MEMPOOL_NAMESIZE];\n+\t\tchar ring_name[RTE_MEMPOOL_NAMESIZE];\n+\n+\t\trx_conf_default.start_rx_per_q = (uint8_t)zero_copy;\n+\t\trx_conf_default.rx_drop_en = 0;\n+\t\ttx_conf_default.start_tx_per_q = (uint8_t)zero_copy;\n+\t\tnb_mbuf = num_rx_descriptor\n+\t\t\t+ num_switching_cores * MBUF_CACHE_SIZE_ZCP\n+\t\t\t+ num_switching_cores * MAX_PKT_BURST;\n+\n+\t\tfor (queue_id = 0; queue_id < MAX_QUEUES; queue_id++) {\n+\t\t\tsnprintf(pool_name, sizeof(pool_name),\n+\t\t\t\t\"rxmbuf_pool_%u\", queue_id);\n+\t\t\tsnprintf(ring_name, sizeof(ring_name),\n+\t\t\t\t\"rxmbuf_ring_%u\", queue_id);\n+\t\t\tsetup_mempool_tbl(rte_socket_id(), queue_id,\n+\t\t\t\tpool_name, ring_name, nb_mbuf);\n+\t\t}\n+\n+\t\tnb_mbuf = num_tx_descriptor\n+\t\t\t\t+ num_switching_cores * MBUF_CACHE_SIZE_ZCP\n+\t\t\t\t+ num_switching_cores * MAX_PKT_BURST;\n+\n+\t\tfor (queue_id = 0; queue_id < MAX_QUEUES; queue_id++) {\n+\t\t\tsnprintf(pool_name, sizeof(pool_name),\n+\t\t\t\t\"txmbuf_pool_%u\", queue_id);\n+\t\t\tsnprintf(ring_name, sizeof(ring_name),\n+\t\t\t\t\"txmbuf_ring_%u\", queue_id);\n+\t\t\tsetup_mempool_tbl(rte_socket_id(),\n+\t\t\t\t(queue_id + MAX_QUEUES),\n+\t\t\t\tpool_name, ring_name, nb_mbuf);\n+\t\t}\n+\n+\t\tif (vm2vm_mode == VM2VM_HARDWARE) {\n+\t\t\t/* Enable VT loop back to let L2 switch to do it. */\n+\t\t\tvmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1;\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"Enable loop back for L2 switch in vmdq.\\n\");\n+\t\t}\n+\t}\n+\t/* Set log level. */\n+\trte_set_log_level(LOG_LEVEL);\n+\n+\t/* initialize all ports */\n+\tfor (portid = 0; portid < nb_ports; portid++) {\n+\t\t/* skip ports that are not enabled */\n+\t\tif ((enabled_port_mask & (1 << portid)) == 0) {\n+\t\t\tRTE_LOG(INFO, VHOST_PORT,\n+\t\t\t\t\"Skipping disabled port %d\\n\", portid);\n+\t\t\tcontinue;\n+\t\t}\n+\t\tif (port_init(portid) != 0)\n+\t\t\trte_exit(EXIT_FAILURE,\n+\t\t\t\t\"Cannot initialize network ports\\n\");\n+\t}\n+\n+\t/* Initialise all linked lists. */\n+\tif (init_data_ll() == -1)\n+\t\trte_exit(EXIT_FAILURE, \"Failed to initialize linked list\\n\");\n+\n+\t/* Initialize device stats */\n+\tmemset(&dev_statistics, 0, sizeof(dev_statistics));\n+\n+\t/* Enable stats if the user option is set. */\n+\tif (enable_stats)\n+\t\tpthread_create(&tid, NULL, (void *)print_stats, NULL);\n+\n+\t/* Launch all data cores. */\n+\tif (zero_copy == 0) {\n+\t\tRTE_LCORE_FOREACH_SLAVE(lcore_id) {\n+\t\t\trte_eal_remote_launch(switch_worker,\n+\t\t\t\tmbuf_pool, lcore_id);\n+\t\t}\n+\t} else {\n+\t\tuint32_t count_in_mempool, index, i;\n+\t\tfor (index = 0; index < 2 * MAX_QUEUES; index++) {\n+\t\t\t/* For all RX and TX queues. */\n+\t\t\tcount_in_mempool\n+\t\t\t\t= rte_mempool_count(vpool_array[index].pool);\n+\n+\t\t\t/*\n+\t\t\t * Transfer all un-attached mbufs from vpool.pool\n+\t\t\t * to vpoo.ring.\n+\t\t\t */\n+\t\t\tfor (i = 0; i < count_in_mempool; i++) {\n+\t\t\t\tstruct rte_mbuf *mbuf\n+\t\t\t\t\t= __rte_mbuf_raw_alloc(\n+\t\t\t\t\t\tvpool_array[index].pool);\n+\t\t\t\trte_ring_sp_enqueue(vpool_array[index].ring,\n+\t\t\t\t\t\t(void *)mbuf);\n+\t\t\t}\n+\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"in MAIN: mbuf count in mempool at initial \"\n+\t\t\t\t\"is: %d\\n\", count_in_mempool);\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"in MAIN: mbuf count in  ring at initial  is :\"\n+\t\t\t\t\" %d\\n\",\n+\t\t\t\trte_ring_count(vpool_array[index].ring));\n+\t\t}\n+\n+\t\tRTE_LCORE_FOREACH_SLAVE(lcore_id)\n+\t\t\trte_eal_remote_launch(switch_worker_zcp, NULL,\n+\t\t\t\tlcore_id);\n+\t}\n+\n+\tif (mergeable == 0)\n+\t\trte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);\n+\n+\t/* Register CUSE device to handle IOCTLs. */\n+\tret = rte_vhost_driver_register((char *)&dev_basename);\n+\tif (ret != 0)\n+\t\trte_exit(EXIT_FAILURE, \"CUSE device setup failure.\\n\");\n+\n+\trte_vhost_driver_callback_register(&virtio_net_device_ops);\n+\n+\t/* Start CUSE session. */\n+\trte_vhost_driver_session_start();\n+\n+\treturn 0;\n+\n+}\ndiff --git a/examples/vhost/main.h b/examples/vhost/main.h\nnew file mode 100644\nindex 0000000..866ba3a\n--- /dev/null\n+++ b/examples/vhost/main.h\n@@ -0,0 +1,109 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.\n+ *   All rights reserved.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the name of Intel Corporation nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+#ifndef _MAIN_H_\n+#define _MAIN_H_\n+\n+#ifdef RTE_EXEC_ENV_BAREMETAL\n+#define MAIN _main\n+#else\n+#define MAIN main\n+#endif\n+\n+/**\n+ * Information relating to memory regions including offsets to\n+ * addresses in host physical space.\n+ */\n+struct virtio_memory_regions_hpa {\n+\t/**< Base guest physical address of region. */\n+\tuint64_t    guest_phys_address;\n+\t/**< End guest physical address of region. */\n+\tuint64_t    guest_phys_address_end;\n+\t/**< Size of region. */\n+\tuint64_t    memory_size;\n+\t/**< Offset of region for gpa to hpa translation. */\n+\tuint64_t    host_phys_addr_offset;\n+};\n+\n+/**\n+ * Device linked list structure for data path.\n+ */\n+struct vhost_dev {\n+\t/**< Pointer to device created by vhost lib. */\n+\tstruct virtio_net      *dev;\n+\t/**< Number of memory regions for gpa to hpa translation. */\n+\tuint32_t nregions_hpa;\n+\t/**< Memory region information for gpa to hpa translation. */\n+\tstruct virtio_memory_regions_hpa *regions_hpa;\n+\t/**< Device MAC address (Obtained on first TX packet). */\n+\tstruct ether_addr mac_address;\n+\t/**< RX VMDQ queue number. */\n+\tuint16_t vmdq_rx_q;\n+\t/**< Vlan tag assigned to the pool */\n+\tuint32_t vlan_tag;\n+\t/**< Data core that the device is added to. */\n+\tuint16_t coreid;\n+\t/**< A device is set as ready if the MAC address has been set. */\n+\tvolatile uint8_t ready;\n+\t/**< Device is marked for removal from the data core. */\n+\tvolatile uint8_t remove;\n+} __rte_cache_aligned;\n+\n+struct virtio_net_data_ll {\n+\t/* Pointer to device created by configuration core. */\n+\tstruct vhost_dev\t\t*vdev;\n+\t/* Pointer to next device in linked list. */\n+\tstruct virtio_net_data_ll\t*next;\n+};\n+\n+/**\n+ * Structure containing data core specific information.\n+ */\n+struct lcore_ll_info {\n+\t/**< Pointer to head in free linked list. */\n+\tstruct virtio_net_data_ll *ll_root_free;\n+\t/**< Pointer to head of used linked list. */\n+\tstruct virtio_net_data_ll *ll_root_used;\n+\t/**< Number of devices on lcore. */\n+\tuint32_t device_num;\n+\t/**< Flag to synchronize device removal. */\n+\tvolatile uint8_t  dev_removal_flag;\n+};\n+\n+struct lcore_info {\n+\t/* Pointer to data core specific lcore_ll_info struct */\n+\tstruct lcore_ll_info *lcore_ll;\n+};\n+\n+int MAIN(int argc, char **argv);\n+#endif /* _MAIN_H_ */\n",
    "prefixes": [
        "dpdk-dev",
        "3/3"
    ]
}