Show a patch.

GET /api/patches/426/
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 426,
    "url": "http://patches.dpdk.org/api/patches/426/",
    "web_url": "http://patches.dpdk.org/patch/426/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk"
    },
    "msgid": "<1411046390-29478-2-git-send-email-huawei.xie@intel.com>",
    "date": "2014-09-18T13:19:49",
    "name": "[dpdk-dev,1/2] examples/vhost: copy old vhost example files",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "57c256a0bafd694bf74a56355cd33c22043a8627",
    "submitter": {
        "id": 16,
        "url": "http://patches.dpdk.org/api/people/16/",
        "name": "Huawei Xie",
        "email": "huawei.xie@intel.com"
    },
    "delegate": null,
    "mbox": "http://patches.dpdk.org/patch/426/mbox/",
    "series": [],
    "comments": "http://patches.dpdk.org/api/patches/426/comments/",
    "check": "pending",
    "checks": "http://patches.dpdk.org/api/patches/426/checks/",
    "tags": {},
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "References": "<1411046390-29478-1-git-send-email-huawei.xie@intel.com>",
        "X-Mailman-Version": "2.1.15",
        "X-IronPort-AV": "E=Sophos;i=\"5.04,547,1406617200\"; d=\"scan'208\";a=\"575075014\"",
        "Date": "Thu, 18 Sep 2014 21:19:49 +0800",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "X-BeenThere": "dev@dpdk.org",
        "Message-Id": "<1411046390-29478-2-git-send-email-huawei.xie@intel.com>",
        "X-Original-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [IPv6:::1])\n\tby dpdk.org (Postfix) with ESMTP id 2ABF3B3AB;\n\tThu, 18 Sep 2014 15:15:08 +0200 (CEST)",
            "from mga02.intel.com (mga02.intel.com [134.134.136.20])\n\tby dpdk.org (Postfix) with ESMTP id 513D2B3AA\n\tfor <dev@dpdk.org>; Thu, 18 Sep 2014 15:15:01 +0200 (CEST)",
            "from orsmga001.jf.intel.com ([10.7.209.18])\n\tby orsmga101.jf.intel.com with ESMTP; 18 Sep 2014 06:20:03 -0700",
            "from shvmail01.sh.intel.com ([10.239.29.42])\n\tby orsmga001.jf.intel.com with ESMTP; 18 Sep 2014 06:19:58 -0700",
            "from shecgisg003.sh.intel.com (shecgisg003.sh.intel.com\n\t[10.239.29.90])\n\tby shvmail01.sh.intel.com with ESMTP id s8IDJtxK001698;\n\tThu, 18 Sep 2014 21:19:55 +0800",
            "from shecgisg003.sh.intel.com (localhost [127.0.0.1])\n\tby shecgisg003.sh.intel.com (8.13.6/8.13.6/SuSE Linux 0.8) with ESMTP\n\tid s8IDJriY029520; Thu, 18 Sep 2014 21:19:55 +0800",
            "(from hxie5@localhost)\n\tby shecgisg003.sh.intel.com (8.13.6/8.13.6/Submit) id s8IDJr6D029516; \n\tThu, 18 Sep 2014 21:19:53 +0800"
        ],
        "List-Post": "<mailto:dev@dpdk.org>",
        "Subject": "[dpdk-dev] [PATCH 1/2] examples/vhost: copy old vhost example files",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>",
        "X-Mailer": "git-send-email 1.7.4.1",
        "Precedence": "list",
        "From": "Huawei Xie <huawei.xie@intel.com>",
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "X-ExtLoop1": "1",
        "List-Subscribe": "<http://dpdk.org/ml/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "List-Id": "patches and discussions about DPDK <dev.dpdk.org>",
        "Delivered-To": "patchwork@dpdk.org",
        "In-Reply-To": "<1411046390-29478-1-git-send-email-huawei.xie@intel.com>",
        "List-Unsubscribe": "<http://dpdk.org/ml/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "To": "dev@dpdk.org",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "main.c and main.h are refactored to become vhost lib in vhost lib patch, thus\nare removed. Here they are copied back from old vhost example without any \nmodification.\n\nSigned-off-by: Huawei Xie <huawei.xie@intel.com>\n---\n examples/vhost/main.c | 3722 +++++++++++++++++++++++++++++++++++++++++++++++++\n examples/vhost/main.h |   86 ++\n 2 files changed, 3808 insertions(+)\n create mode 100644 examples/vhost/main.c\n create mode 100644 examples/vhost/main.h",
    "diff": "diff --git a/examples/vhost/main.c b/examples/vhost/main.c\nnew file mode 100644\nindex 0000000..7d9e6a2\n--- /dev/null\n+++ b/examples/vhost/main.c\n@@ -0,0 +1,3722 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.\n+ *   All rights reserved.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the name of Intel Corporation nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+#include <arpa/inet.h>\n+#include <getopt.h>\n+#include <linux/if_ether.h>\n+#include <linux/if_vlan.h>\n+#include <linux/virtio_net.h>\n+#include <linux/virtio_ring.h>\n+#include <signal.h>\n+#include <stdint.h>\n+#include <sys/eventfd.h>\n+#include <sys/param.h>\n+#include <unistd.h>\n+\n+#include <rte_atomic.h>\n+#include <rte_cycles.h>\n+#include <rte_ethdev.h>\n+#include <rte_log.h>\n+#include <rte_string_fns.h>\n+#include <rte_malloc.h>\n+\n+#include \"main.h\"\n+#include \"virtio-net.h\"\n+#include \"vhost-net-cdev.h\"\n+\n+#define MAX_QUEUES 128\n+\n+/* the maximum number of external ports supported */\n+#define MAX_SUP_PORTS 1\n+\n+/*\n+ * Calculate the number of buffers needed per port\n+ */\n+#define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) +  \t\t\\\n+\t\t\t\t\t\t\t(num_switching_cores*MAX_PKT_BURST) +  \t\t\t\\\n+\t\t\t\t\t\t\t(num_switching_cores*RTE_TEST_TX_DESC_DEFAULT) +\\\n+\t\t\t\t\t\t\t(num_switching_cores*MBUF_CACHE_SIZE))\n+\n+#define MBUF_CACHE_SIZE 128\n+#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)\n+\n+/*\n+ * No frame data buffer allocated from host are required for zero copy\n+ * implementation, guest will allocate the frame data buffer, and vhost\n+ * directly use it.\n+ */\n+#define VIRTIO_DESCRIPTOR_LEN_ZCP 1518\n+#define MBUF_SIZE_ZCP (VIRTIO_DESCRIPTOR_LEN_ZCP + sizeof(struct rte_mbuf) \\\n+\t+ RTE_PKTMBUF_HEADROOM)\n+#define MBUF_CACHE_SIZE_ZCP 0\n+\n+/*\n+ * RX and TX Prefetch, Host, and Write-back threshold values should be\n+ * carefully set for optimal performance. Consult the network\n+ * controller's datasheet and supporting DPDK documentation for guidance\n+ * on how these parameters should be set.\n+ */\n+#define RX_PTHRESH 8 /* Default values of RX prefetch threshold reg. */\n+#define RX_HTHRESH 8 /* Default values of RX host threshold reg. */\n+#define RX_WTHRESH 4 /* Default values of RX write-back threshold reg. */\n+\n+/*\n+ * These default values are optimized for use with the Intel(R) 82599 10 GbE\n+ * Controller and the DPDK ixgbe PMD. Consider using other values for other\n+ * network controllers and/or network drivers.\n+ */\n+#define TX_PTHRESH 36 /* Default values of TX prefetch threshold reg. */\n+#define TX_HTHRESH 0  /* Default values of TX host threshold reg. */\n+#define TX_WTHRESH 0  /* Default values of TX write-back threshold reg. */\n+\n+#define MAX_PKT_BURST 32 \t\t/* Max burst size for RX/TX */\n+#define MAX_MRG_PKT_BURST 16 \t/* Max burst for merge buffers. Set to 1 due to performance issue. */\n+#define BURST_TX_DRAIN_US 100 \t/* TX drain every ~100us */\n+\n+#define BURST_RX_WAIT_US 15 \t/* Defines how long we wait between retries on RX */\n+#define BURST_RX_RETRIES 4\t\t/* Number of retries on RX. */\n+\n+#define JUMBO_FRAME_MAX_SIZE    0x2600\n+\n+/* State of virtio device. */\n+#define DEVICE_MAC_LEARNING 0\n+#define DEVICE_RX\t\t\t1\n+#define DEVICE_SAFE_REMOVE\t2\n+\n+/* Config_core_flag status definitions. */\n+#define REQUEST_DEV_REMOVAL 1\n+#define ACK_DEV_REMOVAL 0\n+\n+/* Configurable number of RX/TX ring descriptors */\n+#define RTE_TEST_RX_DESC_DEFAULT 1024\n+#define RTE_TEST_TX_DESC_DEFAULT 512\n+\n+/*\n+ * Need refine these 2 macros for legacy and DPDK based front end:\n+ * Max vring avail descriptor/entries from guest - MAX_PKT_BURST\n+ * And then adjust power 2.\n+ */\n+/*\n+ * For legacy front end, 128 descriptors,\n+ * half for virtio header, another half for mbuf.\n+ */\n+#define RTE_TEST_RX_DESC_DEFAULT_ZCP 32   /* legacy: 32, DPDK virt FE: 128. */\n+#define RTE_TEST_TX_DESC_DEFAULT_ZCP 64   /* legacy: 64, DPDK virt FE: 64.  */\n+\n+/* Get first 4 bytes in mbuf headroom. */\n+#define MBUF_HEADROOM_UINT32(mbuf) (*(uint32_t *)((uint8_t *)(mbuf) \\\n+\t\t+ sizeof(struct rte_mbuf)))\n+\n+/* true if x is a power of 2 */\n+#define POWEROF2(x) ((((x)-1) & (x)) == 0)\n+\n+#define INVALID_PORT_ID 0xFF\n+\n+/* Max number of devices. Limited by vmdq. */\n+#define MAX_DEVICES 64\n+\n+/* Size of buffers used for snprintfs. */\n+#define MAX_PRINT_BUFF 6072\n+\n+/* Maximum character device basename size. */\n+#define MAX_BASENAME_SZ 10\n+\n+/* Maximum long option length for option parsing. */\n+#define MAX_LONG_OPT_SZ 64\n+\n+/* Used to compare MAC addresses. */\n+#define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL\n+\n+/* Number of descriptors per cacheline. */\n+#define DESC_PER_CACHELINE (CACHE_LINE_SIZE / sizeof(struct vring_desc))\n+\n+/* mask of enabled ports */\n+static uint32_t enabled_port_mask = 0;\n+\n+/*Number of switching cores enabled*/\n+static uint32_t num_switching_cores = 0;\n+\n+/* number of devices/queues to support*/\n+static uint32_t num_queues = 0;\n+uint32_t num_devices = 0;\n+\n+/*\n+ * Enable zero copy, pkts buffer will directly dma to hw descriptor,\n+ * disabled on default.\n+ */\n+static uint32_t zero_copy;\n+\n+/* number of descriptors to apply*/\n+static uint32_t num_rx_descriptor = RTE_TEST_RX_DESC_DEFAULT_ZCP;\n+static uint32_t num_tx_descriptor = RTE_TEST_TX_DESC_DEFAULT_ZCP;\n+\n+/* max ring descriptor, ixgbe, i40e, e1000 all are 4096. */\n+#define MAX_RING_DESC 4096\n+\n+struct vpool {\n+\tstruct rte_mempool *pool;\n+\tstruct rte_ring *ring;\n+\tuint32_t buf_size;\n+} vpool_array[MAX_QUEUES+MAX_QUEUES];\n+\n+/* Enable VM2VM communications. If this is disabled then the MAC address compare is skipped. */\n+typedef enum {\n+\tVM2VM_DISABLED = 0,\n+\tVM2VM_SOFTWARE = 1,\n+\tVM2VM_HARDWARE = 2,\n+\tVM2VM_LAST\n+} vm2vm_type;\n+static vm2vm_type vm2vm_mode = VM2VM_SOFTWARE;\n+\n+/* The type of host physical address translated from guest physical address. */\n+typedef enum {\n+\tPHYS_ADDR_CONTINUOUS = 0,\n+\tPHYS_ADDR_CROSS_SUBREG = 1,\n+\tPHYS_ADDR_INVALID = 2,\n+\tPHYS_ADDR_LAST\n+} hpa_type;\n+\n+/* Enable stats. */\n+static uint32_t enable_stats = 0;\n+/* Enable retries on RX. */\n+static uint32_t enable_retry = 1;\n+/* Specify timeout (in useconds) between retries on RX. */\n+static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;\n+/* Specify the number of retries on RX. */\n+static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;\n+\n+/* Character device basename. Can be set by user. */\n+static char dev_basename[MAX_BASENAME_SZ] = \"vhost-net\";\n+\n+/* Charater device index. Can be set by user. */\n+static uint32_t dev_index = 0;\n+\n+/* This can be set by the user so it is made available here. */\n+extern uint64_t VHOST_FEATURES;\n+\n+/* Default configuration for rx and tx thresholds etc. */\n+static struct rte_eth_rxconf rx_conf_default = {\n+\t.rx_thresh = {\n+\t\t.pthresh = RX_PTHRESH,\n+\t\t.hthresh = RX_HTHRESH,\n+\t\t.wthresh = RX_WTHRESH,\n+\t},\n+\t.rx_drop_en = 1,\n+};\n+\n+/*\n+ * These default values are optimized for use with the Intel(R) 82599 10 GbE\n+ * Controller and the DPDK ixgbe/igb PMD. Consider using other values for other\n+ * network controllers and/or network drivers.\n+ */\n+static struct rte_eth_txconf tx_conf_default = {\n+\t.tx_thresh = {\n+\t\t.pthresh = TX_PTHRESH,\n+\t\t.hthresh = TX_HTHRESH,\n+\t\t.wthresh = TX_WTHRESH,\n+\t},\n+\t.tx_free_thresh = 0, /* Use PMD default values */\n+\t.tx_rs_thresh = 0, /* Use PMD default values */\n+};\n+\n+/* empty vmdq configuration structure. Filled in programatically */\n+static struct rte_eth_conf vmdq_conf_default = {\n+\t.rxmode = {\n+\t\t.mq_mode        = ETH_MQ_RX_VMDQ_ONLY,\n+\t\t.split_hdr_size = 0,\n+\t\t.header_split   = 0, /**< Header Split disabled */\n+\t\t.hw_ip_checksum = 0, /**< IP checksum offload disabled */\n+\t\t.hw_vlan_filter = 0, /**< VLAN filtering disabled */\n+\t\t/*\n+\t\t * It is necessary for 1G NIC such as I350,\n+\t\t * this fixes bug of ipv4 forwarding in guest can't\n+\t\t * forward pakets from one virtio dev to another virtio dev.\n+\t\t */\n+\t\t.hw_vlan_strip  = 1, /**< VLAN strip enabled. */\n+\t\t.jumbo_frame    = 0, /**< Jumbo Frame Support disabled */\n+\t\t.hw_strip_crc   = 0, /**< CRC stripped by hardware */\n+\t},\n+\n+\t.txmode = {\n+\t\t.mq_mode = ETH_MQ_TX_NONE,\n+\t},\n+\t.rx_adv_conf = {\n+\t\t/*\n+\t\t * should be overridden separately in code with\n+\t\t * appropriate values\n+\t\t */\n+\t\t.vmdq_rx_conf = {\n+\t\t\t.nb_queue_pools = ETH_8_POOLS,\n+\t\t\t.enable_default_pool = 0,\n+\t\t\t.default_pool = 0,\n+\t\t\t.nb_pool_maps = 0,\n+\t\t\t.pool_map = {{0, 0},},\n+\t\t},\n+\t},\n+};\n+\n+static unsigned lcore_ids[RTE_MAX_LCORE];\n+static uint8_t ports[RTE_MAX_ETHPORTS];\n+static unsigned num_ports = 0; /**< The number of ports specified in command line */\n+\n+static const uint16_t external_pkt_default_vlan_tag = 2000;\n+const uint16_t vlan_tags[] = {\n+\t1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007,\n+\t1008, 1009, 1010, 1011,\t1012, 1013, 1014, 1015,\n+\t1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,\n+\t1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031,\n+\t1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039,\n+\t1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,\n+\t1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,\n+\t1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,\n+};\n+\n+/* ethernet addresses of ports */\n+static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];\n+\n+/* heads for the main used and free linked lists for the data path. */\n+static struct virtio_net_data_ll *ll_root_used = NULL;\n+static struct virtio_net_data_ll *ll_root_free = NULL;\n+\n+/* Array of data core structures containing information on individual core linked lists. */\n+static struct lcore_info lcore_info[RTE_MAX_LCORE];\n+\n+/* Used for queueing bursts of TX packets. */\n+struct mbuf_table {\n+\tunsigned len;\n+\tunsigned txq_id;\n+\tstruct rte_mbuf *m_table[MAX_PKT_BURST];\n+};\n+\n+/* TX queue for each data core. */\n+struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];\n+\n+/* TX queue fori each virtio device for zero copy. */\n+struct mbuf_table tx_queue_zcp[MAX_QUEUES];\n+\n+/* Vlan header struct used to insert vlan tags on TX. */\n+struct vlan_ethhdr {\n+\tunsigned char   h_dest[ETH_ALEN];\n+\tunsigned char   h_source[ETH_ALEN];\n+\t__be16          h_vlan_proto;\n+\t__be16          h_vlan_TCI;\n+\t__be16          h_vlan_encapsulated_proto;\n+};\n+\n+/* IPv4 Header */\n+struct ipv4_hdr {\n+\tuint8_t  version_ihl;\t\t/**< version and header length */\n+\tuint8_t  type_of_service;\t/**< type of service */\n+\tuint16_t total_length;\t\t/**< length of packet */\n+\tuint16_t packet_id;\t\t/**< packet ID */\n+\tuint16_t fragment_offset;\t/**< fragmentation offset */\n+\tuint8_t  time_to_live;\t\t/**< time to live */\n+\tuint8_t  next_proto_id;\t\t/**< protocol ID */\n+\tuint16_t hdr_checksum;\t\t/**< header checksum */\n+\tuint32_t src_addr;\t\t/**< source address */\n+\tuint32_t dst_addr;\t\t/**< destination address */\n+} __attribute__((__packed__));\n+\n+/* Header lengths. */\n+#define VLAN_HLEN       4\n+#define VLAN_ETH_HLEN   18\n+\n+/* Per-device statistics struct */\n+struct device_statistics {\n+\tuint64_t tx_total;\n+\trte_atomic64_t rx_total_atomic;\n+\tuint64_t rx_total;\n+\tuint64_t tx;\n+\trte_atomic64_t rx_atomic;\n+\tuint64_t rx;\n+} __rte_cache_aligned;\n+struct device_statistics dev_statistics[MAX_DEVICES];\n+\n+/*\n+ * Builds up the correct configuration for VMDQ VLAN pool map\n+ * according to the pool & queue limits.\n+ */\n+static inline int\n+get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)\n+{\n+\tstruct rte_eth_vmdq_rx_conf conf;\n+\tunsigned i;\n+\n+\tmemset(&conf, 0, sizeof(conf));\n+\tconf.nb_queue_pools = (enum rte_eth_nb_pools)num_devices;\n+\tconf.nb_pool_maps = num_devices;\n+\tconf.enable_loop_back =\n+\t\tvmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back;\n+\n+\tfor (i = 0; i < conf.nb_pool_maps; i++) {\n+\t\tconf.pool_map[i].vlan_id = vlan_tags[ i ];\n+\t\tconf.pool_map[i].pools = (1UL << i);\n+\t}\n+\n+\t(void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));\n+\t(void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,\n+\t\t   sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));\n+\treturn 0;\n+}\n+\n+/*\n+ * Validate the device number according to the max pool number gotten form\n+ * dev_info. If the device number is invalid, give the error message and\n+ * return -1. Each device must have its own pool.\n+ */\n+static inline int\n+validate_num_devices(uint32_t max_nb_devices)\n+{\n+\tif (num_devices > max_nb_devices) {\n+\t\tRTE_LOG(ERR, VHOST_PORT, \"invalid number of devices\\n\");\n+\t\treturn -1;\n+\t}\n+\treturn 0;\n+}\n+\n+/*\n+ * Initialises a given port using global settings and with the rx buffers\n+ * coming from the mbuf_pool passed as parameter\n+ */\n+static inline int\n+port_init(uint8_t port)\n+{\n+\tstruct rte_eth_dev_info dev_info;\n+\tstruct rte_eth_conf port_conf;\n+\tuint16_t rx_rings, tx_rings;\n+\tuint16_t rx_ring_size, tx_ring_size;\n+\tint retval;\n+\tuint16_t q;\n+\n+\t/* The max pool number from dev_info will be used to validate the pool number specified in cmd line */\n+\trte_eth_dev_info_get (port, &dev_info);\n+\n+\t/*configure the number of supported virtio devices based on VMDQ limits */\n+\tnum_devices = dev_info.max_vmdq_pools;\n+\tnum_queues = dev_info.max_rx_queues;\n+\n+\tif (zero_copy) {\n+\t\trx_ring_size = num_rx_descriptor;\n+\t\ttx_ring_size = num_tx_descriptor;\n+\t\ttx_rings = dev_info.max_tx_queues;\n+\t} else {\n+\t\trx_ring_size = RTE_TEST_RX_DESC_DEFAULT;\n+\t\ttx_ring_size = RTE_TEST_TX_DESC_DEFAULT;\n+\t\ttx_rings = (uint16_t)rte_lcore_count();\n+\t}\n+\n+\tretval = validate_num_devices(MAX_DEVICES);\n+\tif (retval < 0)\n+\t\treturn retval;\n+\n+\t/* Get port configuration. */\n+\tretval = get_eth_conf(&port_conf, num_devices);\n+\tif (retval < 0)\n+\t\treturn retval;\n+\n+\tif (port >= rte_eth_dev_count()) return -1;\n+\n+\trx_rings = (uint16_t)num_queues,\n+\t/* Configure ethernet device. */\n+\tretval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);\n+\tif (retval != 0)\n+\t\treturn retval;\n+\n+\t/* Setup the queues. */\n+\tfor (q = 0; q < rx_rings; q ++) {\n+\t\tretval = rte_eth_rx_queue_setup(port, q, rx_ring_size,\n+\t\t\t\t\t\trte_eth_dev_socket_id(port), &rx_conf_default,\n+\t\t\t\t\t\tvpool_array[q].pool);\n+\t\tif (retval < 0)\n+\t\t\treturn retval;\n+\t}\n+\tfor (q = 0; q < tx_rings; q ++) {\n+\t\tretval = rte_eth_tx_queue_setup(port, q, tx_ring_size,\n+\t\t\t\t\t\trte_eth_dev_socket_id(port), &tx_conf_default);\n+\t\tif (retval < 0)\n+\t\t\treturn retval;\n+\t}\n+\n+\t/* Start the device. */\n+\tretval  = rte_eth_dev_start(port);\n+\tif (retval < 0) {\n+\t\tRTE_LOG(ERR, VHOST_DATA, \"Failed to start the device.\\n\");\n+\t\treturn retval;\n+\t}\n+\n+\trte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);\n+\tRTE_LOG(INFO, VHOST_PORT, \"Max virtio devices supported: %u\\n\", num_devices);\n+\tRTE_LOG(INFO, VHOST_PORT, \"Port %u MAC: %02\"PRIx8\" %02\"PRIx8\" %02\"PRIx8\n+\t\t\t\" %02\"PRIx8\" %02\"PRIx8\" %02\"PRIx8\"\\n\",\n+\t\t\t(unsigned)port,\n+\t\t\tvmdq_ports_eth_addr[port].addr_bytes[0],\n+\t\t\tvmdq_ports_eth_addr[port].addr_bytes[1],\n+\t\t\tvmdq_ports_eth_addr[port].addr_bytes[2],\n+\t\t\tvmdq_ports_eth_addr[port].addr_bytes[3],\n+\t\t\tvmdq_ports_eth_addr[port].addr_bytes[4],\n+\t\t\tvmdq_ports_eth_addr[port].addr_bytes[5]);\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * Set character device basename.\n+ */\n+static int\n+us_vhost_parse_basename(const char *q_arg)\n+{\n+\t/* parse number string */\n+\n+\tif (strnlen(q_arg, MAX_BASENAME_SZ) > MAX_BASENAME_SZ)\n+\t\treturn -1;\n+\telse\n+\t\tsnprintf((char*)&dev_basename, MAX_BASENAME_SZ, \"%s\", q_arg);\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * Parse the portmask provided at run time.\n+ */\n+static int\n+parse_portmask(const char *portmask)\n+{\n+\tchar *end = NULL;\n+\tunsigned long pm;\n+\n+\terrno = 0;\n+\n+\t/* parse hexadecimal string */\n+\tpm = strtoul(portmask, &end, 16);\n+\tif ((portmask[0] == '\\0') || (end == NULL) || (*end != '\\0') || (errno != 0))\n+\t\treturn -1;\n+\n+\tif (pm == 0)\n+\t\treturn -1;\n+\n+\treturn pm;\n+\n+}\n+\n+/*\n+ * Parse num options at run time.\n+ */\n+static int\n+parse_num_opt(const char *q_arg, uint32_t max_valid_value)\n+{\n+\tchar *end = NULL;\n+\tunsigned long num;\n+\n+\terrno = 0;\n+\n+\t/* parse unsigned int string */\n+\tnum = strtoul(q_arg, &end, 10);\n+\tif ((q_arg[0] == '\\0') || (end == NULL) || (*end != '\\0') || (errno != 0))\n+\t\treturn -1;\n+\n+\tif (num > max_valid_value)\n+\t\treturn -1;\n+\n+\treturn num;\n+\n+}\n+\n+/*\n+ * Display usage\n+ */\n+static void\n+us_vhost_usage(const char *prgname)\n+{\n+\tRTE_LOG(INFO, VHOST_CONFIG, \"%s [EAL options] -- -p PORTMASK\\n\"\n+\t\"\t\t--vm2vm [0|1|2]\\n\"\n+\t\"\t\t--rx_retry [0|1] --mergeable [0|1] --stats [0-N]\\n\"\n+\t\"\t\t--dev-basename <name> --dev-index [0-N]\\n\"\n+\t\"\t\t--nb-devices ND\\n\"\n+\t\"\t\t-p PORTMASK: Set mask for ports to be used by application\\n\"\n+\t\"\t\t--vm2vm [0|1|2]: disable/software(default)/hardware vm2vm comms\\n\"\n+\t\"\t\t--rx-retry [0|1]: disable/enable(default) retries on rx. Enable retry if destintation queue is full\\n\"\n+\t\"\t\t--rx-retry-delay [0-N]: timeout(in usecond) between retries on RX. This makes effect only if retries on rx enabled\\n\"\n+\t\"\t\t--rx-retry-num [0-N]: the number of retries on rx. This makes effect only if retries on rx enabled\\n\"\n+\t\"\t\t--mergeable [0|1]: disable(default)/enable RX mergeable buffers\\n\"\n+\t\"\t\t--stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\\n\"\n+\t\"\t\t--dev-basename: The basename to be used for the character device.\\n\"\n+\t\"\t\t--dev-index [0-N]: Defaults to zero if not used. Index is appended to basename.\\n\"\n+\t\"\t\t--zero-copy [0|1]: disable(default)/enable rx/tx \"\n+\t\t\t\"zero copy\\n\"\n+\t\"\t\t--rx-desc-num [0-N]: the number of descriptors on rx, \"\n+\t\t\t\"used only when zero copy is enabled.\\n\"\n+\t\"\t\t--tx-desc-num [0-N]: the number of descriptors on tx, \"\n+\t\t\t\"used only when zero copy is enabled.\\n\",\n+\t       prgname);\n+}\n+\n+/*\n+ * Parse the arguments given in the command line of the application.\n+ */\n+static int\n+us_vhost_parse_args(int argc, char **argv)\n+{\n+\tint opt, ret;\n+\tint option_index;\n+\tunsigned i;\n+\tconst char *prgname = argv[0];\n+\tstatic struct option long_option[] = {\n+\t\t{\"vm2vm\", required_argument, NULL, 0},\n+\t\t{\"rx-retry\", required_argument, NULL, 0},\n+\t\t{\"rx-retry-delay\", required_argument, NULL, 0},\n+\t\t{\"rx-retry-num\", required_argument, NULL, 0},\n+\t\t{\"mergeable\", required_argument, NULL, 0},\n+\t\t{\"stats\", required_argument, NULL, 0},\n+\t\t{\"dev-basename\", required_argument, NULL, 0},\n+\t\t{\"dev-index\", required_argument, NULL, 0},\n+\t\t{\"zero-copy\", required_argument, NULL, 0},\n+\t\t{\"rx-desc-num\", required_argument, NULL, 0},\n+\t\t{\"tx-desc-num\", required_argument, NULL, 0},\n+\t\t{NULL, 0, 0, 0},\n+\t};\n+\n+\t/* Parse command line */\n+\twhile ((opt = getopt_long(argc, argv, \"p:\",long_option, &option_index)) != EOF) {\n+\t\tswitch (opt) {\n+\t\t/* Portmask */\n+\t\tcase 'p':\n+\t\t\tenabled_port_mask = parse_portmask(optarg);\n+\t\t\tif (enabled_port_mask == 0) {\n+\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG, \"Invalid portmask\\n\");\n+\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\treturn -1;\n+\t\t\t}\n+\t\t\tbreak;\n+\n+\t\tcase 0:\n+\t\t\t/* Enable/disable vm2vm comms. */\n+\t\t\tif (!strncmp(long_option[option_index].name, \"vm2vm\",\n+\t\t\t\tMAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, (VM2VM_LAST - 1));\n+\t\t\t\tif (ret == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG,\n+\t\t\t\t\t\t\"Invalid argument for \"\n+\t\t\t\t\t\t\"vm2vm [0|1|2]\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else {\n+\t\t\t\t\tvm2vm_mode = (vm2vm_type)ret;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Enable/disable retries on RX. */\n+\t\t\tif (!strncmp(long_option[option_index].name, \"rx-retry\", MAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, 1);\n+\t\t\t\tif (ret == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG, \"Invalid argument for rx-retry [0|1]\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else {\n+\t\t\t\t\tenable_retry = ret;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Specify the retries delay time (in useconds) on RX. */\n+\t\t\tif (!strncmp(long_option[option_index].name, \"rx-retry-delay\", MAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, INT32_MAX);\n+\t\t\t\tif (ret == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG, \"Invalid argument for rx-retry-delay [0-N]\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else {\n+\t\t\t\t\tburst_rx_delay_time = ret;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Specify the retries number on RX. */\n+\t\t\tif (!strncmp(long_option[option_index].name, \"rx-retry-num\", MAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, INT32_MAX);\n+\t\t\t\tif (ret == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG, \"Invalid argument for rx-retry-num [0-N]\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else {\n+\t\t\t\t\tburst_rx_retry_num = ret;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Enable/disable RX mergeable buffers. */\n+\t\t\tif (!strncmp(long_option[option_index].name, \"mergeable\", MAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, 1);\n+\t\t\t\tif (ret == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG, \"Invalid argument for mergeable [0|1]\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else {\n+\t\t\t\t\tif (ret) {\n+\t\t\t\t\t\tvmdq_conf_default.rxmode.jumbo_frame = 1;\n+\t\t\t\t\t\tvmdq_conf_default.rxmode.max_rx_pkt_len\n+\t\t\t\t\t\t\t= JUMBO_FRAME_MAX_SIZE;\n+\t\t\t\t\t\tVHOST_FEATURES = (1ULL << VIRTIO_NET_F_MRG_RXBUF);\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Enable/disable stats. */\n+\t\t\tif (!strncmp(long_option[option_index].name, \"stats\", MAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, INT32_MAX);\n+\t\t\t\tif (ret == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG, \"Invalid argument for stats [0..N]\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else {\n+\t\t\t\t\tenable_stats = ret;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Set character device basename. */\n+\t\t\tif (!strncmp(long_option[option_index].name, \"dev-basename\", MAX_LONG_OPT_SZ)) {\n+\t\t\t\tif (us_vhost_parse_basename(optarg) == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG, \"Invalid argument for character device basename (Max %d characters)\\n\", MAX_BASENAME_SZ);\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Set character device index. */\n+\t\t\tif (!strncmp(long_option[option_index].name, \"dev-index\", MAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, INT32_MAX);\n+\t\t\t\tif (ret == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG, \"Invalid argument for character device index [0..N]\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else\n+\t\t\t\t\tdev_index = ret;\n+\t\t\t}\n+\n+\t\t\t/* Enable/disable rx/tx zero copy. */\n+\t\t\tif (!strncmp(long_option[option_index].name,\n+\t\t\t\t\"zero-copy\", MAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, 1);\n+\t\t\t\tif (ret == -1) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG,\n+\t\t\t\t\t\t\"Invalid argument\"\n+\t\t\t\t\t\t\" for zero-copy [0|1]\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else\n+\t\t\t\t\tzero_copy = ret;\n+\n+\t\t\t\tif (zero_copy) {\n+#ifdef RTE_MBUF_SCATTER_GATHER\n+\t\t\t\t\tRTE_LOG(ERR, VHOST_CONFIG, \"Before running \"\n+\t\t\t\t\t\"zero copy vhost APP, please \"\n+\t\t\t\t\t\"disable RTE_MBUF_SCATTER_GATHER\\n\"\n+\t\t\t\t\t\"in config file and then rebuild DPDK \"\n+\t\t\t\t\t\"core lib!\\n\"\n+\t\t\t\t\t\"Otherwise please disable zero copy \"\n+\t\t\t\t\t\"flag in command line!\\n\");\n+\t\t\t\t\treturn -1;\n+#endif\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Specify the descriptor number on RX. */\n+\t\t\tif (!strncmp(long_option[option_index].name,\n+\t\t\t\t\"rx-desc-num\", MAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, MAX_RING_DESC);\n+\t\t\t\tif ((ret == -1) || (!POWEROF2(ret))) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG,\n+\t\t\t\t\t\"Invalid argument for rx-desc-num[0-N],\"\n+\t\t\t\t\t\"power of 2 required.\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else {\n+\t\t\t\t\tnum_rx_descriptor = ret;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\t/* Specify the descriptor number on TX. */\n+\t\t\tif (!strncmp(long_option[option_index].name,\n+\t\t\t\t\"tx-desc-num\", MAX_LONG_OPT_SZ)) {\n+\t\t\t\tret = parse_num_opt(optarg, MAX_RING_DESC);\n+\t\t\t\tif ((ret == -1) || (!POWEROF2(ret))) {\n+\t\t\t\t\tRTE_LOG(INFO, VHOST_CONFIG,\n+\t\t\t\t\t\"Invalid argument for tx-desc-num [0-N],\"\n+\t\t\t\t\t\"power of 2 required.\\n\");\n+\t\t\t\t\tus_vhost_usage(prgname);\n+\t\t\t\t\treturn -1;\n+\t\t\t\t} else {\n+\t\t\t\t\tnum_tx_descriptor = ret;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\tbreak;\n+\n+\t\t\t/* Invalid option - print options. */\n+\t\tdefault:\n+\t\t\tus_vhost_usage(prgname);\n+\t\t\treturn -1;\n+\t\t}\n+\t}\n+\n+\tfor (i = 0; i < RTE_MAX_ETHPORTS; i++) {\n+\t\tif (enabled_port_mask & (1 << i))\n+\t\t\tports[num_ports++] = (uint8_t)i;\n+\t}\n+\n+\tif ((num_ports ==  0) || (num_ports > MAX_SUP_PORTS)) {\n+\t\tRTE_LOG(INFO, VHOST_PORT, \"Current enabled port number is %u,\"\n+\t\t\t\"but only %u port can be enabled\\n\",num_ports, MAX_SUP_PORTS);\n+\t\treturn -1;\n+\t}\n+\n+\tif ((zero_copy == 1) && (vm2vm_mode == VM2VM_SOFTWARE)) {\n+\t\tRTE_LOG(INFO, VHOST_PORT,\n+\t\t\t\"Vhost zero copy doesn't support software vm2vm,\"\n+\t\t\t\"please specify 'vm2vm 2' to use hardware vm2vm.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tif ((zero_copy == 1) && (vmdq_conf_default.rxmode.jumbo_frame == 1)) {\n+\t\tRTE_LOG(INFO, VHOST_PORT,\n+\t\t\t\"Vhost zero copy doesn't support jumbo frame,\"\n+\t\t\t\"please specify '--mergeable 0' to disable the \"\n+\t\t\t\"mergeable feature.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * Update the global var NUM_PORTS and array PORTS according to system ports number\n+ * and return valid ports number\n+ */\n+static unsigned check_ports_num(unsigned nb_ports)\n+{\n+\tunsigned valid_num_ports = num_ports;\n+\tunsigned portid;\n+\n+\tif (num_ports > nb_ports) {\n+\t\tRTE_LOG(INFO, VHOST_PORT, \"\\nSpecified port number(%u) exceeds total system port number(%u)\\n\",\n+\t\t\tnum_ports, nb_ports);\n+\t\tnum_ports = nb_ports;\n+\t}\n+\n+\tfor (portid = 0; portid < num_ports; portid ++) {\n+\t\tif (ports[portid] >= nb_ports) {\n+\t\t\tRTE_LOG(INFO, VHOST_PORT, \"\\nSpecified port ID(%u) exceeds max system port ID(%u)\\n\",\n+\t\t\t\tports[portid], (nb_ports - 1));\n+\t\t\tports[portid] = INVALID_PORT_ID;\n+\t\t\tvalid_num_ports--;\n+\t\t}\n+\t}\n+\treturn valid_num_ports;\n+}\n+\n+/*\n+ * Macro to print out packet contents. Wrapped in debug define so that the\n+ * data path is not effected when debug is disabled.\n+ */\n+#ifdef DEBUG\n+#define PRINT_PACKET(device, addr, size, header) do {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tchar *pkt_addr = (char*)(addr);\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tunsigned int index;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tchar packet[MAX_PRINT_BUFF];\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tif ((header))\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tsnprintf(packet, MAX_PRINT_BUFF, \"(%\"PRIu64\") Header size %d: \", (device->device_fh), (size));\t\t\t\t\\\n+\telse\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tsnprintf(packet, MAX_PRINT_BUFF, \"(%\"PRIu64\") Packet size %d: \", (device->device_fh), (size));\t\t\t\t\\\n+\tfor (index = 0; index < (size); index++) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tsnprintf(packet + strnlen(packet, MAX_PRINT_BUFF), MAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF),\t\\\n+\t\t\t\"%02hhx \", pkt_addr[index]);\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tsnprintf(packet + strnlen(packet, MAX_PRINT_BUFF), MAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF), \"\\n\");\t\\\n+\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tLOG_DEBUG(VHOST_DATA, \"%s\", packet);\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+} while(0)\n+#else\n+#define PRINT_PACKET(device, addr, size, header) do{} while(0)\n+#endif\n+\n+/*\n+ * Function to convert guest physical addresses to vhost virtual addresses. This\n+ * is used to convert virtio buffer addresses.\n+ */\n+static inline uint64_t __attribute__((always_inline))\n+gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)\n+{\n+\tstruct virtio_memory_regions *region;\n+\tuint32_t regionidx;\n+\tuint64_t vhost_va = 0;\n+\n+\tfor (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {\n+\t\tregion = &dev->mem->regions[regionidx];\n+\t\tif ((guest_pa >= region->guest_phys_address) &&\n+\t\t\t(guest_pa <= region->guest_phys_address_end)) {\n+\t\t\tvhost_va = region->address_offset + guest_pa;\n+\t\t\tbreak;\n+\t\t}\n+\t}\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") GPA %p| VVA %p\\n\",\n+\t\tdev->device_fh, (void*)(uintptr_t)guest_pa, (void*)(uintptr_t)vhost_va);\n+\n+\treturn vhost_va;\n+}\n+\n+/*\n+ * Function to convert guest physical addresses to vhost physical addresses.\n+ * This is used to convert virtio buffer addresses.\n+ */\n+static inline uint64_t __attribute__((always_inline))\n+gpa_to_hpa(struct virtio_net *dev, uint64_t guest_pa,\n+\tuint32_t buf_len, hpa_type *addr_type)\n+{\n+\tstruct virtio_memory_regions_hpa *region;\n+\tuint32_t regionidx;\n+\tuint64_t vhost_pa = 0;\n+\n+\t*addr_type = PHYS_ADDR_INVALID;\n+\n+\tfor (regionidx = 0; regionidx < dev->mem->nregions_hpa; regionidx++) {\n+\t\tregion = &dev->mem->regions_hpa[regionidx];\n+\t\tif ((guest_pa >= region->guest_phys_address) &&\n+\t\t\t(guest_pa <= region->guest_phys_address_end)) {\n+\t\t\tvhost_pa = region->host_phys_addr_offset + guest_pa;\n+\t\t\tif (likely((guest_pa + buf_len - 1)\n+\t\t\t\t<= region->guest_phys_address_end))\n+\t\t\t\t*addr_type = PHYS_ADDR_CONTINUOUS;\n+\t\t\telse\n+\t\t\t\t*addr_type = PHYS_ADDR_CROSS_SUBREG;\n+\t\t\tbreak;\n+\t\t}\n+\t}\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") GPA %p| HPA %p\\n\",\n+\t\tdev->device_fh, (void *)(uintptr_t)guest_pa,\n+\t\t(void *)(uintptr_t)vhost_pa);\n+\n+\treturn vhost_pa;\n+}\n+\n+/*\n+ * This function adds buffers to the virtio devices RX virtqueue. Buffers can\n+ * be received from the physical port or from another virtio device. A packet\n+ * count is returned to indicate the number of packets that were succesfully\n+ * added to the RX queue. This function works when mergeable is disabled.\n+ */\n+static inline uint32_t __attribute__((always_inline))\n+virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count)\n+{\n+\tstruct vhost_virtqueue *vq;\n+\tstruct vring_desc *desc;\n+\tstruct rte_mbuf *buff;\n+\t/* The virtio_hdr is initialised to 0. */\n+\tstruct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0,0,0,0,0,0},0};\n+\tuint64_t buff_addr = 0;\n+\tuint64_t buff_hdr_addr = 0;\n+\tuint32_t head[MAX_PKT_BURST], packet_len = 0;\n+\tuint32_t head_idx, packet_success = 0;\n+\tuint32_t retry = 0;\n+\tuint16_t avail_idx, res_cur_idx;\n+\tuint16_t res_base_idx, res_end_idx;\n+\tuint16_t free_entries;\n+\tuint8_t success = 0;\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") virtio_dev_rx()\\n\", dev->device_fh);\n+\tvq = dev->virtqueue[VIRTIO_RXQ];\n+\tcount = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;\n+\n+\t/* As many data cores may want access to available buffers, they need to be reserved. */\n+\tdo {\n+\t\tres_base_idx = vq->last_used_idx_res;\n+\t\tavail_idx = *((volatile uint16_t *)&vq->avail->idx);\n+\n+\t\tfree_entries = (avail_idx - res_base_idx);\n+\t\t/* If retry is enabled and the queue is full then we wait and retry to avoid packet loss. */\n+\t\tif (enable_retry && unlikely(count > free_entries)) {\n+\t\t\tfor (retry = 0; retry < burst_rx_retry_num; retry++) {\n+\t\t\t\trte_delay_us(burst_rx_delay_time);\n+\t\t\t\tavail_idx =\n+\t\t\t\t\t*((volatile uint16_t *)&vq->avail->idx);\n+\t\t\t\tfree_entries = (avail_idx - res_base_idx);\n+\t\t\t\tif (count <= free_entries)\n+\t\t\t\t\tbreak;\n+\t\t\t}\n+\t\t}\n+\n+\t\t/*check that we have enough buffers*/\n+\t\tif (unlikely(count > free_entries))\n+\t\t\tcount = free_entries;\n+\n+\t\tif (count == 0)\n+\t\t\treturn 0;\n+\n+\t\tres_end_idx = res_base_idx + count;\n+\t\t/* vq->last_used_idx_res is atomically updated. */\n+\t\tsuccess = rte_atomic16_cmpset(&vq->last_used_idx_res, res_base_idx,\n+\t\t\t\t\t\t\t\t\tres_end_idx);\n+\t} while (unlikely(success == 0));\n+\tres_cur_idx = res_base_idx;\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") Current Index %d| End Index %d\\n\", dev->device_fh, res_cur_idx, res_end_idx);\n+\n+\t/* Prefetch available ring to retrieve indexes. */\n+\trte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);\n+\n+\t/* Retrieve all of the head indexes first to avoid caching issues. */\n+\tfor (head_idx = 0; head_idx < count; head_idx++)\n+\t\thead[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)];\n+\n+\t/*Prefetch descriptor index. */\n+\trte_prefetch0(&vq->desc[head[packet_success]]);\n+\n+\twhile (res_cur_idx != res_end_idx) {\n+\t\t/* Get descriptor from available ring */\n+\t\tdesc = &vq->desc[head[packet_success]];\n+\n+\t\tbuff = pkts[packet_success];\n+\n+\t\t/* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */\n+\t\tbuff_addr = gpa_to_vva(dev, desc->addr);\n+\t\t/* Prefetch buffer address. */\n+\t\trte_prefetch0((void*)(uintptr_t)buff_addr);\n+\n+\t\t/* Copy virtio_hdr to packet and increment buffer address */\n+\t\tbuff_hdr_addr = buff_addr;\n+\t\tpacket_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;\n+\n+\t\t/*\n+\t\t * If the descriptors are chained the header and data are\n+\t\t * placed in separate buffers.\n+\t\t */\n+\t\tif (desc->flags & VRING_DESC_F_NEXT) {\n+\t\t\tdesc->len = vq->vhost_hlen;\n+\t\t\tdesc = &vq->desc[desc->next];\n+\t\t\t/* Buffer address translation. */\n+\t\t\tbuff_addr = gpa_to_vva(dev, desc->addr);\n+\t\t\tdesc->len = rte_pktmbuf_data_len(buff);\n+\t\t} else {\n+\t\t\tbuff_addr += vq->vhost_hlen;\n+\t\t\tdesc->len = packet_len;\n+\t\t}\n+\n+\t\t/* Update used ring with desc information */\n+\t\tvq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];\n+\t\tvq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;\n+\n+\t\t/* Copy mbuf data to buffer */\n+\t\trte_memcpy((void *)(uintptr_t)buff_addr,\n+\t\t\t(const void *)buff->pkt.data,\n+\t\t\trte_pktmbuf_data_len(buff));\n+\t\tPRINT_PACKET(dev, (uintptr_t)buff_addr,\n+\t\t\trte_pktmbuf_data_len(buff), 0);\n+\n+\t\tres_cur_idx++;\n+\t\tpacket_success++;\n+\n+\t\trte_memcpy((void *)(uintptr_t)buff_hdr_addr,\n+\t\t\t(const void *)&virtio_hdr, vq->vhost_hlen);\n+\n+\t\tPRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);\n+\n+\t\tif (res_cur_idx < res_end_idx) {\n+\t\t\t/* Prefetch descriptor index. */\n+\t\t\trte_prefetch0(&vq->desc[head[packet_success]]);\n+\t\t}\n+\t}\n+\n+\trte_compiler_barrier();\n+\n+\t/* Wait until it's our turn to add our buffer to the used ring. */\n+\twhile (unlikely(vq->last_used_idx != res_base_idx))\n+\t\trte_pause();\n+\n+\t*(volatile uint16_t *)&vq->used->idx += count;\n+\tvq->last_used_idx = res_end_idx;\n+\n+\t/* Kick the guest if necessary. */\n+\tif (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))\n+\t\teventfd_write((int)vq->kickfd, 1);\n+\treturn count;\n+}\n+\n+static inline uint32_t __attribute__((always_inline))\n+copy_from_mbuf_to_vring(struct virtio_net *dev,\n+\tuint16_t res_base_idx, uint16_t res_end_idx,\n+\tstruct rte_mbuf *pkt)\n+{\n+\tuint32_t vec_idx = 0;\n+\tuint32_t entry_success = 0;\n+\tstruct vhost_virtqueue *vq;\n+\t/* The virtio_hdr is initialised to 0. */\n+\tstruct virtio_net_hdr_mrg_rxbuf virtio_hdr = {\n+\t\t{0, 0, 0, 0, 0, 0}, 0};\n+\tuint16_t cur_idx = res_base_idx;\n+\tuint64_t vb_addr = 0;\n+\tuint64_t vb_hdr_addr = 0;\n+\tuint32_t seg_offset = 0;\n+\tuint32_t vb_offset = 0;\n+\tuint32_t seg_avail;\n+\tuint32_t vb_avail;\n+\tuint32_t cpy_len, entry_len;\n+\n+\tif (pkt == NULL)\n+\t\treturn 0;\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") Current Index %d| \"\n+\t\t\"End Index %d\\n\",\n+\t\tdev->device_fh, cur_idx, res_end_idx);\n+\n+\t/*\n+\t * Convert from gpa to vva\n+\t * (guest physical addr -> vhost virtual addr)\n+\t */\n+\tvq = dev->virtqueue[VIRTIO_RXQ];\n+\tvb_addr =\n+\t\tgpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);\n+\tvb_hdr_addr = vb_addr;\n+\n+\t/* Prefetch buffer address. */\n+\trte_prefetch0((void *)(uintptr_t)vb_addr);\n+\n+\tvirtio_hdr.num_buffers = res_end_idx - res_base_idx;\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") RX: Num merge buffers %d\\n\",\n+\t\tdev->device_fh, virtio_hdr.num_buffers);\n+\n+\trte_memcpy((void *)(uintptr_t)vb_hdr_addr,\n+\t\t(const void *)&virtio_hdr, vq->vhost_hlen);\n+\n+\tPRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1);\n+\n+\tseg_avail = rte_pktmbuf_data_len(pkt);\n+\tvb_offset = vq->vhost_hlen;\n+\tvb_avail =\n+\t\tvq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;\n+\n+\tentry_len = vq->vhost_hlen;\n+\n+\tif (vb_avail == 0) {\n+\t\tuint32_t desc_idx =\n+\t\t\tvq->buf_vec[vec_idx].desc_idx;\n+\t\tvq->desc[desc_idx].len = vq->vhost_hlen;\n+\n+\t\tif ((vq->desc[desc_idx].flags\n+\t\t\t& VRING_DESC_F_NEXT) == 0) {\n+\t\t\t/* Update used ring with desc information */\n+\t\t\tvq->used->ring[cur_idx & (vq->size - 1)].id\n+\t\t\t\t= vq->buf_vec[vec_idx].desc_idx;\n+\t\t\tvq->used->ring[cur_idx & (vq->size - 1)].len\n+\t\t\t\t= entry_len;\n+\n+\t\t\tentry_len = 0;\n+\t\t\tcur_idx++;\n+\t\t\tentry_success++;\n+\t\t}\n+\n+\t\tvec_idx++;\n+\t\tvb_addr =\n+\t\t\tgpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);\n+\n+\t\t/* Prefetch buffer address. */\n+\t\trte_prefetch0((void *)(uintptr_t)vb_addr);\n+\t\tvb_offset = 0;\n+\t\tvb_avail = vq->buf_vec[vec_idx].buf_len;\n+\t}\n+\n+\tcpy_len = RTE_MIN(vb_avail, seg_avail);\n+\n+\twhile (cpy_len > 0) {\n+\t\t/* Copy mbuf data to vring buffer */\n+\t\trte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset),\n+\t\t\t(const void *)(rte_pktmbuf_mtod(pkt, char*) + seg_offset),\n+\t\t\tcpy_len);\n+\n+\t\tPRINT_PACKET(dev,\n+\t\t\t(uintptr_t)(vb_addr + vb_offset),\n+\t\t\tcpy_len, 0);\n+\n+\t\tseg_offset += cpy_len;\n+\t\tvb_offset += cpy_len;\n+\t\tseg_avail -= cpy_len;\n+\t\tvb_avail -= cpy_len;\n+\t\tentry_len += cpy_len;\n+\n+\t\tif (seg_avail != 0) {\n+\t\t\t/*\n+\t\t\t * The virtio buffer in this vring\n+\t\t\t * entry reach to its end.\n+\t\t\t * But the segment doesn't complete.\n+\t\t\t */\n+\t\t\tif ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &\n+\t\t\t\tVRING_DESC_F_NEXT) == 0) {\n+\t\t\t\t/* Update used ring with desc information */\n+\t\t\t\tvq->used->ring[cur_idx & (vq->size - 1)].id\n+\t\t\t\t\t= vq->buf_vec[vec_idx].desc_idx;\n+\t\t\t\tvq->used->ring[cur_idx & (vq->size - 1)].len\n+\t\t\t\t\t= entry_len;\n+\t\t\t\tentry_len = 0;\n+\t\t\t\tcur_idx++;\n+\t\t\t\tentry_success++;\n+\t\t\t}\n+\n+\t\t\tvec_idx++;\n+\t\t\tvb_addr = gpa_to_vva(dev,\n+\t\t\t\tvq->buf_vec[vec_idx].buf_addr);\n+\t\t\tvb_offset = 0;\n+\t\t\tvb_avail = vq->buf_vec[vec_idx].buf_len;\n+\t\t\tcpy_len = RTE_MIN(vb_avail, seg_avail);\n+\t\t} else {\n+\t\t\t/*\n+\t\t\t * This current segment complete, need continue to\n+\t\t\t * check if the whole packet complete or not.\n+\t\t\t */\n+\t\t\tpkt = pkt->pkt.next;\n+\t\t\tif (pkt != NULL) {\n+\t\t\t\t/*\n+\t\t\t\t * There are more segments.\n+\t\t\t\t */\n+\t\t\t\tif (vb_avail == 0) {\n+\t\t\t\t\t/*\n+\t\t\t\t\t * This current buffer from vring is\n+\t\t\t\t\t * used up, need fetch next buffer\n+\t\t\t\t\t * from buf_vec.\n+\t\t\t\t\t */\n+\t\t\t\t\tuint32_t desc_idx =\n+\t\t\t\t\t\tvq->buf_vec[vec_idx].desc_idx;\n+\t\t\t\t\tvq->desc[desc_idx].len = vb_offset;\n+\n+\t\t\t\t\tif ((vq->desc[desc_idx].flags &\n+\t\t\t\t\t\tVRING_DESC_F_NEXT) == 0) {\n+\t\t\t\t\t\tuint16_t wrapped_idx =\n+\t\t\t\t\t\t\tcur_idx & (vq->size - 1);\n+\t\t\t\t\t\t/*\n+\t\t\t\t\t\t * Update used ring with the\n+\t\t\t\t\t\t * descriptor information\n+\t\t\t\t\t\t */\n+\t\t\t\t\t\tvq->used->ring[wrapped_idx].id\n+\t\t\t\t\t\t\t= desc_idx;\n+\t\t\t\t\t\tvq->used->ring[wrapped_idx].len\n+\t\t\t\t\t\t\t= entry_len;\n+\t\t\t\t\t\tentry_success++;\n+\t\t\t\t\t\tentry_len = 0;\n+\t\t\t\t\t\tcur_idx++;\n+\t\t\t\t\t}\n+\n+\t\t\t\t\t/* Get next buffer from buf_vec. */\n+\t\t\t\t\tvec_idx++;\n+\t\t\t\t\tvb_addr = gpa_to_vva(dev,\n+\t\t\t\t\t\tvq->buf_vec[vec_idx].buf_addr);\n+\t\t\t\t\tvb_avail =\n+\t\t\t\t\t\tvq->buf_vec[vec_idx].buf_len;\n+\t\t\t\t\tvb_offset = 0;\n+\t\t\t\t}\n+\n+\t\t\t\tseg_offset = 0;\n+\t\t\t\tseg_avail = rte_pktmbuf_data_len(pkt);\n+\t\t\t\tcpy_len = RTE_MIN(vb_avail, seg_avail);\n+\t\t\t} else {\n+\t\t\t\t/*\n+\t\t\t\t * This whole packet completes.\n+\t\t\t\t */\n+\t\t\t\tuint32_t desc_idx =\n+\t\t\t\t\tvq->buf_vec[vec_idx].desc_idx;\n+\t\t\t\tvq->desc[desc_idx].len = vb_offset;\n+\n+\t\t\t\twhile (vq->desc[desc_idx].flags &\n+\t\t\t\t\tVRING_DESC_F_NEXT) {\n+\t\t\t\t\tdesc_idx = vq->desc[desc_idx].next;\n+\t\t\t\t\t vq->desc[desc_idx].len = 0;\n+\t\t\t\t}\n+\n+\t\t\t\t/* Update used ring with desc information */\n+\t\t\t\tvq->used->ring[cur_idx & (vq->size - 1)].id\n+\t\t\t\t\t= vq->buf_vec[vec_idx].desc_idx;\n+\t\t\t\tvq->used->ring[cur_idx & (vq->size - 1)].len\n+\t\t\t\t\t= entry_len;\n+\t\t\t\tentry_len = 0;\n+\t\t\t\tcur_idx++;\n+\t\t\t\tentry_success++;\n+\t\t\t\tseg_avail = 0;\n+\t\t\t\tcpy_len = RTE_MIN(vb_avail, seg_avail);\n+\t\t\t}\n+\t\t}\n+\t}\n+\n+\treturn entry_success;\n+}\n+\n+/*\n+ * This function adds buffers to the virtio devices RX virtqueue. Buffers can\n+ * be received from the physical port or from another virtio device. A packet\n+ * count is returned to indicate the number of packets that were succesfully\n+ * added to the RX queue. This function works for mergeable RX.\n+ */\n+static inline uint32_t __attribute__((always_inline))\n+virtio_dev_merge_rx(struct virtio_net *dev, struct rte_mbuf **pkts,\n+\tuint32_t count)\n+{\n+\tstruct vhost_virtqueue *vq;\n+\tuint32_t pkt_idx = 0, entry_success = 0;\n+\tuint32_t retry = 0;\n+\tuint16_t avail_idx, res_cur_idx;\n+\tuint16_t res_base_idx, res_end_idx;\n+\tuint8_t success = 0;\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") virtio_dev_merge_rx()\\n\",\n+\t\tdev->device_fh);\n+\tvq = dev->virtqueue[VIRTIO_RXQ];\n+\tcount = RTE_MIN((uint32_t)MAX_PKT_BURST, count);\n+\n+\tif (count == 0)\n+\t\treturn 0;\n+\n+\tfor (pkt_idx = 0; pkt_idx < count; pkt_idx++) {\n+\t\tuint32_t secure_len = 0;\n+\t\tuint16_t need_cnt;\n+\t\tuint32_t vec_idx = 0;\n+\t\tuint32_t pkt_len = pkts[pkt_idx]->pkt.pkt_len + vq->vhost_hlen;\n+\t\tuint16_t i, id;\n+\n+\t\tdo {\n+\t\t\t/*\n+\t\t\t * As many data cores may want access to available\n+\t\t\t * buffers, they need to be reserved.\n+\t\t\t */\n+\t\t\tres_base_idx = vq->last_used_idx_res;\n+\t\t\tres_cur_idx = res_base_idx;\n+\n+\t\t\tdo {\n+\t\t\t\tavail_idx = *((volatile uint16_t *)&vq->avail->idx);\n+\t\t\t\tif (unlikely(res_cur_idx == avail_idx)) {\n+\t\t\t\t\t/*\n+\t\t\t\t\t * If retry is enabled and the queue is\n+\t\t\t\t\t * full then we wait and retry to avoid\n+\t\t\t\t\t * packet loss.\n+\t\t\t\t\t */\n+\t\t\t\t\tif (enable_retry) {\n+\t\t\t\t\t\tuint8_t cont = 0;\n+\t\t\t\t\t\tfor (retry = 0; retry < burst_rx_retry_num; retry++) {\n+\t\t\t\t\t\t\trte_delay_us(burst_rx_delay_time);\n+\t\t\t\t\t\t\tavail_idx =\n+\t\t\t\t\t\t\t\t*((volatile uint16_t *)&vq->avail->idx);\n+\t\t\t\t\t\t\tif (likely(res_cur_idx != avail_idx)) {\n+\t\t\t\t\t\t\t\tcont = 1;\n+\t\t\t\t\t\t\t\tbreak;\n+\t\t\t\t\t\t\t}\n+\t\t\t\t\t\t}\n+\t\t\t\t\t\tif (cont == 1)\n+\t\t\t\t\t\t\tcontinue;\n+\t\t\t\t\t}\n+\n+\t\t\t\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\t\t\t\"(%\"PRIu64\") Failed \"\n+\t\t\t\t\t\t\"to get enough desc from \"\n+\t\t\t\t\t\t\"vring\\n\",\n+\t\t\t\t\t\tdev->device_fh);\n+\t\t\t\t\treturn pkt_idx;\n+\t\t\t\t} else {\n+\t\t\t\t\tuint16_t wrapped_idx =\n+\t\t\t\t\t\t(res_cur_idx) & (vq->size - 1);\n+\t\t\t\t\tuint32_t idx =\n+\t\t\t\t\t\tvq->avail->ring[wrapped_idx];\n+\t\t\t\t\tuint8_t next_desc;\n+\n+\t\t\t\t\tdo {\n+\t\t\t\t\t\tnext_desc = 0;\n+\t\t\t\t\t\tsecure_len += vq->desc[idx].len;\n+\t\t\t\t\t\tif (vq->desc[idx].flags &\n+\t\t\t\t\t\t\tVRING_DESC_F_NEXT) {\n+\t\t\t\t\t\t\tidx = vq->desc[idx].next;\n+\t\t\t\t\t\t\tnext_desc = 1;\n+\t\t\t\t\t\t}\n+\t\t\t\t\t} while (next_desc);\n+\n+\t\t\t\t\tres_cur_idx++;\n+\t\t\t\t}\n+\t\t\t} while (pkt_len > secure_len);\n+\n+\t\t\t/* vq->last_used_idx_res is atomically updated. */\n+\t\t\tsuccess = rte_atomic16_cmpset(&vq->last_used_idx_res,\n+\t\t\t\t\t\t\tres_base_idx,\n+\t\t\t\t\t\t\tres_cur_idx);\n+\t\t} while (success == 0);\n+\n+\t\tid = res_base_idx;\n+\t\tneed_cnt = res_cur_idx - res_base_idx;\n+\n+\t\tfor (i = 0; i < need_cnt; i++, id++) {\n+\t\t\tuint16_t wrapped_idx = id & (vq->size - 1);\n+\t\t\tuint32_t idx = vq->avail->ring[wrapped_idx];\n+\t\t\tuint8_t next_desc;\n+\t\t\tdo {\n+\t\t\t\tnext_desc = 0;\n+\t\t\t\tvq->buf_vec[vec_idx].buf_addr =\n+\t\t\t\t\tvq->desc[idx].addr;\n+\t\t\t\tvq->buf_vec[vec_idx].buf_len =\n+\t\t\t\t\tvq->desc[idx].len;\n+\t\t\t\tvq->buf_vec[vec_idx].desc_idx = idx;\n+\t\t\t\tvec_idx++;\n+\n+\t\t\t\tif (vq->desc[idx].flags & VRING_DESC_F_NEXT) {\n+\t\t\t\t\tidx = vq->desc[idx].next;\n+\t\t\t\t\tnext_desc = 1;\n+\t\t\t\t}\n+\t\t\t} while (next_desc);\n+\t\t}\n+\n+\t\tres_end_idx = res_cur_idx;\n+\n+\t\tentry_success = copy_from_mbuf_to_vring(dev, res_base_idx,\n+\t\t\tres_end_idx, pkts[pkt_idx]);\n+\n+\t\trte_compiler_barrier();\n+\n+\t\t/*\n+\t\t * Wait until it's our turn to add our buffer\n+\t\t * to the used ring.\n+\t\t */\n+\t\twhile (unlikely(vq->last_used_idx != res_base_idx))\n+\t\t\trte_pause();\n+\n+\t\t*(volatile uint16_t *)&vq->used->idx += entry_success;\n+\t\tvq->last_used_idx = res_end_idx;\n+\n+\t\t/* Kick the guest if necessary. */\n+\t\tif (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))\n+\t\t\teventfd_write((int)vq->kickfd, 1);\n+\t}\n+\n+\treturn count;\n+}\n+\n+/*\n+ * Compares a packet destination MAC address to a device MAC address.\n+ */\n+static inline int __attribute__((always_inline))\n+ether_addr_cmp(struct ether_addr *ea, struct ether_addr *eb)\n+{\n+\treturn (((*(uint64_t *)ea ^ *(uint64_t *)eb) & MAC_ADDR_CMP) == 0);\n+}\n+\n+/*\n+ * This function learns the MAC address of the device and registers this along with a\n+ * vlan tag to a VMDQ.\n+ */\n+static int\n+link_vmdq(struct virtio_net *dev, struct rte_mbuf *m)\n+{\n+\tstruct ether_hdr *pkt_hdr;\n+\tstruct virtio_net_data_ll *dev_ll;\n+\tint i, ret;\n+\n+\t/* Learn MAC address of guest device from packet */\n+\tpkt_hdr = (struct ether_hdr *)m->pkt.data;\n+\n+\tdev_ll = ll_root_used;\n+\n+\twhile (dev_ll != NULL) {\n+\t\tif (ether_addr_cmp(&(pkt_hdr->s_addr), &dev_ll->dev->mac_address)) {\n+\t\t\tRTE_LOG(INFO, VHOST_DATA, \"(%\"PRIu64\") WARNING: This device is using an existing MAC address and has not been registered.\\n\", dev->device_fh);\n+\t\t\treturn -1;\n+\t\t}\n+\t\tdev_ll = dev_ll->next;\n+\t}\n+\n+\tfor (i = 0; i < ETHER_ADDR_LEN; i++)\n+\t\tdev->mac_address.addr_bytes[i] = pkt_hdr->s_addr.addr_bytes[i];\n+\n+\t/* vlan_tag currently uses the device_id. */\n+\tdev->vlan_tag = vlan_tags[dev->device_fh];\n+\n+\t/* Print out VMDQ registration info. */\n+\tRTE_LOG(INFO, VHOST_DATA, \"(%\"PRIu64\") MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VLAN_TAG %d registered\\n\",\n+\t\tdev->device_fh,\n+\t\tdev->mac_address.addr_bytes[0], dev->mac_address.addr_bytes[1],\n+\t\tdev->mac_address.addr_bytes[2], dev->mac_address.addr_bytes[3],\n+\t\tdev->mac_address.addr_bytes[4], dev->mac_address.addr_bytes[5],\n+\t\tdev->vlan_tag);\n+\n+\t/* Register the MAC address. */\n+\tret = rte_eth_dev_mac_addr_add(ports[0], &dev->mac_address, (uint32_t)dev->device_fh);\n+\tif (ret)\n+\t\tRTE_LOG(ERR, VHOST_DATA, \"(%\"PRIu64\") Failed to add device MAC address to VMDQ\\n\",\n+\t\t\t\t\tdev->device_fh);\n+\n+\t/* Enable stripping of the vlan tag as we handle routing. */\n+\trte_eth_dev_set_vlan_strip_on_queue(ports[0], (uint16_t)dev->vmdq_rx_q, 1);\n+\n+\t/* Set device as ready for RX. */\n+\tdev->ready = DEVICE_RX;\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding buffers to the RX\n+ * queue before disabling RX on the device.\n+ */\n+static inline void\n+unlink_vmdq(struct virtio_net *dev)\n+{\n+\tunsigned i = 0;\n+\tunsigned rx_count;\n+\tstruct rte_mbuf *pkts_burst[MAX_PKT_BURST];\n+\n+\tif (dev->ready == DEVICE_RX) {\n+\t\t/*clear MAC and VLAN settings*/\n+\t\trte_eth_dev_mac_addr_remove(ports[0], &dev->mac_address);\n+\t\tfor (i = 0; i < 6; i++)\n+\t\t\tdev->mac_address.addr_bytes[i] = 0;\n+\n+\t\tdev->vlan_tag = 0;\n+\n+\t\t/*Clear out the receive buffers*/\n+\t\trx_count = rte_eth_rx_burst(ports[0],\n+\t\t\t\t\t(uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);\n+\n+\t\twhile (rx_count) {\n+\t\t\tfor (i = 0; i < rx_count; i++)\n+\t\t\t\trte_pktmbuf_free(pkts_burst[i]);\n+\n+\t\t\trx_count = rte_eth_rx_burst(ports[0],\n+\t\t\t\t\t(uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);\n+\t\t}\n+\n+\t\tdev->ready = DEVICE_MAC_LEARNING;\n+\t}\n+}\n+\n+/*\n+ * Check if the packet destination MAC address is for a local device. If so then put\n+ * the packet on that devices RX queue. If not then return.\n+ */\n+static inline unsigned __attribute__((always_inline))\n+virtio_tx_local(struct virtio_net *dev, struct rte_mbuf *m)\n+{\n+\tstruct virtio_net_data_ll *dev_ll;\n+\tstruct ether_hdr *pkt_hdr;\n+\tuint64_t ret = 0;\n+\n+\tpkt_hdr = (struct ether_hdr *)m->pkt.data;\n+\n+\t/*get the used devices list*/\n+\tdev_ll = ll_root_used;\n+\n+\twhile (dev_ll != NULL) {\n+\t\tif ((dev_ll->dev->ready == DEVICE_RX) && ether_addr_cmp(&(pkt_hdr->d_addr),\n+\t\t\t\t          &dev_ll->dev->mac_address)) {\n+\n+\t\t\t/* Drop the packet if the TX packet is destined for the TX device. */\n+\t\t\tif (dev_ll->dev->device_fh == dev->device_fh) {\n+\t\t\t\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") TX: Source and destination MAC addresses are the same. Dropping packet.\\n\",\n+\t\t\t\t\t\t\tdev_ll->dev->device_fh);\n+\t\t\t\treturn 0;\n+\t\t\t}\n+\n+\n+\t\t\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") TX: MAC address is local\\n\", dev_ll->dev->device_fh);\n+\n+\t\t\tif (dev_ll->dev->remove) {\n+\t\t\t\t/*drop the packet if the device is marked for removal*/\n+\t\t\t\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") Device is marked for removal\\n\", dev_ll->dev->device_fh);\n+\t\t\t} else {\n+\t\t\t\tuint32_t mergeable =\n+\t\t\t\t\tdev_ll->dev->features &\n+\t\t\t\t\t(1 << VIRTIO_NET_F_MRG_RXBUF);\n+\n+\t\t\t\t/*send the packet to the local virtio device*/\n+\t\t\t\tif (likely(mergeable == 0))\n+\t\t\t\t\tret = virtio_dev_rx(dev_ll->dev, &m, 1);\n+\t\t\t\telse\n+\t\t\t\t\tret = virtio_dev_merge_rx(dev_ll->dev,\n+\t\t\t\t\t\t&m, 1);\n+\n+\t\t\t\tif (enable_stats) {\n+\t\t\t\t\trte_atomic64_add(\n+\t\t\t\t\t&dev_statistics[dev_ll->dev->device_fh].rx_total_atomic,\n+\t\t\t\t\t1);\n+\t\t\t\t\trte_atomic64_add(\n+\t\t\t\t\t&dev_statistics[dev_ll->dev->device_fh].rx_atomic,\n+\t\t\t\t\tret);\n+\t\t\t\t\tdev_statistics[dev->device_fh].tx_total++;\n+\t\t\t\t\tdev_statistics[dev->device_fh].tx += ret;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\treturn 0;\n+\t\t}\n+\t\tdev_ll = dev_ll->next;\n+\t}\n+\n+\treturn -1;\n+}\n+\n+/*\n+ * This function routes the TX packet to the correct interface. This may be a local device\n+ * or the physical port.\n+ */\n+static inline void __attribute__((always_inline))\n+virtio_tx_route(struct virtio_net* dev, struct rte_mbuf *m, struct rte_mempool *mbuf_pool, uint16_t vlan_tag)\n+{\n+\tstruct mbuf_table *tx_q;\n+\tstruct vlan_ethhdr *vlan_hdr;\n+\tstruct rte_mbuf **m_table;\n+\tstruct rte_mbuf *mbuf, *prev;\n+\tunsigned len, ret, offset = 0;\n+\tconst uint16_t lcore_id = rte_lcore_id();\n+\tstruct virtio_net_data_ll *dev_ll = ll_root_used;\n+\tstruct ether_hdr *pkt_hdr = (struct ether_hdr *)m->pkt.data;\n+\n+\t/*check if destination is local VM*/\n+\tif ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(dev, m) == 0))\n+\t\treturn;\n+\n+\tif (vm2vm_mode == VM2VM_HARDWARE) {\n+\t\twhile (dev_ll != NULL) {\n+\t\t\tif ((dev_ll->dev->ready == DEVICE_RX)\n+\t\t\t\t&& ether_addr_cmp(&(pkt_hdr->d_addr),\n+\t\t\t\t&dev_ll->dev->mac_address)) {\n+\t\t\t\t/*\n+\t\t\t\t * Drop the packet if the TX packet is\n+\t\t\t\t * destined for the TX device.\n+\t\t\t\t */\n+\t\t\t\tif (dev_ll->dev->device_fh == dev->device_fh) {\n+\t\t\t\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\t\t\"(%\"PRIu64\") TX: Source and destination\"\n+\t\t\t\t\t\" MAC addresses are the same. Dropping \"\n+\t\t\t\t\t\"packet.\\n\",\n+\t\t\t\t\tdev_ll->dev->device_fh);\n+\t\t\t\t\treturn;\n+\t\t\t\t}\n+\t\t\t\toffset = 4;\n+\t\t\t\tvlan_tag =\n+\t\t\t\t(uint16_t)\n+\t\t\t\tvlan_tags[(uint16_t)dev_ll->dev->device_fh];\n+\n+\t\t\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\t\"(%\"PRIu64\") TX: pkt to local VM device id:\"\n+\t\t\t\t\"(%\"PRIu64\") vlan tag: %d.\\n\",\n+\t\t\t\tdev->device_fh, dev_ll->dev->device_fh,\n+\t\t\t\tvlan_tag);\n+\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t\tdev_ll = dev_ll->next;\n+\t\t}\n+\t}\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") TX: MAC address is external\\n\", dev->device_fh);\n+\n+\t/*Add packet to the port tx queue*/\n+\ttx_q = &lcore_tx_queue[lcore_id];\n+\tlen = tx_q->len;\n+\n+\t/* Allocate an mbuf and populate the structure. */\n+\tmbuf = rte_pktmbuf_alloc(mbuf_pool);\n+\tif (unlikely(mbuf == NULL)) {\n+\t\tRTE_LOG(ERR, VHOST_DATA,\n+\t\t\t\"Failed to allocate memory for mbuf.\\n\");\n+\t\treturn;\n+\t}\n+\n+\tmbuf->pkt.data_len = m->pkt.data_len + VLAN_HLEN + offset;\n+\tmbuf->pkt.pkt_len = m->pkt.pkt_len + VLAN_HLEN + offset;\n+\tmbuf->pkt.nb_segs = m->pkt.nb_segs;\n+\n+\t/* Copy ethernet header to mbuf. */\n+\trte_memcpy((void*)mbuf->pkt.data, (const void*)m->pkt.data, ETH_HLEN);\n+\n+\n+\t/* Setup vlan header. Bytes need to be re-ordered for network with htons()*/\n+\tvlan_hdr = (struct vlan_ethhdr *) mbuf->pkt.data;\n+\tvlan_hdr->h_vlan_encapsulated_proto = vlan_hdr->h_vlan_proto;\n+\tvlan_hdr->h_vlan_proto = htons(ETH_P_8021Q);\n+\tvlan_hdr->h_vlan_TCI = htons(vlan_tag);\n+\n+\t/* Copy the remaining packet contents to the mbuf. */\n+\trte_memcpy((void*) ((uint8_t*)mbuf->pkt.data + VLAN_ETH_HLEN),\n+\t\t(const void*) ((uint8_t*)m->pkt.data + ETH_HLEN), (m->pkt.data_len - ETH_HLEN));\n+\n+\t/* Copy the remaining segments for the whole packet. */\n+\tprev = mbuf;\n+\twhile (m->pkt.next) {\n+\t\t/* Allocate an mbuf and populate the structure. */\n+\t\tstruct rte_mbuf *next_mbuf = rte_pktmbuf_alloc(mbuf_pool);\n+\t\tif (unlikely(next_mbuf == NULL)) {\n+\t\t\trte_pktmbuf_free(mbuf);\n+\t\t\tRTE_LOG(ERR, VHOST_DATA,\n+\t\t\t\t\"Failed to allocate memory for mbuf.\\n\");\n+\t\t\treturn;\n+\t\t}\n+\n+\t\tm = m->pkt.next;\n+\t\tprev->pkt.next = next_mbuf;\n+\t\tprev = next_mbuf;\n+\t\tnext_mbuf->pkt.data_len = m->pkt.data_len;\n+\n+\t\t/* Copy data to next mbuf. */\n+\t\trte_memcpy(rte_pktmbuf_mtod(next_mbuf, void *),\n+\t\t\trte_pktmbuf_mtod(m, const void *), m->pkt.data_len);\n+\t}\n+\n+\ttx_q->m_table[len] = mbuf;\n+\tlen++;\n+\tif (enable_stats) {\n+\t\tdev_statistics[dev->device_fh].tx_total++;\n+\t\tdev_statistics[dev->device_fh].tx++;\n+\t}\n+\n+\tif (unlikely(len == MAX_PKT_BURST)) {\n+\t\tm_table = (struct rte_mbuf **)tx_q->m_table;\n+\t\tret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id, m_table, (uint16_t) len);\n+\t\t/* Free any buffers not handled by TX and update the port stats. */\n+\t\tif (unlikely(ret < len)) {\n+\t\t\tdo {\n+\t\t\t\trte_pktmbuf_free(m_table[ret]);\n+\t\t\t} while (++ret < len);\n+\t\t}\n+\n+\t\tlen = 0;\n+\t}\n+\n+\ttx_q->len = len;\n+\treturn;\n+}\n+\n+static inline void __attribute__((always_inline))\n+virtio_dev_tx(struct virtio_net* dev, struct rte_mempool *mbuf_pool)\n+{\n+\tstruct rte_mbuf m;\n+\tstruct vhost_virtqueue *vq;\n+\tstruct vring_desc *desc;\n+\tuint64_t buff_addr = 0;\n+\tuint32_t head[MAX_PKT_BURST];\n+\tuint32_t used_idx;\n+\tuint32_t i;\n+\tuint16_t free_entries, packet_success = 0;\n+\tuint16_t avail_idx;\n+\n+\tvq = dev->virtqueue[VIRTIO_TXQ];\n+\tavail_idx =  *((volatile uint16_t *)&vq->avail->idx);\n+\n+\t/* If there are no available buffers then return. */\n+\tif (vq->last_used_idx == avail_idx)\n+\t\treturn;\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") virtio_dev_tx()\\n\", dev->device_fh);\n+\n+\t/* Prefetch available ring to retrieve head indexes. */\n+\trte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);\n+\n+\t/*get the number of free entries in the ring*/\n+\tfree_entries = (avail_idx - vq->last_used_idx);\n+\n+\t/* Limit to MAX_PKT_BURST. */\n+\tif (free_entries > MAX_PKT_BURST)\n+\t\tfree_entries = MAX_PKT_BURST;\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") Buffers available %d\\n\", dev->device_fh, free_entries);\n+\t/* Retrieve all of the head indexes first to avoid caching issues. */\n+\tfor (i = 0; i < free_entries; i++)\n+\t\thead[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];\n+\n+\t/* Prefetch descriptor index. */\n+\trte_prefetch0(&vq->desc[head[packet_success]]);\n+\trte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);\n+\n+\twhile (packet_success < free_entries) {\n+\t\tdesc = &vq->desc[head[packet_success]];\n+\n+\t\t/* Discard first buffer as it is the virtio header */\n+\t\tdesc = &vq->desc[desc->next];\n+\n+\t\t/* Buffer address translation. */\n+\t\tbuff_addr = gpa_to_vva(dev, desc->addr);\n+\t\t/* Prefetch buffer address. */\n+\t\trte_prefetch0((void*)(uintptr_t)buff_addr);\n+\n+\t\tused_idx = vq->last_used_idx & (vq->size - 1);\n+\n+\t\tif (packet_success < (free_entries - 1)) {\n+\t\t\t/* Prefetch descriptor index. */\n+\t\t\trte_prefetch0(&vq->desc[head[packet_success+1]]);\n+\t\t\trte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]);\n+\t\t}\n+\n+\t\t/* Update used index buffer information. */\n+\t\tvq->used->ring[used_idx].id = head[packet_success];\n+\t\tvq->used->ring[used_idx].len = 0;\n+\n+\t\t/* Setup dummy mbuf. This is copied to a real mbuf if transmitted out the physical port. */\n+\t\tm.pkt.data_len = desc->len;\n+\t\tm.pkt.pkt_len = desc->len;\n+\t\tm.pkt.data = (void*)(uintptr_t)buff_addr;\n+\n+\t\tPRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0);\n+\n+\t\t/* If this is the first received packet we need to learn the MAC and setup VMDQ */\n+\t\tif (dev->ready == DEVICE_MAC_LEARNING) {\n+\t\t\tif (dev->remove || (link_vmdq(dev, &m) == -1)) {\n+\t\t\t\t/*discard frame if device is scheduled for removal or a duplicate MAC address is found. */\n+\t\t\t\tpacket_success += free_entries;\n+\t\t\t\tvq->last_used_idx += packet_success;\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t}\n+\t\tvirtio_tx_route(dev, &m, mbuf_pool, (uint16_t)dev->device_fh);\n+\n+\t\tvq->last_used_idx++;\n+\t\tpacket_success++;\n+\t}\n+\n+\trte_compiler_barrier();\n+\tvq->used->idx += packet_success;\n+\t/* Kick guest if required. */\n+\tif (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))\n+\t\teventfd_write((int)vq->kickfd, 1);\n+}\n+\n+/* This function works for TX packets with mergeable feature enabled. */\n+static inline void __attribute__((always_inline))\n+virtio_dev_merge_tx(struct virtio_net *dev, struct rte_mempool *mbuf_pool)\n+{\n+\tstruct rte_mbuf *m, *prev;\n+\tstruct vhost_virtqueue *vq;\n+\tstruct vring_desc *desc;\n+\tuint64_t vb_addr = 0;\n+\tuint32_t head[MAX_PKT_BURST];\n+\tuint32_t used_idx;\n+\tuint32_t i;\n+\tuint16_t free_entries, entry_success = 0;\n+\tuint16_t avail_idx;\n+\tuint32_t buf_size = MBUF_SIZE - (sizeof(struct rte_mbuf)\n+\t\t\t+ RTE_PKTMBUF_HEADROOM);\n+\n+\tvq = dev->virtqueue[VIRTIO_TXQ];\n+\tavail_idx =  *((volatile uint16_t *)&vq->avail->idx);\n+\n+\t/* If there are no available buffers then return. */\n+\tif (vq->last_used_idx == avail_idx)\n+\t\treturn;\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") virtio_dev_merge_tx()\\n\",\n+\t\tdev->device_fh);\n+\n+\t/* Prefetch available ring to retrieve head indexes. */\n+\trte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);\n+\n+\t/*get the number of free entries in the ring*/\n+\tfree_entries = (avail_idx - vq->last_used_idx);\n+\n+\t/* Limit to MAX_PKT_BURST. */\n+\tfree_entries = RTE_MIN(free_entries, MAX_PKT_BURST);\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") Buffers available %d\\n\",\n+\t\tdev->device_fh, free_entries);\n+\t/* Retrieve all of the head indexes first to avoid caching issues. */\n+\tfor (i = 0; i < free_entries; i++)\n+\t\thead[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];\n+\n+\t/* Prefetch descriptor index. */\n+\trte_prefetch0(&vq->desc[head[entry_success]]);\n+\trte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);\n+\n+\twhile (entry_success < free_entries) {\n+\t\tuint32_t vb_avail, vb_offset;\n+\t\tuint32_t seg_avail, seg_offset;\n+\t\tuint32_t cpy_len;\n+\t\tuint32_t seg_num = 0;\n+\t\tstruct rte_mbuf *cur;\n+\t\tuint8_t alloc_err = 0;\n+\n+\t\tdesc = &vq->desc[head[entry_success]];\n+\n+\t\t/* Discard first buffer as it is the virtio header */\n+\t\tdesc = &vq->desc[desc->next];\n+\n+\t\t/* Buffer address translation. */\n+\t\tvb_addr = gpa_to_vva(dev, desc->addr);\n+\t\t/* Prefetch buffer address. */\n+\t\trte_prefetch0((void *)(uintptr_t)vb_addr);\n+\n+\t\tused_idx = vq->last_used_idx & (vq->size - 1);\n+\n+\t\tif (entry_success < (free_entries - 1)) {\n+\t\t\t/* Prefetch descriptor index. */\n+\t\t\trte_prefetch0(&vq->desc[head[entry_success+1]]);\n+\t\t\trte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]);\n+\t\t}\n+\n+\t\t/* Update used index buffer information. */\n+\t\tvq->used->ring[used_idx].id = head[entry_success];\n+\t\tvq->used->ring[used_idx].len = 0;\n+\n+\t\tvb_offset = 0;\n+\t\tvb_avail = desc->len;\n+\t\tseg_offset = 0;\n+\t\tseg_avail = buf_size;\n+\t\tcpy_len = RTE_MIN(vb_avail, seg_avail);\n+\n+\t\tPRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);\n+\n+\t\t/* Allocate an mbuf and populate the structure. */\n+\t\tm = rte_pktmbuf_alloc(mbuf_pool);\n+\t\tif (unlikely(m == NULL)) {\n+\t\t\tRTE_LOG(ERR, VHOST_DATA,\n+\t\t\t\t\"Failed to allocate memory for mbuf.\\n\");\n+\t\t\treturn;\n+\t\t}\n+\n+\t\tseg_num++;\n+\t\tcur = m;\n+\t\tprev = m;\n+\t\twhile (cpy_len != 0) {\n+\t\t\trte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + seg_offset),\n+\t\t\t\t(void *)((uintptr_t)(vb_addr + vb_offset)),\n+\t\t\t\tcpy_len);\n+\n+\t\t\tseg_offset += cpy_len;\n+\t\t\tvb_offset += cpy_len;\n+\t\t\tvb_avail -= cpy_len;\n+\t\t\tseg_avail -= cpy_len;\n+\n+\t\t\tif (vb_avail != 0) {\n+\t\t\t\t/*\n+\t\t\t\t * The segment reachs to its end,\n+\t\t\t\t * while the virtio buffer in TX vring has\n+\t\t\t\t * more data to be copied.\n+\t\t\t\t */\n+\t\t\t\tcur->pkt.data_len = seg_offset;\n+\t\t\t\tm->pkt.pkt_len += seg_offset;\n+\t\t\t\t/* Allocate mbuf and populate the structure. */\n+\t\t\t\tcur = rte_pktmbuf_alloc(mbuf_pool);\n+\t\t\t\tif (unlikely(cur == NULL)) {\n+\t\t\t\t\tRTE_LOG(ERR, VHOST_DATA, \"Failed to \"\n+\t\t\t\t\t\t\"allocate memory for mbuf.\\n\");\n+\t\t\t\t\trte_pktmbuf_free(m);\n+\t\t\t\t\talloc_err = 1;\n+\t\t\t\t\tbreak;\n+\t\t\t\t}\n+\n+\t\t\t\tseg_num++;\n+\t\t\t\tprev->pkt.next = cur;\n+\t\t\t\tprev = cur;\n+\t\t\t\tseg_offset = 0;\n+\t\t\t\tseg_avail = buf_size;\n+\t\t\t} else {\n+\t\t\t\tif (desc->flags & VRING_DESC_F_NEXT) {\n+\t\t\t\t\t/*\n+\t\t\t\t\t * There are more virtio buffers in\n+\t\t\t\t\t * same vring entry need to be copied.\n+\t\t\t\t\t */\n+\t\t\t\t\tif (seg_avail == 0) {\n+\t\t\t\t\t\t/*\n+\t\t\t\t\t\t * The current segment hasn't\n+\t\t\t\t\t\t * room to accomodate more\n+\t\t\t\t\t\t * data.\n+\t\t\t\t\t\t */\n+\t\t\t\t\t\tcur->pkt.data_len = seg_offset;\n+\t\t\t\t\t\tm->pkt.pkt_len += seg_offset;\n+\t\t\t\t\t\t/*\n+\t\t\t\t\t\t * Allocate an mbuf and\n+\t\t\t\t\t\t * populate the structure.\n+\t\t\t\t\t\t */\n+\t\t\t\t\t\tcur = rte_pktmbuf_alloc(mbuf_pool);\n+\t\t\t\t\t\tif (unlikely(cur == NULL)) {\n+\t\t\t\t\t\t\tRTE_LOG(ERR,\n+\t\t\t\t\t\t\t\tVHOST_DATA,\n+\t\t\t\t\t\t\t\t\"Failed to \"\n+\t\t\t\t\t\t\t\t\"allocate memory \"\n+\t\t\t\t\t\t\t\t\"for mbuf\\n\");\n+\t\t\t\t\t\t\trte_pktmbuf_free(m);\n+\t\t\t\t\t\t\talloc_err = 1;\n+\t\t\t\t\t\t\tbreak;\n+\t\t\t\t\t\t}\n+\t\t\t\t\t\tseg_num++;\n+\t\t\t\t\t\tprev->pkt.next = cur;\n+\t\t\t\t\t\tprev = cur;\n+\t\t\t\t\t\tseg_offset = 0;\n+\t\t\t\t\t\tseg_avail = buf_size;\n+\t\t\t\t\t}\n+\n+\t\t\t\t\tdesc = &vq->desc[desc->next];\n+\n+\t\t\t\t\t/* Buffer address translation. */\n+\t\t\t\t\tvb_addr = gpa_to_vva(dev, desc->addr);\n+\t\t\t\t\t/* Prefetch buffer address. */\n+\t\t\t\t\trte_prefetch0((void *)(uintptr_t)vb_addr);\n+\t\t\t\t\tvb_offset = 0;\n+\t\t\t\t\tvb_avail = desc->len;\n+\n+\t\t\t\t\tPRINT_PACKET(dev, (uintptr_t)vb_addr,\n+\t\t\t\t\t\tdesc->len, 0);\n+\t\t\t\t} else {\n+\t\t\t\t\t/* The whole packet completes. */\n+\t\t\t\t\tcur->pkt.data_len = seg_offset;\n+\t\t\t\t\tm->pkt.pkt_len += seg_offset;\n+\t\t\t\t\tvb_avail = 0;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\tcpy_len = RTE_MIN(vb_avail, seg_avail);\n+\t\t}\n+\n+\t\tif (unlikely(alloc_err == 1))\n+\t\t\tbreak;\n+\n+\t\tm->pkt.nb_segs = seg_num;\n+\n+\t\t/*\n+\t\t * If this is the first received packet we need to learn\n+\t\t * the MAC and setup VMDQ\n+\t\t */\n+\t\tif (dev->ready == DEVICE_MAC_LEARNING) {\n+\t\t\tif (dev->remove || (link_vmdq(dev, m) == -1)) {\n+\t\t\t\t/*\n+\t\t\t\t * Discard frame if device is scheduled for\n+\t\t\t\t * removal or a duplicate MAC address is found.\n+\t\t\t\t */\n+\t\t\t\tentry_success = free_entries;\n+\t\t\t\tvq->last_used_idx += entry_success;\n+\t\t\t\trte_pktmbuf_free(m);\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t}\n+\n+\t\tvirtio_tx_route(dev, m, mbuf_pool, (uint16_t)dev->device_fh);\n+\t\tvq->last_used_idx++;\n+\t\tentry_success++;\n+\t\trte_pktmbuf_free(m);\n+\t}\n+\n+\trte_compiler_barrier();\n+\tvq->used->idx += entry_success;\n+\t/* Kick guest if required. */\n+\tif (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))\n+\t\teventfd_write((int)vq->kickfd, 1);\n+\n+}\n+\n+/*\n+ * This function is called by each data core. It handles all RX/TX registered with the\n+ * core. For TX the specific lcore linked list is used. For RX, MAC addresses are compared\n+ * with all devices in the main linked list.\n+ */\n+static int\n+switch_worker(__attribute__((unused)) void *arg)\n+{\n+\tstruct rte_mempool *mbuf_pool = arg;\n+\tstruct virtio_net *dev = NULL;\n+\tstruct rte_mbuf *pkts_burst[MAX_PKT_BURST];\n+\tstruct virtio_net_data_ll *dev_ll;\n+\tstruct mbuf_table *tx_q;\n+\tvolatile struct lcore_ll_info *lcore_ll;\n+\tconst uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;\n+\tuint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;\n+\tunsigned ret, i;\n+\tconst uint16_t lcore_id = rte_lcore_id();\n+\tconst uint16_t num_cores = (uint16_t)rte_lcore_count();\n+\tuint16_t rx_count = 0;\n+\tuint32_t mergeable = 0;\n+\n+\tRTE_LOG(INFO, VHOST_DATA, \"Procesing on Core %u started\\n\", lcore_id);\n+\tlcore_ll = lcore_info[lcore_id].lcore_ll;\n+\tprev_tsc = 0;\n+\n+\ttx_q = &lcore_tx_queue[lcore_id];\n+\tfor (i = 0; i < num_cores; i ++) {\n+\t\tif (lcore_ids[i] == lcore_id) {\n+\t\t\ttx_q->txq_id = i;\n+\t\t\tbreak;\n+\t\t}\n+\t}\n+\n+\twhile(1) {\n+\t\tcur_tsc = rte_rdtsc();\n+\t\t/*\n+\t\t * TX burst queue drain\n+\t\t */\n+\t\tdiff_tsc = cur_tsc - prev_tsc;\n+\t\tif (unlikely(diff_tsc > drain_tsc)) {\n+\n+\t\t\tif (tx_q->len) {\n+\t\t\t\tLOG_DEBUG(VHOST_DATA, \"TX queue drained after timeout with burst size %u \\n\", tx_q->len);\n+\n+\t\t\t\t/*Tx any packets in the queue*/\n+\t\t\t\tret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id,\n+\t\t\t\t\t\t\t\t\t   (struct rte_mbuf **)tx_q->m_table,\n+\t\t\t\t\t\t\t\t\t   (uint16_t)tx_q->len);\n+\t\t\t\tif (unlikely(ret < tx_q->len)) {\n+\t\t\t\t\tdo {\n+\t\t\t\t\t\trte_pktmbuf_free(tx_q->m_table[ret]);\n+\t\t\t\t\t} while (++ret < tx_q->len);\n+\t\t\t\t}\n+\n+\t\t\t\ttx_q->len = 0;\n+\t\t\t}\n+\n+\t\t\tprev_tsc = cur_tsc;\n+\n+\t\t}\n+\n+\t\trte_prefetch0(lcore_ll->ll_root_used);\n+\t\t/*\n+\t\t * Inform the configuration core that we have exited the linked list and that no devices are\n+\t\t * in use if requested.\n+\t\t */\n+\t\tif (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL)\n+\t\t\tlcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;\n+\n+\t\t/*\n+\t\t * Process devices\n+\t\t */\n+\t\tdev_ll = lcore_ll->ll_root_used;\n+\n+\t\twhile (dev_ll != NULL) {\n+\t\t\t/*get virtio device ID*/\n+\t\t\tdev = dev_ll->dev;\n+\t\t\tmergeable =\n+\t\t\t\tdev->features & (1 << VIRTIO_NET_F_MRG_RXBUF);\n+\n+\t\t\tif (dev->remove) {\n+\t\t\t\tdev_ll = dev_ll->next;\n+\t\t\t\tunlink_vmdq(dev);\n+\t\t\t\tdev->ready = DEVICE_SAFE_REMOVE;\n+\t\t\t\tcontinue;\n+\t\t\t}\n+\t\t\tif (likely(dev->ready == DEVICE_RX)) {\n+\t\t\t\t/*Handle guest RX*/\n+\t\t\t\trx_count = rte_eth_rx_burst(ports[0],\n+\t\t\t\t\t(uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);\n+\n+\t\t\t\tif (rx_count) {\n+\t\t\t\t\tif (likely(mergeable == 0))\n+\t\t\t\t\t\tret_count =\n+\t\t\t\t\t\t\tvirtio_dev_rx(dev,\n+\t\t\t\t\t\t\tpkts_burst, rx_count);\n+\t\t\t\t\telse\n+\t\t\t\t\t\tret_count =\n+\t\t\t\t\t\t\tvirtio_dev_merge_rx(dev,\n+\t\t\t\t\t\t\tpkts_burst, rx_count);\n+\n+\t\t\t\t\tif (enable_stats) {\n+\t\t\t\t\t\trte_atomic64_add(\n+\t\t\t\t\t\t&dev_statistics[dev_ll->dev->device_fh].rx_total_atomic,\n+\t\t\t\t\t\trx_count);\n+\t\t\t\t\t\trte_atomic64_add(\n+\t\t\t\t\t\t&dev_statistics[dev_ll->dev->device_fh].rx_atomic, ret_count);\n+\t\t\t\t\t}\n+\t\t\t\t\twhile (likely(rx_count)) {\n+\t\t\t\t\t\trx_count--;\n+\t\t\t\t\t\trte_pktmbuf_free(pkts_burst[rx_count]);\n+\t\t\t\t\t}\n+\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\tif (!dev->remove) {\n+\t\t\t\t/*Handle guest TX*/\n+\t\t\t\tif (likely(mergeable == 0))\n+\t\t\t\t\tvirtio_dev_tx(dev, mbuf_pool);\n+\t\t\t\telse\n+\t\t\t\t\tvirtio_dev_merge_tx(dev, mbuf_pool);\n+\t\t\t}\n+\n+\t\t\t/*move to the next device in the list*/\n+\t\t\tdev_ll = dev_ll->next;\n+\t\t}\n+\t}\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * This function gets available ring number for zero copy rx.\n+ * Only one thread will call this funciton for a paticular virtio device,\n+ * so, it is designed as non-thread-safe function.\n+ */\n+static inline uint32_t __attribute__((always_inline))\n+get_available_ring_num_zcp(struct virtio_net *dev)\n+{\n+\tstruct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_RXQ];\n+\tuint16_t avail_idx;\n+\n+\tavail_idx = *((volatile uint16_t *)&vq->avail->idx);\n+\treturn (uint32_t)(avail_idx - vq->last_used_idx_res);\n+}\n+\n+/*\n+ * This function gets available ring index for zero copy rx,\n+ * it will retry 'burst_rx_retry_num' times till it get enough ring index.\n+ * Only one thread will call this funciton for a paticular virtio device,\n+ * so, it is designed as non-thread-safe function.\n+ */\n+static inline uint32_t __attribute__((always_inline))\n+get_available_ring_index_zcp(struct virtio_net *dev,\n+\tuint16_t *res_base_idx, uint32_t count)\n+{\n+\tstruct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_RXQ];\n+\tuint16_t avail_idx;\n+\tuint32_t retry = 0;\n+\tuint16_t free_entries;\n+\n+\t*res_base_idx = vq->last_used_idx_res;\n+\tavail_idx = *((volatile uint16_t *)&vq->avail->idx);\n+\tfree_entries = (avail_idx - *res_base_idx);\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") in get_available_ring_index_zcp: \"\n+\t\t\t\"avail idx: %d, \"\n+\t\t\t\"res base idx:%d, free entries:%d\\n\",\n+\t\t\tdev->device_fh, avail_idx, *res_base_idx,\n+\t\t\tfree_entries);\n+\n+\t/*\n+\t * If retry is enabled and the queue is full then we wait\n+\t * and retry to avoid packet loss.\n+\t */\n+\tif (enable_retry && unlikely(count > free_entries)) {\n+\t\tfor (retry = 0; retry < burst_rx_retry_num; retry++) {\n+\t\t\trte_delay_us(burst_rx_delay_time);\n+\t\t\tavail_idx = *((volatile uint16_t *)&vq->avail->idx);\n+\t\t\tfree_entries = (avail_idx - *res_base_idx);\n+\t\t\tif (count <= free_entries)\n+\t\t\t\tbreak;\n+\t\t}\n+\t}\n+\n+\t/*check that we have enough buffers*/\n+\tif (unlikely(count > free_entries))\n+\t\tcount = free_entries;\n+\n+\tif (unlikely(count == 0)) {\n+\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\"(%\"PRIu64\") Fail in get_available_ring_index_zcp: \"\n+\t\t\t\"avail idx: %d, res base idx:%d, free entries:%d\\n\",\n+\t\t\tdev->device_fh, avail_idx,\n+\t\t\t*res_base_idx, free_entries);\n+\t\treturn 0;\n+\t}\n+\n+\tvq->last_used_idx_res = *res_base_idx + count;\n+\n+\treturn count;\n+}\n+\n+/*\n+ * This function put descriptor back to used list.\n+ */\n+static inline void __attribute__((always_inline))\n+put_desc_to_used_list_zcp(struct vhost_virtqueue *vq, uint16_t desc_idx)\n+{\n+\tuint16_t res_cur_idx = vq->last_used_idx;\n+\tvq->used->ring[res_cur_idx & (vq->size - 1)].id = (uint32_t)desc_idx;\n+\tvq->used->ring[res_cur_idx & (vq->size - 1)].len = 0;\n+\trte_compiler_barrier();\n+\t*(volatile uint16_t *)&vq->used->idx += 1;\n+\tvq->last_used_idx += 1;\n+\n+\t/* Kick the guest if necessary. */\n+\tif (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))\n+\t\teventfd_write((int)vq->kickfd, 1);\n+}\n+\n+/*\n+ * This function get available descriptor from vitio vring and un-attached mbuf\n+ * from vpool->ring, and then attach them together. It needs adjust the offset\n+ * for buff_addr and phys_addr accroding to PMD implementation, otherwise the\n+ * frame data may be put to wrong location in mbuf.\n+ */\n+static inline void __attribute__((always_inline))\n+attach_rxmbuf_zcp(struct virtio_net *dev)\n+{\n+\tuint16_t res_base_idx, desc_idx;\n+\tuint64_t buff_addr, phys_addr;\n+\tstruct vhost_virtqueue *vq;\n+\tstruct vring_desc *desc;\n+\tstruct rte_mbuf *mbuf = NULL;\n+\tstruct vpool *vpool;\n+\thpa_type addr_type;\n+\n+\tvpool = &vpool_array[dev->vmdq_rx_q];\n+\tvq = dev->virtqueue[VIRTIO_RXQ];\n+\n+\tdo {\n+\t\tif (unlikely(get_available_ring_index_zcp(dev, &res_base_idx,\n+\t\t\t\t1) != 1))\n+\t\t\treturn;\n+\t\tdesc_idx = vq->avail->ring[(res_base_idx) & (vq->size - 1)];\n+\n+\t\tdesc = &vq->desc[desc_idx];\n+\t\tif (desc->flags & VRING_DESC_F_NEXT) {\n+\t\t\tdesc = &vq->desc[desc->next];\n+\t\t\tbuff_addr = gpa_to_vva(dev, desc->addr);\n+\t\t\tphys_addr = gpa_to_hpa(dev, desc->addr, desc->len,\n+\t\t\t\t\t&addr_type);\n+\t\t} else {\n+\t\t\tbuff_addr = gpa_to_vva(dev,\n+\t\t\t\t\tdesc->addr + vq->vhost_hlen);\n+\t\t\tphys_addr = gpa_to_hpa(dev,\n+\t\t\t\t\tdesc->addr + vq->vhost_hlen,\n+\t\t\t\t\tdesc->len, &addr_type);\n+\t\t}\n+\n+\t\tif (unlikely(addr_type == PHYS_ADDR_INVALID)) {\n+\t\t\tRTE_LOG(ERR, VHOST_DATA, \"(%\"PRIu64\") Invalid frame buffer\"\n+\t\t\t\t\" address found when attaching RX frame buffer\"\n+\t\t\t\t\" address!\\n\", dev->device_fh);\n+\t\t\tput_desc_to_used_list_zcp(vq, desc_idx);\n+\t\t\tcontinue;\n+\t\t}\n+\n+\t\t/*\n+\t\t * Check if the frame buffer address from guest crosses\n+\t\t * sub-region or not.\n+\t\t */\n+\t\tif (unlikely(addr_type == PHYS_ADDR_CROSS_SUBREG)) {\n+\t\t\tRTE_LOG(ERR, VHOST_DATA,\n+\t\t\t\t\"(%\"PRIu64\") Frame buffer address cross \"\n+\t\t\t\t\"sub-regioin found when attaching RX frame \"\n+\t\t\t\t\"buffer address!\\n\",\n+\t\t\t\tdev->device_fh);\n+\t\t\tput_desc_to_used_list_zcp(vq, desc_idx);\n+\t\t\tcontinue;\n+\t\t}\n+\t} while (unlikely(phys_addr == 0));\n+\n+\trte_ring_sc_dequeue(vpool->ring, (void **)&mbuf);\n+\tif (unlikely(mbuf == NULL)) {\n+\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\"(%\"PRIu64\") in attach_rxmbuf_zcp: \"\n+\t\t\t\"ring_sc_dequeue fail.\\n\",\n+\t\t\tdev->device_fh);\n+\t\tput_desc_to_used_list_zcp(vq, desc_idx);\n+\t\treturn;\n+\t}\n+\n+\tif (unlikely(vpool->buf_size > desc->len)) {\n+\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\"(%\"PRIu64\") in attach_rxmbuf_zcp: frame buffer \"\n+\t\t\t\"length(%d) of descriptor idx: %d less than room \"\n+\t\t\t\"size required: %d\\n\",\n+\t\t\tdev->device_fh, desc->len, desc_idx, vpool->buf_size);\n+\t\tput_desc_to_used_list_zcp(vq, desc_idx);\n+\t\trte_ring_sp_enqueue(vpool->ring, (void *)mbuf);\n+\t\treturn;\n+\t}\n+\n+\tmbuf->buf_addr = (void *)(uintptr_t)(buff_addr - RTE_PKTMBUF_HEADROOM);\n+\tmbuf->pkt.data = (void *)(uintptr_t)(buff_addr);\n+\tmbuf->buf_physaddr = phys_addr - RTE_PKTMBUF_HEADROOM;\n+\tmbuf->pkt.data_len = desc->len;\n+\tMBUF_HEADROOM_UINT32(mbuf) = (uint32_t)desc_idx;\n+\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in attach_rxmbuf_zcp: res base idx:%d, \"\n+\t\t\"descriptor idx:%d\\n\",\n+\t\tdev->device_fh, res_base_idx, desc_idx);\n+\n+\t__rte_mbuf_raw_free(mbuf);\n+\n+\treturn;\n+}\n+\n+/*\n+ * Detach an attched packet mbuf -\n+ *  - restore original mbuf address and length values.\n+ *  - reset pktmbuf data and data_len to their default values.\n+ *  All other fields of the given packet mbuf will be left intact.\n+ *\n+ * @param m\n+ *   The attached packet mbuf.\n+ */\n+static inline void pktmbuf_detach_zcp(struct rte_mbuf *m)\n+{\n+\tconst struct rte_mempool *mp = m->pool;\n+\tvoid *buf = RTE_MBUF_TO_BADDR(m);\n+\tuint32_t buf_ofs;\n+\tuint32_t buf_len = mp->elt_size - sizeof(*m);\n+\tm->buf_physaddr = rte_mempool_virt2phy(mp, m) + sizeof(*m);\n+\n+\tm->buf_addr = buf;\n+\tm->buf_len = (uint16_t)buf_len;\n+\n+\tbuf_ofs = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ?\n+\t\t\tRTE_PKTMBUF_HEADROOM : m->buf_len;\n+\tm->pkt.data = (char *) m->buf_addr + buf_ofs;\n+\n+\tm->pkt.data_len = 0;\n+}\n+\n+/*\n+ * This function is called after packets have been transimited. It fetchs mbuf\n+ * from vpool->pool, detached it and put into vpool->ring. It also update the\n+ * used index and kick the guest if necessary.\n+ */\n+static inline uint32_t __attribute__((always_inline))\n+txmbuf_clean_zcp(struct virtio_net *dev, struct vpool *vpool)\n+{\n+\tstruct rte_mbuf *mbuf;\n+\tstruct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_TXQ];\n+\tuint32_t used_idx = vq->last_used_idx & (vq->size - 1);\n+\tuint32_t index = 0;\n+\tuint32_t mbuf_count = rte_mempool_count(vpool->pool);\n+\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in txmbuf_clean_zcp: mbuf count in mempool before \"\n+\t\t\"clean is: %d\\n\",\n+\t\tdev->device_fh, mbuf_count);\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in txmbuf_clean_zcp: mbuf count in  ring before \"\n+\t\t\"clean  is : %d\\n\",\n+\t\tdev->device_fh, rte_ring_count(vpool->ring));\n+\n+\tfor (index = 0; index < mbuf_count; index++) {\n+\t\tmbuf = __rte_mbuf_raw_alloc(vpool->pool);\n+\t\tif (likely(RTE_MBUF_INDIRECT(mbuf)))\n+\t\t\tpktmbuf_detach_zcp(mbuf);\n+\t\trte_ring_sp_enqueue(vpool->ring, mbuf);\n+\n+\t\t/* Update used index buffer information. */\n+\t\tvq->used->ring[used_idx].id = MBUF_HEADROOM_UINT32(mbuf);\n+\t\tvq->used->ring[used_idx].len = 0;\n+\n+\t\tused_idx = (used_idx + 1) & (vq->size - 1);\n+\t}\n+\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in txmbuf_clean_zcp: mbuf count in mempool after \"\n+\t\t\"clean is: %d\\n\",\n+\t\tdev->device_fh, rte_mempool_count(vpool->pool));\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in txmbuf_clean_zcp: mbuf count in  ring after \"\n+\t\t\"clean  is : %d\\n\",\n+\t\tdev->device_fh, rte_ring_count(vpool->ring));\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in txmbuf_clean_zcp: before updated \"\n+\t\t\"vq->last_used_idx:%d\\n\",\n+\t\tdev->device_fh, vq->last_used_idx);\n+\n+\tvq->last_used_idx += mbuf_count;\n+\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in txmbuf_clean_zcp: after updated \"\n+\t\t\"vq->last_used_idx:%d\\n\",\n+\t\tdev->device_fh, vq->last_used_idx);\n+\n+\trte_compiler_barrier();\n+\n+\t*(volatile uint16_t *)&vq->used->idx += mbuf_count;\n+\n+\t/* Kick guest if required. */\n+\tif (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))\n+\t\teventfd_write((int)vq->kickfd, 1);\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * This function is called when a virtio device is destroy.\n+ * It fetchs mbuf from vpool->pool, and detached it, and put into vpool->ring.\n+ */\n+static void mbuf_destroy_zcp(struct vpool *vpool)\n+{\n+\tstruct rte_mbuf *mbuf = NULL;\n+\tuint32_t index, mbuf_count = rte_mempool_count(vpool->pool);\n+\n+\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\"in mbuf_destroy_zcp: mbuf count in mempool before \"\n+\t\t\"mbuf_destroy_zcp is: %d\\n\",\n+\t\tmbuf_count);\n+\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\"in mbuf_destroy_zcp: mbuf count in  ring before \"\n+\t\t\"mbuf_destroy_zcp  is : %d\\n\",\n+\t\trte_ring_count(vpool->ring));\n+\n+\tfor (index = 0; index < mbuf_count; index++) {\n+\t\tmbuf = __rte_mbuf_raw_alloc(vpool->pool);\n+\t\tif (likely(mbuf != NULL)) {\n+\t\t\tif (likely(RTE_MBUF_INDIRECT(mbuf)))\n+\t\t\t\tpktmbuf_detach_zcp(mbuf);\n+\t\t\trte_ring_sp_enqueue(vpool->ring, (void *)mbuf);\n+\t\t}\n+\t}\n+\n+\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\"in mbuf_destroy_zcp: mbuf count in mempool after \"\n+\t\t\"mbuf_destroy_zcp is: %d\\n\",\n+\t\trte_mempool_count(vpool->pool));\n+\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\"in mbuf_destroy_zcp: mbuf count in ring after \"\n+\t\t\"mbuf_destroy_zcp is : %d\\n\",\n+\t\trte_ring_count(vpool->ring));\n+}\n+\n+/*\n+ * This function update the use flag and counter.\n+ */\n+static inline uint32_t __attribute__((always_inline))\n+virtio_dev_rx_zcp(struct virtio_net *dev, struct rte_mbuf **pkts,\n+\tuint32_t count)\n+{\n+\tstruct vhost_virtqueue *vq;\n+\tstruct vring_desc *desc;\n+\tstruct rte_mbuf *buff;\n+\t/* The virtio_hdr is initialised to 0. */\n+\tstruct virtio_net_hdr_mrg_rxbuf virtio_hdr\n+\t\t= {{0, 0, 0, 0, 0, 0}, 0};\n+\tuint64_t buff_hdr_addr = 0;\n+\tuint32_t head[MAX_PKT_BURST], packet_len = 0;\n+\tuint32_t head_idx, packet_success = 0;\n+\tuint16_t res_cur_idx;\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") virtio_dev_rx()\\n\", dev->device_fh);\n+\n+\tif (count == 0)\n+\t\treturn 0;\n+\n+\tvq = dev->virtqueue[VIRTIO_RXQ];\n+\tcount = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;\n+\n+\tres_cur_idx = vq->last_used_idx;\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") Current Index %d| End Index %d\\n\",\n+\t\tdev->device_fh, res_cur_idx, res_cur_idx + count);\n+\n+\t/* Retrieve all of the head indexes first to avoid caching issues. */\n+\tfor (head_idx = 0; head_idx < count; head_idx++)\n+\t\thead[head_idx] = MBUF_HEADROOM_UINT32(pkts[head_idx]);\n+\n+\t/*Prefetch descriptor index. */\n+\trte_prefetch0(&vq->desc[head[packet_success]]);\n+\n+\twhile (packet_success != count) {\n+\t\t/* Get descriptor from available ring */\n+\t\tdesc = &vq->desc[head[packet_success]];\n+\n+\t\tbuff = pkts[packet_success];\n+\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\"(%\"PRIu64\") in dev_rx_zcp: update the used idx for \"\n+\t\t\t\"pkt[%d] descriptor idx: %d\\n\",\n+\t\t\tdev->device_fh, packet_success,\n+\t\t\tMBUF_HEADROOM_UINT32(buff));\n+\n+\t\tPRINT_PACKET(dev,\n+\t\t\t(uintptr_t)(((uint64_t)(uintptr_t)buff->buf_addr)\n+\t\t\t+ RTE_PKTMBUF_HEADROOM),\n+\t\t\trte_pktmbuf_data_len(buff), 0);\n+\n+\t\t/* Buffer address translation for virtio header. */\n+\t\tbuff_hdr_addr = gpa_to_vva(dev, desc->addr);\n+\t\tpacket_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;\n+\n+\t\t/*\n+\t\t * If the descriptors are chained the header and data are\n+\t\t * placed in separate buffers.\n+\t\t */\n+\t\tif (desc->flags & VRING_DESC_F_NEXT) {\n+\t\t\tdesc->len = vq->vhost_hlen;\n+\t\t\tdesc = &vq->desc[desc->next];\n+\t\t\tdesc->len = rte_pktmbuf_data_len(buff);\n+\t\t} else {\n+\t\t\tdesc->len = packet_len;\n+\t\t}\n+\n+\t\t/* Update used ring with desc information */\n+\t\tvq->used->ring[res_cur_idx & (vq->size - 1)].id\n+\t\t\t= head[packet_success];\n+\t\tvq->used->ring[res_cur_idx & (vq->size - 1)].len\n+\t\t\t= packet_len;\n+\t\tres_cur_idx++;\n+\t\tpacket_success++;\n+\n+\t\t/* A header is required per buffer. */\n+\t\trte_memcpy((void *)(uintptr_t)buff_hdr_addr,\n+\t\t\t(const void *)&virtio_hdr, vq->vhost_hlen);\n+\n+\t\tPRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);\n+\n+\t\tif (likely(packet_success < count)) {\n+\t\t\t/* Prefetch descriptor index. */\n+\t\t\trte_prefetch0(&vq->desc[head[packet_success]]);\n+\t\t}\n+\t}\n+\n+\trte_compiler_barrier();\n+\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in dev_rx_zcp: before update used idx: \"\n+\t\t\"vq.last_used_idx: %d, vq->used->idx: %d\\n\",\n+\t\tdev->device_fh, vq->last_used_idx, vq->used->idx);\n+\n+\t*(volatile uint16_t *)&vq->used->idx += count;\n+\tvq->last_used_idx += count;\n+\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in dev_rx_zcp: after  update used idx: \"\n+\t\t\"vq.last_used_idx: %d, vq->used->idx: %d\\n\",\n+\t\tdev->device_fh, vq->last_used_idx, vq->used->idx);\n+\n+\t/* Kick the guest if necessary. */\n+\tif (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))\n+\t\teventfd_write((int)vq->kickfd, 1);\n+\n+\treturn count;\n+}\n+\n+/*\n+ * This function routes the TX packet to the correct interface.\n+ * This may be a local device or the physical port.\n+ */\n+static inline void __attribute__((always_inline))\n+virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m,\n+\tuint32_t desc_idx, uint8_t need_copy)\n+{\n+\tstruct mbuf_table *tx_q;\n+\tstruct rte_mbuf **m_table;\n+\tstruct rte_mbuf *mbuf = NULL;\n+\tunsigned len, ret, offset = 0;\n+\tstruct vpool *vpool;\n+\tstruct virtio_net_data_ll *dev_ll = ll_root_used;\n+\tstruct ether_hdr *pkt_hdr = (struct ether_hdr *)m->pkt.data;\n+\tuint16_t vlan_tag = (uint16_t)vlan_tags[(uint16_t)dev->device_fh];\n+\n+\t/*Add packet to the port tx queue*/\n+\ttx_q = &tx_queue_zcp[(uint16_t)dev->vmdq_rx_q];\n+\tlen = tx_q->len;\n+\n+\t/* Allocate an mbuf and populate the structure. */\n+\tvpool = &vpool_array[MAX_QUEUES + (uint16_t)dev->vmdq_rx_q];\n+\trte_ring_sc_dequeue(vpool->ring, (void **)&mbuf);\n+\tif (unlikely(mbuf == NULL)) {\n+\t\tstruct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_TXQ];\n+\t\tRTE_LOG(ERR, VHOST_DATA,\n+\t\t\t\"(%\"PRIu64\") Failed to allocate memory for mbuf.\\n\",\n+\t\t\tdev->device_fh);\n+\t\tput_desc_to_used_list_zcp(vq, desc_idx);\n+\t\treturn;\n+\t}\n+\n+\tif (vm2vm_mode == VM2VM_HARDWARE) {\n+\t\t/* Avoid using a vlan tag from any vm for external pkt, such as\n+\t\t * vlan_tags[dev->device_fh], oterwise, it conflicts when pool\n+\t\t * selection, MAC address determines it as an external pkt\n+\t\t * which should go to network, while vlan tag determine it as\n+\t\t * a vm2vm pkt should forward to another vm. Hardware confuse\n+\t\t * such a ambiguous situation, so pkt will lost.\n+\t\t */\n+\t\tvlan_tag = external_pkt_default_vlan_tag;\n+\t\twhile (dev_ll != NULL) {\n+\t\t\tif (likely(dev_ll->dev->ready == DEVICE_RX) &&\n+\t\t\t\tether_addr_cmp(&(pkt_hdr->d_addr),\n+\t\t\t\t&dev_ll->dev->mac_address)) {\n+\n+\t\t\t\t/*\n+\t\t\t\t * Drop the packet if the TX packet is destined\n+\t\t\t\t * for the TX device.\n+\t\t\t\t */\n+\t\t\t\tif (unlikely(dev_ll->dev->device_fh\n+\t\t\t\t\t== dev->device_fh)) {\n+\t\t\t\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\t\t\"(%\"PRIu64\") TX: Source and destination\"\n+\t\t\t\t\t\"MAC addresses are the same. Dropping \"\n+\t\t\t\t\t\"packet.\\n\",\n+\t\t\t\t\tdev_ll->dev->device_fh);\n+\t\t\t\t\tMBUF_HEADROOM_UINT32(mbuf)\n+\t\t\t\t\t\t= (uint32_t)desc_idx;\n+\t\t\t\t\t__rte_mbuf_raw_free(mbuf);\n+\t\t\t\t\treturn;\n+\t\t\t\t}\n+\n+\t\t\t\t/*\n+\t\t\t\t * Packet length offset 4 bytes for HW vlan\n+\t\t\t\t * strip when L2 switch back.\n+\t\t\t\t */\n+\t\t\t\toffset = 4;\n+\t\t\t\tvlan_tag =\n+\t\t\t\t(uint16_t)\n+\t\t\t\tvlan_tags[(uint16_t)dev_ll->dev->device_fh];\n+\n+\t\t\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\t\"(%\"PRIu64\") TX: pkt to local VM device id:\"\n+\t\t\t\t\"(%\"PRIu64\") vlan tag: %d.\\n\",\n+\t\t\t\tdev->device_fh, dev_ll->dev->device_fh,\n+\t\t\t\tvlan_tag);\n+\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t\tdev_ll = dev_ll->next;\n+\t\t}\n+\t}\n+\n+\tmbuf->pkt.nb_segs = m->pkt.nb_segs;\n+\tmbuf->pkt.next = m->pkt.next;\n+\tmbuf->pkt.data_len = m->pkt.data_len + offset;\n+\tmbuf->pkt.pkt_len = mbuf->pkt.data_len;\n+\tif (unlikely(need_copy)) {\n+\t\t/* Copy the packet contents to the mbuf. */\n+\t\trte_memcpy((void *)((uint8_t *)mbuf->pkt.data),\n+\t\t\t(const void *) ((uint8_t *)m->pkt.data),\n+\t\t\tm->pkt.data_len);\n+\t} else {\n+\t\tmbuf->pkt.data = m->pkt.data;\n+\t\tmbuf->buf_physaddr = m->buf_physaddr;\n+\t\tmbuf->buf_addr = m->buf_addr;\n+\t}\n+\tmbuf->ol_flags = PKT_TX_VLAN_PKT;\n+\tmbuf->pkt.vlan_macip.f.vlan_tci = vlan_tag;\n+\tmbuf->pkt.vlan_macip.f.l2_len = sizeof(struct ether_hdr);\n+\tmbuf->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr);\n+\tMBUF_HEADROOM_UINT32(mbuf) = (uint32_t)desc_idx;\n+\n+\ttx_q->m_table[len] = mbuf;\n+\tlen++;\n+\n+\tLOG_DEBUG(VHOST_DATA,\n+\t\t\"(%\"PRIu64\") in tx_route_zcp: pkt: nb_seg: %d, next:%s\\n\",\n+\t\tdev->device_fh,\n+\t\tmbuf->pkt.nb_segs,\n+\t\t(mbuf->pkt.next == NULL) ? \"null\" : \"non-null\");\n+\n+\tif (enable_stats) {\n+\t\tdev_statistics[dev->device_fh].tx_total++;\n+\t\tdev_statistics[dev->device_fh].tx++;\n+\t}\n+\n+\tif (unlikely(len == MAX_PKT_BURST)) {\n+\t\tm_table = (struct rte_mbuf **)tx_q->m_table;\n+\t\tret = rte_eth_tx_burst(ports[0],\n+\t\t\t(uint16_t)tx_q->txq_id, m_table, (uint16_t) len);\n+\n+\t\t/*\n+\t\t * Free any buffers not handled by TX and update\n+\t\t * the port stats.\n+\t\t */\n+\t\tif (unlikely(ret < len)) {\n+\t\t\tdo {\n+\t\t\t\trte_pktmbuf_free(m_table[ret]);\n+\t\t\t} while (++ret < len);\n+\t\t}\n+\n+\t\tlen = 0;\n+\t\ttxmbuf_clean_zcp(dev, vpool);\n+\t}\n+\n+\ttx_q->len = len;\n+\n+\treturn;\n+}\n+\n+/*\n+ * This function TX all available packets in virtio TX queue for one\n+ * virtio-net device. If it is first packet, it learns MAC address and\n+ * setup VMDQ.\n+ */\n+static inline void __attribute__((always_inline))\n+virtio_dev_tx_zcp(struct virtio_net *dev)\n+{\n+\tstruct rte_mbuf m;\n+\tstruct vhost_virtqueue *vq;\n+\tstruct vring_desc *desc;\n+\tuint64_t buff_addr = 0, phys_addr;\n+\tuint32_t head[MAX_PKT_BURST];\n+\tuint32_t i;\n+\tuint16_t free_entries, packet_success = 0;\n+\tuint16_t avail_idx;\n+\tuint8_t need_copy = 0;\n+\thpa_type addr_type;\n+\n+\tvq = dev->virtqueue[VIRTIO_TXQ];\n+\tavail_idx =  *((volatile uint16_t *)&vq->avail->idx);\n+\n+\t/* If there are no available buffers then return. */\n+\tif (vq->last_used_idx_res == avail_idx)\n+\t\treturn;\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") virtio_dev_tx()\\n\", dev->device_fh);\n+\n+\t/* Prefetch available ring to retrieve head indexes. */\n+\trte_prefetch0(&vq->avail->ring[vq->last_used_idx_res & (vq->size - 1)]);\n+\n+\t/* Get the number of free entries in the ring */\n+\tfree_entries = (avail_idx - vq->last_used_idx_res);\n+\n+\t/* Limit to MAX_PKT_BURST. */\n+\tfree_entries\n+\t\t= (free_entries > MAX_PKT_BURST) ? MAX_PKT_BURST : free_entries;\n+\n+\tLOG_DEBUG(VHOST_DATA, \"(%\"PRIu64\") Buffers available %d\\n\",\n+\t\tdev->device_fh, free_entries);\n+\n+\t/* Retrieve all of the head indexes first to avoid caching issues. */\n+\tfor (i = 0; i < free_entries; i++)\n+\t\thead[i]\n+\t\t\t= vq->avail->ring[(vq->last_used_idx_res + i)\n+\t\t\t& (vq->size - 1)];\n+\n+\tvq->last_used_idx_res += free_entries;\n+\n+\t/* Prefetch descriptor index. */\n+\trte_prefetch0(&vq->desc[head[packet_success]]);\n+\trte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);\n+\n+\twhile (packet_success < free_entries) {\n+\t\tdesc = &vq->desc[head[packet_success]];\n+\n+\t\t/* Discard first buffer as it is the virtio header */\n+\t\tdesc = &vq->desc[desc->next];\n+\n+\t\t/* Buffer address translation. */\n+\t\tbuff_addr = gpa_to_vva(dev, desc->addr);\n+\t\tphys_addr = gpa_to_hpa(dev, desc->addr, desc->len, &addr_type);\n+\n+\t\tif (likely(packet_success < (free_entries - 1)))\n+\t\t\t/* Prefetch descriptor index. */\n+\t\t\trte_prefetch0(&vq->desc[head[packet_success + 1]]);\n+\n+\t\tif (unlikely(addr_type == PHYS_ADDR_INVALID)) {\n+\t\t\tRTE_LOG(ERR, VHOST_DATA,\n+\t\t\t\t\"(%\"PRIu64\") Invalid frame buffer address found\"\n+\t\t\t\t\"when TX packets!\\n\",\n+\t\t\t\tdev->device_fh);\n+\t\t\tpacket_success++;\n+\t\t\tcontinue;\n+\t\t}\n+\n+\t\t/* Prefetch buffer address. */\n+\t\trte_prefetch0((void *)(uintptr_t)buff_addr);\n+\n+\t\t/*\n+\t\t * Setup dummy mbuf. This is copied to a real mbuf if\n+\t\t * transmitted out the physical port.\n+\t\t */\n+\t\tm.pkt.data_len = desc->len;\n+\t\tm.pkt.nb_segs = 1;\n+\t\tm.pkt.next = NULL;\n+\t\tm.pkt.data = (void *)(uintptr_t)buff_addr;\n+\t\tm.buf_addr = m.pkt.data;\n+\t\tm.buf_physaddr = phys_addr;\n+\n+\t\t/*\n+\t\t * Check if the frame buffer address from guest crosses\n+\t\t * sub-region or not.\n+\t\t */\n+\t\tif (unlikely(addr_type == PHYS_ADDR_CROSS_SUBREG)) {\n+\t\t\tRTE_LOG(ERR, VHOST_DATA,\n+\t\t\t\t\"(%\"PRIu64\") Frame buffer address cross \"\n+\t\t\t\t\"sub-regioin found when attaching TX frame \"\n+\t\t\t\t\"buffer address!\\n\",\n+\t\t\t\tdev->device_fh);\n+\t\t\tneed_copy = 1;\n+\t\t} else\n+\t\t\tneed_copy = 0;\n+\n+\t\tPRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0);\n+\n+\t\t/*\n+\t\t * If this is the first received packet we need to learn\n+\t\t * the MAC and setup VMDQ\n+\t\t */\n+\t\tif (unlikely(dev->ready == DEVICE_MAC_LEARNING)) {\n+\t\t\tif (dev->remove || (link_vmdq(dev, &m) == -1)) {\n+\t\t\t\t/*\n+\t\t\t\t * Discard frame if device is scheduled for\n+\t\t\t\t * removal or a duplicate MAC address is found.\n+\t\t\t\t */\n+\t\t\t\tpacket_success += free_entries;\n+\t\t\t\tvq->last_used_idx += packet_success;\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t}\n+\n+\t\tvirtio_tx_route_zcp(dev, &m, head[packet_success], need_copy);\n+\t\tpacket_success++;\n+\t}\n+}\n+\n+/*\n+ * This function is called by each data core. It handles all RX/TX registered\n+ * with the core. For TX the specific lcore linked list is used. For RX, MAC\n+ * addresses are compared with all devices in the main linked list.\n+ */\n+static int\n+switch_worker_zcp(__attribute__((unused)) void *arg)\n+{\n+\tstruct virtio_net *dev = NULL;\n+\tstruct rte_mbuf *pkts_burst[MAX_PKT_BURST];\n+\tstruct virtio_net_data_ll *dev_ll;\n+\tstruct mbuf_table *tx_q;\n+\tvolatile struct lcore_ll_info *lcore_ll;\n+\tconst uint64_t drain_tsc\n+\t\t= (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S\n+\t\t* BURST_TX_DRAIN_US;\n+\tuint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;\n+\tunsigned ret;\n+\tconst uint16_t lcore_id = rte_lcore_id();\n+\tuint16_t count_in_ring, rx_count = 0;\n+\n+\tRTE_LOG(INFO, VHOST_DATA, \"Procesing on Core %u started\\n\", lcore_id);\n+\n+\tlcore_ll = lcore_info[lcore_id].lcore_ll;\n+\tprev_tsc = 0;\n+\n+\twhile (1) {\n+\t\tcur_tsc = rte_rdtsc();\n+\n+\t\t/* TX burst queue drain */\n+\t\tdiff_tsc = cur_tsc - prev_tsc;\n+\t\tif (unlikely(diff_tsc > drain_tsc)) {\n+\t\t\t/*\n+\t\t\t * Get mbuf from vpool.pool and detach mbuf and\n+\t\t\t * put back into vpool.ring.\n+\t\t\t */\n+\t\t\tdev_ll = lcore_ll->ll_root_used;\n+\t\t\twhile ((dev_ll != NULL) && (dev_ll->dev != NULL)) {\n+\t\t\t\t/* Get virtio device ID */\n+\t\t\t\tdev = dev_ll->dev;\n+\n+\t\t\t\tif (likely(!dev->remove)) {\n+\t\t\t\t\ttx_q = &tx_queue_zcp[(uint16_t)dev->vmdq_rx_q];\n+\t\t\t\t\tif (tx_q->len) {\n+\t\t\t\t\t\tLOG_DEBUG(VHOST_DATA,\n+\t\t\t\t\t\t\"TX queue drained after timeout\"\n+\t\t\t\t\t\t\" with burst size %u\\n\",\n+\t\t\t\t\t\ttx_q->len);\n+\n+\t\t\t\t\t\t/*\n+\t\t\t\t\t\t * Tx any packets in the queue\n+\t\t\t\t\t\t */\n+\t\t\t\t\t\tret = rte_eth_tx_burst(\n+\t\t\t\t\t\t\tports[0],\n+\t\t\t\t\t\t\t(uint16_t)tx_q->txq_id,\n+\t\t\t\t\t\t\t(struct rte_mbuf **)\n+\t\t\t\t\t\t\ttx_q->m_table,\n+\t\t\t\t\t\t\t(uint16_t)tx_q->len);\n+\t\t\t\t\t\tif (unlikely(ret < tx_q->len)) {\n+\t\t\t\t\t\t\tdo {\n+\t\t\t\t\t\t\t\trte_pktmbuf_free(\n+\t\t\t\t\t\t\t\t\ttx_q->m_table[ret]);\n+\t\t\t\t\t\t\t} while (++ret < tx_q->len);\n+\t\t\t\t\t\t}\n+\t\t\t\t\t\ttx_q->len = 0;\n+\n+\t\t\t\t\t\ttxmbuf_clean_zcp(dev,\n+\t\t\t\t\t\t\t&vpool_array[MAX_QUEUES+dev->vmdq_rx_q]);\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t\tdev_ll = dev_ll->next;\n+\t\t\t}\n+\t\t\tprev_tsc = cur_tsc;\n+\t\t}\n+\n+\t\trte_prefetch0(lcore_ll->ll_root_used);\n+\n+\t\t/*\n+\t\t * Inform the configuration core that we have exited the linked\n+\t\t * list and that no devices are in use if requested.\n+\t\t */\n+\t\tif (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL)\n+\t\t\tlcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;\n+\n+\t\t/* Process devices */\n+\t\tdev_ll = lcore_ll->ll_root_used;\n+\n+\t\twhile ((dev_ll != NULL) && (dev_ll->dev != NULL)) {\n+\t\t\tdev = dev_ll->dev;\n+\t\t\tif (unlikely(dev->remove)) {\n+\t\t\t\tdev_ll = dev_ll->next;\n+\t\t\t\tunlink_vmdq(dev);\n+\t\t\t\tdev->ready = DEVICE_SAFE_REMOVE;\n+\t\t\t\tcontinue;\n+\t\t\t}\n+\n+\t\t\tif (likely(dev->ready == DEVICE_RX)) {\n+\t\t\t\tuint32_t index = dev->vmdq_rx_q;\n+\t\t\t\tuint16_t i;\n+\t\t\t\tcount_in_ring\n+\t\t\t\t= rte_ring_count(vpool_array[index].ring);\n+\t\t\t\tuint16_t free_entries\n+\t\t\t\t= (uint16_t)get_available_ring_num_zcp(dev);\n+\n+\t\t\t\t/*\n+\t\t\t\t * Attach all mbufs in vpool.ring and put back\n+\t\t\t\t * into vpool.pool.\n+\t\t\t\t */\n+\t\t\t\tfor (i = 0;\n+\t\t\t\ti < RTE_MIN(free_entries,\n+\t\t\t\tRTE_MIN(count_in_ring, MAX_PKT_BURST));\n+\t\t\t\ti++)\n+\t\t\t\t\tattach_rxmbuf_zcp(dev);\n+\n+\t\t\t\t/* Handle guest RX */\n+\t\t\t\trx_count = rte_eth_rx_burst(ports[0],\n+\t\t\t\t\t(uint16_t)dev->vmdq_rx_q, pkts_burst,\n+\t\t\t\t\tMAX_PKT_BURST);\n+\n+\t\t\t\tif (rx_count) {\n+\t\t\t\t\tret_count = virtio_dev_rx_zcp(dev,\n+\t\t\t\t\t\t\tpkts_burst, rx_count);\n+\t\t\t\t\tif (enable_stats) {\n+\t\t\t\t\t\tdev_statistics[dev->device_fh].rx_total\n+\t\t\t\t\t\t\t+= rx_count;\n+\t\t\t\t\t\tdev_statistics[dev->device_fh].rx\n+\t\t\t\t\t\t\t+= ret_count;\n+\t\t\t\t\t}\n+\t\t\t\t\twhile (likely(rx_count)) {\n+\t\t\t\t\t\trx_count--;\n+\t\t\t\t\t\tpktmbuf_detach_zcp(\n+\t\t\t\t\t\t\tpkts_burst[rx_count]);\n+\t\t\t\t\t\trte_ring_sp_enqueue(\n+\t\t\t\t\t\t\tvpool_array[index].ring,\n+\t\t\t\t\t\t\t(void *)pkts_burst[rx_count]);\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\tif (likely(!dev->remove))\n+\t\t\t\t/* Handle guest TX */\n+\t\t\t\tvirtio_dev_tx_zcp(dev);\n+\n+\t\t\t/* Move to the next device in the list */\n+\t\t\tdev_ll = dev_ll->next;\n+\t\t}\n+\t}\n+\n+\treturn 0;\n+}\n+\n+\n+/*\n+ * Add an entry to a used linked list. A free entry must first be found\n+ * in the free linked list using get_data_ll_free_entry();\n+ */\n+static void\n+add_data_ll_entry(struct virtio_net_data_ll **ll_root_addr,\n+\tstruct virtio_net_data_ll *ll_dev)\n+{\n+\tstruct virtio_net_data_ll *ll = *ll_root_addr;\n+\n+\t/* Set next as NULL and use a compiler barrier to avoid reordering. */\n+\tll_dev->next = NULL;\n+\trte_compiler_barrier();\n+\n+\t/* If ll == NULL then this is the first device. */\n+\tif (ll) {\n+\t\t/* Increment to the tail of the linked list. */\n+\t\twhile ((ll->next != NULL) )\n+\t\t\tll = ll->next;\n+\n+\t\tll->next = ll_dev;\n+\t} else {\n+\t\t*ll_root_addr = ll_dev;\n+\t}\n+}\n+\n+/*\n+ * Remove an entry from a used linked list. The entry must then be added to\n+ * the free linked list using put_data_ll_free_entry().\n+ */\n+static void\n+rm_data_ll_entry(struct virtio_net_data_ll **ll_root_addr,\n+\tstruct virtio_net_data_ll *ll_dev,\n+\tstruct virtio_net_data_ll *ll_dev_last)\n+{\n+\tstruct virtio_net_data_ll *ll = *ll_root_addr;\n+\n+\tif (unlikely((ll == NULL) || (ll_dev == NULL)))\n+\t\treturn;\n+\n+\tif (ll_dev == ll)\n+\t\t*ll_root_addr = ll_dev->next;\n+\telse\n+\t\tif (likely(ll_dev_last != NULL))\n+\t\t\tll_dev_last->next = ll_dev->next;\n+\t\telse\n+\t\t\tRTE_LOG(ERR, VHOST_CONFIG, \"Remove entry form ll failed.\\n\");\n+}\n+\n+/*\n+ * Find and return an entry from the free linked list.\n+ */\n+static struct virtio_net_data_ll *\n+get_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr)\n+{\n+\tstruct virtio_net_data_ll *ll_free = *ll_root_addr;\n+\tstruct virtio_net_data_ll *ll_dev;\n+\n+\tif (ll_free == NULL)\n+\t\treturn NULL;\n+\n+\tll_dev = ll_free;\n+\t*ll_root_addr = ll_free->next;\n+\n+\treturn ll_dev;\n+}\n+\n+/*\n+ * Place an entry back on to the free linked list.\n+ */\n+static void\n+put_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr,\n+\tstruct virtio_net_data_ll *ll_dev)\n+{\n+\tstruct virtio_net_data_ll *ll_free = *ll_root_addr;\n+\n+\tif (ll_dev == NULL)\n+\t\treturn;\n+\n+\tll_dev->next = ll_free;\n+\t*ll_root_addr = ll_dev;\n+}\n+\n+/*\n+ * Creates a linked list of a given size.\n+ */\n+static struct virtio_net_data_ll *\n+alloc_data_ll(uint32_t size)\n+{\n+\tstruct virtio_net_data_ll *ll_new;\n+\tuint32_t i;\n+\n+\t/* Malloc and then chain the linked list. */\n+\tll_new = malloc(size * sizeof(struct virtio_net_data_ll));\n+\tif (ll_new == NULL) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG, \"Failed to allocate memory for ll_new.\\n\");\n+\t\treturn NULL;\n+\t}\n+\n+\tfor (i = 0; i < size - 1; i++) {\n+\t\tll_new[i].dev = NULL;\n+\t\tll_new[i].next = &ll_new[i+1];\n+\t}\n+\tll_new[i].next = NULL;\n+\n+\treturn (ll_new);\n+}\n+\n+/*\n+ * Create the main linked list along with each individual cores linked list. A used and a free list\n+ * are created to manage entries.\n+ */\n+static int\n+init_data_ll (void)\n+{\n+\tint lcore;\n+\n+\tRTE_LCORE_FOREACH_SLAVE(lcore) {\n+\t\tlcore_info[lcore].lcore_ll = malloc(sizeof(struct lcore_ll_info));\n+\t\tif (lcore_info[lcore].lcore_ll == NULL) {\n+\t\t\tRTE_LOG(ERR, VHOST_CONFIG, \"Failed to allocate memory for lcore_ll.\\n\");\n+\t\t\treturn -1;\n+\t\t}\n+\n+\t\tlcore_info[lcore].lcore_ll->device_num = 0;\n+\t\tlcore_info[lcore].lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;\n+\t\tlcore_info[lcore].lcore_ll->ll_root_used = NULL;\n+\t\tif (num_devices % num_switching_cores)\n+\t\t\tlcore_info[lcore].lcore_ll->ll_root_free = alloc_data_ll((num_devices / num_switching_cores) + 1);\n+\t\telse\n+\t\t\tlcore_info[lcore].lcore_ll->ll_root_free = alloc_data_ll(num_devices / num_switching_cores);\n+\t}\n+\n+\t/* Allocate devices up to a maximum of MAX_DEVICES. */\n+\tll_root_free = alloc_data_ll(MIN((num_devices), MAX_DEVICES));\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * Set virtqueue flags so that we do not receive interrupts.\n+ */\n+static void\n+set_irq_status (struct virtio_net *dev)\n+{\n+\tdev->virtqueue[VIRTIO_RXQ]->used->flags = VRING_USED_F_NO_NOTIFY;\n+\tdev->virtqueue[VIRTIO_TXQ]->used->flags = VRING_USED_F_NO_NOTIFY;\n+}\n+\n+/*\n+ * Remove a device from the specific data core linked list and from the main linked list. Synchonization\n+ * occurs through the use of the lcore dev_removal_flag. Device is made volatile here to avoid re-ordering\n+ * of dev->remove=1 which can cause an infinite loop in the rte_pause loop.\n+ */\n+static void\n+destroy_device (volatile struct virtio_net *dev)\n+{\n+\tstruct virtio_net_data_ll *ll_lcore_dev_cur;\n+\tstruct virtio_net_data_ll *ll_main_dev_cur;\n+\tstruct virtio_net_data_ll *ll_lcore_dev_last = NULL;\n+\tstruct virtio_net_data_ll *ll_main_dev_last = NULL;\n+\tint lcore;\n+\n+\tdev->flags &= ~VIRTIO_DEV_RUNNING;\n+\n+\t/*set the remove flag. */\n+\tdev->remove = 1;\n+\n+\twhile(dev->ready != DEVICE_SAFE_REMOVE) {\n+\t\trte_pause();\n+\t}\n+\n+\t/* Search for entry to be removed from lcore ll */\n+\tll_lcore_dev_cur = lcore_info[dev->coreid].lcore_ll->ll_root_used;\n+\twhile (ll_lcore_dev_cur != NULL) {\n+\t\tif (ll_lcore_dev_cur->dev == dev) {\n+\t\t\tbreak;\n+\t\t} else {\n+\t\t\tll_lcore_dev_last = ll_lcore_dev_cur;\n+\t\t\tll_lcore_dev_cur = ll_lcore_dev_cur->next;\n+\t\t}\n+\t}\n+\n+\tif (ll_lcore_dev_cur == NULL) {\n+\t\tRTE_LOG(ERR, VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") Failed to find the dev to be destroy.\\n\",\n+\t\t\tdev->device_fh);\n+\t\treturn;\n+\t}\n+\n+\t/* Search for entry to be removed from main ll */\n+\tll_main_dev_cur = ll_root_used;\n+\tll_main_dev_last = NULL;\n+\twhile (ll_main_dev_cur != NULL) {\n+\t\tif (ll_main_dev_cur->dev == dev) {\n+\t\t\tbreak;\n+\t\t} else {\n+\t\t\tll_main_dev_last = ll_main_dev_cur;\n+\t\t\tll_main_dev_cur = ll_main_dev_cur->next;\n+\t\t}\n+\t}\n+\n+\t/* Remove entries from the lcore and main ll. */\n+\trm_data_ll_entry(&lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->ll_root_used, ll_lcore_dev_cur, ll_lcore_dev_last);\n+\trm_data_ll_entry(&ll_root_used, ll_main_dev_cur, ll_main_dev_last);\n+\n+\t/* Set the dev_removal_flag on each lcore. */\n+\tRTE_LCORE_FOREACH_SLAVE(lcore) {\n+\t\tlcore_info[lcore].lcore_ll->dev_removal_flag = REQUEST_DEV_REMOVAL;\n+\t}\n+\n+\t/*\n+\t * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL we can be sure that\n+\t * they can no longer access the device removed from the linked lists and that the devices\n+\t * are no longer in use.\n+\t */\n+\tRTE_LCORE_FOREACH_SLAVE(lcore) {\n+\t\twhile (lcore_info[lcore].lcore_ll->dev_removal_flag != ACK_DEV_REMOVAL) {\n+\t\t\trte_pause();\n+\t\t}\n+\t}\n+\n+\t/* Add the entries back to the lcore and main free ll.*/\n+\tput_data_ll_free_entry(&lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->ll_root_free, ll_lcore_dev_cur);\n+\tput_data_ll_free_entry(&ll_root_free, ll_main_dev_cur);\n+\n+\t/* Decrement number of device on the lcore. */\n+\tlcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->device_num--;\n+\n+\tRTE_LOG(INFO, VHOST_DATA, \"(%\"PRIu64\") Device has been removed from data core\\n\", dev->device_fh);\n+\n+\tif (zero_copy) {\n+\t\tstruct vpool *vpool = &vpool_array[dev->vmdq_rx_q];\n+\n+\t\t/* Stop the RX queue. */\n+\t\tif (rte_eth_dev_rx_queue_stop(ports[0], dev->vmdq_rx_q) != 0) {\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"(%\"PRIu64\") In destroy_device: Failed to stop \"\n+\t\t\t\t\"rx queue:%d\\n\",\n+\t\t\t\tdev->device_fh,\n+\t\t\t\tdev->vmdq_rx_q);\n+\t\t}\n+\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") in destroy_device: Start put mbuf in \"\n+\t\t\t\"mempool back to ring for RX queue: %d\\n\",\n+\t\t\tdev->device_fh, dev->vmdq_rx_q);\n+\n+\t\tmbuf_destroy_zcp(vpool);\n+\n+\t\t/* Stop the TX queue. */\n+\t\tif (rte_eth_dev_tx_queue_stop(ports[0], dev->vmdq_rx_q) != 0) {\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"(%\"PRIu64\") In destroy_device: Failed to \"\n+\t\t\t\t\"stop tx queue:%d\\n\",\n+\t\t\t\tdev->device_fh, dev->vmdq_rx_q);\n+\t\t}\n+\n+\t\tvpool = &vpool_array[dev->vmdq_rx_q + MAX_QUEUES];\n+\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") destroy_device: Start put mbuf in mempool \"\n+\t\t\t\"back to ring for TX queue: %d, dev:(%\"PRIu64\")\\n\",\n+\t\t\tdev->device_fh, (dev->vmdq_rx_q + MAX_QUEUES),\n+\t\t\tdev->device_fh);\n+\n+\t\tmbuf_destroy_zcp(vpool);\n+\t}\n+\n+}\n+\n+/*\n+ * A new device is added to a data core. First the device is added to the main linked list\n+ * and the allocated to a specific data core.\n+ */\n+static int\n+new_device (struct virtio_net *dev)\n+{\n+\tstruct virtio_net_data_ll *ll_dev;\n+\tint lcore, core_add = 0;\n+\tuint32_t device_num_min = num_devices;\n+\n+\t/* Add device to main ll */\n+\tll_dev = get_data_ll_free_entry(&ll_root_free);\n+\tif (ll_dev == NULL) {\n+\t\tRTE_LOG(INFO, VHOST_DATA, \"(%\"PRIu64\") No free entry found in linked list. Device limit \"\n+\t\t\t\"of %d devices per core has been reached\\n\",\n+\t\t\tdev->device_fh, num_devices);\n+\t\treturn -1;\n+\t}\n+\tll_dev->dev = dev;\n+\tadd_data_ll_entry(&ll_root_used, ll_dev);\n+\tll_dev->dev->vmdq_rx_q\n+\t\t= ll_dev->dev->device_fh * (num_queues / num_devices);\n+\n+\tif (zero_copy) {\n+\t\tuint32_t index = ll_dev->dev->vmdq_rx_q;\n+\t\tuint32_t count_in_ring, i;\n+\t\tstruct mbuf_table *tx_q;\n+\n+\t\tcount_in_ring = rte_ring_count(vpool_array[index].ring);\n+\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") in new_device: mbuf count in mempool \"\n+\t\t\t\"before attach is: %d\\n\",\n+\t\t\tdev->device_fh,\n+\t\t\trte_mempool_count(vpool_array[index].pool));\n+\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\"(%\"PRIu64\") in new_device: mbuf count in  ring \"\n+\t\t\t\"before attach  is : %d\\n\",\n+\t\t\tdev->device_fh, count_in_ring);\n+\n+\t\t/*\n+\t\t * Attach all mbufs in vpool.ring and put back intovpool.pool.\n+\t\t */\n+\t\tfor (i = 0; i < count_in_ring; i++)\n+\t\t\tattach_rxmbuf_zcp(dev);\n+\n+\t\tLOG_DEBUG(VHOST_CONFIG, \"(%\"PRIu64\") in new_device: mbuf count in \"\n+\t\t\t\"mempool after attach is: %d\\n\",\n+\t\t\tdev->device_fh,\n+\t\t\trte_mempool_count(vpool_array[index].pool));\n+\t\tLOG_DEBUG(VHOST_CONFIG, \"(%\"PRIu64\") in new_device: mbuf count in \"\n+\t\t\t\"ring after attach  is : %d\\n\",\n+\t\t\tdev->device_fh,\n+\t\t\trte_ring_count(vpool_array[index].ring));\n+\n+\t\ttx_q = &tx_queue_zcp[(uint16_t)dev->vmdq_rx_q];\n+\t\ttx_q->txq_id = dev->vmdq_rx_q;\n+\n+\t\tif (rte_eth_dev_tx_queue_start(ports[0], dev->vmdq_rx_q) != 0) {\n+\t\t\tstruct vpool *vpool = &vpool_array[dev->vmdq_rx_q];\n+\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"(%\"PRIu64\") In new_device: Failed to start \"\n+\t\t\t\t\"tx queue:%d\\n\",\n+\t\t\t\tdev->device_fh, dev->vmdq_rx_q);\n+\n+\t\t\tmbuf_destroy_zcp(vpool);\n+\t\t\treturn -1;\n+\t\t}\n+\n+\t\tif (rte_eth_dev_rx_queue_start(ports[0], dev->vmdq_rx_q) != 0) {\n+\t\t\tstruct vpool *vpool = &vpool_array[dev->vmdq_rx_q];\n+\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"(%\"PRIu64\") In new_device: Failed to start \"\n+\t\t\t\t\"rx queue:%d\\n\",\n+\t\t\t\tdev->device_fh, dev->vmdq_rx_q);\n+\n+\t\t\t/* Stop the TX queue. */\n+\t\t\tif (rte_eth_dev_tx_queue_stop(ports[0],\n+\t\t\t\tdev->vmdq_rx_q) != 0) {\n+\t\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\t\"(%\"PRIu64\") In new_device: Failed to \"\n+\t\t\t\t\t\"stop tx queue:%d\\n\",\n+\t\t\t\t\tdev->device_fh, dev->vmdq_rx_q);\n+\t\t\t}\n+\n+\t\t\tmbuf_destroy_zcp(vpool);\n+\t\t\treturn -1;\n+\t\t}\n+\n+\t}\n+\n+\t/*reset ready flag*/\n+\tdev->ready = DEVICE_MAC_LEARNING;\n+\tdev->remove = 0;\n+\n+\t/* Find a suitable lcore to add the device. */\n+\tRTE_LCORE_FOREACH_SLAVE(lcore) {\n+\t\tif (lcore_info[lcore].lcore_ll->device_num < device_num_min) {\n+\t\t\tdevice_num_min = lcore_info[lcore].lcore_ll->device_num;\n+\t\t\tcore_add = lcore;\n+\t\t}\n+\t}\n+\t/* Add device to lcore ll */\n+\tll_dev->dev->coreid = core_add;\n+\tll_dev = get_data_ll_free_entry(&lcore_info[ll_dev->dev->coreid].lcore_ll->ll_root_free);\n+\tif (ll_dev == NULL) {\n+\t\tRTE_LOG(INFO, VHOST_DATA, \"(%\"PRIu64\") Failed to add device to data core\\n\", dev->device_fh);\n+\t\tdev->ready = DEVICE_SAFE_REMOVE;\n+\t\tdestroy_device(dev);\n+\t\treturn -1;\n+\t}\n+\tll_dev->dev = dev;\n+\tadd_data_ll_entry(&lcore_info[ll_dev->dev->coreid].lcore_ll->ll_root_used, ll_dev);\n+\n+\t/* Initialize device stats */\n+\tmemset(&dev_statistics[dev->device_fh], 0, sizeof(struct device_statistics));\n+\n+\t/* Disable notifications. */\n+\tset_irq_status(dev);\n+\tlcore_info[ll_dev->dev->coreid].lcore_ll->device_num++;\n+\tdev->flags |= VIRTIO_DEV_RUNNING;\n+\n+\tRTE_LOG(INFO, VHOST_DATA, \"(%\"PRIu64\") Device has been added to data core %d\\n\", dev->device_fh, dev->coreid);\n+\n+\treturn 0;\n+}\n+\n+/*\n+ * These callback allow devices to be added to the data core when configuration\n+ * has been fully complete.\n+ */\n+static const struct virtio_net_device_ops virtio_net_device_ops =\n+{\n+\t.new_device =  new_device,\n+\t.destroy_device = destroy_device,\n+};\n+\n+/*\n+ * This is a thread will wake up after a period to print stats if the user has\n+ * enabled them.\n+ */\n+static void\n+print_stats(void)\n+{\n+\tstruct virtio_net_data_ll *dev_ll;\n+\tuint64_t tx_dropped, rx_dropped;\n+\tuint64_t tx, tx_total, rx, rx_total;\n+\tuint32_t device_fh;\n+\tconst char clr[] = { 27, '[', '2', 'J', '\\0' };\n+\tconst char top_left[] = { 27, '[', '1', ';', '1', 'H','\\0' };\n+\n+\twhile(1) {\n+\t\tsleep(enable_stats);\n+\n+\t\t/* Clear screen and move to top left */\n+\t\tprintf(\"%s%s\", clr, top_left);\n+\n+\t\tprintf(\"\\nDevice statistics ====================================\");\n+\n+\t\tdev_ll = ll_root_used;\n+\t\twhile (dev_ll != NULL) {\n+\t\t\tdevice_fh = (uint32_t)dev_ll->dev->device_fh;\n+\t\t\ttx_total = dev_statistics[device_fh].tx_total;\n+\t\t\ttx = dev_statistics[device_fh].tx;\n+\t\t\ttx_dropped = tx_total - tx;\n+\t\t\tif (zero_copy == 0) {\n+\t\t\t\trx_total = rte_atomic64_read(\n+\t\t\t\t\t&dev_statistics[device_fh].rx_total_atomic);\n+\t\t\t\trx = rte_atomic64_read(\n+\t\t\t\t\t&dev_statistics[device_fh].rx_atomic);\n+\t\t\t} else {\n+\t\t\t\trx_total = dev_statistics[device_fh].rx_total;\n+\t\t\t\trx = dev_statistics[device_fh].rx;\n+\t\t\t}\n+\t\t\trx_dropped = rx_total - rx;\n+\n+\t\t\tprintf(\"\\nStatistics for device %\"PRIu32\" ------------------------------\"\n+\t\t\t\t\t\"\\nTX total: \t\t%\"PRIu64\"\"\n+\t\t\t\t\t\"\\nTX dropped: \t\t%\"PRIu64\"\"\n+\t\t\t\t\t\"\\nTX successful: \t\t%\"PRIu64\"\"\n+\t\t\t\t\t\"\\nRX total: \t\t%\"PRIu64\"\"\n+\t\t\t\t\t\"\\nRX dropped: \t\t%\"PRIu64\"\"\n+\t\t\t\t\t\"\\nRX successful: \t\t%\"PRIu64\"\",\n+\t\t\t\t\tdevice_fh,\n+\t\t\t\t\ttx_total,\n+\t\t\t\t\ttx_dropped,\n+\t\t\t\t\ttx,\n+\t\t\t\t\trx_total,\n+\t\t\t\t\trx_dropped,\n+\t\t\t\t\trx);\n+\n+\t\t\tdev_ll = dev_ll->next;\n+\t\t}\n+\t\tprintf(\"\\n======================================================\\n\");\n+\t}\n+}\n+\n+static void\n+setup_mempool_tbl(int socket, uint32_t index, char *pool_name,\n+\tchar *ring_name, uint32_t nb_mbuf)\n+{\n+\tuint16_t roomsize = VIRTIO_DESCRIPTOR_LEN_ZCP + RTE_PKTMBUF_HEADROOM;\n+\tvpool_array[index].pool\n+\t\t= rte_mempool_create(pool_name, nb_mbuf, MBUF_SIZE_ZCP,\n+\t\tMBUF_CACHE_SIZE_ZCP, sizeof(struct rte_pktmbuf_pool_private),\n+\t\trte_pktmbuf_pool_init, (void *)(uintptr_t)roomsize,\n+\t\trte_pktmbuf_init, NULL, socket, 0);\n+\tif (vpool_array[index].pool != NULL) {\n+\t\tvpool_array[index].ring\n+\t\t\t= rte_ring_create(ring_name,\n+\t\t\t\trte_align32pow2(nb_mbuf + 1),\n+\t\t\t\tsocket, RING_F_SP_ENQ | RING_F_SC_DEQ);\n+\t\tif (likely(vpool_array[index].ring != NULL)) {\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"in setup_mempool_tbl: mbuf count in \"\n+\t\t\t\t\"mempool is: %d\\n\",\n+\t\t\t\trte_mempool_count(vpool_array[index].pool));\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"in setup_mempool_tbl: mbuf count in \"\n+\t\t\t\t\"ring   is: %d\\n\",\n+\t\t\t\trte_ring_count(vpool_array[index].ring));\n+\t\t} else {\n+\t\t\trte_exit(EXIT_FAILURE, \"ring_create(%s) failed\",\n+\t\t\t\tring_name);\n+\t\t}\n+\n+\t\t/* Need consider head room. */\n+\t\tvpool_array[index].buf_size = roomsize - RTE_PKTMBUF_HEADROOM;\n+\t} else {\n+\t\trte_exit(EXIT_FAILURE, \"mempool_create(%s) failed\", pool_name);\n+\t}\n+}\n+\n+\n+/*\n+ * Main function, does initialisation and calls the per-lcore functions. The CUSE\n+ * device is also registered here to handle the IOCTLs.\n+ */\n+int\n+MAIN(int argc, char *argv[])\n+{\n+\tstruct rte_mempool *mbuf_pool = NULL;\n+\tunsigned lcore_id, core_id = 0;\n+\tunsigned nb_ports, valid_num_ports;\n+\tint ret;\n+\tuint8_t portid, queue_id = 0;\n+\tstatic pthread_t tid;\n+\n+\t/* init EAL */\n+\tret = rte_eal_init(argc, argv);\n+\tif (ret < 0)\n+\t\trte_exit(EXIT_FAILURE, \"Error with EAL initialization\\n\");\n+\targc -= ret;\n+\targv += ret;\n+\n+\t/* parse app arguments */\n+\tret = us_vhost_parse_args(argc, argv);\n+\tif (ret < 0)\n+\t\trte_exit(EXIT_FAILURE, \"Invalid argument\\n\");\n+\n+\tif (rte_eal_pci_probe() != 0)\n+\t\trte_exit(EXIT_FAILURE, \"Error with NIC driver initialization\\n\");\n+\n+\tfor (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id ++)\n+\t\tif (rte_lcore_is_enabled(lcore_id))\n+\t\t\tlcore_ids[core_id ++] = lcore_id;\n+\n+\tif (rte_lcore_count() > RTE_MAX_LCORE)\n+\t\trte_exit(EXIT_FAILURE,\"Not enough cores\\n\");\n+\n+\t/*set the number of swithcing cores available*/\n+\tnum_switching_cores = rte_lcore_count()-1;\n+\n+\t/* Get the number of physical ports. */\n+\tnb_ports = rte_eth_dev_count();\n+\tif (nb_ports > RTE_MAX_ETHPORTS)\n+\t\tnb_ports = RTE_MAX_ETHPORTS;\n+\n+\t/*\n+\t * Update the global var NUM_PORTS and global array PORTS\n+\t * and get value of var VALID_NUM_PORTS according to system ports number\n+\t */\n+\tvalid_num_ports = check_ports_num(nb_ports);\n+\n+\tif ((valid_num_ports ==  0) || (valid_num_ports > MAX_SUP_PORTS)) {\n+\t\tRTE_LOG(INFO, VHOST_PORT, \"Current enabled port number is %u,\"\n+\t\t\t\"but only %u port can be enabled\\n\",num_ports, MAX_SUP_PORTS);\n+\t\treturn -1;\n+\t}\n+\n+\tif (zero_copy == 0) {\n+\t\t/* Create the mbuf pool. */\n+\t\tmbuf_pool = rte_mempool_create(\n+\t\t\t\t\"MBUF_POOL\",\n+\t\t\t\tNUM_MBUFS_PER_PORT\n+\t\t\t\t* valid_num_ports,\n+\t\t\t\tMBUF_SIZE, MBUF_CACHE_SIZE,\n+\t\t\t\tsizeof(struct rte_pktmbuf_pool_private),\n+\t\t\t\trte_pktmbuf_pool_init, NULL,\n+\t\t\t\trte_pktmbuf_init, NULL,\n+\t\t\t\trte_socket_id(), 0);\n+\t\tif (mbuf_pool == NULL)\n+\t\t\trte_exit(EXIT_FAILURE, \"Cannot create mbuf pool\\n\");\n+\n+\t\tfor (queue_id = 0; queue_id < MAX_QUEUES + 1; queue_id++)\n+\t\t\tvpool_array[queue_id].pool = mbuf_pool;\n+\n+\t\tif (vm2vm_mode == VM2VM_HARDWARE) {\n+\t\t\t/* Enable VT loop back to let L2 switch to do it. */\n+\t\t\tvmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1;\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"Enable loop back for L2 switch in vmdq.\\n\");\n+\t\t}\n+\t} else {\n+\t\tuint32_t nb_mbuf;\n+\t\tchar pool_name[RTE_MEMPOOL_NAMESIZE];\n+\t\tchar ring_name[RTE_MEMPOOL_NAMESIZE];\n+\n+\t\trx_conf_default.start_rx_per_q = (uint8_t)zero_copy;\n+\t\trx_conf_default.rx_drop_en = 0;\n+\t\ttx_conf_default.start_tx_per_q = (uint8_t)zero_copy;\n+\t\tnb_mbuf = num_rx_descriptor\n+\t\t\t+ num_switching_cores * MBUF_CACHE_SIZE_ZCP\n+\t\t\t+ num_switching_cores * MAX_PKT_BURST;\n+\n+\t\tfor (queue_id = 0; queue_id < MAX_QUEUES; queue_id++) {\n+\t\t\tsnprintf(pool_name, sizeof(pool_name),\n+\t\t\t\t\"rxmbuf_pool_%u\", queue_id);\n+\t\t\tsnprintf(ring_name, sizeof(ring_name),\n+\t\t\t\t\"rxmbuf_ring_%u\", queue_id);\n+\t\t\tsetup_mempool_tbl(rte_socket_id(), queue_id,\n+\t\t\t\tpool_name, ring_name, nb_mbuf);\n+\t\t}\n+\n+\t\tnb_mbuf = num_tx_descriptor\n+\t\t\t\t+ num_switching_cores * MBUF_CACHE_SIZE_ZCP\n+\t\t\t\t+ num_switching_cores * MAX_PKT_BURST;\n+\n+\t\tfor (queue_id = 0; queue_id < MAX_QUEUES; queue_id++) {\n+\t\t\tsnprintf(pool_name, sizeof(pool_name),\n+\t\t\t\t\"txmbuf_pool_%u\", queue_id);\n+\t\t\tsnprintf(ring_name, sizeof(ring_name),\n+\t\t\t\t\"txmbuf_ring_%u\", queue_id);\n+\t\t\tsetup_mempool_tbl(rte_socket_id(),\n+\t\t\t\t(queue_id + MAX_QUEUES),\n+\t\t\t\tpool_name, ring_name, nb_mbuf);\n+\t\t}\n+\n+\t\tif (vm2vm_mode == VM2VM_HARDWARE) {\n+\t\t\t/* Enable VT loop back to let L2 switch to do it. */\n+\t\t\tvmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1;\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"Enable loop back for L2 switch in vmdq.\\n\");\n+\t\t}\n+\t}\n+\t/* Set log level. */\n+\trte_set_log_level(LOG_LEVEL);\n+\n+\t/* initialize all ports */\n+\tfor (portid = 0; portid < nb_ports; portid++) {\n+\t\t/* skip ports that are not enabled */\n+\t\tif ((enabled_port_mask & (1 << portid)) == 0) {\n+\t\t\tRTE_LOG(INFO, VHOST_PORT,\n+\t\t\t\t\"Skipping disabled port %d\\n\", portid);\n+\t\t\tcontinue;\n+\t\t}\n+\t\tif (port_init(portid) != 0)\n+\t\t\trte_exit(EXIT_FAILURE,\n+\t\t\t\t\"Cannot initialize network ports\\n\");\n+\t}\n+\n+\t/* Initialise all linked lists. */\n+\tif (init_data_ll() == -1)\n+\t\trte_exit(EXIT_FAILURE, \"Failed to initialize linked list\\n\");\n+\n+\t/* Initialize device stats */\n+\tmemset(&dev_statistics, 0, sizeof(dev_statistics));\n+\n+\t/* Enable stats if the user option is set. */\n+\tif (enable_stats)\n+\t\tpthread_create(&tid, NULL, (void*)print_stats, NULL );\n+\n+\t/* Launch all data cores. */\n+\tif (zero_copy == 0) {\n+\t\tRTE_LCORE_FOREACH_SLAVE(lcore_id) {\n+\t\t\trte_eal_remote_launch(switch_worker,\n+\t\t\t\tmbuf_pool, lcore_id);\n+\t\t}\n+\t} else {\n+\t\tuint32_t count_in_mempool, index, i;\n+\t\tfor (index = 0; index < 2*MAX_QUEUES; index++) {\n+\t\t\t/* For all RX and TX queues. */\n+\t\t\tcount_in_mempool\n+\t\t\t\t= rte_mempool_count(vpool_array[index].pool);\n+\n+\t\t\t/*\n+\t\t\t * Transfer all un-attached mbufs from vpool.pool\n+\t\t\t * to vpoo.ring.\n+\t\t\t */\n+\t\t\tfor (i = 0; i < count_in_mempool; i++) {\n+\t\t\t\tstruct rte_mbuf *mbuf\n+\t\t\t\t\t= __rte_mbuf_raw_alloc(\n+\t\t\t\t\t\tvpool_array[index].pool);\n+\t\t\t\trte_ring_sp_enqueue(vpool_array[index].ring,\n+\t\t\t\t\t\t(void *)mbuf);\n+\t\t\t}\n+\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"in MAIN: mbuf count in mempool at initial \"\n+\t\t\t\t\"is: %d\\n\", count_in_mempool);\n+\t\t\tLOG_DEBUG(VHOST_CONFIG,\n+\t\t\t\t\"in MAIN: mbuf count in  ring at initial  is :\"\n+\t\t\t\t\" %d\\n\",\n+\t\t\t\trte_ring_count(vpool_array[index].ring));\n+\t\t}\n+\n+\t\tRTE_LCORE_FOREACH_SLAVE(lcore_id)\n+\t\t\trte_eal_remote_launch(switch_worker_zcp, NULL,\n+\t\t\t\tlcore_id);\n+\t}\n+\n+\t/* Register CUSE device to handle IOCTLs. */\n+\tret = register_cuse_device((char*)&dev_basename, dev_index, get_virtio_net_callbacks());\n+\tif (ret != 0)\n+\t\trte_exit(EXIT_FAILURE,\"CUSE device setup failure.\\n\");\n+\n+\tinit_virtio_net(&virtio_net_device_ops);\n+\n+\t/* Start CUSE session. */\n+\tstart_cuse_session_loop();\n+\treturn 0;\n+\n+}\n+\ndiff --git a/examples/vhost/main.h b/examples/vhost/main.h\nnew file mode 100644\nindex 0000000..c15d938\n--- /dev/null\n+++ b/examples/vhost/main.h\n@@ -0,0 +1,86 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.\n+ *   All rights reserved.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the name of Intel Corporation nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+#ifndef _MAIN_H_\n+#define _MAIN_H_\n+\n+#ifdef RTE_EXEC_ENV_BAREMETAL\n+#define MAIN _main\n+#else\n+#define MAIN main\n+#endif\n+\n+//#define DEBUG\n+\n+#ifdef DEBUG\n+#define LOG_LEVEL RTE_LOG_DEBUG\n+#define LOG_DEBUG(log_type, fmt, args...) do {\t\\\n+\tRTE_LOG(DEBUG, log_type, fmt, ##args);\t\t\\\n+} while (0)\n+#else\n+#define LOG_LEVEL RTE_LOG_INFO\n+#define LOG_DEBUG(log_type, fmt, args...) do{} while(0)\n+#endif\n+\n+/* Macros for printing using RTE_LOG */\n+#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1\n+#define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER2\n+#define RTE_LOGTYPE_VHOST_PORT   RTE_LOGTYPE_USER3\n+\n+/*\n+ * Device linked list structure for data path.\n+ */\n+struct virtio_net_data_ll\n+{\n+\tstruct virtio_net\t\t\t*dev;\t/* Pointer to device created by configuration core. */\n+\tstruct virtio_net_data_ll\t*next;  /* Pointer to next device in linked list. */\n+};\n+\n+/*\n+ * Structure containing data core specific information.\n+ */\n+struct lcore_ll_info\n+{\n+\tstruct virtio_net_data_ll\t*ll_root_free; \t\t/* Pointer to head in free linked list. */\n+\tstruct virtio_net_data_ll\t*ll_root_used;\t\t/* Pointer to head of used linked list. */\n+\tuint32_t \t\t\t\t\tdevice_num;\t\t\t/* Number of devices on lcore. */\n+\tvolatile uint8_t\t\t\tdev_removal_flag;\t/* Flag to synchronize device removal. */\n+};\n+\n+struct lcore_info\n+{\n+\tstruct lcore_ll_info\t*lcore_ll;\t/* Pointer to data core specific lcore_ll_info struct */\n+};\n+\n+int MAIN(int argc, char **argv);\n+#endif /* _MAIN_H_ */\n",
    "prefixes": [
        "dpdk-dev",
        "1/2"
    ]
}