diff mbox

[dpdk-dev,RFC] lib/librte_vhost: vhost-user

Message ID 1416014087-22499-1-git-send-email-huawei.xie@intel.com (mailing list archive)
State RFC, archived
Headers show

Commit Message

Huawei Xie Nov. 15, 2014, 1:14 a.m. UTC
implement socket server
fd event dispatch mechanism
vhost sock  message handling
memory map for each region
VHOST_USER_SET_VRING_KICK_FD as the indicator that vring is available
VHOST_USER_GET_VRING_BASE as the message that vring should be released
  
The message flow between vhost-user and vhost-cuse is kind of different,
which makes it difficult and complicated for the common virtio-net message handler layer to handle
both cases in new_device/destroy_device/memory map/resource cleanup.

Will only leave the most common message handling in virtio-net, and move the
control logic to cuse/fuse layer.


Signed-off-by: Huawei Xie <huawei.xie@intel.com>
---
 lib/librte_vhost/Makefile                     |  14 +-
 lib/librte_vhost/eventfd_link/eventfd_link.c  |  27 +-
 lib/librte_vhost/eventfd_link/eventfd_link.h  |  48 +-
 lib/librte_vhost/libvirt/qemu-wrap.py         | 367 ---------------
 lib/librte_vhost/rte_virtio_net.h             | 106 ++---
 lib/librte_vhost/vhost-cuse/vhost-net-cdev.c  | 436 ++++++++++++++++++
 lib/librte_vhost/vhost-cuse/virtio-net-cdev.c | 314 +++++++++++++
 lib/librte_vhost/vhost-cuse/virtio-net-cdev.h |  43 ++
 lib/librte_vhost/vhost-net-cdev.c             | 389 ----------------
 lib/librte_vhost/vhost-net-cdev.h             | 113 -----
 lib/librte_vhost/vhost-user/fd_man.c          | 158 +++++++
 lib/librte_vhost/vhost-user/fd_man.h          |  31 ++
 lib/librte_vhost/vhost-user/vhost-net-user.c  | 417 +++++++++++++++++
 lib/librte_vhost/vhost-user/vhost-net-user.h  |  74 +++
 lib/librte_vhost/vhost-user/virtio-net-user.c | 208 +++++++++
 lib/librte_vhost/vhost-user/virtio-net-user.h |  11 +
 lib/librte_vhost/vhost_rxtx.c                 | 625 ++++----------------------
 lib/librte_vhost/virtio-net.c                 | 450 ++++---------------
 18 files changed, 1939 insertions(+), 1892 deletions(-)
 delete mode 100755 lib/librte_vhost/libvirt/qemu-wrap.py
 create mode 100644 lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
 delete mode 100644 lib/librte_vhost/vhost-net-cdev.c
 delete mode 100644 lib/librte_vhost/vhost-net-cdev.h
 create mode 100644 lib/librte_vhost/vhost-user/fd_man.c
 create mode 100644 lib/librte_vhost/vhost-user/fd_man.h
 create mode 100644 lib/librte_vhost/vhost-user/vhost-net-user.c
 create mode 100644 lib/librte_vhost/vhost-user/vhost-net-user.h
 create mode 100644 lib/librte_vhost/vhost-user/virtio-net-user.c
 create mode 100644 lib/librte_vhost/vhost-user/virtio-net-user.h

Comments

Tetsuya Mukawa Nov. 17, 2014, 6:04 a.m. UTC | #1
Hi Xie,


(2014/11/15 10:14), Huawei Xie wrote:
> implement socket server
> fd event dispatch mechanism
> vhost sock  message handling
> memory map for each region
> VHOST_USER_SET_VRING_KICK_FD as the indicator that vring is available
> VHOST_USER_GET_VRING_BASE as the message that vring should be released
>   
> The message flow between vhost-user and vhost-cuse is kind of different,
> which makes it difficult and complicated for the common virtio-net message handler layer to handle
> both cases in new_device/destroy_device/memory map/resource cleanup.
>
> Will only leave the most common message handling in virtio-net, and move the
> control logic to cuse/fuse layer.
>
>
> Signed-off-by: Huawei Xie <huawei.xie@intel.com>
Great patch!
I guess we can start from this patch to implement vhost-user and
the abstraction layer.

I've checked the patch.

1. White space, tab and indent patch.
I will send a patch that cleans up white space, tabs and indentation. Could you
please check it?
It might be difficult to see the differences if your editor doesn't show
spaces or tabs.

2. Some files are based on old code.
At least the following patch is not included:
- vhost: fix build without unused result
Also, vhost_rxtx.c probably isn't based on the latest code.

3. Device abstraction layer code
I will send the device abstraction layer code after this email.
Anyway, I guess we need to decide whether or not we still keep the
vhost-cuse code.

4. Multiple device operation.
For example, when thread1 opens vhost-user device1 and thread2 opens
vhost-user device2,
each thread may want to register its own callbacks.
The current implementation may not allow this.
I guess we need to eliminate global variables in librte_vhost as much as
possible.

Thanks,
Tetsuya

> ---
>  lib/librte_vhost/Makefile                     |  14 +-
>  lib/librte_vhost/eventfd_link/eventfd_link.c  |  27 +-
>  lib/librte_vhost/eventfd_link/eventfd_link.h  |  48 +-
>  lib/librte_vhost/libvirt/qemu-wrap.py         | 367 ---------------
>  lib/librte_vhost/rte_virtio_net.h             | 106 ++---
>  lib/librte_vhost/vhost-cuse/vhost-net-cdev.c  | 436 ++++++++++++++++++
>  lib/librte_vhost/vhost-cuse/virtio-net-cdev.c | 314 +++++++++++++
>  lib/librte_vhost/vhost-cuse/virtio-net-cdev.h |  43 ++
>  lib/librte_vhost/vhost-net-cdev.c             | 389 ----------------
>  lib/librte_vhost/vhost-net-cdev.h             | 113 -----
>  lib/librte_vhost/vhost-user/fd_man.c          | 158 +++++++
>  lib/librte_vhost/vhost-user/fd_man.h          |  31 ++
>  lib/librte_vhost/vhost-user/vhost-net-user.c  | 417 +++++++++++++++++
>  lib/librte_vhost/vhost-user/vhost-net-user.h  |  74 +++
>  lib/librte_vhost/vhost-user/virtio-net-user.c | 208 +++++++++
>  lib/librte_vhost/vhost-user/virtio-net-user.h |  11 +
>  lib/librte_vhost/vhost_rxtx.c                 | 625 ++++----------------------
>  lib/librte_vhost/virtio-net.c                 | 450 ++++---------------
>  18 files changed, 1939 insertions(+), 1892 deletions(-)
>  delete mode 100755 lib/librte_vhost/libvirt/qemu-wrap.py
>  create mode 100644 lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
>  create mode 100644 lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
>  create mode 100644 lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
>  delete mode 100644 lib/librte_vhost/vhost-net-cdev.c
>  delete mode 100644 lib/librte_vhost/vhost-net-cdev.h
>  create mode 100644 lib/librte_vhost/vhost-user/fd_man.c
>  create mode 100644 lib/librte_vhost/vhost-user/fd_man.h
>  create mode 100644 lib/librte_vhost/vhost-user/vhost-net-user.c
>  create mode 100644 lib/librte_vhost/vhost-user/vhost-net-user.h
>  create mode 100644 lib/librte_vhost/vhost-user/virtio-net-user.c
>  create mode 100644 lib/librte_vhost/vhost-user/virtio-net-user.h
>
> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
> index c008d64..cb4e172 100644
> --- a/lib/librte_vhost/Makefile
> +++ b/lib/librte_vhost/Makefile
> @@ -34,17 +34,19 @@ include $(RTE_SDK)/mk/rte.vars.mk
>  # library name
>  LIB = librte_vhost.a
>  
> -CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse
> +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I. -I vhost-user -I vhost-cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
>  LDFLAGS += -lfuse
>  # all source are stored in SRCS-y
> -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c vhost_rxtx.c
> +#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-cuse/vhost-net-cdev.c vhost-cuse/virtio-net-cdev.c
> +
> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-user/fd_man.c vhost-user/vhost-net-user.c vhost-user/virtio-net-user.c
> +
> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += virtio-net.c vhost_rxtx.c
>  
>  # install includes
>  SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
>  
> -# dependencies
> -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal
> -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_ether
> -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_mbuf
> +# this lib needs eal
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal lib/librte_mbuf
>  
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.c b/lib/librte_vhost/eventfd_link/eventfd_link.c
> index 7755dd6..4c9b628 100644
> --- a/lib/librte_vhost/eventfd_link/eventfd_link.c
> +++ b/lib/librte_vhost/eventfd_link/eventfd_link.c
> @@ -13,8 +13,7 @@
>   *   General Public License for more details.
>   *
>   *   You should have received a copy of the GNU General Public License
> - *   along with this program; if not, write to the Free Software
> - *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
> + *   along with this program; If not, see <http://www.gnu.org/licenses/>.
>   *   The full GNU General Public License is included in this distribution
>   *   in the file called LICENSE.GPL.
>   *
> @@ -78,8 +77,7 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
>  
>  	switch (ioctl) {
>  	case EVENTFD_COPY:
> -		if (copy_from_user(&eventfd_copy, argp,
> -			sizeof(struct eventfd_copy)))
> +		if (copy_from_user(&eventfd_copy, argp, sizeof(struct eventfd_copy)))
>  			return -EFAULT;
>  
>  		/*
> @@ -88,28 +86,28 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
>  		task_target =
>  			pid_task(find_vpid(eventfd_copy.target_pid), PIDTYPE_PID);
>  		if (task_target == NULL) {
> -			pr_debug("Failed to get mem ctx for target pid\n");
> +			printk(KERN_DEBUG "Failed to get mem ctx for target pid\n");
>  			return -EFAULT;
>  		}
>  
>  		files = get_files_struct(current);
>  		if (files == NULL) {
> -			pr_debug("Failed to get files struct\n");
> +			printk(KERN_DEBUG "Failed to get files struct\n");
>  			return -EFAULT;
>  		}
>  
>  		rcu_read_lock();
>  		file = fcheck_files(files, eventfd_copy.source_fd);
>  		if (file) {
> -			if (file->f_mode & FMODE_PATH ||
> -				!atomic_long_inc_not_zero(&file->f_count))
> +			if (file->f_mode & FMODE_PATH
> +				|| !atomic_long_inc_not_zero(&file->f_count))
>  				file = NULL;
>  		}
>  		rcu_read_unlock();
>  		put_files_struct(files);
>  
>  		if (file == NULL) {
> -			pr_debug("Failed to get file from source pid\n");
> +			printk(KERN_DEBUG "Failed to get file from source pid\n");
>  			return 0;
>  		}
>  
> @@ -128,25 +126,26 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
>  
>  		files = get_files_struct(task_target);
>  		if (files == NULL) {
> -			pr_debug("Failed to get files struct\n");
> +			printk(KERN_DEBUG "Failed to get files struct\n");
>  			return -EFAULT;
>  		}
>  
>  		rcu_read_lock();
>  		file = fcheck_files(files, eventfd_copy.target_fd);
>  		if (file) {
> -			if (file->f_mode & FMODE_PATH ||
> -				!atomic_long_inc_not_zero(&file->f_count))
> -					file = NULL;
> +			if (file->f_mode & FMODE_PATH
> +				|| !atomic_long_inc_not_zero(&file->f_count))
> +				file = NULL;
>  		}
>  		rcu_read_unlock();
>  		put_files_struct(files);
>  
>  		if (file == NULL) {
> -			pr_debug("Failed to get file from target pid\n");
> +			printk(KERN_DEBUG "Failed to get file from target pid\n");
>  			return 0;
>  		}
>  
> +
>  		/*
>  		 * Install the file struct from the target process into the
>  		 * file desciptor of the source process,
> diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.h b/lib/librte_vhost/eventfd_link/eventfd_link.h
> index ea619ec..38052e2 100644
> --- a/lib/librte_vhost/eventfd_link/eventfd_link.h
> +++ b/lib/librte_vhost/eventfd_link/eventfd_link.h
> @@ -1,7 +1,4 @@
>  /*-
> - *  This file is provided under a dual BSD/GPLv2 license.  When using or
> - *  redistributing this file, you may do so under either license.
> - *
>   * GPL LICENSE SUMMARY
>   *
>   *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> @@ -16,61 +13,28 @@
>   *   General Public License for more details.
>   *
>   *   You should have received a copy of the GNU General Public License
> - *   along with this program; if not, write to the Free Software
> - *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
> + *   along with this program; If not, see <http://www.gnu.org/licenses/>.
>   *   The full GNU General Public License is included in this distribution
>   *   in the file called LICENSE.GPL.
>   *
>   *   Contact Information:
>   *   Intel Corporation
> - *
> - * BSD LICENSE
> - *
> - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> - *   All rights reserved.
> - *
> - *   Redistribution and use in source and binary forms, with or without
> - *   modification, are permitted provided that the following conditions
> - *   are met:
> - *
> - *   Redistributions of source code must retain the above copyright
> - *   notice, this list of conditions and the following disclaimer.
> - *   Redistributions in binary form must reproduce the above copyright
> - *   notice, this list of conditions and the following disclaimer in
> - *   the documentation and/or other materials provided with the
> - *   distribution.
> - *   Neither the name of Intel Corporation nor the names of its
> - *   contributors may be used to endorse or promote products derived
> - *   from this software without specific prior written permission.
> - *
> - *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> - *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> - *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> - *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> - *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> - *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> - *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> - *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> - *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> - *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> - *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> - *
>   */
>  
>  #ifndef _EVENTFD_LINK_H_
>  #define _EVENTFD_LINK_H_
>  
>  /*
> - * ioctl to copy an fd entry in calling process to an fd in a target process
> + *	ioctl to copy an fd entry in calling process to an fd in a target process
>   */
>  #define EVENTFD_COPY 1
>  
>  /*
> - * arguements for the EVENTFD_COPY ioctl
> + *	arguements for the EVENTFD_COPY ioctl
>   */
>  struct eventfd_copy {
> -	unsigned target_fd; /* fd in the target pid */
> -	unsigned source_fd; /* fd in the calling pid */
> -	pid_t target_pid; /* pid of the target pid */
> +	unsigned target_fd; /**< fd in the target pid */
> +	unsigned source_fd; /**< fd in the calling pid */
> +	pid_t target_pid;   /**< pid of the target pid */
>  };
>  #endif /* _EVENTFD_LINK_H_ */
> diff --git a/lib/librte_vhost/libvirt/qemu-wrap.py b/lib/librte_vhost/libvirt/qemu-wrap.py
> deleted file mode 100755
> index e2d68a0..0000000
> --- a/lib/librte_vhost/libvirt/qemu-wrap.py
> +++ /dev/null
> @@ -1,367 +0,0 @@
> -#!/usr/bin/python
> -#/*
> -# *   BSD LICENSE
> -# *
> -# *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> -# *   All rights reserved.
> -# *
> -# *   Redistribution and use in source and binary forms, with or without
> -# *   modification, are permitted provided that the following conditions
> -# *   are met:
> -# *
> -# *     * Redistributions of source code must retain the above copyright
> -# *       notice, this list of conditions and the following disclaimer.
> -# *     * Redistributions in binary form must reproduce the above copyright
> -# *       notice, this list of conditions and the following disclaimer in
> -# *       the documentation and/or other materials provided with the
> -# *       distribution.
> -# *     * Neither the name of Intel Corporation nor the names of its
> -# *       contributors may be used to endorse or promote products derived
> -# *       from this software without specific prior written permission.
> -# *
> -# *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> -# *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> -# *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> -# *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> -# *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> -# *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> -# *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> -# *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> -# *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> -# *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> -# *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> -# */
> -
> -#####################################################################
> -# This script is designed to modify the call to the QEMU emulator
> -# to support userspace vhost when starting a guest machine through
> -# libvirt with vhost enabled. The steps to enable this are as follows
> -# and should be run as root:
> -#
> -# 1. Place this script in a libvirtd's binary search PATH ($PATH)
> -#    A good location would be in the same directory that the QEMU
> -#    binary is located
> -#
> -# 2. Ensure that the script has the same owner/group and file
> -#    permissions as the QEMU binary
> -#
> -# 3. Update the VM xml file using "virsh edit VM.xml"
> -#
> -#    3.a) Set the VM to use the launch script
> -#
> -#    	Set the emulator path contained in the
> -#		<emulator><emulator/> tags
> -#
> -#    	e.g replace <emulator>/usr/bin/qemu-kvm<emulator/>
> -#        with    <emulator>/usr/bin/qemu-wrap.py<emulator/>
> -#
> -#	 3.b) Set the VM's device's to use vhost-net offload
> -#
> -#		<interface type="network">
> -#       	<model type="virtio"/>
> -#       	<driver name="vhost"/>
> -#		<interface/>
> -#
> -# 4. Enable libvirt to access our userpace device file by adding it to
> -#    controllers cgroup for libvirtd using the following steps
> -#
> -#   4.a) In /etc/libvirt/qemu.conf add/edit the following lines:
> -#         1) cgroup_controllers = [ ... "devices", ... ]
> -#		  2) clear_emulator_capabilities = 0
> -#         3) user = "root"
> -#         4) group = "root"
> -#         5) cgroup_device_acl = [
> -#                "/dev/null", "/dev/full", "/dev/zero",
> -#                "/dev/random", "/dev/urandom",
> -#                "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
> -#                "/dev/rtc", "/dev/hpet", "/dev/net/tun",
> -#                "/dev/<devbase-name>-<index>",
> -#            ]
> -#
> -#   4.b) Disable SELinux or set to permissive mode
> -#
> -#   4.c) Mount cgroup device controller
> -#        "mkdir /dev/cgroup"
> -#        "mount -t cgroup none /dev/cgroup -o devices"
> -#
> -#   4.d) Set hugetlbfs_mount variable - ( Optional )
> -#        VMs using userspace vhost must use hugepage backed
> -#        memory. This can be enabled in the libvirt XML
> -#        config by adding a memory backing section to the
> -#        XML config e.g.
> -#             <memoryBacking>
> -#             <hugepages/>
> -#             </memoryBacking>
> -#        This memory backing section should be added after the
> -#        <memory> and <currentMemory> sections. This will add
> -#        flags "-mem-prealloc -mem-path <path>" to the QEMU
> -#        command line. The hugetlbfs_mount variable can be used
> -#        to override the default <path> passed through by libvirt.
> -#
> -#        if "-mem-prealloc" or "-mem-path <path>" are not passed
> -#        through and a vhost device is detected then these options will
> -#        be automatically added by this script. This script will detect
> -#        the system hugetlbfs mount point to be used for <path>. The
> -#        default <path> for this script can be overidden by the
> -#        hugetlbfs_dir variable in the configuration section of this script.
> -#
> -#
> -#   4.e) Restart the libvirtd system process
> -#        e.g. on Fedora "systemctl restart libvirtd.service"
> -#
> -#
> -#   4.f) Edit the Configuration Parameters section of this script
> -#        to point to the correct emulator location and set any
> -#        addition options
> -#
> -# The script modifies the libvirtd Qemu call by modifying/adding
> -# options based on the configuration parameters below.
> -# NOTE:
> -#     emul_path and us_vhost_path must be set
> -#     All other parameters are optional
> -#####################################################################
> -
> -
> -#############################################
> -# Configuration Parameters
> -#############################################
> -#Path to QEMU binary
> -emul_path = "/usr/local/bin/qemu-system-x86_64"
> -
> -#Path to userspace vhost device file
> -# This filename should match the --dev-basename --dev-index parameters of
> -# the command used to launch the userspace vhost sample application e.g.
> -# if the sample app lauch command is:
> -#    ./build/vhost-switch ..... --dev-basename usvhost --dev-index 1
> -# then this variable should be set to:
> -#   us_vhost_path = "/dev/usvhost-1"
> -us_vhost_path = "/dev/usvhost-1"
> -
> -#List of additional user defined emulation options. These options will
> -#be added to all Qemu calls
> -emul_opts_user = []
> -
> -#List of additional user defined emulation options for vhost only.
> -#These options will only be added to vhost enabled guests
> -emul_opts_user_vhost = []
> -
> -#For all VHOST enabled VMs, the VM memory is preallocated from hugetlbfs
> -# Set this variable to one to enable this option for all VMs
> -use_huge_all = 0
> -
> -#Instead of autodetecting, override the hugetlbfs directory by setting
> -#this variable
> -hugetlbfs_dir = ""
> -
> -#############################################
> -
> -
> -#############################################
> -# ****** Do Not Modify Below this Line ******
> -#############################################
> -
> -import sys, os, subprocess
> -
> -
> -#List of open userspace vhost file descriptors
> -fd_list = []
> -
> -#additional virtio device flags when using userspace vhost
> -vhost_flags = [ "csum=off",
> -                "gso=off",
> -                "guest_tso4=off",
> -                "guest_tso6=off",
> -                "guest_ecn=off"
> -              ]
> -
> -
> -#############################################
> -# Find the system hugefile mount point.
> -# Note:
> -# if multiple hugetlbfs mount points exist
> -# then the first one found will be used
> -#############################################
> -def find_huge_mount():
> -
> -    if (len(hugetlbfs_dir)):
> -        return hugetlbfs_dir
> -
> -    huge_mount = ""
> -
> -    if (os.access("/proc/mounts", os.F_OK)):
> -        f = open("/proc/mounts", "r")
> -        line = f.readline()
> -        while line:
> -            line_split = line.split(" ")
> -            if line_split[2] == 'hugetlbfs':
> -                huge_mount = line_split[1]
> -                break
> -            line = f.readline()
> -    else:
> -        print "/proc/mounts not found"
> -        exit (1)
> -
> -    f.close
> -    if len(huge_mount) == 0:
> -        print "Failed to find hugetlbfs mount point"
> -        exit (1)
> -
> -    return huge_mount
> -
> -
> -#############################################
> -# Get a userspace Vhost file descriptor
> -#############################################
> -def get_vhost_fd():
> -
> -    if (os.access(us_vhost_path, os.F_OK)):
> -        fd = os.open( us_vhost_path, os.O_RDWR)
> -    else:
> -        print ("US-Vhost file %s not found" %us_vhost_path)
> -        exit (1)
> -
> -    return fd
> -
> -
> -#############################################
> -# Check for vhostfd. if found then replace
> -# with our own vhost fd and append any vhost
> -# flags onto the end
> -#############################################
> -def modify_netdev_arg(arg):
> -	
> -    global fd_list
> -    vhost_in_use = 0
> -    s = ''
> -    new_opts = []
> -    netdev_opts = arg.split(",")
> -
> -    for opt in netdev_opts:
> -        #check if vhost is used
> -        if "vhost" == opt[:5]:
> -            vhost_in_use = 1
> -        else:
> -            new_opts.append(opt)
> -
> -    #if using vhost append vhost options
> -    if vhost_in_use == 1:
> -        #append vhost on option
> -        new_opts.append('vhost=on')
> -        #append vhostfd ption
> -        new_fd = get_vhost_fd()
> -        new_opts.append('vhostfd=' + str(new_fd))
> -        fd_list.append(new_fd)
> -
> -    #concatenate all options
> -    for opt in new_opts:
> -        if len(s) > 0:
> -			s+=','
> -
> -        s+=opt
> -
> -    return s	
> -
> -
> -#############################################
> -# Main
> -#############################################
> -def main():
> -
> -    global fd_list
> -    global vhost_in_use
> -    new_args = []
> -    num_cmd_args = len(sys.argv)
> -    emul_call = ''
> -    mem_prealloc_set = 0
> -    mem_path_set = 0
> -    num = 0;
> -
> -    #parse the parameters
> -    while (num < num_cmd_args):
> -        arg = sys.argv[num]
> -
> -		#Check netdev +1 parameter for vhostfd
> -        if arg == '-netdev':
> -            num_vhost_devs = len(fd_list)
> -            new_args.append(arg)
> -
> -            num+=1
> -            arg = sys.argv[num]
> -            mod_arg = modify_netdev_arg(arg)
> -            new_args.append(mod_arg)
> -
> -            #append vhost flags if this is a vhost device
> -            # and -device is the next arg
> -            # i.e -device -opt1,-opt2,...,-opt3,%vhost
> -            if (num_vhost_devs < len(fd_list)):
> -                num+=1
> -                arg = sys.argv[num]
> -                if arg == '-device':
> -                    new_args.append(arg)
> -                    num+=1
> -                    new_arg = sys.argv[num]
> -                    for flag in vhost_flags:
> -                        new_arg = ''.join([new_arg,',',flag])
> -                    new_args.append(new_arg)
> -                else:
> -                    new_args.append(arg)
> -        elif arg == '-mem-prealloc':
> -            mem_prealloc_set = 1
> -            new_args.append(arg)
> -        elif arg == '-mem-path':
> -            mem_path_set = 1
> -            new_args.append(arg)
> -
> -        else:
> -            new_args.append(arg)
> -
> -        num+=1
> -
> -    #Set Qemu binary location
> -    emul_call+=emul_path
> -    emul_call+=" "
> -
> -    #Add prealloc mem options if using vhost and not already added
> -    if ((len(fd_list) > 0) and (mem_prealloc_set == 0)):
> -        emul_call += "-mem-prealloc "
> -
> -    #Add mempath mem options if using vhost and not already added
> -    if ((len(fd_list) > 0) and (mem_path_set == 0)):
> -        #Detect and add hugetlbfs mount point
> -        mp = find_huge_mount()
> -        mp = "".join(["-mem-path ", mp])
> -        emul_call += mp
> -        emul_call += " "
> -
> -
> -    #add user options
> -    for opt in emul_opts_user:
> -        emul_call += opt
> -        emul_call += " "
> -
> -    #Add add user vhost only options
> -    if len(fd_list) > 0:
> -        for opt in emul_opts_user_vhost:
> -            emul_call += opt
> -            emul_call += " "
> -
> -    #Add updated libvirt options
> -    iter_args = iter(new_args)
> -    #skip 1st arg i.e. call to this script
> -    next(iter_args)
> -    for arg in iter_args:
> -        emul_call+=str(arg)
> -        emul_call+= " "
> -
> -    #Call QEMU
> -    subprocess.call(emul_call, shell=True)
> -
> -
> -    #Close usvhost files
> -    for fd in fd_list:
> -        os.close(fd)
> -
> -
> -if __name__ == "__main__":
> -    main()
> -
> diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
> index 00b1328..7a05dab 100644
> --- a/lib/librte_vhost/rte_virtio_net.h
> +++ b/lib/librte_vhost/rte_virtio_net.h
> @@ -34,11 +34,6 @@
>  #ifndef _VIRTIO_NET_H_
>  #define _VIRTIO_NET_H_
>  
> -/**
> - * @file
> - * Interface to vhost net
> - */
> -
>  #include <stdint.h>
>  #include <linux/virtio_ring.h>
>  #include <linux/virtio_net.h>
> @@ -48,66 +43,38 @@
>  #include <rte_mempool.h>
>  #include <rte_mbuf.h>
>  
> -/* Used to indicate that the device is running on a data core */
> -#define VIRTIO_DEV_RUNNING 1
> -
> -/* Backend value set by guest. */
> -#define VIRTIO_DEV_STOPPED -1
> -
> +#define VIRTIO_DEV_RUNNING 1  /**< Used to indicate that the device is running on a data core. */
> +#define VIRTIO_DEV_STOPPED -1 /**< Backend value set by guest. */
>  
>  /* Enum for virtqueue management. */
>  enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
>  
> -#define BUF_VECTOR_MAX 256
> -
> -/**
> - * Structure contains buffer address, length and descriptor index
> - * from vring to do scatter RX.
> - */
> -struct buf_vector {
> -	uint64_t buf_addr;
> -	uint32_t buf_len;
> -	uint32_t desc_idx;
> -};
> -
>  /**
>   * Structure contains variables relevant to RX/TX virtqueues.
>   */
>  struct vhost_virtqueue {
> -	struct vring_desc	*desc;			/**< Virtqueue descriptor ring. */
> -	struct vring_avail	*avail;			/**< Virtqueue available ring. */
> -	struct vring_used	*used;			/**< Virtqueue used ring. */
> -	uint32_t		size;			/**< Size of descriptor ring. */
> -	uint32_t		backend;		/**< Backend value to determine if device should started/stopped. */
> -	uint16_t		vhost_hlen;		/**< Vhost header length (varies depending on RX merge buffers. */
> -	volatile uint16_t	last_used_idx;		/**< Last index used on the available ring */
> -	volatile uint16_t	last_used_idx_res;	/**< Used for multiple devices reserving buffers. */
> -	eventfd_t		callfd;			/**< Currently unused as polling mode is enabled. */
> -	eventfd_t		kickfd;			/**< Used to notify the guest (trigger interrupt). */
> -	struct buf_vector	buf_vec[BUF_VECTOR_MAX];	/**< for scatter RX. */
> -} __rte_cache_aligned;
> -
> -/**
> - * Device structure contains all configuration information relating to the device.
> - */
> -struct virtio_net {
> -	struct vhost_virtqueue	*virtqueue[VIRTIO_QNUM];	/**< Contains all virtqueue information. */
> -	struct virtio_memory	*mem;		/**< QEMU memory and memory region information. */
> -	uint64_t		features;	/**< Negotiated feature set. */
> -	uint64_t		device_fh;	/**< device identifier. */
> -	uint32_t		flags;		/**< Device flags. Only used to check if device is running on data core. */
> -	void			*priv;		/**< private context */
> +	struct vring_desc    *desc;             /**< descriptor ring. */
> +	struct vring_avail   *avail;            /**< available ring. */
> +	struct vring_used    *used;             /**< used ring. */
> +	uint32_t             size;              /**< Size of descriptor ring. */
> +	uint32_t             backend;           /**< Backend value to determine if device should be started/stopped. */
> +	uint16_t             vhost_hlen;        /**< Vhost header length (varies depending on RX merge buffers. */
> +	volatile uint16_t    last_used_idx;     /**< Last index used on the available ring. */
> +	volatile uint16_t    last_used_idx_res; /**< Used for multiple devices reserving buffers. */
> +	eventfd_t            callfd;            /**< Currently unused as polling mode is enabled. */
> +	eventfd_t            kickfd;            /**< Used to notify the guest (trigger interrupt). */
>  } __rte_cache_aligned;
>  
>  /**
> - * Information relating to memory regions including offsets to addresses in QEMUs memory file.
> + * Information relating to memory regions including offsets to
> + * addresses in QEMUs memory file.
>   */
>  struct virtio_memory_regions {
> -	uint64_t	guest_phys_address;	/**< Base guest physical address of region. */
> -	uint64_t	guest_phys_address_end;	/**< End guest physical address of region. */
> -	uint64_t	memory_size;		/**< Size of region. */
> -	uint64_t	userspace_address;	/**< Base userspace address of region. */
> -	uint64_t	address_offset;		/**< Offset of region for address translation. */
> +	uint64_t    guest_phys_address;     /**< Base guest physical address of region. */
> +	uint64_t    guest_phys_address_end; /**< End guest physical address of region. */
> +	uint64_t    memory_size;            /**< Size of region. */
> +	uint64_t    userspace_address;      /**< Base userspace address of region. */
> +	uint64_t    address_offset;         /**< Offset of region for address translation. */
>  };
>  
>  
> @@ -115,21 +82,34 @@ struct virtio_memory_regions {
>   * Memory structure includes region and mapping information.
>   */
>  struct virtio_memory {
> -	uint64_t	base_address;	/**< Base QEMU userspace address of the memory file. */
> -	uint64_t	mapped_address;	/**< Mapped address of memory file base in our applications memory space. */
> -	uint64_t	mapped_size;	/**< Total size of memory file. */
> -	uint32_t	nregions;	/**< Number of memory regions. */
> +	uint64_t    base_address;    /**< Base QEMU userspace address of the memory file. */
> +	uint64_t    mapped_address;  /**< Mapped address of memory file base in our applications memory space. */
> +	uint64_t    mapped_size;     /**< Total size of memory file. */
> +	uint32_t    nregions;        /**< Number of memory regions. */
>  	struct virtio_memory_regions      regions[0]; /**< Memory region information. */
>  };
>  
>  /**
> + * Device structure contains all configuration information relating to the device.
> + */
> +struct virtio_net {
> +	struct vhost_virtqueue  *virtqueue[VIRTIO_QNUM]; /**< Contains all virtqueue information. */
> +	struct virtio_memory    *mem;                    /**< QEMU memory and memory region information. */
> +	uint64_t features;    /**< Negotiated feature set. */
> +	uint64_t device_fh;   /**< Device identifier. */
> +	uint32_t flags;       /**< Device flags. Only used to check if device is running on data core. */
> +	void     *priv;       /**< Private context. */
> +} __rte_cache_aligned;
> +
> +/**
>   * Device operations to add/remove device.
>   */
>  struct virtio_net_device_ops {
> -	int (*new_device)(struct virtio_net *);	/**< Add device. */
> -	void (*destroy_device)(volatile struct virtio_net *);	/**< Remove device. */
> +	int (*new_device)(struct virtio_net *); /**< Add device. */
> +	void (*destroy_device)(struct virtio_net *); /**< Remove device. */
>  };
>  
> +
>  static inline uint16_t __attribute__((always_inline))
>  rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id)
>  {
> @@ -179,7 +159,7 @@ int rte_vhost_driver_register(const char *dev_name);
>  
>  /* Register callbacks. */
>  int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const);
> -/* Start vhost driver session blocking loop. */
> +
>  int rte_vhost_driver_session_start(void);
>  
>  /**
> @@ -192,8 +172,8 @@ int rte_vhost_driver_session_start(void);
>   * @return
>   *  num of packets enqueued
>   */
> -uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
> -	struct rte_mbuf **pkts, uint16_t count);
> +uint32_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
> +	struct rte_mbuf **pkts, uint32_t count);
>  
>  /**
>   * This function gets guest buffers from the virtio device TX virtqueue,
> @@ -206,7 +186,7 @@ uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
>   * @return
>   *  num of packets dequeued
>   */
> -uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
> -	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
> +uint32_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
> +	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count);
>  
>  #endif /* _VIRTIO_NET_H_ */
> diff --git a/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
> new file mode 100644
> index 0000000..4671643
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
> @@ -0,0 +1,436 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdint.h>
> +#include <fuse/cuse_lowlevel.h>
> +#include <linux/limits.h>
> +#include <linux/vhost.h>
> +#include <linux/virtio_net.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <sys/ioctl.h>
> +
> +#include <rte_ethdev.h>
> +#include <rte_log.h>
> +#include <rte_string_fns.h>
> +#include <rte_virtio_net.h>
> +
> +#include "virtio-net-cdev.h"
> +#include "vhost-net.h"
> +#include "eventfd_link/eventfd_link.h"
> +
> +#define FUSE_OPT_DUMMY "\0\0"
> +#define FUSE_OPT_FORE  "-f\0\0"
> +#define FUSE_OPT_NOMULTI "-s\0\0"
> +
> +static const uint32_t default_major = 231;
> +static const uint32_t default_minor = 1;
> +static const char cuse_device_name[] = "/dev/cuse";
> +static const char default_cdev[] = "vhost-net";
> +static const char eventfd_cdev[] = "/dev/eventfd-link";
> +
> +/* CUSE session created by rte_vhost_driver_register(). */
> +static struct fuse_session *session;
> +/*
> + * Callback table, assigned in rte_vhost_driver_register(). It has external
> + * linkage because virtio-net-cdev.c refers to it through an extern
> + * declaration; the type is spelled exactly as in that declaration.
> + */
> +struct vhost_net_device_ops const *ops;
> +
> +/*
> + * Build a vhost_device_ctx for a FUSE request: the pid comes from the
> + * request's fuse_ctx and the file handle from the fuse_file_info. The index
> + * is populated later when the device is added to the device linked list.
> + */
> +static struct vhost_device_ctx
> +fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
> +{
> +	struct fuse_ctx const *const caller = fuse_req_ctx(req);
> +	struct vhost_device_ctx dev_ctx;
> +
> +	dev_ctx.fh = fi->fh;
> +	dev_ctx.pid = caller->pid;
> +
> +	return dev_ctx;
> +}
> +
> +/*
> + * CUSE open handler. Asks ops->new_device() for a new device; a return of
> + * -1 is reported to FUSE as EPERM, any other value is stored as the file
> + * handle of the opened file before the open is acknowledged.
> + */
> +static void
> +vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
> +{
> +	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
> +	int fh = ops->new_device(ctx);
> +
> +	if (fh == -1) {
> +		fuse_reply_err(req, EPERM);
> +		return;
> +	}
> +
> +	/* The value returned by new_device() becomes the file handle. */
> +	fi->fh = fh;
> +
> +	RTE_LOG(INFO, VHOST_CONFIG,
> +		"(%"PRIu64") Device configuration started\n", fi->fh);
> +	fuse_reply_open(req, fi);
> +}
> +
> +/*
> + * CUSE release handler, invoked when QEMU is shutdown or killed. Hands the
> + * device context to ops->destroy_device() and always replies with success.
> + */
> +static void
> +vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
> +{
> +	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
> +
> +	ops->destroy_device(ctx);
> +	RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
> +
> +	/* An error code of 0 reports success to FUSE. */
> +	fuse_reply_err(req, 0);
> +}
> +
> +/*
> + * Boilerplate code for CUSE IOCTL
> + * Implicit arguments: ctx, req, result.
> + */
> +#define VHOST_IOCTL(func) do {	\
> +	result = (func)(ctx);	\
> +	fuse_reply_ioctl(req, result, NULL, 0);	\
> +} while (0)
> +
> +/*
> + * Boilerplate IOCTL RETRY
> + * Replies with fuse_reply_ioctl_retry(), describing a read buffer of size_r
> + * bytes and a write buffer of size_w bytes, both located at arg. A size of
> + * zero leaves the corresponding buffer out of the reply.
> + * Implicit arguments: req, arg.
> + */
> +#define VHOST_IOCTL_RETRY(size_r, size_w) do {	\
> +	struct iovec iov_r = { arg, (size_r) };	\
> +	struct iovec iov_w = { arg, (size_w) };	\
> +	fuse_reply_ioctl_retry(req, &iov_r,	\
> +		(size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
> +} while (0)
> +
> +/*
> + * Boilerplate code for CUSE Read IOCTL
> + * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
> + */
> +#define VHOST_IOCTL_R(type, var, func) do {	\
> +	if (!in_bufsz) {			\
> +		VHOST_IOCTL_RETRY(sizeof(type), 0);\
> +	} else {	\
> +		(var) = *(const type*)in_buf;	\
> +		result = func(ctx, &(var));	\
> +		fuse_reply_ioctl(req, result, NULL, 0);\
> +	}	\
> +} while (0)
> +
> +/*
> + * Boilerplate code for CUSE Write IOCTL
> + * Implicit arguments: ctx, req, result, out_bufsz.
> + */
> +#define VHOST_IOCTL_W(type, var, func) do {	\
> +	if (!out_bufsz) {			\
> +		VHOST_IOCTL_RETRY(0, sizeof(type));\
> +	} else {	\
> +		result = (func)(ctx, &(var));\
> +		fuse_reply_ioctl(req, result, &(var), sizeof(type));\
> +	} \
> +} while (0)
> +
> +/*
> + * Boilerplate code for CUSE Read/Write IOCTL
> + * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
> + */
> +#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do {	\
> +	if (!in_bufsz) {	\
> +		VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
> +	} else {	\
> +		(var1) = *(const type1*) (in_buf);	\
> +		result = (func)(ctx, (var1), &(var2));	\
> +		fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
> +	} \
> +} while (0)
> +
> +/*
> + * This function uses the eventfd_link kernel module to copy an eventfd file
> + * descriptor provided by QEMU in to our process space.
> + *
> + * A new local eventfd is created and passed to the module as source_fd,
> + * together with target_fd and target_pid identifying QEMU's descriptor.
> + *
> + * Returns the local eventfd on success and -1 on failure. The local eventfd
> + * is closed on every failure path so that no descriptor is leaked.
> + */
> +static int
> +eventfd_copy(int target_fd, int target_pid)
> +{
> +	int eventfd_link, ret;
> +	struct eventfd_copy eventfd_copy;
> +	int fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +
> +	if (fd == -1)
> +		return -1;
> +
> +	/* Open the character device to the kernel module. */
> +	/* TODO: check this earlier rather than fail until VM boots! */
> +	eventfd_link = open(eventfd_cdev, O_RDWR);
> +	if (eventfd_link < 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"eventfd_link module is not loaded\n");
> +		close(fd);
> +		return -1;
> +	}
> +
> +	eventfd_copy.source_fd = fd;
> +	eventfd_copy.target_fd = target_fd;
> +	eventfd_copy.target_pid = target_pid;
> +	/* Call the IOCTL to copy the eventfd. */
> +	ret = ioctl(eventfd_link, EVENTFD_COPY, &eventfd_copy);
> +	close(eventfd_link);
> +
> +	if (ret < 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"EVENTFD_COPY ioctl failed\n");
> +		close(fd);
> +		return -1;
> +	}
> +
> +	return fd;
> +}
> +
> +/*
> + * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on
> + * the type of IOCTL a buffer is requested to read or to write. This
> + * request is handled by FUSE and the buffer is then given to CUSE.
> + *
> + * Every path through this function replies to req exactly once: with a
> + * retry (asking FUSE to supply the argument buffer), with the handler's
> + * result, or with an error.
> + */
> +static void
> +vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
> +		struct fuse_file_info *fi, __rte_unused unsigned flags,
> +		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
> +{
> +	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
> +	struct vhost_vring_file file;
> +	struct vhost_vring_state state;
> +	struct vhost_vring_addr addr;
> +	uint64_t features;
> +	uint32_t index;
> +	int result = 0;
> +
> +	switch (cmd) {
> +	case VHOST_NET_SET_BACKEND:
> +		LOG_DEBUG(VHOST_CONFIG,
> +			"(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
> +		VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
> +		break;
> +
> +	case VHOST_GET_FEATURES:
> +		LOG_DEBUG(VHOST_CONFIG,
> +			"(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
> +		VHOST_IOCTL_W(uint64_t, features, ops->get_features);
> +		break;
> +
> +	case VHOST_SET_FEATURES:
> +		LOG_DEBUG(VHOST_CONFIG,
> +			"(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
> +		VHOST_IOCTL_R(uint64_t, features, ops->set_features);
> +		break;
> +
> +	case VHOST_RESET_OWNER:
> +		LOG_DEBUG(VHOST_CONFIG,
> +			"(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
> +		VHOST_IOCTL(ops->reset_owner);
> +		break;
> +
> +	case VHOST_SET_OWNER:
> +		LOG_DEBUG(VHOST_CONFIG,
> +			"(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
> +		VHOST_IOCTL(ops->set_owner);
> +		break;
> +
> +	case VHOST_SET_MEM_TABLE:
> +		LOG_DEBUG(VHOST_CONFIG,
> +			"(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
> +		if (in_bufsz == 0) {
> +			/* First pass: ask FUSE for the fixed-size header. */
> +			VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
> +		} else if (in_bufsz < sizeof(struct vhost_memory)) {
> +			/* Too short to even hold the region count. */
> +			result = -1;
> +			fuse_reply_err(req, EINVAL);
> +		} else {
> +			/*
> +			 * The region count is read from the buffer of this
> +			 * very request instead of being remembered in a
> +			 * static variable, so requests belonging to
> +			 * different devices cannot mix up their counts.
> +			 */
> +			const struct vhost_memory *mem = in_buf;
> +			size_t table_sz = sizeof(struct vhost_memory) +
> +				(sizeof(struct vhost_memory_region) *
> +					mem->nregions);
> +
> +			if (mem->nregions == 0) {
> +				result = -1;
> +				fuse_reply_ioctl(req, result, NULL, 0);
> +			} else if (in_bufsz < table_sz) {
> +				/* Header only so far: ask for the full table. */
> +				VHOST_IOCTL_RETRY(table_sz, 0);
> +			} else {
> +				result = cuse_set_mem_table(ctx, in_buf,
> +					mem->nregions);
> +				if (result)
> +					fuse_reply_err(req, EINVAL);
> +				else
> +					fuse_reply_ioctl(req, result, NULL, 0);
> +			}
> +		}
> +		break;
> +
> +	case VHOST_SET_VRING_NUM:
> +		LOG_DEBUG(VHOST_CONFIG,
> +			"(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
> +		VHOST_IOCTL_R(struct vhost_vring_state, state, ops->set_vring_num);
> +		break;
> +
> +	case VHOST_SET_VRING_BASE:
> +		LOG_DEBUG(VHOST_CONFIG,
> +			"(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
> +		VHOST_IOCTL_R(struct vhost_vring_state, state, ops->set_vring_base);
> +		break;
> +
> +	case VHOST_GET_VRING_BASE:
> +		LOG_DEBUG(VHOST_CONFIG,
> +			"(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
> +		VHOST_IOCTL_RW(uint32_t, index,
> +			struct vhost_vring_state, state, ops->get_vring_base);
> +		break;
> +
> +	case VHOST_SET_VRING_ADDR:
> +		LOG_DEBUG(VHOST_CONFIG,
> +			"(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
> +		VHOST_IOCTL_R(struct vhost_vring_addr, addr, ops->set_vring_addr);
> +		break;
> +
> +	case VHOST_SET_VRING_KICK:
> +	case VHOST_SET_VRING_CALL:
> +		if (in_buf == NULL || in_bufsz == 0) {
> +			VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0);
> +		} else {
> +			int fd;
> +
> +			file = *(const struct vhost_vring_file *)in_buf;
> +			LOG_DEBUG(VHOST_CONFIG,
> +				"kick/call idx:%d fd:%d\n", file.index, file.fd);
> +			fd = eventfd_copy(file.fd, ctx.pid);
> +			if (fd < 0) {
> +				/* Reply once and stop; req must not be answered twice. */
> +				result = -1;
> +				fuse_reply_ioctl(req, result, NULL, 0);
> +				break;
> +			}
> +			file.fd = fd;
> +			/*
> +			 * The handlers are called directly: VHOST_IOCTL_R
> +			 * would reload 'file' from in_buf and thereby throw
> +			 * away the copied fd stored just above.
> +			 *
> +			 * NOTE(review): KICK is routed to set_vring_call and
> +			 * CALL to set_vring_kick, exactly as before; confirm
> +			 * against the ops implementation that this crossing
> +			 * is intended.
> +			 */
> +			if (cmd == VHOST_SET_VRING_KICK)
> +				result = ops->set_vring_call(ctx, &file);
> +			else
> +				result = ops->set_vring_kick(ctx, &file);
> +			fuse_reply_ioctl(req, result, NULL, 0);
> +		}
> +		break;
> +
> +	default:
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
> +		result = -1;
> +		fuse_reply_ioctl(req, result, NULL, 0);
> +	}
> +
> +	if (result < 0)
> +		LOG_DEBUG(VHOST_CONFIG,
> +			"(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
> +	else
> +		LOG_DEBUG(VHOST_CONFIG,
> +			"(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
> +}
> +
> +/*
> + * Structure handling open, release and ioctl function pointers is populated.
> + */
> +static const struct cuse_lowlevel_ops vhost_net_ops = {
> +	.open		= vhost_net_open,
> +	.release	= vhost_net_release,
> +	.ioctl		= vhost_net_ioctl,
> +};
> +
> +/*
> + * cuse_info is populated and used to register the cuse device.
> + * vhost_net_device_ops are also fetched here via get_virtio_net_callbacks().
> + *
> + * dev_name is the name of the character device to create under /dev.
> + *
> + * Returns 0 on success and -1 when dev_name is NULL or too long, /dev/cuse
> + * is not accessible, the device already exists, or the CUSE setup fails.
> + */
> +int
> +rte_vhost_driver_register(const char *dev_name)
> +{
> +	struct cuse_info cuse_info;
> +	char device_name[PATH_MAX] = "";
> +	char char_device_name[PATH_MAX] = "";
> +	const char *device_argv[] = { device_name };
> +	int len;
> +
> +	char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
> +	char fuse_opt_fore[] = FUSE_OPT_FORE;
> +	char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
> +	char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
> +
> +	/* Passing NULL to a %s conversion below would be undefined. */
> +	if (dev_name == NULL) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "device name is NULL\n");
> +		return -1;
> +	}
> +
> +	if (access(cuse_device_name, R_OK | W_OK) < 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"char device %s can't be accessed, maybe not exist\n",
> +			cuse_device_name);
> +		return -1;
> +	}
> +
> +	/*
> +	 * The device name is created. This is passed to QEMU so that it can
> +	 * register the device with our application. A name that does not fit
> +	 * is rejected rather than registered in truncated form.
> +	 */
> +	len = snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
> +	if (len < 0 || len >= PATH_MAX) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "device name is too long\n");
> +		return -1;
> +	}
> +	len = snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
> +	if (len < 0 || len >= PATH_MAX) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "device name is too long\n");
> +		return -1;
> +	}
> +
> +	/* Check if device already exists. */
> +	if (access(char_device_name, F_OK) != -1) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"char device %s already exists\n", char_device_name);
> +		return -1;
> +	}
> +
> +	memset(&cuse_info, 0, sizeof(cuse_info));
> +	cuse_info.dev_major = default_major;
> +	cuse_info.dev_minor = default_minor;
> +	cuse_info.dev_info_argc = 1;
> +	cuse_info.dev_info_argv = device_argv;
> +	cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
> +
> +	ops = get_virtio_net_callbacks();
> +
> +	session = cuse_lowlevel_setup(3, fuse_argv,
> +			&cuse_info, &vhost_net_ops, 0, NULL);
> +	if (session == NULL)
> +		return -1;
> +
> +	return 0;
> +}
> +
> +/**
> + * The CUSE session is launched allowing the application to receive open,
> + * release and ioctl calls.
> + *
> + * @return
> + *  0 once the session loop has returned, -1 if no session has been set up
> + *  by a successful rte_vhost_driver_register() call.
> + */
> +int
> +rte_vhost_driver_session_start(void)
> +{
> +	/* session stays NULL until registration succeeds. */
> +	if (session == NULL)
> +		return -1;
> +
> +	fuse_session_loop(session);
> +
> +	return 0;
> +}
> diff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
> new file mode 100644
> index 0000000..5c16aa5
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
> @@ -0,0 +1,314 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdint.h>
> +#include <dirent.h>
> +#include <linux/vhost.h>
> +#include <linux/virtio_net.h>
> +#include <fuse/cuse_lowlevel.h>
> +#include <stddef.h>
> +#include <string.h>
> +#include <stdlib.h>
> +#include <sys/eventfd.h>
> +#include <sys/mman.h>
> +#include <sys/types.h>
> +#include <unistd.h>
> +#include <errno.h>
> +
> +#include <rte_log.h>
> +
> +#include "vhost-net.h"
> +#include "virtio-net-cdev.h"
> +
> +extern struct vhost_net_device_ops const *ops;
> +
> +/* Line size for reading maps file. */
> +static const uint32_t BUFSIZE = PATH_MAX;
> +
> +/* Size of prot char array in procmap. */
> +#define PROT_SZ 5
> +
> +/* Number of elements in procmap struct. */
> +#define PROCMAP_SZ 8
> +
> +/*
> + * Structure containing information gathered from one line of the maps file.
> + * Fields are filled in by host_memory_map() from the tokens of that line.
> + */
> +struct procmap {
> +	uint64_t va_start;	/* Start virtual address (first field of the line). */
> +	uint64_t len;		/* Second field as parsed; rewritten to (value - va_start) for the matching entry. */
> +	uint64_t pgoff;		/* Not used. */
> +	uint32_t maj;		/* Not used. */
> +	uint32_t min;		/* Not used. */
> +	uint32_t ino;		/* Not used. */
> +	char prot[PROT_SZ];	/* Not used. */
> +	char fname[PATH_MAX];	/* File name (last field of the line). */
> +};
> +
> +/*
> + * Parse s as a complete hexadecimal number and store it in *value.
> + * Returns 0 on success, -1 if s is empty, has trailing characters or
> + * the conversion reports an error through errno.
> + */
> +static int
> +procmap_parse_hex(const char *s, uint64_t *value)
> +{
> +	char *end = NULL;
> +
> +	errno = 0;
> +	*value = strtoull(s, &end, 16);
> +	if ((end == s) || (*end != '\0') || (errno != 0))
> +		return -1;
> +
> +	return 0;
> +}
> +
> +/*
> + * Locate the file containing QEMU's memory space and
> + * map it to our address space.
> + *
> + * The maps file of process pid is scanned for the entry whose start address
> + * equals addr. The file name of that entry is then looked up among the
> + * process' open fds, and the matching /proc/<pid>/fd/<n> entry is opened and
> + * mapped shared, read/write, from offset 0 over the length of the entry.
> + *
> + * On success the mapping address and size are stored in *mapped_address and
> + * *mapped_size and 0 is returned; -1 is returned on any failure.
> + */
> +static int
> +host_memory_map(pid_t pid, uint64_t addr,
> +	uint64_t *mapped_address, uint64_t *mapped_size)
> +{
> +	struct dirent *dptr = NULL;
> +	struct procmap procmap;
> +	DIR *dp = NULL;
> +	int fd;
> +	int i;
> +	char memfile[PATH_MAX];
> +	char mapfile[PATH_MAX];
> +	char procdir[PATH_MAX];
> +	char resolved_path[PATH_MAX];
> +	FILE *fmap;
> +	void *map;
> +	uint8_t found = 0;
> +	char line[BUFSIZE];
> +	char dlm[] = "-   :   ";
> +	char *str, *sp, *in[PROCMAP_SZ];
> +	uint64_t maj, min, ino;
> +
> +	/* Path where mem files are located. */
> +	snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
> +	/* Maps file used to locate mem file. */
> +	snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
> +
> +	fmap = fopen(mapfile, "r");
> +	if (fmap == NULL) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"Failed to open maps file for pid %d\n", pid);
> +		return -1;
> +	}
> +
> +	/* Read through maps file until we find out base_address. */
> +	while (fgets(line, BUFSIZE, fmap) != 0) {
> +		str = line;
> +		errno = 0;
> +		/*
> +		 * Split line in to fields. Field i is delimited by the
> +		 * characters of dlm starting at position i.
> +		 */
> +		for (i = 0; i < PROCMAP_SZ; i++) {
> +			in[i] = strtok_r(str, &dlm[i], &sp);
> +			if ((in[i] == NULL) || (errno != 0)) {
> +				fclose(fmap);
> +				return -1;
> +			}
> +			str = NULL;
> +		}
> +
> +		/* Convert each numeric field; all are parsed as hexadecimal. */
> +		if ((procmap_parse_hex(in[0], &procmap.va_start) != 0) ||
> +			(procmap_parse_hex(in[1], &procmap.len) != 0) ||
> +			(procmap_parse_hex(in[3], &procmap.pgoff) != 0) ||
> +			(procmap_parse_hex(in[4], &maj) != 0) ||
> +			(procmap_parse_hex(in[5], &min) != 0) ||
> +			(procmap_parse_hex(in[6], &ino) != 0)) {
> +			fclose(fmap);
> +			return -1;
> +		}
> +		procmap.maj = (uint32_t)maj;
> +		procmap.min = (uint32_t)min;
> +		procmap.ino = (uint32_t)ino;
> +
> +		/*
> +		 * Copy the string fields with a bound on both sides: the
> +		 * tokens live inside 'line', so copying a fixed PATH_MAX
> +		 * bytes from them would read past the end of that buffer.
> +		 */
> +		snprintf(procmap.prot, sizeof(procmap.prot), "%s", in[2]);
> +		snprintf(procmap.fname, sizeof(procmap.fname), "%s", in[7]);
> +
> +		if (procmap.va_start == addr) {
> +			/* len held the end address until now. */
> +			procmap.len = procmap.len - procmap.va_start;
> +			found = 1;
> +			break;
> +		}
> +	}
> +	fclose(fmap);
> +
> +	if (!found) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"Failed to find memory file in pid %d maps file\n", pid);
> +		return -1;
> +	}
> +
> +	/* Find the guest memory file among the process fds. */
> +	dp = opendir(procdir);
> +	if (dp == NULL) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"Cannot open pid %d process directory\n",
> +			pid);
> +		return -1;
> +	}
> +
> +	found = 0;
> +
> +	/* Read the fd directory contents. */
> +	while (NULL != (dptr = readdir(dp))) {
> +		snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
> +				pid, dptr->d_name);
> +		/*
> +		 * Clear the buffer first so that a failed realpath() cannot
> +		 * leave the previous entry's path, or uninitialised stack
> +		 * data, to be compared below.
> +		 *
> +		 * A failed call that still produced a (partial) path is
> +		 * deliberately compared as before: realpath(3) documents
> +		 * that the unresolvable prefix may be returned on failure.
> +		 */
> +		resolved_path[0] = '\0';
> +		if ((realpath(memfile, resolved_path) == NULL) &&
> +			(resolved_path[0] == '\0'))
> +			continue;
> +
> +		if (strncmp(resolved_path, procmap.fname,
> +			strnlen(procmap.fname, PATH_MAX)) == 0) {
> +			found = 1;
> +			break;
> +		}
> +	}
> +
> +	closedir(dp);
> +
> +	if (found == 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"Failed to find memory file for pid %d\n",
> +			pid);
> +		return -1;
> +	}
> +	/* Open the shared memory file and map the memory into this process. */
> +	fd = open(memfile, O_RDWR);
> +
> +	if (fd == -1) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"Failed to open %s for pid %d\n",
> +			memfile, pid);
> +		return -1;
> +	}
> +
> +	map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
> +			MAP_POPULATE|MAP_SHARED, fd, 0);
> +	/* The descriptor is closed right after mmap(), whether it succeeded or not. */
> +	close(fd);
> +
> +	if (map == MAP_FAILED) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"Error mapping the file %s for pid %d\n",
> +			memfile, pid);
> +		return -1;
> +	}
> +
> +	/* Store the memory address and size in the device data structure */
> +	*mapped_address = (uint64_t)(uintptr_t)map;
> +	*mapped_size = procmap.len;
> +
> +	/* Log the mapped size itself, not the address of the out parameter. */
> +	LOG_DEBUG(VHOST_CONFIG,
> +		"Mem File: %s->%s - Size: %llu - VA: %p\n",
> +		memfile, resolved_path,
> +		(unsigned long long)*mapped_size, map);
> +
> +	return 0;
> +}
> +
> +/*
> + * Convert the vhost_memory_region entries that follow the vhost_memory
> + * header at mem_regions_addr into virtio_memory_regions, map QEMU's memory
> + * file for the region whose guest physical address is 0, compute each
> + * region's address offset and hand the table to ops->set_mem_table().
> + *
> + * Returns 0 on success. Returns -1 if nregions exceeds the local table, if
> + * the memory file cannot be mapped, or if no base address was found.
> + */
> +int
> +cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory *mem_regions_addr,
> +	uint32_t nregions)
> +{
> +	uint32_t idx;
> +	struct virtio_memory_regions regions[8]; /* VHOST_MAX_MEMORY_REGIONS */
> +	const struct vhost_memory_region *mem_regions =
> +			mem_regions_addr->regions;
> +	uint64_t base_address = 0, mapped_address = 0, mapped_size = 0;
> +
> +	/* nregions comes from the ioctl caller; never write past regions[]. */
> +	if (nregions > sizeof(regions) / sizeof(regions[0])) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"Too many memory regions (%u)\n", nregions);
> +		return -1;
> +	}
> +
> +	for (idx = 0; idx < nregions; idx++) {
> +		regions[idx].guest_phys_address =
> +			mem_regions[idx].guest_phys_addr;
> +		regions[idx].guest_phys_address_end =
> +			regions[idx].guest_phys_address +
> +			mem_regions[idx].memory_size;
> +		regions[idx].memory_size =
> +			mem_regions[idx].memory_size;
> +		regions[idx].userspace_address =
> +			mem_regions[idx].userspace_addr;
> +
> +		LOG_DEBUG(VHOST_CONFIG, "REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
> +			idx,
> +			(void *)(uintptr_t)regions[idx].guest_phys_address,
> +			(void *)(uintptr_t)regions[idx].userspace_address,
> +			regions[idx].memory_size);
> +
> +		/*set the base address mapping*/
> +		if (regions[idx].guest_phys_address == 0x0) {
> +			base_address =
> +				regions[idx].userspace_address;
> +			/* Map VM memory file */
> +			if (host_memory_map(ctx.pid, base_address,
> +				&mapped_address, &mapped_size) != 0) {
> +				return -1;
> +			}
> +		}
> +	}
> +
> +	/* Check that we have a valid base address. */
> +	if (base_address == 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"Failed to find base address of qemu memory file.\n");
> +		return -1;
> +	}
> +
> +	/* Offset that is added to a guest physical address of the region. */
> +	for (idx = 0; idx < nregions; idx++) {
> +		regions[idx].address_offset =
> +			mapped_address - base_address +
> +			regions[idx].userspace_address -
> +			regions[idx].guest_phys_address;
> +	}
> +
> +	ops->set_mem_table(ctx, &regions[0], nregions);
> +	return 0;
> +}
> diff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
> new file mode 100644
> index 0000000..6f98ce8
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
> @@ -0,0 +1,43 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +#ifndef _VIRTIO_NET_CDEV_H
> +#define _VIRTIO_NET_CDEV_H
> +#include <stdint.h>
> +
> +#include "vhost-net.h"
> +
> +/*
> + * Build the memory region table for the device identified by ctx from the
> + * nregions vhost_memory_region entries that follow the vhost_memory header
> + * at mem_regions_addr, and pass it on to the virtio-net set_mem_table
> + * callback. Returns 0 on success and -1 on failure.
> + */
> +int
> +cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory *mem_regions_addr,
> +	uint32_t nregions);
> +
> +#endif
> diff --git a/lib/librte_vhost/vhost-net-cdev.c b/lib/librte_vhost/vhost-net-cdev.c
> deleted file mode 100644
> index 57c76cb..0000000
> --- a/lib/librte_vhost/vhost-net-cdev.c
> +++ /dev/null
> @@ -1,389 +0,0 @@
> -/*-
> - *   BSD LICENSE
> - *
> - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> - *   All rights reserved.
> - *
> - *   Redistribution and use in source and binary forms, with or without
> - *   modification, are permitted provided that the following conditions
> - *   are met:
> - *
> - *     * Redistributions of source code must retain the above copyright
> - *       notice, this list of conditions and the following disclaimer.
> - *     * Redistributions in binary form must reproduce the above copyright
> - *       notice, this list of conditions and the following disclaimer in
> - *       the documentation and/or other materials provided with the
> - *       distribution.
> - *     * Neither the name of Intel Corporation nor the names of its
> - *       contributors may be used to endorse or promote products derived
> - *       from this software without specific prior written permission.
> - *
> - *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> - *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> - *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> - *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> - *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> - *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> - *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> - *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> - *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> - *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> - *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> - */
> -
> -#include <errno.h>
> -#include <fuse/cuse_lowlevel.h>
> -#include <linux/limits.h>
> -#include <linux/vhost.h>
> -#include <stdint.h>
> -#include <string.h>
> -#include <unistd.h>
> -
> -#include <rte_ethdev.h>
> -#include <rte_log.h>
> -#include <rte_string_fns.h>
> -#include <rte_virtio_net.h>
> -
> -#include "vhost-net-cdev.h"
> -
> -#define FUSE_OPT_DUMMY "\0\0"
> -#define FUSE_OPT_FORE  "-f\0\0"
> -#define FUSE_OPT_NOMULTI "-s\0\0"
> -
> -static const uint32_t default_major = 231;
> -static const uint32_t default_minor = 1;
> -static const char cuse_device_name[] = "/dev/cuse";
> -static const char default_cdev[] = "vhost-net";
> -
> -static struct fuse_session *session;
> -static struct vhost_net_device_ops const *ops;
> -
> -/*
> - * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
> - * when the device is added to the device linked list.
> - */
> -static struct vhost_device_ctx
> -fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
> -{
> -	struct vhost_device_ctx ctx;
> -	struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
> -
> -	ctx.pid = req_ctx->pid;
> -	ctx.fh = fi->fh;
> -
> -	return ctx;
> -}
> -
> -/*
> - * When the device is created in QEMU it gets initialised here and
> - * added to the device linked list.
> - */
> -static void
> -vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
> -{
> -	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
> -	int err = 0;
> -
> -	err = ops->new_device(ctx);
> -	if (err == -1) {
> -		fuse_reply_err(req, EPERM);
> -		return;
> -	}
> -
> -	fi->fh = err;
> -
> -	RTE_LOG(INFO, VHOST_CONFIG,
> -		"(%"PRIu64") Device configuration started\n", fi->fh);
> -	fuse_reply_open(req, fi);
> -}
> -
> -/*
> - * When QEMU is shutdown or killed the device gets released.
> - */
> -static void
> -vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
> -{
> -	int err = 0;
> -	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
> -
> -	ops->destroy_device(ctx);
> -	RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
> -	fuse_reply_err(req, err);
> -}
> -
> -/*
> - * Boilerplate code for CUSE IOCTL
> - * Implicit arguments: ctx, req, result.
> - */
> -#define VHOST_IOCTL(func) do {	\
> -	result = (func)(ctx);	\
> -	fuse_reply_ioctl(req, result, NULL, 0);	\
> -} while (0)
> -
> -/*
> - * Boilerplate IOCTL RETRY
> - * Implicit arguments: req.
> - */
> -#define VHOST_IOCTL_RETRY(size_r, size_w) do {	\
> -	struct iovec iov_r = { arg, (size_r) };	\
> -	struct iovec iov_w = { arg, (size_w) };	\
> -	fuse_reply_ioctl_retry(req, &iov_r,	\
> -		(size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
> -} while (0)
> -
> -/*
> - * Boilerplate code for CUSE Read IOCTL
> - * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
> - */
> -#define VHOST_IOCTL_R(type, var, func) do {	\
> -	if (!in_bufsz) {	\
> -		VHOST_IOCTL_RETRY(sizeof(type), 0);\
> -	} else {	\
> -		(var) = *(const type*)in_buf;	\
> -		result = func(ctx, &(var));	\
> -		fuse_reply_ioctl(req, result, NULL, 0);\
> -	}	\
> -} while (0)
> -
> -/*
> - * Boilerplate code for CUSE Write IOCTL
> - * Implicit arguments: ctx, req, result, out_bufsz.
> - */
> -#define VHOST_IOCTL_W(type, var, func) do {	\
> -	if (!out_bufsz) {	\
> -		VHOST_IOCTL_RETRY(0, sizeof(type));\
> -	} else {	\
> -		result = (func)(ctx, &(var));\
> -		fuse_reply_ioctl(req, result, &(var), sizeof(type));\
> -	} \
> -} while (0)
> -
> -/*
> - * Boilerplate code for CUSE Read/Write IOCTL
> - * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
> - */
> -#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do {	\
> -	if (!in_bufsz) {	\
> -		VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
> -	} else {	\
> -		(var1) = *(const type1*) (in_buf);	\
> -		result = (func)(ctx, (var1), &(var2));	\
> -		fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
> -	}	\
> -} while (0)
> -
> -/*
> - * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type
> - * of IOCTL a buffer is requested to read or to write. This request is handled
> - * by FUSE and the buffer is then given to CUSE.
> - */
> -static void
> -vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
> -		struct fuse_file_info *fi, __rte_unused unsigned flags,
> -		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
> -{
> -	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
> -	struct vhost_vring_file file;
> -	struct vhost_vring_state state;
> -	struct vhost_vring_addr addr;
> -	uint64_t features;
> -	uint32_t index;
> -	int result = 0;
> -
> -	switch (cmd) {
> -	case VHOST_NET_SET_BACKEND:
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
> -		VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
> -		break;
> -
> -	case VHOST_GET_FEATURES:
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
> -		VHOST_IOCTL_W(uint64_t, features, ops->get_features);
> -		break;
> -
> -	case VHOST_SET_FEATURES:
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
> -		VHOST_IOCTL_R(uint64_t, features, ops->set_features);
> -		break;
> -
> -	case VHOST_RESET_OWNER:
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
> -		VHOST_IOCTL(ops->reset_owner);
> -		break;
> -
> -	case VHOST_SET_OWNER:
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
> -		VHOST_IOCTL(ops->set_owner);
> -		break;
> -
> -	case VHOST_SET_MEM_TABLE:
> -		/*TODO fix race condition.*/
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
> -		static struct vhost_memory mem_temp;
> -
> -		switch (in_bufsz) {
> -		case 0:
> -			VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
> -			break;
> -
> -		case sizeof(struct vhost_memory):
> -			mem_temp = *(const struct vhost_memory *) in_buf;
> -
> -			if (mem_temp.nregions > 0) {
> -				VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
> -					(sizeof(struct vhost_memory_region) *
> -						mem_temp.nregions), 0);
> -			} else {
> -				result = -1;
> -				fuse_reply_ioctl(req, result, NULL, 0);
> -			}
> -			break;
> -
> -		default:
> -			result = ops->set_mem_table(ctx,
> -					in_buf, mem_temp.nregions);
> -			if (result)
> -				fuse_reply_err(req, EINVAL);
> -			else
> -				fuse_reply_ioctl(req, result, NULL, 0);
> -		}
> -		break;
> -
> -	case VHOST_SET_VRING_NUM:
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
> -		VHOST_IOCTL_R(struct vhost_vring_state, state,
> -			ops->set_vring_num);
> -		break;
> -
> -	case VHOST_SET_VRING_BASE:
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
> -		VHOST_IOCTL_R(struct vhost_vring_state, state,
> -			ops->set_vring_base);
> -		break;
> -
> -	case VHOST_GET_VRING_BASE:
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
> -		VHOST_IOCTL_RW(uint32_t, index,
> -			struct vhost_vring_state, state, ops->get_vring_base);
> -		break;
> -
> -	case VHOST_SET_VRING_ADDR:
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
> -		VHOST_IOCTL_R(struct vhost_vring_addr, addr,
> -			ops->set_vring_addr);
> -		break;
> -
> -	case VHOST_SET_VRING_KICK:
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n", ctx.fh);
> -		VHOST_IOCTL_R(struct vhost_vring_file, file,
> -			ops->set_vring_kick);
> -		break;
> -
> -	case VHOST_SET_VRING_CALL:
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n", ctx.fh);
> -		VHOST_IOCTL_R(struct vhost_vring_file, file,
> -			ops->set_vring_call);
> -		break;
> -
> -	default:
> -		RTE_LOG(ERR, VHOST_CONFIG,
> -			"(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
> -		result = -1;
> -		fuse_reply_ioctl(req, result, NULL, 0);
> -	}
> -
> -	if (result < 0)
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
> -	else
> -		LOG_DEBUG(VHOST_CONFIG,
> -			"(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
> -}
> -
> -/*
> - * Structure handling open, release and ioctl function pointers is populated.
> - */
> -static const struct cuse_lowlevel_ops vhost_net_ops = {
> -	.open		= vhost_net_open,
> -	.release	= vhost_net_release,
> -	.ioctl		= vhost_net_ioctl,
> -};
> -
> -/*
> - * cuse_info is populated and used to register the cuse device.
> - * vhost_net_device_ops are also passed when the device is registered in app.
> - */
> -int
> -rte_vhost_driver_register(const char *dev_name)
> -{
> -	struct cuse_info cuse_info;
> -	char device_name[PATH_MAX] = "";
> -	char char_device_name[PATH_MAX] = "";
> -	const char *device_argv[] = { device_name };
> -
> -	char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
> -	char fuse_opt_fore[] = FUSE_OPT_FORE;
> -	char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
> -	char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
> -
> -	if (access(cuse_device_name, R_OK | W_OK) < 0) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> -			"char device %s can't be accessed, maybe not exist\n",
> -			cuse_device_name);
> -		return -1;
> -	}
> -
> -	/*
> -	 * The device name is created. This is passed to QEMU so that it can
> -	 * register the device with our application.
> -	 */
> -	snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
> -	snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
> -
> -	/* Check if device already exists. */
> -	if (access(char_device_name, F_OK) != -1) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> -			"char device %s already exists\n", char_device_name);
> -		return -1;
> -	}
> -
> -	memset(&cuse_info, 0, sizeof(cuse_info));
> -	cuse_info.dev_major = default_major;
> -	cuse_info.dev_minor = default_minor;
> -	cuse_info.dev_info_argc = 1;
> -	cuse_info.dev_info_argv = device_argv;
> -	cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
> -
> -	ops = get_virtio_net_callbacks();
> -
> -	session = cuse_lowlevel_setup(3, fuse_argv,
> -			&cuse_info, &vhost_net_ops, 0, NULL);
> -	if (session == NULL)
> -		return -1;
> -
> -	return 0;
> -}
> -
> -/**
> - * The CUSE session is launched allowing the application to receive open,
> - * release and ioctl calls.
> - */
> -int
> -rte_vhost_driver_session_start(void)
> -{
> -	fuse_session_loop(session);
> -
> -	return 0;
> -}
> diff --git a/lib/librte_vhost/vhost-net-cdev.h b/lib/librte_vhost/vhost-net-cdev.h
> deleted file mode 100644
> index 03a5c57..0000000
> --- a/lib/librte_vhost/vhost-net-cdev.h
> +++ /dev/null
> @@ -1,113 +0,0 @@
> -/*-
> - *   BSD LICENSE
> - *
> - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> - *   All rights reserved.
> - *
> - *   Redistribution and use in source and binary forms, with or without
> - *   modification, are permitted provided that the following conditions
> - *   are met:
> - *
> - *     * Redistributions of source code must retain the above copyright
> - *       notice, this list of conditions and the following disclaimer.
> - *     * Redistributions in binary form must reproduce the above copyright
> - *       notice, this list of conditions and the following disclaimer in
> - *       the documentation and/or other materials provided with the
> - *       distribution.
> - *     * Neither the name of Intel Corporation nor the names of its
> - *       contributors may be used to endorse or promote products derived
> - *       from this software without specific prior written permission.
> - *
> - *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> - *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> - *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> - *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> - *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> - *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> - *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> - *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> - *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> - *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> - *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> - */
> -
> -#ifndef _VHOST_NET_CDEV_H_
> -#define _VHOST_NET_CDEV_H_
> -#include <stdint.h>
> -#include <stdio.h>
> -#include <sys/types.h>
> -#include <unistd.h>
> -#include <linux/vhost.h>
> -
> -#include <rte_log.h>
> -
> -/* Macros for printing using RTE_LOG */
> -#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
> -#define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1
> -
> -#ifdef RTE_LIBRTE_VHOST_DEBUG
> -#define VHOST_MAX_PRINT_BUFF 6072
> -#define LOG_LEVEL RTE_LOG_DEBUG
> -#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
> -#define PRINT_PACKET(device, addr, size, header) do { \
> -	char *pkt_addr = (char *)(addr); \
> -	unsigned int index; \
> -	char packet[VHOST_MAX_PRINT_BUFF]; \
> -	\
> -	if ((header)) \
> -		snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Header size %d: ", (device->device_fh), (size)); \
> -	else \
> -		snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Packet size %d: ", (device->device_fh), (size)); \
> -	for (index = 0; index < (size); index++) { \
> -		snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \
> -			"%02hhx ", pkt_addr[index]); \
> -	} \
> -	snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \
> -	\
> -	LOG_DEBUG(VHOST_DATA, "%s", packet); \
> -} while (0)
> -#else
> -#define LOG_LEVEL RTE_LOG_INFO
> -#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
> -#define PRINT_PACKET(device, addr, size, header) do {} while (0)
> -#endif
> -
> -
> -/*
> - * Structure used to identify device context.
> - */
> -struct vhost_device_ctx {
> -	pid_t		pid;	/* PID of process calling the IOCTL. */
> -	uint64_t	fh;	/* Populated with fi->fh to track the device index. */
> -};
> -
> -/*
> - * Structure contains function pointers to be defined in virtio-net.c. These
> - * functions are called in CUSE context and are used to configure devices.
> - */
> -struct vhost_net_device_ops {
> -	int (*new_device)(struct vhost_device_ctx);
> -	void (*destroy_device)(struct vhost_device_ctx);
> -
> -	int (*get_features)(struct vhost_device_ctx, uint64_t *);
> -	int (*set_features)(struct vhost_device_ctx, uint64_t *);
> -
> -	int (*set_mem_table)(struct vhost_device_ctx, const void *, uint32_t);
> -
> -	int (*set_vring_num)(struct vhost_device_ctx, struct vhost_vring_state *);
> -	int (*set_vring_addr)(struct vhost_device_ctx, struct vhost_vring_addr *);
> -	int (*set_vring_base)(struct vhost_device_ctx, struct vhost_vring_state *);
> -	int (*get_vring_base)(struct vhost_device_ctx, uint32_t, struct vhost_vring_state *);
> -
> -	int (*set_vring_kick)(struct vhost_device_ctx, struct vhost_vring_file *);
> -	int (*set_vring_call)(struct vhost_device_ctx, struct vhost_vring_file *);
> -
> -	int (*set_backend)(struct vhost_device_ctx, struct vhost_vring_file *);
> -
> -	int (*set_owner)(struct vhost_device_ctx);
> -	int (*reset_owner)(struct vhost_device_ctx);
> -};
> -
> -
> -struct vhost_net_device_ops const *get_virtio_net_callbacks(void);
> -#endif /* _VHOST_NET_CDEV_H_ */
> diff --git a/lib/librte_vhost/vhost-user/fd_man.c b/lib/librte_vhost/vhost-user/fd_man.c
> new file mode 100644
> index 0000000..c7fd3f2
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-user/fd_man.c
> @@ -0,0 +1,158 @@
> +#include <errno.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <sys/socket.h>
> +#include <sys/select.h>
> +#include <sys/time.h>
> +#include <sys/types.h>
> +#include <unistd.h>
> +
> +#include <rte_log.h>
> +
> +#include "fd_man.h"
> +
> +/**
> + * Look up the slot that currently holds a given fd.
> + * Free slots are stored with fd == -1, so passing -1 locates the first
> + * free slot.
> + * @return
> + *   Index of the first matching slot, or -1 if no slot holds fd.
> + */
> +static int
> +fdset_find_fd(struct fdset *pfdset, int fd)
> +{
> +	int idx = 0;
> +
> +	/* Advance until a match is found or the table is exhausted. */
> +	while (idx < pfdset->num && pfdset->fd[idx].fd != fd)
> +		idx++;
> +
> +	if (idx == pfdset->num)
> +		return -1;
> +
> +	return idx;
> +}
> +
> +/* Return the index of the first unused slot (fd == -1), or -1 if none. */
> +static int
> +fdset_find_free_slot(struct fdset *pfdset)
> +{
> +	int slot = fdset_find_fd(pfdset, -1);
> +
> +	return slot;
> +}
> +
> +/* Store fd, its read/write callbacks and its context in slot idx. */
> +static void
> +fdset_add_fd(struct fdset  *pfdset, int idx, int fd, fd_cb rcb,
> +		fd_cb wcb, uint64_t dat)
> +{
> +	pfdset->fd[idx].fd = fd;
> +	pfdset->fd[idx].rcb = rcb;
> +	pfdset->fd[idx].wcb = wcb;
> +	pfdset->fd[idx].dat = dat;
> +}
> +
> +/**
> + * Populate the select() read/write sets from the registered entries.
> + * An fd goes into rfset when it has a read callback and into wfset when it
> + * has a write callback; either set pointer may be NULL to skip that set.
> + * @return
> + *  the highest fd placed in either set, or -1 if nothing was added.
> + */
> +static int
> +fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)
> +{
> +	int highest = -1;
> +	int slot;
> +
> +	for (slot = 0; slot < MAX_FDS; slot++) {
> +		const struct fdentry *entry = &pfdset->fd[slot];
> +		int want_read, want_write;
> +
> +		/* Skip free slots. */
> +		if (entry->fd == -1)
> +			continue;
> +
> +		want_read = entry->rcb != NULL && rfset != NULL;
> +		want_write = entry->wcb != NULL && wfset != NULL;
> +
> +		if (want_read)
> +			FD_SET(entry->fd, rfset);
> +		if (want_write)
> +			FD_SET(entry->fd, wfset);
> +
> +		if ((want_read || want_write) && entry->fd > highest)
> +			highest = entry->fd;
> +	}
> +
> +	return highest;
> +}
> +
> +/* Mark every slot as free (fd == -1) and set num to MAX_FDS. */
> +void
> +fdset_init(struct fdset *pfdset)
> +{
> +	struct fdentry *entry = pfdset->fd;
> +	struct fdentry *const end = entry + MAX_FDS;
> +
> +	pfdset->num = MAX_FDS;
> +	while (entry != end) {
> +		entry->fd = -1;
> +		entry++;
> +	}
> +}
> +
> +/**
> + * Register fd with its read/write callbacks and context value.
> + * @return
> + *  0 on success, -1 if fd is -1, -2 if no free slot is left.
> + */
> +int
> +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
> +{
> +	int slot;
> +
> +	/* -1 is the free-slot marker and can never be registered. */
> +	if (fd == -1)
> +		return -1;
> +
> +	slot = fdset_find_free_slot(pfdset);
> +	if (slot != -1) {
> +		fdset_add_fd(pfdset, slot, fd, rcb, wcb, dat);
> +		return 0;
> +	}
> +
> +	return -2;
> +}
> +
> +/**
> + *  Remove fd from the fdset by marking its slot free.
> + *  Does nothing if fd is not registered.
> + */
> +void
> +fdset_del(struct fdset *pfdset, int fd)
> +{
> +	int slot = fdset_find_fd(pfdset, fd);
> +
> +	if (slot == -1)
> +		return;
> +
> +	pfdset->fd[slot].fd = -1;
> +}
> +
> +
> +/**
> + * Event loop: wait with select() on every registered fd and invoke the
> + * read/write callback of each fd that becomes ready.
> + *
> + * Returns when pfdset is NULL, when no fd with a callback is registered,
> + * or when select() fails with an error other than EINTR.
> + */
> +void
> +fdset_event_dispatch(struct fdset *pfdset)
> +{
> +	fd_set rfds, wfds;
> +	int i, maxfds, ret;
> +	struct fdentry *pfdentry;
> +	int fd;
> +
> +	if (pfdset == NULL)
> +		return;
> +
> +	while (1) {
> +		FD_ZERO(&rfds);
> +		FD_ZERO(&wfds);
> +		maxfds = fdset_fill(&rfds, &wfds, pfdset);
> +		/* fd management runs in one thread */
> +		if (maxfds == -1)
> +			return;
> +
> +		ret = select(maxfds + 1, &rfds, &wfds, NULL, NULL);
> +		if (ret < 0) {
> +			/*
> +			 * The fd sets must not be inspected after a failed
> +			 * select(): retry when interrupted by a signal,
> +			 * give up on any other error.
> +			 */
> +			if (errno == EINTR)
> +				continue;
> +			return;
> +		}
> +
> +		for (i = 0; i < MAX_FDS; i++) {
> +			pfdentry = &pfdset->fd[i];
> +			fd = pfdentry->fd;
> +
> +			/* FD_ISSET() on the free-slot marker (-1) is undefined. */
> +			if (fd == -1)
> +				continue;
> +
> +			if (FD_ISSET(fd, &rfds) && pfdentry->rcb != NULL)
> +				pfdentry->rcb(fd, pfdentry->dat);
> +
> +			/*
> +			 * The read callback may have released this slot or
> +			 * reused it for another fd; only run the write
> +			 * callback if the slot still holds the same fd.
> +			 */
> +			if (pfdentry->fd == fd && FD_ISSET(fd, &wfds) &&
> +			    pfdentry->wcb != NULL)
> +				pfdentry->wcb(fd, pfdentry->dat);
> +		}
> +	}
> +}
> diff --git a/lib/librte_vhost/vhost-user/fd_man.h b/lib/librte_vhost/vhost-user/fd_man.h
> new file mode 100644
> index 0000000..57cc81d
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-user/fd_man.h
> @@ -0,0 +1,31 @@
> +#ifndef _FD_MAN_H_
> +#define _FD_MAN_H_
> +#include <stdint.h>
> +
> +/*
> + * Number of slots in a struct fdset.
> + * NOTE(review): the dispatcher uses select(), so registered fds presumably
> + * have to stay below FD_SETSIZE -- confirm.
> + */
> +#define MAX_FDS 1024
> +
> +/* Callback type: receives the ready fd and the context value registered with it. */
> +typedef void (*fd_cb)(int fd, uint64_t dat);
> +
> +/* One slot of the fd table. */
> +struct fdentry {
> +	int fd; /* -1 indicates this entry is empty */
> +	fd_cb rcb; /* callback when this fd is readable. */
> +	fd_cb wcb; /* callback when this fd is writeable.*/
> +	uint64_t dat;	/* fd context */
> +};
> +
> +/* Fixed-size table of fds watched by fdset_event_dispatch(). */
> +struct fdset {
> +	struct fdentry fd[MAX_FDS];
> +	int num;	/* upper bound used when searching slots; set to MAX_FDS by fdset_init() */
> +};
> +
> +
> +/* Mark all slots free; must be called before any other fdset function. */
> +void fdset_init(struct fdset *pfdset);
> +
> +/*
> + * Register fd with its read/write callbacks and context.
> + * Returns 0 on success, -1 if fd is -1, -2 if the table is full.
> + */
> +int fdset_add(struct fdset *pfdset, int fd, fd_cb rcb,
> +	fd_cb wcb, uint64_t ctx);
> +
> +/* Unregister fd; no effect if fd is not in the table. */
> +void fdset_del(struct fdset *pfdset, int fd);
> +
> +/*
> + * Loop on select() and invoke the callbacks of ready fds.
> + * Returns when pfdset is NULL or no fd with a callback is registered.
> + */
> +void fdset_event_dispatch(struct fdset *pfdset);
> +
> +#endif
> diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.c b/lib/librte_vhost/vhost-user/vhost-net-user.c
> new file mode 100644
> index 0000000..34450f4
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-user/vhost-net-user.c
> @@ -0,0 +1,417 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <limits.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <errno.h>
> +
> +#include <rte_log.h>
> +#include <rte_virtio_net.h>
> +
> +#include "fd_man.h"
> +#include "vhost-net-user.h"
> +#include "vhost-net.h"
> +#include "virtio-net-user.h"
> +
> +/* fdset callbacks: new connection on the listen fd / message on a connection fd. */
> +static void vserver_new_vq_conn(int fd, uint64_t data);
> +static void vserver_message_handler(int fd, uint64_t dat);
> +/* Device operations; assigned from get_virtio_net_callbacks() at register time. */
> +const struct vhost_net_device_ops *ops;
> +
> +/* The single vhost server instance; set by rte_vhost_driver_register(). */
> +static struct vhost_server *g_vhost_server;
> +
> +/*
> + * Printable name of each request, indexed by VhostUserRequest value.
> + * Valid indices are 0 .. VHOST_USER_MAX - 1.
> + */
> +static const char *vhost_message_str[VHOST_USER_MAX] =
> +{
> +	[VHOST_USER_NONE] = "VHOST_USER_NONE",
> +	[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
> +	[VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
> +	[VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
> +	[VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
> +	[VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
> +	[VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
> +	[VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
> +	[VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
> +	[VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
> +	[VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
> +	[VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
> +	[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
> +	[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
> +	[VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR"
> +};
> +
> +/**
> + * Create a unix domain stream socket, bind it to path and start listening.
> + * A path that does not fit in sockaddr_un.sun_path is rejected instead of
> + * being silently truncated.
> + * @return
> + *  socket fd or -1 on failure
> + */
> +static int
> +uds_socket(const char *path)
> +{
> +	struct sockaddr_un un;
> +	int sockfd;
> +	int ret;
> +
> +	if (path == NULL)
> +		return -1;
> +
> +	memset(&un, 0, sizeof(un));
> +	un.sun_family = AF_UNIX;
> +	ret = snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
> +	if (ret < 0 || (size_t)ret >= sizeof(un.sun_path)) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"socket path %s is too long\n", path);
> +		return -1;
> +	}
> +
> +	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
> +	if (sockfd < 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"socket creation failed: %s\n", strerror(errno));
> +		return -1;
> +	}
> +	RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
> +
> +	ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
> +	if (ret == -1) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"bind to %s failed: %s\n", path, strerror(errno));
> +		goto err;
> +	}
> +	RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
> +
> +	ret = listen(sockfd, 1);
> +	if (ret == -1) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"listen on %s failed: %s\n", path, strerror(errno));
> +		goto err;
> +	}
> +
> +	return sockfd;
> +
> +err:
> +	close(sockfd);
> +	return -1;
> +}
> +
> +
> +/**
> + * Receive up to buflen bytes from sockfd together with any file descriptors
> + * passed as SCM_RIGHTS ancillary data.
> + *
> + * At most fd_num descriptors are stored in fds; entries that receive no
> + * descriptor are set to -1.
> + * @return
> + *  number of bytes read (> 0), 0 if the peer closed the connection,
> + *  or -1 on error (including truncated data or control message)
> + */
> +static int
> +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> +{
> +
> +	struct iovec  iov;
> +	struct msghdr msgh = { 0 };
> +	size_t fdsize = fd_num * sizeof(int);
> +	char control[CMSG_SPACE(fdsize)];
> +	struct cmsghdr *cmsg;
> +	size_t got;
> +	int ret;
> +	int i;
> +
> +	/* Make "no descriptor received" distinguishable from a real fd. */
> +	for (i = 0; i < fd_num; i++)
> +		fds[i] = -1;
> +
> +	iov.iov_base = buf;
> +	iov.iov_len  = buflen;
> +
> +	msgh.msg_iov = &iov;
> +	msgh.msg_iovlen = 1;
> +	msgh.msg_control = control;
> +	msgh.msg_controllen = sizeof(control);
> +
> +	/* Retry when interrupted by a signal, as the send side does. */
> +	do {
> +		ret = recvmsg(sockfd, &msgh, 0);
> +	} while (ret < 0 && errno == EINTR);
> +
> +	if (ret < 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "%s failed\n", __func__);
> +		return ret;
> +	}
> +	/* Peer closed the connection; the caller reports this case. */
> +	if (ret == 0)
> +		return 0;
> +
> +	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "%s failed\n", __func__);
> +		return -1;
> +	}
> +
> +	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
> +		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
> +		if ((cmsg->cmsg_level == SOL_SOCKET) &&
> +			(cmsg->cmsg_type == SCM_RIGHTS)) {
> +			/*
> +			 * Copy only the descriptors that were actually
> +			 * delivered, never more than the caller's array.
> +			 */
> +			got = cmsg->cmsg_len - CMSG_LEN(0);
> +			if (got > fdsize)
> +				got = fdsize;
> +			memcpy(fds, CMSG_DATA(cmsg), got);
> +			break;
> +		}
> +	}
> +	return ret;
> +}
> +
> +/**
> + * Read one vhost-user message from sockfd: the fixed-size header (with any
> + * passed file descriptors stored in msg->fds) followed by msg->size bytes
> + * of payload.
> + * @return
> + *  > 0 on success, 0 if the peer closed the connection, -1 on error
> + */
> +static int
> +read_vhost_message(int sockfd, struct VhostUserMsg *msg)
> +{
> +	char *payload;
> +	size_t remaining;
> +	ssize_t n;
> +	int ret;
> +
> +	ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
> +		msg->fds, VHOST_MEMORY_MAX_NREGIONS);
> +	if (ret <= 0)
> +		return ret;
> +
> +	/* A partial header would leave request/flags/size unset. */
> +	if (ret != (int)VHOST_USER_HDR_SIZE) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"%s: short header, %d bytes\n", __func__, ret);
> +		return -1;
> +	}
> +
> +	if (msg->size == 0)
> +		return ret;
> +
> +	if (msg->size > sizeof(msg->payload)) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"%s: invalid size:%u\n", __func__, msg->size);
> +		return -1;
> +	}
> +
> +	/*
> +	 * A stream socket may deliver the payload in several pieces, so keep
> +	 * reading until all msg->size bytes have arrived.
> +	 */
> +	payload = (char *)&msg->payload;
> +	remaining = msg->size;
> +	while (remaining > 0) {
> +		n = read(sockfd, payload, remaining);
> +		if (n == 0)
> +			return 0;
> +		if (n < 0) {
> +			if (errno == EINTR)
> +				continue;
> +			RTE_LOG(ERR, VHOST_CONFIG,
> +				"read control message failed\n");
> +			return -1;
> +		}
> +		payload += n;
> +		remaining -= (size_t)n;
> +	}
> +
> +	return (int)msg->size;
> +}
> +
> +/**
> + * Send buflen bytes from buf on sockfd, optionally attaching fd_num file
> + * descriptors as SCM_RIGHTS ancillary data. sendmsg() is retried while it
> + * is interrupted by a signal.
> + * @return
> + *  0 on success, -1 if sendmsg() fails
> + */
> +static int
> +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> +{
> +	size_t fdsize = fd_num * sizeof(int);
> +	char control[CMSG_SPACE(fdsize)];
> +	struct iovec vec = { .iov_base = buf, .iov_len = buflen };
> +	struct msghdr hdr = { 0 };
> +	int sent;
> +
> +	hdr.msg_iov = &vec;
> +	hdr.msg_iovlen = 1;
> +	hdr.msg_control = NULL;
> +	hdr.msg_controllen = 0;
> +
> +	/* Attach a control message only when there are fds to pass. */
> +	if (fds != NULL && fd_num > 0) {
> +		struct cmsghdr *chdr;
> +
> +		hdr.msg_control = control;
> +		hdr.msg_controllen = sizeof(control);
> +		chdr = CMSG_FIRSTHDR(&hdr);
> +		chdr->cmsg_len = CMSG_LEN(fdsize);
> +		chdr->cmsg_level = SOL_SOCKET;
> +		chdr->cmsg_type = SCM_RIGHTS;
> +		memcpy(CMSG_DATA(chdr), fds, fdsize);
> +	}
> +
> +	for (;;) {
> +		sent = sendmsg(sockfd, &hdr, 0);
> +		if (sent >= 0)
> +			return 0;
> +		if (errno != EINTR)
> +			break;
> +	}
> +
> +	RTE_LOG(ERR, VHOST_CONFIG,  "sendmsg error\n");
> +	return -1;
> +}
> +
> +/**
> + * Send msg (header plus msg->size payload bytes) as a reply on sockfd.
> + * The version bits of msg->flags are replaced with VHOST_USER_VERSION and
> + * the reply bit is set before sending; no file descriptors are attached.
> + * @return
> + *  0 on success, -1 on failure
> + */
> +static int
> +send_vhost_message(int sockfd, struct VhostUserMsg *msg)
> +{
> +	msg->flags = (msg->flags & ~VHOST_USER_VERSION_MASK) |
> +		VHOST_USER_VERSION | VHOST_USER_REPLY_MASK;
> +
> +	return send_fd_message(sockfd, (char *)msg,
> +		VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
> +}
> +
> +/**
> + * Callback invoked when the listen fd is readable: accept the connection,
> + * create a device for it and register the connection fd so that its
> + * messages are handled by vserver_message_handler().
> + * If the device cannot be created or the fd cannot be registered, the
> + * connection is closed again.
> + *
> + * @param fd
> + *  listening socket
> + * @param dat
> + *  pointer to the owning struct vhost_server, carried as an integer
> + */
> +static void
> +vserver_new_vq_conn(int fd, uint64_t dat)
> +{
> +	struct vhost_server *vserver = (void *)(uintptr_t)dat;
> +	int conn_fd;
> +	int fh;
> +	struct vhost_device_ctx vdev_ctx = { 0 };
> +
> +	conn_fd = accept(fd, NULL, NULL);
> +	RTE_LOG(INFO, VHOST_CONFIG,
> +		"%s: new connection is %d\n", __func__, conn_fd);
> +	if (conn_fd < 0)
> +		return;
> +
> +	/* new_device() reports failure with -1; keep the sign to detect it. */
> +	fh = ops->new_device(vdev_ctx);
> +	if (fh == -1) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "failed to create new device\n");
> +		close(conn_fd);
> +		return;
> +	}
> +	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
> +
> +	if (fdset_add(&vserver->fdset, conn_fd,
> +			vserver_message_handler, NULL, (uint64_t)fh) < 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"failed to register connection fd %d\n", conn_fd);
> +		/* Undo the device creation so nothing is leaked. */
> +		vdev_ctx.fh = fh;
> +		ops->destroy_device(vdev_ctx);
> +		close(conn_fd);
> +	}
> +}
> +
> +/**
> + * Callback invoked when a connection fd is readable: read one vhost-user
> + * message and dispatch it to the matching device operation.
> + * On read failure, peer close or an out-of-range request the connection is
> + * removed from the fdset and closed, and the device is destroyed.
> + *
> + * @param connfd
> + *  connected socket the message is read from
> + * @param dat
> + *  device handle that was registered together with connfd
> + */
> +static void
> +vserver_message_handler(int connfd, uint64_t dat)
> +{
> +	struct vhost_device_ctx ctx = { 0 };
> +	uint32_t fh = (uint32_t)dat;
> +	struct VhostUserMsg msg;
> +	uint64_t features = 0;
> +	int ret;
> +
> +	ctx.fh = fh;
> +	memset(&msg, 0, sizeof(msg));
> +
> +	ret = read_vhost_message(connfd, &msg);
> +	if (ret < 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "vhost read message failed\n");
> +		goto drop_connection;
> +	}
> +	if (ret == 0) {
> +		RTE_LOG(INFO, VHOST_CONFIG,
> +			"vhost peer closed\n");
> +		goto drop_connection;
> +	}
> +	/*
> +	 * Valid requests are 0 .. VHOST_USER_MAX - 1; anything else would
> +	 * index past the end of vhost_message_str below.
> +	 */
> +	if ((unsigned int)msg.request >= VHOST_USER_MAX) {
> +		RTE_LOG(INFO, VHOST_CONFIG,
> +			"vhost read incorrect message\n");
> +		goto drop_connection;
> +	}
> +
> +	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> +		vhost_message_str[msg.request]);
> +	switch (msg.request) {
> +	case VHOST_USER_GET_FEATURES:
> +		ret = ops->get_features(ctx, &features);
> +		if (ret != 0)
> +			RTE_LOG(ERR, VHOST_CONFIG, "get_features failed\n");
> +		/* Reply with the feature bits, not with the return code. */
> +		msg.payload.u64 = features;
> +		msg.size = sizeof(msg.payload.u64);
> +		send_vhost_message(connfd, &msg);
> +		break;
> +	case VHOST_USER_SET_FEATURES:
> +		/* The requested feature bits arrive in the payload. */
> +		features = msg.payload.u64;
> +		ops->set_features(ctx, &features);
> +		break;
> +
> +	case VHOST_USER_SET_OWNER:
> +		ops->set_owner(ctx);
> +		break;
> +	case VHOST_USER_RESET_OWNER:
> +		ops->reset_owner(ctx);
> +		break;
> +
> +	case VHOST_USER_SET_MEM_TABLE:
> +		user_set_mem_table(ctx, &msg);
> +		break;
> +
> +	case VHOST_USER_SET_LOG_BASE:
> +	case VHOST_USER_SET_LOG_FD:
> +		RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
> +		break;
> +
> +	case VHOST_USER_SET_VRING_NUM:
> +		ops->set_vring_num(ctx, &msg.payload.state);
> +		break;
> +	case VHOST_USER_SET_VRING_ADDR:
> +		ops->set_vring_addr(ctx, &msg.payload.addr);
> +		break;
> +	case VHOST_USER_SET_VRING_BASE:
> +		ops->set_vring_base(ctx, &msg.payload.state);
> +		break;
> +
> +	case VHOST_USER_GET_VRING_BASE:
> +		ret = ops->get_vring_base(ctx, msg.payload.state.index,
> +			&msg.payload.state);
> +		msg.size = sizeof(msg.payload.state);
> +		send_vhost_message(connfd, &msg);
> +		break;
> +
> +	case VHOST_USER_SET_VRING_KICK:
> +		user_set_vring_kick(ctx, &msg);
> +		break;
> +	case VHOST_USER_SET_VRING_CALL:
> +		user_set_vring_call(ctx, &msg);
> +		break;
> +
> +	case VHOST_USER_SET_VRING_ERR:
> +		RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
> +		break;
> +
> +	default:
> +		break;
> +
> +	}
> +	return;
> +
> +drop_connection:
> +	/*TODO: cleanup */
> +	/* Unregister before closing so the fd number cannot be reused while
> +	 * it is still present in the fdset. */
> +	fdset_del(&g_vhost_server->fdset, connfd);
> +	close(connfd);
> +	ops->destroy_device(ctx);
> +}
> +
> +
> +/**
> + * Creates and initialises the vhost server: binds a unix domain socket to
> + * path and registers it for connection events.
> + * Only one server can be registered; a second call fails.
> + *
> + * @param path
> + *  filesystem path of the socket; an existing file at that path is
> + *  removed first. The pointer itself is stored, the string is not copied.
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int
> +rte_vhost_driver_register(const char *path)
> +{
> +
> +	struct vhost_server *vserver;
> +
> +	if (path == NULL)
> +		return -1;
> +
> +	if (g_vhost_server != NULL)
> +		return -1;
> +
> +	vserver = calloc(1, sizeof(*vserver));
> +	/*TODO: all allocation is through DPDK memory allocation */
> +	if (vserver == NULL)
> +		return -1;
> +
> +	fdset_init(&vserver->fdset);
> +
> +	unlink(path);
> +
> +	vserver->listenfd = uds_socket(path);
> +	if (vserver->listenfd < 0) {
> +		free(vserver);
> +		return -1;
> +	}
> +	vserver->path = path;
> +
> +	if (fdset_add(&vserver->fdset, vserver->listenfd,
> +			vserver_new_vq_conn, NULL,
> +			(uint64_t)(uintptr_t)vserver) < 0) {
> +		/* Without the listen fd in the fdset no connection is ever
> +		 * accepted, so treat this as a registration failure. */
> +		close(vserver->listenfd);
> +		free(vserver);
> +		return -1;
> +	}
> +
> +	ops = get_virtio_net_callbacks();
> +
> +	g_vhost_server = vserver;
> +
> +	return 0;
> +}
> +
> +
> +/**
> + * Run the fd event loop of the registered vhost server.
> + * @return
> + *  0 when the event loop ends, -1 if no server has been registered with
> + *  rte_vhost_driver_register()
> + */
> +int
> +rte_vhost_driver_session_start(void)
> +{
> +	if (g_vhost_server == NULL)
> +		return -1;
> +
> +	fdset_event_dispatch(&g_vhost_server->fdset);
> +	return 0;
> +}
> +
> diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.h b/lib/librte_vhost/vhost-user/vhost-net-user.h
> new file mode 100644
> index 0000000..c9df9fa
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-user/vhost-net-user.h
> @@ -0,0 +1,74 @@
> +#ifndef _VHOST_NET_USER_H
> +#define _VHOST_NET_USER_H
> +#include <stdint.h>
> +#include <linux/vhost.h>
> +
> +#include "fd_man.h"
> +
> +/* State of the vhost-user socket server. */
> +struct vhost_server {
> +	const char *path; /**< The path the uds is bind to. */
> +	int listenfd;     /**< The listener sockfd. */
> +	struct fdset fdset; /**< The fd list this vhost server manages. */
> +};
> +
> +/*********** FROM hw/virtio/vhost-user.c *************************************/
> +
> +/* Maximum number of memory regions (and of passed fds) in one message. */
> +#define VHOST_MEMORY_MAX_NREGIONS    8
> +
> +/*
> + * Request codes carried in VhostUserMsg.request.
> + * VHOST_USER_MAX is one past the last valid request, so valid values are
> + * 0 .. VHOST_USER_MAX - 1.
> + */
> +typedef enum VhostUserRequest {
> +    VHOST_USER_NONE = 0,
> +    VHOST_USER_GET_FEATURES = 1,
> +    VHOST_USER_SET_FEATURES = 2,
> +    VHOST_USER_SET_OWNER = 3,
> +    VHOST_USER_RESET_OWNER = 4,
> +    VHOST_USER_SET_MEM_TABLE = 5,
> +    VHOST_USER_SET_LOG_BASE = 6,
> +    VHOST_USER_SET_LOG_FD = 7,
> +    VHOST_USER_SET_VRING_NUM = 8,
> +    VHOST_USER_SET_VRING_ADDR = 9,
> +    VHOST_USER_SET_VRING_BASE = 10,
> +    VHOST_USER_GET_VRING_BASE = 11,
> +    VHOST_USER_SET_VRING_KICK = 12,
> +    VHOST_USER_SET_VRING_CALL = 13,
> +    VHOST_USER_SET_VRING_ERR = 14,
> +    VHOST_USER_MAX
> +} VhostUserRequest;
> +
> +/* One guest memory region as carried in a VhostUserMemory payload. */
> +typedef struct VhostUserMemoryRegion {
> +    uint64_t guest_phys_addr; /* guest physical start address of the region */
> +    uint64_t memory_size;     /* region length; also used as the mmap length */
> +    uint64_t userspace_addr;  /* address of the region in the sender (logged as "QEMU VA") */
> +    uint64_t mmap_offset;     /* offset of the region inside the file behind its fd */
> +} VhostUserMemoryRegion;
> +
> +/* Memory table payload: a counted array of region descriptions. */
> +typedef struct VhostUserMemory {
> +    uint32_t nregions; /* number of entries of regions[] that are in use */
> +    uint32_t padding;
> +    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +/*
> + * A vhost-user message: a fixed header (request, flags, size) followed by a
> + * request-specific payload. The struct is packed, so members may be
> + * unaligned. fds[] holds the file descriptors associated with the message;
> + * it sits after the payload and is not covered by VHOST_USER_HDR_SIZE.
> + */
> +typedef struct VhostUserMsg {
> +    VhostUserRequest request;
> +
> +#define VHOST_USER_VERSION_MASK     (0x3)
> +#define VHOST_USER_REPLY_MASK       (0x1 << 2)
> +    uint32_t flags; /* see VHOST_USER_VERSION_MASK / VHOST_USER_REPLY_MASK */
> +    uint32_t size; /* the following payload size */
> +    union {
> +/* Bit fields of payload.u64 for the vring fd requests. */
> +#define VHOST_USER_VRING_IDX_MASK   (0xff)
> +#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
> +        uint64_t u64;
> +        struct vhost_vring_state state;
> +        struct vhost_vring_addr addr;
> +        VhostUserMemory memory;
> +    } payload;
> +     int fds[VHOST_MEMORY_MAX_NREGIONS];
> +} __attribute__((packed)) VhostUserMsg;
> +
> +/* Byte offset of the payload inside VhostUserMsg, i.e. the header length. */
> +#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION    (0x1)
> +
> +/*****************************************************************************/
> +#endif
> diff --git a/lib/librte_vhost/vhost-user/virtio-net-user.c b/lib/librte_vhost/vhost-user/virtio-net-user.c
> new file mode 100644
> index 0000000..f38e6cc
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-user/virtio-net-user.c
> @@ -0,0 +1,208 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <sys/mman.h>
> +
> +#include <rte_log.h>
> +
> +#include "virtio-net-user.h"
> +#include "vhost-net-user.h"
> +#include "vhost-net.h"
> +
> +extern const struct vhost_net_device_ops *ops;
> +
> +#if 0
> +/*
> + * Disabled variant (compiled out by the enclosing "#if 0"): maps all guest
> + * memory with a single mmap() of fds[0] at offset 0 and derives every
> + * region's address_offset from that one mapping.
> + *
> + * Returns 0 on success, -1 if no region has guest physical address 0 or if
> + * mmap() fails.
> + */
> +int
> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> +	unsigned int idx;
> +	struct VhostUserMemory memory = pmsg->payload.memory;
> +	struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
> +	uint64_t mapped_address, base_address = 0, mem_size = 0;
> +
> +	/* The region whose gpa is 0 supplies the base userspace address. */
> +	for (idx = 0; idx < memory.nregions; idx++) {
> +		if (memory.regions[idx].guest_phys_addr == 0)
> +			base_address = memory.regions[idx].userspace_addr;
> +	}
> +	if (base_address == 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"couldn't find the mem region whose gpa is 0.\n");
> +		return -1;
> +	}
> +
> +	/* mem_size = furthest end of any region, measured from base_address. */
> +	for (idx = 0; idx < memory.nregions;  idx++) {
> +		uint64_t size = memory.regions[idx].userspace_addr - 
> +			base_address + memory.regions[idx].memory_size;
> +		if (mem_size < size)
> +			mem_size = size;
> +	}
> +
> +	/*
> +	 * here we assume qemu will map only one file for memory allocation,
> +	 * we only use fds[0] with offset 0.
> +	 */
> +	mapped_address = (uint64_t)(uintptr_t)mmap(NULL, mem_size, 
> +		PROT_READ | PROT_WRITE, MAP_SHARED, pmsg->fds[0], 0);
> +
> +	if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
> +		RTE_LOG(ERR, VHOST_CONFIG, " mmap qemu guest failed.\n");
> +		return -1;
> +	}
> +			
> +	for (idx = 0; idx < memory.nregions; idx++) {
> +		regions[idx].guest_phys_address = 
> +			memory.regions[idx].guest_phys_addr;
> +		regions[idx].guest_phys_address_end = 
> +			memory.regions[idx].guest_phys_addr +
> +			memory.regions[idx].memory_size;
> +		regions[idx].memory_size = memory.regions[idx].memory_size;
> +		regions[idx].userspace_address = 
> +			memory.regions[idx].userspace_addr;
> +
> +		/* Value to add to a guest physical address to reach the mapping. */
> +		regions[idx].address_offset = mapped_address - base_address + 
> +			regions[idx].userspace_address -
> +			regions[idx].guest_phys_address;
> +		LOG_DEBUG(VHOST_CONFIG, 
> +			"REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
> +			idx,
> +			(void *)(uintptr_t)regions[idx].guest_phys_address,
> +			(void *)(uintptr_t)regions[idx].userspace_address,
> +			 regions[idx].memory_size);
> +	}
> +	ops->set_mem_table(ctx, regions, memory.nregions);
> +	return 0;
> +}
> +
> +#else
> +
> +/**
> + * Map every guest memory region described in a memory-table message and
> + * hand the resulting region table to the virtio-net layer.
> + *
> + * @param ctx
> + *  Device context, forwarded unchanged to ops->set_mem_table().
> + * @param pmsg
> + *  Received message; payload.memory describes the regions and fds[idx] is
> + *  the file descriptor backing region idx.
> + * @return
> + *  0 on success; -1 if the region count exceeds VHOST_MEMORY_MAX_NREGIONS,
> + *  no region starts at guest physical address 0, a mapping length would
> + *  overflow, or mmap() fails. On failure every mapping created by this
> + *  call is released again.
> + */
> +int
> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> +	unsigned int idx;
> +	struct VhostUserMemory memory = pmsg->payload.memory;
> +	struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
> +	/* Base and length of each mapping, kept so a failure can be unwound. */
> +	void *map_base[VHOST_MEMORY_MAX_NREGIONS];
> +	uint64_t map_len[VHOST_MEMORY_MAX_NREGIONS];
> +	uint64_t mapped_address, base_address = 0;
> +
> +	/* nregions comes from the peer and indexes the fixed-size arrays. */
> +	if (memory.nregions > VHOST_MEMORY_MAX_NREGIONS) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"too many memory regions: %u.\n",
> +			(unsigned int)memory.nregions);
> +		return -1;
> +	}
> +
> +	for (idx = 0; idx < memory.nregions; idx++) {
> +		if (memory.regions[idx].guest_phys_addr == 0)
> +			base_address = memory.regions[idx].userspace_addr;
> +	}
> +	if (base_address == 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"couldn't find the mem region whose gpa is 0.\n");
> +		return -1;
> +	}
> +
> +	for (idx = 0; idx < memory.nregions; idx++) {
> +		uint64_t size = memory.regions[idx].memory_size;
> +		uint64_t offset = memory.regions[idx].mmap_offset;
> +		void *addr;
> +
> +		regions[idx].guest_phys_address =
> +			memory.regions[idx].guest_phys_addr;
> +		regions[idx].guest_phys_address_end =
> +			memory.regions[idx].guest_phys_addr + size;
> +		regions[idx].memory_size = size;
> +		regions[idx].userspace_address =
> +			memory.regions[idx].userspace_addr;
> +
> +		/* size + offset is the mmap length; refuse a wrapped value. */
> +		if (size > UINT64_MAX - offset) {
> +			RTE_LOG(ERR, VHOST_CONFIG,
> +				"mem region %u size/offset overflow.\n", idx);
> +			goto unmap;
> +		}
> +		map_len[idx] = size + offset;
> +
> +		/*
> +		 * Map from file offset 0 and skip mmap_offset afterwards instead
> +		 * of passing mmap_offset to mmap().
> +		 * NOTE(review): presumably because mmap_offset need not satisfy
> +		 * mmap()'s offset alignment for the backing file - confirm.
> +		 */
> +		addr = mmap(NULL, map_len[idx],
> +			PROT_READ | PROT_WRITE, MAP_SHARED,
> +			pmsg->fds[idx],
> +			0);
> +		if (addr == MAP_FAILED) {
> +			RTE_LOG(ERR, VHOST_CONFIG, " mmap qemu guest failed.\n");
> +			goto unmap;
> +		}
> +		map_base[idx] = addr;
> +		LOG_DEBUG(VHOST_CONFIG, "mapped to %p\n", addr);
> +
> +		mapped_address = (uint64_t)(uintptr_t)addr + offset;
> +
> +		/* Value to add to a guest physical address to reach the mapping. */
> +		regions[idx].address_offset = mapped_address -
> +			regions[idx].guest_phys_address;
> +		LOG_DEBUG(VHOST_CONFIG,
> +			"REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
> +			idx,
> +			(void *)(uintptr_t)regions[idx].guest_phys_address,
> +			(void *)(uintptr_t)regions[idx].userspace_address,
> +			 regions[idx].memory_size);
> +	}
> +	ops->set_mem_table(ctx, regions, memory.nregions);
> +	return 0;
> +
> +unmap:
> +	/* Regions [0, idx) were mapped successfully; release them. */
> +	while (idx-- > 0)
> +		munmap(map_base[idx], map_len[idx]);
> +	return -1;
> +}
> +
> +
> +
> +
> +#endif
> +
> +
> +/**
> + * Pass the vring index and "call" file descriptor from a message on to
> + * ops->set_vring_call().
> + *
> + * @param ctx
> + *  Device context, forwarded unchanged to the callback.
> + * @param pmsg
> + *  Received message; payload.u64 carries the vring index in its low byte
> + *  and the "no fd" flag in VHOST_USER_VRING_NOFD_MASK.
> + */
> +void
> +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> +	struct vhost_vring_file file;
> +
> +	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> +	/* With the NOFD flag set the message carries no usable descriptor. */
> +	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
> +		file.fd = -1;
> +	else
> +		file.fd = pmsg->fds[0];
> +	RTE_LOG(INFO, VHOST_CONFIG,
> +		"vring call idx:%d file:%d\n", file.index, file.fd);
> +	ops->set_vring_call(ctx, &file);
> +}
> +
> +
> +/**
> + * Pass the vring index and "kick" file descriptor from a message on to
> + * ops->set_vring_kick().
> + *
> + * @param ctx
> + *  Device context, forwarded unchanged to the callback.
> + * @param pmsg
> + *  Received message; payload.u64 carries the vring index in its low byte
> + *  and the "no fd" flag in VHOST_USER_VRING_NOFD_MASK.
> + */
> +void
> +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> +	struct vhost_vring_file file;
> +
> +	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> +	/* With the NOFD flag set the message carries no usable descriptor. */
> +	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
> +		file.fd = -1;
> +	else
> +		file.fd = pmsg->fds[0];
> +	RTE_LOG(INFO, VHOST_CONFIG,
> +		"vring kick idx:%d file:%d\n", file.index, file.fd);
> +	ops->set_vring_kick(ctx, &file);
> +}
> diff --git a/lib/librte_vhost/vhost-user/virtio-net-user.h b/lib/librte_vhost/vhost-user/virtio-net-user.h
> new file mode 100644
> index 0000000..0969376
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-user/virtio-net-user.h
> @@ -0,0 +1,11 @@
> +#ifndef _VIRTIO_NET_USER_H
> +#define _VIRTIO_NET_USER_H
> +
> +#include "vhost-net.h"
> +#include "vhost-net-user.h"
> +
> +/*
> + * Map the guest memory regions described in the message and pass them to
> + * ops->set_mem_table(). Returns 0 on success, -1 on failure.
> + */
> +int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
> +/* Pass the vring index and kick fd from the message to ops->set_vring_kick(). */
> +void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
> +/* Pass the vring index and call fd from the message to ops->set_vring_call(). */
> +void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
> +
> +#endif
> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> index ccfd82f..8ff0301 100644
> --- a/lib/librte_vhost/vhost_rxtx.c
> +++ b/lib/librte_vhost/vhost_rxtx.c
> @@ -38,19 +38,14 @@
>  #include <rte_memcpy.h>
>  #include <rte_virtio_net.h>
>  
> -#include "vhost-net-cdev.h"
> +#include "vhost-net.h"
>  
> -#define MAX_PKT_BURST 32
> +#define VHOST_MAX_PKT_BURST 64
> +#define VHOST_MAX_MRG_PKT_BURST 64
>  
> -/**
> - * This function adds buffers to the virtio devices RX virtqueue. Buffers can
> - * be received from the physical port or from another virtio device. A packet
> - * count is returned to indicate the number of packets that are succesfully
> - * added to the RX queue. This function works when mergeable is disabled.
> - */
> -static inline uint32_t __attribute__((always_inline))
> -virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> -	struct rte_mbuf **pkts, uint32_t count)
> +
> +uint32_t
> +rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count)
>  {
>  	struct vhost_virtqueue *vq;
>  	struct vring_desc *desc;
> @@ -59,26 +54,23 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
>  	uint64_t buff_addr = 0;
>  	uint64_t buff_hdr_addr = 0;
> -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
> +	uint32_t head[VHOST_MAX_PKT_BURST], packet_len = 0;
>  	uint32_t head_idx, packet_success = 0;
> +	uint32_t mergeable, mrg_count = 0;
>  	uint16_t avail_idx, res_cur_idx;
>  	uint16_t res_base_idx, res_end_idx;
>  	uint16_t free_entries;
>  	uint8_t success = 0;
>  
> -	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
> +	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s()\n", dev->device_fh, __func__);
>  	if (unlikely(queue_id != VIRTIO_RXQ)) {
>  		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
>  		return 0;
>  	}
>  
>  	vq = dev->virtqueue[VIRTIO_RXQ];
> -	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
> -
> -	/*
> -	 * As many data cores may want access to available buffers,
> -	 * they need to be reserved.
> -	 */
> +	count = (count > VHOST_MAX_PKT_BURST) ? VHOST_MAX_PKT_BURST : count;
> +	/* As many data cores may want access to available buffers, they need to be reserved. */
>  	do {
>  		res_base_idx = vq->last_used_idx_res;
>  		avail_idx = *((volatile uint16_t *)&vq->avail->idx);
> @@ -93,21 +85,25 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  
>  		res_end_idx = res_base_idx + count;
>  		/* vq->last_used_idx_res is atomically updated. */
> -		/* TODO: Allow to disable cmpset if no concurrency in application. */
> +		/* TODO: Allow to disable cmpset if no concurrency in application */
>  		success = rte_atomic16_cmpset(&vq->last_used_idx_res,
>  				res_base_idx, res_end_idx);
> +		/* If there is contention here and failed, try again. */
>  	} while (unlikely(success == 0));
>  	res_cur_idx = res_base_idx;
>  	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
> -			dev->device_fh, res_cur_idx, res_end_idx);
> +			dev->device_fh,
> +			res_cur_idx, res_end_idx);
>  
>  	/* Prefetch available ring to retrieve indexes. */
>  	rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);
>  
> +	/* Check if the VIRTIO_NET_F_MRG_RXBUF feature is enabled. */
> +	mergeable = dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF);
> +
>  	/* Retrieve all of the head indexes first to avoid caching issues. */
>  	for (head_idx = 0; head_idx < count; head_idx++)
> -		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
> -					(vq->size - 1)];
> +		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)];
>  
>  	/*Prefetch descriptor index. */
>  	rte_prefetch0(&vq->desc[head[packet_success]]);
> @@ -123,46 +119,57 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  		/* Prefetch buffer address. */
>  		rte_prefetch0((void *)(uintptr_t)buff_addr);
>  
> -		/* Copy virtio_hdr to packet and increment buffer address */
> -		buff_hdr_addr = buff_addr;
> -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
> -
> -		/*
> -		 * If the descriptors are chained the header and data are
> -		 * placed in separate buffers.
> -		 */
> -		if (desc->flags & VRING_DESC_F_NEXT) {
> -			desc->len = vq->vhost_hlen;
> -			desc = &vq->desc[desc->next];
> -			/* Buffer address translation. */
> -			buff_addr = gpa_to_vva(dev, desc->addr);
> -			desc->len = rte_pktmbuf_data_len(buff);
> +		if (mergeable && (mrg_count != 0)) {
> +			desc->len = packet_len = rte_pktmbuf_data_len(buff);
>  		} else {
> -			buff_addr += vq->vhost_hlen;
> -			desc->len = packet_len;
> +			/* Copy virtio_hdr to packet and increment buffer address */
> +			buff_hdr_addr = buff_addr;
> +			packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
> +
> +			/*
> +			 * If the descriptors are chained the header and data are placed in
> +			 * separate buffers.
> +			 */
> +			if (desc->flags & VRING_DESC_F_NEXT) {
> +				desc->len = vq->vhost_hlen;
> +				desc = &vq->desc[desc->next];
> +				/* Buffer address translation. */
> +				buff_addr = gpa_to_vva(dev, desc->addr);
> +				desc->len = rte_pktmbuf_data_len(buff);
> +			} else {
> +				buff_addr += vq->vhost_hlen;
> +				desc->len = packet_len;
> +			}
>  		}
>  
> +		VHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, rte_pktmbuf_data_len(buff), 0);
> +
>  		/* Update used ring with desc information */
> -		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
> -							head[packet_success];
> +		vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];
>  		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
>  
>  		/* Copy mbuf data to buffer */
> -		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
> -		rte_memcpy((void *)(uintptr_t)buff_addr,
> -			rte_pktmbuf_mtod(buff, const void *),
> -			rte_pktmbuf_data_len(buff));
> -		PRINT_PACKET(dev, (uintptr_t)buff_addr,
> -			rte_pktmbuf_data_len(buff), 0);
> +		/* TODO fixme for sg mbuf and the case that desc couldn't hold the mbuf data */
> +		rte_memcpy((void *)(uintptr_t)buff_addr, (const void *)buff->pkt.data, rte_pktmbuf_data_len(buff));
>  
>  		res_cur_idx++;
>  		packet_success++;
>  
> -		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
> -			(const void *)&virtio_hdr, vq->vhost_hlen);
> -
> -		PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
> -
> +		/* If mergeable is disabled then a header is required per buffer. */
> +		if (!mergeable) {
> +			rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen);
> +			VHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
> +		} else {
> +			mrg_count++;
> +			/* Merge buffer can only handle so many buffers at a time. Tell the guest if this limit is reached. */
> +			if ((mrg_count == VHOST_MAX_MRG_PKT_BURST) || (res_cur_idx == res_end_idx)) {
> +				virtio_hdr.num_buffers = mrg_count;
> +				LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n", dev->device_fh, virtio_hdr.num_buffers);
> +				rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen);
> +				VHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
> +				mrg_count = 0;
> +			}
> +		}
>  		if (res_cur_idx < res_end_idx) {
>  			/* Prefetch descriptor index. */
>  			rte_prefetch0(&vq->desc[head[packet_success]]);
> @@ -184,357 +191,18 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	return count;
>  }
>  
> -static inline uint32_t __attribute__((always_inline))
> -copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
> -	uint16_t res_end_idx, struct rte_mbuf *pkt)
> -{
> -	uint32_t vec_idx = 0;
> -	uint32_t entry_success = 0;
> -	struct vhost_virtqueue *vq;
> -	/* The virtio_hdr is initialised to 0. */
> -	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {
> -		{0, 0, 0, 0, 0, 0}, 0};
> -	uint16_t cur_idx = res_base_idx;
> -	uint64_t vb_addr = 0;
> -	uint64_t vb_hdr_addr = 0;
> -	uint32_t seg_offset = 0;
> -	uint32_t vb_offset = 0;
> -	uint32_t seg_avail;
> -	uint32_t vb_avail;
> -	uint32_t cpy_len, entry_len;
> -
> -	if (pkt == NULL)
> -		return 0;
> -
> -	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| "
> -		"End Index %d\n",
> -		dev->device_fh, cur_idx, res_end_idx);
> -
> -	/*
> -	 * Convert from gpa to vva
> -	 * (guest physical addr -> vhost virtual addr)
> -	 */
> -	vq = dev->virtqueue[VIRTIO_RXQ];
> -	vb_addr =
> -		gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
> -	vb_hdr_addr = vb_addr;
> -
> -	/* Prefetch buffer address. */
> -	rte_prefetch0((void *)(uintptr_t)vb_addr);
> -
> -	virtio_hdr.num_buffers = res_end_idx - res_base_idx;
> -
> -	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n",
> -		dev->device_fh, virtio_hdr.num_buffers);
>  
> -	rte_memcpy((void *)(uintptr_t)vb_hdr_addr,
> -		(const void *)&virtio_hdr, vq->vhost_hlen);
> -
> -	PRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1);
> -
> -	seg_avail = rte_pktmbuf_data_len(pkt);
> -	vb_offset = vq->vhost_hlen;
> -	vb_avail =
> -		vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
> -
> -	entry_len = vq->vhost_hlen;
> -
> -	if (vb_avail == 0) {
> -		uint32_t desc_idx =
> -			vq->buf_vec[vec_idx].desc_idx;
> -		vq->desc[desc_idx].len = vq->vhost_hlen;
> -
> -		if ((vq->desc[desc_idx].flags
> -			& VRING_DESC_F_NEXT) == 0) {
> -			/* Update used ring with desc information */
> -			vq->used->ring[cur_idx & (vq->size - 1)].id
> -				= vq->buf_vec[vec_idx].desc_idx;
> -			vq->used->ring[cur_idx & (vq->size - 1)].len
> -				= entry_len;
> -
> -			entry_len = 0;
> -			cur_idx++;
> -			entry_success++;
> -		}
> -
> -		vec_idx++;
> -		vb_addr =
> -			gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
> -
> -		/* Prefetch buffer address. */
> -		rte_prefetch0((void *)(uintptr_t)vb_addr);
> -		vb_offset = 0;
> -		vb_avail = vq->buf_vec[vec_idx].buf_len;
> -	}
> -
> -	cpy_len = RTE_MIN(vb_avail, seg_avail);
> -
> -	while (cpy_len > 0) {
> -		/* Copy mbuf data to vring buffer */
> -		rte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset),
> -			(const void *)(rte_pktmbuf_mtod(pkt, char*) + seg_offset),
> -			cpy_len);
> -
> -		PRINT_PACKET(dev,
> -			(uintptr_t)(vb_addr + vb_offset),
> -			cpy_len, 0);
> -
> -		seg_offset += cpy_len;
> -		vb_offset += cpy_len;
> -		seg_avail -= cpy_len;
> -		vb_avail -= cpy_len;
> -		entry_len += cpy_len;
> -
> -		if (seg_avail != 0) {
> -			/*
> -			 * The virtio buffer in this vring
> -			 * entry reach to its end.
> -			 * But the segment doesn't complete.
> -			 */
> -			if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &
> -				VRING_DESC_F_NEXT) == 0) {
> -				/* Update used ring with desc information */
> -				vq->used->ring[cur_idx & (vq->size - 1)].id
> -					= vq->buf_vec[vec_idx].desc_idx;
> -				vq->used->ring[cur_idx & (vq->size - 1)].len
> -					= entry_len;
> -				entry_len = 0;
> -				cur_idx++;
> -				entry_success++;
> -			}
> -
> -			vec_idx++;
> -			vb_addr = gpa_to_vva(dev,
> -				vq->buf_vec[vec_idx].buf_addr);
> -			vb_offset = 0;
> -			vb_avail = vq->buf_vec[vec_idx].buf_len;
> -			cpy_len = RTE_MIN(vb_avail, seg_avail);
> -		} else {
> -			/*
> -			 * This current segment complete, need continue to
> -			 * check if the whole packet complete or not.
> -			 */
> -			pkt = pkt->next;
> -			if (pkt != NULL) {
> -				/*
> -				 * There are more segments.
> -				 */
> -				if (vb_avail == 0) {
> -					/*
> -					 * This current buffer from vring is
> -					 * used up, need fetch next buffer
> -					 * from buf_vec.
> -					 */
> -					uint32_t desc_idx =
> -						vq->buf_vec[vec_idx].desc_idx;
> -					vq->desc[desc_idx].len = vb_offset;
> -
> -					if ((vq->desc[desc_idx].flags &
> -						VRING_DESC_F_NEXT) == 0) {
> -						uint16_t wrapped_idx =
> -							cur_idx & (vq->size - 1);
> -						/*
> -						 * Update used ring with the
> -						 * descriptor information
> -						 */
> -						vq->used->ring[wrapped_idx].id
> -							= desc_idx;
> -						vq->used->ring[wrapped_idx].len
> -							= entry_len;
> -						entry_success++;
> -						entry_len = 0;
> -						cur_idx++;
> -					}
> -
> -					/* Get next buffer from buf_vec. */
> -					vec_idx++;
> -					vb_addr = gpa_to_vva(dev,
> -						vq->buf_vec[vec_idx].buf_addr);
> -					vb_avail =
> -						vq->buf_vec[vec_idx].buf_len;
> -					vb_offset = 0;
> -				}
> -
> -				seg_offset = 0;
> -				seg_avail = rte_pktmbuf_data_len(pkt);
> -				cpy_len = RTE_MIN(vb_avail, seg_avail);
> -			} else {
> -				/*
> -				 * This whole packet completes.
> -				 */
> -				uint32_t desc_idx =
> -					vq->buf_vec[vec_idx].desc_idx;
> -				vq->desc[desc_idx].len = vb_offset;
> -
> -				while (vq->desc[desc_idx].flags &
> -					VRING_DESC_F_NEXT) {
> -					desc_idx = vq->desc[desc_idx].next;
> -					 vq->desc[desc_idx].len = 0;
> -				}
> -
> -				/* Update used ring with desc information */
> -				vq->used->ring[cur_idx & (vq->size - 1)].id
> -					= vq->buf_vec[vec_idx].desc_idx;
> -				vq->used->ring[cur_idx & (vq->size - 1)].len
> -					= entry_len;
> -				entry_len = 0;
> -				cur_idx++;
> -				entry_success++;
> -				seg_avail = 0;
> -				cpy_len = RTE_MIN(vb_avail, seg_avail);
> -			}
> -		}
> -	}
> -
> -	return entry_success;
> -}
> -
> -/*
> - * This function works for mergeable RX.
> - */
> -static inline uint32_t __attribute__((always_inline))
> -virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
> -	struct rte_mbuf **pkts, uint32_t count)
> +uint32_t
> +rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count)
>  {
> -	struct vhost_virtqueue *vq;
> -	uint32_t pkt_idx = 0, entry_success = 0;
> -	uint16_t avail_idx, res_cur_idx;
> -	uint16_t res_base_idx, res_end_idx;
> -	uint8_t success = 0;
> -
> -	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
> -		dev->device_fh);
> -	if (unlikely(queue_id != VIRTIO_RXQ)) {
> -		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
> -	}
> -
> -	vq = dev->virtqueue[VIRTIO_RXQ];
> -	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
> -
> -	if (count == 0)
> -		return 0;
> -
> -	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
> -		uint32_t secure_len = 0;
> -		uint16_t need_cnt;
> -		uint32_t vec_idx = 0;
> -		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
> -		uint16_t i, id;
> -
> -		do {
> -			/*
> -			 * As many data cores may want access to available
> -			 * buffers, they need to be reserved.
> -			 */
> -			res_base_idx = vq->last_used_idx_res;
> -			res_cur_idx = res_base_idx;
> -
> -			do {
> -				avail_idx = *((volatile uint16_t *)&vq->avail->idx);
> -				if (unlikely(res_cur_idx == avail_idx)) {
> -					LOG_DEBUG(VHOST_DATA,
> -						"(%"PRIu64") Failed "
> -						"to get enough desc from "
> -						"vring\n",
> -						dev->device_fh);
> -					return pkt_idx;
> -				} else {
> -					uint16_t wrapped_idx =
> -						(res_cur_idx) & (vq->size - 1);
> -					uint32_t idx =
> -						vq->avail->ring[wrapped_idx];
> -					uint8_t next_desc;
> -
> -					do {
> -						next_desc = 0;
> -						secure_len += vq->desc[idx].len;
> -						if (vq->desc[idx].flags &
> -							VRING_DESC_F_NEXT) {
> -							idx = vq->desc[idx].next;
> -							next_desc = 1;
> -						}
> -					} while (next_desc);
> -
> -					res_cur_idx++;
> -				}
> -			} while (pkt_len > secure_len);
> -
> -			/* vq->last_used_idx_res is atomically updated. */
> -			success = rte_atomic16_cmpset(&vq->last_used_idx_res,
> -							res_base_idx,
> -							res_cur_idx);
> -		} while (success == 0);
> -
> -		id = res_base_idx;
> -		need_cnt = res_cur_idx - res_base_idx;
> -
> -		for (i = 0; i < need_cnt; i++, id++) {
> -			uint16_t wrapped_idx = id & (vq->size - 1);
> -			uint32_t idx = vq->avail->ring[wrapped_idx];
> -			uint8_t next_desc;
> -			do {
> -				next_desc = 0;
> -				vq->buf_vec[vec_idx].buf_addr =
> -					vq->desc[idx].addr;
> -				vq->buf_vec[vec_idx].buf_len =
> -					vq->desc[idx].len;
> -				vq->buf_vec[vec_idx].desc_idx = idx;
> -				vec_idx++;
> -
> -				if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
> -					idx = vq->desc[idx].next;
> -					next_desc = 1;
> -				}
> -			} while (next_desc);
> -		}
> -
> -		res_end_idx = res_cur_idx;
> -
> -		entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
> -			res_end_idx, pkts[pkt_idx]);
> -
> -		rte_compiler_barrier();
> -
> -		/*
> -		 * Wait until it's our turn to add our buffer
> -		 * to the used ring.
> -		 */
> -		while (unlikely(vq->last_used_idx != res_base_idx))
> -			rte_pause();
> -
> -		*(volatile uint16_t *)&vq->used->idx += entry_success;
> -		vq->last_used_idx = res_end_idx;
> -
> -		/* Kick the guest if necessary. */
> -		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
> -			eventfd_write((int)vq->kickfd, 1);
> -	}
> -
> -	return count;
> -}
> -
> -uint16_t
> -rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
> -	struct rte_mbuf **pkts, uint16_t count)
> -{
> -	if (unlikely(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)))
> -		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
> -	else
> -		return virtio_dev_rx(dev, queue_id, pkts, count);
> -}
> -
> -uint16_t
> -rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
> -	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
> -{
> -	struct rte_mbuf *m, *prev;
> +	struct rte_mbuf *mbuf;
>  	struct vhost_virtqueue *vq;
>  	struct vring_desc *desc;
> -	uint64_t vb_addr = 0;
> -	uint32_t head[MAX_PKT_BURST];
> +	uint64_t buff_addr = 0;
> +	uint32_t head[VHOST_MAX_PKT_BURST];
>  	uint32_t used_idx;
>  	uint32_t i;
> -	uint16_t free_entries, entry_success = 0;
> +	uint16_t free_entries, packet_success = 0;
>  	uint16_t avail_idx;
>  
>  	if (unlikely(queue_id != VIRTIO_TXQ)) {
> @@ -549,8 +217,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>  	if (vq->last_used_idx == avail_idx)
>  		return 0;
>  
> -	LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__,
> -		dev->device_fh);
> +	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s(%d->%d)\n", 
> +		dev->device_fh, __func__, vq->last_used_idx, avail_idx);
>  
>  	/* Prefetch available ring to retrieve head indexes. */
>  	rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);
> @@ -558,173 +226,68 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>  	/*get the number of free entries in the ring*/
>  	free_entries = (avail_idx - vq->last_used_idx);
>  
> -	free_entries = RTE_MIN(free_entries, count);
> +	if (free_entries > count)
> +		free_entries = count;
>  	/* Limit to MAX_PKT_BURST. */
> -	free_entries = RTE_MIN(free_entries, MAX_PKT_BURST);
> +	if (free_entries > VHOST_MAX_PKT_BURST)
> +		free_entries = VHOST_MAX_PKT_BURST;
>  
> -	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
> -			dev->device_fh, free_entries);
> +	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", dev->device_fh, free_entries);
>  	/* Retrieve all of the head indexes first to avoid caching issues. */
>  	for (i = 0; i < free_entries; i++)
>  		head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];
>  
>  	/* Prefetch descriptor index. */
> -	rte_prefetch0(&vq->desc[head[entry_success]]);
> +	rte_prefetch0(&vq->desc[head[packet_success]]);
>  	rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);
>  
> -	while (entry_success < free_entries) {
> -		uint32_t vb_avail, vb_offset;
> -		uint32_t seg_avail, seg_offset;
> -		uint32_t cpy_len;
> -		uint32_t seg_num = 0;
> -		struct rte_mbuf *cur;
> -		uint8_t alloc_err = 0;
> -
> -		desc = &vq->desc[head[entry_success]];
> +	while (packet_success < free_entries) {
> +		desc = &vq->desc[head[packet_success]];
>  
>  		/* Discard first buffer as it is the virtio header */
>  		desc = &vq->desc[desc->next];
>  
>  		/* Buffer address translation. */
> -		vb_addr = gpa_to_vva(dev, desc->addr);
> +		buff_addr = gpa_to_vva(dev, desc->addr);
>  		/* Prefetch buffer address. */
> -		rte_prefetch0((void *)(uintptr_t)vb_addr);
> +		rte_prefetch0((void *)(uintptr_t)buff_addr);
>  
>  		used_idx = vq->last_used_idx & (vq->size - 1);
>  
> -		if (entry_success < (free_entries - 1)) {
> +		if (packet_success < (free_entries - 1)) {
>  			/* Prefetch descriptor index. */
> -			rte_prefetch0(&vq->desc[head[entry_success+1]]);
> +			rte_prefetch0(&vq->desc[head[packet_success+1]]);
>  			rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]);
>  		}
>  
>  		/* Update used index buffer information. */
> -		vq->used->ring[used_idx].id = head[entry_success];
> +		vq->used->ring[used_idx].id = head[packet_success];
>  		vq->used->ring[used_idx].len = 0;
>  
> -		vb_offset = 0;
> -		vb_avail = desc->len;
> -		/* Allocate an mbuf and populate the structure. */
> -		m = rte_pktmbuf_alloc(mbuf_pool);
> -		if (unlikely(m == NULL)) {
> -			RTE_LOG(ERR, VHOST_DATA,
> -				"Failed to allocate memory for mbuf.\n");
> -			return entry_success;
> +		mbuf = rte_pktmbuf_alloc(mbuf_pool);
> +		if (unlikely(mbuf == NULL)) {
> +			RTE_LOG(ERR, VHOST_DATA, "Failed to allocate memory for mbuf.\n");
> +			return packet_success;
>  		}
> -		seg_offset = 0;
> -		seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
> -		cpy_len = RTE_MIN(vb_avail, seg_avail);
> -
> -		PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);
> -
> -		seg_num++;
> -		cur = m;
> -		prev = m;
> -		while (cpy_len != 0) {
> -			rte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + seg_offset),
> -				(void *)((uintptr_t)(vb_addr + vb_offset)),
> -				cpy_len);
> -
> -			seg_offset += cpy_len;
> -			vb_offset += cpy_len;
> -			vb_avail -= cpy_len;
> -			seg_avail -= cpy_len;
> -
> -			if (vb_avail != 0) {
> -				/*
> -				 * The segment reachs to its end,
> -				 * while the virtio buffer in TX vring has
> -				 * more data to be copied.
> -				 */
> -				cur->data_len = seg_offset;
> -				m->pkt_len += seg_offset;
> -				/* Allocate mbuf and populate the structure. */
> -				cur = rte_pktmbuf_alloc(mbuf_pool);
> -				if (unlikely(cur == NULL)) {
> -					RTE_LOG(ERR, VHOST_DATA, "Failed to "
> -						"allocate memory for mbuf.\n");
> -					rte_pktmbuf_free(m);
> -					alloc_err = 1;
> -					break;
> -				}
> -
> -				seg_num++;
> -				prev->next = cur;
> -				prev = cur;
> -				seg_offset = 0;
> -				seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
> -			} else {
> -				if (desc->flags & VRING_DESC_F_NEXT) {
> -					/*
> -					 * There are more virtio buffers in
> -					 * same vring entry need to be copied.
> -					 */
> -					if (seg_avail == 0) {
> -						/*
> -						 * The current segment hasn't
> -						 * room to accomodate more
> -						 * data.
> -						 */
> -						cur->data_len = seg_offset;
> -						m->pkt_len += seg_offset;
> -						/*
> -						 * Allocate an mbuf and
> -						 * populate the structure.
> -						 */
> -						cur = rte_pktmbuf_alloc(mbuf_pool);
> -						if (unlikely(cur == NULL)) {
> -							RTE_LOG(ERR,
> -								VHOST_DATA,
> -								"Failed to "
> -								"allocate memory "
> -								"for mbuf\n");
> -							rte_pktmbuf_free(m);
> -							alloc_err = 1;
> -							break;
> -						}
> -						seg_num++;
> -						prev->next = cur;
> -						prev = cur;
> -						seg_offset = 0;
> -						seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
> -					}
> -
> -					desc = &vq->desc[desc->next];
> -
> -					/* Buffer address translation. */
> -					vb_addr = gpa_to_vva(dev, desc->addr);
> -					/* Prefetch buffer address. */
> -					rte_prefetch0((void *)(uintptr_t)vb_addr);
> -					vb_offset = 0;
> -					vb_avail = desc->len;
> -
> -					PRINT_PACKET(dev, (uintptr_t)vb_addr,
> -						desc->len, 0);
> -				} else {
> -					/* The whole packet completes. */
> -					cur->data_len = seg_offset;
> -					m->pkt_len += seg_offset;
> -					vb_avail = 0;
> -				}
> -			}
> +		mbuf->pkt.data_len = desc->len;
> +		mbuf->pkt.pkt_len  = mbuf->pkt.data_len;
>  
> -			cpy_len = RTE_MIN(vb_avail, seg_avail);
> -		}
> +		rte_memcpy((void *) mbuf->pkt.data,
> +			(const void *) buff_addr, mbuf->pkt.data_len);
>  
> -		if (unlikely(alloc_err == 1))
> -			break;
> +		pkts[packet_success] = mbuf;
>  
> -		m->nb_segs = seg_num;
> +		VHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0);
>  
> -		pkts[entry_success] = m;
>  		vq->last_used_idx++;
> -		entry_success++;
> +		packet_success++;
>  	}
>  
>  	rte_compiler_barrier();
> -	vq->used->idx += entry_success;
> +	vq->used->idx += packet_success;
>  	/* Kick guest if required. */
>  	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
>  		eventfd_write((int)vq->kickfd, 1);
> -	return entry_success;
> +
> +	return packet_success;
>  }
> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
> index 852b6d1..516e743 100644
> --- a/lib/librte_vhost/virtio-net.c
> +++ b/lib/librte_vhost/virtio-net.c
> @@ -31,17 +31,14 @@
>   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>   */
>  
> -#include <dirent.h>
> -#include <fuse/cuse_lowlevel.h>
>  #include <linux/vhost.h>
>  #include <linux/virtio_net.h>
>  #include <stddef.h>
>  #include <stdint.h>
>  #include <stdlib.h>
> -#include <sys/eventfd.h>
> -#include <sys/ioctl.h>
>  #include <sys/mman.h>
>  #include <unistd.h>
> +#include <assert.h>
>  
>  #include <rte_ethdev.h>
>  #include <rte_log.h>
> @@ -49,10 +46,8 @@
>  #include <rte_memory.h>
>  #include <rte_virtio_net.h>
>  
> -#include "vhost-net-cdev.h"
> -#include "eventfd_link/eventfd_link.h"
> -
> -/*
> +#include "vhost-net.h"
> +/**
>   * Device linked list structure for configuration.
>   */
>  struct virtio_net_config_ll {
> @@ -60,38 +55,15 @@ struct virtio_net_config_ll {
>  	struct virtio_net_config_ll *next;	/* Next dev on linked list.*/
>  };
>  
> -const char eventfd_cdev[] = "/dev/eventfd-link";
> -
> -/* device ops to add/remove device to/from data core. */
> +/* device ops to add/remove device to data core. */
>  static struct virtio_net_device_ops const *notify_ops;
> -/* root address of the linked list of managed virtio devices */
> +/* root address of the linked list in the configuration core. */
>  static struct virtio_net_config_ll *ll_root;
>  
>  /* Features supported by this lib. */
> -#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
> -				  (1ULL << VIRTIO_NET_F_CTRL_RX))
> +#define VHOST_SUPPORTED_FEATURES (1ULL << VIRTIO_NET_F_MRG_RXBUF)
>  static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
>  
> -/* Line size for reading maps file. */
> -static const uint32_t BUFSIZE = PATH_MAX;
> -
> -/* Size of prot char array in procmap. */
> -#define PROT_SZ 5
> -
> -/* Number of elements in procmap struct. */
> -#define PROCMAP_SZ 8
> -
> -/* Structure containing information gathered from maps file. */
> -struct procmap {
> -	uint64_t va_start;	/* Start virtual address in file. */
> -	uint64_t len;		/* Size of file. */
> -	uint64_t pgoff;		/* Not used. */
> -	uint32_t maj;		/* Not used. */
> -	uint32_t min;		/* Not used. */
> -	uint32_t ino;		/* Not used. */
> -	char prot[PROT_SZ];	/* Not used. */
> -	char fname[PATH_MAX];	/* File name. */
> -};
>  
>  /*
>   * Converts QEMU virtual address to Vhost virtual address. This function is
> @@ -110,199 +82,15 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
>  		if ((qemu_va >= region->userspace_address) &&
>  			(qemu_va <= region->userspace_address +
>  			region->memory_size)) {
> -			vhost_va = dev->mem->mapped_address + qemu_va -
> -					dev->mem->base_address;
> +			vhost_va = qemu_va +  region->guest_phys_address + 
> +				region->address_offset -
> +				region->userspace_address;
>  			break;
>  		}
>  	}
>  	return vhost_va;
>  }
>  
> -/*
> - * Locate the file containing QEMU's memory space and
> - * map it to our address space.
> - */
> -static int
> -host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
> -	pid_t pid, uint64_t addr)
> -{
> -	struct dirent *dptr = NULL;
> -	struct procmap procmap;
> -	DIR *dp = NULL;
> -	int fd;
> -	int i;
> -	char memfile[PATH_MAX];
> -	char mapfile[PATH_MAX];
> -	char procdir[PATH_MAX];
> -	char resolved_path[PATH_MAX];
> -	char *path = NULL;
> -	FILE *fmap;
> -	void *map;
> -	uint8_t found = 0;
> -	char line[BUFSIZE];
> -	char dlm[] = "-   :   ";
> -	char *str, *sp, *in[PROCMAP_SZ];
> -	char *end = NULL;
> -
> -	/* Path where mem files are located. */
> -	snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
> -	/* Maps file used to locate mem file. */
> -	snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
> -
> -	fmap = fopen(mapfile, "r");
> -	if (fmap == NULL) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> -			"(%"PRIu64") Failed to open maps file for pid %d\n",
> -			dev->device_fh, pid);
> -		return -1;
> -	}
> -
> -	/* Read through maps file until we find out base_address. */
> -	while (fgets(line, BUFSIZE, fmap) != 0) {
> -		str = line;
> -		errno = 0;
> -		/* Split line into fields. */
> -		for (i = 0; i < PROCMAP_SZ; i++) {
> -			in[i] = strtok_r(str, &dlm[i], &sp);
> -			if ((in[i] == NULL) || (errno != 0)) {
> -				fclose(fmap);
> -				return -1;
> -			}
> -			str = NULL;
> -		}
> -
> -		/* Convert/Copy each field as needed. */
> -		procmap.va_start = strtoull(in[0], &end, 16);
> -		if ((in[0] == '\0') || (end == NULL) || (*end != '\0') ||
> -			(errno != 0)) {
> -			fclose(fmap);
> -			return -1;
> -		}
> -
> -		procmap.len = strtoull(in[1], &end, 16);
> -		if ((in[1] == '\0') || (end == NULL) || (*end != '\0') ||
> -			(errno != 0)) {
> -			fclose(fmap);
> -			return -1;
> -		}
> -
> -		procmap.pgoff = strtoull(in[3], &end, 16);
> -		if ((in[3] == '\0') || (end == NULL) || (*end != '\0') ||
> -			(errno != 0)) {
> -			fclose(fmap);
> -			return -1;
> -		}
> -
> -		procmap.maj = strtoul(in[4], &end, 16);
> -		if ((in[4] == '\0') || (end == NULL) || (*end != '\0') ||
> -			(errno != 0)) {
> -			fclose(fmap);
> -			return -1;
> -		}
> -
> -		procmap.min = strtoul(in[5], &end, 16);
> -		if ((in[5] == '\0') || (end == NULL) || (*end != '\0') ||
> -			(errno != 0)) {
> -			fclose(fmap);
> -			return -1;
> -		}
> -
> -		procmap.ino = strtoul(in[6], &end, 16);
> -		if ((in[6] == '\0') || (end == NULL) || (*end != '\0') ||
> -			(errno != 0)) {
> -			fclose(fmap);
> -			return -1;
> -		}
> -
> -		memcpy(&procmap.prot, in[2], PROT_SZ);
> -		memcpy(&procmap.fname, in[7], PATH_MAX);
> -
> -		if (procmap.va_start == addr) {
> -			procmap.len = procmap.len - procmap.va_start;
> -			found = 1;
> -			break;
> -		}
> -	}
> -	fclose(fmap);
> -
> -	if (!found) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> -			"(%"PRIu64") Failed to find memory file in pid %d maps file\n",
> -			dev->device_fh, pid);
> -		return -1;
> -	}
> -
> -	/* Find the guest memory file among the process fds. */
> -	dp = opendir(procdir);
> -	if (dp == NULL) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> -			"(%"PRIu64") Cannot open pid %d process directory\n",
> -			dev->device_fh, pid);
> -		return -1;
> -	}
> -
> -	found = 0;
> -
> -	/* Read the fd directory contents. */
> -	while (NULL != (dptr = readdir(dp))) {
> -		snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
> -				pid, dptr->d_name);
> -		path = realpath(memfile, resolved_path);
> -		if ((path == NULL) && (strlen(resolved_path) == 0)) {
> -			RTE_LOG(ERR, VHOST_CONFIG,
> -				"(%"PRIu64") Failed to resolve fd directory\n",
> -				dev->device_fh);
> -			closedir(dp);
> -			return -1;
> -		}
> -		if (strncmp(resolved_path, procmap.fname,
> -			strnlen(procmap.fname, PATH_MAX)) == 0) {
> -			found = 1;
> -			break;
> -		}
> -	}
> -
> -	closedir(dp);
> -
> -	if (found == 0) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> -			"(%"PRIu64") Failed to find memory file for pid %d\n",
> -			dev->device_fh, pid);
> -		return -1;
> -	}
> -	/* Open the shared memory file and map the memory into this process. */
> -	fd = open(memfile, O_RDWR);
> -
> -	if (fd == -1) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> -			"(%"PRIu64") Failed to open %s for pid %d\n",
> -			dev->device_fh, memfile, pid);
> -		return -1;
> -	}
> -
> -	map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
> -		MAP_POPULATE|MAP_SHARED, fd, 0);
> -	close(fd);
> -
> -	if (map == MAP_FAILED) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> -			"(%"PRIu64") Error mapping the file %s for pid %d\n",
> -			dev->device_fh, memfile, pid);
> -		return -1;
> -	}
> -
> -	/* Store the memory address and size in the device data structure */
> -	mem->mapped_address = (uint64_t)(uintptr_t)map;
> -	mem->mapped_size = procmap.len;
> -
> -	LOG_DEBUG(VHOST_CONFIG,
> -		"(%"PRIu64") Mem File: %s->%s - Size: %llu - VA: %p\n",
> -		dev->device_fh,
> -		memfile, resolved_path,
> -		(unsigned long long)mem->mapped_size, map);
> -
> -	return 0;
> -}
>  
>  /*
>   * Retrieves an entry from the devices configuration linked list.
> @@ -376,7 +164,7 @@ add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
>  	}
>  
>  }
> -
> +/*TODO dpdk alloc/free if possible */
>  /*
>   * Unmap any memory, close any file descriptors and
>   * free any memory owned by a device.
> @@ -389,16 +177,17 @@ cleanup_device(struct virtio_net *dev)
>  		munmap((void *)(uintptr_t)dev->mem->mapped_address,
>  			(size_t)dev->mem->mapped_size);
>  		free(dev->mem);
> +		dev->mem = NULL;
>  	}
>  
>  	/* Close any event notifiers opened by device. */
> -	if (dev->virtqueue[VIRTIO_RXQ]->callfd)
> +	if (dev->virtqueue[VIRTIO_RXQ]->callfd > 0)
>  		close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
> -	if (dev->virtqueue[VIRTIO_RXQ]->kickfd)
> +	if (dev->virtqueue[VIRTIO_RXQ]->kickfd > 0)
>  		close((int)dev->virtqueue[VIRTIO_RXQ]->kickfd);
> -	if (dev->virtqueue[VIRTIO_TXQ]->callfd)
> +	if (dev->virtqueue[VIRTIO_TXQ]->callfd > 0)
>  		close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
> -	if (dev->virtqueue[VIRTIO_TXQ]->kickfd)
> +	if (dev->virtqueue[VIRTIO_TXQ]->kickfd > 0)
>  		close((int)dev->virtqueue[VIRTIO_TXQ]->kickfd);
>  }
>  
> @@ -522,8 +311,8 @@ new_device(struct vhost_device_ctx ctx)
>  }
>  
>  /*
> - * Function is called from the CUSE release function. This function will
> - * cleanup the device and remove it from device configuration linked list.
> + * Function is called from the CUSE release function. This function will cleanup
> + * the device and remove it from device configuration linked list.
>   */
>  static void
>  destroy_device(struct vhost_device_ctx ctx)
> @@ -569,6 +358,7 @@ set_owner(struct vhost_device_ctx ctx)
>  		return -1;
>  
>  	return 0;
> +	/* TODO check ctx.fh is meaningful here */
>  }
>  
>  /*
> @@ -651,14 +441,12 @@ set_features(struct vhost_device_ctx ctx, uint64_t *pu)
>   * This includes storing offsets used to translate buffer addresses.
>   */
>  static int
> -set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,
> -	uint32_t nregions)
> +set_mem_table(struct vhost_device_ctx ctx,
> +	const struct virtio_memory_regions *regions, uint32_t nregions)
>  {
>  	struct virtio_net *dev;
> -	struct vhost_memory_region *mem_regions;
>  	struct virtio_memory *mem;
> -	uint64_t size = offsetof(struct vhost_memory, regions);
> -	uint32_t regionidx, valid_regions;
> +	uint32_t regionidx;
>  
>  	dev = get_device(ctx);
>  	if (dev == NULL)
> @@ -682,107 +470,24 @@ set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,
>  
>  	mem->nregions = nregions;
>  
> -	mem_regions = (void *)(uintptr_t)
> -			((uint64_t)(uintptr_t)mem_regions_addr + size);
> -
>  	for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
>  		/* Populate the region structure for each region. */
> -		mem->regions[regionidx].guest_phys_address =
> -			mem_regions[regionidx].guest_phys_addr;
> -		mem->regions[regionidx].guest_phys_address_end =
> -			mem->regions[regionidx].guest_phys_address +
> -			mem_regions[regionidx].memory_size;
> -		mem->regions[regionidx].memory_size =
> -			mem_regions[regionidx].memory_size;
> -		mem->regions[regionidx].userspace_address =
> -			mem_regions[regionidx].userspace_addr;
> -
> -		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n", dev->device_fh,
> -			regionidx,
> -			(void *)(uintptr_t)mem->regions[regionidx].guest_phys_address,
> -			(void *)(uintptr_t)mem->regions[regionidx].userspace_address,
> -			mem->regions[regionidx].memory_size);
> -
> -		/*set the base address mapping*/
> +		mem->regions[regionidx] = regions[regionidx];
>  		if (mem->regions[regionidx].guest_phys_address == 0x0) {
>  			mem->base_address =
>  				mem->regions[regionidx].userspace_address;
> -			/* Map VM memory file */
> -			if (host_memory_map(dev, mem, ctx.pid,
> -				mem->base_address) != 0) {
> -				free(mem);
> -				return -1;
> -			}
> +			mem->mapped_address = 
> +				mem->regions[regionidx].address_offset;
>  		}
>  	}
>  
> -	/* Check that we have a valid base address. */
> -	if (mem->base_address == 0) {
> -		RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Failed to find base address of qemu memory file.\n", dev->device_fh);
> -		free(mem);
> -		return -1;
> -	}
> -
> -	/*
> -	 * Check if all of our regions have valid mappings.
> -	 * Usually one does not exist in the QEMU memory file.
> -	 */
> -	valid_regions = mem->nregions;
> -	for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
> -		if ((mem->regions[regionidx].userspace_address <
> -			mem->base_address) ||
> -			(mem->regions[regionidx].userspace_address >
> -			(mem->base_address + mem->mapped_size)))
> -				valid_regions--;
> -	}
> -
> -	/*
> -	 * If a region does not have a valid mapping,
> -	 * we rebuild our memory struct to contain only valid entries.
> -	 */
> -	if (valid_regions != mem->nregions) {
> -		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") Not all memory regions exist in the QEMU mem file. Re-populating mem structure\n",
> -			dev->device_fh);
> -
> -		/*
> -		 * Re-populate the memory structure with only valid regions.
> -		 * Invalid regions are over-written with memmove.
> -		 */
> -		valid_regions = 0;
> -
> -		for (regionidx = mem->nregions; 0 != regionidx--;) {
> -			if ((mem->regions[regionidx].userspace_address <
> -				mem->base_address) ||
> -				(mem->regions[regionidx].userspace_address >
> -				(mem->base_address + mem->mapped_size))) {
> -				memmove(&mem->regions[regionidx],
> -					&mem->regions[regionidx + 1],
> -					sizeof(struct virtio_memory_regions) *
> -						valid_regions);
> -			} else {
> -				valid_regions++;
> -			}
> -		}
> -	}
> -	mem->nregions = valid_regions;
> +	/* TODO: add back the logic that removes invalid memory regions */
>  	dev->mem = mem;
>  
> -	/*
> -	 * Calculate the address offset for each region.
> -	 * This offset is used to identify the vhost virtual address
> -	 * corresponding to a QEMU guest physical address.
> -	 */
> -	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
> -		dev->mem->regions[regionidx].address_offset =
> -			dev->mem->regions[regionidx].userspace_address -
> -				dev->mem->base_address +
> -				dev->mem->mapped_address -
> -				dev->mem->regions[regionidx].guest_phys_address;
> -
> -	}
>  	return 0;
>  }
>  
> +
>  /*
>   * Called from CUSE IOCTL: VHOST_SET_VRING_NUM
>   * The virtio device sends us the size of the descriptor ring.
> @@ -896,38 +601,62 @@ get_vring_base(struct vhost_device_ctx ctx, uint32_t index,
>  	/* State->index refers to the queue index. The txq is 1, rxq is 0. */
>  	state->num = dev->virtqueue[state->index]->last_used_idx;
>  
> -	return 0;
> -}
> +	if (dev->flags & VIRTIO_DEV_RUNNING) {
> +		RTE_LOG(INFO, VHOST_CONFIG, 
> +			"get_vring_base message is for release\n");
> +		notify_ops->destroy_device(dev);
> +		/*
> +		 * sync call.
> +		 * when it returns, it means it is removed from data core.
> +		 */
> +	}
> +	/* TODO fix all munmap */
> +	if (dev->mem) {
> +		munmap((void *)(uintptr_t)dev->mem->mapped_address,
> +			(size_t)dev->mem->mapped_size);
> +		free(dev->mem);
> +		dev->mem = NULL;
> +	}
>  
> -/*
> - * This function uses the eventfd_link kernel module to copy an eventfd file
> - * descriptor provided by QEMU in to our process space.
> - */
> -static int
> -eventfd_copy(struct virtio_net *dev, struct eventfd_copy *eventfd_copy)
> -{
> -	int eventfd_link, ret;
>  
> -	/* Open the character device to the kernel module. */
> -	eventfd_link = open(eventfd_cdev, O_RDWR);
> -	if (eventfd_link < 0) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> -			"(%"PRIu64") eventfd_link module is not loaded\n",
> -			dev->device_fh);
> -		return -1;
> -	}
> +	if (dev->virtqueue[VIRTIO_RXQ]->callfd > 0)
> +		close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
> +	dev->virtqueue[VIRTIO_RXQ]->callfd = -1;
> +	if (dev->virtqueue[VIRTIO_TXQ]->callfd > 0)
> +		close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
> +	dev->virtqueue[VIRTIO_TXQ]->callfd = -1;
> +	/* We don't cleanup callfd here as we won't get CALLFD again */
> +	
> +	dev->virtqueue[VIRTIO_RXQ]->desc = NULL;
> +	dev->virtqueue[VIRTIO_RXQ]->avail = NULL;
> +	dev->virtqueue[VIRTIO_RXQ]->used = NULL;
> +	dev->virtqueue[VIRTIO_RXQ]->last_used_idx = 0;
> +	dev->virtqueue[VIRTIO_RXQ]->last_used_idx_res = 0;
> +
> +	dev->virtqueue[VIRTIO_TXQ]->desc = NULL;
> +	dev->virtqueue[VIRTIO_TXQ]->avail = NULL;
> +	dev->virtqueue[VIRTIO_TXQ]->used = NULL;
> +	dev->virtqueue[VIRTIO_TXQ]->last_used_idx = 0;
> +	dev->virtqueue[VIRTIO_TXQ]->last_used_idx_res = 0;
>  
> -	/* Call the IOCTL to copy the eventfd. */
> -	ret = ioctl(eventfd_link, EVENTFD_COPY, eventfd_copy);
> -	close(eventfd_link);
>  
> -	if (ret < 0) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> -			"(%"PRIu64") EVENTFD_COPY ioctl failed\n",
> -			dev->device_fh);
> -		return -1;
> -	}
> +	return 0;
> +}
>  
> +static int
> +virtio_is_ready(struct virtio_net *dev, int index)
> +{
> +	struct vhost_virtqueue *vq1, *vq2;
> +	/* mq support in future.*/
> +	vq1 = dev->virtqueue[index];
> +	vq2 = dev->virtqueue[index ^ 1];
> +	if (vq1 && vq2 && vq1->desc && vq2->desc && 
> +		(vq1->kickfd > 0) && (vq1->callfd > 0) &&
> +		(vq2->kickfd > 0) && (vq2->callfd > 0)) {
> +		LOG_DEBUG(VHOST_CONFIG, "virtio is ready for processing.\n");
> +		return 1;
> +	}
> +	LOG_DEBUG(VHOST_CONFIG, "virtio isn't ready for processing.\n");
>  	return 0;
>  }
>  
> @@ -940,7 +669,6 @@ static int
>  set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>  {
>  	struct virtio_net *dev;
> -	struct eventfd_copy	eventfd_kick;
>  	struct vhost_virtqueue *vq;
>  
>  	dev = get_device(ctx);
> @@ -953,14 +681,7 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>  	if (vq->kickfd)
>  		close((int)vq->kickfd);
>  
> -	/* Populate the eventfd_copy structure and call eventfd_copy. */
> -	vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> -	eventfd_kick.source_fd = vq->kickfd;
> -	eventfd_kick.target_fd = file->fd;
> -	eventfd_kick.target_pid = ctx.pid;
> -
> -	if (eventfd_copy(dev, &eventfd_kick))
> -		return -1;
> +	vq->kickfd = file->fd;
>  
>  	return 0;
>  }
> @@ -974,7 +695,6 @@ static int
>  set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>  {
>  	struct virtio_net *dev;
> -	struct eventfd_copy eventfd_call;
>  	struct vhost_virtqueue *vq;
>  
>  	dev = get_device(ctx);
> @@ -986,16 +706,11 @@ set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>  
>  	if (vq->callfd)
>  		close((int)vq->callfd);
> +	vq->callfd = file->fd;
>  
> -	/* Populate the eventfd_copy structure and call eventfd_copy. */
> -	vq->callfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> -	eventfd_call.source_fd = vq->callfd;
> -	eventfd_call.target_fd = file->fd;
> -	eventfd_call.target_pid = ctx.pid;
> -
> -	if (eventfd_copy(dev, &eventfd_call))
> -		return -1;
> -
> +	if (virtio_is_ready(dev, file->index) &&
> +		!(dev->flags & VIRTIO_DEV_RUNNING))
> +			notify_ops->new_device(dev);
>  	return 0;
>  }
>  
> @@ -1024,6 +739,7 @@ set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>  	 * If the device isn't already running and both backend fds are set,
>  	 * we add the device.
>  	 */
> +	LOG_DEBUG(VHOST_CONFIG, "%s %d\n", __func__, file->fd);
>  	if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
>  		if (((int)dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED) &&
>  			((int)dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED))
Tetsuya Mukawa Nov. 17, 2014, 6:11 a.m. UTC | #2
Hi Xie,

(2014/11/17 15:04), Tetsuya Mukawa wrote:
> Hi Xie,
>
>
> (2014/11/15 10:14), Huawei Xie wrote:
>> implement socket server
>> fd event dispatch mechanism
>> vhost sock  message handling
>> memory map for each region
>> VHOST_USER_SET_VRING_KICK_FD as the indicator that vring is available
>> VHOST_USER_GET_VRING_BASE as the message that vring should be released
>>   
>> The message flow between vhost-user and vhost-cuse is kindof different,
>> which makes virtio-net common message handler layer difficult and complicated to handle
>> both cases in new_device/destroy_device/memory map/resource cleanup.
>>
>> Will only leave the most common messag handling in virtio-net, and move the
>> control logic to cuse/fuse layer.  
>>
>>
>> Signed-off-by: Huawei Xie <huawei.xie@intel.com>
> Great patch!
> I guess we can start from this patch to implement vhost-user and
> abstraction layer.
>
> I've checked the patch.
>
> 1. Whitespace, tab and indentation fixes.
> I will send a patch that cleans up whitespace, tabs and indentation. Could you
> please check it?
> It might be difficult to see the difference if your editor doesn't show
> spaces or tabs.
>
> 2. Some files are based on old code.
> At least the following patch is not included:
> - vhost: fix build without unused result
> Also, vhost_rxtx.c probably isn't based on the latest code.
>
> 3. Device abstraction layer code
> I will send the device abstraction layer code after this email.
> Anyway, I guess we need to decide whether or not we still keep the
> vhost-cuse code.
Additionally, the above patches are based on your RFC patch.

Tetsuya

>
> 4. Multiple device operation.
> For example, when thread1 opens vhost-user device1 and thread2 opens
> vhost-user device2,
> each thread may want to register its own callbacks.
> The current implementation may not allow this.
> I guess we need to eliminate global variables in librte_vhost as much as
> possible.
>
> Thanks,
> Tetsuya
>
>> ---
>>  lib/librte_vhost/Makefile                     |  14 +-
>>  lib/librte_vhost/eventfd_link/eventfd_link.c  |  27 +-
>>  lib/librte_vhost/eventfd_link/eventfd_link.h  |  48 +-
>>  lib/librte_vhost/libvirt/qemu-wrap.py         | 367 ---------------
>>  lib/librte_vhost/rte_virtio_net.h             | 106 ++---
>>  lib/librte_vhost/vhost-cuse/vhost-net-cdev.c  | 436 ++++++++++++++++++
>>  lib/librte_vhost/vhost-cuse/virtio-net-cdev.c | 314 +++++++++++++
>>  lib/librte_vhost/vhost-cuse/virtio-net-cdev.h |  43 ++
>>  lib/librte_vhost/vhost-net-cdev.c             | 389 ----------------
>>  lib/librte_vhost/vhost-net-cdev.h             | 113 -----
>>  lib/librte_vhost/vhost-user/fd_man.c          | 158 +++++++
>>  lib/librte_vhost/vhost-user/fd_man.h          |  31 ++
>>  lib/librte_vhost/vhost-user/vhost-net-user.c  | 417 +++++++++++++++++
>>  lib/librte_vhost/vhost-user/vhost-net-user.h  |  74 +++
>>  lib/librte_vhost/vhost-user/virtio-net-user.c | 208 +++++++++
>>  lib/librte_vhost/vhost-user/virtio-net-user.h |  11 +
>>  lib/librte_vhost/vhost_rxtx.c                 | 625 ++++----------------------
>>  lib/librte_vhost/virtio-net.c                 | 450 ++++---------------
>>  18 files changed, 1939 insertions(+), 1892 deletions(-)
>>  delete mode 100755 lib/librte_vhost/libvirt/qemu-wrap.py
>>  create mode 100644 lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
>>  create mode 100644 lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
>>  create mode 100644 lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
>>  delete mode 100644 lib/librte_vhost/vhost-net-cdev.c
>>  delete mode 100644 lib/librte_vhost/vhost-net-cdev.h
>>  create mode 100644 lib/librte_vhost/vhost-user/fd_man.c
>>  create mode 100644 lib/librte_vhost/vhost-user/fd_man.h
>>  create mode 100644 lib/librte_vhost/vhost-user/vhost-net-user.c
>>  create mode 100644 lib/librte_vhost/vhost-user/vhost-net-user.h
>>  create mode 100644 lib/librte_vhost/vhost-user/virtio-net-user.c
>>  create mode 100644 lib/librte_vhost/vhost-user/virtio-net-user.h
>>
>> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
>> index c008d64..cb4e172 100644
>> --- a/lib/librte_vhost/Makefile
>> +++ b/lib/librte_vhost/Makefile
>> @@ -34,17 +34,19 @@ include $(RTE_SDK)/mk/rte.vars.mk
>>  # library name
>>  LIB = librte_vhost.a
>>  
>> -CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse
>> +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I. -I vhost-user -I vhost-cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
>>  LDFLAGS += -lfuse
>>  # all source are stored in SRCS-y
>> -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c vhost_rxtx.c
>> +#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-cuse/vhost-net-cdev.c vhost-cuse/virtio-net-cdev.c
>> +
>> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-user/fd_man.c vhost-user/vhost-net-user.c vhost-user/virtio-net-user.c
>> +
>> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += virtio-net.c vhost_rxtx.c
>>  
>>  # install includes
>>  SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
>>  
>> -# dependencies
>> -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal
>> -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_ether
>> -DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_mbuf
>> +# this lib needs eal
>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal lib/librte_mbuf
>>  
>>  include $(RTE_SDK)/mk/rte.lib.mk
>> diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.c b/lib/librte_vhost/eventfd_link/eventfd_link.c
>> index 7755dd6..4c9b628 100644
>> --- a/lib/librte_vhost/eventfd_link/eventfd_link.c
>> +++ b/lib/librte_vhost/eventfd_link/eventfd_link.c
>> @@ -13,8 +13,7 @@
>>   *   General Public License for more details.
>>   *
>>   *   You should have received a copy of the GNU General Public License
>> - *   along with this program; if not, write to the Free Software
>> - *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
>> + *   along with this program; If not, see <http://www.gnu.org/licenses/>.
>>   *   The full GNU General Public License is included in this distribution
>>   *   in the file called LICENSE.GPL.
>>   *
>> @@ -78,8 +77,7 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
>>  
>>  	switch (ioctl) {
>>  	case EVENTFD_COPY:
>> -		if (copy_from_user(&eventfd_copy, argp,
>> -			sizeof(struct eventfd_copy)))
>> +		if (copy_from_user(&eventfd_copy, argp, sizeof(struct eventfd_copy)))
>>  			return -EFAULT;
>>  
>>  		/*
>> @@ -88,28 +86,28 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
>>  		task_target =
>>  			pid_task(find_vpid(eventfd_copy.target_pid), PIDTYPE_PID);
>>  		if (task_target == NULL) {
>> -			pr_debug("Failed to get mem ctx for target pid\n");
>> +			printk(KERN_DEBUG "Failed to get mem ctx for target pid\n");
>>  			return -EFAULT;
>>  		}
>>  
>>  		files = get_files_struct(current);
>>  		if (files == NULL) {
>> -			pr_debug("Failed to get files struct\n");
>> +			printk(KERN_DEBUG "Failed to get files struct\n");
>>  			return -EFAULT;
>>  		}
>>  
>>  		rcu_read_lock();
>>  		file = fcheck_files(files, eventfd_copy.source_fd);
>>  		if (file) {
>> -			if (file->f_mode & FMODE_PATH ||
>> -				!atomic_long_inc_not_zero(&file->f_count))
>> +			if (file->f_mode & FMODE_PATH
>> +				|| !atomic_long_inc_not_zero(&file->f_count))
>>  				file = NULL;
>>  		}
>>  		rcu_read_unlock();
>>  		put_files_struct(files);
>>  
>>  		if (file == NULL) {
>> -			pr_debug("Failed to get file from source pid\n");
>> +			printk(KERN_DEBUG "Failed to get file from source pid\n");
>>  			return 0;
>>  		}
>>  
>> @@ -128,25 +126,26 @@ eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
>>  
>>  		files = get_files_struct(task_target);
>>  		if (files == NULL) {
>> -			pr_debug("Failed to get files struct\n");
>> +			printk(KERN_DEBUG "Failed to get files struct\n");
>>  			return -EFAULT;
>>  		}
>>  
>>  		rcu_read_lock();
>>  		file = fcheck_files(files, eventfd_copy.target_fd);
>>  		if (file) {
>> -			if (file->f_mode & FMODE_PATH ||
>> -				!atomic_long_inc_not_zero(&file->f_count))
>> -					file = NULL;
>> +			if (file->f_mode & FMODE_PATH
>> +				|| !atomic_long_inc_not_zero(&file->f_count))
>> +				file = NULL;
>>  		}
>>  		rcu_read_unlock();
>>  		put_files_struct(files);
>>  
>>  		if (file == NULL) {
>> -			pr_debug("Failed to get file from target pid\n");
>> +			printk(KERN_DEBUG "Failed to get file from target pid\n");
>>  			return 0;
>>  		}
>>  
>> +
>>  		/*
>>  		 * Install the file struct from the target process into the
>>  		 * file desciptor of the source process,
>> diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.h b/lib/librte_vhost/eventfd_link/eventfd_link.h
>> index ea619ec..38052e2 100644
>> --- a/lib/librte_vhost/eventfd_link/eventfd_link.h
>> +++ b/lib/librte_vhost/eventfd_link/eventfd_link.h
>> @@ -1,7 +1,4 @@
>>  /*-
>> - *  This file is provided under a dual BSD/GPLv2 license.  When using or
>> - *  redistributing this file, you may do so under either license.
>> - *
>>   * GPL LICENSE SUMMARY
>>   *
>>   *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> @@ -16,61 +13,28 @@
>>   *   General Public License for more details.
>>   *
>>   *   You should have received a copy of the GNU General Public License
>> - *   along with this program; if not, write to the Free Software
>> - *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
>> + *   along with this program; If not, see <http://www.gnu.org/licenses/>.
>>   *   The full GNU General Public License is included in this distribution
>>   *   in the file called LICENSE.GPL.
>>   *
>>   *   Contact Information:
>>   *   Intel Corporation
>> - *
>> - * BSD LICENSE
>> - *
>> - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> - *   All rights reserved.
>> - *
>> - *   Redistribution and use in source and binary forms, with or without
>> - *   modification, are permitted provided that the following conditions
>> - *   are met:
>> - *
>> - *   Redistributions of source code must retain the above copyright
>> - *   notice, this list of conditions and the following disclaimer.
>> - *   Redistributions in binary form must reproduce the above copyright
>> - *   notice, this list of conditions and the following disclaimer in
>> - *   the documentation and/or other materials provided with the
>> - *   distribution.
>> - *   Neither the name of Intel Corporation nor the names of its
>> - *   contributors may be used to endorse or promote products derived
>> - *   from this software without specific prior written permission.
>> - *
>> - *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> - *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> - *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> - *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> - *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> - *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> - *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> - *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> - *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> - *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> - *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> - *
>>   */
>>  
>>  #ifndef _EVENTFD_LINK_H_
>>  #define _EVENTFD_LINK_H_
>>  
>>  /*
>> - * ioctl to copy an fd entry in calling process to an fd in a target process
>> + *	ioctl to copy an fd entry in calling process to an fd in a target process
>>   */
>>  #define EVENTFD_COPY 1
>>  
>>  /*
>> - * arguements for the EVENTFD_COPY ioctl
>> + *	arguments for the EVENTFD_COPY ioctl
>>   */
>>  struct eventfd_copy {
>> -	unsigned target_fd; /* fd in the target pid */
>> -	unsigned source_fd; /* fd in the calling pid */
>> -	pid_t target_pid; /* pid of the target pid */
>> +	unsigned target_fd; /**< fd in the target pid */
>> +	unsigned source_fd; /**< fd in the calling pid */
>> +	pid_t target_pid;   /**< pid of the target process */
>>  };
>>  #endif /* _EVENTFD_LINK_H_ */
>> diff --git a/lib/librte_vhost/libvirt/qemu-wrap.py b/lib/librte_vhost/libvirt/qemu-wrap.py
>> deleted file mode 100755
>> index e2d68a0..0000000
>> --- a/lib/librte_vhost/libvirt/qemu-wrap.py
>> +++ /dev/null
>> @@ -1,367 +0,0 @@
>> -#!/usr/bin/python
>> -#/*
>> -# *   BSD LICENSE
>> -# *
>> -# *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> -# *   All rights reserved.
>> -# *
>> -# *   Redistribution and use in source and binary forms, with or without
>> -# *   modification, are permitted provided that the following conditions
>> -# *   are met:
>> -# *
>> -# *     * Redistributions of source code must retain the above copyright
>> -# *       notice, this list of conditions and the following disclaimer.
>> -# *     * Redistributions in binary form must reproduce the above copyright
>> -# *       notice, this list of conditions and the following disclaimer in
>> -# *       the documentation and/or other materials provided with the
>> -# *       distribution.
>> -# *     * Neither the name of Intel Corporation nor the names of its
>> -# *       contributors may be used to endorse or promote products derived
>> -# *       from this software without specific prior written permission.
>> -# *
>> -# *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> -# *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> -# *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> -# *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> -# *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> -# *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> -# *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> -# *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> -# *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> -# *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> -# *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> -# */
>> -
>> -#####################################################################
>> -# This script is designed to modify the call to the QEMU emulator
>> -# to support userspace vhost when starting a guest machine through
>> -# libvirt with vhost enabled. The steps to enable this are as follows
>> -# and should be run as root:
>> -#
>> -# 1. Place this script in a libvirtd's binary search PATH ($PATH)
>> -#    A good location would be in the same directory that the QEMU
>> -#    binary is located
>> -#
>> -# 2. Ensure that the script has the same owner/group and file
>> -#    permissions as the QEMU binary
>> -#
>> -# 3. Update the VM xml file using "virsh edit VM.xml"
>> -#
>> -#    3.a) Set the VM to use the launch script
>> -#
>> -#    	Set the emulator path contained in the
>> -#		<emulator><emulator/> tags
>> -#
>> -#    	e.g replace <emulator>/usr/bin/qemu-kvm<emulator/>
>> -#        with    <emulator>/usr/bin/qemu-wrap.py<emulator/>
>> -#
>> -#	 3.b) Set the VM's device's to use vhost-net offload
>> -#
>> -#		<interface type="network">
>> -#       	<model type="virtio"/>
>> -#       	<driver name="vhost"/>
>> -#		<interface/>
>> -#
>> -# 4. Enable libvirt to access our userpace device file by adding it to
>> -#    controllers cgroup for libvirtd using the following steps
>> -#
>> -#   4.a) In /etc/libvirt/qemu.conf add/edit the following lines:
>> -#         1) cgroup_controllers = [ ... "devices", ... ]
>> -#		  2) clear_emulator_capabilities = 0
>> -#         3) user = "root"
>> -#         4) group = "root"
>> -#         5) cgroup_device_acl = [
>> -#                "/dev/null", "/dev/full", "/dev/zero",
>> -#                "/dev/random", "/dev/urandom",
>> -#                "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
>> -#                "/dev/rtc", "/dev/hpet", "/dev/net/tun",
>> -#                "/dev/<devbase-name>-<index>",
>> -#            ]
>> -#
>> -#   4.b) Disable SELinux or set to permissive mode
>> -#
>> -#   4.c) Mount cgroup device controller
>> -#        "mkdir /dev/cgroup"
>> -#        "mount -t cgroup none /dev/cgroup -o devices"
>> -#
>> -#   4.d) Set hugetlbfs_mount variable - ( Optional )
>> -#        VMs using userspace vhost must use hugepage backed
>> -#        memory. This can be enabled in the libvirt XML
>> -#        config by adding a memory backing section to the
>> -#        XML config e.g.
>> -#             <memoryBacking>
>> -#             <hugepages/>
>> -#             </memoryBacking>
>> -#        This memory backing section should be added after the
>> -#        <memory> and <currentMemory> sections. This will add
>> -#        flags "-mem-prealloc -mem-path <path>" to the QEMU
>> -#        command line. The hugetlbfs_mount variable can be used
>> -#        to override the default <path> passed through by libvirt.
>> -#
>> -#        if "-mem-prealloc" or "-mem-path <path>" are not passed
>> -#        through and a vhost device is detected then these options will
>> -#        be automatically added by this script. This script will detect
>> -#        the system hugetlbfs mount point to be used for <path>. The
>> -#        default <path> for this script can be overidden by the
>> -#        hugetlbfs_dir variable in the configuration section of this script.
>> -#
>> -#
>> -#   4.e) Restart the libvirtd system process
>> -#        e.g. on Fedora "systemctl restart libvirtd.service"
>> -#
>> -#
>> -#   4.f) Edit the Configuration Parameters section of this script
>> -#        to point to the correct emulator location and set any
>> -#        addition options
>> -#
>> -# The script modifies the libvirtd Qemu call by modifying/adding
>> -# options based on the configuration parameters below.
>> -# NOTE:
>> -#     emul_path and us_vhost_path must be set
>> -#     All other parameters are optional
>> -#####################################################################
>> -
>> -
>> -#############################################
>> -# Configuration Parameters
>> -#############################################
>> -#Path to QEMU binary
>> -emul_path = "/usr/local/bin/qemu-system-x86_64"
>> -
>> -#Path to userspace vhost device file
>> -# This filename should match the --dev-basename --dev-index parameters of
>> -# the command used to launch the userspace vhost sample application e.g.
>> -# if the sample app lauch command is:
>> -#    ./build/vhost-switch ..... --dev-basename usvhost --dev-index 1
>> -# then this variable should be set to:
>> -#   us_vhost_path = "/dev/usvhost-1"
>> -us_vhost_path = "/dev/usvhost-1"
>> -
>> -#List of additional user defined emulation options. These options will
>> -#be added to all Qemu calls
>> -emul_opts_user = []
>> -
>> -#List of additional user defined emulation options for vhost only.
>> -#These options will only be added to vhost enabled guests
>> -emul_opts_user_vhost = []
>> -
>> -#For all VHOST enabled VMs, the VM memory is preallocated from hugetlbfs
>> -# Set this variable to one to enable this option for all VMs
>> -use_huge_all = 0
>> -
>> -#Instead of autodetecting, override the hugetlbfs directory by setting
>> -#this variable
>> -hugetlbfs_dir = ""
>> -
>> -#############################################
>> -
>> -
>> -#############################################
>> -# ****** Do Not Modify Below this Line ******
>> -#############################################
>> -
>> -import sys, os, subprocess
>> -
>> -
>> -#List of open userspace vhost file descriptors
>> -fd_list = []
>> -
>> -#additional virtio device flags when using userspace vhost
>> -vhost_flags = [ "csum=off",
>> -                "gso=off",
>> -                "guest_tso4=off",
>> -                "guest_tso6=off",
>> -                "guest_ecn=off"
>> -              ]
>> -
>> -
>> -#############################################
>> -# Find the system hugefile mount point.
>> -# Note:
>> -# if multiple hugetlbfs mount points exist
>> -# then the first one found will be used
>> -#############################################
>> -def find_huge_mount():
>> -
>> -    if (len(hugetlbfs_dir)):
>> -        return hugetlbfs_dir
>> -
>> -    huge_mount = ""
>> -
>> -    if (os.access("/proc/mounts", os.F_OK)):
>> -        f = open("/proc/mounts", "r")
>> -        line = f.readline()
>> -        while line:
>> -            line_split = line.split(" ")
>> -            if line_split[2] == 'hugetlbfs':
>> -                huge_mount = line_split[1]
>> -                break
>> -            line = f.readline()
>> -    else:
>> -        print "/proc/mounts not found"
>> -        exit (1)
>> -
>> -    f.close
>> -    if len(huge_mount) == 0:
>> -        print "Failed to find hugetlbfs mount point"
>> -        exit (1)
>> -
>> -    return huge_mount
>> -
>> -
>> -#############################################
>> -# Get a userspace Vhost file descriptor
>> -#############################################
>> -def get_vhost_fd():
>> -
>> -    if (os.access(us_vhost_path, os.F_OK)):
>> -        fd = os.open( us_vhost_path, os.O_RDWR)
>> -    else:
>> -        print ("US-Vhost file %s not found" %us_vhost_path)
>> -        exit (1)
>> -
>> -    return fd
>> -
>> -
>> -#############################################
>> -# Check for vhostfd. if found then replace
>> -# with our own vhost fd and append any vhost
>> -# flags onto the end
>> -#############################################
>> -def modify_netdev_arg(arg):
>> -	
>> -    global fd_list
>> -    vhost_in_use = 0
>> -    s = ''
>> -    new_opts = []
>> -    netdev_opts = arg.split(",")
>> -
>> -    for opt in netdev_opts:
>> -        #check if vhost is used
>> -        if "vhost" == opt[:5]:
>> -            vhost_in_use = 1
>> -        else:
>> -            new_opts.append(opt)
>> -
>> -    #if using vhost append vhost options
>> -    if vhost_in_use == 1:
>> -        #append vhost on option
>> -        new_opts.append('vhost=on')
>> -        #append vhostfd ption
>> -        new_fd = get_vhost_fd()
>> -        new_opts.append('vhostfd=' + str(new_fd))
>> -        fd_list.append(new_fd)
>> -
>> -    #concatenate all options
>> -    for opt in new_opts:
>> -        if len(s) > 0:
>> -			s+=','
>> -
>> -        s+=opt
>> -
>> -    return s	
>> -
>> -
>> -#############################################
>> -# Main
>> -#############################################
>> -def main():
>> -
>> -    global fd_list
>> -    global vhost_in_use
>> -    new_args = []
>> -    num_cmd_args = len(sys.argv)
>> -    emul_call = ''
>> -    mem_prealloc_set = 0
>> -    mem_path_set = 0
>> -    num = 0;
>> -
>> -    #parse the parameters
>> -    while (num < num_cmd_args):
>> -        arg = sys.argv[num]
>> -
>> -		#Check netdev +1 parameter for vhostfd
>> -        if arg == '-netdev':
>> -            num_vhost_devs = len(fd_list)
>> -            new_args.append(arg)
>> -
>> -            num+=1
>> -            arg = sys.argv[num]
>> -            mod_arg = modify_netdev_arg(arg)
>> -            new_args.append(mod_arg)
>> -
>> -            #append vhost flags if this is a vhost device
>> -            # and -device is the next arg
>> -            # i.e -device -opt1,-opt2,...,-opt3,%vhost
>> -            if (num_vhost_devs < len(fd_list)):
>> -                num+=1
>> -                arg = sys.argv[num]
>> -                if arg == '-device':
>> -                    new_args.append(arg)
>> -                    num+=1
>> -                    new_arg = sys.argv[num]
>> -                    for flag in vhost_flags:
>> -                        new_arg = ''.join([new_arg,',',flag])
>> -                    new_args.append(new_arg)
>> -                else:
>> -                    new_args.append(arg)
>> -        elif arg == '-mem-prealloc':
>> -            mem_prealloc_set = 1
>> -            new_args.append(arg)
>> -        elif arg == '-mem-path':
>> -            mem_path_set = 1
>> -            new_args.append(arg)
>> -
>> -        else:
>> -            new_args.append(arg)
>> -
>> -        num+=1
>> -
>> -    #Set Qemu binary location
>> -    emul_call+=emul_path
>> -    emul_call+=" "
>> -
>> -    #Add prealloc mem options if using vhost and not already added
>> -    if ((len(fd_list) > 0) and (mem_prealloc_set == 0)):
>> -        emul_call += "-mem-prealloc "
>> -
>> -    #Add mempath mem options if using vhost and not already added
>> -    if ((len(fd_list) > 0) and (mem_path_set == 0)):
>> -        #Detect and add hugetlbfs mount point
>> -        mp = find_huge_mount()
>> -        mp = "".join(["-mem-path ", mp])
>> -        emul_call += mp
>> -        emul_call += " "
>> -
>> -
>> -    #add user options
>> -    for opt in emul_opts_user:
>> -        emul_call += opt
>> -        emul_call += " "
>> -
>> -    #Add add user vhost only options
>> -    if len(fd_list) > 0:
>> -        for opt in emul_opts_user_vhost:
>> -            emul_call += opt
>> -            emul_call += " "
>> -
>> -    #Add updated libvirt options
>> -    iter_args = iter(new_args)
>> -    #skip 1st arg i.e. call to this script
>> -    next(iter_args)
>> -    for arg in iter_args:
>> -        emul_call+=str(arg)
>> -        emul_call+= " "
>> -
>> -    #Call QEMU
>> -    subprocess.call(emul_call, shell=True)
>> -
>> -
>> -    #Close usvhost files
>> -    for fd in fd_list:
>> -        os.close(fd)
>> -
>> -
>> -if __name__ == "__main__":
>> -    main()
>> -
>> diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
>> index 00b1328..7a05dab 100644
>> --- a/lib/librte_vhost/rte_virtio_net.h
>> +++ b/lib/librte_vhost/rte_virtio_net.h
>> @@ -34,11 +34,6 @@
>>  #ifndef _VIRTIO_NET_H_
>>  #define _VIRTIO_NET_H_
>>  
>> -/**
>> - * @file
>> - * Interface to vhost net
>> - */
>> -
>>  #include <stdint.h>
>>  #include <linux/virtio_ring.h>
>>  #include <linux/virtio_net.h>
>> @@ -48,66 +43,38 @@
>>  #include <rte_mempool.h>
>>  #include <rte_mbuf.h>
>>  
>> -/* Used to indicate that the device is running on a data core */
>> -#define VIRTIO_DEV_RUNNING 1
>> -
>> -/* Backend value set by guest. */
>> -#define VIRTIO_DEV_STOPPED -1
>> -
>> +#define VIRTIO_DEV_RUNNING 1  /**< Used to indicate that the device is running on a data core. */
>> +#define VIRTIO_DEV_STOPPED -1 /**< Backend value set by guest. */
>>  
>>  /* Enum for virtqueue management. */
>>  enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
>>  
>> -#define BUF_VECTOR_MAX 256
>> -
>> -/**
>> - * Structure contains buffer address, length and descriptor index
>> - * from vring to do scatter RX.
>> - */
>> -struct buf_vector {
>> -	uint64_t buf_addr;
>> -	uint32_t buf_len;
>> -	uint32_t desc_idx;
>> -};
>> -
>>  /**
>>   * Structure contains variables relevant to RX/TX virtqueues.
>>   */
>>  struct vhost_virtqueue {
>> -	struct vring_desc	*desc;			/**< Virtqueue descriptor ring. */
>> -	struct vring_avail	*avail;			/**< Virtqueue available ring. */
>> -	struct vring_used	*used;			/**< Virtqueue used ring. */
>> -	uint32_t		size;			/**< Size of descriptor ring. */
>> -	uint32_t		backend;		/**< Backend value to determine if device should started/stopped. */
>> -	uint16_t		vhost_hlen;		/**< Vhost header length (varies depending on RX merge buffers. */
>> -	volatile uint16_t	last_used_idx;		/**< Last index used on the available ring */
>> -	volatile uint16_t	last_used_idx_res;	/**< Used for multiple devices reserving buffers. */
>> -	eventfd_t		callfd;			/**< Currently unused as polling mode is enabled. */
>> -	eventfd_t		kickfd;			/**< Used to notify the guest (trigger interrupt). */
>> -	struct buf_vector	buf_vec[BUF_VECTOR_MAX];	/**< for scatter RX. */
>> -} __rte_cache_aligned;
>> -
>> -/**
>> - * Device structure contains all configuration information relating to the device.
>> - */
>> -struct virtio_net {
>> -	struct vhost_virtqueue	*virtqueue[VIRTIO_QNUM];	/**< Contains all virtqueue information. */
>> -	struct virtio_memory	*mem;		/**< QEMU memory and memory region information. */
>> -	uint64_t		features;	/**< Negotiated feature set. */
>> -	uint64_t		device_fh;	/**< device identifier. */
>> -	uint32_t		flags;		/**< Device flags. Only used to check if device is running on data core. */
>> -	void			*priv;		/**< private context */
>> +	struct vring_desc    *desc;             /**< descriptor ring. */
>> +	struct vring_avail   *avail;            /**< available ring. */
>> +	struct vring_used    *used;             /**< used ring. */
>> +	uint32_t             size;              /**< Size of descriptor ring. */
>> +	uint32_t             backend;           /**< Backend value to determine if device should be started/stopped. */
>> +	uint16_t             vhost_hlen;        /**< Vhost header length (varies depending on RX merge buffers). */
>> +	volatile uint16_t    last_used_idx;     /**< Last index used on the available ring. */
>> +	volatile uint16_t    last_used_idx_res; /**< Used for multiple devices reserving buffers. */
>> +	eventfd_t            callfd;            /**< Currently unused as polling mode is enabled. */
>> +	eventfd_t            kickfd;            /**< Used to notify the guest (trigger interrupt). */
>>  } __rte_cache_aligned;
>>  
>>  /**
>> - * Information relating to memory regions including offsets to addresses in QEMUs memory file.
>> + * Information relating to memory regions including offsets to
>> + * addresses in QEMU's memory file.
>>   */
>>  struct virtio_memory_regions {
>> -	uint64_t	guest_phys_address;	/**< Base guest physical address of region. */
>> -	uint64_t	guest_phys_address_end;	/**< End guest physical address of region. */
>> -	uint64_t	memory_size;		/**< Size of region. */
>> -	uint64_t	userspace_address;	/**< Base userspace address of region. */
>> -	uint64_t	address_offset;		/**< Offset of region for address translation. */
>> +	uint64_t    guest_phys_address;     /**< Base guest physical address of region. */
>> +	uint64_t    guest_phys_address_end; /**< End guest physical address of region. */
>> +	uint64_t    memory_size;            /**< Size of region. */
>> +	uint64_t    userspace_address;      /**< Base userspace address of region. */
>> +	uint64_t    address_offset;         /**< Offset of region for address translation. */
>>  };
>>  
>>  
>> @@ -115,21 +82,34 @@ struct virtio_memory_regions {
>>   * Memory structure includes region and mapping information.
>>   */
>>  struct virtio_memory {
>> -	uint64_t	base_address;	/**< Base QEMU userspace address of the memory file. */
>> -	uint64_t	mapped_address;	/**< Mapped address of memory file base in our applications memory space. */
>> -	uint64_t	mapped_size;	/**< Total size of memory file. */
>> -	uint32_t	nregions;	/**< Number of memory regions. */
>> +	uint64_t    base_address;    /**< Base QEMU userspace address of the memory file. */
>> +	uint64_t    mapped_address;  /**< Mapped address of memory file base in our application's memory space. */
>> +	uint64_t    mapped_size;     /**< Total size of memory file. */
>> +	uint32_t    nregions;        /**< Number of memory regions. */
>>  	struct virtio_memory_regions      regions[0]; /**< Memory region information. */
>>  };
>>  
>>  /**
>> + * Device structure contains all configuration information relating to the device.
>> + */
>> +struct virtio_net {
>> +	struct vhost_virtqueue  *virtqueue[VIRTIO_QNUM]; /**< Contains all virtqueue information. */
>> +	struct virtio_memory    *mem;                    /**< QEMU memory and memory region information. */
>> +	uint64_t features;    /**< Negotiated feature set. */
>> +	uint64_t device_fh;   /**< Device identifier. */
>> +	uint32_t flags;       /**< Device flags. Only used to check if device is running on data core. */
>> +	void     *priv;
>> +} __rte_cache_aligned;
>> +
>> +/**
>>   * Device operations to add/remove device.
>>   */
>>  struct virtio_net_device_ops {
>> -	int (*new_device)(struct virtio_net *);	/**< Add device. */
>> -	void (*destroy_device)(volatile struct virtio_net *);	/**< Remove device. */
>> +	int (*new_device)(struct virtio_net *); /**< Add device. */
>> +	void (*destroy_device)(struct virtio_net *); /**< Remove device. */
>>  };
>>  
>> +
>>  static inline uint16_t __attribute__((always_inline))
>>  rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id)
>>  {
>> @@ -179,7 +159,7 @@ int rte_vhost_driver_register(const char *dev_name);
>>  
>>  /* Register callbacks. */
>>  int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const);
>> -/* Start vhost driver session blocking loop. */
>> +
>>  int rte_vhost_driver_session_start(void);
>>  
>>  /**
>> @@ -192,8 +172,8 @@ int rte_vhost_driver_session_start(void);
>>   * @return
>>   *  num of packets enqueued
>>   */
>> -uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
>> -	struct rte_mbuf **pkts, uint16_t count);
>> +uint32_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
>> +	struct rte_mbuf **pkts, uint32_t count);
>>  
>>  /**
>>   * This function gets guest buffers from the virtio device TX virtqueue,
>> @@ -206,7 +186,7 @@ uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
>>   * @return
>>   *  num of packets dequeued
>>   */
>> -uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>> -	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
>> +uint32_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>> +	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count);
>>  
>>  #endif /* _VIRTIO_NET_H_ */
>> diff --git a/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
>> new file mode 100644
>> index 0000000..4671643
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
>> @@ -0,0 +1,436 @@
>> +/*-
>> + *   BSD LICENSE
>> + *
>> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + *   All rights reserved.
>> + *
>> + *   Redistribution and use in source and binary forms, with or without
>> + *   modification, are permitted provided that the following conditions
>> + *   are met:
>> + *
>> + *     * Redistributions of source code must retain the above copyright
>> + *       notice, this list of conditions and the following disclaimer.
>> + *     * Redistributions in binary form must reproduce the above copyright
>> + *       notice, this list of conditions and the following disclaimer in
>> + *       the documentation and/or other materials provided with the
>> + *       distribution.
>> + *     * Neither the name of Intel Corporation nor the names of its
>> + *       contributors may be used to endorse or promote products derived
>> + *       from this software without specific prior written permission.
>> + *
>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <fuse/cuse_lowlevel.h>
>> +#include <linux/limits.h>
>> +#include <linux/vhost.h>
>> +#include <linux/virtio_net.h>
>> +#include <string.h>
>> +#include <unistd.h>
>> +#include <sys/ioctl.h>
>> +
>> +#include <rte_ethdev.h>
>> +#include <rte_log.h>
>> +#include <rte_string_fns.h>
>> +#include <rte_virtio_net.h>
>> +
>> +#include "virtio-net-cdev.h"
>> +#include "vhost-net.h"
>> +#include "eventfd_link/eventfd_link.h"
>> +
>> +#define FUSE_OPT_DUMMY "\0\0"
>> +#define FUSE_OPT_FORE  "-f\0\0"
>> +#define FUSE_OPT_NOMULTI "-s\0\0"
>> +
>> +static const uint32_t default_major = 231;
>> +static const uint32_t default_minor = 1;
>> +static const char cuse_device_name[] = "/dev/cuse";
>> +static const char default_cdev[] = "vhost-net";
>> +static const char eventfd_cdev[] = "/dev/eventfd-link";
>> +
>> +static struct fuse_session *session;
>> +const struct vhost_net_device_ops const *ops;
>> +
>> +/*
>> + * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
>> + * when the device is added to the device linked list.
>> + */
>> +static struct vhost_device_ctx
>> +fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
>> +{
>> +	struct vhost_device_ctx ctx;
>> +	struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
>> +
>> +	ctx.pid = req_ctx->pid;
>> +	ctx.fh = fi->fh;
>> +
>> +	return ctx;
>> +}
>> +
>> +/*
>> + * When the device is created in QEMU it gets initialised here and
>> + * added to the device linked list.
>> + */
>> +static void
>> +vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
>> +{
>> +	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> +	int err = 0;
>> +
>> +	err = ops->new_device(ctx);
>> +	if (err == -1) {
>> +		fuse_reply_err(req, EPERM);
>> +		return;
>> +	}
>> +
>> +	fi->fh = err;
>> +
>> +	RTE_LOG(INFO, VHOST_CONFIG,
>> +		"(%"PRIu64") Device configuration started\n", fi->fh);
>> +	fuse_reply_open(req, fi);
>> +}
>> +
>> +/*
>> + * When QEMU is shut down or killed the device gets released.
>> + */
>> +static void
>> +vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
>> +{
>> +	int err = 0;
>> +	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> +
>> +	ops->destroy_device(ctx);
>> +	RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
>> +	fuse_reply_err(req, err);
>> +}
>> +
>> +/*
>> + * Boilerplate code for CUSE IOCTL
>> + * Implicit arguments: ctx, req, result.
>> + */
>> +#define VHOST_IOCTL(func) do {	\
>> +	result = (func)(ctx);	\
>> +	fuse_reply_ioctl(req, result, NULL, 0);	\
>> +} while (0)
>> +
>> +/*
>> + * Boilerplate IOCTL RETRY
>> + * Implicit arguments: req.
>> + */
>> +#define VHOST_IOCTL_RETRY(size_r, size_w) do {	\
>> +	struct iovec iov_r = { arg, (size_r) };	\
>> +	struct iovec iov_w = { arg, (size_w) };	\
>> +	fuse_reply_ioctl_retry(req, &iov_r,	\
>> +		(size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
>> +} while (0)
>> +
>> +/*
>> + * Boilerplate code for CUSE Read IOCTL
>> + * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
>> + */
>> +#define VHOST_IOCTL_R(type, var, func) do {	\
>> +	if (!in_bufsz) {			\
>> +		VHOST_IOCTL_RETRY(sizeof(type), 0);\
>> +	} else {	\
>> +		(var) = *(const type*)in_buf;	\
>> +		result = func(ctx, &(var));	\
>> +		fuse_reply_ioctl(req, result, NULL, 0);\
>> +	}	\
>> +} while (0)
>> +
>> +/*
>> + * Boilerplate code for CUSE Write IOCTL
>> + * Implicit arguments: ctx, req, result, out_bufsz.
>> + */
>> +#define VHOST_IOCTL_W(type, var, func) do {	\
>> +	if (!out_bufsz) {			\
>> +		VHOST_IOCTL_RETRY(0, sizeof(type));\
>> +	} else {	\
>> +		result = (func)(ctx, &(var));\
>> +		fuse_reply_ioctl(req, result, &(var), sizeof(type));\
>> +	} \
>> +} while (0)
>> +
>> +/*
>> + * Boilerplate code for CUSE Read/Write IOCTL
>> + * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
>> + */
>> +#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do {	\
>> +	if (!in_bufsz) {	\
>> +		VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
>> +	} else {	\
>> +		(var1) = *(const type1*) (in_buf);	\
>> +		result = (func)(ctx, (var1), &(var2));	\
>> +		fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
>> +	} \
>> +} while (0)
>> +
>> +/*
>> + * This function uses the eventfd_link kernel module to copy an eventfd file
>> + * descriptor provided by QEMU into our process space.
>> + */
>> +static int
>> +eventfd_copy(int target_fd, int target_pid)
>> +{
>> +	int eventfd_link, ret;
>> +	struct eventfd_copy eventfd_copy;
>> +	int fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>> +
>> +	if (fd == -1)
>> +		return -1;
>> +
>> +	/* Open the character device to the kernel module. */
>> +	/* TODO: check this at startup rather than failing only once the VM boots! */
>> +	eventfd_link = open(eventfd_cdev, O_RDWR);
>> +	if (eventfd_link < 0) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"eventfd_link module is not loaded\n");
>> +		return -1;
>> +	}
>> +
>> +	eventfd_copy.source_fd = fd;
>> +	eventfd_copy.target_fd = target_fd;
>> +	eventfd_copy.target_pid = target_pid;
>> +	/* Call the IOCTL to copy the eventfd. */
>> +	ret = ioctl(eventfd_link, EVENTFD_COPY, &eventfd_copy);
>> +	close(eventfd_link);
>> +
>> +	if (ret < 0) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"EVENTFD_COPY ioctl failed\n");
>> +		return -1;
>> +	}
>> +
>> +	return fd;
>> +}
>> +
>> +/*
>> + * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on
>> + * the type of IOCTL a buffer is requested to read or to write. This
>> + * request is handled by FUSE and the buffer is then given to CUSE.
>> + *
>> + * Every path through this handler must answer "req" exactly once, either
>> + * by asking FUSE to retry with the user buffer attached or with a final
>> + * reply.
>> + *
>> + * req:       FUSE request to answer.
>> + * cmd:       ioctl number issued on the character device.
>> + * arg:       user-space address given to the ioctl (presumably consumed by
>> + *            the VHOST_IOCTL_* retry helpers defined earlier in the file).
>> + * fi:        FUSE file info, passed to fuse_req_to_vhost_ctx() together
>> + *            with req to build the device context.
>> + * flags:     unused.
>> + * in_buf:    data copied in from the caller; NULL/empty before a retry.
>> + * in_bufsz:  number of bytes available in in_buf.
>> + * out_bufsz: number of bytes the caller is able to receive.
>> + */
>> +static void
>> +vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
>> +		struct fuse_file_info *fi, __rte_unused unsigned flags,
>> +		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
>> +{
>> +	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> +	struct vhost_vring_file file;
>> +	struct vhost_vring_state state;
>> +	struct vhost_vring_addr addr;
>> +	uint64_t features;
>> +	uint32_t index;
>> +	int result = 0;
>> +
>> +	switch (cmd) {
>> +	case VHOST_NET_SET_BACKEND:
>> +		LOG_DEBUG(VHOST_CONFIG,
>> +			"(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
>> +		VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
>> +		break;
>> +
>> +	case VHOST_GET_FEATURES:
>> +		LOG_DEBUG(VHOST_CONFIG,
>> +			"(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
>> +		VHOST_IOCTL_W(uint64_t, features, ops->get_features);
>> +		break;
>> +
>> +	case VHOST_SET_FEATURES:
>> +		LOG_DEBUG(VHOST_CONFIG,
>> +			"(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
>> +		VHOST_IOCTL_R(uint64_t, features, ops->set_features);
>> +		break;
>> +
>> +	case VHOST_RESET_OWNER:
>> +		LOG_DEBUG(VHOST_CONFIG,
>> +			"(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
>> +		VHOST_IOCTL(ops->reset_owner);
>> +		break;
>> +
>> +	case VHOST_SET_OWNER:
>> +		LOG_DEBUG(VHOST_CONFIG,
>> +			"(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
>> +		VHOST_IOCTL(ops->set_owner);
>> +		break;
>> +
>> +	case VHOST_SET_MEM_TABLE:
>> +		/*
>> +		 * Three-pass protocol: fetch the fixed header, then fetch the
>> +		 * header plus nregions entries, then apply the table.
>> +		 * TODO fix race condition: mem_temp is static and therefore
>> +		 * shared between all requests.
>> +		 */
>> +		LOG_DEBUG(VHOST_CONFIG,
>> +			"(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
>> +		static struct vhost_memory mem_temp;
>> +		switch (in_bufsz) {
>> +		case 0:
>> +			VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
>> +			break;
>> +
>> +		case sizeof(struct vhost_memory):
>> +			mem_temp = *(const struct vhost_memory *) in_buf;
>> +
>> +			if (mem_temp.nregions > 0) {
>> +				VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
>> +					(sizeof(struct vhost_memory_region) *
>> +						mem_temp.nregions), 0);
>> +			} else {
>> +				result = -1;
>> +				fuse_reply_ioctl(req, result, NULL, 0);
>> +			}
>> +			break;
>> +
>> +		default:
>> +			/*
>> +			 * Refuse a buffer that cannot hold the number of
>> +			 * regions remembered from the previous pass, so the
>> +			 * table is never read past the end of in_buf.
>> +			 */
>> +			if ((uint64_t)in_bufsz < sizeof(struct vhost_memory) +
>> +				((uint64_t)sizeof(struct vhost_memory_region) *
>> +					mem_temp.nregions)) {
>> +				result = -1;
>> +				fuse_reply_err(req, EINVAL);
>> +				break;
>> +			}
>> +			result = cuse_set_mem_table(ctx, in_buf,
>> +				mem_temp.nregions);
>> +			if (result)
>> +				fuse_reply_err(req, EINVAL);
>> +			else
>> +				fuse_reply_ioctl(req, result, NULL, 0);
>> +		}
>> +		break;
>> +
>> +	case VHOST_SET_VRING_NUM:
>> +		LOG_DEBUG(VHOST_CONFIG,
>> +			"(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
>> +		VHOST_IOCTL_R(struct vhost_vring_state, state, ops->set_vring_num);
>> +		break;
>> +
>> +	case VHOST_SET_VRING_BASE:
>> +		LOG_DEBUG(VHOST_CONFIG,
>> +			"(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
>> +		VHOST_IOCTL_R(struct vhost_vring_state, state, ops->set_vring_base);
>> +		break;
>> +
>> +	case VHOST_GET_VRING_BASE:
>> +		LOG_DEBUG(VHOST_CONFIG,
>> +			"(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
>> +		VHOST_IOCTL_RW(uint32_t, index,
>> +			struct vhost_vring_state, state, ops->get_vring_base);
>> +		break;
>> +
>> +	case VHOST_SET_VRING_ADDR:
>> +		LOG_DEBUG(VHOST_CONFIG,
>> +			"(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
>> +		VHOST_IOCTL_R(struct vhost_vring_addr, addr, ops->set_vring_addr);
>> +		break;
>> +
>> +	case VHOST_SET_VRING_KICK:
>> +	case VHOST_SET_VRING_CALL:
>> +		if (!in_buf) {
>> +			VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0);
>> +		} else {
>> +			int fd;
>> +
>> +			file = *(const struct vhost_vring_file *)in_buf;
>> +			LOG_DEBUG(VHOST_CONFIG,
>> +				"kick/call idx:%d fd:%d\n", file.index, file.fd);
>> +			/* file.fd is only valid in the caller; get a local copy. */
>> +			fd = eventfd_copy(file.fd, ctx.pid);
>> +			if (fd < 0) {
>> +				/* Reply once and stop: no callback with fd -1. */
>> +				result = -1;
>> +				fuse_reply_ioctl(req, result, NULL, 0);
>> +				break;
>> +			}
>> +			file.fd = fd;
>> +			/*
>> +			 * Call the callbacks directly: if VHOST_IOCTL_R still
>> +			 * reloads its variable from in_buf (as the previous
>> +			 * vhost-net-cdev.c version did), using it here would
>> +			 * overwrite the copied fd with the caller's fd.
>> +			 */
>> +			if (cmd == VHOST_SET_VRING_KICK)
>> +				result = ops->set_vring_kick(ctx, &file);
>> +			else
>> +				result = ops->set_vring_call(ctx, &file);
>> +			fuse_reply_ioctl(req, result, NULL, 0);
>> +		}
>> +		break;
>> +
>> +	default:
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
>> +		result = -1;
>> +		fuse_reply_ioctl(req, result, NULL, 0);
>> +	}
>> +
>> +	if (result < 0)
>> +		LOG_DEBUG(VHOST_CONFIG,
>> +			"(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
>> +	else
>> +		LOG_DEBUG(VHOST_CONFIG,
>> +			"(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
>> +}
>> +
>> +/*
>> + * CUSE callback table: routes ioctl, release and open requests on the
>> + * character device to the handlers in this file.
>> + */
>> +static const struct cuse_lowlevel_ops vhost_net_ops = {
>> +	.ioctl		= vhost_net_ioctl,
>> +	.release	= vhost_net_release,
>> +	.open		= vhost_net_open,
>> +};
>> +
>> +/*
>> + * Register a CUSE character device named dev_name.
>> + *
>> + * Fails with -1 when the CUSE control device is not accessible, when
>> + * /dev/<dev_name> is already present, or when the CUSE session cannot be
>> + * set up. On success the session and the virtio-net callback table are
>> + * stored in the file-level "session" and "ops" variables and 0 is returned.
>> + */
>> +int
>> +rte_vhost_driver_register(const char *dev_name)
>> +{
>> +	char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
>> +	char fuse_opt_fore[] = FUSE_OPT_FORE;
>> +	char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
>> +	char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
>> +
>> +	char devname_arg[PATH_MAX] = "";
>> +	char dev_path[PATH_MAX] = "";
>> +	const char *dev_info[] = { devname_arg };
>> +	struct cuse_info ci;
>> +
>> +	/* Without read/write access to the CUSE node nothing can be set up. */
>> +	if (access(cuse_device_name, R_OK | W_OK) < 0) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"char device %s can't be accessed, maybe not exist\n",
>> +			cuse_device_name);
>> +		return -1;
>> +	}
>> +
>> +	/*
>> +	 * Build the DEVNAME argument handed to CUSE and the path under which
>> +	 * the device node is expected to appear.
>> +	 */
>> +	snprintf(devname_arg, PATH_MAX, "DEVNAME=%s", dev_name);
>> +	snprintf(dev_path, PATH_MAX, "/dev/%s", dev_name);
>> +
>> +	/* Refuse to register on top of an existing device node. */
>> +	if (access(dev_path, F_OK) == 0) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"char device %s already exists\n", dev_path);
>> +		return -1;
>> +	}
>> +
>> +	memset(&ci, 0, sizeof(ci));
>> +	ci.flags = CUSE_UNRESTRICTED_IOCTL;
>> +	ci.dev_info_argv = dev_info;
>> +	ci.dev_info_argc = 1;
>> +	ci.dev_minor = default_minor;
>> +	ci.dev_major = default_major;
>> +
>> +	ops = get_virtio_net_callbacks();
>> +
>> +	session = cuse_lowlevel_setup(3, fuse_argv,
>> +			&ci, &vhost_net_ops, 0, NULL);
>> +
>> +	return (session == NULL) ? -1 : 0;
>> +}
>> +
>> +/**
>> + * Run the CUSE session loop so that the application starts receiving
>> + * open, release and ioctl requests. The loop's own result is discarded;
>> + * 0 is returned once the loop has ended.
>> + */
>> +int
>> +rte_vhost_driver_session_start(void)
>> +{
>> +	(void)fuse_session_loop(session);
>> +
>> +	return 0;
>> +}
>> diff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
>> new file mode 100644
>> index 0000000..5c16aa5
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
>> @@ -0,0 +1,314 @@
>> +/*-
>> + *   BSD LICENSE
>> + *
>> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + *   All rights reserved.
>> + *
>> + *   Redistribution and use in source and binary forms, with or without
>> + *   modification, are permitted provided that the following conditions
>> + *   are met:
>> + *
>> + *     * Redistributions of source code must retain the above copyright
>> + *       notice, this list of conditions and the following disclaimer.
>> + *     * Redistributions in binary form must reproduce the above copyright
>> + *       notice, this list of conditions and the following disclaimer in
>> + *       the documentation and/or other materials provided with the
>> + *       distribution.
>> + *     * Neither the name of Intel Corporation nor the names of its
>> + *       contributors may be used to endorse or promote products derived
>> + *       from this software without specific prior written permission.
>> + *
>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <dirent.h>
>> +#include <linux/vhost.h>
>> +#include <linux/virtio_net.h>
>> +#include <fuse/cuse_lowlevel.h>
>> +#include <stddef.h>
>> +#include <string.h>
>> +#include <stdlib.h>
>> +#include <sys/eventfd.h>
>> +#include <sys/mman.h>
>> +#include <sys/types.h>
>> +#include <unistd.h>
>> +#include <errno.h>
>> +
>> +#include <rte_log.h>
>> +
>> +#include "vhost-net.h"
>> +#include "virtio-net-cdev.h"
>> +
>> +extern struct vhost_net_device_ops const *ops; /* Callback table, defined elsewhere. */
>> +
>> +/* Size of the buffer used to read one line of the maps file. */
>> +static const uint32_t BUFSIZE = PATH_MAX;
>> +
>> +/* Size of the prot char array in struct procmap. */
>> +#define PROT_SZ 5
>> +
>> +/* Number of fields split out of each maps line (one per procmap member). */
>> +#define PROCMAP_SZ 8
>> +
>> +/* Fields of one maps-file line, as parsed by host_memory_map(). */
>> +struct procmap {
>> +	uint64_t va_start;	/* Start virtual address of the mapping. */
>> +	uint64_t len;		/* End address as parsed; becomes the length once the mapping is matched. */
>> +	uint64_t pgoff;		/* Parsed but not used. */
>> +	uint32_t maj;		/* Parsed but not used. */
>> +	uint32_t min;		/* Parsed but not used. */
>> +	uint32_t ino;		/* Parsed but not used. */
>> +	char prot[PROT_SZ];	/* Copied but not used. */
>> +	char fname[PATH_MAX];	/* Last field of the line: name of the mapped file. */
>> +};
>> +
>> +/*
>> + * Convert one complete token of a maps line to a number in the given base.
>> + * Returns 0 on success, -1 if the token is empty, is followed by other
>> + * characters or is out of range.
>> + */
>> +static int
>> +procmap_parse_num(const char *token, int base, uint64_t *value)
>> +{
>> +	char *end = NULL;
>> +
>> +	errno = 0;
>> +	*value = strtoull(token, &end, base);
>> +	if ((*token == '\0') || (end == NULL) || (*end != '\0') ||
>> +		(errno != 0))
>> +		return -1;
>> +
>> +	return 0;
>> +}
>> +
>> +/*
>> + * Locate the file containing QEMU's memory space and
>> + * map it to our address space.
>> + *
>> + * The mapping that starts at "addr" is looked up in /proc/<pid>/maps, the
>> + * file named on that line is then searched for among the entries of
>> + * /proc/<pid>/fd, and the matching entry is opened and mmap()ed.
>> + *
>> + * pid:            process whose memory file is wanted.
>> + * addr:           start address of the wanted mapping in that process.
>> + * mapped_address: on success, address of the new mapping in this process.
>> + * mapped_size:    on success, length of the new mapping in bytes.
>> + *
>> + * Returns 0 on success and -1 on any failure.
>> + */
>> +static int
>> +host_memory_map(pid_t pid, uint64_t addr,
>> +	uint64_t *mapped_address, uint64_t *mapped_size)
>> +{
>> +	struct dirent *dptr = NULL;
>> +	struct procmap procmap;
>> +	DIR *dp = NULL;
>> +	int fd;
>> +	int i;
>> +	char memfile[PATH_MAX];
>> +	char mapfile[PATH_MAX];
>> +	char procdir[PATH_MAX];
>> +	char resolved_path[PATH_MAX];
>> +	FILE *fmap;
>> +	void *map;
>> +	uint8_t found = 0;
>> +	char line[BUFSIZE];
>> +	char dlm[] = "-   :   ";
>> +	char *str, *sp, *in[PROCMAP_SZ];
>> +	uint64_t dev_major, dev_minor, inode;
>> +
>> +	/* Path where mem files are located. */
>> +	snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
>> +	/* Maps file used to locate mem file. */
>> +	snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
>> +
>> +	fmap = fopen(mapfile, "r");
>> +	if (fmap == NULL) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"Failed to open maps file for pid %d\n", pid);
>> +		return -1;
>> +	}
>> +
>> +	/* Read through maps file until we find out base_address. */
>> +	while (fgets(line, BUFSIZE, fmap) != NULL) {
>> +		str = line;
>> +		errno = 0;
>> +		/*
>> +		 * Split line in to fields. Field i is cut with the delimiter
>> +		 * set that starts at dlm[i].
>> +		 */
>> +		for (i = 0; i < PROCMAP_SZ; i++) {
>> +			in[i] = strtok_r(str, &dlm[i], &sp);
>> +			if ((in[i] == NULL) || (errno != 0)) {
>> +				fclose(fmap);
>> +				return -1;
>> +			}
>> +			str = NULL;
>> +		}
>> +
>> +		/*
>> +		 * Convert each numeric field. All are hexadecimal except the
>> +		 * inode, which the maps file prints in decimal.
>> +		 */
>> +		if ((procmap_parse_num(in[0], 16, &procmap.va_start) != 0) ||
>> +			(procmap_parse_num(in[1], 16, &procmap.len) != 0) ||
>> +			(procmap_parse_num(in[3], 16, &procmap.pgoff) != 0) ||
>> +			(procmap_parse_num(in[4], 16, &dev_major) != 0) ||
>> +			(procmap_parse_num(in[5], 16, &dev_minor) != 0) ||
>> +			(procmap_parse_num(in[6], 10, &inode) != 0)) {
>> +			fclose(fmap);
>> +			return -1;
>> +		}
>> +		procmap.maj = (uint32_t)dev_major;
>> +		procmap.min = (uint32_t)dev_minor;
>> +		procmap.ino = (uint32_t)inode;
>> +
>> +		/*
>> +		 * Bounded string copies: the tokens point into line[], so a
>> +		 * fixed-size memcpy() would read past the end of that buffer.
>> +		 */
>> +		snprintf(procmap.prot, sizeof(procmap.prot), "%s", in[2]);
>> +		snprintf(procmap.fname, sizeof(procmap.fname), "%s", in[7]);
>> +
>> +		if (procmap.va_start == addr) {
>> +			/* in[1] held the end address; turn it into a length. */
>> +			procmap.len = procmap.len - procmap.va_start;
>> +			found = 1;
>> +			break;
>> +		}
>> +	}
>> +	fclose(fmap);
>> +
>> +	if (!found) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"Failed to find memory file in pid %d maps file\n", pid);
>> +		return -1;
>> +	}
>> +
>> +	/* Find the guest memory file among the process fds. */
>> +	dp = opendir(procdir);
>> +	if (dp == NULL) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"Cannot open pid %d process directory\n",
>> +			pid);
>> +		return -1;
>> +
>> +	}
>> +
>> +	found = 0;
>> +
>> +	/* Read the fd directory contents. */
>> +	while (NULL != (dptr = readdir(dp))) {
>> +		snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
>> +				pid, dptr->d_name);
>> +		/*
>> +		 * Start from an empty buffer so stale or uninitialised data
>> +		 * is never compared below.
>> +		 * NOTE(review): the prefix match below appears to rely on
>> +		 * realpath() leaving the unresolvable path in resolved_path
>> +		 * when it fails (a GNU extension), e.g. for a backing file
>> +		 * that has been deleted - confirm before tightening this.
>> +		 * Only an entry that produced no path at all is skipped.
>> +		 */
>> +		resolved_path[0] = '\0';
>> +		if ((realpath(memfile, resolved_path) == NULL) &&
>> +			(resolved_path[0] == '\0'))
>> +			continue;
>> +		if (strncmp(resolved_path, procmap.fname,
>> +			strnlen(procmap.fname, PATH_MAX)) == 0) {
>> +			found = 1;
>> +			break;
>> +		}
>> +	}
>> +
>> +	closedir(dp);
>> +
>> +	if (found == 0) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"Failed to find memory file for pid %d\n",
>> +			pid);
>> +		return -1;
>> +	}
>> +	/* Open the shared memory file and map the memory into this process. */
>> +	fd = open(memfile, O_RDWR);
>> +
>> +	if (fd == -1) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"Failed to open %s for pid %d\n",
>> +			memfile, pid);
>> +		return -1;
>> +	}
>> +
>> +	map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE ,
>> +			MAP_POPULATE|MAP_SHARED, fd, 0);
>> +	/* The descriptor is closed right after mmap(), whether or not it worked. */
>> +	close(fd);
>> +
>> +	if (map == MAP_FAILED) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"Error mapping the file %s for pid %d\n",
>> +			memfile, pid);
>> +		return -1;
>> +	}
>> +
>> +	/* Hand the address and size of the new mapping back to the caller. */
>> +	*mapped_address = (uint64_t)(uintptr_t)map;
>> +	*mapped_size = procmap.len;
>> +
>> +	/* Log the mapped size itself, not the address of the out-parameter. */
>> +	LOG_DEBUG(VHOST_CONFIG,
>> +		"Mem File: %s->%s - Size: %llu - VA: %p\n",
>> +		memfile, resolved_path,
>> +		(unsigned long long)*mapped_size, map);
>> +
>> +	return 0;
>> +}
>> +
>> +/*
>> + * Convert the vhost memory table received from the SET_MEM_TABLE ioctl into
>> + * virtio_memory_regions, map the guest memory file into this process and
>> + * pass the regions on to ops->set_mem_table().
>> + *
>> + * ctx:              device context; ctx.pid names the process to map from.
>> + * mem_regions_addr: memory table header followed by its region entries.
>> + * nregions:         number of region entries that follow the header.
>> + *
>> + * Returns 0 on success, -1 if the table is missing or too large, if no
>> + * region starting at guest physical address 0 supplies a base address, or
>> + * if the memory file cannot be mapped.
>> + */
>> +int
>> +cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory *mem_regions_addr,
>> +	uint32_t nregions)
>> +{
>> +	uint32_t idx;
>> +	struct virtio_memory_regions regions[8]; /* VHOST_MAX_MEMORY_REGIONS */
>> +	const struct vhost_memory_region *mem_regions;
>> +	uint64_t base_address = 0, mapped_address = 0, mapped_size = 0;
>> +
>> +	/* The count comes from the ioctl caller: never overrun regions[]. */
>> +	if ((mem_regions_addr == NULL) ||
>> +		(nregions > sizeof(regions) / sizeof(regions[0]))) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"Invalid memory table (%u regions).\n", nregions);
>> +		return -1;
>> +	}
>> +
>> +	/* The region entries directly follow the table header. */
>> +	mem_regions = mem_regions_addr->regions;
>> +
>> +	for (idx = 0; idx < nregions; idx++) {
>> +		regions[idx].guest_phys_address =
>> +			mem_regions[idx].guest_phys_addr;
>> +		regions[idx].guest_phys_address_end =
>> +			regions[idx].guest_phys_address +
>> +			mem_regions[idx].memory_size;
>> +		regions[idx].memory_size =
>> +			mem_regions[idx].memory_size;
>> +		regions[idx].userspace_address =
>> +			mem_regions[idx].userspace_addr;
>> +
>> +		LOG_DEBUG(VHOST_CONFIG, "REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
>> +			idx,
>> +			(void *)(uintptr_t)regions[idx].guest_phys_address,
>> +			(void *)(uintptr_t)regions[idx].userspace_address,
>> +			regions[idx].memory_size);
>> +
>> +		/*
>> +		 * The region at guest physical address 0 provides the base
>> +		 * address that is looked up in the owner's maps file.
>> +		 */
>> +		if (regions[idx].guest_phys_address == 0x0) {
>> +			base_address =
>> +				regions[idx].userspace_address;
>> +			/* Map VM memory file */
>> +			if (host_memory_map(ctx.pid, base_address,
>> +				&mapped_address, &mapped_size) != 0) {
>> +				return -1;
>> +			}
>> +		}
>> +	}
>> +
>> +	/* Check that we have a valid base address. */
>> +	if (base_address == 0) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"Failed to find base address of qemu memory file.\n");
>> +		return -1;
>> +	}
>> +
>> +	/*
>> +	 * Offset to add to a guest physical address of the region to get the
>> +	 * matching address inside our own mapping.
>> +	 */
>> +	for (idx = 0; idx < nregions; idx++) {
>> +		regions[idx].address_offset =
>> +			mapped_address - base_address +
>> +			regions[idx].userspace_address -
>> +			regions[idx].guest_phys_address;
>> +	}
>> +
>> +	/* NOTE(review): the callback's result is not checked here. */
>> +	ops->set_mem_table(ctx, &regions[0], nregions);
>> +	return 0;
>> +}
>> diff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
>> new file mode 100644
>> index 0000000..6f98ce8
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
>> @@ -0,0 +1,43 @@
>> +/*-
>> + *   BSD LICENSE
>> + *
>> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + *   All rights reserved.
>> + *
>> + *   Redistribution and use in source and binary forms, with or without
>> + *   modification, are permitted provided that the following conditions
>> + *   are met:
>> + *
>> + *     * Redistributions of source code must retain the above copyright
>> + *       notice, this list of conditions and the following disclaimer.
>> + *     * Redistributions in binary form must reproduce the above copyright
>> + *       notice, this list of conditions and the following disclaimer in
>> + *       the documentation and/or other materials provided with the
>> + *       distribution.
>> + *     * Neither the name of Intel Corporation nor the names of its
>> + *       contributors may be used to endorse or promote products derived
>> + *       from this software without specific prior written permission.
>> + *
>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +#ifndef _VIRTIO_NET_CDEV_H
>> +#define _VIRTIO_NET_CDEV_H
>> +#include <stdint.h>
>> +
>> +#include "vhost-net.h"
>> +
>> +int
>> +cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory *mem_regions_addr,
>> +	uint32_t nregions);
>> +
>> +#endif
>> diff --git a/lib/librte_vhost/vhost-net-cdev.c b/lib/librte_vhost/vhost-net-cdev.c
>> deleted file mode 100644
>> index 57c76cb..0000000
>> --- a/lib/librte_vhost/vhost-net-cdev.c
>> +++ /dev/null
>> @@ -1,389 +0,0 @@
>> -/*-
>> - *   BSD LICENSE
>> - *
>> - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> - *   All rights reserved.
>> - *
>> - *   Redistribution and use in source and binary forms, with or without
>> - *   modification, are permitted provided that the following conditions
>> - *   are met:
>> - *
>> - *     * Redistributions of source code must retain the above copyright
>> - *       notice, this list of conditions and the following disclaimer.
>> - *     * Redistributions in binary form must reproduce the above copyright
>> - *       notice, this list of conditions and the following disclaimer in
>> - *       the documentation and/or other materials provided with the
>> - *       distribution.
>> - *     * Neither the name of Intel Corporation nor the names of its
>> - *       contributors may be used to endorse or promote products derived
>> - *       from this software without specific prior written permission.
>> - *
>> - *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> - *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> - *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> - *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> - *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> - *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> - *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> - *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> - *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> - *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> - *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> - */
>> -
>> -#include <errno.h>
>> -#include <fuse/cuse_lowlevel.h>
>> -#include <linux/limits.h>
>> -#include <linux/vhost.h>
>> -#include <stdint.h>
>> -#include <string.h>
>> -#include <unistd.h>
>> -
>> -#include <rte_ethdev.h>
>> -#include <rte_log.h>
>> -#include <rte_string_fns.h>
>> -#include <rte_virtio_net.h>
>> -
>> -#include "vhost-net-cdev.h"
>> -
>> -#define FUSE_OPT_DUMMY "\0\0"
>> -#define FUSE_OPT_FORE  "-f\0\0"
>> -#define FUSE_OPT_NOMULTI "-s\0\0"
>> -
>> -static const uint32_t default_major = 231;
>> -static const uint32_t default_minor = 1;
>> -static const char cuse_device_name[] = "/dev/cuse";
>> -static const char default_cdev[] = "vhost-net";
>> -
>> -static struct fuse_session *session;
>> -static struct vhost_net_device_ops const *ops;
>> -
>> -/*
>> - * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
>> - * when the device is added to the device linked list.
>> - */
>> -static struct vhost_device_ctx
>> -fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
>> -{
>> -	struct vhost_device_ctx ctx;
>> -	struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
>> -
>> -	ctx.pid = req_ctx->pid;
>> -	ctx.fh = fi->fh;
>> -
>> -	return ctx;
>> -}
>> -
>> -/*
>> - * When the device is created in QEMU it gets initialised here and
>> - * added to the device linked list.
>> - */
>> -static void
>> -vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
>> -{
>> -	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> -	int err = 0;
>> -
>> -	err = ops->new_device(ctx);
>> -	if (err == -1) {
>> -		fuse_reply_err(req, EPERM);
>> -		return;
>> -	}
>> -
>> -	fi->fh = err;
>> -
>> -	RTE_LOG(INFO, VHOST_CONFIG,
>> -		"(%"PRIu64") Device configuration started\n", fi->fh);
>> -	fuse_reply_open(req, fi);
>> -}
>> -
>> -/*
>> - * When QEMU is shutdown or killed the device gets released.
>> - */
>> -static void
>> -vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
>> -{
>> -	int err = 0;
>> -	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> -
>> -	ops->destroy_device(ctx);
>> -	RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
>> -	fuse_reply_err(req, err);
>> -}
>> -
>> -/*
>> - * Boilerplate code for CUSE IOCTL
>> - * Implicit arguments: ctx, req, result.
>> - */
>> -#define VHOST_IOCTL(func) do {	\
>> -	result = (func)(ctx);	\
>> -	fuse_reply_ioctl(req, result, NULL, 0);	\
>> -} while (0)
>> -
>> -/*
>> - * Boilerplate IOCTL RETRY
>> - * Implicit arguments: req.
>> - */
>> -#define VHOST_IOCTL_RETRY(size_r, size_w) do {	\
>> -	struct iovec iov_r = { arg, (size_r) };	\
>> -	struct iovec iov_w = { arg, (size_w) };	\
>> -	fuse_reply_ioctl_retry(req, &iov_r,	\
>> -		(size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
>> -} while (0)
>> -
>> -/*
>> - * Boilerplate code for CUSE Read IOCTL
>> - * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
>> - */
>> -#define VHOST_IOCTL_R(type, var, func) do {	\
>> -	if (!in_bufsz) {	\
>> -		VHOST_IOCTL_RETRY(sizeof(type), 0);\
>> -	} else {	\
>> -		(var) = *(const type*)in_buf;	\
>> -		result = func(ctx, &(var));	\
>> -		fuse_reply_ioctl(req, result, NULL, 0);\
>> -	}	\
>> -} while (0)
>> -
>> -/*
>> - * Boilerplate code for CUSE Write IOCTL
>> - * Implicit arguments: ctx, req, result, out_bufsz.
>> - */
>> -#define VHOST_IOCTL_W(type, var, func) do {	\
>> -	if (!out_bufsz) {	\
>> -		VHOST_IOCTL_RETRY(0, sizeof(type));\
>> -	} else {	\
>> -		result = (func)(ctx, &(var));\
>> -		fuse_reply_ioctl(req, result, &(var), sizeof(type));\
>> -	} \
>> -} while (0)
>> -
>> -/*
>> - * Boilerplate code for CUSE Read/Write IOCTL
>> - * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
>> - */
>> -#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do {	\
>> -	if (!in_bufsz) {	\
>> -		VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
>> -	} else {	\
>> -		(var1) = *(const type1*) (in_buf);	\
>> -		result = (func)(ctx, (var1), &(var2));	\
>> -		fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
>> -	}	\
>> -} while (0)
>> -
>> -/*
>> - * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type
>> - * of IOCTL a buffer is requested to read or to write. This request is handled
>> - * by FUSE and the buffer is then given to CUSE.
>> - */
>> -static void
>> -vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
>> -		struct fuse_file_info *fi, __rte_unused unsigned flags,
>> -		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
>> -{
>> -	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
>> -	struct vhost_vring_file file;
>> -	struct vhost_vring_state state;
>> -	struct vhost_vring_addr addr;
>> -	uint64_t features;
>> -	uint32_t index;
>> -	int result = 0;
>> -
>> -	switch (cmd) {
>> -	case VHOST_NET_SET_BACKEND:
>> -		LOG_DEBUG(VHOST_CONFIG,
>> -			"(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
>> -		VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
>> -		break;
>> -
>> -	case VHOST_GET_FEATURES:
>> -		LOG_DEBUG(VHOST_CONFIG,
>> -			"(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
>> -		VHOST_IOCTL_W(uint64_t, features, ops->get_features);
>> -		break;
>> -
>> -	case VHOST_SET_FEATURES:
>> -		LOG_DEBUG(VHOST_CONFIG,
>> -			"(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
>> -		VHOST_IOCTL_R(uint64_t, features, ops->set_features);
>> -		break;
>> -
>> -	case VHOST_RESET_OWNER:
>> -		LOG_DEBUG(VHOST_CONFIG,
>> -			"(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
>> -		VHOST_IOCTL(ops->reset_owner);
>> -		break;
>> -
>> -	case VHOST_SET_OWNER:
>> -		LOG_DEBUG(VHOST_CONFIG,
>> -			"(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
>> -		VHOST_IOCTL(ops->set_owner);
>> -		break;
>> -
>> -	case VHOST_SET_MEM_TABLE:
>> -		/*TODO fix race condition.*/
>> -		LOG_DEBUG(VHOST_CONFIG,
>> -			"(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
>> -		static struct vhost_memory mem_temp;
>> -
>> -		switch (in_bufsz) {
>> -		case 0:
>> -			VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
>> -			break;
>> -
>> -		case sizeof(struct vhost_memory):
>> -			mem_temp = *(const struct vhost_memory *) in_buf;
>> -
>> -			if (mem_temp.nregions > 0) {
>> -				VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
>> -					(sizeof(struct vhost_memory_region) *
>> -						mem_temp.nregions), 0);
>> -			} else {
>> -				result = -1;
>> -				fuse_reply_ioctl(req, result, NULL, 0);
>> -			}
>> -			break;
>> -
>> -		default:
>> -			result = ops->set_mem_table(ctx,
>> -					in_buf, mem_temp.nregions);
>> -			if (result)
>> -				fuse_reply_err(req, EINVAL);
>> -			else
>> -				fuse_reply_ioctl(req, result, NULL, 0);
>> -		}
>> -		break;
>> -
>> -	case VHOST_SET_VRING_NUM:
>> -		LOG_DEBUG(VHOST_CONFIG,
>> -			"(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
>> -		VHOST_IOCTL_R(struct vhost_vring_state, state,
>> -			ops->set_vring_num);
>> -		break;
>> -
>> -	case VHOST_SET_VRING_BASE:
>> -		LOG_DEBUG(VHOST_CONFIG,
>> -			"(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
>> -		VHOST_IOCTL_R(struct vhost_vring_state, state,
>> -			ops->set_vring_base);
>> -		break;
>> -
>> -	case VHOST_GET_VRING_BASE:
>> -		LOG_DEBUG(VHOST_CONFIG,
>> -			"(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
>> -		VHOST_IOCTL_RW(uint32_t, index,
>> -			struct vhost_vring_state, state, ops->get_vring_base);
>> -		break;
>> -
>> -	case VHOST_SET_VRING_ADDR:
>> -		LOG_DEBUG(VHOST_CONFIG,
>> -			"(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
>> -		VHOST_IOCTL_R(struct vhost_vring_addr, addr,
>> -			ops->set_vring_addr);
>> -		break;
>> -
>> -	case VHOST_SET_VRING_KICK:
>> -		LOG_DEBUG(VHOST_CONFIG,
>> -			"(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n", ctx.fh);
>> -		VHOST_IOCTL_R(struct vhost_vring_file, file,
>> -			ops->set_vring_kick);
>> -		break;
>> -
>> -	case VHOST_SET_VRING_CALL:
>> -		LOG_DEBUG(VHOST_CONFIG,
>> -			"(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n", ctx.fh);
>> -		VHOST_IOCTL_R(struct vhost_vring_file, file,
>> -			ops->set_vring_call);
>> -		break;
>> -
>> -	default:
>> -		RTE_LOG(ERR, VHOST_CONFIG,
>> -			"(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
>> -		result = -1;
>> -		fuse_reply_ioctl(req, result, NULL, 0);
>> -	}
>> -
>> -	if (result < 0)
>> -		LOG_DEBUG(VHOST_CONFIG,
>> -			"(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
>> -	else
>> -		LOG_DEBUG(VHOST_CONFIG,
>> -			"(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
>> -}
>> -
>> -/*
>> - * Structure handling open, release and ioctl function pointers is populated.
>> - */
>> -static const struct cuse_lowlevel_ops vhost_net_ops = {
>> -	.open		= vhost_net_open,
>> -	.release	= vhost_net_release,
>> -	.ioctl		= vhost_net_ioctl,
>> -};
>> -
>> -/*
>> - * cuse_info is populated and used to register the cuse device.
>> - * vhost_net_device_ops are also passed when the device is registered in app.
>> - */
>> -int
>> -rte_vhost_driver_register(const char *dev_name)
>> -{
>> -	struct cuse_info cuse_info;
>> -	char device_name[PATH_MAX] = "";
>> -	char char_device_name[PATH_MAX] = "";
>> -	const char *device_argv[] = { device_name };
>> -
>> -	char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
>> -	char fuse_opt_fore[] = FUSE_OPT_FORE;
>> -	char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
>> -	char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
>> -
>> -	if (access(cuse_device_name, R_OK | W_OK) < 0) {
>> -		RTE_LOG(ERR, VHOST_CONFIG,
>> -			"char device %s can't be accessed, maybe not exist\n",
>> -			cuse_device_name);
>> -		return -1;
>> -	}
>> -
>> -	/*
>> -	 * The device name is created. This is passed to QEMU so that it can
>> -	 * register the device with our application.
>> -	 */
>> -	snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
>> -	snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
>> -
>> -	/* Check if device already exists. */
>> -	if (access(char_device_name, F_OK) != -1) {
>> -		RTE_LOG(ERR, VHOST_CONFIG,
>> -			"char device %s already exists\n", char_device_name);
>> -		return -1;
>> -	}
>> -
>> -	memset(&cuse_info, 0, sizeof(cuse_info));
>> -	cuse_info.dev_major = default_major;
>> -	cuse_info.dev_minor = default_minor;
>> -	cuse_info.dev_info_argc = 1;
>> -	cuse_info.dev_info_argv = device_argv;
>> -	cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
>> -
>> -	ops = get_virtio_net_callbacks();
>> -
>> -	session = cuse_lowlevel_setup(3, fuse_argv,
>> -			&cuse_info, &vhost_net_ops, 0, NULL);
>> -	if (session == NULL)
>> -		return -1;
>> -
>> -	return 0;
>> -}
>> -
>> -/**
>> - * The CUSE session is launched allowing the application to receive open,
>> - * release and ioctl calls.
>> - */
>> -int
>> -rte_vhost_driver_session_start(void)
>> -{
>> -	fuse_session_loop(session);
>> -
>> -	return 0;
>> -}
>> diff --git a/lib/librte_vhost/vhost-net-cdev.h b/lib/librte_vhost/vhost-net-cdev.h
>> deleted file mode 100644
>> index 03a5c57..0000000
>> --- a/lib/librte_vhost/vhost-net-cdev.h
>> +++ /dev/null
>> @@ -1,113 +0,0 @@
>> -/*-
>> - *   BSD LICENSE
>> - *
>> - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> - *   All rights reserved.
>> - *
>> - *   Redistribution and use in source and binary forms, with or without
>> - *   modification, are permitted provided that the following conditions
>> - *   are met:
>> - *
>> - *     * Redistributions of source code must retain the above copyright
>> - *       notice, this list of conditions and the following disclaimer.
>> - *     * Redistributions in binary form must reproduce the above copyright
>> - *       notice, this list of conditions and the following disclaimer in
>> - *       the documentation and/or other materials provided with the
>> - *       distribution.
>> - *     * Neither the name of Intel Corporation nor the names of its
>> - *       contributors may be used to endorse or promote products derived
>> - *       from this software without specific prior written permission.
>> - *
>> - *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> - *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> - *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> - *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> - *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> - *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> - *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> - *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> - *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> - *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> - *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> - */
>> -
>> -#ifndef _VHOST_NET_CDEV_H_
>> -#define _VHOST_NET_CDEV_H_
>> -#include <stdint.h>
>> -#include <stdio.h>
>> -#include <sys/types.h>
>> -#include <unistd.h>
>> -#include <linux/vhost.h>
>> -
>> -#include <rte_log.h>
>> -
>> -/* Macros for printing using RTE_LOG */
>> -#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
>> -#define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1
>> -
>> -#ifdef RTE_LIBRTE_VHOST_DEBUG
>> -#define VHOST_MAX_PRINT_BUFF 6072
>> -#define LOG_LEVEL RTE_LOG_DEBUG
>> -#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
>> -#define PRINT_PACKET(device, addr, size, header) do { \
>> -	char *pkt_addr = (char *)(addr); \
>> -	unsigned int index; \
>> -	char packet[VHOST_MAX_PRINT_BUFF]; \
>> -	\
>> -	if ((header)) \
>> -		snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Header size %d: ", (device->device_fh), (size)); \
>> -	else \
>> -		snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Packet size %d: ", (device->device_fh), (size)); \
>> -	for (index = 0; index < (size); index++) { \
>> -		snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \
>> -			"%02hhx ", pkt_addr[index]); \
>> -	} \
>> -	snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \
>> -	\
>> -	LOG_DEBUG(VHOST_DATA, "%s", packet); \
>> -} while (0)
>> -#else
>> -#define LOG_LEVEL RTE_LOG_INFO
>> -#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
>> -#define PRINT_PACKET(device, addr, size, header) do {} while (0)
>> -#endif
>> -
>> -
>> -/*
>> - * Structure used to identify device context.
>> - */
>> -struct vhost_device_ctx {
>> -	pid_t		pid;	/* PID of process calling the IOCTL. */
>> -	uint64_t	fh;	/* Populated with fi->fh to track the device index. */
>> -};
>> -
>> -/*
>> - * Structure contains function pointers to be defined in virtio-net.c. These
>> - * functions are called in CUSE context and are used to configure devices.
>> - */
>> -struct vhost_net_device_ops {
>> -	int (*new_device)(struct vhost_device_ctx);
>> -	void (*destroy_device)(struct vhost_device_ctx);
>> -
>> -	int (*get_features)(struct vhost_device_ctx, uint64_t *);
>> -	int (*set_features)(struct vhost_device_ctx, uint64_t *);
>> -
>> -	int (*set_mem_table)(struct vhost_device_ctx, const void *, uint32_t);
>> -
>> -	int (*set_vring_num)(struct vhost_device_ctx, struct vhost_vring_state *);
>> -	int (*set_vring_addr)(struct vhost_device_ctx, struct vhost_vring_addr *);
>> -	int (*set_vring_base)(struct vhost_device_ctx, struct vhost_vring_state *);
>> -	int (*get_vring_base)(struct vhost_device_ctx, uint32_t, struct vhost_vring_state *);
>> -
>> -	int (*set_vring_kick)(struct vhost_device_ctx, struct vhost_vring_file *);
>> -	int (*set_vring_call)(struct vhost_device_ctx, struct vhost_vring_file *);
>> -
>> -	int (*set_backend)(struct vhost_device_ctx, struct vhost_vring_file *);
>> -
>> -	int (*set_owner)(struct vhost_device_ctx);
>> -	int (*reset_owner)(struct vhost_device_ctx);
>> -};
>> -
>> -
>> -struct vhost_net_device_ops const *get_virtio_net_callbacks(void);
>> -#endif /* _VHOST_NET_CDEV_H_ */
>> diff --git a/lib/librte_vhost/vhost-user/fd_man.c b/lib/librte_vhost/vhost-user/fd_man.c
>> new file mode 100644
>> index 0000000..c7fd3f2
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/fd_man.c
>> @@ -0,0 +1,158 @@
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <sys/socket.h>
>> +#include <sys/select.h>
>> +#include <sys/time.h>
>> +#include <sys/types.h>
>> +#include <unistd.h>
>> +
>> +#include <rte_log.h>
>> +
>> +#include "fd_man.h"
>> +
>> +/**
>> + * Look up the slot that currently holds a given fd.
>> + * Passing -1 as fd locates the first empty slot, because empty entries
>> + * keep -1 in their fd field.
>> + * @return
>> + *   Slot index on a match, or -1 when no entry holds fd.
>> + */
>> +static int
>> +fdset_find_fd(struct fdset *pfdset, int fd)
>> +{
>> +	int slot;
>> +
>> +	for (slot = 0; slot < pfdset->num; slot++) {
>> +		if (pfdset->fd[slot].fd == fd)
>> +			return slot;
>> +	}
>> +
>> +	return -1;
>> +}
>> +
>> +/**
>> + * Locate an unused slot, i.e. an entry whose fd is -1.
>> + * @return
>> + *   Index of the free slot, or -1 if every slot is taken.
>> + */
>> +static int
>> +fdset_find_free_slot(struct fdset *pfdset)
>> +{
>> +	const int unused_fd = -1;
>> +
>> +	return fdset_find_fd(pfdset, unused_fd);
>> +}
>> +
>> +/**
>> + * Store fd, its read/write callbacks and its context in slot idx.
>> + * No bounds or occupancy check is performed here.
>> + */
>> +static void
>> +fdset_add_fd(struct fdset  *pfdset, int idx, int fd, fd_cb rcb,
>> +		fd_cb wcb, uint64_t dat)
>> +{
>> +	struct fdentry *slot = pfdset->fd + idx;
>> +
>> +	slot->dat = dat;
>> +	slot->wcb = wcb;
>> +	slot->rcb = rcb;
>> +	slot->fd = fd;
>> +}
>> +
>> +/**
>> + * Populate the select() read/write sets from the registered entries.
>> + * An entry is put in the read set when it has a read callback and rfset
>> + * is non-NULL, and in the write set when it has a write callback and
>> + * wfset is non-NULL. Empty entries (fd == -1) are skipped.
>> + * @return
>> + *  the highest fd placed in either set, or -1 if nothing was added.
>> + */
>> +static int
>> +fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)
>> +{
>> +	int highest = -1;
>> +	int slot;
>> +
>> +	for (slot = 0; slot < MAX_FDS; slot++) {
>> +		const struct fdentry *entry = &pfdset->fd[slot];
>> +		int want_read, want_write;
>> +
>> +		if (entry->fd == -1)
>> +			continue;
>> +
>> +		want_read = entry->rcb != NULL && rfset != NULL;
>> +		want_write = entry->wcb != NULL && wfset != NULL;
>> +
>> +		if (want_read)
>> +			FD_SET(entry->fd, rfset);
>> +		if (want_write)
>> +			FD_SET(entry->fd, wfset);
>> +
>> +		if ((want_read || want_write) && entry->fd > highest)
>> +			highest = entry->fd;
>> +	}
>> +
>> +	return highest;
>> +}
>> +
>> +/**
>> + * Mark every entry of the fdset as empty (fd = -1) and record the
>> + * capacity, MAX_FDS, in num.
>> + */
>> +void
>> +fdset_init(struct fdset *pfdset)
>> +{
>> +	struct fdentry *entry = pfdset->fd;
>> +	struct fdentry *const end = entry + MAX_FDS;
>> +
>> +	while (entry != end) {
>> +		entry->fd = -1;
>> +		entry++;
>> +	}
>> +
>> +	pfdset->num = MAX_FDS;
>> +}
>> +
>> +/**
>> + * Register fd together with its read/write callbacks and context value.
>> + * @return
>> + *   0 on success, -1 if fd is -1, -2 if the fdset has no free slot.
>> + */
>> +int
>> +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
>> +{
>> +	int slot;
>> +
>> +	/* -1 is the marker for an empty entry, so it cannot be registered. */
>> +	if (fd == -1)
>> +		return -1;
>> +
>> +	slot = fdset_find_free_slot(pfdset);
>> +	if (slot != -1) {
>> +		fdset_add_fd(pfdset, slot, fd, rcb, wcb, dat);
>> +		return 0;
>> +	}
>> +
>> +	return -2;
>> +}
>> +
>> +/**
>> + *  Remove fd from the fdset by marking its entry empty.
>> + *  Nothing happens when fd is not registered; the fd itself is not closed.
>> + */
>> +void
>> +fdset_del(struct fdset *pfdset, int fd)
>> +{
>> +	int slot = fdset_find_fd(pfdset, fd);
>> +
>> +	if (slot == -1)
>> +		return;
>> +
>> +	pfdset->fd[slot].fd = -1;
>> +}
>> +
>> +
>> +/**
>> + * Event loop: wait with select() on every registered fd and invoke the
>> + * read/write callback of each fd that is ready.
>> + *
>> + * The loop returns when pfdset is NULL or when no fd is left to wait on
>> + * (fdset_fill() returns -1). Callbacks may add or remove entries; the
>> + * sets are rebuilt from scratch on every iteration.
>> + */
>> +void
>> +fdset_event_dispatch(struct fdset *pfdset)
>> +{
>> +	fd_set rfds, wfds;
>> +	struct fdentry *pfdentry;
>> +	int i, maxfds, fd, ret;
>> +
>> +	if (pfdset == NULL)
>> +		return;
>> +
>> +	while (1) {
>> +		FD_ZERO(&rfds);
>> +		FD_ZERO(&wfds);
>> +		maxfds = fdset_fill(&rfds, &wfds, pfdset);
>> +		/* Nothing registered any more: leave the loop. */
>> +		if (maxfds == -1)
>> +			return;
>> +
>> +		ret = select(maxfds + 1, &rfds, &wfds, NULL, NULL);
>> +		/*
>> +		 * After a failed select() (typically EINTR) the contents of
>> +		 * the sets are unspecified, so never dispatch from them;
>> +		 * rebuild the sets and wait again.
>> +		 */
>> +		if (ret <= 0)
>> +			continue;
>> +
>> +		for (i = 0; i < MAX_FDS; i++) {
>> +			pfdentry = &pfdset->fd[i];
>> +			fd = pfdentry->fd;
>> +			/* FD_ISSET() with a negative fd is out of range. */
>> +			if (fd == -1)
>> +				continue;
>> +
>> +			if (pfdentry->rcb != NULL && FD_ISSET(fd, &rfds))
>> +				pfdentry->rcb(fd, pfdentry->dat);
>> +
>> +			/* The read callback may have removed this entry. */
>> +			if (pfdentry->fd != fd)
>> +				continue;
>> +
>> +			if (pfdentry->wcb != NULL && FD_ISSET(fd, &wfds))
>> +				pfdentry->wcb(fd, pfdentry->dat);
>> +		}
>> +	}
>> +}
>> diff --git a/lib/librte_vhost/vhost-user/fd_man.h b/lib/librte_vhost/vhost-user/fd_man.h
>> new file mode 100644
>> index 0000000..57cc81d
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/fd_man.h
>> @@ -0,0 +1,31 @@
>> +#ifndef _FD_MAN_H_
>> +#define _FD_MAN_H_
>> +#include <stdint.h>
>> +
>> +/* Capacity of an fdset: number of entries in struct fdset. */
>> +/* NOTE(review): fds are passed to FD_SET(); confirm they stay below FD_SETSIZE. */
>> +#define MAX_FDS 1024
>> +
>> +/* Callback invoked with the ready fd and the context stored for it. */
>> +typedef void (*fd_cb)(int fd, uint64_t dat);
>> +
>> +/* One registered file descriptor with its handlers. */
>> +struct fdentry {
>> +	int fd; /* -1 indicates this entry is empty */
>> +	fd_cb rcb; /* callback when this fd is readable. */
>> +	fd_cb wcb; /* callback when this fd is writeable.*/
>> +	uint64_t dat;	/* fd context */
>> +};
>> +
>> +/* Fixed-size table of registered file descriptors. */
>> +struct fdset {
>> +	struct fdentry fd[MAX_FDS];
>> +	int num;	/* number of entries searched; set to MAX_FDS by fdset_init() */
>> +};
>> +
>> +
>> +/* Mark every entry of pfdset as empty. Call before any other fdset_* use. */
>> +void fdset_init(struct fdset *pfdset);
>> +
>> +/*
>> + * Register fd with its read callback, write callback and context value.
>> + * Returns 0 on success, -1 if fd is -1, -2 if no free entry is left.
>> + */
>> +int fdset_add(struct fdset *pfdset, int fd, fd_cb rcb,
>> +	fd_cb wcb, uint64_t ctx);
>> +
>> +/* Unregister fd; does nothing if fd is not registered. */
>> +void fdset_del(struct fdset *pfdset, int fd);
>> +
>> +/*
>> + * Loop on select() and invoke the callbacks of ready fds. Returns when
>> + * pfdset is NULL or when no registered fd is left to wait on.
>> + */
>> +void fdset_event_dispatch(struct fdset *pfdset);
>> +
>> +#endif
>> diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.c b/lib/librte_vhost/vhost-user/vhost-net-user.c
>> new file mode 100644
>> index 0000000..34450f4
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/vhost-net-user.c
>> @@ -0,0 +1,417 @@
>> +/*-
>> + *   BSD LICENSE
>> + *
>> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + *   All rights reserved.
>> + *
>> + *   Redistribution and use in source and binary forms, with or without
>> + *   modification, are permitted provided that the following conditions
>> + *   are met:
>> + *
>> + *     * Redistributions of source code must retain the above copyright
>> + *       notice, this list of conditions and the following disclaimer.
>> + *     * Redistributions in binary form must reproduce the above copyright
>> + *       notice, this list of conditions and the following disclaimer in
>> + *       the documentation and/or other materials provided with the
>> + *       distribution.
>> + *     * Neither the name of Intel Corporation nor the names of its
>> + *       contributors may be used to endorse or promote products derived
>> + *       from this software without specific prior written permission.
>> + *
>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <limits.h>
>> +#include <stdlib.h>
>> +#include <unistd.h>
>> +#include <string.h>
>> +#include <sys/types.h>
>> +#include <sys/socket.h>
>> +#include <sys/un.h>
>> +#include <errno.h>
>> +
>> +#include <rte_log.h>
>> +#include <rte_virtio_net.h>
>> +
>> +#include "fd_man.h"
>> +#include "vhost-net-user.h"
>> +#include "vhost-net.h"
>> +#include "virtio-net-user.h"
>> +
>> +/* fdset callbacks: new connection on the listen fd / message on a connection. */
>> +static void vserver_new_vq_conn(int fd, uint64_t data);
>> +static void vserver_message_handler(int fd, uint64_t dat);
>> +/* Device callback table; assigned in rte_vhost_driver_register(). */
>> +const struct vhost_net_device_ops *ops;
>> +
>> +/* The single vhost server instance; NULL until a register call succeeds. */
>> +static struct vhost_server *g_vhost_server;
>> +
>> +/*
>> + * Printable name of each vhost-user request, indexed by request code.
>> + * The array has VHOST_USER_MAX elements, so valid indexes are
>> + * 0 .. VHOST_USER_MAX - 1.
>> + */
>> +static const char *vhost_message_str[VHOST_USER_MAX] =
>> +{
>> +	[VHOST_USER_NONE] = "VHOST_USER_NONE",
>> +	[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
>> +	[VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
>> +	[VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
>> +	[VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
>> +	[VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
>> +	[VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
>> +	[VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
>> +	[VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
>> +	[VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
>> +	[VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
>> +	[VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
>> +	[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
>> +	[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
>> +	[VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR"
>> +};
>> +
>> +/**
>> + * Create a unix domain stream socket, bind it to path and start listening.
>> + * A path that does not fit in sockaddr_un.sun_path is rejected rather than
>> + * silently truncated, which would bind the socket to a different name.
>> + * @return
>> + *  socket fd or -1 on failure
>> + */
>> +static int
>> +uds_socket(const char *path)
>> +{
>> +	struct sockaddr_un un;
>> +	int sockfd;
>> +	int ret;
>> +
>> +	if (path == NULL)
>> +		return -1;
>> +
>> +	memset(&un, 0, sizeof(un));
>> +	un.sun_family = AF_UNIX;
>> +	ret = snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
>> +	if (ret < 0 || (size_t)ret >= sizeof(un.sun_path)) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"socket path %s is too long\n", path);
>> +		return -1;
>> +	}
>> +
>> +	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
>> +	if (sockfd < 0) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"socket creation failed: %s\n", strerror(errno));
>> +		return -1;
>> +	}
>> +	RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
>> +
>> +	ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
>> +	if (ret == -1) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"bind to %s failed: %s\n", path, strerror(errno));
>> +		goto err;
>> +	}
>> +	RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
>> +
>> +	ret = listen(sockfd, 1);
>> +	if (ret == -1) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"listen on %s failed: %s\n", path, strerror(errno));
>> +		goto err;
>> +	}
>> +
>> +	return sockfd;
>> +
>> +err:
>> +	close(sockfd);
>> +	return -1;
>> +}
>> +
>> +
>> +/**
>> + * Receive up to buflen bytes into buf, together with any file descriptors
>> + * passed as SCM_RIGHTS ancillary data.
>> + *
>> + * fds must have room for fd_num descriptors. Every element is first set to
>> + * -1, then the descriptors that actually arrived are copied in, so unused
>> + * elements never hold stale values.
>> + * @return
>> + *  number of bytes read, 0 if the peer closed the connection,
>> + *  a negative value on error.
>> + */
>> +static int
>> +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
>> +{
>> +
>> +	struct iovec  iov;
>> +	struct msghdr msgh = { 0 };
>> +	size_t fdsize = fd_num * sizeof(int);
>> +	char control[CMSG_SPACE(fdsize)];
>> +	struct cmsghdr *cmsg;
>> +	size_t len;
>> +	int i, ret;
>> +
>> +	for (i = 0; i < fd_num; i++)
>> +		fds[i] = -1;
>> +
>> +	iov.iov_base = buf;
>> +	iov.iov_len  = buflen;
>> +
>> +	msgh.msg_iov = &iov;
>> +	msgh.msg_iovlen = 1;
>> +	msgh.msg_control = control;
>> +	msgh.msg_controllen = sizeof(control);
>> +
>> +	do {
>> +		ret = recvmsg(sockfd, &msgh, 0);
>> +	} while (ret < 0 && errno == EINTR);
>> +
>> +	if (ret <= 0) {
>> +		/* 0 is an orderly shutdown by the peer, not a failure. */
>> +		if (ret < 0)
>> +			RTE_LOG(ERR, VHOST_CONFIG, "%s failed\n", __func__);
>> +		return ret;
>> +	}
>> +
>> +	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
>> +		RTE_LOG(ERR, VHOST_CONFIG, "%s failed\n", __func__);
>> +		return -1;
>> +	}
>> +
>> +	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
>> +		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
>> +		if ((cmsg->cmsg_level == SOL_SOCKET) &&
>> +			(cmsg->cmsg_type == SCM_RIGHTS)) {
>> +			if (cmsg->cmsg_len < CMSG_LEN(0))
>> +				break;
>> +			/* Copy only what was received, capped at fd_num. */
>> +			len = cmsg->cmsg_len - CMSG_LEN(0);
>> +			if (len > fdsize)
>> +				len = fdsize;
>> +			memcpy(fds, CMSG_DATA(cmsg), len);
>> +			break;
>> +		}
>> +	}
>> +	return ret;
>> +}
>> +
>> +/**
>> + * Read one vhost-user message: the fixed header (with any passed fds
>> + * stored in msg->fds), then msg->size bytes of payload.
>> + * @return
>> + *  a positive byte count on success, 0 if the peer closed the connection,
>> + *  a negative value on error or malformed message.
>> + */
>> +static int
>> +read_vhost_message(int sockfd, struct VhostUserMsg *msg)
>> +{
>> +	size_t done = 0;
>> +	ssize_t n;
>> +	int ret;
>> +
>> +	ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
>> +		msg->fds, VHOST_MEMORY_MAX_NREGIONS);
>> +	if (ret <= 0)
>> +		return ret;
>> +
>> +	/* A partial header would leave msg->size undefined. */
>> +	if (ret != (int)VHOST_USER_HDR_SIZE) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"%s: short header, %d bytes\n", __func__, ret);
>> +		return -1;
>> +	}
>> +
>> +	if (msg->size) {
>> +		if (msg->size > sizeof(msg->payload)) {
>> +			RTE_LOG(ERR, VHOST_CONFIG,
>> +				"%s: invalid size:%u\n", __func__, msg->size);
>> +			return -1;
>> +		}
>> +		/* A stream socket may deliver the payload in pieces. */
>> +		while (done < msg->size) {
>> +			n = read(sockfd, (char *)&msg->payload + done,
>> +				msg->size - done);
>> +			if (n == 0)
>> +				return 0;
>> +			if (n < 0) {
>> +				if (errno == EINTR)
>> +					continue;
>> +				RTE_LOG(ERR, VHOST_CONFIG,
>> +					"read control message failed\n");
>> +				return -1;
>> +			}
>> +			done += (size_t)n;
>> +		}
>> +		ret = (int)msg->size;
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>> +/**
>> + * Send buflen bytes from buf, optionally attaching fd_num descriptors
>> + * from fds as SCM_RIGHTS ancillary data. sendmsg() is retried on EINTR.
>> + * @return
>> + *  0 on success, -1 if sendmsg() fails.
>> + */
>> +static int
>> +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
>> +{
>> +	size_t fdsize = fd_num * sizeof(int);
>> +	char control[CMSG_SPACE(fdsize)];
>> +	struct iovec iov = { .iov_base = buf, .iov_len = buflen };
>> +	struct msghdr msgh = { 0 };
>> +	int ret;
>> +
>> +	msgh.msg_iov = &iov;
>> +	msgh.msg_iovlen = 1;
>> +
>> +	if (fds != NULL && fd_num > 0) {
>> +		struct cmsghdr *hdr;
>> +
>> +		msgh.msg_control = control;
>> +		msgh.msg_controllen = sizeof(control);
>> +		hdr = CMSG_FIRSTHDR(&msgh);
>> +		hdr->cmsg_level = SOL_SOCKET;
>> +		hdr->cmsg_type = SCM_RIGHTS;
>> +		hdr->cmsg_len = CMSG_LEN(fdsize);
>> +		memcpy(CMSG_DATA(hdr), fds, fdsize);
>> +	}
>> +	/* Otherwise msg_control/msg_controllen keep their zero initializer. */
>> +
>> +	for (;;) {
>> +		ret = sendmsg(sockfd, &msgh, 0);
>> +		if (ret >= 0)
>> +			return 0;
>> +		if (errno != EINTR)
>> +			break;
>> +	}
>> +
>> +	RTE_LOG(ERR, VHOST_CONFIG,  "sendmsg error\n");
>> +	return -1;
>> +}
>> +
>> +/**
>> + * Send msg back to the peer as a reply: the version bits of msg->flags
>> + * are replaced with VHOST_USER_VERSION and the reply bit is set, then the
>> + * header plus msg->size payload bytes are sent without any fds.
>> + * @return
>> + *  the result of send_fd_message().
>> + */
>> +static int
>> +send_vhost_message(int sockfd, struct VhostUserMsg *msg)
>> +{
>> +	uint32_t flags = msg->flags;
>> +
>> +	flags &= ~VHOST_USER_VERSION_MASK;
>> +	flags |= VHOST_USER_VERSION | VHOST_USER_REPLY_MASK;
>> +	msg->flags = flags;
>> +
>> +	return send_fd_message(sockfd, (char *)msg,
>> +		VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
>> +}
>> +
>> +/**
>> + * fdset read callback for the listen socket: accept the connection,
>> + * create a device for it and register the connection fd so that
>> + * vserver_message_handler() receives its messages. dat carries the
>> + * struct vhost_server pointer.
>> + *
>> + * On any failure after accept() the connection is closed, so neither the
>> + * fd nor the device is leaked.
>> + */
>> +static void
>> +vserver_new_vq_conn(int fd, uint64_t dat)
>> +{
>> +	struct vhost_server *vserver = (void *)(uintptr_t)dat;
>> +	struct vhost_device_ctx vdev_ctx = { 0 };
>> +	int conn_fd;
>> +	int fh;
>> +
>> +	conn_fd = accept(fd, NULL, NULL);
>> +	RTE_LOG(INFO, VHOST_CONFIG,
>> +		"%s: new connection is %d\n", __func__, conn_fd);
>> +	if (conn_fd < 0)
>> +		return;
>> +
>> +	fh = ops->new_device(vdev_ctx);
>> +	/* NOTE(review): assumes new_device() signals failure with a
>> +	 * negative value - confirm against its implementation. */
>> +	if (fh < 0) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"failed to create device for connection %d\n",
>> +			conn_fd);
>> +		close(conn_fd);
>> +		return;
>> +	}
>> +	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
>> +
>> +	if (fdset_add(&vserver->fdset,
>> +		conn_fd, vserver_message_handler, NULL, fh) < 0) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"failed to register connection %d\n", conn_fd);
>> +		vdev_ctx.fh = fh;
>> +		ops->destroy_device(vdev_ctx);
>> +		close(conn_fd);
>> +	}
>> +}
>> +
>> +/* Close a client connection, unregister it and destroy its device. */
>> +static void
>> +vserver_drop_conn(int connfd, struct vhost_device_ctx ctx)
>> +{
>> +	close(connfd);
>> +	fdset_del(&g_vhost_server->fdset, connfd);
>> +	ops->destroy_device(ctx);
>> +}
>> +
>> +/**
>> + * fdset read callback for a client connection: read one vhost-user
>> + * message and dispatch it to the device callbacks. dat carries the
>> + * device handle stored when the connection was registered.
>> + *
>> + * A read error, a closed peer or an out-of-range request code tears the
>> + * connection down together with its device.
>> + */
>> +static void
>> +vserver_message_handler(int connfd, uint64_t dat)
>> +{
>> +	struct vhost_device_ctx ctx = { 0 };
>> +	struct VhostUserMsg msg;
>> +	uint64_t features = 0;
>> +	int ret;
>> +
>> +	ctx.fh = (uint32_t)dat;
>> +	ret = read_vhost_message(connfd, &msg);
>> +	if (ret < 0) {
>> +		RTE_LOG(ERR, VHOST_CONFIG, "vhost read message failed\n");
>> +		vserver_drop_conn(connfd, ctx);
>> +		return;
>> +	}
>> +	if (ret == 0) {
>> +		RTE_LOG(INFO, VHOST_CONFIG,
>> +			"vhost peer closed\n");
>> +		vserver_drop_conn(connfd, ctx);
>> +		return;
>> +	}
>> +	/*
>> +	 * vhost_message_str[] has VHOST_USER_MAX elements, so the value
>> +	 * VHOST_USER_MAX itself is already out of range. The cast also
>> +	 * rejects negative values if the enum is signed.
>> +	 */
>> +	if ((unsigned int)msg.request >= VHOST_USER_MAX) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"vhost read incorrect message\n");
>> +		vserver_drop_conn(connfd, ctx);
>> +		return;
>> +	}
>> +
>> +	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
>> +		vhost_message_str[msg.request]);
>> +	switch (msg.request) {
>> +	case VHOST_USER_GET_FEATURES:
>> +		ret = ops->get_features(ctx, &features);
>> +		/* Reply with the feature bits, not with the return code. */
>> +		msg.payload.u64 = features;
>> +		msg.size = sizeof(msg.payload.u64);
>> +		send_vhost_message(connfd, &msg);
>> +		break;
>> +	case VHOST_USER_SET_FEATURES:
>> +		/* The requested feature bits arrive in the payload. */
>> +		features = msg.payload.u64;
>> +		ops->set_features(ctx, &features);
>> +		break;
>> +
>> +	case VHOST_USER_SET_OWNER:
>> +		ops->set_owner(ctx);
>> +		break;
>> +	case VHOST_USER_RESET_OWNER:
>> +		ops->reset_owner(ctx);
>> +		break;
>> +
>> +	case VHOST_USER_SET_MEM_TABLE:
>> +		user_set_mem_table(ctx, &msg);
>> +		break;
>> +
>> +	case VHOST_USER_SET_LOG_BASE:
>> +	case VHOST_USER_SET_LOG_FD:
>> +		RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
>> +		break;
>> +
>> +	case VHOST_USER_SET_VRING_NUM:
>> +		ops->set_vring_num(ctx, &msg.payload.state);
>> +		break;
>> +	case VHOST_USER_SET_VRING_ADDR:
>> +		ops->set_vring_addr(ctx, &msg.payload.addr);
>> +		break;
>> +	case VHOST_USER_SET_VRING_BASE:
>> +		ops->set_vring_base(ctx, &msg.payload.state);
>> +		break;
>> +
>> +	case VHOST_USER_GET_VRING_BASE:
>> +		ret = ops->get_vring_base(ctx, msg.payload.state.index,
>> +			&msg.payload.state);
>> +		msg.size = sizeof(msg.payload.state);
>> +		send_vhost_message(connfd, &msg);
>> +		break;
>> +
>> +	case VHOST_USER_SET_VRING_KICK:
>> +		user_set_vring_kick(ctx, &msg);
>> +		break;
>> +	case VHOST_USER_SET_VRING_CALL:
>> +		user_set_vring_call(ctx, &msg);
>> +		break;
>> +
>> +	case VHOST_USER_SET_VRING_ERR:
>> +		RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
>> +		break;
>> +
>> +	default:
>> +		break;
>> +
>> +	}
>> +}
>> +
>> +
>> +/**
>> + * Create and initialise the vhost server: allocate it, bind a listening
>> + * unix domain socket to path (removing any existing file at that path
>> + * first) and register the socket with the server's fdset.
>> + *
>> + * Only one server can be registered. The path pointer is stored, not
>> + * copied, so the string must outlive the server.
>> + * @return
>> + *  0 on success, -1 if path is NULL, a server is already registered,
>> + *  or any setup step fails.
>> + */
>> +int
>> +rte_vhost_driver_register(const char *path)
>> +{
>> +
>> +	struct vhost_server *vserver;
>> +
>> +	/* Check path before unlink(), which must not be given NULL. */
>> +	if (path == NULL || g_vhost_server != NULL)
>> +		return -1;
>> +
>> +	vserver = calloc(1, sizeof(*vserver));
>> +	/*TODO: all allocation is through DPDK memory allocation */
>> +	if (vserver == NULL)
>> +		return -1;
>> +
>> +	fdset_init(&vserver->fdset);
>> +
>> +	unlink(path);
>> +
>> +	vserver->listenfd = uds_socket(path);
>> +	if (vserver->listenfd < 0) {
>> +		free(vserver);
>> +		return -1;
>> +	}
>> +	vserver->path = path;
>> +
>> +	if (fdset_add(&vserver->fdset, vserver->listenfd,
>> +			vserver_new_vq_conn, NULL,
>> +			(uint64_t)(uintptr_t)vserver) < 0) {
>> +		close(vserver->listenfd);
>> +		free(vserver);
>> +		return -1;
>> +	}
>> +
>> +	ops = get_virtio_net_callbacks();
>> +
>> +	g_vhost_server = vserver;
>> +
>> +	return 0;
>> +}
>> +
>> +
>> +/**
>> + * Run the vhost server event loop on the calling thread.
>> + * @return
>> + *  -1 if no server has been registered; otherwise 0 once the event
>> + *  loop returns.
>> + */
>> +int
>> +rte_vhost_driver_session_start(void)
>> +{
>> +	if (g_vhost_server == NULL)
>> +		return -1;
>> +
>> +	fdset_event_dispatch(&g_vhost_server->fdset);
>> +	return 0;
>> +}
>> +
>> diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.h b/lib/librte_vhost/vhost-user/vhost-net-user.h
>> new file mode 100644
>> index 0000000..c9df9fa
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/vhost-net-user.h
>> @@ -0,0 +1,74 @@
>> +#ifndef _VHOST_NET_USER_H
>> +#define _VHOST_NET_USER_H
>> +#include <stdint.h>
>> +#include <linux/vhost.h>
>> +
>> +#include "fd_man.h"
>> +
>> +/** State of one vhost-user server: its listening socket and fd table. */
>> +struct vhost_server {
>> +	const char *path; /**< The path the unix domain socket is bound to. */
>> +	int listenfd;     /**< The listener sockfd. */
>> +	struct fdset fdset; /**< The fd list this vhost server manages. */
>> +};
>> +
>> +/*********** FROM hw/virtio/vhost-user.c *************************************/
>> +
>> +#define VHOST_MEMORY_MAX_NREGIONS    8
>> +
>> +/*
>> + * Request codes carried in VhostUserMsg.request.
>> + * VHOST_USER_MAX is one past the last valid code; it is used as an array
>> + * size and is not a request itself.
>> + */
>> +typedef enum VhostUserRequest {
>> +    VHOST_USER_NONE = 0,
>> +    VHOST_USER_GET_FEATURES = 1,
>> +    VHOST_USER_SET_FEATURES = 2,
>> +    VHOST_USER_SET_OWNER = 3,
>> +    VHOST_USER_RESET_OWNER = 4,
>> +    VHOST_USER_SET_MEM_TABLE = 5,
>> +    VHOST_USER_SET_LOG_BASE = 6,
>> +    VHOST_USER_SET_LOG_FD = 7,
>> +    VHOST_USER_SET_VRING_NUM = 8,
>> +    VHOST_USER_SET_VRING_ADDR = 9,
>> +    VHOST_USER_SET_VRING_BASE = 10,
>> +    VHOST_USER_GET_VRING_BASE = 11,
>> +    VHOST_USER_SET_VRING_KICK = 12,
>> +    VHOST_USER_SET_VRING_CALL = 13,
>> +    VHOST_USER_SET_VRING_ERR = 14,
>> +    VHOST_USER_MAX
>> +} VhostUserRequest;
>> +
>> +/* One guest memory region as described by a SET_MEM_TABLE message. */
>> +typedef struct VhostUserMemoryRegion {
>> +    uint64_t guest_phys_addr;
>> +    uint64_t memory_size;
>> +    uint64_t userspace_addr;
>> +    uint64_t mmap_offset;
>> +} VhostUserMemoryRegion;
>> +
>> +/* SET_MEM_TABLE payload: nregions entries of regions[] are in use. */
>> +typedef struct VhostUserMemory {
>> +    uint32_t nregions;
>> +    uint32_t padding;
>> +    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
>> +} VhostUserMemory;
>> +
>> +/*
>> + * One vhost-user message: request, flags and size form the fixed header,
>> + * followed by 'size' bytes of payload.
>> + */
>> +typedef struct VhostUserMsg {
>> +    VhostUserRequest request;
>> +
>> +#define VHOST_USER_VERSION_MASK     (0x3)
>> +#define VHOST_USER_REPLY_MASK       (0x1 << 2)
>> +    uint32_t flags;
>> +    uint32_t size; /* the following payload size */
>> +    union {
>> +#define VHOST_USER_VRING_IDX_MASK   (0xff)
>> +#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
>> +        uint64_t u64;
>> +        struct vhost_vring_state state;
>> +        struct vhost_vring_addr addr;
>> +        VhostUserMemory memory;
>> +    } payload;
>> +     /* fds received as SCM_RIGHTS ancillary data alongside the header */
>> +     int fds[VHOST_MEMORY_MAX_NREGIONS];
>> +} __attribute__((packed)) VhostUserMsg;
>> +
>> +/* Size of the fixed header: byte offset of the payload in VhostUserMsg. */
>> +#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)
>> +
>> +/* The version of the protocol we support */
>> +#define VHOST_USER_VERSION    (0x1)
>> +
>> +/*****************************************************************************/
>> +#endif
>> diff --git a/lib/librte_vhost/vhost-user/virtio-net-user.c b/lib/librte_vhost/vhost-user/virtio-net-user.c
>> new file mode 100644
>> index 0000000..f38e6cc
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/virtio-net-user.c
>> @@ -0,0 +1,208 @@
>> +/*-
>> + *   BSD LICENSE
>> + *
>> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + *   All rights reserved.
>> + *
>> + *   Redistribution and use in source and binary forms, with or without
>> + *   modification, are permitted provided that the following conditions
>> + *   are met:
>> + *
>> + *     * Redistributions of source code must retain the above copyright
>> + *       notice, this list of conditions and the following disclaimer.
>> + *     * Redistributions in binary form must reproduce the above copyright
>> + *       notice, this list of conditions and the following disclaimer in
>> + *       the documentation and/or other materials provided with the
>> + *       distribution.
>> + *     * Neither the name of Intel Corporation nor the names of its
>> + *       contributors may be used to endorse or promote products derived
>> + *       from this software without specific prior written permission.
>> + *
>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <unistd.h>
>> +#include <sys/mman.h>
>> +
>> +#include <rte_log.h>
>> +
>> +#include "virtio-net-user.h"
>> +#include "vhost-net-user.h"
>> +#include "vhost-net.h"
>> +
>> +extern const struct vhost_net_device_ops *ops;
>> +
>> +#if 0
>> +/*
>> + * Single-mapping variant of user_set_mem_table(); compiled out by the
>> + * enclosing "#if 0". It maps only fds[0] at offset 0, sized to cover
>> + * every region relative to the region whose guest physical address is 0,
>> + * and derives each region's address_offset from that one mapping.
>> + */
>> +int
>> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> +	unsigned int idx;
>> +	struct VhostUserMemory memory = pmsg->payload.memory;
>> +	struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
>> +	uint64_t mapped_address, base_address = 0, mem_size = 0;
>> +
>> +	for (idx = 0; idx < memory.nregions; idx++) {
>> +		if (memory.regions[idx].guest_phys_addr == 0)
>> +			base_address = memory.regions[idx].userspace_addr;
>> +	}
>> +	if (base_address == 0) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"couldn't find the mem region whose gpa is 0.\n");
>> +		return -1;
>> +	}
>> +
>> +	for (idx = 0; idx < memory.nregions;  idx++) {
>> +		uint64_t size = memory.regions[idx].userspace_addr - 
>> +			base_address + memory.regions[idx].memory_size;
>> +		if (mem_size < size)
>> +			mem_size = size;
>> +	}
>> +
>> +	/*
>> +	 * here we assume qemu will map only one file for memory allocation,
>> +	 * we only use fds[0] with offset 0.
>> +	 */
>> +	mapped_address = (uint64_t)(uintptr_t)mmap(NULL, mem_size, 
>> +		PROT_READ | PROT_WRITE, MAP_SHARED, pmsg->fds[0], 0);
>> +
>> +	if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
>> +		RTE_LOG(ERR, VHOST_CONFIG, " mmap qemu guest failed.\n");
>> +		return -1;
>> +	}
>> +			
>> +	for (idx = 0; idx < memory.nregions; idx++) {
>> +		regions[idx].guest_phys_address = 
>> +			memory.regions[idx].guest_phys_addr;
>> +		regions[idx].guest_phys_address_end = 
>> +			memory.regions[idx].guest_phys_addr +
>> +			memory.regions[idx].memory_size;
>> +		regions[idx].memory_size = memory.regions[idx].memory_size;
>> +		regions[idx].userspace_address = 
>> +			memory.regions[idx].userspace_addr;
>> +
>> +		regions[idx].address_offset = mapped_address - base_address + 
>> +			regions[idx].userspace_address -
>> +			regions[idx].guest_phys_address;
>> +		LOG_DEBUG(VHOST_CONFIG, 
>> +			"REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
>> +			idx,
>> +			(void *)(uintptr_t)regions[idx].guest_phys_address,
>> +			(void *)(uintptr_t)regions[idx].userspace_address,
>> +			 regions[idx].memory_size);
>> +	}
>> +	ops->set_mem_table(ctx, regions, memory.nregions);
>> +	return 0;
>> +}
>> +
>> +#else
>> +
>> +/**
>> + * Handle VHOST_USER_SET_MEM_TABLE: map every guest memory region through
>> + * the fd received for it, build the region table and hand it to
>> + * ops->set_mem_table().
>> + *
>> + * The region count comes from the peer and is validated before it is
>> + * used to index the fixed-size arrays. If one mapping fails, the regions
>> + * mapped so far are unmapped again.
>> + * @return
>> + *  0 on success, -1 on an invalid message or mmap failure.
>> + */
>> +int
>> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> +	unsigned int idx;
>> +	struct VhostUserMemory memory = pmsg->payload.memory;
>> +	struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
>> +	void *maps[VHOST_MEMORY_MAX_NREGIONS];
>> +	size_t map_len[VHOST_MEMORY_MAX_NREGIONS];
>> +	uint64_t mapped_address, base_address = 0;
>> +
>> +	if (memory.nregions > VHOST_MEMORY_MAX_NREGIONS) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"too many memory regions: %u\n", memory.nregions);
>> +		return -1;
>> +	}
>> +
>> +	for (idx = 0; idx < memory.nregions; idx++) {
>> +		if (memory.regions[idx].guest_phys_addr == 0)
>> +			base_address = memory.regions[idx].userspace_addr;
>> +	}
>> +	if (base_address == 0) {
>> +		RTE_LOG(ERR, VHOST_CONFIG,
>> +			"couldn't find the mem region whose gpa is 0.\n");
>> +		return -1;
>> +	}
>> +
>> +	for (idx = 0; idx < memory.nregions; idx++) {
>> +		regions[idx].guest_phys_address =
>> +			memory.regions[idx].guest_phys_addr;
>> +		regions[idx].guest_phys_address_end =
>> +			memory.regions[idx].guest_phys_addr +
>> +			memory.regions[idx].memory_size;
>> +		regions[idx].memory_size = memory.regions[idx].memory_size;
>> +		regions[idx].userspace_address =
>> +			memory.regions[idx].userspace_addr;
>> +
>> +		/*
>> +		 * The file is mapped from offset 0 with the region offset
>> +		 * added to the length, and the offset is applied to the
>> +		 * returned address afterwards.
>> +		 * NOTE(review): presumably because mmap_offset may not meet
>> +		 * mmap()'s offset alignment for the backing file - confirm.
>> +		 */
>> +		map_len[idx] = regions[idx].memory_size +
>> +			memory.regions[idx].mmap_offset;
>> +		maps[idx] = mmap(NULL, map_len[idx],
>> +			PROT_READ | PROT_WRITE, MAP_SHARED,
>> +			pmsg->fds[idx], 0);
>> +		if (maps[idx] == MAP_FAILED) {
>> +			RTE_LOG(ERR, VHOST_CONFIG, " mmap qemu guest failed.\n");
>> +			goto err_unmap;
>> +		}
>> +		RTE_LOG(INFO, VHOST_CONFIG,
>> +			"mapped region %u to %p\n", idx, maps[idx]);
>> +
>> +		mapped_address = (uint64_t)(uintptr_t)maps[idx] +
>> +			memory.regions[idx].mmap_offset;
>> +
>> +		regions[idx].address_offset = mapped_address -
>> +			regions[idx].guest_phys_address;
>> +		LOG_DEBUG(VHOST_CONFIG,
>> +			"REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
>> +			idx,
>> +			(void *)(uintptr_t)regions[idx].guest_phys_address,
>> +			(void *)(uintptr_t)regions[idx].userspace_address,
>> +			 regions[idx].memory_size);
>> +	}
>> +	ops->set_mem_table(ctx, regions, memory.nregions);
>> +	return 0;
>> +
>> +err_unmap:
>> +	/* idx is the region that failed; undo the mappings before it. */
>> +	while (idx-- > 0)
>> +		munmap(maps[idx], map_len[idx]);
>> +	return -1;
>> +}
>> +
>> +
>> +
>> +
>> +#endif
>> +
>> +
>> +/**
>> + * Handle VHOST_USER_SET_VRING_CALL: decode the vring index from the low
>> + * bits of payload.u64 and pass the call fd to ops->set_vring_call().
>> + *
>> + * When the peer sets VHOST_USER_VRING_NOFD_MASK no fd accompanies the
>> + * message, so fd -1 is passed on instead of reading fds[0].
>> + */
>> +void
>> +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> +	struct vhost_vring_file file;
>> +
>> +	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
>> +	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
>> +		file.fd = -1;
>> +	else
>> +		file.fd = pmsg->fds[0];
>> +	RTE_LOG(INFO, VHOST_CONFIG,
>> +		"vring call idx:%d file:%d\n", file.index, file.fd);
>> +	ops->set_vring_call(ctx, &file);
>> +}
>> +
>> +
>> +/**
>> + * Handle VHOST_USER_SET_VRING_KICK: decode the vring index from the low
>> + * bits of payload.u64 and pass the kick fd to ops->set_vring_kick().
>> + *
>> + * When the peer sets VHOST_USER_VRING_NOFD_MASK no fd accompanies the
>> + * message, so fd -1 is passed on instead of reading fds[0].
>> + */
>> +void
>> +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> +	struct vhost_vring_file file;
>> +
>> +	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
>> +	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
>> +		file.fd = -1;
>> +	else
>> +		file.fd = pmsg->fds[0];
>> +	RTE_LOG(INFO, VHOST_CONFIG,
>> +		"vring kick idx:%d file:%d\n", file.index, file.fd);
>> +	ops->set_vring_kick(ctx, &file);
>> +}
>> diff --git a/lib/librte_vhost/vhost-user/virtio-net-user.h b/lib/librte_vhost/vhost-user/virtio-net-user.h
>> new file mode 100644
>> index 0000000..0969376
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost-user/virtio-net-user.h
>> @@ -0,0 +1,11 @@
>> +#ifndef _VIRTIO_NET_USER_H
>> +#define _VIRTIO_NET_USER_H
>> +
>> +#include "vhost-net.h"
>> +#include "vhost-net-user.h"
>> +
>> +/* Handler for VHOST_USER_SET_MEM_TABLE; returns 0 on success, -1 on failure. */
>> +int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
>> +/* Handler for VHOST_USER_SET_VRING_KICK. */
>> +void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
>> +/* Handler for VHOST_USER_SET_VRING_CALL. */
>> +void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
>> +
>> +#endif
>> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
>> index ccfd82f..8ff0301 100644
>> --- a/lib/librte_vhost/vhost_rxtx.c
>> +++ b/lib/librte_vhost/vhost_rxtx.c
>> @@ -38,19 +38,14 @@
>>  #include <rte_memcpy.h>
>>  #include <rte_virtio_net.h>
>>  
>> -#include "vhost-net-cdev.h"
>> +#include "vhost-net.h"
>>  
>> -#define MAX_PKT_BURST 32
>> +#define VHOST_MAX_PKT_BURST 64
>> +#define VHOST_MAX_MRG_PKT_BURST 64
>>  
>> -/**
>> - * This function adds buffers to the virtio devices RX virtqueue. Buffers can
>> - * be received from the physical port or from another virtio device. A packet
>> - * count is returned to indicate the number of packets that are succesfully
>> - * added to the RX queue. This function works when mergeable is disabled.
>> - */
>> -static inline uint32_t __attribute__((always_inline))
>> -virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>> -	struct rte_mbuf **pkts, uint32_t count)
>> +
>> +uint32_t
>> +rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count)
>>  {
>>  	struct vhost_virtqueue *vq;
>>  	struct vring_desc *desc;
>> @@ -59,26 +54,23 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>>  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
>>  	uint64_t buff_addr = 0;
>>  	uint64_t buff_hdr_addr = 0;
>> -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
>> +	uint32_t head[VHOST_MAX_PKT_BURST], packet_len = 0;
>>  	uint32_t head_idx, packet_success = 0;
>> +	uint32_t mergeable, mrg_count = 0;
>>  	uint16_t avail_idx, res_cur_idx;
>>  	uint16_t res_base_idx, res_end_idx;
>>  	uint16_t free_entries;
>>  	uint8_t success = 0;
>>  
>> -	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
>> +	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s()\n", dev->device_fh, __func__);
>>  	if (unlikely(queue_id != VIRTIO_RXQ)) {
>>  		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
>>  		return 0;
>>  	}
>>  
>>  	vq = dev->virtqueue[VIRTIO_RXQ];
>> -	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
>> -
>> -	/*
>> -	 * As many data cores may want access to available buffers,
>> -	 * they need to be reserved.
>> -	 */
>> +	count = (count > VHOST_MAX_PKT_BURST) ? VHOST_MAX_PKT_BURST : count;
>> +	/* As many data cores may want access to available buffers, they need to be reserved. */
>>  	do {
>>  		res_base_idx = vq->last_used_idx_res;
>>  		avail_idx = *((volatile uint16_t *)&vq->avail->idx);
>> @@ -93,21 +85,25 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>>  
>>  		res_end_idx = res_base_idx + count;
>>  		/* vq->last_used_idx_res is atomically updated. */
>> -		/* TODO: Allow to disable cmpset if no concurrency in application. */
>> +		/* TODO: Allow to disable cmpset if no concurrency in application */
>>  		success = rte_atomic16_cmpset(&vq->last_used_idx_res,
>>  				res_base_idx, res_end_idx);
>> +		/* If the cmpset failed because of contention, try again. */
>>  	} while (unlikely(success == 0));
>>  	res_cur_idx = res_base_idx;
>>  	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
>> -			dev->device_fh, res_cur_idx, res_end_idx);
>> +			dev->device_fh,
>> +			res_cur_idx, res_end_idx);
>>  
>>  	/* Prefetch available ring to retrieve indexes. */
>>  	rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);
>>  
>> +	/* Check if the VIRTIO_NET_F_MRG_RXBUF feature is enabled. */
>> +	mergeable = dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF);
>> +
>>  	/* Retrieve all of the head indexes first to avoid caching issues. */
>>  	for (head_idx = 0; head_idx < count; head_idx++)
>> -		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
>> -					(vq->size - 1)];
>> +		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)];
>>  
>>  	/*Prefetch descriptor index. */
>>  	rte_prefetch0(&vq->desc[head[packet_success]]);
>> @@ -123,46 +119,57 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>>  		/* Prefetch buffer address. */
>>  		rte_prefetch0((void *)(uintptr_t)buff_addr);
>>  
>> -		/* Copy virtio_hdr to packet and increment buffer address */
>> -		buff_hdr_addr = buff_addr;
>> -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
>> -
>> -		/*
>> -		 * If the descriptors are chained the header and data are
>> -		 * placed in separate buffers.
>> -		 */
>> -		if (desc->flags & VRING_DESC_F_NEXT) {
>> -			desc->len = vq->vhost_hlen;
>> -			desc = &vq->desc[desc->next];
>> -			/* Buffer address translation. */
>> -			buff_addr = gpa_to_vva(dev, desc->addr);
>> -			desc->len = rte_pktmbuf_data_len(buff);
>> +		if (mergeable && (mrg_count != 0)) {
>> +			desc->len = packet_len = rte_pktmbuf_data_len(buff);
>>  		} else {
>> -			buff_addr += vq->vhost_hlen;
>> -			desc->len = packet_len;
>> +			/* Copy virtio_hdr to packet and increment buffer address */
>> +			buff_hdr_addr = buff_addr;
>> +			packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
>> +
>> +			/*
>> +			 * If the descriptors are chained the header and data are placed in
>> +			 * separate buffers.
>> +			 */
>> +			if (desc->flags & VRING_DESC_F_NEXT) {
>> +				desc->len = vq->vhost_hlen;
>> +				desc = &vq->desc[desc->next];
>> +				/* Buffer address translation. */
>> +				buff_addr = gpa_to_vva(dev, desc->addr);
>> +				desc->len = rte_pktmbuf_data_len(buff);
>> +			} else {
>> +				buff_addr += vq->vhost_hlen;
>> +				desc->len = packet_len;
>> +			}
>>  		}
>>  
>> +		VHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, rte_pktmbuf_data_len(buff), 0);
>> +
>>  		/* Update used ring with desc information */
>> -		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
>> -							head[packet_success];
>> +		vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];
>>  		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
>>  
>>  		/* Copy mbuf data to buffer */
>> -		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
>> -		rte_memcpy((void *)(uintptr_t)buff_addr,
>> -			rte_pktmbuf_mtod(buff, const void *),
>> -			rte_pktmbuf_data_len(buff));
>> -		PRINT_PACKET(dev, (uintptr_t)buff_addr,
>> -			rte_pktmbuf_data_len(buff), 0);
>> +		/* TODO: handle scatter-gather mbufs and the case where the desc cannot hold the mbuf data. */
>> +		rte_memcpy((void *)(uintptr_t)buff_addr, (const void *)buff->pkt.data, rte_pktmbuf_data_len(buff));
>>  
>>  		res_cur_idx++;
>>  		packet_success++;
>>  
>> -		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
>> -			(const void *)&virtio_hdr, vq->vhost_hlen);
>> -
>> -		PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
>> -
>> +		/* If mergeable is disabled then a header is required per buffer. */
>> +		if (!mergeable) {
>> +			rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen);
>> +			VHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
>> +		} else {
>> +			mrg_count++;
>> +			/* Merge buffer can only handle so many buffers at a time. Tell the guest if this limit is reached. */
>> +			if ((mrg_count == VHOST_MAX_MRG_PKT_BURST) || (res_cur_idx == res_end_idx)) {
>> +				virtio_hdr.num_buffers = mrg_count;
>> +				LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n", dev->device_fh, virtio_hdr.num_buffers);
>> +				rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen);
>> +				VHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
>> +				mrg_count = 0;
>> +			}
>> +		}
>>  		if (res_cur_idx < res_end_idx) {
>>  			/* Prefetch descriptor index. */
>>  			rte_prefetch0(&vq->desc[head[packet_success]]);
>> @@ -184,357 +191,18 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>>  	return count;
>>  }
>>  
>> -static inline uint32_t __attribute__((always_inline))
>> -copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
>> -	uint16_t res_end_idx, struct rte_mbuf *pkt)
>> -{
>> -	uint32_t vec_idx = 0;
>> -	uint32_t entry_success = 0;
>> -	struct vhost_virtqueue *vq;
>> -	/* The virtio_hdr is initialised to 0. */
>> -	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {
>> -		{0, 0, 0, 0, 0, 0}, 0};
>> -	uint16_t cur_idx = res_base_idx;
>> -	uint64_t vb_addr = 0;
>> -	uint64_t vb_hdr_addr = 0;
>> -	uint32_t seg_offset = 0;
>> -	uint32_t vb_offset = 0;
>> -	uint32_t seg_avail;
>> -	uint32_t vb_avail;
>> -	uint32_t cpy_len, entry_len;
>> -
>> -	if (pkt == NULL)
>> -		return 0;
>> -
>> -	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| "
>> -		"End Index %d\n",
>> -		dev->device_fh, cur_idx, res_end_idx);
>> -
>> -	/*
>> -	 * Convert from gpa to vva
>> -	 * (guest physical addr -> vhost virtual addr)
>> -	 */
>> -	vq = dev->virtqueue[VIRTIO_RXQ];
>> -	vb_addr =
>> -		gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
>> -	vb_hdr_addr = vb_addr;
>> -
>> -	/* Prefetch buffer address. */
>> -	rte_prefetch0((void *)(uintptr_t)vb_addr);
>> -
>> -	virtio_hdr.num_buffers = res_end_idx - res_base_idx;
>> -
>> -	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n",
>> -		dev->device_fh, virtio_hdr.num_buffers);
>>  
>> -	rte_memcpy((void *)(uintptr_t)vb_hdr_addr,
>> -		(const void *)&virtio_hdr, vq->vhost_hlen);
>> -
>> -	PRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1);
>> -
>> -	seg_avail = rte_pktmbuf_data_len(pkt);
>> -	vb_offset = vq->vhost_hlen;
>> -	vb_avail =
>> -		vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
>> -
>> -	entry_len = vq->vhost_hlen;
>> -
>> -	if (vb_avail == 0) {
>> -		uint32_t desc_idx =
>> -			vq->buf_vec[vec_idx].desc_idx;
>> -		vq->desc[desc_idx].len = vq->vhost_hlen;
>> -
>> -		if ((vq->desc[desc_idx].flags
>> -			& VRING_DESC_F_NEXT) == 0) {
>> -			/* Update used ring with desc information */
>> -			vq->used->ring[cur_idx & (vq->size - 1)].id
>> -				= vq->buf_vec[vec_idx].desc_idx;
>> -			vq->used->ring[cur_idx & (vq->size - 1)].len
>> -				= entry_len;
>> -
>> -			entry_len = 0;
>> -			cur_idx++;
>> -			entry_success++;
>> -		}
>> -
>> -		vec_idx++;
>> -		vb_addr =
>> -			gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
>> -
>> -		/* Prefetch buffer address. */
>> -		rte_prefetch0((void *)(uintptr_t)vb_addr);
>> -		vb_offset = 0;
>> -		vb_avail = vq->buf_vec[vec_idx].buf_len;
>> -	}
>> -
>> -	cpy_len = RTE_MIN(vb_avail, seg_avail);
>> -
>> -	while (cpy_len > 0) {
>> -		/* Copy mbuf data to vring buffer */
>> -		rte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset),
>> -			(const void *)(rte_pktmbuf_mtod(pkt, char*) + seg_offset),
>> -			cpy_len);
>> -
>> -		PRINT_PACKET(dev,
>> -			(uintptr_t)(vb_addr + vb_offset),
>> -			cpy_len, 0);
>> -
>> -		seg_offset += cpy_len;
>> -		vb_offset += cpy_len;
>> -		seg_avail -= cpy_len;
>> -		vb_avail -= cpy_len;
>> -		entry_len += cpy_len;
>> -
>> -		if (seg_avail != 0) {
>> -			/*
>> -			 * The virtio buffer in this vring
>> -			 * entry reach to its end.
>> -			 * But the segment doesn't complete.
>> -			 */
>> -			if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &
>> -				VRING_DESC_F_NEXT) == 0) {
>> -				/* Update used ring with desc information */
>> -				vq->used->ring[cur_idx & (vq->size - 1)].id
>> -					= vq->buf_vec[vec_idx].desc_idx;
>> -				vq->used->ring[cur_idx & (vq->size - 1)].len
>> -					= entry_len;
>> -				entry_len = 0;
>> -				cur_idx++;
>> -				entry_success++;
>> -			}
>> -
>> -			vec_idx++;
>> -			vb_addr = gpa_to_vva(dev,
>> -				vq->buf_vec[vec_idx].buf_addr);
>> -			vb_offset = 0;
>> -			vb_avail = vq->buf_vec[vec_idx].buf_len;
>> -			cpy_len = RTE_MIN(vb_avail, seg_avail);
>> -		} else {
>> -			/*
>> -			 * This current segment complete, need continue to
>> -			 * check if the whole packet complete or not.
>> -			 */
>> -			pkt = pkt->next;
>> -			if (pkt != NULL) {
>> -				/*
>> -				 * There are more segments.
>> -				 */
>> -				if (vb_avail == 0) {
>> -					/*
>> -					 * This current buffer from vring is
>> -					 * used up, need fetch next buffer
>> -					 * from buf_vec.
>> -					 */
>> -					uint32_t desc_idx =
>> -						vq->buf_vec[vec_idx].desc_idx;
>> -					vq->desc[desc_idx].len = vb_offset;
>> -
>> -					if ((vq->desc[desc_idx].flags &
>> -						VRING_DESC_F_NEXT) == 0) {
>> -						uint16_t wrapped_idx =
>> -							cur_idx & (vq->size - 1);
>> -						/*
>> -						 * Update used ring with the
>> -						 * descriptor information
>> -						 */
>> -						vq->used->ring[wrapped_idx].id
>> -							= desc_idx;
>> -						vq->used->ring[wrapped_idx].len
>> -							= entry_len;
>> -						entry_success++;
>> -						entry_len = 0;
>> -						cur_idx++;
>> -					}
>> -
>> -					/* Get next buffer from buf_vec. */
>> -					vec_idx++;
>> -					vb_addr = gpa_to_vva(dev,
>> -						vq->buf_vec[vec_idx].buf_addr);
>> -					vb_avail =
>> -						vq->buf_vec[vec_idx].buf_len;
>> -					vb_offset = 0;
>> -				}
>> -
>> -				seg_offset = 0;
>> -				seg_avail = rte_pktmbuf_data_len(pkt);
>> -				cpy_len = RTE_MIN(vb_avail, seg_avail);
>> -			} else {
>> -				/*
>> -				 * This whole packet completes.
>> -				 */
>> -				uint32_t desc_idx =
>> -					vq->buf_vec[vec_idx].desc_idx;
>> -				vq->desc[desc_idx].len = vb_offset;
>> -
>> -				while (vq->desc[desc_idx].flags &
>> -					VRING_DESC_F_NEXT) {
>> -					desc_idx = vq->desc[desc_idx].next;
>> -					 vq->desc[desc_idx].len = 0;
>> -				}
>> -
>> -				/* Update used ring with desc information */
>> -				vq->used->ring[cur_idx & (vq->size - 1)].id
>> -					= vq->buf_vec[vec_idx].desc_idx;
>> -				vq->used->ring[cur_idx & (vq->size - 1)].len
>> -					= entry_len;
>> -				entry_len = 0;
>> -				cur_idx++;
>> -				entry_success++;
>> -				seg_avail = 0;
>> -				cpy_len = RTE_MIN(vb_avail, seg_avail);
>> -			}
>> -		}
>> -	}
>> -
>> -	return entry_success;
>> -}
>> -
>> -/*
>> - * This function works for mergeable RX.
>> - */
>> -static inline uint32_t __attribute__((always_inline))
>> -virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
>> -	struct rte_mbuf **pkts, uint32_t count)
>> +uint32_t
>> +rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count)
>>  {
>> -	struct vhost_virtqueue *vq;
>> -	uint32_t pkt_idx = 0, entry_success = 0;
>> -	uint16_t avail_idx, res_cur_idx;
>> -	uint16_t res_base_idx, res_end_idx;
>> -	uint8_t success = 0;
>> -
>> -	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
>> -		dev->device_fh);
>> -	if (unlikely(queue_id != VIRTIO_RXQ)) {
>> -		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
>> -	}
>> -
>> -	vq = dev->virtqueue[VIRTIO_RXQ];
>> -	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
>> -
>> -	if (count == 0)
>> -		return 0;
>> -
>> -	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
>> -		uint32_t secure_len = 0;
>> -		uint16_t need_cnt;
>> -		uint32_t vec_idx = 0;
>> -		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
>> -		uint16_t i, id;
>> -
>> -		do {
>> -			/*
>> -			 * As many data cores may want access to available
>> -			 * buffers, they need to be reserved.
>> -			 */
>> -			res_base_idx = vq->last_used_idx_res;
>> -			res_cur_idx = res_base_idx;
>> -
>> -			do {
>> -				avail_idx = *((volatile uint16_t *)&vq->avail->idx);
>> -				if (unlikely(res_cur_idx == avail_idx)) {
>> -					LOG_DEBUG(VHOST_DATA,
>> -						"(%"PRIu64") Failed "
>> -						"to get enough desc from "
>> -						"vring\n",
>> -						dev->device_fh);
>> -					return pkt_idx;
>> -				} else {
>> -					uint16_t wrapped_idx =
>> -						(res_cur_idx) & (vq->size - 1);
>> -					uint32_t idx =
>> -						vq->avail->ring[wrapped_idx];
>> -					uint8_t next_desc;
>> -
>> -					do {
>> -						next_desc = 0;
>> -						secure_len += vq->desc[idx].len;
>> -						if (vq->desc[idx].flags &
>> -							VRING_DESC_F_NEXT) {
>> -							idx = vq->desc[idx].next;
>> -							next_desc = 1;
>> -						}
>> -					} while (next_desc);
>> -
>> -					res_cur_idx++;
>> -				}
>> -			} while (pkt_len > secure_len);
>> -
>> -			/* vq->last_used_idx_res is atomically updated. */
>> -			success = rte_atomic16_cmpset(&vq->last_used_idx_res,
>> -							res_base_idx,
>> -							res_cur_idx);
>> -		} while (success == 0);
>> -
>> -		id = res_base_idx;
>> -		need_cnt = res_cur_idx - res_base_idx;
>> -
>> -		for (i = 0; i < need_cnt; i++, id++) {
>> -			uint16_t wrapped_idx = id & (vq->size - 1);
>> -			uint32_t idx = vq->avail->ring[wrapped_idx];
>> -			uint8_t next_desc;
>> -			do {
>> -				next_desc = 0;
>> -				vq->buf_vec[vec_idx].buf_addr =
>> -					vq->desc[idx].addr;
>> -				vq->buf_vec[vec_idx].buf_len =
>> -					vq->desc[idx].len;
>> -				vq->buf_vec[vec_idx].desc_idx = idx;
>> -				vec_idx++;
>> -
>> -				if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
>> -					idx = vq->desc[idx].next;
>> -					next_desc = 1;
>> -				}
>> -			} while (next_desc);
>> -		}
>> -
>> -		res_end_idx = res_cur_idx;
>> -
>> -		entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
>> -			res_end_idx, pkts[pkt_idx]);
>> -
>> -		rte_compiler_barrier();
>> -
>> -		/*
>> -		 * Wait until it's our turn to add our buffer
>> -		 * to the used ring.
>> -		 */
>> -		while (unlikely(vq->last_used_idx != res_base_idx))
>> -			rte_pause();
>> -
>> -		*(volatile uint16_t *)&vq->used->idx += entry_success;
>> -		vq->last_used_idx = res_end_idx;
>> -
>> -		/* Kick the guest if necessary. */
>> -		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
>> -			eventfd_write((int)vq->kickfd, 1);
>> -	}
>> -
>> -	return count;
>> -}
>> -
>> -uint16_t
>> -rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
>> -	struct rte_mbuf **pkts, uint16_t count)
>> -{
>> -	if (unlikely(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)))
>> -		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
>> -	else
>> -		return virtio_dev_rx(dev, queue_id, pkts, count);
>> -}
>> -
>> -uint16_t
>> -rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>> -	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
>> -{
>> -	struct rte_mbuf *m, *prev;
>> +	struct rte_mbuf *mbuf;
>>  	struct vhost_virtqueue *vq;
>>  	struct vring_desc *desc;
>> -	uint64_t vb_addr = 0;
>> -	uint32_t head[MAX_PKT_BURST];
>> +	uint64_t buff_addr = 0;
>> +	uint32_t head[VHOST_MAX_PKT_BURST];
>>  	uint32_t used_idx;
>>  	uint32_t i;
>> -	uint16_t free_entries, entry_success = 0;
>> +	uint16_t free_entries, packet_success = 0;
>>  	uint16_t avail_idx;
>>  
>>  	if (unlikely(queue_id != VIRTIO_TXQ)) {
>> @@ -549,8 +217,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>>  	if (vq->last_used_idx == avail_idx)
>>  		return 0;
>>  
>> -	LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__,
>> -		dev->device_fh);
>> +	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s(%d->%d)\n", 
>> +		dev->device_fh, __func__, vq->last_used_idx, avail_idx);
>>  
>>  	/* Prefetch available ring to retrieve head indexes. */
>>  	rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);
>> @@ -558,173 +226,68 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>>  	/*get the number of free entries in the ring*/
>>  	free_entries = (avail_idx - vq->last_used_idx);
>>  
>> -	free_entries = RTE_MIN(free_entries, count);
>> +	if (free_entries > count)
>> +		free_entries = count;
>>  	/* Limit to MAX_PKT_BURST. */
>> -	free_entries = RTE_MIN(free_entries, MAX_PKT_BURST);
>> +	if (free_entries > VHOST_MAX_PKT_BURST)
>> +		free_entries = VHOST_MAX_PKT_BURST;
>>  
>> -	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
>> -			dev->device_fh, free_entries);
>> +	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", dev->device_fh, free_entries);
>>  	/* Retrieve all of the head indexes first to avoid caching issues. */
>>  	for (i = 0; i < free_entries; i++)
>>  		head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];
>>  
>>  	/* Prefetch descriptor index. */
>> -	rte_prefetch0(&vq->desc[head[entry_success]]);
>> +	rte_prefetch0(&vq->desc[head[packet_success]]);
>>  	rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);
>>  
>> -	while (entry_success < free_entries) {
>> -		uint32_t vb_avail, vb_offset;
>> -		uint32_t seg_avail, seg_offset;
>> -		uint32_t cpy_len;
>> -		uint32_t seg_num = 0;
>> -		struct rte_mbuf *cur;
>> -		uint8_t alloc_err = 0;
>> -
>> -		desc = &vq->desc[head[entry_success]];
>> +	while (packet_success < free_entries) {
>> +		desc = &vq->desc[head[packet_success]];
>>  
>>  		/* Discard first buffer as it is the virtio header */
>>  		desc = &vq->desc[desc->next];
>>  
>>  		/* Buffer address translation. */
>> -		vb_addr = gpa_to_vva(dev, desc->addr);
>> +		buff_addr = gpa_to_vva(dev, desc->addr);
>>  		/* Prefetch buffer address. */
>> -		rte_prefetch0((void *)(uintptr_t)vb_addr);
>> +		rte_prefetch0((void *)(uintptr_t)buff_addr);
>>  
>>  		used_idx = vq->last_used_idx & (vq->size - 1);
>>  
>> -		if (entry_success < (free_entries - 1)) {
>> +		if (packet_success < (free_entries - 1)) {
>>  			/* Prefetch descriptor index. */
>> -			rte_prefetch0(&vq->desc[head[entry_success+1]]);
>> +			rte_prefetch0(&vq->desc[head[packet_success+1]]);
>>  			rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]);
>>  		}
>>  
>>  		/* Update used index buffer information. */
>> -		vq->used->ring[used_idx].id = head[entry_success];
>> +		vq->used->ring[used_idx].id = head[packet_success];
>>  		vq->used->ring[used_idx].len = 0;
>>  
>> -		vb_offset = 0;
>> -		vb_avail = desc->len;
>> -		/* Allocate an mbuf and populate the structure. */
>> -		m = rte_pktmbuf_alloc(mbuf_pool);
>> -		if (unlikely(m == NULL)) {
>> -			RTE_LOG(ERR, VHOST_DATA,
>> -				"Failed to allocate memory for mbuf.\n");
>> -			return entry_success;
>> +		mbuf = rte_pktmbuf_alloc(mbuf_pool);
>> +		if (unlikely(mbuf == NULL)) {
>> +			RTE_LOG(ERR, VHOST_DATA, "Failed to allocate memory for mbuf.\n");
>> +			return packet_success;
>>  		}
>> -		seg_offset = 0;
>> -		seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
>> -		cpy_len = RTE_MIN(vb_avail, seg_avail);
>> -
>> -		PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);
>> -
>> -		seg_num++;
>> -		cur = m;
>> -		prev = m;
>> -		while (cpy_len != 0) {
>> -			rte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + seg_offset),
>> -				(void *)((uintptr_t)(vb_addr + vb_offset)),
>> -				cpy_len);
>> -
>> -			seg_offset += cpy_len;
>> -			vb_offset += cpy_len;
>> -			vb_avail -= cpy_len;
>> -			seg_avail -= cpy_len;
>> -
>> -			if (vb_avail != 0) {
>> -				/*
>> -				 * The segment reachs to its end,
>> -				 * while the virtio buffer in TX vring has
>> -				 * more data to be copied.
>> -				 */
>> -				cur->data_len = seg_offset;
>> -				m->pkt_len += seg_offset;
>> -				/* Allocate mbuf and populate the structure. */
>> -				cur = rte_pktmbuf_alloc(mbuf_pool);
>> -				if (unlikely(cur == NULL)) {
>> -					RTE_LOG(ERR, VHOST_DATA, "Failed to "
>> -						"allocate memory for mbuf.\n");
>> -					rte_pktmbuf_free(m);
>> -					alloc_err = 1;
>> -					break;
>> -				}
>> -
>> -				seg_num++;
>> -				prev->next = cur;
>> -				prev = cur;
>> -				seg_offset = 0;
>> -				seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
>> -			} else {
>> -				if (desc->flags & VRING_DESC_F_NEXT) {
>> -					/*
>> -					 * There are more virtio buffers in
>> -					 * same vring entry need to be copied.
>> -					 */
>> -					if (seg_avail == 0) {
>> -						/*
>> -						 * The current segment hasn't
>> -						 * room to accomodate more
>> -						 * data.
>> -						 */
>> -						cur->data_len = seg_offset;
>> -						m->pkt_len += seg_offset;
>> -						/*
>> -						 * Allocate an mbuf and
>> -						 * populate the structure.
>> -						 */
>> -						cur = rte_pktmbuf_alloc(mbuf_pool);
>> -						if (unlikely(cur == NULL)) {
>> -							RTE_LOG(ERR,
>> -								VHOST_DATA,
>> -								"Failed to "
>> -								"allocate memory "
>> -								"for mbuf\n");
>> -							rte_pktmbuf_free(m);
>> -							alloc_err = 1;
>> -							break;
>> -						}
>> -						seg_num++;
>> -						prev->next = cur;
>> -						prev = cur;
>> -						seg_offset = 0;
>> -						seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
>> -					}
>> -
>> -					desc = &vq->desc[desc->next];
>> -
>> -					/* Buffer address translation. */
>> -					vb_addr = gpa_to_vva(dev, desc->addr);
>> -					/* Prefetch buffer address. */
>> -					rte_prefetch0((void *)(uintptr_t)vb_addr);
>> -					vb_offset = 0;
>> -					vb_avail = desc->len;
>> -
>> -					PRINT_PACKET(dev, (uintptr_t)vb_addr,
>> -						desc->len, 0);
>> -				} else {
>> -					/* The whole packet completes. */
>> -					cur->data_len = seg_offset;
>> -					m->pkt_len += seg_offset;
>> -					vb_avail = 0;
>> -				}
>> -			}
>> +		mbuf->pkt.data_len = desc->len;
>> +		mbuf->pkt.pkt_len  = mbuf->pkt.data_len;
>>  
>> -			cpy_len = RTE_MIN(vb_avail, seg_avail);
>> -		}
>> +		rte_memcpy((void *) mbuf->pkt.data,
>> +			(const void *) buff_addr, mbuf->pkt.data_len);
>>  
>> -		if (unlikely(alloc_err == 1))
>> -			break;
>> +		pkts[packet_success] = mbuf;
>>  
>> -		m->nb_segs = seg_num;
>> +		VHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0);
>>  
>> -		pkts[entry_success] = m;
>>  		vq->last_used_idx++;
>> -		entry_success++;
>> +		packet_success++;
>>  	}
>>  
>>  	rte_compiler_barrier();
>> -	vq->used->idx += entry_success;
>> +	vq->used->idx += packet_success;
>>  	/* Kick guest if required. */
>>  	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
>>  		eventfd_write((int)vq->kickfd, 1);
>> -	return entry_success;
>> +
>> +	return packet_success;
>>  }
>> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
>> index 852b6d1..516e743 100644
>> --- a/lib/librte_vhost/virtio-net.c
>> +++ b/lib/librte_vhost/virtio-net.c
>> @@ -31,17 +31,14 @@
>>   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>>   */
>>  
>> -#include <dirent.h>
>> -#include <fuse/cuse_lowlevel.h>
>>  #include <linux/vhost.h>
>>  #include <linux/virtio_net.h>
>>  #include <stddef.h>
>>  #include <stdint.h>
>>  #include <stdlib.h>
>> -#include <sys/eventfd.h>
>> -#include <sys/ioctl.h>
>>  #include <sys/mman.h>
>>  #include <unistd.h>
>> +#include <assert.h>
>>  
>>  #include <rte_ethdev.h>
>>  #include <rte_log.h>
>> @@ -49,10 +46,8 @@
>>  #include <rte_memory.h>
>>  #include <rte_virtio_net.h>
>>  
>> -#include "vhost-net-cdev.h"
>> -#include "eventfd_link/eventfd_link.h"
>> -
>> -/*
>> +#include "vhost-net.h"
>> +/**
>>   * Device linked list structure for configuration.
>>   */
>>  struct virtio_net_config_ll {
>> @@ -60,38 +55,15 @@ struct virtio_net_config_ll {
>>  	struct virtio_net_config_ll *next;	/* Next dev on linked list.*/
>>  };
>>  
>> -const char eventfd_cdev[] = "/dev/eventfd-link";
>> -
>> -/* device ops to add/remove device to/from data core. */
>> +/* Device ops to add/remove a device to/from the data core. */
>>  static struct virtio_net_device_ops const *notify_ops;
>> -/* root address of the linked list of managed virtio devices */
>> +/* root address of the linked list in the configuration core. */
>>  static struct virtio_net_config_ll *ll_root;
>>  
>>  /* Features supported by this lib. */
>> -#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
>> -				  (1ULL << VIRTIO_NET_F_CTRL_RX))
>> +#define VHOST_SUPPORTED_FEATURES (1ULL << VIRTIO_NET_F_MRG_RXBUF)
>>  static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
>>  
>> -/* Line size for reading maps file. */
>> -static const uint32_t BUFSIZE = PATH_MAX;
>> -
>> -/* Size of prot char array in procmap. */
>> -#define PROT_SZ 5
>> -
>> -/* Number of elements in procmap struct. */
>> -#define PROCMAP_SZ 8
>> -
>> -/* Structure containing information gathered from maps file. */
>> -struct procmap {
>> -	uint64_t va_start;	/* Start virtual address in file. */
>> -	uint64_t len;		/* Size of file. */
>> -	uint64_t pgoff;		/* Not used. */
>> -	uint32_t maj;		/* Not used. */
>> -	uint32_t min;		/* Not used. */
>> -	uint32_t ino;		/* Not used. */
>> -	char prot[PROT_SZ];	/* Not used. */
>> -	char fname[PATH_MAX];	/* File name. */
>> -};
>>  
>>  /*
>>   * Converts QEMU virtual address to Vhost virtual address. This function is
>> @@ -110,199 +82,15 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
>>  		if ((qemu_va >= region->userspace_address) &&
>>  			(qemu_va <= region->userspace_address +
>>  			region->memory_size)) {
>> -			vhost_va = dev->mem->mapped_address + qemu_va -
>> -					dev->mem->base_address;
>> +			vhost_va = qemu_va +  region->guest_phys_address + 
>> +				region->address_offset -
>> +				region->userspace_address;
>>  			break;
>>  		}
>>  	}
>>  	return vhost_va;
>>  }
>>  
>> -/*
>> - * Locate the file containing QEMU's memory space and
>> - * map it to our address space.
>> - */
>> -static int
>> -host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
>> -	pid_t pid, uint64_t addr)
>> -{
>> -	struct dirent *dptr = NULL;
>> -	struct procmap procmap;
>> -	DIR *dp = NULL;
>> -	int fd;
>> -	int i;
>> -	char memfile[PATH_MAX];
>> -	char mapfile[PATH_MAX];
>> -	char procdir[PATH_MAX];
>> -	char resolved_path[PATH_MAX];
>> -	char *path = NULL;
>> -	FILE *fmap;
>> -	void *map;
>> -	uint8_t found = 0;
>> -	char line[BUFSIZE];
>> -	char dlm[] = "-   :   ";
>> -	char *str, *sp, *in[PROCMAP_SZ];
>> -	char *end = NULL;
>> -
>> -	/* Path where mem files are located. */
>> -	snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
>> -	/* Maps file used to locate mem file. */
>> -	snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
>> -
>> -	fmap = fopen(mapfile, "r");
>> -	if (fmap == NULL) {
>> -		RTE_LOG(ERR, VHOST_CONFIG,
>> -			"(%"PRIu64") Failed to open maps file for pid %d\n",
>> -			dev->device_fh, pid);
>> -		return -1;
>> -	}
>> -
>> -	/* Read through maps file until we find out base_address. */
>> -	while (fgets(line, BUFSIZE, fmap) != 0) {
>> -		str = line;
>> -		errno = 0;
>> -		/* Split line into fields. */
>> -		for (i = 0; i < PROCMAP_SZ; i++) {
>> -			in[i] = strtok_r(str, &dlm[i], &sp);
>> -			if ((in[i] == NULL) || (errno != 0)) {
>> -				fclose(fmap);
>> -				return -1;
>> -			}
>> -			str = NULL;
>> -		}
>> -
>> -		/* Convert/Copy each field as needed. */
>> -		procmap.va_start = strtoull(in[0], &end, 16);
>> -		if ((in[0] == '\0') || (end == NULL) || (*end != '\0') ||
>> -			(errno != 0)) {
>> -			fclose(fmap);
>> -			return -1;
>> -		}
>> -
>> -		procmap.len = strtoull(in[1], &end, 16);
>> -		if ((in[1] == '\0') || (end == NULL) || (*end != '\0') ||
>> -			(errno != 0)) {
>> -			fclose(fmap);
>> -			return -1;
>> -		}
>> -
>> -		procmap.pgoff = strtoull(in[3], &end, 16);
>> -		if ((in[3] == '\0') || (end == NULL) || (*end != '\0') ||
>> -			(errno != 0)) {
>> -			fclose(fmap);
>> -			return -1;
>> -		}
>> -
>> -		procmap.maj = strtoul(in[4], &end, 16);
>> -		if ((in[4] == '\0') || (end == NULL) || (*end != '\0') ||
>> -			(errno != 0)) {
>> -			fclose(fmap);
>> -			return -1;
>> -		}
>> -
>> -		procmap.min = strtoul(in[5], &end, 16);
>> -		if ((in[5] == '\0') || (end == NULL) || (*end != '\0') ||
>> -			(errno != 0)) {
>> -			fclose(fmap);
>> -			return -1;
>> -		}
>> -
>> -		procmap.ino = strtoul(in[6], &end, 16);
>> -		if ((in[6] == '\0') || (end == NULL) || (*end != '\0') ||
>> -			(errno != 0)) {
>> -			fclose(fmap);
>> -			return -1;
>> -		}
>> -
>> -		memcpy(&procmap.prot, in[2], PROT_SZ);
>> -		memcpy(&procmap.fname, in[7], PATH_MAX);
>> -
>> -		if (procmap.va_start == addr) {
>> -			procmap.len = procmap.len - procmap.va_start;
>> -			found = 1;
>> -			break;
>> -		}
>> -	}
>> -	fclose(fmap);
>> -
>> -	if (!found) {
>> -		RTE_LOG(ERR, VHOST_CONFIG,
>> -			"(%"PRIu64") Failed to find memory file in pid %d maps file\n",
>> -			dev->device_fh, pid);
>> -		return -1;
>> -	}
>> -
>> -	/* Find the guest memory file among the process fds. */
>> -	dp = opendir(procdir);
>> -	if (dp == NULL) {
>> -		RTE_LOG(ERR, VHOST_CONFIG,
>> -			"(%"PRIu64") Cannot open pid %d process directory\n",
>> -			dev->device_fh, pid);
>> -		return -1;
>> -	}
>> -
>> -	found = 0;
>> -
>> -	/* Read the fd directory contents. */
>> -	while (NULL != (dptr = readdir(dp))) {
>> -		snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
>> -				pid, dptr->d_name);
>> -		path = realpath(memfile, resolved_path);
>> -		if ((path == NULL) && (strlen(resolved_path) == 0)) {
>> -			RTE_LOG(ERR, VHOST_CONFIG,
>> -				"(%"PRIu64") Failed to resolve fd directory\n",
>> -				dev->device_fh);
>> -			closedir(dp);
>> -			return -1;
>> -		}
>> -		if (strncmp(resolved_path, procmap.fname,
>> -			strnlen(procmap.fname, PATH_MAX)) == 0) {
>> -			found = 1;
>> -			break;
>> -		}
>> -	}
>> -
>> -	closedir(dp);
>> -
>> -	if (found == 0) {
>> -		RTE_LOG(ERR, VHOST_CONFIG,
>> -			"(%"PRIu64") Failed to find memory file for pid %d\n",
>> -			dev->device_fh, pid);
>> -		return -1;
>> -	}
>> -	/* Open the shared memory file and map the memory into this process. */
>> -	fd = open(memfile, O_RDWR);
>> -
>> -	if (fd == -1) {
>> -		RTE_LOG(ERR, VHOST_CONFIG,
>> -			"(%"PRIu64") Failed to open %s for pid %d\n",
>> -			dev->device_fh, memfile, pid);
>> -		return -1;
>> -	}
>> -
>> -	map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
>> -		MAP_POPULATE|MAP_SHARED, fd, 0);
>> -	close(fd);
>> -
>> -	if (map == MAP_FAILED) {
>> -		RTE_LOG(ERR, VHOST_CONFIG,
>> -			"(%"PRIu64") Error mapping the file %s for pid %d\n",
>> -			dev->device_fh, memfile, pid);
>> -		return -1;
>> -	}
>> -
>> -	/* Store the memory address and size in the device data structure */
>> -	mem->mapped_address = (uint64_t)(uintptr_t)map;
>> -	mem->mapped_size = procmap.len;
>> -
>> -	LOG_DEBUG(VHOST_CONFIG,
>> -		"(%"PRIu64") Mem File: %s->%s - Size: %llu - VA: %p\n",
>> -		dev->device_fh,
>> -		memfile, resolved_path,
>> -		(unsigned long long)mem->mapped_size, map);
>> -
>> -	return 0;
>> -}
>>  
>>  /*
>>   * Retrieves an entry from the devices configuration linked list.
>> @@ -376,7 +164,7 @@ add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
>>  	}
>>  
>>  }
>> -
>> +/*TODO dpdk alloc/free if possible */
>>  /*
>>   * Unmap any memory, close any file descriptors and
>>   * free any memory owned by a device.
>> @@ -389,16 +177,17 @@ cleanup_device(struct virtio_net *dev)
>>  		munmap((void *)(uintptr_t)dev->mem->mapped_address,
>>  			(size_t)dev->mem->mapped_size);
>>  		free(dev->mem);
>> +		dev->mem = NULL;
>>  	}
>>  
>>  	/* Close any event notifiers opened by device. */
>> -	if (dev->virtqueue[VIRTIO_RXQ]->callfd)
>> +	if (dev->virtqueue[VIRTIO_RXQ]->callfd > 0)
>>  		close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
>> -	if (dev->virtqueue[VIRTIO_RXQ]->kickfd)
>> +	if (dev->virtqueue[VIRTIO_RXQ]->kickfd > 0)
>>  		close((int)dev->virtqueue[VIRTIO_RXQ]->kickfd);
>> -	if (dev->virtqueue[VIRTIO_TXQ]->callfd)
>> +	if (dev->virtqueue[VIRTIO_TXQ]->callfd > 0)
>>  		close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
>> -	if (dev->virtqueue[VIRTIO_TXQ]->kickfd)
>> +	if (dev->virtqueue[VIRTIO_TXQ]->kickfd > 0)
>>  		close((int)dev->virtqueue[VIRTIO_TXQ]->kickfd);
>>  }
>>  
>> @@ -522,8 +311,8 @@ new_device(struct vhost_device_ctx ctx)
>>  }
>>  
>>  /*
>> - * Function is called from the CUSE release function. This function will
>> - * cleanup the device and remove it from device configuration linked list.
>> + * Function is called from the CUSE release function. This function will cleanup
>> + * the device and remove it from device configuration linked list.
>>   */
>>  static void
>>  destroy_device(struct vhost_device_ctx ctx)
>> @@ -569,6 +358,7 @@ set_owner(struct vhost_device_ctx ctx)
>>  		return -1;
>>  
>>  	return 0;
>> +	/* TODO check ctx.fh is meaningful here */
>>  }
>>  
>>  /*
>> @@ -651,14 +441,12 @@ set_features(struct vhost_device_ctx ctx, uint64_t *pu)
>>   * This includes storing offsets used to translate buffer addresses.
>>   */
>>  static int
>> -set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,
>> -	uint32_t nregions)
>> +set_mem_table(struct vhost_device_ctx ctx,
>> +	const struct virtio_memory_regions *regions, uint32_t nregions)
>>  {
>>  	struct virtio_net *dev;
>> -	struct vhost_memory_region *mem_regions;
>>  	struct virtio_memory *mem;
>> -	uint64_t size = offsetof(struct vhost_memory, regions);
>> -	uint32_t regionidx, valid_regions;
>> +	uint32_t regionidx;
>>  
>>  	dev = get_device(ctx);
>>  	if (dev == NULL)
>> @@ -682,107 +470,24 @@ set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,
>>  
>>  	mem->nregions = nregions;
>>  
>> -	mem_regions = (void *)(uintptr_t)
>> -			((uint64_t)(uintptr_t)mem_regions_addr + size);
>> -
>>  	for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
>>  		/* Populate the region structure for each region. */
>> -		mem->regions[regionidx].guest_phys_address =
>> -			mem_regions[regionidx].guest_phys_addr;
>> -		mem->regions[regionidx].guest_phys_address_end =
>> -			mem->regions[regionidx].guest_phys_address +
>> -			mem_regions[regionidx].memory_size;
>> -		mem->regions[regionidx].memory_size =
>> -			mem_regions[regionidx].memory_size;
>> -		mem->regions[regionidx].userspace_address =
>> -			mem_regions[regionidx].userspace_addr;
>> -
>> -		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n", dev->device_fh,
>> -			regionidx,
>> -			(void *)(uintptr_t)mem->regions[regionidx].guest_phys_address,
>> -			(void *)(uintptr_t)mem->regions[regionidx].userspace_address,
>> -			mem->regions[regionidx].memory_size);
>> -
>> -		/*set the base address mapping*/
>> +		mem->regions[regionidx] = regions[regionidx];
>>  		if (mem->regions[regionidx].guest_phys_address == 0x0) {
>>  			mem->base_address =
>>  				mem->regions[regionidx].userspace_address;
>> -			/* Map VM memory file */
>> -			if (host_memory_map(dev, mem, ctx.pid,
>> -				mem->base_address) != 0) {
>> -				free(mem);
>> -				return -1;
>> -			}
>> +			mem->mapped_address = 
>> +				mem->regions[regionidx].address_offset;
>>  		}
>>  	}
>>  
>> -	/* Check that we have a valid base address. */
>> -	if (mem->base_address == 0) {
>> -		RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Failed to find base address of qemu memory file.\n", dev->device_fh);
>> -		free(mem);
>> -		return -1;
>> -	}
>> -
>> -	/*
>> -	 * Check if all of our regions have valid mappings.
>> -	 * Usually one does not exist in the QEMU memory file.
>> -	 */
>> -	valid_regions = mem->nregions;
>> -	for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
>> -		if ((mem->regions[regionidx].userspace_address <
>> -			mem->base_address) ||
>> -			(mem->regions[regionidx].userspace_address >
>> -			(mem->base_address + mem->mapped_size)))
>> -				valid_regions--;
>> -	}
>> -
>> -	/*
>> -	 * If a region does not have a valid mapping,
>> -	 * we rebuild our memory struct to contain only valid entries.
>> -	 */
>> -	if (valid_regions != mem->nregions) {
>> -		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") Not all memory regions exist in the QEMU mem file. Re-populating mem structure\n",
>> -			dev->device_fh);
>> -
>> -		/*
>> -		 * Re-populate the memory structure with only valid regions.
>> -		 * Invalid regions are over-written with memmove.
>> -		 */
>> -		valid_regions = 0;
>> -
>> -		for (regionidx = mem->nregions; 0 != regionidx--;) {
>> -			if ((mem->regions[regionidx].userspace_address <
>> -				mem->base_address) ||
>> -				(mem->regions[regionidx].userspace_address >
>> -				(mem->base_address + mem->mapped_size))) {
>> -				memmove(&mem->regions[regionidx],
>> -					&mem->regions[regionidx + 1],
>> -					sizeof(struct virtio_memory_regions) *
>> -						valid_regions);
>> -			} else {
>> -				valid_regions++;
>> -			}
>> -		}
>> -	}
>> -	mem->nregions = valid_regions;
>> +	/* TODO: add back the logic that removes invalid memory regions */
>>  	dev->mem = mem;
>>  
>> -	/*
>> -	 * Calculate the address offset for each region.
>> -	 * This offset is used to identify the vhost virtual address
>> -	 * corresponding to a QEMU guest physical address.
>> -	 */
>> -	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
>> -		dev->mem->regions[regionidx].address_offset =
>> -			dev->mem->regions[regionidx].userspace_address -
>> -				dev->mem->base_address +
>> -				dev->mem->mapped_address -
>> -				dev->mem->regions[regionidx].guest_phys_address;
>> -
>> -	}
>>  	return 0;
>>  }
>>  
>> +
>>  /*
>>   * Called from CUSE IOCTL: VHOST_SET_VRING_NUM
>>   * The virtio device sends us the size of the descriptor ring.
>> @@ -896,38 +601,62 @@ get_vring_base(struct vhost_device_ctx ctx, uint32_t index,
>>  	/* State->index refers to the queue index. The txq is 1, rxq is 0. */
>>  	state->num = dev->virtqueue[state->index]->last_used_idx;
>>  
>> -	return 0;
>> -}
>> +	if (dev->flags & VIRTIO_DEV_RUNNING) {
>> +		RTE_LOG(INFO, VHOST_CONFIG, 
>> +			"get_vring_base message is for release\n");
>> +		notify_ops->destroy_device(dev);
>> +		/*
>> +		 * sync call.
>> +		 * when it returns, it means it is removed from data core.
>> +		 */
>> +	}
>> +	/* TODO fix all munmap */
>> +	if (dev->mem) {
>> +		munmap((void *)(uintptr_t)dev->mem->mapped_address,
>> +			(size_t)dev->mem->mapped_size);
>> +		free(dev->mem);
>> +		dev->mem = NULL;
>> +	}
>>  
>> -/*
>> - * This function uses the eventfd_link kernel module to copy an eventfd file
>> - * descriptor provided by QEMU in to our process space.
>> - */
>> -static int
>> -eventfd_copy(struct virtio_net *dev, struct eventfd_copy *eventfd_copy)
>> -{
>> -	int eventfd_link, ret;
>>  
>> -	/* Open the character device to the kernel module. */
>> -	eventfd_link = open(eventfd_cdev, O_RDWR);
>> -	if (eventfd_link < 0) {
>> -		RTE_LOG(ERR, VHOST_CONFIG,
>> -			"(%"PRIu64") eventfd_link module is not loaded\n",
>> -			dev->device_fh);
>> -		return -1;
>> -	}
>> +	if (dev->virtqueue[VIRTIO_RXQ]->callfd > 0)
>> +		close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
>> +	dev->virtqueue[VIRTIO_RXQ]->callfd = -1;
>> +	if (dev->virtqueue[VIRTIO_TXQ]->callfd > 0)
>> +		close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
>> +	dev->virtqueue[VIRTIO_TXQ]->callfd = -1;
>> +	/* We don't cleanup callfd here as we won't get CALLFD again */
>> +	
>> +	dev->virtqueue[VIRTIO_RXQ]->desc = NULL;
>> +	dev->virtqueue[VIRTIO_RXQ]->avail = NULL;
>> +	dev->virtqueue[VIRTIO_RXQ]->used = NULL;
>> +	dev->virtqueue[VIRTIO_RXQ]->last_used_idx = 0;
>> +	dev->virtqueue[VIRTIO_RXQ]->last_used_idx_res = 0;
>> +
>> +	dev->virtqueue[VIRTIO_TXQ]->desc = NULL;
>> +	dev->virtqueue[VIRTIO_TXQ]->avail = NULL;
>> +	dev->virtqueue[VIRTIO_TXQ]->used = NULL;
>> +	dev->virtqueue[VIRTIO_TXQ]->last_used_idx = 0;
>> +	dev->virtqueue[VIRTIO_TXQ]->last_used_idx_res = 0;
>>  
>> -	/* Call the IOCTL to copy the eventfd. */
>> -	ret = ioctl(eventfd_link, EVENTFD_COPY, eventfd_copy);
>> -	close(eventfd_link);
>>  
>> -	if (ret < 0) {
>> -		RTE_LOG(ERR, VHOST_CONFIG,
>> -			"(%"PRIu64") EVENTFD_COPY ioctl failed\n",
>> -			dev->device_fh);
>> -		return -1;
>> -	}
>> +	return 0;
>> +}
>>  
>> +static int
>> +virtio_is_ready(struct virtio_net *dev, int index)
>> +{
>> +	struct vhost_virtqueue *vq1, *vq2;
>> +	/* mq support in future.*/
>> +	vq1 = dev->virtqueue[index];
>> +	vq2 = dev->virtqueue[index ^ 1];
>> +	if (vq1 && vq2 && vq1->desc && vq2->desc && 
>> +		(vq1->kickfd > 0) && (vq1->callfd > 0) &&
>> +		(vq2->kickfd > 0) && (vq2->callfd > 0)) {
>> +		LOG_DEBUG(VHOST_CONFIG, "virtio is ready for processing.\n");
>> +		return 1;
>> +	}
>> +	LOG_DEBUG(VHOST_CONFIG, "virtio isn't ready for processing.\n");
>>  	return 0;
>>  }
>>  
>> @@ -940,7 +669,6 @@ static int
>>  set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>>  {
>>  	struct virtio_net *dev;
>> -	struct eventfd_copy	eventfd_kick;
>>  	struct vhost_virtqueue *vq;
>>  
>>  	dev = get_device(ctx);
>> @@ -953,14 +681,7 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>>  	if (vq->kickfd)
>>  		close((int)vq->kickfd);
>>  
>> -	/* Populate the eventfd_copy structure and call eventfd_copy. */
>> -	vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>> -	eventfd_kick.source_fd = vq->kickfd;
>> -	eventfd_kick.target_fd = file->fd;
>> -	eventfd_kick.target_pid = ctx.pid;
>> -
>> -	if (eventfd_copy(dev, &eventfd_kick))
>> -		return -1;
>> +	vq->kickfd = file->fd;
>>  
>>  	return 0;
>>  }
>> @@ -974,7 +695,6 @@ static int
>>  set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>>  {
>>  	struct virtio_net *dev;
>> -	struct eventfd_copy eventfd_call;
>>  	struct vhost_virtqueue *vq;
>>  
>>  	dev = get_device(ctx);
>> @@ -986,16 +706,11 @@ set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>>  
>>  	if (vq->callfd)
>>  		close((int)vq->callfd);
>> +	vq->callfd = file->fd;
>>  
>> -	/* Populate the eventfd_copy structure and call eventfd_copy. */
>> -	vq->callfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
>> -	eventfd_call.source_fd = vq->callfd;
>> -	eventfd_call.target_fd = file->fd;
>> -	eventfd_call.target_pid = ctx.pid;
>> -
>> -	if (eventfd_copy(dev, &eventfd_call))
>> -		return -1;
>> -
>> +	if (virtio_is_ready(dev, file->index) &&
>> +		!(dev->flags & VIRTIO_DEV_RUNNING))
>> +			notify_ops->new_device(dev);
>>  	return 0;
>>  }
>>  
>> @@ -1024,6 +739,7 @@ set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>>  	 * If the device isn't already running and both backend fds are set,
>>  	 * we add the device.
>>  	 */
>> +	LOG_DEBUG(VHOST_CONFIG, "%s %d\n", __func__, file->fd);
>>  	if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
>>  		if (((int)dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED) &&
>>  			((int)dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED))
diff mbox

Patch

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index c008d64..cb4e172 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -34,17 +34,19 @@  include $(RTE_SDK)/mk/rte.vars.mk
 # library name
 LIB = librte_vhost.a
 
-CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I. -I vhost-user -I vhost-cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
 LDFLAGS += -lfuse
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c vhost_rxtx.c
+#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-cuse/vhost-net-cdev.c vhost-cuse/virtio-net-cdev.c
+
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-user/fd_man.c vhost-user/vhost-net-user.c vhost-user/virtio-net-user.c
+
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += virtio-net.c vhost_rxtx.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
 
-# dependencies
-DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal
-DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_ether
-DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_mbuf
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal lib/librte_mbuf
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.c b/lib/librte_vhost/eventfd_link/eventfd_link.c
index 7755dd6..4c9b628 100644
--- a/lib/librte_vhost/eventfd_link/eventfd_link.c
+++ b/lib/librte_vhost/eventfd_link/eventfd_link.c
@@ -13,8 +13,7 @@ 
  *   General Public License for more details.
  *
  *   You should have received a copy of the GNU General Public License
- *   along with this program; if not, write to the Free Software
- *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   along with this program; If not, see <http://www.gnu.org/licenses/>.
  *   The full GNU General Public License is included in this distribution
  *   in the file called LICENSE.GPL.
  *
@@ -78,8 +77,7 @@  eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
 
 	switch (ioctl) {
 	case EVENTFD_COPY:
-		if (copy_from_user(&eventfd_copy, argp,
-			sizeof(struct eventfd_copy)))
+		if (copy_from_user(&eventfd_copy, argp, sizeof(struct eventfd_copy)))
 			return -EFAULT;
 
 		/*
@@ -88,28 +86,28 @@  eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
 		task_target =
 			pid_task(find_vpid(eventfd_copy.target_pid), PIDTYPE_PID);
 		if (task_target == NULL) {
-			pr_debug("Failed to get mem ctx for target pid\n");
+			printk(KERN_DEBUG "Failed to get mem ctx for target pid\n");
 			return -EFAULT;
 		}
 
 		files = get_files_struct(current);
 		if (files == NULL) {
-			pr_debug("Failed to get files struct\n");
+			printk(KERN_DEBUG "Failed to get files struct\n");
 			return -EFAULT;
 		}
 
 		rcu_read_lock();
 		file = fcheck_files(files, eventfd_copy.source_fd);
 		if (file) {
-			if (file->f_mode & FMODE_PATH ||
-				!atomic_long_inc_not_zero(&file->f_count))
+			if (file->f_mode & FMODE_PATH
+				|| !atomic_long_inc_not_zero(&file->f_count))
 				file = NULL;
 		}
 		rcu_read_unlock();
 		put_files_struct(files);
 
 		if (file == NULL) {
-			pr_debug("Failed to get file from source pid\n");
+			printk(KERN_DEBUG "Failed to get file from source pid\n");
 			return 0;
 		}
 
@@ -128,25 +126,26 @@  eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
 
 		files = get_files_struct(task_target);
 		if (files == NULL) {
-			pr_debug("Failed to get files struct\n");
+			printk(KERN_DEBUG "Failed to get files struct\n");
 			return -EFAULT;
 		}
 
 		rcu_read_lock();
 		file = fcheck_files(files, eventfd_copy.target_fd);
 		if (file) {
-			if (file->f_mode & FMODE_PATH ||
-				!atomic_long_inc_not_zero(&file->f_count))
-					file = NULL;
+			if (file->f_mode & FMODE_PATH
+				|| !atomic_long_inc_not_zero(&file->f_count))
+				file = NULL;
 		}
 		rcu_read_unlock();
 		put_files_struct(files);
 
 		if (file == NULL) {
-			pr_debug("Failed to get file from target pid\n");
+			printk(KERN_DEBUG "Failed to get file from target pid\n");
 			return 0;
 		}
 
+
 		/*
 		 * Install the file struct from the target process into the
 		 * file desciptor of the source process,
diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.h b/lib/librte_vhost/eventfd_link/eventfd_link.h
index ea619ec..38052e2 100644
--- a/lib/librte_vhost/eventfd_link/eventfd_link.h
+++ b/lib/librte_vhost/eventfd_link/eventfd_link.h
@@ -1,7 +1,4 @@ 
 /*-
- *  This file is provided under a dual BSD/GPLv2 license.  When using or
- *  redistributing this file, you may do so under either license.
- *
  * GPL LICENSE SUMMARY
  *
  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
@@ -16,61 +13,28 @@ 
  *   General Public License for more details.
  *
  *   You should have received a copy of the GNU General Public License
- *   along with this program; if not, write to the Free Software
- *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   along with this program; If not, see <http://www.gnu.org/licenses/>.
  *   The full GNU General Public License is included in this distribution
  *   in the file called LICENSE.GPL.
  *
  *   Contact Information:
  *   Intel Corporation
- *
- * BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *   Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- *   Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the
- *   distribution.
- *   Neither the name of Intel Corporation nor the names of its
- *   contributors may be used to endorse or promote products derived
- *   from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #ifndef _EVENTFD_LINK_H_
 #define _EVENTFD_LINK_H_
 
 /*
- * ioctl to copy an fd entry in calling process to an fd in a target process
+ *	ioctl to copy an fd entry in calling process to an fd in a target process
  */
 #define EVENTFD_COPY 1
 
 /*
- * arguements for the EVENTFD_COPY ioctl
+ *	arguments for the EVENTFD_COPY ioctl
  */
 struct eventfd_copy {
-	unsigned target_fd; /* fd in the target pid */
-	unsigned source_fd; /* fd in the calling pid */
-	pid_t target_pid; /* pid of the target pid */
+	unsigned target_fd; /**< fd in the target pid */
+	unsigned source_fd; /**< fd in the calling pid */
+	pid_t target_pid;   /**< pid of the target pid */
 };
 #endif /* _EVENTFD_LINK_H_ */
diff --git a/lib/librte_vhost/libvirt/qemu-wrap.py b/lib/librte_vhost/libvirt/qemu-wrap.py
deleted file mode 100755
index e2d68a0..0000000
--- a/lib/librte_vhost/libvirt/qemu-wrap.py
+++ /dev/null
@@ -1,367 +0,0 @@ 
-#!/usr/bin/python
-#/*
-# *   BSD LICENSE
-# *
-# *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-# *   All rights reserved.
-# *
-# *   Redistribution and use in source and binary forms, with or without
-# *   modification, are permitted provided that the following conditions
-# *   are met:
-# *
-# *     * Redistributions of source code must retain the above copyright
-# *       notice, this list of conditions and the following disclaimer.
-# *     * Redistributions in binary form must reproduce the above copyright
-# *       notice, this list of conditions and the following disclaimer in
-# *       the documentation and/or other materials provided with the
-# *       distribution.
-# *     * Neither the name of Intel Corporation nor the names of its
-# *       contributors may be used to endorse or promote products derived
-# *       from this software without specific prior written permission.
-# *
-# *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# */
-
-#####################################################################
-# This script is designed to modify the call to the QEMU emulator
-# to support userspace vhost when starting a guest machine through
-# libvirt with vhost enabled. The steps to enable this are as follows
-# and should be run as root:
-#
-# 1. Place this script in a libvirtd's binary search PATH ($PATH)
-#    A good location would be in the same directory that the QEMU
-#    binary is located
-#
-# 2. Ensure that the script has the same owner/group and file
-#    permissions as the QEMU binary
-#
-# 3. Update the VM xml file using "virsh edit VM.xml"
-#
-#    3.a) Set the VM to use the launch script
-#
-#    	Set the emulator path contained in the
-#		<emulator><emulator/> tags
-#
-#    	e.g replace <emulator>/usr/bin/qemu-kvm<emulator/>
-#        with    <emulator>/usr/bin/qemu-wrap.py<emulator/>
-#
-#	 3.b) Set the VM's device's to use vhost-net offload
-#
-#		<interface type="network">
-#       	<model type="virtio"/>
-#       	<driver name="vhost"/>
-#		<interface/>
-#
-# 4. Enable libvirt to access our userpace device file by adding it to
-#    controllers cgroup for libvirtd using the following steps
-#
-#   4.a) In /etc/libvirt/qemu.conf add/edit the following lines:
-#         1) cgroup_controllers = [ ... "devices", ... ]
-#		  2) clear_emulator_capabilities = 0
-#         3) user = "root"
-#         4) group = "root"
-#         5) cgroup_device_acl = [
-#                "/dev/null", "/dev/full", "/dev/zero",
-#                "/dev/random", "/dev/urandom",
-#                "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
-#                "/dev/rtc", "/dev/hpet", "/dev/net/tun",
-#                "/dev/<devbase-name>-<index>",
-#            ]
-#
-#   4.b) Disable SELinux or set to permissive mode
-#
-#   4.c) Mount cgroup device controller
-#        "mkdir /dev/cgroup"
-#        "mount -t cgroup none /dev/cgroup -o devices"
-#
-#   4.d) Set hugetlbfs_mount variable - ( Optional )
-#        VMs using userspace vhost must use hugepage backed
-#        memory. This can be enabled in the libvirt XML
-#        config by adding a memory backing section to the
-#        XML config e.g.
-#             <memoryBacking>
-#             <hugepages/>
-#             </memoryBacking>
-#        This memory backing section should be added after the
-#        <memory> and <currentMemory> sections. This will add
-#        flags "-mem-prealloc -mem-path <path>" to the QEMU
-#        command line. The hugetlbfs_mount variable can be used
-#        to override the default <path> passed through by libvirt.
-#
-#        if "-mem-prealloc" or "-mem-path <path>" are not passed
-#        through and a vhost device is detected then these options will
-#        be automatically added by this script. This script will detect
-#        the system hugetlbfs mount point to be used for <path>. The
-#        default <path> for this script can be overidden by the
-#        hugetlbfs_dir variable in the configuration section of this script.
-#
-#
-#   4.e) Restart the libvirtd system process
-#        e.g. on Fedora "systemctl restart libvirtd.service"
-#
-#
-#   4.f) Edit the Configuration Parameters section of this script
-#        to point to the correct emulator location and set any
-#        addition options
-#
-# The script modifies the libvirtd Qemu call by modifying/adding
-# options based on the configuration parameters below.
-# NOTE:
-#     emul_path and us_vhost_path must be set
-#     All other parameters are optional
-#####################################################################
-
-
-#############################################
-# Configuration Parameters
-#############################################
-#Path to QEMU binary
-emul_path = "/usr/local/bin/qemu-system-x86_64"
-
-#Path to userspace vhost device file
-# This filename should match the --dev-basename --dev-index parameters of
-# the command used to launch the userspace vhost sample application e.g.
-# if the sample app lauch command is:
-#    ./build/vhost-switch ..... --dev-basename usvhost --dev-index 1
-# then this variable should be set to:
-#   us_vhost_path = "/dev/usvhost-1"
-us_vhost_path = "/dev/usvhost-1"
-
-#List of additional user defined emulation options. These options will
-#be added to all Qemu calls
-emul_opts_user = []
-
-#List of additional user defined emulation options for vhost only.
-#These options will only be added to vhost enabled guests
-emul_opts_user_vhost = []
-
-#For all VHOST enabled VMs, the VM memory is preallocated from hugetlbfs
-# Set this variable to one to enable this option for all VMs
-use_huge_all = 0
-
-#Instead of autodetecting, override the hugetlbfs directory by setting
-#this variable
-hugetlbfs_dir = ""
-
-#############################################
-
-
-#############################################
-# ****** Do Not Modify Below this Line ******
-#############################################
-
-import sys, os, subprocess
-
-
-#List of open userspace vhost file descriptors
-fd_list = []
-
-#additional virtio device flags when using userspace vhost
-vhost_flags = [ "csum=off",
-                "gso=off",
-                "guest_tso4=off",
-                "guest_tso6=off",
-                "guest_ecn=off"
-              ]
-
-
-#############################################
-# Find the system hugefile mount point.
-# Note:
-# if multiple hugetlbfs mount points exist
-# then the first one found will be used
-#############################################
-def find_huge_mount():
-
-    if (len(hugetlbfs_dir)):
-        return hugetlbfs_dir
-
-    huge_mount = ""
-
-    if (os.access("/proc/mounts", os.F_OK)):
-        f = open("/proc/mounts", "r")
-        line = f.readline()
-        while line:
-            line_split = line.split(" ")
-            if line_split[2] == 'hugetlbfs':
-                huge_mount = line_split[1]
-                break
-            line = f.readline()
-    else:
-        print "/proc/mounts not found"
-        exit (1)
-
-    f.close
-    if len(huge_mount) == 0:
-        print "Failed to find hugetlbfs mount point"
-        exit (1)
-
-    return huge_mount
-
-
-#############################################
-# Get a userspace Vhost file descriptor
-#############################################
-def get_vhost_fd():
-
-    if (os.access(us_vhost_path, os.F_OK)):
-        fd = os.open( us_vhost_path, os.O_RDWR)
-    else:
-        print ("US-Vhost file %s not found" %us_vhost_path)
-        exit (1)
-
-    return fd
-
-
-#############################################
-# Check for vhostfd. if found then replace
-# with our own vhost fd and append any vhost
-# flags onto the end
-#############################################
-def modify_netdev_arg(arg):
-	
-    global fd_list
-    vhost_in_use = 0
-    s = ''
-    new_opts = []
-    netdev_opts = arg.split(",")
-
-    for opt in netdev_opts:
-        #check if vhost is used
-        if "vhost" == opt[:5]:
-            vhost_in_use = 1
-        else:
-            new_opts.append(opt)
-
-    #if using vhost append vhost options
-    if vhost_in_use == 1:
-        #append vhost on option
-        new_opts.append('vhost=on')
-        #append vhostfd ption
-        new_fd = get_vhost_fd()
-        new_opts.append('vhostfd=' + str(new_fd))
-        fd_list.append(new_fd)
-
-    #concatenate all options
-    for opt in new_opts:
-        if len(s) > 0:
-			s+=','
-
-        s+=opt
-
-    return s	
-
-
-#############################################
-# Main
-#############################################
-def main():
-
-    global fd_list
-    global vhost_in_use
-    new_args = []
-    num_cmd_args = len(sys.argv)
-    emul_call = ''
-    mem_prealloc_set = 0
-    mem_path_set = 0
-    num = 0;
-
-    #parse the parameters
-    while (num < num_cmd_args):
-        arg = sys.argv[num]
-
-		#Check netdev +1 parameter for vhostfd
-        if arg == '-netdev':
-            num_vhost_devs = len(fd_list)
-            new_args.append(arg)
-
-            num+=1
-            arg = sys.argv[num]
-            mod_arg = modify_netdev_arg(arg)
-            new_args.append(mod_arg)
-
-            #append vhost flags if this is a vhost device
-            # and -device is the next arg
-            # i.e -device -opt1,-opt2,...,-opt3,%vhost
-            if (num_vhost_devs < len(fd_list)):
-                num+=1
-                arg = sys.argv[num]
-                if arg == '-device':
-                    new_args.append(arg)
-                    num+=1
-                    new_arg = sys.argv[num]
-                    for flag in vhost_flags:
-                        new_arg = ''.join([new_arg,',',flag])
-                    new_args.append(new_arg)
-                else:
-                    new_args.append(arg)
-        elif arg == '-mem-prealloc':
-            mem_prealloc_set = 1
-            new_args.append(arg)
-        elif arg == '-mem-path':
-            mem_path_set = 1
-            new_args.append(arg)
-
-        else:
-            new_args.append(arg)
-
-        num+=1
-
-    #Set Qemu binary location
-    emul_call+=emul_path
-    emul_call+=" "
-
-    #Add prealloc mem options if using vhost and not already added
-    if ((len(fd_list) > 0) and (mem_prealloc_set == 0)):
-        emul_call += "-mem-prealloc "
-
-    #Add mempath mem options if using vhost and not already added
-    if ((len(fd_list) > 0) and (mem_path_set == 0)):
-        #Detect and add hugetlbfs mount point
-        mp = find_huge_mount()
-        mp = "".join(["-mem-path ", mp])
-        emul_call += mp
-        emul_call += " "
-
-
-    #add user options
-    for opt in emul_opts_user:
-        emul_call += opt
-        emul_call += " "
-
-    #Add add user vhost only options
-    if len(fd_list) > 0:
-        for opt in emul_opts_user_vhost:
-            emul_call += opt
-            emul_call += " "
-
-    #Add updated libvirt options
-    iter_args = iter(new_args)
-    #skip 1st arg i.e. call to this script
-    next(iter_args)
-    for arg in iter_args:
-        emul_call+=str(arg)
-        emul_call+= " "
-
-    #Call QEMU
-    subprocess.call(emul_call, shell=True)
-
-
-    #Close usvhost files
-    for fd in fd_list:
-        os.close(fd)
-
-
-if __name__ == "__main__":
-    main()
-
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 00b1328..7a05dab 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -34,11 +34,6 @@ 
 #ifndef _VIRTIO_NET_H_
 #define _VIRTIO_NET_H_
 
-/**
- * @file
- * Interface to vhost net
- */
-
 #include <stdint.h>
 #include <linux/virtio_ring.h>
 #include <linux/virtio_net.h>
@@ -48,66 +43,38 @@ 
 #include <rte_mempool.h>
 #include <rte_mbuf.h>
 
-/* Used to indicate that the device is running on a data core */
-#define VIRTIO_DEV_RUNNING 1
-
-/* Backend value set by guest. */
-#define VIRTIO_DEV_STOPPED -1
-
+#define VIRTIO_DEV_RUNNING 1  /**< Used to indicate that the device is running on a data core. */
+#define VIRTIO_DEV_STOPPED -1 /**< Backend value set by guest. */
 
 /* Enum for virtqueue management. */
 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 
-#define BUF_VECTOR_MAX 256
-
-/**
- * Structure contains buffer address, length and descriptor index
- * from vring to do scatter RX.
- */
-struct buf_vector {
-	uint64_t buf_addr;
-	uint32_t buf_len;
-	uint32_t desc_idx;
-};
-
 /**
  * Structure contains variables relevant to RX/TX virtqueues.
  */
 struct vhost_virtqueue {
-	struct vring_desc	*desc;			/**< Virtqueue descriptor ring. */
-	struct vring_avail	*avail;			/**< Virtqueue available ring. */
-	struct vring_used	*used;			/**< Virtqueue used ring. */
-	uint32_t		size;			/**< Size of descriptor ring. */
-	uint32_t		backend;		/**< Backend value to determine if device should started/stopped. */
-	uint16_t		vhost_hlen;		/**< Vhost header length (varies depending on RX merge buffers. */
-	volatile uint16_t	last_used_idx;		/**< Last index used on the available ring */
-	volatile uint16_t	last_used_idx_res;	/**< Used for multiple devices reserving buffers. */
-	eventfd_t		callfd;			/**< Currently unused as polling mode is enabled. */
-	eventfd_t		kickfd;			/**< Used to notify the guest (trigger interrupt). */
-	struct buf_vector	buf_vec[BUF_VECTOR_MAX];	/**< for scatter RX. */
-} __rte_cache_aligned;
-
-/**
- * Device structure contains all configuration information relating to the device.
- */
-struct virtio_net {
-	struct vhost_virtqueue	*virtqueue[VIRTIO_QNUM];	/**< Contains all virtqueue information. */
-	struct virtio_memory	*mem;		/**< QEMU memory and memory region information. */
-	uint64_t		features;	/**< Negotiated feature set. */
-	uint64_t		device_fh;	/**< device identifier. */
-	uint32_t		flags;		/**< Device flags. Only used to check if device is running on data core. */
-	void			*priv;		/**< private context */
+	struct vring_desc    *desc;             /**< descriptor ring. */
+	struct vring_avail   *avail;            /**< available ring. */
+	struct vring_used    *used;             /**< used ring. */
+	uint32_t             size;              /**< Size of descriptor ring. */
+	uint32_t             backend;           /**< Backend value to determine if device should be started/stopped. */
+	uint16_t             vhost_hlen;        /**< Vhost header length (varies depending on RX merge buffers). */
+	volatile uint16_t    last_used_idx;     /**< Last index used on the available ring. */
+	volatile uint16_t    last_used_idx_res; /**< Used for multiple devices reserving buffers. */
+	eventfd_t            callfd;            /**< Currently unused as polling mode is enabled. */
+	eventfd_t            kickfd;            /**< Used to notify the guest (trigger interrupt). */
 } __rte_cache_aligned;
 
 /**
- * Information relating to memory regions including offsets to addresses in QEMUs memory file.
+ * Information relating to memory regions including offsets to
+ * addresses in QEMUs memory file.
  */
 struct virtio_memory_regions {
-	uint64_t	guest_phys_address;	/**< Base guest physical address of region. */
-	uint64_t	guest_phys_address_end;	/**< End guest physical address of region. */
-	uint64_t	memory_size;		/**< Size of region. */
-	uint64_t	userspace_address;	/**< Base userspace address of region. */
-	uint64_t	address_offset;		/**< Offset of region for address translation. */
+	uint64_t    guest_phys_address;     /**< Base guest physical address of region. */
+	uint64_t    guest_phys_address_end; /**< End guest physical address of region. */
+	uint64_t    memory_size;            /**< Size of region. */
+	uint64_t    userspace_address;      /**< Base userspace address of region. */
+	uint64_t    address_offset;         /**< Offset of region for address translation. */
 };
 
 
@@ -115,21 +82,34 @@  struct virtio_memory_regions {
  * Memory structure includes region and mapping information.
  */
 struct virtio_memory {
-	uint64_t	base_address;	/**< Base QEMU userspace address of the memory file. */
-	uint64_t	mapped_address;	/**< Mapped address of memory file base in our applications memory space. */
-	uint64_t	mapped_size;	/**< Total size of memory file. */
-	uint32_t	nregions;	/**< Number of memory regions. */
+	uint64_t    base_address;    /**< Base QEMU userspace address of the memory file. */
+	uint64_t    mapped_address;  /**< Mapped address of memory file base in our applications memory space. */
+	uint64_t    mapped_size;     /**< Total size of memory file. */
+	uint32_t    nregions;        /**< Number of memory regions. */
 	struct virtio_memory_regions      regions[0]; /**< Memory region information. */
 };
 
 /**
+ * Device structure contains all configuration information relating to the device.
+ */
+struct virtio_net {
+	struct vhost_virtqueue  *virtqueue[VIRTIO_QNUM]; /**< Contains all virtqueue information. */
+	struct virtio_memory    *mem;                    /**< QEMU memory and memory region information. */
+	uint64_t features;    /**< Negotiated feature set. */
+	uint64_t device_fh;   /**< Device identifier. */
+	uint32_t flags;       /**< Device flags. Only used to check if device is running on data core. */
+	void     *priv;       /**< Private context. */
+} __rte_cache_aligned;
+
+/**
  * Device operations to add/remove device.
  */
 struct virtio_net_device_ops {
-	int (*new_device)(struct virtio_net *);	/**< Add device. */
-	void (*destroy_device)(volatile struct virtio_net *);	/**< Remove device. */
+	int (*new_device)(struct virtio_net *); /**< Add device. */
+	void (*destroy_device)(struct virtio_net *); /**< Remove device. */
 };
 
+
 static inline uint16_t __attribute__((always_inline))
 rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id)
 {
@@ -179,7 +159,7 @@  int rte_vhost_driver_register(const char *dev_name);
 
 /* Register callbacks. */
 int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const);
-/* Start vhost driver session blocking loop. */
+
 int rte_vhost_driver_session_start(void);
 
 /**
@@ -192,8 +172,8 @@  int rte_vhost_driver_session_start(void);
  * @return
  *  num of packets enqueued
  */
-uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
-	struct rte_mbuf **pkts, uint16_t count);
+uint32_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
+	struct rte_mbuf **pkts, uint32_t count);
 
 /**
  * This function gets guest buffers from the virtio device TX virtqueue,
@@ -206,7 +186,7 @@  uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
  * @return
  *  num of packets dequeued
  */
-uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
-	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
+uint32_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
+	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count);
 
 #endif /* _VIRTIO_NET_H_ */
diff --git a/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
new file mode 100644
index 0000000..4671643
--- /dev/null
+++ b/lib/librte_vhost/vhost-cuse/vhost-net-cdev.c
@@ -0,0 +1,436 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <fuse/cuse_lowlevel.h>
+#include <linux/limits.h>
+#include <linux/vhost.h>
+#include <linux/virtio_net.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_virtio_net.h>
+
+#include "virtio-net-cdev.h"
+#include "vhost-net.h"
+#include "eventfd_link/eventfd_link.h"
+
+#define FUSE_OPT_DUMMY "\0\0"		/* fuse_argv[0] placeholder */
+#define FUSE_OPT_FORE  "-f\0\0"		/* FUSE option: stay in foreground */
+#define FUSE_OPT_NOMULTI "-s\0\0"	/* FUSE option: single-threaded loop */
+
+static const uint32_t default_major = 231;	/* cuse_info.dev_major */
+static const uint32_t default_minor = 1;	/* cuse_info.dev_minor */
+static const char cuse_device_name[] = "/dev/cuse";	/* checked with access() */
+static const char default_cdev[] = "vhost-net";
+static const char eventfd_cdev[] = "/dev/eventfd-link";	/* see eventfd_copy() */
+
+static struct fuse_session *session;	/* set by rte_vhost_driver_register() */
+struct vhost_net_device_ops const *ops;	/* set by rte_vhost_driver_register() */
+
+/*
+ * Build a vhost_device_ctx for a FUSE request: pid comes from the request
+ * context and fh from the file info. Only these two fields are filled in.
+ */
+static struct vhost_device_ctx
+fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
+{
+	struct fuse_ctx const *const caller = fuse_req_ctx(req);
+	struct vhost_device_ctx dev_ctx;
+
+	dev_ctx.fh = fi->fh;
+	dev_ctx.pid = caller->pid;
+
+	return dev_ctx;
+}
+
+/*
+ * CUSE open handler: asks the vhost layer for a new device and stores the
+ * value it returns in fi->fh. Replies EPERM when new_device() returns -1.
+ */
+static void
+vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
+{
+	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
+	int dev_fh = ops->new_device(ctx);
+
+	if (dev_fh != -1) {
+		fi->fh = dev_fh;
+
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"(%"PRIu64") Device configuration started\n", fi->fh);
+		fuse_reply_open(req, fi);
+		return;
+	}
+
+	/* new_device() failed: report it to the opener. */
+	fuse_reply_err(req, EPERM);
+}
+
+/*
+ * Release handler (QEMU shut down or killed): destroy the device, reply 0.
+ */
+static void
+vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
+{
+	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
+	const int reply = 0;
+
+	ops->destroy_device(ctx);
+	RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
+	fuse_reply_err(req, reply);
+}
+
+/*
+ * Boilerplate code for a CUSE IOCTL without payload: call func(ctx), reply.
+ * Implicit arguments: ctx, req, result.
+ */
+#define VHOST_IOCTL(func) do {	\
+	result = (func)(ctx);	\
+	fuse_reply_ioctl(req, result, NULL, 0);	\
+} while (0)
+
+/*
+ * Boilerplate IOCTL RETRY: ask for size_r bytes to read / size_w to write.
+ * Implicit arguments: req, arg.
+ */
+#define VHOST_IOCTL_RETRY(size_r, size_w) do {	\
+	struct iovec iov_r = { arg, (size_r) };	\
+	struct iovec iov_w = { arg, (size_w) };	\
+	fuse_reply_ioctl_retry(req, &iov_r,	\
+		(size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
+} while (0)
+
+/*
+ * CUSE Read IOCTL: copy a 'type' from in_buf into var, call func(ctx, &var).
+ * Implicit arguments: ctx, req, result, in_bufsz, in_buf (arg on retry).
+ */
+#define VHOST_IOCTL_R(type, var, func) do {	\
+	if (!in_bufsz) {			\
+		VHOST_IOCTL_RETRY(sizeof(type), 0);\
+	} else {	\
+		(var) = *(const type*)in_buf;	\
+		result = func(ctx, &(var));	\
+		fuse_reply_ioctl(req, result, NULL, 0);\
+	}	\
+} while (0)
+
+/*
+ * CUSE Write IOCTL: call func(ctx, &var) and reply with var as the output.
+ * Implicit arguments: ctx, req, result, out_bufsz (arg on retry).
+ */
+#define VHOST_IOCTL_W(type, var, func) do {	\
+	if (!out_bufsz) {			\
+		VHOST_IOCTL_RETRY(0, sizeof(type));\
+	} else {	\
+		result = (func)(ctx, &(var));\
+		fuse_reply_ioctl(req, result, &(var), sizeof(type));\
+	} \
+} while (0)
+
+/*
+ * CUSE Read/Write IOCTL: read var1 from in_buf, reply with var2 as output.
+ * Implicit arguments: ctx, req, result, in_bufsz, in_buf (arg on retry).
+ */
+#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do {	\
+	if (!in_bufsz) {	\
+		VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
+	} else {	\
+		(var1) = *(const type1*) (in_buf);	\
+		result = (func)(ctx, (var1), &(var2));	\
+		fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
+	} \
+} while (0)
+
+/*
+ * This function uses the eventfd_link kernel module to copy an eventfd file
+ * descriptor provided by QEMU in to our process space.
+ * Returns the local eventfd on success, -1 on failure (nothing is leaked).
+static int
+eventfd_copy(int target_fd, int target_pid)
+{
+	int eventfd_link, ret;
+	struct eventfd_copy eventfd_copy;
+	int fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+
+	if (fd == -1)
+		return -1;
+
+	/* Open the character device to the kernel module. */
+	/* TODO: check this earlier rather than fail until VM boots! */
+	eventfd_link = open(eventfd_cdev, O_RDWR);
+	if (eventfd_link < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"eventfd_link module is not loaded\n");
+		close(fd); /* do not leak the local eventfd */
+		return -1;
+	}
+	eventfd_copy.source_fd = fd;
+	eventfd_copy.target_fd = target_fd;
+	eventfd_copy.target_pid = target_pid;
+	/* Call the IOCTL to copy the eventfd. */
+	ret = ioctl(eventfd_link, EVENTFD_COPY, &eventfd_copy);
+	close(eventfd_link);
+	if (ret < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"EVENTFD_COPY ioctl failed\n");
+		close(fd); /* do not leak the local eventfd */
+		return -1;
+	}
+
+	return fd;
+}
+
+/*
+ * CUSE ioctl handler. Dispatches on cmd to the matching vhost op. A command
+ * that carries a payload is first answered with a retry request naming the
+ * buffer size; the request is then expected back with in_buf/in_bufsz (or
+ * out_bufsz) populated. Every path sends exactly one reply for req.
+ */
+static void
+vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
+		struct fuse_file_info *fi, __rte_unused unsigned flags,
+		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
+{
+	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
+	struct vhost_vring_file file;
+	struct vhost_vring_state state;
+	struct vhost_vring_addr addr;
+	uint64_t features;
+	uint32_t index;
+	int result = 0;
+
+	switch (cmd) {
+	case VHOST_NET_SET_BACKEND:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
+		VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
+		break;
+
+	case VHOST_GET_FEATURES:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
+		VHOST_IOCTL_W(uint64_t, features, ops->get_features);
+		break;
+
+	case VHOST_SET_FEATURES:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
+		VHOST_IOCTL_R(uint64_t, features, ops->set_features);
+		break;
+
+	case VHOST_RESET_OWNER:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
+		VHOST_IOCTL(ops->reset_owner);
+		break;
+
+	case VHOST_SET_OWNER:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
+		VHOST_IOCTL(ops->set_owner);
+		break;
+
+	case VHOST_SET_MEM_TABLE:
+		/*TODO fix race condition.*/
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
+		static struct vhost_memory mem_temp;
+		switch (in_bufsz) {
+		case 0:
+			VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
+			break;
+
+		case sizeof(struct vhost_memory):
+			mem_temp = *(const struct vhost_memory *) in_buf;
+
+			if (mem_temp.nregions > 0) {
+				VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
+					(sizeof(struct vhost_memory_region) *
+						mem_temp.nregions), 0);
+			} else {
+				result = -1;
+				fuse_reply_ioctl(req, result, NULL, 0);
+			}
+			break;
+
+		default:
+			result = cuse_set_mem_table(ctx, in_buf,
+				mem_temp.nregions);
+			if (result)
+				fuse_reply_err(req, EINVAL);
+			else
+				fuse_reply_ioctl(req, result, NULL, 0);
+		}
+		break;
+
+	case VHOST_SET_VRING_NUM:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
+		VHOST_IOCTL_R(struct vhost_vring_state, state, ops->set_vring_num);
+		break;
+
+	case VHOST_SET_VRING_BASE:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
+		VHOST_IOCTL_R(struct vhost_vring_state, state, ops->set_vring_base);
+		break;
+
+	case VHOST_GET_VRING_BASE:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
+		VHOST_IOCTL_RW(uint32_t, index,
+			struct vhost_vring_state, state, ops->get_vring_base);
+		break;
+
+	case VHOST_SET_VRING_ADDR:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
+		VHOST_IOCTL_R(struct vhost_vring_addr, addr, ops->set_vring_addr);
+		break;
+
+	case VHOST_SET_VRING_KICK:
+	case VHOST_SET_VRING_CALL:
+		if (!in_buf) {
+			VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0);
+			break;
+		}
+		file = *(const struct vhost_vring_file *)in_buf;
+		LOG_DEBUG(VHOST_CONFIG,
+			"kick/call idx:%d fd:%d\n", file.index, file.fd);
+		/*
+		 * Pass the fd from eventfd_copy() to the handler directly
+		 * (VHOST_IOCTL_R would reload file from in_buf) and reply once.
+		 * NOTE(review): KICK -> set_vring_call / CALL -> set_vring_kick
+		 * is kept from the original; confirm the inversion is intended.
+		 */
+		file.fd = eventfd_copy(file.fd, ctx.pid);
+		if (file.fd < 0)
+			result = -1;
+		else if (cmd == VHOST_SET_VRING_KICK)
+			result = ops->set_vring_call(ctx, &file);
+		else
+			result = ops->set_vring_kick(ctx, &file);
+		fuse_reply_ioctl(req, result, NULL, 0);
+		break;
+
+	default:
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
+		result = -1;
+		fuse_reply_ioctl(req, result, NULL, 0);
+	}
+
+	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL: %s\n", ctx.fh,
+		result < 0 ? "FAIL" : "SUCCESS");
+}
+
+/*
+ * CUSE callbacks of the vhost device: only open, release and ioctl are set.
+ */
+static const struct cuse_lowlevel_ops vhost_net_ops = {
+	.ioctl		= vhost_net_ioctl,
+	.release	= vhost_net_release,
+	.open		= vhost_net_open,
+};
+
+/*
+ * Register the CUSE character device for dev_name and create the FUSE
+ * session later run by rte_vhost_driver_session_start(). The vhost ops are
+ * fetched here as well. Returns 0 on success, -1 on failure.
+ */
+int
+rte_vhost_driver_register(const char *dev_name)
+{
+	char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
+	char fuse_opt_fore[] = FUSE_OPT_FORE;
+	char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
+	char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
+
+	char devname_arg[PATH_MAX] = "";
+	char devnode_path[PATH_MAX] = "";
+	const char *device_argv[] = { devname_arg };
+	struct cuse_info cuse_info;
+
+	/* The CUSE control device must be readable and writable. */
+	if (access(cuse_device_name, R_OK | W_OK) < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"char device %s can't be accessed, maybe not exist\n",
+			cuse_device_name);
+		return -1;
+	}
+
+	/*
+	 * DEVNAME=<dev_name> is the device info string handed to CUSE;
+	 * /dev/<dev_name> is the node whose existence is checked below.
+	 */
+	snprintf(devname_arg, sizeof(devname_arg), "DEVNAME=%s", dev_name);
+	snprintf(devnode_path, sizeof(devnode_path), "/dev/%s", dev_name);
+
+	/* Refuse to register over an existing device node. */
+	if (access(devnode_path, F_OK) != -1) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"char device %s already exists\n", devnode_path);
+		return -1;
+	}
+
+	memset(&cuse_info, 0, sizeof(cuse_info));
+	cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
+	cuse_info.dev_info_argv = device_argv;
+	cuse_info.dev_info_argc = 1;
+	cuse_info.dev_minor = default_minor;
+	cuse_info.dev_major = default_major;
+
+	ops = get_virtio_net_callbacks();
+
+	session = cuse_lowlevel_setup(3, fuse_argv,
+			&cuse_info, &vhost_net_ops, 0, NULL);
+
+	return (session != NULL) ? 0 : -1;
+}
+
+/**
+ * Run the CUSE session loop so that open, release and ioctl requests reach
+ * this application. The loop's result is ignored; 0 is always returned.
+ */
+int
+rte_vhost_driver_session_start(void)
+{
+	const int status = 0;
+	fuse_session_loop(session);
+	return status;
+}
diff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
new file mode 100644
index 0000000..5c16aa5
--- /dev/null
+++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.c
@@ -0,0 +1,314 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <dirent.h>
+#include <linux/vhost.h>
+#include <linux/virtio_net.h>
+#include <fuse/cuse_lowlevel.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <rte_log.h>
+
+#include "vhost-net.h"
+#include "virtio-net-cdev.h"
+
+extern struct vhost_net_device_ops const *ops;
+
+/* Line size for reading maps file (also the size of the line buffer). */
+static const uint32_t BUFSIZE = PATH_MAX;
+
+/* Size of prot char array in procmap. */
+#define PROT_SZ 5
+
+/* Number of fields split out of each maps line; one per procmap member. */
+#define PROCMAP_SZ 8
+
+/* Structure containing information gathered from one line of the maps file. */
+struct procmap {
+	uint64_t va_start;	/* Start virtual address of the mapping. */
+	uint64_t len;		/* Parsed as end address, then turned into a size. */
+	uint64_t pgoff;		/* Parsed but not used. */
+	uint32_t maj;		/* Parsed but not used. */
+	uint32_t min;		/* Parsed but not used. */
+	uint32_t ino;		/* Parsed but not used. */
+	char prot[PROT_SZ];	/* Copied but not used. */
+	char fname[PATH_MAX];	/* File name; compared against the fd links. */
+};
+
+/*
+ * Locate the file containing QEMU's memory space and map it to our address
+ * space. The mapping that starts at addr is looked up in /proc/<pid>/maps,
+ * then its backing file is searched for in /proc/<pid>/fd and mmap'ed.
+ *
+ * On success stores the mapping address and size in *mapped_address and
+ * *mapped_size and returns 0. Returns -1 on any failure.
+ */
+static int
+host_memory_map(pid_t pid, uint64_t addr,
+	uint64_t *mapped_address, uint64_t *mapped_size)
+{
+	struct dirent *dptr = NULL;
+	struct procmap procmap;
+	DIR *dp = NULL;
+	int fd;
+	int i;
+	char memfile[PATH_MAX];
+	char mapfile[PATH_MAX];
+	char procdir[PATH_MAX];
+	char resolved_path[PATH_MAX] = "";
+	FILE *fmap;
+	void *map;
+	uint8_t found = 0;
+	char line[BUFSIZE];
+	char dlm[] = "-   :   ";
+	char *str, *sp, *in[PROCMAP_SZ];
+	char *end = NULL;
+
+	/* Path where mem files are located. */
+	snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
+	/* Maps file used to locate mem file. */
+	snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
+
+	fmap = fopen(mapfile, "r");
+	if (fmap == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to open maps file for pid %d\n", pid);
+		return -1;
+	}
+
+	/* Read through maps file until we find out base_address. */
+	while (fgets(line, BUFSIZE, fmap) != 0) {
+		str = line;
+		errno = 0;
+		/* Split line in to fields. */
+		for (i = 0; i < PROCMAP_SZ; i++) {
+			in[i] = strtok_r(str, &dlm[i], &sp);
+			if ((in[i] == NULL) || (errno != 0)) {
+				fclose(fmap);
+				return -1;
+			}
+			str = NULL;
+		}
+
+		/* Convert/Copy each field as needed. */
+		procmap.va_start = strtoull(in[0], &end, 16);
+		if ((*in[0] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		procmap.len = strtoull(in[1], &end, 16);
+		if ((*in[1] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		procmap.pgoff = strtoull(in[3], &end, 16);
+		if ((*in[3] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		procmap.maj = strtoul(in[4], &end, 16);
+		if ((*in[4] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		procmap.min = strtoul(in[5], &end, 16);
+		if ((*in[5] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		procmap.ino = strtoul(in[6], &end, 16);
+		if ((*in[6] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		/* Copy as strings: a fixed-size memcpy would overrun line[]. */
+		snprintf(procmap.prot, PROT_SZ, "%s", in[2]);
+		snprintf(procmap.fname, PATH_MAX, "%s", in[7]);
+
+		if (procmap.va_start == addr) {
+			procmap.len = procmap.len - procmap.va_start;
+			found = 1;
+			break;
+		}
+	}
+	fclose(fmap);
+
+	if (!found) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to find memory file in pid %d maps file\n", pid);
+		return -1;
+	}
+
+	/* Find the guest memory file among the process fds. */
+	dp = opendir(procdir);
+	if (dp == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Cannot open pid %d process directory\n", pid);
+		return -1;
+	}
+
+	found = 0;
+
+	/* Read the fd directory contents. */
+	while (NULL != (dptr = readdir(dp))) {
+		snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
+				pid, dptr->d_name);
+		/*
+		 * NOTE(review): the result of realpath() is not checked, so on
+		 * failure the comparison below uses whatever it left in
+		 * resolved_path (testing the array against NULL is useless).
+		 * Confirm whether entries that fail to resolve should be skipped.
+		 */
+		realpath(memfile, resolved_path);
+		if (strncmp(resolved_path, procmap.fname,
+			strnlen(procmap.fname, PATH_MAX)) == 0) {
+			found = 1;
+			break;
+		}
+	}
+
+	closedir(dp);
+
+	if (found == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to find memory file for pid %d\n", pid);
+		return -1;
+	}
+
+	/* Open the shared memory file and map the memory into this process. */
+	fd = open(memfile, O_RDWR);
+	if (fd == -1) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to open %s for pid %d\n", memfile, pid);
+		return -1;
+	}
+
+	map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE ,
+			MAP_POPULATE|MAP_SHARED, fd, 0);
+	close(fd);
+
+	if (map == MAP_FAILED) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Error mapping the file %s for pid %d\n", memfile, pid);
+		return -1;
+	}
+
+	/* Store the memory address and size in the device data structure */
+	*mapped_address = (uint64_t)(uintptr_t)map;
+	*mapped_size = procmap.len;
+
+	LOG_DEBUG(VHOST_CONFIG,
+		"Mem File: %s->%s - Size: %llu - VA: %p\n",
+		memfile, resolved_path,
+		(unsigned long long)*mapped_size, map);
+
+	return 0;
+}
+
+/*
+ * Translate the region table following the vhost_memory header, map the guest
+ * memory file and pass the table to ops->set_mem_table(). Returns 0 or -1.
+ */
+int
+cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory *mem_regions_addr,
+	uint32_t nregions)
+{
+	uint64_t size = offsetof(struct vhost_memory, regions);
+	uint32_t idx;
+	struct virtio_memory_regions regions[8]; /* VHOST_MAX_MEMORY_REGIONS */
+	struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
+			((uint64_t)(uintptr_t)mem_regions_addr + size);
+	uint64_t base_address = 0, mapped_address = 0, mapped_size = 0;
+
+	/* nregions comes from the ioctl payload: never index past regions[]. */
+	if (nregions > sizeof(regions) / sizeof(regions[0])) {
+		RTE_LOG(ERR, VHOST_CONFIG, "Too many memory regions: %u\n", nregions);
+		return -1;
+	}
+
+	for (idx = 0; idx < nregions; idx++) {
+		struct virtio_memory_regions *reg = &regions[idx];
+
+		reg->guest_phys_address = mem_regions[idx].guest_phys_addr;
+		reg->guest_phys_address_end = reg->guest_phys_address +
+			mem_regions[idx].memory_size;
+		reg->memory_size = mem_regions[idx].memory_size;
+		reg->userspace_address = mem_regions[idx].userspace_addr;
+		LOG_DEBUG(VHOST_CONFIG, "REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
+			idx, (void *)(uintptr_t)reg->guest_phys_address,
+			(void *)(uintptr_t)reg->userspace_address,
+			reg->memory_size);
+
+		/* Region at guest physical address 0: base mapping, map its file. */
+		if (reg->guest_phys_address == 0x0) {
+			base_address = reg->userspace_address;
+			if (host_memory_map(ctx.pid, base_address,
+				&mapped_address, &mapped_size) != 0)
+				return -1;
+		}
+	}
+
+	/* Check that we have a valid base address. */
+	if (base_address == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to find base address of qemu memory file.\n");
+		return -1;
+	}
+
+	for (idx = 0; idx < nregions; idx++)
+		regions[idx].address_offset = mapped_address - base_address +
+			regions[idx].userspace_address -
+			regions[idx].guest_phys_address;
+	ops->set_mem_table(ctx, &regions[0], nregions);
+	return 0;
+}
diff --git a/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
new file mode 100644
index 0000000..6f98ce8
--- /dev/null
+++ b/lib/librte_vhost/vhost-cuse/virtio-net-cdev.h
@@ -0,0 +1,43 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _VIRTIO_NET_CDEV_H
+#define _VIRTIO_NET_CDEV_H
+#include <stdint.h>
+
+#include "vhost-net.h"
+
+int
+cuse_set_mem_table(struct vhost_device_ctx ctx, const struct vhost_memory *mem_regions_addr,
+	uint32_t nregions);
+
+#endif
diff --git a/lib/librte_vhost/vhost-net-cdev.c b/lib/librte_vhost/vhost-net-cdev.c
deleted file mode 100644
index 57c76cb..0000000
--- a/lib/librte_vhost/vhost-net-cdev.c
+++ /dev/null
@@ -1,389 +0,0 @@ 
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <errno.h>
-#include <fuse/cuse_lowlevel.h>
-#include <linux/limits.h>
-#include <linux/vhost.h>
-#include <stdint.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <rte_ethdev.h>
-#include <rte_log.h>
-#include <rte_string_fns.h>
-#include <rte_virtio_net.h>
-
-#include "vhost-net-cdev.h"
-
-#define FUSE_OPT_DUMMY "\0\0"
-#define FUSE_OPT_FORE  "-f\0\0"
-#define FUSE_OPT_NOMULTI "-s\0\0"
-
-static const uint32_t default_major = 231;
-static const uint32_t default_minor = 1;
-static const char cuse_device_name[] = "/dev/cuse";
-static const char default_cdev[] = "vhost-net";
-
-static struct fuse_session *session;
-static struct vhost_net_device_ops const *ops;
-
-/*
- * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
- * when the device is added to the device linked list.
- */
-static struct vhost_device_ctx
-fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
-{
-	struct vhost_device_ctx ctx;
-	struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
-
-	ctx.pid = req_ctx->pid;
-	ctx.fh = fi->fh;
-
-	return ctx;
-}
-
-/*
- * When the device is created in QEMU it gets initialised here and
- * added to the device linked list.
- */
-static void
-vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
-{
-	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
-	int err = 0;
-
-	err = ops->new_device(ctx);
-	if (err == -1) {
-		fuse_reply_err(req, EPERM);
-		return;
-	}
-
-	fi->fh = err;
-
-	RTE_LOG(INFO, VHOST_CONFIG,
-		"(%"PRIu64") Device configuration started\n", fi->fh);
-	fuse_reply_open(req, fi);
-}
-
-/*
- * When QEMU is shutdown or killed the device gets released.
- */
-static void
-vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
-{
-	int err = 0;
-	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
-
-	ops->destroy_device(ctx);
-	RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
-	fuse_reply_err(req, err);
-}
-
-/*
- * Boilerplate code for CUSE IOCTL
- * Implicit arguments: ctx, req, result.
- */
-#define VHOST_IOCTL(func) do {	\
-	result = (func)(ctx);	\
-	fuse_reply_ioctl(req, result, NULL, 0);	\
-} while (0)
-
-/*
- * Boilerplate IOCTL RETRY
- * Implicit arguments: req.
- */
-#define VHOST_IOCTL_RETRY(size_r, size_w) do {	\
-	struct iovec iov_r = { arg, (size_r) };	\
-	struct iovec iov_w = { arg, (size_w) };	\
-	fuse_reply_ioctl_retry(req, &iov_r,	\
-		(size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
-} while (0)
-
-/*
- * Boilerplate code for CUSE Read IOCTL
- * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
- */
-#define VHOST_IOCTL_R(type, var, func) do {	\
-	if (!in_bufsz) {	\
-		VHOST_IOCTL_RETRY(sizeof(type), 0);\
-	} else {	\
-		(var) = *(const type*)in_buf;	\
-		result = func(ctx, &(var));	\
-		fuse_reply_ioctl(req, result, NULL, 0);\
-	}	\
-} while (0)
-
-/*
- * Boilerplate code for CUSE Write IOCTL
- * Implicit arguments: ctx, req, result, out_bufsz.
- */
-#define VHOST_IOCTL_W(type, var, func) do {	\
-	if (!out_bufsz) {	\
-		VHOST_IOCTL_RETRY(0, sizeof(type));\
-	} else {	\
-		result = (func)(ctx, &(var));\
-		fuse_reply_ioctl(req, result, &(var), sizeof(type));\
-	} \
-} while (0)
-
-/*
- * Boilerplate code for CUSE Read/Write IOCTL
- * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
- */
-#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do {	\
-	if (!in_bufsz) {	\
-		VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
-	} else {	\
-		(var1) = *(const type1*) (in_buf);	\
-		result = (func)(ctx, (var1), &(var2));	\
-		fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
-	}	\
-} while (0)
-
-/*
- * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type
- * of IOCTL a buffer is requested to read or to write. This request is handled
- * by FUSE and the buffer is then given to CUSE.
- */
-static void
-vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
-		struct fuse_file_info *fi, __rte_unused unsigned flags,
-		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
-{
-	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
-	struct vhost_vring_file file;
-	struct vhost_vring_state state;
-	struct vhost_vring_addr addr;
-	uint64_t features;
-	uint32_t index;
-	int result = 0;
-
-	switch (cmd) {
-	case VHOST_NET_SET_BACKEND:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
-		VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
-		break;
-
-	case VHOST_GET_FEATURES:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
-		VHOST_IOCTL_W(uint64_t, features, ops->get_features);
-		break;
-
-	case VHOST_SET_FEATURES:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
-		VHOST_IOCTL_R(uint64_t, features, ops->set_features);
-		break;
-
-	case VHOST_RESET_OWNER:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
-		VHOST_IOCTL(ops->reset_owner);
-		break;
-
-	case VHOST_SET_OWNER:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
-		VHOST_IOCTL(ops->set_owner);
-		break;
-
-	case VHOST_SET_MEM_TABLE:
-		/*TODO fix race condition.*/
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
-		static struct vhost_memory mem_temp;
-
-		switch (in_bufsz) {
-		case 0:
-			VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
-			break;
-
-		case sizeof(struct vhost_memory):
-			mem_temp = *(const struct vhost_memory *) in_buf;
-
-			if (mem_temp.nregions > 0) {
-				VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
-					(sizeof(struct vhost_memory_region) *
-						mem_temp.nregions), 0);
-			} else {
-				result = -1;
-				fuse_reply_ioctl(req, result, NULL, 0);
-			}
-			break;
-
-		default:
-			result = ops->set_mem_table(ctx,
-					in_buf, mem_temp.nregions);
-			if (result)
-				fuse_reply_err(req, EINVAL);
-			else
-				fuse_reply_ioctl(req, result, NULL, 0);
-		}
-		break;
-
-	case VHOST_SET_VRING_NUM:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
-		VHOST_IOCTL_R(struct vhost_vring_state, state,
-			ops->set_vring_num);
-		break;
-
-	case VHOST_SET_VRING_BASE:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
-		VHOST_IOCTL_R(struct vhost_vring_state, state,
-			ops->set_vring_base);
-		break;
-
-	case VHOST_GET_VRING_BASE:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
-		VHOST_IOCTL_RW(uint32_t, index,
-			struct vhost_vring_state, state, ops->get_vring_base);
-		break;
-
-	case VHOST_SET_VRING_ADDR:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
-		VHOST_IOCTL_R(struct vhost_vring_addr, addr,
-			ops->set_vring_addr);
-		break;
-
-	case VHOST_SET_VRING_KICK:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n", ctx.fh);
-		VHOST_IOCTL_R(struct vhost_vring_file, file,
-			ops->set_vring_kick);
-		break;
-
-	case VHOST_SET_VRING_CALL:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n", ctx.fh);
-		VHOST_IOCTL_R(struct vhost_vring_file, file,
-			ops->set_vring_call);
-		break;
-
-	default:
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
-		result = -1;
-		fuse_reply_ioctl(req, result, NULL, 0);
-	}
-
-	if (result < 0)
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
-	else
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
-}
-
-/*
- * Structure handling open, release and ioctl function pointers is populated.
- */
-static const struct cuse_lowlevel_ops vhost_net_ops = {
-	.open		= vhost_net_open,
-	.release	= vhost_net_release,
-	.ioctl		= vhost_net_ioctl,
-};
-
-/*
- * cuse_info is populated and used to register the cuse device.
- * vhost_net_device_ops are also passed when the device is registered in app.
- */
-int
-rte_vhost_driver_register(const char *dev_name)
-{
-	struct cuse_info cuse_info;
-	char device_name[PATH_MAX] = "";
-	char char_device_name[PATH_MAX] = "";
-	const char *device_argv[] = { device_name };
-
-	char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
-	char fuse_opt_fore[] = FUSE_OPT_FORE;
-	char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
-	char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
-
-	if (access(cuse_device_name, R_OK | W_OK) < 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"char device %s can't be accessed, maybe not exist\n",
-			cuse_device_name);
-		return -1;
-	}
-
-	/*
-	 * The device name is created. This is passed to QEMU so that it can
-	 * register the device with our application.
-	 */
-	snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
-	snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
-
-	/* Check if device already exists. */
-	if (access(char_device_name, F_OK) != -1) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"char device %s already exists\n", char_device_name);
-		return -1;
-	}
-
-	memset(&cuse_info, 0, sizeof(cuse_info));
-	cuse_info.dev_major = default_major;
-	cuse_info.dev_minor = default_minor;
-	cuse_info.dev_info_argc = 1;
-	cuse_info.dev_info_argv = device_argv;
-	cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
-
-	ops = get_virtio_net_callbacks();
-
-	session = cuse_lowlevel_setup(3, fuse_argv,
-			&cuse_info, &vhost_net_ops, 0, NULL);
-	if (session == NULL)
-		return -1;
-
-	return 0;
-}
-
-/**
- * The CUSE session is launched allowing the application to receive open,
- * release and ioctl calls.
- */
-int
-rte_vhost_driver_session_start(void)
-{
-	fuse_session_loop(session);
-
-	return 0;
-}
diff --git a/lib/librte_vhost/vhost-net-cdev.h b/lib/librte_vhost/vhost-net-cdev.h
deleted file mode 100644
index 03a5c57..0000000
--- a/lib/librte_vhost/vhost-net-cdev.h
+++ /dev/null
@@ -1,113 +0,0 @@ 
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VHOST_NET_CDEV_H_
-#define _VHOST_NET_CDEV_H_
-#include <stdint.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <linux/vhost.h>
-
-#include <rte_log.h>
-
-/* Macros for printing using RTE_LOG */
-#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
-#define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1
-
-#ifdef RTE_LIBRTE_VHOST_DEBUG
-#define VHOST_MAX_PRINT_BUFF 6072
-#define LOG_LEVEL RTE_LOG_DEBUG
-#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
-#define PRINT_PACKET(device, addr, size, header) do { \
-	char *pkt_addr = (char *)(addr); \
-	unsigned int index; \
-	char packet[VHOST_MAX_PRINT_BUFF]; \
-	\
-	if ((header)) \
-		snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Header size %d: ", (device->device_fh), (size)); \
-	else \
-		snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Packet size %d: ", (device->device_fh), (size)); \
-	for (index = 0; index < (size); index++) { \
-		snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \
-			"%02hhx ", pkt_addr[index]); \
-	} \
-	snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \
-	\
-	LOG_DEBUG(VHOST_DATA, "%s", packet); \
-} while (0)
-#else
-#define LOG_LEVEL RTE_LOG_INFO
-#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
-#define PRINT_PACKET(device, addr, size, header) do {} while (0)
-#endif
-
-
-/*
- * Structure used to identify device context.
- */
-struct vhost_device_ctx {
-	pid_t		pid;	/* PID of process calling the IOCTL. */
-	uint64_t	fh;	/* Populated with fi->fh to track the device index. */
-};
-
-/*
- * Structure contains function pointers to be defined in virtio-net.c. These
- * functions are called in CUSE context and are used to configure devices.
- */
-struct vhost_net_device_ops {
-	int (*new_device)(struct vhost_device_ctx);
-	void (*destroy_device)(struct vhost_device_ctx);
-
-	int (*get_features)(struct vhost_device_ctx, uint64_t *);
-	int (*set_features)(struct vhost_device_ctx, uint64_t *);
-
-	int (*set_mem_table)(struct vhost_device_ctx, const void *, uint32_t);
-
-	int (*set_vring_num)(struct vhost_device_ctx, struct vhost_vring_state *);
-	int (*set_vring_addr)(struct vhost_device_ctx, struct vhost_vring_addr *);
-	int (*set_vring_base)(struct vhost_device_ctx, struct vhost_vring_state *);
-	int (*get_vring_base)(struct vhost_device_ctx, uint32_t, struct vhost_vring_state *);
-
-	int (*set_vring_kick)(struct vhost_device_ctx, struct vhost_vring_file *);
-	int (*set_vring_call)(struct vhost_device_ctx, struct vhost_vring_file *);
-
-	int (*set_backend)(struct vhost_device_ctx, struct vhost_vring_file *);
-
-	int (*set_owner)(struct vhost_device_ctx);
-	int (*reset_owner)(struct vhost_device_ctx);
-};
-
-
-struct vhost_net_device_ops const *get_virtio_net_callbacks(void);
-#endif /* _VHOST_NET_CDEV_H_ */
diff --git a/lib/librte_vhost/vhost-user/fd_man.c b/lib/librte_vhost/vhost-user/fd_man.c
new file mode 100644
index 0000000..c7fd3f2
--- /dev/null
+++ b/lib/librte_vhost/vhost-user/fd_man.c
@@ -0,0 +1,158 @@ 
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <rte_log.h>
+
+#include "fd_man.h"
+
+/**
+ * Look up the entry that holds a given fd.
+ * Passing -1 as fd locates the first unused entry, because unused entries
+ * store -1 in their fd field.
+ * @return
+ *   Index of the matching entry, or -1 when no entry matches.
+ */
+static int
+fdset_find_fd(struct fdset *pfdset, int fd)
+{
+	int idx = 0;
+
+	/* Advance until the fd is found or the table is exhausted. */
+	while (idx < pfdset->num && pfdset->fd[idx].fd != fd)
+		idx++;
+
+	if (idx == pfdset->num)
+		return -1;
+
+	return idx;
+}
+
+/* Returns the index of the first unused entry (fd == -1), or -1 if none. */
+static int
+fdset_find_free_slot(struct fdset *pfdset)
+{
+	const int unused_fd = -1;
+
+	return fdset_find_fd(pfdset, unused_fd);
+}
+
+/* Store fd, its read/write callbacks and its context in entry idx. */
+static void
+fdset_add_fd(struct fdset  *pfdset, int idx, int fd, fd_cb rcb, 
+		fd_cb wcb, uint64_t dat)
+{
+	pfdset->fd[idx].fd = fd;
+	pfdset->fd[idx].rcb = rcb;
+	pfdset->fd[idx].wcb = wcb;
+	pfdset->fd[idx].dat = dat;
+}
+
+/**
+ * Populate the select() read/write sets from the registered entries.
+ * An entry is added to rfset when it has a read callback and to wfset when
+ * it has a write callback; either set pointer may be NULL to skip that set.
+ * @return
+ *  the highest fd placed in either set, or -1 if nothing was added.
+ */
+static int
+fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)
+{
+	int highest = -1;
+	int slot;
+
+	for (slot = 0; slot < MAX_FDS; slot++) {
+		const struct fdentry *entry = &pfdset->fd[slot];
+		int want_read, want_write;
+
+		/* Unused entry. */
+		if (entry->fd == -1)
+			continue;
+
+		want_read = entry->rcb != NULL && rfset != NULL;
+		want_write = entry->wcb != NULL && wfset != NULL;
+
+		if (want_read)
+			FD_SET(entry->fd, rfset);
+		if (want_write)
+			FD_SET(entry->fd, wfset);
+
+		if ((want_read || want_write) && entry->fd > highest)
+			highest = entry->fd;
+	}
+
+	return highest;
+}
+
+/* Mark every entry of the set as unused and record the table capacity. */
+void
+fdset_init(struct fdset *pfdset)
+{
+	struct fdentry *entry = pfdset->fd;
+	struct fdentry *const end = entry + MAX_FDS;
+
+	for (; entry != end; entry++)
+		entry->fd = -1;
+
+	pfdset->num = MAX_FDS;
+}
+
+/**
+ * Register the fd in the fdset with its read/write handler and context.
+ *
+ * The set is polled with select(), so only descriptors that are valid for
+ * FD_SET()/FD_ISSET() are accepted: 0 <= fd < FD_SETSIZE.
+ *
+ * @param pfdset
+ *  set to add the fd to.
+ * @param fd
+ *  descriptor to watch.
+ * @param rcb
+ *  callback invoked when fd is readable, or NULL.
+ * @param wcb
+ *  callback invoked when fd is writable, or NULL.
+ * @param dat
+ *  opaque context handed back to the callbacks.
+ * @return
+ *  0 on success, -1 if pfdset is NULL or fd cannot be used with select(),
+ *  -2 if the set has no free entry left.
+ */
+int
+fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
+{
+	int i;
+
+	/* FD_SET() on a descriptor outside [0, FD_SETSIZE) is undefined. */
+	if (pfdset == NULL || fd < 0 || fd >= FD_SETSIZE)
+		return -1;
+
+	/* Find a free slot in the list. */
+	i = fdset_find_free_slot(pfdset);
+	if (i == -1)
+		return -2;
+
+	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
+
+	return 0;
+}
+
+/**
+ *  Remove the fd from the fdset by marking its entry unused.
+ *  Does nothing when the fd is not registered.
+ */
+void
+fdset_del(struct fdset *pfdset, int fd)
+{
+	const int slot = fdset_find_fd(pfdset, fd);
+
+	if (slot == -1)
+		return;
+
+	pfdset->fd[slot].fd = -1;
+}
+
+
+/**
+ * Event loop: wait with select() on every registered fd and invoke the
+ * read/write callback of each fd that became ready.
+ *
+ * The loop returns only when pfdset is NULL or when no registered entry has
+ * a callback left to wait for.
+ *
+ * @param pfdset
+ *  set of fds and callbacks to serve.
+ */
+void
+fdset_event_dispatch(struct fdset *pfdset)
+{
+	fd_set rfds, wfds;
+	int i, maxfds, ret;
+
+	if (pfdset == NULL)
+		return;
+
+	while (1) {
+		FD_ZERO(&rfds);
+		FD_ZERO(&wfds);
+		maxfds = fdset_fill(&rfds, &wfds, pfdset);
+		/* fd management runs in one thread */
+		if (maxfds == -1)
+			return;
+
+		ret = select(maxfds + 1, &rfds, &wfds, NULL, NULL);
+		/*
+		 * On failure (e.g. EINTR) the contents of the sets are
+		 * unspecified, so dispatch nothing; rebuild them and retry.
+		 */
+		if (ret <= 0)
+			continue;
+
+		for (i = 0; i < MAX_FDS; i++) {
+			struct fdentry *pfdentry = &pfdset->fd[i];
+			int fd = pfdentry->fd;
+
+			/* FD_ISSET() is undefined outside [0, FD_SETSIZE). */
+			if (fd < 0 || fd >= FD_SETSIZE)
+				continue;
+
+			if (FD_ISSET(fd, &rfds) && pfdentry->rcb != NULL)
+				pfdentry->rcb(fd, pfdentry->dat);
+
+			/* The read callback may have removed or reused the entry. */
+			if (pfdentry->fd != fd)
+				continue;
+
+			if (FD_ISSET(fd, &wfds) && pfdentry->wcb != NULL)
+				pfdentry->wcb(fd, pfdentry->dat);
+		}
+	}
+}
diff --git a/lib/librte_vhost/vhost-user/fd_man.h b/lib/librte_vhost/vhost-user/fd_man.h
new file mode 100644
index 0000000..57cc81d
--- /dev/null
+++ b/lib/librte_vhost/vhost-user/fd_man.h
@@ -0,0 +1,31 @@ 
+#ifndef _FD_MAN_H_
+#define _FD_MAN_H_
+#include <stdint.h>
+
+/* Number of entries in the fd table of a struct fdset. */
+#define MAX_FDS 1024
+
+/* Event callback: receives the ready fd and the context it was registered with. */
+typedef void (*fd_cb)(int fd, uint64_t dat);
+
+/* One registered fd with its callbacks and context. */
+struct fdentry {
+	int fd; /* -1 indicates this entry is empty */
+	fd_cb rcb; /* callback when this fd is readable. */
+	fd_cb wcb; /* callback when this fd is writeable.*/
+	uint64_t dat;	/* fd context */
+};
+
+/* Fixed-size table of registered fds served by fdset_event_dispatch(). */
+struct fdset {
+	struct fdentry fd[MAX_FDS];
+	int num;	/* number of entries searched by lookups; fdset_init() sets it to MAX_FDS */
+};
+
+
+/* Mark every entry empty; must be called before the set is used. */
+void fdset_init(struct fdset *pfdset);
+
+/*
+ * Register fd with its read/write callbacks and context.
+ * Returns 0 on success, -1 if fd is -1, -2 if no free entry is left.
+ */
+int fdset_add(struct fdset *pfdset, int fd, fd_cb rcb,
+	fd_cb wcb, uint64_t ctx);
+
+/* Unregister fd; no effect if fd is not in the set. */
+void fdset_del(struct fdset *pfdset, int fd);
+
+/* Run the select() based event loop that invokes the registered callbacks. */
+void fdset_event_dispatch(struct fdset *pfdset);
+
+#endif
diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.c b/lib/librte_vhost/vhost-user/vhost-net-user.c
new file mode 100644
index 0000000..34450f4
--- /dev/null
+++ b/lib/librte_vhost/vhost-user/vhost-net-user.c
@@ -0,0 +1,417 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <errno.h>
+
+#include <rte_log.h>
+#include <rte_virtio_net.h>
+
+#include "fd_man.h"
+#include "vhost-net-user.h"
+#include "vhost-net.h"
+#include "virtio-net-user.h"
+
+/* fdset callbacks, defined later in this file. */
+static void vserver_new_vq_conn(int fd, uint64_t data);
+static void vserver_message_handler(int fd, uint64_t dat);
+/* Device operation table; assigned in rte_vhost_driver_register(). */
+const struct vhost_net_device_ops *ops;
+
+/* The single vhost server instance; NULL until rte_vhost_driver_register() succeeds. */
+static struct vhost_server *g_vhost_server;
+
+/*
+ * Printable name of each vhost-user request, indexed by request code.
+ * Valid indexes are 0 .. VHOST_USER_MAX - 1; used when logging received
+ * messages.
+ */
+static const char *vhost_message_str[VHOST_USER_MAX] =
+{
+	[VHOST_USER_NONE] = "VHOST_USER_NONE",
+	[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
+	[VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
+	[VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
+	[VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
+	[VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
+	[VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
+	[VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
+	[VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
+	[VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
+	[VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
+	[VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
+	[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
+	[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
+	[VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR"
+};
+
+/**
+ * Create a unix domain stream socket, bind it to path and start listening.
+ *
+ * @param path
+ *  filesystem path of the socket; it must fit into sockaddr_un.sun_path
+ *  including the terminating NUL.
+ * @return
+ *  listening socket fd, or -1 on failure (NULL or over-long path, or a
+ *  failing socket()/bind()/listen() call).
+ */
+static int
+uds_socket(const char *path)
+{
+	struct sockaddr_un un;
+	int sockfd;
+	int ret;
+
+	if (path == NULL)
+		return -1;
+
+	memset(&un, 0, sizeof(un));
+	un.sun_family = AF_UNIX;
+	/* Refuse to bind to a silently truncated, hence different, path. */
+	ret = snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
+	if (ret < 0 || (size_t)ret >= sizeof(un.sun_path)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"socket path %s is too long\n", path);
+		return -1;
+	}
+
+	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (sockfd < 0)
+		return -1;
+	RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
+
+	ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
+	if (ret == -1)
+		goto err;
+	RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
+
+	ret = listen(sockfd, 1);
+	if (ret == -1)
+		goto err;
+
+	return sockfd;
+
+err:
+	close(sockfd);
+	return -1;
+}
+
+
+/**
+ * Receive one message from a unix domain socket together with any file
+ * descriptors passed as SCM_RIGHTS ancillary data.
+ *
+ * @param sockfd
+ *  connected socket to read from.
+ * @param buf
+ *  destination for the message bytes.
+ * @param buflen
+ *  maximum number of bytes to store in buf.
+ * @param fds
+ *  array of fd_num ints; every entry is set to -1 unless a descriptor was
+ *  received for it.
+ * @param fd_num
+ *  capacity of fds.
+ * @return
+ *  number of bytes read, 0 if the peer closed the connection, a negative
+ *  value on error or when the message or its ancillary data was truncated.
+ */
+static int
+read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+{
+
+	struct iovec  iov;
+	struct msghdr msgh;
+	/* A negative count must not turn into a huge control buffer. */
+	size_t fdsize = fd_num > 0 ? fd_num * sizeof(int) : 0;
+	char control[CMSG_SPACE(fdsize)];
+	struct cmsghdr *cmsg;
+	int ret;
+	int i;
+
+	/* Entries for which no descriptor arrives must not hold stack garbage. */
+	for (i = 0; fds != NULL && i < fd_num; i++)
+		fds[i] = -1;
+
+	iov.iov_base = buf;
+	iov.iov_len  = buflen;
+
+	memset(&msgh, 0, sizeof(msgh));
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+	msgh.msg_control = control;
+
+	do {
+		/* msg_controllen is a value-result field; reset it per attempt. */
+		msgh.msg_controllen = sizeof(control);
+		ret = recvmsg(sockfd, &msgh, 0);
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG, "%s failed\n", __func__);
+		return ret;
+	}
+	/* Orderly shutdown by the peer is reported to the caller, not an error. */
+	if (ret == 0)
+		return 0;
+
+	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+		RTE_LOG(ERR, VHOST_CONFIG, "%s: truncated message\n", __func__);
+		return -1;
+	}
+
+	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
+		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
+		if ((cmsg->cmsg_level == SOL_SOCKET) &&
+			(cmsg->cmsg_type == SCM_RIGHTS)) {
+			/* Copy only the descriptors that were actually sent. */
+			size_t len = cmsg->cmsg_len - CMSG_LEN(0);
+
+			if (len > fdsize)
+				len = fdsize;
+			if (fds != NULL && len > 0)
+				memcpy(fds, CMSG_DATA(cmsg), len);
+			break;
+		}
+	}
+	return ret;
+}
+
+/**
+ * Read one complete vhost-user message: the fixed-size header (with any
+ * passed file descriptors stored in msg->fds) followed by msg->size bytes of
+ * payload.
+ *
+ * @param sockfd
+ *  connected vhost-user socket.
+ * @param msg
+ *  message buffer to fill.
+ * @return
+ *  a positive byte count on success, 0 if the peer closed the connection,
+ *  a negative value on error or malformed message.
+ */
+static int
+read_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+	char *payload = (char *)&msg->payload;
+	size_t done = 0;
+	int ret;
+
+	ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE, 
+		msg->fds, VHOST_MEMORY_MAX_NREGIONS);
+	if (ret <= 0)
+		return ret;
+
+	/* request/flags/size are only meaningful once the whole header is in. */
+	if (ret != (int)VHOST_USER_HDR_SIZE) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"%s: short header:%d\n", __func__, ret);
+		return -1;
+	}
+
+	if (msg->size == 0)
+		return ret;
+
+	if (msg->size > sizeof(msg->payload)) {
+		RTE_LOG(ERR, VHOST_CONFIG, 
+			"%s: invalid size:%u\n", __func__,
+			(unsigned)msg->size);
+		return -1;
+	}
+
+	/* A stream socket may hand over the payload in several pieces. */
+	while (done < msg->size) {
+		ssize_t n = read(sockfd, payload + done, msg->size - done);
+
+		if (n < 0 && errno == EINTR)
+			continue;
+		if (n == 0)
+			return 0;
+		if (n < 0) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"read control message failed\n");
+			return -1;
+		}
+		done += (size_t)n;
+	}
+
+	return (int)msg->size;
+}
+
+/**
+ * Send buflen bytes from buf over sockfd, attaching fd_num file descriptors
+ * from fds as SCM_RIGHTS ancillary data when fds is non-NULL and fd_num > 0.
+ * The send is retried while it is interrupted by a signal.
+ * @return
+ *  0 on success, -1 if sendmsg() failed.
+ */
+static int
+send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+{
+	size_t fdsize = fd_num * sizeof(int);
+	char control[CMSG_SPACE(fdsize)];
+	struct iovec iov;
+	struct msghdr msgh;
+	int sent;
+
+	iov.iov_base = buf;
+	iov.iov_len = buflen;
+
+	/* msg_control/msg_controllen stay NULL/0 unless fds are attached. */
+	memset(&msgh, 0, sizeof(msgh));
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+
+	if (fds != NULL && fd_num > 0) {
+		struct cmsghdr *hdr;
+
+		msgh.msg_control = control;
+		msgh.msg_controllen = sizeof(control);
+
+		hdr = CMSG_FIRSTHDR(&msgh);
+		hdr->cmsg_len = CMSG_LEN(fdsize);
+		hdr->cmsg_level = SOL_SOCKET;
+		hdr->cmsg_type = SCM_RIGHTS;
+		memcpy(CMSG_DATA(hdr), fds, fdsize);
+	}
+
+	for (;;) {
+		sent = sendmsg(sockfd, &msgh, 0);
+		if (sent >= 0)
+			return 0;
+		if (errno != EINTR)
+			break;
+	}
+
+	RTE_LOG(ERR, VHOST_CONFIG,  "sendmsg error\n");
+	return -1;
+}
+
+/**
+ * Send msg back to the peer as a reply: the version bits of msg->flags are
+ * replaced with VHOST_USER_VERSION, the reply bit is set, and the header
+ * plus msg->size payload bytes are sent without any file descriptors.
+ * @return
+ *  the result of send_fd_message().
+ */
+static int
+send_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+	uint32_t flags = msg->flags;
+
+	flags &= ~VHOST_USER_VERSION_MASK;
+	flags |= VHOST_USER_VERSION | VHOST_USER_REPLY_MASK;
+	msg->flags = flags;
+
+	return send_fd_message(sockfd, (char *)msg,
+		VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
+}
+
+/**
+ * fdset read callback of the listening socket: accept the new connection,
+ * create a vhost device for it and register the connection fd so that
+ * vserver_message_handler() serves its messages.
+ *
+ * @param fd
+ *  the listening socket.
+ * @param dat
+ *  the struct vhost_server pointer passed at registration time.
+ */
+static void
+vserver_new_vq_conn(int fd, uint64_t dat)
+{
+	struct vhost_server *vserver = (void *)(uintptr_t)dat;
+	struct vhost_device_ctx vdev_ctx = { 0 };
+	int conn_fd;
+	int fh;
+
+	conn_fd = accept(fd, NULL, NULL);
+	RTE_LOG(INFO, VHOST_CONFIG, 
+		"%s: new connection is %d\n", __func__, conn_fd);
+	if (conn_fd < 0)
+		return;
+
+	/* A negative handle means no device was created; drop the connection. */
+	fh = ops->new_device(vdev_ctx);
+	if (fh < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"%s: failed to create new device\n", __func__);
+		close(conn_fd);
+		return;
+	}
+	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
+
+	if (fdset_add(&vserver->fdset,
+		conn_fd, vserver_message_handler, NULL, fh) < 0) {
+		/* Nobody would ever serve this connection; undo everything. */
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"%s: failed to register connection %d\n",
+			__func__, conn_fd);
+		vdev_ctx.fh = fh;
+		ops->destroy_device(vdev_ctx);
+		close(conn_fd);
+	}
+}
+
+/**
+ * fdset read callback of a vhost-user connection: read one message and
+ * dispatch it to the matching device operation.
+ *
+ * When the read fails, the peer has closed the connection or the request
+ * code is out of range, the connection is removed from the fdset and closed
+ * and the device is destroyed.
+ *
+ * @param connfd
+ *  the connection socket.
+ * @param dat
+ *  the device handle passed at registration time.
+ */
+static void
+vserver_message_handler(int connfd, uint64_t dat)
+{
+	struct vhost_device_ctx ctx = { 0 };
+	uint32_t fh = (uint32_t)dat;
+	struct VhostUserMsg msg;
+	uint64_t features = 0;
+	int ret;
+
+	ctx.fh = fh;
+	ret = read_vhost_message(connfd, &msg);
+	/*
+	 * Valid request codes are 0 .. VHOST_USER_MAX - 1; anything else would
+	 * index past the end of vhost_message_str[].
+	 */
+	if (ret <= 0 || (uint32_t)msg.request >= VHOST_USER_MAX) {
+		if (ret < 0)
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"vhost read message failed\n");
+		else if (ret == 0)
+			RTE_LOG(INFO, VHOST_CONFIG,
+				"vhost peer closed\n");
+		else
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"vhost read incorrect message\n");
+
+		/*TODO: cleanup */
+		/* Stop watching the fd before its number can be reused. */
+		fdset_del(&g_vhost_server->fdset, connfd);
+		close(connfd);
+		ops->destroy_device(ctx);
+
+		return;
+	}
+
+	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
+		vhost_message_str[msg.request]);
+	switch (msg.request) {
+	case VHOST_USER_GET_FEATURES:
+		/* The reply carries the feature bits, not the return code. */
+		ret = ops->get_features(ctx, &features);
+		msg.payload.u64 = features;
+		msg.size = sizeof(msg.payload.u64);
+		send_vhost_message(connfd, &msg);
+		break;
+	case VHOST_USER_SET_FEATURES:
+		/* The feature bits to set arrive in the message payload. */
+		features = msg.payload.u64;
+		ops->set_features(ctx, &features);
+		break;
+
+	case VHOST_USER_SET_OWNER:
+		ops->set_owner(ctx);
+		break;
+	case VHOST_USER_RESET_OWNER:
+		ops->reset_owner(ctx);
+		break;
+
+	case VHOST_USER_SET_MEM_TABLE:
+		user_set_mem_table(ctx, &msg);
+		break;
+
+	case VHOST_USER_SET_LOG_BASE:
+	case VHOST_USER_SET_LOG_FD:
+		RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
+		break;
+
+	case VHOST_USER_SET_VRING_NUM:
+		ops->set_vring_num(ctx, &msg.payload.state);
+		break;
+	case VHOST_USER_SET_VRING_ADDR:
+		ops->set_vring_addr(ctx, &msg.payload.addr);
+		break;
+	case VHOST_USER_SET_VRING_BASE:
+		ops->set_vring_base(ctx, &msg.payload.state);
+		break;
+
+	case VHOST_USER_GET_VRING_BASE:
+		/* The state is updated in place and sent back as the reply. */
+		ret = ops->get_vring_base(ctx, msg.payload.state.index,
+			&msg.payload.state);
+		msg.size = sizeof(msg.payload.state);
+		send_vhost_message(connfd, &msg);
+		break;
+
+	case VHOST_USER_SET_VRING_KICK:
+		user_set_vring_kick(ctx, &msg);
+		break;
+	case VHOST_USER_SET_VRING_CALL:
+		user_set_vring_call(ctx, &msg);
+		break;
+
+	case VHOST_USER_SET_VRING_ERR:
+		RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
+		break;
+
+	default:
+		break;
+
+	}
+}
+
+
+/**
+ * Creates and initialises the vhost server listening on a unix domain
+ * socket bound to path. Only one server can be registered.
+ *
+ * Any existing file at path is removed first. The path string is not
+ * copied, so it must stay valid for as long as the server exists.
+ *
+ * @param path
+ *  filesystem path of the vhost-user socket.
+ * @return
+ *  0 on success, -1 if path is NULL, a server is already registered, or
+ *  allocation / socket setup / fd registration fails.
+ */
+int
+rte_vhost_driver_register(const char *path)
+{
+
+	struct vhost_server *vserver;
+
+	if (path == NULL || g_vhost_server != NULL)
+		return -1;
+
+	vserver = calloc(1, sizeof(*vserver));
+	/*TODO: all allocation is through DPDK memory allocation */
+	if (vserver == NULL)
+		return -1;
+
+	fdset_init(&vserver->fdset);
+
+	unlink(path);
+
+	vserver->listenfd = uds_socket(path);
+	if (vserver->listenfd < 0) {
+		free(vserver);
+		return -1;
+	}
+	vserver->path = path;
+
+	/* Without the listener in the fdset no connection is ever accepted. */
+	if (fdset_add(&vserver->fdset, vserver->listenfd,
+			vserver_new_vq_conn, NULL,
+			(uint64_t)(uintptr_t)vserver) < 0) {
+		close(vserver->listenfd);
+		free(vserver);
+		return -1;
+	}
+
+	ops = get_virtio_net_callbacks();
+
+	g_vhost_server = vserver;
+
+	return 0;
+}
+
+
+/**
+ * Run the vhost server event loop in the calling thread.
+ *
+ * @return
+ *  -1 if no server has been registered with rte_vhost_driver_register(),
+ *  0 once the event loop has returned.
+ */
+int
+rte_vhost_driver_session_start(void)
+{
+	/*
+	 * &g_vhost_server->fdset is a non-NULL bogus address when the server
+	 * pointer is NULL, so it has to be checked here rather than relying on
+	 * the NULL check inside fdset_event_dispatch().
+	 */
+	if (g_vhost_server == NULL)
+		return -1;
+
+	fdset_event_dispatch(&g_vhost_server->fdset);
+	return 0;
+}
+
diff --git a/lib/librte_vhost/vhost-user/vhost-net-user.h b/lib/librte_vhost/vhost-user/vhost-net-user.h
new file mode 100644
index 0000000..c9df9fa
--- /dev/null
+++ b/lib/librte_vhost/vhost-user/vhost-net-user.h
@@ -0,0 +1,74 @@ 
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "fd_man.h"
+
+/*
+ * State of the vhost server: the path and fd of its listening socket plus
+ * the set of file descriptors it dispatches events for.
+ */
+struct vhost_server {
+	const char *path; /**< Path the listening socket is bound to; the caller's pointer is stored, not a copy. */
+	int listenfd;     /**< The listener sockfd. */
+	struct fdset fdset; /**< The fd list this vhost server manages. */
+};
+
+/*********** FROM hw/virtio/vhost-user.c *************************************/
+
+#define VHOST_MEMORY_MAX_NREGIONS    8
+
+/*
+ * Request codes carried in VhostUserMsg.request. The numeric values are part
+ * of the message format shared with the peer (see the "FROM
+ * hw/virtio/vhost-user.c" banner above), so they must not be renumbered.
+ * VHOST_USER_MAX is one past the highest defined request.
+ */
+typedef enum VhostUserRequest {
+    VHOST_USER_NONE = 0,
+    VHOST_USER_GET_FEATURES = 1,
+    VHOST_USER_SET_FEATURES = 2,
+    VHOST_USER_SET_OWNER = 3,
+    VHOST_USER_RESET_OWNER = 4,
+    VHOST_USER_SET_MEM_TABLE = 5,
+    VHOST_USER_SET_LOG_BASE = 6,
+    VHOST_USER_SET_LOG_FD = 7,
+    VHOST_USER_SET_VRING_NUM = 8,
+    VHOST_USER_SET_VRING_ADDR = 9,
+    VHOST_USER_SET_VRING_BASE = 10,
+    VHOST_USER_GET_VRING_BASE = 11,
+    VHOST_USER_SET_VRING_KICK = 12,
+    VHOST_USER_SET_VRING_CALL = 13,
+    VHOST_USER_SET_VRING_ERR = 14,
+    VHOST_USER_MAX
+} VhostUserRequest;
+
+/* Description of one guest memory region inside a SET_MEM_TABLE payload. */
+typedef struct VhostUserMemoryRegion {
+    uint64_t guest_phys_addr; /* guest physical address where the region starts */
+    uint64_t memory_size;     /* length of the region in bytes */
+    uint64_t userspace_addr;  /* address of the region in the peer (logged as "QEMU VA") */
+    uint64_t mmap_offset;     /* offset of the region's data within the file behind its fd */
+} VhostUserMemoryRegion;
+
+/*
+ * Payload of VHOST_USER_SET_MEM_TABLE: 'nregions' entries of 'regions' are
+ * meaningful; the array holds at most VHOST_MEMORY_MAX_NREGIONS of them.
+ */
+typedef struct VhostUserMemory {
+    uint32_t nregions;
+    uint32_t padding;
+    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+/*
+ * One vhost-user message: a fixed header (request, flags, size) followed by
+ * a request-specific payload. The struct is packed, so its layout is exactly
+ * the member sequence below with no compiler-inserted padding.
+ */
+typedef struct VhostUserMsg {
+    VhostUserRequest request; /* which request this message carries */
+
+/* Bit fields of 'flags'. NOTE(review): presumably the low two bits hold the
+ * protocol version and bit 2 marks a reply -- confirm against the sender. */
+#define VHOST_USER_VERSION_MASK     (0x3)
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+    uint32_t flags;
+    uint32_t size; /* the following payload size */
+    union {
+/* For vring kick/call messages the low 8 bits of u64 are the vring index.
+ * NOTE(review): bit 8 presumably signals that no fd accompanies the
+ * message -- confirm against the protocol description. */
+#define VHOST_USER_VRING_IDX_MASK   (0xff)
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+        uint64_t u64;
+        struct vhost_vring_state state;
+        struct vhost_vring_addr addr;
+        VhostUserMemory memory;
+    } payload;
+     /* File descriptors associated with the message; the handlers read
+      * fds[0] for vring kick/call and fds[idx] per memory region. */
+     int fds[VHOST_MEMORY_MAX_NREGIONS];
+} __attribute__((packed)) VhostUserMsg;
+
+/*
+ * Size of the fixed message header, i.e. the byte offset of the payload
+ * within VhostUserMsg. The whole expansion is parenthesised so the macro
+ * behaves as a single operand wherever it is used.
+ */
+#define VHOST_USER_HDR_SIZE ((intptr_t)(&((VhostUserMsg *)0)->payload.u64))
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    (0x1)
+
+/*****************************************************************************/
+#endif
diff --git a/lib/librte_vhost/vhost-user/virtio-net-user.c b/lib/librte_vhost/vhost-user/virtio-net-user.c
new file mode 100644
index 0000000..f38e6cc
--- /dev/null
+++ b/lib/librte_vhost/vhost-user/virtio-net-user.c
@@ -0,0 +1,208 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include <rte_log.h>
+
+#include "virtio-net-user.h"
+#include "vhost-net-user.h"
+#include "vhost-net.h"
+
+extern const struct vhost_net_device_ops *ops;
+
+#if 0
+/*
+ * Alternative implementation, currently compiled out by the enclosing
+ * "#if 0": maps all guest memory with a single mmap() of fds[0].
+ *
+ * It takes the userspace address of the region whose guest physical address
+ * is 0 as the base, sizes the mapping so that it reaches the end of the
+ * farthest region relative to that base, and derives every region's
+ * address_offset from that one mapping.
+ *
+ * Returns 0 on success, -1 if no region has guest physical address 0 (or its
+ * userspace address is 0) or if mmap() fails.
+ */
+int
+user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	unsigned int idx;
+	struct VhostUserMemory memory = pmsg->payload.memory;
+	struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
+	uint64_t mapped_address, base_address = 0, mem_size = 0;
+
+	/* Locate the region that starts at guest physical address 0. */
+	for (idx = 0; idx < memory.nregions; idx++) {
+		if (memory.regions[idx].guest_phys_addr == 0)
+			base_address = memory.regions[idx].userspace_addr;
+	}
+	if (base_address == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"couldn't find the mem region whose gpa is 0.\n");
+		return -1;
+	}
+
+	/* The mapping must extend to the end of the farthest region. */
+	for (idx = 0; idx < memory.nregions;  idx++) {
+		uint64_t size = memory.regions[idx].userspace_addr - 
+			base_address + memory.regions[idx].memory_size;
+		if (mem_size < size)
+			mem_size = size;
+	}
+
+	/*
+	 * here we assume qemu will map only one file for memory allocation,
+	 * we only use fds[0] with offset 0.
+	 */
+	mapped_address = (uint64_t)(uintptr_t)mmap(NULL, mem_size, 
+		PROT_READ | PROT_WRITE, MAP_SHARED, pmsg->fds[0], 0);
+
+	if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
+		RTE_LOG(ERR, VHOST_CONFIG, " mmap qemu guest failed.\n");
+		return -1;
+	}
+			
+	for (idx = 0; idx < memory.nregions; idx++) {
+		regions[idx].guest_phys_address = 
+			memory.regions[idx].guest_phys_addr;
+		regions[idx].guest_phys_address_end = 
+			memory.regions[idx].guest_phys_addr +
+			memory.regions[idx].memory_size;
+		regions[idx].memory_size = memory.regions[idx].memory_size;
+		regions[idx].userspace_address = 
+			memory.regions[idx].userspace_addr;
+
+		/*
+		 * Local address of the region (mapping base plus the region's
+		 * distance from base_address) minus its guest physical address.
+		 */
+		regions[idx].address_offset = mapped_address - base_address + 
+			regions[idx].userspace_address -
+			regions[idx].guest_phys_address;
+		LOG_DEBUG(VHOST_CONFIG, 
+			"REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
+			idx,
+			(void *)(uintptr_t)regions[idx].guest_phys_address,
+			(void *)(uintptr_t)regions[idx].userspace_address,
+			 regions[idx].memory_size);
+	}
+	ops->set_mem_table(ctx, regions, memory.nregions);
+	return 0;
+}
+
+#else
+
+/**
+ * Handle VHOST_USER_SET_MEM_TABLE: map every guest memory region described
+ * in the message into this process and pass the resulting region table to
+ * the common virtio-net layer through ops->set_mem_table().
+ *
+ * Each region is mapped from its own descriptor (pmsg->fds[idx]) starting at
+ * file offset 0 and covering memory_size + mmap_offset bytes; the region's
+ * data begins mmap_offset bytes into that mapping. address_offset is the
+ * local address of the region's data minus its guest physical address.
+ *
+ * @param ctx
+ *  Device context forwarded unchanged to ops->set_mem_table().
+ * @param pmsg
+ *  Received message; payload.memory and fds[] are read.
+ * @return
+ *  0 on success. -1 if the message announces more regions than
+ *  VHOST_MEMORY_MAX_NREGIONS, if no region with guest physical address 0 and
+ *  a non-zero userspace address exists, if a region's size plus offset
+ *  overflows, or if mmap() fails. On failure every mapping created by this
+ *  call is released again.
+ *
+ * NOTE(review): the descriptors in pmsg->fds[] are not closed here, and
+ * mappings from an earlier memory table are not released here -- confirm
+ * that this is handled elsewhere.
+ */
+int
+user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	unsigned int idx;
+	struct VhostUserMemory memory = pmsg->payload.memory;
+	struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
+	void *map_base[VHOST_MEMORY_MAX_NREGIONS];
+	uint64_t map_len[VHOST_MEMORY_MAX_NREGIONS];
+	uint64_t mapped_address, base_address = 0;
+
+	/*
+	 * nregions comes from the peer; it indexes both the on-stack arrays
+	 * and pmsg->fds[], so it must be bounded before any use.
+	 */
+	if (memory.nregions > VHOST_MEMORY_MAX_NREGIONS) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"too many memory regions (%u).\n",
+			(unsigned int)memory.nregions);
+		return -1;
+	}
+
+	for (idx = 0; idx < memory.nregions; idx++) {
+		if (memory.regions[idx].guest_phys_addr == 0)
+			base_address = memory.regions[idx].userspace_addr;
+	}
+	if (base_address == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"couldn't find the mem region whose gpa is 0.\n");
+		return -1;
+	}
+
+	for (idx = 0; idx < memory.nregions; idx++) {
+		regions[idx].guest_phys_address =
+			memory.regions[idx].guest_phys_addr;
+		regions[idx].guest_phys_address_end =
+			memory.regions[idx].guest_phys_addr +
+			memory.regions[idx].memory_size;
+		regions[idx].memory_size = memory.regions[idx].memory_size;
+		regions[idx].userspace_address =
+			memory.regions[idx].userspace_addr;
+
+		/*
+		 * The file is mapped from offset 0 and the region's offset is
+		 * skipped afterwards, rather than passing mmap_offset to
+		 * mmap() directly (the original author marked this as ugly).
+		 * NOTE(review): presumably done because mmap_offset is not
+		 * guaranteed to satisfy mmap()'s offset alignment for the
+		 * backing file -- confirm.
+		 */
+		map_len[idx] = memory.regions[idx].memory_size +
+			memory.regions[idx].mmap_offset;
+		/* Reject a size/offset pair whose sum wraps around. */
+		if (map_len[idx] < memory.regions[idx].memory_size) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"invalid size/offset for mem region %u.\n",
+				idx);
+			goto err_unmap;
+		}
+
+		map_base[idx] = mmap(NULL, map_len[idx],
+			PROT_READ | PROT_WRITE, MAP_SHARED,
+			pmsg->fds[idx], 0);
+		if (map_base[idx] == MAP_FAILED) {
+			RTE_LOG(ERR, VHOST_CONFIG, " mmap qemu guest failed.\n");
+			goto err_unmap;
+		}
+		LOG_DEBUG(VHOST_CONFIG, "mapped to %p\n", map_base[idx]);
+
+		/* The region's data starts mmap_offset bytes into the mapping. */
+		mapped_address = (uint64_t)(uintptr_t)map_base[idx] +
+			memory.regions[idx].mmap_offset;
+
+		regions[idx].address_offset = mapped_address -
+			regions[idx].guest_phys_address;
+		LOG_DEBUG(VHOST_CONFIG,
+			"REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n",
+			idx,
+			(void *)(uintptr_t)regions[idx].guest_phys_address,
+			(void *)(uintptr_t)regions[idx].userspace_address,
+			 regions[idx].memory_size);
+	}
+	ops->set_mem_table(ctx, regions, memory.nregions);
+	return 0;
+
+err_unmap:
+	/* Regions [0, idx) were mapped by this call; release them again. */
+	while (idx-- > 0)
+		munmap(map_base[idx], map_len[idx]);
+	return -1;
+}
+
+
+
+
+#endif
+
+
+/**
+ * Handle VHOST_USER_SET_VRING_CALL: extract the vring index and the call
+ * descriptor from the message and pass them to ops->set_vring_call().
+ *
+ * @param ctx
+ *  Device context forwarded unchanged to ops->set_vring_call().
+ * @param pmsg
+ *  Received message; payload.u64 carries the vring index in its low 8 bits
+ *  and the "no fd" flag in bit 8, fds[0] carries the descriptor.
+ */
+void
+user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	struct vhost_vring_file file;
+
+	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	/*
+	 * With the NOFD flag set no descriptor accompanies the message, so
+	 * fds[0] holds nothing meaningful; report -1 instead of using it.
+	 */
+	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
+		file.fd = -1;
+	else
+		file.fd = pmsg->fds[0];
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring call idx:%u file:%d\n", file.index, file.fd);
+	ops->set_vring_call(ctx, &file);
+}
+
+
+/**
+ * Handle VHOST_USER_SET_VRING_KICK: extract the vring index and the kick
+ * descriptor from the message and pass them to ops->set_vring_kick().
+ *
+ * @param ctx
+ *  Device context forwarded unchanged to ops->set_vring_kick().
+ * @param pmsg
+ *  Received message; payload.u64 carries the vring index in its low 8 bits
+ *  and the "no fd" flag in bit 8, fds[0] carries the descriptor.
+ */
+void
+user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	struct vhost_vring_file file;
+
+	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	/*
+	 * With the NOFD flag set no descriptor accompanies the message, so
+	 * fds[0] holds nothing meaningful; report -1 instead of using it.
+	 */
+	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
+		file.fd = -1;
+	else
+		file.fd = pmsg->fds[0];
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring kick idx:%u file:%d\n", file.index, file.fd);
+	ops->set_vring_kick(ctx, &file);
+}
diff --git a/lib/librte_vhost/vhost-user/virtio-net-user.h b/lib/librte_vhost/vhost-user/virtio-net-user.h
new file mode 100644
index 0000000..0969376
--- /dev/null
+++ b/lib/librte_vhost/vhost-user/virtio-net-user.h
@@ -0,0 +1,11 @@ 
+#ifndef _VIRTIO_NET_USER_H
+#define _VIRTIO_NET_USER_H
+
+#include "vhost-net.h"
+#include "vhost-net-user.h"
+
+/* Map the guest memory regions of a SET_MEM_TABLE message; 0 on success, -1 on failure. */
+int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
+/* Forward the vring index and fd of a SET_VRING_KICK message to the virtio-net ops. */
+void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
+/* Forward the vring index and fd of a SET_VRING_CALL message to the virtio-net ops. */
+void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
+
+#endif
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index ccfd82f..8ff0301 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -38,19 +38,14 @@ 
 #include <rte_memcpy.h>
 #include <rte_virtio_net.h>
 
-#include "vhost-net-cdev.h"
+#include "vhost-net.h"
 
-#define MAX_PKT_BURST 32
+#define VHOST_MAX_PKT_BURST 64
+#define VHOST_MAX_MRG_PKT_BURST 64
 
-/**
- * This function adds buffers to the virtio devices RX virtqueue. Buffers can
- * be received from the physical port or from another virtio device. A packet
- * count is returned to indicate the number of packets that are succesfully
- * added to the RX queue. This function works when mergeable is disabled.
- */
-static inline uint32_t __attribute__((always_inline))
-virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
-	struct rte_mbuf **pkts, uint32_t count)
+
+uint32_t
+rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count)
 {
 	struct vhost_virtqueue *vq;
 	struct vring_desc *desc;
@@ -59,26 +54,23 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 	uint64_t buff_addr = 0;
 	uint64_t buff_hdr_addr = 0;
-	uint32_t head[MAX_PKT_BURST], packet_len = 0;
+	uint32_t head[VHOST_MAX_PKT_BURST], packet_len = 0;
 	uint32_t head_idx, packet_success = 0;
+	uint32_t mergeable, mrg_count = 0;
 	uint16_t avail_idx, res_cur_idx;
 	uint16_t res_base_idx, res_end_idx;
 	uint16_t free_entries;
 	uint8_t success = 0;
 
-	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
+	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s()\n", dev->device_fh, __func__);
 	if (unlikely(queue_id != VIRTIO_RXQ)) {
 		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
 		return 0;
 	}
 
 	vq = dev->virtqueue[VIRTIO_RXQ];
-	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
-
-	/*
-	 * As many data cores may want access to available buffers,
-	 * they need to be reserved.
-	 */
+	count = (count > VHOST_MAX_PKT_BURST) ? VHOST_MAX_PKT_BURST : count;
+	/* As many data cores may want access to available buffers, they need to be reserved. */
 	do {
 		res_base_idx = vq->last_used_idx_res;
 		avail_idx = *((volatile uint16_t *)&vq->avail->idx);
@@ -93,21 +85,25 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 		res_end_idx = res_base_idx + count;
 		/* vq->last_used_idx_res is atomically updated. */
-		/* TODO: Allow to disable cmpset if no concurrency in application. */
+		/* TODO: Allow to disable cmpset if no concurrency in application */
 		success = rte_atomic16_cmpset(&vq->last_used_idx_res,
 				res_base_idx, res_end_idx);
+		/* If there is contention here and failed, try again. */
 	} while (unlikely(success == 0));
 	res_cur_idx = res_base_idx;
 	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
-			dev->device_fh, res_cur_idx, res_end_idx);
+			dev->device_fh,
+			res_cur_idx, res_end_idx);
 
 	/* Prefetch available ring to retrieve indexes. */
 	rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);
 
+	/* Check if the VIRTIO_NET_F_MRG_RXBUF feature is enabled. */
+	mergeable = dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF);
+
 	/* Retrieve all of the head indexes first to avoid caching issues. */
 	for (head_idx = 0; head_idx < count; head_idx++)
-		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
-					(vq->size - 1)];
+		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)];
 
 	/*Prefetch descriptor index. */
 	rte_prefetch0(&vq->desc[head[packet_success]]);
@@ -123,46 +119,57 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 		/* Prefetch buffer address. */
 		rte_prefetch0((void *)(uintptr_t)buff_addr);
 
-		/* Copy virtio_hdr to packet and increment buffer address */
-		buff_hdr_addr = buff_addr;
-		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
-
-		/*
-		 * If the descriptors are chained the header and data are
-		 * placed in separate buffers.
-		 */
-		if (desc->flags & VRING_DESC_F_NEXT) {
-			desc->len = vq->vhost_hlen;
-			desc = &vq->desc[desc->next];
-			/* Buffer address translation. */
-			buff_addr = gpa_to_vva(dev, desc->addr);
-			desc->len = rte_pktmbuf_data_len(buff);
+		if (mergeable && (mrg_count != 0)) {
+			desc->len = packet_len = rte_pktmbuf_data_len(buff);
 		} else {
-			buff_addr += vq->vhost_hlen;
-			desc->len = packet_len;
+			/* Copy virtio_hdr to packet and increment buffer address */
+			buff_hdr_addr = buff_addr;
+			packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
+
+			/*
+			 * If the descriptors are chained the header and data are placed in
+			 * separate buffers.
+			 */
+			if (desc->flags & VRING_DESC_F_NEXT) {
+				desc->len = vq->vhost_hlen;
+				desc = &vq->desc[desc->next];
+				/* Buffer address translation. */
+				buff_addr = gpa_to_vva(dev, desc->addr);
+				desc->len = rte_pktmbuf_data_len(buff);
+			} else {
+				buff_addr += vq->vhost_hlen;
+				desc->len = packet_len;
+			}
 		}
 
+		VHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, rte_pktmbuf_data_len(buff), 0);
+
 		/* Update used ring with desc information */
-		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
-							head[packet_success];
+		vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];
 		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
 
 		/* Copy mbuf data to buffer */
-		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
-		rte_memcpy((void *)(uintptr_t)buff_addr,
-			rte_pktmbuf_mtod(buff, const void *),
-			rte_pktmbuf_data_len(buff));
-		PRINT_PACKET(dev, (uintptr_t)buff_addr,
-			rte_pktmbuf_data_len(buff), 0);
+		/* TODO fixme for sg mbuf and the case that desc couldn't hold the mbuf data */
+		rte_memcpy((void *)(uintptr_t)buff_addr, (const void *)buff->pkt.data, rte_pktmbuf_data_len(buff));
 
 		res_cur_idx++;
 		packet_success++;
 
-		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
-			(const void *)&virtio_hdr, vq->vhost_hlen);
-
-		PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
-
+		/* If mergeable is disabled then a header is required per buffer. */
+		if (!mergeable) {
+			rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen);
+			VHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
+		} else {
+			mrg_count++;
+			/* Merge buffer can only handle so many buffers at a time. Tell the guest if this limit is reached. */
+			if ((mrg_count == VHOST_MAX_MRG_PKT_BURST) || (res_cur_idx == res_end_idx)) {
+				virtio_hdr.num_buffers = mrg_count;
+				LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n", dev->device_fh, virtio_hdr.num_buffers);
+				rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen);
+				VHOST_PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
+				mrg_count = 0;
+			}
+		}
 		if (res_cur_idx < res_end_idx) {
 			/* Prefetch descriptor index. */
 			rte_prefetch0(&vq->desc[head[packet_success]]);
@@ -184,357 +191,18 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	return count;
 }
 
-static inline uint32_t __attribute__((always_inline))
-copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
-	uint16_t res_end_idx, struct rte_mbuf *pkt)
-{
-	uint32_t vec_idx = 0;
-	uint32_t entry_success = 0;
-	struct vhost_virtqueue *vq;
-	/* The virtio_hdr is initialised to 0. */
-	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {
-		{0, 0, 0, 0, 0, 0}, 0};
-	uint16_t cur_idx = res_base_idx;
-	uint64_t vb_addr = 0;
-	uint64_t vb_hdr_addr = 0;
-	uint32_t seg_offset = 0;
-	uint32_t vb_offset = 0;
-	uint32_t seg_avail;
-	uint32_t vb_avail;
-	uint32_t cpy_len, entry_len;
-
-	if (pkt == NULL)
-		return 0;
-
-	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| "
-		"End Index %d\n",
-		dev->device_fh, cur_idx, res_end_idx);
-
-	/*
-	 * Convert from gpa to vva
-	 * (guest physical addr -> vhost virtual addr)
-	 */
-	vq = dev->virtqueue[VIRTIO_RXQ];
-	vb_addr =
-		gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
-	vb_hdr_addr = vb_addr;
-
-	/* Prefetch buffer address. */
-	rte_prefetch0((void *)(uintptr_t)vb_addr);
-
-	virtio_hdr.num_buffers = res_end_idx - res_base_idx;
-
-	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n",
-		dev->device_fh, virtio_hdr.num_buffers);
 
-	rte_memcpy((void *)(uintptr_t)vb_hdr_addr,
-		(const void *)&virtio_hdr, vq->vhost_hlen);
-
-	PRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1);
-
-	seg_avail = rte_pktmbuf_data_len(pkt);
-	vb_offset = vq->vhost_hlen;
-	vb_avail =
-		vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
-
-	entry_len = vq->vhost_hlen;
-
-	if (vb_avail == 0) {
-		uint32_t desc_idx =
-			vq->buf_vec[vec_idx].desc_idx;
-		vq->desc[desc_idx].len = vq->vhost_hlen;
-
-		if ((vq->desc[desc_idx].flags
-			& VRING_DESC_F_NEXT) == 0) {
-			/* Update used ring with desc information */
-			vq->used->ring[cur_idx & (vq->size - 1)].id
-				= vq->buf_vec[vec_idx].desc_idx;
-			vq->used->ring[cur_idx & (vq->size - 1)].len
-				= entry_len;
-
-			entry_len = 0;
-			cur_idx++;
-			entry_success++;
-		}
-
-		vec_idx++;
-		vb_addr =
-			gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
-
-		/* Prefetch buffer address. */
-		rte_prefetch0((void *)(uintptr_t)vb_addr);
-		vb_offset = 0;
-		vb_avail = vq->buf_vec[vec_idx].buf_len;
-	}
-
-	cpy_len = RTE_MIN(vb_avail, seg_avail);
-
-	while (cpy_len > 0) {
-		/* Copy mbuf data to vring buffer */
-		rte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset),
-			(const void *)(rte_pktmbuf_mtod(pkt, char*) + seg_offset),
-			cpy_len);
-
-		PRINT_PACKET(dev,
-			(uintptr_t)(vb_addr + vb_offset),
-			cpy_len, 0);
-
-		seg_offset += cpy_len;
-		vb_offset += cpy_len;
-		seg_avail -= cpy_len;
-		vb_avail -= cpy_len;
-		entry_len += cpy_len;
-
-		if (seg_avail != 0) {
-			/*
-			 * The virtio buffer in this vring
-			 * entry reach to its end.
-			 * But the segment doesn't complete.
-			 */
-			if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &
-				VRING_DESC_F_NEXT) == 0) {
-				/* Update used ring with desc information */
-				vq->used->ring[cur_idx & (vq->size - 1)].id
-					= vq->buf_vec[vec_idx].desc_idx;
-				vq->used->ring[cur_idx & (vq->size - 1)].len
-					= entry_len;
-				entry_len = 0;
-				cur_idx++;
-				entry_success++;
-			}
-
-			vec_idx++;
-			vb_addr = gpa_to_vva(dev,
-				vq->buf_vec[vec_idx].buf_addr);
-			vb_offset = 0;
-			vb_avail = vq->buf_vec[vec_idx].buf_len;
-			cpy_len = RTE_MIN(vb_avail, seg_avail);
-		} else {
-			/*
-			 * This current segment complete, need continue to
-			 * check if the whole packet complete or not.
-			 */
-			pkt = pkt->next;
-			if (pkt != NULL) {
-				/*
-				 * There are more segments.
-				 */
-				if (vb_avail == 0) {
-					/*
-					 * This current buffer from vring is
-					 * used up, need fetch next buffer
-					 * from buf_vec.
-					 */
-					uint32_t desc_idx =
-						vq->buf_vec[vec_idx].desc_idx;
-					vq->desc[desc_idx].len = vb_offset;
-
-					if ((vq->desc[desc_idx].flags &
-						VRING_DESC_F_NEXT) == 0) {
-						uint16_t wrapped_idx =
-							cur_idx & (vq->size - 1);
-						/*
-						 * Update used ring with the
-						 * descriptor information
-						 */
-						vq->used->ring[wrapped_idx].id
-							= desc_idx;
-						vq->used->ring[wrapped_idx].len
-							= entry_len;
-						entry_success++;
-						entry_len = 0;
-						cur_idx++;
-					}
-
-					/* Get next buffer from buf_vec. */
-					vec_idx++;
-					vb_addr = gpa_to_vva(dev,
-						vq->buf_vec[vec_idx].buf_addr);
-					vb_avail =
-						vq->buf_vec[vec_idx].buf_len;
-					vb_offset = 0;
-				}
-
-				seg_offset = 0;
-				seg_avail = rte_pktmbuf_data_len(pkt);
-				cpy_len = RTE_MIN(vb_avail, seg_avail);
-			} else {
-				/*
-				 * This whole packet completes.
-				 */
-				uint32_t desc_idx =
-					vq->buf_vec[vec_idx].desc_idx;
-				vq->desc[desc_idx].len = vb_offset;
-
-				while (vq->desc[desc_idx].flags &
-					VRING_DESC_F_NEXT) {
-					desc_idx = vq->desc[desc_idx].next;
-					 vq->desc[desc_idx].len = 0;
-				}
-
-				/* Update used ring with desc information */
-				vq->used->ring[cur_idx & (vq->size - 1)].id
-					= vq->buf_vec[vec_idx].desc_idx;
-				vq->used->ring[cur_idx & (vq->size - 1)].len
-					= entry_len;
-				entry_len = 0;
-				cur_idx++;
-				entry_success++;
-				seg_avail = 0;
-				cpy_len = RTE_MIN(vb_avail, seg_avail);
-			}
-		}
-	}
-
-	return entry_success;
-}
-
-/*
- * This function works for mergeable RX.
- */
-static inline uint32_t __attribute__((always_inline))
-virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
-	struct rte_mbuf **pkts, uint32_t count)
+uint32_t
+rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint32_t count)
 {
-	struct vhost_virtqueue *vq;
-	uint32_t pkt_idx = 0, entry_success = 0;
-	uint16_t avail_idx, res_cur_idx;
-	uint16_t res_base_idx, res_end_idx;
-	uint8_t success = 0;
-
-	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
-		dev->device_fh);
-	if (unlikely(queue_id != VIRTIO_RXQ)) {
-		LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
-	}
-
-	vq = dev->virtqueue[VIRTIO_RXQ];
-	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
-
-	if (count == 0)
-		return 0;
-
-	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
-		uint32_t secure_len = 0;
-		uint16_t need_cnt;
-		uint32_t vec_idx = 0;
-		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
-		uint16_t i, id;
-
-		do {
-			/*
-			 * As many data cores may want access to available
-			 * buffers, they need to be reserved.
-			 */
-			res_base_idx = vq->last_used_idx_res;
-			res_cur_idx = res_base_idx;
-
-			do {
-				avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-				if (unlikely(res_cur_idx == avail_idx)) {
-					LOG_DEBUG(VHOST_DATA,
-						"(%"PRIu64") Failed "
-						"to get enough desc from "
-						"vring\n",
-						dev->device_fh);
-					return pkt_idx;
-				} else {
-					uint16_t wrapped_idx =
-						(res_cur_idx) & (vq->size - 1);
-					uint32_t idx =
-						vq->avail->ring[wrapped_idx];
-					uint8_t next_desc;
-
-					do {
-						next_desc = 0;
-						secure_len += vq->desc[idx].len;
-						if (vq->desc[idx].flags &
-							VRING_DESC_F_NEXT) {
-							idx = vq->desc[idx].next;
-							next_desc = 1;
-						}
-					} while (next_desc);
-
-					res_cur_idx++;
-				}
-			} while (pkt_len > secure_len);
-
-			/* vq->last_used_idx_res is atomically updated. */
-			success = rte_atomic16_cmpset(&vq->last_used_idx_res,
-							res_base_idx,
-							res_cur_idx);
-		} while (success == 0);
-
-		id = res_base_idx;
-		need_cnt = res_cur_idx - res_base_idx;
-
-		for (i = 0; i < need_cnt; i++, id++) {
-			uint16_t wrapped_idx = id & (vq->size - 1);
-			uint32_t idx = vq->avail->ring[wrapped_idx];
-			uint8_t next_desc;
-			do {
-				next_desc = 0;
-				vq->buf_vec[vec_idx].buf_addr =
-					vq->desc[idx].addr;
-				vq->buf_vec[vec_idx].buf_len =
-					vq->desc[idx].len;
-				vq->buf_vec[vec_idx].desc_idx = idx;
-				vec_idx++;
-
-				if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
-					idx = vq->desc[idx].next;
-					next_desc = 1;
-				}
-			} while (next_desc);
-		}
-
-		res_end_idx = res_cur_idx;
-
-		entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
-			res_end_idx, pkts[pkt_idx]);
-
-		rte_compiler_barrier();
-
-		/*
-		 * Wait until it's our turn to add our buffer
-		 * to the used ring.
-		 */
-		while (unlikely(vq->last_used_idx != res_base_idx))
-			rte_pause();
-
-		*(volatile uint16_t *)&vq->used->idx += entry_success;
-		vq->last_used_idx = res_end_idx;
-
-		/* Kick the guest if necessary. */
-		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
-			eventfd_write((int)vq->kickfd, 1);
-	}
-
-	return count;
-}
-
-uint16_t
-rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
-	struct rte_mbuf **pkts, uint16_t count)
-{
-	if (unlikely(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)))
-		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
-	else
-		return virtio_dev_rx(dev, queue_id, pkts, count);
-}
-
-uint16_t
-rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
-	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
-{
-	struct rte_mbuf *m, *prev;
+	struct rte_mbuf *mbuf;
 	struct vhost_virtqueue *vq;
 	struct vring_desc *desc;
-	uint64_t vb_addr = 0;
-	uint32_t head[MAX_PKT_BURST];
+	uint64_t buff_addr = 0;
+	uint32_t head[VHOST_MAX_PKT_BURST];
 	uint32_t used_idx;
 	uint32_t i;
-	uint16_t free_entries, entry_success = 0;
+	uint16_t free_entries, packet_success = 0;
 	uint16_t avail_idx;
 
 	if (unlikely(queue_id != VIRTIO_TXQ)) {
@@ -549,8 +217,8 @@  rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 	if (vq->last_used_idx == avail_idx)
 		return 0;
 
-	LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__,
-		dev->device_fh);
+	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s(%d->%d)\n", 
+		dev->device_fh, __func__, vq->last_used_idx, avail_idx);
 
 	/* Prefetch available ring to retrieve head indexes. */
 	rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);
@@ -558,173 +226,68 @@  rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 	/*get the number of free entries in the ring*/
 	free_entries = (avail_idx - vq->last_used_idx);
 
-	free_entries = RTE_MIN(free_entries, count);
+	if (free_entries > count)
+		free_entries = count;
 	/* Limit to MAX_PKT_BURST. */
-	free_entries = RTE_MIN(free_entries, MAX_PKT_BURST);
+	if (free_entries > VHOST_MAX_PKT_BURST)
+		free_entries = VHOST_MAX_PKT_BURST;
 
-	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
-			dev->device_fh, free_entries);
+	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", dev->device_fh, free_entries);
 	/* Retrieve all of the head indexes first to avoid caching issues. */
 	for (i = 0; i < free_entries; i++)
 		head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];
 
 	/* Prefetch descriptor index. */
-	rte_prefetch0(&vq->desc[head[entry_success]]);
+	rte_prefetch0(&vq->desc[head[packet_success]]);
 	rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);
 
-	while (entry_success < free_entries) {
-		uint32_t vb_avail, vb_offset;
-		uint32_t seg_avail, seg_offset;
-		uint32_t cpy_len;
-		uint32_t seg_num = 0;
-		struct rte_mbuf *cur;
-		uint8_t alloc_err = 0;
-
-		desc = &vq->desc[head[entry_success]];
+	while (packet_success < free_entries) {
+		desc = &vq->desc[head[packet_success]];
 
 		/* Discard first buffer as it is the virtio header */
 		desc = &vq->desc[desc->next];
 
 		/* Buffer address translation. */
-		vb_addr = gpa_to_vva(dev, desc->addr);
+		buff_addr = gpa_to_vva(dev, desc->addr);
 		/* Prefetch buffer address. */
-		rte_prefetch0((void *)(uintptr_t)vb_addr);
+		rte_prefetch0((void *)(uintptr_t)buff_addr);
 
 		used_idx = vq->last_used_idx & (vq->size - 1);
 
-		if (entry_success < (free_entries - 1)) {
+		if (packet_success < (free_entries - 1)) {
 			/* Prefetch descriptor index. */
-			rte_prefetch0(&vq->desc[head[entry_success+1]]);
+			rte_prefetch0(&vq->desc[head[packet_success+1]]);
 			rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]);
 		}
 
 		/* Update used index buffer information. */
-		vq->used->ring[used_idx].id = head[entry_success];
+		vq->used->ring[used_idx].id = head[packet_success];
 		vq->used->ring[used_idx].len = 0;
 
-		vb_offset = 0;
-		vb_avail = desc->len;
-		/* Allocate an mbuf and populate the structure. */
-		m = rte_pktmbuf_alloc(mbuf_pool);
-		if (unlikely(m == NULL)) {
-			RTE_LOG(ERR, VHOST_DATA,
-				"Failed to allocate memory for mbuf.\n");
-			return entry_success;
+		mbuf = rte_pktmbuf_alloc(mbuf_pool);
+		if (unlikely(mbuf == NULL)) {
+			RTE_LOG(ERR, VHOST_DATA, "Failed to allocate memory for mbuf.\n");
+			return packet_success;
 		}
-		seg_offset = 0;
-		seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
-		cpy_len = RTE_MIN(vb_avail, seg_avail);
-
-		PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);
-
-		seg_num++;
-		cur = m;
-		prev = m;
-		while (cpy_len != 0) {
-			rte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + seg_offset),
-				(void *)((uintptr_t)(vb_addr + vb_offset)),
-				cpy_len);
-
-			seg_offset += cpy_len;
-			vb_offset += cpy_len;
-			vb_avail -= cpy_len;
-			seg_avail -= cpy_len;
-
-			if (vb_avail != 0) {
-				/*
-				 * The segment reachs to its end,
-				 * while the virtio buffer in TX vring has
-				 * more data to be copied.
-				 */
-				cur->data_len = seg_offset;
-				m->pkt_len += seg_offset;
-				/* Allocate mbuf and populate the structure. */
-				cur = rte_pktmbuf_alloc(mbuf_pool);
-				if (unlikely(cur == NULL)) {
-					RTE_LOG(ERR, VHOST_DATA, "Failed to "
-						"allocate memory for mbuf.\n");
-					rte_pktmbuf_free(m);
-					alloc_err = 1;
-					break;
-				}
-
-				seg_num++;
-				prev->next = cur;
-				prev = cur;
-				seg_offset = 0;
-				seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
-			} else {
-				if (desc->flags & VRING_DESC_F_NEXT) {
-					/*
-					 * There are more virtio buffers in
-					 * same vring entry need to be copied.
-					 */
-					if (seg_avail == 0) {
-						/*
-						 * The current segment hasn't
-						 * room to accomodate more
-						 * data.
-						 */
-						cur->data_len = seg_offset;
-						m->pkt_len += seg_offset;
-						/*
-						 * Allocate an mbuf and
-						 * populate the structure.
-						 */
-						cur = rte_pktmbuf_alloc(mbuf_pool);
-						if (unlikely(cur == NULL)) {
-							RTE_LOG(ERR,
-								VHOST_DATA,
-								"Failed to "
-								"allocate memory "
-								"for mbuf\n");
-							rte_pktmbuf_free(m);
-							alloc_err = 1;
-							break;
-						}
-						seg_num++;
-						prev->next = cur;
-						prev = cur;
-						seg_offset = 0;
-						seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
-					}
-
-					desc = &vq->desc[desc->next];
-
-					/* Buffer address translation. */
-					vb_addr = gpa_to_vva(dev, desc->addr);
-					/* Prefetch buffer address. */
-					rte_prefetch0((void *)(uintptr_t)vb_addr);
-					vb_offset = 0;
-					vb_avail = desc->len;
-
-					PRINT_PACKET(dev, (uintptr_t)vb_addr,
-						desc->len, 0);
-				} else {
-					/* The whole packet completes. */
-					cur->data_len = seg_offset;
-					m->pkt_len += seg_offset;
-					vb_avail = 0;
-				}
-			}
+		mbuf->pkt.data_len = desc->len;
+		mbuf->pkt.pkt_len  = mbuf->pkt.data_len;
 
-			cpy_len = RTE_MIN(vb_avail, seg_avail);
-		}
+		rte_memcpy((void *) mbuf->pkt.data,
+			(const void *) buff_addr, mbuf->pkt.data_len);
 
-		if (unlikely(alloc_err == 1))
-			break;
+		pkts[packet_success] = mbuf;
 
-		m->nb_segs = seg_num;
+		VHOST_PRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0);
 
-		pkts[entry_success] = m;
 		vq->last_used_idx++;
-		entry_success++;
+		packet_success++;
 	}
 
 	rte_compiler_barrier();
-	vq->used->idx += entry_success;
+	vq->used->idx += packet_success;
 	/* Kick guest if required. */
 	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
 		eventfd_write((int)vq->kickfd, 1);
-	return entry_success;
+
+	return packet_success;
 }
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 852b6d1..516e743 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -31,17 +31,14 @@ 
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <dirent.h>
-#include <fuse/cuse_lowlevel.h>
 #include <linux/vhost.h>
 #include <linux/virtio_net.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdlib.h>
-#include <sys/eventfd.h>
-#include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <unistd.h>
+#include <assert.h>
 
 #include <rte_ethdev.h>
 #include <rte_log.h>
@@ -49,10 +46,8 @@ 
 #include <rte_memory.h>
 #include <rte_virtio_net.h>
 
-#include "vhost-net-cdev.h"
-#include "eventfd_link/eventfd_link.h"
-
-/*
+#include "vhost-net.h"
+/**
  * Device linked list structure for configuration.
  */
 struct virtio_net_config_ll {
@@ -60,38 +55,15 @@  struct virtio_net_config_ll {
 	struct virtio_net_config_ll *next;	/* Next dev on linked list.*/
 };
 
-const char eventfd_cdev[] = "/dev/eventfd-link";
-
-/* device ops to add/remove device to/from data core. */
+/* device ops to add/remove device to/from data core. */
 static struct virtio_net_device_ops const *notify_ops;
-/* root address of the linked list of managed virtio devices */
+/* root address of the linked list in the configuration core. */
 static struct virtio_net_config_ll *ll_root;
 
 /* Features supported by this lib. */
-#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
-				  (1ULL << VIRTIO_NET_F_CTRL_RX))
+#define VHOST_SUPPORTED_FEATURES (1ULL << VIRTIO_NET_F_MRG_RXBUF)
 static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
 
-/* Line size for reading maps file. */
-static const uint32_t BUFSIZE = PATH_MAX;
-
-/* Size of prot char array in procmap. */
-#define PROT_SZ 5
-
-/* Number of elements in procmap struct. */
-#define PROCMAP_SZ 8
-
-/* Structure containing information gathered from maps file. */
-struct procmap {
-	uint64_t va_start;	/* Start virtual address in file. */
-	uint64_t len;		/* Size of file. */
-	uint64_t pgoff;		/* Not used. */
-	uint32_t maj;		/* Not used. */
-	uint32_t min;		/* Not used. */
-	uint32_t ino;		/* Not used. */
-	char prot[PROT_SZ];	/* Not used. */
-	char fname[PATH_MAX];	/* File name. */
-};
 
 /*
  * Converts QEMU virtual address to Vhost virtual address. This function is
@@ -110,199 +82,15 @@  qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
 		if ((qemu_va >= region->userspace_address) &&
 			(qemu_va <= region->userspace_address +
 			region->memory_size)) {
-			vhost_va = dev->mem->mapped_address + qemu_va -
-					dev->mem->base_address;
+			vhost_va = qemu_va +  region->guest_phys_address + 
+				region->address_offset -
+				region->userspace_address;
 			break;
 		}
 	}
 	return vhost_va;
 }
 
-/*
- * Locate the file containing QEMU's memory space and
- * map it to our address space.
- */
-static int
-host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
-	pid_t pid, uint64_t addr)
-{
-	struct dirent *dptr = NULL;
-	struct procmap procmap;
-	DIR *dp = NULL;
-	int fd;
-	int i;
-	char memfile[PATH_MAX];
-	char mapfile[PATH_MAX];
-	char procdir[PATH_MAX];
-	char resolved_path[PATH_MAX];
-	char *path = NULL;
-	FILE *fmap;
-	void *map;
-	uint8_t found = 0;
-	char line[BUFSIZE];
-	char dlm[] = "-   :   ";
-	char *str, *sp, *in[PROCMAP_SZ];
-	char *end = NULL;
-
-	/* Path where mem files are located. */
-	snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
-	/* Maps file used to locate mem file. */
-	snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
-
-	fmap = fopen(mapfile, "r");
-	if (fmap == NULL) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to open maps file for pid %d\n",
-			dev->device_fh, pid);
-		return -1;
-	}
-
-	/* Read through maps file until we find out base_address. */
-	while (fgets(line, BUFSIZE, fmap) != 0) {
-		str = line;
-		errno = 0;
-		/* Split line into fields. */
-		for (i = 0; i < PROCMAP_SZ; i++) {
-			in[i] = strtok_r(str, &dlm[i], &sp);
-			if ((in[i] == NULL) || (errno != 0)) {
-				fclose(fmap);
-				return -1;
-			}
-			str = NULL;
-		}
-
-		/* Convert/Copy each field as needed. */
-		procmap.va_start = strtoull(in[0], &end, 16);
-		if ((in[0] == '\0') || (end == NULL) || (*end != '\0') ||
-			(errno != 0)) {
-			fclose(fmap);
-			return -1;
-		}
-
-		procmap.len = strtoull(in[1], &end, 16);
-		if ((in[1] == '\0') || (end == NULL) || (*end != '\0') ||
-			(errno != 0)) {
-			fclose(fmap);
-			return -1;
-		}
-
-		procmap.pgoff = strtoull(in[3], &end, 16);
-		if ((in[3] == '\0') || (end == NULL) || (*end != '\0') ||
-			(errno != 0)) {
-			fclose(fmap);
-			return -1;
-		}
-
-		procmap.maj = strtoul(in[4], &end, 16);
-		if ((in[4] == '\0') || (end == NULL) || (*end != '\0') ||
-			(errno != 0)) {
-			fclose(fmap);
-			return -1;
-		}
-
-		procmap.min = strtoul(in[5], &end, 16);
-		if ((in[5] == '\0') || (end == NULL) || (*end != '\0') ||
-			(errno != 0)) {
-			fclose(fmap);
-			return -1;
-		}
-
-		procmap.ino = strtoul(in[6], &end, 16);
-		if ((in[6] == '\0') || (end == NULL) || (*end != '\0') ||
-			(errno != 0)) {
-			fclose(fmap);
-			return -1;
-		}
-
-		memcpy(&procmap.prot, in[2], PROT_SZ);
-		memcpy(&procmap.fname, in[7], PATH_MAX);
-
-		if (procmap.va_start == addr) {
-			procmap.len = procmap.len - procmap.va_start;
-			found = 1;
-			break;
-		}
-	}
-	fclose(fmap);
-
-	if (!found) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to find memory file in pid %d maps file\n",
-			dev->device_fh, pid);
-		return -1;
-	}
-
-	/* Find the guest memory file among the process fds. */
-	dp = opendir(procdir);
-	if (dp == NULL) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Cannot open pid %d process directory\n",
-			dev->device_fh, pid);
-		return -1;
-	}
-
-	found = 0;
-
-	/* Read the fd directory contents. */
-	while (NULL != (dptr = readdir(dp))) {
-		snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
-				pid, dptr->d_name);
-		path = realpath(memfile, resolved_path);
-		if ((path == NULL) && (strlen(resolved_path) == 0)) {
-			RTE_LOG(ERR, VHOST_CONFIG,
-				"(%"PRIu64") Failed to resolve fd directory\n",
-				dev->device_fh);
-			closedir(dp);
-			return -1;
-		}
-		if (strncmp(resolved_path, procmap.fname,
-			strnlen(procmap.fname, PATH_MAX)) == 0) {
-			found = 1;
-			break;
-		}
-	}
-
-	closedir(dp);
-
-	if (found == 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to find memory file for pid %d\n",
-			dev->device_fh, pid);
-		return -1;
-	}
-	/* Open the shared memory file and map the memory into this process. */
-	fd = open(memfile, O_RDWR);
-
-	if (fd == -1) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to open %s for pid %d\n",
-			dev->device_fh, memfile, pid);
-		return -1;
-	}
-
-	map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
-		MAP_POPULATE|MAP_SHARED, fd, 0);
-	close(fd);
-
-	if (map == MAP_FAILED) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Error mapping the file %s for pid %d\n",
-			dev->device_fh, memfile, pid);
-		return -1;
-	}
-
-	/* Store the memory address and size in the device data structure */
-	mem->mapped_address = (uint64_t)(uintptr_t)map;
-	mem->mapped_size = procmap.len;
-
-	LOG_DEBUG(VHOST_CONFIG,
-		"(%"PRIu64") Mem File: %s->%s - Size: %llu - VA: %p\n",
-		dev->device_fh,
-		memfile, resolved_path,
-		(unsigned long long)mem->mapped_size, map);
-
-	return 0;
-}
 
 /*
  * Retrieves an entry from the devices configuration linked list.
@@ -376,7 +164,7 @@  add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
 	}
 
 }
-
+/* TODO: use dpdk alloc/free if possible */
 /*
  * Unmap any memory, close any file descriptors and
  * free any memory owned by a device.
@@ -389,16 +177,17 @@  cleanup_device(struct virtio_net *dev)
 		munmap((void *)(uintptr_t)dev->mem->mapped_address,
 			(size_t)dev->mem->mapped_size);
 		free(dev->mem);
+		dev->mem = NULL;
 	}
 
 	/* Close any event notifiers opened by device. */
-	if (dev->virtqueue[VIRTIO_RXQ]->callfd)
+	if (dev->virtqueue[VIRTIO_RXQ]->callfd > 0)
 		close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
-	if (dev->virtqueue[VIRTIO_RXQ]->kickfd)
+	if (dev->virtqueue[VIRTIO_RXQ]->kickfd > 0)
 		close((int)dev->virtqueue[VIRTIO_RXQ]->kickfd);
-	if (dev->virtqueue[VIRTIO_TXQ]->callfd)
+	if (dev->virtqueue[VIRTIO_TXQ]->callfd > 0)
 		close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
-	if (dev->virtqueue[VIRTIO_TXQ]->kickfd)
+	if (dev->virtqueue[VIRTIO_TXQ]->kickfd > 0)
 		close((int)dev->virtqueue[VIRTIO_TXQ]->kickfd);
 }
 
@@ -522,8 +311,8 @@  new_device(struct vhost_device_ctx ctx)
 }
 
 /*
- * Function is called from the CUSE release function. This function will
- * cleanup the device and remove it from device configuration linked list.
+ * Function is called from the CUSE release function. This function will cleanup
+ * the device and remove it from device configuration linked list.
  */
 static void
 destroy_device(struct vhost_device_ctx ctx)
@@ -569,6 +358,7 @@  set_owner(struct vhost_device_ctx ctx)
 		return -1;
 
 	return 0;
+	/* TODO: check whether ctx.fh is meaningful here */
 }
 
 /*
@@ -651,14 +441,12 @@  set_features(struct vhost_device_ctx ctx, uint64_t *pu)
  * This includes storing offsets used to translate buffer addresses.
  */
 static int
-set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,
-	uint32_t nregions)
+set_mem_table(struct vhost_device_ctx ctx,
+	const struct virtio_memory_regions *regions, uint32_t nregions)
 {
 	struct virtio_net *dev;
-	struct vhost_memory_region *mem_regions;
 	struct virtio_memory *mem;
-	uint64_t size = offsetof(struct vhost_memory, regions);
-	uint32_t regionidx, valid_regions;
+	uint32_t regionidx;
 
 	dev = get_device(ctx);
 	if (dev == NULL)
@@ -682,107 +470,24 @@  set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,
 
 	mem->nregions = nregions;
 
-	mem_regions = (void *)(uintptr_t)
-			((uint64_t)(uintptr_t)mem_regions_addr + size);
-
 	for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
 		/* Populate the region structure for each region. */
-		mem->regions[regionidx].guest_phys_address =
-			mem_regions[regionidx].guest_phys_addr;
-		mem->regions[regionidx].guest_phys_address_end =
-			mem->regions[regionidx].guest_phys_address +
-			mem_regions[regionidx].memory_size;
-		mem->regions[regionidx].memory_size =
-			mem_regions[regionidx].memory_size;
-		mem->regions[regionidx].userspace_address =
-			mem_regions[regionidx].userspace_addr;
-
-		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n", dev->device_fh,
-			regionidx,
-			(void *)(uintptr_t)mem->regions[regionidx].guest_phys_address,
-			(void *)(uintptr_t)mem->regions[regionidx].userspace_address,
-			mem->regions[regionidx].memory_size);
-
-		/*set the base address mapping*/
+		mem->regions[regionidx] = regions[regionidx];
 		if (mem->regions[regionidx].guest_phys_address == 0x0) {
 			mem->base_address =
 				mem->regions[regionidx].userspace_address;
-			/* Map VM memory file */
-			if (host_memory_map(dev, mem, ctx.pid,
-				mem->base_address) != 0) {
-				free(mem);
-				return -1;
-			}
+			mem->mapped_address = 
+				mem->regions[regionidx].address_offset;
 		}
 	}
 
-	/* Check that we have a valid base address. */
-	if (mem->base_address == 0) {
-		RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Failed to find base address of qemu memory file.\n", dev->device_fh);
-		free(mem);
-		return -1;
-	}
-
-	/*
-	 * Check if all of our regions have valid mappings.
-	 * Usually one does not exist in the QEMU memory file.
-	 */
-	valid_regions = mem->nregions;
-	for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
-		if ((mem->regions[regionidx].userspace_address <
-			mem->base_address) ||
-			(mem->regions[regionidx].userspace_address >
-			(mem->base_address + mem->mapped_size)))
-				valid_regions--;
-	}
-
-	/*
-	 * If a region does not have a valid mapping,
-	 * we rebuild our memory struct to contain only valid entries.
-	 */
-	if (valid_regions != mem->nregions) {
-		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") Not all memory regions exist in the QEMU mem file. Re-populating mem structure\n",
-			dev->device_fh);
-
-		/*
-		 * Re-populate the memory structure with only valid regions.
-		 * Invalid regions are over-written with memmove.
-		 */
-		valid_regions = 0;
-
-		for (regionidx = mem->nregions; 0 != regionidx--;) {
-			if ((mem->regions[regionidx].userspace_address <
-				mem->base_address) ||
-				(mem->regions[regionidx].userspace_address >
-				(mem->base_address + mem->mapped_size))) {
-				memmove(&mem->regions[regionidx],
-					&mem->regions[regionidx + 1],
-					sizeof(struct virtio_memory_regions) *
-						valid_regions);
-			} else {
-				valid_regions++;
-			}
-		}
-	}
-	mem->nregions = valid_regions;
+	/* TODO: add back the logic that removes invalid memory regions */
 	dev->mem = mem;
 
-	/*
-	 * Calculate the address offset for each region.
-	 * This offset is used to identify the vhost virtual address
-	 * corresponding to a QEMU guest physical address.
-	 */
-	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
-		dev->mem->regions[regionidx].address_offset =
-			dev->mem->regions[regionidx].userspace_address -
-				dev->mem->base_address +
-				dev->mem->mapped_address -
-				dev->mem->regions[regionidx].guest_phys_address;
-
-	}
 	return 0;
 }
 
+
 /*
  * Called from CUSE IOCTL: VHOST_SET_VRING_NUM
  * The virtio device sends us the size of the descriptor ring.
@@ -896,38 +601,62 @@  get_vring_base(struct vhost_device_ctx ctx, uint32_t index,
 	/* State->index refers to the queue index. The txq is 1, rxq is 0. */
 	state->num = dev->virtqueue[state->index]->last_used_idx;
 
-	return 0;
-}
+	if (dev->flags & VIRTIO_DEV_RUNNING) {
+		RTE_LOG(INFO, VHOST_CONFIG, 
+			"get_vring_base message is for release\n");
+		notify_ops->destroy_device(dev);
+		/*
+		 * Synchronous call: when it returns, the device has
+		 * been removed from the data core.
+		 */
+	}
+	/* TODO fix all munmap */
+	if (dev->mem) {
+		munmap((void *)(uintptr_t)dev->mem->mapped_address,
+			(size_t)dev->mem->mapped_size);
+		free(dev->mem);
+		dev->mem = NULL;
+	}
 
-/*
- * This function uses the eventfd_link kernel module to copy an eventfd file
- * descriptor provided by QEMU in to our process space.
- */
-static int
-eventfd_copy(struct virtio_net *dev, struct eventfd_copy *eventfd_copy)
-{
-	int eventfd_link, ret;
 
-	/* Open the character device to the kernel module. */
-	eventfd_link = open(eventfd_cdev, O_RDWR);
-	if (eventfd_link < 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") eventfd_link module is not loaded\n",
-			dev->device_fh);
-		return -1;
-	}
+	if (dev->virtqueue[VIRTIO_RXQ]->callfd > 0)
+		close((int)dev->virtqueue[VIRTIO_RXQ]->callfd);
+	dev->virtqueue[VIRTIO_RXQ]->callfd = -1;
+	if (dev->virtqueue[VIRTIO_TXQ]->callfd > 0)
+		close((int)dev->virtqueue[VIRTIO_TXQ]->callfd);
+	dev->virtqueue[VIRTIO_TXQ]->callfd = -1;
+	/* kickfd (from set_vring_call) is kept: we won't get CALLFD again */
+	
+	dev->virtqueue[VIRTIO_RXQ]->desc = NULL;
+	dev->virtqueue[VIRTIO_RXQ]->avail = NULL;
+	dev->virtqueue[VIRTIO_RXQ]->used = NULL;
+	dev->virtqueue[VIRTIO_RXQ]->last_used_idx = 0;
+	dev->virtqueue[VIRTIO_RXQ]->last_used_idx_res = 0;
+
+	dev->virtqueue[VIRTIO_TXQ]->desc = NULL;
+	dev->virtqueue[VIRTIO_TXQ]->avail = NULL;
+	dev->virtqueue[VIRTIO_TXQ]->used = NULL;
+	dev->virtqueue[VIRTIO_TXQ]->last_used_idx = 0;
+	dev->virtqueue[VIRTIO_TXQ]->last_used_idx_res = 0;
 
-	/* Call the IOCTL to copy the eventfd. */
-	ret = ioctl(eventfd_link, EVENTFD_COPY, eventfd_copy);
-	close(eventfd_link);
 
-	if (ret < 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") EVENTFD_COPY ioctl failed\n",
-			dev->device_fh);
-		return -1;
-	}
+	return 0;
+}
 
+static int
+virtio_is_ready(struct virtio_net *dev, int index)
+{
+	struct vhost_virtqueue *vq1, *vq2;
+	/* mq support to be added in the future. */
+	vq1 = dev->virtqueue[index];
+	vq2 = dev->virtqueue[index ^ 1];
+	if (vq1 && vq2 && vq1->desc && vq2->desc && 
+		(vq1->kickfd > 0) && (vq1->callfd > 0) &&
+		(vq2->kickfd > 0) && (vq2->callfd > 0)) {
+		LOG_DEBUG(VHOST_CONFIG, "virtio is ready for processing.\n");
+		return 1;
+	}
+	LOG_DEBUG(VHOST_CONFIG, "virtio isn't ready for processing.\n");
 	return 0;
 }
 
@@ -940,7 +669,6 @@  static int
 set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
 {
 	struct virtio_net *dev;
-	struct eventfd_copy	eventfd_kick;
 	struct vhost_virtqueue *vq;
 
 	dev = get_device(ctx);
@@ -953,14 +681,7 @@  set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
 	if (vq->kickfd)
 		close((int)vq->kickfd);
 
-	/* Populate the eventfd_copy structure and call eventfd_copy. */
-	vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-	eventfd_kick.source_fd = vq->kickfd;
-	eventfd_kick.target_fd = file->fd;
-	eventfd_kick.target_pid = ctx.pid;
-
-	if (eventfd_copy(dev, &eventfd_kick))
-		return -1;
+	vq->kickfd = file->fd;
 
 	return 0;
 }
@@ -974,7 +695,6 @@  static int
 set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
 {
 	struct virtio_net *dev;
-	struct eventfd_copy eventfd_call;
 	struct vhost_virtqueue *vq;
 
 	dev = get_device(ctx);
@@ -986,16 +706,11 @@  set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
 
 	if (vq->callfd)
 		close((int)vq->callfd);
+	vq->callfd = file->fd;
 
-	/* Populate the eventfd_copy structure and call eventfd_copy. */
-	vq->callfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-	eventfd_call.source_fd = vq->callfd;
-	eventfd_call.target_fd = file->fd;
-	eventfd_call.target_pid = ctx.pid;
-
-	if (eventfd_copy(dev, &eventfd_call))
-		return -1;
-
+	if (virtio_is_ready(dev, file->index) &&
+		!(dev->flags & VIRTIO_DEV_RUNNING))
+			notify_ops->new_device(dev);
 	return 0;
 }
 
@@ -1024,6 +739,7 @@  set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
 	 * If the device isn't already running and both backend fds are set,
 	 * we add the device.
 	 */
+	LOG_DEBUG(VHOST_CONFIG, "%s %d\n", __func__, file->fd);
 	if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
 		if (((int)dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED) &&
 			((int)dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED))