[dpdk-dev,RFC,7/7] lib/librte_vhost: Add vhost-user implementation

Message ID 1415272471-3299-8-git-send-email-mukawa@igel.co.jp (mailing list archive)
State RFC, archived
Headers

Commit Message

Tetsuya Mukawa Nov. 6, 2014, 11:14 a.m. UTC
  This patch adds vhost-user implementation to librte_vhost.
To communicate with vhost-user of QEMU, speficy VHOST_DRV_USER as
a vhost_driver_type_t variable in rte_vhost_driver_register().

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 lib/librte_vhost/rte_virtio_net.h  |  19 +-
 lib/librte_vhost/vhost-net-user.c  | 541 +++++++++++++++++++++++++++++++++++++
 lib/librte_vhost/vhost-net.c       |  39 ++-
 lib/librte_vhost/vhost-net.h       |   7 +
 lib/librte_vhost/virtio-net-user.c | 410 ++++++++++++++++++++++++++++
 lib/librte_vhost/virtio-net.c      |  64 ++++-
 6 files changed, 1073 insertions(+), 7 deletions(-)
 create mode 100644 lib/librte_vhost/vhost-net-user.c
 create mode 100644 lib/librte_vhost/virtio-net-user.c
  

Comments

Huawei Xie Nov. 7, 2014, 9:25 p.m. UTC | #1
How about using client/server model and select/poll event handing mechanism rather than poll?
The polling could cause periodic jitter.

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Tetsuya Mukawa
> Sent: Thursday, November 06, 2014 4:15 AM
> To: dev@dpdk.org
> Cc: nakajima.yoshihiro@lab.ntt.co.jp; masutani.hitoshi@lab.ntt.co.jp
> Subject: [dpdk-dev] [RFC PATCH 7/7] lib/librte_vhost: Add vhost-user
> implementation
> 
> This patch adds vhost-user implementation to librte_vhost.
> To communicate with vhost-user of QEMU, speficy VHOST_DRV_USER as
> a vhost_driver_type_t variable in rte_vhost_driver_register().
> 
> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
> ---
>  lib/librte_vhost/rte_virtio_net.h  |  19 +-
>  lib/librte_vhost/vhost-net-user.c  | 541
> +++++++++++++++++++++++++++++++++++++
>  lib/librte_vhost/vhost-net.c       |  39 ++-
>  lib/librte_vhost/vhost-net.h       |   7 +
>  lib/librte_vhost/virtio-net-user.c | 410 ++++++++++++++++++++++++++++
>  lib/librte_vhost/virtio-net.c      |  64 ++++-
>  6 files changed, 1073 insertions(+), 7 deletions(-)
>  create mode 100644 lib/librte_vhost/vhost-net-user.c
>  create mode 100644 lib/librte_vhost/virtio-net-user.c
> 
> diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
> index a9e20ea..af07900 100644
> --- a/lib/librte_vhost/rte_virtio_net.h
> +++ b/lib/librte_vhost/rte_virtio_net.h
> @@ -75,17 +75,32 @@ struct buf_vector {
>   */
>  typedef enum {
>  	VHOST_DRV_CUSE, /* cuse driver */
> +	VHOST_DRV_USER, /* vhost-user driver */
>  	VHOST_DRV_NUM	/* the number of vhost driver types */
>  } vhost_driver_type_t;
> 
> +
> +/**
> + * Structure contains vhost-user session specific information
> + */
> +struct vhost_user_session {
> +	int		fh;		/**< session identifier */
> +	pthread_t	tid;		/**< thread id of session handler */
> +	int		socketfd;	/**< fd of socket */
> +	int		interval;	/**< reconnection interval of session
> */
> +};
> +
>  /**
>   * Structure contains information relating vhost driver.
>   */
>  struct vhost_driver {
>  	vhost_driver_type_t	type;		/**< driver type. */
>  	const char		*dev_name;	/**< accessing device name. */
> +	void			*priv;		/**< private data. */
>  	union {
>  		struct fuse_session *cuse_session;	/**< fuse session. */
> +		struct vhost_user_session *user_session;
> +						/**< vhost-user session. */
>  	};
>  };
> 
> @@ -199,9 +214,11 @@ struct vhost_driver *rte_vhost_driver_register(
>  		const char *dev_name, vhost_driver_type_t type);
> 
>  /* Register callbacks. */
> -int rte_vhost_driver_callback_register(struct virtio_net_device_ops const *
> const);
> +int rte_vhost_driver_callback_register(struct vhost_driver *drv,
> +			struct virtio_net_device_ops const * const, void *priv);
>  /* Start vhost driver session blocking loop. */
>  int rte_vhost_driver_session_start(struct vhost_driver *drv);
> +void rte_vhost_driver_session_stop(struct vhost_driver *drv);
> 
>  /**
>   * This function adds buffers to the virtio devices RX virtqueue. Buffers can
> diff --git a/lib/librte_vhost/vhost-net-user.c b/lib/librte_vhost/vhost-net-user.c
> new file mode 100644
> index 0000000..434f20f
> --- /dev/null
> +++ b/lib/librte_vhost/vhost-net-user.c
> @@ -0,0 +1,541 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright (c) 2014 IGEL Co/.Ltd.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of IGEL nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <linux/un.h>
> +
> +#define VHOST_USER_MAX_DEVICE		(32)
> +#define VHOST_USER_MAX_FD_NUM		(3)
> +
> +/* start id of vhost user device */
> +rte_atomic16_t vhost_user_device_id;
> +
> +static struct vhost_net_device_ops const *ops;
> +
> +typedef enum VhostUserRequest {
> +	VHOST_USER_NONE = 0,
> +	VHOST_USER_GET_FEATURES = 1,
> +	VHOST_USER_SET_FEATURES = 2,
> +	VHOST_USER_SET_OWNER = 3,
> +	VHOST_USER_RESET_OWNER = 4,
> +	VHOST_USER_SET_MEM_TABLE = 5,
> +	VHOST_USER_SET_LOG_BASE = 6,
> +	VHOST_USER_SET_LOG_FD = 7,
> +	VHOST_USER_SET_VRING_NUM = 8,
> +	VHOST_USER_SET_VRING_ADDR = 9,
> +	VHOST_USER_SET_VRING_BASE = 10,
> +	VHOST_USER_GET_VRING_BASE = 11,
> +	VHOST_USER_SET_VRING_KICK = 12,
> +	VHOST_USER_SET_VRING_CALL = 13,
> +	VHOST_USER_SET_VRING_ERR = 14,
> +	VHOST_USER_MAX
> +} VhostUserRequest;
> +
> +#define VHOST_MEMORY_MAX_NREGIONS	8
> +
> +typedef struct VhostUserMemoryRegion {
> +	uint64_t guest_phys_addr;
> +	uint64_t memory_size;
> +	uint64_t userspace_addr;
> +	uint64_t mmap_offset;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> +	uint32_t nregions;
> +	uint32_t padding;
> +	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserMsg {
> +	VhostUserRequest request;
> +
> +#define VHOST_USER_VERSION_MASK		(0x3)
> +#define VHOST_USER_REPLY_MASK		(0x1<<2)
> +	uint32_t flags;
> +	uint32_t size; /* the following payload size */
> +	union {
> +#define VHOST_USER_VRING_IDX_MASK	(0xff)
> +#define VHOST_USER_VRING_NOFD_MASK	(0x1<<8)
> +		uint64_t u64;
> +		struct vhost_vring_state state;
> +		struct vhost_vring_addr addr;
> +		VhostUserMemory memory;
> +	};
> +} __attribute__((packed)) VhostUserMsg;
> +
> +static VhostUserMsg m __attribute__ ((unused));
> +#define VHOST_USER_HDR_SIZE	(sizeof(m.request) \
> +		+ sizeof(m.flags) + sizeof(m.size))
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION		(0x1)
> +
> +static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
> +	-1,			/* VHOST_USER_NONE */
> +	VHOST_GET_FEATURES,	/* VHOST_USER_GET_FEATURES */
> +	VHOST_SET_FEATURES,	/* VHOST_USER_SET_FEATURES */
> +	VHOST_SET_OWNER,	/* VHOST_USER_SET_OWNER */
> +	VHOST_RESET_OWNER,	/* VHOST_USER_RESET_OWNER */
> +	VHOST_SET_MEM_TABLE,	/* VHOST_USER_SET_MEM_TABLE */
> +	VHOST_SET_LOG_BASE,	/* VHOST_USER_SET_LOG_BASE */
> +	VHOST_SET_LOG_FD,	/* VHOST_USER_SET_LOG_FD */
> +	VHOST_SET_VRING_NUM,	/* VHOST_USER_SET_VRING_NUM */
> +	VHOST_SET_VRING_ADDR,	/* VHOST_USER_SET_VRING_ADDR */
> +	VHOST_SET_VRING_BASE,	/* VHOST_USER_SET_VRING_BASE */
> +	VHOST_GET_VRING_BASE,	/* VHOST_USER_GET_VRING_BASE */
> +	VHOST_SET_VRING_KICK,	/* VHOST_USER_SET_VRING_KICK */
> +	VHOST_SET_VRING_CALL,	/* VHOST_USER_SET_VRING_CALL */
> +	VHOST_SET_VRING_ERR	/* VHOST_USER_SET_VRING_ERR */
> +};
> +
> +/**
> + * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
> when
> + * the device is added to the device linked list.
> + */
> +static struct vhost_device_ctx
> +vhost_driver_to_vhost_ctx(struct vhost_driver *drv)
> +{
> +	struct vhost_device_ctx ctx;
> +	int device_id = drv->user_session->fh;
> +
> +	ctx.type = VHOST_DRV_USER;
> +	ctx.fh = device_id;
> +	ctx.user.drv = drv;
> +
> +	return ctx;
> +}
> +
> +/**
> + * When the device is created in QEMU it gets initialised here and added to the
> device linked list.
> + */
> +static int
> +vhost_user_open(struct vhost_driver *drv)
> +{
> +	struct vhost_device_ctx ctx = vhost_driver_to_vhost_ctx(drv);
> +
> +	int ret;
> +
> +	ret = ops->new_device(ctx);
> +	if (ret == -1)
> +		return -1;
> +
> +	RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device configuration
> started\n", ctx.fh);
> +
> +	return 0;
> +}
> +
> +/**
> + * When QEMU is shutdown or killed the device gets released.
> + */
> +static void
> +vhost_user_release(struct vhost_driver *drv)
> +{
> +	struct vhost_device_ctx ctx = vhost_driver_to_vhost_ctx(drv);
> +
> +	ops->destroy_device(ctx);
> +	RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n",
> ctx.fh);
> +}
> +
> +/**
> + * Send data to vhost-user device on a QEMU.
> + */
> +static int
> +vhost_user_write(struct vhost_driver *drv, VhostUserMsg *msg,
> +		int *fds, size_t fd_num)
> +{
> +	int fd, len;
> +	size_t fd_size = fd_num * sizeof(int);
> +	char control[CMSG_SPACE(fd_size)];
> +	struct msghdr msg_header;
> +	struct iovec iov[1];
> +	struct cmsghdr *cmsg_header;
> +	struct vhost_device_ctx ctx = vhost_driver_to_vhost_ctx(drv);
> +
> +	if ((drv == NULL) || (msg == NULL))
> +		return -EINVAL;
> +
> +	fd = drv->user_session->socketfd;
> +
> +	memset(&msg_header, 0, sizeof(msg_header));
> +	memset(control, 0, sizeof(control));
> +
> +	/* set the payload */
> +	iov[0].iov_base = (void *)msg;
> +	iov[0].iov_len = VHOST_USER_HDR_SIZE + msg->size;
> +
> +	msg_header.msg_iov = iov;
> +	msg_header.msg_iovlen = 1;
> +
> +	if (fd_num) {
> +		msg_header.msg_control = control;
> +		msg_header.msg_controllen = sizeof(control);
> +		cmsg_header = CMSG_FIRSTHDR(&msg_header);
> +		cmsg_header->cmsg_len = CMSG_LEN(fd_size);
> +		cmsg_header->cmsg_level = SOL_SOCKET;
> +		cmsg_header->cmsg_type = SCM_RIGHTS;
> +		memcpy(CMSG_DATA(cmsg_header), fds, fd_size);
> +	} else {
> +		msg_header.msg_control = 0;
> +		msg_header.msg_controllen = 0;
> +	}
> +
> +	do {
> +		len = sendmsg(fd, &msg_header, 0);
> +	} while (len < 0 && errno == EINTR);
> +
> +	if (len < 0)
> +		goto error;
> +
> +	return 0;
> +
> +error:
> +	RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device cannot send
> message\n", ctx.fh);
> +	return -EFAULT;
> +}
> +
> +/**
> + * Receive data from vhost-user device on a QEMU.
> + */
> +static int
> +vhost_user_read(struct vhost_driver *drv, VhostUserMsg *msg,
> +		int *fds, size_t *fd_num)
> +{
> +	int fd, len;
> +	size_t fd_size = (*fd_num) * sizeof(int);
> +	char control[CMSG_SPACE(fd_size)];
> +	struct msghdr msg_header;
> +	struct iovec iov[1];
> +	struct cmsghdr *cmsg_header;
> +	struct vhost_device_ctx ctx = vhost_driver_to_vhost_ctx(drv);
> +
> +	if ((drv == NULL) || (msg == NULL))
> +		return -EINVAL;
> +
> +	fd = drv->user_session->socketfd;
> +
> +	memset(&msg_header, 0, sizeof(msg_header));
> +	memset(control, 0, sizeof(control));
> +	*fd_num = 0;
> +
> +	/* set the payload */
> +	iov[0].iov_base = (void *)msg;
> +	iov[0].iov_len = VHOST_USER_HDR_SIZE;
> +
> +	msg_header.msg_iov = iov;
> +	msg_header.msg_iovlen = 1;
> +	msg_header.msg_control = control;
> +	msg_header.msg_controllen = sizeof(control);
> +
> +	if ((len = recvmsg(fd, &msg_header, 0)) <= 0)
> +		goto error;
> +
> +	if (msg_header.msg_flags & (MSG_TRUNC | MSG_CTRUNC))
> +		goto error;
> +
> +	cmsg_header = CMSG_FIRSTHDR(&msg_header);
> +	if (cmsg_header && cmsg_header->cmsg_len > 0 &&
> +			cmsg_header->cmsg_level == SOL_SOCKET &&
> +			cmsg_header->cmsg_type == SCM_RIGHTS) {
> +		if (fd_size >= cmsg_header->cmsg_len - CMSG_LEN(0)) {
> +			fd_size = cmsg_header->cmsg_len - CMSG_LEN(0);
> +			memcpy(fds, CMSG_DATA(cmsg_header), fd_size);
> +			*fd_num = fd_size / sizeof(int);
> +		}
> +	}
> +
> +	if (read(fd, ((char *)msg) + len, msg->size) < 0)
> +		goto error;
> +
> +	return 0;
> +
> +error:
> +	RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device cannot receive
> message\n", ctx.fh);
> +	return -EFAULT;
> +}
> +
> +/*
> + * Boilerplate code for vhost-user IOCTL
> + * Implicit arguments: ctx, req, result.
> + */
> +#define VHOST_USER_IOCTL(func) do {	\
> +	result = (func)(ctx);		\
> +} while (0)
> +
> +/*
> + * Boilerplate code for vhost-user Read IOCTL
> + * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
> + */
> +#define VHOST_USER_IOCTL_R(type, var, func) do {\
> +	result = func(ctx, &(var));		\
> +} while (0)
> +
> +/*
> + * Boilerplate code for vhost-user Write IOCTL
> + * Implicit arguments: ctx, req, result, out_bufsz.
> + */
> +#define	VHOST_USER_IOCTL_W(type, var, func) do {\
> +	result = (func)(ctx, &(var));		\
> +	msg->flags |= VHOST_USER_REPLY_MASK;	\
> +	msg->size = sizeof(type);		\
> +	vhost_user_write(drv, msg, NULL, 0);	\
> +} while (0)
> +
> +/*
> + * Boilerplate code for vhost-user Read/Write IOCTL
> + * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
> + */
> +#define VHOST_USER_IOCTL_RW(type1, var1, type2, var2, func) do {\
> +	result = (func)(ctx, (var1), &(var2));			\
> +	msg->flags |= VHOST_USER_REPLY_MASK;			\
> +	msg->size = sizeof(type2);				\
> +	vhost_user_write(drv, msg, NULL, 0);			\
> +} while (0)
> +
> +/**
> + * The IOCTLs are handled using unix domain socket in userspace.
> + */
> +	static int
> +vhost_user_ioctl(struct vhost_driver *drv, VhostUserMsg *msg,
> +		int *fds, int fd_num)
> +{
> +	struct vhost_device_ctx ctx = vhost_driver_to_vhost_ctx(drv);
> +	struct vhost_vring_file file;
> +	int result = 0;
> +
> +	switch (ioctl_to_vhost_user_request[msg->request]) {
> +	case VHOST_GET_FEATURES:
> +		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL:
> VHOST_GET_FEATURES\n", ctx.fh);
> +		VHOST_USER_IOCTL_W(uint64_t, msg->u64, ops->get_features);
> +		break;
> +
> +	case VHOST_SET_FEATURES:
> +		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL:
> VHOST_SET_FEATURES\n", ctx.fh);
> +		VHOST_USER_IOCTL_R(uint64_t, msg->u64, ops->set_features);
> +		break;
> +
> +	case VHOST_RESET_OWNER:
> +		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL:
> VHOST_RESET_OWNER\n", ctx.fh);
> +		VHOST_USER_IOCTL(ops->reset_owner);
> +		break;
> +
> +	case VHOST_SET_OWNER:
> +		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL:
> VHOST_SET_OWNER\n", ctx.fh);
> +		VHOST_USER_IOCTL(ops->set_owner);
> +		break;
> +
> +	case VHOST_SET_MEM_TABLE:
> +		/*TODO fix race condition.*/
> +		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL:
> VHOST_SET_MEM_TABLE\n", ctx.fh);
> +		/* all fds should be same, because physical memory consist of
> an one file */
> +		ctx.user.fds = fds;
> +		ctx.user.fd_num = fd_num;
> +		result = ops->set_mem_table(ctx, &msg->memory, msg-
> >memory.nregions);
> +		break;
> +
> +	case VHOST_SET_VRING_NUM:
> +		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL:
> VHOST_SET_VRING_NUM\n", ctx.fh);
> +		VHOST_USER_IOCTL_R(struct vhost_vring_state, msg->state,
> ops->set_vring_num);
> +		break;
> +
> +	case VHOST_SET_VRING_BASE:
> +		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL:
> VHOST_SET_VRING_BASE\n", ctx.fh);
> +		VHOST_USER_IOCTL_R(struct vhost_vring_state, msg->state,
> ops->set_vring_base);
> +		break;
> +
> +	case VHOST_GET_VRING_BASE:
> +		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL:
> VHOST_GET_VRING_BASE\n", ctx.fh);
> +		VHOST_USER_IOCTL_RW(uint32_t, msg->addr.index, struct
> vhost_vring_state, msg->state, ops->get_vring_base);
> +		break;
> +
> +	case VHOST_SET_VRING_ADDR:
> +		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL:
> VHOST_SET_VRING_ADDR\n", ctx.fh);
> +		VHOST_USER_IOCTL_R(struct vhost_vring_addr, msg->addr,
> ops->set_vring_addr);
> +		break;
> +
> +	case VHOST_SET_VRING_KICK:
> +		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL:
> VHOST_SET_VRING_KICK\n", ctx.fh);
> +		ctx.user.fds = fds;
> +		ctx.user.fd_num = fd_num;
> +		file.index = msg->u64;
> +		VHOST_USER_IOCTL_R(struct vhost_vring_file, file, ops-
> >set_vring_kick);
> +		break;
> +
> +	case VHOST_SET_VRING_CALL:
> +		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL:
> VHOST_SET_VRING_CALL\n", ctx.fh);
> +		ctx.user.fds = fds;
> +		ctx.user.fd_num = fd_num;
> +		file.index = msg->u64;
> +		VHOST_USER_IOCTL_R(struct vhost_vring_file, file, ops-
> >set_vring_call);
> +		break;
> +
> +	case VHOST_NET_SET_BACKEND:
> +	default:
> +		RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") IOCTL: DOESN
> NOT EXIST\n", ctx.fh);
> +		result = -1;
> +	}
> +
> +	if (result < 0)
> +		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL: FAIL\n",
> ctx.fh);
> +	else
> +		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL: SUCCESS\n",
> ctx.fh);
> +
> +	return result;
> +}
> +
> +/**
> + * vhost-user specific registration.
> + */
> +static int
> +vhost_user_driver_register(struct vhost_driver *drv)
> +{
> +	if ((drv == NULL) || (drv->dev_name == NULL) ||
> +			(strlen(drv->dev_name) > UNIX_PATH_MAX - 1))
> +		return -1;
> +
> +	ops = get_virtio_net_callbacks(drv->type);
> +
> +	drv->user_session = rte_malloc(NULL, sizeof(struct vhost_user_session),
> CACHE_LINE_SIZE);
> +	if (drv->user_session == NULL)
> +		return -1;
> +
> +	drv->user_session->fh =
> +		rte_atomic16_add_return(&vhost_user_device_id, 1) - 1; /* fh
> of first device is zero */
> +	drv->user_session->interval = 1;
> +
> +	return 0;
> +}
> +
> +/**
> + * When vhost-user driver starts, the session handler communicates with vhost-
> user
> + * device on a QEMU using a unix domain sokcet.
> + */
> +static void *
> +vhost_user_session_handler(void *data)
> +{
> +	struct vhost_driver *drv = data;
> +	int ret;
> +	struct sockaddr_un caddr;
> +	VhostUserMsg msg;
> +	int fds[VHOST_USER_MAX_FD_NUM];
> +	int socketfd;
> +	int interval;
> +	size_t fd_num;
> +
> +	if ((drv == NULL) || (drv->dev_name == NULL))
> +		return NULL;
> +
> +	bzero(&caddr, sizeof(caddr));
> +	caddr.sun_family = AF_LOCAL;
> +	strncpy((char *)&caddr.sun_path, drv->dev_name, strlen(drv-
> >dev_name));
> +
> +reconnect:
> +	drv->user_session->socketfd = socket(AF_UNIX, SOCK_STREAM, 0);
> +	if (drv->user_session->socketfd < 0)
> +		return NULL;
> +
> +	socketfd = drv->user_session->socketfd;
> +	interval = drv->user_session->interval;
> +	while (1) {
> +		ret = connect(socketfd, (struct sockaddr *)&caddr, sizeof(caddr));
> +		if (ret == 0)
> +			break; /* success */
> +		sleep(interval);
> +	}
> +
> +	ret = vhost_user_open(drv);
> +	if (ret != 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "(Socket %s) open failuer\n",
> drv->dev_name);
> +		return NULL;
> +	}
> +
> +	for (;;) {
> +		fd_num = VHOST_USER_MAX_FD_NUM;
> +		bzero(&msg, sizeof(VhostUserMsg));
> +		ret = vhost_user_read(drv, &msg, fds, &fd_num);
> +		if (ret != 0) {
> +			RTE_LOG(ERR, VHOST_CONFIG, "(Socket %s) read
> failuer\n", drv->dev_name);
> +			vhost_user_release(drv);
> +			goto reconnect;
> +		}
> +
> +		ret = vhost_user_ioctl(drv, &msg, fds, fd_num);
> +		if (ret != 0) {
> +			RTE_LOG(ERR, VHOST_CONFIG, "(Socket %s) request
> failuer\n", drv->dev_name);
> +			vhost_user_release(drv);
> +			goto reconnect;
> +		}
> +	}
> +
> +	return NULL;
> +}
> +
> +/**
> + * Create session handler
> + */
> +static int
> +vhost_user_driver_start(struct vhost_driver *drv)
> +{
> +	if (pthread_create(&drv->user_session->tid, NULL,
> vhost_user_session_handler, drv)) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +				"(Socket %s) starting event handler failuer\n",
> drv->dev_name);
> +		return -1;
> +	}
> +
> +	/* TODO: The event handler thread may need to run on a core user
> speficied. */
> +
> +	return 0;
> +}
> +
> +/**
> + * Destroy session handler
> + */
> +static void
> +vhost_user_driver_stop(struct vhost_driver *drv)
> +{
> +	pthread_t *tid = &drv->user_session->tid;
> +
> +	if (pthread_create(tid, NULL, vhost_user_session_handler, drv)) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +				"(Socket %s) starting event handler failuer\n",
> drv->dev_name);
> +		return;
> +	}
> +
> +	/* stop event thread and wait until connection is closed */
> +	if (*tid) {
> +		pthread_cancel(*tid);
> +		pthread_join(*tid, NULL);
> +	}
> +
> +	vhost_user_release(drv);
> +}
> diff --git a/lib/librte_vhost/vhost-net.c b/lib/librte_vhost/vhost-net.c
> index b0de5fd..10f41e9 100644
> --- a/lib/librte_vhost/vhost-net.c
> +++ b/lib/librte_vhost/vhost-net.c
> @@ -42,6 +42,11 @@
>   */
>  #include "vhost-net-cdev.c"
> 
> +/*
> + * Include vhost-user depend functions and definitions
> + */
> +#include "vhost-net-user.c"
> +
>  /**
>   * This function abstracts cuse and vhost-user driver registration.
>   */
> @@ -65,10 +70,17 @@ rte_vhost_driver_register(const char *dev_name,
> vhost_driver_type_t type)
>  		if (ret != 0)
>  			goto err;
>  		break;
> +	case VHOST_DRV_USER:
> +		ret = vhost_user_driver_register(drv);
> +		break;
>  	default:
> +		ret = -EINVAL;
>  		break;
>  	}
> 
> +	if (ret != 0)
> +		goto err;
> +
>  	return drv;
>  err:
>  	free(drv);
> @@ -81,17 +93,40 @@ err:
>  int
>  rte_vhost_driver_session_start(struct vhost_driver *drv)
>  {
> +	int ret;
> +
>  	if (drv == NULL)
>  		return -ENODEV;
> 
>  	switch (drv->type) {
>  	case VHOST_DRV_CUSE:
> -		vhost_cuse_driver_session_start(drv);
> +		ret = vhost_cuse_driver_session_start(drv);
> +		break;
> +	case VHOST_DRV_USER:
> +		ret = vhost_user_driver_start(drv);
>  		break;
>  	default:
> +		ret = -EINVAL;
>  		break;
>  	}
> 
> -	return 0;
> +	return ret;
>  }
> 
> +/**
> + * The vhost session is closed, only allow for vhost-user.
> + */
> +void
> +rte_vhost_driver_session_stop(struct vhost_driver *drv)
> +{
> +	if (drv == NULL)
> +		return;
> +
> +	switch (drv->type) {
> +	case VHOST_DRV_USER:
> +		vhost_user_driver_stop(drv);
> +		break;
> +	default:
> +		break;
> +	}
> +}
> diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
> index ef04832..0e36ba0 100644
> --- a/lib/librte_vhost/vhost-net.h
> +++ b/lib/librte_vhost/vhost-net.h
> @@ -76,6 +76,12 @@ struct vhost_device_cuse_ctx {
>  	pid_t   pid;	/* PID of process calling the IOCTL. */
>  };
> 
> +struct vhost_device_user_ctx {
> +	int			*fds;
> +	int			fd_num;
> +	struct vhost_driver	*drv;
> +};
> +
>  /*
>   * Structure used to identify device context.
>   */
> @@ -83,6 +89,7 @@ struct vhost_device_ctx {
>  	vhost_driver_type_t	type;	/* driver type. */
>  	uint64_t		fh;	/* Populated with fi->fh to track the
> device index. */
>  	union {
> +		struct vhost_device_user_ctx user;
>  		struct vhost_device_cuse_ctx cdev;
>  	};
>  };
> diff --git a/lib/librte_vhost/virtio-net-user.c b/lib/librte_vhost/virtio-net-user.c
> new file mode 100644
> index 0000000..1e78f98
> --- /dev/null
> +++ b/lib/librte_vhost/virtio-net-user.c
> @@ -0,0 +1,410 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright (c) 2014 IGEL Co.,Ltd.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of IGEL nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <unistd.h>
> +#include <sys/mman.h>
> +#include <stdlib.h>
> +
> +/* Functions defined in virtio_net.c */
> +static void init_device(struct vhost_device_ctx ctx, struct virtio_net *dev);
> +static void cleanup_device(struct virtio_net *dev);
> +static void free_device(struct virtio_net_config_ll *ll_dev);
> +static int new_device(struct vhost_device_ctx ctx);
> +static void destroy_device(struct vhost_device_ctx ctx);
> +static int set_owner(struct vhost_device_ctx ctx);
> +static int reset_owner(struct vhost_device_ctx ctx);
> +static int get_features(struct vhost_device_ctx ctx, uint64_t *pu);
> +static int set_features(struct vhost_device_ctx ctx, uint64_t *pu);
> +static int set_vring_num(struct vhost_device_ctx ctx, struct vhost_vring_state
> *state);
> +static int set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr
> *addr);
> +static int set_vring_base(struct vhost_device_ctx ctx, struct vhost_vring_state
> *state);
> +static int set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file);
> +
> +/* Root address of the linked list in the configuration core. */
> +static struct virtio_net_config_ll *user_ll_root;
> +
> +/**
> + * Retrieves an entry from the devices configuration linked list.
> + */
> +static struct virtio_net_config_ll *
> +user_get_config_ll_entry(struct vhost_device_ctx ctx)
> +{
> +	struct virtio_net_config_ll *ll_dev = user_ll_root;
> +
> +	/* Loop through linked list until the device_fh is found. */
> +	while (ll_dev != NULL) {
> +		if (ll_dev->dev.device_fh == ctx.fh)
> +			return ll_dev;
> +		ll_dev = ll_dev->next;
> +	}
> +
> +	return NULL;
> +}
> +
> +/**
> + * Searches the configuration core linked list and retrieves the device if it exists.
> + */
> +static struct virtio_net *
> +user_get_device(struct vhost_device_ctx ctx)
> +{
> +	struct virtio_net_config_ll *ll_dev;
> +
> +	ll_dev = user_get_config_ll_entry(ctx);
> +
> +	/* If a matching entry is found in the linked list, return the device in that
> entry. */
> +	if (ll_dev)
> +		return &ll_dev->dev;
> +
> +	RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Device not found in linked
> list.\n", ctx.fh);
> +	return NULL;
> +}
> +
> +/**
> + * Add entry containing a device to the device configuration linked list.
> + */
> +static void
> +user_add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
> +{
> +	struct virtio_net_config_ll *ll_dev = user_ll_root;
> +
> +	/* If ll_dev == NULL then this is the first device so go to else */
> +	if (ll_dev) {
> +		/* If the 1st device_fh != 0 then we insert our device here. */
> +		if (ll_dev->dev.device_fh != 0)	{
> +			new_ll_dev->dev.device_fh = 0;
> +			new_ll_dev->next = ll_dev;
> +			user_ll_root = new_ll_dev;
> +		} else {
> +			/* Increment through the ll until we find un unused
> device_fh. Insert the device at that entry*/
> +			while ((ll_dev->next != NULL) && (ll_dev->dev.device_fh
> == (ll_dev->next->dev.device_fh - 1)))
> +				ll_dev = ll_dev->next;
> +
> +			new_ll_dev->dev.device_fh = ll_dev->dev.device_fh + 1;
> +			new_ll_dev->next = ll_dev->next;
> +			ll_dev->next = new_ll_dev;
> +		}
> +	} else {
> +		user_ll_root = new_ll_dev;
> +		user_ll_root->dev.device_fh = 0;
> +	}
> +
> +}
> +
> +/**
> + * Remove an entry from the device configuration linked list.
> + */
> +static struct virtio_net_config_ll *
> +user_rm_config_ll_entry(struct virtio_net_config_ll *ll_dev, struct
> virtio_net_config_ll *ll_dev_last)
> +{
> +	/* First remove the device and then clean it up. */
> +	if (ll_dev == user_ll_root) {
> +		user_ll_root = ll_dev->next;
> +		cleanup_device(&ll_dev->dev);
> +		free_device(ll_dev);
> +		return user_ll_root;
> +	} else {
> +		if (likely(ll_dev_last != NULL)) {
> +			ll_dev_last->next = ll_dev->next;
> +			cleanup_device(&ll_dev->dev);
> +			free_device(ll_dev);
> +			return ll_dev_last->next;
> +		} else {
> +			cleanup_device(&ll_dev->dev);
> +			free_device(ll_dev);
> +			RTE_LOG(ERR, VHOST_CONFIG, "Remove entry from
> config_ll failed\n");
> +			return NULL;
> +		}
> +	}
> +}
> +
> +/**
> + * Returns the root entry of linked list
> + */
> +static struct virtio_net_config_ll *
> +user_get_config_ll_root(void)
> +{
> +	return user_ll_root;
> +}
> +
> +/**
> + * vhost-user specific device initialization.
> + */
> +static void
> +user_init_device(struct vhost_device_ctx ctx, struct virtio_net *dev)
> +{
> +	dev->priv = ctx.user.drv->priv;
> +}
> +
> +/**
> + * Locate the file containing QEMU's memory space and map it to our address
> space.
> + */
> +static int
> +user_host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
> int fd, size_t size)
> +{
> +	void *map;
> +
> +	map = mmap(0, size, PROT_READ|PROT_WRITE ,
> MAP_POPULATE|MAP_SHARED, fd, 0);
> +	close(fd);
> +
> +	if (map == MAP_FAILED) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Error mapping the
> file fd %d\n",  dev->device_fh, fd);
> +		return -1;
> +	}
> +
> +	/* Store the memory address and size in the device data structure */
> +	mem->mapped_address = (uint64_t)(uintptr_t)map;
> +	mem->mapped_size = size;
> +
> +	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") Mem File: fd: %d - Size: %llu
> - VA: %p\n", dev->device_fh,
> +			fd, (long long unsigned)mem->mapped_size, map);
> +
> +	return 0;
> +}
> +
> +/*
> + * Called from IOCTL: VHOST_SET_MEM_TABLE
> + * This function creates and populates the memory structure for the device.
> This includes
> + * storing offsets used to translate buffer addresses.
> + */
> +static int
> +user_set_mem_table(struct vhost_device_ctx ctx, const void
> *mem_regions_addr, uint32_t nregions)
> +{
> +	struct virtio_net *dev;
> +	struct vhost_memory_region *mem_regions;
> +	struct virtio_memory *mem;
> +	uint64_t size = offsetof(struct vhost_memory, regions);
> +	uint32_t regionidx, valid_regions;
> +	size_t guest_memory_size = 0;
> +
> +	dev = user_get_device(ctx);
> +	if (dev == NULL)
> +		return -1;
> +
> +	if (dev->mem) {
> +		munmap((void *)(uintptr_t)dev->mem->mapped_address,
> (size_t)dev->mem->mapped_size);
> +		free(dev->mem);
> +	}
> +
> +	/* Malloc the memory structure depending on the number of regions. */
> +	mem = calloc(1, sizeof(struct virtio_memory) + (sizeof(struct
> virtio_memory_regions) * nregions));
> +	if (mem == NULL) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Failed to allocate
> memory for dev->mem.\n", dev->device_fh);
> +		return -1;
> +	}
> +
> +	mem->nregions = nregions;
> +
> +	mem_regions =
> (void*)(uintptr_t)((uint64_t)(uintptr_t)mem_regions_addr + size);
> +
> +	for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
> +		/* Populate the region structure for each region. */
> +		mem->regions[regionidx].guest_phys_address =
> mem_regions[regionidx].guest_phys_addr;
> +		mem->regions[regionidx].guest_phys_address_end = mem-
> >regions[regionidx].guest_phys_address +
> +			mem_regions[regionidx].memory_size;
> +		mem->regions[regionidx].memory_size =
> mem_regions[regionidx].memory_size;
> +		mem->regions[regionidx].userspace_address =
> mem_regions[regionidx].userspace_addr;
> +		guest_memory_size += mem_regions[regionidx].memory_size;
> +
> +		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") REGION: %u -
> GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n", dev->device_fh,
> +				regionidx, (void*)(uintptr_t)mem-
> >regions[regionidx].guest_phys_address,
> +				(void*)(uintptr_t)mem-
> >regions[regionidx].userspace_address,
> +				mem->regions[regionidx].memory_size);
> +	}
> +
> +	for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
> +		/*set the base address mapping*/
> +		if (mem->regions[regionidx].guest_phys_address == 0x0) {
> +			mem->base_address = mem-
> >regions[regionidx].userspace_address;
> +			/* Map VM memory file */
> +			if (user_host_memory_map(dev, mem,
> ctx.user.fds[regionidx], guest_memory_size) != 0) {
> +				free(mem);
> +				return -1;
> +			}
> +		} else
> +			close(ctx.user.fds[regionidx]);
> +	}
> +
> +	/* Check that we have a valid base address. */
> +	if (mem->base_address == 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Failed to find base
> address of qemu memory file.\n", dev->device_fh);
> +		free(mem);
> +		return -1;
> +	}
> +
> +	/* Check if all of our regions have valid mappings. Usually one does not
> exist in the QEMU memory file. */
> +	valid_regions = mem->nregions;
> +	for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
> +		if ((mem->regions[regionidx].userspace_address < mem-
> >base_address) ||
> +				(mem->regions[regionidx].userspace_address >
> (mem->base_address + mem->mapped_size)))
> +			valid_regions--;
> +	}
> +
> +	/* If a region does not have a valid mapping we rebuild our memory
> struct to contain only valid entries. */
> +	if (valid_regions != mem->nregions) {
> +		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") Not all memory
> regions exist in the QEMU mem file. Re-populating mem structure\n",
> +				dev->device_fh);
> +
> +		/* Re-populate the memory structure with only valid regions.
> Invalid regions are over-written with memmove. */
> +		valid_regions = 0;
> +
> +		for (regionidx = mem->nregions; 0 != regionidx--;) {
> +			if ((mem->regions[regionidx].userspace_address <
> mem->base_address) ||
> +					(mem-
> >regions[regionidx].userspace_address > (mem->base_address + mem-
> >mapped_size))) {
> +				memmove(&mem->regions[regionidx], &mem-
> >regions[regionidx + 1],
> +						sizeof(struct
> virtio_memory_regions) * valid_regions);
> +			} else {
> +				valid_regions++;
> +			}
> +		}
> +	}
> +	mem->nregions = valid_regions;
> +	dev->mem = mem;
> +
> +	/*
> +	 * Calculate the address offset for each region. This offset is used to
> identify the vhost virtual address
> +	 * corresponding to a QEMU guest physical address.
> +	 */
> +	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++)
> +		dev->mem->regions[regionidx].address_offset = dev->mem-
> >regions[regionidx].userspace_address - dev->mem->base_address
> +			+ dev->mem->mapped_address - dev->mem-
> >regions[regionidx].guest_phys_address;
> +
> +	return 0;
> +}
> +
> +/**
> + * Called from IOCTL: VHOST_GET_VRING_BASE
> + * We send the virtio device our available ring last used index.
> + */
> +static int
> +user_get_vring_base(struct vhost_device_ctx ctx, uint32_t index, struct
> vhost_vring_state *state)
> +{
> +	struct virtio_net *dev;
> +
> +	dev = user_get_device(ctx);
> +	if (dev == NULL)
> +		return -1;
> +
> +	state->index = index;
> +	/* State->index refers to the queue index. The TX queue is 1, RX queue is
> 0. */
> +	state->num = dev->virtqueue[state->index]->last_used_idx;
> +
> +	return 0;
> +}
> +
> +/**
> + * Called from IOCTL: VHOST_SET_VRING_CALL
> + * The virtio device sends an eventfd to interrupt the guest. This fd gets copied
> in
> + * to our process space.
> + * Also this message is sent when virtio-net device is reset by device driver on
> QEMU.
> + */
> +static int
> +user_set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
> +{
> +	struct virtio_net *dev;
> +	struct vhost_virtqueue *vq;
> +
> +	dev = user_get_device(ctx);
> +	if (dev == NULL)
> +		return -1;
> +
> +	/* file->index refers to the queue index. The TX queue is 1, RX queue is 0.
> */
> +	vq = dev->virtqueue[file->index];
> +
> +	if (vq->kickfd)
> +		close((int)vq->kickfd);
> +
> +	/* Populate the eventfd_copy structure and call eventfd_copy. */
> +	vq->kickfd = ctx.user.fds[0];
> +
> +	return 0;
> +}
> +
> +/**
> + * Called from IOCTL: VHOST_SET_VRING_KICK
> + * The virtio device sends an eventfd that it can use to notify us. This fd gets
> copied in
> + * to our process space.
> + */
> +static int
> +user_set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
> +{
> +	struct virtio_net *dev;
> +	struct vhost_virtqueue *vq;
> +
> +	dev = user_get_device(ctx);
> +	if (dev == NULL)
> +		return -1;
> +
> +	/* file->index refers to the queue index. The TX queue is 1, RX queue is 0.
> */
> +	vq = dev->virtqueue[file->index];
> +
> +	if (vq->callfd)
> +		close((int)vq->callfd);
> +
> +	/* Populate the eventfd_copy structure and call eventfd_copy. */
> +	vq->callfd = ctx.user.fds[0];
> +
> +	if ((dev->virtqueue[VIRTIO_RXQ] != NULL) && (dev-
> >virtqueue[VIRTIO_TXQ]) != NULL)
> +		return set_backend(ctx, file);
> +
> +	return 0;
> +}
> +
> +/**
> + * Function pointers are set for the device operations to allow to call functions
> + * when an IOCTL, device_add or device_release is received.
> + */
> +static const struct vhost_net_device_ops vhost_user_device_ops = {
> +	.new_device = new_device,
> +	.destroy_device = destroy_device,
> +
> +	.get_features = get_features,
> +	.set_features = set_features,
> +
> +	.set_mem_table = user_set_mem_table,
> +
> +	.set_vring_num = set_vring_num,
> +	.set_vring_addr = set_vring_addr,
> +	.set_vring_base = set_vring_base,
> +	.get_vring_base = user_get_vring_base,
> +
> +	.set_vring_kick = user_set_vring_kick,
> +	.set_vring_call = user_set_vring_call,
> +
> +	.set_backend = set_backend,
> +
> +	.set_owner = set_owner,
> +	.reset_owner = reset_owner,
> +};
> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
> index 13fbb6f..db810e7 100644
> --- a/lib/librte_vhost/virtio-net.c
> +++ b/lib/librte_vhost/virtio-net.c
> @@ -96,6 +96,12 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
>   */
>  #include "virtio-net-cdev.c"
> 
> +/**
> + * Include vhost-user depend functions and definitions.
> + */
> +#include "virtio-net-user.c"
> +
> +
>  /*
>   * Retrieves an entry from the devices configuration linked list.
>   */
> @@ -105,6 +111,8 @@ get_config_ll_entry(struct vhost_device_ctx ctx)
>  	switch (ctx.type) {
>  	case VHOST_DRV_CUSE:
>  		return cdev_get_config_ll_entry(ctx);
> +	case VHOST_DRV_USER:
> +		return user_get_config_ll_entry(ctx);
>  	default:
>  		break;
>  	}
> @@ -120,6 +128,8 @@ get_device(struct vhost_device_ctx ctx)
>  	switch (ctx.type) {
>  	case VHOST_DRV_CUSE:
>  		return cdev_get_device(ctx);
> +	case VHOST_DRV_USER:
> +		return user_get_device(ctx);
>  	default:
>  		break;
>  	}
> @@ -136,6 +146,8 @@ add_config_ll_entry(vhost_driver_type_t type,
>  	switch (type) {
>  	case VHOST_DRV_CUSE:
>  		return cdev_add_config_ll_entry(new_ll_dev);
> +	case VHOST_DRV_USER:
> +		return user_add_config_ll_entry(new_ll_dev);
>  	default:
>  		break;
>  	}
> @@ -149,8 +161,39 @@ cleanup_device(struct virtio_net *dev)
>  {
>  	/* Unmap QEMU memory file if mapped. */
>  	if (dev->mem) {
> -		munmap((void *)(uintptr_t)dev->mem->mapped_address,
> -			(size_t)dev->mem->mapped_size);
> +		{
> +			/*
> +			 * 'munmap()' will be failed when mapped_size isn't
> +			 * aligned with hugepage size.
> +			 * Usually a file size of QEMU physical memory is
> +			 * aligned by hugepage size. So In a case of CUSE,
> +			 * there is no problem. But with vhost-user, there is
> +			 * no way to get physical memory size.
> +			 *
> +			 * Let's assume hugepage size is 2MB or 1GB here.
> +			 * BTW, 'mmap()' automatically fixed size parameter
> +			 * to be aligned. Why does 'munmap()' do like so?
> +			 */
> +			int ret = 0;
> +			size_t hugepagesize, size = dev->mem->mapped_size;
> +
> +			/* assume hugepage size is 2MB */
> +			hugepagesize = 2 * 1024 * 1024;
> +			size = (size + hugepagesize - 1) /
> +						hugepagesize * hugepagesize;
> +			ret = munmap((void *)(uintptr_t)
> +						dev->mem->mapped_address,
> +						size);
> +			if (ret) {
> +				/* assume hugepage size is 1GB, try again */
> +				hugepagesize = 1024 * 1024 * 1024;
> +				size = (size + hugepagesize - 1) /
> +						hugepagesize * hugepagesize;
> +				munmap((void *)(uintptr_t)
> +						dev->mem->mapped_address,
> +						size);
> +			}
> +		}
>  		free(dev->mem);
>  	}
> 
> @@ -187,6 +230,8 @@ rm_config_ll_entry(vhost_driver_type_t type,
>  	switch (type) {
>  	case VHOST_DRV_CUSE:
>  		return cdev_rm_config_ll_entry(ll_dev, ll_dev_last);
> +	case VHOST_DRV_USER:
> +		return user_rm_config_ll_entry(ll_dev, ll_dev_last);
>  	default:
>  		break;
>  	}
> @@ -201,7 +246,9 @@ get_config_ll_root(struct vhost_device_ctx ctx)
>  {
>  	switch (ctx.type) {
>  	case VHOST_DRV_CUSE:
> -		return cdev_get_config_ll_root(ctx);
> +		return cdev_get_config_ll_root();
> +	case VHOST_DRV_USER:
> +		return user_get_config_ll_root();
>  	default:
>  		break;
>  	}
> @@ -232,6 +279,8 @@ init_device(struct vhost_device_ctx ctx, struct virtio_net
> *dev)
>  	switch (ctx.type) {
>  	case VHOST_DRV_CUSE:
>  		return cdev_init_device(ctx, dev);
> +	case VHOST_DRV_USER:
> +		return user_init_device(ctx, dev);
>  	default:
>  		break;
>  	}
> @@ -527,6 +576,8 @@ get_virtio_net_callbacks(vhost_driver_type_t type)
>  	switch (type) {
>  	case VHOST_DRV_CUSE:
>  		return &vhost_cuse_device_ops;
> +	case VHOST_DRV_USER:
> +		return &vhost_user_device_ops;
>  	default:
>  		break;
>  	}
> @@ -570,9 +621,14 @@ int rte_vhost_feature_enable(uint64_t feature_mask)
>   * Register ops so that we can add/remove device to data core.
>   */
>  int
> -rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const
> ops)
> +rte_vhost_driver_callback_register(struct vhost_driver *drv,
> +		struct virtio_net_device_ops const * const ops, void *priv)
>  {
> +	if (drv == NULL || ops == NULL)
> +		return -1;
> +
>  	notify_ops = ops;
> +	drv->priv = priv;
> 
>  	return 0;
>  }
> --
> 1.9.1
  
Tetsuya Mukawa Nov. 10, 2014, 5:11 a.m. UTC | #2
Hi XIe,

(2014/11/08 6:25), Xie, Huawei wrote:
> How about using client/server model and select/poll event handing mechanism rather than poll?
> The polling could cause periodic jitter.
>
Sounds nice. I will change like your comment.

Thanks,
Tetsuya
  
Huawei Xie Nov. 10, 2014, 8:18 a.m. UTC | #3
Tetsuya:
I already did this, :), and will publish the code for review after I do some cleanup next week.

> -----Original Message-----
> From: Tetsuya Mukawa [mailto:mukawa@igel.co.jp]
> Sent: Sunday, November 09, 2014 10:11 PM
> To: Xie, Huawei; dev@dpdk.org
> Cc: nakajima.yoshihiro@lab.ntt.co.jp; masutani.hitoshi@lab.ntt.co.jp
> Subject: Re: [dpdk-dev] [RFC PATCH 7/7] lib/librte_vhost: Add vhost-user
> implementation
> 
> Hi XIe,
> 
> (2014/11/08 6:25), Xie, Huawei wrote:
> > How about using client/server model and select/poll event handing mechanism
> rather than poll?
> > The polling could cause periodic jitter.
> >
> Sounds nice. I will change like your comment.
> 
> Thanks,
> Tetsuya
  
Tetsuya Mukawa Nov. 10, 2014, 8:55 a.m. UTC | #4
Hi Xie,

(2014/11/10 17:18), Xie, Huawei wrote:
> Tetsuya:
> I already did this, :), and will publish the code for review after I do some cleanup next week.

I appreciate it.
I guess your implementation assumes that all vhost-user functions you implemented are called
by virtio common layer. Is it right?

If so, I will change abstraction layer implementation by this week or early next week.
(Please also check email related with 'get_virtio_net_callbacks()').

Thanks,
Tetsuya
  
Huawei Xie Nov. 14, 2014, 12:07 a.m. UTC | #5
> +struct vhost_device_user_ctx {
> +	int			*fds;
> +	int			fd_num;
> +	struct vhost_driver	*drv;
> +};
> +
>  /*
>   * Structure used to identify device context.
>   */
> @@ -83,6 +89,7 @@ struct vhost_device_ctx {
>  	vhost_driver_type_t	type;	/* driver type. */
>  	uint64_t		fh;	/* Populated with fi->fh to track the
> device index. */
>  	union {
> +		struct vhost_device_user_ctx user;
>  		struct vhost_device_cuse_ctx cdev;
>  	};
>  };

Tetsuya:
It is ok we define the enum ctx, but so far I don't see absolute necessity to have user ctx.
Will send out RFC patch of my implementation today or next day to make it more clear.

I don't understand  why we keep two device lists.
  * in real case, will we allow to register two drivers?
     Besides, we have the open question whether we still need to keep the DPDK cuse driver. It requires maintenance effort
and extra kernel module;
     Btw, your framework to allow dynamically register different vhost driver is nice!
  * If two drivers are simultaneously accessing the device list, we could add lock.
 
> +user_get_device(struct vhost_device_ctx ctx)
> +user_add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
> +user_rm_config_ll_entry(struct virtio_net_config_ll *ll_dev, struct
> +user_get_config_ll_root(void)
> +user_init_device(struct vhost_device_ctx ctx, struct virtio_net *dev)
  
Tetsuya Mukawa Nov. 14, 2014, 4:41 a.m. UTC | #6
(2014/11/14 9:07), Xie, Huawei wrote:
>> +struct vhost_device_user_ctx {
>> +	int			*fds;
>> +	int			fd_num;
>> +	struct vhost_driver	*drv;
>> +};
>> +
>>  /*
>>   * Structure used to identify device context.
>>   */
>> @@ -83,6 +89,7 @@ struct vhost_device_ctx {
>>  	vhost_driver_type_t	type;	/* driver type. */
>>  	uint64_t		fh;	/* Populated with fi->fh to track the
>> device index. */
>>  	union {
>> +		struct vhost_device_user_ctx user;
>>  		struct vhost_device_cuse_ctx cdev;
>>  	};
>>  };
> Tetsuya:
> It is ok we define the enum ctx, but so far I don't see absolute necessity to have user ctx.
> Will send out RFC patch of my implementation today or next day to make it more clear.
Thanks, let's change implementation simpler as much as possible.
>
> I don't understand  why we keep two device lists.
>   * in real case, will we allow to register two drivers?
>      Besides, we have the open question whether we still need to keep the DPDK cuse driver. It requires maintenance effort
> and extra kernel module;
>      Btw, your framework to allow dynamically register different vhost driver is nice!
I assume some customers still need to use QEMU under 2.0.
But it's okay for me to remove vhost-cuse implementation.
What is you and intel plan?

>   * If two drivers are simultaneously accessing the device list, we could add lock.
Also we may need to remove some global variables that cannot be shared
between drivers.
To remove global variable, we may be possible to save these variable in ctx.

Thanks,
Tetsuya
  

Patch

diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index a9e20ea..af07900 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -75,17 +75,32 @@  struct buf_vector {
  */
 typedef enum {
 	VHOST_DRV_CUSE, /* cuse driver */
+	VHOST_DRV_USER, /* vhost-user driver */
 	VHOST_DRV_NUM	/* the number of vhost driver types */
 } vhost_driver_type_t;
 
+
+/**
+ * Structure contains vhost-user session specific information
+ */
+struct vhost_user_session {
+	int		fh;		/**< session identifier */
+	pthread_t	tid;		/**< thread id of session handler */
+	int		socketfd;	/**< fd of socket */
+	int		interval;	/**< reconnection interval of session */
+};
+
 /**
  * Structure contains information relating vhost driver.
  */
 struct vhost_driver {
 	vhost_driver_type_t	type;		/**< driver type. */
 	const char		*dev_name;	/**< accessing device name. */
+	void			*priv;		/**< private data. */
 	union {
 		struct fuse_session *cuse_session;	/**< fuse session. */
+		struct vhost_user_session *user_session;
+						/**< vhost-user session. */
 	};
 };
 
@@ -199,9 +214,11 @@  struct vhost_driver *rte_vhost_driver_register(
 		const char *dev_name, vhost_driver_type_t type);
 
 /* Register callbacks. */
-int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const);
+int rte_vhost_driver_callback_register(struct vhost_driver *drv,
+			struct virtio_net_device_ops const * const, void *priv);
 /* Start vhost driver session blocking loop. */
 int rte_vhost_driver_session_start(struct vhost_driver *drv);
+void rte_vhost_driver_session_stop(struct vhost_driver *drv);
 
 /**
  * This function adds buffers to the virtio devices RX virtqueue. Buffers can
diff --git a/lib/librte_vhost/vhost-net-user.c b/lib/librte_vhost/vhost-net-user.c
new file mode 100644
index 0000000..434f20f
--- /dev/null
+++ b/lib/librte_vhost/vhost-net-user.c
@@ -0,0 +1,541 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) 2014 IGEL Co/.Ltd.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IGEL nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <linux/un.h>
+
+#define VHOST_USER_MAX_DEVICE		(32)
+#define VHOST_USER_MAX_FD_NUM		(3)
+
+/* start id of vhost user device */
+rte_atomic16_t vhost_user_device_id;
+
+static struct vhost_net_device_ops const *ops;
+
+typedef enum VhostUserRequest {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_MAX
+} VhostUserRequest;
+
+#define VHOST_MEMORY_MAX_NREGIONS	8
+
+typedef struct VhostUserMemoryRegion {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+	uint32_t nregions;
+	uint32_t padding;
+	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserMsg {
+	VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK		(0x3)
+#define VHOST_USER_REPLY_MASK		(0x1<<2)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+	union {
+#define VHOST_USER_VRING_IDX_MASK	(0xff)
+#define VHOST_USER_VRING_NOFD_MASK	(0x1<<8)
+		uint64_t u64;
+		struct vhost_vring_state state;
+		struct vhost_vring_addr addr;
+		VhostUserMemory memory;
+	};
+} __attribute__((packed)) VhostUserMsg;
+
+static VhostUserMsg m __attribute__ ((unused));
+#define VHOST_USER_HDR_SIZE	(sizeof(m.request) \
+		+ sizeof(m.flags) + sizeof(m.size))
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION		(0x1)
+
+static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
+	-1,			/* VHOST_USER_NONE */
+	VHOST_GET_FEATURES,	/* VHOST_USER_GET_FEATURES */
+	VHOST_SET_FEATURES,	/* VHOST_USER_SET_FEATURES */
+	VHOST_SET_OWNER,	/* VHOST_USER_SET_OWNER */
+	VHOST_RESET_OWNER,	/* VHOST_USER_RESET_OWNER */
+	VHOST_SET_MEM_TABLE,	/* VHOST_USER_SET_MEM_TABLE */
+	VHOST_SET_LOG_BASE,	/* VHOST_USER_SET_LOG_BASE */
+	VHOST_SET_LOG_FD,	/* VHOST_USER_SET_LOG_FD */
+	VHOST_SET_VRING_NUM,	/* VHOST_USER_SET_VRING_NUM */
+	VHOST_SET_VRING_ADDR,	/* VHOST_USER_SET_VRING_ADDR */
+	VHOST_SET_VRING_BASE,	/* VHOST_USER_SET_VRING_BASE */
+	VHOST_GET_VRING_BASE,	/* VHOST_USER_GET_VRING_BASE */
+	VHOST_SET_VRING_KICK,	/* VHOST_USER_SET_VRING_KICK */
+	VHOST_SET_VRING_CALL,	/* VHOST_USER_SET_VRING_CALL */
+	VHOST_SET_VRING_ERR	/* VHOST_USER_SET_VRING_ERR */
+};
+
+/**
+ * Returns vhost_device_ctx from given fuse_req_t. The index is populated later when
+ * the device is added to the device linked list.
+ */
+static struct vhost_device_ctx
+vhost_driver_to_vhost_ctx(struct vhost_driver *drv)
+{
+	struct vhost_device_ctx ctx;
+	int device_id = drv->user_session->fh;
+
+	ctx.type = VHOST_DRV_USER;
+	ctx.fh = device_id;
+	ctx.user.drv = drv;
+
+	return ctx;
+}
+
+/**
+ * When the device is created in QEMU it gets initialised here and added to the device linked list.
+ */
+static int
+vhost_user_open(struct vhost_driver *drv)
+{
+	struct vhost_device_ctx ctx = vhost_driver_to_vhost_ctx(drv);
+
+	int ret;
+
+	ret = ops->new_device(ctx);
+	if (ret == -1)
+		return -1;
+
+	RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device configuration started\n", ctx.fh);
+
+	return 0;
+}
+
+/**
+ * When QEMU is shutdown or killed the device gets released.
+ */
+static void
+vhost_user_release(struct vhost_driver *drv)
+{
+	struct vhost_device_ctx ctx = vhost_driver_to_vhost_ctx(drv);
+
+	ops->destroy_device(ctx);
+	RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
+}
+
+/**
+ * Send data to vhost-user device on a QEMU.
+ */
+static int
+vhost_user_write(struct vhost_driver *drv, VhostUserMsg *msg,
+		int *fds, size_t fd_num)
+{
+	int fd, len;
+	size_t fd_size = fd_num * sizeof(int);
+	char control[CMSG_SPACE(fd_size)];
+	struct msghdr msg_header;
+	struct iovec iov[1];
+	struct cmsghdr *cmsg_header;
+	struct vhost_device_ctx ctx = vhost_driver_to_vhost_ctx(drv);
+
+	if ((drv == NULL) || (msg == NULL))
+		return -EINVAL;
+
+	fd = drv->user_session->socketfd;
+
+	memset(&msg_header, 0, sizeof(msg_header));
+	memset(control, 0, sizeof(control));
+
+	/* set the payload */
+	iov[0].iov_base = (void *)msg;
+	iov[0].iov_len = VHOST_USER_HDR_SIZE + msg->size;
+
+	msg_header.msg_iov = iov;
+	msg_header.msg_iovlen = 1;
+
+	if (fd_num) {
+		msg_header.msg_control = control;
+		msg_header.msg_controllen = sizeof(control);
+		cmsg_header = CMSG_FIRSTHDR(&msg_header);
+		cmsg_header->cmsg_len = CMSG_LEN(fd_size);
+		cmsg_header->cmsg_level = SOL_SOCKET;
+		cmsg_header->cmsg_type = SCM_RIGHTS;
+		memcpy(CMSG_DATA(cmsg_header), fds, fd_size);
+	} else {
+		msg_header.msg_control = 0;
+		msg_header.msg_controllen = 0;
+	}
+
+	do {
+		len = sendmsg(fd, &msg_header, 0);
+	} while (len < 0 && errno == EINTR);
+
+	if (len < 0)
+		goto error;
+
+	return 0;
+
+error:
+	RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device cannot send message\n", ctx.fh);
+	return -EFAULT;
+}
+
+/**
+ * Receive data from vhost-user device on a QEMU.
+ */
+static int
+vhost_user_read(struct vhost_driver *drv, VhostUserMsg *msg,
+		int *fds, size_t *fd_num)
+{
+	int fd, len;
+	size_t fd_size = (*fd_num) * sizeof(int);
+	char control[CMSG_SPACE(fd_size)];
+	struct msghdr msg_header;
+	struct iovec iov[1];
+	struct cmsghdr *cmsg_header;
+	struct vhost_device_ctx ctx = vhost_driver_to_vhost_ctx(drv);
+
+	if ((drv == NULL) || (msg == NULL))
+		return -EINVAL;
+
+	fd = drv->user_session->socketfd;
+
+	memset(&msg_header, 0, sizeof(msg_header));
+	memset(control, 0, sizeof(control));
+	*fd_num = 0;
+
+	/* set the payload */
+	iov[0].iov_base = (void *)msg;
+	iov[0].iov_len = VHOST_USER_HDR_SIZE;
+
+	msg_header.msg_iov = iov;
+	msg_header.msg_iovlen = 1;
+	msg_header.msg_control = control;
+	msg_header.msg_controllen = sizeof(control);
+
+	if ((len = recvmsg(fd, &msg_header, 0)) <= 0)
+		goto error;
+
+	if (msg_header.msg_flags & (MSG_TRUNC | MSG_CTRUNC))
+		goto error;
+
+	cmsg_header = CMSG_FIRSTHDR(&msg_header);
+	if (cmsg_header && cmsg_header->cmsg_len > 0 &&
+			cmsg_header->cmsg_level == SOL_SOCKET &&
+			cmsg_header->cmsg_type == SCM_RIGHTS) {
+		if (fd_size >= cmsg_header->cmsg_len - CMSG_LEN(0)) {
+			fd_size = cmsg_header->cmsg_len - CMSG_LEN(0);
+			memcpy(fds, CMSG_DATA(cmsg_header), fd_size);
+			*fd_num = fd_size / sizeof(int);
+		}
+	}
+
+	if (read(fd, ((char *)msg) + len, msg->size) < 0)
+		goto error;
+
+	return 0;
+
+error:
+	RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device cannot receive message\n", ctx.fh);
+	return -EFAULT;
+}
+
+/*
+ * Boilerplate code for vhost-user IOCTL
+ * Implicit arguments: ctx, req, result.
+ */
+#define VHOST_USER_IOCTL(func) do {	\
+	result = (func)(ctx);		\
+} while (0)
+
+/*
+ * Boilerplate code for vhost-user Read IOCTL
+ * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
+ */
+#define VHOST_USER_IOCTL_R(type, var, func) do {\
+	result = func(ctx, &(var));		\
+} while (0)
+
+/*
+ * Boilerplate code for vhost-user Write IOCTL
+ * Implicit arguments: ctx, req, result, out_bufsz.
+ */
+#define	VHOST_USER_IOCTL_W(type, var, func) do {\
+	result = (func)(ctx, &(var));		\
+	msg->flags |= VHOST_USER_REPLY_MASK;	\
+	msg->size = sizeof(type);		\
+	vhost_user_write(drv, msg, NULL, 0);	\
+} while (0)
+
+/*
+ * Boilerplate code for vhost-user Read/Write IOCTL
+ * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
+ */
+#define VHOST_USER_IOCTL_RW(type1, var1, type2, var2, func) do {\
+	result = (func)(ctx, (var1), &(var2));			\
+	msg->flags |= VHOST_USER_REPLY_MASK;			\
+	msg->size = sizeof(type2);				\
+	vhost_user_write(drv, msg, NULL, 0);			\
+} while (0)
+
+/**
+ * The IOCTLs are handled using unix domain socket in userspace.
+ */
+	static int
+vhost_user_ioctl(struct vhost_driver *drv, VhostUserMsg *msg,
+		int *fds, int fd_num)
+{
+	struct vhost_device_ctx ctx = vhost_driver_to_vhost_ctx(drv);
+	struct vhost_vring_file file;
+	int result = 0;
+
+	switch (ioctl_to_vhost_user_request[msg->request]) {
+	case VHOST_GET_FEATURES:
+		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
+		VHOST_USER_IOCTL_W(uint64_t, msg->u64, ops->get_features);
+		break;
+
+	case VHOST_SET_FEATURES:
+		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
+		VHOST_USER_IOCTL_R(uint64_t, msg->u64, ops->set_features);
+		break;
+
+	case VHOST_RESET_OWNER:
+		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
+		VHOST_USER_IOCTL(ops->reset_owner);
+		break;
+
+	case VHOST_SET_OWNER:
+		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
+		VHOST_USER_IOCTL(ops->set_owner);
+		break;
+
+	case VHOST_SET_MEM_TABLE:
+		/*TODO fix race condition.*/
+		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
+		/* all fds should be same, because physical memory consist of an one file */
+		ctx.user.fds = fds;
+		ctx.user.fd_num = fd_num;
+		result = ops->set_mem_table(ctx, &msg->memory, msg->memory.nregions);
+		break;
+
+	case VHOST_SET_VRING_NUM:
+		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
+		VHOST_USER_IOCTL_R(struct vhost_vring_state, msg->state, ops->set_vring_num);
+		break;
+
+	case VHOST_SET_VRING_BASE:
+		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
+		VHOST_USER_IOCTL_R(struct vhost_vring_state, msg->state, ops->set_vring_base);
+		break;
+
+	case VHOST_GET_VRING_BASE:
+		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
+		VHOST_USER_IOCTL_RW(uint32_t, msg->addr.index, struct vhost_vring_state, msg->state, ops->get_vring_base);
+		break;
+
+	case VHOST_SET_VRING_ADDR:
+		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
+		VHOST_USER_IOCTL_R(struct vhost_vring_addr, msg->addr, ops->set_vring_addr);
+		break;
+
+	case VHOST_SET_VRING_KICK:
+		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n", ctx.fh);
+		ctx.user.fds = fds;
+		ctx.user.fd_num = fd_num;
+		file.index = msg->u64;
+		VHOST_USER_IOCTL_R(struct vhost_vring_file, file, ops->set_vring_kick);
+		break;
+
+	case VHOST_SET_VRING_CALL:
+		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n", ctx.fh);
+		ctx.user.fds = fds;
+		ctx.user.fd_num = fd_num;
+		file.index = msg->u64;
+		VHOST_USER_IOCTL_R(struct vhost_vring_file, file, ops->set_vring_call);
+		break;
+
+	case VHOST_NET_SET_BACKEND:
+	default:
+		RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
+		result = -1;
+	}
+
+	if (result < 0)
+		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
+	else
+		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
+
+	return result;
+}
+
+/**
+ * vhost-user specific registration.
+ */
+static int
+vhost_user_driver_register(struct vhost_driver *drv)
+{
+	if ((drv == NULL) || (drv->dev_name == NULL) ||
+			(strlen(drv->dev_name) > UNIX_PATH_MAX - 1))
+		return -1;
+
+	ops = get_virtio_net_callbacks(drv->type);
+
+	drv->user_session = rte_malloc(NULL, sizeof(struct vhost_user_session), CACHE_LINE_SIZE);
+	if (drv->user_session == NULL)
+		return -1;
+
+	drv->user_session->fh =
+		rte_atomic16_add_return(&vhost_user_device_id, 1) - 1; /* fh of first device is zero */
+	drv->user_session->interval = 1;
+
+	return 0;
+}
+
+/**
+ * When vhost-user driver starts, the session handler communicates with vhost-user
+ * device on a QEMU using a unix domain sokcet.
+ */
+static void *
+vhost_user_session_handler(void *data)
+{
+	struct vhost_driver *drv = data;
+	int ret;
+	struct sockaddr_un caddr;
+	VhostUserMsg msg;
+	int fds[VHOST_USER_MAX_FD_NUM];
+	int socketfd;
+	int interval;
+	size_t fd_num;
+
+	if ((drv == NULL) || (drv->dev_name == NULL))
+		return NULL;
+
+	bzero(&caddr, sizeof(caddr));
+	caddr.sun_family = AF_LOCAL;
+	strncpy((char *)&caddr.sun_path, drv->dev_name, strlen(drv->dev_name));
+
+reconnect:
+	drv->user_session->socketfd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (drv->user_session->socketfd < 0)
+		return NULL;
+
+	socketfd = drv->user_session->socketfd;
+	interval = drv->user_session->interval;
+	while (1) {
+		ret = connect(socketfd, (struct sockaddr *)&caddr, sizeof(caddr));
+		if (ret == 0)
+			break; /* success */
+		sleep(interval);
+	}
+
+	ret = vhost_user_open(drv);
+	if (ret != 0) {
+		RTE_LOG(ERR, VHOST_CONFIG, "(Socket %s) open failuer\n", drv->dev_name);
+		return NULL;
+	}
+
+	for (;;) {
+		fd_num = VHOST_USER_MAX_FD_NUM;
+		bzero(&msg, sizeof(VhostUserMsg));
+		ret = vhost_user_read(drv, &msg, fds, &fd_num);
+		if (ret != 0) {
+			RTE_LOG(ERR, VHOST_CONFIG, "(Socket %s) read failuer\n", drv->dev_name);
+			vhost_user_release(drv);
+			goto reconnect;
+		}
+
+		ret = vhost_user_ioctl(drv, &msg, fds, fd_num);
+		if (ret != 0) {
+			RTE_LOG(ERR, VHOST_CONFIG, "(Socket %s) request failuer\n", drv->dev_name);
+			vhost_user_release(drv);
+			goto reconnect;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * Create session handler
+ */
+static int
+vhost_user_driver_start(struct vhost_driver *drv)
+{
+	if (pthread_create(&drv->user_session->tid, NULL, vhost_user_session_handler, drv)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+				"(Socket %s) starting event handler failuer\n", drv->dev_name);
+		return -1;
+	}
+
+	/* TODO: The event handler thread may need to run on a core user speficied. */
+
+	return 0;
+}
+
+/**
+ * Destroy session handler
+ */
+static void
+vhost_user_driver_stop(struct vhost_driver *drv)
+{
+	pthread_t *tid = &drv->user_session->tid;
+
+	if (pthread_create(tid, NULL, vhost_user_session_handler, drv)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+				"(Socket %s) starting event handler failuer\n", drv->dev_name);
+		return;
+	}
+
+	/* stop event thread and wait until connection is closed */
+	if (*tid) {
+		pthread_cancel(*tid);
+		pthread_join(*tid, NULL);
+	}
+
+	vhost_user_release(drv);
+}
diff --git a/lib/librte_vhost/vhost-net.c b/lib/librte_vhost/vhost-net.c
index b0de5fd..10f41e9 100644
--- a/lib/librte_vhost/vhost-net.c
+++ b/lib/librte_vhost/vhost-net.c
@@ -42,6 +42,11 @@ 
  */
 #include "vhost-net-cdev.c"
 
+/*
+ * Include vhost-user depend functions and definitions
+ */
+#include "vhost-net-user.c"
+
 /**
  * This function abstracts cuse and vhost-user driver registration.
  */
@@ -65,10 +70,17 @@  rte_vhost_driver_register(const char *dev_name, vhost_driver_type_t type)
 		if (ret != 0)
 			goto err;
 		break;
+	case VHOST_DRV_USER:
+		ret = vhost_user_driver_register(drv);
+		break;
 	default:
+		ret = -EINVAL;
 		break;
 	}
 
+	if (ret != 0)
+		goto err;
+
 	return drv;
 err:
 	free(drv);
@@ -81,17 +93,40 @@  err:
 int
 rte_vhost_driver_session_start(struct vhost_driver *drv)
 {
+	int ret;
+
 	if (drv == NULL)
 		return -ENODEV;
 
 	switch (drv->type) {
 	case VHOST_DRV_CUSE:
-		vhost_cuse_driver_session_start(drv);
+		ret = vhost_cuse_driver_session_start(drv);
+		break;
+	case VHOST_DRV_USER:
+		ret = vhost_user_driver_start(drv);
 		break;
 	default:
+		ret = -EINVAL;
 		break;
 	}
 
-	return 0;
+	return ret;
 }
 
+/**
+ * The vhost session is closed, only allow for vhost-user.
+ */
+void
+rte_vhost_driver_session_stop(struct vhost_driver *drv)
+{
+	if (drv == NULL)
+		return;
+
+	switch (drv->type) {
+	case VHOST_DRV_USER:
+		vhost_user_driver_stop(drv);
+		break;
+	default:
+		break;
+	}
+}
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index ef04832..0e36ba0 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -76,6 +76,12 @@  struct vhost_device_cuse_ctx {
 	pid_t   pid;	/* PID of process calling the IOCTL. */
 };
 
+struct vhost_device_user_ctx {
+	int			*fds;
+	int			fd_num;
+	struct vhost_driver	*drv;
+};
+
 /*
  * Structure used to identify device context.
  */
@@ -83,6 +89,7 @@  struct vhost_device_ctx {
 	vhost_driver_type_t	type;	/* driver type. */
 	uint64_t		fh;	/* Populated with fi->fh to track the device index. */
 	union {
+		struct vhost_device_user_ctx user;
 		struct vhost_device_cuse_ctx cdev;
 	};
 };
diff --git a/lib/librte_vhost/virtio-net-user.c b/lib/librte_vhost/virtio-net-user.c
new file mode 100644
index 0000000..1e78f98
--- /dev/null
+++ b/lib/librte_vhost/virtio-net-user.c
@@ -0,0 +1,410 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) 2014 IGEL Co.,Ltd.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IGEL nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
+/* Functions defined in virtio_net.c */
+static void init_device(struct vhost_device_ctx ctx, struct virtio_net *dev);
+static void cleanup_device(struct virtio_net *dev);
+static void free_device(struct virtio_net_config_ll *ll_dev);
+static int new_device(struct vhost_device_ctx ctx);
+static void destroy_device(struct vhost_device_ctx ctx);
+static int set_owner(struct vhost_device_ctx ctx);
+static int reset_owner(struct vhost_device_ctx ctx);
+static int get_features(struct vhost_device_ctx ctx, uint64_t *pu);
+static int set_features(struct vhost_device_ctx ctx, uint64_t *pu);
+static int set_vring_num(struct vhost_device_ctx ctx, struct vhost_vring_state *state);
+static int set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr);
+static int set_vring_base(struct vhost_device_ctx ctx, struct vhost_vring_state *state);
+static int set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file);
+
+/* Root address of the linked list in the configuration core. */
+static struct virtio_net_config_ll *user_ll_root;
+
+/**
+ * Retrieves an entry from the devices configuration linked list.
+ */
+static struct virtio_net_config_ll *
+user_get_config_ll_entry(struct vhost_device_ctx ctx)
+{
+	struct virtio_net_config_ll *ll_dev = user_ll_root;
+
+	/* Loop through linked list until the device_fh is found. */
+	while (ll_dev != NULL) {
+		if (ll_dev->dev.device_fh == ctx.fh)
+			return ll_dev;
+		ll_dev = ll_dev->next;
+	}
+
+	return NULL;
+}
+
+/**
+ * Searches the configuration core linked list and retrieves the device if it exists.
+ */
+static struct virtio_net *
+user_get_device(struct vhost_device_ctx ctx)
+{
+	struct virtio_net_config_ll *ll_dev;
+
+	ll_dev = user_get_config_ll_entry(ctx);
+
+	/* If a matching entry is found in the linked list, return the device in that entry. */
+	if (ll_dev)
+		return &ll_dev->dev;
+
+	RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Device not found in linked list.\n", ctx.fh);
+	return NULL;
+}
+
+/**
+ * Add entry containing a device to the device configuration linked list.
+ */
+static void
+user_add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
+{
+	struct virtio_net_config_ll *ll_dev = user_ll_root;
+
+	/* If ll_dev == NULL then this is the first device so go to else */
+	if (ll_dev) {
+		/* If the 1st device_fh != 0 then we insert our device here. */
+		if (ll_dev->dev.device_fh != 0)	{
+			new_ll_dev->dev.device_fh = 0;
+			new_ll_dev->next = ll_dev;
+			user_ll_root = new_ll_dev;
+		} else {
+			/* Increment through the ll until we find un unused device_fh. Insert the device at that entry*/
+			while ((ll_dev->next != NULL) && (ll_dev->dev.device_fh == (ll_dev->next->dev.device_fh - 1)))
+				ll_dev = ll_dev->next;
+
+			new_ll_dev->dev.device_fh = ll_dev->dev.device_fh + 1;
+			new_ll_dev->next = ll_dev->next;
+			ll_dev->next = new_ll_dev;
+		}
+	} else {
+		user_ll_root = new_ll_dev;
+		user_ll_root->dev.device_fh = 0;
+	}
+
+}
+
+/**
+ * Remove an entry from the device configuration linked list.
+ */
+static struct virtio_net_config_ll *
+user_rm_config_ll_entry(struct virtio_net_config_ll *ll_dev, struct virtio_net_config_ll *ll_dev_last)
+{
+	/* First remove the device and then clean it up. */
+	if (ll_dev == user_ll_root) {
+		user_ll_root = ll_dev->next;
+		cleanup_device(&ll_dev->dev);
+		free_device(ll_dev);
+		return user_ll_root;
+	} else {
+		if (likely(ll_dev_last != NULL)) {
+			ll_dev_last->next = ll_dev->next;
+			cleanup_device(&ll_dev->dev);
+			free_device(ll_dev);
+			return ll_dev_last->next;
+		} else {
+			cleanup_device(&ll_dev->dev);
+			free_device(ll_dev);
+			RTE_LOG(ERR, VHOST_CONFIG, "Remove entry from config_ll failed\n");
+			return NULL;
+		}
+	}
+}
+
+/**
+ * Returns the root entry of linked list
+ */
+static struct virtio_net_config_ll *
+user_get_config_ll_root(void)
+{
+	return user_ll_root;
+}
+
+/**
+ * vhost-user specific device initialization.
+ */
+static void
+user_init_device(struct vhost_device_ctx ctx, struct virtio_net *dev)
+{
+	dev->priv = ctx.user.drv->priv;
+}
+
+/**
+ * Locate the file containing QEMU's memory space and map it to our address space.
+ */
+static int
+user_host_memory_map(struct virtio_net *dev, struct virtio_memory *mem, int fd, size_t size)
+{
+	void *map;
+
+	map = mmap(0, size, PROT_READ|PROT_WRITE , MAP_POPULATE|MAP_SHARED, fd, 0);
+	close(fd);
+
+	if (map == MAP_FAILED) {
+		RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Error mapping the file fd %d\n",  dev->device_fh, fd);
+		return -1;
+	}
+
+	/* Store the memory address and size in the device data structure */
+	mem->mapped_address = (uint64_t)(uintptr_t)map;
+	mem->mapped_size = size;
+
+	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") Mem File: fd: %d - Size: %llu - VA: %p\n", dev->device_fh,
+			fd, (long long unsigned)mem->mapped_size, map);
+
+	return 0;
+}
+
+/*
+ * Called from IOCTL: VHOST_SET_MEM_TABLE
+ * This function creates and populates the memory structure for the device. This includes
+ * storing offsets used to translate buffer addresses.
+ */
+static int
+user_set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr, uint32_t nregions)
+{
+	struct virtio_net *dev;
+	struct vhost_memory_region *mem_regions;
+	struct virtio_memory *mem;
+	uint64_t size = offsetof(struct vhost_memory, regions);
+	uint32_t regionidx, valid_regions;
+	size_t guest_memory_size = 0;
+
+	dev = user_get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	if (dev->mem) {
+		munmap((void *)(uintptr_t)dev->mem->mapped_address, (size_t)dev->mem->mapped_size);
+		free(dev->mem);
+	}
+
+	/* Malloc the memory structure depending on the number of regions. */
+	mem = calloc(1, sizeof(struct virtio_memory) + (sizeof(struct virtio_memory_regions) * nregions));
+	if (mem == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Failed to allocate memory for dev->mem.\n", dev->device_fh);
+		return -1;
+	}
+
+	mem->nregions = nregions;
+
+	mem_regions = (void*)(uintptr_t)((uint64_t)(uintptr_t)mem_regions_addr + size);
+
+	for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
+		/* Populate the region structure for each region. */
+		mem->regions[regionidx].guest_phys_address = mem_regions[regionidx].guest_phys_addr;
+		mem->regions[regionidx].guest_phys_address_end = mem->regions[regionidx].guest_phys_address +
+			mem_regions[regionidx].memory_size;
+		mem->regions[regionidx].memory_size = mem_regions[regionidx].memory_size;
+		mem->regions[regionidx].userspace_address = mem_regions[regionidx].userspace_addr;
+		guest_memory_size += mem_regions[regionidx].memory_size;
+
+		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n", dev->device_fh,
+				regionidx, (void*)(uintptr_t)mem->regions[regionidx].guest_phys_address,
+				(void*)(uintptr_t)mem->regions[regionidx].userspace_address,
+				mem->regions[regionidx].memory_size);
+	}
+
+	for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
+		/*set the base address mapping*/
+		if (mem->regions[regionidx].guest_phys_address == 0x0) {
+			mem->base_address = mem->regions[regionidx].userspace_address;
+			/* Map VM memory file */
+			if (user_host_memory_map(dev, mem, ctx.user.fds[regionidx], guest_memory_size) != 0) {
+				free(mem);
+				return -1;
+			}
+		} else
+			close(ctx.user.fds[regionidx]);
+	}
+
+	/* Check that we have a valid base address. */
+	if (mem->base_address == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Failed to find base address of qemu memory file.\n", dev->device_fh);
+		free(mem);
+		return -1;
+	}
+
+	/* Check if all of our regions have valid mappings. Usually one does not exist in the QEMU memory file. */
+	valid_regions = mem->nregions;
+	for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
+		if ((mem->regions[regionidx].userspace_address < mem->base_address) ||
+				(mem->regions[regionidx].userspace_address > (mem->base_address + mem->mapped_size)))
+			valid_regions--;
+	}
+
+	/* If a region does not have a valid mapping we rebuild our memory struct to contain only valid entries. */
+	if (valid_regions != mem->nregions) {
+		LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") Not all memory regions exist in the QEMU mem file. Re-populating mem structure\n",
+				dev->device_fh);
+
+		/* Re-populate the memory structure with only valid regions. Invalid regions are over-written with memmove. */
+		valid_regions = 0;
+
+		for (regionidx = mem->nregions; 0 != regionidx--;) {
+			if ((mem->regions[regionidx].userspace_address < mem->base_address) ||
+					(mem->regions[regionidx].userspace_address > (mem->base_address + mem->mapped_size))) {
+				memmove(&mem->regions[regionidx], &mem->regions[regionidx + 1],
+						sizeof(struct virtio_memory_regions) * valid_regions);
+			} else {
+				valid_regions++;
+			}
+		}
+	}
+	mem->nregions = valid_regions;
+	dev->mem = mem;
+
+	/*
+	 * Calculate the address offset for each region. This offset is used to identify the vhost virtual address
+	 * corresponding to a QEMU guest physical address.
+	 */
+	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++)
+		dev->mem->regions[regionidx].address_offset = dev->mem->regions[regionidx].userspace_address - dev->mem->base_address
+			+ dev->mem->mapped_address - dev->mem->regions[regionidx].guest_phys_address;
+
+	return 0;
+}
+
+/**
+ * Called from IOCTL: VHOST_GET_VRING_BASE
+ * We send the virtio device our available ring last used index.
+ */
+static int
+user_get_vring_base(struct vhost_device_ctx ctx, uint32_t index, struct vhost_vring_state *state)
+{
+	struct virtio_net *dev;
+
+	dev = user_get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	state->index = index;
+	/* State->index refers to the queue index. The TX queue is 1, RX queue is 0. */
+	state->num = dev->virtqueue[state->index]->last_used_idx;
+
+	return 0;
+}
+
+/**
+ * Called from IOCTL: VHOST_SET_VRING_CALL
+ * The virtio device sends an eventfd to interrupt the guest. This fd gets copied in
+ * to our process space.
+ * Also this message is sent when virtio-net device is reset by device driver on QEMU.
+ */
+static int
+user_set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
+{
+	struct virtio_net *dev;
+	struct vhost_virtqueue *vq;
+
+	dev = user_get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	/* file->index refers to the queue index. The TX queue is 1, RX queue is 0. */
+	vq = dev->virtqueue[file->index];
+
+	if (vq->kickfd)
+		close((int)vq->kickfd);
+
+	/* Populate the eventfd_copy structure and call eventfd_copy. */
+	vq->kickfd = ctx.user.fds[0];
+
+	return 0;
+}
+
+/**
+ * Called from IOCTL: VHOST_SET_VRING_KICK
+ * The virtio device sends an eventfd that it can use to notify us. This fd gets copied in
+ * to our process space.
+ */
+static int
+user_set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
+{
+	struct virtio_net *dev;
+	struct vhost_virtqueue *vq;
+
+	dev = user_get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	/* file->index refers to the queue index. The TX queue is 1, RX queue is 0. */
+	vq = dev->virtqueue[file->index];
+
+	if (vq->callfd)
+		close((int)vq->callfd);
+
+	/* Populate the eventfd_copy structure and call eventfd_copy. */
+	vq->callfd = ctx.user.fds[0];
+
+	if ((dev->virtqueue[VIRTIO_RXQ] != NULL) && (dev->virtqueue[VIRTIO_TXQ]) != NULL)
+		return set_backend(ctx, file);
+
+	return 0;
+}
+
+/**
+ * Function pointers are set for the device operations to allow to call functions
+ * when an IOCTL, device_add or device_release is received.
+ */
+static const struct vhost_net_device_ops vhost_user_device_ops = {
+	.new_device = new_device,
+	.destroy_device = destroy_device,
+
+	.get_features = get_features,
+	.set_features = set_features,
+
+	.set_mem_table = user_set_mem_table,
+
+	.set_vring_num = set_vring_num,
+	.set_vring_addr = set_vring_addr,
+	.set_vring_base = set_vring_base,
+	.get_vring_base = user_get_vring_base,
+
+	.set_vring_kick = user_set_vring_kick,
+	.set_vring_call = user_set_vring_call,
+
+	.set_backend = set_backend,
+
+	.set_owner = set_owner,
+	.reset_owner = reset_owner,
+};
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 13fbb6f..db810e7 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -96,6 +96,12 @@  qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
  */
 #include "virtio-net-cdev.c"
 
+/**
+ * Include vhost-user depend functions and definitions.
+ */
+#include "virtio-net-user.c"
+
+
 /*
  * Retrieves an entry from the devices configuration linked list.
  */
@@ -105,6 +111,8 @@  get_config_ll_entry(struct vhost_device_ctx ctx)
 	switch (ctx.type) {
 	case VHOST_DRV_CUSE:
 		return cdev_get_config_ll_entry(ctx);
+	case VHOST_DRV_USER:
+		return user_get_config_ll_entry(ctx);
 	default:
 		break;
 	}
@@ -120,6 +128,8 @@  get_device(struct vhost_device_ctx ctx)
 	switch (ctx.type) {
 	case VHOST_DRV_CUSE:
 		return cdev_get_device(ctx);
+	case VHOST_DRV_USER:
+		return user_get_device(ctx);
 	default:
 		break;
 	}
@@ -136,6 +146,8 @@  add_config_ll_entry(vhost_driver_type_t type,
 	switch (type) {
 	case VHOST_DRV_CUSE:
 		return cdev_add_config_ll_entry(new_ll_dev);
+	case VHOST_DRV_USER:
+		return user_add_config_ll_entry(new_ll_dev);
 	default:
 		break;
 	}
@@ -149,8 +161,39 @@  cleanup_device(struct virtio_net *dev)
 {
 	/* Unmap QEMU memory file if mapped. */
 	if (dev->mem) {
-		munmap((void *)(uintptr_t)dev->mem->mapped_address,
-			(size_t)dev->mem->mapped_size);
+		{
+			/*
+			 * 'munmap()' will be failed when mapped_size isn't
+			 * aligned with hugepage size.
+			 * Usually a file size of QEMU physical memory is
+			 * aligned by hugepage size. So In a case of CUSE,
+			 * there is no problem. But with vhost-user, there is
+			 * no way to get physical memory size.
+			 *
+			 * Let's assume hugepage size is 2MB or 1GB here.
+			 * BTW, 'mmap()' automatically fixed size parameter
+			 * to be aligned. Why does 'munmap()' do like so?
+			 */
+			int ret = 0;
+			size_t hugepagesize, size = dev->mem->mapped_size;
+
+			/* assume hugepage size is 2MB */
+			hugepagesize = 2 * 1024 * 1024;
+			size = (size + hugepagesize - 1) /
+						hugepagesize * hugepagesize;
+			ret = munmap((void *)(uintptr_t)
+						dev->mem->mapped_address,
+						size);
+			if (ret) {
+				/* assume hugepage size is 1GB, try again */
+				hugepagesize = 1024 * 1024 * 1024;
+				size = (size + hugepagesize - 1) /
+						hugepagesize * hugepagesize;
+				munmap((void *)(uintptr_t)
+						dev->mem->mapped_address,
+						size);
+			}
+		}
 		free(dev->mem);
 	}
 
@@ -187,6 +230,8 @@  rm_config_ll_entry(vhost_driver_type_t type,
 	switch (type) {
 	case VHOST_DRV_CUSE:
 		return cdev_rm_config_ll_entry(ll_dev, ll_dev_last);
+	case VHOST_DRV_USER:
+		return user_rm_config_ll_entry(ll_dev, ll_dev_last);
 	default:
 		break;
 	}
@@ -201,7 +246,9 @@  get_config_ll_root(struct vhost_device_ctx ctx)
 {
 	switch (ctx.type) {
 	case VHOST_DRV_CUSE:
-		return cdev_get_config_ll_root(ctx);
+		return cdev_get_config_ll_root();
+	case VHOST_DRV_USER:
+		return user_get_config_ll_root();
 	default:
 		break;
 	}
@@ -232,6 +279,8 @@  init_device(struct vhost_device_ctx ctx, struct virtio_net *dev)
 	switch (ctx.type) {
 	case VHOST_DRV_CUSE:
 		return cdev_init_device(ctx, dev);
+	case VHOST_DRV_USER:
+		return user_init_device(ctx, dev);
 	default:
 		break;
 	}
@@ -527,6 +576,8 @@  get_virtio_net_callbacks(vhost_driver_type_t type)
 	switch (type) {
 	case VHOST_DRV_CUSE:
 		return &vhost_cuse_device_ops;
+	case VHOST_DRV_USER:
+		return &vhost_user_device_ops;
 	default:
 		break;
 	}
@@ -570,9 +621,14 @@  int rte_vhost_feature_enable(uint64_t feature_mask)
  * Register ops so that we can add/remove device to data core.
  */
 int
-rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const ops)
+rte_vhost_driver_callback_register(struct vhost_driver *drv,
+		struct virtio_net_device_ops const * const ops, void *priv)
 {
+	if (drv == NULL || ops == NULL)
+		return -1;
+
 	notify_ops = ops;
+	drv->priv = priv;
 
 	return 0;
 }