diff mbox

[dpdk-dev,RFC,v2,10/14] vhost user support

Message ID 1422242440-28948-11-git-send-email-huawei.xie@intel.com (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Huawei Xie Jan. 26, 2015, 3:20 a.m. UTC
Signed-off-by: Huawei Xie <huawei.xie@intel.com>
---
 lib/librte_vhost/Makefile                     |   5 +-
 lib/librte_vhost/vhost-net.h                  |   4 +
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c |  12 +-
 lib/librte_vhost/vhost_user/fd_man.c          |   4 +-
 lib/librte_vhost/vhost_user/vhost-net-user.c  | 428 ++++++++++++++++++++++++++
 lib/librte_vhost/vhost_user/vhost-net-user.h  | 108 +++++++
 lib/librte_vhost/vhost_user/virtio-net-user.c | 205 ++++++++++++
 lib/librte_vhost/vhost_user/virtio-net-user.h |  48 +++
 lib/librte_vhost/virtio-net.c                 |  26 +-
 lib/librte_vhost/virtio-net.h                 |  43 +++
 10 files changed, 865 insertions(+), 18 deletions(-)
 create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c
 create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h
 create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c
 create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h
 create mode 100644 lib/librte_vhost/virtio-net.h
diff mbox

Patch

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index e0d0ef6..b2f14a0 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -34,10 +34,11 @@  include $(RTE_SDK)/mk/rte.vars.mk
 # library name
 LIB = librte_vhost.a
 
-CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -I vhost_user -O3 -D_FILE_OFFSET_BITS=64 -lfuse
 LDFLAGS += -lfuse
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
+#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c virtio-net.c vhost_rxtx.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index 11737cc..3f18f25 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -41,8 +41,12 @@ 
 
 #include <rte_log.h>
 
+#include "rte_virtio_net.h"
+
 #define VHOST_MEMORY_MAX_NREGIONS 8
 
+extern struct vhost_net_device_ops const *ops;
+
 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
 #define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
index edcbc10..1d2c403 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
@@ -50,8 +50,7 @@ 
 #include "rte_virtio_net.h"
 #include "vhost-net.h"
 #include "virtio-net-cdev.h"
-
-extern struct vhost_net_device_ops const *ops;
+#include "virtio-net.h"
 
 /* Line size for reading maps file. */
 static const uint32_t BUFSIZE = PATH_MAX;
@@ -268,6 +267,7 @@  cuse_set_mem_table(struct vhost_device_ctx ctx,
 	struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
 		((uint64_t)(uintptr_t)mem_regions_addr + size);
 	uint64_t base_address = 0, mapped_address, mapped_size;
+	struct virtio_net *dev;
 
 	for (idx = 0; idx < nregions; idx++) {
 		regions[idx].guest_phys_address =
@@ -335,6 +335,14 @@  cuse_set_mem_table(struct vhost_device_ctx ctx,
 			regions[idx].guest_phys_address;
 	}
 
+	dev = get_device(ctx);
+	if (dev && dev->mem && dev->mem->mapped_address) {
+		munmap((void *)(uintptr_t)dev->mem->mapped_address,
+			(size_t)dev->mem->mapped_size);
+		free(dev->mem);
+		dev->mem = NULL;
+	}
+
 	ops->set_mem_table(ctx, &regions[0], valid_regions);
 	return 0;
 }
diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/vhost_user/fd_man.c
index 09187e0..0d2beb9 100644
--- a/lib/librte_vhost/vhost_user/fd_man.c
+++ b/lib/librte_vhost/vhost_user/fd_man.c
@@ -72,7 +72,7 @@  fdset_find_free_slot(struct fdset *pfdset)
 
 static void
 fdset_add_fd(struct fdset  *pfdset, int idx, int fd,
-	fd_cb rcb, fd_cb wcb, uint64_t dat)
+	fd_cb rcb, fd_cb wcb, void *dat)
 {
 	struct fdentry *pfdentry;
 
@@ -138,7 +138,7 @@  fdset_init(struct fdset *pfdset)
  * Register the fd in the fdset with read/write handler and context.
  */
 int
-fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
+fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
 {
 	int i;
 
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
new file mode 100644
index 0000000..c84fd3b
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -0,0 +1,428 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <errno.h>
+
+#include <rte_log.h>
+#include <rte_virtio_net.h>
+
+#include "fd_man.h"
+#include "vhost-net-user.h"
+#include "vhost-net.h"
+#include "virtio-net-user.h"
+
+static void vserver_new_vq_conn(int fd, void *data);
+static void vserver_message_handler(int fd, void *dat);
+struct vhost_net_device_ops const *ops;
+
+static struct vhost_server *g_vhost_server;
+
+static const char *vhost_message_str[VHOST_USER_MAX] = {
+	[VHOST_USER_NONE] = "VHOST_USER_NONE",
+	[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
+	[VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
+	[VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
+	[VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
+	[VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
+	[VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
+	[VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
+	[VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
+	[VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
+	[VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
+	[VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
+	[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
+	[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
+	[VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR"
+};
+
+/**
+ * Create a unix domain socket, bind to path and listen for connection.
+ * @return
+ *  socket fd or -1 on failure
+ */
+static int
+uds_socket(const char *path)
+{
+	struct sockaddr_un un;
+	int sockfd;
+	int ret;
+
+	if (path == NULL)
+		return -1;
+
+	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (sockfd < 0)
+		return -1;
+	RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
+
+	memset(&un, 0, sizeof(un));
+	un.sun_family = AF_UNIX;
+	snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
+	ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
+	if (ret == -1)
+		goto err;
+	RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
+
+	ret = listen(sockfd, 1);
+	if (ret == -1)
+		goto err;
+
+	return sockfd;
+
+err:
+	close(sockfd);
+	return -1;
+}
+
+/* return bytes# of read on success or negative val on failure. */
+static int
+read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+{
+	struct iovec iov;
+	struct msghdr msgh = { 0 };
+	size_t fdsize = fd_num * sizeof(int);
+	char control[CMSG_SPACE(fdsize)];
+	struct cmsghdr *cmsg;
+	int ret;
+
+	iov.iov_base = buf;
+	iov.iov_len  = buflen;
+
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+	msgh.msg_control = control;
+	msgh.msg_controllen = sizeof(control);
+
+	ret = recvmsg(sockfd, &msgh, 0);
+	if (ret <= 0) {
+		RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
+		return ret;
+	}
+
+	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+		RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
+		return -1;
+	}
+
+	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
+		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
+		if ((cmsg->cmsg_level == SOL_SOCKET) &&
+			(cmsg->cmsg_type == SCM_RIGHTS)) {
+			memcpy(fds, CMSG_DATA(cmsg), fdsize);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/* return bytes# of read on success or negative val on failure. */
+static int
+read_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+	int ret;
+
+	ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
+		msg->fds, VHOST_MEMORY_MAX_NREGIONS);
+	if (ret <= 0)
+		return ret;
+
+	if (msg && msg->size) {
+		if (msg->size > sizeof(msg->payload)) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"invalid msg size: %d\n", msg->size);
+			return -1;
+		}
+		ret = read(sockfd, &msg->payload, msg->size);
+		if (ret <= 0)
+			return ret;
+		if (ret != (int)msg->size) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"read control message failed\n");
+			return -1;
+		}
+	}
+
+	return ret;
+}
+
+static int
+send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+{
+
+	struct iovec iov;
+	struct msghdr msgh = { 0 };
+	size_t fdsize = fd_num * sizeof(int);
+	char control[CMSG_SPACE(fdsize)];
+	struct cmsghdr *cmsg;
+	int ret;
+
+	iov.iov_base = buf;
+	iov.iov_len = buflen;
+
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+
+	if (fds && fd_num > 0) {
+		msgh.msg_control = control;
+		msgh.msg_controllen = sizeof(control);
+		cmsg = CMSG_FIRSTHDR(&msgh);
+		cmsg->cmsg_len = CMSG_LEN(fdsize);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		memcpy(CMSG_DATA(cmsg), fds, fdsize);
+	} else {
+		msgh.msg_control = NULL;
+		msgh.msg_controllen = 0;
+	}
+
+	do {
+		ret = sendmsg(sockfd, &msgh, 0);
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,  "sendmsg error\n");
+		return ret;
+	}
+
+	return ret;
+}
+
+static int
+send_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+	int ret;
+
+	if (!msg)
+		return 0;
+
+	msg->flags &= ~VHOST_USER_VERSION_MASK;
+	msg->flags |= VHOST_USER_VERSION;
+	msg->flags |= VHOST_USER_REPLY_MASK;
+
+	ret = send_fd_message(sockfd, (char *)msg,
+		VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
+
+	return ret;
+}
+
+/* call back when there is new virtio connection.  */
+static void
+vserver_new_vq_conn(int fd, void *dat)
+{
+	struct vhost_server *vserver = (struct vhost_server *)dat;
+	int conn_fd;
+	int fh;
+	struct vhost_device_ctx vdev_ctx = { 0 };
+
+	conn_fd = accept(fd, NULL, NULL);
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"new virtio connection is %d\n", conn_fd);
+	if (conn_fd < 0)
+		return;
+
+	fh = ops->new_device(vdev_ctx);
+	if (fh == -1) {
+		close(conn_fd);
+		return;
+	}
+	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
+
+	fdset_add(&vserver->fdset,
+		conn_fd, vserver_message_handler, NULL, (void *)fh);
+}
+
+/* callback when there is message on the connfd */
+static void
+vserver_message_handler(int connfd, void *dat)
+{
+	struct vhost_device_ctx ctx;
+	uint32_t fh = (uint32_t)dat;
+	struct VhostUserMsg msg;
+	uint64_t features;
+	int ret;
+
+	ctx.fh = fh;
+	ret = read_vhost_message(connfd, &msg);
+	if (ret < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"vhost read message failed\n");
+
+		close(connfd);
+		fdset_del(&g_vhost_server->fdset, connfd);
+		ops->destroy_device(ctx);
+
+		return;
+	} else if (ret == 0) {
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"vhost peer closed\n");
+
+		close(connfd);
+		fdset_del(&g_vhost_server->fdset, connfd);
+		ops->destroy_device(ctx);
+
+		return;
+	}
+	if (msg.request > VHOST_USER_MAX) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"vhost read incorrect message\n");
+
+		close(connfd);
+		fdset_del(&g_vhost_server->fdset, connfd);
+
+		return;
+	}
+
+	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
+		vhost_message_str[msg.request]);
+	switch (msg.request) {
+	case VHOST_USER_GET_FEATURES:
+		ret = ops->get_features(ctx, &features);
+		msg.payload.u64 = features;
+		msg.size = sizeof(msg.payload.u64);
+		send_vhost_message(connfd, &msg);
+		break;
+	case VHOST_USER_SET_FEATURES:
+		features = msg.payload.u64;
+		ops->set_features(ctx, &features);
+		break;
+
+	case VHOST_USER_SET_OWNER:
+		ops->set_owner(ctx);
+		break;
+	case VHOST_USER_RESET_OWNER:
+		ops->reset_owner(ctx);
+		break;
+
+	case VHOST_USER_SET_MEM_TABLE:
+		user_set_mem_table(ctx, &msg);
+		break;
+
+	case VHOST_USER_SET_LOG_BASE:
+		RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
+	case VHOST_USER_SET_LOG_FD:
+		close(msg.fds[0]);
+		RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
+		break;
+
+	case VHOST_USER_SET_VRING_NUM:
+		ops->set_vring_num(ctx, &msg.payload.state);
+		break;
+	case VHOST_USER_SET_VRING_ADDR:
+		ops->set_vring_addr(ctx, &msg.payload.addr);
+		break;
+	case VHOST_USER_SET_VRING_BASE:
+		ops->set_vring_base(ctx, &msg.payload.state);
+		break;
+
+	case VHOST_USER_GET_VRING_BASE:
+		ret = user_get_vring_base(ctx, &msg.payload.state);
+		msg.size = sizeof(msg.payload.state);
+		send_vhost_message(connfd, &msg);
+		break;
+
+	case VHOST_USER_SET_VRING_KICK:
+		user_set_vring_kick(ctx, &msg);
+		break;
+	case VHOST_USER_SET_VRING_CALL:
+		user_set_vring_call(ctx, &msg);
+		break;
+
+	case VHOST_USER_SET_VRING_ERR:
+		if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
+			close(msg.fds[0]);
+		RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
+		break;
+
+	default:
+		break;
+
+	}
+}
+
+
+/**
+ * Creates and initialise the vhost server.
+ */
+int
+rte_vhost_driver_register(const char *path)
+{
+
+	struct vhost_server *vserver;
+
+	if (g_vhost_server != NULL)
+		return -1;
+
+	vserver = calloc(sizeof(struct vhost_server), 1);
+	if (vserver == NULL)
+		return -1;
+
+	fdset_init(&vserver->fdset);
+
+	unlink(path);
+
+	vserver->listenfd = uds_socket(path);
+	if (vserver->listenfd < 0) {
+		free(vserver);
+		return -1;
+	}
+	vserver->path = path;
+
+	fdset_add(&vserver->fdset, vserver->listenfd,
+		vserver_new_vq_conn, NULL,
+		vserver);
+
+	ops = get_virtio_net_callbacks();
+
+	g_vhost_server = vserver;
+
+	return 0;
+}
+
+
+int
+rte_vhost_driver_session_start(void)
+{
+	fdset_event_dispatch(&g_vhost_server->fdset);
+	return 0;
+}
+
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
new file mode 100644
index 0000000..7e6cda4
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -0,0 +1,108 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "fd_man.h"
+
+struct vhost_server {
+	const char *path; /**< The path the uds is bind to. */
+	int listenfd;     /**< The listener sockfd. */
+	struct fdset fdset; /**< The fd list this vhost server manages. */
+};
+
+/* refer to hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS    8
+
+typedef enum VhostUserRequest {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+	uint32_t nregions;
+	uint32_t padding;
+	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserMsg {
+	VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK     (0x3)
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+	union {
+#define VHOST_USER_VRING_IDX_MASK   (0xff)
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+		uint64_t u64;
+		struct vhost_vring_state state;
+		struct vhost_vring_addr addr;
+		VhostUserMemory memory;
+	} payload;
+	int fds[VHOST_MEMORY_MAX_NREGIONS];
+} __attribute((packed)) VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    (0x1)
+
+/*****************************************************************************/
+#endif
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
new file mode 100644
index 0000000..6601fcd
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -0,0 +1,205 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include <rte_log.h>
+
+#include "virtio-net.h"
+#include "virtio-net-user.h"
+#include "vhost-net-user.h"
+#include "vhost-net.h"
+
+int
+user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	unsigned int idx;
+	struct VhostUserMemory memory = pmsg->payload.memory;
+	struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
+	uint64_t mapped_address, base_address = 0;
+
+	for (idx = 0; idx < memory.nregions; idx++) {
+		if (memory.regions[idx].guest_phys_addr == 0)
+			base_address = memory.regions[idx].userspace_addr;
+	}
+	if (base_address == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"couldn't find the mem region whose GPA is 0.\n");
+		return -1;
+	}
+
+	for (idx = 0; idx < memory.nregions; idx++) {
+		regions[idx].guest_phys_address =
+			memory.regions[idx].guest_phys_addr;
+		regions[idx].guest_phys_address_end =
+			memory.regions[idx].guest_phys_addr +
+			memory.regions[idx].memory_size;
+		regions[idx].memory_size = memory.regions[idx].memory_size;
+		regions[idx].userspace_address =
+			memory.regions[idx].userspace_addr;
+
+		/* This is ugly */
+		mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
+			regions[idx].memory_size +
+				memory.regions[idx].mmap_offset,
+			PROT_READ | PROT_WRITE, MAP_SHARED,
+			pmsg->fds[idx],
+			0);
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"mapped region %d to %p\n",
+			idx, (void *)mapped_address);
+
+		if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"mmap qemu guest failed.\n");
+			return -1;
+		}
+
+		mapped_address +=  memory.regions[idx].mmap_offset;
+
+		regions[idx].address_offset = mapped_address -
+			regions[idx].guest_phys_address;
+		LOG_DEBUG(VHOST_CONFIG,
+			"REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
+			idx,
+			(void *)(uintptr_t)regions[idx].guest_phys_address,
+			(void *)(uintptr_t)regions[idx].userspace_address,
+			 regions[idx].memory_size);
+	}
+	ops->set_mem_table(ctx, regions, memory.nregions);
+	return 0;
+}
+
+
+static int
+virtio_is_ready(struct virtio_net *dev)
+{
+	struct vhost_virtqueue *rvq, *tvq;
+
+	/* mq support in future.*/
+	rvq = dev->virtqueue[VIRTIO_RXQ];
+	tvq = dev->virtqueue[VIRTIO_TXQ];
+	if (rvq && tvq && rvq->desc && tvq->desc &&
+		(rvq->kickfd != (eventfd_t)-1) &&
+		(rvq->callfd != (eventfd_t)-1) &&
+		(tvq->kickfd != (eventfd_t)-1) &&
+		(tvq->callfd != (eventfd_t)-1)) {
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"virtio is now ready for processing.\n");
+		return 1;
+	}
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"virtio isn't ready for processing.\n");
+	return 0;
+}
+
+void
+user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	struct vhost_vring_file file;
+
+	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
+		file.fd = -1;
+	else
+		file.fd = pmsg->fds[0];
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring call idx:%d file:%d\n", file.index, file.fd);
+	ops->set_vring_call(ctx, &file);
+}
+
+
+/*
+ *  In vhost-user, when we receive kick message, will test whether virtio
+ *  device is ready for packet processing.
+ */
+void
+user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	struct vhost_vring_file file;
+	struct virtio_net *dev = get_device(ctx);
+
+	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
+		file.fd = -1;
+	else
+		file.fd = pmsg->fds[0];
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring kick idx:%d file:%d\n", file.index, file.fd);
+	ops->set_vring_kick(ctx, &file);
+
+	if (virtio_is_ready(dev) &&
+		!(dev->flags & VIRTIO_DEV_RUNNING))
+			notify_ops->new_device(dev);
+
+}
+
+/*
+ * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
+ */
+int
+user_get_vring_base(struct vhost_device_ctx ctx,
+	struct vhost_vring_state *state)
+{
+	struct virtio_net *dev = get_device(ctx);
+
+	/* We have to stop the queue (virtio) if it is running. */
+	if (dev->flags & VIRTIO_DEV_RUNNING)
+		notify_ops->destroy_device(dev);
+
+	/* Here we are safe to get the last used index */
+	ops->get_vring_base(ctx, state->index, state);
+
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring base idx:%d file:%d\n", state->index, state->num);
+	/*
+	 * Based on current qemu vhost-user implementation, this message is
+	 * sent and only sent in vhost_vring_stop.
+	 * TODO: cleanup the vring, it isn't usable since here.
+	 */
+	if (((int)dev->virtqueue[VIRTIO_RXQ]->callfd) >= 0) {
+		close(dev->virtqueue[VIRTIO_RXQ]->callfd);
+		dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
+	}
+	if (((int)dev->virtqueue[VIRTIO_TXQ]->callfd) >= 0) {
+		close(dev->virtqueue[VIRTIO_TXQ]->callfd);
+		dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
+	}
+
+	return 0;
+
+}
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
new file mode 100644
index 0000000..0f6a75a
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
@@ -0,0 +1,48 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_NET_USER_H
+#define _VIRTIO_NET_USER_H
+
+#include "vhost-net.h"
+#include "vhost-net-user.h"
+
+int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
+
+void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
+
+void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
+
+int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
+
+#endif
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 57a5801..c458ed9 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -50,6 +50,7 @@ 
 #include <rte_virtio_net.h>
 
 #include "vhost-net.h"
+#include "virtio-net.h"
 
 /*
  * Device linked list structure for configuration.
@@ -60,7 +61,7 @@  struct virtio_net_config_ll {
 };
 
 /* device ops to add/remove device to/from data core. */
-static struct virtio_net_device_ops const *notify_ops;
+struct virtio_net_device_ops const *notify_ops;
 /* root address of the linked list of managed virtio devices */
 static struct virtio_net_config_ll *ll_root;
 
@@ -88,8 +89,9 @@  qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
 		if ((qemu_va >= region->userspace_address) &&
 			(qemu_va <= region->userspace_address +
 			region->memory_size)) {
-			vhost_va = dev->mem->mapped_address + qemu_va -
-					dev->mem->base_address;
+			vhost_va = qemu_va + region->guest_phys_address +
+				region->address_offset -
+				region->userspace_address;
 			break;
 		}
 	}
@@ -119,7 +121,7 @@  get_config_ll_entry(struct vhost_device_ctx ctx)
  * Searches the configuration core linked list and
  * retrieves the device if it exists.
  */
-static struct virtio_net *
+struct virtio_net *
 get_device(struct vhost_device_ctx ctx)
 {
 	struct virtio_net_config_ll *ll_dev;
@@ -256,6 +258,11 @@  init_device(struct virtio_net *dev)
 	memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct vhost_virtqueue));
 	memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct vhost_virtqueue));
 
+	dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
+	dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
+	dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
+	dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
+
 	/* Backends are set to -1 indicating an inactive device. */
 	dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
 	dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
@@ -455,12 +462,6 @@  set_mem_table(struct vhost_device_ctx ctx,
 	if (dev == NULL)
 		return -1;
 
-	if (dev->mem) {
-		munmap((void *)(uintptr_t)dev->mem->mapped_address,
-			(size_t)dev->mem->mapped_size);
-		free(dev->mem);
-	}
-
 	/* Malloc the memory structure depending on the number of regions. */
 	mem = calloc(1, sizeof(struct virtio_memory) +
 		(sizeof(struct virtio_memory_regions) * nregions));
@@ -624,7 +625,7 @@  set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
 	/* file->index refers to the queue index. The txq is 1, rxq is 0. */
 	vq = dev->virtqueue[file->index];
 
-	if (vq->kickfd)
+	if ((int)vq->kickfd >= 0)
 		close((int)vq->kickfd);
 
 	vq->kickfd = file->fd;
@@ -650,8 +651,9 @@  set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
 	/* file->index refers to the queue index. The txq is 1, rxq is 0. */
 	vq = dev->virtqueue[file->index];
 
-	if (vq->callfd)
+	if ((int)vq->callfd >= 0)
 		close((int)vq->callfd);
+
 	vq->callfd = file->fd;
 
 	return 0;
diff --git a/lib/librte_vhost/virtio-net.h b/lib/librte_vhost/virtio-net.h
new file mode 100644
index 0000000..75fb57e
--- /dev/null
+++ b/lib/librte_vhost/virtio-net.h
@@ -0,0 +1,43 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_NET_H
+#define _VIRTIO_NET_H
+
+#include "vhost-net.h"
+#include "rte_virtio_net.h"
+
+struct virtio_net_device_ops const *notify_ops;
+struct virtio_net *get_device(struct vhost_device_ctx ctx);
+
+#endif