[RFC] net/virtio-user: avoid parsing process mappings

Message ID 20180828075327.18842-1-tiwei.bie@intel.com (mailing list archive)
State RFC, archived
Delegated to: Maxime Coquelin
Headers
Series [RFC] net/virtio-user: avoid parsing process mappings |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation fail Compilation issues

Commit Message

Tiwei Bie Aug. 28, 2018, 7:53 a.m. UTC
Recently some memory APIs were introduced to allow users
to get the file descriptors for each memory segments. We
can leverage those APIs to get rid of the /proc magic on
memory table preparation in vhost-user backend.

Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
This patch depends on below patch set:
https://patches.dpdk.org/project/dpdk/list/?series=1040

 drivers/net/virtio/virtio_user/vhost_user.c   | 214 ++++++++----------
 .../net/virtio/virtio_user/virtio_user_dev.c  |  19 ++
 2 files changed, 112 insertions(+), 121 deletions(-)
  

Comments

Burakov, Anatoly Aug. 28, 2018, 8:12 a.m. UTC | #1
On 28-Aug-18 8:53 AM, Tiwei Bie wrote:
> Recently some memory APIs were introduced to allow users
> to get the file descriptors for each memory segments. We
> can leverage those APIs to get rid of the /proc magic on
> memory table preparation in vhost-user backend.
> 
> Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> ---
> This patch depends on below patch set:
> https://patches.dpdk.org/project/dpdk/list/?series=1040

Great to see this done so quickly!

> 
>   drivers/net/virtio/virtio_user/vhost_user.c   | 214 ++++++++----------
>   .../net/virtio/virtio_user/virtio_user_dev.c  |  19 ++
>   2 files changed, 112 insertions(+), 121 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
> index ef6e43df8..770cfdc9a 100644
> --- a/drivers/net/virtio/virtio_user/vhost_user.c
> +++ b/drivers/net/virtio/virtio_user/vhost_user.c
> @@ -11,6 +11,9 @@
>   #include <string.h>
>   #include <errno.h>
>   
> +#include <rte_fbarray.h>
> +#include <rte_eal_memconfig.h>
> +
>   #include "vhost.h"
>   #include "virtio_user_dev.h"
>   
> @@ -121,133 +124,106 @@ vhost_user_read(int fd, struct vhost_user_msg *msg)
>   	return -1;
>   }
>   
> -struct hugepage_file_info {
> -	uint64_t addr;            /**< virtual addr */
> -	size_t   size;            /**< the file size */
> -	char     path[PATH_MAX];  /**< path to backing file */
> +struct walk_arg {
> +	struct vhost_memory *vm;
> +	int *fds;
> +	int region_nr;
>   };
>   
> -/* Two possible options:
> - * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file
> - * array. This is simple but cannot be used in secondary process because
> - * secondary process will close and munmap that file.
> - * 2. Match HUGEFILE_FMT to find hugepage files directly.
> - *
> - * We choose option 2.
> - */
>   static int
> -get_hugepage_file_info(struct hugepage_file_info huges[], int max)
> +update_memory_region(const struct rte_memseg_list *msl __rte_unused,
> +		const struct rte_memseg *ms, void *arg)
>   {
> -	int idx, k, exist;
> -	FILE *f;
> -	char buf[BUFSIZ], *tmp, *tail;
> -	char *str_underline, *str_start;
> -	int huge_index;
> -	uint64_t v_start, v_end;
> -	struct stat stats;
> -
> -	f = fopen("/proc/self/maps", "r");
> -	if (!f) {
> -		PMD_DRV_LOG(ERR, "cannot open /proc/self/maps");
> -		return -1;
> -	}
> -
> -	idx = 0;
> -	while (fgets(buf, sizeof(buf), f) != NULL) {
> -		if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) {
> -			PMD_DRV_LOG(ERR, "Failed to parse address");
> -			goto error;
> -		}
> -
> -		tmp = strchr(buf, ' ') + 1; /** skip address */
> -		tmp = strchr(tmp, ' ') + 1; /** skip perm */
> -		tmp = strchr(tmp, ' ') + 1; /** skip offset */
> -		tmp = strchr(tmp, ' ') + 1; /** skip dev */
> -		tmp = strchr(tmp, ' ') + 1; /** skip inode */
> -		while (*tmp == ' ')         /** skip spaces */
> -			tmp++;
> -		tail = strrchr(tmp, '\n');  /** remove newline if exists */
> -		if (tail)
> -			*tail = '\0';
> -
> -		/* Match HUGEFILE_FMT, aka "%s/%smap_%d",
> -		 * which is defined in eal_filesystem.h
> -		 */
> -		str_underline = strrchr(tmp, '_');
> -		if (!str_underline)
> -			continue;
> -
> -		str_start = str_underline - strlen("map");
> -		if (str_start < tmp)
> -			continue;
> -
> -		if (sscanf(str_start, "map_%d", &huge_index) != 1)
> -			continue;
> -
> -		/* skip duplicated file which is mapped to different regions */
> -		for (k = 0, exist = -1; k < idx; ++k) {
> -			if (!strcmp(huges[k].path, tmp)) {
> -				exist = k;
> -				break;
> -			}
> -		}
> -		if (exist >= 0)
> -			continue;
> -
> -		if (idx >= max) {
> -			PMD_DRV_LOG(ERR, "Exceed maximum of %d", max);
> -			goto error;
> -		}
> -
> -		huges[idx].addr = v_start;
> -		huges[idx].size = v_end - v_start; /* To be corrected later */
> -		snprintf(huges[idx].path, PATH_MAX, "%s", tmp);
> -		idx++;
> -	}
> -
> -	/* correct the size for files who have many regions */
> -	for (k = 0; k < idx; ++k) {
> -		if (stat(huges[k].path, &stats) < 0) {
> -			PMD_DRV_LOG(ERR, "Failed to stat %s, %s\n",
> -				    huges[k].path, strerror(errno));
> -			continue;
> -		}
> -		huges[k].size = stats.st_size;
> -		PMD_DRV_LOG(INFO, "file %s, size %zx\n",
> -			    huges[k].path, huges[k].size);
> -	}
> -
> -	fclose(f);
> -	return idx;
> -
> -error:
> -	fclose(f);
> -	return -1;
> -}
> -
> -static int
> -prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
> -{
> -	int i, num;
> -	struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS];
> +	struct walk_arg *wa = arg;
>   	struct vhost_memory_region *mr;
> +	uint64_t start_addr, end_addr, offset;
> +	int i, fd;
>   
> -	num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS);
> -	if (num < 0) {
> -		PMD_INIT_LOG(ERR, "Failed to prepare memory for vhost-user");
> +	fd = rte_memseg_get_fd_thread_unsafe(ms);
> +	if (fd < 0) {
> +		PMD_DRV_LOG(ERR, "Failed to get fd, ms=%p", ms);
>   		return -1;
>   	}
>   
> -	for (i = 0; i < num; ++i) {
> -		mr = &msg->payload.memory.regions[i];
> -		mr->guest_phys_addr = huges[i].addr; /* use vaddr! */
> -		mr->userspace_addr = huges[i].addr;
> -		mr->memory_size = huges[i].size;
> -		mr->mmap_offset = 0;
> -		fds[i] = open(huges[i].path, O_RDWR);
> +	start_addr = (uint64_t)(uintptr_t)ms->addr;
> +	end_addr = start_addr + ms->len;
> +
> +	/*
> +	 * XXX workaround!
> +	 *
> +	 * For --single-file-segments case, offset should be:
> +	 * offset = rte_fbarray_find_idx(&msl->memseg_arr, ms) * msl->page_sz;
> +	 *
> +	 * But it's not true for non-single-file-segments case.
> +	 *
> +	 * This is a temporary workaround which assumes the file will
> +	 * also be mapped from the beginning in --single-file-segments
> +	 * case. It should be replaced when we have a memory API to
> +	 * get the offset.

Yes, this is an unfortunate consequence of having split personalities in 
memory subsystem. A good solution would be to just deprecate 
non-single-file segments mode and always use it, but we cannot do that 
because we support kernel versions that do not support fallocate() on 
hugetlbfs (and it still leaves legacy mem mode, which effectively 
behaves like non-single-file segments as far as virtio is concerned).

How about we add this API in v2 for seg fd patchset?

> +	 */
> +	offset = 0;
> +
> +	for (i = 0; i < wa->region_nr; i++) {
> +		if (wa->fds[i] != fd)
> +			continue;
> +
> +		mr = &wa->vm->regions[i];
> +
> +		if (mr->userspace_addr + mr->memory_size < end_addr)
> +			mr->memory_size = end_addr - mr->userspace_addr;
> +
> +		if (mr->userspace_addr > start_addr) {
> +			mr->userspace_addr = start_addr;
> +			mr->guest_phys_addr = start_addr;
> +		}
> +
> +		if (mr->mmap_offset > offset)
> +			mr->mmap_offset = offset;
> +
> +		PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
> +			" addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
> +			mr->mmap_offset, mr->userspace_addr,
> +			mr->memory_size);
> +
> +		return 0;
> +	}
> +
> +	if (i >= VHOST_MEMORY_MAX_NREGIONS) {
> +		PMD_DRV_LOG(ERR, "Too many memory regions");
> +		return -1;
>   	}
>   
> -	msg->payload.memory.nregions = num;
> +	mr = &wa->vm->regions[i];
> +	wa->fds[i] = fd;
> +
> +	mr->guest_phys_addr = start_addr;
> +	mr->userspace_addr = start_addr;
> +	mr->memory_size = ms->len;
> +	mr->mmap_offset = offset;
> +
> +	PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
> +		" addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
> +		mr->mmap_offset, mr->userspace_addr,
> +		mr->memory_size);
> +
> +	wa->region_nr++;
> +
> +	return 0;
> +}
> +
> +static int
> +prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
> +{
> +	struct walk_arg wa;
> +
> +	wa.region_nr = 0;
> +	wa.vm = &msg->payload.memory;
> +	wa.fds = fds;
> +
> +	if (rte_memseg_walk_thread_unsafe(update_memory_region, &wa) < 0)
> +		return -1;
> +
> +	msg->payload.memory.nregions = wa.region_nr;
>   	msg->payload.memory.padding = 0;
>   
>   	return 0;
> @@ -280,7 +256,7 @@ vhost_user_sock(struct virtio_user_dev *dev,
>   	int need_reply = 0;
>   	int fds[VHOST_MEMORY_MAX_NREGIONS];
>   	int fd_num = 0;
> -	int i, len;
> +	int len;
>   	int vhostfd = dev->vhostfd;
>   
>   	RTE_SET_USED(m);
> @@ -364,10 +340,6 @@ vhost_user_sock(struct virtio_user_dev *dev,
>   		return -1;
>   	}
>   
> -	if (req == VHOST_USER_SET_MEM_TABLE)
> -		for (i = 0; i < fd_num; ++i)
> -			close(fds[i]);
> -
>   	if (need_reply) {
>   		if (vhost_user_read(vhostfd, &msg) < 0) {
>   			PMD_DRV_LOG(ERR, "Received msg failed: %s",
> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> index 7df600b02..869e96f87 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> @@ -13,6 +13,8 @@
>   #include <sys/types.h>
>   #include <sys/stat.h>
>   
> +#include <rte_eal_memconfig.h>
> +
>   #include "vhost.h"
>   #include "virtio_user_dev.h"
>   #include "../virtio_ethdev.h"
> @@ -109,9 +111,24 @@ is_vhost_user_by_type(const char *path)
>   int
>   virtio_user_start_device(struct virtio_user_dev *dev)
>   {
> +	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
>   	uint64_t features;
>   	int ret;
>   
> +	/*
> +	 * XXX workaround!
> +	 *
> +	 * We need to make sure that the locks will be
> +	 * taken in the correct order to avoid deadlocks.
> +	 *
> +	 * Before releasing this lock, this thread should
> +	 * not trigger any memory hotplug events.
> +	 *
> +	 * This is a temporary workaround, and should be
> +	 * replaced when we get proper supports from the
> +	 * memory subsystem in the future.
> +	 */
> +	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);

This feels like it should be a separate patch, because it fixes the 
issue that exists independently of whether we have segment fd API or not.

>   	pthread_mutex_lock(&dev->mutex);
>   
>   	if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0)
> @@ -152,10 +169,12 @@ virtio_user_start_device(struct virtio_user_dev *dev)
>   
>   	dev->started = true;
>   	pthread_mutex_unlock(&dev->mutex);
> +	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
>   
>   	return 0;
>   error:
>   	pthread_mutex_unlock(&dev->mutex);
> +	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
>   	/* TODO: free resource here or caller to check */
>   	return -1;
>   }
>
  
Tiwei Bie Aug. 28, 2018, 8:42 a.m. UTC | #2
On Tue, Aug 28, 2018 at 09:12:38AM +0100, Burakov, Anatoly wrote:
> On 28-Aug-18 8:53 AM, Tiwei Bie wrote:
[...]
> >   
> > -	for (i = 0; i < num; ++i) {
> > -		mr = &msg->payload.memory.regions[i];
> > -		mr->guest_phys_addr = huges[i].addr; /* use vaddr! */
> > -		mr->userspace_addr = huges[i].addr;
> > -		mr->memory_size = huges[i].size;
> > -		mr->mmap_offset = 0;
> > -		fds[i] = open(huges[i].path, O_RDWR);
> > +	start_addr = (uint64_t)(uintptr_t)ms->addr;
> > +	end_addr = start_addr + ms->len;
> > +
> > +	/*
> > +	 * XXX workaround!
> > +	 *
> > +	 * For --single-file-segments case, offset should be:
> > +	 * offset = rte_fbarray_find_idx(&msl->memseg_arr, ms) * msl->page_sz;
> > +	 *
> > +	 * But it's not true for non-single-file-segments case.
> > +	 *
> > +	 * This is a temporary workaround which assumes the file will
> > +	 * also be mapped from the beginning in --single-file-segments
> > +	 * case. It should be replaced when we have a memory API to
> > +	 * get the offset.
> 
> Yes, this is an unfortunate consequence of having split personalities in 
> memory subsystem. A good solution would be to just deprecate 
> non-single-file segments mode and always use it, but we cannot do that 
> because we support kernel versions that do not support fallocate() on 
> hugetlbfs (and it still leaves legacy mem mode, which effectively 
> behaves like non-single-file segments as far as virtio is concerned).
> 
> How about we add this API in v2 for seg fd patchset?

Sure, thanks!

Besides, I saw another minor issue. When --legacy-mem
and --single-file-segments are both specified, the EAL
init doesn't fail, but fd can't be got successfully.

> 
> > +	 */
> > +	offset = 0;
> > +
[...]
> > @@ -109,9 +111,24 @@ is_vhost_user_by_type(const char *path)
> >   int
> >   virtio_user_start_device(struct virtio_user_dev *dev)
> >   {
> > +	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
> >   	uint64_t features;
> >   	int ret;
> >   
> > +	/*
> > +	 * XXX workaround!
> > +	 *
> > +	 * We need to make sure that the locks will be
> > +	 * taken in the correct order to avoid deadlocks.
> > +	 *
> > +	 * Before releasing this lock, this thread should
> > +	 * not trigger any memory hotplug events.
> > +	 *
> > +	 * This is a temporary workaround, and should be
> > +	 * replaced when we get proper supports from the
> > +	 * memory subsystem in the future.
> > +	 */
> > +	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
> 
> This feels like it should be a separate patch, because it fixes the 
> issue that exists independently of whether we have segment fd API or not.

Yeah, I'll put it in a separate patch.

> 
> >   	pthread_mutex_lock(&dev->mutex);
> >   
> >   	if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0)
> > @@ -152,10 +169,12 @@ virtio_user_start_device(struct virtio_user_dev *dev)
> >   
> >   	dev->started = true;
> >   	pthread_mutex_unlock(&dev->mutex);
> > +	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
> >   
> >   	return 0;
> >   error:
> >   	pthread_mutex_unlock(&dev->mutex);
> > +	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
> >   	/* TODO: free resource here or caller to check */
> >   	return -1;
> >   }
> > 
> 
> 
> -- 
> Thanks,
> Anatoly
  
Burakov, Anatoly Aug. 28, 2018, 12:56 p.m. UTC | #3
On 28-Aug-18 9:42 AM, Tiwei Bie wrote:
> On Tue, Aug 28, 2018 at 09:12:38AM +0100, Burakov, Anatoly wrote:
>> On 28-Aug-18 8:53 AM, Tiwei Bie wrote:
> [...]
>>>    
>>> -	for (i = 0; i < num; ++i) {
>>> -		mr = &msg->payload.memory.regions[i];
>>> -		mr->guest_phys_addr = huges[i].addr; /* use vaddr! */
>>> -		mr->userspace_addr = huges[i].addr;
>>> -		mr->memory_size = huges[i].size;
>>> -		mr->mmap_offset = 0;
>>> -		fds[i] = open(huges[i].path, O_RDWR);
>>> +	start_addr = (uint64_t)(uintptr_t)ms->addr;
>>> +	end_addr = start_addr + ms->len;
>>> +
>>> +	/*
>>> +	 * XXX workaround!
>>> +	 *
>>> +	 * For --single-file-segments case, offset should be:
>>> +	 * offset = rte_fbarray_find_idx(&msl->memseg_arr, ms) * msl->page_sz;
>>> +	 *
>>> +	 * But it's not true for non-single-file-segments case.
>>> +	 *
>>> +	 * This is a temporary workaround which assumes the file will
>>> +	 * also be mapped from the beginning in --single-file-segments
>>> +	 * case. It should be replaced when we have a memory API to
>>> +	 * get the offset.
>>
>> Yes, this is an unfortunate consequence of having split personalities in
>> memory subsystem. A good solution would be to just deprecate
>> non-single-file segments mode and always use it, but we cannot do that
>> because we support kernel versions that do not support fallocate() on
>> hugetlbfs (and it still leaves legacy mem mode, which effectively
>> behaves like non-single-file segments as far as virtio is concerned).
>>
>> How about we add this API in v2 for seg fd patchset?
> 
> Sure, thanks!
> 
> Besides, I saw another minor issue. When --legacy-mem
> and --single-file-segments are both specified, the EAL
> init doesn't fail, but fd can't be got successfully.
> 

I'll have to look into this, because as far as i recall, i've submitted 
a patch specifically prohibiting legacy mem combined with single file 
segments. I'll double check though. Thanks for testing!
  

Patch

diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
index ef6e43df8..770cfdc9a 100644
--- a/drivers/net/virtio/virtio_user/vhost_user.c
+++ b/drivers/net/virtio/virtio_user/vhost_user.c
@@ -11,6 +11,9 @@ 
 #include <string.h>
 #include <errno.h>
 
+#include <rte_fbarray.h>
+#include <rte_eal_memconfig.h>
+
 #include "vhost.h"
 #include "virtio_user_dev.h"
 
@@ -121,133 +124,106 @@  vhost_user_read(int fd, struct vhost_user_msg *msg)
 	return -1;
 }
 
-struct hugepage_file_info {
-	uint64_t addr;            /**< virtual addr */
-	size_t   size;            /**< the file size */
-	char     path[PATH_MAX];  /**< path to backing file */
+struct walk_arg {
+	struct vhost_memory *vm;
+	int *fds;
+	int region_nr;
 };
 
-/* Two possible options:
- * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file
- * array. This is simple but cannot be used in secondary process because
- * secondary process will close and munmap that file.
- * 2. Match HUGEFILE_FMT to find hugepage files directly.
- *
- * We choose option 2.
- */
 static int
-get_hugepage_file_info(struct hugepage_file_info huges[], int max)
+update_memory_region(const struct rte_memseg_list *msl __rte_unused,
+		const struct rte_memseg *ms, void *arg)
 {
-	int idx, k, exist;
-	FILE *f;
-	char buf[BUFSIZ], *tmp, *tail;
-	char *str_underline, *str_start;
-	int huge_index;
-	uint64_t v_start, v_end;
-	struct stat stats;
-
-	f = fopen("/proc/self/maps", "r");
-	if (!f) {
-		PMD_DRV_LOG(ERR, "cannot open /proc/self/maps");
-		return -1;
-	}
-
-	idx = 0;
-	while (fgets(buf, sizeof(buf), f) != NULL) {
-		if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) {
-			PMD_DRV_LOG(ERR, "Failed to parse address");
-			goto error;
-		}
-
-		tmp = strchr(buf, ' ') + 1; /** skip address */
-		tmp = strchr(tmp, ' ') + 1; /** skip perm */
-		tmp = strchr(tmp, ' ') + 1; /** skip offset */
-		tmp = strchr(tmp, ' ') + 1; /** skip dev */
-		tmp = strchr(tmp, ' ') + 1; /** skip inode */
-		while (*tmp == ' ')         /** skip spaces */
-			tmp++;
-		tail = strrchr(tmp, '\n');  /** remove newline if exists */
-		if (tail)
-			*tail = '\0';
-
-		/* Match HUGEFILE_FMT, aka "%s/%smap_%d",
-		 * which is defined in eal_filesystem.h
-		 */
-		str_underline = strrchr(tmp, '_');
-		if (!str_underline)
-			continue;
-
-		str_start = str_underline - strlen("map");
-		if (str_start < tmp)
-			continue;
-
-		if (sscanf(str_start, "map_%d", &huge_index) != 1)
-			continue;
-
-		/* skip duplicated file which is mapped to different regions */
-		for (k = 0, exist = -1; k < idx; ++k) {
-			if (!strcmp(huges[k].path, tmp)) {
-				exist = k;
-				break;
-			}
-		}
-		if (exist >= 0)
-			continue;
-
-		if (idx >= max) {
-			PMD_DRV_LOG(ERR, "Exceed maximum of %d", max);
-			goto error;
-		}
-
-		huges[idx].addr = v_start;
-		huges[idx].size = v_end - v_start; /* To be corrected later */
-		snprintf(huges[idx].path, PATH_MAX, "%s", tmp);
-		idx++;
-	}
-
-	/* correct the size for files who have many regions */
-	for (k = 0; k < idx; ++k) {
-		if (stat(huges[k].path, &stats) < 0) {
-			PMD_DRV_LOG(ERR, "Failed to stat %s, %s\n",
-				    huges[k].path, strerror(errno));
-			continue;
-		}
-		huges[k].size = stats.st_size;
-		PMD_DRV_LOG(INFO, "file %s, size %zx\n",
-			    huges[k].path, huges[k].size);
-	}
-
-	fclose(f);
-	return idx;
-
-error:
-	fclose(f);
-	return -1;
-}
-
-static int
-prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
-{
-	int i, num;
-	struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS];
+	struct walk_arg *wa = arg;
 	struct vhost_memory_region *mr;
+	uint64_t start_addr, end_addr, offset;
+	int i, fd;
 
-	num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS);
-	if (num < 0) {
-		PMD_INIT_LOG(ERR, "Failed to prepare memory for vhost-user");
+	fd = rte_memseg_get_fd_thread_unsafe(ms);
+	if (fd < 0) {
+		PMD_DRV_LOG(ERR, "Failed to get fd, ms=%p", ms);
 		return -1;
 	}
 
-	for (i = 0; i < num; ++i) {
-		mr = &msg->payload.memory.regions[i];
-		mr->guest_phys_addr = huges[i].addr; /* use vaddr! */
-		mr->userspace_addr = huges[i].addr;
-		mr->memory_size = huges[i].size;
-		mr->mmap_offset = 0;
-		fds[i] = open(huges[i].path, O_RDWR);
+	start_addr = (uint64_t)(uintptr_t)ms->addr;
+	end_addr = start_addr + ms->len;
+
+	/*
+	 * XXX workaround!
+	 *
+	 * For --single-file-segments case, offset should be:
+	 * offset = rte_fbarray_find_idx(&msl->memseg_arr, ms) * msl->page_sz;
+	 *
+	 * But it's not true for non-single-file-segments case.
+	 *
+	 * This is a temporary workaround which assumes the file will
+	 * also be mapped from the beginning in --single-file-segments
+	 * case. It should be replaced when we have a memory API to
+	 * get the offset.
+	 */
+	offset = 0;
+
+	for (i = 0; i < wa->region_nr; i++) {
+		if (wa->fds[i] != fd)
+			continue;
+
+		mr = &wa->vm->regions[i];
+
+		if (mr->userspace_addr + mr->memory_size < end_addr)
+			mr->memory_size = end_addr - mr->userspace_addr;
+
+		if (mr->userspace_addr > start_addr) {
+			mr->userspace_addr = start_addr;
+			mr->guest_phys_addr = start_addr;
+		}
+
+		if (mr->mmap_offset > offset)
+			mr->mmap_offset = offset;
+
+		PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
+			" addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
+			mr->mmap_offset, mr->userspace_addr,
+			mr->memory_size);
+
+		return 0;
+	}
+
+	if (i >= VHOST_MEMORY_MAX_NREGIONS) {
+		PMD_DRV_LOG(ERR, "Too many memory regions");
+		return -1;
 	}
 
-	msg->payload.memory.nregions = num;
+	mr = &wa->vm->regions[i];
+	wa->fds[i] = fd;
+
+	mr->guest_phys_addr = start_addr;
+	mr->userspace_addr = start_addr;
+	mr->memory_size = ms->len;
+	mr->mmap_offset = offset;
+
+	PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
+		" addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
+		mr->mmap_offset, mr->userspace_addr,
+		mr->memory_size);
+
+	wa->region_nr++;
+
+	return 0;
+}
+
+static int
+prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
+{
+	struct walk_arg wa;
+
+	wa.region_nr = 0;
+	wa.vm = &msg->payload.memory;
+	wa.fds = fds;
+
+	if (rte_memseg_walk_thread_unsafe(update_memory_region, &wa) < 0)
+		return -1;
+
+	msg->payload.memory.nregions = wa.region_nr;
 	msg->payload.memory.padding = 0;
 
 	return 0;
@@ -280,7 +256,7 @@  vhost_user_sock(struct virtio_user_dev *dev,
 	int need_reply = 0;
 	int fds[VHOST_MEMORY_MAX_NREGIONS];
 	int fd_num = 0;
-	int i, len;
+	int len;
 	int vhostfd = dev->vhostfd;
 
 	RTE_SET_USED(m);
@@ -364,10 +340,6 @@  vhost_user_sock(struct virtio_user_dev *dev,
 		return -1;
 	}
 
-	if (req == VHOST_USER_SET_MEM_TABLE)
-		for (i = 0; i < fd_num; ++i)
-			close(fds[i]);
-
 	if (need_reply) {
 		if (vhost_user_read(vhostfd, &msg) < 0) {
 			PMD_DRV_LOG(ERR, "Received msg failed: %s",
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
index 7df600b02..869e96f87 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -13,6 +13,8 @@ 
 #include <sys/types.h>
 #include <sys/stat.h>
 
+#include <rte_eal_memconfig.h>
+
 #include "vhost.h"
 #include "virtio_user_dev.h"
 #include "../virtio_ethdev.h"
@@ -109,9 +111,24 @@  is_vhost_user_by_type(const char *path)
 int
 virtio_user_start_device(struct virtio_user_dev *dev)
 {
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 	uint64_t features;
 	int ret;
 
+	/*
+	 * XXX workaround!
+	 *
+	 * We need to make sure that the locks will be
+	 * taken in the correct order to avoid deadlocks.
+	 *
+	 * Before releasing this lock, this thread should
+	 * not trigger any memory hotplug events.
+	 *
+	 * This is a temporary workaround, and should be
+	 * replaced when we get proper supports from the
+	 * memory subsystem in the future.
+	 */
+	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
 	pthread_mutex_lock(&dev->mutex);
 
 	if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0)
@@ -152,10 +169,12 @@  virtio_user_start_device(struct virtio_user_dev *dev)
 
 	dev->started = true;
 	pthread_mutex_unlock(&dev->mutex);
+	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
 
 	return 0;
 error:
 	pthread_mutex_unlock(&dev->mutex);
+	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
 	/* TODO: free resource here or caller to check */
 	return -1;
 }