[v2,2/2] vhost: cache gpa to hpa translation

Message ID 20200401145011.67357-2-yong.liu@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Series [v2,1/2] vhost: utilize dpdk dynamic memory allocator

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/travis-robot success Travis build: passed
ci/Intel-compilation success Compilation OK

Commit Message

Marvin Liu April 1, 2020, 2:50 p.m. UTC
If Tx zero copy is enabled, the gpa to hpa mapping table is updated one
entry at a time. This harms performance when the guest memory backend
uses 2M hugepages. Now add a cached mapping table which is sorted by
usage sequence. Address translation will first check the cached mapping
table, then check the unsorted mapping table if no match is found.

Signed-off-by: Marvin Liu <yong.liu@intel.com>
  

Comments

Gavin Hu April 1, 2020, 10:07 a.m. UTC | #1
Hi Marvin,

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Marvin Liu
> Sent: Wednesday, April 1, 2020 10:50 PM
> To: maxime.coquelin@redhat.com; xiaolong.ye@intel.com;
> zhihong.wang@intel.com
> Cc: dev@dpdk.org; Marvin Liu <yong.liu@intel.com>
> Subject: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> 
> If Tx zero copy is enabled, the gpa to hpa mapping table is updated one
> entry at a time. This harms performance when the guest memory backend
> uses 2M hugepages. Now add a cached mapping table which is sorted by
> usage sequence. Address translation will first check the cached mapping
> table, then check the unsorted mapping table if no match is found.
> 
> Signed-off-by: Marvin Liu <yong.liu@intel.com>
> 
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 2087d1400..5cb0e83dd 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -368,7 +368,9 @@ struct virtio_net {
>  	struct vhost_device_ops const *notify_ops;
> 
>  	uint32_t		nr_guest_pages;
> +	uint32_t		nr_cached_guest_pages;
>  	uint32_t		max_guest_pages;
> +	struct guest_page       *cached_guest_pages;
>  	struct guest_page       *guest_pages;
> 
>  	int			slave_req_fd;
> @@ -553,12 +555,25 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa,
> uint64_t size)
>  {
>  	uint32_t i;
>  	struct guest_page *page;
> +	uint32_t cached_pages = dev->nr_cached_guest_pages;
> +
> +	for (i = 0; i < cached_pages; i++) {
> +		page = &dev->cached_guest_pages[i];
> +		if (gpa >= page->guest_phys_addr &&
> +			gpa + size < page->guest_phys_addr + page->size) {
> +			return gpa - page->guest_phys_addr +
> +				page->host_phys_addr;
> +		}
> +	}
Sorry, I did not see any speedup with cached guest pages in comparison to the old code below.
Is it not a simple copy? 
Is it a better idea to use hash instead to speed up the translation? 
/Gavin
> 
>  	for (i = 0; i < dev->nr_guest_pages; i++) {
>  		page = &dev->guest_pages[i];
> 
>  		if (gpa >= page->guest_phys_addr &&
>  		    gpa + size < page->guest_phys_addr + page->size) {
> +			rte_memcpy(&dev->cached_guest_pages[cached_pages],
> +				   page, sizeof(struct guest_page));
> +			dev->nr_cached_guest_pages++;
>  			return gpa - page->guest_phys_addr +
>  			       page->host_phys_addr;
>  		}
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index 79fcb9d19..1bae1fddc 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
>  	}
> 
>  	rte_free(dev->guest_pages);
> +	rte_free(dev->cached_guest_pages);
>  	dev->guest_pages = NULL;
> +	dev->cached_guest_pages = NULL;
> 
>  	if (dev->log_addr) {
>  		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
> @@ -898,7 +900,7 @@ add_one_guest_page(struct virtio_net *dev,
> uint64_t guest_phys_addr,
>  		   uint64_t host_phys_addr, uint64_t size)
>  {
>  	struct guest_page *page, *last_page;
> -	struct guest_page *old_pages;
> +	struct guest_page *old_pages, *old_cached_pages;
> 
>  	if (dev->nr_guest_pages == dev->max_guest_pages) {
>  		dev->max_guest_pages *= 2;
> @@ -906,9 +908,19 @@ add_one_guest_page(struct virtio_net *dev,
> uint64_t guest_phys_addr,
>  		dev->guest_pages = rte_realloc(dev->guest_pages,
>  					dev->max_guest_pages *
> sizeof(*page),
>  					RTE_CACHE_LINE_SIZE);
> -		if (dev->guest_pages == NULL) {
> +		old_cached_pages = dev->cached_guest_pages;
> +		dev->cached_guest_pages = rte_realloc(dev->cached_guest_pages,
> +						dev->max_guest_pages *
> +						sizeof(*page),
> +						RTE_CACHE_LINE_SIZE);
> +		dev->nr_cached_guest_pages = 0;
> +		if (dev->guest_pages == NULL ||
> +				dev->cached_guest_pages == NULL) {
>  			VHOST_LOG_CONFIG(ERR, "cannot realloc
> guest_pages\n");
>  			rte_free(old_pages);
> +			rte_free(old_cached_pages);
> +			dev->guest_pages = NULL;
> +			dev->cached_guest_pages = NULL;
>  			return -1;
>  		}
>  	}
> @@ -1078,6 +1090,20 @@ vhost_user_set_mem_table(struct virtio_net
> **pdev, struct VhostUserMsg *msg,
>  		}
>  	}
> 
> +	if (dev->cached_guest_pages == NULL) {
> +		dev->cached_guest_pages = rte_zmalloc(NULL,
> +						dev->max_guest_pages *
> +						sizeof(struct guest_page),
> +						RTE_CACHE_LINE_SIZE);
> +		if (dev->cached_guest_pages == NULL) {
> +			VHOST_LOG_CONFIG(ERR,
> +				"(%d) failed to allocate memory "
> +				"for dev->cached_guest_pages\n",
> +				dev->vid);
> +			return RTE_VHOST_MSG_RESULT_ERR;
> +		}
> +	}
> +
>  	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct
> rte_vhost_memory) +
>  		sizeof(struct rte_vhost_mem_region) * memory->nregions,
> 0);
>  	if (dev->mem == NULL) {
> --
> 2.17.1
  
Marvin Liu April 1, 2020, 1:01 p.m. UTC | #2
> -----Original Message-----
> From: Gavin Hu <Gavin.Hu@arm.com>
> Sent: Wednesday, April 1, 2020 6:07 PM
> To: Liu, Yong <yong.liu@intel.com>; maxime.coquelin@redhat.com; Ye,
> Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong
> <zhihong.wang@intel.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> 
> Hi Marvin,
> 
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Marvin Liu
> > Sent: Wednesday, April 1, 2020 10:50 PM
> > To: maxime.coquelin@redhat.com; xiaolong.ye@intel.com;
> > zhihong.wang@intel.com
> > Cc: dev@dpdk.org; Marvin Liu <yong.liu@intel.com>
> > Subject: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> >
> > If Tx zero copy is enabled, the gpa to hpa mapping table is updated one
> > entry at a time. This harms performance when the guest memory backend
> > uses 2M hugepages. Now add a cached mapping table which is sorted by
> > usage sequence. Address translation will first check the cached mapping
> > table, then check the unsorted mapping table if no match is found.
> >
> > Signed-off-by: Marvin Liu <yong.liu@intel.com>
> >
> > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> > index 2087d1400..5cb0e83dd 100644
> > --- a/lib/librte_vhost/vhost.h
> > +++ b/lib/librte_vhost/vhost.h
> > @@ -368,7 +368,9 @@ struct virtio_net {
> >  	struct vhost_device_ops const *notify_ops;
> >
> >  	uint32_t		nr_guest_pages;
> > +	uint32_t		nr_cached_guest_pages;
> >  	uint32_t		max_guest_pages;
> > +	struct guest_page       *cached_guest_pages;
> >  	struct guest_page       *guest_pages;
> >
> >  	int			slave_req_fd;
> > @@ -553,12 +555,25 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa,
> > uint64_t size)
> >  {
> >  	uint32_t i;
> >  	struct guest_page *page;
> > +	uint32_t cached_pages = dev->nr_cached_guest_pages;
> > +
> > +	for (i = 0; i < cached_pages; i++) {
> > +		page = &dev->cached_guest_pages[i];
> > +		if (gpa >= page->guest_phys_addr &&
> > +			gpa + size < page->guest_phys_addr + page->size) {
> > +			return gpa - page->guest_phys_addr +
> > +				page->host_phys_addr;
> > +		}
> > +	}
> Sorry, I did not see any speedup with cached guest pages in comparison to
> the old code below.
> Is it not a simple copy?
> Is it a better idea to use hash instead to speed up the translation?
> /Gavin

Hi Gavin,
Here we just resort the overall mapping table according to usage sequence.
Most likely the virtio driver will reuse recently recycled buffers, so the search will find a match near the beginning.
That is a simple fix for performance enhancement. If a hash were used for the index, it would cost much more in the normal case.
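For reference, the translation runs once per descriptor segment on the dequeue zero-copy path, so the per-lookup cost matters. A rough sketch of the consumer side (simplified and untested; the helper name and fields are approximate, not verbatim virtio_net.c):

static __rte_always_inline int
zcopy_map_seg(struct virtio_net *dev, struct rte_mbuf *cur,
	      uint64_t buf_addr, uint64_t buf_iova,
	      uint64_t buf_offset, uint32_t cpy_len)
{
	/* one gpa_to_hpa() call per descriptor segment */
	uint64_t hpa = gpa_to_hpa(dev, buf_iova + buf_offset, cpy_len);

	if (unlikely(hpa == 0))
		return -1; /* no contiguous host mapping for this segment */

	/* make the mbuf point straight into guest memory */
	cur->buf_addr = (void *)(uintptr_t)(buf_addr + buf_offset);
	cur->buf_iova = hpa;
	return 0;
}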

Regards,
Marvin 


> >
> >  	for (i = 0; i < dev->nr_guest_pages; i++) {
> >  		page = &dev->guest_pages[i];
> >
> >  		if (gpa >= page->guest_phys_addr &&
> >  		    gpa + size < page->guest_phys_addr + page->size) {
> > +			rte_memcpy(&dev->cached_guest_pages[cached_pages],
> > +				   page, sizeof(struct guest_page));
> > +			dev->nr_cached_guest_pages++;
> >  			return gpa - page->guest_phys_addr +
> >  			       page->host_phys_addr;
> >  		}
> > diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> > index 79fcb9d19..1bae1fddc 100644
> > --- a/lib/librte_vhost/vhost_user.c
> > +++ b/lib/librte_vhost/vhost_user.c
> > @@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
> >  	}
> >
> >  	rte_free(dev->guest_pages);
> > +	rte_free(dev->cached_guest_pages);
> >  	dev->guest_pages = NULL;
> > +	dev->cached_guest_pages = NULL;
> >
> >  	if (dev->log_addr) {
> >  		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
> > @@ -898,7 +900,7 @@ add_one_guest_page(struct virtio_net *dev,
> > uint64_t guest_phys_addr,
> >  		   uint64_t host_phys_addr, uint64_t size)
> >  {
> >  	struct guest_page *page, *last_page;
> > -	struct guest_page *old_pages;
> > +	struct guest_page *old_pages, *old_cached_pages;
> >
> >  	if (dev->nr_guest_pages == dev->max_guest_pages) {
> >  		dev->max_guest_pages *= 2;
> > @@ -906,9 +908,19 @@ add_one_guest_page(struct virtio_net *dev,
> > uint64_t guest_phys_addr,
> >  		dev->guest_pages = rte_realloc(dev->guest_pages,
> >  					dev->max_guest_pages *
> > sizeof(*page),
> >  					RTE_CACHE_LINE_SIZE);
> > -		if (dev->guest_pages == NULL) {
> > +		old_cached_pages = dev->cached_guest_pages;
> > +		dev->cached_guest_pages = rte_realloc(dev->cached_guest_pages,
> > +						dev->max_guest_pages *
> > +						sizeof(*page),
> > +						RTE_CACHE_LINE_SIZE);
> > +		dev->nr_cached_guest_pages = 0;
> > +		if (dev->guest_pages == NULL ||
> > +				dev->cached_guest_pages == NULL) {
> >  			VHOST_LOG_CONFIG(ERR, "cannot realloc
> > guest_pages\n");
> >  			rte_free(old_pages);
> > +			rte_free(old_cached_pages);
> > +			dev->guest_pages = NULL;
> > +			dev->cached_guest_pages = NULL;
> >  			return -1;
> >  		}
> >  	}
> > @@ -1078,6 +1090,20 @@ vhost_user_set_mem_table(struct virtio_net
> > **pdev, struct VhostUserMsg *msg,
> >  		}
> >  	}
> >
> > +	if (dev->cached_guest_pages == NULL) {
> > +		dev->cached_guest_pages = rte_zmalloc(NULL,
> > +						dev->max_guest_pages *
> > +						sizeof(struct guest_page),
> > +						RTE_CACHE_LINE_SIZE);
> > +		if (dev->cached_guest_pages == NULL) {
> > +			VHOST_LOG_CONFIG(ERR,
> > +				"(%d) failed to allocate memory "
> > +				"for dev->cached_guest_pages\n",
> > +				dev->vid);
> > +			return RTE_VHOST_MSG_RESULT_ERR;
> > +		}
> > +	}
> > +
> >  	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct
> > rte_vhost_memory) +
> >  		sizeof(struct rte_vhost_mem_region) * memory->nregions,
> > 0);
> >  	if (dev->mem == NULL) {
> > --
> > 2.17.1
  
Xiaolong Ye April 2, 2020, 2:57 a.m. UTC | #3
On 04/01, Marvin Liu wrote:
>If Tx zero copy is enabled, the gpa to hpa mapping table is updated one
>entry at a time. This harms performance when the guest memory backend
>uses 2M hugepages. Now add a cached mapping table which is sorted by
>usage sequence. Address translation will first check the cached mapping
>table, then check the unsorted mapping table if no match is found.
>
>Signed-off-by: Marvin Liu <yong.liu@intel.com>
>
>diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>index 2087d1400..5cb0e83dd 100644
>--- a/lib/librte_vhost/vhost.h
>+++ b/lib/librte_vhost/vhost.h
>@@ -368,7 +368,9 @@ struct virtio_net {
> 	struct vhost_device_ops const *notify_ops;
> 
> 	uint32_t		nr_guest_pages;
>+	uint32_t		nr_cached_guest_pages;
> 	uint32_t		max_guest_pages;
>+	struct guest_page       *cached_guest_pages;
> 	struct guest_page       *guest_pages;
> 
> 	int			slave_req_fd;
>@@ -553,12 +555,25 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
> {
> 	uint32_t i;
> 	struct guest_page *page;
>+	uint32_t cached_pages = dev->nr_cached_guest_pages;
>+
>+	for (i = 0; i < cached_pages; i++) {
>+		page = &dev->cached_guest_pages[i];
>+		if (gpa >= page->guest_phys_addr &&
>+			gpa + size < page->guest_phys_addr + page->size) {
>+			return gpa - page->guest_phys_addr +
>+				page->host_phys_addr;
>+		}
>+	}
> 
> 	for (i = 0; i < dev->nr_guest_pages; i++) {
> 		page = &dev->guest_pages[i];
> 
> 		if (gpa >= page->guest_phys_addr &&
> 		    gpa + size < page->guest_phys_addr + page->size) {
>+			rte_memcpy(&dev->cached_guest_pages[cached_pages],
>+				   page, sizeof(struct guest_page));
>+			dev->nr_cached_guest_pages++;
> 			return gpa - page->guest_phys_addr +
> 			       page->host_phys_addr;
> 		}
>diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
>index 79fcb9d19..1bae1fddc 100644
>--- a/lib/librte_vhost/vhost_user.c
>+++ b/lib/librte_vhost/vhost_user.c
>@@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
> 	}
> 
> 	rte_free(dev->guest_pages);
>+	rte_free(dev->cached_guest_pages);
> 	dev->guest_pages = NULL;
>+	dev->cached_guest_pages = NULL;
> 
> 	if (dev->log_addr) {
> 		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
>@@ -898,7 +900,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> 		   uint64_t host_phys_addr, uint64_t size)
> {
> 	struct guest_page *page, *last_page;
>-	struct guest_page *old_pages;
>+	struct guest_page *old_pages, *old_cached_pages;
> 
> 	if (dev->nr_guest_pages == dev->max_guest_pages) {
> 		dev->max_guest_pages *= 2;
>@@ -906,9 +908,19 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> 		dev->guest_pages = rte_realloc(dev->guest_pages,
> 					dev->max_guest_pages * sizeof(*page),
> 					RTE_CACHE_LINE_SIZE);
>-		if (dev->guest_pages == NULL) {
>+		old_cached_pages = dev->cached_guest_pages;
>+		dev->cached_guest_pages = rte_realloc(dev->cached_guest_pages,
>+						dev->max_guest_pages *
>+						sizeof(*page),
>+						RTE_CACHE_LINE_SIZE);
>+		dev->nr_cached_guest_pages = 0;
>+		if (dev->guest_pages == NULL ||
>+				dev->cached_guest_pages == NULL) {
> 			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
> 			rte_free(old_pages);
>+			rte_free(old_cached_pages);
>+			dev->guest_pages = NULL;
>+			dev->cached_guest_pages = NULL;
> 			return -1;
> 		}
> 	}
>@@ -1078,6 +1090,20 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
> 		}
> 	}
> 
>+	if (dev->cached_guest_pages == NULL) {
>+		dev->cached_guest_pages = rte_zmalloc(NULL,
>+						dev->max_guest_pages *
>+						sizeof(struct guest_page),
>+						RTE_CACHE_LINE_SIZE);
>+		if (dev->cached_guest_pages == NULL) {
>+			VHOST_LOG_CONFIG(ERR,
>+				"(%d) failed to allocate memory "
>+				"for dev->cached_guest_pages\n",
>+				dev->vid);
>+			return RTE_VHOST_MSG_RESULT_ERR;
>+		}
>+	}
>+
> 	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) +
> 		sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
> 	if (dev->mem == NULL) {
>-- 
>2.17.1
>

Reviewed-by: Xiaolong Ye <xiaolong.ye@intel.com>
  
Gavin Hu April 2, 2020, 3:04 a.m. UTC | #4
Hi Marvin,

> -----Original Message-----
> From: Liu, Yong <yong.liu@intel.com>
> Sent: Wednesday, April 1, 2020 9:01 PM
> To: Gavin Hu <Gavin.Hu@arm.com>; maxime.coquelin@redhat.com; Ye,
> Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong
> <zhihong.wang@intel.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> 
> 
> 
> > -----Original Message-----
> > From: Gavin Hu <Gavin.Hu@arm.com>
> > Sent: Wednesday, April 1, 2020 6:07 PM
> > To: Liu, Yong <yong.liu@intel.com>; maxime.coquelin@redhat.com; Ye,
> > Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong
> > <zhihong.wang@intel.com>
> > Cc: dev@dpdk.org; nd <nd@arm.com>
> > Subject: RE: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> >
> > Hi Marvin,
> >
> > > -----Original Message-----
> > > From: dev <dev-bounces@dpdk.org> On Behalf Of Marvin Liu
> > > Sent: Wednesday, April 1, 2020 10:50 PM
> > > To: maxime.coquelin@redhat.com; xiaolong.ye@intel.com;
> > > zhihong.wang@intel.com
> > > Cc: dev@dpdk.org; Marvin Liu <yong.liu@intel.com>
> > > Subject: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> > >
> > > If Tx zero copy is enabled, the gpa to hpa mapping table is updated one
> > > entry at a time. This harms performance when the guest memory backend
> > > uses 2M hugepages. Now add a cached mapping table which is sorted by
> > > usage sequence. Address translation will first check the cached mapping
> > > table, then check the unsorted mapping table if no match is found.
> > >
> > > Signed-off-by: Marvin Liu <yong.liu@intel.com>
> > >
> > > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> > > index 2087d1400..5cb0e83dd 100644
> > > --- a/lib/librte_vhost/vhost.h
> > > +++ b/lib/librte_vhost/vhost.h
> > > @@ -368,7 +368,9 @@ struct virtio_net {
> > >  	struct vhost_device_ops const *notify_ops;
> > >
> > >  	uint32_t		nr_guest_pages;
> > > +	uint32_t		nr_cached_guest_pages;
> > >  	uint32_t		max_guest_pages;
> > > +	struct guest_page       *cached_guest_pages;
> > >  	struct guest_page       *guest_pages;
> > >
> > >  	int			slave_req_fd;
> > > @@ -553,12 +555,25 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
> > >  {
> > >  	uint32_t i;
> > >  	struct guest_page *page;
> > > +	uint32_t cached_pages = dev->nr_cached_guest_pages;
> > > +

Add a comment here, something like "Firstly look up in the cached pages"? 

> > > +	for (i = 0; i < cached_pages; i++) {

Should the search order be reversed here, to check the most recent entries first?

> > > +		page = &dev->cached_guest_pages[i];
> > > +		if (gpa >= page->guest_phys_addr &&
> > > +			gpa + size < page->guest_phys_addr + page->size) {
> > > +			return gpa - page->guest_phys_addr +
> > > +				page->host_phys_addr;
> > > +		}
> > > +	}
> > Sorry, I did not see any speedup with cached guest pages in comparison to
> > the old code below.
> > Is it not a simple copy?
> > Is it a better idea to use hash instead to speed up the translation?
> > /Gavin
> 
> Hi Gavin,
> Here we just resort the overall mapping table according to usage sequence.
> Most likely the virtio driver will reuse recently recycled buffers, so the
> search will find a match near the beginning.
> That is a simple fix for performance enhancement. If a hash were used for
> the index, it would cost much more in the normal case.
> 
> Regards,
> Marvin

There are issues here: the cached table grows over time. Will it become less efficient when it grows too big, even bigger than the original table, and overflow happens?
Is it a good idea to limit the cached entries to a small number and therefore make the search quicker?
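For instance, something like the below would bound the search while keeping hot entries in front (untested sketch; the size and helper name are made up):

#define GPA_CACHE_ENTRIES 8 /* illustrative bound only */

static __rte_always_inline uint64_t
gpa_to_hpa_cache_lookup(struct virtio_net *dev, uint64_t gpa, uint64_t size)
{
	uint32_t i;
	uint32_t n = RTE_MIN(dev->nr_cached_guest_pages,
			(uint32_t)GPA_CACHE_ENTRIES);

	for (i = 0; i < n; i++) {
		struct guest_page tmp = dev->cached_guest_pages[i];

		if (gpa >= tmp.guest_phys_addr &&
		    gpa + size < tmp.guest_phys_addr + tmp.size) {
			if (i > 0) {
				/* move-to-front keeps hot entries cheap */
				memmove(&dev->cached_guest_pages[1],
					&dev->cached_guest_pages[0],
					i * sizeof(tmp));
				dev->cached_guest_pages[0] = tmp;
			}
			return gpa - tmp.guest_phys_addr + tmp.host_phys_addr;
		}
	}

	return 0; /* caller falls back to the full table */
}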
/Gavin
> 
> 
> > >
Add a comment here, something like "Fall back to normal lookup if failed to get from the cached table"? 

> > >  	for (i = 0; i < dev->nr_guest_pages; i++) {
> > >  		page = &dev->guest_pages[i];
> > >
> > >  		if (gpa >= page->guest_phys_addr &&
> > >  		    gpa + size < page->guest_phys_addr + page->size) {
> > > +			rte_memcpy(&dev->cached_guest_pages[cached_pages],
> > > +				   page, sizeof(struct guest_page));
> > > +			dev->nr_cached_guest_pages++;

Will overflow happen over time when there are many translations? 

> > >  			return gpa - page->guest_phys_addr +
> > >  			       page->host_phys_addr;
> > >  		}
> > > diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> > > index 79fcb9d19..1bae1fddc 100644
> > > --- a/lib/librte_vhost/vhost_user.c
> > > +++ b/lib/librte_vhost/vhost_user.c
> > > @@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
> > >  	}
> > >
> > >  	rte_free(dev->guest_pages);
> > > +	rte_free(dev->cached_guest_pages);
> > >  	dev->guest_pages = NULL;
> > > +	dev->cached_guest_pages = NULL;
> > >
> > >  	if (dev->log_addr) {
> > >  		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
> > > @@ -898,7 +900,7 @@ add_one_guest_page(struct virtio_net *dev,
> > > uint64_t guest_phys_addr,
> > >  		   uint64_t host_phys_addr, uint64_t size)
> > >  {
> > >  	struct guest_page *page, *last_page;
> > > -	struct guest_page *old_pages;
> > > +	struct guest_page *old_pages, *old_cached_pages;
> > >
> > >  	if (dev->nr_guest_pages == dev->max_guest_pages) {
> > >  		dev->max_guest_pages *= 2;
> > > @@ -906,9 +908,19 @@ add_one_guest_page(struct virtio_net *dev,
> > > uint64_t guest_phys_addr,
> > >  		dev->guest_pages = rte_realloc(dev->guest_pages,
> > >  					dev->max_guest_pages *
> > > sizeof(*page),
> > >  					RTE_CACHE_LINE_SIZE);
> > > -		if (dev->guest_pages == NULL) {
> > > +		old_cached_pages = dev->cached_guest_pages;
> > > +		dev->cached_guest_pages = rte_realloc(dev->cached_guest_pages,
> > > +						dev->max_guest_pages *
> > > +						sizeof(*page),
> > > +						RTE_CACHE_LINE_SIZE);
> > > +		dev->nr_cached_guest_pages = 0;
> > > +		if (dev->guest_pages == NULL ||
> > > +				dev->cached_guest_pages == NULL) {
> > >  			VHOST_LOG_CONFIG(ERR, "cannot realloc
> > > guest_pages\n");
> > >  			rte_free(old_pages);
> > > +			rte_free(old_cached_pages);
> > > +			dev->guest_pages = NULL;
> > > +			dev->cached_guest_pages = NULL;
> > >  			return -1;
> > >  		}
> > >  	}
> > > @@ -1078,6 +1090,20 @@ vhost_user_set_mem_table(struct virtio_net
> > > **pdev, struct VhostUserMsg *msg,
> > >  		}
> > >  	}
> > >
> > > +	if (dev->cached_guest_pages == NULL) {
> > > +		dev->cached_guest_pages = rte_zmalloc(NULL,
> > > +						dev->max_guest_pages *
> > > +						sizeof(struct guest_page),
> > > +						RTE_CACHE_LINE_SIZE);
> > > +		if (dev->cached_guest_pages == NULL) {
> > > +			VHOST_LOG_CONFIG(ERR,
> > > +				"(%d) failed to allocate memory "
> > > +				"for dev->cached_guest_pages\n",
> > > +				dev->vid);
> > > +			return RTE_VHOST_MSG_RESULT_ERR;
> > > +		}
> > > +	}
> > > +
> > >  	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct
> > > rte_vhost_memory) +
> > >  		sizeof(struct rte_vhost_mem_region) * memory->nregions,
> > > 0);
> > >  	if (dev->mem == NULL) {
> > > --
> > > 2.17.1
  
Marvin Liu April 2, 2020, 4:45 a.m. UTC | #5
> -----Original Message-----
> From: Gavin Hu <Gavin.Hu@arm.com>
> Sent: Thursday, April 2, 2020 11:05 AM
> To: Liu, Yong <yong.liu@intel.com>; maxime.coquelin@redhat.com; Ye,
> Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong
> <zhihong.wang@intel.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> 
> Hi Marvin,
> 
> > -----Original Message-----
> > From: Liu, Yong <yong.liu@intel.com>
> > Sent: Wednesday, April 1, 2020 9:01 PM
> > To: Gavin Hu <Gavin.Hu@arm.com>; maxime.coquelin@redhat.com; Ye,
> > Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong
> > <zhihong.wang@intel.com>
> > Cc: dev@dpdk.org; nd <nd@arm.com>
> > Subject: RE: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> >
> >
> >
> > > -----Original Message-----
> > > From: Gavin Hu <Gavin.Hu@arm.com>
> > > Sent: Wednesday, April 1, 2020 6:07 PM
> > > To: Liu, Yong <yong.liu@intel.com>; maxime.coquelin@redhat.com; Ye,
> > > Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong
> > > <zhihong.wang@intel.com>
> > > Cc: dev@dpdk.org; nd <nd@arm.com>
> > > Subject: RE: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa
> translation
> > >
> > > Hi Marvin,
> > >
> > > > -----Original Message-----
> > > > From: dev <dev-bounces@dpdk.org> On Behalf Of Marvin Liu
> > > > Sent: Wednesday, April 1, 2020 10:50 PM
> > > > To: maxime.coquelin@redhat.com; xiaolong.ye@intel.com;
> > > > zhihong.wang@intel.com
> > > > Cc: dev@dpdk.org; Marvin Liu <yong.liu@intel.com>
> > > > Subject: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation
> > > >
> > > > If Tx zero copy is enabled, the gpa to hpa mapping table is updated one
> > > > entry at a time. This harms performance when the guest memory backend
> > > > uses 2M hugepages. Now add a cached mapping table which is sorted by
> > > > usage sequence. Address translation will first check the cached mapping
> > > > table, then check the unsorted mapping table if no match is found.
> > > >
> > > > Signed-off-by: Marvin Liu <yong.liu@intel.com>
> > > >
> > > > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> > > > index 2087d1400..5cb0e83dd 100644
> > > > --- a/lib/librte_vhost/vhost.h
> > > > +++ b/lib/librte_vhost/vhost.h
> > > > @@ -368,7 +368,9 @@ struct virtio_net {
> > > >  	struct vhost_device_ops const *notify_ops;
> > > >
> > > >  	uint32_t		nr_guest_pages;
> > > > +	uint32_t		nr_cached_guest_pages;
> > > >  	uint32_t		max_guest_pages;
> > > > +	struct guest_page       *cached_guest_pages;
> > > >  	struct guest_page       *guest_pages;
> > > >
> > > >  	int			slave_req_fd;
> > > > @@ -553,12 +555,25 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
> > > >  {
> > > >  	uint32_t i;
> > > >  	struct guest_page *page;
> > > > +	uint32_t cached_pages = dev->nr_cached_guest_pages;
> > > > +
> 
> Add a comment here, something like "Firstly look up in the cached pages"?
> 
> > > > +	for (i = 0; i < cached_pages; i++) {
> 
> Should the search order be reversed here, to check the most recent entries first?
> 
> > > > +		page = &dev->cached_guest_pages[i];
> > > > +		if (gpa >= page->guest_phys_addr &&
> > > > +			gpa + size < page->guest_phys_addr + page->size) {
> > > > +			return gpa - page->guest_phys_addr +
> > > > +				page->host_phys_addr;
> > > > +		}
> > > > +	}
> > > Sorry, I did not see any speedup with cached guest pages in comparison
> > > to the old code below.
> > > Is it not a simple copy?
> > > Is it a better idea to use hash instead to speed up the translation?
> > > /Gavin
> >
> > Hi Gavin,
> > Here we just resort the overall mapping table according to usage sequence.
> > Most likely the virtio driver will reuse recently recycled buffers, so the
> > search will find a match near the beginning.
> > That is a simple fix for performance enhancement. If a hash were used for
> > the index, it would cost much more in the normal case.
> >
> > Regards,
> > Marvin
> 
> There are issues here: the cached table grows over time. Will it become
> less efficient when it grows too big, even bigger than the original table,
> and overflow happens?
> Is it a good idea to limit the cached entries to a small number and
> therefore make the search quicker?
> /Gavin
> >

Gavin,
The cached table size is the same as the mapping table; it only records entries from the original table which have been used.
In the worst case, where every access of guest memory is random, the cached table grows to the same size as the original table, and the search cost is the same as before.
It is hard to predict which size is most suitable for caching, since that depends on how the guest driver allocates buffers. It may be fewer than ten entries when using 2M pages, and thousands when using 4K pages.
So here we just add a resorted table, whose cost is much less in the normal case and the same as before in the worst case.

Thanks,
Marvin

> >
> > > >
> Add a comment here, something like "Fall back to normal lookup if failed to
> get from the cached table"?
> 
> > > >  	for (i = 0; i < dev->nr_guest_pages; i++) {
> > > >  		page = &dev->guest_pages[i];
> > > >
> > > >  		if (gpa >= page->guest_phys_addr &&
> > > >  		    gpa + size < page->guest_phys_addr + page->size) {
> > > > +			rte_memcpy(&dev->cached_guest_pages[cached_pages],
> > > > +				   page, sizeof(struct guest_page));
> > > > +			dev->nr_cached_guest_pages++;
> 
> Will overflow happen over time when there are many translations?
> 
> > > >  			return gpa - page->guest_phys_addr +
> > > >  			       page->host_phys_addr;
> > > >  		}
> > > > diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> > > > index 79fcb9d19..1bae1fddc 100644
> > > > --- a/lib/librte_vhost/vhost_user.c
> > > > +++ b/lib/librte_vhost/vhost_user.c
> > > > @@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
> > > >  	}
> > > >
> > > >  	rte_free(dev->guest_pages);
> > > > +	rte_free(dev->cached_guest_pages);
> > > >  	dev->guest_pages = NULL;
> > > > +	dev->cached_guest_pages = NULL;
> > > >
> > > >  	if (dev->log_addr) {
> > > >  		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
> > > > @@ -898,7 +900,7 @@ add_one_guest_page(struct virtio_net *dev,
> > > > uint64_t guest_phys_addr,
> > > >  		   uint64_t host_phys_addr, uint64_t size)
> > > >  {
> > > >  	struct guest_page *page, *last_page;
> > > > -	struct guest_page *old_pages;
> > > > +	struct guest_page *old_pages, *old_cached_pages;
> > > >
> > > >  	if (dev->nr_guest_pages == dev->max_guest_pages) {
> > > >  		dev->max_guest_pages *= 2;
> > > > @@ -906,9 +908,19 @@ add_one_guest_page(struct virtio_net *dev,
> > > > uint64_t guest_phys_addr,
> > > >  		dev->guest_pages = rte_realloc(dev->guest_pages,
> > > >  					dev->max_guest_pages *
> > > > sizeof(*page),
> > > >  					RTE_CACHE_LINE_SIZE);
> > > > -		if (dev->guest_pages == NULL) {
> > > > +		old_cached_pages = dev->cached_guest_pages;
> > > > +		dev->cached_guest_pages = rte_realloc(dev->cached_guest_pages,
> > > > +						dev->max_guest_pages *
> > > > +						sizeof(*page),
> > > > +						RTE_CACHE_LINE_SIZE);
> > > > +		dev->nr_cached_guest_pages = 0;
> > > > +		if (dev->guest_pages == NULL ||
> > > > +				dev->cached_guest_pages == NULL) {
> > > >  			VHOST_LOG_CONFIG(ERR, "cannot realloc
> > > > guest_pages\n");
> > > >  			rte_free(old_pages);
> > > > +			rte_free(old_cached_pages);
> > > > +			dev->guest_pages = NULL;
> > > > +			dev->cached_guest_pages = NULL;
> > > >  			return -1;
> > > >  		}
> > > >  	}
> > > > @@ -1078,6 +1090,20 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
> > > >  		}
> > > >  	}
> > > >
> > > > +	if (dev->cached_guest_pages == NULL) {
> > > > +		dev->cached_guest_pages = rte_zmalloc(NULL,
> > > > +						dev->max_guest_pages *
> > > > +						sizeof(struct guest_page),
> > > > +						RTE_CACHE_LINE_SIZE);
> > > > +		if (dev->cached_guest_pages == NULL) {
> > > > +			VHOST_LOG_CONFIG(ERR,
> > > > +				"(%d) failed to allocate memory "
> > > > +				"for dev->cached_guest_pages\n",
> > > > +				dev->vid);
> > > > +			return RTE_VHOST_MSG_RESULT_ERR;
> > > > +		}
> > > > +	}
> > > > +
> > > >  	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct
> > > > rte_vhost_memory) +
> > > >  		sizeof(struct rte_vhost_mem_region) * memory->nregions,
> > > > 0);
> > > >  	if (dev->mem == NULL) {
> > > > --
> > > > 2.17.1
  
Ma, LihongX April 3, 2020, 8:22 a.m. UTC | #6
Tested-by: Ma, Lihong <lihongx.ma@intel.com>

Regards,
Ma,lihong


-----Original Message-----
From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Liu, Yong
Sent: Wednesday, April 1, 2020 9:01 PM
To: Gavin Hu <Gavin.Hu@arm.com>; maxime.coquelin@redhat.com; Ye, Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>
Cc: dev@dpdk.org; nd <nd@arm.com>
Subject: Re: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation



> -----Original Message-----
> From: Gavin Hu <Gavin.Hu@arm.com>
> Sent: Wednesday, April 1, 2020 6:07 PM
> To: Liu, Yong <yong.liu@intel.com>; maxime.coquelin@redhat.com; Ye, 
> Xiaolong <xiaolong.ye@intel.com>; Wang, Zhihong 
> <zhihong.wang@intel.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa 
> translation
> 
> Hi Marvin,
> 
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Marvin Liu
> > Sent: Wednesday, April 1, 2020 10:50 PM
> > To: maxime.coquelin@redhat.com; xiaolong.ye@intel.com; 
> > zhihong.wang@intel.com
> > Cc: dev@dpdk.org; Marvin Liu <yong.liu@intel.com>
> > Subject: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa 
> > translation
> >
> > If Tx zero copy is enabled, the gpa to hpa mapping table is updated one
> > entry at a time. This harms performance when the guest memory backend
> > uses 2M hugepages. Now add a cached mapping table which is sorted by
> > usage sequence. Address translation will first check the cached mapping
> > table, then check the unsorted mapping table if no match is found.
> >
> > Signed-off-by: Marvin Liu <yong.liu@intel.com>
> >
> > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h 
> > index 2087d1400..5cb0e83dd 100644
> > --- a/lib/librte_vhost/vhost.h
> > +++ b/lib/librte_vhost/vhost.h
> > @@ -368,7 +368,9 @@ struct virtio_net {
> >  	struct vhost_device_ops const *notify_ops;
> >
> >  	uint32_t		nr_guest_pages;
> > +	uint32_t		nr_cached_guest_pages;
> >  	uint32_t		max_guest_pages;
> > +	struct guest_page       *cached_guest_pages;
> >  	struct guest_page       *guest_pages;
> >
> >  	int			slave_req_fd;
> > @@ -553,12 +555,25 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
> >  {
> >  	uint32_t i;
> >  	struct guest_page *page;
> > +	uint32_t cached_pages = dev->nr_cached_guest_pages;
> > +
> > +	for (i = 0; i < cached_pages; i++) {
> > +		page = &dev->cached_guest_pages[i];
> > +		if (gpa >= page->guest_phys_addr &&
> > +			gpa + size < page->guest_phys_addr + page->size) {
> > +			return gpa - page->guest_phys_addr +
> > +				page->host_phys_addr;
> > +		}
> > +	}
> Sorry, I did not see any speedup with cached guest pages in comparison 
> to the old code below.
> Is it not a simple copy?
> Is it a better idea to use hash instead to speed up the translation?
> /Gavin

Hi Gavin,
Here we just resort the overall mapping table according to usage sequence.
Most likely the virtio driver will reuse recently recycled buffers, so the search will find a match near the beginning.
That is a simple fix for performance enhancement. If a hash were used for the index, it would cost much more in the normal case.

Regards,
Marvin 


> >
> >  	for (i = 0; i < dev->nr_guest_pages; i++) {
> >  		page = &dev->guest_pages[i];
> >
> >  		if (gpa >= page->guest_phys_addr &&
> >  		    gpa + size < page->guest_phys_addr + page->size) {
> > +			rte_memcpy(&dev->cached_guest_pages[cached_pages],
> > +				   page, sizeof(struct guest_page));
> > +			dev->nr_cached_guest_pages++;
> >  			return gpa - page->guest_phys_addr +
> >  			       page->host_phys_addr;
> >  		}
> > diff --git a/lib/librte_vhost/vhost_user.c 
> > b/lib/librte_vhost/vhost_user.c index 79fcb9d19..1bae1fddc 100644
> > --- a/lib/librte_vhost/vhost_user.c
> > +++ b/lib/librte_vhost/vhost_user.c
> > @@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
> >  	}
> >
> >  	rte_free(dev->guest_pages);
> > +	rte_free(dev->cached_guest_pages);
> >  	dev->guest_pages = NULL;
> > +	dev->cached_guest_pages = NULL;
> >
> >  	if (dev->log_addr) {
> >  		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
> > @@ -898,7 +900,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> >  		   uint64_t host_phys_addr, uint64_t size)
> >  {
> >  	struct guest_page *page, *last_page;
> > -	struct guest_page *old_pages;
> > +	struct guest_page *old_pages, *old_cached_pages;
> >
> >  	if (dev->nr_guest_pages == dev->max_guest_pages) {
> >  		dev->max_guest_pages *= 2;
> > @@ -906,9 +908,19 @@ add_one_guest_page(struct virtio_net *dev, 
> > uint64_t guest_phys_addr,
> >  		dev->guest_pages = rte_realloc(dev->guest_pages,
> >  					dev->max_guest_pages *
> > sizeof(*page),
> >  					RTE_CACHE_LINE_SIZE);
> > -		if (dev->guest_pages == NULL) {
> > +		old_cached_pages = dev->cached_guest_pages;
> > +		dev->cached_guest_pages = rte_realloc(dev->cached_guest_pages,
> > +						dev->max_guest_pages *
> > +						sizeof(*page),
> > +						RTE_CACHE_LINE_SIZE);
> > +		dev->nr_cached_guest_pages = 0;
> > +		if (dev->guest_pages == NULL ||
> > +				dev->cached_guest_pages == NULL) {
> >  			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
> >  			rte_free(old_pages);
> > +			rte_free(old_cached_pages);
> > +			dev->guest_pages = NULL;
> > +			dev->cached_guest_pages = NULL;
> >  			return -1;
> >  		}
> >  	}
> > @@ -1078,6 +1090,20 @@ vhost_user_set_mem_table(struct virtio_net 
> > **pdev, struct VhostUserMsg *msg,
> >  		}
> >  	}
> >
> > +	if (dev->cached_guest_pages == NULL) {
> > +		dev->cached_guest_pages = rte_zmalloc(NULL,
> > +						dev->max_guest_pages *
> > +						sizeof(struct guest_page),
> > +						RTE_CACHE_LINE_SIZE);
> > +		if (dev->cached_guest_pages == NULL) {
> > +			VHOST_LOG_CONFIG(ERR,
> > +				"(%d) failed to allocate memory "
> > +				"for dev->cached_guest_pages\n",
> > +				dev->vid);
> > +			return RTE_VHOST_MSG_RESULT_ERR;
> > +		}
> > +	}
> > +
> >  	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct
> > rte_vhost_memory) +
> >  		sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
> >  	if (dev->mem == NULL) {
> > --
> > 2.17.1
  
Maxime Coquelin April 27, 2020, 8:45 a.m. UTC | #7
Hi Marvin,

On 4/1/20 4:50 PM, Marvin Liu wrote:
> If Tx zero copy is enabled, the gpa to hpa mapping table is updated one
> entry at a time. This harms performance when the guest memory backend
> uses 2M hugepages. Now add a cached mapping table which is sorted by
> usage sequence. Address translation will first check the cached mapping
> table, then check the unsorted mapping table if no match is found.
> 
> Signed-off-by: Marvin Liu <yong.liu@intel.com>
> 

I don't like the approach, as I think it could have nasty effects.
For example, the system is loaded normally and let's say 25% of the
pages are used. Then we have a small spike, and buffers that were never
used start to be used; this will cause new entries to be written into
the cache in the hot path when the system is already overloaded.
Wouldn't it increase the number of packets dropped?

At set_mem_table time, instead of adding the guest pages unsorted, it
may be better to add them sorted there. Then you can use a better
algorithm than linear searching (O(n)), such as binary search (O(log n)).

Thanks,
Maxime
  
Marvin Liu April 28, 2020, 12:44 a.m. UTC | #8
> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Monday, April 27, 2020 4:45 PM
> To: Liu, Yong <yong.liu@intel.com>; Ye, Xiaolong <xiaolong.ye@intel.com>;
> Wang, Zhihong <zhihong.wang@intel.com>
> Cc: dev@dpdk.org
> Subject: Re: [PATCH v2 2/2] vhost: cache gpa to hpa translation
> 
> Hi Marvin,
> 
> On 4/1/20 4:50 PM, Marvin Liu wrote:
> > If Tx zero copy is enabled, the gpa to hpa mapping table is updated one
> > entry at a time. This harms performance when the guest memory backend
> > uses 2M hugepages. Now add a cached mapping table which is sorted by
> > usage sequence. Address translation will first check the cached mapping
> > table, then check the unsorted mapping table if no match is found.
> >
> > Signed-off-by: Marvin Liu <yong.liu@intel.com>
> >
> 
> I don't like the approach, as I think it could have nasty effects.
> For example, the system is loaded normally and let's say 25% of the
> pages are used. Then we have a small spike, and buffers that were never
> used start to be used; this will cause new entries to be written into
> the cache in the hot path when the system is already overloaded.
> Wouldn't it increase the number of packets dropped?
> 
> At set_mem_table time, instead of adding the guest pages unsorted, it
> may be better to add them sorted there. Then you can use a better
> algorithm than linear searching (O(n)), such as binary search (O(log n)).
> 

Maxime,
Thanks for the input. The previous sorting was by usage sequence; it may cause more packet drops if the sequence of accessed pages varies a lot.
Based on the current dpdk and virtio-net implementations, that is unlikely to happen. Anyway, it is not the best choice.
I will use binary search to replace the current cache solution.
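Something along these lines (untested sketch; it assumes guest_pages is kept sorted by guest_phys_addr once at set_mem_table time, exact names may change in the actual patch):

static int
guest_page_addrcmp(const void *p1, const void *p2)
{
	const struct guest_page *page1 = (const struct guest_page *)p1;
	const struct guest_page *page2 = (const struct guest_page *)p2;

	if (page1->guest_phys_addr > page2->guest_phys_addr)
		return 1;
	if (page1->guest_phys_addr < page2->guest_phys_addr)
		return -1;
	return 0;
}

/* after all regions are added in vhost_user_set_mem_table(): */
qsort(dev->guest_pages, dev->nr_guest_pages,
	sizeof(struct guest_page), guest_page_addrcmp);

/* the lookup then becomes O(log n): */
static __rte_always_inline uint64_t
gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
{
	uint32_t low = 0, high = dev->nr_guest_pages;

	while (low < high) {
		uint32_t mid = low + (high - low) / 2;
		struct guest_page *page = &dev->guest_pages[mid];

		if (gpa >= page->guest_phys_addr &&
		    gpa + size < page->guest_phys_addr + page->size)
			return gpa - page->guest_phys_addr +
			       page->host_phys_addr;

		if (gpa < page->guest_phys_addr)
			high = mid;
		else
			low = mid + 1;
	}

	return 0;
}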

Regards,
Marvin

> Thanks,
> Maxime
>
  

Patch

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 2087d1400..5cb0e83dd 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -368,7 +368,9 @@  struct virtio_net {
 	struct vhost_device_ops const *notify_ops;
 
 	uint32_t		nr_guest_pages;
+	uint32_t		nr_cached_guest_pages;
 	uint32_t		max_guest_pages;
+	struct guest_page       *cached_guest_pages;
 	struct guest_page       *guest_pages;
 
 	int			slave_req_fd;
@@ -553,12 +555,25 @@  gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
 {
 	uint32_t i;
 	struct guest_page *page;
+	uint32_t cached_pages = dev->nr_cached_guest_pages;
+
+	for (i = 0; i < cached_pages; i++) {
+		page = &dev->cached_guest_pages[i];
+		if (gpa >= page->guest_phys_addr &&
+			gpa + size < page->guest_phys_addr + page->size) {
+			return gpa - page->guest_phys_addr +
+				page->host_phys_addr;
+		}
+	}
 
 	for (i = 0; i < dev->nr_guest_pages; i++) {
 		page = &dev->guest_pages[i];
 
 		if (gpa >= page->guest_phys_addr &&
 		    gpa + size < page->guest_phys_addr + page->size) {
+			rte_memcpy(&dev->cached_guest_pages[cached_pages],
+				   page, sizeof(struct guest_page));
+			dev->nr_cached_guest_pages++;
 			return gpa - page->guest_phys_addr +
 			       page->host_phys_addr;
 		}
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 79fcb9d19..1bae1fddc 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -192,7 +192,9 @@  vhost_backend_cleanup(struct virtio_net *dev)
 	}
 
 	rte_free(dev->guest_pages);
+	rte_free(dev->cached_guest_pages);
 	dev->guest_pages = NULL;
+	dev->cached_guest_pages = NULL;
 
 	if (dev->log_addr) {
 		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
@@ -898,7 +900,7 @@  add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 		   uint64_t host_phys_addr, uint64_t size)
 {
 	struct guest_page *page, *last_page;
-	struct guest_page *old_pages;
+	struct guest_page *old_pages, *old_cached_pages;
 
 	if (dev->nr_guest_pages == dev->max_guest_pages) {
 		dev->max_guest_pages *= 2;
@@ -906,9 +908,19 @@  add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 		dev->guest_pages = rte_realloc(dev->guest_pages,
 					dev->max_guest_pages * sizeof(*page),
 					RTE_CACHE_LINE_SIZE);
-		if (dev->guest_pages == NULL) {
+		old_cached_pages = dev->cached_guest_pages;
+		dev->cached_guest_pages = rte_realloc(dev->cached_guest_pages,
+						dev->max_guest_pages *
+						sizeof(*page),
+						RTE_CACHE_LINE_SIZE);
+		dev->nr_cached_guest_pages = 0;
+		if (dev->guest_pages == NULL ||
+				dev->cached_guest_pages == NULL) {
 			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
 			rte_free(old_pages);
+			rte_free(old_cached_pages);
+			dev->guest_pages = NULL;
+			dev->cached_guest_pages = NULL;
 			return -1;
 		}
 	}
@@ -1078,6 +1090,20 @@  vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 		}
 	}
 
+	if (dev->cached_guest_pages == NULL) {
+		dev->cached_guest_pages = rte_zmalloc(NULL,
+						dev->max_guest_pages *
+						sizeof(struct guest_page),
+						RTE_CACHE_LINE_SIZE);
+		if (dev->cached_guest_pages == NULL) {
+			VHOST_LOG_CONFIG(ERR,
+				"(%d) failed to allocate memory "
+				"for dev->cached_guest_pages\n",
+				dev->vid);
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+	}
+
 	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) +
 		sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
 	if (dev->mem == NULL) {