[dpdk-dev,v2,2/6] vhost: introduce vhost_log_write

Message ID 1450321921-27799-3-git-send-email-yuanhan.liu@linux.intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Yuanhan Liu Dec. 17, 2015, 3:11 a.m. UTC
  Introduce the vhost_log_write() helper function to log the dirty pages we
touched. The page size is hard-coded to 4096 (VHOST_LOG_PAGE), and each
page is represented by 1 bit in the log.

Therefore, vhost_log_write() simply finds the right bit for the
page we are about to change and sets it to 1. dev->log_base denotes the
start of the dirty page bitmap.

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Signed-off-by: Victor Kaplansky <victork@redhat.com>
---
 lib/librte_vhost/rte_virtio_net.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
  

Comments

Huawei Xie Dec. 21, 2015, 3:06 p.m. UTC | #1
On 12/17/2015 11:11 AM, Yuanhan Liu wrote:
> Introduce vhost_log_write() helper function to log the dirty pages we
> touched. Page size is harded code to 4096 (VHOST_LOG_PAGE), and each
> log is presented by 1 bit.
>
> Therefore, vhost_log_write() simply finds the right bit for related
> page we are gonna change, and set it to 1. dev->log_base denotes the
> start of the dirty page bitmap.
>
> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> Signed-off-by: Victor Kaplansky <victork@redhat.com
> ---
>  lib/librte_vhost/rte_virtio_net.h | 29 +++++++++++++++++++++++++++++
>  1 file changed, 29 insertions(+)
>
> diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
> index 8acee02..5726683 100644
> --- a/lib/librte_vhost/rte_virtio_net.h
> +++ b/lib/librte_vhost/rte_virtio_net.h
> @@ -40,6 +40,7 @@
>   */
>  
>  #include <stdint.h>
> +#include <linux/vhost.h>
>  #include <linux/virtio_ring.h>
>  #include <linux/virtio_net.h>
>  #include <sys/eventfd.h>
> @@ -59,6 +60,8 @@ struct rte_mbuf;
>  /* Backend value set by guest. */
>  #define VIRTIO_DEV_STOPPED -1
>  
> +#define VHOST_LOG_PAGE	4096
> +
>  
>  /* Enum for virtqueue management. */
>  enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
> @@ -205,6 +208,32 @@ gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
>  	return vhost_va;
>  }
>  
> +static inline void __attribute__((always_inline))
> +vhost_log_page(uint8_t *log_base, uint64_t page)
> +{
> +	log_base[page / 8] |= 1 << (page % 8);
> +}
> +
Those logging functions are not supposed to be part of the public API. Could
we move them into an internal header file?
> +static inline void __attribute__((always_inline))
> +vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
> +{
> +	uint64_t page;
> +
Before we log, we need a memory barrier to make sure the updates are in place.
> +	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
> +		   !dev->log_base || !len))
> +		return;
> +
> +	if (unlikely(dev->log_size < ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
> +		return;
> +
> +	page = addr / VHOST_LOG_PAGE;
> +	while (page * VHOST_LOG_PAGE < addr + len) {
Let us have a page_end var to make the code simpler?
> +		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
> +		page += VHOST_LOG_PAGE;
page += 1?
> +	}
> +}
> +
> +
>  /**
>   *  Disable features in feature_mask. Returns 0 on success.
>   */
  
Yuanhan Liu Dec. 22, 2015, 2:40 a.m. UTC | #2
On Mon, Dec 21, 2015 at 03:06:43PM +0000, Xie, Huawei wrote:
> On 12/17/2015 11:11 AM, Yuanhan Liu wrote:
> > Introduce vhost_log_write() helper function to log the dirty pages we
> > touched. Page size is harded code to 4096 (VHOST_LOG_PAGE), and each
> > log is presented by 1 bit.
> >
> > Therefore, vhost_log_write() simply finds the right bit for related
> > page we are gonna change, and set it to 1. dev->log_base denotes the
> > start of the dirty page bitmap.
> >
> > Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> > Signed-off-by: Victor Kaplansky <victork@redhat.com
> > ---
> >  lib/librte_vhost/rte_virtio_net.h | 29 +++++++++++++++++++++++++++++
> >  1 file changed, 29 insertions(+)
> >
> > diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
> > index 8acee02..5726683 100644
> > --- a/lib/librte_vhost/rte_virtio_net.h
> > +++ b/lib/librte_vhost/rte_virtio_net.h
> > @@ -40,6 +40,7 @@
> >   */
> >  
> >  #include <stdint.h>
> > +#include <linux/vhost.h>
> >  #include <linux/virtio_ring.h>
> >  #include <linux/virtio_net.h>
> >  #include <sys/eventfd.h>
> > @@ -59,6 +60,8 @@ struct rte_mbuf;
> >  /* Backend value set by guest. */
> >  #define VIRTIO_DEV_STOPPED -1
> >  
> > +#define VHOST_LOG_PAGE	4096
> > +
> >  
> >  /* Enum for virtqueue management. */
> >  enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
> > @@ -205,6 +208,32 @@ gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
> >  	return vhost_va;
> >  }
> >  
> > +static inline void __attribute__((always_inline))
> > +vhost_log_page(uint8_t *log_base, uint64_t page)
> > +{
> > +	log_base[page / 8] |= 1 << (page % 8);
> > +}
> > +
> Those logging functions are not supposed to be API. Could we move them
> into an internal header file?

Agreed. I should have put them into vhost_rxtx.c

> > +static inline void __attribute__((always_inline))
> > +vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
> > +{
> > +	uint64_t page;
> > +
> Before we log, we need memory barrier to make sure updates are in place.
> > +	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
> > +		   !dev->log_base || !len))
> > +		return;

Put a memory barrier inside set_features()?

I see no variable dependence here, so why put a barrier here? We are
accessing and modifying the same variable; doesn't the cache's MESI
protocol take care of your concerns?

> > +
> > +	if (unlikely(dev->log_size < ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
> > +		return;
> > +
> > +	page = addr / VHOST_LOG_PAGE;
> > +	while (page * VHOST_LOG_PAGE < addr + len) {
> Let us have a page_end var to make the code simpler?

Could do that.


> > +		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
> > +		page += VHOST_LOG_PAGE;
> page += 1?

Oops, right.

	--yliu

> > +	}
> > +}
> > +
> > +
> >  /**
> >   *  Disable features in feature_mask. Returns 0 on success.
> >   */
>
  
Huawei Xie Dec. 22, 2015, 2:45 a.m. UTC | #3
On 12/22/2015 10:40 AM, Yuanhan Liu wrote:
> On Mon, Dec 21, 2015 at 03:06:43PM +0000, Xie, Huawei wrote:
>> On 12/17/2015 11:11 AM, Yuanhan Liu wrote:
>>> Introduce vhost_log_write() helper function to log the dirty pages we
>>> touched. Page size is harded code to 4096 (VHOST_LOG_PAGE), and each
>>> log is presented by 1 bit.
>>>
>>> Therefore, vhost_log_write() simply finds the right bit for related
>>> page we are gonna change, and set it to 1. dev->log_base denotes the
>>> start of the dirty page bitmap.
>>>
>>> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
>>> Signed-off-by: Victor Kaplansky <victork@redhat.com
>>> ---
>>>  lib/librte_vhost/rte_virtio_net.h | 29 +++++++++++++++++++++++++++++
>>>  1 file changed, 29 insertions(+)
>>>
>>> diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
>>> index 8acee02..5726683 100644
>>> --- a/lib/librte_vhost/rte_virtio_net.h
>>> +++ b/lib/librte_vhost/rte_virtio_net.h
>>> @@ -40,6 +40,7 @@
>>>   */
>>>  
>>>  #include <stdint.h>
>>> +#include <linux/vhost.h>
>>>  #include <linux/virtio_ring.h>
>>>  #include <linux/virtio_net.h>
>>>  #include <sys/eventfd.h>
>>> @@ -59,6 +60,8 @@ struct rte_mbuf;
>>>  /* Backend value set by guest. */
>>>  #define VIRTIO_DEV_STOPPED -1
>>>  
>>> +#define VHOST_LOG_PAGE	4096
>>> +
>>>  
>>>  /* Enum for virtqueue management. */
>>>  enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
>>> @@ -205,6 +208,32 @@ gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
>>>  	return vhost_va;
>>>  }
>>>  
>>> +static inline void __attribute__((always_inline))
>>> +vhost_log_page(uint8_t *log_base, uint64_t page)
>>> +{
>>> +	log_base[page / 8] |= 1 << (page % 8);
>>> +}
>>> +
>> Those logging functions are not supposed to be API. Could we move them
>> into an internal header file?
> Agreed. I should have put them into vhost_rxtx.c
>
>>> +static inline void __attribute__((always_inline))
>>> +vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
>>> +{
>>> +	uint64_t page;
>>> +
>> Before we log, we need memory barrier to make sure updates are in place.
>>> +	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
>>> +		   !dev->log_base || !len))
>>> +		return;
> Put a memory barrier inside set_features()?
>
> I see no var dependence here, why putting a barrier then? We are
> accessing and modifying same var, doesn't the cache MESI protocol
> will get rid of your concerns?
This fence isn't about the features variable. It is to ensure that updates to
the guest buffer are committed before the logging.
For IA's strong memory model, a compiler barrier is enough. For other, weaker
memory models, a fence is required.
>>> +
>>> +	if (unlikely(dev->log_size < ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
>>> +		return;
>>> +
>>> +	page = addr / VHOST_LOG_PAGE;
>>> +	while (page * VHOST_LOG_PAGE < addr + len) {
>> Let us have a page_end var to make the code simpler?
> Could do that.
>
>
>>> +		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
>>> +		page += VHOST_LOG_PAGE;
>> page += 1?
> Oops, right.
>
> 	--yliu
>
>>> +	}
>>> +}
>>> +
>>> +
>>>  /**
>>>   *  Disable features in feature_mask. Returns 0 on success.
>>>   */
  
Yuanhan Liu Dec. 22, 2015, 3:04 a.m. UTC | #4
On Tue, Dec 22, 2015 at 02:45:52AM +0000, Xie, Huawei wrote:
> >>> +static inline void __attribute__((always_inline))
> >>> +vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
> >>> +{
> >>> +	uint64_t page;
> >>> +
> >> Before we log, we need memory barrier to make sure updates are in place.
> >>> +	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
> >>> +		   !dev->log_base || !len))
> >>> +		return;
> > Put a memory barrier inside set_features()?
> >
> > I see no var dependence here, why putting a barrier then? We are
> > accessing and modifying same var, doesn't the cache MESI protocol
> > will get rid of your concerns?
> This fence isn't about feature var. It is to ensure that updates to the
> guest buffer are committed before the logging.

Oh, I thought you were talking about the concurrent access to and
modification of the "dev->features" field that you mentioned in V1.

> For IA strong memory model, compiler barrier is enough. For other weak
> memory model, fence is required.
> >>> +
> >>> +	if (unlikely(dev->log_size < ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
> >>> +		return;

So I should put an "rte_mb()" __here__?

	--yliu
> >>> +
> >>> +	page = addr / VHOST_LOG_PAGE;
> >>> +	while (page * VHOST_LOG_PAGE < addr + len) {
> >> Let us have a page_end var to make the code simpler?
> > Could do that.
> >
> >
> >>> +		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
> >>> +		page += VHOST_LOG_PAGE;
> >> page += 1?
> > Oops, right.
> >
> > 	--yliu
> >
> >>> +	}
> >>> +}
> >>> +
> >>> +
> >>>  /**
> >>>   *  Disable features in feature_mask. Returns 0 on success.
> >>>   */
>
  
Peter Xu Dec. 22, 2015, 5:11 a.m. UTC | #5
On Thu, Dec 17, 2015 at 11:11:57AM +0800, Yuanhan Liu wrote:
> +static inline void __attribute__((always_inline))
> +vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
> +{
> +	uint64_t page;
> +
> +	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
> +		   !dev->log_base || !len))
> +		return;
> +
> +	if (unlikely(dev->log_size < ((addr + len - 1) / VHOST_LOG_PAGE / 8)))

Should it be "<="?

Peter
  
Yuanhan Liu Dec. 22, 2015, 6:09 a.m. UTC | #6
On Tue, Dec 22, 2015 at 01:11:02PM +0800, Peter Xu wrote:
> On Thu, Dec 17, 2015 at 11:11:57AM +0800, Yuanhan Liu wrote:
> > +static inline void __attribute__((always_inline))
> > +vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
> > +{
> > +	uint64_t page;
> > +
> > +	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
> > +		   !dev->log_base || !len))
> > +		return;
> > +
> > +	if (unlikely(dev->log_size < ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
> 
> Should it be "<="?

Right, thanks for catching it.

	--yliu
  
Huawei Xie Dec. 22, 2015, 7:02 a.m. UTC | #7
On 12/22/2015 11:03 AM, Yuanhan Liu wrote:
> On Tue, Dec 22, 2015 at 02:45:52AM +0000, Xie, Huawei wrote:
>>>>> +static inline void __attribute__((always_inline))
>>>>> +vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
>>>>> +{
>>>>> +	uint64_t page;
>>>>> +
>>>> Before we log, we need memory barrier to make sure updates are in place.
>>>>> +	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
>>>>> +		   !dev->log_base || !len))
>>>>> +		return;
>>> Put a memory barrier inside set_features()?
>>>
>>> I see no var dependence here, why putting a barrier then? We are
>>> accessing and modifying same var, doesn't the cache MESI protocol
>>> will get rid of your concerns?
>> This fence isn't about feature var. It is to ensure that updates to the
>> guest buffer are committed before the logging.
> Oh.., I was thinking you were talking about the "dev->features" field
> concurrent access and modify you mentioned from V1.
>
>> For IA strong memory model, compiler barrier is enough. For other weak
>> memory model, fence is required.
>>>>> +
>>>>> +	if (unlikely(dev->log_size < ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
>>>>> +		return;
> So that I should put a "rte_mb()" __here__?
>
> 	--yliu

I found that we already have an arch-dependent barrier for this: rte_smp_wmb()
        --huawei
>>>>> +
>>>>> +	page = addr / VHOST_LOG_PAGE;
>>>>> +	while (page * VHOST_LOG_PAGE < addr + len) {
>>>> Let us have a page_end var to make the code simpler?
>>> Could do that.
>>>
>>>
>>>>> +		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
>>>>> +		page += VHOST_LOG_PAGE;
>>>> page += 1?
>>> Oops, right.
>>>
>>> 	--yliu
>>>
>>>>> +	}
>>>>> +}
>>>>> +
>>>>> +
>>>>>  /**
>>>>>   *  Disable features in feature_mask. Returns 0 on success.
>>>>>   */
  

Patch

diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 8acee02..5726683 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -40,6 +40,7 @@ 
  */
 
 #include <stdint.h>
+#include <linux/vhost.h>
 #include <linux/virtio_ring.h>
 #include <linux/virtio_net.h>
 #include <sys/eventfd.h>
@@ -59,6 +60,8 @@  struct rte_mbuf;
 /* Backend value set by guest. */
 #define VIRTIO_DEV_STOPPED -1
 
+#define VHOST_LOG_PAGE	4096
+
 
 /* Enum for virtqueue management. */
 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
@@ -205,6 +208,32 @@  gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
 	return vhost_va;
 }
 
+static inline void __attribute__((always_inline))
+vhost_log_page(uint8_t *log_base, uint64_t page)
+{
+	log_base[page / 8] |= 1 << (page % 8);
+}
+
+static inline void __attribute__((always_inline))
+vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
+{
+	uint64_t page;
+
+	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
+		   !dev->log_base || !len))
+		return;
+
+	if (unlikely(dev->log_size < ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
+		return;
+
+	page = addr / VHOST_LOG_PAGE;
+	while (page * VHOST_LOG_PAGE < addr + len) {
+		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
+		page += VHOST_LOG_PAGE;
+	}
+}
+
+
 /**
  *  Disable features in feature_mask. Returns 0 on success.
  */