[v2,08/16] vhost: add flush function for burst enqueue

Message ID 20190919163643.24130-9-yong.liu@intel.com
State Superseded
Headers show
Series
  • vhost packed ring performance optimization
Related show

Checks

Context Check Description
ci/Intel-compilation fail Compilation issues
ci/checkpatch success coding style OK

Commit Message

Liu, Yong Sept. 19, 2019, 4:36 p.m.
Flush used flags when burst enqueue function is finished. Descriptor's
flags are pre-calculated as they will be reset by vhost.

Signed-off-by: Marvin Liu <yong.liu@intel.com>

Comments

Gavin Hu (Arm Technology China) Sept. 25, 2019, 3:37 a.m. | #1
Hi Marvin,

One typo and one comment about the barrier.

/Gavin

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Marvin Liu
> Sent: Friday, September 20, 2019 12:37 AM
> To: maxime.coquelin@redhat.com; tiwei.bie@intel.com;
> zhihong.wang@intel.com
> Cc: dev@dpdk.org; Marvin Liu <yong.liu@intel.com>
> Subject: [dpdk-dev] [PATCH v2 08/16] vhost: add flush function for burst
> enqueue
> 
> Flush used flags when burst enqueue function is finished. Descriptor's
> flags are pre-calculated as them will be reset by vhost.
s/them/they

> 
> Signed-off-by: Marvin Liu <yong.liu@intel.com>
> 
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 000648dd4..9c42c7db0 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -39,6 +39,9 @@
> 
>  #define VHOST_LOG_CACHE_NR 32
> 
> +#define VIRTIO_RX_USED_FLAG  (0ULL | VRING_DESC_F_AVAIL |
> VRING_DESC_F_USED \
> +				| VRING_DESC_F_WRITE)
> +#define VIRTIO_RX_USED_WRAP_FLAG (VRING_DESC_F_WRITE)
>  #define PACKED_DESCS_BURST (RTE_CACHE_LINE_SIZE / \
>  			    sizeof(struct vring_packed_desc))
> 
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index e2787b72e..8e4036204 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -169,6 +169,51 @@ update_shadow_packed(struct vhost_virtqueue
> *vq,
>  	vq->shadow_used_packed[i].count = count;
>  }
> 
> +static __rte_always_inline void
> +flush_burst_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +	uint64_t *lens, uint16_t *ids, uint16_t flags)
> +{
> +	uint16_t i;
> +
> +	UNROLL_PRAGMA(PRAGMA_PARAM)
> +	for (i = 0; i < PACKED_DESCS_BURST; i++) {
> +		vq->desc_packed[vq->last_used_idx + i].id = ids[i];
> +		vq->desc_packed[vq->last_used_idx + i].len = lens[i];
> +	}
> +
> +	UNROLL_PRAGMA(PRAGMA_PARAM)
> +	for (i = 0; i < PACKED_DESCS_BURST; i++) {
> +		rte_smp_wmb();
Should this rte_smp_wmb() be moved above the loop? It guarantees the orderings of updates of id, len happens before the flags,
But all the flags of different descriptors should not be ordered. 

> +		vq->desc_packed[vq->last_used_idx + i].flags = flags;
> +	}
> +
> +	vhost_log_cache_used_vring(dev, vq, vq->last_used_idx *
> +				   sizeof(struct vring_packed_desc),
> +				   sizeof(struct vring_packed_desc) *
> +				   PACKED_DESCS_BURST);
> +	vhost_log_cache_sync(dev, vq);
> +
> +	vq->last_used_idx += PACKED_DESCS_BURST;
> +	if (vq->last_used_idx >= vq->size) {
> +		vq->used_wrap_counter ^= 1;
> +		vq->last_used_idx -= vq->size;
> +	}
> +}
> +
> +static __rte_always_inline void
> +flush_enqueue_burst_packed(struct virtio_net *dev, struct
> vhost_virtqueue *vq,
> +	uint64_t *lens, uint16_t *ids)
> +{
> +	uint16_t flags = 0;
> +
> +	if (vq->used_wrap_counter)
> +		flags = VIRTIO_RX_USED_FLAG;
> +	else
> +		flags = VIRTIO_RX_USED_WRAP_FLAG;
> +
> +	flush_burst_packed(dev, vq, lens, ids, flags);
> +}
> +
>  static __rte_always_inline void
>  update_enqueue_shadow_packed(struct vhost_virtqueue *vq, uint16_t
> desc_idx,
>  	uint32_t len, uint16_t count)
> @@ -950,6 +995,7 @@ virtio_dev_rx_burst_packed(struct virtio_net *dev,
> struct vhost_virtqueue *vq,
>  	struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_DESCS_BURST];
>  	uint32_t buf_offset = dev->vhost_hlen;
>  	uint64_t lens[PACKED_DESCS_BURST];
> +	uint16_t ids[PACKED_DESCS_BURST];
> 
>  	uint16_t i;
> 
> @@ -1013,6 +1059,12 @@ virtio_dev_rx_burst_packed(struct virtio_net
> *dev, struct vhost_virtqueue *vq,
>  			   pkts[i]->pkt_len);
>  	}
> 
> +	UNROLL_PRAGMA(PRAGMA_PARAM)
> +	for (i = 0; i < PACKED_DESCS_BURST; i++)
> +		ids[i] = descs[avail_idx + i].id;
> +
> +	flush_enqueue_burst_packed(dev, vq, lens, ids);
> +
>  	return 0;
>  }
> 
> --
> 2.17.1
Liu, Yong Sept. 25, 2019, 5:37 a.m. | #2
> -----Original Message-----
> From: Gavin Hu (Arm Technology China) [mailto:Gavin.Hu@arm.com]
> Sent: Wednesday, September 25, 2019 11:38 AM
> To: Liu, Yong <yong.liu@intel.com>; maxime.coquelin@redhat.com; Bie, Tiwei
> <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH v2 08/16] vhost: add flush function for
> burst enqueue
> 
> Hi Marvin,
> 
> One typo and one comment about the barrier.
> 
> /Gavin
> 
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Marvin Liu
> > Sent: Friday, September 20, 2019 12:37 AM
> > To: maxime.coquelin@redhat.com; tiwei.bie@intel.com;
> > zhihong.wang@intel.com
> > Cc: dev@dpdk.org; Marvin Liu <yong.liu@intel.com>
> > Subject: [dpdk-dev] [PATCH v2 08/16] vhost: add flush function for burst
> > enqueue
> >
> > Flush used flags when burst enqueue function is finished. Descriptor's
> > flags are pre-calculated as them will be reset by vhost.
> s/them/they
> 

Thanks.

> >
> > Signed-off-by: Marvin Liu <yong.liu@intel.com>
> >
> > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> > index 000648dd4..9c42c7db0 100644
> > --- a/lib/librte_vhost/vhost.h
> > +++ b/lib/librte_vhost/vhost.h
> > @@ -39,6 +39,9 @@
> >
> >  #define VHOST_LOG_CACHE_NR 32
> >
> > +#define VIRTIO_RX_USED_FLAG  (0ULL | VRING_DESC_F_AVAIL |
> > VRING_DESC_F_USED \
> > +				| VRING_DESC_F_WRITE)
> > +#define VIRTIO_RX_USED_WRAP_FLAG (VRING_DESC_F_WRITE)
> >  #define PACKED_DESCS_BURST (RTE_CACHE_LINE_SIZE / \
> >  			    sizeof(struct vring_packed_desc))
> >
> > diff --git a/lib/librte_vhost/virtio_net.c
> b/lib/librte_vhost/virtio_net.c
> > index e2787b72e..8e4036204 100644
> > --- a/lib/librte_vhost/virtio_net.c
> > +++ b/lib/librte_vhost/virtio_net.c
> > @@ -169,6 +169,51 @@ update_shadow_packed(struct vhost_virtqueue
> > *vq,
> >  	vq->shadow_used_packed[i].count = count;
> >  }
> >
> > +static __rte_always_inline void
> > +flush_burst_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
> > +	uint64_t *lens, uint16_t *ids, uint16_t flags)
> > +{
> > +	uint16_t i;
> > +
> > +	UNROLL_PRAGMA(PRAGMA_PARAM)
> > +	for (i = 0; i < PACKED_DESCS_BURST; i++) {
> > +		vq->desc_packed[vq->last_used_idx + i].id = ids[i];
> > +		vq->desc_packed[vq->last_used_idx + i].len = lens[i];
> > +	}
> > +
> > +	UNROLL_PRAGMA(PRAGMA_PARAM)
> > +	for (i = 0; i < PACKED_DESCS_BURST; i++) {
> > +		rte_smp_wmb();
> Should this rte_smp_wmb() be moved above the loop? It guarantees the
> orderings of updates of id, len happens before the flags,
> But all the flags of different descriptors should not be ordered.
> 
Hi Gavin,
For each descriptor, virtio driver will first check flags and then check read barrier, at the last driver will read id and length.
So wmb here is to guarantee that id and length are updated before flags. And afterwards wmb is to guarantee the sequence.

Thanks,
Marvin

> > +		vq->desc_packed[vq->last_used_idx + i].flags = flags;
> > +	}
> > +
> > +	vhost_log_cache_used_vring(dev, vq, vq->last_used_idx *
> > +				   sizeof(struct vring_packed_desc),
> > +				   sizeof(struct vring_packed_desc) *
> > +				   PACKED_DESCS_BURST);
> > +	vhost_log_cache_sync(dev, vq);
> > +
> > +	vq->last_used_idx += PACKED_DESCS_BURST;
> > +	if (vq->last_used_idx >= vq->size) {
> > +		vq->used_wrap_counter ^= 1;
> > +		vq->last_used_idx -= vq->size;
> > +	}
> > +}
> > +
> > +static __rte_always_inline void
> > +flush_enqueue_burst_packed(struct virtio_net *dev, struct
> > vhost_virtqueue *vq,
> > +	uint64_t *lens, uint16_t *ids)
> > +{
> > +	uint16_t flags = 0;
> > +
> > +	if (vq->used_wrap_counter)
> > +		flags = VIRTIO_RX_USED_FLAG;
> > +	else
> > +		flags = VIRTIO_RX_USED_WRAP_FLAG;
> > +
> > +	flush_burst_packed(dev, vq, lens, ids, flags);
> > +}
> > +
> >  static __rte_always_inline void
> >  update_enqueue_shadow_packed(struct vhost_virtqueue *vq, uint16_t
> > desc_idx,
> >  	uint32_t len, uint16_t count)
> > @@ -950,6 +995,7 @@ virtio_dev_rx_burst_packed(struct virtio_net *dev,
> > struct vhost_virtqueue *vq,
> >  	struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_DESCS_BURST];
> >  	uint32_t buf_offset = dev->vhost_hlen;
> >  	uint64_t lens[PACKED_DESCS_BURST];
> > +	uint16_t ids[PACKED_DESCS_BURST];
> >
> >  	uint16_t i;
> >
> > @@ -1013,6 +1059,12 @@ virtio_dev_rx_burst_packed(struct virtio_net
> > *dev, struct vhost_virtqueue *vq,
> >  			   pkts[i]->pkt_len);
> >  	}
> >
> > +	UNROLL_PRAGMA(PRAGMA_PARAM)
> > +	for (i = 0; i < PACKED_DESCS_BURST; i++)
> > +		ids[i] = descs[avail_idx + i].id;
> > +
> > +	flush_enqueue_burst_packed(dev, vq, lens, ids);
> > +
> >  	return 0;
> >  }
> >
> > --
> > 2.17.1
Liu, Yong Sept. 25, 2019, 6:51 a.m. | #3
> -----Original Message-----
> From: Liu, Yong
> Sent: Wednesday, September 25, 2019 1:38 PM
> To: Gavin Hu (Arm Technology China) <Gavin.Hu@arm.com>;
> maxime.coquelin@redhat.com; Bie, Tiwei <tiwei.bie@intel.com>; Wang, Zhihong
> <zhihong.wang@intel.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH v2 08/16] vhost: add flush function for
> burst enqueue
> 
> 
> 
> > -----Original Message-----
> > From: Gavin Hu (Arm Technology China) [mailto:Gavin.Hu@arm.com]
> > Sent: Wednesday, September 25, 2019 11:38 AM
> > To: Liu, Yong <yong.liu@intel.com>; maxime.coquelin@redhat.com; Bie,
> Tiwei
> > <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>
> > Cc: dev@dpdk.org; nd <nd@arm.com>
> > Subject: RE: [dpdk-dev] [PATCH v2 08/16] vhost: add flush function for
> > burst enqueue
> >
> > Hi Marvin,
> >
> > One typo and one comment about the barrier.
> >
> > /Gavin
> >
> > > -----Original Message-----
> > > From: dev <dev-bounces@dpdk.org> On Behalf Of Marvin Liu
> > > Sent: Friday, September 20, 2019 12:37 AM
> > > To: maxime.coquelin@redhat.com; tiwei.bie@intel.com;
> > > zhihong.wang@intel.com
> > > Cc: dev@dpdk.org; Marvin Liu <yong.liu@intel.com>
> > > Subject: [dpdk-dev] [PATCH v2 08/16] vhost: add flush function for
> burst
> > > enqueue
> > >
> > > Flush used flags when burst enqueue function is finished. Descriptor's
> > > flags are pre-calculated as them will be reset by vhost.
> > s/them/they
> >
> 
> Thanks.
> 
> > >
> > > Signed-off-by: Marvin Liu <yong.liu@intel.com>
> > >
> > > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> > > index 000648dd4..9c42c7db0 100644
> > > --- a/lib/librte_vhost/vhost.h
> > > +++ b/lib/librte_vhost/vhost.h
> > > @@ -39,6 +39,9 @@
> > >
> > >  #define VHOST_LOG_CACHE_NR 32
> > >
> > > +#define VIRTIO_RX_USED_FLAG  (0ULL | VRING_DESC_F_AVAIL |
> > > VRING_DESC_F_USED \
> > > +				| VRING_DESC_F_WRITE)
> > > +#define VIRTIO_RX_USED_WRAP_FLAG (VRING_DESC_F_WRITE)
> > >  #define PACKED_DESCS_BURST (RTE_CACHE_LINE_SIZE / \
> > >  			    sizeof(struct vring_packed_desc))
> > >
> > > diff --git a/lib/librte_vhost/virtio_net.c
> > b/lib/librte_vhost/virtio_net.c
> > > index e2787b72e..8e4036204 100644
> > > --- a/lib/librte_vhost/virtio_net.c
> > > +++ b/lib/librte_vhost/virtio_net.c
> > > @@ -169,6 +169,51 @@ update_shadow_packed(struct vhost_virtqueue
> > > *vq,
> > >  	vq->shadow_used_packed[i].count = count;
> > >  }
> > >
> > > +static __rte_always_inline void
> > > +flush_burst_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
> > > +	uint64_t *lens, uint16_t *ids, uint16_t flags)
> > > +{
> > > +	uint16_t i;
> > > +
> > > +	UNROLL_PRAGMA(PRAGMA_PARAM)
> > > +	for (i = 0; i < PACKED_DESCS_BURST; i++) {
> > > +		vq->desc_packed[vq->last_used_idx + i].id = ids[i];
> > > +		vq->desc_packed[vq->last_used_idx + i].len = lens[i];
> > > +	}
> > > +
> > > +	UNROLL_PRAGMA(PRAGMA_PARAM)
> > > +	for (i = 0; i < PACKED_DESCS_BURST; i++) {
> > > +		rte_smp_wmb();
> > Should this rte_smp_wmb() be moved above the loop? It guarantees the
> > orderings of updates of id, len happens before the flags,
> > But all the flags of different descriptors should not be ordered.
> >
> Hi Gavin,
> For each descriptor, virtio driver will first check flags and then check
> read barrier, at the last driver will read id and length.
> So wmb here is to guarantee that id and length are updated before flags.
> And afterwards wmb is to guarantee the sequence.
> 
Gavin,
Checked with master branch, flags store sequence is not needed.
But in my environment, performance will be a little better if the flags stores are ordered.
I think it may be harmless to place wmb here. What is your opinion?

> Thanks,
> Marvin
> 
> > > +		vq->desc_packed[vq->last_used_idx + i].flags = flags;
> > > +	}
> > > +
> > > +	vhost_log_cache_used_vring(dev, vq, vq->last_used_idx *
> > > +				   sizeof(struct vring_packed_desc),
> > > +				   sizeof(struct vring_packed_desc) *
> > > +				   PACKED_DESCS_BURST);
> > > +	vhost_log_cache_sync(dev, vq);
> > > +
> > > +	vq->last_used_idx += PACKED_DESCS_BURST;
> > > +	if (vq->last_used_idx >= vq->size) {
> > > +		vq->used_wrap_counter ^= 1;
> > > +		vq->last_used_idx -= vq->size;
> > > +	}
> > > +}
> > > +
> > > +static __rte_always_inline void
> > > +flush_enqueue_burst_packed(struct virtio_net *dev, struct
> > > vhost_virtqueue *vq,
> > > +	uint64_t *lens, uint16_t *ids)
> > > +{
> > > +	uint16_t flags = 0;
> > > +
> > > +	if (vq->used_wrap_counter)
> > > +		flags = VIRTIO_RX_USED_FLAG;
> > > +	else
> > > +		flags = VIRTIO_RX_USED_WRAP_FLAG;
> > > +
> > > +	flush_burst_packed(dev, vq, lens, ids, flags);
> > > +}
> > > +
> > >  static __rte_always_inline void
> > >  update_enqueue_shadow_packed(struct vhost_virtqueue *vq, uint16_t
> > > desc_idx,
> > >  	uint32_t len, uint16_t count)
> > > @@ -950,6 +995,7 @@ virtio_dev_rx_burst_packed(struct virtio_net *dev,
> > > struct vhost_virtqueue *vq,
> > >  	struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_DESCS_BURST];
> > >  	uint32_t buf_offset = dev->vhost_hlen;
> > >  	uint64_t lens[PACKED_DESCS_BURST];
> > > +	uint16_t ids[PACKED_DESCS_BURST];
> > >
> > >  	uint16_t i;
> > >
> > > @@ -1013,6 +1059,12 @@ virtio_dev_rx_burst_packed(struct virtio_net
> > > *dev, struct vhost_virtqueue *vq,
> > >  			   pkts[i]->pkt_len);
> > >  	}
> > >
> > > +	UNROLL_PRAGMA(PRAGMA_PARAM)
> > > +	for (i = 0; i < PACKED_DESCS_BURST; i++)
> > > +		ids[i] = descs[avail_idx + i].id;
> > > +
> > > +	flush_enqueue_burst_packed(dev, vq, lens, ids);
> > > +
> > >  	return 0;
> > >  }
> > >
> > > --
> > > 2.17.1
Gavin Hu (Arm Technology China) Sept. 25, 2019, 7:15 a.m. | #4
Hi Marvin,

> -----Original Message-----
> From: Liu, Yong <yong.liu@intel.com>
> Sent: Wednesday, September 25, 2019 2:52 PM
> To: Gavin Hu (Arm Technology China) <Gavin.Hu@arm.com>;
> 'maxime.coquelin@redhat.com' <maxime.coquelin@redhat.com>; Bie,
> Tiwei <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>
> Cc: 'dev@dpdk.org' <dev@dpdk.org>; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH v2 08/16] vhost: add flush function for burst
> enqueue
> 
> > -----Original Message-----
> > From: Liu, Yong
> > Sent: Wednesday, September 25, 2019 1:38 PM
> > To: Gavin Hu (Arm Technology China) <Gavin.Hu@arm.com>;
> > maxime.coquelin@redhat.com; Bie, Tiwei <tiwei.bie@intel.com>; Wang,
> Zhihong
> > <zhihong.wang@intel.com>
> > Cc: dev@dpdk.org; nd <nd@arm.com>
> > Subject: RE: [dpdk-dev] [PATCH v2 08/16] vhost: add flush function for
> > burst enqueue
> >
> >
> >
> > > -----Original Message-----
> > > From: Gavin Hu (Arm Technology China) [mailto:Gavin.Hu@arm.com]
> > > Sent: Wednesday, September 25, 2019 11:38 AM
> > > To: Liu, Yong <yong.liu@intel.com>; maxime.coquelin@redhat.com; Bie,
> > Tiwei
> > > <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>
> > > Cc: dev@dpdk.org; nd <nd@arm.com>
> > > Subject: RE: [dpdk-dev] [PATCH v2 08/16] vhost: add flush function for
> > > burst enqueue
> > >
> > > Hi Marvin,
> > >
> > > One typo and one comment about the barrier.
> > >
> > > /Gavin
> > >
> > > > -----Original Message-----
> > > > From: dev <dev-bounces@dpdk.org> On Behalf Of Marvin Liu
> > > > Sent: Friday, September 20, 2019 12:37 AM
> > > > To: maxime.coquelin@redhat.com; tiwei.bie@intel.com;
> > > > zhihong.wang@intel.com
> > > > Cc: dev@dpdk.org; Marvin Liu <yong.liu@intel.com>
> > > > Subject: [dpdk-dev] [PATCH v2 08/16] vhost: add flush function for
> > burst
> > > > enqueue
> > > >
> > > > Flush used flags when burst enqueue function is finished. Descriptor's
> > > > flags are pre-calculated as them will be reset by vhost.
> > > s/them/they
> > >
> >
> > Thanks.
> >
> > > >
> > > > Signed-off-by: Marvin Liu <yong.liu@intel.com>
> > > >
> > > > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> > > > index 000648dd4..9c42c7db0 100644
> > > > --- a/lib/librte_vhost/vhost.h
> > > > +++ b/lib/librte_vhost/vhost.h
> > > > @@ -39,6 +39,9 @@
> > > >
> > > >  #define VHOST_LOG_CACHE_NR 32
> > > >
> > > > +#define VIRTIO_RX_USED_FLAG  (0ULL | VRING_DESC_F_AVAIL |
> > > > VRING_DESC_F_USED \
> > > > +				| VRING_DESC_F_WRITE)
> > > > +#define VIRTIO_RX_USED_WRAP_FLAG (VRING_DESC_F_WRITE)
> > > >  #define PACKED_DESCS_BURST (RTE_CACHE_LINE_SIZE / \
> > > >  			    sizeof(struct vring_packed_desc))
> > > >
> > > > diff --git a/lib/librte_vhost/virtio_net.c
> > > b/lib/librte_vhost/virtio_net.c
> > > > index e2787b72e..8e4036204 100644
> > > > --- a/lib/librte_vhost/virtio_net.c
> > > > +++ b/lib/librte_vhost/virtio_net.c
> > > > @@ -169,6 +169,51 @@ update_shadow_packed(struct
> vhost_virtqueue
> > > > *vq,
> > > >  	vq->shadow_used_packed[i].count = count;
> > > >  }
> > > >
> > > > +static __rte_always_inline void
> > > > +flush_burst_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
> > > > +	uint64_t *lens, uint16_t *ids, uint16_t flags)
> > > > +{
> > > > +	uint16_t i;
> > > > +
> > > > +	UNROLL_PRAGMA(PRAGMA_PARAM)
> > > > +	for (i = 0; i < PACKED_DESCS_BURST; i++) {
> > > > +		vq->desc_packed[vq->last_used_idx + i].id = ids[i];
> > > > +		vq->desc_packed[vq->last_used_idx + i].len = lens[i];
> > > > +	}
> > > > +
> > > > +	UNROLL_PRAGMA(PRAGMA_PARAM)
> > > > +	for (i = 0; i < PACKED_DESCS_BURST; i++) {
> > > > +		rte_smp_wmb();
> > > Should this rte_smp_wmb() be moved above the loop? It guarantees the
> > > orderings of updates of id, len happens before the flags,
> > > But all the flags of different descriptors should not be ordered.
> > >
> > Hi Gavin,
> > For each descriptor, virtio driver will first check flags and then check
> > read barrier, at the last driver will read id and length.
> > So wmb here is to guarantee that id and length are updated before flags.
> > And afterwards wmb is to guarantee the sequence.
> >
> Gavin,
> Checked with master branch, flags store sequence is not needed.
> But in my environment, performance will be a litter better if ordered flags
> store.
> I think it may be harmless to place wmb here. How about your idea?
The smp barrier on x86 is a compiler barrier only; it ensures data consistency, it will not help performance.
The slightly better performance should come from run-to-run variance, system noise, or something else.
The barrier will dampen the performance on weakly memory-ordered platforms, like aarch64.
/Gavin
> 
> > Thanks,
> > Marvin
> >
> > > > +		vq->desc_packed[vq->last_used_idx + i].flags = flags;
> > > > +	}
> > > > +
> > > > +	vhost_log_cache_used_vring(dev, vq, vq->last_used_idx *
> > > > +				   sizeof(struct vring_packed_desc),
> > > > +				   sizeof(struct vring_packed_desc) *
> > > > +				   PACKED_DESCS_BURST);
> > > > +	vhost_log_cache_sync(dev, vq);
> > > > +
> > > > +	vq->last_used_idx += PACKED_DESCS_BURST;
> > > > +	if (vq->last_used_idx >= vq->size) {
> > > > +		vq->used_wrap_counter ^= 1;
> > > > +		vq->last_used_idx -= vq->size;
> > > > +	}
> > > > +}
> > > > +
> > > > +static __rte_always_inline void
> > > > +flush_enqueue_burst_packed(struct virtio_net *dev, struct
> > > > vhost_virtqueue *vq,
> > > > +	uint64_t *lens, uint16_t *ids)
> > > > +{
> > > > +	uint16_t flags = 0;
> > > > +
> > > > +	if (vq->used_wrap_counter)
> > > > +		flags = VIRTIO_RX_USED_FLAG;
> > > > +	else
> > > > +		flags = VIRTIO_RX_USED_WRAP_FLAG;
> > > > +
> > > > +	flush_burst_packed(dev, vq, lens, ids, flags);
> > > > +}
> > > > +
> > > >  static __rte_always_inline void
> > > >  update_enqueue_shadow_packed(struct vhost_virtqueue *vq,
> uint16_t
> > > > desc_idx,
> > > >  	uint32_t len, uint16_t count)
> > > > @@ -950,6 +995,7 @@ virtio_dev_rx_burst_packed(struct virtio_net
> *dev,
> > > > struct vhost_virtqueue *vq,
> > > >  	struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_DESCS_BURST];
> > > >  	uint32_t buf_offset = dev->vhost_hlen;
> > > >  	uint64_t lens[PACKED_DESCS_BURST];
> > > > +	uint16_t ids[PACKED_DESCS_BURST];
> > > >
> > > >  	uint16_t i;
> > > >
> > > > @@ -1013,6 +1059,12 @@ virtio_dev_rx_burst_packed(struct
> virtio_net
> > > > *dev, struct vhost_virtqueue *vq,
> > > >  			   pkts[i]->pkt_len);
> > > >  	}
> > > >
> > > > +	UNROLL_PRAGMA(PRAGMA_PARAM)
> > > > +	for (i = 0; i < PACKED_DESCS_BURST; i++)
> > > > +		ids[i] = descs[avail_idx + i].id;
> > > > +
> > > > +	flush_enqueue_burst_packed(dev, vq, lens, ids);
> > > > +
> > > >  	return 0;
> > > >  }
> > > >
> > > > --
> > > > 2.17.1

Patch

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 000648dd4..9c42c7db0 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -39,6 +39,9 @@ 
 
 #define VHOST_LOG_CACHE_NR 32
 
+#define VIRTIO_RX_USED_FLAG  (0ULL | VRING_DESC_F_AVAIL | VRING_DESC_F_USED \
+				| VRING_DESC_F_WRITE)
+#define VIRTIO_RX_USED_WRAP_FLAG (VRING_DESC_F_WRITE)
 #define PACKED_DESCS_BURST (RTE_CACHE_LINE_SIZE / \
 			    sizeof(struct vring_packed_desc))
 
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index e2787b72e..8e4036204 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -169,6 +169,51 @@  update_shadow_packed(struct vhost_virtqueue *vq,
 	vq->shadow_used_packed[i].count = count;
 }
 
+static __rte_always_inline void
+flush_burst_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+	uint64_t *lens, uint16_t *ids, uint16_t flags)
+{
+	uint16_t i;
+
+	UNROLL_PRAGMA(PRAGMA_PARAM)
+	for (i = 0; i < PACKED_DESCS_BURST; i++) {
+		vq->desc_packed[vq->last_used_idx + i].id = ids[i];
+		vq->desc_packed[vq->last_used_idx + i].len = lens[i];
+	}
+
+	UNROLL_PRAGMA(PRAGMA_PARAM)
+	for (i = 0; i < PACKED_DESCS_BURST; i++) {
+		rte_smp_wmb();
+		vq->desc_packed[vq->last_used_idx + i].flags = flags;
+	}
+
+	vhost_log_cache_used_vring(dev, vq, vq->last_used_idx *
+				   sizeof(struct vring_packed_desc),
+				   sizeof(struct vring_packed_desc) *
+				   PACKED_DESCS_BURST);
+	vhost_log_cache_sync(dev, vq);
+
+	vq->last_used_idx += PACKED_DESCS_BURST;
+	if (vq->last_used_idx >= vq->size) {
+		vq->used_wrap_counter ^= 1;
+		vq->last_used_idx -= vq->size;
+	}
+}
+
+static __rte_always_inline void
+flush_enqueue_burst_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+	uint64_t *lens, uint16_t *ids)
+{
+	uint16_t flags = 0;
+
+	if (vq->used_wrap_counter)
+		flags = VIRTIO_RX_USED_FLAG;
+	else
+		flags = VIRTIO_RX_USED_WRAP_FLAG;
+
+	flush_burst_packed(dev, vq, lens, ids, flags);
+}
+
 static __rte_always_inline void
 update_enqueue_shadow_packed(struct vhost_virtqueue *vq, uint16_t desc_idx,
 	uint32_t len, uint16_t count)
@@ -950,6 +995,7 @@  virtio_dev_rx_burst_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_DESCS_BURST];
 	uint32_t buf_offset = dev->vhost_hlen;
 	uint64_t lens[PACKED_DESCS_BURST];
+	uint16_t ids[PACKED_DESCS_BURST];
 
 	uint16_t i;
 
@@ -1013,6 +1059,12 @@  virtio_dev_rx_burst_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			   pkts[i]->pkt_len);
 	}
 
+	UNROLL_PRAGMA(PRAGMA_PARAM)
+	for (i = 0; i < PACKED_DESCS_BURST; i++)
+		ids[i] = descs[avail_idx + i].id;
+
+	flush_enqueue_burst_packed(dev, vq, lens, ids);
+
 	return 0;
 }