[1/2] vhost: add unsafe API to drain pkts in async vhost

Message ID 20210602042802.31943-2-cheng1.jiang@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Series: vhost: handle memory hotplug for async vhost

Checks

Context        Check     Description
ci/checkpatch  success   coding style OK

Commit Message

Jiang, Cheng1 June 2, 2021, 4:28 a.m. UTC
  Applications need to stop DMA transfers and finish all in-flight
packets in the VM memory hot-plug case when async vhost is used. This
patch provides a thread-unsafe API to drain the in-flight packets that
were submitted to the DMA engine in the vhost async data path, and
enables it in the vhost example.

Signed-off-by: Cheng Jiang <cheng1.jiang@intel.com>
---
 examples/vhost/main.c       | 48 +++++++++++++++++++-
 examples/vhost/main.h       |  1 +
 lib/vhost/rte_vhost_async.h | 22 +++++++++
 lib/vhost/version.map       |  3 ++
 lib/vhost/virtio_net.c      | 90 +++++++++++++++++++++++++++----------
 5 files changed, 139 insertions(+), 25 deletions(-)
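
For context, the sketch below shows how an application could use the new API when a queue is about to go away (device teardown or a vring being disabled), modeled on the vhost example changes in this patch. The pkts_inflight argument stands for the application's own count of packets handed to the DMA engine — the library does not track this for the caller — and the function takes no lock, so the data path for the queue must already be stopped. This is an illustrative sketch, not part of the patch:

#include <rte_mbuf.h>
#include <rte_vhost_async.h>

/* Minimal sketch: drain and free every packet still owned by the DMA
 * engine on one queue. The caller guarantees the data path is stopped
 * and passes its own in-flight count (the example app keeps one per
 * device, updated atomically from the enqueue/completion paths).
 */
static void
drain_async_queue(int vid, uint16_t queue_id, uint16_t pkts_inflight)
{
	uint16_t n_pkt;

	if (pkts_inflight == 0)
		return;

	struct rte_mbuf *m_cpl[pkts_inflight];

	n_pkt = rte_vhost_drain_queue_thread_unsafe(vid, queue_id,
						m_cpl, pkts_inflight);
	while (n_pkt--)
		rte_pktmbuf_free(m_cpl[n_pkt]);
}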
  

Comments

Maxime Coquelin June 7, 2021, 1:46 p.m. UTC | #1
On 6/2/21 6:28 AM, Cheng Jiang wrote:
> Applications need to stop DMA transfers and finish all in-flight
> packets in the VM memory hot-plug case when async vhost is used. This
> patch provides a thread-unsafe API to drain the in-flight packets that
> were submitted to the DMA engine in the vhost async data path, and
> enables it in the vhost example.
> 
> Signed-off-by: Cheng Jiang <cheng1.jiang@intel.com>
> ---
>  examples/vhost/main.c       | 48 +++++++++++++++++++-
>  examples/vhost/main.h       |  1 +
>  lib/vhost/rte_vhost_async.h | 22 +++++++++
>  lib/vhost/version.map       |  3 ++
>  lib/vhost/virtio_net.c      | 90 +++++++++++++++++++++++++++----------
>  5 files changed, 139 insertions(+), 25 deletions(-)

Please split example and lib changes in dedicated patches.

Jiang, Cheng1 June 8, 2021, 5:26 a.m. UTC | #2
Hi Maxime,

> -----Original Message-----
> From: Maxime Coquelin <mcoqueli@redhat.com>
> Sent: Monday, June 7, 2021 9:46 PM
> To: Jiang, Cheng1 <cheng1.jiang@intel.com>; maxime.coquelin@redhat.com;
> Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Yang, YvonneX
> <yvonnex.yang@intel.com>
> Subject: Re: [PATCH 1/2] vhost: add unsafe API to drain pkts in async vhost
> 
> 
> 
> On 6/2/21 6:28 AM, Cheng Jiang wrote:
> > Applications need to stop DMA transfers and finish all in-flight
> > packets in the VM memory hot-plug case when async vhost is used. This
> > patch provides a thread-unsafe API to drain the in-flight packets that
> > were submitted to the DMA engine in the vhost async data path, and
> > enables it in the vhost example.
> >
> > Signed-off-by: Cheng Jiang <cheng1.jiang@intel.com>
> > ---
> >  examples/vhost/main.c       | 48 +++++++++++++++++++-
> >  examples/vhost/main.h       |  1 +
> >  lib/vhost/rte_vhost_async.h | 22 +++++++++
> >  lib/vhost/version.map       |  3 ++
> >  lib/vhost/virtio_net.c      | 90 +++++++++++++++++++++++++++----------
> >  5 files changed, 139 insertions(+), 25 deletions(-)
> 
> Please split example and lib changes in dedicated patches.

Sure, it will be fixed in the next version.

Thanks,
Cheng


Patch

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index d2179eadb9..70bb67c7f8 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -851,8 +851,11 @@  complete_async_pkts(struct vhost_dev *vdev)
 
 	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
 					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
-	if (complete_count)
+	if (complete_count) {
 		free_pkts(p_cpl, complete_count);
+		__atomic_sub_fetch(&vdev->pkts_inflight, complete_count, __ATOMIC_SEQ_CST);
+	}
+
 }
 
 static __rte_always_inline void
@@ -895,6 +898,7 @@  drain_vhost(struct vhost_dev *vdev)
 		complete_async_pkts(vdev);
 		ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
 					m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+		__atomic_add_fetch(&vdev->pkts_inflight, ret - cpu_cpl_nr, __ATOMIC_SEQ_CST);
 
 		if (cpu_cpl_nr)
 			free_pkts(m_cpu_cpl, cpu_cpl_nr);
@@ -1226,6 +1230,9 @@  drain_eth_rx(struct vhost_dev *vdev)
 		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
 					VIRTIO_RXQ, pkts, rx_count,
 					m_cpu_cpl, &cpu_cpl_nr);
+		__atomic_add_fetch(&vdev->pkts_inflight, enqueue_count - cpu_cpl_nr,
+					__ATOMIC_SEQ_CST);
+
 		if (cpu_cpl_nr)
 			free_pkts(m_cpu_cpl, cpu_cpl_nr);
 
@@ -1397,8 +1404,15 @@  destroy_device(int vid)
 		"(%d) device has been removed from data core\n",
 		vdev->vid);
 
-	if (async_vhost_driver)
+	if (async_vhost_driver) {
+		uint16_t n_pkt = 0;
+		struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+		n_pkt = rte_vhost_drain_queue_thread_unsafe(vid, VIRTIO_RXQ, m_cpl,
+							vdev->pkts_inflight);
+
+		free_pkts(m_cpl, n_pkt);
 		rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+	}
 
 	rte_free(vdev);
 }
@@ -1487,6 +1501,35 @@  new_device(int vid)
 	return 0;
 }
 
+static int
+vring_state_changed(int vid, uint16_t queue_id, int enable)
+{
+	struct vhost_dev *vdev = NULL;
+
+	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
+		if (vdev->vid == vid)
+			break;
+	}
+	if (!vdev)
+		return -1;
+
+	if (queue_id != VIRTIO_RXQ)
+		return 0;
+
+	if (async_vhost_driver) {
+		if (!enable) {
+			uint16_t n_pkt;
+			struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+
+			n_pkt = rte_vhost_drain_queue_thread_unsafe(vid, queue_id,
+							m_cpl, vdev->pkts_inflight);
+			free_pkts(m_cpl, n_pkt);
+		}
+	}
+
+	return 0;
+}
+
 /*
  * These callback allow devices to be added to the data core when configuration
  * has been fully complete.
@@ -1495,6 +1538,7 @@  static const struct vhost_device_ops virtio_net_device_ops =
 {
 	.new_device =  new_device,
 	.destroy_device = destroy_device,
+	.vring_state_changed = vring_state_changed,
 };
 
 /*
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 0ccdce4b4a..e7b1ac60a6 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,6 +51,7 @@  struct vhost_dev {
 	uint64_t features;
 	size_t hdr_len;
 	uint16_t nr_vrings;
+	uint16_t pkts_inflight;
 	struct rte_vhost_memory *mem;
 	struct device_statistics stats;
 	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 6faa31f5ad..041f40cf04 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -193,4 +193,26 @@  __rte_experimental
 uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
 		struct rte_mbuf **pkts, uint16_t count);
 
+/**
+ * This function checks async completion status and drains all packets
+ * for a specific vhost device queue. Packets which are in-flight will
+ * be returned in an array.
+ *
+ * @note This function does not perform any locking
+ *
+ * @param vid
+ *  ID of the vhost device to drain
+ * @param queue_id
+ *  ID of the queue to drain
+ * @param pkts
+ *  empty array to store the returned packet pointers
+ * @param count
+ *  size of the packet array
+ * @return
+ *  number of packets returned
+ */
+__rte_experimental
+uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
+		struct rte_mbuf **pkts, uint16_t count);
+
 #endif /* _RTE_VHOST_ASYNC_H_ */
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index 9103a23cd4..f480f188af 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -79,4 +79,7 @@  EXPERIMENTAL {
 
 	# added in 21.05
 	rte_vhost_get_negotiated_protocol_features;
+
+	# added in 21.08
+	rte_vhost_drain_queue_thread_unsafe;
 };
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 8da8a86a10..793510974a 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -2082,36 +2082,18 @@  write_back_completed_descs_packed(struct vhost_virtqueue *vq,
 	} while (nr_left > 0);
 }
 
-uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+static __rte_always_inline uint16_t
+vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
 		struct rte_mbuf **pkts, uint16_t count)
 {
-	struct virtio_net *dev = get_device(vid);
 	struct vhost_virtqueue *vq;
 	uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
 	uint16_t start_idx, pkts_idx, vq_size;
 	struct async_inflight_info *pkts_info;
 	uint16_t from, i;
 
-	if (!dev)
-		return 0;
-
-	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
-	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
-		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
-			dev->vid, __func__, queue_id);
-		return 0;
-	}
-
 	vq = dev->virtqueue[queue_id];
 
-	if (unlikely(!vq->async_registered)) {
-		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
-			dev->vid, __func__, queue_id);
-		return 0;
-	}
-
-	rte_spinlock_lock(&vq->access_lock);
-
 	pkts_idx = vq->async_pkts_idx % vq->size;
 	pkts_info = vq->async_pkts_info;
 	vq_size = vq->size;
@@ -2119,14 +2101,14 @@  uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
 		vq_size, vq->async_pkts_inflight_n);
 
 	if (count > vq->async_last_pkts_n)
-		n_pkts_cpl = vq->async_ops.check_completed_copies(vid,
+		n_pkts_cpl = vq->async_ops.check_completed_copies(dev->vid,
 			queue_id, 0, count - vq->async_last_pkts_n);
 	n_pkts_cpl += vq->async_last_pkts_n;
 
 	n_pkts_put = RTE_MIN(count, n_pkts_cpl);
 	if (unlikely(n_pkts_put == 0)) {
 		vq->async_last_pkts_n = n_pkts_cpl;
-		goto done;
+		return 0;
 	}
 
 	if (vq_is_packed(dev)) {
@@ -2165,12 +2147,74 @@  uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
 			vq->last_async_desc_idx_split += n_descs;
 	}
 
-done:
+	return n_pkts_put;
+}
+
+uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+		struct rte_mbuf **pkts, uint16_t count)
+{
+	struct virtio_net *dev = get_device(vid);
+	struct vhost_virtqueue *vq;
+	uint16_t n_pkts_put = 0;
+
+	if (!dev)
+		return 0;
+
+	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
+		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
+			dev->vid, __func__, queue_id);
+		return 0;
+	}
+
+	vq = dev->virtqueue[queue_id];
+
+	if (unlikely(!vq->async_registered)) {
+		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
+			dev->vid, __func__, queue_id);
+		return 0;
+	}
+
+	rte_spinlock_lock(&vq->access_lock);
+
+	n_pkts_put = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
+
 	rte_spinlock_unlock(&vq->access_lock);
 
 	return n_pkts_put;
 }
 
+uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
+		struct rte_mbuf **pkts, uint16_t count)
+{
+	struct virtio_net *dev = get_device(vid);
+	struct vhost_virtqueue *vq;
+	uint16_t n_pkts = count;
+
+	if (!dev)
+		return 0;
+
+	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
+		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
+			dev->vid, __func__, queue_id);
+		return 0;
+	}
+
+	vq = dev->virtqueue[queue_id];
+
+	if (unlikely(!vq->async_registered)) {
+		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
+			dev->vid, __func__, queue_id);
+		return 0;
+	}
+
+	while (count)
+		count -= vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
+
+	return n_pkts;
+}
+
 static __rte_always_inline uint32_t
 virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
 	struct rte_mbuf **pkts, uint32_t count,
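
A side note on the example changes above: the drain array is sized from the application's own in-flight counter, which the data path keeps up to date with GCC atomic builtins. Below is a minimal sketch of that accounting pattern, mirroring the example code in this patch; the function names and the global counter are illustrative, and only the two rte_vhost_* calls are library API:

#include <rte_mbuf.h>
#include <rte_vhost_async.h>

#define MAX_PKT_BURST 32

static uint16_t pkts_inflight;	/* per device in the real example */

/* Enqueue path: packets completed in place by the CPU never reach the
 * DMA engine, so only the remainder becomes in flight. */
static void
enqueue_and_account(int vid, uint16_t queue_id,
		struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	struct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];
	uint32_t cpu_cpl_nr = 0;
	uint16_t enqueued;

	enqueued = rte_vhost_submit_enqueue_burst(vid, queue_id, pkts,
					nb_pkts, m_cpu_cpl, &cpu_cpl_nr);
	__atomic_add_fetch(&pkts_inflight, enqueued - cpu_cpl_nr,
					__ATOMIC_SEQ_CST);
	/* m_cpu_cpl[0..cpu_cpl_nr) can be freed immediately. */
}

/* Completion path: subtract whatever the DMA engine has finished. */
static void
poll_and_account(int vid, uint16_t queue_id)
{
	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
	uint16_t done;

	done = rte_vhost_poll_enqueue_completed(vid, queue_id, p_cpl,
					MAX_PKT_BURST);
	if (done)
		__atomic_sub_fetch(&pkts_inflight, done, __ATOMIC_SEQ_CST);
	/* p_cpl[0..done) can be freed here. */
}

With this bookkeeping in place, the counter read at teardown time bounds how many packets rte_vhost_drain_queue_thread_unsafe() can return, which is why the example sizes its m_cpl array with vdev->pkts_inflight.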