[v2,1/3] vhost: add unsafe API to drain pkts in async vhost

Message ID 20210615141513.16163-2-cheng1.jiang@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Series: vhost: handle memory hotplug for async vhost

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Jiang, Cheng1 June 15, 2021, 2:15 p.m. UTC
  Applications need to stop DMA transfers and finish all the in-flight
packets when VM memory is hot-plugged and async vhost is used. This
patch provides an unsafe API to drain in-flight packets that have been
submitted to the DMA engine in the vhost async data path.

Signed-off-by: Cheng Jiang <cheng1.jiang@intel.com>
---
 lib/vhost/rte_vhost_async.h | 22 +++++++++
 lib/vhost/version.map       |  3 ++
 lib/vhost/virtio_net.c      | 90 +++++++++++++++++++++++++++----------
 3 files changed, 92 insertions(+), 23 deletions(-)
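
For illustration, a minimal sketch of the intended call site (not part of the patch: the handler name, the in-flight counter and the array bound are assumptions; only rte_vhost_drain_queue_thread_unsafe() comes from this series):

#include <rte_mbuf.h>
#include <rte_vhost_async.h>

/* Hypothetical handler, run while the virtqueue is quiesced (e.g. when
 * handling VM memory hot-plug). The API takes no lock, so no other
 * thread may access this queue concurrently. */
static void
drain_vring_on_hotplug(int vid, uint16_t queue_id, uint16_t n_inflight)
{
	struct rte_mbuf *pkts[1024];	/* assumed >= max in-flight pkts */
	uint16_t n;

	/* The API loops internally until 'count' packets have completed,
	 * so pass exactly the number still in flight on this queue. */
	n = rte_vhost_drain_queue_thread_unsafe(vid, queue_id, pkts, n_inflight);

	/* Completed mbufs are handed back for the caller to release. */
	rte_pktmbuf_free_bulk(pkts, n);
}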
  

Comments

Sunil Pai G July 5, 2021, 2:58 p.m. UTC | #1
Hi Cheng, 

Comments inline.

<snipped>

> +uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
> +		struct rte_mbuf **pkts, uint16_t count)
> +{
> +	struct virtio_net *dev = get_device(vid);
> +	struct vhost_virtqueue *vq;
> +	uint16_t n_pkts = count;
> +
> +	if (!dev)
> +		return 0;
> +
> +	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
> +	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
> +		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
> +			dev->vid, __func__, queue_id);
> +		return 0;
> +	}
> +
> +	vq = dev->virtqueue[queue_id];
> +
> +	if (unlikely(!vq->async_registered)) {
> +		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
> +			dev->vid, __func__, queue_id);
> +		return 0;
> +	}
> +
> +	while (count)
> +		count -= vhost_poll_enqueue_completed(dev, queue_id, pkts, count);

I think the drain API here assumes a per-virtqueue assignment of the DMA device, which need not be true.
If there are multiple DMA devices per virtqueue, the application would need a mechanism to change the device id per call to the drain API.
So, it's probably better to just call vhost_poll_enqueue_completed here and return to the application, and have the loop in the application instead? (See the sketch at the end of this message.)

> +
> +	return n_pkts;
> +}
> +

<snipped>
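
To make the suggested alternative concrete, a minimal sketch of the application-side loop (hypothetical code: app_select_dma_device(), next_dma_id, n_inflight and pkts are application state; rte_vhost_poll_enqueue_completed() is the existing API):

	uint16_t done = 0;

	while (done < n_inflight) {
		/* The application owns the check_completed_copies callback,
		 * so it could repoint it at another DMA device per call. */
		app_select_dma_device(vid, queue_id, next_dma_id);
		done += rte_vhost_poll_enqueue_completed(vid, queue_id,
					pkts + done, n_inflight - done);
	}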
  
Maxime Coquelin July 6, 2021, 2:08 p.m. UTC | #2
On 6/15/21 4:15 PM, Cheng Jiang wrote:
> Applications need to stop DMA transfers and finish all the in-flight
> packets when VM memory is hot-plugged and async vhost is used. This
> patch provides an unsafe API to drain in-flight packets that have been
> submitted to the DMA engine in the vhost async data path.
> 
> Signed-off-by: Cheng Jiang <cheng1.jiang@intel.com>
> ---
>  lib/vhost/rte_vhost_async.h | 22 +++++++++
>  lib/vhost/version.map       |  3 ++
>  lib/vhost/virtio_net.c      | 90 +++++++++++++++++++++++++++----------
>  3 files changed, 92 insertions(+), 23 deletions(-)
> 
> diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
> index 6faa31f5ad..041f40cf04 100644
> --- a/lib/vhost/rte_vhost_async.h
> +++ b/lib/vhost/rte_vhost_async.h
> @@ -193,4 +193,26 @@ __rte_experimental
>  uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
>  		struct rte_mbuf **pkts, uint16_t count);
>  
> +/**
> + * This function checks the async completion status and empties all packets
> + * for a specific vhost device queue. Packets which are in flight will
> + * be returned in an array.
> + *
> + * @note This function does not perform any locking
> + *
> + * @param vid
> + *  id of vhost device to enqueue data
> + * @param queue_id
> + *  queue id to enqueue data
> + * @param pkts
> + *  blank array to get return packet pointer
> + * @param count
> + *  size of the packet array
> + * @return
> + *  num of packets returned
> + */
> +__rte_experimental
> +uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
> +		struct rte_mbuf **pkts, uint16_t count);
> +
>  #endif /* _RTE_VHOST_ASYNC_H_ */
> diff --git a/lib/vhost/version.map b/lib/vhost/version.map
> index 9103a23cd4..f480f188af 100644
> --- a/lib/vhost/version.map
> +++ b/lib/vhost/version.map
> @@ -79,4 +79,7 @@ EXPERIMENTAL {
>  
>  	# added in 21.05
>  	rte_vhost_get_negotiated_protocol_features;
> +
> +	# added in 21.08
> +	rte_vhost_drain_queue_thread_unsafe;
>  };
> diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
> index 8da8a86a10..793510974a 100644
> --- a/lib/vhost/virtio_net.c
> +++ b/lib/vhost/virtio_net.c
> @@ -2082,36 +2082,18 @@ write_back_completed_descs_packed(struct vhost_virtqueue *vq,
>  	} while (nr_left > 0);
>  }
>  
> -uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> +static __rte_always_inline uint16_t
> +vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
>  		struct rte_mbuf **pkts, uint16_t count)
>  {
> -	struct virtio_net *dev = get_device(vid);
>  	struct vhost_virtqueue *vq;
>  	uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
>  	uint16_t start_idx, pkts_idx, vq_size;
>  	struct async_inflight_info *pkts_info;
>  	uint16_t from, i;
>  
> -	if (!dev)
> -		return 0;
> -
> -	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
> -	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
> -		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
> -			dev->vid, __func__, queue_id);
> -		return 0;
> -	}
> -
>  	vq = dev->virtqueue[queue_id];
>  
> -	if (unlikely(!vq->async_registered)) {
> -		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
> -			dev->vid, __func__, queue_id);
> -		return 0;
> -	}
> -
> -	rte_spinlock_lock(&vq->access_lock);
> -
>  	pkts_idx = vq->async_pkts_idx % vq->size;
>  	pkts_info = vq->async_pkts_info;
>  	vq_size = vq->size;
> @@ -2119,14 +2101,14 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
>  		vq_size, vq->async_pkts_inflight_n);
>  
>  	if (count > vq->async_last_pkts_n)
> -		n_pkts_cpl = vq->async_ops.check_completed_copies(vid,
> +		n_pkts_cpl = vq->async_ops.check_completed_copies(dev->vid,
>  			queue_id, 0, count - vq->async_last_pkts_n);
>  	n_pkts_cpl += vq->async_last_pkts_n;
>  
>  	n_pkts_put = RTE_MIN(count, n_pkts_cpl);
>  	if (unlikely(n_pkts_put == 0)) {
>  		vq->async_last_pkts_n = n_pkts_cpl;
> -		goto done;
> +		return 0;
>  	}
>  
>  	if (vq_is_packed(dev)) {
> @@ -2165,12 +2147,74 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
>  			vq->last_async_desc_idx_split += n_descs;
>  	}
>  
> -done:
> +	return n_pkts_put;
> +}
> +
> +uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> +		struct rte_mbuf **pkts, uint16_t count)
> +{
> +	struct virtio_net *dev = get_device(vid);
> +	struct vhost_virtqueue *vq;
> +	uint16_t n_pkts_put = 0;
> +
> +	if (!dev)
> +		return 0;
> +
> +	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
> +	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
> +		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
> +			dev->vid, __func__, queue_id);
> +		return 0;
> +	}
> +
> +	vq = dev->virtqueue[queue_id];
> +
> +	if (unlikely(!vq->async_registered)) {
> +		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
> +			dev->vid, __func__, queue_id);
> +		return 0;
> +	}
> +
> +	rte_spinlock_lock(&vq->access_lock);
> +
> +	n_pkts_put = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
> +
>  	rte_spinlock_unlock(&vq->access_lock);
>  
>  	return n_pkts_put;
>  }
>  
> +uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
> +		struct rte_mbuf **pkts, uint16_t count)
> +{
> +	struct virtio_net *dev = get_device(vid);
> +	struct vhost_virtqueue *vq;
> +	uint16_t n_pkts = count;
> +
> +	if (!dev)
> +		return 0;
> +
> +	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
> +	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
> +		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
> +			dev->vid, __func__, queue_id);
> +		return 0;
> +	}
> +
> +	vq = dev->virtqueue[queue_id];
> +
> +	if (unlikely(!vq->async_registered)) {
> +		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
> +			dev->vid, __func__, queue_id);
> +		return 0;
> +	}
> +
> +	while (count)
> +		count -= vhost_poll_enqueue_completed(dev, queue_id, pkts, count);

I think we may want to improve the async_ops so that
.check_completed_copies() returns an int. If for some reason the DMA
driver callback fails, we would poll forever (see the sketch below).

Looking more into the code, I see that ioat_check_completed_copies_cb()
can return -1 (whereas it should return a uint32_t). It would lead to
undefined behaviour if that failure happened. The IOAT driver needs
to be fixed, and also the callback prototype and its handling.

> +
> +	return n_pkts;
> +}
> +
>  static __rte_always_inline uint32_t
>  virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
>  	struct rte_mbuf **pkts, uint32_t count,
>
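
If the prototypes were changed as suggested (an assumption, not what this patch implements: .check_completed_copies() and vhost_poll_enqueue_completed() would return int), the drain loop could stop on error instead of spinning forever:

	while (count) {
		int n = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);

		if (n < 0)	/* DMA callback reported failure: stop polling */
			break;
		count -= n;
	}

With the current uint32_t prototype, a -1 returned by the driver is instead converted to a huge completion count, which is the failure mode described above.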
  
Jiang, Cheng1 July 7, 2021, 2:02 p.m. UTC | #3
Hi Sunil,

Replies are inline.

> -----Original Message-----
> From: Pai G, Sunil <sunil.pai.g@intel.com>
> Sent: Monday, July 5, 2021 10:58 PM
> To: Jiang, Cheng1 <cheng1.jiang@intel.com>; maxime.coquelin@redhat.com;
> Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Yang, YvonneX
> <yvonnex.yang@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>
> Subject: RE: [dpdk-dev] [PATCH v2 1/3] vhost: add unsafe API to drain pkts in
> async vhost
> 
> Hi Cheng,
> 
> Comments inline.
> 
> <snipped>
> 
> > +uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
> > +		struct rte_mbuf **pkts, uint16_t count)
> > +{
> > +	struct virtio_net *dev = get_device(vid);
> > +	struct vhost_virtqueue *vq;
> > +	uint16_t n_pkts = count;
> > +
> > +	if (!dev)
> > +		return 0;
> > +
> > +	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
> > +	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
> > +		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
> > +			dev->vid, __func__, queue_id);
> > +		return 0;
> > +	}
> > +
> > +	vq = dev->virtqueue[queue_id];
> > +
> > +	if (unlikely(!vq->async_registered)) {
> > +		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
> > +			dev->vid, __func__, queue_id);
> > +		return 0;
> > +	}
> > +
> > +	while (count)
> > +		count -= vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
> 
> I think the drain API here assumes a per-virtqueue assignment of the DMA
> device, which need not be true.
> If there are multiple DMA devices per virtqueue, the application would need
> a mechanism to change the device id per call to the drain API.

For now, our async vhost does not support using multiple DMA devices per virtqueue.
It's better to change all the APIs at once in the future for consistency if you need that.

> So, it's probably better to just call vhost_poll_enqueue_completed here and
> return to the application, and have the loop in the application instead?

As for this one, I'm not sure why we need to have the loop in the application.
The function of this API is that the caller needs to drain all the in-flight pkts; it should be called only once to get the job done.
Don't you think?

Thanks.
Cheng

> 
> > +
> > +	return n_pkts;
> > +}
> > +
> 
> <snipped>
> 
>
  
Sunil Pai G July 8, 2021, 7:15 a.m. UTC | #4
Hi Cheng, 

Response inline.

<snipped>

> As for this one, I'm not sure why we need to have the loop in the application.
> The function of this API is that the caller needs to drain all the in-flight pkts;
> it should be called only once to get the job done.
> Don't you think?

Perhaps yes, but my thought was to provide the application the flexibility to change the DMA device per call to the check_completed_copies callback if required.

> 
> Thanks.
> Cheng
> 
> >
> > > +
> > > +	return n_pkts;
> > > +}
> > > +
> >
> > <snipped>
> >
> >
  
Hu, Jiayu July 8, 2021, 1:46 p.m. UTC | #5
Hi Maxime,

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Tuesday, July 6, 2021 10:09 PM
> To: Jiang, Cheng1 <cheng1.jiang@intel.com>; Xia, Chenbo
> <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Yang, YvonneX
> <yvonnex.yang@intel.com>
> Subject: Re: [PATCH v2 1/3] vhost: add unsafe API to drain pkts in async vhost
> 
> 
> 
> On 6/15/21 4:15 PM, Cheng Jiang wrote:
> > Applications need to stop DMA transfers and finish all the in-flight
> > packets when VM memory is hot-plugged and async vhost is used. This
> > patch provides an unsafe API to drain in-flight packets that have been
> > submitted to the DMA engine in the vhost async data path.
> >
> > Signed-off-by: Cheng Jiang <cheng1.jiang@intel.com>

<snipped>

> > +uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
> > +		struct rte_mbuf **pkts, uint16_t count)
> > +{
> > +	struct virtio_net *dev = get_device(vid);
> > +	struct vhost_virtqueue *vq;
> > +	uint16_t n_pkts = count;
> > +
> > +	if (!dev)
> > +		return 0;
> > +
> > +	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
> > +	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
> > +		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
> > +			dev->vid, __func__, queue_id);
> > +		return 0;
> > +	}
> > +
> > +	vq = dev->virtqueue[queue_id];
> > +
> > +	if (unlikely(!vq->async_registered)) {
> > +		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
> > +			dev->vid, __func__, queue_id);
> > +		return 0;
> > +	}
> > +
> > +	while (count)
> > +		count -= vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
> 
> I think we may want to improve the async_ops so that
> .check_completed_copies() returns an int. If for some reason the DMA
> driver callback fails, we would poll forever.

Agree, it makes sense to change the return type of .check_completed_copies() to int
to report the callback failure case.

> 
> Looking more into the code, I see that ioat_check_completed_copies_cb()
> can return -1 (whereas it should return a uint32_t). It would lead to
> undefined behaviour if that failure happened. The IOAT driver needs to
> be fixed, and also the callback prototype and its handling.

The current async design only handles .transfer_data() failure, but requires
.check_completed_copies() to handle DMA copy failure. That is,
.check_completed_copies() always reports all copies as successful to vhost,
even if a DMA copy fails, and it can fall back to a SW copy for the failed
DMA copies.

The other choice is to make vhost handle DMA copy failure. Although the IOAT
driver already supports reporting failed copies, dmadev is under discussion,
and I am not sure if the interface will change after we have dmadev. So maybe
it's better to leave this open until we have dmadev. What do you think?
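
For illustration, a minimal sketch of that fallback contract (all app_* names and struct app_copy are hypothetical application state; the callback signature follows struct rte_vhost_async_channel_ops as of this series): the callback hides DMA errors from vhost by finishing failed copies in software, so vhost only ever sees success.

#include <stdbool.h>
#include <rte_common.h>
#include <rte_memcpy.h>
#include <rte_vhost_async.h>

/* Hypothetical per-copy descriptor tracked by the application. */
struct app_copy { void *dst; const void *src; size_t len; };

extern struct app_copy *app_oldest_inflight_copy(int vid, uint16_t queue_id);
extern bool app_dma_copy_done(const struct app_copy *c);
extern bool app_dma_copy_failed(const struct app_copy *c);
extern void app_release_copy(struct app_copy *c);

static uint32_t
app_check_completed_copies(int vid, uint16_t queue_id,
		struct rte_vhost_async_status *opaque_data, uint16_t max_packets)
{
	uint32_t n_done = 0;

	RTE_SET_USED(opaque_data);

	while (n_done < max_packets) {
		struct app_copy *c = app_oldest_inflight_copy(vid, queue_id);

		if (c == NULL || !app_dma_copy_done(c))
			break;			/* still in flight, report later */
		if (app_dma_copy_failed(c))	/* fall back to a SW copy */
			rte_memcpy(c->dst, c->src, c->len);
		app_release_copy(c);
		n_done++;
	}

	return n_done;	/* always "success" from vhost's point of view */
}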

Thanks,
Jiayu

> 
> > +
> > +	return n_pkts;
> > +}
> > +
> >  static __rte_always_inline uint32_t
> >  virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
> >  	struct rte_mbuf **pkts, uint32_t count,
> >
  
Jiang, Cheng1 July 12, 2021, 6:31 a.m. UTC | #6
Hi,

> -----Original Message-----
> From: Pai G, Sunil <sunil.pai.g@intel.com>
> Sent: Thursday, July 8, 2021 3:15 PM
> To: Jiang, Cheng1 <cheng1.jiang@intel.com>; maxime.coquelin@redhat.com;
> Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Yang, YvonneX
> <yvonnex.yang@intel.com>
> Subject: RE: [dpdk-dev] [PATCH v2 1/3] vhost: add unsafe API to drain pkts in
> async vhost
> 
> Hi Cheng,
> 
> Response inline.
> 
> <snipped>
> 
> > As for this one, I'm not sure why we need to have the loop in the application.
> > The function of this API is that the caller needs to drain all the in-flight
> > pkts; it should be called only once to get the job done.
> > Don't you think?
> 
> Perhaps yes, but my thought was to provide the application the flexibility to
> change the DMA device per call to the check_completed_copies callback if
> required.

Got your point.
So you think this API should be defined as the unsafe version of the poll completed API, right?

Thanks,
Cheng

> 
> >
> > Thanks.
> > Cheng
> >
> > >
> > > > +
> > > > +	return n_pkts;
> > > > +}
> > > > +
> > >
> > > <snipped>
> > >
> > >
  

Patch

diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 6faa31f5ad..041f40cf04 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -193,4 +193,26 @@  __rte_experimental
 uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
 		struct rte_mbuf **pkts, uint16_t count);
 
+/**
+ * This function checks the async completion status and empties all packets
+ * for a specific vhost device queue. Packets which are in flight will
+ * be returned in an array.
+ *
+ * @note This function does not perform any locking
+ *
+ * @param vid
+ *  id of vhost device to enqueue data
+ * @param queue_id
+ *  queue id to enqueue data
+ * @param pkts
+ *  blank array to get return packet pointer
+ * @param count
+ *  size of the packet array
+ * @return
+ *  num of packets returned
+ */
+__rte_experimental
+uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
+		struct rte_mbuf **pkts, uint16_t count);
+
 #endif /* _RTE_VHOST_ASYNC_H_ */
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index 9103a23cd4..f480f188af 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -79,4 +79,7 @@  EXPERIMENTAL {
 
 	# added in 21.05
 	rte_vhost_get_negotiated_protocol_features;
+
+	# added in 21.08
+	rte_vhost_drain_queue_thread_unsafe;
 };
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 8da8a86a10..793510974a 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -2082,36 +2082,18 @@  write_back_completed_descs_packed(struct vhost_virtqueue *vq,
 	} while (nr_left > 0);
 }
 
-uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+static __rte_always_inline uint16_t
+vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
 		struct rte_mbuf **pkts, uint16_t count)
 {
-	struct virtio_net *dev = get_device(vid);
 	struct vhost_virtqueue *vq;
 	uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
 	uint16_t start_idx, pkts_idx, vq_size;
 	struct async_inflight_info *pkts_info;
 	uint16_t from, i;
 
-	if (!dev)
-		return 0;
-
-	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
-	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
-		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
-			dev->vid, __func__, queue_id);
-		return 0;
-	}
-
 	vq = dev->virtqueue[queue_id];
 
-	if (unlikely(!vq->async_registered)) {
-		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
-			dev->vid, __func__, queue_id);
-		return 0;
-	}
-
-	rte_spinlock_lock(&vq->access_lock);
-
 	pkts_idx = vq->async_pkts_idx % vq->size;
 	pkts_info = vq->async_pkts_info;
 	vq_size = vq->size;
@@ -2119,14 +2101,14 @@  uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
 		vq_size, vq->async_pkts_inflight_n);
 
 	if (count > vq->async_last_pkts_n)
-		n_pkts_cpl = vq->async_ops.check_completed_copies(vid,
+		n_pkts_cpl = vq->async_ops.check_completed_copies(dev->vid,
 			queue_id, 0, count - vq->async_last_pkts_n);
 	n_pkts_cpl += vq->async_last_pkts_n;
 
 	n_pkts_put = RTE_MIN(count, n_pkts_cpl);
 	if (unlikely(n_pkts_put == 0)) {
 		vq->async_last_pkts_n = n_pkts_cpl;
-		goto done;
+		return 0;
 	}
 
 	if (vq_is_packed(dev)) {
@@ -2165,12 +2147,74 @@  uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
 			vq->last_async_desc_idx_split += n_descs;
 	}
 
-done:
+	return n_pkts_put;
+}
+
+uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+		struct rte_mbuf **pkts, uint16_t count)
+{
+	struct virtio_net *dev = get_device(vid);
+	struct vhost_virtqueue *vq;
+	uint16_t n_pkts_put = 0;
+
+	if (!dev)
+		return 0;
+
+	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
+		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
+			dev->vid, __func__, queue_id);
+		return 0;
+	}
+
+	vq = dev->virtqueue[queue_id];
+
+	if (unlikely(!vq->async_registered)) {
+		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
+			dev->vid, __func__, queue_id);
+		return 0;
+	}
+
+	rte_spinlock_lock(&vq->access_lock);
+
+	n_pkts_put = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
+
 	rte_spinlock_unlock(&vq->access_lock);
 
 	return n_pkts_put;
 }
 
+uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
+		struct rte_mbuf **pkts, uint16_t count)
+{
+	struct virtio_net *dev = get_device(vid);
+	struct vhost_virtqueue *vq;
+	uint16_t n_pkts = count;
+
+	if (!dev)
+		return 0;
+
+	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
+		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
+			dev->vid, __func__, queue_id);
+		return 0;
+	}
+
+	vq = dev->virtqueue[queue_id];
+
+	if (unlikely(!vq->async_registered)) {
+		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
+			dev->vid, __func__, queue_id);
+		return 0;
+	}
+
+	while (count)
+		count -= vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
+
+	return n_pkts;
+}
+
 static __rte_always_inline uint32_t
 virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
 	struct rte_mbuf **pkts, uint32_t count,