[1/2] vhost: add unsafe API to drain pkts in async vhost
Checks
Commit Message
Applications need to stop DMA transfers and finish all the in-flight
pkts when in VM memory hot-plug case and async vhost is used. This
patch is to provide an unsafe API to drain in-flight pkts which are
submitted to DMA engine in vhost async data path. And enable it in
vhost example.
Signed-off-by: Cheng Jiang <cheng1.jiang@intel.com>
---
examples/vhost/main.c | 48 +++++++++++++++++++-
examples/vhost/main.h | 1 +
lib/vhost/rte_vhost_async.h | 22 +++++++++
lib/vhost/version.map | 3 ++
lib/vhost/virtio_net.c | 90 +++++++++++++++++++++++++++----------
5 files changed, 139 insertions(+), 25 deletions(-)
Comments
On 6/2/21 6:28 AM, Cheng Jiang wrote:
> Applications need to stop DMA transfers and finish all the in-flight
> pkts when in VM memory hot-plug case and async vhost is used. This
> patch is to provide an unsafe API to drain in-flight pkts which are
> submitted to DMA engine in vhost async data path. And enable it in
> vhost example.
>
> Signed-off-by: Cheng Jiang <cheng1.jiang@intel.com>
> ---
> examples/vhost/main.c | 48 +++++++++++++++++++-
> examples/vhost/main.h | 1 +
> lib/vhost/rte_vhost_async.h | 22 +++++++++
> lib/vhost/version.map | 3 ++
> lib/vhost/virtio_net.c | 90 +++++++++++++++++++++++++++----------
> 5 files changed, 139 insertions(+), 25 deletions(-)
Please split example and lib changes in dedicated patches.
>
> diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> index d2179eadb9..70bb67c7f8 100644
> --- a/examples/vhost/main.c
> +++ b/examples/vhost/main.c
> @@ -851,8 +851,11 @@ complete_async_pkts(struct vhost_dev *vdev)
>
> complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
> VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
> - if (complete_count)
> + if (complete_count) {
> free_pkts(p_cpl, complete_count);
> + __atomic_sub_fetch(&vdev->pkts_inflight, complete_count, __ATOMIC_SEQ_CST);
> + }
> +
> }
>
> static __rte_always_inline void
> @@ -895,6 +898,7 @@ drain_vhost(struct vhost_dev *vdev)
> complete_async_pkts(vdev);
> ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
> m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
> + __atomic_add_fetch(&vdev->pkts_inflight, ret - cpu_cpl_nr, __ATOMIC_SEQ_CST);
>
> if (cpu_cpl_nr)
> free_pkts(m_cpu_cpl, cpu_cpl_nr);
> @@ -1226,6 +1230,9 @@ drain_eth_rx(struct vhost_dev *vdev)
> enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
> VIRTIO_RXQ, pkts, rx_count,
> m_cpu_cpl, &cpu_cpl_nr);
> + __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count - cpu_cpl_nr,
> + __ATOMIC_SEQ_CST);
> +
> if (cpu_cpl_nr)
> free_pkts(m_cpu_cpl, cpu_cpl_nr);
>
> @@ -1397,8 +1404,15 @@ destroy_device(int vid)
> "(%d) device has been removed from data core\n",
> vdev->vid);
>
> - if (async_vhost_driver)
> + if (async_vhost_driver) {
> + uint16_t n_pkt = 0;
> + struct rte_mbuf *m_cpl[vdev->pkts_inflight];
> + n_pkt = rte_vhost_drain_queue_thread_unsafe(vid, VIRTIO_RXQ, m_cpl,
> + vdev->pkts_inflight);
> +
> + free_pkts(m_cpl, n_pkt);
> rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
> + }
>
> rte_free(vdev);
> }
> @@ -1487,6 +1501,35 @@ new_device(int vid)
> return 0;
> }
>
> +static int
> +vring_state_changed(int vid, uint16_t queue_id, int enable)
> +{
> + struct vhost_dev *vdev = NULL;
> +
> + TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
> + if (vdev->vid == vid)
> + break;
> + }
> + if (!vdev)
> + return -1;
> +
> + if (queue_id != VIRTIO_RXQ)
> + return 0;
> +
> + if (async_vhost_driver) {
> + if (!enable) {
> + uint16_t n_pkt;
> + struct rte_mbuf *m_cpl[vdev->pkts_inflight];
> +
> + n_pkt = rte_vhost_drain_queue_thread_unsafe(vid, queue_id,
> + m_cpl, vdev->pkts_inflight);
> + free_pkts(m_cpl, n_pkt);
> + }
> + }
> +
> + return 0;
> +}
> +
> /*
> * These callback allow devices to be added to the data core when configuration
> * has been fully complete.
> @@ -1495,6 +1538,7 @@ static const struct vhost_device_ops virtio_net_device_ops =
> {
> .new_device = new_device,
> .destroy_device = destroy_device,
> + .vring_state_changed = vring_state_changed,
> };
>
> /*
> diff --git a/examples/vhost/main.h b/examples/vhost/main.h
> index 0ccdce4b4a..e7b1ac60a6 100644
> --- a/examples/vhost/main.h
> +++ b/examples/vhost/main.h
> @@ -51,6 +51,7 @@ struct vhost_dev {
> uint64_t features;
> size_t hdr_len;
> uint16_t nr_vrings;
> + uint16_t pkts_inflight;
> struct rte_vhost_memory *mem;
> struct device_statistics stats;
> TAILQ_ENTRY(vhost_dev) global_vdev_entry;
> diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
> index 6faa31f5ad..041f40cf04 100644
> --- a/lib/vhost/rte_vhost_async.h
> +++ b/lib/vhost/rte_vhost_async.h
> @@ -193,4 +193,26 @@ __rte_experimental
> uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> struct rte_mbuf **pkts, uint16_t count);
>
> +/**
> + * This function checks async completion status and empties all packets
> + * for a specific vhost device queue. Packets which are in-flight will
> + * be returned in an array.
> + *
> + * @note This function does not perform any locking
> + *
> + * @param vid
> + * id of vhost device to enqueue data
> + * @param queue_id
> + * queue id to enqueue data
> + * @param pkts
> + * blank array to get return packet pointer
> + * @param count
> + * size of the packet array
> + * @return
> + * num of packets returned
> + */
> +__rte_experimental
> +uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
> + struct rte_mbuf **pkts, uint16_t count);
> +
> #endif /* _RTE_VHOST_ASYNC_H_ */
> diff --git a/lib/vhost/version.map b/lib/vhost/version.map
> index 9103a23cd4..f480f188af 100644
> --- a/lib/vhost/version.map
> +++ b/lib/vhost/version.map
> @@ -79,4 +79,7 @@ EXPERIMENTAL {
>
> # added in 21.05
> rte_vhost_get_negotiated_protocol_features;
> +
> + # added in 21.08
> + rte_vhost_drain_queue_thread_unsafe;
> };
> diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
> index 8da8a86a10..793510974a 100644
> --- a/lib/vhost/virtio_net.c
> +++ b/lib/vhost/virtio_net.c
> @@ -2082,36 +2082,18 @@ write_back_completed_descs_packed(struct vhost_virtqueue *vq,
> } while (nr_left > 0);
> }
>
> -uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> +static __rte_always_inline uint16_t
> +vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
> struct rte_mbuf **pkts, uint16_t count)
> {
> - struct virtio_net *dev = get_device(vid);
> struct vhost_virtqueue *vq;
> uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
> uint16_t start_idx, pkts_idx, vq_size;
> struct async_inflight_info *pkts_info;
> uint16_t from, i;
>
> - if (!dev)
> - return 0;
> -
> - VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
> - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
> - VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
> - dev->vid, __func__, queue_id);
> - return 0;
> - }
> -
> vq = dev->virtqueue[queue_id];
>
> - if (unlikely(!vq->async_registered)) {
> - VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
> - dev->vid, __func__, queue_id);
> - return 0;
> - }
> -
> - rte_spinlock_lock(&vq->access_lock);
> -
> pkts_idx = vq->async_pkts_idx % vq->size;
> pkts_info = vq->async_pkts_info;
> vq_size = vq->size;
> @@ -2119,14 +2101,14 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> vq_size, vq->async_pkts_inflight_n);
>
> if (count > vq->async_last_pkts_n)
> - n_pkts_cpl = vq->async_ops.check_completed_copies(vid,
> + n_pkts_cpl = vq->async_ops.check_completed_copies(dev->vid,
> queue_id, 0, count - vq->async_last_pkts_n);
> n_pkts_cpl += vq->async_last_pkts_n;
>
> n_pkts_put = RTE_MIN(count, n_pkts_cpl);
> if (unlikely(n_pkts_put == 0)) {
> vq->async_last_pkts_n = n_pkts_cpl;
> - goto done;
> + return 0;
> }
>
> if (vq_is_packed(dev)) {
> @@ -2165,12 +2147,74 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> vq->last_async_desc_idx_split += n_descs;
> }
>
> -done:
> + return n_pkts_put;
> +}
> +
> +uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> + struct rte_mbuf **pkts, uint16_t count)
> +{
> + struct virtio_net *dev = get_device(vid);
> + struct vhost_virtqueue *vq;
> + uint16_t n_pkts_put = 0;
> +
> + if (!dev)
> + return 0;
> +
> + VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
> + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
> + VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
> + dev->vid, __func__, queue_id);
> + return 0;
> + }
> +
> + vq = dev->virtqueue[queue_id];
> +
> + if (unlikely(!vq->async_registered)) {
> + VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
> + dev->vid, __func__, queue_id);
> + return 0;
> + }
> +
> + rte_spinlock_lock(&vq->access_lock);
> +
> + n_pkts_put = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
> +
> rte_spinlock_unlock(&vq->access_lock);
>
> return n_pkts_put;
> }
>
> +uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
> + struct rte_mbuf **pkts, uint16_t count)
> +{
> + struct virtio_net *dev = get_device(vid);
> + struct vhost_virtqueue *vq;
> + uint16_t n_pkts = count;
> +
> + if (!dev)
> + return 0;
> +
> + VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
> + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
> + VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
> + dev->vid, __func__, queue_id);
> + return 0;
> + }
> +
> + vq = dev->virtqueue[queue_id];
> +
> + if (unlikely(!vq->async_registered)) {
> + VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
> + dev->vid, __func__, queue_id);
> + return 0;
> + }
> +
> + while (count)
> + count -= vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
> +
> + return n_pkts;
> +}
> +
> static __rte_always_inline uint32_t
> virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
> struct rte_mbuf **pkts, uint32_t count,
>
Hi Maxime,
> -----Original Message-----
> From: Maxime Coquelin <mcoqueli@redhat.com>
> Sent: Monday, June 7, 2021 9:46 PM
> To: Jiang, Cheng1 <cheng1.jiang@intel.com>; maxime.coquelin@redhat.com;
> Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Yang, YvonneX
> <yvonnex.yang@intel.com>
> Subject: Re: [PATCH 1/2] vhost: add unsafe API to drain pkts in async vhost
>
>
>
> On 6/2/21 6:28 AM, Cheng Jiang wrote:
> > Applications need to stop DMA transfers and finish all the in-flight
> > pkts when in VM memory hot-plug case and async vhost is used. This
> > patch is to provide an unsafe API to drain in-flight pkts which are
> > submitted to DMA engine in vhost async data path. And enable it in
> > vhost example.
> >
> > Signed-off-by: Cheng Jiang <cheng1.jiang@intel.com>
> > ---
> > examples/vhost/main.c | 48 +++++++++++++++++++-
> > examples/vhost/main.h | 1 +
> > lib/vhost/rte_vhost_async.h | 22 +++++++++
> > lib/vhost/version.map | 3 ++
> > lib/vhost/virtio_net.c | 90 +++++++++++++++++++++++++++----------
> > 5 files changed, 139 insertions(+), 25 deletions(-)
>
> Please split example and lib changes in dedicated patches.
Sure, it will be fixed in the next version.
Thanks,
Cheng
>
> >
> > diff --git a/examples/vhost/main.c b/examples/vhost/main.c index
> > d2179eadb9..70bb67c7f8 100644
> > --- a/examples/vhost/main.c
> > +++ b/examples/vhost/main.c
> > @@ -851,8 +851,11 @@ complete_async_pkts(struct vhost_dev *vdev)
> >
> > complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
> > VIRTIO_RXQ, p_cpl,
> MAX_PKT_BURST);
> > - if (complete_count)
> > + if (complete_count) {
> > free_pkts(p_cpl, complete_count);
> > + __atomic_sub_fetch(&vdev->pkts_inflight, complete_count,
> __ATOMIC_SEQ_CST);
> > + }
> > +
> > }
> >
> > static __rte_always_inline void
> > @@ -895,6 +898,7 @@ drain_vhost(struct vhost_dev *vdev)
> > complete_async_pkts(vdev);
> > ret = rte_vhost_submit_enqueue_burst(vdev->vid,
> VIRTIO_RXQ,
> > m, nr_xmit, m_cpu_cpl,
> &cpu_cpl_nr);
> > + __atomic_add_fetch(&vdev->pkts_inflight, ret - cpu_cpl_nr,
> > +__ATOMIC_SEQ_CST);
> >
> > if (cpu_cpl_nr)
> > free_pkts(m_cpu_cpl, cpu_cpl_nr);
> > @@ -1226,6 +1230,9 @@ drain_eth_rx(struct vhost_dev *vdev)
> > enqueue_count = rte_vhost_submit_enqueue_burst(vdev-
> >vid,
> > VIRTIO_RXQ, pkts, rx_count,
> > m_cpu_cpl, &cpu_cpl_nr);
> > + __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count
> - cpu_cpl_nr,
> > + __ATOMIC_SEQ_CST);
> > +
> > if (cpu_cpl_nr)
> > free_pkts(m_cpu_cpl, cpu_cpl_nr);
> >
> > @@ -1397,8 +1404,15 @@ destroy_device(int vid)
> > "(%d) device has been removed from data core\n",
> > vdev->vid);
> >
> > - if (async_vhost_driver)
> > + if (async_vhost_driver) {
> > + uint16_t n_pkt = 0;
> > + struct rte_mbuf *m_cpl[vdev->pkts_inflight];
> > + n_pkt = rte_vhost_drain_queue_thread_unsafe(vid,
> VIRTIO_RXQ, m_cpl,
> > + vdev->pkts_inflight);
> > +
> > + free_pkts(m_cpl, n_pkt);
> > rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
> > + }
> >
> > rte_free(vdev);
> > }
> > @@ -1487,6 +1501,35 @@ new_device(int vid)
> > return 0;
> > }
> >
> > +static int
> > +vring_state_changed(int vid, uint16_t queue_id, int enable) {
> > + struct vhost_dev *vdev = NULL;
> > +
> > + TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
> > + if (vdev->vid == vid)
> > + break;
> > + }
> > + if (!vdev)
> > + return -1;
> > +
> > + if (queue_id != VIRTIO_RXQ)
> > + return 0;
> > +
> > + if (async_vhost_driver) {
> > + if (!enable) {
> > + uint16_t n_pkt;
> > + struct rte_mbuf *m_cpl[vdev->pkts_inflight];
> > +
> > + n_pkt = rte_vhost_drain_queue_thread_unsafe(vid,
> queue_id,
> > + m_cpl, vdev-
> >pkts_inflight);
> > + free_pkts(m_cpl, n_pkt);
> > + }
> > + }
> > +
> > + return 0;
> > +}
> > +
> > /*
> > * These callback allow devices to be added to the data core when
> configuration
> > * has been fully complete.
> > @@ -1495,6 +1538,7 @@ static const struct vhost_device_ops
> > virtio_net_device_ops = {
> > .new_device = new_device,
> > .destroy_device = destroy_device,
> > + .vring_state_changed = vring_state_changed,
> > };
> >
> > /*
> > diff --git a/examples/vhost/main.h b/examples/vhost/main.h index
> > 0ccdce4b4a..e7b1ac60a6 100644
> > --- a/examples/vhost/main.h
> > +++ b/examples/vhost/main.h
> > @@ -51,6 +51,7 @@ struct vhost_dev {
> > uint64_t features;
> > size_t hdr_len;
> > uint16_t nr_vrings;
> > + uint16_t pkts_inflight;
> > struct rte_vhost_memory *mem;
> > struct device_statistics stats;
> > TAILQ_ENTRY(vhost_dev) global_vdev_entry; diff --git
> > a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h index
> > 6faa31f5ad..041f40cf04 100644
> > --- a/lib/vhost/rte_vhost_async.h
> > +++ b/lib/vhost/rte_vhost_async.h
> > @@ -193,4 +193,26 @@ __rte_experimental uint16_t
> > rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> > struct rte_mbuf **pkts, uint16_t count);
> >
> > +/**
> > + * This function checks async completion status and empties all packets
> > + * for a specific vhost device queue. Packets which are in-flight will
> > + * be returned in an array.
> > + *
> > + * @note This function does not perform any locking
> > + *
> > + * @param vid
> > + * id of vhost device to enqueue data
> > + * @param queue_id
> > + * queue id to enqueue data
> > + * @param pkts
> > + * blank array to get return packet pointer
> > + * @param count
> > + * size of the packet array
> > + * @return
> > + * num of packets returned
> > + */
> > +__rte_experimental
> > +uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t
> queue_id,
> > + struct rte_mbuf **pkts, uint16_t count);
> > +
> > #endif /* _RTE_VHOST_ASYNC_H_ */
> > diff --git a/lib/vhost/version.map b/lib/vhost/version.map index
> > 9103a23cd4..f480f188af 100644
> > --- a/lib/vhost/version.map
> > +++ b/lib/vhost/version.map
> > @@ -79,4 +79,7 @@ EXPERIMENTAL {
> >
> > # added in 21.05
> > rte_vhost_get_negotiated_protocol_features;
> > +
> > + # added in 21.08
> > + rte_vhost_drain_queue_thread_unsafe;
> > };
> > diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c index
> > 8da8a86a10..793510974a 100644
> > --- a/lib/vhost/virtio_net.c
> > +++ b/lib/vhost/virtio_net.c
> > @@ -2082,36 +2082,18 @@ write_back_completed_descs_packed(struct
> vhost_virtqueue *vq,
> > } while (nr_left > 0);
> > }
> >
> > -uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> > +static __rte_always_inline uint16_t
> > +vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t
> > +queue_id,
> > struct rte_mbuf **pkts, uint16_t count) {
> > - struct virtio_net *dev = get_device(vid);
> > struct vhost_virtqueue *vq;
> > uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
> > uint16_t start_idx, pkts_idx, vq_size;
> > struct async_inflight_info *pkts_info;
> > uint16_t from, i;
> >
> > - if (!dev)
> > - return 0;
> > -
> > - VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
> > - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
> > - VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue
> idx %d.\n",
> > - dev->vid, __func__, queue_id);
> > - return 0;
> > - }
> > -
> > vq = dev->virtqueue[queue_id];
> >
> > - if (unlikely(!vq->async_registered)) {
> > - VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for
> queue id %d.\n",
> > - dev->vid, __func__, queue_id);
> > - return 0;
> > - }
> > -
> > - rte_spinlock_lock(&vq->access_lock);
> > -
> > pkts_idx = vq->async_pkts_idx % vq->size;
> > pkts_info = vq->async_pkts_info;
> > vq_size = vq->size;
> > @@ -2119,14 +2101,14 @@ uint16_t
> rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> > vq_size, vq->async_pkts_inflight_n);
> >
> > if (count > vq->async_last_pkts_n)
> > - n_pkts_cpl = vq->async_ops.check_completed_copies(vid,
> > + n_pkts_cpl = vq->async_ops.check_completed_copies(dev-
> >vid,
> > queue_id, 0, count - vq->async_last_pkts_n);
> > n_pkts_cpl += vq->async_last_pkts_n;
> >
> > n_pkts_put = RTE_MIN(count, n_pkts_cpl);
> > if (unlikely(n_pkts_put == 0)) {
> > vq->async_last_pkts_n = n_pkts_cpl;
> > - goto done;
> > + return 0;
> > }
> >
> > if (vq_is_packed(dev)) {
> > @@ -2165,12 +2147,74 @@ uint16_t
> rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> > vq->last_async_desc_idx_split += n_descs;
> > }
> >
> > -done:
> > + return n_pkts_put;
> > +}
> > +
> > +uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
> > + struct rte_mbuf **pkts, uint16_t count) {
> > + struct virtio_net *dev = get_device(vid);
> > + struct vhost_virtqueue *vq;
> > + uint16_t n_pkts_put = 0;
> > +
> > + if (!dev)
> > + return 0;
> > +
> > + VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
> > + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
> > + VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue
> idx %d.\n",
> > + dev->vid, __func__, queue_id);
> > + return 0;
> > + }
> > +
> > + vq = dev->virtqueue[queue_id];
> > +
> > + if (unlikely(!vq->async_registered)) {
> > + VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for
> queue id %d.\n",
> > + dev->vid, __func__, queue_id);
> > + return 0;
> > + }
> > +
> > + rte_spinlock_lock(&vq->access_lock);
> > +
> > + n_pkts_put = vhost_poll_enqueue_completed(dev, queue_id, pkts,
> > +count);
> > +
> > rte_spinlock_unlock(&vq->access_lock);
> >
> > return n_pkts_put;
> > }
> >
> > +uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t
> queue_id,
> > + struct rte_mbuf **pkts, uint16_t count) {
> > + struct virtio_net *dev = get_device(vid);
> > + struct vhost_virtqueue *vq;
> > + uint16_t n_pkts = count;
> > +
> > + if (!dev)
> > + return 0;
> > +
> > + VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
> > + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
> > + VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue
> idx %d.\n",
> > + dev->vid, __func__, queue_id);
> > + return 0;
> > + }
> > +
> > + vq = dev->virtqueue[queue_id];
> > +
> > + if (unlikely(!vq->async_registered)) {
> > + VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for
> queue id %d.\n",
> > + dev->vid, __func__, queue_id);
> > + return 0;
> > + }
> > +
> > + while (count)
> > + count -= vhost_poll_enqueue_completed(dev, queue_id,
> pkts, count);
> > +
> > + return n_pkts;
> > +}
> > +
> > static __rte_always_inline uint32_t
> > virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
> > struct rte_mbuf **pkts, uint32_t count,
> >
@@ -851,8 +851,11 @@ complete_async_pkts(struct vhost_dev *vdev)
complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
- if (complete_count)
+ if (complete_count) {
free_pkts(p_cpl, complete_count);
+ __atomic_sub_fetch(&vdev->pkts_inflight, complete_count, __ATOMIC_SEQ_CST);
+ }
+
}
static __rte_always_inline void
@@ -895,6 +898,7 @@ drain_vhost(struct vhost_dev *vdev)
complete_async_pkts(vdev);
ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+ __atomic_add_fetch(&vdev->pkts_inflight, ret - cpu_cpl_nr, __ATOMIC_SEQ_CST);
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
@@ -1226,6 +1230,9 @@ drain_eth_rx(struct vhost_dev *vdev)
enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
VIRTIO_RXQ, pkts, rx_count,
m_cpu_cpl, &cpu_cpl_nr);
+ __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count - cpu_cpl_nr,
+ __ATOMIC_SEQ_CST);
+
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
@@ -1397,8 +1404,15 @@ destroy_device(int vid)
"(%d) device has been removed from data core\n",
vdev->vid);
- if (async_vhost_driver)
+ if (async_vhost_driver) {
+ uint16_t n_pkt = 0;
+ struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+ n_pkt = rte_vhost_drain_queue_thread_unsafe(vid, VIRTIO_RXQ, m_cpl,
+ vdev->pkts_inflight);
+
+ free_pkts(m_cpl, n_pkt);
rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+ }
rte_free(vdev);
}
@@ -1487,6 +1501,35 @@ new_device(int vid)
return 0;
}
+static int
+vring_state_changed(int vid, uint16_t queue_id, int enable)
+{
+ struct vhost_dev *vdev = NULL;
+
+ TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
+ if (vdev->vid == vid)
+ break;
+ }
+ if (!vdev)
+ return -1;
+
+ if (queue_id != VIRTIO_RXQ)
+ return 0;
+
+ if (async_vhost_driver) {
+ if (!enable) {
+ uint16_t n_pkt;
+ struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+
+ n_pkt = rte_vhost_drain_queue_thread_unsafe(vid, queue_id,
+ m_cpl, vdev->pkts_inflight);
+ free_pkts(m_cpl, n_pkt);
+ }
+ }
+
+ return 0;
+}
+
/*
* These callback allow devices to be added to the data core when configuration
* has been fully complete.
@@ -1495,6 +1538,7 @@ static const struct vhost_device_ops virtio_net_device_ops =
{
.new_device = new_device,
.destroy_device = destroy_device,
+ .vring_state_changed = vring_state_changed,
};
/*
@@ -51,6 +51,7 @@ struct vhost_dev {
uint64_t features;
size_t hdr_len;
uint16_t nr_vrings;
+ uint16_t pkts_inflight;
struct rte_vhost_memory *mem;
struct device_statistics stats;
TAILQ_ENTRY(vhost_dev) global_vdev_entry;
@@ -193,4 +193,26 @@ __rte_experimental
uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count);
+/**
+ * This function checks async completion status and empties all packets
+ * for a specific vhost device queue. Packets which are in-flight will
+ * be returned in an array.
+ *
+ * @note This function does not perform any locking
+ *
+ * @param vid
+ * id of vhost device to enqueue data
+ * @param queue_id
+ * queue id to enqueue data
+ * @param pkts
+ * blank array to get return packet pointer
+ * @param count
+ * size of the packet array
+ * @return
+ * num of packets returned
+ */
+__rte_experimental
+uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
+ struct rte_mbuf **pkts, uint16_t count);
+
#endif /* _RTE_VHOST_ASYNC_H_ */
@@ -79,4 +79,7 @@ EXPERIMENTAL {
# added in 21.05
rte_vhost_get_negotiated_protocol_features;
+
+ # added in 21.08
+ rte_vhost_drain_queue_thread_unsafe;
};
@@ -2082,36 +2082,18 @@ write_back_completed_descs_packed(struct vhost_virtqueue *vq,
} while (nr_left > 0);
}
-uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+static __rte_always_inline uint16_t
+vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count)
{
- struct virtio_net *dev = get_device(vid);
struct vhost_virtqueue *vq;
uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
uint16_t start_idx, pkts_idx, vq_size;
struct async_inflight_info *pkts_info;
uint16_t from, i;
- if (!dev)
- return 0;
-
- VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
- if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
- VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
- dev->vid, __func__, queue_id);
- return 0;
- }
-
vq = dev->virtqueue[queue_id];
- if (unlikely(!vq->async_registered)) {
- VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
- dev->vid, __func__, queue_id);
- return 0;
- }
-
- rte_spinlock_lock(&vq->access_lock);
-
pkts_idx = vq->async_pkts_idx % vq->size;
pkts_info = vq->async_pkts_info;
vq_size = vq->size;
@@ -2119,14 +2101,14 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
vq_size, vq->async_pkts_inflight_n);
if (count > vq->async_last_pkts_n)
- n_pkts_cpl = vq->async_ops.check_completed_copies(vid,
+ n_pkts_cpl = vq->async_ops.check_completed_copies(dev->vid,
queue_id, 0, count - vq->async_last_pkts_n);
n_pkts_cpl += vq->async_last_pkts_n;
n_pkts_put = RTE_MIN(count, n_pkts_cpl);
if (unlikely(n_pkts_put == 0)) {
vq->async_last_pkts_n = n_pkts_cpl;
- goto done;
+ return 0;
}
if (vq_is_packed(dev)) {
@@ -2165,12 +2147,74 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
vq->last_async_desc_idx_split += n_descs;
}
-done:
+ return n_pkts_put;
+}
+
+uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+ struct rte_mbuf **pkts, uint16_t count)
+{
+ struct virtio_net *dev = get_device(vid);
+ struct vhost_virtqueue *vq;
+ uint16_t n_pkts_put = 0;
+
+ if (!dev)
+ return 0;
+
+ VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
+ if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
+ VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
+ dev->vid, __func__, queue_id);
+ return 0;
+ }
+
+ vq = dev->virtqueue[queue_id];
+
+ if (unlikely(!vq->async_registered)) {
+ VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
+ dev->vid, __func__, queue_id);
+ return 0;
+ }
+
+ rte_spinlock_lock(&vq->access_lock);
+
+ n_pkts_put = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
+
rte_spinlock_unlock(&vq->access_lock);
return n_pkts_put;
}
+uint16_t rte_vhost_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
+ struct rte_mbuf **pkts, uint16_t count)
+{
+ struct virtio_net *dev = get_device(vid);
+ struct vhost_virtqueue *vq;
+ uint16_t n_pkts = count;
+
+ if (!dev)
+ return 0;
+
+ VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
+ if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
+ VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
+ dev->vid, __func__, queue_id);
+ return 0;
+ }
+
+ vq = dev->virtqueue[queue_id];
+
+ if (unlikely(!vq->async_registered)) {
+ VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
+ dev->vid, __func__, queue_id);
+ return 0;
+ }
+
+ while (count)
+ count -= vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
+
+ return n_pkts;
+}
+
static __rte_always_inline uint32_t
virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
struct rte_mbuf **pkts, uint32_t count,