Message ID | 20191021154016.16274-7-yong.liu@intel.com (mailing list archive) |
---|---
State | Superseded, archived |
Delegated to: | Maxime Coquelin |
Series | vhost packed ring performance optimization
Context | Check | Description |
---|---|---
ci/checkpatch | success | coding style OK |
ci/Intel-compilation | success | Compilation OK |
On 10/21/19 5:40 PM, Marvin Liu wrote:
> Add batch dequeue function like enqueue function for packed ring, batch
> dequeue function will not support chained descritpors, single packet
> dequeue function will handle it.
>
> Signed-off-by: Marvin Liu <yong.liu@intel.com>
>
[...]
> +        vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
> +                if (unlikely((lens[i] != descs[avail_idx + i].len)))
> +                        return -1;
> +        }
> +
> +        if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, PACKED_BATCH_SIZE))

Same here, you may want to create a variant of Flavio's
virtio_dev_pktmbuf_alloc for bulk allocations.
Thanks Maxime, has been modified in v8.

> -----Original Message-----
> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> Sent: Monday, October 21, 2019 5:47 PM
> To: Liu, Yong <yong.liu@intel.com>; Bie, Tiwei <tiwei.bie@intel.com>; Wang,
> Zhihong <zhihong.wang@intel.com>; stephen@networkplumber.org;
> gavin.hu@arm.com
> Cc: dev@dpdk.org
> Subject: Re: [PATCH v7 06/13] vhost: add packed ring batch dequeue
>
> On 10/21/19 5:40 PM, Marvin Liu wrote:
> > Add batch dequeue function like enqueue function for packed ring, batch
> > dequeue function will not support chained descritpors, single packet
> > dequeue function will handle it.
> >
> > Signed-off-by: Marvin Liu <yong.liu@intel.com>
> >
> [...]
> > +        if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, PACKED_BATCH_SIZE))
>
> Same here, you may want to create a variant of Flavio's
> virtio_dev_pktmbuf_alloc for bulk allocations.
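For illustration only, a bulk variant along the lines Maxime suggests could look roughly like the sketch below. The name `virtio_dev_pktmbuf_alloc_bulk` and its exact behaviour are assumptions made for this sketch, not the code that landed in v8; it simply combines `rte_pktmbuf_alloc_bulk()` with the per-mbuf room check the batch path needs.

```c
#include <rte_branch_prediction.h>
#include <rte_common.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

/*
 * Hypothetical bulk counterpart to virtio_dev_pktmbuf_alloc(): allocate
 * 'count' mbufs with a single mempool operation and verify each one has
 * room for 'data_len' bytes of packet data. Name and signature are
 * illustrative only.
 */
static __rte_always_inline int
virtio_dev_pktmbuf_alloc_bulk(struct rte_mempool *mp, struct rte_mbuf **pkts,
                              uint16_t count, uint32_t data_len)
{
        uint16_t i;

        if (rte_pktmbuf_alloc_bulk(mp, pkts, count))
                return -1;

        for (i = 0; i < count; i++) {
                if (unlikely(rte_pktmbuf_tailroom(pkts[i]) < data_len)) {
                        uint16_t j;

                        /* Roll back the whole batch on any failure. */
                        for (j = 0; j < count; j++)
                                rte_pktmbuf_free(pkts[j]);
                        return -1;
                }
        }

        return 0;
}
```

In the patch as posted, the room check is done per descriptor after allocation; folding it into a helper like this is one possible way to address the review comment without leaking mbufs on a failed batch.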
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index a2b9221e0..67724c342 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -39,6 +39,9 @@
 
 #define VHOST_LOG_CACHE_NR 32
 
+#define PACKED_DESC_SINGLE_DEQUEUE_FLAG (VRING_DESC_F_NEXT | \
+                                         VRING_DESC_F_INDIRECT)
+
 #define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \
                            sizeof(struct vring_packed_desc))
 #define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1)
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 317be1aed..f13fcafbb 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1635,6 +1635,114 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
         return i;
 }
 
+static __rte_always_inline int
+vhost_reserve_avail_batch_packed(struct virtio_net *dev,
+                                 struct vhost_virtqueue *vq,
+                                 struct rte_mempool *mbuf_pool,
+                                 struct rte_mbuf **pkts,
+                                 uint16_t avail_idx,
+                                 uintptr_t *desc_addrs,
+                                 uint16_t *ids)
+{
+        bool wrap = vq->avail_wrap_counter;
+        struct vring_packed_desc *descs = vq->desc_packed;
+        struct virtio_net_hdr *hdr;
+        uint64_t lens[PACKED_BATCH_SIZE];
+        uint64_t buf_lens[PACKED_BATCH_SIZE];
+        uint32_t buf_offset = dev->vhost_hlen;
+        uint16_t flags, i;
+
+        if (unlikely(avail_idx & PACKED_BATCH_MASK))
+                return -1;
+        if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
+                return -1;
+
+        vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+                flags = descs[avail_idx + i].flags;
+                if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) ||
+                             (wrap == !!(flags & VRING_DESC_F_USED)) ||
+                             (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG)))
+                        return -1;
+        }
+
+        rte_smp_rmb();
+
+        vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+                lens[i] = descs[avail_idx + i].len;
+
+        vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+                desc_addrs[i] = vhost_iova_to_vva(dev, vq,
+                                                  descs[avail_idx + i].addr,
+                                                  &lens[i], VHOST_ACCESS_RW);
+        }
+
+        vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+                if (unlikely((lens[i] != descs[avail_idx + i].len)))
+                        return -1;
+        }
+
+        if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, PACKED_BATCH_SIZE))
+                return -1;
+
+        vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+                buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off;
+
+        vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+                if (unlikely(buf_lens[i] < (lens[i] - buf_offset)))
+                        goto free_buf;
+        }
+
+        vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+                pkts[i]->pkt_len = descs[avail_idx + i].len - buf_offset;
+                pkts[i]->data_len = pkts[i]->pkt_len;
+                ids[i] = descs[avail_idx + i].id;
+        }
+
+        if (virtio_net_with_host_offload(dev)) {
+                vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+                        hdr = (struct virtio_net_hdr *)(desc_addrs[i]);
+                        vhost_dequeue_offload(hdr, pkts[i]);
+                }
+        }
+
+        return 0;
+
+free_buf:
+        for (i = 0; i < PACKED_BATCH_SIZE; i++)
+                rte_pktmbuf_free(pkts[i]);
+
+        return -1;
+}
+
+static __rte_unused int
+virtio_dev_tx_batch_packed(struct virtio_net *dev,
+                           struct vhost_virtqueue *vq,
+                           struct rte_mempool *mbuf_pool,
+                           struct rte_mbuf **pkts)
+{
+        uint16_t avail_idx = vq->last_avail_idx;
+        uint32_t buf_offset = dev->vhost_hlen;
+        uintptr_t desc_addrs[PACKED_BATCH_SIZE];
+        uint16_t ids[PACKED_BATCH_SIZE];
+        uint16_t i;
+
+        if (vhost_reserve_avail_batch_packed(dev, vq, mbuf_pool, pkts,
+                                             avail_idx, desc_addrs, ids))
+                return -1;
+
+        vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+                rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
+
+        vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+                rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
+                           (void *)(uintptr_t)(desc_addrs[i] + buf_offset),
+                           pkts[i]->pkt_len);
+
+        vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
+
+        return 0;
+}
+
 static __rte_always_inline int
 vhost_dequeue_single_packed(struct virtio_net *dev,
                             struct vhost_virtqueue *vq,
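For reference, PACKED_BATCH_SIZE works out to 4 on a platform with 64-byte cache lines and 16-byte packed descriptors, and PACKED_BATCH_MASK to 3, so a batch covers exactly one cache line of descriptors starting at a 4-aligned avail_idx. The per-descriptor condition the batch path checks before rte_smp_rmb() can be written out in isolation roughly as in the sketch below; the flag values mirror the virtio 1.1 packed-ring layout, and the helper name is hypothetical.

```c
#include <stdbool.h>
#include <stdint.h>

/* Packed-ring descriptor flag bits, values as in the virtio 1.1 spec. */
#define VRING_DESC_F_NEXT       (1 << 0)
#define VRING_DESC_F_INDIRECT   (1 << 2)
#define VRING_DESC_F_AVAIL      (1 << 7)
#define VRING_DESC_F_USED       (1 << 15)

/*
 * Hypothetical helper: returns true only if a descriptor is available to
 * the device (AVAIL matches the ring's wrap counter, USED does not) and is
 * neither chained (NEXT) nor indirect, i.e. it qualifies for batch dequeue.
 */
static bool
desc_is_batch_dequeuable(uint16_t flags, bool wrap_counter)
{
        bool avail = !!(flags & VRING_DESC_F_AVAIL);
        bool used = !!(flags & VRING_DESC_F_USED);

        if (avail != wrap_counter || used == wrap_counter)
                return false;   /* not (yet) made available by the driver */

        if (flags & (VRING_DESC_F_NEXT | VRING_DESC_F_INDIRECT))
                return false;   /* chained or indirect: single dequeue path */

        return true;
}
```

If any of the four descriptors fails this test, vhost_reserve_avail_batch_packed() returns -1 and the caller is expected to fall back to the single-packet dequeue path.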
Add a batch dequeue function for the packed ring, analogous to the batch enqueue function. The batch dequeue function does not support chained descriptors; those are handled by the single-packet dequeue function.

Signed-off-by: Marvin Liu <yong.liu@intel.com>
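Since virtio_dev_tx_batch_packed() is still marked __rte_unused here, the wiring happens later in the series. The sketch below shows roughly how such a caller could combine the two paths; the dispatcher itself and the name virtio_dev_tx_single_packed are assumptions based on the commit message and later patches, not code from this patch.

```c
/*
 * Sketch of a dequeue dispatcher (assumes the lib/librte_vhost internal
 * headers and helpers): prefer the batch path, fall back to the
 * single-packet path for chained/indirect descriptors or a partial batch.
 * virtio_dev_tx_single_packed() is assumed to be added elsewhere in the
 * series; used-ring flushing and interrupt handling are omitted.
 */
static uint16_t
tx_packed_dispatch_sketch(struct virtio_net *dev, struct vhost_virtqueue *vq,
                          struct rte_mempool *mbuf_pool,
                          struct rte_mbuf **pkts, uint32_t count)
{
        uint32_t pkt_idx = 0;
        uint32_t remained = count;

        do {
                rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);

                if (remained >= PACKED_BATCH_SIZE &&
                    !virtio_dev_tx_batch_packed(dev, vq, mbuf_pool,
                                                &pkts[pkt_idx])) {
                        /* One cache line of descriptors dequeued at once. */
                        pkt_idx += PACKED_BATCH_SIZE;
                        remained -= PACKED_BATCH_SIZE;
                        continue;
                }

                /* Chained/indirect descriptor or tail of the burst. */
                if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool,
                                                &pkts[pkt_idx]))
                        break;
                pkt_idx++;
                remained--;
        } while (remained);

        return pkt_idx;
}
```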