[dpdk-dev,v2] vhost: Only access header if offloading is supported in dequeue path
Commit Message
If offloading features are not negotiated, parsing the virtio header
is not needed.
Micro-benchmark with testpmd shows that the gain is +4% with indirect
descriptors, +1% when using direct descriptors.
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
Changes since v1:
Comments
On Tue, Oct 11, 2016 at 09:45:27AM +0200, Maxime Coquelin wrote:
> @@ -684,12 +699,12 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
> struct rte_mempool *mbuf_pool)
> {
> struct vring_desc *desc;
> - uint64_t desc_addr;
> + uint64_t desc_addr = 0;
> uint32_t desc_avail, desc_offset;
> uint32_t mbuf_avail, mbuf_offset;
> uint32_t cpy_len;
> struct rte_mbuf *cur = m, *prev = m;
> - struct virtio_net_hdr *hdr;
> + struct virtio_net_hdr *hdr = NULL;
> /* A counter to avoid desc dead loop chain */
> uint32_t nr_desc = 1;
>
> @@ -698,12 +713,14 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
> (desc->flags & VRING_DESC_F_INDIRECT))
> return -1;
>
> - desc_addr = gpa_to_vva(dev, desc->addr);
> - if (unlikely(!desc_addr))
> - return -1;
> + if (virtio_net_with_host_offload(dev)) {
> + desc_addr = gpa_to_vva(dev, desc->addr);
> + if (unlikely(!desc_addr))
> + return -1;
>
> - hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
> - rte_prefetch0(hdr);
> + hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
> + rte_prefetch0(hdr);
> + }
>
> /*
> * A virtio driver normally uses at least 2 desc buffers
> @@ -720,18 +737,24 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
> if (unlikely(!desc_addr))
> return -1;
>
> - rte_prefetch0((void *)(uintptr_t)desc_addr);
> -
> desc_offset = 0;
> desc_avail = desc->len;
> nr_desc += 1;
> -
> - PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
> } else {
> + if (!desc_addr) {
> + desc_addr = gpa_to_vva(dev, desc->addr);
> + if (unlikely(!desc_addr))
> + return -1;
> + }
> +
I think this piece of code make things a bit complex. I think what you
want to achieve is, besides saving hdr prefetch, to save one call to
gpa_to_vva() for the non-ANY_LAYOUT case. Does that matter too much?
How about just saving the hdr prefetch?
if (virtio_net_with_host_offload(dev)) {
hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
rte_prefetch0(hdr);
}
--yliu
> desc_avail = desc->len - dev->vhost_hlen;
> desc_offset = dev->vhost_hlen;
> }
>
> + rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
> +
> + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
> +
> mbuf_offset = 0;
> mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
> while (1) {
> @@ -795,7 +818,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
> prev->data_len = mbuf_offset;
> m->pkt_len += mbuf_offset;
>
> - if (hdr->flags != 0 || hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE)
> + if (virtio_net_with_host_offload(dev))
> vhost_dequeue_offload(hdr, m);
>
> return 0;
> --
> 2.7.4
On 10/11/2016 11:01 AM, Yuanhan Liu wrote:
> On Tue, Oct 11, 2016 at 09:45:27AM +0200, Maxime Coquelin wrote:
>> @@ -684,12 +699,12 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
>> struct rte_mempool *mbuf_pool)
>> {
>> struct vring_desc *desc;
>> - uint64_t desc_addr;
>> + uint64_t desc_addr = 0;
>> uint32_t desc_avail, desc_offset;
>> uint32_t mbuf_avail, mbuf_offset;
>> uint32_t cpy_len;
>> struct rte_mbuf *cur = m, *prev = m;
>> - struct virtio_net_hdr *hdr;
>> + struct virtio_net_hdr *hdr = NULL;
>> /* A counter to avoid desc dead loop chain */
>> uint32_t nr_desc = 1;
>>
>> @@ -698,12 +713,14 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
>> (desc->flags & VRING_DESC_F_INDIRECT))
>> return -1;
>>
>> - desc_addr = gpa_to_vva(dev, desc->addr);
>> - if (unlikely(!desc_addr))
>> - return -1;
>> + if (virtio_net_with_host_offload(dev)) {
>> + desc_addr = gpa_to_vva(dev, desc->addr);
>> + if (unlikely(!desc_addr))
>> + return -1;
>>
>> - hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
>> - rte_prefetch0(hdr);
>> + hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
>> + rte_prefetch0(hdr);
>> + }
>>
>> /*
>> * A virtio driver normally uses at least 2 desc buffers
>> @@ -720,18 +737,24 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
>> if (unlikely(!desc_addr))
>> return -1;
>>
>> - rte_prefetch0((void *)(uintptr_t)desc_addr);
>> -
>> desc_offset = 0;
>> desc_avail = desc->len;
>> nr_desc += 1;
>> -
>> - PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
>> } else {
>> + if (!desc_addr) {
>> + desc_addr = gpa_to_vva(dev, desc->addr);
>> + if (unlikely(!desc_addr))
>> + return -1;
>> + }
>> +
>
> I think this piece of code make things a bit complex. I think what you
> want to achieve is, besides saving hdr prefetch, to save one call to
> gpa_to_vva() for the non-ANY_LAYOUT case. Does that matter too much?
>
> How about just saving the hdr prefetch?
>
> if (virtio_net_with_host_offload(dev)) {
> hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
> rte_prefetch0(hdr);
> }
Oops, you reply slipped through the cracks...
You're right, it doesn't matter too much, the thing to avoid id
definitely the hdr prefetch and access.
I'm sending a v3 now.
Thanks,
Maxime
=================
- Rebased
- Fix early out check in vhost_dequeue_offload
lib/librte_vhost/virtio_net.c | 47 ++++++++++++++++++++++++++++++++-----------
1 file changed, 35 insertions(+), 12 deletions(-)
@@ -548,6 +548,18 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
return virtio_dev_rx(dev, queue_id, pkts, count);
}
+static inline bool
+virtio_net_with_host_offload(struct virtio_net *dev)
+{
+ if (dev->features &
+ (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_ECN |
+ VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
+ VIRTIO_NET_F_HOST_UFO))
+ return true;
+
+ return false;
+}
+
static void
parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
{
@@ -600,6 +612,9 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
void *l4_hdr = NULL;
struct tcp_hdr *tcp_hdr = NULL;
+ if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
+ return;
+
parse_ethernet(m, &l4_proto, &l4_hdr);
if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
if (hdr->csum_start == (m->l2_len + m->l3_len)) {
@@ -684,12 +699,12 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
struct rte_mempool *mbuf_pool)
{
struct vring_desc *desc;
- uint64_t desc_addr;
+ uint64_t desc_addr = 0;
uint32_t desc_avail, desc_offset;
uint32_t mbuf_avail, mbuf_offset;
uint32_t cpy_len;
struct rte_mbuf *cur = m, *prev = m;
- struct virtio_net_hdr *hdr;
+ struct virtio_net_hdr *hdr = NULL;
/* A counter to avoid desc dead loop chain */
uint32_t nr_desc = 1;
@@ -698,12 +713,14 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
(desc->flags & VRING_DESC_F_INDIRECT))
return -1;
- desc_addr = gpa_to_vva(dev, desc->addr);
- if (unlikely(!desc_addr))
- return -1;
+ if (virtio_net_with_host_offload(dev)) {
+ desc_addr = gpa_to_vva(dev, desc->addr);
+ if (unlikely(!desc_addr))
+ return -1;
- hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
- rte_prefetch0(hdr);
+ hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+ rte_prefetch0(hdr);
+ }
/*
* A virtio driver normally uses at least 2 desc buffers
@@ -720,18 +737,24 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
if (unlikely(!desc_addr))
return -1;
- rte_prefetch0((void *)(uintptr_t)desc_addr);
-
desc_offset = 0;
desc_avail = desc->len;
nr_desc += 1;
-
- PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
} else {
+ if (!desc_addr) {
+ desc_addr = gpa_to_vva(dev, desc->addr);
+ if (unlikely(!desc_addr))
+ return -1;
+ }
+
desc_avail = desc->len - dev->vhost_hlen;
desc_offset = dev->vhost_hlen;
}
+ rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+
+ PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
+
mbuf_offset = 0;
mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
while (1) {
@@ -795,7 +818,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
prev->data_len = mbuf_offset;
m->pkt_len += mbuf_offset;
- if (hdr->flags != 0 || hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE)
+ if (virtio_net_with_host_offload(dev))
vhost_dequeue_offload(hdr, m);
return 0;