[dpdk-dev] virtio: Fix enqueue/dequeue can't handle chained vring descriptors.

Message ID 1430720780-27525-1-git-send-email-changchun.ouyang@intel.com (mailing list archive)
State Changes Requested, archived
Headers

Commit Message

Ouyang Changchun May 4, 2015, 6:26 a.m. UTC
Vring enqueue needs to consider two cases:
 1. The vring descriptors are chained together; the first one is for the virtio header and the rest are for the real data.
 2. There is only one descriptor; the virtio header and the real data share a single descriptor.

The same applies to vring dequeue.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 60 +++++++++++++++++++++++++++++++------------
 1 file changed, 44 insertions(+), 16 deletions(-)
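
For context, the two layouts the commit message describes can be sketched as
follows (a minimal, illustrative C snippet using the patch's own identifiers;
hdr_addr and data_addr are made-up locals, not code from the patch):

    struct vring_desc *desc = &vq->desc[head_idx];
    uint64_t hdr_addr, data_addr;

    if (desc->flags & VRING_DESC_F_NEXT) {
        /* Case 1: chained descriptors. The first one holds only the
         * virtio header; the real data starts in the next one. */
        hdr_addr  = gpa_to_vva(dev, desc->addr);
        desc      = &vq->desc[desc->next];
        data_addr = gpa_to_vva(dev, desc->addr);
    } else {
        /* Case 2: a single descriptor. The data follows the virtio
         * header within the same guest buffer. */
        hdr_addr  = gpa_to_vva(dev, desc->addr);
        data_addr = hdr_addr + vq->vhost_hlen;
    }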
  

Comments

Thomas Monjalon May 12, 2015, 10 a.m. UTC | #1
Hi Changchun,

Why does the title begin with "virtio" for a patch on vhost?
Could you rephrase it in a positive form?

2015-05-04 14:26, Ouyang Changchun:
> Vring enqueue needs to consider two cases:
>  1. The vring descriptors are chained together; the first one is for the virtio header and the rest are for the real data.
>  2. There is only one descriptor; the virtio header and the real data share a single descriptor.

Please explain what was not working before.

> The same applies to vring dequeue.
> 
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>  lib/librte_vhost/vhost_rxtx.c | 60 +++++++++++++++++++++++++++++++------------
>  1 file changed, 44 insertions(+), 16 deletions(-)
> 
> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> index 510ffe8..3135883 100644
> --- a/lib/librte_vhost/vhost_rxtx.c
> +++ b/lib/librte_vhost/vhost_rxtx.c
> @@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
>  	uint64_t buff_addr = 0;
>  	uint64_t buff_hdr_addr = 0;
> -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
> +	uint32_t head[MAX_PKT_BURST];
>  	uint32_t head_idx, packet_success = 0;
>  	uint16_t avail_idx, res_cur_idx;
>  	uint16_t res_base_idx, res_end_idx;
> @@ -113,6 +113,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	rte_prefetch0(&vq->desc[head[packet_success]]);
>  
>  	while (res_cur_idx != res_end_idx) {
> +		uint32_t offset = 0;
> +		uint32_t data_len, len_to_cpy;
> +		uint8_t plus_hdr = 0;

plus_hdr is not very meaningful to me

I'm not a vhost expert so I won't review the rest.
If nobody comments on it in the coming days, it will be accepted.
  
Huawei Xie May 18, 2015, 9:39 a.m. UTC | #2
On 5/4/2015 2:27 PM, Ouyang Changchun wrote:
> Vring enqueue needs to consider two cases:
>  1. The vring descriptors are chained together; the first one is for the virtio header and the rest are for the real data.
>  2. There is only one descriptor; the virtio header and the real data share a single descriptor.
>
> The same applies to vring dequeue.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>  lib/librte_vhost/vhost_rxtx.c | 60 +++++++++++++++++++++++++++++++------------
>  1 file changed, 44 insertions(+), 16 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> index 510ffe8..3135883 100644
> --- a/lib/librte_vhost/vhost_rxtx.c
> +++ b/lib/librte_vhost/vhost_rxtx.c
> @@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
>  	uint64_t buff_addr = 0;
>  	uint64_t buff_hdr_addr = 0;
> -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
> +	uint32_t head[MAX_PKT_BURST];
>  	uint32_t head_idx, packet_success = 0;
>  	uint16_t avail_idx, res_cur_idx;
>  	uint16_t res_base_idx, res_end_idx;
> @@ -113,6 +113,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  	rte_prefetch0(&vq->desc[head[packet_success]]);
>  
>  	while (res_cur_idx != res_end_idx) {
> +		uint32_t offset = 0;
> +		uint32_t data_len, len_to_cpy;
> +		uint8_t plus_hdr = 0;
> +
>  		/* Get descriptor from available ring */
>  		desc = &vq->desc[head[packet_success]];
>  
> @@ -125,7 +129,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  
>  		/* Copy virtio_hdr to packet and increment buffer address */
>  		buff_hdr_addr = buff_addr;
> -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
>  
>  		/*
>  		 * If the descriptors are chained the header and data are
> @@ -136,24 +139,44 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  			desc = &vq->desc[desc->next];
>  			/* Buffer address translation. */
>  			buff_addr = gpa_to_vva(dev, desc->addr);
> -			desc->len = rte_pktmbuf_data_len(buff);
>  		} else {
>  			buff_addr += vq->vhost_hlen;
> -			desc->len = packet_len;
> +			plus_hdr = 1;
>  		}
>  
> +		data_len = rte_pktmbuf_data_len(buff);
> +		len_to_cpy = RTE_MIN(data_len, desc->len);
> +		do {
> +			if (len_to_cpy > 0) {
> +				/* Copy mbuf data to buffer */
> +				rte_memcpy((void *)(uintptr_t)buff_addr,
> +					(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
> +					len_to_cpy);
> +				PRINT_PACKET(dev, (uintptr_t)buff_addr,
> +					len_to_cpy, 0);
> +
> +				desc->len = len_to_cpy + (plus_hdr ? vq->vhost_hlen : 0);

Do we really need to rewrite desc->len again and again? At most, only the
desc->len of the last descriptor could ever need changing.

> +				offset += len_to_cpy;
> +				if (desc->flags & VRING_DESC_F_NEXT) {
> +					desc = &vq->desc[desc->next];
> +					buff_addr = gpa_to_vva(dev, desc->addr);
> +					len_to_cpy = RTE_MIN(data_len - offset, desc->len);
> +				} else
> +					break;

There are still two issues here:
a) If the data can't be fully copied into the chain of guest buffers, we
shouldn't do any copying at all (a sketch of this pre-check follows below).
b) Scattered (multi-segment) mbufs aren't considered.
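
A minimal sketch of the pre-check that point a) implies, assuming the chain
is walked once before any copying (illustrative only; first_desc and room
are made-up names, the other identifiers follow the patch):

    uint32_t room = 0;
    struct vring_desc *d = first_desc;

    /* Sum the guest buffer space available in the descriptor chain. */
    for (;;) {
        room += d->len;
        if (!(d->flags & VRING_DESC_F_NEXT))
            break;
        d = &vq->desc[d->next];
    }

    /* If the whole packet (plus virtio header) doesn't fit, copy nothing. */
    if (room < vq->vhost_hlen + rte_pktmbuf_pkt_len(buff))
        return -1; /* skip this packet instead of doing a partial copy */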

> +			} else {
> +				desc->len = 0;
> +				if (desc->flags & VRING_DESC_F_NEXT)
> +                                        desc = &vq->desc[desc->next];
> +				else
> +					break;
> +			}
> +		} while (1);
> +
>  		/* Update used ring with desc information */
>  		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
>  							head[packet_success];
> -		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
> -
> -		/* Copy mbuf data to buffer */
> -		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
> -		rte_memcpy((void *)(uintptr_t)buff_addr,
> -			rte_pktmbuf_mtod(buff, const void *),
> -			rte_pktmbuf_data_len(buff));
> -		PRINT_PACKET(dev, (uintptr_t)buff_addr,
> -			rte_pktmbuf_data_len(buff), 0);
> +		vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> +							offset + vq->vhost_hlen;
>  
>  		res_cur_idx++;
>  		packet_success++;
> @@ -583,7 +606,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>  		desc = &vq->desc[head[entry_success]];
>  
>  		/* Discard first buffer as it is the virtio header */
> -		desc = &vq->desc[desc->next];
> +		if (desc->flags & VRING_DESC_F_NEXT) {
> +			desc = &vq->desc[desc->next];
> +			vb_offset = 0;
> +			vb_avail = desc->len;
> +		} else {
> +			vb_offset = vq->vhost_hlen;
> +			vb_avail = desc->len - vb_offset;
> +		}
>  
>  		/* Buffer address translation. */
>  		vb_addr = gpa_to_vva(dev, desc->addr);
> @@ -602,8 +632,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
>  		vq->used->ring[used_idx].id = head[entry_success];
>  		vq->used->ring[used_idx].len = 0;
>  
> -		vb_offset = 0;
> -		vb_avail = desc->len;
>  		/* Allocate an mbuf and populate the structure. */
>  		m = rte_pktmbuf_alloc(mbuf_pool);
>  		if (unlikely(m == NULL)) {
  
Ouyang Changchun May 18, 2015, 1:23 p.m. UTC | #3
Hi Huawei,

> -----Original Message-----
> From: Xie, Huawei
> Sent: Monday, May 18, 2015 5:39 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] virtio: Fix enqueue/dequeue can't handle
> chained vring descriptors.
> 
> On 5/4/2015 2:27 PM, Ouyang Changchun wrote:
> > Vring enqueue needs to consider two cases:
> >  1. The vring descriptors are chained together; the first one is for the
> > virtio header and the rest are for the real data.
> >  2. There is only one descriptor; the virtio header and the real data
> > share a single descriptor.
> >
> > The same applies to vring dequeue.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> >  lib/librte_vhost/vhost_rxtx.c | 60
> > +++++++++++++++++++++++++++++++------------
> >  1 file changed, 44 insertions(+), 16 deletions(-)
> >
> > diff --git a/lib/librte_vhost/vhost_rxtx.c
> > b/lib/librte_vhost/vhost_rxtx.c index 510ffe8..3135883 100644
> > --- a/lib/librte_vhost/vhost_rxtx.c
> > +++ b/lib/librte_vhost/vhost_rxtx.c
> > @@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> >  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
> >  	uint64_t buff_addr = 0;
> >  	uint64_t buff_hdr_addr = 0;
> > -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
> > +	uint32_t head[MAX_PKT_BURST];
> >  	uint32_t head_idx, packet_success = 0;
> >  	uint16_t avail_idx, res_cur_idx;
> >  	uint16_t res_base_idx, res_end_idx;
> > @@ -113,6 +113,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> >  	rte_prefetch0(&vq->desc[head[packet_success]]);
> >
> >  	while (res_cur_idx != res_end_idx) {
> > +		uint32_t offset = 0;
> > +		uint32_t data_len, len_to_cpy;
> > +		uint8_t plus_hdr = 0;
> > +
> >  		/* Get descriptor from available ring */
> >  		desc = &vq->desc[head[packet_success]];
> >
> > @@ -125,7 +129,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> >
> >  		/* Copy virtio_hdr to packet and increment buffer address */
> >  		buff_hdr_addr = buff_addr;
> > -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
> >
> >  		/*
> >  		 * If the descriptors are chained the header and data are
> > @@ -136,24 +139,44 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> >  			desc = &vq->desc[desc->next];
> >  			/* Buffer address translation. */
> >  			buff_addr = gpa_to_vva(dev, desc->addr);
> > -			desc->len = rte_pktmbuf_data_len(buff);
> >  		} else {
> >  			buff_addr += vq->vhost_hlen;
> > -			desc->len = packet_len;
> > +			plus_hdr = 1;
> >  		}
> >
> > +		data_len = rte_pktmbuf_data_len(buff);
> > +		len_to_cpy = RTE_MIN(data_len, desc->len);
> > +		do {
> > +			if (len_to_cpy > 0) {
> > +				/* Copy mbuf data to buffer */
> > +				rte_memcpy((void *)(uintptr_t)buff_addr,
> > +					(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
> > +					len_to_cpy);
> > +				PRINT_PACKET(dev, (uintptr_t)buff_addr,
> > +					len_to_cpy, 0);
> > +
> > +				desc->len = len_to_cpy + (plus_hdr ? vq->vhost_hlen : 0);
> 
> Do we really need to rewrite desc->len again and again? At most, only the
> desc->len of the last descriptor could ever need changing.

Well, I think we need to change each descriptor's len in the chain here.
If we aggregate all the length into the last descriptor's len, the length may exceed that descriptor's original len:
e.g. with 8 descriptors (each with a len of 1024) chained to receive an 8K packet, the last descriptor's len
would become 8K while all the others would be 0. I don't think that situation makes sense.
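
Laid out concretely (an illustrative layout, not code from the patch), the
two bookkeeping choices for that 8K packet over eight 1024-byte descriptors
would be:

    /* Per-descriptor update, as the patch does it: */
    desc[0].len = 1024; desc[1].len = 1024; ... desc[7].len = 1024;

    /* Aggregating everything into the last descriptor: */
    desc[0].len = ... = desc[6].len = 0; desc[7].len = 8192; /* exceeds that buffer's real 1024-byte size */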

> 
> > +				offset += len_to_cpy;
> > +				if (desc->flags & VRING_DESC_F_NEXT) {
> > +					desc = &vq->desc[desc->next];
> > +					buff_addr = gpa_to_vva(dev, desc->addr);
> > +					len_to_cpy = RTE_MIN(data_len - offset, desc->len);
> > +				} else
> > +					break;
> 
> There are still two issues here:
> a) If the data can't be fully copied into the chain of guest buffers, we
> shouldn't do any copying at all.

Why is copying no data at all better than the current implementation?

> b) Scattered (multi-segment) mbufs aren't considered.

If we also consider scattered mbufs here, this function will end up with exactly the same logic as the mergeable-rx path.
Do you want to remove this function entirely and keep only the mergeable rx function for all cases?

Changchun
  
Huawei Xie May 20, 2015, 5:26 a.m. UTC | #4
On 5/18/2015 9:23 PM, Ouyang, Changchun wrote:
> Hi Huawei,
>
>> -----Original Message-----
>> From: Xie, Huawei
>> Sent: Monday, May 18, 2015 5:39 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH] virtio: Fix enqueue/dequeue can't handle
>> chained vring descriptors.
>>
>> On 5/4/2015 2:27 PM, Ouyang Changchun wrote:
>>> Vring enqueue needs to consider two cases:
>>>  1. The vring descriptors are chained together; the first one is for the
>>> virtio header and the rest are for the real data.
>>>  2. There is only one descriptor; the virtio header and the real data
>>> share a single descriptor.
>>>
>>> The same applies to vring dequeue.
>>>
>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>> ---
>>>  lib/librte_vhost/vhost_rxtx.c | 60
>>> +++++++++++++++++++++++++++++++------------
>>>  1 file changed, 44 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/lib/librte_vhost/vhost_rxtx.c
>>> b/lib/librte_vhost/vhost_rxtx.c index 510ffe8..3135883 100644
>>> --- a/lib/librte_vhost/vhost_rxtx.c
>>> +++ b/lib/librte_vhost/vhost_rxtx.c
>>> @@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>>>  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
>>>  	uint64_t buff_addr = 0;
>>>  	uint64_t buff_hdr_addr = 0;
>>> -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
>>> +	uint32_t head[MAX_PKT_BURST];
>>>  	uint32_t head_idx, packet_success = 0;
>>>  	uint16_t avail_idx, res_cur_idx;
>>>  	uint16_t res_base_idx, res_end_idx;
>>> @@ -113,6 +113,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>>>  	rte_prefetch0(&vq->desc[head[packet_success]]);
>>>
>>>  	while (res_cur_idx != res_end_idx) {
>>> +		uint32_t offset = 0;
>>> +		uint32_t data_len, len_to_cpy;
>>> +		uint8_t plus_hdr = 0;
>>> +
>>>  		/* Get descriptor from available ring */
>>>  		desc = &vq->desc[head[packet_success]];
>>>
>>> @@ -125,7 +129,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>>>
>>>  		/* Copy virtio_hdr to packet and increment buffer address */
>>>  		buff_hdr_addr = buff_addr;
>>> -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
>>>
>>>  		/*
>>>  		 * If the descriptors are chained the header and data are
>>> @@ -136,24 +139,44 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>>>  			desc = &vq->desc[desc->next];
>>>  			/* Buffer address translation. */
>>>  			buff_addr = gpa_to_vva(dev, desc->addr);
>>> -			desc->len = rte_pktmbuf_data_len(buff);
>>>  		} else {
>>>  			buff_addr += vq->vhost_hlen;
>>> -			desc->len = packet_len;
>>> +			plus_hdr = 1;
>>>  		}
>>>
>>> +		data_len = rte_pktmbuf_data_len(buff);
>>> +		len_to_cpy = RTE_MIN(data_len, desc->len);
>>> +		do {
>>> +			if (len_to_cpy > 0) {
>>> +				/* Copy mbuf data to buffer */
>>> +				rte_memcpy((void *)(uintptr_t)buff_addr,
>>> +					(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
>>> +					len_to_cpy);
>>> +				PRINT_PACKET(dev, (uintptr_t)buff_addr,
>>> +					len_to_cpy, 0);
>>> +
>>> +				desc->len = len_to_cpy + (plus_hdr ? vq->vhost_hlen : 0);
>> Do we really need to rewrite desc->len again and again? At most, only the
>> desc->len of the last descriptor could ever need changing.
> Well, I think we need to change each descriptor's len in the chain here.
> If we aggregate all the length into the last descriptor's len, the length may exceed that descriptor's original len:
> e.g. with 8 descriptors (each with a len of 1024) chained to receive an 8K packet, the last descriptor's len
> would become 8K while all the others would be 0. I don't think that situation makes sense.
Let me explain it this way.
Suppose we receive a packet of 350 bytes and have a descriptor chain of
10 descs, each 100 bytes in size.
At the least, we don't need to change the len field of the first three descriptors.
Whether we need to change the 4th len field to 50, and the rest of them
to zero, is still an open question (used->len is updated to 350).
We need to check the VIRTIO spec.
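
Laid out concretely (illustrative only), that 350-byte example over ten
100-byte descriptors is:

    desc[0..2]: written in full     -- their len (100) already matches, no update needed
    desc[3]:    gets the final 50 B -- change len to 50?  (the open question)
    desc[4..9]: get nothing         -- change len to 0?   (the open question)
    used->ring[used_idx].len: updated for the whole packet in any case
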
>>> +				offset += len_to_cpy;
>>> +				if (desc->flags & VRING_DESC_F_NEXT) {
>>> +					desc = &vq->desc[desc->next];
>>> +					buff_addr = gpa_to_vva(dev, desc->addr);
>>> +					len_to_cpy = RTE_MIN(data_len - offset, desc->len);
>>> +				} else
>>> +					break;
>> There are still two issues here:
>> a) If the data can't be fully copied into the chain of guest buffers, we
>> shouldn't do any copying at all.
> Why is copying no data at all better than the current implementation?
We shouldn't pass part of a packet to the guest.
>
>> b) Scattered (multi-segment) mbufs aren't considered.
> If we also consider scattered mbufs here, this function will end up with exactly the same logic as the mergeable-rx path.
> Do you want to remove this function entirely and keep only the mergeable rx function for all cases?
>
> Changchun
>
>
>
  
Ouyang Changchun May 28, 2015, 3:16 p.m. UTC | #5
Fix enqueue/dequeue can't handle chained vring descriptors;
Remove unnecessary vring descriptor length updating;
Add support copying scattered mbuf to vring;

Changchun Ouyang (5):
  lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  lib_vhost: Refine code style
  lib_vhost: Extract function
  lib_vhost: Remove unnecessary vring descriptor length updating
  lib_vhost: Add support copying scattered mbuf to vring

 lib/librte_vhost/vhost_rxtx.c | 287 +++++++++++++++++++++++++++++++-----------
 1 file changed, 216 insertions(+), 71 deletions(-)
  
Ouyang Changchun June 1, 2015, 8:25 a.m. UTC | #6
Fix enqueue/dequeue can't handle chained vring descriptors;
Remove unnecessary vring descriptor length updating;
Add support copying scattered mbuf to vring;

Changchun Ouyang (4):
  lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  lib_vhost: Refine code style
  lib_vhost: Extract function
  lib_vhost: Remove unnecessary vring descriptor length updating

 lib/librte_vhost/vhost_rxtx.c | 194 ++++++++++++++++++++++++++----------------
 1 file changed, 122 insertions(+), 72 deletions(-)
  
Ouyang Changchun June 2, 2015, 8:51 a.m. UTC | #7
Fix enqueue/dequeue can't handle chained vring descriptors;
Remove unnecessary vring descriptor length updating;
Add support copying scattered mbuf to vring;

Changchun Ouyang (4):
  lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  lib_vhost: Refine code style
  lib_vhost: Extract function
  lib_vhost: Remove unnecessary vring descriptor length updating

 lib/librte_vhost/vhost_rxtx.c | 192 ++++++++++++++++++++++++++----------------
 1 file changed, 120 insertions(+), 72 deletions(-)
  
Ouyang Changchun June 3, 2015, 6:02 a.m. UTC | #8
Fix enqueue/dequeue can't handle chained vring descriptors;
Remove unnecessary vring descriptor length updating;
Add support copying scattered mbuf to vring;

Changchun Ouyang (4):
  lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  lib_vhost: Refine code style
  lib_vhost: Extract function
  lib_vhost: Remove unnecessary vring descriptor length updating

 lib/librte_vhost/vhost_rxtx.c | 201 +++++++++++++++++++++++-------------------
 1 file changed, 111 insertions(+), 90 deletions(-)
  
Xu, Qian Q June 3, 2015, 7:50 a.m. UTC | #9
Tested-by: Qian Xu<qian.q.xu@intel.com>
Signed-off-by: Qian Xu<qian.q.xu@intel.com>

-Tested commit: 1a1109404e702d3ad1ccc1033df55c59bec1f89a
-Host OS/Kernel: FC21/3.19
-Guest OS/Kernel: FC21/3.19
-NIC: Intel 82599 10G
-Default x86_64-native-linuxapp-gcc configuration
-Total 2 cases, 2 passed.

Test Case 1:  test_perf_vhost_one_vm_dpdk_fwd_vhost-user
====================================================
On host:

1. Start up vhost-switch; vm2vm 0 means only one vm, with no vm-to-vm communication::

    taskset -c 18-20 <dpdk_folder>/examples/vhost/build/vhost-switch -c 0xf -n 4 --huge-dir /mnt/huge --socket-mem 1024,1024 -- -p 1 --mergeable 0 --zero-copy 0 --vm2vm 0 
   

2. Start VM with vhost user as backend::

taskset -c 22-28 \
/home/qxu10/qemu-2.2.0/x86_64-softmmu/qemu-system-x86_64 -name us-vhost-vm1 -cpu host \
-enable-kvm -m 4096 -object memory-backend-file,id=mem,size=4096M,mem-path=/mnt/huge,share=on -numa node,memdev=mem -mem-prealloc \
-smp cores=20,sockets=1 -drive file=/home/img/fc21-vm1.img \
-chardev socket,id=char0,path=/home/qxu10/dpdk/vhost-net -netdev type=vhost-user,id=mynet1,chardev=char0,vhostforce=on \
-device virtio-net-pci,mac=52:54:00:00:00:01,netdev=mynet1 \
-chardev socket,id=char1,path=/home/qxu10/dpdk/vhost-net -netdev type=vhost-user,id=mynet2,chardev=char1,vhostforce=on \
-device virtio-net-pci,mac=52:54:00:00:00:02,netdev=mynet2 \
-netdev tap,id=ipvm1,ifname=tap3,script=/etc/qemu-ifup -device rtl8139,netdev=ipvm1,id=net0,mac=00:00:00:00:00:09 -nographic

On guest:

3. Ensure the dpdk folder is copied to the guest with the same config file and build process as on the host. Then bind the 2 virtio devices to igb_uio and start testpmd; the steps below are for reference::

    ./<dpdk_folder>/tools/dpdk_nic_bind.py --bind igb_uio 00:03.0 00:04.0

    ./<dpdk_folder>/x86_64-native-linuxapp-gcc/app/test-pmd/testpmd -c f -n 4 -- -i --txqflags 0x0f00 --rxq=2 --disable-hw-vlan-filter
    
    $ >set fwd mac
    
    $ >start tx_first

4. After typing start tx_first in testpmd, you can see the 2 virtio devices registered in vhost-user with their MAC addresses and VLAN ids; the log is shown in the host's vhost-sample output.

5. Send traffic (30 seconds) to virtio1 and virtio2 with packet sizes from 64 to 1518 bytes, and check the performance in Mpps. The traffic sent to virtio1 should have virtio1's MAC as the DEST MAC and virtio1's VLAN id; the traffic sent to virtio2 should have virtio2's MAC as the DEST MAC and virtio2's VLAN id. The traffic's DEST IP and SRC IP are incremented continuously, e.g. from 192.168.1.1 to 192.168.1.63, so the packets can go to different queues via RSS/hash. As the functionality criterion, the received rate should not be zero; as the performance criterion, check it with the developer or the design doc/PRD.

6. Check in the guest testpmd stats display that the packets have gone to different queues.
 
7. Check the packet data integrity. 
    
Test Case 2:  test_perf_virtio_one_vm_linux_fwd_vhost-user
===================================================
On host:

Same step as in TestCase1.

On guest:   
  
1. Set up routing on guest::

    $ systemctl stop firewalld.service
    
    $ systemctl disable firewalld.service
    
    $ systemctl stop ip6tables.service
    
    $ systemctl disable ip6tables.service

    $ systemctl stop iptables.service
    
    $ systemctl disable iptables.service

    $ systemctl stop NetworkManager.service
    
    $ systemctl disable NetworkManager.service
 
    $ echo 1 >/proc/sys/net/ipv4/ip_forward

    $ ip addr add 192.168.1.2/24 dev eth1    # eth1 is virtio1
    
    $ ip neigh add 192.168.1.1 lladdr 00:00:00:00:0a:0a dev eth1
    
    $ ip link set dev eth1 up
    
    $ ip addr add 192.168.2.2/24 dev eth2    # eth2 is virtio2
    
    $ ip neigh add 192.168.2.1 lladdr 00:00:00:00:00:0a  dev eth2
    
    $ ip link set dev eth2 up

2. Send traffic (30 seconds) to virtio1 and virtio2. According to the above script, traffic sent to virtio1 should have SRC IP (e.g. 192.168.1.1), DEST IP (e.g. 192.168.2.1), virtio1's MAC as the DEST MAC, and virtio1's VLAN ID. Traffic sent to virtio2 has similar settings: SRC IP (e.g. 192.168.2.1), DEST IP (e.g. 192.168.1.1), and virtio2's VLAN ID. Set the packet size from 64 to 1518 bytes as well as jumbo frames, and check the performance in Mpps. As the functionality criterion, the received rate should not be zero; as the performance criterion, check it with the developer or the design doc/PRD.

3. Check the data integrity of the forwarded packets; ensure no content changes.

Thanks
Qian


-----Original Message-----
From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ouyang Changchun
Sent: Wednesday, June 03, 2015 2:02 PM
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH v5 0/4] Fix vhost enqueue/dequeue issue

Fix enqueue/dequeue can't handle chained vring descriptors;
Remove unnecessary vring descriptor length updating;
Add support copying scattered mbuf to vring;

Changchun Ouyang (4):
  lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  lib_vhost: Refine code style
  lib_vhost: Extract function
  lib_vhost: Remove unnecessary vring descriptor length updating

 lib/librte_vhost/vhost_rxtx.c | 201 +++++++++++++++++++++++-------------------
 1 file changed, 111 insertions(+), 90 deletions(-)

--
1.8.4.2
  
Ouyang Changchun June 8, 2015, 3:18 a.m. UTC | #10
Fix enqueue/dequeue can't handle chained vring descriptors;
Remove unnecessary vring descriptor length updating;
Add support copying scattered mbuf to vring;

Changchun Ouyang (4):
  lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  lib_vhost: Refine code style
  lib_vhost: Extract function
  lib_vhost: Remove unnecessary vring descriptor length updating

 lib/librte_vhost/vhost_rxtx.c | 201 +++++++++++++++++++++++-------------------
 1 file changed, 111 insertions(+), 90 deletions(-)
  
Ouyang Changchun June 9, 2015, 1:03 a.m. UTC | #11
Fix enqueue/dequeue can't handle chained vring descriptors;
Remove unnecessary vring descriptor length updating;
Add support copying scattered mbuf to vring;

Changchun Ouyang (4):
  lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
  lib_vhost: Refine code style
  lib_vhost: Extract function
  lib_vhost: Remove unnecessary vring descriptor length updating

 lib/librte_vhost/vhost_rxtx.c | 201 +++++++++++++++++++++++-------------------
 1 file changed, 111 insertions(+), 90 deletions(-)
  
Huawei Xie June 10, 2015, 1:40 a.m. UTC | #12
Acked-by:  Huawei Xie <huawei.xie@intel.com>
On 6/9/2015 9:03 AM, Ouyang, Changchun wrote:
> Fix enqueue/dequeue can't handle chained vring descriptors;
> Remove unnecessary vring descriptor length updating;
> Add support copying scattered mbuf to vring;
>
> Changchun Ouyang (4):
>   lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
>   lib_vhost: Refine code style
>   lib_vhost: Extract function
>   lib_vhost: Remove unnecessary vring descriptor length updating
>
>  lib/librte_vhost/vhost_rxtx.c | 201 +++++++++++++++++++++++-------------------
>  1 file changed, 111 insertions(+), 90 deletions(-)
>
  
Huawei Xie June 10, 2015, 6:49 a.m. UTC | #13
Acked-by: Huawei Xie <huawei.xie@intel.com>

On 6/9/2015 9:03 AM, Ouyang, Changchun wrote:
> Fix enqueue/dequeue can't handle chained vring descriptors;
> Remove unnecessary vring descriptor length updating;
> Add support copying scattered mbuf to vring;
>
> Changchun Ouyang (4):
>   lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
>   lib_vhost: Refine code style
>   lib_vhost: Extract function
>   lib_vhost: Remove unnecessary vring descriptor length updating
>
>  lib/librte_vhost/vhost_rxtx.c | 201 +++++++++++++++++++++++-------------------
>  1 file changed, 111 insertions(+), 90 deletions(-)
>
  
Thomas Monjalon June 15, 2015, 9:42 a.m. UTC | #14
2015-06-09 09:03, Ouyang Changchun:
> Fix enqueue/dequeue can't handle chained vring descriptors;
> Remove unnecessary vring descriptor length updating;
> Add support copying scattered mbuf to vring;
> 
> Changchun Ouyang (4):
>   lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
>   lib_vhost: Refine code style
>   lib_vhost: Extract function
>   lib_vhost: Remove unnecessary vring descriptor length updating

What changed in v7?
Is this test report still valuable for v7?
	http://dpdk.org/ml/archives/dev/2015-June/018610.html

Note: it's really convenient to put the relevant changelog in each commit,
and it would be nicer to have a changelog summary in this cover letter.
  
Ouyang Changchun June 16, 2015, 1:01 a.m. UTC | #15
Hi, Thomas


> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Monday, June 15, 2015 5:43 PM
> To: Ouyang, Changchun
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v7 0/4] Fix vhost enqueue/dequeue issue
> 
> 2015-06-09 09:03, Ouyang Changchun:
> > Fix enqueue/dequeue can't handle chained vring descriptors; Remove
> > unnecessary vring descriptor length updating; Add support copying
> > scattered mbuf to vring;
> >
> > Changchun Ouyang (4):
> >   lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
> >   lib_vhost: Refine code style
> >   lib_vhost: Extract function
> >   lib_vhost: Remove unnecessary vring descriptor length updating
> 
> What changed in v7?
> Is this test report still valuable for v7?
> 	http://dpdk.org/ml/archives/dev/2015-June/018610.html
> 
Nothing really changed from v6 to v7.
In v6 the patches were signed off by root;
in v7 they are signed off by myself.
Yes, the test report is still valuable.

> Note: it's really convenient to put the relevant changelog in each commit,
> and it would be nicer to have a changelog summary in this cover letter.
  
Thomas Monjalon June 17, 2015, 2:57 p.m. UTC | #16
2015-06-10 06:49, Xie, Huawei:
> On 6/9/2015 9:03 AM, Ouyang, Changchun wrote:
> > Fix enqueue/dequeue can't handle chained vring descriptors;
> > Remove unnecessary vring descriptor length updating;
> > Add support copying scattered mbuf to vring;
> >
> > Changchun Ouyang (4):
> >   lib_vhost: Fix enqueue/dequeue can't handle chained vring descriptors
> >   lib_vhost: Refine code style
> >   lib_vhost: Extract function
> >   lib_vhost: Remove unnecessary vring descriptor length updating
> 
> Acked-by: Huawei Xie <huawei.xie@intel.com>

Applied, thanks
  

Patch

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 510ffe8..3135883 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -59,7 +59,7 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 	uint64_t buff_addr = 0;
 	uint64_t buff_hdr_addr = 0;
-	uint32_t head[MAX_PKT_BURST], packet_len = 0;
+	uint32_t head[MAX_PKT_BURST];
 	uint32_t head_idx, packet_success = 0;
 	uint16_t avail_idx, res_cur_idx;
 	uint16_t res_base_idx, res_end_idx;
@@ -113,6 +113,10 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	rte_prefetch0(&vq->desc[head[packet_success]]);
 
 	while (res_cur_idx != res_end_idx) {
+		uint32_t offset = 0;
+		uint32_t data_len, len_to_cpy;
+		uint8_t plus_hdr = 0;
+
 		/* Get descriptor from available ring */
 		desc = &vq->desc[head[packet_success]];
 
@@ -125,7 +129,6 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 		/* Copy virtio_hdr to packet and increment buffer address */
 		buff_hdr_addr = buff_addr;
-		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
 
 		/*
 		 * If the descriptors are chained the header and data are
@@ -136,24 +139,44 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 			desc = &vq->desc[desc->next];
 			/* Buffer address translation. */
 			buff_addr = gpa_to_vva(dev, desc->addr);
-			desc->len = rte_pktmbuf_data_len(buff);
 		} else {
 			buff_addr += vq->vhost_hlen;
-			desc->len = packet_len;
+			plus_hdr = 1;
 		}
 
+		data_len = rte_pktmbuf_data_len(buff);
+		len_to_cpy = RTE_MIN(data_len, desc->len);
+		do {
+			if (len_to_cpy > 0) {
+				/* Copy mbuf data to buffer */
+				rte_memcpy((void *)(uintptr_t)buff_addr,
+					(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
+					len_to_cpy);
+				PRINT_PACKET(dev, (uintptr_t)buff_addr,
+					len_to_cpy, 0);
+
+				desc->len = len_to_cpy + (plus_hdr ? vq->vhost_hlen : 0);
+				offset += len_to_cpy;
+				if (desc->flags & VRING_DESC_F_NEXT) {
+					desc = &vq->desc[desc->next];
+					buff_addr = gpa_to_vva(dev, desc->addr);
+					len_to_cpy = RTE_MIN(data_len - offset, desc->len);
+				} else
+					break;
+			} else {
+				desc->len = 0;
+				if (desc->flags & VRING_DESC_F_NEXT)
+                                        desc = &vq->desc[desc->next];
+				else
+					break;
+			}
+		} while (1);
+
 		/* Update used ring with desc information */
 		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
 							head[packet_success];
-		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
-
-		/* Copy mbuf data to buffer */
-		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
-		rte_memcpy((void *)(uintptr_t)buff_addr,
-			rte_pktmbuf_mtod(buff, const void *),
-			rte_pktmbuf_data_len(buff));
-		PRINT_PACKET(dev, (uintptr_t)buff_addr,
-			rte_pktmbuf_data_len(buff), 0);
+		vq->used->ring[res_cur_idx & (vq->size - 1)].len =
+							offset + vq->vhost_hlen;
 
 		res_cur_idx++;
 		packet_success++;
@@ -583,7 +606,14 @@  rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		desc = &vq->desc[head[entry_success]];
 
 		/* Discard first buffer as it is the virtio header */
-		desc = &vq->desc[desc->next];
+		if (desc->flags & VRING_DESC_F_NEXT) {
+			desc = &vq->desc[desc->next];
+			vb_offset = 0;
+			vb_avail = desc->len;
+		} else {
+			vb_offset = vq->vhost_hlen;
+			vb_avail = desc->len - vb_offset;
+		}
 
 		/* Buffer address translation. */
 		vb_addr = gpa_to_vva(dev, desc->addr);
@@ -602,8 +632,6 @@  rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		vq->used->ring[used_idx].id = head[entry_success];
 		vq->used->ring[used_idx].len = 0;
 
-		vb_offset = 0;
-		vb_avail = desc->len;
 		/* Allocate an mbuf and populate the structure. */
 		m = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(m == NULL)) {