[04/10] net/virtio: optimize flags update for packed ring

Message ID 20190319064312.13743-5-tiwei.bie@intel.com (mailing list archive)
State Accepted, archived
Delegated to: Maxime Coquelin
Headers
Series net/virtio: cleanups and fixes for packed/split ring |

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/checkpatch success coding style OK

Commit Message

Tiwei Bie March 19, 2019, 6:43 a.m. UTC
  Cache the AVAIL, USED and WRITE bits to avoid calculating
them as much as possible. Note that, the WRITE bit isn't
cached for control queue.

Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
 drivers/net/virtio/virtio_ethdev.c | 35 ++++++++++++++----------------
 drivers/net/virtio/virtio_rxtx.c   | 31 ++++++++++----------------
 drivers/net/virtio/virtqueue.h     |  8 +++----
 3 files changed, 32 insertions(+), 42 deletions(-)
  

Comments

Jens Freimann March 19, 2019, 8:54 a.m. UTC | #1
On Tue, Mar 19, 2019 at 02:43:06PM +0800, Tiwei Bie wrote:
>Cache the AVAIL, USED and WRITE bits to avoid calculating
>them as much as possible. Note that, the WRITE bit isn't
>cached for control queue.
>
>Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
>---
> drivers/net/virtio/virtio_ethdev.c | 35 ++++++++++++++----------------
> drivers/net/virtio/virtio_rxtx.c   | 31 ++++++++++----------------
> drivers/net/virtio/virtqueue.h     |  8 +++----
> 3 files changed, 32 insertions(+), 42 deletions(-)
>
>diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
>index ff16fb63e..9060b6b33 100644
>--- a/drivers/net/virtio/virtio_ethdev.c
>+++ b/drivers/net/virtio/virtio_ethdev.c
>@@ -149,7 +149,7 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
> 	int head;
> 	struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
> 	struct virtio_pmd_ctrl *result;
>-	bool avail_wrap_counter;
>+	uint16_t flags;
> 	int sum = 0;
> 	int nb_descs = 0;
> 	int k;
>@@ -161,14 +161,15 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
> 	 * One RX packet for ACK.
> 	 */
> 	head = vq->vq_avail_idx;
>-	avail_wrap_counter = vq->avail_wrap_counter;
>+	flags = vq->cached_flags;
> 	desc[head].addr = cvq->virtio_net_hdr_mem;
> 	desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
> 	vq->vq_free_cnt--;
> 	nb_descs++;
> 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
> 		vq->vq_avail_idx -= vq->vq_nentries;
>-		vq->avail_wrap_counter ^= 1;
>+		vq->cached_flags ^=
>+			VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);

Maybe name it avail_used_flags instead of cached flags. Also we could
use a constant value. 

> 	}
>
> 	for (k = 0; k < pkt_num; k++) {
>@@ -177,34 +178,31 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
> 			+ sizeof(ctrl->status) + sizeof(uint8_t) * sum;
> 		desc[vq->vq_avail_idx].len = dlen[k];
> 		desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
>-			VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
>-			VRING_DESC_F_USED(!vq->avail_wrap_counter);
>+			vq->cached_flags;
> 		sum += dlen[k];
> 		vq->vq_free_cnt--;
> 		nb_descs++;
> 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
> 			vq->vq_avail_idx -= vq->vq_nentries;
>-			vq->avail_wrap_counter ^= 1;
>+			vq->cached_flags ^=
>+				VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
> 		}
> 	}
>
> 	desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
> 		+ sizeof(struct virtio_net_ctrl_hdr);
> 	desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
>-	desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
>-		VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
>-		VRING_DESC_F_USED(!vq->avail_wrap_counter);
>+	desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE | vq->cached_flags;
> 	vq->vq_free_cnt--;
> 	nb_descs++;
> 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
> 		vq->vq_avail_idx -= vq->vq_nentries;
>-		vq->avail_wrap_counter ^= 1;
>+		vq->cached_flags ^=
>+			VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
> 	}
>
> 	virtio_wmb(vq->hw->weak_barriers);
>-	desc[head].flags = VRING_DESC_F_NEXT |
>-		VRING_DESC_F_AVAIL(avail_wrap_counter) |
>-		VRING_DESC_F_USED(!avail_wrap_counter);
>+	desc[head].flags = VRING_DESC_F_NEXT | flags;
>
> 	virtio_wmb(vq->hw->weak_barriers);
> 	virtqueue_notify(vq);
>@@ -226,12 +224,12 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
> 	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
> 			"vq->vq_avail_idx=%d\n"
> 			"vq->vq_used_cons_idx=%d\n"
>-			"vq->avail_wrap_counter=%d\n"
>+			"vq->cached_flags=0x%x\n"
> 			"vq->used_wrap_counter=%d\n",
> 			vq->vq_free_cnt,
> 			vq->vq_avail_idx,
> 			vq->vq_used_cons_idx,
>-			vq->avail_wrap_counter,
>+			vq->cached_flags,
> 			vq->used_wrap_counter);
>
> 	result = cvq->virtio_net_hdr_mz->addr;
>@@ -491,11 +489,10 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
> 	vq->vq_nentries = vq_size;
> 	vq->event_flags_shadow = 0;
> 	if (vtpci_packed_queue(hw)) {
>-		vq->avail_wrap_counter = 1;
> 		vq->used_wrap_counter = 1;
>-		vq->avail_used_flags =
>-			VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
>-			VRING_DESC_F_USED(!vq->avail_wrap_counter);
>+		vq->cached_flags = VRING_DESC_F_AVAIL(1);
>+		if (queue_type == VTNET_RQ)
>+			vq->cached_flags |= VRING_DESC_F_WRITE;
> 	}
>
> 	/*
>diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
>index 771d3c3f6..3c354baef 100644
>--- a/drivers/net/virtio/virtio_rxtx.c
>+++ b/drivers/net/virtio/virtio_rxtx.c
>@@ -431,7 +431,7 @@ virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
> 				     struct rte_mbuf **cookie, uint16_t num)
> {
> 	struct vring_packed_desc *start_dp = vq->ring_packed.desc_packed;
>-	uint16_t flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
>+	uint16_t flags = vq->cached_flags;
> 	struct virtio_hw *hw = vq->hw;
> 	struct vq_desc_extra *dxp;
> 	uint16_t idx;
>@@ -460,11 +460,9 @@ virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
> 		start_dp[idx].flags = flags;
> 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
> 			vq->vq_avail_idx -= vq->vq_nentries;
>-			vq->avail_wrap_counter ^= 1;
>-			vq->avail_used_flags =
>-				VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
>-				VRING_DESC_F_USED(!vq->avail_wrap_counter);
>-			flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
>+			vq->cached_flags ^=
>+				VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
>+			flags = vq->cached_flags;

same here. it's not really cached, it's pre-calculated. And here we
could also use a pre-calculated constand/define. 

Otherwise looks good! Did you see any performance improvements?


regards,
Jens
  
Tiwei Bie March 19, 2019, 9:37 a.m. UTC | #2
On Tue, Mar 19, 2019 at 09:54:03AM +0100, Jens Freimann wrote:
> On Tue, Mar 19, 2019 at 02:43:06PM +0800, Tiwei Bie wrote:
> > Cache the AVAIL, USED and WRITE bits to avoid calculating
> > them as much as possible. Note that, the WRITE bit isn't
> > cached for control queue.
> > 
> > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> > ---
> > drivers/net/virtio/virtio_ethdev.c | 35 ++++++++++++++----------------
> > drivers/net/virtio/virtio_rxtx.c   | 31 ++++++++++----------------
> > drivers/net/virtio/virtqueue.h     |  8 +++----
> > 3 files changed, 32 insertions(+), 42 deletions(-)
> > 
> > diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> > index ff16fb63e..9060b6b33 100644
> > --- a/drivers/net/virtio/virtio_ethdev.c
> > +++ b/drivers/net/virtio/virtio_ethdev.c
> > @@ -149,7 +149,7 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
> > 	int head;
> > 	struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
> > 	struct virtio_pmd_ctrl *result;
> > -	bool avail_wrap_counter;
> > +	uint16_t flags;
> > 	int sum = 0;
> > 	int nb_descs = 0;
> > 	int k;
> > @@ -161,14 +161,15 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
> > 	 * One RX packet for ACK.
> > 	 */
> > 	head = vq->vq_avail_idx;
> > -	avail_wrap_counter = vq->avail_wrap_counter;
> > +	flags = vq->cached_flags;
> > 	desc[head].addr = cvq->virtio_net_hdr_mem;
> > 	desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
> > 	vq->vq_free_cnt--;
> > 	nb_descs++;
> > 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
> > 		vq->vq_avail_idx -= vq->vq_nentries;
> > -		vq->avail_wrap_counter ^= 1;
> > +		vq->cached_flags ^=
> > +			VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
> 
> Maybe name it avail_used_flags instead of cached flags. Also we could
> use a constant value.

It also contains the WRITE bit (not just AVAIL and USED bits)
for Rx path. That's why I didn't name it as avail_used_flags.

> 
> > 	}
> > 
> > 	for (k = 0; k < pkt_num; k++) {
> > @@ -177,34 +178,31 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
> > 			+ sizeof(ctrl->status) + sizeof(uint8_t) * sum;
> > 		desc[vq->vq_avail_idx].len = dlen[k];
> > 		desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
> > -			VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
> > -			VRING_DESC_F_USED(!vq->avail_wrap_counter);
> > +			vq->cached_flags;
> > 		sum += dlen[k];
> > 		vq->vq_free_cnt--;
> > 		nb_descs++;
> > 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
> > 			vq->vq_avail_idx -= vq->vq_nentries;
> > -			vq->avail_wrap_counter ^= 1;
> > +			vq->cached_flags ^=
> > +				VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
> > 		}
> > 	}
> > 
> > 	desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
> > 		+ sizeof(struct virtio_net_ctrl_hdr);
> > 	desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
> > -	desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
> > -		VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
> > -		VRING_DESC_F_USED(!vq->avail_wrap_counter);
> > +	desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE | vq->cached_flags;
> > 	vq->vq_free_cnt--;
> > 	nb_descs++;
> > 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
> > 		vq->vq_avail_idx -= vq->vq_nentries;
> > -		vq->avail_wrap_counter ^= 1;
> > +		vq->cached_flags ^=
> > +			VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
> > 	}
> > 
> > 	virtio_wmb(vq->hw->weak_barriers);
> > -	desc[head].flags = VRING_DESC_F_NEXT |
> > -		VRING_DESC_F_AVAIL(avail_wrap_counter) |
> > -		VRING_DESC_F_USED(!avail_wrap_counter);
> > +	desc[head].flags = VRING_DESC_F_NEXT | flags;
> > 
> > 	virtio_wmb(vq->hw->weak_barriers);
> > 	virtqueue_notify(vq);
> > @@ -226,12 +224,12 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
> > 	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
> > 			"vq->vq_avail_idx=%d\n"
> > 			"vq->vq_used_cons_idx=%d\n"
> > -			"vq->avail_wrap_counter=%d\n"
> > +			"vq->cached_flags=0x%x\n"
> > 			"vq->used_wrap_counter=%d\n",
> > 			vq->vq_free_cnt,
> > 			vq->vq_avail_idx,
> > 			vq->vq_used_cons_idx,
> > -			vq->avail_wrap_counter,
> > +			vq->cached_flags,
> > 			vq->used_wrap_counter);
> > 
> > 	result = cvq->virtio_net_hdr_mz->addr;
> > @@ -491,11 +489,10 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
> > 	vq->vq_nentries = vq_size;
> > 	vq->event_flags_shadow = 0;
> > 	if (vtpci_packed_queue(hw)) {
> > -		vq->avail_wrap_counter = 1;
> > 		vq->used_wrap_counter = 1;
> > -		vq->avail_used_flags =
> > -			VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
> > -			VRING_DESC_F_USED(!vq->avail_wrap_counter);
> > +		vq->cached_flags = VRING_DESC_F_AVAIL(1);
> > +		if (queue_type == VTNET_RQ)
> > +			vq->cached_flags |= VRING_DESC_F_WRITE;
> > 	}
> > 
> > 	/*
> > diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> > index 771d3c3f6..3c354baef 100644
> > --- a/drivers/net/virtio/virtio_rxtx.c
> > +++ b/drivers/net/virtio/virtio_rxtx.c
> > @@ -431,7 +431,7 @@ virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
> > 				     struct rte_mbuf **cookie, uint16_t num)
> > {
> > 	struct vring_packed_desc *start_dp = vq->ring_packed.desc_packed;
> > -	uint16_t flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
> > +	uint16_t flags = vq->cached_flags;
> > 	struct virtio_hw *hw = vq->hw;
> > 	struct vq_desc_extra *dxp;
> > 	uint16_t idx;
> > @@ -460,11 +460,9 @@ virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
> > 		start_dp[idx].flags = flags;
> > 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
> > 			vq->vq_avail_idx -= vq->vq_nentries;
> > -			vq->avail_wrap_counter ^= 1;
> > -			vq->avail_used_flags =
> > -				VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
> > -				VRING_DESC_F_USED(!vq->avail_wrap_counter);
> > -			flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
> > +			vq->cached_flags ^=
> > +				VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
> > +			flags = vq->cached_flags;
> 
> same here. it's not really cached, it's pre-calculated. And here we
> could also use a pre-calculated constand/define.

For pre-calculated constant/define, do you mean VRING_DESC_F_AVAIL(1)
and VRING_DESC_F_USED(1)? There is still little code in virtio-user
using them without constantly passing 1. I planned to fully get rid
of them in a separate patch later (but I can do it in this series if
anyone wants).

> 
> Otherwise looks good! Did you see any performance improvements?

Yeah, I saw slightly better performance in a quick test.

Thanks,
Tiwei

> 
> 
> regards,
> Jens
  
Jens Freimann March 19, 2019, 10:11 a.m. UTC | #3
On Tue, Mar 19, 2019 at 05:37:34PM +0800, Tiwei Bie wrote:
>On Tue, Mar 19, 2019 at 09:54:03AM +0100, Jens Freimann wrote:
>> On Tue, Mar 19, 2019 at 02:43:06PM +0800, Tiwei Bie wrote:
>> > Cache the AVAIL, USED and WRITE bits to avoid calculating
>> > them as much as possible. Note that, the WRITE bit isn't
>> > cached for control queue.
>> >
>> > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
>> > ---
>> > drivers/net/virtio/virtio_ethdev.c | 35 ++++++++++++++----------------
>> > drivers/net/virtio/virtio_rxtx.c   | 31 ++++++++++----------------
>> > drivers/net/virtio/virtqueue.h     |  8 +++----
>> > 3 files changed, 32 insertions(+), 42 deletions(-)
>> >
>> > diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
>> > index ff16fb63e..9060b6b33 100644
>> > --- a/drivers/net/virtio/virtio_ethdev.c
>> > +++ b/drivers/net/virtio/virtio_ethdev.c
>> > @@ -149,7 +149,7 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
>> > 	int head;
>> > 	struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
>> > 	struct virtio_pmd_ctrl *result;
>> > -	bool avail_wrap_counter;
>> > +	uint16_t flags;
>> > 	int sum = 0;
>> > 	int nb_descs = 0;
>> > 	int k;
>> > @@ -161,14 +161,15 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
>> > 	 * One RX packet for ACK.
>> > 	 */
>> > 	head = vq->vq_avail_idx;
>> > -	avail_wrap_counter = vq->avail_wrap_counter;
>> > +	flags = vq->cached_flags;
>> > 	desc[head].addr = cvq->virtio_net_hdr_mem;
>> > 	desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
>> > 	vq->vq_free_cnt--;
>> > 	nb_descs++;
>> > 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
>> > 		vq->vq_avail_idx -= vq->vq_nentries;
>> > -		vq->avail_wrap_counter ^= 1;
>> > +		vq->cached_flags ^=
>> > +			VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
>>
>> Maybe name it avail_used_flags instead of cached flags. Also we could
>> use a constant value.
>
>It also contains the WRITE bit (not just AVAIL and USED bits)
>for Rx path. That's why I didn't name it as avail_used_flags.

ok

>>
>> > 	}
>> >
>> > 	for (k = 0; k < pkt_num; k++) {
>> > @@ -177,34 +178,31 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
>> > 			+ sizeof(ctrl->status) + sizeof(uint8_t) * sum;
>> > 		desc[vq->vq_avail_idx].len = dlen[k];
>> > 		desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
>> > -			VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
>> > -			VRING_DESC_F_USED(!vq->avail_wrap_counter);
>> > +			vq->cached_flags;
>> > 		sum += dlen[k];
>> > 		vq->vq_free_cnt--;
>> > 		nb_descs++;
>> > 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
>> > 			vq->vq_avail_idx -= vq->vq_nentries;
>> > -			vq->avail_wrap_counter ^= 1;
>> > +			vq->cached_flags ^=
>> > +				VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
>> > 		}
>> > 	}
>> >
>> > 	desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
>> > 		+ sizeof(struct virtio_net_ctrl_hdr);
>> > 	desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
>> > -	desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
>> > -		VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
>> > -		VRING_DESC_F_USED(!vq->avail_wrap_counter);
>> > +	desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE | vq->cached_flags;
>> > 	vq->vq_free_cnt--;
>> > 	nb_descs++;
>> > 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
>> > 		vq->vq_avail_idx -= vq->vq_nentries;
>> > -		vq->avail_wrap_counter ^= 1;
>> > +		vq->cached_flags ^=
>> > +			VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
>> > 	}
>> >
>> > 	virtio_wmb(vq->hw->weak_barriers);
>> > -	desc[head].flags = VRING_DESC_F_NEXT |
>> > -		VRING_DESC_F_AVAIL(avail_wrap_counter) |
>> > -		VRING_DESC_F_USED(!avail_wrap_counter);
>> > +	desc[head].flags = VRING_DESC_F_NEXT | flags;
>> >
>> > 	virtio_wmb(vq->hw->weak_barriers);
>> > 	virtqueue_notify(vq);
>> > @@ -226,12 +224,12 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
>> > 	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
>> > 			"vq->vq_avail_idx=%d\n"
>> > 			"vq->vq_used_cons_idx=%d\n"
>> > -			"vq->avail_wrap_counter=%d\n"
>> > +			"vq->cached_flags=0x%x\n"
>> > 			"vq->used_wrap_counter=%d\n",
>> > 			vq->vq_free_cnt,
>> > 			vq->vq_avail_idx,
>> > 			vq->vq_used_cons_idx,
>> > -			vq->avail_wrap_counter,
>> > +			vq->cached_flags,
>> > 			vq->used_wrap_counter);
>> >
>> > 	result = cvq->virtio_net_hdr_mz->addr;
>> > @@ -491,11 +489,10 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
>> > 	vq->vq_nentries = vq_size;
>> > 	vq->event_flags_shadow = 0;
>> > 	if (vtpci_packed_queue(hw)) {
>> > -		vq->avail_wrap_counter = 1;
>> > 		vq->used_wrap_counter = 1;
>> > -		vq->avail_used_flags =
>> > -			VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
>> > -			VRING_DESC_F_USED(!vq->avail_wrap_counter);
>> > +		vq->cached_flags = VRING_DESC_F_AVAIL(1);
>> > +		if (queue_type == VTNET_RQ)
>> > +			vq->cached_flags |= VRING_DESC_F_WRITE;
>> > 	}
>> >
>> > 	/*
>> > diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
>> > index 771d3c3f6..3c354baef 100644
>> > --- a/drivers/net/virtio/virtio_rxtx.c
>> > +++ b/drivers/net/virtio/virtio_rxtx.c
>> > @@ -431,7 +431,7 @@ virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
>> > 				     struct rte_mbuf **cookie, uint16_t num)
>> > {
>> > 	struct vring_packed_desc *start_dp = vq->ring_packed.desc_packed;
>> > -	uint16_t flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
>> > +	uint16_t flags = vq->cached_flags;
>> > 	struct virtio_hw *hw = vq->hw;
>> > 	struct vq_desc_extra *dxp;
>> > 	uint16_t idx;
>> > @@ -460,11 +460,9 @@ virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
>> > 		start_dp[idx].flags = flags;
>> > 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
>> > 			vq->vq_avail_idx -= vq->vq_nentries;
>> > -			vq->avail_wrap_counter ^= 1;
>> > -			vq->avail_used_flags =
>> > -				VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
>> > -				VRING_DESC_F_USED(!vq->avail_wrap_counter);
>> > -			flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
>> > +			vq->cached_flags ^=
>> > +				VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
>> > +			flags = vq->cached_flags;
>>
>> same here. it's not really cached, it's pre-calculated. And here we
>> could also use a pre-calculated constand/define.
>
>For pre-calculated constant/define, do you mean VRING_DESC_F_AVAIL(1)
>and VRING_DESC_F_USED(1)? There is still little code in virtio-user
>using them without constantly passing 1. I planned to fully get rid
>of them in a separate patch later (but I can do it in this series if
>anyone wants).

Yes, that's what I meant. And it's fine by me if you do it in a
follow-up.

>
>>
>> Otherwise looks good! Did you see any performance improvements?
>
>Yeah, I saw slightly better performance in a quick test.

Nice.

Reviewed-by: Jens Freimann <jfreimann@redhat.com> 

regards,
Jens 
>
>>
>>
>> regards,
>> Jens
  
Maxime Coquelin March 19, 2019, 12:50 p.m. UTC | #4
On 3/19/19 11:11 AM, Jens Freimann wrote:
>>>
>>> same here. it's not really cached, it's pre-calculated. And here we
>>> could also use a pre-calculated constand/define.
>>
>> For pre-calculated constant/define, do you mean VRING_DESC_F_AVAIL(1)
>> and VRING_DESC_F_USED(1)? There is still little code in virtio-user
>> using them without constantly passing 1. I planned to fully get rid
>> of them in a separate patch later (but I can do it in this series if
>> anyone wants).
> 
> Yes, that's what I meant. And it's fine by me if you do it in a
> follow-up.

Agree, it can be done in a separate patch.
  
Maxime Coquelin March 19, 2019, 12:58 p.m. UTC | #5
On 3/19/19 7:43 AM, Tiwei Bie wrote:
> Cache the AVAIL, USED and WRITE bits to avoid calculating
> them as much as possible. Note that, the WRITE bit isn't
> cached for control queue.
> 
> Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> ---
>   drivers/net/virtio/virtio_ethdev.c | 35 ++++++++++++++----------------
>   drivers/net/virtio/virtio_rxtx.c   | 31 ++++++++++----------------
>   drivers/net/virtio/virtqueue.h     |  8 +++----
>   3 files changed, 32 insertions(+), 42 deletions(-)
> 


Nice patch!
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime
  

Patch

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index ff16fb63e..9060b6b33 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -149,7 +149,7 @@  virtio_send_command_packed(struct virtnet_ctl *cvq,
 	int head;
 	struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
 	struct virtio_pmd_ctrl *result;
-	bool avail_wrap_counter;
+	uint16_t flags;
 	int sum = 0;
 	int nb_descs = 0;
 	int k;
@@ -161,14 +161,15 @@  virtio_send_command_packed(struct virtnet_ctl *cvq,
 	 * One RX packet for ACK.
 	 */
 	head = vq->vq_avail_idx;
-	avail_wrap_counter = vq->avail_wrap_counter;
+	flags = vq->cached_flags;
 	desc[head].addr = cvq->virtio_net_hdr_mem;
 	desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
 	vq->vq_free_cnt--;
 	nb_descs++;
 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
 		vq->vq_avail_idx -= vq->vq_nentries;
-		vq->avail_wrap_counter ^= 1;
+		vq->cached_flags ^=
+			VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
 	}
 
 	for (k = 0; k < pkt_num; k++) {
@@ -177,34 +178,31 @@  virtio_send_command_packed(struct virtnet_ctl *cvq,
 			+ sizeof(ctrl->status) + sizeof(uint8_t) * sum;
 		desc[vq->vq_avail_idx].len = dlen[k];
 		desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
-			VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
-			VRING_DESC_F_USED(!vq->avail_wrap_counter);
+			vq->cached_flags;
 		sum += dlen[k];
 		vq->vq_free_cnt--;
 		nb_descs++;
 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
 			vq->vq_avail_idx -= vq->vq_nentries;
-			vq->avail_wrap_counter ^= 1;
+			vq->cached_flags ^=
+				VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
 		}
 	}
 
 	desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
 		+ sizeof(struct virtio_net_ctrl_hdr);
 	desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
-	desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
-		VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
-		VRING_DESC_F_USED(!vq->avail_wrap_counter);
+	desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE | vq->cached_flags;
 	vq->vq_free_cnt--;
 	nb_descs++;
 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
 		vq->vq_avail_idx -= vq->vq_nentries;
-		vq->avail_wrap_counter ^= 1;
+		vq->cached_flags ^=
+			VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
 	}
 
 	virtio_wmb(vq->hw->weak_barriers);
-	desc[head].flags = VRING_DESC_F_NEXT |
-		VRING_DESC_F_AVAIL(avail_wrap_counter) |
-		VRING_DESC_F_USED(!avail_wrap_counter);
+	desc[head].flags = VRING_DESC_F_NEXT | flags;
 
 	virtio_wmb(vq->hw->weak_barriers);
 	virtqueue_notify(vq);
@@ -226,12 +224,12 @@  virtio_send_command_packed(struct virtnet_ctl *cvq,
 	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
 			"vq->vq_avail_idx=%d\n"
 			"vq->vq_used_cons_idx=%d\n"
-			"vq->avail_wrap_counter=%d\n"
+			"vq->cached_flags=0x%x\n"
 			"vq->used_wrap_counter=%d\n",
 			vq->vq_free_cnt,
 			vq->vq_avail_idx,
 			vq->vq_used_cons_idx,
-			vq->avail_wrap_counter,
+			vq->cached_flags,
 			vq->used_wrap_counter);
 
 	result = cvq->virtio_net_hdr_mz->addr;
@@ -491,11 +489,10 @@  virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
 	vq->vq_nentries = vq_size;
 	vq->event_flags_shadow = 0;
 	if (vtpci_packed_queue(hw)) {
-		vq->avail_wrap_counter = 1;
 		vq->used_wrap_counter = 1;
-		vq->avail_used_flags =
-			VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
-			VRING_DESC_F_USED(!vq->avail_wrap_counter);
+		vq->cached_flags = VRING_DESC_F_AVAIL(1);
+		if (queue_type == VTNET_RQ)
+			vq->cached_flags |= VRING_DESC_F_WRITE;
 	}
 
 	/*
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 771d3c3f6..3c354baef 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -431,7 +431,7 @@  virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
 				     struct rte_mbuf **cookie, uint16_t num)
 {
 	struct vring_packed_desc *start_dp = vq->ring_packed.desc_packed;
-	uint16_t flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
+	uint16_t flags = vq->cached_flags;
 	struct virtio_hw *hw = vq->hw;
 	struct vq_desc_extra *dxp;
 	uint16_t idx;
@@ -460,11 +460,9 @@  virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
 		start_dp[idx].flags = flags;
 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
 			vq->vq_avail_idx -= vq->vq_nentries;
-			vq->avail_wrap_counter ^= 1;
-			vq->avail_used_flags =
-				VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
-				VRING_DESC_F_USED(!vq->avail_wrap_counter);
-			flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
+			vq->cached_flags ^=
+				VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
+			flags = vq->cached_flags;
 		}
 	}
 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
@@ -643,7 +641,7 @@  virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
 	dxp->ndescs = 1;
 	dxp->cookie = cookie;
 
-	flags = vq->avail_used_flags;
+	flags = vq->cached_flags;
 
 	/* prepend cannot fail, checked by caller */
 	hdr = (struct virtio_net_hdr *)
@@ -662,8 +660,7 @@  virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
 
 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
 		vq->vq_avail_idx -= vq->vq_nentries;
-		vq->avail_wrap_counter ^= 1;
-		vq->avail_used_flags ^=
+		vq->cached_flags ^=
 			VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
 	}
 
@@ -705,7 +702,7 @@  virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 
 	head_dp = &vq->ring_packed.desc_packed[idx];
 	head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
-	head_flags |= vq->avail_used_flags;
+	head_flags |= vq->cached_flags;
 
 	if (can_push) {
 		/* prepend cannot fail, checked by caller */
@@ -730,10 +727,8 @@  virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 		idx++;
 		if (idx >= vq->vq_nentries) {
 			idx -= vq->vq_nentries;
-			vq->avail_wrap_counter ^= 1;
-			vq->avail_used_flags =
-				VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
-				VRING_DESC_F_USED(!vq->avail_wrap_counter);
+			vq->cached_flags ^=
+				VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
 		}
 	}
 
@@ -746,17 +741,15 @@  virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 		start_dp[idx].len  = cookie->data_len;
 		if (likely(idx != head_idx)) {
 			flags = cookie->next ? VRING_DESC_F_NEXT : 0;
-			flags |= vq->avail_used_flags;
+			flags |= vq->cached_flags;
 			start_dp[idx].flags = flags;
 		}
 		prev = idx;
 		idx++;
 		if (idx >= vq->vq_nentries) {
 			idx -= vq->vq_nentries;
-			vq->avail_wrap_counter ^= 1;
-			vq->avail_used_flags =
-				VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
-				VRING_DESC_F_USED(!vq->avail_wrap_counter);
+			vq->cached_flags ^=
+				VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
 		}
 	} while ((cookie = cookie->next) != NULL);
 
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 24fa873c3..80c0c43c3 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -193,10 +193,10 @@  struct virtqueue {
 	struct virtio_hw  *hw; /**< virtio_hw structure pointer. */
 	struct vring vq_ring;  /**< vring keeping desc, used and avail */
 	struct vring_packed ring_packed;  /**< vring keeping descs */
-	bool avail_wrap_counter;
 	bool used_wrap_counter;
+	uint16_t cached_flags; /**< cached flags for descs */
 	uint16_t event_flags_shadow;
-	uint16_t avail_used_flags;
+
 	/**
 	 * Last consumed descriptor in the used table,
 	 * trails vq_ring.used->idx.
@@ -478,9 +478,9 @@  virtqueue_notify(struct virtqueue *vq)
 	if (vtpci_packed_queue((vq)->hw)) { \
 		PMD_INIT_LOG(DEBUG, \
 		"VQ: - size=%d; free=%d; used_cons_idx=%d; avail_idx=%d;" \
-		"VQ: - avail_wrap_counter=%d; used_wrap_counter=%d", \
+		" cached_flags=0x%x; used_wrap_counter=%d", \
 		(vq)->vq_nentries, (vq)->vq_free_cnt, (vq)->vq_used_cons_idx, \
-		(vq)->vq_avail_idx, (vq)->avail_wrap_counter, \
+		(vq)->vq_avail_idx, (vq)->cached_flags, \
 		(vq)->used_wrap_counter); \
 		break; \
 	} \