[04/10] net/virtio: optimize flags update for packed ring
Checks
Commit Message
Cache the AVAIL, USED and WRITE bits to avoid calculating
them as much as possible. Note that, the WRITE bit isn't
cached for control queue.
Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
drivers/net/virtio/virtio_ethdev.c | 35 ++++++++++++++----------------
drivers/net/virtio/virtio_rxtx.c | 31 ++++++++++----------------
drivers/net/virtio/virtqueue.h | 8 +++----
3 files changed, 32 insertions(+), 42 deletions(-)
Comments
On Tue, Mar 19, 2019 at 02:43:06PM +0800, Tiwei Bie wrote:
>Cache the AVAIL, USED and WRITE bits to avoid calculating
>them as much as possible. Note that, the WRITE bit isn't
>cached for control queue.
>
>Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
>---
> drivers/net/virtio/virtio_ethdev.c | 35 ++++++++++++++----------------
> drivers/net/virtio/virtio_rxtx.c | 31 ++++++++++----------------
> drivers/net/virtio/virtqueue.h | 8 +++----
> 3 files changed, 32 insertions(+), 42 deletions(-)
>
>diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
>index ff16fb63e..9060b6b33 100644
>--- a/drivers/net/virtio/virtio_ethdev.c
>+++ b/drivers/net/virtio/virtio_ethdev.c
>@@ -149,7 +149,7 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
> int head;
> struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
> struct virtio_pmd_ctrl *result;
>- bool avail_wrap_counter;
>+ uint16_t flags;
> int sum = 0;
> int nb_descs = 0;
> int k;
>@@ -161,14 +161,15 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
> * One RX packet for ACK.
> */
> head = vq->vq_avail_idx;
>- avail_wrap_counter = vq->avail_wrap_counter;
>+ flags = vq->cached_flags;
> desc[head].addr = cvq->virtio_net_hdr_mem;
> desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
> vq->vq_free_cnt--;
> nb_descs++;
> if (++vq->vq_avail_idx >= vq->vq_nentries) {
> vq->vq_avail_idx -= vq->vq_nentries;
>- vq->avail_wrap_counter ^= 1;
>+ vq->cached_flags ^=
>+ VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
Maybe name it avail_used_flags instead of cached_flags. Also we could
use a constant value.
> }
>
> for (k = 0; k < pkt_num; k++) {
>@@ -177,34 +178,31 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
> + sizeof(ctrl->status) + sizeof(uint8_t) * sum;
> desc[vq->vq_avail_idx].len = dlen[k];
> desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
>- VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
>- VRING_DESC_F_USED(!vq->avail_wrap_counter);
>+ vq->cached_flags;
> sum += dlen[k];
> vq->vq_free_cnt--;
> nb_descs++;
> if (++vq->vq_avail_idx >= vq->vq_nentries) {
> vq->vq_avail_idx -= vq->vq_nentries;
>- vq->avail_wrap_counter ^= 1;
>+ vq->cached_flags ^=
>+ VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
> }
> }
>
> desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
> + sizeof(struct virtio_net_ctrl_hdr);
> desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
>- desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
>- VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
>- VRING_DESC_F_USED(!vq->avail_wrap_counter);
>+ desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE | vq->cached_flags;
> vq->vq_free_cnt--;
> nb_descs++;
> if (++vq->vq_avail_idx >= vq->vq_nentries) {
> vq->vq_avail_idx -= vq->vq_nentries;
>- vq->avail_wrap_counter ^= 1;
>+ vq->cached_flags ^=
>+ VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
> }
>
> virtio_wmb(vq->hw->weak_barriers);
>- desc[head].flags = VRING_DESC_F_NEXT |
>- VRING_DESC_F_AVAIL(avail_wrap_counter) |
>- VRING_DESC_F_USED(!avail_wrap_counter);
>+ desc[head].flags = VRING_DESC_F_NEXT | flags;
>
> virtio_wmb(vq->hw->weak_barriers);
> virtqueue_notify(vq);
>@@ -226,12 +224,12 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
> PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
> "vq->vq_avail_idx=%d\n"
> "vq->vq_used_cons_idx=%d\n"
>- "vq->avail_wrap_counter=%d\n"
>+ "vq->cached_flags=0x%x\n"
> "vq->used_wrap_counter=%d\n",
> vq->vq_free_cnt,
> vq->vq_avail_idx,
> vq->vq_used_cons_idx,
>- vq->avail_wrap_counter,
>+ vq->cached_flags,
> vq->used_wrap_counter);
>
> result = cvq->virtio_net_hdr_mz->addr;
>@@ -491,11 +489,10 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
> vq->vq_nentries = vq_size;
> vq->event_flags_shadow = 0;
> if (vtpci_packed_queue(hw)) {
>- vq->avail_wrap_counter = 1;
> vq->used_wrap_counter = 1;
>- vq->avail_used_flags =
>- VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
>- VRING_DESC_F_USED(!vq->avail_wrap_counter);
>+ vq->cached_flags = VRING_DESC_F_AVAIL(1);
>+ if (queue_type == VTNET_RQ)
>+ vq->cached_flags |= VRING_DESC_F_WRITE;
> }
>
> /*
>diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
>index 771d3c3f6..3c354baef 100644
>--- a/drivers/net/virtio/virtio_rxtx.c
>+++ b/drivers/net/virtio/virtio_rxtx.c
>@@ -431,7 +431,7 @@ virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
> struct rte_mbuf **cookie, uint16_t num)
> {
> struct vring_packed_desc *start_dp = vq->ring_packed.desc_packed;
>- uint16_t flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
>+ uint16_t flags = vq->cached_flags;
> struct virtio_hw *hw = vq->hw;
> struct vq_desc_extra *dxp;
> uint16_t idx;
>@@ -460,11 +460,9 @@ virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
> start_dp[idx].flags = flags;
> if (++vq->vq_avail_idx >= vq->vq_nentries) {
> vq->vq_avail_idx -= vq->vq_nentries;
>- vq->avail_wrap_counter ^= 1;
>- vq->avail_used_flags =
>- VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
>- VRING_DESC_F_USED(!vq->avail_wrap_counter);
>- flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
>+ vq->cached_flags ^=
>+ VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
>+ flags = vq->cached_flags;
same here. it's not really cached, it's pre-calculated. And here we
could also use a pre-calculated constant/define.
Otherwise looks good! Did you see any performance improvements?
regards,
Jens
On Tue, Mar 19, 2019 at 09:54:03AM +0100, Jens Freimann wrote:
> On Tue, Mar 19, 2019 at 02:43:06PM +0800, Tiwei Bie wrote:
> > Cache the AVAIL, USED and WRITE bits to avoid calculating
> > them as much as possible. Note that, the WRITE bit isn't
> > cached for control queue.
> >
> > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> > ---
> > drivers/net/virtio/virtio_ethdev.c | 35 ++++++++++++++----------------
> > drivers/net/virtio/virtio_rxtx.c | 31 ++++++++++----------------
> > drivers/net/virtio/virtqueue.h | 8 +++----
> > 3 files changed, 32 insertions(+), 42 deletions(-)
> >
> > diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> > index ff16fb63e..9060b6b33 100644
> > --- a/drivers/net/virtio/virtio_ethdev.c
> > +++ b/drivers/net/virtio/virtio_ethdev.c
> > @@ -149,7 +149,7 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
> > int head;
> > struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
> > struct virtio_pmd_ctrl *result;
> > - bool avail_wrap_counter;
> > + uint16_t flags;
> > int sum = 0;
> > int nb_descs = 0;
> > int k;
> > @@ -161,14 +161,15 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
> > * One RX packet for ACK.
> > */
> > head = vq->vq_avail_idx;
> > - avail_wrap_counter = vq->avail_wrap_counter;
> > + flags = vq->cached_flags;
> > desc[head].addr = cvq->virtio_net_hdr_mem;
> > desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
> > vq->vq_free_cnt--;
> > nb_descs++;
> > if (++vq->vq_avail_idx >= vq->vq_nentries) {
> > vq->vq_avail_idx -= vq->vq_nentries;
> > - vq->avail_wrap_counter ^= 1;
> > + vq->cached_flags ^=
> > + VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
>
> Maybe name it avail_used_flags instead of cached flags. Also we could
> use a constant value.
It also contains the WRITE bit (not just AVAIL and USED bits)
for Rx path. That's why I didn't name it as avail_used_flags.
>
> > }
> >
> > for (k = 0; k < pkt_num; k++) {
> > @@ -177,34 +178,31 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
> > + sizeof(ctrl->status) + sizeof(uint8_t) * sum;
> > desc[vq->vq_avail_idx].len = dlen[k];
> > desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
> > - VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
> > - VRING_DESC_F_USED(!vq->avail_wrap_counter);
> > + vq->cached_flags;
> > sum += dlen[k];
> > vq->vq_free_cnt--;
> > nb_descs++;
> > if (++vq->vq_avail_idx >= vq->vq_nentries) {
> > vq->vq_avail_idx -= vq->vq_nentries;
> > - vq->avail_wrap_counter ^= 1;
> > + vq->cached_flags ^=
> > + VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
> > }
> > }
> >
> > desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
> > + sizeof(struct virtio_net_ctrl_hdr);
> > desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
> > - desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
> > - VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
> > - VRING_DESC_F_USED(!vq->avail_wrap_counter);
> > + desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE | vq->cached_flags;
> > vq->vq_free_cnt--;
> > nb_descs++;
> > if (++vq->vq_avail_idx >= vq->vq_nentries) {
> > vq->vq_avail_idx -= vq->vq_nentries;
> > - vq->avail_wrap_counter ^= 1;
> > + vq->cached_flags ^=
> > + VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
> > }
> >
> > virtio_wmb(vq->hw->weak_barriers);
> > - desc[head].flags = VRING_DESC_F_NEXT |
> > - VRING_DESC_F_AVAIL(avail_wrap_counter) |
> > - VRING_DESC_F_USED(!avail_wrap_counter);
> > + desc[head].flags = VRING_DESC_F_NEXT | flags;
> >
> > virtio_wmb(vq->hw->weak_barriers);
> > virtqueue_notify(vq);
> > @@ -226,12 +224,12 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
> > PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
> > "vq->vq_avail_idx=%d\n"
> > "vq->vq_used_cons_idx=%d\n"
> > - "vq->avail_wrap_counter=%d\n"
> > + "vq->cached_flags=0x%x\n"
> > "vq->used_wrap_counter=%d\n",
> > vq->vq_free_cnt,
> > vq->vq_avail_idx,
> > vq->vq_used_cons_idx,
> > - vq->avail_wrap_counter,
> > + vq->cached_flags,
> > vq->used_wrap_counter);
> >
> > result = cvq->virtio_net_hdr_mz->addr;
> > @@ -491,11 +489,10 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
> > vq->vq_nentries = vq_size;
> > vq->event_flags_shadow = 0;
> > if (vtpci_packed_queue(hw)) {
> > - vq->avail_wrap_counter = 1;
> > vq->used_wrap_counter = 1;
> > - vq->avail_used_flags =
> > - VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
> > - VRING_DESC_F_USED(!vq->avail_wrap_counter);
> > + vq->cached_flags = VRING_DESC_F_AVAIL(1);
> > + if (queue_type == VTNET_RQ)
> > + vq->cached_flags |= VRING_DESC_F_WRITE;
> > }
> >
> > /*
> > diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> > index 771d3c3f6..3c354baef 100644
> > --- a/drivers/net/virtio/virtio_rxtx.c
> > +++ b/drivers/net/virtio/virtio_rxtx.c
> > @@ -431,7 +431,7 @@ virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
> > struct rte_mbuf **cookie, uint16_t num)
> > {
> > struct vring_packed_desc *start_dp = vq->ring_packed.desc_packed;
> > - uint16_t flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
> > + uint16_t flags = vq->cached_flags;
> > struct virtio_hw *hw = vq->hw;
> > struct vq_desc_extra *dxp;
> > uint16_t idx;
> > @@ -460,11 +460,9 @@ virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
> > start_dp[idx].flags = flags;
> > if (++vq->vq_avail_idx >= vq->vq_nentries) {
> > vq->vq_avail_idx -= vq->vq_nentries;
> > - vq->avail_wrap_counter ^= 1;
> > - vq->avail_used_flags =
> > - VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
> > - VRING_DESC_F_USED(!vq->avail_wrap_counter);
> > - flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
> > + vq->cached_flags ^=
> > + VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
> > + flags = vq->cached_flags;
>
> same here. it's not really cached, it's pre-calculated. And here we
> could also use a pre-calculated constant/define.
For pre-calculated constant/define, do you mean VRING_DESC_F_AVAIL(1)
and VRING_DESC_F_USED(1)? There is still little code in virtio-user
using them without constantly passing 1. I planned to fully get rid
of them in a separate patch later (but I can do it in this series if
anyone wants).
>
> Otherwise looks good! Did you see any performance improvements?
Yeah, I saw slightly better performance in a quick test.
Thanks,
Tiwei
>
>
> regards,
> Jens
On Tue, Mar 19, 2019 at 05:37:34PM +0800, Tiwei Bie wrote:
>On Tue, Mar 19, 2019 at 09:54:03AM +0100, Jens Freimann wrote:
>> On Tue, Mar 19, 2019 at 02:43:06PM +0800, Tiwei Bie wrote:
>> > Cache the AVAIL, USED and WRITE bits to avoid calculating
>> > them as much as possible. Note that, the WRITE bit isn't
>> > cached for control queue.
>> >
>> > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
>> > ---
>> > drivers/net/virtio/virtio_ethdev.c | 35 ++++++++++++++----------------
>> > drivers/net/virtio/virtio_rxtx.c | 31 ++++++++++----------------
>> > drivers/net/virtio/virtqueue.h | 8 +++----
>> > 3 files changed, 32 insertions(+), 42 deletions(-)
>> >
>> > diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
>> > index ff16fb63e..9060b6b33 100644
>> > --- a/drivers/net/virtio/virtio_ethdev.c
>> > +++ b/drivers/net/virtio/virtio_ethdev.c
>> > @@ -149,7 +149,7 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
>> > int head;
>> > struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
>> > struct virtio_pmd_ctrl *result;
>> > - bool avail_wrap_counter;
>> > + uint16_t flags;
>> > int sum = 0;
>> > int nb_descs = 0;
>> > int k;
>> > @@ -161,14 +161,15 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
>> > * One RX packet for ACK.
>> > */
>> > head = vq->vq_avail_idx;
>> > - avail_wrap_counter = vq->avail_wrap_counter;
>> > + flags = vq->cached_flags;
>> > desc[head].addr = cvq->virtio_net_hdr_mem;
>> > desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
>> > vq->vq_free_cnt--;
>> > nb_descs++;
>> > if (++vq->vq_avail_idx >= vq->vq_nentries) {
>> > vq->vq_avail_idx -= vq->vq_nentries;
>> > - vq->avail_wrap_counter ^= 1;
>> > + vq->cached_flags ^=
>> > + VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
>>
>> Maybe name it avail_used_flags instead of cached flags. Also we could
>> use a constant value.
>
>It also contains the WRITE bit (not just AVAIL and USED bits)
>for Rx path. That's why I didn't name it as avail_used_flags.
ok
>>
>> > }
>> >
>> > for (k = 0; k < pkt_num; k++) {
>> > @@ -177,34 +178,31 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
>> > + sizeof(ctrl->status) + sizeof(uint8_t) * sum;
>> > desc[vq->vq_avail_idx].len = dlen[k];
>> > desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
>> > - VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
>> > - VRING_DESC_F_USED(!vq->avail_wrap_counter);
>> > + vq->cached_flags;
>> > sum += dlen[k];
>> > vq->vq_free_cnt--;
>> > nb_descs++;
>> > if (++vq->vq_avail_idx >= vq->vq_nentries) {
>> > vq->vq_avail_idx -= vq->vq_nentries;
>> > - vq->avail_wrap_counter ^= 1;
>> > + vq->cached_flags ^=
>> > + VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
>> > }
>> > }
>> >
>> > desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
>> > + sizeof(struct virtio_net_ctrl_hdr);
>> > desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
>> > - desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
>> > - VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
>> > - VRING_DESC_F_USED(!vq->avail_wrap_counter);
>> > + desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE | vq->cached_flags;
>> > vq->vq_free_cnt--;
>> > nb_descs++;
>> > if (++vq->vq_avail_idx >= vq->vq_nentries) {
>> > vq->vq_avail_idx -= vq->vq_nentries;
>> > - vq->avail_wrap_counter ^= 1;
>> > + vq->cached_flags ^=
>> > + VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
>> > }
>> >
>> > virtio_wmb(vq->hw->weak_barriers);
>> > - desc[head].flags = VRING_DESC_F_NEXT |
>> > - VRING_DESC_F_AVAIL(avail_wrap_counter) |
>> > - VRING_DESC_F_USED(!avail_wrap_counter);
>> > + desc[head].flags = VRING_DESC_F_NEXT | flags;
>> >
>> > virtio_wmb(vq->hw->weak_barriers);
>> > virtqueue_notify(vq);
>> > @@ -226,12 +224,12 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
>> > PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
>> > "vq->vq_avail_idx=%d\n"
>> > "vq->vq_used_cons_idx=%d\n"
>> > - "vq->avail_wrap_counter=%d\n"
>> > + "vq->cached_flags=0x%x\n"
>> > "vq->used_wrap_counter=%d\n",
>> > vq->vq_free_cnt,
>> > vq->vq_avail_idx,
>> > vq->vq_used_cons_idx,
>> > - vq->avail_wrap_counter,
>> > + vq->cached_flags,
>> > vq->used_wrap_counter);
>> >
>> > result = cvq->virtio_net_hdr_mz->addr;
>> > @@ -491,11 +489,10 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
>> > vq->vq_nentries = vq_size;
>> > vq->event_flags_shadow = 0;
>> > if (vtpci_packed_queue(hw)) {
>> > - vq->avail_wrap_counter = 1;
>> > vq->used_wrap_counter = 1;
>> > - vq->avail_used_flags =
>> > - VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
>> > - VRING_DESC_F_USED(!vq->avail_wrap_counter);
>> > + vq->cached_flags = VRING_DESC_F_AVAIL(1);
>> > + if (queue_type == VTNET_RQ)
>> > + vq->cached_flags |= VRING_DESC_F_WRITE;
>> > }
>> >
>> > /*
>> > diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
>> > index 771d3c3f6..3c354baef 100644
>> > --- a/drivers/net/virtio/virtio_rxtx.c
>> > +++ b/drivers/net/virtio/virtio_rxtx.c
>> > @@ -431,7 +431,7 @@ virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
>> > struct rte_mbuf **cookie, uint16_t num)
>> > {
>> > struct vring_packed_desc *start_dp = vq->ring_packed.desc_packed;
>> > - uint16_t flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
>> > + uint16_t flags = vq->cached_flags;
>> > struct virtio_hw *hw = vq->hw;
>> > struct vq_desc_extra *dxp;
>> > uint16_t idx;
>> > @@ -460,11 +460,9 @@ virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
>> > start_dp[idx].flags = flags;
>> > if (++vq->vq_avail_idx >= vq->vq_nentries) {
>> > vq->vq_avail_idx -= vq->vq_nentries;
>> > - vq->avail_wrap_counter ^= 1;
>> > - vq->avail_used_flags =
>> > - VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
>> > - VRING_DESC_F_USED(!vq->avail_wrap_counter);
>> > - flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
>> > + vq->cached_flags ^=
>> > + VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
>> > + flags = vq->cached_flags;
>>
>> same here. it's not really cached, it's pre-calculated. And here we
>> could also use a pre-calculated constant/define.
>
>For pre-calculated constant/define, do you mean VRING_DESC_F_AVAIL(1)
>and VRING_DESC_F_USED(1)? There is still little code in virtio-user
>using them without constantly passing 1. I planned to fully get rid
>of them in a separate patch later (but I can do it in this series if
>anyone wants).
Yes, that's what I meant. And it's fine by me if you do it in a
follow-up.
>
>>
>> Otherwise looks good! Did you see any performance improvements?
>
>Yeah, I saw slightly better performance in a quick test.
Nice.
Reviewed-by: Jens Freimann <jfreimann@redhat.com>
regards,
Jens
>
>>
>>
>> regards,
>> Jens
On 3/19/19 11:11 AM, Jens Freimann wrote:
>>>
>>> same here. it's not really cached, it's pre-calculated. And here we
>>> could also use a pre-calculated constand/define.
>>
>> For pre-calculated constant/define, do you mean VRING_DESC_F_AVAIL(1)
>> and VRING_DESC_F_USED(1)? There is still little code in virtio-user
>> using them without constantly passing 1. I planned to fully get rid
>> of them in a separate patch later (but I can do it in this series if
>> anyone wants).
>
> Yes, that's what I meant. And it's fine by me if you do it in a
> follow-up.
Agree, it can be done in a separate patch.
On 3/19/19 7:43 AM, Tiwei Bie wrote:
> Cache the AVAIL, USED and WRITE bits to avoid calculating
> them as much as possible. Note that, the WRITE bit isn't
> cached for control queue.
>
> Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> ---
> drivers/net/virtio/virtio_ethdev.c | 35 ++++++++++++++----------------
> drivers/net/virtio/virtio_rxtx.c | 31 ++++++++++----------------
> drivers/net/virtio/virtqueue.h | 8 +++----
> 3 files changed, 32 insertions(+), 42 deletions(-)
>
Nice patch!
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Thanks,
Maxime
@@ -149,7 +149,7 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
int head;
struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
struct virtio_pmd_ctrl *result;
- bool avail_wrap_counter;
+ uint16_t flags;
int sum = 0;
int nb_descs = 0;
int k;
@@ -161,14 +161,15 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
* One RX packet for ACK.
*/
head = vq->vq_avail_idx;
- avail_wrap_counter = vq->avail_wrap_counter;
+ flags = vq->cached_flags;
desc[head].addr = cvq->virtio_net_hdr_mem;
desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
vq->vq_free_cnt--;
nb_descs++;
if (++vq->vq_avail_idx >= vq->vq_nentries) {
vq->vq_avail_idx -= vq->vq_nentries;
- vq->avail_wrap_counter ^= 1;
+ vq->cached_flags ^=
+ VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
}
for (k = 0; k < pkt_num; k++) {
@@ -177,34 +178,31 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
+ sizeof(ctrl->status) + sizeof(uint8_t) * sum;
desc[vq->vq_avail_idx].len = dlen[k];
desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
- VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
- VRING_DESC_F_USED(!vq->avail_wrap_counter);
+ vq->cached_flags;
sum += dlen[k];
vq->vq_free_cnt--;
nb_descs++;
if (++vq->vq_avail_idx >= vq->vq_nentries) {
vq->vq_avail_idx -= vq->vq_nentries;
- vq->avail_wrap_counter ^= 1;
+ vq->cached_flags ^=
+ VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
}
}
desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
+ sizeof(struct virtio_net_ctrl_hdr);
desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
- desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
- VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
- VRING_DESC_F_USED(!vq->avail_wrap_counter);
+ desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE | vq->cached_flags;
vq->vq_free_cnt--;
nb_descs++;
if (++vq->vq_avail_idx >= vq->vq_nentries) {
vq->vq_avail_idx -= vq->vq_nentries;
- vq->avail_wrap_counter ^= 1;
+ vq->cached_flags ^=
+ VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
}
virtio_wmb(vq->hw->weak_barriers);
- desc[head].flags = VRING_DESC_F_NEXT |
- VRING_DESC_F_AVAIL(avail_wrap_counter) |
- VRING_DESC_F_USED(!avail_wrap_counter);
+ desc[head].flags = VRING_DESC_F_NEXT | flags;
virtio_wmb(vq->hw->weak_barriers);
virtqueue_notify(vq);
@@ -226,12 +224,12 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
"vq->vq_avail_idx=%d\n"
"vq->vq_used_cons_idx=%d\n"
- "vq->avail_wrap_counter=%d\n"
+ "vq->cached_flags=0x%x\n"
"vq->used_wrap_counter=%d\n",
vq->vq_free_cnt,
vq->vq_avail_idx,
vq->vq_used_cons_idx,
- vq->avail_wrap_counter,
+ vq->cached_flags,
vq->used_wrap_counter);
result = cvq->virtio_net_hdr_mz->addr;
@@ -491,11 +489,10 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
vq->vq_nentries = vq_size;
vq->event_flags_shadow = 0;
if (vtpci_packed_queue(hw)) {
- vq->avail_wrap_counter = 1;
vq->used_wrap_counter = 1;
- vq->avail_used_flags =
- VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
- VRING_DESC_F_USED(!vq->avail_wrap_counter);
+ vq->cached_flags = VRING_DESC_F_AVAIL(1);
+ if (queue_type == VTNET_RQ)
+ vq->cached_flags |= VRING_DESC_F_WRITE;
}
/*
@@ -431,7 +431,7 @@ virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
struct rte_mbuf **cookie, uint16_t num)
{
struct vring_packed_desc *start_dp = vq->ring_packed.desc_packed;
- uint16_t flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
+ uint16_t flags = vq->cached_flags;
struct virtio_hw *hw = vq->hw;
struct vq_desc_extra *dxp;
uint16_t idx;
@@ -460,11 +460,9 @@ virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
start_dp[idx].flags = flags;
if (++vq->vq_avail_idx >= vq->vq_nentries) {
vq->vq_avail_idx -= vq->vq_nentries;
- vq->avail_wrap_counter ^= 1;
- vq->avail_used_flags =
- VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
- VRING_DESC_F_USED(!vq->avail_wrap_counter);
- flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
+ vq->cached_flags ^=
+ VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
+ flags = vq->cached_flags;
}
}
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
@@ -643,7 +641,7 @@ virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
dxp->ndescs = 1;
dxp->cookie = cookie;
- flags = vq->avail_used_flags;
+ flags = vq->cached_flags;
/* prepend cannot fail, checked by caller */
hdr = (struct virtio_net_hdr *)
@@ -662,8 +660,7 @@ virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
if (++vq->vq_avail_idx >= vq->vq_nentries) {
vq->vq_avail_idx -= vq->vq_nentries;
- vq->avail_wrap_counter ^= 1;
- vq->avail_used_flags ^=
+ vq->cached_flags ^=
VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
}
@@ -705,7 +702,7 @@ virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
head_dp = &vq->ring_packed.desc_packed[idx];
head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
- head_flags |= vq->avail_used_flags;
+ head_flags |= vq->cached_flags;
if (can_push) {
/* prepend cannot fail, checked by caller */
@@ -730,10 +727,8 @@ virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
idx++;
if (idx >= vq->vq_nentries) {
idx -= vq->vq_nentries;
- vq->avail_wrap_counter ^= 1;
- vq->avail_used_flags =
- VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
- VRING_DESC_F_USED(!vq->avail_wrap_counter);
+ vq->cached_flags ^=
+ VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
}
}
@@ -746,17 +741,15 @@ virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
start_dp[idx].len = cookie->data_len;
if (likely(idx != head_idx)) {
flags = cookie->next ? VRING_DESC_F_NEXT : 0;
- flags |= vq->avail_used_flags;
+ flags |= vq->cached_flags;
start_dp[idx].flags = flags;
}
prev = idx;
idx++;
if (idx >= vq->vq_nentries) {
idx -= vq->vq_nentries;
- vq->avail_wrap_counter ^= 1;
- vq->avail_used_flags =
- VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
- VRING_DESC_F_USED(!vq->avail_wrap_counter);
+ vq->cached_flags ^=
+ VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
}
} while ((cookie = cookie->next) != NULL);
@@ -193,10 +193,10 @@ struct virtqueue {
struct virtio_hw *hw; /**< virtio_hw structure pointer. */
struct vring vq_ring; /**< vring keeping desc, used and avail */
struct vring_packed ring_packed; /**< vring keeping descs */
- bool avail_wrap_counter;
bool used_wrap_counter;
+ uint16_t cached_flags; /**< cached flags for descs */
uint16_t event_flags_shadow;
- uint16_t avail_used_flags;
+
/**
* Last consumed descriptor in the used table,
* trails vq_ring.used->idx.
@@ -478,9 +478,9 @@ virtqueue_notify(struct virtqueue *vq)
if (vtpci_packed_queue((vq)->hw)) { \
PMD_INIT_LOG(DEBUG, \
"VQ: - size=%d; free=%d; used_cons_idx=%d; avail_idx=%d;" \
- "VQ: - avail_wrap_counter=%d; used_wrap_counter=%d", \
+ " cached_flags=0x%x; used_wrap_counter=%d", \
(vq)->vq_nentries, (vq)->vq_free_cnt, (vq)->vq_used_cons_idx, \
- (vq)->vq_avail_idx, (vq)->avail_wrap_counter, \
+ (vq)->vq_avail_idx, (vq)->cached_flags, \
(vq)->used_wrap_counter); \
break; \
} \