[v2] net/vhost: merge vhost stats loop in vhost Tx/Rx

Message ID 20210928014348.1747-1-gaoxiangliu0@163.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Headers
Series [v2] net/vhost: merge vhost stats loop in vhost Tx/Rx |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/github-robot: build success github build: passed
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS

Commit Message

Gaoxiang Liu Sept. 28, 2021, 1:43 a.m. UTC
  To improve performance in vhost Tx/Rx, merge vhost stats loop.
eth_vhost_tx has 2 loop of send num iteraion.
It can be merge into one.
eth_vhost_rx has the same issue as Tx.

Fixes: 4d6cf2ac93dc ("net/vhost: add extended statistics")

Signed-off-by: Gaoxiang Liu <gaoxiangliu0@163.com>
---

v2:
 * Fix coding style issues.
---
 drivers/net/vhost/rte_eth_vhost.c | 62 ++++++++++++++-----------------
 1 file changed, 28 insertions(+), 34 deletions(-)
  

Comments

Gaoxiang Liu Oct. 6, 2021, 10:42 a.m. UTC | #1
Hi, Maxime, Chenbo
Please help to review the patch.

Thanks.
Gaoxiang






On 09/28/2021 09:43, Gaoxiang Liu wrote:
To improve performance in vhost Tx/Rx, merge vhost stats loop.
eth_vhost_tx has 2 loop of send num iteraion.
It can be merge into one.
eth_vhost_rx has the same issue as Tx.

Fixes: 4d6cf2ac93dc ("net/vhost: add extended statistics")

Signed-off-by: Gaoxiang Liu <gaoxiangliu0@163.com>
---

v2:
* Fix coding style issues.
---
drivers/net/vhost/rte_eth_vhost.c | 62 ++++++++++++++-----------------
1 file changed, 28 insertions(+), 34 deletions(-)

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index a202931e9a..a4129980f2 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -336,38 +336,29 @@ vhost_count_xcast_packets(struct vhost_queue *vq,
}

static void
-vhost_update_packet_xstats(struct vhost_queue *vq, struct rte_mbuf **bufs,
-                  uint16_t count, uint64_t nb_bytes,
-                  uint64_t nb_missed)
+vhost_update_single_packet_xstats(struct vhost_queue *vq, struct rte_mbuf *buf)
{
     uint32_t pkt_len = 0;
-     uint64_t i = 0;
     uint64_t index;
     struct vhost_stats *pstats = &vq->stats;

-     pstats->xstats[VHOST_BYTE] += nb_bytes;
-     pstats->xstats[VHOST_MISSED_PKT] += nb_missed;
-     pstats->xstats[VHOST_UNICAST_PKT] += nb_missed;
-
-     for (i = 0; i < count ; i++) {
-          pstats->xstats[VHOST_PKT]++;
-          pkt_len = bufs[i]->pkt_len;
-          if (pkt_len == 64) {
-               pstats->xstats[VHOST_64_PKT]++;
-          } else if (pkt_len > 64 && pkt_len < 1024) {
-               index = (sizeof(pkt_len) * 8)
-                    - __builtin_clz(pkt_len) - 5;
-               pstats->xstats[index]++;
-          } else {
-               if (pkt_len < 64)
-                    pstats->xstats[VHOST_UNDERSIZE_PKT]++;
-               else if (pkt_len <= 1522)
-                    pstats->xstats[VHOST_1024_TO_1522_PKT]++;
-               else if (pkt_len > 1522)
-                    pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
-          }
-          vhost_count_xcast_packets(vq, bufs[i]);
+     pstats->xstats[VHOST_PKT]++;
+     pkt_len = buf->pkt_len;
+     if (pkt_len == 64) {
+          pstats->xstats[VHOST_64_PKT]++;
+     } else if (pkt_len > 64 && pkt_len < 1024) {
+          index = (sizeof(pkt_len) * 8)
+               - __builtin_clz(pkt_len) - 5;
+          pstats->xstats[index]++;
+     } else {
+          if (pkt_len < 64)
+               pstats->xstats[VHOST_UNDERSIZE_PKT]++;
+          else if (pkt_len <= 1522)
+               pstats->xstats[VHOST_1024_TO_1522_PKT]++;
+          else if (pkt_len > 1522)
+               pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
     }
+     vhost_count_xcast_packets(vq, buf);
}

static uint16_t
@@ -376,7 +367,6 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
     struct vhost_queue *r = q;
     uint16_t i, nb_rx = 0;
     uint16_t nb_receive = nb_bufs;
-     uint64_t nb_bytes = 0;

     if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
          return 0;
@@ -411,11 +401,11 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
          if (r->internal->vlan_strip)
               rte_vlan_strip(bufs[i]);

-          nb_bytes += bufs[i]->pkt_len;
-     }
+          r->stats.bytes += bufs[i]->pkt_len;
+          r->stats.xstats[VHOST_BYTE] += bufs[i]->pkt_len;

-     r->stats.bytes += nb_bytes;
-     vhost_update_packet_xstats(r, bufs, nb_rx, nb_bytes, 0);
+          vhost_update_single_packet_xstats(r, bufs[i]);
+     }

out:
     rte_atomic32_set(&r->while_queuing, 0);
@@ -471,16 +461,20 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
               break;
     }

-     for (i = 0; likely(i < nb_tx); i++)
+     for (i = 0; likely(i < nb_tx); i++) {
          nb_bytes += bufs[i]->pkt_len;
+          vhost_update_single_packet_xstats(r, bufs[i]);
+     }

     nb_missed = nb_bufs - nb_tx;

     r->stats.pkts += nb_tx;
     r->stats.bytes += nb_bytes;
-     r->stats.missed_pkts += nb_bufs - nb_tx;
+     r->stats.missed_pkts += nb_missed;

-     vhost_update_packet_xstats(r, bufs, nb_tx, nb_bytes, nb_missed);
+     r->stats.xstats[VHOST_BYTE] += nb_bytes;
+     r->stats.xstats[VHOST_MISSED_PKT] += nb_missed;
+     r->stats.xstats[VHOST_UNICAST_PKT] += nb_missed;

     /* According to RFC2863, ifHCOutUcastPkts, ifHCOutMulticastPkts and
      * ifHCOutBroadcastPkts counters are increased when packets are not
--
2.32.0
  
Maxime Coquelin Oct. 15, 2021, 12:16 p.m. UTC | #2
Hi,

On 9/28/21 03:43, Gaoxiang Liu wrote:
> To improve performance in vhost Tx/Rx, merge vhost stats loop.
> eth_vhost_tx has 2 loop of send num iteraion.
> It can be merge into one.
> eth_vhost_rx has the same issue as Tx.
> 
> Fixes: 4d6cf2ac93dc ("net/vhost: add extended statistics")

Please remove the Fixes tag, this is an optimization, not a fix.

> 
> Signed-off-by: Gaoxiang Liu <gaoxiangliu0@163.com>
> ---
> 
> v2:
>   * Fix coding style issues.
> ---
>   drivers/net/vhost/rte_eth_vhost.c | 62 ++++++++++++++-----------------
>   1 file changed, 28 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
> index a202931e9a..a4129980f2 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -336,38 +336,29 @@ vhost_count_xcast_packets(struct vhost_queue *vq,
>   }
>   
>   static void
> -vhost_update_packet_xstats(struct vhost_queue *vq, struct rte_mbuf **bufs,
> -			   uint16_t count, uint64_t nb_bytes,
> -			   uint64_t nb_missed)
> +vhost_update_single_packet_xstats(struct vhost_queue *vq, struct rte_mbuf *buf)

I tried to build without and with your patch, and I think that what can
explain most of the performance difference is that without your patch
the function is not inlined, whereas it is implicitely inlined with your
patch applied.

I agree with your patch, but I think we might add __rte_always_inline to
this function to make it explicit. What do you think?

Other than that:

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


>   {
>   	uint32_t pkt_len = 0;
> -	uint64_t i = 0;
>   	uint64_t index;
>   	struct vhost_stats *pstats = &vq->stats;
>   
> -	pstats->xstats[VHOST_BYTE] += nb_bytes;
> -	pstats->xstats[VHOST_MISSED_PKT] += nb_missed;
> -	pstats->xstats[VHOST_UNICAST_PKT] += nb_missed;
> -
> -	for (i = 0; i < count ; i++) {
> -		pstats->xstats[VHOST_PKT]++;
> -		pkt_len = bufs[i]->pkt_len;
> -		if (pkt_len == 64) {
> -			pstats->xstats[VHOST_64_PKT]++;
> -		} else if (pkt_len > 64 && pkt_len < 1024) {
> -			index = (sizeof(pkt_len) * 8)
> -				- __builtin_clz(pkt_len) - 5;
> -			pstats->xstats[index]++;
> -		} else {
> -			if (pkt_len < 64)
> -				pstats->xstats[VHOST_UNDERSIZE_PKT]++;
> -			else if (pkt_len <= 1522)
> -				pstats->xstats[VHOST_1024_TO_1522_PKT]++;
> -			else if (pkt_len > 1522)
> -				pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
> -		}
> -		vhost_count_xcast_packets(vq, bufs[i]);
> +	pstats->xstats[VHOST_PKT]++;
> +	pkt_len = buf->pkt_len;
> +	if (pkt_len == 64) {
> +		pstats->xstats[VHOST_64_PKT]++;
> +	} else if (pkt_len > 64 && pkt_len < 1024) {
> +		index = (sizeof(pkt_len) * 8)
> +			- __builtin_clz(pkt_len) - 5;
> +		pstats->xstats[index]++;
> +	} else {
> +		if (pkt_len < 64)
> +			pstats->xstats[VHOST_UNDERSIZE_PKT]++;
> +		else if (pkt_len <= 1522)
> +			pstats->xstats[VHOST_1024_TO_1522_PKT]++;
> +		else if (pkt_len > 1522)
> +			pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
>   	}
> +	vhost_count_xcast_packets(vq, buf);
>   }
>   
>   static uint16_t
> @@ -376,7 +367,6 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>   	struct vhost_queue *r = q;
>   	uint16_t i, nb_rx = 0;
>   	uint16_t nb_receive = nb_bufs;
> -	uint64_t nb_bytes = 0;
>   
>   	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
>   		return 0;
> @@ -411,11 +401,11 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>   		if (r->internal->vlan_strip)
>   			rte_vlan_strip(bufs[i]);
>   
> -		nb_bytes += bufs[i]->pkt_len;
> -	}
> +		r->stats.bytes += bufs[i]->pkt_len;
> +		r->stats.xstats[VHOST_BYTE] += bufs[i]->pkt_len;
>   
> -	r->stats.bytes += nb_bytes;
> -	vhost_update_packet_xstats(r, bufs, nb_rx, nb_bytes, 0);
> +		vhost_update_single_packet_xstats(r, bufs[i]);
> +	}
>   
>   out:
>   	rte_atomic32_set(&r->while_queuing, 0);
> @@ -471,16 +461,20 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>   			break;
>   	}
>   
> -	for (i = 0; likely(i < nb_tx); i++)
> +	for (i = 0; likely(i < nb_tx); i++) {
>   		nb_bytes += bufs[i]->pkt_len;
> +		vhost_update_single_packet_xstats(r, bufs[i]);
> +	}
>   
>   	nb_missed = nb_bufs - nb_tx;
>   
>   	r->stats.pkts += nb_tx;
>   	r->stats.bytes += nb_bytes;
> -	r->stats.missed_pkts += nb_bufs - nb_tx;
> +	r->stats.missed_pkts += nb_missed;
>   
> -	vhost_update_packet_xstats(r, bufs, nb_tx, nb_bytes, nb_missed);
> +	r->stats.xstats[VHOST_BYTE] += nb_bytes;
> +	r->stats.xstats[VHOST_MISSED_PKT] += nb_missed;
> +	r->stats.xstats[VHOST_UNICAST_PKT] += nb_missed;
>   
>   	/* According to RFC2863, ifHCOutUcastPkts, ifHCOutMulticastPkts and
>   	 * ifHCOutBroadcastPkts counters are increased when packets are not
>
  
Gaoxiang Liu Oct. 16, 2021, 8:59 a.m. UTC | #3
Hi, Maxime
I agree with you.The inline should be added to vhost_update_single_packet_xstats function.
I will fix it in [PATCH v3].

Thanks,
Gaoxiang



发自 网易邮箱大师




---- 回复的原邮件 ----
| 发件人 | Maxime Coquelin<maxime.coquelin@redhat.com> |
| 日期 | 2021年10月15日 20:16 |
| 收件人 | Gaoxiang Liu<gaoxiangliu0@163.com>、chenbo.xia@intel.com<chenbo.xia@intel.com> |
| 抄送至 | dev@dpdk.org<dev@dpdk.org>、liugaoxiang@huawei.com<liugaoxiang@huawei.com> |
| 主题 | Re: [PATCH v2] net/vhost: merge vhost stats loop in vhost Tx/Rx |
Hi,

On 9/28/21 03:43, Gaoxiang Liu wrote:
> To improve performance in vhost Tx/Rx, merge vhost stats loop.
> eth_vhost_tx has 2 loop of send num iteraion.
> It can be merge into one.
> eth_vhost_rx has the same issue as Tx.
>
> Fixes: 4d6cf2ac93dc ("net/vhost: add extended statistics")

Please remove the Fixes tag, this is an optimization, not a fix.

>
> Signed-off-by: Gaoxiang Liu <gaoxiangliu0@163.com>
> ---
>
> v2:
>   * Fix coding style issues.
> ---
>   drivers/net/vhost/rte_eth_vhost.c | 62 ++++++++++++++-----------------
>   1 file changed, 28 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
> index a202931e9a..a4129980f2 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -336,38 +336,29 @@ vhost_count_xcast_packets(struct vhost_queue *vq,
>   }
>  
>   static void
> -vhost_update_packet_xstats(struct vhost_queue *vq, struct rte_mbuf **bufs,
> -                  uint16_t count, uint64_t nb_bytes,
> -                  uint64_t nb_missed)
> +vhost_update_single_packet_xstats(struct vhost_queue *vq, struct rte_mbuf *buf)

I tried to build without and with your patch, and I think that what can
explain most of the performance difference is that without your patch
the function is not inlined, whereas it is implicitely inlined with your
patch applied.

I agree with your patch, but I think we might add __rte_always_inline to
this function to make it explicit. What do you think?

Other than that:

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime


>   {
>        uint32_t pkt_len = 0;
> -     uint64_t i = 0;
>        uint64_t index;
>        struct vhost_stats *pstats = &vq->stats;
>  
> -     pstats->xstats[VHOST_BYTE] += nb_bytes;
> -     pstats->xstats[VHOST_MISSED_PKT] += nb_missed;
> -     pstats->xstats[VHOST_UNICAST_PKT] += nb_missed;
> -
> -     for (i = 0; i < count ; i++) {
> -          pstats->xstats[VHOST_PKT]++;
> -          pkt_len = bufs[i]->pkt_len;
> -          if (pkt_len == 64) {
> -               pstats->xstats[VHOST_64_PKT]++;
> -          } else if (pkt_len > 64 && pkt_len < 1024) {
> -               index = (sizeof(pkt_len) * 8)
> -                    - __builtin_clz(pkt_len) - 5;
> -               pstats->xstats[index]++;
> -          } else {
> -               if (pkt_len < 64)
> -                    pstats->xstats[VHOST_UNDERSIZE_PKT]++;
> -               else if (pkt_len <= 1522)
> -                    pstats->xstats[VHOST_1024_TO_1522_PKT]++;
> -               else if (pkt_len > 1522)
> -                    pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
> -          }
> -          vhost_count_xcast_packets(vq, bufs[i]);
> +     pstats->xstats[VHOST_PKT]++;
> +     pkt_len = buf->pkt_len;
> +     if (pkt_len == 64) {
> +          pstats->xstats[VHOST_64_PKT]++;
> +     } else if (pkt_len > 64 && pkt_len < 1024) {
> +          index = (sizeof(pkt_len) * 8)
> +               - __builtin_clz(pkt_len) - 5;
> +          pstats->xstats[index]++;
> +     } else {
> +          if (pkt_len < 64)
> +               pstats->xstats[VHOST_UNDERSIZE_PKT]++;
> +          else if (pkt_len <= 1522)
> +               pstats->xstats[VHOST_1024_TO_1522_PKT]++;
> +          else if (pkt_len > 1522)
> +               pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
>        }
> +     vhost_count_xcast_packets(vq, buf);
>   }
>  
>   static uint16_t
> @@ -376,7 +367,6 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>        struct vhost_queue *r = q;
>        uint16_t i, nb_rx = 0;
>        uint16_t nb_receive = nb_bufs;
> -     uint64_t nb_bytes = 0;
>  
>        if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
>             return 0;
> @@ -411,11 +401,11 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>             if (r->internal->vlan_strip)
>                  rte_vlan_strip(bufs[i]);
>  
> -          nb_bytes += bufs[i]->pkt_len;
> -     }
> +          r->stats.bytes += bufs[i]->pkt_len;
> +          r->stats.xstats[VHOST_BYTE] += bufs[i]->pkt_len;
>  
> -     r->stats.bytes += nb_bytes;
> -     vhost_update_packet_xstats(r, bufs, nb_rx, nb_bytes, 0);
> +          vhost_update_single_packet_xstats(r, bufs[i]);
> +     }
>  
>   out:
>        rte_atomic32_set(&r->while_queuing, 0);
> @@ -471,16 +461,20 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>                  break;
>        }
>  
> -     for (i = 0; likely(i < nb_tx); i++)
> +     for (i = 0; likely(i < nb_tx); i++) {
>             nb_bytes += bufs[i]->pkt_len;
> +          vhost_update_single_packet_xstats(r, bufs[i]);
> +     }
>  
>        nb_missed = nb_bufs - nb_tx;
>  
>        r->stats.pkts += nb_tx;
>        r->stats.bytes += nb_bytes;
> -     r->stats.missed_pkts += nb_bufs - nb_tx;
> +     r->stats.missed_pkts += nb_missed;
>  
> -     vhost_update_packet_xstats(r, bufs, nb_tx, nb_bytes, nb_missed);
> +     r->stats.xstats[VHOST_BYTE] += nb_bytes;
> +     r->stats.xstats[VHOST_MISSED_PKT] += nb_missed;
> +     r->stats.xstats[VHOST_UNICAST_PKT] += nb_missed;
>  
>        /* According to RFC2863, ifHCOutUcastPkts, ifHCOutMulticastPkts and
>         * ifHCOutBroadcastPkts counters are increased when packets are not
>
  

Patch

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index a202931e9a..a4129980f2 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -336,38 +336,29 @@  vhost_count_xcast_packets(struct vhost_queue *vq,
 }
 
 static void
-vhost_update_packet_xstats(struct vhost_queue *vq, struct rte_mbuf **bufs,
-			   uint16_t count, uint64_t nb_bytes,
-			   uint64_t nb_missed)
+vhost_update_single_packet_xstats(struct vhost_queue *vq, struct rte_mbuf *buf)
 {
 	uint32_t pkt_len = 0;
-	uint64_t i = 0;
 	uint64_t index;
 	struct vhost_stats *pstats = &vq->stats;
 
-	pstats->xstats[VHOST_BYTE] += nb_bytes;
-	pstats->xstats[VHOST_MISSED_PKT] += nb_missed;
-	pstats->xstats[VHOST_UNICAST_PKT] += nb_missed;
-
-	for (i = 0; i < count ; i++) {
-		pstats->xstats[VHOST_PKT]++;
-		pkt_len = bufs[i]->pkt_len;
-		if (pkt_len == 64) {
-			pstats->xstats[VHOST_64_PKT]++;
-		} else if (pkt_len > 64 && pkt_len < 1024) {
-			index = (sizeof(pkt_len) * 8)
-				- __builtin_clz(pkt_len) - 5;
-			pstats->xstats[index]++;
-		} else {
-			if (pkt_len < 64)
-				pstats->xstats[VHOST_UNDERSIZE_PKT]++;
-			else if (pkt_len <= 1522)
-				pstats->xstats[VHOST_1024_TO_1522_PKT]++;
-			else if (pkt_len > 1522)
-				pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
-		}
-		vhost_count_xcast_packets(vq, bufs[i]);
+	pstats->xstats[VHOST_PKT]++;
+	pkt_len = buf->pkt_len;
+	if (pkt_len == 64) {
+		pstats->xstats[VHOST_64_PKT]++;
+	} else if (pkt_len > 64 && pkt_len < 1024) {
+		index = (sizeof(pkt_len) * 8)
+			- __builtin_clz(pkt_len) - 5;
+		pstats->xstats[index]++;
+	} else {
+		if (pkt_len < 64)
+			pstats->xstats[VHOST_UNDERSIZE_PKT]++;
+		else if (pkt_len <= 1522)
+			pstats->xstats[VHOST_1024_TO_1522_PKT]++;
+		else if (pkt_len > 1522)
+			pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
 	}
+	vhost_count_xcast_packets(vq, buf);
 }
 
 static uint16_t
@@ -376,7 +367,6 @@  eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	struct vhost_queue *r = q;
 	uint16_t i, nb_rx = 0;
 	uint16_t nb_receive = nb_bufs;
-	uint64_t nb_bytes = 0;
 
 	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
 		return 0;
@@ -411,11 +401,11 @@  eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		if (r->internal->vlan_strip)
 			rte_vlan_strip(bufs[i]);
 
-		nb_bytes += bufs[i]->pkt_len;
-	}
+		r->stats.bytes += bufs[i]->pkt_len;
+		r->stats.xstats[VHOST_BYTE] += bufs[i]->pkt_len;
 
-	r->stats.bytes += nb_bytes;
-	vhost_update_packet_xstats(r, bufs, nb_rx, nb_bytes, 0);
+		vhost_update_single_packet_xstats(r, bufs[i]);
+	}
 
 out:
 	rte_atomic32_set(&r->while_queuing, 0);
@@ -471,16 +461,20 @@  eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 			break;
 	}
 
-	for (i = 0; likely(i < nb_tx); i++)
+	for (i = 0; likely(i < nb_tx); i++) {
 		nb_bytes += bufs[i]->pkt_len;
+		vhost_update_single_packet_xstats(r, bufs[i]);
+	}
 
 	nb_missed = nb_bufs - nb_tx;
 
 	r->stats.pkts += nb_tx;
 	r->stats.bytes += nb_bytes;
-	r->stats.missed_pkts += nb_bufs - nb_tx;
+	r->stats.missed_pkts += nb_missed;
 
-	vhost_update_packet_xstats(r, bufs, nb_tx, nb_bytes, nb_missed);
+	r->stats.xstats[VHOST_BYTE] += nb_bytes;
+	r->stats.xstats[VHOST_MISSED_PKT] += nb_missed;
+	r->stats.xstats[VHOST_UNICAST_PKT] += nb_missed;
 
 	/* According to RFC2863, ifHCOutUcastPkts, ifHCOutMulticastPkts and
 	 * ifHCOutBroadcastPkts counters are increased when packets are not