vhost: merge repeated loop in vhost Tx

Message ID 20210910090530.893-1-gaoxiangliu0@163.com (mailing list archive)
State Changes Requested, archived
Delegated to: Maxime Coquelin
Series: vhost: merge repeated loop in vhost Tx

Checks

Context                          Check    Description
ci/checkpatch                    success  coding style OK
ci/github-robot: build           success  github build: passed
ci/Intel-compilation             success  Compilation OK
ci/intel-Testing                 success  Testing PASS
ci/iol-x86_64-unit-testing       success  Testing PASS
ci/iol-x86_64-compile-testing    success  Testing PASS
ci/iol-aarch64-compile-testing   success  Testing PASS
ci/iol-mellanox-Performance      success  Performance Testing PASS
ci/iol-intel-Functional          success  Functional Testing PASS
ci/iol-intel-Performance         success  Performance Testing PASS

Commit Message

Gaoxiang Liu Sept. 10, 2021, 9:05 a.m. UTC
To improve vhost Tx performance, merge the repeated loops in eth_vhost_tx.
Move the VLAN tag insertion from eth_vhost_tx into virtio_dev_rx_packed
and virtio_dev_rx_split, removing one extra pass over the packet burst.

Fixes: f63d356ee993 ("net/vhost: insert/strip VLAN header in software")
Cc: stable@dpdk.org

Signed-off-by: Gaoxiang Liu <gaoxiangliu0@163.com>
---
 drivers/net/vhost/rte_eth_vhost.c | 25 ++++---------------------
 lib/vhost/virtio_net.c            | 21 +++++++++++++++++++++
 2 files changed, 25 insertions(+), 21 deletions(-)
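
For reference, the per-packet work being relocated boils down to the
minimal sketch below (an illustration only, not the driver's exact code;
sw_vlan_insert_one() is a hypothetical helper name). rte_vlan_insert()
takes struct rte_mbuf ** (so the mbuf pointer may be updated) and can
fail, e.g. on a shared mbuf or when there is no headroom for the 4-byte
VLAN header, which is why the failure path frees the packet:

#include <rte_branch_prediction.h> /* unlikely() */
#include <rte_ether.h>             /* rte_vlan_insert() */
#include <rte_mbuf.h>              /* rte_pktmbuf_free(), PKT_TX_VLAN_PKT */

/* Software VLAN tag insertion for one packet of a Tx burst. */
static inline int
sw_vlan_insert_one(struct rte_mbuf **pkt)
{
	if (((*pkt)->ol_flags & PKT_TX_VLAN_PKT) == 0)
		return 0;                 /* no tag requested, nothing to do */

	if (unlikely(rte_vlan_insert(pkt) != 0)) {
		rte_pktmbuf_free(*pkt);   /* drop packets we cannot tag */
		return -1;
	}
	return 0;
}

Moving this check into the virtio_dev_rx_* enqueue loops lets the Tx path
walk the burst once instead of twice.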
  

Comments

Maxime Coquelin Sept. 23, 2021, 11:30 a.m. UTC | #1
On 9/10/21 11:05, Gaoxiang Liu wrote:
> To improve vhost Tx performance, merge the repeated loops in eth_vhost_tx.
> Move the VLAN tag insertion from eth_vhost_tx into virtio_dev_rx_packed
> and virtio_dev_rx_split, removing one extra pass over the packet burst.
> 
> Fixes: f63d356ee993 ("net/vhost: insert/strip VLAN header in software")
> Cc: stable@dpdk.org

This kind of performance optimization should not be backported to stable
branches.

> 
> Signed-off-by: Gaoxiang Liu <gaoxiangliu0@163.com>
> ---
>   drivers/net/vhost/rte_eth_vhost.c | 25 ++++---------------------
>   lib/vhost/virtio_net.c            | 21 +++++++++++++++++++++
>   2 files changed, 25 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
> index a202931e9a..ae20550976 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -428,7 +428,6 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>   {
>   	struct vhost_queue *r = q;
>   	uint16_t i, nb_tx = 0;
> -	uint16_t nb_send = 0;
>   	uint64_t nb_bytes = 0;
>   	uint64_t nb_missed = 0;
>   
> @@ -440,33 +439,17 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>   	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
>   		goto out;
>   
> -	for (i = 0; i < nb_bufs; i++) {
> -		struct rte_mbuf *m = bufs[i];
> -
> -		/* Do VLAN tag insertion */
> -		if (m->ol_flags & PKT_TX_VLAN_PKT) {
> -			int error = rte_vlan_insert(&m);
> -			if (unlikely(error)) {
> -				rte_pktmbuf_free(m);
> -				continue;
> -			}
> -		}
> -
> -		bufs[nb_send] = m;
> -		++nb_send;
> -	}
> -
>   	/* Enqueue packets to guest RX queue */
> -	while (nb_send) {
> +	while (nb_bufs) {
>   		uint16_t nb_pkts;
> -		uint16_t num = (uint16_t)RTE_MIN(nb_send,
> +		uint16_t num = (uint16_t)RTE_MIN(nb_bufs,
>   						 VHOST_MAX_PKT_BURST);
>   
>   		nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id,
>   						  &bufs[nb_tx], num);
>   
>   		nb_tx += nb_pkts;
> -		nb_send -= nb_pkts;
> +		nb_bufs -= nb_pkts;
>   		if (nb_pkts < num)
>   			break;
>   	}
> @@ -474,7 +457,7 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>   	for (i = 0; likely(i < nb_tx); i++)
>   		nb_bytes += bufs[i]->pkt_len;
>   
> -	nb_missed = nb_bufs - nb_tx;
> +	nb_missed = nb_bufs;
>   
>   	r->stats.pkts += nb_tx;
>   	r->stats.bytes += nb_bytes;
> diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
> index 8549afbbe1..2057f4e7fe 100644
> --- a/lib/vhost/virtio_net.c
> +++ b/lib/vhost/virtio_net.c
> @@ -1218,6 +1218,16 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
>   		uint16_t nr_vec = 0;
>   
> +		/* Do VLAN tag insertion */
> +		if (pkts[pkt_idx]->ol_flags & PKT_TX_VLAN_PKT) {
> +			int error = rte_vlan_insert(&pkts[pkt_idx]);
> +			if (unlikely(error)) {
> +				rte_pktmbuf_free(pkts[pkt_idx]);
> +				pkts[pkt_idx] = NULL;
> +				continue;
> +			}
> +		}
> +
>   		if (unlikely(reserve_avail_buf_split(dev, vq,
>   						pkt_len, buf_vec, &num_buffers,
>   						avail_head, &nr_vec) < 0)) {
> @@ -1490,6 +1500,17 @@ virtio_dev_rx_packed(struct virtio_net *dev,
>   	do {
>   		rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
>   
> +		/* Do VLAN tag insertion */
> +		if (pkts[pkt_idx]->ol_flags & PKT_TX_VLAN_PKT) {
> +			int error = rte_vlan_insert(&pkts[pkt_idx]);
> +			if (unlikely(error)) {
> +				rte_pktmbuf_free(pkts[pkt_idx]);
> +				pkts[pkt_idx] = NULL;
> +				pkt_idx++;
> +				continue;
> +			}
> +		}
> +
>   		if (count - pkt_idx >= PACKED_BATCH_SIZE) {
>   			if (!virtio_dev_rx_sync_batch_packed(dev, vq,
>   							&pkts[pkt_idx])) {
> 

It would make sense to do that in virtio_enqueue_offload, and it would
avoid code duplication.

Regards,
Maxime
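
A rough sketch of that consolidation, assuming virtio_enqueue_offload()
keeps its current role of filling in the virtio-net header per packet.
Because rte_vlan_insert() takes struct rte_mbuf ** and can fail, the
helper's current void signature would have to change as well, so this is
an illustration of the direction rather than the merged code:

#include <rte_branch_prediction.h> /* unlikely() */
#include <rte_common.h>            /* __rte_always_inline */
#include <rte_ether.h>             /* rte_vlan_insert() */
#include <rte_mbuf.h>              /* PKT_TX_VLAN_PKT */
#include <linux/virtio_net.h>      /* struct virtio_net_hdr */

/* Hypothetical rework: take struct rte_mbuf ** so rte_vlan_insert() can
 * update the pointer, and return int so a failed insertion is reported
 * once to the caller instead of being handled in two copies of the loop. */
static __rte_always_inline int
virtio_enqueue_offload(struct rte_mbuf **m_buf, struct virtio_net_hdr *net_hdr)
{
	if ((*m_buf)->ol_flags & PKT_TX_VLAN_PKT) {
		if (unlikely(rte_vlan_insert(m_buf) != 0))
			return -1;      /* caller frees and skips the packet */
	}

	/* ... the existing csum/GSO code filling net_hdr stays here ... */
	(void)net_hdr;

	return 0;
}

Both the split and packed enqueue paths already reach
virtio_enqueue_offload() through their copy helpers, so the VLAN handling
would then live in one place instead of the two blocks this patch adds.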
  

Patch

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index a202931e9a..ae20550976 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -428,7 +428,6 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
 	struct vhost_queue *r = q;
 	uint16_t i, nb_tx = 0;
-	uint16_t nb_send = 0;
 	uint64_t nb_bytes = 0;
 	uint64_t nb_missed = 0;
 
@@ -440,33 +439,17 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
 		goto out;
 
-	for (i = 0; i < nb_bufs; i++) {
-		struct rte_mbuf *m = bufs[i];
-
-		/* Do VLAN tag insertion */
-		if (m->ol_flags & PKT_TX_VLAN_PKT) {
-			int error = rte_vlan_insert(&m);
-			if (unlikely(error)) {
-				rte_pktmbuf_free(m);
-				continue;
-			}
-		}
-
-		bufs[nb_send] = m;
-		++nb_send;
-	}
-
 	/* Enqueue packets to guest RX queue */
-	while (nb_send) {
+	while (nb_bufs) {
 		uint16_t nb_pkts;
-		uint16_t num = (uint16_t)RTE_MIN(nb_send,
+		uint16_t num = (uint16_t)RTE_MIN(nb_bufs,
 						 VHOST_MAX_PKT_BURST);
 
 		nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id,
 						  &bufs[nb_tx], num);
 
 		nb_tx += nb_pkts;
-		nb_send -= nb_pkts;
+		nb_bufs -= nb_pkts;
 		if (nb_pkts < num)
 			break;
 	}
@@ -474,7 +457,7 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	for (i = 0; likely(i < nb_tx); i++)
 		nb_bytes += bufs[i]->pkt_len;
 
-	nb_missed = nb_bufs - nb_tx;
+	nb_missed = nb_bufs;
 
 	r->stats.pkts += nb_tx;
 	r->stats.bytes += nb_bytes;
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 8549afbbe1..2057f4e7fe 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1218,6 +1218,16 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
 		uint16_t nr_vec = 0;
 
+		/* Do VLAN tag insertion */
+		if (pkts[pkt_idx]->ol_flags & PKT_TX_VLAN_PKT) {
+			int error = rte_vlan_insert(&pkts[pkt_idx]);
+			if (unlikely(error)) {
+				rte_pktmbuf_free(pkts[pkt_idx]);
+				pkts[pkt_idx] = NULL;
+				continue;
+			}
+		}
+
 		if (unlikely(reserve_avail_buf_split(dev, vq,
 						pkt_len, buf_vec, &num_buffers,
 						avail_head, &nr_vec) < 0)) {
@@ -1490,6 +1500,17 @@ virtio_dev_rx_packed(struct virtio_net *dev,
 	do {
 		rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
 
+		/* Do VLAN tag insertion */
+		if (pkts[pkt_idx]->ol_flags & PKT_TX_VLAN_PKT) {
+			int error = rte_vlan_insert(&pkts[pkt_idx]);
+			if (unlikely(error)) {
+				rte_pktmbuf_free(pkts[pkt_idx]);
+				pkts[pkt_idx] = NULL;
+				pkt_idx++;
+				continue;
+			}
+		}
+
 		if (count - pkt_idx >= PACKED_BATCH_SIZE) {
 			if (!virtio_dev_rx_sync_batch_packed(dev, vq,
 							&pkts[pkt_idx])) {