[v4,3/4] net/virtio: allocate fake mbuf in Rx queue

Message ID 20210316093825.478723-4-maxime.coquelin@redhat.com (mailing list archive)
State Accepted, archived
Delegated to: Maxime Coquelin
Series: net/virtio: make virtqueue struct cache-friendly

Checks

Context        Check    Description
ci/checkpatch  warning  coding style issues

Commit Message

Maxime Coquelin March 16, 2021, 9:38 a.m. UTC
  While it is worth clarifying whether the fake mbuf
in the virtnet_rx struct is really necessary, what is
certain is that it heavily impacts cache usage by being
part of the struct: it occupies two cachelines and
requires cacheline alignment.

Before this series, this meant it took 120 bytes in
the virtnet_rx struct:

struct virtnet_rx {
	struct virtqueue *         vq;                   /*     0     8 */

	/* XXX 56 bytes hole, try to pack */

	/* --- cacheline 1 boundary (64 bytes) --- */
	struct rte_mbuf            fake_mbuf __attribute__((__aligned__(64))); /*    64   128 */
	/* --- cacheline 3 boundary (192 bytes) --- */

This patch allocates it using malloc in order to
optimize virtnet_rx, and thus virtqueue, cache usage.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: David Marchand <david.marchand@redhat.com>
---
 drivers/net/virtio/virtio_ethdev.c | 13 +++++++++++++
 drivers/net/virtio/virtio_rxtx.c   |  9 +++------
 drivers/net/virtio/virtio_rxtx.h   |  2 +-
 3 files changed, 17 insertions(+), 7 deletions(-)
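
To see the layout cost concretely, the padding effect pahole reports above can be reproduced with a small standalone C program. The struct below is only a stub standing in for rte_mbuf (which in DPDK is 128 bytes and cache-aligned); the field names and the stub itself are illustrative, not the real driver types:

#include <stdio.h>
#include <stdalign.h>

/* Stub standing in for struct rte_mbuf: two 64-byte cachelines,
 * aligned on a cacheline, as in DPDK. */
struct mbuf_stub {
	alignas(64) char data[128];
};

/* Layout before the patch: the aligned member forces a 56-byte
 * hole after the 8-byte pointer, then spans two cachelines. */
struct rx_inline {
	void *vq;
	struct mbuf_stub fake_mbuf;
};

/* Layout after the patch: a plain pointer, no alignment hole. */
struct rx_pointer {
	void *vq;
	struct mbuf_stub *fake_mbuf;
};

int main(void)
{
	printf("inline:  size=%zu align=%zu\n",
	       sizeof(struct rx_inline), alignof(struct rx_inline));
	printf("pointer: size=%zu align=%zu\n",
	       sizeof(struct rx_pointer), alignof(struct rx_pointer));
	return 0;
}

On a typical LP64 target this prints size=192 align=64 for the inline layout versus size=16 align=8 for the pointer layout, matching the 56-byte hole and the cacheline-3 boundary in the pahole output above.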
  

Comments

Chenbo Xia March 17, 2021, 9:10 a.m. UTC | #1
Hi Maxime,

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Tuesday, March 16, 2021 5:38 PM
> To: dev@dpdk.org; Xia, Chenbo <chenbo.xia@intel.com>; amorenoz@redhat.com;
> david.marchand@redhat.com; olivier.matz@6wind.com; bnemeth@redhat.com
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> Subject: [PATCH v4 3/4] net/virtio: allocate fake mbuf in Rx queue
> 
> While it is worth clarifying whether the fake mbuf
> in virtnet_rx struct is really necessary, it is sure
> that it heavily impacts cache usage by being part of
> the struct. Indeed, it uses two cachelines, and
> requires alignment on a cacheline.
> 
> Before this series, it means it took 120 bytes in
> virtnet_rx struct:
> 
> struct virtnet_rx {
> 	struct virtqueue *         vq;                   /*     0     8 */
> 
> 	/* XXX 56 bytes hole, try to pack */
> 
> 	/* --- cacheline 1 boundary (64 bytes) --- */
> 	struct rte_mbuf            fake_mbuf __attribute__((__aligned__(64)));
> /*    64   128 */
> 	/* --- cacheline 3 boundary (192 bytes) --- */
> 
> This patch allocates it using malloc in order to optimize
> virtnet_rx cache usage and so virtqueue cache usage.
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> Reviewed-by: David Marchand <david.marchand@redhat.com>
> ---
>  drivers/net/virtio/virtio_ethdev.c | 13 +++++++++++++
>  drivers/net/virtio/virtio_rxtx.c   |  9 +++------
>  drivers/net/virtio/virtio_rxtx.h   |  2 +-
>  3 files changed, 17 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> index d5643733f7..be9faa3b6c 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -435,6 +435,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
>  	int queue_type = virtio_get_queue_type(hw, queue_idx);
>  	int ret;
>  	int numa_node = dev->device->numa_node;
> +	struct rte_mbuf *fake_mbuf = NULL;
> 
>  	PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
>  			queue_idx, numa_node);
> @@ -550,10 +551,19 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
>  			goto free_hdr_mz;
>  		}
> 
> +		fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf),
> +				RTE_CACHE_LINE_SIZE, numa_node);
> +		if (!fake_mbuf) {
> +			PMD_INIT_LOG(ERR, "can not allocate fake mbuf");
> +			ret = -ENOMEM;
> +			goto free_sw_ring;
> +		}
> +
>  		vq->sw_ring = sw_ring;
>  		rxvq = &vq->rxq;
>  		rxvq->port_id = dev->data->port_id;
>  		rxvq->mz = mz;
> +		rxvq->fake_mbuf = fake_mbuf;
>  	} else if (queue_type == VTNET_TQ) {
>  		txvq = &vq->txq;
>  		txvq->port_id = dev->data->port_id;
> @@ -612,6 +622,8 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
> 
>  clean_vq:
>  	hw->cvq = NULL;
> +	rte_free(fake_mbuf);
> +free_sw_ring:
>  	rte_free(sw_ring);
>  free_hdr_mz:
>  	rte_memzone_free(hdr_mz);
> @@ -641,6 +653,7 @@ virtio_free_queues(struct virtio_hw *hw)
> 
>  		queue_type = virtio_get_queue_type(hw, i);
>  		if (queue_type == VTNET_RQ) {
> +			rte_free(vq->rxq.fake_mbuf);
>  			rte_free(vq->sw_ring);
>  			rte_memzone_free(vq->rxq.mz);
>  		} else if (queue_type == VTNET_TQ) {
> diff --git a/drivers/net/virtio/virtio_rxtx.c
> b/drivers/net/virtio/virtio_rxtx.c
> index 32af8d3d11..8df913b0ba 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -703,12 +703,9 @@ virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev,
> uint16_t queue_idx)
>  		virtio_rxq_vec_setup(rxvq);
>  	}
> 
> -	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
> -	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
> -	     desc_idx++) {
> -		vq->sw_ring[vq->vq_nentries + desc_idx] =
> -			&rxvq->fake_mbuf;
> -	}
> +	memset(rxvq->fake_mbuf, 0, sizeof(*rxvq->fake_mbuf));
> +	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST; desc_idx++)

I just noticed that the macros 'RTE_PMD_VIRTIO_RX_MAX_BURST' and 'RTE_VIRTIO_VPMD_RX_BURST'
should always have the same value, so maybe it would be better to merge them into one macro
later? A possible shape for that is sketched below.
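
One way to do that, sketched here with a placeholder value (the actual burst size used in the tree may differ), is to derive both existing names from a single definition so the two values cannot drift apart:

/* Hypothetical consolidation, not part of this patch; 32 is a
 * placeholder for whatever burst size the driver settles on. */
#define VIRTIO_RX_BURST_SZ           32
#define RTE_PMD_VIRTIO_RX_MAX_BURST  VIRTIO_RX_BURST_SZ
#define RTE_VIRTIO_VPMD_RX_BURST     VIRTIO_RX_BURST_SZ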

For this patch:

Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>

> +		vq->sw_ring[vq->vq_nentries + desc_idx] = rxvq->fake_mbuf;
> 
>  	if (hw->use_vec_rx && !virtio_with_packed_queue(hw)) {
>  		while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
> diff --git a/drivers/net/virtio/virtio_rxtx.h
> b/drivers/net/virtio/virtio_rxtx.h
> index 7f1036be6f..6ce5d67d15 100644
> --- a/drivers/net/virtio/virtio_rxtx.h
> +++ b/drivers/net/virtio/virtio_rxtx.h
> @@ -19,7 +19,7 @@ struct virtnet_stats {
> 
>  struct virtnet_rx {
>  	/* dummy mbuf, for wraparound when processing RX ring. */
> -	struct rte_mbuf fake_mbuf;
> +	struct rte_mbuf *fake_mbuf;
>  	uint64_t mbuf_initializer; /**< value to init mbufs. */
>  	struct rte_mempool *mpool; /**< mempool for mbuf allocation */
> 
> --
> 2.29.2
  

Patch

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index d5643733f7..be9faa3b6c 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -435,6 +435,7 @@  virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
 	int queue_type = virtio_get_queue_type(hw, queue_idx);
 	int ret;
 	int numa_node = dev->device->numa_node;
+	struct rte_mbuf *fake_mbuf = NULL;
 
 	PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
 			queue_idx, numa_node);
@@ -550,10 +551,19 @@  virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
 			goto free_hdr_mz;
 		}
 
+		fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf),
+				RTE_CACHE_LINE_SIZE, numa_node);
+		if (!fake_mbuf) {
+			PMD_INIT_LOG(ERR, "can not allocate fake mbuf");
+			ret = -ENOMEM;
+			goto free_sw_ring;
+		}
+
 		vq->sw_ring = sw_ring;
 		rxvq = &vq->rxq;
 		rxvq->port_id = dev->data->port_id;
 		rxvq->mz = mz;
+		rxvq->fake_mbuf = fake_mbuf;
 	} else if (queue_type == VTNET_TQ) {
 		txvq = &vq->txq;
 		txvq->port_id = dev->data->port_id;
@@ -612,6 +622,8 @@  virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
 
 clean_vq:
 	hw->cvq = NULL;
+	rte_free(fake_mbuf);
+free_sw_ring:
 	rte_free(sw_ring);
 free_hdr_mz:
 	rte_memzone_free(hdr_mz);
@@ -641,6 +653,7 @@  virtio_free_queues(struct virtio_hw *hw)
 
 		queue_type = virtio_get_queue_type(hw, i);
 		if (queue_type == VTNET_RQ) {
+			rte_free(vq->rxq.fake_mbuf);
 			rte_free(vq->sw_ring);
 			rte_memzone_free(vq->rxq.mz);
 		} else if (queue_type == VTNET_TQ) {
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 32af8d3d11..8df913b0ba 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -703,12 +703,9 @@  virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
 		virtio_rxq_vec_setup(rxvq);
 	}
 
-	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
-	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
-	     desc_idx++) {
-		vq->sw_ring[vq->vq_nentries + desc_idx] =
-			&rxvq->fake_mbuf;
-	}
+	memset(rxvq->fake_mbuf, 0, sizeof(*rxvq->fake_mbuf));
+	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST; desc_idx++)
+		vq->sw_ring[vq->vq_nentries + desc_idx] = rxvq->fake_mbuf;
 
 	if (hw->use_vec_rx && !virtio_with_packed_queue(hw)) {
 		while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index 7f1036be6f..6ce5d67d15 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -19,7 +19,7 @@  struct virtnet_stats {
 
 struct virtnet_rx {
 	/* dummy mbuf, for wraparound when processing RX ring. */
-	struct rte_mbuf fake_mbuf;
+	struct rte_mbuf *fake_mbuf;
 	uint64_t mbuf_initializer; /**< value to init mbufs. */
 	struct rte_mempool *mpool; /**< mempool for mbuf allocation */
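
As background on the "dummy mbuf, for wraparound" comment above: the vectorized Rx path copies mbuf pointers out of sw_ring in fixed-size bursts without bounds-checking every load, so the RTE_PMD_VIRTIO_RX_MAX_BURST slots past vq_nentries are all pointed at the fake mbuf, and any overread lands on a harmless placeholder. A simplified sketch of the scheme, with illustrative names and values rather than the driver's exact code:

/* Simplified illustration of the sw_ring tail padding; sizes are
 * placeholders. After this patch, the placeholder mbuf is a per-queue
 * heap allocation rather than an inline struct member. */
#define NENTRIES  256 /* ring size (placeholder) */
#define MAX_BURST 64  /* stands in for RTE_PMD_VIRTIO_RX_MAX_BURST */

struct mbuf { char pad[128]; };

static struct mbuf fake_mbuf;                      /* the dummy mbuf */
static struct mbuf *sw_ring[NENTRIES + MAX_BURST]; /* ring + tail pad */

static void pad_sw_ring(void)
{
	/* Point every tail slot at the same placeholder so a burst
	 * that reads past entry NENTRIES - 1 stays well-defined. */
	for (int i = 0; i < MAX_BURST; i++)
		sw_ring[NENTRIES + i] = &fake_mbuf;
}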