[v4,3/4] net/virtio: allocate fake mbuf in Rx queue
Checks
Commit Message
While it is worth clarifying whether the fake mbuf
in the virtnet_rx struct is really necessary, it is certain
that it heavily impacts cache usage by being part of
the struct. Indeed, it uses two cachelines, and
requires alignment on a cacheline.
Before this series, this meant it took 120 bytes in
the virtnet_rx struct:
struct virtnet_rx {
struct virtqueue * vq; /* 0 8 */
/* XXX 56 bytes hole, try to pack */
/* --- cacheline 1 boundary (64 bytes) --- */
struct rte_mbuf fake_mbuf __attribute__((__aligned__(64))); /* 64 128 */
/* --- cacheline 3 boundary (192 bytes) --- */
This patch allocates it using malloc in order to optimize
virtnet_rx cache usage and, in turn, virtqueue cache usage.
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: David Marchand <david.marchand@redhat.com>
---
drivers/net/virtio/virtio_ethdev.c | 13 +++++++++++++
drivers/net/virtio/virtio_rxtx.c | 9 +++------
drivers/net/virtio/virtio_rxtx.h | 2 +-
3 files changed, 17 insertions(+), 7 deletions(-)
Comments
Hi Maxime,
> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Tuesday, March 16, 2021 5:38 PM
> To: dev@dpdk.org; Xia, Chenbo <chenbo.xia@intel.com>; amorenoz@redhat.com;
> david.marchand@redhat.com; olivier.matz@6wind.com; bnemeth@redhat.com
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> Subject: [PATCH v4 3/4] net/virtio: allocate fake mbuf in Rx queue
>
> While it is worth clarifying whether the fake mbuf
> in virtnet_rx struct is really necessary, it is sure
> that it heavily impacts cache usage by being part of
> the struct. Indeed, it uses two cachelines, and
> requires alignment on a cacheline.
>
> Before this series, it means it took 120 bytes in
> virtnet_rx struct:
>
> struct virtnet_rx {
> struct virtqueue * vq; /* 0 8 */
>
> /* XXX 56 bytes hole, try to pack */
>
> /* --- cacheline 1 boundary (64 bytes) --- */
> struct rte_mbuf fake_mbuf __attribute__((__aligned__(64)));
> /* 64 128 */
> /* --- cacheline 3 boundary (192 bytes) --- */
>
> This patch allocates it using malloc in order to optimize
> virtnet_rx cache usage and so virtqueue cache usage.
>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> Reviewed-by: David Marchand <david.marchand@redhat.com>
> ---
> drivers/net/virtio/virtio_ethdev.c | 13 +++++++++++++
> drivers/net/virtio/virtio_rxtx.c | 9 +++------
> drivers/net/virtio/virtio_rxtx.h | 2 +-
> 3 files changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> index d5643733f7..be9faa3b6c 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -435,6 +435,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
> int queue_type = virtio_get_queue_type(hw, queue_idx);
> int ret;
> int numa_node = dev->device->numa_node;
> + struct rte_mbuf *fake_mbuf = NULL;
>
> PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
> queue_idx, numa_node);
> @@ -550,10 +551,19 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
> goto free_hdr_mz;
> }
>
> + fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf),
> + RTE_CACHE_LINE_SIZE, numa_node);
> + if (!fake_mbuf) {
> + PMD_INIT_LOG(ERR, "can not allocate fake mbuf");
> + ret = -ENOMEM;
> + goto free_sw_ring;
> + }
> +
> vq->sw_ring = sw_ring;
> rxvq = &vq->rxq;
> rxvq->port_id = dev->data->port_id;
> rxvq->mz = mz;
> + rxvq->fake_mbuf = fake_mbuf;
> } else if (queue_type == VTNET_TQ) {
> txvq = &vq->txq;
> txvq->port_id = dev->data->port_id;
> @@ -612,6 +622,8 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
>
> clean_vq:
> hw->cvq = NULL;
> + rte_free(fake_mbuf);
> +free_sw_ring:
> rte_free(sw_ring);
> free_hdr_mz:
> rte_memzone_free(hdr_mz);
> @@ -641,6 +653,7 @@ virtio_free_queues(struct virtio_hw *hw)
>
> queue_type = virtio_get_queue_type(hw, i);
> if (queue_type == VTNET_RQ) {
> + rte_free(vq->rxq.fake_mbuf);
> rte_free(vq->sw_ring);
> rte_memzone_free(vq->rxq.mz);
> } else if (queue_type == VTNET_TQ) {
> diff --git a/drivers/net/virtio/virtio_rxtx.c
> b/drivers/net/virtio/virtio_rxtx.c
> index 32af8d3d11..8df913b0ba 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -703,12 +703,9 @@ virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev,
> uint16_t queue_idx)
> virtio_rxq_vec_setup(rxvq);
> }
>
> - memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
> - for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
> - desc_idx++) {
> - vq->sw_ring[vq->vq_nentries + desc_idx] =
> - &rxvq->fake_mbuf;
> - }
> + memset(rxvq->fake_mbuf, 0, sizeof(*rxvq->fake_mbuf));
> + for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST; desc_idx++)
I just noticed that the macros 'RTE_PMD_VIRTIO_RX_MAX_BURST' and 'RTE_VIRTIO_VPMD_RX_BURST'
should always have the same value, so maybe it would be better to merge them into one macro later?
For this patch:
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
> + vq->sw_ring[vq->vq_nentries + desc_idx] = rxvq->fake_mbuf;
>
> if (hw->use_vec_rx && !virtio_with_packed_queue(hw)) {
> while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
> diff --git a/drivers/net/virtio/virtio_rxtx.h
> b/drivers/net/virtio/virtio_rxtx.h
> index 7f1036be6f..6ce5d67d15 100644
> --- a/drivers/net/virtio/virtio_rxtx.h
> +++ b/drivers/net/virtio/virtio_rxtx.h
> @@ -19,7 +19,7 @@ struct virtnet_stats {
>
> struct virtnet_rx {
> /* dummy mbuf, for wraparound when processing RX ring. */
> - struct rte_mbuf fake_mbuf;
> + struct rte_mbuf *fake_mbuf;
> uint64_t mbuf_initializer; /**< value to init mbufs. */
> struct rte_mempool *mpool; /**< mempool for mbuf allocation */
>
> --
> 2.29.2
@@ -435,6 +435,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
int queue_type = virtio_get_queue_type(hw, queue_idx);
int ret;
int numa_node = dev->device->numa_node;
+ struct rte_mbuf *fake_mbuf = NULL;
PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
queue_idx, numa_node);
@@ -550,10 +551,19 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
goto free_hdr_mz;
}
+ fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf),
+ RTE_CACHE_LINE_SIZE, numa_node);
+ if (!fake_mbuf) {
+ PMD_INIT_LOG(ERR, "can not allocate fake mbuf");
+ ret = -ENOMEM;
+ goto free_sw_ring;
+ }
+
vq->sw_ring = sw_ring;
rxvq = &vq->rxq;
rxvq->port_id = dev->data->port_id;
rxvq->mz = mz;
+ rxvq->fake_mbuf = fake_mbuf;
} else if (queue_type == VTNET_TQ) {
txvq = &vq->txq;
txvq->port_id = dev->data->port_id;
@@ -612,6 +622,8 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
clean_vq:
hw->cvq = NULL;
+ rte_free(fake_mbuf);
+free_sw_ring:
rte_free(sw_ring);
free_hdr_mz:
rte_memzone_free(hdr_mz);
@@ -641,6 +653,7 @@ virtio_free_queues(struct virtio_hw *hw)
queue_type = virtio_get_queue_type(hw, i);
if (queue_type == VTNET_RQ) {
+ rte_free(vq->rxq.fake_mbuf);
rte_free(vq->sw_ring);
rte_memzone_free(vq->rxq.mz);
} else if (queue_type == VTNET_TQ) {
@@ -703,12 +703,9 @@ virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
virtio_rxq_vec_setup(rxvq);
}
- memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
- for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
- desc_idx++) {
- vq->sw_ring[vq->vq_nentries + desc_idx] =
- &rxvq->fake_mbuf;
- }
+ memset(rxvq->fake_mbuf, 0, sizeof(*rxvq->fake_mbuf));
+ for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST; desc_idx++)
+ vq->sw_ring[vq->vq_nentries + desc_idx] = rxvq->fake_mbuf;
if (hw->use_vec_rx && !virtio_with_packed_queue(hw)) {
while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
@@ -19,7 +19,7 @@ struct virtnet_stats {
struct virtnet_rx {
/* dummy mbuf, for wraparound when processing RX ring. */
- struct rte_mbuf fake_mbuf;
+ struct rte_mbuf *fake_mbuf;
uint64_t mbuf_initializer; /**< value to init mbufs. */
struct rte_mempool *mpool; /**< mempool for mbuf allocation */