[21.02,v2,2/2] net/virtio: fix memory init with vDPA backend
Commit Message
This patch fixes a performance overhead observed with the
mlx5-vdpa kernel driver, where all the memory tables get
updated for every page in the mapped area. For example, with
2MB hugepages, a single IOTLB_UPDATE for a 1GB region causes
512 memory updates on the mlx5-vdpa side.

Using batching mode, the mlx5 driver only triggers a single
memory update for all the IOTLB updates that happen between
the batch-begin and batch-end commands.
Fixes: 6b901437056e ("net/virtio: introduce vhost-vDPA backend")
Cc: stable@dpdk.org
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
drivers/net/virtio/virtio_user/vhost.h | 4 +
drivers/net/virtio/virtio_user/vhost_vdpa.c | 106 +++++++++++++++++-
.../net/virtio/virtio_user/virtio_user_dev.c | 3 +-
3 files changed, 107 insertions(+), 6 deletions(-)
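
To make the mechanism concrete, here is a minimal standalone sketch of the
message sequence this patch enables on the vhost-vdpa fd. It is illustrative
only: the struct below is a simplified stand-in for the kernel's struct
vhost_msg_v2, and send_iotlb()/map_region_batched() are hypothetical helpers,
not functions from this patch.

/*
 * Sketch of the batched IOTLB sequence written to a vhost-vdpa fd.
 * Simplified layout; per-page iova/uaddr/size handling is elided.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define VHOST_IOTLB_MSG_V2      0x2  /* from linux/vhost_types.h */
#define VHOST_IOTLB_UPDATE      2
#define VHOST_IOTLB_BATCH_BEGIN 5
#define VHOST_IOTLB_BATCH_END   6

struct iotlb_sketch {
	uint32_t type;                  /* VHOST_IOTLB_MSG_V2 */
	struct {
		uint64_t iova, size, uaddr;
		uint8_t perm, type;     /* e.g. VHOST_IOTLB_UPDATE */
	} iotlb;
};

static int
send_iotlb(int vhostfd, uint8_t iotlb_type)
{
	struct iotlb_sketch msg = { .type = VHOST_IOTLB_MSG_V2 };

	msg.iotlb.type = iotlb_type;
	if (write(vhostfd, &msg, sizeof(msg)) != sizeof(msg)) {
		fprintf(stderr, "iotlb msg: %s\n", strerror(errno));
		return -1;
	}
	return 0;
}

/*
 * Mapping a 1GB region backed by 2MB pages still sends 512
 * IOTLB_UPDATE messages, but with the BEGIN/END bracket a driver
 * such as mlx5-vdpa can defer its memory-table update until
 * BATCH_END instead of performing one per message.
 */
static int
map_region_batched(int vhostfd, int nr_pages)
{
	int i;

	if (send_iotlb(vhostfd, VHOST_IOTLB_BATCH_BEGIN) < 0)
		return -1;
	for (i = 0; i < nr_pages; i++) {
		/* a real caller would fill iova/uaddr/size/perm per page */
		if (send_iotlb(vhostfd, VHOST_IOTLB_UPDATE) < 0)
			break;
	}
	return send_iotlb(vhostfd, VHOST_IOTLB_BATCH_END);
}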
Comments
> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Wednesday, November 25, 2020 11:21 PM
> To: dev@dpdk.org; Xia, Chenbo <chenbo.xia@intel.com>; amorenoz@redhat.com;
> jasowang@redhat.com; david.marchand@redhat.com
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>; stable@dpdk.org
> Subject: [PATCH 21.02 v2 2/2] net/virtio: fix memory init with vDPA backend
>
Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
diff --git a/drivers/net/virtio/virtio_user/vhost.h b/drivers/net/virtio/virtio_user/vhost.h
index c1dcc50b58..be286173b0 100644
--- a/drivers/net/virtio/virtio_user/vhost.h
+++ b/drivers/net/virtio/virtio_user/vhost.h
@@ -90,6 +90,10 @@ enum vhost_user_request {
 #define VHOST_BACKEND_F_IOTLB_MSG_V2 1
 #endif
 
+#ifndef VHOST_BACKEND_F_IOTLB_BATCH
+#define VHOST_BACKEND_F_IOTLB_BATCH 2
+#endif
+
 extern const char * const vhost_msg_strings[VHOST_USER_MAX];
 
 struct vhost_memory_region {
diff --git a/drivers/net/virtio/virtio_user/vhost_vdpa.c b/drivers/net/virtio/virtio_user/vhost_vdpa.c
index b6c81d6f17..269bab2f8e 100644
--- a/drivers/net/virtio/virtio_user/vhost_vdpa.c
+++ b/drivers/net/virtio/virtio_user/vhost_vdpa.c
@@ -70,6 +70,8 @@ struct vhost_iotlb_msg {
 #define VHOST_IOTLB_UPDATE 2
 #define VHOST_IOTLB_INVALIDATE 3
 #define VHOST_IOTLB_ACCESS_FAIL 4
+#define VHOST_IOTLB_BATCH_BEGIN 5
+#define VHOST_IOTLB_BATCH_END 6
 	uint8_t type;
 };
 
@@ -84,6 +86,56 @@ struct vhost_msg {
 	};
 };
 
+static int
+vhost_vdpa_iotlb_batch_begin(struct virtio_user_dev *dev)
+{
+	struct vhost_msg msg = {};
+
+	if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_BATCH)))
+		return 0;
+
+	if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2))) {
+		PMD_DRV_LOG(ERR, "IOTLB_MSG_V2 not supported by the backend.");
+		return -1;
+	}
+
+	msg.type = VHOST_IOTLB_MSG_V2;
+	msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN;
+
+	if (write(dev->vhostfd, &msg, sizeof(msg)) != sizeof(msg)) {
+		PMD_DRV_LOG(ERR, "Failed to send IOTLB batch begin (%s)",
+				strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+vhost_vdpa_iotlb_batch_end(struct virtio_user_dev *dev)
+{
+	struct vhost_msg msg = {};
+
+	if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_BATCH)))
+		return 0;
+
+	if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2))) {
+		PMD_DRV_LOG(ERR, "IOTLB_MSG_V2 not supported by the backend.");
+		return -1;
+	}
+
+	msg.type = VHOST_IOTLB_MSG_V2;
+	msg.iotlb.type = VHOST_IOTLB_BATCH_END;
+
+	if (write(dev->vhostfd, &msg, sizeof(msg)) != sizeof(msg)) {
+		PMD_DRV_LOG(ERR, "Failed to send IOTLB batch end (%s)",
+				strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
 static int
 vhost_vdpa_dma_map(struct virtio_user_dev *dev, void *addr,
 		  uint64_t iova, size_t len)
@@ -136,6 +188,39 @@ vhost_vdpa_dma_unmap(struct virtio_user_dev *dev, __rte_unused void *addr,
 	return 0;
 }
 
+static int
+vhost_vdpa_dma_map_batch(struct virtio_user_dev *dev, void *addr,
+		  uint64_t iova, size_t len)
+{
+	int ret;
+
+	if (vhost_vdpa_iotlb_batch_begin(dev) < 0)
+		return -1;
+
+	ret = vhost_vdpa_dma_map(dev, addr, iova, len);
+
+	if (vhost_vdpa_iotlb_batch_end(dev) < 0)
+		return -1;
+
+	return ret;
+}
+
+static int
+vhost_vdpa_dma_unmap_batch(struct virtio_user_dev *dev, void *addr,
+		  uint64_t iova, size_t len)
+{
+	int ret;
+
+	if (vhost_vdpa_iotlb_batch_begin(dev) < 0)
+		return -1;
+
+	ret = vhost_vdpa_dma_unmap(dev, addr, iova, len);
+
+	if (vhost_vdpa_iotlb_batch_end(dev) < 0)
+		return -1;
+
+	return ret;
+}
 
 static int
 vhost_vdpa_map_contig(const struct rte_memseg_list *msl,
@@ -173,21 +258,32 @@ vhost_vdpa_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
 static int
 vhost_vdpa_dma_map_all(struct virtio_user_dev *dev)
 {
+	int ret;
+
+	if (vhost_vdpa_iotlb_batch_begin(dev) < 0)
+		return -1;
+
 	vhost_vdpa_dma_unmap(dev, NULL, 0, SIZE_MAX);
 
 	if (rte_eal_iova_mode() == RTE_IOVA_VA) {
 		/* with IOVA as VA mode, we can get away with mapping contiguous
 		 * chunks rather than going page-by-page.
 		 */
-		int ret = rte_memseg_contig_walk_thread_unsafe(
+		ret = rte_memseg_contig_walk_thread_unsafe(
 				vhost_vdpa_map_contig, dev);
 		if (ret)
-			return ret;
+			goto batch_end;
 		/* we have to continue the walk because we've skipped the
 		 * external segments during the config walk.
 		 */
 	}
-	return rte_memseg_walk_thread_unsafe(vhost_vdpa_map, dev);
+	ret = rte_memseg_walk_thread_unsafe(vhost_vdpa_map, dev);
+
+batch_end:
+	if (vhost_vdpa_iotlb_batch_end(dev) < 0)
+		return -1;
+
+	return ret;
 }
 
 /* with below features, vhost vdpa does not need to do the checksum and TSO,
@@ -307,6 +403,6 @@ struct virtio_user_backend_ops virtio_ops_vdpa = {
 	.setup = vhost_vdpa_setup,
 	.send_request = vhost_vdpa_ioctl,
 	.enable_qp = vhost_vdpa_enable_queue_pair,
-	.dma_map = vhost_vdpa_dma_map,
-	.dma_unmap = vhost_vdpa_dma_unmap,
+	.dma_map = vhost_vdpa_dma_map_batch,
+	.dma_unmap = vhost_vdpa_dma_unmap_batch,
 };
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
index 96bc6b232d..095deeea82 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -445,7 +445,8 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
 	 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
 
 #define VHOST_VDPA_SUPPORTED_PROTOCOL_FEATURES \
-	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
+	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | \
+	 1ULL << VHOST_BACKEND_F_IOTLB_BATCH)
 int
 virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
 		     int cq, int queue_size, const char *mac, char **ifname,
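
One note on the negotiation side of the diff above: the batch bracket is only
emitted when the backend actually offered VHOST_BACKEND_F_IOTLB_BATCH, which
is why the virtio_user_dev.c hunk adds the bit to the supported-features mask.
Below is a rough sketch of how that negotiation can look, assuming a Linux
kernel that exposes the VHOST_GET/SET_BACKEND_FEATURES ioctls;
negotiate_backend_features() is a hypothetical helper, and the PMD's real
code path (routed through vhost_vdpa_ioctl()) differs in detail.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>        /* VHOST_GET/SET_BACKEND_FEATURES */

#ifndef VHOST_BACKEND_F_IOTLB_MSG_V2
#define VHOST_BACKEND_F_IOTLB_MSG_V2 1
#endif
#ifndef VHOST_BACKEND_F_IOTLB_BATCH
#define VHOST_BACKEND_F_IOTLB_BATCH 2
#endif

static int
negotiate_backend_features(int vhostfd, uint64_t *negotiated)
{
	uint64_t features = 0;

	/* Ask the vhost-vdpa device which backend features it offers. */
	if (ioctl(vhostfd, VHOST_GET_BACKEND_FEATURES, &features) < 0)
		return -1;

	/* Keep only the bits virtio-user knows how to use; the patch
	 * adds IOTLB_BATCH to this mask. An older kernel simply never
	 * offers the bit.
	 */
	features &= (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
		    (1ULL << VHOST_BACKEND_F_IOTLB_BATCH);

	/* Acknowledge the subset we will actually use. */
	if (ioctl(vhostfd, VHOST_SET_BACKEND_FEATURES, &features) < 0)
		return -1;

	*negotiated = features;
	return 0;
}

When the BATCH bit is absent from the negotiated set, the new
vhost_vdpa_iotlb_batch_begin()/_end() helpers return early without writing
anything, so the change stays backward compatible with older kernels.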