[2/4] vdpa/mlx5: support direct HW notifications

Message ID 1585059877-2369-3-git-send-email-asafp@mellanox.com (mailing list archive)
State Accepted, archived
Delegated to: Maxime Coquelin
Headers
Series vdpa/mlx5: support direct notification |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Asaf Penso March 24, 2020, 2:24 p.m. UTC
  From: Matan Azrad <matan@mellanox.com>

Add support for the next 2 callbacks:
get_vfio_device_fd and get_notify_area.

This will allow direct HW doorbell ringing from guest and will save CPU
usage in host.

By this patch, the QEMU will map the physical address of the virtio
device in guest directly to the physical address of the HW device
doorbell.

The guest doorbell write is 2 bytes transaction while some Mellanox nics
support only 4 bytes transactions.

Remove ConnectX-5 and BF1 devices support which don't support 2B
doorbell writes for HW triggering.

Signed-off-by: Matan Azrad <matan@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
 drivers/vdpa/mlx5/mlx5_vdpa.c | 74 ++++++++++++++++++++++++++++++++++++-------
 drivers/vdpa/mlx5/mlx5_vdpa.h |  1 +
 2 files changed, 63 insertions(+), 12 deletions(-)
  

Comments

Maxime Coquelin April 15, 2020, 9:47 a.m. UTC | #1
On 3/24/20 3:24 PM, Asaf Penso wrote:
> From: Matan Azrad <matan@mellanox.com>
> 
> Add support for the next 2 callbacks:
> get_vfio_device_fd and get_notify_area.
> 
> This will allow direct HW doorbell ringing from guest and will save CPU
> usage in host.
> 
> By this patch, the QEMU will map the physical address of the virtio
> device in guest directly to the physical address of the HW device
> doorbell.
> 
> The guest doorbell write is 2 bytes transaction while some Mellanox nics
> support only 4 bytes transactions.
> 
> Remove ConnectX-5 and BF1 devices support which don't support 2B
> doorbell writes for HW triggering.

Couldn't we have different rte_vdpa_dev_ops depending on whether
doorbell write support?

> Signed-off-by: Matan Azrad <matan@mellanox.com>
> Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> ---
>  drivers/vdpa/mlx5/mlx5_vdpa.c | 74 ++++++++++++++++++++++++++++++++++++-------
>  drivers/vdpa/mlx5/mlx5_vdpa.h |  1 +
>  2 files changed, 63 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
> index 5542c29..4eb6abf 100644
> --- a/drivers/vdpa/mlx5/mlx5_vdpa.c
> +++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
> @@ -133,6 +133,29 @@
>  }
>  
>  static int
> +mlx5_vdpa_direct_db_prepare(struct mlx5_vdpa_priv *priv)
> +{
> +	int ret;
> +
> +	if (priv->direct_notifier) {
> +		ret = rte_vhost_host_notifier_ctrl(priv->vid, false);
> +		if (ret != 0) {
> +			DRV_LOG(INFO, "Direct HW notifier FD cannot be "
> +				"destroyed for device %d: %d.", priv->vid, ret);
> +			return -1;
> +		}
> +		priv->direct_notifier = 0;
> +	}
> +	ret = rte_vhost_host_notifier_ctrl(priv->vid, true);
> +	if (ret != 0)
> +		DRV_LOG(INFO, "Direct HW notifier FD cannot be configured for"
> +			" device %d: %d.", priv->vid, ret);
> +	else
> +		priv->direct_notifier = 1;
> +	return 0;
> +}
> +
> +static int
>  mlx5_vdpa_features_set(int vid)
>  {
>  	int did = rte_vhost_get_vdpa_device_id(vid);
> @@ -209,8 +232,9 @@
>  		return -1;
>  	}
>  	priv->vid = vid;
> -	if (mlx5_vdpa_mem_register(priv) || mlx5_vdpa_virtqs_prepare(priv) ||
> -	    mlx5_vdpa_steer_setup(priv) || mlx5_vdpa_cqe_event_setup(priv)) {
> +	if (mlx5_vdpa_mem_register(priv) || mlx5_vdpa_direct_db_prepare(priv) ||
> +	    mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
> +	    mlx5_vdpa_cqe_event_setup(priv)) {
>  		mlx5_vdpa_dev_close(vid);
>  		return -1;
>  	}
> @@ -218,6 +242,40 @@
>  	return 0;
>  }
>  
> +static int
> +mlx5_vdpa_get_device_fd(int vid)
> +{
> +	int did = rte_vhost_get_vdpa_device_id(vid);
> +	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
> +
> +	if (priv == NULL) {
> +		DRV_LOG(ERR, "Invalid device id: %d.", did);
> +		return -EINVAL;
> +	}
> +	return priv->ctx->cmd_fd;
> +}
> +
> +static int
> +mlx5_vdpa_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
> +{
> +	int did = rte_vhost_get_vdpa_device_id(vid);
> +	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
> +
> +	RTE_SET_USED(qid);
> +	if (priv == NULL) {
> +		DRV_LOG(ERR, "Invalid device id: %d.", did);
> +		return -EINVAL;
> +	}
> +	if (!priv->var) {
> +		DRV_LOG(ERR, "VAR was not created for device %d, is the device"
> +			" configured?.", did);
> +		return -EINVAL;
> +	}
> +	*offset = priv->var->mmap_off;
> +	*size = priv->var->length;
> +	return 0;
> +}
> +
>  static struct rte_vdpa_dev_ops mlx5_vdpa_ops = {
>  	.get_queue_num = mlx5_vdpa_get_queue_num,
>  	.get_features = mlx5_vdpa_get_vdpa_features,
> @@ -228,8 +286,8 @@
>  	.set_features = mlx5_vdpa_features_set,
>  	.migration_done = NULL,
>  	.get_vfio_group_fd = NULL,
> -	.get_vfio_device_fd = NULL,
> -	.get_notify_area = NULL,
> +	.get_vfio_device_fd = mlx5_vdpa_get_device_fd,
> +	.get_notify_area = mlx5_vdpa_get_notify_area,
>  };
>  
>  static struct ibv_device *
> @@ -520,14 +578,6 @@
>  static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
>  	{
>  		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
> -			       PCI_DEVICE_ID_MELLANOX_CONNECTX5BF)
> -	},
> -	{
> -		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
> -			       PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF)
> -	},
> -	{
> -		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
>  				PCI_DEVICE_ID_MELLANOX_CONNECTX6)
>  	},
>  	{
> diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
> index 3324c9d..75af410 100644
> --- a/drivers/vdpa/mlx5/mlx5_vdpa.h
> +++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
> @@ -100,6 +100,7 @@ struct mlx5_vdpa_steer {
>  struct mlx5_vdpa_priv {
>  	TAILQ_ENTRY(mlx5_vdpa_priv) next;
>  	uint8_t configured;
> +	uint8_t direct_notifier; /* Whether direct notifier is on or off. */
>  	int id; /* vDPA device id. */
>  	int vid; /* vhost device id. */
>  	struct ibv_context *ctx; /* Device context. */
>
  
Maxime Coquelin April 17, 2020, 11:54 a.m. UTC | #2
On 4/15/20 11:47 AM, Maxime Coquelin wrote:
> 
> 
> On 3/24/20 3:24 PM, Asaf Penso wrote:
>> From: Matan Azrad <matan@mellanox.com>
>>
>> Add support for the next 2 callbacks:
>> get_vfio_device_fd and get_notify_area.
>>
>> This will allow direct HW doorbell ringing from guest and will save CPU
>> usage in host.
>>
>> By this patch, the QEMU will map the physical address of the virtio
>> device in guest directly to the physical address of the HW device
>> doorbell.
>>
>> The guest doorbell write is 2 bytes transaction while some Mellanox nics
>> support only 4 bytes transactions.
>>
>> Remove ConnectX-5 and BF1 devices support which don't support 2B
>> doorbell writes for HW triggering.
> 
> Couldn't we have different rte_vdpa_dev_ops depending on whether
> doorbell write support?

I'll take the patch so that it lands into -rc1.
But I would like to discuss the opportunity to have different dev_ops
based on the device IDs, so that BF can still be supported. Maybe that
could be done in -rc2?

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime
  
Matan Azrad April 26, 2020, 7:06 a.m. UTC | #3
From: Maxime Coquelin <maxime.coquelin@redhat.com>
> On 4/15/20 11:47 AM, Maxime Coquelin wrote:
> >
> >
> > On 3/24/20 3:24 PM, Asaf Penso wrote:
> >> From: Matan Azrad <matan@mellanox.com>
> >>
> >> Add support for the next 2 callbacks:
> >> get_vfio_device_fd and get_notify_area.
> >>
> >> This will allow direct HW doorbell ringing from guest and will save
> >> CPU usage in host.
> >>
> >> By this patch, the QEMU will map the physical address of the virtio
> >> device in guest directly to the physical address of the HW device
> >> doorbell.
> >>
> >> The guest doorbell write is 2 bytes transaction while some Mellanox
> >> nics support only 4 bytes transactions.
> >>
> >> Remove ConnectX-5 and BF1 devices support which don't support 2B
> >> doorbell writes for HW triggering.
> >
> > Couldn't we have different rte_vdpa_dev_ops depending on whether
> > doorbell write support?
> 
> I'll take the patch so that it lands into -rc1.
> But I would like to discuss the opportunity to have different dev_ops based
> on the device IDs, so that BF can still be supported. Maybe that could be
> done in -rc2?

Mellanox decided to remove BF1 from GA and from upstream support.
GA devices are CX6dx + BF2.

> 
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> 
> Thanks,
> Maxime
>
  
Maxime Coquelin April 27, 2020, 7:45 a.m. UTC | #4
On 4/26/20 9:06 AM, Matan Azrad wrote:
> 
> 
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
>> On 4/15/20 11:47 AM, Maxime Coquelin wrote:
>>>
>>>
>>> On 3/24/20 3:24 PM, Asaf Penso wrote:
>>>> From: Matan Azrad <matan@mellanox.com>
>>>>
>>>> Add support for the next 2 callbacks:
>>>> get_vfio_device_fd and get_notify_area.
>>>>
>>>> This will allow direct HW doorbell ringing from guest and will save
>>>> CPU usage in host.
>>>>
>>>> By this patch, the QEMU will map the physical address of the virtio
>>>> device in guest directly to the physical address of the HW device
>>>> doorbell.
>>>>
>>>> The guest doorbell write is 2 bytes transaction while some Mellanox
>>>> nics support only 4 bytes transactions.
>>>>
>>>> Remove ConnectX-5 and BF1 devices support which don't support 2B
>>>> doorbell writes for HW triggering.
>>>
>>> Couldn't we have different rte_vdpa_dev_ops depending on whether
>>> doorbell write support?
>>
>> I'll take the patch so that it lands into -rc1.
>> But I would like to discuss the opportunity to have different dev_ops based
>> on the device IDs, so that BF can still be supported. Maybe that could be
>> done in -rc2?
> 
> Mellanox decided to remove BF1 from GA and from upstream support.
> GA devices are CX6dx + BF2.

Thanks for the clarification.

Maxime

>>
>> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>
>> Thanks,
>> Maxime
>>
>
  

Patch

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index 5542c29..4eb6abf 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -133,6 +133,29 @@ 
 }
 
 static int
+mlx5_vdpa_direct_db_prepare(struct mlx5_vdpa_priv *priv)
+{
+	int ret;
+
+	if (priv->direct_notifier) {
+		ret = rte_vhost_host_notifier_ctrl(priv->vid, false);
+		if (ret != 0) {
+			DRV_LOG(INFO, "Direct HW notifier FD cannot be "
+				"destroyed for device %d: %d.", priv->vid, ret);
+			return -1;
+		}
+		priv->direct_notifier = 0;
+	}
+	ret = rte_vhost_host_notifier_ctrl(priv->vid, true);
+	if (ret != 0)
+		DRV_LOG(INFO, "Direct HW notifier FD cannot be configured for"
+			" device %d: %d.", priv->vid, ret);
+	else
+		priv->direct_notifier = 1;
+	return 0;
+}
+
+static int
 mlx5_vdpa_features_set(int vid)
 {
 	int did = rte_vhost_get_vdpa_device_id(vid);
@@ -209,8 +232,9 @@ 
 		return -1;
 	}
 	priv->vid = vid;
-	if (mlx5_vdpa_mem_register(priv) || mlx5_vdpa_virtqs_prepare(priv) ||
-	    mlx5_vdpa_steer_setup(priv) || mlx5_vdpa_cqe_event_setup(priv)) {
+	if (mlx5_vdpa_mem_register(priv) || mlx5_vdpa_direct_db_prepare(priv) ||
+	    mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
+	    mlx5_vdpa_cqe_event_setup(priv)) {
 		mlx5_vdpa_dev_close(vid);
 		return -1;
 	}
@@ -218,6 +242,40 @@ 
 	return 0;
 }
 
+static int
+mlx5_vdpa_get_device_fd(int vid)
+{
+	int did = rte_vhost_get_vdpa_device_id(vid);
+	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		return -EINVAL;
+	}
+	return priv->ctx->cmd_fd;
+}
+
+static int
+mlx5_vdpa_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
+{
+	int did = rte_vhost_get_vdpa_device_id(vid);
+	struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
+
+	RTE_SET_USED(qid);
+	if (priv == NULL) {
+		DRV_LOG(ERR, "Invalid device id: %d.", did);
+		return -EINVAL;
+	}
+	if (!priv->var) {
+		DRV_LOG(ERR, "VAR was not created for device %d, is the device"
+			" configured?.", did);
+		return -EINVAL;
+	}
+	*offset = priv->var->mmap_off;
+	*size = priv->var->length;
+	return 0;
+}
+
 static struct rte_vdpa_dev_ops mlx5_vdpa_ops = {
 	.get_queue_num = mlx5_vdpa_get_queue_num,
 	.get_features = mlx5_vdpa_get_vdpa_features,
@@ -228,8 +286,8 @@ 
 	.set_features = mlx5_vdpa_features_set,
 	.migration_done = NULL,
 	.get_vfio_group_fd = NULL,
-	.get_vfio_device_fd = NULL,
-	.get_notify_area = NULL,
+	.get_vfio_device_fd = mlx5_vdpa_get_device_fd,
+	.get_notify_area = mlx5_vdpa_get_notify_area,
 };
 
 static struct ibv_device *
@@ -520,14 +578,6 @@ 
 static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
 	{
 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
-			       PCI_DEVICE_ID_MELLANOX_CONNECTX5BF)
-	},
-	{
-		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
-			       PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF)
-	},
-	{
-		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
 				PCI_DEVICE_ID_MELLANOX_CONNECTX6)
 	},
 	{
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index 3324c9d..75af410 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -100,6 +100,7 @@  struct mlx5_vdpa_steer {
 struct mlx5_vdpa_priv {
 	TAILQ_ENTRY(mlx5_vdpa_priv) next;
 	uint8_t configured;
+	uint8_t direct_notifier; /* Whether direct notifier is on or off. */
 	int id; /* vDPA device id. */
 	int vid; /* vhost device id. */
 	struct ibv_context *ctx; /* Device context. */