[v2,5/7] net/mlx5: add port representor awareness
diff mbox series

Message ID 20180614083047.10812-6-adrien.mazarguil@6wind.com
State Superseded, archived
Headers show
Series
  • net/mlx5: add port representor support
Related show

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail apply issues

Commit Message

Adrien Mazarguil June 14, 2018, 8:34 a.m. UTC
The current PCI probing method is not aware of Verbs port representors,
which appear as standard Verbs devices bound to the same PCI address and
cannot be distinguished.

Problem is that more often than not, the wrong Verbs device is used,
resulting in unexpected traffic.

This patch adds necessary heuristics to bind affected driver instances to
the intended (i.e. non-representor) device.

(Patch based on prior work from Yuanhan Liu)

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
--
v2 changes:

- Fixed digit detection in mlx5_cmp_ibv_name() so that "foo1" and "foo10"
  are compared on the integer conversion of "1" against "10" instead of ""
  and "0".
---
 drivers/net/mlx5/mlx5.c | 66 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 61 insertions(+), 5 deletions(-)

Comments

Xueming Li June 16, 2018, 8:37 a.m. UTC | #1
Reviewed-by: Xueming Li <xuemingl@mellanox.com>

One minor issue we should be able to ignore.

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Adrien Mazarguil
> Sent: Thursday, June 14, 2018 4:35 PM
> To: Shahaf Shuler <shahafs@mellanox.com>
> Cc: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v2 5/7] net/mlx5: add port representor awareness
> 
> The current PCI probing method is not aware of Verbs port representors, which appear as standard Verbs
> devices bound to the same PCI address and cannot be distinguished.
> 
> Problem is that more often than not, the wrong Verbs device is used, resulting in unexpected traffic.
> 
> This patch adds necessary heuristics to bind affected driver instances to the intended (i.e. non-
> representor) device.
> 
> (Patch based on prior work from Yuanhan Liu)
> 
> Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
> --
> v2 changes:
> 
> - Fixed digit detection in mlx5_cmp_ibv_name() so that "foo1" and "foo10"
>   are compared on the integer conversion of "1" against "10" instead of ""
>   and "0".
> ---
>  drivers/net/mlx5/mlx5.c | 66 ++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 61 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index c9815d721..498f80c89 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -3,6 +3,7 @@
>   * Copyright 2015 Mellanox Technologies, Ltd
>   */
> 
> +#include <ctype.h>
>  #include <stddef.h>
>  #include <unistd.h>
>  #include <string.h>
> @@ -1170,6 +1171,34 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,  }
> 
>  /**
> + * Comparison callback to sort Verbs device names.
> + *
> + * This is meant to be used with qsort().
> + *
> + * @param a[in]
> + *   Pointer to pointer to first Verbs device.
> + * @param b[in]
> + *   Pointer to pointer to second Verbs device.
> + *
> + * @return
> + *   0 if both names are equal, less than 0 if the first argument is less
> + *   than the second, greater than 0 otherwise.
> + */
> +static int
> +mlx5_cmp_ibv_name(const void *a, const void *b) {
> +	const char *name_a = (*(const struct ibv_device *const *)a)->name;
> +	const char *name_b = (*(const struct ibv_device *const *)b)->name;
> +	size_t i = 0;
> +
> +	while (name_a[i] && name_a[i] == name_b[i])
> +		++i;
> +	while (i && isdigit(name_a[i - 1]) && isdigit(name_b[i - 1]))

name_a[i - 1] and name_b[i - 1] must be same here.

> +		--i;
> +	return atoi(name_a + i) - atoi(name_b + i); }
> +
> +/**
>   * DPDK callback to register a PCI device.
>   *
>   * This function creates an Ethernet device for each port of a given @@ -1189,6 +1218,7 @@
> mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,  {
>  	struct ibv_device **ibv_list;
>  	struct rte_eth_dev **eth_list = NULL;
> +	int n = 0;
>  	int vf;
>  	int ret;
> 
> @@ -1210,6 +1240,9 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>  		DRV_LOG(ERR, "cannot list devices, is ib_uverbs loaded?");
>  		return -rte_errno;
>  	}
> +
> +	struct ibv_device *ibv_match[ret + 1];
> +
>  	while (ret-- > 0) {
>  		struct rte_pci_addr pci_addr;
> 
> @@ -1221,14 +1254,37 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>  		    pci_dev->addr.devid != pci_addr.devid ||
>  		    pci_dev->addr.function != pci_addr.function)
>  			continue;
> -		DRV_LOG(INFO, "PCI information matches, using device \"%s\"",
> +		DRV_LOG(INFO, "PCI information matches for device \"%s\"",
>  			ibv_list[ret]->name);
> -		break;
> +		ibv_match[n++] = ibv_list[ret];
> +	}
> +	ibv_match[n] = NULL;
> +	if (n > 1) {
> +		/*
> +		 * The existence of several matching entries means port
> +		 * representors have been instantiated. No existing Verbs
> +		 * call nor /sys entries can tell them apart at this point.
> +		 *
> +		 * While definitely hackish, assume their names are numbered
> +		 * based on order of creation with master device first,
> +		 * followed by first port representor, followed by the
> +		 * second one and so on.
> +		 */
> +		DRV_LOG(WARNING,
> +			"probing device with port representors involves"
> +			" heuristics with uncertain outcome");
> +		qsort(ibv_match, n, sizeof(*ibv_match), mlx5_cmp_ibv_name);
> +		DRV_LOG(WARNING, "assuming \"%s\" is the master device",
> +			ibv_match[0]->name);
> +		for (ret = 1; ret < n; ++ret)
> +			DRV_LOG(WARNING,
> +				"assuming \"%s\" is port representor #%d",
> +				ibv_match[ret]->name, ret - 1);
>  	}
> -	if (ret >= 0)
> -		eth_list = mlx5_dev_spawn(&pci_dev->device, ibv_list[ret], vf);
> +	if (n)
> +		eth_list = mlx5_dev_spawn(&pci_dev->device, ibv_match[0], vf);
>  	mlx5_glue->free_device_list(ibv_list);
> -	if (!ret) {
> +	if (!n) {
>  		DRV_LOG(WARNING,
>  			"no Verbs device matches PCI device " PCI_PRI_FMT ","
>  			" are kernel drivers loaded?",
> --
> 2.11.0
Adrien Mazarguil June 27, 2018, 1:32 p.m. UTC | #2
On Sat, Jun 16, 2018 at 08:37:14AM +0000, Xueming(Steven) Li wrote:
> Reviewed-by: Xueming Li <xuemingl@mellanox.com>
> 
> One minor issue we should be able to ignore.
<snip>
> > +static int
> > +mlx5_cmp_ibv_name(const void *a, const void *b) {
> > +	const char *name_a = (*(const struct ibv_device *const *)a)->name;
> > +	const char *name_b = (*(const struct ibv_device *const *)b)->name;
> > +	size_t i = 0;
> > +
> > +	while (name_a[i] && name_a[i] == name_b[i])
> > +		++i;
> > +	while (i && isdigit(name_a[i - 1]) && isdigit(name_b[i - 1]))
> 
> name_a[i - 1] and name_b[i - 1] must be same here.

Indeed, I'll simplify it in v3, thanks.

Patch
diff mbox series

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index c9815d721..498f80c89 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -3,6 +3,7 @@ 
  * Copyright 2015 Mellanox Technologies, Ltd
  */
 
+#include <ctype.h>
 #include <stddef.h>
 #include <unistd.h>
 #include <string.h>
@@ -1170,6 +1171,34 @@  mlx5_dev_spawn(struct rte_device *dpdk_dev,
 }
 
 /**
+ * Comparison callback to sort Verbs device names.
+ *
+ * This is meant to be used with qsort().
+ *
+ * @param a[in]
+ *   Pointer to pointer to first Verbs device.
+ * @param b[in]
+ *   Pointer to pointer to second Verbs device.
+ *
+ * @return
+ *   0 if both names are equal, less than 0 if the first argument is less
+ *   than the second, greater than 0 otherwise.
+ */
+static int
+mlx5_cmp_ibv_name(const void *a, const void *b)
+{
+	const char *name_a = (*(const struct ibv_device *const *)a)->name;
+	const char *name_b = (*(const struct ibv_device *const *)b)->name;
+	size_t i = 0;
+
+	while (name_a[i] && name_a[i] == name_b[i])
+		++i;
+	while (i && isdigit(name_a[i - 1]) && isdigit(name_b[i - 1]))
+		--i;
+	return atoi(name_a + i) - atoi(name_b + i);
+}
+
+/**
  * DPDK callback to register a PCI device.
  *
  * This function creates an Ethernet device for each port of a given
@@ -1189,6 +1218,7 @@  mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 {
 	struct ibv_device **ibv_list;
 	struct rte_eth_dev **eth_list = NULL;
+	int n = 0;
 	int vf;
 	int ret;
 
@@ -1210,6 +1240,9 @@  mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		DRV_LOG(ERR, "cannot list devices, is ib_uverbs loaded?");
 		return -rte_errno;
 	}
+
+	struct ibv_device *ibv_match[ret + 1];
+
 	while (ret-- > 0) {
 		struct rte_pci_addr pci_addr;
 
@@ -1221,14 +1254,37 @@  mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		    pci_dev->addr.devid != pci_addr.devid ||
 		    pci_dev->addr.function != pci_addr.function)
 			continue;
-		DRV_LOG(INFO, "PCI information matches, using device \"%s\"",
+		DRV_LOG(INFO, "PCI information matches for device \"%s\"",
 			ibv_list[ret]->name);
-		break;
+		ibv_match[n++] = ibv_list[ret];
+	}
+	ibv_match[n] = NULL;
+	if (n > 1) {
+		/*
+		 * The existence of several matching entries means port
+		 * representors have been instantiated. No existing Verbs
+		 * call nor /sys entries can tell them apart at this point.
+		 *
+		 * While definitely hackish, assume their names are numbered
+		 * based on order of creation with master device first,
+		 * followed by first port representor, followed by the
+		 * second one and so on.
+		 */
+		DRV_LOG(WARNING,
+			"probing device with port representors involves"
+			" heuristics with uncertain outcome");
+		qsort(ibv_match, n, sizeof(*ibv_match), mlx5_cmp_ibv_name);
+		DRV_LOG(WARNING, "assuming \"%s\" is the master device",
+			ibv_match[0]->name);
+		for (ret = 1; ret < n; ++ret)
+			DRV_LOG(WARNING,
+				"assuming \"%s\" is port representor #%d",
+				ibv_match[ret]->name, ret - 1);
 	}
-	if (ret >= 0)
-		eth_list = mlx5_dev_spawn(&pci_dev->device, ibv_list[ret], vf);
+	if (n)
+		eth_list = mlx5_dev_spawn(&pci_dev->device, ibv_match[0], vf);
 	mlx5_glue->free_device_list(ibv_list);
-	if (!ret) {
+	if (!n) {
 		DRV_LOG(WARNING,
 			"no Verbs device matches PCI device " PCI_PRI_FMT ","
 			" are kernel drivers loaded?",