[dpdk-dev,v2] devargs: add blacklisting by linux interface name

Message ID 1444058768-9208-1-git-send-email-3chas3@gmail.com (mailing list archive)
State Changes Requested, archived
Headers

Commit Message

Chas Williams Oct. 5, 2015, 3:26 p.m. UTC
  If a system is using deterministic interface names, it may be easier in
some cases to use the interface name to blacklist an interface.

Signed-off-by: Chas Williams <3chas3@gmail.com>
---
 app/test/test_devargs.c                     |  2 ++
 lib/librte_eal/common/eal_common_devargs.c  |  9 +++++++--
 lib/librte_eal/common/eal_common_options.c  |  2 +-
 lib/librte_eal/common/eal_common_pci.c      | 10 ++++++++--
 lib/librte_eal/common/include/rte_devargs.h |  2 ++
 lib/librte_eal/common/include/rte_pci.h     |  1 +
 lib/librte_eal/linuxapp/eal/eal_pci.c       | 15 +++++++++++++++
 7 files changed, 36 insertions(+), 5 deletions(-)
  

Comments

Stephen Hemminger Oct. 6, 2015, 7:35 a.m. UTC | #1
On Mon,  5 Oct 2015 11:26:08 -0400
Chas Williams <3chas3@gmail.com> wrote:

> diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
> index 83e3c28..852c149 100644
> --- a/lib/librte_eal/common/include/rte_pci.h
> +++ b/lib/librte_eal/common/include/rte_pci.h
> @@ -161,6 +161,7 @@ struct rte_pci_device {
>  	struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE];   /**< PCI Memory Resource */
>  	struct rte_intr_handle intr_handle;     /**< Interrupt handle */
>  	struct rte_pci_driver *driver;          /**< Associated driver */
> +	char name[32];

Why not use IFNAMSIZ rather than magic constant here?
  
Chas Williams Oct. 6, 2015, 2:41 p.m. UTC | #2
On Tue, 2015-10-06 at 08:35 +0100, Stephen Hemminger wrote:
> On Mon,  5 Oct 2015 11:26:08 -0400
> Chas Williams <3chas3@gmail.com> wrote:
> 
> > diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
> > index 83e3c28..852c149 100644
> > --- a/lib/librte_eal/common/include/rte_pci.h
> > +++ b/lib/librte_eal/common/include/rte_pci.h
> > @@ -161,6 +161,7 @@ struct rte_pci_device {
> >  	struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE];   /**< PCI Memory Resource */
> >  	struct rte_intr_handle intr_handle;     /**< Interrupt handle */
> >  	struct rte_pci_driver *driver;          /**< Associated driver */
> > +	char name[32];
> 
> Why not use IFNAMSIZ rather than magic constant here?

No particular reason.  It just matches the virtual device name size.
I will change it.
  
Olivier Matz Oct. 13, 2015, 12:49 p.m. UTC | #3
Hi Chas,

On 10/05/2015 05:26 PM, Chas Williams wrote:
> If a system is using deterministic interface names, it may be easier in
> some cases to use the interface name to blacklist an interface.
> 
> Signed-off-by: Chas Williams <3chas3@gmail.com>
> ---
>  app/test/test_devargs.c                     |  2 ++
>  lib/librte_eal/common/eal_common_devargs.c  |  9 +++++++--
>  lib/librte_eal/common/eal_common_options.c  |  2 +-
>  lib/librte_eal/common/eal_common_pci.c      | 10 ++++++++--
>  lib/librte_eal/common/include/rte_devargs.h |  2 ++
>  lib/librte_eal/common/include/rte_pci.h     |  1 +
>  lib/librte_eal/linuxapp/eal/eal_pci.c       | 15 +++++++++++++++
>  7 files changed, 36 insertions(+), 5 deletions(-)
> 
> diff --git a/app/test/test_devargs.c b/app/test/test_devargs.c
> index f7fc59c..27855ff 100644

> 
> [...]
> 

> @@ -352,6 +354,19 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
>  		return -1;
>  	}
>  
> +	/* get network interface name */
> +	snprintf(filename, sizeof(filename), "%s/net", dirname);
> +	dir = opendir(filename);
> +	if (dir) {
> +		while ((e = readdir(dir)) != NULL) {
> +			if (e->d_name[0] == '.')
> +				continue;
> +
> +			strncpy(dev->name, e->d_name, sizeof(dev->name));
> +		}
> +		closedir(dir);
> +	}
> +
>  	if (!ret) {
>  		if (!strcmp(driver, "vfio-pci"))
>  			dev->kdrv = RTE_KDRV_VFIO;
> 

For PCI devices that have several interfaces (I think it's the case for
some Mellanox boards), maybe we should not store the interface name?

Another small comment about the strncpy(): it's maybe safer to ensure
that dev->name is properly nul-terminated.

Regards,
Olivier
  
Chas Williams Oct. 14, 2015, 1:41 p.m. UTC | #4
On Tue, 2015-10-13 at 14:49 +0200, Olivier MATZ wrote:
> Hi Chas,
> 
> > @@ -352,6 +354,19 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
> >  		return -1;
> >  	}
> >  
> > +	/* get network interface name */
> > +	snprintf(filename, sizeof(filename), "%s/net", dirname);
> > +	dir = opendir(filename);
> > +	if (dir) {
> > +		while ((e = readdir(dir)) != NULL) {
> > +			if (e->d_name[0] == '.')
> > +				continue;
> > +
> > +			strncpy(dev->name, e->d_name, sizeof(dev->name));
> > +		}
> > +		closedir(dir);
> > +	}
> > +
> >  	if (!ret) {
> >  		if (!strcmp(driver, "vfio-pci"))
> >  			dev->kdrv = RTE_KDRV_VFIO;
> > 
> 
> For PCI devices that have several interfaces (I think it's the case for
> some Mellanox boards), maybe we should not store the interface name?

I am not sure what you mean here.  If a device has multiple ethernet
interfaces, then it should have a separate PCI device address space for
each interface (I don't know of any DPDK drivers that don't make this
assumption as well).  If the device is multiprotocol, say Infiniband,
the device might have a net/ subdirectory, but it will be called something
like ib0 which you might want to blacklist for some reason.

> Another small comment about the strncpy(): it's maybe safer to ensure
> that dev->name is properly nul-terminated.

A good idea but it shouldn't happen in practice since dev.name will
be IFNAMSIZ.  I will fix it in the next version.
  
Thomas Monjalon Nov. 4, 2015, 10:40 p.m. UTC | #5
2015-10-14 09:41, Charles  Williams:
> On Tue, 2015-10-13 at 14:49 +0200, Olivier MATZ wrote:
> > For PCI devices that have several interfaces (I think it's the case for
> > some Mellanox boards), maybe we should not store the interface name?
> 
> I am not sure what you mean here.  If a device has multiple ethernet
> interfaces, then it should have a separate PCI device address space for
> each interface (I don't know of any DPDK drivers that don't make this
> assumption as well).

mlx4 and cxgbe?
  
Chas Williams Nov. 5, 2015, 4:39 p.m. UTC | #6
On Wed, 2015-11-04 at 23:40 +0100, Thomas Monjalon wrote:
> 2015-10-14 09:41, Charles  Williams:
> > On Tue, 2015-10-13 at 14:49 +0200, Olivier MATZ wrote:
> > > For PCI devices that have several interfaces (I think it's the case for
> > > some Mellanox boards), maybe we should not store the interface name?
> > 
> > I am not sure what you mean here.  If a device has multiple ethernet
> > interfaces, then it should have a separate PCI device address space for
> > each interface (I don't know of any DPDK drivers that don't make this
> > assumption as well).
> 
> mlx4 and cxgbe?

OK, I see now.  I don't know of a way to tell if a device has multiple
ports just from the pci vendor/device id without maintaining some
sort of table.

Do these devices have multiple interfaces listed in their
/sys/devices/.../net directory?  If so, matching one of the listed
interfaces can just blacklist the whole device similar to blacklisting
by the device id.
  
Stephen Hemminger Nov. 5, 2015, 7:23 p.m. UTC | #7
On Thu, 05 Nov 2015 11:39:04 -0500
"Charles (Chas) Williams" <3chas3@gmail.com> wrote:

> On Wed, 2015-11-04 at 23:40 +0100, Thomas Monjalon wrote:
> > 2015-10-14 09:41, Charles  Williams:  
> > > On Tue, 2015-10-13 at 14:49 +0200, Olivier MATZ wrote:  
> > > > For PCI devices that have several interfaces (I think it's the case for
> > > > some Mellanox boards), maybe we should not store the interface name?  
> > > 
> > > I am not sure what you mean here.  If a device has multiple ethernet
> > > > interfaces, then it should have a separate PCI device address space for
> > > > each interface (I don't know of any DPDK drivers that don't make this
> > > assumption as well).  
> > 
> > mlx4 and cxgbe?  
> 
> OK, I see now.  I don't know of a way to tell if a device has multiple
> ports just from the pci vendor/device id without maintaining some
> sort of table.
> 
> Do these devices have multiple interfaces listed in their
> /sys/devices/.../net directory?  If so, matching one of the listed
> interfaces can just blacklist the whole device similar to blacklisting
> by the device id.

Devices with multiple ports are supposed to report the port via /sys/class/net/xxx/portid

But you aren't going to be able to blacklist only one port of these devices.
The two drivers would be fighting over registers and IRQ management.
Plus kernel bind/unbind is by PCI id.
  
Chas Williams Nov. 10, 2015, 6:51 p.m. UTC | #8
On Thu, 2015-11-05 at 11:23 -0800, Stephen Hemminger wrote:
> On Thu, 05 Nov 2015 11:39:04 -0500
> "Charles (Chas) Williams" <3chas3@gmail.com> wrote:
> 
> > On Wed, 2015-11-04 at 23:40 +0100, Thomas Monjalon wrote:
> > > 2015-10-14 09:41, Charles  Williams:  
> > > > On Tue, 2015-10-13 at 14:49 +0200, Olivier MATZ wrote:  
> > > > > For PCI devices that have several interfaces (I think it's the case for
> > > > > some Mellanox boards), maybe we should not store the interface name?  
> > > > 
> > > > I am not sure what you mean here.  If a device has multiple ethernet
> > > > > interfaces, then it should have a separate PCI device address space for
> > > > > each interface (I don't know of any DPDK drivers that don't make this
> > > > assumption as well).  
> > > 
> > > mlx4 and cxgbe?  
> > 
> > OK, I see now.  I don't know of a way to tell if a device has multiple
> > ports just from the pci vendor/device id without maintaining some
> > sort of table.
> > 
> > Do these devices have multiple interfaces listed in their
> > /sys/devices/.../net directory?  If so, matching one of the listed
> > interfaces can just blacklist the whole device similar to blacklisting
> > by the device id.
> 
> Devices with multiple ports are supposed to report the port via /sys/class/net/xxx/portid

But I want to find the ports associated with the PCI devices.


> But you aren't going to be able to blacklist only one port of these devices.
> The two drivers would be fighting over registers and IRQ management.
> Plus kernel bind/unbind is by PCI id.

I understand that.  Blacklisting an interface on a multiple port device
would be essentially the same as blacklisting by the PCI device id.  You
can't split the PCI device.  I just need to find the list of ports
associated with a single PCI device.
  

Patch

diff --git a/app/test/test_devargs.c b/app/test/test_devargs.c
index f7fc59c..27855ff 100644
--- a/app/test/test_devargs.c
+++ b/app/test/test_devargs.c
@@ -73,6 +73,8 @@  test_devargs(void)
 		goto fail;
 	if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_PCI, "0000:01:00.1") < 0)
 		goto fail;
+	if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_PCI, "eth0") < 0)
+		goto fail;
 	if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) != 2)
 		goto fail;
 	if (rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_PCI) != 2)
diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
index ec56165..1fb8bad 100644
--- a/lib/librte_eal/common/eal_common_devargs.c
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -101,8 +101,13 @@  rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
 	case RTE_DEVTYPE_BLACKLISTED_PCI:
 		/* try to parse pci identifier */
 		if (eal_parse_pci_BDF(buf, &devargs->pci.addr) != 0 &&
-		    eal_parse_pci_DomBDF(buf, &devargs->pci.addr) != 0)
-			goto fail;
+		    eal_parse_pci_DomBDF(buf, &devargs->pci.addr) != 0) {
+			/* save as interface name instead */
+			ret = snprintf(devargs->pci.name,
+				       sizeof(devargs->pci.name), "%s", buf);
+			if (ret < 0 || ret >= (int)sizeof(devargs->pci.name))
+				goto fail;
+		}
 
 		break;
 	case RTE_DEVTYPE_VIRTUAL:
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 1f459ac..6920088 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -889,7 +889,7 @@  eal_common_usage(void)
 	       "  -r RANKS            Force number of memory ranks (don't detect)\n"
 	       "  -b, --"OPT_PCI_BLACKLIST" Add a PCI device in black list.\n"
 	       "                      Prevent EAL from using this PCI device. The argument\n"
-	       "                      format is <domain:bus:devid.func>.\n"
+	       "                      format is <domain:bus:devid.func> or <name>.\n"
 	       "  -w, --"OPT_PCI_WHITELIST" Add a PCI device in white list.\n"
 	       "                      Only use the specified PCI devices. The argument format\n"
 	       "                      is <[domain:]bus:devid.func>. This option can be present\n"
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
index dcfe947..288c8bd 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -93,8 +93,14 @@  static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
 		if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
 			devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
 			continue;
-		if (!rte_eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
-			return devargs;
+
+		if (devargs->pci.name[0] == '\0') {
+			if (!rte_eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
+				return devargs;
+		} else {
+			if (strcmp(dev->name, devargs->pci.name) == 0)
+				return devargs;
+		}
 	}
 	return NULL;
 }
diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
index 7084ae2..bc436ec 100644
--- a/lib/librte_eal/common/include/rte_devargs.h
+++ b/lib/librte_eal/common/include/rte_devargs.h
@@ -81,6 +81,8 @@  struct rte_devargs {
 		struct {
 			/** PCI location. */
 			struct rte_pci_addr addr;
+			/** Interface name. */
+			char name[32];
 		} pci;
 		/** Used if type is RTE_DEVTYPE_VIRTUAL. */
 		struct {
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index 83e3c28..852c149 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -161,6 +161,7 @@  struct rte_pci_device {
 	struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE];   /**< PCI Memory Resource */
 	struct rte_intr_handle intr_handle;     /**< Interrupt handle */
 	struct rte_pci_driver *driver;          /**< Associated driver */
+	char name[32];				/**< Interface name (if any) */
 	uint16_t max_vfs;                       /**< sriov enable if not zero */
 	int numa_node;                          /**< NUMA node connection */
 	struct rte_devargs *devargs;            /**< Device user arguments */
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index bc5b5be..befb71f 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -260,6 +260,8 @@  pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
 	unsigned long tmp;
 	struct rte_pci_device *dev;
 	char driver[PATH_MAX];
+	struct dirent *e;
+	DIR *dir;
 	int ret;
 
 	dev = malloc(sizeof(*dev));
@@ -352,6 +354,19 @@  pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
 		return -1;
 	}
 
+	/* get network interface name */
+	snprintf(filename, sizeof(filename), "%s/net", dirname);
+	dir = opendir(filename);
+	if (dir) {
+		while ((e = readdir(dir)) != NULL) {
+			if (e->d_name[0] == '.')
+				continue;
+
+			strncpy(dev->name, e->d_name, sizeof(dev->name));
+		}
+		closedir(dir);
+	}
+
 	if (!ret) {
 		if (!strcmp(driver, "vfio-pci"))
 			dev->kdrv = RTE_KDRV_VFIO;