[dpdk-dev,v3,1/3] eal: enable uio_pci_generic support

Message ID 1424451557-27419-2-git-send-email-bruce.richardson@intel.com (mailing list archive)
State Accepted, archived
Headers

Commit Message

Bruce Richardson Feb. 20, 2015, 4:59 p.m. UTC
  From: Zhou Danny <danny.zhou@intel.com>

Change the EAL PCI code so that it can work with both the
uio_pci_generic in-tree driver, as well as the igb_uio
DPDK-specific driver.

This involves changes to
1) Modify method of retrieving BAR resource mapping information
2) Mapping using resource files in /sys rather than /dev/uio*
2) Setup bus master bit in NIC's PCIe configuration space for
uio_pci_generic.

Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>

---
v3 changes:
- Remove the requirement for the kernel_driver_name to be
 stored for each device
- Ensure greater commonality of code between uio_generic and
 igb_uio, by always checking if bus mastering is on, and
 always using resource0 for mmap.
- Ensure secondary processes do not map resources unused in primary process

v2 changes:
- Change variable name of kernel driver with precise comment.
- Fix a union definition error in v1 patchset.
- Move redefined macro IORESOURCE_MEM to rte_pci.h with comment.
---
 lib/librte_eal/common/include/rte_pci.h            |   3 +
 lib/librte_eal/linuxapp/eal/eal_pci.c              |   4 +-
 lib/librte_eal/linuxapp/eal/eal_pci_uio.c          | 167 ++++++++++-----------
 .../linuxapp/eal/include/exec-env/rte_interrupts.h |   8 +-
 4 files changed, 91 insertions(+), 91 deletions(-)
  

Comments

David Marchand Feb. 23, 2015, 3:24 p.m. UTC | #1
Hello,

Ok this is coming too late, but anyway, my comments.


On Fri, Feb 20, 2015 at 5:59 PM, Bruce Richardson <
bruce.richardson@intel.com> wrote:

[snip]

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> index 54cce08..2b16fcb 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> @@ -36,6 +36,7 @@
>  #include <dirent.h>
>  #include <sys/stat.h>
>  #include <sys/mman.h>
> +#include <linux/pci_regs.h>
>
>  #include <rte_log.h>
>  #include <rte_pci.h>
> @@ -47,71 +48,73 @@
>  #include "eal_filesystem.h"
>  #include "eal_pci_init.h"
>
> -static int pci_parse_sysfs_value(const char *filename, uint64_t *val);
> -
>  void *pci_map_addr = NULL;
>
>
>  #define OFF_MAX              ((uint64_t)(off_t)-1)
>  static int
> -pci_uio_get_mappings(const char *devname, struct pci_map maps[], int
> nb_maps)
> +pci_uio_get_mappings(struct rte_pci_device *dev,
> +                       struct pci_map maps[], int nb_maps)
>  {
> -       int i;
> -       char dirname[PATH_MAX];
> +       struct rte_pci_addr *loc = &dev->addr;
> +       int i = 0;
>         char filename[PATH_MAX];
> -       uint64_t offset, size;
> +       unsigned long long start_addr, end_addr, flags;
> +       FILE *f;
>
> -       for (i = 0; i != nb_maps; i++) {
> +       snprintf(filename, sizeof(filename),
> +               SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource",
> +               loc->domain, loc->bus, loc->devid, loc->function);
>
> -               /* check if map directory exists */
> -               snprintf(dirname, sizeof(dirname),
> -                       "%s/maps/map%u", devname, i);
> -
> -               if (access(dirname, F_OK) != 0)
> -                       break;
> +       f = fopen(filename, "r");
> +       if (f == NULL) {
> +               RTE_LOG(ERR, EAL,
> +               "%s(): cannot open sysfs %s\n",
> +               __func__, filename);
> +               return -1;
> +       }
>
> -               /* get mapping offset */
> -               snprintf(filename, sizeof(filename),
> -                       "%s/offset", dirname);
> -               if (pci_parse_sysfs_value(filename, &offset) < 0) {
> -                       RTE_LOG(ERR, EAL,
> -                               "%s(): cannot parse offset of %s\n",
> -                               __func__, dirname);
> -                       return -1;
> +       while (fscanf(f, "%llx %llx %llx", &start_addr,
> +                       &end_addr, &flags) == 3 && i < nb_maps) {
> +               if (flags & IORESOURCE_MEM) {
> +                       maps[i].offset = 0x0;
> +                       maps[i].size = end_addr - start_addr + 1;
> +                       maps[i].phaddr = start_addr;
> +                       i++;
>                 }
> +       }
> +       fclose(f);
>
> -               /* get mapping size */
> -               snprintf(filename, sizeof(filename),
> -                       "%s/size", dirname);
> -               if (pci_parse_sysfs_value(filename, &size) < 0) {
> -                       RTE_LOG(ERR, EAL,
> -                               "%s(): cannot parse size of %s\n",
> -                               __func__, dirname);
> -                       return -1;
> -               }
> +       return i;
> +}
>
> -               /* get mapping physical address */
> -               snprintf(filename, sizeof(filename),
> -                       "%s/addr", dirname);
> -               if (pci_parse_sysfs_value(filename, &maps[i].phaddr) < 0) {
> -                       RTE_LOG(ERR, EAL,
> -                               "%s(): cannot parse addr of %s\n",
> -                               __func__, dirname);
> -                       return -1;
> -               }
>

This function ends up reinventing the wheel from eal_pci.c plus it adds
some new array with mappings in them but not indexed the same way as
eal_pci.c see comments at the end of this mail.


> +static int
> +pci_uio_set_bus_master(int dev_fd)
> +{
> +       uint16_t reg;
> +       int ret;
>
> -               if ((offset > OFF_MAX) || (size > SIZE_MAX)) {
> -                       RTE_LOG(ERR, EAL,
> -                               "%s(): offset/size exceed system max
> value\n",
> -                               __func__);
> -                       return -1;
> -               }
> +       ret = pread(dev_fd, &reg, sizeof(reg), PCI_COMMAND);
> +       if (ret != sizeof(reg)) {
> +               RTE_LOG(ERR, EAL,
> +                       "Cannot read command from PCI config space!\n");
> +               return -1;
> +       }
> +
> +       /* return if bus mastering is already on */
> +       if (reg & PCI_COMMAND_MASTER)
> +               return 0;
> +
> +       reg |= PCI_COMMAND_MASTER;
>
> -               maps[i].offset = offset;
> -               maps[i].size = size;
> +       ret = pwrite(dev_fd, &reg, sizeof(reg), PCI_COMMAND);
> +       if (ret != sizeof(reg)) {
> +               RTE_LOG(ERR, EAL,
> +                       "Cannot write command to PCI config space!\n");
> +               return -1;
>         }
>
> -       return i;
> +       return 0;
>  }
>

It would have been the good time to have a generic api to access pci config
space.
Something like what Stephen proposed.


>  static int
> @@ -127,6 +130,10 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>                         continue;
>
>                 for (i = 0; i != uio_res->nb_maps; i++) {
> +                       /* ignore mappings unused in primary process */
> +                       if (uio_res->maps[i].addr == NULL)
> +                               continue;
> +

                        /*
>                          * open devname, to mmap it
>                          */
> @@ -282,6 +289,7 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>  {
>         int i, j;
>         char dirname[PATH_MAX];
> +       char cfgname[PATH_MAX];
>         char devname[PATH_MAX]; /* contains the /dev/uioX */
>         void *mapaddr;
>         int uio_num;
> @@ -294,6 +302,7 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>         struct pci_map *maps;
>
>         dev->intr_handle.fd = -1;
> +       dev->intr_handle.uio_cfg_fd = -1;
>         dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
>
>         /* secondary processes - use already recorded details */
> @@ -318,6 +327,28 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>         }
>         dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
>
> +       snprintf(cfgname, sizeof(cfgname),
> +                       "/sys/class/uio/uio%u/device/config", uio_num);
> +       dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR);
> +       if (dev->intr_handle.uio_cfg_fd < 0) {
> +               RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> +                       cfgname, strerror(errno));
> +               return -1;
> +       }
> +
> +       /* update devname for mmap  */
> +       snprintf(devname, sizeof(devname),
> +               SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource%d",
> +               loc->domain, loc->bus, loc->devid, loc->function, 0);
>

Why bother with a %d if you hardcode 0 ?
More importantly, since you hardcode "devname" to /sys/.../resource0, then
the mmap code will use a fd on this file.
I really am skeptical on the fact that it can work for devices that have no
bar0.

Then, how is this supposed to work ?

You rely on sysfs mmap code for pci resources.
Is this really equivalent to uio mmap operations ?



> +
> +       /* set bus master that is not done by uio_pci_generic */
> +       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +               if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) {
> +                       RTE_LOG(ERR, EAL, "Cannot set up bus
> mastering!\n");
> +                       return -1;
> +               }
> +       }
> +
>

You are already running in a primary process, this check is useless.


>         /* allocate the mapping details for secondary processes*/
>         uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
>         if (uio_res == NULL) {
> @@ -330,13 +361,12 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>         memcpy(&uio_res->pci_addr, &dev->addr, sizeof(uio_res->pci_addr));
>
>         /* collect info about device mappings */
> -       nb_maps = pci_uio_get_mappings(dirname, uio_res->maps,
> -                                      RTE_DIM(uio_res->maps));
> +       nb_maps = pci_uio_get_mappings(dev, uio_res->maps,
> +                                       RTE_DIM(uio_res->maps));
>         if (nb_maps < 0) {
>                 rte_free(uio_res);
>                 return nb_maps;
>         }
> -
>         uio_res->nb_maps = nb_maps;
>
>         /* Map all BARs */
>


Ok then after this, we use this temp array uio_res->maps and we loop all
over the pci resources.
Why do we have all these loops ?

I could see no point before, and with this change, it is bothering me again.
Won't it be easier to loop on the pci resources discovered by eal_pci.c
before this function is called ?

eal_pci.c is responsible for discovering pci devices, prepare those devices
(filling mem_resource[] for example), then eal_pci_uio.c / eal_pci_vfio.c
do the "mapping" stuff.
So uio / vfio must not overwrite what has already been set before.
  

Patch

diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index 66ed793..4301c16 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -104,6 +104,9 @@  extern struct pci_device_list pci_device_list; /**< Global list of PCI devices.
 /** Nb. of values in PCI resource format. */
 #define PCI_RESOURCE_FMT_NVAL 3
 
+/** IO resource type: memory address space */
+#define IORESOURCE_MEM        0x00000200
+
 /**
  * A structure describing a PCI resource.
  */
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 15db9c4..63bcbce 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -138,8 +138,6 @@  pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
 }
 
 /* parse the "resource" sysfs file */
-#define IORESOURCE_MEM  0x00000200
-
 static int
 pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev)
 {
@@ -519,7 +517,7 @@  pci_map_device(struct rte_pci_device *dev)
 			return ret;
 	}
 #endif
-	/* map resources for devices that use igb_uio */
+	/* map resources for devices that use uio_pci_generic or igb_uio */
 	if (!mapped) {
 		ret = pci_uio_map_resource(dev);
 		if (ret != 0)
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
index 54cce08..2b16fcb 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
@@ -36,6 +36,7 @@ 
 #include <dirent.h>
 #include <sys/stat.h>
 #include <sys/mman.h>
+#include <linux/pci_regs.h>
 
 #include <rte_log.h>
 #include <rte_pci.h>
@@ -47,71 +48,73 @@ 
 #include "eal_filesystem.h"
 #include "eal_pci_init.h"
 
-static int pci_parse_sysfs_value(const char *filename, uint64_t *val);
-
 void *pci_map_addr = NULL;
 
 
 #define OFF_MAX              ((uint64_t)(off_t)-1)
 static int
-pci_uio_get_mappings(const char *devname, struct pci_map maps[], int nb_maps)
+pci_uio_get_mappings(struct rte_pci_device *dev,
+			struct pci_map maps[], int nb_maps)
 {
-	int i;
-	char dirname[PATH_MAX];
+	struct rte_pci_addr *loc = &dev->addr;
+	int i = 0;
 	char filename[PATH_MAX];
-	uint64_t offset, size;
+	unsigned long long start_addr, end_addr, flags;
+	FILE *f;
 
-	for (i = 0; i != nb_maps; i++) {
+	snprintf(filename, sizeof(filename),
+		SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource",
+		loc->domain, loc->bus, loc->devid, loc->function);
 
-		/* check if map directory exists */
-		snprintf(dirname, sizeof(dirname),
-			"%s/maps/map%u", devname, i);
-
-		if (access(dirname, F_OK) != 0)
-			break;
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL,
+		"%s(): cannot open sysfs %s\n",
+		__func__, filename);
+		return -1;
+	}
 
-		/* get mapping offset */
-		snprintf(filename, sizeof(filename),
-			"%s/offset", dirname);
-		if (pci_parse_sysfs_value(filename, &offset) < 0) {
-			RTE_LOG(ERR, EAL,
-				"%s(): cannot parse offset of %s\n",
-				__func__, dirname);
-			return -1;
+	while (fscanf(f, "%llx %llx %llx", &start_addr,
+			&end_addr, &flags) == 3 && i < nb_maps) {
+		if (flags & IORESOURCE_MEM) {
+			maps[i].offset = 0x0;
+			maps[i].size = end_addr - start_addr + 1;
+			maps[i].phaddr = start_addr;
+			i++;
 		}
+	}
+	fclose(f);
 
-		/* get mapping size */
-		snprintf(filename, sizeof(filename),
-			"%s/size", dirname);
-		if (pci_parse_sysfs_value(filename, &size) < 0) {
-			RTE_LOG(ERR, EAL,
-				"%s(): cannot parse size of %s\n",
-				__func__, dirname);
-			return -1;
-		}
+	return i;
+}
 
-		/* get mapping physical address */
-		snprintf(filename, sizeof(filename),
-			"%s/addr", dirname);
-		if (pci_parse_sysfs_value(filename, &maps[i].phaddr) < 0) {
-			RTE_LOG(ERR, EAL,
-				"%s(): cannot parse addr of %s\n",
-				__func__, dirname);
-			return -1;
-		}
+static int
+pci_uio_set_bus_master(int dev_fd)
+{
+	uint16_t reg;
+	int ret;
 
-		if ((offset > OFF_MAX) || (size > SIZE_MAX)) {
-			RTE_LOG(ERR, EAL,
-				"%s(): offset/size exceed system max value\n",
-				__func__);
-			return -1;
-		}
+	ret = pread(dev_fd, &reg, sizeof(reg), PCI_COMMAND);
+	if (ret != sizeof(reg)) {
+		RTE_LOG(ERR, EAL,
+			"Cannot read command from PCI config space!\n");
+		return -1;
+	}
+
+	/* return if bus mastering is already on */
+	if (reg & PCI_COMMAND_MASTER)
+		return 0;
+
+	reg |= PCI_COMMAND_MASTER;
 
-		maps[i].offset = offset;
-		maps[i].size = size;
+	ret = pwrite(dev_fd, &reg, sizeof(reg), PCI_COMMAND);
+	if (ret != sizeof(reg)) {
+		RTE_LOG(ERR, EAL,
+			"Cannot write command to PCI config space!\n");
+		return -1;
 	}
 
-	return i;
+	return 0;
 }
 
 static int
@@ -127,6 +130,10 @@  pci_uio_map_secondary(struct rte_pci_device *dev)
 			continue;
 
 		for (i = 0; i != uio_res->nb_maps; i++) {
+			/* ignore mappings unused in primary process */
+			if (uio_res->maps[i].addr == NULL)
+				continue;
+
 			/*
 			 * open devname, to mmap it
 			 */
@@ -282,6 +289,7 @@  pci_uio_map_resource(struct rte_pci_device *dev)
 {
 	int i, j;
 	char dirname[PATH_MAX];
+	char cfgname[PATH_MAX];
 	char devname[PATH_MAX]; /* contains the /dev/uioX */
 	void *mapaddr;
 	int uio_num;
@@ -294,6 +302,7 @@  pci_uio_map_resource(struct rte_pci_device *dev)
 	struct pci_map *maps;
 
 	dev->intr_handle.fd = -1;
+	dev->intr_handle.uio_cfg_fd = -1;
 	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
 
 	/* secondary processes - use already recorded details */
@@ -318,6 +327,28 @@  pci_uio_map_resource(struct rte_pci_device *dev)
 	}
 	dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
 
+	snprintf(cfgname, sizeof(cfgname),
+			"/sys/class/uio/uio%u/device/config", uio_num);
+	dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR);
+	if (dev->intr_handle.uio_cfg_fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+			cfgname, strerror(errno));
+		return -1;
+	}
+
+	/* update devname for mmap  */
+	snprintf(devname, sizeof(devname),
+		SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource%d",
+		loc->domain, loc->bus, loc->devid, loc->function, 0);
+
+	/* set bus master that is not done by uio_pci_generic */
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) {
+			RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
+			return -1;
+		}
+	}
+
 	/* allocate the mapping details for secondary processes*/
 	uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
 	if (uio_res == NULL) {
@@ -330,13 +361,12 @@  pci_uio_map_resource(struct rte_pci_device *dev)
 	memcpy(&uio_res->pci_addr, &dev->addr, sizeof(uio_res->pci_addr));
 
 	/* collect info about device mappings */
-	nb_maps = pci_uio_get_mappings(dirname, uio_res->maps,
-				       RTE_DIM(uio_res->maps));
+	nb_maps = pci_uio_get_mappings(dev, uio_res->maps,
+					RTE_DIM(uio_res->maps));
 	if (nb_maps < 0) {
 		rte_free(uio_res);
 		return nb_maps;
 	}
-
 	uio_res->nb_maps = nb_maps;
 
 	/* Map all BARs */
@@ -403,38 +433,3 @@  pci_uio_map_resource(struct rte_pci_device *dev)
 
 	return 0;
 }
-
-/*
- * parse a sysfs file containing one integer value
- * different to the eal version, as it needs to work with 64-bit values
- */
-static int
-pci_parse_sysfs_value(const char *filename, uint64_t *val)
-{
-	FILE *f;
-	char buf[BUFSIZ];
-	char *end = NULL;
-
-	f = fopen(filename, "r");
-	if (f == NULL) {
-		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
-				__func__, filename);
-		return -1;
-	}
-
-	if (fgets(buf, sizeof(buf), f) == NULL) {
-		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
-				__func__, filename);
-		fclose(f);
-		return -1;
-	}
-	*val = strtoull(buf, &end, 0);
-	if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
-		RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
-				__func__, filename);
-		fclose(f);
-		return -1;
-	}
-	fclose(f);
-	return 0;
-}
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
index 23eafd9..6a159c7 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
@@ -50,8 +50,12 @@  enum rte_intr_handle_type {
 
 /** Handle for interrupts. */
 struct rte_intr_handle {
-	int vfio_dev_fd;                 /**< VFIO device file descriptor */
-	int fd;                          /**< file descriptor */
+	union {
+		int vfio_dev_fd;  /**< VFIO device file descriptor */
+		int uio_cfg_fd;  /**< UIO config file descriptor
+					for uio_pci_generic */
+	};
+	int fd;	 /**< interrupt event file descriptor */
 	enum rte_intr_handle_type type;  /**< handle type */
 };