[dpdk-dev,v3,1/3] eal: enable uio_pci_generic support
Commit Message
From: Zhou Danny <danny.zhou@intel.com>
Change the EAL PCI code so that it can work with both the
uio_pci_generic in-tree driver, as well as the igb_uio
DPDK-specific driver.
This involves changes to
1) Modify method of retrieving BAR resource mapping information
2) Mapping using resource files in /sys rather than /dev/uio*
2) Setup bus master bit in NIC's PCIe configuration space for
uio_pci_generic.
Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
v3 changes:
- Remove the requirement for the kernel_driver_name to be
stored for each device
- Ensure greater commonality of code between uio_generic and
igb_uio, by always checking if bus mastering is on, and
always using resource0 for mmap.
- Ensure secondary processes do not map resources unused in primary process
v2 changes:
- Change variable name of kernel driver with precise comment.
- Fix a union definition error in v1 patchset.
- Move redefined macro IORESOURCE_MEM to rte_pci.h with comment.
---
lib/librte_eal/common/include/rte_pci.h | 3 +
lib/librte_eal/linuxapp/eal/eal_pci.c | 4 +-
lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 167 ++++++++++-----------
.../linuxapp/eal/include/exec-env/rte_interrupts.h | 8 +-
4 files changed, 91 insertions(+), 91 deletions(-)
Comments
Hello,
Ok this is coming too late, but anyway, my comments.
On Fri, Feb 20, 2015 at 5:59 PM, Bruce Richardson <
bruce.richardson@intel.com> wrote:
[snip]
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> index 54cce08..2b16fcb 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> @@ -36,6 +36,7 @@
> #include <dirent.h>
> #include <sys/stat.h>
> #include <sys/mman.h>
> +#include <linux/pci_regs.h>
>
> #include <rte_log.h>
> #include <rte_pci.h>
> @@ -47,71 +48,73 @@
> #include "eal_filesystem.h"
> #include "eal_pci_init.h"
>
> -static int pci_parse_sysfs_value(const char *filename, uint64_t *val);
> -
> void *pci_map_addr = NULL;
>
>
> #define OFF_MAX ((uint64_t)(off_t)-1)
> static int
> -pci_uio_get_mappings(const char *devname, struct pci_map maps[], int
> nb_maps)
> +pci_uio_get_mappings(struct rte_pci_device *dev,
> + struct pci_map maps[], int nb_maps)
> {
> - int i;
> - char dirname[PATH_MAX];
> + struct rte_pci_addr *loc = &dev->addr;
> + int i = 0;
> char filename[PATH_MAX];
> - uint64_t offset, size;
> + unsigned long long start_addr, end_addr, flags;
> + FILE *f;
>
> - for (i = 0; i != nb_maps; i++) {
> + snprintf(filename, sizeof(filename),
> + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource",
> + loc->domain, loc->bus, loc->devid, loc->function);
>
> - /* check if map directory exists */
> - snprintf(dirname, sizeof(dirname),
> - "%s/maps/map%u", devname, i);
> -
> - if (access(dirname, F_OK) != 0)
> - break;
> + f = fopen(filename, "r");
> + if (f == NULL) {
> + RTE_LOG(ERR, EAL,
> + "%s(): cannot open sysfs %s\n",
> + __func__, filename);
> + return -1;
> + }
>
> - /* get mapping offset */
> - snprintf(filename, sizeof(filename),
> - "%s/offset", dirname);
> - if (pci_parse_sysfs_value(filename, &offset) < 0) {
> - RTE_LOG(ERR, EAL,
> - "%s(): cannot parse offset of %s\n",
> - __func__, dirname);
> - return -1;
> + while (fscanf(f, "%llx %llx %llx", &start_addr,
> + &end_addr, &flags) == 3 && i < nb_maps) {
> + if (flags & IORESOURCE_MEM) {
> + maps[i].offset = 0x0;
> + maps[i].size = end_addr - start_addr + 1;
> + maps[i].phaddr = start_addr;
> + i++;
> }
> + }
> + fclose(f);
>
> - /* get mapping size */
> - snprintf(filename, sizeof(filename),
> - "%s/size", dirname);
> - if (pci_parse_sysfs_value(filename, &size) < 0) {
> - RTE_LOG(ERR, EAL,
> - "%s(): cannot parse size of %s\n",
> - __func__, dirname);
> - return -1;
> - }
> + return i;
> +}
>
> - /* get mapping physical address */
> - snprintf(filename, sizeof(filename),
> - "%s/addr", dirname);
> - if (pci_parse_sysfs_value(filename, &maps[i].phaddr) < 0) {
> - RTE_LOG(ERR, EAL,
> - "%s(): cannot parse addr of %s\n",
> - __func__, dirname);
> - return -1;
> - }
>
This function ends up reinventing the wheel from eal_pci.c plus it adds
some new array with mappings in them but not indexed the same way as
eal_pci.c see comments at the end of this mail.
> +static int
> +pci_uio_set_bus_master(int dev_fd)
> +{
> + uint16_t reg;
> + int ret;
>
> - if ((offset > OFF_MAX) || (size > SIZE_MAX)) {
> - RTE_LOG(ERR, EAL,
> - "%s(): offset/size exceed system max
> value\n",
> - __func__);
> - return -1;
> - }
> + ret = pread(dev_fd, ®, sizeof(reg), PCI_COMMAND);
> + if (ret != sizeof(reg)) {
> + RTE_LOG(ERR, EAL,
> + "Cannot read command from PCI config space!\n");
> + return -1;
> + }
> +
> + /* return if bus mastering is already on */
> + if (reg & PCI_COMMAND_MASTER)
> + return 0;
> +
> + reg |= PCI_COMMAND_MASTER;
>
> - maps[i].offset = offset;
> - maps[i].size = size;
> + ret = pwrite(dev_fd, ®, sizeof(reg), PCI_COMMAND);
> + if (ret != sizeof(reg)) {
> + RTE_LOG(ERR, EAL,
> + "Cannot write command to PCI config space!\n");
> + return -1;
> }
>
> - return i;
> + return 0;
> }
>
It would have been the good time to have a generic api to access pci config
space.
Something like what Stephen proposed.
> static int
> @@ -127,6 +130,10 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
> continue;
>
> for (i = 0; i != uio_res->nb_maps; i++) {
> + /* ignore mappings unused in primary process */
> + if (uio_res->maps[i].addr == NULL)
> + continue;
> +
/*
> * open devname, to mmap it
> */
> @@ -282,6 +289,7 @@ pci_uio_map_resource(struct rte_pci_device *dev)
> {
> int i, j;
> char dirname[PATH_MAX];
> + char cfgname[PATH_MAX];
> char devname[PATH_MAX]; /* contains the /dev/uioX */
> void *mapaddr;
> int uio_num;
> @@ -294,6 +302,7 @@ pci_uio_map_resource(struct rte_pci_device *dev)
> struct pci_map *maps;
>
> dev->intr_handle.fd = -1;
> + dev->intr_handle.uio_cfg_fd = -1;
> dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
>
> /* secondary processes - use already recorded details */
> @@ -318,6 +327,28 @@ pci_uio_map_resource(struct rte_pci_device *dev)
> }
> dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
>
> + snprintf(cfgname, sizeof(cfgname),
> + "/sys/class/uio/uio%u/device/config", uio_num);
> + dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR);
> + if (dev->intr_handle.uio_cfg_fd < 0) {
> + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> + cfgname, strerror(errno));
> + return -1;
> + }
> +
> + /* update devname for mmap */
> + snprintf(devname, sizeof(devname),
> + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource%d",
> + loc->domain, loc->bus, loc->devid, loc->function, 0);
>
Why bother with a %d if you hardcode 0 ?
More importantly, since you hardcode "devname" to /sys/.../resource0, then
the mmap code will use a fd on this file.
I really am skeptical on the fact that it can work for devices that have no
bar0.
Then, how is this supposed to work ?
You rely on sysfs mmap code for pci resources.
Is this really equivalent to uio mmap operations ?
> +
> + /* set bus master that is not done by uio_pci_generic */
> + if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> + if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) {
> + RTE_LOG(ERR, EAL, "Cannot set up bus
> mastering!\n");
> + return -1;
> + }
> + }
> +
>
You are already running in a primary process, this check is useless.
> /* allocate the mapping details for secondary processes*/
> uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
> if (uio_res == NULL) {
> @@ -330,13 +361,12 @@ pci_uio_map_resource(struct rte_pci_device *dev)
> memcpy(&uio_res->pci_addr, &dev->addr, sizeof(uio_res->pci_addr));
>
> /* collect info about device mappings */
> - nb_maps = pci_uio_get_mappings(dirname, uio_res->maps,
> - RTE_DIM(uio_res->maps));
> + nb_maps = pci_uio_get_mappings(dev, uio_res->maps,
> + RTE_DIM(uio_res->maps));
> if (nb_maps < 0) {
> rte_free(uio_res);
> return nb_maps;
> }
> -
> uio_res->nb_maps = nb_maps;
>
> /* Map all BARs */
>
Ok then after this, we use this temp array uio_res->maps and we loop all
over the pci resources.
Why do we have all these loops ?
I could see no point before, and with this change, it is bothering me again.
Won't it be easier to loop on the pci resources discovered by eal_pci.c
before this function is called ?
eal_pci.c is responsible for discovering pci devices, prepare those devices
(filling mem_resource[] for example), then eal_pci_uio.c / eal_pci_vfio.c
do the "mapping" stuff.
So uio / vfio must not overwrite what has already been set before.
@@ -104,6 +104,9 @@ extern struct pci_device_list pci_device_list; /**< Global list of PCI devices.
/** Nb. of values in PCI resource format. */
#define PCI_RESOURCE_FMT_NVAL 3
+/** IO resource type: memory address space */
+#define IORESOURCE_MEM 0x00000200
+
/**
* A structure describing a PCI resource.
*/
@@ -138,8 +138,6 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
}
/* parse the "resource" sysfs file */
-#define IORESOURCE_MEM 0x00000200
-
static int
pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev)
{
@@ -519,7 +517,7 @@ pci_map_device(struct rte_pci_device *dev)
return ret;
}
#endif
- /* map resources for devices that use igb_uio */
+ /* map resources for devices that use uio_pci_generic or igb_uio */
if (!mapped) {
ret = pci_uio_map_resource(dev);
if (ret != 0)
@@ -36,6 +36,7 @@
#include <dirent.h>
#include <sys/stat.h>
#include <sys/mman.h>
+#include <linux/pci_regs.h>
#include <rte_log.h>
#include <rte_pci.h>
@@ -47,71 +48,73 @@
#include "eal_filesystem.h"
#include "eal_pci_init.h"
-static int pci_parse_sysfs_value(const char *filename, uint64_t *val);
-
void *pci_map_addr = NULL;
#define OFF_MAX ((uint64_t)(off_t)-1)
static int
-pci_uio_get_mappings(const char *devname, struct pci_map maps[], int nb_maps)
+pci_uio_get_mappings(struct rte_pci_device *dev,
+ struct pci_map maps[], int nb_maps)
{
- int i;
- char dirname[PATH_MAX];
+ struct rte_pci_addr *loc = &dev->addr;
+ int i = 0;
char filename[PATH_MAX];
- uint64_t offset, size;
+ unsigned long long start_addr, end_addr, flags;
+ FILE *f;
- for (i = 0; i != nb_maps; i++) {
+ snprintf(filename, sizeof(filename),
+ SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource",
+ loc->domain, loc->bus, loc->devid, loc->function);
- /* check if map directory exists */
- snprintf(dirname, sizeof(dirname),
- "%s/maps/map%u", devname, i);
-
- if (access(dirname, F_OK) != 0)
- break;
+ f = fopen(filename, "r");
+ if (f == NULL) {
+ RTE_LOG(ERR, EAL,
+ "%s(): cannot open sysfs %s\n",
+ __func__, filename);
+ return -1;
+ }
- /* get mapping offset */
- snprintf(filename, sizeof(filename),
- "%s/offset", dirname);
- if (pci_parse_sysfs_value(filename, &offset) < 0) {
- RTE_LOG(ERR, EAL,
- "%s(): cannot parse offset of %s\n",
- __func__, dirname);
- return -1;
+ while (fscanf(f, "%llx %llx %llx", &start_addr,
+ &end_addr, &flags) == 3 && i < nb_maps) {
+ if (flags & IORESOURCE_MEM) {
+ maps[i].offset = 0x0;
+ maps[i].size = end_addr - start_addr + 1;
+ maps[i].phaddr = start_addr;
+ i++;
}
+ }
+ fclose(f);
- /* get mapping size */
- snprintf(filename, sizeof(filename),
- "%s/size", dirname);
- if (pci_parse_sysfs_value(filename, &size) < 0) {
- RTE_LOG(ERR, EAL,
- "%s(): cannot parse size of %s\n",
- __func__, dirname);
- return -1;
- }
+ return i;
+}
- /* get mapping physical address */
- snprintf(filename, sizeof(filename),
- "%s/addr", dirname);
- if (pci_parse_sysfs_value(filename, &maps[i].phaddr) < 0) {
- RTE_LOG(ERR, EAL,
- "%s(): cannot parse addr of %s\n",
- __func__, dirname);
- return -1;
- }
+static int
+pci_uio_set_bus_master(int dev_fd)
+{
+ uint16_t reg;
+ int ret;
- if ((offset > OFF_MAX) || (size > SIZE_MAX)) {
- RTE_LOG(ERR, EAL,
- "%s(): offset/size exceed system max value\n",
- __func__);
- return -1;
- }
+ ret = pread(dev_fd, ®, sizeof(reg), PCI_COMMAND);
+ if (ret != sizeof(reg)) {
+ RTE_LOG(ERR, EAL,
+ "Cannot read command from PCI config space!\n");
+ return -1;
+ }
+
+ /* return if bus mastering is already on */
+ if (reg & PCI_COMMAND_MASTER)
+ return 0;
+
+ reg |= PCI_COMMAND_MASTER;
- maps[i].offset = offset;
- maps[i].size = size;
+ ret = pwrite(dev_fd, ®, sizeof(reg), PCI_COMMAND);
+ if (ret != sizeof(reg)) {
+ RTE_LOG(ERR, EAL,
+ "Cannot write command to PCI config space!\n");
+ return -1;
}
- return i;
+ return 0;
}
static int
@@ -127,6 +130,10 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
continue;
for (i = 0; i != uio_res->nb_maps; i++) {
+ /* ignore mappings unused in primary process */
+ if (uio_res->maps[i].addr == NULL)
+ continue;
+
/*
* open devname, to mmap it
*/
@@ -282,6 +289,7 @@ pci_uio_map_resource(struct rte_pci_device *dev)
{
int i, j;
char dirname[PATH_MAX];
+ char cfgname[PATH_MAX];
char devname[PATH_MAX]; /* contains the /dev/uioX */
void *mapaddr;
int uio_num;
@@ -294,6 +302,7 @@ pci_uio_map_resource(struct rte_pci_device *dev)
struct pci_map *maps;
dev->intr_handle.fd = -1;
+ dev->intr_handle.uio_cfg_fd = -1;
dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
/* secondary processes - use already recorded details */
@@ -318,6 +327,28 @@ pci_uio_map_resource(struct rte_pci_device *dev)
}
dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
+ snprintf(cfgname, sizeof(cfgname),
+ "/sys/class/uio/uio%u/device/config", uio_num);
+ dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR);
+ if (dev->intr_handle.uio_cfg_fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+ cfgname, strerror(errno));
+ return -1;
+ }
+
+ /* update devname for mmap */
+ snprintf(devname, sizeof(devname),
+ SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource%d",
+ loc->domain, loc->bus, loc->devid, loc->function, 0);
+
+ /* set bus master that is not done by uio_pci_generic */
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) {
+ RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
+ return -1;
+ }
+ }
+
/* allocate the mapping details for secondary processes*/
uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
if (uio_res == NULL) {
@@ -330,13 +361,12 @@ pci_uio_map_resource(struct rte_pci_device *dev)
memcpy(&uio_res->pci_addr, &dev->addr, sizeof(uio_res->pci_addr));
/* collect info about device mappings */
- nb_maps = pci_uio_get_mappings(dirname, uio_res->maps,
- RTE_DIM(uio_res->maps));
+ nb_maps = pci_uio_get_mappings(dev, uio_res->maps,
+ RTE_DIM(uio_res->maps));
if (nb_maps < 0) {
rte_free(uio_res);
return nb_maps;
}
-
uio_res->nb_maps = nb_maps;
/* Map all BARs */
@@ -403,38 +433,3 @@ pci_uio_map_resource(struct rte_pci_device *dev)
return 0;
}
-
-/*
- * parse a sysfs file containing one integer value
- * different to the eal version, as it needs to work with 64-bit values
- */
-static int
-pci_parse_sysfs_value(const char *filename, uint64_t *val)
-{
- FILE *f;
- char buf[BUFSIZ];
- char *end = NULL;
-
- f = fopen(filename, "r");
- if (f == NULL) {
- RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
- __func__, filename);
- return -1;
- }
-
- if (fgets(buf, sizeof(buf), f) == NULL) {
- RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
- __func__, filename);
- fclose(f);
- return -1;
- }
- *val = strtoull(buf, &end, 0);
- if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
- RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
- __func__, filename);
- fclose(f);
- return -1;
- }
- fclose(f);
- return 0;
-}
@@ -50,8 +50,12 @@ enum rte_intr_handle_type {
/** Handle for interrupts. */
struct rte_intr_handle {
- int vfio_dev_fd; /**< VFIO device file descriptor */
- int fd; /**< file descriptor */
+ union {
+ int vfio_dev_fd; /**< VFIO device file descriptor */
+ int uio_cfg_fd; /**< UIO config file descriptor
+ for uio_pci_generic */
+ };
+ int fd; /**< interrupt event file descriptor */
enum rte_intr_handle_type type; /**< handle type */
};