From patchwork Mon Jul 15 07:52:10 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tiwei Bie X-Patchwork-Id: 56423 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id D89631B96E; Mon, 15 Jul 2019 09:54:14 +0200 (CEST) Received: from mga05.intel.com (mga05.intel.com [192.55.52.43]) by dpdk.org (Postfix) with ESMTP id 8FD5E322C for ; Mon, 15 Jul 2019 09:54:11 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by fmsmga105.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 Jul 2019 00:54:11 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.63,493,1557212400"; d="scan'208";a="175025605" Received: from npg-dpdk-virtio-tbie-2.sh.intel.com ([10.67.104.66]) by FMSMGA003.fm.intel.com with ESMTP; 15 Jul 2019 00:54:09 -0700 From: Tiwei Bie To: dev@dpdk.org Cc: ferruh.yigit@intel.com, anatoly.burakov@intel.com, bruce.richardson@intel.com, keith.wiles@intel.com, david.marchand@redhat.com, alejandro.lucero@netronome.com, cunming.liang@intel.com Date: Mon, 15 Jul 2019 15:52:10 +0800 Message-Id: <20190715075214.16616-2-tiwei.bie@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20190715075214.16616-1-tiwei.bie@intel.com> References: <20190403071844.21126-1-tiwei.bie@intel.com> <20190715075214.16616-1-tiwei.bie@intel.com> Subject: [dpdk-dev] [RFC v2 1/5] bus/pci: introduce an internal representation of PCI device X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This patch introduces an internal representation of the PCI device which will be used to store the internal 
information that doesn't have to be exposed, e.g. the VFIO region sizes/offsets. In this patch, the internal structure is simply a wrapper around the rte_pci_device structure. More fields will be added in the coming patches. Suggested-by: David Marchand Signed-off-by: Tiwei Bie --- drivers/bus/pci/bsd/pci.c | 14 ++++++++------ drivers/bus/pci/linux/pci.c | 25 ++++++++++++++----------- drivers/bus/pci/pci_common.c | 2 +- drivers/bus/pci/private.h | 12 ++++++++++++ 4 files changed, 35 insertions(+), 18 deletions(-) diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c index a2de70910..636868f38 100644 --- a/drivers/bus/pci/bsd/pci.c +++ b/drivers/bus/pci/bsd/pci.c @@ -213,16 +213,18 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, static int pci_scan_one(int dev_pci_fd, struct pci_conf *conf) { + struct rte_pci_device_internal *pdev; struct rte_pci_device *dev; struct pci_bar_io bar; unsigned i, max; - dev = malloc(sizeof(*dev)); - if (dev == NULL) { + pdev = malloc(sizeof(*pdev)); + if (pdev == NULL) return -1; - } - memset(dev, 0, sizeof(*dev)); + memset(pdev, 0, sizeof(*pdev)); + + dev = &pdev->device; dev->device.bus = &rte_pci_bus.bus; dev->addr.domain = conf->pc_sel.pc_domain; @@ -308,7 +310,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) memmove(dev2->mem_resource, dev->mem_resource, sizeof(dev->mem_resource)); - free(dev); + free(pdev); } return 0; } @@ -318,7 +320,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) return 0; skipdev: - free(dev); + free(pdev); return 0; } diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index 33c8ea7e9..dfab7b81b 100644 --- a/drivers/bus/pci/linux/pci.c +++ b/drivers/bus/pci/linux/pci.c @@ -219,22 +219,25 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) { char filename[PATH_MAX]; unsigned long tmp; + struct rte_pci_device_internal *pdev; struct rte_pci_device *dev; char driver[PATH_MAX]; int ret; - dev = malloc(sizeof(*dev)); - if (dev 
== NULL) + pdev = malloc(sizeof(*pdev)); + if (pdev == NULL) return -1; - memset(dev, 0, sizeof(*dev)); + memset(pdev, 0, sizeof(*pdev)); + + dev = &pdev->device; dev->device.bus = &rte_pci_bus.bus; dev->addr = *addr; /* get vendor id */ snprintf(filename, sizeof(filename), "%s/vendor", dirname); if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); + free(pdev); return -1; } dev->id.vendor_id = (uint16_t)tmp; @@ -242,7 +245,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) /* get device id */ snprintf(filename, sizeof(filename), "%s/device", dirname); if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); + free(pdev); return -1; } dev->id.device_id = (uint16_t)tmp; @@ -251,7 +254,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) snprintf(filename, sizeof(filename), "%s/subsystem_vendor", dirname); if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); + free(pdev); return -1; } dev->id.subsystem_vendor_id = (uint16_t)tmp; @@ -260,7 +263,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) snprintf(filename, sizeof(filename), "%s/subsystem_device", dirname); if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); + free(pdev); return -1; } dev->id.subsystem_device_id = (uint16_t)tmp; @@ -269,7 +272,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) snprintf(filename, sizeof(filename), "%s/class", dirname); if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); + free(pdev); return -1; } /* the least 24 bits are valid: class, subclass, program interface */ @@ -309,7 +312,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) snprintf(filename, sizeof(filename), "%s/resource", dirname); if (pci_parse_sysfs_resource(filename, dev) < 0) { RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__); - free(dev); + free(pdev); return -1; } @@ -318,7 +321,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) ret = 
pci_get_kernel_driver_by_path(filename, driver, sizeof(driver)); if (ret < 0) { RTE_LOG(ERR, EAL, "Fail to get kernel driver\n"); - free(dev); + free(pdev); return -1; } @@ -382,7 +385,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) RTE_LOG(ERR, EAL, "Unexpected device scan at %s!\n", filename); } - free(dev); + free(pdev); } return 0; } diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c index d2af472ef..8b9deca8b 100644 --- a/drivers/bus/pci/pci_common.c +++ b/drivers/bus/pci/pci_common.c @@ -523,7 +523,7 @@ pci_unplug(struct rte_device *dev) if (ret == 0) { rte_pci_remove_device(pdev); rte_devargs_remove(dev->devargs); - free(pdev); + free(RTE_PCI_DEVICE_INTERNAL(pdev)); } return ret; } diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h index 8a5524052..3e2abd818 100644 --- a/drivers/bus/pci/private.h +++ b/drivers/bus/pci/private.h @@ -10,6 +10,14 @@ #include #include +/* + * Convert struct rte_pci_device to struct rte_pci_device_internal + */ +#define RTE_PCI_DEVICE_INTERNAL(ptr) \ + container_of(ptr, struct rte_pci_device_internal, device) +#define RTE_PCI_DEVICE_INTERNAL_CONST(ptr) \ + container_of(ptr, const struct rte_pci_device_internal, device) + extern struct rte_pci_bus rte_pci_bus; struct rte_pci_driver; @@ -17,6 +25,10 @@ struct rte_pci_device; extern struct rte_pci_bus rte_pci_bus; +struct rte_pci_device_internal { + struct rte_pci_device device; +}; + /** * Probe the PCI bus * From patchwork Mon Jul 15 07:52:11 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tiwei Bie X-Patchwork-Id: 56424 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id C3DBB1B9AC; Mon, 15 Jul 2019 09:54:16 +0200 (CEST) Received: from mga05.intel.com (mga05.intel.com [192.55.52.43]) by 
dpdk.org (Postfix) with ESMTP id 1E8DF1B95C for ; Mon, 15 Jul 2019 09:54:12 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by fmsmga105.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 Jul 2019 00:54:12 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.63,493,1557212400"; d="scan'208";a="175025611" Received: from npg-dpdk-virtio-tbie-2.sh.intel.com ([10.67.104.66]) by FMSMGA003.fm.intel.com with ESMTP; 15 Jul 2019 00:54:11 -0700 From: Tiwei Bie To: dev@dpdk.org Cc: ferruh.yigit@intel.com, anatoly.burakov@intel.com, bruce.richardson@intel.com, keith.wiles@intel.com, david.marchand@redhat.com, alejandro.lucero@netronome.com, cunming.liang@intel.com Date: Mon, 15 Jul 2019 15:52:11 +0800 Message-Id: <20190715075214.16616-3-tiwei.bie@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20190715075214.16616-1-tiwei.bie@intel.com> References: <20190403071844.21126-1-tiwei.bie@intel.com> <20190715075214.16616-1-tiwei.bie@intel.com> Subject: [dpdk-dev] [RFC v2 2/5] bus/pci: avoid depending on private value in kernel source X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" The value 40 used in VFIO_GET_REGION_ADDR() is a private value (VFIO_PCI_OFFSET_SHIFT) defined in Linux kernel source [1]. It is not part of VFIO API, and we should not depend on it. 
[1] https://github.com/torvalds/linux/blob/6fbc7275c7a9/drivers/vfio/pci/vfio_pci_private.h#L19 Signed-off-by: Tiwei Bie --- drivers/bus/pci/linux/pci.c | 4 +- drivers/bus/pci/linux/pci_init.h | 4 +- drivers/bus/pci/linux/pci_vfio.c | 176 ++++++++++++++++++++++++------- drivers/bus/pci/private.h | 10 ++ 4 files changed, 154 insertions(+), 40 deletions(-) diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index dfab7b81b..00bfbb301 100644 --- a/drivers/bus/pci/linux/pci.c +++ b/drivers/bus/pci/linux/pci.c @@ -639,7 +639,7 @@ int rte_pci_read_config(const struct rte_pci_device *device, return pci_uio_read_config(intr_handle, buf, len, offset); #ifdef VFIO_PRESENT case RTE_KDRV_VFIO: - return pci_vfio_read_config(intr_handle, buf, len, offset); + return pci_vfio_read_config(device, buf, len, offset); #endif default: rte_pci_device_name(&device->addr, devname, @@ -663,7 +663,7 @@ int rte_pci_write_config(const struct rte_pci_device *device, return pci_uio_write_config(intr_handle, buf, len, offset); #ifdef VFIO_PRESENT case RTE_KDRV_VFIO: - return pci_vfio_write_config(intr_handle, buf, len, offset); + return pci_vfio_write_config(device, buf, len, offset); #endif default: rte_pci_device_name(&device->addr, devname, diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h index c2e603a37..c6542a8f9 100644 --- a/drivers/bus/pci/linux/pci_init.h +++ b/drivers/bus/pci/linux/pci_init.h @@ -64,9 +64,9 @@ int pci_uio_ioport_unmap(struct rte_pci_ioport *p); #endif /* access config space */ -int pci_vfio_read_config(const struct rte_intr_handle *intr_handle, +int pci_vfio_read_config(const struct rte_pci_device *dev, void *buf, size_t len, off_t offs); -int pci_vfio_write_config(const struct rte_intr_handle *intr_handle, +int pci_vfio_write_config(const struct rte_pci_device *dev, const void *buf, size_t len, off_t offs); int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, diff --git a/drivers/bus/pci/linux/pci_vfio.c 
b/drivers/bus/pci/linux/pci_vfio.c index ee3123965..2dc4a9299 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -49,35 +49,82 @@ static struct rte_tailq_elem rte_vfio_tailq = { }; EAL_REGISTER_TAILQ(rte_vfio_tailq) +static int +pci_vfio_get_region(const struct rte_pci_device *dev, int index, + uint64_t *size, uint64_t *offset) +{ + const struct rte_pci_device_internal *pdev = + RTE_PCI_DEVICE_INTERNAL_CONST(dev); + + if (index >= VFIO_PCI_NUM_REGIONS || index >= RTE_MAX_PCI_REGIONS) + return -1; + + if (pdev->region[index].size == 0 && pdev->region[index].offset == 0) + return -1; + + *size = pdev->region[index].size; + *offset = pdev->region[index].offset; + + return 0; +} + int -pci_vfio_read_config(const struct rte_intr_handle *intr_handle, +pci_vfio_read_config(const struct rte_pci_device *dev, void *buf, size_t len, off_t offs) { - return pread64(intr_handle->vfio_dev_fd, buf, len, - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); + uint64_t size, offset; + int fd; + + fd = dev->intr_handle.vfio_dev_fd; + + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) + return -1; + + if ((uint64_t)len + offs > size) + return -1; + + return pread64(fd, buf, len, offset + offs); } int -pci_vfio_write_config(const struct rte_intr_handle *intr_handle, +pci_vfio_write_config(const struct rte_pci_device *dev, const void *buf, size_t len, off_t offs) { - return pwrite64(intr_handle->vfio_dev_fd, buf, len, - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); + uint64_t size, offset; + int fd; + + fd = dev->intr_handle.vfio_dev_fd; + + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) + return -1; + + if ((uint64_t)len + offs > size) + return -1; + + return pwrite64(fd, buf, len, offset + offs); } /* get PCI BAR number where MSI-X interrupts are */ static int -pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) +pci_vfio_get_msix_bar(const 
struct rte_pci_device *dev, int fd, + struct pci_msix_table *msix_table) { int ret; uint32_t reg; uint16_t flags; uint8_t cap_id, cap_offset; + uint64_t size, offset; + + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) { + RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n"); + return -1; + } /* read PCI capability pointer from config space */ - ret = pread64(fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_CAPABILITY_LIST); + ret = pread64(fd, &reg, sizeof(reg), offset + PCI_CAPABILITY_LIST); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " "config space!\n"); @@ -90,9 +137,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) while (cap_offset) { /* read PCI capability ID */ - ret = pread64(fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset); + ret = pread64(fd, &reg, sizeof(reg), offset + cap_offset); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI " "config space!\n"); @@ -105,8 +150,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) /* if we haven't reached MSI-X, check next capability */ if (cap_id != PCI_CAP_ID_MSIX) { ret = pread64(fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset); + offset + cap_offset); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " "config space!\n"); @@ -122,8 +166,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) else { /* table offset resides in the next 4 bytes */ ret = pread64(fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset + 4); + offset + cap_offset + 4); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config " "space!\n"); @@ -131,8 +174,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) } ret = pread64(fd, &flags, sizeof(flags), - 
VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset + 2); + offset + cap_offset + 2); if (ret != sizeof(flags)) { RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config " "space!\n"); @@ -152,14 +194,19 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) /* set PCI bus mastering */ static int -pci_vfio_set_bus_master(int dev_fd, bool op) +pci_vfio_set_bus_master(const struct rte_pci_device *dev, int dev_fd, bool op) { + uint64_t size, offset; uint16_t reg; int ret; - ret = pread64(dev_fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) { + RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n"); + return -1; + } + + ret = pread64(dev_fd, &reg, sizeof(reg), offset + PCI_COMMAND); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); return -1; @@ -171,10 +218,7 @@ pci_vfio_set_bus_master(int dev_fd, bool op) else reg &= ~(PCI_COMMAND_MASTER); - ret = pwrite64(dev_fd, &reg, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); - + ret = pwrite64(dev_fd, &reg, sizeof(reg), offset + PCI_COMMAND); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); return -1; @@ -405,14 +449,21 @@ pci_vfio_disable_notifier(struct rte_pci_device *dev) #endif static int -pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index) +pci_vfio_is_ioport_bar(const struct rte_pci_device *dev, + int vfio_dev_fd, int bar_index) { + uint64_t size, offset; uint32_t ioport_bar; int ret; + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) { + RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n"); + return -1; + } + ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) - + PCI_BASE_ADDRESS_0 + bar_index*4); + offset + PCI_BASE_ADDRESS_0 + bar_index*4); if 
(ret != sizeof(ioport_bar)) { RTE_LOG(ERR, EAL, "Cannot read command (%x) from config space!\n", PCI_BASE_ADDRESS_0 + bar_index*4); @@ -431,7 +482,7 @@ pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd) } /* set bus mastering for the device */ - if (pci_vfio_set_bus_master(vfio_dev_fd, true)) { + if (pci_vfio_set_bus_master(dev, vfio_dev_fd, true)) { RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); return -1; } @@ -645,11 +696,40 @@ pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region) return ret; } +static int +pci_vfio_fill_regions(struct rte_pci_device *dev, int vfio_dev_fd, + struct vfio_device_info *device_info) +{ + struct rte_pci_device_internal *pdev = RTE_PCI_DEVICE_INTERNAL(dev); + struct vfio_region_info *reg = NULL; + int nb_maps, i, ret; + + nb_maps = RTE_MIN((int)device_info->num_regions, + VFIO_PCI_CONFIG_REGION_INDEX + 1); + + for (i = 0; i < nb_maps; i++) { + ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i); + if (ret < 0) { + RTE_LOG(DEBUG, EAL, "%s cannot get device region info error %i (%s)\n", + dev->name, errno, strerror(errno)); + return -1; + } + + pdev->region[i].size = reg->size; + pdev->region[i].offset = reg->offset; + + free(reg); + } + + return 0; +} static int pci_vfio_map_resource_primary(struct rte_pci_device *dev) { + struct rte_pci_device_internal *pdev = RTE_PCI_DEVICE_INTERNAL(dev); struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + struct vfio_region_info *reg = NULL; char pci_addr[PATH_MAX] = {0}; int vfio_dev_fd; struct rte_pci_addr *loc = &dev->addr; @@ -690,11 +770,22 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) /* map BARs */ maps = vfio_res->maps; + ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, + VFIO_PCI_CONFIG_REGION_INDEX); + if (ret < 0) { + RTE_LOG(ERR, EAL, "%s cannot get device region info error %i (%s)\n", + dev->name, errno, strerror(errno)); + goto err_vfio_res; + } + pdev->region[VFIO_PCI_CONFIG_REGION_INDEX].size = reg->size; + 
pdev->region[VFIO_PCI_CONFIG_REGION_INDEX].offset = reg->offset; + free(reg); + vfio_res->msix_table.bar_index = -1; /* get MSI-X BAR, if any (we have to know where it is because we can't * easily mmap it when using VFIO) */ - ret = pci_vfio_get_msix_bar(vfio_dev_fd, &vfio_res->msix_table); + ret = pci_vfio_get_msix_bar(dev, vfio_dev_fd, &vfio_res->msix_table); if (ret < 0) { RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr); @@ -715,7 +806,6 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) } for (i = 0; i < (int) vfio_res->nb_maps; i++) { - struct vfio_region_info *reg = NULL; void *bar_addr; ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i); @@ -726,8 +816,11 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) goto err_vfio_res; } + pdev->region[i].size = reg->size; + pdev->region[i].offset = reg->offset; + /* chk for io port region */ - ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i); + ret = pci_vfio_is_ioport_bar(dev, vfio_dev_fd, i); if (ret < 0) { free(reg); goto err_vfio_res; @@ -833,6 +926,10 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) if (ret) return ret; + ret = pci_vfio_fill_regions(dev, vfio_dev_fd, &device_info); + if (ret) + return ret; + /* map BARs */ maps = vfio_res->maps; @@ -938,7 +1035,7 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev) return -1; } - if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) { + if (pci_vfio_set_bus_master(dev, dev->intr_handle.vfio_dev_fd, false)) { RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n", pci_addr); return -1; @@ -1016,14 +1113,21 @@ int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p) { + uint64_t size, offset; + if (bar < VFIO_PCI_BAR0_REGION_INDEX || bar > VFIO_PCI_BAR5_REGION_INDEX) { RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar); return -1; } + if (pci_vfio_get_region(dev, bar, &size, &offset) != 0) { + RTE_LOG(ERR, EAL, "Cannot get offset of region %d.\n", bar); + 
return -1; + } + p->dev = dev; - p->base = VFIO_GET_REGION_ADDR(bar); + p->base = offset; return 0; } diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h index 3e2abd818..c09185b86 100644 --- a/drivers/bus/pci/private.h +++ b/drivers/bus/pci/private.h @@ -10,6 +10,8 @@ #include #include +#define RTE_MAX_PCI_REGIONS 9 + /* * Convert struct rte_pci_device to struct rte_pci_device_internal */ @@ -25,8 +27,16 @@ struct rte_pci_device; extern struct rte_pci_bus rte_pci_bus; +struct rte_pci_region { + uint64_t size; + uint64_t offset; +}; + struct rte_pci_device_internal { struct rte_pci_device device; + + /* PCI regions provided by e.g. VFIO. */ + struct rte_pci_region region[RTE_MAX_PCI_REGIONS]; }; /** From patchwork Mon Jul 15 07:52:12 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tiwei Bie X-Patchwork-Id: 56425 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id A266D1B9D9; Mon, 15 Jul 2019 09:54:19 +0200 (CEST) Received: from mga05.intel.com (mga05.intel.com [192.55.52.43]) by dpdk.org (Postfix) with ESMTP id ACE783772 for ; Mon, 15 Jul 2019 09:54:14 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by fmsmga105.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 Jul 2019 00:54:14 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.63,493,1557212400"; d="scan'208";a="175025616" Received: from npg-dpdk-virtio-tbie-2.sh.intel.com ([10.67.104.66]) by FMSMGA003.fm.intel.com with ESMTP; 15 Jul 2019 00:54:12 -0700 From: Tiwei Bie To: dev@dpdk.org Cc: ferruh.yigit@intel.com, anatoly.burakov@intel.com, bruce.richardson@intel.com, keith.wiles@intel.com, david.marchand@redhat.com, alejandro.lucero@netronome.com, cunming.liang@intel.com 
Date: Mon, 15 Jul 2019 15:52:12 +0800 Message-Id: <20190715075214.16616-4-tiwei.bie@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20190715075214.16616-1-tiwei.bie@intel.com> References: <20190403071844.21126-1-tiwei.bie@intel.com> <20190715075214.16616-1-tiwei.bie@intel.com> Subject: [dpdk-dev] [RFC v2 3/5] bus/pci: introduce helper for MMIO read and write X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" The MMIO regions may not be mmap-able for mediated PCI device. In this case, the application should explicitly do read and write to access these regions. Signed-off-by: Tiwei Bie --- drivers/bus/pci/bsd/pci.c | 22 ++++++++++++ drivers/bus/pci/linux/pci.c | 46 ++++++++++++++++++++++++ drivers/bus/pci/linux/pci_init.h | 10 ++++++ drivers/bus/pci/linux/pci_uio.c | 22 ++++++++++++ drivers/bus/pci/linux/pci_vfio.c | 36 +++++++++++++++++++ drivers/bus/pci/rte_bus_pci.h | 48 +++++++++++++++++++++++++ drivers/bus/pci/rte_bus_pci_version.map | 7 ++++ 7 files changed, 191 insertions(+) diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c index 636868f38..d4d2c9016 100644 --- a/drivers/bus/pci/bsd/pci.c +++ b/drivers/bus/pci/bsd/pci.c @@ -527,6 +527,28 @@ int rte_pci_write_config(const struct rte_pci_device *dev, return -1; } +/* Read PCI MMIO space. */ +int rte_pci_mmio_read(const struct rte_pci_device *dev, int bar, + void *buf, size_t len, off_t offset) +{ + if (bar >= PCI_MAX_RESOURCE || dev->mem_resource[bar].addr == NULL || + (uint64_t)offset + len > dev->mem_resource[bar].len) + return -1; + memcpy(buf, (uint8_t *)dev->mem_resource[bar].addr + offset, len); + return len; +} + +/* Write PCI MMIO space. 
*/ +int rte_pci_mmio_write(const struct rte_pci_device *dev, int bar, + const void *buf, size_t len, off_t offset) +{ + if (bar >= PCI_MAX_RESOURCE || dev->mem_resource[bar].addr == NULL || + (uint64_t)offset + len > dev->mem_resource[bar].len) + return -1; + memcpy((uint8_t *)dev->mem_resource[bar].addr + offset, buf, len); + return len; +} + int rte_pci_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p) diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index 00bfbb301..bdfc8c5ff 100644 --- a/drivers/bus/pci/linux/pci.c +++ b/drivers/bus/pci/linux/pci.c @@ -674,6 +674,52 @@ int rte_pci_write_config(const struct rte_pci_device *device, } } +/* Read PCI MMIO space. */ +int rte_pci_mmio_read(const struct rte_pci_device *device, int bar, + void *buf, size_t len, off_t offset) +{ + char devname[RTE_DEV_NAME_MAX_LEN] = ""; + + switch (device->kdrv) { + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + return pci_uio_mmio_read(device, bar, buf, len, offset); +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + return pci_vfio_mmio_read(device, bar, buf, len, offset); +#endif + default: + rte_pci_device_name(&device->addr, devname, + RTE_DEV_NAME_MAX_LEN); + RTE_LOG(ERR, EAL, + "Unknown driver type for %s\n", devname); + return -1; + } +} + +/* Write PCI MMIO space. 
*/ +int rte_pci_mmio_write(const struct rte_pci_device *device, int bar, + const void *buf, size_t len, off_t offset) +{ + char devname[RTE_DEV_NAME_MAX_LEN] = ""; + + switch (device->kdrv) { + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + return pci_uio_mmio_write(device, bar, buf, len, offset); +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + return pci_vfio_mmio_write(device, bar, buf, len, offset); +#endif + default: + rte_pci_device_name(&device->addr, devname, + RTE_DEV_NAME_MAX_LEN); + RTE_LOG(ERR, EAL, + "Unknown driver type for %s\n", devname); + return -1; + } +} + #if defined(RTE_ARCH_X86) static int pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h index c6542a8f9..158a16977 100644 --- a/drivers/bus/pci/linux/pci_init.h +++ b/drivers/bus/pci/linux/pci_init.h @@ -35,6 +35,11 @@ int pci_uio_read_config(const struct rte_intr_handle *intr_handle, int pci_uio_write_config(const struct rte_intr_handle *intr_handle, const void *buf, size_t len, off_t offs); +int pci_uio_mmio_read(const struct rte_pci_device *dev, int bar, + void *buf, size_t len, off_t offset); +int pci_uio_mmio_write(const struct rte_pci_device *dev, int bar, + const void *buf, size_t len, off_t offset); + int pci_uio_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p); void pci_uio_ioport_read(struct rte_pci_ioport *p, @@ -69,6 +74,11 @@ int pci_vfio_read_config(const struct rte_pci_device *dev, int pci_vfio_write_config(const struct rte_pci_device *dev, const void *buf, size_t len, off_t offs); +int pci_vfio_mmio_read(const struct rte_pci_device *dev, int bar, + void *buf, size_t len, off_t offset); +int pci_vfio_mmio_write(const struct rte_pci_device *dev, int bar, + const void *buf, size_t len, off_t offset); + int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p); void pci_vfio_ioport_read(struct rte_pci_ioport *p, diff --git 
a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c index f240fe4f2..623273541 100644 --- a/drivers/bus/pci/linux/pci_uio.c +++ b/drivers/bus/pci/linux/pci_uio.c @@ -45,6 +45,28 @@ pci_uio_write_config(const struct rte_intr_handle *intr_handle, return pwrite(intr_handle->uio_cfg_fd, buf, len, offset); } +int +pci_uio_mmio_read(const struct rte_pci_device *dev, int bar, + void *buf, size_t len, off_t offset) +{ + if (bar >= PCI_MAX_RESOURCE || dev->mem_resource[bar].addr == NULL || + (uint64_t)offset + len > dev->mem_resource[bar].len) + return -1; + memcpy(buf, (uint8_t *)dev->mem_resource[bar].addr + offset, len); + return len; +} + +int +pci_uio_mmio_write(const struct rte_pci_device *dev, int bar, + const void *buf, size_t len, off_t offset) +{ + if (bar >= PCI_MAX_RESOURCE || dev->mem_resource[bar].addr == NULL || + (uint64_t)offset + len > dev->mem_resource[bar].len) + return -1; + memcpy((uint8_t *)dev->mem_resource[bar].addr + offset, buf, len); + return len; +} + static int pci_uio_set_bus_master(int dev_fd) { diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index 2dc4a9299..204698be0 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -1164,6 +1164,42 @@ pci_vfio_ioport_unmap(struct rte_pci_ioport *p) return -1; } +int +pci_vfio_mmio_read(const struct rte_pci_device *dev, int bar, + void *buf, size_t len, off_t offs) +{ + uint64_t size, offset; + int fd; + + fd = dev->intr_handle.vfio_dev_fd; + + if (pci_vfio_get_region(dev, bar, &size, &offset) != 0) + return -1; + + if ((uint64_t)len + offs > size) + return -1; + + return pread64(fd, buf, len, offset + offs); +} + +int +pci_vfio_mmio_write(const struct rte_pci_device *dev, int bar, + const void *buf, size_t len, off_t offs) +{ + uint64_t size, offset; + int fd; + + fd = dev->intr_handle.vfio_dev_fd; + + if (pci_vfio_get_region(dev, bar, &size, &offset) != 0) + return -1; + + if ((uint64_t)len + offs > size) + return -1; + 
+ return pwrite64(fd, buf, len, offset + offs); +} + int pci_vfio_is_enabled(void) { diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h index 06e004cd3..86527b421 100644 --- a/drivers/bus/pci/rte_bus_pci.h +++ b/drivers/bus/pci/rte_bus_pci.h @@ -285,6 +285,54 @@ int rte_pci_read_config(const struct rte_pci_device *device, int rte_pci_write_config(const struct rte_pci_device *device, const void *buf, size_t len, off_t offset); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Read from a MMIO pci resource. + * + * @param device + * A pointer to a rte_pci_device structure describing the device + * to use + * @param bar + * Index of the io pci resource we want to access. + * @param buf + * A data buffer where the bytes should be read into + * @param len + * The length of the data buffer. + * @param offset + * The offset into MMIO space described by @bar + * @return + * Number of bytes read on success, negative on error. + */ +__rte_experimental +int rte_pci_mmio_read(const struct rte_pci_device *device, int bar, + void *buf, size_t len, off_t offset); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Write to a MMIO pci resource. + * + * @param device + * A pointer to a rte_pci_device structure describing the device + * to use + * @param bar + * Index of the io pci resource we want to access. + * @param buf + * A data buffer containing the bytes should be written + * @param len + * The length of the data buffer. + * @param offset + * The offset into MMIO space described by @bar + * @return + * Number of bytes written on success, negative on error. + */ +__rte_experimental +int rte_pci_mmio_write(const struct rte_pci_device *device, int bar, + const void *buf, size_t len, off_t offset); + /** * A structure used to access io resources for a pci device. 
* rte_pci_ioport is arch, os, driver specific, and should not be used outside diff --git a/drivers/bus/pci/rte_bus_pci_version.map b/drivers/bus/pci/rte_bus_pci_version.map index 27e9c4f10..141bdf48e 100644 --- a/drivers/bus/pci/rte_bus_pci_version.map +++ b/drivers/bus/pci/rte_bus_pci_version.map @@ -16,3 +16,10 @@ DPDK_17.11 { local: *; }; + +EXPERIMENTAL { + global: + + rte_pci_mmio_read; + rte_pci_mmio_write; +}; From patchwork Mon Jul 15 07:52:13 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tiwei Bie X-Patchwork-Id: 56426 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id C18421BC07; Mon, 15 Jul 2019 09:54:22 +0200 (CEST) Received: from mga05.intel.com (mga05.intel.com [192.55.52.43]) by dpdk.org (Postfix) with ESMTP id 3DB071B995 for ; Mon, 15 Jul 2019 09:54:16 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by fmsmga105.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 Jul 2019 00:54:15 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.63,493,1557212400"; d="scan'208";a="175025620" Received: from npg-dpdk-virtio-tbie-2.sh.intel.com ([10.67.104.66]) by FMSMGA003.fm.intel.com with ESMTP; 15 Jul 2019 00:54:14 -0700 From: Tiwei Bie To: dev@dpdk.org Cc: ferruh.yigit@intel.com, anatoly.burakov@intel.com, bruce.richardson@intel.com, keith.wiles@intel.com, david.marchand@redhat.com, alejandro.lucero@netronome.com, cunming.liang@intel.com Date: Mon, 15 Jul 2019 15:52:13 +0800 Message-Id: <20190715075214.16616-5-tiwei.bie@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20190715075214.16616-1-tiwei.bie@intel.com> References: <20190403071844.21126-1-tiwei.bie@intel.com> <20190715075214.16616-1-tiwei.bie@intel.com> Subject: 
[dpdk-dev] [RFC v2 4/5] eal: add a helper for reading string from sysfs X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This patch adds a helper for reading string from sysfs. Signed-off-by: Cunming Liang Signed-off-by: Tiwei Bie --- lib/librte_eal/common/eal_filesystem.h | 10 ++++++++++ lib/librte_eal/freebsd/eal/eal.c | 22 ++++++++++++++++++++++ lib/librte_eal/linux/eal/eal.c | 22 ++++++++++++++++++++++ lib/librte_eal/rte_eal_version.map | 1 + 4 files changed, 55 insertions(+) diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h index 5d21f07c2..be4c51ebb 100644 --- a/lib/librte_eal/common/eal_filesystem.h +++ b/lib/librte_eal/common/eal_filesystem.h @@ -104,4 +104,14 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id * Used to read information from files on /sys */ int eal_parse_sysfs_value(const char *filename, unsigned long *val); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Function to read a line from a file on the filesystem. 
+ * Used to read information from files on /sys + */ +__rte_experimental +int rte_eal_parse_sysfs_str(const char *filename, char *buf, unsigned long sz); + #endif /* EAL_FILESYSTEM_H */ diff --git a/lib/librte_eal/freebsd/eal/eal.c b/lib/librte_eal/freebsd/eal/eal.c index d53f0fe69..78720685f 100644 --- a/lib/librte_eal/freebsd/eal/eal.c +++ b/lib/librte_eal/freebsd/eal/eal.c @@ -209,6 +209,28 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) return 0; } +int +rte_eal_parse_sysfs_str(const char *filename, char *buf, unsigned long sz) +{ + FILE *f; + + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs file %s\n", + __func__, filename); + return -1; + } + + if (fgets(buf, sz, f) == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot read sysfs file %s\n", + __func__, filename); + fclose(f); + return -1; + } + + fclose(f); + return 0; +} /* create memory configuration in shared/mmap memory. Take out * a write lock on the memsegs, so we can auto-detect primary/secondary. diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c index 2e5499f9b..44bad45d3 100644 --- a/lib/librte_eal/linux/eal/eal.c +++ b/lib/librte_eal/linux/eal/eal.c @@ -295,6 +295,28 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) return 0; } +int +rte_eal_parse_sysfs_str(const char *filename, char *buf, unsigned long sz) +{ + FILE *f; + + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs file %s\n", + __func__, filename); + return -1; + } + + if (fgets(buf, sz, f) == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot read sysfs file %s\n", + __func__, filename); + fclose(f); + return -1; + } + + fclose(f); + return 0; +} /* create memory configuration in shared/mmap memory. Take out * a write lock on the memsegs, so we can auto-detect primary/secondary. 
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index 1892d9ea9..a9559176b 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -331,6 +331,7 @@ EXPERIMENTAL { rte_dev_hotplug_handle_enable; rte_dev_iterator_init; rte_dev_iterator_next; + rte_eal_parse_sysfs_str; rte_extmem_attach; rte_extmem_detach; rte_extmem_register; From patchwork Mon Jul 15 07:52:14 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tiwei Bie X-Patchwork-Id: 56427 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 529571BD62; Mon, 15 Jul 2019 09:54:24 +0200 (CEST) Received: from mga05.intel.com (mga05.intel.com [192.55.52.43]) by dpdk.org (Postfix) with ESMTP id 0CB521B95C for ; Mon, 15 Jul 2019 09:54:17 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by fmsmga105.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 Jul 2019 00:54:17 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.63,493,1557212400"; d="scan'208";a="175025626" Received: from npg-dpdk-virtio-tbie-2.sh.intel.com ([10.67.104.66]) by FMSMGA003.fm.intel.com with ESMTP; 15 Jul 2019 00:54:16 -0700 From: Tiwei Bie To: dev@dpdk.org Cc: ferruh.yigit@intel.com, anatoly.burakov@intel.com, bruce.richardson@intel.com, keith.wiles@intel.com, david.marchand@redhat.com, alejandro.lucero@netronome.com, cunming.liang@intel.com Date: Mon, 15 Jul 2019 15:52:14 +0800 Message-Id: <20190715075214.16616-6-tiwei.bie@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20190715075214.16616-1-tiwei.bie@intel.com> References: <20190403071844.21126-1-tiwei.bie@intel.com> <20190715075214.16616-1-tiwei.bie@intel.com> Subject: [dpdk-dev] [RFC v2 5/5] 
bus/pci: add mdev support X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This patch adds the mdev (Mediated device) support in PCI bus driver. With this patch, the PCI bus driver will be able to scan and probe the mediated PCI devices (i.e. the Mediated devices whose device API is "vfio-pci") in the system. There are several things different between physical PCI devices and mediated PCI devices: - Mediated PCI devices have to be accessed through VFIO API; - The regions in mediated PCI devices may not be mmap-able, and drivers need to call read/write function to access them in this case; - Mediated PCI devices use UUID as device address; Signed-off-by: Cunming Liang Signed-off-by: Tiwei Bie --- drivers/bus/pci/linux/Makefile | 1 + drivers/bus/pci/linux/pci.c | 30 +++- drivers/bus/pci/linux/pci_init.h | 15 +- drivers/bus/pci/linux/pci_vfio.c | 104 ++++++++++-- drivers/bus/pci/linux/pci_vfio_mdev.c | 236 ++++++++++++++++++++++++++ drivers/bus/pci/meson.build | 3 +- drivers/bus/pci/pci_common.c | 29 ++-- drivers/bus/pci/rte_bus_pci.h | 17 +- lib/librte_eal/linux/eal/eal.c | 17 +- 9 files changed, 404 insertions(+), 48 deletions(-) create mode 100644 drivers/bus/pci/linux/pci_vfio_mdev.c diff --git a/drivers/bus/pci/linux/Makefile b/drivers/bus/pci/linux/Makefile index 90404468b..c17ab2484 100644 --- a/drivers/bus/pci/linux/Makefile +++ b/drivers/bus/pci/linux/Makefile @@ -4,3 +4,4 @@ SRCS += pci.c SRCS += pci_uio.c SRCS += pci_vfio.c +SRCS += pci_vfio_mdev.c diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index bdfc8c5ff..5be898803 100644 --- a/drivers/bus/pci/linux/pci.c +++ b/drivers/bus/pci/linux/pci.c @@ -31,7 +31,7 @@ extern struct rte_pci_bus rte_pci_bus; -static int +int pci_get_kernel_driver_by_path(const char *filename, char *dri_name, size_t len) { @@ -71,7 
+71,7 @@ rte_pci_map_device(struct rte_pci_device *dev) switch (dev->kdrv) { case RTE_KDRV_VFIO: #ifdef VFIO_PRESENT - if (pci_vfio_is_enabled()) + if (pci_vfio_is_enabled(dev)) ret = pci_vfio_map_resource(dev); #endif break; @@ -100,7 +100,7 @@ rte_pci_unmap_device(struct rte_pci_device *dev) switch (dev->kdrv) { case RTE_KDRV_VFIO: #ifdef VFIO_PRESENT - if (pci_vfio_is_enabled()) + if (pci_vfio_is_enabled(dev)) pci_vfio_unmap_resource(dev); #endif break; @@ -348,6 +348,15 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) int ret; TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { + /* + * Insert physical PCI devices before all mediated + * PCI devices. + */ + if (dev2->is_mdev) { + rte_pci_insert_device(dev2, dev); + return 0; + } + ret = rte_pci_addr_cmp(&dev->addr, &dev2->addr); if (ret > 0) continue; @@ -471,8 +480,14 @@ rte_pci_scan(void) return 0; #ifdef VFIO_PRESENT - if (!pci_vfio_is_enabled()) - RTE_LOG(DEBUG, EAL, "VFIO PCI modules not loaded\n"); + if (!rte_vfio_is_enabled("vfio_pci")) + RTE_LOG(DEBUG, EAL, "VFIO PCI module not loaded\n"); + + if (!rte_vfio_is_enabled("vfio_mdev")) + RTE_LOG(DEBUG, EAL, "VFIO MDEV module not loaded\n"); + + if (pci_scan_mdev() != 0) + return -1; #endif dir = opendir(rte_pci_get_sysfs_path()); @@ -788,7 +803,7 @@ rte_pci_ioport_map(struct rte_pci_device *dev, int bar, switch (dev->kdrv) { #ifdef VFIO_PRESENT case RTE_KDRV_VFIO: - if (pci_vfio_is_enabled()) + if (pci_vfio_is_enabled(dev)) ret = pci_vfio_ioport_map(dev, bar, p); break; #endif @@ -877,8 +892,7 @@ rte_pci_ioport_unmap(struct rte_pci_ioport *p) switch (p->dev->kdrv) { #ifdef VFIO_PRESENT case RTE_KDRV_VFIO: - if (pci_vfio_is_enabled()) - ret = pci_vfio_ioport_unmap(p); + ret = -1; break; #endif case RTE_KDRV_IGB_UIO: diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h index 158a16977..12739ba51 100644 --- a/drivers/bus/pci/linux/pci_init.h +++ b/drivers/bus/pci/linux/pci_init.h @@ -17,6 +17,9 @@ extern void 
*pci_map_addr; void *pci_find_max_end_va(void); +int pci_get_kernel_driver_by_path(const char *filename, char *dri_name, + size_t len); + /* parse one line of the "resource" sysfs file (note that the 'line' * string is modified) */ @@ -91,7 +94,17 @@ int pci_vfio_ioport_unmap(struct rte_pci_ioport *p); int pci_vfio_map_resource(struct rte_pci_device *dev); int pci_vfio_unmap_resource(struct rte_pci_device *dev); -int pci_vfio_is_enabled(void); +int pci_vfio_is_enabled(struct rte_pci_device *dev); + +int pci_vfio_fill_regions(struct rte_pci_device *dev, int vfio_dev_fd, + struct vfio_device_info *device_info); + +int pci_vfio_get_pci_id(struct rte_pci_device *dev, int vfio_dev_fd, + struct rte_pci_id *pci_id); + +const char *pci_mdev_get_sysfs_path(void); + +int pci_scan_mdev(void); #endif diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index 204698be0..7cea57ff9 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "eal_filesystem.h" @@ -696,7 +697,7 @@ pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region) return ret; } -static int +int pci_vfio_fill_regions(struct rte_pci_device *dev, int vfio_dev_fd, struct vfio_device_info *device_info) { @@ -731,6 +732,7 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; struct vfio_region_info *reg = NULL; char pci_addr[PATH_MAX] = {0}; + const char *sysfs_base; int vfio_dev_fd; struct rte_pci_addr *loc = &dev->addr; int i, ret; @@ -746,10 +748,16 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) #endif /* store PCI address string */ - snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + if (dev->is_mdev) { + sysfs_base = pci_mdev_get_sysfs_path(); + rte_uuid_unparse(dev->uuid, pci_addr, sizeof(pci_addr)); + } else { + sysfs_base = rte_pci_get_sysfs_path(); + snprintf(pci_addr, sizeof(pci_addr), 
PCI_PRI_FMT, loc->domain, loc->bus, loc->devid, loc->function); + } - ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr, + ret = rte_vfio_setup_device(sysfs_base, pci_addr, &vfio_dev_fd, &device_info); if (ret) return ret; @@ -889,6 +897,7 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) { struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; char pci_addr[PATH_MAX] = {0}; + const char *sysfs_base; int vfio_dev_fd; struct rte_pci_addr *loc = &dev->addr; int i, ret; @@ -904,8 +913,14 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) #endif /* store PCI address string */ - snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + if (dev->is_mdev) { + sysfs_base = pci_mdev_get_sysfs_path(); + rte_uuid_unparse(dev->uuid, pci_addr, sizeof(pci_addr)); + } else { + sysfs_base = rte_pci_get_sysfs_path(); + snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, loc->domain, loc->bus, loc->devid, loc->function); + } /* if we're in a secondary process, just find our tailq entry */ TAILQ_FOREACH(vfio_res, vfio_res_list, next) { @@ -921,7 +936,7 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) return -1; } - ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr, + ret = rte_vfio_setup_device(sysfs_base, pci_addr, &vfio_dev_fd, &device_info); if (ret) return ret; @@ -1011,6 +1026,7 @@ find_and_unmap_vfio_resource(struct mapped_pci_res_list *vfio_res_list, static int pci_vfio_unmap_resource_primary(struct rte_pci_device *dev) { + const char *sysfs_base; char pci_addr[PATH_MAX] = {0}; struct rte_pci_addr *loc = &dev->addr; struct mapped_pci_resource *vfio_res = NULL; @@ -1018,8 +1034,14 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev) int ret; /* store PCI address string */ - snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + if (dev->is_mdev) { + sysfs_base = pci_mdev_get_sysfs_path(); + rte_uuid_unparse(dev->uuid, pci_addr, sizeof(pci_addr)); + } else { + sysfs_base = rte_pci_get_sysfs_path(); 
+ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, loc->domain, loc->bus, loc->devid, loc->function); + } #ifdef HAVE_VFIO_DEV_REQ_INTERFACE ret = pci_vfio_disable_notifier(dev); @@ -1041,7 +1063,7 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev) return -1; } - ret = rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr, + ret = rte_vfio_release_device(sysfs_base, pci_addr, dev->intr_handle.vfio_dev_fd); if (ret < 0) { RTE_LOG(ERR, EAL, @@ -1068,6 +1090,7 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev) static int pci_vfio_unmap_resource_secondary(struct rte_pci_device *dev) { + const char *sysfs_base; char pci_addr[PATH_MAX] = {0}; struct rte_pci_addr *loc = &dev->addr; struct mapped_pci_resource *vfio_res = NULL; @@ -1075,10 +1098,16 @@ pci_vfio_unmap_resource_secondary(struct rte_pci_device *dev) int ret; /* store PCI address string */ - snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + if (dev->is_mdev) { + sysfs_base = pci_mdev_get_sysfs_path(); + rte_uuid_unparse(dev->uuid, pci_addr, sizeof(pci_addr)); + } else { + sysfs_base = rte_pci_get_sysfs_path(); + snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, loc->domain, loc->bus, loc->devid, loc->function); + } - ret = rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr, + ret = rte_vfio_release_device(sysfs_base, pci_addr, dev->intr_handle.vfio_dev_fd); if (ret < 0) { RTE_LOG(ERR, EAL, @@ -1201,8 +1230,65 @@ pci_vfio_mmio_write(const struct rte_pci_device *dev, int bar, } int -pci_vfio_is_enabled(void) +pci_vfio_is_enabled(struct rte_pci_device *dev) { - return rte_vfio_is_enabled("vfio_pci"); + return rte_vfio_is_enabled(dev->is_mdev ? 
"vfio_mdev" : "vfio_pci"); } + +/* + * Fill pci_id with the vendor, device, subsystem vendor, subsystem device and class IDs read with pread64() from the CONFIG region of vfio_dev_fd. + * Returns 0 on success, -1 if the region lookup or any read fails. + */ +int +pci_vfio_get_pci_id(struct rte_pci_device *dev, int vfio_dev_fd, + struct rte_pci_id *pci_id) +{ + uint64_t size, offset; + uint32_t class; + + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) { + RTE_LOG(DEBUG, EAL, "Cannot get offset of CONFIG region.\n"); + return -1; + } + + /* vendor_id */ + if (pread64(vfio_dev_fd, &pci_id->vendor_id, sizeof(uint16_t), + offset + PCI_VENDOR_ID) != sizeof(uint16_t)) { + RTE_LOG(DEBUG, EAL, "Cannot read VendorID from PCI config space\n"); + return -1; + } + + /* device_id */ + if (pread64(vfio_dev_fd, &pci_id->device_id, sizeof(uint16_t), + offset + PCI_DEVICE_ID) != sizeof(uint16_t)) { + RTE_LOG(DEBUG, EAL, "Cannot read DeviceID from PCI config space\n"); + return -1; + } + + /* subsystem_vendor_id */ + if (pread64(vfio_dev_fd, &pci_id->subsystem_vendor_id, sizeof(uint16_t), + offset + PCI_SUBSYSTEM_VENDOR_ID) != sizeof(uint16_t)) { + RTE_LOG(DEBUG, EAL, "Cannot read SubVendorID from PCI config space\n"); + return -1; + } + + /* subsystem_device_id */ + if (pread64(vfio_dev_fd, &pci_id->subsystem_device_id, sizeof(uint16_t), + offset + PCI_SUBSYSTEM_ID) != sizeof(uint16_t)) { + RTE_LOG(DEBUG, EAL, "Cannot read SubDeviceID from PCI config space\n"); + return -1; + } + + /* class_id: read into an unsigned dword so the >> 8 below (dropping the low byte) cannot sign-extend */ + if (pread64(vfio_dev_fd, &class, sizeof(uint32_t), + offset + PCI_CLASS_REVISION) != sizeof(uint32_t)) { + RTE_LOG(DEBUG, EAL, "Cannot read ClassID from PCI config space\n"); + return -1; + } + pci_id->class_id = class >> 8; + + return 0; +} + #endif diff --git a/drivers/bus/pci/linux/pci_vfio_mdev.c b/drivers/bus/pci/linux/pci_vfio_mdev.c new file mode 100644 index 000000000..dab7e9b35 --- /dev/null +++ b/drivers/bus/pci/linux/pci_vfio_mdev.c @@ -0,0 +1,236 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + 
+#include "eal_private.h" +#include "eal_filesystem.h" + +#include "private.h" +#include "pci_init.h" + +#ifdef VFIO_PRESENT + +extern struct rte_pci_bus rte_pci_bus; + +#define SYSFS_MDEV_DEVICES "/sys/bus/mdev/devices" + +const char *pci_mdev_get_sysfs_path(void) +{ + const char *path = NULL; + + path = getenv("SYSFS_MDEV_DEVICES"); + if (path == NULL) + return SYSFS_MDEV_DEVICES; + + return path; +} + +static int +is_pci_device(const char *dirname) +{ + char device_api[PATH_MAX]; + char filename[PATH_MAX]; + char *ptr; + + /* get device_api */ + snprintf(filename, sizeof(filename), "%s/mdev_type/device_api", + dirname); + + if (rte_eal_parse_sysfs_str(filename, device_api, + sizeof(device_api)) < 0) { + return -1; + } + + ptr = strchr(device_api, '\n'); + if (ptr != NULL) + *ptr = '\0'; + + return strcmp(device_api, "vfio-pci") == 0; +} + +static int +pci_scan_one_mdev(const char *dirname, const rte_uuid_t addr) +{ + struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + char name[RTE_UUID_STRLEN]; + char filename[PATH_MAX]; + char path[PATH_MAX]; + char driver[PATH_MAX]; + char *ptr; + struct rte_pci_device_internal *pdev; + struct rte_pci_device *dev; + bool need_release = false; + const char *sysfs_base; + unsigned long tmp; + int vfio_dev_fd; + int ret; + + sysfs_base = pci_mdev_get_sysfs_path(); + + pdev = malloc(sizeof(*pdev)); + if (pdev == NULL) + return -1; + + memset(pdev, 0, sizeof(*pdev)); + + dev = &pdev->device; + dev->device.bus = &rte_pci_bus.bus; + rte_uuid_unparse(addr, name, sizeof(name)); + + /* parse driver */ + snprintf(filename, sizeof(filename), "%s/driver", dirname); + ret = pci_get_kernel_driver_by_path(filename, driver, sizeof(driver)); + if (ret < 0) { + RTE_LOG(DEBUG, EAL, "%s: failed to get kernel driver\n", name); + goto err; + } + + if (ret != 0 || strcmp(driver, "vfio_mdev") != 0) { + RTE_LOG(DEBUG, EAL, "%s: unsupported mdev driver\n", name); + goto err; + } + + dev->kdrv = RTE_KDRV_VFIO; + + dev->is_mdev = 1; 
+ rte_uuid_copy(dev->uuid, addr); + + snprintf(filename, sizeof(filename), "%s/%s", sysfs_base, name); + + /* Get the path of the parent device. */ + if (realpath(filename, path) == NULL) { + RTE_LOG(DEBUG, EAL, "%s: failed to get parent device\n", name); + goto err; + } + + ptr = strrchr(path, '/'); + if (ptr == NULL) { + RTE_LOG(DEBUG, EAL, "%s: failed to parse parent device\n", + name); + goto err; + } + *ptr = '\0'; + + /* get numa node, default to 0 if not present */ + snprintf(filename, sizeof(filename), "%s/numa_node", path); + + if (access(filename, F_OK) != -1) { + if (eal_parse_sysfs_value(filename, &tmp) == 0) + dev->device.numa_node = tmp; + else + dev->device.numa_node = -1; + } else { + dev->device.numa_node = 0; + } + + pci_name_set(dev); + + if (rte_vfio_setup_device(sysfs_base, name, &vfio_dev_fd, + &device_info) != 0) { + RTE_LOG(DEBUG, EAL, "%s: failed to setup device\n", name); + goto err; + } + + need_release = true; + + if (pci_vfio_fill_regions(dev, vfio_dev_fd, &device_info) != 0) { + RTE_LOG(DEBUG, EAL, "%s: failed to get regions\n", name); + goto err; + } + + if (pci_vfio_get_pci_id(dev, vfio_dev_fd, &dev->id) != 0) { + RTE_LOG(DEBUG, EAL, "%s: failed to access the device\n", name); + goto err; + } + + /* device is valid, add to the list (sorted) */ + if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { + rte_pci_add_device(dev); + } else { + struct rte_pci_device *dev2; + int ret; + + TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { + /* + * Insert mediated PCI devices after all physical + * PCI devices. 
+ */ + if (!dev2->is_mdev) + continue; + ret = rte_uuid_compare(dev->uuid, dev2->uuid); + if (ret > 0) + continue; + if (ret < 0) { + rte_pci_insert_device(dev2, dev); + return 0; + } + goto err; /* already registered: release the VFIO device set up above and free the duplicate */ + } + + rte_pci_add_device(dev); + } + + return 0; + +err: + if (need_release) + rte_vfio_release_device(sysfs_base, name, vfio_dev_fd); + free(pdev); + return 1; /* positive: device skipped; pci_scan_mdev() only aborts on negative values */ +} + +int +pci_scan_mdev(void) +{ + struct dirent *e; + DIR *dir; + char dirname[PATH_MAX]; + rte_uuid_t addr; + + dir = opendir(pci_mdev_get_sysfs_path()); + if (dir == NULL) { + RTE_LOG(DEBUG, EAL, "%s(): opendir failed: %s\n", + __func__, strerror(errno)); + return 0; /* a missing mdev sysfs directory is not treated as an error */ + } + + while ((e = readdir(dir)) != NULL) { + if (e->d_name[0] == '.') + continue; + + if (rte_uuid_parse(e->d_name, addr) != 0) + continue; + + snprintf(dirname, sizeof(dirname), "%s/%s", + pci_mdev_get_sysfs_path(), e->d_name); + + if (is_pci_device(dirname) <= 0) /* tri-state: skip on read error (-1) as well as on "not vfio-pci" (0) */ + continue; + + if (pci_scan_one_mdev(dirname, addr) < 0) + goto error; + } + closedir(dir); + return 0; + +error: + closedir(dir); + return -1; +} + +#endif /* VFIO_PRESENT */ diff --git a/drivers/bus/pci/meson.build b/drivers/bus/pci/meson.build index a312ecc03..890b7bda0 100644 --- a/drivers/bus/pci/meson.build +++ b/drivers/bus/pci/meson.build @@ -11,7 +11,8 @@ sources = files('pci_common.c', if is_linux sources += files('linux/pci.c', 'linux/pci_uio.c', - 'linux/pci_vfio.c') + 'linux/pci_vfio.c', + 'linux/pci_vfio_mdev.c') includes += include_directories('linux') else sources += files('bsd/pci.c') diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c index 8b9deca8b..ec314cb07 100644 --- a/drivers/bus/pci/pci_common.c +++ b/drivers/bus/pci/pci_common.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "private.h" @@ -61,8 +62,10 @@ pci_name_set(struct rte_pci_device *dev) struct rte_devargs *devargs; /* Each device has its internal, canonical name set. 
*/ - rte_pci_device_name(&dev->addr, - dev->name, sizeof(dev->name)); + if (dev->is_mdev) + rte_uuid_unparse(dev->uuid, dev->name, sizeof(dev->name)); + else + rte_pci_device_name(&dev->addr, dev->name, sizeof(dev->name)); devargs = pci_devargs_lookup(dev); dev->device.devargs = devargs; /* In blacklist mode, if the device is not blacklisted, no @@ -124,21 +127,17 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr, { int ret; bool already_probed; - struct rte_pci_addr *loc; if ((dr == NULL) || (dev == NULL)) return -EINVAL; - loc = &dev->addr; - /* The device is not blacklisted; Check if driver supports it */ if (!rte_pci_match(dr, dev)) /* Match of device and driver failed */ return 1; - RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", - loc->domain, loc->bus, loc->devid, loc->function, - dev->device.numa_node); + RTE_LOG(INFO, EAL, "PCI device %s on NUMA socket %i\n", + dev->name, dev->device.numa_node); /* no initialization when blacklisted, return without error */ if (dev->device.devargs != NULL && @@ -208,7 +207,6 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr, static int rte_pci_detach_dev(struct rte_pci_device *dev) { - struct rte_pci_addr *loc; struct rte_pci_driver *dr; int ret = 0; @@ -216,11 +214,9 @@ rte_pci_detach_dev(struct rte_pci_device *dev) return -EINVAL; dr = dev->driver; - loc = &dev->addr; - RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", - loc->domain, loc->bus, loc->devid, - loc->function, dev->device.numa_node); + RTE_LOG(DEBUG, EAL, "PCI device %s on NUMA socket %i\n", + dev->name, dev->device.numa_node); RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id, dev->id.device_id, dr->driver.name); @@ -297,10 +293,9 @@ rte_pci_probe(void) ret = pci_probe_all_drivers(dev); if (ret < 0) { if (ret != -EEXIST) { - RTE_LOG(ERR, EAL, "Requested device " - PCI_PRI_FMT " cannot be used\n", - dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); + RTE_LOG(ERR, EAL, + 
"Requested device %s cannot be used\n", + dev->name); rte_errno = errno; failed++; } diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h index 86527b421..47e669e9c 100644 --- a/drivers/bus/pci/rte_bus_pci.h +++ b/drivers/bus/pci/rte_bus_pci.h @@ -51,13 +51,26 @@ TAILQ_HEAD(rte_pci_driver_list, rte_pci_driver); struct rte_devargs; +/* + * NOTE: we can't include rte_uuid.h directly due to the conflicts + * introduced by stdbool.h + */ +typedef unsigned char rte_uuid_t[16]; + +/* It's RTE_UUID_STRLEN, which is bigger than PCI_PRI_STR_SIZE. */ +#define RTE_PCI_NAME_LEN (36 + 1) + /** * A structure describing a PCI device. */ struct rte_pci_device { TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */ struct rte_device device; /**< Inherit core device */ - struct rte_pci_addr addr; /**< PCI location. */ + union { + struct rte_pci_addr addr; /**< PCI location. */ + rte_uuid_t uuid; /**< Mdev location. */ + }; + uint8_t is_mdev; /**< True for mediated PCI device */ struct rte_pci_id id; /**< PCI ID. 
*/ struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE]; /**< PCI Memory Resource */ @@ -65,7 +78,7 @@ struct rte_pci_device { struct rte_pci_driver *driver; /**< PCI driver used in probing */ uint16_t max_vfs; /**< sriov enable if not zero */ enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ - char name[PCI_PRI_STR_SIZE+1]; /**< PCI location (ASCII) */ + char name[RTE_PCI_NAME_LEN]; /**< PCI/Mdev location (ASCII) */ struct rte_intr_handle vfio_req_intr_handle; /**< Handler of VFIO request interrupt */ }; diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c index 44bad45d3..942148180 100644 --- a/lib/librte_eal/linux/eal/eal.c +++ b/lib/librte_eal/linux/eal/eal.c @@ -1068,6 +1068,15 @@ rte_eal_init(int argc, char **argv) return -1; } +#ifdef VFIO_PRESENT + if (rte_eal_vfio_setup() < 0) { + rte_eal_init_alert("Cannot init VFIO"); + rte_errno = EAGAIN; + rte_atomic32_clear(&run_once); + return -1; + } +#endif + if (rte_bus_scan()) { rte_eal_init_alert("Cannot scan the buses for devices"); rte_errno = ENODEV; @@ -1151,14 +1160,6 @@ rte_eal_init(int argc, char **argv) return -1; } -#ifdef VFIO_PRESENT - if (rte_eal_vfio_setup() < 0) { - rte_eal_init_alert("Cannot init VFIO"); - rte_errno = EAGAIN; - rte_atomic32_clear(&run_once); - return -1; - } -#endif /* in secondary processes, memory init may allocate additional fbarrays * not present in primary processes, so to avoid any potential issues, * initialize memzones first.