From patchwork Wed Jul 3 05:45:08 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tiwei Bie X-Patchwork-Id: 55927 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 06FEB4D27; Wed, 3 Jul 2019 07:47:38 +0200 (CEST) Received: from mga17.intel.com (mga17.intel.com [192.55.52.151]) by dpdk.org (Postfix) with ESMTP id 1F03F1B955 for ; Wed, 3 Jul 2019 07:47:35 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga007.fm.intel.com ([10.253.24.52]) by fmsmga107.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 02 Jul 2019 22:47:35 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.63,446,1557212400"; d="scan'208";a="165877114" Received: from npg-dpdk-virtio-tbie-2.sh.intel.com ([10.67.104.151]) by fmsmga007.fm.intel.com with ESMTP; 02 Jul 2019 22:47:34 -0700 From: Tiwei Bie To: dev@dpdk.org Cc: anatoly.burakov@intel.com Date: Wed, 3 Jul 2019 13:45:08 +0800 Message-Id: <20190703054508.22824-1-tiwei.bie@intel.com> X-Mailer: git-send-email 2.17.1 Subject: [dpdk-dev] [RFC PATCH] bus/pci: avoid depending on private value in kernel source X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" The value 40 used in VFIO_GET_REGION_ADDR() is a private value (VFIO_PCI_OFFSET_SHIFT) defined in Linux kernel source [1]. It is not part of VFIO API, and we should not depend on it. [1] https://github.com/torvalds/linux/blob/6fbc7275c7a9/drivers/vfio/pci/vfio_pci_private.h#L19 Signed-off-by: Tiwei Bie --- drivers/bus/pci/linux/pci.c | 18 +++- drivers/bus/pci/linux/pci_init.h | 6 +- drivers/bus/pci/linux/pci_vfio.c | 157 ++++++++++++++++++++++++------- drivers/bus/pci/rte_bus_pci.h | 12 +++ lib/librte_eal/linux/eal/eal.c | 17 ++-- 5 files changed, 163 insertions(+), 47 deletions(-) diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index b931cf9d1..13fbdb018 100644 --- a/drivers/bus/pci/linux/pci.c +++ b/drivers/bus/pci/linux/pci.c @@ -337,6 +337,20 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) } else dev->kdrv = RTE_KDRV_NONE; + switch (dev->kdrv) { + case RTE_KDRV_VFIO: +#ifdef VFIO_PRESENT + ret = pci_vfio_fill_regions(dev); + if (ret != 0) { + free(dev); + return 1; + } +#endif + break; + default: + break; + } + /* device is valid, add in list (sorted) */ if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { rte_pci_add_device(dev); @@ -721,7 +735,7 @@ int rte_pci_read_config(const struct rte_pci_device *device, return pci_uio_read_config(intr_handle, buf, len, offset); #ifdef VFIO_PRESENT case RTE_KDRV_VFIO: - return pci_vfio_read_config(intr_handle, buf, len, offset); + return pci_vfio_read_config(device, buf, len, offset); #endif default: rte_pci_device_name(&device->addr, devname, @@ -745,7 +759,7 @@ int rte_pci_write_config(const struct rte_pci_device *device, return pci_uio_write_config(intr_handle, buf, len, offset); #ifdef VFIO_PRESENT case RTE_KDRV_VFIO: - return pci_vfio_write_config(intr_handle, buf, len, offset); + return pci_vfio_write_config(device, buf, len, offset); #endif default: rte_pci_device_name(&device->addr, devname, diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h index c2e603a37..a8399f39b 100644 --- a/drivers/bus/pci/linux/pci_init.h +++ b/drivers/bus/pci/linux/pci_init.h @@ -64,9 +64,9 @@ int pci_uio_ioport_unmap(struct rte_pci_ioport *p); #endif /* access config space */ -int pci_vfio_read_config(const struct rte_intr_handle *intr_handle, +int pci_vfio_read_config(const struct rte_pci_device *dev, void *buf, size_t len, off_t offs); -int pci_vfio_write_config(const struct rte_intr_handle *intr_handle, +int pci_vfio_write_config(const struct rte_pci_device *dev, const void *buf, size_t len, off_t offs); int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, @@ -83,6 +83,8 @@ int pci_vfio_unmap_resource(struct rte_pci_device *dev); int pci_vfio_is_enabled(void); +int pci_vfio_fill_regions(struct rte_pci_device *dev); + #endif #endif /* EAL_PCI_INIT_H_ */ diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index ebf6ccd3c..06db8679c 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -48,35 +48,111 @@ static struct rte_tailq_elem rte_vfio_tailq = { }; EAL_REGISTER_TAILQ(rte_vfio_tailq) +static int pci_vfio_get_region_info(int vfio_dev_fd, + struct vfio_region_info **info, int region); + +int pci_vfio_fill_regions(struct rte_pci_device *dev) +{ + struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + struct vfio_region_info *reg = NULL; + int vfio_dev_fd, nb_maps; + int i, ret; + + ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), dev->name, + &vfio_dev_fd, &device_info); + if (ret != 0) + return -1; + + nb_maps = RTE_MIN((int)device_info.num_regions, + VFIO_PCI_CONFIG_REGION_INDEX + 1); + + for (i = 0; i < nb_maps; i++) { + ret = pci_vfio_get_region_info(vfio_dev_fd, ®, i); + if (ret < 0) { + RTE_LOG(DEBUG, EAL, "%s cannot get device region info error %i (%s)\n", + dev->name, errno, strerror(errno)); + goto out; + } + + dev->region[i].size = reg->size; + dev->region[i].offset = reg->offset; + + free(reg); + } + +out: + rte_vfio_release_device(rte_pci_get_sysfs_path(), dev->name, + vfio_dev_fd); + return ret; +} + +static int +pci_vfio_get_region(const struct rte_pci_device *dev, int index, + uint64_t *size, uint64_t *offset) +{ + if (index >= VFIO_PCI_NUM_REGIONS || index >= RTE_MAX_PCI_REGIONS) + return -1; + + if (dev->region[index].size == 0 && dev->region[index].offset == 0) + return -1; + + *size = dev->region[index].size; + *offset = dev->region[index].offset; + + return 0; +} + int -pci_vfio_read_config(const struct rte_intr_handle *intr_handle, +pci_vfio_read_config(const struct rte_pci_device *dev, void *buf, size_t len, off_t offs) { - return pread64(intr_handle->vfio_dev_fd, buf, len, - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); + uint64_t size, offset; + + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) + return -1; + + if ((uint64_t)len + offs > size) + return -1; + + return pread64(dev->intr_handle.vfio_dev_fd, buf, len, offset + offs); } int -pci_vfio_write_config(const struct rte_intr_handle *intr_handle, +pci_vfio_write_config(const struct rte_pci_device *dev, const void *buf, size_t len, off_t offs) { - return pwrite64(intr_handle->vfio_dev_fd, buf, len, - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); + uint64_t size, offset; + + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) + return -1; + + if ((uint64_t)len + offs > size) + return -1; + + return pwrite64(dev->intr_handle.vfio_dev_fd, buf, len, offset + offs); } /* get PCI BAR number where MSI-X interrupts are */ static int -pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) +pci_vfio_get_msix_bar(const struct rte_pci_device *dev, int fd, + struct pci_msix_table *msix_table) { int ret; uint32_t reg; uint16_t flags; uint8_t cap_id, cap_offset; + uint64_t size, offset; + + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) { + RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n"); + return -1; + } /* read PCI capability pointer from config space */ - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_CAPABILITY_LIST); + ret = pread64(fd, ®, sizeof(reg), offset + PCI_CAPABILITY_LIST); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " "config space!\n"); @@ -89,9 +165,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) while (cap_offset) { /* read PCI capability ID */ - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset); + ret = pread64(fd, ®, sizeof(reg), offset + cap_offset); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI " "config space!\n"); @@ -104,8 +178,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) /* if we haven't reached MSI-X, check next capability */ if (cap_id != PCI_CAP_ID_MSIX) { ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset); + offset + cap_offset); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " "config space!\n"); @@ -121,8 +194,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) else { /* table offset resides in the next 4 bytes */ ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset + 4); + offset + cap_offset + 4); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config " "space!\n"); @@ -130,8 +202,7 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) } ret = pread64(fd, &flags, sizeof(flags), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset + 2); + offset + cap_offset + 2); if (ret != sizeof(flags)) { RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config " "space!\n"); @@ -151,14 +222,19 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) /* set PCI bus mastering */ static int -pci_vfio_set_bus_master(int dev_fd, bool op) +pci_vfio_set_bus_master(const struct rte_pci_device *dev, int dev_fd, bool op) { + uint64_t size, offset; uint16_t reg; int ret; - ret = pread64(dev_fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) { + RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n"); + return -1; + } + + ret = pread64(dev_fd, ®, sizeof(reg), offset + PCI_COMMAND); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); return -1; @@ -170,10 +246,7 @@ pci_vfio_set_bus_master(int dev_fd, bool op) else reg &= ~(PCI_COMMAND_MASTER); - ret = pwrite64(dev_fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); - + ret = pwrite64(dev_fd, ®, sizeof(reg), offset + PCI_COMMAND); if (ret != sizeof(reg)) { RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); return -1; @@ -402,14 +475,21 @@ pci_vfio_disable_notifier(struct rte_pci_device *dev) #endif static int -pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index) +pci_vfio_is_ioport_bar(const struct rte_pci_device *dev, + int vfio_dev_fd, int bar_index) { + uint64_t size, offset; uint32_t ioport_bar; int ret; + if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX, + &size, &offset) != 0) { + RTE_LOG(ERR, EAL, "Cannot get offset of CONFIG region.\n"); + return -1; + } + ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) - + PCI_BASE_ADDRESS_0 + bar_index*4); + offset + PCI_BASE_ADDRESS_0 + bar_index*4); if (ret != sizeof(ioport_bar)) { RTE_LOG(ERR, EAL, "Cannot read command (%x) from config space!\n", PCI_BASE_ADDRESS_0 + bar_index*4); @@ -428,7 +508,7 @@ pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd) } /* set bus mastering for the device */ - if (pci_vfio_set_bus_master(vfio_dev_fd, true)) { + if (pci_vfio_set_bus_master(dev, vfio_dev_fd, true)) { RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); return -1; } @@ -691,7 +771,7 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) /* get MSI-X BAR, if any (we have to know where it is because we can't * easily mmap it when using VFIO) */ - ret = pci_vfio_get_msix_bar(vfio_dev_fd, &vfio_res->msix_table); + ret = pci_vfio_get_msix_bar(dev, vfio_dev_fd, &vfio_res->msix_table); if (ret < 0) { RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr); @@ -724,7 +804,7 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) } /* chk for io port region */ - ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i); + ret = pci_vfio_is_ioport_bar(dev, vfio_dev_fd, i); if (ret < 0) { free(reg); goto err_vfio_res; @@ -935,7 +1015,7 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev) return -1; } - if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) { + if (pci_vfio_set_bus_master(dev, dev->intr_handle.vfio_dev_fd, false)) { RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n", pci_addr); return -1; @@ -1013,14 +1093,21 @@ int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p) { + uint64_t size, offset; + if (bar < VFIO_PCI_BAR0_REGION_INDEX || bar > VFIO_PCI_BAR5_REGION_INDEX) { RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar); return -1; } + if (pci_vfio_get_region(dev, bar, &size, &offset) != 0) { + RTE_LOG(ERR, EAL, "Cannot get offset of region %d.\n", bar); + return -1; + } + p->dev = dev; - p->base = VFIO_GET_REGION_ADDR(bar); + p->base = offset; return 0; } diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h index 06e004cd3..1d13bed75 100644 --- a/drivers/bus/pci/rte_bus_pci.h +++ b/drivers/bus/pci/rte_bus_pci.h @@ -51,6 +51,16 @@ TAILQ_HEAD(rte_pci_driver_list, rte_pci_driver); struct rte_devargs; +#define RTE_MAX_PCI_REGIONS 9 + +/* + * Regions provided by e.g. VFIO. + */ +struct rte_pci_region { + uint64_t size; + uint64_t offset; +}; + /** * A structure describing a PCI device. */ @@ -59,6 +69,8 @@ struct rte_pci_device { struct rte_device device; /**< Inherit core device */ struct rte_pci_addr addr; /**< PCI location. */ struct rte_pci_id id; /**< PCI ID. */ + struct rte_pci_region region[RTE_MAX_PCI_REGIONS]; + /**< PCI regions. */ struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE]; /**< PCI Memory Resource */ struct rte_intr_handle intr_handle; /**< Interrupt handle */ diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c index 8a0b387ce..29b1a873a 100644 --- a/lib/librte_eal/linux/eal/eal.c +++ b/lib/librte_eal/linux/eal/eal.c @@ -1068,6 +1068,15 @@ rte_eal_init(int argc, char **argv) return -1; } +#ifdef VFIO_PRESENT + if (rte_eal_vfio_setup() < 0) { + rte_eal_init_alert("Cannot init VFIO"); + rte_errno = EAGAIN; + rte_atomic32_clear(&run_once); + return -1; + } +#endif + if (rte_bus_scan()) { rte_eal_init_alert("Cannot scan the buses for devices"); rte_errno = ENODEV; @@ -1130,14 +1139,6 @@ rte_eal_init(int argc, char **argv) return -1; } -#ifdef VFIO_PRESENT - if (rte_eal_vfio_setup() < 0) { - rte_eal_init_alert("Cannot init VFIO"); - rte_errno = EAGAIN; - rte_atomic32_clear(&run_once); - return -1; - } -#endif /* in secondary processes, memory init may allocate additional fbarrays * not present in primary processes, so to avoid any potential issues, * initialize memzones first.