[dpdk-dev,RFC,3/3] bus/pci: use the new device memory API for BAR mapping

Message ID ca9f07f8d5adcb6cf1a410360e9adc8cf68cae1b.1527764061.git.anatoly.burakov@intel.com (mailing list archive)
State Rejected, archived
Delegated to: Thomas Monjalon
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Burakov, Anatoly May 31, 2018, 10:57 a.m. UTC
  Adjust PCI infrastructure to reserve device memory through the
new device memory API. Any hotplug event will reserve memory, and any
hot-unplug event will release that memory back to the system.

This allows for more reliable PCI mappings in secondary processes,
and will be crucial to support multiprocess hotplug.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/bus/pci/linux/pci_init.h |  1 -
 drivers/bus/pci/linux/pci_uio.c  | 11 +----------
 drivers/bus/pci/linux/pci_vfio.c | 27 ++++++++++++---------------
 lib/librte_pci/Makefile          |  1 +
 lib/librte_pci/rte_pci.c         | 20 +++++++++++++++++++-
 5 files changed, 33 insertions(+), 27 deletions(-)
  

Patch

diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h
index c2e603a37..bc9279c66 100644
--- a/drivers/bus/pci/linux/pci_init.h
+++ b/drivers/bus/pci/linux/pci_init.h
@@ -14,7 +14,6 @@ 
 /*
  * Helper function to map PCI resources right after hugepages in virtual memory
  */
-extern void *pci_map_addr;
 void *pci_find_max_end_va(void);
 
 /* parse one line of the "resource" sysfs file (note that the 'line'
diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index d423e4bb0..dbf108b6f 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -26,8 +26,6 @@ 
 #include "eal_filesystem.h"
 #include "pci_init.h"
 
-void *pci_map_addr = NULL;
-
 #define OFF_MAX              ((uint64_t)(off_t)-1)
 
 int
@@ -316,19 +314,12 @@  pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
 		goto error;
 	}
 
-	/* try mapping somewhere close to the end of hugepages */
-	if (pci_map_addr == NULL)
-		pci_map_addr = pci_find_max_end_va();
-
-	mapaddr = pci_map_resource(pci_map_addr, fd, 0,
+	mapaddr = pci_map_resource(NULL, fd, 0,
 			(size_t)dev->mem_resource[res_idx].len, 0);
 	close(fd);
 	if (mapaddr == MAP_FAILED)
 		goto error;
 
-	pci_map_addr = RTE_PTR_ADD(mapaddr,
-			(size_t)dev->mem_resource[res_idx].len);
-
 	maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
 	maps[map_idx].size = dev->mem_resource[res_idx].len;
 	maps[map_idx].addr = mapaddr;
diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index aeeaa9ed8..f390ea37a 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -324,7 +324,7 @@  pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd)
 
 static int
 pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
-		int bar_index, int additional_flags)
+		int bar_index)
 {
 	struct memreg {
 		unsigned long offset, size;
@@ -371,9 +371,14 @@  pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
 		memreg[0].size = bar->size;
 	}
 
-	/* reserve the address using an inaccessible mapping */
-	bar_addr = mmap(bar->addr, bar->size, 0, MAP_PRIVATE |
-			MAP_ANONYMOUS | additional_flags, -1, 0);
+	if (bar->addr == NULL) {
+		bar_addr = rte_mem_dev_memory_alloc(bar->size, 0);
+		if (bar_addr == NULL) {
+			RTE_LOG(ERR, EAL, "%s(): cannot reserve space for device\n",
+				__func__);
+			return -1;
+		}
+	}
 	if (bar_addr != MAP_FAILED) {
 		void *map_addr = NULL;
 		if (memreg[0].size) {
@@ -469,7 +474,6 @@  pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 
 	for (i = 0; i < (int) vfio_res->nb_maps; i++) {
 		struct vfio_region_info reg = { .argsz = sizeof(reg) };
-		void *bar_addr;
 
 		reg.index = i;
 
@@ -494,19 +498,12 @@  pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 		if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
 			continue;
 
-		/* try mapping somewhere close to the end of hugepages */
-		if (pci_map_addr == NULL)
-			pci_map_addr = pci_find_max_end_va();
-
-		bar_addr = pci_map_addr;
-		pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
-
-		maps[i].addr = bar_addr;
+		maps[i].addr = NULL;
 		maps[i].offset = reg.offset;
 		maps[i].size = reg.size;
 		maps[i].path = NULL; /* vfio doesn't have per-resource paths */
 
-		ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
+		ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i);
 		if (ret < 0) {
 			RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n",
 					pci_addr, i, strerror(errno));
@@ -574,7 +571,7 @@  pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
 	maps = vfio_res->maps;
 
 	for (i = 0; i < (int) vfio_res->nb_maps; i++) {
-		ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED);
+		ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i);
 		if (ret < 0) {
 			RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n",
 					pci_addr, i, strerror(errno));
diff --git a/lib/librte_pci/Makefile b/lib/librte_pci/Makefile
index 94a632670..f996fe33c 100644
--- a/lib/librte_pci/Makefile
+++ b/lib/librte_pci/Makefile
@@ -8,6 +8,7 @@  LIB = librte_pci.a
 
 CFLAGS := -I$(SRCDIR) $(CFLAGS)
 CFLAGS += $(WERROR_FLAGS) -O3
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 LDLIBS += -lrte_eal
 
 EXPORT_MAP := rte_pci_version.map
diff --git a/lib/librte_pci/rte_pci.c b/lib/librte_pci/rte_pci.c
index 530738dbd..c425a624e 100644
--- a/lib/librte_pci/rte_pci.c
+++ b/lib/librte_pci/rte_pci.c
@@ -151,6 +151,16 @@  pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
 {
 	void *mapaddr;
 
+	if (requested_addr == NULL) {
+		requested_addr = rte_mem_dev_memory_alloc(size, 0);
+		if (requested_addr == NULL) {
+			RTE_LOG(ERR, EAL, "%s(): cannot reserve space for device\n",
+				__func__);
+			return MAP_FAILED;
+		}
+	}
+	additional_flags |= MAP_FIXED;
+
 	/* Map the PCI memory resource of device */
 	mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
 			MAP_SHARED | additional_flags, fd, offset);
@@ -170,15 +180,23 @@  pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
 void
 pci_unmap_resource(void *requested_addr, size_t size)
 {
+	void *mapped;
 	if (requested_addr == NULL)
 		return;
 
+	mapped = mmap(requested_addr, size, PROT_READ,
+			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
 	/* Unmap the PCI memory resource of device */
-	if (munmap(requested_addr, size)) {
+	if (mapped == MAP_FAILED) {
 		RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, %#zx): %s\n",
 			__func__, requested_addr, size,
 			strerror(errno));
 	} else
 		RTE_LOG(DEBUG, EAL, "  PCI memory unmapped at %p\n",
 				requested_addr);
+	if (rte_mem_dev_memory_free(requested_addr, size))
+		RTE_LOG(ERR, EAL, "%s(): cannot mark %p-%p as free\n",
+			__func__, requested_addr,
+			RTE_PTR_ADD(requested_addr, size));
 }