Message ID | 1415700565-19157-1-git-send-email-anatoly.burakov@intel.com (mailing list archive) |
---|---|
State | Accepted, archived |
Headers | show |
On Tue, Nov 11, 2014 at 10:09:25AM +0000, Anatoly Burakov wrote: > Multi-process DPDK application must mmap hugepages and PCI resources > into the same virtual address space. By default the virtual addresses > are chosen by the primary process automatically when calling the mmap. > But sometimes the chosen virtual addresses aren't usable in secondary > process - for example, secondary process is linked with more libraries > than primary process, and the library occupies the same address space > that the primary process has requested for PCI mappings. > > This patch makes EAL try and map PCI BARs right after the hugepages > (instead of location chosen by mmap) in virtual memory, so that PCI BARs > have less chance of ending up in random places in virtual memory. > > Signed-off-by: Liang Xu <liang.xu@cinfotech.cn> > Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com> Acked-by: Bruce Richardson <bruce.richardson@intel.com> > --- > lib/librte_eal/linuxapp/eal/eal_pci.c | 30 ++++++++++++++++------ > lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 13 ++++++++-- > lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 19 +++++++++++--- > lib/librte_eal/linuxapp/eal/include/eal_pci_init.h | 6 +++++ > 4 files changed, 55 insertions(+), 13 deletions(-) > > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c > index 5fe3961..79fbbb8 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci.c > +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c > @@ -97,6 +97,25 @@ error: > return -1; > } > > +void * > +pci_find_max_end_va(void) > +{ > + const struct rte_memseg *seg = rte_eal_get_physmem_layout(); > + const struct rte_memseg *last = seg; > + unsigned i = 0; > + > + for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) { > + if (seg->addr == NULL) > + break; > + > + if (seg->addr > last->addr) > + last = seg; > + > + } > + return RTE_PTR_ADD(last->addr, last->len); > +} > + > + > /* map a particular resource from a file */ > void * > pci_map_resource(void 
*requested_addr, int fd, off_t offset, size_t size) > @@ -106,21 +125,16 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size) > /* Map the PCI memory resource of device */ > mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, > MAP_SHARED, fd, offset); > - if (mapaddr == MAP_FAILED || > - (requested_addr != NULL && mapaddr != requested_addr)) { > + if (mapaddr == MAP_FAILED) { > RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n", > __func__, fd, requested_addr, > (unsigned long)size, (unsigned long)offset, > strerror(errno), mapaddr); > - goto fail; > + } else { > + RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); > } > > - RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); > - > return mapaddr; > - > -fail: > - return NULL; > } > > /* parse the "resource" sysfs file */ > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > index 7e62266..e53f06b 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > @@ -35,6 +35,7 @@ > #include <fcntl.h> > #include <dirent.h> > #include <sys/stat.h> > +#include <sys/mman.h> > > #include <rte_log.h> > #include <rte_pci.h> > @@ -48,6 +49,8 @@ > > static int pci_parse_sysfs_value(const char *filename, uint64_t *val); > > +void *pci_map_addr = NULL; > + > > #define OFF_MAX ((uint64_t)(off_t)-1) > static int > @@ -371,10 +374,16 @@ pci_uio_map_resource(struct rte_pci_device *dev) > if (maps[j].addr != NULL) > fail = 1; > else { > - mapaddr = pci_map_resource(NULL, fd, (off_t)offset, > + /* try mapping somewhere close to the end of hugepages */ > + if (pci_map_addr == NULL) > + pci_map_addr = pci_find_max_end_va(); > + > + mapaddr = pci_map_resource(pci_map_addr, fd, (off_t)offset, > (size_t)maps[j].size); > - if (mapaddr == NULL) > + if (mapaddr == MAP_FAILED) > fail = 1; > + > + pci_map_addr = RTE_PTR_ADD(mapaddr, (size_t) maps[j].size); > } > > if (fail) { > 
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > index c776ddc..c1246e8 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > @@ -37,6 +37,7 @@ > #include <sys/eventfd.h> > #include <sys/socket.h> > #include <sys/ioctl.h> > +#include <sys/mman.h> > > #include <rte_log.h> > #include <rte_pci.h> > @@ -720,10 +721,22 @@ pci_vfio_map_resource(struct rte_pci_device *dev) > if (i == msix_bar) > continue; > > - bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset, > - reg.size); > + if (internal_config.process_type == RTE_PROC_PRIMARY) { > + /* try mapping somewhere close to the end of hugepages */ > + if (pci_map_addr == NULL) > + pci_map_addr = pci_find_max_end_va(); > + > + bar_addr = pci_map_resource(pci_map_addr, vfio_dev_fd, reg.offset, > + reg.size); > + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); > + } else { > + bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset, > + reg.size); > + } > > - if (bar_addr == NULL) { > + if (bar_addr == MAP_FAILED || > + (internal_config.process_type == RTE_PROC_SECONDARY && > + bar_addr != maps[i].addr)) { > RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i, > strerror(errno)); > close(vfio_dev_fd); > diff --git a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h > index d758bee..1070eb8 100644 > --- a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h > +++ b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h > @@ -59,6 +59,12 @@ struct mapped_pci_resource { > TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource); > extern struct mapped_pci_res_list *pci_res_list; > > +/* > + * Helper function to map PCI resources right after hugepages in virtual memory > + */ > +extern void *pci_map_addr; > +void *pci_find_max_end_va(void); > + > void *pci_map_resource(void *requested_addr, int fd, off_t offset, > size_t size); > > 
-- > 1.8.1.4 >
> > Multi-process DPDK application must mmap hugepages and PCI resources
> > into the same virtual address space. By default the virtual addresses
> > are chosen by the primary process automatically when calling the mmap.
> > But sometimes the chosen virtual addresses aren't usable in secondary
> > process - for example, secondary process is linked with more libraries
> > than primary process, and the library occupies the same address space
> > that the primary process has requested for PCI mappings.
> >
> > This patch makes EAL try and map PCI BARs right after the hugepages
> > (instead of location chosen by mmap) in virtual memory, so that PCI BARs
> > have less chance of ending up in random places in virtual memory.
> >
> > Signed-off-by: Liang Xu <liang.xu@cinfotech.cn>
> > Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> > Acked-by: Bruce Richardson <bruce.richardson@intel.com>

Applied. Thanks.
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index 5fe3961..79fbbb8 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -97,6 +97,25 @@ error: return -1; } +void * +pci_find_max_end_va(void) +{ + const struct rte_memseg *seg = rte_eal_get_physmem_layout(); + const struct rte_memseg *last = seg; + unsigned i = 0; + + for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) { + if (seg->addr == NULL) + break; + + if (seg->addr > last->addr) + last = seg; + + } + return RTE_PTR_ADD(last->addr, last->len); +} + + /* map a particular resource from a file */ void * pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size) @@ -106,21 +125,16 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size) /* Map the PCI memory resource of device */ mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset); - if (mapaddr == MAP_FAILED || - (requested_addr != NULL && mapaddr != requested_addr)) { + if (mapaddr == MAP_FAILED) { RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n", __func__, fd, requested_addr, (unsigned long)size, (unsigned long)offset, strerror(errno), mapaddr); - goto fail; + } else { + RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); } - RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); - return mapaddr; - -fail: - return NULL; } /* parse the "resource" sysfs file */ diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c index 7e62266..e53f06b 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -35,6 +35,7 @@ #include <fcntl.h> #include <dirent.h> #include <sys/stat.h> +#include <sys/mman.h> #include <rte_log.h> #include <rte_pci.h> @@ -48,6 +49,8 @@ static int pci_parse_sysfs_value(const char *filename, uint64_t *val); +void *pci_map_addr = NULL; + #define OFF_MAX ((uint64_t)(off_t)-1) 
static int @@ -371,10 +374,16 @@ pci_uio_map_resource(struct rte_pci_device *dev) if (maps[j].addr != NULL) fail = 1; else { - mapaddr = pci_map_resource(NULL, fd, (off_t)offset, + /* try mapping somewhere close to the end of hugepages */ + if (pci_map_addr == NULL) + pci_map_addr = pci_find_max_end_va(); + + mapaddr = pci_map_resource(pci_map_addr, fd, (off_t)offset, (size_t)maps[j].size); - if (mapaddr == NULL) + if (mapaddr == MAP_FAILED) fail = 1; + + pci_map_addr = RTE_PTR_ADD(mapaddr, (size_t) maps[j].size); } if (fail) { diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c index c776ddc..c1246e8 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c @@ -37,6 +37,7 @@ #include <sys/eventfd.h> #include <sys/socket.h> #include <sys/ioctl.h> +#include <sys/mman.h> #include <rte_log.h> #include <rte_pci.h> @@ -720,10 +721,22 @@ pci_vfio_map_resource(struct rte_pci_device *dev) if (i == msix_bar) continue; - bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset, - reg.size); + if (internal_config.process_type == RTE_PROC_PRIMARY) { + /* try mapping somewhere close to the end of hugepages */ + if (pci_map_addr == NULL) + pci_map_addr = pci_find_max_end_va(); + + bar_addr = pci_map_resource(pci_map_addr, vfio_dev_fd, reg.offset, + reg.size); + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); + } else { + bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset, + reg.size); + } - if (bar_addr == NULL) { + if (bar_addr == MAP_FAILED || + (internal_config.process_type == RTE_PROC_SECONDARY && + bar_addr != maps[i].addr)) { RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i, strerror(errno)); close(vfio_dev_fd); diff --git a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h index d758bee..1070eb8 100644 --- a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h +++ 
b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h @@ -59,6 +59,12 @@ struct mapped_pci_resource { TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource); extern struct mapped_pci_res_list *pci_res_list; +/* + * Helper function to map PCI resources right after hugepages in virtual memory + */ +extern void *pci_map_addr; +void *pci_find_max_end_va(void); + void *pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size);