From patchwork Tue Jan 31 17:44:53 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Walker, Benjamin" X-Patchwork-Id: 20116 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [IPv6:::1]) by dpdk.org (Postfix) with ESMTP id 9DCF21023; Tue, 31 Jan 2017 18:45:01 +0100 (CET) Received: from mga14.intel.com (mga14.intel.com [192.55.52.115]) by dpdk.org (Postfix) with ESMTP id 984F9B6D for ; Tue, 31 Jan 2017 18:44:59 +0100 (CET) Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by fmsmga103.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 31 Jan 2017 09:44:55 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.33,315,1477983600"; d="scan'208";a="815497678" Received: from bwalker-desk.ch.intel.com ([143.182.137.72]) by FMSMGA003.fm.intel.com with ESMTP; 31 Jan 2017 09:44:55 -0800 From: Ben Walker To: dev@dpdk.org Cc: Ben Walker Date: Tue, 31 Jan 2017 10:44:53 -0700 Message-Id: <20170131174453.15796-1-benjamin.walker@intel.com> X-Mailer: git-send-email 2.9.3 In-Reply-To: <20170131174054.15324-1-benjamin.walker@intel.com> References: <20170131174054.15324-1-benjamin.walker@intel.com> Subject: [dpdk-dev] [PATCH v4] eal: Support running as unprivileged user X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" For Linux kernel 4.0 and newer, the ability to obtain physical page frame numbers for unprivileged users from /proc/self/pagemap was removed. Instead, when an IOMMU is present, simply choose our own DMA addresses instead. Signed-off-by: Ben Walker Acked-by: Sergio Gonzalez Monroy --- lib/librte_eal/common/eal_private.h | 12 +++++ lib/librte_eal/linuxapp/eal/eal_memory.c | 75 +++++++++++++++++++++++--------- lib/librte_eal/linuxapp/eal/eal_pci.c | 6 ++- 3 files changed, 71 insertions(+), 22 deletions(-) diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 9e7d8f6..8b2d323 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -34,6 +34,7 @@ #ifndef _EAL_PRIVATE_H_ #define _EAL_PRIVATE_H_ +#include #include #include @@ -301,4 +302,15 @@ int rte_eal_hugepage_init(void); */ int rte_eal_hugepage_attach(void); +/** + * Returns true if the system is able to obtain + * physical addresses. Return false if using DMA + * addresses through an IOMMU. + * + * Drivers based on uio will not load unless physical + * addresses are obtainable. It is only possible to get + * physical addresses when running as a privileged user. + */ +bool rte_eal_using_phys_addrs(void); + #endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index a956bb2..657c6f4 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -64,6 +64,7 @@ #define _FILE_OFFSET_BITS 64 #include #include +#include #include #include #include @@ -122,26 +123,24 @@ int rte_xen_dom0_supported(void) static uint64_t baseaddr_offset; -static unsigned proc_pagemap_readable; +static bool phys_addrs_available = true; #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" static void -test_proc_pagemap_readable(void) +test_phys_addrs_available(void) { - int fd = open("/proc/self/pagemap", O_RDONLY); + uint64_t tmp; + phys_addr_t physaddr; - if (fd < 0) { + physaddr = rte_mem_virt2phy(&tmp); + if (physaddr == RTE_BAD_PHYS_ADDR) { RTE_LOG(ERR, EAL, - "Cannot open /proc/self/pagemap: %s. " - "virt2phys address translation will not work\n", + "Cannot obtain physical addresses: %s. " + "Only vfio will function.\n", strerror(errno)); - return; + phys_addrs_available = false; } - - /* Is readable */ - close(fd); - proc_pagemap_readable = 1; } /* Lock page in physical memory and prevent from swapping. */ @@ -190,7 +189,7 @@ rte_mem_virt2phy(const void *virtaddr) } /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */ - if (!proc_pagemap_readable) + if (!phys_addrs_available) return RTE_BAD_PHYS_ADDR; /* standard page size */ @@ -229,6 +228,9 @@ rte_mem_virt2phy(const void *virtaddr) * the pfn (page frame number) are bits 0-54 (see * pagemap.txt in linux Documentation) */ + if ((page & 0x7fffffffffffffULL) == 0) + return RTE_BAD_PHYS_ADDR; + physaddr = ((page & 0x7fffffffffffffULL) * page_size) + ((unsigned long)virtaddr % page_size); @@ -242,7 +244,7 @@ rte_mem_virt2phy(const void *virtaddr) static int find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) { - unsigned i; + unsigned int i; phys_addr_t addr; for (i = 0; i < hpi->num_pages[0]; i++) { @@ -255,6 +257,22 @@ find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) } /* + * For each hugepage in hugepg_tbl, fill the physaddr value sequentially. + */ +static int +set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) +{ + unsigned int i; + static phys_addr_t addr; + + for (i = 0; i < hpi->num_pages[0]; i++) { + hugepg_tbl[i].physaddr = addr; + addr += hugepg_tbl[i].size; + } + return 0; +} + +/* * Check whether address-space layout randomization is enabled in * the kernel. This is important for multi-process as it can prevent * two processes mapping data to the same virtual address @@ -951,7 +969,7 @@ rte_eal_hugepage_init(void) int nr_hugefiles, nr_hugepages = 0; void *addr; - test_proc_pagemap_readable(); + test_phys_addrs_available(); memset(used_hp, 0, sizeof(used_hp)); @@ -1043,11 +1061,22 @@ rte_eal_hugepage_init(void) continue; } - /* find physical addresses and sockets for each hugepage */ - if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){ - RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n", - (unsigned)(hpi->hugepage_sz / 0x100000)); - goto fail; + if (phys_addrs_available) { + /* find physical addresses for each hugepage */ + if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) { + RTE_LOG(DEBUG, EAL, "Failed to find phys addr " + "for %u MB pages\n", + (unsigned int)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + } else { + /* set physical addresses for each hugepage */ + if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) { + RTE_LOG(DEBUG, EAL, "Failed to set phys addr " + "for %u MB pages\n", + (unsigned int)(hpi->hugepage_sz / 0x100000)); + goto fail; + } } if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){ @@ -1289,7 +1318,7 @@ rte_eal_hugepage_attach(void) "into secondary processes\n"); } - test_proc_pagemap_readable(); + test_phys_addrs_available(); if (internal_config.xen_dom0_support) { #ifdef RTE_LIBRTE_XEN_DOM0 @@ -1426,3 +1455,9 @@ rte_eal_hugepage_attach(void) close(fd_hugepage); return -1; } + +bool +rte_eal_using_phys_addrs(void) +{ + return phys_addrs_available; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index e2fc219..61d55b9 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -99,8 +99,10 @@ rte_eal_pci_map_device(struct rte_pci_device *dev) break; case RTE_KDRV_IGB_UIO: case RTE_KDRV_UIO_GENERIC: - /* map resources for devices that use uio */ - ret = pci_uio_map_resource(dev); + if (rte_eal_using_phys_addrs()) { + /* map resources for devices that use uio */ + ret = pci_uio_map_resource(dev); + } break; default: RTE_LOG(DEBUG, EAL,