[4/6] mem: use address hint for mapping hugepages
Checks
Commit Message
Linux kernel uses a really high address as starting address for
serving mmaps calls. If there exists addressing limitations and
IOVA mode is VA, this starting address is likely too high for
those devices. However, it is possible to use a lower address in
the process virtual address space as with 64 bits there is a lot
of available space.
This patch adds an address hint as starting address for 64 bits
systems.
Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
---
lib/librte_eal/linuxapp/eal/eal_memory.c | 50 ++++++++++++++++++++++++++------
1 file changed, 41 insertions(+), 9 deletions(-)
Comments
On 02-Jul-18 6:27 PM, Alejandro Lucero wrote:
> Linux kernel uses a really high address as starting address for
> serving mmaps calls. If there exists addressing limitations and
> IOVA mode is VA, this starting address is likely too high for
> those devices. However, it is possible to use a lower address in
> the process virtual address space as with 64 bits there is a lot
> of available space.
>
> This patch adds an address hint as starting address for 64 bits
> systems.
>
> Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
> ---
<snip>
> long aligned_addr;
>
> - if (internal_config.base_virtaddr != 0) {
> - addr = (void*) (uintptr_t) (internal_config.base_virtaddr +
> - baseaddr_offset);
> - }
> - else addr = NULL;
> -
> RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
>
> fd = open("/dev/zero", O_RDONLY);
> @@ -278,7 +289,22 @@
> return NULL;
> }
> do {
> - addr = mmap(addr,
> + if (internal_config.base_virtaddr != 0) {
> + addr_hint = (void *) (uintptr_t)
> + (internal_config.base_virtaddr +
> + baseaddr_offset);
> + }
> +#ifdef RTE_ARCH_64
> + else {
> + addr_hint = (void *) (uintptr_t) (baseaddr +
> + baseaddr_offset);
> + }
> +#else
> + else {
> + addr_hint = NULL;
> + }
> +#endif
If my understanding is correct, calculations are all done on static
variables, only the result is assigned to addr_hint which is local. Can
we move this into a function and put these #ifdefs there, while keeping
this code clean?
> + addr = mmap(addr_hint,
> (*size) + hugepage_sz, PROT_READ,
> #ifdef RTE_ARCH_PPC_64
> MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
> @@ -286,8 +312,14 @@
> MAP_PRIVATE,
> #endif
> fd, 0);
> - if (addr == MAP_FAILED)
> + if (addr == MAP_FAILED) {
> + /* map failed. Let's try with less memory */
> *size -= hugepage_sz;
> + } else if (addr_hint && addr != addr_hint) {
> + /* map not using hint. Let's try with another offset */
Comment is slightly misleading - "map not using hint" implies we are
about to map something without using hint. Suggested rewording:
suggested address hint was not used, try with another offset
> + addr = MAP_FAILED;
> + baseaddr_offset += 0x100000000;
> + }
> } while (addr == MAP_FAILED && *size > 0);
>
> if (addr == MAP_FAILED) {
>
On Tue, Jul 3, 2018 at 10:17 AM, Burakov, Anatoly <anatoly.burakov@intel.com
> wrote:
> On 02-Jul-18 6:27 PM, Alejandro Lucero wrote:
>
>> Linux kernel uses a really high address as starting address for
>> serving mmaps calls. If there exists addressing limitations and
>> IOVA mode is VA, this starting address is likely too high for
>> those devices. However, it is possible to use a lower address in
>> the process virtual address space as with 64 bits there is a lot
>> of available space.
>>
>> This patch adds an address hint as starting address for 64 bits
>> systems.
>>
>> Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
>> ---
>>
>
> <snip>
>
>
> long aligned_addr;
>> - if (internal_config.base_virtaddr != 0) {
>> - addr = (void*) (uintptr_t) (internal_config.base_virtaddr
>> +
>> - baseaddr_offset);
>> - }
>> - else addr = NULL;
>> -
>> RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
>> fd = open("/dev/zero", O_RDONLY);
>> @@ -278,7 +289,22 @@
>> return NULL;
>> }
>> do {
>> - addr = mmap(addr,
>> + if (internal_config.base_virtaddr != 0) {
>> + addr_hint = (void *) (uintptr_t)
>> + (internal_config.base_virtaddr +
>> + baseaddr_offset);
>> + }
>> +#ifdef RTE_ARCH_64
>> + else {
>> + addr_hint = (void *) (uintptr_t) (baseaddr +
>> + baseaddr_offset);
>> + }
>> +#else
>> + else {
>> + addr_hint = NULL;
>> + }
>> +#endif
>>
>
> If my understanding is correct, calculations are all done on static
> variables, only the result is assigned to addr_hint which is local. Can we
> move this into a function and put these #ifdefs there, while keeping this
> code clean?
Ok.
>
>
> + addr = mmap(addr_hint,
>> (*size) + hugepage_sz, PROT_READ,
>> #ifdef RTE_ARCH_PPC_64
>> MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
>> @@ -286,8 +312,14 @@
>> MAP_PRIVATE,
>> #endif
>> fd, 0);
>> - if (addr == MAP_FAILED)
>> + if (addr == MAP_FAILED) {
>> + /* map failed. Let's try with less memory */
>> *size -= hugepage_sz;
>> + } else if (addr_hint && addr != addr_hint) {
>> + /* map not using hint. Let's try with another
>> offset */
>>
>
> Comment is slightly misleading - "map not using hint" implies we are about
> to map something without using hint. Suggested rewording:
>
> suggested address hint was not used, try with another offset
What about "hint was not used. Try with another offset" ?
By the way, I forgot to unmap the memory in this case. I will fix this in
next version.
>
>
> + addr = MAP_FAILED;
>> + baseaddr_offset += 0x100000000;
>> + }
>> } while (addr == MAP_FAILED && *size > 0);
>> if (addr == MAP_FAILED) {
>>
>>
>
> --
> Thanks,
> Anatoly
>
On 03-Jul-18 11:44 AM, Alejandro Lucero wrote:
>
> + addr = mmap(addr_hint,
> (*size) + hugepage_sz, PROT_READ,
> #ifdef RTE_ARCH_PPC_64
> MAP_PRIVATE | MAP_ANONYMOUS |
> MAP_HUGETLB,
> @@ -286,8 +312,14 @@
> MAP_PRIVATE,
> #endif
> fd, 0);
> - if (addr == MAP_FAILED)
> + if (addr == MAP_FAILED) {
> + /* map failed. Let's try with less memory */
> *size -= hugepage_sz;
> + } else if (addr_hint && addr != addr_hint) {
> + /* map not using hint. Let's try with
> another offset */
>
>
> Comment is slightly misleading - "map not using hint" implies we are
> about to map something without using hint. Suggested rewording:
>
> suggested address hint was not used, try with another offset
>
>
> What about "hint was not used. Try with another offset" ?
Works for me :)
@@ -88,6 +88,23 @@
static uint64_t baseaddr_offset;
+#ifdef RTE_ARCH_64
+/*
+ * Linux kernel uses a really high address as starting address for serving
+ * mmaps calls. If there exists addressing limitations and IOVA mode is VA,
+ * this starting address is likely too high for those devices. However, it
+ * is possible to use a lower address in the process virtual address space
+ * as with 64 bits there is a lot of available space.
+ *
+ * Current known limitations are 39 or 40 bits. Setting the starting address
+ * at 4GB implies there are 508GB or 1020GB for mapping the available
+ * hugepages. This is likely enough for most systems, although a device with
+ * addressing limitations should call rte_dev_check_dma_mask for ensuring all
+ * memory is within supported range.
+ */
+static uint64_t baseaddr = 0x100000000;
+#endif
+
static bool phys_addrs_available = true;
#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
@@ -260,16 +277,10 @@
static void *
get_virtual_area(size_t *size, size_t hugepage_sz)
{
- void *addr;
+ void *addr, *addr_hint;
int fd;
long aligned_addr;
- if (internal_config.base_virtaddr != 0) {
- addr = (void*) (uintptr_t) (internal_config.base_virtaddr +
- baseaddr_offset);
- }
- else addr = NULL;
-
RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
fd = open("/dev/zero", O_RDONLY);
@@ -278,7 +289,22 @@
return NULL;
}
do {
- addr = mmap(addr,
+ if (internal_config.base_virtaddr != 0) {
+ addr_hint = (void *) (uintptr_t)
+ (internal_config.base_virtaddr +
+ baseaddr_offset);
+ }
+#ifdef RTE_ARCH_64
+ else {
+ addr_hint = (void *) (uintptr_t) (baseaddr +
+ baseaddr_offset);
+ }
+#else
+ else {
+ addr_hint = NULL;
+ }
+#endif
+ addr = mmap(addr_hint,
(*size) + hugepage_sz, PROT_READ,
#ifdef RTE_ARCH_PPC_64
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
@@ -286,8 +312,14 @@
MAP_PRIVATE,
#endif
fd, 0);
- if (addr == MAP_FAILED)
+ if (addr == MAP_FAILED) {
+ /* map failed. Let's try with less memory */
*size -= hugepage_sz;
+ } else if (addr_hint && addr != addr_hint) {
+ /* map not using hint. Let's try with another offset */
+ addr = MAP_FAILED;
+ baseaddr_offset += 0x100000000;
+ }
} while (addr == MAP_FAILED && *size > 0);
if (addr == MAP_FAILED) {