[v4,1/2] eal: make base address hint OS-specific

Message ID 09c3f9d74e1e49aa5b3608d4bf4a773d086e83ff.1564577214.git.anatoly.burakov@intel.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series [v4,1/2] eal: make base address hint OS-specific |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/iol-Compile-Testing success Compile Testing PASS
ci/mellanox-Performance-Testing success Performance Testing PASS

Commit Message

Burakov, Anatoly July 31, 2019, 12:47 p.m. UTC
  Not all OSes follow Linux's memory layout, which may lead to
problems when following the suggested common address hint in the
absence of a base-virtaddr flag. Make this address hint OS-specific.

Cc: stable@dpdk.org

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/librte_eal/common/eal_common_memory.c | 19 +------------------
 lib/librte_eal/common/eal_private.h       |  6 ++++++
 lib/librte_eal/freebsd/eal/eal_memory.c   | 10 ++++++++++
 lib/librte_eal/linux/eal/eal_memory.c     | 20 ++++++++++++++++++++
 4 files changed, 37 insertions(+), 18 deletions(-)
  

Comments

David Marchand Oct. 2, 2019, 11:41 a.m. UTC | #1
On Wed, Jul 31, 2019 at 2:47 PM Anatoly Burakov
<anatoly.burakov@intel.com> wrote:
>
> Not all OS's follow Linux's memory layout, which may lead to
> problems following the suggested common address hint absent
> of a base-virtaddr flag. Make this address hint OS-specific.
>
> Cc: stable@dpdk.org

Missing Fixes: ?

>
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
>  lib/librte_eal/common/eal_common_memory.c | 19 +------------------
>  lib/librte_eal/common/eal_private.h       |  6 ++++++
>  lib/librte_eal/freebsd/eal/eal_memory.c   | 10 ++++++++++
>  lib/librte_eal/linux/eal/eal_memory.c     | 20 ++++++++++++++++++++
>  4 files changed, 37 insertions(+), 18 deletions(-)
>
> diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
> index 19ea47570..4a9cc1f19 100644
> --- a/lib/librte_eal/common/eal_common_memory.c
> +++ b/lib/librte_eal/common/eal_common_memory.c
> @@ -40,23 +40,6 @@
>  static void *next_baseaddr;
>  static uint64_t system_page_sz;
>
> -#ifdef RTE_ARCH_64
> -/*
> - * Linux kernel uses a really high address as starting address for serving
> - * mmaps calls. If there exists addressing limitations and IOVA mode is VA,
> - * this starting address is likely too high for those devices. However, it
> - * is possible to use a lower address in the process virtual address space
> - * as with 64 bits there is a lot of available space.
> - *
> - * Current known limitations are 39 or 40 bits. Setting the starting address
> - * at 4GB implies there are 508GB or 1020GB for mapping the available
> - * hugepages. This is likely enough for most systems, although a device with
> - * addressing limitations should call rte_mem_check_dma_mask for ensuring all
> - * memory is within supported range.
> - */
> -static uint64_t baseaddr = 0x100000000;
> -#endif
> -
>  #define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5
>  void *
>  eal_get_virtual_area(void *requested_addr, size_t *size,
> @@ -85,7 +68,7 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
>  #ifdef RTE_ARCH_64
>         if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 &&
>                         rte_eal_process_type() == RTE_PROC_PRIMARY)
> -               next_baseaddr = (void *) baseaddr;
> +               next_baseaddr = (void *) eal_get_baseaddr();
>  #endif
>         if (requested_addr == NULL && next_baseaddr != NULL) {
>                 requested_addr = next_baseaddr;
> diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
> index 798ede553..31eae2278 100644
> --- a/lib/librte_eal/common/eal_private.h
> +++ b/lib/librte_eal/common/eal_private.h
> @@ -381,4 +381,10 @@ rte_option_init(void);
>  void
>  rte_option_usage(void);
>
> +/**
> + * Get OS-specific EAL mapping base address.
> + */
> +uint64_t
> +eal_get_baseaddr(void);
> +
>  #endif /* _EAL_PRIVATE_H_ */
> diff --git a/lib/librte_eal/freebsd/eal/eal_memory.c b/lib/librte_eal/freebsd/eal/eal_memory.c
> index 9b9a0577a..1bfdb52fb 100644
> --- a/lib/librte_eal/freebsd/eal/eal_memory.c
> +++ b/lib/librte_eal/freebsd/eal/eal_memory.c
> @@ -22,6 +22,16 @@
>
>  #define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))
>
> +uint64_t eal_get_baseaddr(void)
> +{
> +       /*
> +        * FreeBSD may allocate something in the space we will be mapping things
> +        * before we get a chance to do that, so use a base address that's far
> +        * away from where malloc() et al usually map things.
> +        */
> +       return 0x1000000000;
> +}
> +
>  /*
>   * Get physical address of any mapped virtual address in the current process.
>   */
> diff --git a/lib/librte_eal/linux/eal/eal_memory.c b/lib/librte_eal/linux/eal/eal_memory.c
> index 1c089a1ef..8516f0d35 100644
> --- a/lib/librte_eal/linux/eal/eal_memory.c
> +++ b/lib/librte_eal/linux/eal/eal_memory.c
> @@ -70,6 +70,26 @@ static int phys_addrs_available = -1;
>
>  #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
>
> +uint64_t eal_get_baseaddr(void)
> +{
> +       /*
> +        * Linux kernel uses a really high address as starting address for
> +        * serving mmaps calls. If there exists addressing limitations and IOVA
> +        * mode is VA, this starting address is likely too high for those
> +        * devices. However, it is possible to use a lower address in the
> +        * process virtual address space as with 64 bits there is a lot of
> +        * available space.
> +        *
> +        * Current known limitations are 39 or 40 bits. Setting the starting
> +        * address at 4GB implies there are 508GB or 1020GB for mapping the
> +        * available hugepages. This is likely enough for most systems, although
> +        * a device with addressing limitations should call
> +        * rte_mem_check_dma_mask for ensuring all memory is within supported
> +        * range.
> +        */
> +       return 0x100000000;
> +}
> +
>  /*
>   * Get physical address of any mapped virtual address in the current process.
>   */
> --
> 2.17.1

What about the Windows port?
  
Stephen Hemminger Oct. 2, 2019, 2:42 p.m. UTC | #2
On Wed, 2 Oct 2019 13:41:30 +0200
David Marchand <david.marchand@redhat.com> wrote:

> > +uint64_t eal_get_baseaddr(void)
> > +{
> > +       /*
> > +        * Linux kernel uses a really high address as starting address for
> > +        * serving mmaps calls. If there exists addressing limitations and IOVA
> > +        * mode is VA, this starting address is likely too high for those
> > +        * devices. However, it is possible to use a lower address in the
> > +        * process virtual address space as with 64 bits there is a lot of
> > +        * available space.
> > +        *
> > +        * Current known limitations are 39 or 40 bits. Setting the starting
> > +        * address at 4GB implies there are 508GB or 1020GB for mapping the
> > +        * available hugepages. This is likely enough for most systems, although
> > +        * a device with addressing limitations should call
> > +        * rte_mem_check_dma_mask for ensuring all memory is within supported
> > +        * range.
> > +        */
> > +       return 0x100000000;

Is this going to work right on 32-bit builds, where sizeof(uint) == 4 and
constants then default to 32 bits? Does it need a ul or ull suffix (or a cast)?
  
Burakov, Anatoly Oct. 24, 2019, 12:32 p.m. UTC | #3
On 02-Oct-19 12:41 PM, David Marchand wrote:
> On Wed, Jul 31, 2019 at 2:47 PM Anatoly Burakov
> <anatoly.burakov@intel.com> wrote:
>>
>> Not all OS's follow Linux's memory layout, which may lead to
>> problems following the suggested common address hint absent
>> of a base-virtaddr flag. Make this address hint OS-specific.
>>
>> Cc: stable@dpdk.org
> 
> Missing Fixes: ?

No, not really. This isn't a bug as such - it was intended behavior. 
It's just /wrong/ intended behavior :)

> 
> What about windows port?
> 
> 

I don't see the Windows port supporting secondary processes or mapping
shared config anywhere, so this is probably unimplemented there for now.
  
Burakov, Anatoly Oct. 24, 2019, 12:32 p.m. UTC | #4
On 02-Oct-19 3:42 PM, Stephen Hemminger wrote:
> On Wed, 2 Oct 2019 13:41:30 +0200
> David Marchand <david.marchand@redhat.com> wrote:
> 
>>> +uint64_t eal_get_baseaddr(void)
>>> +{
>>> +       /*
>>> +        * Linux kernel uses a really high address as starting address for
>>> +        * serving mmaps calls. If there exists addressing limitations and IOVA
>>> +        * mode is VA, this starting address is likely too high for those
>>> +        * devices. However, it is possible to use a lower address in the
>>> +        * process virtual address space as with 64 bits there is a lot of
>>> +        * available space.
>>> +        *
>>> +        * Current known limitations are 39 or 40 bits. Setting the starting
>>> +        * address at 4GB implies there are 508GB or 1020GB for mapping the
>>> +        * available hugepages. This is likely enough for most systems, although
>>> +        * a device with addressing limitations should call
>>> +        * rte_mem_check_dma_mask for ensuring all memory is within supported
>>> +        * range.
>>> +        */
>>> +       return 0x100000000;
> 
> Is this going to work right on 32  bit builds where sizeof(uint) == 4
> then constants default to 32. Does it need ul or ull suffix (or a cast)?
> 

Adding ULL in v2, thanks.
  

Patch

diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 19ea47570..4a9cc1f19 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -40,23 +40,6 @@ 
 static void *next_baseaddr;
 static uint64_t system_page_sz;
 
-#ifdef RTE_ARCH_64
-/*
- * Linux kernel uses a really high address as starting address for serving
- * mmaps calls. If there exists addressing limitations and IOVA mode is VA,
- * this starting address is likely too high for those devices. However, it
- * is possible to use a lower address in the process virtual address space
- * as with 64 bits there is a lot of available space.
- *
- * Current known limitations are 39 or 40 bits. Setting the starting address
- * at 4GB implies there are 508GB or 1020GB for mapping the available
- * hugepages. This is likely enough for most systems, although a device with
- * addressing limitations should call rte_mem_check_dma_mask for ensuring all
- * memory is within supported range.
- */
-static uint64_t baseaddr = 0x100000000;
-#endif
-
 #define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5
 void *
 eal_get_virtual_area(void *requested_addr, size_t *size,
@@ -85,7 +68,7 @@  eal_get_virtual_area(void *requested_addr, size_t *size,
 #ifdef RTE_ARCH_64
 	if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 &&
 			rte_eal_process_type() == RTE_PROC_PRIMARY)
-		next_baseaddr = (void *) baseaddr;
+		next_baseaddr = (void *) eal_get_baseaddr();
 #endif
 	if (requested_addr == NULL && next_baseaddr != NULL) {
 		requested_addr = next_baseaddr;
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 798ede553..31eae2278 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -381,4 +381,10 @@  rte_option_init(void);
 void
 rte_option_usage(void);
 
+/**
+ * Get OS-specific EAL mapping base address.
+ */
+uint64_t
+eal_get_baseaddr(void);
+
 #endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/freebsd/eal/eal_memory.c b/lib/librte_eal/freebsd/eal/eal_memory.c
index 9b9a0577a..1bfdb52fb 100644
--- a/lib/librte_eal/freebsd/eal/eal_memory.c
+++ b/lib/librte_eal/freebsd/eal/eal_memory.c
@@ -22,6 +22,16 @@ 
 
 #define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))
 
+uint64_t eal_get_baseaddr(void)
+{
+	/*
+	 * FreeBSD may allocate something in the space we will be mapping things
+	 * before we get a chance to do that, so use a base address that's far
+	 * away from where malloc() et al usually map things.
+	 */
+	return 0x1000000000;
+}
+
 /*
  * Get physical address of any mapped virtual address in the current process.
  */
diff --git a/lib/librte_eal/linux/eal/eal_memory.c b/lib/librte_eal/linux/eal/eal_memory.c
index 1c089a1ef..8516f0d35 100644
--- a/lib/librte_eal/linux/eal/eal_memory.c
+++ b/lib/librte_eal/linux/eal/eal_memory.c
@@ -70,6 +70,26 @@  static int phys_addrs_available = -1;
 
 #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
 
+uint64_t eal_get_baseaddr(void)
+{
+	/*
+	 * Linux kernel uses a really high address as starting address for
+	 * serving mmaps calls. If there exists addressing limitations and IOVA
+	 * mode is VA, this starting address is likely too high for those
+	 * devices. However, it is possible to use a lower address in the
+	 * process virtual address space as with 64 bits there is a lot of
+	 * available space.
+	 *
+	 * Current known limitations are 39 or 40 bits. Setting the starting
+	 * address at 4GB implies there are 508GB or 1020GB for mapping the
+	 * available hugepages. This is likely enough for most systems, although
+	 * a device with addressing limitations should call
+	 * rte_mem_check_dma_mask for ensuring all memory is within supported
+	 * range.
+	 */
+	return 0x100000000;
+}
+
 /*
  * Get physical address of any mapped virtual address in the current process.
  */