[v4,3/8] eal: introduce memory management wrappers

Message ID 20200428235015.2820677-4-dmitry.kozliuk@gmail.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series Windows basic memory management |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Dmitry Kozlyuk April 28, 2020, 11:50 p.m. UTC
  Introduce OS-independent wrappers for memory management operations used
across DPDK and specifically in common code of EAL:

* rte_mem_map()
* rte_mem_unmap()
* rte_get_page_size()
* rte_mem_lock()

Windows uses different APIs for memory mapping and reservation, while
Unices reserve memory by mapping it. Introduce EAL private functions to
support memory reservation in common code:

* eal_mem_reserve()
* eal_mem_free()
* eal_mem_set_dump()

Wrappers follow POSIX semantics limited to DPDK tasks, but their
signatures deliberately differ from POSIX ones to be more safe and
expressive.

Signed-off-by: Dmitry Kozlyuk <dmitry.kozliuk@gmail.com>
---
 lib/librte_eal/common/eal_common_fbarray.c |  37 +++---
 lib/librte_eal/common/eal_common_memory.c  |  62 ++++-----
 lib/librte_eal/common/eal_private.h        |  74 ++++++++++-
 lib/librte_eal/freebsd/Makefile            |   1 +
 lib/librte_eal/include/rte_memory.h        |  86 ++++++++++++
 lib/librte_eal/linux/Makefile              |   1 +
 lib/librte_eal/linux/eal_memalloc.c        |   5 +-
 lib/librte_eal/rte_eal_version.map         |   4 +
 lib/librte_eal/unix/eal_unix_memory.c      | 144 +++++++++++++++++++++
 lib/librte_eal/unix/meson.build            |   1 +
 10 files changed, 350 insertions(+), 65 deletions(-)
 create mode 100644 lib/librte_eal/unix/eal_unix_memory.c
  

Comments

Burakov, Anatoly April 29, 2020, 5:13 p.m. UTC | #1
On 29-Apr-20 12:50 AM, Dmitry Kozlyuk wrote:
> Introduce OS-independent wrappers for memory management operations used
> across DPDK and specifically in common code of EAL:
> 
> * rte_mem_map()
> * rte_mem_unmap()
> * rte_get_page_size()
> * rte_mem_lock()
> 
> Windows uses different APIs for memory mapping and reservation, while
> Unices reserve memory by mapping it. Introduce EAL private functions to
> support memory reservation in common code:
> 
> * eal_mem_reserve()
> * eal_mem_free()
> * eal_mem_set_dump()
> 
> Wrappers follow POSIX semantics limited to DPDK tasks, but their
> signatures deliberately differ from POSIX ones to be more safe and
> expressive.
> 
> Signed-off-by: Dmitry Kozlyuk <dmitry.kozliuk@gmail.com>
> ---

<snip>

>   	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
>   
> @@ -105,24 +94,24 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
>   			return NULL;
>   		}
>   
> -		mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_NONE,
> -				mmap_flags, -1, 0);
> -		if (mapped_addr == MAP_FAILED && allow_shrink)
> -			*size -= page_sz;
> +		mapped_addr = eal_mem_reserve(
> +			requested_addr, (size_t)map_sz, reserve_flags);
> +		if ((mapped_addr == NULL) && allow_shrink)
> +			size -= page_sz;

Should be *size -= page_sz, size is a pointer in this case.

>   
> -		if (mapped_addr != MAP_FAILED && addr_is_hint &&
> -		    mapped_addr != requested_addr) {
> +		if ((mapped_addr != NULL) && addr_is_hint &&
> +				(mapped_addr != requested_addr)) {
>   			try++;
>   			next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
>   			if (try <= MAX_MMAP_WITH_DEFINED_ADDR_TRIES) {
>   				/* hint was not used. Try with another offset */
> -				munmap(mapped_addr, map_sz);
> -				mapped_addr = MAP_FAILED;
> +				eal_mem_free(mapped_addr, *size);

Why change map_sz to *size?

> +				mapped_addr = NULL;
>   				requested_addr = next_baseaddr;
>   			}
>   		}
>   	} while ((allow_shrink || addr_is_hint) &&
> -		 mapped_addr == MAP_FAILED && *size > 0);
> +		(mapped_addr == NULL) && (*size > 0));
>   

<snip>

> @@ -547,10 +531,10 @@ rte_eal_memdevice_init(void)
>   int
>   rte_mem_lock_page(const void *virt)
>   {
> -	unsigned long virtual = (unsigned long)virt;
> -	int page_size = getpagesize();
> -	unsigned long aligned = (virtual & ~(page_size - 1));
> -	return mlock((void *)aligned, page_size);
> +	uintptr_t virtual = (uintptr_t)virt;
> +	int page_size = rte_get_page_size();
> +	uintptr_t aligned = (virtual & ~(page_size - 1));

Might as well fix to use macros? e.g.

size_t pagesz = rte_get_page_size();
return rte_mem_lock(RTE_PTR_ALIGN(virt, pagesz), pagesz);

(also, note that rte_get_page_size() returns size_t, not int)

> +	return rte_mem_lock((void *)aligned, page_size);
>   }
>   
>   int
> diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
> index 3aafd892f..67ca83e47 100644
> --- a/lib/librte_eal/common/eal_private.h
> +++ b/lib/librte_eal/common/eal_private.h
> @@ -11,6 +11,7 @@
>   

<snip>

> + *  Reservation size. Must be a multiple of system page size.
> + * @param flags
> + *  Reservation options, a combination of eal_mem_reserve_flags.
> + * @returns
> + *  Starting address of the reserved area on success, NULL on failure.
> + *  Callers must not access this memory until remapping it.
> + */
> +void *eal_mem_reserve(void *requested_addr, size_t size, int flags);

Should we also require requested_addr to be page-aligned?

Also, here and in other added API's, nitpick but our coding style guide 
(and the code style in this file) suggests that return value should be 
on a separate line, e.g.

void *
eal_mem_reserve(...);

> +
> +/**
> + * Free memory obtained by eal_mem_reserve() or eal_mem_alloc().
> + *
> + * If *virt* and *size* describe a part of the reserved region,
> + * only this part of the region is freed (accurately up to the system
> + * page size). If *virt* points to allocated memory, *size* must match
> + * the one specified on allocation. The behavior is undefined
> + * if the memory pointed by *virt* is obtained from another source
> + * than listed above.
> + *

<snip>

> +}
> +
> +static int
> +mem_rte_to_sys_prot(int prot)
> +{
> +	int sys_prot = 0;

Maybe set it to PROT_NONE to make it more obvious?

> +
> +	if (prot & RTE_PROT_READ)
> +		sys_prot |= PROT_READ;
> +	if (prot & RTE_PROT_WRITE)
> +		sys_prot |= PROT_WRITE;
> +	if (prot & RTE_PROT_EXECUTE)
> +		sys_prot |= PROT_EXEC;
> +
> +	return sys_prot;
> +}
> +
> +void *
> +rte_mem_map(void *requested_addr, size_t size, int prot, int flags,
> +	int fd, size_t offset)
> +{
> +	int sys_prot = 0;

Not necessary to initialize sys_prot (and it's counter-productive as 
compiler warning about uninitialized usage is a *good thing*!).

> +	int sys_flags = 0;
> +
> +	sys_prot = mem_rte_to_sys_prot(prot);
> +
> +	if (flags & RTE_MAP_SHARED)
> +		sys_flags |= MAP_SHARED;
> +	if (flags & RTE_MAP_ANONYMOUS)
> +		sys_flags |= MAP_ANONYMOUS;
> +	if (flags & RTE_MAP_PRIVATE)
> +		sys_flags |= MAP_PRIVATE;
> +	if (flags & RTE_MAP_FORCE_ADDRESS)
> +		sys_flags |= MAP_FIXED;
> +
> +	return mem_map(requested_addr, size, sys_prot, sys_flags, fd, offset);
> +}
> +
> +int
> +rte_mem_unmap(void *virt, size_t size)
> +{
> +	return mem_unmap(virt, size);
> +}
> +
> +size_t
> +rte_get_page_size(void)
> +{
> +	return sysconf(_SC_PAGESIZE);

Can we perhaps cache this value?

> +}
> +
> +int
> +rte_mem_lock(const void *virt, size_t size)
> +{
> +	return mlock(virt, size);

This call can fail. It should pass errno as rte_errno as well, just like 
all other calls from this family.

Also, if the implementation "may require" page alignment, how about 
requiring it unconditionally?

> +}
> diff --git a/lib/librte_eal/unix/meson.build b/lib/librte_eal/unix/meson.build
> index cfa1b4ef9..5734f26ad 100644
> --- a/lib/librte_eal/unix/meson.build
> +++ b/lib/librte_eal/unix/meson.build
> @@ -3,4 +3,5 @@
>   
>   sources += files(
>   	'eal_unix.c',
> +	'eal_unix_memory.c',
>   )
>
  
Burakov, Anatoly April 30, 2020, 1:59 p.m. UTC | #2
On 29-Apr-20 6:13 PM, Burakov, Anatoly wrote:
>> @@ -547,10 +531,10 @@ rte_eal_memdevice_init(void)
>>   int
>>   rte_mem_lock_page(const void *virt)
>>   {
>> -    unsigned long virtual = (unsigned long)virt;
>> -    int page_size = getpagesize();
>> -    unsigned long aligned = (virtual & ~(page_size - 1));
>> -    return mlock((void *)aligned, page_size);
>> +    uintptr_t virtual = (uintptr_t)virt;
>> +    int page_size = rte_get_page_size();
>> +    uintptr_t aligned = (virtual & ~(page_size - 1));
> 
> Might as well fix to use macros? e.g.
> 
> size_t pagesz = rte_get_page_size();
> return rte_mem_lock(RTE_PTR_ALIGN(virt, pagesz), pagesz);
> 
> (also, note that rte_get_page_size() returns size_t, not int)

Apologies, this should've been RTE_PTR_ALIGN_FLOOR(virt, pagesz)
  
Dmitry Kozlyuk May 1, 2020, 7 p.m. UTC | #3
Thanks for pointing out the errors, see some comments inline.

On 2020-04-29 18:13 GMT+0100 Burakov, Anatoly wrote:
> On 29-Apr-20 12:50 AM, Dmitry Kozlyuk wrote: 
<snip>
> > + *  Reservation size. Must be a multiple of system page size.
> > + * @param flags
> > + *  Reservation options, a combination of eal_mem_reserve_flags.
> > + * @returns
> > + *  Starting address of the reserved area on success, NULL on failure.
> > + *  Callers must not access this memory until remapping it.
> > + */
> > +void *eal_mem_reserve(void *requested_addr, size_t size, int flags);  
> 
> Should we also require requested_addr to be page-aligned?

Yes.

> Also, here and in other added API's, nitpick but our coding style guide 
> (and the code style in this file) suggests that return value should be 
> on a separate line, e.g.
> 
> void *
> eal_mem_reserve(...);

Will follow your advice in v5 to keep the style within this file consistent.
However, DPDK Coding Style explicitly says:

	Unlike function definitions, the function prototypes do not need to
	place the function return type on a separate line.

[snip]
> > +
> > +int
> > +rte_mem_lock(const void *virt, size_t size)
> > +{
> > +	return mlock(virt, size);  
> 
> This call can fail. It should pass errno as rte_errno as well, just like 
> all other calls from this family.
> 
> Also, if the implementation "may require" page alignment, how about 
> requiring it unconditionally?

IMO even better to document this function as locking all pages crossed by the
address region. This would save address checking/alignment at call site and
all implementations work this way. Locking memory implies paging system.
  
Burakov, Anatoly May 5, 2020, 2:43 p.m. UTC | #4
On 01-May-20 8:00 PM, Dmitry Kozlyuk wrote:
> Thanks for pointing out the errors, see some comments inline.
> 
> On 2020-04-29 18:13 GMT+0100 Burakov, Anatoly wrote:
>> On 29-Apr-20 12:50 AM, Dmitry Kozlyuk wrote:
> <snip>
>>> + *  Reservation size. Must be a multiple of system page size.
>>> + * @param flags
>>> + *  Reservation options, a combination of eal_mem_reserve_flags.
>>> + * @returns
>>> + *  Starting address of the reserved area on success, NULL on failure.
>>> + *  Callers must not access this memory until remapping it.
>>> + */
>>> +void *eal_mem_reserve(void *requested_addr, size_t size, int flags);
>>
>> Should we also require requested_addr to be page-aligned?
> 
> Yes.
> 
>> Also, here and in other added API's, nitpick but our coding style guide
>> (and the code style in this file) suggests that return value should be
>> on a separate line, e.g.
>>
>> void *
>> eal_mem_reserve(...);
> 
> Will follow your advice in v5 to keep the style within this file consistent.
> However, DPDK Coding Style explicitly says:
> 
> 	Unlike function definitions, the function prototypes do not need to
> 	place the function return type on a separate line.
> 
> [snip]
>>> +
>>> +int
>>> +rte_mem_lock(const void *virt, size_t size)
>>> +{
>>> +	return mlock(virt, size);
>>
>> This call can fail. It should pass errno as rte_errno as well, just like
>> all other calls from this family.
>>
>> Also, if the implementation "may require" page alignment, how about
>> requiring it unconditionally?
> 
> IMO even better to document this function as locking all pages crossed by the
> address region. This would save address checking/alignment at call site and
> all implementations work this way. Locking memory implies paging system.
> 

I don't think any other external API we provide does automagic pointer 
alignment, so i'm not sure if it indeed would be better to have it align 
automatically. It's also better from the standpoint of not silently 
allowing seemingly invalid arguments. So, i would lean on the side of 
requiring alignment, but not doing it ourselves.
  

Patch

diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c
index 1e55757ca..b3b6c8521 100644
--- a/lib/librte_eal/common/eal_common_fbarray.c
+++ b/lib/librte_eal/common/eal_common_fbarray.c
@@ -5,15 +5,15 @@ 
 #include <fcntl.h>
 #include <inttypes.h>
 #include <limits.h>
-#include <sys/mman.h>
 #include <stdint.h>
 #include <errno.h>
 #include <string.h>
 #include <unistd.h>
 
 #include <rte_common.h>
-#include <rte_log.h>
 #include <rte_errno.h>
+#include <rte_log.h>
+#include <rte_memory.h>
 #include <rte_spinlock.h>
 #include <rte_tailq.h>
 
@@ -92,12 +92,9 @@  resize_and_map(int fd, void *addr, size_t len)
 		return -1;
 	}
 
-	map_addr = mmap(addr, len, PROT_READ | PROT_WRITE,
-			MAP_SHARED | MAP_FIXED, fd, 0);
+	map_addr = rte_mem_map(addr, len, RTE_PROT_READ | RTE_PROT_WRITE,
+			RTE_MAP_SHARED | RTE_MAP_FORCE_ADDRESS, fd, 0);
 	if (map_addr != addr) {
-		RTE_LOG(ERR, EAL, "mmap() failed: %s\n", strerror(errno));
-		/* pass errno up the chain */
-		rte_errno = errno;
 		return -1;
 	}
 	return 0;
@@ -735,7 +732,7 @@  rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
 		return -1;
 	}
 
-	page_sz = sysconf(_SC_PAGESIZE);
+	page_sz = rte_get_page_size();
 	if (page_sz == (size_t)-1) {
 		free(ma);
 		return -1;
@@ -756,9 +753,11 @@  rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
 
 	if (internal_config.no_shconf) {
 		/* remap virtual area as writable */
-		void *new_data = mmap(data, mmap_len, PROT_READ | PROT_WRITE,
-				MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, fd, 0);
-		if (new_data == MAP_FAILED) {
+		static const int flags = RTE_MAP_FORCE_ADDRESS |
+			RTE_MAP_PRIVATE | RTE_MAP_ANONYMOUS;
+		void *new_data = rte_mem_map(data, mmap_len,
+			RTE_PROT_READ | RTE_PROT_WRITE, flags, fd, 0);
+		if (new_data == NULL) {
 			RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n",
 					__func__, strerror(errno));
 			goto fail;
@@ -823,7 +822,7 @@  rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
 	return 0;
 fail:
 	if (data)
-		munmap(data, mmap_len);
+		rte_mem_unmap(data, mmap_len);
 	if (fd >= 0)
 		close(fd);
 	free(ma);
@@ -861,7 +860,7 @@  rte_fbarray_attach(struct rte_fbarray *arr)
 		return -1;
 	}
 
-	page_sz = sysconf(_SC_PAGESIZE);
+	page_sz = rte_get_page_size();
 	if (page_sz == (size_t)-1) {
 		free(ma);
 		return -1;
@@ -913,7 +912,7 @@  rte_fbarray_attach(struct rte_fbarray *arr)
 	return 0;
 fail:
 	if (data)
-		munmap(data, mmap_len);
+		rte_mem_unmap(data, mmap_len);
 	if (fd >= 0)
 		close(fd);
 	free(ma);
@@ -941,8 +940,7 @@  rte_fbarray_detach(struct rte_fbarray *arr)
 	 * really do anything about it, things will blow up either way.
 	 */
 
-	size_t page_sz = sysconf(_SC_PAGESIZE);
-
+	size_t page_sz = rte_get_page_size();
 	if (page_sz == (size_t)-1)
 		return -1;
 
@@ -961,7 +959,7 @@  rte_fbarray_detach(struct rte_fbarray *arr)
 		goto out;
 	}
 
-	munmap(arr->data, mmap_len);
+	rte_mem_unmap(arr->data, mmap_len);
 
 	/* area is unmapped, close fd and remove the tailq entry */
 	if (tmp->fd >= 0)
@@ -996,8 +994,7 @@  rte_fbarray_destroy(struct rte_fbarray *arr)
 	 * really do anything about it, things will blow up either way.
 	 */
 
-	size_t page_sz = sysconf(_SC_PAGESIZE);
-
+	size_t page_sz = rte_get_page_size();
 	if (page_sz == (size_t)-1)
 		return -1;
 
@@ -1046,7 +1043,7 @@  rte_fbarray_destroy(struct rte_fbarray *arr)
 		}
 		close(fd);
 	}
-	munmap(arr->data, mmap_len);
+	rte_mem_unmap(arr->data, mmap_len);
 
 	/* area is unmapped, remove the tailq entry */
 	TAILQ_REMOVE(&mem_area_tailq, tmp, next);
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 4c897a13f..1196a8037 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -11,7 +11,6 @@ 
 #include <string.h>
 #include <unistd.h>
 #include <inttypes.h>
-#include <sys/mman.h>
 #include <sys/queue.h>
 
 #include <rte_fbarray.h>
@@ -40,18 +39,10 @@ 
 static void *next_baseaddr;
 static uint64_t system_page_sz;
 
-#ifdef RTE_EXEC_ENV_LINUX
-#define RTE_DONTDUMP MADV_DONTDUMP
-#elif defined RTE_EXEC_ENV_FREEBSD
-#define RTE_DONTDUMP MADV_NOCORE
-#else
-#error "madvise doesn't support this OS"
-#endif
-
 #define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5
 void *
 eal_get_virtual_area(void *requested_addr, size_t *size,
-		size_t page_sz, int flags, int mmap_flags)
+	size_t page_sz, int flags, int reserve_flags)
 {
 	bool addr_is_hint, allow_shrink, unmap, no_align;
 	uint64_t map_sz;
@@ -59,9 +50,7 @@  eal_get_virtual_area(void *requested_addr, size_t *size,
 	uint8_t try = 0;
 
 	if (system_page_sz == 0)
-		system_page_sz = sysconf(_SC_PAGESIZE);
-
-	mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
+		system_page_sz = rte_get_page_size();
 
 	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
 
@@ -105,24 +94,24 @@  eal_get_virtual_area(void *requested_addr, size_t *size,
 			return NULL;
 		}
 
-		mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_NONE,
-				mmap_flags, -1, 0);
-		if (mapped_addr == MAP_FAILED && allow_shrink)
-			*size -= page_sz;
+		mapped_addr = eal_mem_reserve(
+			requested_addr, (size_t)map_sz, reserve_flags);
+		if ((mapped_addr == NULL) && allow_shrink)
+			*size -= page_sz;
 
-		if (mapped_addr != MAP_FAILED && addr_is_hint &&
-		    mapped_addr != requested_addr) {
+		if ((mapped_addr != NULL) && addr_is_hint &&
+				(mapped_addr != requested_addr)) {
 			try++;
 			next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
 			if (try <= MAX_MMAP_WITH_DEFINED_ADDR_TRIES) {
 				/* hint was not used. Try with another offset */
-				munmap(mapped_addr, map_sz);
-				mapped_addr = MAP_FAILED;
+				eal_mem_free(mapped_addr, map_sz);
+				mapped_addr = NULL;
 				requested_addr = next_baseaddr;
 			}
 		}
 	} while ((allow_shrink || addr_is_hint) &&
-		 mapped_addr == MAP_FAILED && *size > 0);
+		(mapped_addr == NULL) && (*size > 0));
 
 	/* align resulting address - if map failed, we will ignore the value
 	 * anyway, so no need to add additional checks.
@@ -132,20 +121,17 @@  eal_get_virtual_area(void *requested_addr, size_t *size,
 
 	if (*size == 0) {
 		RTE_LOG(ERR, EAL, "Cannot get a virtual area of any size: %s\n",
-			strerror(errno));
-		rte_errno = errno;
+			strerror(rte_errno));
 		return NULL;
-	} else if (mapped_addr == MAP_FAILED) {
+	} else if (mapped_addr == NULL) {
 		RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
-			strerror(errno));
-		/* pass errno up the call chain */
-		rte_errno = errno;
+			strerror(rte_errno));
 		return NULL;
 	} else if (requested_addr != NULL && !addr_is_hint &&
 			aligned_addr != requested_addr) {
 		RTE_LOG(ERR, EAL, "Cannot get a virtual area at requested address: %p (got %p)\n",
 			requested_addr, aligned_addr);
-		munmap(mapped_addr, map_sz);
+		eal_mem_free(mapped_addr, map_sz);
 		rte_errno = EADDRNOTAVAIL;
 		return NULL;
 	} else if (requested_addr != NULL && addr_is_hint &&
@@ -161,7 +147,7 @@  eal_get_virtual_area(void *requested_addr, size_t *size,
 		aligned_addr, *size);
 
 	if (unmap) {
-		munmap(mapped_addr, map_sz);
+		eal_mem_free(mapped_addr, map_sz);
 	} else if (!no_align) {
 		void *map_end, *aligned_end;
 		size_t before_len, after_len;
@@ -179,19 +165,17 @@  eal_get_virtual_area(void *requested_addr, size_t *size,
 		/* unmap space before aligned mmap address */
 		before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr);
 		if (before_len > 0)
-			munmap(mapped_addr, before_len);
+			eal_mem_free(mapped_addr, before_len);
 
 		/* unmap space after aligned end mmap address */
 		after_len = RTE_PTR_DIFF(map_end, aligned_end);
 		if (after_len > 0)
-			munmap(aligned_end, after_len);
+			eal_mem_free(aligned_end, after_len);
 	}
 
 	if (!unmap) {
 		/* Exclude these pages from a core dump. */
-		if (madvise(aligned_addr, *size, RTE_DONTDUMP) != 0)
-			RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
-				strerror(errno));
+		eal_mem_set_dump(aligned_addr, *size, false);
 	}
 
 	return aligned_addr;
@@ -547,10 +531,10 @@  rte_eal_memdevice_init(void)
 int
 rte_mem_lock_page(const void *virt)
 {
-	unsigned long virtual = (unsigned long)virt;
-	int page_size = getpagesize();
-	unsigned long aligned = (virtual & ~(page_size - 1));
-	return mlock((void *)aligned, page_size);
+	uintptr_t virtual = (uintptr_t)virt;
+	size_t page_size = rte_get_page_size();
+	uintptr_t aligned = RTE_PTR_ALIGN_FLOOR(virtual, page_size);
+	return rte_mem_lock((void *)aligned, page_size);
 }
 
 int
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 3aafd892f..67ca83e47 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -11,6 +11,7 @@ 
 
 #include <rte_dev.h>
 #include <rte_lcore.h>
+#include <rte_memory.h>
 
 /**
  * Structure storing internal configuration (per-lcore)
@@ -202,6 +203,24 @@  int rte_eal_alarm_init(void);
  */
 int rte_eal_check_module(const char *module_name);
 
+/**
+ * Memory reservation flags.
+ */
+enum eal_mem_reserve_flags {
+	/**
+	 * Reserve hugepages. May be unsupported by some platforms.
+	 */
+	EAL_RESERVE_HUGEPAGES = 1 << 0,
+	/**
+	 * Force reserving memory at the requested address.
+	 * This can be a destructive action depending on the implementation.
+	 *
+	 * @see RTE_MAP_FORCE_ADDRESS for description of possible consequences
+	 *      (although implementations are not required to use it).
+	 */
+	EAL_RESERVE_FORCE_ADDRESS = 1 << 1
+};
+
 /**
  * Get virtual area of specified size from the OS.
  *
@@ -215,8 +234,8 @@  int rte_eal_check_module(const char *module_name);
  *   Page size on which to align requested virtual area.
  * @param flags
  *   EAL_VIRTUAL_AREA_* flags.
- * @param mmap_flags
- *   Extra flags passed directly to mmap().
+ * @param reserve_flags
+ *   Extra flags passed directly to eal_mem_reserve().
  *
  * @return
  *   Virtual area address if successful.
@@ -233,7 +252,7 @@  int rte_eal_check_module(const char *module_name);
 /**< immediately unmap reserved virtual area. */
 void *
 eal_get_virtual_area(void *requested_addr, size_t *size,
-		size_t page_sz, int flags, int mmap_flags);
+		size_t page_sz, int flags, int reserve_flags);
 
 /**
  * Get cpu core_id.
@@ -493,4 +512,53 @@  int eal_file_lock(int fd, enum eal_flock_op op, enum eal_flock_mode mode);
  */
 int eal_file_truncate(int fd, ssize_t size);
 
+/**
+ * Reserve a region of virtual memory.
+ *
+ * Use eal_mem_free() to free reserved memory.
+ *
+ * @param requested_addr
+ *  A desired reservation address. The system may not respect it.
+ *  NULL means the address will be chosen by the system.
+ * @param size
+ *  Reservation size. Must be a multiple of system page size.
+ * @param flags
+ *  Reservation options, a combination of eal_mem_reserve_flags.
+ * @returns
+ *  Starting address of the reserved area on success, NULL on failure.
+ *  Callers must not access this memory until remapping it.
+ */
+void *eal_mem_reserve(void *requested_addr, size_t size, int flags);
+
+/**
+ * Free memory obtained by eal_mem_reserve() or eal_mem_alloc().
+ *
+ * If *virt* and *size* describe a part of the reserved region,
+ * only this part of the region is freed (accurately up to the system
+ * page size). If *virt* points to allocated memory, *size* must match
+ * the one specified on allocation. The behavior is undefined
+ * if the memory pointed by *virt* is obtained from another source
+ * than listed above.
+ *
+ * @param virt
+ *  A virtual address in a region previously reserved.
+ * @param size
+ *  Number of bytes to unreserve.
+ */
+void eal_mem_free(void *virt, size_t size);
+
+/**
+ * Configure memory region inclusion into core dumps.
+ *
+ * @param virt
+ *  Starting address of the region.
+ * @param size
+ *  Size of the region.
+ * @param dump
+ *  True to include memory into core dumps, false to exclude.
+ * @return
+ *  0 on success, (-1) on failure and rte_errno is set.
+ */
+int eal_mem_set_dump(void *virt, size_t size, bool dump);
+
 #endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/freebsd/Makefile b/lib/librte_eal/freebsd/Makefile
index a26c455c7..647dfd0f2 100644
--- a/lib/librte_eal/freebsd/Makefile
+++ b/lib/librte_eal/freebsd/Makefile
@@ -77,6 +77,7 @@  SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += rte_reciprocal.c
 
 # from unix dir
 SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_unix.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_unix_memory.c
 
 # from arch dir
 SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += rte_cpuflags.c
diff --git a/lib/librte_eal/include/rte_memory.h b/lib/librte_eal/include/rte_memory.h
index 65374d53a..5cceeedc8 100644
--- a/lib/librte_eal/include/rte_memory.h
+++ b/lib/librte_eal/include/rte_memory.h
@@ -82,6 +82,92 @@  struct rte_memseg_list {
 	struct rte_fbarray memseg_arr;
 };
 
+/**
+ * Memory protection flags.
+ */
+enum rte_mem_prot {
+	RTE_PROT_READ = 1 << 0,   /**< Read access. */
+	RTE_PROT_WRITE = 1 << 1,  /**< Write access. */
+	RTE_PROT_EXECUTE = 1 << 2 /**< Code execution. */
+};
+
+/**
+ * Additional flags for memory mapping.
+ */
+enum rte_map_flags {
+	/** Changes to the mapped memory are visible to other processes. */
+	RTE_MAP_SHARED = 1 << 0,
+	/** Mapping is not backed by a regular file. */
+	RTE_MAP_ANONYMOUS = 1 << 1,
+	/** Copy-on-write mapping, changes are invisible to other processes. */
+	RTE_MAP_PRIVATE = 1 << 2,
+	/**
+	 * Force mapping to the requested address. This flag should be used
+	 * with caution, because to fulfill the request implementation
+	 * may remove all other mappings in the requested region. However,
+	 * it is not required to do so, thus mapping with this flag may fail.
+	 */
+	RTE_MAP_FORCE_ADDRESS = 1 << 3
+};
+
+/**
+ * Map a portion of an opened file or the page file into memory.
+ *
+ * This function is similar to POSIX mmap(3) with common MAP_ANONYMOUS
+ * extension, except for the return value.
+ *
+ * @param requested_addr
+ *  Desired virtual address for mapping. Can be NULL to let OS choose.
+ * @param size
+ *  Size of the mapping in bytes.
+ * @param prot
+ *  Protection flags, a combination of rte_mem_prot values.
+ * @param flags
+ *  Additional mapping flags, a combination of rte_map_flags.
+ * @param fd
+ *  Mapped file descriptor. Can be negative for anonymous mapping.
+ * @param offset
+ *  Offset of the mapped region in fd. Must be 0 for anonymous mappings.
+ * @return
+ *  Mapped address or NULL on failure and rte_errno is set to OS error.
+ */
+__rte_experimental
+void *rte_mem_map(void *requested_addr, size_t size, int prot, int flags,
+	int fd, size_t offset);
+
+/**
+ * OS-independent implementation of POSIX munmap(3).
+ */
+__rte_experimental
+int rte_mem_unmap(void *virt, size_t size);
+
+/**
+ * Get system page size. This function never fails.
+ *
+ * @return
+ *   Page size in bytes.
+ */
+__rte_experimental
+size_t rte_get_page_size(void);
+
+/**
+ * Lock region in physical memory and prevent it from swapping.
+ *
+ * @param virt
+ *   The virtual address.
+ * @param size
+ *   Size of the region.
+ * @return
+ *   0 on success, negative on error.
+ *
+ * @note Implementations may require *virt* and *size*
+ *       to be multiples of system page size.
+ * @see rte_get_page_size() to retrieve the page size.
+ * @see rte_mem_lock_page() to lock an entire single page.
+ */
+__rte_experimental
+int rte_mem_lock(const void *virt, size_t size);
+
 /**
  * Lock page in physical memory and prevent from swapping.
  *
diff --git a/lib/librte_eal/linux/Makefile b/lib/librte_eal/linux/Makefile
index fa41f00bf..06428f0de 100644
--- a/lib/librte_eal/linux/Makefile
+++ b/lib/librte_eal/linux/Makefile
@@ -84,6 +84,7 @@  SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += rte_reciprocal.c
 
 # from unix dir
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_unix.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_unix_memory.c
 
 # from arch dir
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += rte_cpuflags.c
diff --git a/lib/librte_eal/linux/eal_memalloc.c b/lib/librte_eal/linux/eal_memalloc.c
index 2c717f8bd..bf29b83c6 100644
--- a/lib/librte_eal/linux/eal_memalloc.c
+++ b/lib/librte_eal/linux/eal_memalloc.c
@@ -630,7 +630,7 @@  alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
 mapped:
 	munmap(addr, alloc_sz);
 unmapped:
-	flags = MAP_FIXED;
+	flags = EAL_RESERVE_FORCE_ADDRESS;
 	new_addr = eal_get_virtual_area(addr, &alloc_sz, alloc_sz, 0, flags);
 	if (new_addr != addr) {
 		if (new_addr != NULL)
@@ -687,8 +687,7 @@  free_seg(struct rte_memseg *ms, struct hugepage_info *hi,
 		return -1;
 	}
 
-	if (madvise(ms->addr, ms->len, MADV_DONTDUMP) != 0)
-		RTE_LOG(DEBUG, EAL, "madvise failed: %s\n", strerror(errno));
+	eal_mem_set_dump(ms->addr, ms->len, false);
 
 	exit_early = false;
 
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 6088e7f6c..5d6d3a8c6 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -373,7 +373,11 @@  EXPERIMENTAL {
 	__rte_trace_point_register;
 	per_lcore_trace_mem;
 	per_lcore_trace_point_sz;
+	rte_get_page_size;
 	rte_log_can_log;
+	rte_mem_lock;
+	rte_mem_map;
+	rte_mem_unmap;
 	rte_thread_getname;
 	rte_trace_dump;
 	rte_trace_is_enabled;
diff --git a/lib/librte_eal/unix/eal_unix_memory.c b/lib/librte_eal/unix/eal_unix_memory.c
new file mode 100644
index 000000000..3eab7b941
--- /dev/null
+++ b/lib/librte_eal/unix/eal_unix_memory.c
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Dmitry Kozlyuk
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <rte_errno.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+
+#include "eal_private.h"
+
+#ifdef RTE_EXEC_ENV_LINUX
+#define EAL_DONTDUMP MADV_DONTDUMP
+#define EAL_DODUMP   MADV_DODUMP
+#elif defined RTE_EXEC_ENV_FREEBSD
+#define EAL_DONTDUMP MADV_NOCORE
+#define EAL_DODUMP   MADV_CORE
+#else
+#error "madvise doesn't support this OS"
+#endif
+
+/* Call mmap(); on failure set rte_errno, log and return NULL. */
+static void *
+mem_map(void *requested_addr, size_t size, int prot, int flags,
+	int fd, size_t offset)
+{
+	void *virt = mmap(requested_addr, size, prot, flags, fd, offset);
+	if (virt == MAP_FAILED) {
+		/* Save errno before logging, which may overwrite it. */
+		rte_errno = errno;
+		RTE_LOG(DEBUG, EAL,
+			"Cannot mmap(%p, 0x%zx, 0x%x, 0x%x, %d, 0x%zx): %s\n",
+			requested_addr, size, prot, flags, fd, offset,
+			strerror(rte_errno));
+		return NULL;
+	}
+	return virt;
+}
+
+/* Call munmap(); on failure set rte_errno and log the error. */
+static int
+mem_unmap(void *virt, size_t size)
+{
+	int ret = munmap(virt, size);
+	if (ret < 0) {
+		/* Save errno before logging, which may overwrite it. */
+		rte_errno = errno;
+		RTE_LOG(DEBUG, EAL, "Cannot munmap(%p, 0x%zx): %s\n",
+			virt, size, strerror(rte_errno));
+	}
+	return ret;
+}
+
+/*
+ * Reserve address space with an inaccessible (PROT_NONE) private anonymous
+ * mapping. Fails with ENOTSUP if hugepages are requested but MAP_HUGETLB
+ * is not available at build time.
+ */
+void *
+eal_mem_reserve(void *requested_addr, size_t size, int flags)
+{
+	int sys_flags = MAP_PRIVATE | MAP_ANONYMOUS;
+
+	if (flags & EAL_RESERVE_HUGEPAGES) {
+#ifdef MAP_HUGETLB
+		sys_flags |= MAP_HUGETLB;
+#else
+		rte_errno = ENOTSUP;
+		return NULL;
+#endif
+	}
+
+	if (flags & EAL_RESERVE_FORCE_ADDRESS)
+		sys_flags |= MAP_FIXED;
+
+	return mem_map(requested_addr, size, PROT_NONE, sys_flags, -1, 0);
+}
+
+/* Unmap a reserved region; failures are logged by mem_unmap() only. */
+void
+eal_mem_free(void *virt, size_t size)
+{
+	mem_unmap(virt, size);
+}
+
+/* Include the region in core dumps or exclude it using madvise(). */
+int
+eal_mem_set_dump(void *virt, size_t size, bool dump)
+{
+	int flags = dump ? EAL_DODUMP : EAL_DONTDUMP;
+	int ret = madvise(virt, size, flags);
+	if (ret) {
+		/* Set rte_errno first so the log reports this failure. */
+		rte_errno = errno;
+		RTE_LOG(DEBUG, EAL, "madvise(%p, %#zx, %d) failed: %s\n",
+				virt, size, flags, strerror(rte_errno));
+	}
+	return ret;
+}
+
+/* Translate RTE_PROT_* flags to PROT_* flags for mmap(). */
+static int
+mem_rte_to_sys_prot(int prot)
+{
+	int sys_prot = PROT_NONE;
+
+	if (prot & RTE_PROT_READ)
+		sys_prot |= PROT_READ;
+	if (prot & RTE_PROT_WRITE)
+		sys_prot |= PROT_WRITE;
+	if (prot & RTE_PROT_EXECUTE)
+		sys_prot |= PROT_EXEC;
+
+	return sys_prot;
+}
+
+void *
+rte_mem_map(void *requested_addr, size_t size, int prot, int flags,
+	int fd, size_t offset)
+{
+	int sys_prot = mem_rte_to_sys_prot(prot);
+	int sys_flags = 0;
+
+	/* Translate RTE_MAP_* flags to MAP_* flags for mmap(). */
+	if (flags & RTE_MAP_SHARED)
+		sys_flags |= MAP_SHARED;
+	if (flags & RTE_MAP_ANONYMOUS)
+		sys_flags |= MAP_ANONYMOUS;
+	if (flags & RTE_MAP_PRIVATE)
+		sys_flags |= MAP_PRIVATE;
+	if (flags & RTE_MAP_FORCE_ADDRESS)
+		sys_flags |= MAP_FIXED;
+
+	return mem_map(requested_addr, size, sys_prot, sys_flags, fd, offset);
+}
+
+int
+rte_mem_unmap(void *virt, size_t size)
+{
+	return mem_unmap(virt, size);
+}
+
+size_t
+rte_get_page_size(void)
+{
+	/* Cache the value to avoid a sysconf() call on every invocation. */
+	static size_t page_size;
+
+	if (page_size == 0)
+		page_size = sysconf(_SC_PAGESIZE);
+
+	return page_size;
+}
+
+int
+rte_mem_lock(const void *virt, size_t size)
+{
+	int ret = mlock(virt, size);
+
+	/* Report the OS error via rte_errno like the other wrappers do. */
+	if (ret != 0)
+		rte_errno = errno;
+	return ret;
+}
diff --git a/lib/librte_eal/unix/meson.build b/lib/librte_eal/unix/meson.build
index cfa1b4ef9..5734f26ad 100644
--- a/lib/librte_eal/unix/meson.build
+++ b/lib/librte_eal/unix/meson.build
@@ -3,4 +3,5 @@ 
 
 sources += files(
 	'eal_unix.c',
+	'eal_unix_memory.c',
 )