[v6,03/21] malloc: index heaps using heap ID rather than NUMA node
Checks
Commit Message
Switch over all parts of EAL to use heap ID instead of NUMA node
ID to identify heaps. Heap ID for DPDK-internal heaps is NUMA
node's index within the detected NUMA node list. Heap ID for
external heaps will be the order of their creation.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
config/common_base | 1 +
config/rte_config.h | 1 +
.../common/include/rte_eal_memconfig.h | 4 +-
.../common/include/rte_malloc_heap.h | 1 +
lib/librte_eal/common/malloc_heap.c | 98 +++++++++++++------
lib/librte_eal/common/malloc_heap.h | 3 +
lib/librte_eal/common/rte_malloc.c | 41 +++++---
7 files changed, 106 insertions(+), 43 deletions(-)
Comments
On Thu, Sep 27, 2018 at 11:47 AM Anatoly Burakov <anatoly.burakov@intel.com>
wrote:
> Switch over all parts of EAL to use heap ID instead of NUMA node
> ID to identify heaps. Heap ID for DPDK-internal heaps is NUMA
> node's index within the detected NUMA node list. Heap ID for
> external heaps will be order of their creation.
>
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
> config/common_base | 1 +
> config/rte_config.h | 1 +
> .../common/include/rte_eal_memconfig.h | 4 +-
> .../common/include/rte_malloc_heap.h | 1 +
> lib/librte_eal/common/malloc_heap.c | 98 +++++++++++++------
> lib/librte_eal/common/malloc_heap.h | 3 +
> lib/librte_eal/common/rte_malloc.c | 41 +++++---
> 7 files changed, 106 insertions(+), 43 deletions(-)
>
> diff --git a/config/common_base b/config/common_base
> index 155c7d40e..b52770b27 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -61,6 +61,7 @@ CONFIG_RTE_CACHE_LINE_SIZE=64
> CONFIG_RTE_LIBRTE_EAL=y
> CONFIG_RTE_MAX_LCORE=128
> CONFIG_RTE_MAX_NUMA_NODES=8
> +CONFIG_RTE_MAX_HEAPS=32
> CONFIG_RTE_MAX_MEMSEG_LISTS=64
> # each memseg list will be limited to either RTE_MAX_MEMSEG_PER_LIST pages
> # or RTE_MAX_MEM_MB_PER_LIST megabytes worth of memory, whichever is
> smaller
> diff --git a/config/rte_config.h b/config/rte_config.h
> index 567051b9c..5dd2ac1ad 100644
> --- a/config/rte_config.h
> +++ b/config/rte_config.h
> @@ -24,6 +24,7 @@
> #define RTE_BUILD_SHARED_LIB
>
> /* EAL defines */
> +#define RTE_MAX_HEAPS 32
> #define RTE_MAX_MEMSEG_LISTS 128
> #define RTE_MAX_MEMSEG_PER_LIST 8192
> #define RTE_MAX_MEM_MB_PER_LIST 32768
> diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h
> b/lib/librte_eal/common/include/rte_eal_memconfig.h
> index 6baa6854f..d7920a4e0 100644
> --- a/lib/librte_eal/common/include/rte_eal_memconfig.h
> +++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
> @@ -72,8 +72,8 @@ struct rte_mem_config {
>
> struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for
> objects */
>
> - /* Heaps of Malloc per socket */
> - struct malloc_heap malloc_heaps[RTE_MAX_NUMA_NODES];
> + /* Heaps of Malloc */
> + struct malloc_heap malloc_heaps[RTE_MAX_HEAPS];
>
> /* address of mem_config in primary process. used to map shared
> config into
> * exact same address the primary process maps it.
> diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h
> b/lib/librte_eal/common/include/rte_malloc_heap.h
> index d43fa9097..e7ac32d42 100644
> --- a/lib/librte_eal/common/include/rte_malloc_heap.h
> +++ b/lib/librte_eal/common/include/rte_malloc_heap.h
> @@ -27,6 +27,7 @@ struct malloc_heap {
>
> unsigned alloc_count;
> size_t total_size;
> + unsigned int socket_id;
> } __rte_cache_aligned;
>
> #endif /* _RTE_MALLOC_HEAP_H_ */
> diff --git a/lib/librte_eal/common/malloc_heap.c
> b/lib/librte_eal/common/malloc_heap.c
> index 3c8e2063b..1d1e35708 100644
> --- a/lib/librte_eal/common/malloc_heap.c
> +++ b/lib/librte_eal/common/malloc_heap.c
> @@ -66,6 +66,21 @@ check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
> return check_flag & flags;
> }
>
> +int
> +malloc_socket_to_heap_id(unsigned int socket_id)
> +{
> + struct rte_mem_config *mcfg =
> rte_eal_get_configuration()->mem_config;
> + int i;
> +
> + for (i = 0; i < RTE_MAX_HEAPS; i++) {
> + struct malloc_heap *heap = &mcfg->malloc_heaps[i];
> +
> + if (heap->socket_id == socket_id)
> + return i;
> + }
> + return -1;
> +}
> +
> /*
> * Expand the heap with a memory area.
> */
> @@ -93,12 +108,13 @@ malloc_add_seg(const struct rte_memseg_list *msl,
> struct rte_mem_config *mcfg =
> rte_eal_get_configuration()->mem_config;
> struct rte_memseg_list *found_msl;
> struct malloc_heap *heap;
> - int msl_idx;
> + int msl_idx, heap_idx;
>
> if (msl->external)
> return 0;
>
> - heap = &mcfg->malloc_heaps[msl->socket_id];
> + heap_idx = malloc_socket_to_heap_id(msl->socket_id);
>
malloc_socket_to_heap_id can return -1, so it is necessary to handle that
possibility.
> + heap = &mcfg->malloc_heaps[heap_idx];
>
> /* msl is const, so find it */
> msl_idx = msl - mcfg->memsegs;
> @@ -111,6 +127,7 @@ malloc_add_seg(const struct rte_memseg_list *msl,
> malloc_heap_add_memory(heap, found_msl, ms->addr, len);
>
> heap->total_size += len;
> + heap->socket_id = msl->socket_id;
>
> RTE_LOG(DEBUG, EAL, "Added %zuM to heap on socket %i\n", len >> 20,
> msl->socket_id);
> @@ -561,12 +578,14 @@ alloc_more_mem_on_socket(struct malloc_heap *heap,
> size_t size, int socket,
>
> /* this will try lower page sizes first */
> static void *
> -heap_alloc_on_socket(const char *type, size_t size, int socket,
> - unsigned int flags, size_t align, size_t bound, bool
> contig)
> +malloc_heap_alloc_on_heap_id(const char *type, size_t size,
> + unsigned int heap_id, unsigned int flags, size_t align,
> + size_t bound, bool contig)
> {
> struct rte_mem_config *mcfg =
> rte_eal_get_configuration()->mem_config;
> - struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
> + struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
> unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
> + int socket_id;
> void *ret;
>
> rte_spinlock_lock(&(heap->lock));
> @@ -584,12 +603,28 @@ heap_alloc_on_socket(const char *type, size_t size,
> int socket,
> * we may still be able to allocate memory from appropriate page
> sizes,
> * we just need to request more memory first.
> */
> +
> + socket_id = rte_socket_id_by_idx(heap_id);
> + /*
> + * if socket ID is negative, we cannot find a socket ID for this
> heap -
> + * which means it's an external heap. those can have unexpected
> page
> + * sizes, so if the user asked to allocate from there - assume user
> + * knows what they're doing, and allow allocating from there with
> any
> + * page size flags.
> + */
> + if (socket_id < 0)
> + size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY;
> +
> ret = heap_alloc(heap, type, size, size_flags, align, bound,
> contig);
> if (ret != NULL)
> goto alloc_unlock;
>
> - if (!alloc_more_mem_on_socket(heap, size, socket, flags, align,
> bound,
> - contig)) {
> + /* if socket ID is invalid, this is an external heap */
> + if (socket_id < 0)
> + goto alloc_unlock;
> +
> + if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align,
> + bound, contig)) {
> ret = heap_alloc(heap, type, size, flags, align, bound,
> contig);
>
> /* this should have succeeded */
> @@ -605,7 +640,7 @@ void *
> malloc_heap_alloc(const char *type, size_t size, int socket_arg,
> unsigned int flags, size_t align, size_t bound, bool
> contig)
> {
> - int socket, i, cur_socket;
> + int socket, heap_id, i;
> void *ret;
>
> /* return NULL if size is 0 or alignment is not power-of-2 */
> @@ -620,22 +655,25 @@ malloc_heap_alloc(const char *type, size_t size, int
> socket_arg,
> else
> socket = socket_arg;
>
> - /* Check socket parameter */
> - if (socket >= RTE_MAX_NUMA_NODES)
> + /* turn socket ID into heap ID */
> + heap_id = malloc_socket_to_heap_id(socket);
> + /* if heap id is negative, socket ID was invalid */
> + if (heap_id < 0)
> return NULL;
>
> - ret = heap_alloc_on_socket(type, size, socket, flags, align, bound,
> - contig);
> + ret = malloc_heap_alloc_on_heap_id(type, size, heap_id, flags,
> align,
> + bound, contig);
> if (ret != NULL || socket_arg != SOCKET_ID_ANY)
> return ret;
>
> - /* try other heaps */
> + /* try other heaps. we are only iterating through native DPDK
> sockets,
> + * so external heaps won't be included.
> + */
> for (i = 0; i < (int) rte_socket_count(); i++) {
> - cur_socket = rte_socket_id_by_idx(i);
> - if (cur_socket == socket)
> + if (i == heap_id)
> continue;
> - ret = heap_alloc_on_socket(type, size, cur_socket, flags,
> - align, bound, contig);
> + ret = malloc_heap_alloc_on_heap_id(type, size, i, flags,
> align,
> + bound, contig);
> if (ret != NULL)
> return ret;
> }
> @@ -643,11 +681,11 @@ malloc_heap_alloc(const char *type, size_t size, int
> socket_arg,
> }
>
> static void *
> -heap_alloc_biggest_on_socket(const char *type, int socket, unsigned int
> flags,
> - size_t align, bool contig)
> +heap_alloc_biggest_on_heap_id(const char *type, unsigned int heap_id,
> + unsigned int flags, size_t align, bool contig)
> {
> struct rte_mem_config *mcfg =
> rte_eal_get_configuration()->mem_config;
> - struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
> + struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
> void *ret;
>
> rte_spinlock_lock(&(heap->lock));
> @@ -665,7 +703,7 @@ void *
> malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int
> flags,
> size_t align, bool contig)
> {
> - int socket, i, cur_socket;
> + int socket, i, cur_socket, heap_id;
> void *ret;
>
> /* return NULL if align is not power-of-2 */
> @@ -680,11 +718,13 @@ malloc_heap_alloc_biggest(const char *type, int
> socket_arg, unsigned int flags,
> else
> socket = socket_arg;
>
> - /* Check socket parameter */
> - if (socket >= RTE_MAX_NUMA_NODES)
> + /* turn socket ID into heap ID */
> + heap_id = malloc_socket_to_heap_id(socket);
> + /* if heap id is negative, socket ID was invalid */
> + if (heap_id < 0)
> return NULL;
>
> - ret = heap_alloc_biggest_on_socket(type, socket, flags, align,
> + ret = heap_alloc_biggest_on_heap_id(type, heap_id, flags, align,
> contig);
> if (ret != NULL || socket_arg != SOCKET_ID_ANY)
> return ret;
> @@ -694,8 +734,8 @@ malloc_heap_alloc_biggest(const char *type, int
> socket_arg, unsigned int flags,
> cur_socket = rte_socket_id_by_idx(i);
> if (cur_socket == socket)
> continue;
> - ret = heap_alloc_biggest_on_socket(type, cur_socket, flags,
> - align, contig);
> + ret = heap_alloc_biggest_on_heap_id(type, i, flags, align,
> + contig);
> if (ret != NULL)
> return ret;
> }
> @@ -760,7 +800,7 @@ malloc_heap_free(struct malloc_elem *elem)
> /* ...of which we can't avail if we are in legacy mode, or if this
> is an
> * externally allocated segment.
> */
> - if (internal_config.legacy_mem || msl->external)
> + if (internal_config.legacy_mem || (msl->external > 0))
> goto free_unlock;
>
> /* check if we can free any memory back to the system */
> @@ -917,7 +957,7 @@ malloc_heap_resize(struct malloc_elem *elem, size_t
> size)
> }
>
> /*
> - * Function to retrieve data for heap on given socket
> + * Function to retrieve data for a given heap
> */
> int
> malloc_heap_get_stats(struct malloc_heap *heap,
> @@ -955,7 +995,7 @@ malloc_heap_get_stats(struct malloc_heap *heap,
> }
>
> /*
> - * Function to retrieve data for heap on given socket
> + * Function to retrieve data for a given heap
> */
> void
> malloc_heap_dump(struct malloc_heap *heap, FILE *f)
> diff --git a/lib/librte_eal/common/malloc_heap.h
> b/lib/librte_eal/common/malloc_heap.h
> index f52cb5559..61b844b6f 100644
> --- a/lib/librte_eal/common/malloc_heap.h
> +++ b/lib/librte_eal/common/malloc_heap.h
> @@ -46,6 +46,9 @@ malloc_heap_get_stats(struct malloc_heap *heap,
> void
> malloc_heap_dump(struct malloc_heap *heap, FILE *f);
>
> +int
> +malloc_socket_to_heap_id(unsigned int socket_id);
> +
> int
> rte_eal_malloc_heap_init(void);
>
> diff --git a/lib/librte_eal/common/rte_malloc.c
> b/lib/librte_eal/common/rte_malloc.c
> index 47ca5a742..73d6df31d 100644
> --- a/lib/librte_eal/common/rte_malloc.c
> +++ b/lib/librte_eal/common/rte_malloc.c
> @@ -152,11 +152,20 @@ rte_malloc_get_socket_stats(int socket,
> struct rte_malloc_socket_stats *socket_stats)
> {
> struct rte_mem_config *mcfg =
> rte_eal_get_configuration()->mem_config;
> + int heap_idx, ret = -1;
>
> - if (socket >= RTE_MAX_NUMA_NODES || socket < 0)
> - return -1;
> + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
>
> - return malloc_heap_get_stats(&mcfg->malloc_heaps[socket],
> socket_stats);
> + heap_idx = malloc_socket_to_heap_id(socket);
> + if (heap_idx < 0)
> + goto unlock;
> +
> + ret = malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx],
> + socket_stats);
> +unlock:
> + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
> +
> + return ret;
> }
>
> /*
> @@ -168,12 +177,14 @@ rte_malloc_dump_heaps(FILE *f)
> struct rte_mem_config *mcfg =
> rte_eal_get_configuration()->mem_config;
> unsigned int idx;
>
> - for (idx = 0; idx < rte_socket_count(); idx++) {
> - unsigned int socket = rte_socket_id_by_idx(idx);
> - fprintf(f, "Heap on socket %i:\n", socket);
> - malloc_heap_dump(&mcfg->malloc_heaps[socket], f);
> + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
> +
> + for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
> + fprintf(f, "Heap id: %u\n", idx);
> + malloc_heap_dump(&mcfg->malloc_heaps[idx], f);
> }
>
> + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
> }
>
> /*
> @@ -182,14 +193,19 @@ rte_malloc_dump_heaps(FILE *f)
> void
> rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
> {
> - unsigned int socket;
> + struct rte_mem_config *mcfg =
> rte_eal_get_configuration()->mem_config;
> + unsigned int heap_id;
> struct rte_malloc_socket_stats sock_stats;
> +
> + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
> +
> /* Iterate through all initialised heaps */
> - for (socket=0; socket< RTE_MAX_NUMA_NODES; socket++) {
> - if ((rte_malloc_get_socket_stats(socket, &sock_stats) < 0))
> - continue;
> + for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) {
> + struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
>
> - fprintf(f, "Socket:%u\n", socket);
> + malloc_heap_get_stats(heap, &sock_stats);
> +
> + fprintf(f, "Heap id:%u\n", heap_id);
> fprintf(f, "\tHeap_size:%zu,\n",
> sock_stats.heap_totalsz_bytes);
> fprintf(f, "\tFree_size:%zu,\n",
> sock_stats.heap_freesz_bytes);
> fprintf(f, "\tAlloc_size:%zu,\n",
> sock_stats.heap_allocsz_bytes);
> @@ -198,6 +214,7 @@ rte_malloc_dump_stats(FILE *f, __rte_unused const char
> *type)
> fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count);
> fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count);
> }
> + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
> return;
> }
>
> --
> 2.17.1
>
On 27-Sep-18 2:01 PM, Alejandro Lucero wrote:
> On Thu, Sep 27, 2018 at 11:47 AM Anatoly Burakov <anatoly.burakov@intel.com>
> wrote:
>
>> Switch over all parts of EAL to use heap ID instead of NUMA node
>> ID to identify heaps. Heap ID for DPDK-internal heaps is NUMA
>> node's index within the detected NUMA node list. Heap ID for
>> external heaps will be order of their creation.
>>
>> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
>> ---
>> config/common_base | 1 +
>> config/rte_config.h | 1 +
>> .../common/include/rte_eal_memconfig.h | 4 +-
>> .../common/include/rte_malloc_heap.h | 1 +
>> lib/librte_eal/common/malloc_heap.c | 98 +++++++++++++------
>> lib/librte_eal/common/malloc_heap.h | 3 +
>> lib/librte_eal/common/rte_malloc.c | 41 +++++---
>> 7 files changed, 106 insertions(+), 43 deletions(-)
>>
>> diff --git a/config/common_base b/config/common_base
>> index 155c7d40e..b52770b27 100644
>> --- a/config/common_base
>> +++ b/config/common_base
>> @@ -61,6 +61,7 @@ CONFIG_RTE_CACHE_LINE_SIZE=64
>> CONFIG_RTE_LIBRTE_EAL=y
>> CONFIG_RTE_MAX_LCORE=128
>> CONFIG_RTE_MAX_NUMA_NODES=8
>> +CONFIG_RTE_MAX_HEAPS=32
>> CONFIG_RTE_MAX_MEMSEG_LISTS=64
>> # each memseg list will be limited to either RTE_MAX_MEMSEG_PER_LIST pages
>> # or RTE_MAX_MEM_MB_PER_LIST megabytes worth of memory, whichever is
>> smaller
>> diff --git a/config/rte_config.h b/config/rte_config.h
>> index 567051b9c..5dd2ac1ad 100644
>> --- a/config/rte_config.h
>> +++ b/config/rte_config.h
>> @@ -24,6 +24,7 @@
>> #define RTE_BUILD_SHARED_LIB
>>
>> /* EAL defines */
>> +#define RTE_MAX_HEAPS 32
>> #define RTE_MAX_MEMSEG_LISTS 128
>> #define RTE_MAX_MEMSEG_PER_LIST 8192
>> #define RTE_MAX_MEM_MB_PER_LIST 32768
>> diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h
>> b/lib/librte_eal/common/include/rte_eal_memconfig.h
>> index 6baa6854f..d7920a4e0 100644
>> --- a/lib/librte_eal/common/include/rte_eal_memconfig.h
>> +++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
>> @@ -72,8 +72,8 @@ struct rte_mem_config {
>>
>> struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for
>> objects */
>>
>> - /* Heaps of Malloc per socket */
>> - struct malloc_heap malloc_heaps[RTE_MAX_NUMA_NODES];
>> + /* Heaps of Malloc */
>> + struct malloc_heap malloc_heaps[RTE_MAX_HEAPS];
>>
>> /* address of mem_config in primary process. used to map shared
>> config into
>> * exact same address the primary process maps it.
>> diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h
>> b/lib/librte_eal/common/include/rte_malloc_heap.h
>> index d43fa9097..e7ac32d42 100644
>> --- a/lib/librte_eal/common/include/rte_malloc_heap.h
>> +++ b/lib/librte_eal/common/include/rte_malloc_heap.h
>> @@ -27,6 +27,7 @@ struct malloc_heap {
>>
>> unsigned alloc_count;
>> size_t total_size;
>> + unsigned int socket_id;
>> } __rte_cache_aligned;
>>
>> #endif /* _RTE_MALLOC_HEAP_H_ */
>> diff --git a/lib/librte_eal/common/malloc_heap.c
>> b/lib/librte_eal/common/malloc_heap.c
>> index 3c8e2063b..1d1e35708 100644
>> --- a/lib/librte_eal/common/malloc_heap.c
>> +++ b/lib/librte_eal/common/malloc_heap.c
>> @@ -66,6 +66,21 @@ check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
>> return check_flag & flags;
>> }
>>
>> +int
>> +malloc_socket_to_heap_id(unsigned int socket_id)
>> +{
>> + struct rte_mem_config *mcfg =
>> rte_eal_get_configuration()->mem_config;
>> + int i;
>> +
>> + for (i = 0; i < RTE_MAX_HEAPS; i++) {
>> + struct malloc_heap *heap = &mcfg->malloc_heaps[i];
>> +
>> + if (heap->socket_id == socket_id)
>> + return i;
>> + }
>> + return -1;
>> +}
>> +
>> /*
>> * Expand the heap with a memory area.
>> */
>> @@ -93,12 +108,13 @@ malloc_add_seg(const struct rte_memseg_list *msl,
>> struct rte_mem_config *mcfg =
>> rte_eal_get_configuration()->mem_config;
>> struct rte_memseg_list *found_msl;
>> struct malloc_heap *heap;
>> - int msl_idx;
>> + int msl_idx, heap_idx;
>>
>> if (msl->external)
>> return 0;
>>
>> - heap = &mcfg->malloc_heaps[msl->socket_id];
>> + heap_idx = malloc_socket_to_heap_id(msl->socket_id);
>>
>
> malloc_socket_to_heap_id can return -1 so it requires to handle that
> possibility.
>
Not really, this is called from the memseg walk function - we know the msl
and its socket ID are valid. Or at least something has gone *very* wrong
if we got a -1 result :) However, I guess this check won't hurt.
On Thu, Sep 27, 2018 at 2:18 PM Burakov, Anatoly <anatoly.burakov@intel.com>
wrote:
> On 27-Sep-18 2:01 PM, Alejandro Lucero wrote:
> > On Thu, Sep 27, 2018 at 11:47 AM Anatoly Burakov <
> anatoly.burakov@intel.com>
> > wrote:
> >
> >> Switch over all parts of EAL to use heap ID instead of NUMA node
> >> ID to identify heaps. Heap ID for DPDK-internal heaps is NUMA
> >> node's index within the detected NUMA node list. Heap ID for
> >> external heaps will be order of their creation.
> >>
> >> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> >> ---
> >> config/common_base | 1 +
> >> config/rte_config.h | 1 +
> >> .../common/include/rte_eal_memconfig.h | 4 +-
> >> .../common/include/rte_malloc_heap.h | 1 +
> >> lib/librte_eal/common/malloc_heap.c | 98 +++++++++++++------
> >> lib/librte_eal/common/malloc_heap.h | 3 +
> >> lib/librte_eal/common/rte_malloc.c | 41 +++++---
> >> 7 files changed, 106 insertions(+), 43 deletions(-)
> >>
> >> diff --git a/config/common_base b/config/common_base
> >> index 155c7d40e..b52770b27 100644
> >> --- a/config/common_base
> >> +++ b/config/common_base
> >> @@ -61,6 +61,7 @@ CONFIG_RTE_CACHE_LINE_SIZE=64
> >> CONFIG_RTE_LIBRTE_EAL=y
> >> CONFIG_RTE_MAX_LCORE=128
> >> CONFIG_RTE_MAX_NUMA_NODES=8
> >> +CONFIG_RTE_MAX_HEAPS=32
> >> CONFIG_RTE_MAX_MEMSEG_LISTS=64
> >> # each memseg list will be limited to either RTE_MAX_MEMSEG_PER_LIST
> pages
> >> # or RTE_MAX_MEM_MB_PER_LIST megabytes worth of memory, whichever is
> >> smaller
> >> diff --git a/config/rte_config.h b/config/rte_config.h
> >> index 567051b9c..5dd2ac1ad 100644
> >> --- a/config/rte_config.h
> >> +++ b/config/rte_config.h
> >> @@ -24,6 +24,7 @@
> >> #define RTE_BUILD_SHARED_LIB
> >>
> >> /* EAL defines */
> >> +#define RTE_MAX_HEAPS 32
> >> #define RTE_MAX_MEMSEG_LISTS 128
> >> #define RTE_MAX_MEMSEG_PER_LIST 8192
> >> #define RTE_MAX_MEM_MB_PER_LIST 32768
> >> diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h
> >> b/lib/librte_eal/common/include/rte_eal_memconfig.h
> >> index 6baa6854f..d7920a4e0 100644
> >> --- a/lib/librte_eal/common/include/rte_eal_memconfig.h
> >> +++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
> >> @@ -72,8 +72,8 @@ struct rte_mem_config {
> >>
> >> struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs
> for
> >> objects */
> >>
> >> - /* Heaps of Malloc per socket */
> >> - struct malloc_heap malloc_heaps[RTE_MAX_NUMA_NODES];
> >> + /* Heaps of Malloc */
> >> + struct malloc_heap malloc_heaps[RTE_MAX_HEAPS];
> >>
> >> /* address of mem_config in primary process. used to map shared
> >> config into
> >> * exact same address the primary process maps it.
> >> diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h
> >> b/lib/librte_eal/common/include/rte_malloc_heap.h
> >> index d43fa9097..e7ac32d42 100644
> >> --- a/lib/librte_eal/common/include/rte_malloc_heap.h
> >> +++ b/lib/librte_eal/common/include/rte_malloc_heap.h
> >> @@ -27,6 +27,7 @@ struct malloc_heap {
> >>
> >> unsigned alloc_count;
> >> size_t total_size;
> >> + unsigned int socket_id;
> >> } __rte_cache_aligned;
> >>
> >> #endif /* _RTE_MALLOC_HEAP_H_ */
> >> diff --git a/lib/librte_eal/common/malloc_heap.c
> >> b/lib/librte_eal/common/malloc_heap.c
> >> index 3c8e2063b..1d1e35708 100644
> >> --- a/lib/librte_eal/common/malloc_heap.c
> >> +++ b/lib/librte_eal/common/malloc_heap.c
> >> @@ -66,6 +66,21 @@ check_hugepage_sz(unsigned flags, uint64_t
> hugepage_sz)
> >> return check_flag & flags;
> >> }
> >>
> >> +int
> >> +malloc_socket_to_heap_id(unsigned int socket_id)
> >> +{
> >> + struct rte_mem_config *mcfg =
> >> rte_eal_get_configuration()->mem_config;
> >> + int i;
> >> +
> >> + for (i = 0; i < RTE_MAX_HEAPS; i++) {
> >> + struct malloc_heap *heap = &mcfg->malloc_heaps[i];
> >> +
> >> + if (heap->socket_id == socket_id)
> >> + return i;
> >> + }
> >> + return -1;
> >> +}
> >> +
> >> /*
> >> * Expand the heap with a memory area.
> >> */
> >> @@ -93,12 +108,13 @@ malloc_add_seg(const struct rte_memseg_list *msl,
> >> struct rte_mem_config *mcfg =
> >> rte_eal_get_configuration()->mem_config;
> >> struct rte_memseg_list *found_msl;
> >> struct malloc_heap *heap;
> >> - int msl_idx;
> >> + int msl_idx, heap_idx;
> >>
> >> if (msl->external)
> >> return 0;
> >>
> >> - heap = &mcfg->malloc_heaps[msl->socket_id];
> >> + heap_idx = malloc_socket_to_heap_id(msl->socket_id);
> >>
> >
> > malloc_socket_to_heap_id can return -1 so it requires to handle that
> > possibility.
> >
>
> Not really, this is called from memseg walk function - we know the msl
> and its socket ID are valid. Or at least something has gone *very* wrong
> if we got a -1 result :) However, i guess this check won't hurt.
>
>
Although that error is impossible now, not doing the check could be a
problem if there is another code path in the future where socket_id has not
been checked yet.
> --
> Thanks,
> Anatoly
>
@@ -61,6 +61,7 @@ CONFIG_RTE_CACHE_LINE_SIZE=64
CONFIG_RTE_LIBRTE_EAL=y
CONFIG_RTE_MAX_LCORE=128
CONFIG_RTE_MAX_NUMA_NODES=8
+CONFIG_RTE_MAX_HEAPS=32
CONFIG_RTE_MAX_MEMSEG_LISTS=64
# each memseg list will be limited to either RTE_MAX_MEMSEG_PER_LIST pages
# or RTE_MAX_MEM_MB_PER_LIST megabytes worth of memory, whichever is smaller
@@ -24,6 +24,7 @@
#define RTE_BUILD_SHARED_LIB
/* EAL defines */
+#define RTE_MAX_HEAPS 32
#define RTE_MAX_MEMSEG_LISTS 128
#define RTE_MAX_MEMSEG_PER_LIST 8192
#define RTE_MAX_MEM_MB_PER_LIST 32768
@@ -72,8 +72,8 @@ struct rte_mem_config {
struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */
- /* Heaps of Malloc per socket */
- struct malloc_heap malloc_heaps[RTE_MAX_NUMA_NODES];
+ /* Heaps of Malloc */
+ struct malloc_heap malloc_heaps[RTE_MAX_HEAPS];
/* address of mem_config in primary process. used to map shared config into
* exact same address the primary process maps it.
@@ -27,6 +27,7 @@ struct malloc_heap {
unsigned alloc_count;
size_t total_size;
+ unsigned int socket_id;
} __rte_cache_aligned;
#endif /* _RTE_MALLOC_HEAP_H_ */
@@ -66,6 +66,21 @@ check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
return check_flag & flags;
}
+int
+malloc_socket_to_heap_id(unsigned int socket_id)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i;
+
+ for (i = 0; i < RTE_MAX_HEAPS; i++) {
+ struct malloc_heap *heap = &mcfg->malloc_heaps[i];
+
+ if (heap->socket_id == socket_id)
+ return i;
+ }
+ return -1;
+}
+
/*
* Expand the heap with a memory area.
*/
@@ -93,12 +108,13 @@ malloc_add_seg(const struct rte_memseg_list *msl,
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
struct rte_memseg_list *found_msl;
struct malloc_heap *heap;
- int msl_idx;
+ int msl_idx, heap_idx;
if (msl->external)
return 0;
- heap = &mcfg->malloc_heaps[msl->socket_id];
+ heap_idx = malloc_socket_to_heap_id(msl->socket_id);
+ heap = &mcfg->malloc_heaps[heap_idx];
/* msl is const, so find it */
msl_idx = msl - mcfg->memsegs;
@@ -111,6 +127,7 @@ malloc_add_seg(const struct rte_memseg_list *msl,
malloc_heap_add_memory(heap, found_msl, ms->addr, len);
heap->total_size += len;
+ heap->socket_id = msl->socket_id;
RTE_LOG(DEBUG, EAL, "Added %zuM to heap on socket %i\n", len >> 20,
msl->socket_id);
@@ -561,12 +578,14 @@ alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket,
/* this will try lower page sizes first */
static void *
-heap_alloc_on_socket(const char *type, size_t size, int socket,
- unsigned int flags, size_t align, size_t bound, bool contig)
+malloc_heap_alloc_on_heap_id(const char *type, size_t size,
+ unsigned int heap_id, unsigned int flags, size_t align,
+ size_t bound, bool contig)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+ struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
+ int socket_id;
void *ret;
rte_spinlock_lock(&(heap->lock));
@@ -584,12 +603,28 @@ heap_alloc_on_socket(const char *type, size_t size, int socket,
* we may still be able to allocate memory from appropriate page sizes,
* we just need to request more memory first.
*/
+
+ socket_id = rte_socket_id_by_idx(heap_id);
+ /*
+ * if socket ID is negative, we cannot find a socket ID for this heap -
+ * which means it's an external heap. those can have unexpected page
+ * sizes, so if the user asked to allocate from there - assume user
+ * knows what they're doing, and allow allocating from there with any
+ * page size flags.
+ */
+ if (socket_id < 0)
+ size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY;
+
ret = heap_alloc(heap, type, size, size_flags, align, bound, contig);
if (ret != NULL)
goto alloc_unlock;
- if (!alloc_more_mem_on_socket(heap, size, socket, flags, align, bound,
- contig)) {
+ /* if socket ID is invalid, this is an external heap */
+ if (socket_id < 0)
+ goto alloc_unlock;
+
+ if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align,
+ bound, contig)) {
ret = heap_alloc(heap, type, size, flags, align, bound, contig);
/* this should have succeeded */
@@ -605,7 +640,7 @@ void *
malloc_heap_alloc(const char *type, size_t size, int socket_arg,
unsigned int flags, size_t align, size_t bound, bool contig)
{
- int socket, i, cur_socket;
+ int socket, heap_id, i;
void *ret;
/* return NULL if size is 0 or alignment is not power-of-2 */
@@ -620,22 +655,25 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg,
else
socket = socket_arg;
- /* Check socket parameter */
- if (socket >= RTE_MAX_NUMA_NODES)
+ /* turn socket ID into heap ID */
+ heap_id = malloc_socket_to_heap_id(socket);
+ /* if heap id is negative, socket ID was invalid */
+ if (heap_id < 0)
return NULL;
- ret = heap_alloc_on_socket(type, size, socket, flags, align, bound,
- contig);
+ ret = malloc_heap_alloc_on_heap_id(type, size, heap_id, flags, align,
+ bound, contig);
if (ret != NULL || socket_arg != SOCKET_ID_ANY)
return ret;
- /* try other heaps */
+ /* try other heaps. we are only iterating through native DPDK sockets,
+ * so external heaps won't be included.
+ */
for (i = 0; i < (int) rte_socket_count(); i++) {
- cur_socket = rte_socket_id_by_idx(i);
- if (cur_socket == socket)
+ if (i == heap_id)
continue;
- ret = heap_alloc_on_socket(type, size, cur_socket, flags,
- align, bound, contig);
+ ret = malloc_heap_alloc_on_heap_id(type, size, i, flags, align,
+ bound, contig);
if (ret != NULL)
return ret;
}
@@ -643,11 +681,11 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg,
}
static void *
-heap_alloc_biggest_on_socket(const char *type, int socket, unsigned int flags,
- size_t align, bool contig)
+heap_alloc_biggest_on_heap_id(const char *type, unsigned int heap_id,
+ unsigned int flags, size_t align, bool contig)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+ struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
void *ret;
rte_spinlock_lock(&(heap->lock));
@@ -665,7 +703,7 @@ void *
malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
size_t align, bool contig)
{
- int socket, i, cur_socket;
+ int socket, i, cur_socket, heap_id;
void *ret;
/* return NULL if align is not power-of-2 */
@@ -680,11 +718,13 @@ malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
else
socket = socket_arg;
- /* Check socket parameter */
- if (socket >= RTE_MAX_NUMA_NODES)
+ /* turn socket ID into heap ID */
+ heap_id = malloc_socket_to_heap_id(socket);
+ /* if heap id is negative, socket ID was invalid */
+ if (heap_id < 0)
return NULL;
- ret = heap_alloc_biggest_on_socket(type, socket, flags, align,
+ ret = heap_alloc_biggest_on_heap_id(type, heap_id, flags, align,
contig);
if (ret != NULL || socket_arg != SOCKET_ID_ANY)
return ret;
@@ -694,8 +734,8 @@ malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
cur_socket = rte_socket_id_by_idx(i);
if (cur_socket == socket)
continue;
- ret = heap_alloc_biggest_on_socket(type, cur_socket, flags,
- align, contig);
+ ret = heap_alloc_biggest_on_heap_id(type, i, flags, align,
+ contig);
if (ret != NULL)
return ret;
}
@@ -760,7 +800,7 @@ malloc_heap_free(struct malloc_elem *elem)
/* ...of which we can't avail if we are in legacy mode, or if this is an
* externally allocated segment.
*/
- if (internal_config.legacy_mem || msl->external)
+ if (internal_config.legacy_mem || (msl->external > 0))
goto free_unlock;
/* check if we can free any memory back to the system */
@@ -917,7 +957,7 @@ malloc_heap_resize(struct malloc_elem *elem, size_t size)
}
/*
- * Function to retrieve data for heap on given socket
+ * Function to retrieve data for a given heap
*/
int
malloc_heap_get_stats(struct malloc_heap *heap,
@@ -955,7 +995,7 @@ malloc_heap_get_stats(struct malloc_heap *heap,
}
/*
- * Function to retrieve data for heap on given socket
+ * Function to retrieve data for a given heap
*/
void
malloc_heap_dump(struct malloc_heap *heap, FILE *f)
@@ -46,6 +46,9 @@ malloc_heap_get_stats(struct malloc_heap *heap,
void
malloc_heap_dump(struct malloc_heap *heap, FILE *f);
+int
+malloc_socket_to_heap_id(unsigned int socket_id);
+
int
rte_eal_malloc_heap_init(void);
@@ -152,11 +152,20 @@ rte_malloc_get_socket_stats(int socket,
struct rte_malloc_socket_stats *socket_stats)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int heap_idx, ret = -1;
- if (socket >= RTE_MAX_NUMA_NODES || socket < 0)
- return -1;
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
- return malloc_heap_get_stats(&mcfg->malloc_heaps[socket], socket_stats);
+ heap_idx = malloc_socket_to_heap_id(socket);
+ if (heap_idx < 0)
+ goto unlock;
+
+ ret = malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx],
+ socket_stats);
+unlock:
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
}
/*
@@ -168,12 +177,14 @@ rte_malloc_dump_heaps(FILE *f)
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
unsigned int idx;
- for (idx = 0; idx < rte_socket_count(); idx++) {
- unsigned int socket = rte_socket_id_by_idx(idx);
- fprintf(f, "Heap on socket %i:\n", socket);
- malloc_heap_dump(&mcfg->malloc_heaps[socket], f);
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+
+ for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
+ fprintf(f, "Heap id: %u\n", idx);
+ malloc_heap_dump(&mcfg->malloc_heaps[idx], f);
}
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
}
/*
@@ -182,14 +193,19 @@ rte_malloc_dump_heaps(FILE *f)
void
rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
{
- unsigned int socket;
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int heap_id;
struct rte_malloc_socket_stats sock_stats;
+
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+
/* Iterate through all initialised heaps */
- for (socket=0; socket< RTE_MAX_NUMA_NODES; socket++) {
- if ((rte_malloc_get_socket_stats(socket, &sock_stats) < 0))
- continue;
+ for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) {
+ struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
- fprintf(f, "Socket:%u\n", socket);
+ malloc_heap_get_stats(heap, &sock_stats);
+
+ fprintf(f, "Heap id:%u\n", heap_id);
fprintf(f, "\tHeap_size:%zu,\n", sock_stats.heap_totalsz_bytes);
fprintf(f, "\tFree_size:%zu,\n", sock_stats.heap_freesz_bytes);
fprintf(f, "\tAlloc_size:%zu,\n", sock_stats.heap_allocsz_bytes);
@@ -198,6 +214,7 @@ rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count);
fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count);
}
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
return;
}