From patchwork Thu May 31 09:51:00 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Anatoly Burakov X-Patchwork-Id: 40527 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id CF8A74CC3; Thu, 31 May 2018 11:51:10 +0200 (CEST) Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by dpdk.org (Postfix) with ESMTP id 3BD6F4C93 for ; Thu, 31 May 2018 11:51:05 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga007.fm.intel.com ([10.253.24.52]) by fmsmga102.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 31 May 2018 02:51:03 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.49,463,1520924400"; d="scan'208";a="43602546" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by fmsmga007.fm.intel.com with ESMTP; 31 May 2018 02:51:02 -0700 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id w4V9p2Rx017705 for ; Thu, 31 May 2018 10:51:02 +0100 Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id w4V9p1J9025377 for ; Thu, 31 May 2018 10:51:01 +0100 Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id w4V9p1tB025364 for dev@dpdk.org; Thu, 31 May 2018 10:51:01 +0100 From: Anatoly Burakov To: dev@dpdk.org Date: Thu, 31 May 2018 10:51:00 +0100 Message-Id: <6dae56b03a4df667351cbf10bf2aa06808871067.1527758967.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [PATCH v6 2/3] malloc: allow reserving biggest element X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Add an internal-only function to allocate biggest element from the heap. Nominally, it supports SOCKET_ID_ANY as its socket argument, but it's essentially useless because other sockets will only be allocated from if the entire heap on current or specified socket is busy. Still, asking to reserve a biggest element will allow fixing race condition in memzone reserve that has been there for a long time. Signed-off-by: Anatoly Burakov Acked-by: Remy Horton --- lib/librte_eal/common/malloc_heap.c | 126 ++++++++++++++++++++++++++++ lib/librte_eal/common/malloc_heap.h | 4 + 2 files changed, 130 insertions(+) diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c index d6cf3af81..12aaf2d72 100644 --- a/lib/librte_eal/common/malloc_heap.c +++ b/lib/librte_eal/common/malloc_heap.c @@ -148,6 +148,52 @@ find_suitable_element(struct malloc_heap *heap, size_t size, return NULL; } +/* + * Iterates through the freelist for a heap to find a free element with the + * biggest size and requested alignment. Will also set size to whatever element + * size that was found. + * Returns null on failure, or pointer to element on success. + */ +static struct malloc_elem * +find_biggest_element(struct malloc_heap *heap, size_t *size, + unsigned int flags, size_t align, bool contig) +{ + struct malloc_elem *elem, *max_elem = NULL; + size_t idx, max_size = 0; + + for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) { + for (elem = LIST_FIRST(&heap->free_head[idx]); + !!elem; elem = LIST_NEXT(elem, free_list)) { + size_t cur_size; + if (!check_hugepage_sz(flags, elem->msl->page_sz)) + continue; + if (contig) { + cur_size = + malloc_elem_find_max_iova_contig(elem, + align); + } else { + void *data_start = RTE_PTR_ADD(elem, + MALLOC_ELEM_HEADER_LEN); + void *data_end = RTE_PTR_ADD(elem, elem->size - + MALLOC_ELEM_TRAILER_LEN); + void *aligned = RTE_PTR_ALIGN_CEIL(data_start, + align); + /* check if aligned data start is beyond end */ + if (aligned >= data_end) + continue; + cur_size = RTE_PTR_DIFF(data_end, aligned); + } + if (cur_size > max_size) { + max_size = cur_size; + max_elem = elem; + } + } + } + + *size = max_size; + return max_elem; +} + /* * Main function to allocate a block of memory from the heap. * It locks the free list, scans it, and adds a new memseg if the @@ -174,6 +220,26 @@ heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size, return elem == NULL ? NULL : (void *)(&elem[1]); } +static void * +heap_alloc_biggest(struct malloc_heap *heap, const char *type __rte_unused, + unsigned int flags, size_t align, bool contig) +{ + struct malloc_elem *elem; + size_t size; + + align = RTE_CACHE_LINE_ROUNDUP(align); + + elem = find_biggest_element(heap, &size, flags, align, contig); + if (elem != NULL) { + elem = malloc_elem_alloc(elem, size, align, 0, contig); + + /* increase heap's count of allocated elements */ + heap->alloc_count++; + } + + return elem == NULL ? NULL : (void *)(&elem[1]); +} + /* this function is exposed in malloc_mp.h */ void rollback_expand_heap(struct rte_memseg **ms, int n_segs, @@ -575,6 +641,66 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg, return NULL; } +static void * +heap_alloc_biggest_on_socket(const char *type, int socket, unsigned int flags, + size_t align, bool contig) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct malloc_heap *heap = &mcfg->malloc_heaps[socket]; + void *ret; + + rte_spinlock_lock(&(heap->lock)); + + align = align == 0 ? 1 : align; + + ret = heap_alloc_biggest(heap, type, flags, align, contig); + + rte_spinlock_unlock(&(heap->lock)); + + return ret; +} + +void * +malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags, + size_t align, bool contig) +{ + int socket, i, cur_socket; + void *ret; + + /* return NULL if align is not power-of-2 */ + if ((align && !rte_is_power_of_2(align))) + return NULL; + + if (!rte_eal_has_hugepages()) + socket_arg = SOCKET_ID_ANY; + + if (socket_arg == SOCKET_ID_ANY) + socket = malloc_get_numa_socket(); + else + socket = socket_arg; + + /* Check socket parameter */ + if (socket >= RTE_MAX_NUMA_NODES) + return NULL; + + ret = heap_alloc_biggest_on_socket(type, socket, flags, align, + contig); + if (ret != NULL || socket_arg != SOCKET_ID_ANY) + return ret; + + /* try other heaps */ + for (i = 0; i < (int) rte_socket_count(); i++) { + cur_socket = rte_socket_id_by_idx(i); + if (cur_socket == socket) + continue; + ret = heap_alloc_biggest_on_socket(type, cur_socket, flags, + align, contig); + if (ret != NULL) + return ret; + } + return NULL; +} + /* this function is exposed in malloc_mp.h */ int malloc_heap_free_pages(void *aligned_start, size_t aligned_len) diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h index 03b801415..f52cb5559 100644 --- a/lib/librte_eal/common/malloc_heap.h +++ b/lib/librte_eal/common/malloc_heap.h @@ -29,6 +29,10 @@ void * malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags, size_t align, size_t bound, bool contig); +void * +malloc_heap_alloc_biggest(const char *type, int socket, unsigned int flags, + size_t align, bool contig); + int malloc_heap_free(struct malloc_elem *elem);