From patchwork Tue Dec 19 11:04:33 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Burakov, Anatoly" X-Patchwork-Id: 32421 Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 3B54C1B1B5; Tue, 19 Dec 2017 12:05:21 +0100 (CET) Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id 707001B014 for ; Tue, 19 Dec 2017 12:05:03 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga001.jf.intel.com ([10.7.209.18]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 19 Dec 2017 03:05:02 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.45,426,1508828400"; d="scan'208";a="17261487" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by orsmga001.jf.intel.com with ESMTP; 19 Dec 2017 03:05:00 -0800 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id vBJB4xod030153; Tue, 19 Dec 2017 11:05:00 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id vBJB4xej005371; Tue, 19 Dec 2017 11:04:59 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id vBJB4xTI005367; Tue, 19 Dec 2017 11:04:59 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: andras.kovacs@ericsson.com, laszlo.vadkeri@ericsson.com, keith.wiles@intel.com, benjamin.walker@intel.com, bruce.richardson@intel.com, thomas@monjalon.net Date: Tue, 19 Dec 2017 11:04:33 +0000 Message-Id: X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [PATCH 10/23] eal: read hugepage counts from node-specific sysfs path X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" For non-legacy memory init mode, instead of looking at generic sysfs path, look at sysfs paths pertaining to each NUMA node for hugepage counts. Note that per-NUMA node path does not provide information regarding reserved pages, so we might not get the best info from these paths, but this saves us from the whole mapping/remapping business before we're actually able to tell which page is on which socket, because we no longer require our memory to be physically contiguous. Legacy memory init will not use this. Signed-off-by: Anatoly Burakov --- lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 73 +++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 6 deletions(-) diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 86e174f..a85c15a 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -59,6 +59,7 @@ #include "eal_filesystem.h" static const char sys_dir_path[] = "/sys/kernel/mm/hugepages"; +static const char sys_pages_numa_dir_path[] = "/sys/devices/system/node"; /* this function is only called from eal_hugepage_info_init which itself * is only called from a primary process */ @@ -99,6 +100,42 @@ get_num_hugepages(const char *subdir) return num_pages; } +static uint32_t +get_num_hugepages_on_node(const char *subdir, unsigned socket) { + char path[PATH_MAX], socketpath[PATH_MAX]; + DIR *socketdir; + long unsigned num_pages = 0; + const char *nr_hp_file = "free_hugepages"; + + snprintf(socketpath, sizeof(socketpath), "%s/node%u/hugepages", + sys_pages_numa_dir_path, socket); + + socketdir = opendir(socketpath); + if (socketdir) { + /* Keep calm and carry on */ + closedir(socketdir); + } else { + /* Can't find socket dir, so ignore it */ + return 0; + } + + snprintf(path, sizeof(path), "%s/%s/%s", + socketpath, subdir, nr_hp_file); + if (eal_parse_sysfs_value(path, &num_pages) < 0) + return 0; + + if (num_pages == 0) + RTE_LOG(WARNING, EAL, "No free hugepages reported in %s\n", + subdir); + + /* we want to return a uint32_t and more than this looks suspicious + * anyway ... */ + if (num_pages > UINT32_MAX) + num_pages = UINT32_MAX; + + return num_pages; +} + static uint64_t get_default_hp_size(void) { @@ -277,7 +314,7 @@ eal_hugepage_info_init(void) { const char dirent_start_text[] = "hugepages-"; const size_t dirent_start_len = sizeof(dirent_start_text) - 1; - unsigned i, num_sizes = 0; + unsigned i, total_pages, num_sizes = 0; DIR *dir; struct dirent *dirent; @@ -331,9 +368,24 @@ eal_hugepage_info_init(void) if (clear_hugedir(hpi->hugedir) == -1) break; - /* for now, put all pages into socket 0, - * later they will be sorted */ - hpi->num_pages[0] = get_num_hugepages(dirent->d_name); + /* first, try to put all hugepages into relevant sockets, but + * if first attempts fails, fall back to collecting all pages + * in one socket and sorting them later */ + total_pages = 0; + /* we also don't want to do this for legacy init */ + if (!internal_config.legacy_mem) + for (i = 0; i < rte_num_sockets(); i++) { + unsigned num_pages = + get_num_hugepages_on_node( + dirent->d_name, i); + hpi->num_pages[i] = num_pages; + total_pages += num_pages; + } + /* we failed to sort memory from the get go, so fall + * back to old way */ + if (total_pages == 0) { + hpi->num_pages[0] = get_num_hugepages(dirent->d_name); + } #ifndef RTE_ARCH_64 /* for 32-bit systems, limit number of hugepages to @@ -357,10 +409,19 @@ eal_hugepage_info_init(void) sizeof(internal_config.hugepage_info[0]), compare_hpi); /* now we have all info, check we have at least one valid size */ - for (i = 0; i < num_sizes; i++) + for (i = 0; i < num_sizes; i++) { + /* pages may no longer all be on socket 0, so check all */ + unsigned j, num_pages = 0; + + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { + struct hugepage_info *hpi = + &internal_config.hugepage_info[i]; + num_pages += hpi->num_pages[j]; + } if (internal_config.hugepage_info[i].hugedir != NULL && - internal_config.hugepage_info[i].num_pages[0] > 0) + num_pages > 0) return 0; + } /* no valid hugepage mounts available, return error */ return -1;