get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/35752/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 35752,
    "url": "https://patches.dpdk.org/api/patches/35752/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/d6cda96f6a094385bfab3c9d292af2e8490f893b.1520428025.git.anatoly.burakov@intel.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<d6cda96f6a094385bfab3c9d292af2e8490f893b.1520428025.git.anatoly.burakov@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/d6cda96f6a094385bfab3c9d292af2e8490f893b.1520428025.git.anatoly.burakov@intel.com",
    "date": "2018-03-07T16:56:42",
    "name": "[dpdk-dev,v2,14/41] eal: add support for mapping hugepages at runtime",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "f42c65c62cac36b3d386e035a7a3eb098196a887",
    "submitter": {
        "id": 4,
        "url": "https://patches.dpdk.org/api/people/4/?format=api",
        "name": "Anatoly Burakov",
        "email": "anatoly.burakov@intel.com"
    },
    "delegate": null,
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/d6cda96f6a094385bfab3c9d292af2e8490f893b.1520428025.git.anatoly.burakov@intel.com/mbox/",
    "series": [],
    "comments": "https://patches.dpdk.org/api/patches/35752/comments/",
    "check": "success",
    "checks": "https://patches.dpdk.org/api/patches/35752/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id D45191B3D1;\n\tWed,  7 Mar 2018 17:57:43 +0100 (CET)",
            "from mga18.intel.com (mga18.intel.com [134.134.136.126])\n\tby dpdk.org (Postfix) with ESMTP id 9B743A84F\n\tfor <dev@dpdk.org>; Wed,  7 Mar 2018 17:57:27 +0100 (CET)",
            "from fmsmga003.fm.intel.com ([10.253.24.29])\n\tby orsmga106.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t07 Mar 2018 08:57:15 -0800",
            "from irvmail001.ir.intel.com ([163.33.26.43])\n\tby FMSMGA003.fm.intel.com with ESMTP; 07 Mar 2018 08:57:12 -0800",
            "from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com\n\t[10.237.217.45])\n\tby irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id\n\tw27GvB57032386; Wed, 7 Mar 2018 16:57:11 GMT",
            "from sivswdev01.ir.intel.com (localhost [127.0.0.1])\n\tby sivswdev01.ir.intel.com with ESMTP id w27GvBls006713;\n\tWed, 7 Mar 2018 16:57:11 GMT",
            "(from aburakov@localhost)\n\tby sivswdev01.ir.intel.com with LOCAL id w27GvBTn006709;\n\tWed, 7 Mar 2018 16:57:11 GMT"
        ],
        "X-Amp-Result": "SKIPPED(no attachment in message)",
        "X-Amp-File-Uploaded": "False",
        "X-ExtLoop1": "1",
        "X-IronPort-AV": "E=Sophos;i=\"5.47,436,1515484800\"; d=\"scan'208\";a=\"32086342\"",
        "From": "Anatoly Burakov <anatoly.burakov@intel.com>",
        "To": "dev@dpdk.org",
        "Cc": "Bruce Richardson <bruce.richardson@intel.com>, keith.wiles@intel.com,\n\tjianfeng.tan@intel.com, andras.kovacs@ericsson.com,\n\tlaszlo.vadkeri@ericsson.com, benjamin.walker@intel.com,\n\tthomas@monjalon.net, konstantin.ananyev@intel.com,\n\tkuralamudhan.ramakrishnan@intel.com, louise.m.daly@intel.com,\n\tnelio.laranjeiro@6wind.com, yskoh@mellanox.com, pepperjo@japf.ch,\n\tjerin.jacob@caviumnetworks.com, hemant.agrawal@nxp.com,\n\tolivier.matz@6wind.com",
        "Date": "Wed,  7 Mar 2018 16:56:42 +0000",
        "Message-Id": "<d6cda96f6a094385bfab3c9d292af2e8490f893b.1520428025.git.anatoly.burakov@intel.com>",
        "X-Mailer": "git-send-email 1.7.0.7",
        "In-Reply-To": [
            "<cover.1520428025.git.anatoly.burakov@intel.com>",
            "<cover.1520428025.git.anatoly.burakov@intel.com>"
        ],
        "References": [
            "<cover.1520428025.git.anatoly.burakov@intel.com>",
            "<cover.1520083504.git.anatoly.burakov@intel.com>\n\t<cover.1520428025.git.anatoly.burakov@intel.com>"
        ],
        "Subject": "[dpdk-dev] [PATCH v2 14/41] eal: add support for mapping hugepages\n\tat runtime",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://dpdk.org/ml/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://dpdk.org/ml/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Nothing uses this code yet. The bulk of it is copied from old\nmemory allocation code (linuxapp eal_memory.c). We provide an\nEAL-internal API to allocate either one page or multiple pages,\nguaranteeing that we'll get contiguous VA for all of the pages\nthat we requested.\n\nFor single-file segments, we will use fallocate() to grow and\nshrink memory segments, however fallocate() is not supported\non all kernel versions, so we will fall back to using\nftruncate() to grow the file, and disable shrinking as there's\nlittle we can do there. This will enable vhost use cases where\nhaving single file segments is of great value even without\nsupport for hot-unplugging memory.\n\nNot supported on FreeBSD.\n\nLocking is done via fcntl() because that way, when it comes to\ntaking out write locks or unlocking on deallocation, we don't\nhave to keep original fd's around. Plus, using fcntl() gives us\nability to lock parts of a file, which is useful for single-file\nsegments.\n\nSigned-off-by: Anatoly Burakov <anatoly.burakov@intel.com>\n---\n lib/librte_eal/bsdapp/eal/Makefile         |   1 +\n lib/librte_eal/bsdapp/eal/eal_memalloc.c   |  26 ++\n lib/librte_eal/bsdapp/eal/meson.build      |   1 +\n lib/librte_eal/common/eal_memalloc.h       |  19 +\n lib/librte_eal/linuxapp/eal/Makefile       |   2 +\n lib/librte_eal/linuxapp/eal/eal_memalloc.c | 609 +++++++++++++++++++++++++++++\n lib/librte_eal/linuxapp/eal/meson.build    |   1 +\n 7 files changed, 659 insertions(+)\n create mode 100644 lib/librte_eal/bsdapp/eal/eal_memalloc.c\n create mode 100644 lib/librte_eal/common/eal_memalloc.h\n create mode 100644 lib/librte_eal/linuxapp/eal/eal_memalloc.c",
    "diff": "diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile\nindex 1b43d77..19f9322 100644\n--- a/lib/librte_eal/bsdapp/eal/Makefile\n+++ b/lib/librte_eal/bsdapp/eal/Makefile\n@@ -29,6 +29,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c\n SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c\n SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c\n SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c\n+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memalloc.c\n SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c\n SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_timer.c\n SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_interrupts.c\ndiff --git a/lib/librte_eal/bsdapp/eal/eal_memalloc.c b/lib/librte_eal/bsdapp/eal/eal_memalloc.c\nnew file mode 100644\nindex 0000000..be8340b\n--- /dev/null\n+++ b/lib/librte_eal/bsdapp/eal/eal_memalloc.c\n@@ -0,0 +1,26 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2017-2018 Intel Corporation\n+ */\n+\n+#include <inttypes.h>\n+\n+#include <rte_log.h>\n+#include <rte_memory.h>\n+\n+#include \"eal_memalloc.h\"\n+\n+int\n+eal_memalloc_alloc_page_bulk(struct rte_memseg **ms __rte_unused,\n+\t\tint __rte_unused n, uint64_t __rte_unused size,\n+\t\tint __rte_unused socket, bool __rte_unused exact)\n+{\n+\tRTE_LOG(ERR, EAL, \"Memory hotplug not supported on FreeBSD\\n\");\n+\treturn -1;\n+}\n+\n+struct rte_memseg *\n+eal_memalloc_alloc_page(uint64_t __rte_unused size, int __rte_unused socket)\n+{\n+\tRTE_LOG(ERR, EAL, \"Memory hotplug not supported on FreeBSD\\n\");\n+\treturn NULL;\n+}\ndiff --git a/lib/librte_eal/bsdapp/eal/meson.build b/lib/librte_eal/bsdapp/eal/meson.build\nindex e83fc91..4b40223 100644\n--- a/lib/librte_eal/bsdapp/eal/meson.build\n+++ b/lib/librte_eal/bsdapp/eal/meson.build\n@@ -8,6 +8,7 @@ env_sources = files('eal_alarm.c',\n \t\t'eal_hugepage_info.c',\n \t\t'eal_interrupts.c',\n \t\t'eal_lcore.c',\n+\t\t'eal_memalloc.c',\n \t\t'eal_thread.c',\n \t\t'eal_timer.c',\n \t\t'eal.c',\ndiff --git a/lib/librte_eal/common/eal_memalloc.h b/lib/librte_eal/common/eal_memalloc.h\nnew file mode 100644\nindex 0000000..c1076cf\n--- /dev/null\n+++ b/lib/librte_eal/common/eal_memalloc.h\n@@ -0,0 +1,19 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2017-2018 Intel Corporation\n+ */\n+\n+#ifndef EAL_MEMALLOC_H\n+#define EAL_MEMALLOC_H\n+\n+#include <stdbool.h>\n+\n+#include <rte_memory.h>\n+\n+struct rte_memseg *\n+eal_memalloc_alloc_page(uint64_t size, int socket);\n+\n+int\n+eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n, uint64_t size,\n+\t\tint socket, bool exact);\n+\n+#endif // EAL_MEMALLOC_H\ndiff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile\nindex c407a43..af6b9be 100644\n--- a/lib/librte_eal/linuxapp/eal/Makefile\n+++ b/lib/librte_eal/linuxapp/eal/Makefile\n@@ -36,6 +36,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_thread.c\n SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_log.c\n SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio.c\n SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio_mp_sync.c\n+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_memalloc.c\n SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_debug.c\n SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c\n SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c\n@@ -82,6 +83,7 @@ CFLAGS_eal_interrupts.o := -D_GNU_SOURCE\n CFLAGS_eal_vfio_mp_sync.o := -D_GNU_SOURCE\n CFLAGS_eal_timer.o := -D_GNU_SOURCE\n CFLAGS_eal_lcore.o := -D_GNU_SOURCE\n+CFLAGS_eal_memalloc.o := -D_GNU_SOURCE\n CFLAGS_eal_thread.o := -D_GNU_SOURCE\n CFLAGS_eal_log.o := -D_GNU_SOURCE\n CFLAGS_eal_common_log.o := -D_GNU_SOURCE\ndiff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c\nnew file mode 100644\nindex 0000000..1ba1201\n--- /dev/null\n+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c\n@@ -0,0 +1,609 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2017-2018 Intel Corporation\n+ */\n+\n+#define _FILE_OFFSET_BITS 64\n+#include <errno.h>\n+#include <stdarg.h>\n+#include <stdbool.h>\n+#include <stdlib.h>\n+#include <stdio.h>\n+#include <stdint.h>\n+#include <inttypes.h>\n+#include <string.h>\n+#include <sys/mman.h>\n+#include <sys/types.h>\n+#include <sys/stat.h>\n+#include <sys/queue.h>\n+#include <sys/file.h>\n+#include <unistd.h>\n+#include <limits.h>\n+#include <fcntl.h>\n+#include <sys/ioctl.h>\n+#include <sys/time.h>\n+#include <signal.h>\n+#include <setjmp.h>\n+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES\n+#include <numa.h>\n+#include <numaif.h>\n+#endif\n+\n+#include <rte_common.h>\n+#include <rte_log.h>\n+#include <rte_eal_memconfig.h>\n+#include <rte_eal.h>\n+#include <rte_memory.h>\n+#include <rte_spinlock.h>\n+\n+#include \"eal_filesystem.h\"\n+#include \"eal_internal_cfg.h\"\n+#include \"eal_memalloc.h\"\n+\n+/*\n+ * not all kernel version support fallocate on hugetlbfs, so fall back to\n+ * ftruncate and disallow deallocation if fallocate is not supported.\n+ */\n+static int fallocate_supported = -1; /* unknown */\n+\n+/*\n+ * If each page is in a separate file, we can close fd's since we need each fd\n+ * only once. However, in single file segments mode, we can get away with using\n+ * a single fd for entire segments, but we need to store them somewhere. Each\n+ * fd is different within each process, so we'll store them in a local tailq.\n+ */\n+struct msl_entry {\n+\tTAILQ_ENTRY(msl_entry) next;\n+\tunsigned int msl_idx;\n+\tint fd;\n+};\n+\n+/** Double linked list of memseg list fd's. */\n+TAILQ_HEAD(msl_entry_list, msl_entry);\n+\n+static struct msl_entry_list msl_entry_list =\n+\t\tTAILQ_HEAD_INITIALIZER(msl_entry_list);\n+static rte_spinlock_t tailq_lock = RTE_SPINLOCK_INITIALIZER;\n+\n+static sigjmp_buf huge_jmpenv;\n+\n+static void __rte_unused huge_sigbus_handler(int signo __rte_unused)\n+{\n+\tsiglongjmp(huge_jmpenv, 1);\n+}\n+\n+/* Put setjmp into a wrap method to avoid compiling error. Any non-volatile,\n+ * non-static local variable in the stack frame calling sigsetjmp might be\n+ * clobbered by a call to longjmp.\n+ */\n+static int __rte_unused huge_wrap_sigsetjmp(void)\n+{\n+\treturn sigsetjmp(huge_jmpenv, 1);\n+}\n+\n+static struct sigaction huge_action_old;\n+static int huge_need_recover;\n+\n+static void __rte_unused\n+huge_register_sigbus(void)\n+{\n+\tsigset_t mask;\n+\tstruct sigaction action;\n+\n+\tsigemptyset(&mask);\n+\tsigaddset(&mask, SIGBUS);\n+\taction.sa_flags = 0;\n+\taction.sa_mask = mask;\n+\taction.sa_handler = huge_sigbus_handler;\n+\n+\thuge_need_recover = !sigaction(SIGBUS, &action, &huge_action_old);\n+}\n+\n+static void __rte_unused\n+huge_recover_sigbus(void)\n+{\n+\tif (huge_need_recover) {\n+\t\tsigaction(SIGBUS, &huge_action_old, NULL);\n+\t\thuge_need_recover = 0;\n+\t}\n+}\n+\n+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES\n+static bool\n+prepare_numa(int *oldpolicy, struct bitmask *oldmask, int socket_id)\n+{\n+\tbool have_numa = true;\n+\n+\t/* Check if kernel supports NUMA. */\n+\tif (numa_available() != 0) {\n+\t\tRTE_LOG(DEBUG, EAL, \"NUMA is not supported.\\n\");\n+\t\thave_numa = false;\n+\t}\n+\n+\tif (have_numa) {\n+\t\tRTE_LOG(DEBUG, EAL, \"Trying to obtain current memory policy.\\n\");\n+\t\tif (get_mempolicy(oldpolicy, oldmask->maskp,\n+\t\t\t\t  oldmask->size + 1, 0, 0) < 0) {\n+\t\t\tRTE_LOG(ERR, EAL,\n+\t\t\t\t\"Failed to get current mempolicy: %s. \"\n+\t\t\t\t\"Assuming MPOL_DEFAULT.\\n\", strerror(errno));\n+\t\t\toldpolicy = MPOL_DEFAULT;\n+\t\t}\n+\t\tRTE_LOG(DEBUG, EAL,\n+\t\t\t\"Setting policy MPOL_PREFERRED for socket %d\\n\",\n+\t\t\tsocket_id);\n+\t\tnuma_set_preferred(socket_id);\n+\t}\n+\treturn have_numa;\n+}\n+\n+static void\n+resotre_numa(int *oldpolicy, struct bitmask *oldmask)\n+{\n+\tRTE_LOG(DEBUG, EAL,\n+\t\t\"Restoring previous memory policy: %d\\n\", *oldpolicy);\n+\tif (oldpolicy == MPOL_DEFAULT) {\n+\t\tnuma_set_localalloc();\n+\t} else if (set_mempolicy(*oldpolicy, oldmask->maskp,\n+\t\t\t\t oldmask->size + 1) < 0) {\n+\t\tRTE_LOG(ERR, EAL, \"Failed to restore mempolicy: %s\\n\",\n+\t\t\tstrerror(errno));\n+\t\tnuma_set_localalloc();\n+\t}\n+\tnuma_free_cpumask(oldmask);\n+}\n+#endif\n+\n+static struct msl_entry *\n+get_msl_entry_by_idx(unsigned int list_idx)\n+{\n+\tstruct msl_entry *te;\n+\n+\trte_spinlock_lock(&tailq_lock);\n+\n+\tTAILQ_FOREACH(te, &msl_entry_list, next) {\n+\t\tif (te->msl_idx == list_idx)\n+\t\t\tbreak;\n+\t}\n+\tif (te == NULL) {\n+\t\t/* doesn't exist, so create it and set fd to -1 */\n+\n+\t\tte = malloc(sizeof(*te));\n+\t\tif (te == NULL) {\n+\t\t\tRTE_LOG(ERR, EAL, \"%s(): cannot allocate tailq entry for memseg list\\n\",\n+\t\t\t\t__func__);\n+\t\t\tgoto unlock;\n+\t\t}\n+\t\tte->msl_idx = list_idx;\n+\t\tte->fd = -1;\n+\t\tTAILQ_INSERT_TAIL(&msl_entry_list, te, next);\n+\t}\n+unlock:\n+\trte_spinlock_unlock(&tailq_lock);\n+\treturn te;\n+}\n+\n+/*\n+ * uses fstat to report the size of a file on disk\n+ */\n+static off_t\n+getFileSize(int fd)\n+{\n+\tstruct stat st;\n+\tif (fstat(fd, &st) < 0)\n+\t\treturn 0;\n+\treturn st.st_size;\n+}\n+\n+/*\n+ * uses fstat to check if file size on disk is zero (regular fstat won't show\n+ * true file size due to how fallocate works)\n+ */\n+static bool\n+is_zero_length(int fd)\n+{\n+\tstruct stat st;\n+\tif (fstat(fd, &st) < 0)\n+\t\treturn false;\n+\treturn st.st_blocks == 0;\n+}\n+\n+static int\n+get_page_fd(char *path, int buflen, struct hugepage_info *hi,\n+\t\tunsigned int list_idx, unsigned int seg_idx)\n+{\n+\tint fd;\n+\n+\tif (internal_config.single_file_segments) {\n+\t\t/*\n+\t\t * try to find a tailq entry, for this memseg list, or create\n+\t\t * one if it doesn't exist.\n+\t\t */\n+\t\tstruct msl_entry *te = get_msl_entry_by_idx(list_idx);\n+\t\tif (te == NULL) {\n+\t\t\tRTE_LOG(ERR, EAL, \"%s(): cannot allocate tailq entry for memseg list\\n\",\n+\t\t\t\t__func__);\n+\t\t\treturn -1;\n+\t\t} else if (te->fd < 0) {\n+\t\t\t/* create a hugepage file */\n+\t\t\teal_get_hugefile_path(path, buflen, hi->hugedir,\n+\t\t\t\t\tlist_idx);\n+\t\t\tfd = open(path, O_CREAT | O_RDWR, 0600);\n+\t\t\tif (fd < 0) {\n+\t\t\t\tRTE_LOG(DEBUG, EAL, \"%s(): open failed: %s\\n\",\n+\t\t\t\t\t__func__, strerror(errno));\n+\t\t\t\treturn -1;\n+\t\t\t}\n+\t\t\tte->fd = fd;\n+\t\t} else {\n+\t\t\tfd = te->fd;\n+\t\t}\n+\t} else {\n+\t\t/* one file per page, just create it */\n+\t\teal_get_hugefile_path(path, buflen, hi->hugedir,\n+\t\t\t\tlist_idx * RTE_MAX_MEMSEG_PER_LIST + seg_idx);\n+\t\tfd = open(path, O_CREAT | O_RDWR, 0600);\n+\t\tif (fd < 0) {\n+\t\t\tRTE_LOG(DEBUG, EAL, \"%s(): open failed: %s\\n\", __func__,\n+\t\t\t\t\tstrerror(errno));\n+\t\t\treturn -1;\n+\t\t}\n+\t}\n+\treturn fd;\n+}\n+\n+/* returns 1 on successful lock, 0 on unsuccessful lock, -1 on error */\n+static int lock(int fd, uint64_t offset, uint64_t len, int type)\n+{\n+\tstruct flock lck = {0};\n+\tint ret;\n+\n+\tlck.l_type = type;\n+\tlck.l_whence = SEEK_SET;\n+\tlck.l_start = offset;\n+\tlck.l_len = len;\n+\n+\tret = fcntl(fd, F_SETLK, &lck);\n+\n+\tif (ret && (errno == EAGAIN || errno == EACCES)) {\n+\t\t/* locked by another process, not an error */\n+\t\treturn 0;\n+\t} else if (ret) {\n+\t\tRTE_LOG(ERR, EAL, \"%s(): error calling fcntl(): %s\\n\",\n+\t\t\t__func__, strerror(errno));\n+\t\t/* we've encountered an unexpected error */\n+\t\treturn -1;\n+\t}\n+\treturn 1;\n+}\n+\n+static int\n+resize_hugefile(int fd, uint64_t fa_offset, uint64_t page_sz,\n+\t\tbool grow)\n+{\n+\tbool again = false;\n+\tdo {\n+\t\tif (fallocate_supported == 0) {\n+\t\t\t/* we cannot deallocate memory if fallocate() is not\n+\t\t\t * supported, but locks are still needed to prevent\n+\t\t\t * primary process' initialization from clearing out\n+\t\t\t * huge pages used by this process.\n+\t\t\t */\n+\n+\t\t\tif (!grow) {\n+\t\t\t\tRTE_LOG(DEBUG, EAL, \"%s(): fallocate not supported, not freeing page back to the system\\n\",\n+\t\t\t\t\t__func__);\n+\t\t\t\treturn -1;\n+\t\t\t}\n+\t\t\tuint64_t new_size = fa_offset + page_sz;\n+\t\t\tuint64_t cur_size = getFileSize(fd);\n+\n+\t\t\t/* fallocate isn't supported, fall back to ftruncate */\n+\t\t\tif (new_size > cur_size &&\n+\t\t\t\t\tftruncate(fd, new_size) < 0) {\n+\t\t\t\tRTE_LOG(DEBUG, EAL, \"%s(): ftruncate() failed: %s\\n\",\n+\t\t\t\t\t__func__, strerror(errno));\n+\t\t\t\treturn -1;\n+\t\t\t}\n+\t\t\t/* not being able to take out a read lock is an error */\n+\t\t\tif (lock(fd, fa_offset, page_sz, F_RDLCK) != 1)\n+\t\t\t\treturn -1;\n+\t\t} else {\n+\t\t\tint flags = grow ? 0 : FALLOC_FL_PUNCH_HOLE |\n+\t\t\t\t\tFALLOC_FL_KEEP_SIZE;\n+\t\t\tint ret;\n+\n+\t\t\t/* if fallocate() is supported, we need to take out a\n+\t\t\t * read lock on allocate (to prevent other processes\n+\t\t\t * from deallocating this page), and take out a write\n+\t\t\t * lock on deallocate (to ensure nobody else is using\n+\t\t\t * this page).\n+\t\t\t *\n+\t\t\t * we can't use flock() for this, as we actually need to\n+\t\t\t * lock part of the file, not the entire file.\n+\t\t\t */\n+\n+\t\t\tif (!grow) {\n+\t\t\t\tret = lock(fd, fa_offset, page_sz, F_WRLCK);\n+\n+\t\t\t\tif (ret < 0)\n+\t\t\t\t\treturn -1;\n+\t\t\t\telse if (ret == 0)\n+\t\t\t\t\t/* failed to lock, not an error */\n+\t\t\t\t\treturn 0;\n+\t\t\t}\n+\t\t\tif (fallocate(fd, flags, fa_offset, page_sz) < 0) {\n+\t\t\t\tif (fallocate_supported == -1 &&\n+\t\t\t\t\t\terrno == ENOTSUP) {\n+\t\t\t\t\tRTE_LOG(ERR, EAL, \"%s(): fallocate() not supported, hugepage deallocation will be disabled\\n\",\n+\t\t\t\t\t\t__func__);\n+\t\t\t\t\tagain = true;\n+\t\t\t\t\tfallocate_supported = 0;\n+\t\t\t\t} else {\n+\t\t\t\t\tRTE_LOG(DEBUG, EAL, \"%s(): fallocate() failed: %s\\n\",\n+\t\t\t\t\t\t__func__,\n+\t\t\t\t\t\tstrerror(errno));\n+\t\t\t\t\treturn -1;\n+\t\t\t\t}\n+\t\t\t} else {\n+\t\t\t\tfallocate_supported = 1;\n+\n+\t\t\t\tif (grow) {\n+\t\t\t\t\t/* if can't read lock, it's an error */\n+\t\t\t\t\tif (lock(fd, fa_offset, page_sz,\n+\t\t\t\t\t\t\tF_RDLCK) != 1)\n+\t\t\t\t\t\treturn -1;\n+\t\t\t\t} else {\n+\t\t\t\t\t/* if can't unlock, it's an error */\n+\t\t\t\t\tif (lock(fd, fa_offset, page_sz,\n+\t\t\t\t\t\t\tF_UNLCK) != 1)\n+\t\t\t\t\t\treturn -1;\n+\t\t\t\t}\n+\t\t\t}\n+\t\t}\n+\t} while (again);\n+\treturn 0;\n+}\n+\n+static int\n+alloc_page(struct rte_memseg *ms, void *addr, uint64_t size, int socket_id,\n+\t\tstruct hugepage_info *hi, unsigned int list_idx,\n+\t\tunsigned int seg_idx)\n+{\n+\tint cur_socket_id = 0;\n+\tuint64_t map_offset;\n+\tchar path[PATH_MAX];\n+\tint ret = 0;\n+\tint fd;\n+\n+\tfd = get_page_fd(path, sizeof(path), hi, list_idx, seg_idx);\n+\tif (fd < 0)\n+\t\treturn -1;\n+\n+\n+\tif (internal_config.single_file_segments) {\n+\t\tmap_offset = seg_idx * size;\n+\t\tret = resize_hugefile(fd, map_offset, size, true);\n+\t\tif (ret < 1)\n+\t\t\tgoto resized;\n+\t} else {\n+\t\tmap_offset = 0;\n+\t\tif (ftruncate(fd, size) < 0) {\n+\t\t\tRTE_LOG(DEBUG, EAL, \"%s(): ftruncate() failed: %s\\n\",\n+\t\t\t\t__func__, strerror(errno));\n+\t\t\tgoto resized;\n+\t\t}\n+\t\t/* we've allocated a page - take out a read lock. we're using\n+\t\t * fcntl() locks rather than flock() here because doing that\n+\t\t * gives us one huge advantage - fcntl() locks are per-process,\n+\t\t * not per-file descriptor, which means that we don't have to\n+\t\t * keep the original fd's around to keep a lock on the file.\n+\t\t *\n+\t\t * this is useful, because when it comes to unmapping pages, we\n+\t\t * will have to take out a write lock (to figure out if another\n+\t\t * process still has this page mapped), and to do itwith flock()\n+\t\t * we'll have to use original fd, as lock is associated with\n+\t\t * that particular fd. with fcntl(), this is not necessary - we\n+\t\t * can open a new fd and use fcntl() on that.\n+\t\t */\n+\t\tret = lock(fd, map_offset, size, F_RDLCK);\n+\n+\t\t/* this should not fail */\n+\t\tif (ret != 1) {\n+\t\t\tRTE_LOG(ERR, EAL, \"%s(): error locking file: %s\\n\",\n+\t\t\t\t__func__,\n+\t\t\t\tstrerror(errno));\n+\t\t\tgoto resized;\n+\t\t}\n+\t}\n+\n+\t/*\n+\t * map the segment, and populate page tables, the kernel fills this\n+\t * segment with zeros if it's a new page.\n+\t */\n+\tvoid *va = mmap(addr, size, PROT_READ | PROT_WRITE,\n+\t\t\tMAP_SHARED | MAP_POPULATE | MAP_FIXED, fd, map_offset);\n+\t/* for non-single file segments, we can close fd here */\n+\tif (!internal_config.single_file_segments)\n+\t\tclose(fd);\n+\n+\tif (va == MAP_FAILED) {\n+\t\tRTE_LOG(DEBUG, EAL, \"%s(): mmap() failed: %s\\n\", __func__,\n+\t\t\tstrerror(errno));\n+\t\tgoto resized;\n+\t}\n+\tif (va != addr) {\n+\t\tRTE_LOG(DEBUG, EAL, \"%s(): wrong mmap() address\\n\", __func__);\n+\t\tgoto mapped;\n+\t}\n+\n+\trte_iova_t iova = rte_mem_virt2iova(addr);\n+\tif (iova == RTE_BAD_PHYS_ADDR) {\n+\t\tRTE_LOG(DEBUG, EAL, \"%s(): can't get IOVA addr\\n\",\n+\t\t\t__func__);\n+\t\tgoto mapped;\n+\t}\n+\n+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES\n+\tmove_pages(getpid(), 1, &addr, NULL, &cur_socket_id, 0);\n+\n+\tif (cur_socket_id != socket_id) {\n+\t\tRTE_LOG(DEBUG, EAL,\n+\t\t\t\t\"%s(): allocation happened on wrong socket (wanted %d, got %d)\\n\",\n+\t\t\t__func__, socket_id, cur_socket_id);\n+\t\tgoto mapped;\n+\t}\n+#endif\n+\n+\t/* In linux, hugetlb limitations, like cgroup, are\n+\t * enforced at fault time instead of mmap(), even\n+\t * with the option of MAP_POPULATE. Kernel will send\n+\t * a SIGBUS signal. To avoid to be killed, save stack\n+\t * environment here, if SIGBUS happens, we can jump\n+\t * back here.\n+\t */\n+\tif (huge_wrap_sigsetjmp()) {\n+\t\tRTE_LOG(DEBUG, EAL, \"SIGBUS: Cannot mmap more hugepages of size %uMB\\n\",\n+\t\t\t(unsigned int)(size / 0x100000));\n+\t\tgoto mapped;\n+\t}\n+\t*(int *)addr = *(int *) addr;\n+\n+\tms->addr = addr;\n+\tms->hugepage_sz = size;\n+\tms->len = size;\n+\tms->nchannel = rte_memory_get_nchannel();\n+\tms->nrank = rte_memory_get_nrank();\n+\tms->iova = iova;\n+\tms->socket_id = socket_id;\n+\n+\treturn 0;\n+\n+mapped:\n+\tmunmap(addr, size);\n+resized:\n+\tif (internal_config.single_file_segments) {\n+\t\tresize_hugefile(fd, map_offset, size, false);\n+\t\tif (is_zero_length(fd)) {\n+\t\t\tstruct msl_entry *te = get_msl_entry_by_idx(list_idx);\n+\t\t\tif (te != NULL && te->fd >= 0) {\n+\t\t\t\tclose(te->fd);\n+\t\t\t\tte->fd = -1;\n+\t\t\t}\n+\t\t\t/* ignore errors, can't make it any worse */\n+\t\t\tunlink(path);\n+\t\t}\n+\t} else {\n+\t\tclose(fd);\n+\t\tunlink(path);\n+\t}\n+\treturn -1;\n+}\n+\n+int\n+eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,\n+\t\tuint64_t size, int socket, bool exact)\n+{\n+\tstruct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;\n+\tstruct rte_memseg_list *msl = NULL;\n+\tvoid *addr;\n+\tunsigned int msl_idx;\n+\tint cur_idx, end_idx, i, ret = -1;\n+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES\n+\tbool have_numa;\n+\tint oldpolicy;\n+\tstruct bitmask *oldmask = numa_allocate_nodemask();\n+#endif\n+\tstruct hugepage_info *hi = NULL;\n+\n+\t/* dynamic allocation not supported in legacy mode */\n+\tif (internal_config.legacy_mem)\n+\t\tgoto restore_numa;\n+\n+\tfor (i = 0; i < (int) RTE_DIM(internal_config.hugepage_info); i++) {\n+\t\tif (size ==\n+\t\t\t\tinternal_config.hugepage_info[i].hugepage_sz) {\n+\t\t\thi = &internal_config.hugepage_info[i];\n+\t\t\tbreak;\n+\t\t}\n+\t}\n+\tif (!hi) {\n+\t\tRTE_LOG(ERR, EAL, \"%s(): can't find relevant hugepage_info entry\\n\",\n+\t\t\t__func__);\n+\t\tgoto restore_numa;\n+\t}\n+\n+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES\n+\thave_numa = prepare_numa(&oldpolicy, oldmask, socket);\n+#endif\n+\n+\t/* there may be several memsegs for this page size and socket id, so try\n+\t * allocating on all of them.\n+\t */\n+\n+\t/* find our memseg list */\n+\tfor (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {\n+\t\tstruct rte_memseg_list *cur_msl = &mcfg->memsegs[msl_idx];\n+\n+\t\tif (cur_msl->hugepage_sz != size)\n+\t\t\tcontinue;\n+\t\tif (cur_msl->socket_id != socket)\n+\t\t\tcontinue;\n+\t\tmsl = cur_msl;\n+\n+\t\t/* try finding space in memseg list */\n+\t\tcur_idx = rte_fbarray_find_next_n_free(&msl->memseg_arr, 0, n);\n+\n+\t\tif (cur_idx < 0)\n+\t\t\tcontinue;\n+\n+\t\tend_idx = cur_idx + n;\n+\n+\t\tfor (i = 0; cur_idx < end_idx; cur_idx++, i++) {\n+\t\t\tstruct rte_memseg *cur;\n+\n+\t\t\tcur = rte_fbarray_get(&msl->memseg_arr, cur_idx);\n+\t\t\taddr = RTE_PTR_ADD(msl->base_va,\n+\t\t\t\t\tcur_idx * msl->hugepage_sz);\n+\n+\t\t\tif (alloc_page(cur, addr, size, socket, hi, msl_idx,\n+\t\t\t\t\tcur_idx)) {\n+\t\t\t\tRTE_LOG(DEBUG, EAL, \"attempted to allocate %i pages, but only %i were allocated\\n\",\n+\t\t\t\t\tn, i);\n+\n+\t\t\t\t/* if exact number wasn't requested, stop */\n+\t\t\t\tif (!exact)\n+\t\t\t\t\tret = i;\n+\t\t\t\tgoto restore_numa;\n+\t\t\t}\n+\t\t\tif (ms)\n+\t\t\t\tms[i] = cur;\n+\n+\t\t\trte_fbarray_set_used(&msl->memseg_arr, cur_idx);\n+\t\t}\n+\t\tret = n;\n+\n+\t\tbreak;\n+\t}\n+\t/* we didn't break */\n+\tif (!msl) {\n+\t\tRTE_LOG(ERR, EAL, \"%s(): couldn't find suitable memseg_list\\n\",\n+\t\t\t__func__);\n+\t}\n+\n+restore_numa:\n+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES\n+\tif (have_numa)\n+\t\tresotre_numa(&oldpolicy, oldmask);\n+#endif\n+\treturn ret;\n+}\n+\n+struct rte_memseg *\n+eal_memalloc_alloc_page(uint64_t size, int socket)\n+{\n+\tstruct rte_memseg *ms;\n+\tif (eal_memalloc_alloc_page_bulk(&ms, 1, size, socket, true) < 0)\n+\t\treturn NULL;\n+\t/* return pointer to newly allocated memseg */\n+\treturn ms;\n+}\ndiff --git a/lib/librte_eal/linuxapp/eal/meson.build b/lib/librte_eal/linuxapp/eal/meson.build\nindex 03974ff..5254c6c 100644\n--- a/lib/librte_eal/linuxapp/eal/meson.build\n+++ b/lib/librte_eal/linuxapp/eal/meson.build\n@@ -10,6 +10,7 @@ env_sources = files('eal_alarm.c',\n \t\t'eal_debug.c',\n \t\t'eal_hugepage_info.c',\n \t\t'eal_interrupts.c',\n+\t\t'eal_memalloc.c',\n \t\t'eal_lcore.c',\n \t\t'eal_log.c',\n \t\t'eal_thread.c',\n",
    "prefixes": [
        "dpdk-dev",
        "v2",
        "14/41"
    ]
}