get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/13114/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 13114,
    "url": "https://patches.dpdk.org/api/patches/13114/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/1464665827-24965-1-git-send-email-jianfeng.tan@intel.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<1464665827-24965-1-git-send-email-jianfeng.tan@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/1464665827-24965-1-git-send-email-jianfeng.tan@intel.com",
    "date": "2016-05-31T03:37:07",
    "name": "[dpdk-dev,v5] eal: fix allocating all free hugepages",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "986cd3e8e893158fb69eec30454a9d8d757e2234",
    "submitter": {
        "id": 313,
        "url": "https://patches.dpdk.org/api/people/313/?format=api",
        "name": "Jianfeng Tan",
        "email": "jianfeng.tan@intel.com"
    },
    "delegate": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/users/1/?format=api",
        "username": "tmonjalo",
        "first_name": "Thomas",
        "last_name": "Monjalon",
        "email": "thomas@monjalon.net"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/1464665827-24965-1-git-send-email-jianfeng.tan@intel.com/mbox/",
    "series": [],
    "comments": "https://patches.dpdk.org/api/patches/13114/comments/",
    "check": "pending",
    "checks": "https://patches.dpdk.org/api/patches/13114/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [IPv6:::1])\n\tby dpdk.org (Postfix) with ESMTP id BE2205A17;\n\tTue, 31 May 2016 05:37:22 +0200 (CEST)",
            "from mga03.intel.com (mga03.intel.com [134.134.136.65])\n\tby dpdk.org (Postfix) with ESMTP id E7DAE58C5\n\tfor <dev@dpdk.org>; Tue, 31 May 2016 05:37:20 +0200 (CEST)",
            "from fmsmga002.fm.intel.com ([10.253.24.26])\n\tby orsmga103.jf.intel.com with ESMTP; 30 May 2016 20:37:19 -0700",
            "from dpdk06.sh.intel.com ([10.239.128.225])\n\tby fmsmga002.fm.intel.com with ESMTP; 30 May 2016 20:37:18 -0700"
        ],
        "X-ExtLoop1": "1",
        "X-IronPort-AV": "E=Sophos;i=\"5.26,393,1459839600\"; d=\"scan'208\";a=\"991918876\"",
        "From": "Jianfeng Tan <jianfeng.tan@intel.com>",
        "To": "dev@dpdk.org",
        "Cc": "sergio.gonzalez.monroy@intel.com, nhorman@tuxdriver.com,\n\tdavid.marchand@6wind.com, thomas.monjalon@6wind.com,\n\tJianfeng Tan <jianfeng.tan@intel.com>",
        "Date": "Tue, 31 May 2016 03:37:07 +0000",
        "Message-Id": "<1464665827-24965-1-git-send-email-jianfeng.tan@intel.com>",
        "X-Mailer": "git-send-email 2.1.4",
        "In-Reply-To": "<1453661393-85704-1-git-send-email-jianfeng.tan@intel.com>",
        "References": "<1453661393-85704-1-git-send-email-jianfeng.tan@intel.com>",
        "Subject": "[dpdk-dev] [PATCH v5] eal: fix allocating all free hugepages",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "patches and discussions about DPDK <dev.dpdk.org>",
        "List-Unsubscribe": "<http://dpdk.org/ml/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<http://dpdk.org/ml/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "EAL memory init allocates all free hugepages of the whole system,\nwhich seen from sysfs, even when applications do not ask so many.\nWhen there is a limitation on how many hugepages an application can\nuse (such as cgroup.hugetlb), or hugetlbfs is specified with an\noption of size (exceeding the quota of the fs), it just fails to\nstart even there are enough hugepages allocated.\n\nTo fix above issue, this patch:\n - Changes the logic to continue memory init to see if hugetlb\n   requirement of application can be addressed by already allocated\n   hugepages.\n - To make sure each hugepage is allocated successfully, we add a\n   recover mechanism, which relies on a mem access to fault-in\n   hugepages, and if it fails with SIGBUS, recover to previously\n   saved stack environment with siglongjmp().\n\nFor the case of CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS (enabled by\ndefault when compiling IVSHMEM target), it's indispensable to\nmapp all free hugepages in the system. Under this case, it fails\nto start when allocating fails.\n\nTest example:\n  a. cgcreate -g hugetlb:/test-subgroup\n  b. cgset -r hugetlb.1GB.limit_in_bytes=2147483648 test-subgroup\n  c. cgexec -g hugetlb:test-subgroup \\\n          ./examples/helloworld/build/helloworld -c 0x2 -n 4\n\n       \nFixes: af75078fece (\"first public release\")\n\nSigned-off-by: Jianfeng Tan <jianfeng.tan@intel.com>\nAcked-by: Neil Horman <nhorman@tuxdriver.com>\n---\nv5:\n - Make this method as default instead of using an option.\n - When SIGBUS is triggered in the case of RTE_EAL_SINGLE_FILE_SEGMENTS,\n   just return error.\n - Add prefix \"huge_\" to newly added function and static variables.\n - Move the internal_config.memory assignment after the page allocations.\nv4:\n - Change map_all_hugepages to return unsigned instead of int.\nv3:\n - Reword commit message to include it fixes the hugetlbfs quota issue.\n - setjmp -> sigsetjmp.\n - Fix RTE_LOG complaint from ERR to DEBUG as it does not mean init error\n   so far.\n - Fix the second map_all_hugepages's return value check.\nv2:\n - Address the compiling error by move setjmp into a wrap method.\n\n lib/librte_eal/linuxapp/eal/eal.c        |  20 -----\n lib/librte_eal/linuxapp/eal/eal_memory.c | 138 ++++++++++++++++++++++++++++---\n 2 files changed, 125 insertions(+), 33 deletions(-)",
    "diff": "diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c\nindex 8aafd51..4a8dfbd 100644\n--- a/lib/librte_eal/linuxapp/eal/eal.c\n+++ b/lib/librte_eal/linuxapp/eal/eal.c\n@@ -465,24 +465,6 @@ eal_parse_vfio_intr(const char *mode)\n \treturn -1;\n }\n \n-static inline size_t\n-eal_get_hugepage_mem_size(void)\n-{\n-\tuint64_t size = 0;\n-\tunsigned i, j;\n-\n-\tfor (i = 0; i < internal_config.num_hugepage_sizes; i++) {\n-\t\tstruct hugepage_info *hpi = &internal_config.hugepage_info[i];\n-\t\tif (hpi->hugedir != NULL) {\n-\t\t\tfor (j = 0; j < RTE_MAX_NUMA_NODES; j++) {\n-\t\t\t\tsize += hpi->hugepage_sz * hpi->num_pages[j];\n-\t\t\t}\n-\t\t}\n-\t}\n-\n-\treturn (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;\n-}\n-\n /* Parse the arguments for --log-level only */\n static void\n eal_log_level_parse(int argc, char **argv)\n@@ -766,8 +748,6 @@ rte_eal_init(int argc, char **argv)\n \tif (internal_config.memory == 0 && internal_config.force_sockets == 0) {\n \t\tif (internal_config.no_hugetlbfs)\n \t\t\tinternal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;\n-\t\telse\n-\t\t\tinternal_config.memory = eal_get_hugepage_mem_size();\n \t}\n \n \tif (internal_config.vmware_tsc_map == 1) {\ndiff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c\nindex 5b9132c..dc6f49b 100644\n--- a/lib/librte_eal/linuxapp/eal/eal_memory.c\n+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c\n@@ -80,6 +80,8 @@\n #include <errno.h>\n #include <sys/ioctl.h>\n #include <sys/time.h>\n+#include <signal.h>\n+#include <setjmp.h>\n \n #include <rte_log.h>\n #include <rte_memory.h>\n@@ -309,6 +311,21 @@ get_virtual_area(size_t *size, size_t hugepage_sz)\n \treturn addr;\n }\n \n+static sigjmp_buf huge_jmpenv;\n+\n+static void huge_sigbus_handler(int signo __rte_unused)\n+{\n+\tsiglongjmp(huge_jmpenv, 1);\n+}\n+\n+/* Put setjmp into a wrap method to avoid compiling error. Any non-volatile,\n+ * non-static local variable in the stack frame calling sigsetjmp might be\n+ * clobbered by a call to longjmp.\n+ */\n+static int huge_wrap_sigsetjmp(void)\n+{\n+\treturn sigsetjmp(huge_jmpenv, 1);\n+}\n /*\n  * Mmap all hugepages of hugepage table: it first open a file in\n  * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the\n@@ -316,7 +333,7 @@ get_virtual_area(size_t *size, size_t hugepage_sz)\n  * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to\n  * map continguous physical blocks in contiguous virtual blocks.\n  */\n-static int\n+static unsigned\n map_all_hugepages(struct hugepage_file *hugepg_tbl,\n \t\tstruct hugepage_info *hpi, int orig)\n {\n@@ -394,9 +411,9 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,\n \t\t/* try to create hugepage file */\n \t\tfd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);\n \t\tif (fd < 0) {\n-\t\t\tRTE_LOG(ERR, EAL, \"%s(): open failed: %s\\n\", __func__,\n+\t\t\tRTE_LOG(DEBUG, EAL, \"%s(): open failed: %s\\n\", __func__,\n \t\t\t\t\tstrerror(errno));\n-\t\t\treturn -1;\n+\t\t\treturn i;\n \t\t}\n \n \t\t/* map the segment, and populate page tables,\n@@ -404,10 +421,10 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,\n \t\tvirtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE,\n \t\t\t\tMAP_SHARED | MAP_POPULATE, fd, 0);\n \t\tif (virtaddr == MAP_FAILED) {\n-\t\t\tRTE_LOG(ERR, EAL, \"%s(): mmap failed: %s\\n\", __func__,\n+\t\t\tRTE_LOG(DEBUG, EAL, \"%s(): mmap failed: %s\\n\", __func__,\n \t\t\t\t\tstrerror(errno));\n \t\t\tclose(fd);\n-\t\t\treturn -1;\n+\t\t\treturn i;\n \t\t}\n \n \t\tif (orig) {\n@@ -417,12 +434,33 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,\n \t\t\thugepg_tbl[i].final_va = virtaddr;\n \t\t}\n \n+\t\tif (orig) {\n+\t\t\t/* In linux, hugetlb limitations, like cgroup, are\n+\t\t\t * enforced at fault time instead of mmap(), even\n+\t\t\t * with the option of MAP_POPULATE. Kernel will send\n+\t\t\t * a SIGBUS signal. To avoid to be killed, save stack\n+\t\t\t * environment here, if SIGBUS happens, we can jump\n+\t\t\t * back here.\n+\t\t\t */\n+\t\t\tif (huge_wrap_sigsetjmp()) {\n+\t\t\t\tRTE_LOG(DEBUG, EAL, \"SIGBUS: Cannot mmap more \"\n+\t\t\t\t\t\"hugepages of size %u MB\\n\",\n+\t\t\t\t\t(unsigned)(hugepage_sz / 0x100000));\n+\t\t\t\tmunmap(virtaddr, hugepage_sz);\n+\t\t\t\tclose(fd);\n+\t\t\t\tunlink(hugepg_tbl[i].filepath);\n+\t\t\t\treturn i;\n+\t\t\t}\n+\t\t\t*(int *)virtaddr = 0;\n+\t\t}\n+\n+\n \t\t/* set shared flock on the file. */\n \t\tif (flock(fd, LOCK_SH | LOCK_NB) == -1) {\n-\t\t\tRTE_LOG(ERR, EAL, \"%s(): Locking file failed:%s \\n\",\n+\t\t\tRTE_LOG(DEBUG, EAL, \"%s(): Locking file failed:%s \\n\",\n \t\t\t\t__func__, strerror(errno));\n \t\t\tclose(fd);\n-\t\t\treturn -1;\n+\t\t\treturn i;\n \t\t}\n \n \t\tclose(fd);\n@@ -430,7 +468,8 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,\n \t\tvma_addr = (char *)vma_addr + hugepage_sz;\n \t\tvma_len -= hugepage_sz;\n \t}\n-\treturn 0;\n+\n+\treturn i;\n }\n \n #ifdef RTE_EAL_SINGLE_FILE_SEGMENTS\n@@ -1036,6 +1075,51 @@ calc_num_pages_per_socket(uint64_t * memory,\n \treturn total_num_pages;\n }\n \n+static inline size_t\n+eal_get_hugepage_mem_size(void)\n+{\n+\tuint64_t size = 0;\n+\tunsigned i, j;\n+\n+\tfor (i = 0; i < internal_config.num_hugepage_sizes; i++) {\n+\t\tstruct hugepage_info *hpi = &internal_config.hugepage_info[i];\n+\t\tif (hpi->hugedir != NULL) {\n+\t\t\tfor (j = 0; j < RTE_MAX_NUMA_NODES; j++) {\n+\t\t\t\tsize += hpi->hugepage_sz * hpi->num_pages[j];\n+\t\t\t}\n+\t\t}\n+\t}\n+\n+\treturn (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;\n+}\n+\n+static struct sigaction huge_action_old;\n+static int huge_need_recover;\n+\n+static void\n+huge_register_sigbus(void)\n+{\n+\tsigset_t mask;\n+\tstruct sigaction action;\n+\n+\tsigemptyset(&mask);\n+\tsigaddset(&mask, SIGBUS);\n+\taction.sa_flags = 0;\n+\taction.sa_mask = mask;\n+\taction.sa_handler = huge_sigbus_handler;\n+\n+\thuge_need_recover = !sigaction(SIGBUS, &action, &huge_action_old);\n+}\n+\n+static void\n+huge_recover_sigbus(void)\n+{\n+\tif (huge_need_recover) {\n+\t\tsigaction(SIGBUS, &huge_action_old, NULL);\n+\t\thuge_need_recover = 0;\n+\t}\n+}\n+\n /*\n  * Prepare physical memory mapping: fill configuration structure with\n  * these infos, return 0 on success.\n@@ -1122,8 +1206,11 @@ rte_eal_hugepage_init(void)\n \n \thp_offset = 0; /* where we start the current page size entries */\n \n+\thuge_register_sigbus();\n+\n \t/* map all hugepages and sort them */\n \tfor (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){\n+\t\tunsigned pages_old, pages_new;\n \t\tstruct hugepage_info *hpi;\n \n \t\t/*\n@@ -1137,10 +1224,28 @@ rte_eal_hugepage_init(void)\n \t\t\tcontinue;\n \n \t\t/* map all hugepages available */\n-\t\tif (map_all_hugepages(&tmp_hp[hp_offset], hpi, 1) < 0){\n-\t\t\tRTE_LOG(DEBUG, EAL, \"Failed to mmap %u MB hugepages\\n\",\n-\t\t\t\t\t(unsigned)(hpi->hugepage_sz / 0x100000));\n+\t\tpages_old = hpi->num_pages[0];\n+\t\tpages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, 1);\n+\t\tif (pages_new < pages_old) {\n+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS\n+\t\t\tRTE_LOG(ERR, EAL,\n+\t\t\t\t\"%d not %d hugepages of size %u MB allocated\\n\",\n+\t\t\t\tpages_new, pages_old,\n+\t\t\t\t(unsigned)(hpi->hugepage_sz / 0x100000));\n \t\t\tgoto fail;\n+#else\n+\t\t\tRTE_LOG(DEBUG, EAL,\n+\t\t\t\t\"%d not %d hugepages of size %u MB allocated\\n\",\n+\t\t\t\tpages_new, pages_old,\n+\t\t\t\t(unsigned)(hpi->hugepage_sz / 0x100000));\n+\n+\t\t\tint pages = pages_old - pages_new;\n+\n+\t\t\tnr_hugepages -= pages;\n+\t\t\thpi->num_pages[0] = pages_new;\n+\t\t\tif (pages_new == 0)\n+\t\t\t\tcontinue;\n+#endif\n \t\t}\n \n \t\t/* find physical addresses and sockets for each hugepage */\n@@ -1172,8 +1277,9 @@ rte_eal_hugepage_init(void)\n \t\thp_offset += new_pages_count[i];\n #else\n \t\t/* remap all hugepages */\n-\t\tif (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0){\n-\t\t\tRTE_LOG(DEBUG, EAL, \"Failed to remap %u MB pages\\n\",\n+\t\tif (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) !=\n+\t\t    hpi->num_pages[0]) {\n+\t\t\tRTE_LOG(ERR, EAL, \"Failed to remap %u MB pages\\n\",\n \t\t\t\t\t(unsigned)(hpi->hugepage_sz / 0x100000));\n \t\t\tgoto fail;\n \t\t}\n@@ -1187,6 +1293,11 @@ rte_eal_hugepage_init(void)\n #endif\n \t}\n \n+\thuge_recover_sigbus();\n+\n+\tif (internal_config.memory == 0 && internal_config.force_sockets == 0)\n+\t\tinternal_config.memory = eal_get_hugepage_mem_size();\n+\n #ifdef RTE_EAL_SINGLE_FILE_SEGMENTS\n \tnr_hugefiles = 0;\n \tfor (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {\n@@ -1373,6 +1484,7 @@ rte_eal_hugepage_init(void)\n \treturn 0;\n \n fail:\n+\thuge_recover_sigbus();\n \tfree(tmp_hp);\n \treturn -1;\n }\n",
    "prefixes": [
        "dpdk-dev",
        "v5"
    ]
}