@@ -95,6 +95,7 @@ eal_long_options[] = {
{OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
{OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM },
{OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM },
+ {OPT_REUSE_MAP, 0, NULL, OPT_REUSE_MAP_NUM },
{0, 0, NULL, 0 }
};
@@ -850,7 +851,9 @@ eal_parse_common_option(int opt, const char *optarg,
case OPT_NO_HUGE_NUM:
conf->no_hugetlbfs = 1;
break;
-
+ case OPT_REUSE_MAP_NUM:
+ conf->reuse_map = 1;
+ break;
case OPT_NO_PCI_NUM:
conf->no_pci = 1;
break;
@@ -62,6 +62,6 @@ struct hugepage_file {
* Read the information from linux on what hugepages are available
* for the EAL to use
*/
-int eal_hugepage_info_init(void);
+int eal_hugepage_info_init(int reuse);
#endif /* EAL_HUGEPAGES_H */
@@ -64,6 +64,7 @@ struct internal_config {
volatile unsigned force_nchannel; /**< force number of channels */
volatile unsigned force_nrank; /**< force number of ranks */
volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */
+	volatile unsigned reuse_map; /**< true to reuse existing hugepage file mapping */
unsigned hugepage_unlink; /**< true to unlink backing files */
volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
volatile unsigned no_pci; /**< true to disable PCI */
@@ -83,6 +83,8 @@ enum {
OPT_VMWARE_TSC_MAP_NUM,
#define OPT_XEN_DOM0 "xen-dom0"
OPT_XEN_DOM0_NUM,
+#define OPT_REUSE_MAP "reuse-map"
+ OPT_REUSE_MAP_NUM,
OPT_LONG_MAX_NUM
};
@@ -171,7 +171,22 @@ malloc_heap_alloc(struct malloc_heap *heap,
}
rte_spinlock_unlock(&heap->lock);
- return elem == NULL ? NULL : (void *)(&elem[1]);
+ if (elem == NULL) {
+ return NULL;
+ }
+
+	/*
+	 * This is ugly, but with the reuse-map option the memory may not
+	 * have been zeroed by hugetlbfs at process boot, while some
+	 * memzone users (e.g. rte_kni_alloc) assume the page is zeroed,
+	 * so clear the allocation here.
+	 */
+
+	/*
+	 * No need to memset in rte_free() now; this may be removed later.
+	 */
+ memset(&elem[1], 0, size);
+ return (void *)(&elem[1]);
}
/*
@@ -344,6 +344,7 @@ eal_usage(const char *prgname)
" --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n"
" --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n"
" --"OPT_XEN_DOM0" Support running on Xen dom0 without hugetlbfs\n"
+ " --"OPT_REUSE_MAP" Reuse exist page cache mapping for fast startup\n"
"\n");
/* Allow the application to print its usage message too if hook is set */
if ( rte_application_usage_hook ) {
@@ -766,7 +767,7 @@ rte_eal_init(int argc, char **argv)
if (internal_config.no_hugetlbfs == 0 &&
internal_config.process_type != RTE_PROC_SECONDARY &&
internal_config.xen_dom0_support == 0 &&
- eal_hugepage_info_init() < 0)
+ eal_hugepage_info_init(internal_config.reuse_map) < 0)
rte_panic("Cannot get hugepage information\n");
if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
@@ -58,19 +58,24 @@
#include "eal_internal_cfg.h"
#include "eal_hugepages.h"
#include "eal_filesystem.h"
+#include <sys/mman.h>
+#include <sys/stat.h>
static const char sys_dir_path[] = "/sys/kernel/mm/hugepages";
/* this function is only called from eal_hugepage_info_init which itself
* is only called from a primary process */
static uint32_t
-get_num_hugepages(const char *subdir)
+get_num_hugepages(const char *subdir, int reuse)
{
char path[PATH_MAX];
long unsigned resv_pages, num_pages = 0;
const char *nr_hp_file = "free_hugepages";
const char *nr_rsvd_file = "resv_hugepages";
+ if (reuse == 1) {
+ nr_hp_file = "nr_hugepages";
+ }
/* first, check how many reserved pages kernel reports */
snprintf(path, sizeof(path), "%s/%s/%s",
sys_dir_path, subdir, nr_rsvd_file);
@@ -124,6 +129,190 @@ get_default_hp_size(void)
return size;
}
+
+/*
+ * Check whether the hugetlbfs mount point <mp> holds exactly <nr_pages>
+ * "rtemap" backing files, each exactly <pagesize> bytes long and still
+ * resident in memory (i.e. the page mapping survived from a previous run).
+ *
+ * Return 1 when the existing mapping can be reused,
+ * or a negative value identifying the first failed check.
+ */
+static int check_mp(const char *mp, int nr_pages, uint64_t pagesize)
+{
+	DIR *dir;
+	struct dirent *dirent;
+	const char dirent_start_text[] = "rtemap";
+	const size_t dirent_start_len = sizeof(dirent_start_text) - 1;
+	int dir_fd;
+	int fd;
+	struct stat file_stat;
+	void *file_mmap;
+	unsigned char *mincore_vec;
+	int cnt = 0;
+	int rc = 0;
+
+	if (mp == NULL || nr_pages <= 0 || pagesize == 0) {
+		rc = -EINVAL;
+		goto ret4;
+	}
+
+	/* mincore() needs one status byte per regular (non-huge) page */
+	int default_page_size = getpagesize();
+	mincore_vec = calloc(1, (pagesize + default_page_size - 1) / default_page_size);
+	if (mincore_vec == NULL) {
+		RTE_LOG(ERR, EAL, "calloc failed\n");
+		rc = -ENOMEM;
+		goto ret4;
+	}
+
+	dir = opendir(mp);
+	if (dir == NULL) {
+		rc = -1;
+		goto ret3;
+	}
+
+	/* dir_fd is owned by `dir` and is released by closedir() below */
+	dir_fd = dirfd(dir);
+
+	for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) {
+		/* skip "." and ".." (and any other dot file) */
+		if (dirent->d_name[0] == '.')
+			continue;
+
+		/* an unexpected file in the mount point: check fails */
+		if (strncmp(dirent->d_name, dirent_start_text,
+				dirent_start_len) != 0) {
+			rc = -2;
+			goto ret2;
+		}
+
+		/*
+		 * Each backing file must be exactly one hugepage long and
+		 * have a valid resident mapping.  The one-file-per-page
+		 * layout is assumed (single-file segments not supported).
+		 */
+		fd = openat(dir_fd, dirent->d_name, O_RDONLY);
+		if (fd == -1) {
+			RTE_LOG(ERR, EAL, "open file failed %s\n", dirent->d_name);
+			rc = -3;
+			goto ret2;
+		}
+
+		if (fstat(fd, &file_stat) < 0) {
+			RTE_LOG(ERR, EAL, "Could not stat file %s\n", dirent->d_name);
+			rc = -4;
+			goto ret1;
+		}
+
+		if ((uint64_t)file_stat.st_size != pagesize) {
+			RTE_LOG(ERR, EAL, "%s file size %ld pagesize %d\n",
+				dirent->d_name, (long)file_stat.st_size, (int)pagesize);
+			rc = -5;
+			goto ret1;
+		}
+
+		file_mmap = mmap(NULL, pagesize, PROT_READ,
+				MAP_SHARED | MAP_POPULATE, fd, 0);
+		if (file_mmap == MAP_FAILED) {
+			RTE_LOG(ERR, EAL, "Could not mmap file %s\n", dirent->d_name);
+			rc = -6;
+			goto ret1;
+		}
+
+		if (mincore(file_mmap, pagesize, mincore_vec) != 0) {
+			RTE_LOG(ERR, EAL, "Could not call mincore for file %s\n",
+				dirent->d_name);
+			rc = -7;
+			goto ret;
+		}
+
+		/* first small page not resident: the mapping was lost */
+		if (!(mincore_vec[0] & 1)) {
+			rc = -8;
+			goto ret;
+		}
+
+		/*
+		 * Unmap here so that a later parse of /proc/self/numa_maps
+		 * does not fail on these transient mappings.
+		 */
+		munmap(file_mmap, pagesize);
+		close(fd);
+		cnt++;
+	}/*for loop end*/
+
+	if (cnt != nr_pages) {
+		rc = -9;
+		goto ret2;
+	}
+
+	/* the successful case */
+	rc = 1;
+	goto ret2;
+
+ret:
+	munmap(file_mmap, pagesize);
+ret1:
+	close(fd);
+ret2:
+	/* closedir() also releases the fd obtained from dirfd() */
+	closedir(dir);
+ret3:
+	free(mincore_vec);
+ret4:
+	return rc;
+}
+
+typedef int (*visit_cb) (const char *mp, int nr_pages, uint64_t pagesize);
+
+/* forward declaration: defined later in this file */
+static const char *
+get_hugepage_dir(uint64_t hugepage_sz);
+
+/*
+ * Walk every hugepage-size directory under sys_dir_path and run cb_fn on
+ * each mount point that has at least one page.  Return 1 only when cb_fn
+ * succeeded (returned 1) for every such mount point, 0 otherwise.
+ */
+static inline int visit_each_sysdir_entry(visit_cb cb_fn)
+{
+	DIR *dir;
+	struct dirent *dirent;
+	const char dirent_start_text[] = "hugepages-";
+	const size_t dirent_start_len = sizeof(dirent_start_text) - 1;
+	int num_sizes = 0;
+	int rc = 1;
+	uint64_t hugepage_sz;
+	uint32_t nr;
+	const char *hugedir;
+
+	dir = opendir(sys_dir_path);
+	if (dir == NULL)
+		return 0;
+
+	for (dirent = readdir(dir); dirent != NULL && rc == 1;
+			dirent = readdir(dir)) {
+		if (strncmp(dirent->d_name, dirent_start_text,
+				dirent_start_len) != 0)
+			continue;
+
+		/* shall not happen */
+		if (num_sizes >= MAX_HUGEPAGE_SIZES) {
+			rc = 0;
+			break;
+		}
+
+		hugepage_sz = rte_str_to_size(&dirent->d_name[dirent_start_len]);
+		hugedir = get_hugepage_dir(hugepage_sz);
+		nr = get_num_hugepages(dirent->d_name, 1);
+		/*
+		 * check_mp() returns 1 on success and a negative value on
+		 * failure -- never 0 -- so treat anything but 1 as failure.
+		 */
+		if (hugedir != NULL && nr != 0 &&
+				cb_fn(hugedir, nr, hugepage_sz) != 1)
+			rc = 0;
+		num_sizes++;
+	}
+	closedir(dir);
+	return rc;
+}
+
+/*
+ * Return 1 (trust the mapping left by a previous run) only when all of
+ * the following hold:
+ * 0) the user passed the reuse option;
+ * 1) the file count under each hugepage mount point equals the nr pages;
+ * 2) every one of those files still has its page mapping in place.
+ */
+static int eal_trust_exist_mapping(int reuse_opt)
+{
+	/* without the reuse option there is nothing to trust */
+	return (reuse_opt != 0) && visit_each_sysdir_entry(check_mp);
+}
+
static const char *
get_hugepage_dir(uint64_t hugepage_sz)
{
@@ -274,7 +463,7 @@ compare_hpi(const void *a, const void *b)
* initialization procedure.
*/
int
-eal_hugepage_info_init(void)
+eal_hugepage_info_init(int user_opt)
{
const char dirent_start_text[] = "hugepages-";
const size_t dirent_start_len = sizeof(dirent_start_text) - 1;
@@ -306,7 +495,7 @@ eal_hugepage_info_init(void)
if (hpi->hugedir == NULL) {
uint32_t num_pages;
- num_pages = get_num_hugepages(dirent->d_name);
+ num_pages = get_num_hugepages(dirent->d_name, 0);
if (num_pages > 0)
RTE_LOG(NOTICE, EAL,
"%" PRIu32 " hugepages of size "
@@ -325,13 +514,17 @@ eal_hugepage_info_init(void)
"Failed to lock hugepage directory!\n");
break;
}
- /* clear out the hugepages dir from unused pages */
- if (clear_hugedir(hpi->hugedir) == -1)
- break;
+
+ int reuse = eal_trust_exist_mapping(user_opt);
+ if (reuse == 0) {
+ /* clear out the hugepages dir from unused pages */
+ if (clear_hugedir(hpi->hugedir) == -1)
+ break;
+ }
/* for now, put all pages into socket 0,
* later they will be sorted */
- hpi->num_pages[0] = get_num_hugepages(dirent->d_name);
+ hpi->num_pages[0] = get_num_hugepages(dirent->d_name, reuse);
#ifndef RTE_ARCH_64
/* for 32-bit systems, limit number of hugepages to