[v2] eal: fix init failure with too many contiguous memsegs in legacy mode

Message ID 20230522124107.99877-1-changfengnan@bytedance.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Series [v2] eal: fix init failure with too many contiguous memsegs in legacy mode

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK
ci/intel-Functional success Functional PASS
ci/github-robot: build fail github build: failed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-unit-testing fail Testing issues
ci/iol-abi-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-testing success Testing PASS
ci/iol-x86_64-unit-testing fail Testing issues
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing fail Unit Testing FAIL
ci/intel-Testing success Testing PASS

Commit Message

Fengnan Chang May 22, 2023, 12:41 p.m. UTC
  Under legacy mode, if the number of contiguous memsegs is greater
than RTE_MAX_MEMSEG_PER_LIST, EAL init will fail even though
another memseg list is empty, because only one memseg list is
checked in remap_needed_hugepages.
Fix this by adding an argument that reports how many pages were
mapped by remap_segment: remap_segment maps as many pages as it
can, and if the request exceeds its capacity,
remap_needed_hugepages continues to map the remaining pages.

For example:
hugepage configure:
cat /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages
10241
10239

startup log:
EAL: Detected memory type: socket_id:0 hugepage_sz:2097152
EAL: Detected memory type: socket_id:1 hugepage_sz:2097152
EAL: Creating 4 segment lists: n_segs:8192 socket_id:0 hugepage_sz:2097152
EAL: Creating 4 segment lists: n_segs:8192 socket_id:1 hugepage_sz:2097152
EAL: Requesting 13370 pages of size 2MB from socket 0
EAL: Requesting 7110 pages of size 2MB from socket 1
EAL: Attempting to map 14220M on socket 1
EAL: Allocated 14220M on socket 1
EAL: Attempting to map 26740M on socket 0
EAL: Could not find space for memseg. Please increase 32768 and/or 65536 in
configuration.
EAL: Couldn't remap hugepage files into memseg lists
EAL: FATAL: Cannot init memory
EAL: Cannot init memory

Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
Signed-off-by: Lin Li <lilintjpu@bytedance.com>
---
 lib/eal/linux/eal_memory.c | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)
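
For context, here is a minimal standalone sketch (not part of the patch;
the n_segs value and the page count are taken from the startup log above,
and the 8192-entry list size is an assumption about this particular build)
of why the contiguous run on socket 0 cannot fit into a single memseg list:

#include <stdio.h>

/* Illustration only: each memseg list in the log above holds
 * n_segs = 8192 entries, while socket 0 has a contiguous run of
 * 13370 2MB pages, so the run can only be placed if it is split
 * across several memseg lists. */
int main(void)
{
	const int n_segs_per_list = 8192;  /* n_segs from the startup log */
	const int contig_pages = 13370;    /* 2MB pages requested on socket 0 */
	int lists_needed =
		(contig_pages + n_segs_per_list - 1) / n_segs_per_list;

	printf("%d contiguous pages need %d memseg lists of %d entries each\n",
			contig_pages, lists_needed, n_segs_per_list);
	return 0;
}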
  

Comments

Anatoly Burakov May 22, 2023, 1:28 p.m. UTC | #1
On 5/22/2023 1:41 PM, Fengnan Chang wrote:
> Under legacy mode, if the number of continuous memsegs greater
> than RTE_MAX_MEMSEG_PER_LIST, eal init will failed even though
> another memseg list is empty, because only one memseg list used
> to check in remap_needed_hugepages.
> Fix this by add a argment indicate how many pages mapped in
> remap_segment, remap_segment try to mapped most pages it can, if
> exceed it's capbility, remap_needed_hugepages will continue to
> map other left pages.
> 
> For example:
> hugepage configure:
> cat /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages
> 10241
> 10239
> 
> startup log:
> EAL: Detected memory type: socket_id:0 hugepage_sz:2097152
> EAL: Detected memory type: socket_id:1 hugepage_sz:2097152
> EAL: Creating 4 segment lists: n_segs:8192 socket_id:0 hugepage_sz:2097152
> EAL: Creating 4 segment lists: n_segs:8192 socket_id:1 hugepage_sz:2097152
> EAL: Requesting 13370 pages of size 2MB from socket 0
> EAL: Requesting 7110 pages of size 2MB from socket 1
> EAL: Attempting to map 14220M on socket 1
> EAL: Allocated 14220M on socket 1
> EAL: Attempting to map 26740M on socket 0
> EAL: Could not find space for memseg. Please increase 32768 and/or 65536 in
> configuration.
> EAL: Couldn't remap hugepage files into memseg lists
> EAL: FATAL: Cannot init memory
> EAL: Cannot init memory
> 
> Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
> Signed-off-by: Lin Li <lilintjpu@bytedance.com>
> ---
>   lib/eal/linux/eal_memory.c | 33 +++++++++++++++++++++------------
>   1 file changed, 21 insertions(+), 12 deletions(-)
> 
> diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c
> index 60fc8cc6ca..b2e6453fbe 100644
> --- a/lib/eal/linux/eal_memory.c
> +++ b/lib/eal/linux/eal_memory.c
> @@ -657,12 +657,12 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
>   }
>   
>   static int
> -remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
> +remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end, int *mapped_seg_len)
>   {
>   	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
>   	struct rte_memseg_list *msl;
>   	struct rte_fbarray *arr;
> -	int cur_page, seg_len;
> +	int cur_page, seg_len, cur_len;
>   	unsigned int msl_idx;
>   	int ms_idx;
>   	uint64_t page_sz;
> @@ -692,8 +692,9 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
>   
>   		/* leave space for a hole if array is not empty */
>   		empty = arr->count == 0;
> +		cur_len = RTE_MIN((unsigned int)seg_len, arr->len - arr->count - (empty ? 0 : 1));
>   		ms_idx = rte_fbarray_find_next_n_free(arr, 0,
> -				seg_len + (empty ? 0 : 1));
> +				cur_len + (empty ? 0 : 1));
>   
>   		/* memseg list is full? */
>   		if (ms_idx < 0)
> @@ -704,12 +705,12 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
>   		 */
>   		if (!empty)
>   			ms_idx++;
> +		*mapped_seg_len = cur_len;
>   		break;
>   	}
>   	if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
> -		RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
> -				RTE_STR(RTE_MAX_MEMSEG_PER_TYPE),
> -				RTE_STR(RTE_MAX_MEM_MB_PER_TYPE));
> +		RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase RTE_MAX_MEMSEG_PER_LIST "
> +				"RTE_MAX_MEMSEG_PER_TYPE and/or RTE_MAX_MEM_MB_PER_TYPE in configuration.\n");
>   		return -1;
>   	}
>   
> @@ -725,6 +726,8 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
>   		void *addr;
>   		int fd;
>   
> +		if (cur_page - seg_start == *mapped_seg_len)
> +			break;
>   		fd = open(hfile->filepath, O_RDWR);
>   		if (fd < 0) {
>   			RTE_LOG(ERR, EAL, "Could not open '%s': %s\n",
> @@ -986,7 +989,7 @@ prealloc_segments(struct hugepage_file *hugepages, int n_pages)
>   static int
>   remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
>   {
> -	int cur_page, seg_start_page, new_memseg, ret;
> +	int cur_page, seg_start_page, new_memseg, ret, mapped_seg_len = 0;
>   
>   	seg_start_page = 0;
>   	for (cur_page = 0; cur_page < n_pages; cur_page++) {
> @@ -1023,21 +1026,27 @@ remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
>   			/* if this isn't the first time, remap segment */
>   			if (cur_page != 0) {
>   				ret = remap_segment(hugepages, seg_start_page,
> -						cur_page);
> +						cur_page, &mapped_seg_len);
>   				if (ret != 0)
>   					return -1;
>   			}
> +			cur_page = seg_start_page + mapped_seg_len;
>   			/* remember where we started */
>   			seg_start_page = cur_page;
> +			mapped_seg_len = 0;
>   		}
>   		/* continuation of previous memseg */
>   	}
>   	/* we were stopped, but we didn't remap the last segment, do it now */
>   	if (cur_page != 0) {
> -		ret = remap_segment(hugepages, seg_start_page,
> -				cur_page);
> -		if (ret != 0)
> -			return -1;
> +		while (seg_start_page < n_pages) {
> +			ret = remap_segment(hugepages, seg_start_page,
> +					cur_page, &mapped_seg_len);
> +			if (ret != 0)
> +				return -1;
> +			seg_start_page = seg_start_page + mapped_seg_len;
> +			mapped_seg_len = 0;
> +		}
>   	}
>   	return 0;
>   }

This works, but I feel like it's overcomplicated - the same logic you're 
trying to use can just be implemented using `find_biggest_free()` + 
`find_contig_free()` and returning `seg_len` from `remap_segment()`?

Something like the following:

---

diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c
index 60fc8cc6ca..08acc787fc 100644
--- a/lib/eal/linux/eal_memory.c
+++ b/lib/eal/linux/eal_memory.c
@@ -681,6 +681,7 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)

  	/* find free space in memseg lists */
  	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+		int free_len;
  		bool empty;
  		msl = &mcfg->memsegs[msl_idx];
  		arr = &msl->memseg_arr;
@@ -692,18 +693,27 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)

  		/* leave space for a hole if array is not empty */
  		empty = arr->count == 0;
-		ms_idx = rte_fbarray_find_next_n_free(arr, 0,
-				seg_len + (empty ? 0 : 1));

-		/* memseg list is full? */
-		if (ms_idx < 0)
-			continue;
+		/* find start of the biggest contiguous block and its size */
+		ms_idx = rte_fbarray_find_biggest_free(arr, 0);
+		free_len = rte_fbarray_find_contig_free(arr, ms_idx);

  		/* leave some space between memsegs, they are not IOVA
  		 * contiguous, so they shouldn't be VA contiguous either.
  		 */
-		if (!empty)
+		if (!empty) {
  			ms_idx++;
+			free_len--;
+		}
+
+		/* memseg list is full? */
+		if (free_len < 1)
+			continue;
+
+		/* we might not get all of the space we wanted */
+		free_len = RTE_MIN(seg_len, free_len);
+		seg_end = seg_start + free_len;
+		seg_len = seg_end - seg_start;
  		break;
  	}
  	if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
@@ -787,7 +797,7 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
  	}
  	RTE_LOG(DEBUG, EAL, "Allocated %" PRIu64 "M on socket %i\n",
  			(seg_len * page_sz) >> 20, socket_id);
-	return 0;
+	return seg_len;
  }

  static uint64_t
@@ -1022,10 +1032,17 @@ remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
  		if (new_memseg) {
  			/* if this isn't the first time, remap segment */
  			if (cur_page != 0) {
-				ret = remap_segment(hugepages, seg_start_page,
-						cur_page);
-				if (ret != 0)
-					return -1;
+				int n_remapped = 0;
+				int n_needed = cur_page - seg_start_page;
+
+				while (n_remapped < n_needed) {
+					ret = remap_segment(hugepages,
+							seg_start_page,
+							cur_page);
+					if (ret < 0)
+						return -1;
+					n_remapped += ret;
+				}
  			}
  			/* remember where we started */
  			seg_start_page = cur_page;
@@ -1034,10 +1051,16 @@ remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
  	}
  	/* we were stopped, but we didn't remap the last segment, do it now */
  	if (cur_page != 0) {
-		ret = remap_segment(hugepages, seg_start_page,
-				cur_page);
-		if (ret != 0)
-			return -1;
+		int n_remapped = 0;
+		int n_needed = cur_page - seg_start_page;
+
+		while (n_remapped < n_needed) {
+			ret = remap_segment(
+					hugepages, seg_start_page, cur_page);
+			if (ret < 0)
+				return -1;
+			n_remapped += ret;
+		}
  	}
  	return 0;
  }

---

This should do the trick? Also, this probably needs to be duplicated for 
Windows and FreeBSD init as well, because AFAIK they follow the legacy 
mem init logic.
  
Fengnan Chang May 25, 2023, 12:49 p.m. UTC | #2
Burakov, Anatoly <anatoly.burakov@intel.com> wrote on Mon, May 22, 2023 at 21:28:
>
> On 5/22/2023 1:41 PM, Fengnan Chang wrote:
> > Under legacy mode, if the number of continuous memsegs greater
> > than RTE_MAX_MEMSEG_PER_LIST, eal init will failed even though
> > another memseg list is empty, because only one memseg list used
> > to check in remap_needed_hugepages.
> > Fix this by add a argment indicate how many pages mapped in
> > remap_segment, remap_segment try to mapped most pages it can, if
> > exceed it's capbility, remap_needed_hugepages will continue to
> > map other left pages.
> >
> > For example:
> > hugepage configure:
> > cat /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages
> > 10241
> > 10239
> >
> > startup log:
> > EAL: Detected memory type: socket_id:0 hugepage_sz:2097152
> > EAL: Detected memory type: socket_id:1 hugepage_sz:2097152
> > EAL: Creating 4 segment lists: n_segs:8192 socket_id:0 hugepage_sz:2097152
> > EAL: Creating 4 segment lists: n_segs:8192 socket_id:1 hugepage_sz:2097152
> > EAL: Requesting 13370 pages of size 2MB from socket 0
> > EAL: Requesting 7110 pages of size 2MB from socket 1
> > EAL: Attempting to map 14220M on socket 1
> > EAL: Allocated 14220M on socket 1
> > EAL: Attempting to map 26740M on socket 0
> > EAL: Could not find space for memseg. Please increase 32768 and/or 65536 in
> > configuration.
> > EAL: Couldn't remap hugepage files into memseg lists
> > EAL: FATAL: Cannot init memory
> > EAL: Cannot init memory
> >
> > Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
> > Signed-off-by: Lin Li <lilintjpu@bytedance.com>
> > ---
> >   lib/eal/linux/eal_memory.c | 33 +++++++++++++++++++++------------
> >   1 file changed, 21 insertions(+), 12 deletions(-)
> >
> > diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c
> > index 60fc8cc6ca..b2e6453fbe 100644
> > --- a/lib/eal/linux/eal_memory.c
> > +++ b/lib/eal/linux/eal_memory.c
> > @@ -657,12 +657,12 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
> >   }
> >
> >   static int
> > -remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
> > +remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end, int *mapped_seg_len)
> >   {
> >       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
> >       struct rte_memseg_list *msl;
> >       struct rte_fbarray *arr;
> > -     int cur_page, seg_len;
> > +     int cur_page, seg_len, cur_len;
> >       unsigned int msl_idx;
> >       int ms_idx;
> >       uint64_t page_sz;
> > @@ -692,8 +692,9 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
> >
> >               /* leave space for a hole if array is not empty */
> >               empty = arr->count == 0;
> > +             cur_len = RTE_MIN((unsigned int)seg_len, arr->len - arr->count - (empty ? 0 : 1));
> >               ms_idx = rte_fbarray_find_next_n_free(arr, 0,
> > -                             seg_len + (empty ? 0 : 1));
> > +                             cur_len + (empty ? 0 : 1));
> >
> >               /* memseg list is full? */
> >               if (ms_idx < 0)
> > @@ -704,12 +705,12 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
> >                */
> >               if (!empty)
> >                       ms_idx++;
> > +             *mapped_seg_len = cur_len;
> >               break;
> >       }
> >       if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
> > -             RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
> > -                             RTE_STR(RTE_MAX_MEMSEG_PER_TYPE),
> > -                             RTE_STR(RTE_MAX_MEM_MB_PER_TYPE));
> > +             RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase RTE_MAX_MEMSEG_PER_LIST "
> > +                             "RTE_MAX_MEMSEG_PER_TYPE and/or RTE_MAX_MEM_MB_PER_TYPE in configuration.\n");
> >               return -1;
> >       }
> >
> > @@ -725,6 +726,8 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
> >               void *addr;
> >               int fd;
> >
> > +             if (cur_page - seg_start == *mapped_seg_len)
> > +                     break;
> >               fd = open(hfile->filepath, O_RDWR);
> >               if (fd < 0) {
> >                       RTE_LOG(ERR, EAL, "Could not open '%s': %s\n",
> > @@ -986,7 +989,7 @@ prealloc_segments(struct hugepage_file *hugepages, int n_pages)
> >   static int
> >   remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
> >   {
> > -     int cur_page, seg_start_page, new_memseg, ret;
> > +     int cur_page, seg_start_page, new_memseg, ret, mapped_seg_len = 0;
> >
> >       seg_start_page = 0;
> >       for (cur_page = 0; cur_page < n_pages; cur_page++) {
> > @@ -1023,21 +1026,27 @@ remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
> >                       /* if this isn't the first time, remap segment */
> >                       if (cur_page != 0) {
> >                               ret = remap_segment(hugepages, seg_start_page,
> > -                                             cur_page);
> > +                                             cur_page, &mapped_seg_len);
> >                               if (ret != 0)
> >                                       return -1;
> >                       }
> > +                     cur_page = seg_start_page + mapped_seg_len;
> >                       /* remember where we started */
> >                       seg_start_page = cur_page;
> > +                     mapped_seg_len = 0;
> >               }
> >               /* continuation of previous memseg */
> >       }
> >       /* we were stopped, but we didn't remap the last segment, do it now */
> >       if (cur_page != 0) {
> > -             ret = remap_segment(hugepages, seg_start_page,
> > -                             cur_page);
> > -             if (ret != 0)
> > -                     return -1;
> > +             while (seg_start_page < n_pages) {
> > +                     ret = remap_segment(hugepages, seg_start_page,
> > +                                     cur_page, &mapped_seg_len);
> > +                     if (ret != 0)
> > +                             return -1;
> > +                     seg_start_page = seg_start_page + mapped_seg_len;
> > +                     mapped_seg_len = 0;
> > +             }
> >       }
> >       return 0;
> >   }
>
> This works, but I feel like it's overcomplicated - the same logic you're
> trying to use can just be implemented using `find_biggest_free()` +
> `find_contig_free()` and returning `seg_len` from `remap_segment()`?
>
> Something like the following:
>
> ---
>
> diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c
> index 60fc8cc6ca..08acc787fc 100644
> --- a/lib/eal/linux/eal_memory.c
> +++ b/lib/eal/linux/eal_memory.c
> @@ -681,6 +681,7 @@ remap_segment(struct hugepage_file *hugepages, int
> seg_start, int seg_end)
>
>         /* find free space in memseg lists */
>         for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
> +               int free_len;
>                 bool empty;
>                 msl = &mcfg->memsegs[msl_idx];
>                 arr = &msl->memseg_arr;
> @@ -692,18 +693,27 @@ remap_segment(struct hugepage_file *hugepages, int
> seg_start, int seg_end)
>
>                 /* leave space for a hole if array is not empty */
>                 empty = arr->count == 0;
> -               ms_idx = rte_fbarray_find_next_n_free(arr, 0,
> -                               seg_len + (empty ? 0 : 1));
>
> -               /* memseg list is full? */
> -               if (ms_idx < 0)
> -                       continue;
> +               /* find start of the biggest contiguous block and its size */
> +               ms_idx = rte_fbarray_find_biggest_free(arr, 0);
> +               free_len = rte_fbarray_find_contig_free(arr, ms_idx);
>
>                 /* leave some space between memsegs, they are not IOVA
>                  * contiguous, so they shouldn't be VA contiguous either.
>                  */
> -               if (!empty)
> +               if (!empty) {
>                         ms_idx++;
> +                       free_len--;
> +               }
> +
> +               /* memseg list is full? */
> +               if (free_len < 1)
> +                       continue;
> +
> +               /* we might not get all of the space we wanted */
> +               free_len = RTE_MIN(seg_len, free_len);
> +               seg_end = seg_start + free_len;
> +               seg_len = seg_end - seg_start;
>                 break;
>         }
>         if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
> @@ -787,7 +797,7 @@ remap_segment(struct hugepage_file *hugepages, int
> seg_start, int seg_end)
>         }
>         RTE_LOG(DEBUG, EAL, "Allocated %" PRIu64 "M on socket %i\n",
>                         (seg_len * page_sz) >> 20, socket_id);
> -       return 0;
> +       return seg_len;
>   }
>
>   static uint64_t
> @@ -1022,10 +1032,17 @@ remap_needed_hugepages(struct hugepage_file
> *hugepages, int n_pages)
>                 if (new_memseg) {
>                         /* if this isn't the first time, remap segment */
>                         if (cur_page != 0) {
> -                               ret = remap_segment(hugepages, seg_start_page,
> -                                               cur_page);
> -                               if (ret != 0)
> -                                       return -1;
> +                               int n_remapped = 0;
> +                               int n_needed = cur_page - seg_start_page;
> +
> +                               while (n_remapped < n_needed) {
> +                                       ret = remap_segment(hugepages,
> +                                                       seg_start_page,
> +                                                       cur_page);
> +                                       if (ret < 0)
> +                                               return -1;
> +                                       n_remapped += ret;
> +                               }
>                         }
>                         /* remember where we started */
>                         seg_start_page = cur_page;
> @@ -1034,10 +1051,16 @@ remap_needed_hugepages(struct hugepage_file
> *hugepages, int n_pages)
>         }
>         /* we were stopped, but we didn't remap the last segment, do it now */
>         if (cur_page != 0) {
> -               ret = remap_segment(hugepages, seg_start_page,
> -                               cur_page);
> -               if (ret != 0)
> -                       return -1;
> +               int n_remapped = 0;
> +               int n_needed = cur_page - seg_start_page;
> +
> +               while (n_remapped < n_needed) {
> +                       ret = remap_segment(
> +                                       hugepages, seg_start_page, cur_page);
> +                       if (ret < 0)
> +                               return -1;
> +                       n_remapped += ret;
> +               }
>         }
>         return 0;
>   }
>
> ---
>
> This should do the trick? Also, this probably needs to be duplicated for
> Windows and FreeBSD init as well, because AFAIK they follow the legacy
> mem init logic.
I took a quick look at the FreeBSD and Windows code: FreeBSD doesn't have
this problem, because it maps hugepages one by one rather than mapping
multiple at once, and Windows calls eal_dynmem_hugepage_init when hugepages
are present. So it seems modifying only Linux is enough.

>
> --
> Thanks,
> Anatoly
>
  
Anatoly Burakov May 25, 2023, 2:26 p.m. UTC | #3
On 5/25/2023 1:49 PM, Fengnan Chang wrote:
> Burakov, Anatoly <anatoly.burakov@intel.com> wrote on Mon, May 22, 2023 at 21:28:
>>
>> On 5/22/2023 1:41 PM, Fengnan Chang wrote:
>>> Under legacy mode, if the number of continuous memsegs greater
>>> than RTE_MAX_MEMSEG_PER_LIST, eal init will failed even though
>>> another memseg list is empty, because only one memseg list used
>>> to check in remap_needed_hugepages.
>>> Fix this by add a argment indicate how many pages mapped in
>>> remap_segment, remap_segment try to mapped most pages it can, if
>>> exceed it's capbility, remap_needed_hugepages will continue to
>>> map other left pages.
>>>
>>> For example:
>>> hugepage configure:
>>> cat /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages
>>> 10241
>>> 10239
>>>
>>> startup log:
>>> EAL: Detected memory type: socket_id:0 hugepage_sz:2097152
>>> EAL: Detected memory type: socket_id:1 hugepage_sz:2097152
>>> EAL: Creating 4 segment lists: n_segs:8192 socket_id:0 hugepage_sz:2097152
>>> EAL: Creating 4 segment lists: n_segs:8192 socket_id:1 hugepage_sz:2097152
>>> EAL: Requesting 13370 pages of size 2MB from socket 0
>>> EAL: Requesting 7110 pages of size 2MB from socket 1
>>> EAL: Attempting to map 14220M on socket 1
>>> EAL: Allocated 14220M on socket 1
>>> EAL: Attempting to map 26740M on socket 0
>>> EAL: Could not find space for memseg. Please increase 32768 and/or 65536 in
>>> configuration.
>>> EAL: Couldn't remap hugepage files into memseg lists
>>> EAL: FATAL: Cannot init memory
>>> EAL: Cannot init memory
>>>
>>> Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
>>> Signed-off-by: Lin Li <lilintjpu@bytedance.com>
>>> ---
>>>    lib/eal/linux/eal_memory.c | 33 +++++++++++++++++++++------------
>>>    1 file changed, 21 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c
>>> index 60fc8cc6ca..b2e6453fbe 100644
>>> --- a/lib/eal/linux/eal_memory.c
>>> +++ b/lib/eal/linux/eal_memory.c
>>> @@ -657,12 +657,12 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
>>>    }
>>>
>>>    static int
>>> -remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
>>> +remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end, int *mapped_seg_len)
>>>    {
>>>        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
>>>        struct rte_memseg_list *msl;
>>>        struct rte_fbarray *arr;
>>> -     int cur_page, seg_len;
>>> +     int cur_page, seg_len, cur_len;
>>>        unsigned int msl_idx;
>>>        int ms_idx;
>>>        uint64_t page_sz;
>>> @@ -692,8 +692,9 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
>>>
>>>                /* leave space for a hole if array is not empty */
>>>                empty = arr->count == 0;
>>> +             cur_len = RTE_MIN((unsigned int)seg_len, arr->len - arr->count - (empty ? 0 : 1));
>>>                ms_idx = rte_fbarray_find_next_n_free(arr, 0,
>>> -                             seg_len + (empty ? 0 : 1));
>>> +                             cur_len + (empty ? 0 : 1));
>>>
>>>                /* memseg list is full? */
>>>                if (ms_idx < 0)
>>> @@ -704,12 +705,12 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
>>>                 */
>>>                if (!empty)
>>>                        ms_idx++;
>>> +             *mapped_seg_len = cur_len;
>>>                break;
>>>        }
>>>        if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
>>> -             RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
>>> -                             RTE_STR(RTE_MAX_MEMSEG_PER_TYPE),
>>> -                             RTE_STR(RTE_MAX_MEM_MB_PER_TYPE));
>>> +             RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase RTE_MAX_MEMSEG_PER_LIST "
>>> +                             "RTE_MAX_MEMSEG_PER_TYPE and/or RTE_MAX_MEM_MB_PER_TYPE in configuration.\n");
>>>                return -1;
>>>        }
>>>
>>> @@ -725,6 +726,8 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
>>>                void *addr;
>>>                int fd;
>>>
>>> +             if (cur_page - seg_start == *mapped_seg_len)
>>> +                     break;
>>>                fd = open(hfile->filepath, O_RDWR);
>>>                if (fd < 0) {
>>>                        RTE_LOG(ERR, EAL, "Could not open '%s': %s\n",
>>> @@ -986,7 +989,7 @@ prealloc_segments(struct hugepage_file *hugepages, int n_pages)
>>>    static int
>>>    remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
>>>    {
>>> -     int cur_page, seg_start_page, new_memseg, ret;
>>> +     int cur_page, seg_start_page, new_memseg, ret, mapped_seg_len = 0;
>>>
>>>        seg_start_page = 0;
>>>        for (cur_page = 0; cur_page < n_pages; cur_page++) {
>>> @@ -1023,21 +1026,27 @@ remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
>>>                        /* if this isn't the first time, remap segment */
>>>                        if (cur_page != 0) {
>>>                                ret = remap_segment(hugepages, seg_start_page,
>>> -                                             cur_page);
>>> +                                             cur_page, &mapped_seg_len);
>>>                                if (ret != 0)
>>>                                        return -1;
>>>                        }
>>> +                     cur_page = seg_start_page + mapped_seg_len;
>>>                        /* remember where we started */
>>>                        seg_start_page = cur_page;
>>> +                     mapped_seg_len = 0;
>>>                }
>>>                /* continuation of previous memseg */
>>>        }
>>>        /* we were stopped, but we didn't remap the last segment, do it now */
>>>        if (cur_page != 0) {
>>> -             ret = remap_segment(hugepages, seg_start_page,
>>> -                             cur_page);
>>> -             if (ret != 0)
>>> -                     return -1;
>>> +             while (seg_start_page < n_pages) {
>>> +                     ret = remap_segment(hugepages, seg_start_page,
>>> +                                     cur_page, &mapped_seg_len);
>>> +                     if (ret != 0)
>>> +                             return -1;
>>> +                     seg_start_page = seg_start_page + mapped_seg_len;
>>> +                     mapped_seg_len = 0;
>>> +             }
>>>        }
>>>        return 0;
>>>    }
>>
>> This works, but I feel like it's overcomplicated - the same logic you're
>> trying to use can just be implemented using `find_biggest_free()` +
>> `find_contig_free()` and returning `seg_len` from `remap_segment()`?
>>
>> Something like the following:
>>
>> ---
>>
>> diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c
>> index 60fc8cc6ca..08acc787fc 100644
>> --- a/lib/eal/linux/eal_memory.c
>> +++ b/lib/eal/linux/eal_memory.c
>> @@ -681,6 +681,7 @@ remap_segment(struct hugepage_file *hugepages, int
>> seg_start, int seg_end)
>>
>>          /* find free space in memseg lists */
>>          for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
>> +               int free_len;
>>                  bool empty;
>>                  msl = &mcfg->memsegs[msl_idx];
>>                  arr = &msl->memseg_arr;
>> @@ -692,18 +693,27 @@ remap_segment(struct hugepage_file *hugepages, int
>> seg_start, int seg_end)
>>
>>                  /* leave space for a hole if array is not empty */
>>                  empty = arr->count == 0;
>> -               ms_idx = rte_fbarray_find_next_n_free(arr, 0,
>> -                               seg_len + (empty ? 0 : 1));
>>
>> -               /* memseg list is full? */
>> -               if (ms_idx < 0)
>> -                       continue;
>> +               /* find start of the biggest contiguous block and its size */
>> +               ms_idx = rte_fbarray_find_biggest_free(arr, 0);
>> +               free_len = rte_fbarray_find_contig_free(arr, ms_idx);
>>
>>                  /* leave some space between memsegs, they are not IOVA
>>                   * contiguous, so they shouldn't be VA contiguous either.
>>                   */
>> -               if (!empty)
>> +               if (!empty) {
>>                          ms_idx++;
>> +                       free_len--;
>> +               }
>> +
>> +               /* memseg list is full? */
>> +               if (free_len < 1)
>> +                       continue;
>> +
>> +               /* we might not get all of the space we wanted */
>> +               free_len = RTE_MIN(seg_len, free_len);
>> +               seg_end = seg_start + free_len;
>> +               seg_len = seg_end - seg_start;
>>                  break;
>>          }
>>          if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
>> @@ -787,7 +797,7 @@ remap_segment(struct hugepage_file *hugepages, int
>> seg_start, int seg_end)
>>          }
>>          RTE_LOG(DEBUG, EAL, "Allocated %" PRIu64 "M on socket %i\n",
>>                          (seg_len * page_sz) >> 20, socket_id);
>> -       return 0;
>> +       return seg_len;
>>    }
>>
>>    static uint64_t
>> @@ -1022,10 +1032,17 @@ remap_needed_hugepages(struct hugepage_file
>> *hugepages, int n_pages)
>>                  if (new_memseg) {
>>                          /* if this isn't the first time, remap segment */
>>                          if (cur_page != 0) {
>> -                               ret = remap_segment(hugepages, seg_start_page,
>> -                                               cur_page);
>> -                               if (ret != 0)
>> -                                       return -1;
>> +                               int n_remapped = 0;
>> +                               int n_needed = cur_page - seg_start_page;
>> +
>> +                               while (n_remapped < n_needed) {
>> +                                       ret = remap_segment(hugepages,
>> +                                                       seg_start_page,
>> +                                                       cur_page);
>> +                                       if (ret < 0)
>> +                                               return -1;
>> +                                       n_remapped += ret;
>> +                               }
>>                          }
>>                          /* remember where we started */
>>                          seg_start_page = cur_page;
>> @@ -1034,10 +1051,16 @@ remap_needed_hugepages(struct hugepage_file
>> *hugepages, int n_pages)
>>          }
>>          /* we were stopped, but we didn't remap the last segment, do it now */
>>          if (cur_page != 0) {
>> -               ret = remap_segment(hugepages, seg_start_page,
>> -                               cur_page);
>> -               if (ret != 0)
>> -                       return -1;
>> +               int n_remapped = 0;
>> +               int n_needed = cur_page - seg_start_page;
>> +
>> +               while (n_remapped < n_needed) {
>> +                       ret = remap_segment(
>> +                                       hugepages, seg_start_page, cur_page);
>> +                       if (ret < 0)
>> +                               return -1;
>> +                       n_remapped += ret;
>> +               }
>>          }
>>          return 0;
>>    }
>>
>> ---
>>
>> This should do the trick? Also, this probably needs to be duplicated for
>> Windows and FreeBSD init as well, because AFAIK they follow the legacy
>> mem init logic.
> I take a simple look at FreeBSD and Windows code, FreeBSD don't have this
> problem,  FreeBSD map hugepage is one by one, not mapp multiple at once
> and windows call eal_dynmem_hugepage_init when have hugepage.
> So It seems just modify linux is ok.

Right, OK then.
  

Patch

diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c
index 60fc8cc6ca..b2e6453fbe 100644
--- a/lib/eal/linux/eal_memory.c
+++ b/lib/eal/linux/eal_memory.c
@@ -657,12 +657,12 @@  unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
 }
 
 static int
-remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
+remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end, int *mapped_seg_len)
 {
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 	struct rte_memseg_list *msl;
 	struct rte_fbarray *arr;
-	int cur_page, seg_len;
+	int cur_page, seg_len, cur_len;
 	unsigned int msl_idx;
 	int ms_idx;
 	uint64_t page_sz;
@@ -692,8 +692,9 @@  remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
 
 		/* leave space for a hole if array is not empty */
 		empty = arr->count == 0;
+		cur_len = RTE_MIN((unsigned int)seg_len, arr->len - arr->count - (empty ? 0 : 1));
 		ms_idx = rte_fbarray_find_next_n_free(arr, 0,
-				seg_len + (empty ? 0 : 1));
+				cur_len + (empty ? 0 : 1));
 
 		/* memseg list is full? */
 		if (ms_idx < 0)
@@ -704,12 +705,12 @@  remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
 		 */
 		if (!empty)
 			ms_idx++;
+		*mapped_seg_len = cur_len;
 		break;
 	}
 	if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
-		RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
-				RTE_STR(RTE_MAX_MEMSEG_PER_TYPE),
-				RTE_STR(RTE_MAX_MEM_MB_PER_TYPE));
+		RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase RTE_MAX_MEMSEG_PER_LIST "
+				"RTE_MAX_MEMSEG_PER_TYPE and/or RTE_MAX_MEM_MB_PER_TYPE in configuration.\n");
 		return -1;
 	}
 
@@ -725,6 +726,8 @@  remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
 		void *addr;
 		int fd;
 
+		if (cur_page - seg_start == *mapped_seg_len)
+			break;
 		fd = open(hfile->filepath, O_RDWR);
 		if (fd < 0) {
 			RTE_LOG(ERR, EAL, "Could not open '%s': %s\n",
@@ -986,7 +989,7 @@  prealloc_segments(struct hugepage_file *hugepages, int n_pages)
 static int
 remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
 {
-	int cur_page, seg_start_page, new_memseg, ret;
+	int cur_page, seg_start_page, new_memseg, ret, mapped_seg_len = 0;
 
 	seg_start_page = 0;
 	for (cur_page = 0; cur_page < n_pages; cur_page++) {
@@ -1023,21 +1026,27 @@  remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
 			/* if this isn't the first time, remap segment */
 			if (cur_page != 0) {
 				ret = remap_segment(hugepages, seg_start_page,
-						cur_page);
+						cur_page, &mapped_seg_len);
 				if (ret != 0)
 					return -1;
 			}
+			cur_page = seg_start_page + mapped_seg_len;
 			/* remember where we started */
 			seg_start_page = cur_page;
+			mapped_seg_len = 0;
 		}
 		/* continuation of previous memseg */
 	}
 	/* we were stopped, but we didn't remap the last segment, do it now */
 	if (cur_page != 0) {
-		ret = remap_segment(hugepages, seg_start_page,
-				cur_page);
-		if (ret != 0)
-			return -1;
+		while (seg_start_page < n_pages) {
+			ret = remap_segment(hugepages, seg_start_page,
+					cur_page, &mapped_seg_len);
+			if (ret != 0)
+				return -1;
+			seg_start_page = seg_start_page + mapped_seg_len;
+			mapped_seg_len = 0;
+		}
 	}
 	return 0;
 }