eal: fix modify data area after memset

Message ID 20230912090415.48709-1-changfengnan@bytedance.com (mailing list archive)
State New
Delegated to: Thomas Monjalon
Headers
Series eal: fix modify data area after memset |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/github-robot: build success github build: passed
ci/intel-Functional success Functional PASS
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-compile-arm64-testing success Testing PASS

Commit Message

Fengnan Chang Sept. 12, 2023, 9:04 a.m. UTC
  Let's look at this path:
malloc_elem_free
   ->malloc_elem_join_adjacent_free
      ->join_elem(elem, elem->next)

0. cur elem's pad > 0
1. data area memset in malloc_elem_free first.
2. next elem is free, try to join cur elem and next.
3. in join_elem, inner->size is modified; this address was already
memset in step 1, so the content of that address becomes non-zero.

If a user calls rte_zmalloc and this elem is picked, the user will not get
fully zeroed memory.

Fixes: 2808a12cc053 (malloc: fix memory element size in case of padding)
Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
---
 lib/eal/common/malloc_elem.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
  

Comments

Fengnan Chang Sept. 22, 2023, 8:12 a.m. UTC | #1
ping

Fengnan Chang <changfengnan@bytedance.com> 于2023年9月12日周二 17:05写道:
>
> Let's look at this path:
> malloc_elem_free
>    ->malloc_elem_join_adjacent_free
>       ->join_elem(elem, elem->next)
>
> 0. cur elem's pad > 0
> 1. data area memset in malloc_elem_free first.
> 2. next elem is free, try to join cur elem and next.
> 3. in join_elem, try to modify inner->size, this address had
> memset in step 1, it casue the content of addrees become non-zero.
>
> If user call rte_zmalloc, and pick this elem, it can't get all
> zero'd memory.
>
> Fixes: 2808a12cc053 (malloc: fix memory element size in case of padding)
> Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
> ---
>  lib/eal/common/malloc_elem.c | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/lib/eal/common/malloc_elem.c b/lib/eal/common/malloc_elem.c
> index 619c040aa3..93a23fa8d4 100644
> --- a/lib/eal/common/malloc_elem.c
> +++ b/lib/eal/common/malloc_elem.c
> @@ -492,7 +492,7 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
>   * be contiguous in memory.
>   */
>  static inline void
> -join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
> +join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2, bool update_inner)
>  {
>         struct malloc_elem *next = elem2->next;
>         elem1->size += elem2->size;
> @@ -502,7 +502,7 @@ join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
>                 elem1->heap->last = elem1;
>         elem1->next = next;
>         elem1->dirty |= elem2->dirty;
> -       if (elem1->pad) {
> +       if (elem1->pad && update_inner) {
>                 struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad);
>                 inner->size = elem1->size - elem1->pad;
>         }
> @@ -526,7 +526,7 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
>
>                 /* remove from free list, join to this one */
>                 malloc_elem_free_list_remove(elem->next);
> -               join_elem(elem, elem->next);
> +               join_elem(elem, elem->next, false);
>
>                 /* erase header, trailer and pad */
>                 memset(erase, MALLOC_POISON, erase_len);
> @@ -550,7 +550,7 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
>                 malloc_elem_free_list_remove(elem->prev);
>
>                 new_elem = elem->prev;
> -               join_elem(new_elem, elem);
> +               join_elem(new_elem, elem, false);
>
>                 /* erase header, trailer and pad */
>                 memset(erase, MALLOC_POISON, erase_len);
> @@ -683,7 +683,7 @@ malloc_elem_resize(struct malloc_elem *elem, size_t size)
>          * join the two
>          */
>         malloc_elem_free_list_remove(elem->next);
> -       join_elem(elem, elem->next);
> +       join_elem(elem, elem->next, true);
>
>         if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
>                 /* now we have a big block together. Lets cut it down a bit, by splitting */
> --
> 2.20.1
>
  
Thomas Monjalon Oct. 17, 2023, 1:32 p.m. UTC | #2
We need a careful review here, please.


12/09/2023 11:04, Fengnan Chang:
> Let's look at this path:
> malloc_elem_free
>    ->malloc_elem_join_adjacent_free
>       ->join_elem(elem, elem->next)
> 
> 0. cur elem's pad > 0
> 1. data area memset in malloc_elem_free first.
> 2. next elem is free, try to join cur elem and next.
> 3. in join_elem, try to modify inner->size, this address had
> memset in step 1, it casue the content of addrees become non-zero.
> 
> If user call rte_zmalloc, and pick this elem, it can't get all
> zero'd memory.
> 
> Fixes: 2808a12cc053 (malloc: fix memory element size in case of padding)
> Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
> ---
>  lib/eal/common/malloc_elem.c | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/eal/common/malloc_elem.c b/lib/eal/common/malloc_elem.c
> index 619c040aa3..93a23fa8d4 100644
> --- a/lib/eal/common/malloc_elem.c
> +++ b/lib/eal/common/malloc_elem.c
> @@ -492,7 +492,7 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
>   * be contiguous in memory.
>   */
>  static inline void
> -join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
> +join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2, bool update_inner)
>  {
>  	struct malloc_elem *next = elem2->next;
>  	elem1->size += elem2->size;
> @@ -502,7 +502,7 @@ join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
>  		elem1->heap->last = elem1;
>  	elem1->next = next;
>  	elem1->dirty |= elem2->dirty;
> -	if (elem1->pad) {
> +	if (elem1->pad && update_inner) {
>  		struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad);
>  		inner->size = elem1->size - elem1->pad;
>  	}
> @@ -526,7 +526,7 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
>  
>  		/* remove from free list, join to this one */
>  		malloc_elem_free_list_remove(elem->next);
> -		join_elem(elem, elem->next);
> +		join_elem(elem, elem->next, false);
>  
>  		/* erase header, trailer and pad */
>  		memset(erase, MALLOC_POISON, erase_len);
> @@ -550,7 +550,7 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
>  		malloc_elem_free_list_remove(elem->prev);
>  
>  		new_elem = elem->prev;
> -		join_elem(new_elem, elem);
> +		join_elem(new_elem, elem, false);
>  
>  		/* erase header, trailer and pad */
>  		memset(erase, MALLOC_POISON, erase_len);
> @@ -683,7 +683,7 @@ malloc_elem_resize(struct malloc_elem *elem, size_t size)
>  	 * join the two
>  	 */
>  	malloc_elem_free_list_remove(elem->next);
> -	join_elem(elem, elem->next);
> +	join_elem(elem, elem->next, true);
>  
>  	if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
>  		/* now we have a big block together. Lets cut it down a bit, by splitting */
>
  
Dmitry Kozlyuk Oct. 22, 2023, 8:22 p.m. UTC | #3
2023-09-22 16:12 (UTC+0800), Fengnan Chang:
> ping
> 
> Fengnan Chang <changfengnan@bytedance.com> 于2023年9月12日周二 17:05写道:
> >
> > Let's look at this path:
> > malloc_elem_free  
> >    ->malloc_elem_join_adjacent_free
> >       ->join_elem(elem, elem->next)  
> >
> > 0. cur elem's pad > 0
> > 1. data area memset in malloc_elem_free first.
> > 2. next elem is free, try to join cur elem and next.
> > 3. in join_elem, try to modify inner->size, this address had
> > memset in step 1, it casue the content of addrees become non-zero.
> >
> > If user call rte_zmalloc, and pick this elem, it can't get all
> > zero'd memory.

malloc_elem_join_adjacent_free() always calls memset() after join_elem(),
for the next and the previous element respectively.
How to reproduce this bug?
  
Fengnan Chang Oct. 23, 2023, 9:07 a.m. UTC | #4
Dmitry Kozlyuk <dmitry.kozliuk@gmail.com> 于2023年10月23日周一 04:22写道:
>
> 2023-09-22 16:12 (UTC+0800), Fengnan Chang:
> > ping
> >
> > Fengnan Chang <changfengnan@bytedance.com> 于2023年9月12日周二 17:05写道:
> > >
> > > Let's look at this path:
> > > malloc_elem_free
> > >    ->malloc_elem_join_adjacent_free
> > >       ->join_elem(elem, elem->next)
> > >
> > > 0. cur elem's pad > 0
> > > 1. data area memset in malloc_elem_free first.
> > > 2. next elem is free, try to join cur elem and next.
> > > 3. in join_elem, try to modify inner->size, this address had
> > > memset in step 1, it casue the content of addrees become non-zero.
> > >
> > > If user call rte_zmalloc, and pick this elem, it can't get all
> > > zero'd memory.
>
> malloc_elem_join_adjacent_free() always calls memset() after join_elem(),
> for the next and the previous element respectively.
When join_elem() is called for the next element in
malloc_elem_join_adjacent_free(),
the memset is applied to the *next* element, but join_elem() updates the
*current* element's
content, which shouldn't happen; they are two different elements.

> How to reproduce this bug?
When I tested this patch,
https://patches.dpdk.org/project/dpdk/patch/20230831111937.60975-1-changfengnan@bytedance.com/
I had a test case that allocates and frees objects of size 64/128/192 with 16 threads;
after every
allocation I check whether all content is 0 or not.
It's not easy to reproduce, but you can give it a try; it's easier to find
this problem at the code level.
  
Stephen Hemminger Oct. 25, 2023, 4:03 p.m. UTC | #5
On Mon, 23 Oct 2023 17:07:21 +0800
Fengnan Chang <changfengnan@bytedance.com> wrote:

> Dmitry Kozlyuk <dmitry.kozliuk@gmail.com> 于2023年10月23日周一 04:22写道:
> >
> > 2023-09-22 16:12 (UTC+0800), Fengnan Chang:  
> > > ping
> > >
> > > Fengnan Chang <changfengnan@bytedance.com> 于2023年9月12日周二 17:05写道:  
> > > >
> > > > Let's look at this path:
> > > > malloc_elem_free  
> > > >    ->malloc_elem_join_adjacent_free
> > > >       ->join_elem(elem, elem->next)  
> > > >
> > > > 0. cur elem's pad > 0
> > > > 1. data area memset in malloc_elem_free first.
> > > > 2. next elem is free, try to join cur elem and next.
> > > > 3. in join_elem, try to modify inner->size, this address had
> > > > memset in step 1, it casue the content of addrees become non-zero.
> > > >
> > > > If user call rte_zmalloc, and pick this elem, it can't get all
> > > > zero'd memory.  
> >
> > malloc_elem_join_adjacent_free() always calls memset() after join_elem(),
> > for the next and the previous element respectively.  
> when try to call join_elem() for the next element in
> malloc_elem_join_adjacent_free(),
> the memset is try to memset *next* element, but join_elem() is update
> *current* element's
> content, which shoudn't happen, it's two different element.
> 
> > How to reproduce this bug?  
> when I test this patch,
> https://patches.dpdk.org/project/dpdk/patch/20230831111937.60975-1-changfengnan@bytedance.com/
> I have a case try to alloc 64/128/192 size object and free with 16 threads,
> after every
> alloc I'll check wheather all content is 0 or not.
> It's not easy to reproduce, you can have a try, it's easier to find
> this problem in code level.

I tried to make a test that would reproduce the problem but it did not.

diff --git a/app/test/test_malloc.c b/app/test/test_malloc.c
index cd579c503cf5..cfd45d6a28eb 100644
--- a/app/test/test_malloc.c
+++ b/app/test/test_malloc.c
@@ -28,6 +28,7 @@
 #include <rte_string_fns.h>

 #define N 10000
+#define BINS 100

 static int
 is_mem_on_socket(int32_t socket);
@@ -69,13 +70,24 @@ is_aligned(void *p, int align)
 	return 1;
 }

+static bool is_all_zero(uint8_t *mem, size_t sz)
+{
+	size_t i;
+
+	for (i = 0; i < sz; i++)
+		if (mem[i] != 0)
+			return false;
+
+	return true;
+}
+
 static int
 test_align_overlap_per_lcore(__rte_unused void *arg)
 {
 	const unsigned align1 = 8,
 			align2 = 64,
 			align3 = 2048;
-	unsigned i,j;
+	unsigned int i;
 	void *p1 = NULL, *p2 = NULL, *p3 = NULL;
 	int ret = 0;

@@ -86,11 +98,12 @@ test_align_overlap_per_lcore(__rte_unused void *arg)
 			ret = -1;
 			break;
 		}
-		for(j = 0; j < 1000 ; j++) {
-			if( *(char *)p1 != 0) {
-				printf("rte_zmalloc didn't zero the allocated memory\n");
-				ret = -1;
-			}
+
+		if (!is_all_zero(p1, 1000)) {
+			printf("rte_zmalloc didn't zero the allocated memory\n");
+			ret = -1;
+			rte_free(p1);
+			break;
 		}
 		p2 = rte_malloc("dummy", 1000, align2);
 		if (!p2){
@@ -140,6 +153,66 @@ test_align_overlap_per_lcore(__rte_unused void *arg)
 	return ret;
 }

+/*
+ * Allocate random size chunks and make sure that they are
+ * always zero.
+ */
+static int
+test_zmalloc(__rte_unused void *arg)
+{
+	unsigned int i, n;
+	void *slots[BINS] = { };
+			void *p1;
+	size_t sz;
+
+	/* Allocate many variable size chunks */
+	for (i = 0; i < BINS; i++) {
+		sz = rte_rand_max(1024) + 1;
+		p1 = rte_zmalloc("slots", sz, 0);
+		if (p1 == NULL) {
+			printf("rte_zmalloc(%zu) returned NULL (i=%u)\n", sz, i);
+			goto fail;
+		}
+		slots[i] = p1;
+		if (!is_all_zero(p1, sz))
+			goto fail;
+	}
+
+	/* Drop one chunk per iteration */
+	for (n = BINS; n > 0; n--) {
+		/* Swap in a new block into a slot */
+		for (i = 0; i < N; i++) {
+			unsigned int bin = rte_rand_max(n);
+
+			sz = rte_rand_max(1024) + 1;
+			p1 = rte_zmalloc("swap", sz, 0);
+			if (!p1){
+				printf("rte_zmalloc(%zu) returned NULL (i=%u)\n", sz, i);
+				goto fail;
+			}
+
+			if (!is_all_zero(p1, sz)) {
+				printf("rte_zmalloc didn't zero the allocated memory\n");
+				goto fail;
+			}
+
+			rte_free(slots[bin]);
+			slots[bin] = p1;
+		}
+
+		/* Drop last bin */
+		rte_free(slots[n]);
+		slots[n] = NULL;
+	}
+
+	return 0;
+fail:
+	for (i = 0; i < BINS; i++)
+		rte_free(slots[i]);
+
+	return -1;
+}
+
 static int
 test_reordered_free_per_lcore(__rte_unused void *arg)
 {
@@ -1020,6 +1091,21 @@ test_malloc(void)
 	}
 	else printf("test_realloc() passed\n");

+	/*----------------------------*/
+	RTE_LCORE_FOREACH_WORKER(lcore_id) {
+		rte_eal_remote_launch(test_zmalloc, NULL, lcore_id);
+	}
+
+	RTE_LCORE_FOREACH_WORKER(lcore_id) {
+		if (rte_eal_wait_lcore(lcore_id) < 0)
+			ret = -1;
+	}
+	if (ret < 0){
+		printf("test_zmalloc() failed\n");
+		return ret;
+	}
+	else printf("test_zmalloc() passed\n");
+
 	/*----------------------------*/
 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
 		rte_eal_remote_launch(test_align_overlap_per_lcore, NULL, lcore_id);
  
Fengnan Chang Oct. 30, 2023, 12:31 p.m. UTC | #6
Thanks for your response. It's my fault, I got mixed up:
this problem can only be reproduced after applying
https://patches.dpdk.org/project/dpdk/patch/20230831111937.60975-1-changfengnan@bytedance.com/
,
I'll reorganize this into the previous patch.
Sorry for wasting your time.

Stephen Hemminger <stephen@networkplumber.org> 于2023年10月26日周四 00:04写道:

> On Mon, 23 Oct 2023 17:07:21 +0800
> Fengnan Chang <changfengnan@bytedance.com> wrote:
>
> > Dmitry Kozlyuk <dmitry.kozliuk@gmail.com> 于2023年10月23日周一 04:22写道:
> > >
> > > 2023-09-22 16:12 (UTC+0800), Fengnan Chang:
> > > > ping
> > > >
> > > > Fengnan Chang <changfengnan@bytedance.com> 于2023年9月12日周二 17:05写道:
> > > > >
> > > > > Let's look at this path:
> > > > > malloc_elem_free
> > > > >    ->malloc_elem_join_adjacent_free
> > > > >       ->join_elem(elem, elem->next)
> > > > >
> > > > > 0. cur elem's pad > 0
> > > > > 1. data area memset in malloc_elem_free first.
> > > > > 2. next elem is free, try to join cur elem and next.
> > > > > 3. in join_elem, try to modify inner->size, this address had
> > > > > memset in step 1, it casue the content of addrees become non-zero.
> > > > >
> > > > > If user call rte_zmalloc, and pick this elem, it can't get all
> > > > > zero'd memory.
> > >
> > > malloc_elem_join_adjacent_free() always calls memset() after
> join_elem(),
> > > for the next and the previous element respectively.
> > when try to call join_elem() for the next element in
> > malloc_elem_join_adjacent_free(),
> > the memset is try to memset *next* element, but join_elem() is update
> > *current* element's
> > content, which shoudn't happen, it's two different element.
> >
> > > How to reproduce this bug?
> > when I test this patch,
> >
> https://patches.dpdk.org/project/dpdk/patch/20230831111937.60975-1-changfengnan@bytedance.com/
> > I have a case try to alloc 64/128/192 size object and free with 16
> threads,
> > after every
> > alloc I'll check wheather all content is 0 or not.
> > It's not easy to reproduce, you can have a try, it's easier to find
> > this problem in code level.
>
> I tried to make a test that would reproduce the problem but it did not.
>
> diff --git a/app/test/test_malloc.c b/app/test/test_malloc.c
> index cd579c503cf5..cfd45d6a28eb 100644
> --- a/app/test/test_malloc.c
> +++ b/app/test/test_malloc.c
> @@ -28,6 +28,7 @@
>  #include <rte_string_fns.h>
>
>  #define N 10000
> +#define BINS 100
>
>  static int
>  is_mem_on_socket(int32_t socket);
> @@ -69,13 +70,24 @@ is_aligned(void *p, int align)
>         return 1;
>  }
>
> +static bool is_all_zero(uint8_t *mem, size_t sz)
> +{
> +       size_t i;
> +
> +       for (i = 0; i < sz; i++)
> +               if (mem[i] != 0)
> +                       return false;
> +
> +       return true;
> +}
> +
>  static int
>  test_align_overlap_per_lcore(__rte_unused void *arg)
>  {
>         const unsigned align1 = 8,
>                         align2 = 64,
>                         align3 = 2048;
> -       unsigned i,j;
> +       unsigned int i;
>         void *p1 = NULL, *p2 = NULL, *p3 = NULL;
>         int ret = 0;
>
> @@ -86,11 +98,12 @@ test_align_overlap_per_lcore(__rte_unused void *arg)
>                         ret = -1;
>                         break;
>                 }
> -               for(j = 0; j < 1000 ; j++) {
> -                       if( *(char *)p1 != 0) {
> -                               printf("rte_zmalloc didn't zero the
> allocated memory\n");
> -                               ret = -1;
> -                       }
> +
> +               if (!is_all_zero(p1, 1000)) {
> +                       printf("rte_zmalloc didn't zero the allocated
> memory\n");
> +                       ret = -1;
> +                       rte_free(p1);
> +                       break;
>                 }
>                 p2 = rte_malloc("dummy", 1000, align2);
>                 if (!p2){
> @@ -140,6 +153,66 @@ test_align_overlap_per_lcore(__rte_unused void *arg)
>         return ret;
>  }
>
> +/*
> + * Allocate random size chunks and make sure that they are
> + * always zero.
> + */
> +static int
> +test_zmalloc(__rte_unused void *arg)
> +{
> +       unsigned int i, n;
> +       void *slots[BINS] = { };
> +                       void *p1;
> +       size_t sz;
> +
> +       /* Allocate many variable size chunks */
> +       for (i = 0; i < BINS; i++) {
> +               sz = rte_rand_max(1024) + 1;
> +               p1 = rte_zmalloc("slots", sz, 0);
> +               if (p1 == NULL) {
> +                       printf("rte_zmalloc(%zu) returned NULL (i=%u)\n",
> sz, i);
> +                       goto fail;
> +               }
> +               slots[i] = p1;
> +               if (!is_all_zero(p1, sz))
> +                       goto fail;
> +       }
> +
> +       /* Drop one chunk per iteration */
> +       for (n = BINS; n > 0; n--) {
> +               /* Swap in a new block into a slot */
> +               for (i = 0; i < N; i++) {
> +                       unsigned int bin = rte_rand_max(n);
> +
> +                       sz = rte_rand_max(1024) + 1;
> +                       p1 = rte_zmalloc("swap", sz, 0);
> +                       if (!p1){
> +                               printf("rte_zmalloc(%zu) returned NULL
> (i=%u)\n", sz, i);
> +                               goto fail;
> +                       }
> +
> +                       if (!is_all_zero(p1, sz)) {
> +                               printf("rte_zmalloc didn't zero the
> allocated memory\n");
> +                               goto fail;
> +                       }
> +
> +                       rte_free(slots[bin]);
> +                       slots[bin] = p1;
> +               }
> +
> +               /* Drop last bin */
> +               rte_free(slots[n]);
> +               slots[n] = NULL;
> +       }
> +
> +       return 0;
> +fail:
> +       for (i = 0; i < BINS; i++)
> +               rte_free(slots[i]);
> +
> +       return -1;
> +}
> +
>  static int
>  test_reordered_free_per_lcore(__rte_unused void *arg)
>  {
> @@ -1020,6 +1091,21 @@ test_malloc(void)
>         }
>         else printf("test_realloc() passed\n");
>
> +       /*----------------------------*/
> +       RTE_LCORE_FOREACH_WORKER(lcore_id) {
> +               rte_eal_remote_launch(test_zmalloc, NULL, lcore_id);
> +       }
> +
> +       RTE_LCORE_FOREACH_WORKER(lcore_id) {
> +               if (rte_eal_wait_lcore(lcore_id) < 0)
> +                       ret = -1;
> +       }
> +       if (ret < 0){
> +               printf("test_zmalloc() failed\n");
> +               return ret;
> +       }
> +       else printf("test_zmalloc() passed\n");
> +
>         /*----------------------------*/
>         RTE_LCORE_FOREACH_WORKER(lcore_id) {
>                 rte_eal_remote_launch(test_align_overlap_per_lcore, NULL,
> lcore_id);
>
  

Patch

diff --git a/lib/eal/common/malloc_elem.c b/lib/eal/common/malloc_elem.c
index 619c040aa3..93a23fa8d4 100644
--- a/lib/eal/common/malloc_elem.c
+++ b/lib/eal/common/malloc_elem.c
@@ -492,7 +492,7 @@  malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
  * be contiguous in memory.
  */
 static inline void
-join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
+join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2, bool update_inner)
 {
 	struct malloc_elem *next = elem2->next;
 	elem1->size += elem2->size;
@@ -502,7 +502,7 @@  join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
 		elem1->heap->last = elem1;
 	elem1->next = next;
 	elem1->dirty |= elem2->dirty;
-	if (elem1->pad) {
+	if (elem1->pad && update_inner) {
 		struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad);
 		inner->size = elem1->size - elem1->pad;
 	}
@@ -526,7 +526,7 @@  malloc_elem_join_adjacent_free(struct malloc_elem *elem)
 
 		/* remove from free list, join to this one */
 		malloc_elem_free_list_remove(elem->next);
-		join_elem(elem, elem->next);
+		join_elem(elem, elem->next, false);
 
 		/* erase header, trailer and pad */
 		memset(erase, MALLOC_POISON, erase_len);
@@ -550,7 +550,7 @@  malloc_elem_join_adjacent_free(struct malloc_elem *elem)
 		malloc_elem_free_list_remove(elem->prev);
 
 		new_elem = elem->prev;
-		join_elem(new_elem, elem);
+		join_elem(new_elem, elem, false);
 
 		/* erase header, trailer and pad */
 		memset(erase, MALLOC_POISON, erase_len);
@@ -683,7 +683,7 @@  malloc_elem_resize(struct malloc_elem *elem, size_t size)
 	 * join the two
 	 */
 	malloc_elem_free_list_remove(elem->next);
-	join_elem(elem, elem->next);
+	join_elem(elem, elem->next, true);
 
 	if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
 		/* now we have a big block together. Lets cut it down a bit, by splitting */