[1/4] test/stack: avoid trivial memory allocations

Message ID 20200805155721.19808-2-steven.lariau@arm.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series test/stack: improve multithreaded test |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS

Commit Message

Steven Lariau Aug. 5, 2020, 3:57 p.m. UTC
Replace the arguments array with a single argument struct.
All objects in the args array have the same values, so there is no need
to use an array; one struct is enough.
Since the args object is now much smaller, its allocation can be replaced
with a stack variable.

The allocation of obj_table isn't needed either, because MAX_BULK is
small. The allocation can instead be replaced with a fixed-size local
array.

Signed-off-by: Steven Lariau <steven.lariau@arm.com>
Reviewed-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_stack.c | 39 ++++++---------------------------------
 1 file changed, 6 insertions(+), 33 deletions(-)
  

Comments

Eads, Gage Aug. 11, 2020, 8:13 p.m. UTC | #1
Hi Steven,

> -----Original Message-----
> From: Steven Lariau <steven.lariau@arm.com>
> Sent: Wednesday, August 5, 2020 10:57 AM
> To: Eads, Gage <gage.eads@intel.com>; Olivier Matz
> <olivier.matz@6wind.com>
> Cc: dev@dpdk.org; honnappa.nagarahalli@arm.com;
> dharmik.thakkar@arm.com; nd@arm.com; Steven Lariau
> <steven.lariau@arm.com>
> Subject: [PATCH 1/4] test/stack: avoid trivial memory allocations
> 
> Replace the arguments array by one argument.
> All objects in the args array have the same values, so there is no need
> to use an array, only one struct is enough.
> The args object is a lot smaller, and the allocation can be replaced
> with a stack variable.
> 
> The allocation of obj_table isn't needed either, because MAX_BULK is
> small. The allocation can instead be replaced with a static array.
> 
> Signed-off-by: Steven Lariau <steven.lariau@arm.com>
> Reviewed-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  app/test/test_stack.c | 39 ++++++---------------------------------
>  1 file changed, 6 insertions(+), 33 deletions(-)
> 
> diff --git a/app/test/test_stack.c b/app/test/test_stack.c
> index c8dac1f55..5a7273a7d 100644
> --- a/app/test/test_stack.c
> +++ b/app/test/test_stack.c
> @@ -280,16 +280,9 @@ static int
>  stack_thread_push_pop(void *args)
>  {
>  	struct test_args *t = args;
> -	void **obj_table;
> +	void *obj_table[MAX_BULK];
>  	int i;
> 
> -	obj_table = rte_calloc(NULL, STACK_SIZE, sizeof(void *), 0);
> -	if (obj_table == NULL) {
> -		printf("[%s():%u] failed to calloc %zu bytes\n",
> -		       __func__, __LINE__, STACK_SIZE * sizeof(void *));
> -		return -1;
> -	}
> -
>  	for (i = 0; i < NUM_ITERS_PER_THREAD; i++) {
>  		unsigned int success, num;
> 
> @@ -310,28 +303,25 @@ stack_thread_push_pop(void *args)
>  		if (rte_stack_push(t->s, obj_table, num) != num) {
>  			printf("[%s():%u] Failed to push %u pointers\n",
>  			       __func__, __LINE__, num);
> -			rte_free(obj_table);
>  			return -1;
>  		}
> 
>  		if (rte_stack_pop(t->s, obj_table, num) != num) {
>  			printf("[%s():%u] Failed to pop %u pointers\n",
>  			       __func__, __LINE__, num);
> -			rte_free(obj_table);
>  			return -1;
>  		}
> 
>  		rte_atomic64_sub(t->sz, num);
>  	}
> 
> -	rte_free(obj_table);
>  	return 0;
>  }

Agreed, the dynamic allocation is unnecessary.

> 
>  static int
>  test_stack_multithreaded(uint32_t flags)
>  {
> -	struct test_args *args;
> +	struct test_args args;
>  	unsigned int lcore_id;
>  	struct rte_stack *s;
>  	rte_atomic64_t size;
> @@ -344,45 +334,28 @@ test_stack_multithreaded(uint32_t flags)
>  	printf("[%s():%u] Running with %u lcores\n",
>  	       __func__, __LINE__, rte_lcore_count());
> 
> -	args = rte_malloc(NULL, sizeof(struct test_args) * RTE_MAX_LCORE,
> 0);
> -	if (args == NULL) {
> -		printf("[%s():%u] failed to malloc %zu bytes\n",
> -		       __func__, __LINE__,
> -		       sizeof(struct test_args) * RTE_MAX_LCORE);
> -		return -1;
> -	}
> -
>  	s = rte_stack_create("test", STACK_SIZE, rte_socket_id(), flags);
>  	if (s == NULL) {
>  		printf("[%s():%u] Failed to create a stack\n",
>  		       __func__, __LINE__);
> -		rte_free(args);
>  		return -1;
>  	}
> 
>  	rte_atomic64_init(&size);
> +	args.s = s;
> +	args.sz = &size;
> 
>  	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
> -		args[lcore_id].s = s;
> -		args[lcore_id].sz = &size;
> -
>  		if (rte_eal_remote_launch(stack_thread_push_pop,
> -					  &args[lcore_id], lcore_id))
> +					  &args, lcore_id))
>  			rte_panic("Failed to launch lcore %d\n", lcore_id);
>  	}


In general we shouldn't pass a stack variable to other threads. Though your
code here looks fine, I'd rather err on the safe side in case this is ever used
as a template/basis for some other code...particularly since there's no
performance/correctness/etc. penalty to using dynamically allocated memory.

To support patch 2/4, you can instead convert the rte_malloc to allocate a
single shared test_args structure. Or perhaps move patch 4 earlier in the series,
and simply pass the stack pointer instead.

Thanks,
Gage
  
Stephen Hemminger Aug. 11, 2020, 8:38 p.m. UTC | #2
On Tue, 11 Aug 2020 20:13:24 +0000
"Eads, Gage" <gage.eads@intel.com> wrote:

> Hi Steven,
> 
> > -----Original Message-----
> > From: Steven Lariau <steven.lariau@arm.com>
> > Sent: Wednesday, August 5, 2020 10:57 AM
> > To: Eads, Gage <gage.eads@intel.com>; Olivier Matz
> > <olivier.matz@6wind.com>
> > Cc: dev@dpdk.org; honnappa.nagarahalli@arm.com;
> > dharmik.thakkar@arm.com; nd@arm.com; Steven Lariau
> > <steven.lariau@arm.com>
> > Subject: [PATCH 1/4] test/stack: avoid trivial memory allocations
> > 
> > Replace the arguments array by one argument.
> > All objects in the args array have the same values, so there is no need
> > to use an array, only one struct is enough.
> > The args object is a lot smaller, and the allocation can be replaced
> > with a stack variable.
> > 
> > The allocation of obj_table isn't needed either, because MAX_BULK is
> > small. The allocation can instead be replaced with a static array.
> > 
> > Signed-off-by: Steven Lariau <steven.lariau@arm.com>
> > Reviewed-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> > Reviewed-by: Phil Yang <phil.yang@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> >  app/test/test_stack.c | 39 ++++++---------------------------------
> >  1 file changed, 6 insertions(+), 33 deletions(-)
> > 
> > diff --git a/app/test/test_stack.c b/app/test/test_stack.c
> > index c8dac1f55..5a7273a7d 100644
> > --- a/app/test/test_stack.c
> > +++ b/app/test/test_stack.c
> > @@ -280,16 +280,9 @@ static int
> >  stack_thread_push_pop(void *args)
> >  {
> >  	struct test_args *t = args;
> > -	void **obj_table;
> > +	void *obj_table[MAX_BULK];
> >  	int i;
> > 
> > -	obj_table = rte_calloc(NULL, STACK_SIZE, sizeof(void *), 0);
> > -	if (obj_table == NULL) {
> > -		printf("[%s():%u] failed to calloc %zu bytes\n",
> > -		       __func__, __LINE__, STACK_SIZE * sizeof(void *));
> > -		return -1;
> > -	}
> > -
> >  	for (i = 0; i < NUM_ITERS_PER_THREAD; i++) {
> >  		unsigned int success, num;
> > 
> > @@ -310,28 +303,25 @@ stack_thread_push_pop(void *args)
> >  		if (rte_stack_push(t->s, obj_table, num) != num) {
> >  			printf("[%s():%u] Failed to push %u pointers\n",
> >  			       __func__, __LINE__, num);
> > -			rte_free(obj_table);
> >  			return -1;
> >  		}
> > 
> >  		if (rte_stack_pop(t->s, obj_table, num) != num) {
> >  			printf("[%s():%u] Failed to pop %u pointers\n",
> >  			       __func__, __LINE__, num);
> > -			rte_free(obj_table);
> >  			return -1;
> >  		}
> > 
> >  		rte_atomic64_sub(t->sz, num);
> >  	}
> > 
> > -	rte_free(obj_table);
> >  	return 0;
> >  }  
> 
> Agreed, the dynamic allocation is unnecessary.
> 
> > 
> >  static int
> >  test_stack_multithreaded(uint32_t flags)
> >  {
> > -	struct test_args *args;
> > +	struct test_args args;
> >  	unsigned int lcore_id;
> >  	struct rte_stack *s;
> >  	rte_atomic64_t size;
> > @@ -344,45 +334,28 @@ test_stack_multithreaded(uint32_t flags)
> >  	printf("[%s():%u] Running with %u lcores\n",
> >  	       __func__, __LINE__, rte_lcore_count());
> > 
> > -	args = rte_malloc(NULL, sizeof(struct test_args) * RTE_MAX_LCORE,
> > 0);
> > -	if (args == NULL) {
> > -		printf("[%s():%u] failed to malloc %zu bytes\n",
> > -		       __func__, __LINE__,
> > -		       sizeof(struct test_args) * RTE_MAX_LCORE);
> > -		return -1;
> > -	}
> > -
> >  	s = rte_stack_create("test", STACK_SIZE, rte_socket_id(), flags);
> >  	if (s == NULL) {
> >  		printf("[%s():%u] Failed to create a stack\n",
> >  		       __func__, __LINE__);
> > -		rte_free(args);
> >  		return -1;
> >  	}
> > 
> >  	rte_atomic64_init(&size);
> > +	args.s = s;
> > +	args.sz = &size;
> > 
> >  	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
> > -		args[lcore_id].s = s;
> > -		args[lcore_id].sz = &size;
> > -
> >  		if (rte_eal_remote_launch(stack_thread_push_pop,
> > -					  &args[lcore_id], lcore_id))
> > +					  &args, lcore_id))
> >  			rte_panic("Failed to launch lcore %d\n", lcore_id);
> >  	}  
> 
> 
> In general we shouldn't pass a stack variable to other threads. Though your
> code here looks fine, I'd rather err on the safe side in case this is ever used
> as a template/basis for some other code...particularly since there's no
> performance/correctness/etc. penalty to using dynamically allocated memory.
> 
> To support patch 2/4, you can instead convert the rte_malloc to allocate a
> single shared test_args structure. Or perhaps move patch 4 earlier in the series,
> and simply pass the stack pointer instead.
> 
> Thanks,
> Gage

There is no gain from using rte_malloc unless you are running primary/secondary
processes or trying to test rte_malloc itself. Why not use regular malloc, which
has good tools and library support?
  
Honnappa Nagarahalli Aug. 11, 2020, 8:49 p.m. UTC | #3
<snip>

> > >
> > > Replace the arguments array by one argument.
> > > All objects in the args array have the same values, so there is no
> > > need to use an array, only one struct is enough.
> > > The args object is a lot smaller, and the allocation can be replaced
> > > with a stack variable.
> > >
> > > The allocation of obj_table isn't needed either, because MAX_BULK is
> > > small. The allocation can instead be replaced with a static array.
> > >
> > > Signed-off-by: Steven Lariau <steven.lariau@arm.com>
> > > Reviewed-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> > > Reviewed-by: Phil Yang <phil.yang@arm.com>
> > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > ---
> > >  app/test/test_stack.c | 39 ++++++---------------------------------
> > >  1 file changed, 6 insertions(+), 33 deletions(-)
> > >
> > > diff --git a/app/test/test_stack.c b/app/test/test_stack.c index
> > > c8dac1f55..5a7273a7d 100644
> > > --- a/app/test/test_stack.c
> > > +++ b/app/test/test_stack.c
> > > @@ -280,16 +280,9 @@ static int
> > >  stack_thread_push_pop(void *args)
> > >  {
> > >  	struct test_args *t = args;
> > > -	void **obj_table;
> > > +	void *obj_table[MAX_BULK];
> > >  	int i;
> > >
> > > -	obj_table = rte_calloc(NULL, STACK_SIZE, sizeof(void *), 0);
> > > -	if (obj_table == NULL) {
> > > -		printf("[%s():%u] failed to calloc %zu bytes\n",
> > > -		       __func__, __LINE__, STACK_SIZE * sizeof(void *));
> > > -		return -1;
> > > -	}
> > > -
> > >  	for (i = 0; i < NUM_ITERS_PER_THREAD; i++) {
> > >  		unsigned int success, num;
> > >
> > > @@ -310,28 +303,25 @@ stack_thread_push_pop(void *args)
> > >  		if (rte_stack_push(t->s, obj_table, num) != num) {
> > >  			printf("[%s():%u] Failed to push %u pointers\n",
> > >  			       __func__, __LINE__, num);
> > > -			rte_free(obj_table);
> > >  			return -1;
> > >  		}
> > >
> > >  		if (rte_stack_pop(t->s, obj_table, num) != num) {
> > >  			printf("[%s():%u] Failed to pop %u pointers\n",
> > >  			       __func__, __LINE__, num);
> > > -			rte_free(obj_table);
> > >  			return -1;
> > >  		}
> > >
> > >  		rte_atomic64_sub(t->sz, num);
> > >  	}
> > >
> > > -	rte_free(obj_table);
> > >  	return 0;
> > >  }
> >
> > Agreed, the dynamic allocation is unnecessary.
> >
> > >
> > >  static int
> > >  test_stack_multithreaded(uint32_t flags)  {
> > > -	struct test_args *args;
> > > +	struct test_args args;
> > >  	unsigned int lcore_id;
> > >  	struct rte_stack *s;
> > >  	rte_atomic64_t size;
> > > @@ -344,45 +334,28 @@ test_stack_multithreaded(uint32_t flags)
> > >  	printf("[%s():%u] Running with %u lcores\n",
> > >  	       __func__, __LINE__, rte_lcore_count());
> > >
> > > -	args = rte_malloc(NULL, sizeof(struct test_args) * RTE_MAX_LCORE,
> > > 0);
> > > -	if (args == NULL) {
> > > -		printf("[%s():%u] failed to malloc %zu bytes\n",
> > > -		       __func__, __LINE__,
> > > -		       sizeof(struct test_args) * RTE_MAX_LCORE);
> > > -		return -1;
> > > -	}
> > > -
> > >  	s = rte_stack_create("test", STACK_SIZE, rte_socket_id(), flags);
> > >  	if (s == NULL) {
> > >  		printf("[%s():%u] Failed to create a stack\n",
> > >  		       __func__, __LINE__);
> > > -		rte_free(args);
> > >  		return -1;
> > >  	}
> > >
> > >  	rte_atomic64_init(&size);
> > > +	args.s = s;
> > > +	args.sz = &size;
> > >
> > >  	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
> > > -		args[lcore_id].s = s;
> > > -		args[lcore_id].sz = &size;
> > > -
> > >  		if (rte_eal_remote_launch(stack_thread_push_pop,
> > > -					  &args[lcore_id], lcore_id))
> > > +					  &args, lcore_id))
> > >  			rte_panic("Failed to launch lcore %d\n", lcore_id);
> > >  	}
> >
> >
> > In general we shouldn't pass a stack variable to other threads. Though
> > your code here looks fine, I'd rather err on the safe side in case
> > this is ever used as a template/basis for some other
> > code...particularly since there's no performance/correctness/etc. penalty to
> using dynamically allocated memory.
> >
> > To support patch 2/4, you can instead convert the rte_malloc to
> > allocate a single shared test_args structure. Or perhaps move patch 4
> > earlier in the series, and simply pass the stack pointer instead.
> >
> > Thanks,
> > Gage
> 
> There is no gain to using rte_malloc unless you are doing primary/secondary
> process or trying to test rte_malloc. Why not use regular malloc which has
> good tools and library support.

I think making 'args' a global variable is enough in this case.
  
Eads, Gage Aug. 11, 2020, 9:14 p.m. UTC | #4
> -----Original Message-----
> From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Sent: Tuesday, August 11, 2020 3:50 PM
> To: Stephen Hemminger <stephen@networkplumber.org>; Eads, Gage
> <gage.eads@intel.com>
> Cc: Steven Lariau <Steven.Lariau@arm.com>; Olivier Matz
> <olivier.matz@6wind.com>; dev@dpdk.org; Dharmik Thakkar
> <Dharmik.Thakkar@arm.com>; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: RE: [dpdk-dev] [PATCH 1/4] test/stack: avoid trivial memory
> allocations
> 
> <snip>
> 
> > > >
> > > > Replace the arguments array by one argument.
> > > > All objects in the args array have the same values, so there is no
> > > > need to use an array, only one struct is enough.
> > > > The args object is a lot smaller, and the allocation can be replaced
> > > > with a stack variable.
> > > >
> > > > The allocation of obj_table isn't needed either, because MAX_BULK is
> > > > small. The allocation can instead be replaced with a static array.
> > > >
> > > > Signed-off-by: Steven Lariau <steven.lariau@arm.com>
> > > > Reviewed-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> > > > Reviewed-by: Phil Yang <phil.yang@arm.com>
> > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > ---
> > > >  app/test/test_stack.c | 39 ++++++---------------------------------
> > > >  1 file changed, 6 insertions(+), 33 deletions(-)
> > > >
> > > > diff --git a/app/test/test_stack.c b/app/test/test_stack.c index
> > > > c8dac1f55..5a7273a7d 100644
> > > > --- a/app/test/test_stack.c
> > > > +++ b/app/test/test_stack.c
> > > > @@ -280,16 +280,9 @@ static int
> > > >  stack_thread_push_pop(void *args)
> > > >  {
> > > >  	struct test_args *t = args;
> > > > -	void **obj_table;
> > > > +	void *obj_table[MAX_BULK];
> > > >  	int i;
> > > >
> > > > -	obj_table = rte_calloc(NULL, STACK_SIZE, sizeof(void *), 0);
> > > > -	if (obj_table == NULL) {
> > > > -		printf("[%s():%u] failed to calloc %zu bytes\n",
> > > > -		       __func__, __LINE__, STACK_SIZE * sizeof(void *));
> > > > -		return -1;
> > > > -	}
> > > > -
> > > >  	for (i = 0; i < NUM_ITERS_PER_THREAD; i++) {
> > > >  		unsigned int success, num;
> > > >
> > > > @@ -310,28 +303,25 @@ stack_thread_push_pop(void *args)
> > > >  		if (rte_stack_push(t->s, obj_table, num) != num) {
> > > >  			printf("[%s():%u] Failed to push %u pointers\n",
> > > >  			       __func__, __LINE__, num);
> > > > -			rte_free(obj_table);
> > > >  			return -1;
> > > >  		}
> > > >
> > > >  		if (rte_stack_pop(t->s, obj_table, num) != num) {
> > > >  			printf("[%s():%u] Failed to pop %u pointers\n",
> > > >  			       __func__, __LINE__, num);
> > > > -			rte_free(obj_table);
> > > >  			return -1;
> > > >  		}
> > > >
> > > >  		rte_atomic64_sub(t->sz, num);
> > > >  	}
> > > >
> > > > -	rte_free(obj_table);
> > > >  	return 0;
> > > >  }
> > >
> > > Agreed, the dynamic allocation is unnecessary.
> > >
> > > >
> > > >  static int
> > > >  test_stack_multithreaded(uint32_t flags)  {
> > > > -	struct test_args *args;
> > > > +	struct test_args args;
> > > >  	unsigned int lcore_id;
> > > >  	struct rte_stack *s;
> > > >  	rte_atomic64_t size;
> > > > @@ -344,45 +334,28 @@ test_stack_multithreaded(uint32_t flags)
> > > >  	printf("[%s():%u] Running with %u lcores\n",
> > > >  	       __func__, __LINE__, rte_lcore_count());
> > > >
> > > > -	args = rte_malloc(NULL, sizeof(struct test_args) * RTE_MAX_LCORE,
> > > > 0);
> > > > -	if (args == NULL) {
> > > > -		printf("[%s():%u] failed to malloc %zu bytes\n",
> > > > -		       __func__, __LINE__,
> > > > -		       sizeof(struct test_args) * RTE_MAX_LCORE);
> > > > -		return -1;
> > > > -	}
> > > > -
> > > >  	s = rte_stack_create("test", STACK_SIZE, rte_socket_id(), flags);
> > > >  	if (s == NULL) {
> > > >  		printf("[%s():%u] Failed to create a stack\n",
> > > >  		       __func__, __LINE__);
> > > > -		rte_free(args);
> > > >  		return -1;
> > > >  	}
> > > >
> > > >  	rte_atomic64_init(&size);
> > > > +	args.s = s;
> > > > +	args.sz = &size;
> > > >
> > > >  	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
> > > > -		args[lcore_id].s = s;
> > > > -		args[lcore_id].sz = &size;
> > > > -
> > > >  		if (rte_eal_remote_launch(stack_thread_push_pop,
> > > > -					  &args[lcore_id], lcore_id))
> > > > +					  &args, lcore_id))
> > > >  			rte_panic("Failed to launch lcore %d\n", lcore_id);
> > > >  	}
> > >
> > >
> > > In general we shouldn't pass a stack variable to other threads. Though
> > > your code here looks fine, I'd rather err on the safe side in case
> > > this is ever used as a template/basis for some other
> > > code...particularly since there's no performance/correctness/etc.
> penalty to
> > using dynamically allocated memory.
> > >
> > > To support patch 2/4, you can instead convert the rte_malloc to
> > > allocate a single shared test_args structure. Or perhaps move patch 4
> > > earlier in the series, and simply pass the stack pointer instead.
> > >
> > > Thanks,
> > > Gage
> >
> > There is no gain to using rte_malloc unless you are doing
> primary/secondary
> > process or trying to test rte_malloc. Why not use regular malloc which has
> > good tools and library support.
> 
> I think making 'args' a global variable is enough in this case.

Agreed.

Thanks,
Gage
  

Patch

diff --git a/app/test/test_stack.c b/app/test/test_stack.c
index c8dac1f55..5a7273a7d 100644
--- a/app/test/test_stack.c
+++ b/app/test/test_stack.c
@@ -280,16 +280,9 @@  static int
 stack_thread_push_pop(void *args)
 {
 	struct test_args *t = args;
-	void **obj_table;
+	void *obj_table[MAX_BULK];
 	int i;
 
-	obj_table = rte_calloc(NULL, STACK_SIZE, sizeof(void *), 0);
-	if (obj_table == NULL) {
-		printf("[%s():%u] failed to calloc %zu bytes\n",
-		       __func__, __LINE__, STACK_SIZE * sizeof(void *));
-		return -1;
-	}
-
 	for (i = 0; i < NUM_ITERS_PER_THREAD; i++) {
 		unsigned int success, num;
 
@@ -310,28 +303,25 @@  stack_thread_push_pop(void *args)
 		if (rte_stack_push(t->s, obj_table, num) != num) {
 			printf("[%s():%u] Failed to push %u pointers\n",
 			       __func__, __LINE__, num);
-			rte_free(obj_table);
 			return -1;
 		}
 
 		if (rte_stack_pop(t->s, obj_table, num) != num) {
 			printf("[%s():%u] Failed to pop %u pointers\n",
 			       __func__, __LINE__, num);
-			rte_free(obj_table);
 			return -1;
 		}
 
 		rte_atomic64_sub(t->sz, num);
 	}
 
-	rte_free(obj_table);
 	return 0;
 }
 
 static int
 test_stack_multithreaded(uint32_t flags)
 {
-	struct test_args *args;
+	struct test_args args;
 	unsigned int lcore_id;
 	struct rte_stack *s;
 	rte_atomic64_t size;
@@ -344,45 +334,28 @@  test_stack_multithreaded(uint32_t flags)
 	printf("[%s():%u] Running with %u lcores\n",
 	       __func__, __LINE__, rte_lcore_count());
 
-	args = rte_malloc(NULL, sizeof(struct test_args) * RTE_MAX_LCORE, 0);
-	if (args == NULL) {
-		printf("[%s():%u] failed to malloc %zu bytes\n",
-		       __func__, __LINE__,
-		       sizeof(struct test_args) * RTE_MAX_LCORE);
-		return -1;
-	}
-
 	s = rte_stack_create("test", STACK_SIZE, rte_socket_id(), flags);
 	if (s == NULL) {
 		printf("[%s():%u] Failed to create a stack\n",
 		       __func__, __LINE__);
-		rte_free(args);
 		return -1;
 	}
 
 	rte_atomic64_init(&size);
+	args.s = s;
+	args.sz = &size;
 
 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
-		args[lcore_id].s = s;
-		args[lcore_id].sz = &size;
-
 		if (rte_eal_remote_launch(stack_thread_push_pop,
-					  &args[lcore_id], lcore_id))
+					  &args, lcore_id))
 			rte_panic("Failed to launch lcore %d\n", lcore_id);
 	}
 
-	lcore_id = rte_lcore_id();
-
-	args[lcore_id].s = s;
-	args[lcore_id].sz = &size;
-
-	stack_thread_push_pop(&args[lcore_id]);
+	stack_thread_push_pop(&args);
 
 	rte_eal_mp_wait_lcore();
 
 	rte_stack_free(s);
-	rte_free(args);
-
 	return 0;
 }