ring: compilation fix with GCC-12

Message ID 20220805090348.1947658-1-amitprakashs@marvell.com (mailing list archive)
State Changes Requested, archived
Delegated to: Thomas Monjalon
Headers
Series ring: compilation fix with GCC-12 |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/github-robot: build success github build: passed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-x86_64-unit-testing fail Testing issues
ci/intel-Testing success Testing PASS

Commit Message

Amit Prakash Shukla Aug. 5, 2022, 9:03 a.m. UTC
  GCC 12 raises the following warning:

In function '__rte_ring_dequeue_elems_128',
    inlined from '__rte_ring_dequeue_elems' at
		../lib/ring/rte_ring_elem_pvt.h:262:3,
    inlined from '__rte_ring_do_hts_dequeue_elem' at
		../lib/ring/rte_ring_hts_elem_pvt.h:237:3,
    inlined from 'rte_ring_mc_hts_dequeue_bulk_elem' at
		../lib/ring/rte_ring_hts.h:83:9,
    inlined from 'rte_ring_dequeue_bulk_elem' at
		../lib/ring/rte_ring_elem.h:391:10,
    inlined from 'rte_ring_dequeue_elem' at
		../lib/ring/rte_ring_elem.h:476:9,
    inlined from 'rte_ring_dequeue' at
		../lib/ring/rte_ring.h:463:9,
    inlined from 'rxa_intr_ring_dequeue' at
		../lib/eventdev/rte_event_eth_rx_adapter.c:1196:10:
../lib/ring/rte_ring_elem_pvt.h:234:25: error: 'memcpy' writing
	32 bytes into a region of size 8 overflows the destination
	[-Werror=stringop-overflow=]
  234 |    memcpy((void *)(obj + i), (void *)(ring + idx), 32);
      |    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Replacing memcpy with rte_memcpy fixes the GCC-12 compilation issue.
Also it would be better to change to rte_memcpy as the function is
called in fastpath.

Bugzilla ID: 1062
Fixes: 1fc73390bcf5 ("ring: refactor exported headers")
Cc: stable@dpdk.org

Signed-off-by: Amit Prakash Shukla <amitprakashs@marvell.com>
---
 lib/ring/rte_ring_elem_pvt.h | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)
  

Comments

Stephen Hemminger Aug. 5, 2022, 3:37 p.m. UTC | #1
On Fri, 5 Aug 2022 14:33:48 +0530
Amit Prakash Shukla <amitprakashs@marvell.com> wrote:

> GCC 12 raises the following warning:
> 
> In function '__rte_ring_dequeue_elems_128',
>     inlined from '__rte_ring_dequeue_elems' at
> 		../lib/ring/rte_ring_elem_pvt.h:262:3,
>     inlined from '__rte_ring_do_hts_dequeue_elem' at
> 		../lib/ring/rte_ring_hts_elem_pvt.h:237:3,
>     inlined from 'rte_ring_mc_hts_dequeue_bulk_elem' at
> 		../lib/ring/rte_ring_hts.h:83:9,
>     inlined from 'rte_ring_dequeue_bulk_elem' at
> 		../lib/ring/rte_ring_elem.h:391:10,
>     inlined from 'rte_ring_dequeue_elem' at
> 		../lib/ring/rte_ring_elem.h:476:9,
>     inlined from 'rte_ring_dequeue' at
> 		../lib/ring/rte_ring.h:463:9,
>     inlined from 'rxa_intr_ring_dequeue' at
> 		../lib/eventdev/rte_event_eth_rx_adapter.c:1196:10:
> ../lib/ring/rte_ring_elem_pvt.h:234:25: error: 'memcpy' writing
> 	32 bytes into a region of size 8 overflows the destination
> 	[-Werror=stringop-overflow=]
>   234 |    memcpy((void *)(obj + i), (void *)(ring + idx), 32);
>       |    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> 
> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation issue.
> Also it would be better to change to rte_memcpy as the function is
> called in fastpath.
> 
> Bugzilla ID: 1062
> Fixes: 1fc73390bcf5 ("ring: refactor exported headers")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Amit Prakash Shukla <amitprakashs@marvell.com>

This seems to be just using a different routine and in effect
wallpapering over a possibly valid warning.
  
Honnappa Nagarahalli Aug. 6, 2022, 6:35 p.m. UTC | #2
<snip>

> 
> GCC 12 raises the following warning:
> 
> In function '__rte_ring_dequeue_elems_128',
>     inlined from '__rte_ring_dequeue_elems' at
> 		../lib/ring/rte_ring_elem_pvt.h:262:3,
>     inlined from '__rte_ring_do_hts_dequeue_elem' at
> 		../lib/ring/rte_ring_hts_elem_pvt.h:237:3,
>     inlined from 'rte_ring_mc_hts_dequeue_bulk_elem' at
> 		../lib/ring/rte_ring_hts.h:83:9,
>     inlined from 'rte_ring_dequeue_bulk_elem' at
> 		../lib/ring/rte_ring_elem.h:391:10,
>     inlined from 'rte_ring_dequeue_elem' at
> 		../lib/ring/rte_ring_elem.h:476:9,
>     inlined from 'rte_ring_dequeue' at
> 		../lib/ring/rte_ring.h:463:9,
>     inlined from 'rxa_intr_ring_dequeue' at
> 		../lib/eventdev/rte_event_eth_rx_adapter.c:1196:10:
> ../lib/ring/rte_ring_elem_pvt.h:234:25: error: 'memcpy' writing
> 	32 bytes into a region of size 8 overflows the destination
> 	[-Werror=stringop-overflow=]
>   234 |    memcpy((void *)(obj + i), (void *)(ring + idx), 32);
>       |    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> 
> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation issue.
Any reason why this replacement fixes the problem?
Do you have any performance numbers with this change?

> Also it would be better to change to rte_memcpy as the function is called in
> fastpath.
On Arm platforms, memcpy in the later versions has the best performance.

> 
> Bugzilla ID: 1062
> Fixes: 1fc73390bcf5 ("ring: refactor exported headers")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Amit Prakash Shukla <amitprakashs@marvell.com>
> ---
>  lib/ring/rte_ring_elem_pvt.h | 18 ++++++++++--------
>  1 file changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/lib/ring/rte_ring_elem_pvt.h b/lib/ring/rte_ring_elem_pvt.h index
> 83788c56e6..3d85b13333 100644
> --- a/lib/ring/rte_ring_elem_pvt.h
> +++ b/lib/ring/rte_ring_elem_pvt.h
> @@ -10,6 +10,8 @@
>  #ifndef _RTE_RING_ELEM_PVT_H_
>  #define _RTE_RING_ELEM_PVT_H_
> 
> +#include <rte_memcpy.h>
> +
>  static __rte_always_inline void
>  __rte_ring_enqueue_elems_32(struct rte_ring *r, const uint32_t size,
>  		uint32_t idx, const void *obj_table, uint32_t n) @@ -97,20
> +99,20 @@ __rte_ring_enqueue_elems_128(struct rte_ring *r, uint32_t
> prod_head,
>  	const rte_int128_t *obj = (const rte_int128_t *)obj_table;
>  	if (likely(idx + n <= size)) {
>  		for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
> -			memcpy((void *)(ring + idx),
> +			rte_memcpy((void *)(ring + idx),
>  				(const void *)(obj + i), 32);
>  		switch (n & 0x1) {
>  		case 1:
> -			memcpy((void *)(ring + idx),
> +			rte_memcpy((void *)(ring + idx),
>  				(const void *)(obj + i), 16);
>  		}
>  	} else {
>  		for (i = 0; idx < size; i++, idx++)
> -			memcpy((void *)(ring + idx),
> +			rte_memcpy((void *)(ring + idx),
>  				(const void *)(obj + i), 16);
>  		/* Start at the beginning */
>  		for (idx = 0; i < n; i++, idx++)
> -			memcpy((void *)(ring + idx),
> +			rte_memcpy((void *)(ring + idx),
>  				(const void *)(obj + i), 16);
>  	}
>  }
> @@ -231,17 +233,17 @@ __rte_ring_dequeue_elems_128(struct rte_ring *r,
> uint32_t prod_head,
>  	rte_int128_t *obj = (rte_int128_t *)obj_table;
>  	if (likely(idx + n <= size)) {
>  		for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
> -			memcpy((void *)(obj + i), (void *)(ring + idx), 32);
> +			rte_memcpy((void *)(obj + i), (void *)(ring + idx), 32);
>  		switch (n & 0x1) {
>  		case 1:
> -			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> +			rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
>  		}
>  	} else {
>  		for (i = 0; idx < size; i++, idx++)
> -			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> +			rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
>  		/* Start at the beginning */
>  		for (idx = 0; i < n; i++, idx++)
> -			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> +			rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
>  	}
>  }
> 
> --
> 2.25.1
  
Konstantin Ananyev Aug. 7, 2022, 12:26 p.m. UTC | #3
06/08/2022 19:35, Honnappa Nagarahalli пишет:
> <snip>
> 
>>
>> GCC 12 raises the following warning:
>>
>> In function '__rte_ring_dequeue_elems_128',
>>      inlined from '__rte_ring_dequeue_elems' at
>> 		../lib/ring/rte_ring_elem_pvt.h:262:3,
>>      inlined from '__rte_ring_do_hts_dequeue_elem' at
>> 		../lib/ring/rte_ring_hts_elem_pvt.h:237:3,
>>      inlined from 'rte_ring_mc_hts_dequeue_bulk_elem' at
>> 		../lib/ring/rte_ring_hts.h:83:9,
>>      inlined from 'rte_ring_dequeue_bulk_elem' at
>> 		../lib/ring/rte_ring_elem.h:391:10,
>>      inlined from 'rte_ring_dequeue_elem' at
>> 		../lib/ring/rte_ring_elem.h:476:9,
>>      inlined from 'rte_ring_dequeue' at
>> 		../lib/ring/rte_ring.h:463:9,
>>      inlined from 'rxa_intr_ring_dequeue' at
>> 		../lib/eventdev/rte_event_eth_rx_adapter.c:1196:10:
>> ../lib/ring/rte_ring_elem_pvt.h:234:25: error: 'memcpy' writing
>> 	32 bytes into a region of size 8 overflows the destination
>> 	[-Werror=stringop-overflow=]
>>    234 |    memcpy((void *)(obj + i), (void *)(ring + idx), 32);
>>        |    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>>
>> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation issue.
> Any reason why this replacement fixes the problem?
> Do you have any performance numbers with this change?
> 
>> Also it would be better to change to rte_memcpy as the function is called in
>> fastpath.
> On Arm platforms, memcpy in the later versions has the best performance.

I agree with Honnappa, it is better to keep memcpy() here.
Actually, what is strange is why it ends up in
__rte_ring_dequeue_elems_128() at all.
Inside rxa_intr_ring_dequeue() we are clearly calling rte_ring_dequeue(),
which should boil down to __rte_ring_dequeue_elems_64().
It should not go to __rte_ring_dequeue_elems_128() at all.

Another question - does this warning happen only on Arm platforms?

> 
>>
>> Bugzilla ID: 1062
>> Fixes: 1fc73390bcf5 ("ring: refactor exported headers")
>> Cc: stable@dpdk.org
>>
>> Signed-off-by: Amit Prakash Shukla <amitprakashs@marvell.com>
>> ---
>>   lib/ring/rte_ring_elem_pvt.h | 18 ++++++++++--------
>>   1 file changed, 10 insertions(+), 8 deletions(-)
>>
>> diff --git a/lib/ring/rte_ring_elem_pvt.h b/lib/ring/rte_ring_elem_pvt.h index
>> 83788c56e6..3d85b13333 100644
>> --- a/lib/ring/rte_ring_elem_pvt.h
>> +++ b/lib/ring/rte_ring_elem_pvt.h
>> @@ -10,6 +10,8 @@
>>   #ifndef _RTE_RING_ELEM_PVT_H_
>>   #define _RTE_RING_ELEM_PVT_H_
>>
>> +#include <rte_memcpy.h>
>> +
>>   static __rte_always_inline void
>>   __rte_ring_enqueue_elems_32(struct rte_ring *r, const uint32_t size,
>>   		uint32_t idx, const void *obj_table, uint32_t n) @@ -97,20
>> +99,20 @@ __rte_ring_enqueue_elems_128(struct rte_ring *r, uint32_t
>> prod_head,
>>   	const rte_int128_t *obj = (const rte_int128_t *)obj_table;
>>   	if (likely(idx + n <= size)) {
>>   		for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
>> -			memcpy((void *)(ring + idx),
>> +			rte_memcpy((void *)(ring + idx),
>>   				(const void *)(obj + i), 32);
>>   		switch (n & 0x1) {
>>   		case 1:
>> -			memcpy((void *)(ring + idx),
>> +			rte_memcpy((void *)(ring + idx),
>>   				(const void *)(obj + i), 16);
>>   		}
>>   	} else {
>>   		for (i = 0; idx < size; i++, idx++)
>> -			memcpy((void *)(ring + idx),
>> +			rte_memcpy((void *)(ring + idx),
>>   				(const void *)(obj + i), 16);
>>   		/* Start at the beginning */
>>   		for (idx = 0; i < n; i++, idx++)
>> -			memcpy((void *)(ring + idx),
>> +			rte_memcpy((void *)(ring + idx),
>>   				(const void *)(obj + i), 16);
>>   	}
>>   }
>> @@ -231,17 +233,17 @@ __rte_ring_dequeue_elems_128(struct rte_ring *r,
>> uint32_t prod_head,
>>   	rte_int128_t *obj = (rte_int128_t *)obj_table;
>>   	if (likely(idx + n <= size)) {
>>   		for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
>> -			memcpy((void *)(obj + i), (void *)(ring + idx), 32);
>> +			rte_memcpy((void *)(obj + i), (void *)(ring + idx), 32);
>>   		switch (n & 0x1) {
>>   		case 1:
>> -			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
>> +			rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
>>   		}
>>   	} else {
>>   		for (i = 0; idx < size; i++, idx++)
>> -			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
>> +			rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
>>   		/* Start at the beginning */
>>   		for (idx = 0; i < n; i++, idx++)
>> -			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
>> +			rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
>>   	}
>>   }
>>
>> --
>> 2.25.1
>
  
Amit Prakash Shukla Aug. 23, 2022, 9:38 a.m. UTC | #4
Thanks for the feedback. My apologies for delayed reply.

> -----Original Message-----
> From: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> Sent: Sunday, August 7, 2022 5:56 PM
> To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Amit Prakash
> Shukla <amitprakashs@marvell.com>
> Cc: dev@dpdk.org; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> stable@dpdk.org; nd <nd@arm.com>
> Subject: [EXT] Re: [PATCH] ring: compilation fix with GCC-12
> 
> External Email
> 
> ----------------------------------------------------------------------
> 06/08/2022 19:35, Honnappa Nagarahalli пишет:
> > <snip>
> >
> >>
> >> GCC 12 raises the following warning:
> >>
> >> In function '__rte_ring_dequeue_elems_128',
> >>      inlined from '__rte_ring_dequeue_elems' at
> >> 		../lib/ring/rte_ring_elem_pvt.h:262:3,
> >>      inlined from '__rte_ring_do_hts_dequeue_elem' at
> >> 		../lib/ring/rte_ring_hts_elem_pvt.h:237:3,
> >>      inlined from 'rte_ring_mc_hts_dequeue_bulk_elem' at
> >> 		../lib/ring/rte_ring_hts.h:83:9,
> >>      inlined from 'rte_ring_dequeue_bulk_elem' at
> >> 		../lib/ring/rte_ring_elem.h:391:10,
> >>      inlined from 'rte_ring_dequeue_elem' at
> >> 		../lib/ring/rte_ring_elem.h:476:9,
> >>      inlined from 'rte_ring_dequeue' at
> >> 		../lib/ring/rte_ring.h:463:9,
> >>      inlined from 'rxa_intr_ring_dequeue' at
> >> 		../lib/eventdev/rte_event_eth_rx_adapter.c:1196:10:
> >> ../lib/ring/rte_ring_elem_pvt.h:234:25: error: 'memcpy' writing
> >> 	32 bytes into a region of size 8 overflows the destination
> >> 	[-Werror=stringop-overflow=]
> >>    234 |    memcpy((void *)(obj + i), (void *)(ring + idx), 32);
> >>        |    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> >>
> >> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation issue.
> > Any reason why this replacement fixes the problem?
> > Do you have any performance numbers with this change?
> >
> >> Also it would be better to change to rte_memcpy as the function is
> >> called in fastpath.
> > On Arm platforms, memcpy in the later versions has the best performance.
> 
> I agree with Honnappa, it is better to keep memcpy() here.
> Actually what is strange - why it ends up in
> __rte_ring_dequeue_elems_128() at all?
> Inside rxa_intr_ring_dequeue() we clearly doing: rte_ring_dequeue(), which
> should boil down to ___rte_ring_dequeue_elems_64().
> it should go to __rte_ring_dequeue_elems_128() at all.

I agree. After taking a close look and doing a few experiments, ideally it should not be going to __rte_ring_dequeue_elems_128().
The sizeof (in the call to rte_ring_dequeue_elem) is evaluated at compile time, which in this case evaluates to 8 bytes, so
__rte_ring_dequeue_elems_128() should not be in the path. Looks more like a GCC 12 bug?

> 
> Another q - is this warning happens only on arm platforms?
The warning is observed on x86 with the build type set to debug:
"meson --werror --buildtype=debug build"

> 
> >
> >>
> >> Bugzilla ID: 1062
> >> Fixes: 1fc73390bcf5 ("ring: refactor exported headers")
> >> Cc: stable@dpdk.org
> >>
> >> Signed-off-by: Amit Prakash Shukla <amitprakashs@marvell.com>
> >> ---
> >>   lib/ring/rte_ring_elem_pvt.h | 18 ++++++++++--------
> >>   1 file changed, 10 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/lib/ring/rte_ring_elem_pvt.h
> >> b/lib/ring/rte_ring_elem_pvt.h index
> >> 83788c56e6..3d85b13333 100644
> >> --- a/lib/ring/rte_ring_elem_pvt.h
> >> +++ b/lib/ring/rte_ring_elem_pvt.h
> >> @@ -10,6 +10,8 @@
> >>   #ifndef _RTE_RING_ELEM_PVT_H_
> >>   #define _RTE_RING_ELEM_PVT_H_
> >>
> >> +#include <rte_memcpy.h>
> >> +
> >>   static __rte_always_inline void
> >>   __rte_ring_enqueue_elems_32(struct rte_ring *r, const uint32_t size,
> >>   		uint32_t idx, const void *obj_table, uint32_t n) @@ -97,20
> >> +99,20 @@ __rte_ring_enqueue_elems_128(struct rte_ring *r, uint32_t
> >> prod_head,
> >>   	const rte_int128_t *obj = (const rte_int128_t *)obj_table;
> >>   	if (likely(idx + n <= size)) {
> >>   		for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
> >> -			memcpy((void *)(ring + idx),
> >> +			rte_memcpy((void *)(ring + idx),
> >>   				(const void *)(obj + i), 32);
> >>   		switch (n & 0x1) {
> >>   		case 1:
> >> -			memcpy((void *)(ring + idx),
> >> +			rte_memcpy((void *)(ring + idx),
> >>   				(const void *)(obj + i), 16);
> >>   		}
> >>   	} else {
> >>   		for (i = 0; idx < size; i++, idx++)
> >> -			memcpy((void *)(ring + idx),
> >> +			rte_memcpy((void *)(ring + idx),
> >>   				(const void *)(obj + i), 16);
> >>   		/* Start at the beginning */
> >>   		for (idx = 0; i < n; i++, idx++)
> >> -			memcpy((void *)(ring + idx),
> >> +			rte_memcpy((void *)(ring + idx),
> >>   				(const void *)(obj + i), 16);
> >>   	}
> >>   }
> >> @@ -231,17 +233,17 @@ __rte_ring_dequeue_elems_128(struct rte_ring
> >> *r, uint32_t prod_head,
> >>   	rte_int128_t *obj = (rte_int128_t *)obj_table;
> >>   	if (likely(idx + n <= size)) {
> >>   		for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
> >> -			memcpy((void *)(obj + i), (void *)(ring + idx), 32);
> >> +			rte_memcpy((void *)(obj + i), (void *)(ring + idx),
> 32);
> >>   		switch (n & 0x1) {
> >>   		case 1:
> >> -			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> >> +			rte_memcpy((void *)(obj + i), (void *)(ring + idx),
> 16);
> >>   		}
> >>   	} else {
> >>   		for (i = 0; idx < size; i++, idx++)
> >> -			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> >> +			rte_memcpy((void *)(obj + i), (void *)(ring + idx),
> 16);
> >>   		/* Start at the beginning */
> >>   		for (idx = 0; i < n; i++, idx++)
> >> -			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> >> +			rte_memcpy((void *)(obj + i), (void *)(ring + idx),
> 16);
> >>   	}
> >>   }
> >>
> >> --
> >> 2.25.1
> >
  
Amit Prakash Shukla Aug. 23, 2022, 9:41 a.m. UTC | #5
> -----Original Message-----
> From: Amit Prakash Shukla <amitprakashs@marvell.com>
> Sent: Tuesday, August 23, 2022 3:08 PM
> To: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Cc: dev@dpdk.org; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> stable@dpdk.org; nd <nd@arm.com>
> Subject: RE: [EXT] Re: [PATCH] ring: compilation fix with GCC-12
> 
> Thanks for the feedback. My apologies for delayed reply.
> 
> > -----Original Message-----
> > From: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> > Sent: Sunday, August 7, 2022 5:56 PM
> > To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Amit
> Prakash
> > Shukla <amitprakashs@marvell.com>
> > Cc: dev@dpdk.org; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> > stable@dpdk.org; nd <nd@arm.com>
> > Subject: [EXT] Re: [PATCH] ring: compilation fix with GCC-12
> >
> > External Email
> >
> > ----------------------------------------------------------------------
> > 06/08/2022 19:35, Honnappa Nagarahalli пишет:
> > > <snip>
> > >
> > >>
> > >> GCC 12 raises the following warning:
> > >>
> > >> In function '__rte_ring_dequeue_elems_128',
> > >>      inlined from '__rte_ring_dequeue_elems' at
> > >> 		../lib/ring/rte_ring_elem_pvt.h:262:3,
> > >>      inlined from '__rte_ring_do_hts_dequeue_elem' at
> > >> 		../lib/ring/rte_ring_hts_elem_pvt.h:237:3,
> > >>      inlined from 'rte_ring_mc_hts_dequeue_bulk_elem' at
> > >> 		../lib/ring/rte_ring_hts.h:83:9,
> > >>      inlined from 'rte_ring_dequeue_bulk_elem' at
> > >> 		../lib/ring/rte_ring_elem.h:391:10,
> > >>      inlined from 'rte_ring_dequeue_elem' at
> > >> 		../lib/ring/rte_ring_elem.h:476:9,
> > >>      inlined from 'rte_ring_dequeue' at
> > >> 		../lib/ring/rte_ring.h:463:9,
> > >>      inlined from 'rxa_intr_ring_dequeue' at
> > >> 		../lib/eventdev/rte_event_eth_rx_adapter.c:1196:10:
> > >> ../lib/ring/rte_ring_elem_pvt.h:234:25: error: 'memcpy' writing
> > >> 	32 bytes into a region of size 8 overflows the destination
> > >> 	[-Werror=stringop-overflow=]
> > >>    234 |    memcpy((void *)(obj + i), (void *)(ring + idx), 32);
> > >>        |
> ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> > >>
> > >> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation
> issue.
> > > Any reason why this replacement fixes the problem?
> > > Do you have any performance numbers with this change?
> > >
> > >> Also it would be better to change to rte_memcpy as the function is
> > >> called in fastpath.
> > > On Arm platforms, memcpy in the later versions has the best
> performance.
> >
> > I agree with Honnappa, it is better to keep memcpy() here.
> > Actually what is strange - why it ends up in
> > __rte_ring_dequeue_elems_128() at all?
> > Inside rxa_intr_ring_dequeue() we clearly doing: rte_ring_dequeue(),
> > which should boil down to ___rte_ring_dequeue_elems_64().
> > it should go to __rte_ring_dequeue_elems_128() at all.
> 
> I agree. After having close look and doing few experiments, ideally it should
> not be going to __rte_ring_dequeue_elems_128().
> Sizeof(in call of rte_ring_dequeue_elem) gets evaluated at compile time
> which in this case it is evaluated to 8 bytes so
> __rte_ring_dequeue_elems_128() shall not be in the path. Looks like more
> of a gcc-12 bug.?
> 
> >
> > Another q - is this warning happens only on arm platforms?
> Warning is observed on x86 with build type as debug.
> "meson --werror --buildtype=debug build"
> 
> >
> > >
> > >>
> > >> Bugzilla ID: 1062
> > >> Fixes: 1fc73390bcf5 ("ring: refactor exported headers")
> > >> Cc: stable@dpdk.org
> > >>
> > >> Signed-off-by: Amit Prakash Shukla <amitprakashs@marvell.com>
> > >> ---
> > >>   lib/ring/rte_ring_elem_pvt.h | 18 ++++++++++--------
> > >>   1 file changed, 10 insertions(+), 8 deletions(-)
> > >>
> > >> diff --git a/lib/ring/rte_ring_elem_pvt.h
> > >> b/lib/ring/rte_ring_elem_pvt.h index
> > >> 83788c56e6..3d85b13333 100644
> > >> --- a/lib/ring/rte_ring_elem_pvt.h
> > >> +++ b/lib/ring/rte_ring_elem_pvt.h
> > >> @@ -10,6 +10,8 @@
> > >>   #ifndef _RTE_RING_ELEM_PVT_H_
> > >>   #define _RTE_RING_ELEM_PVT_H_
> > >>
> > >> +#include <rte_memcpy.h>
> > >> +
> > >>   static __rte_always_inline void
> > >>   __rte_ring_enqueue_elems_32(struct rte_ring *r, const uint32_t size,
> > >>   		uint32_t idx, const void *obj_table, uint32_t n) @@ -97,20
> > >> +99,20 @@ __rte_ring_enqueue_elems_128(struct rte_ring *r,
> uint32_t
> > >> prod_head,
> > >>   	const rte_int128_t *obj = (const rte_int128_t *)obj_table;
> > >>   	if (likely(idx + n <= size)) {
> > >>   		for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
> > >> -			memcpy((void *)(ring + idx),
> > >> +			rte_memcpy((void *)(ring + idx),
> > >>   				(const void *)(obj + i), 32);
> > >>   		switch (n & 0x1) {
> > >>   		case 1:
> > >> -			memcpy((void *)(ring + idx),
> > >> +			rte_memcpy((void *)(ring + idx),
> > >>   				(const void *)(obj + i), 16);
> > >>   		}
> > >>   	} else {
> > >>   		for (i = 0; idx < size; i++, idx++)
> > >> -			memcpy((void *)(ring + idx),
> > >> +			rte_memcpy((void *)(ring + idx),
> > >>   				(const void *)(obj + i), 16);
> > >>   		/* Start at the beginning */
> > >>   		for (idx = 0; i < n; i++, idx++)
> > >> -			memcpy((void *)(ring + idx),
> > >> +			rte_memcpy((void *)(ring + idx),
> > >>   				(const void *)(obj + i), 16);
> > >>   	}
> > >>   }
> > >> @@ -231,17 +233,17 @@ __rte_ring_dequeue_elems_128(struct
> rte_ring
> > >> *r, uint32_t prod_head,
> > >>   	rte_int128_t *obj = (rte_int128_t *)obj_table;
> > >>   	if (likely(idx + n <= size)) {
> > >>   		for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
> > >> -			memcpy((void *)(obj + i), (void *)(ring + idx), 32);
> > >> +			rte_memcpy((void *)(obj + i), (void *)(ring + idx),
> > 32);
> > >>   		switch (n & 0x1) {
> > >>   		case 1:
> > >> -			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> > >> +			rte_memcpy((void *)(obj + i), (void *)(ring + idx),
> > 16);
> > >>   		}
> > >>   	} else {
> > >>   		for (i = 0; idx < size; i++, idx++)
> > >> -			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> > >> +			rte_memcpy((void *)(obj + i), (void *)(ring + idx),
> > 16);
> > >>   		/* Start at the beginning */
> > >>   		for (idx = 0; i < n; i++, idx++)
> > >> -			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> > >> +			rte_memcpy((void *)(obj + i), (void *)(ring + idx),
> > 16);
> > >>   	}
> > >>   }
> > >>
> > >> --
> > >> 2.25.1
> > >
  
Thomas Monjalon Jan. 12, 2023, 9:41 p.m. UTC | #6
23/08/2022 11:38, Amit Prakash Shukla:
> From: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> > 06/08/2022 19:35, Honnappa Nagarahalli пишет:
> > >> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation issue.
> > > 
> > > Any reason why this replacement fixes the problem?
> > > Do you have any performance numbers with this change?
> > >
> > >> Also it would be better to change to rte_memcpy as the function is
> > >> called in fastpath.
> > > 
> > > On Arm platforms, memcpy in the later versions has the best performance.
> > 
> > I agree with Honnappa, it is better to keep memcpy() here.
> > Actually what is strange - why it ends up in
> > __rte_ring_dequeue_elems_128() at all?
> > Inside rxa_intr_ring_dequeue() we clearly doing: rte_ring_dequeue(), which
> > should boil down to ___rte_ring_dequeue_elems_64().
> > it should go to __rte_ring_dequeue_elems_128() at all.
> 
> I agree. After having close look and doing few experiments,
> ideally it should not be going to __rte_ring_dequeue_elems_128().
> Sizeof(in call of rte_ring_enqueue_elem) gets evaluated at compile time
> which in this case it is evaluated to 8 bytes so 
> __rte_ring_dequeue_elems_128() shall not be in the path. Looks like more of a gcc-12 bug.?
> 
> > Another q - is this warning happens only on arm platforms?
> 
> Warning is observed on x86 with build type as debug.
> "meson --werror --buildtype=debug build"

I confirm the compilation issue on x86 with GCC 12 in a debug build.

We need to find a workaround.
Is it reported to GCC already?
  
Amit Prakash Shukla Jan. 13, 2023, 12:39 p.m. UTC | #7
Hi Thomas,

> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Friday, January 13, 2023 3:12 AM
> To: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; Amit Prakash Shukla
> <amitprakashs@marvell.com>
> Cc: dev@dpdk.org; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> david.marchand@redhat.com; bruce.richardson@intel.com;
> ferruh.yigit@amd.com
> Subject: Re: [EXT] Re: [PATCH] ring: compilation fix with GCC-12
> 
> 23/08/2022 11:38, Amit Prakash Shukla:
> > From: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> > > 06/08/2022 19:35, Honnappa Nagarahalli пишет:
> > > >> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation
> issue.
> > > >
> > > > Any reason why this replacement fixes the problem?
> > > > Do you have any performance numbers with this change?
> > > >
> > > >> Also it would be better to change to rte_memcpy as the function
> > > >> is called in fastpath.
> > > >
> > > > On Arm platforms, memcpy in the later versions has the best
> performance.
> > >
> > > I agree with Honnappa, it is better to keep memcpy() here.
> > > Actually what is strange - why it ends up in
> > > __rte_ring_dequeue_elems_128() at all?
> > > Inside rxa_intr_ring_dequeue() we clearly doing: rte_ring_dequeue(),
> > > which should boil down to ___rte_ring_dequeue_elems_64().
> > > it should go to __rte_ring_dequeue_elems_128() at all.
> >
> > I agree. After having close look and doing few experiments, ideally it
> > should not be going to __rte_ring_dequeue_elems_128().
> > Sizeof(in call of rte_ring_enqueue_elem) gets evaluated at compile
> > time which in this case it is evaluated to 8 bytes so
> > __rte_ring_dequeue_elems_128() shall not be in the path. Looks like more
> of a gcc-12 bug.?
> >
> > > Another q - is this warning happens only on arm platforms?
> >
> > Warning is observed on x86 with build type as debug.
> > "meson --werror --buildtype=debug build"
> 
> I confirm the compilation issue on x86 with GCC 12 in a debug build.
> 
> We need to find a workaround.
> Is it reported to GCC already?
> 
I found an old GCC bug reporting a similar issue. This bug seems to have been reopened recently, in December 2022. I am not sure whether it was reopened specifically for GCC 12.
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89689

Kevin has pushed a workaround for DPDK 21.11.3.
https://git.dpdk.org/dpdk-stable/commit/?h=21.11&id=e1d728588dc73af9ed60cc0074d51a7f24b2ba60
  
Thomas Monjalon Jan. 13, 2023, 1:11 p.m. UTC | #8
13/01/2023 13:39, Amit Prakash Shukla:
> From: Thomas Monjalon <thomas@monjalon.net>
> > 23/08/2022 11:38, Amit Prakash Shukla:
> > > From: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> > > > 06/08/2022 19:35, Honnappa Nagarahalli пишет:
> > > > >> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation
> > issue.
> > > > >
> > > > > Any reason why this replacement fixes the problem?
> > > > > Do you have any performance numbers with this change?
> > > > >
> > > > >> Also it would be better to change to rte_memcpy as the function
> > > > >> is called in fastpath.
> > > > >
> > > > > On Arm platforms, memcpy in the later versions has the best
> > performance.
> > > >
> > > > I agree with Honnappa, it is better to keep memcpy() here.
> > > > Actually what is strange - why it ends up in
> > > > __rte_ring_dequeue_elems_128() at all?
> > > > Inside rxa_intr_ring_dequeue() we clearly doing: rte_ring_dequeue(),
> > > > which should boil down to ___rte_ring_dequeue_elems_64().
> > > > it should go to __rte_ring_dequeue_elems_128() at all.
> > >
> > > I agree. After having close look and doing few experiments, ideally it
> > > should not be going to __rte_ring_dequeue_elems_128().
> > > Sizeof(in call of rte_ring_enqueue_elem) gets evaluated at compile
> > > time which in this case it is evaluated to 8 bytes so
> > > __rte_ring_dequeue_elems_128() shall not be in the path. Looks like more
> > of a gcc-12 bug.?
> > >
> > > > Another q - is this warning happens only on arm platforms?
> > >
> > > Warning is observed on x86 with build type as debug.
> > > "meson --werror --buildtype=debug build"
> > 
> > I confirm the compilation issue on x86 with GCC 12 in a debug build.
> > 
> > We need to find a workaround.
> > Is it reported to GCC already?
> > 
> I found an old gcc bug reporting similar issue. This bug seems to be re-opened recently in Dec-2022. Not sure if it is reopened specifically for gcc-12.
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89689

Would you please open a bug specific to GCC 12?

> Kevin has push a work around for DPDK-21.11.3.
> https://git.dpdk.org/dpdk-stable/commit/?h=21.11&id=e1d728588dc73af9ed60cc0074d51a7f24b2ba60

In the meantime we could use Kevin's workaround:

#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 120000)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-overflow"
#pragma GCC diagnostic ignored "-Wstringop-overread"
#endif

Opinions?
  
Konstantin Ananyev Feb. 13, 2023, 1:48 a.m. UTC | #9
13/01/2023 13:11, Thomas Monjalon пишет:
> 13/01/2023 13:39, Amit Prakash Shukla:
>> From: Thomas Monjalon <thomas@monjalon.net>
>>> 23/08/2022 11:38, Amit Prakash Shukla:
>>>> From: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
>>>>> 06/08/2022 19:35, Honnappa Nagarahalli пишет:
>>>>>>> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation
>>> issue.
>>>>>>
>>>>>> Any reason why this replacement fixes the problem?
>>>>>> Do you have any performance numbers with this change?
>>>>>>
>>>>>>> Also it would be better to change to rte_memcpy as the function
>>>>>>> is called in fastpath.
>>>>>>
>>>>>> On Arm platforms, memcpy in the later versions has the best
>>> performance.
>>>>>
>>>>> I agree with Honnappa, it is better to keep memcpy() here.
>>>>> Actually what is strange - why it ends up in
>>>>> __rte_ring_dequeue_elems_128() at all?
>>>>> Inside rxa_intr_ring_dequeue() we are clearly calling rte_ring_dequeue(),
>>>>> which should boil down to __rte_ring_dequeue_elems_64().
>>>>> It should not go to __rte_ring_dequeue_elems_128() at all.
>>>>
>>>> I agree. After having close look and doing few experiments, ideally it
>>>> should not be going to __rte_ring_dequeue_elems_128().
>>>> Sizeof (in the call to rte_ring_enqueue_elem) gets evaluated at compile
>>>> time, and in this case it evaluates to 8 bytes, so
>>>> __rte_ring_dequeue_elems_128() should not be in the path. Looks more like
>>>> a gcc-12 bug?
>>>>
>>>>> Another q - is this warning happens only on arm platforms?
>>>>
>>>> Warning is observed on x86 with build type as debug.
>>>> "meson --werror --buildtype=debug build"
>>>
>>> I confirm the compilation issue on x86 with GCC 12 in a debug build.
>>>
>>> We need to find a workaround.
>>> Is it reported to GCC already?
>>>
>> I found an old GCC bug reporting a similar issue. This bug seems to have been reopened recently, in December 2022. Not sure if it was reopened specifically for GCC 12.
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89689
> 
> Please would you like to open a bug specific to GCC 12?
> 
>> Kevin has pushed a workaround for DPDK 21.11.3.
>> https://git.dpdk.org/dpdk-stable/commit/?h=21.11&id=e1d728588dc73af9ed60cc0074d51a7f24b2ba60
> 
> In the meantime we could use Kevin's workaround:
> 
> #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 120000)
> #pragma GCC diagnostic push
> #pragma GCC diagnostic ignored "-Wstringop-overflow"
> #pragma GCC diagnostic ignored "-Wstringop-overread"
> #endif
> 
> Opinions?
> 
> 

Yep, disabling the warnings should work.
Another way to consider - change enqueue/dequeue_elems_128()
functions to not use memcpy() at all.
Instead of that they can copy 2*num 64-bit entities directly,
same as _64_ versions do.
Something like the patch below.
That's pretty similar to what Amit initially proposed,
but without rte_memcpy() involvement.
Performance-wise I don't expect noticeable difference with
what we have right now.
But sure, we'll need to do extra checks here.


diff --git a/lib/ring/rte_ring_elem_pvt.h b/lib/ring/rte_ring_elem_pvt.h
index 83788c56e6..de79040618 100644
--- a/lib/ring/rte_ring_elem_pvt.h
+++ b/lib/ring/rte_ring_elem_pvt.h
@@ -93,25 +93,32 @@ __rte_ring_enqueue_elems_128(struct rte_ring *r, uint32_t prod_head,
  	unsigned int i;
  	const uint32_t size = r->size;
  	uint32_t idx = prod_head & r->mask;
-	rte_int128_t *ring = (rte_int128_t *)&r[1];
-	const rte_int128_t *obj = (const rte_int128_t *)obj_table;
+	uint64_t *ring = (uint64_t *)&r[1];
+	const unaligned_uint64_t *obj = (const unaligned_uint64_t *)obj_table;
  	if (likely(idx + n <= size)) {
-		for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
-			memcpy((void *)(ring + idx),
-				(const void *)(obj + i), 32);
+		idx *= 2;
+		for (i = 0; i < 2 * (n & ~0x1); i += 4, idx += 4) {
+			ring[idx] = obj[i];
+			ring[idx + 1] = obj[i + 1];
+			ring[idx + 2] = obj[i + 2];
+			ring[idx + 3] = obj[i + 3];
+		}
  		switch (n & 0x1) {
  		case 1:
-			memcpy((void *)(ring + idx),
-				(const void *)(obj + i), 16);
+			ring[idx] = obj[i];
+			ring[idx + 1] = obj[i + 1];
  		}
  	} else {
-		for (i = 0; idx < size; i++, idx++)
-			memcpy((void *)(ring + idx),
-				(const void *)(obj + i), 16);
+		idx *= 2;
+		for (i = 0; idx < 2 * size; i += 2, idx += 2) {
+			ring[idx] = obj[i];
+			ring[idx + 1] = obj[i + 1];
+		}
  		/* Start at the beginning */
-		for (idx = 0; i < n; i++, idx++)
-			memcpy((void *)(ring + idx),
-				(const void *)(obj + i), 16);
+		for (idx = 0; i < 2 * n; i += 2, idx += 2) {
+			ring[idx] = obj[i];
+			ring[idx + 1] = obj[i + 1];
+		}
  	}
  }

@@ -227,21 +234,32 @@ __rte_ring_dequeue_elems_128(struct rte_ring *r, uint32_t prod_head,
  	unsigned int i;
  	const uint32_t size = r->size;
  	uint32_t idx = prod_head & r->mask;
-	rte_int128_t *ring = (rte_int128_t *)&r[1];
-	rte_int128_t *obj = (rte_int128_t *)obj_table;
+	uint64_t *ring = (uint64_t *)&r[1];
+	unaligned_uint64_t *obj = (unaligned_uint64_t *)obj_table;
  	if (likely(idx + n <= size)) {
-		for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
-			memcpy((void *)(obj + i), (void *)(ring + idx), 32);
+		idx *= 2;
+		for (i = 0; i < 2 * (n & ~0x1); i += 4, idx += 4) {
+			obj[i] = ring[idx];
+			obj[i + 1] = ring[idx + 1];
+			obj[i + 2] = ring[idx + 2];
+			obj[i + 3] = ring[idx + 3];
+		}
  		switch (n & 0x1) {
  		case 1:
-			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
+			obj[i] = ring[idx];
+			obj[i + 1] = ring[idx + 1];
  		}
  	} else {
-		for (i = 0; idx < size; i++, idx++)
-			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
+		idx *= 2;
+		for (i = 0; idx < 2 * size; i += 2, idx += 2) {
+			obj[i] = ring[idx];
+			obj[i + 1] = ring[idx + 1];
+		}
  		/* Start at the beginning */
-		for (idx = 0; i < n; i++, idx++)
-			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
+		for (idx = 0; i < 2 * n; i += 2, idx += 2) {
+			obj[i] = ring[idx];
+			obj[i + 1] = ring[idx + 1];
+		}
  	}
  }
  

Patch

diff --git a/lib/ring/rte_ring_elem_pvt.h b/lib/ring/rte_ring_elem_pvt.h
index 83788c56e6..3d85b13333 100644
--- a/lib/ring/rte_ring_elem_pvt.h
+++ b/lib/ring/rte_ring_elem_pvt.h
@@ -10,6 +10,8 @@ 
 #ifndef _RTE_RING_ELEM_PVT_H_
 #define _RTE_RING_ELEM_PVT_H_
 
+#include <rte_memcpy.h>
+
 static __rte_always_inline void
 __rte_ring_enqueue_elems_32(struct rte_ring *r, const uint32_t size,
 		uint32_t idx, const void *obj_table, uint32_t n)
@@ -97,20 +99,20 @@  __rte_ring_enqueue_elems_128(struct rte_ring *r, uint32_t prod_head,
 	const rte_int128_t *obj = (const rte_int128_t *)obj_table;
 	if (likely(idx + n <= size)) {
 		for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
-			memcpy((void *)(ring + idx),
+			rte_memcpy((void *)(ring + idx),
 				(const void *)(obj + i), 32);
 		switch (n & 0x1) {
 		case 1:
-			memcpy((void *)(ring + idx),
+			rte_memcpy((void *)(ring + idx),
 				(const void *)(obj + i), 16);
 		}
 	} else {
 		for (i = 0; idx < size; i++, idx++)
-			memcpy((void *)(ring + idx),
+			rte_memcpy((void *)(ring + idx),
 				(const void *)(obj + i), 16);
 		/* Start at the beginning */
 		for (idx = 0; i < n; i++, idx++)
-			memcpy((void *)(ring + idx),
+			rte_memcpy((void *)(ring + idx),
 				(const void *)(obj + i), 16);
 	}
 }
@@ -231,17 +233,17 @@  __rte_ring_dequeue_elems_128(struct rte_ring *r, uint32_t prod_head,
 	rte_int128_t *obj = (rte_int128_t *)obj_table;
 	if (likely(idx + n <= size)) {
 		for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
-			memcpy((void *)(obj + i), (void *)(ring + idx), 32);
+			rte_memcpy((void *)(obj + i), (void *)(ring + idx), 32);
 		switch (n & 0x1) {
 		case 1:
-			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
+			rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
 		}
 	} else {
 		for (i = 0; idx < size; i++, idx++)
-			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
+			rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
 		/* Start at the beginning */
 		for (idx = 0; i < n; i++, idx++)
-			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
+			rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
 	}
 }