[v3,3/3] eal/stack: enable lock-free stack for aarch64
Checks
Commit Message
Enable both c11 atomic and non c11 atomic lock-free stack for aarch64.
Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Tested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
doc/guides/prog_guide/env_abstraction_layer.rst | 4 ++--
doc/guides/rel_notes/release_19_08.rst | 3 +++
lib/librte_stack/rte_stack_lf_c11.h | 4 ++--
lib/librte_stack/rte_stack_lf_generic.h | 4 ++--
4 files changed, 9 insertions(+), 6 deletions(-)
Comments
> Enable both c11 atomic and non c11 atomic lock-free stack for aarch64.
>
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Tested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Acked-by: Gage Eads <gage.eads@intel.com>
Thanks,
Gage
> -----Original Message-----
> From: Phil Yang <phil.yang@arm.com>
> Sent: Friday, June 28, 2019 1:42 PM
> To: dev@dpdk.org
> Cc: thomas@monjalon.net; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> hemant.agrawal@nxp.com; Honnappa.Nagarahalli@arm.com;
> gavin.hu@arm.com; nd@arm.com; gage.eads@intel.com
> Subject: [EXT] [PATCH v3 3/3] eal/stack: enable lock-free stack for aarch64
> Enable both c11 atomic and non c11 atomic lock-free stack for aarch64.
>
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Tested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
>
> Removed Items
> -------------
> diff --git a/lib/librte_stack/rte_stack_lf_c11.h
> b/lib/librte_stack/rte_stack_lf_c11.h
> index 3d677ae..67c21fd 100644
> --- a/lib/librte_stack/rte_stack_lf_c11.h
> +++ b/lib/librte_stack/rte_stack_lf_c11.h
> @@ -36,7 +36,7 @@ __rte_stack_lf_push_elems(struct rte_stack_lf_list
> *list,
> struct rte_stack_lf_elem *last,
> unsigned int num)
> {
> -#ifndef RTE_ARCH_X86_64
> +#if !defined(RTE_ARCH_X86_64) && !defined(RTE_ARCH_ARM64)
> RTE_SET_USED(first);
> RTE_SET_USED(last);
> RTE_SET_USED(list);
> @@ -88,7 +88,7 @@ __rte_stack_lf_pop_elems(struct rte_stack_lf_list *list,
> void **obj_table,
> struct rte_stack_lf_elem **last)
> {
> -#ifndef RTE_ARCH_X86_64
> +#if !defined(RTE_ARCH_X86_64) && !defined(RTE_ARCH_ARM64)
> RTE_SET_USED(obj_table);
> RTE_SET_USED(last);
> RTE_SET_USED(list);
> diff --git a/lib/librte_stack/rte_stack_lf_generic.h
> b/lib/librte_stack/rte_stack_lf_generic.h
> index 3182151..488fd9f 100644
> --- a/lib/librte_stack/rte_stack_lf_generic.h
> +++ b/lib/librte_stack/rte_stack_lf_generic.h
> @@ -36,7 +36,7 @@ __rte_stack_lf_push_elems(struct rte_stack_lf_list
> *list,
> struct rte_stack_lf_elem *last,
> unsigned int num)
> {
> -#ifndef RTE_ARCH_X86_64
> +#if !defined(RTE_ARCH_X86_64) && !defined(RTE_ARCH_ARM64)
> RTE_SET_USED(first);
> RTE_SET_USED(last);
> RTE_SET_USED(list);
> @@ -84,7 +84,7 @@ __rte_stack_lf_pop_elems(struct rte_stack_lf_list *list,
> void **obj_table,
> struct rte_stack_lf_elem **last)
> {
> -#ifndef RTE_ARCH_X86_64
> +#if !defined(RTE_ARCH_X86_64) && !defined(RTE_ARCH_ARM64)
> RTE_SET_USED(obj_table);
> RTE_SET_USED(last);
> RTE_SET_USED(list);
Can we remove all this #ifdef clutter by adding the following?
$ git diff
diff --git a/lib/librte_stack/rte_stack_lf.h b/lib/librte_stack/rte_stack_lf.h
index f5581f0c2..46af08b83 100644
--- a/lib/librte_stack/rte_stack_lf.h
+++ b/lib/librte_stack/rte_stack_lf.h
@@ -5,7 +5,7 @@
#ifndef _RTE_STACK_LF_H_
#define _RTE_STACK_LF_H_
-#ifdef RTE_USE_C11_MEM_MODEL
+#if defined (RTE_USE_C11_MEM_MODEL) && defined(RTE_ARCH_X86_64) && defined(RTE_ARCH_ARM64)
#include "rte_stack_lf_c11.h"
#else
#include "rte_stack_lf_generic.h"
> -----Original Message-----
> From: Jerin Jacob Kollanukkaran [mailto:jerinj@marvell.com]
> Sent: Thursday, July 18, 2019 11:18 PM
> To: Phil Yang <phil.yang@arm.com>; dev@dpdk.org
> Cc: thomas@monjalon.net; hemant.agrawal@nxp.com;
> Honnappa.Nagarahalli@arm.com; gavin.hu@arm.com; nd@arm.com; Eads,
> Gage <gage.eads@intel.com>
> Subject: RE: [EXT] [PATCH v3 3/3] eal/stack: enable lock-free stack for
> aarch64
>
> > -----Original Message-----
> > From: Phil Yang <phil.yang@arm.com>
> > Sent: Friday, June 28, 2019 1:42 PM
> > To: dev@dpdk.org
> > Cc: thomas@monjalon.net; Jerin Jacob Kollanukkaran
> > <jerinj@marvell.com>; hemant.agrawal@nxp.com;
> > Honnappa.Nagarahalli@arm.com; gavin.hu@arm.com; nd@arm.com;
> > gage.eads@intel.com
> > Subject: [EXT] [PATCH v3 3/3] eal/stack: enable lock-free stack for
> > aarch64
> >
> > Enable both c11 atomic and non c11 atomic lock-free stack for aarch64.
> >
> > Signed-off-by: Phil Yang <phil.yang@arm.com>
> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > Tested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> >
> > Removed Items
> > -------------
> > diff --git a/lib/librte_stack/rte_stack_lf_c11.h
> > b/lib/librte_stack/rte_stack_lf_c11.h
> > index 3d677ae..67c21fd 100644
> > --- a/lib/librte_stack/rte_stack_lf_c11.h
> > +++ b/lib/librte_stack/rte_stack_lf_c11.h
> > @@ -36,7 +36,7 @@ __rte_stack_lf_push_elems(struct rte_stack_lf_list
> > *list,
> > struct rte_stack_lf_elem *last,
> > unsigned int num)
> > {
> > -#ifndef RTE_ARCH_X86_64
> > +#if !defined(RTE_ARCH_X86_64) && !defined(RTE_ARCH_ARM64)
> > RTE_SET_USED(first);
> > RTE_SET_USED(last);
> > RTE_SET_USED(list);
> > @@ -88,7 +88,7 @@ __rte_stack_lf_pop_elems(struct rte_stack_lf_list
> *list,
> > void **obj_table,
> > struct rte_stack_lf_elem **last)
> > {
> > -#ifndef RTE_ARCH_X86_64
> > +#if !defined(RTE_ARCH_X86_64) && !defined(RTE_ARCH_ARM64)
> > RTE_SET_USED(obj_table);
> > RTE_SET_USED(last);
> > RTE_SET_USED(list);
> > diff --git a/lib/librte_stack/rte_stack_lf_generic.h
> > b/lib/librte_stack/rte_stack_lf_generic.h
> > index 3182151..488fd9f 100644
> > --- a/lib/librte_stack/rte_stack_lf_generic.h
> > +++ b/lib/librte_stack/rte_stack_lf_generic.h
> > @@ -36,7 +36,7 @@ __rte_stack_lf_push_elems(struct rte_stack_lf_list
> > *list,
> > struct rte_stack_lf_elem *last,
> > unsigned int num)
> > {
> > -#ifndef RTE_ARCH_X86_64
> > +#if !defined(RTE_ARCH_X86_64) && !defined(RTE_ARCH_ARM64)
> > RTE_SET_USED(first);
> > RTE_SET_USED(last);
> > RTE_SET_USED(list);
> > @@ -84,7 +84,7 @@ __rte_stack_lf_pop_elems(struct rte_stack_lf_list
> *list,
> > void **obj_table,
> > struct rte_stack_lf_elem **last)
> > {
> > -#ifndef RTE_ARCH_X86_64
> > +#if !defined(RTE_ARCH_X86_64) && !defined(RTE_ARCH_ARM64)
> > RTE_SET_USED(obj_table);
> > RTE_SET_USED(last);
> > RTE_SET_USED(list);
>
>
> Can we remove this all #ifdef clutter by adding the following
>
> $ git diff
> diff --git a/lib/librte_stack/rte_stack_lf.h b/lib/librte_stack/rte_stack_lf.h
> index f5581f0c2..46af08b83 100644
> --- a/lib/librte_stack/rte_stack_lf.h
> +++ b/lib/librte_stack/rte_stack_lf.h
> @@ -5,7 +5,7 @@
> #ifndef _RTE_STACK_LF_H_
> #define _RTE_STACK_LF_H_
>
> -#ifdef RTE_USE_C11_MEM_MODEL
> +#if defined (RTE_USE_C11_MEM_MODEL) && defined(RTE_ARCH_X86_64) && defined(RTE_ARCH_ARM64)
I assume you meant (defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64))?
> #include "rte_stack_lf_c11.h"
> #else
> #include "rte_stack_lf_generic.h"
>
>
The ifdefs in those two headers prevent DPDK from trying to build rte_atomic128_cmp_exchange() on architectures that don't implement it. So the proposal wouldn't quite work, since rte_stack_lf_generic.h calls rte_atomic128_cmp_exchange().
Something like this could work:
#if !(defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64))
#include "rte_stack_lf_stubs.h"
#else
#ifdef RTE_USE_C11_MEM_MODEL
#include "rte_stack_lf_c11.h"
#else
#include "rte_stack_lf_generic.h"
#endif
#endif
Where rte_stack_lf_stubs.h is a new header containing stub implementations of __rte_stack_lf_count, __rte_stack_lf_push_elems, and __rte_stack_lf_pop_elems. It still has some ifdef clutter, but less overall.
> > Can we remove this all #ifdef clutter by adding the following
> >
> > $ git diff
> > diff --git a/lib/librte_stack/rte_stack_lf.h
> > b/lib/librte_stack/rte_stack_lf.h index f5581f0c2..46af08b83 100644
> > --- a/lib/librte_stack/rte_stack_lf.h
> > +++ b/lib/librte_stack/rte_stack_lf.h
> > @@ -5,7 +5,7 @@
> > #ifndef _RTE_STACK_LF_H_
> > #define _RTE_STACK_LF_H_
> >
> > -#ifdef RTE_USE_C11_MEM_MODEL
> > +#if defined (RTE_USE_C11_MEM_MODEL) &&
> defined(RTE_ARCH_X86_64)
> > &&
> > +defined(RTE_ARCH_ARM64)
>
> I assume you meant (defined(RTE_ARCH_X86_64) ||
> defined(RTE_ARCH_ARM64))?
Yup.
>
> > #include "rte_stack_lf_c11.h"
> > #else
> > #include "rte_stack_lf_generic.h"
> >
> >
>
> The ifdefs in those two headers prevent DPDK from trying to build
> rte_atomic128_cmp_exchange() on architectures that don't implement it. So
> the proposal wouldn't quite work, since rte_stack_lf_generic.h calls
> rte_atomic128_cmp_exchange().
>
> Something like this could work:
>
> #if !(defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64))
> #include rte_stack_lf_stubs.h
> #else
> #ifdef RTE_USE_C11_MEM_MODEL
> #include "rte_stack_lf_c11.h"
> #else
> #include "rte_stack_lf_generic.h"
> #endif
> #endif
>
> Where rte_stack_lf_stubs.h is a new header containing stub
> implementations of __rte_stack_lf_count, __rte_stack_lf_push_elems, and
> __rte_stack_lf_pop_elems. It still has some ifdef clutter, but less overall.
Agree. I prefer to take this route to reduce the ifdef clutter across generic and c11 files.
> -----Original Message-----
> From: Jerin Jacob Kollanukkaran <jerinj@marvell.com>
> Sent: Friday, July 19, 2019 1:03 PM
> To: Eads, Gage <gage.eads@intel.com>; Phil Yang (Arm Technology China)
> <Phil.Yang@arm.com>; dev@dpdk.org
> Cc: thomas@monjalon.net; hemant.agrawal@nxp.com; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology
> China) <Gavin.Hu@arm.com>; nd <nd@arm.com>
> Subject: RE: [EXT] [PATCH v3 3/3] eal/stack: enable lock-free stack for
> aarch64
>
> > > Can we remove this all #ifdef clutter by adding the following
> > >
> > > $ git diff
> > > diff --git a/lib/librte_stack/rte_stack_lf.h
> > > b/lib/librte_stack/rte_stack_lf.h index f5581f0c2..46af08b83 100644
> > > --- a/lib/librte_stack/rte_stack_lf.h
> > > +++ b/lib/librte_stack/rte_stack_lf.h
> > > @@ -5,7 +5,7 @@
> > > #ifndef _RTE_STACK_LF_H_
> > > #define _RTE_STACK_LF_H_
> > >
> > > -#ifdef RTE_USE_C11_MEM_MODEL
> > > +#if defined (RTE_USE_C11_MEM_MODEL) &&
> > defined(RTE_ARCH_X86_64)
> > > &&
> > > +defined(RTE_ARCH_ARM64)
> >
> > I assume you meant (defined(RTE_ARCH_X86_64) ||
> > defined(RTE_ARCH_ARM64))?
>
> Yup.
>
> >
> > > #include "rte_stack_lf_c11.h"
> > > #else
> > > #include "rte_stack_lf_generic.h"
> > >
> > >
> >
> > The ifdefs in those two headers prevent DPDK from trying to build
> > rte_atomic128_cmp_exchange() on architectures that don't implement it.
> So
> > the proposal wouldn't quite work, since rte_stack_lf_generic.h calls
> > rte_atomic128_cmp_exchange().
> >
> > Something like this could work:
> >
> > #if !(defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64))
> > #include rte_stack_lf_stubs.h
> > #else
> > #ifdef RTE_USE_C11_MEM_MODEL
> > #include "rte_stack_lf_c11.h"
> > #else
> > #include "rte_stack_lf_generic.h"
> > #endif
> > #endif
> >
> > Where rte_stack_lf_stubs.h is a new header containing stub
> > implementations of __rte_stack_lf_count, __rte_stack_lf_push_elems,
> and
> > __rte_stack_lf_pop_elems. It still has some ifdef clutter, but less overall.
>
> Agree. I prefer to take this route to reduce the ifdef clutter across generic
> and c11 files.
Got it. I will update it in the next version.
Thanks,
Phil Yang
@@ -592,8 +592,8 @@ Known Issues
Alternatively, applications can use the lock-free stack mempool handler. When
considering this handler, note that:
- - It is currently limited to the x86_64 platform, because it uses an
- instruction (16-byte compare-and-swap) that is not yet available on other
+ - It is currently limited to the aarch64 and x86_64 platforms, because it uses
+ an instruction (16-byte compare-and-swap) that is not yet available on other
platforms.
- It has worse average-case performance than the non-preemptive rte_ring, but
software caching (e.g. the mempool cache) can mitigate this by reducing the
@@ -99,6 +99,9 @@ New Features
Updated ``librte_telemetry`` to fetch the global metrics from the
``librte_metrics`` library.
+* **Added Lock-free Stack for aarch64.**
+
+ The lock-free stack implementation is enabled for aarch64 platforms.
Removed Items
-------------
@@ -36,7 +36,7 @@ __rte_stack_lf_push_elems(struct rte_stack_lf_list *list,
struct rte_stack_lf_elem *last,
unsigned int num)
{
-#ifndef RTE_ARCH_X86_64
+#if !defined(RTE_ARCH_X86_64) && !defined(RTE_ARCH_ARM64)
RTE_SET_USED(first);
RTE_SET_USED(last);
RTE_SET_USED(list);
@@ -88,7 +88,7 @@ __rte_stack_lf_pop_elems(struct rte_stack_lf_list *list,
void **obj_table,
struct rte_stack_lf_elem **last)
{
-#ifndef RTE_ARCH_X86_64
+#if !defined(RTE_ARCH_X86_64) && !defined(RTE_ARCH_ARM64)
RTE_SET_USED(obj_table);
RTE_SET_USED(last);
RTE_SET_USED(list);
@@ -36,7 +36,7 @@ __rte_stack_lf_push_elems(struct rte_stack_lf_list *list,
struct rte_stack_lf_elem *last,
unsigned int num)
{
-#ifndef RTE_ARCH_X86_64
+#if !defined(RTE_ARCH_X86_64) && !defined(RTE_ARCH_ARM64)
RTE_SET_USED(first);
RTE_SET_USED(last);
RTE_SET_USED(list);
@@ -84,7 +84,7 @@ __rte_stack_lf_pop_elems(struct rte_stack_lf_list *list,
void **obj_table,
struct rte_stack_lf_elem **last)
{
-#ifndef RTE_ARCH_X86_64
+#if !defined(RTE_ARCH_X86_64) && !defined(RTE_ARCH_ARM64)
RTE_SET_USED(obj_table);
RTE_SET_USED(last);
RTE_SET_USED(list);