[v3,1/5] lpm: add sve support for lookup on Arm platform

Message ID 20210112025709.1121523-2-ruifeng.wang@arm.com (mailing list archive)
State Accepted, archived
Delegated to: David Marchand
Headers
Series lpm lookup with sve support |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Ruifeng Wang Jan. 12, 2021, 2:57 a.m. UTC
  Added new path to do lpm4 lookup by using scalable vector extension.
The SVE path will be selected if compiler has flag SVE set.

Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
v2:
Fixed tbl8 group index calculation. (Vladimir)

 lib/librte_eal/arm/include/rte_vect.h |  3 +
 lib/librte_lpm/meson.build            |  2 +-
 lib/librte_lpm/rte_lpm.h              |  4 ++
 lib/librte_lpm/rte_lpm_sve.h          | 83 +++++++++++++++++++++++++++
 4 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_lpm/rte_lpm_sve.h
  

Comments

David Marchand Jan. 13, 2021, 3:58 p.m. UTC | #1
On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com> wrote:
>
> Added new path to do lpm4 lookup by using scalable vector extension.
> The SVE path will be selected if compiler has flag SVE set.
>
> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>

Review please?
  
David Marchand Jan. 27, 2021, 1:04 p.m. UTC | #2
On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com> wrote:
> diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
> index 1afe55cdc..28b57683b 100644
> --- a/lib/librte_lpm/rte_lpm.h
> +++ b/lib/librte_lpm/rte_lpm.h
> @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
>         uint32_t defv);
>
>  #if defined(RTE_ARCH_ARM)
> +#ifdef __ARM_FEATURE_SVE
> +#include "rte_lpm_sve.h"
> +#else
>  #include "rte_lpm_neon.h"
> +#endif
>  #elif defined(RTE_ARCH_PPC_64)
>  #include "rte_lpm_altivec.h"
>  #else
> diff --git a/lib/librte_lpm/rte_lpm_sve.h b/lib/librte_lpm/rte_lpm_sve.h
> new file mode 100644
> index 000000000..2e319373e
> --- /dev/null
> +++ b/lib/librte_lpm/rte_lpm_sve.h
> @@ -0,0 +1,83 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Arm Limited
> + */
> +
> +#ifndef _RTE_LPM_SVE_H_
> +#define _RTE_LPM_SVE_H_
> +
> +#include <rte_vect.h>
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +__rte_internal
> +static void

I was looking into use of the __rte_internal tag in the tree.

This helper is called from a inlined API used by applications, so out
of the DPDK build.
It looks like the compiler is not complaining when compiling examples
(I hacked my env to cross compile with gcc 10 + SVE enabled) but this
seems incorrect to me.

Is there really a need for this helper?
It is only used below afaics.


> +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
> +               uint32_t *__rte_restrict next_hops, const uint32_t n)
> +{

[snip]


> +}
> +
> +static inline void
> +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +               uint32_t defv)
> +{
> +       uint32_t i, ips[4];
> +
> +       vst1q_s32((int32_t *)ips, ip);
> +       for (i = 0; i < 4; i++)
> +               hop[i] = defv;
> +
> +       __rte_lpm_lookup_vec(lpm, ips, hop, 4);
> +}


--
David Marchand
  
Honnappa Nagarahalli Jan. 27, 2021, 9:03 p.m. UTC | #3
<snip>

> 
> On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com>
> wrote:
> > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index
> > 1afe55cdc..28b57683b 100644
> > --- a/lib/librte_lpm/rte_lpm.h
> > +++ b/lib/librte_lpm/rte_lpm.h
> > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm,
> xmm_t ip, uint32_t hop[4],
> >         uint32_t defv);
> >
> >  #if defined(RTE_ARCH_ARM)
> > +#ifdef __ARM_FEATURE_SVE
> > +#include "rte_lpm_sve.h"
> > +#else
> >  #include "rte_lpm_neon.h"
> > +#endif
> >  #elif defined(RTE_ARCH_PPC_64)
> >  #include "rte_lpm_altivec.h"
> >  #else
> > diff --git a/lib/librte_lpm/rte_lpm_sve.h
> > b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index
> > 000000000..2e319373e
> > --- /dev/null
> > +++ b/lib/librte_lpm/rte_lpm_sve.h
> > @@ -0,0 +1,83 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Arm Limited
> > + */
> > +
> > +#ifndef _RTE_LPM_SVE_H_
> > +#define _RTE_LPM_SVE_H_
> > +
> > +#include <rte_vect.h>
> > +
> > +#ifdef __cplusplus
> > +extern "C" {
> > +#endif
> > +
> > +__rte_internal
> > +static void
> 
> I was looking into use of the __rte_internal tag in the tree.
> 
> This helper is called from a inlined API used by applications, so out of the
> DPDK build.
> It looks like the compiler is not complaining when compiling examples (I
> hacked my env to cross compile with gcc 10 + SVE enabled) but this seems
> incorrect to me.
> 
> Is there really a need for this helper?
> It is only used below afaics.
I do not think it is required.

At the same time the commit log when '__rte_internal' was introduced is confusing.
It says "Introduce the __rte_internal tag to mark internal ABI function which is used only by the drivers or other libraries". Why would an internal function have an ABI?

> 
> 
> > +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
> > +               uint32_t *__rte_restrict next_hops, const uint32_t n)
> > +{
> 
> [snip]
> 
> 
> > +}
> > +
> > +static inline void
> > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> > +               uint32_t defv)
> > +{
> > +       uint32_t i, ips[4];
> > +
> > +       vst1q_s32((int32_t *)ips, ip);
> > +       for (i = 0; i < 4; i++)
> > +               hop[i] = defv;
> > +
> > +       __rte_lpm_lookup_vec(lpm, ips, hop, 4); }
> 
> 
> --
> David Marchand
  
Ruifeng Wang Jan. 28, 2021, 5:47 a.m. UTC | #4
> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Wednesday, January 27, 2021 9:05 PM
> To: Ruifeng Wang <Ruifeng.Wang@arm.com>
> Cc: jerinj@marvell.com; Jan Viktorin <viktorin@rehivetech.com>; Bruce
> Richardson <bruce.richardson@intel.com>; Vladimir Medvedkin
> <vladimir.medvedkin@intel.com>; dev <dev@dpdk.org>; Pavan Nikhilesh
> <pbhagavatula@marvell.com>; hemant.agrawal@nxp.com; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: Re: [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on
> Arm platform
> 
> On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com>
> wrote:
> > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index
> > 1afe55cdc..28b57683b 100644
> > --- a/lib/librte_lpm/rte_lpm.h
> > +++ b/lib/librte_lpm/rte_lpm.h
> > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm,
> xmm_t ip, uint32_t hop[4],
> >         uint32_t defv);
> >
> >  #if defined(RTE_ARCH_ARM)
> > +#ifdef __ARM_FEATURE_SVE
> > +#include "rte_lpm_sve.h"
> > +#else
> >  #include "rte_lpm_neon.h"
> > +#endif
> >  #elif defined(RTE_ARCH_PPC_64)
> >  #include "rte_lpm_altivec.h"
> >  #else
> > diff --git a/lib/librte_lpm/rte_lpm_sve.h
> > b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index
> > 000000000..2e319373e
> > --- /dev/null
> > +++ b/lib/librte_lpm/rte_lpm_sve.h
> > @@ -0,0 +1,83 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Arm Limited
> > + */
> > +
> > +#ifndef _RTE_LPM_SVE_H_
> > +#define _RTE_LPM_SVE_H_
> > +
> > +#include <rte_vect.h>
> > +
> > +#ifdef __cplusplus
> > +extern "C" {
> > +#endif
> > +
> > +__rte_internal
> > +static void
> 
> I was looking into use of the __rte_internal tag in the tree.
> 
> This helper is called from a inlined API used by applications, so out of the
> DPDK build.
> It looks like the compiler is not complaining when compiling examples (I
> hacked my env to cross compile with gcc 10 + SVE enabled) but this seems
> incorrect to me.
> 
> Is there really a need for this helper?
> It is only used below afaics.

My intention was to keep the helper generic. So it can be used not only in rte_lpm_lookupx4
as below, but also in other lookup functions like rte_lpm_lookup_bulk where number of IPs
to be looked up is not a fixed value.

Will removing __rte_internal tag resolve the issue? 

> 
> 
> > +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
> > +               uint32_t *__rte_restrict next_hops, const uint32_t n)
> > +{
> 
> [snip]
> 
> 
> > +}
> > +
> > +static inline void
> > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> > +               uint32_t defv)
> > +{
> > +       uint32_t i, ips[4];
> > +
> > +       vst1q_s32((int32_t *)ips, ip);
> > +       for (i = 0; i < 4; i++)
> > +               hop[i] = defv;
> > +
> > +       __rte_lpm_lookup_vec(lpm, ips, hop, 4); }
> 
> 
> --
> David Marchand
  
David Marchand Jan. 28, 2021, 8:03 a.m. UTC | #5
On Wed, Jan 27, 2021 at 10:03 PM Honnappa Nagarahalli
<Honnappa.Nagarahalli@arm.com> wrote:
>
> <snip>
>
> >
> > On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com>
> > wrote:
> > > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index
> > > 1afe55cdc..28b57683b 100644
> > > --- a/lib/librte_lpm/rte_lpm.h
> > > +++ b/lib/librte_lpm/rte_lpm.h
> > > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm,
> > xmm_t ip, uint32_t hop[4],
> > >         uint32_t defv);
> > >
> > >  #if defined(RTE_ARCH_ARM)
> > > +#ifdef __ARM_FEATURE_SVE
> > > +#include "rte_lpm_sve.h"
> > > +#else
> > >  #include "rte_lpm_neon.h"
> > > +#endif
> > >  #elif defined(RTE_ARCH_PPC_64)
> > >  #include "rte_lpm_altivec.h"
> > >  #else
> > > diff --git a/lib/librte_lpm/rte_lpm_sve.h
> > > b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index
> > > 000000000..2e319373e
> > > --- /dev/null
> > > +++ b/lib/librte_lpm/rte_lpm_sve.h
> > > @@ -0,0 +1,83 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright(c) 2020 Arm Limited
> > > + */
> > > +
> > > +#ifndef _RTE_LPM_SVE_H_
> > > +#define _RTE_LPM_SVE_H_
> > > +
> > > +#include <rte_vect.h>
> > > +
> > > +#ifdef __cplusplus
> > > +extern "C" {
> > > +#endif
> > > +
> > > +__rte_internal
> > > +static void
> >
> > I was looking into use of the __rte_internal tag in the tree.
> >
> > This helper is called from a inlined API used by applications, so out of the
> > DPDK build.
> > It looks like the compiler is not complaining when compiling examples (I
> > hacked my env to cross compile with gcc 10 + SVE enabled) but this seems
> > incorrect to me.
> >
> > Is there really a need for this helper?
> > It is only used below afaics.
> I do not think it is required.
>
> At the same time the commit log when '__rte_internal' was introduced is confusing.
> It says "Introduce the __rte_internal tag to mark internal ABI function which is used only by the drivers or other libraries". Why would an internal function have an ABI?

It happens that drivers/libraries in DPDK offer some interface for
other parts of the DPDK to use.
But we might want them to keep them hidden to final applications,
because this is purely internal and/or we don't want to guarantee
compatibility in later versions.
For such cases, a function can be marked __rte_internal.


This tag has two impacts:
- a marked symbol is versionned as INTERNAL when exported (so this
does not apply to inlines),
- if an application tries to use a marked API, an error is triggered
at build time to prevent use of such API,
  
Honnappa Nagarahalli Jan. 28, 2021, 12:24 p.m. UTC | #6
<snip>

> 
> On Wed, Jan 27, 2021 at 10:03 PM Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com> wrote:
> >
> > <snip>
> >
> > >
> > > On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com>
> > > wrote:
> > > > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
> > > > index 1afe55cdc..28b57683b 100644
> > > > --- a/lib/librte_lpm/rte_lpm.h
> > > > +++ b/lib/librte_lpm/rte_lpm.h
> > > > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm,
> > > xmm_t ip, uint32_t hop[4],
> > > >         uint32_t defv);
> > > >
> > > >  #if defined(RTE_ARCH_ARM)
> > > > +#ifdef __ARM_FEATURE_SVE
> > > > +#include "rte_lpm_sve.h"
> > > > +#else
> > > >  #include "rte_lpm_neon.h"
> > > > +#endif
> > > >  #elif defined(RTE_ARCH_PPC_64)
> > > >  #include "rte_lpm_altivec.h"
> > > >  #else
> > > > diff --git a/lib/librte_lpm/rte_lpm_sve.h
> > > > b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index
> > > > 000000000..2e319373e
> > > > --- /dev/null
> > > > +++ b/lib/librte_lpm/rte_lpm_sve.h
> > > > @@ -0,0 +1,83 @@
> > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > + * Copyright(c) 2020 Arm Limited
> > > > + */
> > > > +
> > > > +#ifndef _RTE_LPM_SVE_H_
> > > > +#define _RTE_LPM_SVE_H_
> > > > +
> > > > +#include <rte_vect.h>
> > > > +
> > > > +#ifdef __cplusplus
> > > > +extern "C" {
> > > > +#endif
> > > > +
> > > > +__rte_internal
> > > > +static void
> > >
> > > I was looking into use of the __rte_internal tag in the tree.
> > >
> > > This helper is called from a inlined API used by applications, so
> > > out of the DPDK build.
> > > It looks like the compiler is not complaining when compiling
> > > examples (I hacked my env to cross compile with gcc 10 + SVE
> > > enabled) but this seems incorrect to me.
> > >
> > > Is there really a need for this helper?
> > > It is only used below afaics.
> > I do not think it is required.
> >
> > At the same time the commit log when '__rte_internal' was introduced is
> confusing.
> > It says "Introduce the __rte_internal tag to mark internal ABI function which is
> used only by the drivers or other libraries". Why would an internal function have
> an ABI?
> 
> It happens that drivers/libraries in DPDK offer some interface for other parts of
> the DPDK to use.
> But we might want them to keep them hidden to final applications, because this
> is purely internal and/or we don't want to guarantee compatibility in later
> versions.
> For such cases, a function can be marked __rte_internal.
> 
> 
> This tag has two impacts:
> - a marked symbol is versionned as INTERNAL when exported (so this does not
> apply to inlines),
> - if an application tries to use a marked API, an error is triggered at build time to
> prevent use of such API,
Thanks David, it makes sense now. The word 'internal ABI' in the commit log caused the confusion.
Is this required because all the header files (header files meant for the application and the DPDK internal header files) are in the same directory?

From the above definition, we do not need the internal tag for this function as it is very much internal to LPM library.

> 
> 
> --
> David Marchand
  

Patch

diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h
index a739e6e66..093e9122a 100644
--- a/lib/librte_eal/arm/include/rte_vect.h
+++ b/lib/librte_eal/arm/include/rte_vect.h
@@ -9,6 +9,9 @@ 
 #include "generic/rte_vect.h"
 #include "rte_debug.h"
 #include "arm_neon.h"
+#ifdef __ARM_FEATURE_SVE
+#include <arm_sve.h>
+#endif
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/lib/librte_lpm/meson.build b/lib/librte_lpm/meson.build
index 6cfc083c5..f93c86640 100644
--- a/lib/librte_lpm/meson.build
+++ b/lib/librte_lpm/meson.build
@@ -5,6 +5,6 @@  sources = files('rte_lpm.c', 'rte_lpm6.c')
 headers = files('rte_lpm.h', 'rte_lpm6.h')
 # since header files have different names, we can install all vector headers
 # without worrying about which architecture we actually need
-headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h')
+headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h', 'rte_lpm_sve.h')
 deps += ['hash']
 deps += ['rcu']
diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
index 1afe55cdc..28b57683b 100644
--- a/lib/librte_lpm/rte_lpm.h
+++ b/lib/librte_lpm/rte_lpm.h
@@ -402,7 +402,11 @@  rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 	uint32_t defv);
 
 #if defined(RTE_ARCH_ARM)
+#ifdef __ARM_FEATURE_SVE
+#include "rte_lpm_sve.h"
+#else
 #include "rte_lpm_neon.h"
+#endif
 #elif defined(RTE_ARCH_PPC_64)
 #include "rte_lpm_altivec.h"
 #else
diff --git a/lib/librte_lpm/rte_lpm_sve.h b/lib/librte_lpm/rte_lpm_sve.h
new file mode 100644
index 000000000..2e319373e
--- /dev/null
+++ b/lib/librte_lpm/rte_lpm_sve.h
@@ -0,0 +1,83 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Arm Limited
+ */
+
+#ifndef _RTE_LPM_SVE_H_
+#define _RTE_LPM_SVE_H_
+
+#include <rte_vect.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+__rte_internal
+static void
+__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
+		uint32_t *__rte_restrict next_hops, const uint32_t n)
+{
+	uint32_t i = 0;
+	svuint32_t v_ip, v_idx, v_tbl24, v_tbl8, v_hop;
+	svuint32_t v_mask_xv, v_mask_v, v_mask_hop;
+	svbool_t pg = svwhilelt_b32(i, n);
+	svbool_t pv;
+
+	do {
+		v_ip = svld1(pg, &ips[i]);
+		/* Get indices for tbl24[] */
+		v_idx = svlsr_x(pg, v_ip, 8);
+		/* Extract values from tbl24[] */
+		v_tbl24 = svld1_gather_index(pg, (const uint32_t *)lpm->tbl24,
+						v_idx);
+
+		/* Create mask with valid set */
+		v_mask_v = svdup_u32_z(pg, RTE_LPM_LOOKUP_SUCCESS);
+		/* Create mask with valid and valid_group set */
+		v_mask_xv = svdup_u32_z(pg, RTE_LPM_VALID_EXT_ENTRY_BITMASK);
+		/* Create predicate for tbl24 entries: (valid && !valid_group) */
+		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_v);
+		/* Create mask for next_hop in table entry */
+		v_mask_hop = svdup_u32_z(pg, 0x00ffffff);
+		/* Extract next_hop and write back */
+		v_hop = svand_x(pv, v_tbl24, v_mask_hop);
+		svst1(pv, &next_hops[i], v_hop);
+
+		/* Update predicate for tbl24 entries: (valid && valid_group) */
+		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_xv);
+		/* Compute tbl8 index */
+		v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xffffff));
+		v_idx = svmul_x(pv, v_idx, RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
+		v_idx = svadd_x(pv, svand_x(pv, v_ip, svdup_u32_z(pv, 0xff)),
+				v_idx);
+		/* Extract values from tbl8[] */
+		v_tbl8 = svld1_gather_index(pv, (const uint32_t *)lpm->tbl8,
+						v_idx);
+		/* Update predicate for tbl8 entries: (valid) */
+		pv = svcmpeq(pv, svand_z(pv, v_tbl8, v_mask_v), v_mask_v);
+		/* Extract next_hop and write back */
+		v_hop = svand_x(pv, v_tbl8, v_mask_hop);
+		svst1(pv, &next_hops[i], v_hop);
+
+		i += svlen(v_ip);
+		pg = svwhilelt_b32(i, n);
+	} while (svptest_any(svptrue_b32(), pg));
+}
+
+static inline void
+rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+		uint32_t defv)
+{
+	uint32_t i, ips[4];
+
+	vst1q_s32((int32_t *)ips, ip);
+	for (i = 0; i < 4; i++)
+		hop[i] = defv;
+
+	__rte_lpm_lookup_vec(lpm, ips, hop, 4);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LPM_SVE_H_ */