[v3] graph: expose node context as pointers

Message ID 20240325100500.694748-2-rjarry@redhat.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series [v3] graph: expose node context as pointers |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation fail Compilation issues
ci/intel-Testing success Testing PASS
ci/intel-Functional success Functional PASS
ci/github-robot: build fail github build: failed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-unit-arm64-testing warning Testing issues
ci/iol-compile-arm64-testing fail Testing issues
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS

Commit Message

Robin Jarry March 25, 2024, 10:05 a.m. UTC
  In some cases, the node context data is used to store two pointers
because the data is larger than the reserved 16 bytes. Having to define
intermediate structures just to be able to cast is tedious. Add two
pointers that take the same space as ctx.

Signed-off-by: Robin Jarry <rjarry@redhat.com>
---

Notes:
    v3:
    
    * Added __extension__ to the unnamed struct inside the union.
    * Fixed C++ header checks.
    * Replaced alignas() with an explicit static_assert.
    
    v2:
    
    * Added __extension__ (not sure where it is needed, I don't have access to windows).
    * It still fails the header check for C++. It seems not possible to align an unnamed union...
      Tyler, do you have an idea about how to fix that?
    * Added static_assert to ensure the anonymous union is not larger than RTE_NODE_CTX_SZ.

 lib/graph/rte_graph_worker_common.h | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)
  

Comments

Jerin Jacob March 25, 2024, 10:59 a.m. UTC | #1
On Mon, Mar 25, 2024 at 3:35 PM Robin Jarry <rjarry@redhat.com> wrote:
>
> In some cases, the node context data is used to store two pointers
> because the data is larger than the reserved 16 bytes. Having to define
> intermediate structures just to be able to cast is tedious. Add two
> pointers that take the same space than ctx.
>
> Signed-off-by: Robin Jarry <rjarry@redhat.com>
> ---
>
> Notes:
>     v3:
>
>     * Added __extension__ to the unnamed struct inside the union.
>     * Fixed C++ header checks.
>     * Replaced alignas() with an explicit static_assert.
>
>     v2:
>
>     * Added __extension__ (not sure where it is needed, I don't have access to windows).
>     * It still fails the header check for C++. It seems not possible to align an unnamed union...
>       Tyler, do you have an idea about how to fix that?
>     * Added static_assert to ensure the anonymous union is not larger than RTE_NODE_CTX_SZ.
>
>  lib/graph/rte_graph_worker_common.h | 21 ++++++++++++++++++++-
>  1 file changed, 20 insertions(+), 1 deletion(-)
>
> diff --git a/lib/graph/rte_graph_worker_common.h b/lib/graph/rte_graph_worker_common.h
> index 36d864e2c14e..722e9dac0d36 100644
> --- a/lib/graph/rte_graph_worker_common.h
> +++ b/lib/graph/rte_graph_worker_common.h
> @@ -12,7 +12,9 @@
>   * process, enqueue and move streams of objects to the next nodes.
>   */
>
> +#include <assert.h>
>  #include <stdalign.h>
> +#include <stddef.h>
>
>  #include <rte_common.h>
>  #include <rte_cycles.h>
> @@ -112,7 +114,19 @@ struct __rte_cache_aligned rte_node {
>         };
>         /* Fast path area  */
>  #define RTE_NODE_CTX_SZ 16
> -       alignas(RTE_CACHE_LINE_SIZE) uint8_t ctx[RTE_NODE_CTX_SZ]; /**< Node Context. */
> +       /*
> +        * alignas(RTE_CACHE_LINE_SIZE) cannot be used for ctx since it is part of an unnamed union.
> +        * The compiler shifts the next field on the next cache line which is not what we want.
> +        * The alignment is enforced via a explcicit static asserts below.
> +        */
> +       union {
> +               uint8_t ctx[RTE_NODE_CTX_SZ];
> +               /* Convenience aliases to store pointers without complex casting. */
> +               __extension__ struct {
> +                       void *ctx_ptr;
> +                       void *ctx_ptr2;
> +               };
> +       }; /**< Node Context. */
>         uint16_t size;          /**< Total number of objects available. */
>         uint16_t idx;           /**< Number of objects used. */
>         rte_graph_off_t off;    /**< Offset of node in the graph reel. */
> @@ -130,6 +144,11 @@ struct __rte_cache_aligned rte_node {
>         alignas(RTE_CACHE_LINE_MIN_SIZE) struct rte_node *nodes[]; /**< Next nodes. */
>  };
>
> +static_assert(offsetof(struct rte_node, ctx) % RTE_CACHE_LINE_SIZE == 0,
> +       "rte_node ctx must be aligned on a cache line");


This will fail in 32bit machine.
https://mails.dpdk.org/archives/test-report/2024-March/623806.html

I can think of following solution to add before ctx.
RTE_MARKER fastpath __rte_cache_aligned;


> +static_assert(offsetof(struct rte_node, size) - offsetof(struct rte_node, ctx) == RTE_NODE_CTX_SZ,
> +       "rte_node context union cannot be larger than RTE_NODE_CTX_SZ");
> +
>  /**
>   * @internal
>   *
> --
> 2.44.0
>
  
Robin Jarry March 25, 2024, 11:02 a.m. UTC | #2
Jerin Jacob, Mar 25, 2024 at 11:59:
> > +static_assert(offsetof(struct rte_node, ctx) % RTE_CACHE_LINE_SIZE == 0,
> > +       "rte_node ctx must be aligned on a cache line");
>
>
> This will fail in 32bit machine.
> https://mails.dpdk.org/archives/test-report/2024-March/623806.html

Hi Jerin, yes I saw that :(

> I can think of following solution to add before ctx.
> RTE_MARKER fastpath __rte_cache__aligned;

It will not be taken into account for MSVC. Is that OK?
  
Jerin Jacob March 25, 2024, 11:08 a.m. UTC | #3
On Mon, Mar 25, 2024 at 4:32 PM Robin Jarry <rjarry@redhat.com> wrote:
>
> Jerin Jacob, Mar 25, 2024 at 11:59:
> > > +static_assert(offsetof(struct rte_node, ctx) % RTE_CACHE_LINE_SIZE == 0,
> > > +       "rte_node ctx must be aligned on a cache line");
> >
> >
> > This will fail in 32bit machine.
> > https://mails.dpdk.org/archives/test-report/2024-March/623806.html
>
> Hi Jerin, yes I saw that :(
>
> > I can think of following solution to add before ctx.
> > RTE_MARKER fastpath __rte_cache__aligned;
>
> It will not be taken into account for MSVC. Is that OK?

Why?. rte_mbuf has a similar scheme.
RTE_MARKER cacheline1 __rte_cache_min_aligned;

>
  
Robin Jarry March 25, 2024, 11:15 a.m. UTC | #4
Jerin Jacob, Mar 25, 2024 at 12:08:
> > It will not be taken into account for MSVC. Is that OK?
>
> Why?. rte_mbuf has a similar scheme.
> RTE_MARKER cacheline1 __rte_cache_min_aligned;

RTE_MARKER* types seem not defined for the MSVC toolchain.

https://github.com/DPDK/dpdk/blob/v24.03-rc4/lib/eal/include/rte_common.h#L589-L602

Maybe I am missing something.
  
Jerin Jacob March 25, 2024, 11:35 a.m. UTC | #5
On Mon, Mar 25, 2024 at 4:45 PM Robin Jarry <rjarry@redhat.com> wrote:
>
> Jerin Jacob, Mar 25, 2024 at 12:08:
> > > It will not be taken into account for MSVC. Is that OK?
> >
> > Why?. rte_mbuf has a similar scheme.
> > RTE_MARKER cacheline1 __rte_cache_min_aligned;
>
> RTE_MARKER* types seem not defined for the MSVC toolchain.
>
> https://github.com/DPDK/dpdk/blob/v24.03-rc4/lib/eal/include/rte_common.h#L589-L602

Hmm. Not sure how mbuf is building for the MSVC toolchain then.

Another option could be to have a helper inline function/macro to take
care of casting to make app code clean of casting.

>
> Maybe I am missing something.
  
Bruce Richardson March 25, 2024, 12:07 p.m. UTC | #6
On Mon, Mar 25, 2024 at 05:05:12PM +0530, Jerin Jacob wrote:
> On Mon, Mar 25, 2024 at 4:45 PM Robin Jarry <rjarry@redhat.com> wrote:
> >
> > Jerin Jacob, Mar 25, 2024 at 12:08:
> > > > It will not be taken into account for MSVC. Is that OK?
> > >
> > > Why?. rte_mbuf has a similar scheme.
> > > RTE_MARKER cacheline1 __rte_cache_min_aligned;
> >
> > RTE_MARKER* types seem not defined for the MSVC toolchain.
> >
> > https://github.com/DPDK/dpdk/blob/v24.03-rc4/lib/eal/include/rte_common.h#L589-L602
> 
> Hmm. Not sure, how mbuf is building for MSCV tool chain then.
> 
> Another option could be to have a helper inline function/macro to take
> care of casting to make app code clean of casting.

The markers are being removed from DPDK and being replaced by more
portable, and more standards-conforming constructs. We should not be adding
more markers to existing structures. See [1]

/Bruce

[1] https://patches.dpdk.org/project/dpdk/list/?series=31579
  
David Marchand March 25, 2024, 12:08 p.m. UTC | #7
On Mon, Mar 25, 2024 at 12:35 PM Jerin Jacob <jerinjacobk@gmail.com> wrote:
>
> On Mon, Mar 25, 2024 at 4:45 PM Robin Jarry <rjarry@redhat.com> wrote:
> >
> > Jerin Jacob, Mar 25, 2024 at 12:08:
> > > > It will not be taken into account for MSVC. Is that OK?
> > >
> > > Why?. rte_mbuf has a similar scheme.
> > > RTE_MARKER cacheline1 __rte_cache_min_aligned;
> >
> > RTE_MARKER* types seem not defined for the MSVC toolchain.

There is some work in progress to stop using those markers.
https://patchwork.dpdk.org/project/dpdk/list/?series=31579&state=*


> >
> > https://github.com/DPDK/dpdk/blob/v24.03-rc4/lib/eal/include/rte_common.h#L589-L602
>
> Hmm. Not sure, how mbuf is building for MSCV tool chain then.

Atm, MSVC builds a really small list of libraries.

http://git.dpdk.org/dpdk/tree/lib/meson.build#n71?id=v24.03-rc4

if is_ms_compiler
    libraries = [
            'log',
            'kvargs',
            'telemetry',
            'eal',
            'ring',
    ]
endif


>
> Another option could be to have a helper inline function/macro to take
> care of casting to make app code clean of casting.

That could be an option.
  
Robin Jarry March 25, 2024, 3:20 p.m. UTC | #8
Jerin Jacob, Mar 25, 2024 at 12:35:
> Another option could be to have a helper inline function/macro to take
> care of casting to make app code clean of casting.

Would something like this be suitable?

/*
 * View the node context bytes as an array of pointers and select one slot.
 * The whole expansion is parenthesized so it composes safely inside larger
 * expressions; a parenthesized lvalue is still an lvalue, so each macro can
 * be both read and assigned.
 * NOTE(review): assumes (n)->ctx is suitably aligned for void * and large
 * enough for two pointers -- confirm for every supported target.
 */
#define RTE_NODE_CTX_PTR1(n) (((void **)(n)->ctx)[0])
#define RTE_NODE_CTX_PTR2(n) (((void **)(n)->ctx)[1])
  
Jerin Jacob March 25, 2024, 3:47 p.m. UTC | #9
On Mon, Mar 25, 2024 at 8:50 PM Robin Jarry <rjarry@redhat.com> wrote:
>
> Jerin Jacob, Mar 25, 2024 at 12:35:
> > Another option could be to have a helper inline function/macro to take
> > care of casting to make app code clean of casting.
>
> Would something like this be suitable?
>
> #define RTE_NODE_CTX_PTR1(n) ((void **)(n)->ctx)[0]
> #define RTE_NODE_CTX_PTR2(n) ((void **)(n)->ctx)[1]

Works for me. No strong opinion about the name, RTE_NODE_CTX_AS_PTR1
may be more reflecting the intent.

>
  
Robin Jarry March 25, 2024, 3:51 p.m. UTC | #10
Jerin Jacob, Mar 25, 2024 at 16:47:
> > #define RTE_NODE_CTX_PTR1(n) ((void **)(n)->ctx)[0]
> > #define RTE_NODE_CTX_PTR2(n) ((void **)(n)->ctx)[1]
>
> Works for me. No strong opinion about the name, RTE_NODE_CTX_AS_PTR1
> may be more reflecting the intent.

I also thought about adding inline getter/setter functions but that's 
more code. It may be cleaner:

 static inline void *rte_node_ctx_ptr1_get(struct rte_node *n) {
     return ((void **)node->ctx)[0];
 }
 static inline void *rte_node_ctx_ptr2_get(struct rte_node *n) {
     return ((void **)node->ctx)[1];
 }
 static inline void rte_node_ctx_ptr1_set(struct rte_node *n, void *p) {
     ((void **)node->ctx)[0] = p;
 }
 static inline void rte_node_ctx_ptr2_set(struct rte_node *n, void *p) {
     ((void **)node->ctx)[1] = p;
 }

I don't have a strong opinion. I'll go either way.
  
Jerin Jacob March 25, 2024, 3:56 p.m. UTC | #11
On Mon, Mar 25, 2024 at 9:21 PM Robin Jarry <rjarry@redhat.com> wrote:
>
> Jerin Jacob, Mar 25, 2024 at 16:47:
> > > #define RTE_NODE_CTX_PTR1(n) ((void **)(n)->ctx)[0]
> > > #define RTE_NODE_CTX_PTR2(n) ((void **)(n)->ctx)[1]
> >
> > Works for me. No strong opinion about the name, RTE_NODE_CTX_AS_PTR1
> > may be more reflecting the intent.
>
> I also thought about adding inline getter/setter functions but that's
> more code. It may be cleaner:
>
>  static inline void *rte_node_ctx_ptr1_get(struct rte_node *n) {
>      return ((void **)node->ctx)[0];
>  }
>  static inline void *rte_node_ctx_ptr2_get(struct rte_node *n) {
>      return ((void **)node->ctx)[1];
>  }
>  static inline void rte_node_ctx_ptr1_set(struct rte_node *n, void *p) {
>      ((void **)node->ctx)[0] = p;
>  }
>  static inline void rte_node_ctx_ptr2_set(struct rte_node *n, void *p) {
>      ((void **)node->ctx)[1] = p;
>  }
>
> I don't have a strong opinion. I'll go either way.

Inline is better.

>
  

Patch

diff --git a/lib/graph/rte_graph_worker_common.h b/lib/graph/rte_graph_worker_common.h
index 36d864e2c14e..722e9dac0d36 100644
--- a/lib/graph/rte_graph_worker_common.h
+++ b/lib/graph/rte_graph_worker_common.h
@@ -12,7 +12,9 @@ 
  * process, enqueue and move streams of objects to the next nodes.
  */
 
+#include <assert.h>
 #include <stdalign.h>
+#include <stddef.h>
 
 #include <rte_common.h>
 #include <rte_cycles.h>
@@ -112,7 +114,19 @@  struct __rte_cache_aligned rte_node {
 	};
 	/* Fast path area  */
 #define RTE_NODE_CTX_SZ 16
-	alignas(RTE_CACHE_LINE_SIZE) uint8_t ctx[RTE_NODE_CTX_SZ]; /**< Node Context. */
+	/*
+	 * alignas(RTE_CACHE_LINE_SIZE) cannot be used for ctx since it is part of an unnamed union.
+	 * The compiler shifts the next field to the next cache line, which is not what we want.
+	 * The alignment is enforced via explicit static asserts below.
+	 */
+	union {
+		uint8_t ctx[RTE_NODE_CTX_SZ];
+		/* Convenience aliases to store pointers without complex casting. */
+		__extension__ struct {
+			void *ctx_ptr;
+			void *ctx_ptr2;
+		};
+	}; /**< Node Context. */
 	uint16_t size;		/**< Total number of objects available. */
 	uint16_t idx;		/**< Number of objects used. */
 	rte_graph_off_t off;	/**< Offset of node in the graph reel. */
@@ -130,6 +144,11 @@  struct __rte_cache_aligned rte_node {
 	alignas(RTE_CACHE_LINE_MIN_SIZE) struct rte_node *nodes[]; /**< Next nodes. */
 };
 
+static_assert(offsetof(struct rte_node, ctx) % RTE_CACHE_LINE_SIZE == 0,
+	"rte_node ctx must be aligned on a cache line");
+static_assert(offsetof(struct rte_node, size) - offsetof(struct rte_node, ctx) == RTE_NODE_CTX_SZ,
+	"rte_node context union cannot be larger than RTE_NODE_CTX_SZ");
+
 /**
  * @internal
  *