[v2,04/15] node: switch IPv4 metadata to dynamic mbuf field

Message ID 20201026222013.2147904-5-thomas@monjalon.net (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series remove mbuf userdata |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Thomas Monjalon Oct. 26, 2020, 10:20 p.m. UTC
  The node_mbuf_priv1 was stored in the deprecated mbuf field udata64.
It is moved to a dynamic field in order to allow removal of udata64.

Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
---
 lib/librte_node/ip4_lookup.c   |  7 +++++++
 lib/librte_node/ip4_rewrite.c  | 10 ++++++++++
 lib/librte_node/node_private.h | 12 ++++++++++--
 3 files changed, 27 insertions(+), 2 deletions(-)
  

Comments

Olivier Matz Oct. 27, 2020, 9:32 a.m. UTC | #1
Hi Thomas,

On Mon, Oct 26, 2020 at 11:20:02PM +0100, Thomas Monjalon wrote:
> The node_mbuf_priv1 was stored in the deprecated mbuf field udata64.
> It is moved to a dynamic field in order to allow removal of udata64.
> 
> Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> ---
>  lib/librte_node/ip4_lookup.c   |  7 +++++++
>  lib/librte_node/ip4_rewrite.c  | 10 ++++++++++
>  lib/librte_node/node_private.h | 12 ++++++++++--
>  3 files changed, 27 insertions(+), 2 deletions(-)

<...>

> --- a/lib/librte_node/node_private.h
> +++ b/lib/librte_node/node_private.h
> @@ -8,6 +8,7 @@
>  #include <rte_common.h>
>  #include <rte_log.h>
>  #include <rte_mbuf.h>
> +#include <rte_mbuf_dyn.h>
>  
>  extern int rte_node_logtype;
>  #define NODE_LOG(level, node_name, ...)                                        \
> @@ -21,7 +22,6 @@ extern int rte_node_logtype;
>  #define node_dbg(node_name, ...) NODE_LOG(DEBUG, node_name, __VA_ARGS__)
>  
>  /**
> - *
>   * Node mbuf private data to store next hop, ttl and checksum.
>   */
>  struct node_mbuf_priv1 {
> @@ -37,6 +37,13 @@ struct node_mbuf_priv1 {
>  	};
>  };
>  
> +static const struct rte_mbuf_dynfield node_mbuf_priv1_dynfield_desc = {
> +	.name = "rte_node_dynfield_priv1",
> +	.size = sizeof(struct node_mbuf_priv1 *),
> +	.align = __alignof__(struct node_mbuf_priv1 *),
> +};
> +extern int node_mbuf_priv1_dynfield_offset;
> +

It should be "struct node_mbuf_priv1", not "struct node_mbuf_priv1 *"
  
Thomas Monjalon Oct. 27, 2020, 9:34 a.m. UTC | #2
27/10/2020 10:32, Olivier Matz:
> On Mon, Oct 26, 2020 at 11:20:02PM +0100, Thomas Monjalon wrote:
> > +static const struct rte_mbuf_dynfield node_mbuf_priv1_dynfield_desc = {
> > +	.name = "rte_node_dynfield_priv1",
> > +	.size = sizeof(struct node_mbuf_priv1 *),
> > +	.align = __alignof__(struct node_mbuf_priv1 *),
> > +};
> > +extern int node_mbuf_priv1_dynfield_offset;
> > +
> 
> It should be "struct node_mbuf_priv1", not "struct node_mbuf_priv1 *"

Yes, will fix in v3.
  
Nithin Dabilpuram Oct. 27, 2020, 2:23 p.m. UTC | #3
On Mon, Oct 26, 2020 at 11:20:02PM +0100, Thomas Monjalon wrote:
> The node_mbuf_priv1 was stored in the deprecated mbuf field udata64.
> It is moved to a dynamic field in order to allow removal of udata64.
> 
> Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> ---
>  lib/librte_node/ip4_lookup.c   |  7 +++++++
>  lib/librte_node/ip4_rewrite.c  | 10 ++++++++++
>  lib/librte_node/node_private.h | 12 ++++++++++--
>  3 files changed, 27 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c
> index 8835aab9dd..c2f6d653f9 100644
> --- a/lib/librte_node/ip4_lookup.c
> +++ b/lib/librte_node/ip4_lookup.c
> @@ -21,6 +21,8 @@
>  
>  #include "node_private.h"
>  
> +int node_mbuf_priv1_dynfield_offset;
> +

This change doesn't work in secondary as it is process local memory.

>  #define IPV4_L3FWD_LPM_MAX_RULES 1024
>  #define IPV4_L3FWD_LPM_NUMBER_TBL8S (1 << 8)
>  
> @@ -178,6 +180,11 @@ ip4_lookup_node_init(const struct rte_graph *graph, struct rte_node *node)
>  	RTE_SET_USED(node);
>  
>  	if (!init_once) {
> +		node_mbuf_priv1_dynfield_offset = rte_mbuf_dynfield_register(
> +				&node_mbuf_priv1_dynfield_desc);
> +		if (node_mbuf_priv1_dynfield_offset < 0)
> +			return -rte_errno;
> +
>  		/* Setup LPM tables for all sockets */
>  		RTE_LCORE_FOREACH(lcore_id)
>  		{
> diff --git a/lib/librte_node/ip4_rewrite.c b/lib/librte_node/ip4_rewrite.c
> index bb7f671b5c..1c4e51968c 100644
> --- a/lib/librte_node/ip4_rewrite.c
> +++ b/lib/librte_node/ip4_rewrite.c
> @@ -248,9 +248,19 @@ ip4_rewrite_node_process(struct rte_graph *graph, struct rte_node *node,
>  static int
>  ip4_rewrite_node_init(const struct rte_graph *graph, struct rte_node *node)
>  {
> +	static bool init_once;
>  
>  	RTE_SET_USED(graph);
>  	RTE_SET_USED(node);
> +
> +	if (!init_once) {
> +		node_mbuf_priv1_dynfield_offset = rte_mbuf_dynfield_register(
> +				&node_mbuf_priv1_dynfield_desc);
> +		if (node_mbuf_priv1_dynfield_offset < 0)
> +			return -rte_errno;
> +		init_once = true;
> +	}
> +
>  	node_dbg("ip4_rewrite", "Initialized ip4_rewrite node initialized");
>  
>  	return 0;
> diff --git a/lib/librte_node/node_private.h b/lib/librte_node/node_private.h
> index ab7941c12b..359b40faed 100644
> --- a/lib/librte_node/node_private.h
> +++ b/lib/librte_node/node_private.h
> @@ -8,6 +8,7 @@
>  #include <rte_common.h>
>  #include <rte_log.h>
>  #include <rte_mbuf.h>
> +#include <rte_mbuf_dyn.h>
>  
>  extern int rte_node_logtype;
>  #define NODE_LOG(level, node_name, ...)                                        \
> @@ -21,7 +22,6 @@ extern int rte_node_logtype;
>  #define node_dbg(node_name, ...) NODE_LOG(DEBUG, node_name, __VA_ARGS__)
>  
>  /**
> - *
>   * Node mbuf private data to store next hop, ttl and checksum.
>   */
>  struct node_mbuf_priv1 {
> @@ -37,6 +37,13 @@ struct node_mbuf_priv1 {
>  	};
>  };
>  
> +static const struct rte_mbuf_dynfield node_mbuf_priv1_dynfield_desc = {
> +	.name = "rte_node_dynfield_priv1",
> +	.size = sizeof(struct node_mbuf_priv1 *),
> +	.align = __alignof__(struct node_mbuf_priv1 *),
> +};
> +extern int node_mbuf_priv1_dynfield_offset;
> +
>  /**
>   * Node mbuf private area 2.
>   */
> @@ -60,7 +67,8 @@ struct node_mbuf_priv2 {
>  static __rte_always_inline struct node_mbuf_priv1 *
>  node_mbuf_priv1(struct rte_mbuf *m)
>  {
> -	return (struct node_mbuf_priv1 *)&m->udata64;
> +	return RTE_MBUF_DYNFIELD(m,
> +		node_mbuf_priv1_dynfield_offset, struct node_mbuf_priv1 *);

There is a performance regression of ~1.4% in our platform (Octeontx2) because
of this	change.

>  }
>  
>  /**
> -- 
> 2.28.0
>
  
Thomas Monjalon Oct. 27, 2020, 2:33 p.m. UTC | #4
27/10/2020 15:23, Nithin Dabilpuram:
> On Mon, Oct 26, 2020 at 11:20:02PM +0100, Thomas Monjalon wrote:
> > The node_mbuf_priv1 was stored in the deprecated mbuf field udata64.
> > It is moved to a dynamic field in order to allow removal of udata64.
> > 
> > Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> > ---
> > --- a/lib/librte_node/ip4_lookup.c
> > +++ b/lib/librte_node/ip4_lookup.c
> > +int node_mbuf_priv1_dynfield_offset;
> > +
> 
> This change doesn't work in secondary as it is process local memory.

Yes that's an issue.
Can we copy the value when starting a secondary process?

[...]
> >  static __rte_always_inline struct node_mbuf_priv1 *
> >  node_mbuf_priv1(struct rte_mbuf *m)
> >  {
> > -	return (struct node_mbuf_priv1 *)&m->udata64;
> > +	return RTE_MBUF_DYNFIELD(m,
> > +		node_mbuf_priv1_dynfield_offset, struct node_mbuf_priv1 *);
> 
> There is a performance regression of ~1.4% in our platform (Octeontx2) because
> of this	change.

Yes that's the price to pay for a more extensible DPDK.
It is much cheaper than not having room for new features,
or having features conflicting on the same mbuf field like here.
  
Nithin Dabilpuram Oct. 27, 2020, 3:33 p.m. UTC | #5
On Tue, Oct 27, 2020 at 03:33:35PM +0100, Thomas Monjalon wrote:
> 27/10/2020 15:23, Nithin Dabilpuram:
> > On Mon, Oct 26, 2020 at 11:20:02PM +0100, Thomas Monjalon wrote:
> > > The node_mbuf_priv1 was stored in the deprecated mbuf field udata64.
> > > It is moved to a dynamic field in order to allow removal of udata64.
> > > 
> > > Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> > > ---
> > > --- a/lib/librte_node/ip4_lookup.c
> > > +++ b/lib/librte_node/ip4_lookup.c
> > > +int node_mbuf_priv1_dynfield_offset;
> > > +
> > 
> > This change doesn't work in secondary as it is process local memory.
> 
> Yes that's an issue.
> Can we copy the value when starting a secondary process?

Currently there is no call back which will be called only in secondary.

Can you move this value to node->ctx 8'th byte offset ? Node context is of size
16 bytes and should be sufficient.
Currently first 8 B of node->ctx is used to store that socket's lpm pointer.

> 
> [...]
> > >  static __rte_always_inline struct node_mbuf_priv1 *
> > >  node_mbuf_priv1(struct rte_mbuf *m)
> > >  {
> > > -	return (struct node_mbuf_priv1 *)&m->udata64;
> > > +	return RTE_MBUF_DYNFIELD(m,
> > > +		node_mbuf_priv1_dynfield_offset, struct node_mbuf_priv1 *);
> > 
> > There is a performance regression of ~1.4% in our platform (Octeontx2) because
> > of this	change.
> 
> Yes that's the price to pay for a more extensible DPDK.
> It is much cheaper than not having room for new features,
> or having features conflicting on the same mbuf field like here.
> 
>
  
Thomas Monjalon Oct. 27, 2020, 3:57 p.m. UTC | #6
27/10/2020 16:33, Nithin Dabilpuram:
> On Tue, Oct 27, 2020 at 03:33:35PM +0100, Thomas Monjalon wrote:
> > 27/10/2020 15:23, Nithin Dabilpuram:
> > > On Mon, Oct 26, 2020 at 11:20:02PM +0100, Thomas Monjalon wrote:
> > > > The node_mbuf_priv1 was stored in the deprecated mbuf field udata64.
> > > > It is moved to a dynamic field in order to allow removal of udata64.
> > > > 
> > > > Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> > > > ---
> > > > --- a/lib/librte_node/ip4_lookup.c
> > > > +++ b/lib/librte_node/ip4_lookup.c
> > > > +int node_mbuf_priv1_dynfield_offset;
> > > > +
> > > 
> > > This change doesn't work in secondary as it is process local memory.
> > 
> > Yes that's an issue.
> > Can we copy the value when starting a secondary process?
> 
> Currently there is no call back which will be called only in secondary.
> 
> Can you move this value to node->ctx 8'th byte offset ? Node context is of size
> 16 bytes and should be sufficient.
> Currently first 8 B of node->ctx is used to store that socket's lpm pointer.

Please would you be able to do such patch?
  
Nithin Dabilpuram Oct. 27, 2020, 4:16 p.m. UTC | #7
On Tue, Oct 27, 2020 at 04:57:16PM +0100, Thomas Monjalon wrote:
> 27/10/2020 16:33, Nithin Dabilpuram:
> > On Tue, Oct 27, 2020 at 03:33:35PM +0100, Thomas Monjalon wrote:
> > > 27/10/2020 15:23, Nithin Dabilpuram:
> > > > On Mon, Oct 26, 2020 at 11:20:02PM +0100, Thomas Monjalon wrote:
> > > > > The node_mbuf_priv1 was stored in the deprecated mbuf field udata64.
> > > > > It is moved to a dynamic field in order to allow removal of udata64.
> > > > > 
> > > > > Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> > > > > ---
> > > > > --- a/lib/librte_node/ip4_lookup.c
> > > > > +++ b/lib/librte_node/ip4_lookup.c
> > > > > +int node_mbuf_priv1_dynfield_offset;
> > > > > +
> > > > 
> > > > This change doesn't work in secondary as it is process local memory.
> > > 
> > > Yes that's an issue.
> > > Can we copy the value when starting a secondary process?
> > 
> > Currently there is no call back which will be called only in secondary.
> > 
> > Can you move this value to node->ctx 8'th byte offset ? Node context is of size
> > 16 bytes and should be sufficient.
> > Currently first 8 B of node->ctx is used to store that socket's lpm pointer.
> 
> Please would you be able to do such patch?

Ack, will send a patch on this thread so that you can include in next version.
> 
> 
>
  
Thomas Monjalon Oct. 27, 2020, 4:26 p.m. UTC | #8
27/10/2020 17:16, Nithin Dabilpuram:
> On Tue, Oct 27, 2020 at 04:57:16PM +0100, Thomas Monjalon wrote:
> > 27/10/2020 16:33, Nithin Dabilpuram:
> > > On Tue, Oct 27, 2020 at 03:33:35PM +0100, Thomas Monjalon wrote:
> > > > 27/10/2020 15:23, Nithin Dabilpuram:
> > > > > On Mon, Oct 26, 2020 at 11:20:02PM +0100, Thomas Monjalon wrote:
> > > > > > The node_mbuf_priv1 was stored in the deprecated mbuf field udata64.
> > > > > > It is moved to a dynamic field in order to allow removal of udata64.
> > > > > > 
> > > > > > Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> > > > > > ---
> > > > > > --- a/lib/librte_node/ip4_lookup.c
> > > > > > +++ b/lib/librte_node/ip4_lookup.c
> > > > > > +int node_mbuf_priv1_dynfield_offset;
> > > > > > +
> > > > > 
> > > > > This change doesn't work in secondary as it is process local memory.
> > > > 
> > > > Yes that's an issue.
> > > > Can we copy the value when starting a secondary process?
> > > 
> > > Currently there is no call back which will be called only in secondary.
> > > 
> > > Can you move this value to node->ctx 8'th byte offset ? Node context is of size
> > > 16 bytes and should be sufficient.
> > > Currently first 8 B of node->ctx is used to store that socket's lpm pointer.
> > 
> > Please would you be able to do such patch?
> 
> Ack, will send a patch on this thread so that you can include in next version.

Thanks
  

Patch

diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c
index 8835aab9dd..c2f6d653f9 100644
--- a/lib/librte_node/ip4_lookup.c
+++ b/lib/librte_node/ip4_lookup.c
@@ -21,6 +21,8 @@ 
 
 #include "node_private.h"
 
+int node_mbuf_priv1_dynfield_offset;
+
 #define IPV4_L3FWD_LPM_MAX_RULES 1024
 #define IPV4_L3FWD_LPM_NUMBER_TBL8S (1 << 8)
 
@@ -178,6 +180,11 @@  ip4_lookup_node_init(const struct rte_graph *graph, struct rte_node *node)
 	RTE_SET_USED(node);
 
 	if (!init_once) {
+		node_mbuf_priv1_dynfield_offset = rte_mbuf_dynfield_register(
+				&node_mbuf_priv1_dynfield_desc);
+		if (node_mbuf_priv1_dynfield_offset < 0)
+			return -rte_errno;
+
 		/* Setup LPM tables for all sockets */
 		RTE_LCORE_FOREACH(lcore_id)
 		{
diff --git a/lib/librte_node/ip4_rewrite.c b/lib/librte_node/ip4_rewrite.c
index bb7f671b5c..1c4e51968c 100644
--- a/lib/librte_node/ip4_rewrite.c
+++ b/lib/librte_node/ip4_rewrite.c
@@ -248,9 +248,19 @@  ip4_rewrite_node_process(struct rte_graph *graph, struct rte_node *node,
 static int
 ip4_rewrite_node_init(const struct rte_graph *graph, struct rte_node *node)
 {
+	static bool init_once;
 
 	RTE_SET_USED(graph);
 	RTE_SET_USED(node);
+
+	if (!init_once) {
+		node_mbuf_priv1_dynfield_offset = rte_mbuf_dynfield_register(
+				&node_mbuf_priv1_dynfield_desc);
+		if (node_mbuf_priv1_dynfield_offset < 0)
+			return -rte_errno;
+		init_once = true;
+	}
+
 	node_dbg("ip4_rewrite", "Initialized ip4_rewrite node initialized");
 
 	return 0;
diff --git a/lib/librte_node/node_private.h b/lib/librte_node/node_private.h
index ab7941c12b..359b40faed 100644
--- a/lib/librte_node/node_private.h
+++ b/lib/librte_node/node_private.h
@@ -8,6 +8,7 @@ 
 #include <rte_common.h>
 #include <rte_log.h>
 #include <rte_mbuf.h>
+#include <rte_mbuf_dyn.h>
 
 extern int rte_node_logtype;
 #define NODE_LOG(level, node_name, ...)                                        \
@@ -21,7 +22,6 @@  extern int rte_node_logtype;
 #define node_dbg(node_name, ...) NODE_LOG(DEBUG, node_name, __VA_ARGS__)
 
 /**
- *
  * Node mbuf private data to store next hop, ttl and checksum.
  */
 struct node_mbuf_priv1 {
@@ -37,6 +37,13 @@  struct node_mbuf_priv1 {
 	};
 };
 
+static const struct rte_mbuf_dynfield node_mbuf_priv1_dynfield_desc = {
+	.name = "rte_node_dynfield_priv1",
+	.size = sizeof(struct node_mbuf_priv1 *),
+	.align = __alignof__(struct node_mbuf_priv1 *),
+};
+extern int node_mbuf_priv1_dynfield_offset;
+
 /**
  * Node mbuf private area 2.
  */
@@ -60,7 +67,8 @@  struct node_mbuf_priv2 {
 static __rte_always_inline struct node_mbuf_priv1 *
 node_mbuf_priv1(struct rte_mbuf *m)
 {
-	return (struct node_mbuf_priv1 *)&m->udata64;
+	return RTE_MBUF_DYNFIELD(m,
+		node_mbuf_priv1_dynfield_offset, struct node_mbuf_priv1 *);
 }
 
 /**