[v7,2/4] mbuf: remove rte marker fields

Message ID 1710972098-2209-3-git-send-email-roretzla@linux.microsoft.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series remove use of RTE_MARKER fields in libraries |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Tyler Retzlaff March 20, 2024, 10:01 p.m. UTC
  RTE_MARKER typedefs are a GCC extension unsupported by MSVC. Remove
RTE_MARKER fields from rte_mbuf struct.

Maintain alignment of fields after removed cacheline1 marker by placing
C11 alignas(RTE_CACHE_LINE_MIN_SIZE).

Provide new rearm_data and rx_descriptor_fields1 fields in anonymous
unions as single element arrays of with types matching the original
markers to maintain API compatibility.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 doc/guides/rel_notes/release_24_03.rst |   2 +
 lib/mbuf/rte_mbuf.h                    |   4 +-
 lib/mbuf/rte_mbuf_core.h               | 188 ++++++++++++++++++---------------
 3 files changed, 104 insertions(+), 90 deletions(-)
  

Comments

Bruce Richardson March 21, 2024, 10:32 a.m. UTC | #1
On Wed, Mar 20, 2024 at 03:01:36PM -0700, Tyler Retzlaff wrote:
> RTE_MARKER typedefs are a GCC extension unsupported by MSVC. Remove
> RTE_MARKER fields from rte_mbuf struct.
> 
> Maintain alignment of fields after removed cacheline1 marker by placing
> C11 alignas(RTE_CACHE_LINE_MIN_SIZE).
> 
> Provide new rearm_data and rx_descriptor_fields1 fields in anonymous
> unions as single element arrays of with types matching the original
> markers to maintain API compatibility.
> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  doc/guides/rel_notes/release_24_03.rst |   2 +
>  lib/mbuf/rte_mbuf.h                    |   4 +-
>  lib/mbuf/rte_mbuf_core.h               | 188 ++++++++++++++++++---------------
>  3 files changed, 104 insertions(+), 90 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/release_24_03.rst b/doc/guides/rel_notes/release_24_03.rst
> index 14826ea..4f18cca 100644
> --- a/doc/guides/rel_notes/release_24_03.rst
> +++ b/doc/guides/rel_notes/release_24_03.rst
> @@ -216,6 +216,8 @@ Removed Items
>  
>  * acc101: Removed obsolete code for non productized HW variant.
>  
> +* mbuf: ``RTE_MARKER`` fields ``cacheline0`` and ``cacheline1``
> +  have been removed from ``struct rte_mbuf``.
>  
>  API Changes
>  -----------
> diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
> index 286b32b..4c4722e 100644
> --- a/lib/mbuf/rte_mbuf.h
> +++ b/lib/mbuf/rte_mbuf.h
> @@ -108,7 +108,7 @@
>  static inline void
>  rte_mbuf_prefetch_part1(struct rte_mbuf *m)
>  {
> -	rte_prefetch0(&m->cacheline0);
> +	rte_prefetch0(m);
>  }
>  
>  /**
> @@ -126,7 +126,7 @@
>  rte_mbuf_prefetch_part2(struct rte_mbuf *m)
>  {
>  #if RTE_CACHE_LINE_SIZE == 64
> -	rte_prefetch0(&m->cacheline1);
> +	rte_prefetch0(RTE_PTR_ADD(m, RTE_CACHE_LINE_MIN_SIZE));
>  #else
>  	RTE_SET_USED(m);
>  #endif
> diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> index 9f58076..665213c 100644
> --- a/lib/mbuf/rte_mbuf_core.h
> +++ b/lib/mbuf/rte_mbuf_core.h
> @@ -465,8 +465,6 @@ enum {
>   * The generic rte_mbuf, containing a packet mbuf.
>   */
>  struct __rte_cache_aligned rte_mbuf {
> -	RTE_MARKER cacheline0;
> -
>  	void *buf_addr;           /**< Virtual address of segment buffer. */
>  #if RTE_IOVA_IN_MBUF
>  	/**
> @@ -488,116 +486,130 @@ struct __rte_cache_aligned rte_mbuf {
>  #endif
>  
>  	/* next 8 bytes are initialised on RX descriptor rearm */
> -	RTE_MARKER64 rearm_data;
> -	uint16_t data_off;
> -
> -	/**
> -	 * Reference counter. Its size should at least equal to the size
> -	 * of port field (16 bits), to support zero-copy broadcast.
> -	 * It should only be accessed using the following functions:
> -	 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
> -	 * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
> -	 * or non-atomic) is controlled by the RTE_MBUF_REFCNT_ATOMIC flag.
> -	 */
> -	RTE_ATOMIC(uint16_t) refcnt;
> +	union {
> +		uint64_t rearm_data[1];
> +		__extension__
> +		struct {
> +			uint16_t data_off;
> +
> +			/**
> +			 * Reference counter. Its size should at least equal to the size
> +			 * of port field (16 bits), to support zero-copy broadcast.
> +			 * It should only be accessed using the following functions:
> +			 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
> +			 * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
> +			 * or non-atomic) is controlled by the RTE_MBUF_REFCNT_ATOMIC flag.
> +			 */
> +			RTE_ATOMIC(uint16_t) refcnt;
>  
> -	/**
> -	 * Number of segments. Only valid for the first segment of an mbuf
> -	 * chain.
> -	 */
> -	uint16_t nb_segs;
> +			/**
> +			 * Number of segments. Only valid for the first segment of an mbuf
> +			 * chain.
> +			 */
> +			uint16_t nb_segs;
>  
> -	/** Input port (16 bits to support more than 256 virtual ports).
> -	 * The event eth Tx adapter uses this field to specify the output port.
> -	 */
> -	uint16_t port;
> +			/** Input port (16 bits to support more than 256 virtual ports).
> +			 * The event eth Tx adapter uses this field to specify the output port.
> +			 */
> +			uint16_t port;
> +		};
> +	};
>  
>  	uint64_t ol_flags;        /**< Offload features. */
>  
>  	/* remaining bytes are set on RX when pulling packet from descriptor */
> -	RTE_MARKER rx_descriptor_fields1;
> -
> -	/*
> -	 * The packet type, which is the combination of outer/inner L2, L3, L4
> -	 * and tunnel types. The packet_type is about data really present in the
> -	 * mbuf. Example: if vlan stripping is enabled, a received vlan packet
> -	 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
> -	 * vlan is stripped from the data.
> -	 */
>  	union {
> -		uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
> +		void *rx_descriptor_fields1[1];

Can we make this array the actual size of all the fields, rather than just
an 8-byte value? That would allow the right think to be done if assigning
the descriptor fields from one mbuf to another, or when using memset or
memcpy on them.

/Bruce
  
Tyler Retzlaff March 21, 2024, 3:31 p.m. UTC | #2
On Thu, Mar 21, 2024 at 10:32:02AM +0000, Bruce Richardson wrote:
> On Wed, Mar 20, 2024 at 03:01:36PM -0700, Tyler Retzlaff wrote:
> > RTE_MARKER typedefs are a GCC extension unsupported by MSVC. Remove
> > RTE_MARKER fields from rte_mbuf struct.
> > 
> > Maintain alignment of fields after removed cacheline1 marker by placing
> > C11 alignas(RTE_CACHE_LINE_MIN_SIZE).
> > 
> > Provide new rearm_data and rx_descriptor_fields1 fields in anonymous
> > unions as single element arrays of with types matching the original
> > markers to maintain API compatibility.
> > 
> > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > ---
> >  doc/guides/rel_notes/release_24_03.rst |   2 +
> >  lib/mbuf/rte_mbuf.h                    |   4 +-
> >  lib/mbuf/rte_mbuf_core.h               | 188 ++++++++++++++++++---------------
> >  3 files changed, 104 insertions(+), 90 deletions(-)
> > 
> > diff --git a/doc/guides/rel_notes/release_24_03.rst b/doc/guides/rel_notes/release_24_03.rst
> > index 14826ea..4f18cca 100644
> > --- a/doc/guides/rel_notes/release_24_03.rst
> > +++ b/doc/guides/rel_notes/release_24_03.rst
> > @@ -216,6 +216,8 @@ Removed Items
> >  
> >  * acc101: Removed obsolete code for non productized HW variant.
> >  
> > +* mbuf: ``RTE_MARKER`` fields ``cacheline0`` and ``cacheline1``
> > +  have been removed from ``struct rte_mbuf``.
> >  
> >  API Changes
> >  -----------
> > diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
> > index 286b32b..4c4722e 100644
> > --- a/lib/mbuf/rte_mbuf.h
> > +++ b/lib/mbuf/rte_mbuf.h
> > @@ -108,7 +108,7 @@
> >  static inline void
> >  rte_mbuf_prefetch_part1(struct rte_mbuf *m)
> >  {
> > -	rte_prefetch0(&m->cacheline0);
> > +	rte_prefetch0(m);
> >  }
> >  
> >  /**
> > @@ -126,7 +126,7 @@
> >  rte_mbuf_prefetch_part2(struct rte_mbuf *m)
> >  {
> >  #if RTE_CACHE_LINE_SIZE == 64
> > -	rte_prefetch0(&m->cacheline1);
> > +	rte_prefetch0(RTE_PTR_ADD(m, RTE_CACHE_LINE_MIN_SIZE));
> >  #else
> >  	RTE_SET_USED(m);
> >  #endif
> > diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> > index 9f58076..665213c 100644
> > --- a/lib/mbuf/rte_mbuf_core.h
> > +++ b/lib/mbuf/rte_mbuf_core.h
> > @@ -465,8 +465,6 @@ enum {
> >   * The generic rte_mbuf, containing a packet mbuf.
> >   */
> >  struct __rte_cache_aligned rte_mbuf {
> > -	RTE_MARKER cacheline0;
> > -
> >  	void *buf_addr;           /**< Virtual address of segment buffer. */
> >  #if RTE_IOVA_IN_MBUF
> >  	/**
> > @@ -488,116 +486,130 @@ struct __rte_cache_aligned rte_mbuf {
> >  #endif
> >  
> >  	/* next 8 bytes are initialised on RX descriptor rearm */
> > -	RTE_MARKER64 rearm_data;
> > -	uint16_t data_off;
> > -
> > -	/**
> > -	 * Reference counter. Its size should at least equal to the size
> > -	 * of port field (16 bits), to support zero-copy broadcast.
> > -	 * It should only be accessed using the following functions:
> > -	 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
> > -	 * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
> > -	 * or non-atomic) is controlled by the RTE_MBUF_REFCNT_ATOMIC flag.
> > -	 */
> > -	RTE_ATOMIC(uint16_t) refcnt;
> > +	union {
> > +		uint64_t rearm_data[1];
> > +		__extension__
> > +		struct {
> > +			uint16_t data_off;
> > +
> > +			/**
> > +			 * Reference counter. Its size should at least equal to the size
> > +			 * of port field (16 bits), to support zero-copy broadcast.
> > +			 * It should only be accessed using the following functions:
> > +			 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
> > +			 * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
> > +			 * or non-atomic) is controlled by the RTE_MBUF_REFCNT_ATOMIC flag.
> > +			 */
> > +			RTE_ATOMIC(uint16_t) refcnt;
> >  
> > -	/**
> > -	 * Number of segments. Only valid for the first segment of an mbuf
> > -	 * chain.
> > -	 */
> > -	uint16_t nb_segs;
> > +			/**
> > +			 * Number of segments. Only valid for the first segment of an mbuf
> > +			 * chain.
> > +			 */
> > +			uint16_t nb_segs;
> >  
> > -	/** Input port (16 bits to support more than 256 virtual ports).
> > -	 * The event eth Tx adapter uses this field to specify the output port.
> > -	 */
> > -	uint16_t port;
> > +			/** Input port (16 bits to support more than 256 virtual ports).
> > +			 * The event eth Tx adapter uses this field to specify the output port.
> > +			 */
> > +			uint16_t port;
> > +		};
> > +	};
> >  
> >  	uint64_t ol_flags;        /**< Offload features. */
> >  
> >  	/* remaining bytes are set on RX when pulling packet from descriptor */
> > -	RTE_MARKER rx_descriptor_fields1;
> > -
> > -	/*
> > -	 * The packet type, which is the combination of outer/inner L2, L3, L4
> > -	 * and tunnel types. The packet_type is about data really present in the
> > -	 * mbuf. Example: if vlan stripping is enabled, a received vlan packet
> > -	 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
> > -	 * vlan is stripped from the data.
> > -	 */
> >  	union {
> > -		uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
> > +		void *rx_descriptor_fields1[1];
> 
> Can we make this array the actual size of all the fields, rather than just
> an 8-byte value? That would allow the right think to be done if assigning
> the descriptor fields from one mbuf to another, or when using memset or
> memcpy on them.

Morten pointed out in a previous version that the marker being an array of
void * was a bug to begin with.

The other field of the union is 24 bytes. I suppose it would be possible
to conditionally compile the array to be either 3 or 6 elements. I guess
this would be an improvement over what the marker is doing now.

Just a reminder that we cannot 'correct' the type since that would
require adaptation of calling code.

What do others think? Keep it as a single element array or conditional
compile based on sizeof(void *)?

> 
> /Bruce
  
Morten Brørup March 21, 2024, 4:19 p.m. UTC | #3
> From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> Sent: Thursday, 21 March 2024 16.31
> 
> On Thu, Mar 21, 2024 at 10:32:02AM +0000, Bruce Richardson wrote:
> > On Wed, Mar 20, 2024 at 03:01:36PM -0700, Tyler Retzlaff wrote:
> > > RTE_MARKER typedefs are a GCC extension unsupported by MSVC. Remove
> > > RTE_MARKER fields from rte_mbuf struct.
> > >
> > > Maintain alignment of fields after removed cacheline1 marker by placing
> > > C11 alignas(RTE_CACHE_LINE_MIN_SIZE).
> > >
> > > Provide new rearm_data and rx_descriptor_fields1 fields in anonymous
> > > unions as single element arrays of with types matching the original
> > > markers to maintain API compatibility.
> > >
> > > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > > ---
> > >  doc/guides/rel_notes/release_24_03.rst |   2 +
> > >  lib/mbuf/rte_mbuf.h                    |   4 +-
> > >  lib/mbuf/rte_mbuf_core.h               | 188 ++++++++++++++++++----------
> -----
> > >  3 files changed, 104 insertions(+), 90 deletions(-)
> > >
> > > diff --git a/doc/guides/rel_notes/release_24_03.rst
> b/doc/guides/rel_notes/release_24_03.rst
> > > index 14826ea..4f18cca 100644
> > > --- a/doc/guides/rel_notes/release_24_03.rst
> > > +++ b/doc/guides/rel_notes/release_24_03.rst
> > > @@ -216,6 +216,8 @@ Removed Items
> > >
> > >  * acc101: Removed obsolete code for non productized HW variant.
> > >
> > > +* mbuf: ``RTE_MARKER`` fields ``cacheline0`` and ``cacheline1``
> > > +  have been removed from ``struct rte_mbuf``.
> > >
> > >  API Changes
> > >  -----------
> > > diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
> > > index 286b32b..4c4722e 100644
> > > --- a/lib/mbuf/rte_mbuf.h
> > > +++ b/lib/mbuf/rte_mbuf.h
> > > @@ -108,7 +108,7 @@
> > >  static inline void
> > >  rte_mbuf_prefetch_part1(struct rte_mbuf *m)
> > >  {
> > > -	rte_prefetch0(&m->cacheline0);
> > > +	rte_prefetch0(m);
> > >  }
> > >
> > >  /**
> > > @@ -126,7 +126,7 @@
> > >  rte_mbuf_prefetch_part2(struct rte_mbuf *m)
> > >  {
> > >  #if RTE_CACHE_LINE_SIZE == 64
> > > -	rte_prefetch0(&m->cacheline1);
> > > +	rte_prefetch0(RTE_PTR_ADD(m, RTE_CACHE_LINE_MIN_SIZE));
> > >  #else
> > >  	RTE_SET_USED(m);
> > >  #endif
> > > diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> > > index 9f58076..665213c 100644
> > > --- a/lib/mbuf/rte_mbuf_core.h
> > > +++ b/lib/mbuf/rte_mbuf_core.h
> > > @@ -465,8 +465,6 @@ enum {
> > >   * The generic rte_mbuf, containing a packet mbuf.
> > >   */
> > >  struct __rte_cache_aligned rte_mbuf {
> > > -	RTE_MARKER cacheline0;
> > > -
> > >  	void *buf_addr;           /**< Virtual address of segment buffer. */
> > >  #if RTE_IOVA_IN_MBUF
> > >  	/**
> > > @@ -488,116 +486,130 @@ struct __rte_cache_aligned rte_mbuf {
> > >  #endif
> > >
> > >  	/* next 8 bytes are initialised on RX descriptor rearm */
> > > -	RTE_MARKER64 rearm_data;
> > > -	uint16_t data_off;
> > > -
> > > -	/**
> > > -	 * Reference counter. Its size should at least equal to the size
> > > -	 * of port field (16 bits), to support zero-copy broadcast.
> > > -	 * It should only be accessed using the following functions:
> > > -	 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
> > > -	 * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
> > > -	 * or non-atomic) is controlled by the RTE_MBUF_REFCNT_ATOMIC flag.
> > > -	 */
> > > -	RTE_ATOMIC(uint16_t) refcnt;
> > > +	union {
> > > +		uint64_t rearm_data[1];
> > > +		__extension__
> > > +		struct {
> > > +			uint16_t data_off;
> > > +
> > > +			/**
> > > +			 * Reference counter. Its size should at least equal to
> the size
> > > +			 * of port field (16 bits), to support zero-copy
> broadcast.
> > > +			 * It should only be accessed using the following
> functions:
> > > +			 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
> > > +			 * rte_mbuf_refcnt_set(). The functionality of these
> functions (atomic,
> > > +			 * or non-atomic) is controlled by the
> RTE_MBUF_REFCNT_ATOMIC flag.
> > > +			 */
> > > +			RTE_ATOMIC(uint16_t) refcnt;
> > >
> > > -	/**
> > > -	 * Number of segments. Only valid for the first segment of an mbuf
> > > -	 * chain.
> > > -	 */
> > > -	uint16_t nb_segs;
> > > +			/**
> > > +			 * Number of segments. Only valid for the first segment of
> an mbuf
> > > +			 * chain.
> > > +			 */
> > > +			uint16_t nb_segs;
> > >
> > > -	/** Input port (16 bits to support more than 256 virtual ports).
> > > -	 * The event eth Tx adapter uses this field to specify the output port.
> > > -	 */
> > > -	uint16_t port;
> > > +			/** Input port (16 bits to support more than 256 virtual
> ports).
> > > +			 * The event eth Tx adapter uses this field to specify the
> output port.
> > > +			 */
> > > +			uint16_t port;
> > > +		};
> > > +	};
> > >
> > >  	uint64_t ol_flags;        /**< Offload features. */
> > >
> > >  	/* remaining bytes are set on RX when pulling packet from descriptor */
> > > -	RTE_MARKER rx_descriptor_fields1;
> > > -
> > > -	/*
> > > -	 * The packet type, which is the combination of outer/inner L2, L3, L4
> > > -	 * and tunnel types. The packet_type is about data really present in the
> > > -	 * mbuf. Example: if vlan stripping is enabled, a received vlan packet
> > > -	 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
> > > -	 * vlan is stripped from the data.
> > > -	 */
> > >  	union {
> > > -		uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
> > > +		void *rx_descriptor_fields1[1];
> >
> > Can we make this array the actual size of all the fields, rather than just
> > an 8-byte value? That would allow the right think to be done if assigning
> > the descriptor fields from one mbuf to another, or when using memset or
> > memcpy on them.
> 
> Morten pointed out in a previous version that the marker being an array of
> void * was a bug to begin with.
> 
> The other field of the union is 24 bytes. I suppose it would be possible
> to conditionally compile the array to be either 3 or 6 elements. I guess
> this would be an improvement over what the marker is doing now.

Agree; it would be an improvement to give it the same size as the other struct in the union.

> 
> Just a reminder that we cannot 'correct' the type since that would
> require adaptation of calling code.

I considered the following:
Only drivers should be using rx_descriptor_fields1.
We could probably change it to an array of uint64_t (or uint32_t, or even uint8_t) without breaking anything, because the drivers should only be using the address of rx_descriptor_fields1, not the value of it.
However, keeping it an array of void* is certain to avoid any 32/64 bit CPU alignment related issues, because void* is the natural size to any CPU.

> 
> What do others think? Keep it as a single element array or conditional
> compile based on sizeof(void *)?

Going for an array of 3/6 void pointers should be safe. I'm in favor of this.

At your discretion, if you think it clarifies anything, consider adding a comment that the type void* is used for historical reasons (or something similar).
  
Morten Brørup March 26, 2024, 11:12 a.m. UTC | #4
> From: Tyler Retzlaff [mailto:roretzla@linux.microsoft.com]
> Sent: Wednesday, 20 March 2024 23.02
> 
> RTE_MARKER typedefs are a GCC extension unsupported by MSVC. Remove
> RTE_MARKER fields from rte_mbuf struct.
> 
> Maintain alignment of fields after removed cacheline1 marker by placing
> C11 alignas(RTE_CACHE_LINE_MIN_SIZE).
> 
> Provide new rearm_data and rx_descriptor_fields1 fields in anonymous
> unions as single element arrays of with types matching the original
> markers to maintain API compatibility.
> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>

I agree with the concepts in this patch.
A few comments inline below.

> ---
>  doc/guides/rel_notes/release_24_03.rst |   2 +
>  lib/mbuf/rte_mbuf.h                    |   4 +-
>  lib/mbuf/rte_mbuf_core.h               | 188 ++++++++++++++++++--------
> -------
>  3 files changed, 104 insertions(+), 90 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/release_24_03.rst
> b/doc/guides/rel_notes/release_24_03.rst
> index 14826ea..4f18cca 100644
> --- a/doc/guides/rel_notes/release_24_03.rst
> +++ b/doc/guides/rel_notes/release_24_03.rst
> @@ -216,6 +216,8 @@ Removed Items
> 
>  * acc101: Removed obsolete code for non productized HW variant.
> 
> +* mbuf: ``RTE_MARKER`` fields ``cacheline0`` and ``cacheline1``
> +  have been removed from ``struct rte_mbuf``.
> 
>  API Changes
>  -----------
> diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
> index 286b32b..4c4722e 100644
> --- a/lib/mbuf/rte_mbuf.h
> +++ b/lib/mbuf/rte_mbuf.h
> @@ -108,7 +108,7 @@
>  static inline void
>  rte_mbuf_prefetch_part1(struct rte_mbuf *m)
>  {
> -	rte_prefetch0(&m->cacheline0);
> +	rte_prefetch0(m);
>  }
> 
>  /**
> @@ -126,7 +126,7 @@
>  rte_mbuf_prefetch_part2(struct rte_mbuf *m)
>  {
>  #if RTE_CACHE_LINE_SIZE == 64
> -	rte_prefetch0(&m->cacheline1);
> +	rte_prefetch0(RTE_PTR_ADD(m, RTE_CACHE_LINE_MIN_SIZE));
>  #else
>  	RTE_SET_USED(m);
>  #endif
> diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
> index 9f58076..665213c 100644
> --- a/lib/mbuf/rte_mbuf_core.h
> +++ b/lib/mbuf/rte_mbuf_core.h
> @@ -465,8 +465,6 @@ enum {
>   * The generic rte_mbuf, containing a packet mbuf.
>   */
>  struct __rte_cache_aligned rte_mbuf {
> -	RTE_MARKER cacheline0;
> -
>  	void *buf_addr;           /**< Virtual address of segment buffer.
> */
>  #if RTE_IOVA_IN_MBUF
>  	/**
> @@ -488,116 +486,130 @@ struct __rte_cache_aligned rte_mbuf {
>  #endif
> 
>  	/* next 8 bytes are initialised on RX descriptor rearm */
> -	RTE_MARKER64 rearm_data;
> -	uint16_t data_off;
> -
> -	/**
> -	 * Reference counter. Its size should at least equal to the size
> -	 * of port field (16 bits), to support zero-copy broadcast.
> -	 * It should only be accessed using the following functions:
> -	 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
> -	 * rte_mbuf_refcnt_set(). The functionality of these functions
> (atomic,
> -	 * or non-atomic) is controlled by the RTE_MBUF_REFCNT_ATOMIC
> flag.
> -	 */
> -	RTE_ATOMIC(uint16_t) refcnt;
> +	union {
> +		uint64_t rearm_data[1];
> +		__extension__
> +		struct {
> +			uint16_t data_off;
> +
> +			/**
> +			 * Reference counter. Its size should at least equal
> to the size
> +			 * of port field (16 bits), to support zero-copy
> broadcast.
> +			 * It should only be accessed using the following
> functions:
> +			 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(),
> and
> +			 * rte_mbuf_refcnt_set(). The functionality of these
> functions (atomic,
> +			 * or non-atomic) is controlled by the
> RTE_MBUF_REFCNT_ATOMIC flag.
> +			 */
> +			RTE_ATOMIC(uint16_t) refcnt;
> 
> -	/**
> -	 * Number of segments. Only valid for the first segment of an mbuf
> -	 * chain.
> -	 */
> -	uint16_t nb_segs;
> +			/**
> +			 * Number of segments. Only valid for the first
> segment of an mbuf
> +			 * chain.
> +			 */
> +			uint16_t nb_segs;
> 
> -	/** Input port (16 bits to support more than 256 virtual ports).
> -	 * The event eth Tx adapter uses this field to specify the output
> port.
> -	 */
> -	uint16_t port;
> +			/** Input port (16 bits to support more than 256
> virtual ports).
> +			 * The event eth Tx adapter uses this field to
> specify the output port.
> +			 */
> +			uint16_t port;
> +		};
> +	};
> 
>  	uint64_t ol_flags;        /**< Offload features. */
> 
>  	/* remaining bytes are set on RX when pulling packet from
> descriptor */
> -	RTE_MARKER rx_descriptor_fields1;
> -
> -	/*
> -	 * The packet type, which is the combination of outer/inner L2,
> L3, L4
> -	 * and tunnel types. The packet_type is about data really present
> in the
> -	 * mbuf. Example: if vlan stripping is enabled, a received vlan
> packet
> -	 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because
> the
> -	 * vlan is stripped from the data.
> -	 */
>  	union {
> -		uint32_t packet_type; /**< L2/L3/L4 and tunnel information.
> */
> +		void *rx_descriptor_fields1[1];

(As suggested by Bruce, and already discussed, please make this array 3/6 elements.)

>  		__extension__
>  		struct {
> -			uint8_t l2_type:4;   /**< (Outer) L2 type. */
> -			uint8_t l3_type:4;   /**< (Outer) L3 type. */
> -			uint8_t l4_type:4;   /**< (Outer) L4 type. */
> -			uint8_t tun_type:4;  /**< Tunnel type. */
> +			/*
> +			 * The packet type, which is the combination of
> outer/inner L2, L3, L4
> +			 * and tunnel types. The packet_type is about data
> really present in the
> +			 * mbuf. Example: if vlan stripping is enabled, a
> received vlan packet
> +			 * would have RTE_PTYPE_L2_ETHER and not
> RTE_PTYPE_L2_VLAN because the
> +			 * vlan is stripped from the data.
> +			 */
>  			union {
> -				uint8_t inner_esp_next_proto;
> -				/**< ESP next protocol type, valid if
> -				 * RTE_PTYPE_TUNNEL_ESP tunnel type is set
> -				 * on both Tx and Rx.
> -				 */
> +				uint32_t packet_type; /**< L2/L3/L4 and tunnel
> information. */
>  				__extension__
>  				struct {
> -					uint8_t inner_l2_type:4;
> -					/**< Inner L2 type. */
> -					uint8_t inner_l3_type:4;
> -					/**< Inner L3 type. */
> +					uint8_t l2_type:4;   /**< (Outer) L2
> type. */
> +					uint8_t l3_type:4;   /**< (Outer) L3
> type. */
> +					uint8_t l4_type:4;   /**< (Outer) L4
> type. */
> +					uint8_t tun_type:4;  /**< Tunnel type. */
> +					union {
> +						uint8_t inner_esp_next_proto;
> +						/**< ESP next protocol type, valid
> if
> +						 * RTE_PTYPE_TUNNEL_ESP tunnel type
> is set
> +						 * on both Tx and Rx.
> +						 */

If the comment describing the field doesn't fit on the same line as the field, please put the description before the field, not after it.

This applies to many more field descriptions in the rte_mbuf structure.

Please note that multiple field's descriptions were also "after" before this patch, so perhaps the descriptions should be moved around in a preceding patch. Or not at all, if minimal changes are preferred.
I wouldn't mind moving them with this patch, but some people prefer multiple smaller patches for separate changes.

> +						__extension__
> +						struct {
> +							uint8_t inner_l2_type:4;
> +							/**< Inner L2 type. */
> +							uint8_t inner_l3_type:4;
> +							/**< Inner L3 type. */
> +						};
> +					};
> +					uint8_t inner_l4_type:4; /**< Inner L4
> type. */
>  				};
>  			};
> -			uint8_t inner_l4_type:4; /**< Inner L4 type. */
> -		};
> -	};
> 
> -	uint32_t pkt_len;         /**< Total pkt len: sum of all segments.
> */
> -	uint16_t data_len;        /**< Amount of data in segment buffer.
> */
> -	/** VLAN TCI (CPU order), valid if RTE_MBUF_F_RX_VLAN is set. */
> -	uint16_t vlan_tci;
> +			/*
> +			 * pkt_len is not actually part of
> rx_descriptor_fields1
> +			 * but is included here to account for changes in
> alignment
> +			 * and offset for void * on 32-bit vs 64-bit targets.
> +			 */

I disagree with this comment.
For non-segmented packets, it is part of rx_descriptor_fields1.
Also, the high-level comment says "/* remaining bytes are set on RX when pulling packet from descriptor */", which also covers the pkt_len field.
I think you should not add the comment.

> +			uint32_t pkt_len;         /**< Total pkt len: sum of
> all segments. */
> 
> -	union {
> -		union {
> -			uint32_t rss;     /**< RSS hash result if RSS enabled
> */
> -			struct {
> +			uint16_t data_len;        /**< Amount of data in
> segment buffer. */
> +			/** VLAN TCI (CPU order), valid if RTE_MBUF_F_RX_VLAN
> is set. */
> +			uint16_t vlan_tci;
> +
> +			union {
>  				union {
> +					uint32_t rss;     /**< RSS hash result if
> RSS enabled */
>  					struct {
> -						uint16_t hash;
> -						uint16_t id;
> -					};
> -					uint32_t lo;
> -					/**< Second 4 flexible bytes */
> -				};
> -				uint32_t hi;
> -				/**< First 4 flexible bytes or FD ID, dependent
> -				 * on RTE_MBUF_F_RX_FDIR_* flag in ol_flags.
> -				 */
> -			} fdir;	/**< Filter identifier if FDIR enabled */
> -			struct rte_mbuf_sched sched;
> -			/**< Hierarchical scheduler : 8 bytes */
> -			struct {
> -				uint32_t reserved1;
> -				uint16_t reserved2;
> -				uint16_t txq;
> -				/**< The event eth Tx adapter uses this field
> -				 * to store Tx queue id.
> -				 * @see rte_event_eth_tx_adapter_txq_set()
> -				 */
> -			} txadapter; /**< Eventdev ethdev Tx adapter */
> -			uint32_t usr;
> -			/**< User defined tags. See rte_distributor_process()
> */
> -		} hash;                   /**< hash information */
> -	};
> +						union {
> +							struct {
> +								uint16_t hash;
> +								uint16_t id;
> +							};
> +							uint32_t lo;
> +							/**< Second 4 flexible bytes
> */
> +						};
> +						uint32_t hi;
> +						/**< First 4 flexible bytes or FD
> ID, dependent
> +						 * on RTE_MBUF_F_RX_FDIR_* flag in
> ol_flags.
> +						 */
> +					} fdir;	/**< Filter identifier if
> FDIR enabled */
> +					struct rte_mbuf_sched sched;
> +					/**< Hierarchical scheduler : 8 bytes */
> +					struct {
> +						uint32_t reserved1;
> +						uint16_t reserved2;
> +						uint16_t txq;
> +						/**< The event eth Tx adapter uses
> this field
> +						 * to store Tx queue id.
> +						 * @see
> rte_event_eth_tx_adapter_txq_set()
> +						 */
> +					} txadapter; /**< Eventdev ethdev Tx
> adapter */
> +					uint32_t usr;
> +					/**< User defined tags. See
> rte_distributor_process() */
> +				} hash;                   /**< hash information
> */
> +			};
> 
> -	/** Outer VLAN TCI (CPU order), valid if RTE_MBUF_F_RX_QINQ is
> set. */
> -	uint16_t vlan_tci_outer;
> +			/** Outer VLAN TCI (CPU order), valid if
> RTE_MBUF_F_RX_QINQ is set. */
> +			uint16_t vlan_tci_outer;
> 
> -	uint16_t buf_len;         /**< Length of segment buffer. */
> +			uint16_t buf_len;         /**< Length of segment
> buffer. */
> +		};
> +	};
> 
>  	struct rte_mempool *pool; /**< Pool from which mbuf was allocated.
> */
> 
>  	/* second cache line - fields only used in slow path or on TX */
> -	alignas(RTE_CACHE_LINE_MIN_SIZE) RTE_MARKER cacheline1;
> -
> +	alignas(RTE_CACHE_LINE_MIN_SIZE)

I strongly prefer having this alignas() immediately preceding the field it applies to; otherwise it might be overlooked.
I.e. both after #if RTE_IOVA_IN_MBUF and #else, instead of only before #if RTE_IOVA_IN_MBUF.

>  #if RTE_IOVA_IN_MBUF
>  	/**
>  	 * Next segment of scattered packet. Must be NULL in the last
> --
> 1.8.3.1
  

Patch

diff --git a/doc/guides/rel_notes/release_24_03.rst b/doc/guides/rel_notes/release_24_03.rst
index 14826ea..4f18cca 100644
--- a/doc/guides/rel_notes/release_24_03.rst
+++ b/doc/guides/rel_notes/release_24_03.rst
@@ -216,6 +216,8 @@  Removed Items
 
 * acc101: Removed obsolete code for non productized HW variant.
 
+* mbuf: ``RTE_MARKER`` fields ``cacheline0`` and ``cacheline1``
+  have been removed from ``struct rte_mbuf``.
 
 API Changes
 -----------
diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h
index 286b32b..4c4722e 100644
--- a/lib/mbuf/rte_mbuf.h
+++ b/lib/mbuf/rte_mbuf.h
@@ -108,7 +108,7 @@ 
 static inline void
 rte_mbuf_prefetch_part1(struct rte_mbuf *m)
 {
-	rte_prefetch0(&m->cacheline0);
+	rte_prefetch0(m);
 }
 
 /**
@@ -126,7 +126,7 @@ 
 rte_mbuf_prefetch_part2(struct rte_mbuf *m)
 {
 #if RTE_CACHE_LINE_SIZE == 64
-	rte_prefetch0(&m->cacheline1);
+	rte_prefetch0(RTE_PTR_ADD(m, RTE_CACHE_LINE_MIN_SIZE));
 #else
 	RTE_SET_USED(m);
 #endif
diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h
index 9f58076..665213c 100644
--- a/lib/mbuf/rte_mbuf_core.h
+++ b/lib/mbuf/rte_mbuf_core.h
@@ -465,8 +465,6 @@  enum {
  * The generic rte_mbuf, containing a packet mbuf.
  */
 struct __rte_cache_aligned rte_mbuf {
-	RTE_MARKER cacheline0;
-
 	void *buf_addr;           /**< Virtual address of segment buffer. */
 #if RTE_IOVA_IN_MBUF
 	/**
@@ -488,116 +486,130 @@  struct __rte_cache_aligned rte_mbuf {
 #endif
 
 	/* next 8 bytes are initialised on RX descriptor rearm */
-	RTE_MARKER64 rearm_data;
-	uint16_t data_off;
-
-	/**
-	 * Reference counter. Its size should at least equal to the size
-	 * of port field (16 bits), to support zero-copy broadcast.
-	 * It should only be accessed using the following functions:
-	 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
-	 * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
-	 * or non-atomic) is controlled by the RTE_MBUF_REFCNT_ATOMIC flag.
-	 */
-	RTE_ATOMIC(uint16_t) refcnt;
+	union {
+		uint64_t rearm_data[1];
+		__extension__
+		struct {
+			uint16_t data_off;
+
+			/**
+			 * Reference counter. Its size should at least equal to the size
+			 * of port field (16 bits), to support zero-copy broadcast.
+			 * It should only be accessed using the following functions:
+			 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
+			 * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
+			 * or non-atomic) is controlled by the RTE_MBUF_REFCNT_ATOMIC flag.
+			 */
+			RTE_ATOMIC(uint16_t) refcnt;
 
-	/**
-	 * Number of segments. Only valid for the first segment of an mbuf
-	 * chain.
-	 */
-	uint16_t nb_segs;
+			/**
+			 * Number of segments. Only valid for the first segment of an mbuf
+			 * chain.
+			 */
+			uint16_t nb_segs;
 
-	/** Input port (16 bits to support more than 256 virtual ports).
-	 * The event eth Tx adapter uses this field to specify the output port.
-	 */
-	uint16_t port;
+			/** Input port (16 bits to support more than 256 virtual ports).
+			 * The event eth Tx adapter uses this field to specify the output port.
+			 */
+			uint16_t port;
+		};
+	};
 
 	uint64_t ol_flags;        /**< Offload features. */
 
 	/* remaining bytes are set on RX when pulling packet from descriptor */
-	RTE_MARKER rx_descriptor_fields1;
-
-	/*
-	 * The packet type, which is the combination of outer/inner L2, L3, L4
-	 * and tunnel types. The packet_type is about data really present in the
-	 * mbuf. Example: if vlan stripping is enabled, a received vlan packet
-	 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
-	 * vlan is stripped from the data.
-	 */
 	union {
-		uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
+		void *rx_descriptor_fields1[1];
 		__extension__
 		struct {
-			uint8_t l2_type:4;   /**< (Outer) L2 type. */
-			uint8_t l3_type:4;   /**< (Outer) L3 type. */
-			uint8_t l4_type:4;   /**< (Outer) L4 type. */
-			uint8_t tun_type:4;  /**< Tunnel type. */
+			/*
+			 * The packet type, which is the combination of outer/inner L2, L3, L4
+			 * and tunnel types. The packet_type is about data really present in the
+			 * mbuf. Example: if vlan stripping is enabled, a received vlan packet
+			 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
+			 * vlan is stripped from the data.
+			 */
 			union {
-				uint8_t inner_esp_next_proto;
-				/**< ESP next protocol type, valid if
-				 * RTE_PTYPE_TUNNEL_ESP tunnel type is set
-				 * on both Tx and Rx.
-				 */
+				uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
 				__extension__
 				struct {
-					uint8_t inner_l2_type:4;
-					/**< Inner L2 type. */
-					uint8_t inner_l3_type:4;
-					/**< Inner L3 type. */
+					uint8_t l2_type:4;   /**< (Outer) L2 type. */
+					uint8_t l3_type:4;   /**< (Outer) L3 type. */
+					uint8_t l4_type:4;   /**< (Outer) L4 type. */
+					uint8_t tun_type:4;  /**< Tunnel type. */
+					union {
+						uint8_t inner_esp_next_proto;
+						/**< ESP next protocol type, valid if
+						 * RTE_PTYPE_TUNNEL_ESP tunnel type is set
+						 * on both Tx and Rx.
+						 */
+						__extension__
+						struct {
+							uint8_t inner_l2_type:4;
+							/**< Inner L2 type. */
+							uint8_t inner_l3_type:4;
+							/**< Inner L3 type. */
+						};
+					};
+					uint8_t inner_l4_type:4; /**< Inner L4 type. */
 				};
 			};
-			uint8_t inner_l4_type:4; /**< Inner L4 type. */
-		};
-	};
 
-	uint32_t pkt_len;         /**< Total pkt len: sum of all segments. */
-	uint16_t data_len;        /**< Amount of data in segment buffer. */
-	/** VLAN TCI (CPU order), valid if RTE_MBUF_F_RX_VLAN is set. */
-	uint16_t vlan_tci;
+			/*
+			 * pkt_len is not actually part of rx_descriptor_fields1
+			 * but is included here to account for changes in alignment
+			 * and offset for void * on 32-bit vs 64-bit targets.
+			 */
+			uint32_t pkt_len;         /**< Total pkt len: sum of all segments. */
 
-	union {
-		union {
-			uint32_t rss;     /**< RSS hash result if RSS enabled */
-			struct {
+			uint16_t data_len;        /**< Amount of data in segment buffer. */
+			/** VLAN TCI (CPU order), valid if RTE_MBUF_F_RX_VLAN is set. */
+			uint16_t vlan_tci;
+
+			union {
 				union {
+					uint32_t rss;     /**< RSS hash result if RSS enabled */
 					struct {
-						uint16_t hash;
-						uint16_t id;
-					};
-					uint32_t lo;
-					/**< Second 4 flexible bytes */
-				};
-				uint32_t hi;
-				/**< First 4 flexible bytes or FD ID, dependent
-				 * on RTE_MBUF_F_RX_FDIR_* flag in ol_flags.
-				 */
-			} fdir;	/**< Filter identifier if FDIR enabled */
-			struct rte_mbuf_sched sched;
-			/**< Hierarchical scheduler : 8 bytes */
-			struct {
-				uint32_t reserved1;
-				uint16_t reserved2;
-				uint16_t txq;
-				/**< The event eth Tx adapter uses this field
-				 * to store Tx queue id.
-				 * @see rte_event_eth_tx_adapter_txq_set()
-				 */
-			} txadapter; /**< Eventdev ethdev Tx adapter */
-			uint32_t usr;
-			/**< User defined tags. See rte_distributor_process() */
-		} hash;                   /**< hash information */
-	};
+						union {
+							struct {
+								uint16_t hash;
+								uint16_t id;
+							};
+							uint32_t lo;
+							/**< Second 4 flexible bytes */
+						};
+						uint32_t hi;
+						/**< First 4 flexible bytes or FD ID, dependent
+						 * on RTE_MBUF_F_RX_FDIR_* flag in ol_flags.
+						 */
+					} fdir;	/**< Filter identifier if FDIR enabled */
+					struct rte_mbuf_sched sched;
+					/**< Hierarchical scheduler : 8 bytes */
+					struct {
+						uint32_t reserved1;
+						uint16_t reserved2;
+						uint16_t txq;
+						/**< The event eth Tx adapter uses this field
+						 * to store Tx queue id.
+						 * @see rte_event_eth_tx_adapter_txq_set()
+						 */
+					} txadapter; /**< Eventdev ethdev Tx adapter */
+					uint32_t usr;
+					/**< User defined tags. See rte_distributor_process() */
+				} hash;                   /**< hash information */
+			};
 
-	/** Outer VLAN TCI (CPU order), valid if RTE_MBUF_F_RX_QINQ is set. */
-	uint16_t vlan_tci_outer;
+			/** Outer VLAN TCI (CPU order), valid if RTE_MBUF_F_RX_QINQ is set. */
+			uint16_t vlan_tci_outer;
 
-	uint16_t buf_len;         /**< Length of segment buffer. */
+			uint16_t buf_len;         /**< Length of segment buffer. */
+		};
+	};
 
 	struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */
 
 	/* second cache line - fields only used in slow path or on TX */
-	alignas(RTE_CACHE_LINE_MIN_SIZE) RTE_MARKER cacheline1;
-
+	alignas(RTE_CACHE_LINE_MIN_SIZE)
 #if RTE_IOVA_IN_MBUF
 	/**
 	 * Next segment of scattered packet. Must be NULL in the last