[v2] mbuf: use C11 atomics for refcnt operations

Message ID 1594116633-14554-1-git-send-email-phil.yang@arm.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series [v2] mbuf: use C11 atomics for refcnt operations |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/travis-robot warning Travis build: failed
ci/iol-broadcom-Performance success Performance Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS

Commit Message

Phil Yang July 7, 2020, 10:10 a.m. UTC
  Use C11 atomics with explicit ordering instead of rte_atomic ops which
enforce unnecessary barriers on aarch64.

Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
v2:
Fix ABI issue: revert the rte_mbuf_ext_shared_info struct refcnt field
to refcnt_atomic.

 lib/librte_mbuf/rte_mbuf.c      |  1 -
 lib/librte_mbuf/rte_mbuf.h      | 19 ++++++++++---------
 lib/librte_mbuf/rte_mbuf_core.h | 11 +++--------
 3 files changed, 13 insertions(+), 18 deletions(-)
  

Comments

Stephen Hemminger July 7, 2020, 5:13 p.m. UTC | #1
On Tue,  7 Jul 2020 18:10:33 +0800
Phil Yang <phil.yang@arm.com> wrote:

> +	return (uint16_t)(__atomic_add_fetch((int16_t *)&shinfo->refcnt_atomic,
> +

Why do you need so many casts here?
The type of refcnt_atomic is now uint16 after your patch.
  
Phil Yang July 8, 2020, 4:48 a.m. UTC | #2
> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Wednesday, July 8, 2020 1:13 AM
> To: Phil Yang <Phil.Yang@arm.com>
> Cc: david.marchand@redhat.com; dev@dpdk.org; drc@linux.vnet.ibm.com;
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>;
> olivier.matz@6wind.com; Ruifeng Wang <Ruifeng.Wang@arm.com>; nd
> <nd@arm.com>
> Subject: Re: [dpdk-dev] [PATCH v2] mbuf: use C11 atomics for refcnt
> operations
> 
> On Tue,  7 Jul 2020 18:10:33 +0800
> Phil Yang <phil.yang@arm.com> wrote:
> 
> > +	return (uint16_t)(__atomic_add_fetch((int16_t *)&shinfo-
> >refcnt_atomic,
> > +
> 
> Why do you need so many casts here?
> The type of refcnt_atomic is now uint16 after your patch.

In the existing code, the input parameter type for this API is signed integer. For example:
drivers/net/netvsc/hn_rxtx.c:531
lib/librte_mbuf/rte_mbuf.h:1194

However, the output type of rte_mbuf_ext_refcnt related APIs is not uniform. We use these typecast to consistent with the current API definition.
  
Phil Yang July 8, 2020, 5:11 a.m. UTC | #3
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Phil Yang
> Sent: Tuesday, July 7, 2020 6:11 PM
> To: david.marchand@redhat.com; dev@dpdk.org
> Cc: drc@linux.vnet.ibm.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; olivier.matz@6wind.com; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; nd <nd@arm.com>
> Subject: [dpdk-dev] [PATCH v2] mbuf: use C11 atomics for refcnt operations
> 
> Use C11 atomics with explicit ordering instead of rte_atomic ops which
> enforce unnecessary barriers on aarch64.
> 
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
> v2:
> Fix ABI issue: revert the rte_mbuf_ext_shared_info struct refcnt field
> to refcnt_atomic.
> 

<snip>

> diff --git a/lib/librte_mbuf/rte_mbuf_core.h
> b/lib/librte_mbuf/rte_mbuf_core.h
> index 16600f1..806313a 100644
> --- a/lib/librte_mbuf/rte_mbuf_core.h
> +++ b/lib/librte_mbuf/rte_mbuf_core.h
> @@ -18,7 +18,6 @@
> 
>  #include <stdint.h>
>  #include <rte_compat.h>
> -#include <generic/rte_atomic.h>
> 
>  #ifdef __cplusplus
>  extern "C" {
> @@ -495,12 +494,8 @@ struct rte_mbuf {
>  	 * or non-atomic) is controlled by the
> CONFIG_RTE_MBUF_REFCNT_ATOMIC
>  	 * config option.
>  	 */
> -	RTE_STD_C11
> -	union {
> -		rte_atomic16_t refcnt_atomic; /**< Atomically accessed
> refcnt */
> -		/** Non-atomically accessed refcnt */
> -		uint16_t refcnt;
> -	};
> +	uint16_t refcnt;
> +
>  	uint16_t nb_segs;         /**< Number of segments. */
> 
>  	/** Input port (16 bits to support more than 256 virtual ports).
> @@ -679,7 +674,7 @@ typedef void
> (*rte_mbuf_extbuf_free_callback_t)(void *addr, void *opaque);
>  struct rte_mbuf_ext_shared_info {
>  	rte_mbuf_extbuf_free_callback_t free_cb; /**< Free callback
> function */
>  	void *fcb_opaque;                        /**< Free callback argument */
> -	rte_atomic16_t refcnt_atomic;        /**< Atomically accessed refcnt */
> +	uint16_t refcnt_atomic;              /**< Atomically accessed refcnt */

It still causes an ABI check failure in Travis CI on this type change.
I think we need an exception in libabigail.abignore for this. 

Thanks,
Phil
>  };
> 
>  /**< Maximum number of nb_segs allowed. */
> --
> 2.7.4
  
Olivier Matz July 8, 2020, 11:43 a.m. UTC | #4
On Wed, Jul 08, 2020 at 04:48:33AM +0000, Phil Yang wrote:
> > -----Original Message-----
> > From: Stephen Hemminger <stephen@networkplumber.org>
> > Sent: Wednesday, July 8, 2020 1:13 AM
> > To: Phil Yang <Phil.Yang@arm.com>
> > Cc: david.marchand@redhat.com; dev@dpdk.org; drc@linux.vnet.ibm.com;
> > Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>;
> > olivier.matz@6wind.com; Ruifeng Wang <Ruifeng.Wang@arm.com>; nd
> > <nd@arm.com>
> > Subject: Re: [dpdk-dev] [PATCH v2] mbuf: use C11 atomics for refcnt
> > operations
> > 
> > On Tue,  7 Jul 2020 18:10:33 +0800
> > Phil Yang <phil.yang@arm.com> wrote:
> > 
> > > +	return (uint16_t)(__atomic_add_fetch((int16_t *)&shinfo-
> > >refcnt_atomic,
> > > +
> > 
> > Why do you need so many casts here?
> > The type of refcnt_atomic is now uint16 after your patch.
> 
> In the existing code, the input parameter type for this API is signed integer. For example:
> drivers/net/netvsc/hn_rxtx.c:531
> lib/librte_mbuf/rte_mbuf.h:1194
> 
> However, the output type of rte_mbuf_ext_refcnt related APIs is not uniform. We use these typecast to consistent with the current API definition.

Would it make sense to cast the increment instead?

I mean:

	return __atomic_add_fetch(&m->refcnt, (uint16_t)value, __ATOMIC_ACQ_REL);

instead of:

	return (uint16_t)(__atomic_add_fetch((int16_t *)&m->refcnt, value,
				__ATOMIC_ACQ_REL));


The same could apply to __rte_pktmbuf_pinned_extbuf_decref() I think:

> -	if (likely(rte_atomic16_add_return
> -			(&shinfo->refcnt_atomic, -1)))
> +	if (likely(__atomic_add_fetch((int *)&shinfo->refcnt_atomic, -1,
> +				     __ATOMIC_ACQ_REL)))

By the way, why was the cast was to (int *) in this case? I think
it can overwrite fields beside refcnt.
  
Olivier Matz July 8, 2020, 11:44 a.m. UTC | #5
Hi,

On Tue, Jul 07, 2020 at 06:10:33PM +0800, Phil Yang wrote:
> Use C11 atomics with explicit ordering instead of rte_atomic ops which
> enforce unnecessary barriers on aarch64.
> 
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
> v2:
> Fix ABI issue: revert the rte_mbuf_ext_shared_info struct refcnt field
> to refcnt_atomic.
> 
>  lib/librte_mbuf/rte_mbuf.c      |  1 -
>  lib/librte_mbuf/rte_mbuf.h      | 19 ++++++++++---------
>  lib/librte_mbuf/rte_mbuf_core.h | 11 +++--------
>  3 files changed, 13 insertions(+), 18 deletions(-)
> 
> diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
> index ae91ae2..8a456e5 100644
> --- a/lib/librte_mbuf/rte_mbuf.c
> +++ b/lib/librte_mbuf/rte_mbuf.c
> @@ -22,7 +22,6 @@
>  #include <rte_eal.h>
>  #include <rte_per_lcore.h>
>  #include <rte_lcore.h>
> -#include <rte_atomic.h>
>  #include <rte_branch_prediction.h>
>  #include <rte_mempool.h>
>  #include <rte_mbuf.h>
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index f8e492e..4a7a98c 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -37,7 +37,6 @@
>  #include <rte_config.h>
>  #include <rte_mempool.h>
>  #include <rte_memory.h>
> -#include <rte_atomic.h>
>  #include <rte_prefetch.h>
>  #include <rte_branch_prediction.h>
>  #include <rte_byteorder.h>
> @@ -365,7 +364,7 @@ rte_pktmbuf_priv_flags(struct rte_mempool *mp)
>  static inline uint16_t
>  rte_mbuf_refcnt_read(const struct rte_mbuf *m)
>  {
> -	return (uint16_t)(rte_atomic16_read(&m->refcnt_atomic));
> +	return __atomic_load_n(&m->refcnt, __ATOMIC_RELAXED);
>  }
>  
>  /**
> @@ -378,14 +377,15 @@ rte_mbuf_refcnt_read(const struct rte_mbuf *m)
>  static inline void
>  rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value)
>  {
> -	rte_atomic16_set(&m->refcnt_atomic, (int16_t)new_value);
> +	__atomic_store_n(&m->refcnt, new_value, __ATOMIC_RELAXED);
>  }
>  
>  /* internal */
>  static inline uint16_t
>  __rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value)
>  {
> -	return (uint16_t)(rte_atomic16_add_return(&m->refcnt_atomic, value));
> +	return (uint16_t)(__atomic_add_fetch((int16_t *)&m->refcnt, value,
> +					__ATOMIC_ACQ_REL));
>  }
>  
>  /**
> @@ -466,7 +466,7 @@ rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value)
>  static inline uint16_t
>  rte_mbuf_ext_refcnt_read(const struct rte_mbuf_ext_shared_info *shinfo)
>  {
> -	return (uint16_t)(rte_atomic16_read(&shinfo->refcnt_atomic));
> +	return __atomic_load_n(&shinfo->refcnt_atomic, __ATOMIC_RELAXED);
>  }
>  
>  /**
> @@ -481,7 +481,7 @@ static inline void
>  rte_mbuf_ext_refcnt_set(struct rte_mbuf_ext_shared_info *shinfo,
>  	uint16_t new_value)
>  {
> -	rte_atomic16_set(&shinfo->refcnt_atomic, (int16_t)new_value);
> +	__atomic_store_n(&shinfo->refcnt_atomic, new_value, __ATOMIC_RELAXED);
>  }
>  
>  /**
> @@ -505,7 +505,8 @@ rte_mbuf_ext_refcnt_update(struct rte_mbuf_ext_shared_info *shinfo,
>  		return (uint16_t)value;
>  	}
>  
> -	return (uint16_t)rte_atomic16_add_return(&shinfo->refcnt_atomic, value);
> +	return (uint16_t)(__atomic_add_fetch((int16_t *)&shinfo->refcnt_atomic,
> +					    value, __ATOMIC_ACQ_REL));
>  }
>  
>  /** Mbuf prefetch */
> @@ -1304,8 +1305,8 @@ static inline int __rte_pktmbuf_pinned_extbuf_decref(struct rte_mbuf *m)
>  	 * Direct usage of add primitive to avoid
>  	 * duplication of comparing with one.
>  	 */
> -	if (likely(rte_atomic16_add_return
> -			(&shinfo->refcnt_atomic, -1)))
> +	if (likely(__atomic_add_fetch((int *)&shinfo->refcnt_atomic, -1,
> +				     __ATOMIC_ACQ_REL)))
>  		return 1;
>  
>  	/* Reinitialize counter before mbuf freeing. */
> diff --git a/lib/librte_mbuf/rte_mbuf_core.h b/lib/librte_mbuf/rte_mbuf_core.h
> index 16600f1..806313a 100644
> --- a/lib/librte_mbuf/rte_mbuf_core.h
> +++ b/lib/librte_mbuf/rte_mbuf_core.h
> @@ -18,7 +18,6 @@
>  
>  #include <stdint.h>
>  #include <rte_compat.h>
> -#include <generic/rte_atomic.h>
>  
>  #ifdef __cplusplus
>  extern "C" {
> @@ -495,12 +494,8 @@ struct rte_mbuf {
>  	 * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC
>  	 * config option.
>  	 */
> -	RTE_STD_C11
> -	union {
> -		rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
> -		/** Non-atomically accessed refcnt */
> -		uint16_t refcnt;
> -	};
> +	uint16_t refcnt;
> +

It seems this patch does 2 things:
- remove refcnt_atomic
- use C11 atomics

The first change is an API break. I think it should be announced in a deprecation
notice. The one about atomic does not talk about it.

So I suggest to keep refcnt_atomic until next version.


>  	uint16_t nb_segs;         /**< Number of segments. */
>  
>  	/** Input port (16 bits to support more than 256 virtual ports).
> @@ -679,7 +674,7 @@ typedef void (*rte_mbuf_extbuf_free_callback_t)(void *addr, void *opaque);
>  struct rte_mbuf_ext_shared_info {
>  	rte_mbuf_extbuf_free_callback_t free_cb; /**< Free callback function */
>  	void *fcb_opaque;                        /**< Free callback argument */
> -	rte_atomic16_t refcnt_atomic;        /**< Atomically accessed refcnt */
> +	uint16_t refcnt_atomic;              /**< Atomically accessed refcnt */
>  };
>  
>  /**< Maximum number of nb_segs allowed. */
> -- 
> 2.7.4
>
  
Phil Yang July 9, 2020, 9:52 a.m. UTC | #6
> -----Original Message-----
> From: Olivier Matz <olivier.matz@6wind.com>
> Sent: Wednesday, July 8, 2020 7:43 PM
> To: Phil Yang <Phil.Yang@arm.com>
> Cc: Stephen Hemminger <stephen@networkplumber.org>;
> david.marchand@redhat.com; dev@dpdk.org; drc@linux.vnet.ibm.com;
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; nd <nd@arm.com>
> Subject: Re: [dpdk-dev] [PATCH v2] mbuf: use C11 atomics for refcnt
> operations
> 
> On Wed, Jul 08, 2020 at 04:48:33AM +0000, Phil Yang wrote:
> > > -----Original Message-----
> > > From: Stephen Hemminger <stephen@networkplumber.org>
> > > Sent: Wednesday, July 8, 2020 1:13 AM
> > > To: Phil Yang <Phil.Yang@arm.com>
> > > Cc: david.marchand@redhat.com; dev@dpdk.org;
> drc@linux.vnet.ibm.com;
> > > Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>;
> > > olivier.matz@6wind.com; Ruifeng Wang <Ruifeng.Wang@arm.com>; nd
> > > <nd@arm.com>
> > > Subject: Re: [dpdk-dev] [PATCH v2] mbuf: use C11 atomics for refcnt
> > > operations
> > >
> > > On Tue,  7 Jul 2020 18:10:33 +0800
> > > Phil Yang <phil.yang@arm.com> wrote:
> > >
> > > > +	return (uint16_t)(__atomic_add_fetch((int16_t *)&shinfo-
> > > >refcnt_atomic,
> > > > +
> > >
> > > Why do you need so many casts here?
> > > The type of refcnt_atomic is now uint16 after your patch.
> >
> > In the existing code, the input parameter type for this API is signed integer.
> For example:
> > drivers/net/netvsc/hn_rxtx.c:531
> > lib/librte_mbuf/rte_mbuf.h:1194
> >
> > However, the output type of rte_mbuf_ext_refcnt related APIs is not
> uniform. We use these typecast to consistent with the current API definition.
> 
> Would it make sense to cast the increment instead?

Yes. It is better.
Thanks.

> 
> I mean:
> 
> 	return __atomic_add_fetch(&m->refcnt, (uint16_t)value,
> __ATOMIC_ACQ_REL);
> 
> instead of:
> 
> 	return (uint16_t)(__atomic_add_fetch((int16_t *)&m->refcnt, value,
> 				__ATOMIC_ACQ_REL));
> 
> 
> The same could apply to __rte_pktmbuf_pinned_extbuf_decref() I think:
> 
> > -	if (likely(rte_atomic16_add_return
> > -			(&shinfo->refcnt_atomic, -1)))
> > +	if (likely(__atomic_add_fetch((int *)&shinfo->refcnt_atomic, -1,
> > +				     __ATOMIC_ACQ_REL)))
> 
> By the way, why was the cast was to (int *) in this case? I think
> it can overwrite fields beside refcnt.
Fixed in the next version.

Thanks,
Phil
  
Phil Yang July 9, 2020, 10 a.m. UTC | #7
> -----Original Message-----
> From: Olivier Matz <olivier.matz@6wind.com>
> Sent: Wednesday, July 8, 2020 7:44 PM
> To: Phil Yang <Phil.Yang@arm.com>
> Cc: david.marchand@redhat.com; dev@dpdk.org; drc@linux.vnet.ibm.com;
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; nd <nd@arm.com>
> Subject: Re: [PATCH v2] mbuf: use C11 atomics for refcnt operations
> 
> Hi,
> 
> On Tue, Jul 07, 2020 at 06:10:33PM +0800, Phil Yang wrote:
> > Use C11 atomics with explicit ordering instead of rte_atomic ops which
> > enforce unnecessary barriers on aarch64.
> >
> > Signed-off-by: Phil Yang <phil.yang@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> > v2:
> > Fix ABI issue: revert the rte_mbuf_ext_shared_info struct refcnt field
> > to refcnt_atomic.
> >
> >  lib/librte_mbuf/rte_mbuf.c      |  1 -
> >  lib/librte_mbuf/rte_mbuf.h      | 19 ++++++++++---------
> >  lib/librte_mbuf/rte_mbuf_core.h | 11 +++--------
> >  3 files changed, 13 insertions(+), 18 deletions(-)
> >

<snip>

> 
> It seems this patch does 2 things:
> - remove refcnt_atomic
> - use C11 atomics
> 
> The first change is an API break. I think it should be announced in a
> deprecation
> notice. The one about atomic does not talk about it.
> 
> So I suggest to keep refcnt_atomic until next version.

Agreed.
I did a local test, this approach doesn't have any ABI breakage issue.
I will update in the next version. 

Thanks,
Phil

> 
> 
> >  	uint16_t nb_segs;         /**< Number of segments. */
> >
> >  	/** Input port (16 bits to support more than 256 virtual ports).
> > @@ -679,7 +674,7 @@ typedef void
> (*rte_mbuf_extbuf_free_callback_t)(void *addr, void *opaque);
> >  struct rte_mbuf_ext_shared_info {
> >  	rte_mbuf_extbuf_free_callback_t free_cb; /**< Free callback
> function */
> >  	void *fcb_opaque;                        /**< Free callback argument */
> > -	rte_atomic16_t refcnt_atomic;        /**< Atomically accessed refcnt */
> > +	uint16_t refcnt_atomic;              /**< Atomically accessed refcnt */
> >  };
> >
> >  /**< Maximum number of nb_segs allowed. */
> > --
> > 2.7.4
> >
  

Patch

diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index ae91ae2..8a456e5 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -22,7 +22,6 @@ 
 #include <rte_eal.h>
 #include <rte_per_lcore.h>
 #include <rte_lcore.h>
-#include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_mempool.h>
 #include <rte_mbuf.h>
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index f8e492e..4a7a98c 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -37,7 +37,6 @@ 
 #include <rte_config.h>
 #include <rte_mempool.h>
 #include <rte_memory.h>
-#include <rte_atomic.h>
 #include <rte_prefetch.h>
 #include <rte_branch_prediction.h>
 #include <rte_byteorder.h>
@@ -365,7 +364,7 @@  rte_pktmbuf_priv_flags(struct rte_mempool *mp)
 static inline uint16_t
 rte_mbuf_refcnt_read(const struct rte_mbuf *m)
 {
-	return (uint16_t)(rte_atomic16_read(&m->refcnt_atomic));
+	return __atomic_load_n(&m->refcnt, __ATOMIC_RELAXED);
 }
 
 /**
@@ -378,14 +377,15 @@  rte_mbuf_refcnt_read(const struct rte_mbuf *m)
 static inline void
 rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value)
 {
-	rte_atomic16_set(&m->refcnt_atomic, (int16_t)new_value);
+	__atomic_store_n(&m->refcnt, new_value, __ATOMIC_RELAXED);
 }
 
 /* internal */
 static inline uint16_t
 __rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value)
 {
-	return (uint16_t)(rte_atomic16_add_return(&m->refcnt_atomic, value));
+	return (uint16_t)(__atomic_add_fetch((int16_t *)&m->refcnt, value,
+					__ATOMIC_ACQ_REL));
 }
 
 /**
@@ -466,7 +466,7 @@  rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value)
 static inline uint16_t
 rte_mbuf_ext_refcnt_read(const struct rte_mbuf_ext_shared_info *shinfo)
 {
-	return (uint16_t)(rte_atomic16_read(&shinfo->refcnt_atomic));
+	return __atomic_load_n(&shinfo->refcnt_atomic, __ATOMIC_RELAXED);
 }
 
 /**
@@ -481,7 +481,7 @@  static inline void
 rte_mbuf_ext_refcnt_set(struct rte_mbuf_ext_shared_info *shinfo,
 	uint16_t new_value)
 {
-	rte_atomic16_set(&shinfo->refcnt_atomic, (int16_t)new_value);
+	__atomic_store_n(&shinfo->refcnt_atomic, new_value, __ATOMIC_RELAXED);
 }
 
 /**
@@ -505,7 +505,8 @@  rte_mbuf_ext_refcnt_update(struct rte_mbuf_ext_shared_info *shinfo,
 		return (uint16_t)value;
 	}
 
-	return (uint16_t)rte_atomic16_add_return(&shinfo->refcnt_atomic, value);
+	return (uint16_t)(__atomic_add_fetch((int16_t *)&shinfo->refcnt_atomic,
+					    value, __ATOMIC_ACQ_REL));
 }
 
 /** Mbuf prefetch */
@@ -1304,8 +1305,8 @@  static inline int __rte_pktmbuf_pinned_extbuf_decref(struct rte_mbuf *m)
 	 * Direct usage of add primitive to avoid
 	 * duplication of comparing with one.
 	 */
-	if (likely(rte_atomic16_add_return
-			(&shinfo->refcnt_atomic, -1)))
+	if (likely(__atomic_add_fetch((int *)&shinfo->refcnt_atomic, -1,
+				     __ATOMIC_ACQ_REL)))
 		return 1;
 
 	/* Reinitialize counter before mbuf freeing. */
diff --git a/lib/librte_mbuf/rte_mbuf_core.h b/lib/librte_mbuf/rte_mbuf_core.h
index 16600f1..806313a 100644
--- a/lib/librte_mbuf/rte_mbuf_core.h
+++ b/lib/librte_mbuf/rte_mbuf_core.h
@@ -18,7 +18,6 @@ 
 
 #include <stdint.h>
 #include <rte_compat.h>
-#include <generic/rte_atomic.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -495,12 +494,8 @@  struct rte_mbuf {
 	 * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC
 	 * config option.
 	 */
-	RTE_STD_C11
-	union {
-		rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
-		/** Non-atomically accessed refcnt */
-		uint16_t refcnt;
-	};
+	uint16_t refcnt;
+
 	uint16_t nb_segs;         /**< Number of segments. */
 
 	/** Input port (16 bits to support more than 256 virtual ports).
@@ -679,7 +674,7 @@  typedef void (*rte_mbuf_extbuf_free_callback_t)(void *addr, void *opaque);
 struct rte_mbuf_ext_shared_info {
 	rte_mbuf_extbuf_free_callback_t free_cb; /**< Free callback function */
 	void *fcb_opaque;                        /**< Free callback argument */
-	rte_atomic16_t refcnt_atomic;        /**< Atomically accessed refcnt */
+	uint16_t refcnt_atomic;              /**< Atomically accessed refcnt */
 };
 
 /**< Maximum number of nb_segs allowed. */