[v2,09/19] rcu: use rte optional stdatomic API

Message ID 1697574677-16578-10-git-send-email-roretzla@linux.microsoft.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Series: use rte optional stdatomic API

Checks

Context        Check     Description
ci/checkpatch  warning   coding style issues

Commit Message

Tyler Retzlaff Oct. 17, 2023, 8:31 p.m. UTC
  Replace the use of the gcc builtin __atomic_xxx intrinsics with the
corresponding rte_atomic_xxx optional stdatomic API.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/rcu/rte_rcu_qsbr.c | 48 +++++++++++++++++------------------
 lib/rcu/rte_rcu_qsbr.h | 68 +++++++++++++++++++++++++-------------------------
 2 files changed, 58 insertions(+), 58 deletions(-)
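
For illustration, a minimal sketch of the conversion pattern applied throughout the diff below, assuming the rte_stdatomic.h wrappers; the counter here is just an example object, not part of the patch:

#include <stdint.h>
#include <rte_stdatomic.h>

/* Example object declared with the optional stdatomic wrappers. */
static RTE_ATOMIC(uint64_t) example_counter;

static inline uint64_t
example_read_and_bump(void)
{
	/* was: __atomic_load_n(&example_counter, __ATOMIC_ACQUIRE) */
	uint64_t v = rte_atomic_load_explicit(&example_counter,
			rte_memory_order_acquire);

	/* was: __atomic_fetch_add(&example_counter, 1, __ATOMIC_RELAXED) */
	rte_atomic_fetch_add_explicit(&example_counter, 1,
			rte_memory_order_relaxed);
	return v;
}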
  

Comments

Ruifeng Wang Oct. 25, 2023, 9:41 a.m. UTC | #1
> -----Original Message-----
> From: Tyler Retzlaff <roretzla@linux.microsoft.com>
> Sent: Wednesday, October 18, 2023 4:31 AM
> To: dev@dpdk.org
> Cc: Akhil Goyal <gakhil@marvell.com>; Anatoly Burakov <anatoly.burakov@intel.com>; Andrew
> Rybchenko <andrew.rybchenko@oktetlabs.ru>; Bruce Richardson <bruce.richardson@intel.com>;
> Chenbo Xia <chenbo.xia@intel.com>; Ciara Power <ciara.power@intel.com>; David Christensen
> <drc@linux.vnet.ibm.com>; David Hunt <david.hunt@intel.com>; Dmitry Kozlyuk
> <dmitry.kozliuk@gmail.com>; Dmitry Malloy <dmitrym@microsoft.com>; Elena Agostini
> <eagostini@nvidia.com>; Erik Gabriel Carrillo <erik.g.carrillo@intel.com>; Fan Zhang
> <fanzhang.oss@gmail.com>; Ferruh Yigit <ferruh.yigit@amd.com>; Harman Kalra
> <hkalra@marvell.com>; Harry van Haaren <harry.van.haaren@intel.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; jerinj@marvell.com; Konstantin Ananyev
> <konstantin.v.ananyev@yandex.ru>; Matan Azrad <matan@nvidia.com>; Maxime Coquelin
> <maxime.coquelin@redhat.com>; Narcisa Ana Maria Vasile <navasile@linux.microsoft.com>;
> Nicolas Chautru <nicolas.chautru@intel.com>; Olivier Matz <olivier.matz@6wind.com>; Ori
> Kam <orika@nvidia.com>; Pallavi Kadam <pallavi.kadam@intel.com>; Pavan Nikhilesh
> <pbhagavatula@marvell.com>; Reshma Pattan <reshma.pattan@intel.com>; Sameh Gobriel
> <sameh.gobriel@intel.com>; Shijith Thotton <sthotton@marvell.com>; Sivaprasad Tummala
> <sivaprasad.tummala@amd.com>; Stephen Hemminger <stephen@networkplumber.org>; Suanming Mou
> <suanmingm@nvidia.com>; Sunil Kumar Kori <skori@marvell.com>; thomas@monjalon.net;
> Viacheslav Ovsiienko <viacheslavo@nvidia.com>; Vladimir Medvedkin
> <vladimir.medvedkin@intel.com>; Yipeng Wang <yipeng1.wang@intel.com>; Tyler Retzlaff
> <roretzla@linux.microsoft.com>
> Subject: [PATCH v2 09/19] rcu: use rte optional stdatomic API
> 
> Replace the use of gcc builtin __atomic_xxx intrinsics with corresponding rte_atomic_xxx
> optional stdatomic API
> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  lib/rcu/rte_rcu_qsbr.c | 48 +++++++++++++++++------------------
>  lib/rcu/rte_rcu_qsbr.h | 68 +++++++++++++++++++++++++-------------------------
>  2 files changed, 58 insertions(+), 58 deletions(-)
> 
> diff --git a/lib/rcu/rte_rcu_qsbr.c b/lib/rcu/rte_rcu_qsbr.c index 17be93e..4dc7714 100644
> --- a/lib/rcu/rte_rcu_qsbr.c
> +++ b/lib/rcu/rte_rcu_qsbr.c
> @@ -102,21 +102,21 @@
>  	 * go out of sync. Hence, additional checks are required.
>  	 */
>  	/* Check if the thread is already registered */
> -	old_bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> -					__ATOMIC_RELAXED);
> +	old_bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> +					rte_memory_order_relaxed);
>  	if (old_bmap & 1UL << id)
>  		return 0;
> 
>  	do {
>  		new_bmap = old_bmap | (1UL << id);
> -		success = __atomic_compare_exchange(
> +		success = rte_atomic_compare_exchange_strong_explicit(
>  					__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> -					&old_bmap, &new_bmap, 0,
> -					__ATOMIC_RELEASE, __ATOMIC_RELAXED);
> +					&old_bmap, new_bmap,
> +					rte_memory_order_release, rte_memory_order_relaxed);
> 
>  		if (success)
> -			__atomic_fetch_add(&v->num_threads,
> -						1, __ATOMIC_RELAXED);
> +			rte_atomic_fetch_add_explicit(&v->num_threads,
> +						1, rte_memory_order_relaxed);
>  		else if (old_bmap & (1UL << id))
>  			/* Someone else registered this thread.
>  			 * Counter should not be incremented.
> @@ -154,8 +154,8 @@
>  	 * go out of sync. Hence, additional checks are required.
>  	 */
>  	/* Check if the thread is already unregistered */
> -	old_bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> -					__ATOMIC_RELAXED);
> +	old_bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> +					rte_memory_order_relaxed);
>  	if (!(old_bmap & (1UL << id)))
>  		return 0;
> 
> @@ -165,14 +165,14 @@
>  		 * completed before removal of the thread from the list of
>  		 * reporting threads.
>  		 */
> -		success = __atomic_compare_exchange(
> +		success = rte_atomic_compare_exchange_strong_explicit(
>  					__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> -					&old_bmap, &new_bmap, 0,
> -					__ATOMIC_RELEASE, __ATOMIC_RELAXED);
> +					&old_bmap, new_bmap,
> +					rte_memory_order_release, rte_memory_order_relaxed);
> 
>  		if (success)
> -			__atomic_fetch_sub(&v->num_threads,
> -						1, __ATOMIC_RELAXED);
> +			rte_atomic_fetch_sub_explicit(&v->num_threads,
> +						1, rte_memory_order_relaxed);
>  		else if (!(old_bmap & (1UL << id)))
>  			/* Someone else unregistered this thread.
>  			 * Counter should not be incremented.
> @@ -227,8 +227,8 @@
> 
>  	fprintf(f, "  Registered thread IDs = ");
>  	for (i = 0; i < v->num_elems; i++) {
> -		bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> -					__ATOMIC_ACQUIRE);
> +		bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> +					rte_memory_order_acquire);
>  		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
>  		while (bmap) {
>  			t = __builtin_ctzl(bmap);
> @@ -241,26 +241,26 @@
>  	fprintf(f, "\n");
> 
>  	fprintf(f, "  Token = %" PRIu64 "\n",
> -			__atomic_load_n(&v->token, __ATOMIC_ACQUIRE));
> +			rte_atomic_load_explicit(&v->token, rte_memory_order_acquire));
> 
>  	fprintf(f, "  Least Acknowledged Token = %" PRIu64 "\n",
> -			__atomic_load_n(&v->acked_token, __ATOMIC_ACQUIRE));
> +			rte_atomic_load_explicit(&v->acked_token, rte_memory_order_acquire));
> 
>  	fprintf(f, "Quiescent State Counts for readers:\n");
>  	for (i = 0; i < v->num_elems; i++) {
> -		bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> -					__ATOMIC_ACQUIRE);
> +		bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> +					rte_memory_order_acquire);
>  		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
>  		while (bmap) {
>  			t = __builtin_ctzl(bmap);
>  			fprintf(f, "thread ID = %u, count = %" PRIu64 ", lock count = %u\n",
>  				id + t,
> -				__atomic_load_n(
> +				rte_atomic_load_explicit(
>  					&v->qsbr_cnt[id + t].cnt,
> -					__ATOMIC_RELAXED),
> -				__atomic_load_n(
> +					rte_memory_order_relaxed),
> +				rte_atomic_load_explicit(
>  					&v->qsbr_cnt[id + t].lock_cnt,
> -					__ATOMIC_RELAXED));
> +					rte_memory_order_relaxed));
>  			bmap &= ~(1UL << t);
>  		}
>  	}
> diff --git a/lib/rcu/rte_rcu_qsbr.h b/lib/rcu/rte_rcu_qsbr.h index 87e1b55..9f4aed2 100644
> --- a/lib/rcu/rte_rcu_qsbr.h
> +++ b/lib/rcu/rte_rcu_qsbr.h
> @@ -63,11 +63,11 @@
>   * Given thread id needs to be converted to index into the array and
>   * the id within the array element.
>   */
> -#define __RTE_QSBR_THRID_ARRAY_ELM_SIZE (sizeof(uint64_t) * 8)
> +#define __RTE_QSBR_THRID_ARRAY_ELM_SIZE (sizeof(RTE_ATOMIC(uint64_t)) * 8)
>  #define __RTE_QSBR_THRID_ARRAY_SIZE(max_threads) \
>  	RTE_ALIGN(RTE_ALIGN_MUL_CEIL(max_threads, \
>  		__RTE_QSBR_THRID_ARRAY_ELM_SIZE) >> 3, RTE_CACHE_LINE_SIZE)
> -#define __RTE_QSBR_THRID_ARRAY_ELM(v, i) ((uint64_t *) \
> +#define __RTE_QSBR_THRID_ARRAY_ELM(v, i) ((uint64_t __rte_atomic *) \

Is it equivalent to ((RTE_ATOMIC(uint64_t) *)?

>  	((struct rte_rcu_qsbr_cnt *)(v + 1) + v->max_threads) + i)
>  #define __RTE_QSBR_THRID_INDEX_SHIFT 6
>  #define __RTE_QSBR_THRID_MASK 0x3f
> @@ -75,13 +75,13 @@
> 

<snip>
  
Tyler Retzlaff Oct. 25, 2023, 10:38 p.m. UTC | #2
On Wed, Oct 25, 2023 at 09:41:22AM +0000, Ruifeng Wang wrote:
> > -----Original Message-----
> > From: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > Sent: Wednesday, October 18, 2023 4:31 AM
> > To: dev@dpdk.org
> > Cc: Akhil Goyal <gakhil@marvell.com>; Anatoly Burakov <anatoly.burakov@intel.com>; Andrew
> > Rybchenko <andrew.rybchenko@oktetlabs.ru>; Bruce Richardson <bruce.richardson@intel.com>;
> > Chenbo Xia <chenbo.xia@intel.com>; Ciara Power <ciara.power@intel.com>; David Christensen
> > <drc@linux.vnet.ibm.com>; David Hunt <david.hunt@intel.com>; Dmitry Kozlyuk
> > <dmitry.kozliuk@gmail.com>; Dmitry Malloy <dmitrym@microsoft.com>; Elena Agostini
> > <eagostini@nvidia.com>; Erik Gabriel Carrillo <erik.g.carrillo@intel.com>; Fan Zhang
> > <fanzhang.oss@gmail.com>; Ferruh Yigit <ferruh.yigit@amd.com>; Harman Kalra
> > <hkalra@marvell.com>; Harry van Haaren <harry.van.haaren@intel.com>; Honnappa Nagarahalli
> > <Honnappa.Nagarahalli@arm.com>; jerinj@marvell.com; Konstantin Ananyev
> > <konstantin.v.ananyev@yandex.ru>; Matan Azrad <matan@nvidia.com>; Maxime Coquelin
> > <maxime.coquelin@redhat.com>; Narcisa Ana Maria Vasile <navasile@linux.microsoft.com>;
> > Nicolas Chautru <nicolas.chautru@intel.com>; Olivier Matz <olivier.matz@6wind.com>; Ori
> > Kam <orika@nvidia.com>; Pallavi Kadam <pallavi.kadam@intel.com>; Pavan Nikhilesh
> > <pbhagavatula@marvell.com>; Reshma Pattan <reshma.pattan@intel.com>; Sameh Gobriel
> > <sameh.gobriel@intel.com>; Shijith Thotton <sthotton@marvell.com>; Sivaprasad Tummala
> > <sivaprasad.tummala@amd.com>; Stephen Hemminger <stephen@networkplumber.org>; Suanming Mou
> > <suanmingm@nvidia.com>; Sunil Kumar Kori <skori@marvell.com>; thomas@monjalon.net;
> > Viacheslav Ovsiienko <viacheslavo@nvidia.com>; Vladimir Medvedkin
> > <vladimir.medvedkin@intel.com>; Yipeng Wang <yipeng1.wang@intel.com>; Tyler Retzlaff
> > <roretzla@linux.microsoft.com>
> > Subject: [PATCH v2 09/19] rcu: use rte optional stdatomic API
> > 
> > Replace the use of gcc builtin __atomic_xxx intrinsics with corresponding rte_atomic_xxx
> > optional stdatomic API
> > 
> > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > ---
> >  lib/rcu/rte_rcu_qsbr.c | 48 +++++++++++++++++------------------
> >  lib/rcu/rte_rcu_qsbr.h | 68 +++++++++++++++++++++++++-------------------------
> >  2 files changed, 58 insertions(+), 58 deletions(-)
> > 
> > diff --git a/lib/rcu/rte_rcu_qsbr.c b/lib/rcu/rte_rcu_qsbr.c index 17be93e..4dc7714 100644
> > --- a/lib/rcu/rte_rcu_qsbr.c
> > +++ b/lib/rcu/rte_rcu_qsbr.c
> > @@ -102,21 +102,21 @@
> >  	 * go out of sync. Hence, additional checks are required.
> >  	 */
> >  	/* Check if the thread is already registered */
> > -	old_bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > -					__ATOMIC_RELAXED);
> > +	old_bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > +					rte_memory_order_relaxed);
> >  	if (old_bmap & 1UL << id)
> >  		return 0;
> > 
> >  	do {
> >  		new_bmap = old_bmap | (1UL << id);
> > -		success = __atomic_compare_exchange(
> > +		success = rte_atomic_compare_exchange_strong_explicit(
> >  					__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > -					&old_bmap, &new_bmap, 0,
> > -					__ATOMIC_RELEASE, __ATOMIC_RELAXED);
> > +					&old_bmap, new_bmap,
> > +					rte_memory_order_release, rte_memory_order_relaxed);
> > 
> >  		if (success)
> > -			__atomic_fetch_add(&v->num_threads,
> > -						1, __ATOMIC_RELAXED);
> > +			rte_atomic_fetch_add_explicit(&v->num_threads,
> > +						1, rte_memory_order_relaxed);
> >  		else if (old_bmap & (1UL << id))
> >  			/* Someone else registered this thread.
> >  			 * Counter should not be incremented.
> > @@ -154,8 +154,8 @@
> >  	 * go out of sync. Hence, additional checks are required.
> >  	 */
> >  	/* Check if the thread is already unregistered */
> > -	old_bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > -					__ATOMIC_RELAXED);
> > +	old_bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > +					rte_memory_order_relaxed);
> >  	if (!(old_bmap & (1UL << id)))
> >  		return 0;
> > 
> > @@ -165,14 +165,14 @@
> >  		 * completed before removal of the thread from the list of
> >  		 * reporting threads.
> >  		 */
> > -		success = __atomic_compare_exchange(
> > +		success = rte_atomic_compare_exchange_strong_explicit(
> >  					__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > -					&old_bmap, &new_bmap, 0,
> > -					__ATOMIC_RELEASE, __ATOMIC_RELAXED);
> > +					&old_bmap, new_bmap,
> > +					rte_memory_order_release, rte_memory_order_relaxed);
> > 
> >  		if (success)
> > -			__atomic_fetch_sub(&v->num_threads,
> > -						1, __ATOMIC_RELAXED);
> > +			rte_atomic_fetch_sub_explicit(&v->num_threads,
> > +						1, rte_memory_order_relaxed);
> >  		else if (!(old_bmap & (1UL << id)))
> >  			/* Someone else unregistered this thread.
> >  			 * Counter should not be incremented.
> > @@ -227,8 +227,8 @@
> > 
> >  	fprintf(f, "  Registered thread IDs = ");
> >  	for (i = 0; i < v->num_elems; i++) {
> > -		bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > -					__ATOMIC_ACQUIRE);
> > +		bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > +					rte_memory_order_acquire);
> >  		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
> >  		while (bmap) {
> >  			t = __builtin_ctzl(bmap);
> > @@ -241,26 +241,26 @@
> >  	fprintf(f, "\n");
> > 
> >  	fprintf(f, "  Token = %" PRIu64 "\n",
> > -			__atomic_load_n(&v->token, __ATOMIC_ACQUIRE));
> > +			rte_atomic_load_explicit(&v->token, rte_memory_order_acquire));
> > 
> >  	fprintf(f, "  Least Acknowledged Token = %" PRIu64 "\n",
> > -			__atomic_load_n(&v->acked_token, __ATOMIC_ACQUIRE));
> > +			rte_atomic_load_explicit(&v->acked_token,
> > +rte_memory_order_acquire));
> > 
> >  	fprintf(f, "Quiescent State Counts for readers:\n");
> >  	for (i = 0; i < v->num_elems; i++) {
> > -		bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > -					__ATOMIC_ACQUIRE);
> > +		bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > +					rte_memory_order_acquire);
> >  		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
> >  		while (bmap) {
> >  			t = __builtin_ctzl(bmap);
> >  			fprintf(f, "thread ID = %u, count = %" PRIu64 ", lock count = %u\n",
> >  				id + t,
> > -				__atomic_load_n(
> > +				rte_atomic_load_explicit(
> >  					&v->qsbr_cnt[id + t].cnt,
> > -					__ATOMIC_RELAXED),
> > -				__atomic_load_n(
> > +					rte_memory_order_relaxed),
> > +				rte_atomic_load_explicit(
> >  					&v->qsbr_cnt[id + t].lock_cnt,
> > -					__ATOMIC_RELAXED));
> > +					rte_memory_order_relaxed));
> >  			bmap &= ~(1UL << t);
> >  		}
> >  	}
> > diff --git a/lib/rcu/rte_rcu_qsbr.h b/lib/rcu/rte_rcu_qsbr.h index 87e1b55..9f4aed2 100644
> > --- a/lib/rcu/rte_rcu_qsbr.h
> > +++ b/lib/rcu/rte_rcu_qsbr.h
> > @@ -63,11 +63,11 @@
> >   * Given thread id needs to be converted to index into the array and
> >   * the id within the array element.
> >   */
> > -#define __RTE_QSBR_THRID_ARRAY_ELM_SIZE (sizeof(uint64_t) * 8)
> > +#define __RTE_QSBR_THRID_ARRAY_ELM_SIZE (sizeof(RTE_ATOMIC(uint64_t)) *
> > +8)
> >  #define __RTE_QSBR_THRID_ARRAY_SIZE(max_threads) \
> >  	RTE_ALIGN(RTE_ALIGN_MUL_CEIL(max_threads, \
> >  		__RTE_QSBR_THRID_ARRAY_ELM_SIZE) >> 3, RTE_CACHE_LINE_SIZE) -#define
> > __RTE_QSBR_THRID_ARRAY_ELM(v, i) ((uint64_t *) \
> > +#define __RTE_QSBR_THRID_ARRAY_ELM(v, i) ((uint64_t __rte_atomic *) \
> 
> Is it equivalent to ((RTE_ATOMIC(uint64_t) *)?

I'm not sure if you're asking about the resultant type of the expression
or not?

In this context we aren't specifying an atomic type; rather, the cast adds
the atomic qualifier to what should already be a variable declared with an
atomic-specified type, which is why we use __rte_atomic.
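
As a rough sketch of the distinction (the declarations below are illustrative,
not taken from the patch; only RTE_ATOMIC(), __rte_atomic and
rte_atomic_load_explicit come from this series):

#include <stdint.h>
#include <rte_stdatomic.h>

/* Specifier: the object itself is declared with an atomic-specified type. */
static RTE_ATOMIC(uint64_t) example_token;

/* Qualifier in a cast: the pointed-to storage was already declared with an
 * atomic-specified type elsewhere; the cast only re-attaches the atomic
 * qualification so the rte_atomic_*_explicit() wrappers accept the pointer.
 */
static inline uint64_t
example_load_elem(void *base)
{
	uint64_t __rte_atomic *p = (uint64_t __rte_atomic *)base;

	return rte_atomic_load_explicit(p, rte_memory_order_relaxed);
}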

> 
> >  	((struct rte_rcu_qsbr_cnt *)(v + 1) + v->max_threads) + i)  #define
> > __RTE_QSBR_THRID_INDEX_SHIFT 6  #define __RTE_QSBR_THRID_MASK 0x3f @@ -75,13 +75,13 @@
> > 
> 
> <snip>
  
Ruifeng Wang Oct. 26, 2023, 4:24 a.m. UTC | #3
> -----Original Message-----
> From: Tyler Retzlaff <roretzla@linux.microsoft.com>
> Sent: Thursday, October 26, 2023 6:38 AM
> To: Ruifeng Wang <Ruifeng.Wang@arm.com>
> Cc: dev@dpdk.org; Akhil Goyal <gakhil@marvell.com>; Anatoly Burakov
> <anatoly.burakov@intel.com>; Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>; Bruce
> Richardson <bruce.richardson@intel.com>; Chenbo Xia <chenbo.xia@intel.com>; Ciara Power
> <ciara.power@intel.com>; David Christensen <drc@linux.vnet.ibm.com>; David Hunt
> <david.hunt@intel.com>; Dmitry Kozlyuk <dmitry.kozliuk@gmail.com>; Dmitry Malloy
> <dmitrym@microsoft.com>; Elena Agostini <eagostini@nvidia.com>; Erik Gabriel Carrillo
> <erik.g.carrillo@intel.com>; Fan Zhang <fanzhang.oss@gmail.com>; Ferruh Yigit
> <ferruh.yigit@amd.com>; Harman Kalra <hkalra@marvell.com>; Harry van Haaren
> <harry.van.haaren@intel.com>; Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>;
> jerinj@marvell.com; Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>; Matan Azrad
> <matan@nvidia.com>; Maxime Coquelin <maxime.coquelin@redhat.com>; Narcisa Ana Maria Vasile
> <navasile@linux.microsoft.com>; Nicolas Chautru <nicolas.chautru@intel.com>; Olivier Matz
> <olivier.matz@6wind.com>; Ori Kam <orika@nvidia.com>; Pallavi Kadam
> <pallavi.kadam@intel.com>; Pavan Nikhilesh <pbhagavatula@marvell.com>; Reshma Pattan
> <reshma.pattan@intel.com>; Sameh Gobriel <sameh.gobriel@intel.com>; Shijith Thotton
> <sthotton@marvell.com>; Sivaprasad Tummala <sivaprasad.tummala@amd.com>; Stephen Hemminger
> <stephen@networkplumber.org>; Suanming Mou <suanmingm@nvidia.com>; Sunil Kumar Kori
> <skori@marvell.com>; thomas@monjalon.net; Viacheslav Ovsiienko <viacheslavo@nvidia.com>;
> Vladimir Medvedkin <vladimir.medvedkin@intel.com>; Yipeng Wang <yipeng1.wang@intel.com>;
> nd <nd@arm.com>
> Subject: Re: [PATCH v2 09/19] rcu: use rte optional stdatomic API
> 
> On Wed, Oct 25, 2023 at 09:41:22AM +0000, Ruifeng Wang wrote:
> > > -----Original Message-----
> > > From: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > > Sent: Wednesday, October 18, 2023 4:31 AM
> > > To: dev@dpdk.org
> > > Cc: Akhil Goyal <gakhil@marvell.com>; Anatoly Burakov
> > > <anatoly.burakov@intel.com>; Andrew Rybchenko
> > > <andrew.rybchenko@oktetlabs.ru>; Bruce Richardson
> > > <bruce.richardson@intel.com>; Chenbo Xia <chenbo.xia@intel.com>;
> > > Ciara Power <ciara.power@intel.com>; David Christensen
> > > <drc@linux.vnet.ibm.com>; David Hunt <david.hunt@intel.com>; Dmitry
> > > Kozlyuk <dmitry.kozliuk@gmail.com>; Dmitry Malloy
> > > <dmitrym@microsoft.com>; Elena Agostini <eagostini@nvidia.com>; Erik
> > > Gabriel Carrillo <erik.g.carrillo@intel.com>; Fan Zhang
> > > <fanzhang.oss@gmail.com>; Ferruh Yigit <ferruh.yigit@amd.com>;
> > > Harman Kalra <hkalra@marvell.com>; Harry van Haaren
> > > <harry.van.haaren@intel.com>; Honnappa Nagarahalli
> > > <Honnappa.Nagarahalli@arm.com>; jerinj@marvell.com; Konstantin
> > > Ananyev <konstantin.v.ananyev@yandex.ru>; Matan Azrad
> > > <matan@nvidia.com>; Maxime Coquelin <maxime.coquelin@redhat.com>;
> > > Narcisa Ana Maria Vasile <navasile@linux.microsoft.com>; Nicolas
> > > Chautru <nicolas.chautru@intel.com>; Olivier Matz
> > > <olivier.matz@6wind.com>; Ori Kam <orika@nvidia.com>; Pallavi Kadam
> > > <pallavi.kadam@intel.com>; Pavan Nikhilesh
> > > <pbhagavatula@marvell.com>; Reshma Pattan <reshma.pattan@intel.com>;
> > > Sameh Gobriel <sameh.gobriel@intel.com>; Shijith Thotton
> > > <sthotton@marvell.com>; Sivaprasad Tummala
> > > <sivaprasad.tummala@amd.com>; Stephen Hemminger
> > > <stephen@networkplumber.org>; Suanming Mou <suanmingm@nvidia.com>;
> > > Sunil Kumar Kori <skori@marvell.com>; thomas@monjalon.net;
> > > Viacheslav Ovsiienko <viacheslavo@nvidia.com>; Vladimir Medvedkin
> > > <vladimir.medvedkin@intel.com>; Yipeng Wang
> > > <yipeng1.wang@intel.com>; Tyler Retzlaff
> > > <roretzla@linux.microsoft.com>
> > > Subject: [PATCH v2 09/19] rcu: use rte optional stdatomic API
> > >
> > > Replace the use of gcc builtin __atomic_xxx intrinsics with
> > > corresponding rte_atomic_xxx optional stdatomic API
> > >
> > > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > > ---
> > >  lib/rcu/rte_rcu_qsbr.c | 48 +++++++++++++++++------------------
> > >  lib/rcu/rte_rcu_qsbr.h | 68
> > > +++++++++++++++++++++++++-------------------------
> > >  2 files changed, 58 insertions(+), 58 deletions(-)
> > >
> > > diff --git a/lib/rcu/rte_rcu_qsbr.c b/lib/rcu/rte_rcu_qsbr.c index
> > > 17be93e..4dc7714 100644
> > > --- a/lib/rcu/rte_rcu_qsbr.c
> > > +++ b/lib/rcu/rte_rcu_qsbr.c
> > > @@ -102,21 +102,21 @@
> > >  	 * go out of sync. Hence, additional checks are required.
> > >  	 */
> > >  	/* Check if the thread is already registered */
> > > -	old_bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > -					__ATOMIC_RELAXED);
> > > +	old_bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > +					rte_memory_order_relaxed);
> > >  	if (old_bmap & 1UL << id)
> > >  		return 0;
> > >
> > >  	do {
> > >  		new_bmap = old_bmap | (1UL << id);
> > > -		success = __atomic_compare_exchange(
> > > +		success = rte_atomic_compare_exchange_strong_explicit(
> > >  					__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > -					&old_bmap, &new_bmap, 0,
> > > -					__ATOMIC_RELEASE, __ATOMIC_RELAXED);
> > > +					&old_bmap, new_bmap,
> > > +					rte_memory_order_release, rte_memory_order_relaxed);
> > >
> > >  		if (success)
> > > -			__atomic_fetch_add(&v->num_threads,
> > > -						1, __ATOMIC_RELAXED);
> > > +			rte_atomic_fetch_add_explicit(&v->num_threads,
> > > +						1, rte_memory_order_relaxed);
> > >  		else if (old_bmap & (1UL << id))
> > >  			/* Someone else registered this thread.
> > >  			 * Counter should not be incremented.
> > > @@ -154,8 +154,8 @@
> > >  	 * go out of sync. Hence, additional checks are required.
> > >  	 */
> > >  	/* Check if the thread is already unregistered */
> > > -	old_bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > -					__ATOMIC_RELAXED);
> > > +	old_bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > +					rte_memory_order_relaxed);
> > >  	if (!(old_bmap & (1UL << id)))
> > >  		return 0;
> > >
> > > @@ -165,14 +165,14 @@
> > >  		 * completed before removal of the thread from the list of
> > >  		 * reporting threads.
> > >  		 */
> > > -		success = __atomic_compare_exchange(
> > > +		success = rte_atomic_compare_exchange_strong_explicit(
> > >  					__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > -					&old_bmap, &new_bmap, 0,
> > > -					__ATOMIC_RELEASE, __ATOMIC_RELAXED);
> > > +					&old_bmap, new_bmap,
> > > +					rte_memory_order_release, rte_memory_order_relaxed);
> > >
> > >  		if (success)
> > > -			__atomic_fetch_sub(&v->num_threads,
> > > -						1, __ATOMIC_RELAXED);
> > > +			rte_atomic_fetch_sub_explicit(&v->num_threads,
> > > +						1, rte_memory_order_relaxed);
> > >  		else if (!(old_bmap & (1UL << id)))
> > >  			/* Someone else unregistered this thread.
> > >  			 * Counter should not be incremented.
> > > @@ -227,8 +227,8 @@
> > >
> > >  	fprintf(f, "  Registered thread IDs = ");
> > >  	for (i = 0; i < v->num_elems; i++) {
> > > -		bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > -					__ATOMIC_ACQUIRE);
> > > +		bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > +					rte_memory_order_acquire);
> > >  		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
> > >  		while (bmap) {
> > >  			t = __builtin_ctzl(bmap);
> > > @@ -241,26 +241,26 @@
> > >  	fprintf(f, "\n");
> > >
> > >  	fprintf(f, "  Token = %" PRIu64 "\n",
> > > -			__atomic_load_n(&v->token, __ATOMIC_ACQUIRE));
> > > +			rte_atomic_load_explicit(&v->token, rte_memory_order_acquire));
> > >
> > >  	fprintf(f, "  Least Acknowledged Token = %" PRIu64 "\n",
> > > -			__atomic_load_n(&v->acked_token, __ATOMIC_ACQUIRE));
> > > +			rte_atomic_load_explicit(&v->acked_token,
> > > +rte_memory_order_acquire));
> > >
> > >  	fprintf(f, "Quiescent State Counts for readers:\n");
> > >  	for (i = 0; i < v->num_elems; i++) {
> > > -		bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > -					__ATOMIC_ACQUIRE);
> > > +		bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > +					rte_memory_order_acquire);
> > >  		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
> > >  		while (bmap) {
> > >  			t = __builtin_ctzl(bmap);
> > >  			fprintf(f, "thread ID = %u, count = %" PRIu64 ", lock count = %u\n",
> > >  				id + t,
> > > -				__atomic_load_n(
> > > +				rte_atomic_load_explicit(
> > >  					&v->qsbr_cnt[id + t].cnt,
> > > -					__ATOMIC_RELAXED),
> > > -				__atomic_load_n(
> > > +					rte_memory_order_relaxed),
> > > +				rte_atomic_load_explicit(
> > >  					&v->qsbr_cnt[id + t].lock_cnt,
> > > -					__ATOMIC_RELAXED));
> > > +					rte_memory_order_relaxed));
> > >  			bmap &= ~(1UL << t);
> > >  		}
> > >  	}
> > > diff --git a/lib/rcu/rte_rcu_qsbr.h b/lib/rcu/rte_rcu_qsbr.h index
> > > 87e1b55..9f4aed2 100644
> > > --- a/lib/rcu/rte_rcu_qsbr.h
> > > +++ b/lib/rcu/rte_rcu_qsbr.h
> > > @@ -63,11 +63,11 @@
> > >   * Given thread id needs to be converted to index into the array and
> > >   * the id within the array element.
> > >   */
> > > -#define __RTE_QSBR_THRID_ARRAY_ELM_SIZE (sizeof(uint64_t) * 8)
> > > +#define __RTE_QSBR_THRID_ARRAY_ELM_SIZE
> > > +(sizeof(RTE_ATOMIC(uint64_t)) *
> > > +8)
> > >  #define __RTE_QSBR_THRID_ARRAY_SIZE(max_threads) \
> > >  	RTE_ALIGN(RTE_ALIGN_MUL_CEIL(max_threads, \
> > >  		__RTE_QSBR_THRID_ARRAY_ELM_SIZE) >> 3, RTE_CACHE_LINE_SIZE)
> > > -#define __RTE_QSBR_THRID_ARRAY_ELM(v, i) ((uint64_t *) \
> > > +#define __RTE_QSBR_THRID_ARRAY_ELM(v, i) ((uint64_t __rte_atomic *)
> > > +\
> >
> > Is it equivalent to ((RTE_ATOMIC(uint64_t) *)?
> 
> i'm not sure if you're asking about the resultant type of the expression or not?

I see other places are using the specifier, hence the question.

> 
> in this context we aren't specifying an atomic type but rather adding the atomic qualifier
> to what should already be a variable that has an atomic specified type with a cast which
> is why we use __rte_atomic.

I read in document [1] that an atomic-qualified type may have a different size from the original type.
If that is the case, the size difference could cause issues when accessing the bitmap array.
Did I misunderstand?

[1] https://en.cppreference.com/w/c/language/atomic

> 
> >
> > >  	((struct rte_rcu_qsbr_cnt *)(v + 1) + v->max_threads) + i)
> > > #define __RTE_QSBR_THRID_INDEX_SHIFT 6  #define
> > > __RTE_QSBR_THRID_MASK 0x3f @@ -75,13 +75,13 @@
> > >
> >
> > <snip>
  
Tyler Retzlaff Oct. 26, 2023, 4:36 p.m. UTC | #4
On Thu, Oct 26, 2023 at 04:24:54AM +0000, Ruifeng Wang wrote:
> > -----Original Message-----
> > From: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > Sent: Thursday, October 26, 2023 6:38 AM
> > To: Ruifeng Wang <Ruifeng.Wang@arm.com>
> > Cc: dev@dpdk.org; Akhil Goyal <gakhil@marvell.com>; Anatoly Burakov
> > <anatoly.burakov@intel.com>; Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>; Bruce
> > Richardson <bruce.richardson@intel.com>; Chenbo Xia <chenbo.xia@intel.com>; Ciara Power
> > <ciara.power@intel.com>; David Christensen <drc@linux.vnet.ibm.com>; David Hunt
> > <david.hunt@intel.com>; Dmitry Kozlyuk <dmitry.kozliuk@gmail.com>; Dmitry Malloy
> > <dmitrym@microsoft.com>; Elena Agostini <eagostini@nvidia.com>; Erik Gabriel Carrillo
> > <erik.g.carrillo@intel.com>; Fan Zhang <fanzhang.oss@gmail.com>; Ferruh Yigit
> > <ferruh.yigit@amd.com>; Harman Kalra <hkalra@marvell.com>; Harry van Haaren
> > <harry.van.haaren@intel.com>; Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>;
> > jerinj@marvell.com; Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>; Matan Azrad
> > <matan@nvidia.com>; Maxime Coquelin <maxime.coquelin@redhat.com>; Narcisa Ana Maria Vasile
> > <navasile@linux.microsoft.com>; Nicolas Chautru <nicolas.chautru@intel.com>; Olivier Matz
> > <olivier.matz@6wind.com>; Ori Kam <orika@nvidia.com>; Pallavi Kadam
> > <pallavi.kadam@intel.com>; Pavan Nikhilesh <pbhagavatula@marvell.com>; Reshma Pattan
> > <reshma.pattan@intel.com>; Sameh Gobriel <sameh.gobriel@intel.com>; Shijith Thotton
> > <sthotton@marvell.com>; Sivaprasad Tummala <sivaprasad.tummala@amd.com>; Stephen Hemminger
> > <stephen@networkplumber.org>; Suanming Mou <suanmingm@nvidia.com>; Sunil Kumar Kori
> > <skori@marvell.com>; thomas@monjalon.net; Viacheslav Ovsiienko <viacheslavo@nvidia.com>;
> > Vladimir Medvedkin <vladimir.medvedkin@intel.com>; Yipeng Wang <yipeng1.wang@intel.com>;
> > nd <nd@arm.com>
> > Subject: Re: [PATCH v2 09/19] rcu: use rte optional stdatomic API
> > 
> > On Wed, Oct 25, 2023 at 09:41:22AM +0000, Ruifeng Wang wrote:
> > > > -----Original Message-----
> > > > From: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > > > Sent: Wednesday, October 18, 2023 4:31 AM
> > > > To: dev@dpdk.org
> > > > Cc: Akhil Goyal <gakhil@marvell.com>; Anatoly Burakov
> > > > <anatoly.burakov@intel.com>; Andrew Rybchenko
> > > > <andrew.rybchenko@oktetlabs.ru>; Bruce Richardson
> > > > <bruce.richardson@intel.com>; Chenbo Xia <chenbo.xia@intel.com>;
> > > > Ciara Power <ciara.power@intel.com>; David Christensen
> > > > <drc@linux.vnet.ibm.com>; David Hunt <david.hunt@intel.com>; Dmitry
> > > > Kozlyuk <dmitry.kozliuk@gmail.com>; Dmitry Malloy
> > > > <dmitrym@microsoft.com>; Elena Agostini <eagostini@nvidia.com>; Erik
> > > > Gabriel Carrillo <erik.g.carrillo@intel.com>; Fan Zhang
> > > > <fanzhang.oss@gmail.com>; Ferruh Yigit <ferruh.yigit@amd.com>;
> > > > Harman Kalra <hkalra@marvell.com>; Harry van Haaren
> > > > <harry.van.haaren@intel.com>; Honnappa Nagarahalli
> > > > <Honnappa.Nagarahalli@arm.com>; jerinj@marvell.com; Konstantin
> > > > Ananyev <konstantin.v.ananyev@yandex.ru>; Matan Azrad
> > > > <matan@nvidia.com>; Maxime Coquelin <maxime.coquelin@redhat.com>;
> > > > Narcisa Ana Maria Vasile <navasile@linux.microsoft.com>; Nicolas
> > > > Chautru <nicolas.chautru@intel.com>; Olivier Matz
> > > > <olivier.matz@6wind.com>; Ori Kam <orika@nvidia.com>; Pallavi Kadam
> > > > <pallavi.kadam@intel.com>; Pavan Nikhilesh
> > > > <pbhagavatula@marvell.com>; Reshma Pattan <reshma.pattan@intel.com>;
> > > > Sameh Gobriel <sameh.gobriel@intel.com>; Shijith Thotton
> > > > <sthotton@marvell.com>; Sivaprasad Tummala
> > > > <sivaprasad.tummala@amd.com>; Stephen Hemminger
> > > > <stephen@networkplumber.org>; Suanming Mou <suanmingm@nvidia.com>;
> > > > Sunil Kumar Kori <skori@marvell.com>; thomas@monjalon.net;
> > > > Viacheslav Ovsiienko <viacheslavo@nvidia.com>; Vladimir Medvedkin
> > > > <vladimir.medvedkin@intel.com>; Yipeng Wang
> > > > <yipeng1.wang@intel.com>; Tyler Retzlaff
> > > > <roretzla@linux.microsoft.com>
> > > > Subject: [PATCH v2 09/19] rcu: use rte optional stdatomic API
> > > >
> > > > Replace the use of gcc builtin __atomic_xxx intrinsics with
> > > > corresponding rte_atomic_xxx optional stdatomic API
> > > >
> > > > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > > > ---
> > > >  lib/rcu/rte_rcu_qsbr.c | 48 +++++++++++++++++------------------
> > > >  lib/rcu/rte_rcu_qsbr.h | 68
> > > > +++++++++++++++++++++++++-------------------------
> > > >  2 files changed, 58 insertions(+), 58 deletions(-)
> > > >
> > > > diff --git a/lib/rcu/rte_rcu_qsbr.c b/lib/rcu/rte_rcu_qsbr.c index
> > > > 17be93e..4dc7714 100644
> > > > --- a/lib/rcu/rte_rcu_qsbr.c
> > > > +++ b/lib/rcu/rte_rcu_qsbr.c
> > > > @@ -102,21 +102,21 @@
> > > >  	 * go out of sync. Hence, additional checks are required.
> > > >  	 */
> > > >  	/* Check if the thread is already registered */
> > > > -	old_bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > > -					__ATOMIC_RELAXED);
> > > > +	old_bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > > +					rte_memory_order_relaxed);
> > > >  	if (old_bmap & 1UL << id)
> > > >  		return 0;
> > > >
> > > >  	do {
> > > >  		new_bmap = old_bmap | (1UL << id);
> > > > -		success = __atomic_compare_exchange(
> > > > +		success = rte_atomic_compare_exchange_strong_explicit(
> > > >  					__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > > -					&old_bmap, &new_bmap, 0,
> > > > -					__ATOMIC_RELEASE, __ATOMIC_RELAXED);
> > > > +					&old_bmap, new_bmap,
> > > > +					rte_memory_order_release, rte_memory_order_relaxed);
> > > >
> > > >  		if (success)
> > > > -			__atomic_fetch_add(&v->num_threads,
> > > > -						1, __ATOMIC_RELAXED);
> > > > +			rte_atomic_fetch_add_explicit(&v->num_threads,
> > > > +						1, rte_memory_order_relaxed);
> > > >  		else if (old_bmap & (1UL << id))
> > > >  			/* Someone else registered this thread.
> > > >  			 * Counter should not be incremented.
> > > > @@ -154,8 +154,8 @@
> > > >  	 * go out of sync. Hence, additional checks are required.
> > > >  	 */
> > > >  	/* Check if the thread is already unregistered */
> > > > -	old_bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > > -					__ATOMIC_RELAXED);
> > > > +	old_bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > > +					rte_memory_order_relaxed);
> > > >  	if (!(old_bmap & (1UL << id)))
> > > >  		return 0;
> > > >
> > > > @@ -165,14 +165,14 @@
> > > >  		 * completed before removal of the thread from the list of
> > > >  		 * reporting threads.
> > > >  		 */
> > > > -		success = __atomic_compare_exchange(
> > > > +		success = rte_atomic_compare_exchange_strong_explicit(
> > > >  					__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > > -					&old_bmap, &new_bmap, 0,
> > > > -					__ATOMIC_RELEASE, __ATOMIC_RELAXED);
> > > > +					&old_bmap, new_bmap,
> > > > +					rte_memory_order_release, rte_memory_order_relaxed);
> > > >
> > > >  		if (success)
> > > > -			__atomic_fetch_sub(&v->num_threads,
> > > > -						1, __ATOMIC_RELAXED);
> > > > +			rte_atomic_fetch_sub_explicit(&v->num_threads,
> > > > +						1, rte_memory_order_relaxed);
> > > >  		else if (!(old_bmap & (1UL << id)))
> > > >  			/* Someone else unregistered this thread.
> > > >  			 * Counter should not be incremented.
> > > > @@ -227,8 +227,8 @@
> > > >
> > > >  	fprintf(f, "  Registered thread IDs = ");
> > > >  	for (i = 0; i < v->num_elems; i++) {
> > > > -		bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > > -					__ATOMIC_ACQUIRE);
> > > > +		bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > > +					rte_memory_order_acquire);
> > > >  		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
> > > >  		while (bmap) {
> > > >  			t = __builtin_ctzl(bmap);
> > > > @@ -241,26 +241,26 @@
> > > >  	fprintf(f, "\n");
> > > >
> > > >  	fprintf(f, "  Token = %" PRIu64 "\n",
> > > > -			__atomic_load_n(&v->token, __ATOMIC_ACQUIRE));
> > > > +			rte_atomic_load_explicit(&v->token, rte_memory_order_acquire));
> > > >
> > > >  	fprintf(f, "  Least Acknowledged Token = %" PRIu64 "\n",
> > > > -			__atomic_load_n(&v->acked_token, __ATOMIC_ACQUIRE));
> > > > +			rte_atomic_load_explicit(&v->acked_token,
> > > > +rte_memory_order_acquire));
> > > >
> > > >  	fprintf(f, "Quiescent State Counts for readers:\n");
> > > >  	for (i = 0; i < v->num_elems; i++) {
> > > > -		bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > > -					__ATOMIC_ACQUIRE);
> > > > +		bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
> > > > +					rte_memory_order_acquire);
> > > >  		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
> > > >  		while (bmap) {
> > > >  			t = __builtin_ctzl(bmap);
> > > >  			fprintf(f, "thread ID = %u, count = %" PRIu64 ", lock count = %u\n",
> > > >  				id + t,
> > > > -				__atomic_load_n(
> > > > +				rte_atomic_load_explicit(
> > > >  					&v->qsbr_cnt[id + t].cnt,
> > > > -					__ATOMIC_RELAXED),
> > > > -				__atomic_load_n(
> > > > +					rte_memory_order_relaxed),
> > > > +				rte_atomic_load_explicit(
> > > >  					&v->qsbr_cnt[id + t].lock_cnt,
> > > > -					__ATOMIC_RELAXED));
> > > > +					rte_memory_order_relaxed));
> > > >  			bmap &= ~(1UL << t);
> > > >  		}
> > > >  	}
> > > > diff --git a/lib/rcu/rte_rcu_qsbr.h b/lib/rcu/rte_rcu_qsbr.h index
> > > > 87e1b55..9f4aed2 100644
> > > > --- a/lib/rcu/rte_rcu_qsbr.h
> > > > +++ b/lib/rcu/rte_rcu_qsbr.h
> > > > @@ -63,11 +63,11 @@
> > > >   * Given thread id needs to be converted to index into the array and
> > > >   * the id within the array element.
> > > >   */
> > > > -#define __RTE_QSBR_THRID_ARRAY_ELM_SIZE (sizeof(uint64_t) * 8)
> > > > +#define __RTE_QSBR_THRID_ARRAY_ELM_SIZE
> > > > +(sizeof(RTE_ATOMIC(uint64_t)) *
> > > > +8)
> > > >  #define __RTE_QSBR_THRID_ARRAY_SIZE(max_threads) \
> > > >  	RTE_ALIGN(RTE_ALIGN_MUL_CEIL(max_threads, \
> > > >  		__RTE_QSBR_THRID_ARRAY_ELM_SIZE) >> 3, RTE_CACHE_LINE_SIZE)
> > > > -#define __RTE_QSBR_THRID_ARRAY_ELM(v, i) ((uint64_t *) \
> > > > +#define __RTE_QSBR_THRID_ARRAY_ELM(v, i) ((uint64_t __rte_atomic *)
> > > > +\
> > >
> > > Is it equivalent to ((RTE_ATOMIC(uint64_t) *)?
> > 
> > i'm not sure if you're asking about the resultant type of the expression or not?
> 
> I see other places are using specifier hence the question.
> 
> > 
> > in this context we aren't specifying an atomic type but rather adding the atomic qualifier
> > to what should already be a variable that has an atomic specified type with a cast which
> > is why we use __rte_atomic.
> 
> I read from document [1] that atomic qualified type may have a different size from the original type.
> If that is the case, the size difference could cause issue in bitmap array accessing.
> Did I misunderstand?
> 
> [1] https://en.cppreference.com/w/c/language/atomic
> 

You do not misunderstand; the standard allows atomic-specified type
sizes to differ from their ordinary native type sizes, though I have
some issues with how cppreference words things here compared with
the actual standard.

One of the reasons is that it allows all standard atomic functions to be
'generic', which means they can be used on objects of arbitrary size
instead of just integer and pointer types, i.e. you can use them on
struct, union and array types.

It is implementation-defined how the operations are made atomic, and it
is obviously target-processor dependent, but in cases where the processor
has no intrinsic support to perform the operation atomically the toolchain
may generate code that uses a hidden lock. You can test whether this
is the case for arbitrary objects using the standard-specified atomic_is_lock_free:
https://en.cppreference.com/w/c/atomic/atomic_is_lock_free

So that's the long-answer form of why they *may* differ in size,
alignment, etc. But the real question is: in this instance, will they?

Probably not.

Mainly because it wouldn't make a lot of sense for clang/gcc to suddenly
decide that sizeof(uint64_t) != sizeof(_Atomic(uint64_t)), or that they
need to use a lock on an amd64 processor to load/store atomically
(assuming native alignment), etc.
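
A minimal sketch of how to check this on a given target, using plain C11
<stdatomic.h> rather than the dpdk wrappers (illustrative only):

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* If these hold on the build target, the atomic-specified type has the same
 * layout as the plain type, so the casts in the patch don't change access size.
 */
static_assert(sizeof(uint64_t) == sizeof(_Atomic(uint64_t)),
		"atomic uint64_t has a different size on this target");
static_assert(_Alignof(uint64_t) == _Alignof(_Atomic(uint64_t)),
		"atomic uint64_t has a different alignment on this target");

int main(void)
{
	_Atomic(uint64_t) x = 0;

	/* Non-zero when no hidden lock is needed for this object. */
	printf("lock free: %d\n", atomic_is_lock_free(&x));
	return 0;
}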

A lot of the above is why we had so much discussion about how and when
we could enable the use of standard C11 atomics in dpdk. As you've
probably noticed, for existing platforms, toolchains and targets it
actually defaults to off, but binary packagers or users may still
build with it on.

For compatibility, the strictest guarantees can only be made when dpdk
and the application are both built consistently to use, or not use,
standard atomics. Applications are strongly cautioned not to perform an
unmatched atomic operation on a dpdk atomic object;
i.e. if you enabled standard atomics, don't use __atomic_load_n directly
on a field of a public dpdk structure, instead use
rte_atomic_load_explicit and make sure your application defines
RTE_ENABLE_STDATOMIC.
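
As a rough sketch of that matched usage (the struct and field below are
hypothetical; only RTE_ATOMIC() and the rte_atomic_* calls come from this
series):

#include <stdint.h>
#include <rte_stdatomic.h>

/* Hypothetical public structure with an atomic-specified field,
 * analogous to the token field in rte_rcu_qsbr.
 */
struct example_shared {
	RTE_ATOMIC(uint64_t) token;
};

static inline uint64_t
example_read_token(struct example_shared *s)
{
	/* Matched access: the wrapper resolves to stdatomic or the gcc builtin
	 * according to how both dpdk and the application were built
	 * (RTE_ENABLE_STDATOMIC), so both sides always access the field the
	 * same way. Calling __atomic_load_n(&s->token, __ATOMIC_ACQUIRE)
	 * directly here would be the unmatched access being cautioned against
	 * when standard atomics are enabled.
	 */
	return rte_atomic_load_explicit(&s->token, rte_memory_order_acquire);
}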

Hope this explanation helps.

> > 
> > >
> > > >  	((struct rte_rcu_qsbr_cnt *)(v + 1) + v->max_threads) + i)
> > > > #define __RTE_QSBR_THRID_INDEX_SHIFT 6  #define
> > > > __RTE_QSBR_THRID_MASK 0x3f @@ -75,13 +75,13 @@
> > > >
> > >
> > > <snip>
  

Patch

diff --git a/lib/rcu/rte_rcu_qsbr.c b/lib/rcu/rte_rcu_qsbr.c
index 17be93e..4dc7714 100644
--- a/lib/rcu/rte_rcu_qsbr.c
+++ b/lib/rcu/rte_rcu_qsbr.c
@@ -102,21 +102,21 @@ 
 	 * go out of sync. Hence, additional checks are required.
 	 */
 	/* Check if the thread is already registered */
-	old_bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
-					__ATOMIC_RELAXED);
+	old_bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
+					rte_memory_order_relaxed);
 	if (old_bmap & 1UL << id)
 		return 0;
 
 	do {
 		new_bmap = old_bmap | (1UL << id);
-		success = __atomic_compare_exchange(
+		success = rte_atomic_compare_exchange_strong_explicit(
 					__RTE_QSBR_THRID_ARRAY_ELM(v, i),
-					&old_bmap, &new_bmap, 0,
-					__ATOMIC_RELEASE, __ATOMIC_RELAXED);
+					&old_bmap, new_bmap,
+					rte_memory_order_release, rte_memory_order_relaxed);
 
 		if (success)
-			__atomic_fetch_add(&v->num_threads,
-						1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_add_explicit(&v->num_threads,
+						1, rte_memory_order_relaxed);
 		else if (old_bmap & (1UL << id))
 			/* Someone else registered this thread.
 			 * Counter should not be incremented.
@@ -154,8 +154,8 @@ 
 	 * go out of sync. Hence, additional checks are required.
 	 */
 	/* Check if the thread is already unregistered */
-	old_bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
-					__ATOMIC_RELAXED);
+	old_bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
+					rte_memory_order_relaxed);
 	if (!(old_bmap & (1UL << id)))
 		return 0;
 
@@ -165,14 +165,14 @@ 
 		 * completed before removal of the thread from the list of
 		 * reporting threads.
 		 */
-		success = __atomic_compare_exchange(
+		success = rte_atomic_compare_exchange_strong_explicit(
 					__RTE_QSBR_THRID_ARRAY_ELM(v, i),
-					&old_bmap, &new_bmap, 0,
-					__ATOMIC_RELEASE, __ATOMIC_RELAXED);
+					&old_bmap, new_bmap,
+					rte_memory_order_release, rte_memory_order_relaxed);
 
 		if (success)
-			__atomic_fetch_sub(&v->num_threads,
-						1, __ATOMIC_RELAXED);
+			rte_atomic_fetch_sub_explicit(&v->num_threads,
+						1, rte_memory_order_relaxed);
 		else if (!(old_bmap & (1UL << id)))
 			/* Someone else unregistered this thread.
 			 * Counter should not be incremented.
@@ -227,8 +227,8 @@ 
 
 	fprintf(f, "  Registered thread IDs = ");
 	for (i = 0; i < v->num_elems; i++) {
-		bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
-					__ATOMIC_ACQUIRE);
+		bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
+					rte_memory_order_acquire);
 		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
 		while (bmap) {
 			t = __builtin_ctzl(bmap);
@@ -241,26 +241,26 @@ 
 	fprintf(f, "\n");
 
 	fprintf(f, "  Token = %" PRIu64 "\n",
-			__atomic_load_n(&v->token, __ATOMIC_ACQUIRE));
+			rte_atomic_load_explicit(&v->token, rte_memory_order_acquire));
 
 	fprintf(f, "  Least Acknowledged Token = %" PRIu64 "\n",
-			__atomic_load_n(&v->acked_token, __ATOMIC_ACQUIRE));
+			rte_atomic_load_explicit(&v->acked_token, rte_memory_order_acquire));
 
 	fprintf(f, "Quiescent State Counts for readers:\n");
 	for (i = 0; i < v->num_elems; i++) {
-		bmap = __atomic_load_n(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
-					__ATOMIC_ACQUIRE);
+		bmap = rte_atomic_load_explicit(__RTE_QSBR_THRID_ARRAY_ELM(v, i),
+					rte_memory_order_acquire);
 		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
 		while (bmap) {
 			t = __builtin_ctzl(bmap);
 			fprintf(f, "thread ID = %u, count = %" PRIu64 ", lock count = %u\n",
 				id + t,
-				__atomic_load_n(
+				rte_atomic_load_explicit(
 					&v->qsbr_cnt[id + t].cnt,
-					__ATOMIC_RELAXED),
-				__atomic_load_n(
+					rte_memory_order_relaxed),
+				rte_atomic_load_explicit(
 					&v->qsbr_cnt[id + t].lock_cnt,
-					__ATOMIC_RELAXED));
+					rte_memory_order_relaxed));
 			bmap &= ~(1UL << t);
 		}
 	}
diff --git a/lib/rcu/rte_rcu_qsbr.h b/lib/rcu/rte_rcu_qsbr.h
index 87e1b55..9f4aed2 100644
--- a/lib/rcu/rte_rcu_qsbr.h
+++ b/lib/rcu/rte_rcu_qsbr.h
@@ -63,11 +63,11 @@ 
  * Given thread id needs to be converted to index into the array and
  * the id within the array element.
  */
-#define __RTE_QSBR_THRID_ARRAY_ELM_SIZE (sizeof(uint64_t) * 8)
+#define __RTE_QSBR_THRID_ARRAY_ELM_SIZE (sizeof(RTE_ATOMIC(uint64_t)) * 8)
 #define __RTE_QSBR_THRID_ARRAY_SIZE(max_threads) \
 	RTE_ALIGN(RTE_ALIGN_MUL_CEIL(max_threads, \
 		__RTE_QSBR_THRID_ARRAY_ELM_SIZE) >> 3, RTE_CACHE_LINE_SIZE)
-#define __RTE_QSBR_THRID_ARRAY_ELM(v, i) ((uint64_t *) \
+#define __RTE_QSBR_THRID_ARRAY_ELM(v, i) ((uint64_t __rte_atomic *) \
 	((struct rte_rcu_qsbr_cnt *)(v + 1) + v->max_threads) + i)
 #define __RTE_QSBR_THRID_INDEX_SHIFT 6
 #define __RTE_QSBR_THRID_MASK 0x3f
@@ -75,13 +75,13 @@ 
 
 /* Worker thread counter */
 struct rte_rcu_qsbr_cnt {
-	uint64_t cnt;
+	RTE_ATOMIC(uint64_t) cnt;
 	/**< Quiescent state counter. Value 0 indicates the thread is offline
 	 *   64b counter is used to avoid adding more code to address
 	 *   counter overflow. Changing this to 32b would require additional
 	 *   changes to various APIs.
 	 */
-	uint32_t lock_cnt;
+	RTE_ATOMIC(uint32_t) lock_cnt;
 	/**< Lock counter. Used when RTE_LIBRTE_RCU_DEBUG is enabled */
 } __rte_cache_aligned;
 
@@ -97,16 +97,16 @@  struct rte_rcu_qsbr_cnt {
  * 2) Register thread ID array
  */
 struct rte_rcu_qsbr {
-	uint64_t token __rte_cache_aligned;
+	RTE_ATOMIC(uint64_t) token __rte_cache_aligned;
 	/**< Counter to allow for multiple concurrent quiescent state queries */
-	uint64_t acked_token;
+	RTE_ATOMIC(uint64_t) acked_token;
 	/**< Least token acked by all the threads in the last call to
 	 *   rte_rcu_qsbr_check API.
 	 */
 
 	uint32_t num_elems __rte_cache_aligned;
 	/**< Number of elements in the thread ID array */
-	uint32_t num_threads;
+	RTE_ATOMIC(uint32_t) num_threads;
 	/**< Number of threads currently using this QS variable */
 	uint32_t max_threads;
 	/**< Maximum number of threads using this QS variable */
@@ -311,13 +311,13 @@  struct rte_rcu_qsbr_dq_parameters {
 	 * the following will not move down after the load of any shared
 	 * data structure.
 	 */
-	t = __atomic_load_n(&v->token, __ATOMIC_RELAXED);
+	t = rte_atomic_load_explicit(&v->token, rte_memory_order_relaxed);
 
-	/* __atomic_store_n(cnt, __ATOMIC_RELAXED) is used to ensure
+	/* rte_atomic_store_explicit(cnt, rte_memory_order_relaxed) is used to ensure
 	 * 'cnt' (64b) is accessed atomically.
 	 */
-	__atomic_store_n(&v->qsbr_cnt[thread_id].cnt,
-		t, __ATOMIC_RELAXED);
+	rte_atomic_store_explicit(&v->qsbr_cnt[thread_id].cnt,
+		t, rte_memory_order_relaxed);
 
 	/* The subsequent load of the data structure should not
 	 * move above the store. Hence a store-load barrier
@@ -326,7 +326,7 @@  struct rte_rcu_qsbr_dq_parameters {
 	 * writer might not see that the reader is online, even though
 	 * the reader is referencing the shared data structure.
 	 */
-	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
 }
 
 /**
@@ -362,8 +362,8 @@  struct rte_rcu_qsbr_dq_parameters {
 	 * data structure can not move after this store.
 	 */
 
-	__atomic_store_n(&v->qsbr_cnt[thread_id].cnt,
-		__RTE_QSBR_CNT_THR_OFFLINE, __ATOMIC_RELEASE);
+	rte_atomic_store_explicit(&v->qsbr_cnt[thread_id].cnt,
+		__RTE_QSBR_CNT_THR_OFFLINE, rte_memory_order_release);
 }
 
 /**
@@ -394,8 +394,8 @@  struct rte_rcu_qsbr_dq_parameters {
 
 #if defined(RTE_LIBRTE_RCU_DEBUG)
 	/* Increment the lock counter */
-	__atomic_fetch_add(&v->qsbr_cnt[thread_id].lock_cnt,
-				1, __ATOMIC_ACQUIRE);
+	rte_atomic_fetch_add_explicit(&v->qsbr_cnt[thread_id].lock_cnt,
+				1, rte_memory_order_acquire);
 #endif
 }
 
@@ -427,8 +427,8 @@  struct rte_rcu_qsbr_dq_parameters {
 
 #if defined(RTE_LIBRTE_RCU_DEBUG)
 	/* Decrement the lock counter */
-	__atomic_fetch_sub(&v->qsbr_cnt[thread_id].lock_cnt,
-				1, __ATOMIC_RELEASE);
+	rte_atomic_fetch_sub_explicit(&v->qsbr_cnt[thread_id].lock_cnt,
+				1, rte_memory_order_release);
 
 	__RTE_RCU_IS_LOCK_CNT_ZERO(v, thread_id, WARNING,
 				"Lock counter %u. Nested locks?\n",
@@ -461,7 +461,7 @@  struct rte_rcu_qsbr_dq_parameters {
 	 * structure are visible to the workers before the token
 	 * update is visible.
 	 */
-	t = __atomic_fetch_add(&v->token, 1, __ATOMIC_RELEASE) + 1;
+	t = rte_atomic_fetch_add_explicit(&v->token, 1, rte_memory_order_release) + 1;
 
 	return t;
 }
@@ -493,16 +493,16 @@  struct rte_rcu_qsbr_dq_parameters {
 	 * Later loads of the shared data structure should not move
 	 * above this load. Hence, use load-acquire.
 	 */
-	t = __atomic_load_n(&v->token, __ATOMIC_ACQUIRE);
+	t = rte_atomic_load_explicit(&v->token, rte_memory_order_acquire);
 
 	/* Check if there are updates available from the writer.
 	 * Inform the writer that updates are visible to this reader.
 	 * Prior loads of the shared data structure should not move
 	 * beyond this store. Hence use store-release.
 	 */
-	if (t != __atomic_load_n(&v->qsbr_cnt[thread_id].cnt, __ATOMIC_RELAXED))
-		__atomic_store_n(&v->qsbr_cnt[thread_id].cnt,
-					 t, __ATOMIC_RELEASE);
+	if (t != rte_atomic_load_explicit(&v->qsbr_cnt[thread_id].cnt, rte_memory_order_relaxed))
+		rte_atomic_store_explicit(&v->qsbr_cnt[thread_id].cnt,
+					 t, rte_memory_order_release);
 
 	__RTE_RCU_DP_LOG(DEBUG, "%s: update: token = %" PRIu64 ", Thread ID = %d",
 		__func__, t, thread_id);
@@ -517,7 +517,7 @@  struct rte_rcu_qsbr_dq_parameters {
 	uint32_t i, j, id;
 	uint64_t bmap;
 	uint64_t c;
-	uint64_t *reg_thread_id;
+	RTE_ATOMIC(uint64_t) *reg_thread_id;
 	uint64_t acked_token = __RTE_QSBR_CNT_MAX;
 
 	for (i = 0, reg_thread_id = __RTE_QSBR_THRID_ARRAY_ELM(v, 0);
@@ -526,7 +526,7 @@  struct rte_rcu_qsbr_dq_parameters {
 		/* Load the current registered thread bit map before
 		 * loading the reader thread quiescent state counters.
 		 */
-		bmap = __atomic_load_n(reg_thread_id, __ATOMIC_ACQUIRE);
+		bmap = rte_atomic_load_explicit(reg_thread_id, rte_memory_order_acquire);
 		id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
 
 		while (bmap) {
@@ -534,9 +534,9 @@  struct rte_rcu_qsbr_dq_parameters {
 			__RTE_RCU_DP_LOG(DEBUG,
 				"%s: check: token = %" PRIu64 ", wait = %d, Bit Map = 0x%" PRIx64 ", Thread ID = %d",
 				__func__, t, wait, bmap, id + j);
-			c = __atomic_load_n(
+			c = rte_atomic_load_explicit(
 					&v->qsbr_cnt[id + j].cnt,
-					__ATOMIC_ACQUIRE);
+					rte_memory_order_acquire);
 			__RTE_RCU_DP_LOG(DEBUG,
 				"%s: status: token = %" PRIu64 ", wait = %d, Thread QS cnt = %" PRIu64 ", Thread ID = %d",
 				__func__, t, wait, c, id+j);
@@ -554,8 +554,8 @@  struct rte_rcu_qsbr_dq_parameters {
 				/* This thread might have unregistered.
 				 * Re-read the bitmap.
 				 */
-				bmap = __atomic_load_n(reg_thread_id,
-						__ATOMIC_ACQUIRE);
+				bmap = rte_atomic_load_explicit(reg_thread_id,
+						rte_memory_order_acquire);
 
 				continue;
 			}
@@ -576,8 +576,8 @@  struct rte_rcu_qsbr_dq_parameters {
 	 * no need to update this very accurately using compare-and-swap.
 	 */
 	if (acked_token != __RTE_QSBR_CNT_MAX)
-		__atomic_store_n(&v->acked_token, acked_token,
-			__ATOMIC_RELAXED);
+		rte_atomic_store_explicit(&v->acked_token, acked_token,
+			rte_memory_order_relaxed);
 
 	return 1;
 }
@@ -598,7 +598,7 @@  struct rte_rcu_qsbr_dq_parameters {
 			"%s: check: token = %" PRIu64 ", wait = %d, Thread ID = %d",
 			__func__, t, wait, i);
 		while (1) {
-			c = __atomic_load_n(&cnt->cnt, __ATOMIC_ACQUIRE);
+			c = rte_atomic_load_explicit(&cnt->cnt, rte_memory_order_acquire);
 			__RTE_RCU_DP_LOG(DEBUG,
 				"%s: status: token = %" PRIu64 ", wait = %d, Thread QS cnt = %" PRIu64 ", Thread ID = %d",
 				__func__, t, wait, c, i);
@@ -628,8 +628,8 @@  struct rte_rcu_qsbr_dq_parameters {
 	 * no need to update this very accurately using compare-and-swap.
 	 */
 	if (acked_token != __RTE_QSBR_CNT_MAX)
-		__atomic_store_n(&v->acked_token, acked_token,
-			__ATOMIC_RELAXED);
+		rte_atomic_store_explicit(&v->acked_token, acked_token,
+			rte_memory_order_relaxed);
 
 	return 1;
 }