[v3,3/7] dma/idxd: replace rte atomics with GCC builtin atomics

Message ID 1679612036-30773-4-git-send-email-roretzla@linux.microsoft.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series replace rte atomics with GCC builtin atomics |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Tyler Retzlaff March 23, 2023, 10:53 p.m. UTC
  Replace the use of rte_atomic.h types and functions; instead, use the
GCC-supplied C++11 memory model builtins.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/dma/idxd/idxd_internal.h | 3 +--
 drivers/dma/idxd/idxd_pci.c      | 8 +++++---
 2 files changed, 6 insertions(+), 5 deletions(-)
  

Comments

David Marchand May 24, 2023, 8:09 p.m. UTC | #1
Hello Bruce, Kevin,

Review please.


On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  drivers/dma/idxd/idxd_internal.h | 3 +--
>  drivers/dma/idxd/idxd_pci.c      | 8 +++++---
>  2 files changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/dma/idxd/idxd_internal.h b/drivers/dma/idxd/idxd_internal.h
> index 180a858..cd41777 100644
> --- a/drivers/dma/idxd/idxd_internal.h
> +++ b/drivers/dma/idxd/idxd_internal.h
> @@ -7,7 +7,6 @@
>
>  #include <rte_dmadev_pmd.h>
>  #include <rte_spinlock.h>
> -#include <rte_atomic.h>
>
>  #include "idxd_hw_defs.h"
>
> @@ -34,7 +33,7 @@ struct idxd_pci_common {
>         rte_spinlock_t lk;
>
>         uint8_t wq_cfg_sz;
> -       rte_atomic16_t ref_count;
> +       uint16_t ref_count;
>         volatile struct rte_idxd_bar0 *regs;
>         volatile uint32_t *wq_regs_base;
>         volatile struct rte_idxd_grpcfg *grp_regs;
> diff --git a/drivers/dma/idxd/idxd_pci.c b/drivers/dma/idxd/idxd_pci.c
> index 781fa02..2de5d15 100644
> --- a/drivers/dma/idxd/idxd_pci.c
> +++ b/drivers/dma/idxd/idxd_pci.c
> @@ -6,7 +6,6 @@
>  #include <rte_devargs.h>
>  #include <rte_dmadev_pmd.h>
>  #include <rte_malloc.h>
> -#include <rte_atomic.h>
>
>  #include "idxd_internal.h"
>
> @@ -136,7 +135,9 @@
>         /* if this is the last WQ on the device, disable the device and free
>          * the PCI struct
>          */
> -       is_last_wq = rte_atomic16_dec_and_test(&idxd->u.pci->ref_count);
> +       /* NOTE: review for potential ordering optimization */
> +       is_last_wq = __atomic_fetch_sub(&idxd->u.pci->ref_count, 1,
> +               __ATOMIC_SEQ_CST) - 1 == 0;
>         if (is_last_wq) {
>                 /* disable the device */
>                 err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
> @@ -350,7 +351,8 @@
>                                 free(idxd.u.pci);
>                         return ret;
>                 }
> -               rte_atomic16_inc(&idxd.u.pci->ref_count);
> +               /* NOTE: review for potential ordering optimization */
> +               __atomic_fetch_add(&idxd.u.pci->ref_count, 1, __ATOMIC_SEQ_CST);
>         }
>
>         return 0;
> --
> 1.8.3.1
>
  
Bruce Richardson May 25, 2023, 8:41 a.m. UTC | #2
On Wed, May 24, 2023 at 10:09:04PM +0200, David Marchand wrote:
> Hello Bruce, Kevin,
> 
> Review please.
> 
> 
> On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
> <roretzla@linux.microsoft.com> wrote:
> >
> > Replace the use of rte_atomic.h types and functions, instead use GCC
> > supplied C++11 memory model builtins.
> >
> > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>

Two small comments inline below.

Acked-by: Bruce Richardson <bruce.richardson@intel.com>

> > ---
> >  drivers/dma/idxd/idxd_internal.h | 3 +--
> >  drivers/dma/idxd/idxd_pci.c      | 8 +++++---
> >  2 files changed, 6 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/dma/idxd/idxd_internal.h b/drivers/dma/idxd/idxd_internal.h
> > index 180a858..cd41777 100644
> > --- a/drivers/dma/idxd/idxd_internal.h
> > +++ b/drivers/dma/idxd/idxd_internal.h
> > @@ -7,7 +7,6 @@
> >
> >  #include <rte_dmadev_pmd.h>
> >  #include <rte_spinlock.h>
> > -#include <rte_atomic.h>
> >
> >  #include "idxd_hw_defs.h"
> >
> > @@ -34,7 +33,7 @@ struct idxd_pci_common {
> >         rte_spinlock_t lk;
> >
> >         uint8_t wq_cfg_sz;
> > -       rte_atomic16_t ref_count;
> > +       uint16_t ref_count;
> >         volatile struct rte_idxd_bar0 *regs;
> >         volatile uint32_t *wq_regs_base;
> >         volatile struct rte_idxd_grpcfg *grp_regs;
> > diff --git a/drivers/dma/idxd/idxd_pci.c b/drivers/dma/idxd/idxd_pci.c
> > index 781fa02..2de5d15 100644
> > --- a/drivers/dma/idxd/idxd_pci.c
> > +++ b/drivers/dma/idxd/idxd_pci.c
> > @@ -6,7 +6,6 @@
> >  #include <rte_devargs.h>
> >  #include <rte_dmadev_pmd.h>
> >  #include <rte_malloc.h>
> > -#include <rte_atomic.h>
> >
> >  #include "idxd_internal.h"
> >
> > @@ -136,7 +135,9 @@
> >         /* if this is the last WQ on the device, disable the device and free
> >          * the PCI struct
> >          */
> > -       is_last_wq = rte_atomic16_dec_and_test(&idxd->u.pci->ref_count);
> > +       /* NOTE: review for potential ordering optimization */
> > +       is_last_wq = __atomic_fetch_sub(&idxd->u.pci->ref_count, 1,
> > +               __ATOMIC_SEQ_CST) - 1 == 0;

Rather than "__atomic_fetch_sub(...) - 1 == 0", I think just comparing
"== 1" is simpler and better. I would also bracket the comparison for
clarity.

> >         if (is_last_wq) {
> >                 /* disable the device */
> >                 err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
> > @@ -350,7 +351,8 @@
> >                                 free(idxd.u.pci);
> >                         return ret;
> >                 }
> > -               rte_atomic16_inc(&idxd.u.pci->ref_count);
> > +               /* NOTE: review for potential ordering optimization */

I think we can drop the note. Since this is not datapath code the perf is
not that important.

> > +               __atomic_fetch_add(&idxd.u.pci->ref_count, 1, __ATOMIC_SEQ_CST);
> >         }
> >
> >         return 0;
> > --
> > 1.8.3.1
> >
> 
> -- 
> David Marchand
>
  
Kevin Laatz May 25, 2023, 12:57 p.m. UTC | #3
On 23/03/2023 22:53, Tyler Retzlaff wrote:
> Replace the use of rte_atomic.h types and functions, instead use GCC
> supplied C++11 memory model builtins.
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>   drivers/dma/idxd/idxd_internal.h | 3 +--
>   drivers/dma/idxd/idxd_pci.c      | 8 +++++---
>   2 files changed, 6 insertions(+), 5 deletions(-)
>
Acked-by: Kevin Laatz <kevin.laatz@intel.com>
  
Morten Brørup May 25, 2023, 1:59 p.m. UTC | #4
> From: Bruce Richardson [mailto:bruce.richardson@intel.com]
> Sent: Thursday, 25 May 2023 10.42
> 
> On Wed, May 24, 2023 at 10:09:04PM +0200, David Marchand wrote:
> > Hello Bruce, Kevin,
> >
> > Review please.
> >
> >
> > On Thu, Mar 23, 2023 at 11:54 PM Tyler Retzlaff
> > <roretzla@linux.microsoft.com> wrote:
> > >
> > > Replace the use of rte_atomic.h types and functions, instead use GCC
> > > supplied C++11 memory model builtins.
> > >
> > > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> 
> Two small comments inline below.
> 
> Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> 
> > > ---
> > >  drivers/dma/idxd/idxd_internal.h | 3 +--
> > >  drivers/dma/idxd/idxd_pci.c      | 8 +++++---
> > >  2 files changed, 6 insertions(+), 5 deletions(-)
> > >
> > > > diff --git a/drivers/dma/idxd/idxd_internal.h b/drivers/dma/idxd/idxd_internal.h
> > > index 180a858..cd41777 100644
> > > --- a/drivers/dma/idxd/idxd_internal.h
> > > +++ b/drivers/dma/idxd/idxd_internal.h
> > > @@ -7,7 +7,6 @@
> > >
> > >  #include <rte_dmadev_pmd.h>
> > >  #include <rte_spinlock.h>
> > > -#include <rte_atomic.h>
> > >
> > >  #include "idxd_hw_defs.h"
> > >
> > > @@ -34,7 +33,7 @@ struct idxd_pci_common {
> > >         rte_spinlock_t lk;
> > >
> > >         uint8_t wq_cfg_sz;
> > > -       rte_atomic16_t ref_count;
> > > +       uint16_t ref_count;
> > >         volatile struct rte_idxd_bar0 *regs;
> > >         volatile uint32_t *wq_regs_base;
> > >         volatile struct rte_idxd_grpcfg *grp_regs;
> > > diff --git a/drivers/dma/idxd/idxd_pci.c b/drivers/dma/idxd/idxd_pci.c
> > > index 781fa02..2de5d15 100644
> > > --- a/drivers/dma/idxd/idxd_pci.c
> > > +++ b/drivers/dma/idxd/idxd_pci.c
> > > @@ -6,7 +6,6 @@
> > >  #include <rte_devargs.h>
> > >  #include <rte_dmadev_pmd.h>
> > >  #include <rte_malloc.h>
> > > -#include <rte_atomic.h>
> > >
> > >  #include "idxd_internal.h"
> > >
> > > @@ -136,7 +135,9 @@
> > > >         /* if this is the last WQ on the device, disable the device and free
> > >          * the PCI struct
> > >          */
> > > -       is_last_wq = rte_atomic16_dec_and_test(&idxd->u.pci->ref_count);
> > > +       /* NOTE: review for potential ordering optimization */
> > > +       is_last_wq = __atomic_fetch_sub(&idxd->u.pci->ref_count, 1,
> > > +               __ATOMIC_SEQ_CST) - 1 == 0;
> 
> Rather than "__atomic_fetch_sub(...) - 1 == 0", I think just comparing
> "== 1" is simpler and better. I would also bracket the comparison for
> clarity.
> 
> > >         if (is_last_wq) {
> > >                 /* disable the device */
> > >                 err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
> > > @@ -350,7 +351,8 @@
> > >                                 free(idxd.u.pci);
> > >                         return ret;
> > >                 }
> > > -               rte_atomic16_inc(&idxd.u.pci->ref_count);
> > > +               /* NOTE: review for potential ordering optimization */
> 
> I think we can drop the note. Since this is not datapath code the perf is
> not that important.

Following up on my previous input to the discussion about these notes...

I agree with Bruce regarding this location. Here the atomic is used purely in the control plane: atomicity is required, but optimizing the memory ordering would be a waste of brain power, so we can drop the notes in such situations. Perhaps Honnappa was referring to something similar - if so, I agree with Honnappa too. ;-)

In principle: the code at this specific note has now been actively considered for optimization, and the conclusion was that further optimization is not required, so SEQ_CST is the correct choice here. Ideally this would be changed now, but it could also be changed in a later (separate) patch.

> 
> > > +               __atomic_fetch_add(&idxd.u.pci->ref_count, 1,
> __ATOMIC_SEQ_CST);
> > >         }
> > >
> > >         return 0;
> > > --
> > > 1.8.3.1
> > >
> >
> > --
> > David Marchand
> >
  

Patch

diff --git a/drivers/dma/idxd/idxd_internal.h b/drivers/dma/idxd/idxd_internal.h
index 180a858..cd41777 100644
--- a/drivers/dma/idxd/idxd_internal.h
+++ b/drivers/dma/idxd/idxd_internal.h
@@ -7,7 +7,6 @@ 
 
 #include <rte_dmadev_pmd.h>
 #include <rte_spinlock.h>
-#include <rte_atomic.h>
 
 #include "idxd_hw_defs.h"
 
@@ -34,7 +33,7 @@  struct idxd_pci_common {
 	rte_spinlock_t lk;
 
 	uint8_t wq_cfg_sz;
-	rte_atomic16_t ref_count;
+	uint16_t ref_count;
 	volatile struct rte_idxd_bar0 *regs;
 	volatile uint32_t *wq_regs_base;
 	volatile struct rte_idxd_grpcfg *grp_regs;
diff --git a/drivers/dma/idxd/idxd_pci.c b/drivers/dma/idxd/idxd_pci.c
index 781fa02..2de5d15 100644
--- a/drivers/dma/idxd/idxd_pci.c
+++ b/drivers/dma/idxd/idxd_pci.c
@@ -6,7 +6,6 @@ 
 #include <rte_devargs.h>
 #include <rte_dmadev_pmd.h>
 #include <rte_malloc.h>
-#include <rte_atomic.h>
 
 #include "idxd_internal.h"
 
@@ -136,7 +135,9 @@ 
 	/* if this is the last WQ on the device, disable the device and free
 	 * the PCI struct
 	 */
-	is_last_wq = rte_atomic16_dec_and_test(&idxd->u.pci->ref_count);
+	/* NOTE: review for potential ordering optimization */
+	is_last_wq = __atomic_fetch_sub(&idxd->u.pci->ref_count, 1,
+		__ATOMIC_SEQ_CST) - 1 == 0;
 	if (is_last_wq) {
 		/* disable the device */
 		err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
@@ -350,7 +351,8 @@ 
 				free(idxd.u.pci);
 			return ret;
 		}
-		rte_atomic16_inc(&idxd.u.pci->ref_count);
+		/* NOTE: review for potential ordering optimization */
+		__atomic_fetch_add(&idxd.u.pci->ref_count, 1, __ATOMIC_SEQ_CST);
 	}
 
 	return 0;