[v4,06/14] eal: use prefetch intrinsics

Message ID 1681247548-18590-7-git-send-email-roretzla@linux.microsoft.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series msvc integration changes |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Tyler Retzlaff April 11, 2023, 9:12 p.m. UTC
  Inline assembly is not supported for MSVC x64 instead use _mm_prefetch
and _mm_cldemote intrinsics.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/x86/include/rte_prefetch.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
  

Comments

Bruce Richardson April 12, 2023, 9:05 a.m. UTC | #1
On Tue, Apr 11, 2023 at 02:12:20PM -0700, Tyler Retzlaff wrote:
> Inline assembly is not supported for MSVC x64 instead use _mm_prefetch
> and _mm_cldemote intrinsics.
> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---

Acked-by: Bruce Richardson <bruce.richardson@intel.com>

One comment inline below for future consideration.

>  lib/eal/x86/include/rte_prefetch.h | 29 +++++++++++++++++++++++++++++
>  1 file changed, 29 insertions(+)
> 
> diff --git a/lib/eal/x86/include/rte_prefetch.h b/lib/eal/x86/include/rte_prefetch.h
> index 7fd01c4..1391af0 100644
> --- a/lib/eal/x86/include/rte_prefetch.h
> +++ b/lib/eal/x86/include/rte_prefetch.h
> @@ -13,6 +13,7 @@
>  #include <rte_common.h>
>  #include "generic/rte_prefetch.h"
>  
> +#ifndef RTE_TOOLCHAIN_MSVC
>  static inline void rte_prefetch0(const volatile void *p)
>  {
>  	asm volatile ("prefetcht0 %[p]" : : [p] "m" (*(const volatile char *)p));
> @@ -43,6 +44,34 @@ static inline void rte_prefetch_non_temporal(const volatile void *p)
>  {
>  	asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (p));
>  }
> +#else
> +static inline void rte_prefetch0(const volatile void *p)
> +{
> +	_mm_prefetch(p, 1);
> +}
> +
> +static inline void rte_prefetch1(const volatile void *p)
> +{
> +	_mm_prefetch(p, 2);
> +}
> +
> +static inline void rte_prefetch2(const volatile void *p)
> +{
> +	_mm_prefetch(p, 3);
> +}
> +
> +static inline void rte_prefetch_non_temporal(const volatile void *p)
> +{
> +	_mm_prefetch(p, 0);
> +}

For these prefetch instructions, I'm not sure there is any reason why we
can't drop the inline assembly versions. The instructions are very old at
this point and should be widely supported by all compilers we use.

Rather than using hard-coded 1, 2, 3 values in the prefetch calls, I
believe there should be defines for the levels: "_MM_HINT_T0",
"_MM_HINT_T1" etc.

> +__rte_experimental
> +static inline void
> +rte_cldemote(const volatile void *p)
> +{
> +	_mm_cldemote(p);
> +}
> +#endif
> +
>  
>  #ifdef __cplusplus
>  }
> -- 
> 1.8.3.1
>
  
Konstantin Ananyev April 12, 2023, 12:31 p.m. UTC | #2
> On Tue, Apr 11, 2023 at 02:12:20PM -0700, Tyler Retzlaff wrote:
> > Inline assembly is not supported for MSVC x64 instead use _mm_prefetch
> > and _mm_cldemote intrinsics.
> >
> > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > ---
> 
> Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> 
> One comment inline below for future consideration.
> 
> >  lib/eal/x86/include/rte_prefetch.h | 29 +++++++++++++++++++++++++++++
> >  1 file changed, 29 insertions(+)
> >
> > diff --git a/lib/eal/x86/include/rte_prefetch.h b/lib/eal/x86/include/rte_prefetch.h
> > index 7fd01c4..1391af0 100644
> > --- a/lib/eal/x86/include/rte_prefetch.h
> > +++ b/lib/eal/x86/include/rte_prefetch.h
> > @@ -13,6 +13,7 @@
> >  #include <rte_common.h>
> >  #include "generic/rte_prefetch.h"
> >
> > +#ifndef RTE_TOOLCHAIN_MSVC
> >  static inline void rte_prefetch0(const volatile void *p)
> >  {
> >  	asm volatile ("prefetcht0 %[p]" : : [p] "m" (*(const volatile char *)p));
> > @@ -43,6 +44,34 @@ static inline void rte_prefetch_non_temporal(const volatile void *p)
> >  {
> >  	asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (p));
> >  }
> > +#else
> > +static inline void rte_prefetch0(const volatile void *p)
> > +{
> > +	_mm_prefetch(p, 1);
> > +}
> > +
> > +static inline void rte_prefetch1(const volatile void *p)
> > +{
> > +	_mm_prefetch(p, 2);
> > +}
> > +
> > +static inline void rte_prefetch2(const volatile void *p)
> > +{
> > +	_mm_prefetch(p, 3);
> > +}
> > +
> > +static inline void rte_prefetch_non_temporal(const volatile void *p)
> > +{
> > +	_mm_prefetch(p, 0);
> > +}
> 
> For these prefetch instructions, I'm not sure there is any reason why we
> can't drop the inline assembly versions. The instructions are very old at
> this point and should be widely supported by all compilers we use.
> 
> Rather than using hard-coded 1, 2, 3 values in the prefetch calls, I
> believe there should be defines for the levels: "_MM_HINT_T0",
> "_MM_HINT_T1" etc.

+1
 

> 
> > +__rte_experimental
> > +static inline void
> > +rte_cldemote(const volatile void *p)
> > +{
> > +	_mm_cldemote(p);
> > +}
> > +#endif
> > +
> >
> >  #ifdef __cplusplus
> >  }
> > --
> > 1.8.3.1
> >
  
Tyler Retzlaff April 12, 2023, 3:19 p.m. UTC | #3
On Wed, Apr 12, 2023 at 10:05:57AM +0100, Bruce Richardson wrote:
> On Tue, Apr 11, 2023 at 02:12:20PM -0700, Tyler Retzlaff wrote:
> > Inline assembly is not supported for MSVC x64 instead use _mm_prefetch
> > and _mm_cldemote intrinsics.
> > 
> > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > ---
> 
> Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> 
> One comment inline below for future consideration.
> 
> >  lib/eal/x86/include/rte_prefetch.h | 29 +++++++++++++++++++++++++++++
> >  1 file changed, 29 insertions(+)
> > 
> > diff --git a/lib/eal/x86/include/rte_prefetch.h b/lib/eal/x86/include/rte_prefetch.h
> > index 7fd01c4..1391af0 100644
> > --- a/lib/eal/x86/include/rte_prefetch.h
> > +++ b/lib/eal/x86/include/rte_prefetch.h
> > @@ -13,6 +13,7 @@
> >  #include <rte_common.h>
> >  #include "generic/rte_prefetch.h"
> >  
> > +#ifndef RTE_TOOLCHAIN_MSVC
> >  static inline void rte_prefetch0(const volatile void *p)
> >  {
> >  	asm volatile ("prefetcht0 %[p]" : : [p] "m" (*(const volatile char *)p));
> > @@ -43,6 +44,34 @@ static inline void rte_prefetch_non_temporal(const volatile void *p)
> >  {
> >  	asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (p));
> >  }
> > +#else
> > +static inline void rte_prefetch0(const volatile void *p)
> > +{
> > +	_mm_prefetch(p, 1);
> > +}
> > +
> > +static inline void rte_prefetch1(const volatile void *p)
> > +{
> > +	_mm_prefetch(p, 2);
> > +}
> > +
> > +static inline void rte_prefetch2(const volatile void *p)
> > +{
> > +	_mm_prefetch(p, 3);
> > +}
> > +
> > +static inline void rte_prefetch_non_temporal(const volatile void *p)
> > +{
> > +	_mm_prefetch(p, 0);
> > +}
> 
> For these prefetch instructions, I'm not sure there is any reason why we
> can't drop the inline assembly versions. The instructions are very old at
> this point and should be widely supported by all compilers we use.
> 
> Rather than using hard-coded 1, 2, 3 values in the prefetch calls, I
> believe there should be defines for the levels: "_MM_HINT_T0",
> "_MM_HINT_T1" etc.

hm, i did not know about these and i bet they fix the problem i had.
i.e. if i use e.g. bare '1' i would not get the same prefetch codegen on
gcc/msvc but these defines probably resolve that problem.

let me take another look at this one.

> 
> > +__rte_experimental
> > +static inline void
> > +rte_cldemote(const volatile void *p)
> > +{
> > +	_mm_cldemote(p);
> > +}
> > +#endif
> > +
> >  
> >  #ifdef __cplusplus
> >  }
> > -- 
> > 1.8.3.1
> >
  

Patch

diff --git a/lib/eal/x86/include/rte_prefetch.h b/lib/eal/x86/include/rte_prefetch.h
index 7fd01c4..1391af0 100644
--- a/lib/eal/x86/include/rte_prefetch.h
+++ b/lib/eal/x86/include/rte_prefetch.h
@@ -13,6 +13,7 @@ 
 #include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
+#ifndef RTE_TOOLCHAIN_MSVC
 static inline void rte_prefetch0(const volatile void *p)
 {
 	asm volatile ("prefetcht0 %[p]" : : [p] "m" (*(const volatile char *)p));
@@ -43,6 +44,34 @@  static inline void rte_prefetch_non_temporal(const volatile void *p)
 {
 	asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (p));
 }
+#else
+static inline void rte_prefetch0(const volatile void *p)
+{
+	_mm_prefetch(p, 1);
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+	_mm_prefetch(p, 2);
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+	_mm_prefetch(p, 3);
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	_mm_prefetch(p, 0);
+}
+__rte_experimental
+static inline void
+rte_cldemote(const volatile void *p)
+{
+	_mm_cldemote(p);
+}
+#endif
+
 
 #ifdef __cplusplus
 }