[dpdk-dev,v3,3/5] hash: add fallback to software CRC32 implementation

Message ID faece334ae2ae58553c0a19c5f8830993ae543f3.1416280649.git.e_zhumabekov@sts.kz (mailing list archive)
State Superseded, archived
Headers

Commit Message

Yerden Zhumabekov Nov. 18, 2014, 3:21 a.m. UTC
  Initially, SSE4.2 support is detected via CPUID instruction.

Added rte_hash_crc_set_alg() function to detect and set CRC32
implementation if necessary. SSE4.2 is allowed by default. If it's
not available, fall back to sw implementation.

Depending on compiler attributes support, best available algorithm
may be detected upon application startup.

Signed-off-by: Yerden Zhumabekov <e_zhumabekov@sts.kz>
---
 lib/librte_hash/rte_hash_crc.h |   64 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 62 insertions(+), 2 deletions(-)
  

Comments

Yerden Zhumabekov Nov. 18, 2014, 4:56 a.m. UTC | #1
Sorry, maybe I made a mistake here.

Accoring to lib/librte_eal/common/eal_common_cpuflags.c code, it seemed
to me that constructor attribute is not supported by intel compiler. So
in that case here I decided to leave the code for autodetection. Am I
correct?

18.11.2014 9:21, Yerden Zhumabekov пишет:
> Initially, SSE4.2 support is detected via CPUID instruction.
>
> Added rte_hash_crc_set_alg() function to detect and set CRC32
> implementation if necessary. SSE4.2 is allowed by default. If it's
> not available, fall back to sw implementation.
>
> Depending on compiler attributes support, best available algorithm
> may be detected upon application startup.
>
> Signed-off-by: Yerden Zhumabekov <e_zhumabekov@sts.kz>
> ---
>  lib/librte_hash/rte_hash_crc.h |   64 ++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 62 insertions(+), 2 deletions(-)
>
> diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
> index 15f687a..c1b75e8 100644
> --- a/lib/librte_hash/rte_hash_crc.h
> +++ b/lib/librte_hash/rte_hash_crc.h
> @@ -45,7 +45,11 @@ extern "C" {
>  #endif
>  
>  #include <stdint.h>
> +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
>  #include <nmmintrin.h>
> +#endif
> +#include <rte_cpuflags.h>
> +#include <rte_branch_prediction.h>
>  
>  /* Lookup tables for software implementation of CRC32C */
>  static uint32_t crc32c_tables[8][256] = {{
> @@ -363,8 +367,44 @@ crc32c_2words(uint64_t data, uint32_t init_val)
>  	return crc;
>  }
>  
> +enum crc32_alg_t {
> +	CRC32_SW = 0,
> +	CRC32_SSE42,
> +	CRC32_AUTODETECT
> +};
> +
> +static enum crc32_alg_t crc32_alg = CRC32_AUTODETECT;
> +
> +/**
> + * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
> + * calculation.
> + *
> + * @param flag
> + *   unsigned integer flag
> + *   - (CRC32_SW) Don't use SSE4.2 intrinsics
> + *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available, set by default
> + */
> +static inline void
> +rte_hash_crc_set_alg(enum crc32_alg_t alg)
> +{
> +	int sse42_supp = rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2);
> +	enum crc32_alg_t alg_supp = sse42_supp ? CRC32_SSE42 : CRC32_SW;
> +	crc32_alg = (alg == CRC32_SSE42) ? alg_supp : CRC32_SW;
> +}
> +
> +/* Best available algorithm is detected via CPUID instruction */
> +#ifndef __INTEL_COMPILER
> +static inline void __attribute__((constructor))
> +rte_hash_crc_try_sse42(void)
> +{
> +	rte_hash_crc_set_alg(CRC32_SSE42);
> +}
> +#endif
> +
>  /**
>   * Use single crc32 instruction to perform a hash on a 4 byte value.
> + * Fall back to software crc32 implementation in case SSE4.2 is
> + * not supported
>   *
>   * @param data
>   *   Data to perform hash on.
> @@ -376,11 +416,22 @@ crc32c_2words(uint64_t data, uint32_t init_val)
>  static inline uint32_t
>  rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
>  {
> -	return _mm_crc32_u32(init_val, data);
> +#ifdef __INTEL_COMPILER
> +	if (unlikely(crc32_alg == CRC32_AUTODETECT))
> +		rte_hash_crc_set_alg(CRC32_SSE42);
> +#endif
> +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
> +	if (likely(crc32_alg == CRC32_SSE42))
> +		return _mm_crc32_u32(init_val, data);
> +#endif
> +
> +	return crc32c_1word(data, init_val);
>  }
>  
>  /**
>   * Use single crc32 instruction to perform a hash on a 8 byte value.
> + * Fall back to software crc32 implementation in case SSE4.2 is
> + * not supported
>   *
>   * @param data
>   *   Data to perform hash on.
> @@ -392,7 +443,16 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
>  static inline uint32_t
>  rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
>  {
> -	return _mm_crc32_u64(init_val, data);
> +#ifdef __INTEL_COMPILER
> +	if (unlikely(crc32_alg == CRC32_AUTODETECT))
> +		rte_hash_crc_set_alg(CRC32_SSE42);
> +#endif
> +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
> +	if (likely(crc32_alg == CRC32_SSE42))
> +		return _mm_crc32_u64(init_val, data);
> +#endif
> +
> +	return crc32c_2words(data, init_val);
>  }
>  
>  /**
  
Neil Horman Nov. 18, 2014, 1:33 p.m. UTC | #2
On Tue, Nov 18, 2014 at 10:56:24AM +0600, Yerden Zhumabekov wrote:
> Sorry, maybe I made a mistake here.
> 
> Accoring to lib/librte_eal/common/eal_common_cpuflags.c code, it seemed
> to me that constructor attribute is not supported by intel compiler. So
> in that case here I decided to leave the code for autodetection. Am I
> correct?
> 

I don't think thats correct. The Intel Compiler claims support for most GCC
features, except where explicitly stated in the release notes, and I don't find
any documentation clearly excepting the constructor attribute from that list.
That said, since the intel compiler isn't open, I don't have access to it and
cannot confirm either way, though if its the case, the DPDK has a major issue,
as __attribute__((constructor)) is used extensively throughout the code
Neil



> 18.11.2014 9:21, Yerden Zhumabekov пишет:
> > Initially, SSE4.2 support is detected via CPUID instruction.
> >
> > Added rte_hash_crc_set_alg() function to detect and set CRC32
> > implementation if necessary. SSE4.2 is allowed by default. If it's
> > not available, fall back to sw implementation.
> >
> > Depending on compiler attributes support, best available algorithm
> > may be detected upon application startup.
> >
> > Signed-off-by: Yerden Zhumabekov <e_zhumabekov@sts.kz>
> > ---
> >  lib/librte_hash/rte_hash_crc.h |   64 ++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 62 insertions(+), 2 deletions(-)
> >
> > diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
> > index 15f687a..c1b75e8 100644
> > --- a/lib/librte_hash/rte_hash_crc.h
> > +++ b/lib/librte_hash/rte_hash_crc.h
> > @@ -45,7 +45,11 @@ extern "C" {
> >  #endif
> >  
> >  #include <stdint.h>
> > +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
> >  #include <nmmintrin.h>
> > +#endif
> > +#include <rte_cpuflags.h>
> > +#include <rte_branch_prediction.h>
> >  
> >  /* Lookup tables for software implementation of CRC32C */
> >  static uint32_t crc32c_tables[8][256] = {{
> > @@ -363,8 +367,44 @@ crc32c_2words(uint64_t data, uint32_t init_val)
> >  	return crc;
> >  }
> >  
> > +enum crc32_alg_t {
> > +	CRC32_SW = 0,
> > +	CRC32_SSE42,
> > +	CRC32_AUTODETECT
> > +};
> > +
> > +static enum crc32_alg_t crc32_alg = CRC32_AUTODETECT;
> > +
> > +/**
> > + * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
> > + * calculation.
> > + *
> > + * @param flag
> > + *   unsigned integer flag
> > + *   - (CRC32_SW) Don't use SSE4.2 intrinsics
> > + *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available, set by default
> > + */
> > +static inline void
> > +rte_hash_crc_set_alg(enum crc32_alg_t alg)
> > +{
> > +	int sse42_supp = rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2);
> > +	enum crc32_alg_t alg_supp = sse42_supp ? CRC32_SSE42 : CRC32_SW;
> > +	crc32_alg = (alg == CRC32_SSE42) ? alg_supp : CRC32_SW;
> > +}
> > +
> > +/* Best available algorithm is detected via CPUID instruction */
> > +#ifndef __INTEL_COMPILER
> > +static inline void __attribute__((constructor))
> > +rte_hash_crc_try_sse42(void)
> > +{
> > +	rte_hash_crc_set_alg(CRC32_SSE42);
> > +}
> > +#endif
> > +
> >  /**
> >   * Use single crc32 instruction to perform a hash on a 4 byte value.
> > + * Fall back to software crc32 implementation in case SSE4.2 is
> > + * not supported
> >   *
> >   * @param data
> >   *   Data to perform hash on.
> > @@ -376,11 +416,22 @@ crc32c_2words(uint64_t data, uint32_t init_val)
> >  static inline uint32_t
> >  rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
> >  {
> > -	return _mm_crc32_u32(init_val, data);
> > +#ifdef __INTEL_COMPILER
> > +	if (unlikely(crc32_alg == CRC32_AUTODETECT))
> > +		rte_hash_crc_set_alg(CRC32_SSE42);
> > +#endif
> > +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
> > +	if (likely(crc32_alg == CRC32_SSE42))
> > +		return _mm_crc32_u32(init_val, data);
> > +#endif
> > +
> > +	return crc32c_1word(data, init_val);
> >  }
> >  
> >  /**
> >   * Use single crc32 instruction to perform a hash on a 8 byte value.
> > + * Fall back to software crc32 implementation in case SSE4.2 is
> > + * not supported
> >   *
> >   * @param data
> >   *   Data to perform hash on.
> > @@ -392,7 +443,16 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
> >  static inline uint32_t
> >  rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
> >  {
> > -	return _mm_crc32_u64(init_val, data);
> > +#ifdef __INTEL_COMPILER
> > +	if (unlikely(crc32_alg == CRC32_AUTODETECT))
> > +		rte_hash_crc_set_alg(CRC32_SSE42);
> > +#endif
> > +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
> > +	if (likely(crc32_alg == CRC32_SSE42))
> > +		return _mm_crc32_u64(init_val, data);
> > +#endif
> > +
> > +	return crc32c_2words(data, init_val);
> >  }
> >  
> >  /**
> 
> -- 
> Sincerely,
> 
> Yerden Zhumabekov
> State Technical Service
> Astana, KZ
> 
>
  
Yerden Zhumabekov Nov. 18, 2014, 1:37 p.m. UTC | #3
18.11.2014 19:33, Neil Horman пишет:
> On Tue, Nov 18, 2014 at 10:56:24AM +0600, Yerden Zhumabekov wrote:
>> Sorry, maybe I made a mistake here.
>>
>> Accoring to lib/librte_eal/common/eal_common_cpuflags.c code, it seemed
>> to me that constructor attribute is not supported by intel compiler. So
>> in that case here I decided to leave the code for autodetection. Am I
>> correct?
>>
> I don't think thats correct. The Intel Compiler claims support for most GCC
> features, except where explicitly stated in the release notes, and I don't find
> any documentation clearly excepting the constructor attribute from that list.
> That said, since the intel compiler isn't open, I don't have access to it and
> cannot confirm either way, though if its the case, the DPDK has a major issue,
> as __attribute__((constructor)) is used extensively throughout the code
> Neil

My bad. Ok, I'll redo it again and send the series as 'v4'.
Thanks.
  
Thomas Monjalon Nov. 18, 2014, 1:43 p.m. UTC | #4
2014-11-18 08:33, Neil Horman:
> On Tue, Nov 18, 2014 at 10:56:24AM +0600, Yerden Zhumabekov wrote:
> > Sorry, maybe I made a mistake here.
> > 
> > Accoring to lib/librte_eal/common/eal_common_cpuflags.c code, it seemed
> > to me that constructor attribute is not supported by intel compiler. So
> > in that case here I decided to leave the code for autodetection. Am I
> > correct?
> 
> I don't think thats correct. The Intel Compiler claims support for most GCC
> features, except where explicitly stated in the release notes, and I don't find
> any documentation clearly excepting the constructor attribute from that list.
> That said, since the intel compiler isn't open, I don't have access to it and
> cannot confirm either way, though if its the case, the DPDK has a major issue,
> as __attribute__((constructor)) is used extensively throughout the code

The comment of rte_cpu_check_supported() is:
	"with ICC, the check is generated by the compiler"
So in my understanding, the constructor attribute is not set because this function
isn't needed for ICC.
I'd like to see an explanation of how CPU flags are set by ICC.
  

Patch

diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
index 15f687a..c1b75e8 100644
--- a/lib/librte_hash/rte_hash_crc.h
+++ b/lib/librte_hash/rte_hash_crc.h
@@ -45,7 +45,11 @@  extern "C" {
 #endif
 
 #include <stdint.h>
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
 #include <nmmintrin.h>
+#endif
+#include <rte_cpuflags.h>
+#include <rte_branch_prediction.h>
 
 /* Lookup tables for software implementation of CRC32C */
 static uint32_t crc32c_tables[8][256] = {{
@@ -363,8 +367,44 @@  crc32c_2words(uint64_t data, uint32_t init_val)
 	return crc;
 }
 
+enum crc32_alg_t {
+	CRC32_SW = 0,
+	CRC32_SSE42,
+	CRC32_AUTODETECT
+};
+
+static enum crc32_alg_t crc32_alg = CRC32_AUTODETECT;
+
+/**
+ * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
+ * calculation.
+ *
+ * @param flag
+ *   unsigned integer flag
+ *   - (CRC32_SW) Don't use SSE4.2 intrinsics
+ *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available, set by default
+ */
+static inline void
+rte_hash_crc_set_alg(enum crc32_alg_t alg)
+{
+	int sse42_supp = rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2);
+	enum crc32_alg_t alg_supp = sse42_supp ? CRC32_SSE42 : CRC32_SW;
+	crc32_alg = (alg == CRC32_SSE42) ? alg_supp : CRC32_SW;
+}
+
+/* Best available algorithm is detected via CPUID instruction */
+#ifndef __INTEL_COMPILER
+static inline void __attribute__((constructor))
+rte_hash_crc_try_sse42(void)
+{
+	rte_hash_crc_set_alg(CRC32_SSE42);
+}
+#endif
+
 /**
  * Use single crc32 instruction to perform a hash on a 4 byte value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
  *
  * @param data
  *   Data to perform hash on.
@@ -376,11 +416,22 @@  crc32c_2words(uint64_t data, uint32_t init_val)
 static inline uint32_t
 rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
 {
-	return _mm_crc32_u32(init_val, data);
+#ifdef __INTEL_COMPILER
+	if (unlikely(crc32_alg == CRC32_AUTODETECT))
+		rte_hash_crc_set_alg(CRC32_SSE42);
+#endif
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+	if (likely(crc32_alg == CRC32_SSE42))
+		return _mm_crc32_u32(init_val, data);
+#endif
+
+	return crc32c_1word(data, init_val);
 }
 
 /**
  * Use single crc32 instruction to perform a hash on a 8 byte value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
  *
  * @param data
  *   Data to perform hash on.
@@ -392,7 +443,16 @@  rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
 static inline uint32_t
 rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
 {
-	return _mm_crc32_u64(init_val, data);
+#ifdef __INTEL_COMPILER
+	if (unlikely(crc32_alg == CRC32_AUTODETECT))
+		rte_hash_crc_set_alg(CRC32_SSE42);
+#endif
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+	if (likely(crc32_alg == CRC32_SSE42))
+		return _mm_crc32_u64(init_val, data);
+#endif
+
+	return crc32c_2words(data, init_val);
 }
 
 /**