[dpdk-dev,v2,3/4] hash: add fallback to software CRC32 implementation

Message ID 1416160760-16087-4-git-send-email-e_zhumabekov@sts.kz (mailing list archive)
State Superseded, archived
Headers

Commit Message

Yerden Zhumabekov Nov. 16, 2014, 5:59 p.m. UTC
  Initially, SSE4.2 support is detected via CPUID instruction.

Added rte_hash_crc_set_alg() function to detect and set CRC32
implementation if necessary. SSE4.2 is allowed by default. If it's
not available, fall back to sw implementation.

Signed-off-by: Yerden Zhumabekov <e_zhumabekov@sts.kz>
---
 lib/librte_hash/rte_hash_crc.h |   60 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 58 insertions(+), 2 deletions(-)
  

Comments

Ananyev, Konstantin Nov. 17, 2014, 12:34 p.m. UTC | #1
Hi Yerden,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Yerden Zhumabekov
> Sent: Sunday, November 16, 2014 5:59 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v2 3/4] hash: add fallback to software CRC32 implementation
> 
> Initially, SSE4.2 support is detected via CPUID instruction.
> 
> Added rte_hash_crc_set_alg() function to detect and set CRC32
> implementation if necessary. SSE4.2 is allowed by default. If it's
> not available, fall back to sw implementation.
> 
> Signed-off-by: Yerden Zhumabekov <e_zhumabekov@sts.kz>
> ---
>  lib/librte_hash/rte_hash_crc.h |   60 ++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 58 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
> index 74e2d92..178b162 100644
> --- a/lib/librte_hash/rte_hash_crc.h
> +++ b/lib/librte_hash/rte_hash_crc.h
> @@ -45,7 +45,11 @@ extern "C" {
>  #endif
> 
>  #include <stdint.h>
> +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
>  #include <nmmintrin.h>
> +#endif
> +#include <rte_cpuflags.h>
> +#include <rte_branch_prediction.h>
> 
>  /* Lookup table for software implementation of CRC32C */
>  static const uint32_t crc32c_table[256] = {
> @@ -152,8 +156,42 @@ crc32c_2words(uint64_t data, uint32_t init_val)
>  	return init_val;
>  }
> 
> +enum crc32_alg_t {
> +	CRC32_SW = 0,
> +	CRC32_SSE42,
> +	CRC32_AUTODETECT
> +};
> +
> +/* Default algorithm is left for autodetection,
> + * it is detected on first run of hash function
> + */
> +static enum crc32_alg_t crc32_alg = CRC32_AUTODETECT;
> +
> +/**
> + * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
> + * hash calculation.
> + *
> + * @param flag
> + *   unsigned integer flag
> + *   - (CRC32_SW) Don't use SSE4.2 intrinsics
> + *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available, set by default
> + */
> +static inline void
> +rte_hash_crc_set_alg(enum crc32_alg_t alg)
> +{
> +	int sse42_supp = rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2);
> +	enum crc32_alg_t alg_supp = sse42_supp ? CRC32_SSE42 : CRC32_SW;
> +
> +	if (alg == CRC32_SSE42)
> +		crc32_alg = alg_supp;
> +	else
> +		crc32_alg = CRC32_SW;
> +}
> +

Wonder can we define that function with __attribute__((constructor))?
Then, I suppose we can remove CRC32_AUTODETECT, and remove:
if (unlikely(crc32_alg == CRC32_AUTODETECT))
   rte_hash_crc_set_alg(CRC32_SSE42);   
from rte_hash_crc_*byte().

Konstantin

>  /**
>   * Use single crc32 instruction to perform a hash on a 4 byte value.
> + * Fall back to software crc32 implementation in case SSE4.2 is
> + * not supported
>   *
>   * @param data
>   *   Data to perform hash on.
> @@ -165,11 +203,21 @@ crc32c_2words(uint64_t data, uint32_t init_val)
>  static inline uint32_t
>  rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
>  {
> -	return _mm_crc32_u32(init_val, data);
> +	if (unlikely(crc32_alg == CRC32_AUTODETECT))
> +		rte_hash_crc_set_alg(CRC32_SSE42);
> +
> +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
> +	if (likely(crc32_alg == CRC32_SSE42))
> +		return _mm_crc32_u32(init_val, data);
> +#endif
> +
> +	return crc32c_1word(data, init_val);
>  }
> 
>  /**
>   * Use single crc32 instruction to perform a hash on a 8 byte value.
> + * Fall back to software crc32 implementation in case SSE4.2 is
> + * not supported
>   *
>   * @param data
>   *   Data to perform hash on.
> @@ -181,7 +229,15 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
>  static inline uint32_t
>  rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
>  {
> -	return _mm_crc32_u64(init_val, data);
> +	if (unlikely(crc32_alg == CRC32_AUTODETECT))
> +		rte_hash_crc_set_alg(CRC32_SSE42);
> +
> +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
> +	if (likely(crc32_alg == CRC32_SSE42))
> +		return _mm_crc32_u64(init_val, data);
> +#endif
> +
> +	return crc32c_2words(data, init_val);
>  }
> 
>  /**
> --
> 1.7.9.5
  
Yerden Zhumabekov Nov. 17, 2014, 12:41 p.m. UTC | #2
17.11.2014 18:34, Ananyev, Konstantin пишет:
> Hi Yerden,
>
>> +static inline void
>> +rte_hash_crc_set_alg(enum crc32_alg_t alg)
>> +{
>> +	int sse42_supp = rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2);
>> +	enum crc32_alg_t alg_supp = sse42_supp ? CRC32_SSE42 : CRC32_SW;
>> +
>> +	if (alg == CRC32_SSE42)
>> +		crc32_alg = alg_supp;
>> +	else
>> +		crc32_alg = CRC32_SW;
>> +}
>> +
> Wonder can we define that function with __attribute__((constructor))?
> Then, I suppose we can remove CRC32_AUTODETECT, and remove:
> if (unlikely(crc32_alg == CRC32_AUTODETECT))
>    rte_hash_crc_set_alg(CRC32_SSE42);   
> from rte_hash_crc_*byte().
Nice feature  I was unfamiliar with :)
Since I'm going to revise the patch series anyway, I'll apply it and
test. Thank you.
  
Neil Horman Nov. 17, 2014, 2:06 p.m. UTC | #3
On Mon, Nov 17, 2014 at 12:34:04PM +0000, Ananyev, Konstantin wrote:
> 
> Hi Yerden,
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Yerden Zhumabekov
> > Sent: Sunday, November 16, 2014 5:59 PM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH v2 3/4] hash: add fallback to software CRC32 implementation
> > 
> > Initially, SSE4.2 support is detected via CPUID instruction.
> > 
> > Added rte_hash_crc_set_alg() function to detect and set CRC32
> > implementation if necessary. SSE4.2 is allowed by default. If it's
> > not available, fall back to sw implementation.
> > 
> > Signed-off-by: Yerden Zhumabekov <e_zhumabekov@sts.kz>
> > ---
> >  lib/librte_hash/rte_hash_crc.h |   60 ++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 58 insertions(+), 2 deletions(-)
> > 
> > diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
> > index 74e2d92..178b162 100644
> > --- a/lib/librte_hash/rte_hash_crc.h
> > +++ b/lib/librte_hash/rte_hash_crc.h
> > @@ -45,7 +45,11 @@ extern "C" {
> >  #endif
> > 
> >  #include <stdint.h>
> > +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
> >  #include <nmmintrin.h>
> > +#endif
> > +#include <rte_cpuflags.h>
> > +#include <rte_branch_prediction.h>
> > 
> >  /* Lookup table for software implementation of CRC32C */
> >  static const uint32_t crc32c_table[256] = {
> > @@ -152,8 +156,42 @@ crc32c_2words(uint64_t data, uint32_t init_val)
> >  	return init_val;
> >  }
> > 
> > +enum crc32_alg_t {
> > +	CRC32_SW = 0,
> > +	CRC32_SSE42,
> > +	CRC32_AUTODETECT
> > +};
> > +
> > +/* Default algorithm is left for autodetection,
> > + * it is detected on first run of hash function
> > + */
> > +static enum crc32_alg_t crc32_alg = CRC32_AUTODETECT;
> > +
> > +/**
> > + * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
> > + * hash calculation.
> > + *
> > + * @param flag
> > + *   unsigned integer flag
> > + *   - (CRC32_SW) Don't use SSE4.2 intrinsics
> > + *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available, set by default
> > + */
> > +static inline void
> > +rte_hash_crc_set_alg(enum crc32_alg_t alg)
> > +{
> > +	int sse42_supp = rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2);
> > +	enum crc32_alg_t alg_supp = sse42_supp ? CRC32_SSE42 : CRC32_SW;
> > +
> > +	if (alg == CRC32_SSE42)
> > +		crc32_alg = alg_supp;
> > +	else
> > +		crc32_alg = CRC32_SW;
> > +}
> > +
> 
> Wonder can we define that function with __attribute__((constructor))?
> Then, I suppose we can remove CRC32_AUTODETECT, and remove:
> if (unlikely(crc32_alg == CRC32_AUTODETECT))
>    rte_hash_crc_set_alg(CRC32_SSE42);   
> from rte_hash_crc_*byte().
> 
That would make sense.
Neil
  

Patch

diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
index 74e2d92..178b162 100644
--- a/lib/librte_hash/rte_hash_crc.h
+++ b/lib/librte_hash/rte_hash_crc.h
@@ -45,7 +45,11 @@  extern "C" {
 #endif
 
 #include <stdint.h>
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
 #include <nmmintrin.h>
+#endif
+#include <rte_cpuflags.h>
+#include <rte_branch_prediction.h>
 
 /* Lookup table for software implementation of CRC32C */
 static const uint32_t crc32c_table[256] = {
@@ -152,8 +156,42 @@  crc32c_2words(uint64_t data, uint32_t init_val)
 	return init_val;
 }
 
+enum crc32_alg_t {
+	CRC32_SW = 0,
+	CRC32_SSE42,
+	CRC32_AUTODETECT
+};
+
+/* Default algorithm is left for autodetection,
+ * it is detected on first run of hash function
+ */
+static enum crc32_alg_t crc32_alg = CRC32_AUTODETECT;
+
+/**
+ * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
+ * hash calculation.
+ *
+ * @param flag
+ *   unsigned integer flag
+ *   - (CRC32_SW) Don't use SSE4.2 intrinsics
+ *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available, set by default
+ */
+static inline void
+rte_hash_crc_set_alg(enum crc32_alg_t alg)
+{
+	int sse42_supp = rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2);
+	enum crc32_alg_t alg_supp = sse42_supp ? CRC32_SSE42 : CRC32_SW;
+
+	if (alg == CRC32_SSE42)
+		crc32_alg = alg_supp;
+	else
+		crc32_alg = CRC32_SW;
+}
+
 /**
  * Use single crc32 instruction to perform a hash on a 4 byte value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
  *
  * @param data
  *   Data to perform hash on.
@@ -165,11 +203,21 @@  crc32c_2words(uint64_t data, uint32_t init_val)
 static inline uint32_t
 rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
 {
-	return _mm_crc32_u32(init_val, data);
+	if (unlikely(crc32_alg == CRC32_AUTODETECT))
+		rte_hash_crc_set_alg(CRC32_SSE42);
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+	if (likely(crc32_alg == CRC32_SSE42))
+		return _mm_crc32_u32(init_val, data);
+#endif
+
+	return crc32c_1word(data, init_val);
 }
 
 /**
  * Use single crc32 instruction to perform a hash on a 8 byte value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
  *
  * @param data
  *   Data to perform hash on.
@@ -181,7 +229,15 @@  rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
 static inline uint32_t
 rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
 {
-	return _mm_crc32_u64(init_val, data);
+	if (unlikely(crc32_alg == CRC32_AUTODETECT))
+		rte_hash_crc_set_alg(CRC32_SSE42);
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+	if (likely(crc32_alg == CRC32_SSE42))
+		return _mm_crc32_u64(init_val, data);
+#endif
+
+	return crc32c_2words(data, init_val);
 }
 
 /**