[dpdk-dev,v2,3/4] hash: add fallback to software CRC32 implementation
Commit Message
Initially, SSE4.2 support is detected via CPUID instruction.
Added rte_hash_crc_set_alg() function to detect and set CRC32
implementation if necessary. SSE4.2 is allowed by default. If it's
not available, fall back to sw implementation.
Signed-off-by: Yerden Zhumabekov <e_zhumabekov@sts.kz>
---
lib/librte_hash/rte_hash_crc.h | 60 ++++++++++++++++++++++++++++++++++++++--
1 file changed, 58 insertions(+), 2 deletions(-)
Comments
Hi Yerden,
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Yerden Zhumabekov
> Sent: Sunday, November 16, 2014 5:59 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v2 3/4] hash: add fallback to software CRC32 implementation
>
> Initially, SSE4.2 support is detected via CPUID instruction.
>
> Added rte_hash_crc_set_alg() function to detect and set CRC32
> implementation if necessary. SSE4.2 is allowed by default. If it's
> not available, fall back to sw implementation.
>
> Signed-off-by: Yerden Zhumabekov <e_zhumabekov@sts.kz>
> ---
> lib/librte_hash/rte_hash_crc.h | 60 ++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 58 insertions(+), 2 deletions(-)
>
> diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
> index 74e2d92..178b162 100644
> --- a/lib/librte_hash/rte_hash_crc.h
> +++ b/lib/librte_hash/rte_hash_crc.h
> @@ -45,7 +45,11 @@ extern "C" {
> #endif
>
> #include <stdint.h>
> +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
> #include <nmmintrin.h>
> +#endif
> +#include <rte_cpuflags.h>
> +#include <rte_branch_prediction.h>
>
> /* Lookup table for software implementation of CRC32C */
> static const uint32_t crc32c_table[256] = {
> @@ -152,8 +156,42 @@ crc32c_2words(uint64_t data, uint32_t init_val)
> return init_val;
> }
>
> +enum crc32_alg_t {
> + CRC32_SW = 0,
> + CRC32_SSE42,
> + CRC32_AUTODETECT
> +};
> +
> +/* Default algorithm is left for autodetection,
> + * it is detected on first run of hash function
> + */
> +static enum crc32_alg_t crc32_alg = CRC32_AUTODETECT;
> +
> +/**
> + * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
> + * hash calculation.
> + *
> + * @param flag
> + * unsigned integer flag
> + * - (CRC32_SW) Don't use SSE4.2 intrinsics
> + * - (CRC32_SSE42) Use SSE4.2 intrinsics if available, set by default
> + */
> +static inline void
> +rte_hash_crc_set_alg(enum crc32_alg_t alg)
> +{
> + int sse42_supp = rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2);
> + enum crc32_alg_t alg_supp = sse42_supp ? CRC32_SSE42 : CRC32_SW;
> +
> + if (alg == CRC32_SSE42)
> + crc32_alg = alg_supp;
> + else
> + crc32_alg = CRC32_SW;
> +}
> +
Wonder can we define that function with __attribute__((constructor))?
Then, I suppose we can remove CRC32_AUTODETECT, and remove:
if (unlikely(crc32_alg == CRC32_AUTODETECT))
rte_hash_crc_set_alg(CRC32_SSE42);
from rte_hash_crc_*byte().
Konstantin
> /**
> * Use single crc32 instruction to perform a hash on a 4 byte value.
> + * Fall back to software crc32 implementation in case SSE4.2 is
> + * not supported
> *
> * @param data
> * Data to perform hash on.
> @@ -165,11 +203,21 @@ crc32c_2words(uint64_t data, uint32_t init_val)
> static inline uint32_t
> rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
> {
> - return _mm_crc32_u32(init_val, data);
> + if (unlikely(crc32_alg == CRC32_AUTODETECT))
> + rte_hash_crc_set_alg(CRC32_SSE42);
> +
> +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
> + if (likely(crc32_alg == CRC32_SSE42))
> + return _mm_crc32_u32(init_val, data);
> +#endif
> +
> + return crc32c_1word(data, init_val);
> }
>
> /**
> * Use single crc32 instruction to perform a hash on a 8 byte value.
> + * Fall back to software crc32 implementation in case SSE4.2 is
> + * not supported
> *
> * @param data
> * Data to perform hash on.
> @@ -181,7 +229,15 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
> static inline uint32_t
> rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
> {
> - return _mm_crc32_u64(init_val, data);
> + if (unlikely(crc32_alg == CRC32_AUTODETECT))
> + rte_hash_crc_set_alg(CRC32_SSE42);
> +
> +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
> + if (likely(crc32_alg == CRC32_SSE42))
> + return _mm_crc32_u64(init_val, data);
> +#endif
> +
> + return crc32c_2words(data, init_val);
> }
>
> /**
> --
> 1.7.9.5
17.11.2014 18:34, Ananyev, Konstantin пишет:
> Hi Yerden,
>
>> +static inline void
>> +rte_hash_crc_set_alg(enum crc32_alg_t alg)
>> +{
>> + int sse42_supp = rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2);
>> + enum crc32_alg_t alg_supp = sse42_supp ? CRC32_SSE42 : CRC32_SW;
>> +
>> + if (alg == CRC32_SSE42)
>> + crc32_alg = alg_supp;
>> + else
>> + crc32_alg = CRC32_SW;
>> +}
>> +
> Wonder can we define that function with __attribute__((constructor))?
> Then, I suppose we can remove CRC32_AUTODETECT, and remove:
> if (unlikely(crc32_alg == CRC32_AUTODETECT))
> rte_hash_crc_set_alg(CRC32_SSE42);
> from rte_hash_crc_*byte().
Nice feature I was unfamiliar with :)
Since I'm going to revise the patch series anyway, I'll apply it and
test. Thank you.
On Mon, Nov 17, 2014 at 12:34:04PM +0000, Ananyev, Konstantin wrote:
>
> Hi Yerden,
>
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Yerden Zhumabekov
> > Sent: Sunday, November 16, 2014 5:59 PM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH v2 3/4] hash: add fallback to software CRC32 implementation
> >
> > Initially, SSE4.2 support is detected via CPUID instruction.
> >
> > Added rte_hash_crc_set_alg() function to detect and set CRC32
> > implementation if necessary. SSE4.2 is allowed by default. If it's
> > not available, fall back to sw implementation.
> >
> > Signed-off-by: Yerden Zhumabekov <e_zhumabekov@sts.kz>
> > ---
> > lib/librte_hash/rte_hash_crc.h | 60 ++++++++++++++++++++++++++++++++++++++--
> > 1 file changed, 58 insertions(+), 2 deletions(-)
> >
> > diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
> > index 74e2d92..178b162 100644
> > --- a/lib/librte_hash/rte_hash_crc.h
> > +++ b/lib/librte_hash/rte_hash_crc.h
> > @@ -45,7 +45,11 @@ extern "C" {
> > #endif
> >
> > #include <stdint.h>
> > +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
> > #include <nmmintrin.h>
> > +#endif
> > +#include <rte_cpuflags.h>
> > +#include <rte_branch_prediction.h>
> >
> > /* Lookup table for software implementation of CRC32C */
> > static const uint32_t crc32c_table[256] = {
> > @@ -152,8 +156,42 @@ crc32c_2words(uint64_t data, uint32_t init_val)
> > return init_val;
> > }
> >
> > +enum crc32_alg_t {
> > + CRC32_SW = 0,
> > + CRC32_SSE42,
> > + CRC32_AUTODETECT
> > +};
> > +
> > +/* Default algorithm is left for autodetection,
> > + * it is detected on first run of hash function
> > + */
> > +static enum crc32_alg_t crc32_alg = CRC32_AUTODETECT;
> > +
> > +/**
> > + * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
> > + * hash calculation.
> > + *
> > + * @param flag
> > + * unsigned integer flag
> > + * - (CRC32_SW) Don't use SSE4.2 intrinsics
> > + * - (CRC32_SSE42) Use SSE4.2 intrinsics if available, set by default
> > + */
> > +static inline void
> > +rte_hash_crc_set_alg(enum crc32_alg_t alg)
> > +{
> > + int sse42_supp = rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2);
> > + enum crc32_alg_t alg_supp = sse42_supp ? CRC32_SSE42 : CRC32_SW;
> > +
> > + if (alg == CRC32_SSE42)
> > + crc32_alg = alg_supp;
> > + else
> > + crc32_alg = CRC32_SW;
> > +}
> > +
>
> Wonder can we define that function with __attribute__((constructor))?
> Then, I suppose we can remove CRC32_AUTODETECT, and remove:
> if (unlikely(crc32_alg == CRC32_AUTODETECT))
> rte_hash_crc_set_alg(CRC32_SSE42);
> from rte_hash_crc_*byte().
>
That would make sense.
Neil
@@ -45,7 +45,11 @@ extern "C" {
#endif
#include <stdint.h>
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
#include <nmmintrin.h>
+#endif
+#include <rte_cpuflags.h>
+#include <rte_branch_prediction.h>
/* Lookup table for software implementation of CRC32C */
static const uint32_t crc32c_table[256] = {
@@ -152,8 +156,42 @@ crc32c_2words(uint64_t data, uint32_t init_val)
return init_val;
}
+enum crc32_alg_t {
+ CRC32_SW = 0,
+ CRC32_SSE42,
+ CRC32_AUTODETECT
+};
+
+/* Default algorithm is left for autodetection,
+ * it is detected on first run of hash function
+ */
+static enum crc32_alg_t crc32_alg = CRC32_AUTODETECT;
+
+/**
+ * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
+ * hash calculation.
+ *
+ * @param flag
+ * unsigned integer flag
+ * - (CRC32_SW) Don't use SSE4.2 intrinsics
+ * - (CRC32_SSE42) Use SSE4.2 intrinsics if available, set by default
+ */
+static inline void
+rte_hash_crc_set_alg(enum crc32_alg_t alg)
+{
+ int sse42_supp = rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2);
+ enum crc32_alg_t alg_supp = sse42_supp ? CRC32_SSE42 : CRC32_SW;
+
+ if (alg == CRC32_SSE42)
+ crc32_alg = alg_supp;
+ else
+ crc32_alg = CRC32_SW;
+}
+
/**
* Use single crc32 instruction to perform a hash on a 4 byte value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
*
* @param data
* Data to perform hash on.
@@ -165,11 +203,21 @@ crc32c_2words(uint64_t data, uint32_t init_val)
static inline uint32_t
rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
{
- return _mm_crc32_u32(init_val, data);
+ if (unlikely(crc32_alg == CRC32_AUTODETECT))
+ rte_hash_crc_set_alg(CRC32_SSE42);
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+ if (likely(crc32_alg == CRC32_SSE42))
+ return _mm_crc32_u32(init_val, data);
+#endif
+
+ return crc32c_1word(data, init_val);
}
/**
* Use single crc32 instruction to perform a hash on a 8 byte value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
*
* @param data
* Data to perform hash on.
@@ -181,7 +229,15 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
static inline uint32_t
rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
{
- return _mm_crc32_u64(init_val, data);
+ if (unlikely(crc32_alg == CRC32_AUTODETECT))
+ rte_hash_crc_set_alg(CRC32_SSE42);
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+ if (likely(crc32_alg == CRC32_SSE42))
+ return _mm_crc32_u64(init_val, data);
+#endif
+
+ return crc32c_2words(data, init_val);
}
/**