Using carryless multiply instructions from RISC-V's Zbc extension,
implement a Barrett reduction that calculates CRC-32C checksums.
Based on the approach described by Intel's whitepaper on "Fast CRC
Computation for Generic Polynomials Using PCLMULQDQ Instruction", which
is also described here
(https://web.archive.org/web/20240111232520/https://mary.rs/lab/crc32/).
Add a case to the autotest_hash unit test.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
MAINTAINERS | 1 +
app/test/test_hash.c | 7 +++
lib/hash/meson.build | 1 +
lib/hash/rte_crc_riscv64.h | 89 ++++++++++++++++++++++++++++++++++++++
lib/hash/rte_hash_crc.c | 13 +++++-
lib/hash/rte_hash_crc.h | 6 ++-
6 files changed, 115 insertions(+), 2 deletions(-)
create mode 100644 lib/hash/rte_crc_riscv64.h
@@ -322,6 +322,7 @@ M: Stanislaw Kardach <stanislaw.kardach@gmail.com>
F: config/riscv/
F: doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
F: lib/eal/riscv/
+F: lib/hash/rte_crc_riscv64.h
Intel x86
M: Bruce Richardson <bruce.richardson@intel.com>
@@ -231,6 +231,13 @@ test_crc32_hash_alg_equiv(void)
printf("Failed checking CRC32_SW against CRC32_ARM64\n");
break;
}
+
+ /* Check against 8-byte-operand RISCV64 CRC32 if available */
+ rte_hash_crc_set_alg(CRC32_RISCV64);
+ if (hash_val != rte_hash_crc(data64, data_len, init_val)) {
+ printf("Failed checking CRC32_SW against CRC32_RISCV64\n");
+ break;
+ }
}
/* Resetting to best available algorithm */
@@ -12,6 +12,7 @@ headers = files(
indirect_headers += files(
'rte_crc_arm64.h',
'rte_crc_generic.h',
+ 'rte_crc_riscv64.h',
'rte_crc_sw.h',
'rte_crc_x86.h',
'rte_thash_x86_gfni.h',
new file mode 100644
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 ByteDance
+ */
+
+#ifndef _RTE_CRC_RISCV64_H_
+#define _RTE_CRC_RISCV64_H_
+
+#include <assert.h>
+#include <stdint.h>
+
+#include <riscv_bitmanip.h>
+
+/*
+ * CRC-32C takes a reflected input (bit 7 is the lsb) and produces a reflected
+ * output. As reflecting the value we're checksumming is expensive, we instead
+ * reflect the polynomial P (0x11EDC6F41), the constant mu, and the algorithm.
+ *
+ * The mu constant is used for a Barrett reduction. It's 2^96 / P (0x11F91CAF6)
+ * reflected. Picking 2^96 rather than 2^64 means we can calculate a 64-bit crc
+ * using only two multiplications (https://mary.rs/lab/crc32/)
+ */
+static const uint64_t p = 0x105EC76F1;
+static const uint64_t mu = 0x4869EC38DEA713F1UL;
+
+/* Calculate the CRC32C checksum using a Barrett reduction */
+static inline uint32_t
+crc32c_riscv64(uint64_t data, uint32_t init_val, uint32_t bits)
+{
+ assert((bits == 64) || (bits == 32) || (bits == 16) || (bits == 8));
+
+ /* Combine data with the initial value */
+ uint64_t crc = (uint64_t)(data ^ init_val) << (64 - bits);
+
+ /*
+ * Multiply by mu, which is 2^96 / P. Division by 2^96 occurs by taking
+ * the lower 64 bits of the result (remember we're inverted)
+ */
+ crc = __riscv_clmul_64(crc, mu);
+ /* Multiply by P */
+ crc = __riscv_clmulh_64(crc, p);
+
+ /* Subtract from original (only needed for smaller sizes) */
+ if (bits == 16 || bits == 8)
+ crc ^= init_val >> bits;
+
+ return crc;
+}
+
+/*
+ * Use carryless multiply to perform hash on a value, falling back on the
+ * software in case the Zbc extension is not supported
+ */
+static inline uint32_t
+rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
+{
+ if (likely(rte_hash_crc32_alg & CRC32_RISCV64))
+ return crc32c_riscv64(data, init_val, 8);
+
+ return crc32c_1byte(data, init_val);
+}
+
+static inline uint32_t
+rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
+{
+ if (likely(rte_hash_crc32_alg & CRC32_RISCV64))
+ return crc32c_riscv64(data, init_val, 16);
+
+ return crc32c_2bytes(data, init_val);
+}
+
+static inline uint32_t
+rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
+{
+ if (likely(rte_hash_crc32_alg & CRC32_RISCV64))
+ return crc32c_riscv64(data, init_val, 32);
+
+ return crc32c_1word(data, init_val);
+}
+
+static inline uint32_t
+rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
+{
+ if (likely(rte_hash_crc32_alg & CRC32_RISCV64))
+ return crc32c_riscv64(data, init_val, 64);
+
+ return crc32c_2words(data, init_val);
+}
+
+#endif /* _RTE_CRC_RISCV64_H_ */
@@ -15,7 +15,7 @@ RTE_LOG_REGISTER_SUFFIX(hash_crc_logtype, crc, INFO);
uint8_t rte_hash_crc32_alg = CRC32_SW;
/**
- * Allow or disallow use of SSE4.2/ARMv8 intrinsics for CRC32 hash
+ * Allow or disallow use of SSE4.2/ARMv8/RISC-V intrinsics for CRC32 hash
* calculation.
*
* @param alg
@@ -24,6 +24,7 @@ uint8_t rte_hash_crc32_alg = CRC32_SW;
* - (CRC32_SSE42) Use SSE4.2 intrinsics if available
* - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
* - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available (default ARMv8)
+ * - (CRC32_RISCV64) Use RISCV64 Zbc extension if available
*
*/
void
@@ -52,6 +53,14 @@ rte_hash_crc_set_alg(uint8_t alg)
rte_hash_crc32_alg = CRC32_ARM64;
#endif
+#if defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_ZBC)
+ if (!(alg & CRC32_RISCV64))
+ HASH_CRC_LOG(WARNING,
+ "Unsupported CRC32 algorithm requested using CRC32_RISCV64");
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_RISCV_EXT_ZBC))
+ rte_hash_crc32_alg = CRC32_RISCV64;
+#endif
+
if (rte_hash_crc32_alg == CRC32_SW)
HASH_CRC_LOG(WARNING,
"Unsupported CRC32 algorithm requested using CRC32_SW");
@@ -64,6 +73,8 @@ RTE_INIT(rte_hash_crc_init_alg)
rte_hash_crc_set_alg(CRC32_SSE42_x64);
#elif defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRC32)
rte_hash_crc_set_alg(CRC32_ARM64);
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_ZBC)
+ rte_hash_crc_set_alg(CRC32_RISCV64);
#else
rte_hash_crc_set_alg(CRC32_SW);
#endif
@@ -28,6 +28,7 @@ extern "C" {
#define CRC32_x64 (1U << 2)
#define CRC32_SSE42_x64 (CRC32_x64|CRC32_SSE42)
#define CRC32_ARM64 (1U << 3)
+#define CRC32_RISCV64 (1U << 4)
extern uint8_t rte_hash_crc32_alg;
@@ -35,12 +36,14 @@ extern uint8_t rte_hash_crc32_alg;
#include "rte_crc_arm64.h"
#elif defined(RTE_ARCH_X86)
#include "rte_crc_x86.h"
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_ZBC)
+#include "rte_crc_riscv64.h"
#else
#include "rte_crc_generic.h"
#endif
/**
- * Allow or disallow use of SSE4.2/ARMv8 intrinsics for CRC32 hash
+ * Allow or disallow use of SSE4.2/ARMv8/RISC-V intrinsics for CRC32 hash
* calculation.
*
* @param alg
@@ -49,6 +52,7 @@ extern uint8_t rte_hash_crc32_alg;
* - (CRC32_SSE42) Use SSE4.2 intrinsics if available
* - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
* - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available (default ARMv8)
+ * - (CRC32_RISCV64) Use RISC-V Carry-less multiply if available (default rv64gc_zbc)
*/
void
rte_hash_crc_set_alg(uint8_t alg);