--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -323,6 +323,7 @@ F: config/riscv/
F: doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
F: lib/eal/riscv/
F: lib/hash/rte_crc_riscv64.h
+F: lib/net/net_crc_zbc.c
Intel x86
M: Bruce Richardson <bruce.richardson@intel.com>
--- a/app/test/test_crc.c
+++ b/app/test/test_crc.c
@@ -168,6 +168,15 @@ test_crc(void)
return ret;
}
+ /* set CRC riscv mode */
+ rte_net_crc_set_alg(RTE_NET_CRC_ZBC);
+
+ ret = test_crc_calc();
+ if (ret < 0) {
+ printf("test crc (riscv64 zbc clmul): failed (%d)\n", ret);
+ return ret;
+ }
+
return 0;
}
--- a/lib/net/meson.build
+++ b/lib/net/meson.build
@@ -125,4 +125,8 @@ elif (dpdk_conf.has('RTE_ARCH_ARM64') and
cc.get_define('__ARM_FEATURE_CRYPTO', args: machine_args) != '')
sources += files('net_crc_neon.c')
cflags += ['-DCC_ARM64_NEON_PMULL_SUPPORT']
+elif (dpdk_conf.has('RTE_ARCH_RISCV') and
+ cc.get_define('RTE_RISCV_FEATURE_ZBC', args: machine_args) != '')
+ sources += files('net_crc_zbc.c')
+ cflags += ['-DCC_RISCV64_ZBC_CLMUL_SUPPORT']
endif
--- a/lib/net/net_crc.h
+++ b/lib/net/net_crc.h
@@ -42,4 +42,14 @@ rte_crc16_ccitt_neon_handler(const uint8_t *data, uint32_t data_len);
uint32_t
rte_crc32_eth_neon_handler(const uint8_t *data, uint32_t data_len);
+/* RISCV64 Zbc */
+void
+rte_net_crc_zbc_init(void);
+
+uint32_t
+rte_crc16_ccitt_zbc_handler(const uint8_t *data, uint32_t data_len);
+
+uint32_t
+rte_crc32_eth_zbc_handler(const uint8_t *data, uint32_t data_len);
+
#endif /* _NET_CRC_H_ */
new file mode 100644
--- /dev/null
+++ b/lib/net/net_crc_zbc.c
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) ByteDance 2024
+ */
+
+#include <assert.h>
+#include <riscv_bitmanip.h>
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_net_crc.h>
+
+#include "net_crc.h"
+
+/* CLMUL CRC computation context structure */
+struct crc_clmul_ctx {
+ uint64_t Pr;
+ uint64_t mu;
+ uint64_t k3;
+ uint64_t k4;
+ uint64_t k5;
+};
+
+static struct crc_clmul_ctx crc32_eth_clmul;
+static struct crc_clmul_ctx crc16_ccitt_clmul;
+
+/* Perform Barrett's reduction on 8, 16, 32 or 64-bit value */
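+/*
+ * Sketch of the identity used: with mu = floor(x^96 / P), Barrett's
+ * method computes T mod P = T xor (floor(T * mu / x^96) * P), where
+ * subtraction is xor over GF(2). On the bit-reflected values used
+ * here, the divisions by powers of x fall out as taking the low
+ * (clmul) or high (clmulh) 64 bits of each product.
+ */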
+static inline uint32_t
+crc32_barrett_zbc(
+ const uint64_t data,
+ uint32_t crc,
+ uint32_t bits,
+ const struct crc_clmul_ctx *params)
+{
+ assert((bits == 64) || (bits == 32) || (bits == 16) || (bits == 8));
+
+ /* Combine data with the initial value */
+ uint64_t temp = (uint64_t)(data ^ crc) << (64 - bits);
+
+ /*
+ * Multiply by mu, which is 2^96 / P. Division by 2^96 occurs by taking
+ * the lower 64 bits of the result (remember we're inverted)
+ */
+ temp = __riscv_clmul_64(temp, params->mu);
+ /* Multiply by P */
+ temp = __riscv_clmulh_64(temp, params->Pr);
+
+ /* Subtract from original (only needed for smaller sizes) */
+ if (bits == 16 || bits == 8)
+ temp ^= crc >> bits;
+
+ return temp;
+}
+
+/* Repeat Barrett's reduction for short buffer sizes */
+static inline uint32_t
+crc32_repeated_barrett_zbc(
+ const uint8_t *data,
+ uint32_t data_len,
+ uint32_t crc,
+ const struct crc_clmul_ctx *params)
+{
+ while (data_len >= 8) {
+ crc = crc32_barrett_zbc(*(const uint64_t *)data, crc, 64, params);
+ data += 8;
+ data_len -= 8;
+ }
+ if (data_len >= 4) {
+ crc = crc32_barrett_zbc(*(const uint32_t *)data, crc, 32, params);
+ data += 4;
+ data_len -= 4;
+ }
+ if (data_len >= 2) {
+ crc = crc32_barrett_zbc(*(const uint16_t *)data, crc, 16, params);
+ data += 2;
+ data_len -= 2;
+ }
+ if (data_len >= 1)
+ crc = crc32_barrett_zbc(*(const uint8_t *)data, crc, 8, params);
+
+ return crc;
+}
+
+/* Perform a reduction-by-1 over a buffer (minimum length: two 64-bit words) */
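+/*
+ * Each step carry-less multiplies the two 64-bit halves of the running
+ * remainder by the folding constants k3 and k4 (powers of x mod P,
+ * pre-adjusted for bit reflection), recombines the product halves and
+ * xors in the next two data words, advancing 16 bytes per call.
+ */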
+static inline void
+crc32_reduce_zbc(const uint64_t *data, uint64_t *high, uint64_t *low,
+ const struct crc_clmul_ctx *params)
+{
+ uint64_t highh = __riscv_clmulh_64(params->k3, *high);
+ uint64_t highl = __riscv_clmul_64(params->k3, *high);
+ uint64_t lowh = __riscv_clmulh_64(params->k4, *low);
+ uint64_t lowl = __riscv_clmul_64(params->k4, *low);
+
+ *high = highl ^ lowl;
+ *low = highh ^ lowh;
+
+ *high ^= *(data++);
+ *low ^= *(data++);
+}
+
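+/*
+ * Overall flow: Barrett-reduce leading bytes until the buffer is
+ * 8-byte aligned, fold the bulk 16 bytes at a time into a 128-bit
+ * remainder, reduce that remainder 128 -> 96 -> 64 bits, finish with
+ * one Barrett step down to 32 bits, then Barrett-reduce any tail
+ * bytes into the result.
+ */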
+static inline uint32_t
+crc32_eth_calc_zbc(
+ const uint8_t *data,
+ uint32_t data_len,
+ uint32_t crc,
+ const struct crc_clmul_ctx *params)
+{
+ uint64_t high, low;
+ /* Minimum length we can do reduction-by-1 over */
+ const uint32_t min_len = 16;
+ /* Barrett reduce until buffer aligned to 8-byte word */
+ uint32_t misalign = -(uintptr_t)data & 7;
+ if (misalign != 0 && misalign <= data_len) {
+ crc = crc32_repeated_barrett_zbc(data, misalign, crc, params);
+ data += misalign;
+ data_len -= misalign;
+ }
+
+ if (data_len < min_len)
+ return crc32_repeated_barrett_zbc(data, data_len, crc, params);
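+ /* data is now 8-byte aligned with at least min_len bytes remaining */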
+
+ /* Fold buffer into two 8-byte words */
+ high = *((const uint64_t *)data) ^ crc;
+ low = *((const uint64_t *)(data + 8));
+ data += 16;
+ data_len -= 16;
+
+ for (; data_len >= 16; data_len -= 16, data += 16)
+ crc32_reduce_zbc((const uint64_t *)data, &high, &low, params);
+
+ /* Fold last 128 bits into 96 */
+ low = __riscv_clmul_64(params->k4, high) ^ low;
+ high = __riscv_clmulh_64(params->k4, high);
+ /* Upper 32 bits of high are now zero */
+ high = (low >> 32) | (high << 32);
+
+ /* Fold last 96 bits into 64 */
+ low = __riscv_clmul_64(low & 0xffffffff, params->k5);
+ low ^= high;
+
+ /*
+ * Barrett reduction of remaining 64 bits, using high to store initial
+ * value of low
+ */
+ high = low;
+ low = __riscv_clmul_64(low, params->mu);
+ low &= 0xffffffff;
+ low = __riscv_clmul_64(low, params->Pr);
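+ /* the 32-bit remainder survives in the upper half of the xor below */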
+ crc = (high ^ low) >> 32;
+
+ /* Combine crc with any excess */
+ crc = crc32_repeated_barrett_zbc(data, data_len, crc, params);
+
+ return crc;
+}
+
+void
+rte_net_crc_zbc_init(void)
+{
+ /* Initialise CRC32 data */
+ crc32_eth_clmul.Pr = 0x1db710641LL; /* polynomial P reversed */
+ crc32_eth_clmul.mu = 0xb4e5b025f7011641LL; /* (2^96 / P) reversed */
+ crc32_eth_clmul.k3 = 0x1751997d0LL; /* (x^(128+32) mod P << 32) reversed << 1 */
+ crc32_eth_clmul.k4 = 0x0ccaa009eLL; /* (x^(128-32) mod P << 32) reversed << 1 */
+ crc32_eth_clmul.k5 = 0x163cd6124LL; /* (x^64 mod P << 32) reversed << 1 */
+
+ /* Initialise CRC16 data */
+ /* Same calculations as above, with polynomial << 16 */
+ crc16_ccitt_clmul.Pr = 0x10811LL;
+ crc16_ccitt_clmul.mu = 0x859b040b1c581911LL;
+ crc16_ccitt_clmul.k3 = 0x8e10LL;
+ crc16_ccitt_clmul.k4 = 0x189aeLL;
+ crc16_ccitt_clmul.k5 = 0x114aaLL;
+}
+
+uint32_t
+rte_crc16_ccitt_zbc_handler(const uint8_t *data, uint32_t data_len)
+{
+ /* Invert the CRC, which is present in the lower 16 bits */
+ return (uint16_t)~crc32_eth_calc_zbc(data,
+ data_len,
+ 0xffff,
+ &crc16_ccitt_clmul);
+}
+
+uint32_t
+rte_crc32_eth_zbc_handler(const uint8_t *data, uint32_t data_len)
+{
+ return ~crc32_eth_calc_zbc(data,
+ data_len,
+ 0xffffffffUL,
+ &crc32_eth_clmul);
+}
--- a/lib/net/rte_net_crc.c
+++ b/lib/net/rte_net_crc.c
@@ -67,6 +67,12 @@ static const rte_net_crc_handler handlers_neon[] = {
[RTE_NET_CRC32_ETH] = rte_crc32_eth_neon_handler,
};
#endif
+#ifdef CC_RISCV64_ZBC_CLMUL_SUPPORT
+static const rte_net_crc_handler handlers_zbc[] = {
+ [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_zbc_handler,
+ [RTE_NET_CRC32_ETH] = rte_crc32_eth_zbc_handler,
+};
+#endif
static uint16_t max_simd_bitwidth;
@@ -244,6 +250,31 @@ neon_pmull_init(void)
#endif
}
+/* ZBC/CLMUL handling */
+
+#define ZBC_CLMUL_CPU_SUPPORTED \
+ rte_cpu_get_flag_enabled(RTE_CPUFLAG_RISCV_EXT_ZBC)
+
+static const rte_net_crc_handler *
+zbc_clmul_get_handlers(void)
+{
+#ifdef CC_RISCV64_ZBC_CLMUL_SUPPORT
+ if (ZBC_CLMUL_CPU_SUPPORTED)
+ return handlers_zbc;
+#endif
+ NET_LOG(INFO, "Requirements not met, can't use Zbc");
+ return NULL;
+}
+
+static void
+zbc_clmul_init(void)
+{
+#ifdef CC_RISCV64_ZBC_CLMUL_SUPPORT
+ if (ZBC_CLMUL_CPU_SUPPORTED)
+ rte_net_crc_zbc_init();
+#endif
+}
+
/* Default handling */
static uint32_t
@@ -260,6 +291,9 @@ rte_crc16_ccitt_default_handler(const uint8_t *data, uint32_t data_len)
if (handlers != NULL)
return handlers[RTE_NET_CRC16_CCITT](data, data_len);
handlers = neon_pmull_get_handlers();
+ if (handlers != NULL)
+ return handlers[RTE_NET_CRC16_CCITT](data, data_len);
+ handlers = zbc_clmul_get_handlers();
if (handlers != NULL)
return handlers[RTE_NET_CRC16_CCITT](data, data_len);
handlers = handlers_scalar;
@@ -282,6 +316,9 @@ rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len)
handlers = neon_pmull_get_handlers();
if (handlers != NULL)
return handlers[RTE_NET_CRC32_ETH](data, data_len);
+ handlers = zbc_clmul_get_handlers();
+ if (handlers != NULL)
+ return handlers[RTE_NET_CRC32_ETH](data, data_len);
handlers = handlers_scalar;
return handlers[RTE_NET_CRC32_ETH](data, data_len);
}
@@ -306,6 +343,9 @@ rte_net_crc_set_alg(enum rte_net_crc_alg alg)
break; /* for x86, always break here */
case RTE_NET_CRC_NEON:
handlers = neon_pmull_get_handlers();
+ break;
+ case RTE_NET_CRC_ZBC:
+ handlers = zbc_clmul_get_handlers();
/* fall-through */
case RTE_NET_CRC_SCALAR:
/* fall-through */
@@ -338,4 +378,5 @@ RTE_INIT(rte_net_crc_init)
sse42_pclmulqdq_init();
avx512_vpclmulqdq_init();
neon_pmull_init();
+ zbc_clmul_init();
}
--- a/lib/net/rte_net_crc.h
+++ b/lib/net/rte_net_crc.h
@@ -24,6 +24,7 @@ enum rte_net_crc_alg {
RTE_NET_CRC_SSE42,
RTE_NET_CRC_NEON,
RTE_NET_CRC_AVX512,
+ RTE_NET_CRC_ZBC,
};
/**
@@ -37,6 +38,7 @@ enum rte_net_crc_alg {
* - RTE_NET_CRC_SSE42 (Use 64-bit SSE4.2 intrinsic)
* - RTE_NET_CRC_NEON (Use ARM Neon intrinsic)
* - RTE_NET_CRC_AVX512 (Use 512-bit AVX intrinsic)
+ * - RTE_NET_CRC_ZBC (Use RISC-V Zbc extension)
*/
void
rte_net_crc_set_alg(enum rte_net_crc_alg alg);