@@ -26,39 +26,40 @@ cnxk_ml_io_quantize_single(struct cnxk_ml_io *input, uint8_t *dbuffer, uint8_t *
if (dtype == qtype) {
rte_memcpy(qbuffer, dbuffer, input->sz_d);
- } else {
- switch (qtype) {
- case RTE_ML_IO_TYPE_INT8:
- ret = rte_ml_io_float32_to_int8(qscale, nb_elements, dbuffer, qbuffer);
- break;
- case RTE_ML_IO_TYPE_UINT8:
- ret = rte_ml_io_float32_to_uint8(qscale, nb_elements, dbuffer, qbuffer);
- break;
- case RTE_ML_IO_TYPE_INT16:
- ret = rte_ml_io_float32_to_int16(qscale, nb_elements, dbuffer, qbuffer);
- break;
- case RTE_ML_IO_TYPE_UINT16:
- ret = rte_ml_io_float32_to_uint16(qscale, nb_elements, dbuffer, qbuffer);
- break;
- case RTE_ML_IO_TYPE_INT32:
- ret = rte_ml_io_float32_to_int32(qscale, nb_elements, dbuffer, qbuffer);
- break;
- case RTE_ML_IO_TYPE_UINT32:
- ret = rte_ml_io_float32_to_uint32(qscale, nb_elements, dbuffer, qbuffer);
- break;
- case RTE_ML_IO_TYPE_INT64:
- ret = rte_ml_io_float32_to_int64(qscale, nb_elements, dbuffer, qbuffer);
- break;
- case RTE_ML_IO_TYPE_UINT64:
- ret = rte_ml_io_float32_to_uint64(qscale, nb_elements, dbuffer, qbuffer);
- break;
- case RTE_ML_IO_TYPE_FP16:
- ret = rte_ml_io_float32_to_float16(nb_elements, dbuffer, qbuffer);
- break;
- default:
- plt_err("Unsupported qtype : %u", qtype);
- ret = -ENOTSUP;
- }
+ return ret;
+ }
+
+ switch (qtype) {
+ case RTE_ML_IO_TYPE_INT8:
+ ret = rte_ml_io_float32_to_int8(dbuffer, qbuffer, nb_elements, 1.0 / qscale, 0);
+ break;
+ case RTE_ML_IO_TYPE_UINT8:
+ ret = rte_ml_io_float32_to_uint8(dbuffer, qbuffer, nb_elements, 1.0 / qscale, 0);
+ break;
+ case RTE_ML_IO_TYPE_INT16:
+ ret = rte_ml_io_float32_to_int16(dbuffer, qbuffer, nb_elements, 1.0 / qscale, 0);
+ break;
+ case RTE_ML_IO_TYPE_UINT16:
+ ret = rte_ml_io_float32_to_uint16(dbuffer, qbuffer, nb_elements, 1.0 / qscale, 0);
+ break;
+ case RTE_ML_IO_TYPE_INT32:
+ ret = rte_ml_io_float32_to_int32(dbuffer, qbuffer, nb_elements, 1.0 / qscale, 0);
+ break;
+ case RTE_ML_IO_TYPE_UINT32:
+ ret = rte_ml_io_float32_to_uint32(dbuffer, qbuffer, nb_elements, 1.0 / qscale, 0);
+ break;
+ case RTE_ML_IO_TYPE_INT64:
+ ret = rte_ml_io_float32_to_int64(dbuffer, qbuffer, nb_elements, 1.0 / qscale, 0);
+ break;
+ case RTE_ML_IO_TYPE_UINT64:
+ ret = rte_ml_io_float32_to_uint64(dbuffer, qbuffer, nb_elements, 1.0 / qscale, 0);
+ break;
+ case RTE_ML_IO_TYPE_FP16:
+ ret = rte_ml_io_float32_to_float16(dbuffer, qbuffer, nb_elements);
+ break;
+ default:
+ plt_err("Unsupported qtype : %u", qtype);
+ ret = -ENOTSUP;
}
return ret;
@@ -80,39 +81,40 @@ cnxk_ml_io_dequantize_single(struct cnxk_ml_io *output, uint8_t *qbuffer, uint8_
if (dtype == qtype) {
rte_memcpy(dbuffer, qbuffer, output->sz_q);
- } else {
- switch (qtype) {
- case RTE_ML_IO_TYPE_INT8:
- ret = rte_ml_io_int8_to_float32(dscale, nb_elements, qbuffer, dbuffer);
- break;
- case RTE_ML_IO_TYPE_UINT8:
- ret = rte_ml_io_uint8_to_float32(dscale, nb_elements, qbuffer, dbuffer);
- break;
- case RTE_ML_IO_TYPE_INT16:
- ret = rte_ml_io_int16_to_float32(dscale, nb_elements, qbuffer, dbuffer);
- break;
- case RTE_ML_IO_TYPE_UINT16:
- ret = rte_ml_io_uint16_to_float32(dscale, nb_elements, qbuffer, dbuffer);
- break;
- case RTE_ML_IO_TYPE_INT32:
- ret = rte_ml_io_int32_to_float32(dscale, nb_elements, qbuffer, dbuffer);
- break;
- case RTE_ML_IO_TYPE_UINT32:
- ret = rte_ml_io_uint32_to_float32(dscale, nb_elements, qbuffer, dbuffer);
- break;
- case RTE_ML_IO_TYPE_INT64:
- ret = rte_ml_io_int64_to_float32(dscale, nb_elements, qbuffer, dbuffer);
- break;
- case RTE_ML_IO_TYPE_UINT64:
- ret = rte_ml_io_uint64_to_float32(dscale, nb_elements, qbuffer, dbuffer);
- break;
- case RTE_ML_IO_TYPE_FP16:
- ret = rte_ml_io_float16_to_float32(nb_elements, qbuffer, dbuffer);
- break;
- default:
- plt_err("Unsupported qtype: %u", qtype);
- ret = -ENOTSUP;
- }
+ return 0;
+ }
+
+ switch (qtype) {
+ case RTE_ML_IO_TYPE_INT8:
+ ret = rte_ml_io_int8_to_float32(qbuffer, dbuffer, nb_elements, dscale, 0);
+ break;
+ case RTE_ML_IO_TYPE_UINT8:
+ ret = rte_ml_io_uint8_to_float32(qbuffer, dbuffer, nb_elements, dscale, 0);
+ break;
+ case RTE_ML_IO_TYPE_INT16:
+ ret = rte_ml_io_int16_to_float32(qbuffer, dbuffer, nb_elements, dscale, 0);
+ break;
+ case RTE_ML_IO_TYPE_UINT16:
+ ret = rte_ml_io_uint16_to_float32(qbuffer, dbuffer, nb_elements, dscale, 0);
+ break;
+ case RTE_ML_IO_TYPE_INT32:
+ ret = rte_ml_io_int32_to_float32(qbuffer, dbuffer, nb_elements, dscale, 0);
+ break;
+ case RTE_ML_IO_TYPE_UINT32:
+ ret = rte_ml_io_uint32_to_float32(qbuffer, dbuffer, nb_elements, dscale, 0);
+ break;
+ case RTE_ML_IO_TYPE_INT64:
+ ret = rte_ml_io_int64_to_float32(qbuffer, dbuffer, nb_elements, dscale, 0);
+ break;
+ case RTE_ML_IO_TYPE_UINT64:
+ ret = rte_ml_io_uint64_to_float32(qbuffer, dbuffer, nb_elements, dscale, 0);
+ break;
+ case RTE_ML_IO_TYPE_FP16:
+ ret = rte_ml_io_float16_to_float32(qbuffer, dbuffer, nb_elements);
+ break;
+ default:
+ plt_err("Unsupported qtype: %u", qtype);
+ ret = -ENOTSUP;
}
return ret;
@@ -52,459 +52,6 @@ __rte_internal
void
rte_ml_io_type_to_str(enum rte_ml_io_type type, char *str, int len);
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32) to signed 8-bit
- * integer format (INT8).
- *
- * @param[in] scale
- * Scale factor for conversion.
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- * Output buffer to store INT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in signed 8-bit integer format (INT8) to single precision
- * floating format (float32).
- *
- * @param[in] scale
- * Scale factor for conversion.
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing INT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
- * @param[out] output
- * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
- * 8-bit integer format (UINT8).
- *
- * @param[in] scale
- * Scale factor for conversion.
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- * Output buffer to store UINT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in unsigned 8-bit integer format (UINT8) to single precision
- * floating format (float32).
- *
- * @param[in] scale
- * Scale factor for conversion.
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing UINT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
- * @param[out] output
- * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32) to signed
- * 16-bit integer format (INT16).
- *
- * @param[in] scale
- * Scale factor for conversion.
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- * Output buffer to store INT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in signed 16-bit integer format (INT16) to single precision
- * floating format (float32).
- *
- * @param[in] scale
- * Scale factor for conversion.
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing INT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
- * @param[out] output
- * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
- * 16-bit integer format (UINT16).
- *
- * @param[in] scale
- * Scale factor for conversion.
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- * Output buffer to store UINT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in unsigned 16-bit integer format (UINT16) to single
- * precision floating format (float32).
- *
- * @param[in] scale
- * Scale factor for conversion.
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing UINT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
- * @param[out] output
- * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32)
- * to signed 32-bit integer format (INT32).
- *
- * @param[in] scale
- * Scale factor for conversion.
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- * Output buffer to store INT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in signed 32-bit integer format (INT32)
- * to single precision floating format (float32).
- *
- * @param[in] scale
- * Scale factor for conversion.
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing INT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32)
- * to unsigned 32-bit integer format (UINT32).
- *
- * @param[in] scale
- * Scale factor for conversion.
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- * Output buffer to store UINT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in unsigned 32-bit integer format (UINT32)
- * to single precision floating format (float32).
- *
- * @param[in] scale
- * Scale factor for conversion.
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing UINT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32)
- * to signed 64-bit integer format (INT64).
- *
- * @param[in] scale
- * Scale factor for conversion.
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- * Output buffer to store INT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_int64(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in signed 64-bit integer format (INT64)
- * to single precision floating format (float32).
- *
- * @param[in] scale
- * Scale factor for conversion.
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing INT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
- * @param[out] output
- * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_int64_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32)
- * to unsigned 64-bit integer format (UINT64).
- *
- * @param[in] scale
- * Scale factor for conversion.
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- * Output buffer to store UINT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in unsigned 64-bit integer format (UINT64)
- * to single precision floating format (float32).
- *
- * @param[in] scale
- * Scale factor for conversion.
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing UINT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
- * @param[out] output
- * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_uint64_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32) to half
- * precision floating point format (FP16).
- *
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements *4) bytes.
- * @param[out] output
- * Output buffer to store float16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_float16(uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in half precision floating format (FP16) to single precision
- * floating point format (float32).
- *
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing float16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
- * @param[out] output
- * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float16_to_float32(uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32) to brain
- * floating point format (bfloat16).
- *
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements *4) bytes.
- * @param[out] output
- * Output buffer to store bfloat16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_bfloat16(uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in brain floating point format (bfloat16) to single precision
- * floating point format (float32).
- *
- * @param[in] nb_elements
- * Number of elements in the buffer.
- * @param[in] input
- * Input buffer containing bfloat16 numbers. Size of buffer is equal to (nb_elements * 2)
- * bytes.
- * @param[out] output
- * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- * - 0, Success.
- * - < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_bfloat16_to_float32(uint64_t nb_elements, void *input, void *output);
-
#ifdef __cplusplus
}
#endif
@@ -17,7 +17,7 @@
*/
static inline void
-__float32_to_int8_neon_s8x8(float scale, float *input, int8_t *output)
+__float32_to_int8_neon_s8x8(const float *input, int8_t *output, float scale, int8_t zero_point)
{
int16x4_t s16x4_l;
int16x4_t s16x4_h;
@@ -30,7 +30,8 @@ __float32_to_int8_neon_s8x8(float scale, float *input, int8_t *output)
* Use round to nearest with ties away rounding mode.
*/
f32x4 = vld1q_f32(input);
- f32x4 = vmulq_n_f32(f32x4, scale);
+ f32x4 = vdivq_f32(f32x4, vdupq_n_f32(scale));
+ f32x4 = vaddq_f32(f32x4, vdupq_n_f32((float)zero_point));
s32x4 = vcvtaq_s32_f32(f32x4);
s16x4_l = vqmovn_s32(s32x4);
@@ -38,7 +39,8 @@ __float32_to_int8_neon_s8x8(float scale, float *input, int8_t *output)
* Use round to nearest with ties away rounding mode.
*/
f32x4 = vld1q_f32(input + 4);
- f32x4 = vmulq_n_f32(f32x4, scale);
+ f32x4 = vdivq_f32(f32x4, vdupq_n_f32(scale));
+ f32x4 = vaddq_f32(f32x4, vdupq_n_f32((float)zero_point));
s32x4 = vcvtaq_s32_f32(f32x4);
s16x4_h = vqmovn_s32(s32x4);
@@ -47,31 +49,37 @@ __float32_to_int8_neon_s8x8(float scale, float *input, int8_t *output)
/* narrow to int8_t */
s8x8 = vqmovn_s16(s16x8);
+ s8x8 = vmax_s8(s8x8, vdup_n_s8(INT8_MIN + 1));
/* store 8 elements */
vst1_s8(output, s8x8);
}
static inline void
-__float32_to_int8_neon_s8x1(float scale, float *input, int8_t *output)
+__float32_to_int8_neon_s8x1(const float *input, int8_t *output, float scale, int8_t zero_point)
{
- int32_t s32;
+ float32x2_t f32x2;
+ int32x2_t s32x2;
int16_t s16;
/* scale and convert, round to nearest with ties away rounding mode */
- s32 = vcvtas_s32_f32(scale * (*input));
+ f32x2 = vdiv_f32(vdup_n_f32(*input), vdup_n_f32(scale));
+ f32x2 = vadd_f32(f32x2, vdup_n_f32((float)zero_point));
+ s32x2 = vcvta_s32_f32(f32x2);
+ s32x2 = vmax_s32(s32x2, vdup_n_s32(INT8_MIN + 1));
/* saturate narrow */
- s16 = vqmovns_s32(s32);
+ s16 = vqmovns_s32(vget_lane_s32(s32x2, 0));
/* convert to int8_t */
*output = vqmovnh_s16(s16);
}
int
-rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_int8(const void *input, void *output, uint64_t nb_elements, float scale,
+ int8_t zero_point)
{
- float *input_buffer;
+ const float *input_buffer;
int8_t *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -80,14 +88,14 @@ rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (int8_t *)output;
vlen = 2 * sizeof(float) / sizeof(int8_t);
nb_iterations = nb_elements / vlen;
/* convert vlen elements in each iteration */
for (i = 0; i < nb_iterations; i++) {
- __float32_to_int8_neon_s8x8(scale, input_buffer, output_buffer);
+ __float32_to_int8_neon_s8x8(input_buffer, output_buffer, scale, zero_point);
input_buffer += vlen;
output_buffer += vlen;
}
@@ -95,7 +103,7 @@ rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *
/* convert leftover elements */
i = i * vlen;
for (; i < nb_elements; i++) {
- __float32_to_int8_neon_s8x1(scale, input_buffer, output_buffer);
+ __float32_to_int8_neon_s8x1(input_buffer, output_buffer, scale, zero_point);
input_buffer++;
output_buffer++;
}
@@ -104,7 +112,7 @@ rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *
}
static inline void
-__int8_to_float32_neon_f32x8(float scale, int8_t *input, float *output)
+__int8_to_float32_neon_f32x8(const int8_t *input, float *output, float scale, int8_t zero_point)
{
float32x4_t f32x4;
int16x8_t s16x8;
@@ -122,6 +130,7 @@ __int8_to_float32_neon_f32x8(float scale, int8_t *input, float *output)
s16x4 = vget_low_s16(s16x8);
s32x4 = vmovl_s16(s16x4);
f32x4 = vcvtq_f32_s32(s32x4);
+ f32x4 = vsubq_f32(f32x4, vdupq_n_f32((float)zero_point));
f32x4 = vmulq_n_f32(f32x4, scale);
vst1q_f32(output, f32x4);
@@ -129,20 +138,22 @@ __int8_to_float32_neon_f32x8(float scale, int8_t *input, float *output)
s16x4 = vget_high_s16(s16x8);
s32x4 = vmovl_s16(s16x4);
f32x4 = vcvtq_f32_s32(s32x4);
+ f32x4 = vsubq_f32(f32x4, vdupq_n_f32((float)zero_point));
f32x4 = vmulq_n_f32(f32x4, scale);
vst1q_f32(output + 4, f32x4);
}
static inline void
-__int8_to_float32_neon_f32x1(float scale, int8_t *input, float *output)
+__int8_to_float32_neon_f32x1(const int8_t *input, float *output, float scale, int8_t zero_point)
{
- *output = scale * vcvts_f32_s32((int32_t)*input);
+ *output = scale * (vcvts_f32_s32((int32_t)*input) - (float)zero_point);
}
int
-rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_int8_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+ int8_t zero_point)
{
- int8_t *input_buffer;
+ const int8_t *input_buffer;
float *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -151,14 +162,14 @@ rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (int8_t *)input;
+ input_buffer = (const int8_t *)input;
output_buffer = (float *)output;
vlen = 2 * sizeof(float) / sizeof(int8_t);
nb_iterations = nb_elements / vlen;
/* convert vlen elements in each iteration */
for (i = 0; i < nb_iterations; i++) {
- __int8_to_float32_neon_f32x8(scale, input_buffer, output_buffer);
+ __int8_to_float32_neon_f32x8(input_buffer, output_buffer, scale, zero_point);
input_buffer += vlen;
output_buffer += vlen;
}
@@ -166,7 +177,7 @@ rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *
/* convert leftover elements */
i = i * vlen;
for (; i < nb_elements; i++) {
- __int8_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+ __int8_to_float32_neon_f32x1(input_buffer, output_buffer, scale, zero_point);
input_buffer++;
output_buffer++;
}
@@ -175,7 +186,7 @@ rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *
}
static inline void
-__float32_to_uint8_neon_u8x8(float scale, float *input, uint8_t *output)
+__float32_to_uint8_neon_u8x8(const float *input, uint8_t *output, float scale, uint8_t zero_point)
{
uint16x4_t u16x4_l;
uint16x4_t u16x4_h;
@@ -188,7 +199,8 @@ __float32_to_uint8_neon_u8x8(float scale, float *input, uint8_t *output)
* use round to nearest with ties away rounding mode.
*/
f32x4 = vld1q_f32(input);
- f32x4 = vmulq_n_f32(f32x4, scale);
+ f32x4 = vdivq_f32(f32x4, vdupq_n_f32(scale));
+ f32x4 = vaddq_f32(f32x4, vdupq_n_f32((float)zero_point));
u32x4 = vcvtaq_u32_f32(f32x4);
u16x4_l = vqmovn_u32(u32x4);
@@ -196,7 +208,8 @@ __float32_to_uint8_neon_u8x8(float scale, float *input, uint8_t *output)
* use round to nearest with ties away rounding mode.
*/
f32x4 = vld1q_f32(input + 4);
- f32x4 = vmulq_n_f32(f32x4, scale);
+ f32x4 = vdivq_f32(f32x4, vdupq_n_f32(scale));
+ f32x4 = vaddq_f32(f32x4, vdupq_n_f32((float)zero_point));
u32x4 = vcvtaq_u32_f32(f32x4);
u16x4_h = vqmovn_u32(u32x4);
@@ -211,25 +224,29 @@ __float32_to_uint8_neon_u8x8(float scale, float *input, uint8_t *output)
}
static inline void
-__float32_to_uint8_neon_u8x1(float scale, float *input, uint8_t *output)
+__float32_to_uint8_neon_u8x1(const float *input, uint8_t *output, float scale, uint8_t zero_point)
{
- uint32_t u32;
+ float32x2_t f32x2;
+ uint32x2_t u32x2;
uint16_t u16;
/* scale and convert, round to nearest with ties away rounding mode */
- u32 = vcvtas_u32_f32(scale * (*input));
+ f32x2 = vdiv_f32(vdup_n_f32(*input), vdup_n_f32(scale));
+ f32x2 = vadd_f32(f32x2, vdup_n_f32((float)zero_point));
+ u32x2 = vcvta_u32_f32(f32x2);
/* saturate narrow */
- u16 = vqmovns_u32(u32);
+ u16 = vqmovns_u32(vget_lane_u32(u32x2, 0));
/* convert to uint8_t */
*output = vqmovnh_u16(u16);
}
int
-rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_uint8(const void *input, void *output, uint64_t nb_elements, float scale,
+ uint8_t zero_point)
{
- float *input_buffer;
+ const float *input_buffer;
uint8_t *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -238,14 +255,14 @@ rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (uint8_t *)output;
vlen = 2 * sizeof(float) / sizeof(uint8_t);
nb_iterations = nb_elements / vlen;
/* convert vlen elements in each iteration */
for (i = 0; i < nb_iterations; i++) {
- __float32_to_uint8_neon_u8x8(scale, input_buffer, output_buffer);
+ __float32_to_uint8_neon_u8x8(input_buffer, output_buffer, scale, zero_point);
input_buffer += vlen;
output_buffer += vlen;
}
@@ -253,7 +270,7 @@ rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void
/* convert leftover elements */
i = i * vlen;
for (; i < nb_elements; i++) {
- __float32_to_uint8_neon_u8x1(scale, input_buffer, output_buffer);
+ __float32_to_uint8_neon_u8x1(input_buffer, output_buffer, scale, zero_point);
input_buffer++;
output_buffer++;
}
@@ -262,45 +279,48 @@ rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void
}
static inline void
-__uint8_to_float32_neon_f32x8(float scale, uint8_t *input, float *output)
+__uint8_to_float32_neon_f32x8(const uint8_t *input, float *output, float scale, uint8_t zero_point)
{
float32x4_t f32x4;
uint16x8_t u16x8;
- uint16x4_t u16x4;
- uint32x4_t u32x4;
+ int16x8_t s16x8;
+ int16x4_t s16x4;
+ int32x4_t s32x4;
uint8x8_t u8x8;
/* load 8 x uint8_t elements */
u8x8 = vld1_u8(input);
-
- /* widen uint8_t to uint16_t */
u16x8 = vmovl_u8(u8x8);
+ s16x8 = vreinterpretq_s16_u16(u16x8);
/* convert lower 4 elements: widen to uint32_t, convert to float, scale and store */
- u16x4 = vget_low_u16(u16x8);
- u32x4 = vmovl_u16(u16x4);
- f32x4 = vcvtq_f32_u32(u32x4);
+ s16x4 = vget_low_s16(s16x8);
+ s32x4 = vmovl_s16(s16x4);
+ f32x4 = vcvtq_f32_s32(s32x4);
+ f32x4 = vsubq_f32(f32x4, vdupq_n_f32((float)zero_point));
f32x4 = vmulq_n_f32(f32x4, scale);
vst1q_f32(output, f32x4);
/* convert higher 4 elements: widen to uint32_t, convert to float, scale and store */
- u16x4 = vget_high_u16(u16x8);
- u32x4 = vmovl_u16(u16x4);
- f32x4 = vcvtq_f32_u32(u32x4);
+ s16x4 = vget_high_s16(s16x8);
+ s32x4 = vmovl_s16(s16x4);
+ f32x4 = vcvtq_f32_s32(s32x4);
+ f32x4 = vsubq_f32(f32x4, vdupq_n_f32((float)zero_point));
f32x4 = vmulq_n_f32(f32x4, scale);
vst1q_f32(output + 4, f32x4);
}
static inline void
-__uint8_to_float32_neon_f32x1(float scale, uint8_t *input, float *output)
+__uint8_to_float32_neon_f32x1(const uint8_t *input, float *output, float scale, uint8_t zero_point)
{
- *output = scale * vcvts_f32_u32((uint32_t)*input);
+ *output = scale * (vcvts_f32_u32((uint32_t)*input) - (float)zero_point);
}
int
-rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_uint8_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+ uint8_t zero_point)
{
- uint8_t *input_buffer;
+ const uint8_t *input_buffer;
float *output_buffer;
uint64_t nb_iterations;
uint64_t vlen;
@@ -309,14 +329,14 @@ rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (uint8_t *)input;
+ input_buffer = (const uint8_t *)input;
output_buffer = (float *)output;
vlen = 2 * sizeof(float) / sizeof(uint8_t);
nb_iterations = nb_elements / vlen;
/* convert vlen elements in each iteration */
for (i = 0; i < nb_iterations; i++) {
- __uint8_to_float32_neon_f32x8(scale, input_buffer, output_buffer);
+ __uint8_to_float32_neon_f32x8(input_buffer, output_buffer, scale, zero_point);
input_buffer += vlen;
output_buffer += vlen;
}
@@ -324,7 +344,7 @@ rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void
/* convert leftover elements */
i = i * vlen;
for (; i < nb_elements; i++) {
- __uint8_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+ __uint8_to_float32_neon_f32x1(input_buffer, output_buffer, scale, zero_point);
input_buffer++;
output_buffer++;
}
@@ -333,7 +353,7 @@ rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void
}
static inline void
-__float32_to_int16_neon_s16x4(float scale, float *input, int16_t *output)
+__float32_to_int16_neon_s16x4(const float *input, int16_t *output, float scale, int16_t zero_point)
{
float32x4_t f32x4;
int16x4_t s16x4;
@@ -343,34 +363,43 @@ __float32_to_int16_neon_s16x4(float scale, float *input, int16_t *output)
f32x4 = vld1q_f32(input);
/* scale */
- f32x4 = vmulq_n_f32(f32x4, scale);
+ f32x4 = vdivq_f32(f32x4, vdupq_n_f32(scale));
+
+ /* add zero point */
+ f32x4 = vaddq_f32(f32x4, vdupq_n_f32((float)zero_point));
/* convert to int32x4_t using round to nearest with ties away rounding mode */
s32x4 = vcvtaq_s32_f32(f32x4);
/* saturate narrow to int16x4_t */
s16x4 = vqmovn_s32(s32x4);
+ s16x4 = vmax_s16(s16x4, vdup_n_s16(INT16_MIN + 1));
/* store 4 elements */
vst1_s16(output, s16x4);
}
static inline void
-__float32_to_int16_neon_s16x1(float scale, float *input, int16_t *output)
+__float32_to_int16_neon_s16x1(const float *input, int16_t *output, float scale, int16_t zero_point)
{
- int32_t s32;
+ float32x2_t f32x2;
+ int32x2_t s32x2;
/* scale and convert, round to nearest with ties away rounding mode */
- s32 = vcvtas_s32_f32(scale * (*input));
+ f32x2 = vdiv_f32(vdup_n_f32(*input), vdup_n_f32(scale));
+ f32x2 = vadd_f32(f32x2, vdup_n_f32((float)zero_point));
+ s32x2 = vcvta_s32_f32(f32x2);
+ s32x2 = vmax_s32(s32x2, vdup_n_s32(INT16_MIN + 1));
/* saturate narrow */
- *output = vqmovns_s32(s32);
+ *output = vqmovns_s32(vget_lane_s32(s32x2, 0));
}
int
-rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_int16(const void *input, void *output, uint64_t nb_elements, float scale,
+ int16_t zero_point)
{
- float *input_buffer;
+ const float *input_buffer;
int16_t *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -379,14 +408,14 @@ rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (int16_t *)output;
vlen = 2 * sizeof(float) / sizeof(int16_t);
nb_iterations = nb_elements / vlen;
/* convert vlen elements in each iteration */
for (i = 0; i < nb_iterations; i++) {
- __float32_to_int16_neon_s16x4(scale, input_buffer, output_buffer);
+ __float32_to_int16_neon_s16x4(input_buffer, output_buffer, scale, zero_point);
input_buffer += vlen;
output_buffer += vlen;
}
@@ -394,7 +423,7 @@ rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void
/* convert leftover elements */
i = i * vlen;
for (; i < nb_elements; i++) {
- __float32_to_int16_neon_s16x1(scale, input_buffer, output_buffer);
+ __float32_to_int16_neon_s16x1(input_buffer, output_buffer, scale, zero_point);
input_buffer++;
output_buffer++;
}
@@ -403,7 +432,7 @@ rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void
}
static inline void
-__int16_to_float32_neon_f32x4(float scale, int16_t *input, float *output)
+__int16_to_float32_neon_f32x4(const int16_t *input, float *output, float scale, int16_t zero_point)
{
float32x4_t f32x4;
int16x4_t s16x4;
@@ -418,6 +447,9 @@ __int16_to_float32_neon_f32x4(float scale, int16_t *input, float *output)
/* convert int32_t to float */
f32x4 = vcvtq_f32_s32(s32x4);
+ /* subtract zero point */
+ f32x4 = vsubq_f32(f32x4, vdupq_n_f32((float)zero_point));
+
/* scale */
f32x4 = vmulq_n_f32(f32x4, scale);
@@ -426,15 +458,16 @@ __int16_to_float32_neon_f32x4(float scale, int16_t *input, float *output)
}
static inline void
-__int16_to_float32_neon_f32x1(float scale, int16_t *input, float *output)
+__int16_to_float32_neon_f32x1(const int16_t *input, float *output, float scale, int16_t zero_point)
{
- *output = scale * vcvts_f32_s32((int32_t)*input);
+ *output = scale * (vcvts_f32_s32((int32_t)*input) - (float)zero_point);
}
int
-rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_int16_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+ int16_t zero_point)
{
- int16_t *input_buffer;
+ const int16_t *input_buffer;
float *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -443,14 +476,14 @@ rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (int16_t *)input;
+ input_buffer = (const int16_t *)input;
output_buffer = (float *)output;
vlen = 2 * sizeof(float) / sizeof(int16_t);
nb_iterations = nb_elements / vlen;
/* convert vlen elements in each iteration */
for (i = 0; i < nb_iterations; i++) {
- __int16_to_float32_neon_f32x4(scale, input_buffer, output_buffer);
+ __int16_to_float32_neon_f32x4(input_buffer, output_buffer, scale, zero_point);
input_buffer += vlen;
output_buffer += vlen;
}
@@ -458,7 +491,7 @@ rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void
/* convert leftover elements */
i = i * vlen;
for (; i < nb_elements; i++) {
- __int16_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+ __int16_to_float32_neon_f32x1(input_buffer, output_buffer, scale, zero_point);
input_buffer++;
output_buffer++;
}
@@ -467,7 +500,8 @@ rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void
}
static inline void
-__float32_to_uint16_neon_u16x4(float scale, float *input, uint16_t *output)
+__float32_to_uint16_neon_u16x4(const float *input, uint16_t *output, float scale,
+ uint16_t zero_point)
{
float32x4_t f32x4;
uint16x4_t u16x4;
@@ -477,7 +511,10 @@ __float32_to_uint16_neon_u16x4(float scale, float *input, uint16_t *output)
f32x4 = vld1q_f32(input);
/* scale */
- f32x4 = vmulq_n_f32(f32x4, scale);
+ f32x4 = vdivq_f32(f32x4, vdupq_n_f32(scale));
+
+ /* add zero point */
+ f32x4 = vaddq_f32(f32x4, vdupq_n_f32((float)zero_point));
/* convert using round to nearest with ties to away rounding mode */
u32x4 = vcvtaq_u32_f32(f32x4);
@@ -490,21 +527,23 @@ __float32_to_uint16_neon_u16x4(float scale, float *input, uint16_t *output)
}
static inline void
-__float32_to_uint16_neon_u16x1(float scale, float *input, uint16_t *output)
+__float32_to_uint16_neon_u16x1(const float *input, uint16_t *output, float scale,
+ uint16_t zero_point)
{
uint32_t u32;
/* scale and convert, round to nearest with ties away rounding mode */
- u32 = vcvtas_u32_f32(scale * (*input));
+ u32 = vcvtas_u32_f32((*input) / scale + (float)zero_point);
/* saturate narrow */
- *output = vqmovns_u32(u32);
+ *output = vqmovns_u32(u32) + zero_point;
}
int
-rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_uint16(const void *input, void *output, uint64_t nb_elements, float scale,
+ uint16_t zero_point)
{
- float *input_buffer;
+ const float *input_buffer;
uint16_t *output_buffer;
uint64_t nb_iterations;
uint64_t vlen;
@@ -513,14 +552,14 @@ rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (uint16_t *)output;
vlen = 2 * sizeof(float) / sizeof(uint16_t);
nb_iterations = nb_elements / vlen;
/* convert vlen elements in each iteration */
for (i = 0; i < nb_iterations; i++) {
- __float32_to_uint16_neon_u16x4(scale, input_buffer, output_buffer);
+ __float32_to_uint16_neon_u16x4(input_buffer, output_buffer, scale, zero_point);
input_buffer += vlen;
output_buffer += vlen;
}
@@ -528,7 +567,7 @@ rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void
/* convert leftover elements */
i = i * vlen;
for (; i < nb_elements; i++) {
- __float32_to_uint16_neon_u16x1(scale, input_buffer, output_buffer);
+ __float32_to_uint16_neon_u16x1(input_buffer, output_buffer, scale, zero_point);
input_buffer++;
output_buffer++;
}
@@ -537,7 +576,8 @@ rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void
}
static inline void
-__uint16_to_float32_neon_f32x4(float scale, uint16_t *input, float *output)
+__uint16_to_float32_neon_f32x4(const uint16_t *input, float *output, float scale,
+ uint16_t zero_point)
{
float32x4_t f32x4;
uint16x4_t u16x4;
@@ -552,6 +592,9 @@ __uint16_to_float32_neon_f32x4(float scale, uint16_t *input, float *output)
/* convert uint32_t to float */
f32x4 = vcvtq_f32_u32(u32x4);
+ /* subtract zero point */
+ f32x4 = vsubq_f32(f32x4, vdupq_n_f32((float)zero_point));
+
/* scale */
f32x4 = vmulq_n_f32(f32x4, scale);
@@ -560,15 +603,17 @@ __uint16_to_float32_neon_f32x4(float scale, uint16_t *input, float *output)
}
static inline void
-__uint16_to_float32_neon_f32x1(float scale, uint16_t *input, float *output)
+__uint16_to_float32_neon_f32x1(const uint16_t *input, float *output, float scale,
+ uint16_t zero_point)
{
- *output = scale * vcvts_f32_u32((uint32_t)*input);
+ *output = scale * (vcvts_f32_u32((uint32_t)*input) - (float)zero_point);
}
int
-rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_uint16_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+ uint16_t zero_point)
{
- uint16_t *input_buffer;
+ const uint16_t *input_buffer;
float *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -577,14 +622,14 @@ rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (uint16_t *)input;
+ input_buffer = (const uint16_t *)input;
output_buffer = (float *)output;
vlen = 2 * sizeof(float) / sizeof(uint16_t);
nb_iterations = nb_elements / vlen;
/* convert vlen elements in each iteration */
for (i = 0; i < nb_iterations; i++) {
- __uint16_to_float32_neon_f32x4(scale, input_buffer, output_buffer);
+ __uint16_to_float32_neon_f32x4(input_buffer, output_buffer, scale, zero_point);
input_buffer += vlen;
output_buffer += vlen;
}
@@ -592,7 +637,7 @@ rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void
/* convert leftover elements */
i = i * vlen;
for (; i < nb_elements; i++) {
- __uint16_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+ __uint16_to_float32_neon_f32x1(input_buffer, output_buffer, scale, zero_point);
input_buffer++;
output_buffer++;
}
@@ -601,7 +646,7 @@ rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void
}
static inline void
-__float32_to_int32_neon_s32x4(float scale, float *input, int32_t *output)
+__float32_to_int32_neon_s32x4(const float *input, int32_t *output, float scale, int32_t zero_point)
{
float32x4_t f32x4;
int32x4_t s32x4;
@@ -610,26 +655,43 @@ __float32_to_int32_neon_s32x4(float scale, float *input, int32_t *output)
f32x4 = vld1q_f32(input);
/* scale */
- f32x4 = vmulq_n_f32(f32x4, scale);
+ f32x4 = vdivq_f32(f32x4, vdupq_n_f32(scale));
+
+ /* add zero point */
+ f32x4 = vaddq_f32(f32x4, vdupq_n_f32((float)zero_point));
/* convert to int32x4_t using round to nearest with ties away rounding mode */
s32x4 = vcvtaq_s32_f32(f32x4);
+ /* add zero_point */
+ s32x4 = vaddq_s32(s32x4, vdupq_n_s32(zero_point));
+ s32x4 = vmaxq_s32(s32x4, vdupq_n_s32(INT32_MIN + 1));
+
/* store 4 elements */
vst1q_s32(output, s32x4);
}
static inline void
-__float32_to_int32_neon_s32x1(float scale, float *input, int32_t *output)
+__float32_to_int32_neon_s32x1(const float *input, int32_t *output, float scale, int32_t zero_point)
{
+ float32x2_t f32x2;
+ int32x2_t s32x2;
+
/* scale and convert, round to nearest with ties away rounding mode */
- *output = vcvtas_s32_f32(scale * (*input));
+ f32x2 = vdiv_f32(vdup_n_f32(*input), vdup_n_f32(scale));
+ f32x2 = vadd_f32(f32x2, vdup_n_f32((float)zero_point));
+ s32x2 = vcvta_s32_f32(f32x2);
+ s32x2 = vmax_s32(s32x2, vdup_n_s32(INT16_MIN + 1));
+
+ /* saturate narrow */
+ vst1_lane_s32(output, s32x2, 0);
}
int
-rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_int32(const void *input, void *output, uint64_t nb_elements, float scale,
+ int32_t zero_point)
{
- float *input_buffer;
+ const float *input_buffer;
int32_t *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -638,14 +700,14 @@ rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (int32_t *)output;
vlen = 2 * sizeof(float) / sizeof(int32_t);
nb_iterations = nb_elements / vlen;
/* convert vlen elements in each iteration */
for (i = 0; i < nb_iterations; i++) {
- __float32_to_int32_neon_s32x4(scale, input_buffer, output_buffer);
+ __float32_to_int32_neon_s32x4(input_buffer, output_buffer, scale, zero_point);
input_buffer += vlen;
output_buffer += vlen;
}
@@ -653,7 +715,7 @@ rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void
/* convert leftover elements */
i = i * vlen;
for (; i < nb_elements; i++) {
- __float32_to_int32_neon_s32x1(scale, input_buffer, output_buffer);
+ __float32_to_int32_neon_s32x1(input_buffer, output_buffer, scale, zero_point);
input_buffer++;
output_buffer++;
}
@@ -662,7 +724,7 @@ rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void
}
static inline void
-__int32_to_float32_neon_f32x4(float scale, int32_t *input, float *output)
+__int32_to_float32_neon_f32x4(const int32_t *input, float *output, float scale, int32_t zero_point)
{
float32x4_t f32x4;
int32x4_t s32x4;
@@ -673,6 +735,9 @@ __int32_to_float32_neon_f32x4(float scale, int32_t *input, float *output)
/* convert int32_t to float */
f32x4 = vcvtq_f32_s32(s32x4);
+ /* subtract zero point */
+ f32x4 = vsubq_f32(f32x4, vdupq_n_f32((float)zero_point));
+
/* scale */
f32x4 = vmulq_n_f32(f32x4, scale);
@@ -681,15 +746,16 @@ __int32_to_float32_neon_f32x4(float scale, int32_t *input, float *output)
}
static inline void
-__int32_to_float32_neon_f32x1(float scale, int32_t *input, float *output)
+__int32_to_float32_neon_f32x1(const int32_t *input, float *output, float scale, int32_t zero_point)
{
- *output = scale * vcvts_f32_s32(*input);
+ *output = scale * (vcvts_f32_s32(*input) - (float)zero_point);
}
int
-rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_int32_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+ int32_t zero_point)
{
- int32_t *input_buffer;
+ const int32_t *input_buffer;
float *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -698,14 +764,14 @@ rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (int32_t *)input;
+ input_buffer = (const int32_t *)input;
output_buffer = (float *)output;
vlen = 2 * sizeof(float) / sizeof(int32_t);
nb_iterations = nb_elements / vlen;
/* convert vlen elements in each iteration */
for (i = 0; i < nb_iterations; i++) {
- __int32_to_float32_neon_f32x4(scale, input_buffer, output_buffer);
+ __int32_to_float32_neon_f32x4(input_buffer, output_buffer, scale, zero_point);
input_buffer += vlen;
output_buffer += vlen;
}
@@ -713,7 +779,7 @@ rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void
/* convert leftover elements */
i = i * vlen;
for (; i < nb_elements; i++) {
- __int32_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+ __int32_to_float32_neon_f32x1(input_buffer, output_buffer, scale, zero_point);
input_buffer++;
output_buffer++;
}
@@ -722,7 +788,8 @@ rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void
}
static inline void
-__float32_to_uint32_neon_u32x4(float scale, float *input, uint32_t *output)
+__float32_to_uint32_neon_u32x4(const float *input, uint32_t *output, float scale,
+ uint32_t zero_point)
{
float32x4_t f32x4;
uint32x4_t u32x4;
@@ -731,7 +798,10 @@ __float32_to_uint32_neon_u32x4(float scale, float *input, uint32_t *output)
f32x4 = vld1q_f32(input);
/* scale */
- f32x4 = vmulq_n_f32(f32x4, scale);
+ f32x4 = vdivq_f32(f32x4, vdupq_n_f32(scale));
+
+ /* add zero point */
+ f32x4 = vaddq_f32(f32x4, vdupq_n_f32((float)zero_point));
/* convert using round to nearest with ties to away rounding mode */
u32x4 = vcvtaq_u32_f32(f32x4);
@@ -741,16 +811,18 @@ __float32_to_uint32_neon_u32x4(float scale, float *input, uint32_t *output)
}
static inline void
-__float32_to_uint32_neon_u32x1(float scale, float *input, uint32_t *output)
+__float32_to_uint32_neon_u32x1(const float *input, uint32_t *output, float scale,
+ uint32_t zero_point)
{
/* scale and convert, round to nearest with ties away rounding mode */
- *output = vcvtas_u32_f32(scale * (*input));
+ *output = vcvtas_u32_f32((*input) / scale + (float)zero_point);
}
int
-rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_uint32(const void *input, void *output, uint64_t nb_elements, float scale,
+ uint32_t zero_point)
{
- float *input_buffer;
+ const float *input_buffer;
uint32_t *output_buffer;
uint64_t nb_iterations;
uint64_t vlen;
@@ -759,14 +831,14 @@ rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (uint32_t *)output;
vlen = 2 * sizeof(float) / sizeof(uint32_t);
nb_iterations = nb_elements / vlen;
/* convert vlen elements in each iteration */
for (i = 0; i < nb_iterations; i++) {
- __float32_to_uint32_neon_u32x4(scale, input_buffer, output_buffer);
+ __float32_to_uint32_neon_u32x4(input_buffer, output_buffer, scale, zero_point);
input_buffer += vlen;
output_buffer += vlen;
}
@@ -774,7 +846,7 @@ rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void
/* convert leftover elements */
i = i * vlen;
for (; i < nb_elements; i++) {
- __float32_to_uint32_neon_u32x1(scale, input_buffer, output_buffer);
+ __float32_to_uint32_neon_u32x1(input_buffer, output_buffer, scale, zero_point);
input_buffer++;
output_buffer++;
}
@@ -783,7 +855,8 @@ rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void
}
static inline void
-__uint32_to_float32_neon_f32x4(float scale, uint32_t *input, float *output)
+__uint32_to_float32_neon_f32x4(const uint32_t *input, float *output, float scale,
+ uint32_t zero_point)
{
float32x4_t f32x4;
uint32x4_t u32x4;
@@ -794,6 +867,9 @@ __uint32_to_float32_neon_f32x4(float scale, uint32_t *input, float *output)
/* convert uint32_t to float */
f32x4 = vcvtq_f32_u32(u32x4);
+ /* subtract zero point */
+ f32x4 = vsubq_f32(f32x4, vdupq_n_f32((float)zero_point));
+
/* scale */
f32x4 = vmulq_n_f32(f32x4, scale);
@@ -802,15 +878,17 @@ __uint32_to_float32_neon_f32x4(float scale, uint32_t *input, float *output)
}
static inline void
-__uint32_to_float32_neon_f32x1(float scale, uint32_t *input, float *output)
+__uint32_to_float32_neon_f32x1(const uint32_t *input, float *output, float scale,
+ uint32_t zero_point)
{
- *output = scale * vcvts_f32_u32(*input);
+ *output = scale * (vcvts_f32_u32(*input) - (float)zero_point);
}
int
-rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_uint32_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+ uint32_t zero_point)
{
- uint32_t *input_buffer;
+ const uint32_t *input_buffer;
float *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -819,14 +897,14 @@ rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (uint32_t *)input;
+ input_buffer = (const uint32_t *)input;
output_buffer = (float *)output;
vlen = 2 * sizeof(float) / sizeof(uint32_t);
nb_iterations = nb_elements / vlen;
/* convert vlen elements in each iteration */
for (i = 0; i < nb_iterations; i++) {
- __uint32_to_float32_neon_f32x4(scale, input_buffer, output_buffer);
+ __uint32_to_float32_neon_f32x4(input_buffer, output_buffer, scale, zero_point);
input_buffer += vlen;
output_buffer += vlen;
}
@@ -834,7 +912,7 @@ rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void
/* convert leftover elements */
i = i * vlen;
for (; i < nb_elements; i++) {
- __uint32_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+ __uint32_to_float32_neon_f32x1(input_buffer, output_buffer, scale, zero_point);
input_buffer++;
output_buffer++;
}
@@ -843,55 +921,68 @@ rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void
}
static inline void
-__float32_to_int64_neon_s64x2(float scale, float *input, int64_t *output)
+__float32_to_int64_neon_s64x2(const float *input, int64_t *output, float scale, int64_t zero_point)
{
float32x2_t f32x2;
float64x2_t f64x2;
int64x2_t s64x2;
+ int64_t s64;
/* load 2 x float elements */
f32x2 = vld1_f32(input);
/* scale */
- f32x2 = vmul_n_f32(f32x2, scale);
+ f32x2 = vdiv_f32(f32x2, vdup_n_f32(scale));
+
+ /* add zero point */
+ f32x2 = vadd_f32(f32x2, vdup_n_f32((float)zero_point));
/* convert to float64x2_t */
f64x2 = vcvt_f64_f32(f32x2);
/* convert to int64x2_t */
s64x2 = vcvtaq_s64_f64(f64x2);
+ s64 = vgetq_lane_s64(s64x2, 0);
+ s64 = (s64 == INT64_MIN) ? INT64_MIN + 1 : s64;
- /* store 2 elements */
- vst1q_s64(output, s64x2);
+ /* store lane 0 of int64x2_t */
+ *output = s64;
}
static inline void
-__float32_to_int64_neon_s64x1(float scale, float *input, int64_t *output)
+__float32_to_int64_neon_s64x1(const float *input, int64_t *output, float scale, int64_t zero_point)
{
float32x2_t f32x2;
float64x2_t f64x2;
int64x2_t s64x2;
+ int64_t s64;
/* load 1 x float element */
f32x2 = vdup_n_f32(*input);
/* scale */
- f32x2 = vmul_n_f32(f32x2, scale);
+ f32x2 = vdiv_f32(f32x2, vdup_n_f32(scale));
+
+ /* add zero point */
+ f32x2 = vadd_f32(f32x2, vdup_n_f32((float)zero_point));
/* convert to float64x2_t */
f64x2 = vcvt_f64_f32(f32x2);
/* convert to int64x2_t */
s64x2 = vcvtaq_s64_f64(f64x2);
+ s64 = vgetq_lane_s64(s64x2, 0);
+ s64 = (s64 == INT64_MIN) ? INT64_MIN + 1 : s64;
/* store lane 0 of int64x2_t */
- vst1q_lane_s64(output, s64x2, 0);
+ *output = s64;
}
int
-rte_ml_io_float32_to_int64(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_int64(const void *input, void *output, uint64_t nb_elements, float scale,
+ int64_t zero_point)
{
- float *input_buffer;
+ const float *input_buffer;
int64_t *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -900,14 +991,14 @@ rte_ml_io_float32_to_int64(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (int64_t *)output;
vlen = 4 * sizeof(float) / sizeof(int64_t);
nb_iterations = nb_elements / vlen;
/* convert vlen elements in each iteration */
for (i = 0; i < nb_iterations; i++) {
- __float32_to_int64_neon_s64x2(scale, input_buffer, output_buffer);
+ __float32_to_int64_neon_s64x2(input_buffer, output_buffer, scale, zero_point);
input_buffer += vlen;
output_buffer += vlen;
}
@@ -915,7 +1006,7 @@ rte_ml_io_float32_to_int64(float scale, uint64_t nb_elements, void *input, void
/* convert leftover elements */
i = i * vlen;
for (; i < nb_elements; i++) {
- __float32_to_int64_neon_s64x1(scale, input_buffer, output_buffer);
+ __float32_to_int64_neon_s64x1(input_buffer, output_buffer, scale, zero_point);
input_buffer++;
output_buffer++;
}
@@ -924,7 +1015,7 @@ rte_ml_io_float32_to_int64(float scale, uint64_t nb_elements, void *input, void
}
static inline void
-__int64_to_float32_neon_f32x2(float scale, int64_t *input, float *output)
+__int64_to_float32_neon_f32x2(const int64_t *input, float *output, float scale, int64_t zero_point)
{
int64x2_t s64x2;
float64x2_t f64x2;
@@ -939,6 +1030,9 @@ __int64_to_float32_neon_f32x2(float scale, int64_t *input, float *output)
/* convert float64x2_t to float32x2_t */
f32x2 = vcvt_f32_f64(f64x2);
+ /* subtract zero_point */
+ f32x2 = vsub_f32(f32x2, vdup_n_f32(zero_point));
+
/* scale */
f32x2 = vmul_n_f32(f32x2, scale);
@@ -947,7 +1041,7 @@ __int64_to_float32_neon_f32x2(float scale, int64_t *input, float *output)
}
static inline void
-__int64_to_float32_neon_f32x1(float scale, int64_t *input, float *output)
+__int64_to_float32_neon_f32x1(const int64_t *input, float *output, float scale, int64_t zero_point)
{
int64x2_t s64x2;
float64x2_t f64x2;
@@ -962,17 +1056,21 @@ __int64_to_float32_neon_f32x1(float scale, int64_t *input, float *output)
/* convert float64x2_t to float32x2_t */
f32x2 = vcvt_f32_f64(f64x2);
+ /* subtract zero_point */
+ f32x2 = vsub_f32(f32x2, vdup_n_f32(zero_point));
+
/* scale */
f32x2 = vmul_n_f32(f32x2, scale);
- /* store float32x2_t */
+ /* store float32x2_t lane 0 */
vst1_lane_f32(output, f32x2, 0);
}
int
-rte_ml_io_int64_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_int64_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+ int64_t zero_point)
{
- int64_t *input_buffer;
+ const int64_t *input_buffer;
float *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -981,14 +1079,14 @@ rte_ml_io_int64_to_float32(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (int64_t *)input;
+ input_buffer = (const int64_t *)input;
output_buffer = (float *)output;
vlen = 4 * sizeof(float) / sizeof(int64_t);
nb_iterations = nb_elements / vlen;
/* convert vlen elements in each iteration */
for (i = 0; i < nb_iterations; i++) {
- __int64_to_float32_neon_f32x2(scale, input_buffer, output_buffer);
+ __int64_to_float32_neon_f32x2(input_buffer, output_buffer, scale, zero_point);
input_buffer += vlen;
output_buffer += vlen;
}
@@ -996,7 +1094,7 @@ rte_ml_io_int64_to_float32(float scale, uint64_t nb_elements, void *input, void
/* convert leftover elements */
i = i * vlen;
for (; i < nb_elements; i++) {
- __int64_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+ __int64_to_float32_neon_f32x1(input_buffer, output_buffer, scale, zero_point);
input_buffer++;
output_buffer++;
}
@@ -1005,7 +1103,8 @@ rte_ml_io_int64_to_float32(float scale, uint64_t nb_elements, void *input, void
}
static inline void
-__float32_to_uint64_neon_u64x2(float scale, float *input, uint64_t *output)
+__float32_to_uint64_neon_u64x2(const float *input, uint64_t *output, float scale,
+ uint64_t zero_point)
{
float32x2_t f32x2;
float64x2_t f64x2;
@@ -1015,7 +1114,10 @@ __float32_to_uint64_neon_u64x2(float scale, float *input, uint64_t *output)
f32x2 = vld1_f32(input);
/* scale */
- f32x2 = vmul_n_f32(f32x2, scale);
+ f32x2 = vdiv_f32(f32x2, vdup_n_f32(scale));
+
+ /* add zero point */
+ f32x2 = vadd_f32(f32x2, vdup_n_f32((float)zero_point));
/* convert to float64x2_t */
f64x2 = vcvt_f64_f32(f32x2);
@@ -1028,7 +1130,8 @@ __float32_to_uint64_neon_u64x2(float scale, float *input, uint64_t *output)
}
static inline void
-__float32_to_uint64_neon_u64x1(float scale, float *input, uint64_t *output)
+__float32_to_uint64_neon_u64x1(const float *input, uint64_t *output, float scale,
+ uint64_t zero_point)
{
float32x2_t f32x2;
float64x2_t f64x2;
@@ -1038,7 +1141,10 @@ __float32_to_uint64_neon_u64x1(float scale, float *input, uint64_t *output)
f32x2 = vld1_lane_f32(input, vdup_n_f32(0), 0);
/* scale */
- f32x2 = vmul_n_f32(f32x2, scale);
+ f32x2 = vdiv_f32(f32x2, vdup_n_f32(scale));
+
+ /* add zero_point */
+ f32x2 = vadd_f32(f32x2, vdup_n_f32((float)zero_point));
/* convert to float64x2_t */
f64x2 = vcvt_f64_f32(f32x2);
@@ -1051,9 +1157,10 @@ __float32_to_uint64_neon_u64x1(float scale, float *input, uint64_t *output)
}
int
-rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_uint64(const void *input, void *output, uint64_t nb_elements, float scale,
+ uint64_t zero_point)
{
- float *input_buffer;
+ const float *input_buffer;
uint64_t *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -1062,14 +1169,14 @@ rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (uint64_t *)output;
vlen = 4 * sizeof(float) / sizeof(uint64_t);
nb_iterations = nb_elements / vlen;
/* convert vlen elements in each iteration */
for (i = 0; i < nb_iterations; i++) {
- __float32_to_uint64_neon_u64x2(scale, input_buffer, output_buffer);
+ __float32_to_uint64_neon_u64x2(input_buffer, output_buffer, scale, zero_point);
input_buffer += vlen;
output_buffer += vlen;
}
@@ -1077,7 +1184,7 @@ rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void
/* convert leftover elements */
i = i * vlen;
for (; i < nb_elements; i++) {
- __float32_to_uint64_neon_u64x1(scale, input_buffer, output_buffer);
+ __float32_to_uint64_neon_u64x1(input_buffer, output_buffer, scale, zero_point);
input_buffer++;
output_buffer++;
}
@@ -1086,7 +1193,8 @@ rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void
}
static inline void
-__uint64_to_float32_neon_f32x2(float scale, uint64_t *input, float *output)
+__uint64_to_float32_neon_f32x2(const uint64_t *input, float *output, float scale,
+ uint64_t zero_point)
{
uint64x2_t u64x2;
float64x2_t f64x2;
@@ -1101,6 +1209,9 @@ __uint64_to_float32_neon_f32x2(float scale, uint64_t *input, float *output)
/* convert float64x2_t to float32x2_t */
f32x2 = vcvt_f32_f64(f64x2);
+ /* subtract zero_point */
+ f32x2 = vsub_f32(f32x2, vdup_n_f32((float)zero_point));
+
/* scale */
f32x2 = vmul_n_f32(f32x2, scale);
@@ -1109,7 +1220,8 @@ __uint64_to_float32_neon_f32x2(float scale, uint64_t *input, float *output)
}
static inline void
-__uint64_to_float32_neon_f32x1(float scale, uint64_t *input, float *output)
+__uint64_to_float32_neon_f32x1(const uint64_t *input, float *output, float scale,
+ uint64_t zero_point)
{
uint64x2_t u64x2;
float64x2_t f64x2;
@@ -1124,17 +1236,21 @@ __uint64_to_float32_neon_f32x1(float scale, uint64_t *input, float *output)
/* convert float64x2_t to float32x2_t */
f32x2 = vcvt_f32_f64(f64x2);
+ /* subtract zero_point */
+ f32x2 = vsub_f32(f32x2, vdup_n_f32((float)zero_point));
+
/* scale */
f32x2 = vmul_n_f32(f32x2, scale);
- /* store float32x2_t */
+ /* store float32x2_t lane 0 */
vst1_lane_f32(output, f32x2, 0);
}
int
-rte_ml_io_uint64_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_uint64_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+ uint64_t zero_point)
{
- uint64_t *input_buffer;
+ const uint64_t *input_buffer;
float *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -1143,14 +1259,14 @@ rte_ml_io_uint64_to_float32(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (uint64_t *)input;
+ input_buffer = (const uint64_t *)input;
output_buffer = (float *)output;
vlen = 4 * sizeof(float) / sizeof(uint64_t);
nb_iterations = nb_elements / vlen;
/* convert vlen elements in each iteration */
for (i = 0; i < nb_iterations; i++) {
- __uint64_to_float32_neon_f32x2(scale, input_buffer, output_buffer);
+ __uint64_to_float32_neon_f32x2(input_buffer, output_buffer, scale, zero_point);
input_buffer += vlen;
output_buffer += vlen;
}
@@ -1158,7 +1274,7 @@ rte_ml_io_uint64_to_float32(float scale, uint64_t nb_elements, void *input, void
/* convert leftover elements */
i = i * vlen;
for (; i < nb_elements; i++) {
- __uint64_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+ __uint64_to_float32_neon_f32x1(input_buffer, output_buffer, scale, zero_point);
input_buffer++;
output_buffer++;
}
@@ -1167,7 +1283,7 @@ rte_ml_io_uint64_to_float32(float scale, uint64_t nb_elements, void *input, void
}
static inline void
-__float32_to_float16_neon_f16x4(float32_t *input, float16_t *output)
+__float32_to_float16_neon_f16x4(const float32_t *input, float16_t *output)
{
float32x4_t f32x4;
float16x4_t f16x4;
@@ -1183,7 +1299,7 @@ __float32_to_float16_neon_f16x4(float32_t *input, float16_t *output)
}
static inline void
-__float32_to_float16_neon_f16x1(float32_t *input, float16_t *output)
+__float32_to_float16_neon_f16x1(const float32_t *input, float16_t *output)
{
float32x4_t f32x4;
float16x4_t f16x4;
@@ -1199,9 +1315,9 @@ __float32_to_float16_neon_f16x1(float32_t *input, float16_t *output)
}
int
-rte_ml_io_float32_to_float16(uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_float16(const void *input, void *output, uint64_t nb_elements)
{
- float32_t *input_buffer;
+ const float32_t *input_buffer;
float16_t *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -1210,7 +1326,7 @@ rte_ml_io_float32_to_float16(uint64_t nb_elements, void *input, void *output)
if ((nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float32_t *)input;
+ input_buffer = (const float32_t *)input;
output_buffer = (float16_t *)output;
vlen = 2 * sizeof(float32_t) / sizeof(float16_t);
nb_iterations = nb_elements / vlen;
@@ -1234,7 +1350,7 @@ rte_ml_io_float32_to_float16(uint64_t nb_elements, void *input, void *output)
}
static inline void
-__float16_to_float32_neon_f32x4(float16_t *input, float32_t *output)
+__float16_to_float32_neon_f32x4(const float16_t *input, float32_t *output)
{
float16x4_t f16x4;
float32x4_t f32x4;
@@ -1250,7 +1366,7 @@ __float16_to_float32_neon_f32x4(float16_t *input, float32_t *output)
}
static inline void
-__float16_to_float32_neon_f32x1(float16_t *input, float32_t *output)
+__float16_to_float32_neon_f32x1(const float16_t *input, float32_t *output)
{
float16x4_t f16x4;
float32x4_t f32x4;
@@ -1266,9 +1382,9 @@ __float16_to_float32_neon_f32x1(float16_t *input, float32_t *output)
}
int
-rte_ml_io_float16_to_float32(uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float16_to_float32(const void *input, void *output, uint64_t nb_elements)
{
- float16_t *input_buffer;
+ const float16_t *input_buffer;
float32_t *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -1277,7 +1393,7 @@ rte_ml_io_float16_to_float32(uint64_t nb_elements, void *input, void *output)
if ((nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float16_t *)input;
+ input_buffer = (const float16_t *)input;
output_buffer = (float32_t *)output;
vlen = 2 * sizeof(float32_t) / sizeof(float16_t);
nb_iterations = nb_elements / vlen;
@@ -18,7 +18,7 @@
#ifdef __ARM_FEATURE_BF16
static inline void
-__float32_to_bfloat16_neon_f16x4(float32_t *input, bfloat16_t *output)
+__float32_to_bfloat16_neon_f16x4(const float32_t *input, bfloat16_t *output)
{
float32x4_t f32x4;
bfloat16x4_t bf16x4;
@@ -34,7 +34,7 @@ __float32_to_bfloat16_neon_f16x4(float32_t *input, bfloat16_t *output)
}
static inline void
-__float32_to_bfloat16_neon_f16x1(float32_t *input, bfloat16_t *output)
+__float32_to_bfloat16_neon_f16x1(const float32_t *input, bfloat16_t *output)
{
float32x4_t f32x4;
bfloat16x4_t bf16x4;
@@ -50,9 +50,9 @@ __float32_to_bfloat16_neon_f16x1(float32_t *input, bfloat16_t *output)
}
int
-rte_ml_io_float32_to_bfloat16(uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_bfloat16(const void *input, void *output, uint64_t nb_elements)
{
- float32_t *input_buffer;
+ const float32_t *input_buffer;
bfloat16_t *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -61,7 +61,7 @@ rte_ml_io_float32_to_bfloat16(uint64_t nb_elements, void *input, void *output)
if ((nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float32_t *)input;
+ input_buffer = (const float32_t *)input;
output_buffer = (bfloat16_t *)output;
vlen = 2 * sizeof(float32_t) / sizeof(bfloat16_t);
nb_iterations = nb_elements / vlen;
@@ -85,7 +85,7 @@ rte_ml_io_float32_to_bfloat16(uint64_t nb_elements, void *input, void *output)
}
static inline void
-__bfloat16_to_float32_neon_f32x4(bfloat16_t *input, float32_t *output)
+__bfloat16_to_float32_neon_f32x4(const bfloat16_t *input, float32_t *output)
{
bfloat16x4_t bf16x4;
float32x4_t f32x4;
@@ -101,7 +101,7 @@ __bfloat16_to_float32_neon_f32x4(bfloat16_t *input, float32_t *output)
}
static inline void
-__bfloat16_to_float32_neon_f32x1(bfloat16_t *input, float32_t *output)
+__bfloat16_to_float32_neon_f32x1(const bfloat16_t *input, float32_t *output)
{
bfloat16x4_t bf16x4;
float32x4_t f32x4;
@@ -117,9 +117,9 @@ __bfloat16_to_float32_neon_f32x1(bfloat16_t *input, float32_t *output)
}
int
-rte_ml_io_bfloat16_to_float32(uint64_t nb_elements, void *input, void *output)
+rte_ml_io_bfloat16_to_float32(const void *input, void *output, uint64_t nb_elements)
{
- bfloat16_t *input_buffer;
+ const bfloat16_t *input_buffer;
float32_t *output_buffer;
uint64_t nb_iterations;
uint32_t vlen;
@@ -128,7 +128,7 @@ rte_ml_io_bfloat16_to_float32(uint64_t nb_elements, void *input, void *output)
if ((nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (bfloat16_t *)input;
+ input_buffer = (const bfloat16_t *)input;
output_buffer = (float32_t *)output;
vlen = 2 * sizeof(float32_t) / sizeof(bfloat16_t);
nb_iterations = nb_elements / vlen;
@@ -10,9 +10,10 @@
*/
int
-rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_int8(const void *input, void *output, uint64_t nb_elements, float scale,
+ int8_t zero_point)
{
- float *input_buffer;
+ const float *input_buffer;
int8_t *output_buffer;
uint64_t i;
int i32;
@@ -20,11 +21,11 @@ rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (int8_t *)output;
for (i = 0; i < nb_elements; i++) {
- i32 = (int32_t)round((*input_buffer) * scale);
+ i32 = (int32_t)(round(*input_buffer / scale) + zero_point);
if (i32 < INT8_MIN)
i32 = INT8_MIN;
@@ -42,20 +43,21 @@ rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *
}
int
-rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_int8_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+ int8_t zero_point)
{
- int8_t *input_buffer;
+ const int8_t *input_buffer;
float *output_buffer;
uint64_t i;
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (int8_t *)input;
+ input_buffer = (const int8_t *)input;
output_buffer = (float *)output;
for (i = 0; i < nb_elements; i++) {
- *output_buffer = scale * (float)(*input_buffer);
+ *output_buffer = scale * (float)(*input_buffer - zero_point);
input_buffer++;
output_buffer++;
@@ -65,9 +67,10 @@ rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *
}
int
-rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_uint8(const void *input, void *output, uint64_t nb_elements, float scale,
+ uint8_t zero_point)
{
- float *input_buffer;
+ const float *input_buffer;
uint8_t *output_buffer;
int32_t i32;
uint64_t i;
@@ -75,11 +78,11 @@ rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (uint8_t *)output;
for (i = 0; i < nb_elements; i++) {
- i32 = (int32_t)round((*input_buffer) * scale);
+ i32 = (int32_t)(round(*input_buffer / scale) + zero_point);
if (i32 < 0)
i32 = 0;
@@ -97,20 +100,21 @@ rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void
}
int
-rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_uint8_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+ uint8_t zero_point)
{
- uint8_t *input_buffer;
+ const uint8_t *input_buffer;
float *output_buffer;
uint64_t i;
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (uint8_t *)input;
+ input_buffer = (const uint8_t *)input;
output_buffer = (float *)output;
for (i = 0; i < nb_elements; i++) {
- *output_buffer = scale * (float)(*input_buffer);
+ *output_buffer = scale * (float)(*input_buffer - zero_point);
input_buffer++;
output_buffer++;
@@ -120,9 +124,10 @@ rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void
}
int
-rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_int16(const void *input, void *output, uint64_t nb_elements, float scale,
+ int16_t zero_point)
{
- float *input_buffer;
+ const float *input_buffer;
int16_t *output_buffer;
int32_t i32;
uint64_t i;
@@ -130,11 +135,11 @@ rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (int16_t *)output;
for (i = 0; i < nb_elements; i++) {
- i32 = (int32_t)round((*input_buffer) * scale);
+ i32 = (int32_t)(round(*input_buffer / scale) + zero_point);
if (i32 < INT16_MIN)
i32 = INT16_MIN;
@@ -152,20 +157,21 @@ rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void
}
int
-rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_int16_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+ int16_t zero_point)
{
- int16_t *input_buffer;
+ const int16_t *input_buffer;
float *output_buffer;
uint64_t i;
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (int16_t *)input;
+ input_buffer = (const int16_t *)input;
output_buffer = (float *)output;
for (i = 0; i < nb_elements; i++) {
- *output_buffer = scale * (float)(*input_buffer);
+ *output_buffer = scale * (float)(*input_buffer - zero_point);
input_buffer++;
output_buffer++;
@@ -175,9 +181,10 @@ rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void
}
int
-rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_uint16(const void *input, void *output, uint64_t nb_elements, float scale,
+ uint16_t zero_point)
{
- float *input_buffer;
+ const float *input_buffer;
uint16_t *output_buffer;
int32_t i32;
uint64_t i;
@@ -185,11 +192,11 @@ rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (uint16_t *)output;
for (i = 0; i < nb_elements; i++) {
- i32 = (int32_t)round((*input_buffer) * scale);
+ i32 = (int32_t)(round(*input_buffer / scale) + zero_point);
if (i32 < 0)
i32 = 0;
@@ -207,20 +214,21 @@ rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void
}
int
-rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_uint16_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+ uint16_t zero_point)
{
- uint16_t *input_buffer;
+ const uint16_t *input_buffer;
float *output_buffer;
uint64_t i;
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (uint16_t *)input;
+ input_buffer = (const uint16_t *)input;
output_buffer = (float *)output;
for (i = 0; i < nb_elements; i++) {
- *output_buffer = scale * (float)(*input_buffer);
+ *output_buffer = scale * (float)(*input_buffer - zero_point);
input_buffer++;
output_buffer++;
@@ -230,20 +238,21 @@ rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void
}
int
-rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_int32(const void *input, void *output, uint64_t nb_elements, float scale,
+ int32_t zero_point)
{
- float *input_buffer;
+ const float *input_buffer;
int32_t *output_buffer;
uint64_t i;
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (int32_t *)output;
for (i = 0; i < nb_elements; i++) {
- *output_buffer = (int32_t)round((*input_buffer) * scale);
+ *output_buffer = (int32_t)(round(*input_buffer / scale) + zero_point);
input_buffer++;
output_buffer++;
@@ -253,20 +262,21 @@ rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void
}
int
-rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_int32_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+ int32_t zero_point)
{
- int32_t *input_buffer;
+ const int32_t *input_buffer;
float *output_buffer;
uint64_t i;
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (int32_t *)input;
+ input_buffer = (const int32_t *)input;
output_buffer = (float *)output;
for (i = 0; i < nb_elements; i++) {
- *output_buffer = scale * (float)(*input_buffer);
+ *output_buffer = scale * (float)(*input_buffer - zero_point);
input_buffer++;
output_buffer++;
@@ -276,9 +286,10 @@ rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void
}
int
-rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_uint32(const void *input, void *output, uint64_t nb_elements, float scale,
+ uint32_t zero_point)
{
- float *input_buffer;
+ const float *input_buffer;
uint32_t *output_buffer;
int32_t i32;
uint64_t i;
@@ -286,11 +297,11 @@ rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (uint32_t *)output;
for (i = 0; i < nb_elements; i++) {
- i32 = (int32_t)round((*input_buffer) * scale);
+ i32 = (int32_t)(round(*input_buffer / scale) + zero_point);
if (i32 < 0)
i32 = 0;
@@ -305,20 +316,21 @@ rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void
}
int
-rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_uint32_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+ uint32_t zero_point)
{
- uint32_t *input_buffer;
+ const uint32_t *input_buffer;
float *output_buffer;
uint64_t i;
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (uint32_t *)input;
+ input_buffer = (const uint32_t *)input;
output_buffer = (float *)output;
for (i = 0; i < nb_elements; i++) {
- *output_buffer = scale * (float)(*input_buffer);
+ *output_buffer = scale * (float)(*input_buffer - zero_point);
input_buffer++;
output_buffer++;
@@ -328,20 +340,21 @@ rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void
}
int
-rte_ml_io_float32_to_int64(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_int64(const void *input, void *output, uint64_t nb_elements, float scale,
+ int64_t zero_point)
{
- float *input_buffer;
+ const float *input_buffer;
int64_t *output_buffer;
uint64_t i;
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (int64_t *)output;
for (i = 0; i < nb_elements; i++) {
- *output_buffer = (int64_t)round((*input_buffer) * scale);
+ *output_buffer = (int64_t)(round(*input_buffer / scale) + zero_point);
input_buffer++;
output_buffer++;
@@ -351,20 +364,21 @@ rte_ml_io_float32_to_int64(float scale, uint64_t nb_elements, void *input, void
}
int
-rte_ml_io_int64_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_int64_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+ int64_t zero_point)
{
- int64_t *input_buffer;
+ const int64_t *input_buffer;
float *output_buffer;
uint64_t i;
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (int64_t *)input;
+ input_buffer = (const int64_t *)input;
output_buffer = (float *)output;
for (i = 0; i < nb_elements; i++) {
- *output_buffer = scale * (float)(*input_buffer);
+ *output_buffer = scale * (float)(*input_buffer - zero_point);
input_buffer++;
output_buffer++;
@@ -374,9 +388,10 @@ rte_ml_io_int64_to_float32(float scale, uint64_t nb_elements, void *input, void
}
int
-rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_uint64(const void *input, void *output, uint64_t nb_elements, float scale,
+ uint64_t zero_point)
{
- float *input_buffer;
+ const float *input_buffer;
uint64_t *output_buffer;
int64_t i64;
uint64_t i;
@@ -384,11 +399,11 @@ rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (uint64_t *)output;
for (i = 0; i < nb_elements; i++) {
- i64 = (int64_t)round((*input_buffer) * scale);
+ i64 = (int64_t)(round(*input_buffer / scale) + zero_point);
if (i64 < 0)
i64 = 0;
@@ -403,20 +418,21 @@ rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void
}
int
-rte_ml_io_uint64_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_uint64_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+ uint64_t zero_point)
{
- uint64_t *input_buffer;
+ const uint64_t *input_buffer;
float *output_buffer;
uint64_t i;
if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (uint64_t *)input;
+ input_buffer = (const uint64_t *)input;
output_buffer = (float *)output;
for (i = 0; i < nb_elements; i++) {
- *output_buffer = scale * (float)(*input_buffer);
+ *output_buffer = scale * (float)(*input_buffer - zero_point);
input_buffer++;
output_buffer++;
@@ -548,16 +564,16 @@ __float32_to_float16_scalar_rtn(float x)
}
int
-rte_ml_io_float32_to_float16(uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_float16(const void *input, void *output, uint64_t nb_elements)
{
- float *input_buffer;
+ const float *input_buffer;
uint16_t *output_buffer;
uint64_t i;
if ((nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (uint16_t *)output;
for (i = 0; i < nb_elements; i++) {
@@ -632,16 +648,16 @@ __float16_to_float32_scalar_rtx(uint16_t f16)
}
int
-rte_ml_io_float16_to_float32(uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float16_to_float32(const void *input, void *output, uint64_t nb_elements)
{
- uint16_t *input_buffer;
+ const uint16_t *input_buffer;
float *output_buffer;
uint64_t i;
if ((nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (uint16_t *)input;
+ input_buffer = (const uint16_t *)input;
output_buffer = (float *)output;
for (i = 0; i < nb_elements; i++) {
@@ -92,16 +92,16 @@ __float32_to_bfloat16_scalar_rtn(float x)
}
int
-rte_ml_io_float32_to_bfloat16(uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_bfloat16(const void *input, void *output, uint64_t nb_elements)
{
- float *input_buffer;
+ const float *input_buffer;
uint16_t *output_buffer;
uint64_t i;
if ((nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (float *)input;
+ input_buffer = (const float *)input;
output_buffer = (uint16_t *)output;
for (i = 0; i < nb_elements; i++) {
@@ -174,16 +174,16 @@ __bfloat16_to_float32_scalar_rtx(uint16_t f16)
}
int
-rte_ml_io_bfloat16_to_float32(uint64_t nb_elements, void *input, void *output)
+rte_ml_io_bfloat16_to_float32(const void *input, void *output, uint64_t nb_elements)
{
- uint16_t *input_buffer;
+ const uint16_t *input_buffer;
float *output_buffer;
uint64_t i;
if ((nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
- input_buffer = (uint16_t *)input;
+ input_buffer = (const uint16_t *)input;
output_buffer = (float *)output;
for (i = 0; i < nb_elements; i++) {
@@ -1013,6 +1013,468 @@ rte_ml_model_params_update(int16_t dev_id, uint16_t model_id, void *buffer);
/* IO operations */
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to signed 8-bit
+ * integer format (INT8).
+ *
+ * @param[in] fp32
+ * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] i8
+ * Output buffer to store INT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ * @param[in] scale
+ * Scale factor for conversion.
+ * @param[in] zero_point
+ * Zero point for conversion.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_int8(const void *fp32, void *i8, uint64_t nb_elements, float scale,
+ int8_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in signed 8-bit integer format (INT8) to single precision
+ * floating format (float32).
+ *
+ * @param[in] i8
+ * Input buffer containing INT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
+ * @param[out] fp32
+ * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ * @param[in] scale
+ * Scale factor for conversion.
+ * @param[in] zero_point
+ * Zero point for conversion.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_int8_to_float32(const void *i8, void *fp32, uint64_t nb_elements, float scale,
+ int8_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
+ * 8-bit integer format (UINT8).
+ *
+ * @param[in] fp32
+ * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] ui8
+ * Output buffer to store UINT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ * @param[in] scale
+ * Scale factor for conversion.
+ * @param[in] zero_point
+ * Zero point for conversion.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_uint8(const void *fp32, void *ui8, uint64_t nb_elements, float scale,
+ uint8_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in unsigned 8-bit integer format (UINT8) to single precision
+ * floating format (float32).
+ *
+ * @param[in] ui8
+ * Input buffer containing UINT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
+ * @param[out] fp32
+ * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ * @param[in] scale
+ * Scale factor for conversion.
+ * @param[in] zero_point
+ * Zero point for conversion.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_uint8_to_float32(const void *ui8, void *fp32, uint64_t nb_elements, float scale,
+ uint8_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to signed
+ * 16-bit integer format (INT16).
+ *
+ * @param[in] fp32
+ * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] i16
+ * Output buffer to store INT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ * @param[in] scale
+ * Scale factor for conversion.
+ * @param[in] zero_point
+ * Zero point for conversion.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_int16(const void *fp32, void *i16, uint64_t nb_elements, float scale,
+ int16_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in signed 16-bit integer format (INT16) to single precision
+ * floating format (float32).
+ *
+ * @param[in] i16
+ * Input buffer containing INT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
+ * @param[out] fp32
+ * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ * @param[in] scale
+ * Scale factor for conversion.
+ * @param[in] zero_point
+ * Zero point for conversion.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_int16_to_float32(const void *i16, void *fp32, uint64_t nb_elements, float scale,
+ int16_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
+ * 16-bit integer format (UINT16).
+ *
+ * @param[in] fp32
+ * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] ui16
+ * Output buffer to store UINT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ * @param[in] scale
+ * Scale factor for conversion.
+ * @param[in] zero_point
+ * Zero point for conversion.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_uint16(const void *fp32, void *ui16, uint64_t nb_elements, float scale,
+ uint16_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in unsigned 16-bit integer format (UINT16) to single
+ * precision floating format (float32).
+ *
+ * @param[in] ui16
+ * Input buffer containing UINT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
+ * @param[out] fp32
+ * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ * @param[in] scale
+ * Scale factor for conversion.
+ * @param[in] zero_point
+ * Zero point for conversion.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_uint16_to_float32(const void *ui16, void *fp32, uint64_t nb_elements, float scale,
+ uint16_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to signed
+ * 32-bit integer format (INT32).
+ *
+ * @param[in] fp32
+ * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] i32
+ * Output buffer to store INT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ * @param[in] scale
+ * Scale factor for conversion.
+ * @param[in] zero_point
+ * Zero point for conversion.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_int32(const void *fp32, void *i32, uint64_t nb_elements, float scale,
+ int32_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in signed 32-bit integer format (INT32) to single precision
+ * floating format (float32).
+ *
+ * @param[in] i32
+ * Input buffer containing INT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] fp32
+ * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ * @param[in] scale
+ * Scale factor for conversion.
+ * @param[in] zero_point
+ * Zero point for conversion.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+
+__rte_experimental
+int
+rte_ml_io_int32_to_float32(const void *i32, void *fp32, uint64_t nb_elements, float scale,
+ int32_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
+ * 32-bit integer format (UINT32).
+ *
+ * @param[in] fp32
+ * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] ui32
+ * Output buffer to store UINT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ * @param[in] scale
+ * Scale factor for conversion.
+ * @param[in] zero_point
+ * Zero point for conversion.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_uint32(const void *fp32, void *ui32, uint64_t nb_elements, float scale,
+ uint32_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in unsigned 32-bit integer format (UINT32) to single
+ * precision floating format (float32).
+ *
+ * @param[in] ui32
+ * Input buffer containing UINT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] fp32
+ * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ * @param[in] scale
+ * Scale factor for conversion.
+ * @param[in] zero_point
+ * Zero point for conversion.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_uint32_to_float32(const void *ui32, void *fp32, uint64_t nb_elements, float scale,
+ uint32_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to signed
+ * 64-bit integer format (INT64).
+ *
+ * @param[in] fp32
+ * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] i64
+ * Output buffer to store INT64 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ * @param[in] scale
+ * Scale factor for conversion.
+ * @param[in] zero_point
+ * Zero point for conversion.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_int64(const void *fp32, void *i64, uint64_t nb_elements, float scale,
+ int64_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in signed 64-bit integer format (INT64) to single precision
+ * floating format (float32).
+ *
+ * @param[in] i64
+ * Input buffer containing INT64 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] fp32
+ * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ * @param[in] scale
+ * Scale factor for conversion.
+ * @param[in] zero_point
+ * Zero point for conversion.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_int64_to_float32(const void *i64, void *fp32, uint64_t nb_elements, float scale,
+ int64_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
+ * 64-bit integer format (UINT64).
+ *
+ * @param[in] fp32
+ * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] ui64
+ * Output buffer to store UINT64 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ * @param[in] scale
+ * Scale factor for conversion.
+ * @param[in] zero_point
+ * Zero point for conversion.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_uint64(const void *fp32, void *ui64, uint64_t nb_elements, float scale,
+ uint64_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in unsigned 64-bit integer format (UINT64) to single
+ *precision floating format (float32).
+ *
+ * @param[in] ui64
+ * Input buffer containing UINT64 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] fp32
+ * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ * @param[in] scale
+ * Scale factor for conversion.
+ * @param[in] zero_point
+ * Zero point for conversion.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_uint64_to_float32(const void *ui64, void *fp32, uint64_t nb_elements, float scale,
+ uint64_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to half
+ * precision floating point format (FP16).
+ *
+ * @param[in] fp32
+ * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements *4) bytes.
+ * @param[out] fp16
+ * Output buffer to store float16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_float16(const void *fp32, void *fp16, uint64_t nb_elements);
+
+/**
+ * Convert a buffer containing numbers in half precision floating format (FP16) to single precision
+ * floating point format (float32).
+ *
+ * @param[in] fp16
+ * Input buffer containing float16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
+ * @param[out] fp32
+ * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float16_to_float32(const void *fp16, void *fp32, uint64_t nb_elements);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to brain
+ * floating point format (bfloat16).
+ *
+ * @param[in] fp32
+ * Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements *4) bytes.
+ * @param[out] bf16
+ * Output buffer to store bfloat16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_bfloat16(const void *fp32, void *bf16, uint64_t nb_elements);
+
+/**
+ * Convert a buffer containing numbers in brain floating point format (bfloat16) to single precision
+ * floating point format (float32).
+ *
+ * @param[in] bf16
+ * Input buffer containing bfloat16 numbers. Size of buffer is equal to (nb_elements * 2)
+ * bytes.
+ * @param[out] fp32
+ * Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ * Number of elements in the buffer.
+ *
+ * @return
+ * - 0, Success.
+ * - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_bfloat16_to_float32(const void *bf16, void *fp32, uint64_t nb_elements);
+
/**
* Quantize input data.
*
@@ -23,6 +23,26 @@ EXPERIMENTAL {
rte_ml_dev_xstats_names_get;
rte_ml_dev_xstats_reset;
rte_ml_enqueue_burst;
+ rte_ml_io_float32_to_int8;
+ rte_ml_io_int8_to_float32;
+ rte_ml_io_float32_to_uint8;
+ rte_ml_io_uint8_to_float32;
+ rte_ml_io_float32_to_int16;
+ rte_ml_io_int16_to_float32;
+ rte_ml_io_float32_to_uint16;
+ rte_ml_io_uint16_to_float32;
+ rte_ml_io_float32_to_int32;
+ rte_ml_io_int32_to_float32;
+ rte_ml_io_float32_to_uint32;
+ rte_ml_io_uint32_to_float32;
+ rte_ml_io_float32_to_int64;
+ rte_ml_io_int64_to_float32;
+ rte_ml_io_float32_to_uint64;
+ rte_ml_io_uint64_to_float32;
+ rte_ml_io_float32_to_float16;
+ rte_ml_io_float16_to_float32;
+ rte_ml_io_float32_to_bfloat16;
+ rte_ml_io_bfloat16_to_float32;
rte_ml_io_dequantize;
rte_ml_io_quantize;
rte_ml_model_info_get;
@@ -50,24 +70,4 @@ INTERNAL {
rte_ml_io_type_size_get;
rte_ml_io_type_to_str;
- rte_ml_io_float32_to_int8;
- rte_ml_io_int8_to_float32;
- rte_ml_io_float32_to_uint8;
- rte_ml_io_uint8_to_float32;
- rte_ml_io_float32_to_int16;
- rte_ml_io_int16_to_float32;
- rte_ml_io_float32_to_uint16;
- rte_ml_io_uint16_to_float32;
- rte_ml_io_float32_to_int32;
- rte_ml_io_int32_to_float32;
- rte_ml_io_float32_to_uint32;
- rte_ml_io_uint32_to_float32;
- rte_ml_io_float32_to_int64;
- rte_ml_io_int64_to_float32;
- rte_ml_io_float32_to_uint64;
- rte_ml_io_uint64_to_float32;
- rte_ml_io_float32_to_float16;
- rte_ml_io_float16_to_float32;
- rte_ml_io_float32_to_bfloat16;
- rte_ml_io_bfloat16_to_float32;
};