[v5,23/39] ml/cnxk: enable quantization and dequantization

Message ID 20230207160719.1307-24-syalavarthi@marvell.com
State Superseded, archived
Delegated to: Thomas Monjalon
Series Implementation of ML CNXK driver

Checks

Context        Check    Description
ci/checkpatch  success  coding style OK

Commit Message

Srikanth Yalavarthi Feb. 7, 2023, 4:07 p.m. UTC
  Implemented driver functions to quantize input data and dequantize
output data, with support for multiple batches. Quantization and
dequantization use the type conversion functions defined in the ML
common code.

Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
 drivers/ml/cnxk/cn10k_ml_ops.c | 151 +++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)
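
The new ops are reached through the rte_ml_io_quantize() /
rte_ml_io_dequantize() library calls, which dispatch to the driver
functions added in this patch. Below is a minimal, hypothetical
application-side sketch (not part of the patch) of how the pair
brackets an inference run; it assumes the rte_mldev API of this
series, where the I/O calls take a batch count, and the buffer
pointers and the run_inference() wrapper are illustrative names.

	#include <rte_mldev.h>

	static int
	run_inference(int16_t dev_id, uint16_t model_id, uint16_t nb_batches,
		      void *dbuf_in, void *qbuf_in, void *qbuf_out, void *dbuf_out)
	{
		int ret;

		/* float32 -> model_input_type, per input, for all batches */
		ret = rte_ml_io_quantize(dev_id, model_id, nb_batches,
					 dbuf_in, qbuf_in);
		if (ret < 0)
			return ret;

		/* ... enqueue an op referencing qbuf_in / qbuf_out
		 * and wait for its completion ...
		 */

		/* model_output_type -> float32, per output, for all batches */
		return rte_ml_io_dequantize(dev_id, model_id, nb_batches,
					    qbuf_out, dbuf_out);
	}

The sizes needed for the quantized and dequantized buffers can be
queried through the io_input_size_get / io_output_size_get ops, which
this driver already implements.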
  

Patch

diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index b5c89bee40..231c9b340b 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -5,6 +5,8 @@ 
 #include <rte_mldev.h>
 #include <rte_mldev_pmd.h>
 
+#include <mldev_utils.h>
+
 #include "cn10k_ml_dev.h"
 #include "cn10k_ml_model.h"
 #include "cn10k_ml_ops.h"
@@ -983,6 +985,153 @@  cn10k_ml_io_output_size_get(struct rte_ml_dev *dev, uint16_t model_id, uint32_t
 	return 0;
 }
 
+static int
+cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batches, void *dbuffer,
+		     void *qbuffer)
+{
+	struct cn10k_ml_model *model;
+	uint8_t *lcl_dbuffer;
+	uint8_t *lcl_qbuffer;
+	uint32_t batch_id;
+	uint32_t i;
+	int ret;
+
+	model = dev->data->models[model_id];
+
+	if (model == NULL) {
+		plt_err("Invalid model_id = %u", model_id);
+		return -EINVAL;
+	}
+
+	lcl_dbuffer = dbuffer;
+	lcl_qbuffer = qbuffer;
+	batch_id = 0;
+
+next_batch:
+	for (i = 0; i < model->metadata.model.num_input; i++) {
+		if (model->metadata.input[i].input_type ==
+		    model->metadata.input[i].model_input_type) {
+			rte_memcpy(lcl_qbuffer, lcl_dbuffer, model->addr.input[i].sz_d);
+		} else {
+			switch (model->metadata.input[i].model_input_type) {
+			case RTE_ML_IO_TYPE_INT8:
+				ret = rte_ml_io_float32_to_int8(model->metadata.input[i].qscale,
+								model->addr.input[i].nb_elements,
+								lcl_dbuffer, lcl_qbuffer);
+				break;
+			case RTE_ML_IO_TYPE_UINT8:
+				ret = rte_ml_io_float32_to_uint8(model->metadata.input[i].qscale,
+								 model->addr.input[i].nb_elements,
+								 lcl_dbuffer, lcl_qbuffer);
+				break;
+			case RTE_ML_IO_TYPE_INT16:
+				ret = rte_ml_io_float32_to_int16(model->metadata.input[i].qscale,
+								 model->addr.input[i].nb_elements,
+								 lcl_dbuffer, lcl_qbuffer);
+				break;
+			case RTE_ML_IO_TYPE_UINT16:
+				ret = rte_ml_io_float32_to_uint16(model->metadata.input[i].qscale,
+								  model->addr.input[i].nb_elements,
+								  lcl_dbuffer, lcl_qbuffer);
+				break;
+			case RTE_ML_IO_TYPE_FP16:
+				ret = rte_ml_io_float32_to_float16(model->addr.input[i].nb_elements,
+								   lcl_dbuffer, lcl_qbuffer);
+				break;
+			default:
+				plt_err("Unsupported model_input_type[%u] : %u", i,
+					model->metadata.input[i].model_input_type);
+				ret = -ENOTSUP;
+			}
+			if (ret < 0)
+				return ret;
+		}
+
+		lcl_dbuffer += model->addr.input[i].sz_d;
+		lcl_qbuffer += model->addr.input[i].sz_q;
+	}
+
+	batch_id++;
+	if (batch_id < PLT_DIV_CEIL(nb_batches, model->batch_size))
+		goto next_batch;
+
+	return 0;
+}
+
+static int
+cn10k_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batches,
+		       void *qbuffer, void *dbuffer)
+{
+	struct cn10k_ml_model *model;
+	uint8_t *lcl_qbuffer;
+	uint8_t *lcl_dbuffer;
+	uint32_t batch_id;
+	uint32_t i;
+	int ret;
+
+	model = dev->data->models[model_id];
+
+	if (model == NULL) {
+		plt_err("Invalid model_id = %u", model_id);
+		return -EINVAL;
+	}
+
+	lcl_dbuffer = dbuffer;
+	lcl_qbuffer = qbuffer;
+	batch_id = 0;
+
+next_batch:
+	for (i = 0; i < model->metadata.model.num_output; i++) {
+		if (model->metadata.output[i].output_type ==
+		    model->metadata.output[i].model_output_type) {
+			rte_memcpy(lcl_dbuffer, lcl_qbuffer, model->addr.output[i].sz_q);
+		} else {
+			switch (model->metadata.output[i].model_output_type) {
+			case RTE_ML_IO_TYPE_INT8:
+				ret = rte_ml_io_int8_to_float32(model->metadata.output[i].dscale,
+								model->addr.output[i].nb_elements,
+								lcl_qbuffer, lcl_dbuffer);
+				break;
+			case RTE_ML_IO_TYPE_UINT8:
+				ret = rte_ml_io_uint8_to_float32(model->metadata.output[i].dscale,
+								 model->addr.output[i].nb_elements,
+								 lcl_qbuffer, lcl_dbuffer);
+				break;
+			case RTE_ML_IO_TYPE_INT16:
+				ret = rte_ml_io_int16_to_float32(model->metadata.output[i].dscale,
+								 model->addr.output[i].nb_elements,
+								 lcl_qbuffer, lcl_dbuffer);
+				break;
+			case RTE_ML_IO_TYPE_UINT16:
+				ret = rte_ml_io_uint16_to_float32(model->metadata.output[i].dscale,
+								  model->addr.output[i].nb_elements,
+								  lcl_qbuffer, lcl_dbuffer);
+				break;
+			case RTE_ML_IO_TYPE_FP16:
+				ret = rte_ml_io_float16_to_float32(
+					model->addr.output[i].nb_elements, lcl_qbuffer,
+					lcl_dbuffer);
+				break;
+			default:
+				plt_err("Unsupported model_output_type[%u] : %u", i,
+					model->metadata.output[i].model_output_type);
+				ret = -ENOTSUP;
+			}
+			if (ret < 0)
+				return ret;
+		}
+
+		lcl_qbuffer += model->addr.output[i].sz_q;
+		lcl_dbuffer += model->addr.output[i].sz_d;
+	}
+
+	batch_id++;
+	if (batch_id < PLT_DIV_CEIL(nb_batches, model->batch_size))
+		goto next_batch;
+
+	return 0;
+}
+
 struct rte_ml_dev_ops cn10k_ml_ops = {
 	/* Device control ops */
 	.dev_info_get = cn10k_ml_dev_info_get,
@@ -1006,4 +1155,6 @@  struct rte_ml_dev_ops cn10k_ml_ops = {
 	/* I/O ops */
 	.io_input_size_get = cn10k_ml_io_input_size_get,
 	.io_output_size_get = cn10k_ml_io_output_size_get,
+	.io_quantize = cn10k_ml_io_quantize,
+	.io_dequantize = cn10k_ml_io_dequantize,
 };
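
For reference, the conversions selected in the switch statements above
reduce to a scale-and-saturate loop over nb_elements. The following is
a hypothetical scalar sketch of the int8 pair
(rte_ml_io_float32_to_int8() / rte_ml_io_int8_to_float32()); the
actual helpers live in the ML common code and may be vectorized, and
the exact rounding and saturation behavior shown here is an
assumption.

	#include <math.h>
	#include <stdint.h>

	/* Quantize: scale, round to nearest, saturate to the int8 range. */
	static void
	float32_to_int8_ref(float qscale, uint64_t nb_elements,
			    const float *in, int8_t *out)
	{
		uint64_t i;
		int32_t v;

		for (i = 0; i < nb_elements; i++) {
			v = (int32_t)roundf(in[i] * qscale);
			if (v < INT8_MIN)
				v = INT8_MIN;
			else if (v > INT8_MAX)
				v = INT8_MAX;
			out[i] = (int8_t)v;
		}
	}

	/* Dequantize: widen and apply the dequantization scale
	 * (dscale is typically the reciprocal of qscale).
	 */
	static void
	int8_to_float32_ref(float dscale, uint64_t nb_elements,
			    const int8_t *in, float *out)
	{
		uint64_t i;

		for (i = 0; i < nb_elements; i++)
			out[i] = (float)in[i] * dscale;
	}

Note also the per-batch walk in the driver: the buffer cursors advance
by sz_d / sz_q per I/O, and the batch loop runs
PLT_DIV_CEIL(nb_batches, model->batch_size) times, so the caller's
buffers must cover whole multiples of the model's native batch size.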