@@ -28,6 +28,10 @@ New Features
Added support in mldev library for models with multiple inputs and outputs.
+ * **Added support for Marvell TVM models in ML CNXK driver.**
+
+ Added support for models compiled using TVM framework in ML CNXK driver.
+
.. This section should contain new features added in this release.
Sample format:
@@ -371,10 +371,6 @@ cn10k_ml_dev_configure(struct cnxk_ml_dev *cnxk_mldev, const struct rte_ml_dev_c
else
cn10k_mldev->ml_jcmdq_enqueue = roc_ml_jcmdq_enqueue_lf;
- cnxk_mldev->mldev->enqueue_burst = cnxk_ml_enqueue_burst;
- cnxk_mldev->mldev->dequeue_burst = cnxk_ml_dequeue_burst;
- cnxk_mldev->mldev->op_error_get = cn10k_ml_op_error_get;
-
return 0;
}
@@ -47,6 +47,12 @@ struct cnxk_ml_io {
/* Scale */
float scale;
+
+ /* Dequantized offset */
+ uint32_t offset_d;
+
+ /* Quantized offset */
+ uint32_t offset_q;
};
/* Model / Layer IO structure */
@@ -632,6 +632,10 @@ cnxk_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *co
cnxk_mldev->max_nb_layers =
cnxk_mldev->cn10k_mldev.fw.req->cn10k_req.jd.fw_load.cap.s.max_models;
+ cnxk_mldev->mldev->enqueue_burst = cnxk_ml_enqueue_burst;
+ cnxk_mldev->mldev->dequeue_burst = cnxk_ml_dequeue_burst;
+ cnxk_mldev->mldev->op_error_get = cn10k_ml_op_error_get;
+
/* Allocate and initialize index_map */
if (cnxk_mldev->index_map == NULL) {
cnxk_mldev->index_map =
@@ -24,6 +24,11 @@ struct cnxk_ml_req {
union {
/* CN10K */
struct cn10k_ml_req cn10k_req;
+
+#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM
+ /* MVTVM */
+ struct mvtvm_ml_req mvtvm_req;
+#endif
};
/* Address of status field */
@@ -198,6 +198,16 @@ mvtvm_ml_model_io_info_set(struct cnxk_ml_model *model)
model->mvtvm.info.total_input_sz_d += model->mvtvm.info.input[i].sz_d;
model->mvtvm.info.total_input_sz_q += model->mvtvm.info.input[i].sz_q;
+ model->mvtvm.info.input[i].offset_d = model->mvtvm.info.total_input_sz_d;
+ model->mvtvm.info.input[i].offset_q = model->mvtvm.info.total_input_sz_q;
+
+ model->mvtvm.input_tensor[i].device = metadata->input[i].device;
+ model->mvtvm.input_tensor[i].ndim = metadata->input[i].ndim;
+ model->mvtvm.input_tensor[i].dtype = metadata->input[i].datatype;
+ model->mvtvm.input_tensor[i].shape = metadata->input[i].shape;
+ model->mvtvm.input_tensor[i].strides = NULL;
+ model->mvtvm.input_tensor[i].byte_offset = model->mvtvm.info.input[i].offset_q;
+
plt_ml_dbg("model_id = %u, input[%u] - sz_d = %u sz_q = %u", model->model_id, i,
model->mvtvm.info.input[i].sz_d, model->mvtvm.info.input[i].sz_q);
}
@@ -231,6 +241,16 @@ mvtvm_ml_model_io_info_set(struct cnxk_ml_model *model)
model->mvtvm.info.total_output_sz_d += model->mvtvm.info.output[i].sz_d;
model->mvtvm.info.total_output_sz_q += model->mvtvm.info.output[i].sz_q;
+ model->mvtvm.info.output[i].offset_d = model->mvtvm.info.total_output_sz_d;
+ model->mvtvm.info.output[i].offset_q = model->mvtvm.info.total_output_sz_q;
+
+ model->mvtvm.output_tensor[i].device = metadata->output[i].device;
+ model->mvtvm.output_tensor[i].ndim = metadata->output[i].ndim;
+ model->mvtvm.output_tensor[i].dtype = metadata->output[i].datatype;
+ model->mvtvm.output_tensor[i].shape = metadata->output[i].shape;
+ model->mvtvm.output_tensor[i].strides = NULL;
+ model->mvtvm.output_tensor[i].byte_offset = model->mvtvm.info.output[i].offset_q;
+
plt_ml_dbg("model_id = %u, output[%u] - sz_d = %u sz_q = %u", model->model_id, i,
model->mvtvm.info.output[i].sz_d, model->mvtvm.info.output[i].sz_q);
}
@@ -69,6 +69,12 @@ struct mvtvm_ml_model_data {
/* Stats for burst ops */
struct mvtvm_ml_model_xstats *burst_xstats;
+
+ /* Input Tensor */
+ DLTensor input_tensor[ML_CNXK_MODEL_MAX_INPUT_OUTPUT];
+
+ /* Output Tensor */
+ DLTensor output_tensor[ML_CNXK_MODEL_MAX_INPUT_OUTPUT];
};
enum cnxk_ml_model_type mvtvm_ml_model_type_get(struct rte_ml_model_params *params);
@@ -19,6 +19,12 @@
/* ML model macros */
#define MVTVM_ML_MODEL_MEMZONE_NAME "ml_mvtvm_model_mz"
+/* Make the generic status pointer of the request refer to its MVTVM status field. */
+__rte_hot static void
+mvtvm_ml_set_poll_addr(struct cnxk_ml_req *req)
+{
+	struct mvtvm_ml_req *tvm_req = &req->mvtvm_req;
+
+	req->status = &tvm_req->status;
+}
+
void
mvtvm_ml_model_xstat_name_set(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model,
uint16_t stat_id, uint16_t entry, char *suffix)
@@ -242,6 +248,7 @@ mvtvm_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params *
callback->tvmrt_free = cn10k_ml_free;
callback->tvmrt_quantize = mvtvm_ml_io_quantize;
callback->tvmrt_dequantize = mvtvm_ml_io_dequantize;
+ callback->tvmrt_inference = cn10k_ml_inference_sync;
} else {
callback = NULL;
}
@@ -285,6 +292,19 @@ mvtvm_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params *
model->mvtvm.burst_xstats[qp_id].dequeued_count = 0;
}
+ /* Set model specific fast path functions */
+ if (model->subtype == ML_CNXK_MODEL_SUBTYPE_TVM_MRVL) {
+ model->enqueue_single = cn10k_ml_enqueue_single;
+ model->result_update = cn10k_ml_result_update;
+ model->set_error_code = cn10k_ml_set_error_code;
+ model->set_poll_addr = cn10k_ml_set_poll_addr;
+ } else {
+ model->enqueue_single = mvtvm_ml_enqueue_single;
+ model->result_update = mvtvm_ml_result_update;
+ model->set_error_code = mvtvm_ml_set_error_code;
+ model->set_poll_addr = mvtvm_ml_set_poll_addr;
+ }
+
return 0;
error:
@@ -495,3 +515,107 @@ mvtvm_ml_io_dequantize(void *device, uint16_t model_id, const char *layer_name,
return 0;
}
+
+/*
+ * Prepare the per-request tensors for an op and hand them to tvmdp_model_run().
+ *
+ * The model's input_tensor / output_tensor arrays are copied into the request,
+ * then every copied tensor gets its data pointer set to the address of the
+ * matching op buffer and its byte_offset cleared. After tvmdp_model_run()
+ * returns, ML_CNXK_POLL_JOB_FINISH is written to the location req->status
+ * points to.
+ *
+ * Always returns 0.
+ */
+static int
+mvtvm_ml_model_run(struct cnxk_ml_model *model, struct rte_ml_op *op, struct cnxk_ml_req *req)
+{
+	struct mvtvm_ml_req *tvm_req = &req->mvtvm_req;
+	DLTensor *tensor;
+	uint8_t idx;
+
+	/* Inputs: copy the model tensors, then attach the op buffers */
+	rte_memcpy(tvm_req->input_tensor, model->mvtvm.input_tensor,
+		   model->mvtvm.metadata.model.num_input * sizeof(DLTensor));
+	for (idx = 0; idx < model->mvtvm.metadata.model.num_input; idx++) {
+		tensor = &tvm_req->input_tensor[idx];
+		tensor->data = op->input[idx]->addr;
+		tensor->byte_offset = 0;
+	}
+
+	/* Outputs: same treatment as the inputs */
+	rte_memcpy(tvm_req->output_tensor, model->mvtvm.output_tensor,
+		   model->mvtvm.metadata.model.num_output * sizeof(DLTensor));
+	for (idx = 0; idx < model->mvtvm.metadata.model.num_output; idx++) {
+		tensor = &tvm_req->output_tensor[idx];
+		tensor->data = op->output[idx]->addr;
+		tensor->byte_offset = 0;
+	}
+
+	tvmdp_model_run(model->model_id, model->mvtvm.metadata.model.num_input,
+			tvm_req->input_tensor, model->mvtvm.metadata.model.num_output,
+			tvm_req->output_tensor, &tvm_req->result, &tvm_req->status);
+
+	/* Flag completion at the poll location of the request */
+	plt_write64(ML_CNXK_POLL_JOB_FINISH, req->status);
+
+	return 0;
+}
+
+__rte_hot void
+mvtvm_ml_set_error_code(struct cnxk_ml_req *req, uint64_t etype, uint64_t stype)
+{
+ RTE_SET_USED(stype);
+
+ req->mvtvm_req.result.error_code = etype;
+}
+
+/*
+ * Enqueue one op on the request slot at index head of the queue pair.
+ *
+ * The slot's poll address is set through the model's set_poll_addr handler,
+ * its MVTVM result is zeroed and tagged with the op's user pointer, and the
+ * op is passed to mvtvm_ml_model_run(). The request timeout and op pointer
+ * are filled in afterwards. layer_id is not used.
+ *
+ * Always returns true.
+ */
+__rte_hot bool
+mvtvm_ml_enqueue_single(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op, uint16_t layer_id,
+			struct cnxk_ml_qp *qp, uint64_t head)
+{
+	struct cnxk_ml_queue *queue = &qp->queue;
+	struct cnxk_ml_req *req = &queue->reqs[head];
+	struct cnxk_ml_model *model = cnxk_mldev->mldev->data->models[op->model_id];
+	struct mvtvm_ml_result *result = &req->mvtvm_req.result;
+
+	RTE_SET_USED(layer_id);
+
+	model->set_poll_addr(req);
+
+	/* Start from a cleared result that carries the caller's context */
+	memset(result, 0, sizeof(*result));
+	result->error_code = 0x0;
+	result->user_ptr = op->user_ptr;
+
+	cnxk_ml_set_poll_ptr(req);
+	mvtvm_ml_model_run(model, op, req);
+
+	req->timeout = plt_tsc_cycles() + queue->wait_cycles;
+	req->op = op;
+
+	return true;
+}
+
+/*
+ * Translate the MVTVM result of a request into op status and statistics.
+ *
+ * The result's error code is always copied to op->impl_opaque. A non-zero
+ * error code marks the op as failed and bumps the queue pair's error counter.
+ * Otherwise the op is marked successful, the queue pair's dequeued counter is
+ * bumped and the per-queue-pair model xstats are updated with the latency
+ * computed as end_ns - start_ns.
+ */
+__rte_hot void
+mvtvm_ml_result_update(struct cnxk_ml_dev *cnxk_mldev, int qp_id, void *request)
+{
+	struct cnxk_ml_req *req = request;
+	struct mvtvm_ml_result *result = &req->mvtvm_req.result;
+	struct rte_ml_op *op = req->op;
+	struct cnxk_ml_qp *qp = cnxk_mldev->mldev->data->queue_pairs[qp_id];
+	struct mvtvm_ml_model_xstats *xstats;
+	struct cnxk_ml_model *model;
+	uint64_t latency;
+
+	op->impl_opaque = result->error_code;
+
+	/* Failure: only the error accounting applies */
+	if (unlikely(result->error_code != 0)) {
+		qp->stats.dequeue_err_count++;
+		op->status = RTE_ML_OP_STATUS_ERROR;
+		return;
+	}
+
+	qp->stats.dequeued_count++;
+	op->status = RTE_ML_OP_STATUS_SUCCESS;
+
+	model = cnxk_mldev->mldev->data->models[op->model_id];
+	xstats = &model->mvtvm.burst_xstats[qp_id];
+
+	/* Re-seed min / max when the dequeued count equals the reset count */
+	if (unlikely(xstats->dequeued_count == xstats->tvm_rt_reset_count)) {
+		xstats->tvm_rt_latency_min = UINT64_MAX;
+		xstats->tvm_rt_latency_max = 0;
+	}
+
+	latency = result->stats.end_ns - result->stats.start_ns;
+	xstats->tvm_rt_latency = latency;
+	xstats->tvm_rt_latency_tot += latency;
+	xstats->tvm_rt_latency_min = RTE_MIN(xstats->tvm_rt_latency_min, latency);
+	xstats->tvm_rt_latency_max = RTE_MAX(xstats->tvm_rt_latency_max, latency);
+	xstats->dequeued_count++;
+}
@@ -16,6 +16,44 @@
struct cnxk_ml_dev;
struct cnxk_ml_model;
struct cnxk_ml_layer;
+struct cnxk_ml_qp;
+struct cnxk_ml_req;
+
+/* Inference stats; end_ns - start_ns is used as the TVM runtime latency */
+struct mvtvm_ml_stats {
+ /* Start ns */
+ uint64_t start_ns;
+
+ /* End ns */
+ uint64_t end_ns;
+};
+
+/* Result structure, cleared for every op in mvtvm_ml_enqueue_single() */
+struct mvtvm_ml_result {
+ /* Job error code, zero is treated as success by mvtvm_ml_result_update() */
+ uint64_t error_code;
+
+ /* Inference stats */
+ struct mvtvm_ml_stats stats;
+
+ /* User context pointer, copied from the op at enqueue */
+ void *user_ptr;
+};
+
+/* MVTVM specific request */
+struct mvtvm_ml_req {
+ /* Input tensors, passed to tvmdp_model_run() for this request */
+ DLTensor input_tensor[ML_CNXK_MODEL_MAX_INPUT_OUTPUT];
+
+ /* Output tensors, passed to tvmdp_model_run() for this request */
+ DLTensor output_tensor[ML_CNXK_MODEL_MAX_INPUT_OUTPUT];
+
+ /* Status field for poll mode requests, target of the request's status pointer */
+ volatile uint64_t status;
+
+ /* Result */
+ struct mvtvm_ml_result result;
+};
int mvtvm_ml_dev_configure(struct cnxk_ml_dev *cnxk_mldev, const struct rte_ml_dev_config *conf);
int mvtvm_ml_dev_close(struct cnxk_ml_dev *cnxk_mldev);
@@ -29,6 +67,11 @@ int mvtvm_ml_io_quantize(void *device, uint16_t model_id, const char *layer_name
int mvtvm_ml_io_dequantize(void *device, uint16_t model_id, const char *layer_name, void *qbuffer,
const DLTensor **deq_tensor);
+/* Fast path handlers, set on models whose subtype is not ML_CNXK_MODEL_SUBTYPE_TVM_MRVL */
+__rte_hot bool mvtvm_ml_enqueue_single(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op,
+ uint16_t layer_id, struct cnxk_ml_qp *qp, uint64_t head);
+__rte_hot void mvtvm_ml_result_update(struct cnxk_ml_dev *cnxk_mldev, int qp_id, void *request);
+__rte_hot void mvtvm_ml_set_error_code(struct cnxk_ml_req *req, uint64_t etype, uint64_t stype);
+
void mvtvm_ml_model_xstat_name_set(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model,
uint16_t stat_id, uint16_t entry, char *suffix);
uint64_t mvtvm_ml_model_xstat_get(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model,