@@ -9,6 +9,8 @@
#include "cn10k_ml_ocm.h"
+#include "cnxk_ml_io.h"
+
/* Dummy Device ops */
extern struct rte_ml_dev_ops ml_dev_dummy_ops;
@@ -21,9 +23,6 @@ extern struct rte_ml_dev_ops ml_dev_dummy_ops;
/* Device alignment size */
#define ML_CN10K_ALIGN_SIZE 128
-/* Maximum number of models per device */
-#define ML_CN10K_MAX_MODELS 16
-
/* Maximum number of queue-pairs per device, spinlock version */
#define ML_CN10K_MAX_QP_PER_DEVICE_SL 16
@@ -455,8 +454,8 @@ struct cn10k_ml_xstats {
struct cn10k_ml_xstats_entry *entries;
/* Store num stats and offset of the stats for each model */
- uint16_t count_per_model[ML_CN10K_MAX_MODELS];
- uint16_t offset_for_model[ML_CN10K_MAX_MODELS];
+ uint16_t count_per_model[ML_CNXK_MAX_MODELS];
+ uint16_t offset_for_model[ML_CNXK_MAX_MODELS];
uint16_t count_mode_device;
uint16_t count_mode_model;
uint16_t count;
@@ -6,10 +6,10 @@
#include <mldev_utils.h>
-#include "cn10k_ml_model.h"
#include "cn10k_ml_ocm.h"
#include "cnxk_ml_dev.h"
+#include "cnxk_ml_model.h"
static enum rte_ml_io_type
cn10k_ml_io_type_map(uint8_t type)
@@ -311,19 +311,17 @@ cn10k_ml_model_metadata_update(struct cn10k_ml_model_metadata *metadata)
}
void
-cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer, uint8_t *base_dma_addr)
+cn10k_ml_layer_addr_update(struct cnxk_ml_layer *layer, uint8_t *buffer, uint8_t *base_dma_addr)
{
struct cn10k_ml_model_metadata *metadata;
- struct cn10k_ml_model_addr *addr;
+ struct cn10k_ml_layer_addr *addr;
size_t model_data_size;
uint8_t *dma_addr_load;
uint8_t *dma_addr_run;
- uint8_t i;
- uint8_t j;
int fpos;
- metadata = &model->metadata;
- addr = &model->addr;
+ metadata = &layer->glow.metadata;
+ addr = &layer->glow.addr;
model_data_size = metadata->init_model.file_size + metadata->main_model.file_size +
metadata->finish_model.file_size + metadata->weights_bias.file_size;
@@ -361,102 +359,136 @@ cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer, uint8_
addr->wb_base_addr = PLT_PTR_SUB(dma_addr_load, metadata->weights_bias.mem_offset);
addr->wb_load_addr = PLT_PTR_ADD(addr->wb_base_addr, metadata->weights_bias.mem_offset);
rte_memcpy(addr->wb_load_addr, PLT_PTR_ADD(buffer, fpos), metadata->weights_bias.file_size);
+}
+
+void
+cn10k_ml_layer_info_update(struct cnxk_ml_layer *layer)
+{
+ struct cn10k_ml_model_metadata *metadata;
+ uint8_t i;
+ uint8_t j;
+
+ metadata = &layer->glow.metadata;
/* Inputs */
- addr->total_input_sz_d = 0;
- addr->total_input_sz_q = 0;
+ layer->info.nb_inputs = metadata->model.num_input;
+ layer->info.total_input_sz_d = 0;
+ layer->info.total_input_sz_q = 0;
for (i = 0; i < metadata->model.num_input; i++) {
if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
- addr->input[i].nb_dims = 4;
- addr->input[i].shape[0] = metadata->input1[i].shape.w;
- addr->input[i].shape[1] = metadata->input1[i].shape.x;
- addr->input[i].shape[2] = metadata->input1[i].shape.y;
- addr->input[i].shape[3] = metadata->input1[i].shape.z;
-
- addr->input[i].nb_elements =
+ strncpy(layer->info.input[i].name, (char *)metadata->input1[i].input_name,
+ MRVL_ML_INPUT_NAME_LEN);
+ layer->info.input[i].dtype = metadata->input1[i].input_type;
+ layer->info.input[i].qtype = metadata->input1[i].model_input_type;
+ layer->info.input[i].nb_dims = 4;
+ layer->info.input[i].shape[0] = metadata->input1[i].shape.w;
+ layer->info.input[i].shape[1] = metadata->input1[i].shape.x;
+ layer->info.input[i].shape[2] = metadata->input1[i].shape.y;
+ layer->info.input[i].shape[3] = metadata->input1[i].shape.z;
+ layer->info.input[i].nb_elements =
metadata->input1[i].shape.w * metadata->input1[i].shape.x *
metadata->input1[i].shape.y * metadata->input1[i].shape.z;
- addr->input[i].sz_d =
- addr->input[i].nb_elements *
+ layer->info.input[i].sz_d =
+ layer->info.input[i].nb_elements *
rte_ml_io_type_size_get(metadata->input1[i].input_type);
- addr->input[i].sz_q =
- addr->input[i].nb_elements *
+ layer->info.input[i].sz_q =
+ layer->info.input[i].nb_elements *
rte_ml_io_type_size_get(metadata->input1[i].model_input_type);
- addr->total_input_sz_d += addr->input[i].sz_d;
- addr->total_input_sz_q += addr->input[i].sz_q;
+ layer->info.input[i].scale = metadata->input1[i].qscale;
+
+ layer->info.total_input_sz_d += layer->info.input[i].sz_d;
+ layer->info.total_input_sz_q += layer->info.input[i].sz_q;
plt_ml_dbg(
- "model_id = %u, input[%u] - w:%u x:%u y:%u z:%u, sz_d = %u sz_q = %u",
- model->model_id, i, metadata->input1[i].shape.w,
+ "index = %u, input1[%u] - w:%u x:%u y:%u z:%u, sz_d = %u sz_q = %u",
+ layer->index, i, metadata->input1[i].shape.w,
metadata->input1[i].shape.x, metadata->input1[i].shape.y,
- metadata->input1[i].shape.z, addr->input[i].sz_d,
- addr->input[i].sz_q);
+ metadata->input1[i].shape.z, layer->info.input[i].sz_d,
+ layer->info.input[i].sz_q);
} else {
j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
- addr->input[i].nb_dims = 4;
- addr->input[i].shape[0] = metadata->input2[j].shape.w;
- addr->input[i].shape[1] = metadata->input2[j].shape.x;
- addr->input[i].shape[2] = metadata->input2[j].shape.y;
- addr->input[i].shape[3] = metadata->input2[j].shape.z;
-
- addr->input[i].nb_elements =
+ strncpy(layer->info.input[i].name, (char *)metadata->input2[j].input_name,
+ MRVL_ML_INPUT_NAME_LEN);
+ layer->info.input[i].dtype = metadata->input2[j].input_type;
+ layer->info.input[i].qtype = metadata->input2[j].model_input_type;
+ layer->info.input[i].nb_dims = 4;
+ layer->info.input[i].shape[0] = metadata->input2[j].shape.w;
+ layer->info.input[i].shape[1] = metadata->input2[j].shape.x;
+ layer->info.input[i].shape[2] = metadata->input2[j].shape.y;
+ layer->info.input[i].shape[3] = metadata->input2[j].shape.z;
+ layer->info.input[i].nb_elements =
metadata->input2[j].shape.w * metadata->input2[j].shape.x *
metadata->input2[j].shape.y * metadata->input2[j].shape.z;
- addr->input[i].sz_d =
- addr->input[i].nb_elements *
+ layer->info.input[i].sz_d =
+ layer->info.input[i].nb_elements *
rte_ml_io_type_size_get(metadata->input2[j].input_type);
- addr->input[i].sz_q =
- addr->input[i].nb_elements *
+ layer->info.input[i].sz_q =
+ layer->info.input[i].nb_elements *
rte_ml_io_type_size_get(metadata->input2[j].model_input_type);
- addr->total_input_sz_d += addr->input[i].sz_d;
- addr->total_input_sz_q += addr->input[i].sz_q;
+ layer->info.input[i].scale = metadata->input2[j].qscale;
+
+ layer->info.total_input_sz_d += layer->info.input[i].sz_d;
+ layer->info.total_input_sz_q += layer->info.input[i].sz_q;
plt_ml_dbg(
- "model_id = %u, input2[%u] - w:%u x:%u y:%u z:%u, sz_d = %u sz_q = %u",
- model->model_id, j, metadata->input2[j].shape.w,
+ "index = %u, input2[%u] - w:%u x:%u y:%u z:%u, sz_d = %u sz_q = %u",
+ layer->index, j, metadata->input2[j].shape.w,
metadata->input2[j].shape.x, metadata->input2[j].shape.y,
- metadata->input2[j].shape.z, addr->input[i].sz_d,
- addr->input[i].sz_q);
+ metadata->input2[j].shape.z, layer->info.input[i].sz_d,
+ layer->info.input[i].sz_q);
}
}
/* Outputs */
- addr->total_output_sz_q = 0;
- addr->total_output_sz_d = 0;
+ layer->info.nb_outputs = metadata->model.num_output;
+ layer->info.total_output_sz_q = 0;
+ layer->info.total_output_sz_d = 0;
for (i = 0; i < metadata->model.num_output; i++) {
if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
- addr->output[i].nb_dims = 1;
- addr->output[i].shape[0] = metadata->output1[i].size;
- addr->output[i].nb_elements = metadata->output1[i].size;
- addr->output[i].sz_d =
- addr->output[i].nb_elements *
+ strncpy(layer->info.output[i].name,
+ (char *)metadata->output1[i].output_name, MRVL_ML_OUTPUT_NAME_LEN);
+ layer->info.output[i].dtype = metadata->output1[i].output_type;
+ layer->info.output[i].qtype = metadata->output1[i].model_output_type;
+ layer->info.output[i].nb_dims = 1;
+ layer->info.output[i].shape[0] = metadata->output1[i].size;
+ layer->info.output[i].nb_elements = metadata->output1[i].size;
+ layer->info.output[i].sz_d =
+ layer->info.output[i].nb_elements *
rte_ml_io_type_size_get(metadata->output1[i].output_type);
- addr->output[i].sz_q =
- addr->output[i].nb_elements *
+ layer->info.output[i].sz_q =
+ layer->info.output[i].nb_elements *
rte_ml_io_type_size_get(metadata->output1[i].model_output_type);
- addr->total_output_sz_q += addr->output[i].sz_q;
- addr->total_output_sz_d += addr->output[i].sz_d;
+ layer->info.output[i].scale = metadata->output1[i].dscale;
- plt_ml_dbg("model_id = %u, output[%u] - sz_d = %u, sz_q = %u",
- model->model_id, i, addr->output[i].sz_d, addr->output[i].sz_q);
+ layer->info.total_output_sz_q += layer->info.output[i].sz_q;
+ layer->info.total_output_sz_d += layer->info.output[i].sz_d;
+
+ plt_ml_dbg("index = %u, output1[%u] - sz_d = %u, sz_q = %u", layer->index,
+ i, layer->info.output[i].sz_d, layer->info.output[i].sz_q);
} else {
j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
- addr->output[i].nb_dims = 1;
- addr->output[i].shape[0] = metadata->output2[j].size;
- addr->output[i].nb_elements = metadata->output2[j].size;
- addr->output[i].sz_d =
- addr->output[i].nb_elements *
+ strncpy(layer->info.output[i].name,
+ (char *)metadata->output2[j].output_name, MRVL_ML_OUTPUT_NAME_LEN);
+ layer->info.output[i].dtype = metadata->output2[j].output_type;
+ layer->info.output[i].qtype = metadata->output2[j].model_output_type;
+ layer->info.output[i].nb_dims = 1;
+ layer->info.output[i].shape[0] = metadata->output2[j].size;
+ layer->info.output[i].nb_elements = metadata->output2[j].size;
+ layer->info.output[i].sz_d =
+ layer->info.output[i].nb_elements *
rte_ml_io_type_size_get(metadata->output2[j].output_type);
- addr->output[i].sz_q =
- addr->output[i].nb_elements *
+ layer->info.output[i].sz_q =
+ layer->info.output[i].nb_elements *
rte_ml_io_type_size_get(metadata->output2[j].model_output_type);
- addr->total_output_sz_q += addr->output[i].sz_q;
- addr->total_output_sz_d += addr->output[i].sz_d;
+ layer->info.output[i].scale = metadata->output2[j].dscale;
+
+ layer->info.total_output_sz_q += layer->info.output[i].sz_q;
+ layer->info.total_output_sz_d += layer->info.output[i].sz_d;
- plt_ml_dbg("model_id = %u, output2[%u] - sz_d = %u, sz_q = %u",
- model->model_id, j, addr->output[i].sz_d, addr->output[i].sz_q);
+ plt_ml_dbg("index = %u, output2[%u] - sz_d = %u, sz_q = %u", layer->index,
+ j, layer->info.output[i].sz_d, layer->info.output[i].sz_q);
}
}
}
@@ -514,23 +546,23 @@ cn10k_ml_model_ocm_pages_count(struct cn10k_ml_dev *cn10k_mldev, uint16_t model_
}
void
-cn10k_ml_model_info_set(struct rte_ml_dev *dev, struct cn10k_ml_model *model)
+cn10k_ml_model_info_set(struct rte_ml_dev *dev, struct cnxk_ml_model *model)
{
struct cn10k_ml_model_metadata *metadata;
- struct cn10k_ml_model_addr *addr;
+ struct cn10k_ml_dev *cn10k_mldev;
+ struct cnxk_ml_dev *cnxk_mldev;
struct rte_ml_model_info *info;
struct rte_ml_io_info *output;
struct rte_ml_io_info *input;
- struct cn10k_ml_dev *mldev;
+ struct cnxk_ml_layer *layer;
uint8_t i;
- uint8_t j;
- mldev = dev->data->dev_private;
- metadata = &model->metadata;
+ cnxk_mldev = dev->data->dev_private;
+ cn10k_mldev = &cnxk_mldev->cn10k_mldev;
+ metadata = &model->glow.metadata;
info = PLT_PTR_CAST(model->info);
input = PLT_PTR_ADD(info, sizeof(struct rte_ml_model_info));
output = PLT_PTR_ADD(input, metadata->model.num_input * sizeof(struct rte_ml_io_info));
- addr = &model->addr;
/* Set model info */
memset(info, 0, sizeof(struct rte_ml_model_info));
@@ -542,7 +574,8 @@ cn10k_ml_model_info_set(struct rte_ml_dev *dev, struct cn10k_ml_model *model)
info->device_id = dev->data->dev_id;
info->io_layout = RTE_ML_IO_LAYOUT_PACKED;
info->min_batches = model->batch_size;
- info->max_batches = mldev->fw.req->jd.fw_load.cap.s.max_num_batches / model->batch_size;
+ info->max_batches =
+ cn10k_mldev->fw.req->jd.fw_load.cap.s.max_num_batches / model->batch_size;
info->nb_inputs = metadata->model.num_input;
info->input_info = input;
info->nb_outputs = metadata->model.num_output;
@@ -550,56 +583,26 @@ cn10k_ml_model_info_set(struct rte_ml_dev *dev, struct cn10k_ml_model *model)
info->wb_size = metadata->weights_bias.file_size;
/* Set input info */
+ layer = &model->layer[0];
for (i = 0; i < info->nb_inputs; i++) {
- if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
- rte_memcpy(input[i].name, metadata->input1[i].input_name,
- MRVL_ML_INPUT_NAME_LEN);
- input[i].nb_dims = addr->input[i].nb_dims;
- input[i].shape = addr->input[i].shape;
- input[i].type = metadata->input1[i].model_input_type;
- input[i].nb_elements = addr->input[i].nb_elements;
- input[i].size =
- addr->input[i].nb_elements *
- rte_ml_io_type_size_get(metadata->input1[i].model_input_type);
- } else {
- j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
-
- rte_memcpy(input[i].name, metadata->input2[j].input_name,
- MRVL_ML_INPUT_NAME_LEN);
- input[i].nb_dims = addr->input[i].nb_dims;
- input[i].shape = addr->input[i].shape;
- input[i].type = metadata->input2[j].model_input_type;
- input[i].nb_elements = addr->input[i].nb_elements;
- input[i].size =
- addr->input[i].nb_elements *
- rte_ml_io_type_size_get(metadata->input2[j].model_input_type);
- }
+ rte_memcpy(input[i].name, layer->info.input[i].name, MRVL_ML_INPUT_NAME_LEN);
+ input[i].nb_dims = layer->info.input[i].nb_dims;
+ input[i].shape = &layer->info.input[i].shape[0];
+ input[i].type = layer->info.input[i].qtype;
+ input[i].nb_elements = layer->info.input[i].nb_elements;
+ input[i].size = layer->info.input[i].nb_elements *
+ rte_ml_io_type_size_get(layer->info.input[i].qtype);
}
/* Set output info */
+ layer = &model->layer[0];
for (i = 0; i < info->nb_outputs; i++) {
- if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
- rte_memcpy(output[i].name, metadata->output1[i].output_name,
- MRVL_ML_OUTPUT_NAME_LEN);
- output[i].nb_dims = addr->output[i].nb_dims;
- output[i].shape = addr->output[i].shape;
- output[i].type = metadata->output1[i].model_output_type;
- output[i].nb_elements = addr->output[i].nb_elements;
- output[i].size =
- addr->output[i].nb_elements *
- rte_ml_io_type_size_get(metadata->output1[i].model_output_type);
- } else {
- j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
-
- rte_memcpy(output[i].name, metadata->output2[j].output_name,
- MRVL_ML_OUTPUT_NAME_LEN);
- output[i].nb_dims = addr->output[i].nb_dims;
- output[i].shape = addr->output[i].shape;
- output[i].type = metadata->output2[j].model_output_type;
- output[i].nb_elements = addr->output[i].nb_elements;
- output[i].size =
- addr->output[i].nb_elements *
- rte_ml_io_type_size_get(metadata->output2[j].model_output_type);
- }
+		rte_memcpy(output[i].name, layer->info.output[i].name, MRVL_ML_OUTPUT_NAME_LEN);
+ output[i].nb_dims = layer->info.output[i].nb_dims;
+ output[i].shape = &layer->info.output[i].shape[0];
+ output[i].type = layer->info.output[i].qtype;
+ output[i].nb_elements = layer->info.output[i].nb_elements;
+ output[i].size = layer->info.output[i].nb_elements *
+ rte_ml_io_type_size_get(layer->info.output[i].qtype);
}
}
@@ -13,15 +13,8 @@
#include "cn10k_ml_ocm.h"
#include "cn10k_ml_ops.h"
-struct cnxk_ml_dev;
-
-/* Model state */
-enum cn10k_ml_model_state {
- ML_CN10K_MODEL_STATE_LOADED,
- ML_CN10K_MODEL_STATE_JOB_ACTIVE,
- ML_CN10K_MODEL_STATE_STARTED,
- ML_CN10K_MODEL_STATE_UNKNOWN,
-};
+struct cnxk_ml_model;
+struct cnxk_ml_layer;
/* Model Metadata : v 2.3.0.1 */
#define MRVL_ML_MODEL_MAGIC_STRING "MRVL"
@@ -369,7 +362,7 @@ struct cn10k_ml_model_metadata {
};
/* Model address structure */
-struct cn10k_ml_model_addr {
+struct cn10k_ml_layer_addr {
/* Base DMA address for load */
void *base_dma_addr_load;
@@ -408,58 +401,10 @@ struct cn10k_ml_model_addr {
/* End tile */
uint8_t tile_end;
-
- /* Input address and size */
- struct {
- /* Number of dimensions in shape */
- uint32_t nb_dims;
-
- /* Shape of input */
- uint32_t shape[4];
-
- /* Number of elements */
- uint32_t nb_elements;
-
- /* Dequantized input size */
- uint32_t sz_d;
-
- /* Quantized input size */
- uint32_t sz_q;
- } input[MRVL_ML_NUM_INPUT_OUTPUT];
-
- /* Output address and size */
- struct {
- /* Number of dimensions in shape */
- uint32_t nb_dims;
-
- /* Shape of input */
- uint32_t shape[4];
-
- /* Number of elements */
- uint32_t nb_elements;
-
- /* Dequantize output size */
- uint32_t sz_d;
-
- /* Quantized output size */
- uint32_t sz_q;
- } output[MRVL_ML_NUM_INPUT_OUTPUT];
-
- /* Total size of quantized input */
- uint32_t total_input_sz_q;
-
- /* Total size of dequantized input */
- uint32_t total_input_sz_d;
-
- /* Total size of quantized output */
- uint32_t total_output_sz_q;
-
- /* Total size of dequantized output */
- uint32_t total_output_sz_d;
};
/* Model fast-path stats */
-struct cn10k_ml_model_stats {
+struct cn10k_ml_layer_stats {
/* Total hardware latency, sum of all inferences */
uint64_t hw_latency_tot;
@@ -488,59 +433,38 @@ struct cn10k_ml_model_stats {
uint64_t fw_reset_count;
};
-/* Model Object */
-struct cn10k_ml_model {
- /* Device reference */
- struct cnxk_ml_dev *mldev;
-
- /* Name */
- char name[RTE_ML_STR_MAX];
-
- /* ID */
- uint16_t model_id;
-
- /* Batch size */
- uint32_t batch_size;
-
- /* Metadata */
+struct cn10k_ml_layer_data {
+ /* Model / Layer: metadata */
struct cn10k_ml_model_metadata metadata;
- /* Address structure */
- struct cn10k_ml_model_addr addr;
+ /* Layer: address structure */
+ struct cn10k_ml_layer_addr addr;
- /* Tile and memory information object */
- struct cn10k_ml_ocm_model_map model_mem_map;
+ /* Layer: Tile and memory information object */
+ struct cn10k_ml_ocm_layer_map ocm_map;
- /* Internal model information structure
- * Size of the buffer = sizeof(struct rte_ml_model_info)
- * + num_inputs * sizeof(struct rte_ml_io_info)
- * + num_outputs * sizeof(struct rte_ml_io_info).
- * Structures would be arranged in the same order in the buffer.
- */
- uint8_t *info;
-
- /* Spinlock, used to update model state */
- plt_spinlock_t lock;
-
- /* State */
- enum cn10k_ml_model_state state;
-
- /* Slow-path operations request pointer */
+ /* Layer: Slow-path operations request pointer */
struct cn10k_ml_req *req;
- /* Stats for burst ops */
- struct cn10k_ml_model_stats *burst_stats;
+ /* Layer: Stats for burst ops */
+ struct cn10k_ml_layer_stats *burst_stats;
- /* Stats for sync ops */
- struct cn10k_ml_model_stats *sync_stats;
+ /* Layer: Stats for sync ops */
+ struct cn10k_ml_layer_stats *sync_stats;
+};
+
+struct cn10k_ml_model_data {
+ /* Model / Layer: metadata */
+ struct cn10k_ml_model_metadata metadata;
};
int cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size);
void cn10k_ml_model_metadata_update(struct cn10k_ml_model_metadata *metadata);
-void cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer,
+void cn10k_ml_layer_addr_update(struct cnxk_ml_layer *layer, uint8_t *buffer,
uint8_t *base_dma_addr);
+void cn10k_ml_layer_info_update(struct cnxk_ml_layer *layer);
int cn10k_ml_model_ocm_pages_count(struct cn10k_ml_dev *cn10k_mldev, uint16_t model_id,
uint8_t *buffer, uint16_t *wb_pages, uint16_t *scratch_pages);
-void cn10k_ml_model_info_set(struct rte_ml_dev *dev, struct cn10k_ml_model *model);
+void cn10k_ml_model_info_set(struct rte_ml_dev *dev, struct cnxk_ml_model *model);
#endif /* _CN10K_ML_MODEL_H_ */
@@ -6,10 +6,10 @@
#include <roc_api.h>
-#include "cn10k_ml_model.h"
#include "cn10k_ml_ocm.h"
#include "cnxk_ml_dev.h"
+#include "cnxk_ml_model.h"
/* OCM macros */
#define BYTE_LEN 8
@@ -333,12 +333,14 @@ cn10k_ml_ocm_tilemask_find(struct rte_ml_dev *dev, uint8_t num_tiles, uint16_t w
}
void
-cn10k_ml_ocm_reserve_pages(struct rte_ml_dev *dev, uint16_t model_id, uint64_t tilemask,
- int wb_page_start, uint16_t wb_pages, uint16_t scratch_pages)
+cn10k_ml_ocm_reserve_pages(struct rte_ml_dev *dev, uint16_t model_id, uint16_t layer_id,
+ uint64_t tilemask, int wb_page_start, uint16_t wb_pages,
+ uint16_t scratch_pages)
{
struct cn10k_ml_dev *cn10k_mldev;
struct cnxk_ml_dev *cnxk_mldev;
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
+ struct cnxk_ml_layer *layer;
struct cn10k_ml_ocm *ocm;
int scratch_page_start;
@@ -353,6 +355,7 @@ cn10k_ml_ocm_reserve_pages(struct rte_ml_dev *dev, uint16_t model_id, uint64_t t
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
ocm = &cn10k_mldev->ocm;
model = dev->data->models[model_id];
+ layer = &model->layer[layer_id];
/* Get first set bit, tile_start */
tile_start = 0;
@@ -382,8 +385,8 @@ cn10k_ml_ocm_reserve_pages(struct rte_ml_dev *dev, uint16_t model_id, uint64_t t
PLT_MAX(ocm->tile_ocm_info[tile_id].last_wb_page, wb_page_end);
}
- model->addr.tile_start = tile_start;
- model->addr.tile_end = tile_end;
+ layer->glow.addr.tile_start = tile_start;
+ layer->glow.addr.tile_end = tile_end;
plt_ml_dbg("model_id = %u, tilemask = 0x%016lx", model_id, tilemask);
plt_ml_dbg("model_id = %u, wb_page_start = %d, wb_page_end = %d", model_id, wb_page_start,
@@ -393,12 +396,14 @@ cn10k_ml_ocm_reserve_pages(struct rte_ml_dev *dev, uint16_t model_id, uint64_t t
}
void
-cn10k_ml_ocm_free_pages(struct rte_ml_dev *dev, uint16_t model_id)
+cn10k_ml_ocm_free_pages(struct rte_ml_dev *dev, uint16_t model_id, uint16_t layer_id)
{
- struct cn10k_ml_model *local_model;
+ struct cnxk_ml_model *local_model;
+ struct cnxk_ml_layer *local_layer;
struct cn10k_ml_dev *cn10k_mldev;
struct cnxk_ml_dev *cnxk_mldev;
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
+ struct cnxk_ml_layer *layer;
struct cn10k_ml_ocm *ocm;
int scratch_resize_pages;
@@ -409,16 +414,19 @@ cn10k_ml_ocm_free_pages(struct rte_ml_dev *dev, uint16_t model_id)
int tile_id;
int page_id;
uint16_t i;
+ uint16_t j;
cnxk_mldev = dev->data->dev_private;
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
ocm = &cn10k_mldev->ocm;
model = dev->data->models[model_id];
+ layer = &model->layer[layer_id];
/* Update OCM info for WB memory */
- wb_page_start = model->model_mem_map.wb_page_start;
- wb_page_end = wb_page_start + model->model_mem_map.wb_pages - 1;
- for (tile_id = model->addr.tile_start; tile_id <= model->addr.tile_end; tile_id++) {
+ wb_page_start = layer->glow.ocm_map.wb_page_start;
+ wb_page_end = wb_page_start + layer->glow.ocm_map.wb_pages - 1;
+ for (tile_id = layer->glow.addr.tile_start; tile_id <= layer->glow.addr.tile_end;
+ tile_id++) {
for (page_id = wb_page_start; page_id <= wb_page_end; page_id++) {
CLEAR_BIT(ocm->tile_ocm_info[tile_id].ocm_mask[page_id / OCM_MAP_WORD_SIZE],
page_id % OCM_MAP_WORD_SIZE);
@@ -432,11 +440,19 @@ cn10k_ml_ocm_free_pages(struct rte_ml_dev *dev, uint16_t model_id)
scratch_resize_pages = 0;
for (i = 0; i < dev->data->nb_models; i++) {
local_model = dev->data->models[i];
- if ((i != model_id) && (local_model != NULL)) {
- if (IS_BIT_SET(local_model->model_mem_map.tilemask, tile_id))
- scratch_resize_pages = PLT_MAX(
- (int)local_model->model_mem_map.scratch_pages,
- scratch_resize_pages);
+ if (local_model == NULL)
+ continue;
+
+ for (j = 0; j < local_model->nb_layers; j++) {
+ local_layer = &local_model->layer[j];
+ if (local_layer != layer &&
+ local_layer->glow.ocm_map.ocm_reserved) {
+ if (IS_BIT_SET(local_layer->glow.ocm_map.tilemask, tile_id))
+ scratch_resize_pages =
+ PLT_MAX((int)local_layer->glow.ocm_map
+ .scratch_pages,
+ scratch_resize_pages);
+ }
}
}
@@ -27,7 +27,7 @@ struct cn10k_ml_ocm_tile_info {
};
/* Model OCM map structure */
-struct cn10k_ml_ocm_model_map {
+struct cn10k_ml_ocm_layer_map {
/* Status of OCM reservation */
bool ocm_reserved;
@@ -77,9 +77,10 @@ struct cn10k_ml_ocm {
int cn10k_ml_ocm_tilecount(uint64_t tilemask, int *start, int *end);
int cn10k_ml_ocm_tilemask_find(struct rte_ml_dev *dev, uint8_t num_tiles, uint16_t wb_pages,
uint16_t scratch_pages, uint64_t *tilemask);
-void cn10k_ml_ocm_reserve_pages(struct rte_ml_dev *dev, uint16_t model_id, uint64_t tilemask,
- int wb_page_start, uint16_t wb_pages, uint16_t scratch_pages);
-void cn10k_ml_ocm_free_pages(struct rte_ml_dev *dev, uint16_t model_id);
+void cn10k_ml_ocm_reserve_pages(struct rte_ml_dev *dev, uint16_t model_id, uint16_t layer_id,
+ uint64_t tilemask, int wb_page_start, uint16_t wb_pages,
+ uint16_t scratch_pages);
+void cn10k_ml_ocm_free_pages(struct rte_ml_dev *dev, uint16_t model_id, uint16_t layer_id);
void cn10k_ml_ocm_print(struct rte_ml_dev *dev, FILE *fp);
#endif /* _CN10K_ML_OCM_H_ */
@@ -7,10 +7,10 @@
#include <mldev_utils.h>
-#include "cn10k_ml_model.h"
#include "cn10k_ml_ops.h"
#include "cnxk_ml_dev.h"
+#include "cnxk_ml_model.h"
/* ML model macros */
#define CN10K_ML_MODEL_MEMZONE_NAME "ml_cn10k_model_mz"
@@ -202,7 +202,7 @@ cn10k_ml_model_print(struct rte_ml_dev *dev, uint16_t model_id, FILE *fp)
{
struct cn10k_ml_dev *cn10k_mldev;
struct cnxk_ml_dev *cnxk_mldev;
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
struct cn10k_ml_ocm *ocm;
char str[STR_LEN];
uint8_t i;
@@ -215,77 +215,80 @@ cn10k_ml_model_print(struct rte_ml_dev *dev, uint16_t model_id, FILE *fp)
/* Print debug info */
print_line(fp, LINE_LEN);
- fprintf(fp, " Model Information (%s)\n", model->metadata.model.name);
+ fprintf(fp, " Model Information (%s)\n", model->glow.metadata.model.name);
print_line(fp, LINE_LEN);
- fprintf(fp, "%*s : %s\n", FIELD_LEN, "name", model->metadata.model.name);
- fprintf(fp, "%*s : %u.%u.%u.%u\n", FIELD_LEN, "version", model->metadata.model.version[0],
- model->metadata.model.version[1], model->metadata.model.version[2],
- model->metadata.model.version[3]);
+ fprintf(fp, "%*s : %s\n", FIELD_LEN, "name", model->glow.metadata.model.name);
+ fprintf(fp, "%*s : %u.%u.%u.%u\n", FIELD_LEN, "version",
+ model->glow.metadata.model.version[0], model->glow.metadata.model.version[1],
+ model->glow.metadata.model.version[2], model->glow.metadata.model.version[3]);
if (strlen(model->name) != 0)
fprintf(fp, "%*s : %s\n", FIELD_LEN, "debug_name", model->name);
fprintf(fp, "%*s : 0x%016lx\n", FIELD_LEN, "model", PLT_U64_CAST(model));
fprintf(fp, "%*s : %u\n", FIELD_LEN, "model_id", model->model_id);
- fprintf(fp, "%*s : %u\n", FIELD_LEN, "batch_size", model->metadata.model.batch_size);
- fprintf(fp, "%*s : %u\n", FIELD_LEN, "num_layers", model->metadata.model.num_layers);
+ fprintf(fp, "%*s : %u\n", FIELD_LEN, "batch_size", model->glow.metadata.model.batch_size);
+ fprintf(fp, "%*s : %u\n", FIELD_LEN, "num_layers", model->glow.metadata.model.num_layers);
/* Print model state */
- if (model->state == ML_CN10K_MODEL_STATE_LOADED)
+ if (model->state == ML_CNXK_MODEL_STATE_LOADED)
fprintf(fp, "%*s : %s\n", FIELD_LEN, "state", "loaded");
- if (model->state == ML_CN10K_MODEL_STATE_JOB_ACTIVE)
+ if (model->state == ML_CNXK_MODEL_STATE_JOB_ACTIVE)
fprintf(fp, "%*s : %s\n", FIELD_LEN, "state", "job_active");
- if (model->state == ML_CN10K_MODEL_STATE_STARTED)
+ if (model->state == ML_CNXK_MODEL_STATE_STARTED)
fprintf(fp, "%*s : %s\n", FIELD_LEN, "state", "started");
/* Print OCM status */
fprintf(fp, "%*s : %" PRIu64 " bytes\n", FIELD_LEN, "wb_size",
- model->metadata.model.ocm_wb_range_end - model->metadata.model.ocm_wb_range_start +
- 1);
- fprintf(fp, "%*s : %u\n", FIELD_LEN, "wb_pages", model->model_mem_map.wb_pages);
+ model->glow.metadata.model.ocm_wb_range_end -
+ model->glow.metadata.model.ocm_wb_range_start + 1);
+ fprintf(fp, "%*s : %u\n", FIELD_LEN, "wb_pages", model->layer[0].glow.ocm_map.wb_pages);
fprintf(fp, "%*s : %" PRIu64 " bytes\n", FIELD_LEN, "scratch_size",
- ocm->size_per_tile - model->metadata.model.ocm_tmp_range_floor);
- fprintf(fp, "%*s : %u\n", FIELD_LEN, "scratch_pages", model->model_mem_map.scratch_pages);
+ ocm->size_per_tile - model->glow.metadata.model.ocm_tmp_range_floor);
+ fprintf(fp, "%*s : %u\n", FIELD_LEN, "scratch_pages",
+ model->layer[0].glow.ocm_map.scratch_pages);
fprintf(fp, "%*s : %u\n", FIELD_LEN, "num_tiles",
- model->metadata.model.tile_end - model->metadata.model.tile_start + 1);
+ model->glow.metadata.model.tile_end - model->glow.metadata.model.tile_start + 1);
- if (model->state == ML_CN10K_MODEL_STATE_STARTED) {
+ if (model->state == ML_CNXK_MODEL_STATE_STARTED) {
fprintf(fp, "%*s : 0x%0*" PRIx64 "\n", FIELD_LEN, "tilemask",
- ML_CN10K_OCM_NUMTILES / 4, model->model_mem_map.tilemask);
+ ML_CN10K_OCM_NUMTILES / 4, model->layer[0].glow.ocm_map.tilemask);
fprintf(fp, "%*s : 0x%" PRIx64 "\n", FIELD_LEN, "ocm_wb_start",
- model->model_mem_map.wb_page_start * cn10k_mldev->ocm.page_size);
+ model->layer[0].glow.ocm_map.wb_page_start * cn10k_mldev->ocm.page_size);
}
- fprintf(fp, "%*s : %u\n", FIELD_LEN, "num_inputs", model->metadata.model.num_input);
- fprintf(fp, "%*s : %u\n", FIELD_LEN, "num_outputs", model->metadata.model.num_output);
+ fprintf(fp, "%*s : %u\n", FIELD_LEN, "num_inputs", model->glow.metadata.model.num_input);
+ fprintf(fp, "%*s : %u\n", FIELD_LEN, "num_outputs", model->glow.metadata.model.num_output);
fprintf(fp, "\n");
print_line(fp, LINE_LEN);
fprintf(fp, "%8s %16s %12s %18s %12s\n", "input", "input_name", "input_type",
"model_input_type", "quantize");
print_line(fp, LINE_LEN);
- for (i = 0; i < model->metadata.model.num_input; i++) {
+ for (i = 0; i < model->glow.metadata.model.num_input; i++) {
if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
fprintf(fp, "%8u ", i);
- fprintf(fp, "%*s ", 16, model->metadata.input1[i].input_name);
- rte_ml_io_type_to_str(model->metadata.input1[i].input_type, str, STR_LEN);
+ fprintf(fp, "%*s ", 16, model->glow.metadata.input1[i].input_name);
+ rte_ml_io_type_to_str(model->glow.metadata.input1[i].input_type, str,
+ STR_LEN);
fprintf(fp, "%*s ", 12, str);
- rte_ml_io_type_to_str(model->metadata.input1[i].model_input_type, str,
+ rte_ml_io_type_to_str(model->glow.metadata.input1[i].model_input_type, str,
STR_LEN);
fprintf(fp, "%*s ", 18, str);
fprintf(fp, "%*s", 12,
- (model->metadata.input1[i].quantize == 1 ? "Yes" : "No"));
+ (model->glow.metadata.input1[i].quantize == 1 ? "Yes" : "No"));
fprintf(fp, "\n");
} else {
j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
fprintf(fp, "%8u ", i);
- fprintf(fp, "%*s ", 16, model->metadata.input2[j].input_name);
- rte_ml_io_type_to_str(model->metadata.input2[j].input_type, str, STR_LEN);
+ fprintf(fp, "%*s ", 16, model->glow.metadata.input2[j].input_name);
+ rte_ml_io_type_to_str(model->glow.metadata.input2[j].input_type, str,
+ STR_LEN);
fprintf(fp, "%*s ", 12, str);
- rte_ml_io_type_to_str(model->metadata.input2[j].model_input_type, str,
+ rte_ml_io_type_to_str(model->glow.metadata.input2[j].model_input_type, str,
STR_LEN);
fprintf(fp, "%*s ", 18, str);
fprintf(fp, "%*s", 12,
- (model->metadata.input2[j].quantize == 1 ? "Yes" : "No"));
+ (model->glow.metadata.input2[j].quantize == 1 ? "Yes" : "No"));
fprintf(fp, "\n");
}
}
@@ -295,29 +298,31 @@ cn10k_ml_model_print(struct rte_ml_dev *dev, uint16_t model_id, FILE *fp)
fprintf(fp, "%8s %16s %12s %18s %12s\n", "output", "output_name", "output_type",
"model_output_type", "dequantize");
print_line(fp, LINE_LEN);
- for (i = 0; i < model->metadata.model.num_output; i++) {
+ for (i = 0; i < model->glow.metadata.model.num_output; i++) {
if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
fprintf(fp, "%8u ", i);
- fprintf(fp, "%*s ", 16, model->metadata.output1[i].output_name);
- rte_ml_io_type_to_str(model->metadata.output1[i].output_type, str, STR_LEN);
- fprintf(fp, "%*s ", 12, str);
- rte_ml_io_type_to_str(model->metadata.output1[i].model_output_type, str,
+ fprintf(fp, "%*s ", 16, model->glow.metadata.output1[i].output_name);
+ rte_ml_io_type_to_str(model->glow.metadata.output1[i].output_type, str,
STR_LEN);
+ fprintf(fp, "%*s ", 12, str);
+ rte_ml_io_type_to_str(model->glow.metadata.output1[i].model_output_type,
+ str, STR_LEN);
fprintf(fp, "%*s ", 18, str);
fprintf(fp, "%*s", 12,
- (model->metadata.output1[i].dequantize == 1 ? "Yes" : "No"));
+ (model->glow.metadata.output1[i].dequantize == 1 ? "Yes" : "No"));
fprintf(fp, "\n");
} else {
j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
fprintf(fp, "%8u ", i);
- fprintf(fp, "%*s ", 16, model->metadata.output2[j].output_name);
- rte_ml_io_type_to_str(model->metadata.output2[j].output_type, str, STR_LEN);
- fprintf(fp, "%*s ", 12, str);
- rte_ml_io_type_to_str(model->metadata.output2[j].model_output_type, str,
+ fprintf(fp, "%*s ", 16, model->glow.metadata.output2[j].output_name);
+ rte_ml_io_type_to_str(model->glow.metadata.output2[j].output_type, str,
STR_LEN);
+ fprintf(fp, "%*s ", 12, str);
+ rte_ml_io_type_to_str(model->glow.metadata.output2[j].model_output_type,
+ str, STR_LEN);
fprintf(fp, "%*s ", 18, str);
fprintf(fp, "%*s", 12,
- (model->metadata.output2[j].dequantize == 1 ? "Yes" : "No"));
+ (model->glow.metadata.output2[j].dequantize == 1 ? "Yes" : "No"));
fprintf(fp, "\n");
}
}
@@ -327,14 +332,14 @@ cn10k_ml_model_print(struct rte_ml_dev *dev, uint16_t model_id, FILE *fp)
}
static void
-cn10k_ml_prep_sp_job_descriptor(struct cn10k_ml_dev *cn10k_mldev, struct cn10k_ml_model *model,
+cn10k_ml_prep_sp_job_descriptor(struct cn10k_ml_dev *cn10k_mldev, struct cnxk_ml_model *model,
struct cn10k_ml_req *req, enum cn10k_ml_job_type job_type)
{
struct cn10k_ml_model_metadata *metadata;
- struct cn10k_ml_model_addr *addr;
+ struct cn10k_ml_layer_addr *addr;
- metadata = &model->metadata;
- addr = &model->addr;
+ metadata = &model->glow.metadata;
+ addr = &model->layer[0].glow.addr;
memset(&req->jd, 0, sizeof(struct cn10k_ml_jd));
req->jd.hdr.jce.w0.u64 = 0;
@@ -345,7 +350,7 @@ cn10k_ml_prep_sp_job_descriptor(struct cn10k_ml_dev *cn10k_mldev, struct cn10k_m
req->jd.hdr.result = roc_ml_addr_ap2mlip(&cn10k_mldev->roc, &req->result);
if (job_type == ML_CN10K_JOB_TYPE_MODEL_START) {
- if (!model->metadata.model.ocm_relocatable)
+ if (!model->glow.metadata.model.ocm_relocatable)
req->jd.hdr.sp_flags = ML_CN10K_SP_FLAGS_OCM_NONRELOCATABLE;
else
req->jd.hdr.sp_flags = 0x0;
@@ -385,7 +390,7 @@ cn10k_ml_prep_sp_job_descriptor(struct cn10k_ml_dev *cn10k_mldev, struct cn10k_m
req->jd.model_start.output.s.ddr_range_end = metadata->model.ddr_output_range_end;
req->extended_args.start.ddr_scratch_base_address = PLT_U64_CAST(
- roc_ml_addr_ap2mlip(&cn10k_mldev->roc, model->addr.scratch_base_addr));
+ roc_ml_addr_ap2mlip(&cn10k_mldev->roc, addr->scratch_base_addr));
req->extended_args.start.ddr_scratch_range_start =
metadata->model.ddr_scratch_range_start;
req->extended_args.start.ddr_scratch_range_end =
@@ -445,7 +450,7 @@ cn10k_ml_xstats_init(struct rte_ml_dev *dev)
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
/* Allocate memory for xstats entries. Don't allocate during reconfigure */
- nb_stats = RTE_DIM(device_stats) + ML_CN10K_MAX_MODELS * RTE_DIM(model_stats);
+ nb_stats = RTE_DIM(device_stats) + ML_CNXK_MAX_MODELS * RTE_DIM(model_stats);
if (cn10k_mldev->xstats.entries == NULL)
cn10k_mldev->xstats.entries = rte_zmalloc(
"cn10k_ml_xstats", sizeof(struct cn10k_ml_xstats_entry) * nb_stats,
@@ -472,7 +477,7 @@ cn10k_ml_xstats_init(struct rte_ml_dev *dev)
cn10k_mldev->xstats.count_mode_device = stat_id;
/* Initialize model xstats */
- for (model = 0; model < ML_CN10K_MAX_MODELS; model++) {
+ for (model = 0; model < ML_CNXK_MAX_MODELS; model++) {
cn10k_mldev->xstats.offset_for_model[model] = stat_id;
for (i = 0; i < RTE_DIM(model_stats); i++) {
@@ -521,7 +526,7 @@ cn10k_ml_xstats_model_name_update(struct rte_ml_dev *dev, uint16_t model_id)
{
struct cn10k_ml_dev *cn10k_mldev;
struct cnxk_ml_dev *cnxk_mldev;
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
uint16_t rclk_freq;
uint16_t sclk_freq;
uint16_t stat_id;
@@ -543,7 +548,7 @@ cn10k_ml_xstats_model_name_update(struct rte_ml_dev *dev, uint16_t model_id)
for (i = 0; i < RTE_DIM(model_stats); i++) {
snprintf(cn10k_mldev->xstats.entries[stat_id].map.name,
sizeof(cn10k_mldev->xstats.entries[stat_id].map.name), "%s-%s-%s",
- model->metadata.model.name, model_stats[i].name, suffix);
+ model->layer[0].glow.metadata.model.name, model_stats[i].name, suffix);
stat_id++;
}
}
@@ -576,9 +581,9 @@ cn10k_ml_dev_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx __rte_unused,
do { \
value = 0; \
for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) { \
- value += model->burst_stats[qp_id].str##_latency_tot; \
- count += model->burst_stats[qp_id].dequeued_count - \
- model->burst_stats[qp_id].str##_reset_count; \
+ value += model->layer[0].glow.burst_stats[qp_id].str##_latency_tot; \
+ count += model->layer[0].glow.burst_stats[qp_id].dequeued_count - \
+ model->layer[0].glow.burst_stats[qp_id].str##_reset_count; \
} \
if (count != 0) \
value = value / count; \
@@ -588,9 +593,10 @@ cn10k_ml_dev_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx __rte_unused,
do { \
value = UINT64_MAX; \
for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) { \
- value = PLT_MIN(value, model->burst_stats[qp_id].str##_latency_min); \
- count += model->burst_stats[qp_id].dequeued_count - \
- model->burst_stats[qp_id].str##_reset_count; \
+ value = PLT_MIN( \
+ value, model->layer[0].glow.burst_stats[qp_id].str##_latency_min); \
+ count += model->layer[0].glow.burst_stats[qp_id].dequeued_count - \
+ model->layer[0].glow.burst_stats[qp_id].str##_reset_count; \
} \
if (count == 0) \
value = 0; \
@@ -600,9 +606,10 @@ cn10k_ml_dev_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx __rte_unused,
do { \
value = 0; \
for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) { \
- value = PLT_MAX(value, model->burst_stats[qp_id].str##_latency_max); \
- count += model->burst_stats[qp_id].dequeued_count - \
- model->burst_stats[qp_id].str##_reset_count; \
+ value = PLT_MAX( \
+ value, model->layer[0].glow.burst_stats[qp_id].str##_latency_max); \
+ count += model->layer[0].glow.burst_stats[qp_id].dequeued_count - \
+ model->layer[0].glow.burst_stats[qp_id].str##_reset_count; \
} \
if (count == 0) \
value = 0; \
@@ -611,7 +618,7 @@ cn10k_ml_dev_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx __rte_unused,
static uint64_t
cn10k_ml_model_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx, enum cn10k_ml_xstats_type type)
{
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
uint16_t rclk_freq; /* MHz */
uint16_t sclk_freq; /* MHz */
uint64_t count = 0;
@@ -692,28 +699,28 @@ cn10k_ml_device_xstats_reset(struct rte_ml_dev *dev, const uint16_t stat_ids[],
#define ML_AVG_RESET_FOREACH_QP(dev, model, qp_id, str) \
do { \
for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) { \
- model->burst_stats[qp_id].str##_latency_tot = 0; \
- model->burst_stats[qp_id].str##_reset_count = \
- model->burst_stats[qp_id].dequeued_count; \
+ model->layer[0].glow.burst_stats[qp_id].str##_latency_tot = 0; \
+ model->layer[0].glow.burst_stats[qp_id].str##_reset_count = \
+ model->layer[0].glow.burst_stats[qp_id].dequeued_count; \
} \
} while (0)
#define ML_MIN_RESET_FOREACH_QP(dev, model, qp_id, str) \
do { \
for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) \
- model->burst_stats[qp_id].str##_latency_min = UINT64_MAX; \
+ model->layer[0].glow.burst_stats[qp_id].str##_latency_min = UINT64_MAX; \
} while (0)
#define ML_MAX_RESET_FOREACH_QP(dev, model, qp_id, str) \
do { \
for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) \
- model->burst_stats[qp_id].str##_latency_max = 0; \
+ model->layer[0].glow.burst_stats[qp_id].str##_latency_max = 0; \
} while (0)
static void
cn10k_ml_reset_model_stat(struct rte_ml_dev *dev, uint16_t model_id, enum cn10k_ml_xstats_type type)
{
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
uint32_t qp_id;
model = dev->data->models[model_id];
@@ -749,7 +756,7 @@ cn10k_ml_model_xstats_reset(struct rte_ml_dev *dev, int32_t model_id, const uint
struct cn10k_ml_xstats_entry *xs;
struct cn10k_ml_dev *cn10k_mldev;
struct cnxk_ml_dev *cnxk_mldev;
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
int32_t lcl_model_id = 0;
uint16_t start_id;
uint16_t end_id;
@@ -758,7 +765,7 @@ cn10k_ml_model_xstats_reset(struct rte_ml_dev *dev, int32_t model_id, const uint
cnxk_mldev = dev->data->dev_private;
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
- for (i = 0; i < ML_CN10K_MAX_MODELS; i++) {
+ for (i = 0; i < ML_CNXK_MAX_MODELS; i++) {
if (model_id == -1) {
model = dev->data->models[i];
if (model == NULL) /* Skip inactive models */
@@ -803,7 +810,7 @@ static int
cn10k_ml_cache_model_data(struct rte_ml_dev *dev, uint16_t model_id)
{
struct rte_ml_model_info *info;
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
struct rte_ml_buff_seg seg[2];
struct rte_ml_buff_seg *inp;
struct rte_ml_buff_seg *out;
@@ -854,7 +861,7 @@ cn10k_ml_cache_model_data(struct rte_ml_dev *dev, uint16_t model_id)
op.input = &inp;
op.output = &out;
- memset(model->req, 0, sizeof(struct cn10k_ml_req));
+ memset(model->layer[0].glow.req, 0, sizeof(struct cn10k_ml_req));
ret = cn10k_ml_inference_sync(dev, &op);
plt_memzone_free(mz);
@@ -875,7 +882,7 @@ cn10k_ml_dev_info_get(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info)
memset(dev_info, 0, sizeof(struct rte_ml_dev_info));
dev_info->driver_name = dev->device->driver->name;
- dev_info->max_models = ML_CN10K_MAX_MODELS;
+ dev_info->max_models = ML_CNXK_MAX_MODELS;
if (cn10k_mldev->hw_queue_lock)
dev_info->max_queue_pairs = ML_CN10K_MAX_QP_PER_DEVICE_SL;
else
@@ -895,7 +902,7 @@ cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c
struct rte_ml_dev_info dev_info;
struct cn10k_ml_dev *cn10k_mldev;
struct cnxk_ml_dev *cnxk_mldev;
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
struct cn10k_ml_ocm *ocm;
struct cn10k_ml_qp *qp;
uint16_t model_id;
@@ -1001,11 +1008,11 @@ cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c
for (model_id = 0; model_id < dev->data->nb_models; model_id++) {
model = dev->data->models[model_id];
if (model != NULL) {
- if (model->state == ML_CN10K_MODEL_STATE_STARTED) {
+ if (model->state == ML_CNXK_MODEL_STATE_STARTED) {
if (cn10k_ml_model_stop(dev, model_id) != 0)
plt_err("Could not stop model %u", model_id);
}
- if (model->state == ML_CN10K_MODEL_STATE_LOADED) {
+ if (model->state == ML_CNXK_MODEL_STATE_LOADED) {
if (cn10k_ml_model_unload(dev, model_id) != 0)
plt_err("Could not unload model %u", model_id);
}
@@ -1093,7 +1100,7 @@ cn10k_ml_dev_close(struct rte_ml_dev *dev)
{
struct cn10k_ml_dev *cn10k_mldev;
struct cnxk_ml_dev *cnxk_mldev;
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
struct cn10k_ml_qp *qp;
uint16_t model_id;
uint16_t qp_id;
@@ -1111,11 +1118,11 @@ cn10k_ml_dev_close(struct rte_ml_dev *dev)
for (model_id = 0; model_id < dev->data->nb_models; model_id++) {
model = dev->data->models[model_id];
if (model != NULL) {
- if (model->state == ML_CN10K_MODEL_STATE_STARTED) {
+ if (model->state == ML_CNXK_MODEL_STATE_STARTED) {
if (cn10k_ml_model_stop(dev, model_id) != 0)
plt_err("Could not stop model %u", model_id);
}
- if (model->state == ML_CN10K_MODEL_STATE_LOADED) {
+ if (model->state == ML_CNXK_MODEL_STATE_LOADED) {
if (cn10k_ml_model_unload(dev, model_id) != 0)
plt_err("Could not unload model %u", model_id);
}
@@ -1294,7 +1301,7 @@ cn10k_ml_dev_xstats_names_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mod
xstats_mode_count = cn10k_mldev->xstats.count_mode_device;
break;
case RTE_ML_DEV_XSTATS_MODEL:
- if (model_id >= ML_CN10K_MAX_MODELS)
+ if (model_id >= ML_CNXK_MAX_MODELS)
break;
xstats_mode_count = cn10k_mldev->xstats.count_per_model[model_id];
break;
@@ -1386,7 +1393,7 @@ cn10k_ml_dev_xstats_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode
xstats_mode_count = cn10k_mldev->xstats.count_mode_device;
break;
case RTE_ML_DEV_XSTATS_MODEL:
- if (model_id >= ML_CN10K_MAX_MODELS)
+ if (model_id >= ML_CNXK_MAX_MODELS)
return -EINVAL;
xstats_mode_count = cn10k_mldev->xstats.count_per_model[model_id];
break;
@@ -1447,7 +1454,7 @@ cn10k_ml_dev_dump(struct rte_ml_dev *dev, FILE *fp)
{
struct cn10k_ml_dev *cn10k_mldev;
struct cnxk_ml_dev *cnxk_mldev;
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
struct cn10k_ml_fw *fw;
uint32_t head_loc;
@@ -1588,7 +1595,7 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
{
struct cn10k_ml_model_metadata *metadata;
struct cnxk_ml_dev *cnxk_mldev;
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
char str[RTE_MEMZONE_NAMESIZE];
const struct plt_memzone *mz;
@@ -1643,9 +1650,9 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
metadata->model.num_input * sizeof(struct rte_ml_io_info) +
metadata->model.num_output * sizeof(struct rte_ml_io_info);
model_info_size = PLT_ALIGN_CEIL(model_info_size, ML_CN10K_ALIGN_SIZE);
- model_stats_size = (dev->data->nb_queue_pairs + 1) * sizeof(struct cn10k_ml_model_stats);
+ model_stats_size = (dev->data->nb_queue_pairs + 1) * sizeof(struct cn10k_ml_layer_stats);
- mz_size = PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_model), ML_CN10K_ALIGN_SIZE) +
+ mz_size = PLT_ALIGN_CEIL(sizeof(struct cnxk_ml_model), ML_CN10K_ALIGN_SIZE) +
2 * model_data_size + model_scratch_size + model_info_size +
PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_req), ML_CN10K_ALIGN_SIZE) +
model_stats_size;
@@ -1659,62 +1666,85 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
}
model = mz->addr;
- model->mldev = cnxk_mldev;
+ model->cnxk_mldev = cnxk_mldev;
model->model_id = idx;
+ dev->data->models[idx] = model;
- rte_memcpy(&model->metadata, params->addr, sizeof(struct cn10k_ml_model_metadata));
- cn10k_ml_model_metadata_update(&model->metadata);
+ rte_memcpy(&model->glow.metadata, params->addr, sizeof(struct cn10k_ml_model_metadata));
+ cn10k_ml_model_metadata_update(&model->glow.metadata);
+
+ /* Set model name */
+ rte_memcpy(model->name, (char *)model->glow.metadata.model.name, 64);
/* Enable support for batch_size of 256 */
- if (model->metadata.model.batch_size == 0)
+ if (model->glow.metadata.model.batch_size == 0)
model->batch_size = 256;
else
- model->batch_size = model->metadata.model.batch_size;
+ model->batch_size = model->glow.metadata.model.batch_size;
+
+ /* Since the number of layers that the driver would be handling for glow models is
+ * always 1. consider the entire model as a model with single layer. This would
+ * ignore the num_layers from metadata.
+ */
+ model->nb_layers = 1;
+
+ /* Copy metadata to internal buffer */
+ rte_memcpy(&model->layer[0].glow.metadata, params->addr,
+ sizeof(struct cn10k_ml_model_metadata));
+ cn10k_ml_model_metadata_update(&model->layer[0].glow.metadata);
+ model->layer[0].model = model;
/* Set DMA base address */
base_dma_addr = PLT_PTR_ADD(
- mz->addr, PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_model), ML_CN10K_ALIGN_SIZE));
- cn10k_ml_model_addr_update(model, params->addr, base_dma_addr);
- model->addr.scratch_base_addr = PLT_PTR_ADD(base_dma_addr, 2 * model_data_size);
+ mz->addr, PLT_ALIGN_CEIL(sizeof(struct cnxk_ml_model), ML_CN10K_ALIGN_SIZE));
+ cn10k_ml_layer_addr_update(&model->layer[0], params->addr, base_dma_addr);
+ model->layer[0].glow.addr.scratch_base_addr =
+ PLT_PTR_ADD(base_dma_addr, 2 * model_data_size);
/* Copy data from load to run. run address to be used by MLIP */
- rte_memcpy(model->addr.base_dma_addr_run, model->addr.base_dma_addr_load, model_data_size);
+ rte_memcpy(model->layer[0].glow.addr.base_dma_addr_run,
+ model->layer[0].glow.addr.base_dma_addr_load, model_data_size);
+
+ /* Update internal I/O data structure */
+ cn10k_ml_layer_info_update(&model->layer[0]);
/* Initialize model_mem_map */
- memset(&model->model_mem_map, 0, sizeof(struct cn10k_ml_ocm_model_map));
- model->model_mem_map.ocm_reserved = false;
- model->model_mem_map.tilemask = 0;
- model->model_mem_map.wb_page_start = -1;
- model->model_mem_map.wb_pages = wb_pages;
- model->model_mem_map.scratch_pages = scratch_pages;
+ memset(&model->layer[0].glow.ocm_map, 0, sizeof(struct cn10k_ml_ocm_layer_map));
+ model->layer[0].glow.ocm_map.ocm_reserved = false;
+ model->layer[0].glow.ocm_map.tilemask = 0;
+ model->layer[0].glow.ocm_map.wb_page_start = -1;
+ model->layer[0].glow.ocm_map.wb_pages = wb_pages;
+ model->layer[0].glow.ocm_map.scratch_pages = scratch_pages;
/* Set model info */
- model->info = PLT_PTR_ADD(model->addr.scratch_base_addr, model_scratch_size);
+ model->info = PLT_PTR_ADD(model->layer[0].glow.addr.scratch_base_addr, model_scratch_size);
cn10k_ml_model_info_set(dev, model);
/* Set slow-path request address and state */
- model->req = PLT_PTR_ADD(model->info, model_info_size);
+ model->layer[0].glow.req = PLT_PTR_ADD(model->info, model_info_size);
/* Reset burst and sync stats */
- model->burst_stats = PLT_PTR_ADD(
- model->req, PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_req), ML_CN10K_ALIGN_SIZE));
+ model->layer[0].glow.burst_stats =
+ PLT_PTR_ADD(model->layer[0].glow.req,
+ PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_req), ML_CN10K_ALIGN_SIZE));
for (qp_id = 0; qp_id < dev->data->nb_queue_pairs + 1; qp_id++) {
- model->burst_stats[qp_id].hw_latency_tot = 0;
- model->burst_stats[qp_id].hw_latency_min = UINT64_MAX;
- model->burst_stats[qp_id].hw_latency_max = 0;
- model->burst_stats[qp_id].fw_latency_tot = 0;
- model->burst_stats[qp_id].fw_latency_min = UINT64_MAX;
- model->burst_stats[qp_id].fw_latency_max = 0;
- model->burst_stats[qp_id].hw_reset_count = 0;
- model->burst_stats[qp_id].fw_reset_count = 0;
- model->burst_stats[qp_id].dequeued_count = 0;
- }
- model->sync_stats =
- PLT_PTR_ADD(model->burst_stats,
- dev->data->nb_queue_pairs * sizeof(struct cn10k_ml_model_stats));
+ model->layer[0].glow.burst_stats[qp_id].hw_latency_tot = 0;
+ model->layer[0].glow.burst_stats[qp_id].hw_latency_min = UINT64_MAX;
+ model->layer[0].glow.burst_stats[qp_id].hw_latency_max = 0;
+ model->layer[0].glow.burst_stats[qp_id].fw_latency_tot = 0;
+ model->layer[0].glow.burst_stats[qp_id].fw_latency_min = UINT64_MAX;
+ model->layer[0].glow.burst_stats[qp_id].fw_latency_max = 0;
+ model->layer[0].glow.burst_stats[qp_id].hw_reset_count = 0;
+ model->layer[0].glow.burst_stats[qp_id].fw_reset_count = 0;
+ model->layer[0].glow.burst_stats[qp_id].dequeued_count = 0;
+ }
+
+ model->layer[0].glow.sync_stats =
+ PLT_PTR_ADD(model->layer[0].glow.burst_stats,
+ dev->data->nb_queue_pairs * sizeof(struct cn10k_ml_layer_stats));
plt_spinlock_init(&model->lock);
- model->state = ML_CN10K_MODEL_STATE_LOADED;
+ model->state = ML_CNXK_MODEL_STATE_LOADED;
dev->data->models[idx] = model;
cnxk_mldev->nb_models_loaded++;
@@ -1730,7 +1760,7 @@ int
cn10k_ml_model_unload(struct rte_ml_dev *dev, uint16_t model_id)
{
char str[RTE_MEMZONE_NAMESIZE];
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
struct cnxk_ml_dev *cnxk_mldev;
cnxk_mldev = dev->data->dev_private;
@@ -1741,7 +1771,7 @@ cn10k_ml_model_unload(struct rte_ml_dev *dev, uint16_t model_id)
return -EINVAL;
}
- if (model->state != ML_CN10K_MODEL_STATE_LOADED) {
+ if (model->state != ML_CNXK_MODEL_STATE_LOADED) {
plt_err("Cannot unload. Model in use.");
return -EBUSY;
}
@@ -1758,7 +1788,7 @@ cn10k_ml_model_start(struct rte_ml_dev *dev, uint16_t model_id)
{
struct cn10k_ml_dev *cn10k_mldev;
struct cnxk_ml_dev *cnxk_mldev;
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
struct cn10k_ml_ocm *ocm;
struct cn10k_ml_req *req;
@@ -1783,7 +1813,7 @@ cn10k_ml_model_start(struct rte_ml_dev *dev, uint16_t model_id)
}
/* Prepare JD */
- req = model->req;
+ req = model->layer[0].glow.req;
cn10k_ml_prep_sp_job_descriptor(cn10k_mldev, model, req, ML_CN10K_JOB_TYPE_MODEL_START);
req->result.error_code.u64 = 0x0;
req->result.user_ptr = NULL;
@@ -1791,63 +1821,66 @@ cn10k_ml_model_start(struct rte_ml_dev *dev, uint16_t model_id)
plt_write64(ML_CNXK_POLL_JOB_START, &req->status);
plt_wmb();
- num_tiles = model->metadata.model.tile_end - model->metadata.model.tile_start + 1;
+ num_tiles = model->layer[0].glow.metadata.model.tile_end -
+ model->layer[0].glow.metadata.model.tile_start + 1;
locked = false;
while (!locked) {
if (plt_spinlock_trylock(&model->lock) != 0) {
- if (model->state == ML_CN10K_MODEL_STATE_STARTED) {
+ if (model->state == ML_CNXK_MODEL_STATE_STARTED) {
plt_ml_dbg("Model already started, model = 0x%016lx",
PLT_U64_CAST(model));
plt_spinlock_unlock(&model->lock);
return 1;
}
- if (model->state == ML_CN10K_MODEL_STATE_JOB_ACTIVE) {
+ if (model->state == ML_CNXK_MODEL_STATE_JOB_ACTIVE) {
plt_err("A slow-path job is active for the model = 0x%016lx",
PLT_U64_CAST(model));
plt_spinlock_unlock(&model->lock);
return -EBUSY;
}
- model->state = ML_CN10K_MODEL_STATE_JOB_ACTIVE;
+ model->state = ML_CNXK_MODEL_STATE_JOB_ACTIVE;
plt_spinlock_unlock(&model->lock);
locked = true;
}
}
- while (!model->model_mem_map.ocm_reserved) {
+ while (!model->layer[0].glow.ocm_map.ocm_reserved) {
if (plt_spinlock_trylock(&ocm->lock) != 0) {
wb_page_start = cn10k_ml_ocm_tilemask_find(
- dev, num_tiles, model->model_mem_map.wb_pages,
- model->model_mem_map.scratch_pages, &tilemask);
+ dev, num_tiles, model->layer[0].glow.ocm_map.wb_pages,
+ model->layer[0].glow.ocm_map.scratch_pages, &tilemask);
if (wb_page_start == -1) {
plt_err("Free pages not available on OCM tiles");
plt_err("Failed to start model = 0x%016lx, name = %s",
- PLT_U64_CAST(model), model->metadata.model.name);
+ PLT_U64_CAST(model),
+ model->layer[0].glow.metadata.model.name);
plt_spinlock_unlock(&ocm->lock);
return -ENOMEM;
}
- model->model_mem_map.tilemask = tilemask;
- model->model_mem_map.wb_page_start = wb_page_start;
+ model->layer[0].glow.ocm_map.tilemask = tilemask;
+ model->layer[0].glow.ocm_map.wb_page_start = wb_page_start;
- cn10k_ml_ocm_reserve_pages(
- dev, model->model_id, model->model_mem_map.tilemask,
- model->model_mem_map.wb_page_start, model->model_mem_map.wb_pages,
- model->model_mem_map.scratch_pages);
- model->model_mem_map.ocm_reserved = true;
+ cn10k_ml_ocm_reserve_pages(dev, model->model_id, 0,
+ model->layer[0].glow.ocm_map.tilemask,
+ model->layer[0].glow.ocm_map.wb_page_start,
+ model->layer[0].glow.ocm_map.wb_pages,
+ model->layer[0].glow.ocm_map.scratch_pages);
+ model->layer[0].glow.ocm_map.ocm_reserved = true;
plt_spinlock_unlock(&ocm->lock);
}
}
/* Update JD */
- cn10k_ml_ocm_tilecount(model->model_mem_map.tilemask, &tile_start, &tile_end);
+ cn10k_ml_ocm_tilecount(model->layer[0].glow.ocm_map.tilemask, &tile_start, &tile_end);
req->jd.model_start.tilemask = GENMASK_ULL(tile_end, tile_start);
req->jd.model_start.ocm_wb_base_address =
- model->model_mem_map.wb_page_start * ocm->page_size;
+ model->layer[0].glow.ocm_map.wb_page_start * ocm->page_size;
job_enqueued = false;
job_dequeued = false;
@@ -1880,10 +1913,10 @@ cn10k_ml_model_start(struct rte_ml_dev *dev, uint16_t model_id)
while (!locked) {
if (plt_spinlock_trylock(&model->lock) != 0) {
if (ret == 0) {
- model->state = ML_CN10K_MODEL_STATE_STARTED;
+ model->state = ML_CNXK_MODEL_STATE_STARTED;
cnxk_mldev->nb_models_started++;
} else {
- model->state = ML_CN10K_MODEL_STATE_UNKNOWN;
+ model->state = ML_CNXK_MODEL_STATE_UNKNOWN;
}
plt_spinlock_unlock(&model->lock);
@@ -1891,12 +1924,12 @@ cn10k_ml_model_start(struct rte_ml_dev *dev, uint16_t model_id)
}
}
- if (model->state == ML_CN10K_MODEL_STATE_UNKNOWN) {
- while (model->model_mem_map.ocm_reserved) {
+ if (model->state == ML_CNXK_MODEL_STATE_UNKNOWN) {
+ while (model->layer[0].glow.ocm_map.ocm_reserved) {
if (plt_spinlock_trylock(&ocm->lock) != 0) {
- cn10k_ml_ocm_free_pages(dev, model->model_id);
- model->model_mem_map.ocm_reserved = false;
- model->model_mem_map.tilemask = 0x0;
+ cn10k_ml_ocm_free_pages(dev, model->model_id, 0);
+ model->layer[0].glow.ocm_map.ocm_reserved = false;
+ model->layer[0].glow.ocm_map.tilemask = 0x0;
plt_spinlock_unlock(&ocm->lock);
}
}
@@ -1917,7 +1950,7 @@ cn10k_ml_model_stop(struct rte_ml_dev *dev, uint16_t model_id)
{
struct cn10k_ml_dev *cn10k_mldev;
struct cnxk_ml_dev *cnxk_mldev;
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
struct cn10k_ml_ocm *ocm;
struct cn10k_ml_req *req;
@@ -1937,7 +1970,7 @@ cn10k_ml_model_stop(struct rte_ml_dev *dev, uint16_t model_id)
}
/* Prepare JD */
- req = model->req;
+ req = model->layer[0].glow.req;
cn10k_ml_prep_sp_job_descriptor(cn10k_mldev, model, req, ML_CN10K_JOB_TYPE_MODEL_STOP);
req->result.error_code.u64 = 0x0;
req->result.user_ptr = NULL;
@@ -1948,31 +1981,31 @@ cn10k_ml_model_stop(struct rte_ml_dev *dev, uint16_t model_id)
locked = false;
while (!locked) {
if (plt_spinlock_trylock(&model->lock) != 0) {
- if (model->state == ML_CN10K_MODEL_STATE_LOADED) {
+ if (model->state == ML_CNXK_MODEL_STATE_LOADED) {
plt_ml_dbg("Model not started, model = 0x%016lx",
PLT_U64_CAST(model));
plt_spinlock_unlock(&model->lock);
return 1;
}
- if (model->state == ML_CN10K_MODEL_STATE_JOB_ACTIVE) {
+ if (model->state == ML_CNXK_MODEL_STATE_JOB_ACTIVE) {
plt_err("A slow-path job is active for the model = 0x%016lx",
PLT_U64_CAST(model));
plt_spinlock_unlock(&model->lock);
return -EBUSY;
}
- model->state = ML_CN10K_MODEL_STATE_JOB_ACTIVE;
+ model->state = ML_CNXK_MODEL_STATE_JOB_ACTIVE;
plt_spinlock_unlock(&model->lock);
locked = true;
}
}
- while (model->model_mem_map.ocm_reserved) {
+ while (model->layer[0].glow.ocm_map.ocm_reserved) {
if (plt_spinlock_trylock(&ocm->lock) != 0) {
- cn10k_ml_ocm_free_pages(dev, model->model_id);
- model->model_mem_map.ocm_reserved = false;
- model->model_mem_map.tilemask = 0x0;
+ cn10k_ml_ocm_free_pages(dev, model->model_id, 0);
+ model->layer[0].glow.ocm_map.ocm_reserved = false;
+ model->layer[0].glow.ocm_map.tilemask = 0x0;
plt_spinlock_unlock(&ocm->lock);
}
}
@@ -2008,7 +2041,7 @@ cn10k_ml_model_stop(struct rte_ml_dev *dev, uint16_t model_id)
while (!locked) {
if (plt_spinlock_trylock(&model->lock) != 0) {
cnxk_mldev->nb_models_stopped++;
- model->state = ML_CN10K_MODEL_STATE_LOADED;
+ model->state = ML_CNXK_MODEL_STATE_LOADED;
plt_spinlock_unlock(&model->lock);
locked = true;
}
@@ -2021,7 +2054,7 @@ static int
cn10k_ml_model_info_get(struct rte_ml_dev *dev, uint16_t model_id,
struct rte_ml_model_info *model_info)
{
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
model = dev->data->models[model_id];
@@ -2040,7 +2073,7 @@ cn10k_ml_model_info_get(struct rte_ml_dev *dev, uint16_t model_id,
static int
cn10k_ml_model_params_update(struct rte_ml_dev *dev, uint16_t model_id, void *buffer)
{
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
size_t size;
model = dev->data->models[model_id];
@@ -2050,19 +2083,23 @@ cn10k_ml_model_params_update(struct rte_ml_dev *dev, uint16_t model_id, void *bu
return -EINVAL;
}
- if (model->state == ML_CN10K_MODEL_STATE_UNKNOWN)
+ if (model->state == ML_CNXK_MODEL_STATE_UNKNOWN)
return -1;
- else if (model->state != ML_CN10K_MODEL_STATE_LOADED)
+ else if (model->state != ML_CNXK_MODEL_STATE_LOADED)
return -EBUSY;
- size = model->metadata.init_model.file_size + model->metadata.main_model.file_size +
- model->metadata.finish_model.file_size + model->metadata.weights_bias.file_size;
+ size = model->layer[0].glow.metadata.init_model.file_size +
+ model->layer[0].glow.metadata.main_model.file_size +
+ model->layer[0].glow.metadata.finish_model.file_size +
+ model->layer[0].glow.metadata.weights_bias.file_size;
/* Update model weights & bias */
- rte_memcpy(model->addr.wb_load_addr, buffer, model->metadata.weights_bias.file_size);
+ rte_memcpy(model->layer[0].glow.addr.wb_load_addr, buffer,
+ model->layer[0].glow.metadata.weights_bias.file_size);
/* Copy data from load to run. run address to be used by MLIP */
- rte_memcpy(model->addr.base_dma_addr_run, model->addr.base_dma_addr_load, size);
+ rte_memcpy(model->layer[0].glow.addr.base_dma_addr_run,
+ model->layer[0].glow.addr.base_dma_addr_load, size);
return 0;
}
@@ -2071,7 +2108,7 @@ static int
cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, struct rte_ml_buff_seg **dbuffer,
struct rte_ml_buff_seg **qbuffer)
{
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
uint8_t model_input_type;
uint8_t *lcl_dbuffer;
uint8_t *lcl_qbuffer;
@@ -2091,57 +2128,58 @@ cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, struct rte_ml_bu
lcl_dbuffer = dbuffer[0]->addr;
lcl_qbuffer = qbuffer[0]->addr;
- for (i = 0; i < model->metadata.model.num_input; i++) {
+ for (i = 0; i < model->layer[0].glow.metadata.model.num_input; i++) {
if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
- input_type = model->metadata.input1[i].input_type;
- model_input_type = model->metadata.input1[i].model_input_type;
- qscale = model->metadata.input1[i].qscale;
+ input_type = model->layer[0].glow.metadata.input1[i].input_type;
+ model_input_type = model->layer[0].glow.metadata.input1[i].model_input_type;
+ qscale = model->layer[0].glow.metadata.input1[i].qscale;
} else {
j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
- input_type = model->metadata.input2[j].input_type;
- model_input_type = model->metadata.input2[j].model_input_type;
- qscale = model->metadata.input2[j].qscale;
+ input_type = model->layer[0].glow.metadata.input2[j].input_type;
+ model_input_type = model->layer[0].glow.metadata.input2[j].model_input_type;
+ qscale = model->layer[0].glow.metadata.input2[j].qscale;
}
if (input_type == model_input_type) {
- rte_memcpy(lcl_qbuffer, lcl_dbuffer, model->addr.input[i].sz_d);
+ rte_memcpy(lcl_qbuffer, lcl_dbuffer, model->layer[0].info.input[i].sz_d);
} else {
- switch (model->metadata.input1[i].model_input_type) {
+ switch (model->layer[0].glow.metadata.input1[i].model_input_type) {
case RTE_ML_IO_TYPE_INT8:
- ret = rte_ml_io_float32_to_int8(qscale,
- model->addr.input[i].nb_elements,
- lcl_dbuffer, lcl_qbuffer);
+ ret = rte_ml_io_float32_to_int8(
+ qscale, model->layer[0].info.input[i].nb_elements,
+ lcl_dbuffer, lcl_qbuffer);
break;
case RTE_ML_IO_TYPE_UINT8:
- ret = rte_ml_io_float32_to_uint8(qscale,
- model->addr.input[i].nb_elements,
- lcl_dbuffer, lcl_qbuffer);
+ ret = rte_ml_io_float32_to_uint8(
+ qscale, model->layer[0].info.input[i].nb_elements,
+ lcl_dbuffer, lcl_qbuffer);
break;
case RTE_ML_IO_TYPE_INT16:
- ret = rte_ml_io_float32_to_int16(qscale,
- model->addr.input[i].nb_elements,
- lcl_dbuffer, lcl_qbuffer);
+ ret = rte_ml_io_float32_to_int16(
+ qscale, model->layer[0].info.input[i].nb_elements,
+ lcl_dbuffer, lcl_qbuffer);
break;
case RTE_ML_IO_TYPE_UINT16:
- ret = rte_ml_io_float32_to_uint16(qscale,
- model->addr.input[i].nb_elements,
- lcl_dbuffer, lcl_qbuffer);
+ ret = rte_ml_io_float32_to_uint16(
+ qscale, model->layer[0].info.input[i].nb_elements,
+ lcl_dbuffer, lcl_qbuffer);
break;
case RTE_ML_IO_TYPE_FP16:
- ret = rte_ml_io_float32_to_float16(model->addr.input[i].nb_elements,
- lcl_dbuffer, lcl_qbuffer);
+ ret = rte_ml_io_float32_to_float16(
+ model->layer[0].info.input[i].nb_elements, lcl_dbuffer,
+ lcl_qbuffer);
break;
default:
plt_err("Unsupported model_input_type[%u] : %u", i,
- model->metadata.input1[i].model_input_type);
+ model->layer[0].glow.metadata.input1[i].model_input_type);
ret = -ENOTSUP;
}
if (ret < 0)
return ret;
}
- lcl_dbuffer += model->addr.input[i].sz_d;
- lcl_qbuffer += model->addr.input[i].sz_q;
+ lcl_dbuffer += model->layer[0].info.input[i].sz_d;
+ lcl_qbuffer += model->layer[0].info.input[i].sz_q;
}
return 0;
@@ -2151,7 +2189,7 @@ static int
cn10k_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, struct rte_ml_buff_seg **qbuffer,
struct rte_ml_buff_seg **dbuffer)
{
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
uint8_t model_output_type;
uint8_t *lcl_qbuffer;
uint8_t *lcl_dbuffer;
@@ -2171,58 +2209,60 @@ cn10k_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, struct rte_ml_
lcl_dbuffer = dbuffer[0]->addr;
lcl_qbuffer = qbuffer[0]->addr;
- for (i = 0; i < model->metadata.model.num_output; i++) {
+ for (i = 0; i < model->layer[0].glow.metadata.model.num_output; i++) {
if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
- output_type = model->metadata.output1[i].output_type;
- model_output_type = model->metadata.output1[i].model_output_type;
- dscale = model->metadata.output1[i].dscale;
+ output_type = model->layer[0].glow.metadata.output1[i].output_type;
+ model_output_type =
+ model->layer[0].glow.metadata.output1[i].model_output_type;
+ dscale = model->layer[0].glow.metadata.output1[i].dscale;
} else {
j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
- output_type = model->metadata.output2[j].output_type;
- model_output_type = model->metadata.output2[j].model_output_type;
- dscale = model->metadata.output2[j].dscale;
+ output_type = model->layer[0].glow.metadata.output2[j].output_type;
+ model_output_type =
+ model->layer[0].glow.metadata.output2[j].model_output_type;
+ dscale = model->layer[0].glow.metadata.output2[j].dscale;
}
if (output_type == model_output_type) {
- rte_memcpy(lcl_dbuffer, lcl_qbuffer, model->addr.output[i].sz_q);
+ rte_memcpy(lcl_dbuffer, lcl_qbuffer, model->layer[0].info.output[i].sz_q);
} else {
- switch (model->metadata.output1[i].model_output_type) {
+ switch (model->layer[0].glow.metadata.output1[i].model_output_type) {
case RTE_ML_IO_TYPE_INT8:
- ret = rte_ml_io_int8_to_float32(dscale,
- model->addr.output[i].nb_elements,
- lcl_qbuffer, lcl_dbuffer);
+ ret = rte_ml_io_int8_to_float32(
+ dscale, model->layer[0].info.output[i].nb_elements,
+ lcl_qbuffer, lcl_dbuffer);
break;
case RTE_ML_IO_TYPE_UINT8:
- ret = rte_ml_io_uint8_to_float32(dscale,
- model->addr.output[i].nb_elements,
- lcl_qbuffer, lcl_dbuffer);
+ ret = rte_ml_io_uint8_to_float32(
+ dscale, model->layer[0].info.output[i].nb_elements,
+ lcl_qbuffer, lcl_dbuffer);
break;
case RTE_ML_IO_TYPE_INT16:
- ret = rte_ml_io_int16_to_float32(dscale,
- model->addr.output[i].nb_elements,
- lcl_qbuffer, lcl_dbuffer);
+ ret = rte_ml_io_int16_to_float32(
+ dscale, model->layer[0].info.output[i].nb_elements,
+ lcl_qbuffer, lcl_dbuffer);
break;
case RTE_ML_IO_TYPE_UINT16:
- ret = rte_ml_io_uint16_to_float32(dscale,
- model->addr.output[i].nb_elements,
- lcl_qbuffer, lcl_dbuffer);
+ ret = rte_ml_io_uint16_to_float32(
+ dscale, model->layer[0].info.output[i].nb_elements,
+ lcl_qbuffer, lcl_dbuffer);
break;
case RTE_ML_IO_TYPE_FP16:
ret = rte_ml_io_float16_to_float32(
- model->addr.output[i].nb_elements, lcl_qbuffer,
+ model->layer[0].info.output[i].nb_elements, lcl_qbuffer,
lcl_dbuffer);
break;
default:
plt_err("Unsupported model_output_type[%u] : %u", i,
- model->metadata.output1[i].model_output_type);
+ model->layer[0].glow.metadata.output1[i].model_output_type);
ret = -ENOTSUP;
}
if (ret < 0)
return ret;
}
- lcl_qbuffer += model->addr.output[i].sz_q;
- lcl_dbuffer += model->addr.output[i].sz_d;
+ lcl_qbuffer += model->layer[0].info.output[i].sz_q;
+ lcl_dbuffer += model->layer[0].info.output[i].sz_d;
}
return 0;
@@ -2250,10 +2290,10 @@ static __rte_always_inline void
cn10k_ml_result_update(struct rte_ml_dev *dev, int qp_id, struct cn10k_ml_result *result,
struct rte_ml_op *op)
{
- struct cn10k_ml_model_stats *stats;
+ struct cn10k_ml_layer_stats *stats;
struct cn10k_ml_dev *cn10k_mldev;
struct cnxk_ml_dev *cnxk_mldev;
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
struct cn10k_ml_qp *qp;
uint64_t hw_latency;
uint64_t fw_latency;
@@ -2263,9 +2303,9 @@ cn10k_ml_result_update(struct rte_ml_dev *dev, int qp_id, struct cn10k_ml_result
if (likely(qp_id >= 0)) {
qp = dev->data->queue_pairs[qp_id];
qp->stats.dequeued_count++;
- stats = &model->burst_stats[qp_id];
+ stats = &model->layer[0].glow.burst_stats[qp_id];
} else {
- stats = model->sync_stats;
+ stats = model->layer[0].glow.sync_stats;
}
if (unlikely(stats->dequeued_count == stats->hw_reset_count)) {
@@ -2469,7 +2509,7 @@ cn10k_ml_inference_sync(struct rte_ml_dev *dev, struct rte_ml_op *op)
{
struct cn10k_ml_dev *cn10k_mldev;
struct cnxk_ml_dev *cnxk_mldev;
- struct cn10k_ml_model *model;
+ struct cnxk_ml_model *model;
struct cn10k_ml_req *req;
bool timeout;
int ret = 0;
@@ -2477,7 +2517,7 @@ cn10k_ml_inference_sync(struct rte_ml_dev *dev, struct rte_ml_op *op)
cnxk_mldev = dev->data->dev_private;
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
model = dev->data->models[op->model_id];
- req = model->req;
+ req = model->layer[0].glow.req;
cn10k_ml_set_poll_addr(req);
cn10k_ml_prep_fp_job_descriptor(cn10k_mldev, req, op);
new file mode 100644
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Marvell.
+ */
+
+#ifndef _CNXK_ML_IO_H_
+#define _CNXK_ML_IO_H_
+
+#include <rte_mldev.h>
+
+/* Maximum number of models per device */
+#define ML_CNXK_MAX_MODELS 16
+
+/* Maximum number of layers per model */
+#define ML_CNXK_MODEL_MAX_LAYERS 32
+
+/* Maximum number of inputs or outputs per layer or model */
+#define ML_CNXK_MODEL_MAX_INPUT_OUTPUT 32
+
+/* Maximum number of dimensions per I/O shape */
+#define ML_CNXK_MODEL_MAX_DIMS 8
+
+/* Input / Output structure */
+struct cnxk_ml_io {
+ /* name */
+ char name[RTE_ML_STR_MAX];
+
+ /* dequantized data type */
+ enum rte_ml_io_type dtype;
+
+ /* quantized data type */
+ enum rte_ml_io_type qtype;
+
+ /* Number of dimensions in shape */
+ uint32_t nb_dims;
+
+	/* Shape of the I/O tensor */
+ uint32_t shape[ML_CNXK_MODEL_MAX_DIMS];
+
+ /* Number of elements */
+ uint32_t nb_elements;
+
+	/* Dequantized data size */
+ uint32_t sz_d;
+
+	/* Quantized data size */
+ uint32_t sz_q;
+
+ /* Scale */
+ float scale;
+};
+
+/* Model / Layer IO structure */
+struct cnxk_ml_io_info {
+ /* Number of inputs */
+ uint16_t nb_inputs;
+
+ /* Model / Layer inputs */
+ struct cnxk_ml_io input[ML_CNXK_MODEL_MAX_INPUT_OUTPUT];
+
+ /* Total size of quantized input */
+ uint32_t total_input_sz_q;
+
+ /* Total size of dequantized input */
+ uint32_t total_input_sz_d;
+
+ /* Number of outputs */
+ uint16_t nb_outputs;
+
+ /* Model / Layer outputs */
+ struct cnxk_ml_io output[ML_CNXK_MODEL_MAX_INPUT_OUTPUT];
+
+ /* Total size of quantized output */
+ uint32_t total_output_sz_q;
+
+ /* Total size of dequantized output */
+ uint32_t total_output_sz_d;
+};
+
+#endif /* _CNXK_ML_IO_H_ */
new file mode 100644
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Marvell.
+ */
+
+#include <rte_mldev.h>
+
+#include "cnxk_ml_model.h"
new file mode 100644
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Marvell.
+ */
+
+#ifndef _CNXK_ML_MODEL_H_
+#define _CNXK_ML_MODEL_H_
+
+#include <rte_mldev.h>
+
+#include <roc_api.h>
+
+#include "cn10k_ml_model.h"
+
+#include "cnxk_ml_io.h"
+
+struct cnxk_ml_dev;
+struct cnxk_ml_model;
+
+/* Model state */
+enum cnxk_ml_model_state {
+ /* Unknown state */
+ ML_CNXK_MODEL_STATE_UNKNOWN,
+
+ /* Model loaded */
+ ML_CNXK_MODEL_STATE_LOADED,
+
+ /* A slow-path job is active, start or stop */
+ ML_CNXK_MODEL_STATE_JOB_ACTIVE,
+
+ /* Model started */
+ ML_CNXK_MODEL_STATE_STARTED,
+};
+
+/* Layer state */
+enum cnxk_ml_layer_state {
+ /* Unknown state */
+ ML_CNXK_LAYER_STATE_UNKNOWN,
+
+ /* Layer loaded */
+ ML_CNXK_LAYER_STATE_LOADED,
+
+ /* A slow-path job is active, start or stop */
+ ML_CNXK_LAYER_STATE_JOB_ACTIVE,
+
+ /* Layer started */
+ ML_CNXK_LAYER_STATE_STARTED,
+};
+
+/* Layer object */
+struct cnxk_ml_layer {
+	/* Name */
+ char name[RTE_ML_STR_MAX];
+
+ /* Model handle */
+ struct cnxk_ml_model *model;
+
+ /* Index mapped with firmware's model_id */
+ uint16_t index;
+
+ /* Input / Output */
+ struct cnxk_ml_io_info info;
+
+ /* Batch size */
+ uint32_t batch_size;
+
+ /* State */
+ enum cnxk_ml_layer_state state;
+
+ /* Glow layer specific data */
+ struct cn10k_ml_layer_data glow;
+};
+
+/* Model Object */
+struct cnxk_ml_model {
+ /* Device reference */
+ struct cnxk_ml_dev *cnxk_mldev;
+
+ /* ID */
+ uint16_t model_id;
+
+ /* Name */
+ char name[RTE_ML_STR_MAX];
+
+ /* Model specific data - glow */
+ struct cn10k_ml_model_data glow;
+
+ /* Batch size */
+ uint32_t batch_size;
+
+ /* Number of layers */
+ uint16_t nb_layers;
+
+ /* Layer info */
+ struct cnxk_ml_layer layer[ML_CNXK_MODEL_MAX_LAYERS];
+
+ /* State */
+ enum cnxk_ml_model_state state;
+
+ /* Internal model information structure
+ * Size of the buffer = sizeof(struct rte_ml_model_info)
+ * + num_inputs * sizeof(struct rte_ml_io_info)
+ * + num_outputs * sizeof(struct rte_ml_io_info).
+ * Structures would be arranged in the same order in the buffer.
+ */
+ uint8_t *info;
+
+ /* Spinlock, used to update model state */
+ plt_spinlock_t lock;
+};
+
+#endif /* _CNXK_ML_MODEL_H_ */
@@ -13,6 +13,8 @@ driver_sdk_headers = files(
'cn10k_ml_model.h',
'cn10k_ml_ocm.h',
'cnxk_ml_dev.h',
+ 'cnxk_ml_io.h',
+ 'cnxk_ml_model.h',
)
sources = files(
@@ -21,6 +23,7 @@ sources = files(
'cn10k_ml_model.c',
'cn10k_ml_ocm.c',
'cnxk_ml_dev.c',
+ 'cnxk_ml_model.c',
)
deps += ['mldev', 'common_cnxk', 'kvargs', 'hash']