@@ -199,3 +199,91 @@ cn10k_ml_model_metadata_update(struct cn10k_ml_model_metadata *metadata)
cn10k_ml_io_type_map(metadata->output[i].model_output_type);
}
}
+
+/*
+ * Stage the model sections for DMA and fill in the model address table.
+ *
+ * The init, main, finish and weights-bias sections are read from buffer,
+ * starting right after the metadata header, and copied back to back into the
+ * load region that begins at base_dma_addr. The load and run address of each
+ * section is recorded in model->addr. Per-input and per-output element counts
+ * and quantized / dequantized sizes are then derived from the metadata.
+ *
+ * Only the load region is written here; run addresses are computed but the
+ * run region itself is not touched by this function.
+ *
+ * NOTE(review): the run base is placed at the load base plus the unrounded
+ * sum of the section sizes, whereas cn10k_ml_model_load() rounds that sum up
+ * to ML_CN10K_ALIGN_SIZE both when sizing the memzone and when copying load to
+ * run. If the sum is not a multiple of the alignment, source and destination
+ * of that copy overlap - confirm which size is intended.
+ *
+ * model         - model object; model->metadata is read, model->addr is written
+ * buffer        - model image: metadata header followed by the four sections
+ * base_dma_addr - start of the load region; must hold at least the sum of the
+ *                 four section sizes
+ */
+void
+cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer, uint8_t *base_dma_addr)
+{
+	struct cn10k_ml_model_metadata *metadata;
+	struct cn10k_ml_model_addr *addr;
+	size_t model_data_size;
+	uint8_t *dma_addr_load;
+	uint8_t *dma_addr_run;
+	size_t fpos; /* Byte offset into buffer; unsigned so large images cannot overflow it */
+	uint8_t i;
+
+	metadata = &model->metadata;
+	addr = &model->addr;
+	model_data_size = metadata->init_model.file_size + metadata->main_model.file_size +
+			  metadata->finish_model.file_size + metadata->weights_bias.file_size;
+
+	/* Base addresses: the run region starts right after the load region */
+	addr->base_dma_addr_load = base_dma_addr;
+	addr->base_dma_addr_run = PLT_PTR_ADD(addr->base_dma_addr_load, model_data_size);
+
+	/* Init section: first section after the metadata header */
+	dma_addr_load = addr->base_dma_addr_load;
+	dma_addr_run = addr->base_dma_addr_run;
+	fpos = sizeof(struct cn10k_ml_model_metadata);
+	addr->init_load_addr = dma_addr_load;
+	addr->init_run_addr = dma_addr_run;
+	memcpy(dma_addr_load, PLT_PTR_ADD(buffer, fpos), metadata->init_model.file_size);
+
+	/* Main section: follows the init section in both regions */
+	dma_addr_load += metadata->init_model.file_size;
+	dma_addr_run += metadata->init_model.file_size;
+	fpos += metadata->init_model.file_size;
+	addr->main_load_addr = dma_addr_load;
+	addr->main_run_addr = dma_addr_run;
+	memcpy(dma_addr_load, PLT_PTR_ADD(buffer, fpos), metadata->main_model.file_size);
+
+	/* Finish section: follows the main section in both regions */
+	dma_addr_load += metadata->main_model.file_size;
+	dma_addr_run += metadata->main_model.file_size;
+	fpos += metadata->main_model.file_size;
+	addr->finish_load_addr = dma_addr_load;
+	addr->finish_run_addr = dma_addr_run;
+	memcpy(dma_addr_load, PLT_PTR_ADD(buffer, fpos), metadata->finish_model.file_size);
+
+	/*
+	 * Weights & bias section: the base address is chosen so that
+	 * base + mem_offset is the spot right after the finish section in the
+	 * load region, which is where the section data is copied.
+	 */
+	dma_addr_load += metadata->finish_model.file_size;
+	fpos += metadata->finish_model.file_size;
+	addr->wb_base_addr = PLT_PTR_SUB(dma_addr_load, metadata->weights_bias.mem_offset);
+	addr->wb_load_addr = PLT_PTR_ADD(addr->wb_base_addr, metadata->weights_bias.mem_offset);
+	memcpy(addr->wb_load_addr, PLT_PTR_ADD(buffer, fpos), metadata->weights_bias.file_size);
+
+	/* Inputs: element count is the product of the four shape dimensions */
+	addr->total_input_sz_d = 0;
+	addr->total_input_sz_q = 0;
+	for (i = 0; i < metadata->model.num_input; i++) {
+		addr->input[i].nb_elements =
+			metadata->input[i].shape.w * metadata->input[i].shape.x *
+			metadata->input[i].shape.y * metadata->input[i].shape.z;
+		addr->input[i].sz_d = addr->input[i].nb_elements *
+				      ml_io_type_size_get(metadata->input[i].input_type);
+		addr->input[i].sz_q = addr->input[i].nb_elements *
+				      ml_io_type_size_get(metadata->input[i].model_input_type);
+		addr->total_input_sz_d += addr->input[i].sz_d;
+		addr->total_input_sz_q += addr->input[i].sz_q;
+
+		plt_ml_dbg("model_id = %d, input[%u] - w:%u x:%u y:%u z:%u, sz_d = %u sz_q = %u",
+			   model->model_id, i, metadata->input[i].shape.w,
+			   metadata->input[i].shape.x, metadata->input[i].shape.y,
+			   metadata->input[i].shape.z, addr->input[i].sz_d, addr->input[i].sz_q);
+	}
+
+	/* Outputs: element count is taken directly from the metadata */
+	addr->total_output_sz_q = 0;
+	addr->total_output_sz_d = 0;
+	for (i = 0; i < metadata->model.num_output; i++) {
+		addr->output[i].nb_elements = metadata->output[i].size;
+		addr->output[i].sz_d = addr->output[i].nb_elements *
+				       ml_io_type_size_get(metadata->output[i].output_type);
+		addr->output[i].sz_q = addr->output[i].nb_elements *
+				       ml_io_type_size_get(metadata->output[i].model_output_type);
+		addr->total_output_sz_q += addr->output[i].sz_q;
+		addr->total_output_sz_d += addr->output[i].sz_d;
+
+		plt_ml_dbg("model_id = %d, output[%u] - sz_d = %u, sz_q = %u", model->model_id, i,
+			   addr->output[i].sz_d, addr->output[i].sz_q);
+	}
+}
@@ -325,6 +325,81 @@ struct cn10k_ml_model_metadata {
uint8_t reserved3[16];
};
+/*
+ * Model address structure.
+ *
+ * Holds the section addresses and the input / output size bookkeeping of a
+ * loaded model. The address and size fields are filled in by
+ * cn10k_ml_model_addr_update().
+ */
+struct cn10k_ml_model_addr {
+ /* Base DMA address of the load region (start of the copied model sections) */
+ void *base_dma_addr_load;
+
+ /* Base DMA address of the run region (load base plus total section size) */
+ void *base_dma_addr_run;
+
+ /* Init section load address */
+ void *init_load_addr;
+
+ /* Init section run address */
+ void *init_run_addr;
+
+ /* Main section load address */
+ void *main_load_addr;
+
+ /* Main section run address */
+ void *main_run_addr;
+
+ /* Finish section load address */
+ void *finish_load_addr;
+
+ /* Finish section run address */
+ void *finish_run_addr;
+
+ /* Weights and bias base address (load address minus the section's mem_offset) */
+ void *wb_base_addr;
+
+ /* Weights and bias load address (base address plus the section's mem_offset) */
+ void *wb_load_addr;
+
+ /* Start tile */
+ uint8_t tile_start;
+
+ /* End tile */
+ uint8_t tile_end;
+
+ /* Per-input element count and sizes */
+ struct {
+ /* Number of elements (product of the w, x, y and z shape dimensions) */
+ uint32_t nb_elements;
+
+ /* Dequantized input size (element count times size of the input type) */
+ uint32_t sz_d;
+
+ /* Quantized input size (element count times size of the model input type) */
+ uint32_t sz_q;
+ } input[MRVL_ML_INPUT_OUTPUT_SIZE];
+
+ /* Per-output element count and sizes */
+ struct {
+ /* Number of elements (output size from the model metadata) */
+ uint32_t nb_elements;
+
+ /* Dequantized output size (element count times size of the output type) */
+ uint32_t sz_d;
+
+ /* Quantized output size (element count times size of the model output type) */
+ uint32_t sz_q;
+ } output[MRVL_ML_INPUT_OUTPUT_SIZE];
+
+ /* Total size of quantized input, summed over all inputs */
+ uint32_t total_input_sz_q;
+
+ /* Total size of dequantized input, summed over all inputs */
+ uint32_t total_input_sz_d;
+
+ /* Total size of quantized output, summed over all outputs */
+ uint32_t total_output_sz_q;
+
+ /* Total size of dequantized output, summed over all outputs */
+ uint32_t total_output_sz_d;
+};
+
/* ML Model Object */
struct cn10k_ml_model {
/* Device reference */
@@ -342,6 +417,9 @@ struct cn10k_ml_model {
/* Model metadata */
struct cn10k_ml_model_metadata metadata;
+ /* Model address structure */
+ struct cn10k_ml_model_addr addr;
+
/* Model lock, used to update model state */
plt_spinlock_t lock;
@@ -351,5 +429,7 @@ struct cn10k_ml_model {
int cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size);
void cn10k_ml_model_metadata_update(struct cn10k_ml_model_metadata *metadata);
+/* Copy the model sections from buffer to base_dma_addr and fill in model->addr. */
+void cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer,
+ uint8_t *base_dma_addr);
#endif /* _CN10K_ML_MODEL_H_ */
@@ -408,11 +408,14 @@ cn10k_ml_dev_queue_pair_setup(struct rte_ml_dev *dev, uint16_t queue_pair_id,
int
cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params, int16_t *model_id)
{
+ struct cn10k_ml_model_metadata *metadata;
struct cn10k_ml_model *model;
struct cn10k_ml_dev *mldev;
char str[RTE_MEMZONE_NAMESIZE];
const struct plt_memzone *mz;
+ size_t model_data_size;
+ uint8_t *base_dma_addr;
uint64_t mz_size;
uint16_t idx;
bool found;
@@ -439,7 +442,12 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
}
/* Get MZ size */
- mz_size = PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_model), ML_CN10K_ALIGN_SIZE);
+ metadata = (struct cn10k_ml_model_metadata *)params->addr;
+ model_data_size = metadata->init_model.file_size + metadata->main_model.file_size +
+ metadata->finish_model.file_size + metadata->weights_bias.file_size;
+ model_data_size = PLT_ALIGN_CEIL(model_data_size, ML_CN10K_ALIGN_SIZE);
+ mz_size = PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_model), ML_CN10K_ALIGN_SIZE) +
+ 2 * model_data_size;
/* Allocate memzone for model object and model data */
snprintf(str, RTE_MEMZONE_NAMESIZE, "%s_%u", CN10K_ML_MODEL_MEMZONE_NAME, idx);
@@ -462,6 +470,14 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
else
model->batch_size = model->metadata.model.batch_size;
+ /* Set DMA base address */
+ base_dma_addr = PLT_PTR_ADD(
+ mz->addr, PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_model), ML_CN10K_ALIGN_SIZE));
+ cn10k_ml_model_addr_update(model, params->addr, base_dma_addr);
+
+ /* Copy data from load to run. run address to be used by MLIP */
+ memcpy(model->addr.base_dma_addr_run, model->addr.base_dma_addr_load, model_data_size);
+
plt_spinlock_init(&model->lock);
model->state = ML_CN10K_MODEL_STATE_LOADED;
dev->data->models[idx] = model;