@@ -7,6 +7,8 @@
#include <roc_api.h>
+#include "cn10k_ml_ocm.h"
+
/* Marvell OCTEON CN10K ML PMD device name */
#define MLDEV_NAME_CN10K_PMD ml_cn10k
@@ -215,6 +217,9 @@ struct cn10k_ml_dev {
/* ML Firmware */
struct cn10k_ml_fw fw;
+ /* ML OCM info */
+ struct cn10k_ml_ocm ocm;
+
/* Number of models loaded */
uint16_t nb_models_loaded;
};
@@ -8,6 +8,7 @@
#include "cn10k_ml_dev.h"
#include "cn10k_ml_model.h"
+#include "cn10k_ml_ocm.h"
static enum rte_ml_io_type
cn10k_ml_io_type_map(uint8_t type)
@@ -287,3 +288,68 @@ cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer, uint8_
addr->output[i].sz_d, addr->output[i].sz_q);
}
}
+
+/* Work out how many OCM pages a model needs for weights/bias (WB) and scratch memory.
+ *
+ * buffer is interpreted as the model's struct cn10k_ml_model_metadata; model_id is only used in
+ * log messages. On success 0 is returned and the wb_pages and scratch_pages outputs are written.
+ * If the model needs more than ML_CN10K_OCM_NUMPAGES pages, -ENOMEM is returned and the outputs
+ * are left untouched.
+ */
+int
+cn10k_ml_model_ocm_pages_count(struct cn10k_ml_dev *mldev, int16_t model_id, uint8_t *buffer,
+			       uint16_t *wb_pages, uint16_t *scratch_pages)
+{
+	struct cn10k_ml_model_metadata *metadata;
+	struct cn10k_ml_ocm *ocm;
+	uint64_t scratch_count;
+	uint64_t scratch_size;
+	uint64_t wb_count;
+	uint64_t wb_size;
+
+	metadata = (struct cn10k_ml_model_metadata *)buffer;
+	ocm = &mldev->ocm;
+
+	/* Assume wb_size is zero for non-relocatable models */
+	wb_size = 0;
+	if (metadata->model.ocm_relocatable)
+		wb_size = metadata->model.ocm_wb_range_end - metadata->model.ocm_wb_range_start + 1;
+
+	/* Round up to whole pages. The counts stay 64-bit until they have been validated, so an
+	 * oversized value cannot wrap to a small one when narrowed to the uint16_t outputs.
+	 */
+	wb_count = wb_size / ocm->page_size + (wb_size % ocm->page_size != 0);
+	plt_ml_dbg("model_id = %d, wb_size = %" PRIu64 ", wb_pages = %" PRIu64, model_id, wb_size,
+		   wb_count);
+
+	/* Scratch memory runs from ocm_tmp_range_floor to the end of the tile. Rounding is
+	 * decided by scratch_size itself, so it stays correct for any tile / page size.
+	 */
+	scratch_size = ocm->size_per_tile - metadata->model.ocm_tmp_range_floor;
+	scratch_count = scratch_size / ocm->page_size + (scratch_size % ocm->page_size != 0);
+	plt_ml_dbg("model_id = %d, scratch_size = %" PRIu64 ", scratch_pages = %" PRIu64, model_id,
+		   scratch_size, scratch_count);
+
+	/* Check if the model can be loaded on OCM. Each count is tested on its own so that the
+	 * check cannot be defeated by an overflow of the sum.
+	 */
+	if (wb_count > ML_CN10K_OCM_NUMPAGES ||
+	    scratch_count > ML_CN10K_OCM_NUMPAGES - wb_count) {
+		plt_err("Cannot create the model, OCM relocatable = %u",
+			metadata->model.ocm_relocatable);
+		plt_err("wb_pages (%" PRIu64 ") + scratch_pages (%" PRIu64 ") > %u", wb_count,
+			scratch_count, ML_CN10K_OCM_NUMPAGES);
+		return -ENOMEM;
+	}
+
+	/* Update scratch_pages to block the full tile for OCM non-relocatable model. This would
+	 * prevent the library from allocating the remaining space on the tile to other models.
+	 */
+	if (!metadata->model.ocm_relocatable)
+		scratch_count = PLT_MAX(scratch_count, PLT_U64_CAST(ML_CN10K_OCM_NUMPAGES));
+
+	*wb_pages = (uint16_t)wb_count;
+	*scratch_pages = (uint16_t)scratch_count;
+
+	return 0;
+}
@@ -10,6 +10,7 @@
#include <roc_api.h>
#include "cn10k_ml_dev.h"
+#include "cn10k_ml_ocm.h"
/* Maximum number of models per device */
#define ML_CN10K_MAX_MODELS 16
@@ -420,6 +421,9 @@ struct cn10k_ml_model {
/* Model address structure */
struct cn10k_ml_model_addr addr;
+ /* Tile and memory information object */
+ struct cn10k_ml_ocm_model_map model_mem_map;
+
/* Model lock, used to update model state */
plt_spinlock_t lock;
@@ -431,5 +435,7 @@ int cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size);
void cn10k_ml_model_metadata_update(struct cn10k_ml_model_metadata *metadata);
void cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer,
uint8_t *base_dma_addr);
+int cn10k_ml_model_ocm_pages_count(struct cn10k_ml_dev *mldev, int16_t model_id, uint8_t *buffer,
+ uint16_t *wb_pages, uint16_t *scratch_pages);
#endif /* _CN10K_ML_MODEL_H_ */
new file mode 100644
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2022 Marvell.
+ */
+
+#include "cn10k_ml_ocm.h"
new file mode 100644
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2022 Marvell.
+ */
+
+#ifndef _CN10K_ML_OCM_H_
+#define _CN10K_ML_OCM_H_
+
+#include <rte_mldev.h>
+
+/* OCM page size in bytes. */
+#define ML_CN10K_OCM_PAGESIZE 0x4000
+
+/* Number of OCM tiles. */
+#define ML_CN10K_OCM_NUMTILES 0x8
+
+/* OCM size in bytes, per tile. */
+#define ML_CN10K_OCM_TILESIZE 0x100000
+
+/* OCM pages per tile (tile size divided by page size). */
+#define ML_CN10K_OCM_NUMPAGES (ML_CN10K_OCM_TILESIZE / ML_CN10K_OCM_PAGESIZE)
+
+/* 8-bit OCM mask words per tile, sized so the mask holds one bit per OCM page. */
+#define ML_CN10K_OCM_MASKWORDS (ML_CN10K_OCM_NUMPAGES / 8)
+
+/* ML OCM and tile information structure, one instance per OCM tile */
+struct cn10k_ml_ocm_tile_info {
+ /* Mask of used / allotted pages on tile's OCM, held as 8-bit words */
+ uint8_t ocm_mask[ML_CN10K_OCM_MASKWORDS];
+
+ /* Last page in the tile's OCM used for weights and bias, default = -1 */
+ int last_wb_page;
+
+ /* Number of pages used for scratch memory on the tile's OCM */
+ uint16_t scratch_pages;
+};
+
+/* ML model OCM map structure: OCM reservation state and page needs of one model */
+struct cn10k_ml_ocm_model_map {
+ /* Status of OCM reservation, set to false when the model is loaded */
+ bool ocm_reserved;
+
+ /* Mask of OCM tiles for the model, set to 0 when the model is loaded */
+ uint64_t tilemask;
+
+ /* Start page for the model load, default = -1 */
+ int wb_page_start;
+
+ /* Number of pages required for weights and bias */
+ uint16_t wb_pages;
+
+ /* Number of pages required for scratch memory */
+ uint16_t scratch_pages;
+};
+
+/* OCM state structure, embedded in the ML device and filled in at device configure */
+struct cn10k_ml_ocm {
+ /* OCM spinlock, used to update OCM state */
+ rte_spinlock_t lock;
+
+ /* Number of OCM tiles */
+ uint8_t num_tiles;
+
+ /* OCM size per tile, in bytes */
+ uint64_t size_per_tile;
+
+ /* Size of an OCM page, in bytes */
+ uint64_t page_size;
+
+ /* Number of OCM pages per tile (size_per_tile / page_size) */
+ uint16_t num_pages;
+
+ /* Number of 8-bit words per OCM mask (num_pages / 8) */
+ uint16_t mask_words;
+
+ /* OCM memory info and status, one entry per tile */
+ struct cn10k_ml_ocm_tile_info tile_ocm_info[ML_CN10K_OCM_NUMTILES];
+};
+
+#endif /* _CN10K_ML_OCM_H_ */
@@ -126,8 +126,10 @@ cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c
struct rte_ml_dev_info dev_info;
struct cn10k_ml_model *model;
struct cn10k_ml_dev *mldev;
+ struct cn10k_ml_ocm *ocm;
struct cn10k_ml_qp *qp;
uint32_t mz_size;
+ uint16_t tile_id;
int16_t model_id;
uint16_t qp_id;
int ret;
@@ -250,6 +252,18 @@ cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c
}
dev->data->nb_models = conf->nb_models;
+ ocm = &mldev->ocm;
+ ocm->num_tiles = ML_CN10K_OCM_NUMTILES;
+ ocm->size_per_tile = ML_CN10K_OCM_TILESIZE;
+ ocm->page_size = ML_CN10K_OCM_PAGESIZE;
+ ocm->num_pages = ocm->size_per_tile / ocm->page_size;
+ ocm->mask_words = ocm->num_pages / (8 * sizeof(uint8_t));
+
+ for (tile_id = 0; tile_id < ocm->num_tiles; tile_id++)
+ ocm->tile_ocm_info[tile_id].last_wb_page = -1;
+
+ rte_spinlock_init(&ocm->lock);
+
mldev->nb_models_loaded = 0;
mldev->state = ML_CN10K_DEV_STATE_CONFIGURED;
@@ -416,6 +430,8 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
const struct plt_memzone *mz;
size_t model_data_size;
uint8_t *base_dma_addr;
+ uint16_t scratch_pages;
+ uint16_t wb_pages;
uint64_t mz_size;
uint16_t idx;
bool found;
@@ -441,6 +457,11 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
return -ENOMEM;
}
+ /* Get WB and scratch pages, check if model can be loaded. */
+ ret = cn10k_ml_model_ocm_pages_count(mldev, idx, params->addr, &wb_pages, &scratch_pages);
+ if (ret < 0)
+ return ret;
+
/* Get MZ size */
metadata = (struct cn10k_ml_model_metadata *)params->addr;
model_data_size = metadata->init_model.file_size + metadata->main_model.file_size +
@@ -478,6 +499,13 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
/* Copy data from load to run. run address to be used by MLIP */
memcpy(model->addr.base_dma_addr_run, model->addr.base_dma_addr_load, model_data_size);
+ memset(&model->model_mem_map, 0, sizeof(struct cn10k_ml_ocm_model_map));
+ model->model_mem_map.ocm_reserved = false;
+ model->model_mem_map.tilemask = 0;
+ model->model_mem_map.wb_page_start = -1;
+ model->model_mem_map.wb_pages = wb_pages;
+ model->model_mem_map.scratch_pages = scratch_pages;
+
plt_spinlock_init(&model->lock);
model->state = ML_CN10K_MODEL_STATE_LOADED;
dev->data->models[idx] = model;
@@ -11,12 +11,14 @@ sources = files(
'cn10k_ml_dev.c',
'cn10k_ml_ops.c',
'cn10k_ml_model.c',
+ 'cn10k_ml_ocm.c',
)
headers = files(
'cn10k_ml_dev.h',
'cn10k_ml_ops.h',
'cn10k_ml_model.h',
+ 'cn10k_ml_ocm.h',
)
deps += ['mldev', 'common_ml', 'common_cnxk', 'kvargs', 'hash']