@@ -175,6 +175,22 @@ Runtime Config Options
With the above configuration, ML cnxk driver is configured to use ML registers
for polling in fastpath requests.
+- ``OCM page size`` (default ``16384``)
+
+ Option to specify the page size, in bytes, to be used for OCM management. The available
+ OCM is split into pages of the specified size, and these pages are allocated to
+ the models. The ``ocm_page_size`` ``devargs`` parameter is used to specify the page
+ size to be used.
+
+ The page sizes supported by the driver are 1 KB, 2 KB, 4 KB, 8 KB and 16 KB. The default
+ page size is 16 KB.
+
+ For example::
+
+ -a 0000:00:10.0,ocm_page_size=8192
+
+ With the above configuration, the OCM page size is set to 8192 bytes (8 KB).
+
Debugging Options
-----------------
@@ -24,6 +24,7 @@
#define CN10K_ML_OCM_ALLOC_MODE "ocm_alloc_mode"
#define CN10K_ML_DEV_HW_QUEUE_LOCK "hw_queue_lock"
#define CN10K_ML_FW_POLL_MEM "poll_mem"
+#define CN10K_ML_OCM_PAGE_SIZE "ocm_page_size"
#define CN10K_ML_FW_PATH_DEFAULT "/lib/firmware/mlip-fw.bin"
#define CN10K_ML_FW_ENABLE_DPE_WARNINGS_DEFAULT 1
@@ -32,6 +33,7 @@
#define CN10K_ML_OCM_ALLOC_MODE_DEFAULT "lowest"
#define CN10K_ML_DEV_HW_QUEUE_LOCK_DEFAULT 1
#define CN10K_ML_FW_POLL_MEM_DEFAULT "ddr"
+#define CN10K_ML_OCM_PAGE_SIZE_DEFAULT 16384
/* ML firmware macros */
#define FW_MEMZONE_NAME "ml_cn10k_fw_mz"
@@ -53,8 +55,12 @@ static const char *const valid_args[] = {CN10K_ML_FW_PATH,
CN10K_ML_OCM_ALLOC_MODE,
CN10K_ML_DEV_HW_QUEUE_LOCK,
CN10K_ML_FW_POLL_MEM,
+ CN10K_ML_OCM_PAGE_SIZE,
NULL};
+/* Supported OCM page sizes: 1KB, 2KB, 4KB, 8KB and 16KB */
+static const int valid_ocm_page_size[] = {1024, 2048, 4096, 8192, 16384};
+
/* Dummy operations for ML device */
struct rte_ml_dev_ops ml_dev_dummy_ops = {0};
@@ -95,12 +101,15 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde
struct rte_kvargs *kvlist = NULL;
bool ocm_alloc_mode_set = false;
bool hw_queue_lock_set = false;
+ bool ocm_page_size_set = false;
char *ocm_alloc_mode = NULL;
bool poll_mem_set = false;
bool fw_path_set = false;
char *poll_mem = NULL;
char *fw_path = NULL;
int ret = 0;
+ bool found;
+ uint8_t i;
if (devargs == NULL)
goto check_args;
@@ -191,6 +200,17 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde
poll_mem_set = true;
}
+ if (rte_kvargs_count(kvlist, CN10K_ML_OCM_PAGE_SIZE) == 1) {
+ ret = rte_kvargs_process(kvlist, CN10K_ML_OCM_PAGE_SIZE, &parse_integer_arg,
+ &mldev->ocm_page_size);
+ if (ret < 0) {
+ plt_err("Error processing arguments, key = %s\n", CN10K_ML_OCM_PAGE_SIZE);
+ ret = -EINVAL;
+ goto exit;
+ }
+ ocm_page_size_set = true;
+ }
+
check_args:
if (!fw_path_set)
mldev->fw.path = CN10K_ML_FW_PATH_DEFAULT;
@@ -272,6 +292,32 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde
}
plt_info("ML: %s = %s", CN10K_ML_FW_POLL_MEM, mldev->fw.poll_mem);
+ if (!ocm_page_size_set) {
+ mldev->ocm_page_size = CN10K_ML_OCM_PAGE_SIZE_DEFAULT;
+ } else {
+ if (mldev->ocm_page_size < 0) {
+ plt_err("Invalid argument, %s = %d\n", CN10K_ML_OCM_PAGE_SIZE,
+ mldev->ocm_page_size);
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ found = false;
+ for (i = 0; i < PLT_DIM(valid_ocm_page_size); i++) {
+ if (mldev->ocm_page_size == valid_ocm_page_size[i]) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ plt_err("Unsupported ocm_page_size = %d\n", mldev->ocm_page_size);
+ ret = -EINVAL;
+ goto exit;
+ }
+ }
+ plt_info("ML: %s = %d", CN10K_ML_OCM_PAGE_SIZE, mldev->ocm_page_size);
+
exit:
if (kvlist)
rte_kvargs_free(kvlist);
@@ -814,10 +860,11 @@ RTE_PMD_REGISTER_PCI(MLDEV_NAME_CN10K_PMD, cn10k_mldev_pmd);
RTE_PMD_REGISTER_PCI_TABLE(MLDEV_NAME_CN10K_PMD, pci_id_ml_table);
RTE_PMD_REGISTER_KMOD_DEP(MLDEV_NAME_CN10K_PMD, "vfio-pci");
-RTE_PMD_REGISTER_PARAM_STRING(MLDEV_NAME_CN10K_PMD,
- CN10K_ML_FW_PATH "=<path>" CN10K_ML_FW_ENABLE_DPE_WARNINGS
- "=<0|1>" CN10K_ML_FW_REPORT_DPE_WARNINGS
- "=<0|1>" CN10K_ML_DEV_CACHE_MODEL_DATA
- "=<0|1>" CN10K_ML_OCM_ALLOC_MODE
- "=<lowest|largest>" CN10K_ML_DEV_HW_QUEUE_LOCK
- "=<0|1>" CN10K_ML_FW_POLL_MEM "=<ddr|register>");
+RTE_PMD_REGISTER_PARAM_STRING(MLDEV_NAME_CN10K_PMD, CN10K_ML_FW_PATH
+ "=<path>" CN10K_ML_FW_ENABLE_DPE_WARNINGS
+ "=<0|1>" CN10K_ML_FW_REPORT_DPE_WARNINGS
+ "=<0|1>" CN10K_ML_DEV_CACHE_MODEL_DATA
+ "=<0|1>" CN10K_ML_OCM_ALLOC_MODE
+ "=<lowest|largest>" CN10K_ML_DEV_HW_QUEUE_LOCK
+ "=<0|1>" CN10K_ML_FW_POLL_MEM "=<ddr|register>" CN10K_ML_OCM_PAGE_SIZE
+ "=<1024|2048|4096|8192|16384>");
@@ -406,6 +406,9 @@ struct cn10k_ml_dev {
/* Use spinlock version of ROC enqueue */
int hw_queue_lock;
+ /* OCM page size */
+ int ocm_page_size;
+
/* JCMD enqueue function handler */
bool (*ml_jcmdq_enqueue)(struct roc_ml *roc_ml, struct ml_job_cmd_s *job_cmd);
@@ -339,11 +339,11 @@ cn10k_ml_model_ocm_pages_count(struct cn10k_ml_dev *mldev, uint16_t model_id, ui
scratch_size, *scratch_pages);
/* Check if the model can be loaded on OCM */
- if ((*wb_pages + *scratch_pages) > ML_CN10K_OCM_NUMPAGES) {
+ if ((*wb_pages + *scratch_pages) > mldev->ocm.num_pages) {
plt_err("Cannot create the model, OCM relocatable = %u",
metadata->model.ocm_relocatable);
plt_err("wb_pages (%u) + scratch_pages (%u) > %u", *wb_pages, *scratch_pages,
- ML_CN10K_OCM_NUMPAGES);
+ mldev->ocm.num_pages);
return -ENOMEM;
}
@@ -352,7 +352,7 @@ cn10k_ml_model_ocm_pages_count(struct cn10k_ml_dev *mldev, uint16_t model_id, ui
*/
if (!metadata->model.ocm_relocatable)
*scratch_pages =
- PLT_MAX(PLT_U64_CAST(*scratch_pages), PLT_U64_CAST(ML_CN10K_OCM_NUMPAGES));
+ PLT_MAX(PLT_U64_CAST(*scratch_pages), PLT_U64_CAST(mldev->ocm.num_pages));
return 0;
}
@@ -220,13 +220,13 @@ cn10k_ml_ocm_tilemask_find(struct rte_ml_dev *dev, uint8_t num_tiles, uint16_t w
struct cn10k_ml_dev *mldev;
struct cn10k_ml_ocm *ocm;
- uint8_t local_ocm_mask[ML_CN10K_OCM_MASKWORDS] = {0};
uint16_t used_scratch_pages_max;
uint16_t scratch_page_start;
int used_last_wb_page_max;
uint16_t scratch_page_end;
uint8_t search_start_tile;
uint8_t search_end_tile;
+ uint8_t *local_ocm_mask;
int wb_page_start_curr;
int max_slot_sz_curr;
uint8_t tile_start;
@@ -268,6 +268,9 @@ cn10k_ml_ocm_tilemask_find(struct rte_ml_dev *dev, uint8_t num_tiles, uint16_t w
search_end_tile = start_tile;
}
+ /* Scratch mask combining the OCM masks of the tiles being searched, one byte per mask word */
+ local_ocm_mask = rte_zmalloc("local_ocm_mask", mldev->ocm.mask_words, RTE_CACHE_LINE_SIZE);
+
tile_start = search_start_tile;
start_search:
used_scratch_pages_max = 0;
@@ -279,7 +282,7 @@ cn10k_ml_ocm_tilemask_find(struct rte_ml_dev *dev, uint8_t num_tiles, uint16_t w
PLT_MAX(ocm->tile_ocm_info[tile_id].last_wb_page, used_last_wb_page_max);
}
- memset(local_ocm_mask, 0, sizeof(local_ocm_mask));
+ memset(local_ocm_mask, 0, mldev->ocm.mask_words);
for (tile_id = tile_start; tile_id < tile_start + num_tiles; tile_id++) {
for (word_id = 0; word_id < ocm->mask_words; word_id++)
local_ocm_mask[word_id] |= ocm->tile_ocm_info[tile_id].ocm_mask[word_id];
@@ -332,6 +335,8 @@ cn10k_ml_ocm_tilemask_find(struct rte_ml_dev *dev, uint8_t num_tiles, uint16_t w
if (wb_page_start != -1)
*tilemask = GENMASK_ULL(tile_idx + num_tiles - 1, tile_idx);
+ rte_free(local_ocm_mask);
+
return wb_page_start;
}
@@ -480,7 +485,7 @@ cn10k_ml_ocm_pagemask_to_str(struct cn10k_ml_ocm_tile_info *tile_info, uint16_t
void
cn10k_ml_ocm_print(struct rte_ml_dev *dev, FILE *fp)
{
- char str[ML_CN10K_OCM_NUMPAGES / 4 + 2]; /* nibbles + prefix '0x' */
+ char *str;
struct cn10k_ml_dev *mldev;
struct cn10k_ml_ocm *ocm;
uint8_t tile_id;
@@ -490,12 +495,15 @@ cn10k_ml_ocm_print(struct rte_ml_dev *dev, FILE *fp)
mldev = dev->data->dev_private;
ocm = &mldev->ocm;
+ /* nibbles + prefix '0x' */
+ str = rte_zmalloc("ocm_mask_str", mldev->ocm.num_pages / 4 + 2, RTE_CACHE_LINE_SIZE);
+
fprintf(fp, "OCM State:\n");
for (tile_id = 0; tile_id < ocm->num_tiles; tile_id++) {
cn10k_ml_ocm_pagemask_to_str(&ocm->tile_ocm_info[tile_id], ocm->mask_words, str);
wb_pages = 0 - ocm->tile_ocm_info[tile_id].scratch_pages;
- for (word_id = 0; word_id < ML_CN10K_OCM_MASKWORDS; word_id++)
+ for (word_id = 0; word_id < mldev->ocm.mask_words; word_id++)
wb_pages +=
__builtin_popcount(ocm->tile_ocm_info[tile_id].ocm_mask[word_id]);
@@ -506,4 +514,6 @@ cn10k_ml_ocm_print(struct rte_ml_dev *dev, FILE *fp)
tile_id, ocm->tile_ocm_info[tile_id].scratch_pages, wb_pages,
ocm->tile_ocm_info[tile_id].last_wb_page, str);
}
+
+ rte_free(str);
}
@@ -8,25 +8,16 @@
#include <rte_mldev.h>
#include <rte_mldev_pmd.h>
-/* Page size in bytes. */
-#define ML_CN10K_OCM_PAGESIZE 0x4000
-
/* Number of OCM tiles. */
#define ML_CN10K_OCM_NUMTILES 0x8
/* OCM in bytes, per tile. */
#define ML_CN10K_OCM_TILESIZE 0x100000
-/* OCM pages, per tile. */
-#define ML_CN10K_OCM_NUMPAGES (ML_CN10K_OCM_TILESIZE / ML_CN10K_OCM_PAGESIZE)
-
-/* Maximum OCM mask words, per tile, 8 bit words. */
-#define ML_CN10K_OCM_MASKWORDS (ML_CN10K_OCM_NUMPAGES / 8)
-
/* OCM and Tile information structure */
struct cn10k_ml_ocm_tile_info {
/* Mask of used / allotted pages on tile's OCM */
- uint8_t ocm_mask[ML_CN10K_OCM_MASKWORDS];
+ uint8_t *ocm_mask;
/* Last pages in the tile's OCM used for weights and bias, default = -1 */
int last_wb_page;
@@ -78,6 +69,9 @@ struct cn10k_ml_ocm {
/* OCM memory info and status*/
struct cn10k_ml_ocm_tile_info tile_ocm_info[ML_CN10K_OCM_NUMTILES];
+
+ /* Memory for ocm_mask */
+ uint8_t *ocm_mask;
};
int cn10k_ml_ocm_tilecount(uint64_t tilemask, int *start, int *end);
@@ -311,8 +311,8 @@ cn10k_ml_model_print(struct rte_ml_dev *dev, uint16_t model_id, FILE *fp)
if (model->state == ML_CN10K_MODEL_STATE_STARTED) {
fprintf(fp, "%*s : 0x%0*" PRIx64 "\n", FIELD_LEN, "tilemask",
ML_CN10K_OCM_NUMTILES / 4, model->model_mem_map.tilemask);
- fprintf(fp, "%*s : 0x%x\n", FIELD_LEN, "ocm_wb_start",
- model->model_mem_map.wb_page_start * ML_CN10K_OCM_PAGESIZE);
+ fprintf(fp, "%*s : 0x%" PRIx64 "\n", FIELD_LEN, "ocm_wb_start",
+ model->model_mem_map.wb_page_start * mldev->ocm.page_size);
}
fprintf(fp, "%*s : %u\n", FIELD_LEN, "num_inputs", model->metadata.model.num_input);
@@ -781,12 +781,18 @@ cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c
ocm = &mldev->ocm;
ocm->num_tiles = ML_CN10K_OCM_NUMTILES;
ocm->size_per_tile = ML_CN10K_OCM_TILESIZE;
- ocm->page_size = ML_CN10K_OCM_PAGESIZE;
+ ocm->page_size = mldev->ocm_page_size;
ocm->num_pages = ocm->size_per_tile / ocm->page_size;
ocm->mask_words = ocm->num_pages / (8 * sizeof(uint8_t));
- for (tile_id = 0; tile_id < ocm->num_tiles; tile_id++)
+ /* Allocate memory for ocm_mask */
+ ocm->ocm_mask =
+ rte_zmalloc("ocm_mask", ocm->mask_words * ocm->num_tiles, RTE_CACHE_LINE_SIZE);
+
+ for (tile_id = 0; tile_id < ocm->num_tiles; tile_id++) {
+ ocm->tile_ocm_info[tile_id].ocm_mask = ocm->ocm_mask + tile_id * ocm->mask_words;
ocm->tile_ocm_info[tile_id].last_wb_page = -1;
+ }
rte_spinlock_init(&ocm->lock);
@@ -856,6 +862,9 @@ cn10k_ml_dev_close(struct rte_ml_dev *dev)
mldev = dev->data->dev_private;
+ /* Release ocm_mask memory */
+ rte_free(mldev->ocm.ocm_mask);
+
/* Stop and unload all models */
for (model_id = 0; model_id < dev->data->nb_models; model_id++) {
model = dev->data->models[model_id];