[v4,14/39] ml/cnxk: add internal structures for tiles and OCM

Message ID 20230201092310.23252-15-syalavarthi@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series Implementation of ML CNXK driver |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Srikanth Yalavarthi Feb. 1, 2023, 9:22 a.m. UTC
  Add internal structures to handle tile and OCM information and the
OCM-to-model memory mapping. Initialize the fields to platform-specific
defaults and compute the OCM / tile requirements for a model.

Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
 drivers/ml/cnxk/cn10k_ml_dev.h   |  5 ++
 drivers/ml/cnxk/cn10k_ml_model.c | 53 +++++++++++++++++++++
 drivers/ml/cnxk/cn10k_ml_model.h |  6 +++
 drivers/ml/cnxk/cn10k_ml_ocm.c   |  5 ++
 drivers/ml/cnxk/cn10k_ml_ocm.h   | 79 ++++++++++++++++++++++++++++++++
 drivers/ml/cnxk/cn10k_ml_ops.c   | 29 ++++++++++++
 drivers/ml/cnxk/meson.build      |  2 +
 7 files changed, 179 insertions(+)
 create mode 100644 drivers/ml/cnxk/cn10k_ml_ocm.c
 create mode 100644 drivers/ml/cnxk/cn10k_ml_ocm.h
  

Patch

diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h
index 7cf6268115..02a4496c97 100644
--- a/drivers/ml/cnxk/cn10k_ml_dev.h
+++ b/drivers/ml/cnxk/cn10k_ml_dev.h
@@ -7,6 +7,8 @@ 
 
 #include <roc_api.h>
 
+#include "cn10k_ml_ocm.h"
+
 /* Marvell OCTEON CN10K ML PMD device name */
 #define MLDEV_NAME_CN10K_PMD ml_cn10k
 
@@ -215,6 +217,9 @@  struct cn10k_ml_dev {
 	/* Firmware */
 	struct cn10k_ml_fw fw;
 
+	/* OCM info */
+	struct cn10k_ml_ocm ocm;
+
 	/* Number of models loaded */
 	uint16_t nb_models_loaded;
 };
diff --git a/drivers/ml/cnxk/cn10k_ml_model.c b/drivers/ml/cnxk/cn10k_ml_model.c
index dafcae106b..30911b7ffe 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.c
+++ b/drivers/ml/cnxk/cn10k_ml_model.c
@@ -8,6 +8,7 @@ 
 
 #include "cn10k_ml_dev.h"
 #include "cn10k_ml_model.h"
+#include "cn10k_ml_ocm.h"
 
 static enum rte_ml_io_type
 cn10k_ml_io_type_map(uint8_t type)
@@ -303,3 +304,55 @@  cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer, uint8_
 			   addr->output[i].sz_d, addr->output[i].sz_q);
 	}
 }
+
+/* Compute per-tile OCM pages for a model's weights/bias (WB) and scratch regions; returns 0,
+ * -EINVAL if the metadata OCM ranges are inconsistent, or -ENOMEM if the model cannot fit.
+ */
+int
+cn10k_ml_model_ocm_pages_count(struct cn10k_ml_dev *mldev, int16_t model_id, uint8_t *buffer,
+			       uint16_t *wb_pages, uint16_t *scratch_pages)
+{
+	struct cn10k_ml_model_metadata *metadata = (struct cn10k_ml_model_metadata *)buffer;
+	struct cn10k_ml_ocm *ocm = &mldev->ocm;
+	uint64_t scratch_size;
+	uint64_t nb_scratch;
+	uint64_t wb_size = 0; /* Assume wb_size is zero for non-relocatable models */
+	uint64_t nb_wb;
+
+	/* Reject ranges that would make the unsigned size computations below wrap around. */
+	if (metadata->model.ocm_tmp_range_floor > ocm->size_per_tile ||
+	    (metadata->model.ocm_relocatable &&
+	     metadata->model.ocm_wb_range_end < metadata->model.ocm_wb_range_start)) {
+		plt_err("Invalid OCM ranges in metadata, model_id = %d", model_id);
+		return -EINVAL;
+	}
+
+	if (metadata->model.ocm_relocatable)
+		wb_size = PLT_U64_CAST(metadata->model.ocm_wb_range_end) -
+			  metadata->model.ocm_wb_range_start + 1;
+
+	/* Round up to whole pages; keep counts 64-bit until the capacity check has passed. */
+	nb_wb = wb_size / ocm->page_size + (wb_size % ocm->page_size != 0);
+	plt_ml_dbg("model_id = %d, wb_size = %" PRIu64 ", wb_pages = %" PRIu64, model_id, wb_size,
+		   nb_wb);
+
+	scratch_size = ocm->size_per_tile - metadata->model.ocm_tmp_range_floor;
+	nb_scratch = scratch_size / ocm->page_size + (scratch_size % ocm->page_size != 0);
+	plt_ml_dbg("model_id = %d, scratch_size = %" PRIu64 ", scratch_pages = %" PRIu64, model_id,
+		   scratch_size, nb_scratch);
+
+	/* Check if the model can be loaded on OCM */
+	if (nb_wb + nb_scratch > ML_CN10K_OCM_NUMPAGES) {
+		plt_err("Cannot create the model, OCM relocatable = %u",
+			metadata->model.ocm_relocatable);
+		plt_err("wb_pages (%" PRIu64 ") + scratch_pages (%" PRIu64 ") > %u", nb_wb,
+			nb_scratch, ML_CN10K_OCM_NUMPAGES);
+		return -ENOMEM;
+	}
+	/* A non-relocatable model blocks the full tile. This prevents the library from
+	 * allocating the remaining space on the tile to other models.
+	 */
+	*wb_pages = nb_wb;
+	*scratch_pages = metadata->model.ocm_relocatable ? nb_scratch : ML_CN10K_OCM_NUMPAGES;
+	return 0;
+}
diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h
index 7e276c3b12..ebd296c609 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.h
+++ b/drivers/ml/cnxk/cn10k_ml_model.h
@@ -10,6 +10,7 @@ 
 #include <roc_api.h>
 
 #include "cn10k_ml_dev.h"
+#include "cn10k_ml_ocm.h"
 
 /* Model state */
 enum cn10k_ml_model_state {
@@ -417,6 +418,9 @@  struct cn10k_ml_model {
 	/* Address structure */
 	struct cn10k_ml_model_addr addr;
 
+	/* Tile and memory information object */
+	struct cn10k_ml_ocm_model_map model_mem_map;
+
 	/* Spinlock, used to update model state */
 	plt_spinlock_t lock;
 
@@ -428,5 +432,7 @@  int cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size);
 void cn10k_ml_model_metadata_update(struct cn10k_ml_model_metadata *metadata);
 void cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer,
 				uint8_t *base_dma_addr);
+int cn10k_ml_model_ocm_pages_count(struct cn10k_ml_dev *mldev, int16_t model_id, uint8_t *buffer,
+				   uint16_t *wb_pages, uint16_t *scratch_pages);
 
 #endif /* _CN10K_ML_MODEL_H_ */
diff --git a/drivers/ml/cnxk/cn10k_ml_ocm.c b/drivers/ml/cnxk/cn10k_ml_ocm.c
new file mode 100644
index 0000000000..b1c62f2963
--- /dev/null
+++ b/drivers/ml/cnxk/cn10k_ml_ocm.c
@@ -0,0 +1,5 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2022 Marvell.
+ */
+
+#include "cn10k_ml_ocm.h"
diff --git a/drivers/ml/cnxk/cn10k_ml_ocm.h b/drivers/ml/cnxk/cn10k_ml_ocm.h
new file mode 100644
index 0000000000..44390396f9
--- /dev/null
+++ b/drivers/ml/cnxk/cn10k_ml_ocm.h
@@ -0,0 +1,79 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2022 Marvell.
+ */
+
+#ifndef _CN10K_ML_OCM_H_
+#define _CN10K_ML_OCM_H_
+
+#include <rte_mldev.h>
+
+/* OCM page size, in bytes. */
+#define ML_CN10K_OCM_PAGESIZE 0x4000
+
+/* Number of OCM tiles. */
+#define ML_CN10K_OCM_NUMTILES 0x8
+
+/* OCM size in bytes, per tile. */
+#define ML_CN10K_OCM_TILESIZE 0x100000
+
+/* Number of OCM pages, per tile. */
+#define ML_CN10K_OCM_NUMPAGES (ML_CN10K_OCM_TILESIZE / ML_CN10K_OCM_PAGESIZE)
+
+/* Number of 8-bit OCM mask words, per tile (ML_CN10K_OCM_NUMPAGES bits in total). */
+#define ML_CN10K_OCM_MASKWORDS (ML_CN10K_OCM_NUMPAGES / 8)
+
+/* Per-tile OCM usage information structure */
+struct cn10k_ml_ocm_tile_info {
+	/* Mask of used / allotted pages on tile's OCM */
+	uint8_t ocm_mask[ML_CN10K_OCM_MASKWORDS];
+
+	/* Last page in the tile's OCM used for weights and bias, default = -1 */
+	int last_wb_page;
+
+	/* Number of pages used for scratch memory on the tile's OCM */
+	uint16_t scratch_pages;
+};
+
+/* Model OCM map structure, one instance per loaded model */
+struct cn10k_ml_ocm_model_map {
+	/* Status of OCM reservation, set to false at model load */
+	bool ocm_reserved;
+
+	/* Mask of OCM tiles for the model, set to 0 at model load */
+	uint64_t tilemask;
+
+	/* Start page for the model load, default = -1 */
+	int wb_page_start;
+
+	/* Number of OCM pages required for weights and bias */
+	uint16_t wb_pages;
+
+	/* Number of OCM pages required for scratch memory */
+	uint16_t scratch_pages;
+};
+
+/* OCM state structure, embedded in the device structure */
+struct cn10k_ml_ocm {
+	/* OCM spinlock, used to update OCM state */
+	rte_spinlock_t lock;
+
+	/* Number of OCM tiles */
+	uint8_t num_tiles;
+
+	/* OCM size per tile, in bytes */
+	uint64_t size_per_tile;
+
+	/* Size of an OCM page, in bytes */
+	uint64_t page_size;
+
+	/* Number of OCM pages per tile */
+	uint16_t num_pages;
+
+	/* Number of 8-bit words in a tile's OCM mask */
+	uint16_t mask_words;
+
+	/* Per-tile OCM memory info and status */
+	struct cn10k_ml_ocm_tile_info tile_ocm_info[ML_CN10K_OCM_NUMTILES];
+};
+
+#endif /* _CN10K_ML_OCM_H_ */
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index 20f15ec35d..9ccf52332f 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -126,8 +126,10 @@  cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c
 	struct rte_ml_dev_info dev_info;
 	struct cn10k_ml_model *model;
 	struct cn10k_ml_dev *mldev;
+	struct cn10k_ml_ocm *ocm;
 	struct cn10k_ml_qp *qp;
 	uint32_t mz_size;
+	uint16_t tile_id;
 	int16_t model_id;
 	uint16_t qp_id;
 	int ret;
@@ -250,6 +252,18 @@  cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c
 	}
 	dev->data->nb_models = conf->nb_models;
 
+	ocm = &mldev->ocm;
+	ocm->num_tiles = ML_CN10K_OCM_NUMTILES;
+	ocm->size_per_tile = ML_CN10K_OCM_TILESIZE;
+	ocm->page_size = ML_CN10K_OCM_PAGESIZE;
+	ocm->num_pages = ocm->size_per_tile / ocm->page_size;
+	ocm->mask_words = ocm->num_pages / (8 * sizeof(uint8_t));
+
+	for (tile_id = 0; tile_id < ocm->num_tiles; tile_id++)
+		ocm->tile_ocm_info[tile_id].last_wb_page = -1;
+
+	rte_spinlock_init(&ocm->lock);
+
 	mldev->nb_models_loaded = 0;
 	mldev->state = ML_CN10K_DEV_STATE_CONFIGURED;
 
@@ -416,6 +430,8 @@  cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
 	const struct plt_memzone *mz;
 	size_t model_data_size;
 	uint8_t *base_dma_addr;
+	uint16_t scratch_pages;
+	uint16_t wb_pages;
 	uint64_t mz_size;
 	uint16_t idx;
 	bool found;
@@ -441,6 +457,11 @@  cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
 		return -ENOMEM;
 	}
 
+	/* Get WB and scratch pages, check if model can be loaded. */
+	ret = cn10k_ml_model_ocm_pages_count(mldev, idx, params->addr, &wb_pages, &scratch_pages);
+	if (ret < 0)
+		return ret;
+
 	/* Compute memzone size */
 	metadata = (struct cn10k_ml_model_metadata *)params->addr;
 	model_data_size = metadata->init_model.file_size + metadata->main_model.file_size +
@@ -478,6 +499,14 @@  cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
 	/* Copy data from load to run. run address to be used by MLIP */
 	rte_memcpy(model->addr.base_dma_addr_run, model->addr.base_dma_addr_load, model_data_size);
 
+	/* Initialize model_mem_map */
+	memset(&model->model_mem_map, 0, sizeof(struct cn10k_ml_ocm_model_map));
+	model->model_mem_map.ocm_reserved = false;
+	model->model_mem_map.tilemask = 0;
+	model->model_mem_map.wb_page_start = -1;
+	model->model_mem_map.wb_pages = wb_pages;
+	model->model_mem_map.scratch_pages = scratch_pages;
+
 	plt_spinlock_init(&model->lock);
 	model->state = ML_CN10K_MODEL_STATE_LOADED;
 	dev->data->models[idx] = model;
diff --git a/drivers/ml/cnxk/meson.build b/drivers/ml/cnxk/meson.build
index 799e8f2470..393bc629b0 100644
--- a/drivers/ml/cnxk/meson.build
+++ b/drivers/ml/cnxk/meson.build
@@ -11,12 +11,14 @@  driver_sdk_headers = files(
         'cn10k_ml_dev.h',
         'cn10k_ml_ops.h',
         'cn10k_ml_model.h',
+        'cn10k_ml_ocm.h',
 )
 
 sources = files(
         'cn10k_ml_dev.c',
         'cn10k_ml_ops.c',
         'cn10k_ml_model.c',
+        'cn10k_ml_ocm.c',
 )
 
 deps += ['mldev', 'common_cnxk', 'kvargs', 'hash']