@@ -2,4 +2,200 @@
* Copyright (c) 2022 Marvell.
*/
+#include <rte_hash_crc.h>
+
+#include <ml_utils.h>
+
+#include "cn10k_ml_dev.h"
#include "cn10k_ml_model.h"
+
+/* Map the metadata numeric I/O type codes (1..8) to rte_ml_io_type;
+ * out-of-range codes map to RTE_ML_IO_TYPE_UNKNOWN.
+ */
+static enum rte_ml_io_type
+cn10k_ml_io_type_map(uint8_t type)
+{
+	/* Entry 0 pads the table so metadata codes index it directly */
+	static const enum rte_ml_io_type type_map[] = {
+		RTE_ML_IO_TYPE_UNKNOWN,
+		RTE_ML_IO_TYPE_INT8,
+		RTE_ML_IO_TYPE_UINT8,
+		RTE_ML_IO_TYPE_INT16,
+		RTE_ML_IO_TYPE_UINT16,
+		RTE_ML_IO_TYPE_INT32,
+		RTE_ML_IO_TYPE_UINT32,
+		RTE_ML_IO_TYPE_FP16,
+		RTE_ML_IO_TYPE_FP32,
+	};
+
+	if (type >= (sizeof(type_map) / sizeof(type_map[0])))
+		return RTE_ML_IO_TYPE_UNKNOWN;
+
+	return type_map[type];
+}
+
+int
+cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size)
+{
+	struct cn10k_ml_model_metadata *metadata;
+	uint32_t payload_crc32c;
+	uint32_t header_crc32c;
+	uint8_t version[4];
+	uint8_t i;
+
+	/* Undersized buffer, also guards the payload CRC length below from underflow */
+	if (size < sizeof(struct cn10k_ml_model_metadata)) {
+		plt_err("Invalid model, metadata truncated");
+		return -EINVAL;
+	}
+	metadata = (struct cn10k_ml_model_metadata *)buffer;
+
+	/* Header CRC check */
+	if (metadata->metadata_header.header_crc32c != 0) {
+		header_crc32c = rte_hash_crc(
+			buffer, sizeof(metadata->metadata_header) - sizeof(uint32_t), 0);
+		if (header_crc32c != metadata->metadata_header.header_crc32c) {
+			plt_err("Invalid model, Header CRC mismatch");
+			return -EINVAL;
+		}
+	}
+
+	/* Payload CRC check */
+	if (metadata->metadata_header.payload_crc32c != 0) {
+		payload_crc32c = rte_hash_crc(buffer + sizeof(metadata->metadata_header),
+					      size - sizeof(metadata->metadata_header), 0);
+
+		if (payload_crc32c != metadata->metadata_header.payload_crc32c) {
+			plt_err("Invalid model, Payload CRC mismatch");
+			return -EINVAL;
+		}
+	}
+
+	/* Model magic string, magic[] is not NUL-terminated, print at most 4 bytes */
+	if (strncmp((char *)metadata->metadata_header.magic, MRVL_ML_MODEL_MAGIC_STRING, 4) != 0) {
+		plt_err("Invalid model, magic = %.4s", metadata->metadata_header.magic);
+		return -EINVAL;
+	}
+
+	/* Target architecture */
+	if (metadata->metadata_header.target_architecture != MRVL_ML_MODEL_TARGET_ARCH) {
+		plt_err("Model target architecture (%u) not supported",
+			metadata->metadata_header.target_architecture);
+		return -ENOTSUP;
+	}
+
+	/* Header version, only the major and minor digits are compared */
+	memcpy(version, metadata->metadata_header.version, 4 * sizeof(uint8_t));
+	if (version[0] * 1000 + version[1] * 100 < MRVL_ML_MODEL_VERSION) {
+		plt_err("Metadata version = %u.%u.%u.%u (< %u.%u.%u.%u) not supported", version[0],
+			version[1], version[2], version[3], (MRVL_ML_MODEL_VERSION / 1000) % 10,
+			(MRVL_ML_MODEL_VERSION / 100) % 10, (MRVL_ML_MODEL_VERSION / 10) % 10,
+			MRVL_ML_MODEL_VERSION % 10);
+		return -ENOTSUP;
+	}
+
+	/* Init section */
+	if (metadata->init_model.file_size == 0) {
+		plt_err("Invalid metadata, init_model.file_size = %u", metadata->init_model.file_size);
+		return -EINVAL;
+	}
+
+	/* Main section */
+	if (metadata->main_model.file_size == 0) {
+		plt_err("Invalid metadata, main_model.file_size = %u", metadata->main_model.file_size);
+		return -EINVAL;
+	}
+
+	/* Finish section */
+	if (metadata->finish_model.file_size == 0) {
+		plt_err("Invalid metadata, finish_model.file_size = %u", metadata->finish_model.file_size);
+		return -EINVAL;
+	}
+
+	/* Weights and Bias */
+	if (metadata->weights_bias.file_size == 0) {
+		plt_err("Invalid metadata, weights_bias.file_size = %u", metadata->weights_bias.file_size);
+		return -EINVAL;
+	}
+
+	if (metadata->weights_bias.relocatable != 1) {
+		plt_err("Model not supported, non-relocatable weights and bias");
+		return -ENOTSUP;
+	}
+
+	/* Inputs */
+	for (i = 0; i < metadata->model.num_input; i++) {
+		if (ml_io_type_size_get(cn10k_ml_io_type_map(metadata->input[i].input_type)) <= 0) {
+			plt_err("Invalid metadata, input[%u] : input_type = %u", i,
+				metadata->input[i].input_type);
+			return -EINVAL;
+		}
+
+		if (ml_io_type_size_get(
+			    cn10k_ml_io_type_map(metadata->input[i].model_input_type)) <= 0) {
+			plt_err("Invalid metadata, input[%u] : model_input_type = %u", i,
+				metadata->input[i].model_input_type);
+			return -EINVAL;
+		}
+
+		if (metadata->input[i].relocatable != 1) {
+			plt_err("Model not supported, non-relocatable input: %u", i);
+			return -ENOTSUP;
+		}
+	}
+
+	/* Outputs */
+	for (i = 0; i < metadata->model.num_output; i++) {
+		if (ml_io_type_size_get(cn10k_ml_io_type_map(metadata->output[i].output_type)) <=
+		    0) {
+			plt_err("Invalid metadata, output[%u] : output_type = %u", i,
+				metadata->output[i].output_type);
+			return -EINVAL;
+		}
+
+		if (ml_io_type_size_get(
+			    cn10k_ml_io_type_map(metadata->output[i].model_output_type)) <= 0) {
+			plt_err("Invalid metadata, output[%u] : model_output_type = %u", i,
+				metadata->output[i].model_output_type);
+			return -EINVAL;
+		}
+
+		if (metadata->output[i].relocatable != 1) {
+			plt_err("Model not supported, non-relocatable output: %u", i);
+			return -ENOTSUP;
+		}
+	}
+
+	return 0;
+}
+
+void
+cn10k_ml_model_metadata_update(struct cn10k_ml_model_metadata *metadata)
+{
+	uint8_t idx;
+
+	/* Inputs: map type codes in place, default zero shape dims to 1 */
+	for (idx = 0; idx < metadata->model.num_input; idx++) {
+		metadata->input[idx].input_type =
+			cn10k_ml_io_type_map(metadata->input[idx].input_type);
+		metadata->input[idx].model_input_type =
+			cn10k_ml_io_type_map(metadata->input[idx].model_input_type);
+
+		if (metadata->input[idx].shape.w == 0)
+			metadata->input[idx].shape.w = 1;
+		if (metadata->input[idx].shape.x == 0)
+			metadata->input[idx].shape.x = 1;
+		if (metadata->input[idx].shape.y == 0)
+			metadata->input[idx].shape.y = 1;
+		if (metadata->input[idx].shape.z == 0)
+			metadata->input[idx].shape.z = 1;
+	}
+
+	/* Outputs: only the type codes are mapped */
+	for (idx = 0; idx < metadata->model.num_output; idx++) {
+		metadata->output[idx].output_type =
+			cn10k_ml_io_type_map(metadata->output[idx].output_type);
+		metadata->output[idx].model_output_type =
+			cn10k_ml_io_type_map(metadata->output[idx].model_output_type);
+	}
+}
@@ -22,6 +22,309 @@ enum cn10k_ml_model_state {
ML_CN10K_MODEL_STATE_UNKNOWN,
};
+/* Model Metadata : v 2.1.0.2 */
+#define MRVL_ML_MODEL_MAGIC_STRING "MRVL"
+#define MRVL_ML_MODEL_TARGET_ARCH 128
+#define MRVL_ML_MODEL_VERSION 2100
+#define MRVL_ML_MODEL_NAME_LEN 64
+#define MRVL_ML_INPUT_NAME_LEN 16
+#define MRVL_ML_OUTPUT_NAME_LEN 16
+#define MRVL_ML_INPUT_OUTPUT_SIZE 8
+
+/* Model file metadata structure */
+struct cn10k_ml_model_metadata {
+	/* Header (256-byte) */
+	struct {
+		/* Magic string ('M', 'R', 'V', 'L') */
+		uint8_t magic[4];
+
+		/* Metadata version */
+		uint8_t version[4];
+
+		/* Metadata size */
+		uint32_t metadata_size;
+
+		/* Unique ID */
+		uint8_t uuid[128];
+
+		/* Model target architecture
+		 * 0 = Undefined
+		 * 1 = M1K
+		 * 128 = MLIP
+		 * 256 = Experimental
+		 */
+		uint32_t target_architecture;
+		uint8_t reserved[104];
+
+		/* CRC of data after metadata_header (i.e. after first 256 bytes) */
+		uint32_t payload_crc32c;
+
+		/* CRC of first 252 bytes of metadata_header, after payload_crc calculation */
+		uint32_t header_crc32c;
+	} metadata_header;
+
+	/* Model information (256-byte) */
+	struct {
+		/* Model name string */
+		uint8_t name[MRVL_ML_MODEL_NAME_LEN];
+
+		/* Model version info (xx.xx.xx.xx) */
+		uint8_t version[4];
+
+		/* Model code size (init + main + finish) */
+		uint32_t code_size;
+
+		/* Model data size (Weights and Bias) */
+		uint32_t data_size;
+
+		/* OCM start offset, set to ocm_wb_range_start */
+		uint32_t ocm_start;
+
+		/* OCM end offset, set to max OCM size */
+		uint32_t ocm_end;
+
+		/* OCM relocatable flag (always yes)
+		 * 0 = Not relocatable
+		 * 1 = Relocatable
+		 */
+		uint8_t ocm_relocatable;
+
+		/* Tile relocatable flag (always yes)
+		 * 0 = Not relocatable
+		 * 1 = Relocatable
+		 */
+		uint8_t tile_relocatable;
+
+		/* Start tile (Always 0) */
+		uint8_t tile_start;
+
+		/* End tile (num_tiles - 1) */
+		uint8_t tile_end;
+
+		/* Inference batch size */
+		uint8_t batch_size;
+
+		/* Number of input tensors (Max 8) */
+		uint8_t num_input;
+
+		/* Number of output tensors (Max 8) */
+		uint8_t num_output;
+		uint8_t reserved1;
+
+		/* Total input size in bytes */
+		uint32_t input_size;
+
+		/* Total output size in bytes */
+		uint32_t output_size;
+
+		/* Table size in bytes */
+		uint32_t table_size;
+
+		/* Number of layers in the network */
+		uint32_t num_layers;
+		uint32_t reserved2;
+
+		/* Floor of absolute OCM region */
+		uint64_t ocm_tmp_range_floor;
+
+		/* Relative OCM start address of WB data block */
+		uint64_t ocm_wb_range_start;
+
+		/* Relative OCM end address of WB data block */
+		uint64_t ocm_wb_range_end;
+
+		/* Relative DDR start address of WB data block */
+		uint64_t ddr_wb_range_start;
+
+		/* Relative DDR end address of WB data block */
+		uint64_t ddr_wb_range_end;
+
+		/* Relative DDR start address of all inputs */
+		uint64_t ddr_input_range_start;
+
+		/* Relative DDR end address of all inputs */
+		uint64_t ddr_input_range_end;
+
+		/* Relative DDR start address of all outputs */
+		uint64_t ddr_output_range_start;
+
+		/* Relative DDR end address of all outputs */
+		uint64_t ddr_output_range_end;
+
+		/* Compiler version */
+		uint8_t compiler_version[8];
+
+		/* CDK version */
+		uint8_t cdk_version[4];
+
+		/* Lower batch optimization support
+		 * 0 - No,
+		 * 1 - Yes
+		 */
+		uint8_t supports_lower_batch_size_optimization;
+		uint8_t reserved[59];
+	} model;
+
+	/* Init section (64-byte) */
+	struct {
+		uint32_t file_offset;
+		uint32_t file_size;
+		uint8_t reserved[56];
+	} init_model;
+
+	/* Main section (64-byte) */
+	struct {
+		uint32_t file_offset;
+		uint32_t file_size;
+		uint8_t reserved[56];
+	} main_model;
+
+	/* Finish section (64-byte) */
+	struct {
+		uint32_t file_offset;
+		uint32_t file_size;
+		uint8_t reserved[56];
+	} finish_model;
+
+	uint8_t reserved1[512]; /* End of 2k bytes */
+
+	/* Weights and Biases (64-byte) */
+	struct {
+		/* Memory offset, Set to ddr_wb_range_start */
+		uint64_t mem_offset;
+		uint32_t file_offset;
+		uint32_t file_size;
+
+		/* Relocatable flag for WB (metadata check requires 1)
+		 * 1 = Relocatable
+		 * 2 = Not relocatable
+		 */
+		uint8_t relocatable;
+		uint8_t reserved[47];
+	} weights_bias;
+
+	/* Input (512-byte, 64-byte per input) provisioned for 8 inputs */
+	struct {
+		/* DDR offset (in ocm absolute addresses for input) */
+		uint64_t mem_offset;
+
+		/* Relocatable flag
+		 * 1 = Relocatable
+		 * 2 = Not relocatable
+		 */
+		uint8_t relocatable;
+
+		/* Input quantization
+		 * 1 = Requires quantization
+		 * 2 = Pre-quantized
+		 */
+		uint8_t quantize;
+
+		/* Type of incoming input
+		 * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16,
+		 * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
+		 */
+		uint8_t input_type;
+
+		/* Type of input required by model
+		 * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16,
+		 * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
+		 */
+		uint8_t model_input_type;
+
+		/* float_32 qscale value
+		 * quantized = non-quantized * qscale
+		 */
+		float qscale;
+
+		/* Input shape */
+		struct {
+			/* Input format
+			 * 1 = NCHW
+			 * 2 = NHWC
+			 */
+			uint8_t format;
+			uint8_t reserved[3];
+			uint32_t w;
+			uint32_t x;
+			uint32_t y;
+			uint32_t z;
+		} shape;
+		uint8_t reserved[4];
+
+		/* Name of input */
+		uint8_t input_name[MRVL_ML_INPUT_NAME_LEN];
+
+		/* DDR range end
+		 * new = mem_offset + size_bytes - 1
+		 */
+		uint64_t ddr_range_end;
+	} input[MRVL_ML_INPUT_OUTPUT_SIZE];
+
+	/* Output (512-byte, 64-byte per output) provisioned for 8 outputs */
+	struct {
+		/* DDR offset in ocm absolute addresses for output */
+		uint64_t mem_offset;
+
+		/* Relocatable flag
+		 * 1 = Relocatable
+		 * 2 = Not relocatable
+		 */
+		uint8_t relocatable;
+
+		/* Output dequantization
+		 * 1 = De-quantization required
+		 * 2 = De-quantization not required
+		 */
+		uint8_t dequantize;
+
+		/* Type of outgoing output
+		 * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16
+		 * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
+		 */
+		uint8_t output_type;
+
+		/* Type of output produced by model
+		 * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16
+		 * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
+		 */
+		uint8_t model_output_type;
+
+		/* float_32 dscale value
+		 * dequantized = quantized * dscale
+		 */
+		float dscale;
+
+		/* Number of items in the output */
+		uint32_t size;
+		uint8_t reserved[20];
+
+		/* DDR range end
+		 * new = mem_offset + size_bytes - 1
+		 */
+		uint64_t ddr_range_end;
+		uint8_t output_name[MRVL_ML_OUTPUT_NAME_LEN];
+	} output[MRVL_ML_INPUT_OUTPUT_SIZE];
+
+	uint8_t reserved2[1792];
+
+	/* Model data */
+	struct {
+		uint8_t reserved1[4068];
+
+		/* Beta: xx.xx.xx.xx,
+		 * Later: YYYYMM.xx.xx
+		 */
+		uint8_t compiler_version[8];
+
+		/* M1K CDK version (xx.xx.xx.xx) */
+		uint8_t m1k_cdk_version[4];
+	} data;
+
+	/* Hidden 16 bytes of magic code */
+	uint8_t reserved3[16];
+};
+
/* ML Model Object */
struct cn10k_ml_model {
/* Device reference */
@@ -33,6 +336,12 @@ struct cn10k_ml_model {
/* Model ID */
int16_t model_id;
+ /* Batch size */
+ uint32_t batch_size;
+
+ /* Model metadata */
+ struct cn10k_ml_model_metadata metadata;
+
/* Model lock, used to update model state */
plt_spinlock_t lock;
@@ -40,4 +349,7 @@ struct cn10k_ml_model {
enum cn10k_ml_model_state state;
};
+int cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size);
+void cn10k_ml_model_metadata_update(struct cn10k_ml_model_metadata *metadata);
+
#endif /* _CN10K_ML_MODEL_H_ */
@@ -416,8 +416,11 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
uint64_t mz_size;
uint16_t idx;
bool found;
+ int ret;
- PLT_SET_USED(params);
+ ret = cn10k_ml_model_metadata_check(params->addr, params->size);
+ if (ret != 0)
+ return ret;
mldev = dev->data->dev_private;
@@ -450,6 +453,15 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
model->mldev = mldev;
model->model_id = idx;
+ memcpy(&model->metadata, params->addr, sizeof(struct cn10k_ml_model_metadata));
+ cn10k_ml_model_metadata_update(&model->metadata);
+
+	/* A metadata batch_size of 0 denotes the default, use 256 in that case */
+ if (model->metadata.model.batch_size == 0)
+ model->batch_size = 256;
+ else
+ model->batch_size = model->metadata.model.batch_size;
+
plt_spinlock_init(&model->lock);
model->state = ML_CN10K_MODEL_STATE_LOADED;
dev->data->models[idx] = model;
@@ -19,7 +19,7 @@ headers = files(
'cn10k_ml_model.h',
)
-deps += ['mldev', 'common_ml', 'common_cnxk', 'kvargs']
+deps += ['mldev', 'common_ml', 'common_cnxk', 'kvargs', 'hash']
if get_option('buildtype').contains('debug')
cflags += [ '-DCNXK_ML_DEV_DEBUG' ]