From patchwork Sun Apr 23 05:08:12 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Srikanth Yalavarthi X-Patchwork-Id: 126425 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id B5128429BA; Sun, 23 Apr 2023 07:08:26 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id D932242BD9; Sun, 23 Apr 2023 07:08:22 +0200 (CEST) Received: from mx0b-0016f401.pphosted.com (mx0b-0016f401.pphosted.com [67.231.156.173]) by mails.dpdk.org (Postfix) with ESMTP id 105EC40A80 for ; Sun, 23 Apr 2023 07:08:21 +0200 (CEST) Received: from pps.filterd (m0045851.ppops.net [127.0.0.1]) by mx0b-0016f401.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id 33N4iPB5009043 for ; Sat, 22 Apr 2023 22:08:21 -0700 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com; h=from : to : cc : subject : date : message-id : in-reply-to : references : mime-version : content-type; s=pfpt0220; bh=4lpqvncZNW/IYRnZDkegETqCGFDrReJ5ft7zaFd5eJo=; b=YHJ/OpE9q+FVqDwmJLNHtgCxa8ddBQsmh9/1hgvnsKQmtGpiIgbvqv+BBtEdoQqbrCj9 IMRSCTzT8zICcXuYwyAt4eDTli+VS74IIyQ6+O0pfLsts4XdoLKqSzuy4o3VmEQfZMTx nwZgGiLofutFdPx25zvd/x5yrGTIksi6cDLBjRUf6BYiKIRTnzMATFYWutgQVGFp1jHh h5VGcUpGLzl8zOCsiGnwy0GmJcCMF3chzWYDbMSak4O6cXdS9ccwJuz8buRRBPkxJJ7w 2NpE9oyrCfz8PGfbKAuF5NOoUxgASg8Kp38NZbJrQMQNWhOnGlK3LY4uRMOsdc2JOsiX pQ== Received: from dc5-exch01.marvell.com ([199.233.59.181]) by mx0b-0016f401.pphosted.com (PPS) with ESMTPS id 3q4f3p225y-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT) for ; Sat, 22 Apr 2023 22:08:21 -0700 Received: from DC5-EXCH02.marvell.com (10.69.176.39) by DC5-EXCH01.marvell.com (10.69.176.38) with Microsoft SMTP Server (TLS) id 15.0.1497.48; Sat, 22 Apr 2023 22:08:19 -0700 
Received: from maili.marvell.com (10.69.176.80) by DC5-EXCH02.marvell.com (10.69.176.39) with Microsoft SMTP Server id 15.0.1497.48 via Frontend Transport; Sat, 22 Apr 2023 22:08:19 -0700 Received: from ml-host-33.caveonetworks.com (unknown [10.110.143.233]) by maili.marvell.com (Postfix) with ESMTP id 96F9F3F704C; Sat, 22 Apr 2023 22:08:18 -0700 (PDT) From: Srikanth Yalavarthi To: Srikanth Yalavarthi CC: , , , Subject: [PATCH v1 1/3] ml/cnxk: split metadata fields into sections Date: Sat, 22 Apr 2023 22:08:12 -0700 Message-ID: <20230423050814.825-2-syalavarthi@marvell.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20230423050814.825-1-syalavarthi@marvell.com> References: <20230423050814.825-1-syalavarthi@marvell.com> MIME-Version: 1.0 X-Proofpoint-ORIG-GUID: 5suXOda_no1MdEr59k8t5BKXrmeGElBw X-Proofpoint-GUID: 5suXOda_no1MdEr59k8t5BKXrmeGElBw X-Proofpoint-Virus-Version: vendor=baseguard engine=ICAP:2.0.254,Aquarius:18.0.942,Hydra:6.0.573,FMLib:17.11.170.22 definitions=2023-04-23_02,2023-04-21_01,2023-02-09_01 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Split metadata into header, model sections, weights & bias, input / output and data sections. This is a preparatory step to introduce v2301 of model metadata. 
Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_model.c | 26 +- drivers/ml/cnxk/cn10k_ml_model.h | 487 ++++++++++++++++--------------- 2 files changed, 270 insertions(+), 243 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_model.c b/drivers/ml/cnxk/cn10k_ml_model.c index 2ded05c5dc..c0b7b061f5 100644 --- a/drivers/ml/cnxk/cn10k_ml_model.c +++ b/drivers/ml/cnxk/cn10k_ml_model.c @@ -47,42 +47,42 @@ cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size) metadata = (struct cn10k_ml_model_metadata *)buffer; /* Header CRC check */ - if (metadata->metadata_header.header_crc32c != 0) { - header_crc32c = rte_hash_crc( - buffer, sizeof(metadata->metadata_header) - sizeof(uint32_t), 0); + if (metadata->header.header_crc32c != 0) { + header_crc32c = + rte_hash_crc(buffer, sizeof(metadata->header) - sizeof(uint32_t), 0); - if (header_crc32c != metadata->metadata_header.header_crc32c) { + if (header_crc32c != metadata->header.header_crc32c) { plt_err("Invalid model, Header CRC mismatch"); return -EINVAL; } } /* Payload CRC check */ - if (metadata->metadata_header.payload_crc32c != 0) { - payload_crc32c = rte_hash_crc(buffer + sizeof(metadata->metadata_header), - size - sizeof(metadata->metadata_header), 0); + if (metadata->header.payload_crc32c != 0) { + payload_crc32c = rte_hash_crc(buffer + sizeof(metadata->header), + size - sizeof(metadata->header), 0); - if (payload_crc32c != metadata->metadata_header.payload_crc32c) { + if (payload_crc32c != metadata->header.payload_crc32c) { plt_err("Invalid model, Payload CRC mismatch"); return -EINVAL; } } /* Model magic string */ - if (strncmp((char *)metadata->metadata_header.magic, MRVL_ML_MODEL_MAGIC_STRING, 4) != 0) { - plt_err("Invalid model, magic = %s", metadata->metadata_header.magic); + if (strncmp((char *)metadata->header.magic, MRVL_ML_MODEL_MAGIC_STRING, 4) != 0) { + plt_err("Invalid model, magic = %s", metadata->header.magic); return -EINVAL; } /* Target architecture */ - if 
(metadata->metadata_header.target_architecture != MRVL_ML_MODEL_TARGET_ARCH) { + if (metadata->header.target_architecture != MRVL_ML_MODEL_TARGET_ARCH) { plt_err("Model target architecture (%u) not supported", - metadata->metadata_header.target_architecture); + metadata->header.target_architecture); return -ENOTSUP; } /* Header version */ - rte_memcpy(version, metadata->metadata_header.version, 4 * sizeof(uint8_t)); + rte_memcpy(version, metadata->header.version, 4 * sizeof(uint8_t)); if (version[0] * 1000 + version[1] * 100 < MRVL_ML_MODEL_VERSION) { plt_err("Metadata version = %u.%u.%u.%u (< %u.%u.%u.%u) not supported", version[0], version[1], version[2], version[3], (MRVL_ML_MODEL_VERSION / 1000) % 10, diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h index 1bc748265d..b30ad5a981 100644 --- a/drivers/ml/cnxk/cn10k_ml_model.h +++ b/drivers/ml/cnxk/cn10k_ml_model.h @@ -30,298 +30,325 @@ enum cn10k_ml_model_state { #define MRVL_ML_OUTPUT_NAME_LEN 16 #define MRVL_ML_INPUT_OUTPUT_SIZE 8 -/* Model file metadata structure */ -struct cn10k_ml_model_metadata { - /* Header (256-byte) */ - struct { - /* Magic string ('M', 'R', 'V', 'L') */ - uint8_t magic[4]; +/* Header (256-byte) */ +struct cn10k_ml_model_metadata_header { + /* Magic string ('M', 'R', 'V', 'L') */ + uint8_t magic[4]; - /* Metadata version */ - uint8_t version[4]; + /* Metadata version */ + uint8_t version[4]; - /* Metadata size */ - uint32_t metadata_size; + /* Metadata size */ + uint32_t metadata_size; - /* Unique ID */ - uint8_t uuid[128]; + /* Unique ID */ + uint8_t uuid[128]; - /* Model target architecture - * 0 = Undefined - * 1 = M1K - * 128 = MLIP - * 256 = Experimental - */ - uint32_t target_architecture; - uint8_t reserved[104]; + /* Model target architecture + * 0 = Undefined + * 1 = M1K + * 128 = MLIP + * 256 = Experimental + */ + uint32_t target_architecture; + uint8_t reserved[104]; - /* CRC of data after metadata_header (i.e. 
after first 256 bytes) */ - uint32_t payload_crc32c; + /* CRC of data after header (i.e. after first 256 bytes) */ + uint32_t payload_crc32c; - /* CRC of first 252 bytes of metadata_header, after payload_crc calculation */ - uint32_t header_crc32c; - } metadata_header; + /* CRC of first 252 bytes of header, after payload_crc calculation */ + uint32_t header_crc32c; +}; - /* Model information (256-byte) */ - struct { - /* Model name string */ - uint8_t name[MRVL_ML_MODEL_NAME_LEN]; +/* Model information (256-byte) */ +struct cn10k_ml_model_metadata_model { + /* Model name string */ + uint8_t name[MRVL_ML_MODEL_NAME_LEN]; - /* Model version info (xx.xx.xx.xx) */ - uint8_t version[4]; + /* Model version info (xx.xx.xx.xx) */ + uint8_t version[4]; - /* Model code size (Init + Main + Finish) */ - uint32_t code_size; + /* Model code size (Init + Main + Finish) */ + uint32_t code_size; - /* Model data size (Weights and Bias) */ - uint32_t data_size; + /* Model data size (Weights and Bias) */ + uint32_t data_size; - /* OCM start offset, set to ocm_wb_range_start */ - uint32_t ocm_start; + /* OCM start offset, set to ocm_wb_range_start */ + uint32_t ocm_start; - /* OCM start offset, set to max OCM size */ - uint32_t ocm_end; + /* OCM end offset, set to max OCM size */ + uint32_t ocm_end; - /* Relocatable flag (always yes) - * 0 = Not relocatable - * 1 = Relocatable - */ - uint8_t ocm_relocatable; + /* Relocatable flag (always yes) + * 0 = Not relocatable + * 1 = Relocatable + */ + uint8_t ocm_relocatable; - /* Tile relocatable flag (always yes) - * 0 = Not relocatable - * 1 = Relocatable - */ - uint8_t tile_relocatable; + /* Tile relocatable flag (always yes) + * 0 = Not relocatable + * 1 = Relocatable + */ + uint8_t tile_relocatable; - /* Start tile (Always 0) */ - uint8_t tile_start; + /* Start tile (Always 0) */ + uint8_t tile_start; - /* End tile (num_tiles - 1) */ - uint8_t tile_end; + /* End tile (num_tiles - 1) */ + uint8_t tile_end; - /* Inference batch size */ - 
uint8_t batch_size; + /* Inference batch size */ + uint8_t batch_size; - /* Number of input tensors (Max 8) */ - uint8_t num_input; + /* Number of input tensors (Max 8) */ + uint8_t num_input; - /* Number of output tensors (Max 8) */ - uint8_t num_output; - uint8_t reserved1; + /* Number of output tensors (Max 8) */ + uint8_t num_output; + uint8_t reserved_1; - /* Total input size in bytes */ - uint32_t input_size; + /* Total input size in bytes */ + uint32_t input_size; - /* Total output size in bytes */ - uint32_t output_size; + /* Total output size in bytes */ + uint32_t output_size; - /* Table size in bytes */ - uint32_t table_size; + /* Table size in bytes */ + uint32_t table_size; - /* Number of layers in the network */ - uint32_t num_layers; - uint32_t reserved2; + /* Number of layers in the network */ + uint32_t num_layers; + uint32_t reserved_2; - /* Floor of absolute OCM region */ - uint64_t ocm_tmp_range_floor; + /* Floor of absolute OCM region */ + uint64_t ocm_tmp_range_floor; - /* Relative OCM start address of WB data block */ - uint64_t ocm_wb_range_start; + /* Relative OCM start address of WB data block */ + uint64_t ocm_wb_range_start; - /* Relative OCM end address of WB data block */ - uint64_t ocm_wb_range_end; + /* Relative OCM end address of WB data block */ + uint64_t ocm_wb_range_end; - /* Relative DDR start address of WB data block */ - uint64_t ddr_wb_range_start; + /* Relative DDR start address of WB data block */ + uint64_t ddr_wb_range_start; - /* Relative DDR end address of all outputs */ - uint64_t ddr_wb_range_end; + /* Relative DDR end address of WB data block */ + uint64_t ddr_wb_range_end; - /* Relative DDR start address of all inputs */ - uint64_t ddr_input_range_start; + /* Relative DDR start address of all inputs */ + uint64_t ddr_input_range_start; - /* Relative DDR end address of all inputs */ - uint64_t ddr_input_range_end; + /* Relative DDR end address of all inputs */ + uint64_t ddr_input_range_end; - /* Relative DDR start 
address of all outputs */ - uint64_t ddr_output_range_start; + /* Relative DDR start address of all outputs */ + uint64_t ddr_output_range_start; - /* Relative DDR end address of all outputs */ - uint64_t ddr_output_range_end; + /* Relative DDR end address of all outputs */ + uint64_t ddr_output_range_end; - /* Compiler version */ - uint8_t compiler_version[8]; + /* Compiler version */ + uint8_t compiler_version[8]; - /* CDK version */ - uint8_t cdk_version[4]; + /* CDK version */ + uint8_t cdk_version[4]; - /* Lower batch optimization support - * 0 - No, - * 1 - Yes - */ - uint8_t supports_lower_batch_size_optimization; - uint8_t reserved3[59]; - } model; + /* Lower batch optimization support + * 0 - No, + * 1 - Yes + */ + uint8_t supports_lower_batch_size_optimization; + uint8_t reserved_3[59]; +}; - /* Init section (64-byte) */ - struct { - uint32_t file_offset; - uint32_t file_size; - uint8_t reserved[56]; - } init_model; +/* Init section (64-byte) */ +struct cn10k_ml_model_metadata_init_section { + uint32_t file_offset; + uint32_t file_size; + uint8_t reserved[56]; +}; - /* Main section (64-byte) */ - struct { - uint32_t file_offset; - uint32_t file_size; - uint8_t reserved[56]; - } main_model; +/* Main section (64-byte) */ +struct cn10k_ml_model_metadata_main_section { + uint32_t file_offset; + uint32_t file_size; + uint8_t reserved[56]; +}; - /* Finish section (64-byte) */ - struct { - uint32_t file_offset; - uint32_t file_size; - uint8_t reserved[56]; - } finish_model; +/* Finish section (64-byte) */ +struct cn10k_ml_model_metadata_finish_section { + uint32_t file_offset; + uint32_t file_size; + uint8_t reserved[56]; +}; - uint8_t reserved1[512]; /* End of 2k bytes */ +/* Weights and Bias (64-byte) */ +struct cn10k_ml_model_metadata_weights_bias_section { + /* Memory offset, set to ddr_wb_range_start */ + uint64_t mem_offset; + uint32_t file_offset; + uint32_t file_size; - /* Weights and Bias (64-byte) */ + /* Relocatable flag for WB + * 1 = Relocatable + * 
2 = Not relocatable + */ + uint8_t relocatable; + uint8_t reserved[47]; +}; + +/* Input section (64-byte per input) */ +struct cn10k_ml_model_metadata_input_section { + /* DDR offset (in OCM absolute addresses for input) */ + uint64_t mem_offset; + + /* Relocatable flag + * 1 = Relocatable + * 2 = Not relocatable + */ + uint8_t relocatable; + + /* Input quantization + * 1 = Requires quantization + * 2 = Pre-quantized + */ + uint8_t quantize; + + /* Type of incoming input + * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16, + * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32 + */ + uint8_t input_type; + + /* Type of input required by model + * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16, + * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32 + */ + uint8_t model_input_type; + + /* float_32 qscale value + * quantized = non-quantized * qscale + */ + float qscale; + + /* Input shape */ struct { - /* Memory offset, set to ddr_wb_range_start */ - uint64_t mem_offset; - uint32_t file_offset; - uint32_t file_size; - - /* Relocatable flag for WB - * 1 = Relocatable - * 2 = Not relocatable + /* Input format + * 1 = NCHW + * 2 = NHWC */ - uint8_t relocatable; - uint8_t reserved[47]; - } weights_bias; + uint8_t format; + uint8_t reserved[3]; + uint32_t w; + uint32_t x; + uint32_t y; + uint32_t z; + } shape; + uint8_t reserved[4]; + + /* Name of input */ + uint8_t input_name[MRVL_ML_INPUT_NAME_LEN]; + + /* DDR range end + * new = mem_offset + size_bytes - 1 + */ + uint64_t ddr_range_end; +}; - /* Input (512-byte, 64-byte per input) provisioned for 8 inputs */ - struct { - /* DDR offset (in OCM absolute addresses for input) */ - uint64_t mem_offset; +/* Output section (64-byte per output) */ +struct cn10k_ml_model_metadata_output_section { + /* DDR offset in OCM absolute addresses for output */ + uint64_t mem_offset; - /* Relocatable flag - * 1 = Relocatable - * 2 = Not relocatable - */ - uint8_t relocatable; + /* Relocatable flag + * 1 = Relocatable + * 2 = Not relocatable + */ + uint8_t 
relocatable; - /* Input quantization - * 1 = Requires quantization - * 2 = Pre-quantized - */ - uint8_t quantize; + /* Output dequantization + * 1 = De-quantization required + * 2 = De-quantization not required + */ + uint8_t dequantize; - /* Type of incoming input - * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16, - * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32 - */ - uint8_t input_type; + /* Type of outgoing output + * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16 + * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32 + */ + uint8_t output_type; - /* Type of input required by model - * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16, - * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32 - */ - uint8_t model_input_type; + /* Type of output produced by model + * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16 + * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32 + */ + uint8_t model_output_type; - /* float_32 qscale value - * quantized = non-quantized * qscale - */ - float qscale; - - /* Input shape */ - struct { - /* Input format - * 1 = NCHW - * 2 = NHWC - */ - uint8_t format; - uint8_t reserved[3]; - uint32_t w; - uint32_t x; - uint32_t y; - uint32_t z; - } shape; - uint8_t reserved[4]; - - /* Name of input */ - uint8_t input_name[MRVL_ML_INPUT_NAME_LEN]; - - /* DDR range end - * new = mem_offset + size_bytes - 1 - */ - uint64_t ddr_range_end; - } input[MRVL_ML_INPUT_OUTPUT_SIZE]; + /* float_32 dscale value + * dequantized = quantized * dscale + */ + float dscale; - /* Output (512 byte, 64-byte per input) provisioned for 8 outputs */ - struct { - /* DDR offset in OCM absolute addresses for output */ - uint64_t mem_offset; + /* Number of items in the output */ + uint32_t size; + uint8_t reserved[20]; - /* Relocatable flag - * 1 = Relocatable - * 2 = Not relocatable - */ - uint8_t relocatable; + /* DDR range end + * new = mem_offset + size_bytes - 1 + */ + uint64_t ddr_range_end; + uint8_t output_name[MRVL_ML_OUTPUT_NAME_LEN]; +}; - /* Output dequantization - * 1 = De-quantization required - * 2 = 
De-quantization not required - */ - uint8_t dequantize; +/* Model data */ +struct cn10k_ml_model_metadata_data_section { + uint8_t reserved[4068]; - /* Type of outgoing output - * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16 - * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32 - */ - uint8_t output_type; + /* Beta: xx.xx.xx.xx, + * Later: YYYYMM.xx.xx + */ + uint8_t compiler_version[8]; - /* Type of output produced by model - * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16 - * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32 - */ - uint8_t model_output_type; + /* M1K CDK version (xx.xx.xx.xx) */ + uint8_t m1k_cdk_version[4]; +}; - /* float_32 dscale value - * dequantized = quantized * dscale - */ - float dscale; +/* Model file metadata structure */ +struct cn10k_ml_model_metadata { + /* Header (256-byte) */ + struct cn10k_ml_model_metadata_header header; - /* Number of items in the output */ - uint32_t size; - uint8_t reserved[20]; + /* Model information (256-byte) */ + struct cn10k_ml_model_metadata_model model; - /* DDR range end - * new = mem_offset + size_bytes - 1 - */ - uint64_t ddr_range_end; - uint8_t output_name[MRVL_ML_OUTPUT_NAME_LEN]; - } output[MRVL_ML_INPUT_OUTPUT_SIZE]; + /* Init section (64-byte) */ + struct cn10k_ml_model_metadata_init_section init_model; - uint8_t reserved2[1792]; + /* Main section (64-byte) */ + struct cn10k_ml_model_metadata_main_section main_model; - /* Model data */ - struct { - uint8_t reserved1[4068]; + /* Finish section (64-byte) */ + struct cn10k_ml_model_metadata_finish_section finish_model; - /* Beta: xx.xx.xx.xx, - * Later: YYYYMM.xx.xx - */ - uint8_t compiler_version[8]; + uint8_t reserved_1[512]; /* End of 2k bytes */ + + /* Weights and Bias (64-byte) */ + struct cn10k_ml_model_metadata_weights_bias_section weights_bias; + + /* Input (512-bytes, 64-byte per input) provisioned for 8 inputs */ + struct cn10k_ml_model_metadata_input_section input[MRVL_ML_INPUT_OUTPUT_SIZE]; + + /* Output (512-bytes, 64-byte per output) provisioned for 8 
outputs */ + struct cn10k_ml_model_metadata_output_section output[MRVL_ML_INPUT_OUTPUT_SIZE]; - /* M1K CDK version (xx.xx.xx.xx) */ - uint8_t m1k_cdk_version[4]; - } data; + uint8_t reserved_2[1792]; + + /* Model data */ + struct cn10k_ml_model_metadata_data_section data; /* Hidden 16 bytes of magic code */ - uint8_t reserved3[16]; + uint8_t reserved_3[16]; }; /* Model address structure */