[v5,15/39] ml/cnxk: add structures for slow and fast path JDs

Message ID 20230207160719.1307-16-syalavarthi@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series Implementation of ML CNXK driver |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Srikanth Yalavarthi Feb. 7, 2023, 4:06 p.m. UTC
  Add job descriptor (JD) structures for load, unload and run jobs.
Initialize the job command and allocate memory for the request
structures used by slow-path jobs.

Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
 drivers/ml/cnxk/cn10k_ml_dev.h   | 99 ++++++++++++++++++++++++++++++++
 drivers/ml/cnxk/cn10k_ml_model.h |  4 ++
 drivers/ml/cnxk/cn10k_ml_ops.c   | 19 +++++-
 drivers/ml/cnxk/cn10k_ml_ops.h   |  4 ++
 4 files changed, 125 insertions(+), 1 deletion(-)
  

Patch

diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h
index 02a4496c97..68fcc957fa 100644
--- a/drivers/ml/cnxk/cn10k_ml_dev.h
+++ b/drivers/ml/cnxk/cn10k_ml_dev.h
@@ -188,6 +188,105 @@  struct cn10k_ml_jd {
 
 			uint8_t rsvd[8];
 		} fw_load;
+
+		struct cn10k_ml_jd_section_model_start {
+			/* Source model start address in DDR relative to ML_MLR_BASE */
+			uint64_t model_src_ddr_addr;
+
+			/* Destination model start address in DDR relative to ML_MLR_BASE */
+			uint64_t model_dst_ddr_addr;
+
+			/* Offset to model init section in the model */
+			uint64_t model_init_offset : 32;
+
+			/* Size of init section in the model */
+			uint64_t model_init_size : 32;
+
+			/* Offset to model main section in the model */
+			uint64_t model_main_offset : 32;
+
+			/* Size of main section in the model */
+			uint64_t model_main_size : 32;
+
+			/* Offset to model finish section in the model */
+			uint64_t model_finish_offset : 32;
+
+			/* Size of finish section in the model */
+			uint64_t model_finish_size : 32;
+
+			/* Offset to WB in model bin */
+			uint64_t model_wb_offset : 32;
+
+			/* Number of model layers */
+			uint64_t num_layers : 8;
+
+			/* Number of gather entries, 0 means linear input mode (= no gather) */
+			uint64_t num_gather_entries : 8;
+
+			/* Number of scatter entries, 0 means linear output mode (= no scatter) */
+			uint64_t num_scatter_entries : 8;
+
+			/* Tile mask to load model */
+			uint64_t tilemask : 8;
+
+			/* Batch size of model  */
+			uint64_t batch_size : 32;
+
+			/* OCM WB base address */
+			uint64_t ocm_wb_base_address : 32;
+
+			/* OCM WB range start */
+			uint64_t ocm_wb_range_start : 32;
+
+			/* OCM WB range end */
+			uint64_t ocm_wb_range_end : 32;
+
+			/* DDR WB address */
+			uint64_t ddr_wb_base_address;
+
+			/* DDR WB range start */
+			uint64_t ddr_wb_range_start : 32;
+
+			/* DDR WB range end */
+			uint64_t ddr_wb_range_end : 32;
+
+			union {
+				/* Points to gather list if num_gather_entries > 0 */
+				void *gather_list;
+				struct {
+					/* Linear input mode */
+					uint64_t ddr_range_start : 32;
+					uint64_t ddr_range_end : 32;
+				} s;
+			} input;
+
+			union {
+				/* Points to scatter list if num_scatter_entries > 0 */
+				void *scatter_list;
+				struct {
+					/* Linear output mode */
+					uint64_t ddr_range_start : 32;
+					uint64_t ddr_range_end : 32;
+				} s;
+			} output;
+		} model_start;
+
+		struct cn10k_ml_jd_section_model_stop {
+			uint8_t rsvd[96];
+		} model_stop;
+
+		struct cn10k_ml_jd_section_model_run {
+			/* Address of the input for the run relative to ML_MLR_BASE */
+			uint64_t input_ddr_addr;
+
+			/* Address of the output for the run relative to ML_MLR_BASE */
+			uint64_t output_ddr_addr;
+
+			/* Number of batches to run in variable batch processing */
+			uint16_t num_batches;
+
+			uint8_t rsvd[78];
+		} model_run;
 	};
 };
 
diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h
index 7893635787..355915deeb 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.h
+++ b/drivers/ml/cnxk/cn10k_ml_model.h
@@ -11,6 +11,7 @@ 
 
 #include "cn10k_ml_dev.h"
 #include "cn10k_ml_ocm.h"
+#include "cn10k_ml_ops.h"
 
 /* Model state */
 enum cn10k_ml_model_state {
@@ -426,6 +427,9 @@  struct cn10k_ml_model {
 
 	/* State */
 	enum cn10k_ml_model_state state;
+
+	/* Slow-path operations request pointer */
+	struct cn10k_ml_req *req;
 };
 
 int cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size);
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index 302ce8a452..56adce12ea 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -12,6 +12,10 @@ 
 /* ML model macros */
 #define CN10K_ML_MODEL_MEMZONE_NAME "ml_cn10k_model_mz"
 
+/* ML Job descriptor flags */
+#define ML_FLAGS_POLL_COMPL BIT(0)
+#define ML_FLAGS_SSO_COMPL  BIT(1)
+
 static void
 qp_memzone_name_get(char *name, int size, int dev_id, int qp_id)
 {
@@ -65,6 +69,7 @@  cn10k_ml_qp_create(const struct rte_ml_dev *dev, uint16_t qp_id, uint32_t nb_des
 	struct cn10k_ml_qp *qp;
 	uint32_t len;
 	uint8_t *va;
+	uint64_t i;
 
 	/* Allocate queue pair */
 	qp = rte_zmalloc_socket("cn10k_ml_pmd_queue_pair", sizeof(struct cn10k_ml_qp), ROC_ALIGN,
@@ -95,6 +100,12 @@  cn10k_ml_qp_create(const struct rte_ml_dev *dev, uint16_t qp_id, uint32_t nb_des
 	qp->queue.wait_cycles = ML_CN10K_CMD_TIMEOUT * plt_tsc_hz();
 	qp->nb_desc = nb_desc;
 
+	/* Initialize job command */
+	for (i = 0; i < qp->nb_desc; i++) {
+		memset(&qp->queue.reqs[i].jd, 0, sizeof(struct cn10k_ml_jd));
+		qp->queue.reqs[i].jcmd.w1.s.jobptr = PLT_U64_CAST(&qp->queue.reqs[i].jd);
+	}
+
 	return qp;
 
 qp_free:
@@ -468,7 +479,8 @@  cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
 			  metadata->finish_model.file_size + metadata->weights_bias.file_size;
 	model_data_size = PLT_ALIGN_CEIL(model_data_size, ML_CN10K_ALIGN_SIZE);
 	mz_size = PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_model), ML_CN10K_ALIGN_SIZE) +
-		  2 * model_data_size;
+		  2 * model_data_size +
+		  PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_req), ML_CN10K_ALIGN_SIZE);
 
 	/* Allocate memzone for model object and model data */
 	snprintf(str, RTE_MEMZONE_NAMESIZE, "%s_%u", CN10K_ML_MODEL_MEMZONE_NAME, idx);
@@ -507,6 +519,11 @@  cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
 	model->model_mem_map.wb_pages = wb_pages;
 	model->model_mem_map.scratch_pages = scratch_pages;
 
+	/* Set slow-path request address and state */
+	model->req = PLT_PTR_ADD(
+		mz->addr, PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_model), ML_CN10K_ALIGN_SIZE) +
+				  2 * model_data_size);
+
 	plt_spinlock_init(&model->lock);
 	model->state = ML_CN10K_MODEL_STATE_LOADED;
 	dev->data->models[idx] = model;
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.h b/drivers/ml/cnxk/cn10k_ml_ops.h
index d7842ecd73..c86ce66f19 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.h
+++ b/drivers/ml/cnxk/cn10k_ml_ops.h
@@ -6,6 +6,7 @@ 
 #define _CN10K_ML_OPS_H_
 
 #include <rte_mldev.h>
+#include <rte_mldev_pmd.h>
 
 #include <roc_api.h>
 
@@ -21,6 +22,9 @@  struct cn10k_ml_req {
 
 	/* Status field for poll mode requests */
 	volatile uint64_t status;
+
+	/* Job command */
+	struct ml_job_cmd_s jcmd;
 } __rte_aligned(ROC_ALIGN);
 
 /* Request queue */