From patchwork Thu Dec  8 20:17:53 2022
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Srikanth Yalavarthi <syalavarthi@marvell.com>
X-Patchwork-Id: 120663
X-Patchwork-Delegate: thomas@monjalon.net
Return-Path: <dev-bounces@dpdk.org>
X-Original-To: patchwork@inbox.dpdk.org
Delivered-To: patchwork@inbox.dpdk.org
Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124])
	by inbox.dpdk.org (Postfix) with ESMTP id 1B100A0093;
	Thu,  8 Dec 2022 21:21:38 +0100 (CET)
Received: from mails.dpdk.org (localhost [127.0.0.1])
	by mails.dpdk.org (Postfix) with ESMTP id D784C42DC3;
	Thu,  8 Dec 2022 21:19:34 +0100 (CET)
Received: from mx0b-0016f401.pphosted.com (mx0b-0016f401.pphosted.com
 [67.231.156.173])
 by mails.dpdk.org (Postfix) with ESMTP id 7FF5B42D45
 for <dev@dpdk.org>; Thu,  8 Dec 2022 21:19:18 +0100 (CET)
Received: from pps.filterd (m0045851.ppops.net [127.0.0.1])
 by mx0b-0016f401.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id
 2B8JkNPA006986 for <dev@dpdk.org>; Thu, 8 Dec 2022 12:19:18 -0800
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com;
 h=from : to : cc :
 subject : date : message-id : in-reply-to : references : mime-version :
 content-type; s=pfpt0220; bh=D24L1HYc9AOcLiy/6DQcLXGzH/3/MuQtQTalBzOVzdw=;
 b=T5HDTZcGPVBOAn1xp252894hKQVThck2HdmWQRpLzXObNcBQ43xvNElwtg9UPy9L5aKn
 KZ3GBNBDk833qa9wLfg0rTmxEwb20sR65FqlpPVx6jFduiEXO+Dd4nBewnnfBkB8Uanb
 Kcy9pEUWh3VGrEP18+1H2dw7YmhuwppdNTKng0rOiPU+7EjYh2D7YaHAoltPdiHYQOG5
 HBJCZtXrYXZShvIzevuxRgQlaBWlgQNB5SJT2D0aFFfGPTXwP+BIEHrqwmRvDfuN+rWL
 tPSTpmoWh8VvboE0XqEFeNNhKSuiudvDxp3ajBxpaE5qNxANEFPz39vpAwdcHfyPwb42 vg==
Received: from dc5-exch02.marvell.com ([199.233.59.182])
 by mx0b-0016f401.pphosted.com (PPS) with ESMTPS id 3m86usnj1h-2
 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT)
 for <dev@dpdk.org>; Thu, 08 Dec 2022 12:19:17 -0800
Received: from DC5-EXCH01.marvell.com (10.69.176.38) by DC5-EXCH02.marvell.com
 (10.69.176.39) with Microsoft SMTP Server (TLS) id 15.0.1497.18;
 Thu, 8 Dec 2022 12:19:15 -0800
Received: from maili.marvell.com (10.69.176.80) by DC5-EXCH01.marvell.com
 (10.69.176.38) with Microsoft SMTP Server id 15.0.1497.2 via Frontend
 Transport; Thu, 8 Dec 2022 12:19:15 -0800
Received: from ml-host-33.caveonetworks.com (unknown [10.110.143.233])
 by maili.marvell.com (Postfix) with ESMTP id F0E8E3F70F7;
 Thu,  8 Dec 2022 12:18:18 -0800 (PST)
From: Srikanth Yalavarthi <syalavarthi@marvell.com>
To: Srikanth Yalavarthi <syalavarthi@marvell.com>
CC: <dev@dpdk.org>, <sshankarnara@marvell.com>, <jerinj@marvell.com>,
 <aprabhu@marvell.com>
Subject: [PATCH v2 25/37] ml/cnxk: enqueue a burst of inference requests
Date: Thu, 8 Dec 2022 12:17:53 -0800
Message-ID: <20221208201806.21893-26-syalavarthi@marvell.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20221208201806.21893-1-syalavarthi@marvell.com>
References: <20221208200220.20267-1-syalavarthi@marvell.com>
 <20221208201806.21893-1-syalavarthi@marvell.com>
MIME-Version: 1.0
X-Proofpoint-GUID: ers6DzDskz7hhscEtv7iXsay2OrHRGku
X-Proofpoint-ORIG-GUID: ers6DzDskz7hhscEtv7iXsay2OrHRGku
X-Proofpoint-Virus-Version: vendor=baseguard
 engine=ICAP:2.0.205,Aquarius:18.0.923,Hydra:6.0.545,FMLib:17.11.122.1
 definitions=2022-12-08_11,2022-12-08_01,2022-06-22_01
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <https://mails.dpdk.org/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://mails.dpdk.org/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <https://mails.dpdk.org/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
Errors-To: dev-bounces@dpdk.org

Enabled driver support to enqueue a burst of inference requests
to ML device. Enqueue uses internal ML request structure to queue
the inferences and job completion through polling.

Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
 drivers/ml/cnxk/cn10k_ml_ops.c | 96 ++++++++++++++++++++++++++++++++++
 drivers/ml/cnxk/cn10k_ml_ops.h |  7 +++
 2 files changed, 103 insertions(+)

diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index 9cf3bb4a9f..6f2d1adac8 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -285,6 +285,28 @@ cn10k_ml_prep_sp_job_descriptor(struct cn10k_ml_dev *mldev, struct cn10k_ml_mode
 	}
 }
 
+static __rte_always_inline void
+cn10k_ml_prep_fp_job_descriptor(struct rte_ml_dev *dev, struct cn10k_ml_req *req,
+				struct rte_ml_op *op)
+{
+	struct cn10k_ml_dev *mldev;
+
+	mldev = dev->data->dev_private;
+
+	req->jd.hdr.jce.w0.u64 = 0;
+	req->jd.hdr.jce.w1.u64 = PLT_U64_CAST(&req->status);
+	req->jd.hdr.model_id = op->model_id;
+	req->jd.hdr.job_type = ML_CN10K_JOB_TYPE_MODEL_RUN;
+	req->jd.hdr.fp_flags = ML_FLAGS_POLL_COMPL;
+	req->jd.hdr.sp_flags = 0x0;
+	req->jd.hdr.result = roc_ml_addr_ap2mlip(&mldev->roc, &req->result);
+	req->jd.model_run.input_ddr_addr =
+		PLT_U64_CAST(roc_ml_addr_ap2mlip(&mldev->roc, op->input.addr));
+	req->jd.model_run.output_ddr_addr =
+		PLT_U64_CAST(roc_ml_addr_ap2mlip(&mldev->roc, op->output.addr));
+	req->jd.model_run.num_batches = op->nb_batches;
+}
+
 static int
 cn10k_ml_dev_info_get(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info)
 {
@@ -450,6 +472,8 @@ cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c
 
 	rte_spinlock_init(&ocm->lock);
 
+	dev->enqueue_burst = cn10k_ml_enqueue_burst;
+
 	mldev->nb_models_loaded = 0;
 	mldev->state = ML_CN10K_DEV_STATE_CONFIGURED;
 
@@ -1379,6 +1403,78 @@ cn10k_ml_io_dequantize(struct rte_ml_dev *dev, int16_t model_id, uint16_t nb_bat
 	return 0;
 }
 
+static __rte_always_inline void
+queue_index_advance(uint64_t *index, uint64_t nb_desc)
+{
+	*index = (*index + 1) % nb_desc;
+}
+
+static __rte_always_inline uint64_t
+queue_pending_count(uint64_t head, uint64_t tail, uint64_t nb_desc)
+{
+	return (nb_desc + head - tail) % nb_desc;
+}
+
+static __rte_always_inline uint64_t
+queue_free_count(uint64_t head, uint64_t tail, uint64_t nb_desc)
+{
+	return nb_desc - queue_pending_count(head, tail, nb_desc) - 1;
+}
+
+__rte_hot uint16_t
+cn10k_ml_enqueue_burst(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops,
+		       uint16_t nb_ops)
+{
+	struct cn10k_ml_queue *queue;
+	struct cn10k_ml_dev *mldev;
+	struct cn10k_ml_req *req;
+	struct cn10k_ml_qp *qp;
+	struct rte_ml_op *op;
+
+	uint16_t count;
+	uint64_t head;
+	bool enqueued;
+
+	mldev = dev->data->dev_private;
+	qp = dev->data->queue_pairs[qp_id];
+	queue = &qp->queue;
+
+	head = queue->head;
+	nb_ops = PLT_MIN(nb_ops, queue_free_count(head, queue->tail, qp->nb_desc));
+	count = 0;
+
+	if (unlikely(nb_ops == 0))
+		return 0;
+
+enqueue_req:
+	op = ops[count];
+	req = &queue->reqs[head];
+
+	cn10k_ml_prep_fp_job_descriptor(dev, req, op);
+
+	memset(&req->result, 0, sizeof(struct cn10k_ml_result));
+	req->result.user_ptr = op->user_ptr;
+
+	plt_write64(ML_CN10K_POLL_JOB_START, &req->status);
+	enqueued = roc_ml_jcmdq_enqueue_lf(&mldev->roc, &req->jcmd);
+	if (unlikely(!enqueued))
+		goto jcmdq_full;
+
+	req->timeout = plt_tsc_cycles() + queue->wait_cycles;
+	req->op = op;
+
+	queue_index_advance(&head, qp->nb_desc);
+	count++;
+
+	if (count < nb_ops)
+		goto enqueue_req;
+
+jcmdq_full:
+	queue->head = head;
+
+	return count;
+}
+
 struct rte_ml_dev_ops cn10k_ml_ops = {
 	/* Device control ops */
 	.dev_info_get = cn10k_ml_dev_info_get,
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.h b/drivers/ml/cnxk/cn10k_ml_ops.h
index 5e7e42ee88..e3f61beeab 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.h
+++ b/drivers/ml/cnxk/cn10k_ml_ops.h
@@ -28,6 +28,9 @@ struct cn10k_ml_req {
 
 	/* Request timeout cycle */
 	uint64_t timeout;
+
+	/* ML op */
+	struct rte_ml_op *op;
 } __rte_aligned(ROC_ALIGN);
 
 /* ML request queue */
@@ -67,4 +70,8 @@ int cn10k_ml_model_unload(struct rte_ml_dev *dev, int16_t model_id);
 int cn10k_ml_model_start(struct rte_ml_dev *dev, int16_t model_id);
 int cn10k_ml_model_stop(struct rte_ml_dev *dev, int16_t model_id);
 
+/* Fast-path ops */
+__rte_hot uint16_t cn10k_ml_enqueue_burst(struct rte_ml_dev *dev, uint16_t qp_id,
+					  struct rte_ml_op **ops, uint16_t nb_ops);
+
 #endif /* _CN10K_ML_OPS_H_ */