From patchwork Tue Feb 7 16:06:50 2023
X-Patchwork-Submitter: Srikanth Yalavarthi <syalavarthi@marvell.com>
X-Patchwork-Id: 123327
X-Patchwork-Delegate: thomas@monjalon.net
From: Srikanth Yalavarthi <syalavarthi@marvell.com>
To: Srikanth Yalavarthi <syalavarthi@marvell.com>
Subject: [PATCH v5 10/39] ml/cnxk: add support to create device queue-pairs
Date: Tue, 7 Feb 2023 08:06:50 -0800
Message-ID: <20230207160719.1307-11-syalavarthi@marvell.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20230207160719.1307-1-syalavarthi@marvell.com>
References: <20221208200220.20267-1-syalavarthi@marvell.com>
 <20230207160719.1307-1-syalavarthi@marvell.com>
List-Id: DPDK patches and discussions

Enable support to create and destroy device queue-pairs. Update the
configure stage to create an array to store queue-pair handles. Add
internal structures for the queue-pair, the request queue and ML
inference requests.
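For context, a minimal application-side sketch of how these new ops are
reached through the public rte_mldev API. The function and field names
(rte_ml_dev_configure(), rte_ml_dev_queue_pair_setup(), the config and
qp_conf structures) follow the mldev spec this series builds on; the
device id, queue-pair id and descriptor count are illustrative values,
not part of this patch:

    #include <string.h>

    #include <rte_common.h>
    #include <rte_lcore.h>
    #include <rte_mldev.h>

    static int
    app_setup_ml_dev(int16_t dev_id)
    {
    	struct rte_ml_dev_qp_conf qp_conf;
    	struct rte_ml_dev_config conf;
    	struct rte_ml_dev_info info;
    	int ret;

    	ret = rte_ml_dev_info_get(dev_id, &info);
    	if (ret != 0)
    		return ret;

    	memset(&conf, 0, sizeof(conf));
    	conf.socket_id = rte_socket_id();
    	conf.nb_models = 1;
    	conf.nb_queue_pairs = 1;

    	/* Lands in cn10k_ml_dev_configure(), which allocates the
    	 * queue_pairs handle array added by this patch.
    	 */
    	ret = rte_ml_dev_configure(dev_id, &conf);
    	if (ret != 0)
    		return ret;

    	memset(&qp_conf, 0, sizeof(qp_conf));
    	qp_conf.nb_desc = RTE_MIN(1024u, info.max_desc);

    	/* Lands in cn10k_ml_dev_queue_pair_setup() -> cn10k_ml_qp_create() */
    	return rte_ml_dev_queue_pair_setup(dev_id, 0, &qp_conf, rte_socket_id());
    }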
Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
 drivers/ml/cnxk/cn10k_ml_ops.c | 207 ++++++++++++++++++++++++++++++++-
 drivers/ml/cnxk/cn10k_ml_ops.h |  33 +++++-
 2 files changed, 237 insertions(+), 3 deletions(-)

diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index a9f14fe4c5..82670330d1 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -8,6 +8,97 @@
 #include "cn10k_ml_dev.h"
 #include "cn10k_ml_ops.h"
 
+static void
+qp_memzone_name_get(char *name, int size, int dev_id, int qp_id)
+{
+	snprintf(name, size, "cn10k_ml_qp_mem_%u:%u", dev_id, qp_id);
+}
+
+static int
+cn10k_ml_qp_destroy(const struct rte_ml_dev *dev, struct cn10k_ml_qp *qp)
+{
+	const struct rte_memzone *qp_mem;
+	char name[RTE_MEMZONE_NAMESIZE];
+	int ret;
+
+	qp_memzone_name_get(name, RTE_MEMZONE_NAMESIZE, dev->data->dev_id, qp->id);
+	qp_mem = rte_memzone_lookup(name);
+	ret = rte_memzone_free(qp_mem);
+	if (ret)
+		return ret;
+
+	rte_free(qp);
+
+	return 0;
+}
+
+static int
+cn10k_ml_dev_queue_pair_release(struct rte_ml_dev *dev, uint16_t queue_pair_id)
+{
+	struct cn10k_ml_qp *qp;
+	int ret;
+
+	qp = dev->data->queue_pairs[queue_pair_id];
+	if (qp == NULL)
+		return -EINVAL;
+
+	ret = cn10k_ml_qp_destroy(dev, qp);
+	if (ret) {
+		plt_err("Could not destroy queue pair %u", queue_pair_id);
+		return ret;
+	}
+
+	dev->data->queue_pairs[queue_pair_id] = NULL;
+
+	return 0;
+}
+
+static struct cn10k_ml_qp *
+cn10k_ml_qp_create(const struct rte_ml_dev *dev, uint16_t qp_id, uint32_t nb_desc, int socket_id)
+{
+	const struct rte_memzone *qp_mem;
+	char name[RTE_MEMZONE_NAMESIZE];
+	struct cn10k_ml_qp *qp;
+	uint32_t len;
+	uint8_t *va;
+
+	/* Allocate queue pair */
+	qp = rte_zmalloc_socket("cn10k_ml_pmd_queue_pair", sizeof(struct cn10k_ml_qp), ROC_ALIGN,
+				socket_id);
+	if (qp == NULL) {
+		plt_err("Could not allocate queue pair");
+		return NULL;
+	}
+
+	/* For request queue */
+	len = nb_desc * sizeof(struct cn10k_ml_req);
+	qp_memzone_name_get(name, RTE_MEMZONE_NAMESIZE, dev->data->dev_id, qp_id);
+	qp_mem = rte_memzone_reserve_aligned(
+		name, len, socket_id, RTE_MEMZONE_SIZE_HINT_ONLY | RTE_MEMZONE_256MB, ROC_ALIGN);
+	if (qp_mem == NULL) {
+		plt_err("Could not reserve memzone: %s", name);
+		goto qp_free;
+	}
+
+	va = qp_mem->addr;
+	memset(va, 0, len);
+
+	/* Initialize Request queue */
+	qp->id = qp_id;
+	qp->queue.reqs = (struct cn10k_ml_req *)va;
+	qp->queue.head = 0;
+	qp->queue.tail = 0;
+	qp->queue.wait_cycles = ML_CN10K_CMD_TIMEOUT * plt_tsc_hz();
+	qp->nb_desc = nb_desc;
+
+	return qp;
+
+qp_free:
+	rte_free(qp);
+
+	return NULL;
+}
+
 static int
 cn10k_ml_dev_info_get(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info)
 {
@@ -30,6 +121,9 @@ cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c
 {
 	struct rte_ml_dev_info dev_info;
 	struct cn10k_ml_dev *mldev;
+	struct cn10k_ml_qp *qp;
+	uint32_t mz_size;
+	uint16_t qp_id;
 	int ret;
 
 	if (dev == NULL || conf == NULL)
@@ -68,21 +162,83 @@ cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c
 		return -ENOTSUP;
 	}
 
+	/* Configure queue-pairs */
+	if (dev->data->queue_pairs == NULL) {
+		mz_size = sizeof(dev->data->queue_pairs[0]) * conf->nb_queue_pairs;
+		dev->data->queue_pairs =
+			rte_zmalloc("cn10k_mldev_queue_pairs", mz_size, RTE_CACHE_LINE_SIZE);
+		if (dev->data->queue_pairs == NULL) {
+			dev->data->nb_queue_pairs = 0;
+			plt_err("Failed to get memory for queue_pairs, nb_queue_pairs %u",
+				conf->nb_queue_pairs);
+			return -ENOMEM;
+		}
+	} else { /* Re-configure */
+		void **queue_pairs;
+
+		/* Release all queue pairs as ML spec doesn't support queue_pair_destroy. */
+		for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {
+			qp = dev->data->queue_pairs[qp_id];
+			if (qp != NULL) {
+				ret = cn10k_ml_dev_queue_pair_release(dev, qp_id);
+				if (ret < 0)
+					return ret;
+			}
+		}
+
+		queue_pairs = dev->data->queue_pairs;
+		queue_pairs =
+			rte_realloc(queue_pairs, sizeof(queue_pairs[0]) * conf->nb_queue_pairs,
+				    RTE_CACHE_LINE_SIZE);
+		if (queue_pairs == NULL) {
+			dev->data->nb_queue_pairs = 0;
+			plt_err("Failed to realloc queue_pairs, nb_queue_pairs = %u",
+				conf->nb_queue_pairs);
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		memset(queue_pairs, 0, sizeof(queue_pairs[0]) * conf->nb_queue_pairs);
+		dev->data->queue_pairs = queue_pairs;
+	}
+	dev->data->nb_queue_pairs = conf->nb_queue_pairs;
+
 	mldev->state = ML_CN10K_DEV_STATE_CONFIGURED;
 
 	return 0;
+
+error:
+	if (dev->data->queue_pairs != NULL)
+		rte_free(dev->data->queue_pairs);
+
+	return ret;
 }
 
 static int
 cn10k_ml_dev_close(struct rte_ml_dev *dev)
 {
 	struct cn10k_ml_dev *mldev;
+	struct cn10k_ml_qp *qp;
+	uint16_t qp_id;
 
 	if (dev == NULL)
 		return -EINVAL;
 
 	mldev = dev->data->dev_private;
 
+	/* Destroy all queue pairs */
+	for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {
+		qp = dev->data->queue_pairs[qp_id];
+		if (qp != NULL) {
+			if (cn10k_ml_qp_destroy(dev, qp) != 0)
+				plt_err("Could not destroy queue pair %u", qp_id);
+			dev->data->queue_pairs[qp_id] = NULL;
+		}
+	}
+
+	if (dev->data->queue_pairs)
+		rte_free(dev->data->queue_pairs);
+
 	/* Unload firmware */
 	cn10k_ml_fw_unload(mldev);
 
@@ -140,9 +296,56 @@ cn10k_ml_dev_stop(struct rte_ml_dev *dev)
 	return 0;
 }
 
+static int
+cn10k_ml_dev_queue_pair_setup(struct rte_ml_dev *dev, uint16_t queue_pair_id,
+			      const struct rte_ml_dev_qp_conf *qp_conf, int socket_id)
+{
+	struct rte_ml_dev_info dev_info;
+	struct cn10k_ml_qp *qp;
+	uint32_t nb_desc;
+
+	if (queue_pair_id >= dev->data->nb_queue_pairs) {
+		plt_err("Queue-pair id = %u (>= max queue pairs supported, %u)\n", queue_pair_id,
+			dev->data->nb_queue_pairs);
+		return -EINVAL;
+	}
+
+	if (dev->data->queue_pairs[queue_pair_id] != NULL)
+		cn10k_ml_dev_queue_pair_release(dev, queue_pair_id);
+
+	cn10k_ml_dev_info_get(dev, &dev_info);
+	if ((qp_conf->nb_desc > dev_info.max_desc) || (qp_conf->nb_desc == 0)) {
+		plt_err("Could not setup queue pair for %u descriptors", qp_conf->nb_desc);
+		return -EINVAL;
+	}
+	plt_ml_dbg("Creating queue-pair, queue_pair_id = %u, nb_desc = %u", queue_pair_id,
+		   qp_conf->nb_desc);
+
+	/* As the number of usable descriptors is 1 less than the queue size being created, we
+	 * increment the size of queue by 1 than the requested size, except when the requested size
+	 * is equal to the maximum possible size.
+	 */
+	nb_desc =
+		(qp_conf->nb_desc == dev_info.max_desc) ? dev_info.max_desc : qp_conf->nb_desc + 1;
+	qp = cn10k_ml_qp_create(dev, queue_pair_id, nb_desc, socket_id);
+	if (qp == NULL) {
+		plt_err("Could not create queue pair %u", queue_pair_id);
+		return -ENOMEM;
+	}
+	dev->data->queue_pairs[queue_pair_id] = qp;
+
+	return 0;
+}
+
 struct rte_ml_dev_ops cn10k_ml_ops = {
 	/* Device control ops */
-	.dev_info_get = cn10k_ml_dev_info_get, .dev_configure = cn10k_ml_dev_configure,
-	.dev_close = cn10k_ml_dev_close, .dev_start = cn10k_ml_dev_start,
+	.dev_info_get = cn10k_ml_dev_info_get,
+	.dev_configure = cn10k_ml_dev_configure,
+	.dev_close = cn10k_ml_dev_close,
+	.dev_start = cn10k_ml_dev_start,
 	.dev_stop = cn10k_ml_dev_stop,
+
+	/* Queue-pair handling ops */
+	.dev_queue_pair_setup = cn10k_ml_dev_queue_pair_setup,
+	.dev_queue_pair_release = cn10k_ml_dev_queue_pair_release,
 };

diff --git a/drivers/ml/cnxk/cn10k_ml_ops.h b/drivers/ml/cnxk/cn10k_ml_ops.h
index fe18730aca..289c7c5587 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.h
+++ b/drivers/ml/cnxk/cn10k_ml_ops.h
@@ -5,9 +5,13 @@
 #ifndef _CN10K_ML_OPS_H_
 #define _CN10K_ML_OPS_H_
 
+#include <rte_mldev.h>
+
+#include <roc_api.h>
+
 #include "cn10k_ml_dev.h"
 
-/* ML request */
+/* Request structure */
 struct cn10k_ml_req {
 	/* Job descriptor */
 	struct cn10k_ml_jd jd;
@@ -19,6 +23,33 @@ struct cn10k_ml_req {
 	volatile uint64_t status;
 } __rte_aligned(ROC_ALIGN);
 
+/* Request queue */
+struct cn10k_ml_queue {
+	/* Array of requests */
+	struct cn10k_ml_req *reqs;
+
+	/* Head of the queue, used for enqueue */
+	uint64_t head;
+
+	/* Tail of the queue, used for dequeue */
+	uint64_t tail;
+
+	/* Wait cycles before timeout */
+	uint64_t wait_cycles;
+};
+
+/* Queue-pair structure */
+struct cn10k_ml_qp {
+	/* ID */
+	uint32_t id;
+
+	/* Number of descriptors */
+	uint64_t nb_desc;
+
+	/* Request queue */
+	struct cn10k_ml_queue queue;
+};
+
 /* Device ops */
 extern struct rte_ml_dev_ops cn10k_ml_ops;
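
A note on the nb_desc + 1 adjustment in cn10k_ml_dev_queue_pair_setup()
above: a ring tracked with head/tail indices must keep one slot empty to
tell a full queue from an empty one, so N usable descriptors require a
queue of N + 1 entries. An illustrative sketch of that invariant follows;
it is not the driver's enqueue/dequeue path (which this patch does not
include), just the generic ring rule the comment refers to:

    #include <stdbool.h>
    #include <stdint.h>

    struct ring {
    	uint64_t head; /* next slot to fill (enqueue side) */
    	uint64_t tail; /* next slot to drain (dequeue side) */
    	uint64_t size; /* total slots; usable slots = size - 1 */
    };

    static bool
    ring_empty(const struct ring *r)
    {
    	/* Empty and full would be indistinguishable without the spare slot */
    	return r->head == r->tail;
    }

    static bool
    ring_full(const struct ring *r)
    {
    	/* Full when advancing head would collide with tail */
    	return (r->head + 1) % r->size == r->tail;
    }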