[v5,10/39] ml/cnxk: add support to create device queue-pairs

Message ID: 20230207160719.1307-11-syalavarthi@marvell.com
State: Superseded, archived
Delegated to: Thomas Monjalon
Series: Implementation of ML CNXK driver

Checks

Context         Check     Description
ci/checkpatch   success   coding style OK

Commit Message

Srikanth Yalavarthi Feb. 7, 2023, 4:06 p.m. UTC
  Enabled support to create and destroy device queue-pairs. Updated the
configure stage to create an array to store queue-pair handles. Added
internal structures for the queue-pair, queue and ML inference requests.

Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
 drivers/ml/cnxk/cn10k_ml_ops.c | 207 ++++++++++++++++++++++++++++++++-
 drivers/ml/cnxk/cn10k_ml_ops.h |  33 +++++-
 2 files changed, 237 insertions(+), 3 deletions(-)
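
For orientation, a minimal application-side sketch (not part of this patch) of how the ops added below are reached through the rte_ml_dev API proposed earlier in this series. The device id, model count, queue-pair count and descriptor count are illustrative assumptions; a real application would size them from rte_ml_dev_info_get().

#include <rte_mldev.h>

/* Illustrative values, not taken from this patch. */
#define APP_NB_QPS  2
#define APP_NB_DESC 128

static int
app_setup_ml_device(int16_t dev_id, int socket_id)
{
	struct rte_ml_dev_config conf = {
		.socket_id = socket_id,
		.nb_models = 1,
		.nb_queue_pairs = APP_NB_QPS, /* sizes the queue_pairs array in configure */
	};
	struct rte_ml_dev_qp_conf qp_conf = {
		.nb_desc = APP_NB_DESC, /* must be non-zero and <= dev_info.max_desc */
	};
	uint16_t qp_id;
	int ret;

	/* Reaches cn10k_ml_dev_configure() through the ops table. */
	ret = rte_ml_dev_configure(dev_id, &conf);
	if (ret != 0)
		return ret;

	/* Reaches cn10k_ml_dev_queue_pair_setup() once per queue pair. */
	for (qp_id = 0; qp_id < APP_NB_QPS; qp_id++) {
		ret = rte_ml_dev_queue_pair_setup(dev_id, qp_id, &qp_conf, socket_id);
		if (ret != 0)
			return ret;
	}

	return 0;
}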
  

Patch

diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index a9f14fe4c5..82670330d1 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -8,6 +8,97 @@ 
 #include "cn10k_ml_dev.h"
 #include "cn10k_ml_ops.h"
 
+static void
+qp_memzone_name_get(char *name, int size, int dev_id, int qp_id)
+{
+	snprintf(name, size, "cn10k_ml_qp_mem_%u:%u", dev_id, qp_id);
+}
+
+static int
+cn10k_ml_qp_destroy(const struct rte_ml_dev *dev, struct cn10k_ml_qp *qp)
+{
+	const struct rte_memzone *qp_mem;
+	char name[RTE_MEMZONE_NAMESIZE];
+	int ret;
+
+	qp_memzone_name_get(name, RTE_MEMZONE_NAMESIZE, dev->data->dev_id, qp->id);
+	qp_mem = rte_memzone_lookup(name);
+	ret = rte_memzone_free(qp_mem);
+	if (ret)
+		return ret;
+
+	rte_free(qp);
+
+	return 0;
+}
+
+static int
+cn10k_ml_dev_queue_pair_release(struct rte_ml_dev *dev, uint16_t queue_pair_id)
+{
+	struct cn10k_ml_qp *qp;
+	int ret;
+
+	qp = dev->data->queue_pairs[queue_pair_id];
+	if (qp == NULL)
+		return -EINVAL;
+
+	ret = cn10k_ml_qp_destroy(dev, qp);
+	if (ret) {
+		plt_err("Could not destroy queue pair %u", queue_pair_id);
+		return ret;
+	}
+
+	dev->data->queue_pairs[queue_pair_id] = NULL;
+
+	return 0;
+}
+
+static struct cn10k_ml_qp *
+cn10k_ml_qp_create(const struct rte_ml_dev *dev, uint16_t qp_id, uint32_t nb_desc, int socket_id)
+{
+	const struct rte_memzone *qp_mem;
+	char name[RTE_MEMZONE_NAMESIZE];
+	struct cn10k_ml_qp *qp;
+	uint32_t len;
+	uint8_t *va;
+
+	/* Allocate queue pair */
+	qp = rte_zmalloc_socket("cn10k_ml_pmd_queue_pair", sizeof(struct cn10k_ml_qp), ROC_ALIGN,
+				socket_id);
+	if (qp == NULL) {
+		plt_err("Could not allocate queue pair");
+		return NULL;
+	}
+
+	/* For request queue */
+	len = nb_desc * sizeof(struct cn10k_ml_req);
+	qp_memzone_name_get(name, RTE_MEMZONE_NAMESIZE, dev->data->dev_id, qp_id);
+	qp_mem = rte_memzone_reserve_aligned(
+		name, len, socket_id, RTE_MEMZONE_SIZE_HINT_ONLY | RTE_MEMZONE_256MB, ROC_ALIGN);
+	if (qp_mem == NULL) {
+		plt_err("Could not reserve memzone: %s", name);
+		goto qp_free;
+	}
+
+	va = qp_mem->addr;
+	memset(va, 0, len);
+
+	/* Initialize Request queue */
+	qp->id = qp_id;
+	qp->queue.reqs = (struct cn10k_ml_req *)va;
+	qp->queue.head = 0;
+	qp->queue.tail = 0;
+	qp->queue.wait_cycles = ML_CN10K_CMD_TIMEOUT * plt_tsc_hz();
+	qp->nb_desc = nb_desc;
+
+	return qp;
+
+qp_free:
+	rte_free(qp);
+
+	return NULL;
+}
+
 static int
 cn10k_ml_dev_info_get(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info)
 {
@@ -30,6 +121,9 @@  cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c
 {
 	struct rte_ml_dev_info dev_info;
 	struct cn10k_ml_dev *mldev;
+	struct cn10k_ml_qp *qp;
+	uint32_t mz_size;
+	uint16_t qp_id;
 	int ret;
 
 	if (dev == NULL || conf == NULL)
@@ -68,21 +162,83 @@  cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c
 		return -ENOTSUP;
 	}
 
+	/* Configure queue-pairs */
+	if (dev->data->queue_pairs == NULL) {
+		mz_size = sizeof(dev->data->queue_pairs[0]) * conf->nb_queue_pairs;
+		dev->data->queue_pairs =
+			rte_zmalloc("cn10k_mldev_queue_pairs", mz_size, RTE_CACHE_LINE_SIZE);
+		if (dev->data->queue_pairs == NULL) {
+			dev->data->nb_queue_pairs = 0;
+			plt_err("Failed to get memory for queue_pairs, nb_queue_pairs %u",
+				conf->nb_queue_pairs);
+			return -ENOMEM;
+		}
+	} else { /* Re-configure */
+		void **queue_pairs;
+
+		/* Release all queue pairs as ML spec doesn't support queue_pair_destroy. */
+		for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {
+			qp = dev->data->queue_pairs[qp_id];
+			if (qp != NULL) {
+				ret = cn10k_ml_dev_queue_pair_release(dev, qp_id);
+				if (ret < 0)
+					return ret;
+			}
+		}
+
+		queue_pairs = dev->data->queue_pairs;
+		queue_pairs =
+			rte_realloc(queue_pairs, sizeof(queue_pairs[0]) * conf->nb_queue_pairs,
+				    RTE_CACHE_LINE_SIZE);
+		if (queue_pairs == NULL) {
+			dev->data->nb_queue_pairs = 0;
+			plt_err("Failed to realloc queue_pairs, nb_queue_pairs = %u",
+				conf->nb_queue_pairs);
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		memset(queue_pairs, 0, sizeof(queue_pairs[0]) * conf->nb_queue_pairs);
+		dev->data->queue_pairs = queue_pairs;
+	}
+	dev->data->nb_queue_pairs = conf->nb_queue_pairs;
+
 	mldev->state = ML_CN10K_DEV_STATE_CONFIGURED;
 
 	return 0;
+
+error:
+	if (dev->data->queue_pairs != NULL)
+		rte_free(dev->data->queue_pairs);
+
+	return ret;
 }
 
 static int
 cn10k_ml_dev_close(struct rte_ml_dev *dev)
 {
 	struct cn10k_ml_dev *mldev;
+	struct cn10k_ml_qp *qp;
+	uint16_t qp_id;
 
 	if (dev == NULL)
 		return -EINVAL;
 
 	mldev = dev->data->dev_private;
 
+	/* Destroy all queue pairs */
+	for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {
+		qp = dev->data->queue_pairs[qp_id];
+		if (qp != NULL) {
+			if (cn10k_ml_qp_destroy(dev, qp) != 0)
+				plt_err("Could not destroy queue pair %u", qp_id);
+			dev->data->queue_pairs[qp_id] = NULL;
+		}
+	}
+
+	if (dev->data->queue_pairs)
+		rte_free(dev->data->queue_pairs);
+
 	/* Unload firmware */
 	cn10k_ml_fw_unload(mldev);
 
@@ -140,9 +296,56 @@  cn10k_ml_dev_stop(struct rte_ml_dev *dev)
 	return 0;
 }
 
+static int
+cn10k_ml_dev_queue_pair_setup(struct rte_ml_dev *dev, uint16_t queue_pair_id,
+			      const struct rte_ml_dev_qp_conf *qp_conf, int socket_id)
+{
+	struct rte_ml_dev_info dev_info;
+	struct cn10k_ml_qp *qp;
+	uint32_t nb_desc;
+
+	if (queue_pair_id >= dev->data->nb_queue_pairs) {
+		plt_err("Queue-pair id = %u (>= max queue pairs supported, %u)\n", queue_pair_id,
+			dev->data->nb_queue_pairs);
+		return -EINVAL;
+	}
+
+	if (dev->data->queue_pairs[queue_pair_id] != NULL)
+		cn10k_ml_dev_queue_pair_release(dev, queue_pair_id);
+
+	cn10k_ml_dev_info_get(dev, &dev_info);
+	if ((qp_conf->nb_desc > dev_info.max_desc) || (qp_conf->nb_desc == 0)) {
+		plt_err("Could not setup queue pair for %u descriptors", qp_conf->nb_desc);
+		return -EINVAL;
+	}
+	plt_ml_dbg("Creating queue-pair, queue_pair_id = %u, nb_desc = %u", queue_pair_id,
+		   qp_conf->nb_desc);
+
+	/* The number of usable descriptors in a queue is one less than its size. Create the
+	 * queue with one descriptor more than the requested size to compensate, except when
+	 * the requested size is already the maximum possible size.
+	 */
+	nb_desc =
+		(qp_conf->nb_desc == dev_info.max_desc) ? dev_info.max_desc : qp_conf->nb_desc + 1;
+	qp = cn10k_ml_qp_create(dev, queue_pair_id, nb_desc, socket_id);
+	if (qp == NULL) {
+		plt_err("Could not create queue pair %u", queue_pair_id);
+		return -ENOMEM;
+	}
+	dev->data->queue_pairs[queue_pair_id] = qp;
+
+	return 0;
+}
+
 struct rte_ml_dev_ops cn10k_ml_ops = {
 	/* Device control ops */
-	.dev_info_get = cn10k_ml_dev_info_get, .dev_configure = cn10k_ml_dev_configure,
-	.dev_close = cn10k_ml_dev_close,       .dev_start = cn10k_ml_dev_start,
+	.dev_info_get = cn10k_ml_dev_info_get,
+	.dev_configure = cn10k_ml_dev_configure,
+	.dev_close = cn10k_ml_dev_close,
+	.dev_start = cn10k_ml_dev_start,
 	.dev_stop = cn10k_ml_dev_stop,
+
+	/* Queue-pair handling ops */
+	.dev_queue_pair_setup = cn10k_ml_dev_queue_pair_setup,
+	.dev_queue_pair_release = cn10k_ml_dev_queue_pair_release,
 };
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.h b/drivers/ml/cnxk/cn10k_ml_ops.h
index fe18730aca..289c7c5587 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.h
+++ b/drivers/ml/cnxk/cn10k_ml_ops.h
@@ -5,9 +5,13 @@ 
 #ifndef _CN10K_ML_OPS_H_
 #define _CN10K_ML_OPS_H_
 
+#include <rte_mldev.h>
+
+#include <roc_api.h>
+
 #include "cn10k_ml_dev.h"
 
-/* ML request */
+/* Request structure */
 struct cn10k_ml_req {
 	/* Job descriptor */
 	struct cn10k_ml_jd jd;
@@ -19,6 +23,33 @@  struct cn10k_ml_req {
 	volatile uint64_t status;
 } __rte_aligned(ROC_ALIGN);
 
+/* Request queue */
+struct cn10k_ml_queue {
+	/* Array of requests */
+	struct cn10k_ml_req *reqs;
+
+	/* Head of the queue, used for enqueue */
+	uint64_t head;
+
+	/* Tail of the queue, used for dequeue */
+	uint64_t tail;
+
+	/* Wait cycles before timeout */
+	uint64_t wait_cycles;
+};
+
+/* Queue-pair structure */
+struct cn10k_ml_qp {
+	/* ID */
+	uint32_t id;
+
+	/* Number of descriptors */
+	uint64_t nb_desc;
+
+	/* Request queue */
+	struct cn10k_ml_queue queue;
+};
+
 /* Device ops */
 extern struct rte_ml_dev_ops cn10k_ml_ops;
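
For reference, the head/tail pair in cn10k_ml_queue implements the usual ring discipline where one slot is kept empty so that head == tail unambiguously means "empty"; this is why cn10k_ml_dev_queue_pair_setup() above creates the queue with nb_desc + 1 entries. The helpers below are an illustrative sketch of that index arithmetic, on which the enqueue/dequeue patches later in this series build; the names are assumptions, not part of this patch.

static inline uint64_t
queue_index_advance(uint64_t index, uint64_t nb_desc)
{
	/* Wrap around the ring; with nb_desc slots, at most nb_desc - 1 are usable. */
	return (index + 1) % nb_desc;
}

static inline uint64_t
queue_pending_count(uint64_t head, uint64_t tail, uint64_t nb_desc)
{
	/* Requests enqueued at head but not yet dequeued at tail. */
	return (nb_desc + head - tail) % nb_desc;
}

static inline uint64_t
queue_free_count(uint64_t head, uint64_t tail, uint64_t nb_desc)
{
	/* One slot is sacrificed so that a full ring never wraps onto tail. */
	return nb_desc - queue_pending_count(head, tail, nb_desc) - 1;
}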