get:
Show a patch.

patch:
Update a patch (partial update; only the supplied fields are changed).

put:
Update a patch (full update).
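
For reference, the same resource can be retrieved as raw JSON with any HTTP client. Below is a minimal sketch using only the Python standard library; the format=json query parameter is the plain-JSON counterpart of the format=api rendering shown in the example exchange that follows, and updating via put or patch would additionally require an authenticated account:

import json
import urllib.request

# Fetch patch 133142 as plain JSON rather than the browsable rendering.
url = "http://patches.dpdk.org/api/patches/133142/?format=json"
with urllib.request.urlopen(url) as resp:
    patch = json.load(resp)

print(patch["name"])   # "[v8,16/34] ml/cnxk: update fast path functions"
print(patch["state"])  # "superseded"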

GET /api/patches/133142/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 133142,
    "url": "http://patches.dpdk.org/api/patches/133142/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20231023044141.22112-17-syalavarthi@marvell.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20231023044141.22112-17-syalavarthi@marvell.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20231023044141.22112-17-syalavarthi@marvell.com",
    "date": "2023-10-23T04:41:19",
    "name": "[v8,16/34] ml/cnxk: update fast path functions",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "6800864b7a4e9699df4f25cc828a93367ea83093",
    "submitter": {
        "id": 2480,
        "url": "http://patches.dpdk.org/api/people/2480/?format=api",
        "name": "Srikanth Yalavarthi",
        "email": "syalavarthi@marvell.com"
    },
    "delegate": {
        "id": 310,
        "url": "http://patches.dpdk.org/api/users/310/?format=api",
        "username": "jerin",
        "first_name": "Jerin",
        "last_name": "Jacob",
        "email": "jerinj@marvell.com"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20231023044141.22112-17-syalavarthi@marvell.com/mbox/",
    "series": [
        {
            "id": 29941,
            "url": "http://patches.dpdk.org/api/series/29941/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=29941",
            "date": "2023-10-23T04:41:04",
            "name": "Implementation of revised ml/cnxk driver",
            "version": 8,
            "mbox": "http://patches.dpdk.org/series/29941/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/133142/comments/",
    "check": "success",
    "checks": "http://patches.dpdk.org/api/patches/133142/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 597FB431DC;\n\tMon, 23 Oct 2023 06:45:49 +0200 (CEST)",
            "from mails.dpdk.org (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 047D742D0B;\n\tMon, 23 Oct 2023 06:42:22 +0200 (CEST)",
            "from mx0b-0016f401.pphosted.com (mx0b-0016f401.pphosted.com\n [67.231.156.173])\n by mails.dpdk.org (Postfix) with ESMTP id 784F940633\n for <dev@dpdk.org>; Mon, 23 Oct 2023 06:41:56 +0200 (CEST)",
            "from pps.filterd (m0045851.ppops.net [127.0.0.1])\n by mx0b-0016f401.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id\n 39MLq9rq014581 for <dev@dpdk.org>; Sun, 22 Oct 2023 21:41:56 -0700",
            "from dc5-exch02.marvell.com ([199.233.59.182])\n by mx0b-0016f401.pphosted.com (PPS) with ESMTPS id 3tve5mvbem-10\n (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT)\n for <dev@dpdk.org>; Sun, 22 Oct 2023 21:41:55 -0700",
            "from DC5-EXCH02.marvell.com (10.69.176.39) by DC5-EXCH02.marvell.com\n (10.69.176.39) with Microsoft SMTP Server (TLS) id 15.0.1497.48;\n Sun, 22 Oct 2023 21:41:51 -0700",
            "from maili.marvell.com (10.69.176.80) by DC5-EXCH02.marvell.com\n (10.69.176.39) with Microsoft SMTP Server id 15.0.1497.48 via Frontend\n Transport; Sun, 22 Oct 2023 21:41:51 -0700",
            "from ml-host-33.caveonetworks.com (unknown [10.110.143.233])\n by maili.marvell.com (Postfix) with ESMTP id B08343F7044;\n Sun, 22 Oct 2023 21:41:50 -0700 (PDT)"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com;\n h=from : to : cc :\n subject : date : message-id : in-reply-to : references : mime-version :\n content-transfer-encoding : content-type; s=pfpt0220;\n bh=ucNzu4pc6F9l/lNY61xRhAcvgBJ56LJjcq5At+SVW40=;\n b=OlT5AanQ+caIQm0BJf5VrphgosjZv8/bFgy9qYJ1AMCMLsNGPOYpbwR9pPUZRB0QYRPZ\n RrSiq96MZjRz/r92/1FzopDiqWqvykbUVjI5cKnjMSD9KHNLUmDqdL9fXz8gUiXobtYW\n rKKWgKUzLDv3RHUFzvhMHpo3zMpGap/WuD7SZ0HbRvx8hsTf4NZdbsHTGbWdTkTRBTMC\n WlUfRfvkGFeeNPp1JtlGURKRWktZrlCMp8Z+Cd7cdfIVWmtdkSLJg172g1zXJviPxV7s\n JVx6Pck8yz9hY1Q7sXCQdRkcvc3LwO41goJkO/EYNPMLOreJaFRch/rt9rCffucCGJDs AQ==",
        "From": "Srikanth Yalavarthi <syalavarthi@marvell.com>",
        "To": "Srikanth Yalavarthi <syalavarthi@marvell.com>",
        "CC": "<dev@dpdk.org>, <sshankarnara@marvell.com>, <aprabhu@marvell.com>,\n <ptakkar@marvell.com>",
        "Subject": "[PATCH v8 16/34] ml/cnxk: update fast path functions",
        "Date": "Sun, 22 Oct 2023 21:41:19 -0700",
        "Message-ID": "<20231023044141.22112-17-syalavarthi@marvell.com>",
        "X-Mailer": "git-send-email 2.42.0",
        "In-Reply-To": "<20231023044141.22112-1-syalavarthi@marvell.com>",
        "References": "<20230830155927.3566-1-syalavarthi@marvell.com>\n <20231023044141.22112-1-syalavarthi@marvell.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "Content-Type": "text/plain",
        "X-Proofpoint-ORIG-GUID": "-o01yCSw1t_3xl2aXH7SxDpFpCyIVNqa",
        "X-Proofpoint-GUID": "-o01yCSw1t_3xl2aXH7SxDpFpCyIVNqa",
        "X-Proofpoint-Virus-Version": "vendor=baseguard\n engine=ICAP:2.0.272,Aquarius:18.0.980,Hydra:6.0.619,FMLib:17.11.176.26\n definitions=2023-10-23_01,2023-10-19_01,2023-05-22_02",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "Implemented cnxk layer fast-path functions and added support\nfor model specific fast-path functions. CNXK layer functions\nwould invoke model specific fast-path functions.\n\nAdded support for model specific poll handling functions and\nupdated internal inference sync function. Drop use of rte_ml_op\nas argument. Updated function arguments to enable the function\nto be used as callback by TVM HW runtime.\n\nSigned-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>\n---\n drivers/ml/cnxk/cn10k_ml_dev.h  |   5 -\n drivers/ml/cnxk/cn10k_ml_ops.c  | 241 ++++++++------------------------\n drivers/ml/cnxk/cn10k_ml_ops.h  |  13 +-\n drivers/ml/cnxk/cnxk_ml_model.h |  14 ++\n drivers/ml/cnxk/cnxk_ml_ops.c   | 128 +++++++++++++++++\n drivers/ml/cnxk/cnxk_ml_ops.h   |   7 +\n 6 files changed, 216 insertions(+), 192 deletions(-)",
    "diff": "diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h\nindex bde9d08901..94a94d996f 100644\n--- a/drivers/ml/cnxk/cn10k_ml_dev.h\n+++ b/drivers/ml/cnxk/cn10k_ml_dev.h\n@@ -143,11 +143,6 @@ struct cn10k_ml_dev {\n \n \t/* JCMD enqueue function handler */\n \tbool (*ml_jcmdq_enqueue)(struct roc_ml *roc_ml, struct ml_job_cmd_s *job_cmd);\n-\n-\t/* Poll handling function pointers */\n-\tvoid (*set_poll_addr)(struct cnxk_ml_req *req);\n-\tvoid (*set_poll_ptr)(struct cnxk_ml_req *req);\n-\tuint64_t (*get_poll_ptr)(struct cnxk_ml_req *req);\n };\n \n uint64_t cn10k_ml_fw_flags_get(struct cn10k_ml_fw *fw);\ndiff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c\nindex 776ad60401..8116c8dedb 100644\n--- a/drivers/ml/cnxk/cn10k_ml_ops.c\n+++ b/drivers/ml/cnxk/cn10k_ml_ops.c\n@@ -65,24 +65,12 @@ static const struct cn10k_ml_stype_db_driver {\n \t{ML_DRIVER_ERR_FW_ERROR, \"UNKNOWN FIRMWARE ERROR\"},\n };\n \n-static inline void\n+__rte_hot void\n cn10k_ml_set_poll_addr(struct cnxk_ml_req *req)\n {\n \treq->status = &req->cn10k_req.status;\n }\n \n-static inline void\n-cn10k_ml_set_poll_ptr(struct cnxk_ml_req *req)\n-{\n-\tplt_write64(ML_CNXK_POLL_JOB_START, req->status);\n-}\n-\n-static inline uint64_t\n-cn10k_ml_get_poll_ptr(struct cnxk_ml_req *req)\n-{\n-\treturn plt_read64(req->status);\n-}\n-\n void\n cn10k_ml_qp_initialize(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_qp *qp)\n {\n@@ -177,7 +165,7 @@ cn10k_ml_prep_sp_job_descriptor(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_l\n \n static __rte_always_inline void\n cn10k_ml_prep_fp_job_descriptor(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_req *req,\n-\t\t\t\tstruct rte_ml_op *op)\n+\t\t\t\tuint16_t index, void *input, void *output, uint16_t nb_batches)\n {\n \tstruct cn10k_ml_dev *cn10k_mldev;\n \n@@ -185,17 +173,17 @@ cn10k_ml_prep_fp_job_descriptor(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_r\n \n \treq->cn10k_req.jd.hdr.jce.w0.u64 = 0;\n \treq->cn10k_req.jd.hdr.jce.w1.u64 = PLT_U64_CAST(req->status);\n-\treq->cn10k_req.jd.hdr.model_id = op->model_id;\n+\treq->cn10k_req.jd.hdr.model_id = index;\n \treq->cn10k_req.jd.hdr.job_type = ML_CN10K_JOB_TYPE_MODEL_RUN;\n \treq->cn10k_req.jd.hdr.fp_flags = ML_FLAGS_POLL_COMPL;\n \treq->cn10k_req.jd.hdr.sp_flags = 0x0;\n \treq->cn10k_req.jd.hdr.result =\n \t\troc_ml_addr_ap2mlip(&cn10k_mldev->roc, &req->cn10k_req.result);\n \treq->cn10k_req.jd.model_run.input_ddr_addr =\n-\t\tPLT_U64_CAST(roc_ml_addr_ap2mlip(&cn10k_mldev->roc, op->input[0]->addr));\n+\t\tPLT_U64_CAST(roc_ml_addr_ap2mlip(&cn10k_mldev->roc, input));\n \treq->cn10k_req.jd.model_run.output_ddr_addr =\n-\t\tPLT_U64_CAST(roc_ml_addr_ap2mlip(&cn10k_mldev->roc, op->output[0]->addr));\n-\treq->cn10k_req.jd.model_run.num_batches = op->nb_batches;\n+\t\tPLT_U64_CAST(roc_ml_addr_ap2mlip(&cn10k_mldev->roc, output));\n+\treq->cn10k_req.jd.model_run.num_batches = nb_batches;\n }\n \n static void\n@@ -311,30 +299,15 @@ cn10k_ml_model_xstat_get(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_layer *l\n static int\n cn10k_ml_cache_model_data(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_layer *layer)\n {\n-\tstruct rte_ml_buff_seg seg[2];\n-\tstruct rte_ml_buff_seg *inp;\n-\tstruct rte_ml_buff_seg *out;\n-\tstruct rte_ml_op op;\n-\n \tchar str[RTE_MEMZONE_NAMESIZE];\n \tconst struct plt_memzone *mz;\n \tuint64_t isize = 0;\n \tuint64_t osize = 0;\n \tint ret = 0;\n-\tuint32_t i;\n-\n-\tinp = &seg[0];\n-\tout = &seg[1];\n \n \t/* Create input and output buffers. 
*/\n-\tfor (i = 0; i < layer->info.nb_inputs; i++)\n-\t\tisize += layer->info.input[i].sz_q;\n-\n-\tfor (i = 0; i < layer->info.nb_outputs; i++)\n-\t\tosize += layer->info.output[i].sz_q;\n-\n-\tisize = layer->batch_size * isize;\n-\tosize = layer->batch_size * osize;\n+\tisize = layer->info.total_input_sz_q;\n+\tosize = layer->info.total_output_sz_q;\n \n \tsnprintf(str, RTE_MEMZONE_NAMESIZE, \"%s_%u\", \"ml_dummy_io\", layer->index);\n \tmz = plt_memzone_reserve_aligned(str, isize + osize, 0, ML_CN10K_ALIGN_SIZE);\n@@ -342,25 +315,9 @@ cn10k_ml_cache_model_data(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_layer *\n \t\treturn -ENOMEM;\n \tmemset(mz->addr, 0, isize + osize);\n \n-\tseg[0].addr = mz->addr;\n-\tseg[0].iova_addr = mz->iova;\n-\tseg[0].length = isize;\n-\tseg[0].next = NULL;\n-\n-\tseg[1].addr = PLT_PTR_ADD(mz->addr, isize);\n-\tseg[1].iova_addr = mz->iova + isize;\n-\tseg[1].length = osize;\n-\tseg[1].next = NULL;\n-\n-\top.model_id = layer->index;\n-\top.nb_batches = layer->batch_size;\n-\top.mempool = NULL;\n-\n-\top.input = &inp;\n-\top.output = &out;\n-\n \tmemset(layer->glow.req, 0, sizeof(struct cnxk_ml_req));\n-\tret = cn10k_ml_inference_sync(cnxk_mldev, &op);\n+\tret = cn10k_ml_inference_sync(cnxk_mldev, layer->index, mz->addr,\n+\t\t\t\t      PLT_PTR_ADD(mz->addr, isize), 1);\n \tplt_memzone_free(mz);\n \n \treturn ret;\n@@ -425,13 +382,8 @@ cn10k_ml_dev_configure(struct cnxk_ml_dev *cnxk_mldev, const struct rte_ml_dev_c\n \telse\n \t\tcn10k_mldev->ml_jcmdq_enqueue = roc_ml_jcmdq_enqueue_lf;\n \n-\t/* Set polling function pointers */\n-\tcn10k_mldev->set_poll_addr = cn10k_ml_set_poll_addr;\n-\tcn10k_mldev->set_poll_ptr = cn10k_ml_set_poll_ptr;\n-\tcn10k_mldev->get_poll_ptr = cn10k_ml_get_poll_ptr;\n-\n-\tcnxk_mldev->mldev->enqueue_burst = cn10k_ml_enqueue_burst;\n-\tcnxk_mldev->mldev->dequeue_burst = cn10k_ml_dequeue_burst;\n+\tcnxk_mldev->mldev->enqueue_burst = cnxk_ml_enqueue_burst;\n+\tcnxk_mldev->mldev->dequeue_burst = cnxk_ml_dequeue_burst;\n \tcnxk_mldev->mldev->op_error_get = cn10k_ml_op_error_get;\n \n \treturn 0;\n@@ -824,6 +776,12 @@ cn10k_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params *\n \n \tcn10k_ml_model_info_set(cnxk_mldev, model, &model->layer[0].info, &model->glow.metadata);\n \n+\t/* Set fast-path functions */\n+\tmodel->enqueue_single = cn10k_ml_enqueue_single;\n+\tmodel->result_update = cn10k_ml_result_update;\n+\tmodel->set_error_code = cn10k_ml_set_error_code;\n+\tmodel->set_poll_addr = cn10k_ml_set_poll_addr;\n+\n \treturn 0;\n }\n \n@@ -1219,26 +1177,8 @@ cn10k_ml_model_params_update(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_mode\n \treturn 0;\n }\n \n-static __rte_always_inline void\n-queue_index_advance(uint64_t *index, uint64_t nb_desc)\n-{\n-\t*index = (*index + 1) % nb_desc;\n-}\n-\n-static __rte_always_inline uint64_t\n-queue_pending_count(uint64_t head, uint64_t tail, uint64_t nb_desc)\n-{\n-\treturn (nb_desc + head - tail) % nb_desc;\n-}\n-\n-static __rte_always_inline uint64_t\n-queue_free_count(uint64_t head, uint64_t tail, uint64_t nb_desc)\n-{\n-\treturn nb_desc - queue_pending_count(head, tail, nb_desc) - 1;\n-}\n-\n-static __rte_always_inline void\n-cn10k_ml_result_update(struct cnxk_ml_dev *cnxk_mldev, int qp_id, struct cnxk_ml_req *req)\n+__rte_hot void\n+cn10k_ml_result_update(struct cnxk_ml_dev *cnxk_mldev, int qp_id, void *request)\n {\n \tunion cn10k_ml_error_code *error_code;\n \tstruct cn10k_ml_layer_xstats *xstats;\n@@ -1246,6 +1186,7 @@ cn10k_ml_result_update(struct cnxk_ml_dev 
*cnxk_mldev, int qp_id, struct cnxk_ml\n \tstruct cn10k_ml_result *result;\n \tstruct cnxk_ml_model *model;\n \tstruct cnxk_ml_layer *layer;\n+\tstruct cnxk_ml_req *req;\n \tstruct cnxk_ml_qp *qp;\n \tstruct rte_ml_op *op;\n \tuint64_t hw_latency;\n@@ -1253,9 +1194,9 @@ cn10k_ml_result_update(struct cnxk_ml_dev *cnxk_mldev, int qp_id, struct cnxk_ml\n \tuint16_t model_id;\n \tuint16_t layer_id;\n \n+\treq = (struct cnxk_ml_req *)request;\n \tresult = &req->cn10k_req.result;\n \top = req->op;\n-\n \tif (likely(result->error_code == 0)) {\n \t\tmodel_id = cnxk_mldev->index_map[op->model_id].model_id;\n \t\tlayer_id = cnxk_mldev->index_map[op->model_id].layer_id;\n@@ -1322,119 +1263,48 @@ cn10k_ml_result_update(struct cnxk_ml_dev *cnxk_mldev, int qp_id, struct cnxk_ml\n \top->user_ptr = result->user_ptr;\n }\n \n-__rte_hot uint16_t\n-cn10k_ml_enqueue_burst(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops,\n-\t\t       uint16_t nb_ops)\n+__rte_hot void\n+cn10k_ml_set_error_code(struct cnxk_ml_req *req, uint64_t etype, uint64_t stype)\n+{\n+\tunion cn10k_ml_error_code *error_code;\n+\n+\terror_code = (union cn10k_ml_error_code *)&req->cn10k_req.result.error_code;\n+\terror_code->s.etype = etype;\n+\terror_code->s.stype = stype;\n+}\n+\n+__rte_hot bool\n+cn10k_ml_enqueue_single(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op, uint16_t layer_id,\n+\t\t\tstruct cnxk_ml_qp *qp, uint64_t head)\n {\n \tunion cn10k_ml_error_code *error_code;\n \tstruct cn10k_ml_dev *cn10k_mldev;\n-\tstruct cnxk_ml_dev *cnxk_mldev;\n+\tstruct cnxk_ml_model *model;\n \tstruct cnxk_ml_queue *queue;\n \tstruct cnxk_ml_req *req;\n-\tstruct cnxk_ml_qp *qp;\n-\tstruct rte_ml_op *op;\n-\n-\tuint16_t count;\n-\tuint64_t head;\n-\tbool enqueued;\n \n-\tcnxk_mldev = dev->data->dev_private;\n \tcn10k_mldev = &cnxk_mldev->cn10k_mldev;\n-\tqp = dev->data->queue_pairs[qp_id];\n \tqueue = &qp->queue;\n-\n-\thead = queue->head;\n-\tnb_ops = PLT_MIN(nb_ops, queue_free_count(head, queue->tail, qp->nb_desc));\n-\tcount = 0;\n-\n-\tif (unlikely(nb_ops == 0))\n-\t\treturn 0;\n-\n-enqueue_req:\n-\top = ops[count];\n \treq = &queue->reqs[head];\n \n-\tcn10k_mldev->set_poll_addr(req);\n-\tcn10k_ml_prep_fp_job_descriptor(cnxk_mldev, req, op);\n+\tmodel = cnxk_mldev->mldev->data->models[op->model_id];\n+\tmodel->set_poll_addr(req);\n+\tcn10k_ml_prep_fp_job_descriptor(cnxk_mldev, req, model->layer[layer_id].index,\n+\t\t\t\t\top->input[0]->addr, op->output[0]->addr, op->nb_batches);\n \n \tmemset(&req->cn10k_req.result, 0, sizeof(struct cn10k_ml_result));\n \terror_code = (union cn10k_ml_error_code *)&req->cn10k_req.result.error_code;\n \terror_code->s.etype = ML_ETYPE_UNKNOWN;\n \treq->cn10k_req.result.user_ptr = op->user_ptr;\n \n-\tcn10k_mldev->set_poll_ptr(req);\n-\tenqueued = cn10k_mldev->ml_jcmdq_enqueue(&cn10k_mldev->roc, &req->cn10k_req.jcmd);\n-\tif (unlikely(!enqueued))\n-\t\tgoto jcmdq_full;\n+\tcnxk_ml_set_poll_ptr(req);\n+\tif (unlikely(!cn10k_mldev->ml_jcmdq_enqueue(&cn10k_mldev->roc, &req->cn10k_req.jcmd)))\n+\t\treturn false;\n \n \treq->timeout = plt_tsc_cycles() + queue->wait_cycles;\n \treq->op = op;\n \n-\tqueue_index_advance(&head, qp->nb_desc);\n-\tcount++;\n-\n-\tif (count < nb_ops)\n-\t\tgoto enqueue_req;\n-\n-jcmdq_full:\n-\tqueue->head = head;\n-\tqp->stats.enqueued_count += count;\n-\n-\treturn count;\n-}\n-\n-__rte_hot uint16_t\n-cn10k_ml_dequeue_burst(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops,\n-\t\t       uint16_t nb_ops)\n-{\n-\tunion cn10k_ml_error_code 
*error_code;\n-\tstruct cn10k_ml_dev *cn10k_mldev;\n-\tstruct cnxk_ml_dev *cnxk_mldev;\n-\tstruct cnxk_ml_queue *queue;\n-\tstruct cnxk_ml_req *req;\n-\tstruct cnxk_ml_qp *qp;\n-\n-\tuint64_t status;\n-\tuint16_t count;\n-\tuint64_t tail;\n-\n-\tcnxk_mldev = dev->data->dev_private;\n-\tcn10k_mldev = &cnxk_mldev->cn10k_mldev;\n-\tqp = dev->data->queue_pairs[qp_id];\n-\tqueue = &qp->queue;\n-\n-\ttail = queue->tail;\n-\tnb_ops = PLT_MIN(nb_ops, queue_pending_count(queue->head, tail, qp->nb_desc));\n-\tcount = 0;\n-\n-\tif (unlikely(nb_ops == 0))\n-\t\tgoto empty_or_active;\n-\n-dequeue_req:\n-\treq = &queue->reqs[tail];\n-\tstatus = cn10k_mldev->get_poll_ptr(req);\n-\tif (unlikely(status != ML_CNXK_POLL_JOB_FINISH)) {\n-\t\tif (plt_tsc_cycles() < req->timeout) {\n-\t\t\tgoto empty_or_active;\n-\t\t} else { /* Timeout, set indication of driver error */\n-\t\t\terror_code = (union cn10k_ml_error_code *)&req->cn10k_req.result.error_code;\n-\t\t\terror_code->s.etype = ML_ETYPE_DRIVER;\n-\t\t}\n-\t}\n-\n-\tcn10k_ml_result_update(cnxk_mldev, qp_id, req);\n-\tops[count] = req->op;\n-\n-\tqueue_index_advance(&tail, qp->nb_desc);\n-\tcount++;\n-\n-\tif (count < nb_ops)\n-\t\tgoto dequeue_req;\n-\n-empty_or_active:\n-\tqueue->tail = tail;\n-\n-\treturn count;\n+\treturn true;\n }\n \n __rte_hot int\n@@ -1471,41 +1341,48 @@ cn10k_ml_op_error_get(struct rte_ml_dev *dev, struct rte_ml_op *op, struct rte_m\n }\n \n __rte_hot int\n-cn10k_ml_inference_sync(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op)\n+cn10k_ml_inference_sync(void *device, uint16_t index, void *input, void *output,\n+\t\t\tuint16_t nb_batches)\n {\n \tunion cn10k_ml_error_code *error_code;\n \tstruct cn10k_ml_dev *cn10k_mldev;\n+\tstruct cnxk_ml_dev *cnxk_mldev;\n \tstruct cnxk_ml_model *model;\n \tstruct cnxk_ml_layer *layer;\n \tstruct cnxk_ml_req *req;\n+\tstruct rte_ml_op op;\n \tuint16_t model_id;\n \tuint16_t layer_id;\n \tbool timeout;\n \tint ret = 0;\n \n+\tcnxk_mldev = (struct cnxk_ml_dev *)device;\n \tcn10k_mldev = &cnxk_mldev->cn10k_mldev;\n-\tmodel_id = cnxk_mldev->index_map[op->model_id].model_id;\n-\tlayer_id = cnxk_mldev->index_map[op->model_id].layer_id;\n+\tmodel_id = cnxk_mldev->index_map[index].model_id;\n+\tlayer_id = cnxk_mldev->index_map[index].layer_id;\n \tmodel = cnxk_mldev->mldev->data->models[model_id];\n \tlayer = &model->layer[layer_id];\n \treq = layer->glow.req;\n \n+\top.model_id = index;\n+\top.impl_opaque = 0;\n+\n \tcn10k_ml_set_poll_addr(req);\n-\tcn10k_ml_prep_fp_job_descriptor(cnxk_mldev, req, op);\n+\tcn10k_ml_prep_fp_job_descriptor(cnxk_mldev, req, index, input, output, nb_batches);\n \n \tmemset(&req->cn10k_req.result, 0, sizeof(struct cn10k_ml_result));\n \terror_code = (union cn10k_ml_error_code *)&req->cn10k_req.result.error_code;\n \terror_code->s.etype = ML_ETYPE_UNKNOWN;\n-\treq->cn10k_req.result.user_ptr = op->user_ptr;\n+\treq->cn10k_req.result.user_ptr = NULL;\n \n-\tcn10k_mldev->set_poll_ptr(req);\n+\tcnxk_ml_set_poll_ptr(req);\n \treq->cn10k_req.jcmd.w1.s.jobptr = PLT_U64_CAST(&req->cn10k_req.jd);\n \n \ttimeout = true;\n \treq->timeout = plt_tsc_cycles() + ML_CNXK_CMD_TIMEOUT * plt_tsc_hz();\n \tdo {\n \t\tif (cn10k_mldev->ml_jcmdq_enqueue(&cn10k_mldev->roc, &req->cn10k_req.jcmd)) {\n-\t\t\treq->op = op;\n+\t\t\treq->op = &op;\n \t\t\ttimeout = false;\n \t\t\tbreak;\n \t\t}\n@@ -1518,7 +1395,7 @@ cn10k_ml_inference_sync(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op)\n \n \ttimeout = true;\n \tdo {\n-\t\tif (cn10k_mldev->get_poll_ptr(req) == ML_CNXK_POLL_JOB_FINISH) 
{\n+\t\tif (cnxk_ml_get_poll_ptr(req) == ML_CNXK_POLL_JOB_FINISH) {\n \t\t\ttimeout = false;\n \t\t\tbreak;\n \t\t}\ndiff --git a/drivers/ml/cnxk/cn10k_ml_ops.h b/drivers/ml/cnxk/cn10k_ml_ops.h\nindex 4d76164dba..3d18303ed3 100644\n--- a/drivers/ml/cnxk/cn10k_ml_ops.h\n+++ b/drivers/ml/cnxk/cn10k_ml_ops.h\n@@ -14,6 +14,7 @@ struct cnxk_ml_dev;\n struct cnxk_ml_qp;\n struct cnxk_ml_model;\n struct cnxk_ml_layer;\n+struct cnxk_ml_req;\n \n /* Firmware version string length */\n #define MLDEV_FIRMWARE_VERSION_LENGTH 32\n@@ -309,13 +310,15 @@ int cn10k_ml_model_params_update(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_\n \t\t\t\t void *buffer);\n \n /* Fast-path ops */\n-__rte_hot uint16_t cn10k_ml_enqueue_burst(struct rte_ml_dev *dev, uint16_t qp_id,\n-\t\t\t\t\t  struct rte_ml_op **ops, uint16_t nb_ops);\n-__rte_hot uint16_t cn10k_ml_dequeue_burst(struct rte_ml_dev *dev, uint16_t qp_id,\n-\t\t\t\t\t  struct rte_ml_op **ops, uint16_t nb_ops);\n+__rte_hot bool cn10k_ml_enqueue_single(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op,\n+\t\t\t\t       uint16_t layer_id, struct cnxk_ml_qp *qp, uint64_t head);\n __rte_hot int cn10k_ml_op_error_get(struct rte_ml_dev *dev, struct rte_ml_op *op,\n \t\t\t\t    struct rte_ml_op_error *error);\n-__rte_hot int cn10k_ml_inference_sync(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op);\n+__rte_hot int cn10k_ml_inference_sync(void *device, uint16_t index, void *input, void *output,\n+\t\t\t\t      uint16_t nb_batches);\n+__rte_hot void cn10k_ml_result_update(struct cnxk_ml_dev *cnxk_mldev, int qp_id, void *request);\n+__rte_hot void cn10k_ml_set_error_code(struct cnxk_ml_req *req, uint64_t etype, uint64_t stype);\n+__rte_hot void cn10k_ml_set_poll_addr(struct cnxk_ml_req *req);\n \n /* Misc ops */\n void cn10k_ml_qp_initialize(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_qp *qp);\ndiff --git a/drivers/ml/cnxk/cnxk_ml_model.h b/drivers/ml/cnxk/cnxk_ml_model.h\nindex 66d979dd3c..f618e5aa5f 100644\n--- a/drivers/ml/cnxk/cnxk_ml_model.h\n+++ b/drivers/ml/cnxk/cnxk_ml_model.h\n@@ -15,6 +15,8 @@\n \n struct cnxk_ml_dev;\n struct cnxk_ml_model;\n+struct cnxk_ml_qp;\n+struct cnxk_ml_req;\n \n /* Model state */\n enum cnxk_ml_model_state {\n@@ -70,6 +72,12 @@ struct cnxk_ml_layer {\n \tstruct cn10k_ml_layer_data glow;\n };\n \n+typedef bool (*enqueue_single_t)(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op,\n+\t\t\t\t uint16_t layer_id, struct cnxk_ml_qp *qp, uint64_t head);\n+typedef void (*result_update_t)(struct cnxk_ml_dev *cnxk_mldev, int qp_id, void *request);\n+typedef void (*set_error_code_t)(struct cnxk_ml_req *req, uint64_t etype, uint64_t stype);\n+typedef void (*set_poll_addr_t)(struct cnxk_ml_req *req);\n+\n /* Model Object */\n struct cnxk_ml_model {\n \t/* Device reference */\n@@ -106,6 +114,12 @@ struct cnxk_ml_model {\n \n \t/* Spinlock, used to update model state */\n \tplt_spinlock_t lock;\n+\n+\t/* Fast-path functions */\n+\tenqueue_single_t enqueue_single;\n+\tresult_update_t result_update;\n+\tset_error_code_t set_error_code;\n+\tset_poll_addr_t set_poll_addr;\n };\n \n void cnxk_ml_model_dump(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model, FILE *fp);\ndiff --git a/drivers/ml/cnxk/cnxk_ml_ops.c b/drivers/ml/cnxk/cnxk_ml_ops.c\nindex 4f4a41219e..909e9143bf 100644\n--- a/drivers/ml/cnxk/cnxk_ml_ops.c\n+++ b/drivers/ml/cnxk/cnxk_ml_ops.c\n@@ -15,6 +15,18 @@\n /* ML model macros */\n #define CNXK_ML_MODEL_MEMZONE_NAME \"ml_cnxk_model_mz\"\n \n+__rte_hot void\n+cnxk_ml_set_poll_ptr(struct cnxk_ml_req 
*req)\n+{\n+\tplt_write64(ML_CNXK_POLL_JOB_START, req->status);\n+}\n+\n+__rte_hot uint64_t\n+cnxk_ml_get_poll_ptr(struct cnxk_ml_req *req)\n+{\n+\treturn plt_read64(req->status);\n+}\n+\n static void\n qp_memzone_name_get(char *name, int size, int dev_id, int qp_id)\n {\n@@ -1262,6 +1274,122 @@ cnxk_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, struct rte_ml_b\n \treturn 0;\n }\n \n+static __rte_always_inline void\n+queue_index_advance(uint64_t *index, uint64_t nb_desc)\n+{\n+\t*index = (*index + 1) % nb_desc;\n+}\n+\n+static __rte_always_inline uint64_t\n+queue_pending_count(uint64_t head, uint64_t tail, uint64_t nb_desc)\n+{\n+\treturn (nb_desc + head - tail) % nb_desc;\n+}\n+\n+static __rte_always_inline uint64_t\n+queue_free_count(uint64_t head, uint64_t tail, uint64_t nb_desc)\n+{\n+\treturn nb_desc - queue_pending_count(head, tail, nb_desc) - 1;\n+}\n+\n+__rte_hot uint16_t\n+cnxk_ml_enqueue_burst(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops,\n+\t\t      uint16_t nb_ops)\n+{\n+\tstruct cnxk_ml_dev *cnxk_mldev;\n+\tstruct cnxk_ml_model *model;\n+\tstruct cnxk_ml_queue *queue;\n+\tstruct cnxk_ml_qp *qp;\n+\tstruct rte_ml_op *op;\n+\n+\tuint16_t layer_id = 0;\n+\tuint16_t count;\n+\tuint64_t head;\n+\n+\tcnxk_mldev = dev->data->dev_private;\n+\tqp = dev->data->queue_pairs[qp_id];\n+\tqueue = &qp->queue;\n+\n+\thead = queue->head;\n+\tnb_ops = PLT_MIN(nb_ops, queue_free_count(head, queue->tail, qp->nb_desc));\n+\tcount = 0;\n+\n+\tif (unlikely(nb_ops == 0))\n+\t\treturn 0;\n+\n+enqueue_req:\n+\top = ops[count];\n+\tmodel = cnxk_mldev->mldev->data->models[op->model_id];\n+\n+\tif (unlikely(!model->enqueue_single(cnxk_mldev, op, layer_id, qp, head)))\n+\t\tgoto jcmdq_full;\n+\n+\tqueue_index_advance(&head, qp->nb_desc);\n+\tcount++;\n+\n+\tif (count < nb_ops)\n+\t\tgoto enqueue_req;\n+\n+jcmdq_full:\n+\tqueue->head = head;\n+\tqp->stats.enqueued_count += count;\n+\n+\treturn count;\n+}\n+\n+__rte_hot uint16_t\n+cnxk_ml_dequeue_burst(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops,\n+\t\t      uint16_t nb_ops)\n+{\n+\tstruct cnxk_ml_dev *cnxk_mldev;\n+\tstruct cnxk_ml_queue *queue;\n+\tstruct cnxk_ml_model *model;\n+\tstruct cnxk_ml_req *req;\n+\tstruct cnxk_ml_qp *qp;\n+\n+\tuint64_t status;\n+\tuint16_t count;\n+\tuint64_t tail;\n+\n+\tcnxk_mldev = dev->data->dev_private;\n+\tqp = dev->data->queue_pairs[qp_id];\n+\tqueue = &qp->queue;\n+\n+\ttail = queue->tail;\n+\tnb_ops = PLT_MIN(nb_ops, queue_pending_count(queue->head, tail, qp->nb_desc));\n+\tcount = 0;\n+\n+\tif (unlikely(nb_ops == 0))\n+\t\tgoto empty_or_active;\n+\n+dequeue_req:\n+\n+\treq = &queue->reqs[tail];\n+\tmodel = cnxk_mldev->mldev->data->models[req->op->model_id];\n+\n+\tstatus = cnxk_ml_get_poll_ptr(req);\n+\tif (unlikely(status != ML_CNXK_POLL_JOB_FINISH)) {\n+\t\tif (plt_tsc_cycles() < req->timeout)\n+\t\t\tgoto empty_or_active;\n+\t\telse /* Timeout, set indication of driver error */\n+\t\t\tmodel->set_error_code(req, ML_ETYPE_DRIVER, 0);\n+\t}\n+\n+\tmodel->result_update(cnxk_mldev, qp->id, req);\n+\n+\tops[count] = req->op;\n+\tqueue_index_advance(&tail, qp->nb_desc);\n+\tcount++;\n+\n+\tif (count < nb_ops)\n+\t\tgoto dequeue_req;\n+\n+empty_or_active:\n+\tqueue->tail = tail;\n+\n+\treturn count;\n+}\n+\n struct rte_ml_dev_ops cnxk_ml_ops = {\n \t/* Device control ops */\n \t.dev_info_get = cnxk_ml_dev_info_get,\ndiff --git a/drivers/ml/cnxk/cnxk_ml_ops.h b/drivers/ml/cnxk/cnxk_ml_ops.h\nindex d27ca0d0cb..d0c126f34b 100644\n--- a/drivers/ml/cnxk/cnxk_ml_ops.h\n+++ 
b/drivers/ml/cnxk/cnxk_ml_ops.h\n@@ -65,4 +65,11 @@ extern struct rte_ml_dev_ops cnxk_ml_ops;\n int cnxk_ml_model_unload(struct rte_ml_dev *dev, uint16_t model_id);\n int cnxk_ml_model_stop(struct rte_ml_dev *dev, uint16_t model_id);\n \n+__rte_hot uint16_t cnxk_ml_enqueue_burst(struct rte_ml_dev *dev, uint16_t qp_id,\n+\t\t\t\t\t struct rte_ml_op **ops, uint16_t nb_ops);\n+__rte_hot uint16_t cnxk_ml_dequeue_burst(struct rte_ml_dev *dev, uint16_t qp_id,\n+\t\t\t\t\t struct rte_ml_op **ops, uint16_t nb_ops);\n+__rte_hot void cnxk_ml_set_poll_ptr(struct cnxk_ml_req *req);\n+__rte_hot uint64_t cnxk_ml_get_poll_ptr(struct cnxk_ml_req *req);\n+\n #endif /* _CNXK_ML_OPS_H_ */\n",
    "prefixes": [
        "v8",
        "16/34"
    ]
}
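
A consumer of this response would normally follow the embedded links rather than parse the escaped diff field directly; the mbox URLs, for instance, return the patch and its whole series as raw mail text suitable for git am. A hypothetical, self-contained sketch (output file names are illustrative only):

import json
import urllib.request

def save_mbox(url: str, path: str) -> None:
    # mbox endpoints serve the raw mail text
    with urllib.request.urlopen(url) as resp, open(path, "wb") as out:
        out.write(resp.read())

with urllib.request.urlopen("http://patches.dpdk.org/api/patches/133142/?format=json") as resp:
    patch = json.load(resp)

save_mbox(patch["mbox"], "patch-133142.mbox")                  # this patch only
for series in patch["series"]:                                 # the full v8 series
    save_mbox(series["mbox"], "series-%d.mbox" % series["id"])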