get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/132259/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 132259,
    "url": "http://patches.dpdk.org/api/patches/132259/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20231002095859.12187-3-syalavarthi@marvell.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20231002095859.12187-3-syalavarthi@marvell.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20231002095859.12187-3-syalavarthi@marvell.com",
    "date": "2023-10-02T09:58:57",
    "name": "[v4,2/3] mldev: introduce support for IO layout",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "d23ea775cfa88a0eef3cb1e063a9caf618b92ea5",
    "submitter": {
        "id": 2480,
        "url": "http://patches.dpdk.org/api/people/2480/?format=api",
        "name": "Srikanth Yalavarthi",
        "email": "syalavarthi@marvell.com"
    },
    "delegate": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/users/1/?format=api",
        "username": "tmonjalo",
        "first_name": "Thomas",
        "last_name": "Monjalon",
        "email": "thomas@monjalon.net"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20231002095859.12187-3-syalavarthi@marvell.com/mbox/",
    "series": [
        {
            "id": 29710,
            "url": "http://patches.dpdk.org/api/series/29710/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=29710",
            "date": "2023-10-02T09:58:55",
            "name": "Spec changes to support multi I/O models",
            "version": 4,
            "mbox": "http://patches.dpdk.org/series/29710/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/132259/comments/",
    "check": "success",
    "checks": "http://patches.dpdk.org/api/patches/132259/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 0E261426A0;\n\tMon,  2 Oct 2023 11:59:26 +0200 (CEST)",
            "from mails.dpdk.org (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 35F86402EC;\n\tMon,  2 Oct 2023 11:59:15 +0200 (CEST)",
            "from mx0b-0016f401.pphosted.com (mx0a-0016f401.pphosted.com\n [67.231.148.174])\n by mails.dpdk.org (Postfix) with ESMTP id 55B7D4003C\n for <dev@dpdk.org>; Mon,  2 Oct 2023 11:59:10 +0200 (CEST)",
            "from pps.filterd (m0045849.ppops.net [127.0.0.1])\n by mx0a-0016f401.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id\n 391M0Dfp018439 for <dev@dpdk.org>; Mon, 2 Oct 2023 02:59:09 -0700",
            "from dc5-exch01.marvell.com ([199.233.59.181])\n by mx0a-0016f401.pphosted.com (PPS) with ESMTPS id 3teh1qvcr0-1\n (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT)\n for <dev@dpdk.org>; Mon, 02 Oct 2023 02:59:09 -0700",
            "from DC5-EXCH02.marvell.com (10.69.176.39) by DC5-EXCH01.marvell.com\n (10.69.176.38) with Microsoft SMTP Server (TLS) id 15.0.1497.48;\n Mon, 2 Oct 2023 02:59:07 -0700",
            "from maili.marvell.com (10.69.176.80) by DC5-EXCH02.marvell.com\n (10.69.176.39) with Microsoft SMTP Server id 15.0.1497.48 via Frontend\n Transport; Mon, 2 Oct 2023 02:59:07 -0700",
            "from ml-host-33.caveonetworks.com (unknown [10.110.143.233])\n by maili.marvell.com (Postfix) with ESMTP id 773D33F7050;\n Mon,  2 Oct 2023 02:59:06 -0700 (PDT)"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com;\n h=from : to : cc :\n subject : date : message-id : in-reply-to : references : mime-version :\n content-transfer-encoding : content-type; s=pfpt0220;\n bh=RAPtPH07ZloQHov67KlDDInc+zhgCzviTv/Pr0HNyJE=;\n b=bZ9kaIL18nBYeFe5CygnolkAeUBeMGQLmU1iSenT8hrRsJ3QegF58zRI2qtAVzqe/nW0\n yFc6Rv5ox3clXeCaLDLChVVUHhnt4Ehw24h2jJj4S8kt4wsTxnWPqJcacB/wYG0IzSSc\n ctPES61hfBPQn0kkhUVZEtu2VpnL/eYEwXbiFJjQHiPu8iFQhY96ezGCEK9kdRqyuOGN\n 6MRotH8PcH4oJ/HGfecOjjTGhOlB9ihI22xMCZtOolir8PPallQAAiqLDzFwxJIpO10j\n B1dBeHQCZW9cowDsPfEStztdRhJ6f8psQSbRNS9Ka97sNxlPiRfK85GfrxgGOvrVtdH3 QA==",
        "From": "Srikanth Yalavarthi <syalavarthi@marvell.com>",
        "To": "Srikanth Yalavarthi <syalavarthi@marvell.com>",
        "CC": "<dev@dpdk.org>, <sshankarnara@marvell.com>, <aprabhu@marvell.com>,\n <ptakkar@marvell.com>",
        "Subject": "[PATCH v4 2/3] mldev: introduce support for IO layout",
        "Date": "Mon, 2 Oct 2023 02:58:57 -0700",
        "Message-ID": "<20231002095859.12187-3-syalavarthi@marvell.com>",
        "X-Mailer": "git-send-email 2.41.0",
        "In-Reply-To": "<20231002095859.12187-1-syalavarthi@marvell.com>",
        "References": "<20230830155303.30380-1-syalavarthi@marvell.com>\n <20231002095859.12187-1-syalavarthi@marvell.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "Content-Type": "text/plain",
        "X-Proofpoint-GUID": "zTNd4uuQvnY9y7GLDuSitGzDWSL-rle8",
        "X-Proofpoint-ORIG-GUID": "zTNd4uuQvnY9y7GLDuSitGzDWSL-rle8",
        "X-Proofpoint-Virus-Version": "vendor=baseguard\n engine=ICAP:2.0.267,Aquarius:18.0.980,Hydra:6.0.619,FMLib:17.11.176.26\n definitions=2023-10-02_03,2023-09-28_03,2023-05-22_02",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "Introduce IO layout in ML device specification. IO layout\ndefines the expected arrangement of model input and output\nbuffers in the memory. Packed and Split layout support is\nadded in the specification.\n\nUpdated rte_ml_op to support array of rte_ml_buff_seg\npointers to support packed and split I/O layouts. Updated\nML quantize and dequantize APIs to support rte_ml_buff_seg\npointer arrays. Replaced batch_size with min_batches and\nmax_batches in rte_ml_model_info.\n\nImplement support for model IO layout in ml/cnxk driver.\nUpdated the ML test application to support IO layout and\ndropped support for '--batches' in test application.\n\nSigned-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>\n---\n app/test-mldev/ml_options.c            |  16 --\n app/test-mldev/ml_options.h            |   2 -\n app/test-mldev/test_inference_common.c | 327 +++++++++++++++++++++----\n app/test-mldev/test_inference_common.h |   6 +\n app/test-mldev/test_model_common.c     |   6 -\n app/test-mldev/test_model_common.h     |   1 -\n doc/guides/rel_notes/release_23_11.rst |  10 +\n doc/guides/tools/testmldev.rst         |   6 -\n drivers/ml/cnxk/cn10k_ml_dev.h         |   3 +\n drivers/ml/cnxk/cn10k_ml_model.c       |   6 +-\n drivers/ml/cnxk/cn10k_ml_ops.c         |  74 +++---\n lib/mldev/meson.build                  |   2 +-\n lib/mldev/rte_mldev.c                  |  12 +-\n lib/mldev/rte_mldev.h                  |  90 +++++--\n lib/mldev/rte_mldev_core.h             |  14 +-\n 15 files changed, 428 insertions(+), 147 deletions(-)",
    "diff": "diff --git a/app/test-mldev/ml_options.c b/app/test-mldev/ml_options.c\nindex d068b30df5..eeaffec399 100644\n--- a/app/test-mldev/ml_options.c\n+++ b/app/test-mldev/ml_options.c\n@@ -28,7 +28,6 @@ ml_options_default(struct ml_options *opt)\n \topt->burst_size = 1;\n \topt->queue_pairs = 1;\n \topt->queue_size = 1;\n-\topt->batches = 0;\n \topt->tolerance = 0.0;\n \topt->stats = false;\n \topt->debug = false;\n@@ -213,18 +212,6 @@ ml_parse_queue_size(struct ml_options *opt, const char *arg)\n \treturn ret;\n }\n \n-static int\n-ml_parse_batches(struct ml_options *opt, const char *arg)\n-{\n-\tint ret;\n-\n-\tret = parser_read_uint16(&opt->batches, arg);\n-\tif (ret != 0)\n-\t\tml_err(\"Invalid option, batches = %s\\n\", arg);\n-\n-\treturn ret;\n-}\n-\n static int\n ml_parse_tolerance(struct ml_options *opt, const char *arg)\n {\n@@ -255,7 +242,6 @@ ml_dump_test_options(const char *testname)\n \t\t       \"\\t\\t--burst_size       : inference burst size\\n\"\n \t\t       \"\\t\\t--queue_pairs      : number of queue pairs to create\\n\"\n \t\t       \"\\t\\t--queue_size       : size of queue-pair\\n\"\n-\t\t       \"\\t\\t--batches          : number of batches of input\\n\"\n \t\t       \"\\t\\t--tolerance        : maximum tolerance (%%) for output validation\\n\"\n \t\t       \"\\t\\t--stats            : enable reporting device and model statistics\\n\");\n \t\tprintf(\"\\n\");\n@@ -287,7 +273,6 @@ static struct option lgopts[] = {\n \t{ML_BURST_SIZE, 1, 0, 0},\n \t{ML_QUEUE_PAIRS, 1, 0, 0},\n \t{ML_QUEUE_SIZE, 1, 0, 0},\n-\t{ML_BATCHES, 1, 0, 0},\n \t{ML_TOLERANCE, 1, 0, 0},\n \t{ML_STATS, 0, 0, 0},\n \t{ML_DEBUG, 0, 0, 0},\n@@ -309,7 +294,6 @@ ml_opts_parse_long(int opt_idx, struct ml_options *opt)\n \t\t{ML_BURST_SIZE, ml_parse_burst_size},\n \t\t{ML_QUEUE_PAIRS, ml_parse_queue_pairs},\n \t\t{ML_QUEUE_SIZE, ml_parse_queue_size},\n-\t\t{ML_BATCHES, ml_parse_batches},\n \t\t{ML_TOLERANCE, ml_parse_tolerance},\n \t};\n \ndiff --git a/app/test-mldev/ml_options.h b/app/test-mldev/ml_options.h\nindex 622a4c05fc..90e22adeac 100644\n--- a/app/test-mldev/ml_options.h\n+++ b/app/test-mldev/ml_options.h\n@@ -21,7 +21,6 @@\n #define ML_BURST_SIZE  (\"burst_size\")\n #define ML_QUEUE_PAIRS (\"queue_pairs\")\n #define ML_QUEUE_SIZE  (\"queue_size\")\n-#define ML_BATCHES     (\"batches\")\n #define ML_TOLERANCE   (\"tolerance\")\n #define ML_STATS       (\"stats\")\n #define ML_DEBUG       (\"debug\")\n@@ -44,7 +43,6 @@ struct ml_options {\n \tuint16_t burst_size;\n \tuint16_t queue_pairs;\n \tuint16_t queue_size;\n-\tuint16_t batches;\n \tfloat tolerance;\n \tbool stats;\n \tbool debug;\ndiff --git a/app/test-mldev/test_inference_common.c b/app/test-mldev/test_inference_common.c\nindex b40519b5e3..846f71abb1 100644\n--- a/app/test-mldev/test_inference_common.c\n+++ b/app/test-mldev/test_inference_common.c\n@@ -47,7 +47,10 @@ ml_enqueue_single(void *arg)\n \tuint64_t start_cycle;\n \tuint32_t burst_enq;\n \tuint32_t lcore_id;\n+\tuint64_t offset;\n+\tuint64_t bufsz;\n \tuint16_t fid;\n+\tuint32_t i;\n \tint ret;\n \n \tlcore_id = rte_lcore_id();\n@@ -66,24 +69,64 @@ ml_enqueue_single(void *arg)\n \tif (ret != 0)\n \t\tgoto next_model;\n \n-retry:\n+retry_req:\n \tret = rte_mempool_get(t->model[fid].io_pool, (void **)&req);\n \tif (ret != 0)\n-\t\tgoto retry;\n+\t\tgoto retry_req;\n+\n+retry_inp_segs:\n+\tret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)req->inp_buf_segs,\n+\t\t\t\t   t->model[fid].info.nb_inputs);\n+\tif (ret != 0)\n+\t\tgoto retry_inp_segs;\n+\n+retry_out_segs:\n+\tret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)req->out_buf_segs,\n+\t\t\t\t   t->model[fid].info.nb_outputs);\n+\tif (ret != 0)\n+\t\tgoto retry_out_segs;\n \n \top->model_id = t->model[fid].id;\n-\top->nb_batches = t->model[fid].nb_batches;\n+\top->nb_batches = t->model[fid].info.min_batches;\n \top->mempool = t->op_pool;\n+\top->input = req->inp_buf_segs;\n+\top->output = req->out_buf_segs;\n+\top->user_ptr = req;\n \n-\top->input.addr = req->input;\n-\top->input.length = t->model[fid].inp_qsize;\n-\top->input.next = NULL;\n+\tif (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {\n+\t\top->input[0]->addr = req->input;\n+\t\top->input[0]->iova_addr = rte_mem_virt2iova(req->input);\n+\t\top->input[0]->length = t->model[fid].inp_qsize;\n+\t\top->input[0]->next = NULL;\n+\n+\t\top->output[0]->addr = req->output;\n+\t\top->output[0]->iova_addr = rte_mem_virt2iova(req->output);\n+\t\top->output[0]->length = t->model[fid].out_qsize;\n+\t\top->output[0]->next = NULL;\n+\t} else {\n+\t\toffset = 0;\n+\t\tfor (i = 0; i < t->model[fid].info.nb_inputs; i++) {\n+\t\t\tbufsz = RTE_ALIGN_CEIL(t->model[fid].info.input_info[i].size,\n+\t\t\t\t\t       t->cmn.dev_info.align_size);\n+\t\t\top->input[i]->addr = req->input + offset;\n+\t\t\top->input[i]->iova_addr = rte_mem_virt2iova(req->input + offset);\n+\t\t\top->input[i]->length = bufsz;\n+\t\t\top->input[i]->next = NULL;\n+\t\t\toffset += bufsz;\n+\t\t}\n \n-\top->output.addr = req->output;\n-\top->output.length = t->model[fid].out_qsize;\n-\top->output.next = NULL;\n+\t\toffset = 0;\n+\t\tfor (i = 0; i < t->model[fid].info.nb_outputs; i++) {\n+\t\t\tbufsz = RTE_ALIGN_CEIL(t->model[fid].info.output_info[i].size,\n+\t\t\t\t\t       t->cmn.dev_info.align_size);\n+\t\t\top->output[i]->addr = req->output + offset;\n+\t\t\top->output[i]->iova_addr = rte_mem_virt2iova(req->output + offset);\n+\t\t\top->output[i]->length = bufsz;\n+\t\t\top->output[i]->next = NULL;\n+\t\t\toffset += bufsz;\n+\t\t}\n+\t}\n \n-\top->user_ptr = req;\n \treq->niters++;\n \treq->fid = fid;\n \n@@ -143,6 +186,10 @@ ml_dequeue_single(void *arg)\n \t\t}\n \t\treq = (struct ml_request *)op->user_ptr;\n \t\trte_mempool_put(t->model[req->fid].io_pool, req);\n+\t\trte_mempool_put_bulk(t->buf_seg_pool, (void **)op->input,\n+\t\t\t\t     t->model[req->fid].info.nb_inputs);\n+\t\trte_mempool_put_bulk(t->buf_seg_pool, (void **)op->output,\n+\t\t\t\t     t->model[req->fid].info.nb_outputs);\n \t\trte_mempool_put(t->op_pool, op);\n \t}\n \n@@ -164,9 +211,12 @@ ml_enqueue_burst(void *arg)\n \tuint16_t burst_enq;\n \tuint32_t lcore_id;\n \tuint16_t pending;\n+\tuint64_t offset;\n+\tuint64_t bufsz;\n \tuint16_t idx;\n \tuint16_t fid;\n \tuint16_t i;\n+\tuint16_t j;\n \tint ret;\n \n \tlcore_id = rte_lcore_id();\n@@ -186,25 +236,70 @@ ml_enqueue_burst(void *arg)\n \tif (ret != 0)\n \t\tgoto next_model;\n \n-retry:\n+retry_reqs:\n \tret = rte_mempool_get_bulk(t->model[fid].io_pool, (void **)args->reqs, ops_count);\n \tif (ret != 0)\n-\t\tgoto retry;\n+\t\tgoto retry_reqs;\n \n \tfor (i = 0; i < ops_count; i++) {\n+retry_inp_segs:\n+\t\tret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)args->reqs[i]->inp_buf_segs,\n+\t\t\t\t\t   t->model[fid].info.nb_inputs);\n+\t\tif (ret != 0)\n+\t\t\tgoto retry_inp_segs;\n+\n+retry_out_segs:\n+\t\tret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)args->reqs[i]->out_buf_segs,\n+\t\t\t\t\t   t->model[fid].info.nb_outputs);\n+\t\tif (ret != 0)\n+\t\t\tgoto retry_out_segs;\n+\n \t\targs->enq_ops[i]->model_id = t->model[fid].id;\n-\t\targs->enq_ops[i]->nb_batches = t->model[fid].nb_batches;\n+\t\targs->enq_ops[i]->nb_batches = t->model[fid].info.min_batches;\n \t\targs->enq_ops[i]->mempool = t->op_pool;\n+\t\targs->enq_ops[i]->input = args->reqs[i]->inp_buf_segs;\n+\t\targs->enq_ops[i]->output = args->reqs[i]->out_buf_segs;\n+\t\targs->enq_ops[i]->user_ptr = args->reqs[i];\n \n-\t\targs->enq_ops[i]->input.addr = args->reqs[i]->input;\n-\t\targs->enq_ops[i]->input.length = t->model[fid].inp_qsize;\n-\t\targs->enq_ops[i]->input.next = NULL;\n+\t\tif (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {\n+\t\t\targs->enq_ops[i]->input[0]->addr = args->reqs[i]->input;\n+\t\t\targs->enq_ops[i]->input[0]->iova_addr =\n+\t\t\t\trte_mem_virt2iova(args->reqs[i]->input);\n+\t\t\targs->enq_ops[i]->input[0]->length = t->model[fid].inp_qsize;\n+\t\t\targs->enq_ops[i]->input[0]->next = NULL;\n+\n+\t\t\targs->enq_ops[i]->output[0]->addr = args->reqs[i]->output;\n+\t\t\targs->enq_ops[i]->output[0]->iova_addr =\n+\t\t\t\trte_mem_virt2iova(args->reqs[i]->output);\n+\t\t\targs->enq_ops[i]->output[0]->length = t->model[fid].out_qsize;\n+\t\t\targs->enq_ops[i]->output[0]->next = NULL;\n+\t\t} else {\n+\t\t\toffset = 0;\n+\t\t\tfor (j = 0; j < t->model[fid].info.nb_inputs; j++) {\n+\t\t\t\tbufsz = RTE_ALIGN_CEIL(t->model[fid].info.input_info[i].size,\n+\t\t\t\t\t\t       t->cmn.dev_info.align_size);\n+\n+\t\t\t\targs->enq_ops[i]->input[j]->addr = args->reqs[i]->input + offset;\n+\t\t\t\targs->enq_ops[i]->input[j]->iova_addr =\n+\t\t\t\t\trte_mem_virt2iova(args->reqs[i]->input + offset);\n+\t\t\t\targs->enq_ops[i]->input[j]->length = t->model[fid].inp_qsize;\n+\t\t\t\targs->enq_ops[i]->input[j]->next = NULL;\n+\t\t\t\toffset += bufsz;\n+\t\t\t}\n \n-\t\targs->enq_ops[i]->output.addr = args->reqs[i]->output;\n-\t\targs->enq_ops[i]->output.length = t->model[fid].out_qsize;\n-\t\targs->enq_ops[i]->output.next = NULL;\n+\t\t\toffset = 0;\n+\t\t\tfor (j = 0; j < t->model[fid].info.nb_outputs; j++) {\n+\t\t\t\tbufsz = RTE_ALIGN_CEIL(t->model[fid].info.output_info[i].size,\n+\t\t\t\t\t\t       t->cmn.dev_info.align_size);\n+\t\t\t\targs->enq_ops[i]->output[j]->addr = args->reqs[i]->output + offset;\n+\t\t\t\targs->enq_ops[i]->output[j]->iova_addr =\n+\t\t\t\t\trte_mem_virt2iova(args->reqs[i]->output + offset);\n+\t\t\t\targs->enq_ops[i]->output[j]->length = t->model[fid].out_qsize;\n+\t\t\t\targs->enq_ops[i]->output[j]->next = NULL;\n+\t\t\t\toffset += bufsz;\n+\t\t\t}\n+\t\t}\n \n-\t\targs->enq_ops[i]->user_ptr = args->reqs[i];\n \t\targs->reqs[i]->niters++;\n \t\targs->reqs[i]->fid = fid;\n \t}\n@@ -275,8 +370,15 @@ ml_dequeue_burst(void *arg)\n \t\t\t\tt->error_count[lcore_id]++;\n \t\t\t}\n \t\t\treq = (struct ml_request *)args->deq_ops[i]->user_ptr;\n-\t\t\tif (req != NULL)\n+\t\t\tif (req != NULL) {\n \t\t\t\trte_mempool_put(t->model[req->fid].io_pool, req);\n+\t\t\t\trte_mempool_put_bulk(t->buf_seg_pool,\n+\t\t\t\t\t\t     (void **)args->deq_ops[i]->input,\n+\t\t\t\t\t\t     t->model[req->fid].info.nb_inputs);\n+\t\t\t\trte_mempool_put_bulk(t->buf_seg_pool,\n+\t\t\t\t\t\t     (void **)args->deq_ops[i]->output,\n+\t\t\t\t\t\t     t->model[req->fid].info.nb_outputs);\n+\t\t\t}\n \t\t}\n \t\trte_mempool_put_bulk(t->op_pool, (void *)args->deq_ops, burst_deq);\n \t}\n@@ -315,6 +417,12 @@ test_inference_cap_check(struct ml_options *opt)\n \t\treturn false;\n \t}\n \n+\tif (dev_info.max_io < ML_TEST_MAX_IO_SIZE) {\n+\t\tml_err(\"Insufficient capabilities:  Max I/O, count = %u > (max limit = %u)\",\n+\t\t       ML_TEST_MAX_IO_SIZE, dev_info.max_io);\n+\t\treturn false;\n+\t}\n+\n \treturn true;\n }\n \n@@ -403,11 +511,6 @@ test_inference_opt_dump(struct ml_options *opt)\n \tml_dump(\"tolerance\", \"%-7.3f\", opt->tolerance);\n \tml_dump(\"stats\", \"%s\", (opt->stats ? \"true\" : \"false\"));\n \n-\tif (opt->batches == 0)\n-\t\tml_dump(\"batches\", \"%u (default batch size)\", opt->batches);\n-\telse\n-\t\tml_dump(\"batches\", \"%u\", opt->batches);\n-\n \tml_dump_begin(\"filelist\");\n \tfor (i = 0; i < opt->nb_filelist; i++) {\n \t\tml_dump_list(\"model\", i, opt->filelist[i].model);\n@@ -492,10 +595,18 @@ void\n test_inference_destroy(struct ml_test *test, struct ml_options *opt)\n {\n \tstruct test_inference *t;\n+\tuint32_t lcore_id;\n \n \tRTE_SET_USED(opt);\n \n \tt = ml_test_priv(test);\n+\n+\tfor (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {\n+\t\trte_free(t->args[lcore_id].enq_ops);\n+\t\trte_free(t->args[lcore_id].deq_ops);\n+\t\trte_free(t->args[lcore_id].reqs);\n+\t}\n+\n \trte_free(t);\n }\n \n@@ -572,19 +683,62 @@ ml_request_initialize(struct rte_mempool *mp, void *opaque, void *obj, unsigned\n {\n \tstruct test_inference *t = ml_test_priv((struct ml_test *)opaque);\n \tstruct ml_request *req = (struct ml_request *)obj;\n+\tstruct rte_ml_buff_seg dbuff_seg[ML_TEST_MAX_IO_SIZE];\n+\tstruct rte_ml_buff_seg qbuff_seg[ML_TEST_MAX_IO_SIZE];\n+\tstruct rte_ml_buff_seg *q_segs[ML_TEST_MAX_IO_SIZE];\n+\tstruct rte_ml_buff_seg *d_segs[ML_TEST_MAX_IO_SIZE];\n+\tuint64_t offset;\n+\tuint64_t bufsz;\n+\tuint32_t i;\n \n \tRTE_SET_USED(mp);\n \tRTE_SET_USED(obj_idx);\n \n \treq->input = (uint8_t *)obj +\n-\t\t     RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.min_align_size);\n-\treq->output = req->input +\n-\t\t      RTE_ALIGN_CEIL(t->model[t->fid].inp_qsize, t->cmn.dev_info.min_align_size);\n+\t\t     RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.align_size);\n+\treq->output =\n+\t\treq->input + RTE_ALIGN_CEIL(t->model[t->fid].inp_qsize, t->cmn.dev_info.align_size);\n \treq->niters = 0;\n \n+\tif (t->model[t->fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {\n+\t\tdbuff_seg[0].addr = t->model[t->fid].input;\n+\t\tdbuff_seg[0].iova_addr = rte_mem_virt2iova(t->model[t->fid].input);\n+\t\tdbuff_seg[0].length = t->model[t->fid].inp_dsize;\n+\t\tdbuff_seg[0].next = NULL;\n+\t\td_segs[0] = &dbuff_seg[0];\n+\n+\t\tqbuff_seg[0].addr = req->input;\n+\t\tqbuff_seg[0].iova_addr = rte_mem_virt2iova(req->input);\n+\t\tqbuff_seg[0].length = t->model[t->fid].inp_qsize;\n+\t\tqbuff_seg[0].next = NULL;\n+\t\tq_segs[0] = &qbuff_seg[0];\n+\t} else {\n+\t\toffset = 0;\n+\t\tfor (i = 0; i < t->model[t->fid].info.nb_inputs; i++) {\n+\t\t\tbufsz = t->model[t->fid].info.input_info[i].nb_elements * sizeof(float);\n+\t\t\tdbuff_seg[i].addr = t->model[t->fid].input + offset;\n+\t\t\tdbuff_seg[i].iova_addr = rte_mem_virt2iova(t->model[t->fid].input + offset);\n+\t\t\tdbuff_seg[i].length = bufsz;\n+\t\t\tdbuff_seg[i].next = NULL;\n+\t\t\td_segs[i] = &dbuff_seg[i];\n+\t\t\toffset += bufsz;\n+\t\t}\n+\n+\t\toffset = 0;\n+\t\tfor (i = 0; i < t->model[t->fid].info.nb_inputs; i++) {\n+\t\t\tbufsz = RTE_ALIGN_CEIL(t->model[t->fid].info.input_info[i].size,\n+\t\t\t\t\t       t->cmn.dev_info.align_size);\n+\t\t\tqbuff_seg[i].addr = req->input + offset;\n+\t\t\tqbuff_seg[i].iova_addr = rte_mem_virt2iova(req->input + offset);\n+\t\t\tqbuff_seg[i].length = bufsz;\n+\t\t\tqbuff_seg[i].next = NULL;\n+\t\t\tq_segs[i] = &qbuff_seg[i];\n+\t\t\toffset += bufsz;\n+\t\t}\n+\t}\n+\n \t/* quantize data */\n-\trte_ml_io_quantize(t->cmn.opt->dev_id, t->model[t->fid].id, t->model[t->fid].nb_batches,\n-\t\t\t   t->model[t->fid].input, req->input);\n+\trte_ml_io_quantize(t->cmn.opt->dev_id, t->model[t->fid].id, d_segs, q_segs);\n }\n \n int\n@@ -599,24 +753,39 @@ ml_inference_iomem_setup(struct ml_test *test, struct ml_options *opt, uint16_t\n \tuint32_t buff_size;\n \tuint32_t mz_size;\n \tsize_t fsize;\n+\tuint32_t i;\n \tint ret;\n \n \t/* get input buffer size */\n-\tret = rte_ml_io_input_size_get(opt->dev_id, t->model[fid].id, t->model[fid].nb_batches,\n-\t\t\t\t       &t->model[fid].inp_qsize, &t->model[fid].inp_dsize);\n-\tif (ret != 0) {\n-\t\tml_err(\"Failed to get input size, model : %s\\n\", opt->filelist[fid].model);\n-\t\treturn ret;\n+\tt->model[fid].inp_qsize = 0;\n+\tfor (i = 0; i < t->model[fid].info.nb_inputs; i++) {\n+\t\tif (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED)\n+\t\t\tt->model[fid].inp_qsize += t->model[fid].info.input_info[i].size;\n+\t\telse\n+\t\t\tt->model[fid].inp_qsize += RTE_ALIGN_CEIL(\n+\t\t\t\tt->model[fid].info.input_info[i].size, t->cmn.dev_info.align_size);\n \t}\n \n \t/* get output buffer size */\n-\tret = rte_ml_io_output_size_get(opt->dev_id, t->model[fid].id, t->model[fid].nb_batches,\n-\t\t\t\t\t&t->model[fid].out_qsize, &t->model[fid].out_dsize);\n-\tif (ret != 0) {\n-\t\tml_err(\"Failed to get input size, model : %s\\n\", opt->filelist[fid].model);\n-\t\treturn ret;\n+\tt->model[fid].out_qsize = 0;\n+\tfor (i = 0; i < t->model[fid].info.nb_outputs; i++) {\n+\t\tif (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED)\n+\t\t\tt->model[fid].out_qsize += t->model[fid].info.output_info[i].size;\n+\t\telse\n+\t\t\tt->model[fid].out_qsize += RTE_ALIGN_CEIL(\n+\t\t\t\tt->model[fid].info.output_info[i].size, t->cmn.dev_info.align_size);\n \t}\n \n+\tt->model[fid].inp_dsize = 0;\n+\tfor (i = 0; i < t->model[fid].info.nb_inputs; i++)\n+\t\tt->model[fid].inp_dsize +=\n+\t\t\tt->model[fid].info.input_info[i].nb_elements * sizeof(float);\n+\n+\tt->model[fid].out_dsize = 0;\n+\tfor (i = 0; i < t->model[fid].info.nb_outputs; i++)\n+\t\tt->model[fid].out_dsize +=\n+\t\t\tt->model[fid].info.output_info[i].nb_elements * sizeof(float);\n+\n \t/* allocate buffer for user data */\n \tmz_size = t->model[fid].inp_dsize + t->model[fid].out_dsize;\n \tif (strcmp(opt->filelist[fid].reference, \"\\0\") != 0)\n@@ -675,9 +844,9 @@ ml_inference_iomem_setup(struct ml_test *test, struct ml_options *opt, uint16_t\n \t/* create mempool for quantized input and output buffers. ml_request_initialize is\n \t * used as a callback for object creation.\n \t */\n-\tbuff_size = RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.min_align_size) +\n-\t\t    RTE_ALIGN_CEIL(t->model[fid].inp_qsize, t->cmn.dev_info.min_align_size) +\n-\t\t    RTE_ALIGN_CEIL(t->model[fid].out_qsize, t->cmn.dev_info.min_align_size);\n+\tbuff_size = RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.align_size) +\n+\t\t    RTE_ALIGN_CEIL(t->model[fid].inp_qsize, t->cmn.dev_info.align_size) +\n+\t\t    RTE_ALIGN_CEIL(t->model[fid].out_qsize, t->cmn.dev_info.align_size);\n \tnb_buffers = RTE_MIN((uint64_t)ML_TEST_MAX_POOL_SIZE, opt->repetitions);\n \n \tt->fid = fid;\n@@ -740,6 +909,18 @@ ml_inference_mem_setup(struct ml_test *test, struct ml_options *opt)\n \t\treturn -ENOMEM;\n \t}\n \n+\t/* create buf_segs pool of with element of uint8_t. external buffers are attached to the\n+\t * buf_segs while queuing inference requests.\n+\t */\n+\tt->buf_seg_pool = rte_mempool_create(\"ml_test_mbuf_pool\", ML_TEST_MAX_POOL_SIZE * 2,\n+\t\t\t\t\t     sizeof(struct rte_ml_buff_seg), 0, 0, NULL, NULL, NULL,\n+\t\t\t\t\t     NULL, opt->socket_id, 0);\n+\tif (t->buf_seg_pool == NULL) {\n+\t\tml_err(\"Failed to create buf_segs pool : %s\\n\", \"ml_test_mbuf_pool\");\n+\t\trte_ml_op_pool_free(t->op_pool);\n+\t\treturn -ENOMEM;\n+\t}\n+\n \treturn 0;\n }\n \n@@ -752,6 +933,9 @@ ml_inference_mem_destroy(struct ml_test *test, struct ml_options *opt)\n \n \t/* release op pool */\n \trte_mempool_free(t->op_pool);\n+\n+\t/* release buf_segs pool */\n+\trte_mempool_free(t->buf_seg_pool);\n }\n \n static bool\n@@ -781,8 +965,10 @@ ml_inference_validation(struct ml_test *test, struct ml_request *req)\n \t\tj = 0;\n next_element:\n \t\tmatch = false;\n-\t\tdeviation =\n-\t\t\t(*reference == 0 ? 0 : 100 * fabs(*output - *reference) / fabs(*reference));\n+\t\tif ((*reference == 0) && (*output == 0))\n+\t\t\tdeviation = 0;\n+\t\telse\n+\t\t\tdeviation = 100 * fabs(*output - *reference) / fabs(*reference);\n \t\tif (deviation <= t->cmn.opt->tolerance)\n \t\t\tmatch = true;\n \t\telse\n@@ -817,14 +1003,59 @@ ml_request_finish(struct rte_mempool *mp, void *opaque, void *obj, unsigned int\n \tbool error = false;\n \tchar *dump_path;\n \n+\tstruct rte_ml_buff_seg qbuff_seg[ML_TEST_MAX_IO_SIZE];\n+\tstruct rte_ml_buff_seg dbuff_seg[ML_TEST_MAX_IO_SIZE];\n+\tstruct rte_ml_buff_seg *q_segs[ML_TEST_MAX_IO_SIZE];\n+\tstruct rte_ml_buff_seg *d_segs[ML_TEST_MAX_IO_SIZE];\n+\tuint64_t offset;\n+\tuint64_t bufsz;\n+\tuint32_t i;\n+\n \tRTE_SET_USED(mp);\n \n \tif (req->niters == 0)\n \t\treturn;\n \n \tt->nb_used++;\n-\trte_ml_io_dequantize(t->cmn.opt->dev_id, model->id, t->model[req->fid].nb_batches,\n-\t\t\t     req->output, model->output);\n+\n+\tif (t->model[req->fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {\n+\t\tqbuff_seg[0].addr = req->output;\n+\t\tqbuff_seg[0].iova_addr = rte_mem_virt2iova(req->output);\n+\t\tqbuff_seg[0].length = t->model[req->fid].out_qsize;\n+\t\tqbuff_seg[0].next = NULL;\n+\t\tq_segs[0] = &qbuff_seg[0];\n+\n+\t\tdbuff_seg[0].addr = model->output;\n+\t\tdbuff_seg[0].iova_addr = rte_mem_virt2iova(model->output);\n+\t\tdbuff_seg[0].length = t->model[req->fid].out_dsize;\n+\t\tdbuff_seg[0].next = NULL;\n+\t\td_segs[0] = &dbuff_seg[0];\n+\t} else {\n+\t\toffset = 0;\n+\t\tfor (i = 0; i < t->model[req->fid].info.nb_outputs; i++) {\n+\t\t\tbufsz = RTE_ALIGN_CEIL(t->model[req->fid].info.output_info[i].size,\n+\t\t\t\t\t       t->cmn.dev_info.align_size);\n+\t\t\tqbuff_seg[i].addr = req->output + offset;\n+\t\t\tqbuff_seg[i].iova_addr = rte_mem_virt2iova(req->output + offset);\n+\t\t\tqbuff_seg[i].length = bufsz;\n+\t\t\tqbuff_seg[i].next = NULL;\n+\t\t\tq_segs[i] = &qbuff_seg[i];\n+\t\t\toffset += bufsz;\n+\t\t}\n+\n+\t\toffset = 0;\n+\t\tfor (i = 0; i < t->model[req->fid].info.nb_outputs; i++) {\n+\t\t\tbufsz = t->model[req->fid].info.output_info[i].nb_elements * sizeof(float);\n+\t\t\tdbuff_seg[i].addr = model->output + offset;\n+\t\t\tdbuff_seg[i].iova_addr = rte_mem_virt2iova(model->output + offset);\n+\t\t\tdbuff_seg[i].length = bufsz;\n+\t\t\tdbuff_seg[i].next = NULL;\n+\t\t\td_segs[i] = &dbuff_seg[i];\n+\t\t\toffset += bufsz;\n+\t\t}\n+\t}\n+\n+\trte_ml_io_dequantize(t->cmn.opt->dev_id, model->id, q_segs, d_segs);\n \n \tif (model->reference == NULL)\n \t\tgoto dump_output_pass;\ndiff --git a/app/test-mldev/test_inference_common.h b/app/test-mldev/test_inference_common.h\nindex 8f27af25e4..3f4ba3219b 100644\n--- a/app/test-mldev/test_inference_common.h\n+++ b/app/test-mldev/test_inference_common.h\n@@ -11,11 +11,16 @@\n \n #include \"test_model_common.h\"\n \n+#define ML_TEST_MAX_IO_SIZE 32\n+\n struct ml_request {\n \tuint8_t *input;\n \tuint8_t *output;\n \tuint16_t fid;\n \tuint64_t niters;\n+\n+\tstruct rte_ml_buff_seg *inp_buf_segs[ML_TEST_MAX_IO_SIZE];\n+\tstruct rte_ml_buff_seg *out_buf_segs[ML_TEST_MAX_IO_SIZE];\n };\n \n struct ml_core_args {\n@@ -38,6 +43,7 @@ struct test_inference {\n \n \t/* test specific data */\n \tstruct ml_model model[ML_TEST_MAX_MODELS];\n+\tstruct rte_mempool *buf_seg_pool;\n \tstruct rte_mempool *op_pool;\n \n \tuint64_t nb_used;\ndiff --git a/app/test-mldev/test_model_common.c b/app/test-mldev/test_model_common.c\nindex 8dbb0ff89f..c517a50611 100644\n--- a/app/test-mldev/test_model_common.c\n+++ b/app/test-mldev/test_model_common.c\n@@ -50,12 +50,6 @@ ml_model_load(struct ml_test *test, struct ml_options *opt, struct ml_model *mod\n \t\treturn ret;\n \t}\n \n-\t/* Update number of batches */\n-\tif (opt->batches == 0)\n-\t\tmodel->nb_batches = model->info.batch_size;\n-\telse\n-\t\tmodel->nb_batches = opt->batches;\n-\n \tmodel->state = MODEL_LOADED;\n \n \treturn 0;\ndiff --git a/app/test-mldev/test_model_common.h b/app/test-mldev/test_model_common.h\nindex c1021ef1b6..a207e54ab7 100644\n--- a/app/test-mldev/test_model_common.h\n+++ b/app/test-mldev/test_model_common.h\n@@ -31,7 +31,6 @@ struct ml_model {\n \tuint8_t *reference;\n \n \tstruct rte_mempool *io_pool;\n-\tuint32_t nb_batches;\n };\n \n int ml_model_load(struct ml_test *test, struct ml_options *opt, struct ml_model *model,\ndiff --git a/doc/guides/rel_notes/release_23_11.rst b/doc/guides/rel_notes/release_23_11.rst\nindex e553554b3a..8562bac77c 100644\n--- a/doc/guides/rel_notes/release_23_11.rst\n+++ b/doc/guides/rel_notes/release_23_11.rst\n@@ -41,6 +41,11 @@ DPDK Release 23.11\n New Features\n ------------\n \n+   * **Added support for models with multiple I/O in mldev library.**\n+\n+     Added support in mldev library for models with multiple inputs and outputs.\n+\n+\n .. This section should contain new features added in this release.\n    Sample format:\n \n@@ -122,6 +127,11 @@ API Changes\n * mldev: Updated mldev API to support models with multiple inputs and outputs\n   Updated the structure ``rte_ml_model_info`` to support input and output with\n   arbitrary shapes.\n+  Added support for ``rte_ml_io_layout``. Two layout types split and packed are\n+  supported by the specification, which enables higher control in handling models\n+  with multiple inputs and outputs. Updated ``rte_ml_op``, ``rte_ml_io_quantize``\n+  and ``rte_ml_io_dequantize`` to support an array of ``rte_ml_buff_seg`` for\n+  inputs and outputs and removed use of batches argument.\n \n ABI Changes\n -----------\ndiff --git a/doc/guides/tools/testmldev.rst b/doc/guides/tools/testmldev.rst\nindex 741abd722e..9b1565a457 100644\n--- a/doc/guides/tools/testmldev.rst\n+++ b/doc/guides/tools/testmldev.rst\n@@ -106,11 +106,6 @@ The following are the command-line options supported by the test application.\n   Queue size would translate into ``rte_ml_dev_qp_conf::nb_desc`` field during queue-pair creation.\n   Default value is ``1``.\n \n-``--batches <n>``\n-  Set the number batches in the input file provided for inference run.\n-  When not specified, the test would assume the number of batches\n-  is the batch size of the model.\n-\n ``--tolerance <n>``\n   Set the tolerance value in percentage to be used for output validation.\n   Default value is ``0``.\n@@ -282,7 +277,6 @@ Supported command line options for inference tests are following::\n    --burst_size\n    --queue_pairs\n    --queue_size\n-   --batches\n    --tolerance\n    --stats\n \ndiff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h\nindex 6ca0b0bb6e..c73bf7d001 100644\n--- a/drivers/ml/cnxk/cn10k_ml_dev.h\n+++ b/drivers/ml/cnxk/cn10k_ml_dev.h\n@@ -30,6 +30,9 @@\n /* Maximum number of descriptors per queue-pair */\n #define ML_CN10K_MAX_DESC_PER_QP 1024\n \n+/* Maximum number of inputs / outputs per model */\n+#define ML_CN10K_MAX_INPUT_OUTPUT 32\n+\n /* Maximum number of segments for IO data */\n #define ML_CN10K_MAX_SEGMENTS 1\n \ndiff --git a/drivers/ml/cnxk/cn10k_ml_model.c b/drivers/ml/cnxk/cn10k_ml_model.c\nindex 26df8d9ff9..e0b750cd8e 100644\n--- a/drivers/ml/cnxk/cn10k_ml_model.c\n+++ b/drivers/ml/cnxk/cn10k_ml_model.c\n@@ -520,9 +520,11 @@ cn10k_ml_model_info_set(struct rte_ml_dev *dev, struct cn10k_ml_model *model)\n \tstruct rte_ml_model_info *info;\n \tstruct rte_ml_io_info *output;\n \tstruct rte_ml_io_info *input;\n+\tstruct cn10k_ml_dev *mldev;\n \tuint8_t i;\n \tuint8_t j;\n \n+\tmldev = dev->data->dev_private;\n \tmetadata = &model->metadata;\n \tinfo = PLT_PTR_CAST(model->info);\n \tinput = PLT_PTR_ADD(info, sizeof(struct rte_ml_model_info));\n@@ -537,7 +539,9 @@ cn10k_ml_model_info_set(struct rte_ml_dev *dev, struct cn10k_ml_model *model)\n \t\t metadata->model.version[3]);\n \tinfo->model_id = model->model_id;\n \tinfo->device_id = dev->data->dev_id;\n-\tinfo->batch_size = model->batch_size;\n+\tinfo->io_layout = RTE_ML_IO_LAYOUT_PACKED;\n+\tinfo->min_batches = model->batch_size;\n+\tinfo->max_batches = mldev->fw.req->jd.fw_load.cap.s.max_num_batches / model->batch_size;\n \tinfo->nb_inputs = metadata->model.num_input;\n \tinfo->input_info = input;\n \tinfo->nb_outputs = metadata->model.num_output;\ndiff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c\nindex e3faab81ba..1d72fb52a6 100644\n--- a/drivers/ml/cnxk/cn10k_ml_ops.c\n+++ b/drivers/ml/cnxk/cn10k_ml_ops.c\n@@ -471,9 +471,9 @@ cn10k_ml_prep_fp_job_descriptor(struct rte_ml_dev *dev, struct cn10k_ml_req *req\n \treq->jd.hdr.sp_flags = 0x0;\n \treq->jd.hdr.result = roc_ml_addr_ap2mlip(&mldev->roc, &req->result);\n \treq->jd.model_run.input_ddr_addr =\n-\t\tPLT_U64_CAST(roc_ml_addr_ap2mlip(&mldev->roc, op->input.addr));\n+\t\tPLT_U64_CAST(roc_ml_addr_ap2mlip(&mldev->roc, op->input[0]->addr));\n \treq->jd.model_run.output_ddr_addr =\n-\t\tPLT_U64_CAST(roc_ml_addr_ap2mlip(&mldev->roc, op->output.addr));\n+\t\tPLT_U64_CAST(roc_ml_addr_ap2mlip(&mldev->roc, op->output[0]->addr));\n \treq->jd.model_run.num_batches = op->nb_batches;\n }\n \n@@ -856,7 +856,11 @@ cn10k_ml_model_xstats_reset(struct rte_ml_dev *dev, int32_t model_id, const uint\n static int\n cn10k_ml_cache_model_data(struct rte_ml_dev *dev, uint16_t model_id)\n {\n+\tstruct rte_ml_model_info *info;\n \tstruct cn10k_ml_model *model;\n+\tstruct rte_ml_buff_seg seg[2];\n+\tstruct rte_ml_buff_seg *inp;\n+\tstruct rte_ml_buff_seg *out;\n \tstruct rte_ml_op op;\n \n \tchar str[RTE_MEMZONE_NAMESIZE];\n@@ -864,12 +868,22 @@ cn10k_ml_cache_model_data(struct rte_ml_dev *dev, uint16_t model_id)\n \tuint64_t isize = 0;\n \tuint64_t osize = 0;\n \tint ret = 0;\n+\tuint32_t i;\n \n \tmodel = dev->data->models[model_id];\n+\tinfo = (struct rte_ml_model_info *)model->info;\n+\tinp = &seg[0];\n+\tout = &seg[1];\n \n \t/* Create input and output buffers. */\n-\trte_ml_io_input_size_get(dev->data->dev_id, model_id, model->batch_size, &isize, NULL);\n-\trte_ml_io_output_size_get(dev->data->dev_id, model_id, model->batch_size, &osize, NULL);\n+\tfor (i = 0; i < info->nb_inputs; i++)\n+\t\tisize += info->input_info[i].size;\n+\n+\tfor (i = 0; i < info->nb_outputs; i++)\n+\t\tosize += info->output_info[i].size;\n+\n+\tisize = model->batch_size * isize;\n+\tosize = model->batch_size * osize;\n \n \tsnprintf(str, RTE_MEMZONE_NAMESIZE, \"%s_%u\", \"ml_dummy_io\", model_id);\n \tmz = plt_memzone_reserve_aligned(str, isize + osize, 0, ML_CN10K_ALIGN_SIZE);\n@@ -877,17 +891,22 @@ cn10k_ml_cache_model_data(struct rte_ml_dev *dev, uint16_t model_id)\n \t\treturn -ENOMEM;\n \tmemset(mz->addr, 0, isize + osize);\n \n+\tseg[0].addr = mz->addr;\n+\tseg[0].iova_addr = mz->iova;\n+\tseg[0].length = isize;\n+\tseg[0].next = NULL;\n+\n+\tseg[1].addr = PLT_PTR_ADD(mz->addr, isize);\n+\tseg[1].iova_addr = mz->iova + isize;\n+\tseg[1].length = osize;\n+\tseg[1].next = NULL;\n+\n \top.model_id = model_id;\n \top.nb_batches = model->batch_size;\n \top.mempool = NULL;\n \n-\top.input.addr = mz->addr;\n-\top.input.length = isize;\n-\top.input.next = NULL;\n-\n-\top.output.addr = PLT_PTR_ADD(op.input.addr, isize);\n-\top.output.length = osize;\n-\top.output.next = NULL;\n+\top.input = &inp;\n+\top.output = &out;\n \n \tmemset(model->req, 0, sizeof(struct cn10k_ml_req));\n \tret = cn10k_ml_inference_sync(dev, &op);\n@@ -919,8 +938,9 @@ cn10k_ml_dev_info_get(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info)\n \telse if (strcmp(mldev->fw.poll_mem, \"ddr\") == 0)\n \t\tdev_info->max_desc = ML_CN10K_MAX_DESC_PER_QP;\n \n+\tdev_info->max_io = ML_CN10K_MAX_INPUT_OUTPUT;\n \tdev_info->max_segments = ML_CN10K_MAX_SEGMENTS;\n-\tdev_info->min_align_size = ML_CN10K_ALIGN_SIZE;\n+\tdev_info->align_size = ML_CN10K_ALIGN_SIZE;\n \n \treturn 0;\n }\n@@ -2139,15 +2159,14 @@ cn10k_ml_io_output_size_get(struct rte_ml_dev *dev, uint16_t model_id, uint32_t\n }\n \n static int\n-cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batches, void *dbuffer,\n-\t\t     void *qbuffer)\n+cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, struct rte_ml_buff_seg **dbuffer,\n+\t\t     struct rte_ml_buff_seg **qbuffer)\n {\n \tstruct cn10k_ml_model *model;\n \tuint8_t model_input_type;\n \tuint8_t *lcl_dbuffer;\n \tuint8_t *lcl_qbuffer;\n \tuint8_t input_type;\n-\tuint32_t batch_id;\n \tfloat qscale;\n \tuint32_t i;\n \tuint32_t j;\n@@ -2160,11 +2179,9 @@ cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batc\n \t\treturn -EINVAL;\n \t}\n \n-\tlcl_dbuffer = dbuffer;\n-\tlcl_qbuffer = qbuffer;\n-\tbatch_id = 0;\n+\tlcl_dbuffer = dbuffer[0]->addr;\n+\tlcl_qbuffer = qbuffer[0]->addr;\n \n-next_batch:\n \tfor (i = 0; i < model->metadata.model.num_input; i++) {\n \t\tif (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {\n \t\t\tinput_type = model->metadata.input1[i].input_type;\n@@ -2218,23 +2235,18 @@ cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batc\n \t\tlcl_qbuffer += model->addr.input[i].sz_q;\n \t}\n \n-\tbatch_id++;\n-\tif (batch_id < PLT_DIV_CEIL(nb_batches, model->batch_size))\n-\t\tgoto next_batch;\n-\n \treturn 0;\n }\n \n static int\n-cn10k_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batches,\n-\t\t       void *qbuffer, void *dbuffer)\n+cn10k_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, struct rte_ml_buff_seg **qbuffer,\n+\t\t       struct rte_ml_buff_seg **dbuffer)\n {\n \tstruct cn10k_ml_model *model;\n \tuint8_t model_output_type;\n \tuint8_t *lcl_qbuffer;\n \tuint8_t *lcl_dbuffer;\n \tuint8_t output_type;\n-\tuint32_t batch_id;\n \tfloat dscale;\n \tuint32_t i;\n \tuint32_t j;\n@@ -2247,11 +2259,9 @@ cn10k_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_ba\n \t\treturn -EINVAL;\n \t}\n \n-\tlcl_dbuffer = dbuffer;\n-\tlcl_qbuffer = qbuffer;\n-\tbatch_id = 0;\n+\tlcl_dbuffer = dbuffer[0]->addr;\n+\tlcl_qbuffer = qbuffer[0]->addr;\n \n-next_batch:\n \tfor (i = 0; i < model->metadata.model.num_output; i++) {\n \t\tif (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {\n \t\t\toutput_type = model->metadata.output1[i].output_type;\n@@ -2306,10 +2316,6 @@ cn10k_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_ba\n \t\tlcl_dbuffer += model->addr.output[i].sz_d;\n \t}\n \n-\tbatch_id++;\n-\tif (batch_id < PLT_DIV_CEIL(nb_batches, model->batch_size))\n-\t\tgoto next_batch;\n-\n \treturn 0;\n }\n \ndiff --git a/lib/mldev/meson.build b/lib/mldev/meson.build\nindex 5769b0640a..0079ccd205 100644\n--- a/lib/mldev/meson.build\n+++ b/lib/mldev/meson.build\n@@ -35,7 +35,7 @@ driver_sdk_headers += files(\n         'mldev_utils.h',\n )\n \n-deps += ['mempool']\n+deps += ['mempool', 'mbuf']\n \n if get_option('buildtype').contains('debug')\n         cflags += [ '-DRTE_LIBRTE_ML_DEV_DEBUG' ]\ndiff --git a/lib/mldev/rte_mldev.c b/lib/mldev/rte_mldev.c\nindex 0d8ccd3212..9a48ed3e94 100644\n--- a/lib/mldev/rte_mldev.c\n+++ b/lib/mldev/rte_mldev.c\n@@ -730,8 +730,8 @@ rte_ml_io_output_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches\n }\n \n int\n-rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *dbuffer,\n-\t\t   void *qbuffer)\n+rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **dbuffer,\n+\t\t   struct rte_ml_buff_seg **qbuffer)\n {\n \tstruct rte_ml_dev *dev;\n \n@@ -754,12 +754,12 @@ rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void\n \t\treturn -EINVAL;\n \t}\n \n-\treturn (*dev->dev_ops->io_quantize)(dev, model_id, nb_batches, dbuffer, qbuffer);\n+\treturn (*dev->dev_ops->io_quantize)(dev, model_id, dbuffer, qbuffer);\n }\n \n int\n-rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *qbuffer,\n-\t\t     void *dbuffer)\n+rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **qbuffer,\n+\t\t     struct rte_ml_buff_seg **dbuffer)\n {\n \tstruct rte_ml_dev *dev;\n \n@@ -782,7 +782,7 @@ rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, voi\n \t\treturn -EINVAL;\n \t}\n \n-\treturn (*dev->dev_ops->io_dequantize)(dev, model_id, nb_batches, qbuffer, dbuffer);\n+\treturn (*dev->dev_ops->io_dequantize)(dev, model_id, qbuffer, dbuffer);\n }\n \n /** Initialise rte_ml_op mempool element */\ndiff --git a/lib/mldev/rte_mldev.h b/lib/mldev/rte_mldev.h\nindex 6204df0930..316c6fd018 100644\n--- a/lib/mldev/rte_mldev.h\n+++ b/lib/mldev/rte_mldev.h\n@@ -228,12 +228,14 @@ struct rte_ml_dev_info {\n \t/**< Maximum allowed number of descriptors for queue pair by the device.\n \t * @see struct rte_ml_dev_qp_conf::nb_desc\n \t */\n+\tuint16_t max_io;\n+\t/**< Maximum number of inputs/outputs supported per model. */\n \tuint16_t max_segments;\n \t/**< Maximum number of scatter-gather entries supported by the device.\n \t * @see struct rte_ml_buff_seg  struct rte_ml_buff_seg::next\n \t */\n-\tuint16_t min_align_size;\n-\t/**< Minimum alignment size of IO buffers used by the device. */\n+\tuint16_t align_size;\n+\t/**< Alignment size of IO buffers used by the device. */\n };\n \n /**\n@@ -429,10 +431,28 @@ struct rte_ml_op {\n \t/**< Reserved for future use. */\n \tstruct rte_mempool *mempool;\n \t/**< Pool from which operation is allocated. */\n-\tstruct rte_ml_buff_seg input;\n-\t/**< Input buffer to hold the inference data. */\n-\tstruct rte_ml_buff_seg output;\n-\t/**< Output buffer to hold the inference output by the driver. */\n+\tstruct rte_ml_buff_seg **input;\n+\t/**< Array of buffer segments to hold the inference input data.\n+\t *\n+\t * When the model supports IO layout RTE_ML_IO_LAYOUT_PACKED, size of\n+\t * the array is 1.\n+\t *\n+\t * When the model supports IO layout RTE_ML_IO_LAYOUT_SPLIT, size of\n+\t * the array is rte_ml_model_info::nb_inputs.\n+\t *\n+\t * @see struct rte_ml_dev_info::io_layout\n+\t */\n+\tstruct rte_ml_buff_seg **output;\n+\t/**< Array of buffer segments to hold the inference output data.\n+\t *\n+\t * When the model supports IO layout RTE_ML_IO_LAYOUT_PACKED, size of\n+\t * the array is 1.\n+\t *\n+\t * When the model supports IO layout RTE_ML_IO_LAYOUT_SPLIT, size of\n+\t * the array is rte_ml_model_info::nb_outputs.\n+\t *\n+\t * @see struct rte_ml_dev_info::io_layout\n+\t */\n \tunion {\n \t\tuint64_t user_u64;\n \t\t/**< User data as uint64_t.*/\n@@ -863,7 +883,37 @@ enum rte_ml_io_type {\n \t/**< 16-bit brain floating point number. */\n };\n \n-/** Input and output data information structure\n+/** ML I/O buffer layout */\n+enum rte_ml_io_layout {\n+\tRTE_ML_IO_LAYOUT_PACKED,\n+\t/**< All inputs for the model should packed in a single buffer with\n+\t * no padding between individual inputs. The buffer is expected to\n+\t * be aligned to rte_ml_dev_info::align_size.\n+\t *\n+\t * When I/O segmentation is supported by the device, the packed\n+\t * data can be split into multiple segments. In this case, each\n+\t * segment is expected to be aligned to rte_ml_dev_info::align_size\n+\t *\n+\t * Same applies to output.\n+\t *\n+\t * @see struct rte_ml_dev_info::max_segments\n+\t */\n+\tRTE_ML_IO_LAYOUT_SPLIT\n+\t/**< Each input for the model should be stored as separate buffers\n+\t * and each input should be aligned to rte_ml_dev_info::align_size.\n+\t *\n+\t * When I/O segmentation is supported, each input can be split into\n+\t * multiple segments. In this case, each segment is expected to be\n+\t * aligned to rte_ml_dev_info::align_size\n+\t *\n+\t * Same applies to output.\n+\t *\n+\t * @see struct rte_ml_dev_info::max_segments\n+\t */\n+};\n+\n+/**\n+ * Input and output data information structure\n  *\n  * Specifies the type and shape of input and output data.\n  */\n@@ -873,7 +923,7 @@ struct rte_ml_io_info {\n \tuint32_t nb_dims;\n \t/**< Number of dimensions in shape */\n \tuint32_t *shape;\n-\t/**< Shape of the tensor */\n+\t/**< Shape of the tensor for rte_ml_model_info::min_batches of the model. */\n \tenum rte_ml_io_type type;\n \t/**< Type of data\n \t * @see enum rte_ml_io_type\n@@ -894,8 +944,16 @@ struct rte_ml_model_info {\n \t/**< Model ID */\n \tuint16_t device_id;\n \t/**< Device ID */\n-\tuint16_t batch_size;\n-\t/**< Maximum number of batches that the model can process simultaneously */\n+\tenum rte_ml_io_layout io_layout;\n+\t/**< I/O buffer layout for the model */\n+\tuint16_t min_batches;\n+\t/**< Minimum number of batches that the model can process\n+\t * in one inference request\n+\t */\n+\tuint16_t max_batches;\n+\t/**< Maximum number of batches that the model can process\n+\t * in one inference request\n+\t */\n \tuint32_t nb_inputs;\n \t/**< Number of inputs */\n \tconst struct rte_ml_io_info *input_info;\n@@ -1021,8 +1079,6 @@ rte_ml_io_output_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches\n  *   The identifier of the device.\n  * @param[in] model_id\n  *   Identifier for the model\n- * @param[in] nb_batches\n- *   Number of batches in the dequantized input buffer\n  * @param[in] dbuffer\n  *   Address of dequantized input data\n  * @param[in] qbuffer\n@@ -1034,8 +1090,8 @@ rte_ml_io_output_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches\n  */\n __rte_experimental\n int\n-rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *dbuffer,\n-\t\t   void *qbuffer);\n+rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **dbuffer,\n+\t\t   struct rte_ml_buff_seg **qbuffer);\n \n /**\n  * Dequantize output data.\n@@ -1047,8 +1103,6 @@ rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void\n  *   The identifier of the device.\n  * @param[in] model_id\n  *   Identifier for the model\n- * @param[in] nb_batches\n- *   Number of batches in the dequantized output buffer\n  * @param[in] qbuffer\n  *   Address of quantized output data\n  * @param[in] dbuffer\n@@ -1060,8 +1114,8 @@ rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void\n  */\n __rte_experimental\n int\n-rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *qbuffer,\n-\t\t     void *dbuffer);\n+rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **qbuffer,\n+\t\t     struct rte_ml_buff_seg **dbuffer);\n \n /* ML op pool operations */\n \ndiff --git a/lib/mldev/rte_mldev_core.h b/lib/mldev/rte_mldev_core.h\nindex 78b8b7633d..8530b07316 100644\n--- a/lib/mldev/rte_mldev_core.h\n+++ b/lib/mldev/rte_mldev_core.h\n@@ -523,8 +523,6 @@ typedef int (*mldev_io_output_size_get_t)(struct rte_ml_dev *dev, uint16_t model\n  *\tML device pointer.\n  * @param model_id\n  *\tModel ID to use.\n- * @param nb_batches\n- *\tNumber of batches.\n  * @param dbuffer\n  *\tPointer t de-quantized data buffer.\n  * @param qbuffer\n@@ -534,8 +532,9 @@ typedef int (*mldev_io_output_size_get_t)(struct rte_ml_dev *dev, uint16_t model\n  *\t- 0 on success.\n  *\t- <0, error on failure.\n  */\n-typedef int (*mldev_io_quantize_t)(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batches,\n-\t\t\t\t   void *dbuffer, void *qbuffer);\n+typedef int (*mldev_io_quantize_t)(struct rte_ml_dev *dev, uint16_t model_id,\n+\t\t\t\t   struct rte_ml_buff_seg **dbuffer,\n+\t\t\t\t   struct rte_ml_buff_seg **qbuffer);\n \n /**\n  * @internal\n@@ -546,8 +545,6 @@ typedef int (*mldev_io_quantize_t)(struct rte_ml_dev *dev, uint16_t model_id, ui\n  *\tML device pointer.\n  * @param model_id\n  *\tModel ID to use.\n- * @param nb_batches\n- *\tNumber of batches.\n  * @param qbuffer\n  *\tPointer t de-quantized data buffer.\n  * @param dbuffer\n@@ -557,8 +554,9 @@ typedef int (*mldev_io_quantize_t)(struct rte_ml_dev *dev, uint16_t model_id, ui\n  *\t- 0 on success.\n  *\t- <0, error on failure.\n  */\n-typedef int (*mldev_io_dequantize_t)(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batches,\n-\t\t\t\t     void *qbuffer, void *dbuffer);\n+typedef int (*mldev_io_dequantize_t)(struct rte_ml_dev *dev, uint16_t model_id,\n+\t\t\t\t     struct rte_ml_buff_seg **qbuffer,\n+\t\t\t\t     struct rte_ml_buff_seg **dbuffer);\n \n /**\n  * @internal\n",
    "prefixes": [
        "v4",
        "2/3"
    ]
}