Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/124982/?format=api
https://patches.dpdk.org/api/patches/124982/?format=api", "web_url": "https://patches.dpdk.org/project/dpdk/patch/20230310082015.20200-32-syalavarthi@marvell.com/", "project": { "id": 1, "url": "https://patches.dpdk.org/api/projects/1/?format=api", "name": "DPDK", "link_name": "dpdk", "list_id": "dev.dpdk.org", "list_email": "dev@dpdk.org", "web_url": "http://core.dpdk.org", "scm_url": "git://dpdk.org/dpdk", "webscm_url": "http://git.dpdk.org/dpdk", "list_archive_url": "https://inbox.dpdk.org/dev", "list_archive_url_format": "https://inbox.dpdk.org/dev/{}", "commit_url_format": "" }, "msgid": "<20230310082015.20200-32-syalavarthi@marvell.com>", "list_archive_url": "https://inbox.dpdk.org/dev/20230310082015.20200-32-syalavarthi@marvell.com", "date": "2023-03-10T08:20:07", "name": "[v6,31/39] ml/cnxk: add support to handle extended dev stats", "commit_ref": null, "pull_url": null, "state": "accepted", "archived": true, "hash": "fd7823e7b10959e3cba79f5c7b18d84774c3f09b", "submitter": { "id": 2480, "url": "https://patches.dpdk.org/api/people/2480/?format=api", "name": "Srikanth Yalavarthi", "email": "syalavarthi@marvell.com" }, "delegate": { "id": 310, "url": "https://patches.dpdk.org/api/users/310/?format=api", "username": "jerin", "first_name": "Jerin", "last_name": "Jacob", "email": "jerinj@marvell.com" }, "mbox": "https://patches.dpdk.org/project/dpdk/patch/20230310082015.20200-32-syalavarthi@marvell.com/mbox/", "series": [ { "id": 27325, "url": "https://patches.dpdk.org/api/series/27325/?format=api", "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=27325", "date": "2023-03-10T08:19:36", "name": "Implementation of ML CNXK driver", "version": 6, "mbox": "https://patches.dpdk.org/series/27325/mbox/" } ], "comments": "https://patches.dpdk.org/api/patches/124982/comments/", "check": "success", "checks": "https://patches.dpdk.org/api/patches/124982/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<dev-bounces@dpdk.org>", "X-Original-To": "patchwork@inbox.dpdk.org", "Delivered-To": "patchwork@inbox.dpdk.org", "Received": [ "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id CD12941DD0;\n\tFri, 10 Mar 2023 09:24:14 +0100 (CET)", "from mails.dpdk.org (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id F3B3742FF8;\n\tFri, 10 Mar 2023 09:21:11 +0100 (CET)", "from mx0b-0016f401.pphosted.com (mx0a-0016f401.pphosted.com\n [67.231.148.174])\n by mails.dpdk.org (Postfix) with ESMTP id 1C72042D33\n for <dev@dpdk.org>; Fri, 10 Mar 2023 09:20:36 +0100 (CET)", "from pps.filterd (m0045849.ppops.net [127.0.0.1])\n by mx0a-0016f401.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id\n 32A7WbXG003211 for <dev@dpdk.org>; Fri, 10 Mar 2023 00:20:36 -0800", "from dc5-exch01.marvell.com ([199.233.59.181])\n by mx0a-0016f401.pphosted.com (PPS) with ESMTPS id 3p7n7dj0bf-1\n (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT)\n for <dev@dpdk.org>; Fri, 10 Mar 2023 00:20:35 -0800", "from DC5-EXCH01.marvell.com (10.69.176.38) by DC5-EXCH01.marvell.com\n (10.69.176.38) with Microsoft SMTP Server (TLS) id 15.0.1497.42;\n Fri, 10 Mar 2023 00:20:24 -0800", "from maili.marvell.com (10.69.176.80) by DC5-EXCH01.marvell.com\n (10.69.176.38) with Microsoft SMTP Server id 15.0.1497.42 via Frontend\n Transport; Fri, 10 Mar 2023 00:20:24 -0800", "from ml-host-33.caveonetworks.com (unknown [10.110.143.233])\n by maili.marvell.com (Postfix) with ESMTP id 79AAE3F708D;\n Fri, 10 Mar 2023 00:20:23 -0800 (PST)" ], "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com;\n h=from : to : cc :\n subject : date : message-id : in-reply-to : references : mime-version :\n content-type; s=pfpt0220; bh=rfkPirM4inrsSCBzI3PorUzI0IpkIYsoyjYiNNgkp/c=;\n b=PMj/ZBCaB72LTcdqj5TGn+06LP1LUUh/GkI6Sa4QVWqeLdjTcLF5jKt7St7koboBI4K5\n C4Xfq2Dh4GDPlMlbWc240/z8BdqDyDTIhDjEBoIpO/3RHAxVYkCkkPgz9PrDm+3KORFX\n 3roHjyZZAnqfoXDTFXKdLZruQg5savlXjeDLMG4o4Qr+6Yw/hZLu9YEyRsJTgfPQ9KFt\n ppemcZTT2b3hSOTvAl3MTnQifYIT/EeThd7twq61bwUyibpKR+mSw+IabXsjDBUbELZc\n qt5kDnRrNgY63FBR2pf3bBkG02Dh7fkdOS71WaYuClsoGrz+yHA97xkJeMNusDzYQjpx 4Q==", "From": "Srikanth Yalavarthi <syalavarthi@marvell.com>", "To": "Srikanth Yalavarthi <syalavarthi@marvell.com>", "CC": "<dev@dpdk.org>, <sshankarnara@marvell.com>, <jerinj@marvell.com>,\n <aprabhu@marvell.com>, <ptakkar@marvell.com>, <pshukla@marvell.com>", "Subject": "[PATCH v6 31/39] ml/cnxk: add support to handle extended dev stats", "Date": "Fri, 10 Mar 2023 00:20:07 -0800", "Message-ID": "<20230310082015.20200-32-syalavarthi@marvell.com>", "X-Mailer": "git-send-email 2.17.1", "In-Reply-To": "<20230310082015.20200-1-syalavarthi@marvell.com>", "References": "<20221208200220.20267-1-syalavarthi@marvell.com>\n <20230310082015.20200-1-syalavarthi@marvell.com>", "MIME-Version": "1.0", "Content-Type": "text/plain", "X-Proofpoint-GUID": "gtv2cc3zqqQeyOHmiulYrZtz08jqktCs", "X-Proofpoint-ORIG-GUID": "gtv2cc3zqqQeyOHmiulYrZtz08jqktCs", "X-Proofpoint-Virus-Version": "vendor=baseguard\n engine=ICAP:2.0.254,Aquarius:18.0.942,Hydra:6.0.573,FMLib:17.11.170.22\n definitions=2023-03-10_02,2023-03-09_01,2023-02-09_01", "X-BeenThere": "dev@dpdk.org", "X-Mailman-Version": "2.1.29", "Precedence": "list", "List-Id": "DPDK patches and discussions <dev.dpdk.org>", "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>", "List-Archive": "<http://mails.dpdk.org/archives/dev/>", "List-Post": "<mailto:dev@dpdk.org>", "List-Help": "<mailto:dev-request@dpdk.org?subject=help>", "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>", "Errors-To": "dev-bounces@dpdk.org" }, "content": "Added support to handle ML device extended stats. Support\nis enabled to get xstats names and stats values and reset\nxstats. Supported xstats include avg, min and max hardware\nand firmware latency.\n\nSigned-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>\n---\n drivers/ml/cnxk/cn10k_ml_dev.h | 3 +\n drivers/ml/cnxk/cn10k_ml_model.h | 57 +++++\n drivers/ml/cnxk/cn10k_ml_ops.c | 356 ++++++++++++++++++++++++++++++-\n 3 files changed, 415 insertions(+), 1 deletion(-)", "diff": "diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h\nindex 604a200e26..b7ff369ba8 100644\n--- a/drivers/ml/cnxk/cn10k_ml_dev.h\n+++ b/drivers/ml/cnxk/cn10k_ml_dev.h\n@@ -372,6 +372,9 @@ struct cn10k_ml_dev {\n \n \t/* Number of models loaded */\n \tuint16_t nb_models_loaded;\n+\n+\t/* xstats status */\n+\tbool xstats_enabled;\n };\n \n uint64_t cn10k_ml_fw_flags_get(struct cn10k_ml_fw *fw);\ndiff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h\nindex 75990fe1e4..1bc748265d 100644\n--- a/drivers/ml/cnxk/cn10k_ml_model.h\n+++ b/drivers/ml/cnxk/cn10k_ml_model.h\n@@ -399,6 +399,57 @@ struct cn10k_ml_model_addr {\n \tuint32_t total_output_sz_d;\n };\n \n+/* Extended stats types enum */\n+enum cn10k_ml_model_xstats_type {\n+\t/* Average hardware latency */\n+\tavg_hw_latency = 0,\n+\n+\t/* Minimum hardware latency */\n+\tmin_hw_latency,\n+\n+\t/* Maximum hardware latency */\n+\tmax_hw_latency,\n+\n+\t/* Average firmware latency */\n+\tavg_fw_latency,\n+\n+\t/* Minimum firmware latency */\n+\tmin_fw_latency,\n+\n+\t/* Maximum firmware latency */\n+\tmax_fw_latency,\n+};\n+\n+/* Model fast-path stats */\n+struct cn10k_ml_model_stats {\n+\t/* Total hardware latency, sum of all inferences */\n+\tuint64_t hw_latency_tot;\n+\n+\t/* Minimum hardware latency */\n+\tuint64_t hw_latency_min;\n+\n+\t/* Maximum hardware latency */\n+\tuint64_t hw_latency_max;\n+\n+\t/* Total firmware latency, sum of all inferences */\n+\tuint64_t fw_latency_tot;\n+\n+\t/* Minimum firmware latency */\n+\tuint64_t fw_latency_min;\n+\n+\t/* Maximum firmware latency */\n+\tuint64_t fw_latency_max;\n+\n+\t/* Total jobs dequeued */\n+\tuint64_t dequeued_count;\n+\n+\t/* Hardware stats reset index */\n+\tuint64_t hw_reset_count;\n+\n+\t/* Firmware stats reset index */\n+\tuint64_t fw_reset_count;\n+};\n+\n /* Model Object */\n struct cn10k_ml_model {\n \t/* Device reference */\n@@ -438,6 +489,12 @@ struct cn10k_ml_model {\n \n \t/* Slow-path operations request pointer */\n \tstruct cn10k_ml_req *req;\n+\n+\t/* Stats for burst ops */\n+\tstruct cn10k_ml_model_stats *burst_stats;\n+\n+\t/* Stats for sync ops */\n+\tstruct cn10k_ml_model_stats *sync_stats;\n };\n \n int cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size);\ndiff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c\nindex c38f018a50..880bb6a5a9 100644\n--- a/drivers/ml/cnxk/cn10k_ml_ops.c\n+++ b/drivers/ml/cnxk/cn10k_ml_ops.c\n@@ -354,6 +354,134 @@ cn10k_ml_prep_fp_job_descriptor(struct rte_ml_dev *dev, struct cn10k_ml_req *req\n \treq->jd.model_run.num_batches = op->nb_batches;\n }\n \n+#define ML_AVG_FOREACH_QP(dev, model, qp_id, str, value, count) \\\n+\tdo { \\\n+\t\tvalue = 0; \\\n+\t\tfor (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) { \\\n+\t\t\tvalue += model->burst_stats[qp_id].str##_latency_tot; \\\n+\t\t\tcount += model->burst_stats[qp_id].dequeued_count - \\\n+\t\t\t\t model->burst_stats[qp_id].str##_reset_count; \\\n+\t\t} \\\n+\t\tvalue = value / count; \\\n+\t} while (0)\n+\n+#define ML_MIN_FOREACH_QP(dev, model, qp_id, str, value, count) \\\n+\tdo { \\\n+\t\tvalue = UINT64_MAX; \\\n+\t\tfor (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) { \\\n+\t\t\tvalue = PLT_MIN(value, model->burst_stats[qp_id].str##_latency_min); \\\n+\t\t\tcount += model->burst_stats[qp_id].dequeued_count - \\\n+\t\t\t\t model->burst_stats[qp_id].str##_reset_count; \\\n+\t\t} \\\n+\t\tif (count == 0) \\\n+\t\t\tvalue = 0; \\\n+\t} while (0)\n+\n+#define ML_MAX_FOREACH_QP(dev, model, qp_id, str, value, count) \\\n+\tdo { \\\n+\t\tvalue = 0; \\\n+\t\tfor (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) { \\\n+\t\t\tvalue = PLT_MAX(value, model->burst_stats[qp_id].str##_latency_max); \\\n+\t\t\tcount += model->burst_stats[qp_id].dequeued_count - \\\n+\t\t\t\t model->burst_stats[qp_id].str##_reset_count; \\\n+\t\t} \\\n+\t\tif (count == 0) \\\n+\t\t\tvalue = 0; \\\n+\t} while (0)\n+\n+static uint64_t\n+cn10k_ml_model_xstat_get(struct rte_ml_dev *dev, uint16_t model_id,\n+\t\t\t enum cn10k_ml_model_xstats_type type)\n+{\n+\tstruct cn10k_ml_model *model;\n+\tuint64_t count = 0;\n+\tuint64_t value;\n+\tuint32_t qp_id;\n+\n+\tmodel = dev->data->models[model_id];\n+\tif (model == NULL)\n+\t\treturn 0;\n+\n+\tswitch (type) {\n+\tcase avg_hw_latency:\n+\t\tML_AVG_FOREACH_QP(dev, model, qp_id, hw, value, count);\n+\t\tbreak;\n+\tcase min_hw_latency:\n+\t\tML_MIN_FOREACH_QP(dev, model, qp_id, hw, value, count);\n+\t\tbreak;\n+\tcase max_hw_latency:\n+\t\tML_MAX_FOREACH_QP(dev, model, qp_id, hw, value, count);\n+\t\tbreak;\n+\tcase avg_fw_latency:\n+\t\tML_AVG_FOREACH_QP(dev, model, qp_id, fw, value, count);\n+\t\tbreak;\n+\tcase min_fw_latency:\n+\t\tML_MIN_FOREACH_QP(dev, model, qp_id, fw, value, count);\n+\t\tbreak;\n+\tcase max_fw_latency:\n+\t\tML_MAX_FOREACH_QP(dev, model, qp_id, fw, value, count);\n+\t\tbreak;\n+\tdefault:\n+\t\tvalue = 0;\n+\t}\n+\n+\treturn value;\n+}\n+\n+#define ML_AVG_RESET_FOREACH_QP(dev, model, qp_id, str) \\\n+\tdo { \\\n+\t\tfor (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) { \\\n+\t\t\tmodel->burst_stats[qp_id].str##_latency_tot = 0; \\\n+\t\t\tmodel->burst_stats[qp_id].str##_reset_count = \\\n+\t\t\t\tmodel->burst_stats[qp_id].dequeued_count; \\\n+\t\t} \\\n+\t} while (0)\n+\n+#define ML_MIN_RESET_FOREACH_QP(dev, model, qp_id, str) \\\n+\tdo { \\\n+\t\tfor (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) \\\n+\t\t\tmodel->burst_stats[qp_id].str##_latency_min = UINT64_MAX; \\\n+\t} while (0)\n+\n+#define ML_MAX_RESET_FOREACH_QP(dev, model, qp_id, str) \\\n+\tdo { \\\n+\t\tfor (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) \\\n+\t\t\tmodel->burst_stats[qp_id].str##_latency_max = 0; \\\n+\t} while (0)\n+\n+static void\n+cn10k_ml_model_xstat_reset(struct rte_ml_dev *dev, uint16_t model_id,\n+\t\t\t enum cn10k_ml_model_xstats_type type)\n+{\n+\tstruct cn10k_ml_model *model;\n+\tuint32_t qp_id;\n+\n+\tmodel = dev->data->models[model_id];\n+\n+\tswitch (type) {\n+\tcase avg_hw_latency:\n+\t\tML_AVG_RESET_FOREACH_QP(dev, model, qp_id, hw);\n+\t\tbreak;\n+\tcase min_hw_latency:\n+\t\tML_MIN_RESET_FOREACH_QP(dev, model, qp_id, hw);\n+\t\tbreak;\n+\tcase max_hw_latency:\n+\t\tML_MAX_RESET_FOREACH_QP(dev, model, qp_id, hw);\n+\t\tbreak;\n+\tcase avg_fw_latency:\n+\t\tML_AVG_RESET_FOREACH_QP(dev, model, qp_id, fw);\n+\t\tbreak;\n+\tcase min_fw_latency:\n+\t\tML_MIN_RESET_FOREACH_QP(dev, model, qp_id, fw);\n+\t\tbreak;\n+\tcase max_fw_latency:\n+\t\tML_MAX_RESET_FOREACH_QP(dev, model, qp_id, fw);\n+\t\tbreak;\n+\tdefault:\n+\t\treturn;\n+\t}\n+}\n+\n static int\n cn10k_ml_dev_info_get(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info)\n {\n@@ -519,6 +647,13 @@ cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c\n \n \trte_spinlock_init(&ocm->lock);\n \n+\t/* Check firmware stats */\n+\tif ((mldev->fw.req->jd.fw_load.cap.s.hw_stats) &&\n+\t (mldev->fw.req->jd.fw_load.cap.s.fw_stats))\n+\t\tmldev->xstats_enabled = true;\n+\telse\n+\t\tmldev->xstats_enabled = false;\n+\n \tdev->enqueue_burst = cn10k_ml_enqueue_burst;\n \tdev->dequeue_burst = cn10k_ml_dequeue_burst;\n \tdev->op_error_get = cn10k_ml_op_error_get;\n@@ -714,6 +849,170 @@ cn10k_ml_dev_stats_reset(struct rte_ml_dev *dev)\n \t}\n }\n \n+/* Model xstats names */\n+struct rte_ml_dev_xstats_map cn10k_ml_model_xstats_table[] = {\n+\t{avg_hw_latency, \"Avg-HW-Latency\"}, {min_hw_latency, \"Min-HW-Latency\"},\n+\t{max_hw_latency, \"Max-HW-Latency\"}, {avg_fw_latency, \"Avg-FW-Latency\"},\n+\t{min_fw_latency, \"Min-FW-Latency\"}, {max_fw_latency, \"Max-FW-Latency\"},\n+};\n+\n+static int\n+cn10k_ml_dev_xstats_names_get(struct rte_ml_dev *dev, struct rte_ml_dev_xstats_map *xstats_map,\n+\t\t\t uint32_t size)\n+{\n+\tstruct rte_ml_dev_info dev_info;\n+\tstruct cn10k_ml_model *model;\n+\tstruct cn10k_ml_dev *mldev;\n+\tuint32_t model_id;\n+\tuint32_t count;\n+\tuint32_t type;\n+\tuint32_t id;\n+\n+\tmldev = dev->data->dev_private;\n+\tif (!mldev->xstats_enabled)\n+\t\treturn 0;\n+\n+\tif (xstats_map == NULL)\n+\t\treturn PLT_DIM(cn10k_ml_model_xstats_table) * mldev->nb_models_loaded;\n+\n+\t/* Model xstats names */\n+\tcount = 0;\n+\tcn10k_ml_dev_info_get(dev, &dev_info);\n+\n+\tfor (id = 0; id < PLT_DIM(cn10k_ml_model_xstats_table) * dev_info.max_models; id++) {\n+\t\tmodel_id = id / PLT_DIM(cn10k_ml_model_xstats_table);\n+\t\tmodel = dev->data->models[model_id];\n+\n+\t\tif (model == NULL)\n+\t\t\tcontinue;\n+\n+\t\txstats_map[count].id = id;\n+\t\ttype = id % PLT_DIM(cn10k_ml_model_xstats_table);\n+\n+\t\tsnprintf(xstats_map[count].name, RTE_ML_STR_MAX, \"%s-%s-cycles\",\n+\t\t\t model->metadata.model.name, cn10k_ml_model_xstats_table[type].name);\n+\n+\t\tcount++;\n+\t\tif (count == size)\n+\t\t\tbreak;\n+\t}\n+\n+\treturn count;\n+}\n+\n+static int\n+cn10k_ml_dev_xstats_by_name_get(struct rte_ml_dev *dev, const char *name, uint16_t *stat_id,\n+\t\t\t\tuint64_t *value)\n+{\n+\tstruct rte_ml_dev_xstats_map *xstats_map;\n+\tstruct rte_ml_dev_info dev_info;\n+\tstruct cn10k_ml_dev *mldev;\n+\tuint32_t num_xstats;\n+\tuint32_t model_id;\n+\tuint32_t type;\n+\tuint32_t id;\n+\n+\tmldev = dev->data->dev_private;\n+\tif (!mldev->xstats_enabled)\n+\t\treturn 0;\n+\n+\tnum_xstats = PLT_DIM(cn10k_ml_model_xstats_table) * mldev->nb_models_loaded;\n+\txstats_map = rte_zmalloc(\"cn10k_ml_xstats_map\",\n+\t\t\t\t sizeof(struct rte_ml_dev_xstats_map) * num_xstats, 0);\n+\tcn10k_ml_dev_xstats_names_get(dev, xstats_map, num_xstats);\n+\n+\tcn10k_ml_dev_info_get(dev, &dev_info);\n+\tfor (id = 0; id < PLT_DIM(cn10k_ml_model_xstats_table) * dev_info.max_models; id++) {\n+\t\tif (strncmp(name, xstats_map[id].name, strlen(name)) == 0) {\n+\t\t\t*stat_id = id;\n+\t\t\trte_free(xstats_map);\n+\t\t\tbreak;\n+\t\t}\n+\t}\n+\n+\tif (id == PLT_DIM(cn10k_ml_model_xstats_table) * dev_info.max_models)\n+\t\treturn -EINVAL;\n+\n+\tmodel_id = id / PLT_DIM(cn10k_ml_model_xstats_table);\n+\ttype = id % PLT_DIM(cn10k_ml_model_xstats_table);\n+\t*value = cn10k_ml_model_xstat_get(dev, model_id, type);\n+\n+\treturn 0;\n+}\n+\n+static int\n+cn10k_ml_dev_xstats_get(struct rte_ml_dev *dev, const uint16_t *stat_ids, uint64_t *values,\n+\t\t\tuint16_t nb_ids)\n+{\n+\tstruct cn10k_ml_model *model;\n+\tstruct cn10k_ml_dev *mldev;\n+\tuint32_t model_id;\n+\tuint32_t count;\n+\tuint32_t type;\n+\tuint32_t i;\n+\n+\tmldev = dev->data->dev_private;\n+\tif (!mldev->xstats_enabled)\n+\t\treturn 0;\n+\n+\tcount = 0;\n+\tfor (i = 0; i < nb_ids; i++) {\n+\t\tmodel_id = stat_ids[i] / PLT_DIM(cn10k_ml_model_xstats_table);\n+\t\tmodel = dev->data->models[model_id];\n+\n+\t\tif (model == NULL)\n+\t\t\tcontinue;\n+\n+\t\ttype = stat_ids[i] % PLT_DIM(cn10k_ml_model_xstats_table);\n+\t\tvalues[i] = cn10k_ml_model_xstat_get(dev, model_id, type);\n+\t\tcount++;\n+\t}\n+\n+\treturn count;\n+}\n+\n+static int\n+cn10k_ml_dev_xstats_reset(struct rte_ml_dev *dev, const uint16_t *stat_ids, uint16_t nb_ids)\n+{\n+\tstruct rte_ml_dev_info dev_info;\n+\tstruct cn10k_ml_model *model;\n+\tstruct cn10k_ml_dev *mldev;\n+\tuint32_t model_id;\n+\tuint32_t type;\n+\tuint32_t i;\n+\n+\tmldev = dev->data->dev_private;\n+\tif (!mldev->xstats_enabled)\n+\t\treturn 0;\n+\n+\tcn10k_ml_dev_info_get(dev, &dev_info);\n+\tif (stat_ids == NULL) {\n+\t\tfor (i = 0; i < PLT_DIM(cn10k_ml_model_xstats_table) * dev_info.max_models; i++) {\n+\t\t\tmodel_id = i / PLT_DIM(cn10k_ml_model_xstats_table);\n+\t\t\tmodel = dev->data->models[model_id];\n+\n+\t\t\tif (model == NULL)\n+\t\t\t\tcontinue;\n+\n+\t\t\ttype = i % PLT_DIM(cn10k_ml_model_xstats_table);\n+\t\t\tcn10k_ml_model_xstat_reset(dev, model_id, type);\n+\t\t}\n+\t} else {\n+\t\tfor (i = 0; i < nb_ids; i++) {\n+\t\t\tmodel_id = stat_ids[i] / PLT_DIM(cn10k_ml_model_xstats_table);\n+\t\t\tmodel = dev->data->models[model_id];\n+\n+\t\t\tif (model == NULL)\n+\t\t\t\tcontinue;\n+\n+\t\t\ttype = stat_ids[i] % PLT_DIM(cn10k_ml_model_xstats_table);\n+\t\t\tcn10k_ml_model_xstat_reset(dev, model_id, type);\n+\t\t}\n+\t}\n+\n+\treturn 0;\n+}\n+\n static int\n cn10k_ml_dev_dump(struct rte_ml_dev *dev, FILE *fp)\n {\n@@ -856,6 +1155,7 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,\n \n \tchar str[RTE_MEMZONE_NAMESIZE];\n \tconst struct plt_memzone *mz;\n+\tsize_t model_stats_size;\n \tsize_t model_data_size;\n \tsize_t model_info_size;\n \tuint8_t *base_dma_addr;\n@@ -864,6 +1164,7 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,\n \tuint64_t mz_size;\n \tuint16_t idx;\n \tbool found;\n+\tint qp_id;\n \tint ret;\n \n \tret = cn10k_ml_model_metadata_check(params->addr, params->size);\n@@ -900,10 +1201,12 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,\n \t\t\t metadata->model.num_input * sizeof(struct rte_ml_io_info) +\n \t\t\t metadata->model.num_output * sizeof(struct rte_ml_io_info);\n \tmodel_info_size = PLT_ALIGN_CEIL(model_info_size, ML_CN10K_ALIGN_SIZE);\n+\tmodel_stats_size = (dev->data->nb_queue_pairs + 1) * sizeof(struct cn10k_ml_model_stats);\n \n \tmz_size = PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_model), ML_CN10K_ALIGN_SIZE) +\n \t\t 2 * model_data_size + model_info_size +\n-\t\t PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_req), ML_CN10K_ALIGN_SIZE);\n+\t\t PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_req), ML_CN10K_ALIGN_SIZE) +\n+\t\t model_stats_size;\n \n \t/* Allocate memzone for model object and model data */\n \tsnprintf(str, RTE_MEMZONE_NAMESIZE, \"%s_%u\", CN10K_ML_MODEL_MEMZONE_NAME, idx);\n@@ -949,6 +1252,24 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,\n \t/* Set slow-path request address and state */\n \tmodel->req = PLT_PTR_ADD(model->info, model_info_size);\n \n+\t/* Reset burst and sync stats */\n+\tmodel->burst_stats = PLT_PTR_ADD(\n+\t\tmodel->req, PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_req), ML_CN10K_ALIGN_SIZE));\n+\tfor (qp_id = 0; qp_id < dev->data->nb_queue_pairs + 1; qp_id++) {\n+\t\tmodel->burst_stats[qp_id].hw_latency_tot = 0;\n+\t\tmodel->burst_stats[qp_id].hw_latency_min = UINT64_MAX;\n+\t\tmodel->burst_stats[qp_id].hw_latency_max = 0;\n+\t\tmodel->burst_stats[qp_id].fw_latency_tot = 0;\n+\t\tmodel->burst_stats[qp_id].fw_latency_min = UINT64_MAX;\n+\t\tmodel->burst_stats[qp_id].fw_latency_max = 0;\n+\t\tmodel->burst_stats[qp_id].hw_reset_count = 0;\n+\t\tmodel->burst_stats[qp_id].fw_reset_count = 0;\n+\t\tmodel->burst_stats[qp_id].dequeued_count = 0;\n+\t}\n+\tmodel->sync_stats =\n+\t\tPLT_PTR_ADD(model->burst_stats,\n+\t\t\t dev->data->nb_queue_pairs * sizeof(struct cn10k_ml_model_stats));\n+\n \tplt_spinlock_init(&model->lock);\n \tmodel->state = ML_CN10K_MODEL_STATE_LOADED;\n \tdev->data->models[idx] = model;\n@@ -1503,15 +1824,44 @@ static __rte_always_inline void\n cn10k_ml_result_update(struct rte_ml_dev *dev, int qp_id, struct cn10k_ml_result *result,\n \t\t struct rte_ml_op *op)\n {\n+\tstruct cn10k_ml_model_stats *stats;\n+\tstruct cn10k_ml_model *model;\n \tstruct cn10k_ml_dev *mldev;\n \tstruct cn10k_ml_qp *qp;\n+\tuint64_t hw_latency;\n+\tuint64_t fw_latency;\n \n \tif (likely(result->error_code.u64 == 0)) {\n+\t\tmodel = dev->data->models[op->model_id];\n \t\tif (likely(qp_id >= 0)) {\n \t\t\tqp = dev->data->queue_pairs[qp_id];\n \t\t\tqp->stats.dequeued_count++;\n+\t\t\tstats = &model->burst_stats[qp_id];\n+\t\t} else {\n+\t\t\tstats = model->sync_stats;\n+\t\t}\n+\n+\t\tif (unlikely(stats->dequeued_count == stats->hw_reset_count)) {\n+\t\t\tstats->hw_latency_min = UINT64_MAX;\n+\t\t\tstats->hw_latency_max = 0;\n \t\t}\n \n+\t\tif (unlikely(stats->dequeued_count == stats->fw_reset_count)) {\n+\t\t\tstats->fw_latency_min = UINT64_MAX;\n+\t\t\tstats->fw_latency_max = 0;\n+\t\t}\n+\n+\t\thw_latency = result->stats.hw_end - result->stats.hw_start;\n+\t\tfw_latency = result->stats.fw_end - result->stats.fw_start - hw_latency;\n+\n+\t\tstats->hw_latency_tot += hw_latency;\n+\t\tstats->hw_latency_min = PLT_MIN(stats->hw_latency_min, hw_latency);\n+\t\tstats->hw_latency_max = PLT_MAX(stats->hw_latency_max, hw_latency);\n+\t\tstats->fw_latency_tot += fw_latency;\n+\t\tstats->fw_latency_min = PLT_MIN(stats->fw_latency_min, fw_latency);\n+\t\tstats->fw_latency_max = PLT_MAX(stats->fw_latency_max, fw_latency);\n+\t\tstats->dequeued_count++;\n+\n \t\top->impl_opaque = result->error_code.u64;\n \t\top->status = RTE_ML_OP_STATUS_SUCCESS;\n \t} else {\n@@ -1745,6 +2095,10 @@ struct rte_ml_dev_ops cn10k_ml_ops = {\n \t/* Stats ops */\n \t.dev_stats_get = cn10k_ml_dev_stats_get,\n \t.dev_stats_reset = cn10k_ml_dev_stats_reset,\n+\t.dev_xstats_names_get = cn10k_ml_dev_xstats_names_get,\n+\t.dev_xstats_by_name_get = cn10k_ml_dev_xstats_by_name_get,\n+\t.dev_xstats_get = cn10k_ml_dev_xstats_get,\n+\t.dev_xstats_reset = cn10k_ml_dev_xstats_reset,\n \n \t/* Model ops */\n \t.model_load = cn10k_ml_model_load,\n", "prefixes": [ "v6", "31/39" ] }{ "id": 124982, "url": "