From patchwork Wed Oct 18 06:47:33 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Srikanth Yalavarthi X-Patchwork-Id: 132848 X-Patchwork-Delegate: jerinj@marvell.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 068AB43196; Wed, 18 Oct 2023 08:49:10 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id AC0B842D7F; Wed, 18 Oct 2023 08:48:24 +0200 (CEST) Received: from mx0b-0016f401.pphosted.com (mx0b-0016f401.pphosted.com [67.231.156.173]) by mails.dpdk.org (Postfix) with ESMTP id 5CA7140289 for ; Wed, 18 Oct 2023 08:48:16 +0200 (CEST) Received: from pps.filterd (m0045851.ppops.net [127.0.0.1]) by mx0b-0016f401.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id 39I3vKJs020024 for ; Tue, 17 Oct 2023 23:48:15 -0700 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com; h=from : to : cc : subject : date : message-id : in-reply-to : references : mime-version : content-transfer-encoding : content-type; s=pfpt0220; bh=pMXons43NobbqNFLLdZnyAA30AAiigbC0Mqju5wEuRc=; b=KJknhLxeDg2Rs5yUnjZT3Xfwv8u0LEZkvIPI1P+hkWBHFljxc5NluM8027xjgyw6B2Rr IZq5WnxD+dvsfSj1pSficVKYPMZweIftV1oQRlexvdRCHY3bTM6ATu4e7kUdjhdjnr/k ugvaXRc8b1BWi8vXeZFanZ0nl/2lG7VO+51bvRYoQiT0/0OH90MhmK3VGTtUiTSz6h/M +63OBiLuIPOdDIKh/oEptt9vkXuDQ9lq5bo0GZ6onmSiMAZ7ME8bVBAUiwUPcN4nsqIE Qrr/7rUdRGqFON8++H053IpRUl/V66uOO/JkQmoqn23Kdac8Tu02PXHPBh3cZykn3eKN aQ== Received: from dc5-exch02.marvell.com ([199.233.59.182]) by mx0b-0016f401.pphosted.com (PPS) with ESMTPS id 3tstb3ursq-5 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT) for ; Tue, 17 Oct 2023 23:48:15 -0700 Received: from DC5-EXCH02.marvell.com (10.69.176.39) by DC5-EXCH02.marvell.com (10.69.176.39) with Microsoft SMTP Server (TLS) id 15.0.1497.48; Tue, 17 Oct 2023 23:48:11 -0700 Received: from maili.marvell.com (10.69.176.80) by DC5-EXCH02.marvell.com (10.69.176.39) with Microsoft SMTP Server id 15.0.1497.48 via Frontend Transport; Tue, 17 Oct 2023 23:48:11 -0700 Received: from ml-host-33.caveonetworks.com (unknown [10.110.143.233]) by maili.marvell.com (Postfix) with ESMTP id 9764E3F7048; Tue, 17 Oct 2023 23:48:11 -0700 (PDT) From: Srikanth Yalavarthi To: Srikanth Yalavarthi CC: , , , Subject: [PATCH v5 05/34] ml/cnxk: add generic cnxk xstats structures Date: Tue, 17 Oct 2023 23:47:33 -0700 Message-ID: <20231018064806.24145-6-syalavarthi@marvell.com> X-Mailer: git-send-email 2.42.0 In-Reply-To: <20231018064806.24145-1-syalavarthi@marvell.com> References: <20230830155927.3566-1-syalavarthi@marvell.com> <20231018064806.24145-1-syalavarthi@marvell.com> MIME-Version: 1.0 X-Proofpoint-ORIG-GUID: aIVpJvrtpxPbcO-WN6jFzPOyCyScJ70T X-Proofpoint-GUID: aIVpJvrtpxPbcO-WN6jFzPOyCyScJ70T X-Proofpoint-Virus-Version: vendor=baseguard engine=ICAP:2.0.272,Aquarius:18.0.980,Hydra:6.0.619,FMLib:17.11.176.26 definitions=2023-10-18_04,2023-10-17_01,2023-05-22_02 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Introduced generic xstats structures and renamed cn10k xstats enumerations with cnxk prefix. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_dev.h | 86 +--------------- drivers/ml/cnxk/cn10k_ml_model.h | 6 +- drivers/ml/cnxk/cn10k_ml_ops.c | 169 ++++++++++++++----------------- drivers/ml/cnxk/cnxk_ml_xstats.h | 128 +++++++++++++++++++++++ drivers/ml/cnxk/meson.build | 1 + 5 files changed, 210 insertions(+), 180 deletions(-) create mode 100644 drivers/ml/cnxk/cnxk_ml_xstats.h diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h index 1852d4f6c9..be989e0a20 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.h +++ b/drivers/ml/cnxk/cn10k_ml_dev.h @@ -10,6 +10,7 @@ #include "cn10k_ml_ocm.h" #include "cnxk_ml_io.h" +#include "cnxk_ml_xstats.h" /* Dummy Device ops */ extern struct rte_ml_dev_ops ml_dev_dummy_ops; @@ -121,89 +122,6 @@ struct cn10k_ml_fw { struct cnxk_ml_req *req; }; -/* Extended stats types enum */ -enum cn10k_ml_xstats_type { - /* Number of models loaded */ - nb_models_loaded, - - /* Number of models unloaded */ - nb_models_unloaded, - - /* Number of models started */ - nb_models_started, - - /* Number of models stopped */ - nb_models_stopped, - - /* Average inference hardware latency */ - avg_hw_latency, - - /* Minimum hardware latency */ - min_hw_latency, - - /* Maximum hardware latency */ - max_hw_latency, - - /* Average firmware latency */ - avg_fw_latency, - - /* Minimum firmware latency */ - min_fw_latency, - - /* Maximum firmware latency */ - max_fw_latency, -}; - -/* Extended stats function type enum. */ -enum cn10k_ml_xstats_fn_type { - /* Device function */ - CN10K_ML_XSTATS_FN_DEVICE, - - /* Model function */ - CN10K_ML_XSTATS_FN_MODEL, -}; - -/* Function pointer to get xstats for a type */ -typedef uint64_t (*cn10k_ml_xstats_fn)(struct rte_ml_dev *dev, uint16_t obj_idx, - enum cn10k_ml_xstats_type stat); - -/* Extended stats entry structure */ -struct cn10k_ml_xstats_entry { - /* Name-ID map */ - struct rte_ml_dev_xstats_map map; - - /* xstats mode, device or model */ - enum rte_ml_dev_xstats_mode mode; - - /* Type of xstats */ - enum cn10k_ml_xstats_type type; - - /* xstats function */ - enum cn10k_ml_xstats_fn_type fn_id; - - /* Object ID, model ID for model stat type */ - uint16_t obj_idx; - - /* Allowed to reset the stat */ - uint8_t reset_allowed; - - /* An offset to be taken away to emulate resets */ - uint64_t reset_value; -}; - -/* Extended stats data */ -struct cn10k_ml_xstats { - /* Pointer to xstats entries */ - struct cn10k_ml_xstats_entry *entries; - - /* Store num stats and offset of the stats for each model */ - uint16_t count_per_model[ML_CNXK_MAX_MODELS]; - uint16_t offset_for_model[ML_CNXK_MAX_MODELS]; - uint16_t count_mode_device; - uint16_t count_mode_model; - uint16_t count; -}; - /* Device private data */ struct cn10k_ml_dev { /* Device ROC */ @@ -216,7 +134,7 @@ struct cn10k_ml_dev { struct cn10k_ml_ocm ocm; /* Extended stats data */ - struct cn10k_ml_xstats xstats; + struct cnxk_ml_xstats xstats; /* Enable / disable model data caching */ int cache_model_data; diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h index 74ada1531a..5c32f48c68 100644 --- a/drivers/ml/cnxk/cn10k_ml_model.h +++ b/drivers/ml/cnxk/cn10k_ml_model.h @@ -404,7 +404,7 @@ struct cn10k_ml_layer_addr { }; /* Model fast-path stats */ -struct cn10k_ml_layer_stats { +struct cn10k_ml_layer_xstats { /* Total hardware latency, sum of all inferences */ uint64_t hw_latency_tot; @@ -447,10 +447,10 @@ struct cn10k_ml_layer_data { struct cnxk_ml_req *req; /* Layer: Stats for burst ops */ - struct cn10k_ml_layer_stats *burst_stats; + struct cn10k_ml_layer_xstats *burst_xstats; /* Layer: Stats for sync ops */ - struct cn10k_ml_layer_stats *sync_stats; + struct cn10k_ml_layer_xstats *sync_xstats; }; struct cn10k_ml_model_data { diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index caee09829b..42a4389bbe 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -10,6 +10,7 @@ #include "cnxk_ml_dev.h" #include "cnxk_ml_model.h" #include "cnxk_ml_ops.h" +#include "cnxk_ml_xstats.h" /* ML model macros */ #define CN10K_ML_MODEL_MEMZONE_NAME "ml_cn10k_model_mz" @@ -425,26 +426,6 @@ cn10k_ml_prep_fp_job_descriptor(struct cn10k_ml_dev *cn10k_mldev, struct cnxk_ml req->cn10k_req.jd.model_run.num_batches = op->nb_batches; } -struct xstat_info { - char name[32]; - enum cn10k_ml_xstats_type type; - uint8_t reset_allowed; -}; - -/* Note: Device stats are not allowed to be reset. */ -static const struct xstat_info device_stats[] = { - {"nb_models_loaded", nb_models_loaded, 0}, - {"nb_models_unloaded", nb_models_unloaded, 0}, - {"nb_models_started", nb_models_started, 0}, - {"nb_models_stopped", nb_models_stopped, 0}, -}; - -static const struct xstat_info model_stats[] = { - {"Avg-HW-Latency", avg_hw_latency, 1}, {"Min-HW-Latency", min_hw_latency, 1}, - {"Max-HW-Latency", max_hw_latency, 1}, {"Avg-FW-Latency", avg_fw_latency, 1}, - {"Min-FW-Latency", min_fw_latency, 1}, {"Max-FW-Latency", max_fw_latency, 1}, -}; - static int cn10k_ml_xstats_init(struct rte_ml_dev *dev) { @@ -459,10 +440,10 @@ cn10k_ml_xstats_init(struct rte_ml_dev *dev) cn10k_mldev = &cnxk_mldev->cn10k_mldev; /* Allocate memory for xstats entries. Don't allocate during reconfigure */ - nb_stats = RTE_DIM(device_stats) + ML_CNXK_MAX_MODELS * RTE_DIM(model_stats); + nb_stats = RTE_DIM(device_xstats) + ML_CNXK_MAX_MODELS * RTE_DIM(layer_xstats); if (cn10k_mldev->xstats.entries == NULL) cn10k_mldev->xstats.entries = rte_zmalloc( - "cn10k_ml_xstats", sizeof(struct cn10k_ml_xstats_entry) * nb_stats, + "cn10k_ml_xstats", sizeof(struct cnxk_ml_xstats_entry) * nb_stats, PLT_CACHE_LINE_SIZE); if (cn10k_mldev->xstats.entries == NULL) @@ -470,17 +451,17 @@ cn10k_ml_xstats_init(struct rte_ml_dev *dev) /* Initialize device xstats */ stat_id = 0; - for (i = 0; i < RTE_DIM(device_stats); i++) { + for (i = 0; i < RTE_DIM(device_xstats); i++) { cn10k_mldev->xstats.entries[stat_id].map.id = stat_id; snprintf(cn10k_mldev->xstats.entries[stat_id].map.name, sizeof(cn10k_mldev->xstats.entries[stat_id].map.name), "%s", - device_stats[i].name); + device_xstats[i].name); cn10k_mldev->xstats.entries[stat_id].mode = RTE_ML_DEV_XSTATS_DEVICE; - cn10k_mldev->xstats.entries[stat_id].type = device_stats[i].type; - cn10k_mldev->xstats.entries[stat_id].fn_id = CN10K_ML_XSTATS_FN_DEVICE; + cn10k_mldev->xstats.entries[stat_id].type = device_xstats[i].type; + cn10k_mldev->xstats.entries[stat_id].fn_id = CNXK_ML_XSTATS_FN_DEVICE; cn10k_mldev->xstats.entries[stat_id].obj_idx = 0; - cn10k_mldev->xstats.entries[stat_id].reset_allowed = device_stats[i].reset_allowed; + cn10k_mldev->xstats.entries[stat_id].reset_allowed = device_xstats[i].reset_allowed; stat_id++; } cn10k_mldev->xstats.count_mode_device = stat_id; @@ -489,24 +470,24 @@ cn10k_ml_xstats_init(struct rte_ml_dev *dev) for (model = 0; model < ML_CNXK_MAX_MODELS; model++) { cn10k_mldev->xstats.offset_for_model[model] = stat_id; - for (i = 0; i < RTE_DIM(model_stats); i++) { + for (i = 0; i < RTE_DIM(layer_xstats); i++) { cn10k_mldev->xstats.entries[stat_id].map.id = stat_id; cn10k_mldev->xstats.entries[stat_id].mode = RTE_ML_DEV_XSTATS_MODEL; - cn10k_mldev->xstats.entries[stat_id].type = model_stats[i].type; - cn10k_mldev->xstats.entries[stat_id].fn_id = CN10K_ML_XSTATS_FN_MODEL; + cn10k_mldev->xstats.entries[stat_id].type = layer_xstats[i].type; + cn10k_mldev->xstats.entries[stat_id].fn_id = CNXK_ML_XSTATS_FN_MODEL; cn10k_mldev->xstats.entries[stat_id].obj_idx = model; cn10k_mldev->xstats.entries[stat_id].reset_allowed = - model_stats[i].reset_allowed; + layer_xstats[i].reset_allowed; /* Name of xstat is updated during model load */ snprintf(cn10k_mldev->xstats.entries[stat_id].map.name, sizeof(cn10k_mldev->xstats.entries[stat_id].map.name), - "Model-%u-%s", model, model_stats[i].name); + "Model-%u-%s", model, layer_xstats[i].name); stat_id++; } - cn10k_mldev->xstats.count_per_model[model] = RTE_DIM(model_stats); + cn10k_mldev->xstats.count_per_model[model] = RTE_DIM(layer_xstats); } cn10k_mldev->xstats.count_mode_model = stat_id - cn10k_mldev->xstats.count_mode_device; @@ -545,7 +526,7 @@ cn10k_ml_xstats_model_name_update(struct rte_ml_dev *dev, uint16_t model_id) cnxk_mldev = dev->data->dev_private; cn10k_mldev = &cnxk_mldev->cn10k_mldev; model = dev->data->models[model_id]; - stat_id = RTE_DIM(device_stats) + model_id * RTE_DIM(model_stats); + stat_id = RTE_DIM(device_xstats) + model_id * RTE_DIM(layer_xstats); roc_clk_freq_get(&rclk_freq, &sclk_freq); if (sclk_freq == 0) @@ -554,17 +535,17 @@ cn10k_ml_xstats_model_name_update(struct rte_ml_dev *dev, uint16_t model_id) strcpy(suffix, "ns"); /* Update xstat name based on model name and sclk availability */ - for (i = 0; i < RTE_DIM(model_stats); i++) { + for (i = 0; i < RTE_DIM(layer_xstats); i++) { snprintf(cn10k_mldev->xstats.entries[stat_id].map.name, sizeof(cn10k_mldev->xstats.entries[stat_id].map.name), "%s-%s-%s", - model->layer[0].glow.metadata.model.name, model_stats[i].name, suffix); + model->layer[0].glow.metadata.model.name, layer_xstats[i].name, suffix); stat_id++; } } static uint64_t cn10k_ml_dev_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx __rte_unused, - enum cn10k_ml_xstats_type type) + enum cnxk_ml_xstats_type type) { struct cnxk_ml_dev *cnxk_mldev; @@ -590,9 +571,9 @@ cn10k_ml_dev_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx __rte_unused, do { \ value = 0; \ for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) { \ - value += model->layer[0].glow.burst_stats[qp_id].str##_latency_tot; \ - count += model->layer[0].glow.burst_stats[qp_id].dequeued_count - \ - model->layer[0].glow.burst_stats[qp_id].str##_reset_count; \ + value += model->layer[0].glow.burst_xstats[qp_id].str##_latency_tot; \ + count += model->layer[0].glow.burst_xstats[qp_id].dequeued_count - \ + model->layer[0].glow.burst_xstats[qp_id].str##_reset_count; \ } \ if (count != 0) \ value = value / count; \ @@ -603,9 +584,10 @@ cn10k_ml_dev_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx __rte_unused, value = UINT64_MAX; \ for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) { \ value = PLT_MIN( \ - value, model->layer[0].glow.burst_stats[qp_id].str##_latency_min); \ - count += model->layer[0].glow.burst_stats[qp_id].dequeued_count - \ - model->layer[0].glow.burst_stats[qp_id].str##_reset_count; \ + value, \ + model->layer[0].glow.burst_xstats[qp_id].str##_latency_min); \ + count += model->layer[0].glow.burst_xstats[qp_id].dequeued_count - \ + model->layer[0].glow.burst_xstats[qp_id].str##_reset_count; \ } \ if (count == 0) \ value = 0; \ @@ -616,16 +598,17 @@ cn10k_ml_dev_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx __rte_unused, value = 0; \ for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) { \ value = PLT_MAX( \ - value, model->layer[0].glow.burst_stats[qp_id].str##_latency_max); \ - count += model->layer[0].glow.burst_stats[qp_id].dequeued_count - \ - model->layer[0].glow.burst_stats[qp_id].str##_reset_count; \ + value, \ + model->layer[0].glow.burst_xstats[qp_id].str##_latency_max); \ + count += model->layer[0].glow.burst_xstats[qp_id].dequeued_count - \ + model->layer[0].glow.burst_xstats[qp_id].str##_reset_count; \ } \ if (count == 0) \ value = 0; \ } while (0) static uint64_t -cn10k_ml_model_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx, enum cn10k_ml_xstats_type type) +cn10k_ml_model_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx, enum cnxk_ml_xstats_type type) { struct cnxk_ml_model *model; uint16_t rclk_freq; /* MHz */ @@ -671,8 +654,8 @@ cn10k_ml_model_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx, enum cn10k_ml static int cn10k_ml_device_xstats_reset(struct rte_ml_dev *dev, const uint16_t stat_ids[], uint16_t nb_ids) { - struct cn10k_ml_xstats_entry *xs; struct cn10k_ml_dev *cn10k_mldev; + struct cnxk_ml_xstats_entry *xs; struct cnxk_ml_dev *cnxk_mldev; uint16_t nb_stats; uint16_t stat_id; @@ -708,26 +691,26 @@ cn10k_ml_device_xstats_reset(struct rte_ml_dev *dev, const uint16_t stat_ids[], #define ML_AVG_RESET_FOREACH_QP(dev, model, qp_id, str) \ do { \ for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) { \ - model->layer[0].glow.burst_stats[qp_id].str##_latency_tot = 0; \ - model->layer[0].glow.burst_stats[qp_id].str##_reset_count = \ - model->layer[0].glow.burst_stats[qp_id].dequeued_count; \ + model->layer[0].glow.burst_xstats[qp_id].str##_latency_tot = 0; \ + model->layer[0].glow.burst_xstats[qp_id].str##_reset_count = \ + model->layer[0].glow.burst_xstats[qp_id].dequeued_count; \ } \ } while (0) #define ML_MIN_RESET_FOREACH_QP(dev, model, qp_id, str) \ do { \ for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) \ - model->layer[0].glow.burst_stats[qp_id].str##_latency_min = UINT64_MAX; \ + model->layer[0].glow.burst_xstats[qp_id].str##_latency_min = UINT64_MAX; \ } while (0) #define ML_MAX_RESET_FOREACH_QP(dev, model, qp_id, str) \ do { \ for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) \ - model->layer[0].glow.burst_stats[qp_id].str##_latency_max = 0; \ + model->layer[0].glow.burst_xstats[qp_id].str##_latency_max = 0; \ } while (0) static void -cn10k_ml_reset_model_stat(struct rte_ml_dev *dev, uint16_t model_id, enum cn10k_ml_xstats_type type) +cn10k_ml_reset_model_stat(struct rte_ml_dev *dev, uint16_t model_id, enum cnxk_ml_xstats_type type) { struct cnxk_ml_model *model; uint32_t qp_id; @@ -762,8 +745,8 @@ static int cn10k_ml_model_xstats_reset(struct rte_ml_dev *dev, int32_t model_id, const uint16_t stat_ids[], uint16_t nb_ids) { - struct cn10k_ml_xstats_entry *xs; struct cn10k_ml_dev *cn10k_mldev; + struct cnxk_ml_xstats_entry *xs; struct cnxk_ml_dev *cnxk_mldev; struct cnxk_ml_model *model; int32_t lcl_model_id = 0; @@ -1342,10 +1325,10 @@ static int cn10k_ml_dev_xstats_by_name_get(struct rte_ml_dev *dev, const char *name, uint16_t *stat_id, uint64_t *value) { - struct cn10k_ml_xstats_entry *xs; + struct cnxk_ml_xstats_entry *xs; struct cn10k_ml_dev *cn10k_mldev; struct cnxk_ml_dev *cnxk_mldev; - cn10k_ml_xstats_fn fn; + cnxk_ml_xstats_fn fn; uint32_t i; cnxk_mldev = dev->data->dev_private; @@ -1357,10 +1340,10 @@ cn10k_ml_dev_xstats_by_name_get(struct rte_ml_dev *dev, const char *name, uint16 *stat_id = xs->map.id; switch (xs->fn_id) { - case CN10K_ML_XSTATS_FN_DEVICE: + case CNXK_ML_XSTATS_FN_DEVICE: fn = cn10k_ml_dev_xstat_get; break; - case CN10K_ML_XSTATS_FN_MODEL: + case CNXK_ML_XSTATS_FN_MODEL: fn = cn10k_ml_model_xstat_get; break; default: @@ -1384,11 +1367,11 @@ static int cn10k_ml_dev_xstats_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode, int32_t model_id, const uint16_t stat_ids[], uint64_t values[], uint16_t nb_ids) { - struct cn10k_ml_xstats_entry *xs; struct cn10k_ml_dev *cn10k_mldev; + struct cnxk_ml_xstats_entry *xs; struct cnxk_ml_dev *cnxk_mldev; uint32_t xstats_mode_count; - cn10k_ml_xstats_fn fn; + cnxk_ml_xstats_fn fn; uint64_t val; uint32_t idx; uint32_t i; @@ -1423,10 +1406,10 @@ cn10k_ml_dev_xstats_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode } switch (xs->fn_id) { - case CN10K_ML_XSTATS_FN_DEVICE: + case CNXK_ML_XSTATS_FN_DEVICE: fn = cn10k_ml_dev_xstat_get; break; - case CN10K_ML_XSTATS_FN_MODEL: + case CNXK_ML_XSTATS_FN_MODEL: fn = cn10k_ml_model_xstat_get; break; default: @@ -1664,7 +1647,7 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params, metadata->model.num_input * sizeof(struct rte_ml_io_info) + metadata->model.num_output * sizeof(struct rte_ml_io_info); model_info_size = PLT_ALIGN_CEIL(model_info_size, ML_CN10K_ALIGN_SIZE); - model_stats_size = (dev->data->nb_queue_pairs + 1) * sizeof(struct cn10k_ml_layer_stats); + model_stats_size = (dev->data->nb_queue_pairs + 1) * sizeof(struct cn10k_ml_layer_xstats); mz_size = PLT_ALIGN_CEIL(sizeof(struct cnxk_ml_model), ML_CN10K_ALIGN_SIZE) + 2 * model_data_size + model_scratch_size + model_info_size + @@ -1738,24 +1721,24 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params, model->layer[0].glow.req = PLT_PTR_ADD(model->info, model_info_size); /* Reset burst and sync stats */ - model->layer[0].glow.burst_stats = + model->layer[0].glow.burst_xstats = PLT_PTR_ADD(model->layer[0].glow.req, PLT_ALIGN_CEIL(sizeof(struct cnxk_ml_req), ML_CN10K_ALIGN_SIZE)); for (qp_id = 0; qp_id < dev->data->nb_queue_pairs + 1; qp_id++) { - model->layer[0].glow.burst_stats[qp_id].hw_latency_tot = 0; - model->layer[0].glow.burst_stats[qp_id].hw_latency_min = UINT64_MAX; - model->layer[0].glow.burst_stats[qp_id].hw_latency_max = 0; - model->layer[0].glow.burst_stats[qp_id].fw_latency_tot = 0; - model->layer[0].glow.burst_stats[qp_id].fw_latency_min = UINT64_MAX; - model->layer[0].glow.burst_stats[qp_id].fw_latency_max = 0; - model->layer[0].glow.burst_stats[qp_id].hw_reset_count = 0; - model->layer[0].glow.burst_stats[qp_id].fw_reset_count = 0; - model->layer[0].glow.burst_stats[qp_id].dequeued_count = 0; + model->layer[0].glow.burst_xstats[qp_id].hw_latency_tot = 0; + model->layer[0].glow.burst_xstats[qp_id].hw_latency_min = UINT64_MAX; + model->layer[0].glow.burst_xstats[qp_id].hw_latency_max = 0; + model->layer[0].glow.burst_xstats[qp_id].fw_latency_tot = 0; + model->layer[0].glow.burst_xstats[qp_id].fw_latency_min = UINT64_MAX; + model->layer[0].glow.burst_xstats[qp_id].fw_latency_max = 0; + model->layer[0].glow.burst_xstats[qp_id].hw_reset_count = 0; + model->layer[0].glow.burst_xstats[qp_id].fw_reset_count = 0; + model->layer[0].glow.burst_xstats[qp_id].dequeued_count = 0; } - model->layer[0].glow.sync_stats = - PLT_PTR_ADD(model->layer[0].glow.burst_stats, - dev->data->nb_queue_pairs * sizeof(struct cn10k_ml_layer_stats)); + model->layer[0].glow.sync_xstats = + PLT_PTR_ADD(model->layer[0].glow.burst_xstats, + dev->data->nb_queue_pairs * sizeof(struct cn10k_ml_layer_xstats)); plt_spinlock_init(&model->lock); model->state = ML_CNXK_MODEL_STATE_LOADED; @@ -2308,7 +2291,7 @@ static __rte_always_inline void cn10k_ml_result_update(struct rte_ml_dev *dev, int qp_id, struct cnxk_ml_req *req) { union cn10k_ml_error_code *error_code; - struct cn10k_ml_layer_stats *stats; + struct cn10k_ml_layer_xstats *xstats; struct cn10k_ml_dev *cn10k_mldev; struct cnxk_ml_dev *cnxk_mldev; struct cn10k_ml_result *result; @@ -2326,31 +2309,31 @@ cn10k_ml_result_update(struct rte_ml_dev *dev, int qp_id, struct cnxk_ml_req *re if (likely(qp_id >= 0)) { qp = dev->data->queue_pairs[qp_id]; qp->stats.dequeued_count++; - stats = &model->layer[0].glow.burst_stats[qp_id]; + xstats = &model->layer[0].glow.burst_xstats[qp_id]; } else { - stats = model->layer[0].glow.sync_stats; + xstats = model->layer[0].glow.sync_xstats; } - if (unlikely(stats->dequeued_count == stats->hw_reset_count)) { - stats->hw_latency_min = UINT64_MAX; - stats->hw_latency_max = 0; + if (unlikely(xstats->dequeued_count == xstats->hw_reset_count)) { + xstats->hw_latency_min = UINT64_MAX; + xstats->hw_latency_max = 0; } - if (unlikely(stats->dequeued_count == stats->fw_reset_count)) { - stats->fw_latency_min = UINT64_MAX; - stats->fw_latency_max = 0; + if (unlikely(xstats->dequeued_count == xstats->fw_reset_count)) { + xstats->fw_latency_min = UINT64_MAX; + xstats->fw_latency_max = 0; } hw_latency = result->stats.hw_end - result->stats.hw_start; fw_latency = result->stats.fw_end - result->stats.fw_start - hw_latency; - stats->hw_latency_tot += hw_latency; - stats->hw_latency_min = PLT_MIN(stats->hw_latency_min, hw_latency); - stats->hw_latency_max = PLT_MAX(stats->hw_latency_max, hw_latency); - stats->fw_latency_tot += fw_latency; - stats->fw_latency_min = PLT_MIN(stats->fw_latency_min, fw_latency); - stats->fw_latency_max = PLT_MAX(stats->fw_latency_max, fw_latency); - stats->dequeued_count++; + xstats->hw_latency_tot += hw_latency; + xstats->hw_latency_min = PLT_MIN(xstats->hw_latency_min, hw_latency); + xstats->hw_latency_max = PLT_MAX(xstats->hw_latency_max, hw_latency); + xstats->fw_latency_tot += fw_latency; + xstats->fw_latency_min = PLT_MIN(xstats->fw_latency_min, fw_latency); + xstats->fw_latency_max = PLT_MAX(xstats->fw_latency_max, fw_latency); + xstats->dequeued_count++; op->impl_opaque = result->error_code; op->status = RTE_ML_OP_STATUS_SUCCESS; diff --git a/drivers/ml/cnxk/cnxk_ml_xstats.h b/drivers/ml/cnxk/cnxk_ml_xstats.h new file mode 100644 index 0000000000..0d405679ca --- /dev/null +++ b/drivers/ml/cnxk/cnxk_ml_xstats.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Marvell. + */ + +#ifndef _CNXK_ML_XSTATS_H_ +#define _CNXK_ML_XSTATS_H_ + +#include "cnxk_ml_io.h" + +/* Extended stats types enum */ +enum cnxk_ml_xstats_type { + /* Number of models loaded */ + nb_models_loaded, + + /* Number of models unloaded */ + nb_models_unloaded, + + /* Number of models started */ + nb_models_started, + + /* Number of models stopped */ + nb_models_stopped, + + /* Average inference hardware latency */ + avg_hw_latency, + + /* Minimum hardware latency */ + min_hw_latency, + + /* Maximum hardware latency */ + max_hw_latency, + + /* Average firmware latency */ + avg_fw_latency, + + /* Minimum firmware latency */ + min_fw_latency, + + /* Maximum firmware latency */ + max_fw_latency, + + /* Average runtime latency */ + avg_rt_latency, + + /* Minimum runtime latency */ + min_rt_latency, + + /* Maximum runtime latency */ + max_rt_latency, +}; + +/* Extended stats function type enum. */ +enum cnxk_ml_xstats_fn_type { + /* Device function */ + CNXK_ML_XSTATS_FN_DEVICE, + + /* Model function */ + CNXK_ML_XSTATS_FN_MODEL, +}; + +/* Function pointer to get xstats for a type */ +typedef uint64_t (*cnxk_ml_xstats_fn)(struct rte_ml_dev *cnxk_mldev, uint16_t obj_idx, + enum cnxk_ml_xstats_type stat); + +/* Extended stats entry structure */ +struct cnxk_ml_xstats_entry { + /* Name-ID map */ + struct rte_ml_dev_xstats_map map; + + /* xstats mode, device or model */ + enum rte_ml_dev_xstats_mode mode; + + /* Type of xstats */ + enum cnxk_ml_xstats_type type; + + /* xstats function */ + enum cnxk_ml_xstats_fn_type fn_id; + + /* Object ID, model ID for model stat type */ + uint16_t obj_idx; + + /* Layer ID, valid for model stat type */ + int32_t layer_id; + + /* Allowed to reset the stat */ + uint8_t reset_allowed; + + /* An offset to be taken away to emulate resets */ + uint64_t reset_value; +}; + +/* Extended stats data */ +struct cnxk_ml_xstats { + /* Pointer to xstats entries */ + struct cnxk_ml_xstats_entry *entries; + + /* Store num stats and offset of the stats for each model */ + uint16_t count_per_model[ML_CNXK_MAX_MODELS]; + uint16_t offset_for_model[ML_CNXK_MAX_MODELS]; + uint16_t count_per_layer[ML_CNXK_MAX_MODELS][ML_CNXK_MODEL_MAX_LAYERS]; + uint16_t offset_for_layer[ML_CNXK_MAX_MODELS][ML_CNXK_MODEL_MAX_LAYERS]; + uint16_t count_mode_device; + uint16_t count_mode_model; + uint16_t count; +}; + +struct cnxk_ml_xstat_info { + char name[32]; + enum cnxk_ml_xstats_type type; + uint8_t reset_allowed; +}; + +/* Device xstats. Note: Device stats are not allowed to be reset. */ +static const struct cnxk_ml_xstat_info device_xstats[] = { + {"nb_models_loaded", nb_models_loaded, 0}, + {"nb_models_unloaded", nb_models_unloaded, 0}, + {"nb_models_started", nb_models_started, 0}, + {"nb_models_stopped", nb_models_stopped, 0}, +}; + +/* Layer xstats */ +static const struct cnxk_ml_xstat_info layer_xstats[] = { + {"Avg-HW-Latency", avg_hw_latency, 1}, {"Min-HW-Latency", min_hw_latency, 1}, + {"Max-HW-Latency", max_hw_latency, 1}, {"Avg-FW-Latency", avg_fw_latency, 1}, + {"Min-FW-Latency", min_fw_latency, 1}, {"Max-FW-Latency", max_fw_latency, 1}, +}; + +#endif /* _CNXK_ML_XSTATS_H_ */ diff --git a/drivers/ml/cnxk/meson.build b/drivers/ml/cnxk/meson.build index 73db458fcd..6385ac4548 100644 --- a/drivers/ml/cnxk/meson.build +++ b/drivers/ml/cnxk/meson.build @@ -16,6 +16,7 @@ driver_sdk_headers = files( 'cnxk_ml_io.h', 'cnxk_ml_model.h', 'cnxk_ml_ops.h', + 'cnxk_ml_xstats.h', ) sources = files(