[v5,05/34] ml/cnxk: add generic cnxk xstats structures

Message ID 20231018064806.24145-6-syalavarthi@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Jerin Jacob
Headers
Series Implementation of revised ml/cnxk driver |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Srikanth Yalavarthi Oct. 18, 2023, 6:47 a.m. UTC
  Introduced generic xstats structures and renamed cn10k
xstats enumerations with cnxk prefix.

Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
 drivers/ml/cnxk/cn10k_ml_dev.h   |  86 +---------------
 drivers/ml/cnxk/cn10k_ml_model.h |   6 +-
 drivers/ml/cnxk/cn10k_ml_ops.c   | 169 ++++++++++++++-----------------
 drivers/ml/cnxk/cnxk_ml_xstats.h | 128 +++++++++++++++++++++++
 drivers/ml/cnxk/meson.build      |   1 +
 5 files changed, 210 insertions(+), 180 deletions(-)
 create mode 100644 drivers/ml/cnxk/cnxk_ml_xstats.h
  

Patch

diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h
index 1852d4f6c9..be989e0a20 100644
--- a/drivers/ml/cnxk/cn10k_ml_dev.h
+++ b/drivers/ml/cnxk/cn10k_ml_dev.h
@@ -10,6 +10,7 @@ 
 #include "cn10k_ml_ocm.h"
 
 #include "cnxk_ml_io.h"
+#include "cnxk_ml_xstats.h"
 
 /* Dummy Device ops */
 extern struct rte_ml_dev_ops ml_dev_dummy_ops;
@@ -121,89 +122,6 @@  struct cn10k_ml_fw {
 	struct cnxk_ml_req *req;
 };
 
-/* Extended stats types enum */
-enum cn10k_ml_xstats_type {
-	/* Number of models loaded */
-	nb_models_loaded,
-
-	/* Number of models unloaded */
-	nb_models_unloaded,
-
-	/* Number of models started */
-	nb_models_started,
-
-	/* Number of models stopped */
-	nb_models_stopped,
-
-	/* Average inference hardware latency */
-	avg_hw_latency,
-
-	/* Minimum hardware latency */
-	min_hw_latency,
-
-	/* Maximum hardware latency */
-	max_hw_latency,
-
-	/* Average firmware latency */
-	avg_fw_latency,
-
-	/* Minimum firmware latency */
-	min_fw_latency,
-
-	/* Maximum firmware latency */
-	max_fw_latency,
-};
-
-/* Extended stats function type enum. */
-enum cn10k_ml_xstats_fn_type {
-	/* Device function */
-	CN10K_ML_XSTATS_FN_DEVICE,
-
-	/* Model function */
-	CN10K_ML_XSTATS_FN_MODEL,
-};
-
-/* Function pointer to get xstats for a type */
-typedef uint64_t (*cn10k_ml_xstats_fn)(struct rte_ml_dev *dev, uint16_t obj_idx,
-				       enum cn10k_ml_xstats_type stat);
-
-/* Extended stats entry structure */
-struct cn10k_ml_xstats_entry {
-	/* Name-ID map */
-	struct rte_ml_dev_xstats_map map;
-
-	/* xstats mode, device or model */
-	enum rte_ml_dev_xstats_mode mode;
-
-	/* Type of xstats */
-	enum cn10k_ml_xstats_type type;
-
-	/* xstats function */
-	enum cn10k_ml_xstats_fn_type fn_id;
-
-	/* Object ID, model ID for model stat type */
-	uint16_t obj_idx;
-
-	/* Allowed to reset the stat */
-	uint8_t reset_allowed;
-
-	/* An offset to be taken away to emulate resets */
-	uint64_t reset_value;
-};
-
-/* Extended stats data */
-struct cn10k_ml_xstats {
-	/* Pointer to xstats entries */
-	struct cn10k_ml_xstats_entry *entries;
-
-	/* Store num stats and offset of the stats for each model */
-	uint16_t count_per_model[ML_CNXK_MAX_MODELS];
-	uint16_t offset_for_model[ML_CNXK_MAX_MODELS];
-	uint16_t count_mode_device;
-	uint16_t count_mode_model;
-	uint16_t count;
-};
-
 /* Device private data */
 struct cn10k_ml_dev {
 	/* Device ROC */
@@ -216,7 +134,7 @@  struct cn10k_ml_dev {
 	struct cn10k_ml_ocm ocm;
 
 	/* Extended stats data */
-	struct cn10k_ml_xstats xstats;
+	struct cnxk_ml_xstats xstats;
 
 	/* Enable / disable model data caching */
 	int cache_model_data;
diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h
index 74ada1531a..5c32f48c68 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.h
+++ b/drivers/ml/cnxk/cn10k_ml_model.h
@@ -404,7 +404,7 @@  struct cn10k_ml_layer_addr {
 };
 
 /* Model fast-path stats */
-struct cn10k_ml_layer_stats {
+struct cn10k_ml_layer_xstats {
 	/* Total hardware latency, sum of all inferences */
 	uint64_t hw_latency_tot;
 
@@ -447,10 +447,10 @@  struct cn10k_ml_layer_data {
 	struct cnxk_ml_req *req;
 
 	/* Layer: Stats for burst ops */
-	struct cn10k_ml_layer_stats *burst_stats;
+	struct cn10k_ml_layer_xstats *burst_xstats;
 
 	/* Layer: Stats for sync ops */
-	struct cn10k_ml_layer_stats *sync_stats;
+	struct cn10k_ml_layer_xstats *sync_xstats;
 };
 
 struct cn10k_ml_model_data {
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index caee09829b..42a4389bbe 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -10,6 +10,7 @@ 
 #include "cnxk_ml_dev.h"
 #include "cnxk_ml_model.h"
 #include "cnxk_ml_ops.h"
+#include "cnxk_ml_xstats.h"
 
 /* ML model macros */
 #define CN10K_ML_MODEL_MEMZONE_NAME "ml_cn10k_model_mz"
@@ -425,26 +426,6 @@  cn10k_ml_prep_fp_job_descriptor(struct cn10k_ml_dev *cn10k_mldev, struct cnxk_ml
 	req->cn10k_req.jd.model_run.num_batches = op->nb_batches;
 }
 
-struct xstat_info {
-	char name[32];
-	enum cn10k_ml_xstats_type type;
-	uint8_t reset_allowed;
-};
-
-/* Note: Device stats are not allowed to be reset. */
-static const struct xstat_info device_stats[] = {
-	{"nb_models_loaded", nb_models_loaded, 0},
-	{"nb_models_unloaded", nb_models_unloaded, 0},
-	{"nb_models_started", nb_models_started, 0},
-	{"nb_models_stopped", nb_models_stopped, 0},
-};
-
-static const struct xstat_info model_stats[] = {
-	{"Avg-HW-Latency", avg_hw_latency, 1}, {"Min-HW-Latency", min_hw_latency, 1},
-	{"Max-HW-Latency", max_hw_latency, 1}, {"Avg-FW-Latency", avg_fw_latency, 1},
-	{"Min-FW-Latency", min_fw_latency, 1}, {"Max-FW-Latency", max_fw_latency, 1},
-};
-
 static int
 cn10k_ml_xstats_init(struct rte_ml_dev *dev)
 {
@@ -459,10 +440,10 @@  cn10k_ml_xstats_init(struct rte_ml_dev *dev)
 	cn10k_mldev = &cnxk_mldev->cn10k_mldev;
 
 	/* Allocate memory for xstats entries. Don't allocate during reconfigure */
-	nb_stats = RTE_DIM(device_stats) + ML_CNXK_MAX_MODELS * RTE_DIM(model_stats);
+	nb_stats = RTE_DIM(device_xstats) + ML_CNXK_MAX_MODELS * RTE_DIM(layer_xstats);
 	if (cn10k_mldev->xstats.entries == NULL)
 		cn10k_mldev->xstats.entries = rte_zmalloc(
-			"cn10k_ml_xstats", sizeof(struct cn10k_ml_xstats_entry) * nb_stats,
+			"cn10k_ml_xstats", sizeof(struct cnxk_ml_xstats_entry) * nb_stats,
 			PLT_CACHE_LINE_SIZE);
 
 	if (cn10k_mldev->xstats.entries == NULL)
@@ -470,17 +451,17 @@  cn10k_ml_xstats_init(struct rte_ml_dev *dev)
 
 	/* Initialize device xstats */
 	stat_id = 0;
-	for (i = 0; i < RTE_DIM(device_stats); i++) {
+	for (i = 0; i < RTE_DIM(device_xstats); i++) {
 		cn10k_mldev->xstats.entries[stat_id].map.id = stat_id;
 		snprintf(cn10k_mldev->xstats.entries[stat_id].map.name,
 			 sizeof(cn10k_mldev->xstats.entries[stat_id].map.name), "%s",
-			 device_stats[i].name);
+			 device_xstats[i].name);
 
 		cn10k_mldev->xstats.entries[stat_id].mode = RTE_ML_DEV_XSTATS_DEVICE;
-		cn10k_mldev->xstats.entries[stat_id].type = device_stats[i].type;
-		cn10k_mldev->xstats.entries[stat_id].fn_id = CN10K_ML_XSTATS_FN_DEVICE;
+		cn10k_mldev->xstats.entries[stat_id].type = device_xstats[i].type;
+		cn10k_mldev->xstats.entries[stat_id].fn_id = CNXK_ML_XSTATS_FN_DEVICE;
 		cn10k_mldev->xstats.entries[stat_id].obj_idx = 0;
-		cn10k_mldev->xstats.entries[stat_id].reset_allowed = device_stats[i].reset_allowed;
+		cn10k_mldev->xstats.entries[stat_id].reset_allowed = device_xstats[i].reset_allowed;
 		stat_id++;
 	}
 	cn10k_mldev->xstats.count_mode_device = stat_id;
@@ -489,24 +470,24 @@  cn10k_ml_xstats_init(struct rte_ml_dev *dev)
 	for (model = 0; model < ML_CNXK_MAX_MODELS; model++) {
 		cn10k_mldev->xstats.offset_for_model[model] = stat_id;
 
-		for (i = 0; i < RTE_DIM(model_stats); i++) {
+		for (i = 0; i < RTE_DIM(layer_xstats); i++) {
 			cn10k_mldev->xstats.entries[stat_id].map.id = stat_id;
 			cn10k_mldev->xstats.entries[stat_id].mode = RTE_ML_DEV_XSTATS_MODEL;
-			cn10k_mldev->xstats.entries[stat_id].type = model_stats[i].type;
-			cn10k_mldev->xstats.entries[stat_id].fn_id = CN10K_ML_XSTATS_FN_MODEL;
+			cn10k_mldev->xstats.entries[stat_id].type = layer_xstats[i].type;
+			cn10k_mldev->xstats.entries[stat_id].fn_id = CNXK_ML_XSTATS_FN_MODEL;
 			cn10k_mldev->xstats.entries[stat_id].obj_idx = model;
 			cn10k_mldev->xstats.entries[stat_id].reset_allowed =
-				model_stats[i].reset_allowed;
+				layer_xstats[i].reset_allowed;
 
 			/* Name of xstat is updated during model load */
 			snprintf(cn10k_mldev->xstats.entries[stat_id].map.name,
 				 sizeof(cn10k_mldev->xstats.entries[stat_id].map.name),
-				 "Model-%u-%s", model, model_stats[i].name);
+				 "Model-%u-%s", model, layer_xstats[i].name);
 
 			stat_id++;
 		}
 
-		cn10k_mldev->xstats.count_per_model[model] = RTE_DIM(model_stats);
+		cn10k_mldev->xstats.count_per_model[model] = RTE_DIM(layer_xstats);
 	}
 
 	cn10k_mldev->xstats.count_mode_model = stat_id - cn10k_mldev->xstats.count_mode_device;
@@ -545,7 +526,7 @@  cn10k_ml_xstats_model_name_update(struct rte_ml_dev *dev, uint16_t model_id)
 	cnxk_mldev = dev->data->dev_private;
 	cn10k_mldev = &cnxk_mldev->cn10k_mldev;
 	model = dev->data->models[model_id];
-	stat_id = RTE_DIM(device_stats) + model_id * RTE_DIM(model_stats);
+	stat_id = RTE_DIM(device_xstats) + model_id * RTE_DIM(layer_xstats);
 
 	roc_clk_freq_get(&rclk_freq, &sclk_freq);
 	if (sclk_freq == 0)
@@ -554,17 +535,17 @@  cn10k_ml_xstats_model_name_update(struct rte_ml_dev *dev, uint16_t model_id)
 		strcpy(suffix, "ns");
 
 	/* Update xstat name based on model name and sclk availability */
-	for (i = 0; i < RTE_DIM(model_stats); i++) {
+	for (i = 0; i < RTE_DIM(layer_xstats); i++) {
 		snprintf(cn10k_mldev->xstats.entries[stat_id].map.name,
 			 sizeof(cn10k_mldev->xstats.entries[stat_id].map.name), "%s-%s-%s",
-			 model->layer[0].glow.metadata.model.name, model_stats[i].name, suffix);
+			 model->layer[0].glow.metadata.model.name, layer_xstats[i].name, suffix);
 		stat_id++;
 	}
 }
 
 static uint64_t
 cn10k_ml_dev_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx __rte_unused,
-		       enum cn10k_ml_xstats_type type)
+		       enum cnxk_ml_xstats_type type)
 {
 	struct cnxk_ml_dev *cnxk_mldev;
 
@@ -590,9 +571,9 @@  cn10k_ml_dev_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx __rte_unused,
 	do {                                                                                       \
 		value = 0;                                                                         \
 		for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {                      \
-			value += model->layer[0].glow.burst_stats[qp_id].str##_latency_tot;        \
-			count += model->layer[0].glow.burst_stats[qp_id].dequeued_count -          \
-				 model->layer[0].glow.burst_stats[qp_id].str##_reset_count;        \
+			value += model->layer[0].glow.burst_xstats[qp_id].str##_latency_tot;       \
+			count += model->layer[0].glow.burst_xstats[qp_id].dequeued_count -         \
+				 model->layer[0].glow.burst_xstats[qp_id].str##_reset_count;       \
 		}                                                                                  \
 		if (count != 0)                                                                    \
 			value = value / count;                                                     \
@@ -603,9 +584,10 @@  cn10k_ml_dev_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx __rte_unused,
 		value = UINT64_MAX;                                                                \
 		for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {                      \
 			value = PLT_MIN(                                                           \
-				value, model->layer[0].glow.burst_stats[qp_id].str##_latency_min); \
-			count += model->layer[0].glow.burst_stats[qp_id].dequeued_count -          \
-				 model->layer[0].glow.burst_stats[qp_id].str##_reset_count;        \
+				value,                                                             \
+				model->layer[0].glow.burst_xstats[qp_id].str##_latency_min);       \
+			count += model->layer[0].glow.burst_xstats[qp_id].dequeued_count -         \
+				 model->layer[0].glow.burst_xstats[qp_id].str##_reset_count;       \
 		}                                                                                  \
 		if (count == 0)                                                                    \
 			value = 0;                                                                 \
@@ -616,16 +598,17 @@  cn10k_ml_dev_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx __rte_unused,
 		value = 0;                                                                         \
 		for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {                      \
 			value = PLT_MAX(                                                           \
-				value, model->layer[0].glow.burst_stats[qp_id].str##_latency_max); \
-			count += model->layer[0].glow.burst_stats[qp_id].dequeued_count -          \
-				 model->layer[0].glow.burst_stats[qp_id].str##_reset_count;        \
+				value,                                                             \
+				model->layer[0].glow.burst_xstats[qp_id].str##_latency_max);       \
+			count += model->layer[0].glow.burst_xstats[qp_id].dequeued_count -         \
+				 model->layer[0].glow.burst_xstats[qp_id].str##_reset_count;       \
 		}                                                                                  \
 		if (count == 0)                                                                    \
 			value = 0;                                                                 \
 	} while (0)
 
 static uint64_t
-cn10k_ml_model_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx, enum cn10k_ml_xstats_type type)
+cn10k_ml_model_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx, enum cnxk_ml_xstats_type type)
 {
 	struct cnxk_ml_model *model;
 	uint16_t rclk_freq; /* MHz */
@@ -671,8 +654,8 @@  cn10k_ml_model_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx, enum cn10k_ml
 static int
 cn10k_ml_device_xstats_reset(struct rte_ml_dev *dev, const uint16_t stat_ids[], uint16_t nb_ids)
 {
-	struct cn10k_ml_xstats_entry *xs;
 	struct cn10k_ml_dev *cn10k_mldev;
+	struct cnxk_ml_xstats_entry *xs;
 	struct cnxk_ml_dev *cnxk_mldev;
 	uint16_t nb_stats;
 	uint16_t stat_id;
@@ -708,26 +691,26 @@  cn10k_ml_device_xstats_reset(struct rte_ml_dev *dev, const uint16_t stat_ids[],
 #define ML_AVG_RESET_FOREACH_QP(dev, model, qp_id, str)                                            \
 	do {                                                                                       \
 		for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {                      \
-			model->layer[0].glow.burst_stats[qp_id].str##_latency_tot = 0;             \
-			model->layer[0].glow.burst_stats[qp_id].str##_reset_count =                \
-				model->layer[0].glow.burst_stats[qp_id].dequeued_count;            \
+			model->layer[0].glow.burst_xstats[qp_id].str##_latency_tot = 0;            \
+			model->layer[0].glow.burst_xstats[qp_id].str##_reset_count =               \
+				model->layer[0].glow.burst_xstats[qp_id].dequeued_count;           \
 		}                                                                                  \
 	} while (0)
 
 #define ML_MIN_RESET_FOREACH_QP(dev, model, qp_id, str)                                            \
 	do {                                                                                       \
 		for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++)                        \
-			model->layer[0].glow.burst_stats[qp_id].str##_latency_min = UINT64_MAX;    \
+			model->layer[0].glow.burst_xstats[qp_id].str##_latency_min = UINT64_MAX;   \
 	} while (0)
 
 #define ML_MAX_RESET_FOREACH_QP(dev, model, qp_id, str)                                            \
 	do {                                                                                       \
 		for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++)                        \
-			model->layer[0].glow.burst_stats[qp_id].str##_latency_max = 0;             \
+			model->layer[0].glow.burst_xstats[qp_id].str##_latency_max = 0;            \
 	} while (0)
 
 static void
-cn10k_ml_reset_model_stat(struct rte_ml_dev *dev, uint16_t model_id, enum cn10k_ml_xstats_type type)
+cn10k_ml_reset_model_stat(struct rte_ml_dev *dev, uint16_t model_id, enum cnxk_ml_xstats_type type)
 {
 	struct cnxk_ml_model *model;
 	uint32_t qp_id;
@@ -762,8 +745,8 @@  static int
 cn10k_ml_model_xstats_reset(struct rte_ml_dev *dev, int32_t model_id, const uint16_t stat_ids[],
 			    uint16_t nb_ids)
 {
-	struct cn10k_ml_xstats_entry *xs;
 	struct cn10k_ml_dev *cn10k_mldev;
+	struct cnxk_ml_xstats_entry *xs;
 	struct cnxk_ml_dev *cnxk_mldev;
 	struct cnxk_ml_model *model;
 	int32_t lcl_model_id = 0;
@@ -1342,10 +1325,10 @@  static int
 cn10k_ml_dev_xstats_by_name_get(struct rte_ml_dev *dev, const char *name, uint16_t *stat_id,
 				uint64_t *value)
 {
-	struct cn10k_ml_xstats_entry *xs;
+	struct cnxk_ml_xstats_entry *xs;
 	struct cn10k_ml_dev *cn10k_mldev;
 	struct cnxk_ml_dev *cnxk_mldev;
-	cn10k_ml_xstats_fn fn;
+	cnxk_ml_xstats_fn fn;
 	uint32_t i;
 
 	cnxk_mldev = dev->data->dev_private;
@@ -1357,10 +1340,10 @@  cn10k_ml_dev_xstats_by_name_get(struct rte_ml_dev *dev, const char *name, uint16
 				*stat_id = xs->map.id;
 
 			switch (xs->fn_id) {
-			case CN10K_ML_XSTATS_FN_DEVICE:
+			case CNXK_ML_XSTATS_FN_DEVICE:
 				fn = cn10k_ml_dev_xstat_get;
 				break;
-			case CN10K_ML_XSTATS_FN_MODEL:
+			case CNXK_ML_XSTATS_FN_MODEL:
 				fn = cn10k_ml_model_xstat_get;
 				break;
 			default:
@@ -1384,11 +1367,11 @@  static int
 cn10k_ml_dev_xstats_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
 			const uint16_t stat_ids[], uint64_t values[], uint16_t nb_ids)
 {
-	struct cn10k_ml_xstats_entry *xs;
 	struct cn10k_ml_dev *cn10k_mldev;
+	struct cnxk_ml_xstats_entry *xs;
 	struct cnxk_ml_dev *cnxk_mldev;
 	uint32_t xstats_mode_count;
-	cn10k_ml_xstats_fn fn;
+	cnxk_ml_xstats_fn fn;
 	uint64_t val;
 	uint32_t idx;
 	uint32_t i;
@@ -1423,10 +1406,10 @@  cn10k_ml_dev_xstats_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode
 		}
 
 		switch (xs->fn_id) {
-		case CN10K_ML_XSTATS_FN_DEVICE:
+		case CNXK_ML_XSTATS_FN_DEVICE:
 			fn = cn10k_ml_dev_xstat_get;
 			break;
-		case CN10K_ML_XSTATS_FN_MODEL:
+		case CNXK_ML_XSTATS_FN_MODEL:
 			fn = cn10k_ml_model_xstat_get;
 			break;
 		default:
@@ -1664,7 +1647,7 @@  cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
 			  metadata->model.num_input * sizeof(struct rte_ml_io_info) +
 			  metadata->model.num_output * sizeof(struct rte_ml_io_info);
 	model_info_size = PLT_ALIGN_CEIL(model_info_size, ML_CN10K_ALIGN_SIZE);
-	model_stats_size = (dev->data->nb_queue_pairs + 1) * sizeof(struct cn10k_ml_layer_stats);
+	model_stats_size = (dev->data->nb_queue_pairs + 1) * sizeof(struct cn10k_ml_layer_xstats);
 
 	mz_size = PLT_ALIGN_CEIL(sizeof(struct cnxk_ml_model), ML_CN10K_ALIGN_SIZE) +
 		  2 * model_data_size + model_scratch_size + model_info_size +
@@ -1738,24 +1721,24 @@  cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
 	model->layer[0].glow.req = PLT_PTR_ADD(model->info, model_info_size);
 
 	/* Reset burst and sync stats */
-	model->layer[0].glow.burst_stats =
+	model->layer[0].glow.burst_xstats =
 		PLT_PTR_ADD(model->layer[0].glow.req,
 			    PLT_ALIGN_CEIL(sizeof(struct cnxk_ml_req), ML_CN10K_ALIGN_SIZE));
 	for (qp_id = 0; qp_id < dev->data->nb_queue_pairs + 1; qp_id++) {
-		model->layer[0].glow.burst_stats[qp_id].hw_latency_tot = 0;
-		model->layer[0].glow.burst_stats[qp_id].hw_latency_min = UINT64_MAX;
-		model->layer[0].glow.burst_stats[qp_id].hw_latency_max = 0;
-		model->layer[0].glow.burst_stats[qp_id].fw_latency_tot = 0;
-		model->layer[0].glow.burst_stats[qp_id].fw_latency_min = UINT64_MAX;
-		model->layer[0].glow.burst_stats[qp_id].fw_latency_max = 0;
-		model->layer[0].glow.burst_stats[qp_id].hw_reset_count = 0;
-		model->layer[0].glow.burst_stats[qp_id].fw_reset_count = 0;
-		model->layer[0].glow.burst_stats[qp_id].dequeued_count = 0;
+		model->layer[0].glow.burst_xstats[qp_id].hw_latency_tot = 0;
+		model->layer[0].glow.burst_xstats[qp_id].hw_latency_min = UINT64_MAX;
+		model->layer[0].glow.burst_xstats[qp_id].hw_latency_max = 0;
+		model->layer[0].glow.burst_xstats[qp_id].fw_latency_tot = 0;
+		model->layer[0].glow.burst_xstats[qp_id].fw_latency_min = UINT64_MAX;
+		model->layer[0].glow.burst_xstats[qp_id].fw_latency_max = 0;
+		model->layer[0].glow.burst_xstats[qp_id].hw_reset_count = 0;
+		model->layer[0].glow.burst_xstats[qp_id].fw_reset_count = 0;
+		model->layer[0].glow.burst_xstats[qp_id].dequeued_count = 0;
 	}
 
-	model->layer[0].glow.sync_stats =
-		PLT_PTR_ADD(model->layer[0].glow.burst_stats,
-			    dev->data->nb_queue_pairs * sizeof(struct cn10k_ml_layer_stats));
+	model->layer[0].glow.sync_xstats =
+		PLT_PTR_ADD(model->layer[0].glow.burst_xstats,
+			    dev->data->nb_queue_pairs * sizeof(struct cn10k_ml_layer_xstats));
 
 	plt_spinlock_init(&model->lock);
 	model->state = ML_CNXK_MODEL_STATE_LOADED;
@@ -2308,7 +2291,7 @@  static __rte_always_inline void
 cn10k_ml_result_update(struct rte_ml_dev *dev, int qp_id, struct cnxk_ml_req *req)
 {
 	union cn10k_ml_error_code *error_code;
-	struct cn10k_ml_layer_stats *stats;
+	struct cn10k_ml_layer_xstats *xstats;
 	struct cn10k_ml_dev *cn10k_mldev;
 	struct cnxk_ml_dev *cnxk_mldev;
 	struct cn10k_ml_result *result;
@@ -2326,31 +2309,31 @@  cn10k_ml_result_update(struct rte_ml_dev *dev, int qp_id, struct cnxk_ml_req *re
 		if (likely(qp_id >= 0)) {
 			qp = dev->data->queue_pairs[qp_id];
 			qp->stats.dequeued_count++;
-			stats = &model->layer[0].glow.burst_stats[qp_id];
+			xstats = &model->layer[0].glow.burst_xstats[qp_id];
 		} else {
-			stats = model->layer[0].glow.sync_stats;
+			xstats = model->layer[0].glow.sync_xstats;
 		}
 
-		if (unlikely(stats->dequeued_count == stats->hw_reset_count)) {
-			stats->hw_latency_min = UINT64_MAX;
-			stats->hw_latency_max = 0;
+		if (unlikely(xstats->dequeued_count == xstats->hw_reset_count)) {
+			xstats->hw_latency_min = UINT64_MAX;
+			xstats->hw_latency_max = 0;
 		}
 
-		if (unlikely(stats->dequeued_count == stats->fw_reset_count)) {
-			stats->fw_latency_min = UINT64_MAX;
-			stats->fw_latency_max = 0;
+		if (unlikely(xstats->dequeued_count == xstats->fw_reset_count)) {
+			xstats->fw_latency_min = UINT64_MAX;
+			xstats->fw_latency_max = 0;
 		}
 
 		hw_latency = result->stats.hw_end - result->stats.hw_start;
 		fw_latency = result->stats.fw_end - result->stats.fw_start - hw_latency;
 
-		stats->hw_latency_tot += hw_latency;
-		stats->hw_latency_min = PLT_MIN(stats->hw_latency_min, hw_latency);
-		stats->hw_latency_max = PLT_MAX(stats->hw_latency_max, hw_latency);
-		stats->fw_latency_tot += fw_latency;
-		stats->fw_latency_min = PLT_MIN(stats->fw_latency_min, fw_latency);
-		stats->fw_latency_max = PLT_MAX(stats->fw_latency_max, fw_latency);
-		stats->dequeued_count++;
+		xstats->hw_latency_tot += hw_latency;
+		xstats->hw_latency_min = PLT_MIN(xstats->hw_latency_min, hw_latency);
+		xstats->hw_latency_max = PLT_MAX(xstats->hw_latency_max, hw_latency);
+		xstats->fw_latency_tot += fw_latency;
+		xstats->fw_latency_min = PLT_MIN(xstats->fw_latency_min, fw_latency);
+		xstats->fw_latency_max = PLT_MAX(xstats->fw_latency_max, fw_latency);
+		xstats->dequeued_count++;
 
 		op->impl_opaque = result->error_code;
 		op->status = RTE_ML_OP_STATUS_SUCCESS;
diff --git a/drivers/ml/cnxk/cnxk_ml_xstats.h b/drivers/ml/cnxk/cnxk_ml_xstats.h
new file mode 100644
index 0000000000..0d405679ca
--- /dev/null
+++ b/drivers/ml/cnxk/cnxk_ml_xstats.h
@@ -0,0 +1,128 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Marvell.
+ */
+
+#ifndef _CNXK_ML_XSTATS_H_
+#define _CNXK_ML_XSTATS_H_
+
+#include "cnxk_ml_io.h"
+
+/* Extended stats types enum */
+enum cnxk_ml_xstats_type {
+	/* Number of models loaded */
+	nb_models_loaded,
+
+	/* Number of models unloaded */
+	nb_models_unloaded,
+
+	/* Number of models started */
+	nb_models_started,
+
+	/* Number of models stopped */
+	nb_models_stopped,
+
+	/* Average inference hardware latency */
+	avg_hw_latency,
+
+	/* Minimum hardware latency */
+	min_hw_latency,
+
+	/* Maximum hardware latency */
+	max_hw_latency,
+
+	/* Average firmware latency */
+	avg_fw_latency,
+
+	/* Minimum firmware latency */
+	min_fw_latency,
+
+	/* Maximum firmware latency */
+	max_fw_latency,
+
+	/* Average runtime latency */
+	avg_rt_latency,
+
+	/* Minimum runtime latency */
+	min_rt_latency,
+
+	/* Maximum runtime latency */
+	max_rt_latency,
+};
+
+/* Extended stats function type enum. */
+enum cnxk_ml_xstats_fn_type {
+	/* Device function */
+	CNXK_ML_XSTATS_FN_DEVICE,
+
+	/* Model function */
+	CNXK_ML_XSTATS_FN_MODEL,
+};
+
+/* Function pointer to get xstats for a type */
+typedef uint64_t (*cnxk_ml_xstats_fn)(struct rte_ml_dev *cnxk_mldev, uint16_t obj_idx,
+				      enum cnxk_ml_xstats_type stat);
+
+/* Extended stats entry structure */
+struct cnxk_ml_xstats_entry {
+	/* Name-ID map */
+	struct rte_ml_dev_xstats_map map;
+
+	/* xstats mode, device or model */
+	enum rte_ml_dev_xstats_mode mode;
+
+	/* Type of xstats */
+	enum cnxk_ml_xstats_type type;
+
+	/* xstats function */
+	enum cnxk_ml_xstats_fn_type fn_id;
+
+	/* Object ID, model ID for model stat type */
+	uint16_t obj_idx;
+
+	/* Layer ID, valid for model stat type */
+	int32_t layer_id;
+
+	/* Allowed to reset the stat */
+	uint8_t reset_allowed;
+
+	/* An offset to be taken away to emulate resets */
+	uint64_t reset_value;
+};
+
+/* Extended stats data */
+struct cnxk_ml_xstats {
+	/* Pointer to xstats entries */
+	struct cnxk_ml_xstats_entry *entries;
+
+	/* Store num stats and offset of the stats for each model */
+	uint16_t count_per_model[ML_CNXK_MAX_MODELS];
+	uint16_t offset_for_model[ML_CNXK_MAX_MODELS];
+	uint16_t count_per_layer[ML_CNXK_MAX_MODELS][ML_CNXK_MODEL_MAX_LAYERS];
+	uint16_t offset_for_layer[ML_CNXK_MAX_MODELS][ML_CNXK_MODEL_MAX_LAYERS];
+	uint16_t count_mode_device;
+	uint16_t count_mode_model;
+	uint16_t count;
+};
+
+struct cnxk_ml_xstat_info {
+	char name[32];
+	enum cnxk_ml_xstats_type type;
+	uint8_t reset_allowed;
+};
+
+/* Device xstats. Note: Device stats are not allowed to be reset. */
+static const struct cnxk_ml_xstat_info device_xstats[] = {
+	{"nb_models_loaded", nb_models_loaded, 0},
+	{"nb_models_unloaded", nb_models_unloaded, 0},
+	{"nb_models_started", nb_models_started, 0},
+	{"nb_models_stopped", nb_models_stopped, 0},
+};
+
+/* Layer xstats */
+static const struct cnxk_ml_xstat_info layer_xstats[] = {
+	{"Avg-HW-Latency", avg_hw_latency, 1}, {"Min-HW-Latency", min_hw_latency, 1},
+	{"Max-HW-Latency", max_hw_latency, 1}, {"Avg-FW-Latency", avg_fw_latency, 1},
+	{"Min-FW-Latency", min_fw_latency, 1}, {"Max-FW-Latency", max_fw_latency, 1},
+};
+
+#endif /* _CNXK_ML_XSTATS_H_ */
diff --git a/drivers/ml/cnxk/meson.build b/drivers/ml/cnxk/meson.build
index 73db458fcd..6385ac4548 100644
--- a/drivers/ml/cnxk/meson.build
+++ b/drivers/ml/cnxk/meson.build
@@ -16,6 +16,7 @@  driver_sdk_headers = files(
         'cnxk_ml_io.h',
         'cnxk_ml_model.h',
         'cnxk_ml_ops.h',
+        'cnxk_ml_xstats.h',
 )
 
 sources = files(