[v5,15/34] ml/cnxk: update device and model xstats functions

Message ID 20231018064806.24145-16-syalavarthi@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Jerin Jacob
Headers
Series Implementation of revised ml/cnxk driver |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Srikanth Yalavarthi Oct. 18, 2023, 6:47 a.m. UTC
  Added cnxk wrapper functions to handle ML device and model
extended stats. Resources for the xstats are now managed
in the cnxk layer. Introduced an internal xstats group.

Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
 drivers/ml/cnxk/cn10k_ml_dev.h   |   4 -
 drivers/ml/cnxk/cn10k_ml_ops.c   | 531 +++----------------------------
 drivers/ml/cnxk/cn10k_ml_ops.h   |  16 +-
 drivers/ml/cnxk/cnxk_ml_dev.h    |   5 +
 drivers/ml/cnxk/cnxk_ml_ops.c    | 481 +++++++++++++++++++++++++++-
 drivers/ml/cnxk/cnxk_ml_xstats.h |  21 +-
 6 files changed, 551 insertions(+), 507 deletions(-)
  

Patch

diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h
index be989e0a20..bde9d08901 100644
--- a/drivers/ml/cnxk/cn10k_ml_dev.h
+++ b/drivers/ml/cnxk/cn10k_ml_dev.h
@@ -10,7 +10,6 @@ 
 #include "cn10k_ml_ocm.h"
 
 #include "cnxk_ml_io.h"
-#include "cnxk_ml_xstats.h"
 
 /* Dummy Device ops */
 extern struct rte_ml_dev_ops ml_dev_dummy_ops;
@@ -133,9 +132,6 @@  struct cn10k_ml_dev {
 	/* OCM info */
 	struct cn10k_ml_ocm ocm;
 
-	/* Extended stats data */
-	struct cnxk_ml_xstats xstats;
-
 	/* Enable / disable model data caching */
 	int cache_model_data;
 
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index 27d255a830..776ad60401 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -198,107 +198,21 @@  cn10k_ml_prep_fp_job_descriptor(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_r
 	req->cn10k_req.jd.model_run.num_batches = op->nb_batches;
 }
 
-static int
-cn10k_ml_xstats_init(struct rte_ml_dev *dev)
-{
-	struct cn10k_ml_dev *cn10k_mldev;
-	struct cnxk_ml_dev *cnxk_mldev;
-	uint16_t nb_stats;
-	uint16_t stat_id;
-	uint16_t model;
-	uint16_t i;
-
-	cnxk_mldev = dev->data->dev_private;
-	cn10k_mldev = &cnxk_mldev->cn10k_mldev;
-
-	/* Allocate memory for xstats entries. Don't allocate during reconfigure */
-	nb_stats = RTE_DIM(device_xstats) + ML_CNXK_MAX_MODELS * RTE_DIM(layer_xstats);
-	if (cn10k_mldev->xstats.entries == NULL)
-		cn10k_mldev->xstats.entries = rte_zmalloc(
-			"cn10k_ml_xstats", sizeof(struct cnxk_ml_xstats_entry) * nb_stats,
-			PLT_CACHE_LINE_SIZE);
-
-	if (cn10k_mldev->xstats.entries == NULL)
-		return -ENOMEM;
-
-	/* Initialize device xstats */
-	stat_id = 0;
-	for (i = 0; i < RTE_DIM(device_xstats); i++) {
-		cn10k_mldev->xstats.entries[stat_id].map.id = stat_id;
-		snprintf(cn10k_mldev->xstats.entries[stat_id].map.name,
-			 sizeof(cn10k_mldev->xstats.entries[stat_id].map.name), "%s",
-			 device_xstats[i].name);
-
-		cn10k_mldev->xstats.entries[stat_id].mode = RTE_ML_DEV_XSTATS_DEVICE;
-		cn10k_mldev->xstats.entries[stat_id].type = device_xstats[i].type;
-		cn10k_mldev->xstats.entries[stat_id].fn_id = CNXK_ML_XSTATS_FN_DEVICE;
-		cn10k_mldev->xstats.entries[stat_id].obj_idx = 0;
-		cn10k_mldev->xstats.entries[stat_id].reset_allowed = device_xstats[i].reset_allowed;
-		stat_id++;
-	}
-	cn10k_mldev->xstats.count_mode_device = stat_id;
-
-	/* Initialize model xstats */
-	for (model = 0; model < ML_CNXK_MAX_MODELS; model++) {
-		cn10k_mldev->xstats.offset_for_model[model] = stat_id;
-
-		for (i = 0; i < RTE_DIM(layer_xstats); i++) {
-			cn10k_mldev->xstats.entries[stat_id].map.id = stat_id;
-			cn10k_mldev->xstats.entries[stat_id].mode = RTE_ML_DEV_XSTATS_MODEL;
-			cn10k_mldev->xstats.entries[stat_id].type = layer_xstats[i].type;
-			cn10k_mldev->xstats.entries[stat_id].fn_id = CNXK_ML_XSTATS_FN_MODEL;
-			cn10k_mldev->xstats.entries[stat_id].obj_idx = model;
-			cn10k_mldev->xstats.entries[stat_id].reset_allowed =
-				layer_xstats[i].reset_allowed;
-
-			/* Name of xstat is updated during model load */
-			snprintf(cn10k_mldev->xstats.entries[stat_id].map.name,
-				 sizeof(cn10k_mldev->xstats.entries[stat_id].map.name),
-				 "Model-%u-%s", model, layer_xstats[i].name);
-
-			stat_id++;
-		}
-
-		cn10k_mldev->xstats.count_per_model[model] = RTE_DIM(layer_xstats);
-	}
-
-	cn10k_mldev->xstats.count_mode_model = stat_id - cn10k_mldev->xstats.count_mode_device;
-	cn10k_mldev->xstats.count = stat_id;
-
-	return 0;
-}
-
 static void
-cn10k_ml_xstats_uninit(struct rte_ml_dev *dev)
+cn10k_ml_xstats_layer_name_update(struct cnxk_ml_dev *cnxk_mldev, uint16_t model_id,
+				  uint16_t layer_id)
 {
-	struct cn10k_ml_dev *cn10k_mldev;
-	struct cnxk_ml_dev *cnxk_mldev;
-
-	cnxk_mldev = dev->data->dev_private;
-	cn10k_mldev = &cnxk_mldev->cn10k_mldev;
-
-	rte_free(cn10k_mldev->xstats.entries);
-	cn10k_mldev->xstats.entries = NULL;
-
-	cn10k_mldev->xstats.count = 0;
-}
-
-static void
-cn10k_ml_xstats_model_name_update(struct rte_ml_dev *dev, uint16_t model_id)
-{
-	struct cn10k_ml_dev *cn10k_mldev;
-	struct cnxk_ml_dev *cnxk_mldev;
 	struct cnxk_ml_model *model;
+	struct cnxk_ml_layer *layer;
 	uint16_t rclk_freq;
 	uint16_t sclk_freq;
 	uint16_t stat_id;
 	char suffix[8];
 	uint16_t i;
 
-	cnxk_mldev = dev->data->dev_private;
-	cn10k_mldev = &cnxk_mldev->cn10k_mldev;
-	model = dev->data->models[model_id];
-	stat_id = RTE_DIM(device_xstats) + model_id * RTE_DIM(layer_xstats);
+	model = cnxk_mldev->mldev->data->models[model_id];
+	layer = &model->layer[layer_id];
+	stat_id = cnxk_mldev->xstats.offset_for_layer[model_id][layer_id];
 
 	roc_clk_freq_get(&rclk_freq, &sclk_freq);
 	if (sclk_freq == 0)
@@ -306,270 +220,94 @@  cn10k_ml_xstats_model_name_update(struct rte_ml_dev *dev, uint16_t model_id)
 	else
 		strcpy(suffix, "ns");
 
-	/* Update xstat name based on model name and sclk availability */
+	/* Update xstat name based on layer name and sclk availability */
 	for (i = 0; i < RTE_DIM(layer_xstats); i++) {
-		snprintf(cn10k_mldev->xstats.entries[stat_id].map.name,
-			 sizeof(cn10k_mldev->xstats.entries[stat_id].map.name), "%s-%s-%s",
-			 model->layer[0].glow.metadata.model.name, layer_xstats[i].name, suffix);
+		snprintf(cnxk_mldev->xstats.entries[stat_id].map.name,
+			 sizeof(cnxk_mldev->xstats.entries[stat_id].map.name), "%s-%s-%s",
+			 layer->glow.metadata.model.name, layer_xstats[i].name, suffix);
 		stat_id++;
 	}
 }
 
-static uint64_t
-cn10k_ml_dev_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx __rte_unused,
-		       enum cnxk_ml_xstats_type type)
-{
-	struct cnxk_ml_dev *cnxk_mldev;
-
-	cnxk_mldev = dev->data->dev_private;
-
-	switch (type) {
-	case nb_models_loaded:
-		return cnxk_mldev->nb_models_loaded;
-	case nb_models_unloaded:
-		return cnxk_mldev->nb_models_unloaded;
-	case nb_models_started:
-		return cnxk_mldev->nb_models_started;
-	case nb_models_stopped:
-		return cnxk_mldev->nb_models_stopped;
-	default:
-		return -1;
-	}
-
-	return 0;
-}
-
-#define ML_AVG_FOREACH_QP(dev, model, qp_id, str, value, count)                                    \
+#define ML_AVG_FOREACH_QP(cnxk_mldev, layer, qp_id, str, value, count)                             \
 	do {                                                                                       \
 		value = 0;                                                                         \
-		for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {                      \
-			value += model->layer[0].glow.burst_xstats[qp_id].str##_latency_tot;       \
-			count += model->layer[0].glow.burst_xstats[qp_id].dequeued_count -         \
-				 model->layer[0].glow.burst_xstats[qp_id].str##_reset_count;       \
+		for (qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs; qp_id++) {        \
+			value += layer->glow.burst_xstats[qp_id].str##_latency_tot;                \
+			count += layer->glow.burst_xstats[qp_id].dequeued_count -                  \
+				 layer->glow.burst_xstats[qp_id].str##_reset_count;                \
 		}                                                                                  \
+		value += layer->glow.sync_xstats->str##_latency_tot;                               \
+		count += layer->glow.sync_xstats->dequeued_count -                                 \
+			 layer->glow.sync_xstats->str##_reset_count;                               \
 		if (count != 0)                                                                    \
 			value = value / count;                                                     \
 	} while (0)
 
-#define ML_MIN_FOREACH_QP(dev, model, qp_id, str, value, count)                                    \
+#define ML_MIN_FOREACH_QP(cnxk_mldev, layer, qp_id, str, value, count)                             \
 	do {                                                                                       \
 		value = UINT64_MAX;                                                                \
-		for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {                      \
-			value = PLT_MIN(                                                           \
-				value,                                                             \
-				model->layer[0].glow.burst_xstats[qp_id].str##_latency_min);       \
-			count += model->layer[0].glow.burst_xstats[qp_id].dequeued_count -         \
-				 model->layer[0].glow.burst_xstats[qp_id].str##_reset_count;       \
+		for (qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs; qp_id++) {        \
+			value = PLT_MIN(value, layer->glow.burst_xstats[qp_id].str##_latency_min); \
+			count += layer->glow.burst_xstats[qp_id].dequeued_count -                  \
+				 layer->glow.burst_xstats[qp_id].str##_reset_count;                \
 		}                                                                                  \
+		value = PLT_MIN(value, layer->glow.sync_xstats->str##_latency_min);                \
+		count += layer->glow.sync_xstats->dequeued_count -                                 \
+			 layer->glow.sync_xstats->str##_reset_count;                               \
 		if (count == 0)                                                                    \
 			value = 0;                                                                 \
 	} while (0)
 
-#define ML_MAX_FOREACH_QP(dev, model, qp_id, str, value, count)                                    \
+#define ML_MAX_FOREACH_QP(cnxk_mldev, layer, qp_id, str, value, count)                             \
 	do {                                                                                       \
 		value = 0;                                                                         \
-		for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {                      \
-			value = PLT_MAX(                                                           \
-				value,                                                             \
-				model->layer[0].glow.burst_xstats[qp_id].str##_latency_max);       \
-			count += model->layer[0].glow.burst_xstats[qp_id].dequeued_count -         \
-				 model->layer[0].glow.burst_xstats[qp_id].str##_reset_count;       \
+		for (qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs; qp_id++) {        \
+			value = PLT_MAX(value, layer->glow.burst_xstats[qp_id].str##_latency_max); \
+			count += layer->glow.burst_xstats[qp_id].dequeued_count -                  \
+				 layer->glow.burst_xstats[qp_id].str##_reset_count;                \
 		}                                                                                  \
+		value = PLT_MAX(value, layer->glow.sync_xstats->str##_latency_max);                \
+		count += layer->glow.sync_xstats->dequeued_count -                                 \
+			 layer->glow.sync_xstats->str##_reset_count;                               \
 		if (count == 0)                                                                    \
 			value = 0;                                                                 \
 	} while (0)
 
-static uint64_t
-cn10k_ml_model_xstat_get(struct rte_ml_dev *dev, uint16_t obj_idx, enum cnxk_ml_xstats_type type)
+uint64_t
+cn10k_ml_model_xstat_get(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_layer *layer,
+			 enum cnxk_ml_xstats_type type)
 {
-	struct cnxk_ml_model *model;
-	uint16_t rclk_freq; /* MHz */
-	uint16_t sclk_freq; /* MHz */
 	uint64_t count = 0;
-	uint64_t value;
+	uint64_t value = 0;
 	uint32_t qp_id;
 
-	model = dev->data->models[obj_idx];
-	if (model == NULL)
-		return 0;
-
 	switch (type) {
 	case avg_hw_latency:
-		ML_AVG_FOREACH_QP(dev, model, qp_id, hw, value, count);
+		ML_AVG_FOREACH_QP(cnxk_mldev, layer, qp_id, hw, value, count);
 		break;
 	case min_hw_latency:
-		ML_MIN_FOREACH_QP(dev, model, qp_id, hw, value, count);
+		ML_MIN_FOREACH_QP(cnxk_mldev, layer, qp_id, hw, value, count);
 		break;
 	case max_hw_latency:
-		ML_MAX_FOREACH_QP(dev, model, qp_id, hw, value, count);
+		ML_MAX_FOREACH_QP(cnxk_mldev, layer, qp_id, hw, value, count);
 		break;
 	case avg_fw_latency:
-		ML_AVG_FOREACH_QP(dev, model, qp_id, fw, value, count);
+		ML_AVG_FOREACH_QP(cnxk_mldev, layer, qp_id, fw, value, count);
 		break;
 	case min_fw_latency:
-		ML_MIN_FOREACH_QP(dev, model, qp_id, fw, value, count);
+		ML_MIN_FOREACH_QP(cnxk_mldev, layer, qp_id, fw, value, count);
 		break;
 	case max_fw_latency:
-		ML_MAX_FOREACH_QP(dev, model, qp_id, fw, value, count);
+		ML_MAX_FOREACH_QP(cnxk_mldev, layer, qp_id, fw, value, count);
 		break;
 	default:
 		value = 0;
 	}
 
-	roc_clk_freq_get(&rclk_freq, &sclk_freq);
-	if (sclk_freq != 0) /* return in ns */
-		value = (value * 1000ULL) / sclk_freq;
-
 	return value;
 }
 
-static int
-cn10k_ml_device_xstats_reset(struct rte_ml_dev *dev, const uint16_t stat_ids[], uint16_t nb_ids)
-{
-	struct cn10k_ml_dev *cn10k_mldev;
-	struct cnxk_ml_xstats_entry *xs;
-	struct cnxk_ml_dev *cnxk_mldev;
-	uint16_t nb_stats;
-	uint16_t stat_id;
-	uint32_t i;
-
-	cnxk_mldev = dev->data->dev_private;
-	cn10k_mldev = &cnxk_mldev->cn10k_mldev;
-
-	if (stat_ids == NULL)
-		nb_stats = cn10k_mldev->xstats.count_mode_device;
-	else
-		nb_stats = nb_ids;
-
-	for (i = 0; i < nb_stats; i++) {
-		if (stat_ids == NULL)
-			stat_id = i;
-		else
-			stat_id = stat_ids[i];
-
-		if (stat_id >= cn10k_mldev->xstats.count_mode_device)
-			return -EINVAL;
-
-		xs = &cn10k_mldev->xstats.entries[stat_id];
-		if (!xs->reset_allowed)
-			continue;
-
-		xs->reset_value = cn10k_ml_dev_xstat_get(dev, xs->obj_idx, xs->type);
-	}
-
-	return 0;
-}
-
-#define ML_AVG_RESET_FOREACH_QP(dev, model, qp_id, str)                                            \
-	do {                                                                                       \
-		for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {                      \
-			model->layer[0].glow.burst_xstats[qp_id].str##_latency_tot = 0;            \
-			model->layer[0].glow.burst_xstats[qp_id].str##_reset_count =               \
-				model->layer[0].glow.burst_xstats[qp_id].dequeued_count;           \
-		}                                                                                  \
-	} while (0)
-
-#define ML_MIN_RESET_FOREACH_QP(dev, model, qp_id, str)                                            \
-	do {                                                                                       \
-		for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++)                        \
-			model->layer[0].glow.burst_xstats[qp_id].str##_latency_min = UINT64_MAX;   \
-	} while (0)
-
-#define ML_MAX_RESET_FOREACH_QP(dev, model, qp_id, str)                                            \
-	do {                                                                                       \
-		for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++)                        \
-			model->layer[0].glow.burst_xstats[qp_id].str##_latency_max = 0;            \
-	} while (0)
-
-static void
-cn10k_ml_reset_model_stat(struct rte_ml_dev *dev, uint16_t model_id, enum cnxk_ml_xstats_type type)
-{
-	struct cnxk_ml_model *model;
-	uint32_t qp_id;
-
-	model = dev->data->models[model_id];
-
-	switch (type) {
-	case avg_hw_latency:
-		ML_AVG_RESET_FOREACH_QP(dev, model, qp_id, hw);
-		break;
-	case min_hw_latency:
-		ML_MIN_RESET_FOREACH_QP(dev, model, qp_id, hw);
-		break;
-	case max_hw_latency:
-		ML_MAX_RESET_FOREACH_QP(dev, model, qp_id, hw);
-		break;
-	case avg_fw_latency:
-		ML_AVG_RESET_FOREACH_QP(dev, model, qp_id, fw);
-		break;
-	case min_fw_latency:
-		ML_MIN_RESET_FOREACH_QP(dev, model, qp_id, fw);
-		break;
-	case max_fw_latency:
-		ML_MAX_RESET_FOREACH_QP(dev, model, qp_id, fw);
-		break;
-	default:
-		return;
-	}
-}
-
-static int
-cn10k_ml_model_xstats_reset(struct rte_ml_dev *dev, int32_t model_id, const uint16_t stat_ids[],
-			    uint16_t nb_ids)
-{
-	struct cn10k_ml_dev *cn10k_mldev;
-	struct cnxk_ml_xstats_entry *xs;
-	struct cnxk_ml_dev *cnxk_mldev;
-	struct cnxk_ml_model *model;
-	int32_t lcl_model_id = 0;
-	uint16_t start_id;
-	uint16_t end_id;
-	int32_t i;
-	int32_t j;
-
-	cnxk_mldev = dev->data->dev_private;
-	cn10k_mldev = &cnxk_mldev->cn10k_mldev;
-	for (i = 0; i < ML_CNXK_MAX_MODELS; i++) {
-		if (model_id == -1) {
-			model = dev->data->models[i];
-			if (model == NULL) /* Skip inactive models */
-				continue;
-		} else {
-			if (model_id != i)
-				continue;
-
-			model = dev->data->models[model_id];
-			if (model == NULL) {
-				plt_err("Invalid model_id = %d\n", model_id);
-				return -EINVAL;
-			}
-		}
-
-		start_id = cn10k_mldev->xstats.offset_for_model[i];
-		end_id = cn10k_mldev->xstats.offset_for_model[i] +
-			 cn10k_mldev->xstats.count_per_model[i] - 1;
-
-		if (stat_ids == NULL) {
-			for (j = start_id; j <= end_id; j++) {
-				xs = &cn10k_mldev->xstats.entries[j];
-				cn10k_ml_reset_model_stat(dev, i, xs->type);
-			}
-		} else {
-			for (j = 0; j < nb_ids; j++) {
-				if (stat_ids[j] < start_id || stat_ids[j] > end_id) {
-					plt_err("Invalid stat_ids[%d] = %d for model_id = %d\n", j,
-						stat_ids[j], lcl_model_id);
-					return -EINVAL;
-				}
-				xs = &cn10k_mldev->xstats.entries[stat_ids[j]];
-				cn10k_ml_reset_model_stat(dev, i, xs->type);
-			}
-		}
-	}
-
-	return 0;
-}
-
 static int
 cn10k_ml_cache_model_data(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_layer *layer)
 {
@@ -654,7 +392,6 @@  cn10k_ml_dev_configure(struct cnxk_ml_dev *cnxk_mldev, const struct rte_ml_dev_c
 	struct cn10k_ml_dev *cn10k_mldev;
 	struct cn10k_ml_ocm *ocm;
 	uint16_t tile_id;
-	int ret;
 
 	RTE_SET_USED(conf);
 
@@ -682,13 +419,6 @@  cn10k_ml_dev_configure(struct cnxk_ml_dev *cnxk_mldev, const struct rte_ml_dev_c
 
 	rte_spinlock_init(&ocm->lock);
 
-	/* Initialize xstats */
-	ret = cn10k_ml_xstats_init(cnxk_mldev->mldev);
-	if (ret != 0) {
-		plt_err("Failed to initialize xstats");
-		return ret;
-	}
-
 	/* Set JCMDQ enqueue function */
 	if (cn10k_mldev->hw_queue_lock == 1)
 		cn10k_mldev->ml_jcmdq_enqueue = roc_ml_jcmdq_enqueue_sl;
@@ -717,9 +447,6 @@  cn10k_ml_dev_close(struct cnxk_ml_dev *cnxk_mldev)
 	/* Release ocm_mask memory */
 	rte_free(cn10k_mldev->ocm.ocm_mask);
 
-	/* Un-initialize xstats */
-	cn10k_ml_xstats_uninit(cnxk_mldev->mldev);
-
 	/* Unload firmware */
 	cn10k_ml_fw_unload(cnxk_mldev);
 
@@ -770,174 +497,6 @@  cn10k_ml_dev_stop(struct cnxk_ml_dev *cnxk_mldev)
 	return 0;
 }
 
-int
-cn10k_ml_dev_xstats_names_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode,
-			      int32_t model_id, struct rte_ml_dev_xstats_map *xstats_map,
-			      uint32_t size)
-{
-	struct cn10k_ml_dev *cn10k_mldev;
-	struct cnxk_ml_dev *cnxk_mldev;
-	uint32_t xstats_mode_count;
-	uint32_t idx = 0;
-	uint32_t i;
-
-	cnxk_mldev = dev->data->dev_private;
-	cn10k_mldev = &cnxk_mldev->cn10k_mldev;
-
-	xstats_mode_count = 0;
-	switch (mode) {
-	case RTE_ML_DEV_XSTATS_DEVICE:
-		xstats_mode_count = cn10k_mldev->xstats.count_mode_device;
-		break;
-	case RTE_ML_DEV_XSTATS_MODEL:
-		if (model_id >= ML_CNXK_MAX_MODELS)
-			break;
-		xstats_mode_count = cn10k_mldev->xstats.count_per_model[model_id];
-		break;
-	default:
-		return -EINVAL;
-	};
-
-	if (xstats_mode_count > size || xstats_map == NULL)
-		return xstats_mode_count;
-
-	for (i = 0; i < cn10k_mldev->xstats.count && idx < size; i++) {
-		if (cn10k_mldev->xstats.entries[i].mode != mode)
-			continue;
-
-		if (mode != RTE_ML_DEV_XSTATS_DEVICE &&
-		    model_id != cn10k_mldev->xstats.entries[i].obj_idx)
-			continue;
-
-		strncpy(xstats_map[idx].name, cn10k_mldev->xstats.entries[i].map.name,
-			RTE_ML_STR_MAX);
-		xstats_map[idx].id = cn10k_mldev->xstats.entries[i].map.id;
-		idx++;
-	}
-
-	return idx;
-}
-
-int
-cn10k_ml_dev_xstats_by_name_get(struct rte_ml_dev *dev, const char *name, uint16_t *stat_id,
-				uint64_t *value)
-{
-	struct cnxk_ml_xstats_entry *xs;
-	struct cn10k_ml_dev *cn10k_mldev;
-	struct cnxk_ml_dev *cnxk_mldev;
-	cnxk_ml_xstats_fn fn;
-	uint32_t i;
-
-	cnxk_mldev = dev->data->dev_private;
-	cn10k_mldev = &cnxk_mldev->cn10k_mldev;
-	for (i = 0; i < cn10k_mldev->xstats.count; i++) {
-		xs = &cn10k_mldev->xstats.entries[i];
-		if (strncmp(xs->map.name, name, RTE_ML_STR_MAX) == 0) {
-			if (stat_id != NULL)
-				*stat_id = xs->map.id;
-
-			switch (xs->fn_id) {
-			case CNXK_ML_XSTATS_FN_DEVICE:
-				fn = cn10k_ml_dev_xstat_get;
-				break;
-			case CNXK_ML_XSTATS_FN_MODEL:
-				fn = cn10k_ml_model_xstat_get;
-				break;
-			default:
-				plt_err("Unexpected xstat fn_id = %d", xs->fn_id);
-				return -EINVAL;
-			}
-
-			*value = fn(dev, xs->obj_idx, xs->type) - xs->reset_value;
-
-			return 0;
-		}
-	}
-
-	if (stat_id != NULL)
-		*stat_id = (uint16_t)-1;
-
-	return -EINVAL;
-}
-
-int
-cn10k_ml_dev_xstats_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
-			const uint16_t stat_ids[], uint64_t values[], uint16_t nb_ids)
-{
-	struct cn10k_ml_dev *cn10k_mldev;
-	struct cnxk_ml_xstats_entry *xs;
-	struct cnxk_ml_dev *cnxk_mldev;
-	uint32_t xstats_mode_count;
-	cnxk_ml_xstats_fn fn;
-	uint64_t val;
-	uint32_t idx;
-	uint32_t i;
-
-	cnxk_mldev = dev->data->dev_private;
-	cn10k_mldev = &cnxk_mldev->cn10k_mldev;
-	xstats_mode_count = 0;
-
-	switch (mode) {
-	case RTE_ML_DEV_XSTATS_DEVICE:
-		xstats_mode_count = cn10k_mldev->xstats.count_mode_device;
-		break;
-	case RTE_ML_DEV_XSTATS_MODEL:
-		if (model_id >= ML_CNXK_MAX_MODELS)
-			return -EINVAL;
-		xstats_mode_count = cn10k_mldev->xstats.count_per_model[model_id];
-		break;
-	default:
-		return -EINVAL;
-	};
-
-	idx = 0;
-	for (i = 0; i < nb_ids && idx < xstats_mode_count; i++) {
-		xs = &cn10k_mldev->xstats.entries[stat_ids[i]];
-		if (stat_ids[i] > cn10k_mldev->xstats.count || xs->mode != mode)
-			continue;
-
-		if (mode == RTE_ML_DEV_XSTATS_MODEL && model_id != xs->obj_idx) {
-			plt_err("Invalid stats_id[%d] = %d for model_id = %d\n", i, stat_ids[i],
-				model_id);
-			return -EINVAL;
-		}
-
-		switch (xs->fn_id) {
-		case CNXK_ML_XSTATS_FN_DEVICE:
-			fn = cn10k_ml_dev_xstat_get;
-			break;
-		case CNXK_ML_XSTATS_FN_MODEL:
-			fn = cn10k_ml_model_xstat_get;
-			break;
-		default:
-			plt_err("Unexpected xstat fn_id = %d", xs->fn_id);
-			return -EINVAL;
-		}
-
-		val = fn(dev, xs->obj_idx, xs->type);
-		if (values)
-			values[idx] = val;
-
-		idx++;
-	}
-
-	return idx;
-}
-
-int
-cn10k_ml_dev_xstats_reset(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode,
-			  int32_t model_id, const uint16_t stat_ids[], uint16_t nb_ids)
-{
-	switch (mode) {
-	case RTE_ML_DEV_XSTATS_DEVICE:
-		return cn10k_ml_device_xstats_reset(dev, stat_ids, nb_ids);
-	case RTE_ML_DEV_XSTATS_MODEL:
-		return cn10k_ml_model_xstats_reset(dev, model_id, stat_ids, nb_ids);
-	};
-
-	return 0;
-}
-
 int
 cn10k_ml_dev_dump(struct cnxk_ml_dev *cnxk_mldev, FILE *fp)
 {
@@ -1211,7 +770,7 @@  cn10k_ml_layer_load(void *device, uint16_t model_id, const char *layer_name, uin
 							      sizeof(struct cn10k_ml_layer_xstats));
 
 	/* Update xstats names */
-	cn10k_ml_xstats_model_name_update(cnxk_mldev->mldev, idx);
+	cn10k_ml_xstats_layer_name_update(cnxk_mldev, model_id, layer_id);
 
 	layer->state = ML_CNXK_LAYER_STATE_LOADED;
 	cnxk_mldev->index_map[idx].model_id = model->model_id;
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.h b/drivers/ml/cnxk/cn10k_ml_ops.h
index 47e7cb12af..4d76164dba 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.h
+++ b/drivers/ml/cnxk/cn10k_ml_ops.h
@@ -13,6 +13,7 @@ 
 struct cnxk_ml_dev;
 struct cnxk_ml_qp;
 struct cnxk_ml_model;
+struct cnxk_ml_layer;
 
 /* Firmware version string length */
 #define MLDEV_FIRMWARE_VERSION_LENGTH 32
@@ -298,17 +299,6 @@  int cn10k_ml_dev_stop(struct cnxk_ml_dev *cnxk_mldev);
 int cn10k_ml_dev_dump(struct cnxk_ml_dev *cnxk_mldev, FILE *fp);
 int cn10k_ml_dev_selftest(struct cnxk_ml_dev *cnxk_mldev);
 
-int cn10k_ml_dev_xstats_names_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode,
-				  int32_t model_id, struct rte_ml_dev_xstats_map *xstats_map,
-				  uint32_t size);
-int cn10k_ml_dev_xstats_by_name_get(struct rte_ml_dev *dev, const char *name, uint16_t *stat_id,
-				    uint64_t *value);
-int cn10k_ml_dev_xstats_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode,
-			    int32_t model_id, const uint16_t stat_ids[], uint64_t values[],
-			    uint16_t nb_ids);
-int cn10k_ml_dev_xstats_reset(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode,
-			      int32_t model_id, const uint16_t stat_ids[], uint16_t nb_ids);
-
 /* Slow-path ops */
 int cn10k_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params *params,
 			struct cnxk_ml_model *model);
@@ -337,4 +327,8 @@  int cn10k_ml_layer_unload(void *device, uint16_t model_id, const char *layer_nam
 int cn10k_ml_layer_start(void *device, uint16_t model_id, const char *layer_name);
 int cn10k_ml_layer_stop(void *device, uint16_t model_id, const char *layer_name);
 
+/* xstats ops */
+uint64_t cn10k_ml_model_xstat_get(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_layer *layer,
+				  enum cnxk_ml_xstats_type type);
+
 #endif /* _CN10K_ML_OPS_H_ */
diff --git a/drivers/ml/cnxk/cnxk_ml_dev.h b/drivers/ml/cnxk/cnxk_ml_dev.h
index 1590249abd..3ce9338f1f 100644
--- a/drivers/ml/cnxk/cnxk_ml_dev.h
+++ b/drivers/ml/cnxk/cnxk_ml_dev.h
@@ -9,6 +9,8 @@ 
 
 #include "cn10k_ml_dev.h"
 
+#include "cnxk_ml_xstats.h"
+
 /* ML command timeout in seconds */
 #define ML_CNXK_CMD_TIMEOUT 5
 
@@ -51,6 +53,9 @@  struct cnxk_ml_dev {
 	/* Configuration state */
 	enum cnxk_ml_dev_state state;
 
+	/* Extended stats data */
+	struct cnxk_ml_xstats xstats;
+
 	/* Number of models loaded */
 	uint16_t nb_models_loaded;
 
diff --git a/drivers/ml/cnxk/cnxk_ml_ops.c b/drivers/ml/cnxk/cnxk_ml_ops.c
index c75317d6da..6a423d9eda 100644
--- a/drivers/ml/cnxk/cnxk_ml_ops.c
+++ b/drivers/ml/cnxk/cnxk_ml_ops.c
@@ -115,6 +115,285 @@  cnxk_ml_qp_create(const struct rte_ml_dev *dev, uint16_t qp_id, uint32_t nb_desc
 	return NULL;
 }
 
+static int
+cnxk_ml_xstats_init(struct cnxk_ml_dev *cnxk_mldev)
+{
+	uint16_t nb_stats;
+	uint16_t stat_id;
+	uint16_t model;
+	uint16_t layer;
+	uint16_t i;
+
+	/* Allocate memory for xstats entries. Don't allocate during reconfigure */
+	nb_stats = RTE_DIM(device_xstats) +
+		   RTE_DIM(layer_xstats) * ML_CNXK_MAX_MODELS * ML_CNXK_MODEL_MAX_LAYERS;
+	if (cnxk_mldev->xstats.entries == NULL)
+		cnxk_mldev->xstats.entries = rte_zmalloc(
+			"cnxk_ml_xstats", sizeof(struct cnxk_ml_xstats_entry) * nb_stats,
+			PLT_CACHE_LINE_SIZE);
+
+	if (cnxk_mldev->xstats.entries == NULL)
+		return -ENOMEM;
+
+	/* Initialize device xstats */
+	stat_id = 0;
+	for (i = 0; i < RTE_DIM(device_xstats); i++) {
+		cnxk_mldev->xstats.entries[stat_id].map.id = stat_id;
+		snprintf(cnxk_mldev->xstats.entries[stat_id].map.name,
+			 sizeof(cnxk_mldev->xstats.entries[stat_id].map.name), "%s",
+			 device_xstats[i].name);
+
+		cnxk_mldev->xstats.entries[stat_id].mode = RTE_ML_DEV_XSTATS_DEVICE;
+		cnxk_mldev->xstats.entries[stat_id].group = CNXK_ML_XSTATS_GROUP_DEVICE;
+		cnxk_mldev->xstats.entries[stat_id].type = device_xstats[i].type;
+		cnxk_mldev->xstats.entries[stat_id].fn_id = CNXK_ML_XSTATS_FN_DEVICE;
+		cnxk_mldev->xstats.entries[stat_id].obj_idx = 0;
+		cnxk_mldev->xstats.entries[stat_id].reset_allowed = device_xstats[i].reset_allowed;
+		stat_id++;
+	}
+	cnxk_mldev->xstats.count_mode_device = stat_id;
+
+	/* Initialize model xstats */
+	for (model = 0; model < ML_CNXK_MAX_MODELS; model++) {
+		cnxk_mldev->xstats.offset_for_model[model] = stat_id;
+
+		for (layer = 0; layer < ML_CNXK_MODEL_MAX_LAYERS; layer++) {
+			cnxk_mldev->xstats.offset_for_layer[model][layer] = stat_id;
+
+			for (i = 0; i < RTE_DIM(layer_xstats); i++) {
+				cnxk_mldev->xstats.entries[stat_id].map.id = stat_id;
+				cnxk_mldev->xstats.entries[stat_id].mode = RTE_ML_DEV_XSTATS_MODEL;
+				cnxk_mldev->xstats.entries[stat_id].group =
+					CNXK_ML_XSTATS_GROUP_LAYER;
+				cnxk_mldev->xstats.entries[stat_id].type = layer_xstats[i].type;
+				cnxk_mldev->xstats.entries[stat_id].fn_id = CNXK_ML_XSTATS_FN_MODEL;
+				cnxk_mldev->xstats.entries[stat_id].obj_idx = model;
+				cnxk_mldev->xstats.entries[stat_id].layer_id = layer;
+				cnxk_mldev->xstats.entries[stat_id].reset_allowed =
+					layer_xstats[i].reset_allowed;
+
+				/* Name of xstat is updated during model load */
+				snprintf(cnxk_mldev->xstats.entries[stat_id].map.name,
+					 sizeof(cnxk_mldev->xstats.entries[stat_id].map.name),
+					 "Layer-%u-%u-%s", model, layer, layer_xstats[i].name);
+
+				stat_id++;
+			}
+
+			cnxk_mldev->xstats.count_per_layer[model][layer] = RTE_DIM(layer_xstats);
+		}
+
+		cnxk_mldev->xstats.count_per_model[model] = RTE_DIM(layer_xstats);
+	}
+
+	cnxk_mldev->xstats.count_mode_model = stat_id - cnxk_mldev->xstats.count_mode_device;
+	cnxk_mldev->xstats.count = stat_id;
+
+	return 0;
+}
+
+static void
+cnxk_ml_xstats_uninit(struct cnxk_ml_dev *cnxk_mldev)
+{
+	rte_free(cnxk_mldev->xstats.entries);
+	cnxk_mldev->xstats.entries = NULL;
+
+	cnxk_mldev->xstats.count = 0;
+}
+
+static uint64_t
+cnxk_ml_dev_xstat_get(struct cnxk_ml_dev *cnxk_mldev, uint16_t obj_idx __rte_unused,
+		      int32_t layer_id __rte_unused, enum cnxk_ml_xstats_type type)
+{
+	switch (type) {
+	case nb_models_loaded:
+		return cnxk_mldev->nb_models_loaded;
+	case nb_models_unloaded:
+		return cnxk_mldev->nb_models_unloaded;
+	case nb_models_started:
+		return cnxk_mldev->nb_models_started;
+	case nb_models_stopped:
+		return cnxk_mldev->nb_models_stopped;
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
+static uint64_t
+cnxk_ml_model_xstat_get(struct cnxk_ml_dev *cnxk_mldev, uint16_t obj_idx, int32_t layer_id,
+			enum cnxk_ml_xstats_type type)
+{
+	struct cnxk_ml_model *model;
+	struct cnxk_ml_layer *layer;
+	uint16_t rclk_freq; /* MHz */
+	uint16_t sclk_freq; /* MHz */
+	uint64_t value = 0;
+
+	model = cnxk_mldev->mldev->data->models[obj_idx];
+	if (model == NULL)
+		return 0;
+
+	if (layer_id >= 0)
+		layer = &model->layer[layer_id];
+	else
+		return 0;
+
+	value = cn10k_ml_model_xstat_get(cnxk_mldev, layer, type);
+
+	roc_clk_freq_get(&rclk_freq, &sclk_freq);
+	if (sclk_freq != 0) /* return in ns */
+		value = (value * 1000ULL) / sclk_freq;
+
+	return value;
+}
+
+static int
+cnxk_ml_device_xstats_reset(struct cnxk_ml_dev *cnxk_mldev, const uint16_t stat_ids[],
+			    uint16_t nb_ids)
+{
+	struct cnxk_ml_xstats_entry *xs;
+	uint16_t nb_stats;
+	uint16_t stat_id;
+	uint32_t i;
+
+	if (stat_ids == NULL)
+		nb_stats = cnxk_mldev->xstats.count_mode_device;
+	else
+		nb_stats = nb_ids;
+
+	for (i = 0; i < nb_stats; i++) {
+		if (stat_ids == NULL)
+			stat_id = i;
+		else
+			stat_id = stat_ids[i];
+
+		if (stat_id >= cnxk_mldev->xstats.count_mode_device)
+			return -EINVAL;
+
+		xs = &cnxk_mldev->xstats.entries[stat_id];
+		if (!xs->reset_allowed)
+			continue;
+
+		xs->reset_value =
+			cnxk_ml_dev_xstat_get(cnxk_mldev, xs->obj_idx, xs->layer_id, xs->type);
+	}
+
+	return 0;
+}
+
+#define ML_AVG_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, str)                                     \
+	do {                                                                                       \
+		for (qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs; qp_id++) {        \
+			layer->glow.burst_xstats[qp_id].str##_latency_tot = 0;                     \
+			layer->glow.burst_xstats[qp_id].str##_reset_count =                        \
+				layer->glow.burst_xstats[qp_id].dequeued_count;                    \
+		}                                                                                  \
+	} while (0)
+
+#define ML_MIN_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, str)                                     \
+	do {                                                                                       \
+		for (qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs; qp_id++)          \
+			layer->glow.burst_xstats[qp_id].str##_latency_min = UINT64_MAX;            \
+	} while (0)
+
+#define ML_MAX_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, str)                                     \
+	do {                                                                                       \
+		for (qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs; qp_id++)          \
+			layer->glow.burst_xstats[qp_id].str##_latency_max = 0;                     \
+	} while (0)
+
+static void
+cnxk_ml_reset_model_stat(struct cnxk_ml_dev *cnxk_mldev, uint16_t model_id,
+			 enum cnxk_ml_xstats_type type)
+{
+	struct cnxk_ml_model *model;
+	struct cnxk_ml_layer *layer;
+	uint16_t layer_id = 0;
+	uint32_t qp_id;
+
+	model = cnxk_mldev->mldev->data->models[model_id];
+	layer = &model->layer[layer_id];
+
+	switch (type) {
+	case avg_hw_latency:
+		ML_AVG_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, hw);
+		break;
+	case min_hw_latency:
+		ML_MIN_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, hw);
+		break;
+	case max_hw_latency:
+		ML_MAX_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, hw);
+		break;
+	case avg_fw_latency:
+		ML_AVG_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, fw);
+		break;
+	case min_fw_latency:
+		ML_MIN_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, fw);
+		break;
+	case max_fw_latency:
+		ML_MAX_RESET_FOREACH_QP(cnxk_mldev, layer, qp_id, fw);
+		break;
+	default:
+		return;
+	}
+}
+
+static int
+cnxk_ml_model_xstats_reset(struct cnxk_ml_dev *cnxk_mldev, int32_t model_id,
+			   const uint16_t stat_ids[], uint16_t nb_ids)
+{
+	struct cnxk_ml_xstats_entry *xs;
+	struct cnxk_ml_model *model;
+	int32_t lcl_model_id = 0;
+	uint16_t layer_id = 0;
+	uint16_t start_id;
+	uint16_t end_id;
+	int32_t i;
+	int32_t j;
+
+	for (i = 0; i < ML_CNXK_MAX_MODELS; i++) {
+		if (model_id == -1) {
+			model = cnxk_mldev->mldev->data->models[i];
+			if (model == NULL) /* skip inactive models */
+				continue;
+		} else {
+			if (model_id != i)
+				continue;
+
+			model = cnxk_mldev->mldev->data->models[model_id];
+			if (model == NULL) {
+				plt_err("Invalid model_id = %d\n", model_id);
+				return -EINVAL;
+			}
+		}
+
+		start_id = cnxk_mldev->xstats.offset_for_layer[i][layer_id];
+		end_id = cnxk_mldev->xstats.offset_for_layer[i][layer_id] +
+			 cnxk_mldev->xstats.count_per_layer[i][layer_id] - 1;
+
+		if (stat_ids == NULL) {
+			for (j = start_id; j <= end_id; j++) {
+				xs = &cnxk_mldev->xstats.entries[j];
+				cnxk_ml_reset_model_stat(cnxk_mldev, i, xs->type);
+			}
+		} else {
+			for (j = 0; j < nb_ids; j++) {
+				if (stat_ids[j] < start_id || stat_ids[j] > end_id) {
+					plt_err("Invalid stat_ids[%d] = %d for model_id = %d\n", j,
+						stat_ids[j], lcl_model_id);
+					return -EINVAL;
+				}
+				xs = &cnxk_mldev->xstats.entries[stat_ids[j]];
+				cnxk_ml_reset_model_stat(cnxk_mldev, i, xs->type);
+			}
+		}
+	}
+
+	return 0;
+}
+
 static int
 cnxk_ml_dev_info_get(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info)
 {
@@ -294,6 +573,13 @@  cnxk_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *co
 	for (i = 0; i < cnxk_mldev->max_nb_layers; i++)
 		cnxk_mldev->index_map[i].active = false;
 
+	/* Initialize xstats */
+	ret = cnxk_ml_xstats_init(cnxk_mldev);
+	if (ret != 0) {
+		plt_err("Failed to initialize xstats");
+		goto error;
+	}
+
 	cnxk_mldev->nb_models_loaded = 0;
 	cnxk_mldev->nb_models_started = 0;
 	cnxk_mldev->nb_models_stopped = 0;
@@ -323,6 +609,9 @@  cnxk_ml_dev_close(struct rte_ml_dev *dev)
 
 	cnxk_mldev = dev->data->dev_private;
 
+	/* Un-initialize xstats */
+	cnxk_ml_xstats_uninit(cnxk_mldev);
+
 	if (cn10k_ml_dev_close(cnxk_mldev) != 0)
 		plt_err("Failed to close CN10K ML Device");
 
@@ -521,6 +810,190 @@  cnxk_ml_dev_stats_reset(struct rte_ml_dev *dev)
 	}
 }
 
+/* Fill xstats_map[] with the name/id pairs of the xstats for the given mode.
+ *
+ * In model mode only entries of model_id (layer 0) are reported. When
+ * xstats_map is NULL or size is smaller than the number of xstats for the
+ * mode, nothing is written and that number is returned so the caller can size
+ * its buffer. Otherwise the number of entries written is returned.
+ *
+ * Returns -EINVAL for a NULL device or an unknown mode.
+ */
+static int
+cnxk_ml_dev_xstats_names_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode,
+			     int32_t model_id, struct rte_ml_dev_xstats_map *xstats_map,
+			     uint32_t size)
+{
+	struct cnxk_ml_xstats_entry *xs;
+	struct cnxk_ml_dev *cnxk_mldev;
+	uint32_t xstats_mode_count;
+	uint16_t layer_id = 0;
+	uint32_t idx = 0;
+	uint32_t i;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	cnxk_mldev = dev->data->dev_private;
+	xstats_mode_count = 0;
+
+	switch (mode) {
+	case RTE_ML_DEV_XSTATS_DEVICE:
+		xstats_mode_count = cnxk_mldev->xstats.count_mode_device;
+		break;
+	case RTE_ML_DEV_XSTATS_MODEL:
+		/* model_id indexes count_per_layer[]; a negative id must not be
+		 * used as an index. Out-of-range ids simply report zero xstats.
+		 */
+		if (model_id < 0 || model_id >= ML_CNXK_MAX_MODELS)
+			break;
+		xstats_mode_count = cnxk_mldev->xstats.count_per_layer[model_id][layer_id];
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (xstats_mode_count > size || xstats_map == NULL)
+		return xstats_mode_count;
+
+	for (i = 0; i < cnxk_mldev->xstats.count && idx < size; i++) {
+		xs = &cnxk_mldev->xstats.entries[i];
+		if (xs->mode != mode)
+			continue;
+
+		if (mode == RTE_ML_DEV_XSTATS_MODEL &&
+		    (model_id != xs->obj_idx || layer_id != xs->layer_id))
+			continue;
+
+		/* strncpy() does not terminate on truncation; do it explicitly */
+		strncpy(xstats_map[idx].name, xs->map.name, sizeof(xstats_map[idx].name) - 1);
+		xstats_map[idx].name[sizeof(xstats_map[idx].name) - 1] = '\0';
+		xstats_map[idx].id = xs->map.id;
+		idx++;
+	}
+
+	return idx;
+}
+
+/* Look up an xstat by its name and return its current value.
+ *
+ * On a match, *stat_id (when not NULL) receives the xstat id and *value the
+ * value reported by the entry's getter minus the entry's reset_value.
+ * When no entry matches, *stat_id (when not NULL) is set to (uint16_t)-1.
+ *
+ * Returns 0 on success, -EINVAL for a NULL device, an unknown name or an
+ * entry with an unexpected fn_id.
+ */
+static int
+cnxk_ml_dev_xstats_by_name_get(struct rte_ml_dev *dev, const char *name, uint16_t *stat_id,
+			       uint64_t *value)
+{
+	struct cnxk_ml_xstats_entry *entry;
+	struct cnxk_ml_dev *cnxk_mldev;
+	cnxk_ml_xstats_fn getter;
+	uint32_t pos;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	cnxk_mldev = dev->data->dev_private;
+
+	for (pos = 0; pos < cnxk_mldev->xstats.count; pos++) {
+		entry = &cnxk_mldev->xstats.entries[pos];
+
+		/* Not the requested xstat, keep searching */
+		if (strncmp(entry->map.name, name, RTE_ML_STR_MAX) != 0)
+			continue;
+
+		if (stat_id != NULL)
+			*stat_id = entry->map.id;
+
+		/* Pick the getter matching the entry's function type */
+		if (entry->fn_id == CNXK_ML_XSTATS_FN_DEVICE) {
+			getter = cnxk_ml_dev_xstat_get;
+		} else if (entry->fn_id == CNXK_ML_XSTATS_FN_MODEL) {
+			getter = cnxk_ml_model_xstat_get;
+		} else {
+			plt_err("Unexpected xstat fn_id = %d", entry->fn_id);
+			return -EINVAL;
+		}
+
+		*value = getter(cnxk_mldev, entry->obj_idx, entry->layer_id, entry->type) -
+			 entry->reset_value;
+
+		return 0;
+	}
+
+	/* Name not found */
+	if (stat_id != NULL)
+		*stat_id = (uint16_t)-1;
+
+	return -EINVAL;
+}
+
+/* Read the values of the xstats listed in stat_ids[] for the device or a model.
+ *
+ * Ids that are out of range or belong to a different mode are skipped. In
+ * model mode an id that belongs to another model (or a layer other than 0) is
+ * an error. Values are written to values[] (when not NULL) in the order the
+ * accepted ids appear, relative to the entry's reset_value, matching what
+ * cnxk_ml_dev_xstats_by_name_get() reports.
+ *
+ * Returns the number of xstats read, or -EINVAL for a NULL device, unknown
+ * mode, invalid model_id, mismatching stat id or unexpected getter type.
+ */
+static int
+cnxk_ml_dev_xstats_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
+		       const uint16_t stat_ids[], uint64_t values[], uint16_t nb_ids)
+{
+	struct cnxk_ml_xstats_entry *xs;
+	struct cnxk_ml_dev *cnxk_mldev;
+	uint32_t xstats_mode_count;
+	uint16_t layer_id = 0;
+	cnxk_ml_xstats_fn fn;
+	uint64_t val;
+	uint32_t idx;
+	uint32_t i;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	cnxk_mldev = dev->data->dev_private;
+	xstats_mode_count = 0;
+
+	switch (mode) {
+	case RTE_ML_DEV_XSTATS_DEVICE:
+		xstats_mode_count = cnxk_mldev->xstats.count_mode_device;
+		break;
+	case RTE_ML_DEV_XSTATS_MODEL:
+		/* model_id indexes count_per_layer[]; reject ids outside the table */
+		if (model_id < 0 || model_id >= ML_CNXK_MAX_MODELS)
+			return -EINVAL;
+		xstats_mode_count = cnxk_mldev->xstats.count_per_layer[model_id][layer_id];
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	idx = 0;
+	for (i = 0; i < nb_ids && idx < xstats_mode_count; i++) {
+		/* Validate the id before it is used as an index into entries[];
+		 * valid ids are 0 .. count - 1.
+		 */
+		if (stat_ids[i] >= cnxk_mldev->xstats.count)
+			continue;
+
+		xs = &cnxk_mldev->xstats.entries[stat_ids[i]];
+		if (xs->mode != mode)
+			continue;
+
+		if (mode == RTE_ML_DEV_XSTATS_MODEL &&
+		    (model_id != xs->obj_idx || layer_id != xs->layer_id)) {
+			plt_err("Invalid stats_id[%d] = %d for model_id = %d\n", i, stat_ids[i],
+				model_id);
+			return -EINVAL;
+		}
+
+		switch (xs->fn_id) {
+		case CNXK_ML_XSTATS_FN_DEVICE:
+			fn = cnxk_ml_dev_xstat_get;
+			break;
+		case CNXK_ML_XSTATS_FN_MODEL:
+			fn = cnxk_ml_model_xstat_get;
+			break;
+		default:
+			plt_err("Unexpected xstat fn_id = %d", xs->fn_id);
+			return -EINVAL;
+		}
+
+		/* Report the value relative to the entry's reset_value, as the
+		 * by-name getter does.
+		 */
+		val = fn(cnxk_mldev, xs->obj_idx, xs->layer_id, xs->type) - xs->reset_value;
+		if (values)
+			values[idx] = val;
+
+		idx++;
+	}
+
+	return idx;
+}
+
+/* Reset device or model xstats, dispatching on the xstats mode.
+ *
+ * model_id is only forwarded in model mode. stat_ids / nb_ids are passed
+ * through unchanged to the mode specific reset helper.
+ *
+ * Returns the helper's result, or -EINVAL for a NULL device or an unknown
+ * mode (consistent with the other xstats ops).
+ */
+static int
+cnxk_ml_dev_xstats_reset(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
+			 const uint16_t stat_ids[], uint16_t nb_ids)
+{
+	struct cnxk_ml_dev *cnxk_mldev;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	cnxk_mldev = dev->data->dev_private;
+
+	switch (mode) {
+	case RTE_ML_DEV_XSTATS_DEVICE:
+		return cnxk_ml_device_xstats_reset(cnxk_mldev, stat_ids, nb_ids);
+	case RTE_ML_DEV_XSTATS_MODEL:
+		return cnxk_ml_model_xstats_reset(cnxk_mldev, model_id, stat_ids, nb_ids);
+	default:
+		/* Nothing was reset; do not report success for an unknown mode */
+		return -EINVAL;
+	}
+}
+
 static int
 cnxk_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params, uint16_t *model_id)
 {
@@ -806,10 +1279,10 @@  struct rte_ml_dev_ops cnxk_ml_ops = {
 	/* Stats ops */
 	.dev_stats_get = cnxk_ml_dev_stats_get,
 	.dev_stats_reset = cnxk_ml_dev_stats_reset,
-	.dev_xstats_names_get = cn10k_ml_dev_xstats_names_get,
-	.dev_xstats_by_name_get = cn10k_ml_dev_xstats_by_name_get,
-	.dev_xstats_get = cn10k_ml_dev_xstats_get,
-	.dev_xstats_reset = cn10k_ml_dev_xstats_reset,
+	.dev_xstats_names_get = cnxk_ml_dev_xstats_names_get,
+	.dev_xstats_by_name_get = cnxk_ml_dev_xstats_by_name_get,
+	.dev_xstats_get = cnxk_ml_dev_xstats_get,
+	.dev_xstats_reset = cnxk_ml_dev_xstats_reset,
 
 	/* Model ops */
 	.model_load = cnxk_ml_model_load,
diff --git a/drivers/ml/cnxk/cnxk_ml_xstats.h b/drivers/ml/cnxk/cnxk_ml_xstats.h
index 0d405679ca..5e02bb876c 100644
--- a/drivers/ml/cnxk/cnxk_ml_xstats.h
+++ b/drivers/ml/cnxk/cnxk_ml_xstats.h
@@ -7,6 +7,8 @@ 
 
 #include "cnxk_ml_io.h"
 
+struct cnxk_ml_dev;
+
 /* Extended stats types enum */
 enum cnxk_ml_xstats_type {
 	/* Number of models loaded */
@@ -58,9 +60,21 @@  enum cnxk_ml_xstats_fn_type {
 	CNXK_ML_XSTATS_FN_MODEL,
 };
 
+/* Extended stats group, recorded per entry in cnxk_ml_xstats_entry.group */
+enum cnxk_ml_xstats_group {
+	/* Device stats */
+	CNXK_ML_XSTATS_GROUP_DEVICE,
+
+	/* Model stats */
+	CNXK_ML_XSTATS_GROUP_MODEL,
+
+	/* Layer stats */
+	CNXK_ML_XSTATS_GROUP_LAYER,
+};
+
 /* Function pointer to get xstats for a type */
-typedef uint64_t (*cnxk_ml_xstats_fn)(struct rte_ml_dev *cnxk_mldev, uint16_t obj_idx,
-				      enum cnxk_ml_xstats_type stat);
+typedef uint64_t (*cnxk_ml_xstats_fn)(struct cnxk_ml_dev *cnxk_mldev, uint16_t obj_idx,
+				      int32_t layer_id, enum cnxk_ml_xstats_type stat);
 
 /* Extended stats entry structure */
 struct cnxk_ml_xstats_entry {
@@ -70,6 +84,9 @@  struct cnxk_ml_xstats_entry {
 	/* xstats mode, device or model */
 	enum rte_ml_dev_xstats_mode mode;
 
+	/* xstats group */
+	enum cnxk_ml_xstats_group group;
+
 	/* Type of xstats */
 	enum cnxk_ml_xstats_type type;