@@ -146,7 +146,8 @@ cnxk_ml_xstats_init(struct cnxk_ml_dev *cnxk_mldev)
/* Allocate memory for xstats entries. Don't allocate during reconfigure */
nb_stats = RTE_DIM(device_xstats) +
- RTE_DIM(layer_xstats) * ML_CNXK_MAX_MODELS * ML_CNXK_MODEL_MAX_LAYERS;
+ RTE_DIM(layer_xstats) * ML_CNXK_MAX_MODELS * ML_CNXK_MODEL_MAX_LAYERS +
+ RTE_DIM(model_xstats) * ML_CNXK_MAX_MODELS;
if (cnxk_mldev->xstats.entries == NULL)
cnxk_mldev->xstats.entries = rte_zmalloc(
"cnxk_ml_xstats", sizeof(struct cnxk_ml_xstats_entry) * nb_stats,
@@ -177,6 +178,25 @@ cnxk_ml_xstats_init(struct cnxk_ml_dev *cnxk_mldev)
for (model = 0; model < ML_CNXK_MAX_MODELS; model++) {
cnxk_mldev->xstats.offset_for_model[model] = stat_id;
+ for (i = 0; i < RTE_DIM(model_xstats); i++) {
+ cnxk_mldev->xstats.entries[stat_id].map.id = stat_id;
+ cnxk_mldev->xstats.entries[stat_id].mode = RTE_ML_DEV_XSTATS_MODEL;
+ cnxk_mldev->xstats.entries[stat_id].group = CNXK_ML_XSTATS_GROUP_MODEL;
+ cnxk_mldev->xstats.entries[stat_id].type = model_xstats[i].type;
+ cnxk_mldev->xstats.entries[stat_id].fn_id = CNXK_ML_XSTATS_FN_MODEL;
+ cnxk_mldev->xstats.entries[stat_id].obj_idx = model;
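+ /* A layer_id of -1 marks a model-level entry that is not tied to a single layer */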
+ cnxk_mldev->xstats.entries[stat_id].layer_id = -1;
+ cnxk_mldev->xstats.entries[stat_id].reset_allowed =
+ model_xstats[i].reset_allowed;
+
+ /* Name of xstat is updated during model load */
+ snprintf(cnxk_mldev->xstats.entries[stat_id].map.name,
+ sizeof(cnxk_mldev->xstats.entries[stat_id].map.name),
+ "Model-%u-%s", model, model_xstats[i].name);
+
+ stat_id++;
+ }
+
for (layer = 0; layer < ML_CNXK_MODEL_MAX_LAYERS; layer++) {
cnxk_mldev->xstats.offset_for_layer[model][layer] = stat_id;
@@ -203,7 +223,8 @@ cnxk_ml_xstats_init(struct cnxk_ml_dev *cnxk_mldev)
cnxk_mldev->xstats.count_per_layer[model][layer] = RTE_DIM(layer_xstats);
}
- cnxk_mldev->xstats.count_per_model[model] = RTE_DIM(layer_xstats);
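+ /* Per-model count: per-layer stats for every layer slot plus the model-level stats */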
+ cnxk_mldev->xstats.count_per_model[model] =
+ RTE_DIM(layer_xstats) * ML_CNXK_MODEL_MAX_LAYERS + RTE_DIM(model_xstats);
}
cnxk_mldev->xstats.count_mode_model = stat_id - cnxk_mldev->xstats.count_mode_device;
@@ -212,6 +233,42 @@ cnxk_ml_xstats_init(struct cnxk_ml_dev *cnxk_mldev)
return 0;
}
+void
+cnxk_ml_xstats_model_name_update(struct cnxk_ml_dev *cnxk_mldev, uint16_t model_id)
+{
+ struct cnxk_ml_model *model;
+ uint16_t rclk_freq;
+ uint16_t sclk_freq;
+ uint16_t stat_id;
+ char suffix[8];
+ uint16_t i;
+
+ model = cnxk_mldev->mldev->data->models[model_id];
+ stat_id = cnxk_mldev->xstats.offset_for_model[model_id];
+
+ roc_clk_freq_get(&rclk_freq, &sclk_freq);
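+ /* Report latencies in nanoseconds when the sclk frequency is known, else in CPU cycles */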
+ if (sclk_freq == 0)
+ strcpy(suffix, "cycles");
+ else
+ strcpy(suffix, "ns");
+
+ /* Update xstat name based on model name and sclk availability */
+ for (i = 0; i < RTE_DIM(model_xstats); i++) {
+ if (model->type == ML_CNXK_MODEL_TYPE_GLOW)
+ snprintf(cnxk_mldev->xstats.entries[stat_id].map.name,
+ sizeof(cnxk_mldev->xstats.entries[stat_id].map.name), "%s-%s-%s",
+ model->glow.metadata.model.name, model_xstats[i].name, suffix);
+#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM
+ else
+ snprintf(cnxk_mldev->xstats.entries[stat_id].map.name,
+ sizeof(cnxk_mldev->xstats.entries[stat_id].map.name), "%s-%s-%s",
+ model->mvtvm.metadata.model.name, model_xstats[i].name, suffix);
+#endif
+
+ stat_id++;
+ }
+}
+
static void
cnxk_ml_xstats_uninit(struct cnxk_ml_dev *cnxk_mldev)
{
@@ -249,6 +306,9 @@ cnxk_ml_dev_xstat_get(struct cnxk_ml_dev *cnxk_mldev, uint16_t obj_idx __rte_unu
count += layer->glow.burst_xstats[qp_id].dequeued_count - \
layer->glow.burst_xstats[qp_id].str##_reset_count; \
} \
+ value += layer->glow.sync_xstats->str##_latency_tot; \
+ count += layer->glow.sync_xstats->dequeued_count - \
+ layer->glow.sync_xstats->str##_reset_count; \
if (count != 0) \
value = value / count; \
} while (0)
@@ -261,6 +321,9 @@ cnxk_ml_dev_xstat_get(struct cnxk_ml_dev *cnxk_mldev, uint16_t obj_idx __rte_unu
count += layer->glow.burst_xstats[qp_id].dequeued_count - \
layer->glow.burst_xstats[qp_id].str##_reset_count; \
} \
+ value = PLT_MIN(value, layer->glow.sync_xstats->str##_latency_min); \
+ count += layer->glow.sync_xstats->dequeued_count - \
+ layer->glow.sync_xstats->str##_reset_count; \
if (count == 0) \
value = 0; \
} while (0)
@@ -273,10 +336,53 @@ cnxk_ml_dev_xstat_get(struct cnxk_ml_dev *cnxk_mldev, uint16_t obj_idx __rte_unu
count += layer->glow.burst_xstats[qp_id].dequeued_count - \
layer->glow.burst_xstats[qp_id].str##_reset_count; \
} \
+ value = PLT_MAX(value, layer->glow.sync_xstats->str##_latency_max); \
+ count += layer->glow.sync_xstats->dequeued_count - \
+ layer->glow.sync_xstats->str##_reset_count; \
if (count == 0) \
value = 0; \
} while (0)
+#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM
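+/* Helpers to aggregate TVM runtime latency stats across all queue pairs of a model */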
+#define ML_AVG_FOREACH_QP_MVTVM(cnxk_mldev, model, qp_id, value, count) \
+ do { \
+ value = 0; \
+ for (qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs; qp_id++) { \
+ value += model->mvtvm.burst_xstats[qp_id].tvm_rt_latency_tot; \
+ count += model->mvtvm.burst_xstats[qp_id].dequeued_count - \
+ model->mvtvm.burst_xstats[qp_id].tvm_rt_reset_count; \
+ } \
+ if (count != 0) \
+ value = value / count; \
+ } while (0)
+
+#define ML_MIN_FOREACH_QP_MVTVM(cnxk_mldev, model, qp_id, value, count) \
+ do { \
+ value = UINT64_MAX; \
+ for (qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs; qp_id++) { \
+ value = PLT_MIN(value, \
+ model->mvtvm.burst_xstats[qp_id].tvm_rt_latency_min); \
+ count += model->mvtvm.burst_xstats[qp_id].dequeued_count - \
+ model->mvtvm.burst_xstats[qp_id].tvm_rt_reset_count; \
+ } \
+ if (count == 0) \
+ value = 0; \
+ } while (0)
+
+#define ML_MAX_FOREACH_QP_MVTVM(cnxk_mldev, model, qp_id, value, count) \
+ do { \
+ value = 0; \
+ for (qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs; qp_id++) { \
+ value = PLT_MAX(value, \
+ model->mvtvm.burst_xstats[qp_id].tvm_rt_latency_max); \
+ count += model->mvtvm.burst_xstats[qp_id].dequeued_count - \
+ model->mvtvm.burst_xstats[qp_id].tvm_rt_reset_count; \
+ } \
+ if (count == 0) \
+ value = 0; \
+ } while (0)
+#endif
+
static uint64_t
cnxk_ml_model_xstat_get(struct cnxk_ml_dev *cnxk_mldev, uint16_t obj_idx, int32_t layer_id,
enum cnxk_ml_xstats_type type)
@@ -317,6 +423,17 @@ cnxk_ml_model_xstat_get(struct cnxk_ml_dev *cnxk_mldev, uint16_t obj_idx, int32_
case max_fw_latency:
ML_MAX_FOREACH_QP(cnxk_mldev, layer, qp_id, fw, value, count);
break;
+#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM
+ case avg_rt_latency:
+ ML_AVG_FOREACH_QP_MVTVM(cnxk_mldev, model, qp_id, value, count);
+ break;
+ case min_rt_latency:
+ ML_MIN_FOREACH_QP_MVTVM(cnxk_mldev, model, qp_id, value, count);
+ break;
+ case max_rt_latency:
+ ML_MAX_FOREACH_QP_MVTVM(cnxk_mldev, model, qp_id, value, count);
+ break;
+#endif
default:
value = 0;
}
@@ -907,8 +1024,9 @@ cnxk_ml_dev_xstats_names_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode
{
struct cnxk_ml_xstats_entry *xs;
struct cnxk_ml_dev *cnxk_mldev;
+ struct cnxk_ml_model *model;
uint32_t xstats_mode_count;
- uint16_t layer_id = 0;
+ uint16_t layer_id;
uint32_t idx = 0;
uint32_t i;
@@ -925,7 +1043,17 @@ cnxk_ml_dev_xstats_names_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode
case RTE_ML_DEV_XSTATS_MODEL:
if (model_id >= ML_CNXK_MAX_MODELS)
break;
- xstats_mode_count = cnxk_mldev->xstats.count_per_layer[model_id][layer_id];
+
+ model = cnxk_mldev->mldev->data->models[model_id];
+ for (layer_id = 0; layer_id < model->nb_layers; layer_id++) {
+ if (model->layer[layer_id].type == ML_CNXK_LAYER_TYPE_MRVL)
+ xstats_mode_count +=
+ cnxk_mldev->xstats.count_per_layer[model_id][layer_id];
+ }
+
+ if ((model->type == ML_CNXK_MODEL_TYPE_TVM) &&
+ (model->subtype != ML_CNXK_MODEL_SUBTYPE_TVM_MRVL))
+ xstats_mode_count += RTE_DIM(model_xstats);
break;
default:
return -EINVAL;
@@ -939,9 +1067,20 @@ cnxk_ml_dev_xstats_names_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode
if (xs->mode != mode)
continue;
- if (mode == RTE_ML_DEV_XSTATS_MODEL &&
- (model_id != xs->obj_idx || layer_id != xs->layer_id))
- continue;
+ if (mode == RTE_ML_DEV_XSTATS_MODEL) {
+ if (model_id != xs->obj_idx)
+ continue;
+
+ model = cnxk_mldev->mldev->data->models[model_id];
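+ /* Skip model-level entries for Glow models and fully-MRVL TVM models */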
+ if ((model->type == ML_CNXK_MODEL_TYPE_GLOW ||
+ model->subtype == ML_CNXK_MODEL_SUBTYPE_TVM_MRVL) &&
+ xs->group == CNXK_ML_XSTATS_GROUP_MODEL)
+ continue;
+
+ if (model->type == ML_CNXK_MODEL_TYPE_TVM &&
+ model->layer[xs->layer_id].type == ML_CNXK_LAYER_TYPE_LLVM)
+ continue;
+ }
strncpy(xstats_map[idx].name, xs->map.name, RTE_ML_STR_MAX);
xstats_map[idx].id = xs->map.id;
@@ -1002,9 +1141,10 @@ cnxk_ml_dev_xstats_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode,
{
struct cnxk_ml_xstats_entry *xs;
struct cnxk_ml_dev *cnxk_mldev;
+ struct cnxk_ml_model *model;
uint32_t xstats_mode_count;
- uint16_t layer_id = 0;
cnxk_ml_xstats_fn fn;
+ uint16_t layer_id;
uint64_t val;
uint32_t idx;
uint32_t i;
@@ -1022,7 +1162,14 @@ cnxk_ml_dev_xstats_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode,
case RTE_ML_DEV_XSTATS_MODEL:
if (model_id >= ML_CNXK_MAX_MODELS)
return -EINVAL;
- xstats_mode_count = cnxk_mldev->xstats.count_per_layer[model_id][layer_id];
+
+ model = cnxk_mldev->mldev->data->models[model_id];
+ for (layer_id = 0; layer_id < model->nb_layers; layer_id++)
+ xstats_mode_count += cnxk_mldev->xstats.count_per_layer[model_id][layer_id];
+
+ if ((model->type == ML_CNXK_MODEL_TYPE_TVM) &&
+ (model->subtype != ML_CNXK_MODEL_SUBTYPE_TVM_MRVL))
+ xstats_mode_count += RTE_DIM(model_xstats);
break;
default:
return -EINVAL;
@@ -1034,11 +1181,18 @@ cnxk_ml_dev_xstats_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode,
if (stat_ids[i] > cnxk_mldev->xstats.count || xs->mode != mode)
continue;
- if (mode == RTE_ML_DEV_XSTATS_MODEL &&
- (model_id != xs->obj_idx || layer_id != xs->layer_id)) {
- plt_err("Invalid stats_id[%d] = %d for model_id = %d\n", i, stat_ids[i],
- model_id);
- return -EINVAL;
+ if (mode == RTE_ML_DEV_XSTATS_MODEL) {
+ if (model_id != xs->obj_idx)
+ continue;
+
+ model = cnxk_mldev->mldev->data->models[xs->obj_idx];
+ if ((model->type == ML_CNXK_MODEL_TYPE_GLOW ||
+ model->subtype == ML_CNXK_MODEL_SUBTYPE_TVM_MRVL) &&
+ xs->group == CNXK_ML_XSTATS_GROUP_MODEL)
+ continue;
+
+ if (xs->layer_id == -1 && xs->group == CNXK_ML_XSTATS_GROUP_LAYER)
+ continue;
}
switch (xs->fn_id) {
@@ -64,6 +64,7 @@ extern struct rte_ml_dev_ops cnxk_ml_ops;
int cnxk_ml_model_unload(struct rte_ml_dev *dev, uint16_t model_id);
int cnxk_ml_model_stop(struct rte_ml_dev *dev, uint16_t model_id);
+void cnxk_ml_xstats_model_name_update(struct cnxk_ml_dev *cnxk_mldev, uint16_t model_id);
__rte_hot uint16_t cnxk_ml_enqueue_burst(struct rte_ml_dev *dev, uint16_t qp_id,
struct rte_ml_op **ops, uint16_t nb_ops);
@@ -142,4 +142,11 @@ static const struct cnxk_ml_xstat_info layer_xstats[] = {
{"Min-FW-Latency", min_fw_latency, 1}, {"Max-FW-Latency", max_fw_latency, 1},
};
+/* Model xstats */
+static const struct cnxk_ml_xstat_info model_xstats[] = {
+ {"Avg-RT-Latency", avg_rt_latency, 1},
+ {"Min-RT-Latency", min_rt_latency, 1},
+ {"Max-RT-Latency", max_rt_latency, 1},
+};
+
#endif /* _CNXK_ML_XSTATS_H_ */
@@ -33,6 +33,27 @@ struct mvtvm_ml_model_object {
int64_t size;
};
+/* Model fast-path stats */
+struct mvtvm_ml_model_xstats {
+ /* Total TVM runtime latency, sum of all inferences */
+ uint64_t tvm_rt_latency_tot;
+
+ /* TVM runtime latency */
+ uint64_t tvm_rt_latency;
+
+ /* Minimum TVM runtime latency */
+ uint64_t tvm_rt_latency_min;
+
+ /* Maximum TVM runtime latency */
+ uint64_t tvm_rt_latency_max;
+
+ /* Total jobs dequeued */
+ uint64_t dequeued_count;
+
+ /* TVM runtime stats reset index */
+ uint64_t tvm_rt_reset_count;
+};
+
struct mvtvm_ml_model_data {
/* Model metadata */
struct tvmdp_model_metadata metadata;
@@ -45,6 +66,9 @@ struct mvtvm_ml_model_data {
/* Model I/O info */
struct cnxk_ml_io_info info;
+
+ /* Stats for burst ops */
+ struct mvtvm_ml_model_xstats *burst_xstats;
};
int mvtvm_ml_model_blob_parse(struct rte_ml_model_params *params,
@@ -16,6 +16,7 @@
#include "cnxk_ml_dev.h"
#include "cnxk_ml_model.h"
+#include "cnxk_ml_ops.h"
/* ML model macros */
#define MVTVM_ML_MODEL_MEMZONE_NAME "ml_mvtvm_model_mz"
@@ -59,6 +60,7 @@ mvtvm_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params *
char str[RTE_MEMZONE_NAMESIZE];
const struct plt_memzone *mz;
size_t model_object_size = 0;
+ size_t model_xstats_size = 0;
uint16_t nb_mrvl_layers;
uint16_t nb_llvm_layers;
uint8_t layer_id = 0;
@@ -74,7 +76,11 @@ mvtvm_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params *
model_object_size = RTE_ALIGN_CEIL(object[0].size, RTE_CACHE_LINE_MIN_SIZE) +
RTE_ALIGN_CEIL(object[1].size, RTE_CACHE_LINE_MIN_SIZE) +
RTE_ALIGN_CEIL(object[2].size, RTE_CACHE_LINE_MIN_SIZE);
- mz_size += model_object_size;
+
+ model_xstats_size =
+ cnxk_mldev->mldev->data->nb_queue_pairs * sizeof(struct mvtvm_ml_model_xstats);
+
+ mz_size += model_object_size + model_xstats_size;
/* Allocate memzone for model object */
snprintf(str, RTE_MEMZONE_NAMESIZE, "%s_%u", MVTVM_ML_MODEL_MEMZONE_NAME, model->model_id);
@@ -187,6 +193,22 @@ mvtvm_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params *
/* Set model info */
mvtvm_ml_model_info_set(cnxk_mldev, model);
+ /* Update model xstats names */
+ cnxk_ml_xstats_model_name_update(cnxk_mldev, model->model_id);
+
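+ /* Burst xstats are placed in the model memzone, after the cache-line aligned params blob */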
+ model->mvtvm.burst_xstats = RTE_PTR_ADD(
+ model->mvtvm.object.params.addr,
+ RTE_ALIGN_CEIL(model->mvtvm.object.params.size, RTE_CACHE_LINE_MIN_SIZE));
+
+ for (int qp_id = 0; qp_id < cnxk_mldev->mldev->data->nb_queue_pairs; qp_id++) {
+ model->mvtvm.burst_xstats[qp_id].tvm_rt_latency_tot = 0;
+ model->mvtvm.burst_xstats[qp_id].tvm_rt_latency = 0;
+ model->mvtvm.burst_xstats[qp_id].tvm_rt_latency_min = UINT64_MAX;
+ model->mvtvm.burst_xstats[qp_id].tvm_rt_latency_max = 0;
+ model->mvtvm.burst_xstats[qp_id].tvm_rt_reset_count = 0;
+ model->mvtvm.burst_xstats[qp_id].dequeued_count = 0;
+ }
+
return 0;
error: