@@ -52,38 +52,27 @@ struct cnxk_ml_dev;
struct cnxk_ml_req;
struct cnxk_ml_qp;
-/* Error types enumeration */
-enum cn10k_ml_error_etype {
- /* 0x0 */ ML_ETYPE_NO_ERROR = 0, /* No error */
- /* 0x1 */ ML_ETYPE_FW_NONFATAL, /* Firmware non-fatal error */
- /* 0x2 */ ML_ETYPE_HW_NONFATAL, /* Hardware non-fatal error */
- /* 0x3 */ ML_ETYPE_HW_FATAL, /* Hardware fatal error */
- /* 0x4 */ ML_ETYPE_HW_WARNING, /* Hardware warning */
- /* 0x5 */ ML_ETYPE_DRIVER, /* Driver specific error */
- /* 0x6 */ ML_ETYPE_UNKNOWN, /* Unknown error */
-};
-
/* Firmware non-fatal error sub-type */
enum cn10k_ml_error_stype_fw_nf {
- /* 0x0 */ ML_FW_ERR_NOERR = 0, /* No error */
- /* 0x1 */ ML_FW_ERR_UNLOAD_ID_NOT_FOUND, /* Model ID not found during load */
- /* 0x2 */ ML_FW_ERR_LOAD_LUT_OVERFLOW, /* Lookup table overflow at load */
- /* 0x3 */ ML_FW_ERR_ID_IN_USE, /* Model ID already in use */
- /* 0x4 */ ML_FW_ERR_INVALID_TILEMASK, /* Invalid OCM tilemask */
- /* 0x5 */ ML_FW_ERR_RUN_LUT_OVERFLOW, /* Lookup table overflow at run */
- /* 0x6 */ ML_FW_ERR_RUN_ID_NOT_FOUND, /* Model ID not found during run */
- /* 0x7 */ ML_FW_ERR_COMMAND_NOTSUP, /* Unsupported command */
- /* 0x8 */ ML_FW_ERR_DDR_ADDR_RANGE, /* DDR address out of range */
- /* 0x9 */ ML_FW_ERR_NUM_BATCHES_INVALID, /* Invalid number of batches */
- /* 0xA */ ML_FW_ERR_INSSYNC_TIMEOUT, /* INS sync timeout */
+ /* 0x0 */ ML_CN10K_FW_ERR_NOERR = 0, /* No error */
+ /* 0x1 */ ML_CN10K_FW_ERR_UNLOAD_ID_NOT_FOUND, /* Model ID not found during unload */
+ /* 0x2 */ ML_CN10K_FW_ERR_LOAD_LUT_OVERFLOW, /* Lookup table overflow at load */
+ /* 0x3 */ ML_CN10K_FW_ERR_ID_IN_USE, /* Model ID already in use */
+ /* 0x4 */ ML_CN10K_FW_ERR_INVALID_TILEMASK, /* Invalid OCM tilemask */
+ /* 0x5 */ ML_CN10K_FW_ERR_RUN_LUT_OVERFLOW, /* Lookup table overflow at run */
+ /* 0x6 */ ML_CN10K_FW_ERR_RUN_ID_NOT_FOUND, /* Model ID not found during run */
+ /* 0x7 */ ML_CN10K_FW_ERR_COMMAND_NOTSUP, /* Unsupported command */
+ /* 0x8 */ ML_CN10K_FW_ERR_DDR_ADDR_RANGE, /* DDR address out of range */
+ /* 0x9 */ ML_CN10K_FW_ERR_NUM_BATCHES_INVALID, /* Invalid number of batches */
+ /* 0xA */ ML_CN10K_FW_ERR_INSSYNC_TIMEOUT, /* INS sync timeout */
};
/* Driver error sub-type */
enum cn10k_ml_error_stype_driver {
- /* 0x0 */ ML_DRIVER_ERR_NOERR = 0, /* No error */
- /* 0x1 */ ML_DRIVER_ERR_UNKNOWN, /* Unable to determine error sub-type */
- /* 0x2 */ ML_DRIVER_ERR_EXCEPTION, /* Firmware exception */
- /* 0x3 */ ML_DRIVER_ERR_FW_ERROR, /* Unknown firmware error */
+ /* 0x0 */ ML_CN10K_DRIVER_ERR_NOERR = 0, /* No error */
+ /* 0x1 */ ML_CN10K_DRIVER_ERR_UNKNOWN, /* Unable to determine error sub-type */
+ /* 0x2 */ ML_CN10K_DRIVER_ERR_EXCEPTION, /* Firmware exception (exception scratch != 0) */
+ /* 0x3 */ ML_CN10K_DRIVER_ERR_FW_ERROR, /* Unknown firmware error, ML_CORE_INT_LO/HI != 0 */
};
/* Error structure */
@@ -22,47 +22,27 @@
#define ML_FLAGS_POLL_COMPL BIT(0)
#define ML_FLAGS_SSO_COMPL BIT(1)
-/* Error message length */
-#define ERRMSG_LEN 32
-
-/* Error type database */
-static const struct cn10k_ml_etype_db {
- enum cn10k_ml_error_etype etype;
- char name[ERRMSG_LEN];
-} ml_etype_db[] = {
- {ML_ETYPE_NO_ERROR, "NO_ERROR"}, {ML_ETYPE_FW_NONFATAL, "FW_NON_FATAL"},
- {ML_ETYPE_HW_NONFATAL, "HW_NON_FATAL"}, {ML_ETYPE_HW_FATAL, "HW_FATAL"},
- {ML_ETYPE_HW_WARNING, "HW_WARNING"}, {ML_ETYPE_DRIVER, "DRIVER_ERROR"},
- {ML_ETYPE_UNKNOWN, "UNKNOWN_ERROR"},
-};
-
/* Hardware non-fatal error subtype database */
-static const struct cn10k_ml_stype_db_hw_nf {
- enum cn10k_ml_error_stype_fw_nf stype;
- char msg[ERRMSG_LEN];
-} ml_stype_db_hw_nf[] = {
- {ML_FW_ERR_NOERR, "NO ERROR"},
- {ML_FW_ERR_UNLOAD_ID_NOT_FOUND, "UNLOAD MODEL ID NOT FOUND"},
- {ML_FW_ERR_LOAD_LUT_OVERFLOW, "LOAD LUT OVERFLOW"},
- {ML_FW_ERR_ID_IN_USE, "MODEL ID IN USE"},
- {ML_FW_ERR_INVALID_TILEMASK, "INVALID TILEMASK"},
- {ML_FW_ERR_RUN_LUT_OVERFLOW, "RUN LUT OVERFLOW"},
- {ML_FW_ERR_RUN_ID_NOT_FOUND, "RUN MODEL ID NOT FOUND"},
- {ML_FW_ERR_COMMAND_NOTSUP, "COMMAND NOT SUPPORTED"},
- {ML_FW_ERR_DDR_ADDR_RANGE, "DDR ADDRESS OUT OF RANGE"},
- {ML_FW_ERR_NUM_BATCHES_INVALID, "INVALID BATCHES"},
- {ML_FW_ERR_INSSYNC_TIMEOUT, "INSSYNC TIMEOUT"},
+static const struct cnxk_ml_error_db ml_stype_db_hw_nf[] = {
+ {ML_CN10K_FW_ERR_NOERR, "NO ERROR"},
+ {ML_CN10K_FW_ERR_UNLOAD_ID_NOT_FOUND, "UNLOAD MODEL ID NOT FOUND"},
+ {ML_CN10K_FW_ERR_LOAD_LUT_OVERFLOW, "LOAD LUT OVERFLOW"},
+ {ML_CN10K_FW_ERR_ID_IN_USE, "MODEL ID IN USE"},
+ {ML_CN10K_FW_ERR_INVALID_TILEMASK, "INVALID TILEMASK"},
+ {ML_CN10K_FW_ERR_RUN_LUT_OVERFLOW, "RUN LUT OVERFLOW"},
+ {ML_CN10K_FW_ERR_RUN_ID_NOT_FOUND, "RUN MODEL ID NOT FOUND"},
+ {ML_CN10K_FW_ERR_COMMAND_NOTSUP, "COMMAND NOT SUPPORTED"},
+ {ML_CN10K_FW_ERR_DDR_ADDR_RANGE, "DDR ADDRESS OUT OF RANGE"},
+ {ML_CN10K_FW_ERR_NUM_BATCHES_INVALID, "INVALID BATCHES"},
+ {ML_CN10K_FW_ERR_INSSYNC_TIMEOUT, "INSSYNC TIMEOUT"},
};
/* Driver error subtype database */
-static const struct cn10k_ml_stype_db_driver {
- enum cn10k_ml_error_stype_driver stype;
- char msg[ERRMSG_LEN];
-} ml_stype_db_driver[] = {
- {ML_DRIVER_ERR_NOERR, "NO ERROR"},
- {ML_DRIVER_ERR_UNKNOWN, "UNKNOWN ERROR"},
- {ML_DRIVER_ERR_EXCEPTION, "FW EXCEPTION"},
- {ML_DRIVER_ERR_FW_ERROR, "UNKNOWN FIRMWARE ERROR"},
+static const struct cnxk_ml_error_db ml_stype_db_driver[] = {
+ {ML_CN10K_DRIVER_ERR_NOERR, "NO ERROR"},
+ {ML_CN10K_DRIVER_ERR_UNKNOWN, "UNKNOWN ERROR"},
+ {ML_CN10K_DRIVER_ERR_EXCEPTION, "FW EXCEPTION"},
+ {ML_CN10K_DRIVER_ERR_FW_ERROR, "UNKNOWN FIRMWARE ERROR"},
};
__rte_hot void
@@ -1241,19 +1221,19 @@ cn10k_ml_result_update(struct cnxk_ml_dev *cnxk_mldev, int qp_id, void *request)
/* Handle driver error */
error_code = (union cn10k_ml_error_code *)&result->error_code;
- if (error_code->s.etype == ML_ETYPE_DRIVER) {
+ if (error_code->s.etype == ML_CNXK_ETYPE_DRIVER) {
cn10k_mldev = &cnxk_mldev->cn10k_mldev;
/* Check for exception */
if ((roc_ml_reg_read64(&cn10k_mldev->roc, ML_SCRATCH_EXCEPTION_SP_C0) !=
0) ||
(roc_ml_reg_read64(&cn10k_mldev->roc, ML_SCRATCH_EXCEPTION_SP_C1) != 0))
- error_code->s.stype = ML_DRIVER_ERR_EXCEPTION;
+ error_code->s.stype = ML_CN10K_DRIVER_ERR_EXCEPTION;
else if ((roc_ml_reg_read64(&cn10k_mldev->roc, ML_CORE_INT_LO) != 0) ||
(roc_ml_reg_read64(&cn10k_mldev->roc, ML_CORE_INT_HI) != 0))
- error_code->s.stype = ML_DRIVER_ERR_FW_ERROR;
+ error_code->s.stype = ML_CN10K_DRIVER_ERR_FW_ERROR;
else
- error_code->s.stype = ML_DRIVER_ERR_UNKNOWN;
+ error_code->s.stype = ML_CN10K_DRIVER_ERR_UNKNOWN;
}
op->impl_opaque = result->error_code;
@@ -1294,7 +1274,7 @@ cn10k_ml_enqueue_single(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op, ui
memset(&req->cn10k_req.result, 0, sizeof(struct cn10k_ml_result));
error_code = (union cn10k_ml_error_code *)&req->cn10k_req.result.error_code;
- error_code->s.etype = ML_ETYPE_UNKNOWN;
+ error_code->s.etype = ML_CNXK_ETYPE_UNKNOWN;
req->cn10k_req.result.user_ptr = op->user_ptr;
cnxk_ml_set_poll_ptr(req);
@@ -1311,30 +1291,38 @@ __rte_hot int
cn10k_ml_op_error_get(struct rte_ml_dev *dev, struct rte_ml_op *op, struct rte_ml_op_error *error)
{
union cn10k_ml_error_code *error_code;
- char msg[RTE_ML_STR_MAX];
PLT_SET_USED(dev);
error_code = (union cn10k_ml_error_code *)&op->impl_opaque;
- /* Copy error message */
- plt_strlcpy(msg, ml_etype_db[error_code->s.etype].name, sizeof(msg));
-
/* Copy sub error message */
- if (error_code->s.etype == ML_ETYPE_HW_NONFATAL) {
- strcat(msg, " : ");
+ if (error_code->s.etype == ML_CNXK_ETYPE_HW_NONFATAL) {
if (error_code->s.stype < PLT_DIM(ml_stype_db_hw_nf))
- strcat(msg, ml_stype_db_hw_nf[error_code->s.stype].msg);
+ snprintf(error->message, RTE_ML_STR_MAX, "%s : %s",
+ ml_etype_db[error_code->s.etype].str,
+ ml_stype_db_hw_nf[error_code->s.stype].str);
else
- strcat(msg, "UNKNOWN ERROR");
- }
-
- if (error_code->s.etype == ML_ETYPE_DRIVER) {
- strcat(msg, " : ");
- strcat(msg, ml_stype_db_driver[error_code->s.stype].msg);
+ snprintf(error->message, RTE_ML_STR_MAX, "%s : UNKNOWN ERROR",
+ ml_etype_db[error_code->s.etype].str);
+ } else if (error_code->s.etype == ML_CNXK_ETYPE_DRIVER) {
+ /* Bound-check the sub-type before indexing, as done for HW_NONFATAL */
+ if (error_code->s.stype < PLT_DIM(ml_stype_db_driver))
+ snprintf(error->message, RTE_ML_STR_MAX, "%s : %s",
+ ml_etype_db[error_code->s.etype].str,
+ ml_stype_db_driver[error_code->s.stype].str);
+ else
+ snprintf(error->message, RTE_ML_STR_MAX, "%s : UNKNOWN ERROR",
+ ml_etype_db[error_code->s.etype].str);
+ } else if (error_code->s.etype <= ML_CNXK_ETYPE_UNKNOWN) {
+ snprintf(error->message, RTE_ML_STR_MAX, "%s",
+ ml_etype_db[error_code->s.etype].str);
+ } else {
+ /* Error type has no ml_etype_db entry; report it as unknown */
+ snprintf(error->message, RTE_ML_STR_MAX, "%s",
+ ml_etype_db[ML_CNXK_ETYPE_UNKNOWN].str);
}
- plt_strlcpy(error->message, msg, sizeof(error->message));
error->errcode = error_code->u64;
return 0;
@@ -1372,7 +1351,7 @@ cn10k_ml_inference_sync(void *device, uint16_t index, void *input, void *output,
memset(&req->cn10k_req.result, 0, sizeof(struct cn10k_ml_result));
error_code = (union cn10k_ml_error_code *)&req->cn10k_req.result.error_code;
- error_code->s.etype = ML_ETYPE_UNKNOWN;
+ error_code->s.etype = ML_CNXK_ETYPE_UNKNOWN;
req->cn10k_req.result.user_ptr = NULL;
cnxk_ml_set_poll_ptr(req);
@@ -9,3 +9,11 @@
/* Dummy operations for ML device */
struct rte_ml_dev_ops ml_dev_dummy_ops = {0};
+
+/* Error type database; indexed by etype, so entries follow enum cnxk_ml_error_etype order */
+struct cnxk_ml_error_db ml_etype_db[] = {
+ {ML_CNXK_ETYPE_NO_ERROR, "NO_ERROR"}, {ML_CNXK_ETYPE_FW_NONFATAL, "FW_NON_FATAL"},
+ {ML_CNXK_ETYPE_HW_NONFATAL, "HW_NON_FATAL"}, {ML_CNXK_ETYPE_HW_FATAL, "HW_FATAL"},
+ {ML_CNXK_ETYPE_HW_WARNING, "HW_WARNING"}, {ML_CNXK_ETYPE_DRIVER, "DRIVER_ERROR"},
+ {ML_CNXK_ETYPE_UNKNOWN, "UNKNOWN_ERROR"},
+};
@@ -18,6 +18,22 @@
#define ML_CNXK_POLL_JOB_START 0
#define ML_CNXK_POLL_JOB_FINISH 1
+/* Error types enumeration; values are used as indices into ml_etype_db[] */
+enum cnxk_ml_error_etype {
+ /* 0x0 */ ML_CNXK_ETYPE_NO_ERROR = 0, /* No error */
+ /* 0x1 */ ML_CNXK_ETYPE_FW_NONFATAL, /* Firmware non-fatal error */
+ /* 0x2 */ ML_CNXK_ETYPE_HW_NONFATAL, /* Hardware non-fatal error */
+ /* 0x3 */ ML_CNXK_ETYPE_HW_FATAL, /* Hardware fatal error */
+ /* 0x4 */ ML_CNXK_ETYPE_HW_WARNING, /* Hardware warning */
+ /* 0x5 */ ML_CNXK_ETYPE_DRIVER, /* Driver specific error */
+ /* 0x6 */ ML_CNXK_ETYPE_UNKNOWN, /* Unknown error */
+};
+
+struct cnxk_ml_error_db { /* One error-code-to-string table entry */
+ uint64_t code; /* Error type or sub-type enumerator value */
+ char str[RTE_ML_STR_MAX]; /* Printable string for the code */
+};
+
/* Device configuration state enum */
enum cnxk_ml_dev_state {
/* Probed and not configured */
@@ -78,4 +94,6 @@ struct cnxk_ml_dev {
struct cnxk_ml_index_map *index_map;
};
+extern struct cnxk_ml_error_db ml_etype_db[]; /* Error type database, indexed by etype */
+
#endif /* _CNXK_ML_DEV_H_ */
@@ -1432,7 +1432,7 @@ cnxk_ml_dequeue_burst(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op *
if (plt_tsc_cycles() < req->timeout)
goto empty_or_active;
else /* Timeout, set indication of driver error */
- model->set_error_code(req, ML_ETYPE_DRIVER, 0);
+ model->set_error_code(req, ML_CNXK_ETYPE_DRIVER, 0);
}
model->result_update(cnxk_mldev, qp->id, req);