Add device argument "hw_queue_lock" to select the JCMDQ enqueue
ROC function used in the fast path.
hw_queue_lock:

0: Disable, use the lock-free version of the JCMDQ enqueue ROC
   function for job queuing. To avoid race conditions when queuing
   requests to hardware, disabling hw_queue_lock restricts the number
   of queue-pairs supported by the cnxk driver to 1.

1: Enable (default), use the spin-lock version of the JCMDQ enqueue
   ROC function for job queuing. Enabling the spin-lock version lifts
   the restriction on the number of queue-pairs that can be created.
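
Example usage (the PCI address below is hypothetical): the lock-free
mode can be selected through EAL device arguments when launching any
DPDK application, e.g.

    <dpdk-app> -a 0000:00:10.0,hw_queue_lock=0

With hw_queue_lock=0 the driver reports max_queue_pairs = 1 through
rte_ml_dev_info_get(); with the default hw_queue_lock=1 it reports 16.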
Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
drivers/ml/cnxk/cn10k_ml_dev.c | 31 ++++++++++++++++++++++++++++++-
drivers/ml/cnxk/cn10k_ml_dev.h | 13 +++++++++++--
drivers/ml/cnxk/cn10k_ml_ops.c | 20 +++++++++++++++++---
3 files changed, 58 insertions(+), 6 deletions(-)
--- a/drivers/ml/cnxk/cn10k_ml_dev.c
+++ b/drivers/ml/cnxk/cn10k_ml_dev.c
@@ -22,12 +22,14 @@
 #define CN10K_ML_FW_REPORT_DPE_WARNINGS "report_dpe_warnings"
 #define CN10K_ML_DEV_CACHE_MODEL_DATA "cache_model_data"
 #define CN10K_ML_OCM_ALLOC_MODE "ocm_alloc_mode"
+#define CN10K_ML_DEV_HW_QUEUE_LOCK "hw_queue_lock"
 
 #define CN10K_ML_FW_PATH_DEFAULT "/lib/firmware/mlip-fw.bin"
 #define CN10K_ML_FW_ENABLE_DPE_WARNINGS_DEFAULT 1
 #define CN10K_ML_FW_REPORT_DPE_WARNINGS_DEFAULT 0
 #define CN10K_ML_DEV_CACHE_MODEL_DATA_DEFAULT 1
 #define CN10K_ML_OCM_ALLOC_MODE_DEFAULT "lowest"
+#define CN10K_ML_DEV_HW_QUEUE_LOCK_DEFAULT 1
 
 /* ML firmware macros */
 #define FW_MEMZONE_NAME "ml_cn10k_fw_mz"
@@ -46,6 +48,7 @@ static const char *const valid_args[] = {CN10K_ML_FW_PATH,
					 CN10K_ML_FW_REPORT_DPE_WARNINGS,
					 CN10K_ML_DEV_CACHE_MODEL_DATA,
					 CN10K_ML_OCM_ALLOC_MODE,
+					 CN10K_ML_DEV_HW_QUEUE_LOCK,
					 NULL};
 
 /* Dummy operations for ML device */
@@ -87,6 +90,7 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde
	bool cache_model_data_set = false;
	struct rte_kvargs *kvlist = NULL;
	bool ocm_alloc_mode_set = false;
+	bool hw_queue_lock_set = false;
	char *ocm_alloc_mode = NULL;
	bool fw_path_set = false;
	char *fw_path = NULL;
@@ -158,6 +162,18 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde
		ocm_alloc_mode_set = true;
	}
 
+	if (rte_kvargs_count(kvlist, CN10K_ML_DEV_HW_QUEUE_LOCK) == 1) {
+		ret = rte_kvargs_process(kvlist, CN10K_ML_DEV_HW_QUEUE_LOCK, &parse_integer_arg,
+					 &mldev->hw_queue_lock);
+		if (ret < 0) {
+			plt_err("Error processing arguments, key = %s\n",
+				CN10K_ML_DEV_HW_QUEUE_LOCK);
+			ret = -EINVAL;
+			goto exit;
+		}
+		hw_queue_lock_set = true;
+	}
+
 check_args:
	if (!fw_path_set)
		mldev->fw.path = CN10K_ML_FW_PATH_DEFAULT;
@@ -215,6 +231,18 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde
	}
	plt_info("ML: %s = %s", CN10K_ML_OCM_ALLOC_MODE, mldev->ocm.alloc_mode);
 
+	if (!hw_queue_lock_set) {
+		mldev->hw_queue_lock = CN10K_ML_DEV_HW_QUEUE_LOCK_DEFAULT;
+	} else {
+		if ((mldev->hw_queue_lock < 0) || (mldev->hw_queue_lock > 1)) {
+			plt_err("Invalid argument, %s = %d\n", CN10K_ML_DEV_HW_QUEUE_LOCK,
+				mldev->hw_queue_lock);
+			ret = -EINVAL;
+			goto exit;
+		}
+	}
+	plt_info("ML: %s = %d", CN10K_ML_DEV_HW_QUEUE_LOCK, mldev->hw_queue_lock);
+
 exit:
	if (kvlist)
		rte_kvargs_free(kvlist);
@@ -756,4 +784,5 @@ RTE_PMD_REGISTER_PARAM_STRING(MLDEV_NAME_CN10K_PMD, CN10K_ML_FW_PATH
			      "=<path>" CN10K_ML_FW_ENABLE_DPE_WARNINGS
			      "=<0|1>" CN10K_ML_FW_REPORT_DPE_WARNINGS
			      "=<0|1>" CN10K_ML_DEV_CACHE_MODEL_DATA
-			      "=<0|1>" CN10K_ML_OCM_ALLOC_MODE "=<lowest|largest>");
+			      "=<0|1>" CN10K_ML_OCM_ALLOC_MODE
+			      "=<lowest|largest>" CN10K_ML_DEV_HW_QUEUE_LOCK "=<0|1>");
--- a/drivers/ml/cnxk/cn10k_ml_dev.h
+++ b/drivers/ml/cnxk/cn10k_ml_dev.h
@@ -21,8 +21,11 @@
 /* Maximum number of models per device */
 #define ML_CN10K_MAX_MODELS 16
 
-/* Maximum number of Queue-Pairs per device */
-#define ML_CN10K_MAX_QP_PER_DEVICE 1
+/* Maximum number of Queue-Pairs per device, spinlock version */
+#define ML_CN10K_MAX_QP_PER_DEVICE_SL 16
+
+/* Maximum number of Queue-Pairs per device, lock-free version */
+#define ML_CN10K_MAX_QP_PER_DEVICE_LF 1
 
 /* Maximum number of descriptors per queue-pair */
 #define ML_CN10K_MAX_DESC_PER_QP 1024
@@ -384,6 +387,12 @@ struct cn10k_ml_dev {
 
	/* Enable / disable model data caching */
	int cache_model_data;
+
+	/* Use spinlock version of ROC enqueue */
+	int hw_queue_lock;
+
+	/* JCMD enqueue function handler */
+	bool (*ml_jcmdq_enqueue)(struct roc_ml *roc_ml, struct ml_job_cmd_s *job_cmd);
 };
 
 uint64_t cn10k_ml_fw_flags_get(struct cn10k_ml_fw *fw);
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -534,13 +534,21 @@ cn10k_ml_cache_model_data(struct rte_ml_dev *dev, int16_t model_id)
 static int
 cn10k_ml_dev_info_get(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info)
 {
+	struct cn10k_ml_dev *mldev;
+
	if (dev_info == NULL)
		return -EINVAL;
 
+	mldev = dev->data->dev_private;
+
	memset(dev_info, 0, sizeof(struct rte_ml_dev_info));
	dev_info->driver_name = dev->device->driver->name;
	dev_info->max_models = ML_CN10K_MAX_MODELS;
-	dev_info->max_queue_pairs = ML_CN10K_MAX_QP_PER_DEVICE;
+	if (mldev->hw_queue_lock)
+		dev_info->max_queue_pairs = ML_CN10K_MAX_QP_PER_DEVICE_SL;
+	else
+		dev_info->max_queue_pairs = ML_CN10K_MAX_QP_PER_DEVICE_LF;
+
	dev_info->max_desc = ML_CN10K_MAX_DESC_PER_QP;
	dev_info->max_segments = ML_CN10K_MAX_SEGMENTS;
	dev_info->min_align_size = ML_CN10K_ALIGN_SIZE;
@@ -703,6 +711,12 @@ cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c
	else
		mldev->xstats_enabled = false;
 
+	/* Set JCMDQ enqueue function */
+	if (mldev->hw_queue_lock == 1)
+		mldev->ml_jcmdq_enqueue = roc_ml_jcmdq_enqueue_sl;
+	else
+		mldev->ml_jcmdq_enqueue = roc_ml_jcmdq_enqueue_lf;
+
	dev->enqueue_burst = cn10k_ml_enqueue_burst;
	dev->dequeue_burst = cn10k_ml_dequeue_burst;
	dev->op_error_get = cn10k_ml_op_error_get;
@@ -1996,7 +2010,7 @@ cn10k_ml_enqueue_burst(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op
	req->result.user_ptr = op->user_ptr;
 
	plt_write64(ML_CN10K_POLL_JOB_START, &req->status);
-	enqueued = roc_ml_jcmdq_enqueue_lf(&mldev->roc, &req->jcmd);
+	enqueued = mldev->ml_jcmdq_enqueue(&mldev->roc, &req->jcmd);
	if (unlikely(!enqueued))
		goto jcmdq_full;
 
@@ -2117,7 +2131,7 @@ cn10k_ml_inference_sync(struct rte_ml_dev *dev, struct rte_ml_op *op)
	timeout = true;
	req->timeout = plt_tsc_cycles() + ML_CN10K_CMD_TIMEOUT * plt_tsc_hz();
	do {
-		if (roc_ml_jcmdq_enqueue_lf(&mldev->roc, &req->jcmd)) {
+		if (mldev->ml_jcmdq_enqueue(&mldev->roc, &req->jcmd)) {
			req->op = op;
			timeout = false;
			break;