@@ -180,22 +180,6 @@ Runtime Config Options
in the fast path enqueue burst operation.
-**Polling memory location** (default ``ddr``)
-
- ML cnxk driver provides the option to select the memory location to be used
- for polling to check the inference request completion.
- Driver supports using either the DDR address space (``ddr``)
- or ML registers (``register``) as polling locations.
- The parameter ``poll_mem`` is used to specify the poll location.
-
- For example::
-
- -a 0000:00:10.0,poll_mem="register"
-
- With the above configuration, ML cnxk driver is configured to use ML registers
- for polling in fastpath requests.
-
-
Debugging Options
-----------------
@@ -23,7 +23,6 @@
#define CN10K_ML_DEV_CACHE_MODEL_DATA "cache_model_data"
#define CN10K_ML_OCM_ALLOC_MODE "ocm_alloc_mode"
#define CN10K_ML_DEV_HW_QUEUE_LOCK "hw_queue_lock"
-#define CN10K_ML_FW_POLL_MEM "poll_mem"
#define CN10K_ML_OCM_PAGE_SIZE "ocm_page_size"
#define CN10K_ML_FW_PATH_DEFAULT "/lib/firmware/mlip-fw.bin"
@@ -32,7 +31,6 @@
#define CN10K_ML_DEV_CACHE_MODEL_DATA_DEFAULT 1
#define CN10K_ML_OCM_ALLOC_MODE_DEFAULT "lowest"
#define CN10K_ML_DEV_HW_QUEUE_LOCK_DEFAULT 1
-#define CN10K_ML_FW_POLL_MEM_DEFAULT "ddr"
#define CN10K_ML_OCM_PAGE_SIZE_DEFAULT 16384
/* ML firmware macros */
@@ -54,7 +52,6 @@ static const char *const valid_args[] = {CN10K_ML_FW_PATH,
CN10K_ML_DEV_CACHE_MODEL_DATA,
CN10K_ML_OCM_ALLOC_MODE,
CN10K_ML_DEV_HW_QUEUE_LOCK,
- CN10K_ML_FW_POLL_MEM,
CN10K_ML_OCM_PAGE_SIZE,
NULL};
@@ -103,9 +100,7 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde
bool hw_queue_lock_set = false;
bool ocm_page_size_set = false;
char *ocm_alloc_mode = NULL;
- bool poll_mem_set = false;
bool fw_path_set = false;
- char *poll_mem = NULL;
char *fw_path = NULL;
int ret = 0;
bool found;
@@ -189,17 +184,6 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde
hw_queue_lock_set = true;
}
- if (rte_kvargs_count(kvlist, CN10K_ML_FW_POLL_MEM) == 1) {
- ret = rte_kvargs_process(kvlist, CN10K_ML_FW_POLL_MEM, &parse_string_arg,
- &poll_mem);
- if (ret < 0) {
- plt_err("Error processing arguments, key = %s\n", CN10K_ML_FW_POLL_MEM);
- ret = -EINVAL;
- goto exit;
- }
- poll_mem_set = true;
- }
-
if (rte_kvargs_count(kvlist, CN10K_ML_OCM_PAGE_SIZE) == 1) {
ret = rte_kvargs_process(kvlist, CN10K_ML_OCM_PAGE_SIZE, &parse_integer_arg,
&mldev->ocm_page_size);
@@ -280,18 +264,6 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde
}
plt_info("ML: %s = %d", CN10K_ML_DEV_HW_QUEUE_LOCK, mldev->hw_queue_lock);
- if (!poll_mem_set) {
- mldev->fw.poll_mem = CN10K_ML_FW_POLL_MEM_DEFAULT;
- } else {
- if (!((strcmp(poll_mem, "ddr") == 0) || (strcmp(poll_mem, "register") == 0))) {
- plt_err("Invalid argument, %s = %s\n", CN10K_ML_FW_POLL_MEM, poll_mem);
- ret = -EINVAL;
- goto exit;
- }
- mldev->fw.poll_mem = poll_mem;
- }
- plt_info("ML: %s = %s", CN10K_ML_FW_POLL_MEM, mldev->fw.poll_mem);
-
if (!ocm_page_size_set) {
mldev->ocm_page_size = CN10K_ML_OCM_PAGE_SIZE_DEFAULT;
} else {
@@ -450,10 +422,7 @@ cn10k_ml_fw_flags_get(struct cn10k_ml_fw *fw)
if (fw->report_dpe_warnings)
flags = flags | FW_REPORT_DPE_WARNING_BITMASK;
- if (strcmp(fw->poll_mem, "ddr") == 0)
- flags = flags | FW_USE_DDR_POLL_ADDR_FP;
- else if (strcmp(fw->poll_mem, "register") == 0)
- flags = flags & ~FW_USE_DDR_POLL_ADDR_FP;
+ flags = flags | FW_USE_DDR_POLL_ADDR_FP;
return flags;
}
@@ -863,5 +832,4 @@ RTE_PMD_REGISTER_PARAM_STRING(MLDEV_NAME_CN10K_PMD, CN10K_ML_FW_PATH
"=<0|1>" CN10K_ML_DEV_CACHE_MODEL_DATA
"=<0|1>" CN10K_ML_OCM_ALLOC_MODE
"=<lowest|largest>" CN10K_ML_DEV_HW_QUEUE_LOCK
- "=<0|1>" CN10K_ML_FW_POLL_MEM "=<ddr|register>" CN10K_ML_OCM_PAGE_SIZE
- "=<1024|2048|4096|8192|16384>");
+ "=<0|1>" CN10K_ML_OCM_PAGE_SIZE "=<1024|2048|4096|8192|16384>");
@@ -390,9 +390,6 @@ struct cn10k_ml_fw {
/* Report DPE warnings */
int report_dpe_warnings;
- /* Memory to be used for polling in fast-path requests */
- const char *poll_mem;
-
/* Data buffer */
uint8_t *data;
@@ -525,13 +522,9 @@ struct cn10k_ml_dev {
bool (*ml_jcmdq_enqueue)(struct roc_ml *roc_ml, struct ml_job_cmd_s *job_cmd);
/* Poll handling function pointers */
- void (*set_poll_addr)(struct cn10k_ml_qp *qp, struct cn10k_ml_req *req, uint64_t idx);
- void (*set_poll_ptr)(struct roc_ml *roc_ml, struct cn10k_ml_req *req);
- uint64_t (*get_poll_ptr)(struct roc_ml *roc_ml, struct cn10k_ml_req *req);
-
- /* Memory barrier function pointers to handle synchronization */
- void (*set_enq_barrier)(void);
- void (*set_deq_barrier)(void);
+ void (*set_poll_addr)(struct cn10k_ml_req *req);
+ void (*set_poll_ptr)(struct cn10k_ml_req *req);
+ uint64_t (*get_poll_ptr)(struct cn10k_ml_req *req);
};
uint64_t cn10k_ml_fw_flags_get(struct cn10k_ml_fw *fw);
@@ -23,11 +23,6 @@
#define ML_FLAGS_POLL_COMPL BIT(0)
#define ML_FLAGS_SSO_COMPL BIT(1)
-/* Scratch register range for poll mode requests */
-#define ML_POLL_REGISTER_SYNC 1023
-#define ML_POLL_REGISTER_START 1024
-#define ML_POLL_REGISTER_END 2047
-
/* Error message length */
#define ERRMSG_LEN 32
@@ -82,79 +77,23 @@ print_line(FILE *fp, int len)
}
static inline void
-cn10k_ml_set_poll_addr_ddr(struct cn10k_ml_qp *qp, struct cn10k_ml_req *req, uint64_t idx)
+cn10k_ml_set_poll_addr(struct cn10k_ml_req *req) /* Record the address of req->status in req->compl_W1; that word is what gets polled. */
{
- PLT_SET_USED(qp);
- PLT_SET_USED(idx);
-
req->compl_W1 = PLT_U64_CAST(&req->status);
}
static inline void
-cn10k_ml_set_poll_addr_reg(struct cn10k_ml_qp *qp, struct cn10k_ml_req *req, uint64_t idx)
-{
- req->compl_W1 = ML_SCRATCH(qp->block_start + idx % qp->block_size);
-}
-
-static inline void
-cn10k_ml_set_poll_ptr_ddr(struct roc_ml *roc_ml, struct cn10k_ml_req *req)
+cn10k_ml_set_poll_ptr(struct cn10k_ml_req *req) /* Mark the poll word as ML_CN10K_POLL_JOB_START via plt_write64() (presumably value-then-address order; confirm). */
{
- PLT_SET_USED(roc_ml);
-
plt_write64(ML_CN10K_POLL_JOB_START, req->compl_W1);
}
-static inline void
-cn10k_ml_set_poll_ptr_reg(struct roc_ml *roc_ml, struct cn10k_ml_req *req)
-{
- roc_ml_reg_write64(roc_ml, ML_CN10K_POLL_JOB_START, req->compl_W1);
-}
-
static inline uint64_t
-cn10k_ml_get_poll_ptr_ddr(struct roc_ml *roc_ml, struct cn10k_ml_req *req)
+cn10k_ml_get_poll_ptr(struct cn10k_ml_req *req) /* Return the current poll word read through req->compl_W1; callers compare it with ML_CN10K_POLL_JOB_FINISH. */
{
- PLT_SET_USED(roc_ml);
-
return plt_read64(req->compl_W1);
}
-static inline uint64_t
-cn10k_ml_get_poll_ptr_reg(struct roc_ml *roc_ml, struct cn10k_ml_req *req)
-{
- return roc_ml_reg_read64(roc_ml, req->compl_W1);
-}
-
-static inline void
-cn10k_ml_set_sync_addr(struct cn10k_ml_dev *mldev, struct cn10k_ml_req *req)
-{
- if (strcmp(mldev->fw.poll_mem, "ddr") == 0)
- req->compl_W1 = PLT_U64_CAST(&req->status);
- else if (strcmp(mldev->fw.poll_mem, "register") == 0)
- req->compl_W1 = ML_SCRATCH(ML_POLL_REGISTER_SYNC);
-}
-
-static inline void
-cn10k_ml_enq_barrier_ddr(void)
-{
-}
-
-static inline void
-cn10k_ml_deq_barrier_ddr(void)
-{
-}
-
-static inline void
-cn10k_ml_enq_barrier_register(void)
-{
- dmb_st;
-}
-
-static inline void
-cn10k_ml_deq_barrier_register(void)
-{
- dsb_st;
-}
-
static void
qp_memzone_name_get(char *name, int size, int dev_id, int qp_id)
{
@@ -242,9 +181,6 @@ cn10k_ml_qp_create(const struct rte_ml_dev *dev, uint16_t qp_id, uint32_t nb_des
qp->stats.dequeued_count = 0;
qp->stats.enqueue_err_count = 0;
qp->stats.dequeue_err_count = 0;
- qp->block_size =
- (ML_POLL_REGISTER_END - ML_POLL_REGISTER_START + 1) / dev->data->nb_queue_pairs;
- qp->block_start = ML_POLL_REGISTER_START + qp_id * qp->block_size;
/* Initialize job command */
for (i = 0; i < qp->nb_desc; i++) {
@@ -933,11 +869,7 @@ cn10k_ml_dev_info_get(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info)
else
dev_info->max_queue_pairs = ML_CN10K_MAX_QP_PER_DEVICE_LF;
- if (strcmp(mldev->fw.poll_mem, "register") == 0)
- dev_info->max_desc = ML_CN10K_MAX_DESC_PER_QP / dev_info->max_queue_pairs;
- else if (strcmp(mldev->fw.poll_mem, "ddr") == 0)
- dev_info->max_desc = ML_CN10K_MAX_DESC_PER_QP;
-
+ dev_info->max_desc = ML_CN10K_MAX_DESC_PER_QP;
dev_info->max_io = ML_CN10K_MAX_INPUT_OUTPUT;
dev_info->max_segments = ML_CN10K_MAX_SEGMENTS;
dev_info->align_size = ML_CN10K_ALIGN_SIZE;
@@ -1118,24 +1050,9 @@ cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c
mldev->ml_jcmdq_enqueue = roc_ml_jcmdq_enqueue_lf;
/* Set polling function pointers */
- if (strcmp(mldev->fw.poll_mem, "ddr") == 0) {
- mldev->set_poll_addr = cn10k_ml_set_poll_addr_ddr;
- mldev->set_poll_ptr = cn10k_ml_set_poll_ptr_ddr;
- mldev->get_poll_ptr = cn10k_ml_get_poll_ptr_ddr;
- } else if (strcmp(mldev->fw.poll_mem, "register") == 0) {
- mldev->set_poll_addr = cn10k_ml_set_poll_addr_reg;
- mldev->set_poll_ptr = cn10k_ml_set_poll_ptr_reg;
- mldev->get_poll_ptr = cn10k_ml_get_poll_ptr_reg;
- }
-
- /* Set barrier function pointers */
- if (strcmp(mldev->fw.poll_mem, "ddr") == 0) {
- mldev->set_enq_barrier = cn10k_ml_enq_barrier_ddr;
- mldev->set_deq_barrier = cn10k_ml_deq_barrier_ddr;
- } else if (strcmp(mldev->fw.poll_mem, "register") == 0) {
- mldev->set_enq_barrier = cn10k_ml_enq_barrier_register;
- mldev->set_deq_barrier = cn10k_ml_deq_barrier_register;
- }
+ mldev->set_poll_addr = cn10k_ml_set_poll_addr;
+ mldev->set_poll_ptr = cn10k_ml_set_poll_ptr;
+ mldev->get_poll_ptr = cn10k_ml_get_poll_ptr;
dev->enqueue_burst = cn10k_ml_enqueue_burst;
dev->dequeue_burst = cn10k_ml_dequeue_burst;
@@ -2390,15 +2307,14 @@ cn10k_ml_enqueue_burst(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op
op = ops[count];
req = &queue->reqs[head];
- mldev->set_poll_addr(qp, req, head);
+ mldev->set_poll_addr(req);
cn10k_ml_prep_fp_job_descriptor(dev, req, op);
memset(&req->result, 0, sizeof(struct cn10k_ml_result));
req->result.error_code.s.etype = ML_ETYPE_UNKNOWN;
req->result.user_ptr = op->user_ptr;
- mldev->set_enq_barrier();
- mldev->set_poll_ptr(&mldev->roc, req);
+ mldev->set_poll_ptr(req);
enqueued = mldev->ml_jcmdq_enqueue(&mldev->roc, &req->jcmd);
if (unlikely(!enqueued))
goto jcmdq_full;
@@ -2445,7 +2361,7 @@ cn10k_ml_dequeue_burst(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op
dequeue_req:
req = &queue->reqs[tail];
- status = mldev->get_poll_ptr(&mldev->roc, req);
+ status = mldev->get_poll_ptr(req);
if (unlikely(status != ML_CN10K_POLL_JOB_FINISH)) {
if (plt_tsc_cycles() < req->timeout)
goto empty_or_active;
@@ -2453,7 +2369,6 @@ cn10k_ml_dequeue_burst(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op
req->result.error_code.s.etype = ML_ETYPE_DRIVER;
}
- mldev->set_deq_barrier();
cn10k_ml_result_update(dev, qp_id, &req->result, req->op);
ops[count] = req->op;
@@ -2515,14 +2430,14 @@ cn10k_ml_inference_sync(struct rte_ml_dev *dev, struct rte_ml_op *op)
model = dev->data->models[op->model_id];
req = model->req;
- cn10k_ml_set_sync_addr(mldev, req);
+ cn10k_ml_set_poll_addr(req);
cn10k_ml_prep_fp_job_descriptor(dev, req, op);
memset(&req->result, 0, sizeof(struct cn10k_ml_result));
req->result.error_code.s.etype = ML_ETYPE_UNKNOWN;
req->result.user_ptr = op->user_ptr;
- mldev->set_poll_ptr(&mldev->roc, req);
+ mldev->set_poll_ptr(req);
req->jcmd.w1.s.jobptr = PLT_U64_CAST(&req->jd);
timeout = true;
@@ -2542,7 +2457,7 @@ cn10k_ml_inference_sync(struct rte_ml_dev *dev, struct rte_ml_op *op)
timeout = true;
do {
- if (mldev->get_poll_ptr(&mldev->roc, req) == ML_CN10K_POLL_JOB_FINISH) {
+ if (mldev->get_poll_ptr(req) == ML_CN10K_POLL_JOB_FINISH) {
timeout = false;
break;
}
@@ -67,12 +67,6 @@ struct cn10k_ml_qp {
/* Statistics per queue-pair */
struct rte_ml_dev_stats stats;
-
- /* Register block start for polling */
- uint32_t block_start;
-
- /* Register block end for polling */
- uint32_t block_size;
};
/* Device ops */