@@ -54,6 +54,7 @@ struct lcore_params {
struct rte_mbuf **srcs;
struct rte_mbuf **dsts;
struct sge_info sge;
+ struct rte_dma_op **dma_ops;
volatile struct worker_info worker_info;
};
@@ -198,6 +199,16 @@ configure_dmadev_queue(uint32_t dev_id, struct test_configure *cfg, uint8_t sges
if (vchan_data_populate(dev_id, &qconf, cfg, dev_num) != 0)
rte_exit(EXIT_FAILURE, "Error with vchan data populate.\n");
+ if (rte_dma_info_get(dev_id, &info) != 0)
+ rte_exit(EXIT_FAILURE, "Error with getting device info.\n");
+
+ if (cfg->use_ops) {
+ if (!(info.dev_capa & RTE_DMA_CAPA_OPS_ENQ_DEQ))
+ rte_exit(EXIT_FAILURE, "Error: device %s does not support enq_deq ops.\n",
+ info.dev_name);
+ dev_config.flags = RTE_DMA_CFG_FLAG_ENQ_DEQ;
+ }
+
if (rte_dma_configure(dev_id, &dev_config) != 0)
rte_exit(EXIT_FAILURE, "Error with dma configure.\n");
@@ -395,6 +406,61 @@ do_dma_sg_mem_copy(void *p)
return 0;
}
+static inline int
+do_dma_enq_deq_mem_copy(void *p)
+{
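+/* Burst size used when draining completed ops from the device. */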
+#define DEQ_SZ 64
+ struct lcore_params *para = (struct lcore_params *)p;
+ volatile struct worker_info *worker_info = &(para->worker_info);
+ struct rte_dma_op **dma_ops = para->dma_ops;
+ uint16_t kick_batch = para->kick_batch, sz;
+ uint16_t enq, deq, poll_cnt;
+ uint64_t tenq, tdeq;
+ const uint16_t dev_id = para->dev_id;
+ uint32_t nr_buf = para->nr_buf;
+ struct rte_dma_op *op[DEQ_SZ];
+ uint32_t i;
+
+ worker_info->stop_flag = false;
+ worker_info->ready_flag = true;
+
+ while (!worker_info->start_flag)
+ ;
+
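+ /* A single kick cannot cover more ops than were prepared. */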
+ if (kick_batch > nr_buf)
+ kick_batch = nr_buf;
+
+ tenq = 0;
+ tdeq = 0;
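+ /* Keep enqueueing kick_batch-sized bursts until asked to stop. */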
+ while (1) {
+ for (i = 0; i < nr_buf; i += kick_batch) {
+ sz = RTE_MIN(nr_buf - i, kick_batch);
+ enq = rte_dma_enqueue_ops(dev_id, 0, &dma_ops[i], sz);
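+ /* Ring full: drain completed ops, then retry the remainder of the burst. */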
+ while (enq < sz) {
+ do {
+ deq = rte_dma_dequeue_ops(dev_id, 0, op, DEQ_SZ);
+ tdeq += deq;
+ } while (deq);
+ enq += rte_dma_enqueue_ops(dev_id, 0, &dma_ops[i + enq], sz - enq);
+ if (worker_info->stop_flag)
+ break;
+ }
+ tenq += enq;
+
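+ /* Throughput statistics count successfully enqueued ops. */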
+ worker_info->total_cpl += enq;
+ }
+
+ if (worker_info->stop_flag)
+ break;
+ }
+
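+ /* Final drain: collect outstanding completions so tdeq catches up with tenq. */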
+ poll_cnt = 0;
+ while ((tenq != tdeq) && (poll_cnt++ < POLL_MAX))
+ tdeq += rte_dma_dequeue_ops(dev_id, 0, op, DEQ_SZ);
+
+ return 0;
+}
+
static inline int
do_cpu_mem_copy(void *p)
{
@@ -436,16 +502,17 @@ dummy_free_ext_buf(void *addr, void *opaque)
}
static int
-setup_memory_env(struct test_configure *cfg,
- struct rte_mbuf ***srcs, struct rte_mbuf ***dsts,
- struct rte_dma_sge **src_sges, struct rte_dma_sge **dst_sges)
+setup_memory_env(struct test_configure *cfg, struct rte_mbuf ***srcs, struct rte_mbuf ***dsts,
+ struct rte_dma_sge **src_sges, struct rte_dma_sge **dst_sges,
+ struct rte_dma_op ***dma_ops)
{
unsigned int cur_buf_size = cfg->buf_size.cur;
unsigned int buf_size = cur_buf_size + RTE_PKTMBUF_HEADROOM;
- unsigned int nr_sockets;
+ bool is_src_numa_incorrect, is_dst_numa_incorrect;
uint32_t nr_buf = cfg->nr_buf;
+ unsigned int nr_sockets;
+ uintptr_t ops;
uint32_t i;
- bool is_src_numa_incorrect, is_dst_numa_incorrect;
nr_sockets = rte_socket_count();
is_src_numa_incorrect = (cfg->src_numa_node >= nr_sockets);
@@ -540,6 +607,34 @@ setup_memory_env(struct test_configure *cfg,
if (!((i+1) % nb_dst_sges))
(*dst_sges)[i].length += (cur_buf_size % nb_dst_sges);
}
+
+ if (cfg->use_ops) {
+ /* One op carries the full SGE list for a copy, so the number of ops
+ * is nr_buf / RTE_MAX(nb_src_sges, nb_dst_sges).
+ */
+ size_t op_size = sizeof(struct rte_dma_op) +
+ ((nb_src_sges + nb_dst_sges) * sizeof(struct rte_dma_sge));
+
+ nr_buf /= RTE_MAX(nb_src_sges, nb_dst_sges);
+ *dma_ops = rte_zmalloc(NULL, nr_buf * sizeof(struct rte_dma_op *),
+ RTE_CACHE_LINE_SIZE);
+ if (*dma_ops == NULL) {
+ printf("Error: dma_ops container malloc failed.\n");
+ return -1;
+ }
+
+ /* One flat allocation for all ops; each op is followed by space for
+ * its src_dst_seg[] array of nb_src_sges + nb_dst_sges entries.
+ */
+ ops = (uintptr_t)rte_zmalloc(NULL, nr_buf * op_size, RTE_CACHE_LINE_SIZE);
+ if (ops == 0) {
+ printf("Error: dma_ops malloc failed.\n");
+ rte_free(*dma_ops);
+ *dma_ops = NULL;
+ return -1;
+ }
+
+ for (i = 0; i < nr_buf; i++)
+ (*dma_ops)[i] = (struct rte_dma_op *)(ops + (i * op_size));
+ }
}
return 0;
@@ -582,8 +677,12 @@ get_work_function(struct test_configure *cfg)
if (cfg->is_dma) {
if (!cfg->is_sg)
fn = do_dma_plain_mem_copy;
- else
- fn = do_dma_sg_mem_copy;
+ else if (cfg->use_ops)
+ fn = do_dma_enq_deq_mem_copy;
+ else
+ fn = do_dma_sg_mem_copy;
} else {
fn = do_cpu_mem_copy;
}
@@ -680,6 +779,7 @@ mem_copy_benchmark(struct test_configure *cfg)
struct rte_dma_sge *src_sges = NULL, *dst_sges = NULL;
struct vchan_dev_config *vchan_dev = NULL;
struct lcore_dma_map_t *lcore_dma_map = NULL;
+ struct rte_dma_op **dma_ops = NULL;
unsigned int buf_size = cfg->buf_size.cur;
uint16_t kick_batch = cfg->kick_batch.cur;
uint16_t nb_workers = cfg->num_worker;
@@ -690,13 +790,13 @@ mem_copy_benchmark(struct test_configure *cfg)
float mops, mops_total;
float bandwidth, bandwidth_total;
uint32_t nr_sgsrc = 0, nr_sgdst = 0;
- uint32_t nr_buf;
+ uint32_t nr_buf, nr_ops;
int ret = 0;
nr_buf = align_buffer_count(cfg, &nr_sgsrc, &nr_sgdst);
cfg->nr_buf = nr_buf;
- if (setup_memory_env(cfg, &srcs, &dsts, &src_sges, &dst_sges) < 0)
+ if (setup_memory_env(cfg, &srcs, &dsts, &src_sges, &dst_sges, &dma_ops) < 0)
goto out;
if (cfg->is_dma)
@@ -751,6 +851,25 @@ mem_copy_benchmark(struct test_configure *cfg)
goto out;
}
+ if (cfg->is_sg && cfg->use_ops) {
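+ /* Split the ops evenly across workers and pre-fill each op's segment
+ * list: nb_src_sges source entries followed by nb_dst_sges destination entries.
+ */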
+ nr_ops = nr_buf / RTE_MAX(cfg->nb_src_sges, cfg->nb_dst_sges);
+ lcores[i]->nr_buf = nr_ops / nb_workers;
+ lcores[i]->dma_ops = dma_ops + (nr_ops / nb_workers * i);
+ for (j = 0; j < (nr_ops / nb_workers); j++) {
+ for (k = 0; k < cfg->nb_src_sges; k++)
+ lcores[i]->dma_ops[j]->src_dst_seg[k] =
+ lcores[i]->sge.srcs[(j * cfg->nb_src_sges) + k];
+
+ for (k = 0; k < cfg->nb_dst_sges; k++)
+ lcores[i]->dma_ops[j]->src_dst_seg[k + cfg->nb_src_sges] =
+ lcores[i]->sge.dsts[(j * cfg->nb_dst_sges) + k];
+
+ lcores[i]->dma_ops[j]->nb_src = cfg->nb_src_sges;
+ lcores[i]->dma_ops[j]->nb_dst = cfg->nb_dst_sges;
+ lcores[i]->dma_ops[j]->vchan = 0;
+ }
+ }
+
rte_eal_remote_launch(get_work_function(cfg), (void *)(lcores[i]), lcore_id);
}
@@ -52,6 +52,8 @@
;
; For DMA scatter-gather memory copy, the parameters need to be configured
; and they are valid only when type is DMA_MEM_COPY.
+;
+; To use enqueue/dequeue operations, set ``use_enq_deq_ops=1`` in the configuration.
; To specify a configuration file, use the "--config" flag followed by the path to the file.
@@ -88,6 +90,7 @@ test_seconds=2
lcore_dma0=lcore=10,dev=0000:00:04.1,dir=mem2mem
lcore_dma1=lcore=11,dev=0000:00:04.2,dir=mem2mem
eal_args=--in-memory --file-prefix=test
+use_enq_deq_ops=0
[case3]
skip=1
@@ -297,8 +297,8 @@ load_configs(const char *path)
char section_name[CFG_NAME_LEN];
const char *case_type;
const char *lcore_dma;
- const char *mem_size_str, *buf_size_str, *ring_size_str, *kick_batch_str,
- *src_sges_str, *dst_sges_str;
+ const char *mem_size_str, *buf_size_str, *ring_size_str, *kick_batch_str, *src_sges_str,
+ *dst_sges_str, *use_dma_ops;
const char *skip;
struct rte_kvargs *kvlist;
int args_nr, nb_vp;
@@ -349,6 +349,15 @@ load_configs(const char *path)
continue;
}
+ if (is_dma) {
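+ /* Enqueue/dequeue ops mode is optional and defaults to off when the key is absent. */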
+ use_dma_ops =
+ rte_cfgfile_get_entry(cfgfile, section_name, "use_enq_deq_ops");
+ test_case->use_ops = (use_dma_ops != NULL && atoi(use_dma_ops) == 1);
+ }
+
test_case->is_dma = is_dma;
test_case->src_numa_node = (int)atoi(rte_cfgfile_get_entry(cfgfile,
section_name, "src_numa_node"));
@@ -58,6 +58,7 @@ struct test_configure {
uint16_t opcode;
bool is_dma;
bool is_sg;
+ bool use_ops;
struct lcore_dma_config dma_config[MAX_WORKER_NB];
struct test_configure_entry mem_size;
struct test_configure_entry buf_size;
@@ -69,6 +69,7 @@ along with the application to demonstrate all the parameters.
lcore_dma1=lcore=11,dev=0000:00:04.2,dir=dev2mem,raddr=0x200000000,coreid=1,pfid=2,vfid=3
lcore_dma2=lcore=12,dev=0000:00:04.3,dir=mem2dev,raddr=0x200000000,coreid=1,pfid=2,vfid=3
eal_args=--in-memory --file-prefix=test
+use_enq_deq_ops=0
The configuration file is divided into multiple sections, each section represents a test case.
The four mandatory variables ``mem_size``, ``buf_size``, ``dma_ring_size``, and ``kick_batch``
@@ -83,6 +84,7 @@ The variables for mem2dev and dev2mem copy are
and can vary for each device.
For scatter-gather copy test ``dma_src_sge``, ``dma_dst_sge`` must be configured.
+Enqueue and dequeue operations can be enabled by setting ``use_enq_deq_ops=1``.
Each case can only have one variable change,
and each change will generate a scenario, so each case can have multiple scenarios.
@@ -170,6 +172,9 @@ Configuration Parameters
``eal_args``
Specifies the EAL arguments.
+ ``use_enq_deq_ops``
+ Specifies whether to use enqueue/dequeue operations.
+ Set to ``1`` to enable them; ``0`` (the default) leaves them disabled.
Running the Application
-----------------------