@@ -18,6 +18,7 @@ apps = [
'test-pmd',
'test-regex',
'test-sad',
+ 'test-dma-perf',
]
default_cflags = machine_args + ['-DALLOW_EXPERIMENTAL_API']
new file mode 100644
@@ -0,0 +1,537 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <rte_time.h>
+#include <rte_mbuf.h>
+#include <rte_dmadev.h>
+#include <rte_malloc.h>
+
+#include "eal_private.h"
+
+#include "main.h"
+#include "benchmark.h"
+
+
+#define MAX_DMA_CPL_NB 255
+
+/* CSV row formats; fix the user-visible "Senario" -> "Scenario" typo. */
+#define CSV_LINE_DMA_FMT "Scenario %u,%u,%u,%u,%u,%u,%lu,%.3lf,%lu\n"
+#define CSV_LINE_CPU_FMT "Scenario %u,%u,NA,%u,%u,%u,%lu,%.3lf,%lu\n"
+
+/* Per-worker benchmark parameters handed to each launched lcore. */
+struct lcore_params {
+	uint16_t dev_id;		/* DMA device to drive (0 for CPU-copy workers) */
+	uint32_t nr_buf;		/* number of buffers this worker copies */
+	uint16_t kick_batch;		/* enqueues between doorbell kicks */
+	uint32_t buf_size;		/* bytes copied per buffer */
+	uint32_t repeat_times;		/* full passes over the buffer set */
+	uint16_t mpool_iter_step;	/* stride used when walking the buffer arrays */
+	struct rte_mbuf **srcs;		/* this worker's slice of source mbufs */
+	struct rte_mbuf **dsts;		/* this worker's slice of destination mbufs */
+	uint8_t senario_id;		/* scenario index used in the CSV rows */
+};
+
+/* Argument bundle for cache_flush_buf(). */
+struct buf_info {
+	struct rte_mbuf **array;	/* buffers whose payloads get flushed */
+	uint32_t nr_buf;		/* number of entries in array */
+	uint32_t buf_size;		/* payload bytes per buffer */
+};
+
+/* Mempools backing the source/destination mbufs for the current scenario;
+ * created in setup_memory_env() and freed at the end of each benchmark run. */
+static struct rte_mempool *src_pool;
+static struct rte_mempool *dst_pool;
+
+/* Error-print wrapper that prefixes the calling function and line number. */
+#define PRINT_ERR(...) print_err(__func__, __LINE__, __VA_ARGS__)
+
+/* Emit "In <func>:<line> - " followed by the formatted message on stderr.
+ * Returns the total number of characters written (fprintf semantics). */
+static inline int
+__rte_format_printf(3, 4)
+print_err(const char *func, int lineno, const char *format, ...)
+{
+	va_list args;
+	int written;
+
+	written = fprintf(stderr, "In %s:%d - ", func, lineno);
+	va_start(args, format);
+	written += vfprintf(stderr, format, args);
+	va_end(args);
+	return written;
+}
+
+/*
+ * Derive the report metrics from one worker's raw cycle measurements.
+ * memory:    total touched MB (src + dst); ave_cycle: cycles per copy;
+ * bandwidth: Gbps per copy;                ops:       copies per second.
+ */
+static inline void
+calc_result(struct lcore_params *p, uint64_t cp_cycle_sum, double time_sec,
+			uint32_t repeat_times, uint32_t *memory, uint64_t *ave_cycle,
+			float *bandwidth, uint64_t *ops)
+{
+	/* Widen before multiplying: buf_size * nr_buf * 2 can wrap in
+	 * 32 bits for footprints >= 4 GB. */
+	*memory = (uint32_t)(((uint64_t)p->buf_size * p->nr_buf * 2) / (1024 * 1024));
+	/* Use the repeat_times argument consistently (the original mixed it
+	 * with p->repeat_times; both callers pass the same value). */
+	*ave_cycle = cp_cycle_sum / ((uint64_t)repeat_times * p->nr_buf);
+	/* Guard the division: a zero average would otherwise divide by zero. */
+	if (*ave_cycle != 0)
+		*bandwidth = p->buf_size * 8 * rte_get_timer_hz() /
+				(*ave_cycle * 1000 * 1000 * 1000.0);
+	else
+		*bandwidth = 0;
+	*ops = (double)p->nr_buf * repeat_times / time_sec;
+}
+
+/*
+ * Print one worker's results to stdout and stage this lcore's CSV row in
+ * output_str[] (flushed to the result file later by output_csv()).
+ * NOTE(review): "%lu" for uint64_t assumes a 64-bit long; PRIu64 from
+ * <inttypes.h> would be portable — TODO confirm target platforms.
+ */
+static void
+output_result(uint8_t senario_id, uint32_t lcore_id, uint16_t dev_id, uint64_t ave_cycle,
+			uint32_t buf_size, uint32_t nr_buf, uint32_t memory,
+			float bandwidth, uint64_t ops, bool is_dma)
+{
+	if (is_dma)
+		printf("lcore %u, DMA %u:\n"
+				"average cycles: %lu,"
+				" buffer size: %u, nr_buf: %u,"
+				" memory: %uMB, frequency: %lu.\n",
+				lcore_id,
+				dev_id,
+				ave_cycle,
+				buf_size,
+				nr_buf,
+				memory,
+				rte_get_timer_hz());
+	else
+		printf("lcore %u\n"
+			"average cycles: %lu,"
+			" buffer size: %u, nr_buf: %u,"
+			" memory: %uMB, frequency: %lu.\n",
+			lcore_id,
+			ave_cycle,
+			buf_size,
+			nr_buf,
+			memory,
+			rte_get_timer_hz());
+
+	printf("Average bandwidth: %.3lfGbps, OPS: %lu\n", bandwidth, ops);
+
+	/* Stage the CSV row; one slot per lcore, consumed by output_csv(). */
+	if (is_dma)
+		snprintf(output_str[lcore_id], MAX_OUTPUT_STR_LEN,
+			CSV_LINE_DMA_FMT,
+			senario_id, lcore_id, dev_id, buf_size,
+			nr_buf, memory, ave_cycle, bandwidth, ops);
+	else
+		snprintf(output_str[lcore_id], MAX_OUTPUT_STR_LEN,
+			CSV_LINE_CPU_FMT,
+			senario_id, lcore_id, buf_size,
+			nr_buf, memory, ave_cycle, bandwidth, ops);
+}
+
+/*
+ * Flush every cache line of every buffer's payload so timed copies start
+ * from cold caches. Uses the x86 clflush intrinsic; this tool currently
+ * targets Intel platforms only.
+ */
+static inline void
+cache_flush_buf(void *arg)
+{
+	struct buf_info *info = arg;
+	struct rte_mbuf **bufs = info->array;
+	char *data;
+	/* Round up so a tail shorter than one 64-byte cache line is still
+	 * flushed (the original truncating division skipped it). */
+	uint32_t nr_lines = (info->buf_size + 63) / 64;
+
+	for (uint32_t i = 0; i < info->nr_buf; i++) {
+		data = rte_pktmbuf_mtod(bufs[i], char *);
+		for (uint32_t k = 0; k < nr_lines; k++)
+			__builtin_ia32_clflush(data + k * 64);
+	}
+}
+
+/* Configure one vchan on a DMA device and start it; exits on any failure. */
+static void
+configure_dmadev_queue(uint32_t dev_id, uint32_t ring_size)
+{
+	uint16_t vchan = 0;
+	struct rte_dma_info info;
+	struct rte_dma_conf dev_config = { .nb_vchans = 1 };
+	struct rte_dma_vchan_conf qconf = {
+		.direction = RTE_DMA_DIR_MEM_TO_MEM,
+		.nb_desc = ring_size
+	};
+
+	if (rte_dma_configure(dev_id, &dev_config) != 0)
+		rte_exit(EXIT_FAILURE, "Error with rte_dma_configure()\n");
+
+	/* rte_exit() keeps error handling consistent with the other paths;
+	 * rte_panic() also expects a printf-style format argument, which the
+	 * original zero-argument calls did not supply. */
+	if (rte_dma_vchan_setup(dev_id, vchan, &qconf) != 0)
+		rte_exit(EXIT_FAILURE, "Error with queue configuration\n");
+
+	rte_dma_info_get(dev_id, &info);
+	if (info.nb_vchans != 1)
+		rte_exit(EXIT_FAILURE,
+			"Error, no configured queues reported on device id %u\n", dev_id);
+
+	if (rte_dma_start(dev_id) != 0)
+		rte_exit(EXIT_FAILURE, "Error with rte_dma_start()\n");
+}
+
+/*
+ * Claim and configure one DMA device per worker, recording the ids in
+ * dmadev_ids[] and the count in nb_dmadevs. Returns 0 on success, -1 when
+ * fewer devices than workers are available.
+ */
+static int
+config_dmadevs(uint32_t nb_workers, uint32_t ring_size)
+{
+	uint32_t cnt;
+	int16_t dev_id = rte_dma_next_dev(0);
+
+	nb_dmadevs = 0;
+
+	for (cnt = 0; cnt < nb_workers && dev_id != -1; cnt++) {
+		dmadev_ids[cnt] = dev_id;
+		configure_dmadev_queue(dmadev_ids[cnt], ring_size);
+		dev_id = rte_dma_next_dev(dev_id + 1);
+		nb_dmadevs++;
+	}
+
+	if (nb_dmadevs < nb_workers) {
+		printf("Not enough dmadevs (%u) for all workers (%u).\n", nb_dmadevs, nb_workers);
+		return -1;
+	}
+
+	RTE_LOG(INFO, DMA, "Number of used dmadevs: %u.\n", nb_dmadevs);
+
+	return 0;
+}
+
+/*
+ * One full pass of asynchronous DMA copies over nr_buf buffers, walking the
+ * arrays with stride mpool_iter_step. Kicks the doorbell every kick_batch
+ * enqueues and drains all completions before returning from each stride.
+ */
+static inline void
+do_dma_copy(uint16_t dev_id, uint32_t nr_buf, uint16_t kick_batch, uint32_t buf_size,
+			uint16_t mpool_iter_step, struct rte_mbuf **srcs, struct rte_mbuf **dsts)
+{
+	int64_t async_cnt = 0;
+	int nr_cpl = 0;
+	uint32_t index;
+
+	for (uint16_t offset = 0; offset < mpool_iter_step; offset++) {
+		for (uint32_t i = 0; index = i * mpool_iter_step + offset, index < nr_buf; i++) {
+			if (unlikely(rte_dma_copy(dev_id,
+						0,
+						srcs[index]->buf_iova + srcs[index]->data_off,
+						dsts[index]->buf_iova + dsts[index]->data_off,
+						buf_size,
+						0) < 0)) {
+				/* Ring full: kick the doorbell and drain
+				 * completions until space frees, then retry. */
+				rte_dma_submit(dev_id, 0);
+				while (rte_dma_burst_capacity(dev_id, 0) == 0) {
+					nr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB,
+								NULL, NULL);
+					async_cnt -= nr_cpl;
+				}
+				if (rte_dma_copy(dev_id,
+						0,
+						srcs[index]->buf_iova + srcs[index]->data_off,
+						dsts[index]->buf_iova + dsts[index]->data_off,
+						buf_size,
+						0) < 0) {
+					printf("enqueue failed again at %u\n", index);
+					printf("space:%d\n", rte_dma_burst_capacity(dev_id, 0));
+					rte_exit(EXIT_FAILURE, "DMA enqueue failed\n");
+				}
+			}
+			async_cnt++;
+
+			/* Kick the doorbell every kick_batch enqueues. The '%'
+			 * operator is used, so kick_batch need not be a power
+			 * of two (the original comment claimed '&'). */
+			if (unlikely((async_cnt % kick_batch) == 0)) {
+				rte_dma_submit(dev_id, 0);
+				/* add a poll to avoid ring full */
+				nr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB, NULL, NULL);
+				async_cnt -= nr_cpl;
+			}
+		}
+
+		/* Flush and wait for everything in flight before moving to
+		 * the next stride offset. */
+		rte_dma_submit(dev_id, 0);
+		while (async_cnt > 0) {
+			nr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB, NULL, NULL);
+			async_cnt -= nr_cpl;
+		}
+	}
+}
+
+/*
+ * Worker entry point for the DMA benchmark: runs repeat_times passes of
+ * do_dma_copy() over this lcore's buffer slice, then computes and reports
+ * the metrics. Takes ownership of p (heap-allocated struct lcore_params)
+ * and frees it before returning.
+ */
+static int
+dma_copy(void *p)
+{
+	uint64_t ops;
+	uint32_t memory;
+	float bandwidth;
+	double time_sec;
+	uint32_t lcore_id = rte_lcore_id();
+	struct lcore_params *params = (struct lcore_params *)p;
+	uint32_t repeat_times = params->repeat_times;
+	uint32_t buf_size = params->buf_size;
+	uint16_t kick_batch = params->kick_batch;
+	uint32_t lcore_nr_buf = params->nr_buf;
+	uint16_t dev_id = params->dev_id;
+	uint16_t mpool_iter_step = params->mpool_iter_step;
+	struct rte_mbuf **srcs = params->srcs;
+	struct rte_mbuf **dsts = params->dsts;
+	uint64_t begin, end, total_cycles = 0, avg_cycles = 0;
+
+	/* Time the whole run in TSC cycles. */
+	begin = rte_rdtsc();
+
+	for (uint32_t r = 0; r < repeat_times; r++)
+		do_dma_copy(dev_id, lcore_nr_buf, kick_batch, buf_size,
+			mpool_iter_step, srcs, dsts);
+
+	end = rte_rdtsc();
+	total_cycles = end - begin;
+	time_sec = (double)total_cycles / rte_get_timer_hz();
+
+	calc_result(params, total_cycles, time_sec, repeat_times, &memory,
+			&avg_cycles, &bandwidth, &ops);
+	output_result(params->senario_id, lcore_id, dev_id, avg_cycles, buf_size, lcore_nr_buf,
+			memory, bandwidth, ops, true);
+
+	rte_free(p);
+
+	return 0;
+}
+
+/*
+ * Worker entry point for the CPU (memcpy) benchmark: repeat_times passes of
+ * rte_memcpy over this lcore's buffer slice, then metric computation and
+ * reporting. Takes ownership of p and frees it before returning.
+ */
+static int
+cpu_copy(void *p)
+{
+	uint32_t idx;
+	uint32_t lcore_id;
+	uint32_t memory;
+	uint64_t ops;
+	float bandwidth;
+	double time_sec;
+	struct lcore_params *params = (struct lcore_params *)p;
+	uint32_t repeat_times = params->repeat_times;
+	uint32_t buf_size = params->buf_size;
+	uint32_t lcore_nr_buf = params->nr_buf;
+	uint16_t mpool_iter_step = params->mpool_iter_step;
+	struct rte_mbuf **srcs = params->srcs;
+	struct rte_mbuf **dsts = params->dsts;
+	uint64_t begin, end, total_cycles = 0, avg_cycles = 0;
+	uint32_t k, j, offset;
+
+	begin = rte_rdtsc();
+
+	for (k = 0; k < repeat_times; k++) {
+		/* copy buffer from src to dst */
+		for (offset = 0; offset < mpool_iter_step; offset++) {
+			for (j = 0; idx = j * mpool_iter_step + offset, idx < lcore_nr_buf; j++) {
+				/* rte_pktmbuf_mtod() is buf_addr + data_off;
+				 * replaces the non-portable casts of pointers
+				 * through uint64_t. */
+				rte_memcpy(rte_pktmbuf_mtod(dsts[idx], void *),
+					rte_pktmbuf_mtod(srcs[idx], void *),
+					buf_size);
+			}
+		}
+	}
+
+	end = rte_rdtsc();
+	total_cycles = end - begin;
+	time_sec = (double)total_cycles / rte_get_timer_hz();
+
+	lcore_id = rte_lcore_id();
+
+	calc_result(params, total_cycles, time_sec, repeat_times, &memory,
+			&avg_cycles, &bandwidth, &ops);
+	output_result(params->senario_id, lcore_id, 0, avg_cycles, buf_size, lcore_nr_buf,
+			memory, bandwidth, ops, false);
+
+	rte_free(p);
+
+	return 0;
+}
+
+/*
+ * Create the src/dst mempools and the per-buffer mbuf pointer arrays for one
+ * scenario. On failure returns -1; the caller's cleanup path frees whatever
+ * was allocated (arrays are zero-initialized so rte_pktmbuf_free() only ever
+ * sees valid pointers or NULL).
+ * NOTE(review): uses rte_eal_get_configuration()/numa_node_count from
+ * "eal_private.h" — an app including an EAL-private header is fragile;
+ * consider rte_socket_count() instead — TODO confirm.
+ */
+static int
+setup_memory_env(struct test_configure *cfg, struct rte_mbuf ***srcs,
+			struct rte_mbuf ***dsts)
+{
+	uint32_t i;
+	unsigned int buf_size = cfg->buf_size.cur;
+	float mem_size = cfg->mem_size.cur;
+	/* Footprint is split evenly between src and dst sides. */
+	uint32_t nr_buf = (mem_size * 1024 * 1024) / (buf_size * 2);
+	struct rte_config *rte_cfg;
+
+	rte_cfg = rte_eal_get_configuration();
+	if (cfg->src_numa_node >= rte_cfg->numa_node_count ||
+	    cfg->dst_numa_node >= rte_cfg->numa_node_count) {
+		printf("Error: Source or destination numa exceeds the actual numa nodes.\n");
+		return -1;
+	}
+
+	cfg->nr_buf = nr_buf;
+
+	src_pool = rte_pktmbuf_pool_create("Benchmark_DMA_SRC",
+			nr_buf, /* n == num elements */
+			64,  /* cache size */
+			0,   /* priv size */
+			buf_size + RTE_PKTMBUF_HEADROOM,
+			cfg->src_numa_node);
+	if (src_pool == NULL) {
+		PRINT_ERR("Error with source mempool creation.\n");
+		return -1;
+	}
+
+	dst_pool = rte_pktmbuf_pool_create("Benchmark_DMA_DST",
+			nr_buf, /* n == num elements */
+			64,  /* cache size */
+			0,   /* priv size */
+			buf_size + RTE_PKTMBUF_HEADROOM,
+			cfg->dst_numa_node);
+	if (dst_pool == NULL) {
+		PRINT_ERR("Error with destination mempool creation.\n");
+		return -1;
+	}
+
+	/* calloc (not malloc): on a partial failure below, the callers free
+	 * all cfg->nr_buf slots, and rte_pktmbuf_free(NULL) is a safe no-op,
+	 * whereas uninitialized slots would be garbage pointers. */
+	*srcs = calloc(nr_buf, sizeof(struct rte_mbuf *));
+	if (*srcs == NULL) {
+		printf("Error: srcs malloc failed.\n");
+		return -1;
+	}
+
+	*dsts = calloc(nr_buf, sizeof(struct rte_mbuf *));
+	if (*dsts == NULL) {
+		printf("Error: dsts malloc failed.\n");
+		return -1;
+	}
+
+	for (i = 0; i < nr_buf; i++) {
+		(*srcs)[i] = rte_pktmbuf_alloc(src_pool);
+		(*dsts)[i] = rte_pktmbuf_alloc(dst_pool);
+		if ((!(*srcs)[i]) || (!(*dsts)[i])) {
+			printf("src: %p, dst: %p\n", (*srcs)[i], (*dsts)[i]);
+			return -1;
+		}
+
+		(*srcs)[i]->data_len = (*srcs)[i]->pkt_len = buf_size;
+		(*dsts)[i]->data_len = (*dsts)[i]->pkt_len = buf_size;
+	}
+
+	return 0;
+}
+
+/*
+ * Run one DMA-copy scenario: allocate buffers and DMA devices, optionally
+ * flush caches, launch one dma_copy() worker per configured dmadev, wait,
+ * then tear everything down.
+ */
+void
+dma_copy_benchmark(struct test_configure *cfg)
+{
+	uint32_t i;
+	uint32_t offset;
+	unsigned int lcore_id = 0;
+	struct rte_mbuf **srcs = NULL, **dsts = NULL;
+	unsigned int buf_size = cfg->buf_size.cur;
+	uint16_t kick_batch = cfg->kick_batch.cur;
+	uint16_t mpool_iter_step = cfg->mpool_iter_step;
+	uint32_t nr_buf;
+	uint16_t nb_workers = cfg->nb_workers;
+	uint32_t repeat_times = cfg->repeat_times;
+
+	if (setup_memory_env(cfg, &srcs, &dsts) < 0)
+		goto out;
+
+	if (config_dmadevs(nb_workers, cfg->ring_size.cur) < 0)
+		goto out;
+
+	nr_buf = cfg->nr_buf;
+
+	if (cfg->cache_flush) {
+		struct buf_info info;
+
+		/* Flush both sides so the timed copies start cold. */
+		info.array = srcs;
+		info.buf_size = buf_size;
+		info.nr_buf = nr_buf;
+		cache_flush_buf(&info);
+
+		info.array = dsts;
+		cache_flush_buf(&info);
+		/* x86 fence: ensure flushes are globally visible before timing. */
+		__builtin_ia32_mfence();
+	}
+
+	printf("Start testing....\n");
+
+	for (i = 0; i < nb_workers; i++) {
+		/* Pick the next worker lcore (skip main, wrap around). */
+		lcore_id = rte_get_next_lcore(lcore_id, true, true);
+		/* Each worker gets an equal contiguous slice of the buffers. */
+		offset = nr_buf / nb_workers * i;
+
+		/* Ownership of p transfers to the worker, which frees it. */
+		struct lcore_params *p = rte_malloc(NULL, sizeof(*p), 0);
+		if (!p) {
+			printf("lcore parameters malloc failure for lcore %d\n", lcore_id);
+			break;
+		}
+		*p = (struct lcore_params) {
+			dmadev_ids[i],
+			(uint32_t)(nr_buf/nb_workers),
+			kick_batch,
+			buf_size,
+			repeat_times,
+			mpool_iter_step,
+			srcs + offset,
+			dsts + offset,
+			cfg->senario_id
+		};
+
+		rte_eal_remote_launch((lcore_function_t *)dma_copy, p, lcore_id);
+	}
+
+	rte_eal_mp_wait_lcore();
+
+out:
+	/* free env */
+	if (srcs) {
+		for (i = 0; i < cfg->nr_buf; i++)
+			rte_pktmbuf_free(srcs[i]);
+		free(srcs);
+	}
+	if (dsts) {
+		for (i = 0; i < cfg->nr_buf; i++)
+			rte_pktmbuf_free(dsts[i]);
+		free(dsts);
+	}
+
+	/* NOTE(review): src_pool/dst_pool are freed but not reset to NULL;
+	 * the next scenario recreates them — confirm no stale-pointer use. */
+	if (src_pool)
+		rte_mempool_free(src_pool);
+	if (dst_pool)
+		rte_mempool_free(dst_pool);
+
+	for (i = 0; i < nb_dmadevs; i++) {
+		printf("Stopping dmadev %d\n", dmadev_ids[i]);
+		rte_dma_stop(dmadev_ids[i]);
+	}
+}
+
+/*
+ * Run one CPU-copy scenario: allocate buffers, launch one cpu_copy() worker
+ * per configured worker thread, wait, then free everything.
+ */
+void
+cpu_copy_benchmark(struct test_configure *cfg)
+{
+	uint32_t i, offset, nr_buf;
+	uint32_t repeat_times = cfg->repeat_times;
+	uint32_t kick_batch = cfg->kick_batch.cur;	/* carried in params; CPU path does not kick */
+	uint32_t buf_size = cfg->buf_size.cur;
+	uint16_t nb_workers = cfg->nb_workers;
+	uint16_t mpool_iter_step = cfg->mpool_iter_step;
+	struct rte_mbuf **srcs = NULL, **dsts = NULL;
+	unsigned int lcore_id = 0;
+
+	if (setup_memory_env(cfg, &srcs, &dsts) < 0)
+		goto out;
+
+	nr_buf = cfg->nr_buf;
+
+	for (i = 0; i < nb_workers; i++) {
+		/* Skip the main lcore only when other lcores exist; wrap. */
+		lcore_id = rte_get_next_lcore(lcore_id, rte_lcore_count() > 1 ? 1 : 0, 1);
+		/* Equal contiguous slice of the buffers per worker. */
+		offset = nr_buf / nb_workers * i;
+		/* Ownership of p transfers to the worker, which frees it. */
+		struct lcore_params *p = rte_malloc(NULL, sizeof(*p), 0);
+		if (!p) {
+			printf("lcore parameters malloc failure for lcore %d\n", lcore_id);
+			break;
+		}
+		*p = (struct lcore_params) { 0, nr_buf/nb_workers, kick_batch,
+						buf_size, repeat_times, mpool_iter_step,
+						srcs + offset, dsts + offset, cfg->senario_id };
+		rte_eal_remote_launch((lcore_function_t *)cpu_copy, p, lcore_id);
+	}
+
+	rte_eal_mp_wait_lcore();
+
+out:
+	/* free env */
+	if (srcs) {
+		for (i = 0; i < cfg->nr_buf; i++)
+			rte_pktmbuf_free(srcs[i]);
+		free(srcs);
+	}
+	if (dsts) {
+		for (i = 0; i < cfg->nr_buf; i++)
+			rte_pktmbuf_free(dsts[i]);
+		free(dsts);
+	}
+
+	if (src_pool)
+		rte_mempool_free(src_pool);
+	if (dst_pool)
+		rte_mempool_free(dst_pool);
+}
new file mode 100644
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _BENCHMARK_H_
+#define _BENCHMARK_H_
+
+/* NOTE(review): struct test_configure is declared in main.h; this header
+ * assumes main.h is included first — consider a forward declaration here. */
+
+/* Run one DMA-copy scenario described by cfg; results land in output_str[]. */
+void dma_copy_benchmark(struct test_configure *cfg);
+
+/* Run one CPU (rte_memcpy) scenario described by cfg; results land in output_str[]. */
+void cpu_copy_benchmark(struct test_configure *cfg);
+
+#endif /* _BENCHMARK_H_ */
new file mode 100644
@@ -0,0 +1,61 @@
+
+; Supported test types:
+; DMA_COPY|CPU_COPY
+
+; Parameters:
+; "mem_size","buf_size","dma_ring_size","kick_batch".
+; "mem_size" means the size of the memory footprint.
+; "buf_size" means the memory size of a single operation.
+; "dma_ring_size" means the dma ring buffer size.
+; "kick_batch" means dma operation batch size.
+
+; Format: variable=first[,last,increment[,ADD|MUL]]
+; ADD is the default mode.
+
+; src_numa_node is used to control the numa node where the source memory is allocated.
+; dst_numa_node is used to control the numa node where the destination memory is allocated.
+
+; cache_flush is used to control if the cache should be flushed.
+
+; repeat_times is used to control the repeat times of the whole case.
+
+; worker_threads is used to control the number of worker threads of the test app.
+; It must be less than the number of available lcores.
+
+; mpool_iter_step is used to control buffer continuity: buffers are visited with
+; this stride, so a step greater than 1 makes consecutive copies use
+; non-adjacent buffers.
+
+; Bind DMA to lcore:
+; Specify the "lcore_dma" parameter.
+; The number of "lcore_dma" should be greater than or equal to the number of "worker_threads".
+; Otherwise the remaining DMA devices will be automatically allocated to threads that are not
+; specified. If EAL parameters "-l" and "-a" are specified, the "lcore_dma" should be within
+; their range.
+
+[case1]
+type=DMA_COPY
+mem_size=10
+buf_size=64,8192,2,MUL
+dma_ring_size=1024
+kick_batch=32
+src_numa_node=0
+dst_numa_node=0
+cache_flush=0
+repeat_times=10
+worker_threads=1
+mpool_iter_step=1
+lcore_dma=lcore3@0000:00:04.0
+eal_args=--legacy-mem --file-prefix=test
+
+[case2]
+type=CPU_COPY
+mem_size=10
+buf_size=64,8192,2,MUL
+dma_ring_size=1024
+kick_batch=32
+src_numa_node=0
+dst_numa_node=1
+cache_flush=0
+repeat_times=100
+worker_threads=1
+mpool_iter_step=1
+eal_args=--no-pci
new file mode 100644
@@ -0,0 +1,399 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <getopt.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+#include <rte_eal.h>
+#include <rte_cfgfile.h>
+#include <rte_string_fns.h>
+#include <rte_lcore.h>
+
+#include "main.h"
+#include "benchmark.h"
+
+#define CSV_HDR_FMT "Case %u : %s,lcore,DMA,buffer size,nr_buf,memory(MB),cycle,bandwidth(Gbps),OPS\n"
+
+#define MAX_EAL_PARM_NB 100
+#define MAX_EAL_PARM_LEN 1024
+
+#define DMA_COPY "DMA_COPY"
+#define CPU_COPY "CPU_COPY"
+
+#define MAX_PARAMS_PER_ENTRY 4
+
+/* Discriminator values for test_configure.test_type. */
+enum {
+	TEST_TYPE_NONE = 0,
+	TEST_TYPE_DMA_COPY,
+	TEST_TYPE_CPU_COPY
+};
+
+#define MAX_TEST_CASES 16
+/* All parsed [caseN] sections; TEST_TYPE_NONE marks the end of the list. */
+static struct test_configure test_cases[MAX_TEST_CASES];
+
+/* Result CSV stream: the parent truncates it once, children append per case. */
+static FILE *fd;
+
+/*
+ * Flush every staged row in output_str[] to the CSV file and clear the
+ * slots. An optional pair of blank rows separates sections of the report.
+ */
+static void
+output_csv(bool need_blankline)
+{
+	uint32_t idx;
+
+	if (need_blankline) {
+		fprintf(fd, "%s", ",,,,,,,,\n");
+		fprintf(fd, "%s", ",,,,,,,,\n");
+	}
+
+	for (idx = 0; idx < sizeof(output_str) / sizeof(output_str[0]); idx++) {
+		if (output_str[idx][0] == '\0')
+			continue;
+		fprintf(fd, "%s", output_str[idx]);
+		memset(output_str[idx], 0, MAX_OUTPUT_STR_LEN);
+	}
+
+	fflush(fd);
+}
+
+/* Write the environment banner (TSC frequency) as the first CSV rows. */
+static void
+output_env_info(void)
+{
+	snprintf(output_str[0], MAX_OUTPUT_STR_LEN, "test environment:\n");
+	snprintf(output_str[1], MAX_OUTPUT_STR_LEN, "frequency,%lu\n", rte_get_timer_hz());
+
+	output_csv(true);
+}
+
+/* Write the per-case CSV column header row. */
+static void
+output_header(uint32_t case_id, struct test_configure *case_cfg)
+{
+	snprintf(output_str[0], MAX_OUTPUT_STR_LEN,
+			CSV_HDR_FMT, case_id, case_cfg->test_type_str);
+
+	output_csv(true);
+}
+
+/* Dispatch one scenario to the benchmark matching its test type. */
+static void
+run_test_case(struct test_configure *case_cfg)
+{
+	if (case_cfg->test_type == TEST_TYPE_DMA_COPY)
+		dma_copy_benchmark(case_cfg);
+	else if (case_cfg->test_type == TEST_TYPE_CPU_COPY)
+		cpu_copy_benchmark(case_cfg);
+	else
+		printf("Unknown test type. %s\n", case_cfg->test_type_str);
+}
+
+/*
+ * Run one test case: sweep the (at most one) variable parameter across its
+ * range, running a scenario and flushing the CSV rows at each step.
+ */
+static void
+run_test(uint32_t case_id, struct test_configure *case_cfg)
+{
+	uint32_t i;
+	uint32_t nb_lcores = rte_lcore_count();
+	struct test_configure_entry *mem_size = &case_cfg->mem_size;
+	struct test_configure_entry *buf_size = &case_cfg->buf_size;
+	struct test_configure_entry *ring_size = &case_cfg->ring_size;
+	struct test_configure_entry *kick_batch = &case_cfg->kick_batch;
+	struct test_configure_entry *var_entry = NULL;
+
+	for (i = 0; i < sizeof(output_str) / sizeof(output_str[0]); i++)
+		memset(output_str[i], 0, MAX_OUTPUT_STR_LEN);
+
+	if (nb_lcores <= case_cfg->nb_workers) {
+		printf("Case %u: Not enough lcores (%u) for all workers (%u).\n",
+			case_id, nb_lcores, case_cfg->nb_workers);
+		return;
+	}
+
+	RTE_LOG(INFO, DMA, "Number of used lcores: %u.\n", nb_lcores);
+
+	/* At most one entry may vary (enforced in load_configs); the last one
+	 * with a non-zero increment wins. */
+	if (mem_size->incr != 0)
+		var_entry = mem_size;
+	if (buf_size->incr != 0)
+		var_entry = buf_size;
+	if (ring_size->incr != 0)
+		var_entry = ring_size;
+	if (kick_batch->incr != 0)
+		var_entry = kick_batch;
+
+	case_cfg->senario_id = 0;
+
+	/* A multiplicative step of <= 1 would never advance cur and would
+	 * loop forever; reject it up front. */
+	if (var_entry != NULL && var_entry->op == OP_MUL && var_entry->incr <= 1) {
+		printf("Case %u: invalid MUL increment %u.\n", case_id, var_entry->incr);
+		return;
+	}
+
+	output_header(case_id, case_cfg);
+
+	if (var_entry == NULL) {
+		run_test_case(case_cfg);
+		output_csv(false);
+		return;
+	}
+
+	for (var_entry->cur = var_entry->first; var_entry->cur <= var_entry->last;) {
+		case_cfg->senario_id++;
+		printf("\nRunning scenario %d\n", case_cfg->senario_id);
+
+		run_test_case(case_cfg);
+		output_csv(false);
+
+		if (var_entry->op == OP_MUL)
+			var_entry->cur *= var_entry->incr;
+		else
+			var_entry->cur += var_entry->incr;
+	}
+}
+
+/*
+ * Parse "first[,last,increment[,ADD|MUL]]" into entry. Returns the number
+ * of comma-separated fields found, or -1 on error / missing value.
+ */
+static int
+parse_entry(const char *value, struct test_configure_entry *entry)
+{
+	char input[255] = {0};
+	char *args[MAX_PARAMS_PER_ENTRY];
+	int args_nr = -1;
+
+	/* Callers pass rte_cfgfile_get_entry() results straight through,
+	 * which are NULL when the key is absent; strncpy(NULL) is UB. */
+	if (value == NULL)
+		goto out;
+
+	strncpy(input, value, sizeof(input) - 1);
+	if (*input == '\0')
+		goto out;
+
+	args_nr = rte_strsplit(input, strlen(input), args, MAX_PARAMS_PER_ENTRY, ',');
+	if (args_nr <= 0)
+		goto out;
+
+	entry->cur = entry->first = (uint32_t)atoi(args[0]);
+	entry->last = args_nr > 1 ? (uint32_t)atoi(args[1]) : 0;
+	entry->incr = args_nr > 2 ? (uint32_t)atoi(args[2]) : 0;
+
+	if (args_nr > 3) {
+		if (!strcmp(args[3], "MUL"))
+			entry->op = OP_MUL;
+		else
+			entry->op = OP_ADD;
+	} else
+		entry->op = OP_NONE;
+out:
+	return args_nr;
+}
+
+/* Return a section key's value as int, or def when the key is absent.
+ * Avoids passing a NULL rte_cfgfile_get_entry() result to atoi() (UB). */
+static int
+cfg_entry_int(struct rte_cfgfile *cfgfile, const char *section,
+		const char *key, int def)
+{
+	const char *str = rte_cfgfile_get_entry(cfgfile, section, key);
+
+	return str != NULL ? atoi(str) : def;
+}
+
+/* Parse ./config.ini into test_cases[]; exits on fatal configuration errors. */
+static void
+load_configs(void)
+{
+	struct rte_cfgfile *cfgfile;
+	int nb_sections, i;
+	struct test_configure *test_case;
+	char **sections_name;
+	const char *section_name, *case_type, *eal_args;
+	const char *mem_size_str, *buf_size_str, *ring_size_str, *kick_batch_str;
+	int args_nr, nb_vp;
+
+	sections_name = malloc(MAX_TEST_CASES * sizeof(char *));
+	if (sections_name == NULL) {
+		printf("Section name allocation failed.\n");
+		exit(1);
+	}
+	for (i = 0; i < MAX_TEST_CASES; i++) {
+		/* CFG_NAME_LEN bytes per name (the original over-allocated
+		 * with sizeof(char *) per character). */
+		sections_name[i] = malloc(CFG_NAME_LEN);
+		if (sections_name[i] == NULL) {
+			printf("Section name allocation failed.\n");
+			exit(1);
+		}
+	}
+
+	cfgfile = rte_cfgfile_load("./config.ini", 0);
+	if (!cfgfile) {
+		printf("Open configure file error.\n");
+		exit(1);
+	}
+
+	nb_sections = rte_cfgfile_num_sections(cfgfile, NULL, 0);
+	if (nb_sections > MAX_TEST_CASES) {
+		printf("Error: The maximum number of cases is %d.\n", MAX_TEST_CASES);
+		exit(1);
+	}
+	rte_cfgfile_sections(cfgfile, sections_name, MAX_TEST_CASES);
+	for (i = 0; i < nb_sections; i++) {
+		test_case = &test_cases[i];
+		section_name = sections_name[i];
+		case_type = rte_cfgfile_get_entry(cfgfile, section_name, "type");
+		if (!case_type) {
+			printf("Error: No case type in case %d\n.", i + 1);
+			exit(1);
+		}
+		if (!strcmp(case_type, DMA_COPY)) {
+			test_case->test_type = TEST_TYPE_DMA_COPY;
+			test_case->test_type_str = DMA_COPY;
+		} else if (!strcmp(case_type, CPU_COPY)) {
+			test_case->test_type = TEST_TYPE_CPU_COPY;
+			test_case->test_type_str = CPU_COPY;
+		} else {
+			printf("Error: Cannot find case type %s.\n", case_type);
+			exit(1);
+		}
+
+		nb_vp = 0;
+
+		test_case->src_numa_node =
+			(uint16_t)cfg_entry_int(cfgfile, section_name, "src_numa_node", 0);
+		test_case->dst_numa_node =
+			(uint16_t)cfg_entry_int(cfgfile, section_name, "dst_numa_node", 0);
+
+		/* Guard each parse_entry() call against a missing key: the
+		 * original passed NULL straight into strncpy(). */
+		mem_size_str = rte_cfgfile_get_entry(cfgfile, section_name, "mem_size");
+		args_nr = mem_size_str ? parse_entry(mem_size_str, &test_case->mem_size) : -1;
+		if (args_nr < 0) {
+			printf("parse error\n");
+			break;
+		} else if (args_nr > 1)
+			nb_vp++;
+
+		buf_size_str = rte_cfgfile_get_entry(cfgfile, section_name, "buf_size");
+		args_nr = buf_size_str ? parse_entry(buf_size_str, &test_case->buf_size) : -1;
+		if (args_nr < 0) {
+			printf("parse error\n");
+			break;
+		} else if (args_nr > 1)
+			nb_vp++;
+
+		ring_size_str = rte_cfgfile_get_entry(cfgfile, section_name, "dma_ring_size");
+		args_nr = ring_size_str ? parse_entry(ring_size_str, &test_case->ring_size) : -1;
+		if (args_nr < 0) {
+			printf("parse error\n");
+			break;
+		} else if (args_nr > 1)
+			nb_vp++;
+
+		kick_batch_str = rte_cfgfile_get_entry(cfgfile, section_name, "kick_batch");
+		args_nr = kick_batch_str ? parse_entry(kick_batch_str, &test_case->kick_batch) : -1;
+		if (args_nr < 0) {
+			printf("parse error\n");
+			break;
+		} else if (args_nr > 1)
+			nb_vp++;
+
+		/* The original tested nb_vp > 2, contradicting its own
+		 * message; more than ONE variable parameter is the error. */
+		if (nb_vp > 1) {
+			printf("%s, variable parameters can only have one.\n", section_name);
+			break;
+		}
+
+		test_case->cache_flush =
+			(uint32_t)cfg_entry_int(cfgfile, section_name, "cache_flush", 0);
+		test_case->repeat_times =
+			(uint32_t)cfg_entry_int(cfgfile, section_name, "repeat_times", 0);
+		test_case->nb_workers =
+			(uint16_t)cfg_entry_int(cfgfile, section_name, "worker_threads", 0);
+		test_case->mpool_iter_step =
+			(uint16_t)cfg_entry_int(cfgfile, section_name, "mpool_iter_step", 0);
+
+		/* Duplicate the string: the cfgfile storage it points into is
+		 * freed by rte_cfgfile_close() below, but eal_args is used
+		 * later in main() — the original kept a dangling pointer. */
+		eal_args = rte_cfgfile_get_entry(cfgfile, section_name, "eal_args");
+		test_case->eal_args = eal_args ? strdup(eal_args) : NULL;
+	}
+
+	rte_cfgfile_close(cfgfile);
+	for (i = 0; i < MAX_TEST_CASES; i++)
+		free(sections_name[i]);
+	free(sections_name);
+}
+
+/*
+ * Build a per-case argv: the original command line followed by the tokens of
+ * eal_args. new_argv must provide at least MAX_EAL_PARM_NB slots of
+ * MAX_EAL_PARM_LEN bytes each (see main()). Returns the new argc.
+ */
+static int
+append_eal_args(int argc, char **argv, const char *eal_args, char **new_argv)
+{
+	int i;
+	char *tokens[MAX_EAL_PARM_NB];
+	char args[MAX_EAL_PARM_LEN] = {0};
+	int new_argc, token_nb;
+
+	new_argc = argc;
+
+	/* snprintf bounds every copy into the fixed-size slots; the original
+	 * used unbounded strcpy() and could overflow on long arguments. */
+	for (i = 0; i < argc && i < MAX_EAL_PARM_NB; i++)
+		snprintf(new_argv[i], MAX_EAL_PARM_LEN, "%s", argv[i]);
+
+	if (eal_args) {
+		snprintf(args, sizeof(args), "%s", eal_args);
+		token_nb = rte_strsplit(args, strlen(args),
+					tokens, MAX_EAL_PARM_NB, ' ');
+		for (i = 0; i < token_nb && new_argc < MAX_EAL_PARM_NB; i++) {
+			snprintf(new_argv[new_argc], MAX_EAL_PARM_LEN, "%s", tokens[i]);
+			new_argc++;
+		}
+	}
+
+	return new_argc;
+}
+
+/*
+ * Entry point: parse config.ini, then run each case in a forked child so
+ * every case gets a fresh EAL (rte_eal_init() can only run once per process).
+ */
+int
+main(int argc, char *argv[])
+{
+	int ret;
+	uint32_t i, nb_lcores;
+	pid_t child_pid, wpid;
+	int status = 0;
+	/* Per-child EAL argv storage; append_eal_args() copies into these.
+	 * Sized by MAX_EAL_PARM_NB (the original hard-coded 100 for pargs). */
+	char args[MAX_EAL_PARM_NB][MAX_EAL_PARM_LEN] = {0};
+	char *pargs[MAX_EAL_PARM_NB];
+	int new_argc;
+
+	for (i = 0; i < MAX_EAL_PARM_NB; i++)
+		pargs[i] = args[i];
+
+	load_configs();
+
+	/* Truncate any previous result file; children re-open in append mode. */
+	fd = fopen("./test_result.csv", "w");
+	if (!fd) {
+		printf("Open output CSV file error.\n");
+		return 0;
+	}
+	fclose(fd);
+
+	for (i = 0; i < MAX_TEST_CASES; i++) {
+		if (test_cases[i].test_type == TEST_TYPE_NONE)
+			break;
+
+		child_pid = fork();
+		if (child_pid < 0) {
+			printf("Fork case %u failed.\n", i + 1);
+			return 1;
+		} else if (child_pid == 0) {
+			printf("\nRunning case %u\n", i + 1);
+
+			if (test_cases[i].eal_args) {
+				new_argc = append_eal_args(argc, argv,
+					test_cases[i].eal_args, pargs);
+				ret = rte_eal_init(new_argc, pargs);
+			} else {
+				ret = rte_eal_init(argc, argv);
+			}
+			if (ret < 0)
+				rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
+
+			/* Need the main lcore plus at least one worker. */
+			nb_lcores = rte_lcore_count();
+			if (nb_lcores < 2)
+				rte_exit(EXIT_FAILURE,
+					"There should be at least 2 worker lcores.\n");
+
+			fd = fopen("./test_result.csv", "a");
+			if (!fd) {
+				printf("Open output CSV file error.\n");
+				return 0;
+			}
+
+			if (i == 0)
+				output_env_info();
+			run_test(i + 1, &test_cases[i]);
+
+			/* clean up the EAL */
+			rte_eal_cleanup();
+
+			fclose(fd);
+
+			printf("\nCase %u completed.\n", i + 1);
+
+			return 0;
+		}
+
+		/* Parent: reap the child before starting the next case. */
+		while ((wpid = wait(&status)) > 0)
+			;
+	}
+
+	printf("Bye...\n");
+	return 0;
+}
\ No newline at end of file
new file mode 100644
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _MAIN_H_
+#define _MAIN_H_
+
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+
+#define MAX_WORKER_NB 128
+#define MAX_OUTPUT_STR_LEN 512
+
+#define RTE_LOGTYPE_DMA RTE_LOGTYPE_USER1
+
+/* How a variable parameter advances between scenarios. */
+typedef enum {
+	OP_NONE = 0,	/* fixed value, no sweep */
+	OP_ADD,		/* cur += incr each scenario */
+	OP_MUL		/* cur *= incr each scenario */
+} alg_op_type;
+
+/* One sweepable parameter: first..last advanced by incr via op; cur is the
+ * value in effect for the scenario currently running. */
+struct test_configure_entry {
+	uint32_t first;
+	uint32_t last;
+	uint32_t incr;
+	alg_op_type op;
+	uint32_t cur;
+};
+
+/* Full description of one [caseN] section parsed from config.ini. */
+struct test_configure {
+	uint8_t test_type;		/* TEST_TYPE_* discriminator */
+	const char *test_type_str;	/* "DMA_COPY" or "CPU_COPY" */
+	uint16_t src_numa_node;		/* NUMA node for the source mempool */
+	uint16_t dst_numa_node;		/* NUMA node for the destination mempool */
+	uint16_t opcode;		/* NOTE(review): unset/unused in this patch — confirm */
+	bool is_dma;			/* NOTE(review): unset/unused in this patch — confirm */
+	struct test_configure_entry mem_size;	/* footprint in MB */
+	struct test_configure_entry buf_size;	/* bytes per copy operation */
+	struct test_configure_entry ring_size;	/* DMA ring depth */
+	struct test_configure_entry kick_batch;	/* enqueues per doorbell kick */
+	uint32_t cache_flush;		/* non-zero: flush buffers before timing */
+	uint32_t nr_buf;		/* computed: buffers per side */
+	uint32_t repeat_times;		/* passes over the buffer set */
+	uint32_t nb_workers;		/* worker lcores to launch */
+	uint16_t mpool_iter_step;	/* stride when walking buffer arrays */
+	const char *eal_args;		/* extra EAL arguments for this case */
+	uint8_t senario_id;		/* current scenario number within the case */
+};
+
+/* Globals shared between main.c and benchmark.c.
+ * NOTE(review): these are tentative definitions in a header included by
+ * multiple .c files — with -fno-common (the GCC 10+ default) this can fail
+ * to link; consider 'extern' declarations here plus one definition in
+ * main.c — TODO confirm the build. */
+uint16_t dmadev_ids[MAX_WORKER_NB];
+uint32_t nb_dmadevs;
+char output_str[MAX_WORKER_NB][MAX_OUTPUT_STR_LEN];
+
+#endif /* _MAIN_H_ */
new file mode 100644
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2019-2022 Intel Corporation
+
+# meson file, for building this example as part of a main DPDK build.
+#
+# To build this example as a standalone application with an already-installed
+# DPDK instance, use 'make'
+
+allow_experimental_apis = true
+
+deps += ['dmadev', 'mbuf', 'cfgfile']
+
+sources = files(
+ 'main.c',
+ 'benchmark.c',
+)
\ No newline at end of file