get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/116425/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 116425,
    "url": "http://patches.dpdk.org/api/patches/116425/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20220919113957.52127-1-cheng1.jiang@intel.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20220919113957.52127-1-cheng1.jiang@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20220919113957.52127-1-cheng1.jiang@intel.com",
    "date": "2022-09-19T11:39:57",
    "name": "[RFC,v2] app/dma-perf: introduce dma-perf application",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "bccae63270dccc0e71c8a9fb3e3c5f733a371caa",
    "submitter": {
        "id": 1530,
        "url": "http://patches.dpdk.org/api/people/1530/?format=api",
        "name": "Jiang, Cheng1",
        "email": "Cheng1.jiang@intel.com"
    },
    "delegate": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/users/1/?format=api",
        "username": "tmonjalo",
        "first_name": "Thomas",
        "last_name": "Monjalon",
        "email": "thomas@monjalon.net"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20220919113957.52127-1-cheng1.jiang@intel.com/mbox/",
    "series": [
        {
            "id": 24709,
            "url": "http://patches.dpdk.org/api/series/24709/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=24709",
            "date": "2022-09-19T11:39:57",
            "name": "[RFC,v2] app/dma-perf: introduce dma-perf application",
            "version": 2,
            "mbox": "http://patches.dpdk.org/series/24709/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/116425/comments/",
    "check": "fail",
    "checks": "http://patches.dpdk.org/api/patches/116425/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 5E2FFA00C3;\n\tMon, 19 Sep 2022 14:15:12 +0200 (CEST)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id EDCD240E0F;\n\tMon, 19 Sep 2022 14:15:11 +0200 (CEST)",
            "from mga18.intel.com (mga18.intel.com [134.134.136.126])\n by mails.dpdk.org (Postfix) with ESMTP id 2FDD940141\n for <dev@dpdk.org>; Mon, 19 Sep 2022 14:15:09 +0200 (CEST)",
            "from fmsmga004.fm.intel.com ([10.253.24.48])\n by orsmga106.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 19 Sep 2022 05:15:07 -0700",
            "from dpdk_jiangcheng.sh.intel.com ([10.67.118.237])\n by fmsmga004.fm.intel.com with ESMTP; 19 Sep 2022 05:15:04 -0700"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/simple;\n d=intel.com; i=@intel.com; q=dns/txt; s=Intel;\n t=1663589709; x=1695125709;\n h=from:to:cc:subject:date:message-id:in-reply-to:\n references:mime-version:content-transfer-encoding;\n bh=hbQFSc8wYtq9Dz31HEEoLFOwYiSg2ARKnmffseQ5Qsw=;\n b=BuvGAKkrvCtTcFIzumRbzc8aqIryds2ldYlqMBB9dXIowc8AWCsQAK6L\n l/RDDEXUIr8z2xYsN37Oha4U9V1KO19YcVQVDem6zSurCWPjA0dHlMysB\n CziMgDKe1Qfw48+0UguM5g38tY7aIqakDTwJJyltNHFpXvYfSK6gm0SEQ\n 0ZMPpEyNrJFu70yU2WPSOXjjO6CR2bpW4y3wnx4887SMAUona3FWRb9Af\n b5XSbnjtwbFRWqMNmN6oYs/hdA8N0zL8tx6kP/mj9DYErg/m1/+I/ghIm\n ywQsbF2hJCVJyv9jLz7HuWXQWaiDKUfLWKP6MsMdV7RivG6QizE0N9Wqp g==;",
        "X-IronPort-AV": [
            "E=McAfee;i=\"6500,9779,10474\"; a=\"282409326\"",
            "E=Sophos;i=\"5.93,327,1654585200\"; d=\"scan'208\";a=\"282409326\"",
            "E=Sophos;i=\"5.93,327,1654585200\"; d=\"scan'208\";a=\"686954024\""
        ],
        "X-ExtLoop1": "1",
        "From": "Cheng Jiang <cheng1.jiang@intel.com>",
        "To": "thomas@monjalon.net, bruce.richardson@intel.com, mb@smartsharesystems.com",
        "Cc": "dev@dpdk.org, jiayu.hu@intel.com, xuan.ding@intel.com,\n wenwux.ma@intel.com,\n yuanx.wang@intel.com, yvonnex.yang@intel.com, xingguang.he@intel.com,\n Cheng Jiang <cheng1.jiang@intel.com>",
        "Subject": "[RFC v2] app/dma-perf: introduce dma-perf application",
        "Date": "Mon, 19 Sep 2022 11:39:57 +0000",
        "Message-Id": "<20220919113957.52127-1-cheng1.jiang@intel.com>",
        "X-Mailer": "git-send-email 2.35.1",
        "In-Reply-To": "<20220915154758.688-1-cheng1.jiang@intel.com>",
        "References": "<20220915154758.688-1-cheng1.jiang@intel.com>",
        "MIME-Version": "1.0",
        "Content-Type": "text/plain; charset=UTF-8",
        "Content-Transfer-Encoding": "8bit",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "There are many high-performance DMA devices supported in DPDK now, and\nthese DMA devices can also be integrated into other modules of DPDK as\naccelerators, such as Vhost. Before integrating DMA into applications,\ndevelopers need to know the performance of these DMA devices in various\nscenarios and the performance of CPUs in the same scenario, such as\ndifferent buffer lengths. Only in this way can we know the target\nperformance of the application accelerated by using them. This patch\nintroduces a high-performance testing tool, which supports comparing the\nperformance of CPU and DMA in different scenarios automatically with a\npre-set config file. Memory Copy performance test are supported for now.\n\nSigned-off-by: Cheng Jiang <cheng1.jiang@intel.com>\nSigned-off-by: Jiayu Hu <jiayu.hu@intel.com>\nSigned-off-by: Yuan Wang <yuanx.wang@intel.com>\nAcked-by: Morten Brørup <mb@smartsharesystems.com>\n---\nv2: fixed some CI issues.\n\n app/meson.build               |   1 +\n app/test-dma-perf/benchmark.c | 543 ++++++++++++++++++++++++++++++++++\n app/test-dma-perf/benchmark.h |  12 +\n app/test-dma-perf/config.ini  |  61 ++++\n app/test-dma-perf/main.c      | 404 +++++++++++++++++++++++++\n app/test-dma-perf/main.h      |  55 ++++\n app/test-dma-perf/meson.build |  16 +\n 7 files changed, 1092 insertions(+)\n create mode 100644 app/test-dma-perf/benchmark.c\n create mode 100644 app/test-dma-perf/benchmark.h\n create mode 100644 app/test-dma-perf/config.ini\n create mode 100644 app/test-dma-perf/main.c\n create mode 100644 app/test-dma-perf/main.h\n create mode 100644 app/test-dma-perf/meson.build\n\n--\n2.35.1",
    "diff": "diff --git a/app/meson.build b/app/meson.build\nindex 93d8c15032..3826a10a27 100644\n--- a/app/meson.build\n+++ b/app/meson.build\n@@ -18,6 +18,7 @@ apps = [\n         'test-pmd',\n         'test-regex',\n         'test-sad',\n+        'test-dma-perf',\n ]\n\n default_cflags = machine_args + ['-DALLOW_EXPERIMENTAL_API']\ndiff --git a/app/test-dma-perf/benchmark.c b/app/test-dma-perf/benchmark.c\nnew file mode 100644\nindex 0000000000..8dead4a885\n--- /dev/null\n+++ b/app/test-dma-perf/benchmark.c\n@@ -0,0 +1,543 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2022 Intel Corporation\n+ */\n+\n+#include <inttypes.h>\n+\n+#include <rte_time.h>\n+#include <rte_mbuf.h>\n+#include <rte_dmadev.h>\n+#include <rte_malloc.h>\n+\n+#include \"eal_private.h\"\n+\n+#include \"main.h\"\n+#include \"benchmark.h\"\n+\n+\n+#define MAX_DMA_CPL_NB 255\n+\n+#define CSV_LINE_DMA_FMT \"Scenario %u,%u,%u,%u,%u,%u,%\" PRIu64 \",%.3lf,%\" PRIu64 \"\\n\"\n+#define CSV_LINE_CPU_FMT \"Scenario %u,%u,NA,%u,%u,%u,%\" PRIu64 \",%.3lf,%\" PRIu64 \"\\n\"\n+\n+struct lcore_params {\n+\tuint16_t dev_id;\n+\tuint32_t nr_buf;\n+\tuint16_t kick_batch;\n+\tuint32_t buf_size;\n+\tuint32_t repeat_times;\n+\tuint16_t mpool_iter_step;\n+\tstruct rte_mbuf **srcs;\n+\tstruct rte_mbuf **dsts;\n+\tuint8_t scenario_id;\n+};\n+\n+struct buf_info {\n+\tstruct rte_mbuf **array;\n+\tuint32_t nr_buf;\n+\tuint32_t buf_size;\n+};\n+\n+static struct rte_mempool *src_pool;\n+static struct rte_mempool *dst_pool;\n+\n+#define PRINT_ERR(...) print_err(__func__, __LINE__, __VA_ARGS__)\n+\n+static inline int\n+__rte_format_printf(3, 4)\n+print_err(const char *func, int lineno, const char *format, ...)\n+{\n+\tva_list ap;\n+\tint ret;\n+\n+\tret = fprintf(stderr, \"In %s:%d - \", func, lineno);\n+\tva_start(ap, format);\n+\tret += vfprintf(stderr, format, ap);\n+\tva_end(ap);\n+\n+\treturn ret;\n+}\n+\n+static inline void\n+calc_result(struct lcore_params *p, uint64_t cp_cycle_sum, double time_sec,\n+\t\t\tuint32_t repeat_times, uint32_t *memory, uint64_t *ave_cycle,\n+\t\t\tfloat *bandwidth, uint64_t *ops)\n+{\n+\t*memory = (p->buf_size * p->nr_buf * 2) / (1024 * 1024);\n+\t*ave_cycle = cp_cycle_sum / (p->repeat_times * p->nr_buf);\n+\t*bandwidth = p->buf_size * 8 * rte_get_timer_hz() / (*ave_cycle * 1000 * 1000 * 1000.0);\n+\t*ops = (double)p->nr_buf * repeat_times / time_sec;\n+}\n+\n+static void\n+output_result(uint8_t scenario_id, uint32_t lcore_id, uint16_t dev_id, uint64_t ave_cycle,\n+\t\t\tuint32_t buf_size, uint32_t nr_buf, uint32_t memory,\n+\t\t\tfloat bandwidth, uint64_t ops, bool is_dma)\n+{\n+\tif (is_dma)\n+\t\tprintf(\"lcore %u, DMA %u:\\n\"\n+\t\t\t\t\"average cycles: %\" PRIu64 \",\"\n+\t\t\t\t\" buffer size: %u, nr_buf: %u,\"\n+\t\t\t\t\" memory: %uMB, frequency: %\" PRIu64 \".\\n\",\n+\t\t\t\tlcore_id,\n+\t\t\t\tdev_id,\n+\t\t\t\tave_cycle,\n+\t\t\t\tbuf_size,\n+\t\t\t\tnr_buf,\n+\t\t\t\tmemory,\n+\t\t\t\trte_get_timer_hz());\n+\telse\n+\t\tprintf(\"lcore %u\\n\"\n+\t\t\t\"average cycles: %\" PRIu64 \",\"\n+\t\t\t\" buffer size: %u, nr_buf: %u,\"\n+\t\t\t\" memory: %uMB, frequency: %\" PRIu64 \".\\n\",\n+\t\t\tlcore_id,\n+\t\t\tave_cycle,\n+\t\t\tbuf_size,\n+\t\t\tnr_buf,\n+\t\t\tmemory,\n+\t\t\trte_get_timer_hz());\n+\n+\tprintf(\"Average bandwidth: %.3lfGbps, OPS: %\" PRIu64 \"\\n\", bandwidth, ops);\n+\n+\tif (is_dma)\n+\t\tsnprintf(output_str[lcore_id], MAX_OUTPUT_STR_LEN,\n+\t\t\tCSV_LINE_DMA_FMT,\n+\t\t\tscenario_id, lcore_id, dev_id, buf_size,\n+\t\t\tnr_buf, memory, ave_cycle, bandwidth, ops);\n+\telse\n+\t\tsnprintf(output_str[lcore_id], MAX_OUTPUT_STR_LEN,\n+\t\t\tCSV_LINE_CPU_FMT,\n+\t\t\tscenario_id, lcore_id, buf_size,\n+\t\t\tnr_buf, memory, ave_cycle, bandwidth, ops);\n+}\n+\n+static inline void\n+cache_flush_buf(void *arg)\n+{\n+\tchar *data;\n+\tchar *addr;\n+\tstruct buf_info *info = arg;\n+\tstruct rte_mbuf **srcs = info->array;\n+\tuint32_t i, k;\n+\n+\tfor (i = 0; i < info->nr_buf; i++) {\n+\t\tdata = rte_pktmbuf_mtod(srcs[i], char *);\n+\t\tfor (k = 0; k < info->buf_size / 64; k++) {\n+\t\t\taddr = (k * 64 + data);\n+\t\t\t__builtin_ia32_clflush(addr);\n+\t\t}\n+\t}\n+}\n+\n+/* Configuration of device. */\n+static void\n+configure_dmadev_queue(uint32_t dev_id, uint32_t ring_size)\n+{\n+\tuint16_t vchan = 0;\n+\tstruct rte_dma_info info;\n+\tstruct rte_dma_conf dev_config = { .nb_vchans = 1 };\n+\tstruct rte_dma_vchan_conf qconf = {\n+\t\t.direction = RTE_DMA_DIR_MEM_TO_MEM,\n+\t\t.nb_desc = ring_size\n+\t};\n+\n+\tif (rte_dma_configure(dev_id, &dev_config) != 0)\n+\t\trte_exit(EXIT_FAILURE, \"Error with rte_dma_configure()\\n\");\n+\n+\tif (rte_dma_vchan_setup(dev_id, vchan, &qconf) != 0) {\n+\t\tprintf(\"Error with queue configuration\\n\");\n+\t\trte_panic();\n+\t}\n+\n+\trte_dma_info_get(dev_id, &info);\n+\tif (info.nb_vchans != 1) {\n+\t\tprintf(\"Error, no configured queues reported on device id %u\\n\", dev_id);\n+\t\trte_panic();\n+\t}\n+\tif (rte_dma_start(dev_id) != 0)\n+\t\trte_exit(EXIT_FAILURE, \"Error with rte_dma_start()\\n\");\n+}\n+\n+static int\n+config_dmadevs(uint32_t nb_workers, uint32_t ring_size)\n+{\n+\tint16_t dev_id = rte_dma_next_dev(0);\n+\tuint32_t i;\n+\n+\tnb_dmadevs = 0;\n+\n+\tfor (i = 0; i < nb_workers; i++) {\n+\t\tif (dev_id == -1)\n+\t\t\tgoto end;\n+\n+\t\tdmadev_ids[i] = dev_id;\n+\t\tconfigure_dmadev_queue(dmadev_ids[i], ring_size);\n+\t\tdev_id = rte_dma_next_dev(dev_id + 1);\n+\t\t++nb_dmadevs;\n+\t}\n+\n+end:\n+\tif (nb_dmadevs < nb_workers) {\n+\t\tprintf(\"Not enough dmadevs (%u) for all workers (%u).\\n\", nb_dmadevs, nb_workers);\n+\t\treturn -1;\n+\t}\n+\n+\tRTE_LOG(INFO, DMA, \"Number of used dmadevs: %u.\\n\", nb_dmadevs);\n+\n+\treturn 0;\n+}\n+\n+static inline void\n+do_dma_copy(uint16_t dev_id, uint32_t nr_buf, uint16_t kick_batch, uint32_t buf_size,\n+\t\t\tuint16_t mpool_iter_step, struct rte_mbuf **srcs, struct rte_mbuf **dsts)\n+{\n+\tint64_t async_cnt = 0;\n+\tint nr_cpl = 0;\n+\tuint32_t index;\n+\tuint16_t offset;\n+\tuint32_t i;\n+\n+\tfor (offset = 0; offset < mpool_iter_step; offset++) {\n+\t\tfor (i = 0; index = i * mpool_iter_step + offset, index < nr_buf; i++) {\n+\t\t\tif (unlikely(rte_dma_copy(dev_id,\n+\t\t\t\t\t\t0,\n+\t\t\t\t\t\tsrcs[index]->buf_iova + srcs[index]->data_off,\n+\t\t\t\t\t\tdsts[index]->buf_iova + dsts[index]->data_off,\n+\t\t\t\t\t\tbuf_size,\n+\t\t\t\t\t\t0) < 0)) {\n+\t\t\t\trte_dma_submit(dev_id, 0);\n+\t\t\t\twhile (rte_dma_burst_capacity(dev_id, 0) == 0) {\n+\t\t\t\t\tnr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB,\n+\t\t\t\t\t\t\t\tNULL, NULL);\n+\t\t\t\t\tasync_cnt -= nr_cpl;\n+\t\t\t\t}\n+\t\t\t\tif (rte_dma_copy(dev_id,\n+\t\t\t\t\t\t0,\n+\t\t\t\t\t\tsrcs[index]->buf_iova + srcs[index]->data_off,\n+\t\t\t\t\t\tdsts[index]->buf_iova + dsts[index]->data_off,\n+\t\t\t\t\t\tbuf_size,\n+\t\t\t\t\t\t0) < 0) {\n+\t\t\t\t\tprintf(\"enqueue fail again at %u\\n\", index);\n+\t\t\t\t\tprintf(\"space:%d\\n\", rte_dma_burst_capacity(dev_id, 0));\n+\t\t\t\t\trte_exit(EXIT_FAILURE, \"DMA enqueue failed\\n\");\n+\t\t\t\t}\n+\t\t\t}\n+\t\t\tasync_cnt++;\n+\n+\t\t\t/**\n+\t\t\t * When '&' is used to wrap an index, mask must be a power of 2.\n+\t\t\t * That is, kick_batch must be 2^n.\n+\t\t\t */\n+\t\t\tif (unlikely((async_cnt % kick_batch) == 0)) {\n+\t\t\t\trte_dma_submit(dev_id, 0);\n+\t\t\t\t/* add a poll to avoid ring full */\n+\t\t\t\tnr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB, NULL, NULL);\n+\t\t\t\tasync_cnt -= nr_cpl;\n+\t\t\t}\n+\t\t}\n+\n+\t\trte_dma_submit(dev_id, 0);\n+\t\twhile (async_cnt > 0) {\n+\t\t\tnr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB, NULL, NULL);\n+\t\t\tasync_cnt -= nr_cpl;\n+\t\t}\n+\t}\n+}\n+\n+static int\n+dma_copy(void *p)\n+{\n+\tuint64_t ops;\n+\tuint32_t memory;\n+\tfloat bandwidth;\n+\tdouble time_sec;\n+\tuint32_t lcore_id = rte_lcore_id();\n+\tstruct lcore_params *params = (struct lcore_params *)p;\n+\tuint32_t repeat_times = params->repeat_times;\n+\tuint32_t buf_size = params->buf_size;\n+\tuint16_t kick_batch = params->kick_batch;\n+\tuint32_t lcore_nr_buf = params->nr_buf;\n+\tuint16_t dev_id = params->dev_id;\n+\tuint16_t mpool_iter_step = params->mpool_iter_step;\n+\tstruct rte_mbuf **srcs = params->srcs;\n+\tstruct rte_mbuf **dsts = params->dsts;\n+\tuint64_t begin, end, total_cycles = 0, avg_cycles = 0;\n+\tuint32_t r;\n+\n+\tbegin = rte_rdtsc();\n+\n+\tfor (r = 0; r < repeat_times; r++)\n+\t\tdo_dma_copy(dev_id, lcore_nr_buf, kick_batch, buf_size,\n+\t\t\tmpool_iter_step, srcs, dsts);\n+\n+\tend = rte_rdtsc();\n+\ttotal_cycles = end - begin;\n+\ttime_sec = (double)total_cycles / rte_get_timer_hz();\n+\n+\tcalc_result(params, total_cycles, time_sec, repeat_times, &memory,\n+\t\t\t&avg_cycles, &bandwidth, &ops);\n+\toutput_result(params->scenario_id, lcore_id, dev_id, avg_cycles, buf_size, lcore_nr_buf,\n+\t\t\tmemory, bandwidth, ops, true);\n+\n+\trte_free(p);\n+\n+\treturn 0;\n+}\n+\n+static int\n+cpu_copy(void *p)\n+{\n+\tuint32_t idx;\n+\tuint32_t lcore_id;\n+\tuint32_t memory;\n+\tuint64_t ops;\n+\tfloat bandwidth;\n+\tdouble time_sec;\n+\tstruct lcore_params *params = (struct lcore_params *)p;\n+\tuint32_t repeat_times = params->repeat_times;\n+\tuint32_t buf_size = params->buf_size;\n+\tuint32_t lcore_nr_buf = params->nr_buf;\n+\tuint16_t mpool_iter_step = params->mpool_iter_step;\n+\tstruct rte_mbuf **srcs = params->srcs;\n+\tstruct rte_mbuf **dsts = params->dsts;\n+\tuint64_t begin, end, total_cycles = 0, avg_cycles = 0;\n+\tuint32_t k, j, offset;\n+\n+\tbegin = rte_rdtsc();\n+\n+\tfor (k = 0; k < repeat_times; k++) {\n+\t\t/* copy buffer form src to dst */\n+\t\tfor (offset = 0; offset < mpool_iter_step; offset++) {\n+\t\t\tfor (j = 0; idx = j * mpool_iter_step + offset, idx < lcore_nr_buf; j++) {\n+\t\t\t\trte_memcpy((void *)((uint64_t)dsts[idx]->buf_addr\n+\t\t\t\t\t\t\t+ dsts[idx]->data_off),\n+\t\t\t\t\t\t(void *)((uint64_t)srcs[idx]->buf_addr\n+\t\t\t\t\t\t\t+ srcs[idx]->data_off),\n+\t\t\t\t\t\tbuf_size);\n+\t\t\t}\n+\t\t}\n+\t}\n+\n+\tend = rte_rdtsc();\n+\ttotal_cycles = end - begin;\n+\ttime_sec = (double)total_cycles / rte_get_timer_hz();\n+\n+\tlcore_id = rte_lcore_id();\n+\n+\tcalc_result(params, total_cycles, time_sec, repeat_times, &memory,\n+\t\t\t&avg_cycles, &bandwidth, &ops);\n+\toutput_result(params->scenario_id, lcore_id, 0, avg_cycles, buf_size, lcore_nr_buf,\n+\t\t\tmemory, bandwidth, ops, false);\n+\n+\trte_free(p);\n+\n+\treturn 0;\n+}\n+\n+static int\n+setup_memory_env(struct test_configure *cfg, struct rte_mbuf ***srcs,\n+\t\t\tstruct rte_mbuf ***dsts)\n+{\n+\tuint32_t i;\n+\tunsigned int buf_size = cfg->buf_size.cur;\n+\tfloat mem_size = cfg->mem_size.cur;\n+\tuint32_t nr_buf = (mem_size * 1024 * 1024) / (buf_size * 2);\n+\tstruct rte_config *rte_cfg;\n+\n+\trte_cfg = rte_eal_get_configuration();\n+\tif (cfg->src_numa_node >= rte_cfg->numa_node_count ||\n+\t\tcfg->dst_numa_node >= rte_cfg->numa_node_count) {\n+\t\tprintf(\"Error: Source or destination numa exceeds the acture numa nodes.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tcfg->nr_buf = nr_buf;\n+\n+\tsrc_pool = rte_pktmbuf_pool_create(\"Benchmark_DMA_SRC\",\n+\t\t\tnr_buf, /* n == num elements */\n+\t\t\t64,  /* cache size */\n+\t\t\t0,   /* priv size */\n+\t\t\tbuf_size + RTE_PKTMBUF_HEADROOM,\n+\t\t\tcfg->src_numa_node);\n+\tif (src_pool == NULL) {\n+\t\tPRINT_ERR(\"Error with source mempool creation.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tdst_pool = rte_pktmbuf_pool_create(\"Benchmark_DMA_DST\",\n+\t\t\tnr_buf, /* n == num elements */\n+\t\t\t64,  /* cache size */\n+\t\t\t0,   /* priv size */\n+\t\t\tbuf_size + RTE_PKTMBUF_HEADROOM,\n+\t\t\tcfg->dst_numa_node);\n+\tif (dst_pool == NULL) {\n+\t\tPRINT_ERR(\"Error with destination mempool creation.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\t*srcs = (struct rte_mbuf **)(malloc(nr_buf * sizeof(struct rte_mbuf *)));\n+\tif (*srcs == NULL) {\n+\t\tprintf(\"Error: srcs malloc failed.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\t*dsts = (struct rte_mbuf **)(malloc(nr_buf * sizeof(struct rte_mbuf *)));\n+\tif (*dsts == NULL) {\n+\t\tprintf(\"Error: dsts malloc failed.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tfor (i = 0; i < nr_buf; i++) {\n+\t\t(*srcs)[i] = rte_pktmbuf_alloc(src_pool);\n+\t\t(*dsts)[i] = rte_pktmbuf_alloc(dst_pool);\n+\t\tif ((!(*srcs)[i]) || (!(*dsts)[i])) {\n+\t\t\tprintf(\"src: %p, dst: %p\\n\", (*srcs)[i], (*dsts)[i]);\n+\t\t\treturn -1;\n+\t\t}\n+\n+\t\t(*srcs)[i]->data_len = (*srcs)[i]->pkt_len = buf_size;\n+\t\t(*dsts)[i]->data_len = (*dsts)[i]->pkt_len = buf_size;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+void\n+dma_copy_benchmark(struct test_configure *cfg)\n+{\n+\tuint32_t i;\n+\tuint32_t offset;\n+\tunsigned int lcore_id  = 0;\n+\tstruct rte_mbuf **srcs = NULL, **dsts = NULL;\n+\tunsigned int buf_size = cfg->buf_size.cur;\n+\tuint16_t kick_batch = cfg->kick_batch.cur;\n+\tuint16_t mpool_iter_step = cfg->mpool_iter_step;\n+\tuint32_t nr_buf;\n+\tuint16_t nb_workers = cfg->nb_workers;\n+\tuint32_t repeat_times = cfg->repeat_times;\n+\n+\tif (setup_memory_env(cfg, &srcs, &dsts) < 0)\n+\t\tgoto out;\n+\n+\tif (config_dmadevs(nb_workers, cfg->ring_size.cur) < 0)\n+\t\tgoto out;\n+\n+\tnr_buf = cfg->nr_buf;\n+\n+\tif (cfg->cache_flush) {\n+\t\tstruct buf_info info;\n+\n+\t\tinfo.array = srcs;\n+\t\tinfo.buf_size = buf_size;\n+\t\tinfo.nr_buf = nr_buf;\n+\t\tcache_flush_buf(&info);\n+\n+\t\tinfo.array = dsts;\n+\t\tcache_flush_buf(&info);\n+\t\t__builtin_ia32_mfence();\n+\t}\n+\n+\tprintf(\"Start testing....\\n\");\n+\n+\tfor (i = 0; i < nb_workers; i++) {\n+\t\tlcore_id = rte_get_next_lcore(lcore_id, true, true);\n+\t\toffset = nr_buf / nb_workers * i;\n+\n+\t\tstruct lcore_params *p = rte_malloc(NULL, sizeof(*p), 0);\n+\t\tif (!p) {\n+\t\t\tprintf(\"lcore parameters malloc failure for lcore %d\\n\", lcore_id);\n+\t\t\tbreak;\n+\t\t}\n+\t\t*p = (struct lcore_params) {\n+\t\t\tdmadev_ids[i],\n+\t\t\t(uint32_t)(nr_buf/nb_workers),\n+\t\t\tkick_batch,\n+\t\t\tbuf_size,\n+\t\t\trepeat_times,\n+\t\t\tmpool_iter_step,\n+\t\t\tsrcs + offset,\n+\t\t\tdsts + offset,\n+\t\t\tcfg->scenario_id\n+\t\t};\n+\n+\t\trte_eal_remote_launch((lcore_function_t *)dma_copy, p, lcore_id);\n+\t}\n+\n+\trte_eal_mp_wait_lcore();\n+\n+out:\n+\t/* free env */\n+\tif (srcs) {\n+\t\tfor (i = 0; i < cfg->nr_buf; i++)\n+\t\t\trte_pktmbuf_free(srcs[i]);\n+\t\tfree(srcs);\n+\t}\n+\tif (dsts) {\n+\t\tfor (i = 0; i < cfg->nr_buf; i++)\n+\t\t\trte_pktmbuf_free(dsts[i]);\n+\t\tfree(dsts);\n+\t}\n+\n+\tif (src_pool)\n+\t\trte_mempool_free(src_pool);\n+\tif (dst_pool)\n+\t\trte_mempool_free(dst_pool);\n+\n+\tfor (i = 0; i < nb_dmadevs; i++) {\n+\t\tprintf(\"Stopping dmadev %d\\n\", dmadev_ids[i]);\n+\t\trte_dma_stop(dmadev_ids[i]);\n+\t}\n+}\n+\n+void\n+cpu_copy_benchmark(struct test_configure *cfg)\n+{\n+\tuint32_t i, offset, nr_buf;\n+\tuint32_t repeat_times = cfg->repeat_times;\n+\tuint32_t kick_batch = cfg->kick_batch.cur;\n+\tuint32_t buf_size = cfg->buf_size.cur;\n+\tuint16_t nb_workers = cfg->nb_workers;\n+\tuint16_t mpool_iter_step = cfg->mpool_iter_step;\n+\tstruct rte_mbuf **srcs  = NULL, **dsts  = NULL;\n+\tunsigned int lcore_id = 0;\n+\n+\tif (setup_memory_env(cfg, &srcs, &dsts) < 0)\n+\t\tgoto out;\n+\n+\tnr_buf = cfg->nr_buf;\n+\n+\tfor (i = 0; i < nb_workers; i++) {\n+\t\tlcore_id = rte_get_next_lcore(lcore_id, rte_lcore_count() > 1 ? 1 : 0, 1);\n+\t\toffset = nr_buf / nb_workers * i;\n+\t\tstruct lcore_params *p = rte_malloc(NULL, sizeof(*p), 0);\n+\t\tif (!p) {\n+\t\t\tprintf(\"lcore parameters malloc failure for lcore %d\\n\", lcore_id);\n+\t\t\tbreak;\n+\t\t}\n+\t\t*p = (struct lcore_params) { 0, nr_buf/nb_workers, kick_batch,\n+\t\t\t\t\t\tbuf_size, repeat_times, mpool_iter_step,\n+\t\t\t\t\t\tsrcs + offset, dsts + offset, cfg->scenario_id };\n+\t\trte_eal_remote_launch((lcore_function_t *)cpu_copy, p, lcore_id);\n+\t}\n+\n+\trte_eal_mp_wait_lcore();\n+\n+out:\n+\t/* free env */\n+\tif (srcs) {\n+\t\tfor (i = 0; i < cfg->nr_buf; i++)\n+\t\t\trte_pktmbuf_free(srcs[i]);\n+\t\tfree(srcs);\n+\t}\n+\tif (dsts) {\n+\t\tfor (i = 0; i < cfg->nr_buf; i++)\n+\t\t\trte_pktmbuf_free(dsts[i]);\n+\t\tfree(dsts);\n+\t}\n+\n+\tif (src_pool)\n+\t\trte_mempool_free(src_pool);\n+\tif (dst_pool)\n+\t\trte_mempool_free(dst_pool);\n+}\ndiff --git a/app/test-dma-perf/benchmark.h b/app/test-dma-perf/benchmark.h\nnew file mode 100644\nindex 0000000000..2809985dcc\n--- /dev/null\n+++ b/app/test-dma-perf/benchmark.h\n@@ -0,0 +1,12 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2022 Intel Corporation\n+ */\n+\n+#ifndef _BENCHMARK_H_\n+#define _BENCHMARK_H_\n+\n+void dma_copy_benchmark(struct test_configure *cfg);\n+\n+void cpu_copy_benchmark(struct test_configure *cfg);\n+\n+#endif /* _BENCHMARK_H_ */\ndiff --git a/app/test-dma-perf/config.ini b/app/test-dma-perf/config.ini\nnew file mode 100644\nindex 0000000000..e35730b7fe\n--- /dev/null\n+++ b/app/test-dma-perf/config.ini\n@@ -0,0 +1,61 @@\n+\n+; Supported test types:\n+; DMA_COPY|CPU_COPY\n+\n+; Parameters:\n+; \"mem_size\",\"buf_size\",\"dma_ring_size\",\"kick_batch\".\n+; \"mem_size\" means the size of the memory footprint.\n+; \"buf_size\" means the memory size of a single operation.\n+; \"dma_ring_size\" means the dma ring buffer size.\n+; \"kick_batch\" means dma operation batch size.\n+\n+; Format: variable=first[,last,increment[,ADD|MUL]]\n+; ADD is the default mode.\n+\n+; src_numa_node is used to control the numa node where the source memory is allocated.\n+; dst_numa_node is used to control the numa node where the destination memory is allocated.\n+\n+; cache_flush is used to control if the cache should be flushed.\n+\n+; repeat_times is used to control the repeat times of the whole case.\n+\n+; worker_threads is used to control the threads number of the test app.\n+; It should be less than the core number.\n+\n+; mpool_iter_step is used to control the buffer continuity.\n+\n+; Bind DMA to lcore:\n+; Specify the \"lcore_dma\" parameter.\n+; The number of \"lcore_dma\" should be greater than or equal to the number of \"worker_threads\".\n+; Otherwise the remaining DMA devices will be automatically allocated to threads that are not\n+; specified. If EAL parameters \"-l\" and \"-a\" are specified, the \"lcore_dma\" should be within\n+; their range.\n+\n+[case1]\n+type=DMA_COPY\n+mem_size=10\n+buf_size=64,8192,2,MUL\n+dma_ring_size=1024\n+kick_batch=32\n+src_numa_node=0\n+dst_numa_node=0\n+cache_flush=0\n+repeat_times=10\n+worker_threads=1\n+mpool_iter_step=1\n+lcore_dma=lcore3@0000:00:04.0\n+eal_args=--legacy-mem --file-prefix=test\n+\n+[case2]\n+type=CPU_COPY\n+mem_size=10\n+buf_size=64,8192,2,MUL\n+dma_ring_size=1024\n+kick_batch=32\n+src_numa_node=0\n+dst_numa_node=1\n+cache_flush=0\n+repeat_times=100\n+worker_threads=1\n+mpool_iter_step=1\n+eal_args=--no-pci\ndiff --git a/app/test-dma-perf/main.c b/app/test-dma-perf/main.c\nnew file mode 100644\nindex 0000000000..4bf2838a8c\n--- /dev/null\n+++ b/app/test-dma-perf/main.c\n@@ -0,0 +1,404 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2022 Intel Corporation\n+ */\n+\n+#include <getopt.h>\n+#include <signal.h>\n+#include <stdbool.h>\n+#include <unistd.h>\n+#include <sys/wait.h>\n+#include <inttypes.h>\n+\n+#include <rte_eal.h>\n+#include <rte_cfgfile.h>\n+#include <rte_string_fns.h>\n+#include <rte_lcore.h>\n+\n+#include \"main.h\"\n+#include \"benchmark.h\"\n+\n+#define CSV_HDR_FMT \"Case %u : %s,lcore,DMA,buffer size,nr_buf,memory(MB),cycle,bandwidth(Gbps),OPS\\n\"\n+\n+#define MAX_EAL_PARM_NB 100\n+#define MAX_EAL_PARM_LEN 1024\n+\n+#define DMA_COPY \"DMA_COPY\"\n+#define CPU_COPY \"CPU_COPY\"\n+\n+#define MAX_PARAMS_PER_ENTRY 4\n+\n+enum {\n+\tTEST_TYPE_NONE = 0,\n+\tTEST_TYPE_DMA_COPY,\n+\tTEST_TYPE_CPU_COPY\n+};\n+\n+#define MAX_TEST_CASES 16\n+static struct test_configure test_cases[MAX_TEST_CASES];\n+\n+static FILE *fd;\n+\n+static void\n+output_csv(bool need_blankline)\n+{\n+\tuint32_t i;\n+\n+\tif (need_blankline) {\n+\t\tfprintf(fd, \"%s\", \",,,,,,,,\\n\");\n+\t\tfprintf(fd, \"%s\", \",,,,,,,,\\n\");\n+\t}\n+\n+\tfor (i = 0; i < RTE_DIM(output_str); i++) {\n+\t\tif (output_str[i][0]) {\n+\t\t\tfprintf(fd, \"%s\", output_str[i]);\n+\t\t\tmemset(output_str[i], 0, MAX_OUTPUT_STR_LEN);\n+\t\t}\n+\t}\n+\n+\tfflush(fd);\n+}\n+\n+static void\n+output_env_info(void)\n+{\n+\tsnprintf(output_str[0], MAX_OUTPUT_STR_LEN, \"test environment:\\n\");\n+\tsnprintf(output_str[1], MAX_OUTPUT_STR_LEN, \"frequency,%\" PRIu64 \"\\n\", rte_get_timer_hz());\n+\n+\toutput_csv(true);\n+}\n+\n+static void\n+output_header(uint32_t case_id, struct test_configure *case_cfg)\n+{\n+\tsnprintf(output_str[0], MAX_OUTPUT_STR_LEN,\n+\t\t\tCSV_HDR_FMT, case_id, case_cfg->test_type_str);\n+\n+\toutput_csv(true);\n+}\n+\n+static void\n+run_test_case(struct test_configure *case_cfg)\n+{\n+\tswitch (case_cfg->test_type) {\n+\tcase TEST_TYPE_DMA_COPY:\n+\t\tdma_copy_benchmark(case_cfg);\n+\t\tbreak;\n+\tcase TEST_TYPE_CPU_COPY:\n+\t\tcpu_copy_benchmark(case_cfg);\n+\t\tbreak;\n+\tdefault:\n+\t\tprintf(\"Unknown test type. %s\\n\", case_cfg->test_type_str);\n+\t\tbreak;\n+\t}\n+}\n+\n+static void\n+run_test(uint32_t case_id, struct test_configure *case_cfg)\n+{\n+\tuint32_t i;\n+\tuint32_t nb_lcores = rte_lcore_count();\n+\tstruct test_configure_entry *mem_size = &case_cfg->mem_size;\n+\tstruct test_configure_entry *buf_size = &case_cfg->buf_size;\n+\tstruct test_configure_entry *ring_size = &case_cfg->ring_size;\n+\tstruct test_configure_entry *kick_batch = &case_cfg->kick_batch;\n+\tstruct test_configure_entry *var_entry = NULL;\n+\n+\tfor (i = 0; i < RTE_DIM(output_str); i++)\n+\t\tmemset(output_str[i], 0, MAX_OUTPUT_STR_LEN);\n+\n+\tif (nb_lcores <= case_cfg->nb_workers) {\n+\t\tprintf(\"Case %u: Not enough lcores (%u) for all workers (%u).\\n\",\n+\t\t\tcase_id, nb_lcores, case_cfg->nb_workers);\n+\t\treturn;\n+\t}\n+\n+\tRTE_LOG(INFO, DMA, \"Number of used lcores: %u.\\n\", nb_lcores);\n+\n+\tif (mem_size->incr != 0)\n+\t\tvar_entry = mem_size;\n+\n+\tif (buf_size->incr != 0)\n+\t\tvar_entry = buf_size;\n+\n+\tif (ring_size->incr != 0)\n+\t\tvar_entry = ring_size;\n+\n+\tif (kick_batch->incr != 0)\n+\t\tvar_entry = kick_batch;\n+\n+\tcase_cfg->scenario_id = 0;\n+\n+\toutput_header(case_id, case_cfg);\n+\n+\tif (var_entry) {\n+\t\tfor (var_entry->cur = var_entry->first; var_entry->cur <= var_entry->last;) {\n+\t\t\tcase_cfg->scenario_id++;\n+\t\t\tprintf(\"\\nRunning scenario %d\\n\", case_cfg->scenario_id);\n+\n+\t\t\trun_test_case(case_cfg);\n+\t\t\toutput_csv(false);\n+\n+\t\t\tif (var_entry->op == OP_MUL)\n+\t\t\t\tvar_entry->cur *= var_entry->incr;\n+\t\t\telse\n+\t\t\t\tvar_entry->cur += var_entry->incr;\n+\n+\n+\t\t}\n+\t} else {\n+\t\trun_test_case(case_cfg);\n+\t\toutput_csv(false);\n+\t}\n+}\n+\n+static int\n+parse_entry(const char *value, struct test_configure_entry *entry)\n+{\n+\tchar input[255] = {0};\n+\tchar *args[MAX_PARAMS_PER_ENTRY];\n+\tint args_nr = -1;\n+\n+\tstrncpy(input, value, 254);\n+\tif (*input == '\\0')\n+\t\tgoto out;\n+\n+\targs_nr = rte_strsplit(input, strlen(input), args, MAX_PARAMS_PER_ENTRY, ',');\n+\tif (args_nr <= 0)\n+\t\tgoto out;\n+\n+\tentry->cur = entry->first = (uint32_t)atoi(args[0]);\n+\tentry->last = args_nr > 1 ? (uint32_t)atoi(args[1]) : 0;\n+\tentry->incr = args_nr > 2 ? (uint32_t)atoi(args[2]) : 0;\n+\n+\tif (args_nr > 3) {\n+\t\tif (!strcmp(args[3], \"MUL\"))\n+\t\t\tentry->op = OP_MUL;\n+\t\telse\n+\t\t\tentry->op = OP_ADD;\n+\t} else\n+\t\tentry->op = OP_NONE;\n+out:\n+\treturn args_nr;\n+}\n+\n+static void\n+load_configs(void)\n+{\n+\tstruct rte_cfgfile *cfgfile;\n+\tint nb_sections, i;\n+\tstruct test_configure *test_case;\n+\tchar **sections_name;\n+\tconst char *section_name, *case_type;\n+\tconst char *mem_size_str, *buf_size_str, *ring_size_str, *kick_batch_str;\n+\tint args_nr, nb_vp;\n+\n+\tsections_name = malloc(MAX_TEST_CASES * sizeof(char *));\n+\tfor (i = 0; i < MAX_TEST_CASES; i++)\n+\t\tsections_name[i] = malloc(CFG_NAME_LEN * sizeof(char *));\n+\n+\tcfgfile = rte_cfgfile_load(\"./config.ini\", 0);\n+\tif (!cfgfile) {\n+\t\tprintf(\"Open configure file error.\\n\");\n+\t\texit(1);\n+\t}\n+\n+\tnb_sections = rte_cfgfile_num_sections(cfgfile, NULL, 0);\n+\tif (nb_sections > MAX_TEST_CASES) {\n+\t\tprintf(\"Error: The maximum number of cases is %d.\\n\", MAX_TEST_CASES);\n+\t\texit(1);\n+\t}\n+\trte_cfgfile_sections(cfgfile, sections_name, MAX_TEST_CASES);\n+\tfor (i = 0; i < nb_sections; i++) {\n+\t\ttest_case = &test_cases[i];\n+\t\tsection_name = sections_name[i];\n+\t\tcase_type = rte_cfgfile_get_entry(cfgfile, section_name, \"type\");\n+\t\tif (!case_type) {\n+\t\t\tprintf(\"Error: No case type in case %d\\n.\", i + 1);\n+\t\t\texit(1);\n+\t\t}\n+\t\tif (!strcmp(case_type, DMA_COPY)) {\n+\t\t\ttest_case->test_type = TEST_TYPE_DMA_COPY;\n+\t\t\ttest_case->test_type_str = DMA_COPY;\n+\t\t} else if (!strcmp(case_type, CPU_COPY)) {\n+\t\t\ttest_case->test_type = TEST_TYPE_CPU_COPY;\n+\t\t\ttest_case->test_type_str = CPU_COPY;\n+\t\t} else {\n+\t\t\tprintf(\"Error: Cannot find case type %s.\\n\", case_type);\n+\t\t\texit(1);\n+\t\t}\n+\n+\t\tnb_vp = 0;\n+\n+\t\ttest_case->src_numa_node = (int)atoi(rte_cfgfile_get_entry(cfgfile,\n+\t\t\t\t\t\t\t\tsection_name, \"src_numa_node\"));\n+\t\ttest_case->dst_numa_node = (int)atoi(rte_cfgfile_get_entry(cfgfile,\n+\t\t\t\t\t\t\t\tsection_name, \"dst_numa_node\"));\n+\n+\t\tmem_size_str = rte_cfgfile_get_entry(cfgfile, section_name, \"mem_size\");\n+\t\targs_nr = parse_entry(mem_size_str, &test_case->mem_size);\n+\t\tif (args_nr < 0) {\n+\t\t\tprintf(\"parse error\\n\");\n+\t\t\tbreak;\n+\t\t} else if (args_nr > 1)\n+\t\t\tnb_vp++;\n+\n+\t\tbuf_size_str = rte_cfgfile_get_entry(cfgfile, section_name, \"buf_size\");\n+\t\targs_nr = parse_entry(buf_size_str, &test_case->buf_size);\n+\t\tif (args_nr < 0) {\n+\t\t\tprintf(\"parse error\\n\");\n+\t\t\tbreak;\n+\t\t} else if (args_nr > 1)\n+\t\t\tnb_vp++;\n+\n+\t\tring_size_str = rte_cfgfile_get_entry(cfgfile, section_name, \"dma_ring_size\");\n+\t\targs_nr = parse_entry(ring_size_str, &test_case->ring_size);\n+\t\tif (args_nr < 0) {\n+\t\t\tprintf(\"parse error\\n\");\n+\t\t\tbreak;\n+\t\t} else if (args_nr > 1)\n+\t\t\tnb_vp++;\n+\n+\t\tkick_batch_str = rte_cfgfile_get_entry(cfgfile, section_name, \"kick_batch\");\n+\t\targs_nr = parse_entry(kick_batch_str, &test_case->kick_batch);\n+\t\tif (args_nr < 0) {\n+\t\t\tprintf(\"parse error\\n\");\n+\t\t\tbreak;\n+\t\t} else if (args_nr > 1)\n+\t\t\tnb_vp++;\n+\n+\t\tif (nb_vp > 2) {\n+\t\t\tprintf(\"%s, variable parameters can only have one.\\n\", section_name);\n+\t\t\tbreak;\n+\t\t}\n+\n+\t\ttest_case->cache_flush =\n+\t\t\t(int)atoi(rte_cfgfile_get_entry(cfgfile, section_name, \"cache_flush\"));\n+\t\ttest_case->repeat_times =\n+\t\t\t(uint32_t)atoi(rte_cfgfile_get_entry(cfgfile,\n+\t\t\t\t\tsection_name, \"repeat_times\"));\n+\t\ttest_case->nb_workers =\n+\t\t\t(uint16_t)atoi(rte_cfgfile_get_entry(cfgfile,\n+\t\t\t\t\tsection_name, \"worker_threads\"));\n+\t\ttest_case->mpool_iter_step =\n+\t\t\t(uint16_t)atoi(rte_cfgfile_get_entry(cfgfile,\n+\t\t\t\t\tsection_name, \"mpool_iter_step\"));\n+\n+\t\ttest_case->eal_args = rte_cfgfile_get_entry(cfgfile, section_name, \"eal_args\");\n+\t}\n+\n+\trte_cfgfile_close(cfgfile);\n+\tfor (i = 0; i < MAX_TEST_CASES; i++) {\n+\t\tif (sections_name[i] != NULL)\n+\t\t\tfree(sections_name[i]);\n+\t}\n+\tfree(sections_name);\n+}\n+\n+/* Parse the argument given in the command line of the application */\n+static int\n+append_eal_args(int argc, char **argv, const char *eal_args, char **new_argv)\n+{\n+\tint i;\n+\tchar *tokens[MAX_EAL_PARM_NB];\n+\tchar args[MAX_EAL_PARM_LEN] = {0};\n+\tint new_argc, token_nb;\n+\n+\tnew_argc = argc;\n+\n+\tfor (i = 0; i < argc; i++)\n+\t\tstrcpy(new_argv[i], argv[i]);\n+\n+\tif (eal_args) {\n+\t\tstrcpy(args, eal_args);\n+\t\ttoken_nb = rte_strsplit(args, strlen(args),\n+\t\t\t\t\ttokens, MAX_EAL_PARM_NB, ' ');\n+\t\tfor (i = 0; i < token_nb; i++)\n+\t\t\tstrcpy(new_argv[new_argc++], tokens[i]);\n+\t}\n+\n+\treturn new_argc;\n+}\n+\n+int\n+main(int argc, char *argv[])\n+{\n+\tint ret;\n+\tuint32_t i, nb_lcores;\n+\tpid_t child_pid, wpid;\n+\tint status = 0;\n+\tchar args[MAX_EAL_PARM_NB][MAX_EAL_PARM_LEN] = {0};\n+\tchar *pargs[100];\n+\tint new_argc;\n+\n+\n+\tfor (i = 0; i < 100; i++)\n+\t\tpargs[i] = args[i];\n+\n+\tload_configs();\n+\tfd = fopen(\"./test_result.csv\", \"w\");\n+\tif (!fd) {\n+\t\tprintf(\"Open output CSV file error.\\n\");\n+\t\treturn 0;\n+\t}\n+\tfclose(fd);\n+\n+\t/* loop each case, run it */\n+\tfor (i = 0; i < MAX_TEST_CASES; i++) {\n+\t\tif (test_cases[i].test_type != TEST_TYPE_NONE) {\n+\t\t\tchild_pid = fork();\n+\t\t\tif (child_pid < 0) {\n+\t\t\t\tprintf(\"Fork case %d failed.\\n\", i + 1);\n+\t\t\t\texit(EXIT_FAILURE);\n+\t\t\t} else if (child_pid == 0) {\n+\t\t\t\tprintf(\"\\nRunning case %u\\n\", i + 1);\n+\n+\t\t\t\tif (test_cases[i].eal_args) {\n+\t\t\t\t\tnew_argc = append_eal_args(argc, argv,\n+\t\t\t\t\t\ttest_cases[i].eal_args, pargs);\n+\n+\t\t\t\t\tret = rte_eal_init(new_argc, pargs);\n+\t\t\t\t} else {\n+\t\t\t\t\tret = rte_eal_init(argc, argv);\n+\t\t\t\t}\n+\t\t\t\tif (ret < 0)\n+\t\t\t\t\trte_exit(EXIT_FAILURE, \"Invalied EAL arguments\\n\");\n+\n+\t\t\t\t/* Check lcores. */\n+\t\t\t\tnb_lcores = rte_lcore_count();\n+\t\t\t\tif (nb_lcores < 2)\n+\t\t\t\t\trte_exit(EXIT_FAILURE,\n+\t\t\t\t\t\t\"There should be at least 2 worker lcores.\\n\");\n+\n+\t\t\t\tfd = fopen(\"./test_result.csv\", \"a\");\n+\t\t\t\tif (!fd) {\n+\t\t\t\t\tprintf(\"Open output CSV file error.\\n\");\n+\t\t\t\t\treturn 0;\n+\t\t\t\t}\n+\n+\t\t\t\tif (i == 0)\n+\t\t\t\t\toutput_env_info();\n+\t\t\t\trun_test(i + 1, &test_cases[i]);\n+\n+\t\t\t\t/* clean up the EAL */\n+\t\t\t\trte_eal_cleanup();\n+\n+\t\t\t\tfclose(fd);\n+\n+\t\t\t\tprintf(\"\\nCase %u completed.\\n\", i + 1);\n+\n+\t\t\t\texit(EXIT_SUCCESS);\n+\t\t\t} else {\n+\t\t\t\tdo {\n+\t\t\t\t\twpid = wait(&status);\n+\t\t\t\t\tif (wpid == -1) {\n+\t\t\t\t\t\tprintf(\"Child process return error.\\n\");\n+\t\t\t\t\t\texit(EXIT_FAILURE);\n+\t\t\t\t\t}\n+\t\t\t\t} while (!WIFEXITED(status) && !WIFSIGNALED(status));\n+\t\t\t}\n+\t\t}\n+\t}\n+\n+\tprintf(\"Bye...\\n\");\n+\treturn 0;\n+}\ndiff --git a/app/test-dma-perf/main.h b/app/test-dma-perf/main.h\nnew file mode 100644\nindex 0000000000..e7b0e78dc4\n--- /dev/null\n+++ b/app/test-dma-perf/main.h\n@@ -0,0 +1,55 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2022 Intel Corporation\n+ */\n+\n+#ifndef _MAIN_H_\n+#define _MAIN_H_\n+\n+\n+#include <rte_common.h>\n+#include <rte_cycles.h>\n+\n+#define MAX_WORKER_NB 128\n+#define MAX_OUTPUT_STR_LEN 512\n+\n+#define RTE_LOGTYPE_DMA RTE_LOGTYPE_USER1\n+\n+typedef enum {\n+\tOP_NONE = 0,\n+\tOP_ADD,\n+\tOP_MUL\n+} alg_op_type;\n+\n+struct test_configure_entry {\n+\tuint32_t first;\n+\tuint32_t last;\n+\tuint32_t incr;\n+\talg_op_type op;\n+\tuint32_t cur;\n+};\n+\n+struct test_configure {\n+\tuint8_t test_type;\n+\tconst char *test_type_str;\n+\tuint16_t src_numa_node;\n+\tuint16_t dst_numa_node;\n+\tuint16_t opcode;\n+\tbool is_dma;\n+\tstruct test_configure_entry mem_size;\n+\tstruct test_configure_entry buf_size;\n+\tstruct test_configure_entry ring_size;\n+\tstruct test_configure_entry kick_batch;\n+\tuint32_t cache_flush;\n+\tuint32_t nr_buf;\n+\tuint32_t repeat_times;\n+\tuint32_t nb_workers;\n+\tuint16_t mpool_iter_step;\n+\tconst char *eal_args;\n+\tuint8_t scenario_id;\n+};\n+\n+uint16_t dmadev_ids[MAX_WORKER_NB];\n+uint32_t nb_dmadevs;\n+char output_str[MAX_WORKER_NB][MAX_OUTPUT_STR_LEN];\n+\n+#endif /* _MAIN_H_ */\ndiff --git a/app/test-dma-perf/meson.build b/app/test-dma-perf/meson.build\nnew file mode 100644\nindex 0000000000..931df6ed54\n--- /dev/null\n+++ b/app/test-dma-perf/meson.build\n@@ -0,0 +1,16 @@\n+# SPDX-License-Identifier: BSD-3-Clause\n+# Copyright(c) 2019-2022 Intel Corporation\n+\n+# meson file, for building this example as part of a main DPDK build.\n+#\n+# To build this example as a standalone application with an already-installed\n+# DPDK instance, use 'make'\n+\n+allow_experimental_apis = true\n+\n+deps += ['dmadev', 'mbuf', 'cfgfile']\n+\n+sources = files(\n+        'main.c',\n+        'benchmark.c',\n+)\n",
    "prefixes": [
        "RFC",
        "v2"
    ]
}