get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/128785/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 128785,
    "url": "http://patches.dpdk.org/api/patches/128785/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20230618122629.55569-1-cheng1.jiang@intel.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20230618122629.55569-1-cheng1.jiang@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20230618122629.55569-1-cheng1.jiang@intel.com",
    "date": "2023-06-18T12:26:29",
    "name": "[v7] app/dma-perf: introduce dma-perf application",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "ca3607fef4f73f89b801e935b637e3feb4b9e3ba",
    "submitter": {
        "id": 1530,
        "url": "http://patches.dpdk.org/api/people/1530/?format=api",
        "name": "Jiang, Cheng1",
        "email": "Cheng1.jiang@intel.com"
    },
    "delegate": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/users/1/?format=api",
        "username": "tmonjalo",
        "first_name": "Thomas",
        "last_name": "Monjalon",
        "email": "thomas@monjalon.net"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20230618122629.55569-1-cheng1.jiang@intel.com/mbox/",
    "series": [
        {
            "id": 28562,
            "url": "http://patches.dpdk.org/api/series/28562/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=28562",
            "date": "2023-06-18T12:26:29",
            "name": "[v7] app/dma-perf: introduce dma-perf application",
            "version": 7,
            "mbox": "http://patches.dpdk.org/series/28562/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/128785/comments/",
    "check": "warning",
    "checks": "http://patches.dpdk.org/api/patches/128785/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 7941342CA3;\n\tSun, 18 Jun 2023 15:03:20 +0200 (CEST)",
            "from mails.dpdk.org (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 0158E40E6E;\n\tSun, 18 Jun 2023 15:03:20 +0200 (CEST)",
            "from mga03.intel.com (mga03.intel.com [134.134.136.65])\n by mails.dpdk.org (Postfix) with ESMTP id B3AA440E03\n for <dev@dpdk.org>; Sun, 18 Jun 2023 15:03:17 +0200 (CEST)",
            "from orsmga002.jf.intel.com ([10.7.209.21])\n by orsmga103.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 18 Jun 2023 06:03:16 -0700",
            "from dpdk_jiangcheng.sh.intel.com ([10.67.119.139])\n by orsmga002.jf.intel.com with ESMTP; 18 Jun 2023 06:03:12 -0700"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/simple;\n d=intel.com; i=@intel.com; q=dns/txt; s=Intel;\n t=1687093397; x=1718629397;\n h=from:to:cc:subject:date:message-id:in-reply-to:\n references:mime-version:content-transfer-encoding;\n bh=ta/P7ZzHCyd9JbKudpUoLXlQ6c8yhudEwLCUSMYkLMg=;\n b=M3vWpl3S9lkLQ0NBbni5YCLkzeOJxpP8KK4rG5t06hTOXZCieHRS7z31\n FQgUKo8ls+DE4aT0Vs0pgQ4Z43kDRiSG3XgIMZogPA7I+A0bz/oP/7H8C\n EdMR2NXSdw+L5ayFVHmeFKm0vpxVVggvEy23qZJg7TJjJf41JgOfb6BjE\n pbFjjTBcCkUXHqR8vsnu5a4Hj3V+z9M8xXAx+Dh4K2r2yF0ot03JAKZ7L\n RLn5f1P6paf0BPPdQJXyzHdLV7gufXSlNumW9tfs5C09kDmCMF5PX47HJ\n LPVBttR5tXMChFmgRcNV3vuTp1q72244ZCy66IN0var/hY7tfPp6tluVv A==;",
        "X-IronPort-AV": [
            "E=McAfee;i=\"6600,9927,10745\"; a=\"362895390\"",
            "E=Sophos;i=\"6.00,252,1681196400\"; d=\"scan'208\";a=\"362895390\"",
            "E=McAfee;i=\"6600,9927,10745\"; a=\"713401300\"",
            "E=Sophos;i=\"6.00,252,1681196400\"; d=\"scan'208\";a=\"713401300\""
        ],
        "X-ExtLoop1": "1",
        "From": "Cheng Jiang <cheng1.jiang@intel.com>",
        "To": "thomas@monjalon.net, bruce.richardson@intel.com, mb@smartsharesystems.com,\n chenbo.xia@intel.com, amitprakashs@marvell.com, anoobj@marvell.com,\n huangdengdui@huawei.com",
        "Cc": "dev@dpdk.org, jiayu.hu@intel.com, xuan.ding@intel.com,\n wenwux.ma@intel.com,\n yuanx.wang@intel.com, xingguang.he@intel.com,\n Cheng Jiang <cheng1.jiang@intel.com>",
        "Subject": "[PATCH v7] app/dma-perf: introduce dma-perf application",
        "Date": "Sun, 18 Jun 2023 12:26:29 +0000",
        "Message-Id": "<20230618122629.55569-1-cheng1.jiang@intel.com>",
        "X-Mailer": "git-send-email 2.40.1",
        "In-Reply-To": "<20230420072215.19069-1-cheng1.jiang@intel.com>",
        "References": "<20230420072215.19069-1-cheng1.jiang@intel.com>",
        "MIME-Version": "1.0",
        "Content-Type": "text/plain; charset=UTF-8",
        "Content-Transfer-Encoding": "8bit",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "There are many high-performance DMA devices supported in DPDK now, and\nthese DMA devices can also be integrated into other modules of DPDK as\naccelerators, such as Vhost. Before integrating DMA into applications,\ndevelopers need to know the performance of these DMA devices in various\nscenarios and the performance of CPUs in the same scenario, such as\ndifferent buffer lengths. Only in this way can we know the target\nperformance of the application accelerated by using them. This patch\nintroduces a high-performance testing tool, which supports comparing the\nperformance of CPU and DMA in different scenarios automatically with a\npre-set config file. Memory Copy performance test are supported for now.\n\nSigned-off-by: Cheng Jiang <cheng1.jiang@intel.com>\nSigned-off-by: Jiayu Hu <jiayu.hu@intel.com>\nSigned-off-by: Yuan Wang <yuanx.wang@intel.com>\nAcked-by: Morten Brørup <mb@smartsharesystems.com>\nAcked-by: Chenbo Xia <chenbo.xia@intel.com>\n---\nv7:\n  fixed some strcpy issues;\n  removed cache setup in calling rte_pktmbuf_pool_create();\n  fixed some typos;\n  added some memory free and null set operations;\n  improved result calculation;\nv6:\n  improved code based on Anoob's comments;\n  fixed some code structure issues;\nv5:\n  fixed some LONG_LINE warnings;\nv4:\n  fixed inaccuracy of the memory footprint display;\nv3:\n  fixed some typos;\nv2:\n\n app/meson.build               |   1 +\n app/test-dma-perf/benchmark.c | 498 ++++++++++++++++++++++++++++\n app/test-dma-perf/config.ini  |  61 ++++\n app/test-dma-perf/main.c      | 594 ++++++++++++++++++++++++++++++++++\n app/test-dma-perf/main.h      |  69 ++++\n app/test-dma-perf/meson.build |  17 +\n 6 files changed, 1240 insertions(+)\n create mode 100644 app/test-dma-perf/benchmark.c\n create mode 100644 app/test-dma-perf/config.ini\n create mode 100644 app/test-dma-perf/main.c\n create mode 100644 app/test-dma-perf/main.h\n create mode 100644 app/test-dma-perf/meson.build\n\n--\n2.40.1",
    "diff": "diff --git a/app/meson.build b/app/meson.build\nindex 74d2420f67..4fc1a83eba 100644\n--- a/app/meson.build\n+++ b/app/meson.build\n@@ -19,6 +19,7 @@ apps = [\n         'test-cmdline',\n         'test-compress-perf',\n         'test-crypto-perf',\n+        'test-dma-perf',\n         'test-eventdev',\n         'test-fib',\n         'test-flow-perf',\ndiff --git a/app/test-dma-perf/benchmark.c b/app/test-dma-perf/benchmark.c\nnew file mode 100644\nindex 0000000000..b866d5e5c0\n--- /dev/null\n+++ b/app/test-dma-perf/benchmark.c\n@@ -0,0 +1,498 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2023 Intel Corporation\n+ */\n+\n+#include <inttypes.h>\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <unistd.h>\n+\n+#include <rte_time.h>\n+#include <rte_mbuf.h>\n+#include <rte_dmadev.h>\n+#include <rte_malloc.h>\n+#include <rte_lcore.h>\n+\n+#include \"main.h\"\n+\n+#define MAX_DMA_CPL_NB 255\n+\n+#define TEST_WAIT_U_SECOND 10000\n+#define POLL_MAX 1000\n+\n+#define CSV_LINE_DMA_FMT \"Scenario %u,%u,%s,%u,%u,%.2lf,%\" PRIu64 \",%.3lf,%.3lf\\n\"\n+#define CSV_LINE_CPU_FMT \"Scenario %u,%u,NA,%u,%u,%.2lf,%\" PRIu64 \",%.3lf,%.3lf\\n\"\n+\n+struct worker_info {\n+\tbool ready_flag;\n+\tbool start_flag;\n+\tbool stop_flag;\n+\tuint32_t total_cpl;\n+\tuint32_t test_cpl;\n+};\n+\n+struct lcore_params {\n+\tuint8_t scenario_id;\n+\tunsigned int lcore_id;\n+\tchar *dma_name;\n+\tuint16_t worker_id;\n+\tuint16_t dev_id;\n+\tuint32_t nr_buf;\n+\tuint16_t kick_batch;\n+\tuint32_t buf_size;\n+\tuint16_t test_secs;\n+\tstruct rte_mbuf **srcs;\n+\tstruct rte_mbuf **dsts;\n+\tstruct worker_info worker_info;\n+};\n+\n+union lcore_params_union {\n+\tvolatile struct lcore_params *v_ptr;\n+\tstruct lcore_params *ptr;\n+};\n+\n+static struct rte_mempool *src_pool;\n+static struct rte_mempool *dst_pool;\n+\n+static union lcore_params_union lcores_p[MAX_WORKER_NB];\n+\n+#define PRINT_ERR(...) print_err(__func__, __LINE__, __VA_ARGS__)\n+\n+static inline int\n+__rte_format_printf(3, 4)\n+print_err(const char *func, int lineno, const char *format, ...)\n+{\n+\tva_list ap;\n+\tint ret;\n+\n+\tret = fprintf(stderr, \"In %s:%d - \", func, lineno);\n+\tva_start(ap, format);\n+\tret += vfprintf(stderr, format, ap);\n+\tva_end(ap);\n+\n+\treturn ret;\n+}\n+\n+static inline void\n+calc_result(uint32_t buf_size, uint32_t nr_buf, uint16_t nb_workers, uint16_t test_secs,\n+\t\t\t\tuint32_t total_cnt, float *memory, uint32_t *ave_cycle,\n+\t\t\t\tfloat *bandwidth, float *mops)\n+{\n+\tfloat ops;\n+\n+\t*memory = (float)(buf_size * (nr_buf / nb_workers) * 2) / (1024 * 1024);\n+\t*ave_cycle = test_secs * rte_get_timer_hz() / total_cnt;\n+\tops = (float)total_cnt / test_secs;\n+\t*mops = ops / (1000 * 1000);\n+\t*bandwidth = (ops * buf_size * 8) / (1000 * 1000 * 1000);\n+}\n+\n+static void\n+output_result(uint8_t scenario_id, uint32_t lcore_id, char *dma_name, uint64_t ave_cycle,\n+\t\t\tuint32_t buf_size, uint32_t nr_buf, float memory,\n+\t\t\tfloat bandwidth, float mops, bool is_dma)\n+{\n+\tif (is_dma)\n+\t\tprintf(\"lcore %u, DMA %s:\\n\", lcore_id, dma_name);\n+\telse\n+\t\tprintf(\"lcore %u\\n\", lcore_id);\n+\n+\tprintf(\"average cycles/op: %\" PRIu64 \", buffer size: %u, nr_buf: %u, memory: %.2lfMB, frequency: %\" PRIu64 \".\\n\",\n+\t\t\tave_cycle, buf_size, nr_buf, memory, rte_get_timer_hz());\n+\tprintf(\"Average bandwidth: %.3lfGbps, MOps: %.3lf\\n\", bandwidth, mops);\n+\n+\tif (is_dma)\n+\t\tsnprintf(output_str[lcore_id], MAX_OUTPUT_STR_LEN, CSV_LINE_DMA_FMT,\n+\t\t\tscenario_id, lcore_id, dma_name, buf_size,\n+\t\t\tnr_buf, memory, ave_cycle, bandwidth, mops);\n+\telse\n+\t\tsnprintf(output_str[lcore_id], MAX_OUTPUT_STR_LEN, CSV_LINE_CPU_FMT,\n+\t\t\tscenario_id, lcore_id, buf_size,\n+\t\t\tnr_buf, memory, ave_cycle, bandwidth, mops);\n+}\n+\n+static inline void\n+cache_flush_buf(__maybe_unused struct rte_mbuf **array,\n+\t\t__maybe_unused uint32_t buf_size,\n+\t\t__maybe_unused uint32_t nr_buf)\n+{\n+#ifdef RTE_ARCH_X86_64\n+\tchar *data;\n+\tstruct rte_mbuf **srcs = array;\n+\tuint32_t i, offset;\n+\n+\tfor (i = 0; i < nr_buf; i++) {\n+\t\tdata = rte_pktmbuf_mtod(srcs[i], char *);\n+\t\tfor (offset = 0; offset < buf_size; offset += 64)\n+\t\t\t__builtin_ia32_clflush(data + offset);\n+\t}\n+#endif\n+}\n+\n+/* Configuration of device. */\n+static void\n+configure_dmadev_queue(uint32_t dev_id, uint32_t ring_size)\n+{\n+\tuint16_t vchan = 0;\n+\tstruct rte_dma_info info;\n+\tstruct rte_dma_conf dev_config = { .nb_vchans = 1 };\n+\tstruct rte_dma_vchan_conf qconf = {\n+\t\t.direction = RTE_DMA_DIR_MEM_TO_MEM,\n+\t\t.nb_desc = ring_size\n+\t};\n+\n+\tif (rte_dma_configure(dev_id, &dev_config) != 0)\n+\t\trte_exit(EXIT_FAILURE, \"Error with dma configure.\\n\");\n+\n+\tif (rte_dma_vchan_setup(dev_id, vchan, &qconf) != 0)\n+\t\trte_exit(EXIT_FAILURE, \"Error with queue configuration.\\n\");\n+\n+\trte_dma_info_get(dev_id, &info);\n+\tif (info.nb_vchans != 1)\n+\t\trte_exit(EXIT_FAILURE, \"Error, no configured queues reported on device id. %u\\n\",\n+\t\t\t\tdev_id);\n+\n+\tif (rte_dma_start(dev_id) != 0)\n+\t\trte_exit(EXIT_FAILURE, \"Error with dma start.\\n\");\n+}\n+\n+static int\n+config_dmadevs(struct test_configure *cfg)\n+{\n+\tuint32_t ring_size = cfg->ring_size.cur;\n+\tstruct lcore_dma_map_t *ldm = &cfg->lcore_dma_map;\n+\tuint32_t nb_workers = ldm->cnt;\n+\tuint32_t i;\n+\tint dev_id;\n+\tuint16_t nb_dmadevs = 0;\n+\tchar *dma_name;\n+\n+\tfor (i = 0; i < ldm->cnt; i++) {\n+\t\tdma_name = ldm->dma_names[i];\n+\t\tdev_id = rte_dma_get_dev_id_by_name(dma_name);\n+\t\tif (dev_id == -1) {\n+\t\t\tfprintf(stderr, \"Error: Fail to find DMA %s.\\n\", dma_name);\n+\t\t\tgoto end;\n+\t\t}\n+\n+\t\tldm->dma_ids[i] = dev_id;\n+\t\tconfigure_dmadev_queue(dev_id, ring_size);\n+\t\t++nb_dmadevs;\n+\t}\n+\n+end:\n+\tif (nb_dmadevs < nb_workers) {\n+\t\tprintf(\"Not enough dmadevs (%u) for all workers (%u).\\n\", nb_dmadevs, nb_workers);\n+\t\treturn -1;\n+\t}\n+\n+\tprintf(\"Number of used dmadevs: %u.\\n\", nb_dmadevs);\n+\n+\treturn 0;\n+}\n+\n+static inline void\n+do_dma_submit_and_poll(uint16_t dev_id, uint64_t *async_cnt,\n+\t\t\tvolatile struct worker_info *worker_info)\n+{\n+\tint ret;\n+\tuint16_t nr_cpl;\n+\n+\tret = rte_dma_submit(dev_id, 0);\n+\tif (ret < 0) {\n+\t\trte_dma_stop(dev_id);\n+\t\trte_dma_close(dev_id);\n+\t\trte_exit(EXIT_FAILURE, \"Error with dma submit.\\n\");\n+\t}\n+\n+\tnr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB, NULL, NULL);\n+\t*async_cnt -= nr_cpl;\n+\tworker_info->total_cpl += nr_cpl;\n+}\n+\n+static inline int\n+do_dma_mem_copy(void *p)\n+{\n+\tconst uint16_t *para_idx = (uint16_t *)p;\n+\tvolatile struct lcore_params *para = lcores_p[*para_idx].v_ptr;\n+\tvolatile struct worker_info *worker_info = &(para->worker_info);\n+\tconst uint16_t dev_id = para->dev_id;\n+\tconst uint32_t nr_buf = para->nr_buf;\n+\tconst uint16_t kick_batch = para->kick_batch;\n+\tconst uint32_t buf_size = para->buf_size;\n+\tstruct rte_mbuf **srcs = para->srcs;\n+\tstruct rte_mbuf **dsts = para->dsts;\n+\tuint16_t nr_cpl;\n+\tuint64_t async_cnt = 0;\n+\tuint32_t i;\n+\tuint32_t poll_cnt = 0;\n+\tint ret;\n+\n+\tworker_info->stop_flag = false;\n+\tworker_info->ready_flag = true;\n+\n+\twhile (!worker_info->start_flag)\n+\t\t;\n+\n+\twhile (1) {\n+\t\tfor (i = 0; i < nr_buf; i++) {\n+dma_copy:\n+\t\t\tret = rte_dma_copy(dev_id, 0, rte_pktmbuf_iova(srcs[i]),\n+\t\t\t\trte_pktmbuf_iova(dsts[i]), buf_size, 0);\n+\t\t\tif (unlikely(ret < 0)) {\n+\t\t\t\tif (ret == -ENOSPC) {\n+\t\t\t\t\tdo_dma_submit_and_poll(dev_id, &async_cnt, worker_info);\n+\t\t\t\t\tgoto dma_copy;\n+\t\t\t\t} else {\n+\t\t\t\t\t/* Error exit */\n+\t\t\t\t\trte_dma_stop(dev_id);\n+\t\t\t\t\trte_exit(EXIT_FAILURE, \"DMA enqueue failed\\n\");\n+\t\t\t\t}\n+\t\t\t}\n+\t\t\tasync_cnt++;\n+\n+\t\t\tif ((async_cnt % kick_batch) == 0)\n+\t\t\t\tdo_dma_submit_and_poll(dev_id, &async_cnt, worker_info);\n+\t\t}\n+\n+\t\tif (worker_info->stop_flag)\n+\t\t\tbreak;\n+\t}\n+\n+\trte_dma_submit(dev_id, 0);\n+\twhile ((async_cnt > 0) && (poll_cnt++ < POLL_MAX)) {\n+\t\tnr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB, NULL, NULL);\n+\t\tasync_cnt -= nr_cpl;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+static inline int\n+do_cpu_mem_copy(void *p)\n+{\n+\tconst uint16_t *para_idx = (uint16_t *)p;\n+\tvolatile struct lcore_params *para = lcores_p[*para_idx].v_ptr;\n+\tvolatile struct worker_info *worker_info = &(para->worker_info);\n+\tconst uint32_t nr_buf = para->nr_buf;\n+\tconst uint32_t buf_size = para->buf_size;\n+\tstruct rte_mbuf **srcs = para->srcs;\n+\tstruct rte_mbuf **dsts = para->dsts;\n+\tuint32_t i;\n+\n+\tworker_info->stop_flag = false;\n+\tworker_info->ready_flag = true;\n+\n+\twhile (!worker_info->start_flag)\n+\t\t;\n+\n+\twhile (1) {\n+\t\tfor (i = 0; i < nr_buf; i++) {\n+\t\t\t/* copy buffer form src to dst */\n+\t\t\trte_memcpy((void *)(uintptr_t)rte_mbuf_data_iova(dsts[i]),\n+\t\t\t\t(void *)(uintptr_t)rte_mbuf_data_iova(srcs[i]),\n+\t\t\t\t(size_t)buf_size);\n+\t\t\tworker_info->total_cpl++;\n+\t\t}\n+\t\tif (worker_info->stop_flag)\n+\t\t\tbreak;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+static int\n+setup_memory_env(struct test_configure *cfg, struct rte_mbuf ***srcs,\n+\t\t\tstruct rte_mbuf ***dsts)\n+{\n+\tunsigned int buf_size = cfg->buf_size.cur;\n+\tunsigned int nr_sockets;\n+\tuint32_t nr_buf = cfg->nr_buf;\n+\n+\tnr_sockets = rte_socket_count();\n+\tif (cfg->src_numa_node >= nr_sockets ||\n+\t\tcfg->dst_numa_node >= nr_sockets) {\n+\t\tprintf(\"Error: Source or destination numa exceeds the acture numa nodes.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tsrc_pool = rte_pktmbuf_pool_create(\"Benchmark_DMA_SRC\",\n+\t\t\tnr_buf,\n+\t\t\t0,\n+\t\t\t0,\n+\t\t\tbuf_size + RTE_PKTMBUF_HEADROOM,\n+\t\t\tcfg->src_numa_node);\n+\tif (src_pool == NULL) {\n+\t\tPRINT_ERR(\"Error with source mempool creation.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tdst_pool = rte_pktmbuf_pool_create(\"Benchmark_DMA_DST\",\n+\t\t\tnr_buf,\n+\t\t\t0,\n+\t\t\t0,\n+\t\t\tbuf_size + RTE_PKTMBUF_HEADROOM,\n+\t\t\tcfg->dst_numa_node);\n+\tif (dst_pool == NULL) {\n+\t\tPRINT_ERR(\"Error with destination mempool creation.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\t*srcs = rte_malloc(NULL, nr_buf * sizeof(struct rte_mbuf *), 0);\n+\tif (*srcs == NULL) {\n+\t\tprintf(\"Error: srcs malloc failed.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\t*dsts = rte_malloc(NULL, nr_buf * sizeof(struct rte_mbuf *), 0);\n+\tif (*dsts == NULL) {\n+\t\tprintf(\"Error: dsts malloc failed.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tif (rte_mempool_get_bulk(src_pool, (void **)*srcs, nr_buf) != 0) {\n+\t\tprintf(\"get src mbufs failed.\\n\");\n+\t\treturn -1;\n+\t}\n+\tif (rte_mempool_get_bulk(dst_pool, (void **)*dsts, nr_buf) != 0) {\n+\t\tprintf(\"get dst mbufs failed.\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+void\n+mem_copy_benchmark(struct test_configure *cfg, bool is_dma)\n+{\n+\tuint16_t i;\n+\tuint32_t offset;\n+\tunsigned int lcore_id = 0;\n+\tstruct rte_mbuf **srcs = NULL, **dsts = NULL;\n+\tstruct lcore_dma_map_t *ldm = &cfg->lcore_dma_map;\n+\tunsigned int buf_size = cfg->buf_size.cur;\n+\tuint16_t kick_batch = cfg->kick_batch.cur;\n+\tuint32_t nr_buf = cfg->nr_buf = (cfg->mem_size.cur * 1024 * 1024) / (cfg->buf_size.cur * 2);\n+\tuint16_t nb_workers = ldm->cnt;\n+\tuint16_t test_secs = cfg->test_secs;\n+\tfloat memory;\n+\tuint32_t avg_cycles = 0;\n+\tfloat mops;\n+\tfloat bandwidth;\n+\n+\tif (setup_memory_env(cfg, &srcs, &dsts) < 0)\n+\t\tgoto out;\n+\n+\tif (is_dma)\n+\t\tif (config_dmadevs(cfg) < 0)\n+\t\t\tgoto out;\n+\n+\tif (cfg->cache_flush) {\n+\t\tcache_flush_buf(srcs, buf_size, nr_buf);\n+\t\tcache_flush_buf(dsts, buf_size, nr_buf);\n+\t\trte_mb();\n+\t}\n+\n+\tprintf(\"Start testing....\\n\");\n+\n+\tfor (i = 0; i < nb_workers; i++) {\n+\t\tlcore_id = ldm->lcores[i];\n+\t\toffset = nr_buf / nb_workers * i;\n+\t\tlcores_p[i].v_ptr = rte_malloc(NULL, sizeof(struct lcore_params), 0);\n+\t\tif (!lcores_p[i].v_ptr) {\n+\t\t\tprintf(\"lcore parameters malloc failure for lcore %d\\n\", lcore_id);\n+\t\t\tbreak;\n+\t\t}\n+\t\tif (is_dma) {\n+\t\t\tlcores_p[i].v_ptr->dma_name = ldm->dma_names[i];\n+\t\t\tlcores_p[i].v_ptr->dev_id = ldm->dma_ids[i];\n+\t\t\tlcores_p[i].v_ptr->kick_batch = kick_batch;\n+\t\t}\n+\t\tlcores_p[i].v_ptr->worker_id = i;\n+\t\tlcores_p[i].v_ptr->nr_buf = (uint32_t)(nr_buf / nb_workers);\n+\t\tlcores_p[i].v_ptr->buf_size = buf_size;\n+\t\tlcores_p[i].v_ptr->test_secs = test_secs;\n+\t\tlcores_p[i].v_ptr->srcs = srcs + offset;\n+\t\tlcores_p[i].v_ptr->dsts = dsts + offset;\n+\t\tlcores_p[i].v_ptr->scenario_id = cfg->scenario_id;\n+\t\tlcores_p[i].v_ptr->lcore_id = lcore_id;\n+\n+\t\tif (is_dma)\n+\t\t\trte_eal_remote_launch(do_dma_mem_copy, (void *)(&i), lcore_id);\n+\t\telse\n+\t\t\trte_eal_remote_launch(do_cpu_mem_copy, (void *)(&i), lcore_id);\n+\t}\n+\n+\twhile (1) {\n+\t\tbool ready = true;\n+\t\tfor (i = 0; i < nb_workers; i++) {\n+\t\t\tif (lcores_p[i].v_ptr->worker_info.ready_flag == false) {\n+\t\t\t\tready = 0;\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t}\n+\t\tif (ready)\n+\t\t\tbreak;\n+\t}\n+\n+\tfor (i = 0; i < nb_workers; i++)\n+\t\tlcores_p[i].v_ptr->worker_info.start_flag = true;\n+\n+\tusleep(TEST_WAIT_U_SECOND);\n+\tfor (i = 0; i < nb_workers; i++)\n+\t\tlcores_p[i].v_ptr->worker_info.test_cpl = lcores_p[i].v_ptr->worker_info.total_cpl;\n+\n+\tusleep(test_secs * 1000 * 1000);\n+\tfor (i = 0; i < nb_workers; i++)\n+\t\tlcores_p[i].v_ptr->worker_info.test_cpl = lcores_p[i].v_ptr->worker_info.total_cpl -\n+\t\t\t\t\t\tlcores_p[i].v_ptr->worker_info.test_cpl;\n+\n+\tfor (i = 0; i < nb_workers; i++)\n+\t\tlcores_p[i].v_ptr->worker_info.stop_flag = true;\n+\n+\trte_eal_mp_wait_lcore();\n+\n+\tfor (i = 0; i < nb_workers; i++) {\n+\t\tcalc_result(buf_size, nr_buf, nb_workers, test_secs,\n+\t\t\tlcores_p[i].v_ptr->worker_info.test_cpl,\n+\t\t\t&memory, &avg_cycles, &bandwidth, &mops);\n+\t\toutput_result(cfg->scenario_id, lcores_p[i].v_ptr->lcore_id,\n+\t\t\t\t\tlcores_p[i].v_ptr->dma_name, avg_cycles, buf_size,\n+\t\t\t\t\tnr_buf / nb_workers, memory, bandwidth, mops, is_dma);\n+\t}\n+\n+out:\n+\t/* free mbufs used in the test */\n+\tif (srcs)\n+\t\trte_pktmbuf_free_bulk(srcs, nr_buf);\n+\tif (dsts)\n+\t\trte_pktmbuf_free_bulk(dsts, nr_buf);\n+\n+\t/* free the points for the mbufs */\n+\trte_free(srcs);\n+\tsrcs = NULL;\n+\trte_free(dsts);\n+\tdsts = NULL;\n+\n+\tif (src_pool) {\n+\t\trte_mempool_free(src_pool);\n+\t\tsrc_pool = NULL;\n+\t}\n+\tif (dst_pool) {\n+\t\trte_mempool_free(dst_pool);\n+\t\tsrc_pool = NULL;\n+\t}\n+\n+\t/* free the worker parameters */\n+\tfor (i = 0; i < nb_workers; i++) {\n+\t\trte_free(lcores_p[i].ptr);\n+\t\tlcores_p[i].ptr = NULL;\n+\t}\n+\n+\tif (is_dma) {\n+\t\tfor (i = 0; i < nb_workers; i++) {\n+\t\t\tprintf(\"Stopping dmadev %d\\n\", ldm->dma_ids[i]);\n+\t\t\trte_dma_stop(ldm->dma_ids[i]);\n+\t\t}\n+\t}\n+}\ndiff --git a/app/test-dma-perf/config.ini b/app/test-dma-perf/config.ini\nnew file mode 100644\nindex 0000000000..b550f4b23f\n--- /dev/null\n+++ b/app/test-dma-perf/config.ini\n@@ -0,0 +1,61 @@\n+\n+; This is an example configuration file for dma-perf, which details the meanings of each parameter\n+; and instructions on how to use dma-perf.\n+\n+; Supported test types are DMA_MEM_COPY and CPU_MEM_COPY.\n+\n+; Parameters:\n+; \"mem_size\" denotes the size of the memory footprint.\n+; \"buf_size\" denotes the memory size of a single operation.\n+; \"dma_ring_size\" denotes the dma ring buffer size. It should be must be a power of two, and between\n+;  64 and 4096.\n+; \"kick_batch\" denotes the dma operation batch size, and should be greater than 1 normally.\n+\n+; The format for variables is variable=first,last,increment,ADD|MUL.\n+\n+; src_numa_node is used to control the numa node where the source memory is allocated.\n+; dst_numa_node is used to control the numa node where the destination memory is allocated.\n+\n+; cache_flush is used to determine whether or not the cache should be flushed, with 1 indicating to\n+; flush and 0 indicating to not flush.\n+\n+; test_seconds controls the test time of the whole case.\n+\n+; To use DMA for a test, please specify the \"lcore_dma\" parameter.\n+; If you have already set the \"-l\" and \"-a\" parameters using EAL,\n+; make sure that the value of \"lcore_dma\" falls within their range of the values.\n+; We have to ensure a 1:1 mapping between the core and DMA device.\n+\n+; To use CPU for a test, please specify the \"lcore\" parameter.\n+; If you have already set the \"-l\" and \"-a\" parameters using EAL,\n+; make sure that the value of \"lcore\" falls within their range of values.\n+\n+; To specify a configuration file, use the \"--config\" flag followed by the path to the file.\n+\n+; To specify a result file, use the \"--result\" flag followed by the path to the file.\n+; If you do not specify a result file, one will be generated with the same name as the configuration\n+; file, with the addition of \"_result.csv\" at the end.\n+\n+[case1]\n+type=DMA_MEM_COPY\n+mem_size=10\n+buf_size=64,8192,2,MUL\n+dma_ring_size=1024\n+kick_batch=32\n+src_numa_node=0\n+dst_numa_node=0\n+cache_flush=0\n+test_seconds=2\n+lcore_dma=lcore10@0000:00:04.2, lcore11@0000:00:04.3\n+eal_args=--in-memory --file-prefix=test\n+\n+[case2]\n+type=CPU_MEM_COPY\n+mem_size=10\n+buf_size=64,8192,2,MUL\n+src_numa_node=0\n+dst_numa_node=1\n+cache_flush=0\n+test_seconds=2\n+lcore = 3, 4\n+eal_args=--in-memory --no-pci\ndiff --git a/app/test-dma-perf/main.c b/app/test-dma-perf/main.c\nnew file mode 100644\nindex 0000000000..b782ea5258\n--- /dev/null\n+++ b/app/test-dma-perf/main.c\n@@ -0,0 +1,594 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2023 Intel Corporation\n+ */\n+\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <getopt.h>\n+#include <signal.h>\n+#include <stdbool.h>\n+#include <unistd.h>\n+#include <sys/wait.h>\n+#include <inttypes.h>\n+#include <libgen.h>\n+\n+#include <rte_eal.h>\n+#include <rte_cfgfile.h>\n+#include <rte_string_fns.h>\n+#include <rte_lcore.h>\n+\n+#include \"main.h\"\n+\n+#define CSV_HDR_FMT \"Case %u : %s,lcore,DMA,buffer size,nr_buf,memory(MB),cycle,bandwidth(Gbps),MOps\\n\"\n+\n+#define MAX_EAL_PARAM_NB 100\n+#define MAX_EAL_PARAM_LEN 1024\n+\n+#define DMA_MEM_COPY \"DMA_MEM_COPY\"\n+#define CPU_MEM_COPY \"CPU_MEM_COPY\"\n+\n+#define CMDLINE_CONFIG_ARG \"--config\"\n+#define CMDLINE_RESULT_ARG \"--result\"\n+\n+#define MAX_PARAMS_PER_ENTRY 4\n+\n+#define MAX_LONG_OPT_SZ 64\n+\n+enum {\n+\tTEST_TYPE_NONE = 0,\n+\tTEST_TYPE_DMA_MEM_COPY,\n+\tTEST_TYPE_CPU_MEM_COPY\n+};\n+\n+#define MAX_TEST_CASES 16\n+static struct test_configure test_cases[MAX_TEST_CASES];\n+\n+char output_str[MAX_WORKER_NB][MAX_OUTPUT_STR_LEN];\n+\n+static FILE *fd;\n+\n+static void\n+output_csv(bool need_blankline)\n+{\n+\tuint32_t i;\n+\n+\tif (need_blankline) {\n+\t\tfprintf(fd, \",,,,,,,,\\n\");\n+\t\tfprintf(fd, \",,,,,,,,\\n\");\n+\t}\n+\n+\tfor (i = 0; i < RTE_DIM(output_str); i++) {\n+\t\tif (output_str[i][0]) {\n+\t\t\tfprintf(fd, \"%s\", output_str[i]);\n+\t\t\toutput_str[i][0] = '\\0';\n+\t\t}\n+\t}\n+\n+\tfflush(fd);\n+}\n+\n+static void\n+output_env_info(void)\n+{\n+\tsnprintf(output_str[0], MAX_OUTPUT_STR_LEN, \"test environment:\\n\");\n+\tsnprintf(output_str[1], MAX_OUTPUT_STR_LEN, \"CPU frequency,%\"\n+\t\t\tPRIu64 \"\\n\", rte_get_timer_hz());\n+\n+\toutput_csv(true);\n+}\n+\n+static void\n+output_header(uint32_t case_id, struct test_configure *case_cfg)\n+{\n+\tsnprintf(output_str[0], MAX_OUTPUT_STR_LEN,\n+\t\t\tCSV_HDR_FMT, case_id, case_cfg->test_type_str);\n+\n+\toutput_csv(true);\n+}\n+\n+static void\n+run_test_case(struct test_configure *case_cfg)\n+{\n+\tswitch (case_cfg->test_type) {\n+\tcase TEST_TYPE_DMA_MEM_COPY:\n+\t\tmem_copy_benchmark(case_cfg, true);\n+\t\tbreak;\n+\tcase TEST_TYPE_CPU_MEM_COPY:\n+\t\tmem_copy_benchmark(case_cfg, false);\n+\t\tbreak;\n+\tdefault:\n+\t\tprintf(\"Unknown test type. %s\\n\", case_cfg->test_type_str);\n+\t\tbreak;\n+\t}\n+}\n+\n+static void\n+run_test(uint32_t case_id, struct test_configure *case_cfg)\n+{\n+\tuint32_t i;\n+\tuint32_t nb_lcores = rte_lcore_count();\n+\tstruct test_configure_entry *mem_size = &case_cfg->mem_size;\n+\tstruct test_configure_entry *buf_size = &case_cfg->buf_size;\n+\tstruct test_configure_entry *ring_size = &case_cfg->ring_size;\n+\tstruct test_configure_entry *kick_batch = &case_cfg->kick_batch;\n+\tstruct test_configure_entry dummy = { 0 };\n+\tstruct test_configure_entry *var_entry = &dummy;\n+\n+\tfor (i = 0; i < RTE_DIM(output_str); i++)\n+\t\tmemset(output_str[i], 0, MAX_OUTPUT_STR_LEN);\n+\n+\tif (nb_lcores <= case_cfg->lcore_dma_map.cnt) {\n+\t\tprintf(\"Case %u: Not enough lcores.\\n\", case_id);\n+\t\treturn;\n+\t}\n+\n+\tprintf(\"Number of used lcores: %u.\\n\", nb_lcores);\n+\n+\tif (mem_size->incr != 0)\n+\t\tvar_entry = mem_size;\n+\n+\tif (buf_size->incr != 0)\n+\t\tvar_entry = buf_size;\n+\n+\tif (ring_size->incr != 0)\n+\t\tvar_entry = ring_size;\n+\n+\tif (kick_batch->incr != 0)\n+\t\tvar_entry = kick_batch;\n+\n+\tcase_cfg->scenario_id = 0;\n+\n+\toutput_header(case_id, case_cfg);\n+\n+\tfor (var_entry->cur = var_entry->first; var_entry->cur <= var_entry->last;) {\n+\t\tcase_cfg->scenario_id++;\n+\t\tprintf(\"\\nRunning scenario %d\\n\", case_cfg->scenario_id);\n+\n+\t\trun_test_case(case_cfg);\n+\t\toutput_csv(false);\n+\n+\t\tif (var_entry->op == OP_ADD)\n+\t\t\tvar_entry->cur += var_entry->incr;\n+\t\telse if (var_entry->op == OP_MUL)\n+\t\t\tvar_entry->cur *= var_entry->incr;\n+\t\telse\n+\t\t\tbreak;\n+\t}\n+}\n+\n+static int\n+parse_lcore(struct test_configure *test_case, const char *value)\n+{\n+\tuint16_t len;\n+\tchar *input;\n+\tstruct lcore_dma_map_t *lcore_dma_map;\n+\n+\tif (test_case == NULL || value == NULL)\n+\t\treturn -1;\n+\n+\tlen = strlen(value);\n+\tinput = (char *)malloc((len + 1) * sizeof(char));\n+\tstrlcpy(input, value, len);\n+\tlcore_dma_map = &(test_case->lcore_dma_map);\n+\n+\tmemset(lcore_dma_map, 0, sizeof(struct lcore_dma_map_t));\n+\n+\tchar *token = strtok(input, \", \");\n+\twhile (token != NULL) {\n+\t\tif (lcore_dma_map->cnt >= MAX_LCORE_NB) {\n+\t\t\tfree(input);\n+\t\t\treturn -1;\n+\t\t}\n+\n+\t\tuint16_t lcore_id = atoi(token);\n+\t\tlcore_dma_map->lcores[lcore_dma_map->cnt++] = lcore_id;\n+\n+\t\ttoken = strtok(NULL, \", \");\n+\t}\n+\n+\tfree(input);\n+\treturn 0;\n+}\n+\n+static int\n+parse_lcore_dma(struct test_configure *test_case, const char *value)\n+{\n+\tstruct lcore_dma_map_t *lcore_dma_map;\n+\tchar *input, *addrs;\n+\tchar *ptrs[2];\n+\tchar *start, *end, *substr;\n+\tuint16_t lcore_id;\n+\tint ret = 0;\n+\n+\tif (test_case == NULL || value == NULL)\n+\t\treturn -1;\n+\n+\tinput = strndup(value, strlen(value) + 1);\n+\taddrs = input;\n+\n+\twhile (*addrs == '\\0')\n+\t\taddrs++;\n+\tif (*addrs == '\\0') {\n+\t\tfprintf(stderr, \"No input DMA addresses\\n\");\n+\t\tret = -1;\n+\t\tgoto out;\n+\t}\n+\n+\tsubstr = strtok(addrs, \",\");\n+\tif (substr == NULL) {\n+\t\tfprintf(stderr, \"No input DMA address\\n\");\n+\t\tret = -1;\n+\t\tgoto out;\n+\t}\n+\n+\tmemset(&test_case->lcore_dma_map, 0, sizeof(struct lcore_dma_map_t));\n+\n+\tdo {\n+\t\tif (rte_strsplit(substr, strlen(substr), ptrs, 2, '@') < 0) {\n+\t\t\tfprintf(stderr, \"Illegal DMA address\\n\");\n+\t\t\tret = -1;\n+\t\t\tbreak;\n+\t\t}\n+\n+\t\tstart = strstr(ptrs[0], \"lcore\");\n+\t\tif (start == NULL) {\n+\t\t\tfprintf(stderr, \"Illegal lcore\\n\");\n+\t\t\tret = -1;\n+\t\t\tbreak;\n+\t\t}\n+\n+\t\tstart += 5;\n+\t\tlcore_id = strtol(start, &end, 0);\n+\t\tif (end == start) {\n+\t\t\tfprintf(stderr, \"No input lcore ID or ID %d is wrong\\n\", lcore_id);\n+\t\t\tret = -1;\n+\t\t\tbreak;\n+\t\t}\n+\n+\t\tlcore_dma_map = &test_case->lcore_dma_map;\n+\t\tif (lcore_dma_map->cnt >= MAX_LCORE_NB) {\n+\t\t\tfprintf(stderr, \"lcores count error\\n\");\n+\t\t\tret = -1;\n+\t\t\tbreak;\n+\t\t}\n+\n+\t\tlcore_dma_map->lcores[lcore_dma_map->cnt] = lcore_id;\n+\t\tstrlcpy(lcore_dma_map->dma_names[lcore_dma_map->cnt], ptrs[1],\n+\t\t\t\tRTE_DEV_NAME_MAX_LEN);\n+\t\tlcore_dma_map->cnt++;\n+\t\tsubstr = strtok(NULL, \",\");\n+\t} while (substr != NULL);\n+\n+out:\n+\tfree(input);\n+\treturn ret;\n+}\n+\n+static int\n+parse_entry(const char *value, struct test_configure_entry *entry)\n+{\n+\tchar input[255] = {0};\n+\tchar *args[MAX_PARAMS_PER_ENTRY];\n+\tint args_nr = -1;\n+\n+\tif (value == NULL || entry == NULL)\n+\t\tgoto out;\n+\n+\tstrncpy(input, value, 254);\n+\tif (*input == '\\0')\n+\t\tgoto out;\n+\n+\targs_nr = rte_strsplit(input, strlen(input), args, MAX_PARAMS_PER_ENTRY, ',');\n+\tif (args_nr != 1 && args_nr != 4)\n+\t\tgoto out;\n+\n+\tentry->cur = entry->first = (uint32_t)atoi(args[0]);\n+\n+\tif (args_nr == 4) {\n+\t\tentry->last = (uint32_t)atoi(args[1]);\n+\t\tentry->incr = (uint32_t)atoi(args[2]);\n+\t\tif (!strcmp(args[3], \"MUL\"))\n+\t\t\tentry->op = OP_MUL;\n+\t\telse if (!strcmp(args[3], \"ADD\"))\n+\t\t\tentry->op = OP_ADD;\n+\t\telse {\n+\t\t\tprintf(\"Invalid op %s.\\n\", args[3]);\n+\t\t\targs_nr = -1;\n+\t\t}\n+\t} else {\n+\t\tentry->op = OP_NONE;\n+\t\tentry->last = 0;\n+\t\tentry->incr = 0;\n+\t}\n+out:\n+\treturn args_nr;\n+}\n+\n+static uint16_t\n+load_configs(const char *path)\n+{\n+\tstruct rte_cfgfile *cfgfile;\n+\tint nb_sections, i;\n+\tstruct test_configure *test_case;\n+\tchar section_name[CFG_NAME_LEN];\n+\tconst char *case_type;\n+\tconst char *lcore_dma;\n+\tconst char *mem_size_str, *buf_size_str, *ring_size_str, *kick_batch_str;\n+\tint args_nr, nb_vp;\n+\tbool is_dma;\n+\n+\tprintf(\"config file parsing...\\n\");\n+\tcfgfile = rte_cfgfile_load(path, 0);\n+\tif (!cfgfile) {\n+\t\tprintf(\"Open configure file error.\\n\");\n+\t\texit(1);\n+\t}\n+\n+\tnb_sections = rte_cfgfile_num_sections(cfgfile, NULL, 0);\n+\tif (nb_sections > MAX_TEST_CASES) {\n+\t\tprintf(\"Error: The maximum number of cases is %d.\\n\", MAX_TEST_CASES);\n+\t\texit(1);\n+\t}\n+\n+\tfor (i = 0; i < nb_sections; i++) {\n+\t\tsnprintf(section_name, CFG_NAME_LEN, \"case%d\", i + 1);\n+\t\ttest_case = &test_cases[i];\n+\t\tcase_type = rte_cfgfile_get_entry(cfgfile, section_name, \"type\");\n+\t\tif (!case_type) {\n+\t\t\tprintf(\"Error: No case type in case %d, the test will be finished here.\\n\",\n+\t\t\t\ti + 1);\n+\t\t\ttest_case->is_valid = false;\n+\t\t\tcontinue;\n+\t\t}\n+\n+\t\tif (strcmp(case_type, DMA_MEM_COPY) == 0) {\n+\t\t\ttest_case->test_type = TEST_TYPE_DMA_MEM_COPY;\n+\t\t\ttest_case->test_type_str = DMA_MEM_COPY;\n+\t\t\tis_dma = true;\n+\t\t} else if (strcmp(case_type, CPU_MEM_COPY) == 0) {\n+\t\t\ttest_case->test_type = TEST_TYPE_CPU_MEM_COPY;\n+\t\t\ttest_case->test_type_str = CPU_MEM_COPY;\n+\t\t\tis_dma = false;\n+\t\t} else {\n+\t\t\tprintf(\"Error: Cannot find case type %s in case%d.\\n\", case_type, i + 1);\n+\t\t\ttest_case->is_valid = false;\n+\t\t\tcontinue;\n+\t\t}\n+\n+\t\tnb_vp = 0;\n+\n+\t\ttest_case->src_numa_node = (int)atoi(rte_cfgfile_get_entry(cfgfile,\n+\t\t\t\t\t\t\t\tsection_name, \"src_numa_node\"));\n+\t\ttest_case->dst_numa_node = (int)atoi(rte_cfgfile_get_entry(cfgfile,\n+\t\t\t\t\t\t\t\tsection_name, \"dst_numa_node\"));\n+\n+\t\tmem_size_str = rte_cfgfile_get_entry(cfgfile, section_name, \"mem_size\");\n+\t\targs_nr = parse_entry(mem_size_str, &test_case->mem_size);\n+\t\tif (args_nr < 0) {\n+\t\t\tprintf(\"parse error in case %d.\\n\", i + 1);\n+\t\t\ttest_case->is_valid = false;\n+\t\t\tcontinue;\n+\t\t} else if (args_nr > 1)\n+\t\t\tnb_vp++;\n+\n+\t\tbuf_size_str = rte_cfgfile_get_entry(cfgfile, section_name, \"buf_size\");\n+\t\targs_nr = parse_entry(buf_size_str, &test_case->buf_size);\n+\t\tif (args_nr < 0) {\n+\t\t\tprintf(\"parse error in case %d.\\n\", i + 1);\n+\t\t\ttest_case->is_valid = false;\n+\t\t\tcontinue;\n+\t\t} else if (args_nr > 1)\n+\t\t\tnb_vp++;\n+\n+\t\tif (is_dma) {\n+\t\t\tring_size_str = rte_cfgfile_get_entry(cfgfile, section_name,\n+\t\t\t\t\t\t\t\t\"dma_ring_size\");\n+\t\t\targs_nr = parse_entry(ring_size_str, &test_case->ring_size);\n+\t\t\tif (args_nr < 0) {\n+\t\t\t\tprintf(\"parse error in case %d.\\n\", i + 1);\n+\t\t\t\ttest_case->is_valid = false;\n+\t\t\t\tcontinue;\n+\t\t\t} else if (args_nr > 1)\n+\t\t\t\tnb_vp++;\n+\n+\t\t\tkick_batch_str = rte_cfgfile_get_entry(cfgfile, section_name, \"kick_batch\");\n+\t\t\targs_nr = parse_entry(kick_batch_str, &test_case->kick_batch);\n+\t\t\tif (args_nr < 0) {\n+\t\t\t\tprintf(\"parse error in case %d.\\n\", i + 1);\n+\t\t\t\ttest_case->is_valid = false;\n+\t\t\t\tcontinue;\n+\t\t\t} else if (args_nr > 1)\n+\t\t\t\tnb_vp++;\n+\n+\t\t\tlcore_dma = rte_cfgfile_get_entry(cfgfile, section_name, \"lcore_dma\");\n+\t\t\tint lcore_ret = parse_lcore_dma(test_case, lcore_dma);\n+\t\t\tif (lcore_ret < 0) {\n+\t\t\t\tprintf(\"parse lcore dma error in case %d.\\n\", i + 1);\n+\t\t\t\ttest_case->is_valid = false;\n+\t\t\t\tcontinue;\n+\t\t\t}\n+\t\t} else {\n+\t\t\tlcore_dma = rte_cfgfile_get_entry(cfgfile, section_name, \"lcore\");\n+\t\t\tint lcore_ret = parse_lcore(test_case, lcore_dma);\n+\t\t\tif (lcore_ret < 0) {\n+\t\t\t\tprintf(\"parse lcore error in case %d.\\n\", i + 1);\n+\t\t\t\ttest_case->is_valid = false;\n+\t\t\t\tcontinue;\n+\t\t\t}\n+\t\t}\n+\n+\t\tif (nb_vp > 1) {\n+\t\t\tprintf(\"Error, each section can only have a single variable parameter.\\n\");\n+\t\t\ttest_case->is_valid = false;\n+\t\t\tcontinue;\n+\t\t}\n+\n+\t\ttest_case->cache_flush =\n+\t\t\t(int)atoi(rte_cfgfile_get_entry(cfgfile, section_name, \"cache_flush\"));\n+\t\ttest_case->test_secs = (uint16_t)atoi(rte_cfgfile_get_entry(cfgfile,\n+\t\t\t\t\tsection_name, \"test_seconds\"));\n+\n+\t\ttest_case->eal_args = rte_cfgfile_get_entry(cfgfile, section_name, \"eal_args\");\n+\t\ttest_case->is_valid = true;\n+\t}\n+\n+\trte_cfgfile_close(cfgfile);\n+\tprintf(\"config file parsing complete.\\n\\n\");\n+\treturn i;\n+}\n+\n+/* Parse the argument given in the command line of the application */\n+static int\n+append_eal_args(int argc, char **argv, const char *eal_args, char **new_argv)\n+{\n+\tint i;\n+\tchar *tokens[MAX_EAL_PARAM_NB];\n+\tchar args[MAX_EAL_PARAM_LEN] = {0};\n+\tint token_nb, new_argc = 0;\n+\n+\tfor (i = 0; i < argc; i++) {\n+\t\tif ((strcmp(argv[i], CMDLINE_CONFIG_ARG) == 0) ||\n+\t\t\t\t(strcmp(argv[i], CMDLINE_RESULT_ARG) == 0)) {\n+\t\t\ti++;\n+\t\t\tcontinue;\n+\t\t}\n+\t\tstrlcpy(new_argv[new_argc], argv[i], MAX_EAL_PARAM_LEN);\n+\t\tnew_argc++;\n+\t}\n+\n+\tif (eal_args) {\n+\t\tstrlcpy(args, eal_args, MAX_EAL_PARAM_LEN);\n+\t\ttoken_nb = rte_strsplit(args, strlen(args),\n+\t\t\t\t\ttokens, MAX_EAL_PARAM_NB, ' ');\n+\t\tfor (i = 0; i < token_nb; i++)\n+\t\t\tstrlcpy(new_argv[new_argc++], tokens[i], MAX_EAL_PARAM_LEN);\n+\t}\n+\n+\treturn new_argc;\n+}\n+\n+int\n+main(int argc, char *argv[])\n+{\n+\tint ret;\n+\tuint16_t case_nb;\n+\tuint32_t i, nb_lcores;\n+\tpid_t cpid, wpid;\n+\tint wstatus;\n+\tchar args[MAX_EAL_PARAM_NB][MAX_EAL_PARAM_LEN];\n+\tchar *pargs[MAX_EAL_PARAM_NB];\n+\tchar *cfg_path_ptr = NULL;\n+\tchar *rst_path_ptr = NULL;\n+\tchar rst_path[PATH_MAX];\n+\tint new_argc;\n+\tbool is_first_case = true;\n+\n+\tmemset(args, 0, sizeof(args));\n+\n+\tfor (i = 0; i < RTE_DIM(pargs); i++)\n+\t\tpargs[i] = args[i];\n+\n+\tfor (i = 0; i < (uint32_t)argc; i++) {\n+\t\tif (strncmp(argv[i], CMDLINE_CONFIG_ARG, MAX_LONG_OPT_SZ) == 0)\n+\t\t\tcfg_path_ptr = argv[i + 1];\n+\t\tif (strncmp(argv[i], CMDLINE_RESULT_ARG, MAX_LONG_OPT_SZ) == 0)\n+\t\t\trst_path_ptr = argv[i + 1];\n+\t}\n+\tif (cfg_path_ptr == NULL) {\n+\t\tprintf(\"Config file not assigned.\\n\");\n+\t\treturn -1;\n+\t}\n+\tif (rst_path_ptr == NULL) {\n+\t\tstrlcpy(rst_path, cfg_path_ptr, PATH_MAX);\n+\t\tchar *token = strtok(basename(rst_path), \".\");\n+\t\tif (token == NULL) {\n+\t\t\tprintf(\"Config file error.\\n\");\n+\t\t\treturn -1;\n+\t\t}\n+\t\tstrcat(token, \"_result.csv\");\n+\t\trst_path_ptr = rst_path;\n+\t}\n+\n+\tcase_nb = load_configs(cfg_path_ptr);\n+\tfd = fopen(rst_path_ptr, \"w\");\n+\tif (fd == NULL) {\n+\t\tprintf(\"Open output CSV file error.\\n\");\n+\t\treturn -1;\n+\t}\n+\tfclose(fd);\n+\n+\tfor (i = 0; i < case_nb; i++) {\n+\t\tif (test_cases[i].test_type == TEST_TYPE_NONE) {\n+\t\t\tprintf(\"No test type in test case %d.\\n\\n\", i + 1);\n+\t\t\tcontinue;\n+\t\t}\n+\t\tif (!test_cases[i].is_valid) {\n+\t\t\tprintf(\"Invalid test case %d.\\n\\n\", i + 1);\n+\t\t\tcontinue;\n+\t\t}\n+\n+\t\tcpid = fork();\n+\t\tif (cpid < 0) {\n+\t\t\tprintf(\"Fork case %d failed.\\n\", i + 1);\n+\t\t\texit(EXIT_FAILURE);\n+\t\t} else if (cpid == 0) {\n+\t\t\tprintf(\"\\nRunning case %u\\n\\n\", i + 1);\n+\n+\t\t\tnew_argc = append_eal_args(argc, argv, test_cases[i].eal_args, pargs);\n+\t\t\tret = rte_eal_init(new_argc, pargs);\n+\t\t\tif (ret < 0)\n+\t\t\t\trte_exit(EXIT_FAILURE, \"Invalid EAL arguments\\n\");\n+\n+\t\t\t/* Check lcores. */\n+\t\t\tnb_lcores = rte_lcore_count();\n+\t\t\tif (nb_lcores < 2)\n+\t\t\t\trte_exit(EXIT_FAILURE,\n+\t\t\t\t\t\"There should be at least 2 worker lcores.\\n\");\n+\n+\t\t\tfd = fopen(rst_path_ptr, \"a\");\n+\t\t\tif (!fd) {\n+\t\t\t\tprintf(\"Open output CSV file error.\\n\");\n+\t\t\t\treturn 0;\n+\t\t\t}\n+\n+\t\t\tif (is_first_case) {\n+\t\t\t\toutput_env_info();\n+\t\t\t\tis_first_case = false;\n+\t\t\t}\n+\t\t\trun_test(i + 1, &test_cases[i]);\n+\n+\t\t\t/* clean up the EAL */\n+\t\t\trte_eal_cleanup();\n+\n+\t\t\tfclose(fd);\n+\n+\t\t\tprintf(\"\\nCase %u completed.\\n\\n\", i + 1);\n+\n+\t\t\texit(EXIT_SUCCESS);\n+\t\t} else {\n+\t\t\twpid = waitpid(cpid, &wstatus, 0);\n+\t\t\tif (wpid == -1) {\n+\t\t\t\tprintf(\"waitpid error.\\n\");\n+\t\t\t\texit(EXIT_FAILURE);\n+\t\t\t}\n+\n+\t\t\tif (WIFEXITED(wstatus))\n+\t\t\t\tprintf(\"Case process exited. status %d\\n\\n\",\n+\t\t\t\t\tWEXITSTATUS(wstatus));\n+\t\t\telse if (WIFSIGNALED(wstatus))\n+\t\t\t\tprintf(\"Case process killed by signal %d\\n\\n\",\n+\t\t\t\t\tWTERMSIG(wstatus));\n+\t\t\telse if (WIFSTOPPED(wstatus))\n+\t\t\t\tprintf(\"Case process stopped by signal %d\\n\\n\",\n+\t\t\t\t\tWSTOPSIG(wstatus));\n+\t\t\telse if (WIFCONTINUED(wstatus))\n+\t\t\t\tprintf(\"Case process continued.\\n\\n\");\n+\t\t\telse\n+\t\t\t\tprintf(\"Case process unknown terminated.\\n\\n\");\n+\t\t}\n+\t}\n+\n+\tprintf(\"Bye...\\n\");\n+\treturn 0;\n+}\n+\ndiff --git a/app/test-dma-perf/main.h b/app/test-dma-perf/main.h\nnew file mode 100644\nindex 0000000000..215ac42673\n--- /dev/null\n+++ b/app/test-dma-perf/main.h\n@@ -0,0 +1,69 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2023 Intel Corporation\n+ */\n+\n+#ifndef _MAIN_H_\n+#define _MAIN_H_\n+\n+\n+#include <rte_common.h>\n+#include <rte_cycles.h>\n+#include <rte_dev.h>\n+#include <rte_dmadev.h>\n+\n+#ifndef __maybe_unused\n+#define __maybe_unused\t__rte_unused\n+#endif\n+\n+#define MAX_WORKER_NB 128\n+#define MAX_OUTPUT_STR_LEN 512\n+\n+#define MAX_DMA_NB 128\n+#define MAX_LCORE_NB 256\n+\n+extern char output_str[MAX_WORKER_NB][MAX_OUTPUT_STR_LEN];\n+\n+typedef enum {\n+\tOP_NONE = 0,\n+\tOP_ADD,\n+\tOP_MUL\n+} alg_op_type;\n+\n+struct test_configure_entry {\n+\tuint32_t first;\n+\tuint32_t last;\n+\tuint32_t incr;\n+\talg_op_type op;\n+\tuint32_t cur;\n+};\n+\n+struct lcore_dma_map_t {\n+\tuint32_t lcores[MAX_WORKER_NB];\n+\tchar dma_names[MAX_WORKER_NB][RTE_DEV_NAME_MAX_LEN];\n+\tint16_t dma_ids[MAX_WORKER_NB];\n+\tuint16_t cnt;\n+};\n+\n+struct test_configure {\n+\tbool is_valid;\n+\tuint8_t test_type;\n+\tconst char *test_type_str;\n+\tuint16_t src_numa_node;\n+\tuint16_t dst_numa_node;\n+\tuint16_t opcode;\n+\tbool is_dma;\n+\tstruct lcore_dma_map_t lcore_dma_map;\n+\tstruct test_configure_entry mem_size;\n+\tstruct test_configure_entry buf_size;\n+\tstruct test_configure_entry ring_size;\n+\tstruct test_configure_entry kick_batch;\n+\tuint32_t cache_flush;\n+\tuint32_t nr_buf;\n+\tuint16_t test_secs;\n+\tconst char *eal_args;\n+\tuint8_t scenario_id;\n+};\n+\n+void mem_copy_benchmark(struct test_configure *cfg, bool is_dma);\n+\n+#endif /* _MAIN_H_ */\ndiff --git a/app/test-dma-perf/meson.build b/app/test-dma-perf/meson.build\nnew file mode 100644\nindex 0000000000..bd6c264002\n--- /dev/null\n+++ b/app/test-dma-perf/meson.build\n@@ -0,0 +1,17 @@\n+# SPDX-License-Identifier: BSD-3-Clause\n+# Copyright(c) 2019-2023 Intel Corporation\n+\n+# meson file, for building this app as part of a main DPDK build.\n+\n+if is_windows\n+    build = false\n+    reason = 'not supported on Windows'\n+    subdir_done()\n+endif\n+\n+deps += ['dmadev', 'mbuf', 'cfgfile']\n+\n+sources = files(\n+        'main.c',\n+        'benchmark.c',\n+)\n",
    "prefixes": [
        "v7"
    ]
}