get:
Show a patch.

patch:
Partially update a patch.

put:
Update a patch.
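
As a rough sketch of how these operations could be driven from a script (assuming the third-party Python "requests" library; the API token and target state below are placeholders, and write access additionally assumes an account with maintainer or delegate rights on the project):

import requests

BASE = "http://patches.dpdk.org/api"
PATCH_ID = 137663

# GET: read access needs no authentication.
patch = requests.get(f"{BASE}/patches/{PATCH_ID}/").json()
print(patch["name"], patch["state"])

# PATCH: partial update, e.g. changing the patch state.
# The token is a placeholder; a real one is created in the Patchwork user profile.
TOKEN = "0123456789abcdef0123456789abcdef"
resp = requests.patch(
    f"{BASE}/patches/{PATCH_ID}/",
    headers={"Authorization": f"Token {TOKEN}"},
    json={"state": "accepted"},
)
resp.raise_for_status()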

GET /api/patches/137663/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 137663,
    "url": "http://patches.dpdk.org/api/patches/137663/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20240301102104.20074-3-paul.szczepanek@arm.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20240301102104.20074-3-paul.szczepanek@arm.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20240301102104.20074-3-paul.szczepanek@arm.com",
    "date": "2024-03-01T10:21:02",
    "name": "[v7,2/4] test: add pointer compress tests to ring perf test",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "de2992fc2b4a3803316e5fa3804d8984a571e9f5",
    "submitter": {
        "id": 3199,
        "url": "http://patches.dpdk.org/api/people/3199/?format=api",
        "name": "Paul Szczepanek",
        "email": "paul.szczepanek@arm.com"
    },
    "delegate": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/users/1/?format=api",
        "username": "tmonjalo",
        "first_name": "Thomas",
        "last_name": "Monjalon",
        "email": "thomas@monjalon.net"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20240301102104.20074-3-paul.szczepanek@arm.com/mbox/",
    "series": [
        {
            "id": 31328,
            "url": "http://patches.dpdk.org/api/series/31328/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=31328",
            "date": "2024-03-01T10:21:00",
            "name": "add pointer compression API",
            "version": 7,
            "mbox": "http://patches.dpdk.org/series/31328/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/137663/comments/",
    "check": "success",
    "checks": "http://patches.dpdk.org/api/patches/137663/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 24DE743BCD;\n\tFri,  1 Mar 2024 11:21:27 +0100 (CET)",
            "from mails.dpdk.org (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 313F54339E;\n\tFri,  1 Mar 2024 11:21:18 +0100 (CET)",
            "from foss.arm.com (foss.arm.com [217.140.110.172])\n by mails.dpdk.org (Postfix) with ESMTP id 1A07C43390\n for <dev@dpdk.org>; Fri,  1 Mar 2024 11:21:15 +0100 (CET)",
            "from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14])\n by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id B753F13D5;\n Fri,  1 Mar 2024 02:21:52 -0800 (PST)",
            "from ampere-altra-2-1.usa.Arm.com (ampere-altra-2-1.usa.arm.com\n [10.118.91.158])\n by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 67CBE3F762;\n Fri,  1 Mar 2024 02:21:14 -0800 (PST)"
        ],
        "From": "Paul Szczepanek <paul.szczepanek@arm.com>",
        "To": "dev@dpdk.org",
        "Cc": "Paul Szczepanek <paul.szczepanek@arm.com>,\n Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>",
        "Subject": "[PATCH v7 2/4] test: add pointer compress tests to ring perf test",
        "Date": "Fri,  1 Mar 2024 10:21:02 +0000",
        "Message-Id": "<20240301102104.20074-3-paul.szczepanek@arm.com>",
        "X-Mailer": "git-send-email 2.25.1",
        "In-Reply-To": "<20240301102104.20074-1-paul.szczepanek@arm.com>",
        "References": "<20230927150854.3670391-2-paul.szczepanek@arm.com>\n <20240301102104.20074-1-paul.szczepanek@arm.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "Add a test that runs a zero copy burst enqueue and dequeue on a ring\nof raw pointers and compressed pointers at different burst sizes to\nshowcase performance benefits of newly added pointer compression APIs.\n\nRefactored threading code to pass more parameters to threads to\nreuse existing code. Added more bulk sizes to showcase their effects\non compression. Adjusted loop iteration numbers to take into account\nbulk sizes to keep runtime constant (instead of number of operations).\n\nAdjusted old printfs to match new ones which have aligned numbers.\n\nSigned-off-by: Paul Szczepanek <paul.szczepanek@arm.com>\nReviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>\n---\n app/test/test_ring.h      |  94 +++++++++-\n app/test/test_ring_perf.c | 354 +++++++++++++++++++++++++-------------\n 2 files changed, 324 insertions(+), 124 deletions(-)\n\n--\n2.25.1",
    "diff": "diff --git a/app/test/test_ring.h b/app/test/test_ring.h\nindex 45c263f3ff..3b00f2465d 100644\n--- a/app/test/test_ring.h\n+++ b/app/test/test_ring.h\n@@ -1,10 +1,12 @@\n /* SPDX-License-Identifier: BSD-3-Clause\n- * Copyright(c) 2019 Arm Limited\n+ * Copyright(c) 2019-2023 Arm Limited\n  */\n\n #include <rte_malloc.h>\n #include <rte_ring.h>\n #include <rte_ring_elem.h>\n+#include <rte_memcpy.h>\n+#include <rte_ptr_compress.h>\n\n /* API type to call\n  * rte_ring_<sp/mp or sc/mc>_enqueue_<bulk/burst>\n@@ -25,6 +27,10 @@\n #define TEST_RING_ELEM_BULK 16\n #define TEST_RING_ELEM_BURST 32\n\n+#define TEST_RING_ELEM_BURST_ZC 64\n+#define TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_16 128\n+#define TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_32 256\n+\n #define TEST_RING_IGNORE_API_TYPE ~0U\n\n /* This function is placed here as it is required for both\n@@ -101,6 +107,9 @@ static inline unsigned int\n test_ring_enqueue(struct rte_ring *r, void **obj, int esize, unsigned int n,\n \t\t\tunsigned int api_type)\n {\n+\tunsigned int ret;\n+\tstruct rte_ring_zc_data zcd = {0};\n+\n \t/* Legacy queue APIs? */\n \tif (esize == -1)\n \t\tswitch (api_type) {\n@@ -152,6 +161,46 @@ test_ring_enqueue(struct rte_ring *r, void **obj, int esize, unsigned int n,\n \t\tcase (TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BURST):\n \t\t\treturn rte_ring_mp_enqueue_burst_elem(r, obj, esize, n,\n \t\t\t\t\t\t\t\tNULL);\n+\t\tcase (TEST_RING_ELEM_BURST_ZC):\n+\t\t\tret = rte_ring_enqueue_zc_burst_elem_start(\n+\t\t\t\t\tr, esize, n, &zcd, NULL);\n+\t\t\tif (unlikely(ret == 0))\n+\t\t\t\treturn 0;\n+\t\t\trte_memcpy(zcd.ptr1, (char *)obj, zcd.n1 * esize);\n+\t\t\tif (unlikely(zcd.ptr2 != NULL))\n+\t\t\t\trte_memcpy(zcd.ptr2,\n+\t\t\t\t\t\t(char *)obj + zcd.n1 * esize,\n+\t\t\t\t\t\t(ret - zcd.n1) * esize);\n+\t\t\trte_ring_enqueue_zc_finish(r, ret);\n+\t\t\treturn ret;\n+\t\tcase (TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_16):\n+\t\t\t/* rings cannot store uint16_t so we use a uint32_t\n+\t\t\t * and half the requested number of elements\n+\t\t\t * and compensate by doubling the returned numbers\n+\t\t\t */\n+\t\t\tret = rte_ring_enqueue_zc_burst_elem_start(\n+\t\t\t\t\tr, sizeof(uint32_t), n / 2, &zcd, NULL);\n+\t\t\tif (unlikely(ret == 0))\n+\t\t\t\treturn 0;\n+\t\t\trte_ptr_compress_16(0, obj, zcd.ptr1, zcd.n1 * 2, 3);\n+\t\t\tif (unlikely(zcd.ptr2 != NULL))\n+\t\t\t\trte_ptr_compress_16(0,\n+\t\t\t\t\t\tobj + (zcd.n1 * 2),\n+\t\t\t\t\t\tzcd.ptr2,\n+\t\t\t\t\t\t(ret - zcd.n1) * 2, 3);\n+\t\t\trte_ring_enqueue_zc_finish(r, ret);\n+\t\t\treturn ret * 2;\n+\t\tcase (TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_32):\n+\t\t\tret = rte_ring_enqueue_zc_burst_elem_start(\n+\t\t\t\t\tr, sizeof(uint32_t), n, &zcd, NULL);\n+\t\t\tif (unlikely(ret == 0))\n+\t\t\t\treturn 0;\n+\t\t\trte_ptr_compress_32(0, obj, zcd.ptr1, zcd.n1, 3);\n+\t\t\tif (unlikely(zcd.ptr2 != NULL))\n+\t\t\t\trte_ptr_compress_32(0, obj + zcd.n1,\n+\t\t\t\t\t\tzcd.ptr2, ret - zcd.n1, 3);\n+\t\t\trte_ring_enqueue_zc_finish(r, ret);\n+\t\t\treturn ret;\n \t\tdefault:\n \t\t\tprintf(\"Invalid API type\\n\");\n \t\t\treturn 0;\n@@ -162,6 +211,9 @@ static inline unsigned int\n test_ring_dequeue(struct rte_ring *r, void **obj, int esize, unsigned int n,\n \t\t\tunsigned int api_type)\n {\n+\tunsigned int ret;\n+\tstruct rte_ring_zc_data zcd = {0};\n+\n \t/* Legacy queue APIs? 
*/\n \tif (esize == -1)\n \t\tswitch (api_type) {\n@@ -213,6 +265,46 @@ test_ring_dequeue(struct rte_ring *r, void **obj, int esize, unsigned int n,\n \t\tcase (TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BURST):\n \t\t\treturn rte_ring_mc_dequeue_burst_elem(r, obj, esize,\n \t\t\t\t\t\t\t\tn, NULL);\n+\t\tcase (TEST_RING_ELEM_BURST_ZC):\n+\t\t\tret = rte_ring_dequeue_zc_burst_elem_start(\n+\t\t\t\t\tr, esize, n, &zcd, NULL);\n+\t\t\tif (unlikely(ret == 0))\n+\t\t\t\treturn 0;\n+\t\t\trte_memcpy((char *)obj, zcd.ptr1, zcd.n1 * esize);\n+\t\t\tif (unlikely(zcd.ptr2 != NULL))\n+\t\t\t\trte_memcpy((char *)obj + zcd.n1 * esize,\n+\t\t\t\t\t\tzcd.ptr2,\n+\t\t\t\t\t\t(ret - zcd.n1) * esize);\n+\t\t\trte_ring_dequeue_zc_finish(r, ret);\n+\t\t\treturn ret;\n+\t\tcase (TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_16):\n+\t\t\t/* rings cannot store uint16_t so we use a uint32_t\n+\t\t\t * and half the requested number of elements\n+\t\t\t * and compensate by doubling the returned numbers\n+\t\t\t */\n+\t\t\tret = rte_ring_dequeue_zc_burst_elem_start(\n+\t\t\t\t\tr, sizeof(uint32_t), n / 2, &zcd, NULL);\n+\t\t\tif (unlikely(ret == 0))\n+\t\t\t\treturn 0;\n+\t\t\trte_ptr_decompress_16(0, zcd.ptr1, obj, zcd.n1 * 2, 3);\n+\t\t\tif (unlikely(zcd.ptr2 != NULL))\n+\t\t\t\trte_ptr_decompress_16(0, zcd.ptr2,\n+\t\t\t\t\t\tobj + zcd.n1,\n+\t\t\t\t\t\t(ret - zcd.n1) * 2,\n+\t\t\t\t\t\t3);\n+\t\t\trte_ring_dequeue_zc_finish(r, ret);\n+\t\t\treturn ret * 2;\n+\t\tcase (TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_32):\n+\t\t\tret = rte_ring_dequeue_zc_burst_elem_start(\n+\t\t\t\t\tr, sizeof(uint32_t), n, &zcd, NULL);\n+\t\t\tif (unlikely(ret == 0))\n+\t\t\t\treturn 0;\n+\t\t\trte_ptr_decompress_32(0, zcd.ptr1, obj, zcd.n1, 3);\n+\t\t\tif (unlikely(zcd.ptr2 != NULL))\n+\t\t\t\trte_ptr_decompress_32(0, zcd.ptr2,\n+\t\t\t\t\t\tobj + zcd.n1, ret - zcd.n1, 3);\n+\t\t\trte_ring_dequeue_zc_finish(r, ret);\n+\t\t\treturn ret;\n \t\tdefault:\n \t\t\tprintf(\"Invalid API type\\n\");\n \t\t\treturn 0;\ndiff --git a/app/test/test_ring_perf.c b/app/test/test_ring_perf.c\nindex d7c5a4c30b..8392d0c781 100644\n--- a/app/test/test_ring_perf.c\n+++ b/app/test/test_ring_perf.c\n@@ -1,6 +1,6 @@\n /* SPDX-License-Identifier: BSD-3-Clause\n  * Copyright(c) 2010-2014 Intel Corporation\n- * Copyright(c) 2019 Arm Limited\n+ * Copyright(c) 2019-2023 Arm Limited\n  */\n\n\n@@ -22,13 +22,13 @@\n\n #define RING_NAME \"RING_PERF\"\n #define RING_SIZE 4096\n-#define MAX_BURST 32\n+#define MAX_BURST 256\n\n /*\n  * the sizes to enqueue and dequeue in testing\n  * (marked volatile so they won't be seen as compile-time constants)\n  */\n-static const volatile unsigned bulk_sizes[] = { 8, 32 };\n+static const volatile unsigned int bulk_sizes[] = { 8, 32, 64, 128, 256 };\n\n struct lcore_pair {\n \tunsigned c1, c2;\n@@ -43,26 +43,30 @@ test_ring_print_test_string(unsigned int api_type, int esize,\n \tif (esize == -1)\n \t\tprintf(\"legacy APIs\");\n \telse\n-\t\tprintf(\"elem APIs: element size %dB\", esize);\n+\t\tprintf(\"elem APIs (size:%2dB)\", esize);\n\n \tif (api_type == TEST_RING_IGNORE_API_TYPE)\n \t\treturn;\n\n \tif ((api_type & TEST_RING_THREAD_DEF) == TEST_RING_THREAD_DEF)\n-\t\tprintf(\": default enqueue/dequeue: \");\n+\t\tprintf(\" - default enqueue/dequeue\");\n \telse if ((api_type & TEST_RING_THREAD_SPSC) == TEST_RING_THREAD_SPSC)\n-\t\tprintf(\": SP/SC: \");\n+\t\tprintf(\" - SP/SC\");\n \telse if ((api_type & TEST_RING_THREAD_MPMC) == TEST_RING_THREAD_MPMC)\n-\t\tprintf(\": MP/MC: \");\n+\t\tprintf(\" - MP/MC\");\n\n \tif ((api_type & 
TEST_RING_ELEM_SINGLE) == TEST_RING_ELEM_SINGLE)\n-\t\tprintf(\"single: \");\n+\t\tprintf(\" - single - \");\n \telse if ((api_type & TEST_RING_ELEM_BULK) == TEST_RING_ELEM_BULK)\n-\t\tprintf(\"bulk (size: %u): \", bsz);\n+\t\tprintf(\" - bulk (n:%-3u) - \", bsz);\n \telse if ((api_type & TEST_RING_ELEM_BURST) == TEST_RING_ELEM_BURST)\n-\t\tprintf(\"burst (size: %u): \", bsz);\n+\t\tprintf(\" - burst (n:%-3u) - \", bsz);\n+\telse if ((api_type & (TEST_RING_ELEM_BURST_ZC |\n+\t\t\tTEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_16 |\n+\t\t\tTEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_32)) != 0)\n+\t\tprintf(\" - burst zero copy (n:%-3u) - \", bsz);\n\n-\tprintf(\"%.2F\\n\", value);\n+\tprintf(\"cycles per elem: %.3F\\n\", value);\n }\n\n /**** Functions to analyse our core mask to get cores for different tests ***/\n@@ -144,28 +148,33 @@ static void\n test_empty_dequeue(struct rte_ring *r, const int esize,\n \t\t\tconst unsigned int api_type)\n {\n-\tconst unsigned int iter_shift = 26;\n+\tconst unsigned int iter_shift = 29;\n \tconst unsigned int iterations = 1 << iter_shift;\n \tunsigned int i = 0;\n \tvoid *burst[MAX_BURST];\n\n+\tconst unsigned int bulk_iterations = iterations / bulk_sizes[0];\n \tconst uint64_t start = rte_rdtsc();\n-\tfor (i = 0; i < iterations; i++)\n+\tfor (i = 0; i < bulk_iterations; i++)\n \t\ttest_ring_dequeue(r, burst, esize, bulk_sizes[0], api_type);\n \tconst uint64_t end = rte_rdtsc();\n\n \ttest_ring_print_test_string(api_type, esize, bulk_sizes[0],\n-\t\t\t\t\t((double)(end - start)) / iterations);\n+\t\t\t\t\t((double)end - start) / iterations);\n }\n\n-/*\n- * for the separate enqueue and dequeue threads they take in one param\n- * and return two. Input = burst size, output = cycle average for sp/sc & mp/mc\n- */\n-struct thread_params {\n+/* describes the ring used by the enqueue and dequeue thread */\n+struct ring_params {\n \tstruct rte_ring *r;\n-\tunsigned size;        /* input value, the burst size */\n-\tdouble spsc, mpmc;    /* output value, the single or multi timings */\n+\tunsigned int elem_size;\n+\tunsigned int bulk_sizes_i; /* index into bulk_size array */\n+\tunsigned int ring_flags; /* flags for test_ring_enqueue/dequeue */\n+};\n+\n+/* Used do specify enqueue and dequeue ring operations and their results */\n+struct thread_params {\n+\tstruct ring_params *ring_params;\n+\tdouble *results; /* result array size must be equal to bulk_sizes */\n };\n\n /*\n@@ -174,16 +183,15 @@ struct thread_params {\n  * flag == 1 -> dequeue\n  */\n static __rte_always_inline int\n-enqueue_dequeue_bulk_helper(const unsigned int flag, const int esize,\n-\tstruct thread_params *p)\n+enqueue_dequeue_bulk_helper(const unsigned int flag, struct thread_params *p)\n {\n \tint ret;\n-\tconst unsigned int iter_shift = 15;\n+\tconst unsigned int iter_shift = 22;\n \tconst unsigned int iterations = 1 << iter_shift;\n-\tstruct rte_ring *r = p->r;\n-\tunsigned int bsize = p->size;\n \tunsigned int i;\n \tvoid *burst = NULL;\n+\tunsigned int n_remaining;\n+\tconst unsigned int bulk_n = bulk_sizes[p->ring_params->bulk_sizes_i];\n\n #ifdef RTE_USE_C11_MEM_MODEL\n \tif (__atomic_fetch_add(&lcore_count, 1, __ATOMIC_RELAXED) + 1 != 2)\n@@ -193,44 +201,38 @@ enqueue_dequeue_bulk_helper(const unsigned int flag, const int esize,\n \t\twhile(lcore_count != 2)\n \t\t\trte_pause();\n\n-\tburst = test_ring_calloc(MAX_BURST, esize);\n+\tburst = test_ring_calloc(MAX_BURST, p->ring_params->elem_size);\n \tif (burst == NULL)\n \t\treturn -1;\n\n \tconst uint64_t sp_start = rte_rdtsc();\n-\tfor (i = 0; i < 
iterations; i++)\n+\tconst unsigned int bulk_iterations = iterations / bulk_n;\n+\tfor (i = 0; i < bulk_iterations; i++) {\n+\t\tn_remaining = bulk_n;\n \t\tdo {\n \t\t\tif (flag == 0)\n-\t\t\t\tret = test_ring_enqueue(r, burst, esize, bsize,\n-\t\t\t\t\t\tTEST_RING_THREAD_SPSC |\n-\t\t\t\t\t\tTEST_RING_ELEM_BULK);\n+\t\t\t\tret = test_ring_enqueue(p->ring_params->r,\n+\t\t\t\t\t\tburst,\n+\t\t\t\t\t\tp->ring_params->elem_size,\n+\t\t\t\t\t\tn_remaining,\n+\t\t\t\t\t\tp->ring_params->ring_flags);\n \t\t\telse if (flag == 1)\n-\t\t\t\tret = test_ring_dequeue(r, burst, esize, bsize,\n-\t\t\t\t\t\tTEST_RING_THREAD_SPSC |\n-\t\t\t\t\t\tTEST_RING_ELEM_BULK);\n+\t\t\t\tret = test_ring_dequeue(p->ring_params->r,\n+\t\t\t\t\t\tburst,\n+\t\t\t\t\t\tp->ring_params->elem_size,\n+\t\t\t\t\t\tn_remaining,\n+\t\t\t\t\t\tp->ring_params->ring_flags);\n \t\t\tif (ret == 0)\n \t\t\t\trte_pause();\n-\t\t} while (!ret);\n+\t\t\telse\n+\t\t\t\tn_remaining -= ret;\n+\t\t} while (n_remaining > 0);\n+\t}\n \tconst uint64_t sp_end = rte_rdtsc();\n\n-\tconst uint64_t mp_start = rte_rdtsc();\n-\tfor (i = 0; i < iterations; i++)\n-\t\tdo {\n-\t\t\tif (flag == 0)\n-\t\t\t\tret = test_ring_enqueue(r, burst, esize, bsize,\n-\t\t\t\t\t\tTEST_RING_THREAD_MPMC |\n-\t\t\t\t\t\tTEST_RING_ELEM_BULK);\n-\t\t\telse if (flag == 1)\n-\t\t\t\tret = test_ring_dequeue(r, burst, esize, bsize,\n-\t\t\t\t\t\tTEST_RING_THREAD_MPMC |\n-\t\t\t\t\t\tTEST_RING_ELEM_BULK);\n-\t\t\tif (ret == 0)\n-\t\t\t\trte_pause();\n-\t\t} while (!ret);\n-\tconst uint64_t mp_end = rte_rdtsc();\n+\tp->results[p->ring_params->bulk_sizes_i] =\n+\t\t\t((double)sp_end - sp_start) / iterations;\n\n-\tp->spsc = ((double)(sp_end - sp_start))/(iterations * bsize);\n-\tp->mpmc = ((double)(mp_end - mp_start))/(iterations * bsize);\n \treturn 0;\n }\n\n@@ -243,15 +245,7 @@ enqueue_bulk(void *p)\n {\n \tstruct thread_params *params = p;\n\n-\treturn enqueue_dequeue_bulk_helper(0, -1, params);\n-}\n-\n-static int\n-enqueue_bulk_16B(void *p)\n-{\n-\tstruct thread_params *params = p;\n-\n-\treturn enqueue_dequeue_bulk_helper(0, 16, params);\n+\treturn enqueue_dequeue_bulk_helper(0, params);\n }\n\n /*\n@@ -263,15 +257,7 @@ dequeue_bulk(void *p)\n {\n \tstruct thread_params *params = p;\n\n-\treturn enqueue_dequeue_bulk_helper(1, -1, params);\n-}\n-\n-static int\n-dequeue_bulk_16B(void *p)\n-{\n-\tstruct thread_params *params = p;\n-\n-\treturn enqueue_dequeue_bulk_helper(1, 16, params);\n+\treturn enqueue_dequeue_bulk_helper(1, params);\n }\n\n /*\n@@ -279,42 +265,32 @@ dequeue_bulk_16B(void *p)\n  * used to measure ring perf between hyperthreads, cores and sockets.\n  */\n static int\n-run_on_core_pair(struct lcore_pair *cores, struct rte_ring *r, const int esize)\n+run_on_core_pair(struct lcore_pair *cores,\n+\t\tstruct thread_params *param1, struct thread_params *param2)\n {\n-\tlcore_function_t *f1, *f2;\n-\tstruct thread_params param1 = {0}, param2 = {0};\n \tunsigned i;\n-\n-\tif (esize == -1) {\n-\t\tf1 = enqueue_bulk;\n-\t\tf2 = dequeue_bulk;\n-\t} else {\n-\t\tf1 = enqueue_bulk_16B;\n-\t\tf2 = dequeue_bulk_16B;\n-\t}\n+\tstruct ring_params *ring_params = param1->ring_params;\n\n \tfor (i = 0; i < RTE_DIM(bulk_sizes); i++) {\n \t\tlcore_count = 0;\n-\t\tparam1.size = param2.size = bulk_sizes[i];\n-\t\tparam1.r = param2.r = r;\n+\t\tring_params->bulk_sizes_i = i;\n \t\tif (cores->c1 == rte_get_main_lcore()) {\n-\t\t\trte_eal_remote_launch(f2, &param2, cores->c2);\n-\t\t\tf1(&param1);\n+\t\t\trte_eal_remote_launch(dequeue_bulk, param2, 
cores->c2);\n+\t\t\tenqueue_bulk(param1);\n \t\t\trte_eal_wait_lcore(cores->c2);\n \t\t} else {\n-\t\t\trte_eal_remote_launch(f1, &param1, cores->c1);\n-\t\t\trte_eal_remote_launch(f2, &param2, cores->c2);\n+\t\t\trte_eal_remote_launch(enqueue_bulk, param1, cores->c1);\n+\t\t\trte_eal_remote_launch(dequeue_bulk, param2, cores->c2);\n \t\t\tif (rte_eal_wait_lcore(cores->c1) < 0)\n \t\t\t\treturn -1;\n \t\t\tif (rte_eal_wait_lcore(cores->c2) < 0)\n \t\t\t\treturn -1;\n \t\t}\n \t\ttest_ring_print_test_string(\n-\t\t\tTEST_RING_THREAD_SPSC | TEST_RING_ELEM_BULK,\n-\t\t\tesize, bulk_sizes[i], param1.spsc + param2.spsc);\n-\t\ttest_ring_print_test_string(\n-\t\t\tTEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK,\n-\t\t\tesize, bulk_sizes[i], param1.mpmc + param2.mpmc);\n+\t\t\t\tring_params->ring_flags,\n+\t\t\t\tring_params->elem_size,\n+\t\t\t\tbulk_sizes[i],\n+\t\t\t\tparam1->results[i] + param2->results[i]);\n \t}\n\n \treturn 0;\n@@ -333,7 +309,7 @@ load_loop_fn_helper(struct thread_params *p, const int esize)\n \tuint64_t hz = rte_get_timer_hz();\n \tuint64_t lcount = 0;\n \tconst unsigned int lcore = rte_lcore_id();\n-\tstruct thread_params *params = p;\n+\tstruct ring_params *ring_params = p->ring_params;\n \tvoid *burst = NULL;\n\n \tburst = test_ring_calloc(MAX_BURST, esize);\n@@ -346,9 +322,11 @@ load_loop_fn_helper(struct thread_params *p, const int esize)\n\n \tbegin = rte_get_timer_cycles();\n \twhile (time_diff < hz * TIME_MS / 1000) {\n-\t\ttest_ring_enqueue(params->r, burst, esize, params->size,\n+\t\ttest_ring_enqueue(ring_params->r, burst, esize,\n+\t\t\t\tring_params->elem_size,\n \t\t\t\tTEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK);\n-\t\ttest_ring_dequeue(params->r, burst, esize, params->size,\n+\t\ttest_ring_dequeue(ring_params->r, burst, esize,\n+\t\t\t\tring_params->elem_size,\n \t\t\t\tTEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK);\n \t\tlcount++;\n \t\ttime_diff = rte_get_timer_cycles() - begin;\n@@ -380,7 +358,8 @@ static int\n run_on_all_cores(struct rte_ring *r, const int esize)\n {\n \tuint64_t total;\n-\tstruct thread_params param;\n+\tstruct ring_params ring_params = {0};\n+\tstruct thread_params params = { .ring_params = &ring_params };\n \tlcore_function_t *lcore_f;\n \tunsigned int i, c;\n\n@@ -389,21 +368,20 @@ run_on_all_cores(struct rte_ring *r, const int esize)\n \telse\n \t\tlcore_f = load_loop_fn_16B;\n\n-\tmemset(&param, 0, sizeof(struct thread_params));\n \tfor (i = 0; i < RTE_DIM(bulk_sizes); i++) {\n \t\ttotal = 0;\n \t\tprintf(\"\\nBulk enq/dequeue count on size %u\\n\", bulk_sizes[i]);\n-\t\tparam.size = bulk_sizes[i];\n-\t\tparam.r = r;\n+\t\tparams.ring_params->bulk_sizes_i = i;\n+\t\tparams.ring_params->r = r;\n\n \t\t/* clear synchro and start workers */\n \t\t__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);\n-\t\tif (rte_eal_mp_remote_launch(lcore_f, &param, SKIP_MAIN) < 0)\n+\t\tif (rte_eal_mp_remote_launch(lcore_f, &params, SKIP_MAIN) < 0)\n \t\t\treturn -1;\n\n \t\t/* start synchro and launch test on main */\n \t\t__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);\n-\t\tlcore_f(&param);\n+\t\tlcore_f(&params);\n\n \t\trte_eal_mp_wait_lcore();\n\n@@ -462,9 +440,9 @@ static int\n test_burst_bulk_enqueue_dequeue(struct rte_ring *r, const int esize,\n \tconst unsigned int api_type)\n {\n-\tconst unsigned int iter_shift = 23;\n+\tconst unsigned int iter_shift = 26;\n \tconst unsigned int iterations = 1 << iter_shift;\n-\tunsigned int sz, i = 0;\n+\tunsigned int sz, i;\n \tvoid **burst = NULL;\n\n \tburst = test_ring_calloc(MAX_BURST, esize);\n@@ 
-472,17 +450,18 @@ test_burst_bulk_enqueue_dequeue(struct rte_ring *r, const int esize,\n \t\treturn -1;\n\n \tfor (sz = 0; sz < RTE_DIM(bulk_sizes); sz++) {\n+\t\tconst unsigned int n = iterations / bulk_sizes[sz];\n \t\tconst uint64_t start = rte_rdtsc();\n-\t\tfor (i = 0; i < iterations; i++) {\n+\t\tfor (i = 0; i < n; i++) {\n \t\t\ttest_ring_enqueue(r, burst, esize, bulk_sizes[sz],\n-\t\t\t\t\t\tapi_type);\n+\t\t\t\t\tapi_type);\n \t\t\ttest_ring_dequeue(r, burst, esize, bulk_sizes[sz],\n-\t\t\t\t\t\tapi_type);\n+\t\t\t\t\tapi_type);\n \t\t}\n \t\tconst uint64_t end = rte_rdtsc();\n\n \t\ttest_ring_print_test_string(api_type, esize, bulk_sizes[sz],\n-\t\t\t\t\t((double)(end - start)) / iterations);\n+\t\t\t\t\t((double)end - start) / iterations);\n \t}\n\n \trte_free(burst);\n@@ -490,12 +469,43 @@ test_burst_bulk_enqueue_dequeue(struct rte_ring *r, const int esize,\n \treturn 0;\n }\n\n+static __rte_always_inline int\n+test_ring_perf_esize_run_on_two_cores(\n+\t\tstruct thread_params *param1, struct thread_params *param2)\n+{\n+\tstruct lcore_pair cores;\n+\n+\tif (get_two_hyperthreads(&cores) == 0) {\n+\t\tprintf(\"\\n### Testing using two hyperthreads ###\\n\");\n+\t\tif (run_on_core_pair(&cores, param1, param2) < 0)\n+\t\t\treturn -1;\n+\t}\n+\tif (get_two_cores(&cores) == 0) {\n+\t\tprintf(\"\\n### Testing using two physical cores ###\\n\");\n+\t\tif (run_on_core_pair(&cores, param1, param2) < 0)\n+\t\t\treturn -1;\n+\t}\n+\tif (get_two_sockets(&cores) == 0) {\n+\t\tprintf(\"\\n### Testing using two NUMA nodes ###\\n\");\n+\t\tif (run_on_core_pair(&cores, param1, param2) < 0)\n+\t\t\treturn -1;\n+\t}\n+\treturn 0;\n+}\n+\n /* Run all tests for a given element size */\n static __rte_always_inline int\n test_ring_perf_esize(const int esize)\n {\n-\tstruct lcore_pair cores;\n \tstruct rte_ring *r = NULL;\n+\tdouble results_enq[RTE_DIM(bulk_sizes)];\n+\tdouble results_deq[RTE_DIM(bulk_sizes)];\n+\tstruct ring_params ring_params = {\n+\t\t\t.elem_size = esize, .ring_flags = TEST_RING_ELEM_BULK };\n+\tstruct thread_params param1 = {\n+\t\t\t.ring_params = &ring_params, .results = results_enq };\n+\tstruct thread_params param2 = {\n+\t\t\t.ring_params = &ring_params, .results = results_deq };\n\n \t/*\n \t * Performance test for legacy/_elem APIs\n@@ -535,22 +545,13 @@ test_ring_perf_esize(const int esize)\n \ttest_empty_dequeue(r, esize,\n \t\t\tTEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK);\n\n-\tif (get_two_hyperthreads(&cores) == 0) {\n-\t\tprintf(\"\\n### Testing using two hyperthreads ###\\n\");\n-\t\tif (run_on_core_pair(&cores, r, esize) < 0)\n-\t\t\tgoto test_fail;\n-\t}\n+\tring_params.r = r;\n\n-\tif (get_two_cores(&cores) == 0) {\n-\t\tprintf(\"\\n### Testing using two physical cores ###\\n\");\n-\t\tif (run_on_core_pair(&cores, r, esize) < 0)\n-\t\t\tgoto test_fail;\n-\t}\n-\tif (get_two_sockets(&cores) == 0) {\n-\t\tprintf(\"\\n### Testing using two NUMA nodes ###\\n\");\n-\t\tif (run_on_core_pair(&cores, r, esize) < 0)\n-\t\t\tgoto test_fail;\n-\t}\n+\tring_params.ring_flags = TEST_RING_THREAD_SPSC | TEST_RING_ELEM_BULK;\n+\ttest_ring_perf_esize_run_on_two_cores(&param1, &param2);\n+\n+\tring_params.ring_flags = TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK;\n+\ttest_ring_perf_esize_run_on_two_cores(&param1, &param2);\n\n \tprintf(\"\\n### Testing using all worker nodes ###\\n\");\n \tif (run_on_all_cores(r, esize) < 0)\n@@ -566,6 +567,109 @@ test_ring_perf_esize(const int esize)\n \treturn -1;\n }\n\n+\n+static __rte_always_inline 
int\n+test_ring_perf_compression(void)\n+{\n+\tdouble results1[RTE_DIM(bulk_sizes)];\n+\tdouble results2[RTE_DIM(bulk_sizes)];\n+\tdouble results1_comp[2][RTE_DIM(bulk_sizes)];\n+\tdouble results2_comp[2][RTE_DIM(bulk_sizes)];\n+\n+\tstruct lcore_pair cores;\n+\tint ret = -1;\n+\tunsigned int i, j;\n+\tstruct ring_params ring_params = { .elem_size = sizeof(void *) };\n+\tstruct thread_params param1 = {\n+\t\t\t.ring_params = &ring_params, .results = results1 };\n+\tstruct thread_params param2 = {\n+\t\t\t.ring_params = &ring_params, .results = results2 };\n+\n+\tprintf(\"\\n### Testing compression gain ###\");\n+\n+\tring_params.r = rte_ring_create_elem(\n+\t\t\tRING_NAME, sizeof(void *),\n+\t\t\tRING_SIZE, rte_socket_id(),\n+\t\t\tRING_F_SP_ENQ | RING_F_SC_DEQ);\n+\n+\tif (ring_params.r == NULL)\n+\t\treturn -1;\n+\n+\tif (get_two_cores(&cores) == 0) {\n+\t\tprintf(\"\\n### Testing zero copy ###\\n\");\n+\t\tring_params.ring_flags = TEST_RING_ELEM_BURST_ZC;\n+\t\tret = run_on_core_pair(&cores, &param1, &param2);\n+\t}\n+\n+\trte_ring_free(ring_params.r);\n+\n+\tif (ret != 0)\n+\t\treturn ret;\n+\n+\t/* rings allow only multiples of 4 as sizes,\n+\t * we allocate size 4 despite only using 2 bytes\n+\t * and use half of RING_SIZE as the number of elements\n+\t */\n+\tring_params.r = rte_ring_create_elem(\n+\t\t\tRING_NAME, sizeof(uint32_t),\n+\t\t\tRING_SIZE / 2, rte_socket_id(),\n+\t\t\tRING_F_SP_ENQ | RING_F_SC_DEQ);\n+\n+\tif (ring_params.r == NULL)\n+\t\treturn -1;\n+\n+\tparam1.results = results1_comp[0];\n+\tparam2.results = results2_comp[0];\n+\n+\tif (get_two_cores(&cores) == 0) {\n+\t\tprintf(\"\\n### Testing zero copy with compression (16b) ###\\n\");\n+\t\tring_params.ring_flags =\n+\t\t\t\tTEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_16;\n+\t\tret = run_on_core_pair(&cores, &param1, &param2);\n+\t}\n+\n+\trte_ring_free(ring_params.r);\n+\n+\tif (ret != 0)\n+\t\treturn ret;\n+\n+\tring_params.r = rte_ring_create_elem(\n+\t\t\tRING_NAME, sizeof(uint32_t),\n+\t\t\tRING_SIZE, rte_socket_id(),\n+\t\t\tRING_F_SP_ENQ | RING_F_SC_DEQ);\n+\n+\tif (ring_params.r == NULL)\n+\t\treturn -1;\n+\n+\tparam1.results = results1_comp[1];\n+\tparam2.results = results2_comp[1];\n+\n+\tif (get_two_cores(&cores) == 0) {\n+\t\tprintf(\"\\n### Testing zero copy with compression (32b) ###\\n\");\n+\t\tring_params.ring_flags =\n+\t\t\t\tTEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_32;\n+\t\tret = run_on_core_pair(&cores, &param1, &param2);\n+\t}\n+\n+\trte_ring_free(ring_params.r);\n+\n+\tfor (j = 0; j < 2; j++) {\n+\t\tprintf(\"\\n### Potential gain from compression (%d-bit offsets) \"\n+\t\t\"###\\n\", (j + 1) * 16);\n+\t\tfor (i = 0; i < RTE_DIM(bulk_sizes); i++) {\n+\t\t\tconst double result = results1[i] + results2[i];\n+\t\t\tconst double result_comp = results1_comp[j][i] +\n+\t\t\t\tresults2_comp[j][i];\n+\t\t\tconst double gain = 100 - (result_comp / result) * 100;\n+\n+\t\t\tprintf(\"Gain of %5.1F%% for burst of %-3u elems\\n\",\n+\t\t\t\t\tgain, bulk_sizes[i]);\n+\t\t}\n+\t}\n+\n+\treturn ret;\n+}\n+\n static int\n test_ring_perf(void)\n {\n@@ -576,6 +680,10 @@ test_ring_perf(void)\n \tif (test_ring_perf_esize(16) == -1)\n \t\treturn -1;\n\n+\t/* Test for performance gain of compression */\n+\tif (test_ring_perf_compression() == -1)\n+\t\treturn -1;\n+\n \treturn 0;\n }\n\n",
    "prefixes": [
        "v7",
        "2/4"
    ]
}
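
For reference, a minimal sketch of how a client might consume fields from the response above (again assuming the "requests" library; the output file name is arbitrary):

import requests

resp = requests.get("http://patches.dpdk.org/api/patches/137663/")
resp.raise_for_status()
patch = resp.json()

# A few commonly used fields from the representation shown above.
print("state:   ", patch["state"])            # e.g. "superseded"
print("check:   ", patch["check"])            # aggregate CI result, e.g. "success"
print("series:  ", patch["series"][0]["name"])
print("delegate:", patch["delegate"]["username"] if patch["delegate"] else None)

# The "mbox" URL serves the raw patch, suitable for `git am`.
mbox = requests.get(patch["mbox"])
mbox.raise_for_status()
with open("137663.mbox", "wb") as f:
    f.write(mbox.content)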