get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/138170/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 138170,
    "url": "http://patches.dpdk.org/api/patches/138170/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20240311203129.335720-4-paul.szczepanek@arm.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20240311203129.335720-4-paul.szczepanek@arm.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20240311203129.335720-4-paul.szczepanek@arm.com",
    "date": "2024-03-11T20:31:27",
    "name": "[v10,3/5] test: add pointer compress tests to ring perf test",
    "commit_ref": null,
    "pull_url": null,
    "state": "new",
    "archived": false,
    "hash": "68986e056256385912ee4550fe64db064009a0de",
    "submitter": {
        "id": 3199,
        "url": "http://patches.dpdk.org/api/people/3199/?format=api",
        "name": "Paul Szczepanek",
        "email": "paul.szczepanek@arm.com"
    },
    "delegate": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/users/1/?format=api",
        "username": "tmonjalo",
        "first_name": "Thomas",
        "last_name": "Monjalon",
        "email": "thomas@monjalon.net"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20240311203129.335720-4-paul.szczepanek@arm.com/mbox/",
    "series": [
        {
            "id": 31466,
            "url": "http://patches.dpdk.org/api/series/31466/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=31466",
            "date": "2024-03-11T20:31:24",
            "name": "add pointer compression API",
            "version": 10,
            "mbox": "http://patches.dpdk.org/series/31466/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/138170/comments/",
    "check": "success",
    "checks": "http://patches.dpdk.org/api/patches/138170/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 1F12E43C8A;\n\tMon, 11 Mar 2024 21:31:57 +0100 (CET)",
            "from mails.dpdk.org (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 9927F40E6E;\n\tMon, 11 Mar 2024 21:31:42 +0100 (CET)",
            "from foss.arm.com (foss.arm.com [217.140.110.172])\n by mails.dpdk.org (Postfix) with ESMTP id 12E554027C\n for <dev@dpdk.org>; Mon, 11 Mar 2024 21:31:38 +0100 (CET)",
            "from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14])\n by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 80906152B;\n Mon, 11 Mar 2024 13:32:14 -0700 (PDT)",
            "from ampere-altra-2-1.usa.Arm.com (ampere-altra-2-1.usa.arm.com\n [10.118.91.158])\n by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 648AC3F73F;\n Mon, 11 Mar 2024 13:31:37 -0700 (PDT)"
        ],
        "From": "Paul Szczepanek <paul.szczepanek@arm.com>",
        "To": "dev@dpdk.org",
        "Cc": "bruce.richardson@intel.com, Paul Szczepanek <paul.szczepanek@arm.com>,\n Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>,\n Nathan Brown <nathan.brown@arm.com>",
        "Subject": "[PATCH v10 3/5] test: add pointer compress tests to ring perf test",
        "Date": "Mon, 11 Mar 2024 20:31:27 +0000",
        "Message-Id": "<20240311203129.335720-4-paul.szczepanek@arm.com>",
        "X-Mailer": "git-send-email 2.25.1",
        "In-Reply-To": "<20240311203129.335720-1-paul.szczepanek@arm.com>",
        "References": "<20230927150854.3670391-2-paul.szczepanek@arm.com>\n <20240311203129.335720-1-paul.szczepanek@arm.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "Add a test that runs a zero copy burst enqueue and dequeue on a ring\nof raw pointers and compressed pointers at different burst sizes to\nshowcase performance benefits of newly added pointer compression APIs.\n\nRefactored threading code to pass more parameters to threads to\nreuse existing code. Added more bulk sizes to showcase their effects\non compression. Adjusted loop iteration numbers to take into account\nbulk sizes to keep runtime constant (instead of number of operations).\n\nAdjusted old printfs to match new ones which have aligned numbers.\n\nSigned-off-by: Paul Szczepanek <paul.szczepanek@arm.com>\nReviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>\nReviewed-by: Nathan Brown <nathan.brown@arm.com>\n---\n app/test/meson.build      |  20 +--\n app/test/test_ring.h      |  92 ++++++++++\n app/test/test_ring_perf.c | 352 +++++++++++++++++++++++++-------------\n 3 files changed, 332 insertions(+), 132 deletions(-)\n\n--\n2.25.1",
    "diff": "diff --git a/app/test/meson.build b/app/test/meson.build\nindex 7d909039ae..df8cc00730 100644\n--- a/app/test/meson.build\n+++ b/app/test/meson.build\n@@ -156,16 +156,16 @@ source_file_deps = {\n #    'test_resource.c': [],\n     'test_rib.c': ['net', 'rib'],\n     'test_rib6.c': ['net', 'rib'],\n-    'test_ring.c': [],\n-    'test_ring_hts_stress.c': [],\n-    'test_ring_mpmc_stress.c': [],\n-    'test_ring_mt_peek_stress.c': [],\n-    'test_ring_mt_peek_stress_zc.c': [],\n-    'test_ring_perf.c': [],\n-    'test_ring_rts_stress.c': [],\n-    'test_ring_st_peek_stress.c': [],\n-    'test_ring_st_peek_stress_zc.c': [],\n-    'test_ring_stress.c': [],\n+    'test_ring.c': ['ptr_compress'],\n+    'test_ring_hts_stress.c': ['ptr_compress'],\n+    'test_ring_mpmc_stress.c': ['ptr_compress'],\n+    'test_ring_mt_peek_stress.c': ['ptr_compress'],\n+    'test_ring_mt_peek_stress_zc.c': ['ptr_compress'],\n+    'test_ring_perf.c': ['ptr_compress'],\n+    'test_ring_rts_stress.c': ['ptr_compress'],\n+    'test_ring_st_peek_stress.c': ['ptr_compress'],\n+    'test_ring_st_peek_stress_zc.c': ['ptr_compress'],\n+    'test_ring_stress.c': ['ptr_compress'],\n     'test_rwlock.c': [],\n     'test_sched.c': ['net', 'sched'],\n     'test_security.c': ['net', 'security'],\ndiff --git a/app/test/test_ring.h b/app/test/test_ring.h\nindex 45c263f3ff..f90662818c 100644\n--- a/app/test/test_ring.h\n+++ b/app/test/test_ring.h\n@@ -5,6 +5,8 @@\n #include <rte_malloc.h>\n #include <rte_ring.h>\n #include <rte_ring_elem.h>\n+#include <rte_memcpy.h>\n+#include <rte_ptr_compress.h>\n\n /* API type to call\n  * rte_ring_<sp/mp or sc/mc>_enqueue_<bulk/burst>\n@@ -25,6 +27,10 @@\n #define TEST_RING_ELEM_BULK 16\n #define TEST_RING_ELEM_BURST 32\n\n+#define TEST_RING_ELEM_BURST_ZC 64\n+#define TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_16 128\n+#define TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_32 256\n+\n #define TEST_RING_IGNORE_API_TYPE ~0U\n\n /* This function is placed here as it is required for both\n@@ -101,6 +107,9 @@ static inline unsigned int\n test_ring_enqueue(struct rte_ring *r, void **obj, int esize, unsigned int n,\n \t\t\tunsigned int api_type)\n {\n+\tunsigned int ret;\n+\tstruct rte_ring_zc_data zcd = {0};\n+\n \t/* Legacy queue APIs? */\n \tif (esize == -1)\n \t\tswitch (api_type) {\n@@ -152,6 +161,46 @@ test_ring_enqueue(struct rte_ring *r, void **obj, int esize, unsigned int n,\n \t\tcase (TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BURST):\n \t\t\treturn rte_ring_mp_enqueue_burst_elem(r, obj, esize, n,\n \t\t\t\t\t\t\t\tNULL);\n+\t\tcase (TEST_RING_ELEM_BURST_ZC):\n+\t\t\tret = rte_ring_enqueue_zc_burst_elem_start(\n+\t\t\t\t\tr, esize, n, &zcd, NULL);\n+\t\t\tif (unlikely(ret == 0))\n+\t\t\t\treturn 0;\n+\t\t\trte_memcpy(zcd.ptr1, (char *)obj, zcd.n1 * esize);\n+\t\t\tif (unlikely(zcd.ptr2 != NULL))\n+\t\t\t\trte_memcpy(zcd.ptr2,\n+\t\t\t\t\t\t(char *)obj + zcd.n1 * esize,\n+\t\t\t\t\t\t(ret - zcd.n1) * esize);\n+\t\t\trte_ring_enqueue_zc_finish(r, ret);\n+\t\t\treturn ret;\n+\t\tcase (TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_16):\n+\t\t\t/* rings cannot store uint16_t so we use a uint32_t\n+\t\t\t * and half the requested number of elements\n+\t\t\t * and compensate by doubling the returned numbers\n+\t\t\t */\n+\t\t\tret = rte_ring_enqueue_zc_burst_elem_start(\n+\t\t\t\t\tr, sizeof(uint32_t), n / 2, &zcd, NULL);\n+\t\t\tif (unlikely(ret == 0))\n+\t\t\t\treturn 0;\n+\t\t\trte_ptr_compress_16(0, obj, zcd.ptr1, zcd.n1 * 2, 3);\n+\t\t\tif (unlikely(zcd.ptr2 != NULL))\n+\t\t\t\trte_ptr_compress_16(0,\n+\t\t\t\t\t\tobj + (zcd.n1 * 2),\n+\t\t\t\t\t\tzcd.ptr2,\n+\t\t\t\t\t\t(ret - zcd.n1) * 2, 3);\n+\t\t\trte_ring_enqueue_zc_finish(r, ret);\n+\t\t\treturn ret * 2;\n+\t\tcase (TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_32):\n+\t\t\tret = rte_ring_enqueue_zc_burst_elem_start(\n+\t\t\t\t\tr, sizeof(uint32_t), n, &zcd, NULL);\n+\t\t\tif (unlikely(ret == 0))\n+\t\t\t\treturn 0;\n+\t\t\trte_ptr_compress_32(0, obj, zcd.ptr1, zcd.n1, 3);\n+\t\t\tif (unlikely(zcd.ptr2 != NULL))\n+\t\t\t\trte_ptr_compress_32(0, obj + zcd.n1,\n+\t\t\t\t\t\tzcd.ptr2, ret - zcd.n1, 3);\n+\t\t\trte_ring_enqueue_zc_finish(r, ret);\n+\t\t\treturn ret;\n \t\tdefault:\n \t\t\tprintf(\"Invalid API type\\n\");\n \t\t\treturn 0;\n@@ -162,6 +211,9 @@ static inline unsigned int\n test_ring_dequeue(struct rte_ring *r, void **obj, int esize, unsigned int n,\n \t\t\tunsigned int api_type)\n {\n+\tunsigned int ret;\n+\tstruct rte_ring_zc_data zcd = {0};\n+\n \t/* Legacy queue APIs? */\n \tif (esize == -1)\n \t\tswitch (api_type) {\n@@ -213,6 +265,46 @@ test_ring_dequeue(struct rte_ring *r, void **obj, int esize, unsigned int n,\n \t\tcase (TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BURST):\n \t\t\treturn rte_ring_mc_dequeue_burst_elem(r, obj, esize,\n \t\t\t\t\t\t\t\tn, NULL);\n+\t\tcase (TEST_RING_ELEM_BURST_ZC):\n+\t\t\tret = rte_ring_dequeue_zc_burst_elem_start(\n+\t\t\t\t\tr, esize, n, &zcd, NULL);\n+\t\t\tif (unlikely(ret == 0))\n+\t\t\t\treturn 0;\n+\t\t\trte_memcpy((char *)obj, zcd.ptr1, zcd.n1 * esize);\n+\t\t\tif (unlikely(zcd.ptr2 != NULL))\n+\t\t\t\trte_memcpy((char *)obj + zcd.n1 * esize,\n+\t\t\t\t\t\tzcd.ptr2,\n+\t\t\t\t\t\t(ret - zcd.n1) * esize);\n+\t\t\trte_ring_dequeue_zc_finish(r, ret);\n+\t\t\treturn ret;\n+\t\tcase (TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_16):\n+\t\t\t/* rings cannot store uint16_t so we use a uint32_t\n+\t\t\t * and half the requested number of elements\n+\t\t\t * and compensate by doubling the returned numbers\n+\t\t\t */\n+\t\t\tret = rte_ring_dequeue_zc_burst_elem_start(\n+\t\t\t\t\tr, sizeof(uint32_t), n / 2, &zcd, NULL);\n+\t\t\tif (unlikely(ret == 0))\n+\t\t\t\treturn 0;\n+\t\t\trte_ptr_decompress_16(0, zcd.ptr1, obj, zcd.n1 * 2, 3);\n+\t\t\tif (unlikely(zcd.ptr2 != NULL))\n+\t\t\t\trte_ptr_decompress_16(0, zcd.ptr2,\n+\t\t\t\t\t\tobj + zcd.n1,\n+\t\t\t\t\t\t(ret - zcd.n1) * 2,\n+\t\t\t\t\t\t3);\n+\t\t\trte_ring_dequeue_zc_finish(r, ret);\n+\t\t\treturn ret * 2;\n+\t\tcase (TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_32):\n+\t\t\tret = rte_ring_dequeue_zc_burst_elem_start(\n+\t\t\t\t\tr, sizeof(uint32_t), n, &zcd, NULL);\n+\t\t\tif (unlikely(ret == 0))\n+\t\t\t\treturn 0;\n+\t\t\trte_ptr_decompress_32(0, zcd.ptr1, obj, zcd.n1, 3);\n+\t\t\tif (unlikely(zcd.ptr2 != NULL))\n+\t\t\t\trte_ptr_decompress_32(0, zcd.ptr2,\n+\t\t\t\t\t\tobj + zcd.n1, ret - zcd.n1, 3);\n+\t\t\trte_ring_dequeue_zc_finish(r, ret);\n+\t\t\treturn ret;\n \t\tdefault:\n \t\t\tprintf(\"Invalid API type\\n\");\n \t\t\treturn 0;\ndiff --git a/app/test/test_ring_perf.c b/app/test/test_ring_perf.c\nindex d7c5a4c30b..367670fd09 100644\n--- a/app/test/test_ring_perf.c\n+++ b/app/test/test_ring_perf.c\n@@ -22,13 +22,13 @@\n\n #define RING_NAME \"RING_PERF\"\n #define RING_SIZE 4096\n-#define MAX_BURST 32\n+#define MAX_BURST 256\n\n /*\n  * the sizes to enqueue and dequeue in testing\n  * (marked volatile so they won't be seen as compile-time constants)\n  */\n-static const volatile unsigned bulk_sizes[] = { 8, 32 };\n+static const volatile unsigned int bulk_sizes[] = { 8, 32, 64, 128, 256 };\n\n struct lcore_pair {\n \tunsigned c1, c2;\n@@ -43,26 +43,30 @@ test_ring_print_test_string(unsigned int api_type, int esize,\n \tif (esize == -1)\n \t\tprintf(\"legacy APIs\");\n \telse\n-\t\tprintf(\"elem APIs: element size %dB\", esize);\n+\t\tprintf(\"elem APIs (size:%2dB)\", esize);\n\n \tif (api_type == TEST_RING_IGNORE_API_TYPE)\n \t\treturn;\n\n \tif ((api_type & TEST_RING_THREAD_DEF) == TEST_RING_THREAD_DEF)\n-\t\tprintf(\": default enqueue/dequeue: \");\n+\t\tprintf(\" - default enqueue/dequeue\");\n \telse if ((api_type & TEST_RING_THREAD_SPSC) == TEST_RING_THREAD_SPSC)\n-\t\tprintf(\": SP/SC: \");\n+\t\tprintf(\" - SP/SC\");\n \telse if ((api_type & TEST_RING_THREAD_MPMC) == TEST_RING_THREAD_MPMC)\n-\t\tprintf(\": MP/MC: \");\n+\t\tprintf(\" - MP/MC\");\n\n \tif ((api_type & TEST_RING_ELEM_SINGLE) == TEST_RING_ELEM_SINGLE)\n-\t\tprintf(\"single: \");\n+\t\tprintf(\" - single - \");\n \telse if ((api_type & TEST_RING_ELEM_BULK) == TEST_RING_ELEM_BULK)\n-\t\tprintf(\"bulk (size: %u): \", bsz);\n+\t\tprintf(\" - bulk (n:%-3u) - \", bsz);\n \telse if ((api_type & TEST_RING_ELEM_BURST) == TEST_RING_ELEM_BURST)\n-\t\tprintf(\"burst (size: %u): \", bsz);\n+\t\tprintf(\" - burst (n:%-3u) - \", bsz);\n+\telse if ((api_type & (TEST_RING_ELEM_BURST_ZC |\n+\t\t\tTEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_16 |\n+\t\t\tTEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_32)) != 0)\n+\t\tprintf(\" - burst zero copy (n:%-3u) - \", bsz);\n\n-\tprintf(\"%.2F\\n\", value);\n+\tprintf(\"cycles per elem: %.3F\\n\", value);\n }\n\n /**** Functions to analyse our core mask to get cores for different tests ***/\n@@ -144,28 +148,33 @@ static void\n test_empty_dequeue(struct rte_ring *r, const int esize,\n \t\t\tconst unsigned int api_type)\n {\n-\tconst unsigned int iter_shift = 26;\n+\tconst unsigned int iter_shift = 29;\n \tconst unsigned int iterations = 1 << iter_shift;\n \tunsigned int i = 0;\n \tvoid *burst[MAX_BURST];\n\n+\tconst unsigned int bulk_iterations = iterations / bulk_sizes[0];\n \tconst uint64_t start = rte_rdtsc();\n-\tfor (i = 0; i < iterations; i++)\n+\tfor (i = 0; i < bulk_iterations; i++)\n \t\ttest_ring_dequeue(r, burst, esize, bulk_sizes[0], api_type);\n \tconst uint64_t end = rte_rdtsc();\n\n \ttest_ring_print_test_string(api_type, esize, bulk_sizes[0],\n-\t\t\t\t\t((double)(end - start)) / iterations);\n+\t\t\t\t\t((double)end - start) / iterations);\n }\n\n-/*\n- * for the separate enqueue and dequeue threads they take in one param\n- * and return two. Input = burst size, output = cycle average for sp/sc & mp/mc\n- */\n-struct thread_params {\n+/* describes the ring used by the enqueue and dequeue thread */\n+struct ring_params {\n \tstruct rte_ring *r;\n-\tunsigned size;        /* input value, the burst size */\n-\tdouble spsc, mpmc;    /* output value, the single or multi timings */\n+\tunsigned int elem_size;\n+\tunsigned int bulk_sizes_i; /* index into bulk_size array */\n+\tunsigned int ring_flags; /* flags for test_ring_enqueue/dequeue */\n+};\n+\n+/* Used to specify enqueue and dequeue ring operations and their results */\n+struct thread_params {\n+\tstruct ring_params *ring_params;\n+\tdouble *results; /* result array size must be equal to bulk_sizes */\n };\n\n /*\n@@ -174,16 +183,15 @@ struct thread_params {\n  * flag == 1 -> dequeue\n  */\n static __rte_always_inline int\n-enqueue_dequeue_bulk_helper(const unsigned int flag, const int esize,\n-\tstruct thread_params *p)\n+enqueue_dequeue_bulk_helper(const unsigned int flag, struct thread_params *p)\n {\n \tint ret;\n-\tconst unsigned int iter_shift = 15;\n+\tconst unsigned int iter_shift = 22;\n \tconst unsigned int iterations = 1 << iter_shift;\n-\tstruct rte_ring *r = p->r;\n-\tunsigned int bsize = p->size;\n \tunsigned int i;\n \tvoid *burst = NULL;\n+\tunsigned int n_remaining;\n+\tconst unsigned int bulk_n = bulk_sizes[p->ring_params->bulk_sizes_i];\n\n #ifdef RTE_USE_C11_MEM_MODEL\n \tif (__atomic_fetch_add(&lcore_count, 1, __ATOMIC_RELAXED) + 1 != 2)\n@@ -193,44 +201,38 @@ enqueue_dequeue_bulk_helper(const unsigned int flag, const int esize,\n \t\twhile(lcore_count != 2)\n \t\t\trte_pause();\n\n-\tburst = test_ring_calloc(MAX_BURST, esize);\n+\tburst = test_ring_calloc(MAX_BURST, p->ring_params->elem_size);\n \tif (burst == NULL)\n \t\treturn -1;\n\n \tconst uint64_t sp_start = rte_rdtsc();\n-\tfor (i = 0; i < iterations; i++)\n+\tconst unsigned int bulk_iterations = iterations / bulk_n;\n+\tfor (i = 0; i < bulk_iterations; i++) {\n+\t\tn_remaining = bulk_n;\n \t\tdo {\n \t\t\tif (flag == 0)\n-\t\t\t\tret = test_ring_enqueue(r, burst, esize, bsize,\n-\t\t\t\t\t\tTEST_RING_THREAD_SPSC |\n-\t\t\t\t\t\tTEST_RING_ELEM_BULK);\n+\t\t\t\tret = test_ring_enqueue(p->ring_params->r,\n+\t\t\t\t\t\tburst,\n+\t\t\t\t\t\tp->ring_params->elem_size,\n+\t\t\t\t\t\tn_remaining,\n+\t\t\t\t\t\tp->ring_params->ring_flags);\n \t\t\telse if (flag == 1)\n-\t\t\t\tret = test_ring_dequeue(r, burst, esize, bsize,\n-\t\t\t\t\t\tTEST_RING_THREAD_SPSC |\n-\t\t\t\t\t\tTEST_RING_ELEM_BULK);\n+\t\t\t\tret = test_ring_dequeue(p->ring_params->r,\n+\t\t\t\t\t\tburst,\n+\t\t\t\t\t\tp->ring_params->elem_size,\n+\t\t\t\t\t\tn_remaining,\n+\t\t\t\t\t\tp->ring_params->ring_flags);\n \t\t\tif (ret == 0)\n \t\t\t\trte_pause();\n-\t\t} while (!ret);\n+\t\t\telse\n+\t\t\t\tn_remaining -= ret;\n+\t\t} while (n_remaining > 0);\n+\t}\n \tconst uint64_t sp_end = rte_rdtsc();\n\n-\tconst uint64_t mp_start = rte_rdtsc();\n-\tfor (i = 0; i < iterations; i++)\n-\t\tdo {\n-\t\t\tif (flag == 0)\n-\t\t\t\tret = test_ring_enqueue(r, burst, esize, bsize,\n-\t\t\t\t\t\tTEST_RING_THREAD_MPMC |\n-\t\t\t\t\t\tTEST_RING_ELEM_BULK);\n-\t\t\telse if (flag == 1)\n-\t\t\t\tret = test_ring_dequeue(r, burst, esize, bsize,\n-\t\t\t\t\t\tTEST_RING_THREAD_MPMC |\n-\t\t\t\t\t\tTEST_RING_ELEM_BULK);\n-\t\t\tif (ret == 0)\n-\t\t\t\trte_pause();\n-\t\t} while (!ret);\n-\tconst uint64_t mp_end = rte_rdtsc();\n+\tp->results[p->ring_params->bulk_sizes_i] =\n+\t\t\t((double)sp_end - sp_start) / iterations;\n\n-\tp->spsc = ((double)(sp_end - sp_start))/(iterations * bsize);\n-\tp->mpmc = ((double)(mp_end - mp_start))/(iterations * bsize);\n \treturn 0;\n }\n\n@@ -243,15 +245,7 @@ enqueue_bulk(void *p)\n {\n \tstruct thread_params *params = p;\n\n-\treturn enqueue_dequeue_bulk_helper(0, -1, params);\n-}\n-\n-static int\n-enqueue_bulk_16B(void *p)\n-{\n-\tstruct thread_params *params = p;\n-\n-\treturn enqueue_dequeue_bulk_helper(0, 16, params);\n+\treturn enqueue_dequeue_bulk_helper(0, params);\n }\n\n /*\n@@ -263,15 +257,7 @@ dequeue_bulk(void *p)\n {\n \tstruct thread_params *params = p;\n\n-\treturn enqueue_dequeue_bulk_helper(1, -1, params);\n-}\n-\n-static int\n-dequeue_bulk_16B(void *p)\n-{\n-\tstruct thread_params *params = p;\n-\n-\treturn enqueue_dequeue_bulk_helper(1, 16, params);\n+\treturn enqueue_dequeue_bulk_helper(1, params);\n }\n\n /*\n@@ -279,42 +265,32 @@ dequeue_bulk_16B(void *p)\n  * used to measure ring perf between hyperthreads, cores and sockets.\n  */\n static int\n-run_on_core_pair(struct lcore_pair *cores, struct rte_ring *r, const int esize)\n+run_on_core_pair(struct lcore_pair *cores,\n+\t\tstruct thread_params *param1, struct thread_params *param2)\n {\n-\tlcore_function_t *f1, *f2;\n-\tstruct thread_params param1 = {0}, param2 = {0};\n \tunsigned i;\n-\n-\tif (esize == -1) {\n-\t\tf1 = enqueue_bulk;\n-\t\tf2 = dequeue_bulk;\n-\t} else {\n-\t\tf1 = enqueue_bulk_16B;\n-\t\tf2 = dequeue_bulk_16B;\n-\t}\n+\tstruct ring_params *ring_params = param1->ring_params;\n\n \tfor (i = 0; i < RTE_DIM(bulk_sizes); i++) {\n \t\tlcore_count = 0;\n-\t\tparam1.size = param2.size = bulk_sizes[i];\n-\t\tparam1.r = param2.r = r;\n+\t\tring_params->bulk_sizes_i = i;\n \t\tif (cores->c1 == rte_get_main_lcore()) {\n-\t\t\trte_eal_remote_launch(f2, &param2, cores->c2);\n-\t\t\tf1(&param1);\n+\t\t\trte_eal_remote_launch(dequeue_bulk, param2, cores->c2);\n+\t\t\tenqueue_bulk(param1);\n \t\t\trte_eal_wait_lcore(cores->c2);\n \t\t} else {\n-\t\t\trte_eal_remote_launch(f1, &param1, cores->c1);\n-\t\t\trte_eal_remote_launch(f2, &param2, cores->c2);\n+\t\t\trte_eal_remote_launch(enqueue_bulk, param1, cores->c1);\n+\t\t\trte_eal_remote_launch(dequeue_bulk, param2, cores->c2);\n \t\t\tif (rte_eal_wait_lcore(cores->c1) < 0)\n \t\t\t\treturn -1;\n \t\t\tif (rte_eal_wait_lcore(cores->c2) < 0)\n \t\t\t\treturn -1;\n \t\t}\n \t\ttest_ring_print_test_string(\n-\t\t\tTEST_RING_THREAD_SPSC | TEST_RING_ELEM_BULK,\n-\t\t\tesize, bulk_sizes[i], param1.spsc + param2.spsc);\n-\t\ttest_ring_print_test_string(\n-\t\t\tTEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK,\n-\t\t\tesize, bulk_sizes[i], param1.mpmc + param2.mpmc);\n+\t\t\t\tring_params->ring_flags,\n+\t\t\t\tring_params->elem_size,\n+\t\t\t\tbulk_sizes[i],\n+\t\t\t\tparam1->results[i] + param2->results[i]);\n \t}\n\n \treturn 0;\n@@ -333,7 +309,7 @@ load_loop_fn_helper(struct thread_params *p, const int esize)\n \tuint64_t hz = rte_get_timer_hz();\n \tuint64_t lcount = 0;\n \tconst unsigned int lcore = rte_lcore_id();\n-\tstruct thread_params *params = p;\n+\tstruct ring_params *ring_params = p->ring_params;\n \tvoid *burst = NULL;\n\n \tburst = test_ring_calloc(MAX_BURST, esize);\n@@ -346,9 +322,11 @@ load_loop_fn_helper(struct thread_params *p, const int esize)\n\n \tbegin = rte_get_timer_cycles();\n \twhile (time_diff < hz * TIME_MS / 1000) {\n-\t\ttest_ring_enqueue(params->r, burst, esize, params->size,\n+\t\ttest_ring_enqueue(ring_params->r, burst, esize,\n+\t\t\t\tring_params->elem_size,\n \t\t\t\tTEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK);\n-\t\ttest_ring_dequeue(params->r, burst, esize, params->size,\n+\t\ttest_ring_dequeue(ring_params->r, burst, esize,\n+\t\t\t\tring_params->elem_size,\n \t\t\t\tTEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK);\n \t\tlcount++;\n \t\ttime_diff = rte_get_timer_cycles() - begin;\n@@ -380,7 +358,8 @@ static int\n run_on_all_cores(struct rte_ring *r, const int esize)\n {\n \tuint64_t total;\n-\tstruct thread_params param;\n+\tstruct ring_params ring_params = {0};\n+\tstruct thread_params params = { .ring_params = &ring_params };\n \tlcore_function_t *lcore_f;\n \tunsigned int i, c;\n\n@@ -389,21 +368,20 @@ run_on_all_cores(struct rte_ring *r, const int esize)\n \telse\n \t\tlcore_f = load_loop_fn_16B;\n\n-\tmemset(&param, 0, sizeof(struct thread_params));\n \tfor (i = 0; i < RTE_DIM(bulk_sizes); i++) {\n \t\ttotal = 0;\n \t\tprintf(\"\\nBulk enq/dequeue count on size %u\\n\", bulk_sizes[i]);\n-\t\tparam.size = bulk_sizes[i];\n-\t\tparam.r = r;\n+\t\tparams.ring_params->bulk_sizes_i = i;\n+\t\tparams.ring_params->r = r;\n\n \t\t/* clear synchro and start workers */\n \t\t__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);\n-\t\tif (rte_eal_mp_remote_launch(lcore_f, &param, SKIP_MAIN) < 0)\n+\t\tif (rte_eal_mp_remote_launch(lcore_f, &params, SKIP_MAIN) < 0)\n \t\t\treturn -1;\n\n \t\t/* start synchro and launch test on main */\n \t\t__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);\n-\t\tlcore_f(&param);\n+\t\tlcore_f(&params);\n\n \t\trte_eal_mp_wait_lcore();\n\n@@ -462,9 +440,9 @@ static int\n test_burst_bulk_enqueue_dequeue(struct rte_ring *r, const int esize,\n \tconst unsigned int api_type)\n {\n-\tconst unsigned int iter_shift = 23;\n+\tconst unsigned int iter_shift = 26;\n \tconst unsigned int iterations = 1 << iter_shift;\n-\tunsigned int sz, i = 0;\n+\tunsigned int sz, i;\n \tvoid **burst = NULL;\n\n \tburst = test_ring_calloc(MAX_BURST, esize);\n@@ -472,17 +450,18 @@ test_burst_bulk_enqueue_dequeue(struct rte_ring *r, const int esize,\n \t\treturn -1;\n\n \tfor (sz = 0; sz < RTE_DIM(bulk_sizes); sz++) {\n+\t\tconst unsigned int n = iterations / bulk_sizes[sz];\n \t\tconst uint64_t start = rte_rdtsc();\n-\t\tfor (i = 0; i < iterations; i++) {\n+\t\tfor (i = 0; i < n; i++) {\n \t\t\ttest_ring_enqueue(r, burst, esize, bulk_sizes[sz],\n-\t\t\t\t\t\tapi_type);\n+\t\t\t\t\tapi_type);\n \t\t\ttest_ring_dequeue(r, burst, esize, bulk_sizes[sz],\n-\t\t\t\t\t\tapi_type);\n+\t\t\t\t\tapi_type);\n \t\t}\n \t\tconst uint64_t end = rte_rdtsc();\n\n \t\ttest_ring_print_test_string(api_type, esize, bulk_sizes[sz],\n-\t\t\t\t\t((double)(end - start)) / iterations);\n+\t\t\t\t\t((double)end - start) / iterations);\n \t}\n\n \trte_free(burst);\n@@ -490,12 +469,43 @@ test_burst_bulk_enqueue_dequeue(struct rte_ring *r, const int esize,\n \treturn 0;\n }\n\n+static __rte_always_inline int\n+test_ring_perf_esize_run_on_two_cores(\n+\t\tstruct thread_params *param1, struct thread_params *param2)\n+{\n+\tstruct lcore_pair cores;\n+\n+\tif (get_two_hyperthreads(&cores) == 0) {\n+\t\tprintf(\"\\n### Testing using two hyperthreads ###\\n\");\n+\t\tif (run_on_core_pair(&cores, param1, param2) < 0)\n+\t\t\treturn -1;\n+\t}\n+\tif (get_two_cores(&cores) == 0) {\n+\t\tprintf(\"\\n### Testing using two physical cores ###\\n\");\n+\t\tif (run_on_core_pair(&cores, param1, param2) < 0)\n+\t\t\treturn -1;\n+\t}\n+\tif (get_two_sockets(&cores) == 0) {\n+\t\tprintf(\"\\n### Testing using two NUMA nodes ###\\n\");\n+\t\tif (run_on_core_pair(&cores, param1, param2) < 0)\n+\t\t\treturn -1;\n+\t}\n+\treturn 0;\n+}\n+\n /* Run all tests for a given element size */\n static __rte_always_inline int\n test_ring_perf_esize(const int esize)\n {\n-\tstruct lcore_pair cores;\n \tstruct rte_ring *r = NULL;\n+\tdouble results_enq[RTE_DIM(bulk_sizes)];\n+\tdouble results_deq[RTE_DIM(bulk_sizes)];\n+\tstruct ring_params ring_params = {\n+\t\t\t.elem_size = esize, .ring_flags = TEST_RING_ELEM_BULK };\n+\tstruct thread_params param1 = {\n+\t\t\t.ring_params = &ring_params, .results = results_enq };\n+\tstruct thread_params param2 = {\n+\t\t\t.ring_params = &ring_params, .results = results_deq };\n\n \t/*\n \t * Performance test for legacy/_elem APIs\n@@ -535,22 +545,13 @@ test_ring_perf_esize(const int esize)\n \ttest_empty_dequeue(r, esize,\n \t\t\tTEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK);\n\n-\tif (get_two_hyperthreads(&cores) == 0) {\n-\t\tprintf(\"\\n### Testing using two hyperthreads ###\\n\");\n-\t\tif (run_on_core_pair(&cores, r, esize) < 0)\n-\t\t\tgoto test_fail;\n-\t}\n+\tring_params.r = r;\n\n-\tif (get_two_cores(&cores) == 0) {\n-\t\tprintf(\"\\n### Testing using two physical cores ###\\n\");\n-\t\tif (run_on_core_pair(&cores, r, esize) < 0)\n-\t\t\tgoto test_fail;\n-\t}\n-\tif (get_two_sockets(&cores) == 0) {\n-\t\tprintf(\"\\n### Testing using two NUMA nodes ###\\n\");\n-\t\tif (run_on_core_pair(&cores, r, esize) < 0)\n-\t\t\tgoto test_fail;\n-\t}\n+\tring_params.ring_flags = TEST_RING_THREAD_SPSC | TEST_RING_ELEM_BULK;\n+\ttest_ring_perf_esize_run_on_two_cores(&param1, &param2);\n+\n+\tring_params.ring_flags = TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BULK;\n+\ttest_ring_perf_esize_run_on_two_cores(&param1, &param2);\n\n \tprintf(\"\\n### Testing using all worker nodes ###\\n\");\n \tif (run_on_all_cores(r, esize) < 0)\n@@ -566,6 +567,109 @@ test_ring_perf_esize(const int esize)\n \treturn -1;\n }\n\n+\n+static __rte_always_inline int\n+test_ring_perf_compression(void)\n+{\n+\tdouble results1[RTE_DIM(bulk_sizes)];\n+\tdouble results2[RTE_DIM(bulk_sizes)];\n+\tdouble results1_comp[2][RTE_DIM(bulk_sizes)];\n+\tdouble results2_comp[2][RTE_DIM(bulk_sizes)];\n+\n+\tstruct lcore_pair cores;\n+\tint ret = -1;\n+\tunsigned int i, j;\n+\tstruct ring_params ring_params = { .elem_size = sizeof(void *) };\n+\tstruct thread_params param1 = {\n+\t\t\t.ring_params = &ring_params, .results = results1 };\n+\tstruct thread_params param2 = {\n+\t\t\t.ring_params = &ring_params, .results = results2 };\n+\n+\tprintf(\"\\n### Testing compression gain ###\");\n+\n+\tring_params.r = rte_ring_create_elem(\n+\t\t\tRING_NAME, sizeof(void *),\n+\t\t\tRING_SIZE, rte_socket_id(),\n+\t\t\tRING_F_SP_ENQ | RING_F_SC_DEQ);\n+\n+\tif (ring_params.r == NULL)\n+\t\treturn -1;\n+\n+\tif (get_two_cores(&cores) == 0) {\n+\t\tprintf(\"\\n### Testing zero copy ###\\n\");\n+\t\tring_params.ring_flags = TEST_RING_ELEM_BURST_ZC;\n+\t\tret = run_on_core_pair(&cores, &param1, &param2);\n+\t}\n+\n+\trte_ring_free(ring_params.r);\n+\n+\tif (ret != 0)\n+\t\treturn ret;\n+\n+\t/* rings allow only multiples of 4 as sizes,\n+\t * we allocate size 4 despite only using 2 bytes\n+\t * and use half of RING_SIZE as the number of elements\n+\t */\n+\tring_params.r = rte_ring_create_elem(\n+\t\t\tRING_NAME, sizeof(uint32_t),\n+\t\t\tRING_SIZE / 2, rte_socket_id(),\n+\t\t\tRING_F_SP_ENQ | RING_F_SC_DEQ);\n+\n+\tif (ring_params.r == NULL)\n+\t\treturn -1;\n+\n+\tparam1.results = results1_comp[0];\n+\tparam2.results = results2_comp[0];\n+\n+\tif (get_two_cores(&cores) == 0) {\n+\t\tprintf(\"\\n### Testing zero copy with compression (16b) ###\\n\");\n+\t\tring_params.ring_flags =\n+\t\t\t\tTEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_16;\n+\t\tret = run_on_core_pair(&cores, &param1, &param2);\n+\t}\n+\n+\trte_ring_free(ring_params.r);\n+\n+\tif (ret != 0)\n+\t\treturn ret;\n+\n+\tring_params.r = rte_ring_create_elem(\n+\t\t\tRING_NAME, sizeof(uint32_t),\n+\t\t\tRING_SIZE, rte_socket_id(),\n+\t\t\tRING_F_SP_ENQ | RING_F_SC_DEQ);\n+\n+\tif (ring_params.r == NULL)\n+\t\treturn -1;\n+\n+\tparam1.results = results1_comp[1];\n+\tparam2.results = results2_comp[1];\n+\n+\tif (get_two_cores(&cores) == 0) {\n+\t\tprintf(\"\\n### Testing zero copy with compression (32b) ###\\n\");\n+\t\tring_params.ring_flags =\n+\t\t\t\tTEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_32;\n+\t\tret = run_on_core_pair(&cores, &param1, &param2);\n+\t}\n+\n+\trte_ring_free(ring_params.r);\n+\n+\tfor (j = 0; j < 2; j++) {\n+\t\tprintf(\"\\n### Potential gain from compression (%d-bit offsets) \"\n+\t\t\"###\\n\", (j + 1) * 16);\n+\t\tfor (i = 0; i < RTE_DIM(bulk_sizes); i++) {\n+\t\t\tconst double result = results1[i] + results2[i];\n+\t\t\tconst double result_comp = results1_comp[j][i] +\n+\t\t\t\tresults2_comp[j][i];\n+\t\t\tconst double gain = 100 - (result_comp / result) * 100;\n+\n+\t\t\tprintf(\"Gain of %5.1F%% for burst of %-3u elems\\n\",\n+\t\t\t\t\tgain, bulk_sizes[i]);\n+\t\t}\n+\t}\n+\n+\treturn ret;\n+}\n+\n static int\n test_ring_perf(void)\n {\n@@ -576,6 +680,10 @@ test_ring_perf(void)\n \tif (test_ring_perf_esize(16) == -1)\n \t\treturn -1;\n\n+\t/* Test for performance gain of compression */\n+\tif (test_ring_perf_compression() == -1)\n+\t\treturn -1;\n+\n \treturn 0;\n }\n\n",
    "prefixes": [
        "v10",
        "3/5"
    ]
}