get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/137504/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 137504,
    "url": "http://patches.dpdk.org/api/patches/137504/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20240229160308.365277-2-paul.szczepanek@arm.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20240229160308.365277-2-paul.szczepanek@arm.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20240229160308.365277-2-paul.szczepanek@arm.com",
    "date": "2024-02-29T16:03:05",
    "name": "[v6,1/4] eal: add pointer compression functions",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "2ddda75f1a838a951136869cfc876e313717e39e",
    "submitter": {
        "id": 3199,
        "url": "http://patches.dpdk.org/api/people/3199/?format=api",
        "name": "Paul Szczepanek",
        "email": "paul.szczepanek@arm.com"
    },
    "delegate": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/users/1/?format=api",
        "username": "tmonjalo",
        "first_name": "Thomas",
        "last_name": "Monjalon",
        "email": "thomas@monjalon.net"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20240229160308.365277-2-paul.szczepanek@arm.com/mbox/",
    "series": [
        {
            "id": 31302,
            "url": "http://patches.dpdk.org/api/series/31302/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=31302",
            "date": "2024-02-29T16:03:04",
            "name": "add pointer compression API",
            "version": 6,
            "mbox": "http://patches.dpdk.org/series/31302/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/137504/comments/",
    "check": "warning",
    "checks": "http://patches.dpdk.org/api/patches/137504/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id E458B43C3B;\n\tThu, 29 Feb 2024 17:03:45 +0100 (CET)",
            "from mails.dpdk.org (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 4499E42DD8;\n\tThu, 29 Feb 2024 17:03:43 +0100 (CET)",
            "from foss.arm.com (foss.arm.com [217.140.110.172])\n by mails.dpdk.org (Postfix) with ESMTP id 5A0B54025C\n for <dev@dpdk.org>; Thu, 29 Feb 2024 17:03:39 +0100 (CET)",
            "from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14])\n by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 5EE77150C;\n Thu, 29 Feb 2024 08:04:17 -0800 (PST)",
            "from ampere-altra-2-1.usa.Arm.com (ampere-altra-2-1.usa.arm.com\n [10.118.91.158])\n by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 72F513F6C4;\n Thu, 29 Feb 2024 08:03:38 -0800 (PST)"
        ],
        "From": "Paul Szczepanek <paul.szczepanek@arm.com>",
        "To": "dev@dpdk.org",
        "Cc": "Paul Szczepanek <paul.szczepanek@arm.com>,\n Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>,\n Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>",
        "Subject": "[PATCH v6 1/4] eal: add pointer compression functions",
        "Date": "Thu, 29 Feb 2024 16:03:05 +0000",
        "Message-Id": "<20240229160308.365277-2-paul.szczepanek@arm.com>",
        "X-Mailer": "git-send-email 2.25.1",
        "In-Reply-To": "<20240229160308.365277-1-paul.szczepanek@arm.com>",
        "References": "<20230927150854.3670391-2-paul.szczepanek@arm.com>\n <20240229160308.365277-1-paul.szczepanek@arm.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "Add a new utility header for compressing pointers. The provided\nfunctions can store pointers in 32-bit offsets.\n\nThe compression takes advantage of the fact that pointers are\nusually located in a limited memory region (like a mempool).\nWe can compress them by converting them to offsets from a base\nmemory address. Offsets can be stored in fewer bytes (dictated\nby the memory region size and alignment of the pointer).\nFor example: an 8 byte aligned pointer which is part of a 32GB\nmemory pool can be stored in 4 bytes.\n\nThis can be used for example when passing caches full of pointers\nbetween threads. Memory containing the pointers is copied multiple\ntimes which is especially costly between cores. This compression\nmethod will allow us to shrink the memory size copied. Further\ncommits add a test to evaluate the effectiveness of this approach.\n\nSuggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>\nSigned-off-by: Paul Szczepanek <paul.szczepanek@arm.com>\nSigned-off-by: Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>\nReviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>\n---\n .mailmap                           |   1 +\n lib/eal/include/meson.build        |   1 +\n lib/eal/include/rte_ptr_compress.h | 266 +++++++++++++++++++++++++++++\n 3 files changed, 268 insertions(+)\n create mode 100644 lib/eal/include/rte_ptr_compress.h\n\n--\n2.25.1",
    "diff": "diff --git a/.mailmap b/.mailmap\nindex 3f5bab26a8..004751d27a 100644\n--- a/.mailmap\n+++ b/.mailmap\n@@ -1069,6 +1069,7 @@ Paul Greenwalt <paul.greenwalt@intel.com>\n Paulis Gributs <paulis.gributs@intel.com>\n Paul Luse <paul.e.luse@intel.com>\n Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>\n+Paul Szczepanek <paul.szczepanek@arm.com>\n Pavan Kumar Linga <pavan.kumar.linga@intel.com>\n Pavan Nikhilesh <pbhagavatula@marvell.com> <pbhagavatula@caviumnetworks.com>\n Pavel Belous <pavel.belous@aquantia.com>\ndiff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build\nindex e94b056d46..ce2c733633 100644\n--- a/lib/eal/include/meson.build\n+++ b/lib/eal/include/meson.build\n@@ -36,6 +36,7 @@ headers += files(\n         'rte_pci_dev_features.h',\n         'rte_per_lcore.h',\n         'rte_pflock.h',\n+\t'rte_ptr_compress.h',\n         'rte_random.h',\n         'rte_reciprocal.h',\n         'rte_seqcount.h',\ndiff --git a/lib/eal/include/rte_ptr_compress.h b/lib/eal/include/rte_ptr_compress.h\nnew file mode 100644\nindex 0000000000..47a72e4213\n--- /dev/null\n+++ b/lib/eal/include/rte_ptr_compress.h\n@@ -0,0 +1,266 @@\n+/* SPDX-License-Identifier: BSD-shift-Clause\n+ * Copyright(c) 2023 Arm Limited\n+ */\n+\n+#ifndef RTE_PTR_COMPRESS_H\n+#define RTE_PTR_COMPRESS_H\n+\n+/**\n+ * @file\n+ * Pointer compression and decompression functions.\n+ *\n+ * When passing arrays full of pointers between threads, memory containing\n+ * the pointers is copied multiple times which is especially costly between\n+ * cores. These functions allow us to compress the pointers.\n+ *\n+ * Compression takes advantage of the fact that pointers are usually located in\n+ * a limited memory region (like a mempool). We compress them by converting them\n+ * to offsets from a base memory address. Offsets can be stored in fewer bytes.\n+ *\n+ * The compression functions come in two varieties: 32-bit and 16-bit.\n+ *\n+ * To determine how many bits are needed to compress the pointer calculate\n+ * the biggest offset possible (highest value pointer - base pointer)\n+ * and shift the value right according to alignment (shift by exponent of the\n+ * power of 2 of alignment: aligned by 4 - shift by 2, aligned by 8 - shift by\n+ * 3, etc.). The resulting value must fit in either 32 or 16 bits.\n+ *\n+ * For usage example and further explanation please see \"Pointer Compression\" in\n+ * doc/guides/prog_guide/env_abstraction_layer.rst\n+ */\n+\n+#include <stdint.h>\n+#include <inttypes.h>\n+\n+#include <rte_branch_prediction.h>\n+#include <rte_common.h>\n+#include <rte_debug.h>\n+#include <rte_vect.h>\n+\n+#ifdef __cplusplus\n+extern \"C\" {\n+#endif\n+\n+/**\n+ * Compress pointers into 32-bit offsets from base pointer.\n+ *\n+ * @note It is programmer's responsibility to ensure the resulting offsets fit\n+ * into 32 bits. Alignment of the structures pointed to by the pointers allows\n+ * us to drop bits from the offsets. This is controlled by the bit_shift\n+ * parameter. This means that if structures are aligned by 8 bytes they must be\n+ * within 32GB of the base pointer. If there is no such alignment guarantee they\n+ * must be within 4GB.\n+ *\n+ * @param ptr_base\n+ *   A pointer used to calculate offsets of pointers in src_table.\n+ * @param src_table\n+ *   A pointer to an array of pointers.\n+ * @param dest_table\n+ *   A pointer to an array of compressed pointers returned by this function.\n+ * @param n\n+ *   The number of objects to compress, must be strictly positive.\n+ * @param bit_shift\n+ *   Byte alignment of memory pointed to by the pointers allows for\n+ *   bits to be dropped from the offset and hence widen the memory region that\n+ *   can be covered. This controls how many bits are right shifted.\n+ **/\n+static __rte_always_inline void\n+rte_ptr_compress_32(void *ptr_base, void **src_table,\n+\t\tuint32_t *dest_table, unsigned int n, unsigned int bit_shift)\n+{\n+\tunsigned int i = 0;\n+#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32\n+\tsvuint64_t v_ptr_table;\n+\tsvbool_t pg = svwhilelt_b64(i, n);\n+\tdo {\n+\t\tv_ptr_table = svld1_u64(pg, (uint64_t *)src_table + i);\n+\t\tv_ptr_table = svsub_x(pg, v_ptr_table, (uint64_t)ptr_base);\n+\t\tv_ptr_table = svlsr_x(pg, v_ptr_table, bit_shift);\n+\t\tsvst1w(pg, &dest_table[i], v_ptr_table);\n+\t\ti += svcntd();\n+\t\tpg = svwhilelt_b64(i, n);\n+\t} while (svptest_any(svptrue_b64(), pg));\n+#elif defined __ARM_NEON && !defined RTE_ARCH_ARMv8_AARCH32\n+\tuint64_t ptr_diff;\n+\tuint64x2_t v_ptr_table;\n+\t/* right shift is done by left shifting by negative int */\n+\tint64x2_t v_shift = vdupq_n_s64(-bit_shift);\n+\tuint64x2_t v_ptr_base = vdupq_n_u64((uint64_t)ptr_base);\n+\tfor (; i < (n & ~0x1); i += 2) {\n+\t\tv_ptr_table = vld1q_u64((const uint64_t *)src_table + i);\n+\t\tv_ptr_table = vsubq_u64(v_ptr_table, v_ptr_base);\n+\t\tv_ptr_table = vshlq_u64(v_ptr_table, v_shift);\n+\t\tvst1_u32(dest_table + i, vqmovn_u64(v_ptr_table));\n+\t}\n+\t/* process leftover single item in case of odd number of n */\n+\tif (unlikely(n & 0x1)) {\n+\t\tptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);\n+\t\tdest_table[i] = (uint32_t) (ptr_diff >> bit_shift);\n+\t}\n+#else\n+\tuintptr_t ptr_diff;\n+\tfor (; i < n; i++) {\n+\t\tptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);\n+\t\tptr_diff = ptr_diff >> bit_shift;\n+\t\tRTE_ASSERT(ptr_diff <= UINT32_MAX);\n+\t\tdest_table[i] = (uint32_t) ptr_diff;\n+\t}\n+#endif\n+}\n+\n+/**\n+ * Decompress pointers from 32-bit offsets from base pointer.\n+ *\n+ * @param ptr_base\n+ *   A pointer which was used to calculate offsets in src_table.\n+ * @param src_table\n+ *   A pointer to an array to compressed pointers.\n+ * @param dest_table\n+ *   A pointer to an array of decompressed pointers returned by this function.\n+ * @param n\n+ *   The number of objects to decompress, must be strictly positive.\n+ * @param bit_shift\n+ *   Byte alignment of memory pointed to by the pointers allows for\n+ *   bits to be dropped from the offset and hence widen the memory region that\n+ *   can be covered. This controls how many bits are left shifted when pointers\n+ *   are recovered from the offsets.\n+ **/\n+static __rte_always_inline void\n+rte_ptr_decompress_32(void *ptr_base, uint32_t *src_table,\n+\t\tvoid **dest_table, unsigned int n, unsigned int bit_shift)\n+{\n+\tunsigned int i = 0;\n+#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32\n+\tsvuint64_t v_ptr_table;\n+\tsvbool_t pg = svwhilelt_b64(i, n);\n+\tdo {\n+\t\tv_ptr_table = svld1uw_u64(pg, &src_table[i]);\n+\t\tv_ptr_table = svlsl_x(pg, v_ptr_table, bit_shift);\n+\t\tv_ptr_table = svadd_x(pg, v_ptr_table, (uint64_t)ptr_base);\n+\t\tsvst1(pg, (uint64_t *)dest_table + i, v_ptr_table);\n+\t\ti += svcntd();\n+\t\tpg = svwhilelt_b64(i, n);\n+\t} while (svptest_any(svptrue_b64(), pg));\n+#elif defined __ARM_NEON && !defined RTE_ARCH_ARMv8_AARCH32\n+\tuint64_t ptr_diff;\n+\tuint64x2_t v_ptr_table;\n+\tint64x2_t v_shift = vdupq_n_s64(bit_shift);\n+\tuint64x2_t v_ptr_base = vdupq_n_u64((uint64_t)ptr_base);\n+\tfor (; i < (n & ~0x1); i += 2) {\n+\t\tv_ptr_table = vmovl_u32(vld1_u32(src_table + i));\n+\t\tv_ptr_table = vshlq_u64(v_ptr_table, v_shift);\n+\t\tv_ptr_table = vaddq_u64(v_ptr_table, v_ptr_base);\n+\t\tvst1q_u64((uint64_t *)dest_table + i, v_ptr_table);\n+\t}\n+\t/* process leftover single item in case of odd number of n */\n+\tif (unlikely(n & 0x1)) {\n+\t\tptr_diff = ((uint64_t) src_table[i]) << bit_shift;\n+\t\tdest_table[i] = RTE_PTR_ADD(ptr_base, ptr_diff);\n+\t}\n+#else\n+\tuintptr_t ptr_diff;\n+\tfor (; i < n; i++) {\n+\t\tptr_diff = ((uintptr_t) src_table[i]) << bit_shift;\n+\t\tdest_table[i] = RTE_PTR_ADD(ptr_base, ptr_diff);\n+\t}\n+#endif\n+}\n+\n+/**\n+ * Compress pointers into 16-bit offsets from base pointer.\n+ *\n+ * @note It is programmer's responsibility to ensure the resulting offsets fit\n+ * into 16 bits. Alignment of the structures pointed to by the pointers allows\n+ * us to drop bits from the offsets. This is controlled by the bit_shift\n+ * parameter. This means that if structures are aligned by 8 bytes they must be\n+ * within 256KB of the base pointer. If there is no such alignment guarantee\n+ * they must be within 64KB.\n+ *\n+ * @param ptr_base\n+ *   A pointer used to calculate offsets of pointers in src_table.\n+ * @param src_table\n+ *   A pointer to an array of pointers.\n+ * @param dest_table\n+ *   A pointer to an array of compressed pointers returned by this function.\n+ * @param n\n+ *   The number of objects to compress, must be strictly positive.\n+ * @param bit_shift\n+ *   Byte alignment of memory pointed to by the pointers allows for\n+ *   bits to be dropped from the offset and hence widen the memory region that\n+ *   can be covered. This controls how many bits are right shifted.\n+ **/\n+static __rte_always_inline void\n+rte_ptr_compress_16(void *ptr_base, void **src_table,\n+\t\tuint16_t *dest_table, unsigned int n, unsigned int bit_shift)\n+{\n+\n+\tunsigned int i = 0;\n+#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32\n+\tsvuint64_t v_ptr_table;\n+\tsvbool_t pg = svwhilelt_b64(i, n);\n+\tdo {\n+\t\tv_ptr_table = svld1_u64(pg, (uint64_t *)src_table + i);\n+\t\tv_ptr_table = svsub_x(pg, v_ptr_table, (uint64_t)ptr_base);\n+\t\tv_ptr_table = svlsr_x(pg, v_ptr_table, bit_shift);\n+\t\tsvst1h(pg, &dest_table[i], v_ptr_table);\n+\t\ti += svcntd();\n+\t\tpg = svwhilelt_b64(i, n);\n+\t} while (svptest_any(svptrue_b64(), pg));\n+#else\n+\tuintptr_t ptr_diff;\n+\tfor (; i < n; i++) {\n+\t\tptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);\n+\t\tptr_diff = ptr_diff >> bit_shift;\n+\t\tRTE_ASSERT(ptr_diff <= UINT16_MAX);\n+\t\tdest_table[i] = (uint16_t) ptr_diff;\n+\t}\n+#endif\n+}\n+\n+/**\n+ * Decompress pointers from 16-bit offsets from base pointer.\n+ *\n+ * @param ptr_base\n+ *   A pointer which was used to calculate offsets in src_table.\n+ * @param src_table\n+ *   A pointer to an array to compressed pointers.\n+ * @param dest_table\n+ *   A pointer to an array of decompressed pointers returned by this function.\n+ * @param n\n+ *   The number of objects to decompress, must be strictly positive.\n+ * @param bit_shift\n+ *   Byte alignment of memory pointed to by the pointers allows for\n+ *   bits to be dropped from the offset and hence widen the memory region that\n+ *   can be covered. This controls how many bits are left shifted when pointers\n+ *   are recovered from the offsets.\n+ **/\n+static __rte_always_inline void\n+rte_ptr_decompress_16(void *ptr_base, uint16_t *src_table,\n+\t\tvoid **dest_table, unsigned int n, unsigned int bit_shift)\n+{\n+\tunsigned int i = 0;\n+#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32\n+\tsvuint64_t v_ptr_table;\n+\tsvbool_t pg = svwhilelt_b64(i, n);\n+\tdo {\n+\t\tv_ptr_table = svld1uh_u64(pg, &src_table[i]);\n+\t\tv_ptr_table = svlsl_x(pg, v_ptr_table, bit_shift);\n+\t\tv_ptr_table = svadd_x(pg, v_ptr_table, (uint64_t)ptr_base);\n+\t\tsvst1(pg, (uint64_t *)dest_table + i, v_ptr_table);\n+\t\ti += svcntd();\n+\t\tpg = svwhilelt_b64(i, n);\n+\t} while (svptest_any(svptrue_b64(), pg));\n+#else\n+\tuintptr_t ptr_diff;\n+\tfor (; i < n; i++) {\n+\t\tptr_diff = ((uintptr_t) src_table[i]) << bit_shift;\n+\t\tdest_table[i] = RTE_PTR_ADD(ptr_base, ptr_diff);\n+\t}\n+#endif\n+}\n+\n+#ifdef __cplusplus\n+}\n+#endif\n+\n+#endif /* RTE_PTR_COMPRESS_H */\n",
    "prefixes": [
        "v6",
        "1/4"
    ]
}