get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/139452/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 139452,
    "url": "https://patches.dpdk.org/api/patches/139452/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/20240417160807.1249480-2-yoan.picchi@arm.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20240417160807.1249480-2-yoan.picchi@arm.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20240417160807.1249480-2-yoan.picchi@arm.com",
    "date": "2024-04-17T16:08:04",
    "name": "[v8,1/4] hash: pack the hitmask for hash in bulk lookup",
    "commit_ref": null,
    "pull_url": null,
    "state": "new",
    "archived": false,
    "hash": "f694c557fe94008ef5d2ae2113b0611c660dd80c",
    "submitter": {
        "id": 3196,
        "url": "https://patches.dpdk.org/api/people/3196/?format=api",
        "name": "Yoan Picchi",
        "email": "yoan.picchi@arm.com"
    },
    "delegate": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/users/1/?format=api",
        "username": "tmonjalo",
        "first_name": "Thomas",
        "last_name": "Monjalon",
        "email": "thomas@monjalon.net"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/20240417160807.1249480-2-yoan.picchi@arm.com/mbox/",
    "series": [
        {
            "id": 31772,
            "url": "https://patches.dpdk.org/api/series/31772/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=31772",
            "date": "2024-04-17T16:08:03",
            "name": "hash: add SVE support for bulk key lookup",
            "version": 8,
            "mbox": "https://patches.dpdk.org/series/31772/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/139452/comments/",
    "check": "success",
    "checks": "https://patches.dpdk.org/api/patches/139452/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 5432A43E94;\n\tWed, 17 Apr 2024 18:08:42 +0200 (CEST)",
            "from mails.dpdk.org (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 27AFF40ED8;\n\tWed, 17 Apr 2024 18:08:33 +0200 (CEST)",
            "from foss.arm.com (foss.arm.com [217.140.110.172])\n by mails.dpdk.org (Postfix) with ESMTP id 6C0AB40295\n for <dev@dpdk.org>; Wed, 17 Apr 2024 18:08:29 +0200 (CEST)",
            "from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14])\n by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 6621ADA7;\n Wed, 17 Apr 2024 09:08:56 -0700 (PDT)",
            "from octeon10-1.usa.Arm.com (unknown [10.118.91.161])\n by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id 0952D3F792;\n Wed, 17 Apr 2024 09:08:28 -0700 (PDT)"
        ],
        "From": "Yoan Picchi <yoan.picchi@arm.com>",
        "To": "Thomas Monjalon <thomas@monjalon.net>,\n Yipeng Wang <yipeng1.wang@intel.com>,\n Sameh Gobriel <sameh.gobriel@intel.com>,\n Bruce Richardson <bruce.richardson@intel.com>,\n Vladimir Medvedkin <vladimir.medvedkin@intel.com>",
        "Cc": "dev@dpdk.org, nd@arm.com, Yoan Picchi <yoan.picchi@arm.com>,\n Ruifeng Wang <ruifeng.wang@arm.com>, Nathan Brown <nathan.brown@arm.com>",
        "Subject": "[PATCH v8 1/4] hash: pack the hitmask for hash in bulk lookup",
        "Date": "Wed, 17 Apr 2024 16:08:04 +0000",
        "Message-Id": "<20240417160807.1249480-2-yoan.picchi@arm.com>",
        "X-Mailer": "git-send-email 2.25.1",
        "In-Reply-To": "<20240417160807.1249480-1-yoan.picchi@arm.com>",
        "References": "<20231020165159.1649282-1-yoan.picchi@arm.com>\n <20240417160807.1249480-1-yoan.picchi@arm.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "Current hitmask includes padding due to Intel's SIMD\nimplementation detail. This patch allows non Intel SIMD\nimplementations to benefit from a dense hitmask.\nIn addition, the new dense hitmask interweave the primary\nand secondary matches which allow a better cache usage and\nenable future improvements for the SIMD implementations\nThe default non SIMD path now use this dense mask.\n\nSigned-off-by: Yoan Picchi <yoan.picchi@arm.com>\nReviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>\nReviewed-by: Nathan Brown <nathan.brown@arm.com>\n---\n .mailmap                                  |   2 +\n lib/hash/arch/arm/compare_signatures.h    |  61 +++++++\n lib/hash/arch/common/compare_signatures.h |  38 +++++\n lib/hash/arch/x86/compare_signatures.h    |  53 ++++++\n lib/hash/rte_cuckoo_hash.c                | 192 ++++++++++++----------\n 5 files changed, 255 insertions(+), 91 deletions(-)\n create mode 100644 lib/hash/arch/arm/compare_signatures.h\n create mode 100644 lib/hash/arch/common/compare_signatures.h\n create mode 100644 lib/hash/arch/x86/compare_signatures.h",
    "diff": "diff --git a/.mailmap b/.mailmap\nindex 66ebc20666..00b50414d3 100644\n--- a/.mailmap\n+++ b/.mailmap\n@@ -494,6 +494,7 @@ Hari Kumar Vemula <hari.kumarx.vemula@intel.com>\n Harini Ramakrishnan <harini.ramakrishnan@microsoft.com>\n Hariprasad Govindharajan <hariprasad.govindharajan@intel.com>\n Harish Patil <harish.patil@cavium.com> <harish.patil@qlogic.com>\n+Harjot Singh <harjot.singh@arm.com>\n Harman Kalra <hkalra@marvell.com>\n Harneet Singh <harneet.singh@intel.com>\n Harold Huang <baymaxhuang@gmail.com>\n@@ -1633,6 +1634,7 @@ Yixue Wang <yixue.wang@intel.com>\n Yi Yang <yangyi01@inspur.com> <yi.y.yang@intel.com>\n Yi Zhang <zhang.yi75@zte.com.cn>\n Yoann Desmouceaux <ydesmouc@cisco.com>\n+Yoan Picchi <yoan.picchi@arm.com>\n Yogesh Jangra <yogesh.jangra@intel.com>\n Yogev Chaimovich <yogev@cgstowernetworks.com>\n Yongjie Gu <yongjiex.gu@intel.com>\ndiff --git a/lib/hash/arch/arm/compare_signatures.h b/lib/hash/arch/arm/compare_signatures.h\nnew file mode 100644\nindex 0000000000..63eb341d0e\n--- /dev/null\n+++ b/lib/hash/arch/arm/compare_signatures.h\n@@ -0,0 +1,61 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2010-2016 Intel Corporation\n+ * Copyright(c) 2018-2024 Arm Limited\n+ */\n+\n+/*\n+ * Arm's version uses a densely packed hitmask buffer:\n+ * Every bit is in use.\n+ */\n+\n+#include <inttypes.h>\n+#include <rte_common.h>\n+#include <rte_vect.h>\n+#include \"rte_cuckoo_hash.h\"\n+\n+#define DENSE_HASH_BULK_LOOKUP 1\n+\n+static inline void\n+compare_signatures_dense(uint16_t *hitmask_buffer,\n+\t\t\tconst uint16_t *prim_bucket_sigs,\n+\t\t\tconst uint16_t *sec_bucket_sigs,\n+\t\t\tuint16_t sig,\n+\t\t\tenum rte_hash_sig_compare_function sig_cmp_fn)\n+{\n+\n+\tstatic_assert(sizeof(*hitmask_buffer) >= 2 * (RTE_HASH_BUCKET_ENTRIES / 8),\n+\t\"The hitmask must be exactly wide enough to accept the whole hitmask if it is dense\");\n+\n+\t/* For match mask every bits indicates the match */\n+\tswitch (sig_cmp_fn) {\n+#if RTE_HASH_BUCKET_ENTRIES <= 8\n+\tcase RTE_HASH_COMPARE_NEON: {\n+\t\tuint16x8_t vmat, vsig, x;\n+\t\tint16x8_t shift = {0, 1, 2, 3, 4, 5, 6, 7};\n+\t\tuint16_t low, high;\n+\n+\t\tvsig = vld1q_dup_u16((uint16_t const *)&sig);\n+\t\t/* Compare all signatures in the primary bucket */\n+\t\tvmat = vceqq_u16(vsig,\n+\t\t\tvld1q_u16((uint16_t const *)prim_bucket_sigs));\n+\t\tx = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x0001)), shift);\n+\t\tlow = (uint16_t)(vaddvq_u16(x));\n+\t\t/* Compare all signatures in the secondary bucket */\n+\t\tvmat = vceqq_u16(vsig,\n+\t\t\tvld1q_u16((uint16_t const *)sec_bucket_sigs));\n+\t\tx = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x0001)), shift);\n+\t\thigh = (uint16_t)(vaddvq_u16(x));\n+\t\t*hitmask_buffer = low | high << RTE_HASH_BUCKET_ENTRIES;\n+\n+\t\t}\n+\t\tbreak;\n+#endif\n+\tdefault:\n+\t\tfor (unsigned int i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {\n+\t\t\t*hitmask_buffer |=\n+\t\t\t\t((sig == prim_bucket_sigs[i]) << i);\n+\t\t\t*hitmask_buffer |=\n+\t\t\t\t((sig == sec_bucket_sigs[i]) << i) << RTE_HASH_BUCKET_ENTRIES;\n+\t\t}\n+\t}\n+}\ndiff --git a/lib/hash/arch/common/compare_signatures.h b/lib/hash/arch/common/compare_signatures.h\nnew file mode 100644\nindex 0000000000..59157d31e1\n--- /dev/null\n+++ b/lib/hash/arch/common/compare_signatures.h\n@@ -0,0 +1,38 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2010-2016 Intel Corporation\n+ * Copyright(c) 2018-2024 Arm Limited\n+ */\n+\n+/*\n+ * The generic version could use either a dense or sparsely packed hitmask buffer,\n+ * but the dense one is slightly faster.\n+ */\n+\n+#include <inttypes.h>\n+#include <rte_common.h>\n+#include <rte_vect.h>\n+#include \"rte_cuckoo_hash.h\"\n+\n+#define DENSE_HASH_BULK_LOOKUP 1\n+\n+static inline void\n+compare_signatures_dense(uint16_t *hitmask_buffer,\n+\t\t\tconst uint16_t *prim_bucket_sigs,\n+\t\t\tconst uint16_t *sec_bucket_sigs,\n+\t\t\tuint16_t sig,\n+\t\t\tenum rte_hash_sig_compare_function sig_cmp_fn)\n+{\n+\t(void) sig_cmp_fn;\n+\n+\tstatic_assert(sizeof(*hitmask_buffer) >= 2 * (RTE_HASH_BUCKET_ENTRIES / 8),\n+\t\"The hitmask must be exactly wide enough to accept the whole hitmask if it is dense\");\n+\n+\t/* For match mask every bits indicates the match */\n+\tfor (unsigned int i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {\n+\t\t*hitmask_buffer |=\n+\t\t\t((sig == prim_bucket_sigs[i]) << i);\n+\t\t*hitmask_buffer |=\n+\t\t\t((sig == sec_bucket_sigs[i]) << i) << RTE_HASH_BUCKET_ENTRIES;\n+\t}\n+\n+}\ndiff --git a/lib/hash/arch/x86/compare_signatures.h b/lib/hash/arch/x86/compare_signatures.h\nnew file mode 100644\nindex 0000000000..7eec499e1f\n--- /dev/null\n+++ b/lib/hash/arch/x86/compare_signatures.h\n@@ -0,0 +1,53 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2010-2016 Intel Corporation\n+ * Copyright(c) 2018-2024 Arm Limited\n+ */\n+\n+/*\n+ * x86's version uses a sparsely packed hitmask buffer:\n+ * Every other bit is padding.\n+ */\n+\n+#include <inttypes.h>\n+#include <rte_common.h>\n+#include <rte_vect.h>\n+#include \"rte_cuckoo_hash.h\"\n+\n+#define DENSE_HASH_BULK_LOOKUP 0\n+\n+static inline void\n+compare_signatures_sparse(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,\n+\t\t\tconst struct rte_hash_bucket *prim_bkt,\n+\t\t\tconst struct rte_hash_bucket *sec_bkt,\n+\t\t\tuint16_t sig,\n+\t\t\tenum rte_hash_sig_compare_function sig_cmp_fn)\n+{\n+\t/* For match mask the first bit of every two bits indicates the match */\n+\tswitch (sig_cmp_fn) {\n+#if defined(__SSE2__) && RTE_HASH_BUCKET_ENTRIES <= 8\n+\tcase RTE_HASH_COMPARE_SSE:\n+\t\t/* Compare all signatures in the bucket */\n+\t\t*prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(\n+\t\t\t\t_mm_load_si128(\n+\t\t\t\t\t(__m128i const *)prim_bkt->sig_current),\n+\t\t\t\t_mm_set1_epi16(sig)));\n+\t\t/* Extract the even-index bits only */\n+\t\t*prim_hash_matches &= 0x5555;\n+\t\t/* Compare all signatures in the bucket */\n+\t\t*sec_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(\n+\t\t\t\t_mm_load_si128(\n+\t\t\t\t\t(__m128i const *)sec_bkt->sig_current),\n+\t\t\t\t_mm_set1_epi16(sig)));\n+\t\t/* Extract the even-index bits only */\n+\t\t*sec_hash_matches &= 0x5555;\n+\t\tbreak;\n+#endif /* defined(__SSE2__) */\n+\tdefault:\n+\t\tfor (unsigned int i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {\n+\t\t\t*prim_hash_matches |=\n+\t\t\t\t((sig == prim_bkt->sig_current[i]) << (i << 1));\n+\t\t\t*sec_hash_matches |=\n+\t\t\t\t((sig == sec_bkt->sig_current[i]) << (i << 1));\n+\t\t}\n+\t}\n+}\ndiff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c\nindex 9cf94645f6..0697743cdf 100644\n--- a/lib/hash/rte_cuckoo_hash.c\n+++ b/lib/hash/rte_cuckoo_hash.c\n@@ -33,6 +33,14 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO);\n \n #include \"rte_cuckoo_hash.h\"\n \n+#if defined(__ARM_NEON)\n+#include \"arch/arm/compare_signatures.h\"\n+#elif defined(__SSE2__)\n+#include \"arch/x86/compare_signatures.h\"\n+#else\n+#include \"arch/common/compare_signatures.h\"\n+#endif\n+\n /* Mask of all flags supported by this version */\n #define RTE_HASH_EXTRA_FLAGS_MASK (RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT | \\\n \t\t\t\t   RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD | \\\n@@ -1857,63 +1865,6 @@ rte_hash_free_key_with_position(const struct rte_hash *h,\n \n }\n \n-static inline void\n-compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,\n-\t\t\tconst struct rte_hash_bucket *prim_bkt,\n-\t\t\tconst struct rte_hash_bucket *sec_bkt,\n-\t\t\tuint16_t sig,\n-\t\t\tenum rte_hash_sig_compare_function sig_cmp_fn)\n-{\n-\tunsigned int i;\n-\n-\t/* For match mask the first bit of every two bits indicates the match */\n-\tswitch (sig_cmp_fn) {\n-#if defined(__SSE2__)\n-\tcase RTE_HASH_COMPARE_SSE:\n-\t\t/* Compare all signatures in the bucket */\n-\t\t*prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(\n-\t\t\t\t_mm_load_si128(\n-\t\t\t\t\t(__m128i const *)prim_bkt->sig_current),\n-\t\t\t\t_mm_set1_epi16(sig)));\n-\t\t/* Extract the even-index bits only */\n-\t\t*prim_hash_matches &= 0x5555;\n-\t\t/* Compare all signatures in the bucket */\n-\t\t*sec_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(\n-\t\t\t\t_mm_load_si128(\n-\t\t\t\t\t(__m128i const *)sec_bkt->sig_current),\n-\t\t\t\t_mm_set1_epi16(sig)));\n-\t\t/* Extract the even-index bits only */\n-\t\t*sec_hash_matches &= 0x5555;\n-\t\tbreak;\n-#elif defined(__ARM_NEON)\n-\tcase RTE_HASH_COMPARE_NEON: {\n-\t\tuint16x8_t vmat, vsig, x;\n-\t\tint16x8_t shift = {-15, -13, -11, -9, -7, -5, -3, -1};\n-\n-\t\tvsig = vld1q_dup_u16((uint16_t const *)&sig);\n-\t\t/* Compare all signatures in the primary bucket */\n-\t\tvmat = vceqq_u16(vsig,\n-\t\t\tvld1q_u16((uint16_t const *)prim_bkt->sig_current));\n-\t\tx = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x8000)), shift);\n-\t\t*prim_hash_matches = (uint32_t)(vaddvq_u16(x));\n-\t\t/* Compare all signatures in the secondary bucket */\n-\t\tvmat = vceqq_u16(vsig,\n-\t\t\tvld1q_u16((uint16_t const *)sec_bkt->sig_current));\n-\t\tx = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x8000)), shift);\n-\t\t*sec_hash_matches = (uint32_t)(vaddvq_u16(x));\n-\t\t}\n-\t\tbreak;\n-#endif\n-\tdefault:\n-\t\tfor (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {\n-\t\t\t*prim_hash_matches |=\n-\t\t\t\t((sig == prim_bkt->sig_current[i]) << (i << 1));\n-\t\t\t*sec_hash_matches |=\n-\t\t\t\t((sig == sec_bkt->sig_current[i]) << (i << 1));\n-\t\t}\n-\t}\n-}\n-\n static inline void\n __bulk_lookup_l(const struct rte_hash *h, const void **keys,\n \t\tconst struct rte_hash_bucket **primary_bkt,\n@@ -1924,22 +1875,44 @@ __bulk_lookup_l(const struct rte_hash *h, const void **keys,\n \tuint64_t hits = 0;\n \tint32_t i;\n \tint32_t ret;\n-\tuint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};\n-\tuint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};\n \tstruct rte_hash_bucket *cur_bkt, *next_bkt;\n \n+#if DENSE_HASH_BULK_LOOKUP\n+\tconst int hitmask_padding = 0;\n+\tuint16_t hitmask_buffer[RTE_HASH_LOOKUP_BULK_MAX] = {0};\n+\n+\tstatic_assert(sizeof(*hitmask_buffer)*8/2 == RTE_HASH_BUCKET_ENTRIES,\n+\t\"The hitmask must be exactly wide enough to accept the whole hitmask when it is dense\");\n+#else\n+\tconst int hitmask_padding = 1;\n+\tuint32_t prim_hitmask_buffer[RTE_HASH_LOOKUP_BULK_MAX] = {0};\n+\tuint32_t sec_hitmask_buffer[RTE_HASH_LOOKUP_BULK_MAX] = {0};\n+#endif\n+\n \t__hash_rw_reader_lock(h);\n \n \t/* Compare signatures and prefetch key slot of first hit */\n \tfor (i = 0; i < num_keys; i++) {\n-\t\tcompare_signatures(&prim_hitmask[i], &sec_hitmask[i],\n+#if DENSE_HASH_BULK_LOOKUP\n+\t\tuint16_t *hitmask = &hitmask_buffer[i];\n+\t\tcompare_signatures_dense(hitmask,\n+\t\t\tprimary_bkt[i]->sig_current,\n+\t\t\tsecondary_bkt[i]->sig_current,\n+\t\t\tsig[i], h->sig_cmp_fn);\n+\t\tconst unsigned int prim_hitmask = *(uint8_t *)(hitmask);\n+\t\tconst unsigned int sec_hitmask = *((uint8_t *)(hitmask)+1);\n+#else\n+\t\tcompare_signatures_sparse(&prim_hitmask_buffer[i], &sec_hitmask_buffer[i],\n \t\t\tprimary_bkt[i], secondary_bkt[i],\n \t\t\tsig[i], h->sig_cmp_fn);\n+\t\tconst unsigned int prim_hitmask = prim_hitmask_buffer[i];\n+\t\tconst unsigned int sec_hitmask = sec_hitmask_buffer[i];\n+#endif\n \n-\t\tif (prim_hitmask[i]) {\n+\t\tif (prim_hitmask) {\n \t\t\tuint32_t first_hit =\n-\t\t\t\t\trte_ctz32(prim_hitmask[i])\n-\t\t\t\t\t>> 1;\n+\t\t\t\t\trte_ctz32(prim_hitmask)\n+\t\t\t\t\t>> hitmask_padding;\n \t\t\tuint32_t key_idx =\n \t\t\t\tprimary_bkt[i]->key_idx[first_hit];\n \t\t\tconst struct rte_hash_key *key_slot =\n@@ -1950,10 +1923,10 @@ __bulk_lookup_l(const struct rte_hash *h, const void **keys,\n \t\t\tcontinue;\n \t\t}\n \n-\t\tif (sec_hitmask[i]) {\n+\t\tif (sec_hitmask) {\n \t\t\tuint32_t first_hit =\n-\t\t\t\t\trte_ctz32(sec_hitmask[i])\n-\t\t\t\t\t>> 1;\n+\t\t\t\t\trte_ctz32(sec_hitmask)\n+\t\t\t\t\t>> hitmask_padding;\n \t\t\tuint32_t key_idx =\n \t\t\t\tsecondary_bkt[i]->key_idx[first_hit];\n \t\t\tconst struct rte_hash_key *key_slot =\n@@ -1967,10 +1940,18 @@ __bulk_lookup_l(const struct rte_hash *h, const void **keys,\n \t/* Compare keys, first hits in primary first */\n \tfor (i = 0; i < num_keys; i++) {\n \t\tpositions[i] = -ENOENT;\n-\t\twhile (prim_hitmask[i]) {\n+#if DENSE_HASH_BULK_LOOKUP\n+\t\tuint16_t *hitmask = &hitmask_buffer[i];\n+\t\tunsigned int prim_hitmask = *(uint8_t *)(hitmask);\n+\t\tunsigned int sec_hitmask = *((uint8_t *)(hitmask)+1);\n+#else\n+\t\tunsigned int prim_hitmask = prim_hitmask_buffer[i];\n+\t\tunsigned int sec_hitmask = sec_hitmask_buffer[i];\n+#endif\n+\t\twhile (prim_hitmask) {\n \t\t\tuint32_t hit_index =\n-\t\t\t\t\trte_ctz32(prim_hitmask[i])\n-\t\t\t\t\t>> 1;\n+\t\t\t\t\trte_ctz32(prim_hitmask)\n+\t\t\t\t\t>> hitmask_padding;\n \t\t\tuint32_t key_idx =\n \t\t\t\tprimary_bkt[i]->key_idx[hit_index];\n \t\t\tconst struct rte_hash_key *key_slot =\n@@ -1992,13 +1973,13 @@ __bulk_lookup_l(const struct rte_hash *h, const void **keys,\n \t\t\t\tpositions[i] = key_idx - 1;\n \t\t\t\tgoto next_key;\n \t\t\t}\n-\t\t\tprim_hitmask[i] &= ~(3ULL << (hit_index << 1));\n+\t\t\tprim_hitmask &= ~(1 << (hit_index << hitmask_padding));\n \t\t}\n \n-\t\twhile (sec_hitmask[i]) {\n+\t\twhile (sec_hitmask) {\n \t\t\tuint32_t hit_index =\n-\t\t\t\t\trte_ctz32(sec_hitmask[i])\n-\t\t\t\t\t>> 1;\n+\t\t\t\t\trte_ctz32(sec_hitmask)\n+\t\t\t\t\t>> hitmask_padding;\n \t\t\tuint32_t key_idx =\n \t\t\t\tsecondary_bkt[i]->key_idx[hit_index];\n \t\t\tconst struct rte_hash_key *key_slot =\n@@ -2021,7 +2002,7 @@ __bulk_lookup_l(const struct rte_hash *h, const void **keys,\n \t\t\t\tpositions[i] = key_idx - 1;\n \t\t\t\tgoto next_key;\n \t\t\t}\n-\t\t\tsec_hitmask[i] &= ~(3ULL << (hit_index << 1));\n+\t\t\tsec_hitmask &= ~(1 << (hit_index << hitmask_padding));\n \t\t}\n next_key:\n \t\tcontinue;\n@@ -2071,11 +2052,20 @@ __bulk_lookup_lf(const struct rte_hash *h, const void **keys,\n \tuint64_t hits = 0;\n \tint32_t i;\n \tint32_t ret;\n-\tuint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};\n-\tuint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};\n \tstruct rte_hash_bucket *cur_bkt, *next_bkt;\n \tuint32_t cnt_b, cnt_a;\n \n+#if DENSE_HASH_BULK_LOOKUP\n+\tconst int hitmask_padding = 0;\n+\tuint16_t hitmask_buffer[RTE_HASH_LOOKUP_BULK_MAX] = {0};\n+\tstatic_assert(sizeof(*hitmask_buffer)*8/2 == RTE_HASH_BUCKET_ENTRIES,\n+\t\"The hitmask must be exactly wide enough to accept the whole hitmask chen it is dense\");\n+#else\n+\tconst int hitmask_padding = 1;\n+\tuint32_t prim_hitmask_buffer[RTE_HASH_LOOKUP_BULK_MAX] = {0};\n+\tuint32_t sec_hitmask_buffer[RTE_HASH_LOOKUP_BULK_MAX] = {0};\n+#endif\n+\n \tfor (i = 0; i < num_keys; i++)\n \t\tpositions[i] = -ENOENT;\n \n@@ -2089,14 +2079,26 @@ __bulk_lookup_lf(const struct rte_hash *h, const void **keys,\n \n \t\t/* Compare signatures and prefetch key slot of first hit */\n \t\tfor (i = 0; i < num_keys; i++) {\n-\t\t\tcompare_signatures(&prim_hitmask[i], &sec_hitmask[i],\n+#if DENSE_HASH_BULK_LOOKUP\n+\t\t\tuint16_t *hitmask = &hitmask_buffer[i];\n+\t\t\tcompare_signatures_dense(hitmask,\n+\t\t\t\tprimary_bkt[i]->sig_current,\n+\t\t\t\tsecondary_bkt[i]->sig_current,\n+\t\t\t\tsig[i], h->sig_cmp_fn);\n+\t\t\tconst unsigned int prim_hitmask = *(uint8_t *)(hitmask);\n+\t\t\tconst unsigned int sec_hitmask = *((uint8_t *)(hitmask)+1);\n+#else\n+\t\t\tcompare_signatures_sparse(&prim_hitmask_buffer[i], &sec_hitmask_buffer[i],\n \t\t\t\tprimary_bkt[i], secondary_bkt[i],\n \t\t\t\tsig[i], h->sig_cmp_fn);\n+\t\t\tconst unsigned int prim_hitmask = prim_hitmask_buffer[i];\n+\t\t\tconst unsigned int sec_hitmask = sec_hitmask_buffer[i];\n+#endif\n \n-\t\t\tif (prim_hitmask[i]) {\n+\t\t\tif (prim_hitmask) {\n \t\t\t\tuint32_t first_hit =\n-\t\t\t\t\t\trte_ctz32(prim_hitmask[i])\n-\t\t\t\t\t\t>> 1;\n+\t\t\t\t\t\trte_ctz32(prim_hitmask)\n+\t\t\t\t\t\t>> hitmask_padding;\n \t\t\t\tuint32_t key_idx =\n \t\t\t\t\tprimary_bkt[i]->key_idx[first_hit];\n \t\t\t\tconst struct rte_hash_key *key_slot =\n@@ -2107,10 +2109,10 @@ __bulk_lookup_lf(const struct rte_hash *h, const void **keys,\n \t\t\t\tcontinue;\n \t\t\t}\n \n-\t\t\tif (sec_hitmask[i]) {\n+\t\t\tif (sec_hitmask) {\n \t\t\t\tuint32_t first_hit =\n-\t\t\t\t\t\trte_ctz32(sec_hitmask[i])\n-\t\t\t\t\t\t>> 1;\n+\t\t\t\t\t\trte_ctz32(sec_hitmask)\n+\t\t\t\t\t\t>> hitmask_padding;\n \t\t\t\tuint32_t key_idx =\n \t\t\t\t\tsecondary_bkt[i]->key_idx[first_hit];\n \t\t\t\tconst struct rte_hash_key *key_slot =\n@@ -2123,10 +2125,18 @@ __bulk_lookup_lf(const struct rte_hash *h, const void **keys,\n \n \t\t/* Compare keys, first hits in primary first */\n \t\tfor (i = 0; i < num_keys; i++) {\n-\t\t\twhile (prim_hitmask[i]) {\n+#if DENSE_HASH_BULK_LOOKUP\n+\t\t\tuint16_t *hitmask = &hitmask_buffer[i];\n+\t\t\tunsigned int prim_hitmask = *(uint8_t *)(hitmask);\n+\t\t\tunsigned int sec_hitmask = *((uint8_t *)(hitmask)+1);\n+#else\n+\t\t\tunsigned int prim_hitmask = prim_hitmask_buffer[i];\n+\t\t\tunsigned int sec_hitmask = sec_hitmask_buffer[i];\n+#endif\n+\t\t\twhile (prim_hitmask) {\n \t\t\t\tuint32_t hit_index =\n-\t\t\t\t\t\trte_ctz32(prim_hitmask[i])\n-\t\t\t\t\t\t>> 1;\n+\t\t\t\t\t\trte_ctz32(prim_hitmask)\n+\t\t\t\t\t\t>> hitmask_padding;\n \t\t\t\tuint32_t key_idx =\n \t\t\t\trte_atomic_load_explicit(\n \t\t\t\t\t&primary_bkt[i]->key_idx[hit_index],\n@@ -2152,13 +2162,13 @@ __bulk_lookup_lf(const struct rte_hash *h, const void **keys,\n \t\t\t\t\tpositions[i] = key_idx - 1;\n \t\t\t\t\tgoto next_key;\n \t\t\t\t}\n-\t\t\t\tprim_hitmask[i] &= ~(3ULL << (hit_index << 1));\n+\t\t\t\tprim_hitmask &= ~(1 << (hit_index << hitmask_padding));\n \t\t\t}\n \n-\t\t\twhile (sec_hitmask[i]) {\n+\t\t\twhile (sec_hitmask) {\n \t\t\t\tuint32_t hit_index =\n-\t\t\t\t\t\trte_ctz32(sec_hitmask[i])\n-\t\t\t\t\t\t>> 1;\n+\t\t\t\t\t\trte_ctz32(sec_hitmask)\n+\t\t\t\t\t\t>> hitmask_padding;\n \t\t\t\tuint32_t key_idx =\n \t\t\t\trte_atomic_load_explicit(\n \t\t\t\t\t&secondary_bkt[i]->key_idx[hit_index],\n@@ -2185,7 +2195,7 @@ __bulk_lookup_lf(const struct rte_hash *h, const void **keys,\n \t\t\t\t\tpositions[i] = key_idx - 1;\n \t\t\t\t\tgoto next_key;\n \t\t\t\t}\n-\t\t\t\tsec_hitmask[i] &= ~(3ULL << (hit_index << 1));\n+\t\t\t\tsec_hitmask &= ~(1 << (hit_index << hitmask_padding));\n \t\t\t}\n next_key:\n \t\t\tcontinue;\n",
    "prefixes": [
        "v8",
        "1/4"
    ]
}