get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/28300/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 28300,
    "url": "https://patches.dpdk.org/api/patches/28300/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/1504315481-12854-5-git-send-email-yipeng1.wang@intel.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<1504315481-12854-5-git-send-email-yipeng1.wang@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/1504315481-12854-5-git-send-email-yipeng1.wang@intel.com",
    "date": "2017-09-02T01:24:38",
    "name": "[dpdk-dev,v2,4/7] member: add AVX for HT mode",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "bbc5bfd93e2de79f94ad71d804ed3c95206f3f52",
    "submitter": {
        "id": 754,
        "url": "https://patches.dpdk.org/api/people/754/?format=api",
        "name": "Wang, Yipeng1",
        "email": "yipeng1.wang@intel.com"
    },
    "delegate": null,
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/1504315481-12854-5-git-send-email-yipeng1.wang@intel.com/mbox/",
    "series": [],
    "comments": "https://patches.dpdk.org/api/patches/28300/comments/",
    "check": "success",
    "checks": "https://patches.dpdk.org/api/patches/28300/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [IPv6:::1])\n\tby dpdk.org (Postfix) with ESMTP id 212007CAC;\n\tSat,  2 Sep 2017 03:25:17 +0200 (CEST)",
            "from mga06.intel.com (mga06.intel.com [134.134.136.31])\n\tby dpdk.org (Postfix) with ESMTP id 137E17CBF\n\tfor <dev@dpdk.org>; Sat,  2 Sep 2017 03:25:14 +0200 (CEST)",
            "from fmsmga006.fm.intel.com ([10.253.24.20])\n\tby orsmga104.jf.intel.com with ESMTP; 01 Sep 2017 18:25:14 -0700",
            "from bdw-yipeng.jf.intel.com ([10.54.81.30])\n\tby fmsmga006.fm.intel.com with ESMTP; 01 Sep 2017 18:25:14 -0700"
        ],
        "X-ExtLoop1": "1",
        "X-IronPort-AV": "E=Sophos;i=\"5.41,459,1498546800\"; d=\"scan'208\";a=\"147303400\"",
        "From": "Yipeng Wang <yipeng1.wang@intel.com>",
        "To": "dev@dpdk.org",
        "Cc": "stephen@networkplumber.org, luca.boccassi@gmail.com,\n\tcharlie.tai@intel.com, sameh.gobriel@intel.com, ren.wang@intel.com,\n\tpablo.de.lara.guarch@intel.com, yipeng1.wang@intel.com",
        "Date": "Fri,  1 Sep 2017 18:24:38 -0700",
        "Message-Id": "<1504315481-12854-5-git-send-email-yipeng1.wang@intel.com>",
        "X-Mailer": "git-send-email 2.7.4",
        "In-Reply-To": "<1504315481-12854-1-git-send-email-yipeng1.wang@intel.com>",
        "References": "<1503361193-36699-1-git-send-email-yipeng1.wang@intel.com>\n\t<1504315481-12854-1-git-send-email-yipeng1.wang@intel.com>",
        "Subject": "[dpdk-dev] [PATCH v2 4/7] member: add AVX for HT mode",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<http://dpdk.org/ml/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<http://dpdk.org/ml/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "For key search, the signatures of all entries are compared against\nthe signature of the key that is being looked up. Since all\nsignatures are contguously put in a bucket, they can be compared\nwith vector instructions (AVX2), achieving higher lookup performance.\n\nThis patch adds AVX2 implementation in a separate header file.\n\nSigned-off-by: Yipeng Wang <yipeng1.wang@intel.com>\n---\n lib/librte_member/rte_member_ht.c  | 143 ++++++++++++++++++++++++++++---------\n lib/librte_member/rte_member_x86.h | 111 ++++++++++++++++++++++++++++\n 2 files changed, 222 insertions(+), 32 deletions(-)\n create mode 100644 lib/librte_member/rte_member_x86.h",
    "diff": "diff --git a/lib/librte_member/rte_member_ht.c b/lib/librte_member/rte_member_ht.c\nindex b2ae6d0..15e2534 100644\n--- a/lib/librte_member/rte_member_ht.c\n+++ b/lib/librte_member/rte_member_ht.c\n@@ -40,6 +40,10 @@\n #include \"rte_member.h\"\n #include \"rte_member_ht.h\"\n \n+#if defined(RTE_ARCH_X86)\n+#include \"rte_member_x86.h\"\n+#endif\n+\n \n static inline int\n insert_overwrite_search(uint32_t bucket, SIG_TYPE tmp_sig,\n@@ -135,6 +139,13 @@ rte_member_create_ht(struct rte_member_setsum *ss,\n \t\tfor (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)\n \t\t\tbuckets[i].sets[j] = RTE_MEMBER_NO_MATCH;\n \t}\n+#if defined(RTE_ARCH_X86)\n+\tif (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&\n+\t\t\tRTE_MEMBER_BUCKET_ENTRIES == 16)\n+\t\tss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2;\n+\telse\n+#endif\n+\t\tss->sig_cmp_fn = RTE_MEMBER_COMPARE_SCALAR;\n \n \n \tRTE_LOG(DEBUG, MEMBER, \"Hash table based filter created, \"\n@@ -174,11 +185,23 @@ rte_member_lookup_ht(const struct rte_member_setsum *ss,\n \t*set_id = RTE_MEMBER_NO_MATCH;\n \tget_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);\n \n-\tif (search_bucket_single(prim_bucket, tmp_sig, buckets,\n-\t\t\tset_id) ||\n-\t\t\tsearch_bucket_single(sec_bucket, tmp_sig,\n-\t\t\t\tbuckets, set_id))\n-\t\treturn 1;\n+\tswitch (ss->sig_cmp_fn) {\n+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)\n+\tcase RTE_MEMBER_COMPARE_AVX2:\n+\t\tif (search_bucket_single_avx(prim_bucket, tmp_sig, buckets,\n+\t\t\t\tset_id) ||\n+\t\t\t\tsearch_bucket_single_avx(sec_bucket, tmp_sig,\n+\t\t\t\t\tbuckets, set_id))\n+\t\t\treturn 1;\n+\t\tbreak;\n+#endif\n+\tdefault:\n+\t\tif (search_bucket_single(prim_bucket, tmp_sig, buckets,\n+\t\t\t\tset_id) ||\n+\t\t\t\tsearch_bucket_single(sec_bucket, tmp_sig,\n+\t\t\t\t\tbuckets, set_id))\n+\t\t\treturn 1;\n+\t}\n \n \treturn 0;\n }\n@@ -203,13 +226,27 @@ rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss,\n \t}\n \n \tfor (i = 0; i < num_keys; i++) {\n-\t\tif (search_bucket_single(prim_buckets[i], tmp_sig[i],\n-\t\t\t\tbuckets, &set_id[i]) ||\n-\t\t\t\tsearch_bucket_single(sec_buckets[i],\n-\t\t\t\ttmp_sig[i], buckets, &set_id[i]))\n-\t\t\tret++;\n-\t\telse\n-\t\t\tset_id[i] = RTE_MEMBER_NO_MATCH;\n+\t\tswitch (ss->sig_cmp_fn) {\n+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)\n+\t\tcase RTE_MEMBER_COMPARE_AVX2:\n+\t\t\tif (search_bucket_single_avx(prim_buckets[i],\n+\t\t\t\t\ttmp_sig[i], buckets, &set_id[i]) ||\n+\t\t\t\tsearch_bucket_single_avx(sec_buckets[i],\n+\t\t\t\t\ttmp_sig[i], buckets, &set_id[i]))\n+\t\t\t\tret++;\n+\t\t\telse\n+\t\t\t\tset_id[i] = RTE_MEMBER_NO_MATCH;\n+\t\t\tbreak;\n+#endif\n+\t\tdefault:\n+\t\t\tif (search_bucket_single(prim_buckets[i], tmp_sig[i],\n+\t\t\t\t\tbuckets, &set_id[i]) ||\n+\t\t\t\t\tsearch_bucket_single(sec_buckets[i],\n+\t\t\t\t\ttmp_sig[i], buckets, &set_id[i]))\n+\t\t\t\tret++;\n+\t\t\telse\n+\t\t\t\tset_id[i] = RTE_MEMBER_NO_MATCH;\n+\t\t}\n \t}\n \treturn ret;\n }\n@@ -227,12 +264,24 @@ rte_member_lookup_multi_ht(const struct rte_member_setsum *ss,\n \n \tget_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);\n \n-\tsearch_bucket_multi(prim_bucket, tmp_sig, buckets, &ret,\n-\t\t\t match_per_key, set_id);\n-\tif (ret < match_per_key)\n-\t\tsearch_bucket_multi(sec_bucket, tmp_sig,\n-\t\t\tbuckets, &ret, match_per_key, set_id);\n-\treturn ret;\n+\tswitch (ss->sig_cmp_fn) {\n+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)\n+\tcase RTE_MEMBER_COMPARE_AVX2:\n+\t\tsearch_bucket_multi_avx(prim_bucket, tmp_sig, buckets,\n+\t\t\t&ret, match_per_key, set_id);\n+\t\tif (ret < match_per_key)\n+\t\t\tsearch_bucket_multi_avx(sec_bucket, tmp_sig,\n+\t\t\t\tbuckets, &ret, match_per_key, set_id);\n+\t\treturn ret;\n+#endif\n+\tdefault:\n+\t\tsearch_bucket_multi(prim_bucket, tmp_sig, buckets, &ret,\n+\t\t\t\t match_per_key, set_id);\n+\t\tif (ret < match_per_key)\n+\t\t\tsearch_bucket_multi(sec_bucket, tmp_sig,\n+\t\t\t\tbuckets, &ret, match_per_key, set_id);\n+\t\treturn ret;\n+\t}\n }\n \n \n@@ -259,16 +308,34 @@ rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss,\n \tfor (i = 0; i < num_keys; i++) {\n \t\tmatch_cnt_t = 0;\n \n-\t\tsearch_bucket_multi(prim_buckets[i], tmp_sig[i],\n-\t\t\tbuckets, &match_cnt_t, match_per_key,\n-\t\t\t&set_ids[i*match_per_key]);\n-\t\tif (match_cnt_t < match_per_key)\n-\t\t\tsearch_bucket_multi(sec_buckets[i], tmp_sig[i],\n+\t\tswitch (ss->sig_cmp_fn) {\n+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)\n+\t\tcase RTE_MEMBER_COMPARE_AVX2:\n+\t\t\tsearch_bucket_multi_avx(prim_buckets[i], tmp_sig[i],\n \t\t\t\tbuckets, &match_cnt_t, match_per_key,\n \t\t\t\t&set_ids[i*match_per_key]);\n-\t\tmatch_count[i] = match_cnt_t;\n-\t\tif (match_cnt_t != 0)\n-\t\t\tret++;\n+\t\t\tif (match_cnt_t < match_per_key)\n+\t\t\t\tsearch_bucket_multi_avx(sec_buckets[i],\n+\t\t\t\t\ttmp_sig[i], buckets, &match_cnt_t,\n+\t\t\t\t\tmatch_per_key,\n+\t\t\t\t\t&set_ids[i*match_per_key]);\n+\t\t\tmatch_count[i] = match_cnt_t;\n+\t\t\tif (match_cnt_t != 0)\n+\t\t\t\tret++;\n+\t\t\tbreak;\n+#endif\n+\t\tdefault:\n+\t\t\tsearch_bucket_multi(prim_buckets[i], tmp_sig[i],\n+\t\t\t\tbuckets, &match_cnt_t, match_per_key,\n+\t\t\t\t&set_ids[i*match_per_key]);\n+\t\t\tif (match_cnt_t < match_per_key)\n+\t\t\t\tsearch_bucket_multi(sec_buckets[i], tmp_sig[i],\n+\t\t\t\t\tbuckets, &match_cnt_t, match_per_key,\n+\t\t\t\t\t&set_ids[i*match_per_key]);\n+\t\t\tmatch_count[i] = match_cnt_t;\n+\t\t\tif (match_cnt_t != 0)\n+\t\t\t\tret++;\n+\t\t}\n \t}\n \treturn ret;\n }\n@@ -300,12 +367,24 @@ try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,\n \n static inline int\n try_overwrite(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,\n-\t\tSIG_TYPE sig, MEMBER_SET_TYPE set_id)\n+\t\tSIG_TYPE sig, MEMBER_SET_TYPE set_id,\n+\t\tenum rte_member_sig_compare_function cmp_fn)\n {\n-\tif (insert_overwrite_search(prim, sig, buckets, set_id) ||\n-\t\t\tinsert_overwrite_search(sec, sig, buckets,\n-\t\t\t\tset_id))\n-\t\treturn 0;\n+\tswitch (cmp_fn) {\n+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)\n+\tcase RTE_MEMBER_COMPARE_AVX2:\n+\t\tif (insert_overwrite_search_avx(prim, sig, buckets, set_id) ||\n+\t\t\t\tinsert_overwrite_search_avx(sec, sig, buckets,\n+\t\t\t\t\tset_id))\n+\t\t\treturn 0;\n+\t\tbreak;\n+#endif\n+\tdefault:\n+\t\tif (insert_overwrite_search(prim, sig, buckets, set_id) ||\n+\t\t\t\tinsert_overwrite_search(sec, sig, buckets,\n+\t\t\t\t\tset_id))\n+\t\t\treturn 0;\n+\t}\n \treturn -1;\n }\n \n@@ -411,7 +490,7 @@ rte_member_add_ht(const struct rte_member_setsum *ss,\n \t/* if it is cache based filter, we try overwriting existing entry */\n \tif (ss->cache) {\n \t\tret = try_overwrite(buckets, prim_bucket, sec_bucket, tmp_sig,\n-\t\t\t\t\tset_id);\n+\t\t\t\t\tset_id, ss->sig_cmp_fn);\n \t\tif (ret != -1)\n \t\t\treturn ret;\n \t}\ndiff --git a/lib/librte_member/rte_member_x86.h b/lib/librte_member/rte_member_x86.h\nnew file mode 100644\nindex 0000000..c55f128\n--- /dev/null\n+++ b/lib/librte_member/rte_member_x86.h\n@@ -0,0 +1,111 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.\n+ *   All rights reserved.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the name of Intel Corporation nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+#ifndef _RTE_MEMBER_X86_H_\n+#define _RTE_MEMBER_X86_H_\n+\n+#ifdef __cplusplus\n+extern \"C\" {\n+#endif\n+\n+#include <x86intrin.h>\n+\n+\n+#if defined(RTE_MACHINE_CPUFLAG_AVX2)\n+\n+\n+static inline int\n+insert_overwrite_search_avx(uint32_t bucket, SIG_TYPE tmp_sig,\n+\t\tstruct member_ht_bucket *buckets,\n+\t\tMEMBER_SET_TYPE set_id)\n+{\n+\tuint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(\n+\t\t_mm256_load_si256((__m256i const *)buckets[bucket].sigs),\n+\t\t_mm256_set1_epi16(tmp_sig)));\n+\tif (hitmask) {\n+\t\tuint32_t hit_idx = __builtin_ctzl(hitmask) / 2;\n+\t\tbuckets[bucket].sets[hit_idx] = set_id;\n+\t\treturn 1;\n+\t}\n+\treturn 0;\n+}\n+\n+\n+static inline int\n+search_bucket_single_avx(uint32_t bucket, SIG_TYPE tmp_sig,\n+\t\tstruct member_ht_bucket *buckets,\n+\t\tMEMBER_SET_TYPE *set_id)\n+{\n+\tuint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(\n+\t\t_mm256_load_si256((__m256i const *)buckets[bucket].sigs),\n+\t\t_mm256_set1_epi16(tmp_sig)));\n+\twhile (hitmask) {\n+\t\tuint32_t hit_idx = __builtin_ctzl(hitmask) / 2;\n+\t\tif (buckets[bucket].sets[hit_idx] != RTE_MEMBER_NO_MATCH) {\n+\t\t\t*set_id = buckets[bucket].sets[hit_idx];\n+\t\t\treturn 1;\n+\t\t}\n+\t\thitmask &= ~(3U << (hit_idx) * 2);\n+\t}\n+\treturn 0;\n+}\n+\n+static inline void\n+search_bucket_multi_avx(uint32_t bucket, SIG_TYPE tmp_sig,\n+\t\t\t\tstruct member_ht_bucket *buckets,\n+\t\t\t\tuint32_t *counter,\n+\t\t\t\tuint32_t match_per_key,\n+\t\t\t\tMEMBER_SET_TYPE *set_id)\n+{\n+\tuint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(\n+\t\t_mm256_load_si256((__m256i const *)buckets[bucket].sigs),\n+\t\t_mm256_set1_epi16(tmp_sig)));\n+\twhile (hitmask) {\n+\t\tuint32_t hit_idx = __builtin_ctzl(hitmask) / 2;\n+\t\tif (buckets[bucket].sets[hit_idx] != RTE_MEMBER_NO_MATCH) {\n+\t\t\tset_id[*counter] = buckets[bucket].sets[hit_idx];\n+\t\t\t(*counter)++;\n+\t\t\tif (*counter >= match_per_key)\n+\t\t\t\treturn;\n+\t\t}\n+\t\thitmask &= ~(3U << (hit_idx) * 2);\n+\t}\n+}\n+#endif\n+\n+\n+#ifdef __cplusplus\n+}\n+#endif\n+\n+#endif /* _RTE_MEMBER_X86_H_ */\n",
    "prefixes": [
        "dpdk-dev",
        "v2",
        "4/7"
    ]
}