get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/80186/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 80186,
    "url": "http://patches.dpdk.org/api/patches/80186/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20201009135045.8505-3-mairtin.oloingsigh@intel.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20201009135045.8505-3-mairtin.oloingsigh@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20201009135045.8505-3-mairtin.oloingsigh@intel.com",
    "date": "2020-10-09T13:50:45",
    "name": "[v5,2/2] net: add support for AVX512/VPCLMULQDQ based CRC",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "a86f83c4138b41b8e700a974bd843016cc05b6bc",
    "submitter": {
        "id": 1605,
        "url": "http://patches.dpdk.org/api/people/1605/?format=api",
        "name": "Mairtin o Loingsigh",
        "email": "mairtin.oloingsigh@intel.com"
    },
    "delegate": {
        "id": 24651,
        "url": "http://patches.dpdk.org/api/users/24651/?format=api",
        "username": "dmarchand",
        "first_name": "David",
        "last_name": "Marchand",
        "email": "david.marchand@redhat.com"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20201009135045.8505-3-mairtin.oloingsigh@intel.com/mbox/",
    "series": [
        {
            "id": 12830,
            "url": "http://patches.dpdk.org/api/series/12830/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=12830",
            "date": "2020-10-09T13:50:43",
            "name": "net: add CRC run-time checks and AVX512/VPCLMULQDQ based CRC",
            "version": 5,
            "mbox": "http://patches.dpdk.org/series/12830/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/80186/comments/",
    "check": "success",
    "checks": "http://patches.dpdk.org/api/patches/80186/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from dpdk.org (dpdk.org [92.243.14.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 0CD8EA04BC;\n\tFri,  9 Oct 2020 15:55:10 +0200 (CEST)",
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id E3EC41D6B5;\n\tFri,  9 Oct 2020 15:52:30 +0200 (CEST)",
            "from mga12.intel.com (mga12.intel.com [192.55.52.136])\n by dpdk.org (Postfix) with ESMTP id DC4891D694\n for <dev@dpdk.org>; Fri,  9 Oct 2020 15:52:26 +0200 (CEST)",
            "from fmsmga007.fm.intel.com ([10.253.24.52])\n by fmsmga106.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 09 Oct 2020 06:52:24 -0700",
            "from irvmail001.ir.intel.com ([163.33.26.43])\n by fmsmga007.fm.intel.com with ESMTP; 09 Oct 2020 06:52:23 -0700",
            "from sivswdev10.ir.intel.com (sivswdev10.ir.intel.com\n [10.237.217.4])\n by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id\n 099DqMGV014780; Fri, 9 Oct 2020 14:52:22 +0100",
            "by sivswdev10.ir.intel.com (Postfix, from userid 28780)\n id 8645C1800911; Fri,  9 Oct 2020 14:52:22 +0100 (IST)"
        ],
        "IronPort-SDR": [
            "\n OPCCQRsyESaX56FsD+VyndrumNFYMHvNY9QHYZPkz83UFpa9CEFGrbRVta73l7vQiS82KES5jE\n CYOkLCRsFQUA==",
            "\n cd9zdibCY26OH2MLqsRWB1VjzzqLSuigCpkz9hTdWK7LvjoATF1/AxjYxiGnSHkhhaWRDB9Vh3\n n9WfItN3YEKQ=="
        ],
        "X-IronPort-AV": [
            "E=McAfee;i=\"6000,8403,9768\"; a=\"144806194\"",
            "E=Sophos;i=\"5.77,355,1596524400\"; d=\"scan'208\";a=\"144806194\"",
            "E=Sophos;i=\"5.77,355,1596524400\"; d=\"scan'208\";a=\"298275178\""
        ],
        "X-Amp-Result": "SKIPPED(no attachment in message)",
        "X-Amp-File-Uploaded": "False",
        "X-ExtLoop1": "1",
        "From": "Mairtin o Loingsigh <mairtin.oloingsigh@intel.com>",
        "To": "jasvinder.singh@intel.com, bruce.richardson@intel.com,\n pablo.de.lara.guarch@intel.com, konstantin.ananyev@intel.com",
        "Cc": "dev@dpdk.org, brendan.ryan@intel.com, mairtin.oloingsigh@intel.com,\n david.coyle@intel.com",
        "Date": "Fri,  9 Oct 2020 14:50:45 +0100",
        "Message-Id": "<20201009135045.8505-3-mairtin.oloingsigh@intel.com>",
        "X-Mailer": "git-send-email 2.12.3",
        "In-Reply-To": "<20201009135045.8505-1-mairtin.oloingsigh@intel.com>",
        "References": "<20201006162319.7981-1-mairtin.oloingsigh@intel.com>\n <20201009135045.8505-1-mairtin.oloingsigh@intel.com>",
        "Subject": "[dpdk-dev] [PATCH v5 2/2] net: add support for AVX512/VPCLMULQDQ\n\tbased CRC",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "This patch enables the optimized calculation of CRC32-Ethernet and\nCRC16-CCITT using the AVX512 and VPCLMULQDQ instruction sets. This CRC\nimplementation is built if the compiler supports the required instruction\nsets. It is selected at run-time if the host CPU, again, supports the\nrequired instruction sets.\n\nSigned-off-by: Mairtin o Loingsigh <mairtin.oloingsigh@intel.com>\nSigned-off-by: David Coyle <david.coyle@intel.com>\nAcked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>\n---\n app/test/test_crc.c                    |  11 +-\n config/x86/meson.build                 |   6 +-\n doc/guides/rel_notes/release_20_11.rst |   2 +\n lib/librte_net/meson.build             |  55 +++++\n lib/librte_net/net_crc.h               |  11 +\n lib/librte_net/net_crc_avx512.c        | 423 +++++++++++++++++++++++++++++++++\n lib/librte_net/rte_net_crc.c           |  46 ++++\n lib/librte_net/rte_net_crc.h           |   4 +-\n 8 files changed, 554 insertions(+), 4 deletions(-)\n create mode 100644 lib/librte_net/net_crc_avx512.c",
    "diff": "diff --git a/app/test/test_crc.c b/app/test/test_crc.c\nindex f8a74e04e..bf1d34435 100644\n--- a/app/test/test_crc.c\n+++ b/app/test/test_crc.c\n@@ -1,5 +1,5 @@\n /* SPDX-License-Identifier: BSD-3-Clause\n- * Copyright(c) 2017 Intel Corporation\n+ * Copyright(c) 2017-2020 Intel Corporation\n  */\n \n #include \"test.h\"\n@@ -149,6 +149,15 @@ test_crc(void)\n \t\treturn ret;\n \t}\n \n+\t/* set CRC avx512 mode */\n+\trte_net_crc_set_alg(RTE_NET_CRC_AVX512);\n+\n+\tret = test_crc_calc();\n+\tif (ret < 0) {\n+\t\tprintf(\"test crc (x86_64 AVX512): failed (%d)\\n\", ret);\n+\t\treturn ret;\n+\t}\n+\n \t/* set CRC neon mode */\n \trte_net_crc_set_alg(RTE_NET_CRC_NEON);\n \ndiff --git a/config/x86/meson.build b/config/x86/meson.build\nindex fea4d5403..172b72b72 100644\n--- a/config/x86/meson.build\n+++ b/config/x86/meson.build\n@@ -1,5 +1,5 @@\n # SPDX-License-Identifier: BSD-3-Clause\n-# Copyright(c) 2017-2019 Intel Corporation\n+# Copyright(c) 2017-2020 Intel Corporation\n \n # get binutils version for the workaround of Bug 97\n if not is_windows\n@@ -23,7 +23,9 @@ endforeach\n \n optional_flags = ['AES', 'PCLMUL',\n \t\t'AVX', 'AVX2', 'AVX512F',\n-\t\t'RDRND', 'RDSEED']\n+\t\t'RDRND', 'RDSEED',\n+\t\t'AVX512BW', 'AVX512DQ',\n+\t\t'AVX512VL', 'VPCLMULQDQ']\n foreach f:optional_flags\n \tif cc.get_define('__@0@__'.format(f), args: machine_args) == '1'\n \t\tif f == 'PCLMUL' # special case flags with different defines\ndiff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst\nindex b77297f7e..5eda680d5 100644\n--- a/doc/guides/rel_notes/release_20_11.rst\n+++ b/doc/guides/rel_notes/release_20_11.rst\n@@ -58,6 +58,8 @@ New Features\n * **Updated CRC modules of rte_net library.**\n \n   * Added run-time selection of the optimal architecture-specific CRC path.\n+  * Added optimized implementations of CRC32-Ethernet and CRC16-CCITT\n+    using the AVX512 and VPCLMULQDQ instruction sets.\n \n * **Updated Broadcom bnxt driver.**\n \ndiff --git a/lib/librte_net/meson.build b/lib/librte_net/meson.build\nindex fa439b9e5..6c96b361a 100644\n--- a/lib/librte_net/meson.build\n+++ b/lib/librte_net/meson.build\n@@ -24,18 +24,62 @@ deps += ['mbuf']\n if dpdk_conf.has('RTE_ARCH_X86_64')\n \tnet_crc_sse42_cpu_support = (\n \t\tcc.get_define('__PCLMUL__', args: machine_args) != '')\n+\tnet_crc_avx512_cpu_support = (\n+\t\tcc.get_define('__AVX512F__', args: machine_args) != '' and\n+\t\tcc.get_define('__AVX512BW__', args: machine_args) != '' and\n+\t\tcc.get_define('__AVX512DQ__', args: machine_args) != '' and\n+\t\tcc.get_define('__AVX512VL__', args: machine_args) != '' and\n+\t\tcc.get_define('__VPCLMULQDQ__', args: machine_args) != '')\n+\n \tnet_crc_sse42_cc_support = (\n \t\tcc.has_argument('-mpclmul') and cc.has_argument('-maes'))\n+\tnet_crc_avx512_cc_support = (\n+\t\tnot machine_args.contains('-mno-avx512f') and\n+\t\tcc.has_argument('-mavx512f') and\n+\t\tcc.has_argument('-mavx512bw') and\n+\t\tcc.has_argument('-mavx512dq') and\n+\t\tcc.has_argument('-mavx512vl') and\n+\t\tcc.has_argument('-mvpclmulqdq') and\n+\t\tcc.has_argument('-mavx2') and\n+\t\tcc.has_argument('-mavx'))\n \n \tbuild_static_net_crc_sse42_lib = 0\n+\tbuild_static_net_crc_avx512_lib = 0\n \n \tif net_crc_sse42_cpu_support == true\n \t\tsources += files('net_crc_sse.c')\n \t\tcflags += ['-DCC_X86_64_SSE42_PCLMULQDQ_SUPPORT']\n+\t\tif net_crc_avx512_cpu_support == true\n+\t\t\tsources += files('net_crc_avx512.c')\n+\t\t\tcflags += ['-DCC_X86_64_AVX512_VPCLMULQDQ_SUPPORT']\n+\t\telif net_crc_avx512_cc_support == true\n+\t\t\tbuild_static_net_crc_avx512_lib = 1\n+\t\t\tnet_crc_avx512_lib_cflags = ['-mavx512f',\n+\t\t\t\t\t\t\t'-mavx512bw',\n+\t\t\t\t\t\t\t'-mavx512dq',\n+\t\t\t\t\t\t\t'-mavx512vl',\n+\t\t\t\t\t\t\t'-mvpclmulqdq',\n+\t\t\t\t\t\t\t'-mavx2',\n+\t\t\t\t\t\t\t'-mavx']\n+\t\t\tcflags += ['-DCC_X86_64_AVX512_VPCLMULQDQ_SUPPORT']\n+\t\tendif\n \telif net_crc_sse42_cc_support == true\n \t\tbuild_static_net_crc_sse42_lib = 1\n \t\tnet_crc_sse42_lib_cflags = ['-mpclmul', '-maes']\n \t\tcflags += ['-DCC_X86_64_SSE42_PCLMULQDQ_SUPPORT']\n+\t\tif net_crc_avx512_cc_support == true\n+\t\t\tbuild_static_net_crc_avx512_lib = 1\n+\t\t\tnet_crc_avx512_lib_cflags = ['-mpclmul',\n+\t\t\t\t\t\t\t'-maes',\n+\t\t\t\t\t\t\t'-mavx512f',\n+\t\t\t\t\t\t\t'-mavx512bw',\n+\t\t\t\t\t\t\t'-mavx512dq',\n+\t\t\t\t\t\t\t'-mavx512vl',\n+\t\t\t\t\t\t\t'-mvpclmulqdq',\n+\t\t\t\t\t\t\t'-mavx2',\n+\t\t\t\t\t\t\t'-mavx']\n+\t\t\tcflags += ['-DCC_X86_64_AVX512_VPCLMULQDQ_SUPPORT']\n+\t\tendif\n \tendif\n \n \tif build_static_net_crc_sse42_lib == 1\n@@ -47,6 +91,17 @@ if dpdk_conf.has('RTE_ARCH_X86_64')\n \t\t\t\t\t\tnet_crc_sse42_lib_cflags])\n \t\tobjs += net_crc_sse42_lib.extract_objects('net_crc_sse.c')\n \tendif\n+\n+\tif build_static_net_crc_avx512_lib == 1\n+\t\tnet_crc_avx512_lib = static_library(\n+\t\t\t\t\t'net_crc_avx512_lib',\n+\t\t\t\t\t'net_crc_avx512.c',\n+\t\t\t\t\tdependencies: static_rte_eal,\n+\t\t\t\t\tc_args: [cflags,\n+\t\t\t\t\t\tnet_crc_avx512_lib_cflags])\n+\t\tobjs += net_crc_avx512_lib.extract_objects('net_crc_avx512.c')\n+\tendif\n+\n elif (dpdk_conf.has('RTE_ARCH_ARM64') and\n \t\tcc.get_define('__ARM_FEATURE_CRYPTO', args: machine_args) != '')\n \tsources += files('net_crc_neon.c')\ndiff --git a/lib/librte_net/net_crc.h b/lib/librte_net/net_crc.h\nindex a1578a56c..7a74d5406 100644\n--- a/lib/librte_net/net_crc.h\n+++ b/lib/librte_net/net_crc.h\n@@ -20,6 +20,17 @@ rte_crc16_ccitt_sse42_handler(const uint8_t *data, uint32_t data_len);\n uint32_t\n rte_crc32_eth_sse42_handler(const uint8_t *data, uint32_t data_len);\n \n+/* AVX512 */\n+\n+void\n+rte_net_crc_avx512_init(void);\n+\n+uint32_t\n+rte_crc16_ccitt_avx512_handler(const uint8_t *data, uint32_t data_len);\n+\n+uint32_t\n+rte_crc32_eth_avx512_handler(const uint8_t *data, uint32_t data_len);\n+\n /* NEON */\n \n void\ndiff --git a/lib/librte_net/net_crc_avx512.c b/lib/librte_net/net_crc_avx512.c\nnew file mode 100644\nindex 000000000..3740fe3c9\n--- /dev/null\n+++ b/lib/librte_net/net_crc_avx512.c\n@@ -0,0 +1,423 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2020 Intel Corporation\n+ */\n+\n+#include <string.h>\n+\n+#include <rte_common.h>\n+#include <rte_branch_prediction.h>\n+#include <rte_cpuflags.h>\n+\n+#include \"net_crc.h\"\n+\n+#include <x86intrin.h>\n+\n+/* VPCLMULQDQ CRC computation context structure */\n+struct crc_vpclmulqdq_ctx {\n+\t__m512i rk1_rk2;\n+\t__m512i rk3_rk4;\n+\t__m512i fold_7x128b;\n+\t__m512i fold_3x128b;\n+\t__m128i rk5_rk6;\n+\t__m128i rk7_rk8;\n+\t__m128i fold_1x128b;\n+};\n+\n+static struct crc_vpclmulqdq_ctx crc32_eth __rte_aligned(64);\n+static struct crc_vpclmulqdq_ctx crc16_ccitt __rte_aligned(64);\n+\n+static uint16_t byte_len_to_mask_table[] = {\n+\t0x0000, 0x0001, 0x0003, 0x0007,\n+\t0x000f, 0x001f, 0x003f, 0x007f,\n+\t0x00ff, 0x01ff, 0x03ff, 0x07ff,\n+\t0x0fff, 0x1fff, 0x3fff, 0x7fff,\n+\t0xffff};\n+\n+static const uint8_t shf_table[32] __rte_aligned(16) = {\n+\t0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,\n+\t0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,\n+\t0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,\n+\t0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f\n+};\n+\n+static const uint32_t mask[4] __rte_aligned(16) = {\n+\t0xffffffff, 0xffffffff, 0x00000000, 0x00000000\n+};\n+\n+static const uint32_t mask2[4] __rte_aligned(16) = {\n+\t0x00000000, 0xffffffff, 0xffffffff, 0xffffffff\n+};\n+\n+static __rte_always_inline __m512i\n+crcr32_folding_round(__m512i data_block, __m512i precomp, __m512i fold)\n+{\n+\t__m512i tmp0, tmp1;\n+\n+\ttmp0 = _mm512_clmulepi64_epi128(fold, precomp, 0x01);\n+\ttmp1 = _mm512_clmulepi64_epi128(fold, precomp, 0x10);\n+\n+\treturn _mm512_ternarylogic_epi64(tmp0, tmp1, data_block, 0x96);\n+}\n+\n+static __rte_always_inline __m128i\n+crc32_fold_128(__m512i fold0, __m512i fold1,\n+\tconst struct crc_vpclmulqdq_ctx *params)\n+{\n+\t__m128i res, res2;\n+\t__m256i a;\n+\t__m512i tmp0, tmp1, tmp2, tmp3;\n+\t__m512i tmp4;\n+\n+\ttmp0 = _mm512_clmulepi64_epi128(fold0, params->fold_7x128b, 0x01);\n+\ttmp1 = _mm512_clmulepi64_epi128(fold0, params->fold_7x128b, 0x10);\n+\n+\tres = _mm512_extracti64x2_epi64(fold1, 3);\n+\ttmp4 = _mm512_maskz_broadcast_i32x4(0xF, res);\n+\n+\ttmp2 = _mm512_clmulepi64_epi128(fold1, params->fold_3x128b, 0x01);\n+\ttmp3 = _mm512_clmulepi64_epi128(fold1, params->fold_3x128b, 0x10);\n+\n+\ttmp0 = _mm512_ternarylogic_epi64(tmp0, tmp1, tmp2, 0x96);\n+\ttmp0 = _mm512_ternarylogic_epi64(tmp0, tmp3, tmp4, 0x96);\n+\n+\ttmp1 = _mm512_shuffle_i64x2(tmp0, tmp0, 0x4e);\n+\n+\ta = _mm256_xor_si256(*(__m256i *)&tmp1, *(__m256i *)&tmp0);\n+\tres = _mm256_extracti64x2_epi64(a, 1);\n+\tres2 = _mm_xor_si128(res, *(__m128i *)&a);\n+\n+\treturn res2;\n+}\n+\n+static __rte_always_inline __m128i\n+last_two_xmm(const uint8_t *data, uint32_t data_len, uint32_t n, __m128i res,\n+\tconst struct crc_vpclmulqdq_ctx *params)\n+{\n+\tuint32_t offset;\n+\t__m128i res2, res3, res4, pshufb_shf;\n+\n+\tconst uint32_t mask3[4] __rte_aligned(16) = {\n+\t\t   0x80808080, 0x80808080, 0x80808080, 0x80808080\n+\t};\n+\n+\tres2 = res;\n+\toffset = data_len - n;\n+\tres3 = _mm_loadu_si128((const __m128i *)&data[n+offset-16]);\n+\n+\tpshufb_shf = _mm_loadu_si128((const __m128i *)\n+\t\t\t(shf_table + (data_len-n)));\n+\n+\tres = _mm_shuffle_epi8(res, pshufb_shf);\n+\tpshufb_shf = _mm_xor_si128(pshufb_shf,\n+\t\t\t_mm_load_si128((const __m128i *) mask3));\n+\tres2 = _mm_shuffle_epi8(res2, pshufb_shf);\n+\n+\tres2 = _mm_blendv_epi8(res2, res3, pshufb_shf);\n+\n+\tres4 = _mm_clmulepi64_si128(res, params->fold_1x128b, 0x01);\n+\tres = _mm_clmulepi64_si128(res, params->fold_1x128b, 0x10);\n+\tres = _mm_ternarylogic_epi64(res, res2, res4, 0x96);\n+\n+\treturn res;\n+}\n+\n+static __rte_always_inline __m128i\n+done_128(__m128i res, const struct crc_vpclmulqdq_ctx *params)\n+{\n+\t__m128i res1;\n+\n+\tres1 = res;\n+\n+\tres = _mm_clmulepi64_si128(res, params->rk5_rk6, 0x0);\n+\tres1 = _mm_srli_si128(res1, 8);\n+\tres = _mm_xor_si128(res, res1);\n+\n+\tres1 = res;\n+\tres = _mm_slli_si128(res, 4);\n+\tres = _mm_clmulepi64_si128(res, params->rk5_rk6, 0x10);\n+\tres = _mm_xor_si128(res, res1);\n+\n+\treturn res;\n+}\n+\n+static __rte_always_inline uint32_t\n+barrett_reduction(__m128i data64, const struct crc_vpclmulqdq_ctx *params)\n+{\n+\t__m128i tmp0, tmp1;\n+\n+\tdata64 =  _mm_and_si128(data64, *(const __m128i *)mask2);\n+\ttmp0 = data64;\n+\ttmp1 = data64;\n+\n+\tdata64 = _mm_clmulepi64_si128(tmp0, params->rk7_rk8, 0x0);\n+\tdata64 = _mm_ternarylogic_epi64(data64, tmp1, *(const __m128i *)mask,\n+\t\t\t0x28);\n+\n+\ttmp1 = data64;\n+\tdata64 = _mm_clmulepi64_si128(data64, params->rk7_rk8, 0x10);\n+\tdata64 = _mm_ternarylogic_epi64(data64, tmp1, tmp0, 0x96);\n+\n+\treturn _mm_extract_epi32(data64, 2);\n+}\n+\n+static __rte_always_inline void\n+reduction_loop(__m128i *fold, int *len, const uint8_t *data, uint32_t *n,\n+\tconst struct crc_vpclmulqdq_ctx *params)\n+{\n+\t__m128i tmp, tmp1;\n+\n+\ttmp = _mm_clmulepi64_si128(*fold, params->fold_1x128b, 0x1);\n+\t*fold = _mm_clmulepi64_si128(*fold, params->fold_1x128b, 0x10);\n+\t*fold = _mm_xor_si128(*fold, tmp);\n+\ttmp1 = _mm_loadu_si128((const __m128i *)&data[*n]);\n+\t*fold = _mm_xor_si128(*fold, tmp1);\n+\t*n += 16;\n+\t*len -= 16;\n+}\n+\n+static __rte_always_inline uint32_t\n+crc32_eth_calc_vpclmulqdq(const uint8_t *data, uint32_t data_len, uint32_t crc,\n+\tconst struct crc_vpclmulqdq_ctx *params)\n+{\n+\t__m128i res, d, b;\n+\t__m512i temp, k;\n+\t__m512i qw0 = _mm512_set1_epi64(0), qw1, qw2, qw3;\n+\t__m512i fold0, fold1, fold2, fold3;\n+\t__mmask16 mask;\n+\tuint32_t n = 0;\n+\tint reduction = 0;\n+\n+\t/* Get CRC init value */\n+\tb = _mm_cvtsi32_si128(crc);\n+\ttemp = _mm512_castsi128_si512(b);\n+\n+\tif (data_len > 255) {\n+\t\tfold0 = _mm512_loadu_si512((const __m512i *)data);\n+\t\tfold1 = _mm512_loadu_si512((const __m512i *)(data+64));\n+\t\tfold2 = _mm512_loadu_si512((const __m512i *)(data+128));\n+\t\tfold3 = _mm512_loadu_si512((const __m512i *)(data+192));\n+\t\tfold0 = _mm512_xor_si512(fold0, temp);\n+\n+\t\t/* Main folding loop */\n+\t\tk = params->rk1_rk2;\n+\t\tfor (n = 256; (n + 256) <= data_len; n += 256) {\n+\t\t\tqw0 = _mm512_loadu_si512((const __m512i *)&data[n]);\n+\t\t\tqw1 = _mm512_loadu_si512((const __m512i *)\n+\t\t\t\t\t&(data[n+64]));\n+\t\t\tqw2 = _mm512_loadu_si512((const __m512i *)\n+\t\t\t\t\t&(data[n+128]));\n+\t\t\tqw3 = _mm512_loadu_si512((const __m512i *)\n+\t\t\t\t\t&(data[n+192]));\n+\t\t\tfold0 = crcr32_folding_round(qw0, k, fold0);\n+\t\t\tfold1 = crcr32_folding_round(qw1, k, fold1);\n+\t\t\tfold2 = crcr32_folding_round(qw2, k, fold2);\n+\t\t\tfold3 = crcr32_folding_round(qw3, k, fold3);\n+\t\t}\n+\n+\t\t/* 256 to 128 fold */\n+\t\tk = params->rk3_rk4;\n+\t\tfold0 = crcr32_folding_round(fold2, k, fold0);\n+\t\tfold1 = crcr32_folding_round(fold3, k, fold1);\n+\n+\t\tres = crc32_fold_128(fold0, fold1, params);\n+\n+\t\treduction = 240 - ((n+256)-data_len);\n+\n+\t\twhile (reduction > 0)\n+\t\t\treduction_loop(&res, &reduction, data, &n,\n+\t\t\t\t\tparams);\n+\n+\t\treduction += 16;\n+\n+\t\tif (n != data_len)\n+\t\t\tres = last_two_xmm(data, data_len, n, res,\n+\t\t\t\t\tparams);\n+\t} else {\n+\t\tif (data_len > 31) {\n+\t\t\tres = _mm_cvtsi32_si128(crc);\n+\t\t\td = _mm_loadu_si128((const __m128i *)data);\n+\t\t\tres = _mm_xor_si128(res, d);\n+\t\t\tn += 16;\n+\n+\t\t\treduction = 240 - ((n+256)-data_len);\n+\n+\t\t\twhile (reduction > 0)\n+\t\t\t\treduction_loop(&res, &reduction, data, &n,\n+\t\t\t\t\t\tparams);\n+\n+\t\t\tif (n != data_len)\n+\t\t\t\tres = last_two_xmm(data, data_len, n, res,\n+\t\t\t\t\t\tparams);\n+\t\t} else if (data_len > 16) {\n+\t\t\tres = _mm_cvtsi32_si128(crc);\n+\t\t\td = _mm_loadu_si128((const __m128i *)data);\n+\t\t\tres = _mm_xor_si128(res, d);\n+\t\t\tn += 16;\n+\n+\t\t\tif (n != data_len)\n+\t\t\t\tres = last_two_xmm(data, data_len, n, res,\n+\t\t\t\t\t\tparams);\n+\t\t} else if (data_len == 16) {\n+\t\t\tres = _mm_cvtsi32_si128(crc);\n+\t\t\td = _mm_loadu_si128((const __m128i *)data);\n+\t\t\tres = _mm_xor_si128(res, d);\n+\t\t} else {\n+\t\t\tres = _mm_cvtsi32_si128(crc);\n+\t\t\tmask = byte_len_to_mask_table[data_len];\n+\t\t\td = _mm_maskz_loadu_epi8(mask, data);\n+\t\t\tres = _mm_xor_si128(res, d);\n+\n+\t\t\tif (data_len > 3) {\n+\t\t\t\td = _mm_loadu_si128((const __m128i *)\n+\t\t\t\t\t\t&shf_table[data_len]);\n+\t\t\t\tres = _mm_shuffle_epi8(res, d);\n+\t\t\t} else if (data_len > 2) {\n+\t\t\t\tres = _mm_slli_si128(res, 5);\n+\t\t\t\tgoto do_barrett_reduction;\n+\t\t\t} else if (data_len > 1) {\n+\t\t\t\tres = _mm_slli_si128(res, 6);\n+\t\t\t\tgoto do_barrett_reduction;\n+\t\t\t} else if (data_len > 0) {\n+\t\t\t\tres = _mm_slli_si128(res, 7);\n+\t\t\t\tgoto do_barrett_reduction;\n+\t\t\t} else {\n+\t\t\t\t/* zero length case */\n+\t\t\t\treturn crc;\n+\t\t\t}\n+\t\t}\n+\t}\n+\n+\tres = done_128(res, params);\n+\n+do_barrett_reduction:\n+\tn = barrett_reduction(res, params);\n+\n+\treturn n;\n+}\n+\n+static void\n+crc32_load_init_constants(void)\n+{\n+\t__m128i a;\n+\t/* fold constants */\n+\tuint64_t c0 = 0x00000000e95c1271;\n+\tuint64_t c1 = 0x00000000ce3371cb;\n+\tuint64_t c2 = 0x00000000910eeec1;\n+\tuint64_t c3 = 0x0000000033fff533;\n+\tuint64_t c4 = 0x000000000cbec0ed;\n+\tuint64_t c5 = 0x0000000031f8303f;\n+\tuint64_t c6 = 0x0000000057c54819;\n+\tuint64_t c7 = 0x00000000df068dc2;\n+\tuint64_t c8 = 0x00000000ae0b5394;\n+\tuint64_t c9 = 0x000000001c279815;\n+\tuint64_t c10 = 0x000000001d9513d7;\n+\tuint64_t c11 = 0x000000008f352d95;\n+\tuint64_t c12 = 0x00000000af449247;\n+\tuint64_t c13 = 0x000000003db1ecdc;\n+\tuint64_t c14 = 0x0000000081256527;\n+\tuint64_t c15 = 0x00000000f1da05aa;\n+\tuint64_t c16 = 0x00000000ccaa009e;\n+\tuint64_t c17 = 0x00000000ae689191;\n+\tuint64_t c18 = 0x00000000ccaa009e;\n+\tuint64_t c19 = 0x00000000b8bc6765;\n+\tuint64_t c20 = 0x00000001f7011640;\n+\tuint64_t c21 = 0x00000001db710640;\n+\n+\ta = _mm_set_epi64x(c1, c0);\n+\tcrc32_eth.rk1_rk2 = _mm512_broadcast_i32x4(a);\n+\n+\ta = _mm_set_epi64x(c3, c2);\n+\tcrc32_eth.rk3_rk4 = _mm512_broadcast_i32x4(a);\n+\n+\tcrc32_eth.fold_7x128b = _mm512_setr_epi64(c4, c5, c6, c7, c8,\n+\t\t\tc9, c10, c11);\n+\tcrc32_eth.fold_3x128b = _mm512_setr_epi64(c12, c13, c14, c15,\n+\t\t\tc16, c17, 0, 0);\n+\tcrc32_eth.fold_1x128b = _mm_setr_epi64(_mm_cvtsi64_m64(c16),\n+\t\t\t_mm_cvtsi64_m64(c17));\n+\n+\tcrc32_eth.rk5_rk6 = _mm_setr_epi64(_mm_cvtsi64_m64(c18),\n+\t\t\t_mm_cvtsi64_m64(c19));\n+\tcrc32_eth.rk7_rk8 = _mm_setr_epi64(_mm_cvtsi64_m64(c20),\n+\t\t\t_mm_cvtsi64_m64(c21));\n+}\n+\n+static void\n+crc16_load_init_constants(void)\n+{\n+\t__m128i a;\n+\t/* fold constants */\n+\tuint64_t c0 = 0x0000000000009a19;\n+\tuint64_t c1 = 0x0000000000002df8;\n+\tuint64_t c2 = 0x00000000000068af;\n+\tuint64_t c3 = 0x000000000000b6c9;\n+\tuint64_t c4 = 0x000000000000c64f;\n+\tuint64_t c5 = 0x000000000000cd95;\n+\tuint64_t c6 = 0x000000000000d341;\n+\tuint64_t c7 = 0x000000000000b8f2;\n+\tuint64_t c8 = 0x0000000000000842;\n+\tuint64_t c9 = 0x000000000000b072;\n+\tuint64_t c10 = 0x00000000000047e3;\n+\tuint64_t c11 = 0x000000000000922d;\n+\tuint64_t c12 = 0x0000000000000e3a;\n+\tuint64_t c13 = 0x0000000000004d7a;\n+\tuint64_t c14 = 0x0000000000005b44;\n+\tuint64_t c15 = 0x0000000000007762;\n+\tuint64_t c16 = 0x00000000000081bf;\n+\tuint64_t c17 = 0x0000000000008e10;\n+\tuint64_t c18 = 0x00000000000081bf;\n+\tuint64_t c19 = 0x0000000000001cbb;\n+\tuint64_t c20 = 0x000000011c581910;\n+\tuint64_t c21 = 0x0000000000010810;\n+\n+\ta = _mm_set_epi64x(c1, c0);\n+\tcrc16_ccitt.rk1_rk2 = _mm512_broadcast_i32x4(a);\n+\n+\ta = _mm_set_epi64x(c3, c2);\n+\tcrc16_ccitt.rk3_rk4 = _mm512_broadcast_i32x4(a);\n+\n+\tcrc16_ccitt.fold_7x128b = _mm512_setr_epi64(c4, c5, c6, c7, c8,\n+\t\t\tc9, c10, c11);\n+\tcrc16_ccitt.fold_3x128b = _mm512_setr_epi64(c12, c13, c14, c15,\n+\t\t\tc16, c17, 0, 0);\n+\tcrc16_ccitt.fold_1x128b = _mm_setr_epi64(_mm_cvtsi64_m64(c16),\n+\t\t\t_mm_cvtsi64_m64(c17));\n+\n+\tcrc16_ccitt.rk5_rk6 = _mm_setr_epi64(_mm_cvtsi64_m64(c18),\n+\t\t\t_mm_cvtsi64_m64(c19));\n+\tcrc16_ccitt.rk7_rk8 = _mm_setr_epi64(_mm_cvtsi64_m64(c20),\n+\t\t\t_mm_cvtsi64_m64(c21));\n+}\n+\n+void\n+rte_net_crc_avx512_init(void)\n+{\n+\tcrc32_load_init_constants();\n+\tcrc16_load_init_constants();\n+\n+\t/*\n+\t * Reset the register as following calculation may\n+\t * use other data types such as float, double, etc.\n+\t */\n+\t_mm_empty();\n+}\n+\n+uint32_t\n+rte_crc16_ccitt_avx512_handler(const uint8_t *data, uint32_t data_len)\n+{\n+\t/* return 16-bit CRC value */\n+\treturn (uint16_t)~crc32_eth_calc_vpclmulqdq(data,\n+\t\tdata_len,\n+\t\t0xffff,\n+\t\t&crc16_ccitt);\n+}\n+\n+uint32_t\n+rte_crc32_eth_avx512_handler(const uint8_t *data, uint32_t data_len)\n+{\n+\t/* return 32-bit CRC value */\n+\treturn ~crc32_eth_calc_vpclmulqdq(data,\n+\t\tdata_len,\n+\t\t0xffffffffUL,\n+\t\t&crc32_eth);\n+}\ndiff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c\nindex d271d5205..32a366590 100644\n--- a/lib/librte_net/rte_net_crc.c\n+++ b/lib/librte_net/rte_net_crc.c\n@@ -37,6 +37,12 @@ static const rte_net_crc_handler handlers_scalar[] = {\n \t[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_handler,\n \t[RTE_NET_CRC32_ETH] = rte_crc32_eth_handler,\n };\n+#ifdef CC_X86_64_AVX512_VPCLMULQDQ_SUPPORT\n+static const rte_net_crc_handler handlers_avx512[] = {\n+\t[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_avx512_handler,\n+\t[RTE_NET_CRC32_ETH] = rte_crc32_eth_avx512_handler,\n+};\n+#endif\n #ifdef CC_X86_64_SSE42_PCLMULQDQ_SUPPORT\n static const rte_net_crc_handler handlers_sse42[] = {\n \t[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_sse42_handler,\n@@ -134,6 +140,39 @@ rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len)\n \t\tcrc32_eth_lut);\n }\n \n+/* AVX512/VPCLMULQDQ handling */\n+\n+#define AVX512_VPCLMULQDQ_CPU_SUPPORTED ( \\\n+\trte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) && \\\n+\trte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) && \\\n+\trte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512DQ) && \\\n+\trte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) && \\\n+\trte_cpu_get_flag_enabled(RTE_CPUFLAG_PCLMULQDQ) && \\\n+\trte_cpu_get_flag_enabled(RTE_CPUFLAG_VPCLMULQDQ) \\\n+)\n+\n+static const rte_net_crc_handler *\n+avx512_vpclmulqdq_get_handlers(void)\n+{\n+#ifdef CC_X86_64_AVX512_VPCLMULQDQ_SUPPORT\n+\tif (AVX512_VPCLMULQDQ_CPU_SUPPORTED)\n+\t\treturn handlers_avx512;\n+#endif\n+\treturn NULL;\n+}\n+\n+static uint8_t\n+avx512_vpclmulqdq_init(void)\n+{\n+#ifdef CC_X86_64_AVX512_VPCLMULQDQ_SUPPORT\n+\tif (AVX512_VPCLMULQDQ_CPU_SUPPORTED) {\n+\t\trte_net_crc_avx512_init();\n+\t\treturn 1;\n+\t}\n+#endif\n+\treturn 0;\n+}\n+\n /* SSE4.2/PCLMULQDQ handling */\n \n #define SSE42_PCLMULQDQ_CPU_SUPPORTED \\\n@@ -196,6 +235,11 @@ rte_net_crc_set_alg(enum rte_net_crc_alg alg)\n \thandlers = NULL;\n \n \tswitch (alg) {\n+\tcase RTE_NET_CRC_AVX512:\n+\t\thandlers = avx512_vpclmulqdq_get_handlers();\n+\t\tif (handlers != NULL)\n+\t\t\tbreak;\n+\t\t/* fall-through */\n \tcase RTE_NET_CRC_SSE42:\n \t\thandlers = sse42_pclmulqdq_get_handlers();\n \t\tbreak; /* for x86, always break here */\n@@ -235,6 +279,8 @@ RTE_INIT(rte_net_crc_init)\n \n \tif (sse42_pclmulqdq_init())\n \t\talg = RTE_NET_CRC_SSE42;\n+\tif (avx512_vpclmulqdq_init())\n+\t\talg = RTE_NET_CRC_AVX512;\n \tif (neon_pmull_init())\n \t\talg = RTE_NET_CRC_NEON;\n \ndiff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h\nindex 16e85ca97..72d3e10ff 100644\n--- a/lib/librte_net/rte_net_crc.h\n+++ b/lib/librte_net/rte_net_crc.h\n@@ -1,5 +1,5 @@\n /* SPDX-License-Identifier: BSD-3-Clause\n- * Copyright(c) 2017 Intel Corporation\n+ * Copyright(c) 2017-2020 Intel Corporation\n  */\n \n #ifndef _RTE_NET_CRC_H_\n@@ -23,6 +23,7 @@ enum rte_net_crc_alg {\n \tRTE_NET_CRC_SCALAR = 0,\n \tRTE_NET_CRC_SSE42,\n \tRTE_NET_CRC_NEON,\n+\tRTE_NET_CRC_AVX512,\n };\n \n /**\n@@ -35,6 +36,7 @@ enum rte_net_crc_alg {\n  *   - RTE_NET_CRC_SCALAR\n  *   - RTE_NET_CRC_SSE42 (Use 64-bit SSE4.2 intrinsic)\n  *   - RTE_NET_CRC_NEON (Use ARM Neon intrinsic)\n+ *   - RTE_NET_CRC_AVX512 (Use 512-bit AVX intrinsic)\n  */\n void\n rte_net_crc_set_alg(enum rte_net_crc_alg alg);\n",
    "prefixes": [
        "v5",
        "2/2"
    ]
}