get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/77772/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 77772,
    "url": "https://patches.dpdk.org/api/patches/77772/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/20200915165025.543-11-konstantin.ananyev@intel.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20200915165025.543-11-konstantin.ananyev@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20200915165025.543-11-konstantin.ananyev@intel.com",
    "date": "2020-09-15T16:50:23",
    "name": "[v2,10/12] acl: for AVX512 classify use 4B load whenever possible",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "953eceba69c4cbf8befa3eca82a9b28ff70b16c5",
    "submitter": {
        "id": 33,
        "url": "https://patches.dpdk.org/api/people/33/?format=api",
        "name": "Ananyev, Konstantin",
        "email": "konstantin.ananyev@intel.com"
    },
    "delegate": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/users/1/?format=api",
        "username": "tmonjalo",
        "first_name": "Thomas",
        "last_name": "Monjalon",
        "email": "thomas@monjalon.net"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/20200915165025.543-11-konstantin.ananyev@intel.com/mbox/",
    "series": [
        {
            "id": 12241,
            "url": "https://patches.dpdk.org/api/series/12241/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=12241",
            "date": "2020-09-15T16:50:13",
            "name": "acl: introduce AVX512 classify method",
            "version": 2,
            "mbox": "https://patches.dpdk.org/series/12241/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/77772/comments/",
    "check": "success",
    "checks": "https://patches.dpdk.org/api/patches/77772/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from dpdk.org (dpdk.org [92.243.14.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 2BB4BA04C7;\n\tTue, 15 Sep 2020 18:52:41 +0200 (CEST)",
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id 9688A1C19B;\n\tTue, 15 Sep 2020 18:51:28 +0200 (CEST)",
            "from mga02.intel.com (mga02.intel.com [134.134.136.20])\n by dpdk.org (Postfix) with ESMTP id 5FF011C113\n for <dev@dpdk.org>; Tue, 15 Sep 2020 18:51:26 +0200 (CEST)",
            "from orsmga006.jf.intel.com ([10.7.209.51])\n by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 15 Sep 2020 09:51:25 -0700",
            "from sivswdev08.ir.intel.com ([10.237.217.47])\n by orsmga006.jf.intel.com with ESMTP; 15 Sep 2020 09:51:23 -0700"
        ],
        "IronPort-SDR": [
            "\n V6H9bi0DQdKHaq3Q9zV5yeIymwrZNUQdKzhOz1JM0AyqK7s+WmTixSH8+EmcJdrdAOAID5uBCS\n nsDdmQMDJx7Q==",
            "\n z35yseePC/ucUxx0INURAX68ryVja9T8P3WHvr/ARFrLE12MLu23ToUmTPUt3HgHK1v77R9zwP\n vY1j2bZSwVuA=="
        ],
        "X-IronPort-AV": [
            "E=McAfee;i=\"6000,8403,9745\"; a=\"146995883\"",
            "E=Sophos;i=\"5.76,430,1592895600\"; d=\"scan'208\";a=\"146995883\"",
            "E=Sophos;i=\"5.76,430,1592895600\"; d=\"scan'208\";a=\"306709566\""
        ],
        "X-Amp-Result": "SKIPPED(no attachment in message)",
        "X-Amp-File-Uploaded": "False",
        "X-ExtLoop1": "1",
        "From": "Konstantin Ananyev <konstantin.ananyev@intel.com>",
        "To": "dev@dpdk.org",
        "Cc": "jerinj@marvell.com, ruifeng.wang@arm.com, vladimir.medvedkin@intel.com,\n Konstantin Ananyev <konstantin.ananyev@intel.com>",
        "Date": "Tue, 15 Sep 2020 17:50:23 +0100",
        "Message-Id": "<20200915165025.543-11-konstantin.ananyev@intel.com>",
        "X-Mailer": "git-send-email 2.18.0",
        "In-Reply-To": "<20200915165025.543-1-konstantin.ananyev@intel.com>",
        "References": "<20200807162829.11690-1-konstantin.ananyev@intel.com>\n <20200915165025.543-1-konstantin.ananyev@intel.com>",
        "Subject": "[dpdk-dev] [PATCH v2 10/12] acl: for AVX512 classify use 4B load\n\twhenever possible",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "With current ACL implementation first field in the rule definition\nhas always to be one byte long. Though for optimising classify\nimplementation it might be useful to be able to use 4B reads\n(as we do for rest of the fields).\nSo at build phase, check user provided field definitions to determine\nis it safe to do 4B loads for first ACL field.\nThen at run-time this information can be used to choose classify\nbehavior.\n\nSigned-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>\n---\n lib/librte_acl/acl.h               |  1 +\n lib/librte_acl/acl_bld.c           | 34 ++++++++++++++++++++++++++++++\n lib/librte_acl/acl_run_avx512.c    |  7 ++++++\n lib/librte_acl/acl_run_avx512x16.h |  8 +++----\n lib/librte_acl/acl_run_avx512x8.h  |  8 +++----\n lib/librte_acl/rte_acl.c           |  1 +\n 6 files changed, 51 insertions(+), 8 deletions(-)",
    "diff": "diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h\nindex 3f0719f33..493dec2a2 100644\n--- a/lib/librte_acl/acl.h\n+++ b/lib/librte_acl/acl.h\n@@ -169,6 +169,7 @@ struct rte_acl_ctx {\n \tint32_t             socket_id;\n \t/** Socket ID to allocate memory from. */\n \tenum rte_acl_classify_alg alg;\n+\tuint32_t           first_load_sz;\n \tvoid               *rules;\n \tuint32_t            max_rules;\n \tuint32_t            rule_sz;\ndiff --git a/lib/librte_acl/acl_bld.c b/lib/librte_acl/acl_bld.c\nindex d1f920b09..da10864cd 100644\n--- a/lib/librte_acl/acl_bld.c\n+++ b/lib/librte_acl/acl_bld.c\n@@ -1581,6 +1581,37 @@ acl_check_bld_param(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg)\n \treturn 0;\n }\n \n+/*\n+ * With current ACL implementation first field in the rule definition\n+ * has always to be one byte long. Though for optimising *classify*\n+ * implementation it might be useful to be able to use 4B reads\n+ * (as we do for rest of the fields).\n+ * This function checks input config to determine is it safe to do 4B\n+ * loads for first ACL field. For that we need to make sure that\n+ * first field in our rule definition doesn't have the biggest offset,\n+ * i.e. we still do have other fields located after the first one.\n+ * Contrary if first field has the largest offset, then it means\n+ * first field can occupy the very last byte in the input data buffer,\n+ * and we have to do single byte load for it.\n+ */\n+static uint32_t\n+get_first_load_size(const struct rte_acl_config *cfg)\n+{\n+\tuint32_t i, max_ofs, ofs;\n+\n+\tofs = 0;\n+\tmax_ofs = 0;\n+\n+\tfor (i = 0; i != cfg->num_fields; i++) {\n+\t\tif (cfg->defs[i].field_index == 0)\n+\t\t\tofs = cfg->defs[i].offset;\n+\t\telse if (max_ofs < cfg->defs[i].offset)\n+\t\t\tmax_ofs = cfg->defs[i].offset;\n+\t}\n+\n+\treturn (ofs < max_ofs) ? sizeof(uint32_t) : sizeof(uint8_t);\n+}\n+\n int\n rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg)\n {\n@@ -1618,6 +1649,9 @@ rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg)\n \t\t\t\t/* set data indexes. */\n \t\t\t\tacl_set_data_indexes(ctx);\n \n+\t\t\t\t/* determine can we always do 4B load */\n+\t\t\t\tctx->first_load_sz = get_first_load_size(cfg);\n+\n \t\t\t\t/* copy in build config. */\n \t\t\t\tctx->config = *cfg;\n \t\t\t}\ndiff --git a/lib/librte_acl/acl_run_avx512.c b/lib/librte_acl/acl_run_avx512.c\nindex 60762b7d6..51bfa6a3b 100644\n--- a/lib/librte_acl/acl_run_avx512.c\n+++ b/lib/librte_acl/acl_run_avx512.c\n@@ -16,6 +16,7 @@ struct acl_flow_avx512 {\n \tuint32_t num_packets;       /* number of packets processed */\n \tuint32_t total_packets;     /* max number of packets to process */\n \tuint32_t root_index;        /* current root index */\n+\tuint32_t first_load_sz;     /* first load size for new packet */\n \tconst uint64_t *trans;      /* transition table */\n \tconst uint32_t *data_index; /* input data indexes */\n \tconst uint8_t **idata;      /* input data */\n@@ -29,6 +30,7 @@ acl_set_flow_avx512(struct acl_flow_avx512 *flow, const struct rte_acl_ctx *ctx,\n {\n \tflow->num_packets = 0;\n \tflow->total_packets = total_packets;\n+\tflow->first_load_sz = ctx->first_load_sz;\n \tflow->root_index = ctx->trie[trie].root_index;\n \tflow->trans = ctx->trans_table;\n \tflow->data_index = ctx->trie[trie].data_index;\n@@ -155,6 +157,11 @@ resolve_mcgt8_avx512x1(uint32_t result[],\n \t}\n }\n \n+/*\n+ * unfortunately current AVX512 ISA doesn't provide ability for\n+ * gather load on a byte quantity. So we have to mimic it in SW,\n+ * by doing 8x1B scalar loads.\n+ */\n static inline ymm_t\n _m512_mask_gather_epi8x8(__m512i pdata, __mmask8 mask)\n {\ndiff --git a/lib/librte_acl/acl_run_avx512x16.h b/lib/librte_acl/acl_run_avx512x16.h\nindex 45b0b4db6..df5f6135f 100644\n--- a/lib/librte_acl/acl_run_avx512x16.h\n+++ b/lib/librte_acl/acl_run_avx512x16.h\n@@ -413,7 +413,7 @@ match_check_process_avx512x16x2(struct acl_flow_avx512 *flow, uint32_t fm[2],\n \n \t\tif (n[0] != 0) {\n \t\t\tinp[0] = get_next_bytes_avx512x16(flow, &pdata[0],\n-\t\t\t\trm[0], &di[0], sizeof(uint8_t));\n+\t\t\t\trm[0], &di[0], flow->first_load_sz);\n \t\t\tfirst_trans16(flow, inp[0], rm[0], &tr_lo[0],\n \t\t\t\t&tr_hi[0]);\n \t\t\trm[0] = _mm512_test_epi32_mask(tr_lo[0],\n@@ -422,7 +422,7 @@ match_check_process_avx512x16x2(struct acl_flow_avx512 *flow, uint32_t fm[2],\n \n \t\tif (n[1] != 0) {\n \t\t\tinp[1] = get_next_bytes_avx512x16(flow, &pdata[2],\n-\t\t\t\trm[1], &di[1], sizeof(uint8_t));\n+\t\t\t\trm[1], &di[1], flow->first_load_sz);\n \t\t\tfirst_trans16(flow, inp[1], rm[1], &tr_lo[1],\n \t\t\t\t&tr_hi[1]);\n \t\t\trm[1] = _mm512_test_epi32_mask(tr_lo[1],\n@@ -447,9 +447,9 @@ search_trie_avx512x16x2(struct acl_flow_avx512 *flow)\n \tstart_flow16(flow, MASK16_BIT, UINT16_MAX, &pdata[2], &idx[1], &di[1]);\n \n \tin[0] = get_next_bytes_avx512x16(flow, &pdata[0], UINT16_MAX, &di[0],\n-\t\tsizeof(uint8_t));\n+\t\t\tflow->first_load_sz);\n \tin[1] = get_next_bytes_avx512x16(flow, &pdata[2], UINT16_MAX, &di[1],\n-\t\tsizeof(uint8_t));\n+\t\t\tflow->first_load_sz);\n \n \tfirst_trans16(flow, in[0], UINT16_MAX, &tr_lo[0], &tr_hi[0]);\n \tfirst_trans16(flow, in[1], UINT16_MAX, &tr_lo[1], &tr_hi[1]);\ndiff --git a/lib/librte_acl/acl_run_avx512x8.h b/lib/librte_acl/acl_run_avx512x8.h\nindex 82171e8e0..777451973 100644\n--- a/lib/librte_acl/acl_run_avx512x8.h\n+++ b/lib/librte_acl/acl_run_avx512x8.h\n@@ -325,7 +325,7 @@ match_check_process_avx512x8x2(struct acl_flow_avx512 *flow, uint32_t fm[2],\n \n \t\tif (n[0] != 0) {\n \t\t\tinp[0] = get_next_bytes_avx512x8(flow, pdata[0], rm[0],\n-\t\t\t\t&di[0], sizeof(uint8_t));\n+\t\t\t\t&di[0], flow->first_load_sz);\n \t\t\tfirst_trans8(flow, inp[0], rm[0], &tr_lo[0], &tr_hi[0]);\n \n \t\t\trm[0] = _mm256_test_epi32_mask(tr_lo[0],\n@@ -334,7 +334,7 @@ match_check_process_avx512x8x2(struct acl_flow_avx512 *flow, uint32_t fm[2],\n \n \t\tif (n[1] != 0) {\n \t\t\tinp[1] = get_next_bytes_avx512x8(flow, pdata[1], rm[1],\n-\t\t\t\t&di[1], sizeof(uint8_t));\n+\t\t\t\t&di[1], flow->first_load_sz);\n \t\t\tfirst_trans8(flow, inp[1], rm[1], &tr_lo[1], &tr_hi[1]);\n \n \t\t\trm[1] = _mm256_test_epi32_mask(tr_lo[1],\n@@ -360,9 +360,9 @@ search_trie_avx512x8x2(struct acl_flow_avx512 *flow)\n \tstart_flow8(flow, CHAR_BIT, UINT8_MAX, &pdata[1], &idx[1], &di[1]);\n \n \tinp[0] = get_next_bytes_avx512x8(flow, pdata[0], UINT8_MAX, &di[0],\n-\t\tsizeof(uint8_t));\n+\t\t\tflow->first_load_sz);\n \tinp[1] = get_next_bytes_avx512x8(flow, pdata[1], UINT8_MAX, &di[1],\n-\t\tsizeof(uint8_t));\n+\t\t\tflow->first_load_sz);\n \n \tfirst_trans8(flow, inp[0], UINT8_MAX, &tr_lo[0], &tr_hi[0]);\n \tfirst_trans8(flow, inp[1], UINT8_MAX, &tr_lo[1], &tr_hi[1]);\ndiff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c\nindex fdcb7a798..9f16d28ea 100644\n--- a/lib/librte_acl/rte_acl.c\n+++ b/lib/librte_acl/rte_acl.c\n@@ -486,6 +486,7 @@ rte_acl_dump(const struct rte_acl_ctx *ctx)\n \tprintf(\"acl context <%s>@%p\\n\", ctx->name, ctx);\n \tprintf(\"  socket_id=%\"PRId32\"\\n\", ctx->socket_id);\n \tprintf(\"  alg=%\"PRId32\"\\n\", ctx->alg);\n+\tprintf(\"  first_load_sz=%\"PRIu32\"\\n\", ctx->first_load_sz);\n \tprintf(\"  max_rules=%\"PRIu32\"\\n\", ctx->max_rules);\n \tprintf(\"  rule_size=%\"PRIu32\"\\n\", ctx->rule_sz);\n \tprintf(\"  num_rules=%\"PRIu32\"\\n\", ctx->num_rules);\n",
    "prefixes": [
        "v2",
        "10/12"
    ]
}