get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/134889/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 134889,
    "url": "https://patches.dpdk.org/api/patches/134889/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/20231206172419.878-3-pbhagavatula@marvell.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20231206172419.878-3-pbhagavatula@marvell.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20231206172419.878-3-pbhagavatula@marvell.com",
    "date": "2023-12-06T17:24:19",
    "name": "[v3,3/3] net/octeon_ep: use AVX2 instructions for Rx",
    "commit_ref": null,
    "pull_url": null,
    "state": "changes-requested",
    "archived": true,
    "hash": "44c80e0a3ac850d791f95f4258d6931928e89753",
    "submitter": {
        "id": 1183,
        "url": "https://patches.dpdk.org/api/people/1183/?format=api",
        "name": "Pavan Nikhilesh Bhagavatula",
        "email": "pbhagavatula@marvell.com"
    },
    "delegate": {
        "id": 310,
        "url": "https://patches.dpdk.org/api/users/310/?format=api",
        "username": "jerin",
        "first_name": "Jerin",
        "last_name": "Jacob",
        "email": "jerinj@marvell.com"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/20231206172419.878-3-pbhagavatula@marvell.com/mbox/",
    "series": [
        {
            "id": 30463,
            "url": "https://patches.dpdk.org/api/series/30463/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=30463",
            "date": "2023-12-06T17:24:17",
            "name": "[v3,1/3] net/octeon_ep: optimize Rx and Tx routines",
            "version": 3,
            "mbox": "https://patches.dpdk.org/series/30463/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/134889/comments/",
    "check": "success",
    "checks": "https://patches.dpdk.org/api/patches/134889/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id E41F04368C;\n\tWed,  6 Dec 2023 18:24:45 +0100 (CET)",
            "from mails.dpdk.org (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 5965042E9F;\n\tWed,  6 Dec 2023 18:24:33 +0100 (CET)",
            "from mx0b-0016f401.pphosted.com (mx0b-0016f401.pphosted.com\n [67.231.156.173])\n by mails.dpdk.org (Postfix) with ESMTP id 2541E42E94\n for <dev@dpdk.org>; Wed,  6 Dec 2023 18:24:32 +0100 (CET)",
            "from pps.filterd (m0045851.ppops.net [127.0.0.1])\n by mx0b-0016f401.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id\n 3B6EtBTY020279; Wed, 6 Dec 2023 09:24:31 -0800",
            "from dc5-exch01.marvell.com ([199.233.59.181])\n by mx0b-0016f401.pphosted.com (PPS) with ESMTPS id 3utu530mey-1\n (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT);\n Wed, 06 Dec 2023 09:24:31 -0800",
            "from DC5-EXCH02.marvell.com (10.69.176.39) by DC5-EXCH01.marvell.com\n (10.69.176.38) with Microsoft SMTP Server (TLS) id 15.0.1497.48;\n Wed, 6 Dec 2023 09:24:29 -0800",
            "from maili.marvell.com (10.69.176.80) by DC5-EXCH02.marvell.com\n (10.69.176.39) with Microsoft SMTP Server id 15.0.1497.48 via Frontend\n Transport; Wed, 6 Dec 2023 09:24:29 -0800",
            "from MININT-80QBFE8.corp.innovium.com (MININT-80QBFE8.marvell.com\n [10.28.164.106])\n by maili.marvell.com (Postfix) with ESMTP id D1FCF3F704E;\n Wed,  6 Dec 2023 09:24:26 -0800 (PST)"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com;\n h=from : to : cc :\n subject : date : message-id : in-reply-to : references : mime-version :\n content-transfer-encoding : content-type; s=pfpt0220;\n bh=f0lvcY2FpExn08ViT1KygH4LF8PxrlGerC4MNksyk+s=;\n b=XawWKdHLXmbSTlRZjGKo1EOzWqOqTEpkHqGrZIhf/IYfIqlkrNfpI6g0QbK1HkbY8tGy\n LxzSIgifKfG4lRUSVr2JCcXnV+PSlnU4TmlDH7CBxmKZb06/WgbrzWQSLZj1L5KhEma0\n ydhpKsIJaCXetC1aqh4wtimB1V7Hm3TjtMVkV0Ljq0FVjeycCZYRCwIBM+sloh5dr5dm\n YLHYwzFXmHU3NJMMXRINh5q+BMvErhzgFj+INteKl6VjsT1UODvKgPH6/outPjXMBwML\n d7DqIuXmtgLxkKrgfiLW3PA3pYi2MLBD8rHv1sFpKggi+kU8OpOmvMcGXeUTrz2uDYnP KA==",
        "From": "<pbhagavatula@marvell.com>",
        "To": "<jerinj@marvell.com>, Bruce Richardson <bruce.richardson@intel.com>,\n Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>, Vamsi Attunuru\n <vattunuru@marvell.com>",
        "CC": "<dev@dpdk.org>, Pavan Nikhilesh <pbhagavatula@marvell.com>",
        "Subject": "[PATCH v3 3/3] net/octeon_ep: use AVX2 instructions for Rx",
        "Date": "Wed, 6 Dec 2023 22:54:19 +0530",
        "Message-ID": "<20231206172419.878-3-pbhagavatula@marvell.com>",
        "X-Mailer": "git-send-email 2.25.1",
        "In-Reply-To": "<20231206172419.878-1-pbhagavatula@marvell.com>",
        "References": "<20231125160349.2021-1-pbhagavatula@marvell.com>\n <20231206172419.878-1-pbhagavatula@marvell.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "Content-Type": "text/plain",
        "X-Proofpoint-ORIG-GUID": "i61Jqk5MhINRLAyGsZ_hy-ZJEBrnSiMO",
        "X-Proofpoint-GUID": "i61Jqk5MhINRLAyGsZ_hy-ZJEBrnSiMO",
        "X-Proofpoint-Virus-Version": "vendor=baseguard\n engine=ICAP:2.0.272,Aquarius:18.0.997,Hydra:6.0.619,FMLib:17.11.176.26\n definitions=2023-12-06_15,2023-12-06_01,2023-05-22_02",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "From: Pavan Nikhilesh <pbhagavatula@marvell.com>\n\nOptimize Rx routine to use AVX2 instructions when underlying\narchitecture supports it.\n\nSigned-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>\n---\n drivers/net/octeon_ep/cnxk_ep_rx_avx.c | 123 +++++++++++++++++++++++++\n drivers/net/octeon_ep/meson.build      |  12 +++\n drivers/net/octeon_ep/otx_ep_ethdev.c  |  10 ++\n drivers/net/octeon_ep/otx_ep_rxtx.h    |   6 ++\n 4 files changed, 151 insertions(+)\n create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx_avx.c",
    "diff": "diff --git a/drivers/net/octeon_ep/cnxk_ep_rx_avx.c b/drivers/net/octeon_ep/cnxk_ep_rx_avx.c\nnew file mode 100644\nindex 0000000000..ae4615e6da\n--- /dev/null\n+++ b/drivers/net/octeon_ep/cnxk_ep_rx_avx.c\n@@ -0,0 +1,123 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(C) 2023 Marvell.\n+ */\n+\n+#include \"cnxk_ep_rx.h\"\n+\n+static __rte_always_inline void\n+cnxk_ep_process_pkts_vec_avx(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq, uint16_t new_pkts)\n+{\n+\tstruct rte_mbuf **recv_buf_list = droq->recv_buf_list;\n+\tuint32_t bytes_rsvd = 0, read_idx = droq->read_idx;\n+\tconst uint64_t rearm_data = droq->rearm_data;\n+\tstruct rte_mbuf *m[CNXK_EP_OQ_DESC_PER_LOOP_AVX];\n+\tuint32_t pidx[CNXK_EP_OQ_DESC_PER_LOOP_AVX];\n+\tuint32_t idx[CNXK_EP_OQ_DESC_PER_LOOP_AVX];\n+\tuint16_t nb_desc = droq->nb_desc;\n+\tuint16_t pkts = 0;\n+\tuint8_t i;\n+\n+\tidx[0] = read_idx;\n+\twhile (pkts < new_pkts) {\n+\t\t__m256i data[CNXK_EP_OQ_DESC_PER_LOOP_AVX];\n+\t\t/* mask to shuffle from desc. to mbuf (2 descriptors)*/\n+\t\tconst __m256i mask =\n+\t\t\t_mm256_set_epi8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 20, 21, 0xFF, 0xFF, 20,\n+\t\t\t\t\t21, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,\n+\t\t\t\t\t0xFF, 0xFF, 0xFF, 7, 6, 5, 4, 3, 2, 1, 0);\n+\n+\t\t/* Load indexes. */\n+\t\tfor (i = 1; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)\n+\t\t\tidx[i] = otx_ep_incr_index(idx[i - 1], 1, nb_desc);\n+\n+\t\t/* Prefetch next indexes. */\n+\t\tif (new_pkts - pkts > 8) {\n+\t\t\tpidx[0] = otx_ep_incr_index(idx[i - 1], 1, nb_desc);\n+\t\t\tfor (i = 1; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)\n+\t\t\t\tpidx[i] = otx_ep_incr_index(pidx[i - 1], 1, nb_desc);\n+\n+\t\t\tfor (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++) {\n+\t\t\t\trte_prefetch0(recv_buf_list[pidx[i]]);\n+\t\t\t\trte_prefetch0(rte_pktmbuf_mtod(recv_buf_list[pidx[i]], void *));\n+\t\t\t}\n+\t\t}\n+\n+\t\t/* Load mbuf array. */\n+\t\tfor (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)\n+\t\t\tm[i] = recv_buf_list[idx[i]];\n+\n+\t\t/* Load rearm data and packet length for shuffle. */\n+\t\tfor (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)\n+\t\t\tdata[i] = _mm256_set_epi64x(0,\n+\t\t\t\trte_pktmbuf_mtod(m[i], struct otx_ep_droq_info *)->length >> 16,\n+\t\t\t\t0, rearm_data);\n+\n+\t\t/* Shuffle data to its place and sum the packet length. */\n+\t\tfor (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++) {\n+\t\t\tdata[i] = _mm256_shuffle_epi8(data[i], mask);\n+\t\t\tbytes_rsvd += _mm256_extract_epi16(data[i], 10);\n+\t\t}\n+\n+\t\t/* Store the 256bit data to the mbuf. */\n+\t\tfor (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)\n+\t\t\t_mm256_storeu_si256((__m256i *)&m[i]->rearm_data, data[i]);\n+\n+\t\tfor (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)\n+\t\t\trx_pkts[pkts++] = m[i];\n+\t\tidx[0] = otx_ep_incr_index(idx[i - 1], 1, nb_desc);\n+\t}\n+\tdroq->read_idx = idx[0];\n+\n+\tdroq->refill_count += new_pkts;\n+\tdroq->pkts_pending -= new_pkts;\n+\t/* Stats */\n+\tdroq->stats.pkts_received += new_pkts;\n+\tdroq->stats.bytes_received += bytes_rsvd;\n+}\n+\n+uint16_t __rte_noinline __rte_hot\n+cnxk_ep_recv_pkts_avx(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)\n+{\n+\tstruct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;\n+\tuint16_t new_pkts, vpkts;\n+\n+\tnew_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);\n+\tvpkts = RTE_ALIGN_FLOOR(new_pkts, CNXK_EP_OQ_DESC_PER_LOOP_AVX);\n+\tcnxk_ep_process_pkts_vec_avx(rx_pkts, droq, vpkts);\n+\tcnxk_ep_process_pkts_scalar(&rx_pkts[vpkts], droq, new_pkts - vpkts);\n+\n+\t/* Refill RX buffers */\n+\tif (droq->refill_count >= DROQ_REFILL_THRESHOLD)\n+\t\tcnxk_ep_rx_refill(droq);\n+\n+\treturn new_pkts;\n+}\n+\n+uint16_t __rte_noinline __rte_hot\n+cn9k_ep_recv_pkts_avx(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)\n+{\n+\tstruct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;\n+\tuint16_t new_pkts, vpkts;\n+\n+\tnew_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);\n+\tvpkts = RTE_ALIGN_FLOOR(new_pkts, CNXK_EP_OQ_DESC_PER_LOOP_AVX);\n+\tcnxk_ep_process_pkts_vec_avx(rx_pkts, droq, vpkts);\n+\tcnxk_ep_process_pkts_scalar(&rx_pkts[vpkts], droq, new_pkts - vpkts);\n+\n+\t/* Refill RX buffers */\n+\tif (droq->refill_count >= DROQ_REFILL_THRESHOLD) {\n+\t\tcnxk_ep_rx_refill(droq);\n+\t} else {\n+\t\t/* SDP output goes into DROP state when output doorbell count\n+\t\t * goes below drop count. When door bell count is written with\n+\t\t * a value greater than drop count SDP output should come out\n+\t\t * of DROP state. Due to a race condition this is not happening.\n+\t\t * Writing doorbell register with 0 again may make SDP output\n+\t\t * come out of this state.\n+\t\t */\n+\n+\t\trte_write32(0, droq->pkts_credit_reg);\n+\t}\n+\n+\treturn new_pkts;\n+}\ndiff --git a/drivers/net/octeon_ep/meson.build b/drivers/net/octeon_ep/meson.build\nindex feba1fdf25..e8ae56018d 100644\n--- a/drivers/net/octeon_ep/meson.build\n+++ b/drivers/net/octeon_ep/meson.build\n@@ -15,6 +15,18 @@ sources = files(\n \n if arch_subdir == 'x86'\n     sources += files('cnxk_ep_rx_sse.c')\n+    if cc.get_define('__AVX2__', args: machine_args) != ''\n+        cflags += ['-DCC_AVX2_SUPPORT']\n+        sources += files('cnxk_ep_rx_avx.c')\n+    elif cc.has_argument('-mavx2')\n+        cflags += ['-DCC_AVX2_SUPPORT']\n+        otx_ep_avx2_lib = static_library('otx_ep_avx2_lib',\n+                        'cnxk_ep_rx_avx.c',\n+                        dependencies: [static_rte_ethdev, static_rte_pci, static_rte_bus_pci],\n+                        include_directories: includes,\n+                        c_args: [cflags, '-mavx2'])\n+        objs += otx_ep_avx2_lib.extract_objects('cnxk_ep_rx_avx.c')\n+    endif\n endif\n \n extra_flags = ['-Wno-strict-aliasing']\ndiff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c b/drivers/net/octeon_ep/otx_ep_ethdev.c\nindex 51b34cdaa0..42a97ea110 100644\n--- a/drivers/net/octeon_ep/otx_ep_ethdev.c\n+++ b/drivers/net/octeon_ep/otx_ep_ethdev.c\n@@ -54,6 +54,11 @@ otx_ep_set_rx_func(struct rte_eth_dev *eth_dev)\n \t\teth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts;\n #ifdef RTE_ARCH_X86\n \t\teth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts_sse;\n+#ifdef CC_AVX2_SUPPORT\n+\t\tif (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256 &&\n+\t\t    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1)\n+\t\t\teth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts_avx;\n+#endif\n #endif\n \t\tif (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)\n \t\t\teth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts_mseg;\n@@ -61,6 +66,11 @@ otx_ep_set_rx_func(struct rte_eth_dev *eth_dev)\n \t\teth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts;\n #ifdef RTE_ARCH_X86\n \t\teth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts_sse;\n+#ifdef CC_AVX2_SUPPORT\n+\t\tif (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256 &&\n+\t\t    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1)\n+\t\t\teth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts_avx;\n+#endif\n #endif\n \n \t\tif (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)\ndiff --git a/drivers/net/octeon_ep/otx_ep_rxtx.h b/drivers/net/octeon_ep/otx_ep_rxtx.h\nindex efc41a8275..0adcbc7814 100644\n--- a/drivers/net/octeon_ep/otx_ep_rxtx.h\n+++ b/drivers/net/octeon_ep/otx_ep_rxtx.h\n@@ -51,6 +51,9 @@ cnxk_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);\n uint16_t\n cnxk_ep_recv_pkts_sse(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);\n \n+uint16_t\n+cnxk_ep_recv_pkts_avx(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);\n+\n uint16_t\n cnxk_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);\n \n@@ -60,6 +63,9 @@ cn9k_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);\n uint16_t\n cn9k_ep_recv_pkts_sse(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);\n \n+uint16_t\n+cn9k_ep_recv_pkts_avx(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);\n+\n uint16_t\n cn9k_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);\n #endif /* _OTX_EP_RXTX_H_ */\n",
    "prefixes": [
        "v3",
        "3/3"
    ]
}