get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/134888/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 134888,
    "url": "https://patches.dpdk.org/api/patches/134888/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/20231206172419.878-2-pbhagavatula@marvell.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20231206172419.878-2-pbhagavatula@marvell.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20231206172419.878-2-pbhagavatula@marvell.com",
    "date": "2023-12-06T17:24:18",
    "name": "[v3,2/3] net/octeon_ep: use SSE instructions for Rx routine",
    "commit_ref": null,
    "pull_url": null,
    "state": "changes-requested",
    "archived": true,
    "hash": "8cb6384814fe16a0821849196fa0ad0913886ed7",
    "submitter": {
        "id": 1183,
        "url": "https://patches.dpdk.org/api/people/1183/?format=api",
        "name": "Pavan Nikhilesh Bhagavatula",
        "email": "pbhagavatula@marvell.com"
    },
    "delegate": {
        "id": 310,
        "url": "https://patches.dpdk.org/api/users/310/?format=api",
        "username": "jerin",
        "first_name": "Jerin",
        "last_name": "Jacob",
        "email": "jerinj@marvell.com"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/20231206172419.878-2-pbhagavatula@marvell.com/mbox/",
    "series": [
        {
            "id": 30463,
            "url": "https://patches.dpdk.org/api/series/30463/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=30463",
            "date": "2023-12-06T17:24:17",
            "name": "[v3,1/3] net/octeon_ep: optimize Rx and Tx routines",
            "version": 3,
            "mbox": "https://patches.dpdk.org/series/30463/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/134888/comments/",
    "check": "warning",
    "checks": "https://patches.dpdk.org/api/patches/134888/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 6F8AE4368C;\n\tWed,  6 Dec 2023 18:24:37 +0100 (CET)",
            "from mails.dpdk.org (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id DB46342E92;\n\tWed,  6 Dec 2023 18:24:31 +0100 (CET)",
            "from mx0b-0016f401.pphosted.com (mx0a-0016f401.pphosted.com\n [67.231.148.174])\n by mails.dpdk.org (Postfix) with ESMTP id 901D74021E\n for <dev@dpdk.org>; Wed,  6 Dec 2023 18:24:29 +0100 (CET)",
            "from pps.filterd (m0045849.ppops.net [127.0.0.1])\n by mx0a-0016f401.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id\n 3B6CCCgi032691; Wed, 6 Dec 2023 09:24:28 -0800",
            "from dc5-exch01.marvell.com ([199.233.59.181])\n by mx0a-0016f401.pphosted.com (PPS) with ESMTPS id 3utd0ruc6r-1\n (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT);\n Wed, 06 Dec 2023 09:24:28 -0800",
            "from DC5-EXCH01.marvell.com (10.69.176.38) by DC5-EXCH01.marvell.com\n (10.69.176.38) with Microsoft SMTP Server (TLS) id 15.0.1497.48;\n Wed, 6 Dec 2023 09:24:26 -0800",
            "from maili.marvell.com (10.69.176.80) by DC5-EXCH01.marvell.com\n (10.69.176.38) with Microsoft SMTP Server id 15.0.1497.48 via Frontend\n Transport; Wed, 6 Dec 2023 09:24:26 -0800",
            "from MININT-80QBFE8.corp.innovium.com (MININT-80QBFE8.marvell.com\n [10.28.164.106])\n by maili.marvell.com (Postfix) with ESMTP id D67023F70A0;\n Wed,  6 Dec 2023 09:24:23 -0800 (PST)"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com;\n h=from : to : cc :\n subject : date : message-id : in-reply-to : references : mime-version :\n content-transfer-encoding : content-type; s=pfpt0220;\n bh=QmEAKUdMzZjSwoDEOX6t2b3IRiEDS9HdtOQj4cgJ6OI=;\n b=VJZ+lVZmbjwAeP4nCkrzkthnPA+TWWfbEqter9/koyaDd0/qdmY4zEupzwpXDp9XR926\n Kf/hk8aZPdMaAG8WGBgy9umnTSdhjNYe6WB9pk9gMid7Q6Yv5gx82rzsztk5PNtNJHRC\n c3yI7DJRGVcbu9SS7vc1vcPFXzjW2G05gSdFUI2KjL9eeHfxB8nYcrzms/oJRdJ8KrBh\n uilo8V/AiXFrtXk4PHfXsuIdv8smCCCHrLPBbbUrALaVIgSR3jlb2kI99AY+l0w4njuY\n g8D2ldFWctQPU6BOeEo3f0+TKH4cM4zcFzYvvo+OWdr/58LNPWhvN5sHWIxg83ud6vU9 vQ==",
        "From": "<pbhagavatula@marvell.com>",
        "To": "<jerinj@marvell.com>, Vamsi Attunuru <vattunuru@marvell.com>, \"Bruce\n Richardson\" <bruce.richardson@intel.com>, Konstantin Ananyev\n <konstantin.v.ananyev@yandex.ru>",
        "CC": "<dev@dpdk.org>, Pavan Nikhilesh <pbhagavatula@marvell.com>",
        "Subject": "[PATCH v3 2/3] net/octeon_ep: use SSE instructions for Rx routine",
        "Date": "Wed, 6 Dec 2023 22:54:18 +0530",
        "Message-ID": "<20231206172419.878-2-pbhagavatula@marvell.com>",
        "X-Mailer": "git-send-email 2.25.1",
        "In-Reply-To": "<20231206172419.878-1-pbhagavatula@marvell.com>",
        "References": "<20231125160349.2021-1-pbhagavatula@marvell.com>\n <20231206172419.878-1-pbhagavatula@marvell.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "Content-Type": "text/plain",
        "X-Proofpoint-ORIG-GUID": "pAqk4C88vrXPDPtpnu6H9rroGsJrdRkE",
        "X-Proofpoint-GUID": "pAqk4C88vrXPDPtpnu6H9rroGsJrdRkE",
        "X-Proofpoint-Virus-Version": "vendor=baseguard\n engine=ICAP:2.0.272,Aquarius:18.0.997,Hydra:6.0.619,FMLib:17.11.176.26\n definitions=2023-12-06_15,2023-12-06_01,2023-05-22_02",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "From: Pavan Nikhilesh <pbhagavatula@marvell.com>\n\nOptimize Rx routine to use SSE instructions.\n\nSigned-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>\n---\n drivers/net/octeon_ep/cnxk_ep_rx.c     | 159 +----------------------\n drivers/net/octeon_ep/cnxk_ep_rx.h     | 167 +++++++++++++++++++++++++\n drivers/net/octeon_ep/cnxk_ep_rx_sse.c | 130 +++++++++++++++++++\n drivers/net/octeon_ep/meson.build      |  11 ++\n drivers/net/octeon_ep/otx_ep_ethdev.c  |   7 ++\n drivers/net/octeon_ep/otx_ep_rxtx.h    |   6 +\n 6 files changed, 322 insertions(+), 158 deletions(-)\n create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx.h\n create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx_sse.c",
    "diff": "diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.c b/drivers/net/octeon_ep/cnxk_ep_rx.c\nindex 75bb7225d2..f3e4fb27d1 100644\n--- a/drivers/net/octeon_ep/cnxk_ep_rx.c\n+++ b/drivers/net/octeon_ep/cnxk_ep_rx.c\n@@ -2,164 +2,7 @@\n  * Copyright(C) 2023 Marvell.\n  */\n \n-#include \"otx_ep_common.h\"\n-#include \"otx2_ep_vf.h\"\n-#include \"otx_ep_rxtx.h\"\n-\n-static inline int\n-cnxk_ep_rx_refill_mbuf(struct otx_ep_droq *droq, uint32_t count)\n-{\n-\tstruct otx_ep_droq_desc *desc_ring = droq->desc_ring;\n-\tstruct rte_mbuf **recv_buf_list = droq->recv_buf_list;\n-\tuint32_t refill_idx = droq->refill_idx;\n-\tstruct rte_mbuf *buf;\n-\tuint32_t i;\n-\tint rc;\n-\n-\trc = rte_pktmbuf_alloc_bulk(droq->mpool, &recv_buf_list[refill_idx], count);\n-\tif (unlikely(rc)) {\n-\t\tdroq->stats.rx_alloc_failure++;\n-\t\treturn rc;\n-\t}\n-\n-\tfor (i = 0; i < count; i++) {\n-\t\tbuf = recv_buf_list[refill_idx];\n-\t\tdesc_ring[refill_idx].buffer_ptr = rte_mbuf_data_iova_default(buf);\n-\t\trefill_idx++;\n-\t}\n-\n-\tdroq->refill_idx = otx_ep_incr_index(droq->refill_idx, count, droq->nb_desc);\n-\tdroq->refill_count -= count;\n-\n-\treturn 0;\n-}\n-\n-static inline void\n-cnxk_ep_rx_refill(struct otx_ep_droq *droq)\n-{\n-\tuint32_t desc_refilled = 0, count;\n-\tuint32_t nb_desc = droq->nb_desc;\n-\tuint32_t refill_idx = droq->refill_idx;\n-\tint rc;\n-\n-\tif (unlikely(droq->read_idx == refill_idx))\n-\t\treturn;\n-\n-\tif (refill_idx < droq->read_idx) {\n-\t\tcount = droq->read_idx - refill_idx;\n-\t\trc = cnxk_ep_rx_refill_mbuf(droq, count);\n-\t\tif (unlikely(rc)) {\n-\t\t\tdroq->stats.rx_alloc_failure++;\n-\t\t\treturn;\n-\t\t}\n-\t\tdesc_refilled = count;\n-\t} else {\n-\t\tcount = nb_desc - refill_idx;\n-\t\trc = cnxk_ep_rx_refill_mbuf(droq, count);\n-\t\tif (unlikely(rc)) {\n-\t\t\tdroq->stats.rx_alloc_failure++;\n-\t\t\treturn;\n-\t\t}\n-\n-\t\tdesc_refilled = count;\n-\t\tcount = droq->read_idx;\n-\t\trc = cnxk_ep_rx_refill_mbuf(droq, count);\n-\t\tif (unlikely(rc)) {\n-\t\t\tdroq->stats.rx_alloc_failure++;\n-\t\t\treturn;\n-\t\t}\n-\t\tdesc_refilled += count;\n-\t}\n-\n-\t/* Flush the droq descriptor data to memory to be sure\n-\t * that when we update the credits the data in memory is\n-\t * accurate.\n-\t */\n-\trte_io_wmb();\n-\trte_write32(desc_refilled, droq->pkts_credit_reg);\n-}\n-\n-static inline uint32_t\n-cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)\n-{\n-\tuint32_t new_pkts;\n-\tuint32_t val;\n-\n-\t/* Batch subtractions from the HW counter to reduce PCIe traffic\n-\t * This adds an extra local variable, but almost halves the\n-\t * number of PCIe writes.\n-\t */\n-\tval = __atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED);\n-\tnew_pkts = val - droq->pkts_sent_ism_prev;\n-\tdroq->pkts_sent_ism_prev = val;\n-\n-\tif (val > RTE_BIT32(31)) {\n-\t\t/* Only subtract the packet count in the HW counter\n-\t\t * when count above halfway to saturation.\n-\t\t */\n-\t\trte_write64((uint64_t)val, droq->pkts_sent_reg);\n-\t\trte_mb();\n-\n-\t\trte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);\n-\t\twhile (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) >= val) {\n-\t\t\trte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);\n-\t\t\trte_mb();\n-\t\t}\n-\n-\t\tdroq->pkts_sent_ism_prev = 0;\n-\t}\n-\trte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);\n-\tdroq->pkts_pending += new_pkts;\n-\n-\treturn new_pkts;\n-}\n-\n-static inline int16_t __rte_hot\n-cnxk_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)\n-{\n-\tif (droq->pkts_pending < nb_pkts)\n-\t\tcnxk_ep_check_rx_pkts(droq);\n-\n-\treturn RTE_MIN(nb_pkts, droq->pkts_pending);\n-}\n-\n-static __rte_always_inline void\n-cnxk_ep_process_pkts_scalar(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq, uint16_t new_pkts)\n-{\n-\tstruct rte_mbuf **recv_buf_list = droq->recv_buf_list;\n-\tuint32_t bytes_rsvd = 0, read_idx = droq->read_idx;\n-\tuint16_t nb_desc = droq->nb_desc;\n-\tuint16_t pkts;\n-\n-\tfor (pkts = 0; pkts < new_pkts; pkts++) {\n-\t\tstruct otx_ep_droq_info *info;\n-\t\tstruct rte_mbuf *mbuf;\n-\t\tuint16_t pkt_len;\n-\n-\t\trte_prefetch0(recv_buf_list[otx_ep_incr_index(read_idx, 2, nb_desc)]);\n-\t\trte_prefetch0(rte_pktmbuf_mtod(recv_buf_list[otx_ep_incr_index(read_idx,\n-\t\t\t\t\t\t\t\t\t       2, nb_desc)],\n-\t\t\t      void *));\n-\n-\t\tmbuf = recv_buf_list[read_idx];\n-\t\tinfo = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);\n-\t\tread_idx = otx_ep_incr_index(read_idx, 1, nb_desc);\n-\t\tpkt_len = rte_bswap16(info->length >> 48);\n-\t\tmbuf->pkt_len = pkt_len;\n-\t\tmbuf->data_len = pkt_len;\n-\n-\t\t*(uint64_t *)&mbuf->rearm_data = droq->rearm_data;\n-\t\trx_pkts[pkts] = mbuf;\n-\t\tbytes_rsvd += pkt_len;\n-\t}\n-\tdroq->read_idx = read_idx;\n-\n-\tdroq->refill_count += new_pkts;\n-\tdroq->pkts_pending -= new_pkts;\n-\t/* Stats */\n-\tdroq->stats.pkts_received += new_pkts;\n-\tdroq->stats.bytes_received += bytes_rsvd;\n-}\n+#include \"cnxk_ep_rx.h\"\n \n static __rte_always_inline void\n cnxk_ep_process_pkts_scalar_mseg(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq,\ndiff --git a/drivers/net/octeon_ep/cnxk_ep_rx.h b/drivers/net/octeon_ep/cnxk_ep_rx.h\nnew file mode 100644\nindex 0000000000..e71fc0de5c\n--- /dev/null\n+++ b/drivers/net/octeon_ep/cnxk_ep_rx.h\n@@ -0,0 +1,167 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(C) 2023 Marvell.\n+ */\n+\n+#include <rte_vect.h>\n+\n+#include \"otx_ep_common.h\"\n+#include \"otx2_ep_vf.h\"\n+#include \"otx_ep_rxtx.h\"\n+\n+#define CNXK_EP_OQ_DESC_PER_LOOP_SSE 4\n+#define CNXK_EP_OQ_DESC_PER_LOOP_AVX 8\n+\n+static inline int\n+cnxk_ep_rx_refill_mbuf(struct otx_ep_droq *droq, uint32_t count)\n+{\n+\tstruct otx_ep_droq_desc *desc_ring = droq->desc_ring;\n+\tstruct rte_mbuf **recv_buf_list = droq->recv_buf_list;\n+\tuint32_t refill_idx = droq->refill_idx;\n+\tstruct rte_mbuf *buf;\n+\tuint32_t i;\n+\tint rc;\n+\n+\trc = rte_pktmbuf_alloc_bulk(droq->mpool, &recv_buf_list[refill_idx], count);\n+\tif (unlikely(rc)) {\n+\t\tdroq->stats.rx_alloc_failure++;\n+\t\treturn rc;\n+\t}\n+\n+\tfor (i = 0; i < count; i++) {\n+\t\tbuf = recv_buf_list[refill_idx];\n+\t\tdesc_ring[refill_idx].buffer_ptr = rte_mbuf_data_iova_default(buf);\n+\t\trefill_idx++;\n+\t}\n+\n+\tdroq->refill_idx = otx_ep_incr_index(droq->refill_idx, count, droq->nb_desc);\n+\tdroq->refill_count -= count;\n+\n+\treturn 0;\n+}\n+\n+static inline void\n+cnxk_ep_rx_refill(struct otx_ep_droq *droq)\n+{\n+\tuint32_t desc_refilled = 0, count;\n+\tuint32_t nb_desc = droq->nb_desc;\n+\tuint32_t refill_idx = droq->refill_idx;\n+\tint rc;\n+\n+\tif (unlikely(droq->read_idx == refill_idx))\n+\t\treturn;\n+\n+\tif (refill_idx < droq->read_idx) {\n+\t\tcount = droq->read_idx - refill_idx;\n+\t\trc = cnxk_ep_rx_refill_mbuf(droq, count);\n+\t\tif (unlikely(rc)) {\n+\t\t\tdroq->stats.rx_alloc_failure++;\n+\t\t\treturn;\n+\t\t}\n+\t\tdesc_refilled = count;\n+\t} else {\n+\t\tcount = nb_desc - refill_idx;\n+\t\trc = cnxk_ep_rx_refill_mbuf(droq, count);\n+\t\tif (unlikely(rc)) {\n+\t\t\tdroq->stats.rx_alloc_failure++;\n+\t\t\treturn;\n+\t\t}\n+\n+\t\tdesc_refilled = count;\n+\t\tcount = droq->read_idx;\n+\t\trc = cnxk_ep_rx_refill_mbuf(droq, count);\n+\t\tif (unlikely(rc)) {\n+\t\t\tdroq->stats.rx_alloc_failure++;\n+\t\t\treturn;\n+\t\t}\n+\t\tdesc_refilled += count;\n+\t}\n+\n+\t/* Flush the droq descriptor data to memory to be sure\n+\t * that when we update the credits the data in memory is\n+\t * accurate.\n+\t */\n+\trte_io_wmb();\n+\trte_write32(desc_refilled, droq->pkts_credit_reg);\n+}\n+\n+static inline uint32_t\n+cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)\n+{\n+\tuint32_t new_pkts;\n+\tuint32_t val;\n+\n+\t/* Batch subtractions from the HW counter to reduce PCIe traffic\n+\t * This adds an extra local variable, but almost halves the\n+\t * number of PCIe writes.\n+\t */\n+\tval = __atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED);\n+\tnew_pkts = val - droq->pkts_sent_ism_prev;\n+\tdroq->pkts_sent_ism_prev = val;\n+\n+\tif (val > RTE_BIT32(31)) {\n+\t\t/* Only subtract the packet count in the HW counter\n+\t\t * when count above halfway to saturation.\n+\t\t */\n+\t\trte_write64((uint64_t)val, droq->pkts_sent_reg);\n+\t\trte_mb();\n+\n+\t\trte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);\n+\t\twhile (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) >= val) {\n+\t\t\trte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);\n+\t\t\trte_mb();\n+\t\t}\n+\n+\t\tdroq->pkts_sent_ism_prev = 0;\n+\t}\n+\trte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);\n+\tdroq->pkts_pending += new_pkts;\n+\n+\treturn new_pkts;\n+}\n+\n+static inline int16_t __rte_hot\n+cnxk_ep_rx_pkts_to_process(struct otx_ep_droq *droq, uint16_t nb_pkts)\n+{\n+\tif (droq->pkts_pending < nb_pkts)\n+\t\tcnxk_ep_check_rx_pkts(droq);\n+\n+\treturn RTE_MIN(nb_pkts, droq->pkts_pending);\n+}\n+\n+static __rte_always_inline void\n+cnxk_ep_process_pkts_scalar(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq, uint16_t new_pkts)\n+{\n+\tstruct rte_mbuf **recv_buf_list = droq->recv_buf_list;\n+\tuint32_t bytes_rsvd = 0, read_idx = droq->read_idx;\n+\tuint16_t nb_desc = droq->nb_desc;\n+\tuint16_t pkts;\n+\n+\tfor (pkts = 0; pkts < new_pkts; pkts++) {\n+\t\tstruct otx_ep_droq_info *info;\n+\t\tstruct rte_mbuf *mbuf;\n+\t\tuint16_t pkt_len;\n+\n+\t\trte_prefetch0(recv_buf_list[otx_ep_incr_index(read_idx, 2, nb_desc)]);\n+\t\trte_prefetch0(rte_pktmbuf_mtod(recv_buf_list[otx_ep_incr_index(read_idx,\n+\t\t\t\t\t\t\t\t\t       2, nb_desc)],\n+\t\t\t      void *));\n+\n+\t\tmbuf = recv_buf_list[read_idx];\n+\t\tinfo = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);\n+\t\tread_idx = otx_ep_incr_index(read_idx, 1, nb_desc);\n+\t\tpkt_len = rte_bswap16(info->length >> 48);\n+\t\tmbuf->pkt_len = pkt_len;\n+\t\tmbuf->data_len = pkt_len;\n+\n+\t\t*(uint64_t *)&mbuf->rearm_data = droq->rearm_data;\n+\t\trx_pkts[pkts] = mbuf;\n+\t\tbytes_rsvd += pkt_len;\n+\t}\n+\tdroq->read_idx = read_idx;\n+\n+\tdroq->refill_count += new_pkts;\n+\tdroq->pkts_pending -= new_pkts;\n+\t/* Stats */\n+\tdroq->stats.pkts_received += new_pkts;\n+\tdroq->stats.bytes_received += bytes_rsvd;\n+}\ndiff --git a/drivers/net/octeon_ep/cnxk_ep_rx_sse.c b/drivers/net/octeon_ep/cnxk_ep_rx_sse.c\nnew file mode 100644\nindex 0000000000..afa3616caa\n--- /dev/null\n+++ b/drivers/net/octeon_ep/cnxk_ep_rx_sse.c\n@@ -0,0 +1,130 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(C) 2023 Marvell.\n+ */\n+\n+#include \"cnxk_ep_rx.h\"\n+\n+static __rte_always_inline uint32_t\n+hadd(__m128i x)\n+{\n+\t__m128i hi64 = _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2));\n+\t__m128i sum64 = _mm_add_epi32(hi64, x);\n+\t__m128i hi32 = _mm_shufflelo_epi16(sum64, _MM_SHUFFLE(1, 0, 3, 2));\n+\t__m128i sum32 = _mm_add_epi32(sum64, hi32);\n+\treturn _mm_cvtsi128_si32(sum32);\n+}\n+\n+static __rte_always_inline void\n+cnxk_ep_process_pkts_vec_sse(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq, uint16_t new_pkts)\n+{\n+\tstruct rte_mbuf **recv_buf_list = droq->recv_buf_list;\n+\tuint32_t bytes_rsvd = 0, read_idx = droq->read_idx;\n+\tuint32_t idx0, idx1, idx2, idx3;\n+\tstruct rte_mbuf *m0, *m1, *m2, *m3;\n+\tuint16_t nb_desc = droq->nb_desc;\n+\tuint16_t pkts = 0;\n+\n+\tidx0 = read_idx;\n+\twhile (pkts < new_pkts) {\n+\t\tconst __m128i bswap_mask = _mm_set_epi8(0xFF, 0xFF, 12, 13, 0xFF, 0xFF, 8, 9, 0xFF,\n+\t\t\t\t\t\t\t0xFF, 4, 5, 0xFF, 0xFF, 0, 1);\n+\t\tconst __m128i cpy_mask = _mm_set_epi8(0xFF, 0xFF, 9, 8, 0xFF, 0xFF, 9, 8, 0xFF,\n+\t\t\t\t\t\t      0xFF, 1, 0, 0xFF, 0xFF, 1, 0);\n+\t\t__m128i s01, s23;\n+\n+\t\tidx1 = otx_ep_incr_index(idx0, 1, nb_desc);\n+\t\tidx2 = otx_ep_incr_index(idx1, 1, nb_desc);\n+\t\tidx3 = otx_ep_incr_index(idx2, 1, nb_desc);\n+\n+\t\tm0 = recv_buf_list[idx0];\n+\t\tm1 = recv_buf_list[idx1];\n+\t\tm2 = recv_buf_list[idx2];\n+\t\tm3 = recv_buf_list[idx3];\n+\n+\t\t/* Load packet size big-endian. */\n+\t\ts01 = _mm_set_epi32(rte_pktmbuf_mtod(m3, struct otx_ep_droq_info *)->length >> 48,\n+\t\t\t\t    rte_pktmbuf_mtod(m1, struct otx_ep_droq_info *)->length >> 48,\n+\t\t\t\t    rte_pktmbuf_mtod(m2, struct otx_ep_droq_info *)->length >> 48,\n+\t\t\t\t    rte_pktmbuf_mtod(m0, struct otx_ep_droq_info *)->length >> 48);\n+\t\t/* Convert to littel-endian. */\n+\t\ts01 = _mm_shuffle_epi8(s01, bswap_mask);\n+\t\t/* Horizontal add. */\n+\t\tbytes_rsvd += hadd(s01);\n+\t\t/* Segregate to packet length and data length. */\n+\t\ts23 = _mm_shuffle_epi32(s01, _MM_SHUFFLE(3, 3, 1, 1));\n+\t\ts01 = _mm_shuffle_epi8(s01, cpy_mask);\n+\t\ts23 = _mm_shuffle_epi8(s23, cpy_mask);\n+\n+\t\t/* Store packet length and data length to mbuf. */\n+\t\t*(uint64_t *)&m0->pkt_len = ((rte_xmm_t)s01).u64[0];\n+\t\t*(uint64_t *)&m1->pkt_len = ((rte_xmm_t)s01).u64[1];\n+\t\t*(uint64_t *)&m2->pkt_len = ((rte_xmm_t)s23).u64[0];\n+\t\t*(uint64_t *)&m3->pkt_len = ((rte_xmm_t)s23).u64[1];\n+\n+\t\t/* Reset rearm data. */\n+\t\t*(uint64_t *)&m0->rearm_data = droq->rearm_data;\n+\t\t*(uint64_t *)&m1->rearm_data = droq->rearm_data;\n+\t\t*(uint64_t *)&m2->rearm_data = droq->rearm_data;\n+\t\t*(uint64_t *)&m3->rearm_data = droq->rearm_data;\n+\n+\t\trx_pkts[pkts++] = m0;\n+\t\trx_pkts[pkts++] = m1;\n+\t\trx_pkts[pkts++] = m2;\n+\t\trx_pkts[pkts++] = m3;\n+\t\tidx0 = otx_ep_incr_index(idx3, 1, nb_desc);\n+\t}\n+\tdroq->read_idx = idx0;\n+\n+\tdroq->refill_count += new_pkts;\n+\tdroq->pkts_pending -= new_pkts;\n+\t/* Stats */\n+\tdroq->stats.pkts_received += new_pkts;\n+\tdroq->stats.bytes_received += bytes_rsvd;\n+}\n+\n+uint16_t __rte_noinline __rte_hot\n+cnxk_ep_recv_pkts_sse(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)\n+{\n+\tstruct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;\n+\tuint16_t new_pkts, vpkts;\n+\n+\tnew_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);\n+\tvpkts = RTE_ALIGN_FLOOR(new_pkts, CNXK_EP_OQ_DESC_PER_LOOP_SSE);\n+\tcnxk_ep_process_pkts_vec_sse(rx_pkts, droq, vpkts);\n+\tcnxk_ep_process_pkts_scalar(&rx_pkts[vpkts], droq, new_pkts - vpkts);\n+\n+\t/* Refill RX buffers */\n+\tif (droq->refill_count >= DROQ_REFILL_THRESHOLD)\n+\t\tcnxk_ep_rx_refill(droq);\n+\n+\treturn new_pkts;\n+}\n+\n+uint16_t __rte_noinline __rte_hot\n+cn9k_ep_recv_pkts_sse(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)\n+{\n+\tstruct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;\n+\tuint16_t new_pkts, vpkts;\n+\n+\tnew_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);\n+\tvpkts = RTE_ALIGN_FLOOR(new_pkts, CNXK_EP_OQ_DESC_PER_LOOP_SSE);\n+\tcnxk_ep_process_pkts_vec_sse(rx_pkts, droq, vpkts);\n+\tcnxk_ep_process_pkts_scalar(&rx_pkts[vpkts], droq, new_pkts - vpkts);\n+\n+\t/* Refill RX buffers */\n+\tif (droq->refill_count >= DROQ_REFILL_THRESHOLD) {\n+\t\tcnxk_ep_rx_refill(droq);\n+\t} else {\n+\t\t/* SDP output goes into DROP state when output doorbell count\n+\t\t * goes below drop count. When door bell count is written with\n+\t\t * a value greater than drop count SDP output should come out\n+\t\t * of DROP state. Due to a race condition this is not happening.\n+\t\t * Writing doorbell register with 0 again may make SDP output\n+\t\t * come out of this state.\n+\t\t */\n+\n+\t\trte_write32(0, droq->pkts_credit_reg);\n+\t}\n+\n+\treturn new_pkts;\n+}\ndiff --git a/drivers/net/octeon_ep/meson.build b/drivers/net/octeon_ep/meson.build\nindex 749776d70c..feba1fdf25 100644\n--- a/drivers/net/octeon_ep/meson.build\n+++ b/drivers/net/octeon_ep/meson.build\n@@ -12,3 +12,14 @@ sources = files(\n         'cnxk_ep_rx.c',\n         'cnxk_ep_tx.c',\n )\n+\n+if arch_subdir == 'x86'\n+    sources += files('cnxk_ep_rx_sse.c')\n+endif\n+\n+extra_flags = ['-Wno-strict-aliasing']\n+foreach flag: extra_flags\n+    if cc.has_argument(flag)\n+        cflags += flag\n+    endif\n+endforeach\ndiff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c b/drivers/net/octeon_ep/otx_ep_ethdev.c\nindex 615cbbb648..51b34cdaa0 100644\n--- a/drivers/net/octeon_ep/otx_ep_ethdev.c\n+++ b/drivers/net/octeon_ep/otx_ep_ethdev.c\n@@ -52,10 +52,17 @@ otx_ep_set_rx_func(struct rte_eth_dev *eth_dev)\n \n \tif (otx_epvf->chip_gen == OTX_EP_CN10XX) {\n \t\teth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts;\n+#ifdef RTE_ARCH_X86\n+\t\teth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts_sse;\n+#endif\n \t\tif (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)\n \t\t\teth_dev->rx_pkt_burst = &cnxk_ep_recv_pkts_mseg;\n \t} else if (otx_epvf->chip_gen == OTX_EP_CN9XX) {\n \t\teth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts;\n+#ifdef RTE_ARCH_X86\n+\t\teth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts_sse;\n+#endif\n+\n \t\tif (otx_epvf->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)\n \t\t\teth_dev->rx_pkt_burst = &cn9k_ep_recv_pkts_mseg;\n \t} else {\ndiff --git a/drivers/net/octeon_ep/otx_ep_rxtx.h b/drivers/net/octeon_ep/otx_ep_rxtx.h\nindex b159c32cae..efc41a8275 100644\n--- a/drivers/net/octeon_ep/otx_ep_rxtx.h\n+++ b/drivers/net/octeon_ep/otx_ep_rxtx.h\n@@ -48,12 +48,18 @@ cnxk_ep_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)\n uint16_t\n cnxk_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);\n \n+uint16_t\n+cnxk_ep_recv_pkts_sse(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);\n+\n uint16_t\n cnxk_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);\n \n uint16_t\n cn9k_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);\n \n+uint16_t\n+cn9k_ep_recv_pkts_sse(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);\n+\n uint16_t\n cn9k_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);\n #endif /* _OTX_EP_RXTX_H_ */\n",
    "prefixes": [
        "v3",
        "2/3"
    ]
}