get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.
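For illustration only, a minimal Python sketch of how these operations map onto the REST endpoint, assuming the `requests` library and a Patchwork API token (the token value and the field being changed are hypothetical; write access requires maintainer rights on the project):

import requests

PATCH_URL = "https://patches.dpdk.org/api/patches/91419/"
TOKEN = "0123456789abcdef"  # hypothetical API token; PATCH/PUT require authentication

# get: read the patch (no authentication needed)
patch = requests.get(PATCH_URL).json()
print(patch["name"], patch["state"])

# patch: partially update the patch, e.g. set its state (authenticated request)
resp = requests.patch(
    PATCH_URL,
    headers={"Authorization": f"Token {TOKEN}"},
    json={"state": "accepted"},
)
resp.raise_for_status()

The raw HTTP exchange below shows the same GET request and the JSON it returns.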

GET /api/patches/91419/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 91419,
    "url": "https://patches.dpdk.org/api/patches/91419/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/1618385126-44384-3-git-send-email-wenzhuo.lu@intel.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<1618385126-44384-3-git-send-email-wenzhuo.lu@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/1618385126-44384-3-git-send-email-wenzhuo.lu@intel.com",
    "date": "2021-04-14T07:25:25",
    "name": "[v5,2/3] net/ice: fix segment fault in AVX512",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "31c116105c209923d96f760a80b8a92d952f3d49",
    "submitter": {
        "id": 258,
        "url": "https://patches.dpdk.org/api/people/258/?format=api",
        "name": "Wenzhuo Lu",
        "email": "wenzhuo.lu@intel.com"
    },
    "delegate": {
        "id": 1540,
        "url": "https://patches.dpdk.org/api/users/1540/?format=api",
        "username": "qzhan15",
        "first_name": "Qi",
        "last_name": "Zhang",
        "email": "qi.z.zhang@intel.com"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/1618385126-44384-3-git-send-email-wenzhuo.lu@intel.com/mbox/",
    "series": [
        {
            "id": 16360,
            "url": "https://patches.dpdk.org/api/series/16360/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=16360",
            "date": "2021-04-14T07:25:23",
            "name": "fix segment fault in avx512 code",
            "version": 5,
            "mbox": "https://patches.dpdk.org/series/16360/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/91419/comments/",
    "check": "warning",
    "checks": "https://patches.dpdk.org/api/patches/91419/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 13185A0524;\n\tWed, 14 Apr 2021 09:25:54 +0200 (CEST)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 327D1161781;\n\tWed, 14 Apr 2021 09:25:45 +0200 (CEST)",
            "from mga03.intel.com (mga03.intel.com [134.134.136.65])\n by mails.dpdk.org (Postfix) with ESMTP id 0AE2C161777;\n Wed, 14 Apr 2021 09:25:41 +0200 (CEST)",
            "from orsmga004.jf.intel.com ([10.7.209.38])\n by orsmga103.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 14 Apr 2021 00:25:41 -0700",
            "from dpdk-wenzhuo-haswell.sh.intel.com ([10.67.111.137])\n by orsmga004.jf.intel.com with ESMTP; 14 Apr 2021 00:25:39 -0700"
        ],
        "IronPort-SDR": [
            "\n 0mdTAd9Iy9BbL2y8LwwAjYkEUhPvGOJLMJFXaDXAl4QgDj6QYsjEiPahB71q+D6JY4ZjXibYQS\n MAtUpIsU3v5A==",
            "\n 9mL/PdOvXPprbeOJqp7S4ib2SWzlsMRMoZA9XSnZyEB22tQX6OQKtrkcAa/GAhUGNdYUheLZ8y\n FrhLbh59yVFA=="
        ],
        "X-IronPort-AV": [
            "E=McAfee;i=\"6200,9189,9953\"; a=\"194611467\"",
            "E=Sophos;i=\"5.82,221,1613462400\"; d=\"scan'208\";a=\"194611467\"",
            "E=Sophos;i=\"5.82,221,1613462400\"; d=\"scan'208\";a=\"532687063\""
        ],
        "X-ExtLoop1": "1",
        "From": "Wenzhuo Lu <wenzhuo.lu@intel.com>",
        "To": "dev@dpdk.org",
        "Cc": "Wenzhuo Lu <wenzhuo.lu@intel.com>,\n\tstable@dpdk.org",
        "Date": "Wed, 14 Apr 2021 15:25:25 +0800",
        "Message-Id": "<1618385126-44384-3-git-send-email-wenzhuo.lu@intel.com>",
        "X-Mailer": "git-send-email 1.9.3",
        "In-Reply-To": "<1618385126-44384-1-git-send-email-wenzhuo.lu@intel.com>",
        "References": "<1617937317-130223-1-git-send-email-wenzhuo.lu@intel.com>\n <1618385126-44384-1-git-send-email-wenzhuo.lu@intel.com>",
        "Subject": "[dpdk-dev] [PATCH v5 2/3] net/ice: fix segment fault in AVX512",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Fix segment fault when failing to get the memory from the pool.\nIf there's no memory in the default cache, fall back to the\nprevious process.\n\nThe previous AVX2 rearm function is changed to add some AVX512\nintructions and changed to a callee of the AVX2 and AVX512\nrearm functions.\n\nFixes: 7f85d5ebcfe1 (\"net/ice: add AVX512 vector path\")\nCc: stable@dpdk.org\n\nReported-by: David Coyle <david.coyle@intel.com>\nSigned-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>\n---\n drivers/net/ice/ice_rxtx_vec_avx2.c   | 120 +-------------------\n drivers/net/ice/ice_rxtx_vec_avx512.c |   5 +-\n drivers/net/ice/ice_rxtx_vec_common.h | 203 ++++++++++++++++++++++++++++++++++\n drivers/net/ice/meson.build           |   2 +\n 4 files changed, 211 insertions(+), 119 deletions(-)",
    "diff": "diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c\nindex 25efd30..83dcdf1 100644\n--- a/drivers/net/ice/ice_rxtx_vec_avx2.c\n+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c\n@@ -10,126 +10,10 @@\n #pragma GCC diagnostic ignored \"-Wcast-qual\"\n #endif\n \n-static inline void\n+static __rte_always_inline void\n ice_rxq_rearm(struct ice_rx_queue *rxq)\n {\n-\tint i;\n-\tuint16_t rx_id;\n-\tvolatile union ice_rx_flex_desc *rxdp;\n-\tstruct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];\n-\n-\trxdp = rxq->rx_ring + rxq->rxrearm_start;\n-\n-\t/* Pull 'n' more MBUFs into the software ring */\n-\tif (rte_mempool_get_bulk(rxq->mp,\n-\t\t\t\t (void *)rxep,\n-\t\t\t\t ICE_RXQ_REARM_THRESH) < 0) {\n-\t\tif (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=\n-\t\t    rxq->nb_rx_desc) {\n-\t\t\t__m128i dma_addr0;\n-\n-\t\t\tdma_addr0 = _mm_setzero_si128();\n-\t\t\tfor (i = 0; i < ICE_DESCS_PER_LOOP; i++) {\n-\t\t\t\trxep[i].mbuf = &rxq->fake_mbuf;\n-\t\t\t\t_mm_store_si128((__m128i *)&rxdp[i].read,\n-\t\t\t\t\t\tdma_addr0);\n-\t\t\t}\n-\t\t}\n-\t\trte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=\n-\t\t\tICE_RXQ_REARM_THRESH;\n-\t\treturn;\n-\t}\n-\n-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC\n-\tstruct rte_mbuf *mb0, *mb1;\n-\t__m128i dma_addr0, dma_addr1;\n-\t__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,\n-\t\t\tRTE_PKTMBUF_HEADROOM);\n-\t/* Initialize the mbufs in vector, process 2 mbufs in one loop */\n-\tfor (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {\n-\t\t__m128i vaddr0, vaddr1;\n-\n-\t\tmb0 = rxep[0].mbuf;\n-\t\tmb1 = rxep[1].mbuf;\n-\n-\t\t/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */\n-\t\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=\n-\t\t\t\toffsetof(struct rte_mbuf, buf_addr) + 8);\n-\t\tvaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);\n-\t\tvaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);\n-\n-\t\t/* convert pa to dma_addr hdr/data */\n-\t\tdma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);\n-\t\tdma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);\n-\n-\t\t/* add headroom to pa values */\n-\t\tdma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);\n-\t\tdma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);\n-\n-\t\t/* flush desc with pa dma_addr */\n-\t\t_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);\n-\t\t_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);\n-\t}\n-#else\n-\tstruct rte_mbuf *mb0, *mb1, *mb2, *mb3;\n-\t__m256i dma_addr0_1, dma_addr2_3;\n-\t__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);\n-\t/* Initialize the mbufs in vector, process 4 mbufs in one loop */\n-\tfor (i = 0; i < ICE_RXQ_REARM_THRESH;\n-\t\t\ti += 4, rxep += 4, rxdp += 4) {\n-\t\t__m128i vaddr0, vaddr1, vaddr2, vaddr3;\n-\t\t__m256i vaddr0_1, vaddr2_3;\n-\n-\t\tmb0 = rxep[0].mbuf;\n-\t\tmb1 = rxep[1].mbuf;\n-\t\tmb2 = rxep[2].mbuf;\n-\t\tmb3 = rxep[3].mbuf;\n-\n-\t\t/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */\n-\t\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=\n-\t\t\t\toffsetof(struct rte_mbuf, buf_addr) + 8);\n-\t\tvaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);\n-\t\tvaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);\n-\t\tvaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);\n-\t\tvaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);\n-\n-\t\t/**\n-\t\t * merge 0 & 1, by casting 0 to 256-bit and inserting 1\n-\t\t * into the high lanes. 
Similarly for 2 & 3\n-\t\t */\n-\t\tvaddr0_1 =\n-\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),\n-\t\t\t\t\t\tvaddr1, 1);\n-\t\tvaddr2_3 =\n-\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),\n-\t\t\t\t\t\tvaddr3, 1);\n-\n-\t\t/* convert pa to dma_addr hdr/data */\n-\t\tdma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);\n-\t\tdma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);\n-\n-\t\t/* add headroom to pa values */\n-\t\tdma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);\n-\t\tdma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);\n-\n-\t\t/* flush desc with pa dma_addr */\n-\t\t_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);\n-\t\t_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);\n-\t}\n-\n-#endif\n-\n-\trxq->rxrearm_start += ICE_RXQ_REARM_THRESH;\n-\tif (rxq->rxrearm_start >= rxq->nb_rx_desc)\n-\t\trxq->rxrearm_start = 0;\n-\n-\trxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;\n-\n-\trx_id = (uint16_t)((rxq->rxrearm_start == 0) ?\n-\t\t\t     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));\n-\n-\t/* Update the tail pointer on the NIC */\n-\tICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);\n+\treturn ice_rxq_rearm_common(rxq, false);\n }\n \n static inline __m256i\ndiff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c b/drivers/net/ice/ice_rxtx_vec_avx512.c\nindex 0a3e8da..a668b82 100644\n--- a/drivers/net/ice/ice_rxtx_vec_avx512.c\n+++ b/drivers/net/ice/ice_rxtx_vec_avx512.c\n@@ -12,7 +12,7 @@\n \n #define ICE_DESCS_PER_LOOP_AVX 8\n \n-static inline void\n+static __rte_always_inline void\n ice_rxq_rearm(struct ice_rx_queue *rxq)\n {\n \tint i;\n@@ -24,6 +24,9 @@\n \n \trxdp = rxq->rx_ring + rxq->rxrearm_start;\n \n+\tif (unlikely(!cache))\n+\t\treturn ice_rxq_rearm_common(rxq, true);\n+\n \t/* We need to pull 'n' more MBUFs into the software ring */\n \tif (cache->len < ICE_RXQ_REARM_THRESH) {\n \t\tuint32_t req = ICE_RXQ_REARM_THRESH + (cache->size -\ndiff --git a/drivers/net/ice/ice_rxtx_vec_common.h b/drivers/net/ice/ice_rxtx_vec_common.h\nindex c09ac7f..a5d76a2 100644\n--- a/drivers/net/ice/ice_rxtx_vec_common.h\n+++ b/drivers/net/ice/ice_rxtx_vec_common.h\n@@ -7,6 +7,10 @@\n \n #include \"ice_rxtx.h\"\n \n+#ifndef __INTEL_COMPILER\n+#pragma GCC diagnostic ignored \"-Wcast-qual\"\n+#endif\n+\n static inline uint16_t\n ice_rx_reassemble_packets(struct ice_rx_queue *rxq, struct rte_mbuf **rx_bufs,\n \t\t\t  uint16_t nb_bufs, uint8_t *split_flags)\n@@ -318,4 +322,203 @@\n \treturn 0;\n }\n \n+#ifdef CC_AVX2_SUPPORT\n+static __rte_always_inline void\n+ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)\n+{\n+\tint i;\n+\tuint16_t rx_id;\n+\tvolatile union ice_rx_flex_desc *rxdp;\n+\tstruct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];\n+\n+\trxdp = rxq->rx_ring + rxq->rxrearm_start;\n+\n+\t/* Pull 'n' more MBUFs into the software ring */\n+\tif (rte_mempool_get_bulk(rxq->mp,\n+\t\t\t\t (void *)rxep,\n+\t\t\t\t ICE_RXQ_REARM_THRESH) < 0) {\n+\t\tif (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=\n+\t\t    rxq->nb_rx_desc) {\n+\t\t\t__m128i dma_addr0;\n+\n+\t\t\tdma_addr0 = _mm_setzero_si128();\n+\t\t\tfor (i = 0; i < ICE_DESCS_PER_LOOP; i++) {\n+\t\t\t\trxep[i].mbuf = &rxq->fake_mbuf;\n+\t\t\t\t_mm_store_si128((__m128i *)&rxdp[i].read,\n+\t\t\t\t\t\tdma_addr0);\n+\t\t\t}\n+\t\t}\n+\t\trte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=\n+\t\t\tICE_RXQ_REARM_THRESH;\n+\t\treturn;\n+\t}\n+\n+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC\n+\tstruct rte_mbuf *mb0, *mb1;\n+\t__m128i dma_addr0, dma_addr1;\n+\t__m128i 
hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,\n+\t\t\tRTE_PKTMBUF_HEADROOM);\n+\t/* Initialize the mbufs in vector, process 2 mbufs in one loop */\n+\tfor (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {\n+\t\t__m128i vaddr0, vaddr1;\n+\n+\t\tmb0 = rxep[0].mbuf;\n+\t\tmb1 = rxep[1].mbuf;\n+\n+\t\t/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */\n+\t\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=\n+\t\t\t\toffsetof(struct rte_mbuf, buf_addr) + 8);\n+\t\tvaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);\n+\t\tvaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);\n+\n+\t\t/* convert pa to dma_addr hdr/data */\n+\t\tdma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);\n+\t\tdma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);\n+\n+\t\t/* add headroom to pa values */\n+\t\tdma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);\n+\t\tdma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);\n+\n+\t\t/* flush desc with pa dma_addr */\n+\t\t_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);\n+\t\t_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);\n+\t}\n+#else\n+#ifdef CC_AVX512_SUPPORT\n+\tif (avx512) {\n+\t\tstruct rte_mbuf *mb0, *mb1, *mb2, *mb3;\n+\t\tstruct rte_mbuf *mb4, *mb5, *mb6, *mb7;\n+\t\t__m512i dma_addr0_3, dma_addr4_7;\n+\t\t__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);\n+\t\t/* Initialize the mbufs in vector, process 8 mbufs in one loop */\n+\t\tfor (i = 0; i < ICE_RXQ_REARM_THRESH;\n+\t\t\t\ti += 8, rxep += 8, rxdp += 8) {\n+\t\t\t__m128i vaddr0, vaddr1, vaddr2, vaddr3;\n+\t\t\t__m128i vaddr4, vaddr5, vaddr6, vaddr7;\n+\t\t\t__m256i vaddr0_1, vaddr2_3;\n+\t\t\t__m256i vaddr4_5, vaddr6_7;\n+\t\t\t__m512i vaddr0_3, vaddr4_7;\n+\n+\t\t\tmb0 = rxep[0].mbuf;\n+\t\t\tmb1 = rxep[1].mbuf;\n+\t\t\tmb2 = rxep[2].mbuf;\n+\t\t\tmb3 = rxep[3].mbuf;\n+\t\t\tmb4 = rxep[4].mbuf;\n+\t\t\tmb5 = rxep[5].mbuf;\n+\t\t\tmb6 = rxep[6].mbuf;\n+\t\t\tmb7 = rxep[7].mbuf;\n+\n+\t\t\t/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */\n+\t\t\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=\n+\t\t\t\t\toffsetof(struct rte_mbuf, buf_addr) + 8);\n+\t\t\tvaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);\n+\t\t\tvaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);\n+\t\t\tvaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);\n+\t\t\tvaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);\n+\t\t\tvaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);\n+\t\t\tvaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);\n+\t\t\tvaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);\n+\t\t\tvaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);\n+\n+\t\t\t/**\n+\t\t\t * merge 0 & 1, by casting 0 to 256-bit and inserting 1\n+\t\t\t * into the high lanes. 
Similarly for 2 & 3, and so on.\n+\t\t\t */\n+\t\t\tvaddr0_1 =\n+\t\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),\n+\t\t\t\t\t\t\tvaddr1, 1);\n+\t\t\tvaddr2_3 =\n+\t\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),\n+\t\t\t\t\t\t\tvaddr3, 1);\n+\t\t\tvaddr4_5 =\n+\t\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),\n+\t\t\t\t\t\t\tvaddr5, 1);\n+\t\t\tvaddr6_7 =\n+\t\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),\n+\t\t\t\t\t\t\tvaddr7, 1);\n+\t\t\tvaddr0_3 =\n+\t\t\t\t_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),\n+\t\t\t\t\t\t\tvaddr2_3, 1);\n+\t\t\tvaddr4_7 =\n+\t\t\t\t_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),\n+\t\t\t\t\t\t\tvaddr6_7, 1);\n+\n+\t\t\t/* convert pa to dma_addr hdr/data */\n+\t\t\tdma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);\n+\t\t\tdma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);\n+\n+\t\t\t/* add headroom to pa values */\n+\t\t\tdma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);\n+\t\t\tdma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);\n+\n+\t\t\t/* flush desc with pa dma_addr */\n+\t\t\t_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);\n+\t\t\t_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);\n+\t\t}\n+\t} else\n+#endif\n+\t{\n+\t\tstruct rte_mbuf *mb0, *mb1, *mb2, *mb3;\n+\t\t__m256i dma_addr0_1, dma_addr2_3;\n+\t\t__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);\n+\t\t/* Initialize the mbufs in vector, process 4 mbufs in one loop */\n+\t\tfor (i = 0; i < ICE_RXQ_REARM_THRESH;\n+\t\t\t\ti += 4, rxep += 4, rxdp += 4) {\n+\t\t\t__m128i vaddr0, vaddr1, vaddr2, vaddr3;\n+\t\t\t__m256i vaddr0_1, vaddr2_3;\n+\n+\t\t\tmb0 = rxep[0].mbuf;\n+\t\t\tmb1 = rxep[1].mbuf;\n+\t\t\tmb2 = rxep[2].mbuf;\n+\t\t\tmb3 = rxep[3].mbuf;\n+\n+\t\t\t/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */\n+\t\t\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=\n+\t\t\t\t\toffsetof(struct rte_mbuf, buf_addr) + 8);\n+\t\t\tvaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);\n+\t\t\tvaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);\n+\t\t\tvaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);\n+\t\t\tvaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);\n+\n+\t\t\t/**\n+\t\t\t * merge 0 & 1, by casting 0 to 256-bit and inserting 1\n+\t\t\t * into the high lanes. 
Similarly for 2 & 3\n+\t\t\t */\n+\t\t\tvaddr0_1 =\n+\t\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),\n+\t\t\t\t\t\t\tvaddr1, 1);\n+\t\t\tvaddr2_3 =\n+\t\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),\n+\t\t\t\t\t\t\tvaddr3, 1);\n+\n+\t\t\t/* convert pa to dma_addr hdr/data */\n+\t\t\tdma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);\n+\t\t\tdma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);\n+\n+\t\t\t/* add headroom to pa values */\n+\t\t\tdma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);\n+\t\t\tdma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);\n+\n+\t\t\t/* flush desc with pa dma_addr */\n+\t\t\t_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);\n+\t\t\t_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);\n+\t\t}\n+\t}\n+\n+#endif\n+\n+\trxq->rxrearm_start += ICE_RXQ_REARM_THRESH;\n+\tif (rxq->rxrearm_start >= rxq->nb_rx_desc)\n+\t\trxq->rxrearm_start = 0;\n+\n+\trxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;\n+\n+\trx_id = (uint16_t)((rxq->rxrearm_start == 0) ?\n+\t\t\t     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));\n+\n+\t/* Update the tail pointer on the NIC */\n+\tICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);\n+}\n+#endif\n+\n #endif\ndiff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build\nindex 44ef64b..b2d0b66 100644\n--- a/drivers/net/ice/meson.build\n+++ b/drivers/net/ice/meson.build\n@@ -28,8 +28,10 @@ if arch_subdir == 'x86'\n \t# a. we have AVX supported in minimum instruction set baseline\n \t# b. it's not minimum instruction set, but supported by compiler\n \tif cc.get_define('__AVX2__', args: machine_args) != ''\n+\t\tcflags += ['-DCC_AVX2_SUPPORT']\n \t\tsources += files('ice_rxtx_vec_avx2.c')\n \telif cc.has_argument('-mavx2')\n+\t\tcflags += ['-DCC_AVX2_SUPPORT']\n \t\tice_avx2_lib = static_library('ice_avx2_lib',\n \t\t\t\t'ice_rxtx_vec_avx2.c',\n \t\t\t\tdependencies: [static_rte_ethdev,\n",
    "prefixes": [
        "v5",
        "2/3"
    ]
}
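
As a further hedged sketch (again assuming the `requests` library, plus a local DPDK checkout in ./dpdk; the file and directory names are illustrative), the "mbox" URL in the response above can be downloaded and applied to a tree with git:

import subprocess
import requests

patch = requests.get("https://patches.dpdk.org/api/patches/91419/").json()

# Download the raw mbox the API points at and save it locally
mbox = requests.get(patch["mbox"]).text
with open("91419.mbox", "w") as f:
    f.write(mbox)

# Apply the mail as a commit in the checked-out tree (path is illustrative)
subprocess.run(["git", "am", "../91419.mbox"], cwd="dpdk", check=True)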