get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/90043/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 90043,
    "url": "http://patches.dpdk.org/api/patches/90043/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/1617074128-50681-2-git-send-email-wenzhuo.lu@intel.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<1617074128-50681-2-git-send-email-wenzhuo.lu@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/1617074128-50681-2-git-send-email-wenzhuo.lu@intel.com",
    "date": "2021-03-30T03:15:26",
    "name": "[v2,1/3] net/iavf: fix segment fault in AVX512",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "724a5f0f981c8400cf717a01fd5b628165688584",
    "submitter": {
        "id": 258,
        "url": "http://patches.dpdk.org/api/people/258/?format=api",
        "name": "Wenzhuo Lu",
        "email": "wenzhuo.lu@intel.com"
    },
    "delegate": {
        "id": 1540,
        "url": "http://patches.dpdk.org/api/users/1540/?format=api",
        "username": "qzhan15",
        "first_name": "Qi",
        "last_name": "Zhang",
        "email": "qi.z.zhang@intel.com"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/1617074128-50681-2-git-send-email-wenzhuo.lu@intel.com/mbox/",
    "series": [
        {
            "id": 15955,
            "url": "http://patches.dpdk.org/api/series/15955/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=15955",
            "date": "2021-03-30T03:15:25",
            "name": "fix segment fault in avx512",
            "version": 2,
            "mbox": "http://patches.dpdk.org/series/15955/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/90043/comments/",
    "check": "warning",
    "checks": "http://patches.dpdk.org/api/patches/90043/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 0B504A034F;\n\tTue, 30 Mar 2021 05:15:47 +0200 (CEST)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id E28AD140F65;\n\tTue, 30 Mar 2021 05:15:43 +0200 (CEST)",
            "from mga17.intel.com (mga17.intel.com [192.55.52.151])\n by mails.dpdk.org (Postfix) with ESMTP id C78EE406B4;\n Tue, 30 Mar 2021 05:15:40 +0200 (CEST)",
            "from fmsmga008.fm.intel.com ([10.253.24.58])\n by fmsmga107.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 29 Mar 2021 20:15:40 -0700",
            "from dpdk-wenzhuo-haswell.sh.intel.com ([10.67.111.137])\n by fmsmga008.fm.intel.com with ESMTP; 29 Mar 2021 20:15:39 -0700"
        ],
        "IronPort-SDR": [
            "\n 6kuORpe79fGFJb4L0lCHJ8Wi8Dl7BBHNKsOJGGBvTV5atwQWZDG9eX/lVoB0yADUPj6vVQTDfQ\n RkIgDDkmnTOA==",
            "\n SYDDxj1graiWYpcPlsLQ0WlTEfANl4i+AHojvWKZBOs/eYC64Snhjz0x/HaG+nBL/W3529RViB\n CEboV1d8bc6g=="
        ],
        "X-IronPort-AV": [
            "E=McAfee;i=\"6000,8403,9938\"; a=\"171693012\"",
            "E=Sophos;i=\"5.81,289,1610438400\"; d=\"scan'208\";a=\"171693012\"",
            "E=Sophos;i=\"5.81,289,1610438400\"; d=\"scan'208\";a=\"411396056\""
        ],
        "X-ExtLoop1": "1",
        "From": "Wenzhuo Lu <wenzhuo.lu@intel.com>",
        "To": "dev@dpdk.org",
        "Cc": "Wenzhuo Lu <wenzhuo.lu@intel.com>,\n\tstable@dpdk.org",
        "Date": "Tue, 30 Mar 2021 11:15:26 +0800",
        "Message-Id": "<1617074128-50681-2-git-send-email-wenzhuo.lu@intel.com>",
        "X-Mailer": "git-send-email 1.9.3",
        "In-Reply-To": "<1617074128-50681-1-git-send-email-wenzhuo.lu@intel.com>",
        "References": "<1617074128-50681-1-git-send-email-wenzhuo.lu@intel.com>",
        "Subject": "[dpdk-dev] [PATCH v2 1/3] net/iavf: fix segment fault in AVX512",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Fix segment fault when failing to get the memory from the pool.\n\nFixes: 31737f2b66fb (\"net/iavf: enable AVX512 for legacy Rx\")\nCc: stable@dpdk.org\n\nReported-by: David Coyle <David.Coyle@intel.com>\nSigned-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>\n---\n drivers/net/iavf/iavf_rxtx_vec_avx2.c   | 120 +------------------\n drivers/net/iavf/iavf_rxtx_vec_avx512.c |   5 +-\n drivers/net/iavf/iavf_rxtx_vec_common.h | 201 ++++++++++++++++++++++++++++++++\n 3 files changed, 207 insertions(+), 119 deletions(-)",
    "diff": "diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c\nindex cdb5139..2c2b139 100644\n--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c\n+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c\n@@ -10,126 +10,10 @@\n #pragma GCC diagnostic ignored \"-Wcast-qual\"\n #endif\n \n-static inline void\n+static __rte_always_inline void\n iavf_rxq_rearm(struct iavf_rx_queue *rxq)\n {\n-\tint i;\n-\tuint16_t rx_id;\n-\tvolatile union iavf_rx_desc *rxdp;\n-\tstruct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];\n-\n-\trxdp = rxq->rx_ring + rxq->rxrearm_start;\n-\n-\t/* Pull 'n' more MBUFs into the software ring */\n-\tif (rte_mempool_get_bulk(rxq->mp,\n-\t\t\t\t (void *)rxp,\n-\t\t\t\t IAVF_RXQ_REARM_THRESH) < 0) {\n-\t\tif (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=\n-\t\t    rxq->nb_rx_desc) {\n-\t\t\t__m128i dma_addr0;\n-\n-\t\t\tdma_addr0 = _mm_setzero_si128();\n-\t\t\tfor (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {\n-\t\t\t\trxp[i] = &rxq->fake_mbuf;\n-\t\t\t\t_mm_store_si128((__m128i *)&rxdp[i].read,\n-\t\t\t\t\t\tdma_addr0);\n-\t\t\t}\n-\t\t}\n-\t\trte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=\n-\t\t\tIAVF_RXQ_REARM_THRESH;\n-\t\treturn;\n-\t}\n-\n-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC\n-\tstruct rte_mbuf *mb0, *mb1;\n-\t__m128i dma_addr0, dma_addr1;\n-\t__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,\n-\t\t\tRTE_PKTMBUF_HEADROOM);\n-\t/* Initialize the mbufs in vector, process 2 mbufs in one loop */\n-\tfor (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {\n-\t\t__m128i vaddr0, vaddr1;\n-\n-\t\tmb0 = rxp[0];\n-\t\tmb1 = rxp[1];\n-\n-\t\t/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */\n-\t\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=\n-\t\t\t\toffsetof(struct rte_mbuf, buf_addr) + 8);\n-\t\tvaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);\n-\t\tvaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);\n-\n-\t\t/* convert pa to dma_addr hdr/data */\n-\t\tdma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);\n-\t\tdma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);\n-\n-\t\t/* add headroom to pa values */\n-\t\tdma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);\n-\t\tdma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);\n-\n-\t\t/* flush desc with pa dma_addr */\n-\t\t_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);\n-\t\t_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);\n-\t}\n-#else\n-\tstruct rte_mbuf *mb0, *mb1, *mb2, *mb3;\n-\t__m256i dma_addr0_1, dma_addr2_3;\n-\t__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);\n-\t/* Initialize the mbufs in vector, process 4 mbufs in one loop */\n-\tfor (i = 0; i < IAVF_RXQ_REARM_THRESH;\n-\t\t\ti += 4, rxp += 4, rxdp += 4) {\n-\t\t__m128i vaddr0, vaddr1, vaddr2, vaddr3;\n-\t\t__m256i vaddr0_1, vaddr2_3;\n-\n-\t\tmb0 = rxp[0];\n-\t\tmb1 = rxp[1];\n-\t\tmb2 = rxp[2];\n-\t\tmb3 = rxp[3];\n-\n-\t\t/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */\n-\t\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=\n-\t\t\t\toffsetof(struct rte_mbuf, buf_addr) + 8);\n-\t\tvaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);\n-\t\tvaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);\n-\t\tvaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);\n-\t\tvaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);\n-\n-\t\t/**\n-\t\t * merge 0 & 1, by casting 0 to 256-bit and inserting 1\n-\t\t * into the high lanes. Similarly for 2 & 3\n-\t\t */\n-\t\tvaddr0_1 =\n-\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),\n-\t\t\t\t\t\tvaddr1, 1);\n-\t\tvaddr2_3 =\n-\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),\n-\t\t\t\t\t\tvaddr3, 1);\n-\n-\t\t/* convert pa to dma_addr hdr/data */\n-\t\tdma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);\n-\t\tdma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);\n-\n-\t\t/* add headroom to pa values */\n-\t\tdma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);\n-\t\tdma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);\n-\n-\t\t/* flush desc with pa dma_addr */\n-\t\t_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);\n-\t\t_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);\n-\t}\n-\n-#endif\n-\n-\trxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;\n-\tif (rxq->rxrearm_start >= rxq->nb_rx_desc)\n-\t\trxq->rxrearm_start = 0;\n-\n-\trxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;\n-\n-\trx_id = (uint16_t)((rxq->rxrearm_start == 0) ?\n-\t\t\t     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));\n-\n-\t/* Update the tail pointer on the NIC */\n-\tIAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);\n+\treturn iavf_rxq_rearm_cmn(rxq, false);\n }\n \n #define PKTLEN_SHIFT     10\ndiff --git a/drivers/net/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/iavf/iavf_rxtx_vec_avx512.c\nindex 5cb4c7c..acd5e54 100644\n--- a/drivers/net/iavf/iavf_rxtx_vec_avx512.c\n+++ b/drivers/net/iavf/iavf_rxtx_vec_avx512.c\n@@ -13,7 +13,7 @@\n #define IAVF_DESCS_PER_LOOP_AVX 8\n #define PKTLEN_SHIFT 10\n \n-static inline void\n+static __rte_always_inline void\n iavf_rxq_rearm(struct iavf_rx_queue *rxq)\n {\n \tint i;\n@@ -25,6 +25,9 @@\n \n \trxdp = rxq->rx_ring + rxq->rxrearm_start;\n \n+\tif (!cache)\n+\t\treturn iavf_rxq_rearm_cmn(rxq, true);\n+\n \t/* We need to pull 'n' more MBUFs into the software ring from mempool\n \t * We inline the mempool function here, so we can vectorize the copy\n \t * from the cache into the shadow ring.\ndiff --git a/drivers/net/iavf/iavf_rxtx_vec_common.h b/drivers/net/iavf/iavf_rxtx_vec_common.h\nindex 46a1873..c4cc544 100644\n--- a/drivers/net/iavf/iavf_rxtx_vec_common.h\n+++ b/drivers/net/iavf/iavf_rxtx_vec_common.h\n@@ -11,6 +11,10 @@\n #include \"iavf.h\"\n #include \"iavf_rxtx.h\"\n \n+#ifndef __INTEL_COMPILER\n+#pragma GCC diagnostic ignored \"-Wcast-qual\"\n+#endif\n+\n static inline uint16_t\n reassemble_packets(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_bufs,\n \t\t   uint16_t nb_bufs, uint8_t *split_flags)\n@@ -276,4 +280,201 @@\n \treturn 0;\n }\n \n+static __rte_always_inline void\n+iavf_rxq_rearm_cmn(struct iavf_rx_queue *rxq, __rte_unused bool avx512)\n+{\n+\tint i;\n+\tuint16_t rx_id;\n+\tvolatile union iavf_rx_desc *rxdp;\n+\tstruct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];\n+\n+\trxdp = rxq->rx_ring + rxq->rxrearm_start;\n+\n+\t/* Pull 'n' more MBUFs into the software ring */\n+\tif (rte_mempool_get_bulk(rxq->mp,\n+\t\t\t\t (void *)rxp,\n+\t\t\t\t IAVF_RXQ_REARM_THRESH) < 0) {\n+\t\tif (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=\n+\t\t    rxq->nb_rx_desc) {\n+\t\t\t__m128i dma_addr0;\n+\n+\t\t\tdma_addr0 = _mm_setzero_si128();\n+\t\t\tfor (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {\n+\t\t\t\trxp[i] = &rxq->fake_mbuf;\n+\t\t\t\t_mm_store_si128((__m128i *)&rxdp[i].read,\n+\t\t\t\t\t\tdma_addr0);\n+\t\t\t}\n+\t\t}\n+\t\trte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=\n+\t\t\tIAVF_RXQ_REARM_THRESH;\n+\t\treturn;\n+\t}\n+\n+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC\n+\tstruct rte_mbuf *mb0, *mb1;\n+\t__m128i dma_addr0, dma_addr1;\n+\t__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,\n+\t\t\tRTE_PKTMBUF_HEADROOM);\n+\t/* Initialize the mbufs in vector, process 2 mbufs in one loop */\n+\tfor (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {\n+\t\t__m128i vaddr0, vaddr1;\n+\n+\t\tmb0 = rxp[0];\n+\t\tmb1 = rxp[1];\n+\n+\t\t/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */\n+\t\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=\n+\t\t\t\toffsetof(struct rte_mbuf, buf_addr) + 8);\n+\t\tvaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);\n+\t\tvaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);\n+\n+\t\t/* convert pa to dma_addr hdr/data */\n+\t\tdma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);\n+\t\tdma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);\n+\n+\t\t/* add headroom to pa values */\n+\t\tdma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);\n+\t\tdma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);\n+\n+\t\t/* flush desc with pa dma_addr */\n+\t\t_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);\n+\t\t_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);\n+\t}\n+#else\n+#ifdef CC_AVX512_SUPPORT\n+\tif (avx512) {\n+\t\tstruct rte_mbuf *mb0, *mb1, *mb2, *mb3;\n+\t\tstruct rte_mbuf *mb4, *mb5, *mb6, *mb7;\n+\t\t__m512i dma_addr0_3, dma_addr4_7;\n+\t\t__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);\n+\t\t/* Initialize the mbufs in vector, process 8 mbufs in one loop */\n+\t\tfor (i = 0; i < IAVF_RXQ_REARM_THRESH;\n+\t\t\t\ti += 8, rxp += 8, rxdp += 8) {\n+\t\t\t__m128i vaddr0, vaddr1, vaddr2, vaddr3;\n+\t\t\t__m128i vaddr4, vaddr5, vaddr6, vaddr7;\n+\t\t\t__m256i vaddr0_1, vaddr2_3;\n+\t\t\t__m256i vaddr4_5, vaddr6_7;\n+\t\t\t__m512i vaddr0_3, vaddr4_7;\n+\n+\t\t\tmb0 = rxp[0];\n+\t\t\tmb1 = rxp[1];\n+\t\t\tmb2 = rxp[2];\n+\t\t\tmb3 = rxp[3];\n+\t\t\tmb4 = rxp[4];\n+\t\t\tmb5 = rxp[5];\n+\t\t\tmb6 = rxp[6];\n+\t\t\tmb7 = rxp[7];\n+\n+\t\t\t/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */\n+\t\t\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=\n+\t\t\t\t\toffsetof(struct rte_mbuf, buf_addr) + 8);\n+\t\t\tvaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);\n+\t\t\tvaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);\n+\t\t\tvaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);\n+\t\t\tvaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);\n+\t\t\tvaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);\n+\t\t\tvaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);\n+\t\t\tvaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);\n+\t\t\tvaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);\n+\n+\t\t\t/**\n+\t\t\t * merge 0 & 1, by casting 0 to 256-bit and inserting 1\n+\t\t\t * into the high lanes. Similarly for 2 & 3, and so on.\n+\t\t\t */\n+\t\t\tvaddr0_1 =\n+\t\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),\n+\t\t\t\t\t\t\tvaddr1, 1);\n+\t\t\tvaddr2_3 =\n+\t\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),\n+\t\t\t\t\t\t\tvaddr3, 1);\n+\t\t\tvaddr4_5 =\n+\t\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),\n+\t\t\t\t\t\t\tvaddr5, 1);\n+\t\t\tvaddr6_7 =\n+\t\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),\n+\t\t\t\t\t\t\tvaddr7, 1);\n+\t\t\tvaddr0_3 =\n+\t\t\t\t_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),\n+\t\t\t\t\t\t\tvaddr2_3, 1);\n+\t\t\tvaddr4_7 =\n+\t\t\t\t_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),\n+\t\t\t\t\t\t\tvaddr6_7, 1);\n+\n+\t\t\t/* convert pa to dma_addr hdr/data */\n+\t\t\tdma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);\n+\t\t\tdma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);\n+\n+\t\t\t/* add headroom to pa values */\n+\t\t\tdma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);\n+\t\t\tdma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);\n+\n+\t\t\t/* flush desc with pa dma_addr */\n+\t\t\t_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);\n+\t\t\t_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);\n+\t\t}\n+\t} else\n+#endif\n+\t{\n+\t\tstruct rte_mbuf *mb0, *mb1, *mb2, *mb3;\n+\t\t__m256i dma_addr0_1, dma_addr2_3;\n+\t\t__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);\n+\t\t/* Initialize the mbufs in vector, process 4 mbufs in one loop */\n+\t\tfor (i = 0; i < IAVF_RXQ_REARM_THRESH;\n+\t\t\t\ti += 4, rxp += 4, rxdp += 4) {\n+\t\t\t__m128i vaddr0, vaddr1, vaddr2, vaddr3;\n+\t\t\t__m256i vaddr0_1, vaddr2_3;\n+\n+\t\t\tmb0 = rxp[0];\n+\t\t\tmb1 = rxp[1];\n+\t\t\tmb2 = rxp[2];\n+\t\t\tmb3 = rxp[3];\n+\n+\t\t\t/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */\n+\t\t\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=\n+\t\t\t\t\toffsetof(struct rte_mbuf, buf_addr) + 8);\n+\t\t\tvaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);\n+\t\t\tvaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);\n+\t\t\tvaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);\n+\t\t\tvaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);\n+\n+\t\t\t/**\n+\t\t\t * merge 0 & 1, by casting 0 to 256-bit and inserting 1\n+\t\t\t * into the high lanes. Similarly for 2 & 3\n+\t\t\t */\n+\t\t\tvaddr0_1 =\n+\t\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),\n+\t\t\t\t\t\t\tvaddr1, 1);\n+\t\t\tvaddr2_3 =\n+\t\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),\n+\t\t\t\t\t\t\tvaddr3, 1);\n+\n+\t\t\t/* convert pa to dma_addr hdr/data */\n+\t\t\tdma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);\n+\t\t\tdma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);\n+\n+\t\t\t/* add headroom to pa values */\n+\t\t\tdma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);\n+\t\t\tdma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);\n+\n+\t\t\t/* flush desc with pa dma_addr */\n+\t\t\t_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);\n+\t\t\t_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);\n+\t\t}\n+\t}\n+\n+#endif\n+\n+\trxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;\n+\tif (rxq->rxrearm_start >= rxq->nb_rx_desc)\n+\t\trxq->rxrearm_start = 0;\n+\n+\trxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;\n+\n+\trx_id = (uint16_t)((rxq->rxrearm_start == 0) ?\n+\t\t\t     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));\n+\n+\t/* Update the tail pointer on the NIC */\n+\tIAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);\n+}\n+\n #endif\n",
    "prefixes": [
        "v2",
        "1/3"
    ]
}