get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/51597/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 51597,
    "url": "https://patches.dpdk.org/api/patches/51597/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/1553493995-29803-4-git-send-email-wenzhuo.lu@intel.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<1553493995-29803-4-git-send-email-wenzhuo.lu@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/1553493995-29803-4-git-send-email-wenzhuo.lu@intel.com",
    "date": "2019-03-25T06:06:30",
    "name": "[v6,3/8] net/ice: support vector SSE in RX",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "d029892a634a31a5160400d462a86d24694940ef",
    "submitter": {
        "id": 258,
        "url": "https://patches.dpdk.org/api/people/258/?format=api",
        "name": "Wenzhuo Lu",
        "email": "wenzhuo.lu@intel.com"
    },
    "delegate": {
        "id": 1540,
        "url": "https://patches.dpdk.org/api/users/1540/?format=api",
        "username": "qzhan15",
        "first_name": "Qi",
        "last_name": "Zhang",
        "email": "qi.z.zhang@intel.com"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/1553493995-29803-4-git-send-email-wenzhuo.lu@intel.com/mbox/",
    "series": [
        {
            "id": 3887,
            "url": "https://patches.dpdk.org/api/series/3887/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=3887",
            "date": "2019-03-25T06:06:28",
            "name": "Support vector instructions on ICE",
            "version": 6,
            "mbox": "https://patches.dpdk.org/series/3887/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/51597/comments/",
    "check": "warning",
    "checks": "https://patches.dpdk.org/api/patches/51597/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id CCDB44CA9;\n\tMon, 25 Mar 2019 07:01:19 +0100 (CET)",
            "from mga04.intel.com (mga04.intel.com [192.55.52.120])\n\tby dpdk.org (Postfix) with ESMTP id 594B537AF\n\tfor <dev@dpdk.org>; Mon, 25 Mar 2019 07:01:09 +0100 (CET)",
            "from fmsmga002.fm.intel.com ([10.253.24.26])\n\tby fmsmga104.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t24 Mar 2019 23:01:08 -0700",
            "from dpdk26.sh.intel.com ([10.67.110.164])\n\tby fmsmga002.fm.intel.com with ESMTP; 24 Mar 2019 23:01:07 -0700"
        ],
        "X-Amp-Result": "SKIPPED(no attachment in message)",
        "X-Amp-File-Uploaded": "False",
        "X-ExtLoop1": "1",
        "X-IronPort-AV": "E=Sophos;i=\"5.60,256,1549958400\"; d=\"scan'208\";a=\"154850722\"",
        "From": "Wenzhuo Lu <wenzhuo.lu@intel.com>",
        "To": "dev@dpdk.org",
        "Cc": "Wenzhuo Lu <wenzhuo.lu@intel.com>",
        "Date": "Mon, 25 Mar 2019 14:06:30 +0800",
        "Message-Id": "<1553493995-29803-4-git-send-email-wenzhuo.lu@intel.com>",
        "X-Mailer": "git-send-email 1.9.3",
        "In-Reply-To": "<1553493995-29803-1-git-send-email-wenzhuo.lu@intel.com>",
        "References": "<1551340136-83843-1-git-send-email-wenzhuo.lu@intel.com>\n\t<1553493995-29803-1-git-send-email-wenzhuo.lu@intel.com>",
        "Subject": "[dpdk-dev] [PATCH v6 3/8] net/ice: support vector SSE in RX",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>\n---\n doc/guides/nics/features/ice_vec.ini  |  33 +++\n drivers/net/ice/Makefile              |   3 +\n drivers/net/ice/ice_ethdev.c          |   2 -\n drivers/net/ice/ice_ethdev.h          |   2 +\n drivers/net/ice/ice_rxtx.c            |  27 +-\n drivers/net/ice/ice_rxtx.h            |  21 +-\n drivers/net/ice/ice_rxtx_vec_common.h | 160 +++++++++++\n drivers/net/ice/ice_rxtx_vec_sse.c    | 496 ++++++++++++++++++++++++++++++++++\n drivers/net/ice/meson.build           |   4 +\n 9 files changed, 742 insertions(+), 6 deletions(-)\n create mode 100644 doc/guides/nics/features/ice_vec.ini\n create mode 100644 drivers/net/ice/ice_rxtx_vec_common.h\n create mode 100644 drivers/net/ice/ice_rxtx_vec_sse.c",
    "diff": "diff --git a/doc/guides/nics/features/ice_vec.ini b/doc/guides/nics/features/ice_vec.ini\nnew file mode 100644\nindex 0000000..1a19788\n--- /dev/null\n+++ b/doc/guides/nics/features/ice_vec.ini\n@@ -0,0 +1,33 @@\n+;\n+; Supported features of the 'ice_vec' network poll mode driver.\n+;\n+; Refer to default.ini for the full list of available PMD features.\n+;\n+[Features]\n+Speed capabilities   = Y\n+Link status          = Y\n+Link status event    = Y\n+Rx interrupt         = Y\n+Queue start/stop     = Y\n+MTU update           = Y\n+Jumbo frame          = Y\n+Scattered Rx         = Y\n+Promiscuous mode     = Y\n+Allmulticast mode    = Y\n+Unicast MAC filter   = Y\n+Multicast MAC filter = Y\n+RSS hash             = Y\n+RSS key update       = Y\n+RSS reta update      = Y\n+VLAN filter          = Y\n+Packet type parsing  = Y\n+Rx descriptor status = Y\n+Basic stats          = Y\n+Extended stats       = Y\n+FW version           = Y\n+Module EEPROM dump   = Y\n+BSD nic_uio          = Y\n+Linux UIO            = Y\n+Linux VFIO           = Y\n+x86-32               = Y\n+x86-64               = Y\ndiff --git a/drivers/net/ice/Makefile b/drivers/net/ice/Makefile\nindex 61846ca..92594bb 100644\n--- a/drivers/net/ice/Makefile\n+++ b/drivers/net/ice/Makefile\n@@ -54,5 +54,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_flow.c\n \n SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_ethdev.c\n SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_rxtx.c\n+ifeq ($(CONFIG_RTE_ARCH_X86), y)\n+SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_rxtx_vec_sse.c\n+endif\n \n include $(RTE_SDK)/mk/rte.lib.mk\ndiff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c\nindex b804be1..8e7c7db 100644\n--- a/drivers/net/ice/ice_ethdev.c\n+++ b/drivers/net/ice/ice_ethdev.c\n@@ -2,8 +2,6 @@\n  * Copyright(c) 2018 Intel Corporation\n  */\n \n-#include <rte_ethdev_pci.h>\n-\n #include \"base/ice_sched.h\"\n #include \"ice_ethdev.h\"\n #include \"ice_rxtx.h\"\ndiff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h\nindex 3cefa5b..151a09e 100644\n--- a/drivers/net/ice/ice_ethdev.h\n+++ b/drivers/net/ice/ice_ethdev.h\n@@ -7,6 +7,8 @@\n \n #include <rte_kvargs.h>\n \n+#include <rte_ethdev_pci.h>\n+\n #include \"base/ice_common.h\"\n #include \"base/ice_adminq_cmd.h\"\n \ndiff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c\nindex d540ed1..ebb1cab 100644\n--- a/drivers/net/ice/ice_rxtx.c\n+++ b/drivers/net/ice/ice_rxtx.c\n@@ -7,8 +7,6 @@\n \n #include \"ice_rxtx.h\"\n \n-#define ICE_TD_CMD ICE_TX_DESC_CMD_EOP\n-\n #define ICE_TX_CKSUM_OFFLOAD_MASK (\t\t \\\n \t\tPKT_TX_IP_CKSUM |\t\t \\\n \t\tPKT_TX_L4_MASK |\t\t \\\n@@ -319,6 +317,9 @@\n \trxq->nb_rx_hold = 0;\n \trxq->pkt_first_seg = NULL;\n \trxq->pkt_last_seg = NULL;\n+\n+\trxq->rxrearm_start = 0;\n+\trxq->rxrearm_nb = 0;\n }\n \n int\n@@ -1490,6 +1491,12 @@\n #endif\n \t    dev->rx_pkt_burst == ice_recv_scattered_pkts)\n \t\treturn ptypes;\n+\n+#ifdef RTE_ARCH_X86\n+\tif (dev->rx_pkt_burst == ice_recv_pkts_vec)\n+\t\treturn ptypes;\n+#endif\n+\n \treturn NULL;\n }\n \n@@ -2225,6 +2232,22 @@ void __attribute__((cold))\n \tPMD_INIT_FUNC_TRACE();\n \tstruct ice_adapter *ad =\n \t\tICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);\n+#ifdef RTE_ARCH_X86\n+\tstruct ice_rx_queue *rxq;\n+\tint i;\n+\n+\tif (!ice_rx_vec_dev_check(dev)) {\n+\t\tfor (i = 0; i < dev->data->nb_rx_queues; i++) {\n+\t\t\trxq = dev->data->rx_queues[i];\n+\t\t\t(void)ice_rxq_vec_setup(rxq);\n+\t\t}\n+\t\tPMD_DRV_LOG(DEBUG, \"Using Vector Rx (port %d).\",\n+\t\t\t    dev->data->port_id);\n+\t\tdev->rx_pkt_burst = ice_recv_pkts_vec;\n+\n+\t\treturn;\n+\t}\n+#endif\n \n \tif (dev->data->scattered_rx) {\n \t\t/* Set the non-LRO scattered function */\ndiff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h\nindex 78b4928..656ca0d 100644\n--- a/drivers/net/ice/ice_rxtx.h\n+++ b/drivers/net/ice/ice_rxtx.h\n@@ -27,6 +27,15 @@\n \n #define ICE_SUPPORT_CHAIN_NUM 5\n \n+#define ICE_TD_CMD                      ICE_TX_DESC_CMD_EOP\n+\n+#define ICE_VPMD_RX_BURST           32\n+#define ICE_VPMD_TX_BURST           32\n+#define ICE_RXQ_REARM_THRESH        32\n+#define ICE_MAX_RX_BURST            ICE_RXQ_REARM_THRESH\n+#define ICE_TX_MAX_FREE_BUF_SZ      64\n+#define ICE_DESCS_PER_LOOP          4\n+\n typedef void (*ice_rx_release_mbufs_t)(struct ice_rx_queue *rxq);\n typedef void (*ice_tx_release_mbufs_t)(struct ice_tx_queue *txq);\n \n@@ -45,13 +54,16 @@ struct ice_rx_queue {\n \tuint16_t nb_rx_hold; /* number of held free RX desc */\n \tstruct rte_mbuf *pkt_first_seg; /**< first segment of current packet */\n \tstruct rte_mbuf *pkt_last_seg; /**< last segment of current packet */\n-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC\n \tuint16_t rx_nb_avail; /**< number of staged packets ready */\n \tuint16_t rx_next_avail; /**< index of next staged packets */\n \tuint16_t rx_free_trigger; /**< triggers rx buffer allocation */\n \tstruct rte_mbuf fake_mbuf; /**< dummy mbuf */\n \tstruct rte_mbuf *rx_stage[ICE_RX_MAX_BURST * 2];\n-#endif\n+\n+\tuint16_t rxrearm_nb;\t/**< number of remaining to be re-armed */\n+\tuint16_t rxrearm_start;\t/**< the idx we start the re-arming from */\n+\tuint64_t mbuf_initializer; /**< value to init mbufs */\n+\n \tuint8_t port_id; /* device port ID */\n \tuint8_t crc_len; /* 0 if CRC stripped, 4 otherwise */\n \tuint16_t queue_id; /* RX queue index */\n@@ -156,4 +168,9 @@ void ice_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,\n int ice_tx_descriptor_status(void *tx_queue, uint16_t offset);\n void ice_set_default_ptype_table(struct rte_eth_dev *dev);\n const uint32_t *ice_dev_supported_ptypes_get(struct rte_eth_dev *dev);\n+\n+int ice_rx_vec_dev_check(struct rte_eth_dev *dev);\n+int ice_rxq_vec_setup(struct ice_rx_queue *rxq);\n+uint16_t ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\n+\t\t\t   uint16_t nb_pkts);\n #endif /* _ICE_RXTX_H_ */\ndiff --git a/drivers/net/ice/ice_rxtx_vec_common.h b/drivers/net/ice/ice_rxtx_vec_common.h\nnew file mode 100644\nindex 0000000..d41232d\n--- /dev/null\n+++ b/drivers/net/ice/ice_rxtx_vec_common.h\n@@ -0,0 +1,160 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2019 Intel Corporation\n+ */\n+\n+#ifndef _ICE_RXTX_VEC_COMMON_H_\n+#define _ICE_RXTX_VEC_COMMON_H_\n+\n+#include \"ice_rxtx.h\"\n+\n+static inline uint16_t\n+ice_rx_reassemble_packets(struct ice_rx_queue *rxq, struct rte_mbuf **rx_bufs,\n+\t\t\t  uint16_t nb_bufs, uint8_t *split_flags)\n+{\n+\tstruct rte_mbuf *pkts[ICE_VPMD_RX_BURST] = {0}; /*finished pkts*/\n+\tstruct rte_mbuf *start = rxq->pkt_first_seg;\n+\tstruct rte_mbuf *end =  rxq->pkt_last_seg;\n+\tunsigned int pkt_idx, buf_idx;\n+\n+\tfor (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {\n+\t\tif (end) {\n+\t\t\t/* processing a split packet */\n+\t\t\tend->next = rx_bufs[buf_idx];\n+\t\t\trx_bufs[buf_idx]->data_len += rxq->crc_len;\n+\n+\t\t\tstart->nb_segs++;\n+\t\t\tstart->pkt_len += rx_bufs[buf_idx]->data_len;\n+\t\t\tend = end->next;\n+\n+\t\t\tif (!split_flags[buf_idx]) {\n+\t\t\t\t/* it's the last packet of the set */\n+\t\t\t\tstart->hash = end->hash;\n+\t\t\t\tstart->ol_flags = end->ol_flags;\n+\t\t\t\t/* we need to strip crc for the whole packet */\n+\t\t\t\tstart->pkt_len -= rxq->crc_len;\n+\t\t\t\tif (end->data_len > rxq->crc_len) {\n+\t\t\t\t\tend->data_len -= rxq->crc_len;\n+\t\t\t\t} else {\n+\t\t\t\t\t/* free up last mbuf */\n+\t\t\t\t\tstruct rte_mbuf *secondlast = start;\n+\n+\t\t\t\t\tstart->nb_segs--;\n+\t\t\t\t\twhile (secondlast->next != end)\n+\t\t\t\t\t\tsecondlast = secondlast->next;\n+\t\t\t\t\tsecondlast->data_len -= (rxq->crc_len -\n+\t\t\t\t\t\t\tend->data_len);\n+\t\t\t\t\tsecondlast->next = NULL;\n+\t\t\t\t\trte_pktmbuf_free_seg(end);\n+\t\t\t\t}\n+\t\t\t\tpkts[pkt_idx++] = start;\n+\t\t\t\tstart = NULL;\n+\t\t\t\tend = NULL;\n+\t\t\t}\n+\t\t} else {\n+\t\t\t/* not processing a split packet */\n+\t\t\tif (!split_flags[buf_idx]) {\n+\t\t\t\t/* not a split packet, save and skip */\n+\t\t\t\tpkts[pkt_idx++] = rx_bufs[buf_idx];\n+\t\t\t\tcontinue;\n+\t\t\t}\n+\t\t\tstart = rx_bufs[buf_idx];\n+\t\t\tend = start;\n+\t\t\trx_bufs[buf_idx]->data_len += rxq->crc_len;\n+\t\t\trx_bufs[buf_idx]->pkt_len += rxq->crc_len;\n+\t\t}\n+\t}\n+\n+\t/* save the partial packet for next time */\n+\trxq->pkt_first_seg = start;\n+\trxq->pkt_last_seg = end;\n+\trte_memcpy(rx_bufs, pkts, pkt_idx * (sizeof(*pkts)));\n+\treturn pkt_idx;\n+}\n+\n+static inline void\n+_ice_rx_queue_release_mbufs_vec(struct ice_rx_queue *rxq)\n+{\n+\tconst unsigned int mask = rxq->nb_rx_desc - 1;\n+\tunsigned int i;\n+\n+\tif (unlikely(!rxq->sw_ring)) {\n+\t\tPMD_DRV_LOG(DEBUG, \"sw_ring is NULL\");\n+\t\treturn;\n+\t}\n+\n+\tif (rxq->rxrearm_nb >= rxq->nb_rx_desc)\n+\t\treturn;\n+\n+\t/* free all mbufs that are valid in the ring */\n+\tif (rxq->rxrearm_nb == 0) {\n+\t\tfor (i = 0; i < rxq->nb_rx_desc; i++) {\n+\t\t\tif (rxq->sw_ring[i].mbuf)\n+\t\t\t\trte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);\n+\t\t}\n+\t} else {\n+\t\tfor (i = rxq->rx_tail;\n+\t\t     i != rxq->rxrearm_start;\n+\t\t     i = (i + 1) & mask) {\n+\t\t\tif (rxq->sw_ring[i].mbuf)\n+\t\t\t\trte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);\n+\t\t}\n+\t}\n+\n+\trxq->rxrearm_nb = rxq->nb_rx_desc;\n+\n+\t/* set all entries to NULL */\n+\tmemset(rxq->sw_ring, 0, sizeof(rxq->sw_ring[0]) * rxq->nb_rx_desc);\n+}\n+\n+static inline int\n+ice_rxq_vec_setup_default(struct ice_rx_queue *rxq)\n+{\n+\tuintptr_t p;\n+\tstruct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */\n+\n+\tmb_def.nb_segs = 1;\n+\tmb_def.data_off = RTE_PKTMBUF_HEADROOM;\n+\tmb_def.port = rxq->port_id;\n+\trte_mbuf_refcnt_set(&mb_def, 1);\n+\n+\t/* prevent compiler reordering: rearm_data covers previous fields */\n+\trte_compiler_barrier();\n+\tp = (uintptr_t)&mb_def.rearm_data;\n+\trxq->mbuf_initializer = *(uint64_t *)p;\n+\treturn 0;\n+}\n+\n+static inline int\n+ice_rx_vec_queue_default(struct ice_rx_queue *rxq)\n+{\n+\tif (!rxq)\n+\t\treturn -1;\n+\n+\tif (!rte_is_power_of_2(rxq->nb_rx_desc))\n+\t\treturn -1;\n+\n+\tif (rxq->rx_free_thresh < ICE_VPMD_RX_BURST)\n+\t\treturn -1;\n+\n+\tif (rxq->nb_rx_desc % rxq->rx_free_thresh)\n+\t\treturn -1;\n+\n+\treturn 0;\n+}\n+\n+static inline int\n+ice_rx_vec_dev_check_default(struct rte_eth_dev *dev)\n+{\n+\tint i;\n+\tstruct ice_rx_queue *rxq;\n+\n+\tfor (i = 0; i < dev->data->nb_rx_queues; i++) {\n+\t\trxq = dev->data->rx_queues[i];\n+\t\tif (ice_rx_vec_queue_default(rxq))\n+\t\t\treturn -1;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+#endif\ndiff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c\nnew file mode 100644\nindex 0000000..07cbbf3\n--- /dev/null\n+++ b/drivers/net/ice/ice_rxtx_vec_sse.c\n@@ -0,0 +1,496 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2019 Intel Corporation\n+ */\n+\n+#include \"ice_rxtx_vec_common.h\"\n+\n+#include <tmmintrin.h>\n+\n+#ifndef __INTEL_COMPILER\n+#pragma GCC diagnostic ignored \"-Wcast-qual\"\n+#endif\n+\n+static inline void\n+ice_rxq_rearm(struct ice_rx_queue *rxq)\n+{\n+\tint i;\n+\tuint16_t rx_id;\n+\tvolatile union ice_rx_desc *rxdp;\n+\tstruct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];\n+\tstruct rte_mbuf *mb0, *mb1;\n+\t__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,\n+\t\t\t\t\t  RTE_PKTMBUF_HEADROOM);\n+\t__m128i dma_addr0, dma_addr1;\n+\n+\trxdp = rxq->rx_ring + rxq->rxrearm_start;\n+\n+\t/* Pull 'n' more MBUFs into the software ring */\n+\tif (rte_mempool_get_bulk(rxq->mp,\n+\t\t\t\t (void *)rxep,\n+\t\t\t\t ICE_RXQ_REARM_THRESH) < 0) {\n+\t\tif (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=\n+\t\t    rxq->nb_rx_desc) {\n+\t\t\tdma_addr0 = _mm_setzero_si128();\n+\t\t\tfor (i = 0; i < ICE_DESCS_PER_LOOP; i++) {\n+\t\t\t\trxep[i].mbuf = &rxq->fake_mbuf;\n+\t\t\t\t_mm_store_si128((__m128i *)&rxdp[i].read,\n+\t\t\t\t\t\tdma_addr0);\n+\t\t\t}\n+\t\t}\n+\t\trte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=\n+\t\t\tICE_RXQ_REARM_THRESH;\n+\t\treturn;\n+\t}\n+\n+\t/* Initialize the mbufs in vector, process 2 mbufs in one loop */\n+\tfor (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {\n+\t\t__m128i vaddr0, vaddr1;\n+\n+\t\tmb0 = rxep[0].mbuf;\n+\t\tmb1 = rxep[1].mbuf;\n+\n+\t\t/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */\n+\t\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=\n+\t\t\t\t offsetof(struct rte_mbuf, buf_addr) + 8);\n+\t\tvaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);\n+\t\tvaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);\n+\n+\t\t/* convert pa to dma_addr hdr/data */\n+\t\tdma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);\n+\t\tdma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);\n+\n+\t\t/* add headroom to pa values */\n+\t\tdma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);\n+\t\tdma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);\n+\n+\t\t/* flush desc with pa dma_addr */\n+\t\t_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);\n+\t\t_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);\n+\t}\n+\n+\trxq->rxrearm_start += ICE_RXQ_REARM_THRESH;\n+\tif (rxq->rxrearm_start >= rxq->nb_rx_desc)\n+\t\trxq->rxrearm_start = 0;\n+\n+\trxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH;\n+\n+\trx_id = (uint16_t)((rxq->rxrearm_start == 0) ?\n+\t\t\t   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));\n+\n+\t/* Update the tail pointer on the NIC */\n+\tICE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);\n+}\n+\n+static inline void\n+ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],\n+\t\t\t struct rte_mbuf **rx_pkts)\n+{\n+\tconst __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);\n+\t__m128i rearm0, rearm1, rearm2, rearm3;\n+\n+\t__m128i vlan0, vlan1, rss, l3_l4e;\n+\n+\t/* mask everything except RSS, flow director and VLAN flags\n+\t * bit2 is for VLAN tag, bit11 for flow director indication\n+\t * bit13:12 for RSS indication.\n+\t */\n+\tconst __m128i rss_vlan_msk = _mm_set_epi32(0x1c03804, 0x1c03804,\n+\t\t\t\t\t\t   0x1c03804, 0x1c03804);\n+\n+\tconst __m128i cksum_mask = _mm_set_epi32(PKT_RX_IP_CKSUM_GOOD |\n+\t\t\t\t\t\t PKT_RX_IP_CKSUM_BAD |\n+\t\t\t\t\t\t PKT_RX_L4_CKSUM_GOOD |\n+\t\t\t\t\t\t PKT_RX_L4_CKSUM_BAD |\n+\t\t\t\t\t\t PKT_RX_EIP_CKSUM_BAD,\n+\t\t\t\t\t\t PKT_RX_IP_CKSUM_GOOD |\n+\t\t\t\t\t\t PKT_RX_IP_CKSUM_BAD |\n+\t\t\t\t\t\t PKT_RX_L4_CKSUM_GOOD |\n+\t\t\t\t\t\t PKT_RX_L4_CKSUM_BAD |\n+\t\t\t\t\t\t PKT_RX_EIP_CKSUM_BAD,\n+\t\t\t\t\t\t PKT_RX_IP_CKSUM_GOOD |\n+\t\t\t\t\t\t PKT_RX_IP_CKSUM_BAD |\n+\t\t\t\t\t\t PKT_RX_L4_CKSUM_GOOD |\n+\t\t\t\t\t\t PKT_RX_L4_CKSUM_BAD |\n+\t\t\t\t\t\t PKT_RX_EIP_CKSUM_BAD,\n+\t\t\t\t\t\t PKT_RX_IP_CKSUM_GOOD |\n+\t\t\t\t\t\t PKT_RX_IP_CKSUM_BAD |\n+\t\t\t\t\t\t PKT_RX_L4_CKSUM_GOOD |\n+\t\t\t\t\t\t PKT_RX_L4_CKSUM_BAD |\n+\t\t\t\t\t\t PKT_RX_EIP_CKSUM_BAD);\n+\n+\t/* map rss and vlan type to rss hash and vlan flag */\n+\tconst __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0,\n+\t\t\t0, 0, 0, 0,\n+\t\t\t0, 0, 0, PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,\n+\t\t\t0, 0, 0, 0);\n+\n+\tconst __m128i rss_flags = _mm_set_epi8(0, 0, 0, 0,\n+\t\t\t0, 0, 0, 0,\n+\t\t\tPKT_RX_RSS_HASH | PKT_RX_FDIR, PKT_RX_RSS_HASH, 0, 0,\n+\t\t\t0, 0, PKT_RX_FDIR, 0);\n+\n+\tconst __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,\n+\t\t\t/* shift right 1 bit to make sure it not exceed 255 */\n+\t\t\t(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |\n+\t\t\t PKT_RX_IP_CKSUM_BAD) >> 1,\n+\t\t\t(PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD |\n+\t\t\t PKT_RX_L4_CKSUM_BAD) >> 1,\n+\t\t\t(PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,\n+\t\t\t(PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD) >> 1,\n+\t\t\t(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,\n+\t\t\t(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1,\n+\t\t\tPKT_RX_IP_CKSUM_BAD >> 1,\n+\t\t\t(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1);\n+\n+\tvlan0 = _mm_unpackhi_epi32(descs[0], descs[1]);\n+\tvlan1 = _mm_unpackhi_epi32(descs[2], descs[3]);\n+\tvlan0 = _mm_unpacklo_epi64(vlan0, vlan1);\n+\n+\tvlan1 = _mm_and_si128(vlan0, rss_vlan_msk);\n+\tvlan0 = _mm_shuffle_epi8(vlan_flags, vlan1);\n+\n+\trss = _mm_srli_epi32(vlan1, 11);\n+\trss = _mm_shuffle_epi8(rss_flags, rss);\n+\n+\tl3_l4e = _mm_srli_epi32(vlan1, 22);\n+\tl3_l4e = _mm_shuffle_epi8(l3_l4e_flags, l3_l4e);\n+\t/* then we shift left 1 bit */\n+\tl3_l4e = _mm_slli_epi32(l3_l4e, 1);\n+\t/* we need to mask out the reduntant bits */\n+\tl3_l4e = _mm_and_si128(l3_l4e, cksum_mask);\n+\n+\tvlan0 = _mm_or_si128(vlan0, rss);\n+\tvlan0 = _mm_or_si128(vlan0, l3_l4e);\n+\n+\t/**\n+\t * At this point, we have the 4 sets of flags in the low 16-bits\n+\t * of each 32-bit value in vlan0.\n+\t * We want to extract these, and merge them with the mbuf init data\n+\t * so we can do a single 16-byte write to the mbuf to set the flags\n+\t * and all the other initialization fields. Extracting the\n+\t * appropriate flags means that we have to do a shift and blend for\n+\t * each mbuf before we do the write.\n+\t */\n+\trearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(vlan0, 8), 0x10);\n+\trearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(vlan0, 4), 0x10);\n+\trearm2 = _mm_blend_epi16(mbuf_init, vlan0, 0x10);\n+\trearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(vlan0, 4), 0x10);\n+\n+\t/* write the rearm data and the olflags in one write */\n+\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=\n+\t\t\t offsetof(struct rte_mbuf, rearm_data) + 8);\n+\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=\n+\t\t\t RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));\n+\t_mm_store_si128((__m128i *)&rx_pkts[0]->rearm_data, rearm0);\n+\t_mm_store_si128((__m128i *)&rx_pkts[1]->rearm_data, rearm1);\n+\t_mm_store_si128((__m128i *)&rx_pkts[2]->rearm_data, rearm2);\n+\t_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);\n+}\n+\n+#define PKTLEN_SHIFT     10\n+\n+static inline void\n+ice_rx_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,\n+\t\t       uint32_t *ptype_tbl)\n+{\n+\t__m128i ptype0 = _mm_unpackhi_epi64(descs[0], descs[1]);\n+\t__m128i ptype1 = _mm_unpackhi_epi64(descs[2], descs[3]);\n+\n+\tptype0 = _mm_srli_epi64(ptype0, 30);\n+\tptype1 = _mm_srli_epi64(ptype1, 30);\n+\n+\trx_pkts[0]->packet_type = ptype_tbl[_mm_extract_epi8(ptype0, 0)];\n+\trx_pkts[1]->packet_type = ptype_tbl[_mm_extract_epi8(ptype0, 8)];\n+\trx_pkts[2]->packet_type = ptype_tbl[_mm_extract_epi8(ptype1, 0)];\n+\trx_pkts[3]->packet_type = ptype_tbl[_mm_extract_epi8(ptype1, 8)];\n+}\n+\n+/**\n+ * Notice:\n+ * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet\n+ * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST\n+ *   numbers of DD bits\n+ */\n+static inline uint16_t\n+_ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,\n+\t\t       uint16_t nb_pkts, uint8_t *split_packet)\n+{\n+\tvolatile union ice_rx_desc *rxdp;\n+\tstruct ice_rx_entry *sw_ring;\n+\tuint16_t nb_pkts_recd;\n+\tint pos;\n+\tuint64_t var;\n+\t__m128i shuf_msk;\n+\tuint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;\n+\n+\t__m128i crc_adjust = _mm_set_epi16\n+\t\t\t\t(0, 0, 0,    /* ignore non-length fields */\n+\t\t\t\t -rxq->crc_len, /* sub crc on data_len */\n+\t\t\t\t 0,          /* ignore high-16bits of pkt_len */\n+\t\t\t\t -rxq->crc_len, /* sub crc on pkt_len */\n+\t\t\t\t 0, 0            /* ignore pkt_type field */\n+\t\t\t\t);\n+\t/**\n+\t * compile-time check the above crc_adjust layout is correct.\n+\t * NOTE: the first field (lowest address) is given last in set_epi16\n+\t * call above.\n+\t */\n+\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=\n+\t\t\t offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);\n+\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=\n+\t\t\t offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);\n+\t__m128i dd_check, eop_check;\n+\n+\t/* nb_pkts shall be less equal than ICE_MAX_RX_BURST */\n+\tnb_pkts = RTE_MIN(nb_pkts, ICE_MAX_RX_BURST);\n+\n+\t/* nb_pkts has to be floor-aligned to ICE_DESCS_PER_LOOP */\n+\tnb_pkts = RTE_ALIGN_FLOOR(nb_pkts, ICE_DESCS_PER_LOOP);\n+\n+\t/* Just the act of getting into the function from the application is\n+\t * going to cost about 7 cycles\n+\t */\n+\trxdp = rxq->rx_ring + rxq->rx_tail;\n+\n+\trte_prefetch0(rxdp);\n+\n+\t/* See if we need to rearm the RX queue - gives the prefetch a bit\n+\t * of time to act\n+\t */\n+\tif (rxq->rxrearm_nb > ICE_RXQ_REARM_THRESH)\n+\t\tice_rxq_rearm(rxq);\n+\n+\t/* Before we start moving massive data around, check to see if\n+\t * there is actually a packet available\n+\t */\n+\tif (!(rxdp->wb.qword1.status_error_len &\n+\t      rte_cpu_to_le_32(1 << ICE_RX_DESC_STATUS_DD_S)))\n+\t\treturn 0;\n+\n+\t/* 4 packets DD mask */\n+\tdd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL);\n+\n+\t/* 4 packets EOP mask */\n+\teop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL);\n+\n+\t/* mask to shuffle from desc. to mbuf */\n+\tshuf_msk = _mm_set_epi8\n+\t\t\t(7, 6, 5, 4,  /* octet 4~7, 32bits rss */\n+\t\t\t 3, 2,        /* octet 2~3, low 16 bits vlan_macip */\n+\t\t\t 15, 14,      /* octet 15~14, 16 bits data_len */\n+\t\t\t 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */\n+\t\t\t 15, 14,      /* octet 15~14, low 16 bits pkt_len */\n+\t\t\t 0xFF, 0xFF,  /* pkt_type set as unknown */\n+\t\t\t 0xFF, 0xFF  /*pkt_type set as unknown */\n+\t\t\t);\n+\t/**\n+\t * Compile-time verify the shuffle mask\n+\t * NOTE: some field positions already verified above, but duplicated\n+\t * here for completeness in case of future modifications.\n+\t */\n+\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=\n+\t\t\t offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);\n+\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=\n+\t\t\t offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);\n+\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=\n+\t\t\t offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);\n+\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=\n+\t\t\t offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);\n+\n+\t/* Cache is empty -> need to scan the buffer rings, but first move\n+\t * the next 'n' mbufs into the cache\n+\t */\n+\tsw_ring = &rxq->sw_ring[rxq->rx_tail];\n+\n+\t/* A. load 4 packet in one loop\n+\t * [A*. mask out 4 unused dirty field in desc]\n+\t * B. copy 4 mbuf point from swring to rx_pkts\n+\t * C. calc the number of DD bits among the 4 packets\n+\t * [C*. extract the end-of-packet bit, if requested]\n+\t * D. fill info. from desc to mbuf\n+\t */\n+\n+\tfor (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;\n+\t     pos += ICE_DESCS_PER_LOOP,\n+\t     rxdp += ICE_DESCS_PER_LOOP) {\n+\t\t__m128i descs[ICE_DESCS_PER_LOOP];\n+\t\t__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;\n+\t\t__m128i zero, staterr, sterr_tmp1, sterr_tmp2;\n+\t\t/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */\n+\t\t__m128i mbp1;\n+#if defined(RTE_ARCH_X86_64)\n+\t\t__m128i mbp2;\n+#endif\n+\n+\t\t/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */\n+\t\tmbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);\n+\t\t/* Read desc statuses backwards to avoid race condition */\n+\t\t/* A.1 load 4 pkts desc */\n+\t\tdescs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));\n+\t\trte_compiler_barrier();\n+\n+\t\t/* B.2 copy 2 64 bit or 4 32 bit mbuf point into rx_pkts */\n+\t\t_mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);\n+\n+#if defined(RTE_ARCH_X86_64)\n+\t\t/* B.1 load 2 64 bit mbuf points */\n+\t\tmbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]);\n+#endif\n+\n+\t\tdescs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));\n+\t\trte_compiler_barrier();\n+\t\t/* B.1 load 2 mbuf point */\n+\t\tdescs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));\n+\t\trte_compiler_barrier();\n+\t\tdescs[0] = _mm_loadu_si128((__m128i *)(rxdp));\n+\n+#if defined(RTE_ARCH_X86_64)\n+\t\t/* B.2 copy 2 mbuf point into rx_pkts  */\n+\t\t_mm_storeu_si128((__m128i *)&rx_pkts[pos + 2], mbp2);\n+#endif\n+\n+\t\tif (split_packet) {\n+\t\t\trte_mbuf_prefetch_part2(rx_pkts[pos]);\n+\t\t\trte_mbuf_prefetch_part2(rx_pkts[pos + 1]);\n+\t\t\trte_mbuf_prefetch_part2(rx_pkts[pos + 2]);\n+\t\t\trte_mbuf_prefetch_part2(rx_pkts[pos + 3]);\n+\t\t}\n+\n+\t\t/* avoid compiler reorder optimization */\n+\t\trte_compiler_barrier();\n+\n+\t\t/* pkt 3,4 shift the pktlen field to be 16-bit aligned*/\n+\t\tconst __m128i len3 = _mm_slli_epi32(descs[3], PKTLEN_SHIFT);\n+\t\tconst __m128i len2 = _mm_slli_epi32(descs[2], PKTLEN_SHIFT);\n+\n+\t\t/* merge the now-aligned packet length fields back in */\n+\t\tdescs[3] = _mm_blend_epi16(descs[3], len3, 0x80);\n+\t\tdescs[2] = _mm_blend_epi16(descs[2], len2, 0x80);\n+\n+\t\t/* D.1 pkt 3,4 convert format from desc to pktmbuf */\n+\t\tpkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);\n+\t\tpkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);\n+\n+\t\t/* C.1 4=>2 filter staterr info only */\n+\t\tsterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);\n+\t\t/* C.1 4=>2 filter staterr info only */\n+\t\tsterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);\n+\n+\t\tice_rx_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);\n+\n+\t\t/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */\n+\t\tpkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);\n+\t\tpkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);\n+\n+\t\t/* pkt 1,2 shift the pktlen field to be 16-bit aligned*/\n+\t\tconst __m128i len1 = _mm_slli_epi32(descs[1], PKTLEN_SHIFT);\n+\t\tconst __m128i len0 = _mm_slli_epi32(descs[0], PKTLEN_SHIFT);\n+\n+\t\t/* merge the now-aligned packet length fields back in */\n+\t\tdescs[1] = _mm_blend_epi16(descs[1], len1, 0x80);\n+\t\tdescs[0] = _mm_blend_epi16(descs[0], len0, 0x80);\n+\n+\t\t/* D.1 pkt 1,2 convert format from desc to pktmbuf */\n+\t\tpkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);\n+\t\tpkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);\n+\n+\t\t/* C.2 get 4 pkts staterr value  */\n+\t\tzero = _mm_xor_si128(dd_check, dd_check);\n+\t\tstaterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);\n+\n+\t\t/* D.3 copy final 3,4 data to rx_pkts */\n+\t\t_mm_storeu_si128\n+\t\t\t((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,\n+\t\t\t pkt_mb4);\n+\t\t_mm_storeu_si128\n+\t\t\t((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,\n+\t\t\t pkt_mb3);\n+\n+\t\t/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */\n+\t\tpkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);\n+\t\tpkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);\n+\n+\t\t/* C* extract and record EOP bit */\n+\t\tif (split_packet) {\n+\t\t\t__m128i eop_shuf_mask = _mm_set_epi8(0xFF, 0xFF,\n+\t\t\t\t\t\t\t     0xFF, 0xFF,\n+\t\t\t\t\t\t\t     0xFF, 0xFF,\n+\t\t\t\t\t\t\t     0xFF, 0xFF,\n+\t\t\t\t\t\t\t     0xFF, 0xFF,\n+\t\t\t\t\t\t\t     0xFF, 0xFF,\n+\t\t\t\t\t\t\t     0x04, 0x0C,\n+\t\t\t\t\t\t\t     0x00, 0x08);\n+\n+\t\t\t/* and with mask to extract bits, flipping 1-0 */\n+\t\t\t__m128i eop_bits = _mm_andnot_si128(staterr, eop_check);\n+\t\t\t/* the staterr values are not in order, as the count\n+\t\t\t * count of dd bits doesn't care. However, for end of\n+\t\t\t * packet tracking, we do care, so shuffle. This also\n+\t\t\t * compresses the 32-bit values to 8-bit\n+\t\t\t */\n+\t\t\teop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);\n+\t\t\t/* store the resulting 32-bit value */\n+\t\t\t*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);\n+\t\t\tsplit_packet += ICE_DESCS_PER_LOOP;\n+\t\t}\n+\n+\t\t/* C.3 calc available number of desc */\n+\t\tstaterr = _mm_and_si128(staterr, dd_check);\n+\t\tstaterr = _mm_packs_epi32(staterr, zero);\n+\n+\t\t/* D.3 copy final 1,2 data to rx_pkts */\n+\t\t_mm_storeu_si128\n+\t\t\t((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,\n+\t\t\t pkt_mb2);\n+\t\t_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,\n+\t\t\t\t pkt_mb1);\n+\t\tice_rx_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);\n+\t\t/* C.4 calc avaialbe number of desc */\n+\t\tvar = __builtin_popcountll(_mm_cvtsi128_si64(staterr));\n+\t\tnb_pkts_recd += var;\n+\t\tif (likely(var != ICE_DESCS_PER_LOOP))\n+\t\t\tbreak;\n+\t}\n+\n+\t/* Update our internal tail pointer */\n+\trxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);\n+\trxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));\n+\trxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);\n+\n+\treturn nb_pkts_recd;\n+}\n+\n+/**\n+ * Notice:\n+ * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet\n+ * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST\n+ *   numbers of DD bits\n+ */\n+uint16_t\n+ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\n+\t\t  uint16_t nb_pkts)\n+{\n+\treturn _ice_recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);\n+}\n+\n+static void __attribute__((cold))\n+ice_rx_queue_release_mbufs_vec(struct ice_rx_queue *rxq)\n+{\n+\t_ice_rx_queue_release_mbufs_vec(rxq);\n+}\n+\n+int __attribute__((cold))\n+ice_rxq_vec_setup(struct ice_rx_queue *rxq)\n+{\n+\tif (!rxq)\n+\t\treturn -1;\n+\n+\trxq->rx_rel_mbufs = ice_rx_queue_release_mbufs_vec;\n+\treturn ice_rxq_vec_setup_default(rxq);\n+}\n+\n+int __attribute__((cold))\n+ice_rx_vec_dev_check(struct rte_eth_dev *dev)\n+{\n+\treturn ice_rx_vec_dev_check_default(dev);\n+}\ndiff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build\nindex 857dc0e..469264d 100644\n--- a/drivers/net/ice/meson.build\n+++ b/drivers/net/ice/meson.build\n@@ -11,3 +11,7 @@ sources = files(\n \n deps += ['hash']\n includes += include_directories('base')\n+\n+if arch_subdir == 'x86'\n+\tsources += files('ice_rxtx_vec_sse.c')\n+endif\n",
    "prefixes": [
        "v6",
        "3/8"
    ]
}