get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/45608/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 45608,
    "url": "http://patches.dpdk.org/api/patches/45608/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20180928192007.29519-2-johndale@cisco.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20180928192007.29519-2-johndale@cisco.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20180928192007.29519-2-johndale@cisco.com",
    "date": "2018-09-28T19:20:07",
    "name": "[v2,2/2] net/enic: add AVX2 based vectorized Rx handler",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "6c3e846ccd2fb8b758250ce28806178b8d4457ff",
    "submitter": {
        "id": 359,
        "url": "http://patches.dpdk.org/api/people/359/?format=api",
        "name": "John Daley (johndale)",
        "email": "johndale@cisco.com"
    },
    "delegate": null,
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20180928192007.29519-2-johndale@cisco.com/mbox/",
    "series": [
        {
            "id": 1583,
            "url": "http://patches.dpdk.org/api/series/1583/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=1583",
            "date": "2018-09-28T19:20:06",
            "name": "[v2,1/2] net/enic: move common Rx functions to a new header file",
            "version": 2,
            "mbox": "http://patches.dpdk.org/series/1583/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/45608/comments/",
    "check": "success",
    "checks": "http://patches.dpdk.org/api/patches/45608/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id B39591B42C;\n\tFri, 28 Sep 2018 21:21:12 +0200 (CEST)",
            "from alln-iport-5.cisco.com (alln-iport-5.cisco.com\n\t[173.37.142.92]) by dpdk.org (Postfix) with ESMTP id AEA5F1B3D7\n\tfor <dev@dpdk.org>; Fri, 28 Sep 2018 21:21:10 +0200 (CEST)",
            "from alln-core-7.cisco.com ([173.36.13.140])\n\tby alln-iport-5.cisco.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t28 Sep 2018 19:21:09 +0000",
            "from cisco.com (savbu-usnic-a.cisco.com [10.193.184.48])\n\tby alln-core-7.cisco.com (8.15.2/8.15.2) with ESMTP id w8SJL98C003235;\n\tFri, 28 Sep 2018 19:21:09 GMT",
            "by cisco.com (Postfix, from userid 392789)\n\tid 4275520F2001; Fri, 28 Sep 2018 12:21:09 -0700 (PDT)"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/simple;\n\td=cisco.com; i=@cisco.com; l=40450; q=dns/txt;\n\ts=iport; t=1538162470; x=1539372070;\n\th=from:to:cc:subject:date:message-id:in-reply-to: references;\n\tbh=hDZwbUoZwf7Xrgmhew0xn8X1SY2yx8Nj+YkRotCYqCs=;\n\tb=lOT6cNuVPQJm+gTrYUeD9uIGCVjsNWvtKDDfRAh6ls69dVsmUazHlDW2\n\tiA6xlktdAKuUevystsksOwFmwzmd+LC7P3biOrJeJGTDiG0rMb5+wDSq3\n\tt+m2lDV3RERX9YPIZA/yvMqXS1UluzWLRMmVf8ktJle54OLY7d/11y7G9 Y=;",
        "X-IronPort-AV": "E=Sophos;i=\"5.54,316,1534809600\"; d=\"scan'208\";a=\"177687674\"",
        "From": "John Daley <johndale@cisco.com>",
        "To": "ferruh.yigit@intel.com",
        "Cc": "dev@dpdk.org, Hyong Youb Kim <hyonkim@cisco.com>",
        "Date": "Fri, 28 Sep 2018 12:20:07 -0700",
        "Message-Id": "<20180928192007.29519-2-johndale@cisco.com>",
        "X-Mailer": "git-send-email 2.16.2",
        "In-Reply-To": "<20180928192007.29519-1-johndale@cisco.com>",
        "References": "<20180928021655.24869-1-johndale@cisco.com>\n\t<20180928192007.29519-1-johndale@cisco.com>",
        "X-Outbound-SMTP-Client": "10.193.184.48, savbu-usnic-a.cisco.com",
        "X-Outbound-Node": "alln-core-7.cisco.com",
        "Subject": "[dpdk-dev] [PATCH v2 2/2] net/enic: add AVX2 based vectorized Rx\n\thandler",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "From: Hyong Youb Kim <hyonkim@cisco.com>\n\nAdd the vectorized version of the no-scatter Rx handler. It aims to\nprocess 8 descriptors per loop using AVX2 SIMD instructions. This\nhandler is in its own file enic_rxtx_vec_avx2.c, and makefile and\nmeson.build are modified to compile it when the compiler supports\nAVX2. Under ideal conditions, the vectorized handler reduces\ncycles/packet by more than 30%, when compared against the no-scatter\nRx handler. Most implementation ideas come from i40e's AVX2 based\nhandler, so credit goes to its authors.\n\nAt this point, the new handler is meant for field trials, and is not\nselected by default. So add a new devarg enable-avx2-rx to allow the\nuser to request the use of the new handler. When enable-avx2-rx=1, the\ndriver will consider using the new handler.\n\nAlso update the guide doc and introduce the vectorized handler.\n\nSigned-off-by: Hyong Youb Kim <hyonkim@cisco.com>\n---\n\nv2: remove bool type from stucture (found by checkpatch)\n\n doc/guides/nics/enic.rst              |  32 ++\n drivers/net/enic/Makefile             |   7 +\n drivers/net/enic/enic.h               |   3 +\n drivers/net/enic/enic_ethdev.c        |  27 +-\n drivers/net/enic/enic_main.c          |  34 +-\n drivers/net/enic/enic_rxtx_vec_avx2.c | 832 ++++++++++++++++++++++++++++++++++\n drivers/net/enic/meson.build          |   5 +\n 7 files changed, 931 insertions(+), 9 deletions(-)\n create mode 100644 drivers/net/enic/enic_rxtx_vec_avx2.c",
    "diff": "diff --git a/doc/guides/nics/enic.rst b/doc/guides/nics/enic.rst\nindex 86941fdb2..b31f4eef9 100644\n--- a/doc/guides/nics/enic.rst\n+++ b/doc/guides/nics/enic.rst\n@@ -351,6 +351,38 @@ suitable for others. Such applications may change the mode by setting\n   applications such as OVS-DPDK performance benchmarks that utilize\n   only the default VLAN and want to see only untagged packets.\n \n+\n+Vectorized Rx Handler\n+---------------------\n+\n+ENIC PMD includes a version of the receive handler that is vectorized using\n+AVX2 SIMD instructions. It is meant for bulk, throughput oriented workloads\n+where reducing cycles/packet in PMD is a priority. In order to use the\n+vectorized handler, take the following steps.\n+\n+- Use a recent version of gcc, icc, or clang and build 64-bit DPDK. If\n+  the compiler is known to support AVX2, DPDK build system\n+  automatically compiles the vectorized handler. Otherwise, the\n+  handler is not available.\n+\n+- Set ``devargs`` parameter ``enable-avx2-rx=1`` to explicitly request that\n+  PMD consider the vectorized handler when selecting the receive handler.\n+\n+  As the current implementation is intended for field trials, by default, the\n+  vectorized handler is not considerd (``enable-avx2-rx=0``).\n+\n+- Run on a UCS M4 or later server with CPUs that support AVX2.\n+\n+PMD selects the vectorized handler when the handler is compiled into\n+the driver, the user requests its use via ``enable-avx2-rx=1``, CPU\n+supports AVX2, and scatter Rx is not used. To verify that the\n+vectorized handler is selected, enable debug logging\n+(``--log-level=pmd,debug``) and check the following message.\n+\n+.. code-block:: console\n+\n+    enic_use_vector_rx_handler use the non-scatter avx2 Rx handler\n+\n .. _enic_limitations:\n \n Limitations\ndiff --git a/drivers/net/enic/Makefile b/drivers/net/enic/Makefile\nindex 7c6c29cc0..3ec6f9159 100644\n--- a/drivers/net/enic/Makefile\n+++ b/drivers/net/enic/Makefile\n@@ -39,4 +39,11 @@ SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_intr.c\n SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_rq.c\n SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_rss.c\n \n+ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2)\n+# The current implementation assumes 64-bit pointers\n+ifeq ($(CONFIG_RTE_ARCH_X86_64),y)\n+\tSRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic_rxtx_vec_avx2.c\n+endif\n+endif\n+\n include $(RTE_SDK)/mk/rte.lib.mk\ndiff --git a/drivers/net/enic/enic.h b/drivers/net/enic/enic.h\nindex 775cd5d55..665f5668a 100644\n--- a/drivers/net/enic/enic.h\n+++ b/drivers/net/enic/enic.h\n@@ -106,6 +106,7 @@ struct enic {\n \tstruct vnic_dev_bar bar0;\n \tstruct vnic_dev *vdev;\n \n+\tuint64_t mbuf_initializer;\n \tunsigned int port_id;\n \tbool overlay_offload;\n \tstruct rte_eth_dev *rte_dev;\n@@ -128,6 +129,7 @@ struct enic {\n \tu8 filter_actions; /* HW supported actions */\n \tbool vxlan;\n \tbool disable_overlay; /* devargs disable_overlay=1 */\n+\tuint8_t enable_avx2_rx;  /* devargs enable-avx2-rx=1 */\n \tbool nic_cfg_chk;     /* NIC_CFG_CHK available */\n \tbool udp_rss_weak;    /* Bodega style UDP RSS */\n \tuint8_t ig_vlan_rewrite_mode; /* devargs ig-vlan-rewrite */\n@@ -329,6 +331,7 @@ uint16_t enic_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\tuint16_t nb_pkts);\n int enic_set_mtu(struct enic *enic, uint16_t new_mtu);\n int enic_link_update(struct enic *enic);\n+bool enic_use_vector_rx_handler(struct enic *enic);\n void enic_fdir_info(struct enic *enic);\n void enic_fdir_info_get(struct enic *enic, struct rte_eth_fdir_info *stats);\n void copy_fltr_v1(struct filter_v2 *fltr, struct rte_eth_fdir_input *input,\ndiff --git a/drivers/net/enic/enic_ethdev.c b/drivers/net/enic/enic_ethdev.c\nindex 65333c47a..4d450fe0c 100644\n--- a/drivers/net/enic/enic_ethdev.c\n+++ b/drivers/net/enic/enic_ethdev.c\n@@ -37,6 +37,7 @@ static const struct rte_pci_id pci_id_enic_map[] = {\n };\n \n #define ENIC_DEVARG_DISABLE_OVERLAY \"disable-overlay\"\n+#define ENIC_DEVARG_ENABLE_AVX2_RX \"enable-avx2-rx\"\n #define ENIC_DEVARG_IG_VLAN_REWRITE \"ig-vlan-rewrite\"\n \n RTE_INIT(enicpmd_init_log)\n@@ -915,22 +916,27 @@ static const struct eth_dev_ops enicpmd_eth_dev_ops = {\n \t.udp_tunnel_port_del  = enicpmd_dev_udp_tunnel_port_del,\n };\n \n-static int enic_parse_disable_overlay(__rte_unused const char *key,\n-\t\t\t\t      const char *value,\n-\t\t\t\t      void *opaque)\n+static int enic_parse_zero_one(const char *key,\n+\t\t\t       const char *value,\n+\t\t\t       void *opaque)\n {\n \tstruct enic *enic;\n+\tbool b;\n \n \tenic = (struct enic *)opaque;\n \tif (strcmp(value, \"0\") == 0) {\n-\t\tenic->disable_overlay = false;\n+\t\tb = false;\n \t} else if (strcmp(value, \"1\") == 0) {\n-\t\tenic->disable_overlay = true;\n+\t\tb = true;\n \t} else {\n-\t\tdev_err(enic, \"Invalid value for \" ENIC_DEVARG_DISABLE_OVERLAY\n-\t\t\t\": expected=0|1 given=%s\\n\", value);\n+\t\tdev_err(enic, \"Invalid value for %s\"\n+\t\t\t\": expected=0|1 given=%s\\n\", key, value);\n \t\treturn -EINVAL;\n \t}\n+\tif (strcmp(key, ENIC_DEVARG_DISABLE_OVERLAY) == 0)\n+\t\tenic->disable_overlay = b;\n+\tif (strcmp(key, ENIC_DEVARG_ENABLE_AVX2_RX) == 0)\n+\t\tenic->enable_avx2_rx = b;\n \treturn 0;\n }\n \n@@ -971,6 +977,7 @@ static int enic_check_devargs(struct rte_eth_dev *dev)\n {\n \tstatic const char *const valid_keys[] = {\n \t\tENIC_DEVARG_DISABLE_OVERLAY,\n+\t\tENIC_DEVARG_ENABLE_AVX2_RX,\n \t\tENIC_DEVARG_IG_VLAN_REWRITE,\n \t\tNULL};\n \tstruct enic *enic = pmd_priv(dev);\n@@ -979,6 +986,7 @@ static int enic_check_devargs(struct rte_eth_dev *dev)\n \tENICPMD_FUNC_TRACE();\n \n \tenic->disable_overlay = false;\n+\tenic->enable_avx2_rx = false;\n \tenic->ig_vlan_rewrite_mode = IG_VLAN_REWRITE_MODE_PASS_THRU;\n \tif (!dev->device->devargs)\n \t\treturn 0;\n@@ -986,7 +994,9 @@ static int enic_check_devargs(struct rte_eth_dev *dev)\n \tif (!kvlist)\n \t\treturn -EINVAL;\n \tif (rte_kvargs_process(kvlist, ENIC_DEVARG_DISABLE_OVERLAY,\n-\t\t\t       enic_parse_disable_overlay, enic) < 0 ||\n+\t\t\t       enic_parse_zero_one, enic) < 0 ||\n+\t    rte_kvargs_process(kvlist, ENIC_DEVARG_ENABLE_AVX2_RX,\n+\t\t\t       enic_parse_zero_one, enic) < 0 ||\n \t    rte_kvargs_process(kvlist, ENIC_DEVARG_IG_VLAN_REWRITE,\n \t\t\t       enic_parse_ig_vlan_rewrite, enic) < 0) {\n \t\trte_kvargs_free(kvlist);\n@@ -1055,4 +1065,5 @@ RTE_PMD_REGISTER_PCI_TABLE(net_enic, pci_id_enic_map);\n RTE_PMD_REGISTER_KMOD_DEP(net_enic, \"* igb_uio | uio_pci_generic | vfio-pci\");\n RTE_PMD_REGISTER_PARAM_STRING(net_enic,\n \tENIC_DEVARG_DISABLE_OVERLAY \"=0|1 \"\n+\tENIC_DEVARG_ENABLE_AVX2_RX \"=0|1 \"\n \tENIC_DEVARG_IG_VLAN_REWRITE \"=trunk|untag|priority|pass\");\ndiff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c\nindex ea6cddbd3..aed73ee1b 100644\n--- a/drivers/net/enic/enic_main.c\n+++ b/drivers/net/enic/enic_main.c\n@@ -514,12 +514,29 @@ static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)\n \t}\n }\n \n+/*\n+ * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is used\n+ * used when that file is not compiled.\n+ */\n+bool __attribute__((weak))\n+enic_use_vector_rx_handler(__rte_unused struct enic *enic)\n+{\n+\treturn false;\n+}\n+\n static void pick_rx_handler(struct enic *enic)\n {\n \tstruct rte_eth_dev *eth_dev;\n \n-\t/* Use the non-scatter, simplified RX handler if possible. */\n+\t/*\n+\t * Preference order:\n+\t * 1. The vectorized handler if possible and requested.\n+\t * 2. The non-scatter, simplified handler if scatter Rx is not used.\n+\t * 3. The default handler as a fallback.\n+\t */\n \teth_dev = enic->rte_dev;\n+\tif (enic_use_vector_rx_handler(enic))\n+\t\treturn;\n \tif (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {\n \t\tPMD_INIT_LOG(DEBUG, \" use the non-scatter Rx handler\");\n \t\teth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;\n@@ -535,6 +552,21 @@ int enic_enable(struct enic *enic)\n \tint err;\n \tstruct rte_eth_dev *eth_dev = enic->rte_dev;\n \tuint64_t simple_tx_offloads;\n+\tuintptr_t p;\n+\tstruct rte_mbuf mb_def = { .buf_addr = 0 };\n+\n+\t/*\n+\t * mbuf_initializer contains const-after-init fields of\n+\t * receive mbufs (i.e. 64 bits of fields from rearm_data).\n+\t * It is currently used by the vectorized handler.\n+\t */\n+\tmb_def.nb_segs = 1;\n+\tmb_def.data_off = RTE_PKTMBUF_HEADROOM;\n+\tmb_def.port = enic->port_id;\n+\trte_mbuf_refcnt_set(&mb_def, 1);\n+\trte_compiler_barrier();\n+\tp = (uintptr_t)&mb_def.rearm_data;\n+\tenic->mbuf_initializer = *(uint64_t *)p;\n \n \teth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);\n \teth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;\ndiff --git a/drivers/net/enic/enic_rxtx_vec_avx2.c b/drivers/net/enic/enic_rxtx_vec_avx2.c\nnew file mode 100644\nindex 000000000..4891cda1b\n--- /dev/null\n+++ b/drivers/net/enic/enic_rxtx_vec_avx2.c\n@@ -0,0 +1,832 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright 2008-2018 Cisco Systems, Inc.  All rights reserved.\n+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.\n+ */\n+\n+#include <rte_mbuf.h>\n+#include <rte_ethdev_driver.h>\n+\n+#include \"enic_compat.h\"\n+#include \"rq_enet_desc.h\"\n+#include \"enic.h\"\n+#include \"enic_rxtx_common.h\"\n+\n+#include <x86intrin.h>\n+\n+static struct rte_mbuf *\n+rx_one(struct cq_enet_rq_desc *cqd, struct rte_mbuf *mb, struct enic *enic)\n+{\n+\tbool tnl;\n+\n+\t*(uint64_t *)&mb->rearm_data = enic->mbuf_initializer;\n+\tmb->data_len = cqd->bytes_written_flags &\n+\t\tCQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;\n+\tmb->pkt_len = mb->data_len;\n+\ttnl = enic->overlay_offload && (cqd->completed_index_flags &\n+\t\t\t\t\tCQ_ENET_RQ_DESC_FLAGS_FCOE) != 0;\n+\tmb->packet_type =\n+\t\tenic_cq_rx_flags_to_pkt_type((struct cq_desc *)cqd, tnl);\n+\tenic_cq_rx_to_pkt_flags((struct cq_desc *)cqd, mb);\n+\t/* Wipe the outer types set by enic_cq_rx_flags_to_pkt_type() */\n+\tif (tnl) {\n+\t\tmb->packet_type &= ~(RTE_PTYPE_L3_MASK |\n+\t\t\t\t     RTE_PTYPE_L4_MASK);\n+\t}\n+\treturn mb;\n+}\n+\n+static uint16_t\n+enic_noscatter_vec_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,\n+\t\t\t     uint16_t nb_pkts)\n+{\n+\tstruct rte_mbuf **rx, **rxmb;\n+\tuint16_t cq_idx, nb_rx, max_rx;\n+\tstruct cq_enet_rq_desc *cqd;\n+\tstruct rq_enet_desc *rqd;\n+\tstruct vnic_cq *cq;\n+\tstruct vnic_rq *rq;\n+\tstruct enic *enic;\n+\tuint8_t color;\n+\n+\trq = rx_queue;\n+\tenic = vnic_dev_priv(rq->vdev);\n+\tcq = &enic->cq[enic_cq_rq(enic, rq->index)];\n+\tcq_idx = cq->to_clean;\n+\n+\t/*\n+\t * Fill up the reserve of free mbufs. Below, we restock the receive\n+\t * ring with these mbufs to avoid allocation failures.\n+\t */\n+\tif (rq->num_free_mbufs == 0) {\n+\t\tif (rte_mempool_get_bulk(rq->mp, (void **)rq->free_mbufs,\n+\t\t\t\t\t ENIC_RX_BURST_MAX))\n+\t\t\treturn 0;\n+\t\trq->num_free_mbufs = ENIC_RX_BURST_MAX;\n+\t}\n+\t/* Receive until the end of the ring, at most. */\n+\tmax_rx = RTE_MIN(nb_pkts, rq->num_free_mbufs);\n+\tmax_rx = RTE_MIN(max_rx, cq->ring.desc_count - cq_idx);\n+\n+\trxmb = rq->mbuf_ring + cq_idx;\n+\tcolor = cq->last_color;\n+\tcqd = (struct cq_enet_rq_desc *)(cq->ring.descs) + cq_idx;\n+\trx = rx_pkts;\n+\tif (max_rx == 0 ||\n+\t    (cqd->type_color & CQ_DESC_COLOR_MASK_NOSHIFT) == color)\n+\t\treturn 0;\n+\n+\t/* Step 1: Process one packet to do aligned 256-bit load below */\n+\tif (cq_idx & 0x1) {\n+\t\tif (unlikely(cqd->bytes_written_flags &\n+\t\t\t     CQ_ENET_RQ_DESC_FLAGS_TRUNCATED)) {\n+\t\t\trte_pktmbuf_free(*rxmb++);\n+\t\t\trte_atomic64_inc(&enic->soft_stats.rx_packet_errors);\n+\t\t} else {\n+\t\t\t*rx++ = rx_one(cqd, *rxmb++, enic);\n+\t\t}\n+\t\tcqd++;\n+\t\tmax_rx--;\n+\t}\n+\n+\tconst __m256i mask =\n+\t\t_mm256_set_epi8(/* Second descriptor */\n+\t\t\t0xff, /* type_color */\n+\t\t\t(CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT |\n+\t\t\t CQ_ENET_RQ_DESC_FLAGS_IPV4 |\n+\t\t\t CQ_ENET_RQ_DESC_FLAGS_IPV6 |\n+\t\t\t CQ_ENET_RQ_DESC_FLAGS_TCP |\n+\t\t\t CQ_ENET_RQ_DESC_FLAGS_UDP), /* flags */\n+\t\t\t0, 0, /* checksum_fcoe */\n+\t\t\t0xff, 0xff, /* vlan */\n+\t\t\t0x3f, 0xff, /* bytes_written_flags */\n+\t\t\t0xff, 0xff, 0xff, 0xff, /* rss_hash */\n+\t\t\t0xff, 0xff, /* q_number_rss_type_flags */\n+\t\t\t0, 0, /* completed_index_flags */\n+\t\t\t/* First descriptor */\n+\t\t\t0xff, /* type_color */\n+\t\t\t(CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT |\n+\t\t\t CQ_ENET_RQ_DESC_FLAGS_IPV4 |\n+\t\t\t CQ_ENET_RQ_DESC_FLAGS_IPV6 |\n+\t\t\t CQ_ENET_RQ_DESC_FLAGS_TCP |\n+\t\t\t CQ_ENET_RQ_DESC_FLAGS_UDP), /* flags */\n+\t\t\t0, 0, /* checksum_fcoe */\n+\t\t\t0xff, 0xff, /* vlan */\n+\t\t\t0x3f, 0xff, /* bytes_written_flags */\n+\t\t\t0xff, 0xff, 0xff, 0xff, /* rss_hash */\n+\t\t\t0xff, 0xff, /* q_number_rss_type_flags */\n+\t\t\t0, 0 /* completed_index_flags */\n+\t\t\t);\n+\tconst __m256i shuffle_mask =\n+\t\t_mm256_set_epi8(/* Second descriptor */\n+\t\t\t7, 6, 5, 4,             /* rss = rss_hash */\n+\t\t\t11, 10,                 /* vlan_tci = vlan */\n+\t\t\t9, 8,                   /* data_len = bytes_written */\n+\t\t\t0x80, 0x80, 9, 8,       /* pkt_len = bytes_written */\n+\t\t\t0x80, 0x80, 0x80, 0x80, /* packet_type = 0 */\n+\t\t\t/* First descriptor */\n+\t\t\t7, 6, 5, 4,             /* rss = rss_hash */\n+\t\t\t11, 10,                 /* vlan_tci = vlan */\n+\t\t\t9, 8,                   /* data_len = bytes_written */\n+\t\t\t0x80, 0x80, 9, 8,       /* pkt_len = bytes_written */\n+\t\t\t0x80, 0x80, 0x80, 0x80  /* packet_type = 0 */\n+\t\t\t);\n+\t/* Used to collect 8 flags from 8 desc into one register */\n+\tconst __m256i flags_shuffle_mask =\n+\t\t_mm256_set_epi8(/* Second descriptor */\n+\t\t\t1, 3, 9, 14,\n+\t\t\t1, 3, 9, 14,\n+\t\t\t1, 3, 9, 14,\n+\t\t\t1, 3, 9, 14,\n+\t\t\t/* First descriptor */\n+\t\t\t1, 3, 9, 14,\n+\t\t\t1, 3, 9, 14,\n+\t\t\t1, 3, 9, 14,\n+\t\t\t/*\n+\t\t\t * Byte 3: upper byte of completed_index_flags\n+\t\t\t *         bit 5 = fcoe (tunnel)\n+\t\t\t * Byte 2: upper byte of q_number_rss_type_flags\n+\t\t\t *         bits 2,3,4,5 = rss type\n+\t\t\t *         bit 6 = csum_not_calc\n+\t\t\t * Byte 1: upper byte of bytes_written_flags\n+\t\t\t *         bit 6 = truncated\n+\t\t\t *         bit 7 = vlan stripped\n+\t\t\t * Byte 0: flags\n+\t\t\t */\n+\t\t\t1, 3, 9, 14\n+\t\t\t);\n+\t/* Used to collect 8 VLAN IDs from 8 desc into one register */\n+\tconst __m256i vlan_shuffle_mask =\n+\t\t_mm256_set_epi8(/* Second descriptor */\n+\t\t\t0x80, 0x80, 11, 10,\n+\t\t\t0x80, 0x80, 11, 10,\n+\t\t\t0x80, 0x80, 11, 10,\n+\t\t\t0x80, 0x80, 11, 10,\n+\t\t\t/* First descriptor */\n+\t\t\t0x80, 0x80, 11, 10,\n+\t\t\t0x80, 0x80, 11, 10,\n+\t\t\t0x80, 0x80, 11, 10,\n+\t\t\t0x80, 0x80, 11, 10);\n+\t/* PKT_RX_RSS_HASH is 1<<1 so fits in 8-bit integer */\n+\tconst __m256i rss_shuffle =\n+\t\t_mm256_set_epi8(/* second 128 bits */\n+\t\t\tPKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,\n+\t\t\tPKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,\n+\t\t\tPKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,\n+\t\t\tPKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,\n+\t\t\tPKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,\n+\t\t\t0, /* rss_types = 0 */\n+\t\t\t/* first 128 bits */\n+\t\t\tPKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,\n+\t\t\tPKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,\n+\t\t\tPKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,\n+\t\t\tPKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,\n+\t\t\tPKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,\n+\t\t\t0 /* rss_types = 0 */);\n+\t/*\n+\t * VLAN offload flags.\n+\t * shuffle index:\n+\t * vlan_stripped => bit 0\n+\t * vlan_id == 0  => bit 1\n+\t */\n+\tconst __m256i vlan_shuffle =\n+\t\t_mm256_set_epi32(0, 0, 0, 0,\n+\t\t\tPKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, 0,\n+\t\t\tPKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, PKT_RX_VLAN);\n+\t/* Use the same shuffle index as vlan_shuffle */\n+\tconst __m256i vlan_ptype_shuffle =\n+\t\t_mm256_set_epi32(0, 0, 0, 0,\n+\t\t\t\t RTE_PTYPE_L2_ETHER,\n+\t\t\t\t RTE_PTYPE_L2_ETHER,\n+\t\t\t\t RTE_PTYPE_L2_ETHER,\n+\t\t\t\t RTE_PTYPE_L2_ETHER_VLAN);\n+\t/*\n+\t * CKSUM flags. Shift right so they fit int 8-bit integers.\n+\t * shuffle index:\n+\t * ipv4_csum_ok    => bit 3\n+\t * ip4             => bit 2\n+\t * tcp_or_udp      => bit 1\n+\t * tcp_udp_csum_ok => bit 0\n+\t */\n+\tconst __m256i csum_shuffle =\n+\t\t_mm256_set_epi8(/* second 128 bits */\n+\t\t\t/* 1111 ip4+ip4_ok+l4+l4_ok */\n+\t\t\t((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1),\n+\t\t\t/* 1110 ip4_ok+ip4+l4+!l4_ok */\n+\t\t\t((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1),\n+\t\t\t(PKT_RX_IP_CKSUM_GOOD >> 1), /* 1101 ip4+ip4_ok */\n+\t\t\t(PKT_RX_IP_CKSUM_GOOD >> 1), /* 1100 ip4_ok+ip4 */\n+\t\t\t(PKT_RX_L4_CKSUM_GOOD >> 1), /* 1011 l4+l4_ok */\n+\t\t\t(PKT_RX_L4_CKSUM_BAD >> 1),  /* 1010 l4+!l4_ok */\n+\t\t\t0, /* 1001 */\n+\t\t\t0, /* 1000 */\n+\t\t\t/* 0111 !ip4_ok+ip4+l4+l4_ok */\n+\t\t\t((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD) >> 1),\n+\t\t\t/* 0110 !ip4_ok+ip4+l4+!l4_ok */\n+\t\t\t((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> 1),\n+\t\t\t(PKT_RX_IP_CKSUM_BAD >> 1),  /* 0101 !ip4_ok+ip4 */\n+\t\t\t(PKT_RX_IP_CKSUM_BAD >> 1),  /* 0100 !ip4_ok+ip4 */\n+\t\t\t(PKT_RX_L4_CKSUM_GOOD >> 1), /* 0011 l4+l4_ok */\n+\t\t\t(PKT_RX_L4_CKSUM_BAD >> 1),  /* 0010 l4+!l4_ok */\n+\t\t\t0, /* 0001 */\n+\t\t\t0, /* 0000 */\n+\t\t\t/* first 128 bits */\n+\t\t\t((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1),\n+\t\t\t((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1),\n+\t\t\t(PKT_RX_IP_CKSUM_GOOD >> 1),\n+\t\t\t(PKT_RX_IP_CKSUM_GOOD >> 1),\n+\t\t\t(PKT_RX_L4_CKSUM_GOOD >> 1),\n+\t\t\t(PKT_RX_L4_CKSUM_BAD >> 1),\n+\t\t\t0, 0,\n+\t\t\t((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD) >> 1),\n+\t\t\t((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> 1),\n+\t\t\t(PKT_RX_IP_CKSUM_BAD >> 1),\n+\t\t\t(PKT_RX_IP_CKSUM_BAD >> 1),\n+\t\t\t(PKT_RX_L4_CKSUM_GOOD >> 1),\n+\t\t\t(PKT_RX_L4_CKSUM_BAD >> 1),\n+\t\t\t0, 0);\n+\t/*\n+\t * Non-fragment PTYPEs.\n+\t * Shuffle 4-bit index:\n+\t * ip6 => bit 0\n+\t * ip4 => bit 1\n+\t * udp => bit 2\n+\t * tcp => bit 3\n+\t *   bit\n+\t * 3 2 1 0\n+\t * -------\n+\t * 0 0 0 0 unknown\n+\t * 0 0 0 1 ip6 | nonfrag\n+\t * 0 0 1 0 ip4 | nonfrag\n+\t * 0 0 1 1 unknown\n+\t * 0 1 0 0 unknown\n+\t * 0 1 0 1 ip6 | udp\n+\t * 0 1 1 0 ip4 | udp\n+\t * 0 1 1 1 unknown\n+\t * 1 0 0 0 unknown\n+\t * 1 0 0 1 ip6 | tcp\n+\t * 1 0 1 0 ip4 | tcp\n+\t * 1 0 1 1 unknown\n+\t * 1 1 0 0 unknown\n+\t * 1 1 0 1 unknown\n+\t * 1 1 1 0 unknown\n+\t * 1 1 1 1 unknown\n+\t *\n+\t * PTYPEs do not fit in 8 bits, so shift right 4..\n+\t */\n+\tconst __m256i nonfrag_ptype_shuffle =\n+\t\t_mm256_set_epi8(/* second 128 bits */\n+\t\t\tRTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\t(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,\n+\t\t\t(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\t(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,\n+\t\t\t(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\t(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |\n+\t\t\t RTE_PTYPE_L4_NONFRAG) >> 4,\n+\t\t\t(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |\n+\t\t\t RTE_PTYPE_L4_NONFRAG) >> 4,\n+\t\t\tRTE_PTYPE_UNKNOWN,\n+\t\t\t/* first 128 bits */\n+\t\t\tRTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\t(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,\n+\t\t\t(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\t(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,\n+\t\t\t(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\t(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |\n+\t\t\t RTE_PTYPE_L4_NONFRAG) >> 4,\n+\t\t\t(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |\n+\t\t\t RTE_PTYPE_L4_NONFRAG) >> 4,\n+\t\t\tRTE_PTYPE_UNKNOWN);\n+\t/* Fragment PTYPEs. Use the same shuffle index as above. */\n+\tconst __m256i frag_ptype_shuffle =\n+\t\t_mm256_set_epi8(/* second 128 bits */\n+\t\t\tRTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\t(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |\n+\t\t\t RTE_PTYPE_L4_FRAG) >> 4,\n+\t\t\t(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |\n+\t\t\t RTE_PTYPE_L4_FRAG) >> 4,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\t(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |\n+\t\t\t RTE_PTYPE_L4_FRAG) >> 4,\n+\t\t\t(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |\n+\t\t\t RTE_PTYPE_L4_FRAG) >> 4,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\t(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |\n+\t\t\t RTE_PTYPE_L4_FRAG) >> 4,\n+\t\t\t(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |\n+\t\t\t RTE_PTYPE_L4_FRAG) >> 4,\n+\t\t\tRTE_PTYPE_UNKNOWN,\n+\t\t\t/* first 128 bits */\n+\t\t\tRTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\t(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |\n+\t\t\t RTE_PTYPE_L4_FRAG) >> 4,\n+\t\t\t(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |\n+\t\t\t RTE_PTYPE_L4_FRAG) >> 4,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\t(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |\n+\t\t\t RTE_PTYPE_L4_FRAG) >> 4,\n+\t\t\t(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |\n+\t\t\t RTE_PTYPE_L4_FRAG) >> 4,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\t(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |\n+\t\t\t RTE_PTYPE_L4_FRAG) >> 4,\n+\t\t\t(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |\n+\t\t\t RTE_PTYPE_L4_FRAG) >> 4,\n+\t\t\tRTE_PTYPE_UNKNOWN);\n+\t/*\n+\t * Tunnel PTYPEs. Use the same shuffle index as above.\n+\t * L4 types are not part of this table. They come from non-tunnel\n+\t * types above.\n+\t */\n+\tconst __m256i tnl_l3_ptype_shuffle =\n+\t\t_mm256_set_epi8(/* second 128 bits */\n+\t\t\tRTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,\n+\t\t\tRTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,\n+\t\t\tRTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,\n+\t\t\tRTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,\n+\t\t\tRTE_PTYPE_UNKNOWN,\n+\t\t\t/* first 128 bits */\n+\t\t\tRTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,\n+\t\t\tRTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,\n+\t\t\tRTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,\n+\t\t\tRTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,\n+\t\t\tRTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,\n+\t\t\tRTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,\n+\t\t\tRTE_PTYPE_UNKNOWN);\n+\n+\tconst __m256i mbuf_init = _mm256_set_epi64x(0, enic->mbuf_initializer,\n+\t\t\t\t\t\t    0, enic->mbuf_initializer);\n+\n+\t/*\n+\t * --- cq desc fields ---    offset\n+\t * completed_index_flags    - 0   use: fcoe\n+\t * q_number_rss_type_flags  - 2   use: rss types, csum_not_calc\n+\t * rss_hash                 - 4   ==> mbuf.hash.rss\n+\t * bytes_written_flags      - 8   ==> mbuf.pkt_len,data_len\n+\t *                                use: truncated, vlan_stripped\n+\t * vlan                     - 10  ==> mbuf.vlan_tci\n+\t * checksum_fcoe            - 12  (unused)\n+\t * flags                    - 14  use: all bits\n+\t * type_color               - 15  (unused)\n+\t *\n+\t * --- mbuf fields ---       offset\n+\t * rearm_data              ---- 16\n+\t * data_off    - 0      (mbuf_init) -+\n+\t * refcnt      - 2      (mbuf_init)  |\n+\t * nb_segs     - 4      (mbuf_init)  | 16B 128b\n+\t * port        - 6      (mbuf_init)  |\n+\t * ol_flag     - 8      (from cqd)  -+\n+\t * rx_descriptor_fields1   ---- 32\n+\t * packet_type - 0      (from cqd)  -+\n+\t * pkt_len     - 4      (from cqd)   |\n+\t * data_len    - 8      (from cqd)   | 16B 128b\n+\t * vlan_tci    - 10     (from cqd)   |\n+\t * rss         - 12     (from cqd)  -+\n+\t */\n+\n+\t__m256i overlay_enabled =\n+\t\t_mm256_set1_epi32((uint32_t)enic->overlay_offload);\n+\n+\t/* Step 2: Process 8 packets per loop using SIMD */\n+\twhile (max_rx > 7 && (((cqd + 7)->type_color &\n+\t\t\t       CQ_DESC_COLOR_MASK_NOSHIFT) != color)) {\n+\t\t/* Load 8 16B CQ descriptors */\n+\t\t__m256i cqd01 = _mm256_load_si256((void *)cqd);\n+\t\t__m256i cqd23 = _mm256_load_si256((void *)(cqd + 2));\n+\t\t__m256i cqd45 = _mm256_load_si256((void *)(cqd + 4));\n+\t\t__m256i cqd67 = _mm256_load_si256((void *)(cqd + 6));\n+\t\t/* Copy 8 mbuf pointers to rx_pkts */\n+\t\t_mm256_storeu_si256((void *)rx,\n+\t\t\t\t    _mm256_loadu_si256((void *)rxmb));\n+\t\t_mm256_storeu_si256((void *)(rx + 4),\n+\t\t\t\t    _mm256_loadu_si256((void *)(rxmb + 4)));\n+\n+\t\t/*\n+\t\t * Collect 8 flags (each 32 bits) into one register.\n+\t\t * 4 shuffles, 3 blends, 1 permute for 8 desc: 1 inst/desc\n+\t\t */\n+\t\t__m256i flags01 =\n+\t\t\t_mm256_shuffle_epi8(cqd01, flags_shuffle_mask);\n+\t\t/*\n+\t\t * Shuffle above produces 8 x 32-bit flags for 8 descriptors\n+\t\t * in this order: 0, 0, 0, 0, 1, 1, 1, 1\n+\t\t * The duplicates in each 128-bit lane simplifies blending\n+\t\t * below.\n+\t\t */\n+\t\t__m256i flags23 =\n+\t\t\t_mm256_shuffle_epi8(cqd23, flags_shuffle_mask);\n+\t\t__m256i flags45 =\n+\t\t\t_mm256_shuffle_epi8(cqd45, flags_shuffle_mask);\n+\t\t__m256i flags67 =\n+\t\t\t_mm256_shuffle_epi8(cqd67, flags_shuffle_mask);\n+\t\t/* 1st blend produces flags for desc: 0, 2, 0, 0, 1, 3, 1, 1 */\n+\t\t__m256i flags0_3 = _mm256_blend_epi32(flags01, flags23, 0x22);\n+\t\t/* 2nd blend produces flags for desc: 4, 4, 4, 6, 5, 5, 5, 7 */\n+\t\t__m256i flags4_7 = _mm256_blend_epi32(flags45, flags67, 0x88);\n+\t\t/* 3rd blend produces flags for desc: 0, 2, 4, 6, 1, 3, 5, 7 */\n+\t\t__m256i flags0_7 = _mm256_blend_epi32(flags0_3, flags4_7, 0xcc);\n+\t\t/*\n+\t\t * Swap to reorder flags in this order: 1, 3, 5, 7, 0, 2, 4, 6\n+\t\t * This order simplifies blend operations way below that\n+\t\t * produce 'rearm' data for each mbuf.\n+\t\t */\n+\t\tflags0_7 = _mm256_permute4x64_epi64(flags0_7,\n+\t\t\t(1 << 6) + (0 << 4) + (3 << 2) + 2);\n+\n+\t\t/*\n+\t\t * Check truncated bits and bail out early on.\n+\t\t * 6 avx inst, 1 or, 1 if-then-else for 8 desc: 1 inst/desc\n+\t\t */\n+\t\t__m256i trunc =\n+\t\t\t_mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 17), 31);\n+\t\ttrunc = _mm256_add_epi64(trunc, _mm256_permute4x64_epi64(trunc,\n+\t\t\t(1 << 6) + (0 << 4) + (3 << 2) + 2));\n+\t\t/* 0:63 contains 1+3+0+2 and 64:127 contains 5+7+4+6 */\n+\t\tif (_mm256_extract_epi64(trunc, 0) ||\n+\t\t    _mm256_extract_epi64(trunc, 1))\n+\t\t\tbreak;\n+\n+\t\t/*\n+\t\t * Compute PKT_RX_RSS_HASH.\n+\t\t * Use 2 shifts and 1 shuffle for 8 desc: 0.375 inst/desc\n+\t\t * RSS types in byte 0, 4, 8, 12, 16, 20, 24, 28\n+\t\t * Everything else is zero.\n+\t\t */\n+\t\t__m256i rss_types =\n+\t\t\t_mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 10), 28);\n+\t\t/*\n+\t\t * RSS flags (PKT_RX_RSS_HASH) are in\n+\t\t * byte 0, 4, 8, 12, 16, 20, 24, 28\n+\t\t * Everything else is zero.\n+\t\t */\n+\t\t__m256i rss_flags = _mm256_shuffle_epi8(rss_shuffle, rss_types);\n+\n+\t\t/*\n+\t\t * Compute CKSUM flags. First build the index and then\n+\t\t * use it to shuffle csum_shuffle.\n+\t\t * 20 instructions including const loads: 2.5 inst/desc\n+\t\t */\n+\t\t/*\n+\t\t * csum_not_calc (bit 22)\n+\t\t * csum_not_calc (0) => 0xffffffff\n+\t\t * csum_not_calc (1) => 0x0\n+\t\t */\n+\t\tconst __m256i zero4 = _mm256_setzero_si256();\n+\t\tconst __m256i mask22 = _mm256_set1_epi32(0x400000);\n+\t\t__m256i csum_not_calc = _mm256_cmpeq_epi32(zero4,\n+\t\t\t_mm256_and_si256(flags0_7, mask22));\n+\t\t/*\n+\t\t * (tcp|udp) && !fragment => bit 1\n+\t\t * tcp = bit 2, udp = bit 1, frag = bit 6\n+\t\t */\n+\t\tconst __m256i mask1 = _mm256_set1_epi32(0x2);\n+\t\t__m256i tcp_udp =\n+\t\t\t_mm256_andnot_si256(_mm256_srli_epi32(flags0_7, 5),\n+\t\t\t\t_mm256_or_si256(flags0_7,\n+\t\t\t\t\t_mm256_srli_epi32(flags0_7, 1)));\n+\t\ttcp_udp = _mm256_and_si256(tcp_udp, mask1);\n+\t\t/* ipv4 (bit 5) => bit 2 */\n+\t\tconst __m256i mask2 = _mm256_set1_epi32(0x4);\n+\t\t__m256i ipv4 = _mm256_and_si256(mask2,\n+\t\t\t_mm256_srli_epi32(flags0_7, 3));\n+\t\t/*\n+\t\t * ipv4_csum_ok (bit 3) => bit 3\n+\t\t * tcp_udp_csum_ok (bit 0) => bit 0\n+\t\t * 0x9\n+\t\t */\n+\t\tconst __m256i mask0_3 = _mm256_set1_epi32(0x9);\n+\t\t__m256i csum_idx = _mm256_and_si256(flags0_7, mask0_3);\n+\t\tcsum_idx = _mm256_and_si256(csum_not_calc,\n+\t\t\t_mm256_or_si256(_mm256_or_si256(csum_idx, ipv4),\n+\t\t\t\ttcp_udp));\n+\t\t__m256i csum_flags =\n+\t\t\t_mm256_shuffle_epi8(csum_shuffle, csum_idx);\n+\t\t/* Shift left to restore CKSUM flags. See csum_shuffle. */\n+\t\tcsum_flags = _mm256_slli_epi32(csum_flags, 1);\n+\t\t/* Combine csum flags and offload flags: 0.125 inst/desc */\n+\t\trss_flags = _mm256_or_si256(rss_flags, csum_flags);\n+\n+\t\t/*\n+\t\t * Collect 8 VLAN IDs and compute vlan_id != 0 on each.\n+\t\t * 4 shuffles, 3 blends, 1 permute, 1 cmp, 1 sub for 8 desc:\n+\t\t * 1.25 inst/desc\n+\t\t */\n+\t\t__m256i vlan01 = _mm256_shuffle_epi8(cqd01, vlan_shuffle_mask);\n+\t\t__m256i vlan23 = _mm256_shuffle_epi8(cqd23, vlan_shuffle_mask);\n+\t\t__m256i vlan45 = _mm256_shuffle_epi8(cqd45, vlan_shuffle_mask);\n+\t\t__m256i vlan67 = _mm256_shuffle_epi8(cqd67, vlan_shuffle_mask);\n+\t\t__m256i vlan0_3 = _mm256_blend_epi32(vlan01, vlan23, 0x22);\n+\t\t__m256i vlan4_7 = _mm256_blend_epi32(vlan45, vlan67, 0x88);\n+\t\t/* desc: 0, 2, 4, 6, 1, 3, 5, 7 */\n+\t\t__m256i vlan0_7 = _mm256_blend_epi32(vlan0_3, vlan4_7, 0xcc);\n+\t\t/* desc: 1, 3, 5, 7, 0, 2, 4, 6 */\n+\t\tvlan0_7 = _mm256_permute4x64_epi64(vlan0_7,\n+\t\t\t(1 << 6) + (0 << 4) + (3 << 2) + 2);\n+\t\t/*\n+\t\t * Compare 0 == vlan_id produces 0xffffffff (-1) if\n+\t\t * vlan 0 and 0 if vlan non-0. Then subtracting the\n+\t\t * result from 0 produces 0 - (-1) = 1 for vlan 0, and\n+\t\t * 0 - 0 = 0 for vlan non-0.\n+\t\t */\n+\t\tvlan0_7 = _mm256_cmpeq_epi32(zero4, vlan0_7);\n+\t\t/* vlan_id != 0 => 0, vlan_id == 0 => 1 */\n+\t\tvlan0_7 = _mm256_sub_epi32(zero4, vlan0_7);\n+\n+\t\t/*\n+\t\t * Compute PKT_RX_VLAN and PKT_RX_VLAN_STRIPPED.\n+\t\t * Use 3 shifts, 1 or,  1 shuffle for 8 desc: 0.625 inst/desc\n+\t\t * VLAN offload flags in byte 0, 4, 8, 12, 16, 20, 24, 28\n+\t\t * Everything else is zero.\n+\t\t */\n+\t\t__m256i vlan_idx =\n+\t\t\t_mm256_or_si256(/* vlan_stripped => bit 0 */\n+\t\t\t\t_mm256_srli_epi32(_mm256_slli_epi32(flags0_7,\n+\t\t\t\t\t16), 31),\n+\t\t\t\t/* (vlan_id == 0) => bit 1 */\n+\t\t\t\t_mm256_slli_epi32(vlan0_7, 1));\n+\t\t/*\n+\t\t * The index captures 4 cases.\n+\t\t * stripped, id = 0   ==> 11b = 3\n+\t\t * stripped, id != 0  ==> 01b = 1\n+\t\t * not strip, id == 0 ==> 10b = 2\n+\t\t * not strip, id != 0 ==> 00b = 0\n+\t\t */\n+\t\t__m256i vlan_flags = _mm256_permutevar8x32_epi32(vlan_shuffle,\n+\t\t\tvlan_idx);\n+\t\t/* Combine vlan and offload flags: 0.125 inst/desc */\n+\t\trss_flags = _mm256_or_si256(rss_flags, vlan_flags);\n+\n+\t\t/*\n+\t\t * Compute non-tunnel PTYPEs.\n+\t\t * 17 inst / 8 desc = 2.125 inst/desc\n+\t\t */\n+\t\t/* ETHER and ETHER_VLAN */\n+\t\t__m256i vlan_ptype =\n+\t\t\t_mm256_permutevar8x32_epi32(vlan_ptype_shuffle,\n+\t\t\t\tvlan_idx);\n+\t\t/* Build the ptype index from flags */\n+\t\ttcp_udp = _mm256_slli_epi32(flags0_7, 29);\n+\t\ttcp_udp = _mm256_slli_epi32(_mm256_srli_epi32(tcp_udp, 30), 2);\n+\t\t__m256i ip4_ip6 =\n+\t\t\t_mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 26), 30);\n+\t\t__m256i ptype_idx = _mm256_or_si256(tcp_udp, ip4_ip6);\n+\t\t__m256i frag_bit =\n+\t\t\t_mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 25), 31);\n+\t\t__m256i nonfrag_ptype =\n+\t\t\t_mm256_shuffle_epi8(nonfrag_ptype_shuffle, ptype_idx);\n+\t\t__m256i frag_ptype =\n+\t\t\t_mm256_shuffle_epi8(frag_ptype_shuffle, ptype_idx);\n+\t\t/*\n+\t\t * Zero out the unwanted types and combine the remaining bits.\n+\t\t * The effect is same as selecting non-frag or frag types\n+\t\t * depending on the frag bit.\n+\t\t */\n+\t\tnonfrag_ptype = _mm256_and_si256(nonfrag_ptype,\n+\t\t\t_mm256_cmpeq_epi32(zero4, frag_bit));\n+\t\tfrag_ptype = _mm256_and_si256(frag_ptype,\n+\t\t\t_mm256_cmpgt_epi32(frag_bit, zero4));\n+\t\t__m256i ptype = _mm256_or_si256(nonfrag_ptype, frag_ptype);\n+\t\tptype = _mm256_slli_epi32(ptype, 4);\n+\t\t/*\n+\t\t * Compute tunnel PTYPEs.\n+\t\t * 15 inst / 8 desc = 1.875 inst/desc\n+\t\t */\n+\t\t__m256i tnl_l3_ptype =\n+\t\t\t_mm256_shuffle_epi8(tnl_l3_ptype_shuffle, ptype_idx);\n+\t\ttnl_l3_ptype = _mm256_slli_epi32(tnl_l3_ptype, 16);\n+\t\t/*\n+\t\t * Shift non-tunnel L4 types to make them tunnel types.\n+\t\t * RTE_PTYPE_L4_TCP << 16 == RTE_PTYPE_INNER_L4_TCP\n+\t\t */\n+\t\t__m256i tnl_l4_ptype =\n+\t\t\t_mm256_slli_epi32(_mm256_and_si256(ptype,\n+\t\t\t\t_mm256_set1_epi32(RTE_PTYPE_L4_MASK)), 16);\n+\t\t__m256i tnl_ptype =\n+\t\t\t_mm256_or_si256(tnl_l3_ptype, tnl_l4_ptype);\n+\t\ttnl_ptype = _mm256_or_si256(tnl_ptype,\n+\t\t\t_mm256_set1_epi32(RTE_PTYPE_TUNNEL_GRENAT |\n+\t\t\t\tRTE_PTYPE_INNER_L2_ETHER));\n+\t\t/*\n+\t\t * Select non-tunnel or tunnel types by zeroing out the\n+\t\t * unwanted ones.\n+\t\t */\n+\t\t__m256i tnl_flags = _mm256_and_si256(overlay_enabled,\n+\t\t\t_mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 2), 31));\n+\t\ttnl_ptype = _mm256_and_si256(tnl_ptype,\n+\t\t\t_mm256_sub_epi32(zero4, tnl_flags));\n+\t\tptype =\t_mm256_and_si256(ptype,\n+\t\t\t_mm256_cmpeq_epi32(zero4, tnl_flags));\n+\t\t/*\n+\t\t * Combine types and swap to have ptypes in the same order\n+\t\t * as desc.\n+\t\t * desc: 0 2 4 6 1 3 5 7\n+\t\t * 3 inst / 8 desc = 0.375 inst/desc\n+\t\t */\n+\t\tptype = _mm256_or_si256(ptype, tnl_ptype);\n+\t\tptype = _mm256_or_si256(ptype, vlan_ptype);\n+\t\tptype = _mm256_permute4x64_epi64(ptype,\n+\t\t\t(1 << 6) + (0 << 4) + (3 << 2) + 2);\n+\n+\t\t/*\n+\t\t * Mask packet length.\n+\t\t * Use 4 ands: 0.5 instructions/desc\n+\t\t */\n+\t\tcqd01 = _mm256_and_si256(cqd01, mask);\n+\t\tcqd23 = _mm256_and_si256(cqd23, mask);\n+\t\tcqd45 = _mm256_and_si256(cqd45, mask);\n+\t\tcqd67 = _mm256_and_si256(cqd67, mask);\n+\t\t/*\n+\t\t * Shuffle. Two 16B sets of the mbuf fields.\n+\t\t * packet_type, pkt_len, data_len, vlan_tci, rss\n+\t\t */\n+\t\t__m256i rearm01 = _mm256_shuffle_epi8(cqd01, shuffle_mask);\n+\t\t__m256i rearm23 = _mm256_shuffle_epi8(cqd23, shuffle_mask);\n+\t\t__m256i rearm45 = _mm256_shuffle_epi8(cqd45, shuffle_mask);\n+\t\t__m256i rearm67 = _mm256_shuffle_epi8(cqd67, shuffle_mask);\n+\n+\t\t/*\n+\t\t * Blend in ptypes\n+\t\t * 4 blends and 3 shuffles for 8 desc: 0.875 inst/desc\n+\t\t */\n+\t\trearm01 = _mm256_blend_epi32(rearm01, ptype, 0x11);\n+\t\trearm23 = _mm256_blend_epi32(rearm23,\n+\t\t\t_mm256_shuffle_epi32(ptype, 1), 0x11);\n+\t\trearm45 = _mm256_blend_epi32(rearm45,\n+\t\t\t_mm256_shuffle_epi32(ptype, 2), 0x11);\n+\t\trearm67 = _mm256_blend_epi32(rearm67,\n+\t\t\t_mm256_shuffle_epi32(ptype, 3), 0x11);\n+\n+\t\t/*\n+\t\t * Move rss_flags into ol_flags in mbuf_init.\n+\t\t * Use 1 shift and 1 blend for each desc: 2 inst/desc\n+\t\t */\n+\t\t__m256i mbuf_init4_5 = _mm256_blend_epi32(mbuf_init,\n+\t\t\trss_flags, 0x44);\n+\t\t__m256i mbuf_init2_3 = _mm256_blend_epi32(mbuf_init,\n+\t\t\t_mm256_slli_si256(rss_flags, 4), 0x44);\n+\t\t__m256i mbuf_init0_1 = _mm256_blend_epi32(mbuf_init,\n+\t\t\t_mm256_slli_si256(rss_flags, 8), 0x44);\n+\t\t__m256i mbuf_init6_7 = _mm256_blend_epi32(mbuf_init,\n+\t\t\t_mm256_srli_si256(rss_flags, 4), 0x44);\n+\n+\t\t/*\n+\t\t * Build rearm, one per desc.\n+\t\t * 8 blends and 4 permutes: 1.5 inst/desc\n+\t\t */\n+\t\t__m256i rearm0 = _mm256_blend_epi32(rearm01,\n+\t\t\tmbuf_init0_1, 0xf0);\n+\t\t__m256i rearm1 = _mm256_blend_epi32(mbuf_init0_1,\n+\t\t\trearm01, 0xf0);\n+\t\t__m256i rearm2 = _mm256_blend_epi32(rearm23,\n+\t\t\tmbuf_init2_3, 0xf0);\n+\t\t__m256i rearm3 = _mm256_blend_epi32(mbuf_init2_3,\n+\t\t\trearm23, 0xf0);\n+\t\t/* Swap upper and lower 64 bits */\n+\t\trearm0 = _mm256_permute4x64_epi64(rearm0,\n+\t\t\t(1 << 6) + (0 << 4) + (3 << 2) + 2);\n+\t\trearm2 = _mm256_permute4x64_epi64(rearm2,\n+\t\t\t(1 << 6) + (0 << 4) + (3 << 2) + 2);\n+\t\t/* Second set of 4 descriptors */\n+\t\t__m256i rearm4 = _mm256_blend_epi32(rearm45,\n+\t\t\tmbuf_init4_5, 0xf0);\n+\t\t__m256i rearm5 = _mm256_blend_epi32(mbuf_init4_5,\n+\t\t\trearm45, 0xf0);\n+\t\t__m256i rearm6 = _mm256_blend_epi32(rearm67,\n+\t\t\tmbuf_init6_7, 0xf0);\n+\t\t__m256i rearm7 = _mm256_blend_epi32(mbuf_init6_7,\n+\t\t\trearm67, 0xf0);\n+\t\trearm4 = _mm256_permute4x64_epi64(rearm4,\n+\t\t\t(1 << 6) + (0 << 4) + (3 << 2) + 2);\n+\t\trearm6 = _mm256_permute4x64_epi64(rearm6,\n+\t\t\t(1 << 6) + (0 << 4) + (3 << 2) + 2);\n+\n+\t\t/*\n+\t\t * Write out 32B of mbuf fields.\n+\t\t * data_off    - off 0  (mbuf_init)\n+\t\t * refcnt      - 2      (mbuf_init)\n+\t\t * nb_segs     - 4      (mbuf_init)\n+\t\t * port        - 6      (mbuf_init)\n+\t\t * ol_flag     - 8      (from cqd)\n+\t\t * packet_type - 16     (from cqd)\n+\t\t * pkt_len     - 20     (from cqd)\n+\t\t * data_len    - 24     (from cqd)\n+\t\t * vlan_tci    - 26     (from cqd)\n+\t\t * rss         - 28     (from cqd)\n+\t\t */\n+\t\t_mm256_storeu_si256((__m256i *)&rxmb[0]->rearm_data, rearm0);\n+\t\t_mm256_storeu_si256((__m256i *)&rxmb[1]->rearm_data, rearm1);\n+\t\t_mm256_storeu_si256((__m256i *)&rxmb[2]->rearm_data, rearm2);\n+\t\t_mm256_storeu_si256((__m256i *)&rxmb[3]->rearm_data, rearm3);\n+\t\t_mm256_storeu_si256((__m256i *)&rxmb[4]->rearm_data, rearm4);\n+\t\t_mm256_storeu_si256((__m256i *)&rxmb[5]->rearm_data, rearm5);\n+\t\t_mm256_storeu_si256((__m256i *)&rxmb[6]->rearm_data, rearm6);\n+\t\t_mm256_storeu_si256((__m256i *)&rxmb[7]->rearm_data, rearm7);\n+\n+\t\tmax_rx -= 8;\n+\t\tcqd += 8;\n+\t\trx += 8;\n+\t\trxmb += 8;\n+\t}\n+\n+\t/*\n+\t * Step 3: Slow path to handle a small (<8) number of packets and\n+\t * occasional truncated packets.\n+\t */\n+\twhile (max_rx && ((cqd->type_color &\n+\t\t\t   CQ_DESC_COLOR_MASK_NOSHIFT) != color)) {\n+\t\tif (unlikely(cqd->bytes_written_flags &\n+\t\t\t     CQ_ENET_RQ_DESC_FLAGS_TRUNCATED)) {\n+\t\t\trte_pktmbuf_free(*rxmb++);\n+\t\t\trte_atomic64_inc(&enic->soft_stats.rx_packet_errors);\n+\t\t} else {\n+\t\t\t*rx++ = rx_one(cqd, *rxmb++, enic);\n+\t\t}\n+\t\tcqd++;\n+\t\tmax_rx--;\n+\t}\n+\n+\t/* Number of descriptors visited */\n+\tnb_rx = cqd - (struct cq_enet_rq_desc *)(cq->ring.descs) - cq_idx;\n+\tif (nb_rx == 0)\n+\t\treturn 0;\n+\trqd = ((struct rq_enet_desc *)rq->ring.descs) + cq_idx;\n+\trxmb = rq->mbuf_ring + cq_idx;\n+\tcq_idx += nb_rx;\n+\trq->rx_nb_hold += nb_rx;\n+\tif (unlikely(cq_idx == cq->ring.desc_count)) {\n+\t\tcq_idx = 0;\n+\t\tcq->last_color ^= CQ_DESC_COLOR_MASK_NOSHIFT;\n+\t}\n+\tcq->to_clean = cq_idx;\n+\n+\t/* Step 4: Restock RQ with new mbufs */\n+\tmemcpy(rxmb, rq->free_mbufs + ENIC_RX_BURST_MAX - rq->num_free_mbufs,\n+\t       sizeof(struct rte_mbuf *) * nb_rx);\n+\trq->num_free_mbufs -= nb_rx;\n+\twhile (nb_rx) {\n+\t\trqd->address = (*rxmb)->buf_iova + RTE_PKTMBUF_HEADROOM;\n+\t\tnb_rx--;\n+\t\trqd++;\n+\t\trxmb++;\n+\t}\n+\tif (rq->rx_nb_hold > rq->rx_free_thresh) {\n+\t\trq->posted_index = enic_ring_add(rq->ring.desc_count,\n+\t\t\t\t\t\t rq->posted_index,\n+\t\t\t\t\t\t rq->rx_nb_hold);\n+\t\trq->rx_nb_hold = 0;\n+\t\trte_wmb();\n+\t\tiowrite32_relaxed(rq->posted_index,\n+\t\t\t\t  &rq->ctrl->posted_index);\n+\t}\n+\n+\treturn rx - rx_pkts;\n+}\n+\n+bool\n+enic_use_vector_rx_handler(struct enic *enic)\n+{\n+\tstruct rte_eth_dev *eth_dev;\n+\tstruct rte_fdir_conf *fconf;\n+\n+\teth_dev = enic->rte_dev;\n+\t/* User needs to request for the avx2 handler */\n+\tif (!enic->enable_avx2_rx)\n+\t\treturn false;\n+\t/* Do not support scatter Rx */\n+\tif (!(enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0))\n+\t\treturn false;\n+\t/* Do not support fdir/flow */\n+\tfconf = &eth_dev->data->dev_conf.fdir_conf;\n+\tif (fconf->mode != RTE_FDIR_MODE_NONE)\n+\t\treturn false;\n+\tif (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) {\n+\t\tPMD_INIT_LOG(DEBUG, \" use the non-scatter avx2\"\n+\t\t\t     \" Rx handler\");\n+\t\teth_dev->rx_pkt_burst = &enic_noscatter_vec_recv_pkts;\n+\t\treturn true;\n+\t}\n+\treturn false;\n+}\ndiff --git a/drivers/net/enic/meson.build b/drivers/net/enic/meson.build\nindex bfd4e2373..5565649c4 100644\n--- a/drivers/net/enic/meson.build\n+++ b/drivers/net/enic/meson.build\n@@ -17,3 +17,8 @@ sources = files(\n \t)\n deps += ['hash']\n includes += include_directories('base')\n+\n+# The current implementation assumes 64-bit pointers\n+if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') and cc.sizeof('void *') == 8\n+\tsources += files('enic_rxtx_vec_avx2.c')\n+endif\n",
    "prefixes": [
        "v2",
        "2/2"
    ]
}