get:
Show a patch.

patch:
Partially update a patch; only the fields supplied in the request are changed.

put:
Fully update a patch; the request must supply a complete representation of the resource.
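
A sketch of how a client might exercise these methods with curl. Read access needs no authentication; the write example assumes an API token with sufficient rights on the project, and $PATCHWORK_TOKEN is a hypothetical placeholder for it:

    # GET: fetch this patch as JSON (the same resource shown below)
    curl -s -H "Accept: application/json" \
        https://patches.dpdk.org/api/patches/91560/

    # PATCH: partial update, e.g. change only the state field
    # (assumes the token's user is allowed to edit patches in this project)
    curl -s -X PATCH \
        -H "Authorization: Token $PATCHWORK_TOKEN" \
        -d "state=accepted" \
        https://patches.dpdk.org/api/patches/91560/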

GET /api/patches/91560/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 91560,
    "url": "https://patches.dpdk.org/api/patches/91560/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/20210415085811.56429-3-leyi.rong@intel.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20210415085811.56429-3-leyi.rong@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20210415085811.56429-3-leyi.rong@intel.com",
    "date": "2021-04-15T08:58:11",
    "name": "[v5,2/2] net/ice: add Rx AVX512 offload path",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "7c87305dbf2f039bfb84415f74589cfa854e1c0a",
    "submitter": {
        "id": 1204,
        "url": "https://patches.dpdk.org/api/people/1204/?format=api",
        "name": "Leyi Rong",
        "email": "leyi.rong@intel.com"
    },
    "delegate": {
        "id": 1540,
        "url": "https://patches.dpdk.org/api/users/1540/?format=api",
        "username": "qzhan15",
        "first_name": "Qi",
        "last_name": "Zhang",
        "email": "qi.z.zhang@intel.com"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/20210415085811.56429-3-leyi.rong@intel.com/mbox/",
    "series": [
        {
            "id": 16405,
            "url": "https://patches.dpdk.org/api/series/16405/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=16405",
            "date": "2021-04-15T08:58:09",
            "name": "add alternative AVX512 offload path",
            "version": 5,
            "mbox": "https://patches.dpdk.org/series/16405/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/91560/comments/",
    "check": "fail",
    "checks": "https://patches.dpdk.org/api/patches/91560/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 7798EA0A0E;\n\tThu, 15 Apr 2021 11:23:50 +0200 (CEST)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 878CB16218E;\n\tThu, 15 Apr 2021 11:23:45 +0200 (CEST)",
            "from mga18.intel.com (mga18.intel.com [134.134.136.126])\n by mails.dpdk.org (Postfix) with ESMTP id 8D0EB162181\n for <dev@dpdk.org>; Thu, 15 Apr 2021 11:23:41 +0200 (CEST)",
            "from fmsmga002.fm.intel.com ([10.253.24.26])\n by orsmga106.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 15 Apr 2021 02:23:41 -0700",
            "from dpdk-lrong-srv-04.sh.intel.com ([10.67.119.221])\n by fmsmga002.fm.intel.com with ESMTP; 15 Apr 2021 02:23:39 -0700"
        ],
        "IronPort-SDR": [
            "\n 7VPZ3qhX55hv6xF/vgAi4Za9Sj/ozY2x5CBclq+xx+lp4omUyvKfXo/EWUIlG8LJfUuzexFsQj\n zSst6endAXLw==",
            "\n txO97qxU10wRd/7mbUOvMq8s9IO2JEgaaMQkYbNc+j+7dn0WneADLRCVu+UyE5lj2ypEmnCVzV\n eV54NGB0VB2Q=="
        ],
        "X-IronPort-AV": [
            "E=McAfee;i=\"6200,9189,9954\"; a=\"182320660\"",
            "E=Sophos;i=\"5.82,223,1613462400\"; d=\"scan'208\";a=\"182320660\"",
            "E=Sophos;i=\"5.82,223,1613462400\"; d=\"scan'208\";a=\"452841435\""
        ],
        "X-ExtLoop1": "1",
        "From": "Leyi Rong <leyi.rong@intel.com>",
        "To": "qi.z.zhang@intel.com,\n\twenzhuo.lu@intel.com",
        "Cc": "dev@dpdk.org,\n\tLeyi Rong <leyi.rong@intel.com>",
        "Date": "Thu, 15 Apr 2021 16:58:11 +0800",
        "Message-Id": "<20210415085811.56429-3-leyi.rong@intel.com>",
        "X-Mailer": "git-send-email 2.17.1",
        "In-Reply-To": "<20210415085811.56429-1-leyi.rong@intel.com>",
        "References": "<20210317091409.11725-1-leyi.rong@intel.com>\n <20210415085811.56429-1-leyi.rong@intel.com>",
        "Subject": "[dpdk-dev] [PATCH v5 2/2] net/ice: add Rx AVX512 offload path",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Split AVX512 Rx data path into two, one is for basic,\nthe other one can support additional Rx offload features,\nincluding Rx checksum offload, Rx vlan offload, RSS offload.\n\nSigned-off-by: Leyi Rong <leyi.rong@intel.com>\nSigned-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>\n---\n drivers/net/ice/ice_rxtx.c            |  50 +++-\n drivers/net/ice/ice_rxtx.h            |   7 +\n drivers/net/ice/ice_rxtx_vec_avx512.c | 345 +++++++++++++++++---------\n drivers/net/ice/ice_rxtx_vec_common.h |  52 ++--\n 4 files changed, 299 insertions(+), 155 deletions(-)",
    "diff": "diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c\nindex 75326c76ab..92fbbc18da 100644\n--- a/drivers/net/ice/ice_rxtx.c\n+++ b/drivers/net/ice/ice_rxtx.c\n@@ -1059,6 +1059,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,\n \tuint32_t ring_size;\n \tuint16_t len;\n \tint use_def_burst_func = 1;\n+\tuint64_t offloads;\n \n \tif (nb_desc % ICE_ALIGN_RING_DESC != 0 ||\n \t    nb_desc > ICE_MAX_RING_DESC ||\n@@ -1068,6 +1069,8 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,\n \t\treturn -EINVAL;\n \t}\n \n+\toffloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;\n+\n \t/* Free memory if needed */\n \tif (dev->data->rx_queues[queue_idx]) {\n \t\tice_rx_queue_release(dev->data->rx_queues[queue_idx]);\n@@ -1088,6 +1091,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev,\n \trxq->nb_rx_desc = nb_desc;\n \trxq->rx_free_thresh = rx_conf->rx_free_thresh;\n \trxq->queue_id = queue_idx;\n+\trxq->offloads = offloads;\n \n \trxq->reg_idx = vsi->base_queue + queue_idx;\n \trxq->port_id = dev->data->port_id;\n@@ -1990,7 +1994,9 @@ ice_dev_supported_ptypes_get(struct rte_eth_dev *dev)\n \t    dev->rx_pkt_burst == ice_recv_scattered_pkts_vec ||\n #ifdef CC_AVX512_SUPPORT\n \t    dev->rx_pkt_burst == ice_recv_pkts_vec_avx512 ||\n+\t    dev->rx_pkt_burst == ice_recv_pkts_vec_avx512_offload ||\n \t    dev->rx_pkt_burst == ice_recv_scattered_pkts_vec_avx512 ||\n+\t    dev->rx_pkt_burst == ice_recv_scattered_pkts_vec_avx512_offload ||\n #endif\n \t    dev->rx_pkt_burst == ice_recv_pkts_vec_avx2 ||\n \t    dev->rx_pkt_burst == ice_recv_scattered_pkts_vec_avx2)\n@@ -3052,12 +3058,14 @@ ice_set_rx_function(struct rte_eth_dev *dev)\n #ifdef RTE_ARCH_X86\n \tstruct ice_rx_queue *rxq;\n \tint i;\n+\tint rx_check_ret;\n \tbool use_avx512 = false;\n \tbool use_avx2 = false;\n \n \tif (rte_eal_process_type() == RTE_PROC_PRIMARY) {\n-\t\tif (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed &&\n-\t\t\t\trte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {\n+\t\trx_check_ret = ice_rx_vec_dev_check(dev);\n+\t\tif (rx_check_ret >= 0 && ad->rx_bulk_alloc_allowed &&\n+\t\t    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {\n \t\t\tad->rx_vec_allowed = true;\n \t\t\tfor (i = 0; i < dev->data->nb_rx_queues; i++) {\n \t\t\t\trxq = dev->data->rx_queues[i];\n@@ -3091,11 +3099,19 @@ ice_set_rx_function(struct rte_eth_dev *dev)\n \t\tif (dev->data->scattered_rx) {\n \t\t\tif (use_avx512) {\n #ifdef CC_AVX512_SUPPORT\n-\t\t\t\tPMD_DRV_LOG(NOTICE,\n-\t\t\t\t\t\"Using AVX512 Vector Scattered Rx (port %d).\",\n-\t\t\t\t\tdev->data->port_id);\n-\t\t\t\tdev->rx_pkt_burst =\n-\t\t\t\t\tice_recv_scattered_pkts_vec_avx512;\n+\t\t\t\tif (rx_check_ret == ICE_VECTOR_OFFLOAD_PATH) {\n+\t\t\t\t\tPMD_DRV_LOG(NOTICE,\n+\t\t\t\t\t\t\"Using AVX512 OFFLOAD Vector Scattered Rx (port %d).\",\n+\t\t\t\t\t\tdev->data->port_id);\n+\t\t\t\t\tdev->rx_pkt_burst =\n+\t\t\t\t\t\tice_recv_scattered_pkts_vec_avx512_offload;\n+\t\t\t\t} else {\n+\t\t\t\t\tPMD_DRV_LOG(NOTICE,\n+\t\t\t\t\t\t\"Using AVX512 Vector Scattered Rx (port %d).\",\n+\t\t\t\t\t\tdev->data->port_id);\n+\t\t\t\t\tdev->rx_pkt_burst =\n+\t\t\t\t\t\tice_recv_scattered_pkts_vec_avx512;\n+\t\t\t\t}\n #endif\n \t\t\t} else {\n \t\t\t\tPMD_DRV_LOG(DEBUG,\n@@ -3109,11 +3125,19 @@ ice_set_rx_function(struct rte_eth_dev *dev)\n \t\t} else {\n \t\t\tif (use_avx512) {\n #ifdef CC_AVX512_SUPPORT\n-\t\t\t\tPMD_DRV_LOG(NOTICE,\n-\t\t\t\t\t\"Using AVX512 Vector Rx (port %d).\",\n-\t\t\t\t\tdev->data->port_id);\n-\t\t\t\tdev->rx_pkt_burst 
=\n-\t\t\t\t\tice_recv_pkts_vec_avx512;\n+\t\t\t\tif (rx_check_ret == ICE_VECTOR_OFFLOAD_PATH) {\n+\t\t\t\t\tPMD_DRV_LOG(NOTICE,\n+\t\t\t\t\t\t\"Using AVX512 OFFLOAD Vector Rx (port %d).\",\n+\t\t\t\t\t\tdev->data->port_id);\n+\t\t\t\t\tdev->rx_pkt_burst =\n+\t\t\t\t\t\tice_recv_pkts_vec_avx512_offload;\n+\t\t\t\t} else {\n+\t\t\t\t\tPMD_DRV_LOG(NOTICE,\n+\t\t\t\t\t\t\"Using AVX512 Vector Rx (port %d).\",\n+\t\t\t\t\t\tdev->data->port_id);\n+\t\t\t\t\tdev->rx_pkt_burst =\n+\t\t\t\t\t\tice_recv_pkts_vec_avx512;\n+\t\t\t\t}\n #endif\n \t\t\t} else {\n \t\t\t\tPMD_DRV_LOG(DEBUG,\n@@ -3162,7 +3186,9 @@ static const struct {\n #ifdef RTE_ARCH_X86\n #ifdef CC_AVX512_SUPPORT\n \t{ ice_recv_scattered_pkts_vec_avx512, \"Vector AVX512 Scattered\" },\n+\t{ ice_recv_scattered_pkts_vec_avx512_offload, \"Offload Vector AVX512 Scattered\" },\n \t{ ice_recv_pkts_vec_avx512,           \"Vector AVX512\" },\n+\t{ ice_recv_pkts_vec_avx512_offload,   \"Offload Vector AVX512\" },\n #endif\n \t{ ice_recv_scattered_pkts_vec_avx2, \"Vector AVX2 Scattered\" },\n \t{ ice_recv_pkts_vec_avx2,           \"Vector AVX2\" },\ndiff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h\nindex f72fad0255..b29387ca0f 100644\n--- a/drivers/net/ice/ice_rxtx.h\n+++ b/drivers/net/ice/ice_rxtx.h\n@@ -88,6 +88,7 @@ struct ice_rx_queue {\n \tuint64_t xtr_ol_flag; /* Protocol extraction offload flag */\n \tice_rxd_to_pkt_fields_t rxd_to_pkt_fields; /* handle FlexiMD by RXDID */\n \tice_rx_release_mbufs_t rx_rel_mbufs;\n+\tuint64_t offloads;\n };\n \n struct ice_tx_entry {\n@@ -256,9 +257,15 @@ uint16_t ice_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\t\tuint16_t nb_pkts);\n uint16_t ice_recv_pkts_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,\n \t\t\t\t  uint16_t nb_pkts);\n+uint16_t ice_recv_pkts_vec_avx512_offload(void *rx_queue,\n+\t\t\t\t\t  struct rte_mbuf **rx_pkts,\n+\t\t\t\t\t  uint16_t nb_pkts);\n uint16_t ice_recv_scattered_pkts_vec_avx512(void *rx_queue,\n \t\t\t\t\t    struct rte_mbuf **rx_pkts,\n \t\t\t\t\t    uint16_t nb_pkts);\n+uint16_t ice_recv_scattered_pkts_vec_avx512_offload(void *rx_queue,\n+\t\t\t\t\t\t    struct rte_mbuf **rx_pkts,\n+\t\t\t\t\t\t    uint16_t nb_pkts);\n uint16_t ice_xmit_pkts_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\t\t  uint16_t nb_pkts);\n uint16_t ice_xmit_pkts_vec_avx512_offload(void *tx_queue,\ndiff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c b/drivers/net/ice/ice_rxtx_vec_avx512.c\nindex 1c4a59a170..ad6c69da9b 100644\n--- a/drivers/net/ice/ice_rxtx_vec_avx512.c\n+++ b/drivers/net/ice/ice_rxtx_vec_avx512.c\n@@ -150,10 +150,12 @@ ice_flex_rxd_to_fdir_flags_vec_avx512(const __m256i fdir_id0_7)\n \treturn fdir_flags;\n }\n \n-static inline uint16_t\n+static __rte_always_inline uint16_t\n _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,\n \t\t\t      struct rte_mbuf **rx_pkts,\n-\t\t\t      uint16_t nb_pkts, uint8_t *split_packet)\n+\t\t\t      uint16_t nb_pkts,\n+\t\t\t      uint8_t *split_packet,\n+\t\t\t      bool do_offload)\n {\n \tconst uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;\n \tconst __m256i mbuf_init = _mm256_set_epi64x(0, 0,\n@@ -224,6 +226,7 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,\n \tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=\n \t\t\toffsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);\n \n+\t/* following code block is for Rx Checksum Offload */\n \t/* Status/Error flag masks */\n \t/**\n \t * mask everything except Checksum Reports, RSS indication\n@@ -487,37 +490,42 @@ 
_ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,\n \t\t__m256i status0_7 = _mm512_extracti64x4_epi64\n \t\t\t(raw_status0_7, 0);\n \n-\t\t/* now do flag manipulation */\n+\t\t__m256i mbuf_flags = _mm256_set1_epi32(0);\n \n-\t\t/* get only flag/error bits we want */\n-\t\tconst __m256i flag_bits =\n-\t\t\t_mm256_and_si256(status0_7, flags_mask);\n-\t\t/**\n-\t\t * l3_l4_error flags, shuffle, then shift to correct adjustment\n-\t\t * of flags in flags_shuf, and finally mask out extra bits\n-\t\t */\n-\t\t__m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf,\n-\t\t\t\t_mm256_srli_epi32(flag_bits, 4));\n-\t\tl3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);\n-\t\t__m256i l4_outer_mask = _mm256_set1_epi32(0x6);\n-\t\t__m256i l4_outer_flags =\n-\t\t\t\t_mm256_and_si256(l3_l4_flags, l4_outer_mask);\n-\t\tl4_outer_flags = _mm256_slli_epi32(l4_outer_flags, 20);\n-\n-\t\t__m256i l3_l4_mask = _mm256_set1_epi32(~0x6);\n-\t\tl3_l4_flags = _mm256_and_si256(l3_l4_flags, l3_l4_mask);\n-\t\tl3_l4_flags = _mm256_or_si256(l3_l4_flags, l4_outer_flags);\n-\t\tl3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);\n-\t\t/* set rss and vlan flags */\n-\t\tconst __m256i rss_vlan_flag_bits =\n-\t\t\t_mm256_srli_epi32(flag_bits, 12);\n-\t\tconst __m256i rss_vlan_flags =\n-\t\t\t_mm256_shuffle_epi8(rss_vlan_flags_shuf,\n-\t\t\t\t\t    rss_vlan_flag_bits);\n-\n-\t\t/* merge flags */\n-\t\t__m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,\n+\t\tif (do_offload) {\n+\t\t\t/* now do flag manipulation */\n+\n+\t\t\t/* get only flag/error bits we want */\n+\t\t\tconst __m256i flag_bits =\n+\t\t\t\t_mm256_and_si256(status0_7, flags_mask);\n+\t\t\t/**\n+\t\t\t * l3_l4_error flags, shuffle, then shift to correct adjustment\n+\t\t\t * of flags in flags_shuf, and finally mask out extra bits\n+\t\t\t */\n+\t\t\t__m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf,\n+\t\t\t\t\t_mm256_srli_epi32(flag_bits, 4));\n+\t\t\tl3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);\n+\t\t\t__m256i l4_outer_mask = _mm256_set1_epi32(0x6);\n+\t\t\t__m256i l4_outer_flags =\n+\t\t\t\t\t_mm256_and_si256(l3_l4_flags, l4_outer_mask);\n+\t\t\tl4_outer_flags = _mm256_slli_epi32(l4_outer_flags, 20);\n+\n+\t\t\t__m256i l3_l4_mask = _mm256_set1_epi32(~0x6);\n+\n+\t\t\tl3_l4_flags = _mm256_and_si256(l3_l4_flags, l3_l4_mask);\n+\t\t\tl3_l4_flags = _mm256_or_si256(l3_l4_flags, l4_outer_flags);\n+\t\t\tl3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);\n+\t\t\t/* set rss and vlan flags */\n+\t\t\tconst __m256i rss_vlan_flag_bits =\n+\t\t\t\t_mm256_srli_epi32(flag_bits, 12);\n+\t\t\tconst __m256i rss_vlan_flags =\n+\t\t\t\t_mm256_shuffle_epi8(rss_vlan_flags_shuf,\n+\t\t\t\t\t\t    rss_vlan_flag_bits);\n+\n+\t\t\t/* merge flags */\n+\t\t\tmbuf_flags = _mm256_or_si256(l3_l4_flags,\n \t\t\t\t\t\t     rss_vlan_flags);\n+\t\t}\n \n \t\tif (rxq->fdir_enabled) {\n \t\t\tconst __m256i fdir_id4_7 =\n@@ -529,12 +537,19 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,\n \t\t\tconst __m256i fdir_id0_7 =\n \t\t\t\t_mm256_unpackhi_epi64(fdir_id4_7, fdir_id0_3);\n \n-\t\t\tconst __m256i fdir_flags =\n-\t\t\t\tice_flex_rxd_to_fdir_flags_vec_avx512\n-\t\t\t\t\t(fdir_id0_7);\n-\n-\t\t\t/* merge with fdir_flags */\n-\t\t\tmbuf_flags = _mm256_or_si256(mbuf_flags, fdir_flags);\n+\t\t\tif (do_offload) {\n+\t\t\t\tconst __m256i fdir_flags =\n+\t\t\t\t\tice_flex_rxd_to_fdir_flags_vec_avx512\n+\t\t\t\t\t\t(fdir_id0_7);\n+\n+\t\t\t\t/* merge with fdir_flags */\n+\t\t\t\tmbuf_flags = _mm256_or_si256\n+\t\t\t\t\t\t(mbuf_flags, fdir_flags);\n+\t\t\t} else 
{\n+\t\t\t\tmbuf_flags =\n+\t\t\t\t\tice_flex_rxd_to_fdir_flags_vec_avx512\n+\t\t\t\t\t\t(fdir_id0_7);\n+\t\t\t}\n \n \t\t\t/* write to mbuf: have to use scalar store here */\n \t\t\trx_pkts[i + 0]->hash.fdir.hi =\n@@ -562,95 +577,97 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,\n \t\t\t\t_mm256_extract_epi32(fdir_id0_7, 4);\n \t\t} /* if() on fdir_enabled */\n \n+\t\tif (do_offload) {\n #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC\n-\t\t/**\n-\t\t * needs to load 2nd 16B of each desc for RSS hash parsing,\n-\t\t * will cause performance drop to get into this context.\n-\t\t */\n-\t\tif (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &\n-\t\t\t\tDEV_RX_OFFLOAD_RSS_HASH) {\n-\t\t\t/* load bottom half of every 32B desc */\n-\t\t\tconst __m128i raw_desc_bh7 =\n-\t\t\t\t_mm_load_si128\n-\t\t\t\t\t((void *)(&rxdp[7].wb.status_error1));\n-\t\t\trte_compiler_barrier();\n-\t\t\tconst __m128i raw_desc_bh6 =\n-\t\t\t\t_mm_load_si128\n-\t\t\t\t\t((void *)(&rxdp[6].wb.status_error1));\n-\t\t\trte_compiler_barrier();\n-\t\t\tconst __m128i raw_desc_bh5 =\n-\t\t\t\t_mm_load_si128\n-\t\t\t\t\t((void *)(&rxdp[5].wb.status_error1));\n-\t\t\trte_compiler_barrier();\n-\t\t\tconst __m128i raw_desc_bh4 =\n-\t\t\t\t_mm_load_si128\n-\t\t\t\t\t((void *)(&rxdp[4].wb.status_error1));\n-\t\t\trte_compiler_barrier();\n-\t\t\tconst __m128i raw_desc_bh3 =\n-\t\t\t\t_mm_load_si128\n-\t\t\t\t\t((void *)(&rxdp[3].wb.status_error1));\n-\t\t\trte_compiler_barrier();\n-\t\t\tconst __m128i raw_desc_bh2 =\n-\t\t\t\t_mm_load_si128\n-\t\t\t\t\t((void *)(&rxdp[2].wb.status_error1));\n-\t\t\trte_compiler_barrier();\n-\t\t\tconst __m128i raw_desc_bh1 =\n-\t\t\t\t_mm_load_si128\n-\t\t\t\t\t((void *)(&rxdp[1].wb.status_error1));\n-\t\t\trte_compiler_barrier();\n-\t\t\tconst __m128i raw_desc_bh0 =\n-\t\t\t\t_mm_load_si128\n-\t\t\t\t\t((void *)(&rxdp[0].wb.status_error1));\n-\n-\t\t\t__m256i raw_desc_bh6_7 =\n-\t\t\t\t_mm256_inserti128_si256\n-\t\t\t\t\t(_mm256_castsi128_si256(raw_desc_bh6),\n-\t\t\t\t\traw_desc_bh7, 1);\n-\t\t\t__m256i raw_desc_bh4_5 =\n-\t\t\t\t_mm256_inserti128_si256\n-\t\t\t\t\t(_mm256_castsi128_si256(raw_desc_bh4),\n-\t\t\t\t\traw_desc_bh5, 1);\n-\t\t\t__m256i raw_desc_bh2_3 =\n-\t\t\t\t_mm256_inserti128_si256\n-\t\t\t\t\t(_mm256_castsi128_si256(raw_desc_bh2),\n-\t\t\t\t\traw_desc_bh3, 1);\n-\t\t\t__m256i raw_desc_bh0_1 =\n-\t\t\t\t_mm256_inserti128_si256\n-\t\t\t\t\t(_mm256_castsi128_si256(raw_desc_bh0),\n-\t\t\t\t\traw_desc_bh1, 1);\n-\n \t\t\t/**\n-\t\t\t * to shift the 32b RSS hash value to the\n-\t\t\t * highest 32b of each 128b before mask\n+\t\t\t * needs to load 2nd 16B of each desc for RSS hash parsing,\n+\t\t\t * will cause performance drop to get into this context.\n \t\t\t */\n-\t\t\t__m256i rss_hash6_7 =\n-\t\t\t\t_mm256_slli_epi64(raw_desc_bh6_7, 32);\n-\t\t\t__m256i rss_hash4_5 =\n-\t\t\t\t_mm256_slli_epi64(raw_desc_bh4_5, 32);\n-\t\t\t__m256i rss_hash2_3 =\n-\t\t\t\t_mm256_slli_epi64(raw_desc_bh2_3, 32);\n-\t\t\t__m256i rss_hash0_1 =\n-\t\t\t\t_mm256_slli_epi64(raw_desc_bh0_1, 32);\n-\n-\t\t\t__m256i rss_hash_msk =\n-\t\t\t\t_mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,\n-\t\t\t\t\t\t 0xFFFFFFFF, 0, 0, 0);\n-\n-\t\t\trss_hash6_7 = _mm256_and_si256\n-\t\t\t\t\t(rss_hash6_7, rss_hash_msk);\n-\t\t\trss_hash4_5 = _mm256_and_si256\n-\t\t\t\t\t(rss_hash4_5, rss_hash_msk);\n-\t\t\trss_hash2_3 = _mm256_and_si256\n-\t\t\t\t\t(rss_hash2_3, rss_hash_msk);\n-\t\t\trss_hash0_1 = _mm256_and_si256\n-\t\t\t\t\t(rss_hash0_1, rss_hash_msk);\n-\n-\t\t\tmb6_7 = _mm256_or_si256(mb6_7, 
rss_hash6_7);\n-\t\t\tmb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);\n-\t\t\tmb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);\n-\t\t\tmb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);\n-\t\t} /* if() on RSS hash parsing */\n+\t\t\tif (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &\n+\t\t\t\t\tDEV_RX_OFFLOAD_RSS_HASH) {\n+\t\t\t\t/* load bottom half of every 32B desc */\n+\t\t\t\tconst __m128i raw_desc_bh7 =\n+\t\t\t\t\t_mm_load_si128\n+\t\t\t\t\t\t((void *)(&rxdp[7].wb.status_error1));\n+\t\t\t\trte_compiler_barrier();\n+\t\t\t\tconst __m128i raw_desc_bh6 =\n+\t\t\t\t\t_mm_load_si128\n+\t\t\t\t\t\t((void *)(&rxdp[6].wb.status_error1));\n+\t\t\t\trte_compiler_barrier();\n+\t\t\t\tconst __m128i raw_desc_bh5 =\n+\t\t\t\t\t_mm_load_si128\n+\t\t\t\t\t\t((void *)(&rxdp[5].wb.status_error1));\n+\t\t\t\trte_compiler_barrier();\n+\t\t\t\tconst __m128i raw_desc_bh4 =\n+\t\t\t\t\t_mm_load_si128\n+\t\t\t\t\t\t((void *)(&rxdp[4].wb.status_error1));\n+\t\t\t\trte_compiler_barrier();\n+\t\t\t\tconst __m128i raw_desc_bh3 =\n+\t\t\t\t\t_mm_load_si128\n+\t\t\t\t\t\t((void *)(&rxdp[3].wb.status_error1));\n+\t\t\t\trte_compiler_barrier();\n+\t\t\t\tconst __m128i raw_desc_bh2 =\n+\t\t\t\t\t_mm_load_si128\n+\t\t\t\t\t\t((void *)(&rxdp[2].wb.status_error1));\n+\t\t\t\trte_compiler_barrier();\n+\t\t\t\tconst __m128i raw_desc_bh1 =\n+\t\t\t\t\t_mm_load_si128\n+\t\t\t\t\t\t((void *)(&rxdp[1].wb.status_error1));\n+\t\t\t\trte_compiler_barrier();\n+\t\t\t\tconst __m128i raw_desc_bh0 =\n+\t\t\t\t\t_mm_load_si128\n+\t\t\t\t\t\t((void *)(&rxdp[0].wb.status_error1));\n+\n+\t\t\t\t__m256i raw_desc_bh6_7 =\n+\t\t\t\t\t_mm256_inserti128_si256\n+\t\t\t\t\t\t(_mm256_castsi128_si256(raw_desc_bh6),\n+\t\t\t\t\t\traw_desc_bh7, 1);\n+\t\t\t\t__m256i raw_desc_bh4_5 =\n+\t\t\t\t\t_mm256_inserti128_si256\n+\t\t\t\t\t\t(_mm256_castsi128_si256(raw_desc_bh4),\n+\t\t\t\t\t\traw_desc_bh5, 1);\n+\t\t\t\t__m256i raw_desc_bh2_3 =\n+\t\t\t\t\t_mm256_inserti128_si256\n+\t\t\t\t\t\t(_mm256_castsi128_si256(raw_desc_bh2),\n+\t\t\t\t\t\traw_desc_bh3, 1);\n+\t\t\t\t__m256i raw_desc_bh0_1 =\n+\t\t\t\t\t_mm256_inserti128_si256\n+\t\t\t\t\t\t(_mm256_castsi128_si256(raw_desc_bh0),\n+\t\t\t\t\t\traw_desc_bh1, 1);\n+\n+\t\t\t\t/**\n+\t\t\t\t * to shift the 32b RSS hash value to the\n+\t\t\t\t * highest 32b of each 128b before mask\n+\t\t\t\t */\n+\t\t\t\t__m256i rss_hash6_7 =\n+\t\t\t\t\t_mm256_slli_epi64(raw_desc_bh6_7, 32);\n+\t\t\t\t__m256i rss_hash4_5 =\n+\t\t\t\t\t_mm256_slli_epi64(raw_desc_bh4_5, 32);\n+\t\t\t\t__m256i rss_hash2_3 =\n+\t\t\t\t\t_mm256_slli_epi64(raw_desc_bh2_3, 32);\n+\t\t\t\t__m256i rss_hash0_1 =\n+\t\t\t\t\t_mm256_slli_epi64(raw_desc_bh0_1, 32);\n+\n+\t\t\t\t__m256i rss_hash_msk =\n+\t\t\t\t\t_mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,\n+\t\t\t\t\t\t\t 0xFFFFFFFF, 0, 0, 0);\n+\n+\t\t\t\trss_hash6_7 = _mm256_and_si256\n+\t\t\t\t\t\t(rss_hash6_7, rss_hash_msk);\n+\t\t\t\trss_hash4_5 = _mm256_and_si256\n+\t\t\t\t\t\t(rss_hash4_5, rss_hash_msk);\n+\t\t\t\trss_hash2_3 = _mm256_and_si256\n+\t\t\t\t\t\t(rss_hash2_3, rss_hash_msk);\n+\t\t\t\trss_hash0_1 = _mm256_and_si256\n+\t\t\t\t\t\t(rss_hash0_1, rss_hash_msk);\n+\n+\t\t\t\tmb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);\n+\t\t\t\tmb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);\n+\t\t\t\tmb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);\n+\t\t\t\tmb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);\n+\t\t\t} /* if() on RSS hash parsing */\n #endif\n+\t\t}\n \n \t\t/**\n \t\t * At this point, we have the 8 sets of flags in the low 16-bits\n@@ -806,7 +823,19 @@ uint16_t\n ice_recv_pkts_vec_avx512(void 
*rx_queue, struct rte_mbuf **rx_pkts,\n \t\t\t uint16_t nb_pkts)\n {\n-\treturn _ice_recv_raw_pkts_vec_avx512(rx_queue, rx_pkts, nb_pkts, NULL);\n+\treturn _ice_recv_raw_pkts_vec_avx512(rx_queue, rx_pkts, nb_pkts, NULL, false);\n+}\n+\n+/**\n+ * Notice:\n+ * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet\n+ */\n+uint16_t\n+ice_recv_pkts_vec_avx512_offload(void *rx_queue, struct rte_mbuf **rx_pkts,\n+\t\t\t\t uint16_t nb_pkts)\n+{\n+\treturn _ice_recv_raw_pkts_vec_avx512(rx_queue, rx_pkts,\n+\t\t\t\t\t     nb_pkts, NULL, true);\n }\n \n /**\n@@ -823,7 +852,49 @@ ice_recv_scattered_burst_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,\n \n \t/* get some new buffers */\n \tuint16_t nb_bufs = _ice_recv_raw_pkts_vec_avx512(rxq, rx_pkts, nb_pkts,\n-\t\t\t\t\t\t       split_flags);\n+\t\t\t\t\t\t       split_flags, false);\n+\tif (nb_bufs == 0)\n+\t\treturn 0;\n+\n+\t/* happy day case, full burst + no packets to be joined */\n+\tconst uint64_t *split_fl64 = (uint64_t *)split_flags;\n+\n+\tif (!rxq->pkt_first_seg &&\n+\t    split_fl64[0] == 0 && split_fl64[1] == 0 &&\n+\t    split_fl64[2] == 0 && split_fl64[3] == 0)\n+\t\treturn nb_bufs;\n+\n+\t/* reassemble any packets that need reassembly */\n+\tunsigned int i = 0;\n+\n+\tif (!rxq->pkt_first_seg) {\n+\t\t/* find the first split flag, and only reassemble then */\n+\t\twhile (i < nb_bufs && !split_flags[i])\n+\t\t\ti++;\n+\t\tif (i == nb_bufs)\n+\t\t\treturn nb_bufs;\n+\t\trxq->pkt_first_seg = rx_pkts[i];\n+\t}\n+\treturn i + ice_rx_reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,\n+\t\t\t\t\t     &split_flags[i]);\n+}\n+\n+/**\n+ * vPMD receive routine that reassembles single burst of 32 scattered packets\n+ * Notice:\n+ * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet\n+ */\n+static uint16_t\n+ice_recv_scattered_burst_vec_avx512_offload(void *rx_queue,\n+\t\t\t\t\t    struct rte_mbuf **rx_pkts,\n+\t\t\t\t\t    uint16_t nb_pkts)\n+{\n+\tstruct ice_rx_queue *rxq = rx_queue;\n+\tuint8_t split_flags[ICE_VPMD_RX_BURST] = {0};\n+\n+\t/* get some new buffers */\n+\tuint16_t nb_bufs = _ice_recv_raw_pkts_vec_avx512(rxq,\n+\t\t\t\trx_pkts, nb_pkts, split_flags, true);\n \tif (nb_bufs == 0)\n \t\treturn 0;\n \n@@ -874,6 +945,32 @@ ice_recv_scattered_pkts_vec_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,\n \t\t\t\trx_pkts + retval, nb_pkts);\n }\n \n+/**\n+ * vPMD receive routine that reassembles scattered packets.\n+ * Main receive routine that can handle arbitrary burst sizes\n+ * Notice:\n+ * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet\n+ */\n+uint16_t\n+ice_recv_scattered_pkts_vec_avx512_offload(void *rx_queue,\n+\t\t\t\t\t   struct rte_mbuf **rx_pkts,\n+\t\t\t\t\t   uint16_t nb_pkts)\n+{\n+\tuint16_t retval = 0;\n+\n+\twhile (nb_pkts > ICE_VPMD_RX_BURST) {\n+\t\tuint16_t burst =\n+\t\t\tice_recv_scattered_burst_vec_avx512_offload(rx_queue,\n+\t\t\t\trx_pkts + retval, ICE_VPMD_RX_BURST);\n+\t\tretval += burst;\n+\t\tnb_pkts -= burst;\n+\t\tif (burst < ICE_VPMD_RX_BURST)\n+\t\t\treturn retval;\n+\t}\n+\treturn retval + ice_recv_scattered_burst_vec_avx512_offload(rx_queue,\n+\t\t\t\trx_pkts + retval, nb_pkts);\n+}\n+\n static __rte_always_inline int\n ice_tx_free_bufs_avx512(struct ice_tx_queue *txq)\n {\ndiff --git a/drivers/net/ice/ice_rxtx_vec_common.h b/drivers/net/ice/ice_rxtx_vec_common.h\nindex 942647f4e9..6e8d7a6fc5 100644\n--- a/drivers/net/ice/ice_rxtx_vec_common.h\n+++ b/drivers/net/ice/ice_rxtx_vec_common.h\n@@ -247,6 +247,28 @@ ice_rxq_vec_setup_default(struct ice_rx_queue *rxq)\n \treturn 0;\n }\n 
\n+#define ICE_TX_NO_VECTOR_FLAGS (\t\t\t\\\n+\t\tDEV_TX_OFFLOAD_MULTI_SEGS |\t\t\\\n+\t\tDEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |\t\\\n+\t\tDEV_TX_OFFLOAD_TCP_TSO)\n+\n+#define ICE_TX_VECTOR_OFFLOAD (\t\t\t\t\\\n+\t\tDEV_TX_OFFLOAD_VLAN_INSERT |\t\t\\\n+\t\tDEV_TX_OFFLOAD_QINQ_INSERT |\t\t\\\n+\t\tDEV_TX_OFFLOAD_IPV4_CKSUM |\t\t\\\n+\t\tDEV_TX_OFFLOAD_SCTP_CKSUM |\t\t\\\n+\t\tDEV_TX_OFFLOAD_UDP_CKSUM |\t\t\\\n+\t\tDEV_TX_OFFLOAD_TCP_CKSUM)\n+\n+#define ICE_RX_VECTOR_OFFLOAD (\t\t\t\t\\\n+\t\tDEV_RX_OFFLOAD_CHECKSUM |\t\t\\\n+\t\tDEV_RX_OFFLOAD_SCTP_CKSUM |\t\t\\\n+\t\tDEV_RX_OFFLOAD_VLAN |\t\t\t\\\n+\t\tDEV_RX_OFFLOAD_RSS_HASH)\n+\n+#define ICE_VECTOR_PATH\t\t0\n+#define ICE_VECTOR_OFFLOAD_PATH\t1\n+\n static inline int\n ice_rx_vec_queue_default(struct ice_rx_queue *rxq)\n {\n@@ -265,24 +287,11 @@ ice_rx_vec_queue_default(struct ice_rx_queue *rxq)\n \tif (rxq->proto_xtr != PROTO_XTR_NONE)\n \t\treturn -1;\n \n-\treturn 0;\n-}\n-\n-#define ICE_TX_NO_VECTOR_FLAGS (\t\t\t\\\n-\t\tDEV_TX_OFFLOAD_MULTI_SEGS |\t\t\\\n-\t\tDEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |\t\\\n-\t\tDEV_TX_OFFLOAD_TCP_TSO)\n-\n-#define ICE_TX_VECTOR_OFFLOAD (\t\t\t\t\\\n-\t\tDEV_TX_OFFLOAD_VLAN_INSERT |\t\t\\\n-\t\tDEV_TX_OFFLOAD_QINQ_INSERT |\t\t\\\n-\t\tDEV_TX_OFFLOAD_IPV4_CKSUM |\t\t\\\n-\t\tDEV_TX_OFFLOAD_SCTP_CKSUM |\t\t\\\n-\t\tDEV_TX_OFFLOAD_UDP_CKSUM |\t\t\\\n-\t\tDEV_TX_OFFLOAD_TCP_CKSUM)\n+\tif (rxq->offloads & ICE_RX_VECTOR_OFFLOAD)\n+\t\treturn ICE_VECTOR_OFFLOAD_PATH;\n \n-#define ICE_VECTOR_PATH\t\t0\n-#define ICE_VECTOR_OFFLOAD_PATH\t1\n+\treturn ICE_VECTOR_PATH;\n+}\n \n static inline int\n ice_tx_vec_queue_default(struct ice_tx_queue *txq)\n@@ -308,14 +317,19 @@ ice_rx_vec_dev_check_default(struct rte_eth_dev *dev)\n {\n \tint i;\n \tstruct ice_rx_queue *rxq;\n+\tint ret = 0;\n+\tint result = 0;\n \n \tfor (i = 0; i < dev->data->nb_rx_queues; i++) {\n \t\trxq = dev->data->rx_queues[i];\n-\t\tif (ice_rx_vec_queue_default(rxq))\n+\t\tret = (ice_rx_vec_queue_default(rxq));\n+\t\tif (ret < 0)\n \t\t\treturn -1;\n+\t\tif (ret == ICE_VECTOR_OFFLOAD_PATH)\n+\t\t\tresult = ret;\n \t}\n \n-\treturn 0;\n+\treturn result;\n }\n \n static inline int\n",
    "prefixes": [
        "v5",
        "2/2"
    ]
}
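
Since the "mbox" field above points at the raw patch email, one plausible way to apply this patch to a local tree (a sketch, assuming curl and git are available and the scm_url from the project record is used for the clone):

    git clone git://dpdk.org/dpdk && cd dpdk
    curl -s "https://patches.dpdk.org/project/dpdk/patch/20210415085811.56429-3-leyi.rong@intel.com/mbox/" | git am

Note that this patch is part 2/2 of a series; piping the series mbox (https://patches.dpdk.org/series/16405/mbox/) to git am instead would apply both patches in order.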