get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/76658/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 76658,
    "url": "http://patches.dpdk.org/api/patches/76658/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20200907090825.1761-7-huwei013@chinasoftinc.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20200907090825.1761-7-huwei013@chinasoftinc.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20200907090825.1761-7-huwei013@chinasoftinc.com",
    "date": "2020-09-07T09:08:23",
    "name": "[6/8] net/hns3: add vector Rx burst with NEON instructions",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "f3e8f198775c9612fd0e3a8b8aa64cc808494c9b",
    "submitter": {
        "id": 1537,
        "url": "http://patches.dpdk.org/api/people/1537/?format=api",
        "name": "Wei Hu (Xavier)",
        "email": "huwei013@chinasoftinc.com"
    },
    "delegate": {
        "id": 319,
        "url": "http://patches.dpdk.org/api/users/319/?format=api",
        "username": "fyigit",
        "first_name": "Ferruh",
        "last_name": "Yigit",
        "email": "ferruh.yigit@amd.com"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20200907090825.1761-7-huwei013@chinasoftinc.com/mbox/",
    "series": [
        {
            "id": 11986,
            "url": "http://patches.dpdk.org/api/series/11986/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=11986",
            "date": "2020-09-07T09:08:17",
            "name": "net/hns3: updates for Rx Tx",
            "version": 1,
            "mbox": "http://patches.dpdk.org/series/11986/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/76658/comments/",
    "check": "warning",
    "checks": "http://patches.dpdk.org/api/patches/76658/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from dpdk.org (dpdk.org [92.243.14.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id A14E0A04B9;\n\tMon,  7 Sep 2020 11:10:04 +0200 (CEST)",
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id 487C31C11C;\n\tMon,  7 Sep 2020 11:09:23 +0200 (CEST)",
            "from mail.chinasoftinc.com (unknown [114.113.233.8])\n by dpdk.org (Postfix) with ESMTP id B74A41BF8A\n for <dev@dpdk.org>; Mon,  7 Sep 2020 11:09:19 +0200 (CEST)",
            "from localhost.localdomain (65.49.108.226) by INCCAS002.ito.icss\n (10.168.0.60) with Microsoft SMTP Server id 14.3.487.0; Mon, 7 Sep 2020\n 17:09:15 +0800"
        ],
        "From": "\"Wei Hu (Xavier)\" <huwei013@chinasoftinc.com>",
        "To": "<dev@dpdk.org>",
        "CC": "<xavier.huwei@huawei.com>",
        "Date": "Mon, 7 Sep 2020 17:08:23 +0800",
        "Message-ID": "<20200907090825.1761-7-huwei013@chinasoftinc.com>",
        "X-Mailer": "git-send-email 2.9.5",
        "In-Reply-To": "<20200907090825.1761-1-huwei013@chinasoftinc.com>",
        "References": "<20200907090825.1761-1-huwei013@chinasoftinc.com>",
        "MIME-Version": "1.0",
        "Content-Type": "text/plain",
        "X-Originating-IP": "[65.49.108.226]",
        "Subject": "[dpdk-dev] [PATCH 6/8] net/hns3: add vector Rx burst with NEON\n\tinstructions",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "From: \"Wei Hu (Xavier)\" <xavier.huwei@huawei.com>\n\nThis patch adds NEON vector instructions to optimize Rx burst process.\n\nSigned-off-by: Chengwen Feng <fengchengwen@huawei.com>\nSigned-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>\nSigned-off-by: Huisong Li <lihuisong@huawei.com>\n---\n drivers/net/hns3/hns3_ethdev.c        |   1 +\n drivers/net/hns3/hns3_ethdev.h        |   1 +\n drivers/net/hns3/hns3_ethdev_vf.c     |   1 +\n drivers/net/hns3/hns3_rxtx.c          |  94 +++++++++++++++-\n drivers/net/hns3/hns3_rxtx.h          |  35 +++++-\n drivers/net/hns3/hns3_rxtx_vec.c      | 167 ++++++++++++++++++++++++++++\n drivers/net/hns3/hns3_rxtx_vec.h      |  20 ++++\n drivers/net/hns3/hns3_rxtx_vec_neon.h | 203 ++++++++++++++++++++++++++++++++++\n 8 files changed, 514 insertions(+), 8 deletions(-)",
    "diff": "diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c\nindex 68239f5..0727c6d 100644\n--- a/drivers/net/hns3/hns3_ethdev.c\n+++ b/drivers/net/hns3/hns3_ethdev.c\n@@ -2352,6 +2352,7 @@ hns3_dev_configure(struct rte_eth_dev *dev)\n \t\tgoto cfg_err;\n \n \thns->rx_simple_allowed = true;\n+\thns->rx_vec_allowed = true;\n \thns->tx_simple_allowed = true;\n \thns->tx_vec_allowed = true;\n \ndiff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h\nindex 098b6ce..fd6a9f9 100644\n--- a/drivers/net/hns3/hns3_ethdev.h\n+++ b/drivers/net/hns3/hns3_ethdev.h\n@@ -643,6 +643,7 @@ struct hns3_adapter {\n \t};\n \n \tbool rx_simple_allowed;\n+\tbool rx_vec_allowed;\n \tbool tx_simple_allowed;\n \tbool tx_vec_allowed;\n \ndiff --git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c\nindex f3e6aea..93f2c93 100644\n--- a/drivers/net/hns3/hns3_ethdev_vf.c\n+++ b/drivers/net/hns3/hns3_ethdev_vf.c\n@@ -822,6 +822,7 @@ hns3vf_dev_configure(struct rte_eth_dev *dev)\n \t\tgoto cfg_err;\n \n \thns->rx_simple_allowed = true;\n+\thns->rx_vec_allowed = true;\n \thns->tx_simple_allowed = true;\n \thns->tx_vec_allowed = true;\n \ndiff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c\nindex a537fbe..03d69b1 100644\n--- a/drivers/net/hns3/hns3_rxtx.c\n+++ b/drivers/net/hns3/hns3_rxtx.c\n@@ -41,9 +41,19 @@ hns3_rx_queue_release_mbufs(struct hns3_rx_queue *rxq)\n \tif (rxq->sw_ring == NULL)\n \t\treturn;\n \n-\tfor (i = 0; i < rxq->nb_rx_desc; i++)\n-\t\tif (rxq->sw_ring[i].mbuf)\n-\t\t\trte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);\n+\tif (rxq->rx_rearm_nb == 0) {\n+\t\tfor (i = 0; i < rxq->nb_rx_desc; i++) {\n+\t\t\tif (rxq->sw_ring[i].mbuf != NULL)\n+\t\t\t\trte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);\n+\t\t}\n+\t} else {\n+\t\tfor (i = rxq->next_to_use;\n+\t\t     i != rxq->rx_rearm_start;\n+\t\t     i = (i + 1) % rxq->nb_rx_desc) {\n+\t\t\tif (rxq->sw_ring[i].mbuf != NULL)\n+\t\t\t\trte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);\n+\t\t}\n+\t}\n \n \tfor (i = 0; i < rxq->bulk_mbuf_num; i++)\n \t\trte_pktmbuf_free_seg(rxq->bulk_mbuf[i]);\n@@ -661,10 +671,13 @@ hns3_dev_rx_queue_start(struct hns3_adapter *hns, uint16_t idx)\n \t}\n \n \trxq->next_to_use = 0;\n+\trxq->rx_rearm_start = 0;\n \trxq->rx_free_hold = 0;\n+\trxq->rx_rearm_nb = 0;\n \trxq->pkt_first_seg = NULL;\n \trxq->pkt_last_seg = NULL;\n \thns3_init_rx_queue_hw(rxq);\n+\thns3_rxq_vec_setup(rxq);\n \n \treturn 0;\n }\n@@ -678,6 +691,8 @@ hns3_fake_rx_queue_start(struct hns3_adapter *hns, uint16_t idx)\n \trxq = (struct hns3_rx_queue *)hw->fkq_data.rx_queues[idx];\n \trxq->next_to_use = 0;\n \trxq->rx_free_hold = 0;\n+\trxq->rx_rearm_start = 0;\n+\trxq->rx_rearm_nb = 0;\n \thns3_init_rx_queue_hw(rxq);\n }\n \n@@ -860,6 +875,40 @@ hns3_stop_queues(struct hns3_adapter *hns, bool reset_queue)\n \treturn 0;\n }\n \n+/*\n+ * Iterate over all Rx Queue, and call the callback() function for each Rx\n+ * queue.\n+ *\n+ * @param[in] dev\n+ *   The target eth dev.\n+ * @param[in] callback\n+ *   The function to call for each queue.\n+ *   if callback function return nonzero will stop iterate and return it's value\n+ * @param[in] arg\n+ *   The arguments to provide the callback function with.\n+ *\n+ * @return\n+ *   0 on success, otherwise with errno set.\n+ */\n+int\n+hns3_rxq_iterate(struct rte_eth_dev *dev,\n+\t\t int (*callback)(struct hns3_rx_queue *, void *), void *arg)\n+{\n+\tuint32_t i;\n+\tint ret;\n+\n+\tif (dev->data->rx_queues == NULL)\n+\t\treturn -EINVAL;\n+\n+\tfor (i = 0; i < dev->data->nb_rx_queues; i++) {\n+\t\tret = callback(dev->data->rx_queues[i], arg);\n+\t\tif (ret != 0)\n+\t\t\treturn ret;\n+\t}\n+\n+\treturn 0;\n+}\n+\n static void*\n hns3_alloc_rxq_and_dma_zone(struct rte_eth_dev *dev,\n \t\t\t    struct hns3_queue_info *q_info)\n@@ -880,7 +929,13 @@ hns3_alloc_rxq_and_dma_zone(struct rte_eth_dev *dev,\n \t/* Allocate rx ring hardware descriptors. */\n \trxq->queue_id = q_info->idx;\n \trxq->nb_rx_desc = q_info->nb_desc;\n-\trx_desc = rxq->nb_rx_desc * sizeof(struct hns3_desc);\n+\n+\t/*\n+\t * Allocate a litter more memory because rx vector functions\n+\t * don't check boundaries each time.\n+\t */\n+\trx_desc = (rxq->nb_rx_desc + HNS3_DEFAULT_RX_BURST) *\n+\t\t\tsizeof(struct hns3_desc);\n \trx_mz = rte_eth_dma_zone_reserve(dev, q_info->ring_name, q_info->idx,\n \t\t\t\t\t rx_desc, HNS3_RING_BASE_ALIGN,\n \t\t\t\t\t q_info->socket_id);\n@@ -1329,7 +1384,8 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,\n \t\tconf->rx_free_thresh : HNS3_DEFAULT_RX_FREE_THRESH;\n \trxq->rx_deferred_start = conf->rx_deferred_start;\n \n-\trx_entry_len = sizeof(struct hns3_entry) * rxq->nb_rx_desc;\n+\trx_entry_len = (rxq->nb_rx_desc + HNS3_DEFAULT_RX_BURST) *\n+\t\t\tsizeof(struct hns3_entry);\n \trxq->sw_ring = rte_zmalloc_socket(\"hns3 RX sw ring\", rx_entry_len,\n \t\t\t\t\t  RTE_CACHE_LINE_SIZE, socket_id);\n \tif (rxq->sw_ring == NULL) {\n@@ -1340,6 +1396,8 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,\n \n \trxq->next_to_use = 0;\n \trxq->rx_free_hold = 0;\n+\trxq->rx_rearm_start = 0;\n+\trxq->rx_rearm_nb = 0;\n \trxq->pkt_first_seg = NULL;\n \trxq->pkt_last_seg = NULL;\n \trxq->port_id = dev->data->port_id;\n@@ -1431,7 +1489,8 @@ hns3_dev_supported_ptypes_get(struct rte_eth_dev *dev)\n \t};\n \n \tif (dev->rx_pkt_burst == hns3_recv_pkts ||\n-\t    dev->rx_pkt_burst == hns3_recv_scattered_pkts)\n+\t    dev->rx_pkt_burst == hns3_recv_scattered_pkts ||\n+\t    dev->rx_pkt_burst == hns3_recv_pkts_vec)\n \t\treturn ptypes;\n \n \treturn NULL;\n@@ -1915,6 +1974,25 @@ hns3_recv_scattered_pkts(void *rx_queue,\n \treturn nb_rx;\n }\n \n+void __rte_weak\n+hns3_rxq_vec_setup(__rte_unused struct hns3_rx_queue *rxq)\n+{\n+}\n+\n+int __rte_weak\n+hns3_rx_check_vec_support(__rte_unused struct rte_eth_dev *dev)\n+{\n+\treturn -ENOTSUP;\n+}\n+\n+uint16_t __rte_weak\n+hns3_recv_pkts_vec(__rte_unused void *tx_queue,\n+\t\t   __rte_unused struct rte_mbuf **tx_pkts,\n+\t\t   __rte_unused uint16_t nb_pkts)\n+{\n+\treturn 0;\n+}\n+\n int\n hns3_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,\n \t\t       struct rte_eth_burst_mode *mode)\n@@ -1925,6 +2003,7 @@ hns3_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,\n \t} burst_infos[] = {\n \t\t{ hns3_recv_pkts,\t\t\"Scalar\" },\n \t\t{ hns3_recv_scattered_pkts,\t\"Scalar Scattered\" },\n+\t\t{ hns3_recv_pkts_vec,\t\t\"Vector Neon\" },\n \t};\n \n \teth_rx_burst_t pkt_burst = dev->rx_pkt_burst;\n@@ -1949,6 +2028,9 @@ hns3_get_rx_function(struct rte_eth_dev *dev)\n \tstruct hns3_adapter *hns = dev->data->dev_private;\n \tuint64_t offloads = dev->data->dev_conf.rxmode.offloads;\n \n+\tif (hns->rx_vec_allowed && hns3_rx_check_vec_support(dev) == 0)\n+\t\treturn hns3_recv_pkts_vec;\n+\n \tif (hns->rx_simple_allowed && !dev->data->scattered_rx &&\n \t    (offloads & DEV_RX_OFFLOAD_TCP_LRO) == 0)\n \t\treturn hns3_recv_pkts;\ndiff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h\nindex c5a510b..a629be9 100644\n--- a/drivers/net/hns3/hns3_rxtx.h\n+++ b/drivers/net/hns3/hns3_rxtx.h\n@@ -17,6 +17,18 @@\n #define HNS3_DEFAULT_TX_RS_THRESH\t32\n #define HNS3_TX_FAST_FREE_AHEAD\t\t64\n \n+#define HNS3_DEFAULT_RX_BURST\t\t32\n+#if (HNS3_DEFAULT_RX_BURST > 64)\n+#error \"PMD HNS3: HNS3_DEFAULT_RX_BURST must <= 64\\n\"\n+#endif\n+#define HNS3_DEFAULT_DESCS_PER_LOOP\t4\n+#define HNS3_SVE_DEFAULT_DESCS_PER_LOOP\t8\n+#if (HNS3_DEFAULT_DESCS_PER_LOOP > HNS3_SVE_DEFAULT_DESCS_PER_LOOP)\n+#define HNS3_VECTOR_RX_OFFSET_TABLE_LEN\tHNS3_DEFAULT_DESCS_PER_LOOP\n+#else\n+#define HNS3_VECTOR_RX_OFFSET_TABLE_LEN\tHNS3_SVE_DEFAULT_DESCS_PER_LOOP\n+#endif\n+#define HNS3_DEFAULT_RXQ_REARM_THRESH\t64\n #define HNS3_UINT8_BIT\t\t\t8\n #define HNS3_UINT16_BIT\t\t\t16\n #define HNS3_UINT32_BIT\t\t\t32\n@@ -236,7 +248,13 @@ struct hns3_desc {\n \t\t\t\t\tuint16_t ot_vlan_tag;\n \t\t\t\t};\n \t\t\t};\n-\t\t\tuint32_t bd_base_info;\n+\t\t\tunion {\n+\t\t\t\tuint32_t bd_base_info;\n+\t\t\t\tstruct {\n+\t\t\t\t\tuint16_t bdtype_vld_udp0;\n+\t\t\t\t\tuint16_t fe_lum_crcp_l3l4p;\n+\t\t\t\t};\n+\t\t\t};\n \t\t} rx;\n \t};\n } __rte_packed;\n@@ -270,7 +288,8 @@ struct hns3_rx_queue {\n \tuint16_t rx_free_thresh;\n \tuint16_t next_to_use;    /* index of next BD to be polled */\n \tuint16_t rx_free_hold;   /* num of BDs waited to passed to hardware */\n-\n+\tuint16_t rx_rearm_start; /* index of BD that driver re-arming from */\n+\tuint16_t rx_rearm_nb;    /* number of remaining BDs to be re-armed */\n \t/*\n \t * port based vlan configuration state.\n \t * value range: HNS3_PORT_BASE_VLAN_DISABLE / HNS3_PORT_BASE_VLAN_ENABLE\n@@ -292,6 +311,11 @@ struct hns3_rx_queue {\n \n \tstruct rte_mbuf *bulk_mbuf[HNS3_BULK_ALLOC_MBUF_NUM];\n \tuint16_t bulk_mbuf_num;\n+\n+\t/* offset_table: used for vector, to solve execute re-order problem */\n+\tuint8_t offset_table[HNS3_VECTOR_RX_OFFSET_TABLE_LEN + 1];\n+\tuint64_t mbuf_initializer; /* value to init mbufs used with vector rx */\n+\tstruct rte_mbuf fake_mbuf; /* fake mbuf used with vector rx */\n };\n \n struct hns3_tx_queue {\n@@ -554,6 +578,8 @@ int hns3_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);\n void hns3_enable_all_queues(struct hns3_hw *hw, bool en);\n int hns3_start_queues(struct hns3_adapter *hns, bool reset_queue);\n int hns3_stop_queues(struct hns3_adapter *hns, bool reset_queue);\n+int hns3_rxq_iterate(struct rte_eth_dev *dev,\n+\t\t int (*callback)(struct hns3_rx_queue *, void *), void *arg);\n void hns3_dev_release_mbufs(struct hns3_adapter *hns);\n int hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,\n \t\t\tunsigned int socket, const struct rte_eth_rxconf *conf,\n@@ -564,9 +590,12 @@ uint16_t hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,\n \t\t\tuint16_t nb_pkts);\n uint16_t hns3_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,\n \t\t\t\t  uint16_t nb_pkts);\n+uint16_t hns3_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\n+\t\t\t    uint16_t nb_pkts);\n int hns3_rx_burst_mode_get(struct rte_eth_dev *dev,\n \t\t\t   __rte_unused uint16_t queue_id,\n \t\t\t   struct rte_eth_burst_mode *mode);\n+int hns3_rx_check_vec_support(struct rte_eth_dev *dev);\n uint16_t hns3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\tuint16_t nb_pkts);\n uint16_t hns3_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,\n@@ -594,7 +623,9 @@ int hns3_restore_gro_conf(struct hns3_hw *hw);\n void hns3_update_all_queues_pvid_state(struct hns3_hw *hw);\n void hns3_rx_scattered_reset(struct rte_eth_dev *dev);\n void hns3_rx_scattered_calc(struct rte_eth_dev *dev);\n+int hns3_rx_check_vec_support(struct rte_eth_dev *dev);\n int hns3_tx_check_vec_support(struct rte_eth_dev *dev);\n+void hns3_rxq_vec_setup(struct hns3_rx_queue *rxq);\n void hns3_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,\n \t\t       struct rte_eth_rxq_info *qinfo);\n void hns3_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,\ndiff --git a/drivers/net/hns3/hns3_rxtx_vec.c b/drivers/net/hns3/hns3_rxtx_vec.c\nindex 1154b6f..a26c83d 100644\n--- a/drivers/net/hns3/hns3_rxtx_vec.c\n+++ b/drivers/net/hns3/hns3_rxtx_vec.c\n@@ -45,3 +45,170 @@ hns3_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)\n \n \treturn nb_tx;\n }\n+\n+static inline void\n+hns3_rxq_rearm_mbuf(struct hns3_rx_queue *rxq)\n+{\n+#define REARM_LOOP_STEP_NUM\t4\n+\tstruct hns3_entry *rxep = &rxq->sw_ring[rxq->rx_rearm_start];\n+\tstruct hns3_desc *rxdp = rxq->rx_ring + rxq->rx_rearm_start;\n+\tuint64_t dma_addr;\n+\tint i;\n+\n+\tif (unlikely(rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,\n+\t\t\t\t\t  HNS3_DEFAULT_RXQ_REARM_THRESH) < 0)) {\n+\t\trte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;\n+\t\treturn;\n+\t}\n+\n+\tfor (i = 0; i < HNS3_DEFAULT_RXQ_REARM_THRESH; i += REARM_LOOP_STEP_NUM,\n+\t\trxep += REARM_LOOP_STEP_NUM, rxdp += REARM_LOOP_STEP_NUM) {\n+\t\tif (likely(i <\n+\t\t\tHNS3_DEFAULT_RXQ_REARM_THRESH - REARM_LOOP_STEP_NUM)) {\n+\t\t\trte_prefetch_non_temporal(rxep[4].mbuf);\n+\t\t\trte_prefetch_non_temporal(rxep[5].mbuf);\n+\t\t\trte_prefetch_non_temporal(rxep[6].mbuf);\n+\t\t\trte_prefetch_non_temporal(rxep[7].mbuf);\n+\t\t}\n+\n+\t\tdma_addr = rte_mbuf_data_iova_default(rxep[0].mbuf);\n+\t\trxdp[0].addr = rte_cpu_to_le_64(dma_addr);\n+\t\trxdp[0].rx.bd_base_info = 0;\n+\n+\t\tdma_addr = rte_mbuf_data_iova_default(rxep[1].mbuf);\n+\t\trxdp[1].addr = rte_cpu_to_le_64(dma_addr);\n+\t\trxdp[1].rx.bd_base_info = 0;\n+\n+\t\tdma_addr = rte_mbuf_data_iova_default(rxep[2].mbuf);\n+\t\trxdp[2].addr = rte_cpu_to_le_64(dma_addr);\n+\t\trxdp[2].rx.bd_base_info = 0;\n+\n+\t\tdma_addr = rte_mbuf_data_iova_default(rxep[3].mbuf);\n+\t\trxdp[3].addr = rte_cpu_to_le_64(dma_addr);\n+\t\trxdp[3].rx.bd_base_info = 0;\n+\t}\n+\n+\trxq->rx_rearm_start += HNS3_DEFAULT_RXQ_REARM_THRESH;\n+\tif (rxq->rx_rearm_start >= rxq->nb_rx_desc)\n+\t\trxq->rx_rearm_start = 0;\n+\n+\trxq->rx_rearm_nb -= HNS3_DEFAULT_RXQ_REARM_THRESH;\n+\n+\thns3_write_reg_opt(rxq->io_head_reg, HNS3_DEFAULT_RXQ_REARM_THRESH);\n+}\n+\n+uint16_t\n+hns3_recv_pkts_vec(void *__restrict rx_queue,\n+\t\t   struct rte_mbuf **__restrict rx_pkts,\n+\t\t   uint16_t nb_pkts)\n+{\n+\tstruct hns3_rx_queue *rxq = rx_queue;\n+\tstruct hns3_desc *rxdp = &rxq->rx_ring[rxq->next_to_use];\n+\tuint64_t bd_err_mask;  /* bit mask indicate whick pkts is error */\n+\tuint16_t nb_rx;\n+\n+\tnb_pkts = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);\n+\tnb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_DEFAULT_DESCS_PER_LOOP);\n+\n+\trte_prefetch_non_temporal(rxdp);\n+\n+\tif (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)\n+\t\thns3_rxq_rearm_mbuf(rxq);\n+\n+\tif (unlikely(!(rxdp->rx.bd_base_info &\n+\t\t\trte_cpu_to_le_32(1u << HNS3_RXD_VLD_B))))\n+\t\treturn 0;\n+\n+\trte_prefetch0(rxq->sw_ring[rxq->next_to_use + 0].mbuf);\n+\trte_prefetch0(rxq->sw_ring[rxq->next_to_use + 1].mbuf);\n+\trte_prefetch0(rxq->sw_ring[rxq->next_to_use + 2].mbuf);\n+\trte_prefetch0(rxq->sw_ring[rxq->next_to_use + 3].mbuf);\n+\n+\tbd_err_mask = 0;\n+\tnb_rx = hns3_recv_burst_vec(rxq, rx_pkts, nb_pkts, &bd_err_mask);\n+\tif (unlikely(bd_err_mask))\n+\t\tnb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, bd_err_mask);\n+\n+\treturn nb_rx;\n+}\n+\n+static void\n+hns3_rxq_vec_setup_rearm_data(struct hns3_rx_queue *rxq)\n+{\n+\tuintptr_t p;\n+\tstruct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */\n+\n+\tmb_def.nb_segs = 1;\n+\tmb_def.data_off = RTE_PKTMBUF_HEADROOM;\n+\tmb_def.port = rxq->port_id;\n+\trte_mbuf_refcnt_set(&mb_def, 1);\n+\n+\t/* prevent compiler reordering: rearm_data covers previous fields */\n+\trte_compiler_barrier();\n+\tp = (uintptr_t)&mb_def.rearm_data;\n+\trxq->mbuf_initializer = *(uint64_t *)p;\n+}\n+\n+void\n+hns3_rxq_vec_setup(struct hns3_rx_queue *rxq)\n+{\n+\tstruct hns3_entry *sw_ring = &rxq->sw_ring[rxq->nb_rx_desc];\n+\tunsigned int i;\n+\n+\tmemset(&rxq->rx_ring[rxq->nb_rx_desc], 0,\n+\t\tsizeof(struct hns3_desc) * HNS3_DEFAULT_RX_BURST);\n+\n+\tmemset(&rxq->fake_mbuf, 0, sizeof(rxq->fake_mbuf));\n+\tfor (i = 0; i < HNS3_DEFAULT_RX_BURST; i++)\n+\t\tsw_ring[i].mbuf = &rxq->fake_mbuf;\n+\n+\thns3_rxq_vec_setup_rearm_data(rxq);\n+\n+\tmemset(rxq->offset_table, 0, sizeof(rxq->offset_table));\n+}\n+\n+#ifndef RTE_LIBRTE_IEEE1588\n+static int\n+hns3_rxq_vec_check(struct hns3_rx_queue *rxq, void *arg)\n+{\n+\tuint32_t min_vec_bds = HNS3_DEFAULT_RXQ_REARM_THRESH +\n+\t\t\t\tHNS3_DEFAULT_RX_BURST;\n+\n+\tif (rxq->nb_rx_desc < min_vec_bds)\n+\t\treturn -ENOTSUP;\n+\n+\tif (rxq->nb_rx_desc % HNS3_DEFAULT_RXQ_REARM_THRESH)\n+\t\treturn -ENOTSUP;\n+\n+\tRTE_SET_USED(arg);\n+\treturn 0;\n+}\n+#endif\n+\n+int\n+hns3_rx_check_vec_support(struct rte_eth_dev *dev)\n+{\n+#ifndef RTE_LIBRTE_IEEE1588\n+\tstruct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf;\n+\tstruct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;\n+\tuint64_t offloads_mask = DEV_RX_OFFLOAD_TCP_LRO |\n+\t\t\t\t DEV_RX_OFFLOAD_VLAN;\n+\n+\tif (dev->data->scattered_rx)\n+\t\treturn -ENOTSUP;\n+\n+\tif (fconf->mode != RTE_FDIR_MODE_NONE)\n+\t\treturn -ENOTSUP;\n+\n+\tif (rxmode->offloads & offloads_mask)\n+\t\treturn -ENOTSUP;\n+\n+\tif (hns3_rxq_iterate(dev, hns3_rxq_vec_check, NULL) != 0)\n+\t\treturn -ENOTSUP;\n+\n+\treturn 0;\n+#else\n+\tRTE_SET_USED(dev);\n+\treturn -ENOTSUP;\n+#endif\n+}\ndiff --git a/drivers/net/hns3/hns3_rxtx_vec.h b/drivers/net/hns3/hns3_rxtx_vec.h\nindex 90679bf..c6df36d 100644\n--- a/drivers/net/hns3/hns3_rxtx_vec.h\n+++ b/drivers/net/hns3/hns3_rxtx_vec.h\n@@ -54,4 +54,24 @@ hns3_tx_free_buffers(struct hns3_tx_queue *txq)\n \tif (txq->next_to_clean >= txq->nb_tx_desc)\n \t\ttxq->next_to_clean = 0;\n }\n+\n+static inline uint16_t\n+hns3_rx_reassemble_pkts(struct rte_mbuf **rx_pkts,\n+\t\t\tuint16_t nb_pkts,\n+\t\t\tuint64_t pkt_err_mask)\n+{\n+\tuint16_t count, i;\n+\tuint64_t mask;\n+\n+\tcount = 0;\n+\tfor (i = 0; i < nb_pkts; i++) {\n+\t\tmask = ((uint64_t)1u) << i;\n+\t\tif (pkt_err_mask & mask)\n+\t\t\trte_pktmbuf_free_seg(rx_pkts[i]);\n+\t\telse\n+\t\t\trx_pkts[count++] = rx_pkts[i];\n+\t}\n+\n+\treturn count;\n+}\n #endif /* _HNS3_RXTX_VEC_H_ */\ndiff --git a/drivers/net/hns3/hns3_rxtx_vec_neon.h b/drivers/net/hns3/hns3_rxtx_vec_neon.h\nindex 2bd2b35..700ee8d 100644\n--- a/drivers/net/hns3/hns3_rxtx_vec_neon.h\n+++ b/drivers/net/hns3/hns3_rxtx_vec_neon.h\n@@ -78,4 +78,207 @@ hns3_xmit_fixed_burst_vec(void *__restrict tx_queue,\n \n \treturn nb_tx;\n }\n+\n+static inline uint32_t\n+hns3_desc_parse_field(struct hns3_rx_queue *rxq,\n+\t\t      struct hns3_entry *sw_ring,\n+\t\t      struct hns3_desc *rxdp,\n+\t\t      uint32_t   bd_vld_num)\n+{\n+\tuint32_t l234_info, ol_info, bd_base_info;\n+\tstruct rte_mbuf *pkt;\n+\tuint32_t retcode = 0;\n+\tuint32_t cksum_err;\n+\tint ret, i;\n+\n+\tfor (i = 0; i < (int)bd_vld_num; i++) {\n+\t\tpkt = sw_ring[i].mbuf;\n+\n+\t\t/* init rte_mbuf.rearm_data last 64-bit */\n+\t\tpkt->ol_flags = PKT_RX_RSS_HASH;\n+\n+\t\tl234_info = rxdp[i].rx.l234_info;\n+\t\tol_info = rxdp[i].rx.ol_info;\n+\t\tbd_base_info = rxdp[i].rx.bd_base_info;\n+\t\tret = hns3_handle_bdinfo(rxq, pkt, bd_base_info,\n+\t\t\t\t\t l234_info, &cksum_err);\n+\t\tif (unlikely(ret)) {\n+\t\t\tretcode |= 1u << i;\n+\t\t\tcontinue;\n+\t\t}\n+\n+\t\tpkt->packet_type = hns3_rx_calc_ptype(rxq, l234_info, ol_info);\n+\t\tif (likely(bd_base_info & BIT(HNS3_RXD_L3L4P_B)))\n+\t\t\thns3_rx_set_cksum_flag(pkt, pkt->packet_type,\n+\t\t\t\t\t       cksum_err);\n+\t}\n+\n+\treturn retcode;\n+}\n+\n+static inline uint16_t\n+hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,\n+\t\t    struct rte_mbuf **__restrict rx_pkts,\n+\t\t    uint16_t nb_pkts,\n+\t\t    uint64_t *bd_err_mask)\n+{\n+\tuint16_t rx_id = rxq->next_to_use;\n+\tstruct hns3_entry *sw_ring = &rxq->sw_ring[rx_id];\n+\tstruct hns3_desc *rxdp = &rxq->rx_ring[rx_id];\n+\tuint32_t bd_valid_num, parse_retcode;\n+\tuint16_t nb_rx = 0;\n+\tint pos, offset;\n+\n+\t/* mask to shuffle from desc to mbuf's rx_descriptor_fields1 */\n+\tuint8x16_t shuf_rx_desc_fields_msk = {\n+\t\t0xff, 0xff, 0xff, 0xff,  /* packet type init zero */\n+\t\t22, 23, 0xff, 0xff,      /* rx.pkt_len to rte_mbuf.pkt_len */\n+\t\t20, 21,\t                 /* size to rte_mbuf.data_len */\n+\t\t0xff, 0xff,\t         /* rte_mbuf.vlan_tci init zero */\n+\t\t8, 9, 10, 11,\t         /* rx.rss_hash to rte_mbuf.hash.rss */\n+\t};\n+\n+\tuint16x8_t crc_adjust = {\n+\t\t0, 0,         /* ignore pkt_type field */\n+\t\trxq->crc_len, /* sub crc on pkt_len */\n+\t\t0,            /* ignore high-16bits of pkt_len */\n+\t\trxq->crc_len, /* sub crc on data_len */\n+\t\t0, 0, 0,      /* ignore non-length fields */\n+\t};\n+\n+\tfor (pos = 0; pos < nb_pkts; pos += HNS3_DEFAULT_DESCS_PER_LOOP,\n+\t\t\t\t     rxdp += HNS3_DEFAULT_DESCS_PER_LOOP) {\n+\t\tuint64x2x2_t descs[HNS3_DEFAULT_DESCS_PER_LOOP];\n+\t\tuint8x16x2_t pkt_mbuf1, pkt_mbuf2, pkt_mbuf3, pkt_mbuf4;\n+\t\tuint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;\n+\t\tuint64x2_t mbp1, mbp2;\n+\t\tuint16x4_t bd_vld = {0};\n+\t\tuint16x8_t tmp;\n+\t\tuint64_t stat;\n+\n+\t\t/* calc how many bd valid */\n+\t\tbd_vld = vset_lane_u16(rxdp[0].rx.bdtype_vld_udp0, bd_vld, 0);\n+\t\tbd_vld = vset_lane_u16(rxdp[1].rx.bdtype_vld_udp0, bd_vld, 1);\n+\t\tbd_vld = vset_lane_u16(rxdp[2].rx.bdtype_vld_udp0, bd_vld, 2);\n+\t\tbd_vld = vset_lane_u16(rxdp[3].rx.bdtype_vld_udp0, bd_vld, 3);\n+\n+\t\t/* load 2 mbuf pointer */\n+\t\tmbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);\n+\n+\t\tbd_vld = vshl_n_u16(bd_vld,\n+\t\t\t\t    HNS3_UINT16_BIT - 1 - HNS3_RXD_VLD_B);\n+\t\tbd_vld = vreinterpret_u16_s16(\n+\t\t\t\tvshr_n_s16(vreinterpret_s16_u16(bd_vld),\n+\t\t\t\t\t   HNS3_UINT16_BIT - 1));\n+\t\tstat = ~vget_lane_u64(vreinterpret_u64_u16(bd_vld), 0);\n+\n+\t\t/* load 2 mbuf pointer again */\n+\t\tmbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);\n+\n+\t\tif (likely(stat == 0))\n+\t\t\tbd_valid_num = HNS3_DEFAULT_DESCS_PER_LOOP;\n+\t\telse\n+\t\t\tbd_valid_num = __builtin_ctzl(stat) / HNS3_UINT16_BIT;\n+\t\tif (bd_valid_num == 0)\n+\t\t\tbreak;\n+\n+\t\t/* use offset to control below data load oper ordering */\n+\t\toffset = rxq->offset_table[bd_valid_num];\n+\n+\t\t/* store 2 mbuf pointer into rx_pkts */\n+\t\tvst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);\n+\n+\t\t/* read first two descs */\n+\t\tdescs[0] = vld2q_u64((uint64_t *)(rxdp + offset));\n+\t\tdescs[1] = vld2q_u64((uint64_t *)(rxdp + offset + 1));\n+\n+\t\t/* store 2 mbuf pointer into rx_pkts again */\n+\t\tvst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);\n+\n+\t\t/* read remains two descs */\n+\t\tdescs[2] = vld2q_u64((uint64_t *)(rxdp + offset + 2));\n+\t\tdescs[3] = vld2q_u64((uint64_t *)(rxdp + offset + 3));\n+\n+\t\tpkt_mbuf1.val[0] = vreinterpretq_u8_u64(descs[0].val[0]);\n+\t\tpkt_mbuf1.val[1] = vreinterpretq_u8_u64(descs[0].val[1]);\n+\t\tpkt_mbuf2.val[0] = vreinterpretq_u8_u64(descs[1].val[0]);\n+\t\tpkt_mbuf2.val[1] = vreinterpretq_u8_u64(descs[1].val[1]);\n+\n+\t\t/* pkt 1,2 convert format from desc to pktmbuf */\n+\t\tpkt_mb1 = vqtbl2q_u8(pkt_mbuf1, shuf_rx_desc_fields_msk);\n+\t\tpkt_mb2 = vqtbl2q_u8(pkt_mbuf2, shuf_rx_desc_fields_msk);\n+\n+\t\t/* store the first 8 bytes of pkt 1,2 mbuf's rearm_data */\n+\t\t*(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data =\n+\t\t\trxq->mbuf_initializer;\n+\t\t*(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data =\n+\t\t\trxq->mbuf_initializer;\n+\n+\t\t/* pkt 1,2 remove crc */\n+\t\ttmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);\n+\t\tpkt_mb1 = vreinterpretq_u8_u16(tmp);\n+\t\ttmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);\n+\t\tpkt_mb2 = vreinterpretq_u8_u16(tmp);\n+\n+\t\tpkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]);\n+\t\tpkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]);\n+\t\tpkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]);\n+\t\tpkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]);\n+\n+\t\t/* pkt 3,4 convert format from desc to pktmbuf */\n+\t\tpkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_rx_desc_fields_msk);\n+\t\tpkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_rx_desc_fields_msk);\n+\n+\t\t/* pkt 1,2 save to rx_pkts mbuf */\n+\t\tvst1q_u8((void *)&sw_ring[pos + 0].mbuf->rx_descriptor_fields1,\n+\t\t\t pkt_mb1);\n+\t\tvst1q_u8((void *)&sw_ring[pos + 1].mbuf->rx_descriptor_fields1,\n+\t\t\t pkt_mb2);\n+\n+\t\t/* pkt 3,4 remove crc */\n+\t\ttmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);\n+\t\tpkt_mb3 = vreinterpretq_u8_u16(tmp);\n+\t\ttmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);\n+\t\tpkt_mb4 = vreinterpretq_u8_u16(tmp);\n+\n+\t\t/* store the first 8 bytes of pkt 3,4 mbuf's rearm_data */\n+\t\t*(uint64_t *)&sw_ring[pos + 2].mbuf->rearm_data =\n+\t\t\trxq->mbuf_initializer;\n+\t\t*(uint64_t *)&sw_ring[pos + 3].mbuf->rearm_data =\n+\t\t\trxq->mbuf_initializer;\n+\n+\t\t/* pkt 3,4 save to rx_pkts mbuf */\n+\t\tvst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1,\n+\t\t\t pkt_mb3);\n+\t\tvst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1,\n+\t\t\t pkt_mb4);\n+\n+\t\trte_prefetch_non_temporal(rxdp + HNS3_DEFAULT_DESCS_PER_LOOP);\n+\n+\t\tparse_retcode = hns3_desc_parse_field(rxq, &sw_ring[pos],\n+\t\t\t&rxdp[offset], bd_valid_num);\n+\t\tif (unlikely(parse_retcode))\n+\t\t\t(*bd_err_mask) |= ((uint64_t)parse_retcode) << pos;\n+\n+\t\trte_prefetch0(sw_ring[pos +\n+\t\t\t\t      HNS3_DEFAULT_DESCS_PER_LOOP + 0].mbuf);\n+\t\trte_prefetch0(sw_ring[pos +\n+\t\t\t\t      HNS3_DEFAULT_DESCS_PER_LOOP + 1].mbuf);\n+\t\trte_prefetch0(sw_ring[pos +\n+\t\t\t\t      HNS3_DEFAULT_DESCS_PER_LOOP + 2].mbuf);\n+\t\trte_prefetch0(sw_ring[pos +\n+\t\t\t\t      HNS3_DEFAULT_DESCS_PER_LOOP + 3].mbuf);\n+\n+\t\tnb_rx += bd_valid_num;\n+\t\tif (bd_valid_num < HNS3_DEFAULT_DESCS_PER_LOOP)\n+\t\t\tbreak;\n+\t}\n+\n+\trxq->rx_rearm_nb += nb_rx;\n+\trxq->next_to_use += nb_rx;\n+\tif (rxq->next_to_use >= rxq->nb_rx_desc)\n+\t\trxq->next_to_use = 0;\n+\n+\treturn nb_rx;\n+}\n #endif /* _HNS3_RXTX_VEC_NEON_H_ */\n",
    "prefixes": [
        "6/8"
    ]
}