get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.
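A minimal sketch of reading this endpoint with Python's requests library (an assumption for illustration; any HTTP client that can parse JSON works the same way). Without the ?format=api suffix the endpoint returns plain JSON, and the field names match the sample response below:

    import requests

    # Fetch the patch shown in the example response below.
    resp = requests.get("http://patches.dpdk.org/api/patches/55069/")
    resp.raise_for_status()
    patch = resp.json()

    # A few fields present in the sample response.
    print(patch["name"])               # patch subject
    print(patch["state"])              # e.g. "changes-requested"
    print(patch["submitter"]["name"])  # submitter display name
    print(patch["mbox"])               # URL of the raw mbox download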

GET /api/patches/55069/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 55069,
    "url": "http://patches.dpdk.org/api/patches/55069/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/691dcc833654d7e8a666e88ea07dae95a4951230.1560958308.git.xuanziyang2@huawei.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<691dcc833654d7e8a666e88ea07dae95a4951230.1560958308.git.xuanziyang2@huawei.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/691dcc833654d7e8a666e88ea07dae95a4951230.1560958308.git.xuanziyang2@huawei.com",
    "date": "2019-06-19T16:20:27",
    "name": "[v5,14/15] net/hinic: add tx/rx package burst",
    "commit_ref": null,
    "pull_url": null,
    "state": "changes-requested",
    "archived": true,
    "hash": "827c662ad457fcfd0ec8f1b116f52e2c0b54a0ad",
    "submitter": {
        "id": 1321,
        "url": "http://patches.dpdk.org/api/people/1321/?format=api",
        "name": "Ziyang Xuan",
        "email": "xuanziyang2@huawei.com"
    },
    "delegate": {
        "id": 319,
        "url": "http://patches.dpdk.org/api/users/319/?format=api",
        "username": "fyigit",
        "first_name": "Ferruh",
        "last_name": "Yigit",
        "email": "ferruh.yigit@amd.com"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/691dcc833654d7e8a666e88ea07dae95a4951230.1560958308.git.xuanziyang2@huawei.com/mbox/",
    "series": [
        {
            "id": 5084,
            "url": "http://patches.dpdk.org/api/series/5084/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=5084",
            "date": "2019-06-19T15:45:20",
            "name": "A new net PMD - hinic",
            "version": 5,
            "mbox": "http://patches.dpdk.org/series/5084/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/55069/comments/",
    "check": "warning",
    "checks": "http://patches.dpdk.org/api/patches/55069/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id 973251D006;\n\tWed, 19 Jun 2019 18:13:03 +0200 (CEST)",
            "from huawei.com (szxga07-in.huawei.com [45.249.212.35])\n\tby dpdk.org (Postfix) with ESMTP id A3BAC1D15A\n\tfor <dev@dpdk.org>; Wed, 19 Jun 2019 18:09:10 +0200 (CEST)",
            "from DGGEMS410-HUB.china.huawei.com (unknown [172.30.72.58])\n\tby Forcepoint Email with ESMTP id 1EB84496572DFC358B6B\n\tfor <dev@dpdk.org>; Thu, 20 Jun 2019 00:08:43 +0800 (CST)",
            "from tester_149.localdomain (10.175.119.39) by\n\tDGGEMS410-HUB.china.huawei.com (10.3.19.210) with Microsoft SMTP\n\tServer id 14.3.439.0; Thu, 20 Jun 2019 00:08:34 +0800"
        ],
        "From": "Ziyang Xuan <xuanziyang2@huawei.com>",
        "To": "<dev@dpdk.org>",
        "CC": "<ferruh.yigit@intel.com>, <cloud.wangxiaoyun@huawei.com>,\n\t<shahar.belkar@huawei.com>, <tanya.brokhman@huawei.com>,\n\t<luoxianjun@huawei.com>, Ziyang Xuan <xuanziyang2@huawei.com>",
        "Date": "Thu, 20 Jun 2019 00:20:27 +0800",
        "Message-ID": "<691dcc833654d7e8a666e88ea07dae95a4951230.1560958308.git.xuanziyang2@huawei.com>",
        "X-Mailer": "git-send-email 2.18.0",
        "In-Reply-To": "<cover.1560958308.git.xuanziyang2@huawei.com>",
        "References": "<cover.1560958308.git.xuanziyang2@huawei.com>",
        "MIME-Version": "1.0",
        "Content-Type": "text/plain",
        "X-Originating-IP": "[10.175.119.39]",
        "X-CFilter-Loop": "Reflected",
        "Subject": "[dpdk-dev] [PATCH v5 14/15] net/hinic: add tx/rx package burst",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "This patch add package sending and receiving function codes.\n\nSigned-off-by: Ziyang Xuan <xuanziyang2@huawei.com>\n---\n drivers/net/hinic/hinic_pmd_ethdev.c |    4 +\n drivers/net/hinic/hinic_pmd_rx.c     |  439 +++++++++++\n drivers/net/hinic/hinic_pmd_tx.c     | 1082 ++++++++++++++++++++++++++\n 3 files changed, 1525 insertions(+)",
    "diff": "diff --git a/drivers/net/hinic/hinic_pmd_ethdev.c b/drivers/net/hinic/hinic_pmd_ethdev.c\nindex eeb4227fa..6bd35a227 100644\n--- a/drivers/net/hinic/hinic_pmd_ethdev.c\n+++ b/drivers/net/hinic/hinic_pmd_ethdev.c\n@@ -1543,6 +1543,8 @@ static int hinic_dev_init(struct rte_eth_dev *eth_dev)\n \n \t/* rte_eth_dev ops, rx_burst and tx_burst */\n \teth_dev->dev_ops = &hinic_pmd_ops;\n+\teth_dev->rx_pkt_burst = hinic_recv_pkts;\n+\teth_dev->tx_pkt_burst = hinic_xmit_pkts;\n \n \treturn hinic_func_init(eth_dev);\n }\n@@ -1560,6 +1562,8 @@ static int hinic_dev_uninit(struct rte_eth_dev *dev)\n \thinic_dev_close(dev);\n \n \tdev->dev_ops = NULL;\n+\tdev->rx_pkt_burst = NULL;\n+\tdev->tx_pkt_burst = NULL;\n \n \trte_free(dev->data->mac_addrs);\n \tdev->data->mac_addrs = NULL;\ndiff --git a/drivers/net/hinic/hinic_pmd_rx.c b/drivers/net/hinic/hinic_pmd_rx.c\nindex 592a889f0..e474deefe 100644\n--- a/drivers/net/hinic/hinic_pmd_rx.c\n+++ b/drivers/net/hinic/hinic_pmd_rx.c\n@@ -4,6 +4,9 @@\n \n #include <rte_ether.h>\n #include <rte_mbuf.h>\n+#ifdef __ARM64_NEON__\n+#include <arm_neon.h>\n+#endif\n \n #include \"base/hinic_compat.h\"\n #include \"base/hinic_pmd_hwdev.h\"\n@@ -35,8 +38,69 @@\n \n #define HINIC_GET_RQ_FREE_WQEBBS(rxq)\t((rxq)->wq->delta - 1)\n \n+/* rxq cqe done and status bit */\n+#define HINIC_GET_RX_DONE_BE(status)\t\\\n+\t((status) & 0x80U)\n+\n #define HINIC_RX_CSUM_OFFLOAD_EN\t0xFFF\n \n+#define RQ_CQE_SGE_VLAN_SHIFT\t\t\t0\n+#define RQ_CQE_SGE_LEN_SHIFT\t\t\t16\n+\n+#define RQ_CQE_SGE_VLAN_MASK\t\t\t0xFFFFU\n+#define RQ_CQE_SGE_LEN_MASK\t\t\t0xFFFFU\n+\n+#define RQ_CQE_SGE_GET(val, member)\t\t\\\n+\t(((val) >> RQ_CQE_SGE_##member##_SHIFT) & RQ_CQE_SGE_##member##_MASK)\n+\n+#define HINIC_GET_RX_VLAN_TAG(vlan_len)\t\\\n+\t\tRQ_CQE_SGE_GET(vlan_len, VLAN)\n+\n+#define HINIC_GET_RX_PKT_LEN(vlan_len)\t\\\n+\t\tRQ_CQE_SGE_GET(vlan_len, LEN)\n+\n+#define RQ_CQE_STATUS_CSUM_ERR_SHIFT\t\t0\n+#define RQ_CQE_STATUS_NUM_LRO_SHIFT\t\t16\n+#define RQ_CQE_STATUS_LRO_PUSH_SHIFT\t\t25\n+#define RQ_CQE_STATUS_LRO_ENTER_SHIFT\t\t26\n+#define RQ_CQE_STATUS_LRO_INTR_SHIFT\t\t27\n+\n+#define RQ_CQE_STATUS_BP_EN_SHIFT\t\t30\n+#define RQ_CQE_STATUS_RXDONE_SHIFT\t\t31\n+#define RQ_CQE_STATUS_FLUSH_SHIFT\t\t28\n+\n+#define RQ_CQE_STATUS_CSUM_ERR_MASK\t\t0xFFFFU\n+#define RQ_CQE_STATUS_NUM_LRO_MASK\t\t0xFFU\n+#define RQ_CQE_STATUS_LRO_PUSH_MASK\t\t0X1U\n+#define RQ_CQE_STATUS_LRO_ENTER_MASK\t\t0X1U\n+#define RQ_CQE_STATUS_LRO_INTR_MASK\t\t0X1U\n+#define RQ_CQE_STATUS_BP_EN_MASK\t\t0X1U\n+#define RQ_CQE_STATUS_RXDONE_MASK\t\t0x1U\n+#define RQ_CQE_STATUS_FLUSH_MASK\t\t0x1U\n+\n+#define RQ_CQE_STATUS_GET(val, member)\t\t\\\n+\t\t(((val) >> RQ_CQE_STATUS_##member##_SHIFT) & \\\n+\t\t\t\tRQ_CQE_STATUS_##member##_MASK)\n+\n+#define RQ_CQE_STATUS_CLEAR(val, member)\t\\\n+\t\t((val) & (~(RQ_CQE_STATUS_##member##_MASK << \\\n+\t\t\t\tRQ_CQE_STATUS_##member##_SHIFT)))\n+\n+#define HINIC_GET_RX_CSUM_ERR(status)\t\\\n+\t\tRQ_CQE_STATUS_GET(status, CSUM_ERR)\n+\n+#define HINIC_GET_RX_DONE(status)\t\\\n+\t\tRQ_CQE_STATUS_GET(status, RXDONE)\n+\n+#define HINIC_GET_RX_FLUSH(status)\t\\\n+\t\tRQ_CQE_STATUS_GET(status, FLUSH)\n+\n+#define HINIC_GET_RX_BP_EN(status)\t\\\n+\t\tRQ_CQE_STATUS_GET(status, BP_EN)\n+\n+#define HINIC_GET_RX_NUM_LRO(status)\t\\\n+\t\tRQ_CQE_STATUS_GET(status, NUM_LRO)\n+\n /* RQ_CTRL */\n #define\tRQ_CTRL_BUFDESC_SECT_LEN_SHIFT\t\t0\n #define\tRQ_CTRL_COMPLETE_FORMAT_SHIFT\t\t15\n@@ -57,6 +121,72 @@\n #define RQ_CTRL_CLEAR(val, member)\t\t\\\n \t((val) & (~(RQ_CTRL_##member##_MASK << 
RQ_CTRL_##member##_SHIFT)))\n \n+#define RQ_CQE_PKT_NUM_SHIFT\t\t\t1\n+#define RQ_CQE_PKT_FIRST_LEN_SHIFT\t\t19\n+#define RQ_CQE_PKT_LAST_LEN_SHIFT\t\t6\n+#define RQ_CQE_SUPER_CQE_EN_SHIFT\t\t0\n+\n+#define RQ_CQE_PKT_FIRST_LEN_MASK\t\t0x1FFFU\n+#define RQ_CQE_PKT_LAST_LEN_MASK\t\t0x1FFFU\n+#define RQ_CQE_PKT_NUM_MASK\t\t\t0x1FU\n+#define RQ_CQE_SUPER_CQE_EN_MASK\t\t0x1\n+\n+#define RQ_CQE_PKT_NUM_GET(val, member)\t\t\\\n+\t(((val) >> RQ_CQE_PKT_##member##_SHIFT) & RQ_CQE_PKT_##member##_MASK)\n+\n+#define HINIC_GET_RQ_CQE_PKT_NUM(pkt_info) RQ_CQE_PKT_NUM_GET(pkt_info, NUM)\n+\n+#define RQ_CQE_SUPER_CQE_EN_GET(val, member)\t\\\n+\t(((val) >> RQ_CQE_##member##_SHIFT) & RQ_CQE_##member##_MASK)\n+\n+#define HINIC_GET_SUPER_CQE_EN(pkt_info)\t\\\n+\tRQ_CQE_SUPER_CQE_EN_GET(pkt_info, SUPER_CQE_EN)\n+\n+#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_SHIFT\t\t21\n+#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_MASK\t\t0x1U\n+\n+#define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_SHIFT\t\t0\n+#define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_MASK\t\t0xFFFU\n+\n+#define RQ_CQE_OFFOLAD_TYPE_PKT_UMBCAST_SHIFT\t\t19\n+#define RQ_CQE_OFFOLAD_TYPE_PKT_UMBCAST_MASK\t\t0x3U\n+\n+#define RQ_CQE_OFFOLAD_TYPE_RSS_TYPE_SHIFT\t\t24\n+#define RQ_CQE_OFFOLAD_TYPE_RSS_TYPE_MASK\t\t0xFFU\n+\n+#define RQ_CQE_OFFOLAD_TYPE_GET(val, member)\t\t(((val) >> \\\n+\t\t\t\tRQ_CQE_OFFOLAD_TYPE_##member##_SHIFT) & \\\n+\t\t\t\tRQ_CQE_OFFOLAD_TYPE_##member##_MASK)\n+\n+#define HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type)\t\\\n+\t\tRQ_CQE_OFFOLAD_TYPE_GET(offload_type, VLAN_EN)\n+\n+#define HINIC_GET_RSS_TYPES(offload_type)\t\\\n+\t\tRQ_CQE_OFFOLAD_TYPE_GET(offload_type, RSS_TYPE)\n+\n+#define HINIC_GET_RX_PKT_TYPE(offload_type)\t\\\n+\t\tRQ_CQE_OFFOLAD_TYPE_GET(offload_type, PKT_TYPE)\n+\n+#define HINIC_GET_RX_PKT_UMBCAST(offload_type)\t\\\n+\t\tRQ_CQE_OFFOLAD_TYPE_GET(offload_type, PKT_UMBCAST)\n+\n+#define RQ_CQE_STATUS_CSUM_BYPASS_VAL\t\t\t0x80U\n+#define RQ_CQE_STATUS_CSUM_ERR_IP_MASK\t\t\t0x39U\n+#define RQ_CQE_STATUS_CSUM_ERR_L4_MASK\t\t\t0x46U\n+#define RQ_CQE_STATUS_CSUM_ERR_OTHER\t\t\t0x100U\n+\n+#define HINIC_CSUM_ERR_BYPASSED(csum_err)\t \\\n+\t((csum_err) == RQ_CQE_STATUS_CSUM_BYPASS_VAL)\n+\n+#define HINIC_CSUM_ERR_IP(csum_err)\t \\\n+\t((csum_err) & RQ_CQE_STATUS_CSUM_ERR_IP_MASK)\n+\n+#define HINIC_CSUM_ERR_L4(csum_err)\t \\\n+\t((csum_err) & RQ_CQE_STATUS_CSUM_ERR_L4_MASK)\n+\n+#define HINIC_CSUM_ERR_OTHER(csum_err)\t \\\n+\t((csum_err) == RQ_CQE_STATUS_CSUM_ERR_OTHER)\n+\n \n void hinic_get_func_rx_buf_size(struct hinic_nic_dev *nic_dev)\n {\n@@ -156,6 +286,25 @@ hinic_prepare_rq_wqe(void *wqe, __rte_unused u16 pi, dma_addr_t buf_addr,\n \tbuf_desc->addr_low = lower_32_bits(buf_addr);\n }\n \n+void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats)\n+{\n+\tif (!rxq || !stats)\n+\t\treturn;\n+\n+\tmemcpy(stats, &rxq->rxq_stats, sizeof(rxq->rxq_stats));\n+}\n+\n+void hinic_rxq_stats_reset(struct hinic_rxq *rxq)\n+{\n+\tstruct hinic_rxq_stats *rxq_stats;\n+\n+\tif (rxq == NULL)\n+\t\treturn;\n+\n+\trxq_stats = &rxq->rxq_stats;\n+\tmemset(rxq_stats, 0, sizeof(*rxq_stats));\n+}\n+\n static int hinic_rx_alloc_cqe(struct hinic_rxq *rxq)\n {\n \tsize_t cqe_mem_size;\n@@ -289,6 +438,42 @@ void hinic_free_all_rx_mbuf(struct rte_eth_dev *eth_dev)\n \t\thinic_free_all_rx_skbs(nic_dev->rxqs[q_id]);\n }\n \n+static void hinic_recv_jumbo_pkt(struct hinic_rxq *rxq,\n+\t\t\t\t struct rte_mbuf *head_skb,\n+\t\t\t\t u32 remain_pkt_len)\n+{\n+\tstruct hinic_nic_dev *nic_dev = rxq->nic_dev;\n+\tstruct rte_mbuf *cur_mbuf, *rxm = NULL;\n+\tstruct hinic_rx_info 
*rx_info;\n+\tu16 sw_ci, rx_buf_len = rxq->buf_len;\n+\tu32 pkt_len;\n+\n+\twhile (remain_pkt_len > 0) {\n+\t\tsw_ci = hinic_get_rq_local_ci(nic_dev->hwdev, rxq->q_id);\n+\t\trx_info = &rxq->rx_info[sw_ci];\n+\n+\t\thinic_update_rq_local_ci(nic_dev->hwdev, rxq->q_id, 1);\n+\n+\t\tpkt_len = remain_pkt_len > rx_buf_len ?\n+\t\t\trx_buf_len : remain_pkt_len;\n+\t\tremain_pkt_len -= pkt_len;\n+\n+\t\tcur_mbuf = rx_info->mbuf;\n+\t\tcur_mbuf->data_len = (u16)pkt_len;\n+\t\tcur_mbuf->next = NULL;\n+\n+\t\thead_skb->pkt_len += cur_mbuf->data_len;\n+\t\thead_skb->nb_segs++;\n+\n+\t\tif (!rxm)\n+\t\t\thead_skb->next = cur_mbuf;\n+\t\telse\n+\t\t\trxm->next = cur_mbuf;\n+\n+\t\trxm = cur_mbuf;\n+\t}\n+}\n+\n static void hinic_rss_deinit(struct hinic_nic_dev *nic_dev)\n {\n \tu8 prio_tc[HINIC_DCB_UP_MAX] = {0};\n@@ -543,6 +728,113 @@ void hinic_free_all_rx_skbs(struct hinic_rxq *rxq)\n \t}\n }\n \n+static inline void hinic_rq_cqe_be_to_cpu32(void *dst_le32,\n+\t\t\t\t\t    volatile void *src_be32)\n+{\n+#ifndef __ARM64_NEON__\n+\tvolatile __m128i *wqe_be = (volatile __m128i *)src_be32;\n+\t__m128i *wqe_le = (__m128i *)dst_le32;\n+\t__m128i shuf_mask =  _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,\n+\t\t\t\t\t  11, 4, 5, 6, 7, 0, 1, 2, 3);\n+\n+\t/* l2nic just use first 128 bits */\n+\twqe_le[0] = _mm_shuffle_epi8(wqe_be[0], shuf_mask);\n+#else\n+\tvolatile uint8x16_t *wqe_be = (volatile uint8x16_t *)src_be32;\n+\tuint8x16_t *wqe_le = (uint8x16_t *)dst_le32;\n+\tconst uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,\n+\t\t\t\t\t9, 8, 15, 14, 13, 12};\n+\n+\t/* l2nic just use first 128 bits */\n+\twqe_le[0] = vqtbl1q_u8(wqe_be[0], shuf_mask);\n+#endif\n+}\n+\n+static inline uint64_t hinic_rx_rss_hash(uint32_t offload_type,\n+\t\t\t\t\t uint32_t cqe_hass_val,\n+\t\t\t\t\t uint32_t *rss_hash)\n+{\n+\tuint32_t rss_type;\n+\n+\trss_type = HINIC_GET_RSS_TYPES(offload_type);\n+\tif (likely(rss_type != 0)) {\n+\t\t*rss_hash = cqe_hass_val;\n+\t\treturn PKT_RX_RSS_HASH;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+static inline uint64_t hinic_rx_csum(uint32_t status, struct hinic_rxq *rxq)\n+{\n+\tuint32_t checksum_err;\n+\tuint64_t flags;\n+\n+\t/* most case checksum is ok */\n+\tchecksum_err = HINIC_GET_RX_CSUM_ERR(status);\n+\tif (likely(checksum_err == 0))\n+\t\treturn (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);\n+\n+\t/* If BYPASS bit set, all other status indications should be ignored */\n+\tif (unlikely(HINIC_CSUM_ERR_BYPASSED(checksum_err)))\n+\t\treturn PKT_RX_IP_CKSUM_UNKNOWN;\n+\n+\tflags = 0;\n+\n+\t/* IP checksum error */\n+\tif (HINIC_CSUM_ERR_IP(checksum_err))\n+\t\tflags |= PKT_RX_IP_CKSUM_BAD;\n+\telse\n+\t\tflags |= PKT_RX_IP_CKSUM_GOOD;\n+\n+\t/* L4 checksum error */\n+\tif (HINIC_CSUM_ERR_L4(checksum_err))\n+\t\tflags |= PKT_RX_L4_CKSUM_BAD;\n+\telse\n+\t\tflags |= PKT_RX_L4_CKSUM_GOOD;\n+\n+\tif (unlikely(HINIC_CSUM_ERR_OTHER(checksum_err)))\n+\t\tflags = PKT_RX_L4_CKSUM_NONE;\n+\n+\trxq->rxq_stats.errors++;\n+\n+\treturn flags;\n+}\n+\n+static inline uint64_t hinic_rx_vlan(uint32_t offload_type, uint32_t vlan_len,\n+\t\t\t\t     uint16_t *vlan_tci)\n+{\n+\tuint16_t vlan_tag;\n+\n+\tvlan_tag = HINIC_GET_RX_VLAN_TAG(vlan_len);\n+\tif (!HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type) || 0 == vlan_tag) {\n+\t\t*vlan_tci = 0;\n+\t\treturn 0;\n+\t}\n+\n+\t*vlan_tci = vlan_tag;\n+\n+\treturn PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;\n+}\n+\n+static inline u32 hinic_rx_alloc_mbuf_bulk(struct hinic_rxq *rxq,\n+\t\t\t\t\t   struct rte_mbuf **mbufs,\n+\t\t\t\t\t   u32 exp_mbuf_cnt)\n+{\n+\tint rc;\n+\tu32 
avail_cnt;\n+\n+\trc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, exp_mbuf_cnt);\n+\tif (likely(rc == HINIC_OK)) {\n+\t\tavail_cnt = exp_mbuf_cnt;\n+\t} else {\n+\t\tavail_cnt = 0;\n+\t\trxq->rxq_stats.rx_nombuf += exp_mbuf_cnt;\n+\t}\n+\n+\treturn avail_cnt;\n+}\n+\n static struct rte_mbuf *hinic_rx_alloc_mbuf(struct hinic_rxq *rxq,\n \t\t\t\t\tdma_addr_t *dma_addr)\n {\n@@ -557,6 +849,51 @@ static struct rte_mbuf *hinic_rx_alloc_mbuf(struct hinic_rxq *rxq,\n \treturn mbuf;\n }\n \n+static inline void hinic_rearm_rxq_mbuf(struct hinic_rxq *rxq)\n+{\n+\tu16 pi;\n+\tu32 i, free_wqebbs, rearm_wqebbs, exp_wqebbs;\n+\tdma_addr_t dma_addr;\n+\tstruct hinic_rq_wqe *rq_wqe;\n+\tstruct rte_mbuf **rearm_mbufs;\n+\n+\t/* check free wqebb fo rearm */\n+\tfree_wqebbs = HINIC_GET_RQ_FREE_WQEBBS(rxq);\n+\tif (unlikely(free_wqebbs < rxq->rx_free_thresh))\n+\t\treturn;\n+\n+\t/* get rearm mbuf array */\n+\tpi = HINIC_GET_RQ_LOCAL_PI(rxq);\n+\trearm_mbufs = (struct rte_mbuf **)(&rxq->rx_info[pi]);\n+\n+\t/* check rxq free wqebbs turn around */\n+\texp_wqebbs = rxq->q_depth - pi;\n+\tif (free_wqebbs < exp_wqebbs)\n+\t\texp_wqebbs = free_wqebbs;\n+\n+\t/* alloc mbuf in bulk */\n+\trearm_wqebbs = hinic_rx_alloc_mbuf_bulk(rxq, rearm_mbufs, exp_wqebbs);\n+\tif (unlikely(rearm_wqebbs == 0))\n+\t\treturn;\n+\n+\t/* rearm rx mbuf */\n+\trq_wqe = WQ_WQE_ADDR(rxq->wq, (u32)pi);\n+\tfor (i = 0; i < rearm_wqebbs; i++) {\n+\t\tdma_addr = rte_mbuf_data_iova_default(rearm_mbufs[i]);\n+\t\trq_wqe->buf_desc.addr_high =\n+\t\t\t\t\tcpu_to_be32(upper_32_bits(dma_addr));\n+\t\trq_wqe->buf_desc.addr_low =\n+\t\t\t\t\tcpu_to_be32(lower_32_bits(dma_addr));\n+\t\trq_wqe++;\n+\t}\n+\trxq->wq->prod_idx += rearm_wqebbs;\n+\trxq->wq->delta -= rearm_wqebbs;\n+\n+\t/* update rq hw_pi */\n+\trte_wmb();\n+\tHINIC_UPDATE_RQ_HW_PI(rxq, pi + rearm_wqebbs);\n+}\n+\n void hinic_rx_alloc_pkts(struct hinic_rxq *rxq)\n {\n \tstruct hinic_nic_dev *nic_dev = rxq->nic_dev;\n@@ -596,3 +933,105 @@ void hinic_rx_alloc_pkts(struct hinic_rxq *rxq)\n \t\tHINIC_UPDATE_RQ_HW_PI(rxq, pi + 1);\n \t}\n }\n+\n+u16 hinic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, u16 nb_pkts)\n+{\n+\tstruct rte_mbuf *rxm;\n+\tstruct hinic_rxq *rxq = rx_queue;\n+\tstruct hinic_rx_info *rx_info;\n+\tvolatile struct hinic_rq_cqe *rx_cqe;\n+\tu16 rx_buf_len, pkts = 0;\n+\tu16 sw_ci, ci_mask, wqebb_cnt = 0;\n+\tu32 pkt_len, status, vlan_len;\n+\tu64 rx_bytes = 0;\n+\tstruct hinic_rq_cqe cqe;\n+\tu32 offload_type, rss_hash;\n+\n+\trx_buf_len = rxq->buf_len;\n+\n+\t/* 1. get polling start ci */\n+\tci_mask = HINIC_GET_RQ_WQE_MASK(rxq);\n+\tsw_ci = HINIC_GET_RQ_LOCAL_CI(rxq);\n+\n+\twhile (pkts < nb_pkts) {\n+\t\t /* 2. current ci is done */\n+\t\trx_cqe = &rxq->rx_cqe[sw_ci];\n+\t\tstatus = rx_cqe->status;\n+\t\tif (!HINIC_GET_RX_DONE_BE(status))\n+\t\t\tbreak;\n+\n+\t\t/* read other cqe member after status */\n+\t\trte_rmb();\n+\n+\t\t/* convert cqe and get packet length */\n+\t\thinic_rq_cqe_be_to_cpu32(&cqe, (volatile void *)rx_cqe);\n+\t\tvlan_len = cqe.vlan_len;\n+\n+\t\trx_info = &rxq->rx_info[sw_ci];\n+\t\trxm = rx_info->mbuf;\n+\n+\t\t/* 3. next ci point and prefetch */\n+\t\tsw_ci++;\n+\t\tsw_ci &= ci_mask;\n+\n+\t\t/* prefetch next mbuf first 64B */\n+\t\trte_prefetch0(rxq->rx_info[sw_ci].mbuf);\n+\n+\t\t/* 4. 
jumbo frame process */\n+\t\tpkt_len = HINIC_GET_RX_PKT_LEN(vlan_len);\n+\t\tif (likely(pkt_len <= rx_buf_len)) {\n+\t\t\trxm->data_len = pkt_len;\n+\t\t\trxm->pkt_len = pkt_len;\n+\t\t\twqebb_cnt++;\n+\t\t} else {\n+\t\t\trxm->data_len = rx_buf_len;\n+\t\t\trxm->pkt_len = rx_buf_len;\n+\n+\t\t\t/* if jumbo use multi-wqebb update ci,\n+\t\t\t * recv_jumbo_pkt will also update ci\n+\t\t\t */\n+\t\t\tHINIC_UPDATE_RQ_LOCAL_CI(rxq, wqebb_cnt + 1);\n+\t\t\twqebb_cnt = 0;\n+\t\t\thinic_recv_jumbo_pkt(rxq, rxm, pkt_len - rx_buf_len);\n+\t\t\tsw_ci = HINIC_GET_RQ_LOCAL_CI(rxq);\n+\t\t}\n+\n+\t\t/* 5. vlan/checksum/rss/pkt_type/gro offload */\n+\t\trxm->data_off = RTE_PKTMBUF_HEADROOM;\n+\t\trxm->port = rxq->port_id;\n+\t\toffload_type = cqe.offload_type;\n+\n+\t\t/* vlan offload */\n+\t\trxm->ol_flags |= hinic_rx_vlan(offload_type, vlan_len,\n+\t\t\t\t\t       &rxm->vlan_tci);\n+\n+\t\t/* checksum offload */\n+\t\trxm->ol_flags |= hinic_rx_csum(cqe.status, rxq);\n+\n+\t\t/* rss hash offload */\n+\t\trss_hash = cqe.rss_hash;\n+\t\trxm->ol_flags |= hinic_rx_rss_hash(offload_type, rss_hash,\n+\t\t\t\t\t\t   &rxm->hash.rss);\n+\n+\t\t/* 6. clear done bit */\n+\t\trx_cqe->status = 0;\n+\n+\t\trx_bytes += pkt_len;\n+\t\trx_pkts[pkts++] = rxm;\n+\t}\n+\n+\tif (pkts) {\n+\t\t/* 7. update ci */\n+\t\tHINIC_UPDATE_RQ_LOCAL_CI(rxq, wqebb_cnt);\n+\n+\t\t/* do packet stats */\n+\t\trxq->rxq_stats.packets += pkts;\n+\t\trxq->rxq_stats.bytes += rx_bytes;\n+\t}\n+\trxq->rxq_stats.burst_pkts = pkts;\n+\n+\t/* 8. rearm mbuf to rxq */\n+\thinic_rearm_rxq_mbuf(rxq);\n+\n+\treturn pkts;\n+}\ndiff --git a/drivers/net/hinic/hinic_pmd_tx.c b/drivers/net/hinic/hinic_pmd_tx.c\nindex 854b94dc5..84101aebb 100644\n--- a/drivers/net/hinic/hinic_pmd_tx.c\n+++ b/drivers/net/hinic/hinic_pmd_tx.c\n@@ -7,6 +7,9 @@\n #include <rte_sctp.h>\n #include <rte_udp.h>\n #include <rte_ip.h>\n+#ifdef __ARM64_NEON__\n+#include <arm_neon.h>\n+#endif\n \n #include \"base/hinic_compat.h\"\n #include \"base/hinic_pmd_hwdev.h\"\n@@ -16,6 +19,1085 @@\n #include \"hinic_pmd_ethdev.h\"\n #include \"hinic_pmd_tx.h\"\n \n+/* packet header and tx offload info */\n+#define VXLANLEN\t\t\t8\n+#define MAX_PLD_OFFSET\t\t\t221\n+#define MAX_SINGLE_SGE_SIZE\t\t65536\n+#define TSO_ENABLE\t\t\t1\n+#define TX_MSS_DEFAULT\t\t\t0x3E00\n+#define TX_MSS_MIN\t\t\t0x50\n+\n+#define HINIC_NONTSO_PKT_MAX_SGE\t\t17\t/* non-tso max sge 17 */\n+#define HINIC_NONTSO_SEG_NUM_INVALID(num)\t\\\n+\t\t\t((num) > HINIC_NONTSO_PKT_MAX_SGE)\n+\n+#define HINIC_TSO_PKT_MAX_SGE\t\t\t127\t/* tso max sge 127 */\n+#define HINIC_TSO_SEG_NUM_INVALID(num)\t\t((num) > HINIC_TSO_PKT_MAX_SGE)\n+\n+/* sizeof(struct hinic_sq_bufdesc) == 16, shift 4 */\n+#define HINIC_BUF_DESC_SIZE(nr_descs)\t(SIZE_8BYTES(((u32)nr_descs) << 4))\n+\n+#define MASKED_SQ_IDX(sq, idx)\t\t((idx) & (sq)->wq->mask)\n+\n+/* SQ_CTRL */\n+#define SQ_CTRL_BUFDESC_SECT_LEN_SHIFT\t\t0\n+#define SQ_CTRL_TASKSECT_LEN_SHIFT\t\t16\n+#define SQ_CTRL_DATA_FORMAT_SHIFT\t\t22\n+#define SQ_CTRL_LEN_SHIFT\t\t\t29\n+#define SQ_CTRL_OWNER_SHIFT\t\t\t31\n+\n+#define SQ_CTRL_BUFDESC_SECT_LEN_MASK\t\t0xFFU\n+#define SQ_CTRL_TASKSECT_LEN_MASK\t\t0x1FU\n+#define SQ_CTRL_DATA_FORMAT_MASK\t\t0x1U\n+#define SQ_CTRL_LEN_MASK\t\t\t0x3U\n+#define SQ_CTRL_OWNER_MASK\t\t\t0x1U\n+\n+#define SQ_CTRL_SET(val, member)\t\\\n+\t(((val) & SQ_CTRL_##member##_MASK) << SQ_CTRL_##member##_SHIFT)\n+\n+#define SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT\t\t2\n+#define SQ_CTRL_QUEUE_INFO_UFO_SHIFT\t\t10\n+#define SQ_CTRL_QUEUE_INFO_TSO_SHIFT\t\t11\n+#define 
SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT\t12\n+#define SQ_CTRL_QUEUE_INFO_MSS_SHIFT\t\t13\n+#define SQ_CTRL_QUEUE_INFO_SCTP_SHIFT\t\t27\n+#define SQ_CTRL_QUEUE_INFO_UC_SHIFT\t\t28\n+#define SQ_CTRL_QUEUE_INFO_PRI_SHIFT\t\t29\n+\n+#define SQ_CTRL_QUEUE_INFO_PLDOFF_MASK\t\t0xFFU\n+#define SQ_CTRL_QUEUE_INFO_UFO_MASK\t\t0x1U\n+#define SQ_CTRL_QUEUE_INFO_TSO_MASK\t\t0x1U\n+#define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK\t0x1U\n+#define SQ_CTRL_QUEUE_INFO_MSS_MASK\t\t0x3FFFU\n+#define SQ_CTRL_QUEUE_INFO_SCTP_MASK\t\t0x1U\n+#define SQ_CTRL_QUEUE_INFO_UC_MASK\t\t0x1U\n+#define SQ_CTRL_QUEUE_INFO_PRI_MASK\t\t0x7U\n+\n+#define SQ_CTRL_QUEUE_INFO_SET(val, member)\t\\\n+\t(((u32)(val) & SQ_CTRL_QUEUE_INFO_##member##_MASK) <<\t\\\n+\t\t\tSQ_CTRL_QUEUE_INFO_##member##_SHIFT)\n+\n+#define SQ_CTRL_QUEUE_INFO_GET(val, member)\t\\\n+\t(((val) >> SQ_CTRL_QUEUE_INFO_##member##_SHIFT) &\t\\\n+\t\t\tSQ_CTRL_QUEUE_INFO_##member##_MASK)\n+\n+#define SQ_CTRL_QUEUE_INFO_CLEAR(val, member)\t\\\n+\t((val) & (~(SQ_CTRL_QUEUE_INFO_##member##_MASK << \\\n+\t\t\tSQ_CTRL_QUEUE_INFO_##member##_SHIFT)))\n+\n+#define\tSQ_TASK_INFO0_L2HDR_LEN_SHIFT\t\t0\n+#define\tSQ_TASK_INFO0_L4OFFLOAD_SHIFT\t\t8\n+#define\tSQ_TASK_INFO0_INNER_L3TYPE_SHIFT\t10\n+#define\tSQ_TASK_INFO0_VLAN_OFFLOAD_SHIFT\t12\n+#define\tSQ_TASK_INFO0_PARSE_FLAG_SHIFT\t\t13\n+#define\tSQ_TASK_INFO0_UFO_AVD_SHIFT\t\t14\n+#define\tSQ_TASK_INFO0_TSO_UFO_SHIFT\t\t15\n+#define SQ_TASK_INFO0_VLAN_TAG_SHIFT\t\t16\n+\n+#define\tSQ_TASK_INFO0_L2HDR_LEN_MASK\t\t0xFFU\n+#define\tSQ_TASK_INFO0_L4OFFLOAD_MASK\t\t0x3U\n+#define\tSQ_TASK_INFO0_INNER_L3TYPE_MASK\t\t0x3U\n+#define\tSQ_TASK_INFO0_VLAN_OFFLOAD_MASK\t\t0x1U\n+#define\tSQ_TASK_INFO0_PARSE_FLAG_MASK\t\t0x1U\n+#define\tSQ_TASK_INFO0_UFO_AVD_MASK\t\t0x1U\n+#define SQ_TASK_INFO0_TSO_UFO_MASK\t\t0x1U\n+#define SQ_TASK_INFO0_VLAN_TAG_MASK\t\t0xFFFFU\n+\n+#define SQ_TASK_INFO0_SET(val, member)\t\t\t\\\n+\t(((u32)(val) & SQ_TASK_INFO0_##member##_MASK) <<\t\\\n+\t\t\tSQ_TASK_INFO0_##member##_SHIFT)\n+\n+#define\tSQ_TASK_INFO1_MD_TYPE_SHIFT\t\t8\n+#define SQ_TASK_INFO1_INNER_L4LEN_SHIFT\t\t16\n+#define SQ_TASK_INFO1_INNER_L3LEN_SHIFT\t\t24\n+\n+#define\tSQ_TASK_INFO1_MD_TYPE_MASK\t\t0xFFU\n+#define SQ_TASK_INFO1_INNER_L4LEN_MASK\t\t0xFFU\n+#define SQ_TASK_INFO1_INNER_L3LEN_MASK\t\t0xFFU\n+\n+#define SQ_TASK_INFO1_SET(val, member)\t\t\t\\\n+\t(((val) & SQ_TASK_INFO1_##member##_MASK) <<\t\\\n+\t\t\tSQ_TASK_INFO1_##member##_SHIFT)\n+\n+#define SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT\t0\n+#define SQ_TASK_INFO2_OUTER_L3LEN_SHIFT\t\t8\n+#define SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT\t16\n+#define SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT\t24\n+\n+#define SQ_TASK_INFO2_TUNNEL_L4LEN_MASK\t\t0xFFU\n+#define SQ_TASK_INFO2_OUTER_L3LEN_MASK\t\t0xFFU\n+#define SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK\t0x7U\n+#define SQ_TASK_INFO2_OUTER_L3TYPE_MASK\t\t0x3U\n+\n+#define SQ_TASK_INFO2_SET(val, member)\t\t\t\\\n+\t(((val) & SQ_TASK_INFO2_##member##_MASK) <<\t\\\n+\t\t\tSQ_TASK_INFO2_##member##_SHIFT)\n+\n+#define\tSQ_TASK_INFO4_L2TYPE_SHIFT\t\t31\n+\n+#define\tSQ_TASK_INFO4_L2TYPE_MASK\t\t0x1U\n+\n+#define SQ_TASK_INFO4_SET(val, member)\t\t\\\n+\t(((u32)(val) & SQ_TASK_INFO4_##member##_MASK) << \\\n+\t\t\tSQ_TASK_INFO4_##member##_SHIFT)\n+\n+/* SQ_DB */\n+#define SQ_DB_OFF\t\t\t\t0x00000800\n+#define SQ_DB_INFO_HI_PI_SHIFT\t\t\t0\n+#define SQ_DB_INFO_QID_SHIFT\t\t\t8\n+#define SQ_DB_INFO_CFLAG_SHIFT\t\t\t23\n+#define SQ_DB_INFO_COS_SHIFT\t\t\t24\n+#define SQ_DB_INFO_TYPE_SHIFT\t\t\t27\n+\n+#define SQ_DB_INFO_HI_PI_MASK\t\t\t0xFFU\n+#define 
SQ_DB_INFO_QID_MASK\t\t\t0x3FFU\n+#define SQ_DB_INFO_CFLAG_MASK\t\t\t0x1U\n+#define SQ_DB_INFO_COS_MASK\t\t\t0x7U\n+#define SQ_DB_INFO_TYPE_MASK\t\t\t0x1FU\n+#define SQ_DB_INFO_SET(val, member)\t\t\\\n+\t(((u32)(val) & SQ_DB_INFO_##member##_MASK) <<\t\\\n+\t\t\tSQ_DB_INFO_##member##_SHIFT)\n+\n+#define SQ_DB\t\t\t\t\t1\n+#define SQ_CFLAG_DP\t\t\t\t0\t/* CFLAG_DATA_PATH */\n+\n+#define SQ_DB_PI_LOW_MASK\t\t\t0xFF\n+#define SQ_DB_PI_LOW(pi)\t\t\t((pi) & SQ_DB_PI_LOW_MASK)\n+#define SQ_DB_PI_HI_SHIFT\t\t\t8\n+#define SQ_DB_PI_HIGH(pi)\t\t\t((pi) >> SQ_DB_PI_HI_SHIFT)\n+#define SQ_DB_ADDR(sq, pi)\t\t\\\n+\t((u64 *)((u8 __iomem *)((sq)->db_addr) + SQ_DB_OFF) + SQ_DB_PI_LOW(pi))\n+\n+/* txq wq operations */\n+#define HINIC_GET_SQ_WQE_MASK(txq)\t\t((txq)->wq->mask)\n+\n+#define HINIC_GET_SQ_HW_CI(txq)\t\\\n+\t((be16_to_cpu(*(txq)->cons_idx_addr)) & HINIC_GET_SQ_WQE_MASK(txq))\n+\n+#define HINIC_GET_SQ_LOCAL_CI(txq)\t\\\n+\t(((txq)->wq->cons_idx) & HINIC_GET_SQ_WQE_MASK(txq))\n+\n+#define HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt)\t\\\n+\tdo {\t\t\t\t\t\t\\\n+\t\t(txq)->wq->cons_idx += wqebb_cnt;\t\\\n+\t\t(txq)->wq->delta += wqebb_cnt;\t\t\\\n+\t} while (0)\n+\n+#define HINIC_GET_SQ_FREE_WQEBBS(txq)\t((txq)->wq->delta - 1)\n+\n+#define HINIC_IS_SQ_EMPTY(txq)\t(((txq)->wq->delta) == ((txq)->q_depth))\n+\n+#define BUF_DESC_SIZE_SHIFT\t\t4\n+\n+#define HINIC_SQ_WQE_SIZE(num_sge)\t\t\\\n+\t(sizeof(struct hinic_sq_ctrl) + sizeof(struct hinic_sq_task) +  \\\n+\t\t\t(unsigned int)((num_sge) << BUF_DESC_SIZE_SHIFT))\n+\n+#define HINIC_SQ_WQEBB_CNT(num_sge)\t\\\n+\t(int)(ALIGN(HINIC_SQ_WQE_SIZE((u32)num_sge), \\\n+\t\t\tHINIC_SQ_WQEBB_SIZE) >> HINIC_SQ_WQEBB_SHIFT)\n+\n+\n+static inline void hinic_sq_wqe_cpu_to_be32(void *data, int nr_wqebb)\n+{\n+\tint i;\n+#ifndef __ARM64_NEON__\n+\t__m128i *wqe_line = (__m128i *)data;\n+\t__m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,\n+\t\t\t\t\t 11, 4, 5, 6, 7, 0, 1, 2, 3);\n+\n+\tfor (i = 0; i < nr_wqebb; i++) {\n+\t\t/* convert 64B wqebb using 4 SSE instructions */\n+\t\twqe_line[0] = _mm_shuffle_epi8(wqe_line[0], shuf_mask);\n+\t\twqe_line[1] = _mm_shuffle_epi8(wqe_line[1], shuf_mask);\n+\t\twqe_line[2] = _mm_shuffle_epi8(wqe_line[2], shuf_mask);\n+\t\twqe_line[3] = _mm_shuffle_epi8(wqe_line[3], shuf_mask);\n+\t\twqe_line += 4;\n+\t}\n+#else\n+\tuint8x16_t *wqe_line = (uint8x16_t *)data;\n+\tconst uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,\n+\t\t\t\t\t9, 8, 15, 14, 13, 12};\n+\n+\tfor (i = 0; i < nr_wqebb; i++) {\n+\t\twqe_line[0] = vqtbl1q_u8(wqe_line[0], shuf_mask);\n+\t\twqe_line[1] = vqtbl1q_u8(wqe_line[1], shuf_mask);\n+\t\twqe_line[2] = vqtbl1q_u8(wqe_line[2], shuf_mask);\n+\t\twqe_line[3] = vqtbl1q_u8(wqe_line[3], shuf_mask);\n+\t\twqe_line += 4;\n+\t}\n+#endif\n+}\n+\n+static inline void hinic_sge_cpu_to_be32(void *data, int nr_sge)\n+{\n+\tint i;\n+#ifndef __ARM64_NEON__\n+\t__m128i *sge_line = (__m128i *)data;\n+\t__m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,\n+\t\t\t\t\t 11, 4, 5, 6, 7, 0, 1, 2, 3);\n+\n+\tfor (i = 0; i < nr_sge; i++) {\n+\t\t/* convert 16B sge using 1 SSE instructions */\n+\t\t*sge_line = _mm_shuffle_epi8(*sge_line, shuf_mask);\n+\t\tsge_line++;\n+\t}\n+#else\n+\tuint8x16_t *sge_line = (uint8x16_t *)data;\n+\tconst uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,\n+\t\t\t\t\t9, 8, 15, 14, 13, 12};\n+\n+\tfor (i = 0; i < nr_sge; i++) {\n+\t\t*sge_line = vqtbl1q_u8(*sge_line, shuf_mask);\n+\t\tsge_line++;\n+\t}\n+#endif\n+}\n+\n+void hinic_txq_get_stats(struct hinic_txq *txq, struct 
hinic_txq_stats *stats)\n+{\n+\tif (!txq || !stats) {\n+\t\tPMD_DRV_LOG(ERR, \"Txq or stats is NULL\");\n+\t\treturn;\n+\t}\n+\n+\tmemcpy(stats, &txq->txq_stats, sizeof(txq->txq_stats));\n+}\n+\n+void hinic_txq_stats_reset(struct hinic_txq *txq)\n+{\n+\tstruct hinic_txq_stats *txq_stats;\n+\n+\tif (txq == NULL)\n+\t\treturn;\n+\n+\ttxq_stats = &txq->txq_stats;\n+\tmemset(txq_stats, 0, sizeof(*txq_stats));\n+}\n+\n+static inline struct rte_mbuf *hinic_copy_tx_mbuf(struct hinic_nic_dev *nic_dev,\n+\t\t\t\t\t\t  struct rte_mbuf *mbuf,\n+\t\t\t\t\t\t  u16 sge_cnt)\n+{\n+\tstruct rte_mbuf *dst_mbuf;\n+\tu32 offset = 0;\n+\tu16 i;\n+\n+\tif (unlikely(!nic_dev->cpy_mpool))\n+\t\treturn NULL;\n+\n+\tdst_mbuf = rte_pktmbuf_alloc(nic_dev->cpy_mpool);\n+\tif (unlikely(!dst_mbuf))\n+\t\treturn NULL;\n+\n+\tdst_mbuf->data_off = 0;\n+\tfor (i = 0; i < sge_cnt; i++) {\n+\t\trte_memcpy((char *)dst_mbuf->buf_addr + offset,\n+\t\t\t   (char *)mbuf->buf_addr + mbuf->data_off,\n+\t\t\t   mbuf->data_len);\n+\t\tdst_mbuf->data_len += mbuf->data_len;\n+\t\toffset += mbuf->data_len;\n+\t\tmbuf = mbuf->next;\n+\t}\n+\n+\treturn dst_mbuf;\n+}\n+\n+static inline bool hinic_mbuf_dma_map_sge(struct hinic_txq *txq,\n+\t\t\t\t\t  struct rte_mbuf *mbuf,\n+\t\t\t\t\t  struct hinic_sq_bufdesc *sges,\n+\t\t\t\t\t  struct hinic_wqe_info *sqe_info)\n+{\n+\tdma_addr_t dma_addr;\n+\tu16 i, around_sges;\n+\tu16 nb_segs = sqe_info->sge_cnt - sqe_info->cpy_mbuf_cnt;\n+\tu16 real_nb_segs = mbuf->nb_segs;\n+\tstruct hinic_sq_bufdesc *sge_idx = sges;\n+\n+\tif (unlikely(sqe_info->around)) {\n+\t\t/* parts of wqe is in sq bottom while parts\n+\t\t * of wqe is in sq head\n+\t\t */\n+\t\ti = 0;\n+\t\tfor (sge_idx = sges; (u64)sge_idx <= txq->sq_bot_sge_addr;\n+\t\t     sge_idx++) {\n+\t\t\tdma_addr = rte_mbuf_data_iova(mbuf);\n+\t\t\thinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,\n+\t\t\t\t      mbuf->data_len);\n+\t\t\tmbuf = mbuf->next;\n+\t\t\ti++;\n+\t\t}\n+\n+\t\taround_sges = nb_segs - i;\n+\t\tsge_idx = (struct hinic_sq_bufdesc *)\n+\t\t\t\t((void *)txq->sq_head_addr);\n+\t\tfor (; i < nb_segs; i++) {\n+\t\t\tdma_addr = rte_mbuf_data_iova(mbuf);\n+\t\t\thinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,\n+\t\t\t\t      mbuf->data_len);\n+\t\t\tmbuf = mbuf->next;\n+\t\t\tsge_idx++;\n+\t\t}\n+\n+\t\t/* covert sges at head to big endian */\n+\t\thinic_sge_cpu_to_be32((void *)txq->sq_head_addr, around_sges);\n+\t} else {\n+\t\t/* wqe is in continuous space */\n+\t\tfor (i = 0; i < nb_segs; i++) {\n+\t\t\tdma_addr = rte_mbuf_data_iova(mbuf);\n+\t\t\thinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,\n+\t\t\t\t      mbuf->data_len);\n+\t\t\tmbuf = mbuf->next;\n+\t\t\tsge_idx++;\n+\t\t}\n+\t}\n+\n+\t/* for now: support non-tso over 17 sge, copy the last 2 mbuf */\n+\tif (unlikely(sqe_info->cpy_mbuf_cnt != 0)) {\n+\t\t/* copy invalid mbuf segs to a valid buffer, lost performance */\n+\t\ttxq->txq_stats.cpy_pkts += 1;\n+\t\tmbuf = hinic_copy_tx_mbuf(txq->nic_dev, mbuf,\n+\t\t\t\t\t  real_nb_segs - nb_segs);\n+\t\tif (unlikely(!mbuf))\n+\t\t\treturn false;\n+\n+\t\ttxq->tx_info[sqe_info->pi].cpy_mbuf = mbuf;\n+\n+\t\t/* deal with the last mbuf */\n+\t\tdma_addr = rte_mbuf_data_iova(mbuf);\n+\t\thinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,\n+\t\t\t      mbuf->data_len);\n+\t\tif (unlikely(sqe_info->around))\n+\t\t\thinic_sge_cpu_to_be32((void *)sge_idx, 1);\n+\t}\n+\n+\treturn true;\n+}\n+\n+static inline void hinic_fill_sq_wqe_header(struct hinic_sq_ctrl *ctrl,\n+\t\t\t\t\t    u32 queue_info, int nr_descs,\n+\t\t\t\t\t    
u8 owner)\n+{\n+\tu32 ctrl_size, task_size, bufdesc_size;\n+\n+\tctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));\n+\ttask_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));\n+\tbufdesc_size = HINIC_BUF_DESC_SIZE(nr_descs);\n+\n+\tctrl->ctrl_fmt = SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |\n+\t\t\tSQ_CTRL_SET(task_size, TASKSECT_LEN)\t|\n+\t\t\tSQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT)\t|\n+\t\t\tSQ_CTRL_SET(ctrl_size, LEN)\t\t|\n+\t\t\tSQ_CTRL_SET(owner, OWNER);\n+\n+\tctrl->queue_info = queue_info;\n+\tctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(1U, UC);\n+\n+\tif (!SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS)) {\n+\t\tctrl->queue_info |=\n+\t\t\tSQ_CTRL_QUEUE_INFO_SET(TX_MSS_DEFAULT, MSS);\n+\t} else if (SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS) < TX_MSS_MIN) {\n+\t\t/* mss should not be less than 80 */\n+\t\tctrl->queue_info =\n+\t\t\t\tSQ_CTRL_QUEUE_INFO_CLEAR(ctrl->queue_info, MSS);\n+\t\tctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(TX_MSS_MIN, MSS);\n+\t}\n+}\n+\n+static inline bool hinic_is_tso_sge_valid(struct rte_mbuf *mbuf,\n+\t\t\t\t\t  struct hinic_tx_offload_info\n+\t\t\t\t\t  *poff_info,\n+\t\t\t\t\t  struct hinic_wqe_info *sqe_info)\n+{\n+\tu32 total_len, limit_len, checked_len, left_len;\n+\tu32 i, first_mss_sges, left_sges;\n+\tstruct rte_mbuf *mbuf_head, *mbuf_pre;\n+\n+\tleft_sges = mbuf->nb_segs;\n+\tmbuf_head = mbuf;\n+\n+\t/* tso sge number validation */\n+\tif (unlikely(left_sges >= HINIC_NONTSO_PKT_MAX_SGE)) {\n+\t\tchecked_len = 0;\n+\t\tlimit_len = mbuf->tso_segsz + poff_info->payload_offset;\n+\t\tfirst_mss_sges = HINIC_NONTSO_PKT_MAX_SGE;\n+\n+\t\t/* each continues 17 mbufs segmust do one check */\n+\t\twhile (left_sges >= HINIC_NONTSO_PKT_MAX_SGE) {\n+\t\t\t/* total len of first 16 mbufs must equal\n+\t\t\t * or more than limit_len\n+\t\t\t */\n+\t\t\ttotal_len = 0;\n+\t\t\tfor (i = 0; i < first_mss_sges; i++) {\n+\t\t\t\ttotal_len += mbuf->data_len;\n+\t\t\t\tmbuf_pre = mbuf;\n+\t\t\t\tmbuf = mbuf->next;\n+\t\t\t\tif (total_len >= limit_len) {\n+\t\t\t\t\tlimit_len = mbuf_head->tso_segsz;\n+\t\t\t\t\tbreak;\n+\t\t\t\t}\n+\t\t\t}\n+\n+\t\t\tchecked_len += total_len;\n+\n+\t\t\t/* try to copy if not valid */\n+\t\t\tif (unlikely(first_mss_sges == i)) {\n+\t\t\t\tleft_sges -= first_mss_sges;\n+\t\t\t\tchecked_len -= mbuf_pre->data_len;\n+\n+\t\t\t\tleft_len = mbuf_head->pkt_len - checked_len;\n+\t\t\t\tif (left_len > HINIC_COPY_MBUF_SIZE)\n+\t\t\t\t\treturn false;\n+\n+\t\t\t\tsqe_info->sge_cnt = mbuf_head->nb_segs -\n+\t\t\t\t\t\t\tleft_sges;\n+\t\t\t\tsqe_info->cpy_mbuf_cnt = 1;\n+\n+\t\t\t\treturn true;\n+\t\t\t}\n+\t\t\tfirst_mss_sges = (HINIC_NONTSO_PKT_MAX_SGE - 1);\n+\n+\t\t\t/* continue next 16 mbufs */\n+\t\t\tleft_sges -= (i + 1);\n+\t\t} /* end of while */\n+\t}\n+\n+\tsqe_info->sge_cnt = mbuf_head->nb_segs;\n+\treturn true;\n+}\n+\n+static inline void\n+hinic_set_l4_csum_info(struct hinic_sq_task *task,\n+\t\tu32 *queue_info, struct hinic_tx_offload_info *poff_info)\n+{\n+\tu32 tcp_udp_cs, sctp;\n+\tu16 l2hdr_len;\n+\n+\tsctp = 0;\n+\tif (unlikely(poff_info->inner_l4_type == SCTP_OFFLOAD_ENABLE))\n+\t\tsctp = 1;\n+\n+\ttcp_udp_cs = poff_info->inner_l4_tcp_udp;\n+\n+\tif (poff_info->tunnel_type == TUNNEL_UDP_NO_CSUM) {\n+\t\tl2hdr_len =  poff_info->outer_l2_len;\n+\n+\t\ttask->pkt_info2 |=\n+\t\tSQ_TASK_INFO2_SET(poff_info->outer_l3_type, OUTER_L3TYPE) |\n+\t\tSQ_TASK_INFO2_SET(poff_info->outer_l3_len, OUTER_L3LEN);\n+\t\ttask->pkt_info2 |=\n+\t\tSQ_TASK_INFO2_SET(poff_info->tunnel_type, TUNNEL_L4TYPE) 
|\n+\t\tSQ_TASK_INFO2_SET(poff_info->tunnel_length, TUNNEL_L4LEN);\n+\t} else {\n+\t\tl2hdr_len = poff_info->inner_l2_len;\n+\t}\n+\n+\ttask->pkt_info0 |= SQ_TASK_INFO0_SET(l2hdr_len, L2HDR_LEN);\n+\ttask->pkt_info1 |=\n+\t\tSQ_TASK_INFO1_SET(poff_info->inner_l3_len, INNER_L3LEN);\n+\ttask->pkt_info0 |=\n+\t\tSQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);\n+\ttask->pkt_info1 |=\n+\t\tSQ_TASK_INFO1_SET(poff_info->inner_l4_len, INNER_L4LEN);\n+\ttask->pkt_info0 |=\n+\t\tSQ_TASK_INFO0_SET(poff_info->inner_l4_type, L4OFFLOAD);\n+\t*queue_info |=\n+\t\tSQ_CTRL_QUEUE_INFO_SET(poff_info->payload_offset, PLDOFF) |\n+\t\tSQ_CTRL_QUEUE_INFO_SET(tcp_udp_cs, TCPUDP_CS) |\n+\t\tSQ_CTRL_QUEUE_INFO_SET(sctp, SCTP);\n+}\n+\n+static inline void\n+hinic_set_tso_info(struct hinic_sq_task *task,\n+\t\tu32 *queue_info, struct rte_mbuf *mbuf,\n+\t\tstruct hinic_tx_offload_info *poff_info)\n+{\n+\thinic_set_l4_csum_info(task, queue_info, poff_info);\n+\n+\t/* wqe for tso */\n+\ttask->pkt_info0 |=\n+\t\tSQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);\n+\ttask->pkt_info0 |= SQ_TASK_INFO0_SET(TSO_ENABLE, TSO_UFO);\n+\t*queue_info |= SQ_CTRL_QUEUE_INFO_SET(TSO_ENABLE, TSO);\n+\t/* qsf was initialized in prepare_sq_wqe */\n+\t*queue_info = SQ_CTRL_QUEUE_INFO_CLEAR(*queue_info, MSS);\n+\t*queue_info |= SQ_CTRL_QUEUE_INFO_SET(mbuf->tso_segsz, MSS);\n+}\n+\n+static inline void\n+hinic_set_vlan_tx_offload(struct hinic_sq_task *task,\n+\t\t\tu32 *queue_info, u16 vlan_tag, u16 vlan_pri)\n+{\n+\ttask->pkt_info0 |= SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) |\n+\t\t\t\tSQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD);\n+\n+\t*queue_info |= SQ_CTRL_QUEUE_INFO_SET(vlan_pri, PRI);\n+}\n+\n+static inline void\n+hinic_fill_tx_offload_info(struct rte_mbuf *mbuf,\n+\t\tstruct hinic_sq_task *task, u32 *queue_info,\n+\t\tstruct hinic_tx_offload_info *tx_off_info)\n+{\n+\tu16 vlan_tag;\n+\tuint64_t ol_flags = mbuf->ol_flags;\n+\n+\t/* clear DW0~2 of task section for offload */\n+\ttask->pkt_info0 = 0;\n+\ttask->pkt_info1 = 0;\n+\ttask->pkt_info2 = 0;\n+\n+\t/* Base VLAN */\n+\tif (unlikely(ol_flags & PKT_TX_VLAN_PKT)) {\n+\t\tvlan_tag = mbuf->vlan_tci;\n+\t\thinic_set_vlan_tx_offload(task, queue_info, vlan_tag,\n+\t\t\t\t\t  vlan_tag >> VLAN_PRIO_SHIFT);\n+\t}\n+\n+\t/* non checksum or tso */\n+\tif (unlikely(!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK)))\n+\t\treturn;\n+\n+\tif ((ol_flags & PKT_TX_TCP_SEG))\n+\t\t/* set tso info for task and qsf */\n+\t\thinic_set_tso_info(task, queue_info, mbuf, tx_off_info);\n+\telse /* just support l4 checksum offload */\n+\t\thinic_set_l4_csum_info(task, queue_info, tx_off_info);\n+}\n+\n+static inline void hinic_xmit_mbuf_cleanup(struct hinic_txq *txq)\n+{\n+\tstruct hinic_tx_info *tx_info;\n+\tstruct rte_mbuf *mbuf, *m, *mbuf_free[HINIC_MAX_TX_FREE_BULK];\n+\tint i, nb_free = 0;\n+\tu16 hw_ci, sw_ci, sq_mask;\n+\tint wqebb_cnt = 0;\n+\n+\thw_ci = HINIC_GET_SQ_HW_CI(txq);\n+\tsw_ci = HINIC_GET_SQ_LOCAL_CI(txq);\n+\tsq_mask = HINIC_GET_SQ_WQE_MASK(txq);\n+\n+\tfor (i = 0; i < txq->tx_free_thresh; ++i) {\n+\t\ttx_info = &txq->tx_info[sw_ci];\n+\t\tif (hw_ci == sw_ci ||\n+\t\t\t(((hw_ci - sw_ci) & sq_mask) < tx_info->wqebb_cnt))\n+\t\t\tbreak;\n+\n+\t\tsw_ci = (sw_ci + tx_info->wqebb_cnt) & sq_mask;\n+\n+\t\tif (unlikely(tx_info->cpy_mbuf != NULL)) {\n+\t\t\trte_pktmbuf_free(tx_info->cpy_mbuf);\n+\t\t\ttx_info->cpy_mbuf = NULL;\n+\t\t}\n+\n+\t\twqebb_cnt += tx_info->wqebb_cnt;\n+\t\tmbuf = tx_info->mbuf;\n+\n+\t\tif (likely(mbuf->nb_segs == 1)) {\n+\t\t\tm = 
rte_pktmbuf_prefree_seg(mbuf);\n+\t\t\ttx_info->mbuf = NULL;\n+\n+\t\t\tif (unlikely(m == NULL))\n+\t\t\t\tcontinue;\n+\n+\t\t\tmbuf_free[nb_free++] = m;\n+\t\t\tif (unlikely(m->pool != mbuf_free[0]->pool ||\n+\t\t\t\tnb_free >= HINIC_MAX_TX_FREE_BULK)) {\n+\t\t\t\trte_mempool_put_bulk(mbuf_free[0]->pool,\n+\t\t\t\t\t(void **)mbuf_free, (nb_free - 1));\n+\t\t\t\tnb_free = 0;\n+\t\t\t\tmbuf_free[nb_free++] = m;\n+\t\t\t}\n+\t\t} else {\n+\t\t\trte_pktmbuf_free(mbuf);\n+\t\t\ttx_info->mbuf = NULL;\n+\t\t}\n+\t}\n+\n+\tif (nb_free > 0)\n+\t\trte_mempool_put_bulk(mbuf_free[0]->pool, (void **)mbuf_free,\n+\t\t\t\t     nb_free);\n+\n+\tHINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt);\n+}\n+\n+static inline struct hinic_sq_wqe *\n+hinic_get_sq_wqe(struct hinic_txq *txq, int wqebb_cnt,\n+\t\tstruct hinic_wqe_info *wqe_info)\n+{\n+\tu32 cur_pi, end_pi;\n+\tu16 remain_wqebbs;\n+\tstruct hinic_sq *sq = txq->sq;\n+\tstruct hinic_wq *wq = txq->wq;\n+\n+\t/* record current pi */\n+\tcur_pi = MASKED_WQE_IDX(wq, wq->prod_idx);\n+\tend_pi = cur_pi + wqebb_cnt;\n+\n+\t/* update next pi and delta */\n+\twq->prod_idx += wqebb_cnt;\n+\twq->delta -= wqebb_cnt;\n+\n+\t/* return current pi and owner */\n+\twqe_info->pi = cur_pi;\n+\twqe_info->owner = sq->owner;\n+\twqe_info->around = 0;\n+\twqe_info->seq_wqebbs = wqebb_cnt;\n+\n+\tif (unlikely(end_pi >= txq->q_depth)) {\n+\t\t/* update owner of next prod_idx */\n+\t\tsq->owner = !sq->owner;\n+\n+\t\t/* turn around to head */\n+\t\tif (unlikely(end_pi > txq->q_depth)) {\n+\t\t\twqe_info->around = 1;\n+\t\t\tremain_wqebbs = txq->q_depth - cur_pi;\n+\t\t\twqe_info->seq_wqebbs = remain_wqebbs;\n+\t\t}\n+\t}\n+\n+\treturn (struct hinic_sq_wqe *)WQ_WQE_ADDR(wq, cur_pi);\n+}\n+\n+static inline int\n+hinic_validate_tx_offload(const struct rte_mbuf *m)\n+{\n+\tuint64_t ol_flags = m->ol_flags;\n+\tuint64_t inner_l3_offset = m->l2_len;\n+\n+\t/* just support vxlan offload */\n+\tif ((ol_flags & PKT_TX_TUNNEL_MASK) &&\n+\t    !(ol_flags & PKT_TX_TUNNEL_VXLAN))\n+\t\treturn -ENOTSUP;\n+\n+\tif (ol_flags & PKT_TX_OUTER_IP_CKSUM)\n+\t\tinner_l3_offset += m->outer_l2_len + m->outer_l3_len;\n+\n+\t/* Headers are fragmented */\n+\tif (rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len)\n+\t\treturn -ENOTSUP;\n+\n+\t/* IP checksum can be counted only for IPv4 packet */\n+\tif ((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6))\n+\t\treturn -EINVAL;\n+\n+\t/* IP type not set when required */\n+\tif (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG)) {\n+\t\tif (!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)))\n+\t\t\treturn -EINVAL;\n+\t}\n+\n+\t/* Check requirements for TSO packet */\n+\tif (ol_flags & PKT_TX_TCP_SEG) {\n+\t\tif (m->tso_segsz == 0 ||\n+\t\t\t((ol_flags & PKT_TX_IPV4) &&\n+\t\t\t!(ol_flags & PKT_TX_IP_CKSUM)))\n+\t\t\treturn -EINVAL;\n+\t}\n+\n+\t/* PKT_TX_OUTER_IP_CKSUM set for non outer IPv4 packet. */\n+\tif ((ol_flags & PKT_TX_OUTER_IP_CKSUM) &&\n+\t\t!(ol_flags & PKT_TX_OUTER_IPV4))\n+\t\treturn -EINVAL;\n+\n+\treturn 0;\n+}\n+\n+static inline uint16_t\n+hinic_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)\n+{\n+\tstruct ipv4_psd_header {\n+\t\tuint32_t src_addr; /* IP address of source host. */\n+\t\tuint32_t dst_addr; /* IP address of destination host. */\n+\t\tuint8_t  zero;     /* zero. */\n+\t\tuint8_t  proto;    /* L4 protocol type. */\n+\t\tuint16_t len;      /* L4 length. 
*/\n+\t} psd_hdr;\n+\tuint8_t ihl;\n+\n+\tpsd_hdr.src_addr = ipv4_hdr->src_addr;\n+\tpsd_hdr.dst_addr = ipv4_hdr->dst_addr;\n+\tpsd_hdr.zero = 0;\n+\tpsd_hdr.proto = ipv4_hdr->next_proto_id;\n+\tif (ol_flags & PKT_TX_TCP_SEG) {\n+\t\tpsd_hdr.len = 0;\n+\t} else {\n+\t\t/* ipv4_hdr->version_ihl is uint8_t big endian, ihl locates\n+\t\t * lower 4 bits and unit is 4 bytes\n+\t\t */\n+\t\tihl = (ipv4_hdr->version_ihl & 0xF) << 2;\n+\t\tpsd_hdr.len =\n+\t\trte_cpu_to_be_16(rte_be_to_cpu_16(ipv4_hdr->total_length) -\n+\t\t\t\t ihl);\n+\t}\n+\treturn rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));\n+}\n+\n+static inline uint16_t\n+hinic_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)\n+{\n+\tuint32_t sum;\n+\tstruct {\n+\t\tuint32_t len;   /* L4 length. */\n+\t\tuint32_t proto; /* L4 protocol - top 3 bytes must be zero */\n+\t} psd_hdr;\n+\n+\tpsd_hdr.proto = (ipv6_hdr->proto << 24);\n+\tif (ol_flags & PKT_TX_TCP_SEG)\n+\t\tpsd_hdr.len = 0;\n+\telse\n+\t\tpsd_hdr.len = ipv6_hdr->payload_len;\n+\n+\tsum = __rte_raw_cksum(ipv6_hdr->src_addr,\n+\t\tsizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr), 0);\n+\tsum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);\n+\treturn __rte_raw_cksum_reduce(sum);\n+}\n+\n+static inline int\n+hinic_tx_offload_pkt_prepare(struct rte_mbuf *m,\n+\t\t\t\tstruct hinic_tx_offload_info *off_info)\n+{\n+\tstruct rte_ipv4_hdr *ipv4_hdr;\n+\tstruct rte_ipv6_hdr *ipv6_hdr;\n+\tstruct rte_tcp_hdr *tcp_hdr;\n+\tstruct rte_udp_hdr *udp_hdr;\n+\tstruct rte_ether_hdr *eth_hdr;\n+\tstruct rte_vlan_hdr *vlan_hdr;\n+\tu16 eth_type = 0;\n+\tuint64_t inner_l3_offset = m->l2_len;\n+\tuint64_t ol_flags = m->ol_flags;\n+\n+\t/* Does packet set any of available offloads */\n+\tif (!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK))\n+\t\treturn 0;\n+\n+\tif (unlikely(hinic_validate_tx_offload(m)))\n+\t\treturn -EINVAL;\n+\n+\tif ((ol_flags & PKT_TX_OUTER_IP_CKSUM) ||\n+\t\t\t(ol_flags & PKT_TX_OUTER_IPV6) ||\n+\t\t\t(ol_flags & PKT_TX_TUNNEL_VXLAN)) {\n+\t\tinner_l3_offset += m->outer_l2_len + m->outer_l3_len;\n+\t\toff_info->outer_l2_len = m->outer_l2_len;\n+\t\toff_info->outer_l3_len = m->outer_l3_len;\n+\t\t/* just support vxlan tunneling pkt */\n+\t\toff_info->inner_l2_len = m->l2_len - VXLANLEN -\n+\t\t\t\t\t\tsizeof(struct rte_udp_hdr);\n+\t\toff_info->inner_l3_len = m->l3_len;\n+\t\toff_info->inner_l4_len = m->l4_len;\n+\t\toff_info->tunnel_length = m->l2_len;\n+\t\toff_info->payload_offset = m->outer_l2_len +\n+\t\t\t\tm->outer_l3_len + m->l2_len + m->l3_len;\n+\t\toff_info->tunnel_type = TUNNEL_UDP_NO_CSUM;\n+\t} else {\n+\t\toff_info->inner_l2_len = m->l2_len;\n+\t\toff_info->inner_l3_len = m->l3_len;\n+\t\toff_info->inner_l4_len = m->l4_len;\n+\t\toff_info->tunnel_type = NOT_TUNNEL;\n+\t\toff_info->payload_offset = m->l2_len + m->l3_len;\n+\t}\n+\n+\tif (((ol_flags & PKT_TX_L4_MASK) != PKT_TX_SCTP_CKSUM) &&\n+\t    ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_UDP_CKSUM))\n+\t\toff_info->payload_offset += m->l4_len;\n+\n+\t/* invalid udp or tcp header */\n+\tif (unlikely(off_info->payload_offset > MAX_PLD_OFFSET))\n+\t\treturn -EINVAL;\n+\n+\t/* Process outter udp pseudo-header checksum */\n+\tif ((ol_flags & PKT_TX_TUNNEL_VXLAN) && ((ol_flags & PKT_TX_TCP_SEG) ||\n+\t\t\t(ol_flags & PKT_TX_OUTER_IP_CKSUM) ||\n+\t\t\t(ol_flags & PKT_TX_OUTER_IPV6))) {\n+\t\toff_info->tunnel_type = TUNNEL_UDP_CSUM;\n+\n+\t\t/* inner_l4_tcp_udp csum should be setted to calculate outter\n+\t\t * udp checksum when vxlan packets without inner l3 and l4\n+\t\t 
*/\n+\t\toff_info->inner_l4_tcp_udp = 1;\n+\n+\t\teth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);\n+\t\teth_type = rte_be_to_cpu_16(eth_hdr->ether_type);\n+\n+\t\tif (eth_type == RTE_ETHER_TYPE_VLAN) {\n+\t\t\tvlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);\n+\t\t\teth_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);\n+\t\t}\n+\n+\t\tif (eth_type == RTE_ETHER_TYPE_IPV4) {\n+\t\t\tipv4_hdr =\n+\t\t\trte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,\n+\t\t\t\t\t\tm->outer_l2_len);\n+\t\t\toff_info->outer_l3_type = IPV4_PKT_WITH_CHKSUM_OFFLOAD;\n+\t\t\tipv4_hdr->hdr_checksum = 0;\n+\n+\t\t\tudp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +\n+\t\t\t\t\t\t\tm->outer_l3_len);\n+\t\t\tudp_hdr->dgram_cksum =\n+\t\t\t\thinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);\n+\t\t} else if (eth_type == RTE_ETHER_TYPE_IPV6) {\n+\t\t\toff_info->outer_l3_type = IPV6_PKT;\n+\t\t\tipv6_hdr =\n+\t\t\trte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,\n+\t\t\t\t\t\tm->outer_l2_len);\n+\n+\t\t\tudp_hdr =\n+\t\t\trte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,\n+\t\t\t\t\t\t(m->outer_l2_len +\n+\t\t\t\t\t\tm->outer_l3_len));\n+\t\t\tudp_hdr->dgram_cksum =\n+\t\t\t\thinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);\n+\t\t}\n+\t}\n+\n+\tif (ol_flags & PKT_TX_IPV4)\n+\t\toff_info->inner_l3_type = (ol_flags & PKT_TX_IP_CKSUM) ?\n+\t\t\t\t\tIPV4_PKT_WITH_CHKSUM_OFFLOAD :\n+\t\t\t\t\tIPV4_PKT_NO_CHKSUM_OFFLOAD;\n+\telse if (ol_flags & PKT_TX_IPV6)\n+\t\toff_info->inner_l3_type = IPV6_PKT;\n+\n+\t/* Process the pseudo-header checksum */\n+\tif ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) {\n+\t\tif (ol_flags & PKT_TX_IPV4) {\n+\t\t\tipv4_hdr =\n+\t\t\trte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,\n+\t\t\t\t\t\tinner_l3_offset);\n+\n+\t\t\tif (ol_flags & PKT_TX_IP_CKSUM)\n+\t\t\t\tipv4_hdr->hdr_checksum = 0;\n+\n+\t\t\tudp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +\n+\t\t\t\t\t\t\t\tm->l3_len);\n+\t\t\tudp_hdr->dgram_cksum =\n+\t\t\t\thinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);\n+\t\t} else {\n+\t\t\tipv6_hdr =\n+\t\t\trte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,\n+\t\t\t\t\t\tinner_l3_offset);\n+\n+\t\t\tudp_hdr =\n+\t\t\trte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,\n+\t\t\t\t\t\t(inner_l3_offset + m->l3_len));\n+\t\t\tudp_hdr->dgram_cksum =\n+\t\t\t\thinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);\n+\t\t}\n+\n+\t\toff_info->inner_l4_type = UDP_OFFLOAD_ENABLE;\n+\t\toff_info->inner_l4_tcp_udp = 1;\n+\t\toff_info->inner_l4_len = sizeof(struct rte_udp_hdr);\n+\t} else if (((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM) ||\n+\t\t\t(ol_flags & PKT_TX_TCP_SEG)) {\n+\t\tif (ol_flags & PKT_TX_IPV4) {\n+\t\t\tipv4_hdr =\n+\t\t\trte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,\n+\t\t\t\t\t\tinner_l3_offset);\n+\n+\t\t\tif (ol_flags & PKT_TX_IP_CKSUM)\n+\t\t\t\tipv4_hdr->hdr_checksum = 0;\n+\n+\t\t\t/* non-TSO tcp */\n+\t\t\ttcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr +\n+\t\t\t\t\t\t\t\tm->l3_len);\n+\t\t\ttcp_hdr->cksum =\n+\t\t\t\thinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);\n+\t\t} else {\n+\t\t\tipv6_hdr =\n+\t\t\trte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,\n+\t\t\t\t\t\tinner_l3_offset);\n+\t\t\t/* non-TSO tcp */\n+\t\t\ttcp_hdr =\n+\t\t\trte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *,\n+\t\t\t\t\t\t(inner_l3_offset + m->l3_len));\n+\t\t\ttcp_hdr->cksum =\n+\t\t\t\thinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);\n+\t\t}\n+\n+\t\toff_info->inner_l4_type = TCP_OFFLOAD_ENABLE;\n+\t\toff_info->inner_l4_tcp_udp = 1;\n+\t} else if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_SCTP_CKSUM) 
{\n+\t\toff_info->inner_l4_type = SCTP_OFFLOAD_ENABLE;\n+\t\toff_info->inner_l4_tcp_udp = 0;\n+\t\toff_info->inner_l4_len = sizeof(struct rte_sctp_hdr);\n+\t}\n+\n+\treturn 0;\n+}\n+\n+static inline bool hinic_get_sge_txoff_info(struct rte_mbuf *mbuf_pkt,\n+\t\t\t\t\t    struct hinic_wqe_info *sqe_info,\n+\t\t\t\t\t    struct hinic_tx_offload_info\n+\t\t\t\t\t    *off_info)\n+{\n+\tu16  i, total_len, sge_cnt = mbuf_pkt->nb_segs;\n+\tstruct rte_mbuf *mbuf;\n+\tint ret;\n+\n+\tmemset(off_info, 0, sizeof(*off_info));\n+\n+\tret = hinic_tx_offload_pkt_prepare(mbuf_pkt, off_info);\n+\tif (unlikely(ret))\n+\t\treturn false;\n+\n+\tsqe_info->cpy_mbuf_cnt = 0;\n+\n+\t/* non tso mbuf */\n+\tif (likely(!(mbuf_pkt->ol_flags & PKT_TX_TCP_SEG))) {\n+\t\tif (unlikely(mbuf_pkt->pkt_len > MAX_SINGLE_SGE_SIZE)) {\n+\t\t\t/* non tso packet len must less than 64KB */\n+\t\t\treturn false;\n+\t\t} else if (unlikely(HINIC_NONTSO_SEG_NUM_INVALID(sge_cnt))) {\n+\t\t\t/* non tso packet buffer number must less than 17\n+\t\t\t * the mbuf segs more than 17 must copy to one buffer\n+\t\t\t */\n+\t\t\ttotal_len = 0;\n+\t\t\tmbuf = mbuf_pkt;\n+\t\t\tfor (i = 0; i < (HINIC_NONTSO_PKT_MAX_SGE - 1) ; i++) {\n+\t\t\t\ttotal_len += mbuf->data_len;\n+\t\t\t\tmbuf = mbuf->next;\n+\t\t\t}\n+\n+\t\t\t/* default support copy total 4k mbuf segs */\n+\t\t\tif ((u32)(total_len + (u16)HINIC_COPY_MBUF_SIZE) <\n+\t\t\t\t  mbuf_pkt->pkt_len)\n+\t\t\t\treturn false;\n+\n+\t\t\tsqe_info->sge_cnt = HINIC_NONTSO_PKT_MAX_SGE;\n+\t\t\tsqe_info->cpy_mbuf_cnt = 1;\n+\t\t\treturn true;\n+\t\t}\n+\n+\t\t/* valid non tso mbuf */\n+\t\tsqe_info->sge_cnt = sge_cnt;\n+\t} else {\n+\t\t/* tso mbuf */\n+\t\tif (unlikely(HINIC_TSO_SEG_NUM_INVALID(sge_cnt)))\n+\t\t\t/* too many mbuf segs */\n+\t\t\treturn false;\n+\n+\t\t/* check tso mbuf segs are valid or not */\n+\t\tif (unlikely(!hinic_is_tso_sge_valid(mbuf_pkt,\n+\t\t\t     off_info, sqe_info)))\n+\t\t\treturn false;\n+\t}\n+\n+\treturn true;\n+}\n+\n+static inline void hinic_sq_write_db(struct hinic_sq *sq, int cos)\n+{\n+\tu16 prod_idx;\n+\tu32 hi_prod_idx;\n+\tstruct hinic_sq_db sq_db;\n+\n+\tprod_idx = MASKED_SQ_IDX(sq, sq->wq->prod_idx);\n+\thi_prod_idx = SQ_DB_PI_HIGH(prod_idx);\n+\n+\tsq_db.db_info = SQ_DB_INFO_SET(hi_prod_idx, HI_PI) |\n+\t\t\tSQ_DB_INFO_SET(SQ_DB, TYPE) |\n+\t\t\tSQ_DB_INFO_SET(SQ_CFLAG_DP, CFLAG) |\n+\t\t\tSQ_DB_INFO_SET(cos, COS) |\n+\t\t\tSQ_DB_INFO_SET(sq->q_id, QID);\n+\n+\t/* Data should be written to HW in Big Endian Format */\n+\tsq_db.db_info = cpu_to_be32(sq_db.db_info);\n+\n+\t/* Write all before the doorbell */\n+\trte_wmb();\n+\twritel(sq_db.db_info, SQ_DB_ADDR(sq, prod_idx));\n+}\n+\n+u16 hinic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts)\n+{\n+\tint free_wqebb_cnt, wqe_wqebb_cnt;\n+\tu32 queue_info, tx_bytes = 0;\n+\tu16 nb_tx;\n+\tstruct hinic_wqe_info sqe_info;\n+\tstruct hinic_tx_offload_info off_info;\n+\tstruct rte_mbuf *mbuf_pkt;\n+\tstruct hinic_txq *txq = tx_queue;\n+\tstruct hinic_tx_info *tx_info;\n+\tstruct hinic_sq_wqe *sq_wqe;\n+\tstruct hinic_sq_task *task;\n+\n+\t/* reclaim tx mbuf before xmit new packet */\n+\tif (HINIC_GET_SQ_FREE_WQEBBS(txq) < txq->tx_free_thresh)\n+\t\thinic_xmit_mbuf_cleanup(txq);\n+\n+\t/* tx loop routine */\n+\tfor (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {\n+\t\tmbuf_pkt = *tx_pkts++;\n+\t\tqueue_info = 0;\n+\n+\t\t/* 1. 
parse sge and tx offlod info from mbuf */\n+\t\tif (unlikely(!hinic_get_sge_txoff_info(mbuf_pkt,\n+\t\t\t\t\t\t       &sqe_info, &off_info))) {\n+\t\t\ttxq->txq_stats.off_errs++;\n+\t\t\tbreak;\n+\t\t}\n+\n+\t\t/* 2. try to get enough wqebb */\n+\t\twqe_wqebb_cnt = HINIC_SQ_WQEBB_CNT(sqe_info.sge_cnt);\n+\t\tfree_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);\n+\t\tif (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {\n+\t\t\t/* reclaim again */\n+\t\t\thinic_xmit_mbuf_cleanup(txq);\n+\t\t\tfree_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);\n+\t\t\tif (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {\n+\t\t\t\ttxq->txq_stats.tx_busy += (nb_pkts - nb_tx);\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t}\n+\n+\t\t/* 3. get sq tail wqe address from wqe_page,\n+\t\t * sq have enough wqebb for this packet\n+\t\t */\n+\t\tsq_wqe = hinic_get_sq_wqe(txq, wqe_wqebb_cnt, &sqe_info);\n+\n+\t\t/* 4. fill sq wqe sge section */\n+\t\tif (unlikely(!hinic_mbuf_dma_map_sge(txq, mbuf_pkt,\n+\t\t\t\t\t\t     sq_wqe->buf_descs,\n+\t\t\t\t\t\t     &sqe_info))) {\n+\t\t\thinic_return_sq_wqe(txq->nic_dev->hwdev, txq->q_id,\n+\t\t\t\t\t    wqe_wqebb_cnt, sqe_info.owner);\n+\t\t\ttxq->txq_stats.off_errs++;\n+\t\t\tbreak;\n+\t\t}\n+\n+\t\t/* 5. fill sq wqe task section and queue info */\n+\t\ttask = &sq_wqe->task;\n+\n+\t\t/* tx packet offload configure */\n+\t\thinic_fill_tx_offload_info(mbuf_pkt, task, &queue_info,\n+\t\t\t\t\t   &off_info);\n+\n+\t\t/* 6. record tx info */\n+\t\ttx_info = &txq->tx_info[sqe_info.pi];\n+\t\ttx_info->mbuf = mbuf_pkt;\n+\t\ttx_info->wqebb_cnt = wqe_wqebb_cnt;\n+\n+\t\t/* 7. fill sq wqe header section */\n+\t\thinic_fill_sq_wqe_header(&sq_wqe->ctrl, queue_info,\n+\t\t\t\t\t sqe_info.sge_cnt, sqe_info.owner);\n+\n+\t\t/* 8.convert continue or bottom wqe byteorder to big endian */\n+\t\thinic_sq_wqe_cpu_to_be32(sq_wqe, sqe_info.seq_wqebbs);\n+\n+\t\ttx_bytes += mbuf_pkt->pkt_len;\n+\t}\n+\n+\t/* 9. write sq doorbell in burst mode */\n+\tif (nb_tx) {\n+\t\thinic_sq_write_db(txq->sq, txq->cos);\n+\n+\t\ttxq->txq_stats.packets += nb_tx;\n+\t\ttxq->txq_stats.bytes += tx_bytes;\n+\t}\n+\ttxq->txq_stats.burst_pkts = nb_tx;\n+\n+\treturn nb_tx;\n+}\n \n void hinic_free_all_tx_skbs(struct hinic_txq *txq)\n {\n",
    "prefixes": [
        "v5",
        "14/15"
    ]
}
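The PATCH and PUT verbs listed in the Allow header above modify this same resource. A hedged sketch of a partial update with PATCH, assuming you hold a Patchwork API token with maintainer or delegate rights on the project; the token value and the target state are placeholders, and the state name must be one that this Patchwork instance accepts:

    import requests

    # Placeholder token; generate a real one from your Patchwork user settings.
    headers = {"Authorization": "Token <your-api-token>"}

    # PATCH performs a partial update: only the fields in the body change.
    resp = requests.patch(
        "http://patches.dpdk.org/api/patches/55069/",
        headers=headers,
        json={"state": "accepted"},  # placeholder state name
    )
    resp.raise_for_status()
    print(resp.json()["state"])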