get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/94726/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 94726,
    "url": "https://patches.dpdk.org/api/patches/94726/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/20210623044702.4240-31-ndabilpuram@marvell.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20210623044702.4240-31-ndabilpuram@marvell.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20210623044702.4240-31-ndabilpuram@marvell.com",
    "date": "2021-06-23T04:46:30",
    "name": "[v4,30/62] net/cnxk: add Tx vector version for cn10k",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "ad5e8aec95af48e1919d8918462655153ad38e57",
    "submitter": {
        "id": 1202,
        "url": "https://patches.dpdk.org/api/people/1202/?format=api",
        "name": "Nithin Dabilpuram",
        "email": "ndabilpuram@marvell.com"
    },
    "delegate": {
        "id": 310,
        "url": "https://patches.dpdk.org/api/users/310/?format=api",
        "username": "jerin",
        "first_name": "Jerin",
        "last_name": "Jacob",
        "email": "jerinj@marvell.com"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/20210623044702.4240-31-ndabilpuram@marvell.com/mbox/",
    "series": [
        {
            "id": 17449,
            "url": "https://patches.dpdk.org/api/series/17449/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=17449",
            "date": "2021-06-23T04:46:00",
            "name": "Marvell CNXK Ethdev Driver",
            "version": 4,
            "mbox": "https://patches.dpdk.org/series/17449/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/94726/comments/",
    "check": "warning",
    "checks": "https://patches.dpdk.org/api/patches/94726/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id EE2A9A0C41;\n\tWed, 23 Jun 2021 06:50:51 +0200 (CEST)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id CA09341145;\n\tWed, 23 Jun 2021 06:49:01 +0200 (CEST)",
            "from mx0b-0016f401.pphosted.com (mx0b-0016f401.pphosted.com\n [67.231.156.173])\n by mails.dpdk.org (Postfix) with ESMTP id 313CE406A3\n for <dev@dpdk.org>; Wed, 23 Jun 2021 06:49:00 +0200 (CEST)",
            "from pps.filterd (m0045851.ppops.net [127.0.0.1])\n by mx0b-0016f401.pphosted.com (8.16.0.43/8.16.0.43) with SMTP id\n 15N4k6Yr025521 for <dev@dpdk.org>; Tue, 22 Jun 2021 21:48:59 -0700",
            "from dc5-exch02.marvell.com ([199.233.59.182])\n by mx0b-0016f401.pphosted.com with ESMTP id 39bptj1gq2-1\n (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT)\n for <dev@dpdk.org>; Tue, 22 Jun 2021 21:48:59 -0700",
            "from DC5-EXCH01.marvell.com (10.69.176.38) by DC5-EXCH02.marvell.com\n (10.69.176.39) with Microsoft SMTP Server (TLS) id 15.0.1497.18;\n Tue, 22 Jun 2021 21:48:57 -0700",
            "from maili.marvell.com (10.69.176.80) by DC5-EXCH01.marvell.com\n (10.69.176.38) with Microsoft SMTP Server id 15.0.1497.18 via Frontend\n Transport; Tue, 22 Jun 2021 21:48:57 -0700",
            "from hyd1588t430.marvell.com (unknown [10.29.52.204])\n by maili.marvell.com (Postfix) with ESMTP id E03235B6937;\n Tue, 22 Jun 2021 21:48:53 -0700 (PDT)"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com;\n h=from : to : cc :\n subject : date : message-id : in-reply-to : references : mime-version :\n content-type; s=pfpt0220; bh=M+p6xdBvr6HWYgHSjPg2NHuPOvHEKHV6qhK7cK62Lew=;\n b=VHeTVIgPPgQ7XPvrlnKLzAlpWqFHiuXh6rczO88x/EpwdtLJiNSjsNBAzfz4plZN/Jo9\n MD+Kt4grnTFCoG2vpSuo0hWquHKD6tDeCDcoq6gjlTrlf7MoH8IGWScVrFVamhRc34wl\n QZPnmtoy+jDAtqeHgsN7uKZucAHJTCgXydK+HJrxmn3AxqHoTVzNF/LyMbyvX3sRa4P9\n DCpRcS5Jo5SVs5mJLJjOniKcYDfj3jUYiI9ovbL5urCT/Ay96JQ7ZATe7OwPHRA5HFuL\n 8E+y2VzOW21um/T6hq/fyQX07xPsGmSw/3cMLhtCGp4QKanOn6BQTng4bSLvbXnX7m1w gg==",
        "From": "Nithin Dabilpuram <ndabilpuram@marvell.com>",
        "To": "<dev@dpdk.org>",
        "CC": "<jerinj@marvell.com>, <skori@marvell.com>, <skoteshwar@marvell.com>,\n <pbhagavatula@marvell.com>, <kirankumark@marvell.com>,\n <psatheesh@marvell.com>, <asekhar@marvell.com>, <hkalra@marvell.com>,\n \"Nithin Dabilpuram\" <ndabilpuram@marvell.com>",
        "Date": "Wed, 23 Jun 2021 10:16:30 +0530",
        "Message-ID": "<20210623044702.4240-31-ndabilpuram@marvell.com>",
        "X-Mailer": "git-send-email 2.8.4",
        "In-Reply-To": "<20210623044702.4240-1-ndabilpuram@marvell.com>",
        "References": "<20210306153404.10781-1-ndabilpuram@marvell.com>\n <20210623044702.4240-1-ndabilpuram@marvell.com>",
        "MIME-Version": "1.0",
        "Content-Type": "text/plain",
        "X-Proofpoint-ORIG-GUID": "c8CiYm3BB-mi2f5op6KBOMZs4VhT7JrF",
        "X-Proofpoint-GUID": "c8CiYm3BB-mi2f5op6KBOMZs4VhT7JrF",
        "X-Proofpoint-Virus-Version": "vendor=fsecure engine=2.50.10434:6.0.391, 18.0.790\n definitions=2021-06-23_01:2021-06-22,\n 2021-06-23 signatures=0",
        "Subject": "[dpdk-dev] [PATCH v4 30/62] net/cnxk: add Tx vector version for\n cn10k",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Add Tx burst vector version for CN10K.\n\nSigned-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>\nSigned-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>\n---\n drivers/net/cnxk/cn10k_tx.c     |  10 +\n drivers/net/cnxk/cn10k_tx.h     | 815 ++++++++++++++++++++++++++++++++++++++++\n drivers/net/cnxk/cn10k_tx_vec.c |  25 ++\n drivers/net/cnxk/meson.build    |   3 +-\n 4 files changed, 852 insertions(+), 1 deletion(-)\n create mode 100644 drivers/net/cnxk/cn10k_tx_vec.c",
    "diff": "diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c\nindex 9803002..e6eb101 100644\n--- a/drivers/net/cnxk/cn10k_tx.c\n+++ b/drivers/net/cnxk/cn10k_tx.c\n@@ -58,10 +58,20 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)\n #undef T\n \t};\n \n+\tconst eth_tx_burst_t nix_eth_tx_vec_burst[2][2][2][2][2] = {\n+#define T(name, f4, f3, f2, f1, f0, sz, flags)                         \\\n+\t[f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_vec_##name,\n+\n+\t\tNIX_TX_FASTPATH_MODES\n+#undef T\n+\t};\n+\n \tif (dev->scalar_ena ||\n \t    (dev->tx_offload_flags &\n \t     (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)))\n \t\tpick_tx_func(eth_dev, nix_eth_tx_burst);\n+\telse\n+\t\tpick_tx_func(eth_dev, nix_eth_tx_vec_burst);\n \n \tif (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)\n \t\tpick_tx_func(eth_dev, nix_eth_tx_burst_mseg);\ndiff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h\nindex 63e9848..b74df10 100644\n--- a/drivers/net/cnxk/cn10k_tx.h\n+++ b/drivers/net/cnxk/cn10k_tx.h\n@@ -4,6 +4,8 @@\n #ifndef __CN10K_TX_H__\n #define __CN10K_TX_H__\n \n+#include <rte_vect.h>\n+\n #define NIX_TX_OFFLOAD_NONE\t      (0)\n #define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)\n #define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)\n@@ -38,6 +40,9 @@\n \t\t}                                                              \\\n \t} while (0)\n \n+#define LMT_OFF(lmt_addr, lmt_num, offset)                                     \\\n+\t(void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))\n+\n /* Function to determine no of tx subdesc required in case ext\n  * sub desc is enabled.\n  */\n@@ -48,6 +53,14 @@ cn10k_nix_tx_ext_subs(const uint16_t flags)\n \t\t(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ? 1 : 0;\n }\n \n+static __rte_always_inline uint8_t\n+cn10k_nix_pkts_per_vec_brst(const uint16_t flags)\n+{\n+\tRTE_SET_USED(flags);\n+\t/* We can pack up to 4 packets per LMTLINE if there are no offloads. */\n+\treturn 4 << ROC_LMT_LINES_PER_CORE_LOG2;\n+}\n+\n static __rte_always_inline uint64_t\n cn10k_nix_tx_steor_data(const uint16_t flags)\n {\n@@ -76,6 +89,35 @@ cn10k_nix_tx_steor_data(const uint16_t flags)\n \treturn data;\n }\n \n+static __rte_always_inline uint64_t\n+cn10k_nix_tx_steor_vec_data(const uint16_t flags)\n+{\n+\tconst uint64_t dw_m1 = 0x7;\n+\tuint64_t data;\n+\n+\tRTE_SET_USED(flags);\n+\t/* This will be moved to addr area */\n+\tdata = dw_m1;\n+\t/* 15 vector sizes for single seg */\n+\tdata |= dw_m1 << 19;\n+\tdata |= dw_m1 << 22;\n+\tdata |= dw_m1 << 25;\n+\tdata |= dw_m1 << 28;\n+\tdata |= dw_m1 << 31;\n+\tdata |= dw_m1 << 34;\n+\tdata |= dw_m1 << 37;\n+\tdata |= dw_m1 << 40;\n+\tdata |= dw_m1 << 43;\n+\tdata |= dw_m1 << 46;\n+\tdata |= dw_m1 << 49;\n+\tdata |= dw_m1 << 52;\n+\tdata |= dw_m1 << 55;\n+\tdata |= dw_m1 << 58;\n+\tdata |= dw_m1 << 61;\n+\n+\treturn data;\n+}\n+\n static __rte_always_inline void\n cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,\n \t\t      const uint16_t flags)\n@@ -589,6 +631,776 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,\n \treturn pkts;\n }\n \n+#if defined(RTE_ARCH_ARM64)\n+\n+#define NIX_DESCS_PER_LOOP 4\n+static __rte_always_inline uint16_t\n+cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n+\t\t\t   uint16_t pkts, uint64_t *cmd, const uint16_t flags)\n+{\n+\tuint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;\n+\tuint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;\n+\tuint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP];\n+\tuint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;\n+\tuint64x2_t senddesc01_w0, senddesc23_w0;\n+\tuint64x2_t senddesc01_w1, senddesc23_w1;\n+\tuint16_t left, scalar, burst, i, lmt_id;\n+\tuint64x2_t sgdesc01_w0, sgdesc23_w0;\n+\tuint64x2_t sgdesc01_w1, sgdesc23_w1;\n+\tstruct cn10k_eth_txq *txq = tx_queue;\n+\tuintptr_t laddr = txq->lmt_base;\n+\trte_iova_t io_addr = txq->io_addr;\n+\tuint64x2_t ltypes01, ltypes23;\n+\tuint64x2_t xtmp128, ytmp128;\n+\tuint64x2_t xmask01, xmask23;\n+\tuint8_t lnum;\n+\n+\tNIX_XMIT_FC_OR_RETURN(txq, pkts);\n+\n+\tscalar = pkts & (NIX_DESCS_PER_LOOP - 1);\n+\tpkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);\n+\n+\t/* Reduce the cached count */\n+\ttxq->fc_cache_pkts -= pkts;\n+\n+\tsenddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);\n+\tsenddesc23_w0 = senddesc01_w0;\n+\tsenddesc01_w1 = vdupq_n_u64(0);\n+\tsenddesc23_w1 = senddesc01_w1;\n+\tsgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);\n+\tsgdesc23_w0 = sgdesc01_w0;\n+\n+\t/* Get LMT base address and LMT ID as lcore id */\n+\tROC_LMT_BASE_ID_GET(laddr, lmt_id);\n+\tleft = pkts;\n+again:\n+\t/* Number of packets to prepare depends on offloads enabled. */\n+\tburst = left > cn10k_nix_pkts_per_vec_brst(flags) ?\n+\t\t\t      cn10k_nix_pkts_per_vec_brst(flags) :\n+\t\t\t      left;\n+\tlnum = 0;\n+\tfor (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {\n+\t\t/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */\n+\t\tsenddesc01_w0 =\n+\t\t\tvbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));\n+\t\tsgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));\n+\n+\t\tsenddesc23_w0 = senddesc01_w0;\n+\t\tsgdesc23_w0 = sgdesc01_w0;\n+\n+\t\t/* Move mbufs to iova */\n+\t\tmbuf0 = (uint64_t *)tx_pkts[0];\n+\t\tmbuf1 = (uint64_t *)tx_pkts[1];\n+\t\tmbuf2 = (uint64_t *)tx_pkts[2];\n+\t\tmbuf3 = (uint64_t *)tx_pkts[3];\n+\n+\t\tmbuf0 = (uint64_t *)((uintptr_t)mbuf0 +\n+\t\t\t\t     offsetof(struct rte_mbuf, buf_iova));\n+\t\tmbuf1 = (uint64_t *)((uintptr_t)mbuf1 +\n+\t\t\t\t     offsetof(struct rte_mbuf, buf_iova));\n+\t\tmbuf2 = (uint64_t *)((uintptr_t)mbuf2 +\n+\t\t\t\t     offsetof(struct rte_mbuf, buf_iova));\n+\t\tmbuf3 = (uint64_t *)((uintptr_t)mbuf3 +\n+\t\t\t\t     offsetof(struct rte_mbuf, buf_iova));\n+\t\t/*\n+\t\t * Get mbuf's, olflags, iova, pktlen, dataoff\n+\t\t * dataoff_iovaX.D[0] = iova,\n+\t\t * dataoff_iovaX.D[1](15:0) = mbuf->dataoff\n+\t\t * len_olflagsX.D[0] = ol_flags,\n+\t\t * len_olflagsX.D[1](63:32) = mbuf->pkt_len\n+\t\t */\n+\t\tdataoff_iova0 = vld1q_u64(mbuf0);\n+\t\tlen_olflags0 = vld1q_u64(mbuf0 + 2);\n+\t\tdataoff_iova1 = vld1q_u64(mbuf1);\n+\t\tlen_olflags1 = vld1q_u64(mbuf1 + 2);\n+\t\tdataoff_iova2 = vld1q_u64(mbuf2);\n+\t\tlen_olflags2 = vld1q_u64(mbuf2 + 2);\n+\t\tdataoff_iova3 = vld1q_u64(mbuf3);\n+\t\tlen_olflags3 = vld1q_u64(mbuf3 + 2);\n+\n+\t\t/* Move mbufs to point pool */\n+\t\tmbuf0 = (uint64_t *)((uintptr_t)mbuf0 +\n+\t\t\t\t     offsetof(struct rte_mbuf, pool) -\n+\t\t\t\t     offsetof(struct rte_mbuf, buf_iova));\n+\t\tmbuf1 = (uint64_t *)((uintptr_t)mbuf1 +\n+\t\t\t\t     offsetof(struct rte_mbuf, pool) -\n+\t\t\t\t     offsetof(struct rte_mbuf, buf_iova));\n+\t\tmbuf2 = (uint64_t *)((uintptr_t)mbuf2 +\n+\t\t\t\t     offsetof(struct rte_mbuf, pool) -\n+\t\t\t\t     offsetof(struct rte_mbuf, buf_iova));\n+\t\tmbuf3 = (uint64_t *)((uintptr_t)mbuf3 +\n+\t\t\t\t     offsetof(struct rte_mbuf, pool) -\n+\t\t\t\t     offsetof(struct rte_mbuf, buf_iova));\n+\n+\t\tif (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |\n+\t\t\t     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {\n+\t\t\t/* Get tx_offload for ol2, ol3, l2, l3 lengths */\n+\t\t\t/*\n+\t\t\t * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)\n+\t\t\t * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)\n+\t\t\t */\n+\n+\t\t\tasm volatile(\"LD1 {%[a].D}[0],[%[in]]\\n\\t\"\n+\t\t\t\t     : [a] \"+w\"(senddesc01_w1)\n+\t\t\t\t     : [in] \"r\"(mbuf0 + 2)\n+\t\t\t\t     : \"memory\");\n+\n+\t\t\tasm volatile(\"LD1 {%[a].D}[1],[%[in]]\\n\\t\"\n+\t\t\t\t     : [a] \"+w\"(senddesc01_w1)\n+\t\t\t\t     : [in] \"r\"(mbuf1 + 2)\n+\t\t\t\t     : \"memory\");\n+\n+\t\t\tasm volatile(\"LD1 {%[b].D}[0],[%[in]]\\n\\t\"\n+\t\t\t\t     : [b] \"+w\"(senddesc23_w1)\n+\t\t\t\t     : [in] \"r\"(mbuf2 + 2)\n+\t\t\t\t     : \"memory\");\n+\n+\t\t\tasm volatile(\"LD1 {%[b].D}[1],[%[in]]\\n\\t\"\n+\t\t\t\t     : [b] \"+w\"(senddesc23_w1)\n+\t\t\t\t     : [in] \"r\"(mbuf3 + 2)\n+\t\t\t\t     : \"memory\");\n+\n+\t\t\t/* Get pool pointer alone */\n+\t\t\tmbuf0 = (uint64_t *)*mbuf0;\n+\t\t\tmbuf1 = (uint64_t *)*mbuf1;\n+\t\t\tmbuf2 = (uint64_t *)*mbuf2;\n+\t\t\tmbuf3 = (uint64_t *)*mbuf3;\n+\t\t} else {\n+\t\t\t/* Get pool pointer alone */\n+\t\t\tmbuf0 = (uint64_t *)*mbuf0;\n+\t\t\tmbuf1 = (uint64_t *)*mbuf1;\n+\t\t\tmbuf2 = (uint64_t *)*mbuf2;\n+\t\t\tmbuf3 = (uint64_t *)*mbuf3;\n+\t\t}\n+\n+\t\tconst uint8x16_t shuf_mask2 = {\n+\t\t\t0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,\n+\t\t\t0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,\n+\t\t};\n+\t\txtmp128 = vzip2q_u64(len_olflags0, len_olflags1);\n+\t\tytmp128 = vzip2q_u64(len_olflags2, len_olflags3);\n+\n+\t\t/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */\n+\t\tconst uint64x2_t and_mask0 = {\n+\t\t\t0xFFFFFFFFFFFFFFFF,\n+\t\t\t0x000000000000FFFF,\n+\t\t};\n+\n+\t\tdataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);\n+\t\tdataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);\n+\t\tdataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);\n+\t\tdataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);\n+\n+\t\t/*\n+\t\t * Pick only 16 bits of pktlen preset at bits 63:32\n+\t\t * and place them at bits 15:0.\n+\t\t */\n+\t\txtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);\n+\t\tytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);\n+\n+\t\t/* Add pairwise to get dataoff + iova in sgdesc_w1 */\n+\t\tsgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);\n+\t\tsgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);\n+\n+\t\t/* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of\n+\t\t * pktlen at 15:0 position.\n+\t\t */\n+\t\tsgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);\n+\t\tsgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);\n+\t\tsenddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);\n+\t\tsenddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);\n+\n+\t\t/* Move mbuf to point to pool_id. */\n+\t\tmbuf0 = (uint64_t *)((uintptr_t)mbuf0 +\n+\t\t\t\t     offsetof(struct rte_mempool, pool_id));\n+\t\tmbuf1 = (uint64_t *)((uintptr_t)mbuf1 +\n+\t\t\t\t     offsetof(struct rte_mempool, pool_id));\n+\t\tmbuf2 = (uint64_t *)((uintptr_t)mbuf2 +\n+\t\t\t\t     offsetof(struct rte_mempool, pool_id));\n+\t\tmbuf3 = (uint64_t *)((uintptr_t)mbuf3 +\n+\t\t\t\t     offsetof(struct rte_mempool, pool_id));\n+\n+\t\tif ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&\n+\t\t    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {\n+\t\t\t/*\n+\t\t\t * Lookup table to translate ol_flags to\n+\t\t\t * il3/il4 types. But we still use ol3/ol4 types in\n+\t\t\t * senddesc_w1 as only one header processing is enabled.\n+\t\t\t */\n+\t\t\tconst uint8x16_t tbl = {\n+\t\t\t\t/* [0-15] = il4type:il3type */\n+\t\t\t\t0x04, /* none (IPv6 assumed) */\n+\t\t\t\t0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */\n+\t\t\t\t0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */\n+\t\t\t\t0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */\n+\t\t\t\t0x03, /* PKT_TX_IP_CKSUM */\n+\t\t\t\t0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */\n+\t\t\t\t0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */\n+\t\t\t\t0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */\n+\t\t\t\t0x02, /* PKT_TX_IPV4  */\n+\t\t\t\t0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */\n+\t\t\t\t0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */\n+\t\t\t\t0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */\n+\t\t\t\t0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */\n+\t\t\t\t0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |\n+\t\t\t\t       * PKT_TX_TCP_CKSUM\n+\t\t\t\t       */\n+\t\t\t\t0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |\n+\t\t\t\t       * PKT_TX_SCTP_CKSUM\n+\t\t\t\t       */\n+\t\t\t\t0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |\n+\t\t\t\t       * PKT_TX_UDP_CKSUM\n+\t\t\t\t       */\n+\t\t\t};\n+\n+\t\t\t/* Extract olflags to translate to iltypes */\n+\t\t\txtmp128 = vzip1q_u64(len_olflags0, len_olflags1);\n+\t\t\tytmp128 = vzip1q_u64(len_olflags2, len_olflags3);\n+\n+\t\t\t/*\n+\t\t\t * E(47):L3_LEN(9):L2_LEN(7+z)\n+\t\t\t * E(47):L3_LEN(9):L2_LEN(7+z)\n+\t\t\t */\n+\t\t\tsenddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);\n+\t\t\tsenddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);\n+\n+\t\t\t/* Move OLFLAGS bits 55:52 to 51:48\n+\t\t\t * with zeros preprended on the byte and rest\n+\t\t\t * don't care\n+\t\t\t */\n+\t\t\txtmp128 = vshrq_n_u8(xtmp128, 4);\n+\t\t\tytmp128 = vshrq_n_u8(ytmp128, 4);\n+\t\t\t/*\n+\t\t\t * E(48):L3_LEN(8):L2_LEN(z+7)\n+\t\t\t * E(48):L3_LEN(8):L2_LEN(z+7)\n+\t\t\t */\n+\t\t\tconst int8x16_t tshft3 = {\n+\t\t\t\t-1, 0, 8, 8, 8, 8, 8, 8,\n+\t\t\t\t-1, 0, 8, 8, 8, 8, 8, 8,\n+\t\t\t};\n+\n+\t\t\tsenddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);\n+\t\t\tsenddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);\n+\n+\t\t\t/* Do the lookup */\n+\t\t\tltypes01 = vqtbl1q_u8(tbl, xtmp128);\n+\t\t\tltypes23 = vqtbl1q_u8(tbl, ytmp128);\n+\n+\t\t\t/* Pick only relevant fields i.e Bit 48:55 of iltype\n+\t\t\t * and place it in ol3/ol4type of senddesc_w1\n+\t\t\t */\n+\t\t\tconst uint8x16_t shuf_mask0 = {\n+\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,\n+\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,\n+\t\t\t};\n+\n+\t\t\tltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);\n+\t\t\tltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);\n+\n+\t\t\t/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.\n+\t\t\t * a [E(32):E(16):OL3(8):OL2(8)]\n+\t\t\t * a = a + (a << 8)\n+\t\t\t * a [E(32):E(16):(OL3+OL2):OL2]\n+\t\t\t * => E(32):E(16)::OL4PTR(8):OL3PTR(8)\n+\t\t\t */\n+\t\t\tsenddesc01_w1 = vaddq_u8(senddesc01_w1,\n+\t\t\t\t\t\t vshlq_n_u16(senddesc01_w1, 8));\n+\t\t\tsenddesc23_w1 = vaddq_u8(senddesc23_w1,\n+\t\t\t\t\t\t vshlq_n_u16(senddesc23_w1, 8));\n+\n+\t\t\t/* Move ltypes to senddesc*_w1 */\n+\t\t\tsenddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);\n+\t\t\tsenddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);\n+\t\t} else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&\n+\t\t\t   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {\n+\t\t\t/*\n+\t\t\t * Lookup table to translate ol_flags to\n+\t\t\t * ol3/ol4 types.\n+\t\t\t */\n+\n+\t\t\tconst uint8x16_t tbl = {\n+\t\t\t\t/* [0-15] = ol4type:ol3type */\n+\t\t\t\t0x00, /* none */\n+\t\t\t\t0x03, /* OUTER_IP_CKSUM */\n+\t\t\t\t0x02, /* OUTER_IPV4 */\n+\t\t\t\t0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */\n+\t\t\t\t0x04, /* OUTER_IPV6 */\n+\t\t\t\t0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */\n+\t\t\t\t0x00, /* OUTER_IPV6 | OUTER_IPV4 */\n+\t\t\t\t0x00, /* OUTER_IPV6 | OUTER_IPV4 |\n+\t\t\t\t       * OUTER_IP_CKSUM\n+\t\t\t\t       */\n+\t\t\t\t0x00, /* OUTER_UDP_CKSUM */\n+\t\t\t\t0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */\n+\t\t\t\t0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */\n+\t\t\t\t0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |\n+\t\t\t\t       * OUTER_IP_CKSUM\n+\t\t\t\t       */\n+\t\t\t\t0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */\n+\t\t\t\t0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |\n+\t\t\t\t       * OUTER_IP_CKSUM\n+\t\t\t\t       */\n+\t\t\t\t0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |\n+\t\t\t\t       * OUTER_IPV4\n+\t\t\t\t       */\n+\t\t\t\t0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |\n+\t\t\t\t       * OUTER_IPV4 | OUTER_IP_CKSUM\n+\t\t\t\t       */\n+\t\t\t};\n+\n+\t\t\t/* Extract olflags to translate to iltypes */\n+\t\t\txtmp128 = vzip1q_u64(len_olflags0, len_olflags1);\n+\t\t\tytmp128 = vzip1q_u64(len_olflags2, len_olflags3);\n+\n+\t\t\t/*\n+\t\t\t * E(47):OL3_LEN(9):OL2_LEN(7+z)\n+\t\t\t * E(47):OL3_LEN(9):OL2_LEN(7+z)\n+\t\t\t */\n+\t\t\tconst uint8x16_t shuf_mask5 = {\n+\t\t\t\t0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,\n+\t\t\t\t0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,\n+\t\t\t};\n+\t\t\tsenddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);\n+\t\t\tsenddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);\n+\n+\t\t\t/* Extract outer ol flags only */\n+\t\t\tconst uint64x2_t o_cksum_mask = {\n+\t\t\t\t0x1C00020000000000,\n+\t\t\t\t0x1C00020000000000,\n+\t\t\t};\n+\n+\t\t\txtmp128 = vandq_u64(xtmp128, o_cksum_mask);\n+\t\t\tytmp128 = vandq_u64(ytmp128, o_cksum_mask);\n+\n+\t\t\t/* Extract OUTER_UDP_CKSUM bit 41 and\n+\t\t\t * move it to bit 61\n+\t\t\t */\n+\n+\t\t\txtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);\n+\t\t\tytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);\n+\n+\t\t\t/* Shift oltype by 2 to start nibble from BIT(56)\n+\t\t\t * instead of BIT(58)\n+\t\t\t */\n+\t\t\txtmp128 = vshrq_n_u8(xtmp128, 2);\n+\t\t\tytmp128 = vshrq_n_u8(ytmp128, 2);\n+\t\t\t/*\n+\t\t\t * E(48):L3_LEN(8):L2_LEN(z+7)\n+\t\t\t * E(48):L3_LEN(8):L2_LEN(z+7)\n+\t\t\t */\n+\t\t\tconst int8x16_t tshft3 = {\n+\t\t\t\t-1, 0, 8, 8, 8, 8, 8, 8,\n+\t\t\t\t-1, 0, 8, 8, 8, 8, 8, 8,\n+\t\t\t};\n+\n+\t\t\tsenddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);\n+\t\t\tsenddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);\n+\n+\t\t\t/* Do the lookup */\n+\t\t\tltypes01 = vqtbl1q_u8(tbl, xtmp128);\n+\t\t\tltypes23 = vqtbl1q_u8(tbl, ytmp128);\n+\n+\t\t\t/* Pick only relevant fields i.e Bit 56:63 of oltype\n+\t\t\t * and place it in ol3/ol4type of senddesc_w1\n+\t\t\t */\n+\t\t\tconst uint8x16_t shuf_mask0 = {\n+\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,\n+\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,\n+\t\t\t};\n+\n+\t\t\tltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);\n+\t\t\tltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);\n+\n+\t\t\t/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.\n+\t\t\t * a [E(32):E(16):OL3(8):OL2(8)]\n+\t\t\t * a = a + (a << 8)\n+\t\t\t * a [E(32):E(16):(OL3+OL2):OL2]\n+\t\t\t * => E(32):E(16)::OL4PTR(8):OL3PTR(8)\n+\t\t\t */\n+\t\t\tsenddesc01_w1 = vaddq_u8(senddesc01_w1,\n+\t\t\t\t\t\t vshlq_n_u16(senddesc01_w1, 8));\n+\t\t\tsenddesc23_w1 = vaddq_u8(senddesc23_w1,\n+\t\t\t\t\t\t vshlq_n_u16(senddesc23_w1, 8));\n+\n+\t\t\t/* Move ltypes to senddesc*_w1 */\n+\t\t\tsenddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);\n+\t\t\tsenddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);\n+\t\t} else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&\n+\t\t\t   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {\n+\t\t\t/* Lookup table to translate ol_flags to\n+\t\t\t * ol4type, ol3type, il4type, il3type of senddesc_w1\n+\t\t\t */\n+\t\t\tconst uint8x16x2_t tbl = {{\n+\t\t\t\t{\n+\t\t\t\t\t/* [0-15] = il4type:il3type */\n+\t\t\t\t\t0x04, /* none (IPv6) */\n+\t\t\t\t\t0x14, /* PKT_TX_TCP_CKSUM (IPv6) */\n+\t\t\t\t\t0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */\n+\t\t\t\t\t0x34, /* PKT_TX_UDP_CKSUM (IPv6) */\n+\t\t\t\t\t0x03, /* PKT_TX_IP_CKSUM */\n+\t\t\t\t\t0x13, /* PKT_TX_IP_CKSUM |\n+\t\t\t\t\t       * PKT_TX_TCP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x23, /* PKT_TX_IP_CKSUM |\n+\t\t\t\t\t       * PKT_TX_SCTP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x33, /* PKT_TX_IP_CKSUM |\n+\t\t\t\t\t       * PKT_TX_UDP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x02, /* PKT_TX_IPV4 */\n+\t\t\t\t\t0x12, /* PKT_TX_IPV4 |\n+\t\t\t\t\t       * PKT_TX_TCP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x22, /* PKT_TX_IPV4 |\n+\t\t\t\t\t       * PKT_TX_SCTP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x32, /* PKT_TX_IPV4 |\n+\t\t\t\t\t       * PKT_TX_UDP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x03, /* PKT_TX_IPV4 |\n+\t\t\t\t\t       * PKT_TX_IP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |\n+\t\t\t\t\t       * PKT_TX_TCP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |\n+\t\t\t\t\t       * PKT_TX_SCTP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |\n+\t\t\t\t\t       * PKT_TX_UDP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t},\n+\n+\t\t\t\t{\n+\t\t\t\t\t/* [16-31] = ol4type:ol3type */\n+\t\t\t\t\t0x00, /* none */\n+\t\t\t\t\t0x03, /* OUTER_IP_CKSUM */\n+\t\t\t\t\t0x02, /* OUTER_IPV4 */\n+\t\t\t\t\t0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */\n+\t\t\t\t\t0x04, /* OUTER_IPV6 */\n+\t\t\t\t\t0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */\n+\t\t\t\t\t0x00, /* OUTER_IPV6 | OUTER_IPV4 */\n+\t\t\t\t\t0x00, /* OUTER_IPV6 | OUTER_IPV4 |\n+\t\t\t\t\t       * OUTER_IP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x00, /* OUTER_UDP_CKSUM */\n+\t\t\t\t\t0x33, /* OUTER_UDP_CKSUM |\n+\t\t\t\t\t       * OUTER_IP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x32, /* OUTER_UDP_CKSUM |\n+\t\t\t\t\t       * OUTER_IPV4\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x33, /* OUTER_UDP_CKSUM |\n+\t\t\t\t\t       * OUTER_IPV4 | OUTER_IP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x34, /* OUTER_UDP_CKSUM |\n+\t\t\t\t\t       * OUTER_IPV6\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |\n+\t\t\t\t\t       * OUTER_IP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |\n+\t\t\t\t\t       * OUTER_IPV4\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |\n+\t\t\t\t\t       * OUTER_IPV4 | OUTER_IP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t},\n+\t\t\t}};\n+\n+\t\t\t/* Extract olflags to translate to oltype & iltype */\n+\t\t\txtmp128 = vzip1q_u64(len_olflags0, len_olflags1);\n+\t\t\tytmp128 = vzip1q_u64(len_olflags2, len_olflags3);\n+\n+\t\t\t/*\n+\t\t\t * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)\n+\t\t\t * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)\n+\t\t\t */\n+\t\t\tconst uint32x4_t tshft_4 = {\n+\t\t\t\t1,\n+\t\t\t\t0,\n+\t\t\t\t1,\n+\t\t\t\t0,\n+\t\t\t};\n+\t\t\tsenddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);\n+\t\t\tsenddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);\n+\n+\t\t\t/*\n+\t\t\t * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)\n+\t\t\t * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)\n+\t\t\t */\n+\t\t\tconst uint8x16_t shuf_mask5 = {\n+\t\t\t\t0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,\n+\t\t\t\t0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,\n+\t\t\t};\n+\t\t\tsenddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);\n+\t\t\tsenddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);\n+\n+\t\t\t/* Extract outer and inner header ol_flags */\n+\t\t\tconst uint64x2_t oi_cksum_mask = {\n+\t\t\t\t0x1CF0020000000000,\n+\t\t\t\t0x1CF0020000000000,\n+\t\t\t};\n+\n+\t\t\txtmp128 = vandq_u64(xtmp128, oi_cksum_mask);\n+\t\t\tytmp128 = vandq_u64(ytmp128, oi_cksum_mask);\n+\n+\t\t\t/* Extract OUTER_UDP_CKSUM bit 41 and\n+\t\t\t * move it to bit 61\n+\t\t\t */\n+\n+\t\t\txtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);\n+\t\t\tytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);\n+\n+\t\t\t/* Shift right oltype by 2 and iltype by 4\n+\t\t\t * to start oltype nibble from BIT(58)\n+\t\t\t * instead of BIT(56) and iltype nibble from BIT(48)\n+\t\t\t * instead of BIT(52).\n+\t\t\t */\n+\t\t\tconst int8x16_t tshft5 = {\n+\t\t\t\t8, 8, 8, 8, 8, 8, -4, -2,\n+\t\t\t\t8, 8, 8, 8, 8, 8, -4, -2,\n+\t\t\t};\n+\n+\t\t\txtmp128 = vshlq_u8(xtmp128, tshft5);\n+\t\t\tytmp128 = vshlq_u8(ytmp128, tshft5);\n+\t\t\t/*\n+\t\t\t * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)\n+\t\t\t * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)\n+\t\t\t */\n+\t\t\tconst int8x16_t tshft3 = {\n+\t\t\t\t-1, 0, -1, 0, 0, 0, 0, 0,\n+\t\t\t\t-1, 0, -1, 0, 0, 0, 0, 0,\n+\t\t\t};\n+\n+\t\t\tsenddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);\n+\t\t\tsenddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);\n+\n+\t\t\t/* Mark Bit(4) of oltype */\n+\t\t\tconst uint64x2_t oi_cksum_mask2 = {\n+\t\t\t\t0x1000000000000000,\n+\t\t\t\t0x1000000000000000,\n+\t\t\t};\n+\n+\t\t\txtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);\n+\t\t\tytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);\n+\n+\t\t\t/* Do the lookup */\n+\t\t\tltypes01 = vqtbl2q_u8(tbl, xtmp128);\n+\t\t\tltypes23 = vqtbl2q_u8(tbl, ytmp128);\n+\n+\t\t\t/* Pick only relevant fields i.e Bit 48:55 of iltype and\n+\t\t\t * Bit 56:63 of oltype and place it in corresponding\n+\t\t\t * place in senddesc_w1.\n+\t\t\t */\n+\t\t\tconst uint8x16_t shuf_mask0 = {\n+\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,\n+\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,\n+\t\t\t};\n+\n+\t\t\tltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);\n+\t\t\tltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);\n+\n+\t\t\t/* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from\n+\t\t\t * l3len, l2len, ol3len, ol2len.\n+\t\t\t * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]\n+\t\t\t * a = a + (a << 8)\n+\t\t\t * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]\n+\t\t\t * a = a + (a << 16)\n+\t\t\t * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]\n+\t\t\t * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)\n+\t\t\t */\n+\t\t\tsenddesc01_w1 = vaddq_u8(senddesc01_w1,\n+\t\t\t\t\t\t vshlq_n_u32(senddesc01_w1, 8));\n+\t\t\tsenddesc23_w1 = vaddq_u8(senddesc23_w1,\n+\t\t\t\t\t\t vshlq_n_u32(senddesc23_w1, 8));\n+\n+\t\t\t/* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */\n+\t\t\tsenddesc01_w1 = vaddq_u8(\n+\t\t\t\tsenddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));\n+\t\t\tsenddesc23_w1 = vaddq_u8(\n+\t\t\t\tsenddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));\n+\n+\t\t\t/* Move ltypes to senddesc*_w1 */\n+\t\t\tsenddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);\n+\t\t\tsenddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);\n+\t\t}\n+\n+\t\txmask01 = vdupq_n_u64(0);\n+\t\txmask23 = xmask01;\n+\t\tasm volatile(\"LD1 {%[a].H}[0],[%[in]]\\n\\t\"\n+\t\t\t     : [a] \"+w\"(xmask01)\n+\t\t\t     : [in] \"r\"(mbuf0)\n+\t\t\t     : \"memory\");\n+\n+\t\tasm volatile(\"LD1 {%[a].H}[4],[%[in]]\\n\\t\"\n+\t\t\t     : [a] \"+w\"(xmask01)\n+\t\t\t     : [in] \"r\"(mbuf1)\n+\t\t\t     : \"memory\");\n+\n+\t\tasm volatile(\"LD1 {%[b].H}[0],[%[in]]\\n\\t\"\n+\t\t\t     : [b] \"+w\"(xmask23)\n+\t\t\t     : [in] \"r\"(mbuf2)\n+\t\t\t     : \"memory\");\n+\n+\t\tasm volatile(\"LD1 {%[b].H}[4],[%[in]]\\n\\t\"\n+\t\t\t     : [b] \"+w\"(xmask23)\n+\t\t\t     : [in] \"r\"(mbuf3)\n+\t\t\t     : \"memory\");\n+\t\txmask01 = vshlq_n_u64(xmask01, 20);\n+\t\txmask23 = vshlq_n_u64(xmask23, 20);\n+\n+\t\tsenddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);\n+\t\tsenddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);\n+\n+\t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {\n+\t\t\t/* Set don't free bit if reference count > 1 */\n+\t\t\txmask01 = vdupq_n_u64(0);\n+\t\t\txmask23 = xmask01;\n+\n+\t\t\t/* Move mbufs to iova */\n+\t\t\tmbuf0 = (uint64_t *)tx_pkts[0];\n+\t\t\tmbuf1 = (uint64_t *)tx_pkts[1];\n+\t\t\tmbuf2 = (uint64_t *)tx_pkts[2];\n+\t\t\tmbuf3 = (uint64_t *)tx_pkts[3];\n+\n+\t\t\tif (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))\n+\t\t\t\tvsetq_lane_u64(0x80000, xmask01, 0);\n+\t\t\telse\n+\t\t\t\t__mempool_check_cookies(\n+\t\t\t\t\t((struct rte_mbuf *)mbuf0)->pool,\n+\t\t\t\t\t(void **)&mbuf0, 1, 0);\n+\n+\t\t\tif (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))\n+\t\t\t\tvsetq_lane_u64(0x80000, xmask01, 1);\n+\t\t\telse\n+\t\t\t\t__mempool_check_cookies(\n+\t\t\t\t\t((struct rte_mbuf *)mbuf1)->pool,\n+\t\t\t\t\t(void **)&mbuf1, 1, 0);\n+\n+\t\t\tif (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))\n+\t\t\t\tvsetq_lane_u64(0x80000, xmask23, 0);\n+\t\t\telse\n+\t\t\t\t__mempool_check_cookies(\n+\t\t\t\t\t((struct rte_mbuf *)mbuf2)->pool,\n+\t\t\t\t\t(void **)&mbuf2, 1, 0);\n+\n+\t\t\tif (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))\n+\t\t\t\tvsetq_lane_u64(0x80000, xmask23, 1);\n+\t\t\telse\n+\t\t\t\t__mempool_check_cookies(\n+\t\t\t\t\t((struct rte_mbuf *)mbuf3)->pool,\n+\t\t\t\t\t(void **)&mbuf3, 1, 0);\n+\t\t\tsenddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);\n+\t\t\tsenddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);\n+\t\t} else {\n+\t\t\t/* Move mbufs to iova */\n+\t\t\tmbuf0 = (uint64_t *)tx_pkts[0];\n+\t\t\tmbuf1 = (uint64_t *)tx_pkts[1];\n+\t\t\tmbuf2 = (uint64_t *)tx_pkts[2];\n+\t\t\tmbuf3 = (uint64_t *)tx_pkts[3];\n+\n+\t\t\t/* Mark mempool object as \"put\" since\n+\t\t\t * it is freed by NIX\n+\t\t\t */\n+\t\t\t__mempool_check_cookies(\n+\t\t\t\t((struct rte_mbuf *)mbuf0)->pool,\n+\t\t\t\t(void **)&mbuf0, 1, 0);\n+\n+\t\t\t__mempool_check_cookies(\n+\t\t\t\t((struct rte_mbuf *)mbuf1)->pool,\n+\t\t\t\t(void **)&mbuf1, 1, 0);\n+\n+\t\t\t__mempool_check_cookies(\n+\t\t\t\t((struct rte_mbuf *)mbuf2)->pool,\n+\t\t\t\t(void **)&mbuf2, 1, 0);\n+\n+\t\t\t__mempool_check_cookies(\n+\t\t\t\t((struct rte_mbuf *)mbuf3)->pool,\n+\t\t\t\t(void **)&mbuf3, 1, 0);\n+\t\t}\n+\n+\t\t/* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */\n+\t\tcmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);\n+\t\tcmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);\n+\t\tcmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);\n+\t\tcmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);\n+\n+\t\tcmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);\n+\t\tcmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);\n+\t\tcmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);\n+\t\tcmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);\n+\n+\t\t/* Store the prepared send desc to LMT lines */\n+\t\tvst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);\n+\t\tvst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);\n+\t\tvst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);\n+\t\tvst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);\n+\t\tvst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);\n+\t\tvst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);\n+\t\tvst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);\n+\t\tvst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);\n+\t\tlnum += 1;\n+\n+\t\ttx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;\n+\t}\n+\n+\t/* Trigger LMTST */\n+\tif (lnum > 16) {\n+\t\tdata = cn10k_nix_tx_steor_vec_data(flags);\n+\t\tpa = io_addr | (data & 0x7) << 4;\n+\t\tdata &= ~0x7ULL;\n+\t\tdata |= (15ULL << 12);\n+\t\tdata |= (uint64_t)lmt_id;\n+\n+\t\t/* STEOR0 */\n+\t\troc_lmt_submit_steorl(data, pa);\n+\n+\t\tdata = cn10k_nix_tx_steor_vec_data(flags);\n+\t\tpa = io_addr | (data & 0x7) << 4;\n+\t\tdata &= ~0x7ULL;\n+\t\tdata |= ((uint64_t)(lnum - 17)) << 12;\n+\t\tdata |= (uint64_t)(lmt_id + 16);\n+\n+\t\t/* STEOR1 */\n+\t\troc_lmt_submit_steorl(data, pa);\n+\t} else if (lnum) {\n+\t\tdata = cn10k_nix_tx_steor_vec_data(flags);\n+\t\tpa = io_addr | (data & 0x7) << 4;\n+\t\tdata &= ~0x7ULL;\n+\t\tdata |= ((uint64_t)(lnum - 1)) << 12;\n+\t\tdata |= lmt_id;\n+\n+\t\t/* STEOR0 */\n+\t\troc_lmt_submit_steorl(data, pa);\n+\t}\n+\n+\tleft -= burst;\n+\trte_io_wmb();\n+\tif (left)\n+\t\tgoto again;\n+\n+\tif (unlikely(scalar))\n+\t\tpkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd,\n+\t\t\t\t\t    flags);\n+\n+\treturn pkts;\n+}\n+\n+#else\n+static __rte_always_inline uint16_t\n+cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n+\t\t\t   uint16_t pkts, uint64_t *cmd, const uint16_t flags)\n+{\n+\tRTE_SET_USED(tx_queue);\n+\tRTE_SET_USED(tx_pkts);\n+\tRTE_SET_USED(pkts);\n+\tRTE_SET_USED(cmd);\n+\tRTE_SET_USED(flags);\n+\treturn 0;\n+}\n+#endif\n+\n #define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F\n #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F\n #define VLAN_F\t     NIX_TX_OFFLOAD_VLAN_QINQ_F\n@@ -667,6 +1479,9 @@ T(tso_noff_vlan_ol3ol4csum_l3l4csum,\t1, 1, 1, 1, 1,\t6,\t\t\\\n \t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \\\n \t\t\t\t\t\t\t\t\t       \\\n \tuint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \\\n+\t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \\\n+\t\t\t\t\t\t\t\t\t       \\\n+\tuint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \\\n \t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);\n \n NIX_TX_FASTPATH_MODES\ndiff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c\nnew file mode 100644\nindex 0000000..42baeb5\n--- /dev/null\n+++ b/drivers/net/cnxk/cn10k_tx_vec.c\n@@ -0,0 +1,25 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(C) 2021 Marvell.\n+ */\n+\n+#include \"cn10k_ethdev.h\"\n+#include \"cn10k_tx.h\"\n+\n+#define T(name, f4, f3, f2, f1, f0, sz, flags)\t\t\t\t       \\\n+\tuint16_t __rte_noinline __rte_hot\t\t\t\t       \\\n+\t\tcn10k_nix_xmit_pkts_vec_##name(void *tx_queue,                 \\\n+\t\t\t\t\t       struct rte_mbuf **tx_pkts,      \\\n+\t\t\t\t\t       uint16_t pkts)                  \\\n+\t{                                                                      \\\n+\t\tuint64_t cmd[sz];                                              \\\n+\t\t\t\t\t\t\t\t\t       \\\n+\t\t/* VLAN, TSTMP, TSO is not supported by vec */                 \\\n+\t\tif ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F ||\t\t       \\\n+\t\t    (flags) & NIX_TX_OFFLOAD_TSO_F)\t\t\t       \\\n+\t\t\treturn 0;                                              \\\n+\t\treturn cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\\\n+\t\t\t\t\t\t  (flags));                    \\\n+\t}\n+\n+NIX_TX_FASTPATH_MODES\n+#undef T\ndiff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build\nindex 21e5676..8f32dc7 100644\n--- a/drivers/net/cnxk/meson.build\n+++ b/drivers/net/cnxk/meson.build\n@@ -28,7 +28,8 @@ sources += files('cn10k_ethdev.c',\n \t\t 'cn10k_rx_mseg.c',\n \t\t 'cn10k_rx_vec.c',\n \t\t 'cn10k_tx.c',\n-\t\t 'cn10k_tx_mseg.c')\n+\t\t 'cn10k_tx_mseg.c',\n+\t\t 'cn10k_tx_vec.c')\n \n deps += ['bus_pci', 'cryptodev', 'eventdev', 'security']\n deps += ['common_cnxk', 'mempool_cnxk']\n",
    "prefixes": [
        "v4",
        "30/62"
    ]
}