get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/55722/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 55722,
    "url": "http://patches.dpdk.org/api/patches/55722/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20190630180609.36705-54-jerinj@marvell.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20190630180609.36705-54-jerinj@marvell.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20190630180609.36705-54-jerinj@marvell.com",
    "date": "2019-06-30T18:06:05",
    "name": "[v2,53/57] net/octeontx2: add Tx vector version",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "5d97403fba8a6dbc1d0032c337b2f3fa6873f533",
    "submitter": {
        "id": 1188,
        "url": "http://patches.dpdk.org/api/people/1188/?format=api",
        "name": "Jerin Jacob Kollanukkaran",
        "email": "jerinj@marvell.com"
    },
    "delegate": {
        "id": 310,
        "url": "http://patches.dpdk.org/api/users/310/?format=api",
        "username": "jerin",
        "first_name": "Jerin",
        "last_name": "Jacob",
        "email": "jerinj@marvell.com"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20190630180609.36705-54-jerinj@marvell.com/mbox/",
    "series": [
        {
            "id": 5236,
            "url": "http://patches.dpdk.org/api/series/5236/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=5236",
            "date": "2019-06-30T18:05:12",
            "name": "OCTEON TX2 Ethdev driver",
            "version": 2,
            "mbox": "http://patches.dpdk.org/series/5236/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/55722/comments/",
    "check": "warning",
    "checks": "http://patches.dpdk.org/api/patches/55722/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id 077B21BAC5;\n\tSun, 30 Jun 2019 20:12:13 +0200 (CEST)",
            "from mx0b-0016f401.pphosted.com (mx0b-0016f401.pphosted.com\n\t[67.231.156.173]) by dpdk.org (Postfix) with ESMTP id EAA4F1B9D4\n\tfor <dev@dpdk.org>; Sun, 30 Jun 2019 20:09:17 +0200 (CEST)",
            "from pps.filterd (m0045851.ppops.net [127.0.0.1])\n\tby mx0b-0016f401.pphosted.com (8.16.0.27/8.16.0.27) with SMTP id\n\tx5UI6414016301 for <dev@dpdk.org>; Sun, 30 Jun 2019 11:09:16 -0700",
            "from sc-exch01.marvell.com ([199.233.58.181])\n\tby mx0b-0016f401.pphosted.com with ESMTP id 2te7gm3yfs-1\n\t(version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT)\n\tfor <dev@dpdk.org>; Sun, 30 Jun 2019 11:09:16 -0700",
            "from SC-EXCH01.marvell.com (10.93.176.81) by SC-EXCH01.marvell.com\n\t(10.93.176.81) with Microsoft SMTP Server (TLS) id 15.0.1367.3;\n\tSun, 30 Jun 2019 11:09:14 -0700",
            "from maili.marvell.com (10.93.176.43) by SC-EXCH01.marvell.com\n\t(10.93.176.81) with Microsoft SMTP Server id 15.0.1367.3 via Frontend\n\tTransport; Sun, 30 Jun 2019 11:09:14 -0700",
            "from jerin-lab.marvell.com (jerin-lab.marvell.com [10.28.34.14])\n\tby maili.marvell.com (Postfix) with ESMTP id 9BD223F703F;\n\tSun, 30 Jun 2019 11:09:12 -0700 (PDT)"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com;\n\th=from : to : cc :\n\tsubject : date : message-id : in-reply-to : references : mime-version\n\t: content-transfer-encoding : content-type; s=pfpt0818;\n\tbh=nU0jLGdRQ5mVVEbMQHWirkjBxe2PnFmruCk+dz1oj+4=;\n\tb=khtrAm83ouLoSMlnYzmf6SC1G9/WSlrqnPc7lifL2vJH7SksBLj+ciUQzJxsJmYfBxUQ\n\t8NMxNLCnjH1citVAnfyiZNDSr72gdTcxXzKXTRpbOMgFs6hF1QQuBAfwK2J+Q0MnfZny\n\tXbyp1JW1AlQj+4HtfA5ycXUj1BSaUFv6WNMK4QVjyaqZpRidspnuR5+Ux/rBXU3Ju4J1\n\tDtQoxoecctbPR+t5s6XvKAsoigm3/FUcYfXOXNnT4CH2yKbhWC4zLPtwAhvVYJopV8GE\n\tXBcE/iQQNzlAUpVuYGE3B65mDTUAWmlsFX14mPgFVGa+RYC8QJzN9xvy4tKQ+pB2Ya0P\n\tnA== ",
        "From": "<jerinj@marvell.com>",
        "To": "<dev@dpdk.org>, Jerin Jacob <jerinj@marvell.com>, Nithin Dabilpuram\n\t<ndabilpuram@marvell.com>, Kiran Kumar K <kirankumark@marvell.com>",
        "CC": "Pavan Nikhilesh <pbhagavatula@marvell.com>",
        "Date": "Sun, 30 Jun 2019 23:36:05 +0530",
        "Message-ID": "<20190630180609.36705-54-jerinj@marvell.com>",
        "X-Mailer": "git-send-email 2.21.0",
        "In-Reply-To": "<20190630180609.36705-1-jerinj@marvell.com>",
        "References": "<20190602152434.23996-1-jerinj@marvell.com>\n\t<20190630180609.36705-1-jerinj@marvell.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "Content-Type": "text/plain",
        "X-Proofpoint-Virus-Version": "vendor=fsecure engine=2.50.10434:, ,\n\tdefinitions=2019-06-30_08:, , signatures=0",
        "Subject": "[dpdk-dev]  [PATCH v2 53/57] net/octeontx2: add Tx vector version",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "From: Nithin Dabilpuram <ndabilpuram@marvell.com>\n\nAdd vector version of packet transmit function.\n\nSigned-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>\nSigned-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>\n---\n drivers/net/octeontx2/otx2_tx.c | 883 +++++++++++++++++++++++++++++++-\n 1 file changed, 882 insertions(+), 1 deletion(-)",
    "diff": "diff --git a/drivers/net/octeontx2/otx2_tx.c b/drivers/net/octeontx2/otx2_tx.c\nindex 0ac5ea652..6bce55112 100644\n--- a/drivers/net/octeontx2/otx2_tx.c\n+++ b/drivers/net/octeontx2/otx2_tx.c\n@@ -80,6 +80,859 @@ nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,\n \treturn pkts;\n }\n \n+#if defined(RTE_ARCH_ARM64)\n+\n+#define NIX_DESCS_PER_LOOP\t4\n+static __rte_always_inline uint16_t\n+nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n+\t\t     uint16_t pkts, const uint16_t flags)\n+{\n+\tuint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;\n+\tuint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;\n+\tuint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;\n+\tuint64x2_t senddesc01_w0, senddesc23_w0;\n+\tuint64x2_t senddesc01_w1, senddesc23_w1;\n+\tuint64x2_t sgdesc01_w0, sgdesc23_w0;\n+\tuint64x2_t sgdesc01_w1, sgdesc23_w1;\n+\tstruct otx2_eth_txq *txq = tx_queue;\n+\tuint64_t *lmt_addr = txq->lmt_addr;\n+\trte_iova_t io_addr = txq->io_addr;\n+\tuint64x2_t ltypes01, ltypes23;\n+\tuint64x2_t xtmp128, ytmp128;\n+\tuint64x2_t xmask01, xmask23;\n+\tuint64x2_t mbuf01, mbuf23;\n+\tuint64x2_t cmd00, cmd01;\n+\tuint64x2_t cmd10, cmd11;\n+\tuint64x2_t cmd20, cmd21;\n+\tuint64x2_t cmd30, cmd31;\n+\tuint64_t lmt_status, i;\n+\n+\tpkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);\n+\n+\tNIX_XMIT_FC_OR_RETURN(txq, pkts);\n+\n+\t/* Reduce the cached count */\n+\ttxq->fc_cache_pkts -= pkts;\n+\n+\t/* Lets commit any changes in the packet */\n+\trte_cio_wmb();\n+\n+\tsenddesc01_w0 = vld1q_dup_u64(&txq->cmd[0]);\n+\tsenddesc23_w0 = senddesc01_w0;\n+\tsenddesc01_w1 = vdupq_n_u64(0);\n+\tsenddesc23_w1 = senddesc01_w1;\n+\tsgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);\n+\tsgdesc23_w0 = sgdesc01_w0;\n+\n+\tfor (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {\n+\t\tmbuf01 = vld1q_u64((uint64_t *)tx_pkts);\n+\t\tmbuf23 = vld1q_u64((uint64_t *)(tx_pkts + 2));\n+\n+\t\t/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */\n+\t\tsenddesc01_w0 = vbicq_u64(senddesc01_w0,\n+\t\t\t\t\t  vdupq_n_u64(0xFFFFFFFF));\n+\t\tsgdesc01_w0 = vbicq_u64(sgdesc01_w0,\n+\t\t\t\t\tvdupq_n_u64(0xFFFFFFFF));\n+\n+\t\tsenddesc23_w0 = senddesc01_w0;\n+\t\tsgdesc23_w0 = sgdesc01_w0;\n+\n+\t\ttx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;\n+\n+\t\t/* Move mbufs to iova */\n+\t\tmbuf0 = (uint64_t *)vgetq_lane_u64(mbuf01, 0);\n+\t\tmbuf1 = (uint64_t *)vgetq_lane_u64(mbuf01, 1);\n+\t\tmbuf2 = (uint64_t *)vgetq_lane_u64(mbuf23, 0);\n+\t\tmbuf3 = (uint64_t *)vgetq_lane_u64(mbuf23, 1);\n+\n+\t\tmbuf0 = (uint64_t *)((uintptr_t)mbuf0 +\n+\t\t\t\t     offsetof(struct rte_mbuf, buf_iova));\n+\t\tmbuf1 = (uint64_t *)((uintptr_t)mbuf1 +\n+\t\t\t\t     offsetof(struct rte_mbuf, buf_iova));\n+\t\tmbuf2 = (uint64_t *)((uintptr_t)mbuf2 +\n+\t\t\t\t     offsetof(struct rte_mbuf, buf_iova));\n+\t\tmbuf3 = (uint64_t *)((uintptr_t)mbuf3 +\n+\t\t\t\t     offsetof(struct rte_mbuf, buf_iova));\n+\t\t/*\n+\t\t * Get mbuf's, olflags, iova, pktlen, dataoff\n+\t\t * dataoff_iovaX.D[0] = iova,\n+\t\t * dataoff_iovaX.D[1](15:0) = mbuf->dataoff\n+\t\t * len_olflagsX.D[0] = ol_flags,\n+\t\t * len_olflagsX.D[1](63:32) = mbuf->pkt_len\n+\t\t */\n+\t\tdataoff_iova0  = vld1q_u64(mbuf0);\n+\t\tlen_olflags0 = vld1q_u64(mbuf0 + 2);\n+\t\tdataoff_iova1  = vld1q_u64(mbuf1);\n+\t\tlen_olflags1 = vld1q_u64(mbuf1 + 2);\n+\t\tdataoff_iova2  = vld1q_u64(mbuf2);\n+\t\tlen_olflags2 = vld1q_u64(mbuf2 + 2);\n+\t\tdataoff_iova3  = vld1q_u64(mbuf3);\n+\t\tlen_olflags3 = vld1q_u64(mbuf3 + 2);\n+\n+\t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {\n+\t\t\tstruct rte_mbuf *mbuf;\n+\t\t\t/* Set don't free bit if reference count > 1 */\n+\t\t\txmask01 = vdupq_n_u64(0);\n+\t\t\txmask23 = xmask01;\n+\n+\t\t\tmbuf = (struct rte_mbuf *)((uintptr_t)mbuf0 -\n+\t\t\t\toffsetof(struct rte_mbuf, buf_iova));\n+\n+\t\t\tif (rte_pktmbuf_prefree_seg(mbuf) == NULL)\n+\t\t\t\tvsetq_lane_u64(0x80000, xmask01, 0);\n+\t\t\telse\n+\t\t\t\t__mempool_check_cookies(mbuf->pool,\n+\t\t\t\t\t\t\t(void **)&mbuf,\n+\t\t\t\t\t\t\t1, 0);\n+\n+\t\t\tmbuf = (struct rte_mbuf *)((uintptr_t)mbuf1 -\n+\t\t\t\toffsetof(struct rte_mbuf, buf_iova));\n+\t\t\tif (rte_pktmbuf_prefree_seg(mbuf) == NULL)\n+\t\t\t\tvsetq_lane_u64(0x80000, xmask01, 1);\n+\t\t\telse\n+\t\t\t\t__mempool_check_cookies(mbuf->pool,\n+\t\t\t\t\t\t\t(void **)&mbuf,\n+\t\t\t\t\t\t\t1, 0);\n+\n+\t\t\tmbuf = (struct rte_mbuf *)((uintptr_t)mbuf2 -\n+\t\t\t\toffsetof(struct rte_mbuf, buf_iova));\n+\t\t\tif (rte_pktmbuf_prefree_seg(mbuf) == NULL)\n+\t\t\t\tvsetq_lane_u64(0x80000, xmask23, 0);\n+\t\t\telse\n+\t\t\t\t__mempool_check_cookies(mbuf->pool,\n+\t\t\t\t\t\t\t(void **)&mbuf,\n+\t\t\t\t\t\t\t1, 0);\n+\n+\t\t\tmbuf = (struct rte_mbuf *)((uintptr_t)mbuf3 -\n+\t\t\t\toffsetof(struct rte_mbuf, buf_iova));\n+\t\t\tif (rte_pktmbuf_prefree_seg(mbuf) == NULL)\n+\t\t\t\tvsetq_lane_u64(0x80000, xmask23, 1);\n+\t\t\telse\n+\t\t\t\t__mempool_check_cookies(mbuf->pool,\n+\t\t\t\t\t\t\t(void **)&mbuf,\n+\t\t\t\t\t\t\t1, 0);\n+\t\t\tsenddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);\n+\t\t\tsenddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);\n+\t\t} else {\n+\t\t\tstruct rte_mbuf *mbuf;\n+\t\t\t/* Mark mempool object as \"put\" since\n+\t\t\t * it is freed by NIX\n+\t\t\t */\n+\t\t\tmbuf = (struct rte_mbuf *)((uintptr_t)mbuf0 -\n+\t\t\t\toffsetof(struct rte_mbuf, buf_iova));\n+\t\t\t__mempool_check_cookies(mbuf->pool, (void **)&mbuf,\n+\t\t\t\t\t\t1, 0);\n+\n+\t\t\tmbuf = (struct rte_mbuf *)((uintptr_t)mbuf1 -\n+\t\t\t\toffsetof(struct rte_mbuf, buf_iova));\n+\t\t\t__mempool_check_cookies(mbuf->pool, (void **)&mbuf,\n+\t\t\t\t\t\t1, 0);\n+\n+\t\t\tmbuf = (struct rte_mbuf *)((uintptr_t)mbuf2 -\n+\t\t\t\toffsetof(struct rte_mbuf, buf_iova));\n+\t\t\t__mempool_check_cookies(mbuf->pool, (void **)&mbuf,\n+\t\t\t\t\t\t1, 0);\n+\n+\t\t\tmbuf = (struct rte_mbuf *)((uintptr_t)mbuf3 -\n+\t\t\t\toffsetof(struct rte_mbuf, buf_iova));\n+\t\t\t__mempool_check_cookies(mbuf->pool, (void **)&mbuf,\n+\t\t\t\t\t\t1, 0);\n+\t\t\tRTE_SET_USED(mbuf);\n+\t\t}\n+\n+\t\t/* Move mbufs to point pool */\n+\t\tmbuf0 = (uint64_t *)((uintptr_t)mbuf0 +\n+\t\t\t offsetof(struct rte_mbuf, pool) -\n+\t\t\t offsetof(struct rte_mbuf, buf_iova));\n+\t\tmbuf1 = (uint64_t *)((uintptr_t)mbuf1 +\n+\t\t\t offsetof(struct rte_mbuf, pool) -\n+\t\t\t offsetof(struct rte_mbuf, buf_iova));\n+\t\tmbuf2 = (uint64_t *)((uintptr_t)mbuf2 +\n+\t\t\t offsetof(struct rte_mbuf, pool) -\n+\t\t\t offsetof(struct rte_mbuf, buf_iova));\n+\t\tmbuf3 = (uint64_t *)((uintptr_t)mbuf3 +\n+\t\t\t offsetof(struct rte_mbuf, pool) -\n+\t\t\t offsetof(struct rte_mbuf, buf_iova));\n+\n+\t\tif (flags &\n+\t\t    (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |\n+\t\t     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {\n+\t\t\t/* Get tx_offload for ol2, ol3, l2, l3 lengths */\n+\t\t\t/*\n+\t\t\t * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)\n+\t\t\t * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)\n+\t\t\t */\n+\n+\t\t\tasm volatile (\"LD1 {%[a].D}[0],[%[in]]\\n\\t\" :\n+\t\t\t\t      [a]\"+w\"(senddesc01_w1) :\n+\t\t\t\t      [in]\"r\"(mbuf0 + 2) : \"memory\");\n+\n+\t\t\tasm volatile (\"LD1 {%[a].D}[1],[%[in]]\\n\\t\" :\n+\t\t\t\t      [a]\"+w\"(senddesc01_w1) :\n+\t\t\t\t      [in]\"r\"(mbuf1 + 2) : \"memory\");\n+\n+\t\t\tasm volatile (\"LD1 {%[b].D}[0],[%[in]]\\n\\t\" :\n+\t\t\t\t      [b]\"+w\"(senddesc23_w1) :\n+\t\t\t\t      [in]\"r\"(mbuf2 + 2) : \"memory\");\n+\n+\t\t\tasm volatile (\"LD1 {%[b].D}[1],[%[in]]\\n\\t\" :\n+\t\t\t\t      [b]\"+w\"(senddesc23_w1) :\n+\t\t\t\t      [in]\"r\"(mbuf3 + 2) : \"memory\");\n+\n+\t\t\t/* Get pool pointer alone */\n+\t\t\tmbuf0 = (uint64_t *)*mbuf0;\n+\t\t\tmbuf1 = (uint64_t *)*mbuf1;\n+\t\t\tmbuf2 = (uint64_t *)*mbuf2;\n+\t\t\tmbuf3 = (uint64_t *)*mbuf3;\n+\t\t} else {\n+\t\t\t/* Get pool pointer alone */\n+\t\t\tmbuf0 = (uint64_t *)*mbuf0;\n+\t\t\tmbuf1 = (uint64_t *)*mbuf1;\n+\t\t\tmbuf2 = (uint64_t *)*mbuf2;\n+\t\t\tmbuf3 = (uint64_t *)*mbuf3;\n+\t\t}\n+\n+\t\tconst uint8x16_t shuf_mask2 = {\n+\t\t\t0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,\n+\t\t\t0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,\n+\t\t};\n+\t\txtmp128 = vzip2q_u64(len_olflags0, len_olflags1);\n+\t\tytmp128 = vzip2q_u64(len_olflags2, len_olflags3);\n+\n+\t\t/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */\n+\t\tconst uint64x2_t and_mask0 = {\n+\t\t\t0xFFFFFFFFFFFFFFFF,\n+\t\t\t0x000000000000FFFF,\n+\t\t};\n+\n+\t\tdataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);\n+\t\tdataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);\n+\t\tdataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);\n+\t\tdataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);\n+\n+\t\t/*\n+\t\t * Pick only 16 bits of pktlen preset at bits 63:32\n+\t\t * and place them at bits 15:0.\n+\t\t */\n+\t\txtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);\n+\t\tytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);\n+\n+\t\t/* Add pairwise to get dataoff + iova in sgdesc_w1 */\n+\t\tsgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);\n+\t\tsgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);\n+\n+\t\t/* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of\n+\t\t * pktlen at 15:0 position.\n+\t\t */\n+\t\tsgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);\n+\t\tsgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);\n+\t\tsenddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);\n+\t\tsenddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);\n+\n+\t\tif ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&\n+\t\t    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {\n+\t\t\t/*\n+\t\t\t * Lookup table to translate ol_flags to\n+\t\t\t * il3/il4 types. But we still use ol3/ol4 types in\n+\t\t\t * senddesc_w1 as only one header processing is enabled.\n+\t\t\t */\n+\t\t\tconst uint8x16_t tbl = {\n+\t\t\t\t/* [0-15] = il4type:il3type */\n+\t\t\t\t0x04, /* none (IPv6 assumed) */\n+\t\t\t\t0x14, /* PKT_TX_TCP_CKSUM (IPv6 assumed) */\n+\t\t\t\t0x24, /* PKT_TX_SCTP_CKSUM (IPv6 assumed) */\n+\t\t\t\t0x34, /* PKT_TX_UDP_CKSUM (IPv6 assumed) */\n+\t\t\t\t0x03, /* PKT_TX_IP_CKSUM */\n+\t\t\t\t0x13, /* PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM */\n+\t\t\t\t0x23, /* PKT_TX_IP_CKSUM | PKT_TX_SCTP_CKSUM */\n+\t\t\t\t0x33, /* PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM */\n+\t\t\t\t0x02, /* PKT_TX_IPV4  */\n+\t\t\t\t0x12, /* PKT_TX_IPV4 | PKT_TX_TCP_CKSUM */\n+\t\t\t\t0x22, /* PKT_TX_IPV4 | PKT_TX_SCTP_CKSUM */\n+\t\t\t\t0x32, /* PKT_TX_IPV4 | PKT_TX_UDP_CKSUM */\n+\t\t\t\t0x03, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM */\n+\t\t\t\t0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |\n+\t\t\t\t       * PKT_TX_TCP_CKSUM\n+\t\t\t\t       */\n+\t\t\t\t0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |\n+\t\t\t\t       * PKT_TX_SCTP_CKSUM\n+\t\t\t\t       */\n+\t\t\t\t0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |\n+\t\t\t\t       * PKT_TX_UDP_CKSUM\n+\t\t\t\t       */\n+\t\t\t};\n+\n+\t\t\t/* Extract olflags to translate to iltypes */\n+\t\t\txtmp128 = vzip1q_u64(len_olflags0, len_olflags1);\n+\t\t\tytmp128 = vzip1q_u64(len_olflags2, len_olflags3);\n+\n+\t\t\t/*\n+\t\t\t * E(47):L3_LEN(9):L2_LEN(7+z)\n+\t\t\t * E(47):L3_LEN(9):L2_LEN(7+z)\n+\t\t\t */\n+\t\t\tsenddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);\n+\t\t\tsenddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);\n+\n+\t\t\t/* Move OLFLAGS bits 55:52 to 51:48\n+\t\t\t * with zeros preprended on the byte and rest\n+\t\t\t * don't care\n+\t\t\t */\n+\t\t\txtmp128 = vshrq_n_u8(xtmp128, 4);\n+\t\t\tytmp128 = vshrq_n_u8(ytmp128, 4);\n+\t\t\t/*\n+\t\t\t * E(48):L3_LEN(8):L2_LEN(z+7)\n+\t\t\t * E(48):L3_LEN(8):L2_LEN(z+7)\n+\t\t\t */\n+\t\t\tconst int8x16_t tshft3 = {\n+\t\t\t\t-1, 0, 8, 8, 8,\t8, 8, 8,\n+\t\t\t\t-1, 0, 8, 8, 8,\t8, 8, 8,\n+\t\t\t};\n+\n+\t\t\tsenddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);\n+\t\t\tsenddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);\n+\n+\t\t\t/* Do the lookup */\n+\t\t\tltypes01 = vqtbl1q_u8(tbl, xtmp128);\n+\t\t\tltypes23 = vqtbl1q_u8(tbl, ytmp128);\n+\n+\t\t\t/* Just use ld1q to retrieve aura\n+\t\t\t * when we don't need tx_offload\n+\t\t\t */\n+\t\t\tmbuf0 = (uint64_t *)((uintptr_t)mbuf0 +\n+\t\t\t\t\toffsetof(struct rte_mempool, pool_id));\n+\t\t\tmbuf1 = (uint64_t *)((uintptr_t)mbuf1 +\n+\t\t\t\t\toffsetof(struct rte_mempool, pool_id));\n+\t\t\tmbuf2 = (uint64_t *)((uintptr_t)mbuf2 +\n+\t\t\t\t\toffsetof(struct rte_mempool, pool_id));\n+\t\t\tmbuf3 = (uint64_t *)((uintptr_t)mbuf3 +\n+\t\t\t\t\toffsetof(struct rte_mempool, pool_id));\n+\n+\t\t\t/* Pick only relevant fields i.e Bit 48:55 of iltype\n+\t\t\t * and place it in ol3/ol4type of senddesc_w1\n+\t\t\t */\n+\t\t\tconst uint8x16_t shuf_mask0 = {\n+\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF,\t0x6, 0xFF, 0xFF, 0xFF,\n+\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,\n+\t\t\t};\n+\n+\t\t\tltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);\n+\t\t\tltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);\n+\n+\t\t\t/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.\n+\t\t\t * a [E(32):E(16):OL3(8):OL2(8)]\n+\t\t\t * a = a + (a << 8)\n+\t\t\t * a [E(32):E(16):(OL3+OL2):OL2]\n+\t\t\t * => E(32):E(16)::OL4PTR(8):OL3PTR(8)\n+\t\t\t */\n+\t\t\tsenddesc01_w1 = vaddq_u8(senddesc01_w1,\n+\t\t\t\t\t\t vshlq_n_u16(senddesc01_w1, 8));\n+\t\t\tsenddesc23_w1 = vaddq_u8(senddesc23_w1,\n+\t\t\t\t\t\t vshlq_n_u16(senddesc23_w1, 8));\n+\n+\t\t\t/* Create first half of 4W cmd for 4 mbufs (sgdesc) */\n+\t\t\tcmd01 = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);\n+\t\t\tcmd11 = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);\n+\t\t\tcmd21 = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);\n+\t\t\tcmd31 = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);\n+\n+\t\t\txmask01 = vdupq_n_u64(0);\n+\t\t\txmask23 = xmask01;\n+\t\t\tasm volatile (\"LD1 {%[a].H}[0],[%[in]]\\n\\t\" :\n+\t\t\t\t[a]\"+w\"(xmask01) : [in]\"r\"(mbuf0) : \"memory\");\n+\n+\t\t\tasm volatile (\"LD1 {%[a].H}[4],[%[in]]\\n\\t\" :\n+\t\t\t\t [a]\"+w\"(xmask01) : [in]\"r\"(mbuf1) : \"memory\");\n+\n+\t\t\tasm volatile (\"LD1 {%[b].H}[0],[%[in]]\\n\\t\" :\n+\t\t\t\t [b]\"+w\"(xmask23) : [in]\"r\"(mbuf2) : \"memory\");\n+\n+\t\t\tasm volatile (\"LD1 {%[b].H}[4],[%[in]]\\n\\t\" :\n+\t\t\t\t [b]\"+w\"(xmask23) : [in]\"r\"(mbuf3) : \"memory\");\n+\t\t\txmask01 = vshlq_n_u64(xmask01, 20);\n+\t\t\txmask23 = vshlq_n_u64(xmask23, 20);\n+\n+\t\t\tsenddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);\n+\t\t\tsenddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);\n+\t\t\t/* Move ltypes to senddesc*_w1 */\n+\t\t\tsenddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);\n+\t\t\tsenddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);\n+\n+\t\t\t/* Create first half of 4W cmd for 4 mbufs (sendhdr) */\n+\t\t\tcmd00 = vzip1q_u64(senddesc01_w0, senddesc01_w1);\n+\t\t\tcmd10 = vzip2q_u64(senddesc01_w0, senddesc01_w1);\n+\t\t\tcmd20 = vzip1q_u64(senddesc23_w0, senddesc23_w1);\n+\t\t\tcmd30 = vzip2q_u64(senddesc23_w0, senddesc23_w1);\n+\n+\t\t} else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&\n+\t\t\t   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {\n+\t\t\t/*\n+\t\t\t * Lookup table to translate ol_flags to\n+\t\t\t * ol3/ol4 types.\n+\t\t\t */\n+\n+\t\t\tconst uint8x16_t tbl = {\n+\t\t\t\t/* [0-15] = ol4type:ol3type */\n+\t\t\t\t0x00, /* none */\n+\t\t\t\t0x03, /* OUTER_IP_CKSUM */\n+\t\t\t\t0x02, /* OUTER_IPV4 */\n+\t\t\t\t0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */\n+\t\t\t\t0x04, /* OUTER_IPV6 */\n+\t\t\t\t0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */\n+\t\t\t\t0x00, /* OUTER_IPV6 | OUTER_IPV4 */\n+\t\t\t\t0x00, /* OUTER_IPV6 | OUTER_IPV4 |\n+\t\t\t\t       * OUTER_IP_CKSUM\n+\t\t\t\t       */\n+\t\t\t\t0x00, /* OUTER_UDP_CKSUM */\n+\t\t\t\t0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */\n+\t\t\t\t0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */\n+\t\t\t\t0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |\n+\t\t\t\t       * OUTER_IP_CKSUM\n+\t\t\t\t       */\n+\t\t\t\t0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */\n+\t\t\t\t0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |\n+\t\t\t\t       * OUTER_IP_CKSUM\n+\t\t\t\t       */\n+\t\t\t\t0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |\n+\t\t\t\t       * OUTER_IPV4\n+\t\t\t\t       */\n+\t\t\t\t0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |\n+\t\t\t\t       * OUTER_IPV4 | OUTER_IP_CKSUM\n+\t\t\t\t       */\n+\t\t\t};\n+\n+\t\t\t/* Extract olflags to translate to iltypes */\n+\t\t\txtmp128 = vzip1q_u64(len_olflags0, len_olflags1);\n+\t\t\tytmp128 = vzip1q_u64(len_olflags2, len_olflags3);\n+\n+\t\t\t/*\n+\t\t\t * E(47):OL3_LEN(9):OL2_LEN(7+z)\n+\t\t\t * E(47):OL3_LEN(9):OL2_LEN(7+z)\n+\t\t\t */\n+\t\t\tconst uint8x16_t shuf_mask5 = {\n+\t\t\t\t0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,\n+\t\t\t\t0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,\n+\t\t\t};\n+\t\t\tsenddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);\n+\t\t\tsenddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);\n+\n+\t\t\t/* Extract outer ol flags only */\n+\t\t\tconst uint64x2_t o_cksum_mask = {\n+\t\t\t\t0x1C00020000000000,\n+\t\t\t\t0x1C00020000000000,\n+\t\t\t};\n+\n+\t\t\txtmp128 = vandq_u64(xtmp128, o_cksum_mask);\n+\t\t\tytmp128 = vandq_u64(ytmp128, o_cksum_mask);\n+\n+\t\t\t/* Extract OUTER_UDP_CKSUM bit 41 and\n+\t\t\t * move it to bit 61\n+\t\t\t */\n+\n+\t\t\txtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);\n+\t\t\tytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);\n+\n+\t\t\t/* Shift oltype by 2 to start nibble from BIT(56)\n+\t\t\t * instead of BIT(58)\n+\t\t\t */\n+\t\t\txtmp128 = vshrq_n_u8(xtmp128, 2);\n+\t\t\tytmp128 = vshrq_n_u8(ytmp128, 2);\n+\t\t\t/*\n+\t\t\t * E(48):L3_LEN(8):L2_LEN(z+7)\n+\t\t\t * E(48):L3_LEN(8):L2_LEN(z+7)\n+\t\t\t */\n+\t\t\tconst int8x16_t tshft3 = {\n+\t\t\t\t-1, 0, 8, 8, 8, 8, 8, 8,\n+\t\t\t\t-1, 0, 8, 8, 8, 8, 8, 8,\n+\t\t\t};\n+\n+\t\t\tsenddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);\n+\t\t\tsenddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);\n+\n+\t\t\t/* Do the lookup */\n+\t\t\tltypes01 = vqtbl1q_u8(tbl, xtmp128);\n+\t\t\tltypes23 = vqtbl1q_u8(tbl, ytmp128);\n+\n+\t\t\t/* Just use ld1q to retrieve aura\n+\t\t\t * when we don't need tx_offload\n+\t\t\t */\n+\t\t\tmbuf0 = (uint64_t *)((uintptr_t)mbuf0 +\n+\t\t\t\t\toffsetof(struct rte_mempool, pool_id));\n+\t\t\tmbuf1 = (uint64_t *)((uintptr_t)mbuf1 +\n+\t\t\t\t\toffsetof(struct rte_mempool, pool_id));\n+\t\t\tmbuf2 = (uint64_t *)((uintptr_t)mbuf2 +\n+\t\t\t\t\toffsetof(struct rte_mempool, pool_id));\n+\t\t\tmbuf3 = (uint64_t *)((uintptr_t)mbuf3 +\n+\t\t\t\t\toffsetof(struct rte_mempool, pool_id));\n+\n+\t\t\t/* Pick only relevant fields i.e Bit 56:63 of oltype\n+\t\t\t * and place it in ol3/ol4type of senddesc_w1\n+\t\t\t */\n+\t\t\tconst uint8x16_t shuf_mask0 = {\n+\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF,\t0x7, 0xFF, 0xFF, 0xFF,\n+\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,\n+\t\t\t};\n+\n+\t\t\tltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);\n+\t\t\tltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);\n+\n+\t\t\t/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.\n+\t\t\t * a [E(32):E(16):OL3(8):OL2(8)]\n+\t\t\t * a = a + (a << 8)\n+\t\t\t * a [E(32):E(16):(OL3+OL2):OL2]\n+\t\t\t * => E(32):E(16)::OL4PTR(8):OL3PTR(8)\n+\t\t\t */\n+\t\t\tsenddesc01_w1 = vaddq_u8(senddesc01_w1,\n+\t\t\t\t\t\t vshlq_n_u16(senddesc01_w1, 8));\n+\t\t\tsenddesc23_w1 = vaddq_u8(senddesc23_w1,\n+\t\t\t\t\t\t vshlq_n_u16(senddesc23_w1, 8));\n+\n+\t\t\t/* Create second half of 4W cmd for 4 mbufs (sgdesc) */\n+\t\t\tcmd01 = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);\n+\t\t\tcmd11 = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);\n+\t\t\tcmd21 = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);\n+\t\t\tcmd31 = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);\n+\n+\t\t\txmask01 = vdupq_n_u64(0);\n+\t\t\txmask23 = xmask01;\n+\t\t\tasm volatile (\"LD1 {%[a].H}[0],[%[in]]\\n\\t\" :\n+\t\t\t\t [a]\"+w\"(xmask01) : [in]\"r\"(mbuf0) : \"memory\");\n+\n+\t\t\tasm volatile (\"LD1 {%[a].H}[4],[%[in]]\\n\\t\" :\n+\t\t\t\t [a]\"+w\"(xmask01) : [in]\"r\"(mbuf1) : \"memory\");\n+\n+\t\t\tasm volatile (\"LD1 {%[b].H}[0],[%[in]]\\n\\t\" :\n+\t\t\t\t [b]\"+w\"(xmask23) : [in]\"r\"(mbuf2) : \"memory\");\n+\n+\t\t\tasm volatile (\"LD1 {%[b].H}[4],[%[in]]\\n\\t\" :\n+\t\t\t\t [b]\"+w\"(xmask23) : [in]\"r\"(mbuf3) : \"memory\");\n+\t\t\txmask01 = vshlq_n_u64(xmask01, 20);\n+\t\t\txmask23 = vshlq_n_u64(xmask23, 20);\n+\n+\t\t\tsenddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);\n+\t\t\tsenddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);\n+\t\t\t/* Move ltypes to senddesc*_w1 */\n+\t\t\tsenddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);\n+\t\t\tsenddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);\n+\n+\t\t\t/* Create first half of 4W cmd for 4 mbufs (sendhdr) */\n+\t\t\tcmd00 = vzip1q_u64(senddesc01_w0, senddesc01_w1);\n+\t\t\tcmd10 = vzip2q_u64(senddesc01_w0, senddesc01_w1);\n+\t\t\tcmd20 = vzip1q_u64(senddesc23_w0, senddesc23_w1);\n+\t\t\tcmd30 = vzip2q_u64(senddesc23_w0, senddesc23_w1);\n+\n+\t\t} else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&\n+\t\t\t   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {\n+\t\t\t/* Lookup table to translate ol_flags to\n+\t\t\t * ol4type, ol3type, il4type, il3type of senddesc_w1\n+\t\t\t */\n+\t\t\tconst uint8x16x2_t tbl = {\n+\t\t\t{\n+\t\t\t\t{\n+\t\t\t\t\t/* [0-15] = il4type:il3type */\n+\t\t\t\t\t0x04, /* none (IPv6) */\n+\t\t\t\t\t0x14, /* PKT_TX_TCP_CKSUM (IPv6) */\n+\t\t\t\t\t0x24, /* PKT_TX_SCTP_CKSUM (IPv6) */\n+\t\t\t\t\t0x34, /* PKT_TX_UDP_CKSUM (IPv6) */\n+\t\t\t\t\t0x03, /* PKT_TX_IP_CKSUM */\n+\t\t\t\t\t0x13, /* PKT_TX_IP_CKSUM |\n+\t\t\t\t\t       * PKT_TX_TCP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x23, /* PKT_TX_IP_CKSUM |\n+\t\t\t\t\t       * PKT_TX_SCTP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x33, /* PKT_TX_IP_CKSUM |\n+\t\t\t\t\t       * PKT_TX_UDP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x02, /* PKT_TX_IPV4 */\n+\t\t\t\t\t0x12, /* PKT_TX_IPV4 |\n+\t\t\t\t\t       * PKT_TX_TCP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x22, /* PKT_TX_IPV4 |\n+\t\t\t\t\t       * PKT_TX_SCTP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x32, /* PKT_TX_IPV4 |\n+\t\t\t\t\t       * PKT_TX_UDP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x03, /* PKT_TX_IPV4 |\n+\t\t\t\t\t       * PKT_TX_IP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x13, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |\n+\t\t\t\t\t       * PKT_TX_TCP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x23, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |\n+\t\t\t\t\t       * PKT_TX_SCTP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x33, /* PKT_TX_IPV4 | PKT_TX_IP_CKSUM |\n+\t\t\t\t\t       * PKT_TX_UDP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t},\n+\n+\t\t\t\t{\n+\t\t\t\t\t/* [16-31] = ol4type:ol3type */\n+\t\t\t\t\t0x00, /* none */\n+\t\t\t\t\t0x03, /* OUTER_IP_CKSUM */\n+\t\t\t\t\t0x02, /* OUTER_IPV4 */\n+\t\t\t\t\t0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */\n+\t\t\t\t\t0x04, /* OUTER_IPV6 */\n+\t\t\t\t\t0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */\n+\t\t\t\t\t0x00, /* OUTER_IPV6 | OUTER_IPV4 */\n+\t\t\t\t\t0x00, /* OUTER_IPV6 | OUTER_IPV4 |\n+\t\t\t\t\t       * OUTER_IP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x00, /* OUTER_UDP_CKSUM */\n+\t\t\t\t\t0x33, /* OUTER_UDP_CKSUM |\n+\t\t\t\t\t       * OUTER_IP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x32, /* OUTER_UDP_CKSUM |\n+\t\t\t\t\t       * OUTER_IPV4\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x33, /* OUTER_UDP_CKSUM |\n+\t\t\t\t\t       * OUTER_IPV4 | OUTER_IP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x34, /* OUTER_UDP_CKSUM |\n+\t\t\t\t\t       * OUTER_IPV6\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |\n+\t\t\t\t\t       * OUTER_IP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |\n+\t\t\t\t\t       * OUTER_IPV4\n+\t\t\t\t\t       */\n+\t\t\t\t\t0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |\n+\t\t\t\t\t       * OUTER_IPV4 | OUTER_IP_CKSUM\n+\t\t\t\t\t       */\n+\t\t\t\t},\n+\t\t\t}\n+\t\t\t};\n+\n+\t\t\t/* Extract olflags to translate to oltype & iltype */\n+\t\t\txtmp128 = vzip1q_u64(len_olflags0, len_olflags1);\n+\t\t\tytmp128 = vzip1q_u64(len_olflags2, len_olflags3);\n+\n+\t\t\t/*\n+\t\t\t * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)\n+\t\t\t * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)\n+\t\t\t */\n+\t\t\tconst uint32x4_t tshft_4 = {\n+\t\t\t\t1, 0,\n+\t\t\t\t1, 0,\n+\t\t\t};\n+\t\t\tsenddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);\n+\t\t\tsenddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);\n+\n+\t\t\t/*\n+\t\t\t * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)\n+\t\t\t * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)\n+\t\t\t */\n+\t\t\tconst uint8x16_t shuf_mask5 = {\n+\t\t\t\t0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,\n+\t\t\t\t0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF,\t0xFF, 0xFF,\n+\t\t\t};\n+\t\t\tsenddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);\n+\t\t\tsenddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);\n+\n+\t\t\t/* Extract outer and inner header ol_flags */\n+\t\t\tconst uint64x2_t oi_cksum_mask = {\n+\t\t\t\t0x1CF0020000000000,\n+\t\t\t\t0x1CF0020000000000,\n+\t\t\t};\n+\n+\t\t\txtmp128 = vandq_u64(xtmp128, oi_cksum_mask);\n+\t\t\tytmp128 = vandq_u64(ytmp128, oi_cksum_mask);\n+\n+\t\t\t/* Extract OUTER_UDP_CKSUM bit 41 and\n+\t\t\t * move it to bit 61\n+\t\t\t */\n+\n+\t\t\txtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);\n+\t\t\tytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);\n+\n+\t\t\t/* Shift right oltype by 2 and iltype by 4\n+\t\t\t * to start oltype nibble from BIT(58)\n+\t\t\t * instead of BIT(56) and iltype nibble from BIT(48)\n+\t\t\t * instead of BIT(52).\n+\t\t\t */\n+\t\t\tconst int8x16_t tshft5 = {\n+\t\t\t\t8, 8, 8, 8, 8, 8, -4, -2,\n+\t\t\t\t8, 8, 8, 8, 8, 8, -4, -2,\n+\t\t\t};\n+\n+\t\t\txtmp128 = vshlq_u8(xtmp128, tshft5);\n+\t\t\tytmp128 = vshlq_u8(ytmp128, tshft5);\n+\t\t\t/*\n+\t\t\t * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)\n+\t\t\t * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)\n+\t\t\t */\n+\t\t\tconst int8x16_t tshft3 = {\n+\t\t\t\t-1, 0, -1, 0, 0, 0, 0, 0,\n+\t\t\t\t-1, 0, -1, 0, 0, 0, 0, 0,\n+\t\t\t};\n+\n+\t\t\tsenddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);\n+\t\t\tsenddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);\n+\n+\t\t\t/* Mark Bit(4) of oltype */\n+\t\t\tconst uint64x2_t oi_cksum_mask2 = {\n+\t\t\t\t0x1000000000000000,\n+\t\t\t\t0x1000000000000000,\n+\t\t\t};\n+\n+\t\t\txtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);\n+\t\t\tytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);\n+\n+\t\t\t/* Do the lookup */\n+\t\t\tltypes01 = vqtbl2q_u8(tbl, xtmp128);\n+\t\t\tltypes23 = vqtbl2q_u8(tbl, ytmp128);\n+\n+\t\t\t/* Just use ld1q to retrieve aura\n+\t\t\t * when we don't need tx_offload\n+\t\t\t */\n+\t\t\tmbuf0 = (uint64_t *)((uintptr_t)mbuf0 +\n+\t\t\t\t\toffsetof(struct rte_mempool, pool_id));\n+\t\t\tmbuf1 = (uint64_t *)((uintptr_t)mbuf1 +\n+\t\t\t\t\toffsetof(struct rte_mempool, pool_id));\n+\t\t\tmbuf2 = (uint64_t *)((uintptr_t)mbuf2 +\n+\t\t\t\t\toffsetof(struct rte_mempool, pool_id));\n+\t\t\tmbuf3 = (uint64_t *)((uintptr_t)mbuf3 +\n+\t\t\t\t\toffsetof(struct rte_mempool, pool_id));\n+\n+\t\t\t/* Pick only relevant fields i.e Bit 48:55 of iltype and\n+\t\t\t * Bit 56:63 of oltype and place it in corresponding\n+\t\t\t * place in senddesc_w1.\n+\t\t\t */\n+\t\t\tconst uint8x16_t shuf_mask0 = {\n+\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,\n+\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF,\t0xF, 0xE, 0xFF, 0xFF,\n+\t\t\t};\n+\n+\t\t\tltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);\n+\t\t\tltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);\n+\n+\t\t\t/* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from\n+\t\t\t * l3len, l2len, ol3len, ol2len.\n+\t\t\t * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]\n+\t\t\t * a = a + (a << 8)\n+\t\t\t * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]\n+\t\t\t * a = a + (a << 16)\n+\t\t\t * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]\n+\t\t\t * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)\n+\t\t\t */\n+\t\t\tsenddesc01_w1 = vaddq_u8(senddesc01_w1,\n+\t\t\t\t\t\t vshlq_n_u32(senddesc01_w1, 8));\n+\t\t\tsenddesc23_w1 = vaddq_u8(senddesc23_w1,\n+\t\t\t\t\t\t vshlq_n_u32(senddesc23_w1, 8));\n+\n+\t\t\t/* Create second half of 4W cmd for 4 mbufs (sgdesc) */\n+\t\t\tcmd01 = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);\n+\t\t\tcmd11 = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);\n+\t\t\tcmd21 = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);\n+\t\t\tcmd31 = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);\n+\n+\t\t\t/* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */\n+\t\t\tsenddesc01_w1 = vaddq_u8(senddesc01_w1,\n+\t\t\t\t\t\tvshlq_n_u32(senddesc01_w1, 16));\n+\t\t\tsenddesc23_w1 = vaddq_u8(senddesc23_w1,\n+\t\t\t\t\t\tvshlq_n_u32(senddesc23_w1, 16));\n+\n+\t\t\txmask01 = vdupq_n_u64(0);\n+\t\t\txmask23 = xmask01;\n+\t\t\tasm volatile (\"LD1 {%[a].H}[0],[%[in]]\\n\\t\" :\n+\t\t\t\t [a]\"+w\"(xmask01) : [in]\"r\"(mbuf0) : \"memory\");\n+\n+\t\t\tasm volatile (\"LD1 {%[a].H}[4],[%[in]]\\n\\t\" :\n+\t\t\t\t [a]\"+w\"(xmask01) : [in]\"r\"(mbuf1) : \"memory\");\n+\n+\t\t\tasm volatile (\"LD1 {%[b].H}[0],[%[in]]\\n\\t\" :\n+\t\t\t\t [b]\"+w\"(xmask23) : [in]\"r\"(mbuf2) : \"memory\");\n+\n+\t\t\tasm volatile (\"LD1 {%[b].H}[4],[%[in]]\\n\\t\" :\n+\t\t\t\t [b]\"+w\"(xmask23) : [in]\"r\"(mbuf3) : \"memory\");\n+\t\t\txmask01 = vshlq_n_u64(xmask01, 20);\n+\t\t\txmask23 = vshlq_n_u64(xmask23, 20);\n+\n+\t\t\tsenddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);\n+\t\t\tsenddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);\n+\t\t\t/* Move ltypes to senddesc*_w1 */\n+\t\t\tsenddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);\n+\t\t\tsenddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);\n+\n+\t\t\t/* Create first half of 4W cmd for 4 mbufs (sendhdr) */\n+\t\t\tcmd00 = vzip1q_u64(senddesc01_w0, senddesc01_w1);\n+\t\t\tcmd10 = vzip2q_u64(senddesc01_w0, senddesc01_w1);\n+\t\t\tcmd20 = vzip1q_u64(senddesc23_w0, senddesc23_w1);\n+\t\t\tcmd30 = vzip2q_u64(senddesc23_w0, senddesc23_w1);\n+\t\t} else {\n+\t\t\t/* Just use ld1q to retrieve aura\n+\t\t\t * when we don't need tx_offload\n+\t\t\t */\n+\t\t\tmbuf0 = (uint64_t *)((uintptr_t)mbuf0 +\n+\t\t\t\t\toffsetof(struct rte_mempool, pool_id));\n+\t\t\tmbuf1 = (uint64_t *)((uintptr_t)mbuf1 +\n+\t\t\t\t\toffsetof(struct rte_mempool, pool_id));\n+\t\t\tmbuf2 = (uint64_t *)((uintptr_t)mbuf2 +\n+\t\t\t\t\toffsetof(struct rte_mempool, pool_id));\n+\t\t\tmbuf3 = (uint64_t *)((uintptr_t)mbuf3 +\n+\t\t\t\t\toffsetof(struct rte_mempool, pool_id));\n+\t\t\txmask01 = vdupq_n_u64(0);\n+\t\t\txmask23 = xmask01;\n+\t\t\tasm volatile (\"LD1 {%[a].H}[0],[%[in]]\\n\\t\" :\n+\t\t\t\t [a]\"+w\"(xmask01) : [in]\"r\"(mbuf0) : \"memory\");\n+\n+\t\t\tasm volatile (\"LD1 {%[a].H}[4],[%[in]]\\n\\t\" :\n+\t\t\t\t [a]\"+w\"(xmask01) : [in]\"r\"(mbuf1) : \"memory\");\n+\n+\t\t\tasm volatile (\"LD1 {%[b].H}[0],[%[in]]\\n\\t\" :\n+\t\t\t\t [b]\"+w\"(xmask23) : [in]\"r\"(mbuf2) : \"memory\");\n+\n+\t\t\tasm volatile (\"LD1 {%[b].H}[4],[%[in]]\\n\\t\" :\n+\t\t\t\t [b]\"+w\"(xmask23) : [in]\"r\"(mbuf3) : \"memory\");\n+\t\t\txmask01 = vshlq_n_u64(xmask01, 20);\n+\t\t\txmask23 = vshlq_n_u64(xmask23, 20);\n+\n+\t\t\tsenddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);\n+\t\t\tsenddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);\n+\n+\t\t\t/* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */\n+\t\t\tcmd00 = vzip1q_u64(senddesc01_w0, senddesc01_w1);\n+\t\t\tcmd01 = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);\n+\t\t\tcmd10 = vzip2q_u64(senddesc01_w0, senddesc01_w1);\n+\t\t\tcmd11 = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);\n+\t\t\tcmd20 = vzip1q_u64(senddesc23_w0, senddesc23_w1);\n+\t\t\tcmd21 = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);\n+\t\t\tcmd30 = vzip2q_u64(senddesc23_w0, senddesc23_w1);\n+\t\t\tcmd31 = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);\n+\t\t}\n+\n+\t\tdo {\n+\t\t\tvst1q_u64(lmt_addr, cmd00);\n+\t\t\tvst1q_u64(lmt_addr + 2, cmd01);\n+\t\t\tvst1q_u64(lmt_addr + 4, cmd10);\n+\t\t\tvst1q_u64(lmt_addr + 6, cmd11);\n+\t\t\tvst1q_u64(lmt_addr + 8, cmd20);\n+\t\t\tvst1q_u64(lmt_addr + 10, cmd21);\n+\t\t\tvst1q_u64(lmt_addr + 12, cmd30);\n+\t\t\tvst1q_u64(lmt_addr + 14, cmd31);\n+\t\t\tlmt_status = otx2_lmt_submit(io_addr);\n+\n+\t\t} while (lmt_status == 0);\n+\t}\n+\n+\treturn pkts;\n+}\n+\n+#else\n+static __rte_always_inline uint16_t\n+nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n+\t\t     uint16_t pkts, const uint16_t flags)\n+{\n+\tRTE_SET_USED(tx_queue);\n+\tRTE_SET_USED(tx_pkts);\n+\tRTE_SET_USED(pkts);\n+\tRTE_SET_USED(flags);\n+\treturn 0;\n+}\n+#endif\n+\n #define T(name, f4, f3, f2, f1, f0, sz, flags)\t\t\t\t\\\n static uint16_t __rte_noinline\t__hot\t\t\t\t\t\\\n otx2_nix_xmit_pkts_ ## name(void *tx_queue,\t\t\t\t\\\n@@ -107,6 +960,21 @@ otx2_nix_xmit_pkts_mseg_ ## name(void *tx_queue,\t\t\t\\\n NIX_TX_FASTPATH_MODES\n #undef T\n \n+#define T(name, f4, f3, f2, f1, f0, sz, flags)\t\t\t\t\\\n+static uint16_t __rte_noinline\t__hot\t\t\t\t\t\\\n+otx2_nix_xmit_pkts_vec_ ## name(void *tx_queue,\t\t\t\t\\\n+\t\t\tstruct rte_mbuf **tx_pkts, uint16_t pkts)\t\\\n+{\t\t\t\t\t\t\t\t\t\\\n+\t/* VLAN and TSTMP is not supported by vec */\t\t\t\\\n+\tif ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F ||\t\t\t\\\n+\t    (flags) & NIX_TX_OFFLOAD_TSTAMP_F)\t\t\t\t\\\n+\t\treturn 0;\t\t\t\t\t\t\\\n+\treturn nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, (flags));\t\\\n+}\n+\n+NIX_TX_FASTPATH_MODES\n+#undef T\n+\n static inline void\n pick_tx_func(struct rte_eth_dev *eth_dev,\n \t     const eth_tx_burst_t tx_burst[2][2][2][2][2])\n@@ -143,7 +1011,20 @@ NIX_TX_FASTPATH_MODES\n #undef T\n \t};\n \n-\tpick_tx_func(eth_dev, nix_eth_tx_burst);\n+\tconst eth_tx_burst_t nix_eth_tx_vec_burst[2][2][2][2][2] = {\n+#define T(name, f4, f3, f2, f1, f0, sz, flags)\t\t\t\t\\\n+\t[f4][f3][f2][f1][f0] =  otx2_nix_xmit_pkts_vec_ ## name,\n+\n+NIX_TX_FASTPATH_MODES\n+#undef T\n+\t};\n+\n+\tif (dev->scalar_ena ||\n+\t    (dev->tx_offload_flags &\n+\t     (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F)))\n+\t\tpick_tx_func(eth_dev, nix_eth_tx_burst);\n+\telse\n+\t\tpick_tx_func(eth_dev, nix_eth_tx_vec_burst);\n \n \tif (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)\n \t\tpick_tx_func(eth_dev, nix_eth_tx_burst_mseg);\n",
    "prefixes": [
        "v2",
        "53/57"
    ]
}