get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/94546/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 94546,
    "url": "https://patches.dpdk.org/api/patches/94546/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/20210619110154.10301-6-pbhagavatula@marvell.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20210619110154.10301-6-pbhagavatula@marvell.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20210619110154.10301-6-pbhagavatula@marvell.com",
    "date": "2021-06-19T11:01:46",
    "name": "[v2,06/13] net/cnxk: add multi seg Tx vector routine",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "9af386d1ebb887e89b3eeb12da0df70ee9dbaac4",
    "submitter": {
        "id": 1183,
        "url": "https://patches.dpdk.org/api/people/1183/?format=api",
        "name": "Pavan Nikhilesh Bhagavatula",
        "email": "pbhagavatula@marvell.com"
    },
    "delegate": null,
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/20210619110154.10301-6-pbhagavatula@marvell.com/mbox/",
    "series": [
        {
            "id": 17405,
            "url": "https://patches.dpdk.org/api/series/17405/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=17405",
            "date": "2021-06-19T11:01:41",
            "name": "[v2,01/13] net/cnxk: add multi seg Rx vector routine",
            "version": 2,
            "mbox": "https://patches.dpdk.org/series/17405/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/94546/comments/",
    "check": "warning",
    "checks": "https://patches.dpdk.org/api/patches/94546/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 9D314A0A0C;\n\tSat, 19 Jun 2021 13:02:57 +0200 (CEST)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 7D00141158;\n\tSat, 19 Jun 2021 13:02:30 +0200 (CEST)",
            "from mx0b-0016f401.pphosted.com (mx0b-0016f401.pphosted.com\n [67.231.156.173])\n by mails.dpdk.org (Postfix) with ESMTP id 70BE341158\n for <dev@dpdk.org>; Sat, 19 Jun 2021 13:02:29 +0200 (CEST)",
            "from pps.filterd (m0045851.ppops.net [127.0.0.1])\n by mx0b-0016f401.pphosted.com (8.16.0.43/8.16.0.43) with SMTP id\n 15JAu41B007712 for <dev@dpdk.org>; Sat, 19 Jun 2021 04:02:28 -0700",
            "from dc5-exch02.marvell.com ([199.233.59.182])\n by mx0b-0016f401.pphosted.com with ESMTP id 398tu0v61j-1\n (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT)\n for <dev@dpdk.org>; Sat, 19 Jun 2021 04:02:28 -0700",
            "from DC5-EXCH01.marvell.com (10.69.176.38) by DC5-EXCH02.marvell.com\n (10.69.176.39) with Microsoft SMTP Server (TLS) id 15.0.1497.18;\n Sat, 19 Jun 2021 04:02:26 -0700",
            "from maili.marvell.com (10.69.176.80) by DC5-EXCH01.marvell.com\n (10.69.176.38) with Microsoft SMTP Server id 15.0.1497.18 via Frontend\n Transport; Sat, 19 Jun 2021 04:02:26 -0700",
            "from BG-LT7430.marvell.com (BG-LT7430.marvell.com [10.28.177.176])\n by maili.marvell.com (Postfix) with ESMTP id 8BECD5B6966;\n Sat, 19 Jun 2021 04:02:23 -0700 (PDT)"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com;\n h=from : to : cc :\n subject : date : message-id : in-reply-to : references : mime-version :\n content-transfer-encoding : content-type; s=pfpt0220;\n bh=AnT92RmO0e1plPL1XtKTleGo54fbjYovYKzMZZRs3Ng=;\n b=h/EnSsZsBCMCFQ66FpgE+Dmm1MwkqlCSwiGMOYErQNfzlb74qqP5Yk8EscXwN8L+i9Z2\n 2EecKQ+ifm90OBS60H3eIjOuGXHsmusH0TO24bKNrxSRYF0Rf6CNTOEMzOokAEpCp2zO\n GDAvLeRa2PspIPEkF7GTKSVmYmVXDwBRzuq4yD0nakBGY6WwbRNBGMxYQ2z5DEIwCHy5\n Rf439cOJVEb45VGdTBd5lFt0dE4vOSo3XugQYBT0YYRSvAhmVa4ASCZxnqEiYqLcvHdC\n /kMnOqSBd9N8KrQjGwE7P/6zVRSqhpS05FDCqiFup+yK2dmLh3jdrTQ/PJeHXcSzTVRc CQ==",
        "From": "<pbhagavatula@marvell.com>",
        "To": "<jerinj@marvell.com>, Nithin Dabilpuram <ndabilpuram@marvell.com>, \"Kiran\n Kumar K\" <kirankumark@marvell.com>, Sunil Kumar Kori <skori@marvell.com>,\n Satha Rao <skoteshwar@marvell.com>",
        "CC": "<dev@dpdk.org>, Pavan Nikhilesh <pbhagavatula@marvell.com>",
        "Date": "Sat, 19 Jun 2021 16:31:46 +0530",
        "Message-ID": "<20210619110154.10301-6-pbhagavatula@marvell.com>",
        "X-Mailer": "git-send-email 2.17.1",
        "In-Reply-To": "<20210619110154.10301-1-pbhagavatula@marvell.com>",
        "References": "<20210524122303.1116-1-pbhagavatula@marvell.com>\n <20210619110154.10301-1-pbhagavatula@marvell.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "Content-Type": "text/plain",
        "X-Proofpoint-GUID": "ZYrXI3eo6t7QVb4Us4tm4jPVcrpiFEcU",
        "X-Proofpoint-ORIG-GUID": "ZYrXI3eo6t7QVb4Us4tm4jPVcrpiFEcU",
        "X-Proofpoint-Virus-Version": "vendor=fsecure engine=2.50.10434:6.0.391, 18.0.790\n definitions=2021-06-19_09:2021-06-18,\n 2021-06-19 signatures=0",
        "Subject": "[dpdk-dev] [PATCH v2 06/13] net/cnxk: add multi seg Tx vector\n routine",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "From: Pavan Nikhilesh <pbhagavatula@marvell.com>\n\nAdd multi segment Tx vector routine.\n\nSigned-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>\n---\n drivers/net/cnxk/cn10k_tx.c          |  20 +-\n drivers/net/cnxk/cn10k_tx.h          | 388 +++++++++++++++++++++++++--\n drivers/net/cnxk/cn10k_tx_vec_mseg.c |  24 ++\n drivers/net/cnxk/cn9k_tx.c           |  20 +-\n drivers/net/cnxk/cn9k_tx.h           | 272 ++++++++++++++++++-\n drivers/net/cnxk/cn9k_tx_vec_mseg.c  |  24 ++\n drivers/net/cnxk/meson.build         |   6 +-\n 7 files changed, 709 insertions(+), 45 deletions(-)\n create mode 100644 drivers/net/cnxk/cn10k_tx_vec_mseg.c\n create mode 100644 drivers/net/cnxk/cn9k_tx_vec_mseg.c",
    "diff": "diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c\nindex d06879163f..1f30bab59a 100644\n--- a/drivers/net/cnxk/cn10k_tx.c\n+++ b/drivers/net/cnxk/cn10k_tx.c\n@@ -67,13 +67,23 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)\n #undef T\n \t};\n \n-\tif (dev->scalar_ena)\n+\tconst eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = {\n+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \\\n+\t[f5][f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_vec_mseg_##name,\n+\n+\t\tNIX_TX_FASTPATH_MODES\n+#undef T\n+\t};\n+\n+\tif (dev->scalar_ena) {\n \t\tpick_tx_func(eth_dev, nix_eth_tx_burst);\n-\telse\n+\t\tif (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)\n+\t\t\tpick_tx_func(eth_dev, nix_eth_tx_burst_mseg);\n+\t} else {\n \t\tpick_tx_func(eth_dev, nix_eth_tx_vec_burst);\n-\n-\tif (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)\n-\t\tpick_tx_func(eth_dev, nix_eth_tx_burst_mseg);\n+\t\tif (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)\n+\t\t\tpick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg);\n+\t}\n \n \trte_mb();\n }\ndiff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h\nindex cea7c6cd34..b25b20dcb2 100644\n--- a/drivers/net/cnxk/cn10k_tx.h\n+++ b/drivers/net/cnxk/cn10k_tx.h\n@@ -42,6 +42,13 @@\n \t\t}                                                              \\\n \t} while (0)\n \n+/* Encoded number of segments to number of dwords macro, each value of nb_segs\n+ * is encoded as 4bits.\n+ */\n+#define NIX_SEGDW_MAGIC 0x76654432210ULL\n+\n+#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)\n+\n #define LMT_OFF(lmt_addr, lmt_num, offset)                                     \\\n \t(void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))\n \n@@ -102,6 +109,14 @@ cn10k_nix_tx_steor_data(const uint16_t flags)\n \treturn data;\n }\n \n+static __rte_always_inline uint8_t\n+cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)\n+{\n+\treturn ((flags & NIX_TX_NEED_EXT_HDR) ?\n+\t\t\t      (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :\n+\t\t\t      4);\n+}\n+\n static __rte_always_inline uint64_t\n cn10k_nix_tx_steor_vec_data(const uint16_t flags)\n {\n@@ -729,7 +744,244 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,\n \t}\n }\n \n+static __rte_always_inline void\n+cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,\n+\t\t\t\tunion nix_send_hdr_w0_u *sh,\n+\t\t\t\tunion nix_send_sg_s *sg, const uint32_t flags)\n+{\n+\tstruct rte_mbuf *m_next;\n+\tuint64_t *slist, sg_u;\n+\tuint16_t nb_segs;\n+\tint i = 1;\n+\n+\tsh->total = m->pkt_len;\n+\t/* Clear sg->u header before use */\n+\tsg->u &= 0xFC00000000000000;\n+\tsg_u = sg->u;\n+\tslist = &cmd[0];\n+\n+\tsg_u = sg_u | ((uint64_t)m->data_len);\n+\n+\tnb_segs = m->nb_segs - 1;\n+\tm_next = m->next;\n+\n+\t/* Set invert df if buffer is not to be freed by H/W */\n+\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)\n+\t\tsg_u |= (cnxk_nix_prefree_seg(m) << 55);\n+\t\t/* Mark mempool object as \"put\" since it is freed by NIX */\n+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG\n+\tif (!(sg_u & (1ULL << 55)))\n+\t\t__mempool_check_cookies(m->pool, (void **)&m, 1, 0);\n+\trte_io_wmb();\n+#endif\n+\n+\tm = m_next;\n+\t/* Fill mbuf segments */\n+\tdo {\n+\t\tm_next = m->next;\n+\t\tsg_u = sg_u | ((uint64_t)m->data_len << (i << 4));\n+\t\t*slist = rte_mbuf_data_iova(m);\n+\t\t/* Set invert df if buffer is not to be freed by H/W */\n+\t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)\n+\t\t\tsg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));\n+\t\t\t/* Mark mempool object as \"put\" since it is freed by NIX\n+\t\t\t */\n+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG\n+\t\tif (!(sg_u & (1ULL << (i + 55))))\n+\t\t\t__mempool_check_cookies(m->pool, (void **)&m, 1, 0);\n+\t\trte_io_wmb();\n+#endif\n+\t\tslist++;\n+\t\ti++;\n+\t\tnb_segs--;\n+\t\tif (i > 2 && nb_segs) {\n+\t\t\ti = 0;\n+\t\t\t/* Next SG subdesc */\n+\t\t\t*(uint64_t *)slist = sg_u & 0xFC00000000000000;\n+\t\t\tsg->u = sg_u;\n+\t\t\tsg->segs = 3;\n+\t\t\tsg = (union nix_send_sg_s *)slist;\n+\t\t\tsg_u = sg->u;\n+\t\t\tslist++;\n+\t\t}\n+\t\tm = m_next;\n+\t} while (nb_segs);\n+\n+\tsg->u = sg_u;\n+\tsg->segs = i;\n+}\n+\n+static __rte_always_inline void\n+cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,\n+\t\t\t   uint64x2_t *cmd1, const uint8_t segdw,\n+\t\t\t   const uint32_t flags)\n+{\n+\tunion nix_send_hdr_w0_u sh;\n+\tunion nix_send_sg_s sg;\n+\n+\tif (m->nb_segs == 1) {\n+\t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {\n+\t\t\tsg.u = vgetq_lane_u64(cmd1[0], 0);\n+\t\t\tsg.u |= (cnxk_nix_prefree_seg(m) << 55);\n+\t\t\tcmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);\n+\t\t}\n+\n+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG\n+\t\tsg.u = vgetq_lane_u64(cmd1[0], 0);\n+\t\tif (!(sg.u & (1ULL << 55)))\n+\t\t\t__mempool_check_cookies(m->pool, (void **)&m, 1, 0);\n+\t\trte_io_wmb();\n+#endif\n+\t\treturn;\n+\t}\n+\n+\tsh.u = vgetq_lane_u64(cmd0[0], 0);\n+\tsg.u = vgetq_lane_u64(cmd1[0], 0);\n+\n+\tcn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);\n+\n+\tsh.sizem1 = segdw - 1;\n+\tcmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);\n+\tcmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);\n+}\n+\n #define NIX_DESCS_PER_LOOP 4\n+\n+static __rte_always_inline uint8_t\n+cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,\n+\t\t\t       uint64x2_t *cmd1, uint64x2_t *cmd2,\n+\t\t\t       uint64x2_t *cmd3, uint8_t *segdw,\n+\t\t\t       uint64_t *lmt_addr, __uint128_t *data128,\n+\t\t\t       uint8_t *shift, const uint16_t flags)\n+{\n+\tuint8_t j, off, lmt_used;\n+\n+\tif (!(flags & NIX_TX_NEED_EXT_HDR) &&\n+\t    !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {\n+\t\t/* No segments in 4 consecutive packets. */\n+\t\tif ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {\n+\t\t\tfor (j = 0; j < NIX_DESCS_PER_LOOP; j++)\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j], NULL,\n+\t\t\t\t\t\t\t   &cmd0[j], &cmd1[j],\n+\t\t\t\t\t\t\t   segdw[j], flags);\n+\t\t\tvst1q_u64(lmt_addr, cmd0[0]);\n+\t\t\tvst1q_u64(lmt_addr + 2, cmd1[0]);\n+\t\t\tvst1q_u64(lmt_addr + 4, cmd0[1]);\n+\t\t\tvst1q_u64(lmt_addr + 6, cmd1[1]);\n+\t\t\tvst1q_u64(lmt_addr + 8, cmd0[2]);\n+\t\t\tvst1q_u64(lmt_addr + 10, cmd1[2]);\n+\t\t\tvst1q_u64(lmt_addr + 12, cmd0[3]);\n+\t\t\tvst1q_u64(lmt_addr + 14, cmd1[3]);\n+\n+\t\t\t*data128 |= ((__uint128_t)7) << *shift;\n+\t\t\tshift += 3;\n+\n+\t\t\treturn 1;\n+\t\t}\n+\t}\n+\n+\tlmt_used = 0;\n+\tfor (j = 0; j < NIX_DESCS_PER_LOOP;) {\n+\t\t/* Fit consecutive packets in same LMTLINE. */\n+\t\tif ((segdw[j] + segdw[j + 1]) <= 8) {\n+\t\t\tif (flags & NIX_TX_OFFLOAD_TSTAMP_F) {\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j], NULL,\n+\t\t\t\t\t\t\t   &cmd0[j], &cmd1[j],\n+\t\t\t\t\t\t\t   segdw[j], flags);\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,\n+\t\t\t\t\t\t\t   &cmd0[j + 1],\n+\t\t\t\t\t\t\t   &cmd1[j + 1],\n+\t\t\t\t\t\t\t   segdw[j + 1], flags);\n+\t\t\t\t/* TSTAMP takes 4 each, no segs. */\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd2[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd1[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 6, cmd3[j]);\n+\n+\t\t\t\tvst1q_u64(lmt_addr + 8, cmd0[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 10, cmd2[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 12, cmd1[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 14, cmd3[j + 1]);\n+\t\t\t} else if (flags & NIX_TX_NEED_EXT_HDR) {\n+\t\t\t\t/* EXT header take 3 each, space for 2 segs.*/\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j],\n+\t\t\t\t\t\t\t   lmt_addr + 6,\n+\t\t\t\t\t\t\t   &cmd0[j], &cmd1[j],\n+\t\t\t\t\t\t\t   segdw[j], flags);\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd2[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd1[j]);\n+\t\t\t\toff = segdw[j] - 3;\n+\t\t\t\toff <<= 1;\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j + 1],\n+\t\t\t\t\t\t\t   lmt_addr + 12 + off,\n+\t\t\t\t\t\t\t   &cmd0[j + 1],\n+\t\t\t\t\t\t\t   &cmd1[j + 1],\n+\t\t\t\t\t\t\t   segdw[j + 1], flags);\n+\t\t\t\tvst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);\n+\t\t\t} else {\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j],\n+\t\t\t\t\t\t\t   lmt_addr + 4,\n+\t\t\t\t\t\t\t   &cmd0[j], &cmd1[j],\n+\t\t\t\t\t\t\t   segdw[j], flags);\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd1[j]);\n+\t\t\t\toff = segdw[j] - 2;\n+\t\t\t\toff <<= 1;\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j + 1],\n+\t\t\t\t\t\t\t   lmt_addr + 8 + off,\n+\t\t\t\t\t\t\t   &cmd0[j + 1],\n+\t\t\t\t\t\t\t   &cmd1[j + 1],\n+\t\t\t\t\t\t\t   segdw[j + 1], flags);\n+\t\t\t\tvst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);\n+\t\t\t}\n+\t\t\t*data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)\n+\t\t\t\t    << *shift;\n+\t\t\t*shift += 3;\n+\t\t\tj += 2;\n+\t\t} else {\n+\t\t\tif ((flags & NIX_TX_NEED_EXT_HDR) &&\n+\t\t\t    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j],\n+\t\t\t\t\t\t\t   lmt_addr + 6,\n+\t\t\t\t\t\t\t   &cmd0[j], &cmd1[j],\n+\t\t\t\t\t\t\t   segdw[j], flags);\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd2[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd1[j]);\n+\t\t\t\toff = segdw[j] - 4;\n+\t\t\t\toff <<= 1;\n+\t\t\t\tvst1q_u64(lmt_addr + 6 + off, cmd3[j]);\n+\t\t\t} else if (flags & NIX_TX_NEED_EXT_HDR) {\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j],\n+\t\t\t\t\t\t\t   lmt_addr + 6,\n+\t\t\t\t\t\t\t   &cmd0[j], &cmd1[j],\n+\t\t\t\t\t\t\t   segdw[j], flags);\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd2[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd1[j]);\n+\t\t\t} else {\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j],\n+\t\t\t\t\t\t\t   lmt_addr + 4,\n+\t\t\t\t\t\t\t   &cmd0[j], &cmd1[j],\n+\t\t\t\t\t\t\t   segdw[j], flags);\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd1[j]);\n+\t\t\t}\n+\t\t\t*data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;\n+\t\t\t*shift += 3;\n+\t\t\tj++;\n+\t\t}\n+\t\tlmt_used++;\n+\t\tlmt_addr += 16;\n+\t}\n+\n+\treturn lmt_used;\n+}\n+\n static __rte_always_inline uint16_t\n cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\t   uint16_t pkts, uint64_t *cmd, const uint16_t flags)\n@@ -738,7 +990,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \tuint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;\n \tuint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],\n \t\tcmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];\n-\tuint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;\n+\tuint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;\n \tuint64x2_t senddesc01_w0, senddesc23_w0;\n \tuint64x2_t senddesc01_w1, senddesc23_w1;\n \tuint16_t left, scalar, burst, i, lmt_id;\n@@ -746,6 +998,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \tuint64x2_t sendext01_w1, sendext23_w1;\n \tuint64x2_t sendmem01_w0, sendmem23_w0;\n \tuint64x2_t sendmem01_w1, sendmem23_w1;\n+\tuint8_t segdw[NIX_DESCS_PER_LOOP + 1];\n \tuint64x2_t sgdesc01_w0, sgdesc23_w0;\n \tuint64x2_t sgdesc01_w1, sgdesc23_w1;\n \tstruct cn10k_eth_txq *txq = tx_queue;\n@@ -754,7 +1007,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \tuint64x2_t ltypes01, ltypes23;\n \tuint64x2_t xtmp128, ytmp128;\n \tuint64x2_t xmask01, xmask23;\n-\tuint8_t lnum;\n+\tuint8_t lnum, shift;\n+\tunion wdata {\n+\t\t__uint128_t data128;\n+\t\tuint64_t data[2];\n+\t} wd;\n \n \tNIX_XMIT_FC_OR_RETURN(txq, pkts);\n \n@@ -798,8 +1055,43 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \tburst = left > cn10k_nix_pkts_per_vec_brst(flags) ?\n \t\t\t      cn10k_nix_pkts_per_vec_brst(flags) :\n \t\t\t      left;\n+\tif (flags & NIX_TX_MULTI_SEG_F) {\n+\t\twd.data128 = 0;\n+\t\tshift = 16;\n+\t}\n \tlnum = 0;\n+\n \tfor (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {\n+\t\tif (flags & NIX_TX_MULTI_SEG_F) {\n+\t\t\tstruct rte_mbuf *m = tx_pkts[j];\n+\t\t\tuint8_t j;\n+\n+\t\t\tfor (j = 0; j < NIX_DESCS_PER_LOOP; j++) {\n+\t\t\t\t/* Get dwords based on nb_segs. */\n+\t\t\t\tsegdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);\n+\t\t\t\t/* Add dwords based on offloads. */\n+\t\t\t\tsegdw[j] += 1 + /* SEND HDR */\n+\t\t\t\t\t    !!(flags & NIX_TX_NEED_EXT_HDR) +\n+\t\t\t\t\t    !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);\n+\t\t\t}\n+\n+\t\t\t/* Check if there are enough LMTLINES for this loop */\n+\t\t\tif (lnum + 4 > 32) {\n+\t\t\t\tuint8_t ldwords_con = 0, lneeded = 0;\n+\t\t\t\tfor (j = 0; j < NIX_DESCS_PER_LOOP; j++) {\n+\t\t\t\t\tldwords_con += segdw[j];\n+\t\t\t\t\tif (ldwords_con > 8) {\n+\t\t\t\t\t\tlneeded += 1;\n+\t\t\t\t\t\tldwords_con = segdw[j];\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t\tlneeded += 1;\n+\t\t\t\tif (lnum + lneeded > 32) {\n+\t\t\t\t\tburst = i;\n+\t\t\t\t\tbreak;\n+\t\t\t\t}\n+\t\t\t}\n+\t\t}\n \t\t/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */\n \t\tsenddesc01_w0 =\n \t\t\tvbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));\n@@ -1527,7 +1819,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\tsendext23_w0 = vld1q_u64(sx_w0 + 2);\n \t\t}\n \n-\t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {\n+\t\tif ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&\n+\t\t    !(flags & NIX_TX_MULTI_SEG_F)) {\n \t\t\t/* Set don't free bit if reference count > 1 */\n \t\t\txmask01 = vdupq_n_u64(0);\n \t\t\txmask23 = xmask01;\n@@ -1567,7 +1860,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\t\t\t(void **)&mbuf3, 1, 0);\n \t\t\tsenddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);\n \t\t\tsenddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);\n-\t\t} else {\n+\t\t} else if (!(flags & NIX_TX_MULTI_SEG_F)) {\n \t\t\t/* Move mbufs to iova */\n \t\t\tmbuf0 = (uint64_t *)tx_pkts[0];\n \t\t\tmbuf1 = (uint64_t *)tx_pkts[1];\n@@ -1612,7 +1905,19 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\tcmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);\n \t\t}\n \n-\t\tif (flags & NIX_TX_NEED_EXT_HDR) {\n+\t\tif (flags & NIX_TX_MULTI_SEG_F) {\n+\t\t\tuint8_t j;\n+\n+\t\t\tsegdw[4] = 8;\n+\t\t\tj = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,\n+\t\t\t\t\t\t\t  cmd2, cmd3, segdw,\n+\t\t\t\t\t\t\t  (uint64_t *)\n+\t\t\t\t\t\t\t  LMT_OFF(laddr, lnum,\n+\t\t\t\t\t\t\t\t  0),\n+\t\t\t\t\t\t\t  &wd.data128, &shift,\n+\t\t\t\t\t\t\t  flags);\n+\t\t\tlnum += j;\n+\t\t} else if (flags & NIX_TX_NEED_EXT_HDR) {\n \t\t\t/* Store the prepared send desc to LMT lines */\n \t\t\tif (flags & NIX_TX_OFFLOAD_TSTAMP_F) {\n \t\t\t\tvst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);\n@@ -1664,34 +1969,55 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\ttx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;\n \t}\n \n+\tif (flags & NIX_TX_MULTI_SEG_F)\n+\t\twd.data[0] >>= 16;\n+\n \t/* Trigger LMTST */\n \tif (lnum > 16) {\n-\t\tdata = cn10k_nix_tx_steor_vec_data(flags);\n-\t\tpa = io_addr | (data & 0x7) << 4;\n-\t\tdata &= ~0x7ULL;\n-\t\tdata |= (15ULL << 12);\n-\t\tdata |= (uint64_t)lmt_id;\n+\t\tif (!(flags & NIX_TX_MULTI_SEG_F))\n+\t\t\twd.data[0] = cn10k_nix_tx_steor_vec_data(flags);\n+\n+\t\tpa = io_addr | (wd.data[0] & 0x7) << 4;\n+\t\twd.data[0] &= ~0x7ULL;\n+\n+\t\tif (flags & NIX_TX_MULTI_SEG_F)\n+\t\t\twd.data[0] <<= 16;\n+\n+\t\twd.data[0] |= (15ULL << 12);\n+\t\twd.data[0] |= (uint64_t)lmt_id;\n \n \t\t/* STEOR0 */\n-\t\troc_lmt_submit_steorl(data, pa);\n+\t\troc_lmt_submit_steorl(wd.data[0], pa);\n \n-\t\tdata = cn10k_nix_tx_steor_vec_data(flags);\n-\t\tpa = io_addr | (data & 0x7) << 4;\n-\t\tdata &= ~0x7ULL;\n-\t\tdata |= ((uint64_t)(lnum - 17)) << 12;\n-\t\tdata |= (uint64_t)(lmt_id + 16);\n+\t\tif (!(flags & NIX_TX_MULTI_SEG_F))\n+\t\t\twd.data[1] = cn10k_nix_tx_steor_vec_data(flags);\n+\n+\t\tpa = io_addr | (wd.data[1] & 0x7) << 4;\n+\t\twd.data[1] &= ~0x7ULL;\n+\n+\t\tif (flags & NIX_TX_MULTI_SEG_F)\n+\t\t\twd.data[1] <<= 16;\n+\n+\t\twd.data[1] |= ((uint64_t)(lnum - 17)) << 12;\n+\t\twd.data[1] |= (uint64_t)(lmt_id + 16);\n \n \t\t/* STEOR1 */\n-\t\troc_lmt_submit_steorl(data, pa);\n+\t\troc_lmt_submit_steorl(wd.data[1], pa);\n \t} else if (lnum) {\n-\t\tdata = cn10k_nix_tx_steor_vec_data(flags);\n-\t\tpa = io_addr | (data & 0x7) << 4;\n-\t\tdata &= ~0x7ULL;\n-\t\tdata |= ((uint64_t)(lnum - 1)) << 12;\n-\t\tdata |= lmt_id;\n+\t\tif (!(flags & NIX_TX_MULTI_SEG_F))\n+\t\t\twd.data[0] = cn10k_nix_tx_steor_vec_data(flags);\n+\n+\t\tpa = io_addr | (wd.data[0] & 0x7) << 4;\n+\t\twd.data[0] &= ~0x7ULL;\n+\n+\t\tif (flags & NIX_TX_MULTI_SEG_F)\n+\t\t\twd.data[0] <<= 16;\n+\n+\t\twd.data[0] |= ((uint64_t)(lnum - 1)) << 12;\n+\t\twd.data[0] |= lmt_id;\n \n \t\t/* STEOR0 */\n-\t\troc_lmt_submit_steorl(data, pa);\n+\t\troc_lmt_submit_steorl(wd.data[0], pa);\n \t}\n \n \tleft -= burst;\n@@ -1699,9 +2025,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \tif (left)\n \t\tgoto again;\n \n-\tif (unlikely(scalar))\n-\t\tpkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd,\n-\t\t\t\t\t    flags);\n+\tif (unlikely(scalar)) {\n+\t\tif (flags & NIX_TX_MULTI_SEG_F)\n+\t\t\tpkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,\n+\t\t\t\t\t\t\t scalar, cmd, flags);\n+\t\telse\n+\t\t\tpkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,\n+\t\t\t\t\t\t    cmd, flags);\n+\t}\n \n \treturn pkts;\n }\n@@ -1866,7 +2197,10 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum,\t1, 1, 1, 1, 1, 1,\t8,\t\\\n \t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \\\n \t\t\t\t\t\t\t\t\t       \\\n \tuint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \\\n-\t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);\n+\t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \\\n+\t\t\t\t\t\t\t\t\t       \\\n+\tuint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \\\n+\t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \\\n \n NIX_TX_FASTPATH_MODES\n #undef T\ndiff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c\nnew file mode 100644\nindex 0000000000..1fad81dbad\n--- /dev/null\n+++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c\n@@ -0,0 +1,24 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(C) 2021 Marvell.\n+ */\n+\n+#include \"cn10k_ethdev.h\"\n+#include \"cn10k_tx.h\"\n+\n+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \\\n+\tuint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \\\n+\t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \\\n+\t{                                                                      \\\n+\t\tuint64_t cmd[sz];                                              \\\n+\t\t\t\t\t\t\t\t\t       \\\n+\t\t/* For TSO inner checksum is a must */                         \\\n+\t\tif (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \\\n+\t\t    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \\\n+\t\t\treturn 0;                                              \\\n+\t\treturn cn10k_nix_xmit_pkts_vector(                             \\\n+\t\t\ttx_queue, tx_pkts, pkts, cmd,                          \\\n+\t\t\t(flags) | NIX_TX_MULTI_SEG_F);                         \\\n+\t}\n+\n+NIX_TX_FASTPATH_MODES\n+#undef T\ndiff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c\nindex 735e21cc60..763f9a14fd 100644\n--- a/drivers/net/cnxk/cn9k_tx.c\n+++ b/drivers/net/cnxk/cn9k_tx.c\n@@ -66,13 +66,23 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)\n #undef T\n \t};\n \n-\tif (dev->scalar_ena)\n+\tconst eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = {\n+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)\t\t\t       \\\n+\t[f5][f4][f3][f2][f1][f0] = cn9k_nix_xmit_pkts_vec_mseg_##name,\n+\n+\t\tNIX_TX_FASTPATH_MODES\n+#undef T\n+\t};\n+\n+\tif (dev->scalar_ena) {\n \t\tpick_tx_func(eth_dev, nix_eth_tx_burst);\n-\telse\n+\t\tif (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)\n+\t\t\tpick_tx_func(eth_dev, nix_eth_tx_burst_mseg);\n+\t} else {\n \t\tpick_tx_func(eth_dev, nix_eth_tx_vec_burst);\n-\n-\tif (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)\n-\t\tpick_tx_func(eth_dev, nix_eth_tx_burst_mseg);\n+\t\tif (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)\n+\t\t\tpick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg);\n+\t}\n \n \trte_mb();\n }\ndiff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h\nindex 2adff45705..42b54a378e 100644\n--- a/drivers/net/cnxk/cn9k_tx.h\n+++ b/drivers/net/cnxk/cn9k_tx.h\n@@ -582,7 +582,238 @@ cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,\n \t}\n }\n \n+static __rte_always_inline uint8_t\n+cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,\n+\t\t\t       union nix_send_hdr_w0_u *sh,\n+\t\t\t       union nix_send_sg_s *sg, const uint32_t flags)\n+{\n+\tstruct rte_mbuf *m_next;\n+\tuint64_t *slist, sg_u;\n+\tuint16_t nb_segs;\n+\tuint64_t segdw;\n+\tint i = 1;\n+\n+\tsh->total = m->pkt_len;\n+\t/* Clear sg->u header before use */\n+\tsg->u &= 0xFC00000000000000;\n+\tsg_u = sg->u;\n+\tslist = &cmd[0];\n+\n+\tsg_u = sg_u | ((uint64_t)m->data_len);\n+\n+\tnb_segs = m->nb_segs - 1;\n+\tm_next = m->next;\n+\n+\t/* Set invert df if buffer is not to be freed by H/W */\n+\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)\n+\t\tsg_u |= (cnxk_nix_prefree_seg(m) << 55);\n+\t\t/* Mark mempool object as \"put\" since it is freed by NIX */\n+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG\n+\tif (!(sg_u & (1ULL << 55)))\n+\t\t__mempool_check_cookies(m->pool, (void **)&m, 1, 0);\n+\trte_io_wmb();\n+#endif\n+\n+\tm = m_next;\n+\t/* Fill mbuf segments */\n+\tdo {\n+\t\tm_next = m->next;\n+\t\tsg_u = sg_u | ((uint64_t)m->data_len << (i << 4));\n+\t\t*slist = rte_mbuf_data_iova(m);\n+\t\t/* Set invert df if buffer is not to be freed by H/W */\n+\t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)\n+\t\t\tsg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));\n+\t\t\t/* Mark mempool object as \"put\" since it is freed by NIX\n+\t\t\t */\n+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG\n+\t\tif (!(sg_u & (1ULL << (i + 55))))\n+\t\t\t__mempool_check_cookies(m->pool, (void **)&m, 1, 0);\n+\t\trte_io_wmb();\n+#endif\n+\t\tslist++;\n+\t\ti++;\n+\t\tnb_segs--;\n+\t\tif (i > 2 && nb_segs) {\n+\t\t\ti = 0;\n+\t\t\t/* Next SG subdesc */\n+\t\t\t*(uint64_t *)slist = sg_u & 0xFC00000000000000;\n+\t\t\tsg->u = sg_u;\n+\t\t\tsg->segs = 3;\n+\t\t\tsg = (union nix_send_sg_s *)slist;\n+\t\t\tsg_u = sg->u;\n+\t\t\tslist++;\n+\t\t}\n+\t\tm = m_next;\n+\t} while (nb_segs);\n+\n+\tsg->u = sg_u;\n+\tsg->segs = i;\n+\tsegdw = (uint64_t *)slist - (uint64_t *)&cmd[0];\n+\n+\tsegdw += 2;\n+\t/* Roundup extra dwords to multiple of 2 */\n+\tsegdw = (segdw >> 1) + (segdw & 0x1);\n+\t/* Default dwords */\n+\tsegdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) +\n+\t\t !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);\n+\tsh->sizem1 = segdw - 1;\n+\n+\treturn segdw;\n+}\n+\n+static __rte_always_inline uint8_t\n+cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,\n+\t\t\t  uint64x2_t *cmd1, const uint32_t flags)\n+{\n+\tunion nix_send_hdr_w0_u sh;\n+\tunion nix_send_sg_s sg;\n+\tuint8_t ret;\n+\n+\tif (m->nb_segs == 1) {\n+\t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {\n+\t\t\tsg.u = vgetq_lane_u64(cmd1[0], 0);\n+\t\t\tsg.u |= (cnxk_nix_prefree_seg(m) << 55);\n+\t\t\tcmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);\n+\t\t}\n+\n+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG\n+\t\tsg.u = vgetq_lane_u64(cmd1[0], 0);\n+\t\tif (!(sg.u & (1ULL << 55)))\n+\t\t\t__mempool_check_cookies(m->pool, (void **)&m, 1, 0);\n+\t\trte_io_wmb();\n+#endif\n+\t\treturn 2 + !!(flags & NIX_TX_NEED_EXT_HDR) +\n+\t\t       !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);\n+\t}\n+\n+\tsh.u = vgetq_lane_u64(cmd0[0], 0);\n+\tsg.u = vgetq_lane_u64(cmd1[0], 0);\n+\n+\tret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);\n+\n+\tcmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);\n+\tcmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);\n+\treturn ret;\n+}\n+\n #define NIX_DESCS_PER_LOOP 4\n+\n+static __rte_always_inline void\n+cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1,\n+\t\t\t       uint64x2_t *cmd2, uint64x2_t *cmd3,\n+\t\t\t       uint8_t *segdw,\n+\t\t\t       uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2],\n+\t\t\t       uint64_t *lmt_addr, rte_iova_t io_addr,\n+\t\t\t       const uint32_t flags)\n+{\n+\tuint64_t lmt_status;\n+\tuint8_t j, off;\n+\n+\tif (!(flags & NIX_TX_NEED_EXT_HDR) &&\n+\t    !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {\n+\t\t/* No segments in 4 consecutive packets. */\n+\t\tif ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {\n+\t\t\tdo {\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[0]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd1[0]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd0[1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 6, cmd1[1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 8, cmd0[2]);\n+\t\t\t\tvst1q_u64(lmt_addr + 10, cmd1[2]);\n+\t\t\t\tvst1q_u64(lmt_addr + 12, cmd0[3]);\n+\t\t\t\tvst1q_u64(lmt_addr + 14, cmd1[3]);\n+\t\t\t\tlmt_status = roc_lmt_submit_ldeor(io_addr);\n+\t\t\t} while (lmt_status == 0);\n+\n+\t\t\treturn;\n+\t\t}\n+\t}\n+\n+\tfor (j = 0; j < NIX_DESCS_PER_LOOP;) {\n+\t\t/* Fit consecutive packets in same LMTLINE. */\n+\t\tif ((segdw[j] + segdw[j + 1]) <= 8) {\n+again0:\n+\t\t\tif ((flags & NIX_TX_NEED_EXT_HDR) &&\n+\t\t\t    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd2[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd1[j]);\n+\t\t\t\t/* Copy segs */\n+\t\t\t\toff = segdw[j] - 4;\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 6, slist[j], off);\n+\t\t\t\toff <<= 1;\n+\t\t\t\tvst1q_u64(lmt_addr + 6 + off, cmd3[j]);\n+\n+\t\t\t\tvst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]);\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 14 + off,\n+\t\t\t\t\t\tslist[j + 1], segdw[j + 1] - 4);\n+\t\t\t\toff += ((segdw[j + 1] - 4) << 1);\n+\t\t\t\tvst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]);\n+\t\t\t} else if (flags & NIX_TX_NEED_EXT_HDR) {\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd2[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd1[j]);\n+\t\t\t\t/* Copy segs */\n+\t\t\t\toff = segdw[j] - 3;\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 6, slist[j], off);\n+\t\t\t\toff <<= 1;\n+\t\t\t\tvst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 12 + off,\n+\t\t\t\t\t\tslist[j + 1], segdw[j + 1] - 3);\n+\t\t\t} else {\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd1[j]);\n+\t\t\t\t/* Copy segs */\n+\t\t\t\toff = segdw[j] - 2;\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 4, slist[j], off);\n+\t\t\t\toff <<= 1;\n+\t\t\t\tvst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 8 + off,\n+\t\t\t\t\t\tslist[j + 1], segdw[j + 1] - 2);\n+\t\t\t}\n+\t\t\tlmt_status = roc_lmt_submit_ldeor(io_addr);\n+\t\t\tif (lmt_status == 0)\n+\t\t\t\tgoto again0;\n+\t\t\tj += 2;\n+\t\t} else {\n+again1:\n+\t\t\tif ((flags & NIX_TX_NEED_EXT_HDR) &&\n+\t\t\t    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd2[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd1[j]);\n+\t\t\t\t/* Copy segs */\n+\t\t\t\toff = segdw[j] - 4;\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 6, slist[j], off);\n+\t\t\t\toff <<= 1;\n+\t\t\t\tvst1q_u64(lmt_addr + 6 + off, cmd3[j]);\n+\t\t\t} else if (flags & NIX_TX_NEED_EXT_HDR) {\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd2[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd1[j]);\n+\t\t\t\t/* Copy segs */\n+\t\t\t\toff = segdw[j] - 3;\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 6, slist[j], off);\n+\t\t\t} else {\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd1[j]);\n+\t\t\t\t/* Copy segs */\n+\t\t\t\toff = segdw[j] - 2;\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 4, slist[j], off);\n+\t\t\t}\n+\t\t\tlmt_status = roc_lmt_submit_ldeor(io_addr);\n+\t\t\tif (lmt_status == 0)\n+\t\t\t\tgoto again1;\n+\t\t\tj += 1;\n+\t\t}\n+\t}\n+}\n+\n static __rte_always_inline uint16_t\n cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\t  uint16_t pkts, uint64_t *cmd, const uint16_t flags)\n@@ -1380,7 +1611,8 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\tsendext23_w0 = vld1q_u64(sx_w0 + 2);\n \t\t}\n \n-\t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {\n+\t\tif ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&\n+\t\t    !(flags & NIX_TX_MULTI_SEG_F)) {\n \t\t\t/* Set don't free bit if reference count > 1 */\n \t\t\txmask01 = vdupq_n_u64(0);\n \t\t\txmask23 = xmask01;\n@@ -1424,7 +1656,7 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\t * cnxk_nix_prefree_seg are written before LMTST.\n \t\t\t */\n \t\t\trte_io_wmb();\n-\t\t} else {\n+\t\t} else if (!(flags & NIX_TX_MULTI_SEG_F)) {\n \t\t\t/* Move mbufs to iova */\n \t\t\tmbuf0 = (uint64_t *)tx_pkts[0];\n \t\t\tmbuf1 = (uint64_t *)tx_pkts[1];\n@@ -1472,7 +1704,27 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\tcmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);\n \t\t}\n \n-\t\tif (flags & NIX_TX_NEED_EXT_HDR) {\n+\t\tif (flags & NIX_TX_MULTI_SEG_F) {\n+\t\t\tuint64_t seg_list[NIX_DESCS_PER_LOOP]\n+\t\t\t\t\t [CNXK_NIX_TX_MSEG_SG_DWORDS - 2];\n+\t\t\tuint8_t j, segdw[NIX_DESCS_PER_LOOP + 1];\n+\n+\t\t\t/* Build mseg list for each packet individually. */\n+\t\t\tfor (j = 0; j < NIX_DESCS_PER_LOOP; j++)\n+\t\t\t\tsegdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j],\n+\t\t\t\t\t\t\tseg_list[j], &cmd0[j],\n+\t\t\t\t\t\t\t&cmd1[j], flags);\n+\t\t\tsegdw[4] = 8;\n+\n+\t\t\t/* Commit all changes to mbuf before LMTST. */\n+\t\t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)\n+\t\t\t\trte_io_wmb();\n+\n+\t\t\tcn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3,\n+\t\t\t\t\t\t       segdw, seg_list,\n+\t\t\t\t\t\t       lmt_addr, io_addr,\n+\t\t\t\t\t\t       flags);\n+\t\t} else if (flags & NIX_TX_NEED_EXT_HDR) {\n \t\t\t/* With ext header in the command we can no longer send\n \t\t\t * all 4 packets together since LMTLINE is 128bytes.\n \t\t\t * Split and Tx twice.\n@@ -1534,9 +1786,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\ttx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;\n \t}\n \n-\tif (unlikely(pkts_left))\n-\t\tpkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd,\n-\t\t\t\t\t   flags);\n+\tif (unlikely(pkts_left)) {\n+\t\tif (flags & NIX_TX_MULTI_SEG_F)\n+\t\t\tpkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,\n+\t\t\t\t\t\t\tpkts_left, cmd, flags);\n+\t\telse\n+\t\t\tpkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left,\n+\t\t\t\t\t\t   cmd, flags);\n+\t}\n \n \treturn pkts;\n }\n@@ -1701,6 +1958,9 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum,\t1, 1, 1, 1, 1, 1,\t8,\t       \\\n \t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \\\n \t\t\t\t\t\t\t\t\t       \\\n \tuint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name(       \\\n+\t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \\\n+\t\t\t\t\t\t\t\t\t       \\\n+\tuint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name(  \\\n \t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);\n \n NIX_TX_FASTPATH_MODES\ndiff --git a/drivers/net/cnxk/cn9k_tx_vec_mseg.c b/drivers/net/cnxk/cn9k_tx_vec_mseg.c\nnew file mode 100644\nindex 0000000000..0256efd45a\n--- /dev/null\n+++ b/drivers/net/cnxk/cn9k_tx_vec_mseg.c\n@@ -0,0 +1,24 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(C) 2021 Marvell.\n+ */\n+\n+#include \"cn9k_ethdev.h\"\n+#include \"cn9k_tx.h\"\n+\n+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \\\n+\tuint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name(  \\\n+\t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \\\n+\t{                                                                      \\\n+\t\tuint64_t cmd[sz];                                              \\\n+\t\t\t\t\t\t\t\t\t       \\\n+\t\t/* For TSO inner checksum is a must */                         \\\n+\t\tif (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \\\n+\t\t    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \\\n+\t\t\treturn 0;                                              \\\n+\t\treturn cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \\\n+\t\t\t\t\t\t (flags) |                     \\\n+\t\t\t\t\t\t\t NIX_TX_MULTI_SEG_F);  \\\n+\t}\n+\n+NIX_TX_FASTPATH_MODES\n+#undef T\ndiff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build\nindex aa8c7253fb..361f7ce849 100644\n--- a/drivers/net/cnxk/meson.build\n+++ b/drivers/net/cnxk/meson.build\n@@ -26,7 +26,8 @@ sources += files('cn9k_ethdev.c',\n \t\t 'cn9k_rx_vec_mseg.c',\n \t\t 'cn9k_tx.c',\n \t\t 'cn9k_tx_mseg.c',\n-\t\t 'cn9k_tx_vec.c')\n+\t\t 'cn9k_tx_vec.c',\n+\t\t 'cn9k_tx_vec_mseg.c')\n # CN10K\n sources += files('cn10k_ethdev.c',\n \t\t 'cn10k_rte_flow.c',\n@@ -36,7 +37,8 @@ sources += files('cn10k_ethdev.c',\n \t\t 'cn10k_rx_vec_mseg.c',\n \t\t 'cn10k_tx.c',\n \t\t 'cn10k_tx_mseg.c',\n-\t\t 'cn10k_tx_vec.c')\n+\t\t 'cn10k_tx_vec.c',\n+\t\t 'cn10k_tx_vec_mseg.c')\n \n deps += ['bus_pci', 'cryptodev', 'eventdev', 'security']\n deps += ['common_cnxk', 'mempool_cnxk']\n",
    "prefixes": [
        "v2",
        "06/13"
    ]
}