Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/94546/?format=api
https://patches.dpdk.org/api/patches/94546/?format=api", "web_url": "https://patches.dpdk.org/project/dpdk/patch/20210619110154.10301-6-pbhagavatula@marvell.com/", "project": { "id": 1, "url": "https://patches.dpdk.org/api/projects/1/?format=api", "name": "DPDK", "link_name": "dpdk", "list_id": "dev.dpdk.org", "list_email": "dev@dpdk.org", "web_url": "http://core.dpdk.org", "scm_url": "git://dpdk.org/dpdk", "webscm_url": "http://git.dpdk.org/dpdk", "list_archive_url": "https://inbox.dpdk.org/dev", "list_archive_url_format": "https://inbox.dpdk.org/dev/{}", "commit_url_format": "" }, "msgid": "<20210619110154.10301-6-pbhagavatula@marvell.com>", "list_archive_url": "https://inbox.dpdk.org/dev/20210619110154.10301-6-pbhagavatula@marvell.com", "date": "2021-06-19T11:01:46", "name": "[v2,06/13] net/cnxk: add multi seg Tx vector routine", "commit_ref": null, "pull_url": null, "state": "superseded", "archived": true, "hash": "9af386d1ebb887e89b3eeb12da0df70ee9dbaac4", "submitter": { "id": 1183, "url": "https://patches.dpdk.org/api/people/1183/?format=api", "name": "Pavan Nikhilesh Bhagavatula", "email": "pbhagavatula@marvell.com" }, "delegate": null, "mbox": "https://patches.dpdk.org/project/dpdk/patch/20210619110154.10301-6-pbhagavatula@marvell.com/mbox/", "series": [ { "id": 17405, "url": "https://patches.dpdk.org/api/series/17405/?format=api", "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=17405", "date": "2021-06-19T11:01:41", "name": "[v2,01/13] net/cnxk: add multi seg Rx vector routine", "version": 2, "mbox": "https://patches.dpdk.org/series/17405/mbox/" } ], "comments": "https://patches.dpdk.org/api/patches/94546/comments/", "check": "warning", "checks": "https://patches.dpdk.org/api/patches/94546/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<dev-bounces@dpdk.org>", "X-Original-To": "patchwork@inbox.dpdk.org", "Delivered-To": "patchwork@inbox.dpdk.org", "Received": [ "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 9D314A0A0C;\n\tSat, 19 Jun 2021 13:02:57 +0200 (CEST)", "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 7D00141158;\n\tSat, 19 Jun 2021 13:02:30 +0200 (CEST)", "from mx0b-0016f401.pphosted.com (mx0b-0016f401.pphosted.com\n [67.231.156.173])\n by mails.dpdk.org (Postfix) with ESMTP id 70BE341158\n for <dev@dpdk.org>; Sat, 19 Jun 2021 13:02:29 +0200 (CEST)", "from pps.filterd (m0045851.ppops.net [127.0.0.1])\n by mx0b-0016f401.pphosted.com (8.16.0.43/8.16.0.43) with SMTP id\n 15JAu41B007712 for <dev@dpdk.org>; Sat, 19 Jun 2021 04:02:28 -0700", "from dc5-exch02.marvell.com ([199.233.59.182])\n by mx0b-0016f401.pphosted.com with ESMTP id 398tu0v61j-1\n (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT)\n for <dev@dpdk.org>; Sat, 19 Jun 2021 04:02:28 -0700", "from DC5-EXCH01.marvell.com (10.69.176.38) by DC5-EXCH02.marvell.com\n (10.69.176.39) with Microsoft SMTP Server (TLS) id 15.0.1497.18;\n Sat, 19 Jun 2021 04:02:26 -0700", "from maili.marvell.com (10.69.176.80) by DC5-EXCH01.marvell.com\n (10.69.176.38) with Microsoft SMTP Server id 15.0.1497.18 via Frontend\n Transport; Sat, 19 Jun 2021 04:02:26 -0700", "from BG-LT7430.marvell.com (BG-LT7430.marvell.com [10.28.177.176])\n by maili.marvell.com (Postfix) with ESMTP id 8BECD5B6966;\n Sat, 19 Jun 2021 04:02:23 -0700 (PDT)" ], "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com;\n h=from : to : cc :\n subject : date : message-id : in-reply-to : references : mime-version :\n content-transfer-encoding : content-type; s=pfpt0220;\n bh=AnT92RmO0e1plPL1XtKTleGo54fbjYovYKzMZZRs3Ng=;\n b=h/EnSsZsBCMCFQ66FpgE+Dmm1MwkqlCSwiGMOYErQNfzlb74qqP5Yk8EscXwN8L+i9Z2\n 2EecKQ+ifm90OBS60H3eIjOuGXHsmusH0TO24bKNrxSRYF0Rf6CNTOEMzOokAEpCp2zO\n GDAvLeRa2PspIPEkF7GTKSVmYmVXDwBRzuq4yD0nakBGY6WwbRNBGMxYQ2z5DEIwCHy5\n Rf439cOJVEb45VGdTBd5lFt0dE4vOSo3XugQYBT0YYRSvAhmVa4ASCZxnqEiYqLcvHdC\n /kMnOqSBd9N8KrQjGwE7P/6zVRSqhpS05FDCqiFup+yK2dmLh3jdrTQ/PJeHXcSzTVRc CQ==", "From": "<pbhagavatula@marvell.com>", "To": "<jerinj@marvell.com>, Nithin Dabilpuram <ndabilpuram@marvell.com>, \"Kiran\n Kumar K\" <kirankumark@marvell.com>, Sunil Kumar Kori <skori@marvell.com>,\n Satha Rao <skoteshwar@marvell.com>", "CC": "<dev@dpdk.org>, Pavan Nikhilesh <pbhagavatula@marvell.com>", "Date": "Sat, 19 Jun 2021 16:31:46 +0530", "Message-ID": "<20210619110154.10301-6-pbhagavatula@marvell.com>", "X-Mailer": "git-send-email 2.17.1", "In-Reply-To": "<20210619110154.10301-1-pbhagavatula@marvell.com>", "References": "<20210524122303.1116-1-pbhagavatula@marvell.com>\n <20210619110154.10301-1-pbhagavatula@marvell.com>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "Content-Type": "text/plain", "X-Proofpoint-GUID": "ZYrXI3eo6t7QVb4Us4tm4jPVcrpiFEcU", "X-Proofpoint-ORIG-GUID": "ZYrXI3eo6t7QVb4Us4tm4jPVcrpiFEcU", "X-Proofpoint-Virus-Version": "vendor=fsecure engine=2.50.10434:6.0.391, 18.0.790\n definitions=2021-06-19_09:2021-06-18,\n 2021-06-19 signatures=0", "Subject": "[dpdk-dev] [PATCH v2 06/13] net/cnxk: add multi seg Tx vector\n routine", "X-BeenThere": "dev@dpdk.org", "X-Mailman-Version": "2.1.29", "Precedence": "list", "List-Id": "DPDK patches and discussions <dev.dpdk.org>", "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>", "List-Archive": "<http://mails.dpdk.org/archives/dev/>", "List-Post": "<mailto:dev@dpdk.org>", "List-Help": "<mailto:dev-request@dpdk.org?subject=help>", "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>", "Errors-To": "dev-bounces@dpdk.org", "Sender": "\"dev\" <dev-bounces@dpdk.org>" }, "content": "From: Pavan Nikhilesh <pbhagavatula@marvell.com>\n\nAdd multi segment Tx vector routine.\n\nSigned-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>\n---\n drivers/net/cnxk/cn10k_tx.c | 20 +-\n drivers/net/cnxk/cn10k_tx.h | 388 +++++++++++++++++++++++++--\n drivers/net/cnxk/cn10k_tx_vec_mseg.c | 24 ++\n drivers/net/cnxk/cn9k_tx.c | 20 +-\n drivers/net/cnxk/cn9k_tx.h | 272 ++++++++++++++++++-\n drivers/net/cnxk/cn9k_tx_vec_mseg.c | 24 ++\n drivers/net/cnxk/meson.build | 6 +-\n 7 files changed, 709 insertions(+), 45 deletions(-)\n create mode 100644 drivers/net/cnxk/cn10k_tx_vec_mseg.c\n create mode 100644 drivers/net/cnxk/cn9k_tx_vec_mseg.c", "diff": "diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c\nindex d06879163f..1f30bab59a 100644\n--- a/drivers/net/cnxk/cn10k_tx.c\n+++ b/drivers/net/cnxk/cn10k_tx.c\n@@ -67,13 +67,23 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)\n #undef T\n \t};\n \n-\tif (dev->scalar_ena)\n+\tconst eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = {\n+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \\\n+\t[f5][f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_vec_mseg_##name,\n+\n+\t\tNIX_TX_FASTPATH_MODES\n+#undef T\n+\t};\n+\n+\tif (dev->scalar_ena) {\n \t\tpick_tx_func(eth_dev, nix_eth_tx_burst);\n-\telse\n+\t\tif (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)\n+\t\t\tpick_tx_func(eth_dev, nix_eth_tx_burst_mseg);\n+\t} else {\n \t\tpick_tx_func(eth_dev, nix_eth_tx_vec_burst);\n-\n-\tif (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)\n-\t\tpick_tx_func(eth_dev, nix_eth_tx_burst_mseg);\n+\t\tif (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)\n+\t\t\tpick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg);\n+\t}\n \n \trte_mb();\n }\ndiff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h\nindex cea7c6cd34..b25b20dcb2 100644\n--- a/drivers/net/cnxk/cn10k_tx.h\n+++ b/drivers/net/cnxk/cn10k_tx.h\n@@ -42,6 +42,13 @@\n \t\t} \\\n \t} while (0)\n \n+/* Encoded number of segments to number of dwords macro, each value of nb_segs\n+ * is encoded as 4bits.\n+ */\n+#define NIX_SEGDW_MAGIC 0x76654432210ULL\n+\n+#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)\n+\n #define LMT_OFF(lmt_addr, lmt_num, offset) \\\n \t(void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))\n \n@@ -102,6 +109,14 @@ cn10k_nix_tx_steor_data(const uint16_t flags)\n \treturn data;\n }\n \n+static __rte_always_inline uint8_t\n+cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)\n+{\n+\treturn ((flags & NIX_TX_NEED_EXT_HDR) ?\n+\t\t\t (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :\n+\t\t\t 4);\n+}\n+\n static __rte_always_inline uint64_t\n cn10k_nix_tx_steor_vec_data(const uint16_t flags)\n {\n@@ -729,7 +744,244 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,\n \t}\n }\n \n+static __rte_always_inline void\n+cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,\n+\t\t\t\tunion nix_send_hdr_w0_u *sh,\n+\t\t\t\tunion nix_send_sg_s *sg, const uint32_t flags)\n+{\n+\tstruct rte_mbuf *m_next;\n+\tuint64_t *slist, sg_u;\n+\tuint16_t nb_segs;\n+\tint i = 1;\n+\n+\tsh->total = m->pkt_len;\n+\t/* Clear sg->u header before use */\n+\tsg->u &= 0xFC00000000000000;\n+\tsg_u = sg->u;\n+\tslist = &cmd[0];\n+\n+\tsg_u = sg_u | ((uint64_t)m->data_len);\n+\n+\tnb_segs = m->nb_segs - 1;\n+\tm_next = m->next;\n+\n+\t/* Set invert df if buffer is not to be freed by H/W */\n+\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)\n+\t\tsg_u |= (cnxk_nix_prefree_seg(m) << 55);\n+\t\t/* Mark mempool object as \"put\" since it is freed by NIX */\n+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG\n+\tif (!(sg_u & (1ULL << 55)))\n+\t\t__mempool_check_cookies(m->pool, (void **)&m, 1, 0);\n+\trte_io_wmb();\n+#endif\n+\n+\tm = m_next;\n+\t/* Fill mbuf segments */\n+\tdo {\n+\t\tm_next = m->next;\n+\t\tsg_u = sg_u | ((uint64_t)m->data_len << (i << 4));\n+\t\t*slist = rte_mbuf_data_iova(m);\n+\t\t/* Set invert df if buffer is not to be freed by H/W */\n+\t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)\n+\t\t\tsg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));\n+\t\t\t/* Mark mempool object as \"put\" since it is freed by NIX\n+\t\t\t */\n+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG\n+\t\tif (!(sg_u & (1ULL << (i + 55))))\n+\t\t\t__mempool_check_cookies(m->pool, (void **)&m, 1, 0);\n+\t\trte_io_wmb();\n+#endif\n+\t\tslist++;\n+\t\ti++;\n+\t\tnb_segs--;\n+\t\tif (i > 2 && nb_segs) {\n+\t\t\ti = 0;\n+\t\t\t/* Next SG subdesc */\n+\t\t\t*(uint64_t *)slist = sg_u & 0xFC00000000000000;\n+\t\t\tsg->u = sg_u;\n+\t\t\tsg->segs = 3;\n+\t\t\tsg = (union nix_send_sg_s *)slist;\n+\t\t\tsg_u = sg->u;\n+\t\t\tslist++;\n+\t\t}\n+\t\tm = m_next;\n+\t} while (nb_segs);\n+\n+\tsg->u = sg_u;\n+\tsg->segs = i;\n+}\n+\n+static __rte_always_inline void\n+cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,\n+\t\t\t uint64x2_t *cmd1, const uint8_t segdw,\n+\t\t\t const uint32_t flags)\n+{\n+\tunion nix_send_hdr_w0_u sh;\n+\tunion nix_send_sg_s sg;\n+\n+\tif (m->nb_segs == 1) {\n+\t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {\n+\t\t\tsg.u = vgetq_lane_u64(cmd1[0], 0);\n+\t\t\tsg.u |= (cnxk_nix_prefree_seg(m) << 55);\n+\t\t\tcmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);\n+\t\t}\n+\n+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG\n+\t\tsg.u = vgetq_lane_u64(cmd1[0], 0);\n+\t\tif (!(sg.u & (1ULL << 55)))\n+\t\t\t__mempool_check_cookies(m->pool, (void **)&m, 1, 0);\n+\t\trte_io_wmb();\n+#endif\n+\t\treturn;\n+\t}\n+\n+\tsh.u = vgetq_lane_u64(cmd0[0], 0);\n+\tsg.u = vgetq_lane_u64(cmd1[0], 0);\n+\n+\tcn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);\n+\n+\tsh.sizem1 = segdw - 1;\n+\tcmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);\n+\tcmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);\n+}\n+\n #define NIX_DESCS_PER_LOOP 4\n+\n+static __rte_always_inline uint8_t\n+cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,\n+\t\t\t uint64x2_t *cmd1, uint64x2_t *cmd2,\n+\t\t\t uint64x2_t *cmd3, uint8_t *segdw,\n+\t\t\t uint64_t *lmt_addr, __uint128_t *data128,\n+\t\t\t uint8_t *shift, const uint16_t flags)\n+{\n+\tuint8_t j, off, lmt_used;\n+\n+\tif (!(flags & NIX_TX_NEED_EXT_HDR) &&\n+\t !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {\n+\t\t/* No segments in 4 consecutive packets. */\n+\t\tif ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {\n+\t\t\tfor (j = 0; j < NIX_DESCS_PER_LOOP; j++)\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j], NULL,\n+\t\t\t\t\t\t\t &cmd0[j], &cmd1[j],\n+\t\t\t\t\t\t\t segdw[j], flags);\n+\t\t\tvst1q_u64(lmt_addr, cmd0[0]);\n+\t\t\tvst1q_u64(lmt_addr + 2, cmd1[0]);\n+\t\t\tvst1q_u64(lmt_addr + 4, cmd0[1]);\n+\t\t\tvst1q_u64(lmt_addr + 6, cmd1[1]);\n+\t\t\tvst1q_u64(lmt_addr + 8, cmd0[2]);\n+\t\t\tvst1q_u64(lmt_addr + 10, cmd1[2]);\n+\t\t\tvst1q_u64(lmt_addr + 12, cmd0[3]);\n+\t\t\tvst1q_u64(lmt_addr + 14, cmd1[3]);\n+\n+\t\t\t*data128 |= ((__uint128_t)7) << *shift;\n+\t\t\tshift += 3;\n+\n+\t\t\treturn 1;\n+\t\t}\n+\t}\n+\n+\tlmt_used = 0;\n+\tfor (j = 0; j < NIX_DESCS_PER_LOOP;) {\n+\t\t/* Fit consecutive packets in same LMTLINE. */\n+\t\tif ((segdw[j] + segdw[j + 1]) <= 8) {\n+\t\t\tif (flags & NIX_TX_OFFLOAD_TSTAMP_F) {\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j], NULL,\n+\t\t\t\t\t\t\t &cmd0[j], &cmd1[j],\n+\t\t\t\t\t\t\t segdw[j], flags);\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,\n+\t\t\t\t\t\t\t &cmd0[j + 1],\n+\t\t\t\t\t\t\t &cmd1[j + 1],\n+\t\t\t\t\t\t\t segdw[j + 1], flags);\n+\t\t\t\t/* TSTAMP takes 4 each, no segs. */\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd2[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd1[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 6, cmd3[j]);\n+\n+\t\t\t\tvst1q_u64(lmt_addr + 8, cmd0[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 10, cmd2[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 12, cmd1[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 14, cmd3[j + 1]);\n+\t\t\t} else if (flags & NIX_TX_NEED_EXT_HDR) {\n+\t\t\t\t/* EXT header take 3 each, space for 2 segs.*/\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j],\n+\t\t\t\t\t\t\t lmt_addr + 6,\n+\t\t\t\t\t\t\t &cmd0[j], &cmd1[j],\n+\t\t\t\t\t\t\t segdw[j], flags);\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd2[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd1[j]);\n+\t\t\t\toff = segdw[j] - 3;\n+\t\t\t\toff <<= 1;\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j + 1],\n+\t\t\t\t\t\t\t lmt_addr + 12 + off,\n+\t\t\t\t\t\t\t &cmd0[j + 1],\n+\t\t\t\t\t\t\t &cmd1[j + 1],\n+\t\t\t\t\t\t\t segdw[j + 1], flags);\n+\t\t\t\tvst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);\n+\t\t\t} else {\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j],\n+\t\t\t\t\t\t\t lmt_addr + 4,\n+\t\t\t\t\t\t\t &cmd0[j], &cmd1[j],\n+\t\t\t\t\t\t\t segdw[j], flags);\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd1[j]);\n+\t\t\t\toff = segdw[j] - 2;\n+\t\t\t\toff <<= 1;\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j + 1],\n+\t\t\t\t\t\t\t lmt_addr + 8 + off,\n+\t\t\t\t\t\t\t &cmd0[j + 1],\n+\t\t\t\t\t\t\t &cmd1[j + 1],\n+\t\t\t\t\t\t\t segdw[j + 1], flags);\n+\t\t\t\tvst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);\n+\t\t\t}\n+\t\t\t*data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)\n+\t\t\t\t << *shift;\n+\t\t\t*shift += 3;\n+\t\t\tj += 2;\n+\t\t} else {\n+\t\t\tif ((flags & NIX_TX_NEED_EXT_HDR) &&\n+\t\t\t (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j],\n+\t\t\t\t\t\t\t lmt_addr + 6,\n+\t\t\t\t\t\t\t &cmd0[j], &cmd1[j],\n+\t\t\t\t\t\t\t segdw[j], flags);\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd2[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd1[j]);\n+\t\t\t\toff = segdw[j] - 4;\n+\t\t\t\toff <<= 1;\n+\t\t\t\tvst1q_u64(lmt_addr + 6 + off, cmd3[j]);\n+\t\t\t} else if (flags & NIX_TX_NEED_EXT_HDR) {\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j],\n+\t\t\t\t\t\t\t lmt_addr + 6,\n+\t\t\t\t\t\t\t &cmd0[j], &cmd1[j],\n+\t\t\t\t\t\t\t segdw[j], flags);\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd2[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd1[j]);\n+\t\t\t} else {\n+\t\t\t\tcn10k_nix_prepare_mseg_vec(mbufs[j],\n+\t\t\t\t\t\t\t lmt_addr + 4,\n+\t\t\t\t\t\t\t &cmd0[j], &cmd1[j],\n+\t\t\t\t\t\t\t segdw[j], flags);\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd1[j]);\n+\t\t\t}\n+\t\t\t*data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;\n+\t\t\t*shift += 3;\n+\t\t\tj++;\n+\t\t}\n+\t\tlmt_used++;\n+\t\tlmt_addr += 16;\n+\t}\n+\n+\treturn lmt_used;\n+}\n+\n static __rte_always_inline uint16_t\n cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\t uint16_t pkts, uint64_t *cmd, const uint16_t flags)\n@@ -738,7 +990,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \tuint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;\n \tuint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],\n \t\tcmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];\n-\tuint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;\n+\tuint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;\n \tuint64x2_t senddesc01_w0, senddesc23_w0;\n \tuint64x2_t senddesc01_w1, senddesc23_w1;\n \tuint16_t left, scalar, burst, i, lmt_id;\n@@ -746,6 +998,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \tuint64x2_t sendext01_w1, sendext23_w1;\n \tuint64x2_t sendmem01_w0, sendmem23_w0;\n \tuint64x2_t sendmem01_w1, sendmem23_w1;\n+\tuint8_t segdw[NIX_DESCS_PER_LOOP + 1];\n \tuint64x2_t sgdesc01_w0, sgdesc23_w0;\n \tuint64x2_t sgdesc01_w1, sgdesc23_w1;\n \tstruct cn10k_eth_txq *txq = tx_queue;\n@@ -754,7 +1007,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \tuint64x2_t ltypes01, ltypes23;\n \tuint64x2_t xtmp128, ytmp128;\n \tuint64x2_t xmask01, xmask23;\n-\tuint8_t lnum;\n+\tuint8_t lnum, shift;\n+\tunion wdata {\n+\t\t__uint128_t data128;\n+\t\tuint64_t data[2];\n+\t} wd;\n \n \tNIX_XMIT_FC_OR_RETURN(txq, pkts);\n \n@@ -798,8 +1055,43 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \tburst = left > cn10k_nix_pkts_per_vec_brst(flags) ?\n \t\t\t cn10k_nix_pkts_per_vec_brst(flags) :\n \t\t\t left;\n+\tif (flags & NIX_TX_MULTI_SEG_F) {\n+\t\twd.data128 = 0;\n+\t\tshift = 16;\n+\t}\n \tlnum = 0;\n+\n \tfor (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {\n+\t\tif (flags & NIX_TX_MULTI_SEG_F) {\n+\t\t\tstruct rte_mbuf *m = tx_pkts[j];\n+\t\t\tuint8_t j;\n+\n+\t\t\tfor (j = 0; j < NIX_DESCS_PER_LOOP; j++) {\n+\t\t\t\t/* Get dwords based on nb_segs. */\n+\t\t\t\tsegdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);\n+\t\t\t\t/* Add dwords based on offloads. */\n+\t\t\t\tsegdw[j] += 1 + /* SEND HDR */\n+\t\t\t\t\t !!(flags & NIX_TX_NEED_EXT_HDR) +\n+\t\t\t\t\t !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);\n+\t\t\t}\n+\n+\t\t\t/* Check if there are enough LMTLINES for this loop */\n+\t\t\tif (lnum + 4 > 32) {\n+\t\t\t\tuint8_t ldwords_con = 0, lneeded = 0;\n+\t\t\t\tfor (j = 0; j < NIX_DESCS_PER_LOOP; j++) {\n+\t\t\t\t\tldwords_con += segdw[j];\n+\t\t\t\t\tif (ldwords_con > 8) {\n+\t\t\t\t\t\tlneeded += 1;\n+\t\t\t\t\t\tldwords_con = segdw[j];\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t\tlneeded += 1;\n+\t\t\t\tif (lnum + lneeded > 32) {\n+\t\t\t\t\tburst = i;\n+\t\t\t\t\tbreak;\n+\t\t\t\t}\n+\t\t\t}\n+\t\t}\n \t\t/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */\n \t\tsenddesc01_w0 =\n \t\t\tvbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));\n@@ -1527,7 +1819,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\tsendext23_w0 = vld1q_u64(sx_w0 + 2);\n \t\t}\n \n-\t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {\n+\t\tif ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&\n+\t\t !(flags & NIX_TX_MULTI_SEG_F)) {\n \t\t\t/* Set don't free bit if reference count > 1 */\n \t\t\txmask01 = vdupq_n_u64(0);\n \t\t\txmask23 = xmask01;\n@@ -1567,7 +1860,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\t\t\t(void **)&mbuf3, 1, 0);\n \t\t\tsenddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);\n \t\t\tsenddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);\n-\t\t} else {\n+\t\t} else if (!(flags & NIX_TX_MULTI_SEG_F)) {\n \t\t\t/* Move mbufs to iova */\n \t\t\tmbuf0 = (uint64_t *)tx_pkts[0];\n \t\t\tmbuf1 = (uint64_t *)tx_pkts[1];\n@@ -1612,7 +1905,19 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\tcmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);\n \t\t}\n \n-\t\tif (flags & NIX_TX_NEED_EXT_HDR) {\n+\t\tif (flags & NIX_TX_MULTI_SEG_F) {\n+\t\t\tuint8_t j;\n+\n+\t\t\tsegdw[4] = 8;\n+\t\t\tj = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,\n+\t\t\t\t\t\t\t cmd2, cmd3, segdw,\n+\t\t\t\t\t\t\t (uint64_t *)\n+\t\t\t\t\t\t\t LMT_OFF(laddr, lnum,\n+\t\t\t\t\t\t\t\t 0),\n+\t\t\t\t\t\t\t &wd.data128, &shift,\n+\t\t\t\t\t\t\t flags);\n+\t\t\tlnum += j;\n+\t\t} else if (flags & NIX_TX_NEED_EXT_HDR) {\n \t\t\t/* Store the prepared send desc to LMT lines */\n \t\t\tif (flags & NIX_TX_OFFLOAD_TSTAMP_F) {\n \t\t\t\tvst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);\n@@ -1664,34 +1969,55 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\ttx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;\n \t}\n \n+\tif (flags & NIX_TX_MULTI_SEG_F)\n+\t\twd.data[0] >>= 16;\n+\n \t/* Trigger LMTST */\n \tif (lnum > 16) {\n-\t\tdata = cn10k_nix_tx_steor_vec_data(flags);\n-\t\tpa = io_addr | (data & 0x7) << 4;\n-\t\tdata &= ~0x7ULL;\n-\t\tdata |= (15ULL << 12);\n-\t\tdata |= (uint64_t)lmt_id;\n+\t\tif (!(flags & NIX_TX_MULTI_SEG_F))\n+\t\t\twd.data[0] = cn10k_nix_tx_steor_vec_data(flags);\n+\n+\t\tpa = io_addr | (wd.data[0] & 0x7) << 4;\n+\t\twd.data[0] &= ~0x7ULL;\n+\n+\t\tif (flags & NIX_TX_MULTI_SEG_F)\n+\t\t\twd.data[0] <<= 16;\n+\n+\t\twd.data[0] |= (15ULL << 12);\n+\t\twd.data[0] |= (uint64_t)lmt_id;\n \n \t\t/* STEOR0 */\n-\t\troc_lmt_submit_steorl(data, pa);\n+\t\troc_lmt_submit_steorl(wd.data[0], pa);\n \n-\t\tdata = cn10k_nix_tx_steor_vec_data(flags);\n-\t\tpa = io_addr | (data & 0x7) << 4;\n-\t\tdata &= ~0x7ULL;\n-\t\tdata |= ((uint64_t)(lnum - 17)) << 12;\n-\t\tdata |= (uint64_t)(lmt_id + 16);\n+\t\tif (!(flags & NIX_TX_MULTI_SEG_F))\n+\t\t\twd.data[1] = cn10k_nix_tx_steor_vec_data(flags);\n+\n+\t\tpa = io_addr | (wd.data[1] & 0x7) << 4;\n+\t\twd.data[1] &= ~0x7ULL;\n+\n+\t\tif (flags & NIX_TX_MULTI_SEG_F)\n+\t\t\twd.data[1] <<= 16;\n+\n+\t\twd.data[1] |= ((uint64_t)(lnum - 17)) << 12;\n+\t\twd.data[1] |= (uint64_t)(lmt_id + 16);\n \n \t\t/* STEOR1 */\n-\t\troc_lmt_submit_steorl(data, pa);\n+\t\troc_lmt_submit_steorl(wd.data[1], pa);\n \t} else if (lnum) {\n-\t\tdata = cn10k_nix_tx_steor_vec_data(flags);\n-\t\tpa = io_addr | (data & 0x7) << 4;\n-\t\tdata &= ~0x7ULL;\n-\t\tdata |= ((uint64_t)(lnum - 1)) << 12;\n-\t\tdata |= lmt_id;\n+\t\tif (!(flags & NIX_TX_MULTI_SEG_F))\n+\t\t\twd.data[0] = cn10k_nix_tx_steor_vec_data(flags);\n+\n+\t\tpa = io_addr | (wd.data[0] & 0x7) << 4;\n+\t\twd.data[0] &= ~0x7ULL;\n+\n+\t\tif (flags & NIX_TX_MULTI_SEG_F)\n+\t\t\twd.data[0] <<= 16;\n+\n+\t\twd.data[0] |= ((uint64_t)(lnum - 1)) << 12;\n+\t\twd.data[0] |= lmt_id;\n \n \t\t/* STEOR0 */\n-\t\troc_lmt_submit_steorl(data, pa);\n+\t\troc_lmt_submit_steorl(wd.data[0], pa);\n \t}\n \n \tleft -= burst;\n@@ -1699,9 +2025,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \tif (left)\n \t\tgoto again;\n \n-\tif (unlikely(scalar))\n-\t\tpkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar, cmd,\n-\t\t\t\t\t flags);\n+\tif (unlikely(scalar)) {\n+\t\tif (flags & NIX_TX_MULTI_SEG_F)\n+\t\t\tpkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,\n+\t\t\t\t\t\t\t scalar, cmd, flags);\n+\t\telse\n+\t\t\tpkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,\n+\t\t\t\t\t\t cmd, flags);\n+\t}\n \n \treturn pkts;\n }\n@@ -1866,7 +2197,10 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum,\t1, 1, 1, 1, 1, 1,\t8,\t\\\n \t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \\\n \t\t\t\t\t\t\t\t\t \\\n \tuint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name( \\\n-\t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);\n+\t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \\\n+\t\t\t\t\t\t\t\t\t \\\n+\tuint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \\\n+\t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \\\n \n NIX_TX_FASTPATH_MODES\n #undef T\ndiff --git a/drivers/net/cnxk/cn10k_tx_vec_mseg.c b/drivers/net/cnxk/cn10k_tx_vec_mseg.c\nnew file mode 100644\nindex 0000000000..1fad81dbad\n--- /dev/null\n+++ b/drivers/net/cnxk/cn10k_tx_vec_mseg.c\n@@ -0,0 +1,24 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(C) 2021 Marvell.\n+ */\n+\n+#include \"cn10k_ethdev.h\"\n+#include \"cn10k_tx.h\"\n+\n+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \\\n+\tuint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \\\n+\t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \\\n+\t{ \\\n+\t\tuint64_t cmd[sz]; \\\n+\t\t\t\t\t\t\t\t\t \\\n+\t\t/* For TSO inner checksum is a must */ \\\n+\t\tif (((flags) & NIX_TX_OFFLOAD_TSO_F) && \\\n+\t\t !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \\\n+\t\t\treturn 0; \\\n+\t\treturn cn10k_nix_xmit_pkts_vector( \\\n+\t\t\ttx_queue, tx_pkts, pkts, cmd, \\\n+\t\t\t(flags) | NIX_TX_MULTI_SEG_F); \\\n+\t}\n+\n+NIX_TX_FASTPATH_MODES\n+#undef T\ndiff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c\nindex 735e21cc60..763f9a14fd 100644\n--- a/drivers/net/cnxk/cn9k_tx.c\n+++ b/drivers/net/cnxk/cn9k_tx.c\n@@ -66,13 +66,23 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)\n #undef T\n \t};\n \n-\tif (dev->scalar_ena)\n+\tconst eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = {\n+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)\t\t\t \\\n+\t[f5][f4][f3][f2][f1][f0] = cn9k_nix_xmit_pkts_vec_mseg_##name,\n+\n+\t\tNIX_TX_FASTPATH_MODES\n+#undef T\n+\t};\n+\n+\tif (dev->scalar_ena) {\n \t\tpick_tx_func(eth_dev, nix_eth_tx_burst);\n-\telse\n+\t\tif (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)\n+\t\t\tpick_tx_func(eth_dev, nix_eth_tx_burst_mseg);\n+\t} else {\n \t\tpick_tx_func(eth_dev, nix_eth_tx_vec_burst);\n-\n-\tif (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)\n-\t\tpick_tx_func(eth_dev, nix_eth_tx_burst_mseg);\n+\t\tif (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)\n+\t\t\tpick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg);\n+\t}\n \n \trte_mb();\n }\ndiff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h\nindex 2adff45705..42b54a378e 100644\n--- a/drivers/net/cnxk/cn9k_tx.h\n+++ b/drivers/net/cnxk/cn9k_tx.h\n@@ -582,7 +582,238 @@ cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,\n \t}\n }\n \n+static __rte_always_inline uint8_t\n+cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,\n+\t\t\t union nix_send_hdr_w0_u *sh,\n+\t\t\t union nix_send_sg_s *sg, const uint32_t flags)\n+{\n+\tstruct rte_mbuf *m_next;\n+\tuint64_t *slist, sg_u;\n+\tuint16_t nb_segs;\n+\tuint64_t segdw;\n+\tint i = 1;\n+\n+\tsh->total = m->pkt_len;\n+\t/* Clear sg->u header before use */\n+\tsg->u &= 0xFC00000000000000;\n+\tsg_u = sg->u;\n+\tslist = &cmd[0];\n+\n+\tsg_u = sg_u | ((uint64_t)m->data_len);\n+\n+\tnb_segs = m->nb_segs - 1;\n+\tm_next = m->next;\n+\n+\t/* Set invert df if buffer is not to be freed by H/W */\n+\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)\n+\t\tsg_u |= (cnxk_nix_prefree_seg(m) << 55);\n+\t\t/* Mark mempool object as \"put\" since it is freed by NIX */\n+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG\n+\tif (!(sg_u & (1ULL << 55)))\n+\t\t__mempool_check_cookies(m->pool, (void **)&m, 1, 0);\n+\trte_io_wmb();\n+#endif\n+\n+\tm = m_next;\n+\t/* Fill mbuf segments */\n+\tdo {\n+\t\tm_next = m->next;\n+\t\tsg_u = sg_u | ((uint64_t)m->data_len << (i << 4));\n+\t\t*slist = rte_mbuf_data_iova(m);\n+\t\t/* Set invert df if buffer is not to be freed by H/W */\n+\t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)\n+\t\t\tsg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));\n+\t\t\t/* Mark mempool object as \"put\" since it is freed by NIX\n+\t\t\t */\n+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG\n+\t\tif (!(sg_u & (1ULL << (i + 55))))\n+\t\t\t__mempool_check_cookies(m->pool, (void **)&m, 1, 0);\n+\t\trte_io_wmb();\n+#endif\n+\t\tslist++;\n+\t\ti++;\n+\t\tnb_segs--;\n+\t\tif (i > 2 && nb_segs) {\n+\t\t\ti = 0;\n+\t\t\t/* Next SG subdesc */\n+\t\t\t*(uint64_t *)slist = sg_u & 0xFC00000000000000;\n+\t\t\tsg->u = sg_u;\n+\t\t\tsg->segs = 3;\n+\t\t\tsg = (union nix_send_sg_s *)slist;\n+\t\t\tsg_u = sg->u;\n+\t\t\tslist++;\n+\t\t}\n+\t\tm = m_next;\n+\t} while (nb_segs);\n+\n+\tsg->u = sg_u;\n+\tsg->segs = i;\n+\tsegdw = (uint64_t *)slist - (uint64_t *)&cmd[0];\n+\n+\tsegdw += 2;\n+\t/* Roundup extra dwords to multiple of 2 */\n+\tsegdw = (segdw >> 1) + (segdw & 0x1);\n+\t/* Default dwords */\n+\tsegdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) +\n+\t\t !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);\n+\tsh->sizem1 = segdw - 1;\n+\n+\treturn segdw;\n+}\n+\n+static __rte_always_inline uint8_t\n+cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,\n+\t\t\t uint64x2_t *cmd1, const uint32_t flags)\n+{\n+\tunion nix_send_hdr_w0_u sh;\n+\tunion nix_send_sg_s sg;\n+\tuint8_t ret;\n+\n+\tif (m->nb_segs == 1) {\n+\t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {\n+\t\t\tsg.u = vgetq_lane_u64(cmd1[0], 0);\n+\t\t\tsg.u |= (cnxk_nix_prefree_seg(m) << 55);\n+\t\t\tcmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);\n+\t\t}\n+\n+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG\n+\t\tsg.u = vgetq_lane_u64(cmd1[0], 0);\n+\t\tif (!(sg.u & (1ULL << 55)))\n+\t\t\t__mempool_check_cookies(m->pool, (void **)&m, 1, 0);\n+\t\trte_io_wmb();\n+#endif\n+\t\treturn 2 + !!(flags & NIX_TX_NEED_EXT_HDR) +\n+\t\t !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);\n+\t}\n+\n+\tsh.u = vgetq_lane_u64(cmd0[0], 0);\n+\tsg.u = vgetq_lane_u64(cmd1[0], 0);\n+\n+\tret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);\n+\n+\tcmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);\n+\tcmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);\n+\treturn ret;\n+}\n+\n #define NIX_DESCS_PER_LOOP 4\n+\n+static __rte_always_inline void\n+cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1,\n+\t\t\t uint64x2_t *cmd2, uint64x2_t *cmd3,\n+\t\t\t uint8_t *segdw,\n+\t\t\t uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2],\n+\t\t\t uint64_t *lmt_addr, rte_iova_t io_addr,\n+\t\t\t const uint32_t flags)\n+{\n+\tuint64_t lmt_status;\n+\tuint8_t j, off;\n+\n+\tif (!(flags & NIX_TX_NEED_EXT_HDR) &&\n+\t !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {\n+\t\t/* No segments in 4 consecutive packets. */\n+\t\tif ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {\n+\t\t\tdo {\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[0]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd1[0]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd0[1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 6, cmd1[1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 8, cmd0[2]);\n+\t\t\t\tvst1q_u64(lmt_addr + 10, cmd1[2]);\n+\t\t\t\tvst1q_u64(lmt_addr + 12, cmd0[3]);\n+\t\t\t\tvst1q_u64(lmt_addr + 14, cmd1[3]);\n+\t\t\t\tlmt_status = roc_lmt_submit_ldeor(io_addr);\n+\t\t\t} while (lmt_status == 0);\n+\n+\t\t\treturn;\n+\t\t}\n+\t}\n+\n+\tfor (j = 0; j < NIX_DESCS_PER_LOOP;) {\n+\t\t/* Fit consecutive packets in same LMTLINE. */\n+\t\tif ((segdw[j] + segdw[j + 1]) <= 8) {\n+again0:\n+\t\t\tif ((flags & NIX_TX_NEED_EXT_HDR) &&\n+\t\t\t (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd2[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd1[j]);\n+\t\t\t\t/* Copy segs */\n+\t\t\t\toff = segdw[j] - 4;\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 6, slist[j], off);\n+\t\t\t\toff <<= 1;\n+\t\t\t\tvst1q_u64(lmt_addr + 6 + off, cmd3[j]);\n+\n+\t\t\t\tvst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]);\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 14 + off,\n+\t\t\t\t\t\tslist[j + 1], segdw[j + 1] - 4);\n+\t\t\t\toff += ((segdw[j + 1] - 4) << 1);\n+\t\t\t\tvst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]);\n+\t\t\t} else if (flags & NIX_TX_NEED_EXT_HDR) {\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd2[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd1[j]);\n+\t\t\t\t/* Copy segs */\n+\t\t\t\toff = segdw[j] - 3;\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 6, slist[j], off);\n+\t\t\t\toff <<= 1;\n+\t\t\t\tvst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 12 + off,\n+\t\t\t\t\t\tslist[j + 1], segdw[j + 1] - 3);\n+\t\t\t} else {\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd1[j]);\n+\t\t\t\t/* Copy segs */\n+\t\t\t\toff = segdw[j] - 2;\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 4, slist[j], off);\n+\t\t\t\toff <<= 1;\n+\t\t\t\tvst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);\n+\t\t\t\tvst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 8 + off,\n+\t\t\t\t\t\tslist[j + 1], segdw[j + 1] - 2);\n+\t\t\t}\n+\t\t\tlmt_status = roc_lmt_submit_ldeor(io_addr);\n+\t\t\tif (lmt_status == 0)\n+\t\t\t\tgoto again0;\n+\t\t\tj += 2;\n+\t\t} else {\n+again1:\n+\t\t\tif ((flags & NIX_TX_NEED_EXT_HDR) &&\n+\t\t\t (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd2[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd1[j]);\n+\t\t\t\t/* Copy segs */\n+\t\t\t\toff = segdw[j] - 4;\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 6, slist[j], off);\n+\t\t\t\toff <<= 1;\n+\t\t\t\tvst1q_u64(lmt_addr + 6 + off, cmd3[j]);\n+\t\t\t} else if (flags & NIX_TX_NEED_EXT_HDR) {\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd2[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 4, cmd1[j]);\n+\t\t\t\t/* Copy segs */\n+\t\t\t\toff = segdw[j] - 3;\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 6, slist[j], off);\n+\t\t\t} else {\n+\t\t\t\tvst1q_u64(lmt_addr, cmd0[j]);\n+\t\t\t\tvst1q_u64(lmt_addr + 2, cmd1[j]);\n+\t\t\t\t/* Copy segs */\n+\t\t\t\toff = segdw[j] - 2;\n+\t\t\t\troc_lmt_mov_seg(lmt_addr + 4, slist[j], off);\n+\t\t\t}\n+\t\t\tlmt_status = roc_lmt_submit_ldeor(io_addr);\n+\t\t\tif (lmt_status == 0)\n+\t\t\t\tgoto again1;\n+\t\t\tj += 1;\n+\t\t}\n+\t}\n+}\n+\n static __rte_always_inline uint16_t\n cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\t uint16_t pkts, uint64_t *cmd, const uint16_t flags)\n@@ -1380,7 +1611,8 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\tsendext23_w0 = vld1q_u64(sx_w0 + 2);\n \t\t}\n \n-\t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {\n+\t\tif ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&\n+\t\t !(flags & NIX_TX_MULTI_SEG_F)) {\n \t\t\t/* Set don't free bit if reference count > 1 */\n \t\t\txmask01 = vdupq_n_u64(0);\n \t\t\txmask23 = xmask01;\n@@ -1424,7 +1656,7 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\t * cnxk_nix_prefree_seg are written before LMTST.\n \t\t\t */\n \t\t\trte_io_wmb();\n-\t\t} else {\n+\t\t} else if (!(flags & NIX_TX_MULTI_SEG_F)) {\n \t\t\t/* Move mbufs to iova */\n \t\t\tmbuf0 = (uint64_t *)tx_pkts[0];\n \t\t\tmbuf1 = (uint64_t *)tx_pkts[1];\n@@ -1472,7 +1704,27 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\tcmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);\n \t\t}\n \n-\t\tif (flags & NIX_TX_NEED_EXT_HDR) {\n+\t\tif (flags & NIX_TX_MULTI_SEG_F) {\n+\t\t\tuint64_t seg_list[NIX_DESCS_PER_LOOP]\n+\t\t\t\t\t [CNXK_NIX_TX_MSEG_SG_DWORDS - 2];\n+\t\t\tuint8_t j, segdw[NIX_DESCS_PER_LOOP + 1];\n+\n+\t\t\t/* Build mseg list for each packet individually. */\n+\t\t\tfor (j = 0; j < NIX_DESCS_PER_LOOP; j++)\n+\t\t\t\tsegdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j],\n+\t\t\t\t\t\t\tseg_list[j], &cmd0[j],\n+\t\t\t\t\t\t\t&cmd1[j], flags);\n+\t\t\tsegdw[4] = 8;\n+\n+\t\t\t/* Commit all changes to mbuf before LMTST. */\n+\t\t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)\n+\t\t\t\trte_io_wmb();\n+\n+\t\t\tcn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3,\n+\t\t\t\t\t\t segdw, seg_list,\n+\t\t\t\t\t\t lmt_addr, io_addr,\n+\t\t\t\t\t\t flags);\n+\t\t} else if (flags & NIX_TX_NEED_EXT_HDR) {\n \t\t\t/* With ext header in the command we can no longer send\n \t\t\t * all 4 packets together since LMTLINE is 128bytes.\n \t\t\t * Split and Tx twice.\n@@ -1534,9 +1786,14 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\ttx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;\n \t}\n \n-\tif (unlikely(pkts_left))\n-\t\tpkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd,\n-\t\t\t\t\t flags);\n+\tif (unlikely(pkts_left)) {\n+\t\tif (flags & NIX_TX_MULTI_SEG_F)\n+\t\t\tpkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,\n+\t\t\t\t\t\t\tpkts_left, cmd, flags);\n+\t\telse\n+\t\t\tpkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left,\n+\t\t\t\t\t\t cmd, flags);\n+\t}\n \n \treturn pkts;\n }\n@@ -1701,6 +1958,9 @@ T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum,\t1, 1, 1, 1, 1, 1,\t8,\t \\\n \t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \\\n \t\t\t\t\t\t\t\t\t \\\n \tuint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name( \\\n+\t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts); \\\n+\t\t\t\t\t\t\t\t\t \\\n+\tuint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \\\n \t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);\n \n NIX_TX_FASTPATH_MODES\ndiff --git a/drivers/net/cnxk/cn9k_tx_vec_mseg.c b/drivers/net/cnxk/cn9k_tx_vec_mseg.c\nnew file mode 100644\nindex 0000000000..0256efd45a\n--- /dev/null\n+++ b/drivers/net/cnxk/cn9k_tx_vec_mseg.c\n@@ -0,0 +1,24 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(C) 2021 Marvell.\n+ */\n+\n+#include \"cn9k_ethdev.h\"\n+#include \"cn9k_tx.h\"\n+\n+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) \\\n+\tuint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name( \\\n+\t\tvoid *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts) \\\n+\t{ \\\n+\t\tuint64_t cmd[sz]; \\\n+\t\t\t\t\t\t\t\t\t \\\n+\t\t/* For TSO inner checksum is a must */ \\\n+\t\tif (((flags) & NIX_TX_OFFLOAD_TSO_F) && \\\n+\t\t !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \\\n+\t\t\treturn 0; \\\n+\t\treturn cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \\\n+\t\t\t\t\t\t (flags) | \\\n+\t\t\t\t\t\t\t NIX_TX_MULTI_SEG_F); \\\n+\t}\n+\n+NIX_TX_FASTPATH_MODES\n+#undef T\ndiff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build\nindex aa8c7253fb..361f7ce849 100644\n--- a/drivers/net/cnxk/meson.build\n+++ b/drivers/net/cnxk/meson.build\n@@ -26,7 +26,8 @@ sources += files('cn9k_ethdev.c',\n \t\t 'cn9k_rx_vec_mseg.c',\n \t\t 'cn9k_tx.c',\n \t\t 'cn9k_tx_mseg.c',\n-\t\t 'cn9k_tx_vec.c')\n+\t\t 'cn9k_tx_vec.c',\n+\t\t 'cn9k_tx_vec_mseg.c')\n # CN10K\n sources += files('cn10k_ethdev.c',\n \t\t 'cn10k_rte_flow.c',\n@@ -36,7 +37,8 @@ sources += files('cn10k_ethdev.c',\n \t\t 'cn10k_rx_vec_mseg.c',\n \t\t 'cn10k_tx.c',\n \t\t 'cn10k_tx_mseg.c',\n-\t\t 'cn10k_tx_vec.c')\n+\t\t 'cn10k_tx_vec.c',\n+\t\t 'cn10k_tx_vec_mseg.c')\n \n deps += ['bus_pci', 'cryptodev', 'eventdev', 'security']\n deps += ['common_cnxk', 'mempool_cnxk']\n", "prefixes": [ "v2", "06/13" ] }{ "id": 94546, "url": "