From patchwork Sat Mar 6 15:33:41 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Nithin Dabilpuram X-Patchwork-Id: 88618 X-Patchwork-Delegate: jerinj@marvell.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 50A46A0548; Sat, 6 Mar 2021 16:37:42 +0100 (CET) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 40F0622A3EC; Sat, 6 Mar 2021 16:35:25 +0100 (CET) Received: from mx0b-0016f401.pphosted.com (mx0a-0016f401.pphosted.com [67.231.148.174]) by mails.dpdk.org (Postfix) with ESMTP id E3B9922A37A for ; Sat, 6 Mar 2021 16:35:23 +0100 (CET) Received: from pps.filterd (m0045849.ppops.net [127.0.0.1]) by mx0a-0016f401.pphosted.com (8.16.0.43/8.16.0.43) with SMTP id 126FVumM029368 for ; Sat, 6 Mar 2021 07:35:23 -0800 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com; h=from : to : cc : subject : date : message-id : in-reply-to : references : mime-version : content-type; s=pfpt0220; bh=/i26B1zPNjdhzdF2qOzdeDLEc1WFKoMZdZ7yMz44HAA=; b=P7BXcHfhdnZXYLbE9MxxHOnPLZ8ndpSicsKOtZl84vfGS+NPJerz0ubaBnRzGsPEmJpv 4fT1B5R335rGo+wHktQ9fi1tuZbRo2lN6B4o3lvNo0nytKUZsNP3r8CPXLj11OO2nMxX B8J+CkB+bnVlFfKyKI/J3GxvyfWEIbpBJmpa/lO15iXO0k+4BOgDsRacsTTz4JfkAsjN M49tBEy2hTb6ppzetCy+QKPy7+RQ62zQaHtA3q4OQuc/kGuaoS1S+mhgkgEA+R7gOAnt pmMllqi21FZIUK2TVdWp1BVF3UpLhJ79WhLJiYhdFCZXjOOrU+GgCB0gfF4cccCN3Axn PA== Received: from dc5-exch01.marvell.com ([199.233.59.181]) by mx0a-0016f401.pphosted.com with ESMTP id 3747yurccn-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT) for ; Sat, 06 Mar 2021 07:35:23 -0800 Received: from SC-EXCH04.marvell.com (10.93.176.84) by DC5-EXCH01.marvell.com (10.69.176.38) with Microsoft SMTP Server (TLS) id 15.0.1497.2; Sat, 6 Mar 2021 07:35:21 -0800 Received: from DC5-EXCH01.marvell.com (10.69.176.38) by SC-EXCH04.marvell.com (10.93.176.84) with Microsoft SMTP Server (TLS) id 15.0.1497.2; Sat, 6 Mar 2021 07:35:21 -0800 Received: from maili.marvell.com (10.69.176.80) by DC5-EXCH01.marvell.com (10.69.176.38) with Microsoft SMTP Server id 15.0.1497.2 via Frontend Transport; Sat, 6 Mar 2021 07:35:21 -0800 Received: from hyd1588t430.marvell.com (unknown [10.29.52.204]) by maili.marvell.com (Postfix) with ESMTP id 8B33C3F703F; Sat, 6 Mar 2021 07:35:18 -0800 (PST) From: Nithin Dabilpuram To: CC: , , , , , , , Nithin Dabilpuram Date: Sat, 6 Mar 2021 21:03:41 +0530 Message-ID: <20210306153404.10781-22-ndabilpuram@marvell.com> X-Mailer: git-send-email 2.8.4 In-Reply-To: <20210306153404.10781-1-ndabilpuram@marvell.com> References: <20210306153404.10781-1-ndabilpuram@marvell.com> MIME-Version: 1.0 X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10434:6.0.369, 18.0.761 definitions=2021-03-06_08:2021-03-03, 2021-03-06 signatures=0 Subject: [dpdk-dev] [PATCH 21/44] net/cnxk: add Tx multi-segment version for cn10k X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Add Tx burst multi-segment version for CN10K. Signed-off-by: Nithin Dabilpuram Signed-off-by: Pavan Nikhilesh --- drivers/net/cnxk/cn10k_tx.c | 124 ++++++++++++++++++++++++++++++++++++++++++++ drivers/net/cnxk/cn10k_tx.h | 71 +++++++++++++++++++++++++ 2 files changed, 195 insertions(+) diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 0fad4c0..d170f31 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -125,6 +125,98 @@ nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, return pkts; } +static __rte_always_inline uint16_t +nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, + uint64_t *cmd, const uint16_t flags) +{ + struct cn10k_eth_txq *txq = tx_queue; + uintptr_t pa0, pa1, lmt_addr = txq->lmt_base; + const rte_iova_t io_addr = txq->io_addr; + uint16_t segdw, lmt_id, burst, left, i; + uint64_t data0, data1; + __uint128_t data128; + uint16_t shft; + + NIX_XMIT_FC_OR_RETURN(txq, pkts); + + cn10k_nix_tx_skeleton(txq, cmd, flags); + + /* Reduce the cached count */ + txq->fc_cache_pkts -= pkts; + + /* Get LMT base address and LMT ID as lcore id */ + ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id); + left = pkts; +again: + burst = left > 32 ? 32 : left; + shft = 16; + data128 = 0; + for (i = 0; i < burst; i++) { + /* Perform header writes for TSO, barrier at + * lmt steorl will suffice. + */ + if (flags & NIX_TX_OFFLOAD_TSO_F) + cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags); + + cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags); + /* Store sg list directly on lmt line */ + segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)lmt_addr, + flags); + lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2); + data128 |= (((__uint128_t)(segdw - 1)) << shft); + shft += 3; + } + + data0 = (uint64_t)data128; + data1 = (uint64_t)(data128 >> 64); + /* Make data0 similar to data1 */ + data0 >>= 16; + /* Trigger LMTST */ + if (burst > 16) { + pa0 = io_addr | (data0 & 0x7) << 4; + data0 &= ~0x7ULL; + /* Move lmtst1..15 sz to bits 63:19 */ + data0 <<= 16; + data0 |= (15ULL << 12); + data0 |= (uint64_t)lmt_id; + + /* STEOR0 */ + roc_lmt_submit_steorl(data0, pa0); + + pa1 = io_addr | (data1 & 0x7) << 4; + data1 &= ~0x7ULL; + data1 <<= 16; + data1 |= ((uint64_t)(burst - 17)) << 12; + data1 |= (uint64_t)(lmt_id + 16); + + /* STEOR1 */ + roc_lmt_submit_steorl(data1, pa1); + } else if (burst) { + pa0 = io_addr | (data0 & 0x7) << 4; + data0 &= ~0x7ULL; + /* Move lmtst1..15 sz to bits 63:19 */ + data0 <<= 16; + data0 |= ((burst - 1) << 12); + data0 |= (uint64_t)lmt_id; + + /* STEOR0 */ + roc_lmt_submit_steorl(data0, pa0); + } + + left -= burst; + rte_io_wmb(); + if (left) { + /* Start processing another burst */ + tx_pkts += burst; + /* Reset lmt base addr */ + lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2); + lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1)); + goto again; + } + + return pkts; +} + #define T(name, f4, f3, f2, f1, f0, sz, flags) \ static uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name( \ @@ -142,6 +234,25 @@ nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts, NIX_TX_FASTPATH_MODES #undef T +#define T(name, f4, f3, f2, f1, f0, sz, flags) \ + static uint16_t __rte_noinline __rte_hot \ + cn10k_nix_xmit_pkts_mseg_##name(void *tx_queue, \ + struct rte_mbuf **tx_pkts, \ + uint16_t pkts) \ + { \ + uint64_t cmd[(sz)]; \ + \ + /* For TSO inner checksum is a must */ \ + if (((flags) & NIX_TX_OFFLOAD_TSO_F) && \ + !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) \ + return 0; \ + return nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd, \ + (flags) | NIX_TX_MULTI_SEG_F); \ + } + +NIX_TX_FASTPATH_MODES +#undef T + static inline void pick_tx_func(struct rte_eth_dev *eth_dev, const eth_tx_burst_t tx_burst[2][2][2][2][2]) @@ -160,6 +271,8 @@ pick_tx_func(struct rte_eth_dev *eth_dev, void cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) { + struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev); + const eth_tx_burst_t nix_eth_tx_burst[2][2][2][2][2] = { #define T(name, f4, f3, f2, f1, f0, sz, flags) \ [f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_##name, @@ -168,7 +281,18 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) #undef T }; + const eth_tx_burst_t nix_eth_tx_burst_mseg[2][2][2][2][2] = { +#define T(name, f4, f3, f2, f1, f0, sz, flags) \ + [f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_mseg_##name, + + NIX_TX_FASTPATH_MODES +#undef T + }; + pick_tx_func(eth_dev, nix_eth_tx_burst); + if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + pick_tx_func(eth_dev, nix_eth_tx_burst_mseg); + rte_mb(); } diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index ce1d4a0..22f7a2b 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -295,6 +295,77 @@ cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, uintptr_t lmt_addr, *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1); } +static __rte_always_inline uint16_t +cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) +{ + struct nix_send_hdr_s *send_hdr; + union nix_send_sg_s *sg; + struct rte_mbuf *m_next; + uint64_t *slist, sg_u; + uint64_t nb_segs; + uint64_t segdw; + uint8_t off, i; + + send_hdr = (struct nix_send_hdr_s *)cmd; + send_hdr->w0.total = m->pkt_len; + send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id); + + if (flags & NIX_TX_NEED_EXT_HDR) + off = 2; + else + off = 0; + + sg = (union nix_send_sg_s *)&cmd[2 + off]; + /* Clear sg->u header before use */ + sg->u &= 0xFC00000000000000; + sg_u = sg->u; + slist = &cmd[3 + off]; + + i = 0; + nb_segs = m->nb_segs; + + /* Fill mbuf segments */ + do { + m_next = m->next; + sg_u = sg_u | ((uint64_t)m->data_len << (i << 4)); + *slist = rte_mbuf_data_iova(m); + /* Set invert df if buffer is not to be freed by H/W */ + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) + sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55)); + /* Mark mempool object as "put" since it is freed by NIX + */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (!(sg_u & (1ULL << (i + 55)))) + __mempool_check_cookies(m->pool, (void **)&m, 1, 0); +#endif + slist++; + i++; + nb_segs--; + if (i > 2 && nb_segs) { + i = 0; + /* Next SG subdesc */ + *(uint64_t *)slist = sg_u & 0xFC00000000000000; + sg->u = sg_u; + sg->segs = 3; + sg = (union nix_send_sg_s *)slist; + sg_u = sg->u; + slist++; + } + m = m_next; + } while (nb_segs); + + sg->u = sg_u; + sg->segs = i; + segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off]; + /* Roundup extra dwords to multiple of 2 */ + segdw = (segdw >> 1) + (segdw & 0x1); + /* Default dwords */ + segdw += (off >> 1) + 1; + send_hdr->w0.sizem1 = segdw - 1; + + return segdw; +} + #define L3L4CSUM_F NIX_TX_OFFLOAD_L3_L4_CSUM_F #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F #define VLAN_F NIX_TX_OFFLOAD_VLAN_QINQ_F