From patchwork Mon Sep 4 07:02:58 2023
X-Patchwork-Submitter: Simei Su
X-Patchwork-Id: 131137
X-Patchwork-Delegate: qi.z.zhang@intel.com
From: Simei Su
To: jingjing.wu@intel.com, beilei.xing@intel.com, qi.z.zhang@intel.com
Cc: dev@dpdk.org, wenjun1.wu@intel.com, Simei Su
Subject: [PATCH v2] common/idpf: refactor single queue Tx function
Date: Mon, 4 Sep 2023 15:02:58 +0800
Message-Id: <20230904070258.2130174-1-simei.su@intel.com>
In-Reply-To: <20230825072106.1819603-1-simei.su@intel.com>
References: <20230825072106.1819603-1-simei.su@intel.com>
List-Id: DPDK patches and discussions

This patch replaces the flex Tx descriptor with the base Tx descriptor to
align with kernel driver practice.

Signed-off-by: Simei Su
---
v2:
* Refine commit title and commit log.
* Remove redundant definition.
* Modify base mode context TSO descriptor.
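For reviewers new to the two descriptor layouts, here is a minimal sketch of
the idea behind the change: the flex data descriptor exposes qw1 as separate
cmd_dtype/buf_size fields, whereas the base data descriptor packs DTYPE,
command bits, header offsets and buffer size into a single 64-bit qw1, so the
datapath can compose it with one shift-and-or expression as done in
idpf_dp_singleq_xmit_pkts(). The struct and the EX_* shift values below are
illustrative assumptions made up for this example, not the authoritative idpf
base-mode definitions (those live under drivers/common/idpf/base/); byte-order
conversion is omitted for brevity.

/* Illustrative sketch only: EX_* shift values are assumptions for this
 * example, not real idpf base-mode register definitions.
 */
#include <stdint.h>

#define EX_TXD_QW1_DTYPE_S      0   /* assumed 4-bit descriptor type field */
#define EX_TXD_QW1_CMD_S        4   /* assumed command bits (EOP, RS, ...) */
#define EX_TXD_QW1_OFFSET_S     16  /* assumed header offset field         */
#define EX_TXD_QW1_TX_BUF_SZ_S  34  /* assumed buffer size field           */

/* Base-mode data descriptor: buffer address plus one packed qword. */
struct ex_base_tx_desc {
	uint64_t buf_addr;
	uint64_t qw1;
};

static inline void
ex_fill_base_desc(struct ex_base_tx_desc *txd, uint64_t dma_addr,
		  uint64_t dtype, uint64_t cmd, uint64_t offset, uint64_t size)
{
	/* Everything the flex path wrote into separate cmd_dtype/buf_size
	 * fields becomes a single 64-bit OR on the base descriptor.
	 */
	txd->buf_addr = dma_addr;
	txd->qw1 = (dtype << EX_TXD_QW1_DTYPE_S) |
		   (cmd << EX_TXD_QW1_CMD_S) |
		   (offset << EX_TXD_QW1_OFFSET_S) |
		   (size << EX_TXD_QW1_TX_BUF_SZ_S);
}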
 drivers/common/idpf/idpf_common_rxtx.c        | 76 +++++++++++++------
 drivers/common/idpf/idpf_common_rxtx.h        |  2 +-
 drivers/common/idpf/idpf_common_rxtx_avx512.c | 37 +++++----
 drivers/net/idpf/idpf_rxtx.c                  |  2 +-
 4 files changed, 73 insertions(+), 44 deletions(-)

diff --git a/drivers/common/idpf/idpf_common_rxtx.c b/drivers/common/idpf/idpf_common_rxtx.c
index fc87e3e243..01a8685ea3 100644
--- a/drivers/common/idpf/idpf_common_rxtx.c
+++ b/drivers/common/idpf/idpf_common_rxtx.c
@@ -276,14 +276,14 @@ idpf_qc_single_tx_queue_reset(struct idpf_tx_queue *txq)
 	}
 
 	txe = txq->sw_ring;
-	size = sizeof(struct idpf_flex_tx_desc) * txq->nb_tx_desc;
+	size = sizeof(struct idpf_base_tx_desc) * txq->nb_tx_desc;
 	for (i = 0; i < size; i++)
 		((volatile char *)txq->tx_ring)[i] = 0;
 
 	prev = (uint16_t)(txq->nb_tx_desc - 1);
 	for (i = 0; i < txq->nb_tx_desc; i++) {
-		txq->tx_ring[i].qw1.cmd_dtype =
-			rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE);
+		txq->tx_ring[i].qw1 =
+			rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
 		txe[i].mbuf = NULL;
 		txe[i].last_id = i;
 		txe[prev].next_id = i;
@@ -823,6 +823,37 @@ idpf_calc_context_desc(uint64_t flags)
 	return 0;
 }
 
+/* set TSO context descriptor for single queue
+ */
+static inline void
+idpf_set_singleq_tso_ctx(struct rte_mbuf *mbuf,
+			 union idpf_tx_offload tx_offload,
+			 volatile struct idpf_base_tx_ctx_desc *ctx_desc)
+{
+	uint32_t tso_len;
+	uint8_t hdr_len;
+	uint64_t qw1;
+
+	if (tx_offload.l4_len == 0) {
+		TX_LOG(DEBUG, "L4 length set to 0");
+		return;
+	}
+
+	hdr_len = tx_offload.l2_len +
+		tx_offload.l3_len +
+		tx_offload.l4_len;
+	tso_len = mbuf->pkt_len - hdr_len;
+	qw1 = (uint64_t)IDPF_TX_DESC_DTYPE_CTX;
+
+	qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S;
+	qw1 |= ((uint64_t)tso_len << IDPF_TXD_CTX_QW1_TSO_LEN_S) &
+		IDPF_TXD_CTX_QW1_TSO_LEN_M;
+	qw1 |= ((uint64_t)mbuf->tso_segsz << IDPF_TXD_CTX_QW1_MSS_S) &
+		IDPF_TXD_CTX_QW1_MSS_M;
+
+	ctx_desc->qw1 = rte_cpu_to_le_64(qw1);
+}
+
 /* set TSO context descriptor
  */
 static inline void
@@ -1307,17 +1338,16 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
 	uint16_t nb_tx_to_clean;
 	uint16_t i;
 
-	volatile struct idpf_flex_tx_desc *txd = txq->tx_ring;
+	volatile struct idpf_base_tx_desc *txd = txq->tx_ring;
 
 	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh);
 	if (desc_to_clean_to >= nb_tx_desc)
 		desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
 
 	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
-	/* In the writeback Tx desccriptor, the only significant fields are the 4-bit DTYPE */
-	if ((txd[desc_to_clean_to].qw1.cmd_dtype &
-	     rte_cpu_to_le_16(IDPF_TXD_QW1_DTYPE_M)) !=
-	    rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
+	if ((txd[desc_to_clean_to].qw1 &
+	     rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
+	    rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
 		TX_LOG(DEBUG, "TX descriptor %4u is not done "
 		       "(port=%d queue=%d)", desc_to_clean_to,
 		       txq->port_id, txq->queue_id);
@@ -1331,10 +1361,7 @@ idpf_xmit_cleanup(struct idpf_tx_queue *txq)
 
 	nb_tx_to_clean = (uint16_t)(desc_to_clean_to - last_desc_cleaned);
 
-	txd[desc_to_clean_to].qw1.cmd_dtype = 0;
-	txd[desc_to_clean_to].qw1.buf_size = 0;
-	for (i = 0; i < RTE_DIM(txd[desc_to_clean_to].qw1.flex.raw); i++)
-		txd[desc_to_clean_to].qw1.flex.raw[i] = 0;
+	txd[desc_to_clean_to].qw1 = 0;
 
 	txq->last_desc_cleaned = desc_to_clean_to;
 	txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean);
@@ -1347,8 +1374,8 @@ uint16_t
 idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			  uint16_t nb_pkts)
 {
-	volatile struct idpf_flex_tx_desc *txd;
-	volatile struct idpf_flex_tx_desc *txr;
+	volatile struct idpf_base_tx_desc *txd;
+	volatile struct idpf_base_tx_desc *txr;
 	union idpf_tx_offload tx_offload = {0};
 	struct idpf_tx_entry *txe, *txn;
 	struct idpf_tx_entry *sw_ring;
@@ -1356,6 +1383,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct rte_mbuf *tx_pkt;
 	struct rte_mbuf *m_seg;
 	uint64_t buf_dma_addr;
+	uint32_t td_offset;
 	uint64_t ol_flags;
 	uint16_t tx_last;
 	uint16_t nb_used;
@@ -1382,6 +1410,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
 		td_cmd = 0;
+		td_offset = 0;
 
 		tx_pkt = *tx_pkts++;
 		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
@@ -1428,8 +1457,8 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 		if (nb_ctx != 0) {
 			/* Setup TX context descriptor if required */
-			volatile union idpf_flex_tx_ctx_desc *ctx_txd =
-				(volatile union idpf_flex_tx_ctx_desc *)
+			volatile struct idpf_base_tx_ctx_desc *ctx_txd =
+				(volatile struct idpf_base_tx_ctx_desc *)
 				&txr[tx_id];
 
 			txn = &sw_ring[txe->next_id];
@@ -1441,7 +1470,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 			/* TSO enabled */
 			if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0)
-				idpf_set_splitq_tso_ctx(tx_pkt, tx_offload,
+				idpf_set_singleq_tso_ctx(tx_pkt, tx_offload,
 							ctx_txd);
 
 			txe->last_id = tx_last;
@@ -1462,9 +1491,10 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			slen = m_seg->data_len;
 			buf_dma_addr = rte_mbuf_data_iova(m_seg);
 			txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
-			txd->qw1.buf_size = slen;
-			txd->qw1.cmd_dtype = rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
-							      IDPF_FLEX_TXD_QW1_DTYPE_S);
+			txd->qw1 = rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DATA |
+				((uint64_t)td_cmd << IDPF_TXD_QW1_CMD_S) |
+				((uint64_t)td_offset << IDPF_TXD_QW1_OFFSET_S) |
+				((uint64_t)slen << IDPF_TXD_QW1_TX_BUF_SZ_S));
 
 			txe->last_id = tx_last;
 			tx_id = txe->next_id;
@@ -1473,7 +1503,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		} while (m_seg);
 
 		/* The last packet data descriptor needs End Of Packet (EOP) */
-		td_cmd |= IDPF_TX_FLEX_DESC_CMD_EOP;
+		td_cmd |= IDPF_TX_DESC_CMD_EOP;
 		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
 		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
@@ -1482,7 +1512,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			       "%4u (port=%d queue=%d)",
 			       tx_last, txq->port_id, txq->queue_id);
 
-			td_cmd |= IDPF_TX_FLEX_DESC_CMD_RS;
+			td_cmd |= IDPF_TX_DESC_CMD_RS;
 
 			/* Update txq RS bit counters */
 			txq->nb_used = 0;
@@ -1491,7 +1521,7 @@ idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 		if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
 			td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
 
-		txd->qw1.cmd_dtype |= rte_cpu_to_le_16(td_cmd << IDPF_FLEX_TXD_QW1_CMD_S);
+		txd->qw1 |= rte_cpu_to_le_16(td_cmd << IDPF_TXD_QW1_CMD_S);
 	}
 
 end_of_tx:
diff --git a/drivers/common/idpf/idpf_common_rxtx.h b/drivers/common/idpf/idpf_common_rxtx.h
index 6cb83fc0a6..b49b1ed737 100644
--- a/drivers/common/idpf/idpf_common_rxtx.h
+++ b/drivers/common/idpf/idpf_common_rxtx.h
@@ -157,7 +157,7 @@ struct idpf_tx_entry {
 /* Structure associated with each TX queue. */
 struct idpf_tx_queue {
 	const struct rte_memzone *mz;		/* memzone for Tx ring */
-	volatile struct idpf_flex_tx_desc *tx_ring;	/* Tx ring virtual address */
+	volatile struct idpf_base_tx_desc *tx_ring;	/* Tx ring virtual address */
 	volatile union {
 		struct idpf_flex_tx_sched_desc *desc_ring;
 		struct idpf_splitq_tx_compl_desc *compl_ring;
diff --git a/drivers/common/idpf/idpf_common_rxtx_avx512.c b/drivers/common/idpf/idpf_common_rxtx_avx512.c
index 81312617cc..afb0014a13 100644
--- a/drivers/common/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/common/idpf/idpf_common_rxtx_avx512.c
@@ -1005,7 +1005,7 @@ idpf_tx_singleq_free_bufs_avx512(struct idpf_tx_queue *txq)
 	struct rte_mbuf *m, *free[txq->rs_thresh];
 
 	/* check DD bits on threshold descriptor */
-	if ((txq->tx_ring[txq->next_dd].qw1.cmd_dtype &
+	if ((txq->tx_ring[txq->next_dd].qw1 &
 	     rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
 	    rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE))
 		return 0;
@@ -1113,15 +1113,14 @@ tx_backlog_entry_avx512(struct idpf_tx_vec_entry *txep,
 		txep[i].mbuf = tx_pkts[i];
 }
 
-#define IDPF_FLEX_TXD_QW1_BUF_SZ_S 48
 static __rte_always_inline void
-idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
+idpf_singleq_vtx1(volatile struct idpf_base_tx_desc *txdp,
 		  struct rte_mbuf *pkt, uint64_t flags)
 {
 	uint64_t high_qw =
-		(IDPF_TX_DESC_DTYPE_FLEX_DATA << IDPF_FLEX_TXD_QW1_DTYPE_S |
-		 ((uint64_t)flags << IDPF_FLEX_TXD_QW1_CMD_S) |
-		 ((uint64_t)pkt->data_len << IDPF_FLEX_TXD_QW1_BUF_SZ_S));
+		(IDPF_TX_DESC_DTYPE_DATA |
+		 ((uint64_t)flags << IDPF_TXD_QW1_CMD_S) |
+		 ((uint64_t)pkt->data_len << IDPF_TXD_QW1_TX_BUF_SZ_S));
 
 	__m128i descriptor = _mm_set_epi64x(high_qw,
 					    pkt->buf_iova + pkt->data_off);
@@ -1131,11 +1130,11 @@ idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
 #define IDPF_TX_LEN_MASK 0xAA
 #define IDPF_TX_OFF_MASK 0x55
 static __rte_always_inline void
-idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
+idpf_singleq_vtx(volatile struct idpf_base_tx_desc *txdp,
 		 struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
 {
-	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_FLEX_DATA |
-			((uint64_t)flags << IDPF_FLEX_TXD_QW1_CMD_S));
+	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_DATA |
+			((uint64_t)flags << IDPF_TXD_QW1_CMD_S));
 
 	/* if unaligned on 32-bit boundary, do one to align */
 	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
@@ -1148,19 +1147,19 @@ idpf_singleq_vtx(volatile struct idpf_base_tx_desc *txdp,
 		uint64_t hi_qw3 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[3]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw2 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[2]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw1 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[1]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw0 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[0]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 
 		__m512i desc0_3 =
 			_mm512_set_epi64
@@ -1187,11 +1186,11 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
 			  uint16_t nb_pkts)
 {
 	struct idpf_tx_queue *txq = tx_queue;
-	volatile struct idpf_flex_tx_desc *txdp;
+	volatile struct idpf_base_tx_desc *txdp;
 	struct idpf_tx_vec_entry *txep;
 	uint16_t n, nb_commit, tx_id;
-	uint64_t flags = IDPF_TX_FLEX_DESC_CMD_EOP;
-	uint64_t rs = IDPF_TX_FLEX_DESC_CMD_RS | flags;
+	uint64_t flags = IDPF_TX_DESC_CMD_EOP;
+	uint64_t rs = IDPF_TX_DESC_CMD_RS | flags;
 
 	/* cross rx_thresh boundary is not allowed */
 	nb_pkts = RTE_MIN(nb_pkts, txq->rs_thresh);
@@ -1238,9 +1237,9 @@ idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
 
 	tx_id = (uint16_t)(tx_id + nb_commit);
 	if (tx_id > txq->next_rs) {
-		txq->tx_ring[txq->next_rs].qw1.cmd_dtype |=
-			rte_cpu_to_le_64(((uint64_t)IDPF_TX_FLEX_DESC_CMD_RS) <<
-					 IDPF_FLEX_TXD_QW1_CMD_S);
+		txq->tx_ring[txq->next_rs].qw1 |=
+			rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
+					 IDPF_TXD_QW1_CMD_S);
 		txq->next_rs = (uint16_t)(txq->next_rs + txq->rs_thresh);
 	}
 
diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
index 3e3d81ca6d..64f2235580 100644
--- a/drivers/net/idpf/idpf_rxtx.c
+++ b/drivers/net/idpf/idpf_rxtx.c
@@ -74,7 +74,7 @@ idpf_dma_zone_reserve(struct rte_eth_dev *dev, uint16_t queue_idx,
 			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_sched_desc),
 					      IDPF_DMA_MEM_ALIGN);
 		else
-			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_desc),
+			ring_size = RTE_ALIGN(len * sizeof(struct idpf_base_tx_desc),
 					      IDPF_DMA_MEM_ALIGN);
 		rte_memcpy(ring_name, "idpf Tx ring", sizeof("idpf Tx ring"));
 		break;