[v5,01/11] common/idpf: refactor single queue Tx data path

Message ID 20230920062236.375308-2-simei.su@intel.com (mailing list archive)
State Accepted, archived
Delegated to: Qi Zhang
Series: update idpf base code

Checks

ci/checkpatch: success (coding style OK)

Commit Message

Simei Su Sept. 20, 2023, 6:22 a.m. UTC
Currently, the single queue Tx data path uses the flex Tx data
descriptor (DTYPE3), which has been removed from the latest idpf spec.
This patch replaces the flex Tx data descriptor with the base Tx data
descriptor for the single queue Tx data path and refines single queue
Tx setup to align with the Tx data path.

Signed-off-by: Simei Su <simei.su@intel.com>
Acked-by: Wenjun Wu <wenjun1.wu@intel.com>
Acked-by: Beilei Xing <beilei.xing@intel.com>
---
 drivers/common/idpf/idpf_common_rxtx.c        | 39 +++++++++----------
 drivers/common/idpf/idpf_common_rxtx.h        |  2 +-
 drivers/common/idpf/idpf_common_rxtx_avx512.c | 37 +++++++++---------
 drivers/net/cpfl/cpfl_rxtx.c                  |  2 +-
 drivers/net/idpf/idpf_rxtx.c                  |  2 +-
 5 files changed, 40 insertions(+), 42 deletions(-)
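
For reference, a minimal sketch (not part of the patch) of what the single
queue path does after this change: the whole 64-bit qw1 of the base Tx data
descriptor is built in one write from the DTYPE, command, offset and buffer
size fields, instead of the per-field stores used with the flex descriptor.
The bit positions and struct layout below are illustrative assumptions
following the usual Intel base Tx descriptor layout; the driver itself uses
the IDPF_TXD_QW1_* macros from the idpf base code and additionally wraps the
value in rte_cpu_to_le_64(), omitted here to keep the sketch self-contained.

#include <stdint.h>

/* Assumed values for illustration only; see the idpf base code macros. */
#define IDPF_TX_DESC_DTYPE_DATA   0ULL  /* base data descriptor DTYPE */
#define IDPF_TXD_QW1_CMD_S        4     /* command bits */
#define IDPF_TXD_QW1_OFFSET_S     16    /* header offsets */
#define IDPF_TXD_QW1_TX_BUF_SZ_S  34    /* buffer size */
#define IDPF_TX_DESC_CMD_EOP      0x0001
#define IDPF_TX_DESC_CMD_RS       0x0002

/* Assumed shape of the base Tx data descriptor: two 64-bit quadwords. */
struct idpf_base_tx_desc {
	uint64_t buf_addr; /* data buffer DMA address */
	uint64_t qw1;      /* DTYPE | CMD | OFFSET | BUF_SZ packed together */
};

/* Hypothetical helper mirroring the hunk in idpf_dp_singleq_xmit_pkts(). */
static inline void
fill_base_tx_desc(volatile struct idpf_base_tx_desc *txd,
		  uint64_t buf_dma_addr, uint16_t data_len,
		  uint64_t td_cmd, uint64_t td_offset)
{
	txd->buf_addr = buf_dma_addr;
	txd->qw1 = IDPF_TX_DESC_DTYPE_DATA |
		   (td_cmd << IDPF_TXD_QW1_CMD_S) |
		   (td_offset << IDPF_TXD_QW1_OFFSET_S) |
		   ((uint64_t)data_len << IDPF_TXD_QW1_TX_BUF_SZ_S);
}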
  

Patch

diff --git a/drivers/common/idpf/idpf_common_rxtx.c b/drivers/common/idpf/idpf_common_rxtx.c
index fc87e3e243..e6d2486272 100644
--- a/drivers/common/idpf/idpf_common_rxtx.c
+++ b/drivers/common/idpf/idpf_common_rxtx.c
@@ -276,14 +276,14 @@  idpf_qc_single_tx_queue_reset(struct idpf_tx_queue *txq)
 	}
 
 	txe = txq->sw_ring;
-	size = sizeof(struct idpf_flex_tx_desc) * txq->nb_tx_desc;
+	size = sizeof(struct idpf_base_tx_desc) * txq->nb_tx_desc;
 	for (i = 0; i < size; i++)
 		((volatile char *)txq->tx_ring)[i] = 0;
 
 	prev = (uint16_t)(txq->nb_tx_desc - 1);
 	for (i = 0; i < txq->nb_tx_desc; i++) {
-		txq->tx_ring[i].qw1.cmd_dtype =
-			rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE);
+		txq->tx_ring[i].qw1 =
+			rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE);
 		txe[i].mbuf =  NULL;
 		txe[i].last_id = i;
 		txe[prev].next_id = i;
@@ -1307,17 +1307,16 @@  idpf_xmit_cleanup(struct idpf_tx_queue *txq)
 	uint16_t nb_tx_to_clean;
 	uint16_t i;
 
-	volatile struct idpf_flex_tx_desc *txd = txq->tx_ring;
+	volatile struct idpf_base_tx_desc *txd = txq->tx_ring;
 
 	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh);
 	if (desc_to_clean_to >= nb_tx_desc)
 		desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
 
 	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
-	/* In the writeback Tx desccriptor, the only significant fields are the 4-bit DTYPE */
-	if ((txd[desc_to_clean_to].qw1.cmd_dtype &
-	     rte_cpu_to_le_16(IDPF_TXD_QW1_DTYPE_M)) !=
-	    rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
+	if ((txd[desc_to_clean_to].qw1 &
+	     rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
+	    rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
 		TX_LOG(DEBUG, "TX descriptor %4u is not done "
 		       "(port=%d queue=%d)", desc_to_clean_to,
 		       txq->port_id, txq->queue_id);
@@ -1331,10 +1330,7 @@  idpf_xmit_cleanup(struct idpf_tx_queue *txq)
 		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
 					    last_desc_cleaned);
 
-	txd[desc_to_clean_to].qw1.cmd_dtype = 0;
-	txd[desc_to_clean_to].qw1.buf_size = 0;
-	for (i = 0; i < RTE_DIM(txd[desc_to_clean_to].qw1.flex.raw); i++)
-		txd[desc_to_clean_to].qw1.flex.raw[i] = 0;
+	txd[desc_to_clean_to].qw1 = 0;
 
 	txq->last_desc_cleaned = desc_to_clean_to;
 	txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean);
@@ -1347,8 +1343,8 @@  uint16_t
 idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			  uint16_t nb_pkts)
 {
-	volatile struct idpf_flex_tx_desc *txd;
-	volatile struct idpf_flex_tx_desc *txr;
+	volatile struct idpf_base_tx_desc *txd;
+	volatile struct idpf_base_tx_desc *txr;
 	union idpf_tx_offload tx_offload = {0};
 	struct idpf_tx_entry *txe, *txn;
 	struct idpf_tx_entry *sw_ring;
@@ -1356,6 +1352,7 @@  idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct rte_mbuf *tx_pkt;
 	struct rte_mbuf *m_seg;
 	uint64_t buf_dma_addr;
+	uint32_t td_offset;
 	uint64_t ol_flags;
 	uint16_t tx_last;
 	uint16_t nb_used;
@@ -1382,6 +1379,7 @@  idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
 		td_cmd = 0;
+		td_offset = 0;
 
 		tx_pkt = *tx_pkts++;
 		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
@@ -1462,9 +1460,10 @@  idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			slen = m_seg->data_len;
 			buf_dma_addr = rte_mbuf_data_iova(m_seg);
 			txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
-			txd->qw1.buf_size = slen;
-			txd->qw1.cmd_dtype = rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
-							      IDPF_FLEX_TXD_QW1_DTYPE_S);
+			txd->qw1 = rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DATA |
+				((uint64_t)td_cmd  << IDPF_TXD_QW1_CMD_S) |
+				((uint64_t)td_offset << IDPF_TXD_QW1_OFFSET_S) |
+				((uint64_t)slen << IDPF_TXD_QW1_TX_BUF_SZ_S));
 
 			txe->last_id = tx_last;
 			tx_id = txe->next_id;
@@ -1473,7 +1472,7 @@  idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		} while (m_seg);
 
 		/* The last packet data descriptor needs End Of Packet (EOP) */
-		td_cmd |= IDPF_TX_FLEX_DESC_CMD_EOP;
+		td_cmd |= IDPF_TX_DESC_CMD_EOP;
 		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
 		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
 
@@ -1482,7 +1481,7 @@  idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			       "%4u (port=%d queue=%d)",
 			       tx_last, txq->port_id, txq->queue_id);
 
-			td_cmd |= IDPF_TX_FLEX_DESC_CMD_RS;
+			td_cmd |= IDPF_TX_DESC_CMD_RS;
 
 			/* Update txq RS bit counters */
 			txq->nb_used = 0;
@@ -1491,7 +1490,7 @@  idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
 			td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
 
-		txd->qw1.cmd_dtype |= rte_cpu_to_le_16(td_cmd << IDPF_FLEX_TXD_QW1_CMD_S);
+		txd->qw1 |= rte_cpu_to_le_16(td_cmd << IDPF_TXD_QW1_CMD_S);
 	}
 
 end_of_tx:
diff --git a/drivers/common/idpf/idpf_common_rxtx.h b/drivers/common/idpf/idpf_common_rxtx.h
index 6cb83fc0a6..b49b1ed737 100644
--- a/drivers/common/idpf/idpf_common_rxtx.h
+++ b/drivers/common/idpf/idpf_common_rxtx.h
@@ -157,7 +157,7 @@  struct idpf_tx_entry {
 /* Structure associated with each TX queue. */
 struct idpf_tx_queue {
 	const struct rte_memzone *mz;		/* memzone for Tx ring */
-	volatile struct idpf_flex_tx_desc *tx_ring;	/* Tx ring virtual address */
+	volatile struct idpf_base_tx_desc *tx_ring;	/* Tx ring virtual address */
 	volatile union {
 		struct idpf_flex_tx_sched_desc *desc_ring;
 		struct idpf_splitq_tx_compl_desc *compl_ring;
diff --git a/drivers/common/idpf/idpf_common_rxtx_avx512.c b/drivers/common/idpf/idpf_common_rxtx_avx512.c
index 2ac46fb1d2..f65e8d512b 100644
--- a/drivers/common/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/common/idpf/idpf_common_rxtx_avx512.c
@@ -1005,7 +1005,7 @@  idpf_tx_singleq_free_bufs_avx512(struct idpf_tx_queue *txq)
 	struct rte_mbuf *m, *free[txq->rs_thresh];
 
 	/* check DD bits on threshold descriptor */
-	if ((txq->tx_ring[txq->next_dd].qw1.cmd_dtype &
+	if ((txq->tx_ring[txq->next_dd].qw1 &
 			rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=
 			rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE))
 		return 0;
@@ -1113,15 +1113,14 @@  tx_backlog_entry_avx512(struct idpf_tx_vec_entry *txep,
 		txep[i].mbuf = tx_pkts[i];
 }
 
-#define IDPF_FLEX_TXD_QW1_BUF_SZ_S 48
 static __rte_always_inline void
-idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
+idpf_singleq_vtx1(volatile struct idpf_base_tx_desc *txdp,
 	  struct rte_mbuf *pkt, uint64_t flags)
 {
 	uint64_t high_qw =
-		(IDPF_TX_DESC_DTYPE_FLEX_DATA << IDPF_FLEX_TXD_QW1_DTYPE_S |
-		 ((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S) |
-		 ((uint64_t)pkt->data_len << IDPF_FLEX_TXD_QW1_BUF_SZ_S));
+		(IDPF_TX_DESC_DTYPE_DATA |
+		 ((uint64_t)flags  << IDPF_TXD_QW1_CMD_S) |
+		 ((uint64_t)pkt->data_len << IDPF_TXD_QW1_TX_BUF_SZ_S));
 
 	__m128i descriptor = _mm_set_epi64x(high_qw,
 					    pkt->buf_iova + pkt->data_off);
@@ -1131,11 +1130,11 @@  idpf_singleq_vtx1(volatile struct idpf_flex_tx_desc *txdp,
 #define IDPF_TX_LEN_MASK 0xAA
 #define IDPF_TX_OFF_MASK 0x55
 static __rte_always_inline void
-idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
+idpf_singleq_vtx(volatile struct idpf_base_tx_desc *txdp,
 	 struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)
 {
-	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_FLEX_DATA  |
-			((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S));
+	const uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_DATA  |
+			((uint64_t)flags  << IDPF_TXD_QW1_CMD_S));
 
 	/* if unaligned on 32-bit boundary, do one to align */
 	if (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {
@@ -1148,19 +1147,19 @@  idpf_singleq_vtx(volatile struct idpf_flex_tx_desc *txdp,
 		uint64_t hi_qw3 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[3]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw2 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[2]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw1 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[1]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 		uint64_t hi_qw0 =
 			hi_qw_tmpl |
 			((uint64_t)pkt[0]->data_len <<
-			 IDPF_FLEX_TXD_QW1_BUF_SZ_S);
+			 IDPF_TXD_QW1_TX_BUF_SZ_S);
 
 		__m512i desc0_3 =
 			_mm512_set_epi64
@@ -1187,11 +1186,11 @@  idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
 					 uint16_t nb_pkts)
 {
 	struct idpf_tx_queue *txq = tx_queue;
-	volatile struct idpf_flex_tx_desc *txdp;
+	volatile struct idpf_base_tx_desc *txdp;
 	struct idpf_tx_vec_entry *txep;
 	uint16_t n, nb_commit, tx_id;
-	uint64_t flags = IDPF_TX_FLEX_DESC_CMD_EOP;
-	uint64_t rs = IDPF_TX_FLEX_DESC_CMD_RS | flags;
+	uint64_t flags = IDPF_TX_DESC_CMD_EOP;
+	uint64_t rs = IDPF_TX_DESC_CMD_RS | flags;
 
 	/* cross rx_thresh boundary is not allowed */
 	nb_pkts = RTE_MIN(nb_pkts, txq->rs_thresh);
@@ -1238,9 +1237,9 @@  idpf_singleq_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pk
 
 	tx_id = (uint16_t)(tx_id + nb_commit);
 	if (tx_id > txq->next_rs) {
-		txq->tx_ring[txq->next_rs].qw1.cmd_dtype |=
-			rte_cpu_to_le_64(((uint64_t)IDPF_TX_FLEX_DESC_CMD_RS) <<
-					 IDPF_FLEX_TXD_QW1_CMD_S);
+		txq->tx_ring[txq->next_rs].qw1 |=
+			rte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<
+					 IDPF_TXD_QW1_CMD_S);
 		txq->next_rs =
 			(uint16_t)(txq->next_rs + txq->rs_thresh);
 	}
diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c
index 2ef6871a85..ab8bec4645 100644
--- a/drivers/net/cpfl/cpfl_rxtx.c
+++ b/drivers/net/cpfl/cpfl_rxtx.c
@@ -135,7 +135,7 @@  cpfl_dma_zone_reserve(struct rte_eth_dev *dev, uint16_t queue_idx,
 			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_sched_desc),
 					      CPFL_DMA_MEM_ALIGN);
 		else
-			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_desc),
+			ring_size = RTE_ALIGN(len * sizeof(struct idpf_base_tx_desc),
 					      CPFL_DMA_MEM_ALIGN);
 		memcpy(ring_name, "cpfl Tx ring", sizeof("cpfl Tx ring"));
 		break;
diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
index 3e3d81ca6d..64f2235580 100644
--- a/drivers/net/idpf/idpf_rxtx.c
+++ b/drivers/net/idpf/idpf_rxtx.c
@@ -74,7 +74,7 @@  idpf_dma_zone_reserve(struct rte_eth_dev *dev, uint16_t queue_idx,
 			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_sched_desc),
 					      IDPF_DMA_MEM_ALIGN);
 		else
-			ring_size = RTE_ALIGN(len * sizeof(struct idpf_flex_tx_desc),
+			ring_size = RTE_ALIGN(len * sizeof(struct idpf_base_tx_desc),
 					      IDPF_DMA_MEM_ALIGN);
 		rte_memcpy(ring_name, "idpf Tx ring", sizeof("idpf Tx ring"));
 		break;