[13/15] common/idpf: add scalar data path

Message ID 20221208075309.37852-14-beilei.xing@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Qi Zhang
Series: net/idpf: refactor idpf pmd

Checks

Context       | Check   | Description
ci/checkpatch | warning | coding style issues

Commit Message

Xing, Beilei Dec. 8, 2022, 7:53 a.m. UTC
  From: Beilei Xing <beilei.xing@intel.com>

Add a timestamp field to the idpf_adapter structure.
Add the scalar data path for both the single queue and split queue models.

Signed-off-by: Beilei Xing <beilei.xing@intel.com>
---
 drivers/common/idpf/idpf_common_device.h |   5 +
 drivers/common/idpf/idpf_common_logs.h   |  24 +
 drivers/common/idpf/idpf_common_rxtx.c   | 985 +++++++++++++++++++++++
 drivers/common/idpf/idpf_common_rxtx.h   |  87 ++
 drivers/common/idpf/version.map          |   6 +
 drivers/net/idpf/idpf_ethdev.c           |   2 -
 drivers/net/idpf/idpf_ethdev.h           |   4 -
 drivers/net/idpf/idpf_logs.h             |  24 -
 drivers/net/idpf/idpf_rxtx.c             | 936 +--------------------
 drivers/net/idpf/idpf_rxtx.h             | 132 ---
 drivers/net/idpf/idpf_rxtx_vec_avx512.c  |   6 +-
 11 files changed, 1111 insertions(+), 1100 deletions(-)
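
Note: the burst functions exported here are meant to be wired into the
ethdev data path function pointers by drivers built on this common
module. A minimal sketch of the selection logic (illustrative only, not
part of this patch; it assumes <rte_ethdev.h> plus idpf_common_rxtx.h,
and follows the queue-model encoding documented in struct idpf_adapter):

static void
example_select_data_path(struct rte_eth_dev *dev, struct idpf_adapter *ad)
{
	/* rxq_model/txq_model: 0 means split queue model, non-0 single */
	dev->rx_pkt_burst = (ad->rxq_model == 0) ?
			    idpf_splitq_recv_pkts : idpf_singleq_recv_pkts;
	dev->tx_pkt_burst = (ad->txq_model == 0) ?
			    idpf_splitq_xmit_pkts : idpf_singleq_xmit_pkts;
	dev->tx_pkt_prepare = idpf_prep_pkts;
}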
  

Patch

diff --git a/drivers/common/idpf/idpf_common_device.h b/drivers/common/idpf/idpf_common_device.h
index c007c0b705..6c9a65ae3b 100644
--- a/drivers/common/idpf/idpf_common_device.h
+++ b/drivers/common/idpf/idpf_common_device.h
@@ -23,6 +23,8 @@ 
 #define IDPF_TX_COMPLQ_PER_GRP	1
 #define IDPF_TXQ_PER_GRP	1
 
+#define IDPF_MIN_FRAME_SIZE	14
+
 #define IDPF_MAX_PKT_TYPE	1024
 
 #define IDPF_DFLT_INTERVAL	16
@@ -43,6 +45,9 @@  struct idpf_adapter {
 
 	uint32_t txq_model; /* 0 - split queue model, non-0 - single queue model */
 	uint32_t rxq_model; /* 0 - split queue model, non-0 - single queue model */
+
+	/* For timestamp */
+	uint64_t time_hw;
 };
 
 struct idpf_chunks_info {
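
Note: the time_hw field added above caches the most recent full 64-bit
PHC read; the Rx paths later in this patch extend each descriptor's
32-bit timestamp against it. A standalone sketch of that nearest-value
reconstruction, mirroring idpf_tstamp_convert_32b_64b() in
idpf_common_rxtx.c below (illustrative only, assumes <stdint.h>):

static uint64_t
example_extend_ts32(uint64_t time_hw, uint32_t ts32)
{
	uint32_t lo = (uint32_t)time_hw;
	uint32_t delta = ts32 - lo;

	/* E.g. time_hw = 0x100000100 and ts32 = 0x50: delta wraps above
	 * UINT32_MAX / 2, so subtract (lo - ts32) = 0xB0, which yields
	 * 0x100000050 with the borrow handled by 64-bit arithmetic.
	 */
	if (delta > UINT32_MAX / 2)
		return time_hw - (uint32_t)(lo - ts32);
	return time_hw + delta;
}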
diff --git a/drivers/common/idpf/idpf_common_logs.h b/drivers/common/idpf/idpf_common_logs.h
index fe36562769..63ad2195be 100644
--- a/drivers/common/idpf/idpf_common_logs.h
+++ b/drivers/common/idpf/idpf_common_logs.h
@@ -20,4 +20,28 @@  extern int idpf_common_logtype;
 #define DRV_LOG(level, fmt, args...)		\
 	DRV_LOG_RAW(level, fmt "\n", ## args)
 
+#ifdef RTE_LIBRTE_IDPF_DEBUG_RX
+#define RX_LOG(level, ...) \
+	RTE_LOG(level, \
+		PMD, \
+		RTE_FMT("%s(): " \
+			RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
+			__func__, \
+			RTE_FMT_TAIL(__VA_ARGS__,)))
+#else
+#define RX_LOG(level, fmt, args...) do { } while (0)
+#endif
+
+#ifdef RTE_LIBRTE_IDPF_DEBUG_TX
+#define TX_LOG(level, ...) \
+	RTE_LOG(level, \
+		PMD, \
+		RTE_FMT("%s(): " \
+			RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
+			__func__, \
+			RTE_FMT_TAIL(__VA_ARGS__,)))
+#else
+#define TX_LOG(level, fmt, args...) do { } while (0)
+#endif
+
 #endif /* _IDPF_COMMON_LOGS_H_ */
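
Note: RX_LOG/TX_LOG above mirror the PMD_RX_LOG/PMD_TX_LOG macros
removed from drivers/net/idpf/idpf_logs.h later in this patch; both
compile to no-ops unless RTE_LIBRTE_IDPF_DEBUG_RX or
RTE_LIBRTE_IDPF_DEBUG_TX is defined at build time. Typical call, as
used throughout the new data path:

	RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u",
	       rxq->port_id, rxq->queue_id);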
diff --git a/drivers/common/idpf/idpf_common_rxtx.c b/drivers/common/idpf/idpf_common_rxtx.c
index 440acc55a6..3030f89bf1 100644
--- a/drivers/common/idpf/idpf_common_rxtx.c
+++ b/drivers/common/idpf/idpf_common_rxtx.c
@@ -3,8 +3,13 @@ 
  */
 
 #include <rte_mbuf_dyn.h>
+#include <rte_errno.h>
+
 #include "idpf_common_rxtx.h"
 
+int idpf_timestamp_dynfield_offset = -1;
+uint64_t idpf_timestamp_dynflag;
+
 int
 check_rx_thresh(uint16_t nb_desc, uint16_t thresh)
 {
@@ -337,6 +342,23 @@  idpf_tx_queue_release(void *txq)
 	rte_free(q);
 }
 
+int
+idpf_register_ts_mbuf(struct idpf_rx_queue *rxq)
+{
+	int err;
+
+	if ((rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0) {
+		/* Register mbuf field and flag for Rx timestamp */
+		err = rte_mbuf_dyn_rx_timestamp_register(&idpf_timestamp_dynfield_offset,
+							 &idpf_timestamp_dynflag);
+		if (err != 0) {
+			DRV_LOG(ERR,
+				"Cannot register mbuf field/flag for timestamp");
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
 int
 idpf_alloc_single_rxq_mbufs(struct idpf_rx_queue *rxq)
 {
@@ -412,3 +434,966 @@  idpf_alloc_split_rxq_mbufs(struct idpf_rx_queue *rxq)
 
 	return 0;
 }
+
+#define IDPF_TIMESYNC_REG_WRAP_GUARD_BAND  10000
+/* Helper function to convert a 32b nanoseconds timestamp to 64b. */
+static inline uint64_t
+idpf_tstamp_convert_32b_64b(struct idpf_adapter *ad, uint32_t flag,
+			    uint32_t in_timestamp)
+{
+#ifdef RTE_ARCH_X86_64
+	struct idpf_hw *hw = &ad->hw;
+	const uint64_t mask = 0xFFFFFFFF;
+	uint32_t hi, lo, lo2, delta;
+	uint64_t ns;
+
+	if (flag != 0) {
+		IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
+		IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_EXEC_CMD_M |
+			       PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
+		lo = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
+		hi = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
+		/*
+		 * On a typical system, the delta between lo and lo2 is ~1000ns,
+		 * so 10000 seems a large enough but not overly big guard band.
+		 */
+		if (lo > (UINT32_MAX - IDPF_TIMESYNC_REG_WRAP_GUARD_BAND))
+			lo2 = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
+		else
+			lo2 = lo;
+
+		if (lo2 < lo) {
+			lo = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
+			hi = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
+		}
+
+		ad->time_hw = ((uint64_t)hi << 32) | lo;
+	}
+
+	delta = (in_timestamp - (uint32_t)(ad->time_hw & mask));
+	if (delta > (mask / 2)) {
+		delta = ((uint32_t)(ad->time_hw & mask) - in_timestamp);
+		ns = ad->time_hw - delta;
+	} else {
+		ns = ad->time_hw + delta;
+	}
+
+	return ns;
+#else /* !RTE_ARCH_X86_64 */
+	RTE_SET_USED(ad);
+	RTE_SET_USED(flag);
+	RTE_SET_USED(in_timestamp);
+	return 0;
+#endif /* RTE_ARCH_X86_64 */
+}
+
+#define IDPF_RX_FLEX_DESC_ADV_STATUS0_XSUM_S				\
+	(RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_S) |     \
+	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_S) |     \
+	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_S) |    \
+	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_S))
+
+static inline uint64_t
+idpf_splitq_rx_csum_offload(uint8_t err)
+{
+	uint64_t flags = 0;
+
+	if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L3L4P_S)) == 0))
+		return flags;
+
+	if (likely((err & IDPF_RX_FLEX_DESC_ADV_STATUS0_XSUM_S) == 0)) {
+		flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD |
+			  RTE_MBUF_F_RX_L4_CKSUM_GOOD);
+		return flags;
+	}
+
+	if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_S)) != 0))
+		flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
+	else
+		flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
+
+	if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_S)) != 0))
+		flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
+	else
+		flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
+
+	if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_S)) != 0))
+		flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;
+
+	if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_S)) != 0))
+		flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD;
+	else
+		flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD;
+
+	return flags;
+}
+
+#define IDPF_RX_FLEX_DESC_ADV_HASH1_S  0
+#define IDPF_RX_FLEX_DESC_ADV_HASH2_S  16
+#define IDPF_RX_FLEX_DESC_ADV_HASH3_S  24
+
+static inline uint64_t
+idpf_splitq_rx_rss_offload(struct rte_mbuf *mb,
+			   volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc)
+{
+	uint8_t status_err0_qw0;
+	uint64_t flags = 0;
+
+	status_err0_qw0 = rx_desc->status_err0_qw0;
+
+	if ((status_err0_qw0 & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_RSS_VALID_S)) != 0) {
+		flags |= RTE_MBUF_F_RX_RSS_HASH;
+		mb->hash.rss = (rte_le_to_cpu_16(rx_desc->hash1) <<
+				IDPF_RX_FLEX_DESC_ADV_HASH1_S) |
+			((uint32_t)(rx_desc->ff2_mirrid_hash2.hash2) <<
+			 IDPF_RX_FLEX_DESC_ADV_HASH2_S) |
+			((uint32_t)(rx_desc->hash3) <<
+			 IDPF_RX_FLEX_DESC_ADV_HASH3_S);
+	}
+
+	return flags;
+}
+
+static void
+idpf_split_rx_bufq_refill(struct idpf_rx_queue *rx_bufq)
+{
+	volatile struct virtchnl2_splitq_rx_buf_desc *rx_buf_ring;
+	volatile struct virtchnl2_splitq_rx_buf_desc *rx_buf_desc;
+	uint16_t nb_refill = rx_bufq->rx_free_thresh;
+	uint16_t nb_desc = rx_bufq->nb_rx_desc;
+	uint16_t next_avail = rx_bufq->rx_tail;
+	struct rte_mbuf *nmb[rx_bufq->rx_free_thresh];
+	uint64_t dma_addr;
+	uint16_t delta;
+	int i;
+
+	if (rx_bufq->nb_rx_hold < rx_bufq->rx_free_thresh)
+		return;
+
+	rx_buf_ring = rx_bufq->rx_ring;
+	delta = nb_desc - next_avail;
+	if (unlikely(delta < nb_refill)) {
+		if (likely(rte_pktmbuf_alloc_bulk(rx_bufq->mp, nmb, delta) == 0)) {
+			for (i = 0; i < delta; i++) {
+				rx_buf_desc = &rx_buf_ring[next_avail + i];
+				rx_bufq->sw_ring[next_avail + i] = nmb[i];
+				dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb[i]));
+				rx_buf_desc->hdr_addr = 0;
+				rx_buf_desc->pkt_addr = dma_addr;
+			}
+			nb_refill -= delta;
+			next_avail = 0;
+			rx_bufq->nb_rx_hold -= delta;
+		} else {
+			rx_bufq->rx_stats.mbuf_alloc_failed += nb_desc - next_avail;
+			RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u",
+			       rx_bufq->port_id, rx_bufq->queue_id);
+			return;
+		}
+	}
+
+	if (nb_desc - next_avail >= nb_refill) {
+		if (likely(rte_pktmbuf_alloc_bulk(rx_bufq->mp, nmb, nb_refill) == 0)) {
+			for (i = 0; i < nb_refill; i++) {
+				rx_buf_desc = &rx_buf_ring[next_avail + i];
+				rx_bufq->sw_ring[next_avail + i] = nmb[i];
+				dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb[i]));
+				rx_buf_desc->hdr_addr = 0;
+				rx_buf_desc->pkt_addr = dma_addr;
+			}
+			next_avail += nb_refill;
+			rx_bufq->nb_rx_hold -= nb_refill;
+		} else {
+			rx_bufq->rx_stats.mbuf_alloc_failed += nb_desc - next_avail;
+			RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u",
+			       rx_bufq->port_id, rx_bufq->queue_id);
+		}
+	}
+
+	IDPF_PCI_REG_WRITE(rx_bufq->qrx_tail, next_avail);
+
+	rx_bufq->rx_tail = next_avail;
+}
+
+uint16_t
+idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+		      uint16_t nb_pkts)
+{
+	volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc_ring;
+	volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc;
+	uint16_t pktlen_gen_bufq_id;
+	struct idpf_rx_queue *rxq;
+	const uint32_t *ptype_tbl;
+	uint8_t status_err0_qw1;
+	struct idpf_adapter *ad;
+	struct rte_mbuf *rxm;
+	uint16_t rx_id_bufq1;
+	uint16_t rx_id_bufq2;
+	uint64_t pkt_flags;
+	uint16_t pkt_len;
+	uint16_t bufq_id;
+	uint16_t gen_id;
+	uint16_t rx_id;
+	uint16_t nb_rx;
+	uint64_t ts_ns;
+
+	nb_rx = 0;
+	rxq = rx_queue;
+
+	if (unlikely(rxq == NULL) || unlikely(!rxq->q_started))
+		return nb_rx;
+
+	ad = rxq->adapter;
+
+	rx_id = rxq->rx_tail;
+	rx_id_bufq1 = rxq->bufq1->rx_next_avail;
+	rx_id_bufq2 = rxq->bufq2->rx_next_avail;
+	rx_desc_ring = rxq->rx_ring;
+	ptype_tbl = rxq->adapter->ptype_tbl;
+
+	if ((rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0)
+		rxq->hw_register_set = 1;
+
+	while (nb_rx < nb_pkts) {
+		rx_desc = &rx_desc_ring[rx_id];
+
+		pktlen_gen_bufq_id =
+			rte_le_to_cpu_16(rx_desc->pktlen_gen_bufq_id);
+		gen_id = (pktlen_gen_bufq_id &
+			  VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M) >>
+			VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S;
+		if (gen_id != rxq->expected_gen_id)
+			break;
+
+		pkt_len = (pktlen_gen_bufq_id &
+			   VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M) >>
+			VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_S;
+		if (pkt_len == 0)
+			RX_LOG(ERR, "Packet length is 0");
+
+		rx_id++;
+		if (unlikely(rx_id == rxq->nb_rx_desc)) {
+			rx_id = 0;
+			rxq->expected_gen_id ^= 1;
+		}
+
+		bufq_id = (pktlen_gen_bufq_id &
+			   VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M) >>
+			VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S;
+		if (bufq_id == 0) {
+			rxm = rxq->bufq1->sw_ring[rx_id_bufq1];
+			rx_id_bufq1++;
+			if (unlikely(rx_id_bufq1 == rxq->bufq1->nb_rx_desc))
+				rx_id_bufq1 = 0;
+			rxq->bufq1->nb_rx_hold++;
+		} else {
+			rxm = rxq->bufq2->sw_ring[rx_id_bufq2];
+			rx_id_bufq2++;
+			if (unlikely(rx_id_bufq2 == rxq->bufq2->nb_rx_desc))
+				rx_id_bufq2 = 0;
+			rxq->bufq2->nb_rx_hold++;
+		}
+
+		rxm->pkt_len = pkt_len;
+		rxm->data_len = pkt_len;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rxm->next = NULL;
+		rxm->nb_segs = 1;
+		rxm->port = rxq->port_id;
+		rxm->ol_flags = 0;
+		rxm->packet_type =
+			ptype_tbl[(rte_le_to_cpu_16(rx_desc->ptype_err_fflags0) &
+				   VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M) >>
+				  VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_S];
+
+		status_err0_qw1 = rx_desc->status_err0_qw1;
+		pkt_flags = idpf_splitq_rx_csum_offload(status_err0_qw1);
+		pkt_flags |= idpf_splitq_rx_rss_offload(rxm, rx_desc);
+		if (idpf_timestamp_dynflag > 0 &&
+		    (rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP)) {
+			/* timestamp */
+			ts_ns = idpf_tstamp_convert_32b_64b(ad,
+							    rxq->hw_register_set,
+							    rte_le_to_cpu_32(rx_desc->ts_high));
+			rxq->hw_register_set = 0;
+			*RTE_MBUF_DYNFIELD(rxm,
+					   idpf_timestamp_dynfield_offset,
+					   rte_mbuf_timestamp_t *) = ts_ns;
+			rxm->ol_flags |= idpf_timestamp_dynflag;
+		}
+
+		rxm->ol_flags |= pkt_flags;
+
+		rx_pkts[nb_rx++] = rxm;
+	}
+
+	if (nb_rx > 0) {
+		rxq->rx_tail = rx_id;
+		if (rx_id_bufq1 != rxq->bufq1->rx_next_avail)
+			rxq->bufq1->rx_next_avail = rx_id_bufq1;
+		if (rx_id_bufq2 != rxq->bufq2->rx_next_avail)
+			rxq->bufq2->rx_next_avail = rx_id_bufq2;
+
+		idpf_split_rx_bufq_refill(rxq->bufq1);
+		idpf_split_rx_bufq_refill(rxq->bufq2);
+	}
+
+	return nb_rx;
+}
+
+static inline void
+idpf_split_tx_free(struct idpf_tx_queue *cq)
+{
+	volatile struct idpf_splitq_tx_compl_desc *compl_ring = cq->compl_ring;
+	volatile struct idpf_splitq_tx_compl_desc *txd;
+	uint16_t next = cq->tx_tail;
+	struct idpf_tx_entry *txe;
+	struct idpf_tx_queue *txq;
+	uint16_t gen, qid, q_head;
+	uint16_t nb_desc_clean;
+	uint8_t ctype;
+
+	txd = &compl_ring[next];
+	gen = (rte_le_to_cpu_16(txd->qid_comptype_gen) &
+	       IDPF_TXD_COMPLQ_GEN_M) >> IDPF_TXD_COMPLQ_GEN_S;
+	if (gen != cq->expected_gen_id)
+		return;
+
+	ctype = (rte_le_to_cpu_16(txd->qid_comptype_gen) &
+		 IDPF_TXD_COMPLQ_COMPL_TYPE_M) >> IDPF_TXD_COMPLQ_COMPL_TYPE_S;
+	qid = (rte_le_to_cpu_16(txd->qid_comptype_gen) &
+	       IDPF_TXD_COMPLQ_QID_M) >> IDPF_TXD_COMPLQ_QID_S;
+	q_head = rte_le_to_cpu_16(txd->q_head_compl_tag.compl_tag);
+	txq = cq->txqs[qid - cq->tx_start_qid];
+
+	switch (ctype) {
+	case IDPF_TXD_COMPLT_RE:
+		/* Clean up to q_head, which indicates the last fetched txq
+		 * descriptor id + 1.
+		 * TODO: need to refine and remove the if condition.
+		 */
+		if (unlikely(q_head % 32)) {
+			TX_LOG(ERR, "unexpected desc (head = %u) completion.",
+			       q_head);
+			return;
+		}
+		if (txq->last_desc_cleaned > q_head)
+			nb_desc_clean = (txq->nb_tx_desc - txq->last_desc_cleaned) +
+				q_head;
+		else
+			nb_desc_clean = q_head - txq->last_desc_cleaned;
+		txq->nb_free += nb_desc_clean;
+		txq->last_desc_cleaned = q_head;
+		break;
+	case IDPF_TXD_COMPLT_RS:
+		/* q_head indicates sw_id when ctype is 2 */
+		txe = &txq->sw_ring[q_head];
+		if (txe->mbuf != NULL) {
+			rte_pktmbuf_free_seg(txe->mbuf);
+			txe->mbuf = NULL;
+		}
+		break;
+	default:
+		TX_LOG(ERR, "unknown completion type.");
+		return;
+	}
+
+	if (++next == cq->nb_tx_desc) {
+		next = 0;
+		cq->expected_gen_id ^= 1;
+	}
+
+	cq->tx_tail = next;
+}
+
+/* Check if the context descriptor is needed for TX offloading */
+static inline uint16_t
+idpf_calc_context_desc(uint64_t flags)
+{
+	if ((flags & RTE_MBUF_F_TX_TCP_SEG) != 0)
+		return 1;
+
+	return 0;
+}
+
+/* Set the TSO context descriptor. */
+static inline void
+idpf_set_splitq_tso_ctx(struct rte_mbuf *mbuf,
+			union idpf_tx_offload tx_offload,
+			volatile union idpf_flex_tx_ctx_desc *ctx_desc)
+{
+	uint16_t cmd_dtype;
+	uint32_t tso_len;
+	uint8_t hdr_len;
+
+	if (tx_offload.l4_len == 0) {
+		TX_LOG(DEBUG, "L4 length set to 0");
+		return;
+	}
+
+	hdr_len = tx_offload.l2_len +
+		tx_offload.l3_len +
+		tx_offload.l4_len;
+	cmd_dtype = IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX |
+		IDPF_TX_FLEX_CTX_DESC_CMD_TSO;
+	tso_len = mbuf->pkt_len - hdr_len;
+
+	ctx_desc->tso.qw1.cmd_dtype = rte_cpu_to_le_16(cmd_dtype);
+	ctx_desc->tso.qw0.hdr_len = hdr_len;
+	ctx_desc->tso.qw0.mss_rt =
+		rte_cpu_to_le_16((uint16_t)mbuf->tso_segsz &
+				 IDPF_TXD_FLEX_CTX_MSS_RT_M);
+	ctx_desc->tso.qw0.flex_tlen =
+		rte_cpu_to_le_32(tso_len &
+				 IDPF_TXD_FLEX_CTX_MSS_RT_M);
+}
+
+uint16_t
+idpf_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+		      uint16_t nb_pkts)
+{
+	struct idpf_tx_queue *txq = (struct idpf_tx_queue *)tx_queue;
+	volatile struct idpf_flex_tx_sched_desc *txr;
+	volatile struct idpf_flex_tx_sched_desc *txd;
+	struct idpf_tx_entry *sw_ring;
+	union idpf_tx_offload tx_offload = {0};
+	struct idpf_tx_entry *txe, *txn;
+	uint16_t nb_used, tx_id, sw_id;
+	struct rte_mbuf *tx_pkt;
+	uint16_t nb_to_clean;
+	uint16_t nb_tx = 0;
+	uint64_t ol_flags;
+	uint16_t nb_ctx;
+
+	if (unlikely(txq == NULL) || unlikely(!txq->q_started))
+		return nb_tx;
+
+	txr = txq->desc_ring;
+	sw_ring = txq->sw_ring;
+	tx_id = txq->tx_tail;
+	sw_id = txq->sw_tail;
+	txe = &sw_ring[sw_id];
+
+	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+		tx_pkt = tx_pkts[nb_tx];
+
+		if (txq->nb_free <= txq->free_thresh) {
+			/* TODO: Need to refine
+			 * 1. free and clean: better to decide on a clean destination
+			 * instead of a loop count, and don't free the mbuf as soon
+			 * as an RS completion arrives; free it at transmit time or
+			 * according to the clean destination.
+			 * For now, just ignore the RE write-back and free the mbuf
+			 * when an RS completion is received.
+			 * 2. out-of-order write-back is not supported yet; the SW
+			 * head and HW head need to be separated.
+			 */
+			nb_to_clean = 2 * txq->rs_thresh;
+			while (nb_to_clean--)
+				idpf_split_tx_free(txq->complq);
+		}
+
+		if (txq->nb_free < tx_pkt->nb_segs)
+			break;
+
+		ol_flags = tx_pkt->ol_flags;
+		tx_offload.l2_len = tx_pkt->l2_len;
+		tx_offload.l3_len = tx_pkt->l3_len;
+		tx_offload.l4_len = tx_pkt->l4_len;
+		tx_offload.tso_segsz = tx_pkt->tso_segsz;
+		/* Calculate the number of context descriptors needed. */
+		nb_ctx = idpf_calc_context_desc(ol_flags);
+		nb_used = tx_pkt->nb_segs + nb_ctx;
+
+		/* context descriptor */
+		if (nb_ctx != 0) {
+			volatile union idpf_flex_tx_ctx_desc *ctx_desc =
+				(volatile union idpf_flex_tx_ctx_desc *)&txr[tx_id];
+
+			if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0)
+				idpf_set_splitq_tso_ctx(tx_pkt, tx_offload,
+							ctx_desc);
+
+			tx_id++;
+			if (tx_id == txq->nb_tx_desc)
+				tx_id = 0;
+		}
+
+		do {
+			txd = &txr[tx_id];
+			txn = &sw_ring[txe->next_id];
+			txe->mbuf = tx_pkt;
+
+			/* Setup TX descriptor */
+			txd->buf_addr =
+				rte_cpu_to_le_64(rte_mbuf_data_iova(tx_pkt));
+			txd->qw1.cmd_dtype =
+				rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE);
+			txd->qw1.rxr_bufsize = tx_pkt->data_len;
+			txd->qw1.compl_tag = sw_id;
+			tx_id++;
+			if (tx_id == txq->nb_tx_desc)
+				tx_id = 0;
+			sw_id = txe->next_id;
+			txe = txn;
+			tx_pkt = tx_pkt->next;
+		} while (tx_pkt);
+
+		/* fill the last descriptor with End of Packet (EOP) bit */
+		txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_EOP;
+
+		if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
+			txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_CS_EN;
+		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
+		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
+
+		if (txq->nb_used >= 32) {
+			txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_RE;
+			/* Update txq RE bit counters */
+			txq->nb_used = 0;
+		}
+	}
+
+	/* update the tail pointer if any packets were processed */
+	if (likely(nb_tx > 0)) {
+		IDPF_PCI_REG_WRITE(txq->qtx_tail, tx_id);
+		txq->tx_tail = tx_id;
+		txq->sw_tail = sw_id;
+	}
+
+	return nb_tx;
+}
+
+#define IDPF_RX_FLEX_DESC_STATUS0_XSUM_S				\
+	(RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |		\
+	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |		\
+	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) |	\
+	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S))
+
+/* Translate the rx descriptor status and error fields to pkt flags */
+static inline uint64_t
+idpf_rxd_to_pkt_flags(uint16_t status_error)
+{
+	uint64_t flags = 0;
+
+	if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_S)) == 0))
+		return flags;
+
+	if (likely((status_error & IDPF_RX_FLEX_DESC_STATUS0_XSUM_S) == 0)) {
+		flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD |
+			  RTE_MBUF_F_RX_L4_CKSUM_GOOD);
+		return flags;
+	}
+
+	if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_S)) != 0))
+		flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
+	else
+		flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
+
+	if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) != 0))
+		flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
+	else
+		flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
+
+	if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)) != 0))
+		flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;
+
+	if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)) != 0))
+		flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD;
+	else
+		flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD;
+
+	return flags;
+}
+
+static inline void
+idpf_update_rx_tail(struct idpf_rx_queue *rxq, uint16_t nb_hold,
+		    uint16_t rx_id)
+{
+	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
+
+	if (nb_hold > rxq->rx_free_thresh) {
+		RX_LOG(DEBUG,
+		       "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u",
+		       rxq->port_id, rxq->queue_id, rx_id, nb_hold);
+		rx_id = (uint16_t)((rx_id == 0) ?
+				   (rxq->nb_rx_desc - 1) : (rx_id - 1));
+		IDPF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+		nb_hold = 0;
+	}
+	rxq->nb_rx_hold = nb_hold;
+}
+
+uint16_t
+idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+		       uint16_t nb_pkts)
+{
+	volatile union virtchnl2_rx_desc *rx_ring;
+	volatile union virtchnl2_rx_desc *rxdp;
+	union virtchnl2_rx_desc rxd;
+	struct idpf_rx_queue *rxq;
+	const uint32_t *ptype_tbl;
+	uint16_t rx_id, nb_hold;
+	struct idpf_adapter *ad;
+	uint16_t rx_packet_len;
+	struct rte_mbuf *rxm;
+	struct rte_mbuf *nmb;
+	uint16_t rx_status0;
+	uint64_t pkt_flags;
+	uint64_t dma_addr;
+	uint64_t ts_ns;
+	uint16_t nb_rx;
+
+	nb_rx = 0;
+	nb_hold = 0;
+	rxq = rx_queue;
+
+	if (unlikely(rxq == NULL) || unlikely(!rxq->q_started))
+		return nb_rx;
+
+	ad = rxq->adapter;
+
+	rx_id = rxq->rx_tail;
+	rx_ring = rxq->rx_ring;
+	ptype_tbl = rxq->adapter->ptype_tbl;
+
+	if ((rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0)
+		rxq->hw_register_set = 1;
+
+	while (nb_rx < nb_pkts) {
+		rxdp = &rx_ring[rx_id];
+		rx_status0 = rte_le_to_cpu_16(rxdp->flex_nic_wb.status_error0);
+
+		/* Check the DD bit first */
+		if ((rx_status0 & (1 << VIRTCHNL2_RX_FLEX_DESC_STATUS0_DD_S)) == 0)
+			break;
+
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
+		if (unlikely(nmb == NULL)) {
+			rxq->rx_stats.mbuf_alloc_failed++;
+			RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+			       "queue_id=%u", rxq->port_id, rxq->queue_id);
+			break;
+		}
+		rxd = *rxdp; /* copy descriptor in ring to temp variable */
+
+		nb_hold++;
+		rxm = rxq->sw_ring[rx_id];
+		rxq->sw_ring[rx_id] = nmb;
+		rx_id++;
+		if (unlikely(rx_id == rxq->nb_rx_desc))
+			rx_id = 0;
+
+		/* Prefetch next mbuf */
+		rte_prefetch0(rxq->sw_ring[rx_id]);
+
+		/* When next RX descriptor is on a cache line boundary,
+		 * prefetch the next 4 RX descriptors and next 8 pointers
+		 * to mbufs.
+		 */
+		if ((rx_id & 0x3) == 0) {
+			rte_prefetch0(&rx_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id]);
+		}
+		dma_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+		rxdp->read.hdr_addr = 0;
+		rxdp->read.pkt_addr = dma_addr;
+
+		rx_packet_len = (rte_le_to_cpu_16(rxd.flex_nic_wb.pkt_len) &
+				 VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M);
+
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
+		rxm->nb_segs = 1;
+		rxm->next = NULL;
+		rxm->pkt_len = rx_packet_len;
+		rxm->data_len = rx_packet_len;
+		rxm->port = rxq->port_id;
+		rxm->ol_flags = 0;
+		pkt_flags = idpf_rxd_to_pkt_flags(rx_status0);
+		rxm->packet_type =
+			ptype_tbl[(uint8_t)(rte_le_to_cpu_16(rxd.flex_nic_wb.ptype_flex_flags0) &
+					    VIRTCHNL2_RX_FLEX_DESC_PTYPE_M)];
+
+		rxm->ol_flags |= pkt_flags;
+
+		if (idpf_timestamp_dynflag > 0 &&
+		    (rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0) {
+			/* timestamp */
+			ts_ns = idpf_tstamp_convert_32b_64b(ad,
+					    rxq->hw_register_set,
+					    rte_le_to_cpu_32(rxd.flex_nic_wb.flex_ts.ts_high));
+			rxq->hw_register_set = 0;
+			*RTE_MBUF_DYNFIELD(rxm,
+					   idpf_timestamp_dynfield_offset,
+					   rte_mbuf_timestamp_t *) = ts_ns;
+			rxm->ol_flags |= idpf_timestamp_dynflag;
+		}
+
+		rx_pkts[nb_rx++] = rxm;
+	}
+	rxq->rx_tail = rx_id;
+
+	idpf_update_rx_tail(rxq, nb_hold, rx_id);
+
+	return nb_rx;
+}
+
+static inline int
+idpf_xmit_cleanup(struct idpf_tx_queue *txq)
+{
+	uint16_t last_desc_cleaned = txq->last_desc_cleaned;
+	struct idpf_tx_entry *sw_ring = txq->sw_ring;
+	uint16_t nb_tx_desc = txq->nb_tx_desc;
+	uint16_t desc_to_clean_to;
+	uint16_t nb_tx_to_clean;
+	uint16_t i;
+
+	volatile struct idpf_flex_tx_desc *txd = txq->tx_ring;
+
+	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh);
+	if (desc_to_clean_to >= nb_tx_desc)
+		desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
+
+	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
+	/* In the writeback Tx descriptor, the only significant field is the 4-bit DTYPE */
+	if ((txd[desc_to_clean_to].qw1.cmd_dtype &
+	     rte_cpu_to_le_16(IDPF_TXD_QW1_DTYPE_M)) !=
+	    rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
+		TX_LOG(DEBUG, "TX descriptor %4u is not done "
+		       "(port=%d queue=%d)", desc_to_clean_to,
+		       txq->port_id, txq->queue_id);
+		return -1;
+	}
+
+	if (last_desc_cleaned > desc_to_clean_to)
+		nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
+					    desc_to_clean_to);
+	else
+		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
+					    last_desc_cleaned);
+
+	txd[desc_to_clean_to].qw1.cmd_dtype = 0;
+	txd[desc_to_clean_to].qw1.buf_size = 0;
+	for (i = 0; i < RTE_DIM(txd[desc_to_clean_to].qw1.flex.raw); i++)
+		txd[desc_to_clean_to].qw1.flex.raw[i] = 0;
+
+	txq->last_desc_cleaned = desc_to_clean_to;
+	txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean);
+
+	return 0;
+}
+
+/* TX function */
+uint16_t
+idpf_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+		       uint16_t nb_pkts)
+{
+	volatile struct idpf_flex_tx_desc *txd;
+	volatile struct idpf_flex_tx_desc *txr;
+	union idpf_tx_offload tx_offload = {0};
+	struct idpf_tx_entry *txe, *txn;
+	struct idpf_tx_entry *sw_ring;
+	struct idpf_tx_queue *txq;
+	struct rte_mbuf *tx_pkt;
+	struct rte_mbuf *m_seg;
+	uint64_t buf_dma_addr;
+	uint64_t ol_flags;
+	uint16_t tx_last;
+	uint16_t nb_used;
+	uint16_t nb_ctx;
+	uint16_t td_cmd;
+	uint16_t tx_id;
+	uint16_t nb_tx;
+	uint16_t slen;
+
+	nb_tx = 0;
+	txq = tx_queue;
+
+	if (unlikely(txq == NULL) || unlikely(!txq->q_started))
+		return nb_tx;
+
+	sw_ring = txq->sw_ring;
+	txr = txq->tx_ring;
+	tx_id = txq->tx_tail;
+	txe = &sw_ring[tx_id];
+
+	/* Check if the descriptor ring needs to be cleaned. */
+	if (txq->nb_free < txq->free_thresh)
+		(void)idpf_xmit_cleanup(txq);
+
+	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+		td_cmd = 0;
+
+		tx_pkt = *tx_pkts++;
+		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
+
+		ol_flags = tx_pkt->ol_flags;
+		tx_offload.l2_len = tx_pkt->l2_len;
+		tx_offload.l3_len = tx_pkt->l3_len;
+		tx_offload.l4_len = tx_pkt->l4_len;
+		tx_offload.tso_segsz = tx_pkt->tso_segsz;
+		/* Calculate the number of context descriptors needed. */
+		nb_ctx = idpf_calc_context_desc(ol_flags);
+
+		/* The number of descriptors that must be allocated for
+		 * a packet equals the number of segments of that packet,
+		 * plus one context descriptor if needed.
+		 */
+		nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
+		tx_last = (uint16_t)(tx_id + nb_used - 1);
+
+		/* Circular ring */
+		if (tx_last >= txq->nb_tx_desc)
+			tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
+
+		TX_LOG(DEBUG, "port_id=%u queue_id=%u"
+		       " tx_first=%u tx_last=%u",
+		       txq->port_id, txq->queue_id, tx_id, tx_last);
+
+		if (nb_used > txq->nb_free) {
+			if (idpf_xmit_cleanup(txq) != 0) {
+				if (nb_tx == 0)
+					return 0;
+				goto end_of_tx;
+			}
+			if (unlikely(nb_used > txq->rs_thresh)) {
+				while (nb_used > txq->nb_free) {
+					if (idpf_xmit_cleanup(txq) != 0) {
+						if (nb_tx == 0)
+							return 0;
+						goto end_of_tx;
+					}
+				}
+			}
+		}
+
+		if (nb_ctx != 0) {
+			/* Setup TX context descriptor if required */
+			volatile union idpf_flex_tx_ctx_desc *ctx_txd =
+				(volatile union idpf_flex_tx_ctx_desc *)
+				&txr[tx_id];
+
+			txn = &sw_ring[txe->next_id];
+			RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
+			if (txe->mbuf != NULL) {
+				rte_pktmbuf_free_seg(txe->mbuf);
+				txe->mbuf = NULL;
+			}
+
+			/* TSO enabled */
+			if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0)
+				idpf_set_splitq_tso_ctx(tx_pkt, tx_offload,
+							ctx_txd);
+
+			txe->last_id = tx_last;
+			tx_id = txe->next_id;
+			txe = txn;
+		}
+
+		m_seg = tx_pkt;
+		do {
+			txd = &txr[tx_id];
+			txn = &sw_ring[txe->next_id];
+
+			if (txe->mbuf != NULL)
+				rte_pktmbuf_free_seg(txe->mbuf);
+			txe->mbuf = m_seg;
+
+			/* Setup TX Descriptor */
+			slen = m_seg->data_len;
+			buf_dma_addr = rte_mbuf_data_iova(m_seg);
+			txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
+			txd->qw1.buf_size = slen;
+			txd->qw1.cmd_dtype = rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
+							      IDPF_FLEX_TXD_QW1_DTYPE_S);
+
+			txe->last_id = tx_last;
+			tx_id = txe->next_id;
+			txe = txn;
+			m_seg = m_seg->next;
+		} while (m_seg);
+
+		/* The last packet data descriptor needs End Of Packet (EOP) */
+		td_cmd |= IDPF_TX_FLEX_DESC_CMD_EOP;
+		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
+		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
+
+		if (txq->nb_used >= txq->rs_thresh) {
+			TX_LOG(DEBUG, "Setting RS bit on TXD id="
+			       "%4u (port=%d queue=%d)",
+			       tx_last, txq->port_id, txq->queue_id);
+
+			td_cmd |= IDPF_TX_FLEX_DESC_CMD_RS;
+
+			/* Update txq RS bit counters */
+			txq->nb_used = 0;
+		}
+
+		if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
+			td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
+
+		txd->qw1.cmd_dtype |= rte_cpu_to_le_16(td_cmd << IDPF_FLEX_TXD_QW1_CMD_S);
+	}
+
+end_of_tx:
+	rte_wmb();
+
+	TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
+	       txq->port_id, txq->queue_id, tx_id, nb_tx);
+
+	IDPF_PCI_REG_WRITE(txq->qtx_tail, tx_id);
+	txq->tx_tail = tx_id;
+
+	return nb_tx;
+}
+
+/* TX prep functions */
+uint16_t
+idpf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+	       uint16_t nb_pkts)
+{
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+	int ret;
+#endif
+	int i;
+	uint64_t ol_flags;
+	struct rte_mbuf *m;
+
+	for (i = 0; i < nb_pkts; i++) {
+		m = tx_pkts[i];
+		ol_flags = m->ol_flags;
+
+		/* Check condition for nb_segs > IDPF_TX_MAX_MTU_SEG. */
+		if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0) {
+			if (m->nb_segs > IDPF_TX_MAX_MTU_SEG) {
+				rte_errno = EINVAL;
+				return i;
+			}
+		} else if ((m->tso_segsz < IDPF_MIN_TSO_MSS) ||
+			   (m->tso_segsz > IDPF_MAX_TSO_MSS) ||
+			   (m->pkt_len > IDPF_MAX_TSO_FRAME_SIZE)) {
+			/* An MSS outside this range is considered malicious */
+			rte_errno = EINVAL;
+			return i;
+		}
+
+		if ((ol_flags & IDPF_TX_OFFLOAD_NOTSUP_MASK) != 0) {
+			rte_errno = ENOTSUP;
+			return i;
+		}
+
+		if (m->pkt_len < IDPF_MIN_FRAME_SIZE) {
+			rte_errno = EINVAL;
+			return i;
+		}
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+		ret = rte_validate_tx_offload(m);
+		if (ret != 0) {
+			rte_errno = -ret;
+			return i;
+		}
+#endif
+	}
+
+	return i;
+}
diff --git a/drivers/common/idpf/idpf_common_rxtx.h b/drivers/common/idpf/idpf_common_rxtx.h
index 9abf321519..2da2a6dc49 100644
--- a/drivers/common/idpf/idpf_common_rxtx.h
+++ b/drivers/common/idpf/idpf_common_rxtx.h
@@ -27,6 +27,61 @@ 
 #define IDPF_TX_OFFLOAD_MULTI_SEGS       RTE_BIT64(15)
 #define IDPF_TX_OFFLOAD_MBUF_FAST_FREE   RTE_BIT64(16)
 
+#define IDPF_TX_MAX_MTU_SEG	10
+
+#define IDPF_MIN_TSO_MSS	88
+#define IDPF_MAX_TSO_MSS	9728
+#define IDPF_MAX_TSO_FRAME_SIZE	262143
+
+#define IDPF_TX_CKSUM_OFFLOAD_MASK (		\
+		RTE_MBUF_F_TX_IP_CKSUM |	\
+		RTE_MBUF_F_TX_L4_MASK |		\
+		RTE_MBUF_F_TX_TCP_SEG)
+
+#define IDPF_TX_OFFLOAD_MASK (			\
+		IDPF_TX_CKSUM_OFFLOAD_MASK |	\
+		RTE_MBUF_F_TX_IPV4 |		\
+		RTE_MBUF_F_TX_IPV6)
+
+#define IDPF_TX_OFFLOAD_NOTSUP_MASK \
+		(RTE_MBUF_F_TX_OFFLOAD_MASK ^ IDPF_TX_OFFLOAD_MASK)
+
+/* MTS */
+#define GLTSYN_CMD_SYNC_0_0	(PF_TIMESYNC_BASE + 0x0)
+#define PF_GLTSYN_SHTIME_0_0	(PF_TIMESYNC_BASE + 0x4)
+#define PF_GLTSYN_SHTIME_L_0	(PF_TIMESYNC_BASE + 0x8)
+#define PF_GLTSYN_SHTIME_H_0	(PF_TIMESYNC_BASE + 0xC)
+#define GLTSYN_ART_L_0		(PF_TIMESYNC_BASE + 0x10)
+#define GLTSYN_ART_H_0		(PF_TIMESYNC_BASE + 0x14)
+#define PF_GLTSYN_SHTIME_0_1	(PF_TIMESYNC_BASE + 0x24)
+#define PF_GLTSYN_SHTIME_L_1	(PF_TIMESYNC_BASE + 0x28)
+#define PF_GLTSYN_SHTIME_H_1	(PF_TIMESYNC_BASE + 0x2C)
+#define PF_GLTSYN_SHTIME_0_2	(PF_TIMESYNC_BASE + 0x44)
+#define PF_GLTSYN_SHTIME_L_2	(PF_TIMESYNC_BASE + 0x48)
+#define PF_GLTSYN_SHTIME_H_2	(PF_TIMESYNC_BASE + 0x4C)
+#define PF_GLTSYN_SHTIME_0_3	(PF_TIMESYNC_BASE + 0x64)
+#define PF_GLTSYN_SHTIME_L_3	(PF_TIMESYNC_BASE + 0x68)
+#define PF_GLTSYN_SHTIME_H_3	(PF_TIMESYNC_BASE + 0x6C)
+
+#define PF_TIMESYNC_BAR4_BASE	0x0E400000
+#define GLTSYN_ENA		(PF_TIMESYNC_BAR4_BASE + 0x90)
+#define GLTSYN_CMD		(PF_TIMESYNC_BAR4_BASE + 0x94)
+#define GLTSYC_TIME_L		(PF_TIMESYNC_BAR4_BASE + 0x104)
+#define GLTSYC_TIME_H		(PF_TIMESYNC_BAR4_BASE + 0x108)
+
+#define GLTSYN_CMD_SYNC_0_4	(PF_TIMESYNC_BAR4_BASE + 0x110)
+#define PF_GLTSYN_SHTIME_L_4	(PF_TIMESYNC_BAR4_BASE + 0x118)
+#define PF_GLTSYN_SHTIME_H_4	(PF_TIMESYNC_BAR4_BASE + 0x11C)
+#define GLTSYN_INCVAL_L		(PF_TIMESYNC_BAR4_BASE + 0x150)
+#define GLTSYN_INCVAL_H		(PF_TIMESYNC_BAR4_BASE + 0x154)
+#define GLTSYN_SHADJ_L		(PF_TIMESYNC_BAR4_BASE + 0x158)
+#define GLTSYN_SHADJ_H		(PF_TIMESYNC_BAR4_BASE + 0x15C)
+
+#define GLTSYN_CMD_SYNC_0_5	(PF_TIMESYNC_BAR4_BASE + 0x130)
+#define PF_GLTSYN_SHTIME_L_5	(PF_TIMESYNC_BAR4_BASE + 0x138)
+#define PF_GLTSYN_SHTIME_H_5	(PF_TIMESYNC_BAR4_BASE + 0x13C)
+
 struct idpf_rx_stats {
 	uint64_t mbuf_alloc_failed;
 };
@@ -126,6 +181,18 @@  struct idpf_tx_queue {
 	struct idpf_tx_queue *complq;
 };
 
+/* Offload features */
+union idpf_tx_offload {
+	uint64_t data;
+	struct {
+		uint64_t l2_len:7; /* L2 (MAC) Header Length. */
+		uint64_t l3_len:9; /* L3 (IP) Header Length. */
+		uint64_t l4_len:8; /* L4 Header Length. */
+		uint64_t tso_segsz:16; /* TCP TSO segment size */
+		/* uint64_t unused : 24; */
+	};
+};
+
 struct idpf_rxq_ops {
 	void (*release_mbufs)(struct idpf_rx_queue *rxq);
 };
@@ -134,6 +201,9 @@  struct idpf_txq_ops {
 	void (*release_mbufs)(struct idpf_tx_queue *txq);
 };
 
+extern int idpf_timestamp_dynfield_offset;
+extern uint64_t idpf_timestamp_dynflag;
+
 __rte_internal
 int check_rx_thresh(uint16_t nb_desc, uint16_t thresh);
 __rte_internal
@@ -162,8 +232,25 @@  void idpf_rx_queue_release(void *rxq);
 __rte_internal
 void idpf_tx_queue_release(void *txq);
 __rte_internal
+int idpf_register_ts_mbuf(struct idpf_rx_queue *rxq);
+__rte_internal
 int idpf_alloc_single_rxq_mbufs(struct idpf_rx_queue *rxq);
 __rte_internal
 int idpf_alloc_split_rxq_mbufs(struct idpf_rx_queue *rxq);
+__rte_internal
+uint16_t idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+			       uint16_t nb_pkts);
+__rte_internal
+uint16_t idpf_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+			       uint16_t nb_pkts);
+__rte_internal
+uint16_t idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+				uint16_t nb_pkts);
+__rte_internal
+uint16_t idpf_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+				uint16_t nb_pkts);
+__rte_internal
+uint16_t idpf_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+			uint16_t nb_pkts);
 
 #endif /* _IDPF_COMMON_RXTX_H_ */
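
Note: applications retrieve the Rx timestamp stored through
idpf_timestamp_dynfield_offset with the standard mbuf dynamic-field
API. A minimal sketch, assuming the queue was configured with the
timestamp offload (illustrative only; the helper name is hypothetical):

#include <rte_mbuf_dyn.h>

static uint64_t
example_read_rx_timestamp(const struct rte_mbuf *m)
{
	static int off = -1;

	if (off < 0)
		off = rte_mbuf_dynfield_lookup(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME,
					       NULL);
	/* Only meaningful when the driver set idpf_timestamp_dynflag
	 * in m->ol_flags.
	 */
	return *RTE_MBUF_DYNFIELD(m, off, rte_mbuf_timestamp_t *);
}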
diff --git a/drivers/common/idpf/version.map b/drivers/common/idpf/version.map
index 648f94bf16..bc2a069735 100644
--- a/drivers/common/idpf/version.map
+++ b/drivers/common/idpf/version.map
@@ -35,8 +35,14 @@  INTERNAL {
 	reset_single_tx_queue;
 	idpf_rx_queue_release;
 	idpf_tx_queue_release;
+	idpf_register_ts_mbuf;
 	idpf_alloc_single_rxq_mbufs;
 	idpf_alloc_split_rxq_mbufs;
+	idpf_splitq_recv_pkts;
+	idpf_splitq_xmit_pkts;
+	idpf_singleq_recv_pkts;
+	idpf_singleq_xmit_pkts;
+	idpf_prep_pkts;
 
 	local: *;
 };
diff --git a/drivers/net/idpf/idpf_ethdev.c b/drivers/net/idpf/idpf_ethdev.c
index 06625252b6..a70ae65558 100644
--- a/drivers/net/idpf/idpf_ethdev.c
+++ b/drivers/net/idpf/idpf_ethdev.c
@@ -22,8 +22,6 @@  rte_spinlock_t idpf_adapter_lock;
 struct idpf_adapter_list idpf_adapter_list;
 bool idpf_adapter_list_init;
 
-uint64_t idpf_timestamp_dynflag;
-
 static const char * const idpf_valid_args[] = {
 	IDPF_TX_SINGLE_Q,
 	IDPF_RX_SINGLE_Q,
diff --git a/drivers/net/idpf/idpf_ethdev.h b/drivers/net/idpf/idpf_ethdev.h
index 6d4738f6fe..133589cf98 100644
--- a/drivers/net/idpf/idpf_ethdev.h
+++ b/drivers/net/idpf/idpf_ethdev.h
@@ -28,7 +28,6 @@ 
 
 #define IDPF_MIN_BUF_SIZE	1024
 #define IDPF_MAX_FRAME_SIZE	9728
-#define IDPF_MIN_FRAME_SIZE	14
 
 #define IDPF_NUM_MACADDR_MAX	64
 
@@ -77,9 +76,6 @@  struct idpf_adapter_ext {
 	uint16_t cur_vport_nb;
 
 	uint16_t used_vecs_num;
-
-	/* For PTP */
-	uint64_t time_hw;
 };
 
 TAILQ_HEAD(idpf_adapter_list, idpf_adapter_ext);
diff --git a/drivers/net/idpf/idpf_logs.h b/drivers/net/idpf/idpf_logs.h
index d5f778fefe..bf0774b8e4 100644
--- a/drivers/net/idpf/idpf_logs.h
+++ b/drivers/net/idpf/idpf_logs.h
@@ -29,28 +29,4 @@  extern int idpf_logtype_driver;
 #define PMD_DRV_LOG(level, fmt, args...) \
 	PMD_DRV_LOG_RAW(level, fmt "\n", ## args)
 
-#ifdef RTE_LIBRTE_IDPF_DEBUG_RX
-#define PMD_RX_LOG(level, ...) \
-	RTE_LOG(level, \
-		PMD, \
-		RTE_FMT("%s(): " \
-			RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
-			__func__, \
-			RTE_FMT_TAIL(__VA_ARGS__,)))
-#else
-#define PMD_RX_LOG(level, fmt, args...) do { } while (0)
-#endif
-
-#ifdef RTE_LIBRTE_IDPF_DEBUG_TX
-#define PMD_TX_LOG(level, ...) \
-	RTE_LOG(level, \
-		PMD, \
-		RTE_FMT("%s(): " \
-			RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
-			__func__, \
-			RTE_FMT_TAIL(__VA_ARGS__,)))
-#else
-#define PMD_TX_LOG(level, fmt, args...) do { } while (0)
-#endif
-
 #endif /* _IDPF_LOGS_H_ */
diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
index 6c693f4c3a..fbf2a8f0cd 100644
--- a/drivers/net/idpf/idpf_rxtx.c
+++ b/drivers/net/idpf/idpf_rxtx.c
@@ -10,7 +10,8 @@ 
 #include "idpf_rxtx.h"
 #include "idpf_rxtx_vec_common.h"
 
-static int idpf_timestamp_dynfield_offset = -1;
 
 static uint64_t
 idpf_rx_offload_convert(uint64_t offload)
@@ -501,23 +502,6 @@  idpf_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	return ret;
 }
 
-static int
-idpf_register_ts_mbuf(struct idpf_rx_queue *rxq)
-{
-	int err;
-	if ((rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) != 0) {
-		/* Register mbuf field and flag for Rx timestamp */
-		err = rte_mbuf_dyn_rx_timestamp_register(&idpf_timestamp_dynfield_offset,
-							 &idpf_timestamp_dynflag);
-		if (err != 0) {
-			PMD_DRV_LOG(ERR,
-				    "Cannot register mbuf field/flag for timestamp");
-			return -EINVAL;
-		}
-	}
-	return 0;
-}
-
 int
 idpf_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
@@ -762,922 +746,6 @@  idpf_stop_queues(struct rte_eth_dev *dev)
 	}
 }
 
-#define IDPF_RX_FLEX_DESC_ADV_STATUS0_XSUM_S				\
-	(RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_S) |     \
-	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_S) |     \
-	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_S) |    \
-	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_S))
-
-static inline uint64_t
-idpf_splitq_rx_csum_offload(uint8_t err)
-{
-	uint64_t flags = 0;
-
-	if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L3L4P_S)) == 0))
-		return flags;
-
-	if (likely((err & IDPF_RX_FLEX_DESC_ADV_STATUS0_XSUM_S) == 0)) {
-		flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD |
-			  RTE_MBUF_F_RX_L4_CKSUM_GOOD);
-		return flags;
-	}
-
-	if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_S)) != 0))
-		flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
-	else
-		flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
-
-	if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_S)) != 0))
-		flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
-	else
-		flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
-
-	if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_S)) != 0))
-		flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;
-
-	if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_S)) != 0))
-		flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD;
-	else
-		flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD;
-
-	return flags;
-}
-
-#define IDPF_RX_FLEX_DESC_ADV_HASH1_S  0
-#define IDPF_RX_FLEX_DESC_ADV_HASH2_S  16
-#define IDPF_RX_FLEX_DESC_ADV_HASH3_S  24
-
-static inline uint64_t
-idpf_splitq_rx_rss_offload(struct rte_mbuf *mb,
-			   volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc)
-{
-	uint8_t status_err0_qw0;
-	uint64_t flags = 0;
-
-	status_err0_qw0 = rx_desc->status_err0_qw0;
-
-	if ((status_err0_qw0 & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_RSS_VALID_S)) != 0) {
-		flags |= RTE_MBUF_F_RX_RSS_HASH;
-		mb->hash.rss = (rte_le_to_cpu_16(rx_desc->hash1) <<
-				IDPF_RX_FLEX_DESC_ADV_HASH1_S) |
-			((uint32_t)(rx_desc->ff2_mirrid_hash2.hash2) <<
-			 IDPF_RX_FLEX_DESC_ADV_HASH2_S) |
-			((uint32_t)(rx_desc->hash3) <<
-			 IDPF_RX_FLEX_DESC_ADV_HASH3_S);
-	}
-
-	return flags;
-}
-
-static void
-idpf_split_rx_bufq_refill(struct idpf_rx_queue *rx_bufq)
-{
-	volatile struct virtchnl2_splitq_rx_buf_desc *rx_buf_ring;
-	volatile struct virtchnl2_splitq_rx_buf_desc *rx_buf_desc;
-	uint16_t nb_refill = rx_bufq->rx_free_thresh;
-	uint16_t nb_desc = rx_bufq->nb_rx_desc;
-	uint16_t next_avail = rx_bufq->rx_tail;
-	struct rte_mbuf *nmb[rx_bufq->rx_free_thresh];
-	struct rte_eth_dev *dev;
-	uint64_t dma_addr;
-	uint16_t delta;
-	int i;
-
-	if (rx_bufq->nb_rx_hold < rx_bufq->rx_free_thresh)
-		return;
-
-	rx_buf_ring = rx_bufq->rx_ring;
-	delta = nb_desc - next_avail;
-	if (unlikely(delta < nb_refill)) {
-		if (likely(rte_pktmbuf_alloc_bulk(rx_bufq->mp, nmb, delta) == 0)) {
-			for (i = 0; i < delta; i++) {
-				rx_buf_desc = &rx_buf_ring[next_avail + i];
-				rx_bufq->sw_ring[next_avail + i] = nmb[i];
-				dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb[i]));
-				rx_buf_desc->hdr_addr = 0;
-				rx_buf_desc->pkt_addr = dma_addr;
-			}
-			nb_refill -= delta;
-			next_avail = 0;
-			rx_bufq->nb_rx_hold -= delta;
-		} else {
-			dev = &rte_eth_devices[rx_bufq->port_id];
-			dev->data->rx_mbuf_alloc_failed += nb_desc - next_avail;
-			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u",
-				   rx_bufq->port_id, rx_bufq->queue_id);
-			return;
-		}
-	}
-
-	if (nb_desc - next_avail >= nb_refill) {
-		if (likely(rte_pktmbuf_alloc_bulk(rx_bufq->mp, nmb, nb_refill) == 0)) {
-			for (i = 0; i < nb_refill; i++) {
-				rx_buf_desc = &rx_buf_ring[next_avail + i];
-				rx_bufq->sw_ring[next_avail + i] = nmb[i];
-				dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb[i]));
-				rx_buf_desc->hdr_addr = 0;
-				rx_buf_desc->pkt_addr = dma_addr;
-			}
-			next_avail += nb_refill;
-			rx_bufq->nb_rx_hold -= nb_refill;
-		} else {
-			dev = &rte_eth_devices[rx_bufq->port_id];
-			dev->data->rx_mbuf_alloc_failed += nb_desc - next_avail;
-			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u",
-				   rx_bufq->port_id, rx_bufq->queue_id);
-		}
-	}
-
-	IDPF_PCI_REG_WRITE(rx_bufq->qrx_tail, next_avail);
-
-	rx_bufq->rx_tail = next_avail;
-}
-
-uint16_t
-idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
-		      uint16_t nb_pkts)
-{
-	volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc_ring;
-	volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc;
-	uint16_t pktlen_gen_bufq_id;
-	struct idpf_rx_queue *rxq;
-	const uint32_t *ptype_tbl;
-	uint8_t status_err0_qw1;
-	struct idpf_adapter_ext *ad;
-	struct rte_mbuf *rxm;
-	uint16_t rx_id_bufq1;
-	uint16_t rx_id_bufq2;
-	uint64_t pkt_flags;
-	uint16_t pkt_len;
-	uint16_t bufq_id;
-	uint16_t gen_id;
-	uint16_t rx_id;
-	uint16_t nb_rx;
-	uint64_t ts_ns;
-
-	nb_rx = 0;
-	rxq = rx_queue;
-	ad = IDPF_ADAPTER_TO_EXT(rxq->adapter);
-
-	if (unlikely(rxq == NULL) || unlikely(!rxq->q_started))
-		return nb_rx;
-
-	rx_id = rxq->rx_tail;
-	rx_id_bufq1 = rxq->bufq1->rx_next_avail;
-	rx_id_bufq2 = rxq->bufq2->rx_next_avail;
-	rx_desc_ring = rxq->rx_ring;
-	ptype_tbl = rxq->adapter->ptype_tbl;
-
-	if ((rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) != 0)
-		rxq->hw_register_set = 1;
-
-	while (nb_rx < nb_pkts) {
-		rx_desc = &rx_desc_ring[rx_id];
-
-		pktlen_gen_bufq_id =
-			rte_le_to_cpu_16(rx_desc->pktlen_gen_bufq_id);
-		gen_id = (pktlen_gen_bufq_id &
-			  VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M) >>
-			VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S;
-		if (gen_id != rxq->expected_gen_id)
-			break;
-
-		pkt_len = (pktlen_gen_bufq_id &
-			   VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M) >>
-			VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_S;
-		if (pkt_len == 0)
-			PMD_RX_LOG(ERR, "Packet length is 0");
-
-		rx_id++;
-		if (unlikely(rx_id == rxq->nb_rx_desc)) {
-			rx_id = 0;
-			rxq->expected_gen_id ^= 1;
-		}
-
-		bufq_id = (pktlen_gen_bufq_id &
-			   VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M) >>
-			VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S;
-		if (bufq_id == 0) {
-			rxm = rxq->bufq1->sw_ring[rx_id_bufq1];
-			rx_id_bufq1++;
-			if (unlikely(rx_id_bufq1 == rxq->bufq1->nb_rx_desc))
-				rx_id_bufq1 = 0;
-			rxq->bufq1->nb_rx_hold++;
-		} else {
-			rxm = rxq->bufq2->sw_ring[rx_id_bufq2];
-			rx_id_bufq2++;
-			if (unlikely(rx_id_bufq2 == rxq->bufq2->nb_rx_desc))
-				rx_id_bufq2 = 0;
-			rxq->bufq2->nb_rx_hold++;
-		}
-
-		rxm->pkt_len = pkt_len;
-		rxm->data_len = pkt_len;
-		rxm->data_off = RTE_PKTMBUF_HEADROOM;
-		rxm->next = NULL;
-		rxm->nb_segs = 1;
-		rxm->port = rxq->port_id;
-		rxm->ol_flags = 0;
-		rxm->packet_type =
-			ptype_tbl[(rte_le_to_cpu_16(rx_desc->ptype_err_fflags0) &
-				   VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M) >>
-				  VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_S];
-
-		status_err0_qw1 = rx_desc->status_err0_qw1;
-		pkt_flags = idpf_splitq_rx_csum_offload(status_err0_qw1);
-		pkt_flags |= idpf_splitq_rx_rss_offload(rxm, rx_desc);
-		if (idpf_timestamp_dynflag > 0 &&
-		    (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)) {
-			/* timestamp */
-			ts_ns = idpf_tstamp_convert_32b_64b(ad,
-				rxq->hw_register_set,
-				rte_le_to_cpu_32(rx_desc->ts_high));
-			rxq->hw_register_set = 0;
-			*RTE_MBUF_DYNFIELD(rxm,
-					   idpf_timestamp_dynfield_offset,
-					   rte_mbuf_timestamp_t *) = ts_ns;
-			rxm->ol_flags |= idpf_timestamp_dynflag;
-		}
-
-		rxm->ol_flags |= pkt_flags;
-
-		rx_pkts[nb_rx++] = rxm;
-	}
-
-	if (nb_rx > 0) {
-		rxq->rx_tail = rx_id;
-		if (rx_id_bufq1 != rxq->bufq1->rx_next_avail)
-			rxq->bufq1->rx_next_avail = rx_id_bufq1;
-		if (rx_id_bufq2 != rxq->bufq2->rx_next_avail)
-			rxq->bufq2->rx_next_avail = rx_id_bufq2;
-
-		idpf_split_rx_bufq_refill(rxq->bufq1);
-		idpf_split_rx_bufq_refill(rxq->bufq2);
-	}
-
-	return nb_rx;
-}
-
-static inline void
-idpf_split_tx_free(struct idpf_tx_queue *cq)
-{
-	volatile struct idpf_splitq_tx_compl_desc *compl_ring = cq->compl_ring;
-	volatile struct idpf_splitq_tx_compl_desc *txd;
-	uint16_t next = cq->tx_tail;
-	struct idpf_tx_entry *txe;
-	struct idpf_tx_queue *txq;
-	uint16_t gen, qid, q_head;
-	uint16_t nb_desc_clean;
-	uint8_t ctype;
-
-	txd = &compl_ring[next];
-	gen = (rte_le_to_cpu_16(txd->qid_comptype_gen) &
-		IDPF_TXD_COMPLQ_GEN_M) >> IDPF_TXD_COMPLQ_GEN_S;
-	if (gen != cq->expected_gen_id)
-		return;
-
-	ctype = (rte_le_to_cpu_16(txd->qid_comptype_gen) &
-		IDPF_TXD_COMPLQ_COMPL_TYPE_M) >> IDPF_TXD_COMPLQ_COMPL_TYPE_S;
-	qid = (rte_le_to_cpu_16(txd->qid_comptype_gen) &
-		IDPF_TXD_COMPLQ_QID_M) >> IDPF_TXD_COMPLQ_QID_S;
-	q_head = rte_le_to_cpu_16(txd->q_head_compl_tag.compl_tag);
-	txq = cq->txqs[qid - cq->tx_start_qid];
-
-	switch (ctype) {
-	case IDPF_TXD_COMPLT_RE:
-		/* clean to q_head which indicates be fetched txq desc id + 1.
-		 * TODO: need to refine and remove the if condition.
-		 */
-		if (unlikely(q_head % 32)) {
-			PMD_DRV_LOG(ERR, "unexpected desc (head = %u) completion.",
-						q_head);
-			return;
-		}
-		if (txq->last_desc_cleaned > q_head)
-			nb_desc_clean = (txq->nb_tx_desc - txq->last_desc_cleaned) +
-				q_head;
-		else
-			nb_desc_clean = q_head - txq->last_desc_cleaned;
-		txq->nb_free += nb_desc_clean;
-		txq->last_desc_cleaned = q_head;
-		break;
-	case IDPF_TXD_COMPLT_RS:
-		/* q_head indicates sw_id when ctype is 2 */
-		txe = &txq->sw_ring[q_head];
-		if (txe->mbuf != NULL) {
-			rte_pktmbuf_free_seg(txe->mbuf);
-			txe->mbuf = NULL;
-		}
-		break;
-	default:
-		PMD_DRV_LOG(ERR, "unknown completion type.");
-		return;
-	}
-
-	if (++next == cq->nb_tx_desc) {
-		next = 0;
-		cq->expected_gen_id ^= 1;
-	}
-
-	cq->tx_tail = next;
-}
-
-/* Check if the context descriptor is needed for TX offloading */
-static inline uint16_t
-idpf_calc_context_desc(uint64_t flags)
-{
-	if ((flags & RTE_MBUF_F_TX_TCP_SEG) != 0)
-		return 1;
-
-	return 0;
-}
-
-/* set TSO context descriptor
- */
-static inline void
-idpf_set_splitq_tso_ctx(struct rte_mbuf *mbuf,
-			union idpf_tx_offload tx_offload,
-			volatile union idpf_flex_tx_ctx_desc *ctx_desc)
-{
-	uint16_t cmd_dtype;
-	uint32_t tso_len;
-	uint8_t hdr_len;
-
-	if (tx_offload.l4_len == 0) {
-		PMD_TX_LOG(DEBUG, "L4 length set to 0");
-		return;
-	}
-
-	hdr_len = tx_offload.l2_len +
-		tx_offload.l3_len +
-		tx_offload.l4_len;
-	cmd_dtype = IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX |
-		IDPF_TX_FLEX_CTX_DESC_CMD_TSO;
-	tso_len = mbuf->pkt_len - hdr_len;
-
-	ctx_desc->tso.qw1.cmd_dtype = rte_cpu_to_le_16(cmd_dtype);
-	ctx_desc->tso.qw0.hdr_len = hdr_len;
-	ctx_desc->tso.qw0.mss_rt =
-		rte_cpu_to_le_16((uint16_t)mbuf->tso_segsz &
-				 IDPF_TXD_FLEX_CTX_MSS_RT_M);
-	ctx_desc->tso.qw0.flex_tlen =
-		rte_cpu_to_le_32(tso_len &
-				 IDPF_TXD_FLEX_CTX_MSS_RT_M);
-}
-
-uint16_t
-idpf_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
-		      uint16_t nb_pkts)
-{
-	struct idpf_tx_queue *txq = (struct idpf_tx_queue *)tx_queue;
-	volatile struct idpf_flex_tx_sched_desc *txr;
-	volatile struct idpf_flex_tx_sched_desc *txd;
-	struct idpf_tx_entry *sw_ring;
-	union idpf_tx_offload tx_offload = {0};
-	struct idpf_tx_entry *txe, *txn;
-	uint16_t nb_used, tx_id, sw_id;
-	struct rte_mbuf *tx_pkt;
-	uint16_t nb_to_clean;
-	uint16_t nb_tx = 0;
-	uint64_t ol_flags;
-	uint16_t nb_ctx;
-
-	if (unlikely(txq == NULL) || unlikely(!txq->q_started))
-		return nb_tx;
-
-	txr = txq->desc_ring;
-	sw_ring = txq->sw_ring;
-	tx_id = txq->tx_tail;
-	sw_id = txq->sw_tail;
-	txe = &sw_ring[sw_id];
-
-	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
-		tx_pkt = tx_pkts[nb_tx];
-
-		if (txq->nb_free <= txq->free_thresh) {
-			/* TODO: Need to refine
-			 * 1. free and clean: Better to decide a clean destination instead of
-			 * loop times. And don't free mbuf when RS got immediately, free when
-			 * transmit or according to the clean destination.
-			 * Now, just ignore the RE write back, free mbuf when get RS
-			 * 2. out-of-order rewrite back haven't be supported, SW head and HW head
-			 * need to be separated.
-			 **/
-			nb_to_clean = 2 * txq->rs_thresh;
-			while (nb_to_clean--)
-				idpf_split_tx_free(txq->complq);
-		}
-
-		if (txq->nb_free < tx_pkt->nb_segs)
-			break;
-
-		ol_flags = tx_pkt->ol_flags;
-		tx_offload.l2_len = tx_pkt->l2_len;
-		tx_offload.l3_len = tx_pkt->l3_len;
-		tx_offload.l4_len = tx_pkt->l4_len;
-		tx_offload.tso_segsz = tx_pkt->tso_segsz;
-		/* Calculate the number of context descriptors needed. */
-		nb_ctx = idpf_calc_context_desc(ol_flags);
-		nb_used = tx_pkt->nb_segs + nb_ctx;
-
-		/* context descriptor */
-		if (nb_ctx != 0) {
-			volatile union idpf_flex_tx_ctx_desc *ctx_desc =
-			(volatile union idpf_flex_tx_ctx_desc *)&txr[tx_id];
-
-			if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0)
-				idpf_set_splitq_tso_ctx(tx_pkt, tx_offload,
-							ctx_desc);
-
-			tx_id++;
-			if (tx_id == txq->nb_tx_desc)
-				tx_id = 0;
-		}
-
-		do {
-			txd = &txr[tx_id];
-			txn = &sw_ring[txe->next_id];
-			txe->mbuf = tx_pkt;
-
-			/* Setup TX descriptor */
-			txd->buf_addr =
-				rte_cpu_to_le_64(rte_mbuf_data_iova(tx_pkt));
-			txd->qw1.cmd_dtype =
-				rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE);
-			txd->qw1.rxr_bufsize = tx_pkt->data_len;
-			txd->qw1.compl_tag = sw_id;
-			tx_id++;
-			if (tx_id == txq->nb_tx_desc)
-				tx_id = 0;
-			sw_id = txe->next_id;
-			txe = txn;
-			tx_pkt = tx_pkt->next;
-		} while (tx_pkt);
-
-		/* fill the last descriptor with End of Packet (EOP) bit */
-		txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_EOP;
-
-		if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
-			txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_CS_EN;
-		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
-		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
-
-		if (txq->nb_used >= 32) {
-			txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_RE;
-			/* Update txq RE bit counters */
-			txq->nb_used = 0;
-		}
-	}
-
-	/* update the tail pointer if any packets were processed */
-	if (likely(nb_tx > 0)) {
-		IDPF_PCI_REG_WRITE(txq->qtx_tail, tx_id);
-		txq->tx_tail = tx_id;
-		txq->sw_tail = sw_id;
-	}
-
-	return nb_tx;
-}
-
-#define IDPF_RX_FLEX_DESC_STATUS0_XSUM_S				\
-	(RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |		\
-	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |		\
-	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) |	\
-	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S))
-
-/* Translate the rx descriptor status and error fields to pkt flags */
-static inline uint64_t
-idpf_rxd_to_pkt_flags(uint16_t status_error)
-{
-	uint64_t flags = 0;
-
-	if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_S)) == 0))
-		return flags;
-
-	if (likely((status_error & IDPF_RX_FLEX_DESC_STATUS0_XSUM_S) == 0)) {
-		flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD |
-			  RTE_MBUF_F_RX_L4_CKSUM_GOOD);
-		return flags;
-	}
-
-	if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_S)) != 0))
-		flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
-	else
-		flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
-
-	if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) != 0))
-		flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
-	else
-		flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
-
-	if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)) != 0))
-		flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;
-
-	if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)) != 0))
-		flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD;
-	else
-		flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD;
-
-	return flags;
-}
-
-static inline void
-idpf_update_rx_tail(struct idpf_rx_queue *rxq, uint16_t nb_hold,
-		    uint16_t rx_id)
-{
-	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
-
-	if (nb_hold > rxq->rx_free_thresh) {
-		PMD_RX_LOG(DEBUG,
-			   "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u",
-			   rxq->port_id, rxq->queue_id, rx_id, nb_hold);
-		rx_id = (uint16_t)((rx_id == 0) ?
-				   (rxq->nb_rx_desc - 1) : (rx_id - 1));
-		IDPF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
-		nb_hold = 0;
-	}
-	rxq->nb_rx_hold = nb_hold;
-}
-
-uint16_t
-idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
-		       uint16_t nb_pkts)
-{
-	volatile union virtchnl2_rx_desc *rx_ring;
-	volatile union virtchnl2_rx_desc *rxdp;
-	union virtchnl2_rx_desc rxd;
-	struct idpf_rx_queue *rxq;
-	const uint32_t *ptype_tbl;
-	uint16_t rx_id, nb_hold;
-	struct rte_eth_dev *dev;
-	struct idpf_adapter_ext *ad;
-	uint16_t rx_packet_len;
-	struct rte_mbuf *rxm;
-	struct rte_mbuf *nmb;
-	uint16_t rx_status0;
-	uint64_t pkt_flags;
-	uint64_t dma_addr;
-	uint64_t ts_ns;
-	uint16_t nb_rx;
-
-	nb_rx = 0;
-	nb_hold = 0;
-	rxq = rx_queue;
-
-	ad = IDPF_ADAPTER_TO_EXT(rxq->adapter);
-
-	if (unlikely(rxq == NULL) || unlikely(!rxq->q_started))
-		return nb_rx;
-
-	rx_id = rxq->rx_tail;
-	rx_ring = rxq->rx_ring;
-	ptype_tbl = rxq->adapter->ptype_tbl;
-
-	if ((rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) != 0)
-		rxq->hw_register_set = 1;
-
-	while (nb_rx < nb_pkts) {
-		rxdp = &rx_ring[rx_id];
-		rx_status0 = rte_le_to_cpu_16(rxdp->flex_nic_wb.status_error0);
-
-		/* Check the DD bit first */
-		if ((rx_status0 & (1 << VIRTCHNL2_RX_FLEX_DESC_STATUS0_DD_S)) == 0)
-			break;
-
-		nmb = rte_mbuf_raw_alloc(rxq->mp);
-		if (unlikely(nmb == NULL)) {
-			dev = &rte_eth_devices[rxq->port_id];
-			dev->data->rx_mbuf_alloc_failed++;
-			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
-				   "queue_id=%u", rxq->port_id, rxq->queue_id);
-			break;
-		}
-		rxd = *rxdp; /* copy descriptor in ring to temp variable*/
-
-		nb_hold++;
-		rxm = rxq->sw_ring[rx_id];
-		rxq->sw_ring[rx_id] = nmb;
-		rx_id++;
-		if (unlikely(rx_id == rxq->nb_rx_desc))
-			rx_id = 0;
-
-		/* Prefetch next mbuf */
-		rte_prefetch0(rxq->sw_ring[rx_id]);
-
-		/* When next RX descriptor is on a cache line boundary,
-		 * prefetch the next 4 RX descriptors and next 8 pointers
-		 * to mbufs.
-		 */
-		if ((rx_id & 0x3) == 0) {
-			rte_prefetch0(&rx_ring[rx_id]);
-			rte_prefetch0(rxq->sw_ring[rx_id]);
-		}
-		dma_addr =
-			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
-		rxdp->read.hdr_addr = 0;
-		rxdp->read.pkt_addr = dma_addr;
-
-		rx_packet_len = (rte_le_to_cpu_16(rxd.flex_nic_wb.pkt_len) &
-				 VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M);
-
-		rxm->data_off = RTE_PKTMBUF_HEADROOM;
-		rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
-		rxm->nb_segs = 1;
-		rxm->next = NULL;
-		rxm->pkt_len = rx_packet_len;
-		rxm->data_len = rx_packet_len;
-		rxm->port = rxq->port_id;
-		rxm->ol_flags = 0;
-		pkt_flags = idpf_rxd_to_pkt_flags(rx_status0);
-		rxm->packet_type =
-			ptype_tbl[(uint16_t)(rte_le_to_cpu_16(rxd.flex_nic_wb.ptype_flex_flags0) &
-					     VIRTCHNL2_RX_FLEX_DESC_PTYPE_M)];
-
-		rxm->ol_flags |= pkt_flags;
-
-		if (idpf_timestamp_dynflag > 0 &&
-		   (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) != 0) {
-			/* timestamp */
-			ts_ns = idpf_tstamp_convert_32b_64b(ad,
-				rxq->hw_register_set,
-				rte_le_to_cpu_32(rxd.flex_nic_wb.flex_ts.ts_high));
-			rxq->hw_register_set = 0;
-			*RTE_MBUF_DYNFIELD(rxm,
-					   idpf_timestamp_dynfield_offset,
-					   rte_mbuf_timestamp_t *) = ts_ns;
-			rxm->ol_flags |= idpf_timestamp_dynflag;
-		}
-
-		rx_pkts[nb_rx++] = rxm;
-	}
-	rxq->rx_tail = rx_id;
-
-	idpf_update_rx_tail(rxq, nb_hold, rx_id);
-
-	return nb_rx;
-}
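In normal operation this burst routine is not called directly; the PMD installs it as the rx_pkt_burst callback and applications reach it through rte_eth_rx_burst(). A minimal polling sketch, assuming EAL is initialized and port 0 / queue 0 are already configured and started on an idpf device:

	#include <rte_ethdev.h>
	#include <rte_mbuf.h>

	#define BURST_SZ 32

	static void poll_rx_once(void)
	{
		struct rte_mbuf *pkts[BURST_SZ];
		uint16_t i, nb;

		nb = rte_eth_rx_burst(0, 0, pkts, BURST_SZ);
		for (i = 0; i < nb; i++)
			rte_pktmbuf_free(pkts[i]); /* a real app would process first */
	}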
-
-static inline int
-idpf_xmit_cleanup(struct idpf_tx_queue *txq)
-{
-	uint16_t last_desc_cleaned = txq->last_desc_cleaned;
-	struct idpf_tx_entry *sw_ring = txq->sw_ring;
-	uint16_t nb_tx_desc = txq->nb_tx_desc;
-	uint16_t desc_to_clean_to;
-	uint16_t nb_tx_to_clean;
-	uint16_t i;
-
-	volatile struct idpf_flex_tx_desc *txd = txq->tx_ring;
-
-	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh);
-	if (desc_to_clean_to >= nb_tx_desc)
-		desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
-
-	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
-	/* In the writeback Tx descriptor, the only significant field is the 4-bit DTYPE */
-	if ((txd[desc_to_clean_to].qw1.cmd_dtype &
-			rte_cpu_to_le_16(IDPF_TXD_QW1_DTYPE_M)) !=
-			rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
-		PMD_TX_LOG(DEBUG, "TX descriptor %4u is not done "
-			   "(port=%d queue=%d)", desc_to_clean_to,
-			   txq->port_id, txq->queue_id);
-		return -1;
-	}
-
-	if (last_desc_cleaned > desc_to_clean_to)
-		nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
-					    desc_to_clean_to);
-	else
-		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
-					last_desc_cleaned);
-
-	txd[desc_to_clean_to].qw1.cmd_dtype = 0;
-	txd[desc_to_clean_to].qw1.buf_size = 0;
-	for (i = 0; i < RTE_DIM(txd[desc_to_clean_to].qw1.flex.raw); i++)
-		txd[desc_to_clean_to].qw1.flex.raw[i] = 0;
-
-	txq->last_desc_cleaned = desc_to_clean_to;
-	txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean);
-
-	return 0;
-}
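Cleanup advances in rs_thresh-sized strides and only reclaims when the descriptor at the stride boundary reports DESC_DONE. The reclaim count itself is plain modular ring arithmetic; a standalone sketch with two worked cases (helper name is illustrative):

	#include <stdint.h>
	#include <assert.h>

	static uint16_t to_clean(uint16_t last_cleaned, uint16_t clean_to,
				 uint16_t nb_desc)
	{
		if (last_cleaned > clean_to)
			return (uint16_t)((nb_desc - last_cleaned) + clean_to);
		return (uint16_t)(clean_to - last_cleaned);
	}

	int main(void)
	{
		assert(to_clean(100, 131, 512) == 31); /* no wrap */
		assert(to_clean(500, 19, 512) == 31);  /* wraps past ring end */
		return 0;
	}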
-
-/* Tx function for the single queue model */
-uint16_t
-idpf_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
-		       uint16_t nb_pkts)
-{
-	volatile struct idpf_flex_tx_desc *txd;
-	volatile struct idpf_flex_tx_desc *txr;
-	union idpf_tx_offload tx_offload = {0};
-	struct idpf_tx_entry *txe, *txn;
-	struct idpf_tx_entry *sw_ring;
-	struct idpf_tx_queue *txq;
-	struct rte_mbuf *tx_pkt;
-	struct rte_mbuf *m_seg;
-	uint64_t buf_dma_addr;
-	uint64_t ol_flags;
-	uint16_t tx_last;
-	uint16_t nb_used;
-	uint16_t nb_ctx;
-	uint16_t td_cmd;
-	uint16_t tx_id;
-	uint16_t nb_tx;
-	uint16_t slen;
-
-	nb_tx = 0;
-	txq = tx_queue;
-
-	if (unlikely(txq == NULL) || unlikely(!txq->q_started))
-		return nb_tx;
-
-	sw_ring = txq->sw_ring;
-	txr = txq->tx_ring;
-	tx_id = txq->tx_tail;
-	txe = &sw_ring[tx_id];
-
-	/* Check if the descriptor ring needs to be cleaned. */
-	if (txq->nb_free < txq->free_thresh)
-		(void)idpf_xmit_cleanup(txq);
-
-	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
-		td_cmd = 0;
-
-		tx_pkt = *tx_pkts++;
-		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
-
-		ol_flags = tx_pkt->ol_flags;
-		tx_offload.l2_len = tx_pkt->l2_len;
-		tx_offload.l3_len = tx_pkt->l3_len;
-		tx_offload.l4_len = tx_pkt->l4_len;
-		tx_offload.tso_segsz = tx_pkt->tso_segsz;
-		/* Calculate the number of context descriptors needed. */
-		nb_ctx = idpf_calc_context_desc(ol_flags);
-
-		/* The number of descriptors that must be allocated for a
-		 * packet equals the number of segments of that packet,
-		 * plus 1 context descriptor if needed.
-		 */
-		nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
-		tx_last = (uint16_t)(tx_id + nb_used - 1);
-
-		/* Circular ring */
-		if (tx_last >= txq->nb_tx_desc)
-			tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
-
-		PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u"
-			   " tx_first=%u tx_last=%u",
-			   txq->port_id, txq->queue_id, tx_id, tx_last);
-
-		if (nb_used > txq->nb_free) {
-			if (idpf_xmit_cleanup(txq) != 0) {
-				if (nb_tx == 0)
-					return 0;
-				goto end_of_tx;
-			}
-			if (unlikely(nb_used > txq->rs_thresh)) {
-				while (nb_used > txq->nb_free) {
-					if (idpf_xmit_cleanup(txq) != 0) {
-						if (nb_tx == 0)
-							return 0;
-						goto end_of_tx;
-					}
-				}
-			}
-		}
-
-		if (nb_ctx != 0) {
-			/* Setup TX context descriptor if required */
-			volatile union idpf_flex_tx_ctx_desc *ctx_txd =
-				(volatile union idpf_flex_tx_ctx_desc *)
-							&txr[tx_id];
-
-			txn = &sw_ring[txe->next_id];
-			RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
-			if (txe->mbuf != NULL) {
-				rte_pktmbuf_free_seg(txe->mbuf);
-				txe->mbuf = NULL;
-			}
-
-			/* TSO enabled */
-			if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0)
-				idpf_set_splitq_tso_ctx(tx_pkt, tx_offload,
-							ctx_txd);
-
-			txe->last_id = tx_last;
-			tx_id = txe->next_id;
-			txe = txn;
-		}
-
-		m_seg = tx_pkt;
-		do {
-			txd = &txr[tx_id];
-			txn = &sw_ring[txe->next_id];
-
-			if (txe->mbuf != NULL)
-				rte_pktmbuf_free_seg(txe->mbuf);
-			txe->mbuf = m_seg;
-
-			/* Setup TX Descriptor */
-			slen = m_seg->data_len;
-			buf_dma_addr = rte_mbuf_data_iova(m_seg);
-			txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
-			txd->qw1.buf_size = slen;
-			txd->qw1.cmd_dtype = rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
-							      IDPF_FLEX_TXD_QW1_DTYPE_S);
-
-			txe->last_id = tx_last;
-			tx_id = txe->next_id;
-			txe = txn;
-			m_seg = m_seg->next;
-		} while (m_seg != NULL);
-
-		/* The last packet data descriptor needs End Of Packet (EOP) */
-		td_cmd |= IDPF_TX_FLEX_DESC_CMD_EOP;
-		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
-		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
-
-		if (txq->nb_used >= txq->rs_thresh) {
-			PMD_TX_LOG(DEBUG, "Setting RS bit on TXD id="
-				   "%4u (port=%d queue=%d)",
-				   tx_last, txq->port_id, txq->queue_id);
-
-			td_cmd |= IDPF_TX_FLEX_DESC_CMD_RS;
-
-			/* Update txq RS bit counters */
-			txq->nb_used = 0;
-		}
-
-		if ((ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK) != 0)
-			td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
-
-		txd->qw1.cmd_dtype |= rte_cpu_to_le_16(td_cmd << IDPF_FLEX_TXD_QW1_CMD_S);
-	}
-
-end_of_tx:
-	rte_wmb();
-
-	PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
-		   txq->port_id, txq->queue_id, tx_id, nb_tx);
-
-	IDPF_PCI_REG_WRITE(txq->qtx_tail, tx_id);
-	txq->tx_tail = tx_id;
-
-	return nb_tx;
-}
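As on the Rx side, applications reach this through the ethdev burst API. A minimal transmit sketch, assuming port 0 / queue 0 are started on an idpf device; packets the ring has no room for are dropped (freed) here, which is one common policy, retrying being the other:

	#include <rte_ethdev.h>
	#include <rte_mbuf.h>

	static void xmit_burst(struct rte_mbuf **pkts, uint16_t n)
	{
		uint16_t sent = rte_eth_tx_burst(0, 0, pkts, n);

		while (sent < n)
			rte_pktmbuf_free(pkts[sent++]);
	}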
-
-/* Tx prepare function */
-uint16_t
-idpf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
-	       uint16_t nb_pkts)
-{
-#ifdef RTE_LIBRTE_ETHDEV_DEBUG
-	int ret;
-#endif
-	int i;
-	uint64_t ol_flags;
-	struct rte_mbuf *m;
-
-	for (i = 0; i < nb_pkts; i++) {
-		m = tx_pkts[i];
-		ol_flags = m->ol_flags;
-
-		/* Non-TSO packets must not exceed IDPF_TX_MAX_MTU_SEG segments. */
-		if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0) {
-			if (m->nb_segs > IDPF_TX_MAX_MTU_SEG) {
-				rte_errno = EINVAL;
-				return i;
-			}
-		} else if ((m->tso_segsz < IDPF_MIN_TSO_MSS) ||
-			   (m->tso_segsz > IDPF_MAX_TSO_MSS) ||
-			   (m->pkt_len > IDPF_MAX_TSO_FRAME_SIZE)) {
-			/* An MSS outside the supported range is considered malicious */
-			rte_errno = EINVAL;
-			return i;
-		}
-
-		if ((ol_flags & IDPF_TX_OFFLOAD_NOTSUP_MASK) != 0) {
-			rte_errno = ENOTSUP;
-			return i;
-		}
-
-		if (m->pkt_len < IDPF_MIN_FRAME_SIZE) {
-			rte_errno = EINVAL;
-			return i;
-		}
-
-#ifdef RTE_LIBRTE_ETHDEV_DEBUG
-		ret = rte_validate_tx_offload(m);
-		if (ret != 0) {
-			rte_errno = -ret;
-			return i;
-		}
-#endif
-	}
-
-	return i;
-}
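idpf_prep_pkts() is wired up as the tx_pkt_prepare callback, so it runs when an application calls rte_eth_tx_prepare() ahead of the burst. A sketch of that call sequence; the port/queue numbers are placeholders:

	#include <rte_ethdev.h>
	#include <rte_errno.h>
	#include <rte_mbuf.h>

	static uint16_t prepare_and_send(struct rte_mbuf **pkts, uint16_t n)
	{
		uint16_t ok = rte_eth_tx_prepare(0, 0, pkts, n);

		if (ok < n)
			return ok; /* pkts[ok] broke a seg/MSS/frame-size limit,
				    * rte_errno says which */
		return rte_eth_tx_burst(0, 0, pkts, n);
	}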
-
 static void __rte_cold
 release_rxq_mbufs_vec(struct idpf_rx_queue *rxq)
 {
diff --git a/drivers/net/idpf/idpf_rxtx.h b/drivers/net/idpf/idpf_rxtx.h
index 4efbf10295..eab363c3e7 100644
--- a/drivers/net/idpf/idpf_rxtx.h
+++ b/drivers/net/idpf/idpf_rxtx.h
@@ -8,41 +8,6 @@ 
 #include <idpf_common_rxtx.h>
 #include "idpf_ethdev.h"
 
-/* MTS */
-#define GLTSYN_CMD_SYNC_0_0	(PF_TIMESYNC_BASE + 0x0)
-#define PF_GLTSYN_SHTIME_0_0	(PF_TIMESYNC_BASE + 0x4)
-#define PF_GLTSYN_SHTIME_L_0	(PF_TIMESYNC_BASE + 0x8)
-#define PF_GLTSYN_SHTIME_H_0	(PF_TIMESYNC_BASE + 0xC)
-#define GLTSYN_ART_L_0		(PF_TIMESYNC_BASE + 0x10)
-#define GLTSYN_ART_H_0		(PF_TIMESYNC_BASE + 0x14)
-#define PF_GLTSYN_SHTIME_0_1	(PF_TIMESYNC_BASE + 0x24)
-#define PF_GLTSYN_SHTIME_L_1	(PF_TIMESYNC_BASE + 0x28)
-#define PF_GLTSYN_SHTIME_H_1	(PF_TIMESYNC_BASE + 0x2C)
-#define PF_GLTSYN_SHTIME_0_2	(PF_TIMESYNC_BASE + 0x44)
-#define PF_GLTSYN_SHTIME_L_2	(PF_TIMESYNC_BASE + 0x48)
-#define PF_GLTSYN_SHTIME_H_2	(PF_TIMESYNC_BASE + 0x4C)
-#define PF_GLTSYN_SHTIME_0_3	(PF_TIMESYNC_BASE + 0x64)
-#define PF_GLTSYN_SHTIME_L_3	(PF_TIMESYNC_BASE + 0x68)
-#define PF_GLTSYN_SHTIME_H_3	(PF_TIMESYNC_BASE + 0x6C)
-
-#define PF_TIMESYNC_BAR4_BASE	0x0E400000
-#define GLTSYN_ENA		(PF_TIMESYNC_BAR4_BASE + 0x90)
-#define GLTSYN_CMD		(PF_TIMESYNC_BAR4_BASE + 0x94)
-#define GLTSYC_TIME_L		(PF_TIMESYNC_BAR4_BASE + 0x104)
-#define GLTSYC_TIME_H		(PF_TIMESYNC_BAR4_BASE + 0x108)
-
-#define GLTSYN_CMD_SYNC_0_4	(PF_TIMESYNC_BAR4_BASE + 0x110)
-#define PF_GLTSYN_SHTIME_L_4	(PF_TIMESYNC_BAR4_BASE + 0x118)
-#define PF_GLTSYN_SHTIME_H_4	(PF_TIMESYNC_BAR4_BASE + 0x11C)
-#define GLTSYN_INCVAL_L		(PF_TIMESYNC_BAR4_BASE + 0x150)
-#define GLTSYN_INCVAL_H		(PF_TIMESYNC_BAR4_BASE + 0x154)
-#define GLTSYN_SHADJ_L		(PF_TIMESYNC_BAR4_BASE + 0x158)
-#define GLTSYN_SHADJ_H		(PF_TIMESYNC_BAR4_BASE + 0x15C)
-
-#define GLTSYN_CMD_SYNC_0_5	(PF_TIMESYNC_BAR4_BASE + 0x130)
-#define PF_GLTSYN_SHTIME_L_5	(PF_TIMESYNC_BAR4_BASE + 0x138)
-#define PF_GLTSYN_SHTIME_H_5	(PF_TIMESYNC_BAR4_BASE + 0x13C)
-
 /* In QLEN must be whole number of 32 descriptors. */
 #define IDPF_ALIGN_RING_DESC	32
 #define IDPF_MIN_RING_DESC	32
@@ -62,44 +27,10 @@ 
 #define IDPF_DEFAULT_TX_RS_THRESH	32
 #define IDPF_DEFAULT_TX_FREE_THRESH	32
 
-#define IDPF_TX_MAX_MTU_SEG	10
-
-#define IDPF_MIN_TSO_MSS	88
-#define IDPF_MAX_TSO_MSS	9728
-#define IDPF_MAX_TSO_FRAME_SIZE	262143
-#define IDPF_TX_MAX_MTU_SEG     10
-
-#define IDPF_TX_CKSUM_OFFLOAD_MASK (		\
-		RTE_MBUF_F_TX_IP_CKSUM |	\
-		RTE_MBUF_F_TX_L4_MASK |		\
-		RTE_MBUF_F_TX_TCP_SEG)
-
-#define IDPF_TX_OFFLOAD_MASK (			\
-		IDPF_TX_CKSUM_OFFLOAD_MASK |	\
-		RTE_MBUF_F_TX_IPV4 |		\
-		RTE_MBUF_F_TX_IPV6)
-
-#define IDPF_TX_OFFLOAD_NOTSUP_MASK \
-		(RTE_MBUF_F_TX_OFFLOAD_MASK ^ IDPF_TX_OFFLOAD_MASK)
-
-extern uint64_t idpf_timestamp_dynflag;
-
 struct idpf_tx_vec_entry {
 	struct rte_mbuf *mbuf;
 };
 
-/* Offload features */
-union idpf_tx_offload {
-	uint64_t data;
-	struct {
-		uint64_t l2_len:7; /* L2 (MAC) Header Length. */
-		uint64_t l3_len:9; /* L3 (IP) Header Length. */
-		uint64_t l4_len:8; /* L4 Header Length. */
-		uint64_t tso_segsz:16; /* TCP TSO segment size */
-		/* uint64_t unused : 24; */
-	};
-};
-
 int idpf_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 			uint16_t nb_desc, unsigned int socket_id,
 			const struct rte_eth_rxconf *rx_conf,
@@ -118,77 +49,14 @@  int idpf_tx_queue_init(struct rte_eth_dev *dev, uint16_t tx_queue_id);
 int idpf_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id);
 int idpf_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id);
 void idpf_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
-uint16_t idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
-				uint16_t nb_pkts);
 uint16_t idpf_singleq_recv_pkts_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,
 				       uint16_t nb_pkts);
-uint16_t idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
-			       uint16_t nb_pkts);
-uint16_t idpf_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
-				uint16_t nb_pkts);
 uint16_t idpf_singleq_xmit_pkts_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 				       uint16_t nb_pkts);
-uint16_t idpf_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
-			       uint16_t nb_pkts);
-uint16_t idpf_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
-			uint16_t nb_pkts);
 
 void idpf_stop_queues(struct rte_eth_dev *dev);
 
 void idpf_set_rx_function(struct rte_eth_dev *dev);
 void idpf_set_tx_function(struct rte_eth_dev *dev);
 
-#define IDPF_TIMESYNC_REG_WRAP_GUARD_BAND  10000
-/* Helper function to convert a 32b nanoseconds timestamp to 64b. */
-static inline uint64_t
-idpf_tstamp_convert_32b_64b(struct idpf_adapter_ext *ad, uint32_t flag,
-			    uint32_t in_timestamp)
-{
-#ifdef RTE_ARCH_X86_64
-	struct idpf_hw *hw = &ad->base.hw;
-	const uint64_t mask = 0xFFFFFFFF;
-	uint32_t hi, lo, lo2, delta;
-	uint64_t ns;
-
-	if (flag != 0) {
-		IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
-		IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_EXEC_CMD_M |
-			       PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
-		lo = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
-		hi = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
-		/*
-		 * On a typical system the delta between lo and lo2 is ~1000 ns,
-		 * so 10000 is a large-enough but not overly-big guard band.
-		 */
-		if (lo > (UINT32_MAX - IDPF_TIMESYNC_REG_WRAP_GUARD_BAND))
-			lo2 = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
-		else
-			lo2 = lo;
-
-		if (lo2 < lo) {
-			lo = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
-			hi = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
-		}
-
-		ad->time_hw = ((uint64_t)hi << 32) | lo;
-	}
-
-	delta = (in_timestamp - (uint32_t)(ad->time_hw & mask));
-	if (delta > (mask / 2)) {
-		delta = ((uint32_t)(ad->time_hw & mask) - in_timestamp);
-		ns = ad->time_hw - delta;
-	} else {
-		ns = ad->time_hw + delta;
-	}
-
-	return ns;
-#else /* !RTE_ARCH_X86_64 */
-	RTE_SET_USED(ad);
-	RTE_SET_USED(flag);
-	RTE_SET_USED(in_timestamp);
-	return 0;
-#endif /* RTE_ARCH_X86_64 */
-}
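The 64-bit extension works because consecutive samples are assumed to land within half a 32-bit wrap (about 2.1 s at 1 ns resolution) of the cached time_hw reference, so the delta test can pick the rollover direction. A standalone sketch of just that arithmetic with two worked cases; no register reads, and the values are made up:

	#include <stdint.h>
	#include <assert.h>

	static uint64_t extend_32b(uint64_t time_hw, uint32_t in_ts)
	{
		const uint64_t mask = 0xFFFFFFFF;
		uint32_t delta = in_ts - (uint32_t)(time_hw & mask);

		if (delta > (mask / 2))
			return time_hw - ((uint32_t)(time_hw & mask) - in_ts);
		return time_hw + delta;
	}

	int main(void)
	{
		/* low word 0xFFFFFF00, sample wrapped past zero to 0x10 */
		assert(extend_32b(0x1FFFFFF00ULL, 0x10) == 0x200000010ULL);
		/* sample slightly behind the reference */
		assert(extend_32b(0x100000100ULL, 0xF0) == 0x1000000F0ULL);
		return 0;
	}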
-
 #endif /* _IDPF_RXTX_H_ */
diff --git a/drivers/net/idpf/idpf_rxtx_vec_avx512.c b/drivers/net/idpf/idpf_rxtx_vec_avx512.c
index 71a6c59823..19047d23c1 100644
--- a/drivers/net/idpf/idpf_rxtx_vec_avx512.c
+++ b/drivers/net/idpf/idpf_rxtx_vec_avx512.c
@@ -38,8 +38,7 @@  idpf_singleq_rearm_common(struct idpf_rx_queue *rxq)
 						dma_addr0);
 			}
 		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			IDPF_RXQ_REARM_THRESH;
+		rxq->rx_stats.mbuf_alloc_failed += IDPF_RXQ_REARM_THRESH;
 		return;
 	}
 	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
@@ -168,8 +167,7 @@  idpf_singleq_rearm(struct idpf_rx_queue *rxq)
 							 dma_addr0);
 				}
 			}
-			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-					IDPF_RXQ_REARM_THRESH;
+			rxq->rx_stats.mbuf_alloc_failed += IDPF_RXQ_REARM_THRESH;
 			return;
 		}
 	}