[RFC,v2,2/3] net/idpf: add support for hairpin queue

Message ID 20220926084248.1421987-3-junfeng.guo@intel.com (mailing list archive)
State New
Delegated to: Thomas Monjalon
Headers
Series enable hairpin queue |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Guo, Junfeng Sept. 26, 2022, 8:42 a.m. UTC
- Implement hairpin queue setup/configure/enable/disable.
- Cross-vport hairpin queue implemented via hairpin_bind/unbind API.

Test step:
1. Make sure no bug on CP side.
2. Add rule on IMC.
   - devmem 0x202920C100 64 0x804
   - opcode=0x1303 prof_id=0x34 sub_prof_id=0x0 cookie=0xa2b87 key=0x18,\
     0x0,00,00,00,00,de,0xad,0xbe,0xef,0x20,0x24,0x0,0x0,0x0,0x0,00,00,\
     00,00,00,00,0xa,0x2,0x1d,0x64,00,00,00,00,00,00,00,00,00,00,00,00,\
     0xa,0x2,0x1d,0x2,00,00,00,00,00,00,00,00,00,00,00,00 act=set_vsi{\
     act_val=0 val_type=2 dst_pe=0 slot=0x0} act=set_q{\
     qnum=0x142 no_implicit_vsi=1 prec=5}
3. Send packets on ixia side
   UDP packets with dmac=de:ad:be:ef:20:24 sip=10.2.29.100
   dip=10.2.29.2

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Signed-off-by: Beilei Xing <beilei.xing@intel.com>
Signed-off-by: Junfeng Guo <junfeng.guo@intel.com>
---
 drivers/net/idpf/base/virtchnl2.h |   6 +
 drivers/net/idpf/idpf_ethdev.c    | 181 ++++++++++-
 drivers/net/idpf/idpf_ethdev.h    |   5 +
 drivers/net/idpf/idpf_rxtx.c      | 521 ++++++++++++++++++++++++++++--
 drivers/net/idpf/idpf_rxtx.h      |  27 ++
 drivers/net/idpf/idpf_vchnl.c     | 211 +++++++++---
 6 files changed, 871 insertions(+), 80 deletions(-)
  

Patch

diff --git a/drivers/net/idpf/base/virtchnl2.h b/drivers/net/idpf/base/virtchnl2.h
index 566afe075f..9fdcccd2ae 100644
--- a/drivers/net/idpf/base/virtchnl2.h
+++ b/drivers/net/idpf/base/virtchnl2.h
@@ -273,6 +273,12 @@ 
 #define VIRTCHNL2_QUEUE_TYPE_RX_BUFFER		3
 #define VIRTCHNL2_QUEUE_TYPE_CONFIG_TX		4
 #define VIRTCHNL2_QUEUE_TYPE_CONFIG_RX		5
+#define VIRTCHNL2_QUEUE_TYPE_P2P_TX		6
+#define VIRTCHNL2_QUEUE_TYPE_P2P_RX		7
+#define VIRTCHNL2_QUEUE_TYPE_P2P_TX_COMPLETION	8
+#define VIRTCHNL2_QUEUE_TYPE_P2P_RX_BUFFER	9
+#define VIRTCHNL2_QUEUE_TYPE_MBX_TX		10
+#define VIRTCHNL2_QUEUE_TYPE_MBX_RX		11
 
 /* VIRTCHNL2_ITR_IDX
  * Virtchannel interrupt throttling rate index
diff --git a/drivers/net/idpf/idpf_ethdev.c b/drivers/net/idpf/idpf_ethdev.c
index 08ada728b1..41e6391d8b 100644
--- a/drivers/net/idpf/idpf_ethdev.c
+++ b/drivers/net/idpf/idpf_ethdev.c
@@ -38,6 +38,13 @@  static int idpf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
 static int idpf_dev_stats_get(struct rte_eth_dev *dev,
 			struct rte_eth_stats *stats);
 static int idpf_dev_stats_reset(struct rte_eth_dev *dev);
+static int idpf_hairpin_cap_get(struct rte_eth_dev *dev,
+				struct rte_eth_hairpin_cap *cap);
+static int
+idpf_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
+		size_t len, uint32_t tx);
+static int
+idpf_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port);
 
 int
 idpf_dev_link_update(struct rte_eth_dev *dev,
@@ -78,8 +85,25 @@  static const struct eth_dev_ops idpf_eth_dev_ops = {
 	.mtu_set			= idpf_dev_mtu_set,
 	.stats_get			= idpf_dev_stats_get,
 	.stats_reset			= idpf_dev_stats_reset,
+	.hairpin_cap_get		= idpf_hairpin_cap_get,
+	.rx_hairpin_queue_setup		= idpf_rx_hairpin_queue_setup,
+	.tx_hairpin_queue_setup		= idpf_tx_hairpin_queue_setup,
+	.hairpin_get_peer_ports		= idpf_hairpin_get_peer_ports,
+	.hairpin_bind			= idpf_hairpin_bind,
 };
 
+/* Fill in the fixed hairpin limits advertised by this PMD: the maximum
+ * number of hairpin queues, the Rx-to-Tx and Tx-to-Rx peer limits and the
+ * maximum descriptor count. Always returns 0.
+ */
+static int
+idpf_hairpin_cap_get(__rte_unused struct rte_eth_dev *dev,
+		     struct rte_eth_hairpin_cap *cap)
+{
+	cap->max_nb_desc = 1024;
+	cap->max_tx_2_rx = 1;
+	cap->max_rx_2_tx = 1;
+	cap->max_nb_queues = 1;
+
+	return 0;
+}
+
 static int
 idpf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
@@ -221,7 +245,6 @@  idpf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 	return ret;
 }
 
-
 static int
 idpf_dev_stats_reset(struct rte_eth_dev *dev)
 {
@@ -609,14 +632,26 @@  idpf_start_queues(struct rte_eth_dev *dev)
 	int err = 0;
 	int i;
 
+	/* For normal data queues, configure, init and enable Txq first, then
+	 * configure, init and enable Rxq.
+	 * For non-cross vport hairpin queues, configure Txq and Rxq first, then init Rxq.
+	 */
 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
 		txq = dev->data->tx_queues[i];
 		if (!txq || txq->tx_deferred_start)
 			continue;
-		err = idpf_tx_queue_start(dev, i);
-		if (err) {
-			PMD_DRV_LOG(ERR, "Fail to start Tx queue %u", i);
-			return err;
+		if (!txq->hairpin_q) {
+			err = idpf_tx_queue_start(dev, i);
+			if (err) {
+				PMD_DRV_LOG(ERR, "Fail to start Tx queue %u", i);
+				return err;
+			}
+		} else if (!txq->hairpin_cv) {
+			err = idpf_config_txq(vport, i);
+			if (err) {
+				PMD_DRV_LOG(ERR, "Fail to configure hairpin Tx queue %u", i);
+				return err;
+			}
 		}
 	}
 
@@ -624,10 +659,48 @@  idpf_start_queues(struct rte_eth_dev *dev)
 		rxq = dev->data->rx_queues[i];
 		if (!rxq || rxq->rx_deferred_start)
 			continue;
-		err = idpf_rx_queue_start(dev, i);
-		if (err) {
-			PMD_DRV_LOG(ERR, "Fail to start Rx queue %u", i);
-			return err;
+		if (!rxq->hairpin_q) {
+			err = idpf_rx_queue_start(dev, i);
+			if (err) {
+				PMD_DRV_LOG(ERR, "Fail to start Rx queue %u", i);
+				return err;
+			}
+		} else if (!rxq->hairpin_cv) {
+			err = idpf_config_rxq(vport, i);
+			if (err) {
+				PMD_DRV_LOG(ERR, "Fail to configure hairpin Rx queue %u", i);
+				return err;
+			}
+			err = idpf_rx_queue_init(dev, i);
+			if (err) {
+				PMD_DRV_LOG(ERR, "Fail to init hairpin Rx queue %u", i);
+				return err;
+			}
+		}
+	}
+
+	/* For non-cross vport hairpin queues, enable Txq and Rxq at last. */
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		txq = dev->data->tx_queues[i];
+		if (txq->hairpin_q && !txq->hairpin_cv) {
+			err = idpf_switch_hairpin_queue(vport, i, false, true);
+			if (err)
+				PMD_DRV_LOG(ERR, "Failed to switch hairpin TX queue %u on",
+					    i);
+			else
+				txq->q_started = true;
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		rxq = dev->data->rx_queues[i];
+		if (rxq->hairpin_q && !rxq->hairpin_cv) {
+			err = idpf_switch_hairpin_queue(vport, i, true, true);
+			if (err)
+				PMD_DRV_LOG(ERR, "Failed to switch hairpin RX queue %u on",
+					    i);
+		else
+			rxq->q_started = true;
 		}
 	}
 
@@ -696,6 +769,90 @@  idpf_dev_start(struct rte_eth_dev *dev)
 	return -1;
 }
 
+/* Report the peer port of this port's cross-vport hairpin queue.
+ *
+ * The hairpin queue is assumed to sit at the last Tx queue index of the
+ * port; the same index is used to look up the Rx side.
+ *
+ * @param dev		port being queried
+ * @param peer_ports	output array; entry 0 receives the peer port id
+ * @param len		number of entries available in peer_ports
+ * @param tx		non-zero: report the Rx peer of the Tx hairpin queue;
+ *			zero: report the Tx peer of the Rx hairpin queue
+ * @return number of peer ports written (0 or 1), -EINVAL on bad arguments
+ */
+static int
+idpf_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
+		size_t len, uint32_t tx)
+{
+	struct idpf_tx_queue *txq;
+	struct idpf_rx_queue *rxq;
+	uint16_t qid;
+
+	PMD_INIT_FUNC_TRACE();
+
+	/* peer_ports[0] is written below, so at least one slot is required. */
+	if (!peer_ports || len == 0)
+		return -EINVAL;
+
+	/* Without any Tx queue there is no "last queue" to inspect. */
+	if (dev->data->nb_tx_queues == 0)
+		return 0;
+
+	/* Assume the last queue is used by app as hairpin */
+	qid = dev->data->nb_tx_queues - 1;
+
+	if (tx) {
+		txq = dev->data->tx_queues[qid];
+		if (txq && txq->hairpin_cv) {
+			peer_ports[0] = txq->peer_rxp;
+			return 1;
+		}
+	} else if (qid < dev->data->nb_rx_queues) {
+		/* qid comes from the Tx queue count, so bound it for Rx. */
+		rxq = dev->data->rx_queues[qid];
+		if (rxq && rxq->hairpin_cv) {
+			peer_ports[0] = rxq->peer_txp;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/* Bind this port's Tx hairpin queue to the Rx hairpin queue of rx_port.
+ *
+ * The hairpin queue is assumed to sit at the last Tx queue index of this
+ * port, and the same index is used for the peer's Rx queue. The Tx queue is
+ * configured, the peer Rx queue is configured and initialised, and then
+ * both are switched on.
+ *
+ * @param dev		port owning the Tx hairpin queue
+ * @param rx_port	peer port owning the Rx hairpin queue; values
+ *			>= RTE_MAX_ETHPORTS are accepted and ignored
+ * @return 0 on success or when already bound, -ENODEV if rx_port is not a
+ *	   valid port, -EINVAL if the queues do not exist, otherwise the
+ *	   error returned by the failing configuration step
+ */
+static int
+idpf_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
+{
+	struct idpf_vport *vport, *peer_vport;
+	struct rte_eth_dev *peer_dev;
+	struct idpf_tx_queue *txq;
+	uint16_t qid;
+	int err;
+
+	PMD_INIT_FUNC_TRACE();
+	if (rx_port >= RTE_MAX_ETHPORTS)
+		return 0;
+
+	/* rte_eth_devices[rx_port].data is only usable for attached ports. */
+	if (!rte_eth_dev_is_valid_port(rx_port)) {
+		PMD_DRV_LOG(ERR, "Invalid hairpin peer port %u", rx_port);
+		return -ENODEV;
+	}
+
+	if (dev->data->nb_tx_queues == 0) {
+		PMD_DRV_LOG(ERR, "port %u has no Tx queue to bind",
+			    dev->data->port_id);
+		return -EINVAL;
+	}
+
+	/* Assume the last queue is used by app as hairpin */
+	qid = dev->data->nb_tx_queues - 1;
+	txq = dev->data->tx_queues[qid];
+	if (!txq) {
+		PMD_DRV_LOG(ERR, "Tx queue %u of port %u is not set up",
+			    qid, dev->data->port_id);
+		return -EINVAL;
+	}
+
+	if (txq->bound) {
+		PMD_DRV_LOG(INFO, "port %u already hairpin bound",
+				dev->data->port_id);
+		return 0;
+	}
+
+	/* Validate the peer queue before touching any hardware state. */
+	peer_dev = &rte_eth_devices[rx_port];
+	if (qid >= peer_dev->data->nb_rx_queues ||
+	    !peer_dev->data->rx_queues[qid]) {
+		PMD_DRV_LOG(ERR, "Rx queue %u of port %u is not set up",
+			    qid, peer_dev->data->port_id);
+		return -EINVAL;
+	}
+
+	vport = (struct idpf_vport *)dev->data->dev_private;
+	err = idpf_config_txq(vport, qid);
+	if (err) {
+		PMD_DRV_LOG(ERR, "Fail to configure hairpin Tx queue %u of port %u",
+			    qid, dev->data->port_id);
+		return err;
+	}
+
+	peer_vport = (struct idpf_vport *)peer_dev->data->dev_private;
+	err = idpf_config_rxq(peer_vport, qid);
+	if (err) {
+		PMD_DRV_LOG(ERR, "Fail to configure hairpin Rx queue %u of port %u",
+			    qid, peer_dev->data->port_id);
+		return err;
+	}
+	err = idpf_rx_queue_init(peer_dev, qid);
+	if (err) {
+		PMD_DRV_LOG(ERR, "Fail to init hairpin Rx queue %u of port %u",
+			    qid, peer_dev->data->port_id);
+		return err;
+	}
+
+	err = idpf_switch_hairpin_queue(vport, qid, false, true);
+	if (err) {
+		PMD_DRV_LOG(ERR, "Fail to enable hairpin Tx queue %u of port %u",
+			    qid, dev->data->port_id);
+		return err;
+	}
+
+	err = idpf_switch_hairpin_queue(peer_vport, qid, true, true);
+	if (err) {
+		PMD_DRV_LOG(ERR, "Fail to enable hairpin Rx queue %u of port %u",
+			    qid, peer_dev->data->port_id);
+		return err;
+	}
+
+	txq->bound = true;
+	return 0;
+}
+
 static int
 idpf_dev_stop(struct rte_eth_dev *dev)
 {
@@ -733,6 +890,12 @@  idpf_dev_close(struct rte_eth_dev *dev)
 		return 0;
 
 	idpf_dev_stop(dev);
+
+	if (vport->p2p_mp) {
+		rte_mempool_free(vport->p2p_mp);
+		vport->p2p_mp = NULL;
+	}
+
 	idpf_destroy_vport(vport);
 
 	rte_free(vport->rss_lut);
diff --git a/drivers/net/idpf/idpf_ethdev.h b/drivers/net/idpf/idpf_ethdev.h
index 968e0e3cbf..90b931313b 100644
--- a/drivers/net/idpf/idpf_ethdev.h
+++ b/drivers/net/idpf/idpf_ethdev.h
@@ -135,6 +135,9 @@  struct idpf_vport {
 	/* Chunk info */
 	struct idpf_chunks_info chunks_info;
 
+	/* p2p mbuf pool */
+	struct rte_mempool *p2p_mp;
+
 	/* Event from ipf */
 	bool link_up;
 	uint32_t link_speed;
@@ -256,6 +259,8 @@  int idpf_config_txqs(struct idpf_vport *vport);
 int idpf_config_txq(struct idpf_vport *vport, uint16_t txq_id);
 int idpf_switch_queue(struct idpf_vport *vport, uint16_t qid,
 		      bool rx, bool on);
+int idpf_switch_hairpin_queue(struct idpf_vport *vport, uint16_t qid,
+		      bool rx, bool on);
 int idpf_ena_dis_queues(struct idpf_vport *vport, bool enable);
 int idpf_ena_dis_vport(struct idpf_vport *vport, bool enable);
 int idpf_query_stats(struct idpf_vport *vport,
diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
index b0037eca08..13e55d7196 100644
--- a/drivers/net/idpf/idpf_rxtx.c
+++ b/drivers/net/idpf/idpf_rxtx.c
@@ -152,16 +152,25 @@  idpf_rx_queue_release(void *rxq)
 		return;
 
 	/* Split queue */
-	if (q->bufq1 && q->bufq2) {
+	if (q->bufq1) {
+		/* the mz is shared between Tx/Rx hairpin, let Tx_release
+		 * free the buf.
+		 */
+		if (!q->hairpin_q) {
+			rte_memzone_free(q->bufq1->mz);
+			rte_memzone_free(q->bufq2->mz);
+			rte_memzone_free(q->mz);
+		}
+
 		q->bufq1->ops->release_mbufs(q->bufq1);
 		rte_free(q->bufq1->sw_ring);
-		rte_memzone_free(q->bufq1->mz);
 		rte_free(q->bufq1);
-		q->bufq2->ops->release_mbufs(q->bufq2);
-		rte_free(q->bufq2->sw_ring);
-		rte_memzone_free(q->bufq2->mz);
-		rte_free(q->bufq2);
-		rte_memzone_free(q->mz);
+
+		if (q->bufq2) {
+			q->bufq2->ops->release_mbufs(q->bufq2);
+			rte_free(q->bufq2->sw_ring);
+			rte_free(q->bufq2);
+		}
 		rte_free(q);
 		return;
 	}
@@ -244,7 +253,8 @@  reset_split_rx_queue(struct idpf_rx_queue *rxq)
 {
 	reset_split_rx_descq(rxq);
 	reset_split_rx_bufq(rxq->bufq1);
-	reset_split_rx_bufq(rxq->bufq2);
+	if (rxq->bufq2)
+		reset_split_rx_bufq(rxq->bufq2);
 }
 
 static inline void
@@ -390,6 +400,7 @@  idpf_rx_split_bufq_setup(struct rte_eth_dev *dev, struct idpf_rx_queue *bufq,
 	bufq->rx_deferred_start = rx_conf->rx_deferred_start;
 	bufq->rx_hdr_len = 0;
 	bufq->adapter = adapter;
+	bufq->q_type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER;
 
 	if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
 		bufq->crc_len = RTE_ETHER_CRC_LEN;
@@ -503,6 +514,7 @@  idpf_rx_split_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	rxq->rx_hdr_len = 0;
 	rxq->adapter = adapter;
 	rxq->offloads = offloads;
+	rxq->q_type = VIRTCHNL2_QUEUE_TYPE_RX;
 
 	if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
 		rxq->crc_len = RTE_ETHER_CRC_LEN;
@@ -656,6 +668,7 @@  idpf_rx_single_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	rxq->rx_hdr_len = 0;
 	rxq->adapter = adapter;
 	rxq->offloads = offloads;
+	rxq->q_type = VIRTCHNL2_QUEUE_TYPE_RX;
 
 	if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
 		rxq->crc_len = RTE_ETHER_CRC_LEN;
@@ -780,6 +793,7 @@  idpf_tx_split_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	txq->port_id = dev->data->port_id;
 	txq->offloads = offloads;
 	txq->tx_deferred_start = tx_conf->tx_deferred_start;
+	txq->q_type = VIRTCHNL2_QUEUE_TYPE_TX;
 
 	/* Allocate software ring */
 	txq->sw_nb_desc = 2 * nb_desc;
@@ -831,6 +845,7 @@  idpf_tx_split_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	cq->port_id = dev->data->port_id;
 	cq->txqs = dev->data->tx_queues;
 	cq->tx_start_qid = vport->chunks_info.tx_start_qid;
+	cq->q_type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION;
 
 	ring_size = sizeof(struct iecm_splitq_tx_compl_desc) * cq->nb_tx_desc;
 	ring_size = RTE_ALIGN(ring_size, IDPF_DMA_MEM_ALIGN);
@@ -912,6 +927,7 @@  idpf_tx_single_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	txq->port_id = dev->data->port_id;
 	txq->offloads = offloads;
 	txq->tx_deferred_start = tx_conf->tx_deferred_start;
+	txq->q_type = VIRTCHNL2_QUEUE_TYPE_TX;
 
 	/* Allocate software ring */
 	txq->sw_ring =
@@ -985,6 +1001,407 @@  idpf_register_ts_mbuf(struct idpf_rx_queue *rxq)
 	return 0;
 }
 
+/* Zero every byte of a hairpin Rx descriptor ring; a NULL queue is ignored. */
+static inline void
+reset_rx_hairpin_descq(struct idpf_rx_queue *rxq)
+{
+	volatile char *ring;
+	uint16_t nb_desc;
+	uint32_t off;
+
+	if (rxq == NULL)
+		return;
+
+	nb_desc = rxq->nb_rx_desc;
+	ring = (volatile char *)rxq->rx_ring;
+	/* Each hairpin descriptor occupies IDPF_P2P_DESC_LEN bytes. */
+	for (off = 0; off < nb_desc * IDPF_P2P_DESC_LEN; off++)
+		ring[off] = 0;
+}
+
+/* Zero every byte of a hairpin Rx buffer ring and clear the queue's own
+ * bufq1/bufq2 links; a NULL queue is ignored.
+ */
+static inline void
+reset_rx_hairpin_bufq(struct idpf_rx_queue *rxbq)
+{
+	volatile char *ring;
+	uint16_t nb_desc;
+	uint32_t off;
+
+	if (rxbq == NULL)
+		return;
+
+	nb_desc = rxbq->nb_rx_desc;
+	ring = (volatile char *)rxbq->rx_ring;
+	/* Each hairpin descriptor occupies IDPF_P2P_DESC_LEN bytes. */
+	for (off = 0; off < nb_desc * IDPF_P2P_DESC_LEN; off++)
+		ring[off] = 0;
+
+	rxbq->bufq2 = NULL;
+	rxbq->bufq1 = NULL;
+}
+
+/* Sizing of the per-vport mbuf pool that backs hairpin (p2p) Rx buffers. */
+#define IDPF_NB_MBUF		4096	/* number of mbufs in the pool */
+#define IDPF_CACHE_SIZE		250	/* cache size given to the pool */
+#define IDPF_MBUF_SIZE		2048	/* mbuf buffer size in bytes */
+/* Extra bytes added to every hairpin ring memzone reservation. */
+#define IDPF_P2P_RING_BUF	128
+
+/* Set up the Rx buffer queue that backs a hairpin Rx queue.
+ *
+ * Buffers come from a per-vport mbuf pool that is created on first use and
+ * cached in vport->p2p_mp. The HW ring is shared with the peer Tx queue's
+ * completion ring when that ring already has a memzone; otherwise a new
+ * memzone is reserved.
+ *
+ * @param dev		port owning the buffer queue
+ * @param bufq		zero-initialised queue structure to fill in
+ * @param queue_idx	buffer queue index, relative to rx_buf_start_qid
+ * @param nb_desc	number of descriptors in the buffer ring
+ * @param peer_txq	peer hairpin Tx queue, or NULL if not set up yet
+ * @return 0 on success, -ENOMEM on allocation failure
+ */
+static int
+idpf_rx_hairpin_bufq_setup(struct rte_eth_dev *dev, struct idpf_rx_queue *bufq,
+			   uint16_t queue_idx, uint16_t nb_desc,
+			   struct idpf_tx_queue *peer_txq)
+{
+	struct idpf_vport *vport =
+		(struct idpf_vport *)dev->data->dev_private;
+	struct idpf_adapter *adapter = vport->adapter;
+	struct iecm_hw *hw = &adapter->hw;
+	struct rte_mempool *mp;
+	const struct rte_memzone *mz;
+	uint32_t ring_size;
+	char pool_name[RTE_MEMPOOL_NAMESIZE];
+
+	mp = vport->p2p_mp;
+	if (!mp) {
+		snprintf(pool_name, RTE_MEMPOOL_NAMESIZE, "p2p_mb_pool_%u",
+			 dev->data->port_id);
+		/* The data room must include the headroom: buffers are posted
+		 * at the default data offset and rx_buf_len below advertises
+		 * IDPF_MBUF_SIZE usable bytes.
+		 */
+		mp = rte_pktmbuf_pool_create(pool_name, IDPF_NB_MBUF, IDPF_CACHE_SIZE,
+					     0, IDPF_MBUF_SIZE + RTE_PKTMBUF_HEADROOM,
+					     dev->device->numa_node);
+		if (!mp) {
+			PMD_INIT_LOG(ERR, "Failed to allocate mbuf pool for p2p");
+			return -ENOMEM;
+		}
+		vport->p2p_mp = mp;
+	}
+
+	bufq->mp = mp;
+	bufq->nb_rx_desc = nb_desc;
+	bufq->queue_id = vport->chunks_info.rx_buf_start_qid + queue_idx;
+	bufq->port_id = dev->data->port_id;
+	bufq->adapter = adapter;
+	bufq->q_type = VIRTCHNL2_QUEUE_TYPE_P2P_RX_BUFFER;
+	bufq->hairpin_q = true;
+	bufq->rx_buf_len = IDPF_MBUF_SIZE;
+
+	bufq->sw_ring = rte_zmalloc("sw ring",
+				    sizeof(struct rte_mbuf *) * nb_desc,
+				    RTE_CACHE_LINE_SIZE);
+	if (!bufq->sw_ring) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for SW ring");
+		return -ENOMEM;
+	}
+
+	/* Reuse the peer's completion ring if it exists; complq may be NULL
+	 * when the peer Tx queue has no completion queue.
+	 */
+	if (peer_txq && peer_txq->complq && peer_txq->complq->mz) {
+		mz = peer_txq->complq->mz;
+	} else {
+		ring_size = RTE_ALIGN(bufq->nb_rx_desc * IDPF_P2P_DESC_LEN,
+				      IDPF_DMA_MEM_ALIGN);
+		mz = rte_eth_dma_zone_reserve(dev, "hairpin_rx_buf_ring", queue_idx,
+					      ring_size + IDPF_P2P_RING_BUF,
+					      IDPF_RING_BASE_ALIGN,
+					      dev->device->numa_node);
+		if (!mz) {
+			PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for hairpin RX buffer queue.");
+			rte_free(bufq->sw_ring);
+			/* Do not leave a dangling pointer for the caller. */
+			bufq->sw_ring = NULL;
+			return -ENOMEM;
+		}
+	}
+	bufq->rx_ring_phys_addr = mz->iova;
+	bufq->rx_ring = mz->addr;
+	bufq->mz = mz;
+
+	reset_rx_hairpin_bufq(bufq);
+	bufq->q_set = true;
+	bufq->qrx_tail = hw->hw_addr + (vport->chunks_info.rx_buf_qtail_start +
+			 queue_idx * vport->chunks_info.rx_buf_qtail_spacing);
+	bufq->ops = &def_rxq_ops;
+
+	return 0;
+}
+
+/* Set up a hairpin Rx queue together with its single Rx buffer queue.
+ *
+ * The descriptor ring is shared with the peer hairpin Tx queue when that
+ * queue already has a memzone; otherwise a new memzone is reserved here.
+ *
+ * @param dev		port owning the Rx queue
+ * @param queue_idx	Rx queue index on dev
+ * @param nb_desc	buffer ring size; the Rx ring gets twice as many
+ * @param conf		hairpin configuration; only peers[0] is used
+ * @return 0 on success, -EINVAL on invalid arguments, -ENOMEM on
+ *	   allocation failure
+ */
+int
+idpf_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+			    uint16_t nb_desc,
+			    const struct rte_eth_hairpin_conf *conf)
+{
+	struct idpf_vport *vport =
+		(struct idpf_vport *)dev->data->dev_private;
+	struct idpf_adapter *adapter = vport->adapter;
+	struct idpf_vport *peer_vport;
+	struct idpf_rx_queue *rxq;
+	struct idpf_rx_queue *bufq1;
+	struct idpf_tx_queue *peer_txq = NULL;
+	const struct rte_memzone *mz = NULL;
+	uint32_t ring_size;
+	uint16_t qid;
+	/* true only when this function reserved mz itself */
+	bool own_mz = false;
+	int ret;
+	uint16_t peer_port = conf->peers[0].port;
+	uint16_t peer_q = conf->peers[0].queue;
+
+	if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE) {
+		PMD_INIT_LOG(ERR, "Only spilt queue model supports hairpin queue.");
+		return -EINVAL;
+	}
+
+	if (nb_desc % IDPF_ALIGN_RING_DESC != 0 ||
+	    nb_desc > IDPF_MAX_RING_DESC ||
+	    nb_desc < IDPF_MIN_RING_DESC) {
+		PMD_INIT_LOG(ERR, "Number (%u) of receive descriptors is invalid", nb_desc);
+		return -EINVAL;
+	}
+
+	/* Free memory if needed */
+	if (dev->data->rx_queues[queue_idx]) {
+		idpf_rx_queue_release(dev->data->rx_queues[queue_idx]);
+		dev->data->rx_queues[queue_idx] = NULL;
+	}
+
+	/* Setup Rx descriptor queue */
+	rxq = rte_zmalloc("idpf_rxq_hairpin", sizeof(struct idpf_rx_queue),
+			  RTE_CACHE_LINE_SIZE);
+	if (!rxq) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for rx queue data structure");
+		return -ENOMEM;
+	}
+
+	rxq->nb_rx_desc = nb_desc * 2;
+	rxq->queue_id = vport->chunks_info.rx_start_qid + queue_idx;
+	rxq->port_id = dev->data->port_id;
+	rxq->adapter = adapter;
+	rxq->q_type = VIRTCHNL2_QUEUE_TYPE_P2P_RX;
+	rxq->hairpin_q = true;
+	rxq->rx_buf_len = 2048;
+
+	if (peer_port != dev->data->port_id)
+		rxq->hairpin_cv = true;
+	rxq->peer_txp = peer_port;
+	peer_vport = adapter->vports[peer_port];
+	if (!peer_vport) {
+		PMD_INIT_LOG(ERR, "Hairpin peer port %u has no vport", peer_port);
+		ret = -EINVAL;
+		goto free_rxq;
+	}
+	if (peer_q < peer_vport->dev_data->nb_tx_queues)
+		peer_txq = peer_vport->dev_data->tx_queues[peer_q];
+
+	/* Hairpin Rxq and Txq share the same HW ring */
+	if (peer_txq && peer_txq->mz) {
+		mz = peer_txq->mz;
+	} else {
+		ring_size = RTE_ALIGN(rxq->nb_rx_desc * IDPF_P2P_DESC_LEN,
+				      IDPF_DMA_MEM_ALIGN);
+		mz = rte_eth_dma_zone_reserve(dev, "hairpin_rx_ring", queue_idx,
+					      ring_size + IDPF_P2P_RING_BUF,
+					      IDPF_RING_BASE_ALIGN,
+					      dev->device->numa_node);
+		if (!mz) {
+			PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for RX");
+			ret = -ENOMEM;
+			goto free_rxq;
+		}
+		own_mz = true;
+	}
+	rxq->rx_ring_phys_addr = mz->iova;
+	rxq->rx_ring = mz->addr;
+	rxq->mz = mz;
+	reset_rx_hairpin_descq(rxq);
+
+	/* setup 1 Rx buffer queue for 1 hairpin rxq */
+	bufq1 = rte_zmalloc_socket("hairpin rx bufq1",
+				   sizeof(struct idpf_rx_queue),
+				   RTE_CACHE_LINE_SIZE,
+				   SOCKET_ID_ANY);
+	if (!bufq1) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for hairpin Rx buffer queue 1.");
+		ret = -ENOMEM;
+		goto free_mz;
+	}
+	qid = 2 * queue_idx;
+	ret = idpf_rx_hairpin_bufq_setup(dev, bufq1, qid, nb_desc, peer_txq);
+	if (ret) {
+		/* Keep the helper's error code instead of masking it. */
+		PMD_INIT_LOG(ERR, "Failed to setup hairpin Rx buffer queue 1");
+		goto free_bufq1;
+	}
+	rxq->bufq1 = bufq1;
+	rxq->bufq2 = NULL;
+
+	rxq->q_set = true;
+	dev->data->rx_queues[queue_idx] = rxq;
+
+	return 0;
+
+free_bufq1:
+	rte_free(bufq1);
+free_mz:
+	/* A ring borrowed from the peer Tx queue is still in use by it. */
+	if (own_mz)
+		rte_memzone_free(mz);
+free_rxq:
+	rte_free(rxq);
+
+	return ret;
+}
+
+/* Zero every byte of a hairpin Tx descriptor ring; a NULL queue is only
+ * logged.
+ */
+static inline void
+reset_tx_hairpin_descq(struct idpf_tx_queue *txq)
+{
+	volatile char *ring;
+	uint32_t nbytes, off;
+
+	if (txq == NULL) {
+		PMD_DRV_LOG(DEBUG, "Pointer to txq is NULL");
+		return;
+	}
+
+	/* Each hairpin descriptor occupies IDPF_P2P_DESC_LEN bytes. */
+	nbytes = txq->nb_tx_desc * IDPF_P2P_DESC_LEN;
+	ring = (volatile char *)txq->desc_ring;
+	for (off = 0; off < nbytes; off++)
+		ring[off] = 0;
+}
+
+/* Zero every byte of a hairpin Tx completion ring; a NULL queue is only
+ * logged.
+ */
+static inline void
+reset_tx_hairpin_complq(struct idpf_tx_queue *cq)
+{
+	volatile char *ring;
+	uint32_t nbytes, off;
+
+	if (cq == NULL) {
+		PMD_DRV_LOG(DEBUG, "Pointer to complq is NULL");
+		return;
+	}
+
+	/* Each hairpin descriptor occupies IDPF_P2P_DESC_LEN bytes. */
+	nbytes = cq->nb_tx_desc * IDPF_P2P_DESC_LEN;
+	ring = (volatile char *)cq->compl_ring;
+	for (off = 0; off < nbytes; off++)
+		ring[off] = 0;
+}
+
+/* Set up a hairpin Tx queue together with its completion queue.
+ *
+ * The descriptor ring and the completion ring are shared with the peer
+ * hairpin Rx queue and its buffer queue when those already have a memzone;
+ * otherwise new memzones are reserved here. Everything allocated by this
+ * function is released again on failure.
+ *
+ * @param dev		port owning the Tx queue
+ * @param queue_idx	Tx queue index on dev
+ * @param nb_desc	completion ring size; the Tx ring gets twice as many
+ * @param conf		hairpin configuration; only peers[0] is used
+ * @return 0 on success, -EINVAL on invalid arguments, -ENOMEM on
+ *	   allocation failure
+ */
+int
+idpf_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+			    uint16_t nb_desc,
+			    const struct rte_eth_hairpin_conf *conf)
+{
+	struct idpf_vport *vport =
+		(struct idpf_vport *)dev->data->dev_private;
+	struct idpf_vport *peer_vport;
+	struct idpf_adapter *adapter = vport->adapter;
+	struct iecm_hw *hw = &adapter->hw;
+	struct idpf_tx_queue *txq, *cq;
+	struct idpf_rx_queue *peer_rxq = NULL;
+	const struct rte_memzone *mz;
+	uint32_t ring_size;
+	uint16_t peer_port = conf->peers[0].port;
+	uint16_t peer_q = conf->peers[0].queue;
+	/* true only when the descriptor ring memzone was reserved here */
+	bool own_desc_mz = false;
+	int ret;
+
+	if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE) {
+		PMD_INIT_LOG(ERR, "Only spilt queue model supports hairpin queue.");
+		return -EINVAL;
+	}
+
+	if (nb_desc % IDPF_ALIGN_RING_DESC != 0 ||
+	    nb_desc > IDPF_MAX_RING_DESC ||
+	    nb_desc < IDPF_MIN_RING_DESC) {
+		PMD_INIT_LOG(ERR, "Number (%u) of transmit descriptors is invalid",
+			     nb_desc);
+		return -EINVAL;
+	}
+
+	/* Free memory if needed. */
+	if (dev->data->tx_queues[queue_idx]) {
+		idpf_tx_queue_release(dev->data->tx_queues[queue_idx]);
+		dev->data->tx_queues[queue_idx] = NULL;
+	}
+
+	/* Allocate the TX queue data structure. */
+	txq = rte_zmalloc_socket("idpf hairpin txq",
+				 sizeof(struct idpf_tx_queue),
+				 RTE_CACHE_LINE_SIZE,
+				 SOCKET_ID_ANY);
+	if (!txq) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for tx queue structure");
+		return -ENOMEM;
+	}
+
+	/* Txq ring length should be 2 times of Tx completion queue size. */
+	txq->nb_tx_desc = nb_desc * 2;
+	txq->queue_id = vport->chunks_info.tx_start_qid + queue_idx;
+	txq->port_id = dev->data->port_id;
+	txq->q_type = VIRTCHNL2_QUEUE_TYPE_P2P_TX;
+	txq->hairpin_q = true;
+
+	if (peer_port != dev->data->port_id)
+		txq->hairpin_cv = true;
+	txq->peer_rxp = peer_port;
+	peer_vport = adapter->vports[peer_port];
+	if (!peer_vport) {
+		PMD_INIT_LOG(ERR, "Hairpin peer port %u has no vport", peer_port);
+		ret = -EINVAL;
+		goto free_txq;
+	}
+	txq->peer_rxq_id = peer_vport->chunks_info.rx_start_qid + peer_q;
+	if (peer_q < peer_vport->dev_data->nb_rx_queues)
+		peer_rxq = peer_vport->dev_data->rx_queues[peer_q];
+
+	/* Hairpin Rxq and Txq share the same HW ring */
+	if (peer_rxq && peer_rxq->mz) {
+		mz = peer_rxq->mz;
+	} else {
+		ring_size = RTE_ALIGN(txq->nb_tx_desc * IDPF_P2P_DESC_LEN,
+				      IDPF_DMA_MEM_ALIGN);
+		mz = rte_eth_dma_zone_reserve(dev, "hairpin_tx_ring", queue_idx,
+					      ring_size + IDPF_P2P_RING_BUF,
+					      IDPF_RING_BASE_ALIGN,
+					      dev->device->numa_node);
+		if (!mz) {
+			PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX");
+			ret = -ENOMEM;
+			goto free_txq;
+		}
+		own_desc_mz = true;
+	}
+	txq->tx_ring_phys_addr = mz->iova;
+	txq->desc_ring = mz->addr;
+	txq->mz = mz;
+
+	reset_tx_hairpin_descq(txq);
+	txq->qtx_tail = hw->hw_addr + (vport->chunks_info.tx_qtail_start +
+			queue_idx * vport->chunks_info.tx_qtail_spacing);
+	txq->ops = &def_txq_ops;
+
+	/* Allocate the TX completion queue data structure. */
+	cq = rte_zmalloc_socket("idpf hairpin cq",
+				sizeof(struct idpf_tx_queue),
+				RTE_CACHE_LINE_SIZE,
+				dev->device->numa_node);
+	if (!cq) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for tx queue structure");
+		ret = -ENOMEM;
+		goto free_desc_mz;
+	}
+	txq->complq = cq;
+
+	cq->nb_tx_desc = nb_desc;
+	cq->queue_id = vport->chunks_info.tx_compl_start_qid + queue_idx;
+	cq->port_id = dev->data->port_id;
+	cq->q_type = VIRTCHNL2_QUEUE_TYPE_P2P_TX_COMPLETION;
+	cq->hairpin_q = true;
+	cq->peer_rxq_id = peer_vport->chunks_info.rx_buf_start_qid + peer_q * 2;
+
+	/* Hairpin Rx buffer queue and Tx completion queue share the same HW
+	 * ring; bufq1 may be NULL when the peer Rx queue has no buffer queue.
+	 */
+	if (peer_rxq && peer_rxq->bufq1 && peer_rxq->bufq1->mz) {
+		mz = peer_rxq->bufq1->mz;
+	} else {
+		ring_size = RTE_ALIGN(cq->nb_tx_desc * IDPF_P2P_DESC_LEN,
+				      IDPF_DMA_MEM_ALIGN);
+		mz = rte_eth_dma_zone_reserve(dev, "hairpin_tx_compl_ring", queue_idx,
+					      ring_size + IDPF_P2P_RING_BUF,
+					      IDPF_RING_BASE_ALIGN,
+					      dev->device->numa_node);
+		if (!mz) {
+			PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX completion queue");
+			ret = -ENOMEM;
+			goto free_cq;
+		}
+	}
+	cq->tx_ring_phys_addr = mz->iova;
+	cq->compl_ring = mz->addr;
+	cq->mz = mz;
+
+	reset_tx_hairpin_complq(cq);
+
+	txq->q_set = true;
+	dev->data->tx_queues[queue_idx] = txq;
+
+	return 0;
+
+free_cq:
+	rte_free(cq);
+free_desc_mz:
+	/* A ring borrowed from the peer Rx queue is still in use by it. */
+	if (own_desc_mz)
+		rte_memzone_free(txq->mz);
+free_txq:
+	rte_free(txq);
+
+	return ret;
+}
+
 static int
 idpf_alloc_single_rxq_mbufs(struct idpf_rx_queue *rxq)
 {
@@ -1023,6 +1440,41 @@  idpf_alloc_single_rxq_mbufs(struct idpf_rx_queue *rxq)
 	return 0;
 }
 
+/* Populate every descriptor of a hairpin Rx buffer ring with a freshly
+ * allocated mbuf from rxq->mp and record the mbuf in the software ring.
+ *
+ * Returns 0 on success or -ENOMEM as soon as an mbuf allocation fails.
+ */
+static int
+idpf_alloc_split_p2p_rxq_mbufs(struct idpf_rx_queue *rxq)
+{
+	volatile struct virtchnl2_p2p_rx_buf_desc *ring =
+		(volatile struct virtchnl2_p2p_rx_buf_desc *)(rxq->rx_ring);
+	uint16_t idx;
+
+	for (idx = 0; idx < rxq->nb_rx_desc; idx++) {
+		struct rte_mbuf *mb = rte_mbuf_raw_alloc(rxq->mp);
+
+		if (unlikely(!mb)) {
+			PMD_DRV_LOG(ERR, "Failed to allocate mbuf for RX");
+			return -ENOMEM;
+		}
+
+		/* Initialise the raw mbuf as a single-segment buffer. */
+		rte_mbuf_refcnt_set(mb, 1);
+		mb->next = NULL;
+		mb->data_off = RTE_PKTMBUF_HEADROOM;
+		mb->nb_segs = 1;
+		mb->port = rxq->port_id;
+
+		/* Post the buffer address in little-endian form. */
+		ring[idx].reserve0 = 0;
+		ring[idx].pkt_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
+
+		rxq->sw_ring[idx] = mb;
+	}
+
+	rxq->rx_tail = rxq->nb_rx_desc - 8;
+	rxq->nb_rx_hold = 0;
+
+	return 0;
+}
+
 static int
 idpf_alloc_split_rxq_mbufs(struct idpf_rx_queue *rxq)
 {
@@ -1102,22 +1554,31 @@  idpf_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 		IECM_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1);
 	} else {
 		/* Split queue */
-		err = idpf_alloc_split_rxq_mbufs(rxq->bufq1);
+		if (rxq->hairpin_q)
+			err = idpf_alloc_split_p2p_rxq_mbufs(rxq->bufq1);
+		else
+			err = idpf_alloc_split_rxq_mbufs(rxq->bufq1);
 		if (err) {
 			PMD_DRV_LOG(ERR, "Failed to allocate RX buffer queue mbuf");
 			return err;
 		}
+
+		if (!rxq->bufq2)
+			goto doorbell;
+
 		err = idpf_alloc_split_rxq_mbufs(rxq->bufq2);
 		if (err) {
 			PMD_DRV_LOG(ERR, "Failed to allocate RX buffer queue mbuf");
 			return err;
 		}
 
+doorbell:
 		rte_wmb();
 
 		/* Init the RX tail register. */
 		IECM_PCI_REG_WRITE(rxq->bufq1->qrx_tail, rxq->bufq1->rx_tail);
-		IECM_PCI_REG_WRITE(rxq->bufq2->qrx_tail, rxq->bufq2->rx_tail);
+		if (rxq->bufq2)
+			IECM_PCI_REG_WRITE(rxq->bufq2->qrx_tail, rxq->bufq2->rx_tail);
 	}
 
 	return err;
@@ -1225,7 +1686,11 @@  idpf_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	if (rx_queue_id >= dev->data->nb_rx_queues)
 		return -EINVAL;
 
-	err = idpf_switch_queue(vport, rx_queue_id, true, false);
+	rxq = dev->data->rx_queues[rx_queue_id];
+	if (rxq->hairpin_q)
+		err = idpf_switch_hairpin_queue(vport, rx_queue_id, true, false);
+	else
+		err = idpf_switch_queue(vport, rx_queue_id, true, false);
 	if (err) {
 		PMD_DRV_LOG(ERR, "Failed to switch RX queue %u off",
 			    rx_queue_id);
@@ -1238,10 +1703,18 @@  idpf_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 		reset_single_rx_queue(rxq);
 	} else {
 		rxq->bufq1->ops->release_mbufs(rxq->bufq1);
-		rxq->bufq2->ops->release_mbufs(rxq->bufq2);
-		reset_split_rx_queue(rxq);
+		if (rxq->bufq2)
+			rxq->bufq2->ops->release_mbufs(rxq->bufq2);
+
+		if (rxq->hairpin_q) {
+			reset_rx_hairpin_descq(rxq);
+			reset_rx_hairpin_bufq(rxq->bufq1);
+		} else {
+			reset_split_rx_queue(rxq);
+		}
 	}
-	dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
+	if (!rxq->hairpin_q)
+		dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
 
 	return 0;
 }
@@ -1258,22 +1731,31 @@  idpf_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 	if (tx_queue_id >= dev->data->nb_tx_queues)
 		return -EINVAL;
 
-	err = idpf_switch_queue(vport, tx_queue_id, false, false);
+	txq = dev->data->tx_queues[tx_queue_id];
+	if (txq->hairpin_q)
+		err = idpf_switch_hairpin_queue(vport, tx_queue_id, false, false);
+	else
+		err = idpf_switch_queue(vport, tx_queue_id, false, false);
 	if (err) {
 		PMD_DRV_LOG(ERR, "Failed to switch TX queue %u off",
 			    tx_queue_id);
 		return err;
 	}
 
-	txq = dev->data->tx_queues[tx_queue_id];
 	txq->ops->release_mbufs(txq);
 	if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE) {
 		reset_single_tx_queue(txq);
 	} else {
-		reset_split_tx_descq(txq);
-		reset_split_tx_complq(txq->complq);
+		if (txq->hairpin_q) {
+			reset_tx_hairpin_descq(txq);
+			reset_tx_hairpin_complq(txq->complq);
+		} else {
+			reset_split_tx_descq(txq);
+			reset_split_tx_complq(txq->complq);
+		}
 	}
-	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
+	if (!txq->hairpin_q)
+		dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
 
 	return 0;
 }
@@ -1302,6 +1784,7 @@  idpf_stop_queues(struct rte_eth_dev *dev)
 		if (!rxq)
 			continue;
 
+		 /* hairpin queue is also stopped here. */
 		if (idpf_rx_queue_stop(dev, i))
 			PMD_DRV_LOG(WARNING, "Fail to stop Rx queue %d", i);
 	}
diff --git a/drivers/net/idpf/idpf_rxtx.h b/drivers/net/idpf/idpf_rxtx.h
index 6fcb441143..285d333a34 100644
--- a/drivers/net/idpf/idpf_rxtx.h
+++ b/drivers/net/idpf/idpf_rxtx.h
@@ -55,6 +55,7 @@ 
 #define IDPF_MIN_RING_DESC	32
 #define IDPF_MAX_RING_DESC	4096
 #define IDPF_DMA_MEM_ALIGN	4096
+#define IDPF_P2P_DESC_LEN	16
 /* Base address of the HW descriptor ring should be 128B aligned. */
 #define IDPF_RING_BASE_ALIGN	128
 
@@ -103,6 +104,11 @@ 
 
 extern uint64_t idpf_timestamp_dynflag;
 
+/* Descriptor written into a hairpin (p2p) Rx buffer ring: two little-endian
+ * 64-bit words, i.e. IDPF_P2P_DESC_LEN (16) bytes per entry.
+ */
+struct virtchnl2_p2p_rx_buf_desc {
+	__le64  reserve0; /* written as 0 when the ring is populated */
+	__le64  pkt_addr; /* Packet buffer address */
+};
+
 struct idpf_rx_queue {
 	struct idpf_adapter *adapter;	/* the adapter this queue belongs to */
 	struct rte_mempool *mp;		/* mbuf pool to populate Rx ring */
@@ -138,6 +144,12 @@  struct idpf_rx_queue {
 	uint16_t max_pkt_len;   /* Maximum packet length */
 	uint8_t crc_len;	/* 0 if CRC stripped, 4 otherwise */
 	uint8_t rxdid;
+	uint8_t q_type;
+
+	bool hairpin_q;		/* if rx queue is a hairpin queue */
+	/* only valid if the hairpin queue pair crosses vport */
+	bool hairpin_cv;
+	uint16_t peer_txp;
 
 	bool q_set;		/* if rx queue has been configured */
 	bool q_started;		/* if rx queue has been started */
@@ -186,6 +198,7 @@  struct idpf_tx_queue {
 
 	uint16_t port_id;
 	uint16_t queue_id;
+	uint8_t q_type;
 	uint64_t offloads;
 	uint16_t next_dd;	/* next to set RS, for VPMD */
 	uint16_t next_rs;	/* next to check DD,  for VPMD */
@@ -205,6 +218,14 @@  struct idpf_tx_queue {
 	uint32_t tx_start_qid;
 	uint8_t expected_gen_id;
 	struct idpf_tx_queue *complq;
+
+	/* only valid for hairpin queue */
+	bool hairpin_q;
+	/* only valid if the hairpin queue pair crosses vport */
+	bool hairpin_cv;
+	uint16_t peer_rxq_id;
+	uint16_t peer_rxp;
+	bool bound;
 };
 
 /* Offload features */
@@ -242,6 +263,12 @@  int idpf_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 int idpf_tx_queue_init(struct rte_eth_dev *dev, uint16_t tx_queue_id);
 int idpf_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id);
 int idpf_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id);
+int idpf_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+				uint16_t nb_desc,
+				const struct rte_eth_hairpin_conf *conf);
+int idpf_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+				uint16_t nb_desc,
+				const struct rte_eth_hairpin_conf *conf);
 void idpf_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
 
 uint16_t idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
diff --git a/drivers/net/idpf/idpf_vchnl.c b/drivers/net/idpf/idpf_vchnl.c
index 97fcfb574b..db80293fbd 100644
--- a/drivers/net/idpf/idpf_vchnl.c
+++ b/drivers/net/idpf/idpf_vchnl.c
@@ -585,6 +585,8 @@  idpf_get_caps(struct idpf_adapter *adapter)
 		 VIRTCHNL2_CAP_WB_ON_ITR		|
 		 VIRTCHNL2_CAP_PROMISC			|
 		 VIRTCHNL2_CAP_LINK_SPEED		|
+		 VIRTCHNL2_CAP_PTP			|
+		 VIRTCHNL2_CAP_RX_FLEX_DESC		|
 		 VIRTCHNL2_CAP_VLAN;
 
 	args.ops = VIRTCHNL2_OP_GET_CAPS;
@@ -813,7 +815,7 @@  idpf_config_rxqs(struct idpf_vport *vport)
 			for (i = 0; i < num_qs; i++, k++) {
 				rxq_info = &vc_rxqs->qinfo[i];
 				rxq_info->dma_ring_addr = rxq[k]->rx_ring_phys_addr;
-				rxq_info->type = VIRTCHNL2_QUEUE_TYPE_RX;
+				rxq_info->type = rxq[k]->q_type;
 				rxq_info->queue_id = rxq[k]->queue_id;
 				rxq_info->model = VIRTCHNL2_QUEUE_MODEL_SINGLE;
 				rxq_info->data_buffer_size = rxq[k]->rx_buf_len;
@@ -830,7 +832,7 @@  idpf_config_rxqs(struct idpf_vport *vport)
 				rxq_info = &vc_rxqs->qinfo[i * 3];
 				rxq_info->dma_ring_addr =
 					rxq[k]->rx_ring_phys_addr;
-				rxq_info->type = VIRTCHNL2_QUEUE_TYPE_RX;
+				rxq_info->type = rxq[k]->q_type;
 				rxq_info->queue_id = rxq[k]->queue_id;
 				rxq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
 				rxq_info->data_buffer_size = rxq[k]->rx_buf_len;
@@ -851,8 +853,7 @@  idpf_config_rxqs(struct idpf_vport *vport)
 					rxq_info = &vc_rxqs->qinfo[i * 3 + j];
 					rxq_info->dma_ring_addr =
 						bufq->rx_ring_phys_addr;
-					rxq_info->type =
-						VIRTCHNL2_QUEUE_TYPE_RX_BUFFER;
+					rxq_info->type = bufq->q_type;
 					rxq_info->queue_id = bufq->queue_id;
 					rxq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
 					rxq_info->data_buffer_size = bufq->rx_buf_len;
@@ -898,6 +899,8 @@  idpf_config_rxq(struct idpf_vport *vport, uint16_t rxq_id)
 
 	if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
 		num_qs = IDPF_RXQ_PER_GRP;
+	else if (rxq[rxq_id]->hairpin_q)
+		num_qs = IDPF_RXQ_PER_GRP + 1;
 	else
 		num_qs = IDPF_RXQ_PER_GRP + IDPF_RX_BUFQ_PER_GRP;
 
@@ -914,7 +917,7 @@  idpf_config_rxq(struct idpf_vport *vport, uint16_t rxq_id)
 	if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE) {
 		rxq_info = &vc_rxqs->qinfo[0];
 		rxq_info->dma_ring_addr = rxq[rxq_id]->rx_ring_phys_addr;
-		rxq_info->type = VIRTCHNL2_QUEUE_TYPE_RX;
+		rxq_info->type = rxq[rxq_id]->q_type;
 		rxq_info->queue_id = rxq[rxq_id]->queue_id;
 		rxq_info->model = VIRTCHNL2_QUEUE_MODEL_SINGLE;
 		rxq_info->data_buffer_size = rxq[rxq_id]->rx_buf_len;
@@ -925,38 +928,72 @@  idpf_config_rxq(struct idpf_vport *vport, uint16_t rxq_id)
 
 		rxq_info->ring_len = rxq[rxq_id]->nb_rx_desc;
 	}  else {
-		/* Rx queue */
-		rxq_info = &vc_rxqs->qinfo[0];
-		rxq_info->dma_ring_addr = rxq[rxq_id]->rx_ring_phys_addr;
-		rxq_info->type = VIRTCHNL2_QUEUE_TYPE_RX;
-		rxq_info->queue_id = rxq[rxq_id]->queue_id;
-		rxq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
-		rxq_info->data_buffer_size = rxq[rxq_id]->rx_buf_len;
-		rxq_info->max_pkt_size = vport->max_pkt_len;
+		if (rxq[rxq_id]->hairpin_q) {
+			/* Rx queue */
+			rxq_info = &vc_rxqs->qinfo[0];
+			rxq_info->type = rxq[rxq_id]->q_type;
+			rxq_info->queue_id = rxq[rxq_id]->queue_id;
+			rxq_info->ring_len = rxq[rxq_id]->nb_rx_desc;
+			rxq_info->dma_ring_addr = rxq[rxq_id]->rx_ring_phys_addr;
+			rxq_info->rx_bufq1_id = rxq[rxq_id]->bufq1->queue_id;
+			rxq_info->max_pkt_size = vport->max_pkt_len;
+			rxq_info->desc_ids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M;
+			rxq_info->qflags |= VIRTCHNL2_RX_DESC_SIZE_32BYTE;
 
-		rxq_info->desc_ids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M;
-		rxq_info->qflags |= VIRTCHNL2_RX_DESC_SIZE_32BYTE;
+			rxq_info->data_buffer_size = rxq[rxq_id]->rx_buf_len;
+			rxq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+			rxq_info->rx_buffer_low_watermark = 64;
+			PMD_DRV_LOG(NOTICE, "hairpin: vport %u, Rxq id 0x%x",
+				vport->vport_id, rxq_info->queue_id);
 
-		rxq_info->ring_len = rxq[rxq_id]->nb_rx_desc;
-		rxq_info->rx_bufq1_id = rxq[rxq_id]->bufq1->queue_id;
-		rxq_info->rx_bufq2_id = rxq[rxq_id]->bufq2->queue_id;
-		rxq_info->rx_buffer_low_watermark = 64;
-
-		/* Buffer queue */
-		for (i = 1; i <= IDPF_RX_BUFQ_PER_GRP; i++) {
-			struct idpf_rx_queue *bufq =
-				i == 1 ? rxq[rxq_id]->bufq1 : rxq[rxq_id]->bufq2;
-			rxq_info = &vc_rxqs->qinfo[i];
-			rxq_info->dma_ring_addr = bufq->rx_ring_phys_addr;
-			rxq_info->type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER;
+			/* Buffer queue */
+			rxq_info = &vc_rxqs->qinfo[1];
+			struct idpf_rx_queue *bufq = rxq[rxq_id]->bufq1;
+			rxq_info->type = bufq->q_type;
 			rxq_info->queue_id = bufq->queue_id;
+			rxq_info->ring_len = bufq->nb_rx_desc;
+			rxq_info->dma_ring_addr = bufq->rx_ring_phys_addr;
+			rxq_info->desc_ids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M;
+			rxq_info->rx_buffer_low_watermark = 64;
 			rxq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
 			rxq_info->data_buffer_size = bufq->rx_buf_len;
+			rxq_info->buffer_notif_stride = IDPF_RX_BUF_STRIDE;
+			PMD_DRV_LOG(NOTICE, "hairpin: vport %u, Rxbufq id 0x%x",
+				vport->vport_id, rxq_info->queue_id);
+		} else {
+			/* Rx queue */
+			rxq_info = &vc_rxqs->qinfo[0];
+			rxq_info->dma_ring_addr = rxq[rxq_id]->rx_ring_phys_addr;
+			rxq_info->type = rxq[rxq_id]->q_type;
+			rxq_info->queue_id = rxq[rxq_id]->queue_id;
+			rxq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+			rxq_info->data_buffer_size = rxq[rxq_id]->rx_buf_len;
+			rxq_info->max_pkt_size = vport->max_pkt_len;
+
 			rxq_info->desc_ids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M;
-			rxq_info->ring_len = bufq->nb_rx_desc;
+			rxq_info->qflags |= VIRTCHNL2_RX_DESC_SIZE_32BYTE;
 
-			rxq_info->buffer_notif_stride = IDPF_RX_BUF_STRIDE;
+			rxq_info->ring_len = rxq[rxq_id]->nb_rx_desc;
+			rxq_info->rx_bufq1_id = rxq[rxq_id]->bufq1->queue_id;
+			rxq_info->rx_bufq2_id = rxq[rxq_id]->bufq2->queue_id;
 			rxq_info->rx_buffer_low_watermark = 64;
+
+			/* Buffer queue */
+			for (i = 1; i <= IDPF_RX_BUFQ_PER_GRP; i++) {
+				struct idpf_rx_queue *bufq =
+					i == 1 ? rxq[rxq_id]->bufq1 : rxq[rxq_id]->bufq2;
+				rxq_info = &vc_rxqs->qinfo[i];
+				rxq_info->dma_ring_addr = bufq->rx_ring_phys_addr;
+				rxq_info->type = bufq->q_type;
+				rxq_info->queue_id = bufq->queue_id;
+				rxq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+				rxq_info->data_buffer_size = bufq->rx_buf_len;
+				rxq_info->desc_ids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M;
+				rxq_info->ring_len = bufq->nb_rx_desc;
+
+				rxq_info->buffer_notif_stride = IDPF_RX_BUF_STRIDE;
+				rxq_info->rx_buffer_low_watermark = 64;
+			}
 		}
 	}
 
@@ -1011,7 +1048,7 @@  idpf_config_txqs(struct idpf_vport *vport)
 			for (i = 0; i < num_qs; i++, k++) {
 				txq_info = &vc_txqs->qinfo[i];
 				txq_info->dma_ring_addr = txq[k]->tx_ring_phys_addr;
-				txq_info->type = VIRTCHNL2_QUEUE_TYPE_TX;
+				txq_info->type = txq[k]->q_type;
 				txq_info->queue_id = txq[k]->queue_id;
 				txq_info->model = VIRTCHNL2_QUEUE_MODEL_SINGLE;
 				txq_info->sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_QUEUE;
@@ -1022,7 +1059,7 @@  idpf_config_txqs(struct idpf_vport *vport)
 				/* txq info */
 				txq_info = &vc_txqs->qinfo[2 * i];
 				txq_info->dma_ring_addr = txq[k]->tx_ring_phys_addr;
-				txq_info->type = VIRTCHNL2_QUEUE_TYPE_TX;
+				txq_info->type = txq[k]->q_type;
 				txq_info->queue_id = txq[k]->queue_id;
 				txq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
 				txq_info->sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW;
@@ -1035,7 +1072,7 @@  idpf_config_txqs(struct idpf_vport *vport)
 				txq_info = &vc_txqs->qinfo[2 * i + 1];
 				txq_info->dma_ring_addr =
 					txq[k]->complq->tx_ring_phys_addr;
-				txq_info->type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION;
+				txq_info->type = txq[k]->complq->q_type;
 				txq_info->queue_id = txq[k]->complq->queue_id;
 				txq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
 				txq_info->sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW;
@@ -1092,31 +1129,61 @@  idpf_config_txq(struct idpf_vport *vport, uint16_t txq_id)
 	if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE) {
 		txq_info = &vc_txqs->qinfo[0];
 		txq_info->dma_ring_addr = txq[txq_id]->tx_ring_phys_addr;
-		txq_info->type = VIRTCHNL2_QUEUE_TYPE_TX;
+		txq_info->type = txq[txq_id]->q_type;
 		txq_info->queue_id = txq[txq_id]->queue_id;
 		txq_info->model = VIRTCHNL2_QUEUE_MODEL_SINGLE;
 		txq_info->sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_QUEUE;
 		txq_info->ring_len = txq[txq_id]->nb_tx_desc;
 	} else {
-		/* txq info */
-		txq_info = &vc_txqs->qinfo[0];
-		txq_info->dma_ring_addr = txq[txq_id]->tx_ring_phys_addr;
-		txq_info->type = VIRTCHNL2_QUEUE_TYPE_TX;
-		txq_info->queue_id = txq[txq_id]->queue_id;
-		txq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
-		txq_info->sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW;
-		txq_info->ring_len = txq[txq_id]->nb_tx_desc;
-		txq_info->tx_compl_queue_id = txq[txq_id]->complq->queue_id;
-		txq_info->relative_queue_id = txq_info->queue_id;
-
-		/* tx completion queue info */
-		txq_info = &vc_txqs->qinfo[1];
-		txq_info->dma_ring_addr = txq[txq_id]->complq->tx_ring_phys_addr;
-		txq_info->type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION;
-		txq_info->queue_id = txq[txq_id]->complq->queue_id;
-		txq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
-		txq_info->sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW;
-		txq_info->ring_len = txq[txq_id]->complq->nb_tx_desc;
+		if (txq[txq_id]->hairpin_q) {
+			/* txq info */
+			txq_info = &vc_txqs->qinfo[0];
+			txq_info->dma_ring_addr = txq[txq_id]->tx_ring_phys_addr;
+			txq_info->type = txq[txq_id]->q_type;
+			txq_info->queue_id = txq[txq_id]->queue_id;
+			txq_info->ring_len = txq[txq_id]->nb_tx_desc;
+			txq_info->tx_compl_queue_id = txq[txq_id]->complq->queue_id;
+			txq_info->relative_queue_id = txq_info->queue_id;
+			txq_info->peer_rx_queue_id = txq[txq_id]->peer_rxq_id;
+			txq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+			txq_info->sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW;
+			PMD_DRV_LOG(NOTICE, "hairpin: vport %u, Txq id 0x%x, peer"
+				" rxq id 0x%x", vport->vport_id, txq_info->queue_id,
+				txq_info->peer_rx_queue_id);
+
+			/* tx completion queue info */
+			txq_info = &vc_txqs->qinfo[1];
+			txq_info->dma_ring_addr = txq[txq_id]->complq->tx_ring_phys_addr;
+			txq_info->type = txq[txq_id]->complq->q_type;
+			txq_info->queue_id = txq[txq_id]->complq->queue_id;
+			txq_info->ring_len = txq[txq_id]->complq->nb_tx_desc;
+			txq_info->peer_rx_queue_id = txq[txq_id]->complq->peer_rxq_id;
+			txq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+			txq_info->sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW;
+			PMD_DRV_LOG(NOTICE, "hairpin: vport %u, Txcomplq id 0x%x,"
+				" peer rxbufq id 0x%x", vport->vport_id,
+				txq_info->queue_id, txq_info->peer_rx_queue_id);
+		} else {
+			/* txq info */
+			txq_info = &vc_txqs->qinfo[0];
+			txq_info->dma_ring_addr = txq[txq_id]->tx_ring_phys_addr;
+			txq_info->type = txq[txq_id]->q_type;
+			txq_info->queue_id = txq[txq_id]->queue_id;
+			txq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+			txq_info->sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW;
+			txq_info->ring_len = txq[txq_id]->nb_tx_desc;
+			txq_info->tx_compl_queue_id = txq[txq_id]->complq->queue_id;
+			txq_info->relative_queue_id = txq_info->queue_id;
+
+			/* tx completion queue info */
+			txq_info = &vc_txqs->qinfo[1];
+			txq_info->dma_ring_addr = txq[txq_id]->complq->tx_ring_phys_addr;
+			txq_info->type = txq[txq_id]->complq->q_type;
+			txq_info->queue_id = txq[txq_id]->complq->queue_id;
+			txq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+			txq_info->sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW;
+			txq_info->ring_len = txq[txq_id]->complq->nb_tx_desc;
+		}
 	}
 
 	memset(&args, 0, sizeof(args));
@@ -1322,6 +1389,46 @@  idpf_switch_queue(struct idpf_vport *vport, uint16_t qid,
 	return err;
 }
 
+/*
+ * Switch one hairpin (P2P) queue on or off. When the matching queue model is
+ * split, the paired Tx completion queue or Rx buffer queue is switched too.
+ * Returns 0, or the first non-zero code from idpf_ena_dis_one_queue().
+ */
+int
+idpf_switch_hairpin_queue(struct idpf_vport *vport, uint16_t qid,
+		 bool rx, bool on)
+{
+	uint32_t type;
+	int err, queue_id;
+
+	/* switch p2p txq/rxq */
+	if (rx) {
+		type = VIRTCHNL2_QUEUE_TYPE_P2P_RX;
+		queue_id = vport->chunks_info.rx_start_qid + qid;
+	} else {
+		type = VIRTCHNL2_QUEUE_TYPE_P2P_TX;
+		queue_id = vport->chunks_info.tx_start_qid + qid;
+	}
+	err = idpf_ena_dis_one_queue(vport, queue_id, type, on);
+	if (err)
+		return err;
+
+	if (rx) {
+		/* switch p2p rx buffer queue, present only in split model */
+		if (vport->rxq_model != VIRTCHNL2_QUEUE_MODEL_SPLIT)
+			return 0;
+		type = VIRTCHNL2_QUEUE_TYPE_P2P_RX_BUFFER;
+		queue_id = vport->chunks_info.rx_buf_start_qid + 2 * qid;
+	} else {
+		/* switch p2p tx completion queue, present only in split model */
+		if (vport->txq_model != VIRTCHNL2_QUEUE_MODEL_SPLIT)
+			return 0;
+		type = VIRTCHNL2_QUEUE_TYPE_P2P_TX_COMPLETION;
+		queue_id = vport->chunks_info.tx_compl_start_qid + qid;
+	}
+	return idpf_ena_dis_one_queue(vport, queue_id, type, on);
+}
+
 #define IDPF_RXTX_QUEUE_CHUNKS_NUM	2
 int
 idpf_ena_dis_queues(struct idpf_vport *vport, bool enable)