[RFC,20/29] net/qdma: add Rx burst API

Message ID: 20220706075219.517046-21-aman.kumar@vvdntech.in (mailing list archive)
State: Changes Requested, archived
Delegated to: Thomas Monjalon
Series: cover letter for net/qdma PMD

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Aman Kumar July 6, 2022, 7:52 a.m. UTC
  Add Rx data path burst API support for the device.

Signed-off-by: Aman Kumar <aman.kumar@vvdntech.in>
---
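Note (not for the commit log): below is a minimal, illustrative sketch of how an
application consumes this Rx path through the generic ethdev burst API. It assumes
the series registers qdma_recv_pkts() as dev->rx_pkt_burst; that hookup is not part
of this patch, and BURST_SIZE/poll_rx_queue() are made up for the example.

#include <rte_ethdev.h>
#include <rte_mbuf.h>

#define BURST_SIZE 32

/* Poll one Rx queue once and drop whatever was received. */
static void poll_rx_queue(uint16_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *pkts[BURST_SIZE];
	uint16_t i, nb_rx;

	/* rte_eth_rx_burst() dispatches to the PMD burst handler, here
	 * qdma_recv_pkts(), which selects qdma_recv_pkts_st() or
	 * qdma_recv_pkts_mm() based on the queue mode.
	 */
	nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts, BURST_SIZE);

	for (i = 0; i < nb_rx; i++)
		rte_pktmbuf_free(pkts[i]);
}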
 drivers/net/qdma/qdma_devops.c |  10 -
 drivers/net/qdma/qdma_rxtx.c   | 709 +++++++++++++++++++++++++++++++++
 drivers/net/qdma/qdma_rxtx.h   |   8 +-
 3 files changed, 716 insertions(+), 11 deletions(-)
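Note (not for the commit log): process_cmpt_ring(), prepare_segmented_packet() and
rearm_c2h_ring() all rely on the same convention: the C2H and CMPT rings keep one
slot unused, so software indices wrap at (nb_desc - 1) rather than at nb_desc. A
minimal sketch of that arithmetic follows; the helper name is hypothetical and the
code is not part of the patch.

/* Advance a ring index by incr (incr no larger than the usable ring size),
 * wrapping at (ring_size - 1) because one descriptor slot stays unused.
 */
static inline uint16_t qdma_ring_idx_advance(uint16_t idx, uint16_t incr,
					     uint16_t ring_size)
{
	idx += incr;
	if (idx >= (ring_size - 1))
		idx -= (ring_size - 1);
	return idx;
}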
  

Patch

diff --git a/drivers/net/qdma/qdma_devops.c b/drivers/net/qdma/qdma_devops.c
index dfa41a9aa7..7f525773d0 100644
--- a/drivers/net/qdma/qdma_devops.c
+++ b/drivers/net/qdma/qdma_devops.c
@@ -1742,16 +1742,6 @@  static struct eth_dev_ops qdma_eth_dev_ops = {
 	.txq_info_get             = qdma_dev_txq_info_get,
 };
 
-uint16_t qdma_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
-			uint16_t nb_pkts)
-{
-	(void)rx_queue;
-	(void)rx_pkts;
-	(void)nb_pkts;
-
-	return 0;
-}
-
 void qdma_dev_ops_init(struct rte_eth_dev *dev)
 {
 	dev->dev_ops = &qdma_eth_dev_ops;
diff --git a/drivers/net/qdma/qdma_rxtx.c b/drivers/net/qdma/qdma_rxtx.c
index 3abc72717f..7652f35dd2 100644
--- a/drivers/net/qdma/qdma_rxtx.c
+++ b/drivers/net/qdma/qdma_rxtx.c
@@ -20,6 +20,20 @@ 
 #endif /* RTE_ARCH_X86_64 */
 
 /******** User logic dependent functions start **********/
+static int qdma_ul_extract_st_cmpt_info_v(void *ul_cmpt_entry, void *cmpt_info)
+{
+	union qdma_ul_st_cmpt_ring *cmpt_data, *cmpt_desc;
+
+	cmpt_desc = (union qdma_ul_st_cmpt_ring *)(ul_cmpt_entry);
+	cmpt_data = (union qdma_ul_st_cmpt_ring *)(cmpt_info);
+
+	cmpt_data->data = cmpt_desc->data;
+	if (unlikely(!cmpt_desc->desc_used))
+		cmpt_data->length = 0;
+
+	return 0;
+}
+
 #ifdef QDMA_RX_VEC_X86_64
 /* Vector implementation to get packet length from two completion entries */
 static void qdma_ul_get_cmpt_pkt_len_v(void *ul_cmpt_entry, __m128i *data)
@@ -410,6 +424,107 @@  static void adapt_update_counter(struct qdma_rx_queue *rxq,
 }
 #endif /* QDMA_LATENCY_OPTIMIZED */
 
+/* Process completion ring */
+static int process_cmpt_ring(struct qdma_rx_queue *rxq,
+		uint16_t num_cmpt_entries)
+{
+	struct qdma_pci_dev *qdma_dev = rxq->dev->data->dev_private;
+	union qdma_ul_st_cmpt_ring *user_cmpt_entry;
+	uint32_t count = 0;
+	int ret = 0;
+	uint16_t rx_cmpt_tail = rxq->cmpt_cidx_info.wrb_cidx;
+
+	if (likely(!rxq->dump_immediate_data)) {
+		if ((rx_cmpt_tail + num_cmpt_entries) <
+			(rxq->nb_rx_cmpt_desc - 1)) {
+			for (count = 0; count < num_cmpt_entries; count++) {
+				user_cmpt_entry =
+				(union qdma_ul_st_cmpt_ring *)
+				((uint64_t)rxq->cmpt_ring +
+				((uint64_t)rx_cmpt_tail * rxq->cmpt_desc_len));
+
+				ret = qdma_ul_extract_st_cmpt_info_v
+						(user_cmpt_entry,
+						&rxq->cmpt_data[count]);
+				if (ret != 0) {
+					PMD_DRV_LOG(ERR, "Error detected on CMPT ring "
+						"at index %d, queue_id = %d\n",
+						rx_cmpt_tail, rxq->queue_id);
+					rxq->err = 1;
+					return -1;
+				}
+				rx_cmpt_tail++;
+			}
+		} else {
+			while (count < num_cmpt_entries) {
+				user_cmpt_entry =
+				(union qdma_ul_st_cmpt_ring *)
+				((uint64_t)rxq->cmpt_ring +
+				((uint64_t)rx_cmpt_tail * rxq->cmpt_desc_len));
+
+				ret = qdma_ul_extract_st_cmpt_info_v
+						(user_cmpt_entry,
+						&rxq->cmpt_data[count]);
+				if (ret != 0) {
+					PMD_DRV_LOG(ERR, "Error detected on CMPT ring "
+						"at index %d, queue_id = %d\n",
+						rx_cmpt_tail, rxq->queue_id);
+					rxq->err = 1;
+					return -1;
+				}
+
+				rx_cmpt_tail++;
+				if (unlikely(rx_cmpt_tail >=
+					(rxq->nb_rx_cmpt_desc - 1)))
+					rx_cmpt_tail -=
+						(rxq->nb_rx_cmpt_desc - 1);
+				count++;
+			}
+		}
+	} else {
+		while (count < num_cmpt_entries) {
+			user_cmpt_entry =
+			(union qdma_ul_st_cmpt_ring *)
+			((uint64_t)rxq->cmpt_ring +
+			((uint64_t)rx_cmpt_tail * rxq->cmpt_desc_len));
+
+			ret = qdma_ul_extract_st_cmpt_info
+					(user_cmpt_entry,
+					&rxq->cmpt_data[count]);
+			if (ret != 0) {
+				PMD_DRV_LOG(ERR, "Error detected on CMPT ring "
+					"at CMPT index %d, queue_id = %d\n",
+					rx_cmpt_tail, rxq->queue_id);
+				rxq->err = 1;
+				return -1;
+			}
+
+			ret = qdma_ul_process_immediate_data_st((void *)rxq,
+					user_cmpt_entry, rxq->cmpt_desc_len);
+			if (ret < 0) {
+				PMD_DRV_LOG(ERR, "Error processing immediate data "
+					"at CMPT index = %d, queue_id = %d\n",
+					rx_cmpt_tail, rxq->queue_id);
+				return -1;
+			}
+
+			rx_cmpt_tail++;
+			if (unlikely(rx_cmpt_tail >=
+				(rxq->nb_rx_cmpt_desc - 1)))
+				rx_cmpt_tail -= (rxq->nb_rx_cmpt_desc - 1);
+			count++;
+		}
+	}
+
+	/* Update the CMPT CIDX */
+	rxq->cmpt_cidx_info.wrb_cidx = rx_cmpt_tail;
+	qdma_dev->hw_access->qdma_queue_cmpt_cidx_update(rxq->dev,
+		qdma_dev->is_vf,
+		rxq->queue_id, &rxq->cmpt_cidx_info);
+
+	return 0;
+}
+
 static uint32_t rx_queue_count(void *rx_queue)
 {
 	struct qdma_rx_queue *rxq = rx_queue;
@@ -531,6 +646,600 @@  qdma_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
 	return RTE_ETH_RX_DESC_AVAIL;
 }
 
+/* Update mbuf for a segmented packet */
+static struct rte_mbuf *prepare_segmented_packet(struct qdma_rx_queue *rxq,
+		uint16_t pkt_length, uint16_t *tail)
+{
+	struct rte_mbuf *mb;
+	struct rte_mbuf *first_seg = NULL;
+	struct rte_mbuf *last_seg = NULL;
+	uint16_t id = *tail;
+	uint16_t length;
+	uint16_t rx_buff_size = rxq->rx_buff_size;
+
+	do {
+		mb = rxq->sw_ring[id];
+		rxq->sw_ring[id++] = NULL;
+		length = pkt_length;
+
+		if (unlikely(id >= (rxq->nb_rx_desc - 1)))
+			id -= (rxq->nb_rx_desc - 1);
+		if (pkt_length > rx_buff_size) {
+			rte_pktmbuf_data_len(mb) = rx_buff_size;
+			pkt_length -= rx_buff_size;
+		} else {
+			rte_pktmbuf_data_len(mb) = pkt_length;
+			pkt_length = 0;
+		}
+		rte_mbuf_refcnt_set(mb, 1);
+
+		if (first_seg == NULL) {
+			first_seg = mb;
+			first_seg->nb_segs = 1;
+			first_seg->pkt_len = length;
+			first_seg->packet_type = 0;
+			first_seg->ol_flags = 0;
+			first_seg->port = rxq->port_id;
+			first_seg->vlan_tci = 0;
+			first_seg->hash.rss = 0;
+		} else {
+			first_seg->nb_segs++;
+			if (last_seg != NULL)
+				last_seg->next = mb;
+		}
+
+		last_seg = mb;
+		mb->next = NULL;
+	} while (pkt_length);
+
+	*tail = id;
+	return first_seg;
+}
+
+/* Prepare mbuf for one packet */
+static inline
+struct rte_mbuf *prepare_single_packet(struct qdma_rx_queue *rxq,
+		uint16_t cmpt_idx)
+{
+	struct rte_mbuf *mb = NULL;
+	uint16_t id = rxq->rx_tail;
+	uint16_t pkt_length;
+
+	pkt_length = qdma_ul_get_cmpt_pkt_len(&rxq->cmpt_data[cmpt_idx]);
+
+	if (pkt_length) {
+		if (likely(pkt_length <= rxq->rx_buff_size)) {
+			mb = rxq->sw_ring[id];
+			rxq->sw_ring[id++] = NULL;
+
+			if (unlikely(id >= (rxq->nb_rx_desc - 1)))
+				id -= (rxq->nb_rx_desc - 1);
+
+			rte_mbuf_refcnt_set(mb, 1);
+			mb->nb_segs = 1;
+			mb->port = rxq->port_id;
+			mb->ol_flags = 0;
+			mb->packet_type = 0;
+			mb->pkt_len = pkt_length;
+			mb->data_len = pkt_length;
+		} else {
+			mb = prepare_segmented_packet(rxq, pkt_length, &id);
+		}
+
+		rxq->rx_tail = id;
+	}
+	return mb;
+}
+
+#ifdef QDMA_RX_VEC_X86_64
+/* Vector implementation to prepare mbufs for packets.
+ * Update this API if HW provides more information to be populated in mbuf.
+ */
+static uint16_t prepare_packets_v(struct qdma_rx_queue *rxq,
+			struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	struct rte_mbuf *mb;
+	uint16_t count = 0, count_pkts = 0;
+	uint16_t n_pkts = nb_pkts & -2;
+	uint16_t id = rxq->rx_tail;
+	struct rte_mbuf **sw_ring = rxq->sw_ring;
+	uint16_t rx_buff_size = rxq->rx_buff_size;
+	/* mask to shuffle from desc. to mbuf */
+	__m128i shuf_msk = _mm_set_epi8
+			(0xFF, 0xFF, 0xFF, 0xFF,  /* skip 32bits rss */
+			0xFF, 0xFF,      /* skip low 16 bits vlan_macip */
+			1, 0,      /* octet 0~1, 16 bits data_len */
+			0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
+			1, 0,      /* octet 0~1, low 16 bits pkt_len */
+			0xFF, 0xFF,  /* skip 32 bit pkt_type */
+			0xFF, 0xFF
+			);
+	__m128i mbuf_init, pktlen, zero_data;
+
+	mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
+	pktlen = _mm_setzero_si128();
+	zero_data = _mm_setzero_si128();
+
+	/* compile-time check */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
+			RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
+
+	for (count = 0; count < n_pkts;
+		count += RTE_QDMA_DESCS_PER_LOOP) {
+		__m128i pkt_len[RTE_QDMA_DESCS_PER_LOOP];
+		__m128i pkt_mb1, pkt_mb2;
+		__m128i mbp1;
+		uint16_t pktlen1, pktlen2;
+
+		qdma_ul_get_cmpt_pkt_len_v
+			(&rxq->cmpt_data[count], pkt_len);
+
+		pktlen1 = _mm_extract_epi16(pkt_len[0], 0);
+		pktlen2 = _mm_extract_epi16(pkt_len[1], 0);
+
+		/* Check if packets are segmented across descriptors */
+		if ((pktlen1 && pktlen1 <= rx_buff_size) &&
+			(pktlen2 && pktlen2 <= rx_buff_size) &&
+			((id + RTE_QDMA_DESCS_PER_LOOP) <
+				(rxq->nb_rx_desc - 1))) {
+			/* Load 2 (64 bit) mbuf pointers */
+			mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[id]);
+
+			/* Copy two 64-bit mbuf pointers into rx_pkts */
+			_mm_storeu_si128((__m128i *)&rx_pkts[count_pkts], mbp1);
+			_mm_storeu_si128((__m128i *)&sw_ring[id], zero_data);
+
+			/* Convert pkt 1 and 2 from desc format to pktmbuf */
+			/* We only have packet length to copy */
+			pkt_mb2 = _mm_shuffle_epi8(pkt_len[1], shuf_msk);
+			pkt_mb1 = _mm_shuffle_epi8(pkt_len[0], shuf_msk);
+
+			/* Write the rearm data and the olflags in one write */
+			_mm_store_si128
+			((__m128i *)&rx_pkts[count_pkts]->rearm_data, mbuf_init);
+			_mm_store_si128
+			((__m128i *)&rx_pkts[count_pkts + 1]->rearm_data,
+			mbuf_init);
+
+			/* Write packet length */
+			_mm_storeu_si128
+			((void *)&rx_pkts[count_pkts]->rx_descriptor_fields1,
+			pkt_mb1);
+			_mm_storeu_si128
+			((void *)&rx_pkts[count_pkts + 1]->rx_descriptor_fields1,
+			pkt_mb2);
+
+			/* Accumulate packet length counter */
+			pktlen = _mm_add_epi32(pktlen, pkt_len[0]);
+			pktlen = _mm_add_epi32(pktlen, pkt_len[1]);
+
+			count_pkts += RTE_QDMA_DESCS_PER_LOOP;
+			id += RTE_QDMA_DESCS_PER_LOOP;
+		} else {
+			/* Handle packets segmented
+			 * across multiple descriptors
+			 * or ring wrap
+			 */
+			if (pktlen1) {
+				mb = prepare_segmented_packet(rxq,
+					pktlen1, &id);
+				rx_pkts[count_pkts++] = mb;
+				pktlen = _mm_add_epi32(pktlen, pkt_len[0]);
+			}
+
+			if (pktlen2) {
+				mb = prepare_segmented_packet(rxq,
+					pktlen2, &id);
+				rx_pkts[count_pkts++] = mb;
+				pktlen = _mm_add_epi32(pktlen, pkt_len[1]);
+			}
+		}
+	}
+
+	rxq->stats.pkts += count_pkts;
+	rxq->stats.bytes += _mm_extract_epi64(pktlen, 0);
+	rxq->rx_tail = id;
+
+	/* Handle single packet, if any pending */
+	if (nb_pkts & 1) {
+		mb = prepare_single_packet(rxq, count);
+		if (mb)
+			rx_pkts[count_pkts++] = mb;
+	}
+
+	return count_pkts;
+}
+#endif /* QDMA_RX_VEC_X86_64 */
+
+/* Prepare mbufs with packet information */
+static uint16_t prepare_packets(struct qdma_rx_queue *rxq,
+			struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	uint16_t count_pkts = 0;
+
+#ifdef QDMA_RX_VEC_X86_64
+	count_pkts = prepare_packets_v(rxq, rx_pkts, nb_pkts);
+#else /* QDMA_RX_VEC_X86_64 */
+	struct rte_mbuf *mb;
+	uint16_t pkt_length;
+	uint16_t count = 0;
+	while (count < nb_pkts) {
+		pkt_length = qdma_ul_get_cmpt_pkt_len(&rxq->cmpt_data[count]);
+		if (pkt_length) {
+			mb = prepare_segmented_packet(rxq,
+					pkt_length, &rxq->rx_tail);
+			rx_pkts[count_pkts++] = mb;
+		}
+		count++;
+	}
+#endif /* QDMA_RX_VEC_X86_64 */
+
+	return count_pkts;
+}
+
+/* Populate C2H ring with new buffers */
+static int rearm_c2h_ring(struct qdma_rx_queue *rxq, uint16_t num_desc)
+{
+	struct qdma_pci_dev *qdma_dev = rxq->dev->data->dev_private;
+	struct rte_mbuf *mb;
+	struct qdma_ul_st_c2h_desc *rx_ring_st =
+			(struct qdma_ul_st_c2h_desc *)rxq->rx_ring;
+	uint16_t mbuf_index = 0;
+	uint16_t id;
+	int rearm_descs;
+
+	id = rxq->q_pidx_info.pidx;
+
+	/* Split the C2H ring update in two parts.
+	 * First handle up to the end of the ring and then
+	 * handle from the beginning of the ring, if the ring wraps
+	 */
+	if ((id + num_desc) < (rxq->nb_rx_desc - 1))
+		rearm_descs = num_desc;
+	else
+		rearm_descs = (rxq->nb_rx_desc - 1) - id;
+
+	/* allocate new buffers */
+	if (rte_mempool_get_bulk(rxq->mb_pool, (void *)&rxq->sw_ring[id],
+					rearm_descs) != 0){
+		PMD_DRV_LOG(ERR, "%s(): %d: No MBUFS, queue id = %d, "
+		"mbuf_avail_count = %d, "
+		"mbuf_in_use_count = %d, num_desc_req = %d\n",
+		__func__, __LINE__, rxq->queue_id,
+		rte_mempool_avail_count(rxq->mb_pool),
+		rte_mempool_in_use_count(rxq->mb_pool), rearm_descs);
+		return -1;
+	}
+
+#ifdef QDMA_RX_VEC_X86_64
+	int rearm_cnt = rearm_descs & -2;
+	__m128i head_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+			RTE_PKTMBUF_HEADROOM);
+
+	for (mbuf_index = 0; mbuf_index < ((uint16_t)rearm_cnt & 0xFFFF);
+			mbuf_index += RTE_QDMA_DESCS_PER_LOOP,
+			id += RTE_QDMA_DESCS_PER_LOOP) {
+		__m128i vaddr0, vaddr1;
+		__m128i dma_addr;
+
+		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+
+		/* Load two mbufs data addresses */
+		vaddr0 = _mm_loadu_si128
+				((__m128i *)&rxq->sw_ring[id]->buf_addr);
+		vaddr1 = _mm_loadu_si128
+				((__m128i *)&rxq->sw_ring[id + 1]->buf_addr);
+
+		/* Extract physical addresses of two mbufs */
+		dma_addr = _mm_unpackhi_epi64(vaddr0, vaddr1);
+
+		/* Add headroom to dma_addr */
+		dma_addr = _mm_add_epi64(dma_addr, head_room);
+
+		/* Write C2H desc with physical dma_addr */
+		_mm_storeu_si128((__m128i *)&rx_ring_st[id], dma_addr);
+	}
+
+	if (rearm_descs & 1) {
+		mb = rxq->sw_ring[id];
+
+		/* rearm descriptor */
+		rx_ring_st[id].dst_addr =
+				(uint64_t)mb->buf_iova +
+					RTE_PKTMBUF_HEADROOM;
+		id++;
+	}
+#else /* QDMA_RX_VEC_X86_64 */
+	for (mbuf_index = 0; mbuf_index < rearm_descs;
+			mbuf_index++, id++) {
+		mb = rxq->sw_ring[id];
+		mb->data_off = RTE_PKTMBUF_HEADROOM;
+
+		/* rearm descriptor */
+		rx_ring_st[id].dst_addr =
+				(uint64_t)mb->buf_iova +
+					RTE_PKTMBUF_HEADROOM;
+	}
+#endif /* QDMA_RX_VEC_X86_64 */
+
+	if (unlikely(id >= (rxq->nb_rx_desc - 1)))
+		id -= (rxq->nb_rx_desc - 1);
+
+	/* Handle from beginning of ring, if ring wrapped */
+	rearm_descs = num_desc - rearm_descs;
+	if (unlikely(rearm_descs)) {
+		/* allocate new buffers */
+		if (rte_mempool_get_bulk(rxq->mb_pool,
+			(void *)&rxq->sw_ring[id], rearm_descs) != 0) {
+			PMD_DRV_LOG(ERR, "%s(): %d: No MBUFS, queue id = %d, "
+			"mbuf_avail_count = %d, "
+			"mbuf_in_use_count = %d, num_desc_req = %d\n",
+			__func__, __LINE__, rxq->queue_id,
+			rte_mempool_avail_count(rxq->mb_pool),
+			rte_mempool_in_use_count(rxq->mb_pool), rearm_descs);
+
+			rxq->q_pidx_info.pidx = id;
+			qdma_dev->hw_access->qdma_queue_pidx_update(rxq->dev,
+				qdma_dev->is_vf,
+				rxq->queue_id, 1, &rxq->q_pidx_info);
+
+			return -1;
+		}
+
+		for (mbuf_index = 0;
+				mbuf_index < ((uint16_t)rearm_descs & 0xFFFF);
+				mbuf_index++, id++) {
+			mb = rxq->sw_ring[id];
+			mb->data_off = RTE_PKTMBUF_HEADROOM;
+
+			/* rearm descriptor */
+			rx_ring_st[id].dst_addr =
+					(uint64_t)mb->buf_iova +
+						RTE_PKTMBUF_HEADROOM;
+		}
+	}
+
+	PMD_DRV_LOG(DEBUG, "%s(): %d: PIDX Update: queue id = %d, "
+				"num_desc = %d",
+				__func__, __LINE__, rxq->queue_id,
+				num_desc);
+
+	/* Make sure writes to the C2H descriptors are
+	 * synchronized before updating PIDX
+	 */
+	rte_wmb();
+
+	rxq->q_pidx_info.pidx = id;
+	qdma_dev->hw_access->qdma_queue_pidx_update(rxq->dev,
+		qdma_dev->is_vf,
+		rxq->queue_id, 1, &rxq->q_pidx_info);
+
+	return 0;
+}
+
+/* Receive API for Streaming mode */
+uint16_t qdma_recv_pkts_st(struct qdma_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+				uint16_t nb_pkts)
+{
+	uint16_t count_pkts;
+	struct wb_status *wb_status;
+	uint16_t nb_pkts_avail = 0;
+	uint16_t rx_cmpt_tail = 0;
+	uint16_t cmpt_pidx, c2h_pidx;
+	uint16_t pending_desc;
+#ifdef TEST_64B_DESC_BYPASS
+	int bypass_desc_sz_idx = qmda_get_desc_sz_idx(rxq->bypass_desc_sz);
+#endif
+
+	if (unlikely(rxq->err))
+		return 0;
+
+	PMD_DRV_LOG(DEBUG, "recv start on rx queue-id:%d, on "
+			"tail index:%d, number of pkts %d",
+			rxq->queue_id, rxq->rx_tail, nb_pkts);
+	wb_status = rxq->wb_status;
+	rx_cmpt_tail = rxq->cmpt_cidx_info.wrb_cidx;
+
+#ifdef TEST_64B_DESC_BYPASS
+	if (unlikely(rxq->en_bypass &&
+			bypass_desc_sz_idx == SW_DESC_CNTXT_64B_BYPASS_DMA)) {
+		PMD_DRV_LOG(DEBUG, "For RX ST-mode, example"
+				" design doesn't support 64-byte descriptor\n");
+		return 0;
+	}
+#endif
+	cmpt_pidx = wb_status->pidx;
+
+	if (rx_cmpt_tail < cmpt_pidx)
+		nb_pkts_avail = cmpt_pidx - rx_cmpt_tail;
+	else if (rx_cmpt_tail > cmpt_pidx)
+		nb_pkts_avail = rxq->nb_rx_cmpt_desc - 1 - rx_cmpt_tail +
+				cmpt_pidx;
+
+	if (nb_pkts_avail == 0) {
+		PMD_DRV_LOG(DEBUG, "%s(): %d: nb_pkts_avail = 0\n",
+				__func__, __LINE__);
+		return 0;
+	}
+
+	if (nb_pkts > QDMA_MAX_BURST_SIZE)
+		nb_pkts = QDMA_MAX_BURST_SIZE;
+
+	if (nb_pkts > nb_pkts_avail)
+		nb_pkts = nb_pkts_avail;
+
+#ifdef DUMP_MEMPOOL_USAGE_STATS
+	PMD_DRV_LOG(DEBUG, "%s(): %d: queue id = %d, mbuf_avail_count = %d, "
+			"mbuf_in_use_count = %d",
+		__func__, __LINE__, rxq->queue_id,
+		rte_mempool_avail_count(rxq->mb_pool),
+		rte_mempool_in_use_count(rxq->mb_pool));
+#endif /* DUMP_MEMPOOL_USAGE_STATS */
+	/* Make sure the CMPT writeback index is read before
+	 * the CMPT ring entries are accessed
+	 */
+	rte_rmb();
+#ifdef QDMA_LATENCY_OPTIMIZED
+	adapt_update_counter(rxq, nb_pkts_avail);
+#endif /* QDMA_LATENCY_OPTIMIZED */
+	if (process_cmpt_ring(rxq, nb_pkts) != 0)
+		return 0;
+
+	if (rxq->status != RTE_ETH_QUEUE_STATE_STARTED) {
+		PMD_DRV_LOG(DEBUG, "%s(): %d: rxq->status = %d\n",
+				__func__, __LINE__, rxq->status);
+		return 0;
+	}
+
+	count_pkts = prepare_packets(rxq, rx_pkts, nb_pkts);
+
+	c2h_pidx = rxq->q_pidx_info.pidx;
+	pending_desc = rxq->rx_tail - c2h_pidx - 1;
+	if (rxq->rx_tail < (c2h_pidx + 1))
+		pending_desc = rxq->nb_rx_desc - 2 + rxq->rx_tail -
+				c2h_pidx;
+
+	/* Batch the PIDX updates; this minimizes the overhead
+	 * on the descriptor engine
+	 */
+	if (pending_desc >= MIN_RX_PIDX_UPDATE_THRESHOLD)
+		rearm_c2h_ring(rxq, pending_desc);
+
+#ifdef DUMP_MEMPOOL_USAGE_STATS
+	PMD_DRV_LOG(DEBUG, "%s(): %d: queue id = %d, mbuf_avail_count = %d,"
+			" mbuf_in_use_count = %d, count_pkts = %d",
+		__func__, __LINE__, rxq->queue_id,
+		rte_mempool_avail_count(rxq->mb_pool),
+		rte_mempool_in_use_count(rxq->mb_pool), count_pkts);
+#endif /* DUMP_MEMPOOL_USAGE_STATS */
+
+	PMD_DRV_LOG(DEBUG, " Recv complete with hw cidx :%d",
+				rxq->wb_status->cidx);
+	PMD_DRV_LOG(DEBUG, " Recv complete with hw pidx :%d\n",
+				rxq->wb_status->pidx);
+
+	return count_pkts;
+}
+
+/* Receive API for Memory mapped mode */
+uint16_t qdma_recv_pkts_mm(struct qdma_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+			uint16_t nb_pkts)
+{
+	struct rte_mbuf *mb;
+	uint32_t count, id;
+	struct qdma_ul_mm_desc *desc;
+	uint32_t len;
+	struct qdma_pci_dev *qdma_dev = rxq->dev->data->dev_private;
+#ifdef TEST_64B_DESC_BYPASS
+	int bypass_desc_sz_idx = qmda_get_desc_sz_idx(rxq->bypass_desc_sz);
+#endif
+
+	if (rxq->status != RTE_ETH_QUEUE_STATE_STARTED)
+		return 0;
+
+	id = rxq->q_pidx_info.pidx; /* Descriptor index */
+
+	PMD_DRV_LOG(DEBUG, "recv start on rx queue-id:%d, on tail index:%d\n",
+			rxq->queue_id, id);
+
+#ifdef TEST_64B_DESC_BYPASS
+	if (unlikely(rxq->en_bypass &&
+			bypass_desc_sz_idx == SW_DESC_CNTXT_64B_BYPASS_DMA)) {
+		PMD_DRV_LOG(DEBUG, "For MM mode, example design doesn't "
+				"support 64-byte descriptor\n");
+		return 0;
+	}
+#endif
+	/* Make one descriptor less available; otherwise, if all descriptors
+	 * are allowed to be filled, then when nb_pkts = nb_rx_desc - 1 the
+	 * new pidx equals the old pidx and HW treats this as if no new
+	 * descriptors were added. Hence, DMA won't happen with new descriptors.
+	 */
+	if (nb_pkts > rxq->nb_rx_desc - 2)
+		nb_pkts = rxq->nb_rx_desc - 2;
+
+	for (count = 0; count < nb_pkts; count++) {
+		/* allocate new buffer */
+		if (rte_mempool_get(rxq->mb_pool, (void *)&mb) != 0) {
+			PMD_DRV_LOG(ERR, "%s(): %d: No MBUFS, queue id = %d, "
+			"mbuf_avail_count = %d, "
+			"mbuf_in_use_count = %d\n",
+			__func__, __LINE__, rxq->queue_id,
+			rte_mempool_avail_count(rxq->mb_pool),
+			rte_mempool_in_use_count(rxq->mb_pool));
+			return 0;
+		}
+
+		desc = (struct qdma_ul_mm_desc *)rxq->rx_ring;
+		desc += id;
+		qdma_ul_update_mm_c2h_desc(rxq, mb, desc);
+
+		len = (int)rxq->rx_buff_size;
+		rte_pktmbuf_pkt_len(mb) = len;
+
+		rte_mbuf_refcnt_set(mb, 1);
+		mb->packet_type = 0;
+		mb->ol_flags = 0;
+		mb->next = 0;
+		mb->nb_segs = 1;
+		mb->port = rxq->port_id;
+		mb->vlan_tci = 0;
+		mb->hash.rss = 0;
+
+		rx_pkts[count] = mb;
+
+		rxq->ep_addr = (rxq->ep_addr + len) % DMA_BRAM_SIZE;
+		id = (id + 1) % (rxq->nb_rx_desc - 1);
+	}
+
+	/* Make sure writes to the C2H descriptors are synchronized
+	 * before updating PIDX
+	 */
+	rte_wmb();
+
+	/* update pidx pointer for MM-mode */
+	if (count > 0) {
+		rxq->q_pidx_info.pidx = id;
+		qdma_dev->hw_access->qdma_queue_pidx_update(rxq->dev,
+			qdma_dev->is_vf,
+			rxq->queue_id, 1, &rxq->q_pidx_info);
+	}
+
+	return count;
+}
+/**
+ * DPDK callback for receiving packets in burst.
+ *
+ * @param rx_queue
+ *   Generic pointer to Rx queue structure.
+ * @param[out] rx_pkts
+ *   Array to store received packets.
+ * @param nb_pkts
+ *   Maximum number of packets in array.
+ *
+ * @return
+ *   Number of packets successfully received (<= nb_pkts).
+ */
+uint16_t qdma_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+			uint16_t nb_pkts)
+{
+	struct qdma_rx_queue *rxq = rx_queue;
+	uint32_t count;
+
+	if (rxq->st_mode)
+		count = qdma_recv_pkts_st(rxq, rx_pkts, nb_pkts);
+	else
+		count = qdma_recv_pkts_mm(rxq, rx_pkts, nb_pkts);
+
+	return count;
+}
+
 /**
  * DPDK callback to request the driver to free mbufs
  * currently cached by the driver.
diff --git a/drivers/net/qdma/qdma_rxtx.h b/drivers/net/qdma/qdma_rxtx.h
index 397740abc0..b940788973 100644
--- a/drivers/net/qdma/qdma_rxtx.h
+++ b/drivers/net/qdma/qdma_rxtx.h
@@ -9,6 +9,7 @@ 
 
 /* forward declaration */
 struct qdma_tx_queue;
+struct qdma_rx_queue;
 
 /* Supporting functions for user logic pluggability */
 uint16_t qdma_get_rx_queue_id(void *queue_hndl);
@@ -26,5 +27,10 @@  uint16_t qdma_xmit_pkts_st(struct qdma_tx_queue *txq,
 uint16_t qdma_xmit_pkts_mm(struct qdma_tx_queue *txq,
 			   struct rte_mbuf **tx_pkts,
 			   uint16_t nb_pkts);
-
+uint16_t qdma_recv_pkts_st(struct qdma_rx_queue *rxq,
+			   struct rte_mbuf **rx_pkts,
+			   uint16_t nb_pkts);
+uint16_t qdma_recv_pkts_mm(struct qdma_rx_queue *rxq,
+			   struct rte_mbuf **rx_pkts,
+			   uint16_t nb_pkts);
 #endif /* QDMA_DPDK_RXTX_H_ */