[v3,27/56] net/txgbe: fill receive functions

Message ID: 20201014055517.1214386-28-jiawenwu@trustnetic.com (mailing list archive)
State: Changes Requested, archived
Delegated to: Ferruh Yigit
Series: net: txgbe PMD

Checks

Context        Check     Description
ci/checkpatch  warning   coding style issues

Commit Message

Jiawen Wu Oct. 14, 2020, 5:54 a.m. UTC
  Fill receive functions and define receive descriptor.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
---
 doc/guides/nics/features/txgbe.ini  |   1 +
 doc/guides/nics/txgbe.rst           |   3 +-
 drivers/net/txgbe/base/txgbe_type.h |   2 +
 drivers/net/txgbe/txgbe_ethdev.c    |  14 +
 drivers/net/txgbe/txgbe_ethdev.h    |  12 +
 drivers/net/txgbe/txgbe_rxtx.c      | 950 +++++++++++++++++++++++++++-
 drivers/net/txgbe/txgbe_rxtx.h      | 102 +++
 7 files changed, 1077 insertions(+), 7 deletions(-)
  

Comments

Ferruh Yigit Oct. 15, 2020, 12:55 a.m. UTC | #1
On 10/14/2020 6:54 AM, Jiawen Wu wrote:
> Fill receive functions and define receive descriptor.
> 
> Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>

<...>

> @@ -42,6 +54,17 @@ static const u64 TXGBE_TX_OFFLOAD_MASK = (PKT_TX_IP_CKSUM |
>   #define TXGBE_TX_OFFLOAD_NOTSUP_MASK \
>   		(PKT_TX_OFFLOAD_MASK ^ TXGBE_TX_OFFLOAD_MASK)
>   
> +#define RTE_PMD_USE_PREFETCH
> +
> +#ifdef RTE_PMD_USE_PREFETCH
> +/*
> + * Prefetch a cache line into all cache levels.
> + */
> +#define rte_txgbe_prefetch(p)   rte_prefetch0(p)
> +#else
> +#define rte_txgbe_prefetch(p)   do {} while (0)
> +#endif

If 'RTE_PMD_USE_PREFETCH' will always be defined, can it be removed?
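
For illustration, a minimal sketch of the simplification being hinted at
(hypothetical, not part of the posted patch): if the guard is unconditional
anyway, the helper can be defined directly and the #ifdef dropped.

    /* Assumed simplification: the RTE_PMD_USE_PREFETCH guard is removed,
     * since the posted patch always defines it.
     */
    #include <rte_prefetch.h>

    /* Prefetch a cache line into all cache levels. */
    #define rte_txgbe_prefetch(p)   rte_prefetch0(p)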

<...>

> @@ -145,6 +239,12 @@ struct txgbe_tx_desc {
>   #define RX_RING_SZ ((TXGBE_RING_DESC_MAX + RTE_PMD_TXGBE_RX_MAX_BURST) * \
>   		    sizeof(struct txgbe_rx_desc))
>   
> +#ifdef RTE_PMD_PACKET_PREFETCH
> +#define rte_packet_prefetch(p)  rte_prefetch1(p)
> +#else
> +#define rte_packet_prefetch(p)  do {} while (0)
> +#endif

Since there is no config file as we had in the Makefile build days, can you please 
document all the compile-time flags that the driver is using in the driver 
documentation, as they are introduced?
And show how they can be enabled via meson (meson 
-Dc_args="-DRTE_PMD_PACKET_PREFETCH" ... )
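
For illustration, a hedged sketch of how such a flag could be documented in
doc/guides/nics/txgbe.rst (the section title and wording below are assumptions,
not part of the posted patch):

    Pre-build configuration
    ~~~~~~~~~~~~~~~~~~~~~~~

    .. Assumed snippet: only the flag name comes from the patch, the rest
       is illustrative.

    The Rx path honours the following compile-time flag:

    - ``RTE_PMD_PACKET_PREFETCH``: prefetch received packet data before it
      is handed to the application.

    The flag can be enabled when configuring the build with meson::

       meson -Dc_args="-DRTE_PMD_PACKET_PREFETCH" build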
  

Patch

diff --git a/doc/guides/nics/features/txgbe.ini b/doc/guides/nics/features/txgbe.ini
index f77afa002..c8cd58ce2 100644
--- a/doc/guides/nics/features/txgbe.ini
+++ b/doc/guides/nics/features/txgbe.ini
@@ -10,6 +10,7 @@  Link status event    = Y
 Queue start/stop     = Y
 Jumbo frame          = Y
 Scattered Rx         = Y
+LRO                  = Y
 TSO                  = Y
 Unicast MAC filter   = Y
 Multicast MAC filter = Y
diff --git a/doc/guides/nics/txgbe.rst b/doc/guides/nics/txgbe.rst
index 010444939..101765a6c 100644
--- a/doc/guides/nics/txgbe.rst
+++ b/doc/guides/nics/txgbe.rst
@@ -17,7 +17,8 @@  Features
 - TSO offload
 - Jumbo frames
 - Link state information
-- Scattered and gather for TX
+- Scattered and gather for TX and RX
+- LRO
 
 Prerequisites
 -------------
diff --git a/drivers/net/txgbe/base/txgbe_type.h b/drivers/net/txgbe/base/txgbe_type.h
index 5237200d4..622c5c6a3 100644
--- a/drivers/net/txgbe/base/txgbe_type.h
+++ b/drivers/net/txgbe/base/txgbe_type.h
@@ -38,6 +38,8 @@ 
 #define TXGBE_PHYSICAL_LAYER_10BASE_T		0x08000
 #define TXGBE_PHYSICAL_LAYER_2500BASE_KX	0x10000
 
+#define TXGBE_ATR_HASH_MASK			0x7fff
+
 enum txgbe_eeprom_type {
 	txgbe_eeprom_unknown = 0,
 	txgbe_eeprom_spi,
diff --git a/drivers/net/txgbe/txgbe_ethdev.c b/drivers/net/txgbe/txgbe_ethdev.c
index 09317e3c2..124009fdc 100644
--- a/drivers/net/txgbe/txgbe_ethdev.c
+++ b/drivers/net/txgbe/txgbe_ethdev.c
@@ -116,6 +116,7 @@  eth_txgbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused)
 	PMD_INIT_FUNC_TRACE();
 
 	eth_dev->dev_ops = &txgbe_eth_dev_ops;
+	eth_dev->rx_pkt_burst = &txgbe_recv_pkts;
 	eth_dev->tx_pkt_burst = &txgbe_xmit_pkts;
 	eth_dev->tx_pkt_prepare = &txgbe_prep_pkts;
 
@@ -688,6 +689,18 @@  txgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 	return 0;
 }
 
+const uint32_t *
+txgbe_dev_supported_ptypes_get(struct rte_eth_dev *dev)
+{
+	if (dev->rx_pkt_burst == txgbe_recv_pkts ||
+	    dev->rx_pkt_burst == txgbe_recv_pkts_lro_single_alloc ||
+	    dev->rx_pkt_burst == txgbe_recv_pkts_lro_bulk_alloc ||
+	    dev->rx_pkt_burst == txgbe_recv_pkts_bulk_alloc)
+		return txgbe_get_supported_ptypes();
+
+	return NULL;
+}
+
 void
 txgbe_dev_setup_link_alarm_handler(void *param)
 {
@@ -1351,6 +1364,7 @@  static const struct eth_dev_ops txgbe_eth_dev_ops = {
 	.dev_infos_get              = txgbe_dev_info_get,
 	.dev_set_link_up            = txgbe_dev_set_link_up,
 	.dev_set_link_down          = txgbe_dev_set_link_down,
+	.dev_supported_ptypes_get   = txgbe_dev_supported_ptypes_get,
 	.rx_queue_start	            = txgbe_dev_rx_queue_start,
 	.rx_queue_stop              = txgbe_dev_rx_queue_stop,
 	.tx_queue_start	            = txgbe_dev_tx_queue_start,
diff --git a/drivers/net/txgbe/txgbe_ethdev.h b/drivers/net/txgbe/txgbe_ethdev.h
index 12d176166..c01f31295 100644
--- a/drivers/net/txgbe/txgbe_ethdev.h
+++ b/drivers/net/txgbe/txgbe_ethdev.h
@@ -113,6 +113,17 @@  int txgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id);
 
 int txgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id);
 
+uint16_t txgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts);
+
+uint16_t txgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
+				    uint16_t nb_pkts);
+
+uint16_t txgbe_recv_pkts_lro_single_alloc(void *rx_queue,
+		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t txgbe_recv_pkts_lro_bulk_alloc(void *rx_queue,
+		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+
 uint16_t txgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
 
@@ -146,6 +157,7 @@  txgbe_dev_link_update_share(struct rte_eth_dev *dev,
 #define TXGBE_DEFAULT_TX_HTHRESH      0
 #define TXGBE_DEFAULT_TX_WTHRESH      0
 
+const uint32_t *txgbe_dev_supported_ptypes_get(struct rte_eth_dev *dev);
 int txgbe_dev_set_mc_addr_list(struct rte_eth_dev *dev,
 				      struct rte_ether_addr *mc_addr_set,
 				      uint32_t nb_mc_addr);
diff --git a/drivers/net/txgbe/txgbe_rxtx.c b/drivers/net/txgbe/txgbe_rxtx.c
index f95102df8..aa795920f 100644
--- a/drivers/net/txgbe/txgbe_rxtx.c
+++ b/drivers/net/txgbe/txgbe_rxtx.c
@@ -8,6 +8,10 @@ 
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <inttypes.h>
 
 #include <rte_common.h>
 #include <rte_cycles.h>
@@ -16,9 +20,17 @@ 
 #include <rte_ethdev.h>
 #include <rte_ethdev_driver.h>
 #include <rte_memzone.h>
+#include <rte_atomic.h>
 #include <rte_mempool.h>
 #include <rte_malloc.h>
 #include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_prefetch.h>
+#include <rte_udp.h>
+#include <rte_tcp.h>
+#include <rte_sctp.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
 #include <rte_ip.h>
 #include <rte_net.h>
 
@@ -42,6 +54,17 @@  static const u64 TXGBE_TX_OFFLOAD_MASK = (PKT_TX_IP_CKSUM |
 #define TXGBE_TX_OFFLOAD_NOTSUP_MASK \
 		(PKT_TX_OFFLOAD_MASK ^ TXGBE_TX_OFFLOAD_MASK)
 
+#define RTE_PMD_USE_PREFETCH
+
+#ifdef RTE_PMD_USE_PREFETCH
+/*
+ * Prefetch a cache line into all cache levels.
+ */
+#define rte_txgbe_prefetch(p)   rte_prefetch0(p)
+#else
+#define rte_txgbe_prefetch(p)   do {} while (0)
+#endif
+
 static int
 txgbe_is_vf(struct rte_eth_dev *dev)
 {
@@ -986,6 +1009,862 @@  txgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	return i;
 }
 
+/*********************************************************************
+ *
+ *  RX functions
+ *
+ **********************************************************************/
+/* @note: fix txgbe_dev_supported_ptypes_get() if any change here. */
+static inline uint32_t
+txgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptid_mask)
+{
+	uint16_t ptid = TXGBE_RXD_PTID(pkt_info);
+
+	ptid &= ptid_mask;
+
+	return txgbe_decode_ptype(ptid);
+}
+
+static inline uint64_t
+txgbe_rxd_pkt_info_to_pkt_flags(uint32_t pkt_info)
+{
+	static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
+		0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+		0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
+		PKT_RX_RSS_HASH, 0, 0, 0,
+		0, 0, 0,  PKT_RX_FDIR,
+	};
+
+	return ip_rss_types_map[TXGBE_RXD_RSSTYPE(pkt_info)];
+}
+
+static inline uint64_t
+rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
+{
+	uint64_t pkt_flags;
+
+	/*
+	 * Check only whether a VLAN is present.
+	 * Do not check whether the L3/L4 rx checksum was done by the NIC;
+	 * that can be found from the rte_eth_rxmode.offloads flag.
+	 */
+	pkt_flags = (rx_status & TXGBE_RXD_STAT_VLAN &&
+		     vlan_flags & PKT_RX_VLAN_STRIPPED)
+		    ? vlan_flags : 0;
+
+	return pkt_flags;
+}
+
+static inline uint64_t
+rx_desc_error_to_pkt_flags(uint32_t rx_status)
+{
+	uint64_t pkt_flags = 0;
+
+	/* checksum offload can't be disabled */
+	if (rx_status & TXGBE_RXD_STAT_IPCS) {
+		pkt_flags |= (rx_status & TXGBE_RXD_ERR_IPCS
+				? PKT_RX_IP_CKSUM_BAD : PKT_RX_IP_CKSUM_GOOD);
+	}
+
+	if (rx_status & TXGBE_RXD_STAT_L4CS) {
+		pkt_flags |= (rx_status & TXGBE_RXD_ERR_L4CS
+				? PKT_RX_L4_CKSUM_BAD : PKT_RX_L4_CKSUM_GOOD);
+	}
+
+	if (rx_status & TXGBE_RXD_STAT_EIPCS &&
+	    rx_status & TXGBE_RXD_ERR_EIPCS) {
+		pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
+	}
+
+	return pkt_flags;
+}
+
+/*
+ * LOOK_AHEAD defines how many desc statuses to check beyond the
+ * current descriptor.
+ * It must be a pound define for optimal performance.
+ * Do not change the value of LOOK_AHEAD, as the txgbe_rx_scan_hw_ring
+ * function only works with LOOK_AHEAD=8.
+ */
+#define LOOK_AHEAD 8
+#if (LOOK_AHEAD != 8)
+#error "PMD TXGBE: LOOK_AHEAD must be 8\n"
+#endif
+static inline int
+txgbe_rx_scan_hw_ring(struct txgbe_rx_queue *rxq)
+{
+	volatile struct txgbe_rx_desc *rxdp;
+	struct txgbe_rx_entry *rxep;
+	struct rte_mbuf *mb;
+	uint16_t pkt_len;
+	uint64_t pkt_flags;
+	int nb_dd;
+	uint32_t s[LOOK_AHEAD];
+	uint32_t pkt_info[LOOK_AHEAD];
+	int i, j, nb_rx = 0;
+	uint32_t status;
+
+	/* get references to current descriptor and S/W ring entry */
+	rxdp = &rxq->rx_ring[rxq->rx_tail];
+	rxep = &rxq->sw_ring[rxq->rx_tail];
+
+	status = rxdp->qw1.lo.status;
+	/* check to make sure there is at least 1 packet to receive */
+	if (!(status & rte_cpu_to_le_32(TXGBE_RXD_STAT_DD)))
+		return 0;
+
+	/*
+	 * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
+	 * reference packets that are ready to be received.
+	 */
+	for (i = 0; i < RTE_PMD_TXGBE_RX_MAX_BURST;
+	     i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
+		/* Read desc statuses backwards to avoid race condition */
+		for (j = 0; j < LOOK_AHEAD; j++)
+			s[j] = rte_le_to_cpu_32(rxdp[j].qw1.lo.status);
+
+		rte_smp_rmb();
+
+		/* Compute how many status bits were set */
+		for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
+				(s[nb_dd] & TXGBE_RXD_STAT_DD); nb_dd++)
+			;
+
+		for (j = 0; j < nb_dd; j++)
+			pkt_info[j] = rte_le_to_cpu_32(rxdp[j].qw0.dw0);
+
+		nb_rx += nb_dd;
+
+		/* Translate descriptor info to mbuf format */
+		for (j = 0; j < nb_dd; ++j) {
+			mb = rxep[j].mbuf;
+			pkt_len = rte_le_to_cpu_16(rxdp[j].qw1.hi.len) -
+				  rxq->crc_len;
+			mb->data_len = pkt_len;
+			mb->pkt_len = pkt_len;
+			mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].qw1.hi.tag);
+
+			/* convert descriptor fields to rte mbuf flags */
+			pkt_flags = rx_desc_status_to_pkt_flags(s[j],
+					rxq->vlan_flags);
+			pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
+			pkt_flags |=
+				txgbe_rxd_pkt_info_to_pkt_flags(pkt_info[j]);
+			mb->ol_flags = pkt_flags;
+			mb->packet_type =
+				txgbe_rxd_pkt_info_to_pkt_type(pkt_info[j],
+				rxq->pkt_type_mask);
+
+			if (likely(pkt_flags & PKT_RX_RSS_HASH))
+				mb->hash.rss =
+					rte_le_to_cpu_32(rxdp[j].qw0.dw1);
+			else if (pkt_flags & PKT_RX_FDIR) {
+				mb->hash.fdir.hash =
+					rte_le_to_cpu_16(rxdp[j].qw0.hi.csum) &
+					TXGBE_ATR_HASH_MASK;
+				mb->hash.fdir.id =
+					rte_le_to_cpu_16(rxdp[j].qw0.hi.ipid);
+			}
+		}
+
+		/* Move mbuf pointers from the S/W ring to the stage */
+		for (j = 0; j < LOOK_AHEAD; ++j)
+			rxq->rx_stage[i + j] = rxep[j].mbuf;
+
+		/* stop if all requested packets could not be received */
+		if (nb_dd != LOOK_AHEAD)
+			break;
+	}
+
+	/* clear software ring entries so we can cleanup correctly */
+	for (i = 0; i < nb_rx; ++i)
+		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
+
+	return nb_rx;
+}
+
+static inline int
+txgbe_rx_alloc_bufs(struct txgbe_rx_queue *rxq, bool reset_mbuf)
+{
+	volatile struct txgbe_rx_desc *rxdp;
+	struct txgbe_rx_entry *rxep;
+	struct rte_mbuf *mb;
+	uint16_t alloc_idx;
+	__le64 dma_addr;
+	int diag, i;
+
+	/* allocate buffers in bulk directly into the S/W ring */
+	alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
+	rxep = &rxq->sw_ring[alloc_idx];
+	diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
+				    rxq->rx_free_thresh);
+	if (unlikely(diag != 0))
+		return -ENOMEM;
+
+	rxdp = &rxq->rx_ring[alloc_idx];
+	for (i = 0; i < rxq->rx_free_thresh; ++i) {
+		/* populate the static rte mbuf fields */
+		mb = rxep[i].mbuf;
+		if (reset_mbuf)
+			mb->port = rxq->port_id;
+
+		rte_mbuf_refcnt_set(mb, 1);
+		mb->data_off = RTE_PKTMBUF_HEADROOM;
+
+		/* populate the descriptors */
+		dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
+		TXGBE_RXD_HDRADDR(&rxdp[i], 0);
+		TXGBE_RXD_PKTADDR(&rxdp[i], dma_addr);
+	}
+
+	/* update state of internal queue structure */
+	rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
+	if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
+		rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
+
+	/* no errors */
+	return 0;
+}
+
+static inline uint16_t
+txgbe_rx_fill_from_stage(struct txgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+			 uint16_t nb_pkts)
+{
+	struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
+	int i;
+
+	/* how many packets are ready to return? */
+	nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
+
+	/* copy mbuf pointers to the application's packet list */
+	for (i = 0; i < nb_pkts; ++i)
+		rx_pkts[i] = stage[i];
+
+	/* update internal queue state */
+	rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
+	rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
+
+	return nb_pkts;
+}
+
+static inline uint16_t
+txgbe_rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+	     uint16_t nb_pkts)
+{
+	struct txgbe_rx_queue *rxq = (struct txgbe_rx_queue *)rx_queue;
+	struct rte_eth_dev *dev = &rte_eth_devices[rxq->port_id];
+	uint16_t nb_rx = 0;
+
+	/* Any previously recv'd pkts will be returned from the Rx stage */
+	if (rxq->rx_nb_avail)
+		return txgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
+
+	/* Scan the H/W ring for packets to receive */
+	nb_rx = (uint16_t)txgbe_rx_scan_hw_ring(rxq);
+
+	/* update internal queue state */
+	rxq->rx_next_avail = 0;
+	rxq->rx_nb_avail = nb_rx;
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
+
+	/* if required, allocate new buffers to replenish descriptors */
+	if (rxq->rx_tail > rxq->rx_free_trigger) {
+		uint16_t cur_free_trigger = rxq->rx_free_trigger;
+
+		if (txgbe_rx_alloc_bufs(rxq, true) != 0) {
+			int i, j;
+
+			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+				   "queue_id=%u", (uint16_t)rxq->port_id,
+				   (uint16_t)rxq->queue_id);
+
+			dev->data->rx_mbuf_alloc_failed +=
+				rxq->rx_free_thresh;
+
+			/*
+			 * Need to rewind any previous receives if we cannot
+			 * allocate new buffers to replenish the old ones.
+			 */
+			rxq->rx_nb_avail = 0;
+			rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
+			for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
+				rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
+
+			return 0;
+		}
+
+		/* update tail pointer */
+		rte_wmb();
+		txgbe_set32_relaxed(rxq->rdt_reg_addr, cur_free_trigger);
+	}
+
+	if (rxq->rx_tail >= rxq->nb_rx_desc)
+		rxq->rx_tail = 0;
+
+	/* received any packets this loop? */
+	if (rxq->rx_nb_avail)
+		return txgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
+
+	return 0;
+}
+
+/* split requests into chunks of size RTE_PMD_TXGBE_RX_MAX_BURST */
+uint16_t
+txgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
+			   uint16_t nb_pkts)
+{
+	uint16_t nb_rx;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	if (likely(nb_pkts <= RTE_PMD_TXGBE_RX_MAX_BURST))
+		return txgbe_rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
+
+	/* request is relatively large, chunk it up */
+	nb_rx = 0;
+	while (nb_pkts) {
+		uint16_t ret, n;
+
+		n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_TXGBE_RX_MAX_BURST);
+		ret = txgbe_rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
+		nb_rx = (uint16_t)(nb_rx + ret);
+		nb_pkts = (uint16_t)(nb_pkts - ret);
+		if (ret < n)
+			break;
+	}
+
+	return nb_rx;
+}
+
+uint16_t
+txgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts)
+{
+	struct txgbe_rx_queue *rxq;
+	volatile struct txgbe_rx_desc *rx_ring;
+	volatile struct txgbe_rx_desc *rxdp;
+	struct txgbe_rx_entry *sw_ring;
+	struct txgbe_rx_entry *rxe;
+	struct rte_mbuf *rxm;
+	struct rte_mbuf *nmb;
+	struct txgbe_rx_desc rxd;
+	uint64_t dma_addr;
+	uint32_t staterr;
+	uint32_t pkt_info;
+	uint16_t pkt_len;
+	uint16_t rx_id;
+	uint16_t nb_rx;
+	uint16_t nb_hold;
+	uint64_t pkt_flags;
+
+	nb_rx = 0;
+	nb_hold = 0;
+	rxq = rx_queue;
+	rx_id = rxq->rx_tail;
+	rx_ring = rxq->rx_ring;
+	sw_ring = rxq->sw_ring;
+	struct rte_eth_dev *dev = &rte_eth_devices[rxq->port_id];
+	while (nb_rx < nb_pkts) {
+		/*
+		 * The order of operations here is important as the DD status
+		 * bit must not be read after any other descriptor fields.
+		 * rx_ring and rxdp are pointing to volatile data so the order
+		 * of accesses cannot be reordered by the compiler. If they were
+		 * not volatile, they could be reordered which could lead to
+		 * using invalid descriptor fields when read from rxd.
+		 */
+		rxdp = &rx_ring[rx_id];
+		staterr = rxdp->qw1.lo.status;
+		if (!(staterr & rte_cpu_to_le_32(TXGBE_RXD_STAT_DD)))
+			break;
+		rxd = *rxdp;
+
+		/*
+		 * End of packet.
+		 *
+		 * If the TXGBE_RXD_STAT_EOP flag is not set, the RX packet
+		 * is likely to be invalid and to be dropped by the various
+		 * validation checks performed by the network stack.
+		 *
+		 * Allocate a new mbuf to replenish the RX ring descriptor.
+		 * If the allocation fails:
+		 *    - arrange for that RX descriptor to be the first one
+		 *      being parsed the next time the receive function is
+		 *      invoked [on the same queue].
+		 *
+		 *    - Stop parsing the RX ring and return immediately.
+		 *
+		 * This policy does not drop the packet received in the RX
+		 * descriptor for which the allocation of a new mbuf failed.
+		 * Thus, it allows that packet to be later retrieved if
+		 * mbufs have been freed in the meantime.
+		 * As a side effect, holding RX descriptors instead of
+		 * systematically giving them back to the NIC may lead to
+		 * RX ring exhaustion situations.
+		 * However, the NIC can gracefully prevent such situations
+		 * from happening by sending specific "back-pressure" flow
+		 * control frames to its peer(s).
+		 */
+		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
+			   "ext_err_stat=0x%08x pkt_len=%u",
+			   (uint16_t)rxq->port_id, (uint16_t)rxq->queue_id,
+			   (uint16_t)rx_id, (uint32_t)staterr,
+			   (uint16_t)rte_le_to_cpu_16(rxd.qw1.hi.len));
+
+		nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
+		if (nmb == NULL) {
+			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+				   "queue_id=%u", (uint16_t)rxq->port_id,
+				   (uint16_t)rxq->queue_id);
+			dev->data->rx_mbuf_alloc_failed++;
+			break;
+		}
+
+		nb_hold++;
+		rxe = &sw_ring[rx_id];
+		rx_id++;
+		if (rx_id == rxq->nb_rx_desc)
+			rx_id = 0;
+
+		/* Prefetch next mbuf while processing current one. */
+		rte_txgbe_prefetch(sw_ring[rx_id].mbuf);
+
+		/*
+		 * When next RX descriptor is on a cache-line boundary,
+		 * prefetch the next 4 RX descriptors and the next 8 pointers
+		 * to mbufs.
+		 */
+		if ((rx_id & 0x3) == 0) {
+			rte_txgbe_prefetch(&rx_ring[rx_id]);
+			rte_txgbe_prefetch(&sw_ring[rx_id]);
+		}
+
+		rxm = rxe->mbuf;
+		rxe->mbuf = nmb;
+		dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+		TXGBE_RXD_HDRADDR(rxdp, 0);
+		TXGBE_RXD_PKTADDR(rxdp, dma_addr);
+
+		/*
+		 * Initialize the returned mbuf.
+		 * 1) setup generic mbuf fields:
+		 *    - number of segments,
+		 *    - next segment,
+		 *    - packet length,
+		 *    - RX port identifier.
+		 * 2) integrate hardware offload data, if any:
+		 *    - RSS flag & hash,
+		 *    - IP checksum flag,
+		 *    - VLAN TCI, if any,
+		 *    - error flags.
+		 */
+		pkt_len = (uint16_t)(rte_le_to_cpu_16(rxd.qw1.hi.len) -
+				      rxq->crc_len);
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
+		rxm->nb_segs = 1;
+		rxm->next = NULL;
+		rxm->pkt_len = pkt_len;
+		rxm->data_len = pkt_len;
+		rxm->port = rxq->port_id;
+
+		pkt_info = rte_le_to_cpu_32(rxd.qw0.dw0);
+		/* Only valid if PKT_RX_VLAN set in pkt_flags */
+		rxm->vlan_tci = rte_le_to_cpu_16(rxd.qw1.hi.tag);
+
+		pkt_flags = rx_desc_status_to_pkt_flags(staterr,
+					rxq->vlan_flags);
+		pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
+		pkt_flags |= txgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
+		rxm->ol_flags = pkt_flags;
+		rxm->packet_type = txgbe_rxd_pkt_info_to_pkt_type(pkt_info,
+						       rxq->pkt_type_mask);
+
+		if (likely(pkt_flags & PKT_RX_RSS_HASH)) {
+			rxm->hash.rss = rte_le_to_cpu_32(rxd.qw0.dw1);
+		} else if (pkt_flags & PKT_RX_FDIR) {
+			rxm->hash.fdir.hash =
+				rte_le_to_cpu_16(rxd.qw0.hi.csum) &
+				TXGBE_ATR_HASH_MASK;
+			rxm->hash.fdir.id = rte_le_to_cpu_16(rxd.qw0.hi.ipid);
+		}
+		/*
+		 * Store the mbuf address into the next entry of the array
+		 * of returned packets.
+		 */
+		rx_pkts[nb_rx++] = rxm;
+	}
+	rxq->rx_tail = rx_id;
+
+	/*
+	 * If the number of free RX descriptors is greater than the RX free
+	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
+	 * register.
+	 * Update the RDT with the value of the last processed RX descriptor
+	 * minus 1, to guarantee that the RDT register is never equal to the
+	 * RDH register, which creates a "full" ring situation from the
+	 * hardware point of view...
+	 */
+	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
+	if (nb_hold > rxq->rx_free_thresh) {
+		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
+			   "nb_hold=%u nb_rx=%u",
+			   (uint16_t)rxq->port_id, (uint16_t)rxq->queue_id,
+			   (uint16_t)rx_id, (uint16_t)nb_hold,
+			   (uint16_t)nb_rx);
+		rx_id = (uint16_t)((rx_id == 0) ?
+				(rxq->nb_rx_desc - 1) : (rx_id - 1));
+		txgbe_set32(rxq->rdt_reg_addr, rx_id);
+		nb_hold = 0;
+	}
+	rxq->nb_rx_hold = nb_hold;
+	return nb_rx;
+}
+
+/**
+ * txgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
+ *
+ * Fill the following info in the HEAD buffer of the Rx cluster:
+ *    - RX port identifier
+ *    - hardware offload data, if any:
+ *      - RSS flag & hash
+ *      - IP checksum flag
+ *      - VLAN TCI, if any
+ *      - error flags
+ * @head HEAD of the packet cluster
+ * @desc HW descriptor to get data from
+ * @rxq Pointer to the Rx queue
+ */
+static inline void
+txgbe_fill_cluster_head_buf(struct rte_mbuf *head, struct txgbe_rx_desc *desc,
+		struct txgbe_rx_queue *rxq, uint32_t staterr)
+{
+	uint32_t pkt_info;
+	uint64_t pkt_flags;
+
+	head->port = rxq->port_id;
+
+	/* The vlan_tci field is only valid when PKT_RX_VLAN is
+	 * set in the pkt_flags field.
+	 */
+	head->vlan_tci = rte_le_to_cpu_16(desc->qw1.hi.tag);
+	pkt_info = rte_le_to_cpu_32(desc->qw0.dw0);
+	pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
+	pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
+	pkt_flags |= txgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
+	head->ol_flags = pkt_flags;
+	head->packet_type = txgbe_rxd_pkt_info_to_pkt_type(pkt_info,
+						rxq->pkt_type_mask);
+
+	if (likely(pkt_flags & PKT_RX_RSS_HASH)) {
+		head->hash.rss = rte_le_to_cpu_32(desc->qw0.dw1);
+	} else if (pkt_flags & PKT_RX_FDIR) {
+		head->hash.fdir.hash = rte_le_to_cpu_16(desc->qw0.hi.csum)
+				& TXGBE_ATR_HASH_MASK;
+		head->hash.fdir.id = rte_le_to_cpu_16(desc->qw0.hi.ipid);
+	}
+}
+
+/**
+ * txgbe_recv_pkts_lro - receive handler for the LRO case.
+ *
+ * @rx_queue Rx queue handle
+ * @rx_pkts table of received packets
+ * @nb_pkts size of rx_pkts table
+ * @bulk_alloc if TRUE bulk allocation is used for a HW ring refilling
+ *
+ * Handles the Rx HW ring completions when RSC feature is configured. Uses an
+ * additional ring of txgbe_rsc_entry's that will hold the relevant RSC info.
+ *
+ * We use the same logic as in Linux and in FreeBSD txgbe drivers:
+ * 1) When non-EOP RSC completion arrives:
+ *    a) Update the HEAD of the current RSC aggregation cluster with the new
+ *       segment's data length.
+ *    b) Set the "next" pointer of the current segment to point to the segment
+ *       at the NEXTP index.
+ *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
+ *       in the sw_rsc_ring.
+ * 2) When EOP arrives we just update the cluster's total length and offload
+ *    flags and deliver the cluster up to the upper layers. In our case - put it
+ *    in the rx_pkts table.
+ *
+ * Returns the number of received packets/clusters (according to the "bulk
+ * receive" interface).
+ */
+static inline uint16_t
+txgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
+		    bool bulk_alloc)
+{
+	struct txgbe_rx_queue *rxq = rx_queue;
+	struct rte_eth_dev *dev = &rte_eth_devices[rxq->port_id];
+	volatile struct txgbe_rx_desc *rx_ring = rxq->rx_ring;
+	struct txgbe_rx_entry *sw_ring = rxq->sw_ring;
+	struct txgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
+	uint16_t rx_id = rxq->rx_tail;
+	uint16_t nb_rx = 0;
+	uint16_t nb_hold = rxq->nb_rx_hold;
+	uint16_t prev_id = rxq->rx_tail;
+
+	while (nb_rx < nb_pkts) {
+		bool eop;
+		struct txgbe_rx_entry *rxe;
+		struct txgbe_scattered_rx_entry *sc_entry;
+		struct txgbe_scattered_rx_entry *next_sc_entry = NULL;
+		struct txgbe_rx_entry *next_rxe = NULL;
+		struct rte_mbuf *first_seg;
+		struct rte_mbuf *rxm;
+		struct rte_mbuf *nmb = NULL;
+		struct txgbe_rx_desc rxd;
+		uint16_t data_len;
+		uint16_t next_id;
+		volatile struct txgbe_rx_desc *rxdp;
+		uint32_t staterr;
+
+next_desc:
+		/*
+		 * The code in this whole file uses the volatile pointer to
+		 * ensure the read ordering of the status and the rest of the
+		 * descriptor fields (on the compiler level only!!!). This is so
+		 * UGLY - why not to just use the compiler barrier instead? DPDK
+		 * even has the rte_compiler_barrier() for that.
+		 *
+		 * But most importantly this is just wrong because this doesn't
+		 * ensure memory ordering in a general case at all. For
+		 * instance, DPDK is supposed to work on Power CPUs where
+		 * compiler barrier may just not be enough!
+		 *
+		 * I tried to write only this function properly to have a
+		 * starting point (as a part of an LRO/RSC series) but the
+		 * compiler cursed at me when I tried to cast away the
+		 * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
+		 * keeping it the way it is for now.
+		 *
+		 * The code in this file is broken in so many other places and
+		 * will just not work on a big endian CPU anyway therefore the
+		 * lines below will have to be revisited together with the rest
+		 * of the txgbe PMD.
+		 *
+		 * TODO:
+		 *    - Get rid of "volatile" and let the compiler do its job.
+		 *    - Use the proper memory barrier (rte_rmb()) to ensure the
+		 *      memory ordering below.
+		 */
+		rxdp = &rx_ring[rx_id];
+		staterr = rte_le_to_cpu_32(rxdp->qw1.lo.status);
+
+		if (!(staterr & TXGBE_RXD_STAT_DD))
+			break;
+
+		rxd = *rxdp;
+
+		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
+				  "staterr=0x%x data_len=%u",
+			   rxq->port_id, rxq->queue_id, rx_id, staterr,
+			   rte_le_to_cpu_16(rxd.qw1.hi.len));
+
+		if (!bulk_alloc) {
+			nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
+			if (nmb == NULL) {
+				PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
+						  "port_id=%u queue_id=%u",
+					   rxq->port_id, rxq->queue_id);
+
+				dev->data->rx_mbuf_alloc_failed++;
+				break;
+			}
+		} else if (nb_hold > rxq->rx_free_thresh) {
+			uint16_t next_rdt = rxq->rx_free_trigger;
+
+			if (!txgbe_rx_alloc_bufs(rxq, false)) {
+				rte_wmb();
+				txgbe_set32_relaxed(rxq->rdt_reg_addr,
+							    next_rdt);
+				nb_hold -= rxq->rx_free_thresh;
+			} else {
+				PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
+						  "port_id=%u queue_id=%u",
+					   rxq->port_id, rxq->queue_id);
+
+				dev->data->rx_mbuf_alloc_failed++;
+				break;
+			}
+		}
+
+		nb_hold++;
+		rxe = &sw_ring[rx_id];
+		eop = staterr & TXGBE_RXD_STAT_EOP;
+
+		next_id = rx_id + 1;
+		if (next_id == rxq->nb_rx_desc)
+			next_id = 0;
+
+		/* Prefetch next mbuf while processing current one. */
+		rte_txgbe_prefetch(sw_ring[next_id].mbuf);
+
+		/*
+		 * When next RX descriptor is on a cache-line boundary,
+		 * prefetch the next 4 RX descriptors and the next 4 pointers
+		 * to mbufs.
+		 */
+		if ((next_id & 0x3) == 0) {
+			rte_txgbe_prefetch(&rx_ring[next_id]);
+			rte_txgbe_prefetch(&sw_ring[next_id]);
+		}
+
+		rxm = rxe->mbuf;
+
+		if (!bulk_alloc) {
+			__le64 dma =
+			  rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+			/*
+			 * Update RX descriptor with the physical address of the
+			 * new data buffer of the newly allocated mbuf.
+			 */
+			rxe->mbuf = nmb;
+
+			rxm->data_off = RTE_PKTMBUF_HEADROOM;
+			TXGBE_RXD_HDRADDR(rxdp, 0);
+			TXGBE_RXD_PKTADDR(rxdp, dma);
+		} else {
+			rxe->mbuf = NULL;
+		}
+
+		/*
+		 * Set data length & data buffer address of mbuf.
+		 */
+		data_len = rte_le_to_cpu_16(rxd.qw1.hi.len);
+		rxm->data_len = data_len;
+
+		if (!eop) {
+			uint16_t nextp_id;
+			/*
+			 * Get next descriptor index:
+			 *  - For RSC it's in the NEXTP field.
+			 *  - For a scattered packet - it's just a following
+			 *    descriptor.
+			 */
+			if (TXGBE_RXD_RSCCNT(rxd.qw0.dw0))
+				nextp_id = TXGBE_RXD_NEXTP(staterr);
+			else
+				nextp_id = next_id;
+
+			next_sc_entry = &sw_sc_ring[nextp_id];
+			next_rxe = &sw_ring[nextp_id];
+			rte_txgbe_prefetch(next_rxe);
+		}
+
+		sc_entry = &sw_sc_ring[rx_id];
+		first_seg = sc_entry->fbuf;
+		sc_entry->fbuf = NULL;
+
+		/*
+		 * If this is the first buffer of the received packet,
+		 * set the pointer to the first mbuf of the packet and
+		 * initialize its context.
+		 * Otherwise, update the total length and the number of segments
+		 * of the current scattered packet, and update the pointer to
+		 * the last mbuf of the current packet.
+		 */
+		if (first_seg == NULL) {
+			first_seg = rxm;
+			first_seg->pkt_len = data_len;
+			first_seg->nb_segs = 1;
+		} else {
+			first_seg->pkt_len += data_len;
+			first_seg->nb_segs++;
+		}
+
+		prev_id = rx_id;
+		rx_id = next_id;
+
+		/*
+		 * If this is not the last buffer of the received packet, update
+		 * the pointer to the first mbuf at the NEXTP entry in the
+		 * sw_sc_ring and continue to parse the RX ring.
+		 */
+		if (!eop && next_rxe) {
+			rxm->next = next_rxe->mbuf;
+			next_sc_entry->fbuf = first_seg;
+			goto next_desc;
+		}
+
+		/* Initialize the first mbuf of the returned packet */
+		txgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
+
+		/*
+		 * Deal with the case when HW CRC strip is disabled.
+		 * That can't happen when LRO is enabled, but still could
+		 * happen for scattered RX mode.
+		 */
+		first_seg->pkt_len -= rxq->crc_len;
+		if (unlikely(rxm->data_len <= rxq->crc_len)) {
+			struct rte_mbuf *lp;
+
+			for (lp = first_seg; lp->next != rxm; lp = lp->next)
+				;
+
+			first_seg->nb_segs--;
+			lp->data_len -= rxq->crc_len - rxm->data_len;
+			lp->next = NULL;
+			rte_pktmbuf_free_seg(rxm);
+		} else {
+			rxm->data_len -= rxq->crc_len;
+		}
+
+		/* Prefetch data of first segment, if configured to do so. */
+		rte_packet_prefetch((char *)first_seg->buf_addr +
+			first_seg->data_off);
+
+		/*
+		 * Store the mbuf address into the next entry of the array
+		 * of returned packets.
+		 */
+		rx_pkts[nb_rx++] = first_seg;
+	}
+
+	/*
+	 * Record index of the next RX descriptor to probe.
+	 */
+	rxq->rx_tail = rx_id;
+
+	/*
+	 * If the number of free RX descriptors is greater than the RX free
+	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
+	 * register.
+	 * Update the RDT with the value of the last processed RX descriptor
+	 * minus 1, to guarantee that the RDT register is never equal to the
+	 * RDH register, which creates a "full" ring situation from the
+	 * hardware point of view...
+	 */
+	if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
+		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
+			   "nb_hold=%u nb_rx=%u",
+			   rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
+
+		rte_wmb();
+		txgbe_set32_relaxed(rxq->rdt_reg_addr, prev_id);
+		nb_hold = 0;
+	}
+
+	rxq->nb_rx_hold = nb_hold;
+	return nb_rx;
+}
+
+uint16_t
+txgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
+				 uint16_t nb_pkts)
+{
+	return txgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
+}
+
+uint16_t
+txgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
+			       uint16_t nb_pkts)
+{
+	return txgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
+}
+
 uint64_t
 txgbe_get_rx_queue_offloads(struct rte_eth_dev *dev __rte_unused)
 {
@@ -1609,12 +2488,6 @@  txgbe_dev_free_queues(struct rte_eth_dev *dev)
 	dev->data->nb_tx_queues = 0;
 }
 
-void __rte_cold
-txgbe_set_rx_function(struct rte_eth_dev *dev)
-{
-	RTE_SET_USED(dev);
-}
-
 static int __rte_cold
 txgbe_alloc_rx_queue_mbufs(struct txgbe_rx_queue *rxq)
 {
@@ -1789,6 +2662,71 @@  txgbe_set_rsc(struct rte_eth_dev *dev)
 	return 0;
 }
 
+void __rte_cold
+txgbe_set_rx_function(struct rte_eth_dev *dev)
+{
+	struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev);
+
+	/*
+	 * Initialize the appropriate LRO callback.
+	 *
+	 * If all queues satisfy the bulk allocation preconditions
+	 * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use
+	 * bulk allocation. Otherwise use a single allocation version.
+	 */
+	if (dev->data->lro) {
+		if (adapter->rx_bulk_alloc_allowed) {
+			PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
+					   "allocation version");
+			dev->rx_pkt_burst = txgbe_recv_pkts_lro_bulk_alloc;
+		} else {
+			PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
+					   "allocation version");
+			dev->rx_pkt_burst = txgbe_recv_pkts_lro_single_alloc;
+		}
+	} else if (dev->data->scattered_rx) {
+		/*
+		 * Set the non-LRO scattered callback: there are bulk and
+		 * single allocation versions.
+		 */
+		if (adapter->rx_bulk_alloc_allowed) {
+			PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
+					   "allocation callback (port=%d).",
+				     dev->data->port_id);
+			dev->rx_pkt_burst = txgbe_recv_pkts_lro_bulk_alloc;
+		} else {
+			PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
+					    "single allocation) "
+					    "Scattered Rx callback "
+					    "(port=%d).",
+				     dev->data->port_id);
+
+			dev->rx_pkt_burst = txgbe_recv_pkts_lro_single_alloc;
+		}
+	/*
+	 * Below we set "simple" callbacks according to port/queues parameters.
+	 * If parameters allow we are going to choose between the following
+	 * callbacks:
+	 *    - Bulk Allocation
+	 *    - Single buffer allocation (the simplest one)
+	 */
+	} else if (adapter->rx_bulk_alloc_allowed) {
+		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
+				    "satisfied. Rx Burst Bulk Alloc function "
+				    "will be used on port=%d.",
+			     dev->data->port_id);
+
+		dev->rx_pkt_burst = txgbe_recv_pkts_bulk_alloc;
+	} else {
+		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
+				    "satisfied, or Scattered Rx is requested "
+				    "(port=%d).",
+			     dev->data->port_id);
+
+		dev->rx_pkt_burst = txgbe_recv_pkts;
+	}
+}
+
 /*
  * Initializes Receive Unit.
  */
diff --git a/drivers/net/txgbe/txgbe_rxtx.h b/drivers/net/txgbe/txgbe_rxtx.h
index b61382f1d..8c36698c3 100644
--- a/drivers/net/txgbe/txgbe_rxtx.h
+++ b/drivers/net/txgbe/txgbe_rxtx.h
@@ -50,6 +50,100 @@  struct txgbe_rx_desc {
 #define TXGBE_RXD_HDRADDR(rxd, v)  \
 	(((volatile __le64 *)(rxd))[1] = cpu_to_le64(v))
 
+/* @txgbe_rx_desc.dw0 */
+#define TXGBE_RXD_RSSTYPE(dw)      RS(dw, 0, 0xF)
+#define   TXGBE_RSSTYPE_NONE       0
+#define   TXGBE_RSSTYPE_IPV4TCP    1
+#define   TXGBE_RSSTYPE_IPV4       2
+#define   TXGBE_RSSTYPE_IPV6TCP    3
+#define   TXGBE_RSSTYPE_IPV4SCTP   4
+#define   TXGBE_RSSTYPE_IPV6       5
+#define   TXGBE_RSSTYPE_IPV6SCTP   6
+#define   TXGBE_RSSTYPE_IPV4UDP    7
+#define   TXGBE_RSSTYPE_IPV6UDP    8
+#define   TXGBE_RSSTYPE_FDIR       15
+#define TXGBE_RXD_SECTYPE(dw)      RS(dw, 4, 0x3)
+#define TXGBE_RXD_SECTYPE_NONE     LS(0, 4, 0x3)
+#define TXGBE_RXD_SECTYPE_LINKSEC  LS(1, 4, 0x3)
+#define TXGBE_RXD_SECTYPE_IPSECESP LS(2, 4, 0x3)
+#define TXGBE_RXD_SECTYPE_IPSECAH  LS(3, 4, 0x3)
+#define TXGBE_RXD_TPIDSEL(dw)      RS(dw, 6, 0x7)
+#define TXGBE_RXD_PTID(dw)         RS(dw, 9, 0xFF)
+#define TXGBE_RXD_RSCCNT(dw)       RS(dw, 17, 0xF)
+#define TXGBE_RXD_HDRLEN(dw)       RS(dw, 21, 0x3FF)
+#define TXGBE_RXD_SPH              MS(31, 0x1)
+
+/* @txgbe_rx_desc.dw1 */
+/** bit 0-31, as rss hash when  **/
+#define TXGBE_RXD_RSSHASH(rxd)     ((rxd)->qw0.dw1)
+
+/** bit 0-31, as ip csum when  **/
+#define TXGBE_RXD_IPID(rxd)        ((rxd)->qw0.hi.ipid)
+#define TXGBE_RXD_CSUM(rxd)        ((rxd)->qw0.hi.csum)
+
+/** bit 0-31, as fdir id when  **/
+#define TXGBE_RXD_FDIRID(rxd)      ((rxd)->qw0.hi.dw1)
+
+/* @txgbe_rx_desc.dw2 */
+#define TXGBE_RXD_STATUS(rxd)      ((rxd)->qw1.lo.status)
+/** bit 0-1 **/
+#define TXGBE_RXD_STAT_DD          MS(0, 0x1) /* Descriptor Done */
+#define TXGBE_RXD_STAT_EOP         MS(1, 0x1) /* End of Packet */
+/** bit 2-31, when EOP=0 **/
+#define TXGBE_RXD_NEXTP_RESV(v)    LS(v, 2, 0x3)
+#define TXGBE_RXD_NEXTP(dw)        RS(dw, 4, 0xFFFF) /* Next Descriptor */
+/** bit 2-31, when EOP=1 **/
+#define TXGBE_RXD_PKT_CLS_MASK     MS(2, 0x7) /* Packet Class */
+#define TXGBE_RXD_PKT_CLS_TC_RSS   LS(0, 2, 0x7) /* RSS Hash */
+#define TXGBE_RXD_PKT_CLS_FLM      LS(1, 2, 0x7) /* FDir Match */
+#define TXGBE_RXD_PKT_CLS_SYN      LS(2, 2, 0x7) /* TCP Sync */
+#define TXGBE_RXD_PKT_CLS_5TUPLE   LS(3, 2, 0x7) /* 5 Tuple */
+#define TXGBE_RXD_PKT_CLS_ETF      LS(4, 2, 0x7) /* Ethertype Filter */
+#define TXGBE_RXD_STAT_VLAN        MS(5, 0x1) /* IEEE VLAN Packet */
+#define TXGBE_RXD_STAT_UDPCS       MS(6, 0x1) /* UDP xsum calculated */
+#define TXGBE_RXD_STAT_L4CS        MS(7, 0x1) /* L4 xsum calculated */
+#define TXGBE_RXD_STAT_IPCS        MS(8, 0x1) /* IP xsum calculated */
+#define TXGBE_RXD_STAT_PIF         MS(9, 0x1) /* Non-unicast address */
+#define TXGBE_RXD_STAT_EIPCS       MS(10, 0x1) /* Encap IP xsum calculated */
+#define TXGBE_RXD_STAT_VEXT        MS(11, 0x1) /* Multi-VLAN */
+#define TXGBE_RXD_STAT_IPV6EX      MS(12, 0x1) /* IPv6 with option header */
+#define TXGBE_RXD_STAT_LLINT       MS(13, 0x1) /* Pkt caused LLI */
+#define TXGBE_RXD_STAT_1588        MS(14, 0x1) /* IEEE1588 Time Stamp */
+#define TXGBE_RXD_STAT_SECP        MS(15, 0x1) /* Security Processing */
+#define TXGBE_RXD_STAT_LB          MS(16, 0x1) /* Loopback Status */
+/*** bit 17-30, when PTYPE=IP ***/
+#define TXGBE_RXD_STAT_BMC         MS(17, 0x1) /* PTYPE=IP, BMC status */
+#define TXGBE_RXD_ERR_FDIR_LEN     MS(20, 0x1) /* FDIR Length error */
+#define TXGBE_RXD_ERR_FDIR_DROP    MS(21, 0x1) /* FDIR Drop error */
+#define TXGBE_RXD_ERR_FDIR_COLL    MS(22, 0x1) /* FDIR Collision error */
+#define TXGBE_RXD_ERR_HBO          MS(23, 0x1) /* Header Buffer Overflow */
+#define TXGBE_RXD_ERR_EIPCS        MS(26, 0x1) /* Encap IP header error */
+#define TXGBE_RXD_ERR_SECERR       MS(27, 0x1) /* macsec or ipsec error */
+#define TXGBE_RXD_ERR_RXE          MS(29, 0x1) /* Any MAC Error */
+#define TXGBE_RXD_ERR_L4CS         MS(30, 0x1) /* TCP/UDP xsum error */
+#define TXGBE_RXD_ERR_IPCS         MS(31, 0x1) /* IP xsum error */
+#define TXGBE_RXD_ERR_CSUM(dw)     RS(dw, 30, 0x3)
+/*** bit 17-30, when PTYPE=FCOE ***/
+#define TXGBE_RXD_STAT_FCOEFS      MS(17, 0x1) /* PTYPE=FCOE, FCoE EOF/SOF */
+#define TXGBE_RXD_FCSTAT_MASK      MS(18, 0x3) /* FCoE Pkt Stat */
+#define TXGBE_RXD_FCSTAT_NOMTCH    LS(0, 18, 0x3) /* No Ctxt Match */
+#define TXGBE_RXD_FCSTAT_NODDP     LS(1, 18, 0x3) /* Ctxt w/o DDP */
+#define TXGBE_RXD_FCSTAT_FCPRSP    LS(2, 18, 0x3) /* Recv. FCP_RSP */
+#define TXGBE_RXD_FCSTAT_DDP       LS(3, 18, 0x3) /* Ctxt w/ DDP */
+#define TXGBE_RXD_FCERR_MASK       MS(20, 0x7) /* FCERR */
+#define TXGBE_RXD_FCERR_0          LS(0, 20, 0x7)
+#define TXGBE_RXD_FCERR_1          LS(1, 20, 0x7)
+#define TXGBE_RXD_FCERR_2          LS(2, 20, 0x7)
+#define TXGBE_RXD_FCERR_3          LS(3, 20, 0x7)
+#define TXGBE_RXD_FCERR_4          LS(4, 20, 0x7)
+#define TXGBE_RXD_FCERR_5          LS(5, 20, 0x7)
+#define TXGBE_RXD_FCERR_6          LS(6, 20, 0x7)
+#define TXGBE_RXD_FCERR_7          LS(7, 20, 0x7)
+
+/* @txgbe_rx_desc.dw3 */
+#define TXGBE_RXD_LENGTH(rxd)           ((rxd)->qw1.hi.len)
+#define TXGBE_RXD_VLAN(rxd)             ((rxd)->qw1.hi.tag)
+
 /*****************************************************************************
  * Transmit Descriptor
  *****************************************************************************/
@@ -145,6 +239,12 @@  struct txgbe_tx_desc {
 #define RX_RING_SZ ((TXGBE_RING_DESC_MAX + RTE_PMD_TXGBE_RX_MAX_BURST) * \
 		    sizeof(struct txgbe_rx_desc))
 
+#ifdef RTE_PMD_PACKET_PREFETCH
+#define rte_packet_prefetch(p)  rte_prefetch1(p)
+#else
+#define rte_packet_prefetch(p)  do {} while (0)
+#endif
+
 #define RTE_TXGBE_REGISTER_POLL_WAIT_10_MS  10
 #define RTE_TXGBE_WAIT_100_US               100
 
@@ -206,6 +306,8 @@  struct txgbe_rx_queue {
 	uint8_t             crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
 	uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
 	uint8_t             rx_deferred_start; /**< not in global dev start. */
+	/** flags to set in mbuf when a vlan is detected. */
+	uint64_t            vlan_flags;
 	uint64_t	    offloads; /**< Rx offloads with DEV_RX_OFFLOAD_* */
 	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
 	struct rte_mbuf fake_mbuf;