[dpdk-dev,RFC,4/9] net/avf: enable basic Rx Tx func

Message ID 1508488012-82704-5-git-send-email-jingjing.wu@intel.com (mailing list archive)
State Changes Requested, archived
Delegated to: Ferruh Yigit
Checks

Context               Check     Description
ci/checkpatch         warning   coding style issues
ci/Intel-compilation  fail      Compilation issues

Commit Message

Jingjing Wu Oct. 20, 2017, 8:26 a.m. UTC
  Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
---
 config/common_base           |   3 +
 drivers/net/avf/avf_ethdev.c |  17 +-
 drivers/net/avf/avf_log.h    |  14 +
 drivers/net/avf/avf_rxtx.c   | 740 +++++++++++++++++++++++++++++++++++++++++++
 drivers/net/avf/avf_rxtx.h   |  46 +++
 5 files changed, 819 insertions(+), 1 deletion(-)
  

Comments

Ferruh Yigit Nov. 22, 2017, 12:06 a.m. UTC | #1
On 10/20/2017 1:26 AM, Jingjing Wu wrote:
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>

<...>

> @@ -214,6 +214,9 @@ CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y
>  # Compile burst-oriented AVF PMD driver
>  #
>  CONFIG_RTE_LIBRTE_AVF_PMD=y
> +CONFIG_RTE_LIBRTE_AVF_RX_DUMP=n
> +CONFIG_RTE_LIBRTE_AVF_TX_DUMP=n

Are these config options used?

<...>

> @@ -49,4 +49,18 @@ extern int avf_logtype_driver;
>  	PMD_DRV_LOG_RAW(level, fmt "\n", ## args)
>  #define PMD_DRV_FUNC_TRACE() PMD_DRV_LOG(DEBUG, " >>")
>  
> +#ifdef RTE_LIBRTE_AVF_DEBUG_TX

Is this defined anywhere?

> +#define PMD_TX_LOG(level, fmt, args...) \
> +	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)

Should RTE_LOG_DP be used here instead?
And since the other macros use dynamic log functions, why use the static method
here? What do you think about using the new method for data path logs as well?

<...>

> +static inline void
> +avf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union avf_rx_desc *rxdp)
> +{
> +	if (rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
> +		(1 << AVF_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
> +		mb->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;

Please use a new flag instead of PKT_RX_VLAN_PKT, and please be sure the flag is
correctly used with its new meaning.

<...>

> +/* TX prep functions */
> +uint16_t
> +avf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
> +	      uint16_t nb_pkts)
> +{
> +	int i, ret;
> +	uint64_t ol_flags;
> +	struct rte_mbuf *m;
> +
> +	for (i = 0; i < nb_pkts; i++) {
> +		m = tx_pkts[i];
> +		ol_flags = m->ol_flags;
> +
> +		/* m->nb_segs is uint8_t, so nb_segs is always less than
> +		 * AVF_TX_MAX_SEG.
> +		 * We check only a condition for nb_segs > AVF_TX_MAX_MTU_SEG.
> +		 */

This is wrong; nb_segs is 16 bits now, and this check has already been updated in i40e.
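
For reference, the updated i40e check validates the segment count in both branches
now that nb_segs is 16 bits. A comparable AVF check could look roughly like the
sketch below; this is an illustration only (not the submitted code), and it assumes
AVF_TSO_MAX_SEG from avf_rxtx.h is the intended TSO segment limit:

		/* Sketch of the corrected check: nb_segs is 16 bits, so it
		 * must be validated explicitly for both the TSO and non-TSO
		 * cases.
		 */
		if (!(ol_flags & PKT_TX_TCP_SEG)) {
			if (m->nb_segs > AVF_TX_MAX_MTU_SEG) {
				rte_errno = EINVAL;
				return i;
			}
		} else if (m->nb_segs > AVF_TSO_MAX_SEG ||
			   m->tso_segsz < AVF_MIN_TSO_MSS ||
			   m->tso_segsz > AVF_MAX_TSO_MSS) {
			/* MSS outside the supported range is treated as invalid */
			rte_errno = EINVAL;
			return i;
		}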

<...>
  
Stephen Hemminger Nov. 22, 2017, 12:57 a.m. UTC | #2
On Tue, 21 Nov 2017 16:06:24 -0800
Ferruh Yigit <ferruh.yigit@intel.com> wrote:

> On 10/20/2017 1:26 AM, Jingjing Wu wrote:
> > Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>  
> 
> <...>
> 
> > @@ -214,6 +214,9 @@ CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y
> >  # Compile burst-oriented AVF PMD driver
> >  #
> >  CONFIG_RTE_LIBRTE_AVF_PMD=y
> > +CONFIG_RTE_LIBRTE_AVF_RX_DUMP=n
> > +CONFIG_RTE_LIBRTE_AVF_TX_DUMP=n  
> 
> Are these config options used?
> 
> <...>
> 
> > @@ -49,4 +49,18 @@ extern int avf_logtype_driver;
> >  	PMD_DRV_LOG_RAW(level, fmt "\n", ## args)
> >  #define PMD_DRV_FUNC_TRACE() PMD_DRV_LOG(DEBUG, " >>")
> >  
> > +#ifdef RTE_LIBRTE_AVF_DEBUG_TX  
> 
> Is this defined anywhere?
> 
> > +#define PMD_TX_LOG(level, fmt, args...) \
> > +	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)  
> 
> Instead should RTE_LOG_DP used?
> And since other macros uses dynamic log functions, why here use static method,
> what do you think using new method for data path logs as well?
> 
> <...>
> 
> > +static inline void
> > +avf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union avf_rx_desc *rxdp)
> > +{
> > +	if (rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
> > +		(1 << AVF_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
> > +		mb->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;  
> 
> Please new flag instead of PKT_RX_VLAN_PKT and please be sure flag is correctly
> used with its new meaning.
> 
> <...>
> 
> > +/* TX prep functions */
> > +uint16_t
> > +avf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
> > +	      uint16_t nb_pkts)
> > +{
> > +	int i, ret;
> > +	uint64_t ol_flags;
> > +	struct rte_mbuf *m;
> > +
> > +	for (i = 0; i < nb_pkts; i++) {
> > +		m = tx_pkts[i];
> > +		ol_flags = m->ol_flags;
> > +
> > +		/* m->nb_segs is uint8_t, so nb_segs is always less than
> > +		 * AVF_TX_MAX_SEG.
> > +		 * We check only a condition for nb_segs > AVF_TX_MAX_MTU_SEG.
> > +		 */  
> 
> This is wrong, nb_segs is 16bits now, this check has been updated in i40e already.
> 
> <...>

Most drivers base their code on one of the legacy Intel drivers.
Why not fix ixgbe (or similar) to be a "follow this model" reference?

It is unreasonable to expect new drivers to follow a different pattern.
  
Jingjing Wu Nov. 22, 2017, 7:55 a.m. UTC | #3
> -----Original Message-----
> From: Yigit, Ferruh
> Sent: Wednesday, November 22, 2017 8:06 AM
> To: Wu, Jingjing <jingjing.wu@intel.com>; dev@dpdk.org
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>
> Subject: Re: [dpdk-dev] [RFC 4/9] net/avf: enable basic Rx Tx func
>
> On 10/20/2017 1:26 AM, Jingjing Wu wrote:
> > Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
>
> <...>
>
> > @@ -214,6 +214,9 @@ CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y
> >  # Compile burst-oriented AVF PMD driver  #
> > CONFIG_RTE_LIBRTE_AVF_PMD=y
> > +CONFIG_RTE_LIBRTE_AVF_RX_DUMP=n
> > +CONFIG_RTE_LIBRTE_AVF_TX_DUMP=n
>
> Are these config options used?
>
Yes, some macros are defined in avf_rxtx.h for dump descriptors. Will merge them with AVF_DEBUG_TX/RX.

> <...>
>
> > @@ -49,4 +49,18 @@ extern int avf_logtype_driver;
> >  	PMD_DRV_LOG_RAW(level, fmt "\n", ## args)  #define
> > PMD_DRV_FUNC_TRACE() PMD_DRV_LOG(DEBUG, " >>")
> >
> > +#ifdef RTE_LIBRTE_AVF_DEBUG_TX
>
> Is this defined anywhere?

Will merge it with AVF_TX_DUMP.

>
> > +#define PMD_TX_LOG(level, fmt, args...) \
> > +	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
>
> Should RTE_LOG_DP be used here instead?
> And since the other macros use dynamic log functions, why use the static method
> here? What do you think about using the new method for data path logs as well?
>
This is used for fast path debug, so static macro will benefit performance.

> <...>
>
> > +static inline void
> > +avf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union avf_rx_desc
> > +*rxdp) {
> > +	if (rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
> > +		(1 << AVF_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
> > +		mb->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
>
> Please use a new flag instead of PKT_RX_VLAN_PKT, and please be sure the flag is
> correctly used with its new meaning.
>
> <...>
>
> > +/* TX prep functions */
> > +uint16_t
> > +avf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
> > +	      uint16_t nb_pkts)
> > +{
> > +	int i, ret;
> > +	uint64_t ol_flags;
> > +	struct rte_mbuf *m;
> > +
> > +	for (i = 0; i < nb_pkts; i++) {
> > +		m = tx_pkts[i];
> > +		ol_flags = m->ol_flags;
> > +
> > +		/* m->nb_segs is uint8_t, so nb_segs is always less than
> > +		 * AVF_TX_MAX_SEG.
> > +		 * We check only a condition for nb_segs > AVF_TX_MAX_MTU_SEG.
> > +		 */
>
> This is wrong; nb_segs is 16 bits now, and this check has already been updated
> in i40e.
>
Will change, Thanks

> <...>
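
The descriptor-dump macros mentioned above (AVF_DUMP_RX_DESC / AVF_DUMP_TX_DESC,
used in avf_rxtx.c) are typically wired to such config options roughly as in the
sketch below. This is an illustration only, not the submitted code; the helper
avf_dump_rx_descriptor matches the declaration visible at the end of avf_rxtx.h,
the rest is an assumption:

#ifdef RTE_LIBRTE_AVF_RX_DUMP
/* Dump the descriptor only when the build-time dump option is enabled. */
#define AVF_DUMP_RX_DESC(rxq, desc, rx_id) \
	avf_dump_rx_descriptor(rxq, desc, rx_id)
#else
#define AVF_DUMP_RX_DESC(rxq, desc, rx_id) do { } while (0)
#endif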
  
Ferruh Yigit Nov. 22, 2017, 10:38 p.m. UTC | #4
On 11/21/2017 11:55 PM, Wu, Jingjing wrote:
> 
> 
>> -----Original Message-----
>> From: Yigit, Ferruh
>> Sent: Wednesday, November 22, 2017 8:06 AM
>> To: Wu, Jingjing <jingjing.wu@intel.com>; dev@dpdk.org
>> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>
>> Subject: Re: [dpdk-dev] [RFC 4/9] net/avf: enable basic Rx Tx func
>>
>> On 10/20/2017 1:26 AM, Jingjing Wu wrote:
>>> Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
>>
>> <...>
>>
>>> @@ -214,6 +214,9 @@ CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y
>>>  # Compile burst-oriented AVF PMD driver  #
>>> CONFIG_RTE_LIBRTE_AVF_PMD=y
>>> +CONFIG_RTE_LIBRTE_AVF_RX_DUMP=n
>>> +CONFIG_RTE_LIBRTE_AVF_TX_DUMP=n
>>
>> Are these config options used?
>>
> Yes, some macros are defined in avf_rxtx.h for dump descriptors. Will merge them with AVF_DEBUG_TX/RX.
> 
>> <...>
>>
>>> @@ -49,4 +49,18 @@ extern int avf_logtype_driver;
>>>  	PMD_DRV_LOG_RAW(level, fmt "\n", ## args)  #define
>>> PMD_DRV_FUNC_TRACE() PMD_DRV_LOG(DEBUG, " >>")
>>>
>>> +#ifdef RTE_LIBRTE_AVF_DEBUG_TX
>>
>> Is this defined anywhere?
> Will merge it with AVF_TX_DUMP.
> 
>>
>>> +#define PMD_TX_LOG(level, fmt, args...) \
>>> +	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
>>
>> Should RTE_LOG_DP be used here instead?
>> And since the other macros use dynamic log functions, why use the static method
>> here? What do you think about using the new method for data path logs as well?
>>
> This is used for fast path debug, so static macro will benefit performance.

How will it benefit?

The PMD_TX_LOG macro is controlled by a specific compile-time option,
RTE_LIBRTE_AVF_DEBUG_TX. If that config is disabled, the logging won't be part of
the binary at all.

When that config option is enabled, what is the difference between the macro and a
dynamic debug call? Eventually both are rte_log calls. Only the macro has a
dependency on the static RTE_LOGTYPE_xxx definitions.
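
For comparison, a dynamically controlled data-path log would register its own
logtype and route through rte_log(), so its level can be changed at runtime per
driver. A minimal sketch under these assumptions (the logtype name "pmd.net.avf.rx"
and the init helper are illustrative, not part of the patch):

/* Registered once at driver load, e.g. from avf_ethdev.c. */
int avf_logtype_rx;

void
avf_init_rx_log(void)
{
	avf_logtype_rx = rte_log_register("pmd.net.avf.rx");
	if (avf_logtype_rx >= 0)
		rte_log_set_level(avf_logtype_rx, RTE_LOG_NOTICE);
}

/* Data-path log macro using the dynamic logtype instead of RTE_LOGTYPE_PMD. */
#define PMD_RX_LOG(level, fmt, args...) \
	rte_log(RTE_LOG_ ## level, avf_logtype_rx, \
		"%s(): " fmt "\n", __func__, ## args)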

> 
>> <...>
>>
>>> +static inline void
>>> +avf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union avf_rx_desc
>>> +*rxdp) {
>>> +	if (rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
>>> +		(1 << AVF_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
>>> +		mb->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
>>
>> Please use a new flag instead of PKT_RX_VLAN_PKT, and please be sure the flag is
>> correctly used with its new meaning.

Just a reminder on this one: the new flag is "PKT_RX_VLAN", which means the mbuf
contains VLAN information.
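
With that 17.11 mbuf flag rename, the assignment in avf_rxd_to_vlan_tci() would
become something like the line below (a sketch of the requested change, not the
submitted code):

		/* PKT_RX_VLAN replaces the old PKT_RX_VLAN_PKT flag */
		mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;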

>>
>> <...>
>>
>>> +/* TX prep functions */
>>> +uint16_t
>>> +avf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
>>> +	      uint16_t nb_pkts)
>>> +{
>>> +	int i, ret;
>>> +	uint64_t ol_flags;
>>> +	struct rte_mbuf *m;
>>> +
>>> +	for (i = 0; i < nb_pkts; i++) {
>>> +		m = tx_pkts[i];
>>> +		ol_flags = m->ol_flags;
>>> +
>>> +		/* m->nb_segs is uint8_t, so nb_segs is always less than
>>> +		 * AVF_TX_MAX_SEG.
>>> +		 * We check only a condition for nb_segs >
>> AVF_TX_MAX_MTU_SEG.
>>> +		 */
>>
>> This is wrong; nb_segs is 16 bits now, and this check has already been updated
>> in i40e.
>>
> Will change, Thanks
> 
>> <...>
  
Ferruh Yigit Nov. 22, 2017, 11:15 p.m. UTC | #5
On 11/21/2017 4:57 PM, Stephen Hemminger wrote:
> On Tue, 21 Nov 2017 16:06:24 -0800
> Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> 
>> <...>
> 
> Most drivers base their code on one of the legacy Intel drivers.
> Why not fix ixgbe (or similar) to be a "follow this model" reference?
> 
> It is unreasonable to expect new drivers to follow a different pattern.

You are right, updating existing drivers would increase the chance of new drivers
being correct the first time.

That said, it is harder to get community-driven updates for existing drivers, but
easier to ask new drivers to comply with the latest libraries, since there is
already a resource working on developing the driver.
  
Jingjing Wu Nov. 23, 2017, 1:17 a.m. UTC | #6
> -----Original Message-----
> From: Yigit, Ferruh
> Sent: Thursday, November 23, 2017 6:39 AM
> To: Wu, Jingjing <jingjing.wu@intel.com>; dev@dpdk.org
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>
> Subject: Re: [dpdk-dev] [RFC 4/9] net/avf: enable basic Rx Tx func
>
> On 11/21/2017 11:55 PM, Wu, Jingjing wrote:
>
> <...>
>
> >>> +#define PMD_TX_LOG(level, fmt, args...) \
> >>> +	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
> >>
> >> Should RTE_LOG_DP be used here instead?
> >> And since the other macros use dynamic log functions, why use the static
> >> method here? What do you think about using the new method for data path
> >> logs as well?
> >>
> > This is used for fast path debug, so static macro will benefit performance.
>
> How will it benefit?
>
> The PMD_TX_LOG macro is controlled by a specific compile-time option,
> RTE_LIBRTE_AVF_DEBUG_TX. If that config is disabled, the logging won't be part
> of the binary at all.
>
> When that config option is enabled, what is the difference between the macro and
> a dynamic debug call? Eventually both are rte_log calls. Only the macro has a
> dependency on the static RTE_LOGTYPE_xxx definitions.
>
I was thinking about RTE_LOG. If it is RTE_LOG_DP, that is fine for performance,
but it cannot distinguish which driver we are going to debug, because there is
only one check against RTE_LOG_DP_LEVEL. Think about the case where there is more
than one driver but we just want to debug one of them.
 
Thanks
Jingjing
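
One possible middle ground, sketched below for illustration only (not part of the
patch): keep the per-driver compile-time guard, so only the driver being debugged
carries the logging code, but issue the call through RTE_LOG_DP so it is also
gated by RTE_LOG_DP_LEVEL:

#ifdef RTE_LIBRTE_AVF_DEBUG_TX
/* Compiled in only for the AVF driver; still subject to RTE_LOG_DP_LEVEL. */
#define PMD_TX_LOG(level, fmt, args...) \
	RTE_LOG_DP(level, PMD, "%s(): " fmt "\n", __func__, ## args)
#else
#define PMD_TX_LOG(level, fmt, args...) do { } while (0)
#endif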
  

Patch

diff --git a/config/common_base b/config/common_base
index e5f96ee..dac4cd3 100644
--- a/config/common_base
+++ b/config/common_base
@@ -214,6 +214,9 @@  CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y
 # Compile burst-oriented AVF PMD driver
 #
 CONFIG_RTE_LIBRTE_AVF_PMD=y
+CONFIG_RTE_LIBRTE_AVF_RX_DUMP=n
+CONFIG_RTE_LIBRTE_AVF_TX_DUMP=n
+CONFIG_RTE_LIBRTE_AVF_16BYTE_RX_DESC=n
 
 #
 # Compile burst-oriented Mellanox ConnectX-3 (MLX4) PMD
diff --git a/drivers/net/avf/avf_ethdev.c b/drivers/net/avf/avf_ethdev.c
index f968314..b4d3153 100644
--- a/drivers/net/avf/avf_ethdev.c
+++ b/drivers/net/avf/avf_ethdev.c
@@ -229,9 +229,12 @@  avf_init_queues(struct rte_eth_dev *dev)
 		if (ret != AVF_SUCCESS)
 			break;
 	}
-	/* TODO: set rx/tx function to vector/scatter/single-segment
+	/* set rx/tx function to vector/scatter/single-segment
 	 * accoding to parameters
 	 */
+	avf_set_rx_function(dev);
+	avf_set_tx_function(dev);
+
 	return ret;
 }
 
@@ -592,7 +595,19 @@  avf_dev_init(struct rte_eth_dev *eth_dev)
 
 	/* assign ops func pointer */
 	eth_dev->dev_ops = &avf_eth_dev_ops;
+	eth_dev->rx_pkt_burst = &avf_recv_pkts;
+	eth_dev->tx_pkt_burst = &avf_xmit_pkts;
+	eth_dev->tx_pkt_prepare = &avf_prep_pkts;
 
+	/* For secondary processes, we don't initialise any further as primary
+	 * has already done this work. Only check if we need a different RX
+	 * and TX function.
+	 */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		avf_set_rx_function(eth_dev);
+		avf_set_tx_function(eth_dev);
+		return 0;
+	}
 	rte_eth_copy_pci_info(eth_dev, pci_dev);
 	eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE |
 				    RTE_ETH_DEV_INTR_LSC;
diff --git a/drivers/net/avf/avf_log.h b/drivers/net/avf/avf_log.h
index 431f0f3..782a6e7 100644
--- a/drivers/net/avf/avf_log.h
+++ b/drivers/net/avf/avf_log.h
@@ -49,4 +49,18 @@  extern int avf_logtype_driver;
 	PMD_DRV_LOG_RAW(level, fmt "\n", ## args)
 #define PMD_DRV_FUNC_TRACE() PMD_DRV_LOG(DEBUG, " >>")
 
+#ifdef RTE_LIBRTE_AVF_DEBUG_TX
+#define PMD_TX_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+#else
+#define PMD_TX_LOG(level, fmt, args...) do { } while (0)
+#endif
+
+#ifdef RTE_LIBRTE_AVF_DEBUG_TX_FREE
+#define PMD_TX_FREE_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+#else
+#define PMD_TX_FREE_LOG(level, fmt, args...) do { } while (0)
+#endif
+
 #endif /* _AVF_LOGS_H_ */
diff --git a/drivers/net/avf/avf_rxtx.c b/drivers/net/avf/avf_rxtx.c
index 28f0b5e..95992fc 100644
--- a/drivers/net/avf/avf_rxtx.c
+++ b/drivers/net/avf/avf_rxtx.c
@@ -644,4 +644,744 @@  avf_stop_queues(struct rte_eth_dev *dev)
 		reset_rx_queue(rxq);
 		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
 	}
+}
+
+static inline void
+avf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union avf_rx_desc *rxdp)
+{
+	if (rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
+		(1 << AVF_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
+		mb->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
+		mb->vlan_tci =
+			rte_le_to_cpu_16(rxdp->wb.qword0.lo_dword.l2tag1);
+	} else {
+		mb->vlan_tci = 0;
+	}
+}
+
+/* Translate the rx descriptor status and error fields to pkt flags */
+static inline uint64_t
+avf_rxd_to_pkt_flags(uint64_t qword)
+{
+	uint64_t flags;
+	uint64_t error_bits = (qword >> AVF_RXD_QW1_ERROR_SHIFT);
+
+#define AVF_RX_ERR_BITS 0x3f
+
+	/* Check if RSS_HASH */
+	flags = (((qword >> AVF_RX_DESC_STATUS_FLTSTAT_SHIFT) &
+					AVF_RX_DESC_FLTSTAT_RSS_HASH) ==
+			AVF_RX_DESC_FLTSTAT_RSS_HASH) ? PKT_RX_RSS_HASH : 0;
+
+	if (likely((error_bits & AVF_RX_ERR_BITS) == 0)) {
+		flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
+		return flags;
+	}
+
+	if (unlikely(error_bits & (1 << AVF_RX_DESC_ERROR_IPE_SHIFT)))
+		flags |= PKT_RX_IP_CKSUM_BAD;
+	else
+		flags |= PKT_RX_IP_CKSUM_GOOD;
+
+	if (unlikely(error_bits & (1 << AVF_RX_DESC_ERROR_L4E_SHIFT)))
+		flags |= PKT_RX_L4_CKSUM_BAD;
+	else
+		flags |= PKT_RX_L4_CKSUM_GOOD;
+
+	/* TODO: Oversize error bit is not processed here */
+
+	return flags;
+}
+
+/* implement recv_pkts */
+uint16_t
+avf_recv_pkts(void *rx_queue,
+			struct rte_mbuf **rx_pkts,
+			uint16_t nb_pkts)
+{
+	volatile union avf_rx_desc *rx_ring;
+	volatile union avf_rx_desc *rxdp;
+	struct avf_rx_queue *rxq;
+	union avf_rx_desc rxd;
+	struct rte_mbuf *rxe;
+	struct rte_eth_dev *dev;
+	struct rte_mbuf *rxm;
+	struct rte_mbuf *nmb;
+	uint16_t nb_rx;
+	uint32_t rx_status;
+	uint64_t qword1;
+	uint16_t rx_packet_len;
+	uint16_t rx_id, nb_hold;
+	uint64_t dma_addr;
+	uint64_t pkt_flags;
+
+	nb_rx = 0;
+	nb_hold = 0;
+	rxq = rx_queue;
+	rx_id = rxq->rx_tail;
+	rx_ring = rxq->rx_ring;
+
+	while (nb_rx < nb_pkts) {
+		rxdp = &rx_ring[rx_id];
+		qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
+		rx_status = (qword1 & AVF_RXD_QW1_STATUS_MASK) >>
+			    AVF_RXD_QW1_STATUS_SHIFT;
+
+		/* Check the DD bit first */
+		if (!(rx_status & (1 << AVF_RX_DESC_STATUS_DD_SHIFT)))
+			break;
+		AVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
+
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
+		if (unlikely(!nmb)) {
+			/* TODO: count rx_mbuf_alloc_failed */
+			break;
+		}
+
+		rxd = *rxdp;
+		nb_hold++;
+		rxe = rxq->sw_ring[rx_id];
+		rx_id++;
+		if (unlikely(rx_id == rxq->nb_rx_desc))
+			rx_id = 0;
+
+		/* Prefetch next mbuf */
+		rte_prefetch0(rxq->sw_ring[rx_id]);
+
+		/* When next RX descriptor is on a cache line boundary,
+		 * prefetch the next 4 RX descriptors and next 8 pointers
+		 * to mbufs.
+		 */
+		if ((rx_id & 0x3) == 0) {
+			rte_prefetch0(&rx_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id]);
+		}
+		rxm = rxe;
+		rxe = nmb;
+		dma_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
+		rxdp->read.hdr_addr = 0;
+		rxdp->read.pkt_addr = dma_addr;
+
+		rx_packet_len = ((qword1 & AVF_RXD_QW1_LENGTH_PBUF_MASK) >>
+				AVF_RXD_QW1_LENGTH_PBUF_SHIFT) - rxq->crc_len;
+
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
+		rxm->nb_segs = 1;
+		rxm->next = NULL;
+		rxm->pkt_len = rx_packet_len;
+		rxm->data_len = rx_packet_len;
+		rxm->port = rxq->port_id;
+		rxm->ol_flags = 0;
+		avf_rxd_to_vlan_tci(rxm, &rxd);
+		pkt_flags = avf_rxd_to_pkt_flags(qword1);
+		/* TODO: support rxm->packet_type here */
+
+		if (pkt_flags & PKT_RX_RSS_HASH)
+			rxm->hash.rss =
+				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
+
+		rxm->ol_flags |= pkt_flags;
+
+		rx_pkts[nb_rx++] = rxm;
+	}
+	rxq->rx_tail = rx_id;
+
+	/* If the number of free RX descriptors is greater than the RX free
+	 * threshold of the queue, advance the receive tail register of queue.
+	 * Update that register with the value of the last processed RX
+	 * descriptor minus 1.
+	 */
+	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
+	if (nb_hold > rxq->rx_free_thresh) {
+		rx_id = (uint16_t)((rx_id == 0) ?
+			(rxq->nb_rx_desc - 1) : (rx_id - 1));
+		AVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+		nb_hold = 0;
+	}
+	rxq->nb_rx_hold = nb_hold;
+
+	return nb_rx;
+}
+
+/* implement recv_scattered_pkts  */
+uint16_t
+avf_recv_scattered_pkts(void *rx_queue,
+			struct rte_mbuf **rx_pkts,
+			uint16_t nb_pkts)
+{
+	struct avf_rx_queue *rxq = rx_queue;
+	union avf_rx_desc rxd;
+	struct rte_mbuf *rxe;
+	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
+	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
+	struct rte_mbuf *nmb, *rxm;
+	uint16_t rx_id = rxq->rx_tail;
+	uint16_t nb_rx = 0, nb_hold = 0, rx_packet_len;
+	struct rte_eth_dev *dev;
+	uint32_t rx_status;
+	uint64_t qword1;
+	uint64_t dma_addr;
+	uint64_t pkt_flags;
+
+	volatile union avf_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union avf_rx_desc *rxdp;
+
+	while (nb_rx < nb_pkts) {
+		rxdp = &rx_ring[rx_id];
+		qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
+		rx_status = (qword1 & AVF_RXD_QW1_STATUS_MASK) >>
+			    AVF_RXD_QW1_STATUS_SHIFT;
+
+		/* Check the DD bit */
+		if (!(rx_status & (1 << AVF_RX_DESC_STATUS_DD_SHIFT)))
+			break;
+		AVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
+
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
+		if (unlikely(!nmb)) {
+			/* TODO: support count rx_mbuf_alloc_failed */
+			break;
+		}
+
+		rxd = *rxdp;
+		nb_hold++;
+		rxe = rxq->sw_ring[rx_id];
+		rx_id++;
+		if (rx_id == rxq->nb_rx_desc)
+			rx_id = 0;
+
+		/* Prefetch next mbuf */
+		rte_prefetch0(rxq->sw_ring[rx_id]);
+
+		/* When next RX descriptor is on a cache line boundary,
+		 * prefetch the next 4 RX descriptors and next 8 pointers
+		 * to mbufs.
+		 */
+		if ((rx_id & 0x3) == 0) {
+			rte_prefetch0(&rx_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id]);
+		}
+
+		rxm = rxe;
+		rxe = nmb;
+		dma_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
+
+		/* Set data buffer address and data length of the mbuf */
+		rxdp->read.hdr_addr = 0;
+		rxdp->read.pkt_addr = dma_addr;
+		rx_packet_len = (qword1 & AVF_RXD_QW1_LENGTH_PBUF_MASK) >>
+				 AVF_RXD_QW1_LENGTH_PBUF_SHIFT;
+		rxm->data_len = rx_packet_len;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+
+		/* If this is the first buffer of the received packet, set the
+		 * pointer to the first mbuf of the packet and initialize its
+		 * context. Otherwise, update the total length and the number
+		 * of segments of the current scattered packet, and update the
+		 * pointer to the last mbuf of the current packet.
+		 */
+		if (!first_seg) {
+			first_seg = rxm;
+			first_seg->nb_segs = 1;
+			first_seg->pkt_len = rx_packet_len;
+		} else {
+			first_seg->pkt_len =
+				(uint16_t)(first_seg->pkt_len +
+						rx_packet_len);
+			first_seg->nb_segs++;
+			last_seg->next = rxm;
+		}
+
+		/* If this is not the last buffer of the received packet,
+		 * update the pointer to the last mbuf of the current scattered
+		 * packet and continue to parse the RX ring.
+		 */
+		if (!(rx_status & (1 << AVF_RX_DESC_STATUS_EOF_SHIFT))) {
+			last_seg = rxm;
+			continue;
+		}
+
+		/* This is the last buffer of the received packet. If the CRC
+		 * is not stripped by the hardware:
+		 *  - Subtract the CRC length from the total packet length.
+		 *  - If the last buffer only contains the whole CRC or a part
+		 *  of it, free the mbuf associated to the last buffer. If part
+		 *  of the CRC is also contained in the previous mbuf, subtract
+		 *  the length of that CRC part from the data length of the
+		 *  previous mbuf.
+		 */
+		rxm->next = NULL;
+		if (unlikely(rxq->crc_len > 0)) {
+			first_seg->pkt_len -= ETHER_CRC_LEN;
+			if (rx_packet_len <= ETHER_CRC_LEN) {
+				rte_pktmbuf_free_seg(rxm);
+				first_seg->nb_segs--;
+				last_seg->data_len =
+					(uint16_t)(last_seg->data_len -
+					(ETHER_CRC_LEN - rx_packet_len));
+				last_seg->next = NULL;
+			} else
+				rxm->data_len = (uint16_t)(rx_packet_len -
+								ETHER_CRC_LEN);
+		}
+
+		first_seg->port = rxq->port_id;
+		first_seg->ol_flags = 0;
+		avf_rxd_to_vlan_tci(first_seg, &rxd);
+		pkt_flags = avf_rxd_to_pkt_flags(qword1);
+		/* TODO: support first_seg->packet_type here */
+
+		if (pkt_flags & PKT_RX_RSS_HASH)
+			first_seg->hash.rss =
+				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
+
+		first_seg->ol_flags |= pkt_flags;
+
+		/* Prefetch data of first segment, if configured to do so. */
+		rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr,
+					  first_seg->data_off));
+		rx_pkts[nb_rx++] = first_seg;
+		first_seg = NULL;
+	}
+
+	/* Record index of the next RX descriptor to probe. */
+	rxq->rx_tail = rx_id;
+	rxq->pkt_first_seg = first_seg;
+	rxq->pkt_last_seg = last_seg;
+
+	/* If the number of free RX descriptors is greater than the RX free
+	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
+	 * register. Update the RDT with the value of the last processed RX
+	 * descriptor minus 1, to guarantee that the RDT register is never
+	 * equal to the RDH register, which creates a "full" ring situation
+	 * from the hardware point of view.
+	 */
+	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
+	if (nb_hold > rxq->rx_free_thresh) {
+		rx_id = (uint16_t)(rx_id == 0 ?
+			(rxq->nb_rx_desc - 1) : (rx_id - 1));
+		AVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+		nb_hold = 0;
+	}
+	rxq->nb_rx_hold = nb_hold;
+
+	return nb_rx;
+}
+
+static inline int
+avf_xmit_cleanup(struct avf_tx_queue *txq)
+{
+	struct avf_tx_entry *sw_ring = txq->sw_ring;
+	uint16_t last_desc_cleaned = txq->last_desc_cleaned;
+	uint16_t nb_tx_desc = txq->nb_tx_desc;
+	uint16_t desc_to_clean_to;
+	uint16_t nb_tx_to_clean;
+
+	volatile struct avf_tx_desc *txd = txq->tx_ring;
+
+	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh);
+	if (desc_to_clean_to >= nb_tx_desc)
+		desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
+
+	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
+	if ((txd[desc_to_clean_to].cmd_type_offset_bsz &
+			rte_cpu_to_le_64(AVF_TXD_QW1_DTYPE_MASK)) !=
+			rte_cpu_to_le_64(AVF_TX_DESC_DTYPE_DESC_DONE)) {
+		PMD_TX_FREE_LOG(DEBUG, "TX descriptor %4u is not done "
+			"(port=%d queue=%d)", desc_to_clean_to,
+				txq->port_id, txq->queue_id);
+		return -1;
+	}
+
+	if (last_desc_cleaned > desc_to_clean_to)
+		nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
+							desc_to_clean_to);
+	else
+		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
+					last_desc_cleaned);
+
+	txd[desc_to_clean_to].cmd_type_offset_bsz = 0;
+
+	txq->last_desc_cleaned = desc_to_clean_to;
+	txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean);
+
+	return 0;
+}
+
+/* Check if the context descriptor is needed for TX offloading */
+static inline uint16_t
+avf_calc_context_desc(uint64_t flags)
+{
+	static uint64_t mask = PKT_TX_TCP_SEG;
+
+	return (flags & mask) ? 1 : 0;
+}
+
+static inline void
+avf_txd_enable_checksum(uint64_t ol_flags,
+			uint32_t *td_cmd,
+			uint32_t *td_offset,
+			union avf_tx_offload tx_offload)
+{
+	/* Set MACLEN */
+	*td_offset |= (tx_offload.l2_len >> 1) <<
+		      AVF_TX_DESC_LENGTH_MACLEN_SHIFT;
+
+	/* Enable L3 checksum offloads */
+	if (ol_flags & PKT_TX_IP_CKSUM) {
+		*td_cmd |= AVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
+		*td_offset |= (tx_offload.l3_len >> 2) <<
+			      AVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+	} else if (ol_flags & PKT_TX_IPV4) {
+		*td_cmd |= AVF_TX_DESC_CMD_IIPT_IPV4;
+		*td_offset |= (tx_offload.l3_len >> 2) <<
+			      AVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+	} else if (ol_flags & PKT_TX_IPV6) {
+		*td_cmd |= AVF_TX_DESC_CMD_IIPT_IPV6;
+		*td_offset |= (tx_offload.l3_len >> 2) <<
+			      AVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+	}
+
+	if (ol_flags & PKT_TX_TCP_SEG) {
+		*td_cmd |= AVF_TX_DESC_CMD_L4T_EOFT_TCP;
+		*td_offset |= (tx_offload.l4_len >> 2) <<
+			      AVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+		return;
+	}
+
+	/* Enable L4 checksum offloads */
+	switch (ol_flags & PKT_TX_L4_MASK) {
+	case PKT_TX_TCP_CKSUM:
+		*td_cmd |= AVF_TX_DESC_CMD_L4T_EOFT_TCP;
+		*td_offset |= (sizeof(struct tcp_hdr) >> 2) <<
+			      AVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+		break;
+	case PKT_TX_SCTP_CKSUM:
+		*td_cmd |= AVF_TX_DESC_CMD_L4T_EOFT_SCTP;
+		*td_offset |= (sizeof(struct sctp_hdr) >> 2) <<
+			      AVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+		break;
+	case PKT_TX_UDP_CKSUM:
+		*td_cmd |= AVF_TX_DESC_CMD_L4T_EOFT_UDP;
+		*td_offset |= (sizeof(struct udp_hdr) >> 2) <<
+			      AVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+		break;
+	default:
+		break;
+	}
+}
+
+/* set TSO context descriptor
+ * support IP -> L4 and IP -> IP -> L4
+ */
+static inline uint64_t
+avf_set_tso_ctx(struct rte_mbuf *mbuf, union avf_tx_offload tx_offload)
+{
+	uint64_t ctx_desc = 0;
+	uint32_t cd_cmd, hdr_len, cd_tso_len;
+
+	if (!tx_offload.l4_len) {
+		PMD_DRV_LOG(DEBUG, "L4 length set to 0");
+		return ctx_desc;
+	}
+
+	/* in case of non tunneling packet, the outer_l2_len and
+	 * outer_l3_len must be 0.
+	 */
+	hdr_len = tx_offload.outer_l2_len +
+		  tx_offload.outer_l3_len +
+		  tx_offload.l2_len +
+		  tx_offload.l3_len +
+		  tx_offload.l4_len;
+
+	cd_cmd = AVF_TX_CTX_DESC_TSO;
+	cd_tso_len = mbuf->pkt_len - hdr_len;
+	ctx_desc |= ((uint64_t)cd_cmd << AVF_TXD_CTX_QW1_CMD_SHIFT) |
+		     ((uint64_t)cd_tso_len << AVF_TXD_CTX_QW1_TSO_LEN_SHIFT) |
+		     ((uint64_t)mbuf->tso_segsz << AVF_TXD_CTX_QW1_MSS_SHIFT);
+
+	return ctx_desc;
+}
+
+/* Construct the tx flags */
+static inline uint64_t
+avf_build_ctob(uint32_t td_cmd, uint32_t td_offset, unsigned int size,
+	       uint32_t td_tag)
+{
+	return rte_cpu_to_le_64(AVF_TX_DESC_DTYPE_DATA |
+				((uint64_t)td_cmd  << AVF_TXD_QW1_CMD_SHIFT) |
+				((uint64_t)td_offset <<
+				 AVF_TXD_QW1_OFFSET_SHIFT) |
+				((uint64_t)size  <<
+				 AVF_TXD_QW1_TX_BUF_SZ_SHIFT) |
+				((uint64_t)td_tag  <<
+				 AVF_TXD_QW1_L2TAG1_SHIFT));
+}
+
+/* TX function */
+uint16_t
+avf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	volatile struct avf_tx_desc *txd;
+	volatile struct avf_tx_desc *txr;
+	struct avf_tx_queue *txq;
+	struct avf_tx_entry *sw_ring;
+	struct avf_tx_entry *txe, *txn;
+	struct rte_mbuf *tx_pkt;
+	struct rte_mbuf *m_seg;
+	uint16_t tx_id;
+	uint16_t nb_tx;
+	uint32_t td_cmd;
+	uint32_t td_offset;
+	uint32_t td_tag;
+	uint64_t ol_flags;
+	uint16_t nb_used;
+	uint16_t nb_ctx;
+	uint16_t tx_last;
+	uint16_t slen;
+	uint64_t buf_dma_addr;
+	union avf_tx_offload tx_offload = {0};
+
+	txq = tx_queue;
+	sw_ring = txq->sw_ring;
+	txr = txq->tx_ring;
+	tx_id = txq->tx_tail;
+	txe = &sw_ring[tx_id];
+
+	/* Check if the descriptor ring needs to be cleaned. */
+	if (txq->nb_free < txq->free_thresh)
+		avf_xmit_cleanup(txq);
+
+	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+		td_cmd = 0;
+		td_tag = 0;
+		td_offset = 0;
+
+		tx_pkt = *tx_pkts++;
+		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
+
+		ol_flags = tx_pkt->ol_flags;
+		tx_offload.l2_len = tx_pkt->l2_len;
+		tx_offload.l3_len = tx_pkt->l3_len;
+		tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
+		tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
+		tx_offload.l4_len = tx_pkt->l4_len;
+		tx_offload.tso_segsz = tx_pkt->tso_segsz;
+
+		/* Calculate the number of context descriptors needed. */
+		nb_ctx = avf_calc_context_desc(ol_flags);
+
+		/* The number of descriptors that must be allocated for
+		 * a packet equals to the number of the segments of that
+		 * packet plus 1 context descriptor if needed.
+		 */
+		nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
+		tx_last = (uint16_t)(tx_id + nb_used - 1);
+
+		/* Circular ring */
+		if (tx_last >= txq->nb_tx_desc)
+			tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
+
+		if (nb_used > txq->nb_free) {
+			if (avf_xmit_cleanup(txq)) {
+				if (nb_tx == 0)
+					return 0;
+				goto end_of_tx;
+			}
+			if (unlikely(nb_used > txq->rs_thresh)) {
+				while (nb_used > txq->nb_free) {
+					if (avf_xmit_cleanup(txq)) {
+						if (nb_tx == 0)
+							return 0;
+						goto end_of_tx;
+					}
+				}
+			}
+		}
+
+		/* Descriptor based VLAN insertion */
+		if (ol_flags & PKT_TX_VLAN_PKT) {
+			td_cmd |= AVF_TX_DESC_CMD_IL2TAG1;
+			td_tag = tx_pkt->vlan_tci;
+		}
+
+		/* Always enable CRC offload insertion */
+		td_cmd |= AVF_TX_DESC_CMD_ICRC;
+
+		/* Enable checksum offloading */
+		if (ol_flags & AVF_TX_CKSUM_OFFLOAD_MASK)
+			avf_txd_enable_checksum(ol_flags, &td_cmd,
+						&td_offset, tx_offload);
+
+		if (nb_ctx) {
+			/* Setup TX context descriptor if required */
+			volatile struct avf_tx_context_desc *ctx_txd =
+				(volatile struct avf_tx_context_desc *)
+					&txr[tx_id];
+			uint16_t cd_l2tag2 = 0;
+			uint64_t cd_type_cmd_tso_mss =
+				AVF_TX_DESC_DTYPE_CONTEXT;
+
+			txn = &sw_ring[txe->next_id];
+			RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
+			if (txe->mbuf) {
+				rte_pktmbuf_free_seg(txe->mbuf);
+				txe->mbuf = NULL;
+			}
+
+			/* TSO enabled */
+			if (ol_flags & PKT_TX_TCP_SEG)
+				cd_type_cmd_tso_mss |=
+					avf_set_tso_ctx(tx_pkt, tx_offload);
+
+			PMD_TX_LOG(DEBUG, "mbuf: %p, TCD[%u]:\n"
+				"type_cmd_tso_mss: %#"PRIx64";\n",
+				tx_pkt, tx_id,
+				ctx_txd->type_cmd_tso_mss);
+
+			txe->last_id = tx_last;
+			tx_id = txe->next_id;
+			txe = txn;
+		}
+
+		m_seg = tx_pkt;
+		do {
+			txd = &txr[tx_id];
+			txn = &sw_ring[txe->next_id];
+
+			if (txe->mbuf)
+				rte_pktmbuf_free_seg(txe->mbuf);
+			txe->mbuf = m_seg;
+
+			/* Setup TX Descriptor */
+			slen = m_seg->data_len;
+			buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
+
+			PMD_TX_LOG(DEBUG, "mbuf: %p, TDD[%u]:\n"
+				"buf_dma_addr: %#"PRIx64";\n"
+				"td_cmd: %#x;\n"
+				"td_offset: %#x;\n"
+				"td_len: %u;\n"
+				"td_tag: %#x;\n",
+				tx_pkt, tx_id, buf_dma_addr,
+				td_cmd, td_offset, slen, td_tag);
+
+			txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
+			txd->cmd_type_offset_bsz = avf_build_ctob(td_cmd,
+								  td_offset,
+								  slen,
+								  td_tag);
+			txe->last_id = tx_last;
+			tx_id = txe->next_id;
+			txe = txn;
+			m_seg = m_seg->next;
+			AVF_DUMP_TX_DESC(txq, txd, tx_id);
+		} while (m_seg);
+
+		/* The last packet data descriptor needs End Of Packet (EOP) */
+		td_cmd |= AVF_TX_DESC_CMD_EOP;
+		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
+		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
+
+		if (txq->nb_used >= txq->rs_thresh) {
+			PMD_TX_FREE_LOG(DEBUG,
+					"Setting RS bit on TXD id="
+					"%4u (port=%d queue=%d)",
+					tx_last, txq->port_id, txq->queue_id);
+
+			td_cmd |= AVF_TX_DESC_CMD_RS;
+
+			/* Update txq RS bit counters */
+			txq->nb_used = 0;
+		}
+
+		txd->cmd_type_offset_bsz |=
+			rte_cpu_to_le_64(((uint64_t)td_cmd) <<
+					 AVF_TXD_QW1_CMD_SHIFT);
+		AVF_DUMP_TX_DESC(txq, txd, tx_id);
+	}
+
+end_of_tx:
+	rte_wmb();
+
+	PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
+		   (unsigned)txq->port_id, (unsigned)txq->queue_id,
+		   (unsigned)tx_id, (unsigned)nb_tx);
+
+	AVF_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id);
+	txq->tx_tail = tx_id;
+
+	return nb_tx;
+}
+
+/* TX prep functions */
+uint16_t
+avf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+	      uint16_t nb_pkts)
+{
+	int i, ret;
+	uint64_t ol_flags;
+	struct rte_mbuf *m;
+
+	for (i = 0; i < nb_pkts; i++) {
+		m = tx_pkts[i];
+		ol_flags = m->ol_flags;
+
+		/* m->nb_segs is uint8_t, so nb_segs is always less than
+		 * AVF_TX_MAX_SEG.
+		 * We check only a condition for nb_segs > AVF_TX_MAX_MTU_SEG.
+		 */
+		if (!(ol_flags & PKT_TX_TCP_SEG)) {
+			if (m->nb_segs > AVF_TX_MAX_MTU_SEG) {
+				rte_errno = -EINVAL;
+				return i;
+			}
+		} else if ((m->tso_segsz < AVF_MIN_TSO_MSS) ||
+				(m->tso_segsz > AVF_MAX_TSO_MSS)) {
+			/* MSS outside the range are considered malicious */
+			rte_errno = -EINVAL;
+			return i;
+		}
+
+		if (ol_flags & AVF_TX_OFFLOAD_NOTSUP_MASK) {
+			rte_errno = -ENOTSUP;
+			return i;
+		}
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+		ret = rte_validate_tx_offload(m);
+		if (ret != 0) {
+			rte_errno = ret;
+			return i;
+		}
+#endif
+		ret = rte_net_intel_cksum_prepare(m);
+		if (ret != 0) {
+			rte_errno = ret;
+			return i;
+		}
+	}
+
+	return i;
+}
+
+/* choose rx function */
+void
+avf_set_rx_function(struct rte_eth_dev *dev)
+{
+	if (dev->data->scattered_rx)
+		dev->rx_pkt_burst = avf_recv_scattered_pkts;
+	else
+		dev->rx_pkt_burst = avf_recv_pkts;
+}
+
+/* choose tx function */
+void
+avf_set_tx_function(struct rte_eth_dev *dev)
+{
+	dev->tx_pkt_burst = avf_xmit_pkts;
+	dev->tx_pkt_prepare = avf_prep_pkts;
 }
\ No newline at end of file
diff --git a/drivers/net/avf/avf_rxtx.h b/drivers/net/avf/avf_rxtx.h
index 9bdceb7..de98ce3 100644
--- a/drivers/net/avf/avf_rxtx.h
+++ b/drivers/net/avf/avf_rxtx.h
@@ -48,6 +48,28 @@ 
 #define DEFAULT_TX_RS_THRESH     32
 #define DEFAULT_TX_FREE_THRESH   32
 
+#define AVF_MIN_TSO_MSS          256
+#define AVF_MAX_TSO_MSS          9668
+#define AVF_TSO_MAX_SEG          UINT8_MAX
+#define AVF_TX_MAX_MTU_SEG       8
+
+#define AVF_TD_CMD (AVF_TX_DESC_CMD_ICRC |\
+		    AVF_TX_DESC_CMD_EOP)
+
+#define AVF_TX_CKSUM_OFFLOAD_MASK (		 \
+		PKT_TX_IP_CKSUM |		 \
+		PKT_TX_L4_MASK |		 \
+		PKT_TX_TCP_SEG)
+
+#define AVF_TX_OFFLOAD_MASK (  \
+		PKT_TX_VLAN_PKT |		 \
+		PKT_TX_IP_CKSUM |		 \
+		PKT_TX_L4_MASK |		 \
+		PKT_TX_TCP_SEG)
+
+#define AVF_TX_OFFLOAD_NOTSUP_MASK \
+		(PKT_TX_OFFLOAD_MASK ^ AVF_TX_OFFLOAD_MASK)
+
 /* HW desc structure, both 16-byte and 32-byte types are supported */
 #ifdef RTE_LIBRTE_AVF_16BYTE_RX_DESC
 #define avf_rx_desc avf_16byte_rx_desc
@@ -114,6 +136,19 @@  struct avf_tx_queue {
 	bool tx_deferred_start;        /* don't start this queue in dev start */
 };
 
+/** Offload features */
+union avf_tx_offload {
+	uint64_t data;
+	struct {
+		uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+		uint64_t l3_len:9; /**< L3 (IP) Header Length. */
+		uint64_t l4_len:8; /**< L4 Header Length. */
+		uint64_t tso_segsz:16; /**< TCP TSO segment size */
+		uint64_t outer_l2_len:8; /**< outer L2 Header Length */
+		uint64_t outer_l3_len:16; /**< outer L3 Header Length */
+	};
+};
+
 int avf_dev_rx_queue_setup(struct rte_eth_dev *dev,
 			   uint16_t queue_idx,
 			   uint16_t nb_desc,
@@ -134,6 +169,17 @@  int avf_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id);
 int avf_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id);
 void avf_dev_tx_queue_release(void *txq);
 void avf_stop_queues(struct rte_eth_dev *dev);
+uint16_t avf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+		       uint16_t nb_pkts);
+uint16_t avf_recv_scattered_pkts(void *rx_queue,
+				 struct rte_mbuf **rx_pkts,
+				 uint16_t nb_pkts);
+uint16_t avf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+		       uint16_t nb_pkts);
+uint16_t avf_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+		       uint16_t nb_pkts);
+void avf_set_rx_function(struct rte_eth_dev *dev);
+void avf_set_tx_function(struct rte_eth_dev *dev);
 
 static inline
 void avf_dump_rx_descriptor(struct avf_rx_queue *rxq,