[v7,3/4] net/ixgbe: cleanup Tx buffers

Message ID 20200109103822.89011-4-chenxux.di@intel.com (mailing list archive)
State Superseded, archived
Delegated to: xiaolong ye
Headers
Series drivers/net: cleanup Tx buffers |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK

Commit Message

Chenxu Di Jan. 9, 2020, 10:38 a.m. UTC
  Add support to the ixgbe driver for the API rte_eth_tx_done_cleanup
to force free consumed buffers on Tx ring.

Signed-off-by: Chenxu Di <chenxux.di@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethdev.c |   4 +
 drivers/net/ixgbe/ixgbe_rxtx.c   | 156 ++++++++++++++++++++++++++++++-
 drivers/net/ixgbe/ixgbe_rxtx.h   |  10 ++
 3 files changed, 169 insertions(+), 1 deletion(-)
  

Comments

Ananyev, Konstantin Jan. 9, 2020, 2:01 p.m. UTC | #1
Hi Chenxu,

Good progress with the _full version, but some issues still remain, I think.
More comments inline.
Konstantin

> 
> Signed-off-by: Chenxu Di <chenxux.di@intel.com>
> ---
>  drivers/net/ixgbe/ixgbe_ethdev.c |   4 +
>  drivers/net/ixgbe/ixgbe_rxtx.c   | 156 ++++++++++++++++++++++++++++++-
>  drivers/net/ixgbe/ixgbe_rxtx.h   |  10 ++
>  3 files changed, 169 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
> index 2c6fd0f13..668c36188 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> @@ -601,6 +601,7 @@ static const struct eth_dev_ops ixgbe_eth_dev_ops = {
>  	.udp_tunnel_port_add  = ixgbe_dev_udp_tunnel_port_add,
>  	.udp_tunnel_port_del  = ixgbe_dev_udp_tunnel_port_del,
>  	.tm_ops_get           = ixgbe_tm_ops_get,
> +	.tx_done_cleanup      = ixgbe_tx_done_cleanup,
>  };
> 
>  /*
> @@ -649,6 +650,7 @@ static const struct eth_dev_ops ixgbevf_eth_dev_ops = {
>  	.reta_query           = ixgbe_dev_rss_reta_query,
>  	.rss_hash_update      = ixgbe_dev_rss_hash_update,
>  	.rss_hash_conf_get    = ixgbe_dev_rss_hash_conf_get,
> +	.tx_done_cleanup      = ixgbe_tx_done_cleanup,
>  };
> 
>  /* store statistics names and its offset in stats structure */
> @@ -1101,6 +1103,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused)
>  	eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
>  	eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
>  	eth_dev->tx_pkt_prepare = &ixgbe_prep_pkts;
> +	ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_scalar);
> 
>  	/*
>  	 * For secondary processes, we don't initialise any further as primary
> @@ -1580,6 +1583,7 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev)
>  	eth_dev->dev_ops = &ixgbevf_eth_dev_ops;
>  	eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
>  	eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
> +	ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_scalar);
> 
>  	/* for secondary processes, we don't initialise any further as primary
>  	 * has already done this work. Only check we don't need a different
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index fa572d184..122dae425 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -92,6 +92,8 @@ uint16_t ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>  				    uint16_t nb_pkts);
>  #endif
> 
> +static ixgbe_tx_done_cleanup_t ixgbe_tx_done_cleanup_op;

You can't have just one static variable here.
There could be several ixgbe devices, and they could be configured in different ways.
I.e. tx_pkt_burst() is per device, so tx_done_cleanup() also has to be per device.
Probably the easiest way is to add a new entry for tx_done_cleanup to struct ixgbe_txq_ops,
and set it properly in ixgbe_set_tx_function().

> +
>  /*********************************************************************
>   *
>   *  TX functions
> @@ -2306,6 +2308,152 @@ ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
>  	}
>  }
> 
> +int
> +ixgbe_tx_done_cleanup_scalar(struct ixgbe_tx_queue *txq, uint32_t free_cnt)

As a nit I would change _scalar to _full or so.

> +{
> +	uint32_t pkt_cnt;
> +	uint16_t i;
> +	uint16_t tx_last;
> +	uint16_t tx_id;
> +	uint16_t nb_tx_to_clean;
> +	uint16_t nb_tx_free_last;
> +	struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
> +
> +	/* Start free mbuf from the next of tx_tail */
> +	tx_last = txq->tx_tail;
> +	tx_id  = swr_ring[tx_last].next_id;
> +
> +	if (txq->nb_tx_free == 0)
> +		if (ixgbe_xmit_cleanup(txq))


As a nit, it could be just: if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))

> +			return 0;
> +
> +	nb_tx_to_clean = txq->nb_tx_free;
> +	nb_tx_free_last = txq->nb_tx_free;
> +	if (!free_cnt)
> +		free_cnt = txq->nb_tx_desc;
> +
> +	/* Loop through swr_ring to count the amount of
> +	 * freeable mubfs and packets.
> +	 */
> +	for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
> +		for (i = 0; i < nb_tx_to_clean &&
> +			pkt_cnt < free_cnt &&
> +			tx_id != tx_last; i++) {
> +			if (swr_ring[tx_id].mbuf != NULL) {
> +				rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
> +				swr_ring[tx_id].mbuf = NULL;
> +
> +				/*
> +				 * last segment in the packet,
> +				 * increment packet count
> +				 */
> +				pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
> +			}
> +
> +			tx_id = swr_ring[tx_id].next_id;
> +		}
> +
> +		if (tx_id == tx_last || txq->tx_rs_thresh
> +			> txq->nb_tx_desc - txq->nb_tx_free)

The first condition (tx_id == tx_last) is probably redundant here.

> +			break;
> +
> +		if (pkt_cnt < free_cnt) {
> +			if (ixgbe_xmit_cleanup(txq))
> +				break;
> +
> +			nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
> +			nb_tx_free_last = txq->nb_tx_free;
> +		}
> +	}
> +
> +	PMD_TX_FREE_LOG(DEBUG,
> +		"Free %u Packets successfully "
> +		"(port=%d queue=%d)",
> +		pkt_cnt, txq->port_id, txq->queue_id);
> +
> +	return (int)pkt_cnt;
> +}
> +
> +int
> +ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
> +			uint32_t free_cnt __rte_unused)
> +{
> +	return -ENOTSUP;
> +}
> +
> +int
> +ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
> +			uint32_t free_cnt)
> +{
> +	uint16_t i;
> +	uint16_t tx_first;
> +	uint16_t tx_id;
> +	uint32_t pkt_cnt;
> +	struct ixgbe_tx_entry *swr_ring = txq->sw_ring;


This looks overcomplicated.
TX simple (and vec) doesn't support multi-seg packets,
so one TXD means one mbuf and one packet.
And ixgbe_tx_free_bufs() always returns/frees either 0 or tx_rs_thresh mbufs/packets.
So it could probably be something like this:

ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,  uint32_t free_cnt)
{
   if (free_cnt == 0)
      free_cnt = txq->nb_tx_desc;
 
   cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
    for (i = 0; i < cnt; i+= n) {
          n = ixgbe_tx_free_bufs(txq);
          if (n == 0)
             break;
    } 
    return i;
}

> +
> +	/* Start free mbuf from tx_first */
> +	tx_first = txq->tx_next_dd - (txq->tx_rs_thresh - 1);
> +	tx_id  = tx_first;
> +
> +	/* while free_cnt is 0,
> +	 * suppose one mbuf per packet,
> +	 * try to free packets as many as possible
> +	 */
> +	if (free_cnt == 0)
> +		free_cnt = txq->nb_tx_desc;
> +
> +	/* Loop through swr_ring to count freeable packets */
> +	for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
> +		if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
> +			break;
> +
> +		if (!ixgbe_tx_free_bufs(txq))
> +			break;
> +
> +		for (i = 0; i != txq->tx_rs_thresh &&
> +			tx_id != tx_first; i++) {
> +			/* last segment in the packet,
> +			 * increment packet count
> +			 */
> +			pkt_cnt += (tx_id == swr_ring[tx_id].last_id);
> +			tx_id = swr_ring[tx_id].next_id;
> +		}
> +
> +		if (tx_id == tx_first)
> +			break;
> +	}
> +
> +	PMD_TX_FREE_LOG(DEBUG,
> +		"Free %u packets successfully "
> +		"(port=%d queue=%d)",
> +		pkt_cnt, txq->port_id, txq->queue_id);
> +
> +	return (int)pkt_cnt;
> +}
> +
> +int
> +ixgbe_tx_done_cleanup(void *txq, uint32_t free_cnt)
> +{
> +	ixgbe_tx_done_cleanup_t func = ixgbe_get_tx_done_cleanup_func();
> +
> +	if (!func)
> +		return -ENOTSUP;
> +
> +	return func(txq, free_cnt);
> +}
> +
> +void
> +ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_t fn)
> +{
> +	ixgbe_tx_done_cleanup_op = fn;
> +}
> +
> +ixgbe_tx_done_cleanup_t
> +ixgbe_get_tx_done_cleanup_func(void)
> +{
> +	return ixgbe_tx_done_cleanup_op;
> +}
> +
>  static void __attribute__((cold))
>  ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
>  {
> @@ -2398,9 +2546,14 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
>  					ixgbe_txq_vec_setup(txq) == 0)) {
>  			PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
>  			dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
> -		} else
> +			ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_vec);
> +		} else {
>  #endif
>  		dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
> +		ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_simple);
> +#ifdef RTE_IXGBE_INC_VECTOR
> +		}
> +#endif
>  	} else {
>  		PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
>  		PMD_INIT_LOG(DEBUG,
> @@ -2412,6 +2565,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
>  				(unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
>  		dev->tx_pkt_burst = ixgbe_xmit_pkts;
>  		dev->tx_pkt_prepare = ixgbe_prep_pkts;
> +		ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_scalar);
>  	}
>  }
> 
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
> index 505d344b9..a52597aa9 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> @@ -253,6 +253,8 @@ struct ixgbe_txq_ops {
>  			 IXGBE_ADVTXD_DCMD_DEXT |\
>  			 IXGBE_ADVTXD_DCMD_EOP)
> 
> +typedef int (*ixgbe_tx_done_cleanup_t)(struct ixgbe_tx_queue *txq,
> +				uint32_t free_cnt);
> 
>  /* Takes an ethdev and a queue and sets up the tx function to be used based on
>   * the queue parameters. Used in tx_queue_setup by primary process and then
> @@ -285,6 +287,14 @@ int ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev);
>  int ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq);
>  void ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq);
> 
> +void ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_t fn);
> +ixgbe_tx_done_cleanup_t ixgbe_get_tx_done_cleanup_func(void);
> +
> +int ixgbe_tx_done_cleanup(void *txq, uint32_t free_cnt);
> +int ixgbe_tx_done_cleanup_scalar(struct ixgbe_tx_queue *txq, uint32_t free_cnt);
> +int ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq, uint32_t free_cnt);
> +int ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq, uint32_t free_cnt);
> +
>  extern const uint32_t ptype_table[IXGBE_PACKET_TYPE_MAX];
>  extern const uint32_t ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX];
> 
> --
> 2.17.1
  
Chenxu Di Jan. 10, 2020, 10:08 a.m. UTC | #2
hi, Konstantin 

Thanks for your comments. I have addressed almost all of them in the new version of the patch, except one.

> -----Original Message-----
> From: Ananyev, Konstantin
> Sent: Thursday, January 9, 2020 10:02 PM
> To: Di, ChenxuX <chenxux.di@intel.com>; dev@dpdk.org
> Cc: Yang, Qiming <qiming.yang@intel.com>; Di, ChenxuX
> <chenxux.di@intel.com>
> Subject: RE: [dpdk-dev] [PATCH v7 3/4] net/ixgbe: cleanup Tx buffers
> 
> 
> Hi Chenxu,
> 
> Good progress wih _full_version, but still some issues remains I think.
> More comments inline.
> Konstantin
> 
> >
> > Signed-off-by: Chenxu Di <chenxux.di@intel.com>
> > ---
> >  drivers/net/ixgbe/ixgbe_ethdev.c |   4 +
> >  drivers/net/ixgbe/ixgbe_rxtx.c   | 156 ++++++++++++++++++++++++++++++-
> >  drivers/net/ixgbe/ixgbe_rxtx.h   |  10 ++
> >  3 files changed, 169 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c
> > b/drivers/net/ixgbe/ixgbe_ethdev.c
> > index 2c6fd0f13..668c36188 100644
> > --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> > +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> > @@ -601,6 +601,7 @@ static const struct eth_dev_ops ixgbe_eth_dev_ops
> > = {  .udp_tunnel_port_add  = ixgbe_dev_udp_tunnel_port_add,
> > .udp_tunnel_port_del  = ixgbe_dev_udp_tunnel_port_del,
> >  .tm_ops_get           = ixgbe_tm_ops_get,
> > +.tx_done_cleanup      = ixgbe_tx_done_cleanup,
> >  };
> >
> >  /*
> > @@ -649,6 +650,7 @@ static const struct eth_dev_ops ixgbevf_eth_dev_ops
> = {
> >  .reta_query           = ixgbe_dev_rss_reta_query,
> >  .rss_hash_update      = ixgbe_dev_rss_hash_update,
> >  .rss_hash_conf_get    = ixgbe_dev_rss_hash_conf_get,
> > +.tx_done_cleanup      = ixgbe_tx_done_cleanup,
> >  };
> >
> >  /* store statistics names and its offset in stats structure */ @@
> > -1101,6 +1103,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev,
> > void *init_params __rte_unused)  eth_dev->rx_pkt_burst =
> > &ixgbe_recv_pkts;  eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
> > eth_dev->tx_pkt_prepare = &ixgbe_prep_pkts;
> > +ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_scalar);
> >
> >  /*
> >   * For secondary processes, we don't initialise any further as
> > primary @@ -1580,6 +1583,7 @@ eth_ixgbevf_dev_init(struct rte_eth_dev
> > *eth_dev)  eth_dev->dev_ops = &ixgbevf_eth_dev_ops;
> > eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;  eth_dev->tx_pkt_burst =
> > &ixgbe_xmit_pkts;
> > +ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_scalar);
> >
> >  /* for secondary processes, we don't initialise any further as primary
> >   * has already done this work. Only check we don't need a different
> > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c
> > b/drivers/net/ixgbe/ixgbe_rxtx.c index fa572d184..122dae425 100644
> > --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> > +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> > @@ -92,6 +92,8 @@ uint16_t ixgbe_xmit_fixed_burst_vec(void *tx_queue,
> struct rte_mbuf **tx_pkts,
> >      uint16_t nb_pkts);
> >  #endif
> >
> > +static ixgbe_tx_done_cleanup_t ixgbe_tx_done_cleanup_op;
> 
> You can't have just one static variable here.
> There could be several ixgbe devices and they could be configured in a different
> way.
> I.E. txpkt_burst() is per device, so tx_done_cleanup() also has to be per device.
> Probably the easiest way is to add new entry for tx_done_cleanup into struct
> ixgbe_txq_ops, and set it properly in ixgbe_set_tx_function().
> 
> > +
> >
> /****************************************************************
> *****
> >   *
> >   *  TX functions
> > @@ -2306,6 +2308,152 @@ ixgbe_tx_queue_release_mbufs(struct
> > ixgbe_tx_queue *txq)  }  }
> >
> > +int
> > +ixgbe_tx_done_cleanup_scalar(struct ixgbe_tx_queue *txq, uint32_t
> > +free_cnt)
> 
> As a nit I would change _scalar to _full or so.
> 
> > +{
> > +uint32_t pkt_cnt;
> > +uint16_t i;
> > +uint16_t tx_last;
> > +uint16_t tx_id;
> > +uint16_t nb_tx_to_clean;
> > +uint16_t nb_tx_free_last;
> > +struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
> > +
> > +/* Start free mbuf from the next of tx_tail */ tx_last =
> > +txq->tx_tail; tx_id  = swr_ring[tx_last].next_id;
> > +
> > +if (txq->nb_tx_free == 0)
> > +if (ixgbe_xmit_cleanup(txq))
> 
> 
> As a nit it could be just if (ixgbe_set_tx_function && ixgbe_xmit_cleanup(txq))
> 
> > +return 0;
> > +
> > +nb_tx_to_clean = txq->nb_tx_free;
> > +nb_tx_free_last = txq->nb_tx_free;
> > +if (!free_cnt)
> > +free_cnt = txq->nb_tx_desc;
> > +
> > +/* Loop through swr_ring to count the amount of
> > + * freeable mubfs and packets.
> > + */
> > +for (pkt_cnt = 0; pkt_cnt < free_cnt; ) { for (i = 0; i <
> > +nb_tx_to_clean && pkt_cnt < free_cnt && tx_id != tx_last; i++) { if
> > +(swr_ring[tx_id].mbuf != NULL) {
> > +rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
> > +swr_ring[tx_id].mbuf = NULL;
> > +
> > +/*
> > + * last segment in the packet,
> > + * increment packet count
> > + */
> > +pkt_cnt += (swr_ring[tx_id].last_id == tx_id); }
> > +
> > +tx_id = swr_ring[tx_id].next_id;
> > +}
> > +
> > +if (tx_id == tx_last || txq->tx_rs_thresh
> > +> txq->nb_tx_desc - txq->nb_tx_free)
> 
> First condition (tx_id == tx_last) is porbably redundant here.
> 

I think it is necessary. The txq may transmit packets while the API is being called,
so txq->nb_tx_free may change.

If (tx_id == tx_last), the inner loop above exits, and the function should finish and return.
However, if more than txq->tx_rs_thresh packets are sent into the txq while the function is running,
it will not return, and it will fall into an endless loop.

> > +break;
> > +
> > +if (pkt_cnt < free_cnt) {
> > +if (ixgbe_xmit_cleanup(txq))
> > +break;
> > +
> > +nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last; nb_tx_free_last =
> > +txq->nb_tx_free; } }
> > +
> > +PMD_TX_FREE_LOG(DEBUG,
> > +"Free %u Packets successfully "
> > +"(port=%d queue=%d)",
> > +pkt_cnt, txq->port_id, txq->queue_id);
> > +
> > +return (int)pkt_cnt;
> > +}
> > +
> > +int
> > +ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
> > +uint32_t free_cnt __rte_unused) { return -ENOTSUP; }
> > +
> > +int
> > +ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq, uint32_t
> > +free_cnt) { uint16_t i; uint16_t tx_first; uint16_t tx_id; uint32_t
> > +pkt_cnt; struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
> 
> 
> Looks overcomplicated here.
> TX simple (and vec) doesn't support mulsti-seg packets, So one TXD - one mbuf,
> and one packet.
> And ixgbe_tx_free_bufs() always retunrs/frees either 0 or tx_rs_thresh
> mbufs/packets.
> So it probably can be something like that:
> 
> ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,  uint32_t free_cnt) {
>    If (free_cnt == 0)
>       free_cnt = txq->nb_desc;
> 
>    cnt = free_cnt - free_cnt % txq->tx_rs_thesh;
>     for (i = 0; i < cnt; i+= n) {
>           n = ixgbe_tx_free_bufs(txq);
>           if (n == 0)
>              break;
>     }
>     return i;
> }
> 
> > +
> > +/* Start free mbuf from tx_first */
> > +tx_first = txq->tx_next_dd - (txq->tx_rs_thresh - 1); tx_id  =
> > +tx_first;
> > +
> > +/* while free_cnt is 0,
> > + * suppose one mbuf per packet,
> > + * try to free packets as many as possible  */ if (free_cnt == 0)
> > +free_cnt = txq->nb_tx_desc;
> > +
> > +/* Loop through swr_ring to count freeable packets */ for (pkt_cnt =
> > +0; pkt_cnt < free_cnt; ) { if (txq->nb_tx_desc - txq->nb_tx_free <
> > +txq->tx_rs_thresh) break;
> > +
> > +if (!ixgbe_tx_free_bufs(txq))
> > +break;
> > +
> > +for (i = 0; i != txq->tx_rs_thresh && tx_id != tx_first; i++) {
> > +/* last segment in the packet,
> > + * increment packet count
> > + */
> > +pkt_cnt += (tx_id == swr_ring[tx_id].last_id); tx_id =
> > +swr_ring[tx_id].next_id; }
> > +
> > +if (tx_id == tx_first)
> > +break;
> > +}
> > +
> > +PMD_TX_FREE_LOG(DEBUG,
> > +"Free %u packets successfully "
> > +"(port=%d queue=%d)",
> > +pkt_cnt, txq->port_id, txq->queue_id);
> > +
> > +return (int)pkt_cnt;
> > +}
> > +
> > +int
> > +ixgbe_tx_done_cleanup(void *txq, uint32_t free_cnt) {
> > +ixgbe_tx_done_cleanup_t func = ixgbe_get_tx_done_cleanup_func();
> > +
> > +if (!func)
> > +return -ENOTSUP;
> > +
> > +return func(txq, free_cnt);
> > +}
> > +
> > +void
> > +ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_t fn) {
> > +ixgbe_tx_done_cleanup_op = fn; }
> > +
> > +ixgbe_tx_done_cleanup_t
> > +ixgbe_get_tx_done_cleanup_func(void)
> > +{
> > +return ixgbe_tx_done_cleanup_op;
> > +}
> > +
> >  static void __attribute__((cold))
> >  ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)  { @@ -2398,9
> > +2546,14 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct
> > ixgbe_tx_queue *txq)
> >  ixgbe_txq_vec_setup(txq) == 0)) {
> >  PMD_INIT_LOG(DEBUG, "Vector tx enabled.");  dev->tx_pkt_burst =
> > ixgbe_xmit_pkts_vec; -} else
> > +ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_vec);
> > +} else {
> >  #endif
> >  dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
> > +ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_simple);
> > +#ifdef RTE_IXGBE_INC_VECTOR
> > +}
> > +#endif
> >  } else {
> >  PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
> > PMD_INIT_LOG(DEBUG, @@ -2412,6 +2565,7 @@
> ixgbe_set_tx_function(struct
> > rte_eth_dev *dev, struct ixgbe_tx_queue *txq)  (unsigned
> > long)RTE_PMD_IXGBE_TX_MAX_BURST);  dev->tx_pkt_burst =
> > ixgbe_xmit_pkts;  dev->tx_pkt_prepare = ixgbe_prep_pkts;
> > +ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_scalar);
> >  }
> >  }
> >
> > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h
> > b/drivers/net/ixgbe/ixgbe_rxtx.h index 505d344b9..a52597aa9 100644
> > --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> > +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> > @@ -253,6 +253,8 @@ struct ixgbe_txq_ops {
> >   IXGBE_ADVTXD_DCMD_DEXT |\
> >   IXGBE_ADVTXD_DCMD_EOP)
> >
> > +typedef int (*ixgbe_tx_done_cleanup_t)(struct ixgbe_tx_queue *txq,
> > +uint32_t free_cnt);
> >
> >  /* Takes an ethdev and a queue and sets up the tx function to be used based
> on
> >   * the queue parameters. Used in tx_queue_setup by primary process
> > and then @@ -285,6 +287,14 @@ int
> > ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev);  int
> > ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq);  void
> > ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq);
> >
> > +void ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_t fn);
> > +ixgbe_tx_done_cleanup_t ixgbe_get_tx_done_cleanup_func(void);
> > +
> > +int ixgbe_tx_done_cleanup(void *txq, uint32_t free_cnt); int
> > +ixgbe_tx_done_cleanup_scalar(struct ixgbe_tx_queue *txq, uint32_t
> > +free_cnt); int ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq,
> > +uint32_t free_cnt); int ixgbe_tx_done_cleanup_simple(struct
> > +ixgbe_tx_queue *txq, uint32_t free_cnt);
> > +
> >  extern const uint32_t ptype_table[IXGBE_PACKET_TYPE_MAX];
> >  extern const uint32_t ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX];
> >
> > --
> > 2.17.1
>
  
Ananyev, Konstantin Jan. 10, 2020, 12:46 p.m. UTC | #3
Hi Chenxu,

> hi, Konstantin
> 
> thanks for your opinion, I have fixed almost in new version patch except one.
> 
> > -----Original Message-----
> > From: Ananyev, Konstantin
> > Sent: Thursday, January 9, 2020 10:02 PM
> > To: Di, ChenxuX <chenxux.di@intel.com>; dev@dpdk.org
> > Cc: Yang, Qiming <qiming.yang@intel.com>; Di, ChenxuX
> > <chenxux.di@intel.com>
> > Subject: RE: [dpdk-dev] [PATCH v7 3/4] net/ixgbe: cleanup Tx buffers
> >
> >
> > Hi Chenxu,
> >
> > Good progress wih _full_version, but still some issues remains I think.
> > More comments inline.
> > Konstantin
> >
> > >
> > > Signed-off-by: Chenxu Di <chenxux.di@intel.com>
> > > ---
> > >  drivers/net/ixgbe/ixgbe_ethdev.c |   4 +
> > >  drivers/net/ixgbe/ixgbe_rxtx.c   | 156 ++++++++++++++++++++++++++++++-
> > >  drivers/net/ixgbe/ixgbe_rxtx.h   |  10 ++
> > >  3 files changed, 169 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c
> > > b/drivers/net/ixgbe/ixgbe_ethdev.c
> > > index 2c6fd0f13..668c36188 100644
> > > --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> > > +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> > > @@ -601,6 +601,7 @@ static const struct eth_dev_ops ixgbe_eth_dev_ops
> > > = {  .udp_tunnel_port_add  = ixgbe_dev_udp_tunnel_port_add,
> > > .udp_tunnel_port_del  = ixgbe_dev_udp_tunnel_port_del,
> > >  .tm_ops_get           = ixgbe_tm_ops_get,
> > > +.tx_done_cleanup      = ixgbe_tx_done_cleanup,
> > >  };
> > >
> > >  /*
> > > @@ -649,6 +650,7 @@ static const struct eth_dev_ops ixgbevf_eth_dev_ops
> > = {
> > >  .reta_query           = ixgbe_dev_rss_reta_query,
> > >  .rss_hash_update      = ixgbe_dev_rss_hash_update,
> > >  .rss_hash_conf_get    = ixgbe_dev_rss_hash_conf_get,
> > > +.tx_done_cleanup      = ixgbe_tx_done_cleanup,
> > >  };
> > >
> > >  /* store statistics names and its offset in stats structure */ @@
> > > -1101,6 +1103,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev,
> > > void *init_params __rte_unused)  eth_dev->rx_pkt_burst =
> > > &ixgbe_recv_pkts;  eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
> > > eth_dev->tx_pkt_prepare = &ixgbe_prep_pkts;
> > > +ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_scalar);
> > >
> > >  /*
> > >   * For secondary processes, we don't initialise any further as
> > > primary @@ -1580,6 +1583,7 @@ eth_ixgbevf_dev_init(struct rte_eth_dev
> > > *eth_dev)  eth_dev->dev_ops = &ixgbevf_eth_dev_ops;
> > > eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;  eth_dev->tx_pkt_burst =
> > > &ixgbe_xmit_pkts;
> > > +ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_scalar);
> > >
> > >  /* for secondary processes, we don't initialise any further as primary
> > >   * has already done this work. Only check we don't need a different
> > > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c
> > > b/drivers/net/ixgbe/ixgbe_rxtx.c index fa572d184..122dae425 100644
> > > --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> > > +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> > > @@ -92,6 +92,8 @@ uint16_t ixgbe_xmit_fixed_burst_vec(void *tx_queue,
> > struct rte_mbuf **tx_pkts,
> > >      uint16_t nb_pkts);
> > >  #endif
> > >
> > > +static ixgbe_tx_done_cleanup_t ixgbe_tx_done_cleanup_op;
> >
> > You can't have just one static variable here.
> > There could be several ixgbe devices and they could be configured in a different
> > way.
> > I.E. txpkt_burst() is per device, so tx_done_cleanup() also has to be per device.
> > Probably the easiest way is to add new entry for tx_done_cleanup into struct
> > ixgbe_txq_ops, and set it properly in ixgbe_set_tx_function().
> >
> > > +
> > >
> > /****************************************************************
> > *****
> > >   *
> > >   *  TX functions
> > > @@ -2306,6 +2308,152 @@ ixgbe_tx_queue_release_mbufs(struct
> > > ixgbe_tx_queue *txq)  }  }
> > >
> > > +int
> > > +ixgbe_tx_done_cleanup_scalar(struct ixgbe_tx_queue *txq, uint32_t
> > > +free_cnt)
> >
> > As a nit I would change _scalar to _full or so.
> >
> > > +{
> > > +uint32_t pkt_cnt;
> > > +uint16_t i;
> > > +uint16_t tx_last;
> > > +uint16_t tx_id;
> > > +uint16_t nb_tx_to_clean;
> > > +uint16_t nb_tx_free_last;
> > > +struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
> > > +
> > > +/* Start free mbuf from the next of tx_tail */ tx_last =
> > > +txq->tx_tail; tx_id  = swr_ring[tx_last].next_id;
> > > +
> > > +if (txq->nb_tx_free == 0)
> > > +if (ixgbe_xmit_cleanup(txq))
> >
> >
> > As a nit it could be just if (ixgbe_set_tx_function && ixgbe_xmit_cleanup(txq))
> >
> > > +return 0;
> > > +
> > > +nb_tx_to_clean = txq->nb_tx_free;
> > > +nb_tx_free_last = txq->nb_tx_free;
> > > +if (!free_cnt)
> > > +free_cnt = txq->nb_tx_desc;
> > > +
> > > +/* Loop through swr_ring to count the amount of
> > > + * freeable mubfs and packets.
> > > + */
> > > +for (pkt_cnt = 0; pkt_cnt < free_cnt; ) { for (i = 0; i <
> > > +nb_tx_to_clean && pkt_cnt < free_cnt && tx_id != tx_last; i++) { if
> > > +(swr_ring[tx_id].mbuf != NULL) {
> > > +rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
> > > +swr_ring[tx_id].mbuf = NULL;
> > > +
> > > +/*
> > > + * last segment in the packet,
> > > + * increment packet count
> > > + */
> > > +pkt_cnt += (swr_ring[tx_id].last_id == tx_id); }
> > > +
> > > +tx_id = swr_ring[tx_id].next_id;
> > > +}
> > > +
> > > +if (tx_id == tx_last || txq->tx_rs_thresh
> > > +> txq->nb_tx_desc - txq->nb_tx_free)
> >
> > First condition (tx_id == tx_last) is porbably redundant here.
> >
> 
> I think it is necessary. The txq may transmit packets when the API called.

Nope, that is not possible.
None of the ethdev RX/TX API is thread safe.
That would be a race condition that would most likely cause either a crash or memory corruption.

> So txq->nb_tx_free may be changed.
> 
> If (tx_id == tx_last) , it will break the loop above and the function should be done and return.
> However if more than  txq->tx_rs_thresh numbers packet send into txq while function doing.
> It will not return. And fall in endless loop
> 
> > > +break;
> > > +
> > > +if (pkt_cnt < free_cnt) {
> > > +if (ixgbe_xmit_cleanup(txq))
> > > +break;
> > > +
> > > +nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last; nb_tx_free_last =
> > > +txq->nb_tx_free; } }
> > > +
> > > +PMD_TX_FREE_LOG(DEBUG,
> > > +"Free %u Packets successfully "
> > > +"(port=%d queue=%d)",
> > > +pkt_cnt, txq->port_id, txq->queue_id);
> > > +
> > > +return (int)pkt_cnt;
> > > +}
> > > +
> > > +int
> > > +ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
> > > +uint32_t free_cnt __rte_unused) { return -ENOTSUP; }
> > > +
> > > +int
> > > +ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq, uint32_t
> > > +free_cnt) { uint16_t i; uint16_t tx_first; uint16_t tx_id; uint32_t
> > > +pkt_cnt; struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
> >
> >
> > Looks overcomplicated here.
> > TX simple (and vec) doesn't support mulsti-seg packets, So one TXD - one mbuf,
> > and one packet.
> > And ixgbe_tx_free_bufs() always retunrs/frees either 0 or tx_rs_thresh
> > mbufs/packets.
> > So it probably can be something like that:
> >
> > ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,  uint32_t free_cnt) {
> >    If (free_cnt == 0)
> >       free_cnt = txq->nb_desc;
> >
> >    cnt = free_cnt - free_cnt % txq->tx_rs_thesh;
> >     for (i = 0; i < cnt; i+= n) {
> >           n = ixgbe_tx_free_bufs(txq);
> >           if (n == 0)
> >              break;
> >     }
> >     return i;
> > }
> >
> > > +
> > > +/* Start free mbuf from tx_first */
> > > +tx_first = txq->tx_next_dd - (txq->tx_rs_thresh - 1); tx_id  =
> > > +tx_first;
> > > +
> > > +/* while free_cnt is 0,
> > > + * suppose one mbuf per packet,
> > > + * try to free packets as many as possible  */ if (free_cnt == 0)
> > > +free_cnt = txq->nb_tx_desc;
> > > +
> > > +/* Loop through swr_ring to count freeable packets */ for (pkt_cnt =
> > > +0; pkt_cnt < free_cnt; ) { if (txq->nb_tx_desc - txq->nb_tx_free <
> > > +txq->tx_rs_thresh) break;
> > > +
> > > +if (!ixgbe_tx_free_bufs(txq))
> > > +break;
> > > +
> > > +for (i = 0; i != txq->tx_rs_thresh && tx_id != tx_first; i++) {
> > > +/* last segment in the packet,
> > > + * increment packet count
> > > + */
> > > +pkt_cnt += (tx_id == swr_ring[tx_id].last_id); tx_id =
> > > +swr_ring[tx_id].next_id; }
> > > +
> > > +if (tx_id == tx_first)
> > > +break;
> > > +}
> > > +
> > > +PMD_TX_FREE_LOG(DEBUG,
> > > +"Free %u packets successfully "
> > > +"(port=%d queue=%d)",
> > > +pkt_cnt, txq->port_id, txq->queue_id);
> > > +
> > > +return (int)pkt_cnt;
> > > +}
> > > +
> > > +int
> > > +ixgbe_tx_done_cleanup(void *txq, uint32_t free_cnt) {
> > > +ixgbe_tx_done_cleanup_t func = ixgbe_get_tx_done_cleanup_func();
> > > +
> > > +if (!func)
> > > +return -ENOTSUP;
> > > +
> > > +return func(txq, free_cnt);
> > > +}
> > > +
> > > +void
> > > +ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_t fn) {
> > > +ixgbe_tx_done_cleanup_op = fn; }
> > > +
> > > +ixgbe_tx_done_cleanup_t
> > > +ixgbe_get_tx_done_cleanup_func(void)
> > > +{
> > > +return ixgbe_tx_done_cleanup_op;
> > > +}
> > > +
> > >  static void __attribute__((cold))
> > >  ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)  { @@ -2398,9
> > > +2546,14 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct
> > > ixgbe_tx_queue *txq)
> > >  ixgbe_txq_vec_setup(txq) == 0)) {
> > >  PMD_INIT_LOG(DEBUG, "Vector tx enabled.");  dev->tx_pkt_burst =
> > > ixgbe_xmit_pkts_vec; -} else
> > > +ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_vec);
> > > +} else {
> > >  #endif
> > >  dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
> > > +ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_simple);
> > > +#ifdef RTE_IXGBE_INC_VECTOR
> > > +}
> > > +#endif
> > >  } else {
> > >  PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
> > > PMD_INIT_LOG(DEBUG, @@ -2412,6 +2565,7 @@
> > ixgbe_set_tx_function(struct
> > > rte_eth_dev *dev, struct ixgbe_tx_queue *txq)  (unsigned
> > > long)RTE_PMD_IXGBE_TX_MAX_BURST);  dev->tx_pkt_burst =
> > > ixgbe_xmit_pkts;  dev->tx_pkt_prepare = ixgbe_prep_pkts;
> > > +ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_scalar);
> > >  }
> > >  }
> > >
> > > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h
> > > b/drivers/net/ixgbe/ixgbe_rxtx.h index 505d344b9..a52597aa9 100644
> > > --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> > > +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> > > @@ -253,6 +253,8 @@ struct ixgbe_txq_ops {
> > >   IXGBE_ADVTXD_DCMD_DEXT |\
> > >   IXGBE_ADVTXD_DCMD_EOP)
> > >
> > > +typedef int (*ixgbe_tx_done_cleanup_t)(struct ixgbe_tx_queue *txq,
> > > +uint32_t free_cnt);
> > >
> > >  /* Takes an ethdev and a queue and sets up the tx function to be used based
> > on
> > >   * the queue parameters. Used in tx_queue_setup by primary process
> > > and then @@ -285,6 +287,14 @@ int
> > > ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev);  int
> > > ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq);  void
> > > ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq);
> > >
> > > +void ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_t fn);
> > > +ixgbe_tx_done_cleanup_t ixgbe_get_tx_done_cleanup_func(void);
> > > +
> > > +int ixgbe_tx_done_cleanup(void *txq, uint32_t free_cnt); int
> > > +ixgbe_tx_done_cleanup_scalar(struct ixgbe_tx_queue *txq, uint32_t
> > > +free_cnt); int ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq,
> > > +uint32_t free_cnt); int ixgbe_tx_done_cleanup_simple(struct
> > > +ixgbe_tx_queue *txq, uint32_t free_cnt);
> > > +
> > >  extern const uint32_t ptype_table[IXGBE_PACKET_TYPE_MAX];
> > >  extern const uint32_t ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX];
> > >
> > > --
> > > 2.17.1
> >
>
  

Patch

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 2c6fd0f13..668c36188 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -601,6 +601,7 @@  static const struct eth_dev_ops ixgbe_eth_dev_ops = {
 	.udp_tunnel_port_add  = ixgbe_dev_udp_tunnel_port_add,
 	.udp_tunnel_port_del  = ixgbe_dev_udp_tunnel_port_del,
 	.tm_ops_get           = ixgbe_tm_ops_get,
+	.tx_done_cleanup      = ixgbe_tx_done_cleanup,
 };
 
 /*
@@ -649,6 +650,7 @@  static const struct eth_dev_ops ixgbevf_eth_dev_ops = {
 	.reta_query           = ixgbe_dev_rss_reta_query,
 	.rss_hash_update      = ixgbe_dev_rss_hash_update,
 	.rss_hash_conf_get    = ixgbe_dev_rss_hash_conf_get,
+	.tx_done_cleanup      = ixgbe_tx_done_cleanup,
 };
 
 /* store statistics names and its offset in stats structure */
@@ -1101,6 +1103,7 @@  eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused)
 	eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
 	eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
 	eth_dev->tx_pkt_prepare = &ixgbe_prep_pkts;
+	ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_scalar);
 
 	/*
 	 * For secondary processes, we don't initialise any further as primary
@@ -1580,6 +1583,7 @@  eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev)
 	eth_dev->dev_ops = &ixgbevf_eth_dev_ops;
 	eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
 	eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+	ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_scalar);
 
 	/* for secondary processes, we don't initialise any further as primary
 	 * has already done this work. Only check we don't need a different
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index fa572d184..122dae425 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -92,6 +92,8 @@  uint16_t ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				    uint16_t nb_pkts);
 #endif
 
+static ixgbe_tx_done_cleanup_t ixgbe_tx_done_cleanup_op;
+
 /*********************************************************************
  *
  *  TX functions
@@ -2306,6 +2308,152 @@  ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
 	}
 }
 
+int
+ixgbe_tx_done_cleanup_scalar(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
+{
+	uint32_t pkt_cnt;
+	uint16_t i;
+	uint16_t tx_last;
+	uint16_t tx_id;
+	uint16_t nb_tx_to_clean;
+	uint16_t nb_tx_free_last;
+	struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
+
+	/* Start freeing mbufs from the entry after tx_tail */
+	tx_last = txq->tx_tail;
+	tx_id  = swr_ring[tx_last].next_id;
+
+	if (txq->nb_tx_free == 0)
+		if (ixgbe_xmit_cleanup(txq))
+			return 0;
+
+	nb_tx_to_clean = txq->nb_tx_free;
+	nb_tx_free_last = txq->nb_tx_free;
+	if (!free_cnt)
+		free_cnt = txq->nb_tx_desc;
+
+	/* Loop through swr_ring, freeing mbufs and counting
+	 * the packets released.
+	 */
+	for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
+		for (i = 0; i < nb_tx_to_clean &&
+			pkt_cnt < free_cnt &&
+			tx_id != tx_last; i++) {
+			if (swr_ring[tx_id].mbuf != NULL) {
+				rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
+				swr_ring[tx_id].mbuf = NULL;
+
+				/*
+				 * last segment in the packet,
+				 * increment packet count
+				 */
+				pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
+			}
+
+			tx_id = swr_ring[tx_id].next_id;
+		}
+
+		if (tx_id == tx_last || txq->tx_rs_thresh
+			> txq->nb_tx_desc - txq->nb_tx_free)
+			break;
+
+		if (pkt_cnt < free_cnt) {
+			if (ixgbe_xmit_cleanup(txq))
+				break;
+
+			nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
+			nb_tx_free_last = txq->nb_tx_free;
+		}
+	}
+
+	PMD_TX_FREE_LOG(DEBUG,
+		"Free %u Packets successfully "
+		"(port=%d queue=%d)",
+		pkt_cnt, txq->port_id, txq->queue_id);
+
+	return (int)pkt_cnt;
+}
+
+int
+ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
+			uint32_t free_cnt __rte_unused)
+{
+	return -ENOTSUP;
+}
+
+int
+ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
+			uint32_t free_cnt)
+{
+	uint16_t i;
+	uint16_t tx_first;
+	uint16_t tx_id;
+	uint32_t pkt_cnt;
+	struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
+
+	/* Start freeing mbufs from tx_first */
+	tx_first = txq->tx_next_dd - (txq->tx_rs_thresh - 1);
+	tx_id  = tx_first;
+
+	/* When free_cnt is 0,
+	 * assume one mbuf per packet and
+	 * try to free as many packets as possible.
+	 */
+	if (free_cnt == 0)
+		free_cnt = txq->nb_tx_desc;
+
+	/* Loop through swr_ring to count freeable packets */
+	for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
+		if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
+			break;
+
+		if (!ixgbe_tx_free_bufs(txq))
+			break;
+
+		for (i = 0; i != txq->tx_rs_thresh &&
+			tx_id != tx_first; i++) {
+			/* last segment in the packet,
+			 * increment packet count
+			 */
+			pkt_cnt += (tx_id == swr_ring[tx_id].last_id);
+			tx_id = swr_ring[tx_id].next_id;
+		}
+
+		if (tx_id == tx_first)
+			break;
+	}
+
+	PMD_TX_FREE_LOG(DEBUG,
+		"Free %u packets successfully "
+		"(port=%d queue=%d)",
+		pkt_cnt, txq->port_id, txq->queue_id);
+
+	return (int)pkt_cnt;
+}
+
+int
+ixgbe_tx_done_cleanup(void *txq, uint32_t free_cnt)
+{
+	ixgbe_tx_done_cleanup_t func = ixgbe_get_tx_done_cleanup_func();
+
+	if (!func)
+		return -ENOTSUP;
+
+	return func(txq, free_cnt);
+}
+
+void
+ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_t fn)
+{
+	ixgbe_tx_done_cleanup_op = fn;
+}
+
+ixgbe_tx_done_cleanup_t
+ixgbe_get_tx_done_cleanup_func(void)
+{
+	return ixgbe_tx_done_cleanup_op;
+}
+
 static void __attribute__((cold))
 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
 {
@@ -2398,9 +2546,14 @@  ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
 					ixgbe_txq_vec_setup(txq) == 0)) {
 			PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
 			dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
-		} else
+			ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_vec);
+		} else {
 #endif
 		dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
+		ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_simple);
+#ifdef RTE_IXGBE_INC_VECTOR
+		}
+#endif
 	} else {
 		PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
 		PMD_INIT_LOG(DEBUG,
@@ -2412,6 +2565,7 @@  ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
 				(unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
 		dev->tx_pkt_burst = ixgbe_xmit_pkts;
 		dev->tx_pkt_prepare = ixgbe_prep_pkts;
+		ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_scalar);
 	}
 }
 
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
index 505d344b9..a52597aa9 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx.h
@@ -253,6 +253,8 @@  struct ixgbe_txq_ops {
 			 IXGBE_ADVTXD_DCMD_DEXT |\
 			 IXGBE_ADVTXD_DCMD_EOP)
 
+typedef int (*ixgbe_tx_done_cleanup_t)(struct ixgbe_tx_queue *txq,
+				uint32_t free_cnt);
 
 /* Takes an ethdev and a queue and sets up the tx function to be used based on
  * the queue parameters. Used in tx_queue_setup by primary process and then
@@ -285,6 +287,14 @@  int ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev);
 int ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq);
 void ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq);
 
+void ixgbe_set_tx_done_cleanup_func(ixgbe_tx_done_cleanup_t fn);
+ixgbe_tx_done_cleanup_t ixgbe_get_tx_done_cleanup_func(void);
+
+int ixgbe_tx_done_cleanup(void *txq, uint32_t free_cnt);
+int ixgbe_tx_done_cleanup_scalar(struct ixgbe_tx_queue *txq, uint32_t free_cnt);
+int ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq, uint32_t free_cnt);
+int ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq, uint32_t free_cnt);
+
 extern const uint32_t ptype_table[IXGBE_PACKET_TYPE_MAX];
 extern const uint32_t ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX];