[V4,4/4] net/i40e: FDIR update rate optimization
diff mbox series

Message ID 20200715195329.34699-5-chenmin.sun@intel.com
State Superseded
Delegated to: Qi Zhang
Headers show
Series
  • i40e FDIR update rate optimization
Related show

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/travis-robot success Travis build: passed
ci/checkpatch success coding style OK

Commit Message

Sun, Chenmin July 15, 2020, 7:53 p.m. UTC
From: Chenmin Sun <chenmin.sun@intel.com>

This patch optimizes the FDIR update rate for the i40e PF by tracking
whether an FDIR rule is being inserted into the guaranteed space
or the shared space.
For flows that are inserted into the guaranteed space, we assume
that the insertion will always succeed, because the hardware only reports
the "not enough space left" error, which cannot occur while guaranteed
space is still free. In this case, the software can return success
directly and does not need to retrieve the result from the hardware.
When destroying a flow, we also assume the operation will succeed,
because the software has already checked that the flow is indeed in
the hardware.
See the FDIR programming status descriptor format in the datasheet
for more details.

Signed-off-by: Chenmin Sun <chenmin.sun@intel.com>
---
 drivers/net/i40e/i40e_ethdev.h |  14 ++-
 drivers/net/i40e/i40e_fdir.c   | 151 ++++++++++++++++++++++++++-------
 drivers/net/i40e/i40e_rxtx.c   |   6 +-
 drivers/net/i40e/i40e_rxtx.h   |   3 +
 4 files changed, 139 insertions(+), 35 deletions(-)

Comments

Wu, Jingjing July 16, 2020, 1:57 p.m. UTC | #1
[...]

> +static inline unsigned char *
> +i40e_find_available_buffer(struct rte_eth_dev *dev)
> +{
> +	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
> +	struct i40e_fdir_info *fdir_info = &pf->fdir;
> +	struct i40e_tx_queue *txq = pf->fdir.txq;
> +	volatile struct i40e_tx_desc *txdp = &txq->tx_ring[txq->tx_tail + 1];
> +	uint32_t i;
> +
> +	/* no available buffer
> +	 * search for more available buffers from the current
> +	 * descriptor, until an unavailable one
> +	 */
> +	if (fdir_info->txq_available_buf_count <= 0) {
> +		uint16_t tmp_tail;
> +		volatile struct i40e_tx_desc *tmp_txdp;
> +
> +		tmp_tail = txq->tx_tail;
> +		tmp_txdp = &txq->tx_ring[tmp_tail + 1];
> +
> +		do {
> +			if ((tmp_txdp->cmd_type_offset_bsz &
> +
> 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
> +
> 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> +				fdir_info->txq_available_buf_count++;
> +			else
> +				break;
> +
> +			tmp_tail += 2;
> +			if (tmp_tail >= txq->nb_tx_desc)
> +				tmp_tail = 0;
> +		} while (tmp_tail != txq->tx_tail);
> +	}
> +
> +	/*
> +	 * if txq_available_buf_count > 0, just use the next one is ok,
> +	 * else wait for the next DD until it's set to make sure the data
> +	 * had been fetched by hardware
> +	 */
> +	if (fdir_info->txq_available_buf_count > 0) {
> +		fdir_info->txq_available_buf_count--;
> +	} else {
> +		/* wait until the tx descriptor is ready */
> +		for (i = 0; i < I40E_FDIR_MAX_WAIT_US; i++) {
> +			if ((txdp->cmd_type_offset_bsz &
> +
> 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
> +
> 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> +				break;
> +			rte_delay_us(1);
> +		}
> +		if (i >= I40E_FDIR_MAX_WAIT_US) {
> +			PMD_DRV_LOG(ERR,
> +			    "Failed to program FDIR filter: time out to get DD on tx
> queue.");
> +			return NULL;
> +		}
> +	}
Why wait for I40E_FDIR_MAX_WAIT_US but not return NULL immediately?

[...]


>  i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
>  				  enum i40e_filter_pctype pctype,
>  				  const struct i40e_fdir_filter_conf *filter,
> -				  bool add)
> +				  bool add, bool wait_status)
>  {
>  	struct i40e_tx_queue *txq = pf->fdir.txq;
>  	struct i40e_rx_queue *rxq = pf->fdir.rxq;
> @@ -2011,8 +2092,10 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
>  	volatile struct i40e_tx_desc *txdp;
>  	volatile struct i40e_filter_program_desc *fdirdp;
>  	uint32_t td_cmd;
> -	uint16_t vsi_id, i;
> +	uint16_t vsi_id;
>  	uint8_t dest;
> +	uint32_t i;
> +	uint8_t retry_count = 0;
> 
>  	PMD_DRV_LOG(INFO, "filling filter programming descriptor.");
>  	fdirdp = (volatile struct i40e_filter_program_desc *)
> @@ -2087,7 +2170,8 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
> 
>  	PMD_DRV_LOG(INFO, "filling transmit descriptor.");
>  	txdp = &txq->tx_ring[txq->tx_tail + 1];
> -	txdp->buffer_addr = rte_cpu_to_le_64(pf->fdir.dma_addr);
> +	txdp->buffer_addr = rte_cpu_to_le_64(pf->fdir.dma_addr[txq->tx_tail / 2]);
> +
[txq->tx_tail / 2] is not readable. How about using the available packet buffer you already obtained directly, or a separate index to identify it?
 
>  	td_cmd = I40E_TX_DESC_CMD_EOP |
>  		 I40E_TX_DESC_CMD_RS  |
>  		 I40E_TX_DESC_CMD_DUMMY;
> @@ -2100,25 +2184,34 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
>  		txq->tx_tail = 0;
>  	/* Update the tx tail register */
>  	rte_wmb();
> +
> +	/* capture the previous error report(if any) from rx ring */
> +	while ((i40e_check_fdir_programming_status(rxq) < 0) &&
> +		(++retry_count < 100))
> +		PMD_DRV_LOG(INFO, "previous error report captured.");
> +
Why check the FDIR ring 100 times? Also, since "&&" is used here, is the log only printed if the 100th check fails?

> 
> --
> 2.17.1
Sun, Chenmin July 17, 2020, 8:26 a.m. UTC | #2
Best Regards,
Sun, Chenmin

> -----Original Message-----
> From: Wu, Jingjing <jingjing.wu@intel.com>
> Sent: Thursday, July 16, 2020 9:57 PM
> To: Sun, Chenmin <chenmin.sun@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>; Xing, Beilei <beilei.xing@intel.com>; Wang, Haiyue
> <haiyue.wang@intel.com>
> Cc: dev@dpdk.org
> Subject: RE: [PATCH V4 4/4] net/i40e: FDIR update rate optimization
> 
> 
> [...]
> 
> > +static inline unsigned char *
> > +i40e_find_available_buffer(struct rte_eth_dev *dev) {
> > +	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-
> >dev_private);
> > +	struct i40e_fdir_info *fdir_info = &pf->fdir;
> > +	struct i40e_tx_queue *txq = pf->fdir.txq;
> > +	volatile struct i40e_tx_desc *txdp = &txq->tx_ring[txq->tx_tail + 1];
> > +	uint32_t i;
> > +
> > +	/* no available buffer
> > +	 * search for more available buffers from the current
> > +	 * descriptor, until an unavailable one
> > +	 */
> > +	if (fdir_info->txq_available_buf_count <= 0) {
> > +		uint16_t tmp_tail;
> > +		volatile struct i40e_tx_desc *tmp_txdp;
> > +
> > +		tmp_tail = txq->tx_tail;
> > +		tmp_txdp = &txq->tx_ring[tmp_tail + 1];
> > +
> > +		do {
> > +			if ((tmp_txdp->cmd_type_offset_bsz &
> > +
> > 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
> > +
> > 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> > +				fdir_info->txq_available_buf_count++;
> > +			else
> > +				break;
> > +
> > +			tmp_tail += 2;
> > +			if (tmp_tail >= txq->nb_tx_desc)
> > +				tmp_tail = 0;
> > +		} while (tmp_tail != txq->tx_tail);
> > +	}
> > +
> > +	/*
> > +	 * if txq_available_buf_count > 0, just use the next one is ok,
> > +	 * else wait for the next DD until it's set to make sure the data
> > +	 * had been fetched by hardware
> > +	 */
> > +	if (fdir_info->txq_available_buf_count > 0) {
> > +		fdir_info->txq_available_buf_count--;
> > +	} else {
> > +		/* wait until the tx descriptor is ready */
> > +		for (i = 0; i < I40E_FDIR_MAX_WAIT_US; i++) {
> > +			if ((txdp->cmd_type_offset_bsz &
> > +
> > 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
> > +
> > 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> > +				break;
> > +			rte_delay_us(1);
> > +		}
> > +		if (i >= I40E_FDIR_MAX_WAIT_US) {
> > +			PMD_DRV_LOG(ERR,
> > +			    "Failed to program FDIR filter: time out to get DD on
> tx
> > queue.");
> > +			return NULL;
> > +		}
> > +	}
> Why wait for I40E_FDIR_MAX_WAIT_US but not return NULL immediately?

Done

> [...]
> 
> 
> >  i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
> >  				  enum i40e_filter_pctype pctype,
> >  				  const struct i40e_fdir_filter_conf *filter,
> > -				  bool add)
> > +				  bool add, bool wait_status)
> >  {
> >  	struct i40e_tx_queue *txq = pf->fdir.txq;
> >  	struct i40e_rx_queue *rxq = pf->fdir.rxq; @@ -2011,8 +2092,10 @@
> > i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
> >  	volatile struct i40e_tx_desc *txdp;
> >  	volatile struct i40e_filter_program_desc *fdirdp;
> >  	uint32_t td_cmd;
> > -	uint16_t vsi_id, i;
> > +	uint16_t vsi_id;
> >  	uint8_t dest;
> > +	uint32_t i;
> > +	uint8_t retry_count = 0;
> >
> >  	PMD_DRV_LOG(INFO, "filling filter programming descriptor.");
> >  	fdirdp = (volatile struct i40e_filter_program_desc *) @@ -2087,7
> > +2170,8 @@ i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
> >
> >  	PMD_DRV_LOG(INFO, "filling transmit descriptor.");
> >  	txdp = &txq->tx_ring[txq->tx_tail + 1];
> > -	txdp->buffer_addr = rte_cpu_to_le_64(pf->fdir.dma_addr);
> > +	txdp->buffer_addr = rte_cpu_to_le_64(pf->fdir.dma_addr[txq->tx_tail
> > +/ 2]);
> > +
> [txq->tx_tail / 2] is not readable, how about use the avail pkt you get directly?
> Or another index to identify it?

I have replaced it with `>> 1`.

> >  	td_cmd = I40E_TX_DESC_CMD_EOP |
> >  		 I40E_TX_DESC_CMD_RS  |
> >  		 I40E_TX_DESC_CMD_DUMMY;
> > @@ -2100,25 +2184,34 @@ i40e_flow_fdir_filter_programming(struct
> i40e_pf *pf,
> >  		txq->tx_tail = 0;
> >  	/* Update the tx tail register */
> >  	rte_wmb();
> > +
> > +	/* capture the previous error report(if any) from rx ring */
> > +	while ((i40e_check_fdir_programming_status(rxq) < 0) &&
> > +		(++retry_count < 100))
> > +		PMD_DRV_LOG(INFO, "previous error report captured.");
> > +
> Why check FDIR ring for 100 times? And "&&" is used here, the log is only print if
> the 100th check fails?
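No, the log is printed on every iteration in which a previous error report is captured, up to the retry limit (at most 100 iterations).
The purpose of this code is to clean up the FDIR RX queue.
I have added a new, independent function for this.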

> >
> > --
> > 2.17.1

Patch
diff mbox series

diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index f4f34dad3..96b42b376 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -264,6 +264,8 @@  enum i40e_flxpld_layer_idx {
 #define I40E_DEFAULT_DCB_APP_NUM    1
 #define I40E_DEFAULT_DCB_APP_PRIO   3
 
+#define I40E_FDIR_PRG_PKT_CNT       128
+
 /*
  * Struct to store flow created.
  */
@@ -705,12 +707,20 @@  TAILQ_HEAD(i40e_fdir_filter_list, i40e_fdir_filter);
  *  A structure used to define fields of a FDIR related info.
  */
 struct i40e_fdir_info {
+#define I40E_FDIR_PRG_PKT_CNT   128
+
 	struct i40e_vsi *fdir_vsi;     /* pointer to fdir VSI structure */
 	uint16_t match_counter_index;  /* Statistic counter index used for fdir*/
 	struct i40e_tx_queue *txq;
 	struct i40e_rx_queue *rxq;
-	void *prg_pkt;                 /* memory for fdir program packet */
-	uint64_t dma_addr;             /* physic address of packet memory*/
+	void *prg_pkt[I40E_FDIR_PRG_PKT_CNT];     /* memory for fdir program packet */
+	uint64_t dma_addr[I40E_FDIR_PRG_PKT_CNT]; /* physic address of packet memory*/
+	/*
+	 * txq available buffer counter, indicates how many available buffers
+	 * for fdir programming, initialized as I40E_FDIR_PRG_PKT_CNT
+	 */
+	int txq_available_buf_count;
+
 	/* input set bits for each pctype */
 	uint64_t input_set[I40E_FILTER_PCTYPE_MAX];
 	/*
diff --git a/drivers/net/i40e/i40e_fdir.c b/drivers/net/i40e/i40e_fdir.c
index fb778202f..c2647224e 100644
--- a/drivers/net/i40e/i40e_fdir.c
+++ b/drivers/net/i40e/i40e_fdir.c
@@ -100,7 +100,7 @@  static int
 i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
 				  enum i40e_filter_pctype pctype,
 				  const struct i40e_fdir_filter_conf *filter,
-				  bool add);
+				  bool add, bool wait_status);
 
 static int
 i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
@@ -164,6 +164,7 @@  i40e_fdir_setup(struct i40e_pf *pf)
 	char z_name[RTE_MEMZONE_NAMESIZE];
 	const struct rte_memzone *mz = NULL;
 	struct rte_eth_dev *eth_dev = pf->adapter->eth_dev;
+	uint16_t i;
 
 	if ((pf->flags & I40E_FLAG_FDIR) == 0) {
 		PMD_INIT_LOG(ERR, "HW doesn't support FDIR");
@@ -235,15 +236,21 @@  i40e_fdir_setup(struct i40e_pf *pf)
 			eth_dev->device->driver->name,
 			I40E_FDIR_MZ_NAME,
 			eth_dev->data->port_id);
-	mz = i40e_memzone_reserve(z_name, I40E_FDIR_PKT_LEN, SOCKET_ID_ANY);
+	mz = i40e_memzone_reserve(z_name, I40E_FDIR_PKT_LEN *
+			I40E_FDIR_PRG_PKT_CNT, SOCKET_ID_ANY);
 	if (!mz) {
 		PMD_DRV_LOG(ERR, "Cannot init memzone for "
 				 "flow director program packet.");
 		err = I40E_ERR_NO_MEMORY;
 		goto fail_mem;
 	}
-	pf->fdir.prg_pkt = mz->addr;
-	pf->fdir.dma_addr = mz->iova;
+
+	for (i = 0; i < I40E_FDIR_PRG_PKT_CNT; i++) {
+		pf->fdir.prg_pkt[i] = (uint8_t *)mz->addr +
+			I40E_FDIR_PKT_LEN * i;
+		pf->fdir.dma_addr[i] = mz->iova +
+			I40E_FDIR_PKT_LEN * i;
+	}
 
 	pf->fdir.match_counter_index = I40E_COUNTER_INDEX_FDIR(hw->pf_id);
 	pf->fdir.fdir_actual_cnt = 0;
@@ -1687,7 +1694,7 @@  i40e_add_del_fdir_filter(struct rte_eth_dev *dev,
 {
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
-	unsigned char *pkt = (unsigned char *)pf->fdir.prg_pkt;
+	unsigned char *pkt = (unsigned char *)pf->fdir.prg_pkt[0];
 	enum i40e_filter_pctype pctype;
 	int ret = 0;
 
@@ -1736,6 +1743,66 @@  i40e_add_del_fdir_filter(struct rte_eth_dev *dev,
 	return ret;
 }
 
+static inline unsigned char *
+i40e_find_available_buffer(struct rte_eth_dev *dev)
+{
+	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+	struct i40e_fdir_info *fdir_info = &pf->fdir;
+	struct i40e_tx_queue *txq = pf->fdir.txq;
+	volatile struct i40e_tx_desc *txdp = &txq->tx_ring[txq->tx_tail + 1];
+	uint32_t i;
+
+	/* no available buffer
+	 * search for more available buffers from the current
+	 * descriptor, until an unavailable one
+	 */
+	if (fdir_info->txq_available_buf_count <= 0) {
+		uint16_t tmp_tail;
+		volatile struct i40e_tx_desc *tmp_txdp;
+
+		tmp_tail = txq->tx_tail;
+		tmp_txdp = &txq->tx_ring[tmp_tail + 1];
+
+		do {
+			if ((tmp_txdp->cmd_type_offset_bsz &
+					rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
+					rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
+				fdir_info->txq_available_buf_count++;
+			else
+				break;
+
+			tmp_tail += 2;
+			if (tmp_tail >= txq->nb_tx_desc)
+				tmp_tail = 0;
+		} while (tmp_tail != txq->tx_tail);
+	}
+
+	/*
+	 * if txq_available_buf_count > 0, just use the next one is ok,
+	 * else wait for the next DD until it's set to make sure the data
+	 * had been fetched by hardware
+	 */
+	if (fdir_info->txq_available_buf_count > 0) {
+		fdir_info->txq_available_buf_count--;
+	} else {
+		/* wait until the tx descriptor is ready */
+		for (i = 0; i < I40E_FDIR_MAX_WAIT_US; i++) {
+			if ((txdp->cmd_type_offset_bsz &
+					rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
+					rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
+				break;
+			rte_delay_us(1);
+		}
+		if (i >= I40E_FDIR_MAX_WAIT_US) {
+			PMD_DRV_LOG(ERR,
+			    "Failed to program FDIR filter: time out to get DD on tx queue.");
+			return NULL;
+		}
+	}
+
+	return (unsigned char *)fdir_info->prg_pkt[txq->tx_tail >> 1];
+}
+
 /**
  * i40e_flow_add_del_fdir_filter - add or remove a flow director filter.
  * @pf: board private structure
@@ -1749,11 +1816,12 @@  i40e_flow_add_del_fdir_filter(struct rte_eth_dev *dev,
 {
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
-	unsigned char *pkt = (unsigned char *)pf->fdir.prg_pkt;
+	unsigned char *pkt = NULL;
 	enum i40e_filter_pctype pctype;
 	struct i40e_fdir_info *fdir_info = &pf->fdir;
 	struct i40e_fdir_filter *node;
 	struct i40e_fdir_filter check_filter; /* Check if the filter exists */
+	bool wait_status = true;
 	int ret = 0;
 
 	if (pf->fdir.fdir_vsi == NULL) {
@@ -1793,6 +1861,10 @@  i40e_flow_add_del_fdir_filter(struct rte_eth_dev *dev,
 				    "Conflict with existing flow director rules!");
 			return -EINVAL;
 		}
+
+		if (fdir_info->fdir_invalprio == 1 &&
+				fdir_info->fdir_guarantee_free_space > 0)
+			wait_status = false;
 	} else {
 		node = i40e_sw_fdir_filter_lookup(fdir_info,
 				&check_filter.fdir.input);
@@ -1808,8 +1880,16 @@  i40e_flow_add_del_fdir_filter(struct rte_eth_dev *dev,
 					"Error deleting fdir rule from hash table!");
 			return -EINVAL;
 		}
+
+		if (fdir_info->fdir_invalprio == 1)
+			wait_status = false;
 	}
 
+	/* find a buffer to store the pkt */
+	pkt = i40e_find_available_buffer(dev);
+	if (pkt == NULL)
+		goto error_op;
+
 	memset(pkt, 0, I40E_FDIR_PKT_LEN);
 	ret = i40e_flow_fdir_construct_pkt(pf, &filter->input, pkt);
 	if (ret < 0) {
@@ -1823,7 +1903,8 @@  i40e_flow_add_del_fdir_filter(struct rte_eth_dev *dev,
 			hw, I40E_GLQF_FD_PCTYPES((int)pctype));
 	}
 
-	ret = i40e_flow_fdir_filter_programming(pf, pctype, filter, add);
+	ret = i40e_flow_fdir_filter_programming(pf, pctype, filter, add,
+			wait_status);
 	if (ret < 0) {
 		PMD_DRV_LOG(ERR, "fdir programming fails for PCTYPE(%u).",
 			    pctype);
@@ -1953,7 +2034,7 @@  i40e_fdir_filter_programming(struct i40e_pf *pf,
 
 	PMD_DRV_LOG(INFO, "filling transmit descriptor.");
 	txdp = &(txq->tx_ring[txq->tx_tail + 1]);
-	txdp->buffer_addr = rte_cpu_to_le_64(pf->fdir.dma_addr);
+	txdp->buffer_addr = rte_cpu_to_le_64(pf->fdir.dma_addr[0]);
 	td_cmd = I40E_TX_DESC_CMD_EOP |
 		 I40E_TX_DESC_CMD_RS  |
 		 I40E_TX_DESC_CMD_DUMMY;
@@ -2003,7 +2084,7 @@  static int
 i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
 				  enum i40e_filter_pctype pctype,
 				  const struct i40e_fdir_filter_conf *filter,
-				  bool add)
+				  bool add, bool wait_status)
 {
 	struct i40e_tx_queue *txq = pf->fdir.txq;
 	struct i40e_rx_queue *rxq = pf->fdir.rxq;
@@ -2011,8 +2092,10 @@  i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
 	volatile struct i40e_tx_desc *txdp;
 	volatile struct i40e_filter_program_desc *fdirdp;
 	uint32_t td_cmd;
-	uint16_t vsi_id, i;
+	uint16_t vsi_id;
 	uint8_t dest;
+	uint32_t i;
+	uint8_t retry_count = 0;
 
 	PMD_DRV_LOG(INFO, "filling filter programming descriptor.");
 	fdirdp = (volatile struct i40e_filter_program_desc *)
@@ -2087,7 +2170,8 @@  i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
 
 	PMD_DRV_LOG(INFO, "filling transmit descriptor.");
 	txdp = &txq->tx_ring[txq->tx_tail + 1];
-	txdp->buffer_addr = rte_cpu_to_le_64(pf->fdir.dma_addr);
+	txdp->buffer_addr = rte_cpu_to_le_64(pf->fdir.dma_addr[txq->tx_tail / 2]);
+
 	td_cmd = I40E_TX_DESC_CMD_EOP |
 		 I40E_TX_DESC_CMD_RS  |
 		 I40E_TX_DESC_CMD_DUMMY;
@@ -2100,25 +2184,34 @@  i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
 		txq->tx_tail = 0;
 	/* Update the tx tail register */
 	rte_wmb();
+
+	/* capture the previous error report(if any) from rx ring */
+	while ((i40e_check_fdir_programming_status(rxq) < 0) &&
+		(++retry_count < 100))
+		PMD_DRV_LOG(INFO, "previous error report captured.");
+
 	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
-	for (i = 0; i < I40E_FDIR_MAX_WAIT_US; i++) {
-		if ((txdp->cmd_type_offset_bsz &
-				rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
-				rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
-			break;
-		rte_delay_us(1);
-	}
-	if (i >= I40E_FDIR_MAX_WAIT_US) {
-		PMD_DRV_LOG(ERR,
-		    "Failed to program FDIR filter: time out to get DD on tx queue.");
-		return -ETIMEDOUT;
-	}
-	/* totally delay 10 ms to check programming status*/
-	rte_delay_us(I40E_FDIR_MAX_WAIT_US);
-	if (i40e_check_fdir_programming_status(rxq) < 0) {
-		PMD_DRV_LOG(ERR,
-		    "Failed to program FDIR filter: programming status reported.");
-		return -ETIMEDOUT;
+
+	if (wait_status) {
+		for (i = 0; i < I40E_FDIR_MAX_WAIT_US; i++) {
+			if ((txdp->cmd_type_offset_bsz &
+					rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
+					rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
+				break;
+			rte_delay_us(1);
+		}
+		if (i >= I40E_FDIR_MAX_WAIT_US) {
+			PMD_DRV_LOG(ERR,
+			    "Failed to program FDIR filter: time out to get DD on tx queue.");
+			return -ETIMEDOUT;
+		}
+		/* totally delay 10 ms to check programming status*/
+		rte_delay_us(I40E_FDIR_MAX_WAIT_US);
+		if (i40e_check_fdir_programming_status(rxq) < 0) {
+			PMD_DRV_LOG(ERR,
+			    "Failed to program FDIR filter: programming status reported.");
+			return -ETIMEDOUT;
+		}
 	}
 
 	return 0;
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index d21fbeaca..fe7f9200c 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -2940,16 +2940,13 @@  i40e_dev_free_queues(struct rte_eth_dev *dev)
 	}
 }
 
-#define I40E_FDIR_NUM_TX_DESC  I40E_MIN_RING_DESC
-#define I40E_FDIR_NUM_RX_DESC  I40E_MIN_RING_DESC
-
 enum i40e_status_code
 i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
 {
 	struct i40e_tx_queue *txq;
 	const struct rte_memzone *tz = NULL;
-	uint32_t ring_size;
 	struct rte_eth_dev *dev;
+	uint32_t ring_size;
 
 	if (!pf) {
 		PMD_DRV_LOG(ERR, "PF is not available");
@@ -2996,6 +2993,7 @@  i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
 	 */
 	txq->q_set = TRUE;
 	pf->fdir.txq = txq;
+	pf->fdir.txq_available_buf_count = I40E_FDIR_PRG_PKT_CNT;
 
 	return I40E_SUCCESS;
 }
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index 8f11f011a..b4d00ef08 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -24,6 +24,9 @@ 
 #define	I40E_MIN_RING_DESC	64
 #define	I40E_MAX_RING_DESC	4096
 
+#define I40E_FDIR_NUM_TX_DESC   (I40E_FDIR_PRG_PKT_CNT * 2)
+#define I40E_FDIR_NUM_RX_DESC   (I40E_FDIR_PRG_PKT_CNT * 2)
+
 #define I40E_MIN_TSO_MSS          256
 #define I40E_MAX_TSO_MSS          9674