[dpdk-dev,RFC,09/14] Fix performance regression due to moved pool ptr

Message ID 1407789890-17355-10-git-send-email-bruce.richardson@intel.com (mailing list archive)
State RFC, archived
Headers

Commit Message

Bruce Richardson Aug. 11, 2014, 8:44 p.m. UTC
Adjust the fast-path code to fix the regression caused by the pool
pointer moving to the second cache line. This change adjusts the
prefetching and also the way in which the mbufs are freed back to the
mempool.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c     | 23 +++++----
 lib/librte_pmd_ixgbe/ixgbe_rxtx.h     | 12 -----
 lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c | 93 ++++++++++++++---------------------
 3 files changed, 47 insertions(+), 81 deletions(-)
  

Comments

Olivier Matz Aug. 12, 2014, 11:28 a.m. UTC | #1
Hi Bruce,

On 08/11/2014 10:44 PM, Bruce Richardson wrote:
> Adjust the fast-path code to fix the regression caused by the pool
> pointer moving to the second cache line. This change adjusts the
> prefetching and also the way in which the mbufs are freed back to the
> mempool.
>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>

Just one comment here (maybe this code should be reviewed by someone
knowing the ixgbe driver better than me):


> @@ -252,14 +250,6 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>   	volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
>   	uint16_t n = 0;
>
> -	/*
> -	 * Begin scanning the H/W ring for done descriptors when the
> -	 * number of available descriptors drops below tx_free_thresh.  For
> -	 * each done descriptor, free the associated buffer.
> -	 */
> -	if (txq->nb_tx_free < txq->tx_free_thresh)
> -		ixgbe_tx_free_bufs(txq);
> -
>   	/* Only use descriptors that are available */
>   	nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
>   	if (unlikely(nb_pkts == 0))
> @@ -323,6 +313,15 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>   	if (txq->tx_tail >= txq->nb_tx_desc)
>   		txq->tx_tail = 0;
>
> +	/*
> +	 * Begin scanning the H/W ring for done descriptors when the
> +	 * number of available descriptors drops below tx_free_thresh.  For
> +	 * each done descriptor, free the associated buffer.
> +	 */
> +	if (txq->nb_tx_free < txq->tx_free_thresh)
> +		ixgbe_tx_free_bufs(txq);
> +
> +
>   	/* update tail pointer */
>   	rte_wmb();
>   	IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);

It looks like these 2 hunks are reverted in next commit. I'm not sure
this is what you expected.
  

Patch

diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 1b0e272..fa3b357 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -142,10 +142,6 @@  ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
 	 */
 	txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
 
-	/* prefetch the mbufs that are about to be freed */
-	for (i = 0; i < txq->tx_rs_thresh; ++i)
-		rte_prefetch0((txep + i)->mbuf);
-
 	/* free buffers one at a time */
 	if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
 		for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
@@ -186,6 +182,7 @@  tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
 				((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
 		txdp->read.olinfo_status =
 				(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+		rte_prefetch0(&(*pkts)->pool);
 	}
 }
 
@@ -205,6 +202,7 @@  tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
 			((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
 	txdp->read.olinfo_status =
 			(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+	rte_prefetch0(&(*pkts)->pool);
 }
 
 /*
@@ -252,14 +250,6 @@  tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
 	uint16_t n = 0;
 
-	/*
-	 * Begin scanning the H/W ring for done descriptors when the
-	 * number of available descriptors drops below tx_free_thresh.  For
-	 * each done descriptor, free the associated buffer.
-	 */
-	if (txq->nb_tx_free < txq->tx_free_thresh)
-		ixgbe_tx_free_bufs(txq);
-
 	/* Only use descriptors that are available */
 	nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
 	if (unlikely(nb_pkts == 0))
@@ -323,6 +313,15 @@  tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	if (txq->tx_tail >= txq->nb_tx_desc)
 		txq->tx_tail = 0;
 
+	/*
+	 * Begin scanning the H/W ring for done descriptors when the
+	 * number of available descriptors drops below tx_free_thresh.  For
+	 * each done descriptor, free the associated buffer.
+	 */
+	if (txq->nb_tx_free < txq->tx_free_thresh)
+		ixgbe_tx_free_bufs(txq);
+
+
 	/* update tail pointer */
 	rte_wmb();
 	IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
index 1861f18..d9889d9 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
@@ -96,14 +96,6 @@  struct igb_tx_entry_v {
 };
 
 /**
- * continuous entry sequence, gather by the same mempool
- */
-struct igb_tx_entry_seq {
-	const struct rte_mempool* pool;
-	uint32_t same_pool;
-};
-
-/**
  * Structure associated with each RX queue.
  */
 struct igb_rx_queue {
@@ -170,10 +162,6 @@  struct igb_tx_queue {
 	volatile union ixgbe_adv_tx_desc *tx_ring;
 	uint64_t            tx_ring_phys_addr; /**< TX ring DMA address. */
 	struct igb_tx_entry *sw_ring;      /**< virtual address of SW ring. */
-#ifdef RTE_IXGBE_INC_VECTOR
-	/** continuous tx entry sequence within the same mempool */
-	struct igb_tx_entry_seq *sw_ring_seq;
-#endif
 	volatile uint32_t   *tdt_reg_addr; /**< Address of TDT register. */
 	uint16_t            nb_tx_desc;    /**< number of TX descriptors. */
 	uint16_t            tx_tail;       /**< current value of TDT reg. */
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
index 780bf1e..c98356e 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
@@ -365,9 +365,8 @@  static inline int __attribute__((always_inline))
 ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
 {
 	struct igb_tx_entry_v *txep;
-	struct igb_tx_entry_seq *txsp;
 	uint32_t status;
-	uint32_t n, k;
+	uint32_t n;
 #ifdef RTE_MBUF_REFCNT
 	uint32_t i;
 	int nb_free = 0;
@@ -387,25 +386,42 @@  ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
 	 */
 	txep = &((struct igb_tx_entry_v *)txq->sw_ring)[txq->tx_next_dd -
 			(n - 1)];
-	txsp = &txq->sw_ring_seq[txq->tx_next_dd - (n - 1)];
-
-	while (n > 0) {
-		k = RTE_MIN(n, txsp[n-1].same_pool);
 #ifdef RTE_MBUF_REFCNT
-		for (i = 0; i < k; i++) {
-			m = __rte_pktmbuf_prefree_seg((txep+n-k+i)->mbuf);
+
+	m = __rte_pktmbuf_prefree_seg(txep[0].mbuf);
+	if (likely(m != NULL)) {
+		free[0] = m;
+		nb_free = 1;
+		for (i = 1; i < n; i++) {
+			m = __rte_pktmbuf_prefree_seg(txep[i].mbuf);
+			if (likely(m != NULL)) {
+				if (likely(m->pool == free[0]->pool))
+					free[nb_free++] = m;
+				else {
+					rte_mempool_put_bulk(free[0]->pool,
+							(void *)free, nb_free);
+					free[0] = m;
+					nb_free = 1;
+				}
+			}
+		}
+		rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+	}
+	else {
+		for (i = 1; i < n; i++) {
+			m = __rte_pktmbuf_prefree_seg(txep[i].mbuf);
 			if (m != NULL)
-				free[nb_free++] = m;
+				rte_mempool_put(m->pool, m);
 		}
-		rte_mempool_put_bulk((void *)txsp[n-1].pool,
-				(void **)free, nb_free);
-#else
-		rte_mempool_put_bulk((void *)txsp[n-1].pool,
-				(void **)(txep+n-k), k);
-#endif
-		n -= k;
 	}
 
+#else /* no scatter_gather */
+	for (i = 0; i < n; i++) {
+		m = txep[i]->mbuf;
+		rte_mempool_put(m->pool,m);
+	}
+#endif /* scatter_gather */
+
 	/* buffers were freed, update counters */
 	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
 	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
@@ -417,19 +433,11 @@  ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
 
 static inline void __attribute__((always_inline))
 tx_backlog_entry(struct igb_tx_entry_v *txep,
-		 struct igb_tx_entry_seq *txsp,
 		 struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
 	int i;
-	for (i = 0; i < (int)nb_pkts; ++i) {
+	for (i = 0; i < (int)nb_pkts; ++i)
 		txep[i].mbuf = tx_pkts[i];
-		/* check and update sequence number */
-		txsp[i].pool = tx_pkts[i]->pool;
-		if (txsp[i-1].pool == tx_pkts[i]->pool)
-			txsp[i].same_pool = txsp[i-1].same_pool + 1;
-		else
-			txsp[i].same_pool = 1;
-	}
 }
 
 uint16_t
@@ -439,7 +447,6 @@  ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct igb_tx_queue *txq = (struct igb_tx_queue *)tx_queue;
 	volatile union ixgbe_adv_tx_desc *txdp;
 	struct igb_tx_entry_v *txep;
-	struct igb_tx_entry_seq *txsp;
 	uint16_t n, nb_commit, tx_id;
 	uint64_t flags = DCMD_DTYP_FLAGS;
 	uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
@@ -458,14 +465,13 @@  ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 	tx_id = txq->tx_tail;
 	txdp = &txq->tx_ring[tx_id];
 	txep = &((struct igb_tx_entry_v *)txq->sw_ring)[tx_id];
-	txsp = &txq->sw_ring_seq[tx_id];
 
 	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
 
 	n = (uint16_t)(txq->nb_tx_desc - tx_id);
 	if (nb_commit >= n) {
 
-		tx_backlog_entry(txep, txsp, tx_pkts, n);
+		tx_backlog_entry(txep, tx_pkts, n);
 
 		for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
 			vtx1(txdp, *tx_pkts, flags);
@@ -480,10 +486,9 @@  ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 		/* avoid reach the end of ring */
 		txdp = &(txq->tx_ring[tx_id]);
 		txep = &(((struct igb_tx_entry_v *)txq->sw_ring)[tx_id]);
-		txsp = &(txq->sw_ring_seq[tx_id]);
 	}
 
-	tx_backlog_entry(txep, txsp, tx_pkts, nb_commit);
+	tx_backlog_entry(txep, tx_pkts, nb_commit);
 
 	vtx(txdp, tx_pkts, nb_commit, flags);
 
@@ -507,7 +512,6 @@  ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)
 {
 	unsigned i;
 	struct igb_tx_entry_v *txe;
-	struct igb_tx_entry_seq *txs;
 	uint16_t nb_free, max_desc;
 
 	if (txq->sw_ring != NULL) {
@@ -525,10 +529,6 @@  ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)
 		for (i = 0; i < txq->nb_tx_desc; i++) {
 			txe = (struct igb_tx_entry_v *)&txq->sw_ring[i];
 			txe->mbuf = NULL;
-
-			txs = &txq->sw_ring_seq[i];
-			txs->pool = NULL;
-			txs->same_pool = 0;
 		}
 	}
 }
@@ -543,11 +543,6 @@  ixgbe_tx_free_swring(struct igb_tx_queue *txq)
 		rte_free((struct igb_rx_entry *)txq->sw_ring - 1);
 		txq->sw_ring = NULL;
 	}
-
-	if (txq->sw_ring_seq != NULL) {
-		rte_free(txq->sw_ring_seq - 1);
-		txq->sw_ring_seq = NULL;
-	}
 }
 
 static void
@@ -556,7 +551,6 @@  ixgbe_reset_tx_queue(struct igb_tx_queue *txq)
 	static const union ixgbe_adv_tx_desc zeroed_desc = { .read = {
 			.buffer_addr = 0} };
 	struct igb_tx_entry_v *txe = (struct igb_tx_entry_v *)txq->sw_ring;
-	struct igb_tx_entry_seq *txs = txq->sw_ring_seq;
 	uint16_t i;
 
 	/* Zero out HW ring memory */
@@ -568,8 +562,6 @@  ixgbe_reset_tx_queue(struct igb_tx_queue *txq)
 		volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
 		txd->wb.status = IXGBE_TXD_STAT_DD;
 		txe[i].mbuf = NULL;
-		txs[i].pool = NULL;
-		txs[i].same_pool = 0;
 	}
 
 	txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
@@ -595,27 +587,14 @@  static struct ixgbe_txq_ops vec_txq_ops = {
 };
 
 int ixgbe_txq_vec_setup(struct igb_tx_queue *txq,
-			unsigned int socket_id)
+			unsigned int socket_id __rte_unused)
 {
-	uint16_t nb_desc;
-
 	if (txq->sw_ring == NULL)
 		return -1;
 
-	/* request addtional one entry for continous sequence check */
-	nb_desc = (uint16_t)(txq->nb_tx_desc + 1);
-
-	txq->sw_ring_seq = rte_zmalloc_socket("txq->sw_ring_seq",
-				sizeof(struct igb_tx_entry_seq) * nb_desc,
-				CACHE_LINE_SIZE, socket_id);
-	if (txq->sw_ring_seq == NULL)
-		return -1;
-
-
 	/* leave the first one for overflow */
 	txq->sw_ring = (struct igb_tx_entry *)
 		((struct igb_tx_entry_v *)txq->sw_ring + 1);
-	txq->sw_ring_seq += 1;
 	txq->ops = &vec_txq_ops;
 
 	return 0;