Hi Bruce,
On 08/11/2014 10:44 PM, Bruce Richardson wrote:
> Adjust the fast-path code to fix the regression caused by the pool
> pointer moving to the second cache line. This change adjusts the
> prefetching and also the way in which the mbufs are freed back to the
> mempool.
>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
Just one comment here (maybe this code should be reviewed by someone
who knows the ixgbe driver better than I do):
> @@ -252,14 +250,6 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
> uint16_t n = 0;
>
> - /*
> - * Begin scanning the H/W ring for done descriptors when the
> - * number of available descriptors drops below tx_free_thresh. For
> - * each done descriptor, free the associated buffer.
> - */
> - if (txq->nb_tx_free < txq->tx_free_thresh)
> - ixgbe_tx_free_bufs(txq);
> -
> /* Only use descriptors that are available */
> nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
> if (unlikely(nb_pkts == 0))
> @@ -323,6 +313,15 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> if (txq->tx_tail >= txq->nb_tx_desc)
> txq->tx_tail = 0;
>
> + /*
> + * Begin scanning the H/W ring for done descriptors when the
> + * number of available descriptors drops below tx_free_thresh. For
> + * each done descriptor, free the associated buffer.
> + */
> + if (txq->nb_tx_free < txq->tx_free_thresh)
> + ixgbe_tx_free_bufs(txq);
> +
> +
> /* update tail pointer */
> rte_wmb();
> IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
It looks like these two hunks are reverted in the next commit. I'm not
sure this is what you intended.
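For my own understanding, the net effect of these two hunks on the simple
TX path is sketched below, with stand-in types and stub helpers
(sketch_txq, fill_descs(), free_bufs(), write_tail() are made up, not the
driver code): the cleanup of completed descriptors moves from function
entry to just before the tail register update.

#include <stdint.h>

/* Stand-in queue state; not the real struct igb_tx_queue. */
struct sketch_txq {
	uint16_t nb_tx_free;
	uint16_t tx_free_thresh;
};

/* Trivial stubs for the descriptor-writing loop, the free pass and the
 * tail update, kept minimal just to show the new ordering. */
static void fill_descs(struct sketch_txq *q, uint16_t n) { q->nb_tx_free -= n; }
static void free_bufs(struct sketch_txq *q) { q->nb_tx_free += 32; /* tx_rs_thresh */ }
static void write_tail(struct sketch_txq *q) { (void)q; /* rte_wmb() + TDT write */ }

static uint16_t
sketch_xmit(struct sketch_txq *q, uint16_t nb_pkts)
{
	if (nb_pkts > q->nb_tx_free)            /* only use available descriptors */
		nb_pkts = q->nb_tx_free;
	if (nb_pkts == 0)
		return 0;

	fill_descs(q, nb_pkts);                 /* tx1()/tx4() also prefetch &mbuf->pool now */

	if (q->nb_tx_free < q->tx_free_thresh)  /* free pass moved here from function entry */
		free_bufs(q);

	write_tail(q);
	return nb_pkts;
}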
@@ -142,10 +142,6 @@ ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
*/
txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
- /* prefetch the mbufs that are about to be freed */
- for (i = 0; i < txq->tx_rs_thresh; ++i)
- rte_prefetch0((txep + i)->mbuf);
-
/* free buffers one at a time */
if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
@@ -186,6 +182,7 @@ tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
txdp->read.olinfo_status =
(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+ rte_prefetch0(&(*pkts)->pool);
}
}
@@ -205,6 +202,7 @@ tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
txdp->read.olinfo_status =
(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+ rte_prefetch0(&(*pkts)->pool);
}
/*
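The rte_prefetch0(&(*pkts)->pool) additions in tx4() and tx1() make sense
to me now that the pool pointer lives on the mbuf's second cache line. A
minimal illustration of the idea, with a made-up fake_mbuf layout and
__builtin_prefetch() standing in for rte_prefetch0():

/* Fake two-cache-line layout, NOT the real struct rte_mbuf. */
struct fake_mbuf {
	char first_line[64];	/* TX fast-path fields stay on line 1 */
	void *pool;		/* the pool pointer now sits on line 2 */
	char rest[56];
};

static inline void
fill_desc_for(struct fake_mbuf *m)
{
	/*
	 * Touch the second cache line while the descriptor is being
	 * written, so that a later m->pool dereference in the free path
	 * is less likely to miss. On x86, rte_prefetch0() boils down to
	 * a prefetcht0 hint, much like this builtin.
	 */
	__builtin_prefetch(&m->pool, 0 /* read */, 3 /* high locality */);

	/* ... the descriptor words for m would be written here ... */
}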
@@ -252,14 +250,6 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
uint16_t n = 0;
- /*
- * Begin scanning the H/W ring for done descriptors when the
- * number of available descriptors drops below tx_free_thresh. For
- * each done descriptor, free the associated buffer.
- */
- if (txq->nb_tx_free < txq->tx_free_thresh)
- ixgbe_tx_free_bufs(txq);
-
/* Only use descriptors that are available */
nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
if (unlikely(nb_pkts == 0))
@@ -323,6 +313,15 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
if (txq->tx_tail >= txq->nb_tx_desc)
txq->tx_tail = 0;
+ /*
+ * Begin scanning the H/W ring for done descriptors when the
+ * number of available descriptors drops below tx_free_thresh. For
+ * each done descriptor, free the associated buffer.
+ */
+ if (txq->nb_tx_free < txq->tx_free_thresh)
+ ixgbe_tx_free_bufs(txq);
+
+
/* update tail pointer */
rte_wmb();
IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
@@ -96,14 +96,6 @@ struct igb_tx_entry_v {
};
/**
- * continuous entry sequence, gather by the same mempool
- */
-struct igb_tx_entry_seq {
- const struct rte_mempool* pool;
- uint32_t same_pool;
-};
-
-/**
* Structure associated with each RX queue.
*/
struct igb_rx_queue {
@@ -170,10 +162,6 @@ struct igb_tx_queue {
volatile union ixgbe_adv_tx_desc *tx_ring;
uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
-#ifdef RTE_IXGBE_INC_VECTOR
- /** continuous tx entry sequence within the same mempool */
- struct igb_tx_entry_seq *sw_ring_seq;
-#endif
volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
uint16_t nb_tx_desc; /**< number of TX descriptors. */
uint16_t tx_tail; /**< current value of TDT reg. */
@@ -365,9 +365,8 @@ static inline int __attribute__((always_inline))
ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
{
struct igb_tx_entry_v *txep;
- struct igb_tx_entry_seq *txsp;
uint32_t status;
- uint32_t n, k;
+ uint32_t n;
#ifdef RTE_MBUF_REFCNT
uint32_t i;
int nb_free = 0;
@@ -387,25 +386,42 @@ ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
*/
txep = &((struct igb_tx_entry_v *)txq->sw_ring)[txq->tx_next_dd -
(n - 1)];
- txsp = &txq->sw_ring_seq[txq->tx_next_dd - (n - 1)];
-
- while (n > 0) {
- k = RTE_MIN(n, txsp[n-1].same_pool);
#ifdef RTE_MBUF_REFCNT
- for (i = 0; i < k; i++) {
- m = __rte_pktmbuf_prefree_seg((txep+n-k+i)->mbuf);
+
+ m = __rte_pktmbuf_prefree_seg(txep[0].mbuf);
+ if (likely(m != NULL)) {
+ free[0] = m;
+ nb_free = 1;
+ for (i = 1; i < n; i++) {
+ m = __rte_pktmbuf_prefree_seg(txep[i].mbuf);
+ if (likely(m != NULL)) {
+ if (likely(m->pool == free[0]->pool))
+ free[nb_free++] = m;
+ else {
+ rte_mempool_put_bulk(free[0]->pool,
+ (void *)free, nb_free);
+ free[0] = m;
+ nb_free = 1;
+ }
+ }
+ }
+ rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
+ }
+ else {
+ for (i = 1; i < n; i++) {
+ m = __rte_pktmbuf_prefree_seg(txep[i].mbuf);
if (m != NULL)
- free[nb_free++] = m;
+ rte_mempool_put(m->pool, m);
}
- rte_mempool_put_bulk((void *)txsp[n-1].pool,
- (void **)free, nb_free);
-#else
- rte_mempool_put_bulk((void *)txsp[n-1].pool,
- (void **)(txep+n-k), k);
-#endif
- n -= k;
}
+#else /* !RTE_MBUF_REFCNT */
+ for (i = 0; i < n; i++) {
+ m = txep[i].mbuf;
+ rte_mempool_put(m->pool, m);
+ }
+#endif /* RTE_MBUF_REFCNT */
+
/* buffers were freed, update counters */
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
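If I read the new free loop correctly, it batches consecutive mbufs that
come from the same mempool and flushes each run with a single
rte_mempool_put_bulk(), skipping mbufs that __rte_pktmbuf_prefree_seg()
reports as still referenced. A standalone sketch of just the grouping
part, using made-up pool/pkt types and a put_bulk() stub in place of the
DPDK calls:

#include <stdio.h>

/* Stand-ins: 'struct pool' plays struct rte_mempool, 'struct pkt' plays
 * struct rte_mbuf, put_bulk() plays rte_mempool_put_bulk(). */
struct pool { const char *name; };
struct pkt { struct pool *pool; };

static void
put_bulk(struct pool *p, void **objs, unsigned int n)
{
	(void)objs;
	printf("returning %u buffers to pool %s\n", n, p->name);
}

#define RS_THRESH 32	/* one free batch, like tx_rs_thresh */

/* Group consecutive packets sharing a pool; free each run in one call. */
static void
free_grouped_by_pool(struct pkt **pkts, unsigned int n)
{
	void *run[RS_THRESH];
	unsigned int nb_run = 0;
	struct pool *cur = NULL;
	unsigned int i;

	for (i = 0; i < n && i < RS_THRESH; i++) {
		struct pkt *m = pkts[i];

		if (nb_run != 0 && m->pool != cur) {
			put_bulk(cur, run, nb_run);	/* flush previous run */
			nb_run = 0;
		}
		cur = m->pool;
		run[nb_run++] = m;
	}
	if (nb_run != 0)
		put_bulk(cur, run, nb_run);		/* flush the last run */
}

int
main(void)
{
	struct pool a = { "a" }, b = { "b" };
	struct pkt p[5] = { {&a}, {&a}, {&b}, {&b}, {&a} };
	struct pkt *v[5] = { &p[0], &p[1], &p[2], &p[3], &p[4] };

	free_grouped_by_pool(v, 5);	/* prints runs of 2, 2 and 1 */
	return 0;
}

That would also explain why the per-entry igb_tx_entry_seq bookkeeping
can go away.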
@@ -417,19 +433,11 @@ ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
static inline void __attribute__((always_inline))
tx_backlog_entry(struct igb_tx_entry_v *txep,
- struct igb_tx_entry_seq *txsp,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
int i;
- for (i = 0; i < (int)nb_pkts; ++i) {
+ for (i = 0; i < (int)nb_pkts; ++i)
txep[i].mbuf = tx_pkts[i];
- /* check and update sequence number */
- txsp[i].pool = tx_pkts[i]->pool;
- if (txsp[i-1].pool == tx_pkts[i]->pool)
- txsp[i].same_pool = txsp[i-1].same_pool + 1;
- else
- txsp[i].same_pool = 1;
- }
}
uint16_t
@@ -439,7 +447,6 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
struct igb_tx_queue *txq = (struct igb_tx_queue *)tx_queue;
volatile union ixgbe_adv_tx_desc *txdp;
struct igb_tx_entry_v *txep;
- struct igb_tx_entry_seq *txsp;
uint16_t n, nb_commit, tx_id;
uint64_t flags = DCMD_DTYP_FLAGS;
uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
@@ -458,14 +465,13 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_id = txq->tx_tail;
txdp = &txq->tx_ring[tx_id];
txep = &((struct igb_tx_entry_v *)txq->sw_ring)[tx_id];
- txsp = &txq->sw_ring_seq[tx_id];
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
n = (uint16_t)(txq->nb_tx_desc - tx_id);
if (nb_commit >= n) {
- tx_backlog_entry(txep, txsp, tx_pkts, n);
+ tx_backlog_entry(txep, tx_pkts, n);
for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
vtx1(txdp, *tx_pkts, flags);
@@ -480,10 +486,9 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
/* avoid reach the end of ring */
txdp = &(txq->tx_ring[tx_id]);
txep = &(((struct igb_tx_entry_v *)txq->sw_ring)[tx_id]);
- txsp = &(txq->sw_ring_seq[tx_id]);
}
- tx_backlog_entry(txep, txsp, tx_pkts, nb_commit);
+ tx_backlog_entry(txep, tx_pkts, nb_commit);
vtx(txdp, tx_pkts, nb_commit, flags);
@@ -507,7 +512,6 @@ ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)
{
unsigned i;
struct igb_tx_entry_v *txe;
- struct igb_tx_entry_seq *txs;
uint16_t nb_free, max_desc;
if (txq->sw_ring != NULL) {
@@ -525,10 +529,6 @@ ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)
for (i = 0; i < txq->nb_tx_desc; i++) {
txe = (struct igb_tx_entry_v *)&txq->sw_ring[i];
txe->mbuf = NULL;
-
- txs = &txq->sw_ring_seq[i];
- txs->pool = NULL;
- txs->same_pool = 0;
}
}
}
@@ -543,11 +543,6 @@ ixgbe_tx_free_swring(struct igb_tx_queue *txq)
rte_free((struct igb_rx_entry *)txq->sw_ring - 1);
txq->sw_ring = NULL;
}
-
- if (txq->sw_ring_seq != NULL) {
- rte_free(txq->sw_ring_seq - 1);
- txq->sw_ring_seq = NULL;
- }
}
static void
@@ -556,7 +551,6 @@ ixgbe_reset_tx_queue(struct igb_tx_queue *txq)
static const union ixgbe_adv_tx_desc zeroed_desc = { .read = {
.buffer_addr = 0} };
struct igb_tx_entry_v *txe = (struct igb_tx_entry_v *)txq->sw_ring;
- struct igb_tx_entry_seq *txs = txq->sw_ring_seq;
uint16_t i;
/* Zero out HW ring memory */
@@ -568,8 +562,6 @@ ixgbe_reset_tx_queue(struct igb_tx_queue *txq)
volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
txd->wb.status = IXGBE_TXD_STAT_DD;
txe[i].mbuf = NULL;
- txs[i].pool = NULL;
- txs[i].same_pool = 0;
}
txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
@@ -595,27 +587,14 @@ static struct ixgbe_txq_ops vec_txq_ops = {
};
int ixgbe_txq_vec_setup(struct igb_tx_queue *txq,
- unsigned int socket_id)
+ unsigned int socket_id __rte_unused)
{
- uint16_t nb_desc;
-
if (txq->sw_ring == NULL)
return -1;
- /* request addtional one entry for continous sequence check */
- nb_desc = (uint16_t)(txq->nb_tx_desc + 1);
-
- txq->sw_ring_seq = rte_zmalloc_socket("txq->sw_ring_seq",
- sizeof(struct igb_tx_entry_seq) * nb_desc,
- CACHE_LINE_SIZE, socket_id);
- if (txq->sw_ring_seq == NULL)
- return -1;
-
-
/* leave the first one for overflow */
txq->sw_ring = (struct igb_tx_entry *)
((struct igb_tx_entry_v *)txq->sw_ring + 1);
- txq->sw_ring_seq += 1;
txq->ops = &vec_txq_ops;
return 0;