[dpdk-dev,2/5] net/mlx5: free buffers in bulk on Tx completion

Message ID 20170628230403.10142-3-yskoh@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/checkpatch success coding style OK

Commit Message

Yongseok Koh June 28, 2017, 11:04 p.m. UTC
  When processing Tx completion, it is more efficient to free buffers in bulk
using rte_mempool_put_bulk() if buffers are from a same mempool.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 36 +++++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)
  

Comments

Nélio Laranjeiro June 30, 2017, 12:30 p.m. UTC | #1
On Wed, Jun 28, 2017 at 04:04:00PM -0700, Yongseok Koh wrote:
> When processing Tx completion, it is more efficient to free buffers in bulk
> using rte_mempool_put_bulk() if buffers are from a same mempool.
> 
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5_rxtx.c | 36 +++++++++++++++++++++++++++---------
>  1 file changed, 27 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index 43db06ad8..d81d630f7 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -264,6 +264,8 @@ txq_complete(struct txq *txq)
>  	uint16_t cq_ci = txq->cq_ci;
>  	volatile struct mlx5_cqe *cqe = NULL;
>  	volatile struct mlx5_wqe_ctrl *ctrl;
> +	struct rte_mbuf *m, *free[elts_n];
> +	unsigned int blk_n = 0;
>  
>  	do {
>  		volatile struct mlx5_cqe *tmp;
> @@ -296,21 +298,37 @@ txq_complete(struct txq *txq)
>  	assert((elts_tail & elts_m) < (1 << txq->wqe_n));
>  	/* Free buffers. */
>  	while (elts_free != elts_tail) {
> -		struct rte_mbuf *elt = (*txq->elts)[elts_free & elts_m];
> -		struct rte_mbuf *elt_next =
> -			(*txq->elts)[(elts_free + 1) & elts_m];
> -
> +		m = rte_pktmbuf_prefree_seg((*txq->elts)[elts_free++ & elts_m]);
> +		if (likely(m != NULL)) {
> +			if (blk_n) {
> +				if (likely(m->pool == free[0]->pool)) {
> +					free[blk_n++] = m;
> +				} else {
> +					rte_mempool_put_bulk(
> +							free[0]->pool,
> +							(void *)free,
> +							blk_n);

The indentation is strange here, free[0] should be on the same line as
rte_mempool_put_bulk.

> +					free[0] = m;
> +					blk_n = 1;
> +				}
> +			} else {
> +				free[0] = m;
> +				blk_n = 1;
> +			}
> +		}

This loop could be smaller, blk_n can only be equal to 0 in the first
iteration, otherwise is >= 1.
The first if statement can be merged with the second one: 

	if (likely(m != NULL)) {
		if (likely(blk_n && m->pool == free[0]->pool)) {
		...
		} else {
		...
		}

Thanks,
  
Nélio Laranjeiro June 30, 2017, 12:43 p.m. UTC | #2
On Fri, Jun 30, 2017 at 02:30:47PM +0200, Nélio Laranjeiro wrote:
> On Wed, Jun 28, 2017 at 04:04:00PM -0700, Yongseok Koh wrote:
> > When processing Tx completion, it is more efficient to free buffers in bulk
> > using rte_mempool_put_bulk() if buffers are from a same mempool.
> > 
> > Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
> > ---
> >  drivers/net/mlx5/mlx5_rxtx.c | 36 +++++++++++++++++++++++++++---------
> >  1 file changed, 27 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> > index 43db06ad8..d81d630f7 100644
> > --- a/drivers/net/mlx5/mlx5_rxtx.c
> > +++ b/drivers/net/mlx5/mlx5_rxtx.c
> > @@ -264,6 +264,8 @@ txq_complete(struct txq *txq)
> >  	uint16_t cq_ci = txq->cq_ci;
> >  	volatile struct mlx5_cqe *cqe = NULL;
> >  	volatile struct mlx5_wqe_ctrl *ctrl;
> > +	struct rte_mbuf *m, *free[elts_n];
> > +	unsigned int blk_n = 0;
> >  
> >  	do {
> >  		volatile struct mlx5_cqe *tmp;
> > @@ -296,21 +298,37 @@ txq_complete(struct txq *txq)
> >  	assert((elts_tail & elts_m) < (1 << txq->wqe_n));
> >  	/* Free buffers. */
> >  	while (elts_free != elts_tail) {
> > -		struct rte_mbuf *elt = (*txq->elts)[elts_free & elts_m];
> > -		struct rte_mbuf *elt_next =
> > -			(*txq->elts)[(elts_free + 1) & elts_m];
> > -
> > +		m = rte_pktmbuf_prefree_seg((*txq->elts)[elts_free++ & elts_m]);
> > +		if (likely(m != NULL)) {
> > +			if (blk_n) {
> > +				if (likely(m->pool == free[0]->pool)) {
> > +					free[blk_n++] = m;
> > +				} else {
> > +					rte_mempool_put_bulk(
> > +							free[0]->pool,
> > +							(void *)free,
> > +							blk_n);
> 
> The indentation is strange here, free[0] should be on the same line as
> rte_mempool_put_bulk.
> 
> > +					free[0] = m;
> > +					blk_n = 1;
> > +				}
> > +			} else {
> > +				free[0] = m;
> > +				blk_n = 1;
> > +			}
> > +		}
> 
> This loop could be smaller, blk_n can only be equal to 0 in the first
> iteration, otherwise is >= 1.
> The first if statement can be merged with the second one: 
> 
> 	if (likely(m != NULL)) {
> 		if (likely(blk_n && m->pool == free[0]->pool)) {

This condition is a wrong also, it should be !blk_n || (m->pool ...

Why don't you keep a pointer to the mpool (e.g. m->pool == pool)?  It
seems to cost a little to deference two pointers to reach the pool's
one.

Thanks,
  
Yongseok Koh June 30, 2017, 5:49 p.m. UTC | #3
On Fri, Jun 30, 2017 at 02:43:21PM +0200, Nélio Laranjeiro wrote:
> On Fri, Jun 30, 2017 at 02:30:47PM +0200, Nélio Laranjeiro wrote:
> > On Wed, Jun 28, 2017 at 04:04:00PM -0700, Yongseok Koh wrote:
> > > When processing Tx completion, it is more efficient to free buffers in bulk
> > > using rte_mempool_put_bulk() if buffers are from a same mempool.
> > > 
> > > Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
> > > ---
> > >  drivers/net/mlx5/mlx5_rxtx.c | 36 +++++++++++++++++++++++++++---------
> > >  1 file changed, 27 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> > > index 43db06ad8..d81d630f7 100644
> > > --- a/drivers/net/mlx5/mlx5_rxtx.c
> > > +++ b/drivers/net/mlx5/mlx5_rxtx.c
> > > @@ -264,6 +264,8 @@ txq_complete(struct txq *txq)
> > >  	uint16_t cq_ci = txq->cq_ci;
> > >  	volatile struct mlx5_cqe *cqe = NULL;
> > >  	volatile struct mlx5_wqe_ctrl *ctrl;
> > > +	struct rte_mbuf *m, *free[elts_n];
> > > +	unsigned int blk_n = 0;
> > >  
> > >  	do {
> > >  		volatile struct mlx5_cqe *tmp;
> > > @@ -296,21 +298,37 @@ txq_complete(struct txq *txq)
> > >  	assert((elts_tail & elts_m) < (1 << txq->wqe_n));
> > >  	/* Free buffers. */
> > >  	while (elts_free != elts_tail) {
> > > -		struct rte_mbuf *elt = (*txq->elts)[elts_free & elts_m];
> > > -		struct rte_mbuf *elt_next =
> > > -			(*txq->elts)[(elts_free + 1) & elts_m];
> > > -
> > > +		m = rte_pktmbuf_prefree_seg((*txq->elts)[elts_free++ & elts_m]);
> > > +		if (likely(m != NULL)) {
> > > +			if (blk_n) {
> > > +				if (likely(m->pool == free[0]->pool)) {
> > > +					free[blk_n++] = m;
> > > +				} else {
> > > +					rte_mempool_put_bulk(
> > > +							free[0]->pool,
> > > +							(void *)free,
> > > +							blk_n);
> > 
> > The indentation is strange here, free[0] should be on the same line as
> > rte_mempool_put_bulk.
> > 
> > > +					free[0] = m;
> > > +					blk_n = 1;
> > > +				}
> > > +			} else {
> > > +				free[0] = m;
> > > +				blk_n = 1;
> > > +			}
> > > +		}
> > 
> > This loop could be smaller, blk_n can only be equal to 0 in the first
> > iteration, otherwise is >= 1.
> > The first if statement can be merged with the second one: 
> > 
> > 	if (likely(m != NULL)) {
> > 		if (likely(blk_n && m->pool == free[0]->pool)) {
> 
> This condition is a wrong also, it should be !blk_n || (m->pool ...
> 
> Why don't you keep a pointer to the mpool (e.g. m->pool == pool)?  It
> seems to cost a little to deference two pointers to reach the pool's
> one.
Good point. The following will be the final streamlined code.
        /* Free buffers. */
        while (elts_free != elts_tail) {
                m = rte_pktmbuf_prefree_seg((*txq->elts)[elts_free++ & elts_m]);
                if (likely(m != NULL)) {
                        if (likely(m->pool == pool)) {
                                free[blk_n++] = m;
                        } else {
                                if (likely(pool != NULL))
                                        rte_mempool_put_bulk(pool,
                                                             (void *)free,
                                                             blk_n);
                                free[0] = m;
                                pool = m->pool;
                                blk_n = 1;
                        }
                }
        }
        if (blk_n)
                rte_mempool_put_bulk(pool, (void *)free, blk_n);


Thanks,
Yongseok
  

Patch

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 43db06ad8..d81d630f7 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -264,6 +264,8 @@  txq_complete(struct txq *txq)
 	uint16_t cq_ci = txq->cq_ci;
 	volatile struct mlx5_cqe *cqe = NULL;
 	volatile struct mlx5_wqe_ctrl *ctrl;
+	struct rte_mbuf *m, *free[elts_n];
+	unsigned int blk_n = 0;
 
 	do {
 		volatile struct mlx5_cqe *tmp;
@@ -296,21 +298,37 @@  txq_complete(struct txq *txq)
 	assert((elts_tail & elts_m) < (1 << txq->wqe_n));
 	/* Free buffers. */
 	while (elts_free != elts_tail) {
-		struct rte_mbuf *elt = (*txq->elts)[elts_free & elts_m];
-		struct rte_mbuf *elt_next =
-			(*txq->elts)[(elts_free + 1) & elts_m];
-
+		m = rte_pktmbuf_prefree_seg((*txq->elts)[elts_free++ & elts_m]);
+		if (likely(m != NULL)) {
+			if (blk_n) {
+				if (likely(m->pool == free[0]->pool)) {
+					free[blk_n++] = m;
+				} else {
+					rte_mempool_put_bulk(
+							free[0]->pool,
+							(void *)free,
+							blk_n);
+					free[0] = m;
+					blk_n = 1;
+				}
+			} else {
+				free[0] = m;
+				blk_n = 1;
+			}
+		}
+	}
+	if (blk_n)
+		rte_mempool_put_bulk(free[0]->pool, (void *)free, blk_n);
 #ifndef NDEBUG
-		/* Poisoning. */
+	elts_free = txq->elts_tail;
+	/* Poisoning. */
+	while (elts_free != elts_tail) {
 		memset(&(*txq->elts)[elts_free & elts_m],
 		       0x66,
 		       sizeof((*txq->elts)[elts_free & elts_m]));
-#endif
-		RTE_MBUF_PREFETCH_TO_FREE(elt_next);
-		/* Only one segment needs to be freed. */
-		rte_pktmbuf_free_seg(elt);
 		++elts_free;
 	}
+#endif
 	txq->cq_ci = cq_ci;
 	txq->elts_tail = elts_tail;
 	/* Update the consumer index. */