[dpdk-dev,7/8] net/mlx4: align Tx descriptors number

Message ID 1511871570-16826-8-git-send-email-matan@mellanox.com
State Superseded, archived
Delegated to: Shahaf Shuler
Headers show

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/checkpatch success coding style OK

Commit Message

Matan Azrad Nov. 28, 2017, 12:19 p.m.
Using a power-of-two number of descriptors makes ring management easier
and allows using a mask operation instead of wraparound conditions.

Adjust the Tx descriptor number to be a power of two and change the
calculation to use a mask accordingly.

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx4/mlx4_rxtx.c | 22 ++++++++--------------
 drivers/net/mlx4/mlx4_txq.c  | 20 ++++++++++++--------
 2 files changed, 20 insertions(+), 22 deletions(-)

Comments

Adrien Mazarguil Dec. 6, 2017, 10:59 a.m. | #1
On Tue, Nov 28, 2017 at 12:19:29PM +0000, Matan Azrad wrote:
> Using power of 2 descriptors number makes the ring management easier
> and allows to use mask operation instead of wraparound conditions.
> 
> Adjust Tx descriptor number to be power of 2 and change calculation to
> use mask accordingly.
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>

A few minor comments, see below.

> ---
>  drivers/net/mlx4/mlx4_rxtx.c | 22 ++++++++--------------
>  drivers/net/mlx4/mlx4_txq.c  | 20 ++++++++++++--------
>  2 files changed, 20 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
> index 30f2930..b5aaf4c 100644
> --- a/drivers/net/mlx4/mlx4_rxtx.c
> +++ b/drivers/net/mlx4/mlx4_rxtx.c
> @@ -314,7 +314,7 @@ struct pv {
>   *   Pointer to Tx queue structure.
>   */
>  static void
> -mlx4_txq_complete(struct txq *txq, const unsigned int elts_n,
> +mlx4_txq_complete(struct txq *txq, const unsigned int elts_m,
>  				  struct mlx4_sq *sq)

Documentation needs updating.

>  {
>  	unsigned int elts_tail = txq->elts_tail;
> @@ -355,13 +355,11 @@ struct pv {
>  	if (unlikely(!completed))
>  		return;
>  	/* First stamping address is the end of the last one. */
> -	first_txbb = (&(*txq->elts)[elts_tail])->eocb;
> +	first_txbb = (&(*txq->elts)[elts_tail & elts_m])->eocb;
>  	elts_tail += completed;
> -	if (elts_tail >= elts_n)
> -		elts_tail -= elts_n;
>  	/* The new tail element holds the end address. */
>  	sq->remain_size += mlx4_txq_stamp_freed_wqe(sq, first_txbb,
> -		(&(*txq->elts)[elts_tail])->eocb);
> +		(&(*txq->elts)[elts_tail & elts_m])->eocb);
>  	/* Update CQ consumer index. */
>  	cq->cons_index = cons_index;
>  	*cq->set_ci_db = rte_cpu_to_be_32(cons_index & MLX4_CQ_DB_CI_MASK);
> @@ -534,6 +532,7 @@ struct pv {
>  	struct txq *txq = (struct txq *)dpdk_txq;
>  	unsigned int elts_head = txq->elts_head;
>  	const unsigned int elts_n = txq->elts_n;
> +	const unsigned int elts_m = elts_n - 1;
>  	unsigned int bytes_sent = 0;
>  	unsigned int i;
>  	unsigned int max;
> @@ -543,24 +542,20 @@ struct pv {
>  
>  	assert(txq->elts_comp_cd != 0);
>  	if (likely(txq->elts_comp != 0))
> -		mlx4_txq_complete(txq, elts_n, sq);
> +		mlx4_txq_complete(txq, elts_m, sq);
>  	max = (elts_n - (elts_head - txq->elts_tail));
> -	if (max > elts_n)
> -		max -= elts_n;
>  	assert(max >= 1);
>  	assert(max <= elts_n);
>  	/* Always leave one free entry in the ring. */
>  	--max;
>  	if (max > pkts_n)
>  		max = pkts_n;
> -	elt = &(*txq->elts)[elts_head];
> +	elt = &(*txq->elts)[elts_head & elts_m];
>  	/* First Tx burst element saves the next WQE control segment. */
>  	ctrl = elt->wqe;
>  	for (i = 0; (i != max); ++i) {
>  		struct rte_mbuf *buf = pkts[i];
> -		unsigned int elts_head_next =
> -			(((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
> -		struct txq_elt *elt_next = &(*txq->elts)[elts_head_next];
> +		struct txq_elt *elt_next = &(*txq->elts)[++elts_head & elts_m];
>  		uint32_t owner_opcode = sq->owner_opcode;
>  		volatile struct mlx4_wqe_data_seg *dseg =
>  				(volatile struct mlx4_wqe_data_seg *)(ctrl + 1);
> @@ -678,7 +673,6 @@ struct pv {
>  		ctrl->owner_opcode = rte_cpu_to_be_32(owner_opcode);
>  		elt->buf = buf;
>  		bytes_sent += buf->pkt_len;
> -		elts_head = elts_head_next;
>  		ctrl = ctrl_next;
>  		elt = elt_next;
>  	}
> @@ -694,7 +688,7 @@ struct pv {
>  	rte_wmb();
>  	/* Ring QP doorbell. */
>  	rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
> -	txq->elts_head = elts_head;
> +	txq->elts_head += i;
>  	txq->elts_comp += i;
>  	return i;
>  }
> diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
> index 4c7b62a..253075a 100644
> --- a/drivers/net/mlx4/mlx4_txq.c
> +++ b/drivers/net/mlx4/mlx4_txq.c
> @@ -76,17 +76,16 @@
>  	unsigned int elts_head = txq->elts_head;
>  	unsigned int elts_tail = txq->elts_tail;
>  	struct txq_elt (*elts)[txq->elts_n] = txq->elts;
> +	unsigned int elts_m = txq->elts_n - 1;
>  
>  	DEBUG("%p: freeing WRs", (void *)txq);
>  	while (elts_tail != elts_head) {
> -		struct txq_elt *elt = &(*elts)[elts_tail];
> +		struct txq_elt *elt = &(*elts)[elts_tail++ & elts_m];
>  
>  		assert(elt->buf != NULL);
>  		rte_pktmbuf_free(elt->buf);
>  		elt->buf = NULL;
>  		elt->wqe = NULL;
> -		if (++elts_tail == RTE_DIM(*elts))
> -			elts_tail = 0;
>  	}
>  	txq->elts_tail = txq->elts_head;
>  }
> @@ -208,7 +207,9 @@ struct txq_mp2mr_mbuf_check_data {
>  	struct mlx4dv_obj mlxdv;
>  	struct mlx4dv_qp dv_qp;
>  	struct mlx4dv_cq dv_cq;
> -	struct txq_elt (*elts)[desc];
> +	uint32_t elts_size = desc > 0x1000 ? 0x1000 :
> +		rte_align32pow2((uint32_t)desc);

Where is that magical 0x1000 value coming from? It should at least come
through a macro definition.

> +	struct txq_elt (*elts)[elts_size];
>  	struct ibv_qp_init_attr qp_init_attr;
>  	struct txq *txq;
>  	uint8_t *bounce_buf;
> @@ -247,11 +248,14 @@ struct txq_mp2mr_mbuf_check_data {
>  		      (void *)dev, idx);
>  		return -rte_errno;
>  	}
> -	if (!desc) {
> -		rte_errno = EINVAL;
> -		ERROR("%p: invalid number of Tx descriptors", (void *)dev);
> -		return -rte_errno;
> +	if ((uint32_t)desc != elts_size) {
> +		desc = (uint16_t)elts_size;
> +		WARN("%p: changed number of descriptors in TX queue %u"
> +		     " to be power of two (%d)",
> +		     (void *)dev, idx, desc);

You should display the same message as in mlx4_rxq.c for consistency
(also TX => Tx).

>  	}
> +	DEBUG("%p: configuring queue %u for %u descriptors",
> +	      (void *)dev, idx, desc);

Unnecessary duplicated debugging message already printed at the beginning of
this function. Yes this is a different value but WARN() made that pretty
clear.

>  	/* Allocate and initialize Tx queue. */
>  	mlx4_zmallocv_socket("TXQ", vec, RTE_DIM(vec), socket);
>  	if (!txq) {
> -- 
> 1.8.3.1
>
Matan Azrad Dec. 6, 2017, 11:44 a.m. | #2
Hi Adrien

> -----Original Message-----
> From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> Sent: Wednesday, December 6, 2017 12:59 PM
> To: Matan Azrad <matan@mellanox.com>
> Cc: dev@dpdk.org
> Subject: Re: [PATCH 7/8] net/mlx4: align Tx descriptors number
> 
> On Tue, Nov 28, 2017 at 12:19:29PM +0000, Matan Azrad wrote:
> > Using power of 2 descriptors number makes the ring management easier
> > and allows to use mask operation instead of wraparound conditions.
> >
> > Adjust Tx descriptor number to be power of 2 and change calculation to
> > use mask accordingly.
> >
> > Signed-off-by: Matan Azrad <matan@mellanox.com>
> 
> A few minor comments, see below.
> 
> > ---
> >  drivers/net/mlx4/mlx4_rxtx.c | 22 ++++++++--------------
> > drivers/net/mlx4/mlx4_txq.c  | 20 ++++++++++++--------
> >  2 files changed, 20 insertions(+), 22 deletions(-)
> >
> > diff --git a/drivers/net/mlx4/mlx4_rxtx.c
> > b/drivers/net/mlx4/mlx4_rxtx.c index 30f2930..b5aaf4c 100644
> > --- a/drivers/net/mlx4/mlx4_rxtx.c
> > +++ b/drivers/net/mlx4/mlx4_rxtx.c
> > @@ -314,7 +314,7 @@ struct pv {
> >   *   Pointer to Tx queue structure.
> >   */
> >  static void
> > -mlx4_txq_complete(struct txq *txq, const unsigned int elts_n,
> > +mlx4_txq_complete(struct txq *txq, const unsigned int elts_m,
> >  				  struct mlx4_sq *sq)
> 
> Documentation needs updating.
> 

OK

> >  {
> >  	unsigned int elts_tail = txq->elts_tail; @@ -355,13 +355,11 @@
> > struct pv {
> >  	if (unlikely(!completed))
> >  		return;
> >  	/* First stamping address is the end of the last one. */
> > -	first_txbb = (&(*txq->elts)[elts_tail])->eocb;
> > +	first_txbb = (&(*txq->elts)[elts_tail & elts_m])->eocb;
> >  	elts_tail += completed;
> > -	if (elts_tail >= elts_n)
> > -		elts_tail -= elts_n;
> >  	/* The new tail element holds the end address. */
> >  	sq->remain_size += mlx4_txq_stamp_freed_wqe(sq, first_txbb,
> > -		(&(*txq->elts)[elts_tail])->eocb);
> > +		(&(*txq->elts)[elts_tail & elts_m])->eocb);
> >  	/* Update CQ consumer index. */
> >  	cq->cons_index = cons_index;
> >  	*cq->set_ci_db = rte_cpu_to_be_32(cons_index &
> MLX4_CQ_DB_CI_MASK);
> > @@ -534,6 +532,7 @@ struct pv {
> >  	struct txq *txq = (struct txq *)dpdk_txq;
> >  	unsigned int elts_head = txq->elts_head;
> >  	const unsigned int elts_n = txq->elts_n;
> > +	const unsigned int elts_m = elts_n - 1;
> >  	unsigned int bytes_sent = 0;
> >  	unsigned int i;
> >  	unsigned int max;
> > @@ -543,24 +542,20 @@ struct pv {
> >
> >  	assert(txq->elts_comp_cd != 0);
> >  	if (likely(txq->elts_comp != 0))
> > -		mlx4_txq_complete(txq, elts_n, sq);
> > +		mlx4_txq_complete(txq, elts_m, sq);
> >  	max = (elts_n - (elts_head - txq->elts_tail));
> > -	if (max > elts_n)
> > -		max -= elts_n;
> >  	assert(max >= 1);
> >  	assert(max <= elts_n);
> >  	/* Always leave one free entry in the ring. */
> >  	--max;
> >  	if (max > pkts_n)
> >  		max = pkts_n;
> > -	elt = &(*txq->elts)[elts_head];
> > +	elt = &(*txq->elts)[elts_head & elts_m];
> >  	/* First Tx burst element saves the next WQE control segment. */
> >  	ctrl = elt->wqe;
> >  	for (i = 0; (i != max); ++i) {
> >  		struct rte_mbuf *buf = pkts[i];
> > -		unsigned int elts_head_next =
> > -			(((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
> > -		struct txq_elt *elt_next = &(*txq->elts)[elts_head_next];
> > +		struct txq_elt *elt_next = &(*txq->elts)[++elts_head &
> elts_m];
> >  		uint32_t owner_opcode = sq->owner_opcode;
> >  		volatile struct mlx4_wqe_data_seg *dseg =
> >  				(volatile struct mlx4_wqe_data_seg *)(ctrl +
> 1); @@ -678,7 +673,6
> > @@ struct pv {
> >  		ctrl->owner_opcode = rte_cpu_to_be_32(owner_opcode);
> >  		elt->buf = buf;
> >  		bytes_sent += buf->pkt_len;
> > -		elts_head = elts_head_next;
> >  		ctrl = ctrl_next;
> >  		elt = elt_next;
> >  	}
> > @@ -694,7 +688,7 @@ struct pv {
> >  	rte_wmb();
> >  	/* Ring QP doorbell. */
> >  	rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
> > -	txq->elts_head = elts_head;
> > +	txq->elts_head += i;
> >  	txq->elts_comp += i;
> >  	return i;
> >  }
> > diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
> > index 4c7b62a..253075a 100644
> > --- a/drivers/net/mlx4/mlx4_txq.c
> > +++ b/drivers/net/mlx4/mlx4_txq.c
> > @@ -76,17 +76,16 @@
> >  	unsigned int elts_head = txq->elts_head;
> >  	unsigned int elts_tail = txq->elts_tail;
> >  	struct txq_elt (*elts)[txq->elts_n] = txq->elts;
> > +	unsigned int elts_m = txq->elts_n - 1;
> >
> >  	DEBUG("%p: freeing WRs", (void *)txq);
> >  	while (elts_tail != elts_head) {
> > -		struct txq_elt *elt = &(*elts)[elts_tail];
> > +		struct txq_elt *elt = &(*elts)[elts_tail++ & elts_m];
> >
> >  		assert(elt->buf != NULL);
> >  		rte_pktmbuf_free(elt->buf);
> >  		elt->buf = NULL;
> >  		elt->wqe = NULL;
> > -		if (++elts_tail == RTE_DIM(*elts))
> > -			elts_tail = 0;
> >  	}
> >  	txq->elts_tail = txq->elts_head;
> >  }
> > @@ -208,7 +207,9 @@ struct txq_mp2mr_mbuf_check_data {
> >  	struct mlx4dv_obj mlxdv;
> >  	struct mlx4dv_qp dv_qp;
> >  	struct mlx4dv_cq dv_cq;
> > -	struct txq_elt (*elts)[desc];
> > +	uint32_t elts_size = desc > 0x1000 ? 0x1000 :
> > +		rte_align32pow2((uint32_t)desc);
> 
> Where is that magical 0x1000 value coming from? It should at least come
> through a macro definition.
> 
> > +	struct txq_elt (*elts)[elts_size];
> >  	struct ibv_qp_init_attr qp_init_attr;
> >  	struct txq *txq;
> >  	uint8_t *bounce_buf;
> > @@ -247,11 +248,14 @@ struct txq_mp2mr_mbuf_check_data {
> >  		      (void *)dev, idx);
> >  		return -rte_errno;
> >  	}
> > -	if (!desc) {
> > -		rte_errno = EINVAL;
> > -		ERROR("%p: invalid number of Tx descriptors", (void *)dev);
> > -		return -rte_errno;
> > +	if ((uint32_t)desc != elts_size) {
> > +		desc = (uint16_t)elts_size;
> > +		WARN("%p: changed number of descriptors in TX queue %u"
> > +		     " to be power of two (%d)",
> > +		     (void *)dev, idx, desc);
> 
> You should display the same message as in mlx4_rxq.c for consistency (also
> TX => Tx).
> 
OK

> >  	}
> > +	DEBUG("%p: configuring queue %u for %u descriptors",
> > +	      (void *)dev, idx, desc);
> 
> Unnecessary duplicated debugging message already printed at the beginning
> of this function. Yes this is a different value but WARN() made that pretty
> clear.
> 
> >  	/* Allocate and initialize Tx queue. */
> >  	mlx4_zmallocv_socket("TXQ", vec, RTE_DIM(vec), socket);
> >  	if (!txq) {
> > --
> > 1.8.3.1
> >
> 
> --
> Adrien Mazarguil
> 6WIND

Patch

diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 30f2930..b5aaf4c 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -314,7 +314,7 @@  struct pv {
  *   Pointer to Tx queue structure.
  */
 static void
-mlx4_txq_complete(struct txq *txq, const unsigned int elts_n,
+mlx4_txq_complete(struct txq *txq, const unsigned int elts_m,
 				  struct mlx4_sq *sq)
 {
 	unsigned int elts_tail = txq->elts_tail;
@@ -355,13 +355,11 @@  struct pv {
 	if (unlikely(!completed))
 		return;
 	/* First stamping address is the end of the last one. */
-	first_txbb = (&(*txq->elts)[elts_tail])->eocb;
+	first_txbb = (&(*txq->elts)[elts_tail & elts_m])->eocb;
 	elts_tail += completed;
-	if (elts_tail >= elts_n)
-		elts_tail -= elts_n;
 	/* The new tail element holds the end address. */
 	sq->remain_size += mlx4_txq_stamp_freed_wqe(sq, first_txbb,
-		(&(*txq->elts)[elts_tail])->eocb);
+		(&(*txq->elts)[elts_tail & elts_m])->eocb);
 	/* Update CQ consumer index. */
 	cq->cons_index = cons_index;
 	*cq->set_ci_db = rte_cpu_to_be_32(cons_index & MLX4_CQ_DB_CI_MASK);
@@ -534,6 +532,7 @@  struct pv {
 	struct txq *txq = (struct txq *)dpdk_txq;
 	unsigned int elts_head = txq->elts_head;
 	const unsigned int elts_n = txq->elts_n;
+	const unsigned int elts_m = elts_n - 1;
 	unsigned int bytes_sent = 0;
 	unsigned int i;
 	unsigned int max;
@@ -543,24 +542,20 @@  struct pv {
 
 	assert(txq->elts_comp_cd != 0);
 	if (likely(txq->elts_comp != 0))
-		mlx4_txq_complete(txq, elts_n, sq);
+		mlx4_txq_complete(txq, elts_m, sq);
 	max = (elts_n - (elts_head - txq->elts_tail));
-	if (max > elts_n)
-		max -= elts_n;
 	assert(max >= 1);
 	assert(max <= elts_n);
 	/* Always leave one free entry in the ring. */
 	--max;
 	if (max > pkts_n)
 		max = pkts_n;
-	elt = &(*txq->elts)[elts_head];
+	elt = &(*txq->elts)[elts_head & elts_m];
 	/* First Tx burst element saves the next WQE control segment. */
 	ctrl = elt->wqe;
 	for (i = 0; (i != max); ++i) {
 		struct rte_mbuf *buf = pkts[i];
-		unsigned int elts_head_next =
-			(((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
-		struct txq_elt *elt_next = &(*txq->elts)[elts_head_next];
+		struct txq_elt *elt_next = &(*txq->elts)[++elts_head & elts_m];
 		uint32_t owner_opcode = sq->owner_opcode;
 		volatile struct mlx4_wqe_data_seg *dseg =
 				(volatile struct mlx4_wqe_data_seg *)(ctrl + 1);
@@ -678,7 +673,6 @@  struct pv {
 		ctrl->owner_opcode = rte_cpu_to_be_32(owner_opcode);
 		elt->buf = buf;
 		bytes_sent += buf->pkt_len;
-		elts_head = elts_head_next;
 		ctrl = ctrl_next;
 		elt = elt_next;
 	}
@@ -694,7 +688,7 @@  struct pv {
 	rte_wmb();
 	/* Ring QP doorbell. */
 	rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
-	txq->elts_head = elts_head;
+	txq->elts_head += i;
 	txq->elts_comp += i;
 	return i;
 }
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 4c7b62a..253075a 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -76,17 +76,16 @@ 
 	unsigned int elts_head = txq->elts_head;
 	unsigned int elts_tail = txq->elts_tail;
 	struct txq_elt (*elts)[txq->elts_n] = txq->elts;
+	unsigned int elts_m = txq->elts_n - 1;
 
 	DEBUG("%p: freeing WRs", (void *)txq);
 	while (elts_tail != elts_head) {
-		struct txq_elt *elt = &(*elts)[elts_tail];
+		struct txq_elt *elt = &(*elts)[elts_tail++ & elts_m];
 
 		assert(elt->buf != NULL);
 		rte_pktmbuf_free(elt->buf);
 		elt->buf = NULL;
 		elt->wqe = NULL;
-		if (++elts_tail == RTE_DIM(*elts))
-			elts_tail = 0;
 	}
 	txq->elts_tail = txq->elts_head;
 }
@@ -208,7 +207,9 @@  struct txq_mp2mr_mbuf_check_data {
 	struct mlx4dv_obj mlxdv;
 	struct mlx4dv_qp dv_qp;
 	struct mlx4dv_cq dv_cq;
-	struct txq_elt (*elts)[desc];
+	uint32_t elts_size = desc > 0x1000 ? 0x1000 :
+		rte_align32pow2((uint32_t)desc);
+	struct txq_elt (*elts)[elts_size];
 	struct ibv_qp_init_attr qp_init_attr;
 	struct txq *txq;
 	uint8_t *bounce_buf;
@@ -247,11 +248,14 @@  struct txq_mp2mr_mbuf_check_data {
 		      (void *)dev, idx);
 		return -rte_errno;
 	}
-	if (!desc) {
-		rte_errno = EINVAL;
-		ERROR("%p: invalid number of Tx descriptors", (void *)dev);
-		return -rte_errno;
+	if ((uint32_t)desc != elts_size) {
+		desc = (uint16_t)elts_size;
+		WARN("%p: changed number of descriptors in TX queue %u"
+		     " to be power of two (%d)",
+		     (void *)dev, idx, desc);
 	}
+	DEBUG("%p: configuring queue %u for %u descriptors",
+	      (void *)dev, idx, desc);
 	/* Allocate and initialize Tx queue. */
 	mlx4_zmallocv_socket("TXQ", vec, RTE_DIM(vec), socket);
 	if (!txq) {