[dpdk-dev,2/2] net/mlx5: enforce Tx num of segments limitation

Message ID 20170823073358.116786-2-shahafs@mellanox.com (mailing list archive)
State Superseded, archived
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Shahaf Shuler Aug. 23, 2017, 7:33 a.m. UTC
  Mellanox NICs have a limitation on the number of mbuf segments a multi
segment mbuf can have. The max number depends on the Tx offloads requested.

The current code does not enforce such limitation, which might cause
malformed WQEs to be written to the device.

This commit adds verification for the number of mbuf segments posted
to the device. In case of overflow the packet will not be sent.
Debug prints were added to help the application identify the cause of such
a case.

Cc: stable@dpdk.org

Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
---

This patch should be applied only after the series:
http://dpdk.org/dev/patchwork/patch/27367/

---
 drivers/net/mlx5/mlx5_defs.h         |  3 ++-
 drivers/net/mlx5/mlx5_prm.h          |  3 +++
 drivers/net/mlx5/mlx5_rxtx.c         | 30 +++++++++++++++++++++++++++---
 drivers/net/mlx5/mlx5_rxtx_vec_sse.c |  8 ++++++++
 drivers/net/mlx5/mlx5_txq.c          | 27 +++++++++++++++++++++++++++
 5 files changed, 67 insertions(+), 4 deletions(-)
  

Comments

Nélio Laranjeiro Aug. 24, 2017, 1:28 p.m. UTC | #1
On Wed, Aug 23, 2017 at 10:33:58AM +0300, Shahaf Shuler wrote:
> Mellanox NICs has a limitation on the number of mbuf segments a multi
> segment mbuf can have. The max number depends on the Tx offloads requested.
> 
> The current code not enforce such limitation, which might cause
> malformed WQEs to be written to the device.

Avoid acronyms in the commit message (at least on first occurrence); not
everyone knows what a WQE is, and getting such information is not easy.

> This commit adds verification for the number of mbuf segments posted
> to the device. In case of overflow the packet will not be sent.
> Debug prints were added to help application identify the cause for such
> case.
> 
> Cc: stable@dpdk.org
> 
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> ---
> 
> This patch should be applied only after the series:
> http://dpdk.org/dev/patchwork/patch/27367/
> 
> ---
>  drivers/net/mlx5/mlx5_defs.h         |  3 ++-
>  drivers/net/mlx5/mlx5_prm.h          |  3 +++
>  drivers/net/mlx5/mlx5_rxtx.c         | 30 +++++++++++++++++++++++++++---
>  drivers/net/mlx5/mlx5_rxtx_vec_sse.c |  8 ++++++++
>  drivers/net/mlx5/mlx5_txq.c          | 27 +++++++++++++++++++++++++++
>  5 files changed, 67 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
> index 608072f7e..87244e7db 100644
> --- a/drivers/net/mlx5/mlx5_prm.h
> +++ b/drivers/net/mlx5/mlx5_prm.h
> @@ -154,6 +154,9 @@
>  /* Default mark value used when none is provided. */
>  #define MLX5_FLOW_MARK_DEFAULT 0xffffff
>  
> +/* Maximum number of DS in WQE. */
> +#define MLX5_MAX_DS (63)
> +

Why the parentheses?

Thanks,
  

Patch

diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index a76bc6f65..7d2a65099 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -100,7 +100,8 @@ 
 
 /*
  * Maximum size of burst for vectorized Tx. This is related to the maximum size
- * of Enhaned MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * of Enhanced MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * Careful when changing, large value can cause wqe DS to overlap.
  */
 #define MLX5_VPMD_TX_MAX_BURST        32U
 
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 608072f7e..87244e7db 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -154,6 +154,9 @@ 
 /* Default mark value used when none is provided. */
 #define MLX5_FLOW_MARK_DEFAULT 0xffffff
 
+/* Maximum number of DS in WQE. */
+#define MLX5_MAX_DS (63)
+
 /* Subset of struct mlx5_wqe_eth_seg. */
 struct mlx5_wqe_eth_seg_small {
 	uint32_t rsvd0;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index b07bcd118..77a789c2c 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -664,6 +664,15 @@  mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		else
 			j += sg;
 next_pkt:
+		if (ds > MLX5_MAX_DS) {
+#ifndef NDEBUG
+			WARN("Cannot send packet %p with %d segments "
+			     "wqe.ds = %d, wqe.inline_sz = %d",
+			     (void *)pkts, (*pkts)->nb_segs, ds,
+			     pkt_inline_sz);
+#endif
+			break;
+		}
 		++elts_head;
 		++pkts;
 		++i;
@@ -850,8 +859,13 @@  mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		if (max_elts < segs_n)
 			break;
 		/* Do not bother with large packets MPW cannot handle. */
-		if (segs_n > MLX5_MPW_DSEG_MAX)
+		if (segs_n > MLX5_MPW_DSEG_MAX) {
+#ifndef NDEBUG
+			WARN("Cannot send packet %p with %d segments",
+			     (void *)buf, segs_n);
+#endif
 			break;
+		}
 		max_elts -= segs_n;
 		--pkts_n;
 		/* Should we enable HW CKSUM offload */
@@ -1071,8 +1085,13 @@  mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 		if (max_elts < segs_n)
 			break;
 		/* Do not bother with large packets MPW cannot handle. */
-		if (segs_n > MLX5_MPW_DSEG_MAX)
+		if (segs_n > MLX5_MPW_DSEG_MAX) {
+#ifndef NDEBUG
+			WARN("Cannot send packet %p with %d segments",
+			     (void *)buf, segs_n);
+#endif
 			break;
+		}
 		max_elts -= segs_n;
 		--pkts_n;
 		/*
@@ -1360,8 +1379,13 @@  mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		if (max_elts - j < segs_n)
 			break;
 		/* Do not bother with large packets MPW cannot handle. */
-		if (segs_n > MLX5_MPW_DSEG_MAX)
+		if (segs_n > MLX5_MPW_DSEG_MAX) {
+#ifndef NDEBUG
+			WARN("Cannot send packet %p with %d segments",
+			     (void *)buf, segs_n);
+#endif
 			break;
+		}
 		/* Should we enable HW CKSUM offload. */
 		if (buf->ol_flags &
 		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index 30727e6dd..6f66517df 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -257,6 +257,13 @@  txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		if (segs_n == 1 ||
 		    max_elts < segs_n || max_wqe < 2)
 			break;
+		if (segs_n > MLX5_MPW_DSEG_MAX) {
+#ifndef NDEBUG
+			WARN("Cannot send packet %p with %d segments",
+			     (void *)buf, segs_n);
+#endif
+			break;
+		}
 		wqe = &((volatile struct mlx5_wqe64 *)
 			 txq->wqes)[wqe_ci & wq_mask].hdr;
 		if (buf->ol_flags &
@@ -374,6 +381,7 @@  txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	max_elts = (elts_n - (elts_head - txq->elts_tail));
 	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
 	pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
+	assert(pkts_n <= MLX5_MAX_DS - nb_dword_in_hdr);
 	if (unlikely(!pkts_n))
 		return 0;
 	elts = &(*txq->elts)[elts_head & elts_m];
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 98aaa7ca0..6ea02485f 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -295,6 +295,8 @@  txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
 		.comp_mask = IBV_EXP_QP_INIT_ATTR_PD,
 	};
 	if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+		unsigned int ds_cnt;
+
 		tmpl.txq.max_inline =
 			((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
 			 RTE_CACHE_LINE_SIZE);
@@ -327,6 +329,31 @@  txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
 			attr.init.cap.max_inline_data =
 				tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
 		}
+		/*
+		 * Check if the inline size is too large in a way which
+		 * can make the wqe DS to overflow.
+		 * Considering in calculation:
+		 *	WQE CTRL (1 DS)
+		 *	WQE ETH  (1 DS)
+		 *	inline part (N DS)
+		 */
+		ds_cnt = 2 +
+			(attr.init.cap.max_inline_data / MLX5_WQE_DWORD_SIZE);
+		if (ds_cnt > MLX5_MAX_DS) {
+			unsigned int max_inline = (MLX5_MAX_DS - 2) *
+						   MLX5_WQE_DWORD_SIZE;
+
+			/* Round down to a multiple of the cache line size. */
+			max_inline = max_inline - (max_inline %
+						   RTE_CACHE_LINE_SIZE);
+			WARN("txq inline is too large (%d) setting it to "
+			     "the maximum possible: %d\n",
+			     priv->txq_inline, max_inline);
+			tmpl.txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
+			attr.init.cap.max_inline_data = max_inline;
+			if (priv->mps == MLX5_MPW_ENHANCED)
+				tmpl.txq.inline_max_packet_sz = max_inline;
+		}
 	}
 	if (priv->tso) {
 		attr.init.max_tso_header =