diff mbox series

net/mlx5: convert meta register to big-endian

Message ID 20210616144602.2803271-1-akozyrev@nvidia.com (mailing list archive)
State Accepted
Delegated to: Raslan Darawsheh
Headers show
Series net/mlx5: convert meta register to big-endian | expand

Checks

Context Check Description
ci/iol-mellanox-Functional fail Functional Testing issues
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-testing success Testing PASS
ci/iol-abi-testing warning Testing issues
ci/github-robot success github build: passed
ci/intel-Testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-intel-Functional success Functional Testing PASS
ci/checkpatch success coding style OK

Commit Message

Alexander Kozyrev June 16, 2021, 2:46 p.m. UTC
Metadata is stored in the CPU order (little-endian format on x86),
while all the packet header fields are stored in the network order.
That leads to the wrong results whenever we try to use the metadata
value in the modify_field actions: bytes are swapped as a result.

Convert the metadata into the big-endian format before storing it
in the Mellanox NIC to achieve consistent behaviour.

Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_dv.c          | 40 +++++-------------------
 drivers/net/mlx5/mlx5_rx.c               |  5 +--
 drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 22 +++++++++----
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h    | 30 +++++++++++-------
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h     | 18 ++++++++---
 5 files changed, 59 insertions(+), 56 deletions(-)

Comments

Raslan Darawsheh June 17, 2021, 11:40 a.m. UTC | #1
Hi,
> -----Original Message-----
> From: Alexander Kozyrev <akozyrev@nvidia.com>
> Sent: Wednesday, June 16, 2021 5:46 PM
> To: dev@dpdk.org
> Cc: Raslan Darawsheh <rasland@nvidia.com>; Slava Ovsiienko
> <viacheslavo@nvidia.com>; Matan Azrad <matan@nvidia.com>
> Subject: [PATCH] net/mlx5: convert meta register to big-endian
> 
> Metadata is stored in the CPU order (little-endian format on x86),
> while all the packet header fields are stored in the network order.
> That leads to the wrong results whenever we try to use the metadata
> value in the modify_field actions: bytes are swapped as a result.
> 
> Convert the metadata into the big-endian format before storing it
> in the Mellanox NIC to achieve consistent behaviour.
> 
> Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh
diff mbox series

Patch

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index dafd37ab93..b36ffde559 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -1239,8 +1239,8 @@  flow_dv_convert_action_set_meta
 			 const struct rte_flow_action_set_meta *conf,
 			 struct rte_flow_error *error)
 {
-	uint32_t data = conf->data;
-	uint32_t mask = conf->mask;
+	uint32_t mask = rte_cpu_to_be_32(conf->mask);
+	uint32_t data = rte_cpu_to_be_32(conf->data) & mask;
 	struct rte_flow_item item = {
 		.spec = &data,
 		.mask = &mask,
@@ -1253,25 +1253,14 @@  flow_dv_convert_action_set_meta
 	if (reg < 0)
 		return reg;
 	MLX5_ASSERT(reg != REG_NON);
-	/*
-	 * In datapath code there is no endianness
-	 * coversions for perfromance reasons, all
-	 * pattern conversions are done in rte_flow.
-	 */
 	if (reg == REG_C_0) {
 		struct mlx5_priv *priv = dev->data->dev_private;
 		uint32_t msk_c0 = priv->sh->dv_regc0_mask;
-		uint32_t shl_c0;
+		uint32_t shl_c0 = rte_bsf32(msk_c0);
 
-		MLX5_ASSERT(msk_c0);
-#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
-		shl_c0 = rte_bsf32(msk_c0);
-#else
-		shl_c0 = sizeof(msk_c0) * CHAR_BIT - rte_fls_u32(msk_c0);
-#endif
-		mask <<= shl_c0;
-		data <<= shl_c0;
-		MLX5_ASSERT(!(~msk_c0 & rte_cpu_to_be_32(mask)));
+		data = rte_cpu_to_be_32(rte_cpu_to_be_32(data) << shl_c0);
+		mask = rte_cpu_to_be_32(mask) & msk_c0;
+		mask = rte_cpu_to_be_32(mask << shl_c0);
 	}
 	reg_c_x[0] = (struct field_modify_info){4, 0, reg_to_field[reg]};
 	/* The routine expects parameters in memory as big-endian ones. */
@@ -9226,27 +9215,14 @@  flow_dv_translate_item_meta(struct rte_eth_dev *dev,
 		if (reg < 0)
 			return;
 		MLX5_ASSERT(reg != REG_NON);
-		/*
-		 * In datapath code there is no endianness
-		 * coversions for perfromance reasons, all
-		 * pattern conversions are done in rte_flow.
-		 */
-		value = rte_cpu_to_be_32(value);
-		mask = rte_cpu_to_be_32(mask);
 		if (reg == REG_C_0) {
 			struct mlx5_priv *priv = dev->data->dev_private;
 			uint32_t msk_c0 = priv->sh->dv_regc0_mask;
 			uint32_t shl_c0 = rte_bsf32(msk_c0);
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-			uint32_t shr_c0 = __builtin_clz(priv->sh->dv_meta_mask);
 
-			value >>= shr_c0;
-			mask >>= shr_c0;
-#endif
-			value <<= shl_c0;
+			mask &= msk_c0;
 			mask <<= shl_c0;
-			MLX5_ASSERT(msk_c0);
-			MLX5_ASSERT(!(~msk_c0 & mask));
+			value <<= shl_c0;
 		}
 		flow_dv_match_meta_reg(matcher, key, reg, value, mask);
 	}
diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index 6cd71a44eb..777a1d6e45 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -740,8 +740,9 @@  rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
 		}
 	}
 	if (rxq->dynf_meta) {
-		uint32_t meta = cqe->flow_table_metadata &
-				rxq->flow_meta_port_mask;
+		uint32_t meta = rte_be_to_cpu_32(cqe->flow_table_metadata >>
+			__builtin_popcount(rxq->flow_meta_port_mask)) &
+			rxq->flow_meta_port_mask;
 
 		if (meta) {
 			pkt->ol_flags |= rxq->flow_meta_mask;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
index 2d1154b624..648c59e2c2 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
@@ -1221,23 +1221,33 @@  rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 		if (rxq->dynf_meta) {
 			uint64_t flag = rxq->flow_meta_mask;
 			int32_t offs = rxq->flow_meta_offset;
-			uint32_t metadata, mask;
+			uint32_t mask = rxq->flow_meta_port_mask;
+			uint32_t shift =
+				__builtin_popcount(rxq->flow_meta_port_mask);
+			uint32_t metadata;
 
-			mask = rxq->flow_meta_port_mask;
 			/* This code is subject for futher optimization. */
-			metadata = cq[pos].flow_table_metadata & mask;
+			metadata = (rte_be_to_cpu_32
+				(cq[pos].flow_table_metadata) >> shift) &
+				mask;
 			*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
 								metadata;
 			pkts[pos]->ol_flags |= metadata ? flag : 0ULL;
-			metadata = cq[pos + 1].flow_table_metadata & mask;
+			metadata = (rte_be_to_cpu_32
+				(cq[pos + 1].flow_table_metadata) >> shift) &
+				mask;
 			*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) =
 								metadata;
 			pkts[pos + 1]->ol_flags |= metadata ? flag : 0ULL;
-			metadata = cq[pos + 2].flow_table_metadata & mask;
+			metadata = (rte_be_to_cpu_32
+				(cq[pos + 2].flow_table_metadata) >> shift) &
+				mask;
 			*RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) =
 								metadata;
 			pkts[pos + 2]->ol_flags |= metadata ? flag : 0ULL;
-			metadata = cq[pos + 3].flow_table_metadata & mask;
+			metadata = (rte_be_to_cpu_32
+				(cq[pos + 3].flow_table_metadata) >> shift) &
+				mask;
 			*RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) =
 								metadata;
 			pkts[pos + 3]->ol_flags |= metadata ? flag : 0ULL;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index 2234fbe6b2..5c569ee199 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -833,23 +833,29 @@  rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 			/* This code is subject for futher optimization. */
 			int32_t offs = rxq->flow_meta_offset;
 			uint32_t mask = rxq->flow_meta_port_mask;
+			uint32_t shift =
+				__builtin_popcount(rxq->flow_meta_port_mask);
 
 			*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
-				container_of(p0, struct mlx5_cqe,
-					     pkt_info)->flow_table_metadata &
-					     mask;
+				(rte_be_to_cpu_32(container_of
+				(p0, struct mlx5_cqe,
+				pkt_info)->flow_table_metadata) >> shift) &
+				mask;
 			*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) =
-				container_of(p1, struct mlx5_cqe,
-					     pkt_info)->flow_table_metadata &
-					     mask;
+				(rte_be_to_cpu_32(container_of
+				(p1, struct mlx5_cqe,
+				pkt_info)->flow_table_metadata) >> shift) &
+				mask;
 			*RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) =
-				container_of(p2, struct mlx5_cqe,
-					     pkt_info)->flow_table_metadata &
-					     mask;
+				(rte_be_to_cpu_32(container_of
+				(p2, struct mlx5_cqe,
+				pkt_info)->flow_table_metadata) >> shift) &
+				mask;
 			*RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) =
-				container_of(p3, struct mlx5_cqe,
-					     pkt_info)->flow_table_metadata &
-					     mask;
+				(rte_be_to_cpu_32(container_of
+				(p3, struct mlx5_cqe,
+				pkt_info)->flow_table_metadata) >> shift) &
+				mask;
 			if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *))
 				elts[pos]->ol_flags |= rxq->flow_meta_mask;
 			if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *))
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index c508a7a4f2..661fa7273c 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -769,15 +769,25 @@  rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 			/* This code is subject for futher optimization. */
 			int32_t offs = rxq->flow_meta_offset;
 			uint32_t mask = rxq->flow_meta_port_mask;
+			uint32_t shift =
+				__builtin_popcount(rxq->flow_meta_port_mask);
 
 			*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
-				cq[pos].flow_table_metadata & mask;
+				(rte_be_to_cpu_32
+				(cq[pos].flow_table_metadata) >> shift) &
+				mask;
 			*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) =
-				cq[pos + p1].flow_table_metadata  & mask;
+				(rte_be_to_cpu_32
+				(cq[pos + p1].flow_table_metadata) >> shift) &
+				mask;
 			*RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) =
-				cq[pos + p2].flow_table_metadata & mask;
+				(rte_be_to_cpu_32
+				(cq[pos + p2].flow_table_metadata) >> shift) &
+				mask;
 			*RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) =
-				cq[pos + p3].flow_table_metadata & mask;
+				(rte_be_to_cpu_32
+				(cq[pos + p3].flow_table_metadata) >> shift) &
+				mask;
 			if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *))
 				pkts[pos]->ol_flags |= rxq->flow_meta_mask;
 			if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *))