[07/10] net/mlx5: support match on IPv4 fragment packets

Message ID cee4f1b6633c0a63efa88e7fcaf3120b83cc7ee5.1601474841.git.dekelp@nvidia.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series support match on L3 fragmented packets |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Dekel Peled Sept. 30, 2020, 2:10 p.m. UTC
This patch adds to MLX5 PMD the support of matching on IPv4
fragmented and non-fragmented packets, using the IPv4 header
fragment_offset field.

Signed-off-by: Dekel Peled <dekelp@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.c       |  48 ++++++++----
 drivers/net/mlx5/mlx5_flow.h       |  10 +++
 drivers/net/mlx5/mlx5_flow_dv.c    | 156 +++++++++++++++++++++++++++++++------
 drivers/net/mlx5/mlx5_flow_verbs.c |   9 ++-
 4 files changed, 178 insertions(+), 45 deletions(-)
  

Patch

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index ffa7646..906741f 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -474,6 +474,8 @@  struct mlx5_flow_tunnel_info {
  *   Bit-masks covering supported fields by the NIC to compare with user mask.
  * @param[in] size
  *   Bit-masks size in bytes.
+ * @param[in] range_accepted
+ *   True if range of values is accepted for specific fields, false otherwise.
  * @param[out] error
  *   Pointer to error structure.
  *
@@ -485,6 +487,7 @@  struct mlx5_flow_tunnel_info {
 			  const uint8_t *mask,
 			  const uint8_t *nic_mask,
 			  unsigned int size,
+			  bool range_accepted,
 			  struct rte_flow_error *error)
 {
 	unsigned int i;
@@ -502,7 +505,7 @@  struct mlx5_flow_tunnel_info {
 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
 					  "mask/last without a spec is not"
 					  " supported");
-	if (item->spec && item->last) {
+	if (item->spec && item->last && !range_accepted) {
 		uint8_t spec[size];
 		uint8_t last[size];
 		unsigned int i;
@@ -1277,7 +1280,8 @@  struct mlx5_flow_tunnel_info {
 	ret = mlx5_flow_item_acceptable
 		(item, (const uint8_t *)mask,
 		 (const uint8_t *)&rte_flow_item_icmp6_mask,
-		 sizeof(struct rte_flow_item_icmp6), error);
+		 sizeof(struct rte_flow_item_icmp6),
+		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 	if (ret < 0)
 		return ret;
 	return 0;
@@ -1329,7 +1333,8 @@  struct mlx5_flow_tunnel_info {
 	ret = mlx5_flow_item_acceptable
 		(item, (const uint8_t *)mask,
 		 (const uint8_t *)&rte_flow_item_icmp_mask,
-		 sizeof(struct rte_flow_item_icmp), error);
+		 sizeof(struct rte_flow_item_icmp),
+		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 	if (ret < 0)
 		return ret;
 	return 0;
@@ -1384,7 +1389,7 @@  struct mlx5_flow_tunnel_info {
 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
 					(const uint8_t *)&nic_mask,
 					sizeof(struct rte_flow_item_eth),
-					error);
+					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 	return ret;
 }
 
@@ -1438,7 +1443,7 @@  struct mlx5_flow_tunnel_info {
 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
 					(const uint8_t *)&nic_mask,
 					sizeof(struct rte_flow_item_vlan),
-					error);
+					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 	if (ret)
 		return ret;
 	if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
@@ -1502,6 +1507,7 @@  struct mlx5_flow_tunnel_info {
 			     uint64_t last_item,
 			     uint16_t ether_type,
 			     const struct rte_flow_item_ipv4 *acc_mask,
+			     bool range_accepted,
 			     struct rte_flow_error *error)
 {
 	const struct rte_flow_item_ipv4 *mask = item->mask;
@@ -1572,7 +1578,7 @@  struct mlx5_flow_tunnel_info {
 					acc_mask ? (const uint8_t *)acc_mask
 						 : (const uint8_t *)&nic_mask,
 					sizeof(struct rte_flow_item_ipv4),
-					error);
+					range_accepted, error);
 	if (ret < 0)
 		return ret;
 	return 0;
@@ -1592,6 +1598,8 @@  struct mlx5_flow_tunnel_info {
  * @param[in] acc_mask
  *   Acceptable mask, if NULL default internal default mask
  *   will be used to check whether item fields are supported.
+ * @param[in] range_accepted
+ *   True if range of values is accepted for specific fields, false otherwise.
  * @param[out] error
  *   Pointer to error structure.
  *
@@ -1671,7 +1679,7 @@  struct mlx5_flow_tunnel_info {
 					acc_mask ? (const uint8_t *)acc_mask
 						 : (const uint8_t *)&nic_mask,
 					sizeof(struct rte_flow_item_ipv6),
-					error);
+					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 	if (ret < 0)
 		return ret;
 	return 0;
@@ -1726,7 +1734,8 @@  struct mlx5_flow_tunnel_info {
 	ret = mlx5_flow_item_acceptable
 		(item, (const uint8_t *)mask,
 		 (const uint8_t *)&rte_flow_item_udp_mask,
-		 sizeof(struct rte_flow_item_udp), error);
+		 sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
+		 error);
 	if (ret < 0)
 		return ret;
 	return 0;
@@ -1781,7 +1790,8 @@  struct mlx5_flow_tunnel_info {
 	ret = mlx5_flow_item_acceptable
 		(item, (const uint8_t *)mask,
 		 (const uint8_t *)flow_mask,
-		 sizeof(struct rte_flow_item_tcp), error);
+		 sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
+		 error);
 	if (ret < 0)
 		return ret;
 	return 0;
@@ -1835,7 +1845,7 @@  struct mlx5_flow_tunnel_info {
 		(item, (const uint8_t *)mask,
 		 (const uint8_t *)&rte_flow_item_vxlan_mask,
 		 sizeof(struct rte_flow_item_vxlan),
-		 error);
+		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 	if (ret < 0)
 		return ret;
 	if (spec) {
@@ -1906,7 +1916,7 @@  struct mlx5_flow_tunnel_info {
 		(item, (const uint8_t *)mask,
 		 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
 		 sizeof(struct rte_flow_item_vxlan_gpe),
-		 error);
+		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 	if (ret < 0)
 		return ret;
 	if (spec) {
@@ -1980,7 +1990,7 @@  struct mlx5_flow_tunnel_info {
 	ret = mlx5_flow_item_acceptable
 		(item, (const uint8_t *)mask,
 		 (const uint8_t *)&gre_key_default_mask,
-		 sizeof(rte_be32_t), error);
+		 sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 	return ret;
 }
 
@@ -2032,7 +2042,8 @@  struct mlx5_flow_tunnel_info {
 	ret = mlx5_flow_item_acceptable
 		(item, (const uint8_t *)mask,
 		 (const uint8_t *)&nic_mask,
-		 sizeof(struct rte_flow_item_gre), error);
+		 sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
+		 error);
 	if (ret < 0)
 		return ret;
 #ifndef HAVE_MLX5DV_DR
@@ -2107,7 +2118,8 @@  struct mlx5_flow_tunnel_info {
 	ret = mlx5_flow_item_acceptable
 				  (item, (const uint8_t *)mask,
 				   (const uint8_t *)&nic_mask,
-				   sizeof(struct rte_flow_item_geneve), error);
+				   sizeof(struct rte_flow_item_geneve),
+				   MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 	if (ret)
 		return ret;
 	if (spec) {
@@ -2190,7 +2202,8 @@  struct mlx5_flow_tunnel_info {
 	ret = mlx5_flow_item_acceptable
 		(item, (const uint8_t *)mask,
 		 (const uint8_t *)&rte_flow_item_mpls_mask,
-		 sizeof(struct rte_flow_item_mpls), error);
+		 sizeof(struct rte_flow_item_mpls),
+		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 	if (ret < 0)
 		return ret;
 	return 0;
@@ -2245,7 +2258,8 @@  struct mlx5_flow_tunnel_info {
 	ret = mlx5_flow_item_acceptable
 		(item, (const uint8_t *)mask,
 		 (const uint8_t *)&rte_flow_item_nvgre_mask,
-		 sizeof(struct rte_flow_item_nvgre), error);
+		 sizeof(struct rte_flow_item_nvgre),
+		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 	if (ret < 0)
 		return ret;
 	return 0;
@@ -2339,7 +2353,7 @@  struct mlx5_flow_tunnel_info {
 					 acc_mask ? (const uint8_t *)acc_mask
 						  : (const uint8_t *)&nic_mask,
 					 sizeof(struct rte_flow_item_ecpri),
-					 error);
+					 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 }
 
 /* Allocate unique ID for the split Q/RSS subflows. */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 279daf2..1e30c93 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -330,6 +330,14 @@  enum mlx5_feature_name {
 #define MLX5_ENCAPSULATION_DECISION_SIZE (sizeof(struct rte_flow_item_eth) + \
 					  sizeof(struct rte_flow_item_ipv4))
 
+/* IPv4 fragment_offset field contains relevant data in bits 2 to 15. */
+#define MLX5_IPV4_FRAG_OFFSET_MASK \
+		(RTE_IPV4_HDR_OFFSET_MASK | RTE_IPV4_HDR_MF_FLAG)
+
+/* Specific item's fields can accept a range of values (using spec and last). */
+#define MLX5_ITEM_RANGE_NOT_ACCEPTED	false
+#define MLX5_ITEM_RANGE_ACCEPTED	true
+
 /* Software header modify action numbers of a flow. */
 #define MLX5_ACT_NUM_MDF_IPV4		1
 #define MLX5_ACT_NUM_MDF_IPV6		4
@@ -985,6 +993,7 @@  int mlx5_flow_item_acceptable(const struct rte_flow_item *item,
 			      const uint8_t *mask,
 			      const uint8_t *nic_mask,
 			      unsigned int size,
+			      bool range_accepted,
 			      struct rte_flow_error *error);
 int mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
 				uint64_t item_flags,
@@ -1002,6 +1011,7 @@  int mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
 				 uint64_t last_item,
 				 uint16_t ether_type,
 				 const struct rte_flow_item_ipv4 *acc_mask,
+				 bool range_accepted,
 				 struct rte_flow_error *error);
 int mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
 				 uint64_t item_flags,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 0a0a5a4..3379caf 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -1418,7 +1418,7 @@  struct field_modify_info modify_tcp[] = {
 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
 					(const uint8_t *)&nic_mask,
 					sizeof(struct rte_flow_item_mark),
-					error);
+					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 	if (ret < 0)
 		return ret;
 	return 0;
@@ -1494,7 +1494,7 @@  struct field_modify_info modify_tcp[] = {
 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
 					(const uint8_t *)&nic_mask,
 					sizeof(struct rte_flow_item_meta),
-					error);
+					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 	return ret;
 }
 
@@ -1547,7 +1547,7 @@  struct field_modify_info modify_tcp[] = {
 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
 					(const uint8_t *)&nic_mask,
 					sizeof(struct rte_flow_item_tag),
-					error);
+					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 	if (ret < 0)
 		return ret;
 	if (mask->index != 0xff)
@@ -1618,7 +1618,7 @@  struct field_modify_info modify_tcp[] = {
 				(item, (const uint8_t *)mask,
 				 (const uint8_t *)&rte_flow_item_port_id_mask,
 				 sizeof(struct rte_flow_item_port_id),
-				 error);
+				 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 	if (ret)
 		return ret;
 	if (!spec)
@@ -1691,7 +1691,7 @@  struct field_modify_info modify_tcp[] = {
 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
 					(const uint8_t *)&nic_mask,
 					sizeof(struct rte_flow_item_vlan),
-					error);
+					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 	if (ret)
 		return ret;
 	if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
@@ -1778,11 +1778,126 @@  struct field_modify_info modify_tcp[] = {
 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
 					  "Match is supported for GTP"
 					  " flags only");
-	return mlx5_flow_item_acceptable
-		(item, (const uint8_t *)mask,
-		 (const uint8_t *)&nic_mask,
-		 sizeof(struct rte_flow_item_gtp),
-		 error);
+	return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+					 (const uint8_t *)&nic_mask,
+					 sizeof(struct rte_flow_item_gtp),
+					 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
+}
+
+/**
+ * Validate IPV4 item.
+ * Use existing validation function mlx5_flow_validate_item_ipv4(), and
+ * add specific validation of fragment_offset field,
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that holds the items detected until now.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_item_ipv4(const struct rte_flow_item *item,
+			   uint64_t item_flags,
+			   uint64_t last_item,
+			   uint16_t ether_type,
+			   struct rte_flow_error *error)
+{
+	int ret;
+	const struct rte_flow_item_ipv4 *spec = item->spec;
+	const struct rte_flow_item_ipv4 *last = item->last;
+	const struct rte_flow_item_ipv4 *mask = item->mask;
+	rte_be16_t fragment_offset_spec = 0;
+	rte_be16_t fragment_offset_last = 0;
+	const struct rte_flow_item_ipv4 nic_ipv4_mask = {
+		.hdr = {
+			.src_addr = RTE_BE32(0xffffffff),
+			.dst_addr = RTE_BE32(0xffffffff),
+			.type_of_service = 0xff,
+			.fragment_offset = RTE_BE16(0xffff),
+			.next_proto_id = 0xff,
+			.time_to_live = 0xff,
+		},
+	};
+
+	ret = mlx5_flow_validate_item_ipv4(item, item_flags, last_item,
+					   ether_type, &nic_ipv4_mask,
+					   MLX5_ITEM_RANGE_ACCEPTED, error);
+	if (ret < 0)
+		return ret;
+	if (spec && mask)
+		fragment_offset_spec = spec->hdr.fragment_offset &
+				       mask->hdr.fragment_offset;
+	if (!fragment_offset_spec)
+		return 0;
+	/*
+	 * spec and mask are valid, enforce using full mask to make sure the
+	 * complete value is used correctly.
+	 */
+	if ((mask->hdr.fragment_offset & RTE_BE16(MLX5_IPV4_FRAG_OFFSET_MASK))
+			!= RTE_BE16(MLX5_IPV4_FRAG_OFFSET_MASK))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+					  item, "must use full mask for"
+					  " fragment_offset");
+	/*
+	 * Match on fragment_offset 0x2000 means MF is 1 and frag-offset is 0,
+	 * indicating this is 1st fragment of fragmented packet.
+	 * This is not yet supported in MLX5, return appropriate error message.
+	 */
+	if (fragment_offset_spec == RTE_BE16(RTE_IPV4_HDR_MF_FLAG))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "match on first fragment not "
+					  "supported");
+	if (fragment_offset_spec && !last)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "specified value not supported");
+	/* spec and last are valid, validate the specified range. */
+	fragment_offset_last = last->hdr.fragment_offset &
+			       mask->hdr.fragment_offset;
+	/*
+	 * Match on fragment_offset spec 0x2001 and last 0x3fff
+	 * means MF is 1 and frag-offset is > 0.
+	 * This packet is fragment 2nd and onward, excluding last.
+	 * This is not yet supported in MLX5, return appropriate
+	 * error message.
+	 */
+	if (fragment_offset_spec == RTE_BE16(RTE_IPV4_HDR_MF_FLAG + 1) &&
+	    fragment_offset_last == RTE_BE16(MLX5_IPV4_FRAG_OFFSET_MASK))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM_LAST,
+					  last, "match on following "
+					  "fragments not supported");
+	/*
+	 * Match on fragment_offset spec 0x0001 and last 0x1fff
+	 * means MF is 0 and frag-offset is > 0.
+	 * This packet is last fragment of fragmented packet.
+	 * This is not yet supported in MLX5, return appropriate
+	 * error message.
+	 */
+	if (fragment_offset_spec == RTE_BE16(1) &&
+	    fragment_offset_last == RTE_BE16(RTE_IPV4_HDR_OFFSET_MASK))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM_LAST,
+					  last, "match on last "
+					  "fragment not supported");
+	/*
+	 * Match on fragment_offset spec 0x0001 and last 0x3fff
+	 * means MF and/or frag-offset is not 0.
+	 * This is a fragmented packet.
+	 * Other range values are invalid and rejected.
+	 */
+	if (!(fragment_offset_spec == RTE_BE16(1) &&
+	      fragment_offset_last == RTE_BE16(MLX5_IPV4_FRAG_OFFSET_MASK)))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM_LAST, last,
+					  "specified range not supported");
+	return 0;
 }
 
 /**
@@ -5084,15 +5199,6 @@  struct field_modify_info modify_tcp[] = {
 			.dst_port = RTE_BE16(UINT16_MAX),
 		}
 	};
-	const struct rte_flow_item_ipv4 nic_ipv4_mask = {
-		.hdr = {
-			.src_addr = RTE_BE32(0xffffffff),
-			.dst_addr = RTE_BE32(0xffffffff),
-			.type_of_service = 0xff,
-			.next_proto_id = 0xff,
-			.time_to_live = 0xff,
-		},
-	};
 	const struct rte_flow_item_ipv6 nic_ipv6_mask = {
 		.hdr = {
 			.src_addr =
@@ -5192,11 +5298,9 @@  struct field_modify_info modify_tcp[] = {
 		case RTE_FLOW_ITEM_TYPE_IPV4:
 			mlx5_flow_tunnel_ip_check(items, next_protocol,
 						  &item_flags, &tunnel);
-			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
-							   last_item,
-							   ether_type,
-							   &nic_ipv4_mask,
-							   error);
+			ret = flow_dv_validate_item_ipv4(items, item_flags,
+							 last_item, ether_type,
+							 error);
 			if (ret < 0)
 				return ret;
 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
@@ -6296,6 +6400,10 @@  struct field_modify_info modify_tcp[] = {
 		 ipv4_m->hdr.time_to_live);
 	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ttl_hoplimit,
 		 ipv4_v->hdr.time_to_live & ipv4_m->hdr.time_to_live);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_m, frag,
+		 !!(ipv4_m->hdr.fragment_offset));
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
+		 !!(ipv4_v->hdr.fragment_offset & ipv4_m->hdr.fragment_offset));
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 62c18b8..276bcb5 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -1312,10 +1312,11 @@ 
 			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV4:
-			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
-							   last_item,
-							   ether_type, NULL,
-							   error);
+			ret = mlx5_flow_validate_item_ipv4
+						(items, item_flags,
+						 last_item, ether_type, NULL,
+						 MLX5_ITEM_RANGE_NOT_ACCEPTED,
+						 error);
 			if (ret < 0)
 				return ret;
 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :