[v2,17/20] net/mlx5: add flow VXLAN-GPE item

Message ID 5e9bb490cd3cccb660e11ea0c7ca256e0fd73cc7.1530111623.git.nelio.laranjeiro@6wind.com (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler
Series: net/mlx5: flow rework

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail apply issues

Commit Message

Nélio Laranjeiro June 27, 2018, 3:07 p.m. UTC
  Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 123 ++++++++++++++++++++++++++++++++++-
 1 file changed, 120 insertions(+), 3 deletions(-)
  

Comments

Yongseok Koh July 6, 2018, 11:23 p.m. UTC | #1
On Wed, Jun 27, 2018 at 05:07:49PM +0200, Nelio Laranjeiro wrote:
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> ---
>  drivers/net/mlx5/mlx5_flow.c | 123 ++++++++++++++++++++++++++++++++++-
>  1 file changed, 120 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index daf5b9b5a..47c55b426 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -53,6 +53,7 @@ extern const struct eth_dev_ops mlx5_dev_ops_isolate;
>  
>  /* Pattern tunnel Layer bits. */
>  #define MLX5_FLOW_LAYER_VXLAN (1u << 12)
> +#define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
>  
>  /* Outer Masks. */
>  #define MLX5_FLOW_LAYER_OUTER_L3 \
> @@ -64,7 +65,8 @@ extern const struct eth_dev_ops mlx5_dev_ops_isolate;
>  	 MLX5_FLOW_LAYER_OUTER_L4)
>  
>  /* Tunnel masks. */
> -#define MLX5_FLOW_LAYER_TUNNEL MLX5_FLOW_LAYER_VXLAN
> +#define MLX5_FLOW_LAYER_TUNNEL \
> +	(MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE)
>  
>  /* Inner Masks. */
>  #define MLX5_FLOW_LAYER_INNER_L3 \
> @@ -1302,9 +1304,118 @@ mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
>  	return size;
>  }
>  
> +/**
> + * Validate VXLAN-GPE layer and possibly create the Verbs specification.
> + *
> + * @param dev
> + *   Pointer to Ethernet device.
> + * @param[in] item
> + *   Item specification.
> + * @param[in, out] flow
> + *   Pointer to flow structure.
> + * @param[in] flow_size
> + *   Size in bytes of the available space to store the flow information.
> + * @param error
> + *   Pointer to error structure.
> + *
> + * @return
> + *   Size in bytes necessary for the conversion, a negative errno value
> + *   otherwise and rte_errno is set.
> + */
> +static int
> +mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev,
> +			 const struct rte_flow_item *item,
> +			 struct rte_flow *flow, const size_t flow_size,
> +			 struct rte_flow_error *error)

It is almost the same as mlx5_flow_item_vxlan() except for checking
priv->config.l3_vxlan_en. One more difference I noticed is that it doesn't
check flow->expand on validation. Why is that? If that's a mistake, isn't it
better to make the common code shareable?

> +{
> +	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
> +	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
> +	const uint32_t layers = mlx5_flow_layers(flow);
> +	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
> +	struct ibv_flow_spec_tunnel vxlan_gpe = {
> +		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
> +		.size = size,
> +	};
> +	int ret;
> +	union vni {
> +		uint32_t vlan_id;
> +		uint8_t vni[4];
> +	} id = { .vlan_id = 0, };
> +
> +	if (!((struct priv *)dev->data->dev_private)->config.l3_vxlan_en)
> +		return rte_flow_error_set(error, ENOTSUP,
> +					  RTE_FLOW_ERROR_TYPE_ITEM,
> +					  item,
> +					  "L3 VXLAN is not enabled by device"
> +					  " parameter and/or not configured in"
> +					  " firmware");
> +	if (layers & MLX5_FLOW_LAYER_TUNNEL)
> +		return rte_flow_error_set(error, ENOTSUP,
> +					  RTE_FLOW_ERROR_TYPE_ITEM,
> +					  item,
> +					  "a tunnel is already present");
> +	/*
> +	 * Verify only UDPv4 is present as defined in
> +	 * https://tools.ietf.org/html/rfc7348
> +	 */
> +	if (!(layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
> +		return rte_flow_error_set(error, ENOTSUP,
> +					  RTE_FLOW_ERROR_TYPE_ITEM,
> +					  item,
> +					  "no outer UDP layer found");
> +	if (!mask)
> +		mask = &rte_flow_item_vxlan_gpe_mask;
> +	ret = mlx5_flow_item_validate
> +		(item, (const uint8_t *)mask,
> +		 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
> +		 sizeof(struct rte_flow_item_vxlan_gpe), error);
> +	if (ret < 0)
> +		return ret;
> +	if (spec) {
> +		memcpy(&id.vni[1], spec->vni, 3);
> +		vxlan_gpe.val.tunnel_id = id.vlan_id;
> +		memcpy(&id.vni[1], mask->vni, 3);
> +		vxlan_gpe.mask.tunnel_id = id.vlan_id;
> +		if (spec->protocol)
> +			return rte_flow_error_set
> +				(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ITEM,
> +				 item,
> +				 "VxLAN-GPE protocol not supported");
> +		/* Remove unwanted bits from values. */
> +		vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
> +	}
> +	/*
> +	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if this
> +	 * layer is the only one defined in the Verbs specification, it is
> +	 * interpreted as a wildcard and all packets will match this rule.
> +	 * If it follows a full stack layer (ex: eth / ipv4 / udp), all
> +	 * packets matching the preceding layers will also match this rule.
> +	 * To avoid such a situation, VNI 0 is currently refused.
> +	 */
> +	if (!vxlan_gpe.val.tunnel_id)
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ITEM,
> +					  item,
> +					  "VXLAN-GPE vni cannot be 0");
> +	if (!(layers & MLX5_FLOW_LAYER_OUTER))
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ITEM,
> +					  item,
> +					  "VXLAN-GPE tunnel must be fully"
> +					  " defined");
> +	if (size <= flow_size)
> +		mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size);
> +	mlx5_flow_layers_update(flow, MLX5_FLOW_LAYER_VXLAN_GPE);
> +	flow->ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP;
> +	return size;
> +}
> +
>  /**
>   * Validate items provided by the user.
>   *
> + * @param dev
> + *   Pointer to Ethernet device.
>   * @param items
>   *   Pointer to flow items array.
>   * @param flow
> @@ -1319,7 +1430,8 @@ mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
>   *   otherwise and rte_errno is set.
>   */
>  static int
> -mlx5_flow_items(const struct rte_flow_item items[],
> +mlx5_flow_items(struct rte_eth_dev *dev,
> +		const struct rte_flow_item items[],
>  		struct rte_flow *flow, const size_t flow_size,
>  		struct rte_flow_error *error)
>  {
> @@ -1353,6 +1465,10 @@ mlx5_flow_items(const struct rte_flow_item items[],
>  		case RTE_FLOW_ITEM_TYPE_VXLAN:
>  			ret = mlx5_flow_item_vxlan(items, flow, remain, error);
>  			break;
> +		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
> +			ret = mlx5_flow_item_vxlan_gpe(dev, items, flow,
> +						       remain, error);
> +			break;
>  		default:
>  			return rte_flow_error_set(error, ENOTSUP,
>  						  RTE_FLOW_ERROR_TYPE_ITEM,
> @@ -1962,7 +2078,8 @@ mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
>  				(void *)(flow->cur_verbs->attr + 1);
>  		}
>  		ret = mlx5_flow_items
> -			((const struct rte_flow_item *)buf->patterns[i],
> +			(dev,
> +			 (const struct rte_flow_item *)buf->patterns[i],
>  			 flow, remain, error);
>  		if (ret < 0)
>  			goto error;
> -- 
> 2.18.0
>
  
Nélio Laranjeiro July 9, 2018, 2:53 p.m. UTC | #2
On Fri, Jul 06, 2018 at 04:23:26PM -0700, Yongseok Koh wrote:
> On Wed, Jun 27, 2018 at 05:07:49PM +0200, Nelio Laranjeiro wrote:
> > Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> > ---
> >  drivers/net/mlx5/mlx5_flow.c | 123 ++++++++++++++++++++++++++++++++++-
> >  1 file changed, 120 insertions(+), 3 deletions(-)
> > 
>[...]  
> > +/**
> > + * Validate VXLAN-GPE layer and possibly create the Verbs specification.
> > + *
> > + * @param dev
> > + *   Pointer to Ethernet device.
> > + * @param[in] item
> > + *   Item specification.
> > + * @param[in, out] flow
> > + *   Pointer to flow structure.
> > + * @param[in] flow_size
> > + *   Size in bytes of the available space to store the flow information.
> > + * @param error
> > + *   Pointer to error structure.
> > + *
> > + * @return
> > + *   Size in bytes necessary for the conversion, a negative errno value
> > + *   otherwise and rte_errno is set.
> > + */
> > +static int
> > +mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev,
> > +			 const struct rte_flow_item *item,
> > +			 struct rte_flow *flow, const size_t flow_size,
> > +			 struct rte_flow_error *error)
> 
> It is almost the same as mlx5_flow_item_vxlan() except for checking
> priv->config.l3_vxlan_en. One more difference I noticed is that it doesn't
> check flow->expand on validation. Why is that? If that's a mistake, isn't it
> better to make the common code shareable?
>[...]

The GPE version needs:

 - the l3_vxlan_en device parameter,
 - to set its own tunnel bit (as in this case the following layer may
   directly be an L3 one).
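
With those constraints met (and assuming the PMD was started with the
l3_vxlan_en=1 device parameter), a rule matching this item must carry a
full outer stack and a non-zero VNI.  For illustration only (port, queue
and VNI values are arbitrary), such a rule could be expressed in testpmd
as:

  testpmd> flow create 0 ingress pattern eth / ipv4 / udp / vxlan-gpe vni is 42 / end actions queue index 1 / end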

Indeed there is some common code (as for TCP/UDP), but sharing it would
make it harder to read and to fix in case of bugs.

In addition, if this draft [1] is eventually dropped, it will be easier
to remove the dedicated code once the item is also removed from the API;
that removal may not come from the Mellanox PMD team but from whoever
proposes the drop.  The chances of breaking something are high if the
code is shared among several items.  It is better to have a single
function per item/action, matching the API directly.

Thanks,

[1] https://datatracker.ietf.org/doc/draft-quinn-vxlan-gpe/
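
For reference, the VNI handling above folds the item's 24-bit,
network-order VNI into the 32-bit Verbs tunnel_id by writing it into the
upper three bytes of a small union.  A minimal standalone sketch of that
conversion (names are local to this example, not mlx5 code):

  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  /* Fold a 3-byte network-order VNI into a 32-bit tunnel id, mirroring
   * the "union vni" trick in the patch: the VNI lands in bytes 1..3 of
   * the word handed to Verbs as tunnel_id. */
  static uint32_t
  vni_to_tunnel_id(const uint8_t vni[3])
  {
          union {
                  uint32_t vlan_id;
                  uint8_t vni[4];
          } id = { .vlan_id = 0, };

          memcpy(&id.vni[1], vni, 3);
          return id.vlan_id;
  }

  int
  main(void)
  {
          const uint8_t vni[3] = { 0x00, 0x00, 0x2a }; /* VNI 42 */

          printf("tunnel_id=0x%08x\n", (unsigned)vni_to_tunnel_id(vni));
          return 0;
  }

The same conversion is applied to the mask, after which the value is
ANDed with the mask to clear unwanted bits.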
  

Patch

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index daf5b9b5a..47c55b426 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -53,6 +53,7 @@  extern const struct eth_dev_ops mlx5_dev_ops_isolate;
 
 /* Pattern tunnel Layer bits. */
 #define MLX5_FLOW_LAYER_VXLAN (1u << 12)
+#define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
 
 /* Outer Masks. */
 #define MLX5_FLOW_LAYER_OUTER_L3 \
@@ -64,7 +65,8 @@  extern const struct eth_dev_ops mlx5_dev_ops_isolate;
 	 MLX5_FLOW_LAYER_OUTER_L4)
 
 /* Tunnel masks. */
-#define MLX5_FLOW_LAYER_TUNNEL MLX5_FLOW_LAYER_VXLAN
+#define MLX5_FLOW_LAYER_TUNNEL \
+	(MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE)
 
 /* Inner Masks. */
 #define MLX5_FLOW_LAYER_INNER_L3 \
@@ -1302,9 +1304,118 @@  mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
 	return size;
 }
 
+/**
+ * Validate VXLAN-GPE layer and possibly create the Verbs specification.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ * @param[in] flow_size
+ *   Size in bytes of the available space to store the flow information.
+ * @param error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   Size in bytes necessary for the conversion, a negative errno value
+ *   otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev,
+			 const struct rte_flow_item *item,
+			 struct rte_flow *flow, const size_t flow_size,
+			 struct rte_flow_error *error)
+{
+	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
+	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
+	const uint32_t layers = mlx5_flow_layers(flow);
+	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+	struct ibv_flow_spec_tunnel vxlan_gpe = {
+		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
+		.size = size,
+	};
+	int ret;
+	union vni {
+		uint32_t vlan_id;
+		uint8_t vni[4];
+	} id = { .vlan_id = 0, };
+
+	if (!((struct priv *)dev->data->dev_private)->config.l3_vxlan_en)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM,
+					  item,
+					  "L3 VXLAN is not enabled by device"
+					  " parameter and/or not configured in"
+					  " firmware");
+	if (layers & MLX5_FLOW_LAYER_TUNNEL)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM,
+					  item,
+					  "a tunnel is already present");
+	/*
+	 * Verify only UDPv4 is present as defined in
+	 * https://tools.ietf.org/html/rfc7348
+	 */
+	if (!(layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM,
+					  item,
+					  "no outer UDP layer found");
+	if (!mask)
+		mask = &rte_flow_item_vxlan_gpe_mask;
+	ret = mlx5_flow_item_validate
+		(item, (const uint8_t *)mask,
+		 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
+		 sizeof(struct rte_flow_item_vxlan_gpe), error);
+	if (ret < 0)
+		return ret;
+	if (spec) {
+		memcpy(&id.vni[1], spec->vni, 3);
+		vxlan_gpe.val.tunnel_id = id.vlan_id;
+		memcpy(&id.vni[1], mask->vni, 3);
+		vxlan_gpe.mask.tunnel_id = id.vlan_id;
+		if (spec->protocol)
+			return rte_flow_error_set
+				(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ITEM,
+				 item,
+				 "VxLAN-GPE protocol not supported");
+		/* Remove unwanted bits from values. */
+		vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
+	}
+	/*
+	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if this
+	 * layer is the only one defined in the Verbs specification, it is
+	 * interpreted as a wildcard and all packets will match this rule.
+	 * If it follows a full stack layer (ex: eth / ipv4 / udp), all
+	 * packets matching the preceding layers will also match this rule.
+	 * To avoid such a situation, VNI 0 is currently refused.
+	 */
+	if (!vxlan_gpe.val.tunnel_id)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM,
+					  item,
+					  "VXLAN-GPE vni cannot be 0");
+	if (!(layers & MLX5_FLOW_LAYER_OUTER))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM,
+					  item,
+					  "VXLAN-GPE tunnel must be fully"
+					  " defined");
+	if (size <= flow_size)
+		mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size);
+	mlx5_flow_layers_update(flow, MLX5_FLOW_LAYER_VXLAN_GPE);
+	flow->ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP;
+	return size;
+}
+
 /**
  * Validate items provided by the user.
  *
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param items
  *   Pointer to flow items array.
  * @param flow
@@ -1319,7 +1430,8 @@  mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
  *   otherwise and rte_errno is set.
  */
 static int
-mlx5_flow_items(const struct rte_flow_item items[],
+mlx5_flow_items(struct rte_eth_dev *dev,
+		const struct rte_flow_item items[],
 		struct rte_flow *flow, const size_t flow_size,
 		struct rte_flow_error *error)
 {
@@ -1353,6 +1465,10 @@  mlx5_flow_items(const struct rte_flow_item items[],
 		case RTE_FLOW_ITEM_TYPE_VXLAN:
 			ret = mlx5_flow_item_vxlan(items, flow, remain, error);
 			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+			ret = mlx5_flow_item_vxlan_gpe(dev, items, flow,
+						       remain, error);
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ITEM,
@@ -1962,7 +2078,8 @@  mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
 				(void *)(flow->cur_verbs->attr + 1);
 		}
 		ret = mlx5_flow_items
-			((const struct rte_flow_item *)buf->patterns[i],
+			(dev,
+			 (const struct rte_flow_item *)buf->patterns[i],
 			 flow, remain, error);
 		if (ret < 0)
 			goto error;