[v2,2/7] net/mlx5: e-switch VXLAN flow validation routine

Message ID 1539612815-47199-3-git-send-email-viacheslavo@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler
Headers
Series net/mlx5: e-switch VXLAN encap/decap hardware offload |

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/checkpatch success coding style OK

Commit Message

Slava Ovsiienko Oct. 15, 2018, 2:13 p.m. UTC
  This part of patchset adds support for flow item/action lists
validation. The following entities are now supported:

- RTE_FLOW_ITEM_TYPE_VXLAN, contains the tunnel VNI

- RTE_FLOW_ACTION_TYPE_VXLAN_DECAP, if this action is specified
  the items in the flow items list treated as outer network
  parameters for tunnel outer header match. The ethernet layer
  addresses always are treated as inner ones.

- RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, contains the item list to
  build the encapsulation header. In current implementation the
  values is the subject for some constraints:
    - outer source MAC address will be always unconditionally
      set to the one of MAC addresses of outer egress interface
    - no way to specify source UDP port
    - all abovementioned parameters are ignored if specified
      in the rule, warning messages are sent to the log

Suggested-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow_tcf.c | 711 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 705 insertions(+), 6 deletions(-)
  

Comments

Yongseok Koh Oct. 23, 2018, 10:04 a.m. UTC | #1
On Mon, Oct 15, 2018 at 02:13:30PM +0000, Viacheslav Ovsiienko wrote:
> This part of patchset adds support for flow item/action lists
> validation. The following entities are now supported:
> 
> - RTE_FLOW_ITEM_TYPE_VXLAN, contains the tunnel VNI
> 
> - RTE_FLOW_ACTION_TYPE_VXLAN_DECAP, if this action is specified
>   the items in the flow items list treated as outer network
>   parameters for tunnel outer header match. The ethernet layer
>   addresses always are treated as inner ones.
> 
> - RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, contains the item list to
>   build the encapsulation header. In current implementation the
>   values is the subject for some constraints:
>     - outer source MAC address will be always unconditionally
>       set to the one of MAC addresses of outer egress interface
>     - no way to specify source UDP port
>     - all abovementioned parameters are ignored if specified
>       in the rule, warning messages are sent to the log
> 
> Suggested-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5_flow_tcf.c | 711 ++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 705 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> index 8f9c78a..0055417 100644
> --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> @@ -430,6 +430,7 @@ struct mlx5_flow_tcf_context {
>  	struct rte_flow_item_ipv6 ipv6;
>  	struct rte_flow_item_tcp tcp;
>  	struct rte_flow_item_udp udp;
> +	struct rte_flow_item_vxlan vxlan;
>  } flow_tcf_mask_empty;
>  
>  /** Supported masks for known item types. */
> @@ -441,6 +442,7 @@ struct mlx5_flow_tcf_context {
>  	struct rte_flow_item_ipv6 ipv6;
>  	struct rte_flow_item_tcp tcp;
>  	struct rte_flow_item_udp udp;
> +	struct rte_flow_item_vxlan vxlan;
>  } flow_tcf_mask_supported = {
>  	.port_id = {
>  		.id = 0xffffffff,
> @@ -478,6 +480,9 @@ struct mlx5_flow_tcf_context {
>  		.src_port = RTE_BE16(0xffff),
>  		.dst_port = RTE_BE16(0xffff),
>  	},
> +	.vxlan = {
> +	       .vni = "\xff\xff\xff",
> +	},
>  };
>  
>  #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
> @@ -943,6 +948,615 @@ struct pedit_parser {
>  }
>  
>  /**
> + * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_ETH item for E-Switch.

How about mentioning it is to validate items of the "encap header"? Same for the
rest.

> + *
> + * @param[in] item
> + *   Pointer to the itemn structure.

Typo. Same for the rest.

> + * @param[out] error
> + *   Pointer to the error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + **/
> +static int
> +flow_tcf_validate_vxlan_encap_eth(const struct rte_flow_item *item,
> +				  struct rte_flow_error *error)
> +{
> +	const struct rte_flow_item_eth *spec = item->spec;
> +	const struct rte_flow_item_eth *mask = item->mask;
> +
> +	if (!spec)
> +		/*
> +		 * Specification for L2 addresses can be empty
> +		 * because these ones are optional and not
> +		 * required directly by tc rule.
> +		 */
> +		return 0;
> +	if (!mask)
> +		/* If mask is not specified use the default one. */
> +		mask = &rte_flow_item_eth_mask;
> +	if (memcmp(&mask->dst,
> +		   &flow_tcf_mask_empty.eth.dst,
> +		   sizeof(flow_tcf_mask_empty.eth.dst))) {
> +		if (memcmp(&mask->dst,
> +			   &rte_flow_item_eth_mask.dst,
> +			   sizeof(rte_flow_item_eth_mask.dst)))
> +			return rte_flow_error_set(error, ENOTSUP,
> +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> +				 "no support for partial mask on"
> +				 " \"eth.dst\" field");
> +	}
> +	if (memcmp(&mask->src,
> +		   &flow_tcf_mask_empty.eth.src,
> +		   sizeof(flow_tcf_mask_empty.eth.src))) {
> +		if (memcmp(&mask->src,
> +			   &rte_flow_item_eth_mask.src,
> +			   sizeof(rte_flow_item_eth_mask.src)))
> +			return rte_flow_error_set(error, ENOTSUP,
> +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> +				 "no support for partial mask on"
> +				 " \"eth.src\" field");
> +	}
> +	if (mask->type != RTE_BE16(0x0000)) {
> +		if (mask->type != RTE_BE16(0xffff))
> +			return rte_flow_error_set(error, ENOTSUP,
> +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> +				 "no support for partial mask on"
> +				 " \"eth.type\" field");
> +		DRV_LOG(WARNING,
> +			"outer ethernet type field "
> +			"cannot be forced for VXLAN "
> +			"encapsulation, parameter ignored");
> +	}
> +	return 0;
> +}
> +
> +/**
> + * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV4 item for E-Switch.
> + *
> + * @param[in] item
> + *   Pointer to the itemn structure.
> + * @param[out] error
> + *   Pointer to the error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + **/
> +static int
> +flow_tcf_validate_vxlan_encap_ipv4(const struct rte_flow_item *item,
> +				   struct rte_flow_error *error)
> +{
> +	const struct rte_flow_item_ipv4 *spec = item->spec;
> +	const struct rte_flow_item_ipv4 *mask = item->mask;
> +
> +	if (!spec)
> +		/*
> +		 * Specification for L3 addresses cannot be empty
> +		 * because it is required by tunnel_key parameter.
> +		 */
> +		return rte_flow_error_set(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> +				 "NULL outer L3 address specification "
> +				 " for VXLAN encapsulation");
> +	if (!mask)
> +		mask = &rte_flow_item_ipv4_mask;
> +	if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
> +		if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
> +			return rte_flow_error_set(error, ENOTSUP,
> +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> +				 "no support for partial mask on"
> +				 " \"ipv4.hdr.dst_addr\" field");
> +		/* More L3 address validations can be put here. */
> +	} else {
> +		/*
> +		 * Kernel uses the destination L3 address to determine
> +		 * the routing path and obtain the L2 destination
> +		 * address, so L3 destination address must be
> +		 * specified in the tc rule.
> +		 */
> +		return rte_flow_error_set(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> +				 "outer L3 destination address must be "
> +				 "specified for VXLAN encapsulation");
> +	}
> +	if (mask->hdr.src_addr != RTE_BE32(0x00000000)) {
> +		if (mask->hdr.src_addr != RTE_BE32(0xffffffff))
> +			return rte_flow_error_set(error, ENOTSUP,
> +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> +				 "no support for partial mask on"
> +				 " \"ipv4.hdr.src_addr\" field");
> +		/* More L3 address validations can be put here. */
> +	} else {
> +		/*
> +		 * Kernel uses the source L3 address to select the
> +		 * interface for egress encapsulated traffic, so
> +		 * it must be specified in the tc rule.
> +		 */
> +		return rte_flow_error_set(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> +				 "outer L3 source address must be "
> +				 "specified for VXLAN encapsulation");
> +	}
> +	return 0;
> +}
> +
> +/**
> + * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV6 item for E-Switch.
> + *
> + * @param[in] item
> + *   Pointer to the itemn structure.
> + * @param[out] error
> + *   Pointer to the error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_ernno is set.
> + **/
> +static int
> +flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item *item,
> +				   struct rte_flow_error *error)
> +{
> +	const struct rte_flow_item_ipv6 *spec = item->spec;
> +	const struct rte_flow_item_ipv6 *mask = item->mask;
> +
> +	if (!spec)
> +		/*
> +		 * Specification for L3 addresses cannot be empty
> +		 * because it is required by tunnel_key parameter.
> +		 */
> +		return rte_flow_error_set(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> +				 "NULL outer L3 address specification "
> +				 " for VXLAN encapsulation");
> +	if (!mask)
> +		mask = &rte_flow_item_ipv6_mask;
> +	if (memcmp(&mask->hdr.dst_addr,
> +		   &flow_tcf_mask_empty.ipv6.hdr.dst_addr,
> +		   sizeof(flow_tcf_mask_empty.ipv6.hdr.dst_addr))) {
> +		if (memcmp(&mask->hdr.dst_addr,
> +		   &rte_flow_item_ipv6_mask.hdr.dst_addr,
> +		   sizeof(rte_flow_item_ipv6_mask.hdr.dst_addr)))
> +			return rte_flow_error_set(error, ENOTSUP,
> +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> +				 "no support for partial mask on"
> +				 " \"ipv6.hdr.dst_addr\" field");
> +		/* More L3 address validations can be put here. */
> +	} else {
> +		/*
> +		 * Kernel uses the destination L3 address to determine
> +		 * the routing path and obtain the L2 destination
> +		 * address (heigh or gate), so L3 destination address
> +		 * must be specified within the tc rule.
> +		 */
> +		return rte_flow_error_set(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> +				 "outer L3 destination address must be "
> +				 "specified for VXLAN encapsulation");
> +	}
> +	if (memcmp(&mask->hdr.src_addr,
> +		   &flow_tcf_mask_empty.ipv6.hdr.src_addr,
> +		   sizeof(flow_tcf_mask_empty.ipv6.hdr.src_addr))) {
> +		if (memcmp(&mask->hdr.src_addr,
> +		   &rte_flow_item_ipv6_mask.hdr.src_addr,
> +		   sizeof(rte_flow_item_ipv6_mask.hdr.src_addr)))
> +			return rte_flow_error_set(error, ENOTSUP,
> +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> +				 "no support for partial mask on"
> +				 " \"ipv6.hdr.src_addr\" field");
> +		/* More L3 address validation can be put here. */
> +	} else {
> +		/*
> +		 * Kernel uses the source L3 address to select the
> +		 * interface for egress encapsulated traffic, so
> +		 * it must be specified in the tc rule.
> +		 */
> +		return rte_flow_error_set(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> +				 "outer L3 source address must be "
> +				 "specified for VXLAN encapsulation");
> +	}
> +	return 0;
> +}
> +
> +/**
> + * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_UDP item for E-Switch.
> + *
> + * @param[in] item
> + *   Pointer to the itemn structure.
> + * @param[out] error
> + *   Pointer to the error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_ernno is set.
> + **/
> +static int
> +flow_tcf_validate_vxlan_encap_udp(const struct rte_flow_item *item,
> +				  struct rte_flow_error *error)
> +{
> +	const struct rte_flow_item_udp *spec = item->spec;
> +	const struct rte_flow_item_udp *mask = item->mask;
> +
> +	if (!spec)
> +		/*
> +		 * Specification for UDP ports cannot be empty
> +		 * because it is required by tunnel_key parameter.
> +		 */
> +		return rte_flow_error_set(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> +				 "NULL UDP port specification "
> +				 " for VXLAN encapsulation");
> +	if (!mask)
> +		mask = &rte_flow_item_udp_mask;
> +	if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
> +		if (mask->hdr.dst_port != RTE_BE16(0xffff))
> +			return rte_flow_error_set(error, ENOTSUP,
> +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> +				 "no support for partial mask on"
> +				 " \"udp.hdr.dst_port\" field");
> +		if (!spec->hdr.dst_port)
> +			return rte_flow_error_set(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> +				 "zero encap remote UDP port");
> +	} else {
> +		return rte_flow_error_set(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> +				 "outer UDP remote port must be "
> +				 "specified for VXLAN encapsulation");
> +	}
> +	if (mask->hdr.src_port != RTE_BE16(0x0000)) {
> +		if (mask->hdr.src_port != RTE_BE16(0xffff))
> +			return rte_flow_error_set(error, ENOTSUP,
> +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> +				 "no support for partial mask on"
> +				 " \"udp.hdr.src_port\" field");
> +		DRV_LOG(WARNING,
> +			"outer UDP source port cannot be "
> +			"forced for VXLAN encapsulation, "
> +			"parameter ignored");
> +	}
> +	return 0;
> +}
> +
> +/**
> + * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_VXLAN item for E-Switch.
> + *
> + * @param[in] item
> + *   Pointer to the itemn structure.
> + * @param[out] error
> + *   Pointer to the error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_ernno is set.
> + **/
> +static int
> +flow_tcf_validate_vxlan_encap_vni(const struct rte_flow_item *item,
> +				  struct rte_flow_error *error)
> +{
> +	const struct rte_flow_item_vxlan *spec = item->spec;
> +	const struct rte_flow_item_vxlan *mask = item->mask;
> +
> +	if (!spec)
> +		/* Outer VNI is required by tunnel_key parameter. */
> +		return rte_flow_error_set(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> +				 "NULL VNI specification "
> +				 " for VXLAN encapsulation");
> +	if (!mask)
> +		mask = &rte_flow_item_vxlan_mask;
> +	if (mask->vni[0] != 0 ||
> +	    mask->vni[1] != 0 ||
> +	    mask->vni[2] != 0) {

can be one line.

> +		if (mask->vni[0] != 0xff ||
> +		    mask->vni[1] != 0xff ||
> +		    mask->vni[2] != 0xff)

same here.

> +			return rte_flow_error_set(error, ENOTSUP,
> +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> +				 "no support for partial mask on"
> +				 " \"vxlan.vni\" field");
> +		if (spec->vni[0] == 0 &&
> +		    spec->vni[1] == 0 &&
> +		    spec->vni[2] == 0)
> +			return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ITEM, item,
> +					  "VXLAN vni cannot be 0");

It is already checked by mlx5_flow_validate_item_vxlan().

> +	} else {
> +		return rte_flow_error_set(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ITEM,
> +				 item,
> +				 "outer VNI must be specified "
> +				 "for VXLAN encapsulation");
> +	}

Already checked in mlx5_flow_validate_item_vxlan().

> +	return 0;
> +}
> +
> +/**
> + * Validate VXLAN_ENCAP action item list for E-Switch.
> + *
> + * @param[in] action
> + *   Pointer to the VXLAN_ENCAP action structure.
> + * @param[out] error
> + *   Pointer to the error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_ernno is set.
> + **/
> +static int
> +flow_tcf_validate_vxlan_encap(const struct rte_flow_action *action,
> +			      struct rte_flow_error *error)
> +{
> +	const struct rte_flow_item *items;
> +	int ret;
> +	uint32_t item_flags = 0;
> +
> +	assert(action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP);
> +	if (!action->conf)
> +		return rte_flow_error_set
> +			(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
> +			 action, "Missing VXLAN tunnel "
> +				 "action configuration");
> +	items = ((const struct rte_flow_action_vxlan_encap *)
> +					action->conf)->definition;
> +	if (!items)
> +		return rte_flow_error_set
> +			(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
> +			 action, "Missing VXLAN tunnel "
> +				 "encapsulation parameters");
> +	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
> +		switch (items->type) {
> +		case RTE_FLOW_ITEM_TYPE_VOID:
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_ETH:
> +			ret = mlx5_flow_validate_item_eth(items, item_flags,
> +							  error);
> +			if (ret < 0)
> +				return ret;
> +			ret = flow_tcf_validate_vxlan_encap_eth(items, error);
> +			if (ret < 0)
> +				return ret;
> +			item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
> +			break;
> +		break;
> +		case RTE_FLOW_ITEM_TYPE_IPV4:
> +			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
> +							   error);
> +			if (ret < 0)
> +				return ret;
> +			ret = flow_tcf_validate_vxlan_encap_ipv4(items, error);
> +			if (ret < 0)
> +				return ret;
> +			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_IPV6:
> +			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
> +							   error);
> +			if (ret < 0)
> +				return ret;
> +			ret = flow_tcf_validate_vxlan_encap_ipv6(items, error);
> +			if (ret < 0)
> +				return ret;
> +			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_UDP:
> +			ret = mlx5_flow_validate_item_udp(items, item_flags,
> +							   0xFF, error);
> +			if (ret < 0)
> +				return ret;
> +			ret = flow_tcf_validate_vxlan_encap_udp(items, error);
> +			if (ret < 0)
> +				return ret;
> +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_VXLAN:
> +			ret = mlx5_flow_validate_item_vxlan(items,
> +							    item_flags, error);
> +			if (ret < 0)
> +				return ret;
> +			ret = flow_tcf_validate_vxlan_encap_vni(items, error);
> +			if (ret < 0)
> +				return ret;
> +			item_flags |= MLX5_FLOW_LAYER_VXLAN;
> +			break;
> +		default:
> +			return rte_flow_error_set(error, ENOTSUP,
> +					  RTE_FLOW_ERROR_TYPE_ITEM, items,
> +					  "VXLAN encap item not supported");
> +		}
> +	}
> +	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION, action,
> +					  "no outer L3 layer found"
> +					  " for VXLAN encapsulation");
> +	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION, action,
> +					  "no outer L4 layer found"

L4 -> UDP?

> +					  " for VXLAN encapsulation");
> +	if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION, action,
> +					  "no VXLAN VNI found"
> +					  " for VXLAN encapsulation");
> +	return 0;
> +}
> +
> +/**
> + * Validate VXLAN_DECAP action outer tunnel items for E-Switch.
> + *
> + * @param[in] item_flags
> + *   Mask of provided outer tunnel parameters
> + * @param[in] ipv4
> + *   Outer IPv4 address item (if any, NULL otherwise).
> + * @param[in] ipv6
> + *   Outer IPv6 address item (if any, NULL otherwise).
> + * @param[in] udp
> + *   Outer UDP layer item (if any, NULL otherwise).
> + * @param[out] error
> + *   Pointer to the error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_ernno is set.
> + **/
> +static int
> +flow_tcf_validate_vxlan_decap(uint32_t item_flags,
> +			      const struct rte_flow_action *action,
> +			      const struct rte_flow_item *ipv4,
> +			      const struct rte_flow_item *ipv6,
> +			      const struct rte_flow_item *udp,
> +			      struct rte_flow_error *error)
> +{
> +	if (!ipv4 && !ipv6)
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION, action,
> +					  "no outer L3 layer found"
> +					  " for VXLAN decapsulation");
> +	if (ipv4) {
> +		const struct rte_flow_item_ipv4 *spec = ipv4->spec;
> +		const struct rte_flow_item_ipv4 *mask = ipv4->mask;
> +
> +		if (!spec)
> +			/*
> +			 * Specification for L3 addresses cannot be empty
> +			 * because it is required as decap parameter.
> +			 */
> +			return rte_flow_error_set(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
> +				 "NULL outer L3 address specification "
> +				 " for VXLAN decapsulation");
> +		if (!mask)
> +			mask = &rte_flow_item_ipv4_mask;
> +		if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
> +			if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
> +				return rte_flow_error_set(error, ENOTSUP,
> +					 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> +					 "no support for partial mask on"
> +					 " \"ipv4.hdr.dst_addr\" field");
> +			/* More L3 address validations can be put here. */
> +		} else {
> +			/*
> +			 * Kernel uses the destination L3 address
> +			 * to determine the ingress network interface
> +			 * for traffic being decapculated.
> +			 */
> +			return rte_flow_error_set(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
> +				 "outer L3 destination address must be "
> +				 "specified for VXLAN decapsulation");
> +		}
> +		/* Source L3 address is optional for decap. */
> +		if (mask->hdr.src_addr != RTE_BE32(0x00000000))
> +			if (mask->hdr.src_addr != RTE_BE32(0xffffffff))
> +				return rte_flow_error_set(error, ENOTSUP,
> +					 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> +					 "no support for partial mask on"
> +					 " \"ipv4.hdr.src_addr\" field");
> +	} else {
> +		const struct rte_flow_item_ipv6 *spec = ipv6->spec;
> +		const struct rte_flow_item_ipv6 *mask = ipv6->mask;
> +
> +		if (!spec)
> +			/*
> +			 * Specification for L3 addresses cannot be empty
> +			 * because it is required as decap parameter.
> +			 */
> +			return rte_flow_error_set(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
> +				 "NULL outer L3 address specification "
> +				 " for VXLAN decapsulation");
> +		if (!mask)
> +			mask = &rte_flow_item_ipv6_mask;
> +		if (memcmp(&mask->hdr.dst_addr,
> +			   &flow_tcf_mask_empty.ipv6.hdr.dst_addr,
> +			   sizeof(flow_tcf_mask_empty.ipv6.hdr.dst_addr))) {
> +			if (memcmp(&mask->hdr.dst_addr,
> +				&rte_flow_item_ipv6_mask.hdr.dst_addr,
> +				sizeof(rte_flow_item_ipv6_mask.hdr.dst_addr)))
> +				return rte_flow_error_set(error, ENOTSUP,
> +				       RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> +				       "no support for partial mask on"
> +				       " \"ipv6.hdr.dst_addr\" field");
> +		/* More L3 address validations can be put here. */
> +		} else {
> +			/*
> +			 * Kernel uses the destination L3 address
> +			 * to determine the ingress network interface
> +			 * for traffic being decapculated.
> +			 */
> +			return rte_flow_error_set(error, EINVAL,
> +				 RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
> +				 "outer L3 destination address must be "
> +				 "specified for VXLAN decapsulation");
> +		}
> +		/* Source L3 address is optional for decap. */
> +		if (memcmp(&mask->hdr.src_addr,
> +			   &flow_tcf_mask_empty.ipv6.hdr.src_addr,
> +			   sizeof(flow_tcf_mask_empty.ipv6.hdr.src_addr))) {
> +			if (memcmp(&mask->hdr.src_addr,
> +				   &rte_flow_item_ipv6_mask.hdr.src_addr,
> +				   sizeof(mask->hdr.src_addr)))
> +				return rte_flow_error_set(error, ENOTSUP,
> +					RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> +					"no support for partial mask on"
> +					" \"ipv6.hdr.src_addr\" field");
> +		}
> +	}
> +	if (!udp) {
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION, action,
> +					  "no outer L4 layer found"
> +					  " for VXLAN decapsulation");
> +	} else {
> +		const struct rte_flow_item_udp *spec = udp->spec;
> +		const struct rte_flow_item_udp *mask = udp->mask;
> +
> +		if (!spec)
> +			/*
> +			 * Specification for UDP ports cannot be empty
> +			 * because it is required as decap parameter.
> +			 */
> +			return rte_flow_error_set(error, EINVAL,
> +					 RTE_FLOW_ERROR_TYPE_ITEM, udp,
> +					 "NULL UDP port specification "
> +					 " for VXLAN decapsulation");
> +		if (!mask)
> +			mask = &rte_flow_item_udp_mask;
> +		if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
> +			if (mask->hdr.dst_port != RTE_BE16(0xffff))
> +				return rte_flow_error_set(error, ENOTSUP,
> +					 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> +					 "no support for partial mask on"
> +					 " \"udp.hdr.dst_port\" field");
> +			if (!spec->hdr.dst_port)
> +				return rte_flow_error_set(error, EINVAL,
> +					 RTE_FLOW_ERROR_TYPE_ITEM, udp,
> +					 "zero decap local UDP port");
> +		} else {
> +			return rte_flow_error_set(error, EINVAL,
> +					 RTE_FLOW_ERROR_TYPE_ITEM, udp,
> +					 "outer UDP destination port must be "
> +					 "specified for VXLAN decapsulation");
> +		}
> +		if (mask->hdr.src_port != RTE_BE16(0x0000)) {
> +			if (mask->hdr.src_port != RTE_BE16(0xffff))
> +				return rte_flow_error_set(error, ENOTSUP,
> +					 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> +					 "no support for partial mask on"
> +					 " \"udp.hdr.src_port\" field");
> +			DRV_LOG(WARNING,
> +			"outer UDP local port cannot be "
> +			"forced for VXLAN encapsulation, "
> +			"parameter ignored");
> +		}
> +	}
> +	if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION, action,
> +					  "no VXLAN VNI found"
> +					  " for VXLAN decapsulation");
> +	/* VNI is already validated, extra check can be put here. */
> +	return 0;
> +}
> +
> +/**
>   * Validate flow for E-Switch.
>   *
>   * @param[in] priv
> @@ -974,7 +1588,8 @@ struct pedit_parser {
>  		const struct rte_flow_item_ipv6 *ipv6;
>  		const struct rte_flow_item_tcp *tcp;
>  		const struct rte_flow_item_udp *udp;
> -	} spec, mask;
> +		const struct rte_flow_item_vxlan *vxlan;
> +	 } spec, mask;
>  	union {
>  		const struct rte_flow_action_port_id *port_id;
>  		const struct rte_flow_action_jump *jump;
> @@ -983,9 +1598,13 @@ struct pedit_parser {
>  			of_set_vlan_vid;
>  		const struct rte_flow_action_of_set_vlan_pcp *
>  			of_set_vlan_pcp;
> +		const struct rte_flow_action_vxlan_encap *vxlan_encap;
>  		const struct rte_flow_action_set_ipv4 *set_ipv4;
>  		const struct rte_flow_action_set_ipv6 *set_ipv6;
>  	} conf;
> +	const struct rte_flow_item *ipv4 = NULL; /* storage to check */
> +	const struct rte_flow_item *ipv6 = NULL; /* outer tunnel. */
> +	const struct rte_flow_item *udp = NULL;  /* parameters. */
>  	uint32_t item_flags = 0;
>  	uint32_t action_flags = 0;
>  	uint8_t next_protocol = -1;
> @@ -1114,7 +1733,6 @@ struct pedit_parser {
>  							   error);
>  			if (ret < 0)
>  				return ret;
> -			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
>  			mask.ipv4 = flow_tcf_item_mask
>  				(items, &rte_flow_item_ipv4_mask,
>  				 &flow_tcf_mask_supported.ipv4,
> @@ -1135,13 +1753,22 @@ struct pedit_parser {
>  				next_protocol =
>  					((const struct rte_flow_item_ipv4 *)
>  					 (items->spec))->hdr.next_proto_id;
> +			if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
> +				/*
> +				 * Multiple outer items are not allowed as
> +				 * tunnel parameters, will raise an error later.
> +				 */
> +				ipv4 = NULL;

Can't it be inner then?

  flow create 1 ingress transfer
    pattern eth src is 66:77:88:99:aa:bb
      dst is 00:11:22:33:44:55 / ipv4 src is 2.2.2.2 dst is 1.1.1.1 /
      udp src is 4789 dst is 4242 / vxlan vni is 0x112233 /
      eth / ipv6 / tcp dst is 42 / end
    actions vxlan_decap / port_id id 2 / end

Is this flow supported by linux tcf? I took this example from Adrien's patch -
"[8/8] net/mlx5: add VXLAN decap support to switch flow rules". If so, isn't it
possible to have inner L3 layer (MLX5_FLOW_LAYER_INNER_*)? If not, you should
return error in this case. I don't see any code to check redundant outer items.
Did I miss something?

BTW, for the tunneled items, why don't you follow the code of
Verbs(mlx5_flow_verbs.c) and DV(mlx5_flow_dv.c)? For tcf, it is the first time
to add tunneled item, but Verbs/DV already have validation code for tunnel, so
you can reuse the existing code. In flow_tcf_validate_vxlan_decap(), not every
validation is VXLAN-specific but some of them can be common code.

And if you need to know whether there's the VXLAN decap action prior to outer
header item validation, you can relocate the code - action validation first and
item validation next, as there's no dependency yet in the current code. Defining
ipv4, ipv6, udp seems to make the code path more complex.

For example, you just can call vxlan decap item validation (by splitting
flow_tcf_validate_vxlan_decap()) at this point like:

			if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP)
				ret = flow_tcf_validate_vxlan_decap_ipv4(...);
			...

Same for other items.

> +			} else {
> +				ipv4 = items;
> +				item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
> +			}
>  			break;
>  		case RTE_FLOW_ITEM_TYPE_IPV6:
>  			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
>  							   error);
>  			if (ret < 0)
>  				return ret;
> -			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
>  			mask.ipv6 = flow_tcf_item_mask
>  				(items, &rte_flow_item_ipv6_mask,
>  				 &flow_tcf_mask_supported.ipv6,
> @@ -1162,13 +1789,22 @@ struct pedit_parser {
>  				next_protocol =
>  					((const struct rte_flow_item_ipv6 *)
>  					 (items->spec))->hdr.proto;
> +			if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
> +				/*
> +				 *Multiple outer items are not allowed as
> +				 * tunnel parameters
> +				 */
> +				ipv6 = NULL;
> +			} else {
> +				ipv6 = items;
> +				item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
> +			}
>  			break;
>  		case RTE_FLOW_ITEM_TYPE_UDP:
>  			ret = mlx5_flow_validate_item_udp(items, item_flags,
>  							  next_protocol, error);
>  			if (ret < 0)
>  				return ret;
> -			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
>  			mask.udp = flow_tcf_item_mask
>  				(items, &rte_flow_item_udp_mask,
>  				 &flow_tcf_mask_supported.udp,
> @@ -1177,6 +1813,12 @@ struct pedit_parser {
>  				 error);
>  			if (!mask.udp)
>  				return -rte_errno;
> +			if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP) {
> +				udp = NULL;
> +			} else {
> +				udp = items;
> +				item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
> +			}
>  			break;
>  		case RTE_FLOW_ITEM_TYPE_TCP:
>  			ret = mlx5_flow_validate_item_tcp
> @@ -1186,7 +1828,6 @@ struct pedit_parser {
>  					      error);
>  			if (ret < 0)
>  				return ret;
> -			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
>  			mask.tcp = flow_tcf_item_mask
>  				(items, &rte_flow_item_tcp_mask,
>  				 &flow_tcf_mask_supported.tcp,
> @@ -1195,11 +1836,36 @@ struct pedit_parser {
>  				 error);
>  			if (!mask.tcp)
>  				return -rte_errno;
> +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_VXLAN:
> +			ret = mlx5_flow_validate_item_vxlan(items,
> +							    item_flags, error);
> +			if (ret < 0)
> +				return ret;
> +			mask.vxlan = flow_tcf_item_mask
> +				(items, &rte_flow_item_vxlan_mask,
> +				 &flow_tcf_mask_supported.vxlan,
> +				 &flow_tcf_mask_empty.vxlan,
> +				 sizeof(flow_tcf_mask_supported.vxlan),
> +				 error);
> +			if (!mask.vxlan)
> +				return -rte_errno;
> +			if (mask.vxlan->vni[0] != 0xff ||
> +			    mask.vxlan->vni[1] != 0xff ||
> +			    mask.vxlan->vni[2] != 0xff)
> +				return rte_flow_error_set
> +					(error, ENOTSUP,
> +					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
> +					 mask.vxlan,
> +					 "no support for partial or "
> +					 "empty mask on \"vxlan.vni\" field");
> +			item_flags |= MLX5_FLOW_LAYER_VXLAN;
>  			break;
>  		default:
>  			return rte_flow_error_set(error, ENOTSUP,
>  						  RTE_FLOW_ERROR_TYPE_ITEM,
> -						  NULL, "item not supported");
> +						  items, "item not supported");
>  		}
>  	}
>  	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
> @@ -1271,6 +1937,33 @@ struct pedit_parser {
>  					 " set action must follow push action");
>  			current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> +			if (action_flags & (MLX5_ACTION_VXLAN_ENCAP
> +					   | MLX5_ACTION_VXLAN_DECAP))
> +				return rte_flow_error_set
> +					(error, ENOTSUP,
> +					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
> +					 "can't have multiple vxlan actions");
> +			ret = flow_tcf_validate_vxlan_encap(actions, error);
> +			if (ret < 0)
> +				return ret;
> +			action_flags |= MLX5_ACTION_VXLAN_ENCAP;

Recently, current_action_flag has been added for PEDIT actions. Please refer to
the code above and make it compliant.

> +			break;
> +		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
> +			if (action_flags & (MLX5_ACTION_VXLAN_ENCAP
> +					   | MLX5_ACTION_VXLAN_DECAP))
> +				return rte_flow_error_set
> +					(error, ENOTSUP,
> +					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
> +					 "can't have multiple vxlan actions");
> +			ret = flow_tcf_validate_vxlan_decap(item_flags,
> +							    actions,
> +							    ipv4, ipv6, udp,
> +							    error);
> +			if (ret < 0)
> +				return ret;
> +			action_flags |= MLX5_ACTION_VXLAN_DECAP;
> +			break;
>  		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
>  			current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
>  			break;
> @@ -1391,6 +2084,12 @@ struct pedit_parser {
>  		return rte_flow_error_set(error, EINVAL,
>  					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
>  					  "no fate action is found");
> +	if ((item_flags & MLX5_FLOW_LAYER_VXLAN) &&
> +	    !(action_flags & MLX5_ACTION_VXLAN_DECAP))
> +		return rte_flow_error_set(error, ENOTSUP,
> +					 RTE_FLOW_ERROR_TYPE_ACTION, NULL,
> +					 "VNI pattern should be followed "
> +					 " by VXLAN_DECAP action");
>  	return 0;
>  }
>
  
Slava Ovsiienko Oct. 25, 2018, 1:53 p.m. UTC | #2
> -----Original Message-----
> From: Yongseok Koh
> Sent: Tuesday, October 23, 2018 13:05
> To: Slava Ovsiienko <viacheslavo@mellanox.com>
> Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> Subject: Re: [PATCH v2 2/7] net/mlx5: e-switch VXLAN flow validation
> routine
> 
> On Mon, Oct 15, 2018 at 02:13:30PM +0000, Viacheslav Ovsiienko wrote:
> > This part of patchset adds support for flow item/action lists
> > validation. The following entities are now supported:
> >
> > - RTE_FLOW_ITEM_TYPE_VXLAN, contains the tunnel VNI
> >
> > - RTE_FLOW_ACTION_TYPE_VXLAN_DECAP, if this action is specified
> >   the items in the flow items list treated as outer network
> >   parameters for tunnel outer header match. The ethernet layer
> >   addresses always are treated as inner ones.
> >
> > - RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, contains the item list to
> >   build the encapsulation header. In current implementation the
> >   values is the subject for some constraints:
> >     - outer source MAC address will be always unconditionally
> >       set to the one of MAC addresses of outer egress interface
> >     - no way to specify source UDP port
> >     - all abovementioned parameters are ignored if specified
> >       in the rule, warning messages are sent to the log
> >
> > Suggested-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
> > Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> > ---
> >  drivers/net/mlx5/mlx5_flow_tcf.c | 711
> > ++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 705 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c
> > b/drivers/net/mlx5/mlx5_flow_tcf.c
> > index 8f9c78a..0055417 100644
> > --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> > +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> > @@ -430,6 +430,7 @@ struct mlx5_flow_tcf_context {
> >  	struct rte_flow_item_ipv6 ipv6;
> >  	struct rte_flow_item_tcp tcp;
> >  	struct rte_flow_item_udp udp;
> > +	struct rte_flow_item_vxlan vxlan;
> >  } flow_tcf_mask_empty;
> >
> >  /** Supported masks for known item types. */ @@ -441,6 +442,7 @@
> > struct mlx5_flow_tcf_context {
> >  	struct rte_flow_item_ipv6 ipv6;
> >  	struct rte_flow_item_tcp tcp;
> >  	struct rte_flow_item_udp udp;
> > +	struct rte_flow_item_vxlan vxlan;
> >  } flow_tcf_mask_supported = {
> >  	.port_id = {
> >  		.id = 0xffffffff,
> > @@ -478,6 +480,9 @@ struct mlx5_flow_tcf_context {
> >  		.src_port = RTE_BE16(0xffff),
> >  		.dst_port = RTE_BE16(0xffff),
> >  	},
> > +	.vxlan = {
> > +	       .vni = "\xff\xff\xff",
> > +	},
> >  };
> >
> >  #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr)) @@ -943,6
> > +948,615 @@ struct pedit_parser {  }
> >
> >  /**
> > + * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_ETH item for E-
> Switch.
> 
> How about mentioning it is to validate items of the "encap header"? Same
> for the rest.


OK. Will clarify description(s).

> 
> > + *
> > + * @param[in] item
> > + *   Pointer to the itemn structure.
> 
> Typo. Same for the rest.

OK. Thanks, will correct.

> 
> > + * @param[out] error
> > + *   Pointer to the error structure.
> > + *
> > + * @return
> > + *   0 on success, a negative errno value otherwise and rte_errno is set.
> > + **/
> > +static int
> > +flow_tcf_validate_vxlan_encap_eth(const struct rte_flow_item *item,
> > +				  struct rte_flow_error *error)
> > +{
> > +	const struct rte_flow_item_eth *spec = item->spec;
> > +	const struct rte_flow_item_eth *mask = item->mask;
> > +
> > +	if (!spec)
> > +		/*
> > +		 * Specification for L2 addresses can be empty
> > +		 * because these ones are optional and not
> > +		 * required directly by tc rule.
> > +		 */
> > +		return 0;
> > +	if (!mask)
> > +		/* If mask is not specified use the default one. */
> > +		mask = &rte_flow_item_eth_mask;
> > +	if (memcmp(&mask->dst,
> > +		   &flow_tcf_mask_empty.eth.dst,
> > +		   sizeof(flow_tcf_mask_empty.eth.dst))) {
> > +		if (memcmp(&mask->dst,
> > +			   &rte_flow_item_eth_mask.dst,
> > +			   sizeof(rte_flow_item_eth_mask.dst)))
> > +			return rte_flow_error_set(error, ENOTSUP,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
> mask,
> > +				 "no support for partial mask on"
> > +				 " \"eth.dst\" field");
> > +	}
> > +	if (memcmp(&mask->src,
> > +		   &flow_tcf_mask_empty.eth.src,
> > +		   sizeof(flow_tcf_mask_empty.eth.src))) {
> > +		if (memcmp(&mask->src,
> > +			   &rte_flow_item_eth_mask.src,
> > +			   sizeof(rte_flow_item_eth_mask.src)))
> > +			return rte_flow_error_set(error, ENOTSUP,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
> mask,
> > +				 "no support for partial mask on"
> > +				 " \"eth.src\" field");
> > +	}
> > +	if (mask->type != RTE_BE16(0x0000)) {
> > +		if (mask->type != RTE_BE16(0xffff))
> > +			return rte_flow_error_set(error, ENOTSUP,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
> mask,
> > +				 "no support for partial mask on"
> > +				 " \"eth.type\" field");
> > +		DRV_LOG(WARNING,
> > +			"outer ethernet type field "
> > +			"cannot be forced for VXLAN "
> > +			"encapsulation, parameter ignored");
> > +	}
> > +	return 0;
> > +}
> > +
> > +/**
> > + * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV4 item for E-
> Switch.
> > + *
> > + * @param[in] item
> > + *   Pointer to the itemn structure.
> > + * @param[out] error
> > + *   Pointer to the error structure.
> > + *
> > + * @return
> > + *   0 on success, a negative errno value otherwise and rte_errno is set.
> > + **/
> > +static int
> > +flow_tcf_validate_vxlan_encap_ipv4(const struct rte_flow_item *item,
> > +				   struct rte_flow_error *error)
> > +{
> > +	const struct rte_flow_item_ipv4 *spec = item->spec;
> > +	const struct rte_flow_item_ipv4 *mask = item->mask;
> > +
> > +	if (!spec)
> > +		/*
> > +		 * Specification for L3 addresses cannot be empty
> > +		 * because it is required by tunnel_key parameter.
> > +		 */
> > +		return rte_flow_error_set(error, EINVAL,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> > +				 "NULL outer L3 address specification "
> > +				 " for VXLAN encapsulation");
> > +	if (!mask)
> > +		mask = &rte_flow_item_ipv4_mask;
> > +	if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
> > +		if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
> > +			return rte_flow_error_set(error, ENOTSUP,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
> mask,
> > +				 "no support for partial mask on"
> > +				 " \"ipv4.hdr.dst_addr\" field");
> > +		/* More L3 address validations can be put here. */
> > +	} else {
> > +		/*
> > +		 * Kernel uses the destination L3 address to determine
> > +		 * the routing path and obtain the L2 destination
> > +		 * address, so L3 destination address must be
> > +		 * specified in the tc rule.
> > +		 */
> > +		return rte_flow_error_set(error, EINVAL,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> > +				 "outer L3 destination address must be "
> > +				 "specified for VXLAN encapsulation");
> > +	}
> > +	if (mask->hdr.src_addr != RTE_BE32(0x00000000)) {
> > +		if (mask->hdr.src_addr != RTE_BE32(0xffffffff))
> > +			return rte_flow_error_set(error, ENOTSUP,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
> mask,
> > +				 "no support for partial mask on"
> > +				 " \"ipv4.hdr.src_addr\" field");
> > +		/* More L3 address validations can be put here. */
> > +	} else {
> > +		/*
> > +		 * Kernel uses the source L3 address to select the
> > +		 * interface for egress encapsulated traffic, so
> > +		 * it must be specified in the tc rule.
> > +		 */
> > +		return rte_flow_error_set(error, EINVAL,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> > +				 "outer L3 source address must be "
> > +				 "specified for VXLAN encapsulation");
> > +	}
> > +	return 0;
> > +}
> > +
> > +/**
> > + * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV6 item for E-
> Switch.
> > + *
> > + * @param[in] item
> > + *   Pointer to the itemn structure.
> > + * @param[out] error
> > + *   Pointer to the error structure.
> > + *
> > + * @return
> > + *   0 on success, a negative errno value otherwise and rte_ernno is set.
> > + **/
> > +static int
> > +flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item *item,
> > +				   struct rte_flow_error *error)
> > +{
> > +	const struct rte_flow_item_ipv6 *spec = item->spec;
> > +	const struct rte_flow_item_ipv6 *mask = item->mask;
> > +
> > +	if (!spec)
> > +		/*
> > +		 * Specification for L3 addresses cannot be empty
> > +		 * because it is required by tunnel_key parameter.
> > +		 */
> > +		return rte_flow_error_set(error, EINVAL,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> > +				 "NULL outer L3 address specification "
> > +				 " for VXLAN encapsulation");
> > +	if (!mask)
> > +		mask = &rte_flow_item_ipv6_mask;
> > +	if (memcmp(&mask->hdr.dst_addr,
> > +		   &flow_tcf_mask_empty.ipv6.hdr.dst_addr,
> > +		   sizeof(flow_tcf_mask_empty.ipv6.hdr.dst_addr))) {
> > +		if (memcmp(&mask->hdr.dst_addr,
> > +		   &rte_flow_item_ipv6_mask.hdr.dst_addr,
> > +		   sizeof(rte_flow_item_ipv6_mask.hdr.dst_addr)))
> > +			return rte_flow_error_set(error, ENOTSUP,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
> mask,
> > +				 "no support for partial mask on"
> > +				 " \"ipv6.hdr.dst_addr\" field");
> > +		/* More L3 address validations can be put here. */
> > +	} else {
> > +		/*
> > +		 * Kernel uses the destination L3 address to determine
> > +		 * the routing path and obtain the L2 destination
> > +		 * address (heigh or gate), so L3 destination address
> > +		 * must be specified within the tc rule.
> > +		 */
> > +		return rte_flow_error_set(error, EINVAL,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> > +				 "outer L3 destination address must be "
> > +				 "specified for VXLAN encapsulation");
> > +	}
> > +	if (memcmp(&mask->hdr.src_addr,
> > +		   &flow_tcf_mask_empty.ipv6.hdr.src_addr,
> > +		   sizeof(flow_tcf_mask_empty.ipv6.hdr.src_addr))) {
> > +		if (memcmp(&mask->hdr.src_addr,
> > +		   &rte_flow_item_ipv6_mask.hdr.src_addr,
> > +		   sizeof(rte_flow_item_ipv6_mask.hdr.src_addr)))
> > +			return rte_flow_error_set(error, ENOTSUP,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
> mask,
> > +				 "no support for partial mask on"
> > +				 " \"ipv6.hdr.src_addr\" field");
> > +		/* More L3 address validation can be put here. */
> > +	} else {
> > +		/*
> > +		 * Kernel uses the source L3 address to select the
> > +		 * interface for egress encapsulated traffic, so
> > +		 * it must be specified in the tc rule.
> > +		 */
> > +		return rte_flow_error_set(error, EINVAL,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> > +				 "outer L3 source address must be "
> > +				 "specified for VXLAN encapsulation");
> > +	}
> > +	return 0;
> > +}
> > +
> > +/**
> > + * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_UDP item for E-
> Switch.
> > + *
> > + * @param[in] item
> > + *   Pointer to the itemn structure.
> > + * @param[out] error
> > + *   Pointer to the error structure.
> > + *
> > + * @return
> > + *   0 on success, a negative errno value otherwise and rte_ernno is set.
> > + **/
> > +static int
> > +flow_tcf_validate_vxlan_encap_udp(const struct rte_flow_item *item,
> > +				  struct rte_flow_error *error)
> > +{
> > +	const struct rte_flow_item_udp *spec = item->spec;
> > +	const struct rte_flow_item_udp *mask = item->mask;
> > +
> > +	if (!spec)
> > +		/*
> > +		 * Specification for UDP ports cannot be empty
> > +		 * because it is required by tunnel_key parameter.
> > +		 */
> > +		return rte_flow_error_set(error, EINVAL,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> > +				 "NULL UDP port specification "
> > +				 " for VXLAN encapsulation");
> > +	if (!mask)
> > +		mask = &rte_flow_item_udp_mask;
> > +	if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
> > +		if (mask->hdr.dst_port != RTE_BE16(0xffff))
> > +			return rte_flow_error_set(error, ENOTSUP,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
> mask,
> > +				 "no support for partial mask on"
> > +				 " \"udp.hdr.dst_port\" field");
> > +		if (!spec->hdr.dst_port)
> > +			return rte_flow_error_set(error, EINVAL,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> > +				 "zero encap remote UDP port");
> > +	} else {
> > +		return rte_flow_error_set(error, EINVAL,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> > +				 "outer UDP remote port must be "
> > +				 "specified for VXLAN encapsulation");
> > +	}
> > +	if (mask->hdr.src_port != RTE_BE16(0x0000)) {
> > +		if (mask->hdr.src_port != RTE_BE16(0xffff))
> > +			return rte_flow_error_set(error, ENOTSUP,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
> mask,
> > +				 "no support for partial mask on"
> > +				 " \"udp.hdr.src_port\" field");
> > +		DRV_LOG(WARNING,
> > +			"outer UDP source port cannot be "
> > +			"forced for VXLAN encapsulation, "
> > +			"parameter ignored");
> > +	}
> > +	return 0;
> > +}
> > +
> > +/**
> > + * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_VXLAN item for
> E-Switch.
> > + *
> > + * @param[in] item
> > + *   Pointer to the itemn structure.
> > + * @param[out] error
> > + *   Pointer to the error structure.
> > + *
> > + * @return
> > + *   0 on success, a negative errno value otherwise and rte_ernno is set.
> > + **/
> > +static int
> > +flow_tcf_validate_vxlan_encap_vni(const struct rte_flow_item *item,
> > +				  struct rte_flow_error *error)
> > +{
> > +	const struct rte_flow_item_vxlan *spec = item->spec;
> > +	const struct rte_flow_item_vxlan *mask = item->mask;
> > +
> > +	if (!spec)
> > +		/* Outer VNI is required by tunnel_key parameter. */
> > +		return rte_flow_error_set(error, EINVAL,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM, item,
> > +				 "NULL VNI specification "
> > +				 " for VXLAN encapsulation");
> > +	if (!mask)
> > +		mask = &rte_flow_item_vxlan_mask;
> > +	if (mask->vni[0] != 0 ||
> > +	    mask->vni[1] != 0 ||
> > +	    mask->vni[2] != 0) {
> 
> can be one line.
> 

OK. Will be aligned.

> > +		if (mask->vni[0] != 0xff ||
> > +		    mask->vni[1] != 0xff ||
> > +		    mask->vni[2] != 0xff)
> 
> same here.
> 
OK. Ditto.

> > +			return rte_flow_error_set(error, ENOTSUP,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
> mask,
> > +				 "no support for partial mask on"
> > +				 " \"vxlan.vni\" field");
> > +		if (spec->vni[0] == 0 &&
> > +		    spec->vni[1] == 0 &&
> > +		    spec->vni[2] == 0)
> > +			return rte_flow_error_set(error, EINVAL,
> > +					  RTE_FLOW_ERROR_TYPE_ITEM,
> item,
> > +					  "VXLAN vni cannot be 0");
> 
> It is already checked by mlx5_flow_validate_item_vxlan().
> 
> > +	} else {
> > +		return rte_flow_error_set(error, EINVAL,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM,
> > +				 item,
> > +				 "outer VNI must be specified "
> > +				 "for VXLAN encapsulation");
> > +	}
> 
> Already checked in mlx5_flow_validate_item_vxlan().

OK. Has to be removed. We trust the validated rules.

> 
> > +	return 0;
> > +}
> > +
> > +/**
> > + * Validate VXLAN_ENCAP action item list for E-Switch.
> > + *
> > + * @param[in] action
> > + *   Pointer to the VXLAN_ENCAP action structure.
> > + * @param[out] error
> > + *   Pointer to the error structure.
> > + *
> > + * @return
> > + *   0 on success, a negative errno value otherwise and rte_ernno is set.
> > + **/
> > +static int
> > +flow_tcf_validate_vxlan_encap(const struct rte_flow_action *action,
> > +			      struct rte_flow_error *error) {
> > +	const struct rte_flow_item *items;
> > +	int ret;
> > +	uint32_t item_flags = 0;
> > +
> > +	assert(action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP);
> > +	if (!action->conf)
> > +		return rte_flow_error_set
> > +			(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
> > +			 action, "Missing VXLAN tunnel "
> > +				 "action configuration");
> > +	items = ((const struct rte_flow_action_vxlan_encap *)
> > +					action->conf)->definition;
> > +	if (!items)
> > +		return rte_flow_error_set
> > +			(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
> > +			 action, "Missing VXLAN tunnel "
> > +				 "encapsulation parameters");
> > +	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
> > +		switch (items->type) {
> > +		case RTE_FLOW_ITEM_TYPE_VOID:
> > +			break;
> > +		case RTE_FLOW_ITEM_TYPE_ETH:
> > +			ret = mlx5_flow_validate_item_eth(items,
> item_flags,
> > +							  error);
> > +			if (ret < 0)
> > +				return ret;
> > +			ret = flow_tcf_validate_vxlan_encap_eth(items,
> error);
> > +			if (ret < 0)
> > +				return ret;
> > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
> > +			break;
> > +		break;
> > +		case RTE_FLOW_ITEM_TYPE_IPV4:
> > +			ret = mlx5_flow_validate_item_ipv4(items,
> item_flags,
> > +							   error);
> > +			if (ret < 0)
> > +				return ret;
> > +			ret = flow_tcf_validate_vxlan_encap_ipv4(items,
> error);
> > +			if (ret < 0)
> > +				return ret;
> > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
> > +			break;
> > +		case RTE_FLOW_ITEM_TYPE_IPV6:
> > +			ret = mlx5_flow_validate_item_ipv6(items,
> item_flags,
> > +							   error);
> > +			if (ret < 0)
> > +				return ret;
> > +			ret = flow_tcf_validate_vxlan_encap_ipv6(items,
> error);
> > +			if (ret < 0)
> > +				return ret;
> > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
> > +			break;
> > +		case RTE_FLOW_ITEM_TYPE_UDP:
> > +			ret = mlx5_flow_validate_item_udp(items,
> item_flags,
> > +							   0xFF, error);
> > +			if (ret < 0)
> > +				return ret;
> > +			ret = flow_tcf_validate_vxlan_encap_udp(items,
> error);
> > +			if (ret < 0)
> > +				return ret;
> > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
> > +			break;
> > +		case RTE_FLOW_ITEM_TYPE_VXLAN:
> > +			ret = mlx5_flow_validate_item_vxlan(items,
> > +							    item_flags, error);
> > +			if (ret < 0)
> > +				return ret;
> > +			ret = flow_tcf_validate_vxlan_encap_vni(items,
> error);
> > +			if (ret < 0)
> > +				return ret;
> > +			item_flags |= MLX5_FLOW_LAYER_VXLAN;
> > +			break;
> > +		default:
> > +			return rte_flow_error_set(error, ENOTSUP,
> > +					  RTE_FLOW_ERROR_TYPE_ITEM,
> items,
> > +					  "VXLAN encap item not supported");
> > +		}
> > +	}
> > +	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
> > +		return rte_flow_error_set(error, EINVAL,
> > +					  RTE_FLOW_ERROR_TYPE_ACTION,
> action,
> > +					  "no outer L3 layer found"
> > +					  " for VXLAN encapsulation");
> > +	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
> > +		return rte_flow_error_set(error, EINVAL,
> > +					  RTE_FLOW_ERROR_TYPE_ACTION,
> action,
> > +					  "no outer L4 layer found"
> 
> L4 -> UDP?

OK.  Good clarification.

> 
> > +					  " for VXLAN encapsulation");
> > +	if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
> > +		return rte_flow_error_set(error, EINVAL,
> > +					  RTE_FLOW_ERROR_TYPE_ACTION,
> action,
> > +					  "no VXLAN VNI found"
> > +					  " for VXLAN encapsulation");
> > +	return 0;
> > +}
> > +
> > +/**
> > + * Validate VXLAN_DECAP action outer tunnel items for E-Switch.
> > + *
> > + * @param[in] item_flags
> > + *   Mask of provided outer tunnel parameters
> > + * @param[in] ipv4
> > + *   Outer IPv4 address item (if any, NULL otherwise).
> > + * @param[in] ipv6
> > + *   Outer IPv6 address item (if any, NULL otherwise).
> > + * @param[in] udp
> > + *   Outer UDP layer item (if any, NULL otherwise).
> > + * @param[out] error
> > + *   Pointer to the error structure.
> > + *
> > + * @return
> > + *   0 on success, a negative errno value otherwise and rte_ernno is set.
> > + **/
> > +static int
> > +flow_tcf_validate_vxlan_decap(uint32_t item_flags,
> > +			      const struct rte_flow_action *action,
> > +			      const struct rte_flow_item *ipv4,
> > +			      const struct rte_flow_item *ipv6,
> > +			      const struct rte_flow_item *udp,
> > +			      struct rte_flow_error *error) {
> > +	if (!ipv4 && !ipv6)
> > +		return rte_flow_error_set(error, EINVAL,
> > +					  RTE_FLOW_ERROR_TYPE_ACTION,
> action,
> > +					  "no outer L3 layer found"
> > +					  " for VXLAN decapsulation");
> > +	if (ipv4) {
> > +		const struct rte_flow_item_ipv4 *spec = ipv4->spec;
> > +		const struct rte_flow_item_ipv4 *mask = ipv4->mask;
> > +
> > +		if (!spec)
> > +			/*
> > +			 * Specification for L3 addresses cannot be empty
> > +			 * because it is required as decap parameter.
> > +			 */
> > +			return rte_flow_error_set(error, EINVAL,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
> > +				 "NULL outer L3 address specification "
> > +				 " for VXLAN decapsulation");
> > +		if (!mask)
> > +			mask = &rte_flow_item_ipv4_mask;
> > +		if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
> > +			if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
> > +				return rte_flow_error_set(error, ENOTSUP,
> > +
> RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> > +					 "no support for partial mask on"
> > +					 " \"ipv4.hdr.dst_addr\" field");
> > +			/* More L3 address validations can be put here. */
> > +		} else {
> > +			/*
> > +			 * Kernel uses the destination L3 address
> > +			 * to determine the ingress network interface
> > +			 * for traffic being decapculated.
> > +			 */
> > +			return rte_flow_error_set(error, EINVAL,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
> > +				 "outer L3 destination address must be "
> > +				 "specified for VXLAN decapsulation");
> > +		}
> > +		/* Source L3 address is optional for decap. */
> > +		if (mask->hdr.src_addr != RTE_BE32(0x00000000))
> > +			if (mask->hdr.src_addr != RTE_BE32(0xffffffff))
> > +				return rte_flow_error_set(error, ENOTSUP,
> > +
> RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> > +					 "no support for partial mask on"
> > +					 " \"ipv4.hdr.src_addr\" field");
> > +	} else {
> > +		const struct rte_flow_item_ipv6 *spec = ipv6->spec;
> > +		const struct rte_flow_item_ipv6 *mask = ipv6->mask;
> > +
> > +		if (!spec)
> > +			/*
> > +			 * Specification for L3 addresses cannot be empty
> > +			 * because it is required as decap parameter.
> > +			 */
> > +			return rte_flow_error_set(error, EINVAL,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
> > +				 "NULL outer L3 address specification "
> > +				 " for VXLAN decapsulation");
> > +		if (!mask)
> > +			mask = &rte_flow_item_ipv6_mask;
> > +		if (memcmp(&mask->hdr.dst_addr,
> > +			   &flow_tcf_mask_empty.ipv6.hdr.dst_addr,
> > +			   sizeof(flow_tcf_mask_empty.ipv6.hdr.dst_addr))) {
> > +			if (memcmp(&mask->hdr.dst_addr,
> > +				&rte_flow_item_ipv6_mask.hdr.dst_addr,
> > +
> 	sizeof(rte_flow_item_ipv6_mask.hdr.dst_addr)))
> > +				return rte_flow_error_set(error, ENOTSUP,
> > +				       RTE_FLOW_ERROR_TYPE_ITEM_MASK,
> mask,
> > +				       "no support for partial mask on"
> > +				       " \"ipv6.hdr.dst_addr\" field");
> > +		/* More L3 address validations can be put here. */
> > +		} else {
> > +			/*
> > +			 * Kernel uses the destination L3 address
> > +			 * to determine the ingress network interface
> > +			 * for traffic being decapculated.
> > +			 */
> > +			return rte_flow_error_set(error, EINVAL,
> > +				 RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
> > +				 "outer L3 destination address must be "
> > +				 "specified for VXLAN decapsulation");
> > +		}
> > +		/* Source L3 address is optional for decap. */
> > +		if (memcmp(&mask->hdr.src_addr,
> > +			   &flow_tcf_mask_empty.ipv6.hdr.src_addr,
> > +			   sizeof(flow_tcf_mask_empty.ipv6.hdr.src_addr))) {
> > +			if (memcmp(&mask->hdr.src_addr,
> > +				   &rte_flow_item_ipv6_mask.hdr.src_addr,
> > +				   sizeof(mask->hdr.src_addr)))
> > +				return rte_flow_error_set(error, ENOTSUP,
> > +
> 	RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> > +					"no support for partial mask on"
> > +					" \"ipv6.hdr.src_addr\" field");
> > +		}
> > +	}
> > +	if (!udp) {
> > +		return rte_flow_error_set(error, EINVAL,
> > +					  RTE_FLOW_ERROR_TYPE_ACTION,
> action,
> > +					  "no outer L4 layer found"
> > +					  " for VXLAN decapsulation");
> > +	} else {
> > +		const struct rte_flow_item_udp *spec = udp->spec;
> > +		const struct rte_flow_item_udp *mask = udp->mask;
> > +
> > +		if (!spec)
> > +			/*
> > +			 * Specification for UDP ports cannot be empty
> > +			 * because it is required as decap parameter.
> > +			 */
> > +			return rte_flow_error_set(error, EINVAL,
> > +					 RTE_FLOW_ERROR_TYPE_ITEM,
> udp,
> > +					 "NULL UDP port specification "
> > +					 " for VXLAN decapsulation");
> > +		if (!mask)
> > +			mask = &rte_flow_item_udp_mask;
> > +		if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
> > +			if (mask->hdr.dst_port != RTE_BE16(0xffff))
> > +				return rte_flow_error_set(error, ENOTSUP,
> > +
> RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> > +					 "no support for partial mask on"
> > +					 " \"udp.hdr.dst_port\" field");
> > +			if (!spec->hdr.dst_port)
> > +				return rte_flow_error_set(error, EINVAL,
> > +					 RTE_FLOW_ERROR_TYPE_ITEM,
> udp,
> > +					 "zero decap local UDP port");
> > +		} else {
> > +			return rte_flow_error_set(error, EINVAL,
> > +					 RTE_FLOW_ERROR_TYPE_ITEM,
> udp,
> > +					 "outer UDP destination port must be
> "
> > +					 "specified for VXLAN
> decapsulation");
> > +		}
> > +		if (mask->hdr.src_port != RTE_BE16(0x0000)) {
> > +			if (mask->hdr.src_port != RTE_BE16(0xffff))
> > +				return rte_flow_error_set(error, ENOTSUP,
> > +
> RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
> > +					 "no support for partial mask on"
> > +					 " \"udp.hdr.src_port\" field");
> > +			DRV_LOG(WARNING,
> > +			"outer UDP local port cannot be "
> > +			"forced for VXLAN encapsulation, "
> > +			"parameter ignored");
> > +		}
> > +	}
> > +	if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
> > +		return rte_flow_error_set(error, EINVAL,
> > +					  RTE_FLOW_ERROR_TYPE_ACTION,
> action,
> > +					  "no VXLAN VNI found"
> > +					  " for VXLAN decapsulation");
> > +	/* VNI is already validated, extra check can be put here. */
> > +	return 0;
> > +}
> > +
> > +/**
> >   * Validate flow for E-Switch.
> >   *
> >   * @param[in] priv
> > @@ -974,7 +1588,8 @@ struct pedit_parser {
> >  		const struct rte_flow_item_ipv6 *ipv6;
> >  		const struct rte_flow_item_tcp *tcp;
> >  		const struct rte_flow_item_udp *udp;
> > -	} spec, mask;
> > +		const struct rte_flow_item_vxlan *vxlan;
> > +	 } spec, mask;
> >  	union {
> >  		const struct rte_flow_action_port_id *port_id;
> >  		const struct rte_flow_action_jump *jump; @@ -983,9
> +1598,13 @@
> > struct pedit_parser {
> >  			of_set_vlan_vid;
> >  		const struct rte_flow_action_of_set_vlan_pcp *
> >  			of_set_vlan_pcp;
> > +		const struct rte_flow_action_vxlan_encap *vxlan_encap;
> >  		const struct rte_flow_action_set_ipv4 *set_ipv4;
> >  		const struct rte_flow_action_set_ipv6 *set_ipv6;
> >  	} conf;
> > +	const struct rte_flow_item *ipv4 = NULL; /* storage to check */
> > +	const struct rte_flow_item *ipv6 = NULL; /* outer tunnel. */
> > +	const struct rte_flow_item *udp = NULL;  /* parameters. */
> >  	uint32_t item_flags = 0;
> >  	uint32_t action_flags = 0;
> >  	uint8_t next_protocol = -1;
> > @@ -1114,7 +1733,6 @@ struct pedit_parser {
> >  							   error);
> >  			if (ret < 0)
> >  				return ret;
> > -			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
> >  			mask.ipv4 = flow_tcf_item_mask
> >  				(items, &rte_flow_item_ipv4_mask,
> >  				 &flow_tcf_mask_supported.ipv4,
> > @@ -1135,13 +1753,22 @@ struct pedit_parser {
> >  				next_protocol =
> >  					((const struct rte_flow_item_ipv4 *)
> >  					 (items->spec))->hdr.next_proto_id;
> > +			if (item_flags &
> MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
> > +				/*
> > +				 * Multiple outer items are not allowed as
> > +				 * tunnel parameters, will raise an error later.
> > +				 */
> > +				ipv4 = NULL;
> 
> Can't it be inner then?
AFAIK,  no for tc rules, we can not specify multiple levels (inner + outer) for them.
There is just no TCA_FLOWER_KEY_xxx attributes  for specifying inner items 
to match by flower.

It is quite unclear comment, not the best one, sorry. I did not like it too, 
just forgot to rewrite.

ipv4, ipv6 , udp variables gather the matching items during the item list scanning,
later variables are used for VXLAN decap action validation only. So, the "outer"
means that ipv4 variable contains the VXLAN decap outer addresses, and
should be NULL-ed if multiple items are found in the items list. 

But we can generate an error here if we have valid action_flags
(gathered by prepare function) and VXLAN decap is set. Raising
an error looks more relevant and clear.

 
>   flow create 1 ingress transfer
>     pattern eth src is 66:77:88:99:aa:bb
>       dst is 00:11:22:33:44:55 / ipv4 src is 2.2.2.2 dst is 1.1.1.1 /
>       udp src is 4789 dst is 4242 / vxlan vni is 0x112233 /
>       eth / ipv6 / tcp dst is 42 / end
>     actions vxlan_decap / port_id id 2 / end
> 
> Is this flow supported by linux tcf? I took this example from Adrien's patch -
> "[8/8] net/mlx5: add VXLAN decap support to switch flow rules". If so, isn't it
> possible to have inner L3 layer (MLX5_FLOW_LAYER_INNER_*)? If not, you
> should return error in this case. I don't see any code to check redundant
> outer items.
> Did I miss something?

Interesting, besides rule has correct syntax, I'm not sure whether it can be applied w/o errors.
At least our current flow_tcf_translate() implementation does not support any INNERs.
But it seems the flow_tcf_validate() does, it's subject to recheck - we should not allow
unsupported items to pass the validation. I'll check and provide the separate bugfix patch
(if any).

> 
> BTW, for the tunneled items, why don't you follow the code of
> Verbs(mlx5_flow_verbs.c) and DV(mlx5_flow_dv.c)? For tcf, it is the first time
For VXLAN it has some specifics (warning about ignored params, etc.)
I've checked which of verbs/dv code could be reused and did not discovered
a lot. I'll recheck the latest code commits, possible it became more appropriate
for VXLAN. 

> to add tunneled item, but Verbs/DV already have validation code for tunnel,
> so you can reuse the existing code. In flow_tcf_validate_vxlan_decap(), not
> every validation is VXLAN-specific but some of them can be common code.
> 
> And if you need to know whether there's the VXLAN decap action prior to
> outer header item validation, you can relocate the code - action validation
> first and item validation next, as there's no dependency yet in the current

We can not validate action first - we need items to be preliminary gathered,
to check them in action's specific fashion and to check action itself. 
I mean, if we see VXLAN decap action, we should check the presence of
L2, L3, L4 and VNI items. I minimized the number of passes along the item
and action lists. BTW, Adrien's approach performed two passes, mine does only.

> code. Defining ipv4, ipv6, udp seems to make the code path more complex.
Yes, but it allows us to avoid the extra item list scanning and minimizes the changes
of existing code.
In your approach we should:
- scan actions, w/o full checking, just action_flags gathering and checking
- scan items, performing variating check (depending on gathered action flags)
- scan actions again, performing full check with params (at least for now 
check whether all params gathered)
> 
> For example, you just can call vxlan decap item validation (by splitting
> flow_tcf_validate_vxlan_decap()) at this point like:
> 
> 			if (action_flags &
> MLX5_FLOW_ACTION_VXLAN_DECAP)
> 				ret =
> flow_tcf_validate_vxlan_decap_ipv4(...);
> 			...
> 
> Same for other items.
> 
> > +			} else {
> > +				ipv4 = items;
> > +				item_flags |=
> MLX5_FLOW_LAYER_OUTER_L3_IPV4;
> > +			}
> >  			break;
> >  		case RTE_FLOW_ITEM_TYPE_IPV6:
> >  			ret = mlx5_flow_validate_item_ipv6(items,
> item_flags,
> >  							   error);
> >  			if (ret < 0)
> >  				return ret;
> > -			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
> >  			mask.ipv6 = flow_tcf_item_mask
> >  				(items, &rte_flow_item_ipv6_mask,
> >  				 &flow_tcf_mask_supported.ipv6,
> > @@ -1162,13 +1789,22 @@ struct pedit_parser {
> >  				next_protocol =
> >  					((const struct rte_flow_item_ipv6 *)
> >  					 (items->spec))->hdr.proto;
> > +			if (item_flags &
> MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
> > +				/*
> > +				 *Multiple outer items are not allowed as
> > +				 * tunnel parameters
> > +				 */
> > +				ipv6 = NULL;
> > +			} else {
> > +				ipv6 = items;
> > +				item_flags |=
> MLX5_FLOW_LAYER_OUTER_L3_IPV6;
> > +			}
> >  			break;
> >  		case RTE_FLOW_ITEM_TYPE_UDP:
> >  			ret = mlx5_flow_validate_item_udp(items,
> item_flags,
> >  							  next_protocol,
> error);
> >  			if (ret < 0)
> >  				return ret;
> > -			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
> >  			mask.udp = flow_tcf_item_mask
> >  				(items, &rte_flow_item_udp_mask,
> >  				 &flow_tcf_mask_supported.udp,
> > @@ -1177,6 +1813,12 @@ struct pedit_parser {
> >  				 error);
> >  			if (!mask.udp)
> >  				return -rte_errno;
> > +			if (item_flags &
> MLX5_FLOW_LAYER_OUTER_L4_UDP) {
> > +				udp = NULL;
> > +			} else {
> > +				udp = items;
> > +				item_flags |=
> MLX5_FLOW_LAYER_OUTER_L4_UDP;
> > +			}
> >  			break;
> >  		case RTE_FLOW_ITEM_TYPE_TCP:
> >  			ret = mlx5_flow_validate_item_tcp
> > @@ -1186,7 +1828,6 @@ struct pedit_parser {
> >  					      error);
> >  			if (ret < 0)
> >  				return ret;
> > -			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
> >  			mask.tcp = flow_tcf_item_mask
> >  				(items, &rte_flow_item_tcp_mask,
> >  				 &flow_tcf_mask_supported.tcp,
> > @@ -1195,11 +1836,36 @@ struct pedit_parser {
> >  				 error);
> >  			if (!mask.tcp)
> >  				return -rte_errno;
> > +			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
> > +			break;
> > +		case RTE_FLOW_ITEM_TYPE_VXLAN:
> > +			ret = mlx5_flow_validate_item_vxlan(items,
> > +							    item_flags, error);
> > +			if (ret < 0)
> > +				return ret;
> > +			mask.vxlan = flow_tcf_item_mask
> > +				(items, &rte_flow_item_vxlan_mask,
> > +				 &flow_tcf_mask_supported.vxlan,
> > +				 &flow_tcf_mask_empty.vxlan,
> > +				 sizeof(flow_tcf_mask_supported.vxlan),
> > +				 error);
> > +			if (!mask.vxlan)
> > +				return -rte_errno;
> > +			if (mask.vxlan->vni[0] != 0xff ||
> > +			    mask.vxlan->vni[1] != 0xff ||
> > +			    mask.vxlan->vni[2] != 0xff)
> > +				return rte_flow_error_set
> > +					(error, ENOTSUP,
> > +
> RTE_FLOW_ERROR_TYPE_ITEM_MASK,
> > +					 mask.vxlan,
> > +					 "no support for partial or "
> > +					 "empty mask on \"vxlan.vni\" field");
> > +			item_flags |= MLX5_FLOW_LAYER_VXLAN;
> >  			break;
> >  		default:
> >  			return rte_flow_error_set(error, ENOTSUP,
> >
> RTE_FLOW_ERROR_TYPE_ITEM,
> > -						  NULL, "item not
> supported");
> > +						  items, "item not
> supported");
> >  		}
> >  	}
> >  	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
> @@
> > -1271,6 +1937,33 @@ struct pedit_parser {
> >  					 " set action must follow push
> action");
> >  			current_action_flag =
> MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
> >  			break;
> > +		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> > +			if (action_flags & (MLX5_ACTION_VXLAN_ENCAP
> > +					   | MLX5_ACTION_VXLAN_DECAP))
> > +				return rte_flow_error_set
> > +					(error, ENOTSUP,
> > +					 RTE_FLOW_ERROR_TYPE_ACTION,
> actions,
> > +					 "can't have multiple vxlan actions");
> > +			ret = flow_tcf_validate_vxlan_encap(actions, error);
> > +			if (ret < 0)
> > +				return ret;
> > +			action_flags |= MLX5_ACTION_VXLAN_ENCAP;
> 
> Recently, current_action_flag has been added for PEDIT actions. Please refer
> to the code above and make it compliant.

OK. I missed the point while rebasing, will make code compliant.

With best regards,
Slava
> 
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
> > +			if (action_flags & (MLX5_ACTION_VXLAN_ENCAP
> > +					   | MLX5_ACTION_VXLAN_DECAP))
> > +				return rte_flow_error_set
> > +					(error, ENOTSUP,
> > +					 RTE_FLOW_ERROR_TYPE_ACTION,
> actions,
> > +					 "can't have multiple vxlan actions");
> > +			ret = flow_tcf_validate_vxlan_decap(item_flags,
> > +							    actions,
> > +							    ipv4, ipv6, udp,
> > +							    error);
> > +			if (ret < 0)
> > +				return ret;
> > +			action_flags |= MLX5_ACTION_VXLAN_DECAP;
> > +			break;
> >  		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
> >  			current_action_flag =
> MLX5_FLOW_ACTION_SET_IPV4_SRC;
> >  			break;
> > @@ -1391,6 +2084,12 @@ struct pedit_parser {
> >  		return rte_flow_error_set(error, EINVAL,
> >  					  RTE_FLOW_ERROR_TYPE_ACTION,
> actions,
> >  					  "no fate action is found");
> > +	if ((item_flags & MLX5_FLOW_LAYER_VXLAN) &&
> > +	    !(action_flags & MLX5_ACTION_VXLAN_DECAP))
> > +		return rte_flow_error_set(error, ENOTSUP,
> > +					 RTE_FLOW_ERROR_TYPE_ACTION,
> NULL,
> > +					 "VNI pattern should be followed "
> > +					 " by VXLAN_DECAP action");
> >  	return 0;
> >  }
> >
  
Yongseok Koh Oct. 26, 2018, 3:07 a.m. UTC | #3
On Thu, Oct 25, 2018 at 06:53:11AM -0700, Slava Ovsiienko wrote:
> > -----Original Message-----
> > From: Yongseok Koh
> > Sent: Tuesday, October 23, 2018 13:05
> > To: Slava Ovsiienko <viacheslavo@mellanox.com>
> > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > Subject: Re: [PATCH v2 2/7] net/mlx5: e-switch VXLAN flow validation
> > routine
> > 
> > On Mon, Oct 15, 2018 at 02:13:30PM +0000, Viacheslav Ovsiienko wrote:
[...]
> > > @@ -1114,7 +1733,6 @@ struct pedit_parser {
> > >  							   error);
> > >  			if (ret < 0)
> > >  				return ret;
> > > -			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
> > >  			mask.ipv4 = flow_tcf_item_mask
> > >  				(items, &rte_flow_item_ipv4_mask,
> > >  				 &flow_tcf_mask_supported.ipv4,
> > > @@ -1135,13 +1753,22 @@ struct pedit_parser {
> > >  				next_protocol =
> > >  					((const struct rte_flow_item_ipv4 *)
> > >  					 (items->spec))->hdr.next_proto_id;
> > > +			if (item_flags &
> > MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
> > > +				/*
> > > +				 * Multiple outer items are not allowed as
> > > +				 * tunnel parameters, will raise an error later.
> > > +				 */
> > > +				ipv4 = NULL;
> > 
> > Can't it be inner then?
> AFAIK,  no for tc rules, we can not specify multiple levels (inner + outer) for them.
> There is just no TCA_FLOWER_KEY_xxx attributes  for specifying inner items 
> to match by flower.

When I briefly read the kernel code, I thought TCA_FLOWER_KEY_* are for inner
header before decap. I mean TCA_FLOWER_KEY_IPV4_SRC is for inner L3 and
TCA_FLOWER_KEY_ENC_IPV4_SRC is for outer tunnel header. Please do some
experiments with tc-flower command.

> It is quite unclear comment, not the best one, sorry. I did not like it too, 
> just forgot to rewrite.
> 
> ipv4, ipv6 , udp variables gather the matching items during the item list scanning,
> later variables are used for VXLAN decap action validation only. So, the "outer"
> means that ipv4 variable contains the VXLAN decap outer addresses, and
> should be NULL-ed if multiple items are found in the items list. 
> 
> But we can generate an error here if we have valid action_flags
> (gathered by prepare function) and VXLAN decap is set. Raising
> an error looks more relevant and clear.

You can't use flags at this point. It is validate() so prepare() might not be
preceded.

> >   flow create 1 ingress transfer
> >     pattern eth src is 66:77:88:99:aa:bb
> >       dst is 00:11:22:33:44:55 / ipv4 src is 2.2.2.2 dst is 1.1.1.1 /
> >       udp src is 4789 dst is 4242 / vxlan vni is 0x112233 /
> >       eth / ipv6 / tcp dst is 42 / end
> >     actions vxlan_decap / port_id id 2 / end
> > 
> > Is this flow supported by linux tcf? I took this example from Adrien's patch -
> > "[8/8] net/mlx5: add VXLAN decap support to switch flow rules". If so, isn't it
> > possible to have inner L3 layer (MLX5_FLOW_LAYER_INNER_*)? If not, you
> > should return error in this case. I don't see any code to check redundant
> > outer items.
> > Did I miss something?
> 
> Interesting, besides rule has correct syntax, I'm not sure whether it can be applied w/o errors.

Please try. You owns this patchset. However, you just can prohibit such flows
(tunneled item) and come up with follow-up patches to enable it later if it is
support by tcf as this whole patchset itself is pretty huge enough and we don't
have much time.

> At least our current flow_tcf_translate() implementation does not support any INNERs.
> But it seems the flow_tcf_validate() does, it's subject to recheck - we should not allow
> unsupported items to pass the validation. I'll check and provide the separate bugfix patch
> (if any).

Neither has tunnel support. It is the first time to add tunnel support to TCF.
If it was needed, you should've added it, not skipping it.

You can check how MLX5_FLOW_LAYER_TUNNEL is used in Verbs/DV as a reference.

> > BTW, for the tunneled items, why don't you follow the code of
> > Verbs(mlx5_flow_verbs.c) and DV(mlx5_flow_dv.c)? For tcf, it is the first time
> For VXLAN it has some specifics (warning about ignored params, etc.)
> I've checked which of verbs/dv code could be reused and did not discovered
> a lot. I'll recheck the latest code commits, possible it became more appropriate
> for VXLAN. 

Agreed. I'm not forcing you to do it because we run out of time but mentioned it
because if there's any redundancy in our code, that usually causes bug later.
Let's not waste too much time for that. Just grab low hanging fruits if any.

> > to add tunneled item, but Verbs/DV already have validation code for tunnel,
> > so you can reuse the existing code. In flow_tcf_validate_vxlan_decap(), not
> > every validation is VXLAN-specific but some of them can be common code.
> > 
> > And if you need to know whether there's the VXLAN decap action prior to
> > outer header item validation, you can relocate the code - action validation
> > first and item validation next, as there's no dependency yet in the current
> 
> We can not validate action first - we need items to be preliminary gathered,
> to check them in action's specific fashion and to check action itself. 
> I mean, if we see VXLAN decap action, we should check the presence of
> L2, L3, L4 and VNI items. I minimized the number of passes along the item
> and action lists. BTW, Adrien's approach performed two passes, mine does only.
> 
> > code. Defining ipv4, ipv6, udp seems to make the code path more complex.
> Yes, but it allows us to avoid the extra item list scanning and minimizes the changes
> of existing code.
> In your approach we should:
> - scan actions, w/o full checking, just action_flags gathering and checking
> - scan items, performing variating check (depending on gathered action flags)
> - scan actions again, performing full check with params (at least for now 
> check whether all params gathered)

Disagree. flow_tcf_validate_vxlan_encap() doesn't even need any info of items
and flow_tcf_validate_vxlan_decap() needs item_flags to check whether VXLAN
item is there or not and ipv4/ipv6/udp are all for item checks. Let me give you
very detailed exmaple:

{
	for (actions[]...) {
		...
		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
			...
			flow_tcf_validate_vxlan_encap();
			...
			break;
		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
			if (action_flags & (MLX5_ACTION_VXLAN_ENCAP
					   | MLX5_ACTION_VXLAN_DECAP))
				return rte_flow_error_set
					(error, ENOTSUP,
					 RTE_FLOW_ERROR_TYPE_ACTION,
					 actions,
					 "can't have multiple vxlan actions");
			/* Don't call flow_tcf_validate_vxlan_decap(). */
			action_flags |= MLX5_ACTION_VXLAN_DECAP;
			break;
	}
	for (items[]...) {
		...
		case RTE_FLOW_ITEM_TYPE_IPV4:
			/* Existing common validation. */
			...
			if (action_flags & MLX5_ACTION_VXLAN_DECAP) {
				/* Do ipv4 validation in
				 * flow_tcf_validate_vxlan_decap()/
			}
			break;
	}
}

Curretly you are doing,

	- validate items
	- validate actions
	- validate items again if decap.

But this can simply be

	- validate actions
	- validate items

Thanks,
Yongseok

> > 
> > For example, you just can call vxlan decap item validation (by splitting
> > flow_tcf_validate_vxlan_decap()) at this point like:
> > 
> > 			if (action_flags &
> > MLX5_FLOW_ACTION_VXLAN_DECAP)
> > 				ret =
> > flow_tcf_validate_vxlan_decap_ipv4(...);
> > 			...
> > 
> > Same for other items.
> >
  
Slava Ovsiienko Oct. 26, 2018, 8:39 a.m. UTC | #4
> -----Original Message-----
> From: Yongseok Koh
> Sent: Friday, October 26, 2018 6:07
> To: Slava Ovsiienko <viacheslavo@mellanox.com>
> Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> Subject: Re: [PATCH v2 2/7] net/mlx5: e-switch VXLAN flow validation
> routine
> 
> On Thu, Oct 25, 2018 at 06:53:11AM -0700, Slava Ovsiienko wrote:
> > > -----Original Message-----
> > > From: Yongseok Koh
> > > Sent: Tuesday, October 23, 2018 13:05
> > > To: Slava Ovsiienko <viacheslavo@mellanox.com>
> > > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > > Subject: Re: [PATCH v2 2/7] net/mlx5: e-switch VXLAN flow validation
> > > routine
> > >
> > > On Mon, Oct 15, 2018 at 02:13:30PM +0000, Viacheslav Ovsiienko wrote:
> [...]
> > > > @@ -1114,7 +1733,6 @@ struct pedit_parser {
> > > >  							   error);
> > > >  			if (ret < 0)
> > > >  				return ret;
> > > > -			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
> > > >  			mask.ipv4 = flow_tcf_item_mask
> > > >  				(items, &rte_flow_item_ipv4_mask,
> > > >  				 &flow_tcf_mask_supported.ipv4,
> > > > @@ -1135,13 +1753,22 @@ struct pedit_parser {
> > > >  				next_protocol =
> > > >  					((const struct rte_flow_item_ipv4 *)
> > > >  					 (items->spec))->hdr.next_proto_id;
> > > > +			if (item_flags &
> > > MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
> > > > +				/*
> > > > +				 * Multiple outer items are not allowed as
> > > > +				 * tunnel parameters, will raise an error later.
> > > > +				 */
> > > > +				ipv4 = NULL;
> > >
> > > Can't it be inner then?
> > AFAIK,  no for tc rules, we can not specify multiple levels (inner + outer) for
> them.
> > There is just no TCA_FLOWER_KEY_xxx attributes  for specifying inner
> items
> > to match by flower.
> 
> When I briefly read the kernel code, I thought TCA_FLOWER_KEY_* are for
> inner
> header before decap. I mean TCA_FLOWER_KEY_IPV4_SRC is for inner L3
> and
> TCA_FLOWER_KEY_ENC_IPV4_SRC is for outer tunnel header. Please do
> some
> experiments with tc-flower command.

Hm. Interesting. I will check.

> > It is quite unclear comment, not the best one, sorry. I did not like it too,
> > just forgot to rewrite.
> >
> > ipv4, ipv6 , udp variables gather the matching items during the item list
> scanning,
> > later variables are used for VXLAN decap action validation only. So, the
> "outer"
> > means that ipv4 variable contains the VXLAN decap outer addresses, and
> > should be NULL-ed if multiple items are found in the items list.
> >
> > But we can generate an error here if we have valid action_flags
> > (gathered by prepare function) and VXLAN decap is set. Raising
> > an error looks more relevant and clear.
> 
> You can't use flags at this point. It is validate() so prepare() might not be
> preceded.
> 
> > >   flow create 1 ingress transfer
> > >     pattern eth src is 66:77:88:99:aa:bb
> > >       dst is 00:11:22:33:44:55 / ipv4 src is 2.2.2.2 dst is 1.1.1.1 /
> > >       udp src is 4789 dst is 4242 / vxlan vni is 0x112233 /
> > >       eth / ipv6 / tcp dst is 42 / end
> > >     actions vxlan_decap / port_id id 2 / end
> > >
> > > Is this flow supported by linux tcf? I took this example from Adrien's
> patch -
> > > "[8/8] net/mlx5: add VXLAN decap support to switch flow rules". If so,
> isn't it
> > > possible to have inner L3 layer (MLX5_FLOW_LAYER_INNER_*)? If not,
> you
> > > should return error in this case. I don't see any code to check redundant
> > > outer items.
> > > Did I miss something?
> >
> > Interesting, besides rule has correct syntax, I'm not sure whether it can be
> applied w/o errors.
> 
> Please try. You owns this patchset. However, you just can prohibit such flows
> (tunneled item) and come up with follow-up patches to enable it later if it is
> support by tcf as this whole patchset itself is pretty huge enough and we
> don't
> have much time.
> 
> > At least our current flow_tcf_translate() implementation does not support
> any INNERs.
> > But it seems the flow_tcf_validate() does, it's subject to recheck - we
> should not allow
> > unsupported items to pass the validation. I'll check and provide the
> separate bugfix patch
> > (if any).
> 
> Neither has tunnel support. It is the first time to add tunnel support to TCF.
> If it was needed, you should've added it, not skipping it.
> 
> You can check how MLX5_FLOW_LAYER_TUNNEL is used in Verbs/DV as a
> reference.

Yes. I understood your point. Will check and add tunnel support for TCF rules.
Anyway, inner MAC addresses are supported for VXLAN decap, I think we should
specify these ones in the rule as inners (after VNI item),  definitely
some tunnel support in validate/parse/translate should be added.

> 
> > > BTW, for the tunneled items, why don't you follow the code of
> > > Verbs(mlx5_flow_verbs.c) and DV(mlx5_flow_dv.c)? For tcf, it is the first
> time
> > For VXLAN it has some specifics (warning about ignored params, etc.)
> > I've checked which of verbs/dv code could be reused and did not
> discovered
> > a lot. I'll recheck the latest code commits, possible it became more
> appropriate
> > for VXLAN.
> 
> Agreed. I'm not forcing you to do it because we run out of time but
> mentioned it
> because if there's any redundancy in our code, that usually causes bug later.
> Let's not waste too much time for that. Just grab low hanging fruits if any.
> 
> > > to add tunneled item, but Verbs/DV already have validation code for
> tunnel,
> > > so you can reuse the existing code. In flow_tcf_validate_vxlan_decap(),
> not
> > > every validation is VXLAN-specific but some of them can be common
> code.
> > >
> > > And if you need to know whether there's the VXLAN decap action prior to
> > > outer header item validation, you can relocate the code - action
> validation
> > > first and item validation next, as there's no dependency yet in the current
> >
> > We can not validate action first - we need items to be preliminary
> gathered,
> > to check them in action's specific fashion and to check action itself.
> > I mean, if we see VXLAN decap action, we should check the presence of
> > L2, L3, L4 and VNI items. I minimized the number of passes along the item
> > and action lists. BTW, Adrien's approach performed two passes, mine does
> only.
> >
> > > code. Defining ipv4, ipv6, udp seems to make the code path more
> complex.
> > Yes, but it allows us to avoid the extra item list scanning and minimizes the
> changes
> > of existing code.
> > In your approach we should:
> > - scan actions, w/o full checking, just action_flags gathering and checking
> > - scan items, performing variating check (depending on gathered action
> flags)
> > - scan actions again, performing full check with params (at least for now
> > check whether all params gathered)
> 
> Disagree. flow_tcf_validate_vxlan_encap() doesn't even need any info of
> items
> and flow_tcf_validate_vxlan_decap() needs item_flags to check whether
> VXLAN
> item is there or not and ipv4/ipv6/udp are all for item checks. Let me give
> you
> very detailed exmaple:
> 
> {
> 	for (actions[]...) {
> 		...
> 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> 			...
> 			flow_tcf_validate_vxlan_encap();
> 			...
> 			break;
> 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
> 			if (action_flags & (MLX5_ACTION_VXLAN_ENCAP
> 					   | MLX5_ACTION_VXLAN_DECAP))
> 				return rte_flow_error_set
> 					(error, ENOTSUP,
> 					 RTE_FLOW_ERROR_TYPE_ACTION,
> 					 actions,
> 					 "can't have multiple vxlan actions");
> 			/* Don't call flow_tcf_validate_vxlan_decap(). */
> 			action_flags |= MLX5_ACTION_VXLAN_DECAP;
> 			break;
> 	}
> 	for (items[]...) {
> 		...
> 		case RTE_FLOW_ITEM_TYPE_IPV4:
> 			/* Existing common validation. */
> 			...
> 			if (action_flags & MLX5_ACTION_VXLAN_DECAP) {
> 				/* Do ipv4 validation in
> 				 * flow_tcf_validate_vxlan_decap()/
> 			}
> 			break;
> 	}
> }
> 
> Curretly you are doing,
> 
> 	- validate items
> 	- validate actions
> 	- validate items again if decap.
> 
> But this can simply be
> 
> 	- validate actions
How  we could validate VXLAN decap at this stage? 
As we do not have item_flags set yet?
Do I miss something?

> 	- validate items
> 
> Thanks,
> Yongseok
> 

With best regards,
Slava

> > >
> > > For example, you just can call vxlan decap item validation (by splitting
> > > flow_tcf_validate_vxlan_decap()) at this point like:
> > >
> > > 			if (action_flags &
> > > MLX5_FLOW_ACTION_VXLAN_DECAP)
> > > 				ret =
> > > flow_tcf_validate_vxlan_decap_ipv4(...);
> > > 			...
> > >
> > > Same for other items.
> > >
  
Yongseok Koh Oct. 26, 2018, 9:56 p.m. UTC | #5
On Fri, Oct 26, 2018 at 01:39:38AM -0700, Slava Ovsiienko wrote:
> > -----Original Message-----
> > From: Yongseok Koh
> > Sent: Friday, October 26, 2018 6:07
> > To: Slava Ovsiienko <viacheslavo@mellanox.com>
> > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > Subject: Re: [PATCH v2 2/7] net/mlx5: e-switch VXLAN flow validation
> > routine
> > 
> > On Thu, Oct 25, 2018 at 06:53:11AM -0700, Slava Ovsiienko wrote:
> > > > -----Original Message-----
> > > > From: Yongseok Koh
> > > > Sent: Tuesday, October 23, 2018 13:05
> > > > To: Slava Ovsiienko <viacheslavo@mellanox.com>
> > > > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > > > Subject: Re: [PATCH v2 2/7] net/mlx5: e-switch VXLAN flow validation
> > > > routine
> > > >
> > > > On Mon, Oct 15, 2018 at 02:13:30PM +0000, Viacheslav Ovsiienko wrote:
> > [...]
> > > > > @@ -1114,7 +1733,6 @@ struct pedit_parser {
> > > > >  							   error);
> > > > >  			if (ret < 0)
> > > > >  				return ret;
> > > > > -			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
> > > > >  			mask.ipv4 = flow_tcf_item_mask
> > > > >  				(items, &rte_flow_item_ipv4_mask,
> > > > >  				 &flow_tcf_mask_supported.ipv4,
> > > > > @@ -1135,13 +1753,22 @@ struct pedit_parser {
> > > > >  				next_protocol =
> > > > >  					((const struct rte_flow_item_ipv4 *)
> > > > >  					 (items->spec))->hdr.next_proto_id;
> > > > > +			if (item_flags &
> > > > MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
> > > > > +				/*
> > > > > +				 * Multiple outer items are not allowed as
> > > > > +				 * tunnel parameters, will raise an error later.
> > > > > +				 */
> > > > > +				ipv4 = NULL;
> > > >
> > > > Can't it be inner then?
> > > AFAIK,  no for tc rules, we can not specify multiple levels (inner + outer) for
> > them.
> > > There is just no TCA_FLOWER_KEY_xxx attributes  for specifying inner
> > items
> > > to match by flower.
> > 
> > When I briefly read the kernel code, I thought TCA_FLOWER_KEY_* are for
> > inner
> > header before decap. I mean TCA_FLOWER_KEY_IPV4_SRC is for inner L3
> > and
> > TCA_FLOWER_KEY_ENC_IPV4_SRC is for outer tunnel header. Please do
> > some
> > experiments with tc-flower command.
> 
> Hm. Interesting. I will check.
> 
> > > It is quite unclear comment, not the best one, sorry. I did not like it too,
> > > just forgot to rewrite.
> > >
> > > ipv4, ipv6 , udp variables gather the matching items during the item list
> > scanning,
> > > later variables are used for VXLAN decap action validation only. So, the
> > "outer"
> > > means that ipv4 variable contains the VXLAN decap outer addresses, and
> > > should be NULL-ed if multiple items are found in the items list.
> > >
> > > But we can generate an error here if we have valid action_flags
> > > (gathered by prepare function) and VXLAN decap is set. Raising
> > > an error looks more relevant and clear.
> > 
> > You can't use flags at this point. It is validate() so prepare() might not be
> > preceded.
> > 
> > > >   flow create 1 ingress transfer
> > > >     pattern eth src is 66:77:88:99:aa:bb
> > > >       dst is 00:11:22:33:44:55 / ipv4 src is 2.2.2.2 dst is 1.1.1.1 /
> > > >       udp src is 4789 dst is 4242 / vxlan vni is 0x112233 /
> > > >       eth / ipv6 / tcp dst is 42 / end
> > > >     actions vxlan_decap / port_id id 2 / end
> > > >
> > > > Is this flow supported by linux tcf? I took this example from Adrien's
> > patch -
> > > > "[8/8] net/mlx5: add VXLAN decap support to switch flow rules". If so,
> > isn't it
> > > > possible to have inner L3 layer (MLX5_FLOW_LAYER_INNER_*)? If not,
> > you
> > > > should return error in this case. I don't see any code to check redundant
> > > > outer items.
> > > > Did I miss something?
> > >
> > > Interesting, besides rule has correct syntax, I'm not sure whether it can be
> > applied w/o errors.
> > 
> > Please try. You owns this patchset. However, you just can prohibit such flows
> > (tunneled item) and come up with follow-up patches to enable it later if it is
> > support by tcf as this whole patchset itself is pretty huge enough and we
> > don't
> > have much time.
> > 
> > > At least our current flow_tcf_translate() implementation does not support
> > any INNERs.
> > > But it seems the flow_tcf_validate() does, it's subject to recheck - we
> > should not allow
> > > unsupported items to pass the validation. I'll check and provide the
> > separate bugfix patch
> > > (if any).
> > 
> > Neither has tunnel support. It is the first time to add tunnel support to TCF.
> > If it was needed, you should've added it, not skipping it.
> > 
> > You can check how MLX5_FLOW_LAYER_TUNNEL is used in Verbs/DV as a
> > reference.
> 
> Yes. I understood your point. Will check and add tunnel support for TCF rules.
> Anyway, inner MAC addresses are supported for VXLAN decap, I think we should
> specify these ones in the rule as inners (after VNI item),  definitely
> some tunnel support in validate/parse/translate should be added.
> 
> > 
> > > > BTW, for the tunneled items, why don't you follow the code of
> > > > Verbs(mlx5_flow_verbs.c) and DV(mlx5_flow_dv.c)? For tcf, it is the first
> > time
> > > For VXLAN it has some specifics (warning about ignored params, etc.)
> > > I've checked which of verbs/dv code could be reused and did not
> > discovered
> > > a lot. I'll recheck the latest code commits, possible it became more
> > appropriate
> > > for VXLAN.
> > 
> > Agreed. I'm not forcing you to do it because we run out of time but
> > mentioned it
> > because if there's any redundancy in our code, that usually causes bug later.
> > Let's not waste too much time for that. Just grab low hanging fruits if any.
> > 
> > > > to add tunneled item, but Verbs/DV already have validation code for
> > tunnel,
> > > > so you can reuse the existing code. In flow_tcf_validate_vxlan_decap(),
> > not
> > > > every validation is VXLAN-specific but some of them can be common
> > code.
> > > >
> > > > And if you need to know whether there's the VXLAN decap action prior to
> > > > outer header item validation, you can relocate the code - action
> > validation
> > > > first and item validation next, as there's no dependency yet in the current
> > >
> > > We can not validate action first - we need items to be preliminary
> > gathered,
> > > to check them in action's specific fashion and to check action itself.
> > > I mean, if we see VXLAN decap action, we should check the presence of
> > > L2, L3, L4 and VNI items. I minimized the number of passes along the item
> > > and action lists. BTW, Adrien's approach performed two passes, mine does
> > only.
> > >
> > > > code. Defining ipv4, ipv6, udp seems to make the code path more
> > complex.
> > > Yes, but it allows us to avoid the extra item list scanning and minimizes the
> > changes
> > > of existing code.
> > > In your approach we should:
> > > - scan actions, w/o full checking, just action_flags gathering and checking
> > > - scan items, performing variating check (depending on gathered action
> > flags)
> > > - scan actions again, performing full check with params (at least for now
> > > check whether all params gathered)
> > 
> > Disagree. flow_tcf_validate_vxlan_encap() doesn't even need any info of
> > items
> > and flow_tcf_validate_vxlan_decap() needs item_flags to check whether
> > VXLAN
> > item is there or not and ipv4/ipv6/udp are all for item checks. Let me give
> > you
> > very detailed exmaple:
> > 
> > {
> > 	for (actions[]...) {
> > 		...
> > 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> > 			...
> > 			flow_tcf_validate_vxlan_encap();
> > 			...
> > 			break;
> > 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
> > 			if (action_flags & (MLX5_ACTION_VXLAN_ENCAP
> > 					   | MLX5_ACTION_VXLAN_DECAP))
> > 				return rte_flow_error_set
> > 					(error, ENOTSUP,
> > 					 RTE_FLOW_ERROR_TYPE_ACTION,
> > 					 actions,
> > 					 "can't have multiple vxlan actions");
> > 			/* Don't call flow_tcf_validate_vxlan_decap(). */
> > 			action_flags |= MLX5_ACTION_VXLAN_DECAP;
> > 			break;
> > 	}
> > 	for (items[]...) {
> > 		...
> > 		case RTE_FLOW_ITEM_TYPE_IPV4:
> > 			/* Existing common validation. */
> > 			...
> > 			if (action_flags & MLX5_ACTION_VXLAN_DECAP) {
> > 				/* Do ipv4 validation in
> > 				 * flow_tcf_validate_vxlan_decap()/
> > 			}
> > 			break;
> > 	}
> > }
> > 
> > Curretly you are doing,
> > 
> > 	- validate items
> > 	- validate actions
> > 	- validate items again if decap.
> > 
> > But this can simply be
> > 
> > 	- validate actions
> How  we could validate VXLAN decap at this stage? 
> As we do not have item_flags set yet?
> Do I miss something?

Look at my pseudo code above.
Nothing much to be done in validating decap action. And item validation for
decap can be done together in item validation code.

Thanks,
Yongseok

> 
> > 	- validate items
> >
  
Slava Ovsiienko Oct. 29, 2018, 9:33 a.m. UTC | #6
> -----Original Message-----
> From: Yongseok Koh
> Sent: Saturday, October 27, 2018 0:57
> To: Slava Ovsiienko <viacheslavo@mellanox.com>
> Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> Subject: Re: [PATCH v2 2/7] net/mlx5: e-switch VXLAN flow validation
> routine
> 
> On Fri, Oct 26, 2018 at 01:39:38AM -0700, Slava Ovsiienko wrote:
> > > -----Original Message-----
> > > From: Yongseok Koh
> > > Sent: Friday, October 26, 2018 6:07
> > > To: Slava Ovsiienko <viacheslavo@mellanox.com>
> > > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > > Subject: Re: [PATCH v2 2/7] net/mlx5: e-switch VXLAN flow validation
> > > routine
> > >
> > > On Thu, Oct 25, 2018 at 06:53:11AM -0700, Slava Ovsiienko wrote:
> > > > > -----Original Message-----
> > > > > From: Yongseok Koh
> > > > > Sent: Tuesday, October 23, 2018 13:05
> > > > > To: Slava Ovsiienko <viacheslavo@mellanox.com>
> > > > > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > > > > Subject: Re: [PATCH v2 2/7] net/mlx5: e-switch VXLAN flow
> > > > > validation routine
> > > > >
> > > > > On Mon, Oct 15, 2018 at 02:13:30PM +0000, Viacheslav Ovsiienko
> wrote:
> > > [...]
> > > > > > @@ -1114,7 +1733,6 @@ struct pedit_parser {
> > > > > >  							   error);
> > > > > >  			if (ret < 0)
> > > > > >  				return ret;
> > > > > > -			item_flags |=
> MLX5_FLOW_LAYER_OUTER_L3_IPV4;
> > > > > >  			mask.ipv4 = flow_tcf_item_mask
> > > > > >  				(items, &rte_flow_item_ipv4_mask,
> > > > > >  				 &flow_tcf_mask_supported.ipv4,
> @@ -1135,13 +1753,22 @@
> > > > > > struct pedit_parser {
> > > > > >  				next_protocol =
> > > > > >  					((const struct
> rte_flow_item_ipv4 *)
> > > > > >  					 (items->spec))-
> >hdr.next_proto_id;
> > > > > > +			if (item_flags &
> > > > > MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
> > > > > > +				/*
> > > > > > +				 * Multiple outer items are not
> allowed as
> > > > > > +				 * tunnel parameters, will raise an
> error later.
> > > > > > +				 */
> > > > > > +				ipv4 = NULL;
> > > > >
> > > > > Can't it be inner then?
> > > > AFAIK,  no for tc rules, we can not specify multiple levels (inner
> > > > + outer) for
> > > them.
> > > > There is just no TCA_FLOWER_KEY_xxx attributes  for specifying
> > > > inner
> > > items
> > > > to match by flower.
> > >
> > > When I briefly read the kernel code, I thought TCA_FLOWER_KEY_* are
> > > for inner header before decap. I mean TCA_FLOWER_KEY_IPV4_SRC is
> for
> > > inner L3 and TCA_FLOWER_KEY_ENC_IPV4_SRC is for outer tunnel
> header.
> > > Please do some experiments with tc-flower command.
> >
> > Hm. Interesting. I will check.
> >
> > > > It is quite unclear comment, not the best one, sorry. I did not
> > > > like it too, just forgot to rewrite.
> > > >
> > > > ipv4, ipv6 , udp variables gather the matching items during the
> > > > item list
> > > scanning,
> > > > later variables are used for VXLAN decap action validation only.
> > > > So, the
> > > "outer"
> > > > means that ipv4 variable contains the VXLAN decap outer addresses,
> > > > and should be NULL-ed if multiple items are found in the items list.
> > > >
> > > > But we can generate an error here if we have valid action_flags
> > > > (gathered by prepare function) and VXLAN decap is set. Raising an
> > > > error looks more relevant and clear.
> > >
> > > You can't use flags at this point. It is validate() so prepare()
> > > might not be preceded.
> > >
> > > > >   flow create 1 ingress transfer
> > > > >     pattern eth src is 66:77:88:99:aa:bb
> > > > >       dst is 00:11:22:33:44:55 / ipv4 src is 2.2.2.2 dst is 1.1.1.1 /
> > > > >       udp src is 4789 dst is 4242 / vxlan vni is 0x112233 /
> > > > >       eth / ipv6 / tcp dst is 42 / end
> > > > >     actions vxlan_decap / port_id id 2 / end
> > > > >
> > > > > Is this flow supported by linux tcf? I took this example from
> > > > > Adrien's
> > > patch -
> > > > > "[8/8] net/mlx5: add VXLAN decap support to switch flow rules".
> > > > > If so,
> > > isn't it
> > > > > possible to have inner L3 layer (MLX5_FLOW_LAYER_INNER_*)? If
> > > > > not,
> > > you
> > > > > should return error in this case. I don't see any code to check
> > > > > redundant outer items.
> > > > > Did I miss something?
> > > >
> > > > Interesting, besides rule has correct syntax, I'm not sure whether
> > > > it can be
> > > applied w/o errors.
> > >
> > > Please try. You owns this patchset. However, you just can prohibit
> > > such flows (tunneled item) and come up with follow-up patches to
> > > enable it later if it is support by tcf as this whole patchset
> > > itself is pretty huge enough and we don't have much time.
> > >
> > > > At least our current flow_tcf_translate() implementation does not
> > > > support
> > > any INNERs.
> > > > But it seems the flow_tcf_validate() does, it's subject to recheck
> > > > - we
> > > should not allow
> > > > unsupported items to pass the validation. I'll check and provide
> > > > the
> > > separate bugfix patch
> > > > (if any).
> > >
> > > Neither has tunnel support. It is the first time to add tunnel support to
> TCF.
> > > If it was needed, you should've added it, not skipping it.
> > >
> > > You can check how MLX5_FLOW_LAYER_TUNNEL is used in Verbs/DV as
> a
> > > reference.
> >
> > Yes. I understood your point. Will check and add tunnel support for TCF
> rules.
> > Anyway, inner MAC addresses are supported for VXLAN decap, I think we
> > should specify these ones in the rule as inners (after VNI item),
> > definitely some tunnel support in validate/parse/translate should be added.
> >
> > >
> > > > > BTW, for the tunneled items, why don't you follow the code of
> > > > > Verbs(mlx5_flow_verbs.c) and DV(mlx5_flow_dv.c)? For tcf, it is
> > > > > the first
> > > time
> > > > For VXLAN it has some specifics (warning about ignored params,
> > > > etc.) I've checked which of verbs/dv code could be reused and did
> > > > not
> > > discovered
> > > > a lot. I'll recheck the latest code commits, possible it became
> > > > more
> > > appropriate
> > > > for VXLAN.
> > >
> > > Agreed. I'm not forcing you to do it because we run out of time but
> > > mentioned it because if there's any redundancy in our code, that
> > > usually causes bug later.
> > > Let's not waste too much time for that. Just grab low hanging fruits if
> any.
> > >
> > > > > to add tunneled item, but Verbs/DV already have validation code
> > > > > for
> > > tunnel,
> > > > > so you can reuse the existing code. In
> > > > > flow_tcf_validate_vxlan_decap(),
> > > not
> > > > > every validation is VXLAN-specific but some of them can be
> > > > > common
> > > code.
> > > > >
> > > > > And if you need to know whether there's the VXLAN decap action
> > > > > prior to outer header item validation, you can relocate the code
> > > > > - action
> > > validation
> > > > > first and item validation next, as there's no dependency yet in
> > > > > the current
> > > >
> > > > We can not validate action first - we need items to be preliminary
> > > gathered,
> > > > to check them in action's specific fashion and to check action itself.
> > > > I mean, if we see VXLAN decap action, we should check the presence
> > > > of L2, L3, L4 and VNI items. I minimized the number of passes
> > > > along the item and action lists. BTW, Adrien's approach performed
> > > > two passes, mine does
> > > only.
> > > >
> > > > > code. Defining ipv4, ipv6, udp seems to make the code path more
> > > complex.
> > > > Yes, but it allows us to avoid the extra item list scanning and
> > > > minimizes the
> > > changes
> > > > of existing code.
> > > > In your approach we should:
> > > > - scan actions, w/o full checking, just action_flags gathering and
> > > > checking
> > > > - scan items, performing variating check (depending on gathered
> > > > action
> > > flags)
> > > > - scan actions again, performing full check with params (at least
> > > > for now check whether all params gathered)
> > >
> > > Disagree. flow_tcf_validate_vxlan_encap() doesn't even need any info
> > > of items and flow_tcf_validate_vxlan_decap() needs item_flags to
> > > check whether VXLAN item is there or not and ipv4/ipv6/udp are all
> > > for item checks. Let me give you very detailed exmaple:
> > >
> > > {
> > > 	for (actions[]...) {
> > > 		...
> > > 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> > > 			...
> > > 			flow_tcf_validate_vxlan_encap();
> > > 			...
> > > 			break;
> > > 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
> > > 			if (action_flags & (MLX5_ACTION_VXLAN_ENCAP
> > > 					   | MLX5_ACTION_VXLAN_DECAP))
> > > 				return rte_flow_error_set
> > > 					(error, ENOTSUP,
> > > 					 RTE_FLOW_ERROR_TYPE_ACTION,
> > > 					 actions,
> > > 					 "can't have multiple vxlan actions");
> > > 			/* Don't call flow_tcf_validate_vxlan_decap(). */
> > > 			action_flags |= MLX5_ACTION_VXLAN_DECAP;
> > > 			break;
> > > 	}
> > > 	for (items[]...) {
> > > 		...
> > > 		case RTE_FLOW_ITEM_TYPE_IPV4:
> > > 			/* Existing common validation. */
> > > 			...
> > > 			if (action_flags & MLX5_ACTION_VXLAN_DECAP) {
> > > 				/* Do ipv4 validation in
> > > 				 * flow_tcf_validate_vxlan_decap()/
> > > 			}
> > > 			break;
> > > 	}
> > > }
> > >
> > > Curretly you are doing,
> > >
> > > 	- validate items
> > > 	- validate actions
> > > 	- validate items again if decap.
> > >
> > > But this can simply be
> > >
> > > 	- validate actions
> > How  we could validate VXLAN decap at this stage?
> > As we do not have item_flags set yet?
> > Do I miss something?
> 
> Look at my pseudo code above.
> Nothing much to be done in validating decap action. And item validation for
> decap can be done together in item validation code.
> 
VXLAB decap action should check:
- whether outer destination UDP port is present (otherwise we cannot assign VTEP VXLAN)
- whether outer destination IP is present (otherwise we cannot assign IP to ifouter/build route)
- whether VNI is present (to identify VXLAN traffic)

How do you  propose check these issues in your approach?

With best regards,
Slava


> Thanks,
> Yongseok
> 
> >
> > > 	- validate items
> > >
  
Yongseok Koh Oct. 29, 2018, 6:26 p.m. UTC | #7
On Mon, Oct 29, 2018 at 02:33:03AM -0700, Slava Ovsiienko wrote:
> > -----Original Message-----
> > From: Yongseok Koh
> > Sent: Saturday, October 27, 2018 0:57
> > To: Slava Ovsiienko <viacheslavo@mellanox.com>
> > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > Subject: Re: [PATCH v2 2/7] net/mlx5: e-switch VXLAN flow validation
> > routine
> > 
> > On Fri, Oct 26, 2018 at 01:39:38AM -0700, Slava Ovsiienko wrote:
> > > > -----Original Message-----
> > > > From: Yongseok Koh
> > > > Sent: Friday, October 26, 2018 6:07
> > > > To: Slava Ovsiienko <viacheslavo@mellanox.com>
> > > > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > > > Subject: Re: [PATCH v2 2/7] net/mlx5: e-switch VXLAN flow validation
> > > > routine
> > > >
> > > > On Thu, Oct 25, 2018 at 06:53:11AM -0700, Slava Ovsiienko wrote:
> > > > > > -----Original Message-----
> > > > > > From: Yongseok Koh
> > > > > > Sent: Tuesday, October 23, 2018 13:05
> > > > > > To: Slava Ovsiienko <viacheslavo@mellanox.com>
> > > > > > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > > > > > Subject: Re: [PATCH v2 2/7] net/mlx5: e-switch VXLAN flow
> > > > > > validation routine
> > > > > >
> > > > > > On Mon, Oct 15, 2018 at 02:13:30PM +0000, Viacheslav Ovsiienko
> > wrote:
> > > > [...]
> > > > > > > @@ -1114,7 +1733,6 @@ struct pedit_parser {
> > > > > > >  							   error);
> > > > > > >  			if (ret < 0)
> > > > > > >  				return ret;
> > > > > > > -			item_flags |=
> > MLX5_FLOW_LAYER_OUTER_L3_IPV4;
> > > > > > >  			mask.ipv4 = flow_tcf_item_mask
> > > > > > >  				(items, &rte_flow_item_ipv4_mask,
> > > > > > >  				 &flow_tcf_mask_supported.ipv4,
> > @@ -1135,13 +1753,22 @@
> > > > > > > struct pedit_parser {
> > > > > > >  				next_protocol =
> > > > > > >  					((const struct
> > rte_flow_item_ipv4 *)
> > > > > > >  					 (items->spec))-
> > >hdr.next_proto_id;
> > > > > > > +			if (item_flags &
> > > > > > MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
> > > > > > > +				/*
> > > > > > > +				 * Multiple outer items are not
> > allowed as
> > > > > > > +				 * tunnel parameters, will raise an
> > error later.
> > > > > > > +				 */
> > > > > > > +				ipv4 = NULL;
> > > > > >
> > > > > > Can't it be inner then?
> > > > > AFAIK,  no for tc rules, we can not specify multiple levels (inner
> > > > > + outer) for
> > > > them.
> > > > > There is just no TCA_FLOWER_KEY_xxx attributes  for specifying
> > > > > inner
> > > > items
> > > > > to match by flower.
> > > >
> > > > When I briefly read the kernel code, I thought TCA_FLOWER_KEY_* are
> > > > for inner header before decap. I mean TCA_FLOWER_KEY_IPV4_SRC is
> > for
> > > > inner L3 and TCA_FLOWER_KEY_ENC_IPV4_SRC is for outer tunnel
> > header.
> > > > Please do some experiments with tc-flower command.
> > >
> > > Hm. Interesting. I will check.
> > >
> > > > > It is quite unclear comment, not the best one, sorry. I did not
> > > > > like it too, just forgot to rewrite.
> > > > >
> > > > > ipv4, ipv6 , udp variables gather the matching items during the
> > > > > item list
> > > > scanning,
> > > > > later variables are used for VXLAN decap action validation only.
> > > > > So, the
> > > > "outer"
> > > > > means that ipv4 variable contains the VXLAN decap outer addresses,
> > > > > and should be NULL-ed if multiple items are found in the items list.
> > > > >
> > > > > But we can generate an error here if we have valid action_flags
> > > > > (gathered by prepare function) and VXLAN decap is set. Raising an
> > > > > error looks more relevant and clear.
> > > >
> > > > You can't use flags at this point. It is validate() so prepare()
> > > > might not be preceded.
> > > >
> > > > > >   flow create 1 ingress transfer
> > > > > >     pattern eth src is 66:77:88:99:aa:bb
> > > > > >       dst is 00:11:22:33:44:55 / ipv4 src is 2.2.2.2 dst is 1.1.1.1 /
> > > > > >       udp src is 4789 dst is 4242 / vxlan vni is 0x112233 /
> > > > > >       eth / ipv6 / tcp dst is 42 / end
> > > > > >     actions vxlan_decap / port_id id 2 / end
> > > > > >
> > > > > > Is this flow supported by linux tcf? I took this example from
> > > > > > Adrien's
> > > > patch -
> > > > > > "[8/8] net/mlx5: add VXLAN decap support to switch flow rules".
> > > > > > If so,
> > > > isn't it
> > > > > > possible to have inner L3 layer (MLX5_FLOW_LAYER_INNER_*)? If
> > > > > > not,
> > > > you
> > > > > > should return error in this case. I don't see any code to check
> > > > > > redundant outer items.
> > > > > > Did I miss something?
> > > > >
> > > > > Interesting, besides rule has correct syntax, I'm not sure whether
> > > > > it can be
> > > > applied w/o errors.
> > > >
> > > > Please try. You owns this patchset. However, you just can prohibit
> > > > such flows (tunneled item) and come up with follow-up patches to
> > > > enable it later if it is support by tcf as this whole patchset
> > > > itself is pretty huge enough and we don't have much time.
> > > >
> > > > > At least our current flow_tcf_translate() implementation does not
> > > > > support
> > > > any INNERs.
> > > > > But it seems the flow_tcf_validate() does, it's subject to recheck
> > > > > - we
> > > > should not allow
> > > > > unsupported items to pass the validation. I'll check and provide
> > > > > the
> > > > separate bugfix patch
> > > > > (if any).
> > > >
> > > > Neither has tunnel support. It is the first time to add tunnel support to
> > TCF.
> > > > If it was needed, you should've added it, not skipping it.
> > > >
> > > > You can check how MLX5_FLOW_LAYER_TUNNEL is used in Verbs/DV as
> > a
> > > > reference.
> > >
> > > Yes. I understood your point. Will check and add tunnel support for TCF
> > rules.
> > > Anyway, inner MAC addresses are supported for VXLAN decap, I think we
> > > should specify these ones in the rule as inners (after VNI item),
> > > definitely some tunnel support in validate/parse/translate should be added.
> > >
> > > >
> > > > > > BTW, for the tunneled items, why don't you follow the code of
> > > > > > Verbs(mlx5_flow_verbs.c) and DV(mlx5_flow_dv.c)? For tcf, it is
> > > > > > the first
> > > > time
> > > > > For VXLAN it has some specifics (warning about ignored params,
> > > > > etc.) I've checked which of verbs/dv code could be reused and did
> > > > > not
> > > > discovered
> > > > > a lot. I'll recheck the latest code commits, possible it became
> > > > > more
> > > > appropriate
> > > > > for VXLAN.
> > > >
> > > > Agreed. I'm not forcing you to do it because we run out of time but
> > > > mentioned it because if there's any redundancy in our code, that
> > > > usually causes bug later.
> > > > Let's not waste too much time for that. Just grab low hanging fruits if
> > any.
> > > >
> > > > > > to add tunneled item, but Verbs/DV already have validation code
> > > > > > for
> > > > tunnel,
> > > > > > so you can reuse the existing code. In
> > > > > > flow_tcf_validate_vxlan_decap(),
> > > > not
> > > > > > every validation is VXLAN-specific but some of them can be
> > > > > > common
> > > > code.
> > > > > >
> > > > > > And if you need to know whether there's the VXLAN decap action
> > > > > > prior to outer header item validation, you can relocate the code
> > > > > > - action
> > > > validation
> > > > > > first and item validation next, as there's no dependency yet in
> > > > > > the current
> > > > >
> > > > > We can not validate action first - we need items to be preliminary
> > > > gathered,
> > > > > to check them in action's specific fashion and to check action itself.
> > > > > I mean, if we see VXLAN decap action, we should check the presence
> > > > > of L2, L3, L4 and VNI items. I minimized the number of passes
> > > > > along the item and action lists. BTW, Adrien's approach performed
> > > > > two passes, mine does
> > > > only.
> > > > >
> > > > > > code. Defining ipv4, ipv6, udp seems to make the code path more
> > > > complex.
> > > > > Yes, but it allows us to avoid the extra item list scanning and
> > > > > minimizes the
> > > > changes
> > > > > of existing code.
> > > > > In your approach we should:
> > > > > - scan actions, w/o full checking, just action_flags gathering and
> > > > > checking
> > > > > - scan items, performing variating check (depending on gathered
> > > > > action
> > > > flags)
> > > > > - scan actions again, performing full check with params (at least
> > > > > for now check whether all params gathered)
> > > >
> > > > Disagree. flow_tcf_validate_vxlan_encap() doesn't even need any info
> > > > of items and flow_tcf_validate_vxlan_decap() needs item_flags to
> > > > check whether VXLAN item is there or not and ipv4/ipv6/udp are all
> > > > for item checks. Let me give you very detailed exmaple:
> > > >
> > > > {
> > > > 	for (actions[]...) {
> > > > 		...
> > > > 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> > > > 			...
> > > > 			flow_tcf_validate_vxlan_encap();
> > > > 			...
> > > > 			break;
> > > > 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
> > > > 			if (action_flags & (MLX5_ACTION_VXLAN_ENCAP
> > > > 					   | MLX5_ACTION_VXLAN_DECAP))
> > > > 				return rte_flow_error_set
> > > > 					(error, ENOTSUP,
> > > > 					 RTE_FLOW_ERROR_TYPE_ACTION,
> > > > 					 actions,
> > > > 					 "can't have multiple vxlan actions");
> > > > 			/* Don't call flow_tcf_validate_vxlan_decap(). */
> > > > 			action_flags |= MLX5_ACTION_VXLAN_DECAP;
> > > > 			break;
> > > > 	}
> > > > 	for (items[]...) {
> > > > 		...
> > > > 		case RTE_FLOW_ITEM_TYPE_IPV4:
> > > > 			/* Existing common validation. */
> > > > 			...
> > > > 			if (action_flags & MLX5_ACTION_VXLAN_DECAP) {
> > > > 				/* Do ipv4 validation in
> > > > 				 * flow_tcf_validate_vxlan_decap()/
> > > > 			}
> > > > 			break;
> > > > 	}
> > > > }
> > > >
> > > > Curretly you are doing,
> > > >
> > > > 	- validate items
> > > > 	- validate actions
> > > > 	- validate items again if decap.
> > > >
> > > > But this can simply be
> > > >
> > > > 	- validate actions
> > > How  we could validate VXLAN decap at this stage?
> > > As we do not have item_flags set yet?
> > > Do I miss something?
> > 
> > Look at my pseudo code above.
> > Nothing much to be done in validating decap action. And item validation for
> > decap can be done together in item validation code.
> > 
> VXLAB decap action should check:
> - whether outer destination UDP port is present (otherwise we cannot assign VTEP VXLAN)
> - whether outer destination IP is present (otherwise we cannot assign IP to ifouter/build route)
> - whether VNI is present (to identify VXLAN traffic)
> 
> How do you  propose check these issues in your approach?

Did you look at my pseudo code? We are not validating vxlan decap action itself
but items when vxlan decap is present.

{
	for (actions[]...) {
		...
		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
			...
			flow_tcf_validate_vxlan_encap();
			...
			break;
		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
			if (action_flags & (MLX5_ACTION_VXLAN_ENCAP
					   | MLX5_ACTION_VXLAN_DECAP))
				return rte_flow_error_set
					(error, ENOTSUP,
					 RTE_FLOW_ERROR_TYPE_ACTION,
					 actions,
					 "can't have multiple vxlan actions");
			/* Don't call flow_tcf_validate_vxlan_decap(). */
			action_flags |= MLX5_ACTION_VXLAN_DECAP;
			break;
	}
	for (items[]...) {
		...
		case RTE_FLOW_ITEM_TYPE_IPV4:
			/* Existing common validation. */
			...
			if (action_flags & MLX5_ACTION_VXLAN_DECAP) {
				/*
				 * check whether outer destination IP is present
				 */
			}
			break;
		...
		case RTE_FLOW_ITEM_TYPE_UDP:
			/* Existing common validation. */
			...
			if (action_flags & MLX5_ACTION_VXLAN_DECAP) {
				/*
				 * check whether outer destination UDP port is
				 * present
				 */
			}
			break;
		...
		case RTE_FLOW_ITEM_TYPE_VXLAN:
			/* Do the same for vni. */
	}
	...
	if (action_flags & MLX5_ACTION_VXLAN_DECAP) {
		if (!(items_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4 || ... IPV6))
			return rte_flow_error_set
				(error, EINVAL,
				 RTE_FLOW_ERROR_TYPE_ITEM, items,
				 "vxlan decap requires item IP");
		if (!(items_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
			return rte_flow_error_set
				(error, EINVAL,
				 RTE_FLOW_ERROR_TYPE_ITEM, items,
				 "vxlan decap requires item UDP");
		if (!(items_flags & MLX5_FLOW_LAYER_VXLAN))
			/* Do the same . */
	}
}

Still problem?

Thanks,
Yongseok
  

Patch

diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 8f9c78a..0055417 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -430,6 +430,7 @@  struct mlx5_flow_tcf_context {
 	struct rte_flow_item_ipv6 ipv6;
 	struct rte_flow_item_tcp tcp;
 	struct rte_flow_item_udp udp;
+	struct rte_flow_item_vxlan vxlan;
 } flow_tcf_mask_empty;
 
 /** Supported masks for known item types. */
@@ -441,6 +442,7 @@  struct mlx5_flow_tcf_context {
 	struct rte_flow_item_ipv6 ipv6;
 	struct rte_flow_item_tcp tcp;
 	struct rte_flow_item_udp udp;
+	struct rte_flow_item_vxlan vxlan;
 } flow_tcf_mask_supported = {
 	.port_id = {
 		.id = 0xffffffff,
@@ -478,6 +480,9 @@  struct mlx5_flow_tcf_context {
 		.src_port = RTE_BE16(0xffff),
 		.dst_port = RTE_BE16(0xffff),
 	},
+	.vxlan = {
+	       .vni = "\xff\xff\xff",
+	},
 };
 
 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
@@ -943,6 +948,615 @@  struct pedit_parser {
 }
 
 /**
+ * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_ETH item for E-Switch.
+ *
+ * @param[in] item
+ *   Pointer to the itemn structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ **/
+static int
+flow_tcf_validate_vxlan_encap_eth(const struct rte_flow_item *item,
+				  struct rte_flow_error *error)
+{
+	const struct rte_flow_item_eth *spec = item->spec;
+	const struct rte_flow_item_eth *mask = item->mask;
+
+	if (!spec)
+		/*
+		 * Specification for L2 addresses can be empty
+		 * because these ones are optional and not
+		 * required directly by tc rule.
+		 */
+		return 0;
+	if (!mask)
+		/* If mask is not specified use the default one. */
+		mask = &rte_flow_item_eth_mask;
+	if (memcmp(&mask->dst,
+		   &flow_tcf_mask_empty.eth.dst,
+		   sizeof(flow_tcf_mask_empty.eth.dst))) {
+		if (memcmp(&mask->dst,
+			   &rte_flow_item_eth_mask.dst,
+			   sizeof(rte_flow_item_eth_mask.dst)))
+			return rte_flow_error_set(error, ENOTSUP,
+				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+				 "no support for partial mask on"
+				 " \"eth.dst\" field");
+	}
+	if (memcmp(&mask->src,
+		   &flow_tcf_mask_empty.eth.src,
+		   sizeof(flow_tcf_mask_empty.eth.src))) {
+		if (memcmp(&mask->src,
+			   &rte_flow_item_eth_mask.src,
+			   sizeof(rte_flow_item_eth_mask.src)))
+			return rte_flow_error_set(error, ENOTSUP,
+				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+				 "no support for partial mask on"
+				 " \"eth.src\" field");
+	}
+	if (mask->type != RTE_BE16(0x0000)) {
+		if (mask->type != RTE_BE16(0xffff))
+			return rte_flow_error_set(error, ENOTSUP,
+				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+				 "no support for partial mask on"
+				 " \"eth.type\" field");
+		DRV_LOG(WARNING,
+			"outer ethernet type field "
+			"cannot be forced for VXLAN "
+			"encapsulation, parameter ignored");
+	}
+	return 0;
+}
+
+/**
+ * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV4 item for E-Switch.
+ *
+ * @param[in] item
+ *   Pointer to the itemn structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ **/
+static int
+flow_tcf_validate_vxlan_encap_ipv4(const struct rte_flow_item *item,
+				   struct rte_flow_error *error)
+{
+	const struct rte_flow_item_ipv4 *spec = item->spec;
+	const struct rte_flow_item_ipv4 *mask = item->mask;
+
+	if (!spec)
+		/*
+		 * Specification for L3 addresses cannot be empty
+		 * because it is required by tunnel_key parameter.
+		 */
+		return rte_flow_error_set(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ITEM, item,
+				 "NULL outer L3 address specification "
+				 " for VXLAN encapsulation");
+	if (!mask)
+		mask = &rte_flow_item_ipv4_mask;
+	if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
+		if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
+			return rte_flow_error_set(error, ENOTSUP,
+				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+				 "no support for partial mask on"
+				 " \"ipv4.hdr.dst_addr\" field");
+		/* More L3 address validations can be put here. */
+	} else {
+		/*
+		 * Kernel uses the destination L3 address to determine
+		 * the routing path and obtain the L2 destination
+		 * address, so L3 destination address must be
+		 * specified in the tc rule.
+		 */
+		return rte_flow_error_set(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ITEM, item,
+				 "outer L3 destination address must be "
+				 "specified for VXLAN encapsulation");
+	}
+	if (mask->hdr.src_addr != RTE_BE32(0x00000000)) {
+		if (mask->hdr.src_addr != RTE_BE32(0xffffffff))
+			return rte_flow_error_set(error, ENOTSUP,
+				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+				 "no support for partial mask on"
+				 " \"ipv4.hdr.src_addr\" field");
+		/* More L3 address validations can be put here. */
+	} else {
+		/*
+		 * Kernel uses the source L3 address to select the
+		 * interface for egress encapsulated traffic, so
+		 * it must be specified in the tc rule.
+		 */
+		return rte_flow_error_set(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ITEM, item,
+				 "outer L3 source address must be "
+				 "specified for VXLAN encapsulation");
+	}
+	return 0;
+}
+
+/**
+ * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV6 item for E-Switch.
+ *
+ * @param[in] item
+ *   Pointer to the itemn structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ **/
+static int
+flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item *item,
+				   struct rte_flow_error *error)
+{
+	const struct rte_flow_item_ipv6 *spec = item->spec;
+	const struct rte_flow_item_ipv6 *mask = item->mask;
+
+	if (!spec)
+		/*
+		 * Specification for L3 addresses cannot be empty
+		 * because it is required by tunnel_key parameter.
+		 */
+		return rte_flow_error_set(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ITEM, item,
+				 "NULL outer L3 address specification "
+				 " for VXLAN encapsulation");
+	if (!mask)
+		mask = &rte_flow_item_ipv6_mask;
+	if (memcmp(&mask->hdr.dst_addr,
+		   &flow_tcf_mask_empty.ipv6.hdr.dst_addr,
+		   sizeof(flow_tcf_mask_empty.ipv6.hdr.dst_addr))) {
+		if (memcmp(&mask->hdr.dst_addr,
+		   &rte_flow_item_ipv6_mask.hdr.dst_addr,
+		   sizeof(rte_flow_item_ipv6_mask.hdr.dst_addr)))
+			return rte_flow_error_set(error, ENOTSUP,
+				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+				 "no support for partial mask on"
+				 " \"ipv6.hdr.dst_addr\" field");
+		/* More L3 address validations can be put here. */
+	} else {
+		/*
+		 * Kernel uses the destination L3 address to determine
+		 * the routing path and obtain the L2 destination
+		 * address (heigh or gate), so L3 destination address
+		 * must be specified within the tc rule.
+		 */
+		return rte_flow_error_set(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ITEM, item,
+				 "outer L3 destination address must be "
+				 "specified for VXLAN encapsulation");
+	}
+	if (memcmp(&mask->hdr.src_addr,
+		   &flow_tcf_mask_empty.ipv6.hdr.src_addr,
+		   sizeof(flow_tcf_mask_empty.ipv6.hdr.src_addr))) {
+		if (memcmp(&mask->hdr.src_addr,
+		   &rte_flow_item_ipv6_mask.hdr.src_addr,
+		   sizeof(rte_flow_item_ipv6_mask.hdr.src_addr)))
+			return rte_flow_error_set(error, ENOTSUP,
+				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+				 "no support for partial mask on"
+				 " \"ipv6.hdr.src_addr\" field");
+		/* More L3 address validation can be put here. */
+	} else {
+		/*
+		 * Kernel uses the source L3 address to select the
+		 * interface for egress encapsulated traffic, so
+		 * it must be specified in the tc rule.
+		 */
+		return rte_flow_error_set(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ITEM, item,
+				 "outer L3 source address must be "
+				 "specified for VXLAN encapsulation");
+	}
+	return 0;
+}
+
+/**
+ * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_UDP item for E-Switch.
+ *
+ * @param[in] item
+ *   Pointer to the itemn structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ **/
+static int
+flow_tcf_validate_vxlan_encap_udp(const struct rte_flow_item *item,
+				  struct rte_flow_error *error)
+{
+	const struct rte_flow_item_udp *spec = item->spec;
+	const struct rte_flow_item_udp *mask = item->mask;
+
+	if (!spec)
+		/*
+		 * Specification for UDP ports cannot be empty
+		 * because it is required by tunnel_key parameter.
+		 */
+		return rte_flow_error_set(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ITEM, item,
+				 "NULL UDP port specification "
+				 " for VXLAN encapsulation");
+	if (!mask)
+		mask = &rte_flow_item_udp_mask;
+	if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
+		if (mask->hdr.dst_port != RTE_BE16(0xffff))
+			return rte_flow_error_set(error, ENOTSUP,
+				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+				 "no support for partial mask on"
+				 " \"udp.hdr.dst_port\" field");
+		if (!spec->hdr.dst_port)
+			return rte_flow_error_set(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ITEM, item,
+				 "zero encap remote UDP port");
+	} else {
+		return rte_flow_error_set(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ITEM, item,
+				 "outer UDP remote port must be "
+				 "specified for VXLAN encapsulation");
+	}
+	if (mask->hdr.src_port != RTE_BE16(0x0000)) {
+		if (mask->hdr.src_port != RTE_BE16(0xffff))
+			return rte_flow_error_set(error, ENOTSUP,
+				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+				 "no support for partial mask on"
+				 " \"udp.hdr.src_port\" field");
+		DRV_LOG(WARNING,
+			"outer UDP source port cannot be "
+			"forced for VXLAN encapsulation, "
+			"parameter ignored");
+	}
+	return 0;
+}
+
+/**
+ * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_VXLAN item for E-Switch.
+ *
+ * @param[in] item
+ *   Pointer to the itemn structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ **/
+static int
+flow_tcf_validate_vxlan_encap_vni(const struct rte_flow_item *item,
+				  struct rte_flow_error *error)
+{
+	const struct rte_flow_item_vxlan *spec = item->spec;
+	const struct rte_flow_item_vxlan *mask = item->mask;
+
+	if (!spec)
+		/* Outer VNI is required by tunnel_key parameter. */
+		return rte_flow_error_set(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ITEM, item,
+				 "NULL VNI specification "
+				 " for VXLAN encapsulation");
+	if (!mask)
+		mask = &rte_flow_item_vxlan_mask;
+	if (mask->vni[0] != 0 ||
+	    mask->vni[1] != 0 ||
+	    mask->vni[2] != 0) {
+		if (mask->vni[0] != 0xff ||
+		    mask->vni[1] != 0xff ||
+		    mask->vni[2] != 0xff)
+			return rte_flow_error_set(error, ENOTSUP,
+				 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+				 "no support for partial mask on"
+				 " \"vxlan.vni\" field");
+		if (spec->vni[0] == 0 &&
+		    spec->vni[1] == 0 &&
+		    spec->vni[2] == 0)
+			return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+					  "VXLAN vni cannot be 0");
+	} else {
+		return rte_flow_error_set(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ITEM,
+				 item,
+				 "outer VNI must be specified "
+				 "for VXLAN encapsulation");
+	}
+	return 0;
+}
+
+/**
+ * Validate VXLAN_ENCAP action item list for E-Switch.
+ *
+ * @param[in] action
+ *   Pointer to the VXLAN_ENCAP action structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ **/
+static int
+flow_tcf_validate_vxlan_encap(const struct rte_flow_action *action,
+			      struct rte_flow_error *error)
+{
+	const struct rte_flow_item *items;
+	int ret;
+	uint32_t item_flags = 0;
+
+	assert(action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP);
+	if (!action->conf)
+		return rte_flow_error_set
+			(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+			 action, "Missing VXLAN tunnel "
+				 "action configuration");
+	items = ((const struct rte_flow_action_vxlan_encap *)
+					action->conf)->definition;
+	if (!items)
+		return rte_flow_error_set
+			(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+			 action, "Missing VXLAN tunnel "
+				 "encapsulation parameters");
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+		switch (items->type) {
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			ret = mlx5_flow_validate_item_eth(items, item_flags,
+							  error);
+			if (ret < 0)
+				return ret;
+			ret = flow_tcf_validate_vxlan_encap_eth(items, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
+			break;
+		break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			ret = mlx5_flow_validate_item_ipv4(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			ret = flow_tcf_validate_vxlan_encap_ipv4(items, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
+							   error);
+			if (ret < 0)
+				return ret;
+			ret = flow_tcf_validate_vxlan_encap_ipv6(items, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			ret = mlx5_flow_validate_item_udp(items, item_flags,
+							   0xFF, error);
+			if (ret < 0)
+				return ret;
+			ret = flow_tcf_validate_vxlan_encap_udp(items, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN:
+			ret = mlx5_flow_validate_item_vxlan(items,
+							    item_flags, error);
+			if (ret < 0)
+				return ret;
+			ret = flow_tcf_validate_vxlan_encap_vni(items, error);
+			if (ret < 0)
+				return ret;
+			item_flags |= MLX5_FLOW_LAYER_VXLAN;
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ITEM, items,
+					  "VXLAN encap item not supported");
+		}
+	}
+	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, action,
+					  "no outer L3 layer found"
+					  " for VXLAN encapsulation");
+	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, action,
+					  "no outer L4 layer found"
+					  " for VXLAN encapsulation");
+	if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, action,
+					  "no VXLAN VNI found"
+					  " for VXLAN encapsulation");
+	return 0;
+}
+
+/**
+ * Validate VXLAN_DECAP action outer tunnel items for E-Switch.
+ *
+ * @param[in] item_flags
+ *   Mask of provided outer tunnel parameters
+ * @param[in] ipv4
+ *   Outer IPv4 address item (if any, NULL otherwise).
+ * @param[in] ipv6
+ *   Outer IPv6 address item (if any, NULL otherwise).
+ * @param[in] udp
+ *   Outer UDP layer item (if any, NULL otherwise).
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ **/
+static int
+flow_tcf_validate_vxlan_decap(uint32_t item_flags,
+			      const struct rte_flow_action *action,
+			      const struct rte_flow_item *ipv4,
+			      const struct rte_flow_item *ipv6,
+			      const struct rte_flow_item *udp,
+			      struct rte_flow_error *error)
+{
+	if (!ipv4 && !ipv6)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, action,
+					  "no outer L3 layer found"
+					  " for VXLAN decapsulation");
+	if (ipv4) {
+		const struct rte_flow_item_ipv4 *spec = ipv4->spec;
+		const struct rte_flow_item_ipv4 *mask = ipv4->mask;
+
+		if (!spec)
+			/*
+			 * Specification for L3 addresses cannot be empty
+			 * because it is required as decap parameter.
+			 */
+			return rte_flow_error_set(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
+				 "NULL outer L3 address specification "
+				 " for VXLAN decapsulation");
+		if (!mask)
+			mask = &rte_flow_item_ipv4_mask;
+		if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
+			if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
+				return rte_flow_error_set(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+					 "no support for partial mask on"
+					 " \"ipv4.hdr.dst_addr\" field");
+			/* More L3 address validations can be put here. */
+		} else {
+			/*
+			 * Kernel uses the destination L3 address
+			 * to determine the ingress network interface
+			 * for traffic being decapculated.
+			 */
+			return rte_flow_error_set(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
+				 "outer L3 destination address must be "
+				 "specified for VXLAN decapsulation");
+		}
+		/* Source L3 address is optional for decap. */
+		if (mask->hdr.src_addr != RTE_BE32(0x00000000))
+			if (mask->hdr.src_addr != RTE_BE32(0xffffffff))
+				return rte_flow_error_set(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+					 "no support for partial mask on"
+					 " \"ipv4.hdr.src_addr\" field");
+	} else {
+		const struct rte_flow_item_ipv6 *spec = ipv6->spec;
+		const struct rte_flow_item_ipv6 *mask = ipv6->mask;
+
+		if (!spec)
+			/*
+			 * Specification for L3 addresses cannot be empty
+			 * because it is required as decap parameter.
+			 */
+			return rte_flow_error_set(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
+				 "NULL outer L3 address specification "
+				 " for VXLAN decapsulation");
+		if (!mask)
+			mask = &rte_flow_item_ipv6_mask;
+		if (memcmp(&mask->hdr.dst_addr,
+			   &flow_tcf_mask_empty.ipv6.hdr.dst_addr,
+			   sizeof(flow_tcf_mask_empty.ipv6.hdr.dst_addr))) {
+			if (memcmp(&mask->hdr.dst_addr,
+				&rte_flow_item_ipv6_mask.hdr.dst_addr,
+				sizeof(rte_flow_item_ipv6_mask.hdr.dst_addr)))
+				return rte_flow_error_set(error, ENOTSUP,
+				       RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+				       "no support for partial mask on"
+				       " \"ipv6.hdr.dst_addr\" field");
+		/* More L3 address validations can be put here. */
+		} else {
+			/*
+			 * Kernel uses the destination L3 address
+			 * to determine the ingress network interface
+			 * for traffic being decapculated.
+			 */
+			return rte_flow_error_set(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
+				 "outer L3 destination address must be "
+				 "specified for VXLAN decapsulation");
+		}
+		/* Source L3 address is optional for decap. */
+		if (memcmp(&mask->hdr.src_addr,
+			   &flow_tcf_mask_empty.ipv6.hdr.src_addr,
+			   sizeof(flow_tcf_mask_empty.ipv6.hdr.src_addr))) {
+			if (memcmp(&mask->hdr.src_addr,
+				   &rte_flow_item_ipv6_mask.hdr.src_addr,
+				   sizeof(mask->hdr.src_addr)))
+				return rte_flow_error_set(error, ENOTSUP,
+					RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+					"no support for partial mask on"
+					" \"ipv6.hdr.src_addr\" field");
+		}
+	}
+	if (!udp) {
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, action,
+					  "no outer L4 layer found"
+					  " for VXLAN decapsulation");
+	} else {
+		const struct rte_flow_item_udp *spec = udp->spec;
+		const struct rte_flow_item_udp *mask = udp->mask;
+
+		if (!spec)
+			/*
+			 * Specification for UDP ports cannot be empty
+			 * because it is required as decap parameter.
+			 */
+			return rte_flow_error_set(error, EINVAL,
+					 RTE_FLOW_ERROR_TYPE_ITEM, udp,
+					 "NULL UDP port specification "
+					 " for VXLAN decapsulation");
+		if (!mask)
+			mask = &rte_flow_item_udp_mask;
+		if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
+			if (mask->hdr.dst_port != RTE_BE16(0xffff))
+				return rte_flow_error_set(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+					 "no support for partial mask on"
+					 " \"udp.hdr.dst_port\" field");
+			if (!spec->hdr.dst_port)
+				return rte_flow_error_set(error, EINVAL,
+					 RTE_FLOW_ERROR_TYPE_ITEM, udp,
+					 "zero decap local UDP port");
+		} else {
+			return rte_flow_error_set(error, EINVAL,
+					 RTE_FLOW_ERROR_TYPE_ITEM, udp,
+					 "outer UDP destination port must be "
+					 "specified for VXLAN decapsulation");
+		}
+		if (mask->hdr.src_port != RTE_BE16(0x0000)) {
+			if (mask->hdr.src_port != RTE_BE16(0xffff))
+				return rte_flow_error_set(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+					 "no support for partial mask on"
+					 " \"udp.hdr.src_port\" field");
+			DRV_LOG(WARNING,
+			"outer UDP local port cannot be "
+			"forced for VXLAN encapsulation, "
+			"parameter ignored");
+		}
+	}
+	if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, action,
+					  "no VXLAN VNI found"
+					  " for VXLAN decapsulation");
+	/* VNI is already validated, extra check can be put here. */
+	return 0;
+}
+
+/**
  * Validate flow for E-Switch.
  *
  * @param[in] priv
@@ -974,7 +1588,8 @@  struct pedit_parser {
 		const struct rte_flow_item_ipv6 *ipv6;
 		const struct rte_flow_item_tcp *tcp;
 		const struct rte_flow_item_udp *udp;
-	} spec, mask;
+		const struct rte_flow_item_vxlan *vxlan;
+	 } spec, mask;
 	union {
 		const struct rte_flow_action_port_id *port_id;
 		const struct rte_flow_action_jump *jump;
@@ -983,9 +1598,13 @@  struct pedit_parser {
 			of_set_vlan_vid;
 		const struct rte_flow_action_of_set_vlan_pcp *
 			of_set_vlan_pcp;
+		const struct rte_flow_action_vxlan_encap *vxlan_encap;
 		const struct rte_flow_action_set_ipv4 *set_ipv4;
 		const struct rte_flow_action_set_ipv6 *set_ipv6;
 	} conf;
+	const struct rte_flow_item *ipv4 = NULL; /* storage to check */
+	const struct rte_flow_item *ipv6 = NULL; /* outer tunnel. */
+	const struct rte_flow_item *udp = NULL;  /* parameters. */
 	uint32_t item_flags = 0;
 	uint32_t action_flags = 0;
 	uint8_t next_protocol = -1;
@@ -1114,7 +1733,6 @@  struct pedit_parser {
 							   error);
 			if (ret < 0)
 				return ret;
-			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
 			mask.ipv4 = flow_tcf_item_mask
 				(items, &rte_flow_item_ipv4_mask,
 				 &flow_tcf_mask_supported.ipv4,
@@ -1135,13 +1753,22 @@  struct pedit_parser {
 				next_protocol =
 					((const struct rte_flow_item_ipv4 *)
 					 (items->spec))->hdr.next_proto_id;
+			if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
+				/*
+				 * Multiple outer items are not allowed as
+				 * tunnel parameters, will raise an error later.
+				 */
+				ipv4 = NULL;
+			} else {
+				ipv4 = items;
+				item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV6:
 			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
 							   error);
 			if (ret < 0)
 				return ret;
-			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
 			mask.ipv6 = flow_tcf_item_mask
 				(items, &rte_flow_item_ipv6_mask,
 				 &flow_tcf_mask_supported.ipv6,
@@ -1162,13 +1789,22 @@  struct pedit_parser {
 				next_protocol =
 					((const struct rte_flow_item_ipv6 *)
 					 (items->spec))->hdr.proto;
+			if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
+				/*
+				 *Multiple outer items are not allowed as
+				 * tunnel parameters
+				 */
+				ipv6 = NULL;
+			} else {
+				ipv6 = items;
+				item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_UDP:
 			ret = mlx5_flow_validate_item_udp(items, item_flags,
 							  next_protocol, error);
 			if (ret < 0)
 				return ret;
-			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
 			mask.udp = flow_tcf_item_mask
 				(items, &rte_flow_item_udp_mask,
 				 &flow_tcf_mask_supported.udp,
@@ -1177,6 +1813,12 @@  struct pedit_parser {
 				 error);
 			if (!mask.udp)
 				return -rte_errno;
+			if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP) {
+				udp = NULL;
+			} else {
+				udp = items;
+				item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_TCP:
 			ret = mlx5_flow_validate_item_tcp
@@ -1186,7 +1828,6 @@  struct pedit_parser {
 					      error);
 			if (ret < 0)
 				return ret;
-			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
 			mask.tcp = flow_tcf_item_mask
 				(items, &rte_flow_item_tcp_mask,
 				 &flow_tcf_mask_supported.tcp,
@@ -1195,11 +1836,36 @@  struct pedit_parser {
 				 error);
 			if (!mask.tcp)
 				return -rte_errno;
+			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN:
+			ret = mlx5_flow_validate_item_vxlan(items,
+							    item_flags, error);
+			if (ret < 0)
+				return ret;
+			mask.vxlan = flow_tcf_item_mask
+				(items, &rte_flow_item_vxlan_mask,
+				 &flow_tcf_mask_supported.vxlan,
+				 &flow_tcf_mask_empty.vxlan,
+				 sizeof(flow_tcf_mask_supported.vxlan),
+				 error);
+			if (!mask.vxlan)
+				return -rte_errno;
+			if (mask.vxlan->vni[0] != 0xff ||
+			    mask.vxlan->vni[1] != 0xff ||
+			    mask.vxlan->vni[2] != 0xff)
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+					 mask.vxlan,
+					 "no support for partial or "
+					 "empty mask on \"vxlan.vni\" field");
+			item_flags |= MLX5_FLOW_LAYER_VXLAN;
 			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ITEM,
-						  NULL, "item not supported");
+						  items, "item not supported");
 		}
 	}
 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
@@ -1271,6 +1937,33 @@  struct pedit_parser {
 					 " set action must follow push action");
 			current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
 			break;
+		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
+			if (action_flags & (MLX5_ACTION_VXLAN_ENCAP
+					   | MLX5_ACTION_VXLAN_DECAP))
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
+					 "can't have multiple vxlan actions");
+			ret = flow_tcf_validate_vxlan_encap(actions, error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_VXLAN_ENCAP;
+			break;
+		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
+			if (action_flags & (MLX5_ACTION_VXLAN_ENCAP
+					   | MLX5_ACTION_VXLAN_DECAP))
+				return rte_flow_error_set
+					(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ACTION, actions,
+					 "can't have multiple vxlan actions");
+			ret = flow_tcf_validate_vxlan_decap(item_flags,
+							    actions,
+							    ipv4, ipv6, udp,
+							    error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_ACTION_VXLAN_DECAP;
+			break;
 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
 			current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
 			break;
@@ -1391,6 +2084,12 @@  struct pedit_parser {
 		return rte_flow_error_set(error, EINVAL,
 					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
 					  "no fate action is found");
+	if ((item_flags & MLX5_FLOW_LAYER_VXLAN) &&
+	    !(action_flags & MLX5_ACTION_VXLAN_DECAP))
+		return rte_flow_error_set(error, ENOTSUP,
+					 RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					 "VNI pattern should be followed "
+					 " by VXLAN_DECAP action");
 	return 0;
 }