[1/2] net/mlx5: add VXLAN encap decap to Direct Verbs

Message ID 1540379299-23764-2-git-send-email-dekelp@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler
Headers
Series net/mlx5: add L2 encap and decap to Direct Verbs |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK

Commit Message

Dekel Peled Oct. 24, 2018, 11:08 a.m. UTC
  This patch adds support for VXLAN encap and decap operations, in
Direct Verbs flow.

Signed-off-by: Dekel Peled <dekelp@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.h    |   4 +
 drivers/net/mlx5/mlx5_flow_dv.c | 409 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 408 insertions(+), 5 deletions(-)
  

Comments

Ori Kam Oct. 24, 2018, 12:59 p.m. UTC | #1
> -----Original Message-----
> From: Dekel Peled <dekelp@mellanox.com>
> Sent: Wednesday, October 24, 2018 2:08 PM
> To: Yongseok Koh <yskoh@mellanox.com>; Shahaf Shuler
> <shahafs@mellanox.com>
> Cc: dev@dpdk.org; Ori Kam <orika@mellanox.com>
> Subject: [PATCH 1/2] net/mlx5: add VXLAN encap decap to Direct Verbs
> 
> This patch adds support for VXLAN encap and decap operations, in
> Direct Verbs flow.
> 
> Signed-off-by: Dekel Peled <dekelp@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5_flow.h    |   4 +
>  drivers/net/mlx5/mlx5_flow_dv.c | 409
> +++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 408 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> index 38635c9..9c28e50 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -92,6 +92,8 @@
>  #define MLX5_FLOW_ACTION_DEC_TTL (1u << 19)
>  #define MLX5_FLOW_ACTION_SET_MAC_SRC (1u << 20)
>  #define MLX5_FLOW_ACTION_SET_MAC_DST (1u << 21)
> +#define MLX5_FLOW_ACTION_VXLAN_ENCAP (1u << 22)
> +#define MLX5_FLOW_ACTION_VXLAN_DECAP (1u << 23)
> 
>  #define MLX5_FLOW_FATE_ACTIONS \
>  	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE |
> MLX5_FLOW_ACTION_RSS)
> @@ -181,6 +183,8 @@ struct mlx5_flow_dv {
>  #ifdef HAVE_IBV_FLOW_DV_SUPPORT
>  	struct mlx5dv_flow_action_attr
> actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS];
>  	/**< Action list. */
> +	struct ibv_flow_action *encap_verb; /**< Verbs object of encap. */
> +	struct ibv_flow_action *decap_verb; /**< Verbs object of decap. */

Why do we need encap and decap?

>  #endif
>  	int actions_n; /**< number of actions. */
>  };
> diff --git a/drivers/net/mlx5/mlx5_flow_dv.c
> b/drivers/net/mlx5/mlx5_flow_dv.c
> index e8f409f..06ecabf 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -35,6 +35,16 @@
> 
>  #ifdef HAVE_IBV_FLOW_DV_SUPPORT
> 
> +#define MLX5_UDP	17
> +#define MLX5_TCP	6
> +#define MLX5_GRE	47

Please use the already existing defines (e.g. IPPROTO_TCP).

> +
> +/*
> + * Encap buf length, max:
> + *   Eth:14/VLAN:8/IPv6:40/TCP:36/TUNNEL:20/Eth:14
> + */
> +#define MLX5_ENCAP_LEN 132
> +
>  /**
>   * Validate META item.
>   *
> @@ -97,6 +107,331 @@
>  }
> 
>  /**
> + * Validate the vxlan encap action.
> + *
> + * @param[in] action_flags
> + *   Holds the actions detected until now.
> + * @param[in] action
> + *   Pointer to the encap action.
> + * @param[in] attr
> + *   Pointer to flow attributes
> + * @param[out] error
> + *   Pointer to error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +flow_dv_validate_action_vxlan_encap(uint64_t action_flags,
> +				    const struct rte_flow_action *action,
> +				    const struct rte_flow_attr *attr,
> +				    struct rte_flow_error *error)
> +{
> +	const struct rte_flow_action_vxlan_encap *vxlan_encap = action->conf;
> +
> +	if (!vxlan_encap)
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION,
> action,
> +					  "configuration cannot be null");
> +	if (action_flags & MLX5_FLOW_ACTION_DROP)
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION,
> NULL,
> +					  "can't drop and encap in same flow");
> +	if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP)
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION,
> NULL,
> +					  "can't have 2 encap actions in same"
> +					  " flow");
> +	if (attr->ingress)
> +		return rte_flow_error_set(error, ENOTSUP,
> +
> RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
> +					  NULL,
> +					  "encap action not supported for "
> +					  "ingress");
> +	return 0;
> +}
> +
> +/**
> + * Validate the vxlan decap action.
> + *
> + * @param[in] action_flags
> + *   Holds the actions detected until now.
> + * @param[in] action
> + *   Pointer to the decap action.
> + * @param[in] attr
> + *   Pointer to flow attributes
> + * @param[out] error
> + *   Pointer to error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +flow_dv_validate_action_vxlan_decap(uint64_t action_flags,
> +			const struct rte_flow_action *action __rte_unused,
> +			const struct rte_flow_attr *attr,
> +			struct rte_flow_error *error)
> +{
> +	if (action_flags & MLX5_FLOW_ACTION_DROP)
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION,
> NULL,
> +					  "can't drop and decap in same flow");
> +	if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP)
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION,
> NULL,
> +					  "can't encap and decap in same
> flow");
> +	if (attr->egress)
> +		return rte_flow_error_set(error, ENOTSUP,
> +
> RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
> +					  NULL,
> +					  "decap action not supported for "
> +					  "egress");

Why not EINVAL?

> +	return 0;
> +}
> +
> +static size_t item_len[] = {
> +	[RTE_FLOW_ITEM_TYPE_VOID] = 0,
> +	[RTE_FLOW_ITEM_TYPE_ETH] = sizeof(struct rte_flow_item_eth),
> +	[RTE_FLOW_ITEM_TYPE_VLAN] = sizeof(struct rte_flow_item_vlan),
> +	[RTE_FLOW_ITEM_TYPE_IPV4] = sizeof(struct rte_flow_item_ipv4),
> +	[RTE_FLOW_ITEM_TYPE_IPV6] = sizeof(struct rte_flow_item_ipv6),
> +	[RTE_FLOW_ITEM_TYPE_UDP] = sizeof(struct rte_flow_item_udp),
> +	[RTE_FLOW_ITEM_TYPE_TCP] = sizeof(struct rte_flow_item_tcp),
> +	[RTE_FLOW_ITEM_TYPE_VXLAN] = sizeof(struct rte_flow_item_vxlan),
> +	[RTE_FLOW_ITEM_TYPE_GRE] = sizeof(struct rte_flow_item_gre),
> +	[RTE_FLOW_ITEM_TYPE_NVGRE] = sizeof(struct rte_flow_item_gre),
> +	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = sizeof(struct
> rte_flow_item_vxlan_gpe),
> +	[RTE_FLOW_ITEM_TYPE_MPLS] = sizeof(struct rte_flow_item_mpls),
> +};
> +
> +/**
> + * Convert the encap action data from rte_flow_item to raw buffer
> + *
> + * @param[in] item
> + *   Pointer to rte_flow_item object.
> + * @param[out] buf
> + *   Pointer to the output buffer.
> + * @param[out] size
> + *   Pointer to the output buffer size.
> + * @param[out] error
> + *   Pointer to the error structure.
> + * @param[in] l3_type
> + *   ???.

What is ???

> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +flow_dv_convert_encap(struct rte_flow_item *item, uint8_t *buf, size_t *size,
> +		      struct rte_flow_error *error, uint16_t l3_type)

What is the l3_type used for?

> +{
> +	struct ether_hdr *eth = NULL;
> +	struct vlan_hdr *vlan = NULL;
> +	struct ipv4_hdr *ipv4 = NULL;
> +	struct ipv6_hdr *ipv6 = NULL;
> +	struct udp_hdr *udp = NULL;
> +	struct vxlan_hdr *vxlan = NULL;
> +	const struct rte_flow_item_vlan *vlan_spec;

Why does vlan have a dedicated variable?

> +	size_t len;
> +
> +	assert(item);

Why assert on the item? It should be valid, and if it is not, return an error.

> +	*size = 0;

Why not use a local temp_size and avoid the memory access?

> +	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
> +		/* TODO: variable length handling: raw, vxlan and nvgre. */

Why TODO?

> +		len = item_len[item->type];

This can result in a crash if the type is not one you expected.

> +		if (len + *size > MLX5_ENCAP_LEN)
> +			return rte_flow_error_set(error, EINVAL,
> +
> RTE_FLOW_ERROR_TYPE_ACTION,
> +						  (void *)item->type,
> +						  "invalid item length");
> +		rte_memcpy((void *)&buf[*size], item->spec, len);
> +		switch (item->type) {
> +		case RTE_FLOW_ITEM_TYPE_ETH:
> +			eth = (void *)&buf[*size];

Why cast to void? Same for all others.

> +			break;
> +		case RTE_FLOW_ITEM_TYPE_VLAN:
> +			vlan_spec = item->spec;
> +			vlan = (void *)&buf[*size];
> +			if (!eth)
> +				return rte_flow_error_set(error, EINVAL,
> +
> 	RTE_FLOW_ERROR_TYPE_ACTION,
> +						(void *)item->type,
> +						"eth header not found");
> +			vlan->vlan_tci = vlan_spec->tci;
> +			vlan->eth_proto = vlan_spec->inner_type;
> +			if (!eth->ether_type)
> +				eth->ether_type = htons(ETHER_TYPE_VLAN);

Why not use rte_cpu_to_be? Same for all.

> +			break;
> +		case RTE_FLOW_ITEM_TYPE_IPV4:
> +			ipv4 = (void *)&buf[*size];
> +			if (!vlan && !eth)
> +				return rte_flow_error_set(error, EINVAL,
> +
> 	RTE_FLOW_ERROR_TYPE_ACTION,
> +						(void *)item->type,
> +						"neither eth nor vlan header
> found");
> +			if (vlan && !vlan->eth_proto)
> +				vlan->eth_proto = htons(ETHER_TYPE_IPv4);
> +			else if (eth && !eth->ether_type)
> +				eth->ether_type = htons(ETHER_TYPE_IPv4);
> +			if (!ipv4->version_ihl)
> +				ipv4->version_ihl = 0x45;
> +			if (!ipv4->time_to_live)
> +				ipv4->time_to_live = 0x40;
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_IPV6:
> +			ipv6 = (void *)&buf[*size];
> +			if (!vlan && !eth)
> +				return rte_flow_error_set(error, EINVAL,
> +
> 	RTE_FLOW_ERROR_TYPE_ACTION,
> +						(void *)item->type,
> +						"neither eth nor vlan header
> found");
> +			if (vlan && !vlan->eth_proto)
> +				vlan->eth_proto = htons(ETHER_TYPE_IPv6);
> +			else if (eth && !eth->ether_type)
> +				eth->ether_type = htons(ETHER_TYPE_IPv6);
> +			if (!ipv6->vtc_flow)
> +				ipv6->vtc_flow = htonl(0x60000000);
> +			if (!ipv6->hop_limits)
> +				ipv6->hop_limits = 0xff;
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_UDP:
> +			udp = (void *)&buf[*size];
> +			if (!ipv4 && !ipv6)
> +				return rte_flow_error_set(error, EINVAL,
> +
> 	RTE_FLOW_ERROR_TYPE_ACTION,
> +						(void *)item->type,
> +						"ip header not found");
> +			if (ipv4 && !ipv4->next_proto_id)
> +				ipv4->next_proto_id = MLX5_UDP;
> +			else if (ipv6 && !ipv6->proto)
> +				ipv6->proto = MLX5_UDP;
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_VXLAN:
> +			vxlan = (void *)&buf[*size];
> +			if (!udp)
> +				return rte_flow_error_set(error, EINVAL,
> +
> 	RTE_FLOW_ERROR_TYPE_ACTION,
> +						(void *)item->type,
> +						"udp header not found");
> +			if (!udp->dst_port)
> +				udp->dst_port =
> htons(MLX5_UDP_PORT_VXLAN);
> +			if (!vxlan->vx_flags)
> +				vxlan->vx_flags = htonl(0x08000000);
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
> +			vxlan = (void *)&buf[*size];
> +			if (!udp)
> +				return rte_flow_error_set(error, EINVAL,
> +
> 	RTE_FLOW_ERROR_TYPE_ACTION,
> +						(void *)item->type,
> +						"udp header not found");
> +			if (!udp->dst_port)
> +				udp->dst_port =
> htons(MLX5_UDP_PORT_VXLAN_GPE);
> +			if (!vxlan->vx_flags)
> +				vxlan->vx_flags = htonl(0x0c000003);
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_GRE:
> +		case RTE_FLOW_ITEM_TYPE_NVGRE:
> +			if (!ipv4 && !ipv6)
> +				return rte_flow_error_set(error, EINVAL,
> +
> 	RTE_FLOW_ERROR_TYPE_ACTION,
> +						(void *)item->type,
> +						"ip header not found");
> +			if (ipv4 && !ipv4->next_proto_id)
> +				ipv4->next_proto_id = htons(MLX5_GRE);
> +			else if (ipv6 && !ipv6->proto)
> +				ipv6->proto = htons(MLX5_GRE);
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_VOID:
> +			break;
> +		default:
> +			return rte_flow_error_set(error, EINVAL,
> +					RTE_FLOW_ERROR_TYPE_ACTION,
> +					(void *)item->type,
> +					"unsupported item type");
> +			break;
> +		}
> +		*size += len;
> +	}
> +	if (l3_type && vlan)
> +		vlan->eth_proto = htons(l3_type);
> +	else if (l3_type && eth)
> +		eth->ether_type = htons(l3_type);
> +	return 0;
> +}
> +
> +/**
> + * Convert VXLAN encap action to DV specification.
> + *
> + * @param[in] dev
> + *   Pointer to rte_eth_dev structure.
> + * @param[in] action
> + *   Pointer to action structure.
> + * @param[out] error
> + *   Pointer to the error structure.
> + *
> + * @return
> + *   Pointer to action on success, NULL otherwise and rte_errno is set.
> + */
> +static struct ibv_flow_action *
> +flow_dv_create_vxlan_encap(struct rte_eth_dev *dev,
> +			   const struct rte_flow_action *action,
> +			   struct rte_flow_error *error)
> +{
> +	struct ibv_flow_action *encap_verb = NULL;
> +	const struct rte_flow_action_vxlan_encap *encap_data;
> +	struct priv *priv = dev->data->dev_private;
> +	uint8_t buf[MLX5_ENCAP_LEN];
> +	size_t size = 0;
> +	int convert_result;
> +
> +	encap_data = (const struct rte_flow_action_vxlan_encap *)action-
> >conf;
> +	convert_result = flow_dv_convert_encap(encap_data->definition,
> +					       buf, &size, error, 0);
> +	if (convert_result)
> +		return NULL;
> +	encap_verb = mlx5_glue->dv_create_flow_action_packet_reformat
> +		(priv->ctx, size, (size ? buf : NULL),
> +
> MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL,
> +		 MLX5DV_FLOW_TABLE_TYPE_NIC_TX);
> +	if (!encap_verb)
> +		rte_flow_error_set(error, EINVAL,
> RTE_FLOW_ERROR_TYPE_ACTION,
> +				   NULL, "cannot create vxlan encap action");
> +	return encap_verb;
> +}
> +
> +/**
> + * Convert VXLAN decap action to DV specification.
> + *
> + * @param[in] dev
> + *   Pointer to rte_eth_dev structure.
> + * @param[in] action
> + *   Pointer to action structure.
> + * @param[out] error
> + *   Pointer to the error structure.
> + *
> + * @return
> + *   Pointer to action on success, NULL otherwise and rte_errno is set.
> + */
> +static struct ibv_flow_action *
> +flow_dv_create_vxlan_decap(struct rte_eth_dev *dev,
> +			   const struct rte_flow_action *action __rte_unused,
> +			   struct rte_flow_error *error)
> +{
> +	struct ibv_flow_action *decap_verb = NULL;
> +	struct priv *priv = dev->data->dev_private;
> +
> +	decap_verb = mlx5_glue->dv_create_flow_action_packet_reformat
> +		(priv->ctx, 0, NULL,
> +
> MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2,
> +		 MLX5DV_FLOW_TABLE_TYPE_NIC_RX);
> +	if (!decap_verb)
> +		rte_flow_error_set(error, EINVAL,
> RTE_FLOW_ERROR_TYPE_ACTION,
> +				   NULL, "cannot create decap action");
> +	return decap_verb;
> +}
> +
> +/**
>   * Verify the @p attributes will be correctly understood by the NIC and store
>   * them in the @p flow if everything is correct.
>   *
> @@ -347,6 +682,24 @@
>  			action_flags |= MLX5_FLOW_ACTION_COUNT;
>  			++actions_n;
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> +			ret =
> flow_dv_validate_action_vxlan_encap(action_flags,
> +								  actions, attr,
> +								  error);
> +			if (ret < 0)
> +				return ret;
> +			action_flags |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
> +			++actions_n;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
> +			ret =
> flow_dv_validate_action_vxlan_decap(action_flags,
> +								  actions, attr,
> +								  error);
> +			if (ret < 0)
> +				return ret;
> +			action_flags |= MLX5_FLOW_ACTION_VXLAN_DECAP;
> +			++actions_n;
> +			break;
>  		default:
>  			return rte_flow_error_set(error, ENOTSUP,
> 
> RTE_FLOW_ERROR_TYPE_ACTION,
> @@ -1056,14 +1409,23 @@
>  /**
>   * Store the requested actions in an array.
>   *
> + * @param[in] dev
> + *   Pointer to rte_eth_dev structure.
>   * @param[in] action
>   *   Flow action to translate.
>   * @param[in, out] dev_flow
>   *   Pointer to the mlx5_flow.
> + * @param[out] error
> + *   Pointer to the error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
> -static void
> -flow_dv_create_action(const struct rte_flow_action *action,
> -		      struct mlx5_flow *dev_flow)
> +static int
> +flow_dv_create_action(struct rte_eth_dev *dev,
> +		      const struct rte_flow_action *action,
> +		      struct mlx5_flow *dev_flow,
> +		      struct rte_flow_error *error)
>  {
>  	const struct rte_flow_action_queue *queue;
>  	const struct rte_flow_action_rss *rss;
> @@ -1110,10 +1472,35 @@
>  		/* Added to array only in apply since we need the QP */
>  		flow->actions |= MLX5_FLOW_ACTION_RSS;
>  		break;
> +	case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> +		dev_flow->dv.actions[actions_n].type =
> +			MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION;
> +		dev_flow->dv.actions[actions_n].action =
> +				flow_dv_create_vxlan_encap(dev, action,
> error);
> +		if (!(dev_flow->dv.actions[actions_n].action))
> +			return -rte_errno;
> +		dev_flow->dv.encap_verb =
> +			dev_flow->dv.actions[actions_n].action;
> +		flow->actions |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
> +		actions_n++;
> +		break;
> +	case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
> +		dev_flow->dv.actions[actions_n].type =
> +			MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION;
> +		dev_flow->dv.actions[actions_n].action =
> +				flow_dv_create_vxlan_decap(dev, action,
> error);
> +		if (!(dev_flow->dv.actions[actions_n].action))
> +			return -rte_errno;
> +		dev_flow->dv.decap_verb =
> +			dev_flow->dv.actions[actions_n].action;
> +		flow->actions |= MLX5_FLOW_ACTION_VXLAN_DECAP;
> +		actions_n++;
> +		break;
>  	default:
>  		break;
>  	}
>  	dev_flow->dv.actions_n = actions_n;
> +	return 0;
>  }
> 
>  static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
> @@ -1279,8 +1666,10 @@
>  	matcher.egress = attr->egress;
>  	if (flow_dv_matcher_register(dev, &matcher, dev_flow, error))
>  		return -rte_errno;
> -	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
> -		flow_dv_create_action(actions, dev_flow);
> +	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
> +		if (flow_dv_create_action(dev, actions, dev_flow, error))
> +			return -rte_errno;
> +	}
>  	return 0;
>  }
> 
> @@ -1465,6 +1854,16 @@
>  		LIST_REMOVE(dev_flow, next);
>  		if (dev_flow->dv.matcher)
>  			flow_dv_matcher_release(dev, dev_flow);
> +		if (dev_flow->dv.encap_verb) {
> +			claim_zero(mlx5_glue->destroy_flow_action
> +						(dev_flow->dv.encap_verb));
> +			dev_flow->dv.encap_verb = NULL;
> +		}
> +		if (dev_flow->dv.decap_verb) {
> +			claim_zero(mlx5_glue->destroy_flow_action
> +						(dev_flow->dv.decap_verb));
> +			dev_flow->dv.decap_verb = NULL;
> +		}
>  		rte_free(dev_flow);
>  	}
>  }
> --
> 1.8.3.1


Thanks,
Ori
  
Dekel Peled Oct. 25, 2018, 5:53 a.m. UTC | #2
Thanks.
I deferred the series and will send a revised series later.

> -----Original Message-----
> From: Ori Kam
> Sent: Wednesday, October 24, 2018 3:59 PM
> To: Dekel Peled <dekelp@mellanox.com>; Yongseok Koh
> <yskoh@mellanox.com>; Shahaf Shuler <shahafs@mellanox.com>
> Cc: dev@dpdk.org
> Subject: RE: [PATCH 1/2] net/mlx5: add VXLAN encap decap to Direct Verbs
> 
> 
> 
> > -----Original Message-----
> > From: Dekel Peled <dekelp@mellanox.com>
> > Sent: Wednesday, October 24, 2018 2:08 PM
> > To: Yongseok Koh <yskoh@mellanox.com>; Shahaf Shuler
> > <shahafs@mellanox.com>
> > Cc: dev@dpdk.org; Ori Kam <orika@mellanox.com>
> > Subject: [PATCH 1/2] net/mlx5: add VXLAN encap decap to Direct Verbs
> >
> > This patch adds support for VXLAN encap and decap operations, in
> > Direct Verbs flow.
> >
> > Signed-off-by: Dekel Peled <dekelp@mellanox.com>
> > ---
> >  drivers/net/mlx5/mlx5_flow.h    |   4 +
> >  drivers/net/mlx5/mlx5_flow_dv.c | 409
> > +++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 408 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/net/mlx5/mlx5_flow.h
> > b/drivers/net/mlx5/mlx5_flow.h index 38635c9..9c28e50 100644
> > --- a/drivers/net/mlx5/mlx5_flow.h
> > +++ b/drivers/net/mlx5/mlx5_flow.h
> > @@ -92,6 +92,8 @@
> >  #define MLX5_FLOW_ACTION_DEC_TTL (1u << 19)  #define
> > MLX5_FLOW_ACTION_SET_MAC_SRC (1u << 20)  #define
> > MLX5_FLOW_ACTION_SET_MAC_DST (1u << 21)
> > +#define MLX5_FLOW_ACTION_VXLAN_ENCAP (1u << 22) #define
> > +MLX5_FLOW_ACTION_VXLAN_DECAP (1u << 23)
> >
> >  #define MLX5_FLOW_FATE_ACTIONS \
> >  	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE |
> > MLX5_FLOW_ACTION_RSS)
> > @@ -181,6 +183,8 @@ struct mlx5_flow_dv {  #ifdef
> > HAVE_IBV_FLOW_DV_SUPPORT
> >  	struct mlx5dv_flow_action_attr
> > actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS];
> >  	/**< Action list. */
> > +	struct ibv_flow_action *encap_verb; /**< Verbs object of encap. */
> > +	struct ibv_flow_action *decap_verb; /**< Verbs object of decap. */
> 
> Why do we need encap and decap?
> 
> >  #endif
> >  	int actions_n; /**< number of actions. */  }; diff --git
> > a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
> > index e8f409f..06ecabf 100644
> > --- a/drivers/net/mlx5/mlx5_flow_dv.c
> > +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> > @@ -35,6 +35,16 @@
> >
> >  #ifdef HAVE_IBV_FLOW_DV_SUPPORT
> >
> > +#define MLX5_UDP	17
> > +#define MLX5_TCP	6
> > +#define MLX5_GRE	47
> 
> Please use already created defines. (IPPROTO_TCP)
> 
> > +
> > +/*
> > + * Encap buf length, max:
> > + *   Eth:14/VLAN:8/IPv6:40/TCP:36/TUNNEL:20/Eth:14
> > + */
> > +#define MLX5_ENCAP_LEN 132
> > +
> >  /**
> >   * Validate META item.
> >   *
> > @@ -97,6 +107,331 @@
> >  }
> >
> >  /**
> > + * Validate the vxlan encap action.
> > + *
> > + * @param[in] action_flags
> > + *   Holds the actions detected until now.
> > + * @param[in] action
> > + *   Pointer to the encap action.
> > + * @param[in] attr
> > + *   Pointer to flow attributes
> > + * @param[out] error
> > + *   Pointer to error structure.
> > + *
> > + * @return
> > + *   0 on success, a negative errno value otherwise and rte_errno is set.
> > + */
> > +static int
> > +flow_dv_validate_action_vxlan_encap(uint64_t action_flags,
> > +				    const struct rte_flow_action *action,
> > +				    const struct rte_flow_attr *attr,
> > +				    struct rte_flow_error *error) {
> > +	const struct rte_flow_action_vxlan_encap *vxlan_encap =
> > +action->conf;
> > +
> > +	if (!vxlan_encap)
> > +		return rte_flow_error_set(error, EINVAL,
> > +					  RTE_FLOW_ERROR_TYPE_ACTION,
> > action,
> > +					  "configuration cannot be null");
> > +	if (action_flags & MLX5_FLOW_ACTION_DROP)
> > +		return rte_flow_error_set(error, EINVAL,
> > +					  RTE_FLOW_ERROR_TYPE_ACTION,
> > NULL,
> > +					  "can't drop and encap in same
> flow");
> > +	if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP)
> > +		return rte_flow_error_set(error, EINVAL,
> > +					  RTE_FLOW_ERROR_TYPE_ACTION,
> > NULL,
> > +					  "can't have 2 encap actions in same"
> > +					  " flow");
> > +	if (attr->ingress)
> > +		return rte_flow_error_set(error, ENOTSUP,
> > +
> > RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
> > +					  NULL,
> > +					  "encap action not supported for "
> > +					  "ingress");
> > +	return 0;
> > +}
> > +
> > +/**
> > + * Validate the vxlan decap action.
> > + *
> > + * @param[in] action_flags
> > + *   Holds the actions detected until now.
> > + * @param[in] action
> > + *   Pointer to the decap action.
> > + * @param[in] attr
> > + *   Pointer to flow attributes
> > + * @param[out] error
> > + *   Pointer to error structure.
> > + *
> > + * @return
> > + *   0 on success, a negative errno value otherwise and rte_errno is set.
> > + */
> > +static int
> > +flow_dv_validate_action_vxlan_decap(uint64_t action_flags,
> > +			const struct rte_flow_action *action __rte_unused,
> > +			const struct rte_flow_attr *attr,
> > +			struct rte_flow_error *error)
> > +{
> > +	if (action_flags & MLX5_FLOW_ACTION_DROP)
> > +		return rte_flow_error_set(error, EINVAL,
> > +					  RTE_FLOW_ERROR_TYPE_ACTION,
> > NULL,
> > +					  "can't drop and decap in same
> flow");
> > +	if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP)
> > +		return rte_flow_error_set(error, EINVAL,
> > +					  RTE_FLOW_ERROR_TYPE_ACTION,
> > NULL,
> > +					  "can't encap and decap in same
> > flow");
> > +	if (attr->egress)
> > +		return rte_flow_error_set(error, ENOTSUP,
> > +
> > RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
> > +					  NULL,
> > +					  "decap action not supported for "
> > +					  "egress");
> 
> Why not EINVAL?
> 
> > +	return 0;
> > +}
> > +
> > +static size_t item_len[] = {
> > +	[RTE_FLOW_ITEM_TYPE_VOID] = 0,
> > +	[RTE_FLOW_ITEM_TYPE_ETH] = sizeof(struct rte_flow_item_eth),
> > +	[RTE_FLOW_ITEM_TYPE_VLAN] = sizeof(struct rte_flow_item_vlan),
> > +	[RTE_FLOW_ITEM_TYPE_IPV4] = sizeof(struct rte_flow_item_ipv4),
> > +	[RTE_FLOW_ITEM_TYPE_IPV6] = sizeof(struct rte_flow_item_ipv6),
> > +	[RTE_FLOW_ITEM_TYPE_UDP] = sizeof(struct rte_flow_item_udp),
> > +	[RTE_FLOW_ITEM_TYPE_TCP] = sizeof(struct rte_flow_item_tcp),
> > +	[RTE_FLOW_ITEM_TYPE_VXLAN] = sizeof(struct
> rte_flow_item_vxlan),
> > +	[RTE_FLOW_ITEM_TYPE_GRE] = sizeof(struct rte_flow_item_gre),
> > +	[RTE_FLOW_ITEM_TYPE_NVGRE] = sizeof(struct
> rte_flow_item_gre),
> > +	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = sizeof(struct
> > rte_flow_item_vxlan_gpe),
> > +	[RTE_FLOW_ITEM_TYPE_MPLS] = sizeof(struct rte_flow_item_mpls),
> };
> > +
> > +/**
> > + * Convert the encap action data from rte_flow_item to raw buffer
> > + *
> > + * @param[in] item
> > + *   Pointer to rte_flow_item object.
> > + * @param[out] buf
> > + *   Pointer to the output buffer.
> > + * @param[out] size
> > + *   Pointer to the output buffer size.
> > + * @param[out] error
> > + *   Pointer to the error structure.
> > + * @param[in] l3_type
> > + *   ???.
> 
> What is ???
> 
> > + *
> > + * @return
> > + *   0 on success, a negative errno value otherwise and rte_errno is set.
> > + */
> > +static int
> > +flow_dv_convert_encap(struct rte_flow_item *item, uint8_t *buf, size_t
> *size,
> > +		      struct rte_flow_error *error, uint16_t l3_type)
> 
> What is the l3_type used for?
> 
> > +{
> > +	struct ether_hdr *eth = NULL;
> > +	struct vlan_hdr *vlan = NULL;
> > +	struct ipv4_hdr *ipv4 = NULL;
> > +	struct ipv6_hdr *ipv6 = NULL;
> > +	struct udp_hdr *udp = NULL;
> > +	struct vxlan_hdr *vxlan = NULL;
> > +	const struct rte_flow_item_vlan *vlan_spec;
> 
> Why vlan has dedicated variable?
> 
> > +	size_t len;
> > +
> > +	assert(item);
> 
> Why assert on the item? It should be valid and if not return error.
> 
> > +	*size = 0;
> 
> Why not use temp_size and avoid memory access?
> 
> > +	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
> > +		/* TODO: variable length handling: raw, vxlan and nvgre. */
> 
> Why TODO?
> 
> > +		len = item_len[item->type];
> 
> This can result in crash if type is not the one you expected.
> 
> > +		if (len + *size > MLX5_ENCAP_LEN)
> > +			return rte_flow_error_set(error, EINVAL,
> > +
> > RTE_FLOW_ERROR_TYPE_ACTION,
> > +						  (void *)item->type,
> > +						  "invalid item length");
> > +		rte_memcpy((void *)&buf[*size], item->spec, len);
> > +		switch (item->type) {
> > +		case RTE_FLOW_ITEM_TYPE_ETH:
> > +			eth = (void *)&buf[*size];
> 
> Why cast to void? Same for all others.
> 
> > +			break;
> > +		case RTE_FLOW_ITEM_TYPE_VLAN:
> > +			vlan_spec = item->spec;
> > +			vlan = (void *)&buf[*size];
> > +			if (!eth)
> > +				return rte_flow_error_set(error, EINVAL,
> > +
> > 	RTE_FLOW_ERROR_TYPE_ACTION,
> > +						(void *)item->type,
> > +						"eth header not found");
> > +			vlan->vlan_tci = vlan_spec->tci;
> > +			vlan->eth_proto = vlan_spec->inner_type;
> > +			if (!eth->ether_type)
> > +				eth->ether_type =
> htons(ETHER_TYPE_VLAN);
> 
> Why not use rte_cpu_to_be? Same for all.
> 
> > +			break;
> > +		case RTE_FLOW_ITEM_TYPE_IPV4:
> > +			ipv4 = (void *)&buf[*size];
> > +			if (!vlan && !eth)
> > +				return rte_flow_error_set(error, EINVAL,
> > +
> > 	RTE_FLOW_ERROR_TYPE_ACTION,
> > +						(void *)item->type,
> > +						"neither eth nor vlan header
> > found");
> > +			if (vlan && !vlan->eth_proto)
> > +				vlan->eth_proto = htons(ETHER_TYPE_IPv4);
> > +			else if (eth && !eth->ether_type)
> > +				eth->ether_type = htons(ETHER_TYPE_IPv4);
> > +			if (!ipv4->version_ihl)
> > +				ipv4->version_ihl = 0x45;
> > +			if (!ipv4->time_to_live)
> > +				ipv4->time_to_live = 0x40;
> > +			break;
> > +		case RTE_FLOW_ITEM_TYPE_IPV6:
> > +			ipv6 = (void *)&buf[*size];
> > +			if (!vlan && !eth)
> > +				return rte_flow_error_set(error, EINVAL,
> > +
> > 	RTE_FLOW_ERROR_TYPE_ACTION,
> > +						(void *)item->type,
> > +						"neither eth nor vlan header
> > found");
> > +			if (vlan && !vlan->eth_proto)
> > +				vlan->eth_proto = htons(ETHER_TYPE_IPv6);
> > +			else if (eth && !eth->ether_type)
> > +				eth->ether_type = htons(ETHER_TYPE_IPv6);
> > +			if (!ipv6->vtc_flow)
> > +				ipv6->vtc_flow = htonl(0x60000000);
> > +			if (!ipv6->hop_limits)
> > +				ipv6->hop_limits = 0xff;
> > +			break;
> > +		case RTE_FLOW_ITEM_TYPE_UDP:
> > +			udp = (void *)&buf[*size];
> > +			if (!ipv4 && !ipv6)
> > +				return rte_flow_error_set(error, EINVAL,
> > +
> > 	RTE_FLOW_ERROR_TYPE_ACTION,
> > +						(void *)item->type,
> > +						"ip header not found");
> > +			if (ipv4 && !ipv4->next_proto_id)
> > +				ipv4->next_proto_id = MLX5_UDP;
> > +			else if (ipv6 && !ipv6->proto)
> > +				ipv6->proto = MLX5_UDP;
> > +			break;
> > +		case RTE_FLOW_ITEM_TYPE_VXLAN:
> > +			vxlan = (void *)&buf[*size];
> > +			if (!udp)
> > +				return rte_flow_error_set(error, EINVAL,
> > +
> > 	RTE_FLOW_ERROR_TYPE_ACTION,
> > +						(void *)item->type,
> > +						"udp header not found");
> > +			if (!udp->dst_port)
> > +				udp->dst_port =
> > htons(MLX5_UDP_PORT_VXLAN);
> > +			if (!vxlan->vx_flags)
> > +				vxlan->vx_flags = htonl(0x08000000);
> > +			break;
> > +		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
> > +			vxlan = (void *)&buf[*size];
> > +			if (!udp)
> > +				return rte_flow_error_set(error, EINVAL,
> > +
> > 	RTE_FLOW_ERROR_TYPE_ACTION,
> > +						(void *)item->type,
> > +						"udp header not found");
> > +			if (!udp->dst_port)
> > +				udp->dst_port =
> > htons(MLX5_UDP_PORT_VXLAN_GPE);
> > +			if (!vxlan->vx_flags)
> > +				vxlan->vx_flags = htonl(0x0c000003);
> > +			break;
> > +		case RTE_FLOW_ITEM_TYPE_GRE:
> > +		case RTE_FLOW_ITEM_TYPE_NVGRE:
> > +			if (!ipv4 && !ipv6)
> > +				return rte_flow_error_set(error, EINVAL,
> > +
> > 	RTE_FLOW_ERROR_TYPE_ACTION,
> > +						(void *)item->type,
> > +						"ip header not found");
> > +			if (ipv4 && !ipv4->next_proto_id)
> > +				ipv4->next_proto_id = htons(MLX5_GRE);
> > +			else if (ipv6 && !ipv6->proto)
> > +				ipv6->proto = htons(MLX5_GRE);
> > +			break;
> > +		case RTE_FLOW_ITEM_TYPE_VOID:
> > +			break;
> > +		default:
> > +			return rte_flow_error_set(error, EINVAL,
> > +					RTE_FLOW_ERROR_TYPE_ACTION,
> > +					(void *)item->type,
> > +					"unsupported item type");
> > +			break;
> > +		}
> > +		*size += len;
> > +	}
> > +	if (l3_type && vlan)
> > +		vlan->eth_proto = htons(l3_type);
> > +	else if (l3_type && eth)
> > +		eth->ether_type = htons(l3_type);
> > +	return 0;
> > +}
> > +
> > +/**
> > + * Convert VXLAN encap action to DV specification.
> > + *
> > + * @param[in] dev
> > + *   Pointer to rte_eth_dev structure.
> > + * @param[in] action
> > + *   Pointer to action structure.
> > + * @param[out] error
> > + *   Pointer to the error structure.
> > + *
> > + * @return
> > + *   Pointer to action on success, NULL otherwise and rte_errno is set.
> > + */
> > +static struct ibv_flow_action *
> > +flow_dv_create_vxlan_encap(struct rte_eth_dev *dev,
> > +			   const struct rte_flow_action *action,
> > +			   struct rte_flow_error *error)
> > +{
> > +	struct ibv_flow_action *encap_verb = NULL;
> > +	const struct rte_flow_action_vxlan_encap *encap_data;
> > +	struct priv *priv = dev->data->dev_private;
> > +	uint8_t buf[MLX5_ENCAP_LEN];
> > +	size_t size = 0;
> > +	int convert_result;
> > +
> > +	encap_data = (const struct rte_flow_action_vxlan_encap *)action-
> > >conf;
> > +	convert_result = flow_dv_convert_encap(encap_data->definition,
> > +					       buf, &size, error, 0);
> > +	if (convert_result)
> > +		return NULL;
> > +	encap_verb = mlx5_glue->dv_create_flow_action_packet_reformat
> > +		(priv->ctx, size, (size ? buf : NULL),
> > +
> > MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL,
> > +		 MLX5DV_FLOW_TABLE_TYPE_NIC_TX);
> > +	if (!encap_verb)
> > +		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
> > +				   NULL, "cannot create vxlan encap action");
> > +	return encap_verb;
> > +}
> > +
> > +/**
> > + * Convert VXLAN decap action to DV specification.
> > + *
> > + * @param[in] dev
> > + *   Pointer to rte_eth_dev structure.
> > + * @param[in] action
> > + *   Pointer to action structure.
> > + * @param[out] error
> > + *   Pointer to the error structure.
> > + *
> > + * @return
> > + *   Pointer to action on success, NULL otherwise and rte_errno is set.
> > + */
> > +static struct ibv_flow_action *
> > +flow_dv_create_vxlan_decap(struct rte_eth_dev *dev,
> > +			   const struct rte_flow_action *action __rte_unused,
> > +			   struct rte_flow_error *error)
> > +{
> > +	struct ibv_flow_action *decap_verb = NULL;
> > +	struct priv *priv = dev->data->dev_private;
> > +
> > +	decap_verb = mlx5_glue->dv_create_flow_action_packet_reformat
> > +		(priv->ctx, 0, NULL,
> > +		 MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2,
> > +		 MLX5DV_FLOW_TABLE_TYPE_NIC_RX);
> > +	if (!decap_verb)
> > +		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
> > +				   NULL, "cannot create decap action");
> > +	return decap_verb;
> > +}
> > +
> > +/**
> >   * Verify the @p attributes will be correctly understood by the NIC and store
> >   * them in the @p flow if everything is correct.
> >   *
> > @@ -347,6 +682,24 @@
> >  			action_flags |= MLX5_FLOW_ACTION_COUNT;
> >  			++actions_n;
> >  			break;
> > +		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> > +			ret = flow_dv_validate_action_vxlan_encap(action_flags,
> > +								  actions, attr,
> > +								  error);
> > +			if (ret < 0)
> > +				return ret;
> > +			action_flags |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
> > +			++actions_n;
> > +			break;
> > +		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
> > +			ret = flow_dv_validate_action_vxlan_decap(action_flags,
> > +								  actions, attr,
> > +								  error);
> > +			if (ret < 0)
> > +				return ret;
> > +			action_flags |= MLX5_FLOW_ACTION_VXLAN_DECAP;
> > +			++actions_n;
> > +			break;
> >  		default:
> >  			return rte_flow_error_set(error, ENOTSUP,
> >  						  RTE_FLOW_ERROR_TYPE_ACTION,
> > @@ -1056,14 +1409,23 @@
> >  /**
> >   * Store the requested actions in an array.
> >   *
> > + * @param[in] dev
> > + *   Pointer to rte_eth_dev structure.
> >   * @param[in] action
> >   *   Flow action to translate.
> >   * @param[in, out] dev_flow
> >   *   Pointer to the mlx5_flow.
> > + * @param[out] error
> > + *   Pointer to the error structure.
> > + *
> > + * @return
> > + *   0 on success, a negative errno value otherwise and rte_errno is set.
> >   */
> > -static void
> > -flow_dv_create_action(const struct rte_flow_action *action,
> > -		      struct mlx5_flow *dev_flow)
> > +static int
> > +flow_dv_create_action(struct rte_eth_dev *dev,
> > +		      const struct rte_flow_action *action,
> > +		      struct mlx5_flow *dev_flow,
> > +		      struct rte_flow_error *error)
> >  {
> >  	const struct rte_flow_action_queue *queue;
> >  	const struct rte_flow_action_rss *rss;
> > @@ -1110,10 +1472,35 @@
> >  		/* Added to array only in apply since we need the QP */
> >  		flow->actions |= MLX5_FLOW_ACTION_RSS;
> >  		break;
> > +	case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> > +		dev_flow->dv.actions[actions_n].type =
> > +			MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION;
> > +		dev_flow->dv.actions[actions_n].action =
> > +				flow_dv_create_vxlan_encap(dev, action, error);
> > +		if (!(dev_flow->dv.actions[actions_n].action))
> > +			return -rte_errno;
> > +		dev_flow->dv.encap_verb =
> > +			dev_flow->dv.actions[actions_n].action;
> > +		flow->actions |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
> > +		actions_n++;
> > +		break;
> > +	case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
> > +		dev_flow->dv.actions[actions_n].type =
> > +			MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION;
> > +		dev_flow->dv.actions[actions_n].action =
> > +				flow_dv_create_vxlan_decap(dev, action, error);
> > +		if (!(dev_flow->dv.actions[actions_n].action))
> > +			return -rte_errno;
> > +		dev_flow->dv.decap_verb =
> > +			dev_flow->dv.actions[actions_n].action;
> > +		flow->actions |= MLX5_FLOW_ACTION_VXLAN_DECAP;
> > +		actions_n++;
> > +		break;
> >  	default:
> >  		break;
> >  	}
> >  	dev_flow->dv.actions_n = actions_n;
> > +	return 0;
> >  }
> >
> >  static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
> > @@ -1279,8 +1666,10 @@
> >  	matcher.egress = attr->egress;
> >  	if (flow_dv_matcher_register(dev, &matcher, dev_flow, error))
> >  		return -rte_errno;
> > -	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
> > -		flow_dv_create_action(actions, dev_flow);
> > +	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
> > +		if (flow_dv_create_action(dev, actions, dev_flow, error))
> > +			return -rte_errno;
> > +	}
> >  	return 0;
> >  }
> >
> > @@ -1465,6 +1854,16 @@
> >  		LIST_REMOVE(dev_flow, next);
> >  		if (dev_flow->dv.matcher)
> >  			flow_dv_matcher_release(dev, dev_flow);
> > +		if (dev_flow->dv.encap_verb) {
> > +			claim_zero(mlx5_glue->destroy_flow_action
> > +						(dev_flow->dv.encap_verb));
> > +			dev_flow->dv.encap_verb = NULL;
> > +		}
> > +		if (dev_flow->dv.decap_verb) {
> > +			claim_zero(mlx5_glue->destroy_flow_action
> > +						(dev_flow->dv.decap_verb));
> > +			dev_flow->dv.decap_verb = NULL;
> > +		}
> >  		rte_free(dev_flow);
> >  	}
> >  }
> > --
> > 1.8.3.1
> 
> 
> Thanks,
> Ori
  

Patch

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 38635c9..9c28e50 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -92,6 +92,8 @@ 
 #define MLX5_FLOW_ACTION_DEC_TTL (1u << 19)
 #define MLX5_FLOW_ACTION_SET_MAC_SRC (1u << 20)
 #define MLX5_FLOW_ACTION_SET_MAC_DST (1u << 21)
+#define MLX5_FLOW_ACTION_VXLAN_ENCAP (1u << 22)
+#define MLX5_FLOW_ACTION_VXLAN_DECAP (1u << 23)
 
 #define MLX5_FLOW_FATE_ACTIONS \
 	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | MLX5_FLOW_ACTION_RSS)
@@ -181,6 +183,8 @@  struct mlx5_flow_dv {
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 	struct mlx5dv_flow_action_attr actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS];
 	/**< Action list. */
+	struct ibv_flow_action *encap_verb; /**< Verbs object of encap. */
+	struct ibv_flow_action *decap_verb; /**< Verbs object of decap. */
 #endif
 	int actions_n; /**< number of actions. */
 };
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index e8f409f..06ecabf 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -35,6 +35,16 @@ 
 
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 
+#define MLX5_UDP	17
+#define MLX5_TCP	6
+#define MLX5_GRE	47
+
+/*
+ * Encap buf length, max:
+ *   Eth:14/VLAN:8/IPv6:40/TCP:36/TUNNEL:20/Eth:14
+ */
+#define MLX5_ENCAP_LEN 132
+
 /**
  * Validate META item.
  *
@@ -97,6 +107,331 @@ 
 }
 
 /**
+ * Validate the VXLAN encap action against the flow built so far.
+ *
+ * @param[in] action_flags
+ *   MLX5_FLOW_ACTION_* bits seen so far; drop, encap or decap is refused.
+ * @param[in] action
+ *   Pointer to the encap action; its conf must not be NULL.
+ * @param[in] attr
+ *   Pointer to flow attributes; encap is rejected when ingress is set.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_action_vxlan_encap(uint64_t action_flags,
+				    const struct rte_flow_action *action,
+				    const struct rte_flow_attr *attr,
+				    struct rte_flow_error *error)
+{
+	const struct rte_flow_action_vxlan_encap *vxlan_encap = action->conf;
+
+	if (!vxlan_encap)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, action,
+					  "configuration cannot be null");
+	if (action_flags & MLX5_FLOW_ACTION_DROP)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't drop and encap in same flow");
+	if (action_flags & (MLX5_FLOW_ACTION_VXLAN_ENCAP |
+			    MLX5_FLOW_ACTION_VXLAN_DECAP))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "only one encap or decap per flow");
+	if (attr->ingress)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+					  NULL,
+					  "encap action not supported for "
+					  "ingress");
+	return 0;
+}
+
+/**
+ * Validate the VXLAN decap action against the flow built so far.
+ *
+ * @param[in] action_flags
+ *   MLX5_FLOW_ACTION_* bits seen so far; drop, encap or decap is refused.
+ * @param[in] action
+ *   Pointer to the decap action (unused).
+ * @param[in] attr
+ *   Pointer to flow attributes; decap is rejected when egress is set.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_action_vxlan_decap(uint64_t action_flags,
+			const struct rte_flow_action *action __rte_unused,
+			const struct rte_flow_attr *attr,
+			struct rte_flow_error *error)
+{
+	if (action_flags & MLX5_FLOW_ACTION_DROP)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't drop and decap in same flow");
+	if (action_flags & (MLX5_FLOW_ACTION_VXLAN_ENCAP |
+			    MLX5_FLOW_ACTION_VXLAN_DECAP))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "only one encap or decap per flow");
+	if (attr->egress)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
+					  "decap action not supported for "
+					  "egress");
+	return 0;
+}
+
+static const size_t item_len[] = { /* spec bytes copied per item type */
+	[RTE_FLOW_ITEM_TYPE_VOID] = 0,
+	[RTE_FLOW_ITEM_TYPE_ETH] = sizeof(struct rte_flow_item_eth),
+	[RTE_FLOW_ITEM_TYPE_VLAN] = sizeof(struct rte_flow_item_vlan),
+	[RTE_FLOW_ITEM_TYPE_IPV4] = sizeof(struct rte_flow_item_ipv4),
+	[RTE_FLOW_ITEM_TYPE_IPV6] = sizeof(struct rte_flow_item_ipv6),
+	[RTE_FLOW_ITEM_TYPE_UDP] = sizeof(struct rte_flow_item_udp),
+	[RTE_FLOW_ITEM_TYPE_TCP] = sizeof(struct rte_flow_item_tcp),
+	[RTE_FLOW_ITEM_TYPE_VXLAN] = sizeof(struct rte_flow_item_vxlan),
+	[RTE_FLOW_ITEM_TYPE_GRE] = sizeof(struct rte_flow_item_gre),
+	[RTE_FLOW_ITEM_TYPE_NVGRE] = sizeof(struct rte_flow_item_nvgre),
+	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = sizeof(struct rte_flow_item_vxlan_gpe),
+	[RTE_FLOW_ITEM_TYPE_MPLS] = sizeof(struct rte_flow_item_mpls),
+};
+
+/**
+ * Convert the encap action data from an rte_flow_item list to a raw buffer.
+ *
+ * @param[in] item
+ *   Pointer to the first item of an END-terminated rte_flow_item list.
+ * @param[out] buf
+ *   Pointer to the output buffer, at least MLX5_ENCAP_LEN bytes long.
+ * @param[out] size
+ *   Pointer to the number of bytes written to the output buffer.
+ * @param[out] error
+ *   Pointer to the error structure.
+ * @param[in] l3_type
+ *   Host-order EtherType forced into the VLAN (else eth) header; 0 to skip.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_convert_encap(struct rte_flow_item *item, uint8_t *buf, size_t *size,
+		      struct rte_flow_error *error, uint16_t l3_type)
+{
+	struct ether_hdr *eth = NULL;
+	struct vlan_hdr *vlan = NULL;
+	struct ipv4_hdr *ipv4 = NULL;
+	struct ipv6_hdr *ipv6 = NULL;
+	struct udp_hdr *udp = NULL;
+	struct vxlan_hdr *vxlan = NULL;
+	const struct rte_flow_item_vlan *vlan_spec;
+	size_t len;
+
+	assert(item);
+	*size = 0;
+	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+		/* TODO: variable length handling: raw, vxlan and nvgre. */
+		len = item_len[item->type];
+		if (len + *size > MLX5_ENCAP_LEN)
+			return rte_flow_error_set(error, EINVAL,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  (void *)item->type,
+						  "invalid item length");
+		rte_memcpy((void *)&buf[*size], item->spec, len);
+		switch (item->type) {
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			eth = (void *)&buf[*size];
+			break;
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			vlan_spec = item->spec;
+			vlan = (void *)&buf[*size];
+			if (!eth)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						(void *)item->type,
+						"eth header not found");
+			vlan->vlan_tci = vlan_spec->tci;
+			vlan->eth_proto = vlan_spec->inner_type;
+			if (!eth->ether_type)
+				eth->ether_type = htons(ETHER_TYPE_VLAN);
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			ipv4 = (void *)&buf[*size];
+			if (!vlan && !eth)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						(void *)item->type,
+						"neither eth nor vlan header found");
+			if (vlan && !vlan->eth_proto)
+				vlan->eth_proto = htons(ETHER_TYPE_IPv4);
+			else if (eth && !eth->ether_type)
+				eth->ether_type = htons(ETHER_TYPE_IPv4);
+			if (!ipv4->version_ihl)
+				ipv4->version_ihl = 0x45;
+			if (!ipv4->time_to_live)
+				ipv4->time_to_live = 0x40;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			ipv6 = (void *)&buf[*size];
+			if (!vlan && !eth)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						(void *)item->type,
+						"neither eth nor vlan header found");
+			if (vlan && !vlan->eth_proto)
+				vlan->eth_proto = htons(ETHER_TYPE_IPv6);
+			else if (eth && !eth->ether_type)
+				eth->ether_type = htons(ETHER_TYPE_IPv6);
+			if (!ipv6->vtc_flow)
+				ipv6->vtc_flow = htonl(0x60000000);
+			if (!ipv6->hop_limits)
+				ipv6->hop_limits = 0xff;
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			udp = (void *)&buf[*size];
+			if (!ipv4 && !ipv6)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						(void *)item->type,
+						"ip header not found");
+			if (ipv4 && !ipv4->next_proto_id)
+				ipv4->next_proto_id = MLX5_UDP;
+			else if (ipv6 && !ipv6->proto)
+				ipv6->proto = MLX5_UDP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN:
+			vxlan = (void *)&buf[*size];
+			if (!udp)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						(void *)item->type,
+						"udp header not found");
+			if (!udp->dst_port)
+				udp->dst_port = htons(MLX5_UDP_PORT_VXLAN);
+			if (!vxlan->vx_flags)
+				vxlan->vx_flags = htonl(0x08000000);
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+			vxlan = (void *)&buf[*size];
+			if (!udp)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						(void *)item->type,
+						"udp header not found");
+			if (!udp->dst_port)
+				udp->dst_port = htons(MLX5_UDP_PORT_VXLAN_GPE);
+			if (!vxlan->vx_flags)
+				vxlan->vx_flags = htonl(0x0c000003);
+			break;
+		case RTE_FLOW_ITEM_TYPE_GRE:
+		case RTE_FLOW_ITEM_TYPE_NVGRE:
+			if (!ipv4 && !ipv6)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						(void *)item->type,
+						"ip header not found");
+			if (ipv4 && !ipv4->next_proto_id)
+				ipv4->next_proto_id = MLX5_GRE;
+			else if (ipv6 && !ipv6->proto)
+				ipv6->proto = MLX5_GRE;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		default:
+			return rte_flow_error_set(error, EINVAL,
+					RTE_FLOW_ERROR_TYPE_ACTION,
+					(void *)item->type,
+					"unsupported item type");
+			break;
+		}
+		*size += len;
+	}
+	if (l3_type && vlan)
+		vlan->eth_proto = htons(l3_type);
+	else if (l3_type && eth)
+		eth->ether_type = htons(l3_type);
+	return 0;
+}
+
+/**
+ * Build the DV packet-reformat action implementing a VXLAN encap.
+ *
+ * @param[in] dev
+ *   Ethernet device whose private context is used to create the action.
+ * @param[in] action
+ *   Encap action; its conf holds the item list describing the header.
+ * @param[out] error
+ *   Filled in when the conversion or the action creation fails.
+ *
+ * @return
+ *   Pointer to action on success, NULL otherwise and rte_errno is set.
+ */
+static struct ibv_flow_action *
+flow_dv_create_vxlan_encap(struct rte_eth_dev *dev,
+			   const struct rte_flow_action *action,
+			   struct rte_flow_error *error)
+{
+	const struct rte_flow_action_vxlan_encap *conf = action->conf;
+	struct priv *priv = dev->data->dev_private;
+	struct ibv_flow_action *verb;
+	uint8_t hdr[MLX5_ENCAP_LEN];
+	size_t hdr_len = 0;
+
+	/* Flatten the item list into the raw header to prepend. */
+	if (flow_dv_convert_encap(conf->definition, hdr, &hdr_len, error, 0))
+		return NULL;
+	/* An empty header is passed as NULL, not as a zero-length buffer. */
+	verb = mlx5_glue->dv_create_flow_action_packet_reformat
+		(priv->ctx, hdr_len, (hdr_len ? hdr : NULL),
+		 MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL,
+		 MLX5DV_FLOW_TABLE_TYPE_NIC_TX);
+	if (verb)
+		return verb;
+	/* Creation failed: report it through the flow error. */
+	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+			   NULL, "cannot create vxlan encap action");
+	return NULL;
+}
+
+/**
+ * Build the DV packet-reformat action implementing a VXLAN decap.
+ *
+ * @param[in] dev
+ *   Ethernet device whose private context is used to create the action.
+ * @param[in] action
+ *   Decap action (unused).
+ * @param[out] error
+ *   Filled in when the action cannot be created.
+ *
+ * @return
+ *   Pointer to action on success, NULL otherwise and rte_errno is set.
+ */
+static struct ibv_flow_action *
+flow_dv_create_vxlan_decap(struct rte_eth_dev *dev,
+			   const struct rte_flow_action *action __rte_unused,
+			   struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct ibv_flow_action *verb =
+		mlx5_glue->dv_create_flow_action_packet_reformat
+		(priv->ctx, 0, NULL,
+		 MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2,
+		 MLX5DV_FLOW_TABLE_TYPE_NIC_RX);
+
+	if (!verb)
+		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "cannot create decap action");
+	return verb;
+}
+
+/**
  * Verify the @p attributes will be correctly understood by the NIC and store
  * them in the @p flow if everything is correct.
  *
@@ -347,6 +682,24 @@ 
 			action_flags |= MLX5_FLOW_ACTION_COUNT;
 			++actions_n;
 			break;
+		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
+			ret = flow_dv_validate_action_vxlan_encap(action_flags,
+								  actions, attr,
+								  error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
+			++actions_n;
+			break;
+		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
+			ret = flow_dv_validate_action_vxlan_decap(action_flags,
+								  actions, attr,
+								  error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_FLOW_ACTION_VXLAN_DECAP;
+			++actions_n;
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ACTION,
@@ -1056,14 +1409,23 @@ 
 /**
  * Store the requested actions in an array.
  *
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
  * @param[in] action
  *   Flow action to translate.
  * @param[in, out] dev_flow
  *   Pointer to the mlx5_flow.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static void
-flow_dv_create_action(const struct rte_flow_action *action,
-		      struct mlx5_flow *dev_flow)
+static int
+flow_dv_create_action(struct rte_eth_dev *dev,
+		      const struct rte_flow_action *action,
+		      struct mlx5_flow *dev_flow,
+		      struct rte_flow_error *error)
 {
 	const struct rte_flow_action_queue *queue;
 	const struct rte_flow_action_rss *rss;
@@ -1110,10 +1472,35 @@ 
 		/* Added to array only in apply since we need the QP */
 		flow->actions |= MLX5_FLOW_ACTION_RSS;
 		break;
+	case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
+		dev_flow->dv.actions[actions_n].type =
+			MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION;
+		dev_flow->dv.actions[actions_n].action =
+				flow_dv_create_vxlan_encap(dev, action, error);
+		if (!(dev_flow->dv.actions[actions_n].action))
+			return -rte_errno;
+		dev_flow->dv.encap_verb =
+			dev_flow->dv.actions[actions_n].action;
+		flow->actions |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
+		actions_n++;
+		break;
+	case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
+		dev_flow->dv.actions[actions_n].type =
+			MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION;
+		dev_flow->dv.actions[actions_n].action =
+				flow_dv_create_vxlan_decap(dev, action, error);
+		if (!(dev_flow->dv.actions[actions_n].action))
+			return -rte_errno;
+		dev_flow->dv.decap_verb =
+			dev_flow->dv.actions[actions_n].action;
+		flow->actions |= MLX5_FLOW_ACTION_VXLAN_DECAP;
+		actions_n++;
+		break;
 	default:
 		break;
 	}
 	dev_flow->dv.actions_n = actions_n;
+	return 0;
 }
 
 static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
@@ -1279,8 +1666,10 @@ 
 	matcher.egress = attr->egress;
 	if (flow_dv_matcher_register(dev, &matcher, dev_flow, error))
 		return -rte_errno;
-	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
-		flow_dv_create_action(actions, dev_flow);
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		if (flow_dv_create_action(dev, actions, dev_flow, error))
+			return -rte_errno;
+	}
 	return 0;
 }
 
@@ -1465,6 +1854,16 @@ 
 		LIST_REMOVE(dev_flow, next);
 		if (dev_flow->dv.matcher)
 			flow_dv_matcher_release(dev, dev_flow);
+		if (dev_flow->dv.encap_verb) {
+			claim_zero(mlx5_glue->destroy_flow_action
+						(dev_flow->dv.encap_verb));
+			dev_flow->dv.encap_verb = NULL;
+		}
+		if (dev_flow->dv.decap_verb) {
+			claim_zero(mlx5_glue->destroy_flow_action
+						(dev_flow->dv.decap_verb));
+			dev_flow->dv.decap_verb = NULL;
+		}
 		rte_free(dev_flow);
 	}
 }