[v3,07/13] net/mlx5: add VXLAN support to flow prepare routine

Message ID 1541074741-41368-8-git-send-email-viacheslavo@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler
Headers
Series net/mlx5: e-switch VXLAN encap/decap hardware offload |

Checks

Context Check Description
ci/Intel-compilation success Compilation OK

Commit Message

Slava Ovsiienko Nov. 1, 2018, 12:19 p.m. UTC
  The e-switch Flow prepare function is updated to support VXLAN
encapsulation and decapsulation actions. The function calculates
buffer size for Netlink message and Flow description structures,
including optional ones for tunneling purposes.

Suggested-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow_tcf.c | 133 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 130 insertions(+), 3 deletions(-)
  

Comments

Yongseok Koh Nov. 1, 2018, 9:03 p.m. UTC | #1
On Thu, Nov 01, 2018 at 05:19:28AM -0700, Slava Ovsiienko wrote:
> The e-switch Flow prepare function is updated to support VXLAN
> encapsulation and decapsulation actions. The function calculates
> buffer size for Netlink message and Flow description structures,
> including optional ones for tunneling purposes.
> 
> Suggested-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5_flow_tcf.c | 133 ++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 130 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> index 7e00232..b5be264 100644
> --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> @@ -2389,7 +2389,7 @@ struct pedit_parser {
>  		case RTE_FLOW_ITEM_TYPE_IPV6:
>  			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
>  				SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
> -				SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
> +				SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 4;
>  				/* dst/src IP addr and mask. */
>  			flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
>  			break;
> @@ -2405,6 +2405,10 @@ struct pedit_parser {
>  				/* dst/src port and mask. */
>  			flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
>  			break;
> +		case RTE_FLOW_ITEM_TYPE_VXLAN:
> +			size += SZ_NLATTR_TYPE_OF(uint32_t);
> +			flags |= MLX5_FLOW_LAYER_VXLAN;
> +			break;
>  		default:
>  			DRV_LOG(WARNING,
>  				"unsupported item %p type %d,"
> @@ -2418,6 +2422,69 @@ struct pedit_parser {
>  }
>  
>  /**
> + * Calculate size of memory to store the VXLAN encapsulation
> + * related items in the Netlink message buffer. Items list
> + * is specified by RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action.
> + * The item list should be validated.
> + *
> + * @param[in] action
> + *   RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action object.
> + *   List of pattern items to scan data from.
> + *
> + * @return
> + *   The size of the part of the Netlink message buffer to store the
> + *   VXLAN encapsulation item attributes.
> + */
> +static int
> +flow_tcf_vxlan_encap_size(const struct rte_flow_action *action)
> +{
> +	const struct rte_flow_item *items;
> +	int size = 0;
> +
> +	assert(action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP);
> +	assert(action->conf);
> +
> +	items = ((const struct rte_flow_action_vxlan_encap *)
> +					action->conf)->definition;
> +	assert(items);
> +	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
> +		switch (items->type) {
> +		case RTE_FLOW_ITEM_TYPE_VOID:
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_ETH:
> +			/* This item does not require message buffer. */
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_IPV4:
> +			size += SZ_NLATTR_DATA_OF(IPV4_ADDR_LEN) * 2;
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_IPV6:
> +			size += SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 2;
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_UDP: {
> +			const struct rte_flow_item_udp *udp = items->mask;
> +
> +			size += SZ_NLATTR_TYPE_OF(uint16_t);
> +			if (!udp || udp->hdr.src_port != RTE_BE16(0x0000))
> +				size += SZ_NLATTR_TYPE_OF(uint16_t);
> +			break;
> +		}
> +		case RTE_FLOW_ITEM_TYPE_VXLAN:
> +			size +=	SZ_NLATTR_TYPE_OF(uint32_t);
> +			break;
> +		default:
> +			assert(false);
> +			DRV_LOG(WARNING,
> +				"unsupported item %p type %d,"
> +				" items must be validated"
> +				" before flow creation",
> +				(const void *)items, items->type);
> +			return 0;
> +		}
> +	}
> +	return size;
> +}
> +
> +/**
>   * Calculate maximum size of memory for flow actions of Linux TC flower and
>   * extract specified actions.
>   *
> @@ -2486,6 +2553,29 @@ struct pedit_parser {
>  				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
>  				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> +			size += SZ_NLATTR_NEST + /* na_act_index. */
> +				SZ_NLATTR_STRZ_OF("tunnel_key") +
> +				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
> +				SZ_NLATTR_TYPE_OF(uint8_t);
> +			size += SZ_NLATTR_TYPE_OF(struct tc_tunnel_key);
> +			size +=	flow_tcf_vxlan_encap_size(actions) +
> +				RTE_ALIGN_CEIL /* preceding encap params. */
> +				(sizeof(struct flow_tcf_vxlan_encap),
> +				MNL_ALIGNTO);
> +			flags |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
> +			size += SZ_NLATTR_NEST + /* na_act_index. */
> +				SZ_NLATTR_STRZ_OF("tunnel_key") +
> +				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
> +				SZ_NLATTR_TYPE_OF(uint8_t);
> +			size +=	SZ_NLATTR_TYPE_OF(struct tc_tunnel_key);
> +			size +=	RTE_ALIGN_CEIL /* preceding decap params. */
> +				(sizeof(struct flow_tcf_vxlan_decap),
> +				MNL_ALIGNTO);
> +			flags |= MLX5_FLOW_ACTION_VXLAN_DECAP;
> +			break;
>  		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
>  		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
>  		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
> @@ -2561,12 +2651,15 @@ struct pedit_parser {
>  		 uint64_t *item_flags, uint64_t *action_flags,
>  		 struct rte_flow_error *error)
>  {
> -	size_t size = sizeof(struct mlx5_flow) +
> +	size_t size = RTE_ALIGN_CEIL
> +			(sizeof(struct mlx5_flow),
> +			 alignof(struct flow_tcf_tunnel_hdr)) +
>  		      MNL_ALIGN(sizeof(struct nlmsghdr)) +
>  		      MNL_ALIGN(sizeof(struct tcmsg));
>  	struct mlx5_flow *dev_flow;
>  	struct nlmsghdr *nlh;
>  	struct tcmsg *tcm;
> +	uint8_t *sp, *tun = NULL;
>  
>  	size += flow_tcf_get_items_and_size(attr, items, item_flags);
>  	size += flow_tcf_get_actions_and_size(actions, action_flags);
> @@ -2577,10 +2670,44 @@ struct pedit_parser {
>  				   "not enough memory to create E-Switch flow");
>  		return NULL;
>  	}
> -	nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
> +	sp = (uint8_t *)(dev_flow + 1);
> +	if (*action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) {
> +		sp = RTE_PTR_ALIGN
> +			(sp, alignof(struct flow_tcf_tunnel_hdr));
> +		tun = sp;
> +		sp += RTE_ALIGN_CEIL
> +			(sizeof(struct flow_tcf_vxlan_encap),
> +			MNL_ALIGNTO);
> +#ifndef NDEBUG
> +		size -= RTE_ALIGN_CEIL
> +			(sizeof(struct flow_tcf_vxlan_encap),
> +			MNL_ALIGNTO);
> +#endif
> +	} else if (*action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
> +		sp = RTE_PTR_ALIGN
> +			(sp, alignof(struct flow_tcf_tunnel_hdr));
> +		tun = sp;
> +		sp += RTE_ALIGN_CEIL
> +			(sizeof(struct flow_tcf_vxlan_decap),
> +			MNL_ALIGNTO);
> +#ifndef NDEBUG
> +		size -= RTE_ALIGN_CEIL
> +			(sizeof(struct flow_tcf_vxlan_decap),
> +			MNL_ALIGNTO);
> +#endif
> +	} else {
> +		sp = RTE_PTR_ALIGN(sp, MNL_ALIGNTO);
> +	}
> +	nlh = mnl_nlmsg_put_header(sp);
>  	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
>  	*dev_flow = (struct mlx5_flow){
>  		.tcf = (struct mlx5_flow_tcf){
> +#ifndef NDEBUG
> +			.nlsize = size - RTE_ALIGN_CEIL
> +				(sizeof(struct mlx5_flow),
> +				 alignof(struct flow_tcf_tunnel_hdr)),
> +#endif
> +			.tunnel = (struct flow_tcf_tunnel_hdr *)tun,
>  			.nlh = nlh,
>  			.tcm = tcm,
>  		},

One favor. Can you set the header type here instead of _translate()?

	dev_flow->tcf.tunnel->type = tun_type;

tun_type can be set above (FLOW_TCF_TUNACT_VXLAN_ENCAP/DECAP).
This is because I'm deleting flow->actions field and dev_flow->layers field as
it causes some confusion in parsing.

Please put my acked-by tag if you agree and make the change in v4.

Thanks,
Yongseok
  

Patch

diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index 7e00232..b5be264 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -2389,7 +2389,7 @@  struct pedit_parser {
 		case RTE_FLOW_ITEM_TYPE_IPV6:
 			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
 				SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
-				SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
+				SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 4;
 				/* dst/src IP addr and mask. */
 			flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
 			break;
@@ -2405,6 +2405,10 @@  struct pedit_parser {
 				/* dst/src port and mask. */
 			flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
 			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN:
+			size += SZ_NLATTR_TYPE_OF(uint32_t);
+			flags |= MLX5_FLOW_LAYER_VXLAN;
+			break;
 		default:
 			DRV_LOG(WARNING,
 				"unsupported item %p type %d,"
@@ -2418,6 +2422,69 @@  struct pedit_parser {
 }
 
 /**
+ * Calculate size of memory to store the VXLAN encapsulation
+ * related items in the Netlink message buffer. Items list
+ * is specified by RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action.
+ * The item list should be validated.
+ *
+ * @param[in] action
+ *   RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action object.
+ *   List of pattern items to scan data from.
+ *
+ * @return
+ *   The size of the part of the Netlink message buffer to store the
+ *   VXLAN encapsulation item attributes.
+ */
+static int
+flow_tcf_vxlan_encap_size(const struct rte_flow_action *action)
+{
+	const struct rte_flow_item *items;
+	int size = 0;
+
+	assert(action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP);
+	assert(action->conf);
+
+	items = ((const struct rte_flow_action_vxlan_encap *)
+					action->conf)->definition;
+	assert(items);
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+		switch (items->type) {
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			/* This item does not require message buffer. */
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			size += SZ_NLATTR_DATA_OF(IPV4_ADDR_LEN) * 2;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			size += SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 2;
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP: {
+			const struct rte_flow_item_udp *udp = items->mask;
+
+			size += SZ_NLATTR_TYPE_OF(uint16_t);
+			if (!udp || udp->hdr.src_port != RTE_BE16(0x0000))
+				size += SZ_NLATTR_TYPE_OF(uint16_t);
+			break;
+		}
+		case RTE_FLOW_ITEM_TYPE_VXLAN:
+			size +=	SZ_NLATTR_TYPE_OF(uint32_t);
+			break;
+		default:
+			assert(false);
+			DRV_LOG(WARNING,
+				"unsupported item %p type %d,"
+				" items must be validated"
+				" before flow creation",
+				(const void *)items, items->type);
+			return 0;
+		}
+	}
+	return size;
+}
+
+/**
  * Calculate maximum size of memory for flow actions of Linux TC flower and
  * extract specified actions.
  *
@@ -2486,6 +2553,29 @@  struct pedit_parser {
 				SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
 				SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
 			break;
+		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
+			size += SZ_NLATTR_NEST + /* na_act_index. */
+				SZ_NLATTR_STRZ_OF("tunnel_key") +
+				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+				SZ_NLATTR_TYPE_OF(uint8_t);
+			size += SZ_NLATTR_TYPE_OF(struct tc_tunnel_key);
+			size +=	flow_tcf_vxlan_encap_size(actions) +
+				RTE_ALIGN_CEIL /* preceding encap params. */
+				(sizeof(struct flow_tcf_vxlan_encap),
+				MNL_ALIGNTO);
+			flags |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
+			break;
+		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
+			size += SZ_NLATTR_NEST + /* na_act_index. */
+				SZ_NLATTR_STRZ_OF("tunnel_key") +
+				SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+				SZ_NLATTR_TYPE_OF(uint8_t);
+			size +=	SZ_NLATTR_TYPE_OF(struct tc_tunnel_key);
+			size +=	RTE_ALIGN_CEIL /* preceding decap params. */
+				(sizeof(struct flow_tcf_vxlan_decap),
+				MNL_ALIGNTO);
+			flags |= MLX5_FLOW_ACTION_VXLAN_DECAP;
+			break;
 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
@@ -2561,12 +2651,15 @@  struct pedit_parser {
 		 uint64_t *item_flags, uint64_t *action_flags,
 		 struct rte_flow_error *error)
 {
-	size_t size = sizeof(struct mlx5_flow) +
+	size_t size = RTE_ALIGN_CEIL
+			(sizeof(struct mlx5_flow),
+			 alignof(struct flow_tcf_tunnel_hdr)) +
 		      MNL_ALIGN(sizeof(struct nlmsghdr)) +
 		      MNL_ALIGN(sizeof(struct tcmsg));
 	struct mlx5_flow *dev_flow;
 	struct nlmsghdr *nlh;
 	struct tcmsg *tcm;
+	uint8_t *sp, *tun = NULL;
 
 	size += flow_tcf_get_items_and_size(attr, items, item_flags);
 	size += flow_tcf_get_actions_and_size(actions, action_flags);
@@ -2577,10 +2670,44 @@  struct pedit_parser {
 				   "not enough memory to create E-Switch flow");
 		return NULL;
 	}
-	nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
+	sp = (uint8_t *)(dev_flow + 1);
+	if (*action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) {
+		sp = RTE_PTR_ALIGN
+			(sp, alignof(struct flow_tcf_tunnel_hdr));
+		tun = sp;
+		sp += RTE_ALIGN_CEIL
+			(sizeof(struct flow_tcf_vxlan_encap),
+			MNL_ALIGNTO);
+#ifndef NDEBUG
+		size -= RTE_ALIGN_CEIL
+			(sizeof(struct flow_tcf_vxlan_encap),
+			MNL_ALIGNTO);
+#endif
+	} else if (*action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
+		sp = RTE_PTR_ALIGN
+			(sp, alignof(struct flow_tcf_tunnel_hdr));
+		tun = sp;
+		sp += RTE_ALIGN_CEIL
+			(sizeof(struct flow_tcf_vxlan_decap),
+			MNL_ALIGNTO);
+#ifndef NDEBUG
+		size -= RTE_ALIGN_CEIL
+			(sizeof(struct flow_tcf_vxlan_decap),
+			MNL_ALIGNTO);
+#endif
+	} else {
+		sp = RTE_PTR_ALIGN(sp, MNL_ALIGNTO);
+	}
+	nlh = mnl_nlmsg_put_header(sp);
 	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
 	*dev_flow = (struct mlx5_flow){
 		.tcf = (struct mlx5_flow_tcf){
+#ifndef NDEBUG
+			.nlsize = size - RTE_ALIGN_CEIL
+				(sizeof(struct mlx5_flow),
+				 alignof(struct flow_tcf_tunnel_hdr)),
+#endif
+			.tunnel = (struct flow_tcf_tunnel_hdr *)tun,
 			.nlh = nlh,
 			.tcm = tcm,
 		},