From patchwork Wed Oct 24 11:08:18 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dekel Peled X-Patchwork-Id: 47318 X-Patchwork-Delegate: shahafs@mellanox.com Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 9B4AC1B185; Wed, 24 Oct 2018 13:09:11 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by dpdk.org (Postfix) with ESMTP id E41C71B185 for ; Wed, 24 Oct 2018 13:09:09 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from dekelp@mellanox.com) with ESMTPS (AES256-SHA encrypted); 24 Oct 2018 13:14:12 +0200 Received: from mtl-vdi-280.wap.labs.mlnx. (mtl-vdi-280.wap.labs.mlnx [10.128.130.87]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id w9OB8xX3024546; Wed, 24 Oct 2018 14:09:04 +0300 From: Dekel Peled To: yskoh@mellanox.com, shahafs@mellanox.com Cc: dev@dpdk.org, orika@mellanox.com Date: Wed, 24 Oct 2018 14:08:18 +0300 Message-Id: <1540379299-23764-2-git-send-email-dekelp@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1540379299-23764-1-git-send-email-dekelp@mellanox.com> References: <1540379299-23764-1-git-send-email-dekelp@mellanox.com> Subject: [dpdk-dev] [PATCH 1/2] net/mlx5: add VXLAN encap decap to Direct Verbs X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This patch adds support for VXLAN encap and decap operations, in Direct Verbs flow. 
Signed-off-by: Dekel Peled --- drivers/net/mlx5/mlx5_flow.h | 4 + drivers/net/mlx5/mlx5_flow_dv.c | 409 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 408 insertions(+), 5 deletions(-) diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index 38635c9..9c28e50 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -92,6 +92,8 @@ #define MLX5_FLOW_ACTION_DEC_TTL (1u << 19) #define MLX5_FLOW_ACTION_SET_MAC_SRC (1u << 20) #define MLX5_FLOW_ACTION_SET_MAC_DST (1u << 21) +#define MLX5_FLOW_ACTION_VXLAN_ENCAP (1u << 22) +#define MLX5_FLOW_ACTION_VXLAN_DECAP (1u << 23) #define MLX5_FLOW_FATE_ACTIONS \ (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | MLX5_FLOW_ACTION_RSS) @@ -181,6 +183,8 @@ struct mlx5_flow_dv { #ifdef HAVE_IBV_FLOW_DV_SUPPORT struct mlx5dv_flow_action_attr actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS]; /**< Action list. */ + struct ibv_flow_action *encap_verb; /**< Verbs object of encap. */ + struct ibv_flow_action *decap_verb; /**< Verbs object of decap. */ #endif int actions_n; /**< number of actions. */ }; diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index e8f409f..06ecabf 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -35,6 +35,16 @@ #ifdef HAVE_IBV_FLOW_DV_SUPPORT +#define MLX5_UDP 17 +#define MLX5_TCP 6 +#define MLX5_GRE 47 + +/* + * Maximum length, in bytes, of the raw encap header buffer: + * Eth:14/VLAN:8/IPv6:40/TCP:36/TUNNEL:20/Eth:14 (sum: 132) + */ +#define MLX5_ENCAP_LEN 132 + /** * Validate META item. * @@ -97,6 +107,331 @@ } /** + * Validate the vxlan encap action. + * + * @param[in] action_flags + * Holds the actions detected until now. + * @param[in] action + * Pointer to the encap action. + * @param[in] attr + * Pointer to flow attributes + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */
+static int
+flow_dv_validate_action_vxlan_encap(uint64_t action_flags,
+				    const struct rte_flow_action *action,
+				    const struct rte_flow_attr *attr,
+				    struct rte_flow_error *error)
+{
+	const struct rte_flow_action_vxlan_encap *vxlan_encap = action->conf;
+
+	if (!vxlan_encap)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, action,
+					  "configuration cannot be null");
+	/* The item list is walked when the encap action is created. */
+	if (!vxlan_encap->definition)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, action,
+					  "encap definition cannot be null");
+	if (action_flags & MLX5_FLOW_ACTION_DROP)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't drop and encap in same flow");
+	if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't have 2 encap actions in same"
+					  " flow");
+	/*
+	 * The decap validation rejects a preceding encap; reject the
+	 * opposite order too so the pair is refused whichever comes first.
+	 */
+	if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't encap and decap in same flow");
+	if (attr->ingress)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+					  NULL,
+					  "encap action not supported for "
+					  "ingress");
+	return 0;
+}
+
+/**
+ * Validate the vxlan decap action.
+ *
+ * @param[in] action_flags
+ *   Holds the actions detected until now.
+ * @param[in] action
+ *   Pointer to the decap action.
+ * @param[in] attr
+ *   Pointer to flow attributes.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_action_vxlan_decap(uint64_t action_flags,
+			const struct rte_flow_action *action __rte_unused,
+			const struct rte_flow_attr *attr,
+			struct rte_flow_error *error)
+{
+	if (action_flags & MLX5_FLOW_ACTION_DROP)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't drop and decap in same flow");
+	if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't encap and decap in same flow");
+	/* Only one decap verb is stored per flow; refuse a second one. */
+	if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "can't have 2 decap actions in same"
+					  " flow");
+	if (attr->egress)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+					  NULL,
+					  "decap action not supported for "
+					  "egress");
+	return 0;
+}
+
+/*
+ * Number of bytes copied from an item's spec into the encap buffer,
+ * indexed by item type. Types without an entry have length 0.
+ */
+static const size_t item_len[] = {
+	[RTE_FLOW_ITEM_TYPE_VOID] = 0,
+	[RTE_FLOW_ITEM_TYPE_ETH] = sizeof(struct rte_flow_item_eth),
+	[RTE_FLOW_ITEM_TYPE_VLAN] = sizeof(struct rte_flow_item_vlan),
+	[RTE_FLOW_ITEM_TYPE_IPV4] = sizeof(struct rte_flow_item_ipv4),
+	[RTE_FLOW_ITEM_TYPE_IPV6] = sizeof(struct rte_flow_item_ipv6),
+	[RTE_FLOW_ITEM_TYPE_UDP] = sizeof(struct rte_flow_item_udp),
+	[RTE_FLOW_ITEM_TYPE_TCP] = sizeof(struct rte_flow_item_tcp),
+	[RTE_FLOW_ITEM_TYPE_VXLAN] = sizeof(struct rte_flow_item_vxlan),
+	[RTE_FLOW_ITEM_TYPE_GRE] = sizeof(struct rte_flow_item_gre),
+	/* NVGRE has its own spec layout; do not reuse the GRE size. */
+	[RTE_FLOW_ITEM_TYPE_NVGRE] = sizeof(struct rte_flow_item_nvgre),
+	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = sizeof(struct rte_flow_item_vxlan_gpe),
+	[RTE_FLOW_ITEM_TYPE_MPLS] = sizeof(struct rte_flow_item_mpls),
+};
+
+/**
+ * Convert the encap action data from rte_flow_item to raw buffer
+ *
+ * @param[in] item
+ *   Pointer to the first rte_flow_item of a list terminated by
+ *   RTE_FLOW_ITEM_TYPE_END.
+ * @param[out] buf
+ *   Pointer to the output buffer.
+ * @param[out] size
+ *   Pointer to the output buffer size.
+ * @param[out] error
+ *   Pointer to the error structure.
+ * @param[in] l3_type
+ *   When non-zero, value (host byte order) written once all items are
+ *   converted as the protocol type of the VLAN header if one was seen,
+ *   otherwise as the ether type of the Ethernet header. 0 leaves the
+ *   headers as converted.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_convert_encap(struct rte_flow_item *item, uint8_t *buf, size_t *size,
+		      struct rte_flow_error *error, uint16_t l3_type)
+{
+	/* Number of item types item_len[] can be indexed with. */
+	const size_t item_len_n = sizeof(item_len) / sizeof(item_len[0]);
+	struct ether_hdr *eth = NULL;
+	struct vlan_hdr *vlan = NULL;
+	struct ipv4_hdr *ipv4 = NULL;
+	struct ipv6_hdr *ipv6 = NULL;
+	struct udp_hdr *udp = NULL;
+	struct vxlan_hdr *vxlan = NULL;
+	const struct rte_flow_item_vlan *vlan_spec;
+	size_t len;
+
+	assert(item);
+	*size = 0;
+	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+		/* Item types beyond the table must not be used as index. */
+		if ((size_t)item->type >= item_len_n)
+			return rte_flow_error_set(error, EINVAL,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  (void *)item->type,
+						  "unsupported item type");
+		/* TODO: variable length handling: raw, vxlan and nvgre. */
+		len = item_len[item->type];
+		if (len + *size > MLX5_ENCAP_LEN)
+			return rte_flow_error_set(error, EINVAL,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  (void *)item->type,
+						  "invalid item length");
+		/* The spec is copied verbatim below, so it has to exist. */
+		if (len && !item->spec)
+			return rte_flow_error_set(error, EINVAL,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  (void *)item->type,
+						  "item spec cannot be null");
+		rte_memcpy((void *)&buf[*size], item->spec, len);
+		/*
+		 * Each case remembers where its header landed in buf and
+		 * fills fields the spec left at zero, in this header and
+		 * in the protocol field of the enclosing one.
+		 */
+		switch (item->type) {
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			eth = (void *)&buf[*size];
+			break;
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			vlan_spec = item->spec;
+			vlan = (void *)&buf[*size];
+			if (!eth)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						(void *)item->type,
+						"eth header not found");
+			vlan->vlan_tci = vlan_spec->tci;
+			vlan->eth_proto = vlan_spec->inner_type;
+			if (!eth->ether_type)
+				eth->ether_type = htons(ETHER_TYPE_VLAN);
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			ipv4 = (void *)&buf[*size];
+			if (!vlan && !eth)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						(void *)item->type,
+						"neither eth nor vlan header found");
+			if (vlan && !vlan->eth_proto)
+				vlan->eth_proto = htons(ETHER_TYPE_IPv4);
+			else if (eth && !eth->ether_type)
+				eth->ether_type = htons(ETHER_TYPE_IPv4);
+			if (!ipv4->version_ihl)
+				ipv4->version_ihl = 0x45;
+			if (!ipv4->time_to_live)
+				ipv4->time_to_live = 0x40;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			ipv6 = (void *)&buf[*size];
+			if (!vlan && !eth)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						(void *)item->type,
+						"neither eth nor vlan header found");
+			if (vlan && !vlan->eth_proto)
+				vlan->eth_proto = htons(ETHER_TYPE_IPv6);
+			else if (eth && !eth->ether_type)
+				eth->ether_type = htons(ETHER_TYPE_IPv6);
+			if (!ipv6->vtc_flow)
+				ipv6->vtc_flow = htonl(0x60000000);
+			if (!ipv6->hop_limits)
+				ipv6->hop_limits = 0xff;
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			udp = (void *)&buf[*size];
+			if (!ipv4 && !ipv6)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						(void *)item->type,
+						"ip header not found");
+			if (ipv4 && !ipv4->next_proto_id)
+				ipv4->next_proto_id = MLX5_UDP;
+			else if (ipv6 && !ipv6->proto)
+				ipv6->proto = MLX5_UDP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN:
+			vxlan = (void *)&buf[*size];
+			if (!udp)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						(void *)item->type,
+						"udp header not found");
+			if (!udp->dst_port)
+				udp->dst_port = htons(MLX5_UDP_PORT_VXLAN);
+			if (!vxlan->vx_flags)
+				vxlan->vx_flags = htonl(0x08000000);
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+			vxlan = (void *)&buf[*size];
+			if (!udp)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						(void *)item->type,
+						"udp header not found");
+			if (!udp->dst_port)
+				udp->dst_port = htons(MLX5_UDP_PORT_VXLAN_GPE);
+			if (!vxlan->vx_flags)
+				vxlan->vx_flags = htonl(0x0c000003);
+			break;
+		case RTE_FLOW_ITEM_TYPE_GRE:
+		case RTE_FLOW_ITEM_TYPE_NVGRE:
+			if (!ipv4 && !ipv6)
+				return rte_flow_error_set(error, EINVAL,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						(void *)item->type,
+						"ip header not found");
+			/*
+			 * The IP protocol fields are a single byte: no byte
+			 * swap, which would truncate the value to 0 on
+			 * little-endian hosts.
+			 */
+			if (ipv4 && !ipv4->next_proto_id)
+				ipv4->next_proto_id = MLX5_GRE;
+			else if (ipv6 && !ipv6->proto)
+				ipv6->proto = MLX5_GRE;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VOID:
+			break;
+		default:
+			return rte_flow_error_set(error, EINVAL,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  (void *)item->type,
+						  "unsupported item type");
+		}
+		*size += len;
+	}
+	/* A non-zero l3_type overrides the innermost L2 protocol field. */
+	if (l3_type && vlan)
+		vlan->eth_proto = htons(l3_type);
+	else if (l3_type && eth)
+		eth->ether_type = htons(l3_type);
+	return 0;
+}
+
+/**
+ * Convert VXLAN encap action to DV specification.
+ *
+ * The item list of the action configuration is converted to a raw header
+ * buffer, which is then handed to the packet reformat verb.
+ *
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
+ * @param[in] action
+ *   Pointer to action structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Pointer to action on success, NULL otherwise and rte_errno is set.
+ */
+static struct ibv_flow_action *
+flow_dv_create_vxlan_encap(struct rte_eth_dev *dev,
+			   const struct rte_flow_action *action,
+			   struct rte_flow_error *error)
+{
+	struct ibv_flow_action *encap_verb = NULL;
+	const struct rte_flow_action_vxlan_encap *encap_data = action->conf;
+	struct priv *priv = dev->data->dev_private;
+	uint8_t buf[MLX5_ENCAP_LEN];
+	size_t size = 0;
+
+	/* The converter dereferences the item list unconditionally. */
+	if (!encap_data || !encap_data->definition) {
+		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+				   action,
+				   "vxlan encap definition cannot be null");
+		return NULL;
+	}
+	/* On failure the converter has already filled in the error. */
+	if (flow_dv_convert_encap(encap_data->definition, buf, &size,
+				  error, 0))
+		return NULL;
+	encap_verb = mlx5_glue->dv_create_flow_action_packet_reformat
+		(priv->ctx, size, (size ? buf : NULL),
+		 MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL,
+		 MLX5DV_FLOW_TABLE_TYPE_NIC_TX);
+	if (!encap_verb)
+		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+				   NULL, "cannot create vxlan encap action");
+	return encap_verb;
+}
+
+/**
+ * Convert VXLAN decap action to DV specification.
+ *
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
+ * @param[in] action
+ *   Pointer to action structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Pointer to action on success, NULL otherwise and rte_errno is set.
+ */
+static struct ibv_flow_action *
+flow_dv_create_vxlan_decap(struct rte_eth_dev *dev,
+			   const struct rte_flow_action *action __rte_unused,
+			   struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct ibv_flow_action *verb;
+
+	/* Decap carries no header data: zero length and no buffer. */
+	verb = mlx5_glue->dv_create_flow_action_packet_reformat
+		(priv->ctx, 0, NULL,
+		 MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2,
+		 MLX5DV_FLOW_TABLE_TYPE_NIC_RX);
+	if (verb)
+		return verb;
+	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+			   NULL, "cannot create decap action");
+	return NULL;
+}
+
+/**
  * Verify the @p attributes will be correctly understood by the NIC and store
  * them in the @p flow if everything is correct.
  *
@@ -347,6 +682,24 @@
 			action_flags |= MLX5_FLOW_ACTION_COUNT;
 			++actions_n;
 			break;
+		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
+			ret = flow_dv_validate_action_vxlan_encap(action_flags,
+								  actions, attr,
+								  error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
+			++actions_n;
+			break;
+		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
+			ret = flow_dv_validate_action_vxlan_decap(action_flags,
+								  actions, attr,
+								  error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_FLOW_ACTION_VXLAN_DECAP;
+			++actions_n;
+			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
 						  RTE_FLOW_ERROR_TYPE_ACTION,
@@ -1056,14 +1409,23 @@
 /**
  * Store the requested actions in an array.
  *
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
  * @param[in] action
  *   Flow action to translate.
  * @param[in, out] dev_flow
  *   Pointer to the mlx5_flow.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
*/ -static void -flow_dv_create_action(const struct rte_flow_action *action, - struct mlx5_flow *dev_flow) +static int +flow_dv_create_action(struct rte_eth_dev *dev, + const struct rte_flow_action *action, + struct mlx5_flow *dev_flow, + struct rte_flow_error *error) { const struct rte_flow_action_queue *queue; const struct rte_flow_action_rss *rss; @@ -1110,10 +1472,35 @@ /* Added to array only in apply since we need the QP */ flow->actions |= MLX5_FLOW_ACTION_RSS; break; + case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: + dev_flow->dv.actions[actions_n].type = + MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION; + dev_flow->dv.actions[actions_n].action = + flow_dv_create_vxlan_encap(dev, action, error); + if (!(dev_flow->dv.actions[actions_n].action)) + return -rte_errno; + dev_flow->dv.encap_verb = + dev_flow->dv.actions[actions_n].action; + flow->actions |= MLX5_FLOW_ACTION_VXLAN_ENCAP; + actions_n++; + break; + case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: + dev_flow->dv.actions[actions_n].type = + MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION; + dev_flow->dv.actions[actions_n].action = + flow_dv_create_vxlan_decap(dev, action, error); + if (!(dev_flow->dv.actions[actions_n].action)) + return -rte_errno; + dev_flow->dv.decap_verb = + dev_flow->dv.actions[actions_n].action; + flow->actions |= MLX5_FLOW_ACTION_VXLAN_DECAP; + actions_n++; + break; default: break; } dev_flow->dv.actions_n = actions_n; + return 0; } static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 }; @@ -1279,8 +1666,10 @@ matcher.egress = attr->egress; if (flow_dv_matcher_register(dev, &matcher, dev_flow, error)) return -rte_errno; - for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) - flow_dv_create_action(actions, dev_flow); + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { + if (flow_dv_create_action(dev, actions, dev_flow, error)) + return -rte_errno; + } return 0; } @@ -1465,6 +1854,16 @@ LIST_REMOVE(dev_flow, next); if (dev_flow->dv.matcher) flow_dv_matcher_release(dev, dev_flow); + if 
(dev_flow->dv.encap_verb) { + claim_zero(mlx5_glue->destroy_flow_action + (dev_flow->dv.encap_verb)); + dev_flow->dv.encap_verb = NULL; + } + if (dev_flow->dv.decap_verb) { + claim_zero(mlx5_glue->destroy_flow_action + (dev_flow->dv.decap_verb)); + dev_flow->dv.decap_verb = NULL; + } rte_free(dev_flow); } }