[3/4] net/mlx5: fix PMD crash after tunnel offload match rule destruction

Message ID: 20201111071417.21177-4-getelson@nvidia.com (mailing list archive)
State: Superseded, archived
Delegated to: Raslan Darawsheh
Series: restore tunnel offload functionality in mlx5

Checks

Context        Check    Description
ci/checkpatch  success  coding style OK

Commit Message

Gregory Etelson Nov. 11, 2020, 7:14 a.m. UTC
The new flow table resource management API triggered a PMD crash in
tunnel offload mode when a tunnel match flow rule was inserted before
the tunnel set rule.

The crash was caused by a double flow table registration: the table
was registered first by the tunnel offload code and once more by the
PMD code as part of general table processing. The table reference
counter was decremented only once during rule destruction, causing a
resource leak that triggered the crash.

The patch updates the PMD table registration with the tunnel offload
parameters and removes the duplicate table registration from the
tunnel offload code.

Fixes: 663ad57dabb2 ("net/mlx5: make flow table cache thread safe")

Signed-off-by: Gregory Etelson <getelson@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.c    | 16 ++++++++++----
 drivers/net/mlx5/mlx5_flow_dv.c | 39 +++++++++++++++++----------------
 2 files changed, 32 insertions(+), 23 deletions(-)
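
The reference-count mechanics behind the crash can be reproduced in
miniature. The sketch below is a standalone illustration, not the mlx5
code: table_register() and table_unregister() are hypothetical stand-ins
for the refcounted flow table cache. Two registrations paired with a
single unregistration pin the counter above zero, so the table is never
released:

#include <stdio.h>

struct table_entry {
	unsigned int refcnt;
};

static void
table_register(struct table_entry *te)
{
	te->refcnt++;
}

static void
table_unregister(struct table_entry *te)
{
	if (--te->refcnt == 0)
		printf("table freed\n");
}

int
main(void)
{
	struct table_entry te = { .refcnt = 0 };

	table_register(&te);   /* tunnel offload code */
	table_register(&te);   /* PMD code, general table processing */
	table_unregister(&te); /* rule destruction: 2 -> 1, never freed */
	printf("leaked refcnt = %u\n", te.refcnt);
	return 0;
}

After the patch only one path takes the reference, so rule destruction
brings the counter back to zero and the table is freed.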

Comments

Slava Ovsiienko Nov. 11, 2020, 8:51 a.m. UTC | #1
> -----Original Message-----
> From: Gregory Etelson <getelson@nvidia.com>
> Sent: Wednesday, November 11, 2020 9:14
> To: dev@dpdk.org
> Cc: Gregory Etelson <getelson@nvidia.com>; Matan Azrad
> <matan@nvidia.com>; Raslan Darawsheh <rasland@nvidia.com>; Shahaf
> Shuler <shahafs@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>;
> Xueming(Steven) Li <xuemingl@nvidia.com>
> Subject: [PATCH 3/4] net/mlx5: fix PMD crash after tunnel offload match rule
> destruction
> 
> [...]
> Signed-off-by: Gregory Etelson <getelson@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

Patch

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 2f01e34033..185b4ba51a 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -7024,7 +7024,15 @@  tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
 	struct mlx5_hlist *group_hash;
 
 	group_hash = tunnel ? tunnel->groups : thub->groups;
-	he = mlx5_hlist_register(group_hash, key.val, NULL);
+	he = mlx5_hlist_lookup(group_hash, key.val, NULL);
+	if (!he) {
+		DRV_LOG(DEBUG, "port %u tunnel %u group=%u - generate table id",
+			dev->data->port_id, key.tunnel_id, group);
+		he = mlx5_hlist_register(group_hash, key.val, NULL);
+	} else {
+		DRV_LOG(DEBUG, "port %u tunnel %u group=%u - skip table id",
+			dev->data->port_id, key.tunnel_id, group);
+	}
 	if (!he)
 		return rte_flow_error_set(error, EINVAL,
 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
@@ -7032,8 +7040,8 @@  tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
 					  "tunnel group index not supported");
 	tte = container_of(he, typeof(*tte), hash);
 	*table = tte->flow_table;
-	DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
-		dev->data->port_id, key.tunnel_id, group, *table);
+	DRV_LOG(DEBUG, "port %u tunnel %u group=%u table=%u",
+		dev->data->port_id, key.tunnel_id, group, *table);
 	return 0;
 }
 
@@ -7114,7 +7122,7 @@  mlx5_flow_group_to_table(struct rte_eth_dev *dev,
 		standard_translation = true;
 	}
 	DRV_LOG(DEBUG,
-		"port %u group=%#x transfer=%d external=%d fdb_def_rule=%d translate=%s",
+		"port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
 		dev->data->port_id, group, grp_info.transfer,
 		grp_info.external, grp_info.fdb_def_rule,
 		standard_translation ? "STANDARD" : "TUNNEL");
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 78c710fef9..95165980f4 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -8042,6 +8042,8 @@  flow_dv_tbl_resource_get(struct rte_eth_dev *dev,
 				   "cannot get table");
 		return NULL;
 	}
+	DRV_LOG(DEBUG, "Table_id %u tunnel %u group %u registered.",
+		table_id, tunnel ? tunnel->tunnel_id : 0, group_id);
 	tbl_data = container_of(entry, struct mlx5_flow_tbl_data_entry, entry);
 	return &tbl_data->tbl;
 }
@@ -8080,7 +8082,7 @@  flow_dv_tbl_remove_cb(struct mlx5_hlist *list,
 		if (he)
 			mlx5_hlist_unregister(tunnel_grp_hash, he);
 		DRV_LOG(DEBUG,
-			"Table_id %#x tunnel %u group %u released.",
+			"Table_id %u tunnel %u group %u released.",
 			table_id,
 			tbl_data->tunnel ?
 			tbl_data->tunnel->tunnel_id : 0,
@@ -8192,6 +8194,8 @@  flow_dv_matcher_register(struct rte_eth_dev *dev,
 			 struct mlx5_flow_dv_matcher *ref,
 			 union mlx5_flow_tbl_key *key,
 			 struct mlx5_flow *dev_flow,
+			 const struct mlx5_flow_tunnel *tunnel,
+			 uint32_t group_id,
 			 struct rte_flow_error *error)
 {
 	struct mlx5_cache_entry *entry;
@@ -8203,8 +8207,14 @@  flow_dv_matcher_register(struct rte_eth_dev *dev,
 		.data = ref,
 	};
 
-	tbl = flow_dv_tbl_resource_get(dev, key->table_id, key->direction,
-				       key->domain, false, NULL, 0, 0, error);
+	/*
+	 * The tunnel offload API requires this registration for the case
+	 * when a tunnel match rule is inserted before the tunnel set rule.
+	 */
+	tbl = flow_dv_tbl_resource_get(dev, key->table_id,
+				       key->direction, key->domain,
+				       dev_flow->external, tunnel,
+				       group_id, 0, error);
 	if (!tbl)
 		return -rte_errno;	/* No need to refill the error info */
 	tbl_data = container_of(tbl, struct mlx5_flow_tbl_data_entry, tbl);
@@ -9605,10 +9615,14 @@  flow_dv_translate(struct rte_eth_dev *dev,
 		/*
 		 * do not add decap action if match rule drops packet
 		 * HW rejects rules with decap & drop
+		 *
+		 * If a tunnel match rule was inserted before the matching
+		 * tunnel set rule, the flow table used in the match rule
+		 * must be registered. The current implementation handles
+		 * that in flow_dv_matcher_register() at the function end.
 		 */
 		bool add_decap = true;
 		const struct rte_flow_action *ptr = actions;
-		struct mlx5_flow_tbl_resource *tbl;
 
 		for (; ptr->type != RTE_FLOW_ACTION_TYPE_END; ptr++) {
 			if (ptr->type == RTE_FLOW_ACTION_TYPE_DROP) {
@@ -9625,20 +9639,6 @@  flow_dv_translate(struct rte_eth_dev *dev,
 					dev_flow->dv.encap_decap->action;
 			action_flags |= MLX5_FLOW_ACTION_DECAP;
 		}
-		/*
-		 * bind table_id with <group, table> for tunnel match rule.
-		 * Tunnel set rule establishes that bind in JUMP action handler.
-		 * Required for scenario when application creates tunnel match
-		 * rule before tunnel set rule.
-		 */
-		tbl = flow_dv_tbl_resource_get(dev, table, attr->egress,
-					       attr->transfer,
-					       !!dev_flow->external, tunnel,
-					       attr->group, 0, error);
-		if (!tbl)
-			return rte_flow_error_set
-			       (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
-			       actions, "cannot register tunnel group");
 	}
 	for (; !actions_end ; actions++) {
 		const struct rte_flow_action_queue *queue;
@@ -10468,7 +10468,8 @@  flow_dv_translate(struct rte_eth_dev *dev,
 	tbl_key.domain = attr->transfer;
 	tbl_key.direction = attr->egress;
 	tbl_key.table_id = dev_flow->dv.group;
-	if (flow_dv_matcher_register(dev, &matcher, &tbl_key, dev_flow, error))
+	if (flow_dv_matcher_register(dev, &matcher, &tbl_key, dev_flow,
+				     tunnel, attr->group, error))
 		return -rte_errno;
 	return 0;
 }