[v4] net/ice: fix crash on closing representor ports

Message ID 20231102101103.3707173-1-mingjinx.ye@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Qi Zhang
Headers
Series [v4] net/ice: fix crash on closing representor ports |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/github-robot: build success github build: passed
ci/intel-Functional success Functional PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-compile-arm64-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS

Commit Message

Mingjin Ye Nov. 2, 2023, 10:11 a.m. UTC
  Since the representor port needs to access the resource of the
associated DCF when it is closing. Therefore, all the representor
port should be closed first, and then close the associated DCF port.

If the DCF port is closed before the representor port on PMD exit.
This will result in accessing freed resources and eventually a
core dump will occur.

This patch fixes this issue by notifying each other to unassociate
when the DCF port and the representor port are closed.

Fixes: 5674465a32c8 ("net/ice: add DCF VLAN handling")
Fixes: c7e1a1a3bfeb ("net/ice: refactor DCF VLAN handling")
Fixes: da9cdcd1f372 ("net/ice: fix crash on representor port closing")
Cc: stable@dpdk.org

Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
---
v2: Reformat code to remove unneeded fixlines.
---
v3: New solution.
---
v4: Optimize v2 patch.
---
 drivers/net/ice/ice_dcf_ethdev.c         | 20 ++++++++++++++++++++
 drivers/net/ice/ice_dcf_ethdev.h         |  2 ++
 drivers/net/ice/ice_dcf_vf_representor.c | 23 ++++++++++++++++++-----
 3 files changed, 40 insertions(+), 5 deletions(-)
  

Comments

Qi Zhang Nov. 6, 2023, 1:23 a.m. UTC | #1
> -----Original Message-----
> From: Ye, MingjinX <mingjinx.ye@intel.com>
> Sent: Thursday, November 2, 2023 6:11 PM
> To: dev@dpdk.org
> Cc: Yang, Qiming <qiming.yang@intel.com>; Zhou, YidingX
> <yidingx.zhou@intel.com>; Ye, MingjinX <mingjinx.ye@intel.com>;
> stable@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>
> Subject: [PATCH v4] net/ice: fix crash on closing representor ports
> 
> Since the representor port needs to access the resource of the associated DCF
> when it is closing. Therefore, all the representor port should be closed first,
> and then close the associated DCF port.
> 
> If the DCF port is closed before the representor port on PMD exit.
> This will result in accessing freed resources and eventually a core dump will
> occur.
> 
> This patch fixes this issue by notifying each other to unassociate when the
> DCF port and the representor port are closed.
> 
> Fixes: 5674465a32c8 ("net/ice: add DCF VLAN handling")
> Fixes: c7e1a1a3bfeb ("net/ice: refactor DCF VLAN handling")
> Fixes: da9cdcd1f372 ("net/ice: fix crash on representor port closing")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Mingjin Ye <mingjinx.ye@intel.com>
> ---
> v2: Reformat code to remove unneeded fixlines.
> ---
> v3: New solution.
> ---
> v4: Optimize v2 patch.
> ---
>  drivers/net/ice/ice_dcf_ethdev.c         | 20 ++++++++++++++++++++
>  drivers/net/ice/ice_dcf_ethdev.h         |  2 ++
>  drivers/net/ice/ice_dcf_vf_representor.c | 23 ++++++++++++++++++-----
>  3 files changed, 40 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
> index 065ec728c2..63e63a23de 100644
> --- a/drivers/net/ice/ice_dcf_ethdev.c
> +++ b/drivers/net/ice/ice_dcf_ethdev.c
> @@ -1618,6 +1618,26 @@ ice_dcf_free_repr_info(struct ice_dcf_adapter
> *dcf_adapter)
>  	}
>  }
> 
> +int
> +ice_dcf_handle_vf_repr_uninit(struct ice_dcf_adapter *dcf_adapter,
> +				uint16_t vf_id)
> +{
> +	struct ice_dcf_repr_info *vf_rep_info;
> +
> +	if (dcf_adapter->num_reprs >= vf_id) {
> +		PMD_DRV_LOG(ERR, "Invalid VF id: %d", vf_id);
> +		return -1;
> +	}
> +
> +	if (!dcf_adapter->repr_infos)
> +		return 0;
> +
> +	vf_rep_info = &dcf_adapter->repr_infos[vf_id];
> +	vf_rep_info->vf_rep_eth_dev = NULL;
> +
> +	return 0;
> +}
> +
>  static int
>  ice_dcf_init_repr_info(struct ice_dcf_adapter *dcf_adapter)  { diff --git
> a/drivers/net/ice/ice_dcf_ethdev.h b/drivers/net/ice/ice_dcf_ethdev.h
> index 4baaec4b8b..094e2a36db 100644
> --- a/drivers/net/ice/ice_dcf_ethdev.h
> +++ b/drivers/net/ice/ice_dcf_ethdev.h
> @@ -60,6 +60,7 @@ struct ice_dcf_vf_repr {
>  	struct rte_ether_addr mac_addr;
>  	uint16_t switch_domain_id;
>  	uint16_t vf_id;
> +	bool dcf_valid;
> 
>  	struct ice_dcf_vlan outer_vlan_info; /* DCF always handle outer VLAN
> */  }; @@ -81,5 +82,6 @@ int ice_dcf_vf_repr_uninit(struct rte_eth_dev
> *vf_rep_eth_dev);  int ice_dcf_vf_repr_init_vlan(struct rte_eth_dev
> *vf_rep_eth_dev);  void ice_dcf_vf_repr_stop_all(struct ice_dcf_adapter
> *dcf_adapter);  bool ice_dcf_adminq_need_retry(struct ice_adapter *ad);
> +int ice_dcf_handle_vf_repr_uninit(struct ice_dcf_adapter *dcf_adapter,
> +uint16_t vf_id);
> 
>  #endif /* _ICE_DCF_ETHDEV_H_ */
> diff --git a/drivers/net/ice/ice_dcf_vf_representor.c
> b/drivers/net/ice/ice_dcf_vf_representor.c
> index b9fcfc80ad..167abaa780 100644
> --- a/drivers/net/ice/ice_dcf_vf_representor.c
> +++ b/drivers/net/ice/ice_dcf_vf_representor.c
> @@ -45,6 +45,9 @@ ice_dcf_vf_repr_dev_start(struct rte_eth_dev *dev)
> static int  ice_dcf_vf_repr_dev_stop(struct rte_eth_dev *dev)  {
> +	struct ice_dcf_vf_repr *repr = dev->data->dev_private;
> +
> +	repr->dcf_valid = false;

Is this correct ? 

ice_dcf_handle_vf_repr_uninit will not be called,
if we stop a representor port (call ice_dcf_vf_repr_dev_stop), then close it (call ice_dcf_vf_repr_uninit) while DCF still be active.

if you want to use dcf_valid as a notification flag sent from dcf to each representors,  never set it in a representer's own callback function.

Btw, besides set dcf_valid to true in ice_dcf_vf_repr_init, you may also consider the case when a DCF reset happen, dcf_valid need to be reset to true for each active representor. 

...

> *init_param)  int  ice_dcf_vf_repr_uninit(struct rte_eth_dev *vf_rep_eth_dev)
> {
> +	struct ice_dcf_vf_repr *repr = vf_rep_eth_dev->data->dev_private;
> +	struct ice_dcf_adapter *dcf_adapter;
> +
> +	if (repr->dcf_valid) {
> +		dcf_adapter = repr->dcf_eth_dev->data->dev_private;
> +		ice_dcf_handle_vf_repr_uninit(dcf_adapter, repr->vf_id);
> +	}
> +
>  	vf_rep_eth_dev->data->mac_addrs = NULL;
> 
>  	return 0;
  

Patch

diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 065ec728c2..63e63a23de 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -1618,6 +1618,26 @@  ice_dcf_free_repr_info(struct ice_dcf_adapter *dcf_adapter)
 	}
 }
 
+int
+ice_dcf_handle_vf_repr_uninit(struct ice_dcf_adapter *dcf_adapter,
+				uint16_t vf_id)
+{
+	struct ice_dcf_repr_info *vf_rep_info;
+
+	if (dcf_adapter->num_reprs >= vf_id) {
+		PMD_DRV_LOG(ERR, "Invalid VF id: %d", vf_id);
+		return -1;
+	}
+
+	if (!dcf_adapter->repr_infos)
+		return 0;
+
+	vf_rep_info = &dcf_adapter->repr_infos[vf_id];
+	vf_rep_info->vf_rep_eth_dev = NULL;
+
+	return 0;
+}
+
 static int
 ice_dcf_init_repr_info(struct ice_dcf_adapter *dcf_adapter)
 {
diff --git a/drivers/net/ice/ice_dcf_ethdev.h b/drivers/net/ice/ice_dcf_ethdev.h
index 4baaec4b8b..094e2a36db 100644
--- a/drivers/net/ice/ice_dcf_ethdev.h
+++ b/drivers/net/ice/ice_dcf_ethdev.h
@@ -60,6 +60,7 @@  struct ice_dcf_vf_repr {
 	struct rte_ether_addr mac_addr;
 	uint16_t switch_domain_id;
 	uint16_t vf_id;
+	bool dcf_valid;
 
 	struct ice_dcf_vlan outer_vlan_info; /* DCF always handle outer VLAN */
 };
@@ -81,5 +82,6 @@  int ice_dcf_vf_repr_uninit(struct rte_eth_dev *vf_rep_eth_dev);
 int ice_dcf_vf_repr_init_vlan(struct rte_eth_dev *vf_rep_eth_dev);
 void ice_dcf_vf_repr_stop_all(struct ice_dcf_adapter *dcf_adapter);
 bool ice_dcf_adminq_need_retry(struct ice_adapter *ad);
+int ice_dcf_handle_vf_repr_uninit(struct ice_dcf_adapter *dcf_adapter, uint16_t vf_id);
 
 #endif /* _ICE_DCF_ETHDEV_H_ */
diff --git a/drivers/net/ice/ice_dcf_vf_representor.c b/drivers/net/ice/ice_dcf_vf_representor.c
index b9fcfc80ad..167abaa780 100644
--- a/drivers/net/ice/ice_dcf_vf_representor.c
+++ b/drivers/net/ice/ice_dcf_vf_representor.c
@@ -45,6 +45,9 @@  ice_dcf_vf_repr_dev_start(struct rte_eth_dev *dev)
 static int
 ice_dcf_vf_repr_dev_stop(struct rte_eth_dev *dev)
 {
+	struct ice_dcf_vf_repr *repr = dev->data->dev_private;
+
+	repr->dcf_valid = false;
 	dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
 
 	return 0;
@@ -111,14 +114,15 @@  ice_dcf_vf_repr_link_update(__rte_unused struct rte_eth_dev *ethdev,
 static __rte_always_inline struct ice_dcf_hw *
 ice_dcf_vf_repr_hw(struct ice_dcf_vf_repr *repr)
 {
-	struct ice_dcf_adapter *dcf_adapter =
-			repr->dcf_eth_dev->data->dev_private;
+	struct ice_dcf_adapter *dcf_adapter;
 
-	if (!dcf_adapter) {
+	if (!repr->dcf_valid) {
 		PMD_DRV_LOG(ERR, "DCF for VF representor has been released\n");
 		return NULL;
 	}
 
+	dcf_adapter = repr->dcf_eth_dev->data->dev_private;
+
 	return &dcf_adapter->real_hw;
 }
 
@@ -414,6 +418,7 @@  ice_dcf_vf_repr_init(struct rte_eth_dev *vf_rep_eth_dev, void *init_param)
 	repr->dcf_eth_dev = param->dcf_eth_dev;
 	repr->switch_domain_id = param->switch_domain_id;
 	repr->vf_id = param->vf_id;
+	repr->dcf_valid = true;
 	repr->outer_vlan_info.port_vlan_ena = false;
 	repr->outer_vlan_info.stripping_ena = false;
 	repr->outer_vlan_info.tpid = RTE_ETHER_TYPE_VLAN;
@@ -437,6 +442,14 @@  ice_dcf_vf_repr_init(struct rte_eth_dev *vf_rep_eth_dev, void *init_param)
 int
 ice_dcf_vf_repr_uninit(struct rte_eth_dev *vf_rep_eth_dev)
 {
+	struct ice_dcf_vf_repr *repr = vf_rep_eth_dev->data->dev_private;
+	struct ice_dcf_adapter *dcf_adapter;
+
+	if (repr->dcf_valid) {
+		dcf_adapter = repr->dcf_eth_dev->data->dev_private;
+		ice_dcf_handle_vf_repr_uninit(dcf_adapter, repr->vf_id);
+	}
+
 	vf_rep_eth_dev->data->mac_addrs = NULL;
 
 	return 0;
@@ -480,11 +493,11 @@  ice_dcf_vf_repr_stop_all(struct ice_dcf_adapter *dcf_adapter)
 	for (vf_id = 0; vf_id < dcf_adapter->real_hw.num_vfs; vf_id++) {
 		struct rte_eth_dev *vf_rep_eth_dev =
 				dcf_adapter->repr_infos[vf_id].vf_rep_eth_dev;
-		if (!vf_rep_eth_dev || vf_rep_eth_dev->data->dev_started == 0)
+		if (!vf_rep_eth_dev)
 			continue;
 
 		ret = ice_dcf_vf_repr_dev_stop(vf_rep_eth_dev);
 		if (!ret)
-			vf_rep_eth_dev->data->dev_started = 0;
+			dcf_adapter->repr_infos[vf_id].vf_rep_eth_dev = NULL;
 	}
 }