[2/2] net/mlx5: fix crash in dev_info_get in secondary process

Message ID 20190712205425.17781-3-stephen@networkplumber.org (mailing list archive)
State Superseded, archived
Delegated to: Raslan Darawsheh
Headers
Series fix dev_info_get in mlx secondary process |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Stephen Hemminger July 12, 2019, 8:54 p.m. UTC
  mlx5_dev_info_get calls mlx5_get_ifname, but mlx5_get_ifname
uses priv->ctx which is not a valid pointer in a secondary
process. The fix is to cache the value in primary.

In the primary process, get and store the interface index of
the device so that secondary process can see it.

Fixes: e60fbd5b24fc ("mlx5: add device configure/start/stop")
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/mlx5/mlx5.c        | 17 ++++++++---------
 drivers/net/mlx5/mlx5.h        |  1 +
 drivers/net/mlx5/mlx5_ethdev.c |  4 +---
 3 files changed, 10 insertions(+), 12 deletions(-)
  

Comments

Slava Ovsiienko July 15, 2019, 7:41 a.m. UTC | #1
Hi, Stephen

>mlx5_get_ifname uses priv->ctx which is not a valid pointer in a secondary process.

1. Sorry, mlx5_priv structure does not contain ctx field.  Do you mean priv->sh  (shared context) ?
This one is allocated with rte_zmalloc(), that supposes shared memory allocation and shared
context structure should be valid within secondary process context.

It seems there is another issue - mlx5_get_ifname() uses priv->nl_socket_rdma
which is process-specific socket handle (fd), so routine may fail.

2. Generally I'm OK with caching ifindex in priv structure - it seems the ifindex is permanent 
throughout interface lifetime. The mlx5_dev_spawn has a parameter mlx5_dev_spawn_data *spawn,
it contains the successfully queried ifindex of the device being spawned - no need to query again.

The cached valued should be retrieved by mlx5_ifindex() routine and  mlx5_get_ifname()
should be refactored to use mlx5_ifindex().

WBR,
Slava

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Friday, July 12, 2019 23:54
> To: Matan Azrad <matan@mellanox.com>; Shahaf Shuler
> <shahafs@mellanox.com>; Yongseok Koh <yskoh@mellanox.com>; Slava
> Ovsiienko <viacheslavo@mellanox.com>
> Cc: dev@dpdk.org; sju@paloaltonetworks.com; Stephen Hemminger
> <stephen@networkplumber.org>
> Subject: [PATCH 2/2] net/mlx5: fix crash in dev_info_get in secondary
> process
> 
> mlx5_dev_info_get calls mlx5_get_ifname, but mlx5_get_ifname uses priv-
> >ctx which is not a valid pointer in a secondary process. The fix is to cache
> the value in primary.
> 
> In the primary process, get and store the interface index of the device so that
> secondary process can see it.
> 
> Fixes: e60fbd5b24fc ("mlx5: add device configure/start/stop")
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>  drivers/net/mlx5/mlx5.c        | 17 ++++++++---------
>  drivers/net/mlx5/mlx5.h        |  1 +
>  drivers/net/mlx5/mlx5_ethdev.c |  4 +---
>  3 files changed, 10 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> d93f92db56b5..27c5ef9b1763 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -1105,6 +1105,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  	int own_domain_id = 0;
>  	uint16_t port_id;
>  	unsigned int i;
> +	char ifname[IF_NAMESIZE];
> 
>  	/* Determine if this port representor is supposed to be spawned. */
>  	if (switch_info->representor && dpdk_dev->devargs) { @@ -1479,18
> +1480,16 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  		mac.addr_bytes[0], mac.addr_bytes[1],
>  		mac.addr_bytes[2], mac.addr_bytes[3],
>  		mac.addr_bytes[4], mac.addr_bytes[5]); -#ifndef NDEBUG
> -	{
> -		char ifname[IF_NAMESIZE];
> 
> -		if (mlx5_get_ifname(eth_dev, &ifname) == 0)
> -			DRV_LOG(DEBUG, "port %u ifname is \"%s\"",

> +	if (mlx5_get_ifname(eth_dev, &ifname) == 0) {
> +		priv->if_index = if_nametoindex(ifname);
> +		DRV_LOG(DEBUG, "port %u ifname is \"%s\"",
>  				eth_dev->data->port_id, ifname);
> -		else
> -			DRV_LOG(DEBUG, "port %u ifname is unknown",
> -				eth_dev->data->port_id);
> +	} else {
> +		DRV_LOG(DEBUG, "port %u ifname is unknown",
> +			eth_dev->data->port_id);
>  	}
> -#endif
> +
>  	/* Get actual MTU if possible. */
>  	err = mlx5_get_mtu(eth_dev, &priv->mtu);
>  	if (err) {
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 5af3f413cdcb..a06ffd444255 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -342,6 +342,7 @@ struct mlx5_priv {
>  	uint16_t vlan_filter[MLX5_MAX_VLAN_IDS]; /* VLAN filters table. */
>  	unsigned int vlan_filter_n; /* Number of configured VLAN filters. */
>  	/* Device properties. */
> +	unsigned int if_index; /* Associated kernel network device index. */
>  	uint16_t mtu; /* Configured MTU. */
>  	unsigned int isolated:1; /* Whether isolated mode is enabled. */
>  	unsigned int representor:1; /* Device is a port representor. */ diff --
> git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> index eeefe4df3cd4..41e58db5e573 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -605,7 +605,6 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct
> rte_eth_dev_info *info)
>  	struct mlx5_priv *priv = dev->data->dev_private;
>  	struct mlx5_dev_config *config = &priv->config;
>  	unsigned int max;
> -	char ifname[IF_NAMESIZE];
> 
>  	/* FIXME: we should ask the device for these values. */
>  	info->min_rx_bufsize = 32;
> @@ -626,8 +625,7 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct
> rte_eth_dev_info *info)
>  	info->rx_offload_capa = (mlx5_get_rx_port_offloads() |
>  				 info->rx_queue_offload_capa);
>  	info->tx_offload_capa = mlx5_get_tx_port_offloads(dev);
> -	if (mlx5_get_ifname(dev, &ifname) == 0)
> -		info->if_index = if_nametoindex(ifname);
> +	info->if_index = priv->if_index;
>  	info->reta_size = priv->reta_idx_n ?
>  		priv->reta_idx_n : config->ind_table_max_size;
>  	info->hash_key_size = MLX5_RSS_HASH_KEY_LEN;
> --
> 2.20.1
  
Raslan Darawsheh July 31, 2019, 7:36 a.m. UTC | #2
Hi Stephen,

Can you please confirm that Slava's patch fixed your issue and this patch is not needed anymore for MLX5?
So that I can take mlx4 patch only from this series?

Kindest regards,
Raslan Darawsheh

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Stephen Hemminger
> Sent: Friday, July 12, 2019 11:54 PM
> To: Matan Azrad <matan@mellanox.com>; Shahaf Shuler
> <shahafs@mellanox.com>; Yongseok Koh <yskoh@mellanox.com>; Slava
> Ovsiienko <viacheslavo@mellanox.com>
> Cc: dev@dpdk.org; sju@paloaltonetworks.com; Stephen Hemminger
> <stephen@networkplumber.org>
> Subject: [dpdk-dev] [PATCH 2/2] net/mlx5: fix crash in dev_info_get in
> secondary process
> 
> mlx5_dev_info_get calls mlx5_get_ifname, but mlx5_get_ifname
> uses priv->ctx which is not a valid pointer in a secondary
> process. The fix is to cache the value in primary.
> 
> In the primary process, get and store the interface index of
> the device so that secondary process can see it.
> 
> Fixes: e60fbd5b24fc ("mlx5: add device configure/start/stop")
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>  drivers/net/mlx5/mlx5.c        | 17 ++++++++---------
>  drivers/net/mlx5/mlx5.h        |  1 +
>  drivers/net/mlx5/mlx5_ethdev.c |  4 +---
>  3 files changed, 10 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index d93f92db56b5..27c5ef9b1763 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -1105,6 +1105,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  	int own_domain_id = 0;
>  	uint16_t port_id;
>  	unsigned int i;
> +	char ifname[IF_NAMESIZE];
> 
>  	/* Determine if this port representor is supposed to be spawned. */
>  	if (switch_info->representor && dpdk_dev->devargs) {
> @@ -1479,18 +1480,16 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  		mac.addr_bytes[0], mac.addr_bytes[1],
>  		mac.addr_bytes[2], mac.addr_bytes[3],
>  		mac.addr_bytes[4], mac.addr_bytes[5]);
> -#ifndef NDEBUG
> -	{
> -		char ifname[IF_NAMESIZE];
> 
> -		if (mlx5_get_ifname(eth_dev, &ifname) == 0)
> -			DRV_LOG(DEBUG, "port %u ifname is \"%s\"",
> +	if (mlx5_get_ifname(eth_dev, &ifname) == 0) {
> +		priv->if_index = if_nametoindex(ifname);
> +		DRV_LOG(DEBUG, "port %u ifname is \"%s\"",
>  				eth_dev->data->port_id, ifname);
> -		else
> -			DRV_LOG(DEBUG, "port %u ifname is unknown",
> -				eth_dev->data->port_id);
> +	} else {
> +		DRV_LOG(DEBUG, "port %u ifname is unknown",
> +			eth_dev->data->port_id);
>  	}
> -#endif
> +
>  	/* Get actual MTU if possible. */
>  	err = mlx5_get_mtu(eth_dev, &priv->mtu);
>  	if (err) {
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index 5af3f413cdcb..a06ffd444255 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -342,6 +342,7 @@ struct mlx5_priv {
>  	uint16_t vlan_filter[MLX5_MAX_VLAN_IDS]; /* VLAN filters table. */
>  	unsigned int vlan_filter_n; /* Number of configured VLAN filters. */
>  	/* Device properties. */
> +	unsigned int if_index; /* Associated kernel network device index. */
>  	uint16_t mtu; /* Configured MTU. */
>  	unsigned int isolated:1; /* Whether isolated mode is enabled. */
>  	unsigned int representor:1; /* Device is a port representor. */
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> b/drivers/net/mlx5/mlx5_ethdev.c
> index eeefe4df3cd4..41e58db5e573 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -605,7 +605,6 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct
> rte_eth_dev_info *info)
>  	struct mlx5_priv *priv = dev->data->dev_private;
>  	struct mlx5_dev_config *config = &priv->config;
>  	unsigned int max;
> -	char ifname[IF_NAMESIZE];
> 
>  	/* FIXME: we should ask the device for these values. */
>  	info->min_rx_bufsize = 32;
> @@ -626,8 +625,7 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct
> rte_eth_dev_info *info)
>  	info->rx_offload_capa = (mlx5_get_rx_port_offloads() |
>  				 info->rx_queue_offload_capa);
>  	info->tx_offload_capa = mlx5_get_tx_port_offloads(dev);
> -	if (mlx5_get_ifname(dev, &ifname) == 0)
> -		info->if_index = if_nametoindex(ifname);
> +	info->if_index = priv->if_index;
>  	info->reta_size = priv->reta_idx_n ?
>  		priv->reta_idx_n : config->ind_table_max_size;
>  	info->hash_key_size = MLX5_RSS_HASH_KEY_LEN;
> --
> 2.20.1
  
Stephen Hemminger July 31, 2019, 1:47 p.m. UTC | #3
On Wed, 31 Jul 2019 07:36:26 +0000
Raslan Darawsheh <rasland@mellanox.com> wrote:

> Hi Stephen,
> 
> Can you please confirm that Slava's patch fixed your issue and this patch is not needed anymore for MLX5?
> So that I can take mlx4 patch only from this series?
> 
> Kindest regards,
> Raslan Darawsheh

OK, but my other concern is that mlx5 patch will not backport cleanly to stable.
  

Patch

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index d93f92db56b5..27c5ef9b1763 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1105,6 +1105,7 @@  mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	int own_domain_id = 0;
 	uint16_t port_id;
 	unsigned int i;
+	char ifname[IF_NAMESIZE];
 
 	/* Determine if this port representor is supposed to be spawned. */
 	if (switch_info->representor && dpdk_dev->devargs) {
@@ -1479,18 +1480,16 @@  mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		mac.addr_bytes[0], mac.addr_bytes[1],
 		mac.addr_bytes[2], mac.addr_bytes[3],
 		mac.addr_bytes[4], mac.addr_bytes[5]);
-#ifndef NDEBUG
-	{
-		char ifname[IF_NAMESIZE];
 
-		if (mlx5_get_ifname(eth_dev, &ifname) == 0)
-			DRV_LOG(DEBUG, "port %u ifname is \"%s\"",
+	if (mlx5_get_ifname(eth_dev, &ifname) == 0) {
+		priv->if_index = if_nametoindex(ifname);
+		DRV_LOG(DEBUG, "port %u ifname is \"%s\"",
 				eth_dev->data->port_id, ifname);
-		else
-			DRV_LOG(DEBUG, "port %u ifname is unknown",
-				eth_dev->data->port_id);
+	} else {
+		DRV_LOG(DEBUG, "port %u ifname is unknown",
+			eth_dev->data->port_id);
 	}
-#endif
+
 	/* Get actual MTU if possible. */
 	err = mlx5_get_mtu(eth_dev, &priv->mtu);
 	if (err) {
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 5af3f413cdcb..a06ffd444255 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -342,6 +342,7 @@  struct mlx5_priv {
 	uint16_t vlan_filter[MLX5_MAX_VLAN_IDS]; /* VLAN filters table. */
 	unsigned int vlan_filter_n; /* Number of configured VLAN filters. */
 	/* Device properties. */
+	unsigned int if_index; /* Associated kernel network device index. */
 	uint16_t mtu; /* Configured MTU. */
 	unsigned int isolated:1; /* Whether isolated mode is enabled. */
 	unsigned int representor:1; /* Device is a port representor. */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index eeefe4df3cd4..41e58db5e573 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -605,7 +605,6 @@  mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_dev_config *config = &priv->config;
 	unsigned int max;
-	char ifname[IF_NAMESIZE];
 
 	/* FIXME: we should ask the device for these values. */
 	info->min_rx_bufsize = 32;
@@ -626,8 +625,7 @@  mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
 	info->rx_offload_capa = (mlx5_get_rx_port_offloads() |
 				 info->rx_queue_offload_capa);
 	info->tx_offload_capa = mlx5_get_tx_port_offloads(dev);
-	if (mlx5_get_ifname(dev, &ifname) == 0)
-		info->if_index = if_nametoindex(ifname);
+	info->if_index = priv->if_index;
 	info->reta_size = priv->reta_idx_n ?
 		priv->reta_idx_n : config->ind_table_max_size;
 	info->hash_key_size = MLX5_RSS_HASH_KEY_LEN;