[01/14] net/mlx5: add representor recognition on kernels 5.x

Message ID 1553155888-27498-2-git-send-email-viacheslavo@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler
Headers
Series net/mlx5: add support for multiport IB devices |

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/checkpatch success coding style OK

Commit Message

Slava Ovsiienko March 21, 2019, 8:11 a.m. UTC
  The master device and VF representors were distinguished by
presence of port name, master device did not have one. The new Linux
kernels starting from 5.0 provide the port name for master device
and the implemented representor recognizing method does not work.
The new recognizing method is based on quiering the VF number,
created on the base of the device.

The IFLA_NUM_VF attribute is returned by kernel if IFLA_EXT_MASK
attribute is specified in the Netlink request message.

Also the presence of device symlink in device sysfs folder is
added to distinguish representors with sysfs based method.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>

---

v3: - rebased over new port naming http://patches.dpdk.org/patch/51245/
    - master recognition is reinforced by checking vport for -1
      for new port naming schema

v2: - fopen replaced with opendir to detect whether directory exists

v1: http://patches.dpdk.org/patch/50411/
---
 drivers/net/mlx5/Makefile      | 10 ++++++++++
 drivers/net/mlx5/meson.build   |  4 ++++
 drivers/net/mlx5/mlx5.c        |  2 +-
 drivers/net/mlx5/mlx5.h        |  1 +
 drivers/net/mlx5/mlx5_ethdev.c | 13 +++++++++++--
 drivers/net/mlx5/mlx5_nl.c     | 36 +++++++++++++++++++++++++++++++++---
 6 files changed, 60 insertions(+), 6 deletions(-)
  

Comments

Shahaf Shuler March 21, 2019, 12:13 p.m. UTC | #1
Hi Slava,

Small comments below. Once fixed you can put my acked-by on the next version. 

Thursday, March 21, 2019 10:11 AM, Viacheslav Ovsiienko:
> Subject: [PATCH 01/14] net/mlx5: add representor recognition on kernels 5.x
> 
> The master device and VF representors were distinguished by presence of
> port name, master device did not have one. The new Linux kernels starting
> from 5.0 provide the port name for master device and the implemented
> representor recognizing method does not work.
> The new recognizing method is based on quiering the VF number, created on
> the base of the device.
> 
> The IFLA_NUM_VF attribute is returned by kernel if IFLA_EXT_MASK
> attribute is specified in the Netlink request message.
> 
> Also the presence of device symlink in device sysfs folder is added to
> distinguish representors with sysfs based method.
> 
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> 
> ---
> 
> v3: - rebased over new port naming http://patches.dpdk.org/patch/51245/
>     - master recognition is reinforced by checking vport for -1
>       for new port naming schema
> 
> v2: - fopen replaced with opendir to detect whether directory exists
> 
> v1: http://patches.dpdk.org/patch/50411/
> ---
>  drivers/net/mlx5/Makefile      | 10 ++++++++++
>  drivers/net/mlx5/meson.build   |  4 ++++
>  drivers/net/mlx5/mlx5.c        |  2 +-
>  drivers/net/mlx5/mlx5.h        |  1 +
>  drivers/net/mlx5/mlx5_ethdev.c | 13 +++++++++++--
>  drivers/net/mlx5/mlx5_nl.c     | 36
> +++++++++++++++++++++++++++++++++---
>  6 files changed, 60 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index
> 1ed299d..3dd7e38 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -231,6 +231,16 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-
> config-h.sh
>  		enum RDMA_NLDEV_ATTR_NDEV_INDEX \
>  		$(AUTOCONF_OUTPUT)
>  	$Q sh -- '$<' '$@' \
> +		HAVE_IFLA_NUM_VF \
> +		linux/if_link.h \
> +		enum IFLA_NUM_VF \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_IFLA_EXT_MASK \
> +		linux/if_link.h \
> +		enum IFLA_EXT_MASK \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
>  		HAVE_IFLA_PHYS_SWITCH_ID \
>  		linux/if_link.h \
>  		enum IFLA_PHYS_SWITCH_ID \
> diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> index 0cf2f08..e3cb9bc 100644
> --- a/drivers/net/mlx5/meson.build
> +++ b/drivers/net/mlx5/meson.build
> @@ -133,6 +133,10 @@ if build
>  		'ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT' ],
>  		[ 'HAVE_ETHTOOL_LINK_MODE_100G', 'linux/ethtool.h',
>  		'ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT' ],
> +		[ 'HAVE_IFLA_NUM_VF', 'linux/if_link.h',
> +		'IFLA_NUM_VF' ],
> +		[ 'HAVE_IFLA_EXT_MASK', 'linux/if_link.h',
> +		'IFLA_EXT_MASK' ],
>  		[ 'HAVE_IFLA_PHYS_SWITCH_ID', 'linux/if_link.h',
>  		'IFLA_PHYS_SWITCH_ID' ],
>  		[ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h', diff --git
> a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> ad1975c..ea3d00c 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -13,7 +13,6 @@
>  #include <errno.h>
>  #include <net/if.h>
>  #include <sys/mman.h>
> -#include <linux/netlink.h>
>  #include <linux/rtnetlink.h>
> 
>  /* Verbs header. */
> @@ -1001,6 +1000,7 @@
>  	priv->nl_socket_route =	mlx5_nl_init(NETLINK_ROUTE);
>  	priv->nl_sn = 0;
>  	priv->representor = !!switch_info->representor;
> +	priv->master = !!switch_info->master;
>  	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
>  	priv->representor_id =
>  		switch_info->representor ? switch_info->port_name : -1; diff
> --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> a88cb4a..58bc37f 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -214,6 +214,7 @@ struct mlx5_priv {
>  	uint16_t mtu; /* Configured MTU. */
>  	unsigned int isolated:1; /* Whether isolated mode is enabled. */
>  	unsigned int representor:1; /* Device is a port representor. */
> +	unsigned int master:1; /* Device is a E-Switch master. */
>  	uint16_t domain_id; /* Switch domain identifier. */
>  	int32_t representor_id; /* Port representor identifier. */
>  	/* RX/TX queues. */
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> b/drivers/net/mlx5/mlx5_ethdev.c index 84d761c..81f2a42 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -1362,8 +1362,10 @@ int mlx5_fw_version_get(struct rte_eth_dev
> *dev, char *fw_ver, size_t fw_size)
>  		.port_name = 0,
>  		.switch_id = 0,
>  	};
> +	DIR *dir;
>  	bool port_name_set = false;
>  	bool port_switch_id_set = false;
> +	bool device_dir = false;
>  	char c;
> 
>  	if (!if_indextoname(ifindex, ifname)) { @@ -1375,6 +1377,8 @@ int
> mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t
> fw_size)
>  	      ifname);
>  	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
>  	      ifname);
> +	MKSTR(pci_device, "/sys/class/net/%s/device",
> +	      ifname);
> 
>  	file = fopen(phys_port_name, "rb");
>  	if (file != NULL) {
> @@ -1391,8 +1395,13 @@ int mlx5_fw_version_get(struct rte_eth_dev
> *dev, char *fw_ver, size_t fw_size)
>  		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
>  		c == '\n';
>  	fclose(file);
> -	data.master = port_switch_id_set && !port_name_set;
> -	data.representor = port_switch_id_set && port_name_set;
> +	dir = opendir(pci_device);
> +	if (dir != NULL) {
> +		closedir(dir);
> +		device_dir = true;
> +	}
> +	data.master = port_switch_id_set && (!port_name_set ||
> device_dir);
> +	data.representor = port_switch_id_set && port_name_set &&
> !device_dir;

Add assert that device cannot be both master and representor. 

>  	*info = data;
>  	return 0;
>  }
> diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c index
> 8a10109..aa49cb4 100644
> --- a/drivers/net/mlx5/mlx5_nl.c
> +++ b/drivers/net/mlx5/mlx5_nl.c
> @@ -65,6 +65,12 @@
>  #endif
> 
>  /* These are normally found in linux/if_link.h. */
> +#ifndef HAVE_IFLA_NUM_VF
> +#define IFLA_NUM_VF 21
> +#endif
> +#ifndef HAVE_IFLA_EXT_MASK
> +#define IFLA_EXT_MASK 29
> +#endif
>  #ifndef HAVE_IFLA_PHYS_SWITCH_ID
>  #define IFLA_PHYS_SWITCH_ID 36
>  #endif
> @@ -837,6 +843,7 @@ struct mlx5_nl_ifindex_data {
>  	size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
>  	bool port_name_set = false;
>  	bool switch_id_set = false;
> +	bool num_vf_set = false;
> 
>  	if (nh->nlmsg_type != RTM_NEWLINK)
>  		goto error;
> @@ -848,6 +855,9 @@ struct mlx5_nl_ifindex_data {
>  		if (ra->rta_len > nh->nlmsg_len - off)
>  			goto error;
>  		switch (ra->rta_type) {
> +		case IFLA_NUM_VF:
> +			num_vf_set = true;
> +			break;
>  		case IFLA_PHYS_PORT_NAME:
>  			port_name_set =
>  				mlx5_translate_port_name((char *)payload,
> @@ -864,8 +874,19 @@ struct mlx5_nl_ifindex_data {
>  		}
>  		off += RTA_ALIGN(ra->rta_len);
>  	}
> -	info.master = switch_id_set && !port_name_set;
> -	info.representor = switch_id_set && port_name_set;
> +	if (switch_id_set) {
> +		if (info.port_name_new) {
> +			/* New representors naming schema. */
> +			if (port_name_set) {
> +				info.master = (info.port_name == -1);
> +				info.representor = (info.port_name != -1);
> +			}
> +		} else {
> +			/* Legacy representors naming schema. */
> +			info.master = (!port_name_set || num_vf_set);
> +			info.representor = port_name_set && !num_vf_set;
> +		}
> +	}

Add assert that device cannot be both master and representor.

>  	memcpy(arg, &info, sizeof(info));
>  	return 0;
>  error:
> @@ -893,9 +914,13 @@ struct mlx5_nl_ifindex_data {
>  	struct {
>  		struct nlmsghdr nh;
>  		struct ifinfomsg info;
> +		struct rtattr rta;
> +		uint32_t extmask;
>  	} req = {
>  		.nh = {
> -			.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)),
> +			.nlmsg_len = NLMSG_LENGTH
> +					(sizeof(req.info) +
> +					 RTA_LENGTH(sizeof(uint32_t))),
>  			.nlmsg_type = RTM_GETLINK,
>  			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
>  		},
> @@ -903,6 +928,11 @@ struct mlx5_nl_ifindex_data {
>  			.ifi_family = AF_UNSPEC,
>  			.ifi_index = ifindex,
>  		},
> +		.rta = {
> +			.rta_type = IFLA_EXT_MASK,
> +			.rta_len = RTA_LENGTH(sizeof(int32_t)),
> +		},
> +		.extmask = RTE_LE32(1),
>  	};
>  	int ret;
> 
> --
> 1.8.3.1
  
Stephen Hemminger March 21, 2019, 3:08 p.m. UTC | #2
On Thu, 21 Mar 2019 12:13:50 +0000
Shahaf Shuler <shahafs@mellanox.com> wrote:

> Hi Slava,
> 
> Small comments below. Once fixed you can put my acked-by on the next version. 
> 
> Thursday, March 21, 2019 10:11 AM, Viacheslav Ovsiienko:
> > Subject: [PATCH 01/14] net/mlx5: add representor recognition on kernels 5.x
> > 
> > The master device and VF representors were distinguished by presence of
> > port name, master device did not have one. The new Linux kernels starting
> > from 5.0 provide the port name for master device and the implemented
> > representor recognizing method does not work.
> > The new recognizing method is based on quiering the VF number, created on
> > the base of the device.
> > 
> > The IFLA_NUM_VF attribute is returned by kernel if IFLA_EXT_MASK
> > attribute is specified in the Netlink request message.
> > 
> > Also the presence of device symlink in device sysfs folder is added to
> > distinguish representors with sysfs based method.
> > 
> > Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> > 
> > ---
> > 
> > v3: - rebased over new port naming http://patches.dpdk.org/patch/51245/
> >     - master recognition is reinforced by checking vport for -1
> >       for new port naming schema
> > 
> > v2: - fopen replaced with opendir to detect whether directory exists
> > 
> > v1: http://patches.dpdk.org/patch/50411/
> > ---
> >  drivers/net/mlx5/Makefile      | 10 ++++++++++
> >  drivers/net/mlx5/meson.build   |  4 ++++
> >  drivers/net/mlx5/mlx5.c        |  2 +-
> >  drivers/net/mlx5/mlx5.h        |  1 +
> >  drivers/net/mlx5/mlx5_ethdev.c | 13 +++++++++++--
> >  drivers/net/mlx5/mlx5_nl.c     | 36
> > +++++++++++++++++++++++++++++++++---
> >  6 files changed, 60 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index
> > 1ed299d..3dd7e38 100644
> > --- a/drivers/net/mlx5/Makefile
> > +++ b/drivers/net/mlx5/Makefile
> > @@ -231,6 +231,16 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-
> > config-h.sh
> >  		enum RDMA_NLDEV_ATTR_NDEV_INDEX \
> >  		$(AUTOCONF_OUTPUT)
> >  	$Q sh -- '$<' '$@' \
> > +		HAVE_IFLA_NUM_VF \
> > +		linux/if_link.h \
> > +		enum IFLA_NUM_VF \
> > +		$(AUTOCONF_OUTPUT)
> > +	$Q sh -- '$<' '$@' \
> > +		HAVE_IFLA_EXT_MASK \
> > +		linux/if_link.h \
> > +		enum IFLA_EXT_MASK \
> > +		$(AUTOCONF_OUTPUT)
> > +	$Q sh -- '$<' '$@' \
> >  		HAVE_IFLA_PHYS_SWITCH_ID \
> >  		linux/if_link.h \
> >  		enum IFLA_PHYS_SWITCH_ID \
> > diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> > index 0cf2f08..e3cb9bc 100644
> > --- a/drivers/net/mlx5/meson.build
> > +++ b/drivers/net/mlx5/meson.build
> > @@ -133,6 +133,10 @@ if build
> >  		'ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT' ],
> >  		[ 'HAVE_ETHTOOL_LINK_MODE_100G', 'linux/ethtool.h',
> >  		'ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT' ],
> > +		[ 'HAVE_IFLA_NUM_VF', 'linux/if_link.h',
> > +		'IFLA_NUM_VF' ],
> > +		[ 'HAVE_IFLA_EXT_MASK', 'linux/if_link.h',
> > +		'IFLA_EXT_MASK' ],
> >  		[ 'HAVE_IFLA_PHYS_SWITCH_ID', 'linux/if_link.h',
> >  		'IFLA_PHYS_SWITCH_ID' ],
> >  		[ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h', diff --git
> > a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> > ad1975c..ea3d00c 100644
> > --- a/drivers/net/mlx5/mlx5.c
> > +++ b/drivers/net/mlx5/mlx5.c
> > @@ -13,7 +13,6 @@
> >  #include <errno.h>
> >  #include <net/if.h>
> >  #include <sys/mman.h>
> > -#include <linux/netlink.h>
> >  #include <linux/rtnetlink.h>
> > 
> >  /* Verbs header. */
> > @@ -1001,6 +1000,7 @@
> >  	priv->nl_socket_route =	mlx5_nl_init(NETLINK_ROUTE);
> >  	priv->nl_sn = 0;
> >  	priv->representor = !!switch_info->representor;
> > +	priv->master = !!switch_info->master;
> >  	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
> >  	priv->representor_id =
> >  		switch_info->representor ? switch_info->port_name : -1; diff
> > --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> > a88cb4a..58bc37f 100644
> > --- a/drivers/net/mlx5/mlx5.h
> > +++ b/drivers/net/mlx5/mlx5.h
> > @@ -214,6 +214,7 @@ struct mlx5_priv {
> >  	uint16_t mtu; /* Configured MTU. */
> >  	unsigned int isolated:1; /* Whether isolated mode is enabled. */
> >  	unsigned int representor:1; /* Device is a port representor. */
> > +	unsigned int master:1; /* Device is a E-Switch master. */
> >  	uint16_t domain_id; /* Switch domain identifier. */
> >  	int32_t representor_id; /* Port representor identifier. */
> >  	/* RX/TX queues. */
> > diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> > b/drivers/net/mlx5/mlx5_ethdev.c index 84d761c..81f2a42 100644
> > --- a/drivers/net/mlx5/mlx5_ethdev.c
> > +++ b/drivers/net/mlx5/mlx5_ethdev.c
> > @@ -1362,8 +1362,10 @@ int mlx5_fw_version_get(struct rte_eth_dev
> > *dev, char *fw_ver, size_t fw_size)
> >  		.port_name = 0,
> >  		.switch_id = 0,
> >  	};
> > +	DIR *dir;
> >  	bool port_name_set = false;
> >  	bool port_switch_id_set = false;
> > +	bool device_dir = false;
> >  	char c;
> > 
> >  	if (!if_indextoname(ifindex, ifname)) { @@ -1375,6 +1377,8 @@ int
> > mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t
> > fw_size)
> >  	      ifname);
> >  	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
> >  	      ifname);
> > +	MKSTR(pci_device, "/sys/class/net/%s/device",
> > +	      ifname);
> > 
> >  	file = fopen(phys_port_name, "rb");
> >  	if (file != NULL) {
> > @@ -1391,8 +1395,13 @@ int mlx5_fw_version_get(struct rte_eth_dev
> > *dev, char *fw_ver, size_t fw_size)
> >  		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
> >  		c == '\n';
> >  	fclose(file);
> > -	data.master = port_switch_id_set && !port_name_set;
> > -	data.representor = port_switch_id_set && port_name_set;
> > +	dir = opendir(pci_device);
> > +	if (dir != NULL) {
> > +		closedir(dir);
> > +		device_dir = true;
> > +	}
> > +	data.master = port_switch_id_set && (!port_name_set ||
> > device_dir);
> > +	data.representor = port_switch_id_set && port_name_set &&
> > !device_dir;  
> 
> Add assert that device cannot be both master and representor. 

Error checking would be but assert() is usually not a useful in drivers.
It causes crash, and is often compiled out.
  
Slava Ovsiienko March 21, 2019, 3:31 p.m. UTC | #3
Hi, Stephen

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Thursday, March 21, 2019 17:09
> To: Shahaf Shuler <shahafs@mellanox.com>
> Cc: Slava Ovsiienko <viacheslavo@mellanox.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 01/14] net/mlx5: add representor recognition
> on kernels 5.x
> 
> On Thu, 21 Mar 2019 12:13:50 +0000
> Shahaf Shuler <shahafs@mellanox.com> wrote:
> 
> > Hi Slava,
> >
> > Small comments below. Once fixed you can put my acked-by on the next
> version.
> >
> > Thursday, March 21, 2019 10:11 AM, Viacheslav Ovsiienko:
> > > Subject: [PATCH 01/14] net/mlx5: add representor recognition on
> > > kernels 5.x
> > >
> > > The master device and VF representors were distinguished by presence
> > > of port name, master device did not have one. The new Linux kernels
> > > starting from 5.0 provide the port name for master device and the
> > > implemented representor recognizing method does not work.
> > > The new recognizing method is based on quiering the VF number,
> > > created on the base of the device.
> > >
> > > The IFLA_NUM_VF attribute is returned by kernel if IFLA_EXT_MASK
> > > attribute is specified in the Netlink request message.
> > >
> > > Also the presence of device symlink in device sysfs folder is added
> > > to distinguish representors with sysfs based method.
> > >
> > > Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> > >
> > > ---
> > >
> > > v3: - rebased over new port naming
> https://eur03.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatche
> s.dpdk.org%2Fpatch%2F51245%2F&amp;data=02%7C01%7Cviacheslavo%40
> mellanox.com%7C94cc885cbb8d4aade9dd08d6ae0f26cd%7Ca652971c7d2e4
> d9ba6a4d149256f461b%7C0%7C0%7C636887777455248723&amp;sdata=FDq
> 950ksokxsNac8cBM293W263uPfVeY1xA7Cx%2F4FLk%3D&amp;reserved=0
> > >     - master recognition is reinforced by checking vport for -1
> > >       for new port naming schema
> > >
> > > v2: - fopen replaced with opendir to detect whether directory exists
> > >
> > > v1:
> > > https://eur03.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpat
> > >
> ches.dpdk.org%2Fpatch%2F50411%2F&amp;data=02%7C01%7Cviacheslavo%
> 40me
> > >
> llanox.com%7C94cc885cbb8d4aade9dd08d6ae0f26cd%7Ca652971c7d2e4d9b
> a6a4
> > >
> d149256f461b%7C0%7C0%7C636887777455248723&amp;sdata=JkWKbb6LV
> diIHW%2
> > > FpJEQHcD7hvFLWdGmM%2BTVhM%2F%2F80Uk%3D&amp;reserved=0
> > > ---
> > >  drivers/net/mlx5/Makefile      | 10 ++++++++++
> > >  drivers/net/mlx5/meson.build   |  4 ++++
> > >  drivers/net/mlx5/mlx5.c        |  2 +-
> > >  drivers/net/mlx5/mlx5.h        |  1 +
> > >  drivers/net/mlx5/mlx5_ethdev.c | 13 +++++++++++--
> > >  drivers/net/mlx5/mlx5_nl.c     | 36
> > > +++++++++++++++++++++++++++++++++---
> > >  6 files changed, 60 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> > > index
> > > 1ed299d..3dd7e38 100644
> > > --- a/drivers/net/mlx5/Makefile
> > > +++ b/drivers/net/mlx5/Makefile
> > > @@ -231,6 +231,16 @@ mlx5_autoconf.h.new:
> > > $(RTE_SDK)/buildtools/auto- config-h.sh
> > >  		enum RDMA_NLDEV_ATTR_NDEV_INDEX \
> > >  		$(AUTOCONF_OUTPUT)
> > >  	$Q sh -- '$<' '$@' \
> > > +		HAVE_IFLA_NUM_VF \
> > > +		linux/if_link.h \
> > > +		enum IFLA_NUM_VF \
> > > +		$(AUTOCONF_OUTPUT)
> > > +	$Q sh -- '$<' '$@' \
> > > +		HAVE_IFLA_EXT_MASK \
> > > +		linux/if_link.h \
> > > +		enum IFLA_EXT_MASK \
> > > +		$(AUTOCONF_OUTPUT)
> > > +	$Q sh -- '$<' '$@' \
> > >  		HAVE_IFLA_PHYS_SWITCH_ID \
> > >  		linux/if_link.h \
> > >  		enum IFLA_PHYS_SWITCH_ID \
> > > diff --git a/drivers/net/mlx5/meson.build
> > > b/drivers/net/mlx5/meson.build index 0cf2f08..e3cb9bc 100644
> > > --- a/drivers/net/mlx5/meson.build
> > > +++ b/drivers/net/mlx5/meson.build
> > > @@ -133,6 +133,10 @@ if build
> > >  		'ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT' ],
> > >  		[ 'HAVE_ETHTOOL_LINK_MODE_100G', 'linux/ethtool.h',
> > >  		'ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT' ],
> > > +		[ 'HAVE_IFLA_NUM_VF', 'linux/if_link.h',
> > > +		'IFLA_NUM_VF' ],
> > > +		[ 'HAVE_IFLA_EXT_MASK', 'linux/if_link.h',
> > > +		'IFLA_EXT_MASK' ],
> > >  		[ 'HAVE_IFLA_PHYS_SWITCH_ID', 'linux/if_link.h',
> > >  		'IFLA_PHYS_SWITCH_ID' ],
> > >  		[ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h', diff --git
> > > a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> > > ad1975c..ea3d00c 100644
> > > --- a/drivers/net/mlx5/mlx5.c
> > > +++ b/drivers/net/mlx5/mlx5.c
> > > @@ -13,7 +13,6 @@
> > >  #include <errno.h>
> > >  #include <net/if.h>
> > >  #include <sys/mman.h>
> > > -#include <linux/netlink.h>
> > >  #include <linux/rtnetlink.h>
> > >
> > >  /* Verbs header. */
> > > @@ -1001,6 +1000,7 @@
> > >  	priv->nl_socket_route =	mlx5_nl_init(NETLINK_ROUTE);
> > >  	priv->nl_sn = 0;
> > >  	priv->representor = !!switch_info->representor;
> > > +	priv->master = !!switch_info->master;
> > >  	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
> > >  	priv->representor_id =
> > >  		switch_info->representor ? switch_info->port_name : -1; diff
> > > --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> > > a88cb4a..58bc37f 100644
> > > --- a/drivers/net/mlx5/mlx5.h
> > > +++ b/drivers/net/mlx5/mlx5.h
> > > @@ -214,6 +214,7 @@ struct mlx5_priv {
> > >  	uint16_t mtu; /* Configured MTU. */
> > >  	unsigned int isolated:1; /* Whether isolated mode is enabled. */
> > >  	unsigned int representor:1; /* Device is a port representor. */
> > > +	unsigned int master:1; /* Device is a E-Switch master. */
> > >  	uint16_t domain_id; /* Switch domain identifier. */
> > >  	int32_t representor_id; /* Port representor identifier. */
> > >  	/* RX/TX queues. */
> > > diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> > > b/drivers/net/mlx5/mlx5_ethdev.c index 84d761c..81f2a42 100644
> > > --- a/drivers/net/mlx5/mlx5_ethdev.c
> > > +++ b/drivers/net/mlx5/mlx5_ethdev.c
> > > @@ -1362,8 +1362,10 @@ int mlx5_fw_version_get(struct rte_eth_dev
> > > *dev, char *fw_ver, size_t fw_size)
> > >  		.port_name = 0,
> > >  		.switch_id = 0,
> > >  	};
> > > +	DIR *dir;
> > >  	bool port_name_set = false;
> > >  	bool port_switch_id_set = false;
> > > +	bool device_dir = false;
> > >  	char c;
> > >
> > >  	if (!if_indextoname(ifindex, ifname)) { @@ -1375,6 +1377,8 @@ int
> > > mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t
> > > fw_size)
> > >  	      ifname);
> > >  	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
> > >  	      ifname);
> > > +	MKSTR(pci_device, "/sys/class/net/%s/device",
> > > +	      ifname);
> > >
> > >  	file = fopen(phys_port_name, "rb");
> > >  	if (file != NULL) {
> > > @@ -1391,8 +1395,13 @@ int mlx5_fw_version_get(struct rte_eth_dev
> > > *dev, char *fw_ver, size_t fw_size)
> > >  		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
> > >  		c == '\n';
> > >  	fclose(file);
> > > -	data.master = port_switch_id_set && !port_name_set;
> > > -	data.representor = port_switch_id_set && port_name_set;
> > > +	dir = opendir(pci_device);
> > > +	if (dir != NULL) {
> > > +		closedir(dir);
> > > +		device_dir = true;
> > > +	}
> > > +	data.master = port_switch_id_set && (!port_name_set ||
> > > device_dir);
> > > +	data.representor = port_switch_id_set && port_name_set &&
> > > !device_dir;
> >
> > Add assert that device cannot be both master and representor.
> 
> Error checking would be but assert() is usually not a useful in drivers.
> It causes crash, and is often compiled out.

PMD is a user mode driver, so standard assert() seems to be relevant.
But I agree, it would be good for portable code to have its own
definition of assert. Say, "rte_assert". It would allow us to define/redefine
the code behavior if assertion fails.

As for me, I think asserts are EXTREMELY useful, it saves a lot of time while
debugging, and it is proved by my own practice of mlx5 PMD debugging
(beside other projects).  Assert inserted in right place stops the quite wrong
situation evolving and allows us to have a good catch and find
the root of the problem quickly.

WBR,
Slava
  
Stephen Hemminger March 21, 2019, 7:08 p.m. UTC | #4
On Thu, 21 Mar 2019 15:31:36 +0000
Slava Ovsiienko <viacheslavo@mellanox.com> wrote:

> Hi, Stephen
> 
> > -----Original Message-----
> > From: Stephen Hemminger <stephen@networkplumber.org>
> > Sent: Thursday, March 21, 2019 17:09
> > To: Shahaf Shuler <shahafs@mellanox.com>
> > Cc: Slava Ovsiienko <viacheslavo@mellanox.com>; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH 01/14] net/mlx5: add representor recognition
> > on kernels 5.x
> > 
> > On Thu, 21 Mar 2019 12:13:50 +0000
> > Shahaf Shuler <shahafs@mellanox.com> wrote:
> >   
> > > Hi Slava,
> > >
> > > Small comments below. Once fixed you can put my acked-by on the next  
> > version.  
> > >
> > > Thursday, March 21, 2019 10:11 AM, Viacheslav Ovsiienko:  
> > > > Subject: [PATCH 01/14] net/mlx5: add representor recognition on
> > > > kernels 5.x
> > > >
> > > > The master device and VF representors were distinguished by presence
> > > > of port name, master device did not have one. The new Linux kernels
> > > > starting from 5.0 provide the port name for master device and the
> > > > implemented representor recognizing method does not work.
> > > > The new recognizing method is based on quiering the VF number,
> > > > created on the base of the device.
> > > >
> > > > The IFLA_NUM_VF attribute is returned by kernel if IFLA_EXT_MASK
> > > > attribute is specified in the Netlink request message.
> > > >
> > > > Also the presence of device symlink in device sysfs folder is added
> > > > to distinguish representors with sysfs based method.
> > > >
> > > > Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> > > >
> > > > ---
> > > >
> > > > v3: - rebased over new port naming  
> > https://eur03.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatche
> > s.dpdk.org%2Fpatch%2F51245%2F&amp;data=02%7C01%7Cviacheslavo%40
> > mellanox.com%7C94cc885cbb8d4aade9dd08d6ae0f26cd%7Ca652971c7d2e4
> > d9ba6a4d149256f461b%7C0%7C0%7C636887777455248723&amp;sdata=FDq
> > 950ksokxsNac8cBM293W263uPfVeY1xA7Cx%2F4FLk%3D&amp;reserved=0  
> > > >     - master recognition is reinforced by checking vport for -1
> > > >       for new port naming schema
> > > >
> > > > v2: - fopen replaced with opendir to detect whether directory exists
> > > >
> > > > v1:
> > > > https://eur03.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpat
> > > >  
> > ches.dpdk.org%2Fpatch%2F50411%2F&amp;data=02%7C01%7Cviacheslavo%
> > 40me  
> > > >  
> > llanox.com%7C94cc885cbb8d4aade9dd08d6ae0f26cd%7Ca652971c7d2e4d9b
> > a6a4  
> > > >  
> > d149256f461b%7C0%7C0%7C636887777455248723&amp;sdata=JkWKbb6LV
> > diIHW%2  
> > > > FpJEQHcD7hvFLWdGmM%2BTVhM%2F%2F80Uk%3D&amp;reserved=0
> > > > ---
> > > >  drivers/net/mlx5/Makefile      | 10 ++++++++++
> > > >  drivers/net/mlx5/meson.build   |  4 ++++
> > > >  drivers/net/mlx5/mlx5.c        |  2 +-
> > > >  drivers/net/mlx5/mlx5.h        |  1 +
> > > >  drivers/net/mlx5/mlx5_ethdev.c | 13 +++++++++++--
> > > >  drivers/net/mlx5/mlx5_nl.c     | 36
> > > > +++++++++++++++++++++++++++++++++---
> > > >  6 files changed, 60 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> > > > index
> > > > 1ed299d..3dd7e38 100644
> > > > --- a/drivers/net/mlx5/Makefile
> > > > +++ b/drivers/net/mlx5/Makefile
> > > > @@ -231,6 +231,16 @@ mlx5_autoconf.h.new:
> > > > $(RTE_SDK)/buildtools/auto- config-h.sh
> > > >  		enum RDMA_NLDEV_ATTR_NDEV_INDEX \
> > > >  		$(AUTOCONF_OUTPUT)
> > > >  	$Q sh -- '$<' '$@' \
> > > > +		HAVE_IFLA_NUM_VF \
> > > > +		linux/if_link.h \
> > > > +		enum IFLA_NUM_VF \
> > > > +		$(AUTOCONF_OUTPUT)
> > > > +	$Q sh -- '$<' '$@' \
> > > > +		HAVE_IFLA_EXT_MASK \
> > > > +		linux/if_link.h \
> > > > +		enum IFLA_EXT_MASK \
> > > > +		$(AUTOCONF_OUTPUT)
> > > > +	$Q sh -- '$<' '$@' \
> > > >  		HAVE_IFLA_PHYS_SWITCH_ID \
> > > >  		linux/if_link.h \
> > > >  		enum IFLA_PHYS_SWITCH_ID \
> > > > diff --git a/drivers/net/mlx5/meson.build
> > > > b/drivers/net/mlx5/meson.build index 0cf2f08..e3cb9bc 100644
> > > > --- a/drivers/net/mlx5/meson.build
> > > > +++ b/drivers/net/mlx5/meson.build
> > > > @@ -133,6 +133,10 @@ if build
> > > >  		'ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT' ],
> > > >  		[ 'HAVE_ETHTOOL_LINK_MODE_100G', 'linux/ethtool.h',
> > > >  		'ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT' ],
> > > > +		[ 'HAVE_IFLA_NUM_VF', 'linux/if_link.h',
> > > > +		'IFLA_NUM_VF' ],
> > > > +		[ 'HAVE_IFLA_EXT_MASK', 'linux/if_link.h',
> > > > +		'IFLA_EXT_MASK' ],
> > > >  		[ 'HAVE_IFLA_PHYS_SWITCH_ID', 'linux/if_link.h',
> > > >  		'IFLA_PHYS_SWITCH_ID' ],
> > > >  		[ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h', diff --git
> > > > a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> > > > ad1975c..ea3d00c 100644
> > > > --- a/drivers/net/mlx5/mlx5.c
> > > > +++ b/drivers/net/mlx5/mlx5.c
> > > > @@ -13,7 +13,6 @@
> > > >  #include <errno.h>
> > > >  #include <net/if.h>
> > > >  #include <sys/mman.h>
> > > > -#include <linux/netlink.h>
> > > >  #include <linux/rtnetlink.h>
> > > >
> > > >  /* Verbs header. */
> > > > @@ -1001,6 +1000,7 @@
> > > >  	priv->nl_socket_route =	mlx5_nl_init(NETLINK_ROUTE);
> > > >  	priv->nl_sn = 0;
> > > >  	priv->representor = !!switch_info->representor;
> > > > +	priv->master = !!switch_info->master;
> > > >  	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
> > > >  	priv->representor_id =
> > > >  		switch_info->representor ? switch_info->port_name : -1; diff
> > > > --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> > > > a88cb4a..58bc37f 100644
> > > > --- a/drivers/net/mlx5/mlx5.h
> > > > +++ b/drivers/net/mlx5/mlx5.h
> > > > @@ -214,6 +214,7 @@ struct mlx5_priv {
> > > >  	uint16_t mtu; /* Configured MTU. */
> > > >  	unsigned int isolated:1; /* Whether isolated mode is enabled. */
> > > >  	unsigned int representor:1; /* Device is a port representor. */
> > > > +	unsigned int master:1; /* Device is a E-Switch master. */
> > > >  	uint16_t domain_id; /* Switch domain identifier. */
> > > >  	int32_t representor_id; /* Port representor identifier. */
> > > >  	/* RX/TX queues. */
> > > > diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> > > > b/drivers/net/mlx5/mlx5_ethdev.c index 84d761c..81f2a42 100644
> > > > --- a/drivers/net/mlx5/mlx5_ethdev.c
> > > > +++ b/drivers/net/mlx5/mlx5_ethdev.c
> > > > @@ -1362,8 +1362,10 @@ int mlx5_fw_version_get(struct rte_eth_dev
> > > > *dev, char *fw_ver, size_t fw_size)
> > > >  		.port_name = 0,
> > > >  		.switch_id = 0,
> > > >  	};
> > > > +	DIR *dir;
> > > >  	bool port_name_set = false;
> > > >  	bool port_switch_id_set = false;
> > > > +	bool device_dir = false;
> > > >  	char c;
> > > >
> > > >  	if (!if_indextoname(ifindex, ifname)) { @@ -1375,6 +1377,8 @@ int
> > > > mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t
> > > > fw_size)
> > > >  	      ifname);
> > > >  	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
> > > >  	      ifname);
> > > > +	MKSTR(pci_device, "/sys/class/net/%s/device",
> > > > +	      ifname);
> > > >
> > > >  	file = fopen(phys_port_name, "rb");
> > > >  	if (file != NULL) {
> > > > @@ -1391,8 +1395,13 @@ int mlx5_fw_version_get(struct rte_eth_dev
> > > > *dev, char *fw_ver, size_t fw_size)
> > > >  		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
> > > >  		c == '\n';
> > > >  	fclose(file);
> > > > -	data.master = port_switch_id_set && !port_name_set;
> > > > -	data.representor = port_switch_id_set && port_name_set;
> > > > +	dir = opendir(pci_device);
> > > > +	if (dir != NULL) {
> > > > +		closedir(dir);
> > > > +		device_dir = true;
> > > > +	}
> > > > +	data.master = port_switch_id_set && (!port_name_set ||
> > > > device_dir);
> > > > +	data.representor = port_switch_id_set && port_name_set &&
> > > > !device_dir;  
> > >
> > > Add assert that device cannot be both master and representor.  
> > 
> > Error checking would be but assert() is usually not a useful in drivers.
> > It causes crash, and is often compiled out.  
> 
> PMD is a user mode driver, so standard assert() seems to be relevant.
> But I agree, it would be good for portable code to have its own
> definition of assert. Say, "rte_assert". It would allow us to define/redefine
> the code behavior if assertion fails.
> 
> As for me, I think asserts are EXTREMELY useful, it saves a lot of time while
> debugging, and it is proved by my own practice of mlx5 PMD debugging
> (beside other projects).  Assert inserted in right place stops the quite wrong
> situation evolving and allows us to have a good catch and find
> the root of the problem quickly.
> 
> WBR,
> Slava

You may misunderstand what I meant. Asserts are useful, but they need to be
used only when real error handling is not possible. In general, it is better
to log and return an error on invalid data than crash the whole application.
Especially since DPDK now supports hot plug and it could be that device is
added to a working application.

For me, the worry is that an application on Azure starts up using synthetic
datapath successfully, and the hot plug of VF (accelerated networking) might
expose a driver bug. In that case, there still is a fallback to ignore the
mlx device.
  
Slava Ovsiienko March 22, 2019, 8:15 a.m. UTC | #5
> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Thursday, March 21, 2019 21:09
> To: Slava Ovsiienko <viacheslavo@mellanox.com>
> Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 01/14] net/mlx5: add representor recognition
> on kernels 5.x
> 
> On Thu, 21 Mar 2019 15:31:36 +0000
> Slava Ovsiienko <viacheslavo@mellanox.com> wrote:
> 
> > Hi, Stephen
> >
> > > -----Original Message-----
> > > From: Stephen Hemminger <stephen@networkplumber.org>
> > > Sent: Thursday, March 21, 2019 17:09
> > > To: Shahaf Shuler <shahafs@mellanox.com>
> > > Cc: Slava Ovsiienko <viacheslavo@mellanox.com>; dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH 01/14] net/mlx5: add representor
> > > recognition on kernels 5.x
> > >
> > > On Thu, 21 Mar 2019 12:13:50 +0000
> > > Shahaf Shuler <shahafs@mellanox.com> wrote:
> > >
> > > > Hi Slava,
> > > >
> > > > Small comments below. Once fixed you can put my acked-by on the
> > > > next
> > > version.
> > > >
> > > > Thursday, March 21, 2019 10:11 AM, Viacheslav Ovsiienko:
> > > > > Subject: [PATCH 01/14] net/mlx5: add representor recognition on
> > > > > kernels 5.x
> > > > >
> > > > > The master device and VF representors were distinguished by
> > > > > presence of port name, master device did not have one. The new
> > > > > Linux kernels starting from 5.0 provide the port name for master
> > > > > device and the implemented representor recognizing method does
> not work.
> > > > > The new recognizing method is based on quiering the VF number,
> > > > > created on the base of the device.
> > > > >
> > > > > The IFLA_NUM_VF attribute is returned by kernel if IFLA_EXT_MASK
> > > > > attribute is specified in the Netlink request message.
> > > > >
> > > > > Also the presence of device symlink in device sysfs folder is
> > > > > added to distinguish representors with sysfs based method.
> > > > >
> > > > > Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> > > > >
> > > > > ---
> > > > >
> > > > > v3: - rebased over new port naming
> > > https://eur03.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpat
> > > che
> > >
> s.dpdk.org%2Fpatch%2F51245%2F&amp;data=02%7C01%7Cviacheslavo%40
> > >
> mellanox.com%7C94cc885cbb8d4aade9dd08d6ae0f26cd%7Ca652971c7d2e4
> > >
> d9ba6a4d149256f461b%7C0%7C0%7C636887777455248723&amp;sdata=FDq
> > >
> 950ksokxsNac8cBM293W263uPfVeY1xA7Cx%2F4FLk%3D&amp;reserved=0
> > > > >     - master recognition is reinforced by checking vport for -1
> > > > >       for new port naming schema
> > > > >
> > > > > v2: - fopen replaced with opendir to detect whether directory
> > > > > exists
> > > > >
> > > > > v1:
> > > > > https://eur03.safelinks.protection.outlook.com/?url=http%3A%2F%2
> > > > > Fpat
> > > > >
> > >
> ches.dpdk.org%2Fpatch%2F50411%2F&amp;data=02%7C01%7Cviacheslavo%
> > > 40me
> > > > >
> > >
> llanox.com%7C94cc885cbb8d4aade9dd08d6ae0f26cd%7Ca652971c7d2e4d9b
> > > a6a4
> > > > >
> > >
> d149256f461b%7C0%7C0%7C636887777455248723&amp;sdata=JkWKbb6LV
> > > diIHW%2
> > > > >
> FpJEQHcD7hvFLWdGmM%2BTVhM%2F%2F80Uk%3D&amp;reserved=0
> > > > > ---
> > > > >  drivers/net/mlx5/Makefile      | 10 ++++++++++
> > > > >  drivers/net/mlx5/meson.build   |  4 ++++
> > > > >  drivers/net/mlx5/mlx5.c        |  2 +-
> > > > >  drivers/net/mlx5/mlx5.h        |  1 +
> > > > >  drivers/net/mlx5/mlx5_ethdev.c | 13 +++++++++++--
> > > > >  drivers/net/mlx5/mlx5_nl.c     | 36
> > > > > +++++++++++++++++++++++++++++++++---
> > > > >  6 files changed, 60 insertions(+), 6 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/mlx5/Makefile
> > > > > b/drivers/net/mlx5/Makefile index
> > > > > 1ed299d..3dd7e38 100644
> > > > > --- a/drivers/net/mlx5/Makefile
> > > > > +++ b/drivers/net/mlx5/Makefile
> > > > > @@ -231,6 +231,16 @@ mlx5_autoconf.h.new:
> > > > > $(RTE_SDK)/buildtools/auto- config-h.sh
> > > > >  		enum RDMA_NLDEV_ATTR_NDEV_INDEX \
> > > > >  		$(AUTOCONF_OUTPUT)
> > > > >  	$Q sh -- '$<' '$@' \
> > > > > +		HAVE_IFLA_NUM_VF \
> > > > > +		linux/if_link.h \
> > > > > +		enum IFLA_NUM_VF \
> > > > > +		$(AUTOCONF_OUTPUT)
> > > > > +	$Q sh -- '$<' '$@' \
> > > > > +		HAVE_IFLA_EXT_MASK \
> > > > > +		linux/if_link.h \
> > > > > +		enum IFLA_EXT_MASK \
> > > > > +		$(AUTOCONF_OUTPUT)
> > > > > +	$Q sh -- '$<' '$@' \
> > > > >  		HAVE_IFLA_PHYS_SWITCH_ID \
> > > > >  		linux/if_link.h \
> > > > >  		enum IFLA_PHYS_SWITCH_ID \
> > > > > diff --git a/drivers/net/mlx5/meson.build
> > > > > b/drivers/net/mlx5/meson.build index 0cf2f08..e3cb9bc 100644
> > > > > --- a/drivers/net/mlx5/meson.build
> > > > > +++ b/drivers/net/mlx5/meson.build
> > > > > @@ -133,6 +133,10 @@ if build
> > > > >  		'ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT' ],
> > > > >  		[ 'HAVE_ETHTOOL_LINK_MODE_100G', 'linux/ethtool.h',
> > > > >  		'ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT' ],
> > > > > +		[ 'HAVE_IFLA_NUM_VF', 'linux/if_link.h',
> > > > > +		'IFLA_NUM_VF' ],
> > > > > +		[ 'HAVE_IFLA_EXT_MASK', 'linux/if_link.h',
> > > > > +		'IFLA_EXT_MASK' ],
> > > > >  		[ 'HAVE_IFLA_PHYS_SWITCH_ID', 'linux/if_link.h',
> > > > >  		'IFLA_PHYS_SWITCH_ID' ],
> > > > >  		[ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h', diff --git
> > > > > a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> > > > > ad1975c..ea3d00c 100644
> > > > > --- a/drivers/net/mlx5/mlx5.c
> > > > > +++ b/drivers/net/mlx5/mlx5.c
> > > > > @@ -13,7 +13,6 @@
> > > > >  #include <errno.h>
> > > > >  #include <net/if.h>
> > > > >  #include <sys/mman.h>
> > > > > -#include <linux/netlink.h>
> > > > >  #include <linux/rtnetlink.h>
> > > > >
> > > > >  /* Verbs header. */
> > > > > @@ -1001,6 +1000,7 @@
> > > > >  	priv->nl_socket_route =	mlx5_nl_init(NETLINK_ROUTE);
> > > > >  	priv->nl_sn = 0;
> > > > >  	priv->representor = !!switch_info->representor;
> > > > > +	priv->master = !!switch_info->master;
> > > > >  	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
> > > > >  	priv->representor_id =
> > > > >  		switch_info->representor ? switch_info->port_name : -1; diff
> > > > > --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> > > > > a88cb4a..58bc37f 100644
> > > > > --- a/drivers/net/mlx5/mlx5.h
> > > > > +++ b/drivers/net/mlx5/mlx5.h
> > > > > @@ -214,6 +214,7 @@ struct mlx5_priv {
> > > > >  	uint16_t mtu; /* Configured MTU. */
> > > > >  	unsigned int isolated:1; /* Whether isolated mode is enabled. */
> > > > >  	unsigned int representor:1; /* Device is a port representor.
> > > > > */
> > > > > +	unsigned int master:1; /* Device is a E-Switch master. */
> > > > >  	uint16_t domain_id; /* Switch domain identifier. */
> > > > >  	int32_t representor_id; /* Port representor identifier. */
> > > > >  	/* RX/TX queues. */
> > > > > diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> > > > > b/drivers/net/mlx5/mlx5_ethdev.c index 84d761c..81f2a42 100644
> > > > > --- a/drivers/net/mlx5/mlx5_ethdev.c
> > > > > +++ b/drivers/net/mlx5/mlx5_ethdev.c
> > > > > @@ -1362,8 +1362,10 @@ int mlx5_fw_version_get(struct
> > > > > rte_eth_dev *dev, char *fw_ver, size_t fw_size)
> > > > >  		.port_name = 0,
> > > > >  		.switch_id = 0,
> > > > >  	};
> > > > > +	DIR *dir;
> > > > >  	bool port_name_set = false;
> > > > >  	bool port_switch_id_set = false;
> > > > > +	bool device_dir = false;
> > > > >  	char c;
> > > > >
> > > > >  	if (!if_indextoname(ifindex, ifname)) { @@ -1375,6 +1377,8 @@
> > > > > int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver,
> > > > > size_t
> > > > > fw_size)
> > > > >  	      ifname);
> > > > >  	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
> > > > >  	      ifname);
> > > > > +	MKSTR(pci_device, "/sys/class/net/%s/device",
> > > > > +	      ifname);
> > > > >
> > > > >  	file = fopen(phys_port_name, "rb");
> > > > >  	if (file != NULL) {
> > > > > @@ -1391,8 +1395,13 @@ int mlx5_fw_version_get(struct
> > > > > rte_eth_dev *dev, char *fw_ver, size_t fw_size)
> > > > >  		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
> > > > >  		c == '\n';
> > > > >  	fclose(file);
> > > > > -	data.master = port_switch_id_set && !port_name_set;
> > > > > -	data.representor = port_switch_id_set && port_name_set;
> > > > > +	dir = opendir(pci_device);
> > > > > +	if (dir != NULL) {
> > > > > +		closedir(dir);
> > > > > +		device_dir = true;
> > > > > +	}
> > > > > +	data.master = port_switch_id_set && (!port_name_set ||
> > > > > device_dir);
> > > > > +	data.representor = port_switch_id_set && port_name_set
> &&
> > > > > !device_dir;
> > > >
> > > > Add assert that device cannot be both master and representor.
> > >
> > > Error checking would be but assert() is usually not a useful in drivers.
> > > It causes crash, and is often compiled out.
> >
> > PMD is a user mode driver, so standard assert() seems to be relevant.
> > But I agree, it would be good for portable code to have its own
> > definition of assert. Say, "rte_assert". It would allow us to
> > define/redefine the code behavior if assertion fails.
> >
> > As for me, I think asserts are EXTREMELY useful, it saves a lot of
> > time while debugging, and it is proved by my own practice of mlx5 PMD
> > debugging (beside other projects).  Assert inserted in right place
> > stops the quite wrong situation evolving and allows us to have a good
> > catch and find the root of the problem quickly.
> >
> > WBR,
> > Slava
> 
> You may misunderstand what I meant. Asserts are useful, but they need to
> be used only when real error handling is not possible. In general, it is better
> to log and return an error on invalid data than crash the whole application.

Yes, assert is not a replacement for error handling in any way. Assert is a "last chance"
debug tool, if assert condition is not met it means something is going in very
wrong way and often we have a very few steps before final application crash. 
Assert just allows us to be a step ahead of crash, nothing else. I totally agree
that we should handle errors whenever it is possible, but asserts are not regarding
this. Error handling and asserts are orthogonal things, IMO.

For example, in my patch, Shahaf's proposed asserts is just to ensure the
logic above works in correct way. Actually we can just review this logic to make
sure there is no invalid output combinations produced (and we've reviewed, of course).
But this logic can be changed occasionally for some reasons.  And proposed asserts
will help to locate this erroneous/not covered change instantly. Obviously it is not a case
we should tell user/application about error, it is a case of developer failure 😊. We just
use assert here as free of charge debug check, nothing else.
 
> Especially since DPDK now supports hot plug and it could be that device is
> added to a working application.
> 
> For me, the worry is that an application on Azure starts up using synthetic
> datapath successfully, and the hot plug of VF (accelerated networking) might
> expose a driver bug. In that case, there still is a fallback to ignore the mlx
> device.
There should no be asserts in release version. Assert is just a debug tool, should
we be worried too much about debug versions? By the way, "to expose driver bug" is
exactly our intention when we are running the debug version, isn't it?

With best regards,
Slava
  

Patch

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 1ed299d..3dd7e38 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -231,6 +231,16 @@  mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		enum RDMA_NLDEV_ATTR_NDEV_INDEX \
 		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
+		HAVE_IFLA_NUM_VF \
+		linux/if_link.h \
+		enum IFLA_NUM_VF \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_IFLA_EXT_MASK \
+		linux/if_link.h \
+		enum IFLA_EXT_MASK \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
 		HAVE_IFLA_PHYS_SWITCH_ID \
 		linux/if_link.h \
 		enum IFLA_PHYS_SWITCH_ID \
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index 0cf2f08..e3cb9bc 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -133,6 +133,10 @@  if build
 		'ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT' ],
 		[ 'HAVE_ETHTOOL_LINK_MODE_100G', 'linux/ethtool.h',
 		'ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT' ],
+		[ 'HAVE_IFLA_NUM_VF', 'linux/if_link.h',
+		'IFLA_NUM_VF' ],
+		[ 'HAVE_IFLA_EXT_MASK', 'linux/if_link.h',
+		'IFLA_EXT_MASK' ],
 		[ 'HAVE_IFLA_PHYS_SWITCH_ID', 'linux/if_link.h',
 		'IFLA_PHYS_SWITCH_ID' ],
 		[ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h',
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index ad1975c..ea3d00c 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -13,7 +13,6 @@ 
 #include <errno.h>
 #include <net/if.h>
 #include <sys/mman.h>
-#include <linux/netlink.h>
 #include <linux/rtnetlink.h>
 
 /* Verbs header. */
@@ -1001,6 +1000,7 @@ 
 	priv->nl_socket_route =	mlx5_nl_init(NETLINK_ROUTE);
 	priv->nl_sn = 0;
 	priv->representor = !!switch_info->representor;
+	priv->master = !!switch_info->master;
 	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
 	priv->representor_id =
 		switch_info->representor ? switch_info->port_name : -1;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index a88cb4a..58bc37f 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -214,6 +214,7 @@  struct mlx5_priv {
 	uint16_t mtu; /* Configured MTU. */
 	unsigned int isolated:1; /* Whether isolated mode is enabled. */
 	unsigned int representor:1; /* Device is a port representor. */
+	unsigned int master:1; /* Device is a E-Switch master. */
 	uint16_t domain_id; /* Switch domain identifier. */
 	int32_t representor_id; /* Port representor identifier. */
 	/* RX/TX queues. */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 84d761c..81f2a42 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1362,8 +1362,10 @@  int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size)
 		.port_name = 0,
 		.switch_id = 0,
 	};
+	DIR *dir;
 	bool port_name_set = false;
 	bool port_switch_id_set = false;
+	bool device_dir = false;
 	char c;
 
 	if (!if_indextoname(ifindex, ifname)) {
@@ -1375,6 +1377,8 @@  int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size)
 	      ifname);
 	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
 	      ifname);
+	MKSTR(pci_device, "/sys/class/net/%s/device",
+	      ifname);
 
 	file = fopen(phys_port_name, "rb");
 	if (file != NULL) {
@@ -1391,8 +1395,13 @@  int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size)
 		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
 		c == '\n';
 	fclose(file);
-	data.master = port_switch_id_set && !port_name_set;
-	data.representor = port_switch_id_set && port_name_set;
+	dir = opendir(pci_device);
+	if (dir != NULL) {
+		closedir(dir);
+		device_dir = true;
+	}
+	data.master = port_switch_id_set && (!port_name_set || device_dir);
+	data.representor = port_switch_id_set && port_name_set && !device_dir;
 	*info = data;
 	return 0;
 }
diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c
index 8a10109..aa49cb4 100644
--- a/drivers/net/mlx5/mlx5_nl.c
+++ b/drivers/net/mlx5/mlx5_nl.c
@@ -65,6 +65,12 @@ 
 #endif
 
 /* These are normally found in linux/if_link.h. */
+#ifndef HAVE_IFLA_NUM_VF
+#define IFLA_NUM_VF 21
+#endif
+#ifndef HAVE_IFLA_EXT_MASK
+#define IFLA_EXT_MASK 29
+#endif
 #ifndef HAVE_IFLA_PHYS_SWITCH_ID
 #define IFLA_PHYS_SWITCH_ID 36
 #endif
@@ -837,6 +843,7 @@  struct mlx5_nl_ifindex_data {
 	size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
 	bool port_name_set = false;
 	bool switch_id_set = false;
+	bool num_vf_set = false;
 
 	if (nh->nlmsg_type != RTM_NEWLINK)
 		goto error;
@@ -848,6 +855,9 @@  struct mlx5_nl_ifindex_data {
 		if (ra->rta_len > nh->nlmsg_len - off)
 			goto error;
 		switch (ra->rta_type) {
+		case IFLA_NUM_VF:
+			num_vf_set = true;
+			break;
 		case IFLA_PHYS_PORT_NAME:
 			port_name_set =
 				mlx5_translate_port_name((char *)payload,
@@ -864,8 +874,19 @@  struct mlx5_nl_ifindex_data {
 		}
 		off += RTA_ALIGN(ra->rta_len);
 	}
-	info.master = switch_id_set && !port_name_set;
-	info.representor = switch_id_set && port_name_set;
+	if (switch_id_set) {
+		if (info.port_name_new) {
+			/* New representors naming schema. */
+			if (port_name_set) {
+				info.master = (info.port_name == -1);
+				info.representor = (info.port_name != -1);
+			}
+		} else {
+			/* Legacy representors naming schema. */
+			info.master = (!port_name_set || num_vf_set);
+			info.representor = port_name_set && !num_vf_set;
+		}
+	}
 	memcpy(arg, &info, sizeof(info));
 	return 0;
 error:
@@ -893,9 +914,13 @@  struct mlx5_nl_ifindex_data {
 	struct {
 		struct nlmsghdr nh;
 		struct ifinfomsg info;
+		struct rtattr rta;
+		uint32_t extmask;
 	} req = {
 		.nh = {
-			.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)),
+			.nlmsg_len = NLMSG_LENGTH
+					(sizeof(req.info) +
+					 RTA_LENGTH(sizeof(uint32_t))),
 			.nlmsg_type = RTM_GETLINK,
 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
 		},
@@ -903,6 +928,11 @@  struct mlx5_nl_ifindex_data {
 			.ifi_family = AF_UNSPEC,
 			.ifi_index = ifindex,
 		},
+		.rta = {
+			.rta_type = IFLA_EXT_MASK,
+			.rta_len = RTA_LENGTH(sizeof(int32_t)),
+		},
+		.extmask = RTE_LE32(1),
 	};
 	int ret;