[v2,1/1] net/mlx5: add representor recognition on kernels 5.x

Message ID 1551092489-12247-1-git-send-email-viacheslavo@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler
Headers
Series [v2,1/1] net/mlx5: add representor recognition on kernels 5.x |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/mellanox-Performance-Testing success Performance Testing PASS
ci/intel-Performance-Testing success Performance Testing PASS

Commit Message

Slava Ovsiienko Feb. 25, 2019, 11:01 a.m. UTC
  The master device and VF representors were distinguished by
presence of port name, master device did not have one. The new Linux
kernels starting from 5.0 provide the port name for master device
and the implemented representor recognizing method does not work.
The new recognizing method is based on quiering the VF number,
created on the base of the device.

The IFLA_NUM_VF attribute is returned by kernel if IFLA_EXT_MASK
attribute is specified in the Netlink request message.

Also the presence of device symlink in device sysfs folder is
added to distinguish representors with sysfs based method.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>

---
v2:
    fopen is replaced with opendir to detect whether directory exists

v1:
    http://patches.dpdk.org/patch/50411/
---
 drivers/net/mlx5/Makefile      | 10 ++++++++++
 drivers/net/mlx5/meson.build   |  4 ++++
 drivers/net/mlx5/mlx5_ethdev.c | 13 +++++++++++--
 drivers/net/mlx5/mlx5_nl.c     | 25 ++++++++++++++++++++++---
 4 files changed, 47 insertions(+), 5 deletions(-)
  

Comments

Shahaf Shuler March 7, 2019, 8:44 a.m. UTC | #1
Monday, February 25, 2019 1:01 PM, Viacheslav Ovsiienko:
> Subject: [dpdk-dev] [PATCH v2 1/1] net/mlx5: add representor recognition
> on kernels 5.x
> 
> The master device and VF representors were distinguished by presence of
> port name, master device did not have one. The new Linux kernels starting
> from 5.0 provide the port name for master device and the implemented
> representor recognizing method does not work.
> The new recognizing method is based on quiering the VF number, created on
> the base of the device.
> 
> The IFLA_NUM_VF attribute is returned by kernel if IFLA_EXT_MASK
> attribute is specified in the Netlink request message.
> 
> Also the presence of device symlink in device sysfs folder is added to
> distinguish representors with sysfs based method.

w/ kernel 5.x, there is also a new naming scheme for representor, right?
Wouldn't it be simpler to use it in order to decide uplink/regular representor? 

There is already patch for it from Dekel:
https://patches.dpdk.org/patch/50771/

> 
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> 
> ---
> v2:
>     fopen is replaced with opendir to detect whether directory exists
> 
> v1:
> 
> https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatc
> hes.dpdk.org%2Fpatch%2F50411%2F&amp;data=02%7C01%7Cshahafs%40m
> ellanox.com%7C570dc79e8c0f40b23a7208d69b109fa0%7Ca652971c7d2e4d9b
> a6a4d149256f461b%7C0%7C0%7C636866893031018956&amp;sdata=jftQKX8B
> d1ZVcT38q6TLSukJYMo9VRIyEPEHhFu00S4%3D&amp;reserved=0
> ---
>  drivers/net/mlx5/Makefile      | 10 ++++++++++
>  drivers/net/mlx5/meson.build   |  4 ++++
>  drivers/net/mlx5/mlx5_ethdev.c | 13 +++++++++++--
>  drivers/net/mlx5/mlx5_nl.c     | 25 ++++++++++++++++++++++---
>  4 files changed, 47 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index
> 9a7da18..c326494 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -226,6 +226,16 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-
> config-h.sh
>  		enum RDMA_NLDEV_ATTR_NDEV_INDEX \
>  		$(AUTOCONF_OUTPUT)
>  	$Q sh -- '$<' '$@' \
> +		HAVE_IFLA_NUM_VF \
> +		linux/if_link.h \
> +		enum IFLA_NUM_VF \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_IFLA_EXT_MASK \
> +		linux/if_link.h \
> +		enum IFLA_EXT_MASK \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
>  		HAVE_IFLA_PHYS_SWITCH_ID \
>  		linux/if_link.h \
>  		enum IFLA_PHYS_SWITCH_ID \
> diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> index 4540c45..0bb2f3a 100644
> --- a/drivers/net/mlx5/meson.build
> +++ b/drivers/net/mlx5/meson.build
> @@ -133,6 +133,10 @@ if build
>  		'ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT' ],
>  		[ 'HAVE_ETHTOOL_LINK_MODE_100G', 'linux/ethtool.h',
>  		'ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT' ],
> +		[ 'HAVE_IFLA_NUM_VF', 'linux/if_link.h',
> +		'IFLA_NUM_VF' ],
> +		[ 'HAVE_IFLA_EXT_MASK', 'linux/if_link.h',
> +		'IFLA_EXT_MASK' ],
>  		[ 'HAVE_IFLA_PHYS_SWITCH_ID', 'linux/if_link.h',
>  		'IFLA_PHYS_SWITCH_ID' ],
>  		[ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h', diff --git
> a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index
> 8158b4a..c30fcb0 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -1354,9 +1354,11 @@ int mlx5_fw_version_get(struct rte_eth_dev
> *dev, char *fw_ver, size_t fw_size)  {
>  	char ifname[IF_NAMESIZE];
>  	FILE *file;
> +	DIR *dir;
>  	struct mlx5_switch_info data = { .master = 0, };
>  	bool port_name_set = false;
>  	bool port_switch_id_set = false;
> +	bool device_dir = false;
>  	char c;
> 
>  	if (!if_indextoname(ifindex, ifname)) { @@ -1368,6 +1370,8 @@ int
> mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t
> fw_size)
>  	      ifname);
>  	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
>  	      ifname);
> +	MKSTR(pci_device, "/sys/class/net/%s/device",
> +	      ifname);
> 
>  	file = fopen(phys_port_name, "rb");
>  	if (file != NULL) {
> @@ -1385,8 +1389,13 @@ int mlx5_fw_version_get(struct rte_eth_dev
> *dev, char *fw_ver, size_t fw_size)
>  		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
>  		c == '\n';
>  	fclose(file);
> -	data.master = port_switch_id_set && !port_name_set;
> -	data.representor = port_switch_id_set && port_name_set;
> +	dir = opendir(pci_device);
> +	if (dir != NULL) {
> +		closedir(dir);
> +		device_dir = true;
> +	}
> +	data.master = port_switch_id_set && (!port_name_set ||
> device_dir);
> +	data.representor = port_switch_id_set && port_name_set &&
> !device_dir;
>  	*info = data;
>  	return 0;
>  }
> diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c index
> d61826a..694edf1 100644
> --- a/drivers/net/mlx5/mlx5_nl.c
> +++ b/drivers/net/mlx5/mlx5_nl.c
> @@ -65,6 +65,12 @@
>  #endif
> 
>  /* These are normally found in linux/if_link.h. */
> +#ifndef HAVE_IFLA_NUM_VF
> +#define IFLA_NUM_VF 21
> +#endif
> +#ifndef HAVE_IFLA_EXT_MASK
> +#define IFLA_EXT_MASK 29
> +#endif
>  #ifndef HAVE_IFLA_PHYS_SWITCH_ID
>  #define IFLA_PHYS_SWITCH_ID 36
>  #endif
> @@ -836,6 +842,7 @@ struct mlx5_nl_ifindex_data {
>  	size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
>  	bool port_name_set = false;
>  	bool switch_id_set = false;
> +	bool num_vf_set = false;
> 
>  	if (nh->nlmsg_type != RTM_NEWLINK)
>  		goto error;
> @@ -848,6 +855,9 @@ struct mlx5_nl_ifindex_data {
>  		if (ra->rta_len > nh->nlmsg_len - off)
>  			goto error;
>  		switch (ra->rta_type) {
> +		case IFLA_NUM_VF:
> +			num_vf_set = true;
> +			break;
>  		case IFLA_PHYS_PORT_NAME:
>  			errno = 0;
>  			info.port_name = strtol(payload, &end, 0); @@ -
> 867,8 +877,8 @@ struct mlx5_nl_ifindex_data {
>  		}
>  		off += RTA_ALIGN(ra->rta_len);
>  	}
> -	info.master = switch_id_set && !port_name_set;
> -	info.representor = switch_id_set && port_name_set;
> +	info.master = switch_id_set && (!port_name_set || num_vf_set);
> +	info.representor = switch_id_set && port_name_set &&
> !num_vf_set;
>  	memcpy(arg, &info, sizeof(info));
>  	return 0;
>  error:
> @@ -896,9 +906,13 @@ struct mlx5_nl_ifindex_data {
>  	struct {
>  		struct nlmsghdr nh;
>  		struct ifinfomsg info;
> +		struct rtattr rta;
> +		uint32_t extmask;
>  	} req = {
>  		.nh = {
> -			.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)),
> +			.nlmsg_len = NLMSG_LENGTH
> +					(sizeof(req.info) +
> +					 RTA_LENGTH(sizeof(uint32_t))),
>  			.nlmsg_type = RTM_GETLINK,
>  			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
>  		},
> @@ -906,6 +920,11 @@ struct mlx5_nl_ifindex_data {
>  			.ifi_family = AF_UNSPEC,
>  			.ifi_index = ifindex,
>  		},
> +		.rta = {
> +			.rta_type = IFLA_EXT_MASK,
> +			.rta_len = RTA_LENGTH(sizeof(int32_t)),
> +		},
> +		.extmask = RTE_LE32(1),
>  	};
>  	int ret;
> 
> --
> 1.8.3.1
  
Slava Ovsiienko March 7, 2019, 11:12 a.m. UTC | #2
> -----Original Message-----
> From: Shahaf Shuler
> Sent: Thursday, March 7, 2019 10:44
> To: Slava Ovsiienko <viacheslavo@mellanox.com>
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v2 1/1] net/mlx5: add representor
> recognition on kernels 5.x
> 
> Monday, February 25, 2019 1:01 PM, Viacheslav Ovsiienko:
> > Subject: [dpdk-dev] [PATCH v2 1/1] net/mlx5: add representor
> > recognition on kernels 5.x
> >
> > The master device and VF representors were distinguished by presence
> > of port name, master device did not have one. The new Linux kernels
> > starting from 5.0 provide the port name for master device and the
> > implemented representor recognizing method does not work.
> > The new recognizing method is based on quiering the VF number, created
> > on the base of the device.
> >
> > The IFLA_NUM_VF attribute is returned by kernel if IFLA_EXT_MASK
> > attribute is specified in the Netlink request message.
> >
> > Also the presence of device symlink in device sysfs folder is added to
> > distinguish representors with sysfs based method.
> 
> w/ kernel 5.x, there is also a new naming scheme for representor, right?
> Wouldn't it be simpler to use it in order to decide uplink/regular
> representor?

There should not be any assumption regarding port index, we can't say
for sure on which port index we have uplink representor. It is possible
to compare the port name against -1, but it is quite possible situation 
(old kernels with new drivers, I had been working with such setup
for a while) in that have the multiport Infiniband device and old naming and
we are unable recognize the Uplink rep by port name. 

So, it seems to me, the querying numVF/sysfs method is more reliable and
works for all settings I've tested.

> 
> There is already patch for it from Dekel:
> https://patches.dpdk.org/patch/50771/

Yes, It's included in RFC because patchset is not functional w/o Dekel's patch.
The patchset will be rebased and Dekel's commit will be excluded in the
next versions.

> 
> >
> > Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> >
> > ---
> > v2:
> >     fopen is replaced with opendir to detect whether directory exists
> >
> > v1:
> >
> >
> https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatc
> >
> hes.dpdk.org%2Fpatch%2F50411%2F&amp;data=02%7C01%7Cshahafs%40m
> >
> ellanox.com%7C570dc79e8c0f40b23a7208d69b109fa0%7Ca652971c7d2e4d9
> b
> >
> a6a4d149256f461b%7C0%7C0%7C636866893031018956&amp;sdata=jftQKX8
> B
> > d1ZVcT38q6TLSukJYMo9VRIyEPEHhFu00S4%3D&amp;reserved=0
> > ---
> >  drivers/net/mlx5/Makefile      | 10 ++++++++++
> >  drivers/net/mlx5/meson.build   |  4 ++++
> >  drivers/net/mlx5/mlx5_ethdev.c | 13 +++++++++++--
> >  drivers/net/mlx5/mlx5_nl.c     | 25 ++++++++++++++++++++++---
> >  4 files changed, 47 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> > index
> > 9a7da18..c326494 100644
> > --- a/drivers/net/mlx5/Makefile
> > +++ b/drivers/net/mlx5/Makefile
> > @@ -226,6 +226,16 @@ mlx5_autoconf.h.new:
> $(RTE_SDK)/buildtools/auto-
> > config-h.sh
> >  		enum RDMA_NLDEV_ATTR_NDEV_INDEX \
> >  		$(AUTOCONF_OUTPUT)
> >  	$Q sh -- '$<' '$@' \
> > +		HAVE_IFLA_NUM_VF \
> > +		linux/if_link.h \
> > +		enum IFLA_NUM_VF \
> > +		$(AUTOCONF_OUTPUT)
> > +	$Q sh -- '$<' '$@' \
> > +		HAVE_IFLA_EXT_MASK \
> > +		linux/if_link.h \
> > +		enum IFLA_EXT_MASK \
> > +		$(AUTOCONF_OUTPUT)
> > +	$Q sh -- '$<' '$@' \
> >  		HAVE_IFLA_PHYS_SWITCH_ID \
> >  		linux/if_link.h \
> >  		enum IFLA_PHYS_SWITCH_ID \
> > diff --git a/drivers/net/mlx5/meson.build
> > b/drivers/net/mlx5/meson.build index 4540c45..0bb2f3a 100644
> > --- a/drivers/net/mlx5/meson.build
> > +++ b/drivers/net/mlx5/meson.build
> > @@ -133,6 +133,10 @@ if build
> >  		'ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT' ],
> >  		[ 'HAVE_ETHTOOL_LINK_MODE_100G', 'linux/ethtool.h',
> >  		'ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT' ],
> > +		[ 'HAVE_IFLA_NUM_VF', 'linux/if_link.h',
> > +		'IFLA_NUM_VF' ],
> > +		[ 'HAVE_IFLA_EXT_MASK', 'linux/if_link.h',
> > +		'IFLA_EXT_MASK' ],
> >  		[ 'HAVE_IFLA_PHYS_SWITCH_ID', 'linux/if_link.h',
> >  		'IFLA_PHYS_SWITCH_ID' ],
> >  		[ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h', diff --git
> > a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> > index
> > 8158b4a..c30fcb0 100644
> > --- a/drivers/net/mlx5/mlx5_ethdev.c
> > +++ b/drivers/net/mlx5/mlx5_ethdev.c
> > @@ -1354,9 +1354,11 @@ int mlx5_fw_version_get(struct rte_eth_dev
> > *dev, char *fw_ver, size_t fw_size)  {
> >  	char ifname[IF_NAMESIZE];
> >  	FILE *file;
> > +	DIR *dir;
> >  	struct mlx5_switch_info data = { .master = 0, };
> >  	bool port_name_set = false;
> >  	bool port_switch_id_set = false;
> > +	bool device_dir = false;
> >  	char c;
> >
> >  	if (!if_indextoname(ifindex, ifname)) { @@ -1368,6 +1370,8 @@ int
> > mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t
> > fw_size)
> >  	      ifname);
> >  	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
> >  	      ifname);
> > +	MKSTR(pci_device, "/sys/class/net/%s/device",
> > +	      ifname);
> >
> >  	file = fopen(phys_port_name, "rb");
> >  	if (file != NULL) {
> > @@ -1385,8 +1389,13 @@ int mlx5_fw_version_get(struct rte_eth_dev
> > *dev, char *fw_ver, size_t fw_size)
> >  		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
> >  		c == '\n';
> >  	fclose(file);
> > -	data.master = port_switch_id_set && !port_name_set;
> > -	data.representor = port_switch_id_set && port_name_set;
> > +	dir = opendir(pci_device);
> > +	if (dir != NULL) {
> > +		closedir(dir);
> > +		device_dir = true;
> > +	}
> > +	data.master = port_switch_id_set && (!port_name_set ||
> > device_dir);
> > +	data.representor = port_switch_id_set && port_name_set &&
> > !device_dir;
> >  	*info = data;
> >  	return 0;
> >  }
> > diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c
> > index
> > d61826a..694edf1 100644
> > --- a/drivers/net/mlx5/mlx5_nl.c
> > +++ b/drivers/net/mlx5/mlx5_nl.c
> > @@ -65,6 +65,12 @@
> >  #endif
> >
> >  /* These are normally found in linux/if_link.h. */
> > +#ifndef HAVE_IFLA_NUM_VF
> > +#define IFLA_NUM_VF 21
> > +#endif
> > +#ifndef HAVE_IFLA_EXT_MASK
> > +#define IFLA_EXT_MASK 29
> > +#endif
> >  #ifndef HAVE_IFLA_PHYS_SWITCH_ID
> >  #define IFLA_PHYS_SWITCH_ID 36
> >  #endif
> > @@ -836,6 +842,7 @@ struct mlx5_nl_ifindex_data {
> >  	size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
> >  	bool port_name_set = false;
> >  	bool switch_id_set = false;
> > +	bool num_vf_set = false;
> >
> >  	if (nh->nlmsg_type != RTM_NEWLINK)
> >  		goto error;
> > @@ -848,6 +855,9 @@ struct mlx5_nl_ifindex_data {
> >  		if (ra->rta_len > nh->nlmsg_len - off)
> >  			goto error;
> >  		switch (ra->rta_type) {
> > +		case IFLA_NUM_VF:
> > +			num_vf_set = true;
> > +			break;
> >  		case IFLA_PHYS_PORT_NAME:
> >  			errno = 0;
> >  			info.port_name = strtol(payload, &end, 0); @@ -
> > 867,8 +877,8 @@ struct mlx5_nl_ifindex_data {
> >  		}
> >  		off += RTA_ALIGN(ra->rta_len);
> >  	}
> > -	info.master = switch_id_set && !port_name_set;
> > -	info.representor = switch_id_set && port_name_set;
> > +	info.master = switch_id_set && (!port_name_set || num_vf_set);
> > +	info.representor = switch_id_set && port_name_set &&
> > !num_vf_set;
> >  	memcpy(arg, &info, sizeof(info));
> >  	return 0;
> >  error:
> > @@ -896,9 +906,13 @@ struct mlx5_nl_ifindex_data {
> >  	struct {
> >  		struct nlmsghdr nh;
> >  		struct ifinfomsg info;
> > +		struct rtattr rta;
> > +		uint32_t extmask;
> >  	} req = {
> >  		.nh = {
> > -			.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)),
> > +			.nlmsg_len = NLMSG_LENGTH
> > +					(sizeof(req.info) +
> > +					 RTA_LENGTH(sizeof(uint32_t))),
> >  			.nlmsg_type = RTM_GETLINK,
> >  			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
> >  		},
> > @@ -906,6 +920,11 @@ struct mlx5_nl_ifindex_data {
> >  			.ifi_family = AF_UNSPEC,
> >  			.ifi_index = ifindex,
> >  		},
> > +		.rta = {
> > +			.rta_type = IFLA_EXT_MASK,
> > +			.rta_len = RTA_LENGTH(sizeof(int32_t)),
> > +		},
> > +		.extmask = RTE_LE32(1),
> >  	};
> >  	int ret;
> >
> > --
> > 1.8.3.1
  
Shahaf Shuler March 10, 2019, 8:59 a.m. UTC | #3
Thursday, March 7, 2019 1:12 PM, Slava Ovsiienko:
> Subject: RE: [dpdk-dev] [PATCH v2 1/1] net/mlx5: add representor
> recognition on kernels 5.x
> > Monday, February 25, 2019 1:01 PM, Viacheslav Ovsiienko:
> > > Subject: [dpdk-dev] [PATCH v2 1/1] net/mlx5: add representor
> > > recognition on kernels 5.x
> > >
> > > The master device and VF representors were distinguished by presence
> > > of port name, master device did not have one. The new Linux kernels
> > > starting from 5.0 provide the port name for master device and the
> > > implemented representor recognizing method does not work.
> > > The new recognizing method is based on quiering the VF number,
> > > created on the base of the device.
> > >
> > > The IFLA_NUM_VF attribute is returned by kernel if IFLA_EXT_MASK
> > > attribute is specified in the Netlink request message.
> > >
> > > Also the presence of device symlink in device sysfs folder is added
> > > to distinguish representors with sysfs based method.
> >
> > w/ kernel 5.x, there is also a new naming scheme for representor, right?
> > Wouldn't it be simpler to use it in order to decide uplink/regular
> > representor?
> 
> There should not be any assumption regarding port index, we can't say for
> sure on which port index we have uplink representor. It is possible to
> compare the port name against -1, but it is quite possible situation (old
> kernels with new drivers, I had been working with such setup for a while) in
> that have the multiport Infiniband device and old naming and we are unable
> recognize the Uplink rep by port name.

OK understood.  Out of curiosity which kernel was that?  
I think the safest way to detect the uplink representor is by matching on its port index. This is well defined on the PRM: "Because each PF has just one Uplink, it should access it only by vport_num = 0xffff."
For this purpose matching according to the representor phys_port_name is the most straight forward.

We should have a fallback for special cases (like you mentioned), and it will use the IFLA_NUM_VFS indication. 

Meaning your patch needs to be on top of https://patches.dpdk.org/patch/50771/ and provide a fallback detection approach in case the detection by name is not supported. 

> 
> So, it seems to me, the querying numVF/sysfs method is more reliable and
> works for all settings I've tested.
> 
> >
> > There is already patch for it from Dekel:
> > https://patches.dpdk.org/patch/50771/
> 
> Yes, It's included in RFC because patchset is not functional w/o Dekel's patch.
> The patchset will be rebased and Dekel's commit will be excluded in the next
> versions.
>
  

Patch

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 9a7da18..c326494 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -226,6 +226,16 @@  mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		enum RDMA_NLDEV_ATTR_NDEV_INDEX \
 		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
+		HAVE_IFLA_NUM_VF \
+		linux/if_link.h \
+		enum IFLA_NUM_VF \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_IFLA_EXT_MASK \
+		linux/if_link.h \
+		enum IFLA_EXT_MASK \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
 		HAVE_IFLA_PHYS_SWITCH_ID \
 		linux/if_link.h \
 		enum IFLA_PHYS_SWITCH_ID \
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index 4540c45..0bb2f3a 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -133,6 +133,10 @@  if build
 		'ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT' ],
 		[ 'HAVE_ETHTOOL_LINK_MODE_100G', 'linux/ethtool.h',
 		'ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT' ],
+		[ 'HAVE_IFLA_NUM_VF', 'linux/if_link.h',
+		'IFLA_NUM_VF' ],
+		[ 'HAVE_IFLA_EXT_MASK', 'linux/if_link.h',
+		'IFLA_EXT_MASK' ],
 		[ 'HAVE_IFLA_PHYS_SWITCH_ID', 'linux/if_link.h',
 		'IFLA_PHYS_SWITCH_ID' ],
 		[ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h',
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 8158b4a..c30fcb0 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1354,9 +1354,11 @@  int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size)
 {
 	char ifname[IF_NAMESIZE];
 	FILE *file;
+	DIR *dir;
 	struct mlx5_switch_info data = { .master = 0, };
 	bool port_name_set = false;
 	bool port_switch_id_set = false;
+	bool device_dir = false;
 	char c;
 
 	if (!if_indextoname(ifindex, ifname)) {
@@ -1368,6 +1370,8 @@  int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size)
 	      ifname);
 	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
 	      ifname);
+	MKSTR(pci_device, "/sys/class/net/%s/device",
+	      ifname);
 
 	file = fopen(phys_port_name, "rb");
 	if (file != NULL) {
@@ -1385,8 +1389,13 @@  int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size)
 		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
 		c == '\n';
 	fclose(file);
-	data.master = port_switch_id_set && !port_name_set;
-	data.representor = port_switch_id_set && port_name_set;
+	dir = opendir(pci_device);
+	if (dir != NULL) {
+		closedir(dir);
+		device_dir = true;
+	}
+	data.master = port_switch_id_set && (!port_name_set || device_dir);
+	data.representor = port_switch_id_set && port_name_set && !device_dir;
 	*info = data;
 	return 0;
 }
diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c
index d61826a..694edf1 100644
--- a/drivers/net/mlx5/mlx5_nl.c
+++ b/drivers/net/mlx5/mlx5_nl.c
@@ -65,6 +65,12 @@ 
 #endif
 
 /* These are normally found in linux/if_link.h. */
+#ifndef HAVE_IFLA_NUM_VF
+#define IFLA_NUM_VF 21
+#endif
+#ifndef HAVE_IFLA_EXT_MASK
+#define IFLA_EXT_MASK 29
+#endif
 #ifndef HAVE_IFLA_PHYS_SWITCH_ID
 #define IFLA_PHYS_SWITCH_ID 36
 #endif
@@ -836,6 +842,7 @@  struct mlx5_nl_ifindex_data {
 	size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
 	bool port_name_set = false;
 	bool switch_id_set = false;
+	bool num_vf_set = false;
 
 	if (nh->nlmsg_type != RTM_NEWLINK)
 		goto error;
@@ -848,6 +855,9 @@  struct mlx5_nl_ifindex_data {
 		if (ra->rta_len > nh->nlmsg_len - off)
 			goto error;
 		switch (ra->rta_type) {
+		case IFLA_NUM_VF:
+			num_vf_set = true;
+			break;
 		case IFLA_PHYS_PORT_NAME:
 			errno = 0;
 			info.port_name = strtol(payload, &end, 0);
@@ -867,8 +877,8 @@  struct mlx5_nl_ifindex_data {
 		}
 		off += RTA_ALIGN(ra->rta_len);
 	}
-	info.master = switch_id_set && !port_name_set;
-	info.representor = switch_id_set && port_name_set;
+	info.master = switch_id_set && (!port_name_set || num_vf_set);
+	info.representor = switch_id_set && port_name_set && !num_vf_set;
 	memcpy(arg, &info, sizeof(info));
 	return 0;
 error:
@@ -896,9 +906,13 @@  struct mlx5_nl_ifindex_data {
 	struct {
 		struct nlmsghdr nh;
 		struct ifinfomsg info;
+		struct rtattr rta;
+		uint32_t extmask;
 	} req = {
 		.nh = {
-			.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)),
+			.nlmsg_len = NLMSG_LENGTH
+					(sizeof(req.info) +
+					 RTA_LENGTH(sizeof(uint32_t))),
 			.nlmsg_type = RTM_GETLINK,
 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
 		},
@@ -906,6 +920,11 @@  struct mlx5_nl_ifindex_data {
 			.ifi_family = AF_UNSPEC,
 			.ifi_index = ifindex,
 		},
+		.rta = {
+			.rta_type = IFLA_EXT_MASK,
+			.rta_len = RTA_LENGTH(sizeof(int32_t)),
+		},
+		.extmask = RTE_LE32(1),
 	};
 	int ret;