[dpdk-dev,v1] net/mlx: control netdevices through ioctl only

Message ID 20180208163538.22407-1-adrien.mazarguil@6wind.com
State Accepted, archived
Delegated to: Ferruh Yigit
Headers show

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/checkpatch warning coding style issues

Commit Message

Adrien Mazarguil Feb. 8, 2018, 4:37 p.m.
Several control operations implemented by these PMDs affect netdevices
through sysfs, itself subject to file system permission checks enforced by
the kernel, which limits their use for most purposes to applications
running with root privileges.

Since performing the same operations through ioctl() requires fewer
capabilities (only CAP_NET_ADMIN) and given the remaining operations are
already implemented this way, this patch standardizes on ioctl() and gets
rid of redundant code.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
 drivers/net/mlx4/mlx4_ethdev.c | 192 ++-------------------------
 drivers/net/mlx5/mlx5.h        |   2 -
 drivers/net/mlx5/mlx5_ethdev.c | 255 ++++--------------------------------
 drivers/net/mlx5/mlx5_stats.c  |  28 +++-
 4 files changed, 63 insertions(+), 414 deletions(-)

Comments

Marcelo Ricardo Leitner Feb. 9, 2018, 8:33 p.m. | #1
On Thu, Feb 08, 2018 at 05:37:06PM +0100, Adrien Mazarguil wrote:
> Several control operations implemented by these PMDs affect netdevices
> through sysfs, itself subject to file system permission checks enforced by
> the kernel, which limits their use for most purposes to applications
> running with root privileges.
> 
> Since performing the same operations through ioctl() requires fewer
> capabilities (only CAP_NET_ADMIN) and given the remaining operations are
> already implemented this way, this patch standardizes on ioctl() and gets
> rid of redundant code.
> 
> Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>

Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>

> ---
>  drivers/net/mlx4/mlx4_ethdev.c | 192 ++-------------------------
>  drivers/net/mlx5/mlx5.h        |   2 -
>  drivers/net/mlx5/mlx5_ethdev.c | 255 ++++--------------------------------
>  drivers/net/mlx5/mlx5_stats.c  |  28 +++-
>  4 files changed, 63 insertions(+), 414 deletions(-)
> 
> diff --git a/drivers/net/mlx4/mlx4_ethdev.c b/drivers/net/mlx4/mlx4_ethdev.c
> index 3bc692731..fbeef16c8 100644
> --- a/drivers/net/mlx4/mlx4_ethdev.c
> +++ b/drivers/net/mlx4/mlx4_ethdev.c
> @@ -132,167 +132,6 @@ mlx4_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
>  }
>  
>  /**
> - * Read from sysfs entry.
> - *
> - * @param[in] priv
> - *   Pointer to private structure.
> - * @param[in] entry
> - *   Entry name relative to sysfs path.
> - * @param[out] buf
> - *   Data output buffer.
> - * @param size
> - *   Buffer size.
> - *
> - * @return
> - *   Number of bytes read on success, negative errno value otherwise and
> - *   rte_errno is set.
> - */
> -static int
> -mlx4_sysfs_read(const struct priv *priv, const char *entry,
> -		char *buf, size_t size)
> -{
> -	char ifname[IF_NAMESIZE];
> -	FILE *file;
> -	int ret;
> -
> -	ret = mlx4_get_ifname(priv, &ifname);
> -	if (ret)
> -		return ret;
> -
> -	MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
> -	      ifname, entry);
> -
> -	file = fopen(path, "rb");
> -	if (file == NULL) {
> -		rte_errno = errno;
> -		return -rte_errno;
> -	}
> -	ret = fread(buf, 1, size, file);
> -	if ((size_t)ret < size && ferror(file)) {
> -		rte_errno = EIO;
> -		ret = -rte_errno;
> -	} else {
> -		ret = size;
> -	}
> -	fclose(file);
> -	return ret;
> -}
> -
> -/**
> - * Write to sysfs entry.
> - *
> - * @param[in] priv
> - *   Pointer to private structure.
> - * @param[in] entry
> - *   Entry name relative to sysfs path.
> - * @param[in] buf
> - *   Data buffer.
> - * @param size
> - *   Buffer size.
> - *
> - * @return
> - *   Number of bytes written on success, negative errno value otherwise and
> - *   rte_errno is set.
> - */
> -static int
> -mlx4_sysfs_write(const struct priv *priv, const char *entry,
> -		 char *buf, size_t size)
> -{
> -	char ifname[IF_NAMESIZE];
> -	FILE *file;
> -	int ret;
> -
> -	ret = mlx4_get_ifname(priv, &ifname);
> -	if (ret)
> -		return ret;
> -
> -	MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
> -	      ifname, entry);
> -
> -	file = fopen(path, "wb");
> -	if (file == NULL) {
> -		rte_errno = errno;
> -		return -rte_errno;
> -	}
> -	ret = fwrite(buf, 1, size, file);
> -	if ((size_t)ret < size || ferror(file)) {
> -		rte_errno = EIO;
> -		ret = -rte_errno;
> -	} else {
> -		ret = size;
> -	}
> -	fclose(file);
> -	return ret;
> -}
> -
> -/**
> - * Get unsigned long sysfs property.
> - *
> - * @param priv
> - *   Pointer to private structure.
> - * @param[in] name
> - *   Entry name relative to sysfs path.
> - * @param[out] value
> - *   Value output buffer.
> - *
> - * @return
> - *   0 on success, negative errno value otherwise and rte_errno is set.
> - */
> -static int
> -mlx4_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value)
> -{
> -	int ret;
> -	unsigned long value_ret;
> -	char value_str[32];
> -
> -	ret = mlx4_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
> -	if (ret < 0) {
> -		DEBUG("cannot read %s value from sysfs: %s",
> -		      name, strerror(rte_errno));
> -		return ret;
> -	}
> -	value_str[ret] = '\0';
> -	errno = 0;
> -	value_ret = strtoul(value_str, NULL, 0);
> -	if (errno) {
> -		rte_errno = errno;
> -		DEBUG("invalid %s value `%s': %s", name, value_str,
> -		      strerror(rte_errno));
> -		return -rte_errno;
> -	}
> -	*value = value_ret;
> -	return 0;
> -}
> -
> -/**
> - * Set unsigned long sysfs property.
> - *
> - * @param priv
> - *   Pointer to private structure.
> - * @param[in] name
> - *   Entry name relative to sysfs path.
> - * @param value
> - *   Value to set.
> - *
> - * @return
> - *   0 on success, negative errno value otherwise and rte_errno is set.
> - */
> -static int
> -mlx4_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value)
> -{
> -	int ret;
> -	MKSTR(value_str, "%lu", value);
> -
> -	ret = mlx4_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
> -	if (ret < 0) {
> -		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
> -		      name, value_str, value, strerror(rte_errno));
> -		return ret;
> -	}
> -	return 0;
> -}
> -
> -/**
>   * Perform ifreq ioctl() on associated Ethernet device.
>   *
>   * @param[in] priv
> @@ -361,12 +200,12 @@ mlx4_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN])
>  int
>  mlx4_mtu_get(struct priv *priv, uint16_t *mtu)
>  {
> -	unsigned long ulong_mtu = 0;
> -	int ret = mlx4_get_sysfs_ulong(priv, "mtu", &ulong_mtu);
> +	struct ifreq request;
> +	int ret = mlx4_ifreq(priv, SIOCGIFMTU, &request);
>  
>  	if (ret)
>  		return ret;
> -	*mtu = ulong_mtu;
> +	*mtu = request.ifr_mtu;
>  	return 0;
>  }
>  
> @@ -385,20 +224,13 @@ int
>  mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
>  {
>  	struct priv *priv = dev->data->dev_private;
> -	uint16_t new_mtu;
> -	int ret = mlx4_set_sysfs_ulong(priv, "mtu", mtu);
> +	struct ifreq request = { .ifr_mtu = mtu, };
> +	int ret = mlx4_ifreq(priv, SIOCSIFMTU, &request);
>  
>  	if (ret)
>  		return ret;
> -	ret = mlx4_mtu_get(priv, &new_mtu);
> -	if (ret)
> -		return ret;
> -	if (new_mtu == mtu) {
> -		priv->mtu = mtu;
> -		return 0;
> -	}
> -	rte_errno = EINVAL;
> -	return -rte_errno;
> +	priv->mtu = mtu;
> +	return 0;
>  }
>  
>  /**
> @@ -417,14 +249,14 @@ mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
>  static int
>  mlx4_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)
>  {
> -	unsigned long tmp = 0;
> -	int ret = mlx4_get_sysfs_ulong(priv, "flags", &tmp);
> +	struct ifreq request;
> +	int ret = mlx4_ifreq(priv, SIOCGIFFLAGS, &request);
>  
>  	if (ret)
>  		return ret;
> -	tmp &= keep;
> -	tmp |= (flags & (~keep));
> -	return mlx4_set_sysfs_ulong(priv, "flags", tmp);
> +	request.ifr_flags &= keep;
> +	request.ifr_flags |= flags & ~keep;
> +	return mlx4_ifreq(priv, SIOCSIFFLAGS, &request);
>  }
>  
>  /**
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index 965c19f21..da44faaf4 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -209,8 +209,6 @@ struct priv *mlx5_get_priv(struct rte_eth_dev *dev);
>  int mlx5_is_secondary(void);
>  int priv_get_ifname(const struct priv *, char (*)[IF_NAMESIZE]);
>  int priv_ifreq(const struct priv *, int req, struct ifreq *);
> -int priv_is_ib_cntr(const char *);
> -int priv_get_cntr_sysfs(struct priv *, const char *, uint64_t *);
>  int priv_get_num_vfs(struct priv *, uint16_t *);
>  int priv_get_mtu(struct priv *, uint16_t *);
>  int priv_set_flags(struct priv *, unsigned int, unsigned int);
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> index 666507691..b73cb53df 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -7,6 +7,7 @@
>  
>  #include <stddef.h>
>  #include <assert.h>
> +#include <inttypes.h>
>  #include <unistd.h>
>  #include <stdint.h>
>  #include <stdio.h>
> @@ -173,181 +174,6 @@ priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
>  }
>  
>  /**
> - * Check if the counter is located on ib counters file.
> - *
> - * @param[in] cntr
> - *   Counter name.
> - *
> - * @return
> - *   1 if counter is located on ib counters file , 0 otherwise.
> - */
> -int
> -priv_is_ib_cntr(const char *cntr)
> -{
> -	if (!strcmp(cntr, "out_of_buffer"))
> -		return 1;
> -	return 0;
> -}
> -
> -/**
> - * Read from sysfs entry.
> - *
> - * @param[in] priv
> - *   Pointer to private structure.
> - * @param[in] entry
> - *   Entry name relative to sysfs path.
> - * @param[out] buf
> - *   Data output buffer.
> - * @param size
> - *   Buffer size.
> - *
> - * @return
> - *   0 on success, -1 on failure and errno is set.
> - */
> -static int
> -priv_sysfs_read(const struct priv *priv, const char *entry,
> -		char *buf, size_t size)
> -{
> -	char ifname[IF_NAMESIZE];
> -	FILE *file;
> -	int ret;
> -	int err;
> -
> -	if (priv_get_ifname(priv, &ifname))
> -		return -1;
> -
> -	if (priv_is_ib_cntr(entry)) {
> -		MKSTR(path, "%s/ports/1/hw_counters/%s",
> -		      priv->ibdev_path, entry);
> -		file = fopen(path, "rb");
> -	} else {
> -		MKSTR(path, "%s/device/net/%s/%s",
> -		      priv->ibdev_path, ifname, entry);
> -		file = fopen(path, "rb");
> -	}
> -	if (file == NULL)
> -		return -1;
> -	ret = fread(buf, 1, size, file);
> -	err = errno;
> -	if (((size_t)ret < size) && (ferror(file)))
> -		ret = -1;
> -	else
> -		ret = size;
> -	fclose(file);
> -	errno = err;
> -	return ret;
> -}
> -
> -/**
> - * Write to sysfs entry.
> - *
> - * @param[in] priv
> - *   Pointer to private structure.
> - * @param[in] entry
> - *   Entry name relative to sysfs path.
> - * @param[in] buf
> - *   Data buffer.
> - * @param size
> - *   Buffer size.
> - *
> - * @return
> - *   0 on success, -1 on failure and errno is set.
> - */
> -static int
> -priv_sysfs_write(const struct priv *priv, const char *entry,
> -		 char *buf, size_t size)
> -{
> -	char ifname[IF_NAMESIZE];
> -	FILE *file;
> -	int ret;
> -	int err;
> -
> -	if (priv_get_ifname(priv, &ifname))
> -		return -1;
> -
> -	MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname, entry);
> -
> -	file = fopen(path, "wb");
> -	if (file == NULL)
> -		return -1;
> -	ret = fwrite(buf, 1, size, file);
> -	err = errno;
> -	if (((size_t)ret < size) || (ferror(file)))
> -		ret = -1;
> -	else
> -		ret = size;
> -	fclose(file);
> -	errno = err;
> -	return ret;
> -}
> -
> -/**
> - * Get unsigned long sysfs property.
> - *
> - * @param priv
> - *   Pointer to private structure.
> - * @param[in] name
> - *   Entry name relative to sysfs path.
> - * @param[out] value
> - *   Value output buffer.
> - *
> - * @return
> - *   0 on success, -1 on failure and errno is set.
> - */
> -static int
> -priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value)
> -{
> -	int ret;
> -	unsigned long value_ret;
> -	char value_str[32];
> -
> -	ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
> -	if (ret == -1) {
> -		DEBUG("cannot read %s value from sysfs: %s",
> -		      name, strerror(errno));
> -		return -1;
> -	}
> -	value_str[ret] = '\0';
> -	errno = 0;
> -	value_ret = strtoul(value_str, NULL, 0);
> -	if (errno) {
> -		DEBUG("invalid %s value `%s': %s", name, value_str,
> -		      strerror(errno));
> -		return -1;
> -	}
> -	*value = value_ret;
> -	return 0;
> -}
> -
> -/**
> - * Set unsigned long sysfs property.
> - *
> - * @param priv
> - *   Pointer to private structure.
> - * @param[in] name
> - *   Entry name relative to sysfs path.
> - * @param value
> - *   Value to set.
> - *
> - * @return
> - *   0 on success, -1 on failure and errno is set.
> - */
> -static int
> -priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value)
> -{
> -	int ret;
> -	MKSTR(value_str, "%lu", value);
> -
> -	ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
> -	if (ret == -1) {
> -		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
> -		      name, value_str, value, strerror(errno));
> -		return -1;
> -	}
> -	return 0;
> -}
> -
> -/**
>   * Perform ifreq ioctl() on associated Ethernet device.
>   *
>   * @param[in] priv
> @@ -390,20 +216,25 @@ priv_get_num_vfs(struct priv *priv, uint16_t *num_vfs)
>  {
>  	/* The sysfs entry name depends on the operating system. */
>  	const char **name = (const char *[]){
> -		"device/sriov_numvfs",
> -		"device/mlx5_num_vfs",
> +		"sriov_numvfs",
> +		"mlx5_num_vfs",
>  		NULL,
>  	};
> -	int ret;
>  
>  	do {
> -		unsigned long ulong_num_vfs;
> +		int n;
> +		FILE *file;
> +		MKSTR(path, "%s/device/%s", priv->ibdev_path, *name);
>  
> -		ret = priv_get_sysfs_ulong(priv, *name, &ulong_num_vfs);
> -		if (!ret)
> -			*num_vfs = ulong_num_vfs;
> -	} while (*(++name) && ret);
> -	return ret;
> +		file = fopen(path, "rb");
> +		if (!file)
> +			continue;
> +		n = fscanf(file, "%" SCNu16, num_vfs);
> +		fclose(file);
> +		if (n == 1)
> +			return 0;
> +	} while (*(++name));
> +	return -1;
>  }
>  
>  /**
> @@ -420,35 +251,12 @@ priv_get_num_vfs(struct priv *priv, uint16_t *num_vfs)
>  int
>  priv_get_mtu(struct priv *priv, uint16_t *mtu)
>  {
> -	unsigned long ulong_mtu;
> +	struct ifreq request;
> +	int ret = priv_ifreq(priv, SIOCGIFMTU, &request);
>  
> -	if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1)
> -		return -1;
> -	*mtu = ulong_mtu;
> -	return 0;
> -}
> -
> -/**
> - * Read device counter from sysfs.
> - *
> - * @param priv
> - *   Pointer to private structure.
> - * @param name
> - *   Counter name.
> - * @param[out] cntr
> - *   Counter output buffer.
> - *
> - * @return
> - *   0 on success, -1 on failure and errno is set.
> - */
> -int
> -priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr)
> -{
> -	unsigned long ulong_ctr;
> -
> -	if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1)
> -		return -1;
> -	*cntr = ulong_ctr;
> +	if (ret)
> +		return ret;
> +	*mtu = request.ifr_mtu;
>  	return 0;
>  }
>  
> @@ -466,15 +274,9 @@ priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr)
>  static int
>  priv_set_mtu(struct priv *priv, uint16_t mtu)
>  {
> -	uint16_t new_mtu;
> +	struct ifreq request = { .ifr_mtu = mtu, };
>  
> -	if (priv_set_sysfs_ulong(priv, "mtu", mtu) ||
> -	    priv_get_mtu(priv, &new_mtu))
> -		return -1;
> -	if (new_mtu == mtu)
> -		return 0;
> -	errno = EINVAL;
> -	return -1;
> +	return priv_ifreq(priv, SIOCSIFMTU, &request);
>  }
>  
>  /**
> @@ -493,13 +295,14 @@ priv_set_mtu(struct priv *priv, uint16_t mtu)
>  int
>  priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)
>  {
> -	unsigned long tmp;
> +	struct ifreq request;
> +	int ret = priv_ifreq(priv, SIOCGIFFLAGS, &request);
>  
> -	if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1)
> -		return -1;
> -	tmp &= keep;
> -	tmp |= (flags & (~keep));
> -	return priv_set_sysfs_ulong(priv, "flags", tmp);
> +	if (ret)
> +		return ret;
> +	request.ifr_flags &= keep;
> +	request.ifr_flags |= flags & ~keep;
> +	return priv_ifreq(priv, SIOCSIFFLAGS, &request);
>  }
>  
>  /**
> diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
> index 378472a70..eb9c65dcc 100644
> --- a/drivers/net/mlx5/mlx5_stats.c
> +++ b/drivers/net/mlx5/mlx5_stats.c
> @@ -3,8 +3,11 @@
>   * Copyright 2015 Mellanox.
>   */
>  
> +#include <inttypes.h>
>  #include <linux/sockios.h>
>  #include <linux/ethtool.h>
> +#include <stdint.h>
> +#include <stdio.h>
>  
>  #include <rte_ethdev_driver.h>
>  #include <rte_common.h>
> @@ -19,6 +22,7 @@ struct mlx5_counter_ctrl {
>  	char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
>  	/* Name of the counter on the device table. */
>  	char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
> +	uint32_t ib:1; /**< Nonzero for IB counters. */
>  };
>  
>  static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
> @@ -93,6 +97,7 @@ static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
>  	{
>  		.dpdk_name = "rx_out_of_buffer",
>  		.ctr_name = "out_of_buffer",
> +		.ib = 1,
>  	},
>  	{
>  		.dpdk_name = "tx_packets_phy",
> @@ -143,13 +148,24 @@ priv_read_dev_counters(struct priv *priv, uint64_t *stats)
>  		return -1;
>  	}
>  	for (i = 0; i != xstats_n; ++i) {
> -		if (priv_is_ib_cntr(mlx5_counters_init[i].ctr_name))
> -			priv_get_cntr_sysfs(priv,
> -					    mlx5_counters_init[i].ctr_name,
> -					    &stats[i]);
> -		else
> +		if (mlx5_counters_init[i].ib) {
> +			FILE *file;
> +			MKSTR(path, "%s/ports/1/hw_counters/%s",
> +			      priv->ibdev_path,
> +			      mlx5_counters_init[i].ctr_name);
> +
> +			file = fopen(path, "rb");
> +			if (file) {
> +				int n = fscanf(file, "%" SCNu64, &stats[i]);
> +
> +				fclose(file);
> +				if (n != 1)
> +					stats[i] = 0;
> +			}
> +		} else {
>  			stats[i] = (uint64_t)
>  				et_stats->data[xstats_ctrl->dev_table_idx[i]];
> +		}
>  	}
>  	return 0;
>  }
> @@ -232,7 +248,7 @@ priv_xstats_init(struct priv *priv)
>  		}
>  	}
>  	for (j = 0; j != xstats_n; ++j) {
> -		if (priv_is_ib_cntr(mlx5_counters_init[j].ctr_name))
> +		if (mlx5_counters_init[j].ib)
>  			continue;
>  		if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) {
>  			WARN("counter \"%s\" is not recognized",
> -- 
> 2.11.0
Shahaf Shuler Feb. 11, 2018, 11:55 a.m. | #2
Hi Adrien,

Small doc issues.

Thursday, February 8, 2018 6:37 PM, Adrien Mazarguil:
> Subject: [PATCH v1] net/mlx: control netdevices through ioctl only
> 
> Several control operations implemented by these PMDs affect netdevices
> through sysfs, itself subject to file system permission checks enforced by the
> kernel, which limits their use for most purposes to applications running with
> root privileges.
> 
> Since performing the same operations through ioctl() requires fewer
> capabilities (only CAP_NET_ADMIN) and given the remaining operations are
> already implemented this way, this patch standardizes on ioctl() and gets rid
> of redundant code.
> 
> Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
> ---
>  drivers/net/mlx4/mlx4_ethdev.c | 192 ++-------------------------
>  drivers/net/mlx5/mlx5.h        |   2 -
>  drivers/net/mlx5/mlx5_ethdev.c | 255 ++++--------------------------------
>  drivers/net/mlx5/mlx5_stats.c  |  28 +++-
>  4 files changed, 63 insertions(+), 414 deletions(-)
> 
> diff --git a/drivers/net/mlx4/mlx4_ethdev.c
> b/drivers/net/mlx4/mlx4_ethdev.c index 3bc692731..fbeef16c8 100644
> --- a/drivers/net/mlx4/mlx4_ethdev.c
> +++ b/drivers/net/mlx4/mlx4_ethdev.c
> @@ -132,167 +132,6 @@ mlx4_get_ifname(const struct priv *priv, char
> (*ifname)[IF_NAMESIZE])  }
> 
>  /**
> - * Read from sysfs entry.
> - *
> - * @param[in] priv
> - *   Pointer to private structure.
> - * @param[in] entry
> - *   Entry name relative to sysfs path.
> - * @param[out] buf
> - *   Data output buffer.
> - * @param size
> - *   Buffer size.
> - *
> - * @return
> - *   Number of bytes read on success, negative errno value otherwise and
> - *   rte_errno is set.
> - */
> -static int
> -mlx4_sysfs_read(const struct priv *priv, const char *entry,
> -		char *buf, size_t size)
> -{
> -	char ifname[IF_NAMESIZE];
> -	FILE *file;
> -	int ret;
> -
> -	ret = mlx4_get_ifname(priv, &ifname);
> -	if (ret)
> -		return ret;
> -
> -	MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device-
> >ibdev_path,
> -	      ifname, entry);
> -
> -	file = fopen(path, "rb");
> -	if (file == NULL) {
> -		rte_errno = errno;
> -		return -rte_errno;
> -	}
> -	ret = fread(buf, 1, size, file);
> -	if ((size_t)ret < size && ferror(file)) {
> -		rte_errno = EIO;
> -		ret = -rte_errno;
> -	} else {
> -		ret = size;
> -	}
> -	fclose(file);
> -	return ret;
> -}
> -
> -/**
> - * Write to sysfs entry.
> - *
> - * @param[in] priv
> - *   Pointer to private structure.
> - * @param[in] entry
> - *   Entry name relative to sysfs path.
> - * @param[in] buf
> - *   Data buffer.
> - * @param size
> - *   Buffer size.
> - *
> - * @return
> - *   Number of bytes written on success, negative errno value otherwise and
> - *   rte_errno is set.
> - */
> -static int
> -mlx4_sysfs_write(const struct priv *priv, const char *entry,
> -		 char *buf, size_t size)
> -{
> -	char ifname[IF_NAMESIZE];
> -	FILE *file;
> -	int ret;
> -
> -	ret = mlx4_get_ifname(priv, &ifname);
> -	if (ret)
> -		return ret;
> -
> -	MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device-
> >ibdev_path,
> -	      ifname, entry);
> -
> -	file = fopen(path, "wb");
> -	if (file == NULL) {
> -		rte_errno = errno;
> -		return -rte_errno;
> -	}
> -	ret = fwrite(buf, 1, size, file);
> -	if ((size_t)ret < size || ferror(file)) {
> -		rte_errno = EIO;
> -		ret = -rte_errno;
> -	} else {
> -		ret = size;
> -	}
> -	fclose(file);
> -	return ret;
> -}
> -
> -/**
> - * Get unsigned long sysfs property.
> - *
> - * @param priv
> - *   Pointer to private structure.
> - * @param[in] name
> - *   Entry name relative to sysfs path.
> - * @param[out] value
> - *   Value output buffer.
> - *
> - * @return
> - *   0 on success, negative errno value otherwise and rte_errno is set.
> - */
> -static int
> -mlx4_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long
> *value) -{
> -	int ret;
> -	unsigned long value_ret;
> -	char value_str[32];
> -
> -	ret = mlx4_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
> -	if (ret < 0) {
> -		DEBUG("cannot read %s value from sysfs: %s",
> -		      name, strerror(rte_errno));
> -		return ret;
> -	}
> -	value_str[ret] = '\0';
> -	errno = 0;
> -	value_ret = strtoul(value_str, NULL, 0);
> -	if (errno) {
> -		rte_errno = errno;
> -		DEBUG("invalid %s value `%s': %s", name, value_str,
> -		      strerror(rte_errno));
> -		return -rte_errno;
> -	}
> -	*value = value_ret;
> -	return 0;
> -}
> -
> -/**
> - * Set unsigned long sysfs property.
> - *
> - * @param priv
> - *   Pointer to private structure.
> - * @param[in] name
> - *   Entry name relative to sysfs path.
> - * @param value
> - *   Value to set.
> - *
> - * @return
> - *   0 on success, negative errno value otherwise and rte_errno is set.
> - */
> -static int
> -mlx4_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long
> value) -{
> -	int ret;
> -	MKSTR(value_str, "%lu", value);
> -
> -	ret = mlx4_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
> -	if (ret < 0) {
> -		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
> -		      name, value_str, value, strerror(rte_errno));
> -		return ret;
> -	}
> -	return 0;
> -}
> -
> -/**
>   * Perform ifreq ioctl() on associated Ethernet device.
>   *
>   * @param[in] priv
> @@ -361,12 +200,12 @@ mlx4_get_mac(struct priv *priv, uint8_t
> (*mac)[ETHER_ADDR_LEN])  int  mlx4_mtu_get(struct priv *priv, uint16_t
> *mtu)  {
> -	unsigned long ulong_mtu = 0;
> -	int ret = mlx4_get_sysfs_ulong(priv, "mtu", &ulong_mtu);
> +	struct ifreq request;
> +	int ret = mlx4_ifreq(priv, SIOCGIFMTU, &request);
> 
>  	if (ret)
>  		return ret;

Function documentation is : "0 on success, negative errno value otherwise and rte_errno is set."
Per my understating ioctl returns -1 on error with errno set. 


> -	*mtu = ulong_mtu;
> +	*mtu = request.ifr_mtu;
>  	return 0;
>  }
> 
> @@ -385,20 +224,13 @@ int
>  mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)  {
>  	struct priv *priv = dev->data->dev_private;
> -	uint16_t new_mtu;
> -	int ret = mlx4_set_sysfs_ulong(priv, "mtu", mtu);
> +	struct ifreq request = { .ifr_mtu = mtu, };
> +	int ret = mlx4_ifreq(priv, SIOCSIFMTU, &request);
> 
>  	if (ret)
>  		return ret;

Also here.

> -	ret = mlx4_mtu_get(priv, &new_mtu);
> -	if (ret)
> -		return ret;
> -	if (new_mtu == mtu) {
> -		priv->mtu = mtu;
> -		return 0;
> -	}
> -	rte_errno = EINVAL;
> -	return -rte_errno;
> +	priv->mtu = mtu;
> +	return 0;
>  }
> 
>  /**
> @@ -417,14 +249,14 @@ mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t
> mtu)  static int  mlx4_set_flags(struct priv *priv, unsigned int keep, unsigned
> int flags)  {
> -	unsigned long tmp = 0;
> -	int ret = mlx4_get_sysfs_ulong(priv, "flags", &tmp);
> +	struct ifreq request;
> +	int ret = mlx4_ifreq(priv, SIOCGIFFLAGS, &request);
> 
>  	if (ret)
>  		return ret;

And here.

> -	tmp &= keep;
> -	tmp |= (flags & (~keep));
> -	return mlx4_set_sysfs_ulong(priv, "flags", tmp);
> +	request.ifr_flags &= keep;
> +	request.ifr_flags |= flags & ~keep;
> +	return mlx4_ifreq(priv, SIOCSIFFLAGS, &request);
>  }
> 
>  /**
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 965c19f21..da44faaf4 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -209,8 +209,6 @@ struct priv *mlx5_get_priv(struct rte_eth_dev *dev);
> int mlx5_is_secondary(void);  int priv_get_ifname(const struct priv *, char
> (*)[IF_NAMESIZE]);  int priv_ifreq(const struct priv *, int req, struct ifreq *); -
> int priv_is_ib_cntr(const char *); -int priv_get_cntr_sysfs(struct priv *, const
> char *, uint64_t *);  int priv_get_num_vfs(struct priv *, uint16_t *);  int
> priv_get_mtu(struct priv *, uint16_t *);  int priv_set_flags(struct priv *,
> unsigned int, unsigned int); diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> b/drivers/net/mlx5/mlx5_ethdev.c index 666507691..b73cb53df 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -7,6 +7,7 @@
> 
>  #include <stddef.h>
>  #include <assert.h>
> +#include <inttypes.h>
>  #include <unistd.h>
>  #include <stdint.h>
>  #include <stdio.h>
> @@ -173,181 +174,6 @@ priv_get_ifname(const struct priv *priv, char
> (*ifname)[IF_NAMESIZE])  }
> 
>  /**
> - * Check if the counter is located on ib counters file.
> - *
> - * @param[in] cntr
> - *   Counter name.
> - *
> - * @return
> - *   1 if counter is located on ib counters file , 0 otherwise.
> - */
> -int
> -priv_is_ib_cntr(const char *cntr)
> -{
> -	if (!strcmp(cntr, "out_of_buffer"))
> -		return 1;
> -	return 0;
> -}
> -
> -/**
> - * Read from sysfs entry.
> - *
> - * @param[in] priv
> - *   Pointer to private structure.
> - * @param[in] entry
> - *   Entry name relative to sysfs path.
> - * @param[out] buf
> - *   Data output buffer.
> - * @param size
> - *   Buffer size.
> - *
> - * @return
> - *   0 on success, -1 on failure and errno is set.
> - */
> -static int
> -priv_sysfs_read(const struct priv *priv, const char *entry,
> -		char *buf, size_t size)
> -{
> -	char ifname[IF_NAMESIZE];
> -	FILE *file;
> -	int ret;
> -	int err;
> -
> -	if (priv_get_ifname(priv, &ifname))
> -		return -1;
> -
> -	if (priv_is_ib_cntr(entry)) {
> -		MKSTR(path, "%s/ports/1/hw_counters/%s",
> -		      priv->ibdev_path, entry);
> -		file = fopen(path, "rb");
> -	} else {
> -		MKSTR(path, "%s/device/net/%s/%s",
> -		      priv->ibdev_path, ifname, entry);
> -		file = fopen(path, "rb");
> -	}
> -	if (file == NULL)
> -		return -1;
> -	ret = fread(buf, 1, size, file);
> -	err = errno;
> -	if (((size_t)ret < size) && (ferror(file)))
> -		ret = -1;
> -	else
> -		ret = size;
> -	fclose(file);
> -	errno = err;
> -	return ret;
> -}
> -
> -/**
> - * Write to sysfs entry.
> - *
> - * @param[in] priv
> - *   Pointer to private structure.
> - * @param[in] entry
> - *   Entry name relative to sysfs path.
> - * @param[in] buf
> - *   Data buffer.
> - * @param size
> - *   Buffer size.
> - *
> - * @return
> - *   0 on success, -1 on failure and errno is set.
> - */
> -static int
> -priv_sysfs_write(const struct priv *priv, const char *entry,
> -		 char *buf, size_t size)
> -{
> -	char ifname[IF_NAMESIZE];
> -	FILE *file;
> -	int ret;
> -	int err;
> -
> -	if (priv_get_ifname(priv, &ifname))
> -		return -1;
> -
> -	MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname,
> entry);
> -
> -	file = fopen(path, "wb");
> -	if (file == NULL)
> -		return -1;
> -	ret = fwrite(buf, 1, size, file);
> -	err = errno;
> -	if (((size_t)ret < size) || (ferror(file)))
> -		ret = -1;
> -	else
> -		ret = size;
> -	fclose(file);
> -	errno = err;
> -	return ret;
> -}
> -
> -/**
> - * Get unsigned long sysfs property.
> - *
> - * @param priv
> - *   Pointer to private structure.
> - * @param[in] name
> - *   Entry name relative to sysfs path.
> - * @param[out] value
> - *   Value output buffer.
> - *
> - * @return
> - *   0 on success, -1 on failure and errno is set.
> - */
> -static int
> -priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long
> *value) -{
> -	int ret;
> -	unsigned long value_ret;
> -	char value_str[32];
> -
> -	ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
> -	if (ret == -1) {
> -		DEBUG("cannot read %s value from sysfs: %s",
> -		      name, strerror(errno));
> -		return -1;
> -	}
> -	value_str[ret] = '\0';
> -	errno = 0;
> -	value_ret = strtoul(value_str, NULL, 0);
> -	if (errno) {
> -		DEBUG("invalid %s value `%s': %s", name, value_str,
> -		      strerror(errno));
> -		return -1;
> -	}
> -	*value = value_ret;
> -	return 0;
> -}
> -
> -/**
> - * Set unsigned long sysfs property.
> - *
> - * @param priv
> - *   Pointer to private structure.
> - * @param[in] name
> - *   Entry name relative to sysfs path.
> - * @param value
> - *   Value to set.
> - *
> - * @return
> - *   0 on success, -1 on failure and errno is set.
> - */
> -static int
> -priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long
> value) -{
> -	int ret;
> -	MKSTR(value_str, "%lu", value);
> -
> -	ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
> -	if (ret == -1) {
> -		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
> -		      name, value_str, value, strerror(errno));
> -		return -1;
> -	}
> -	return 0;
> -}
> -
> -/**
>   * Perform ifreq ioctl() on associated Ethernet device.
>   *
>   * @param[in] priv
> @@ -390,20 +216,25 @@ priv_get_num_vfs(struct priv *priv, uint16_t
> *num_vfs)  {
>  	/* The sysfs entry name depends on the operating system. */
>  	const char **name = (const char *[]){
> -		"device/sriov_numvfs",
> -		"device/mlx5_num_vfs",
> +		"sriov_numvfs",
> +		"mlx5_num_vfs",
>  		NULL,
>  	};
> -	int ret;
> 
>  	do {
> -		unsigned long ulong_num_vfs;
> +		int n;
> +		FILE *file;
> +		MKSTR(path, "%s/device/%s", priv->ibdev_path, *name);
> 
> -		ret = priv_get_sysfs_ulong(priv, *name, &ulong_num_vfs);
> -		if (!ret)
> -			*num_vfs = ulong_num_vfs;
> -	} while (*(++name) && ret);
> -	return ret;
> +		file = fopen(path, "rb");
> +		if (!file)
> +			continue;
> +		n = fscanf(file, "%" SCNu16, num_vfs);
> +		fclose(file);
> +		if (n == 1)
> +			return 0;
> +	} while (*(++name));
> +	return -1;
>  }
> 
>  /**
> @@ -420,35 +251,12 @@ priv_get_num_vfs(struct priv *priv, uint16_t
> *num_vfs)  int  priv_get_mtu(struct priv *priv, uint16_t *mtu)  {
> -	unsigned long ulong_mtu;
> +	struct ifreq request;
> +	int ret = priv_ifreq(priv, SIOCGIFMTU, &request);
> 
> -	if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1)
> -		return -1;
> -	*mtu = ulong_mtu;
> -	return 0;
> -}
> -
> -/**
> - * Read device counter from sysfs.
> - *
> - * @param priv
> - *   Pointer to private structure.
> - * @param name
> - *   Counter name.
> - * @param[out] cntr
> - *   Counter output buffer.
> - *
> - * @return
> - *   0 on success, -1 on failure and errno is set.
> - */
> -int
> -priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr) -{
> -	unsigned long ulong_ctr;
> -
> -	if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1)
> -		return -1;
> -	*cntr = ulong_ctr;
> +	if (ret)
> +		return ret;
> +	*mtu = request.ifr_mtu;
>  	return 0;
>  }
> 
> @@ -466,15 +274,9 @@ priv_get_cntr_sysfs(struct priv *priv, const char
> *name, uint64_t *cntr)  static int  priv_set_mtu(struct priv *priv, uint16_t
> mtu)  {
> -	uint16_t new_mtu;
> +	struct ifreq request = { .ifr_mtu = mtu, };
> 
> -	if (priv_set_sysfs_ulong(priv, "mtu", mtu) ||
> -	    priv_get_mtu(priv, &new_mtu))
> -		return -1;
> -	if (new_mtu == mtu)
> -		return 0;
> -	errno = EINVAL;
> -	return -1;
> +	return priv_ifreq(priv, SIOCSIFMTU, &request);
>  }
> 
>  /**
> @@ -493,13 +295,14 @@ priv_set_mtu(struct priv *priv, uint16_t mtu)  int
> priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)  {
> -	unsigned long tmp;
> +	struct ifreq request;
> +	int ret = priv_ifreq(priv, SIOCGIFFLAGS, &request);
> 
> -	if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1)
> -		return -1;
> -	tmp &= keep;
> -	tmp |= (flags & (~keep));
> -	return priv_set_sysfs_ulong(priv, "flags", tmp);
> +	if (ret)
> +		return ret;
> +	request.ifr_flags &= keep;
> +	request.ifr_flags |= flags & ~keep;
> +	return priv_ifreq(priv, SIOCSIFFLAGS, &request);
>  }
> 
>  /**
> diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
> index 378472a70..eb9c65dcc 100644
> --- a/drivers/net/mlx5/mlx5_stats.c
> +++ b/drivers/net/mlx5/mlx5_stats.c
> @@ -3,8 +3,11 @@
>   * Copyright 2015 Mellanox.
>   */
> 
> +#include <inttypes.h>
>  #include <linux/sockios.h>
>  #include <linux/ethtool.h>
> +#include <stdint.h>
> +#include <stdio.h>
> 
>  #include <rte_ethdev_driver.h>
>  #include <rte_common.h>
> @@ -19,6 +22,7 @@ struct mlx5_counter_ctrl {
>  	char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
>  	/* Name of the counter on the device table. */
>  	char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
> +	uint32_t ib:1; /**< Nonzero for IB counters. */
>  };
> 
>  static const struct mlx5_counter_ctrl mlx5_counters_init[] = { @@ -93,6
> +97,7 @@ static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
>  	{
>  		.dpdk_name = "rx_out_of_buffer",
>  		.ctr_name = "out_of_buffer",
> +		.ib = 1,
>  	},
>  	{
>  		.dpdk_name = "tx_packets_phy",
> @@ -143,13 +148,24 @@ priv_read_dev_counters(struct priv *priv, uint64_t
> *stats)
>  		return -1;
>  	}
>  	for (i = 0; i != xstats_n; ++i) {
> -		if (priv_is_ib_cntr(mlx5_counters_init[i].ctr_name))
> -			priv_get_cntr_sysfs(priv,
> -					    mlx5_counters_init[i].ctr_name,
> -					    &stats[i]);
> -		else
> +		if (mlx5_counters_init[i].ib) {
> +			FILE *file;
> +			MKSTR(path, "%s/ports/1/hw_counters/%s",
> +			      priv->ibdev_path,
> +			      mlx5_counters_init[i].ctr_name);
> +
> +			file = fopen(path, "rb");
> +			if (file) {
> +				int n = fscanf(file, "%" SCNu64, &stats[i]);
> +
> +				fclose(file);
> +				if (n != 1)
> +					stats[i] = 0;
> +			}
> +		} else {
>  			stats[i] = (uint64_t)
>  				et_stats->data[xstats_ctrl-
> >dev_table_idx[i]];
> +		}
>  	}
>  	return 0;
>  }
> @@ -232,7 +248,7 @@ priv_xstats_init(struct priv *priv)
>  		}
>  	}
>  	for (j = 0; j != xstats_n; ++j) {
> -		if (priv_is_ib_cntr(mlx5_counters_init[j].ctr_name))
> +		if (mlx5_counters_init[j].ib)
>  			continue;
>  		if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) {
>  			WARN("counter \"%s\" is not recognized",
> --
> 2.11.0
Adrien Mazarguil Feb. 12, 2018, 8:47 a.m. | #3
Hi Shahaf,

On Sun, Feb 11, 2018 at 11:55:44AM +0000, Shahaf Shuler wrote:
> Hi Adrien,
> 
> Small doc issues.
<snip>
> > -/**
> >   * Perform ifreq ioctl() on associated Ethernet device.
> >   *
> >   * @param[in] priv
> > @@ -361,12 +200,12 @@ mlx4_get_mac(struct priv *priv, uint8_t
> > (*mac)[ETHER_ADDR_LEN])  int  mlx4_mtu_get(struct priv *priv, uint16_t
> > *mtu)  {
> > -	unsigned long ulong_mtu = 0;
> > -	int ret = mlx4_get_sysfs_ulong(priv, "mtu", &ulong_mtu);
> > +	struct ifreq request;
> > +	int ret = mlx4_ifreq(priv, SIOCGIFMTU, &request);
> > 
> >  	if (ret)
> >  		return ret;
> 
> Function documentation is : "0 on success, negative errno value otherwise and rte_errno is set."
> Per my understating ioctl returns -1 on error with errno set. 

Since the positive/negative errno mess was addressed in mlx4 (commit
9d14b27308a0 "net/mlx4: standardize on negative errno value"), unlike mlx5's
priv_ifreq(), mlx4_ifreq() returns a negative errno value with rte_errno set
in case of error. Simply returning ret is enough as rte_errno is left
unmodified; all functions are already documented accordingly.

<snip>
> > @@ -385,20 +224,13 @@ int
> >  mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)  {
> >  	struct priv *priv = dev->data->dev_private;
> > -	uint16_t new_mtu;
> > -	int ret = mlx4_set_sysfs_ulong(priv, "mtu", mtu);
> > +	struct ifreq request = { .ifr_mtu = mtu, };
> > +	int ret = mlx4_ifreq(priv, SIOCSIFMTU, &request);
> > 
> >  	if (ret)
> >  		return ret;
> 
> Also here.

Ditto.

<snip>
> > @@ -417,14 +249,14 @@ mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t
> > mtu)  static int  mlx4_set_flags(struct priv *priv, unsigned int keep, unsigned
> > int flags)  {
> > -	unsigned long tmp = 0;
> > -	int ret = mlx4_get_sysfs_ulong(priv, "flags", &tmp);
> > +	struct ifreq request;
> > +	int ret = mlx4_ifreq(priv, SIOCGIFFLAGS, &request);
> > 
> >  	if (ret)
> >  		return ret;
> 
> And here.

Ditto.

No need for a v2.
Shahaf Shuler Feb. 28, 2018, 8:07 a.m. | #4
Friday, February 9, 2018 10:34 PM, Marcelo Ricardo Leitner:
> 
> On Thu, Feb 08, 2018 at 05:37:06PM +0100, Adrien Mazarguil wrote:
> > Several control operations implemented by these PMDs affect netdevices
> > through sysfs, itself subject to file system permission checks
> > enforced by the kernel, which limits their use for most purposes to
> > applications running with root privileges.
> >
> > Since performing the same operations through ioctl() requires fewer
> > capabilities (only CAP_NET_ADMIN) and given the remaining operations
> > are already implemented this way, this patch standardizes on ioctl()
> > and gets rid of redundant code.
> >
> > Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
> 
> Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>

Applied to next-net-mlx, thanks. 

> 
> > ---
> >  drivers/net/mlx4/mlx4_ethdev.c | 192 ++-------------------------
> >  drivers/net/mlx5/mlx5.h        |   2 -
> >  drivers/net/mlx5/mlx5_ethdev.c | 255
> > ++++--------------------------------
> >  drivers/net/mlx5/mlx5_stats.c  |  28 +++-
> >  4 files changed, 63 insertions(+), 414 deletions(-)
> >
> > diff --git a/drivers/net/mlx4/mlx4_ethdev.c
> > b/drivers/net/mlx4/mlx4_ethdev.c index 3bc692731..fbeef16c8 100644
> > --- a/drivers/net/mlx4/mlx4_ethdev.c
> > +++ b/drivers/net/mlx4/mlx4_ethdev.c
> > @@ -132,167 +132,6 @@ mlx4_get_ifname(const struct priv *priv, char
> > (*ifname)[IF_NAMESIZE])  }
> >
> >  /**
> > - * Read from sysfs entry.
> > - *
> > - * @param[in] priv
> > - *   Pointer to private structure.
> > - * @param[in] entry
> > - *   Entry name relative to sysfs path.
> > - * @param[out] buf
> > - *   Data output buffer.
> > - * @param size
> > - *   Buffer size.
> > - *
> > - * @return
> > - *   Number of bytes read on success, negative errno value otherwise and
> > - *   rte_errno is set.
> > - */
> > -static int
> > -mlx4_sysfs_read(const struct priv *priv, const char *entry,
> > -		char *buf, size_t size)
> > -{
> > -	char ifname[IF_NAMESIZE];
> > -	FILE *file;
> > -	int ret;
> > -
> > -	ret = mlx4_get_ifname(priv, &ifname);
> > -	if (ret)
> > -		return ret;
> > -
> > -	MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device-
> >ibdev_path,
> > -	      ifname, entry);
> > -
> > -	file = fopen(path, "rb");
> > -	if (file == NULL) {
> > -		rte_errno = errno;
> > -		return -rte_errno;
> > -	}
> > -	ret = fread(buf, 1, size, file);
> > -	if ((size_t)ret < size && ferror(file)) {
> > -		rte_errno = EIO;
> > -		ret = -rte_errno;
> > -	} else {
> > -		ret = size;
> > -	}
> > -	fclose(file);
> > -	return ret;
> > -}
> > -
> > -/**
> > - * Write to sysfs entry.
> > - *
> > - * @param[in] priv
> > - *   Pointer to private structure.
> > - * @param[in] entry
> > - *   Entry name relative to sysfs path.
> > - * @param[in] buf
> > - *   Data buffer.
> > - * @param size
> > - *   Buffer size.
> > - *
> > - * @return
> > - *   Number of bytes written on success, negative errno value otherwise
> and
> > - *   rte_errno is set.
> > - */
> > -static int
> > -mlx4_sysfs_write(const struct priv *priv, const char *entry,
> > -		 char *buf, size_t size)
> > -{
> > -	char ifname[IF_NAMESIZE];
> > -	FILE *file;
> > -	int ret;
> > -
> > -	ret = mlx4_get_ifname(priv, &ifname);
> > -	if (ret)
> > -		return ret;
> > -
> > -	MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device-
> >ibdev_path,
> > -	      ifname, entry);
> > -
> > -	file = fopen(path, "wb");
> > -	if (file == NULL) {
> > -		rte_errno = errno;
> > -		return -rte_errno;
> > -	}
> > -	ret = fwrite(buf, 1, size, file);
> > -	if ((size_t)ret < size || ferror(file)) {
> > -		rte_errno = EIO;
> > -		ret = -rte_errno;
> > -	} else {
> > -		ret = size;
> > -	}
> > -	fclose(file);
> > -	return ret;
> > -}
> > -
> > -/**
> > - * Get unsigned long sysfs property.
> > - *
> > - * @param priv
> > - *   Pointer to private structure.
> > - * @param[in] name
> > - *   Entry name relative to sysfs path.
> > - * @param[out] value
> > - *   Value output buffer.
> > - *
> > - * @return
> > - *   0 on success, negative errno value otherwise and rte_errno is set.
> > - */
> > -static int
> > -mlx4_get_sysfs_ulong(struct priv *priv, const char *name, unsigned
> > long *value) -{
> > -	int ret;
> > -	unsigned long value_ret;
> > -	char value_str[32];
> > -
> > -	ret = mlx4_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
> > -	if (ret < 0) {
> > -		DEBUG("cannot read %s value from sysfs: %s",
> > -		      name, strerror(rte_errno));
> > -		return ret;
> > -	}
> > -	value_str[ret] = '\0';
> > -	errno = 0;
> > -	value_ret = strtoul(value_str, NULL, 0);
> > -	if (errno) {
> > -		rte_errno = errno;
> > -		DEBUG("invalid %s value `%s': %s", name, value_str,
> > -		      strerror(rte_errno));
> > -		return -rte_errno;
> > -	}
> > -	*value = value_ret;
> > -	return 0;
> > -}
> > -
> > -/**
> > - * Set unsigned long sysfs property.
> > - *
> > - * @param priv
> > - *   Pointer to private structure.
> > - * @param[in] name
> > - *   Entry name relative to sysfs path.
> > - * @param value
> > - *   Value to set.
> > - *
> > - * @return
> > - *   0 on success, negative errno value otherwise and rte_errno is set.
> > - */
> > -static int
> > -mlx4_set_sysfs_ulong(struct priv *priv, const char *name, unsigned
> > long value) -{
> > -	int ret;
> > -	MKSTR(value_str, "%lu", value);
> > -
> > -	ret = mlx4_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
> > -	if (ret < 0) {
> > -		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
> > -		      name, value_str, value, strerror(rte_errno));
> > -		return ret;
> > -	}
> > -	return 0;
> > -}
> > -
> > -/**
> >   * Perform ifreq ioctl() on associated Ethernet device.
> >   *
> >   * @param[in] priv
> > @@ -361,12 +200,12 @@ mlx4_get_mac(struct priv *priv, uint8_t
> > (*mac)[ETHER_ADDR_LEN])  int  mlx4_mtu_get(struct priv *priv, uint16_t
> > *mtu)  {
> > -	unsigned long ulong_mtu = 0;
> > -	int ret = mlx4_get_sysfs_ulong(priv, "mtu", &ulong_mtu);
> > +	struct ifreq request;
> > +	int ret = mlx4_ifreq(priv, SIOCGIFMTU, &request);
> >
> >  	if (ret)
> >  		return ret;
> > -	*mtu = ulong_mtu;
> > +	*mtu = request.ifr_mtu;
> >  	return 0;
> >  }
> >
> > @@ -385,20 +224,13 @@ int
> >  mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)  {
> >  	struct priv *priv = dev->data->dev_private;
> > -	uint16_t new_mtu;
> > -	int ret = mlx4_set_sysfs_ulong(priv, "mtu", mtu);
> > +	struct ifreq request = { .ifr_mtu = mtu, };
> > +	int ret = mlx4_ifreq(priv, SIOCSIFMTU, &request);
> >
> >  	if (ret)
> >  		return ret;
> > -	ret = mlx4_mtu_get(priv, &new_mtu);
> > -	if (ret)
> > -		return ret;
> > -	if (new_mtu == mtu) {
> > -		priv->mtu = mtu;
> > -		return 0;
> > -	}
> > -	rte_errno = EINVAL;
> > -	return -rte_errno;
> > +	priv->mtu = mtu;
> > +	return 0;
> >  }
> >
> >  /**
> > @@ -417,14 +249,14 @@ mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t
> > mtu)  static int  mlx4_set_flags(struct priv *priv, unsigned int keep,
> > unsigned int flags)  {
> > -	unsigned long tmp = 0;
> > -	int ret = mlx4_get_sysfs_ulong(priv, "flags", &tmp);
> > +	struct ifreq request;
> > +	int ret = mlx4_ifreq(priv, SIOCGIFFLAGS, &request);
> >
> >  	if (ret)
> >  		return ret;
> > -	tmp &= keep;
> > -	tmp |= (flags & (~keep));
> > -	return mlx4_set_sysfs_ulong(priv, "flags", tmp);
> > +	request.ifr_flags &= keep;
> > +	request.ifr_flags |= flags & ~keep;
> > +	return mlx4_ifreq(priv, SIOCSIFFLAGS, &request);
> >  }
> >
> >  /**
> > diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> > 965c19f21..da44faaf4 100644
> > --- a/drivers/net/mlx5/mlx5.h
> > +++ b/drivers/net/mlx5/mlx5.h
> > @@ -209,8 +209,6 @@ struct priv *mlx5_get_priv(struct rte_eth_dev
> > *dev);  int mlx5_is_secondary(void);  int priv_get_ifname(const struct
> > priv *, char (*)[IF_NAMESIZE]);  int priv_ifreq(const struct priv *,
> > int req, struct ifreq *); -int priv_is_ib_cntr(const char *); -int
> > priv_get_cntr_sysfs(struct priv *, const char *, uint64_t *);  int
> > priv_get_num_vfs(struct priv *, uint16_t *);  int priv_get_mtu(struct
> > priv *, uint16_t *);  int priv_set_flags(struct priv *, unsigned int,
> > unsigned int); diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> > b/drivers/net/mlx5/mlx5_ethdev.c index 666507691..b73cb53df 100644
> > --- a/drivers/net/mlx5/mlx5_ethdev.c
> > +++ b/drivers/net/mlx5/mlx5_ethdev.c
> > @@ -7,6 +7,7 @@
> >
> >  #include <stddef.h>
> >  #include <assert.h>
> > +#include <inttypes.h>
> >  #include <unistd.h>
> >  #include <stdint.h>
> >  #include <stdio.h>
> > @@ -173,181 +174,6 @@ priv_get_ifname(const struct priv *priv, char
> > (*ifname)[IF_NAMESIZE])  }
> >
> >  /**
> > - * Check if the counter is located on ib counters file.
> > - *
> > - * @param[in] cntr
> > - *   Counter name.
> > - *
> > - * @return
> > - *   1 if counter is located on ib counters file , 0 otherwise.
> > - */
> > -int
> > -priv_is_ib_cntr(const char *cntr)
> > -{
> > -	if (!strcmp(cntr, "out_of_buffer"))
> > -		return 1;
> > -	return 0;
> > -}
> > -
> > -/**
> > - * Read from sysfs entry.
> > - *
> > - * @param[in] priv
> > - *   Pointer to private structure.
> > - * @param[in] entry
> > - *   Entry name relative to sysfs path.
> > - * @param[out] buf
> > - *   Data output buffer.
> > - * @param size
> > - *   Buffer size.
> > - *
> > - * @return
> > - *   0 on success, -1 on failure and errno is set.
> > - */
> > -static int
> > -priv_sysfs_read(const struct priv *priv, const char *entry,
> > -		char *buf, size_t size)
> > -{
> > -	char ifname[IF_NAMESIZE];
> > -	FILE *file;
> > -	int ret;
> > -	int err;
> > -
> > -	if (priv_get_ifname(priv, &ifname))
> > -		return -1;
> > -
> > -	if (priv_is_ib_cntr(entry)) {
> > -		MKSTR(path, "%s/ports/1/hw_counters/%s",
> > -		      priv->ibdev_path, entry);
> > -		file = fopen(path, "rb");
> > -	} else {
> > -		MKSTR(path, "%s/device/net/%s/%s",
> > -		      priv->ibdev_path, ifname, entry);
> > -		file = fopen(path, "rb");
> > -	}
> > -	if (file == NULL)
> > -		return -1;
> > -	ret = fread(buf, 1, size, file);
> > -	err = errno;
> > -	if (((size_t)ret < size) && (ferror(file)))
> > -		ret = -1;
> > -	else
> > -		ret = size;
> > -	fclose(file);
> > -	errno = err;
> > -	return ret;
> > -}
> > -
> > -/**
> > - * Write to sysfs entry.
> > - *
> > - * @param[in] priv
> > - *   Pointer to private structure.
> > - * @param[in] entry
> > - *   Entry name relative to sysfs path.
> > - * @param[in] buf
> > - *   Data buffer.
> > - * @param size
> > - *   Buffer size.
> > - *
> > - * @return
> > - *   0 on success, -1 on failure and errno is set.
> > - */
> > -static int
> > -priv_sysfs_write(const struct priv *priv, const char *entry,
> > -		 char *buf, size_t size)
> > -{
> > -	char ifname[IF_NAMESIZE];
> > -	FILE *file;
> > -	int ret;
> > -	int err;
> > -
> > -	if (priv_get_ifname(priv, &ifname))
> > -		return -1;
> > -
> > -	MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname,
> entry);
> > -
> > -	file = fopen(path, "wb");
> > -	if (file == NULL)
> > -		return -1;
> > -	ret = fwrite(buf, 1, size, file);
> > -	err = errno;
> > -	if (((size_t)ret < size) || (ferror(file)))
> > -		ret = -1;
> > -	else
> > -		ret = size;
> > -	fclose(file);
> > -	errno = err;
> > -	return ret;
> > -}
> > -
> > -/**
> > - * Get unsigned long sysfs property.
> > - *
> > - * @param priv
> > - *   Pointer to private structure.
> > - * @param[in] name
> > - *   Entry name relative to sysfs path.
> > - * @param[out] value
> > - *   Value output buffer.
> > - *
> > - * @return
> > - *   0 on success, -1 on failure and errno is set.
> > - */
> > -static int
> > -priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned
> > long *value) -{
> > -	int ret;
> > -	unsigned long value_ret;
> > -	char value_str[32];
> > -
> > -	ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
> > -	if (ret == -1) {
> > -		DEBUG("cannot read %s value from sysfs: %s",
> > -		      name, strerror(errno));
> > -		return -1;
> > -	}
> > -	value_str[ret] = '\0';
> > -	errno = 0;
> > -	value_ret = strtoul(value_str, NULL, 0);
> > -	if (errno) {
> > -		DEBUG("invalid %s value `%s': %s", name, value_str,
> > -		      strerror(errno));
> > -		return -1;
> > -	}
> > -	*value = value_ret;
> > -	return 0;
> > -}
> > -
> > -/**
> > - * Set unsigned long sysfs property.
> > - *
> > - * @param priv
> > - *   Pointer to private structure.
> > - * @param[in] name
> > - *   Entry name relative to sysfs path.
> > - * @param value
> > - *   Value to set.
> > - *
> > - * @return
> > - *   0 on success, -1 on failure and errno is set.
> > - */
> > -static int
> > -priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned
> > long value) -{
> > -	int ret;
> > -	MKSTR(value_str, "%lu", value);
> > -
> > -	ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
> > -	if (ret == -1) {
> > -		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
> > -		      name, value_str, value, strerror(errno));
> > -		return -1;
> > -	}
> > -	return 0;
> > -}
> > -
> > -/**
> >   * Perform ifreq ioctl() on associated Ethernet device.
> >   *
> >   * @param[in] priv
> > @@ -390,20 +216,25 @@ priv_get_num_vfs(struct priv *priv, uint16_t
> > *num_vfs)  {
> >  	/* The sysfs entry name depends on the operating system. */
> >  	const char **name = (const char *[]){
> > -		"device/sriov_numvfs",
> > -		"device/mlx5_num_vfs",
> > +		"sriov_numvfs",
> > +		"mlx5_num_vfs",
> >  		NULL,
> >  	};
> > -	int ret;
> >
> >  	do {
> > -		unsigned long ulong_num_vfs;
> > +		int n;
> > +		FILE *file;
> > +		MKSTR(path, "%s/device/%s", priv->ibdev_path, *name);
> >
> > -		ret = priv_get_sysfs_ulong(priv, *name, &ulong_num_vfs);
> > -		if (!ret)
> > -			*num_vfs = ulong_num_vfs;
> > -	} while (*(++name) && ret);
> > -	return ret;
> > +		file = fopen(path, "rb");
> > +		if (!file)
> > +			continue;
> > +		n = fscanf(file, "%" SCNu16, num_vfs);
> > +		fclose(file);
> > +		if (n == 1)
> > +			return 0;
> > +	} while (*(++name));
> > +	return -1;
> >  }
> >
> >  /**
> > @@ -420,35 +251,12 @@ priv_get_num_vfs(struct priv *priv, uint16_t
> > *num_vfs)  int  priv_get_mtu(struct priv *priv, uint16_t *mtu)  {
> > -	unsigned long ulong_mtu;
> > +	struct ifreq request;
> > +	int ret = priv_ifreq(priv, SIOCGIFMTU, &request);
> >
> > -	if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1)
> > -		return -1;
> > -	*mtu = ulong_mtu;
> > -	return 0;
> > -}
> > -
> > -/**
> > - * Read device counter from sysfs.
> > - *
> > - * @param priv
> > - *   Pointer to private structure.
> > - * @param name
> > - *   Counter name.
> > - * @param[out] cntr
> > - *   Counter output buffer.
> > - *
> > - * @return
> > - *   0 on success, -1 on failure and errno is set.
> > - */
> > -int
> > -priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t
> > *cntr) -{
> > -	unsigned long ulong_ctr;
> > -
> > -	if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1)
> > -		return -1;
> > -	*cntr = ulong_ctr;
> > +	if (ret)
> > +		return ret;
> > +	*mtu = request.ifr_mtu;
> >  	return 0;
> >  }
> >
> > @@ -466,15 +274,9 @@ priv_get_cntr_sysfs(struct priv *priv, const char
> > *name, uint64_t *cntr)  static int  priv_set_mtu(struct priv *priv,
> > uint16_t mtu)  {
> > -	uint16_t new_mtu;
> > +	struct ifreq request = { .ifr_mtu = mtu, };
> >
> > -	if (priv_set_sysfs_ulong(priv, "mtu", mtu) ||
> > -	    priv_get_mtu(priv, &new_mtu))
> > -		return -1;
> > -	if (new_mtu == mtu)
> > -		return 0;
> > -	errno = EINVAL;
> > -	return -1;
> > +	return priv_ifreq(priv, SIOCSIFMTU, &request);
> >  }
> >
> >  /**
> > @@ -493,13 +295,14 @@ priv_set_mtu(struct priv *priv, uint16_t mtu)
> > int  priv_set_flags(struct priv *priv, unsigned int keep, unsigned int
> > flags)  {
> > -	unsigned long tmp;
> > +	struct ifreq request;
> > +	int ret = priv_ifreq(priv, SIOCGIFFLAGS, &request);
> >
> > -	if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1)
> > -		return -1;
> > -	tmp &= keep;
> > -	tmp |= (flags & (~keep));
> > -	return priv_set_sysfs_ulong(priv, "flags", tmp);
> > +	if (ret)
> > +		return ret;
> > +	request.ifr_flags &= keep;
> > +	request.ifr_flags |= flags & ~keep;
> > +	return priv_ifreq(priv, SIOCSIFFLAGS, &request);
> >  }
> >
> >  /**
> > diff --git a/drivers/net/mlx5/mlx5_stats.c
> > b/drivers/net/mlx5/mlx5_stats.c index 378472a70..eb9c65dcc 100644
> > --- a/drivers/net/mlx5/mlx5_stats.c
> > +++ b/drivers/net/mlx5/mlx5_stats.c
> > @@ -3,8 +3,11 @@
> >   * Copyright 2015 Mellanox.
> >   */
> >
> > +#include <inttypes.h>
> >  #include <linux/sockios.h>
> >  #include <linux/ethtool.h>
> > +#include <stdint.h>
> > +#include <stdio.h>
> >
> >  #include <rte_ethdev_driver.h>
> >  #include <rte_common.h>
> > @@ -19,6 +22,7 @@ struct mlx5_counter_ctrl {
> >  	char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
> >  	/* Name of the counter on the device table. */
> >  	char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
> > +	uint32_t ib:1; /**< Nonzero for IB counters. */
> >  };
> >
> >  static const struct mlx5_counter_ctrl mlx5_counters_init[] = { @@
> > -93,6 +97,7 @@ static const struct mlx5_counter_ctrl mlx5_counters_init[]
> = {
> >  	{
> >  		.dpdk_name = "rx_out_of_buffer",
> >  		.ctr_name = "out_of_buffer",
> > +		.ib = 1,
> >  	},
> >  	{
> >  		.dpdk_name = "tx_packets_phy",
> > @@ -143,13 +148,24 @@ priv_read_dev_counters(struct priv *priv,
> uint64_t *stats)
> >  		return -1;
> >  	}
> >  	for (i = 0; i != xstats_n; ++i) {
> > -		if (priv_is_ib_cntr(mlx5_counters_init[i].ctr_name))
> > -			priv_get_cntr_sysfs(priv,
> > -					    mlx5_counters_init[i].ctr_name,
> > -					    &stats[i]);
> > -		else
> > +		if (mlx5_counters_init[i].ib) {
> > +			FILE *file;
> > +			MKSTR(path, "%s/ports/1/hw_counters/%s",
> > +			      priv->ibdev_path,
> > +			      mlx5_counters_init[i].ctr_name);
> > +
> > +			file = fopen(path, "rb");
> > +			if (file) {
> > +				int n = fscanf(file, "%" SCNu64, &stats[i]);
> > +
> > +				fclose(file);
> > +				if (n != 1)
> > +					stats[i] = 0;
> > +			}
> > +		} else {
> >  			stats[i] = (uint64_t)
> >  				et_stats->data[xstats_ctrl-
> >dev_table_idx[i]];
> > +		}
> >  	}
> >  	return 0;
> >  }
> > @@ -232,7 +248,7 @@ priv_xstats_init(struct priv *priv)
> >  		}
> >  	}
> >  	for (j = 0; j != xstats_n; ++j) {
> > -		if (priv_is_ib_cntr(mlx5_counters_init[j].ctr_name))
> > +		if (mlx5_counters_init[j].ib)
> >  			continue;
> >  		if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) {
> >  			WARN("counter \"%s\" is not recognized",
> > --
> > 2.11.0

Patch

diff --git a/drivers/net/mlx4/mlx4_ethdev.c b/drivers/net/mlx4/mlx4_ethdev.c
index 3bc692731..fbeef16c8 100644
--- a/drivers/net/mlx4/mlx4_ethdev.c
+++ b/drivers/net/mlx4/mlx4_ethdev.c
@@ -132,167 +132,6 @@  mlx4_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
 }
 
 /**
- * Read from sysfs entry.
- *
- * @param[in] priv
- *   Pointer to private structure.
- * @param[in] entry
- *   Entry name relative to sysfs path.
- * @param[out] buf
- *   Data output buffer.
- * @param size
- *   Buffer size.
- *
- * @return
- *   Number of bytes read on success, negative errno value otherwise and
- *   rte_errno is set.
- */
-static int
-mlx4_sysfs_read(const struct priv *priv, const char *entry,
-		char *buf, size_t size)
-{
-	char ifname[IF_NAMESIZE];
-	FILE *file;
-	int ret;
-
-	ret = mlx4_get_ifname(priv, &ifname);
-	if (ret)
-		return ret;
-
-	MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
-	      ifname, entry);
-
-	file = fopen(path, "rb");
-	if (file == NULL) {
-		rte_errno = errno;
-		return -rte_errno;
-	}
-	ret = fread(buf, 1, size, file);
-	if ((size_t)ret < size && ferror(file)) {
-		rte_errno = EIO;
-		ret = -rte_errno;
-	} else {
-		ret = size;
-	}
-	fclose(file);
-	return ret;
-}
-
-/**
- * Write to sysfs entry.
- *
- * @param[in] priv
- *   Pointer to private structure.
- * @param[in] entry
- *   Entry name relative to sysfs path.
- * @param[in] buf
- *   Data buffer.
- * @param size
- *   Buffer size.
- *
- * @return
- *   Number of bytes written on success, negative errno value otherwise and
- *   rte_errno is set.
- */
-static int
-mlx4_sysfs_write(const struct priv *priv, const char *entry,
-		 char *buf, size_t size)
-{
-	char ifname[IF_NAMESIZE];
-	FILE *file;
-	int ret;
-
-	ret = mlx4_get_ifname(priv, &ifname);
-	if (ret)
-		return ret;
-
-	MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
-	      ifname, entry);
-
-	file = fopen(path, "wb");
-	if (file == NULL) {
-		rte_errno = errno;
-		return -rte_errno;
-	}
-	ret = fwrite(buf, 1, size, file);
-	if ((size_t)ret < size || ferror(file)) {
-		rte_errno = EIO;
-		ret = -rte_errno;
-	} else {
-		ret = size;
-	}
-	fclose(file);
-	return ret;
-}
-
-/**
- * Get unsigned long sysfs property.
- *
- * @param priv
- *   Pointer to private structure.
- * @param[in] name
- *   Entry name relative to sysfs path.
- * @param[out] value
- *   Value output buffer.
- *
- * @return
- *   0 on success, negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value)
-{
-	int ret;
-	unsigned long value_ret;
-	char value_str[32];
-
-	ret = mlx4_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
-	if (ret < 0) {
-		DEBUG("cannot read %s value from sysfs: %s",
-		      name, strerror(rte_errno));
-		return ret;
-	}
-	value_str[ret] = '\0';
-	errno = 0;
-	value_ret = strtoul(value_str, NULL, 0);
-	if (errno) {
-		rte_errno = errno;
-		DEBUG("invalid %s value `%s': %s", name, value_str,
-		      strerror(rte_errno));
-		return -rte_errno;
-	}
-	*value = value_ret;
-	return 0;
-}
-
-/**
- * Set unsigned long sysfs property.
- *
- * @param priv
- *   Pointer to private structure.
- * @param[in] name
- *   Entry name relative to sysfs path.
- * @param value
- *   Value to set.
- *
- * @return
- *   0 on success, negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value)
-{
-	int ret;
-	MKSTR(value_str, "%lu", value);
-
-	ret = mlx4_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
-	if (ret < 0) {
-		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
-		      name, value_str, value, strerror(rte_errno));
-		return ret;
-	}
-	return 0;
-}
-
-/**
  * Perform ifreq ioctl() on associated Ethernet device.
  *
  * @param[in] priv
@@ -361,12 +200,12 @@  mlx4_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN])
 int
 mlx4_mtu_get(struct priv *priv, uint16_t *mtu)
 {
-	unsigned long ulong_mtu = 0;
-	int ret = mlx4_get_sysfs_ulong(priv, "mtu", &ulong_mtu);
+	struct ifreq request;
+	int ret = mlx4_ifreq(priv, SIOCGIFMTU, &request);
 
 	if (ret)
 		return ret;
-	*mtu = ulong_mtu;
+	*mtu = request.ifr_mtu;
 	return 0;
 }
 
@@ -385,20 +224,13 @@  int
 mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 {
 	struct priv *priv = dev->data->dev_private;
-	uint16_t new_mtu;
-	int ret = mlx4_set_sysfs_ulong(priv, "mtu", mtu);
+	struct ifreq request = { .ifr_mtu = mtu, };
+	int ret = mlx4_ifreq(priv, SIOCSIFMTU, &request);
 
 	if (ret)
 		return ret;
-	ret = mlx4_mtu_get(priv, &new_mtu);
-	if (ret)
-		return ret;
-	if (new_mtu == mtu) {
-		priv->mtu = mtu;
-		return 0;
-	}
-	rte_errno = EINVAL;
-	return -rte_errno;
+	priv->mtu = mtu;
+	return 0;
 }
 
 /**
@@ -417,14 +249,14 @@  mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 static int
 mlx4_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)
 {
-	unsigned long tmp = 0;
-	int ret = mlx4_get_sysfs_ulong(priv, "flags", &tmp);
+	struct ifreq request;
+	int ret = mlx4_ifreq(priv, SIOCGIFFLAGS, &request);
 
 	if (ret)
 		return ret;
-	tmp &= keep;
-	tmp |= (flags & (~keep));
-	return mlx4_set_sysfs_ulong(priv, "flags", tmp);
+	request.ifr_flags &= keep;
+	request.ifr_flags |= flags & ~keep;
+	return mlx4_ifreq(priv, SIOCSIFFLAGS, &request);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 965c19f21..da44faaf4 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -209,8 +209,6 @@  struct priv *mlx5_get_priv(struct rte_eth_dev *dev);
 int mlx5_is_secondary(void);
 int priv_get_ifname(const struct priv *, char (*)[IF_NAMESIZE]);
 int priv_ifreq(const struct priv *, int req, struct ifreq *);
-int priv_is_ib_cntr(const char *);
-int priv_get_cntr_sysfs(struct priv *, const char *, uint64_t *);
 int priv_get_num_vfs(struct priv *, uint16_t *);
 int priv_get_mtu(struct priv *, uint16_t *);
 int priv_set_flags(struct priv *, unsigned int, unsigned int);
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 666507691..b73cb53df 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -7,6 +7,7 @@ 
 
 #include <stddef.h>
 #include <assert.h>
+#include <inttypes.h>
 #include <unistd.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -173,181 +174,6 @@  priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
 }
 
 /**
- * Check if the counter is located on ib counters file.
- *
- * @param[in] cntr
- *   Counter name.
- *
- * @return
- *   1 if counter is located on ib counters file , 0 otherwise.
- */
-int
-priv_is_ib_cntr(const char *cntr)
-{
-	if (!strcmp(cntr, "out_of_buffer"))
-		return 1;
-	return 0;
-}
-
-/**
- * Read from sysfs entry.
- *
- * @param[in] priv
- *   Pointer to private structure.
- * @param[in] entry
- *   Entry name relative to sysfs path.
- * @param[out] buf
- *   Data output buffer.
- * @param size
- *   Buffer size.
- *
- * @return
- *   0 on success, -1 on failure and errno is set.
- */
-static int
-priv_sysfs_read(const struct priv *priv, const char *entry,
-		char *buf, size_t size)
-{
-	char ifname[IF_NAMESIZE];
-	FILE *file;
-	int ret;
-	int err;
-
-	if (priv_get_ifname(priv, &ifname))
-		return -1;
-
-	if (priv_is_ib_cntr(entry)) {
-		MKSTR(path, "%s/ports/1/hw_counters/%s",
-		      priv->ibdev_path, entry);
-		file = fopen(path, "rb");
-	} else {
-		MKSTR(path, "%s/device/net/%s/%s",
-		      priv->ibdev_path, ifname, entry);
-		file = fopen(path, "rb");
-	}
-	if (file == NULL)
-		return -1;
-	ret = fread(buf, 1, size, file);
-	err = errno;
-	if (((size_t)ret < size) && (ferror(file)))
-		ret = -1;
-	else
-		ret = size;
-	fclose(file);
-	errno = err;
-	return ret;
-}
-
-/**
- * Write to sysfs entry.
- *
- * @param[in] priv
- *   Pointer to private structure.
- * @param[in] entry
- *   Entry name relative to sysfs path.
- * @param[in] buf
- *   Data buffer.
- * @param size
- *   Buffer size.
- *
- * @return
- *   0 on success, -1 on failure and errno is set.
- */
-static int
-priv_sysfs_write(const struct priv *priv, const char *entry,
-		 char *buf, size_t size)
-{
-	char ifname[IF_NAMESIZE];
-	FILE *file;
-	int ret;
-	int err;
-
-	if (priv_get_ifname(priv, &ifname))
-		return -1;
-
-	MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname, entry);
-
-	file = fopen(path, "wb");
-	if (file == NULL)
-		return -1;
-	ret = fwrite(buf, 1, size, file);
-	err = errno;
-	if (((size_t)ret < size) || (ferror(file)))
-		ret = -1;
-	else
-		ret = size;
-	fclose(file);
-	errno = err;
-	return ret;
-}
-
-/**
- * Get unsigned long sysfs property.
- *
- * @param priv
- *   Pointer to private structure.
- * @param[in] name
- *   Entry name relative to sysfs path.
- * @param[out] value
- *   Value output buffer.
- *
- * @return
- *   0 on success, -1 on failure and errno is set.
- */
-static int
-priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value)
-{
-	int ret;
-	unsigned long value_ret;
-	char value_str[32];
-
-	ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
-	if (ret == -1) {
-		DEBUG("cannot read %s value from sysfs: %s",
-		      name, strerror(errno));
-		return -1;
-	}
-	value_str[ret] = '\0';
-	errno = 0;
-	value_ret = strtoul(value_str, NULL, 0);
-	if (errno) {
-		DEBUG("invalid %s value `%s': %s", name, value_str,
-		      strerror(errno));
-		return -1;
-	}
-	*value = value_ret;
-	return 0;
-}
-
-/**
- * Set unsigned long sysfs property.
- *
- * @param priv
- *   Pointer to private structure.
- * @param[in] name
- *   Entry name relative to sysfs path.
- * @param value
- *   Value to set.
- *
- * @return
- *   0 on success, -1 on failure and errno is set.
- */
-static int
-priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value)
-{
-	int ret;
-	MKSTR(value_str, "%lu", value);
-
-	ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
-	if (ret == -1) {
-		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
-		      name, value_str, value, strerror(errno));
-		return -1;
-	}
-	return 0;
-}
-
-/**
  * Perform ifreq ioctl() on associated Ethernet device.
  *
  * @param[in] priv
@@ -390,20 +216,25 @@  priv_get_num_vfs(struct priv *priv, uint16_t *num_vfs)
 {
 	/* The sysfs entry name depends on the operating system. */
 	const char **name = (const char *[]){
-		"device/sriov_numvfs",
-		"device/mlx5_num_vfs",
+		"sriov_numvfs",
+		"mlx5_num_vfs",
 		NULL,
 	};
-	int ret;
 
 	do {
-		unsigned long ulong_num_vfs;
+		int n;
+		FILE *file;
+		MKSTR(path, "%s/device/%s", priv->ibdev_path, *name);
 
-		ret = priv_get_sysfs_ulong(priv, *name, &ulong_num_vfs);
-		if (!ret)
-			*num_vfs = ulong_num_vfs;
-	} while (*(++name) && ret);
-	return ret;
+		file = fopen(path, "rb");
+		if (!file)
+			continue;
+		n = fscanf(file, "%" SCNu16, num_vfs);
+		fclose(file);
+		if (n == 1)
+			return 0;
+	} while (*(++name));
+	return -1;
 }
 
 /**
@@ -420,35 +251,12 @@  priv_get_num_vfs(struct priv *priv, uint16_t *num_vfs)
 int
 priv_get_mtu(struct priv *priv, uint16_t *mtu)
 {
-	unsigned long ulong_mtu;
+	struct ifreq request;
+	int ret = priv_ifreq(priv, SIOCGIFMTU, &request);
 
-	if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1)
-		return -1;
-	*mtu = ulong_mtu;
-	return 0;
-}
-
-/**
- * Read device counter from sysfs.
- *
- * @param priv
- *   Pointer to private structure.
- * @param name
- *   Counter name.
- * @param[out] cntr
- *   Counter output buffer.
- *
- * @return
- *   0 on success, -1 on failure and errno is set.
- */
-int
-priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr)
-{
-	unsigned long ulong_ctr;
-
-	if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1)
-		return -1;
-	*cntr = ulong_ctr;
+	if (ret)
+		return ret;
+	*mtu = request.ifr_mtu;
 	return 0;
 }
 
@@ -466,15 +274,9 @@  priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr)
 static int
 priv_set_mtu(struct priv *priv, uint16_t mtu)
 {
-	uint16_t new_mtu;
+	struct ifreq request = { .ifr_mtu = mtu, };
 
-	if (priv_set_sysfs_ulong(priv, "mtu", mtu) ||
-	    priv_get_mtu(priv, &new_mtu))
-		return -1;
-	if (new_mtu == mtu)
-		return 0;
-	errno = EINVAL;
-	return -1;
+	return priv_ifreq(priv, SIOCSIFMTU, &request);
 }
 
 /**
@@ -493,13 +295,14 @@  priv_set_mtu(struct priv *priv, uint16_t mtu)
 int
 priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)
 {
-	unsigned long tmp;
+	struct ifreq request;
+	int ret = priv_ifreq(priv, SIOCGIFFLAGS, &request);
 
-	if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1)
-		return -1;
-	tmp &= keep;
-	tmp |= (flags & (~keep));
-	return priv_set_sysfs_ulong(priv, "flags", tmp);
+	if (ret)
+		return ret;
+	request.ifr_flags &= keep;
+	request.ifr_flags |= flags & ~keep;
+	return priv_ifreq(priv, SIOCSIFFLAGS, &request);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 378472a70..eb9c65dcc 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -3,8 +3,11 @@ 
  * Copyright 2015 Mellanox.
  */
 
+#include <inttypes.h>
 #include <linux/sockios.h>
 #include <linux/ethtool.h>
+#include <stdint.h>
+#include <stdio.h>
 
 #include <rte_ethdev_driver.h>
 #include <rte_common.h>
@@ -19,6 +22,7 @@  struct mlx5_counter_ctrl {
 	char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
 	/* Name of the counter on the device table. */
 	char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
+	uint32_t ib:1; /**< Nonzero for IB counters. */
 };
 
 static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
@@ -93,6 +97,7 @@  static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
 	{
 		.dpdk_name = "rx_out_of_buffer",
 		.ctr_name = "out_of_buffer",
+		.ib = 1,
 	},
 	{
 		.dpdk_name = "tx_packets_phy",
@@ -143,13 +148,24 @@  priv_read_dev_counters(struct priv *priv, uint64_t *stats)
 		return -1;
 	}
 	for (i = 0; i != xstats_n; ++i) {
-		if (priv_is_ib_cntr(mlx5_counters_init[i].ctr_name))
-			priv_get_cntr_sysfs(priv,
-					    mlx5_counters_init[i].ctr_name,
-					    &stats[i]);
-		else
+		if (mlx5_counters_init[i].ib) {
+			FILE *file;
+			MKSTR(path, "%s/ports/1/hw_counters/%s",
+			      priv->ibdev_path,
+			      mlx5_counters_init[i].ctr_name);
+
+			file = fopen(path, "rb");
+			if (file) {
+				int n = fscanf(file, "%" SCNu64, &stats[i]);
+
+				fclose(file);
+				if (n != 1)
+					stats[i] = 0;
+			}
+		} else {
 			stats[i] = (uint64_t)
 				et_stats->data[xstats_ctrl->dev_table_idx[i]];
+		}
 	}
 	return 0;
 }
@@ -232,7 +248,7 @@  priv_xstats_init(struct priv *priv)
 		}
 	}
 	for (j = 0; j != xstats_n; ++j) {
-		if (priv_is_ib_cntr(mlx5_counters_init[j].ctr_name))
+		if (mlx5_counters_init[j].ib)
 			continue;
 		if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) {
 			WARN("counter \"%s\" is not recognized",