[dpdk-dev] net/mlx5: support extended statistics

Message ID 1484124942-26279-1-git-send-email-shahafs@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel compilation fail Compilation issues

Commit Message

Shahaf Shuler Jan. 11, 2017, 8:55 a.m. UTC
  Implement xstats_*() DPDK callbacks

Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
Signed-off-by: Elad Persiko <eladpe@mellanox.com>
Signed-off-by: Hanoch Haim <hhaim@cisco.com>
---
 drivers/net/mlx5/mlx5.c         |   3 +
 drivers/net/mlx5/mlx5.h         |  12 ++
 drivers/net/mlx5/mlx5_defs.h    |   3 +
 drivers/net/mlx5/mlx5_stats.c   | 342 ++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_trigger.c |   1 +
 5 files changed, 361 insertions(+)
  

Comments

Adrien Mazarguil Jan. 11, 2017, 4:54 p.m. UTC | #1
Hi Shahaf,

Thanks, I have a few comments, most of them relatively minor. Please see
below.

On Wed, Jan 11, 2017 at 10:55:42AM +0200, Shahaf Shuler wrote:
> Implement xstats_*() DPDK callbacks
> 
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> Signed-off-by: Elad Persiko <eladpe@mellanox.com>
> Signed-off-by: Hanoch Haim <hhaim@cisco.com>
> ---
>  drivers/net/mlx5/mlx5.c         |   3 +
>  drivers/net/mlx5/mlx5.h         |  12 ++
>  drivers/net/mlx5/mlx5_defs.h    |   3 +
>  drivers/net/mlx5/mlx5_stats.c   | 342 ++++++++++++++++++++++++++++++++++++++++
>  drivers/net/mlx5/mlx5_trigger.c |   1 +
>  5 files changed, 361 insertions(+)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index 6293c1f..3359d3c 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -199,6 +199,9 @@
>  	.link_update = mlx5_link_update,
>  	.stats_get = mlx5_stats_get,
>  	.stats_reset = mlx5_stats_reset,
> +	.xstats_get = mlx5_xstats_get,
> +	.xstats_reset = mlx5_xstats_reset,
> +	.xstats_get_names = mlx5_xstats_get_names,
>  	.dev_infos_get = mlx5_dev_infos_get,
>  	.dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
>  	.vlan_filter_set = mlx5_vlan_filter_set,
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index ee62e04..034a05e 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -89,6 +89,11 @@ enum {
>  	PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a,
>  };
>  
> +struct mlx5_xstats_ctrl {
> +	uint64_t shadow[MLX5_MAX_XSTATS];
> +	uint16_t stats_n; /* Number of device stats. */
> +};
> +
>  struct priv {
>  	struct rte_eth_dev *dev; /* Ethernet device. */
>  	struct ibv_context *ctx; /* Verbs context. */
> @@ -142,6 +147,7 @@ struct priv {
>  	struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
>  	LIST_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
>  	uint32_t link_speed_capa; /* Link speed capabilities. */
> +	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
>  	rte_spinlock_t lock; /* Lock for control functions. */
>  };
>  
> @@ -250,8 +256,14 @@ int mlx5_dev_rss_reta_update(struct rte_eth_dev *,
>  
>  /* mlx5_stats.c */
>  
> +void priv_xstats_init(struct priv *);
>  void mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *);
>  void mlx5_stats_reset(struct rte_eth_dev *);
> +int mlx5_xstats_get(struct rte_eth_dev *,
> +		    struct rte_eth_xstat *, unsigned int);
> +void mlx5_xstats_reset(struct rte_eth_dev *);
> +int mlx5_xstats_get_names(struct rte_eth_dev *,
> +			  struct rte_eth_xstat_name *, unsigned int);
>  
>  /* mlx5_vlan.c */
>  
> diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
> index b32816e..beabb70 100644
> --- a/drivers/net/mlx5/mlx5_defs.h
> +++ b/drivers/net/mlx5/mlx5_defs.h
> @@ -79,4 +79,7 @@
>  /* Alarm timeout. */
>  #define MLX5_ALARM_TIMEOUT_US 100000
>  
> +/* Maximum number of extended statistics counters. */
> +#define MLX5_MAX_XSTATS 32
> +
>  #endif /* RTE_PMD_MLX5_DEFS_H_ */
> diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
> index f2b5781..08a7656 100644
> --- a/drivers/net/mlx5/mlx5_stats.c
> +++ b/drivers/net/mlx5/mlx5_stats.c
> @@ -31,11 +31,16 @@
>   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>   */
>  
> +#include <linux/sockios.h>
> +#include <linux/ethtool.h>
> +
>  /* DPDK headers don't like -pedantic. */
>  #ifdef PEDANTIC
>  #pragma GCC diagnostic ignored "-Wpedantic"
>  #endif
>  #include <rte_ethdev.h>
> +#include <rte_common.h>
> +#include <rte_malloc.h>
>  #ifdef PEDANTIC
>  #pragma GCC diagnostic error "-Wpedantic"
>  #endif
> @@ -44,6 +49,269 @@
>  #include "mlx5_rxtx.h"
>  #include "mlx5_defs.h"
>  
> +struct mlx5_counter_ctrl {
> +	/* Name of the counter for dpdk user. */

"for dpdk user" is redundant given the purpose of this API.

> +	char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
> +	/* Name of the counter on the device table. */
> +	char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
> +	/* Index in the device counters table. */
> +	uint16_t dev_table_idx;
> +};
> +
> +static struct mlx5_counter_ctrl mlx5_counters_init[] = {
> +	{
> +		.dpdk_name = "rx_port_unicast_bytes",
> +		.ctr_name = "rx_vport_unicast_bytes",
> +		.dev_table_idx = 0

Please add comma after ".dev_table_idx = 0" (same comment applies to the
following definitions). Note you may leave it out entirely since unspecified
fields are automatically initialized to zero.

> +	},
> +	{
> +		.dpdk_name = "rx_port_multicast_bytes",
> +		.ctr_name = "rx_vport_multicast_bytes",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_port_broadcast_bytes",
> +		.ctr_name = "rx_vport_broadcast_bytes",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_port_unicast_packets",
> +		.ctr_name = "rx_vport_unicast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_port_multicast_packets",
> +		.ctr_name = "rx_vport_multicast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_port_broadcast_packets",
> +		.ctr_name = "rx_vport_broadcast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_unicast_bytes",
> +		.ctr_name = "tx_vport_unicast_bytes",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_multicast_bytes",
> +		.ctr_name = "tx_vport_multicast_bytes",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_broadcast_bytes",
> +		.ctr_name = "tx_vport_broadcast_bytes",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_unicast_packets",
> +		.ctr_name = "tx_vport_unicast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_multicast_packets",
> +		.ctr_name = "tx_vport_multicast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_broadcast_packets",
> +		.ctr_name = "tx_vport_broadcast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_wqe_err",
> +		.ctr_name = "rx_wqe_err",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_crc_errors_phy",
> +		.ctr_name = "rx_crc_errors_phy",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_in_range_len_errors_phy",
> +		.ctr_name = "rx_in_range_len_errors_phy",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_symbol_err_phy",
> +		.ctr_name = "rx_symbol_err_phy",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_errors_phy",
> +		.ctr_name = "tx_errors_phy",
> +		.dev_table_idx = 0
> +	},
> +};
> +
> +const unsigned int xstats_n = RTE_DIM(mlx5_counters_init);

Considering this global depends on mlx5_counters_init[] and is only used in
this file, it should be static.

> +
> +/**
> + * Read device counters table.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param[out] stats
> + *   Counters table output buffer.
> + *
> + * @return
> + *   0 on success and stats is filled, negative on error.
> + */
> +static int
> +priv_read_dev_counters(struct priv *priv, uint64_t *stats)
> +{
> +	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
> +	unsigned int i;
> +	struct ifreq ifr;
> +	unsigned int stats_sz = (xstats_ctrl->stats_n * sizeof(uint64_t)) +
> +				 sizeof(struct ethtool_stats);
> +	struct ethtool_stats et_stats[(stats_sz + (
> +				      sizeof(struct ethtool_stats) - 1)) /
> +				      sizeof(struct ethtool_stats)];
> +
> +	et_stats->cmd = ETHTOOL_GSTATS;
> +	et_stats->n_stats = xstats_ctrl->stats_n;
> +	ifr.ifr_data = (caddr_t)et_stats;
> +	if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
> +		WARN("unable to get statistic values");
> +		return -1;
> +	}
> +	for (i = 0; (i != xstats_n) ; ++i)
> +		stats[i] = (uint64_t)
> +			   et_stats->data[mlx5_counters_init[i].dev_table_idx];
> +	return 0;
> +}
> +
> +/**
> + * Init the strcutures to read device counters.

Typo, "strcutures".

> + *
> + * @param priv
> + *   Pointer to private structure.
> + */
> +void
> +priv_xstats_init(struct priv *priv)
> +{
> +	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
> +	unsigned int i;
> +	unsigned int j;
> +	char ifname[IF_NAMESIZE];
> +	struct ifreq ifr;
> +	struct ethtool_drvinfo drvinfo;
> +	struct ethtool_gstrings *strings = NULL;
> +	unsigned int dev_stats_n;
> +	unsigned int str_sz;
> +
> +	if (priv_get_ifname(priv, &ifname)) {
> +		WARN("unable to get interface name");
> +		return;
> +	}
> +	/* How many statistics are available. */
> +	drvinfo.cmd = ETHTOOL_GDRVINFO;
> +	ifr.ifr_data = (caddr_t)&drvinfo;
> +	if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
> +		WARN("unable to get driver info");
> +		return;
> +	}
> +	dev_stats_n = drvinfo.n_stats;
> +	if (dev_stats_n < 1) {
> +		WARN("no extended statistics available");
> +		return;
> +	}
> +	xstats_ctrl->stats_n = dev_stats_n;
> +	/* Allocate memory to grab stat names and values. */
> +	str_sz = dev_stats_n * ETH_GSTRING_LEN;
> +	strings = (struct ethtool_gstrings *)
> +		  rte_malloc("xstats_strings",
> +			     str_sz + sizeof(struct ethtool_gstrings), 0);
> +	if (!strings) {
> +		WARN("unable to allocate memory for xstats");
> +		return;
> +	}
> +	strings->cmd = ETHTOOL_GSTRINGS;
> +	strings->string_set = ETH_SS_STATS;
> +	strings->len = dev_stats_n;
> +	ifr.ifr_data = (caddr_t)strings;
> +	if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
> +		WARN("unable to get statistic names");
> +		goto free;
> +	}
> +	for (j = 0; (j != xstats_n); ++j)
> +		mlx5_counters_init[j].dev_table_idx = dev_stats_n;

As a global, mlx5_counters_init[] affects all DPDK ports that use the mlx5
PMD simultaneously. Are you sure calling priv_xstats_init() from
mlx5_dev_start() is safe?

I think this global should be const and initialized only once.

> +	for (i = 0; (i != dev_stats_n); ++i) {
> +		const char *curr_string = (const char *)
> +			&strings->data[i * ETH_GSTRING_LEN];
> +
> +		for (j = 0; (j != xstats_n); ++j) {
> +			if (!strcmp(mlx5_counters_init[j].ctr_name,
> +				    curr_string)) {
> +				mlx5_counters_init[j].dev_table_idx = i;

The above comment also applies here. The PMD instance should allocate its
own mapping as a priv field if there is no other choice. You could perhaps
make it part of the mlx5_xstats_ctrl structure.

> +				break;
> +			}
> +		}
> +	}
> +	for (j = 0; (j != xstats_n); ++j) {
> +		if (mlx5_counters_init[j].dev_table_idx >= dev_stats_n) {
> +			WARN("Counters are not recognized");

Displaying the name of the counter that is not recognized could help users
determine what's doing on. Please make sure all info/warning/error messages
are helpful enough, e.g.:

 testpmd> show port xstats 0
 ###### NIC extended statistics for port 0 
 mlx5: Counters are not recognized
 testpmd> # what?

> +			goto free;
> +		}
> +	}
> +	/* Copy to shadow at first time. */
> +	assert(xstats_n <= MLX5_MAX_XSTATS);
> +	priv_read_dev_counters(priv, xstats_ctrl->shadow);
> +free:
> +	rte_free(strings);
> +}
> +
> +/**
> + * Get device extended statistics.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param[out] stats
> + *   Pointer to rte extended stats table.
> + *
> + * @return
> + *   Number of extended stats on success and stats is filled,
> + *   nagative on error.

Typo, "nagative".

> + */
> +static int
> +priv_xstats_get(struct priv *priv, struct rte_eth_xstat *stats)
> +{
> +	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
> +	unsigned int i;
> +	uint64_t counters[xstats_n];
> +
> +	if (priv_read_dev_counters(priv, counters) < 0)
> +		return -1;
> +	for (i = 0; (i != xstats_n) ; ++i) {
> +		stats[i].id = i;
> +		stats[i].value = (uint64_t)
> +				 (counters[i] - xstats_ctrl->shadow[i]);

I understand the purpose of the shadow array in this context, however to me
"shadow" implies some sort of caching is taking place. I think "init" or
"base" (as the base value or something) would make its purpose clearer.

> +	}
> +	return xstats_n;
> +}
> +
> +/**
> + * Reset device extended statistics.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + */
> +static void
> +priv_xstats_reset(struct priv *priv)
> +{
> +	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
> +	unsigned int i;
> +	uint64_t counters[xstats_n];
> +
> +	if (priv_read_dev_counters(priv, counters) < 0)
> +		return;
> +	for (i = 0; (i != xstats_n) ; ++i)
> +		xstats_ctrl->shadow[i] = counters[i];
> +}
> +
>  /**
>   * DPDK callback to get device statistics.
>   *
> @@ -142,3 +410,77 @@
>  #endif
>  	priv_unlock(priv);
>  }
> +
> +/**
> + * DPDK callback to get extended device statistics.
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + * @param[out] stats
> + *   Stats table output buffer.
> + * @param n
> + *   The size of the stats table.
> + *
> + * @return
> + *   Number of xstats on success, negative on failure.
> + */
> +int
> +mlx5_xstats_get(struct rte_eth_dev *dev,
> +		struct rte_eth_xstat *stats, unsigned int n)
> +{
> +	struct priv *priv = mlx5_get_priv(dev);
> +	int ret = xstats_n;
> +
> +	if (n >= xstats_n && stats) {
> +		priv_lock(priv);
> +		ret = priv_xstats_get(priv, stats);
> +		priv_unlock(priv);
> +	}
> +	return ret;
> +}
> +
> +/**
> + * DPDK callback to clear device extended statistics.
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + */
> +void
> +mlx5_xstats_reset(struct rte_eth_dev *dev)
> +{
> +	struct priv *priv = mlx5_get_priv(dev);
> +
> +	priv_lock(priv);
> +	priv_xstats_reset(priv);
> +	priv_unlock(priv);
> +}
> +
> +/**
> + * DPDK callback to retrieve names of extended device statistics
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + * @param[out] xstats_names
> + *   Block of memory to insert names into

Let's call "block of memory" a "buffer"? There is also a missing period at
the end of this sentence. 

> + * @param size
> + *   Number of names.

"num" (or "n" as in the API) would make more sense than "size" here.

> + *
> + * @return
> + *   Number of xstats names.
> + */
> +int
> +mlx5_xstats_get_names(struct rte_eth_dev *dev,
> +		struct rte_eth_xstat_name *xstats_names, unsigned int size)
> +{
> +	struct priv *priv = mlx5_get_priv(dev);
> +	unsigned int i;
> +
> +	if (size >= xstats_n && xstats_names) {
> +		priv_lock(priv);
> +		for (i = 0; (i != xstats_n) ; ++i)
> +			strcpy(xstats_names[i].name,
> +			       mlx5_counters_init[i].dpdk_name);
> +		priv_unlock(priv);
> +	}
> +	return xstats_n;
> +}
> diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
> index 2399243..30addd2 100644
> --- a/drivers/net/mlx5/mlx5_trigger.c
> +++ b/drivers/net/mlx5/mlx5_trigger.c
> @@ -91,6 +91,7 @@
>  		priv_fdir_enable(priv);
>  	priv_dev_interrupt_handler_install(priv, dev);
>  	err = priv_flow_start(priv);
> +	priv_xstats_init(priv);
>  	priv_unlock(priv);
>  	return -err;
>  }
> -- 
> 1.8.3.1
  
Hanoch Haim (hhaim) Jan. 12, 2017, 9:56 a.m. UTC | #2
Hi Shahaf, 

1) I would add *all* the hw counters to PF

$ethtool -S enp135s0f0
NIC statistics:
     rx_packets: 54
     rx_bytes: 3240
     tx_packets: 138
     tx_bytes: 8280
     tx_tso_packets: 0
     tx_tso_bytes: 0
     tx_tso_inner_packets: 0
     tx_tso_inner_bytes: 0
     rx_lro_packets: 0
     rx_lro_bytes: 0
     rx_csum_unnecessary: 0
     rx_csum_none: 54
     rx_csum_complete: 0
     rx_csum_unnecessary_inner: 0
     tx_csum_partial: 0
     tx_csum_partial_inner: 0
     tx_csum_none: 138
     tx_queue_stopped: 0
     tx_queue_wake: 0
     tx_queue_dropped: 0
     rx_sw_lro_aggregated: 0
     rx_sw_lro_flushed: 0
     rx_sw_lro_no_desc: 0
     rx_wqe_err: 0
     rx_cqe_compress_pkts: 0
     rx_cqe_compress_blks: 0
     rx_mpwqe_filler: 0
     rx_mpwqe_frag: 0
     link_down_events_phy: 0
     rx_out_of_buffer: 0
     rx_vport_unicast_packets: 26126070627
     rx_vport_unicast_bytes: 1328543705864986
     tx_vport_unicast_packets: 687072959078
     tx_vport_unicast_bytes: 3339188884659844
     rx_vport_multicast_packets: 0
     rx_vport_multicast_bytes: 0
     tx_vport_multicast_packets: 0
     tx_vport_multicast_bytes: 0
     rx_vport_broadcast_packets: 4543553
     rx_vport_broadcast_bytes: 272614872
     tx_vport_broadcast_packets: 4543637
     tx_vport_broadcast_bytes: 272619912
     rx_vport_rdma_unicast_packets: 430134
     rx_vport_rdma_unicast_bytes: 32690184
     tx_vport_rdma_unicast_packets: 0
     tx_vport_rdma_unicast_bytes: 0
     rx_vport_rdma_multicast_packets: 0
     rx_vport_rdma_multicast_bytes: 0
     tx_vport_rdma_multicast_packets: 0
     tx_vport_rdma_multicast_bytes: 0
     tx_packets_phy: 11756775169850
     rx_packets_phy: 2246825269635
     rx_crc_errors_phy: 0
     tx_bytes_phy: 3404211507236837
     rx_bytes_phy: 1342896754459327
     tx_multicast_phy: 0
     tx_broadcast_phy: 4552688
     rx_multicast_phy: 0
     rx_broadcast_phy: 4552604
     rx_in_range_len_errors_phy: 0
     rx_out_of_range_len_phy: 0
     rx_oversize_pkts_phy: 1135778701
     rx_symbol_err_phy: 0
     tx_mac_control_phy: 16579953
     rx_mac_control_phy: 16670966
     rx_unsupported_op_phy: 0
     rx_pause_ctrl_phy: 16670966
     tx_pause_ctrl_phy: 16579953
     rx_discards_phy: 12244164648
     tx_discards_phy: 0
     tx_errors_phy: 0
     rx_undersize_pkts_phy: 0
     rx_fragments_phy: 0
     rx_jabbers_phy: 0
     rx_64_bytes_phy: 911948813404
     rx_65_to_127_bytes_phy: 335422614022
     rx_128_to_255_bytes_phy: 153383903056
     rx_256_to_511_bytes_phy: 80464779917
     rx_512_to_1023_bytes_phy: 184356224678
     rx_1024_to_1518_bytes_phy: 313733619509
     rx_1519_to_2047_bytes_phy: 235669614881
     rx_2048_to_4095_bytes_phy: 4051420173
     rx_4096_to_8191_bytes_phy: 1011852034
     rx_8192_to_10239_bytes_phy: 27710327250
     time_since_last_clear_phy: 2998621833
     symbol_errors_phy: 0
     sync_headers_errors_phy: 0
     edpl/bip_errors_lane0_phy: 0
     edpl/bip_errors_lane1_phy: 0
     edpl/bip_errors_lane2_phy: 0
     edpl/bip_errors_lane3_phy: 0
     fc_corrected_blocks_lane0_phy: 0
     fc_corrected_blocks_lane1_phy: 0
     fc_corrected_blocks_lane2_phy: 0
     fc_corrected_blocks_lane3_phy: 0
     fc_uncorrectable_lane0_phy: 0
     fc_uncorrectable_lane1_phy: 0
     fc_uncorrectable_lane2_phy: 0
     fc_uncorrectable_lane3_phy: 0
     rs_corrected_blocks_phy: 0
     rs_uncorrectable_blocks_phy: 0
     rs_no_errors_blocks_phy: 58566832662545
     rs_single_error_blocks_phy: 0
     rs_corrected_symbols_total_phy: 0
     rs_corrected_symbols_lane0_phy: 0
     rs_corrected_symbols_lane1_phy: 0
     rs_corrected_symbols_lane2_phy: 0
     rs_corrected_symbols_lane3_phy: 0
     rx_prio0_bytes: 1342896754459327
     rx_prio0_packets: 2234564434021
     tx_prio0_bytes: 3404210446119845
     tx_prio0_packets: 11756758589897
     rx_prio1_bytes: 0
     rx_prio1_packets: 0
     tx_prio1_bytes: 0
     tx_prio1_packets: 0
     rx_prio2_bytes: 0
     rx_prio2_packets: 0
     tx_prio2_bytes: 0
     tx_prio2_packets: 0
     rx_prio3_bytes: 0
     rx_prio3_packets: 0
     tx_prio3_bytes: 0
     tx_prio3_packets: 0
     rx_prio4_bytes: 0
     rx_prio4_packets: 0
     tx_prio4_bytes: 0
     tx_prio4_packets: 0
     rx_prio5_bytes: 0
     rx_prio5_packets: 0
     tx_prio5_bytes: 0
     tx_prio5_packets: 0
     rx_prio6_bytes: 0
     rx_prio6_packets: 0
     tx_prio6_bytes: 0
     tx_prio6_packets: 0
     rx_prio7_bytes: 0
     rx_prio7_packets: 0
     tx_prio7_bytes: 0
     tx_prio7_packets: 0
     rx0_packets: 54
     rx0_bytes: 3240
     rx0_csum_complete

2) Add the right HW counters to the VF 

tx28_nop: 0
     tx28_queue_stopped: 0
     tx28_queue_wake: 0
     tx28_queue_dropped: 0
     tx29_packets: 0
     tx29_bytes: 0
     tx29_tso_packets: 0
     tx29_tso_bytes: 0
     tx29_tso_inner_packets: 0
     tx29_tso_inner_bytes: 0
     tx29_csum_none: 0
     tx29_csum_partial: 0
     tx29_csum_partial_inner: 0
     tx29_nop: 0
     tx29_queue_stopped: 0

3) Implement the stats_get/reset using HW counters 

.stats_get = mlx5_stats_get,
.stats_reset = mlx5_stats_reset,


Thanks,
Hanoh


-----Original Message-----
From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com] 
Sent: Wednesday, January 11, 2017 6:55 PM
To: Shahaf Shuler
Cc: dev@dpdk.org; Elad Persiko; Hanoch Haim (hhaim)
Subject: Re: [PATCH] net/mlx5: support extended statistics

Hi Shahaf,

Thanks, I have a few comments, most of them relatively minor. Please see below.

On Wed, Jan 11, 2017 at 10:55:42AM +0200, Shahaf Shuler wrote:
> Implement xstats_*() DPDK callbacks
> 
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> Signed-off-by: Elad Persiko <eladpe@mellanox.com>
> Signed-off-by: Hanoch Haim <hhaim@cisco.com>
> ---
>  drivers/net/mlx5/mlx5.c         |   3 +
>  drivers/net/mlx5/mlx5.h         |  12 ++
>  drivers/net/mlx5/mlx5_defs.h    |   3 +
>  drivers/net/mlx5/mlx5_stats.c   | 342 ++++++++++++++++++++++++++++++++++++++++
>  drivers/net/mlx5/mlx5_trigger.c |   1 +
>  5 files changed, 361 insertions(+)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 
> 6293c1f..3359d3c 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -199,6 +199,9 @@
>  	.link_update = mlx5_link_update,
>  	.stats_get = mlx5_stats_get,
>  	.stats_reset = mlx5_stats_reset,
> +	.xstats_get = mlx5_xstats_get,
> +	.xstats_reset = mlx5_xstats_reset,
> +	.xstats_get_names = mlx5_xstats_get_names,
>  	.dev_infos_get = mlx5_dev_infos_get,
>  	.dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
>  	.vlan_filter_set = mlx5_vlan_filter_set, diff --git 
> a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 
> ee62e04..034a05e 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -89,6 +89,11 @@ enum {
>  	PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a,  };
>  
> +struct mlx5_xstats_ctrl {
> +	uint64_t shadow[MLX5_MAX_XSTATS];
> +	uint16_t stats_n; /* Number of device stats. */ };
> +
>  struct priv {
>  	struct rte_eth_dev *dev; /* Ethernet device. */
>  	struct ibv_context *ctx; /* Verbs context. */ @@ -142,6 +147,7 @@ 
> struct priv {
>  	struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
>  	LIST_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
>  	uint32_t link_speed_capa; /* Link speed capabilities. */
> +	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
>  	rte_spinlock_t lock; /* Lock for control functions. */  };
>  
> @@ -250,8 +256,14 @@ int mlx5_dev_rss_reta_update(struct rte_eth_dev 
> *,
>  
>  /* mlx5_stats.c */
>  
> +void priv_xstats_init(struct priv *);
>  void mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *);  
> void mlx5_stats_reset(struct rte_eth_dev *);
> +int mlx5_xstats_get(struct rte_eth_dev *,
> +		    struct rte_eth_xstat *, unsigned int); void 
> +mlx5_xstats_reset(struct rte_eth_dev *); int 
> +mlx5_xstats_get_names(struct rte_eth_dev *,
> +			  struct rte_eth_xstat_name *, unsigned int);
>  
>  /* mlx5_vlan.c */
>  
> diff --git a/drivers/net/mlx5/mlx5_defs.h 
> b/drivers/net/mlx5/mlx5_defs.h index b32816e..beabb70 100644
> --- a/drivers/net/mlx5/mlx5_defs.h
> +++ b/drivers/net/mlx5/mlx5_defs.h
> @@ -79,4 +79,7 @@
>  /* Alarm timeout. */
>  #define MLX5_ALARM_TIMEOUT_US 100000
>  
> +/* Maximum number of extended statistics counters. */ #define 
> +MLX5_MAX_XSTATS 32
> +
>  #endif /* RTE_PMD_MLX5_DEFS_H_ */
> diff --git a/drivers/net/mlx5/mlx5_stats.c 
> b/drivers/net/mlx5/mlx5_stats.c index f2b5781..08a7656 100644
> --- a/drivers/net/mlx5/mlx5_stats.c
> +++ b/drivers/net/mlx5/mlx5_stats.c
> @@ -31,11 +31,16 @@
>   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>   */
>  
> +#include <linux/sockios.h>
> +#include <linux/ethtool.h>
> +
>  /* DPDK headers don't like -pedantic. */  #ifdef PEDANTIC  #pragma 
> GCC diagnostic ignored "-Wpedantic"
>  #endif
>  #include <rte_ethdev.h>
> +#include <rte_common.h>
> +#include <rte_malloc.h>
>  #ifdef PEDANTIC
>  #pragma GCC diagnostic error "-Wpedantic"
>  #endif
> @@ -44,6 +49,269 @@
>  #include "mlx5_rxtx.h"
>  #include "mlx5_defs.h"
>  
> +struct mlx5_counter_ctrl {
> +	/* Name of the counter for dpdk user. */

"for dpdk user" is redundant given the purpose of this API.

> +	char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
> +	/* Name of the counter on the device table. */
> +	char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
> +	/* Index in the device counters table. */
> +	uint16_t dev_table_idx;
> +};
> +
> +static struct mlx5_counter_ctrl mlx5_counters_init[] = {
> +	{
> +		.dpdk_name = "rx_port_unicast_bytes",
> +		.ctr_name = "rx_vport_unicast_bytes",
> +		.dev_table_idx = 0

Please add comma after ".dev_table_idx = 0" (same comment applies to the following definitions). Note you may leave it out entirely since unspecified fields are automatically initialized to zero.

> +	},
> +	{
> +		.dpdk_name = "rx_port_multicast_bytes",
> +		.ctr_name = "rx_vport_multicast_bytes",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_port_broadcast_bytes",
> +		.ctr_name = "rx_vport_broadcast_bytes",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_port_unicast_packets",
> +		.ctr_name = "rx_vport_unicast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_port_multicast_packets",
> +		.ctr_name = "rx_vport_multicast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_port_broadcast_packets",
> +		.ctr_name = "rx_vport_broadcast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_unicast_bytes",
> +		.ctr_name = "tx_vport_unicast_bytes",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_multicast_bytes",
> +		.ctr_name = "tx_vport_multicast_bytes",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_broadcast_bytes",
> +		.ctr_name = "tx_vport_broadcast_bytes",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_unicast_packets",
> +		.ctr_name = "tx_vport_unicast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_multicast_packets",
> +		.ctr_name = "tx_vport_multicast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_broadcast_packets",
> +		.ctr_name = "tx_vport_broadcast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_wqe_err",
> +		.ctr_name = "rx_wqe_err",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_crc_errors_phy",
> +		.ctr_name = "rx_crc_errors_phy",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_in_range_len_errors_phy",
> +		.ctr_name = "rx_in_range_len_errors_phy",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_symbol_err_phy",
> +		.ctr_name = "rx_symbol_err_phy",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_errors_phy",
> +		.ctr_name = "tx_errors_phy",
> +		.dev_table_idx = 0
> +	},
> +};
> +
> +const unsigned int xstats_n = RTE_DIM(mlx5_counters_init);

Considering this global depends on mlx5_counters_init[] and is only used in this file, it should be static.

> +
> +/**
> + * Read device counters table.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param[out] stats
> + *   Counters table output buffer.
> + *
> + * @return
> + *   0 on success and stats is filled, negative on error.
> + */
> +static int
> +priv_read_dev_counters(struct priv *priv, uint64_t *stats) {
> +	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
> +	unsigned int i;
> +	struct ifreq ifr;
> +	unsigned int stats_sz = (xstats_ctrl->stats_n * sizeof(uint64_t)) +
> +				 sizeof(struct ethtool_stats);
> +	struct ethtool_stats et_stats[(stats_sz + (
> +				      sizeof(struct ethtool_stats) - 1)) /
> +				      sizeof(struct ethtool_stats)];
> +
> +	et_stats->cmd = ETHTOOL_GSTATS;
> +	et_stats->n_stats = xstats_ctrl->stats_n;
> +	ifr.ifr_data = (caddr_t)et_stats;
> +	if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
> +		WARN("unable to get statistic values");
> +		return -1;
> +	}
> +	for (i = 0; (i != xstats_n) ; ++i)
> +		stats[i] = (uint64_t)
> +			   et_stats->data[mlx5_counters_init[i].dev_table_idx];
> +	return 0;
> +}
> +
> +/**
> + * Init the strcutures to read device counters.

Typo, "strcutures".

> + *
> + * @param priv
> + *   Pointer to private structure.
> + */
> +void
> +priv_xstats_init(struct priv *priv)
> +{
> +	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
> +	unsigned int i;
> +	unsigned int j;
> +	char ifname[IF_NAMESIZE];
> +	struct ifreq ifr;
> +	struct ethtool_drvinfo drvinfo;
> +	struct ethtool_gstrings *strings = NULL;
> +	unsigned int dev_stats_n;
> +	unsigned int str_sz;
> +
> +	if (priv_get_ifname(priv, &ifname)) {
> +		WARN("unable to get interface name");
> +		return;
> +	}
> +	/* How many statistics are available. */
> +	drvinfo.cmd = ETHTOOL_GDRVINFO;
> +	ifr.ifr_data = (caddr_t)&drvinfo;
> +	if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
> +		WARN("unable to get driver info");
> +		return;
> +	}
> +	dev_stats_n = drvinfo.n_stats;
> +	if (dev_stats_n < 1) {
> +		WARN("no extended statistics available");
> +		return;
> +	}
> +	xstats_ctrl->stats_n = dev_stats_n;
> +	/* Allocate memory to grab stat names and values. */
> +	str_sz = dev_stats_n * ETH_GSTRING_LEN;
> +	strings = (struct ethtool_gstrings *)
> +		  rte_malloc("xstats_strings",
> +			     str_sz + sizeof(struct ethtool_gstrings), 0);
> +	if (!strings) {
> +		WARN("unable to allocate memory for xstats");
> +		return;
> +	}
> +	strings->cmd = ETHTOOL_GSTRINGS;
> +	strings->string_set = ETH_SS_STATS;
> +	strings->len = dev_stats_n;
> +	ifr.ifr_data = (caddr_t)strings;
> +	if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
> +		WARN("unable to get statistic names");
> +		goto free;
> +	}
> +	for (j = 0; (j != xstats_n); ++j)
> +		mlx5_counters_init[j].dev_table_idx = dev_stats_n;

As a global, mlx5_counters_init[] affects all DPDK ports that use the mlx5 PMD simultaneously. Are you sure calling priv_xstats_init() from
mlx5_dev_start() is safe?

I think this global should be const and initialized only once.

> +	for (i = 0; (i != dev_stats_n); ++i) {
> +		const char *curr_string = (const char *)
> +			&strings->data[i * ETH_GSTRING_LEN];
> +
> +		for (j = 0; (j != xstats_n); ++j) {
> +			if (!strcmp(mlx5_counters_init[j].ctr_name,
> +				    curr_string)) {
> +				mlx5_counters_init[j].dev_table_idx = i;

The above comment also applies here. The PMD instance should allocate its own mapping as a priv field if there is no other choice. You could perhaps make it part of the mlx5_xstats_ctrl structure.

> +				break;
> +			}
> +		}
> +	}
> +	for (j = 0; (j != xstats_n); ++j) {
> +		if (mlx5_counters_init[j].dev_table_idx >= dev_stats_n) {
> +			WARN("Counters are not recognized");

Displaying the name of the counter that is not recognized could help users determine what's doing on. Please make sure all info/warning/error messages are helpful enough, e.g.:

 testpmd> show port xstats 0
 ###### NIC extended statistics for port 0
 mlx5: Counters are not recognized
 testpmd> # what?

> +			goto free;
> +		}
> +	}
> +	/* Copy to shadow at first time. */
> +	assert(xstats_n <= MLX5_MAX_XSTATS);
> +	priv_read_dev_counters(priv, xstats_ctrl->shadow);
> +free:
> +	rte_free(strings);
> +}
> +
> +/**
> + * Get device extended statistics.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param[out] stats
> + *   Pointer to rte extended stats table.
> + *
> + * @return
> + *   Number of extended stats on success and stats is filled,
> + *   nagative on error.

Typo, "nagative".

> + */
> +static int
> +priv_xstats_get(struct priv *priv, struct rte_eth_xstat *stats) {
> +	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
> +	unsigned int i;
> +	uint64_t counters[xstats_n];
> +
> +	if (priv_read_dev_counters(priv, counters) < 0)
> +		return -1;
> +	for (i = 0; (i != xstats_n) ; ++i) {
> +		stats[i].id = i;
> +		stats[i].value = (uint64_t)
> +				 (counters[i] - xstats_ctrl->shadow[i]);

I understand the purpose of the shadow array in this context, however to me "shadow" implies some sort of caching is taking place. I think "init" or "base" (as the base value or something) would make its purpose clearer.

> +	}
> +	return xstats_n;
> +}
> +
> +/**
> + * Reset device extended statistics.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + */
> +static void
> +priv_xstats_reset(struct priv *priv)
> +{
> +	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
> +	unsigned int i;
> +	uint64_t counters[xstats_n];
> +
> +	if (priv_read_dev_counters(priv, counters) < 0)
> +		return;
> +	for (i = 0; (i != xstats_n) ; ++i)
> +		xstats_ctrl->shadow[i] = counters[i]; }
> +
>  /**
>   * DPDK callback to get device statistics.
>   *
> @@ -142,3 +410,77 @@
>  #endif
>  	priv_unlock(priv);
>  }
> +
> +/**
> + * DPDK callback to get extended device statistics.
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + * @param[out] stats
> + *   Stats table output buffer.
> + * @param n
> + *   The size of the stats table.
> + *
> + * @return
> + *   Number of xstats on success, negative on failure.
> + */
> +int
> +mlx5_xstats_get(struct rte_eth_dev *dev,
> +		struct rte_eth_xstat *stats, unsigned int n) {
> +	struct priv *priv = mlx5_get_priv(dev);
> +	int ret = xstats_n;
> +
> +	if (n >= xstats_n && stats) {
> +		priv_lock(priv);
> +		ret = priv_xstats_get(priv, stats);
> +		priv_unlock(priv);
> +	}
> +	return ret;
> +}
> +
> +/**
> + * DPDK callback to clear device extended statistics.
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + */
> +void
> +mlx5_xstats_reset(struct rte_eth_dev *dev) {
> +	struct priv *priv = mlx5_get_priv(dev);
> +
> +	priv_lock(priv);
> +	priv_xstats_reset(priv);
> +	priv_unlock(priv);
> +}
> +
> +/**
> + * DPDK callback to retrieve names of extended device statistics
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + * @param[out] xstats_names
> + *   Block of memory to insert names into

Let's call "block of memory" a "buffer"? There is also a missing period at the end of this sentence. 

> + * @param size
> + *   Number of names.

"num" (or "n" as in the API) would make more sense than "size" here.

> + *
> + * @return
> + *   Number of xstats names.
> + */
> +int
> +mlx5_xstats_get_names(struct rte_eth_dev *dev,
> +		struct rte_eth_xstat_name *xstats_names, unsigned int size) {
> +	struct priv *priv = mlx5_get_priv(dev);
> +	unsigned int i;
> +
> +	if (size >= xstats_n && xstats_names) {
> +		priv_lock(priv);
> +		for (i = 0; (i != xstats_n) ; ++i)
> +			strcpy(xstats_names[i].name,
> +			       mlx5_counters_init[i].dpdk_name);
> +		priv_unlock(priv);
> +	}
> +	return xstats_n;
> +}
> diff --git a/drivers/net/mlx5/mlx5_trigger.c 
> b/drivers/net/mlx5/mlx5_trigger.c index 2399243..30addd2 100644
> --- a/drivers/net/mlx5/mlx5_trigger.c
> +++ b/drivers/net/mlx5/mlx5_trigger.c
> @@ -91,6 +91,7 @@
>  		priv_fdir_enable(priv);
>  	priv_dev_interrupt_handler_install(priv, dev);
>  	err = priv_flow_start(priv);
> +	priv_xstats_init(priv);
>  	priv_unlock(priv);
>  	return -err;
>  }
> --
> 1.8.3.1

--
Adrien Mazarguil
6WIND
  
Shahaf Shuler Jan. 16, 2017, 1:32 p.m. UTC | #3
--Shahaf

-----Original Message-----
From: Hanoch Haim (hhaim) [mailto:hhaim@cisco.com] 
Sent: Thursday, January 12, 2017 11:57 AM
To: Adrien Mazarguil <adrien.mazarguil@6wind.com>; Shahaf Shuler <shahafs@mellanox.com>
Cc: dev@dpdk.org; Elad Persiko <eladpe@mellanox.com>
Subject: RE: [PATCH] net/mlx5: support extended statistics

Hi Shahaf, 

1) I would add *all* the hw counters to PF

$ethtool -S enp135s0f0
NIC statistics:
     rx_packets: 54
     rx_bytes: 3240
     tx_packets: 138
     tx_bytes: 8280
     tx_tso_packets: 0
     tx_tso_bytes: 0
     tx_tso_inner_packets: 0
     tx_tso_inner_bytes: 0
     rx_lro_packets: 0
     rx_lro_bytes: 0
     rx_csum_unnecessary: 0
     rx_csum_none: 54
     rx_csum_complete: 0
     rx_csum_unnecessary_inner: 0
     tx_csum_partial: 0
     tx_csum_partial_inner: 0
     tx_csum_none: 138
     tx_queue_stopped: 0
     tx_queue_wake: 0
     tx_queue_dropped: 0
     rx_sw_lro_aggregated: 0
     rx_sw_lro_flushed: 0
     rx_sw_lro_no_desc: 0
     rx_wqe_err: 0
     rx_cqe_compress_pkts: 0
     rx_cqe_compress_blks: 0
     rx_mpwqe_filler: 0
     rx_mpwqe_frag: 0
     link_down_events_phy: 0
     rx_out_of_buffer: 0
     rx_vport_unicast_packets: 26126070627
     rx_vport_unicast_bytes: 1328543705864986
     tx_vport_unicast_packets: 687072959078
     tx_vport_unicast_bytes: 3339188884659844
     rx_vport_multicast_packets: 0
     rx_vport_multicast_bytes: 0
     tx_vport_multicast_packets: 0
     tx_vport_multicast_bytes: 0
     rx_vport_broadcast_packets: 4543553
     rx_vport_broadcast_bytes: 272614872
     tx_vport_broadcast_packets: 4543637
     tx_vport_broadcast_bytes: 272619912
     rx_vport_rdma_unicast_packets: 430134
     rx_vport_rdma_unicast_bytes: 32690184
     tx_vport_rdma_unicast_packets: 0
     tx_vport_rdma_unicast_bytes: 0
     rx_vport_rdma_multicast_packets: 0
     rx_vport_rdma_multicast_bytes: 0
     tx_vport_rdma_multicast_packets: 0
     tx_vport_rdma_multicast_bytes: 0
     tx_packets_phy: 11756775169850
     rx_packets_phy: 2246825269635
     rx_crc_errors_phy: 0
     tx_bytes_phy: 3404211507236837
     rx_bytes_phy: 1342896754459327
     tx_multicast_phy: 0
     tx_broadcast_phy: 4552688
     rx_multicast_phy: 0
     rx_broadcast_phy: 4552604
     rx_in_range_len_errors_phy: 0
     rx_out_of_range_len_phy: 0
     rx_oversize_pkts_phy: 1135778701
     rx_symbol_err_phy: 0
     tx_mac_control_phy: 16579953
     rx_mac_control_phy: 16670966
     rx_unsupported_op_phy: 0
     rx_pause_ctrl_phy: 16670966
     tx_pause_ctrl_phy: 16579953
     rx_discards_phy: 12244164648
     tx_discards_phy: 0
     tx_errors_phy: 0
     rx_undersize_pkts_phy: 0
     rx_fragments_phy: 0
     rx_jabbers_phy: 0
     rx_64_bytes_phy: 911948813404
     rx_65_to_127_bytes_phy: 335422614022
     rx_128_to_255_bytes_phy: 153383903056
     rx_256_to_511_bytes_phy: 80464779917
     rx_512_to_1023_bytes_phy: 184356224678
     rx_1024_to_1518_bytes_phy: 313733619509
     rx_1519_to_2047_bytes_phy: 235669614881
     rx_2048_to_4095_bytes_phy: 4051420173
     rx_4096_to_8191_bytes_phy: 1011852034
     rx_8192_to_10239_bytes_phy: 27710327250
     time_since_last_clear_phy: 2998621833
     symbol_errors_phy: 0
     sync_headers_errors_phy: 0
     edpl/bip_errors_lane0_phy: 0
     edpl/bip_errors_lane1_phy: 0
     edpl/bip_errors_lane2_phy: 0
     edpl/bip_errors_lane3_phy: 0
     fc_corrected_blocks_lane0_phy: 0
     fc_corrected_blocks_lane1_phy: 0
     fc_corrected_blocks_lane2_phy: 0
     fc_corrected_blocks_lane3_phy: 0
     fc_uncorrectable_lane0_phy: 0
     fc_uncorrectable_lane1_phy: 0
     fc_uncorrectable_lane2_phy: 0
     fc_uncorrectable_lane3_phy: 0
     rs_corrected_blocks_phy: 0
     rs_uncorrectable_blocks_phy: 0
     rs_no_errors_blocks_phy: 58566832662545
     rs_single_error_blocks_phy: 0
     rs_corrected_symbols_total_phy: 0
     rs_corrected_symbols_lane0_phy: 0
     rs_corrected_symbols_lane1_phy: 0
     rs_corrected_symbols_lane2_phy: 0
     rs_corrected_symbols_lane3_phy: 0
     rx_prio0_bytes: 1342896754459327
     rx_prio0_packets: 2234564434021
     tx_prio0_bytes: 3404210446119845
     tx_prio0_packets: 11756758589897
     rx_prio1_bytes: 0
     rx_prio1_packets: 0
     tx_prio1_bytes: 0
     tx_prio1_packets: 0
     rx_prio2_bytes: 0
     rx_prio2_packets: 0
     tx_prio2_bytes: 0
     tx_prio2_packets: 0
     rx_prio3_bytes: 0
     rx_prio3_packets: 0
     tx_prio3_bytes: 0
     tx_prio3_packets: 0
     rx_prio4_bytes: 0
     rx_prio4_packets: 0
     tx_prio4_bytes: 0
     tx_prio4_packets: 0
     rx_prio5_bytes: 0
     rx_prio5_packets: 0
     tx_prio5_bytes: 0
     tx_prio5_packets: 0
     rx_prio6_bytes: 0
     rx_prio6_packets: 0
     tx_prio6_bytes: 0
     tx_prio6_packets: 0
     rx_prio7_bytes: 0
     rx_prio7_packets: 0
     tx_prio7_bytes: 0
     tx_prio7_packets: 0
     rx0_packets: 54
     rx0_bytes: 3240
     rx0_csum_complete
[Shahaf Shuler] we will consider to insert new counters on future patches 

2) Add the right HW counters to the VF 

tx28_nop: 0
     tx28_queue_stopped: 0
     tx28_queue_wake: 0
     tx28_queue_dropped: 0
     tx29_packets: 0
     tx29_bytes: 0
     tx29_tso_packets: 0
     tx29_tso_bytes: 0
     tx29_tso_inner_packets: 0
     tx29_tso_inner_bytes: 0
     tx29_csum_none: 0
     tx29_csum_partial: 0
     tx29_csum_partial_inner: 0
     tx29_nop: 0
     tx29_queue_stopped: 0
[Shahaf Shuler] the counters are valid for both VF/PF

3) Implement the stats_get/reset using HW counters 

.stats_get = mlx5_stats_get,
.stats_reset = mlx5_stats_reset,
[Shahaf Shuler] it will be supported by future commits. 


Thanks,
Hanoh


-----Original Message-----
From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
Sent: Wednesday, January 11, 2017 6:55 PM
To: Shahaf Shuler
Cc: dev@dpdk.org; Elad Persiko; Hanoch Haim (hhaim)
Subject: Re: [PATCH] net/mlx5: support extended statistics

Hi Shahaf,

Thanks, I have a few comments, most of them relatively minor. Please see below.

On Wed, Jan 11, 2017 at 10:55:42AM +0200, Shahaf Shuler wrote:
> Implement xstats_*() DPDK callbacks
> 
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> Signed-off-by: Elad Persiko <eladpe@mellanox.com>
> Signed-off-by: Hanoch Haim <hhaim@cisco.com>
> ---
>  drivers/net/mlx5/mlx5.c         |   3 +
>  drivers/net/mlx5/mlx5.h         |  12 ++
>  drivers/net/mlx5/mlx5_defs.h    |   3 +
>  drivers/net/mlx5/mlx5_stats.c   | 342 ++++++++++++++++++++++++++++++++++++++++
>  drivers/net/mlx5/mlx5_trigger.c |   1 +
>  5 files changed, 361 insertions(+)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 
> 6293c1f..3359d3c 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -199,6 +199,9 @@
>  	.link_update = mlx5_link_update,
>  	.stats_get = mlx5_stats_get,
>  	.stats_reset = mlx5_stats_reset,
> +	.xstats_get = mlx5_xstats_get,
> +	.xstats_reset = mlx5_xstats_reset,
> +	.xstats_get_names = mlx5_xstats_get_names,
>  	.dev_infos_get = mlx5_dev_infos_get,
>  	.dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
>  	.vlan_filter_set = mlx5_vlan_filter_set, diff --git 
> a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 
> ee62e04..034a05e 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -89,6 +89,11 @@ enum {
>  	PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a,  };
>  
> +struct mlx5_xstats_ctrl {
> +	uint64_t shadow[MLX5_MAX_XSTATS];
> +	uint16_t stats_n; /* Number of device stats. */ };
> +
>  struct priv {
>  	struct rte_eth_dev *dev; /* Ethernet device. */
>  	struct ibv_context *ctx; /* Verbs context. */ @@ -142,6 +147,7 @@ 
> struct priv {
>  	struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
>  	LIST_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
>  	uint32_t link_speed_capa; /* Link speed capabilities. */
> +	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
>  	rte_spinlock_t lock; /* Lock for control functions. */  };
>  
> @@ -250,8 +256,14 @@ int mlx5_dev_rss_reta_update(struct rte_eth_dev 
> *,
>  
>  /* mlx5_stats.c */
>  
> +void priv_xstats_init(struct priv *);
>  void mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *); 
> void mlx5_stats_reset(struct rte_eth_dev *);
> +int mlx5_xstats_get(struct rte_eth_dev *,
> +		    struct rte_eth_xstat *, unsigned int); void 
> +mlx5_xstats_reset(struct rte_eth_dev *); int 
> +mlx5_xstats_get_names(struct rte_eth_dev *,
> +			  struct rte_eth_xstat_name *, unsigned int);
>  
>  /* mlx5_vlan.c */
>  
> diff --git a/drivers/net/mlx5/mlx5_defs.h 
> b/drivers/net/mlx5/mlx5_defs.h index b32816e..beabb70 100644
> --- a/drivers/net/mlx5/mlx5_defs.h
> +++ b/drivers/net/mlx5/mlx5_defs.h
> @@ -79,4 +79,7 @@
>  /* Alarm timeout. */
>  #define MLX5_ALARM_TIMEOUT_US 100000
>  
> +/* Maximum number of extended statistics counters. */ #define 
> +MLX5_MAX_XSTATS 32
> +
>  #endif /* RTE_PMD_MLX5_DEFS_H_ */
> diff --git a/drivers/net/mlx5/mlx5_stats.c 
> b/drivers/net/mlx5/mlx5_stats.c index f2b5781..08a7656 100644
> --- a/drivers/net/mlx5/mlx5_stats.c
> +++ b/drivers/net/mlx5/mlx5_stats.c
> @@ -31,11 +31,16 @@
>   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>   */
>  
> +#include <linux/sockios.h>
> +#include <linux/ethtool.h>
> +
>  /* DPDK headers don't like -pedantic. */  #ifdef PEDANTIC  #pragma 
> GCC diagnostic ignored "-Wpedantic"
>  #endif
>  #include <rte_ethdev.h>
> +#include <rte_common.h>
> +#include <rte_malloc.h>
>  #ifdef PEDANTIC
>  #pragma GCC diagnostic error "-Wpedantic"
>  #endif
> @@ -44,6 +49,269 @@
>  #include "mlx5_rxtx.h"
>  #include "mlx5_defs.h"
>  
> +struct mlx5_counter_ctrl {
> +	/* Name of the counter for dpdk user. */

"for dpdk user" is redundant given the purpose of this API.

> +	char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
> +	/* Name of the counter on the device table. */
> +	char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
> +	/* Index in the device counters table. */
> +	uint16_t dev_table_idx;
> +};
> +
> +static struct mlx5_counter_ctrl mlx5_counters_init[] = {
> +	{
> +		.dpdk_name = "rx_port_unicast_bytes",
> +		.ctr_name = "rx_vport_unicast_bytes",
> +		.dev_table_idx = 0

Please add comma after ".dev_table_idx = 0" (same comment applies to the following definitions). Note you may leave it out entirely since unspecified fields are automatically initialized to zero.

> +	},
> +	{
> +		.dpdk_name = "rx_port_multicast_bytes",
> +		.ctr_name = "rx_vport_multicast_bytes",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_port_broadcast_bytes",
> +		.ctr_name = "rx_vport_broadcast_bytes",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_port_unicast_packets",
> +		.ctr_name = "rx_vport_unicast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_port_multicast_packets",
> +		.ctr_name = "rx_vport_multicast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_port_broadcast_packets",
> +		.ctr_name = "rx_vport_broadcast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_unicast_bytes",
> +		.ctr_name = "tx_vport_unicast_bytes",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_multicast_bytes",
> +		.ctr_name = "tx_vport_multicast_bytes",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_broadcast_bytes",
> +		.ctr_name = "tx_vport_broadcast_bytes",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_unicast_packets",
> +		.ctr_name = "tx_vport_unicast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_multicast_packets",
> +		.ctr_name = "tx_vport_multicast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_port_broadcast_packets",
> +		.ctr_name = "tx_vport_broadcast_packets",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_wqe_err",
> +		.ctr_name = "rx_wqe_err",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_crc_errors_phy",
> +		.ctr_name = "rx_crc_errors_phy",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_in_range_len_errors_phy",
> +		.ctr_name = "rx_in_range_len_errors_phy",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "rx_symbol_err_phy",
> +		.ctr_name = "rx_symbol_err_phy",
> +		.dev_table_idx = 0
> +	},
> +	{
> +		.dpdk_name = "tx_errors_phy",
> +		.ctr_name = "tx_errors_phy",
> +		.dev_table_idx = 0
> +	},
> +};
> +
> +const unsigned int xstats_n = RTE_DIM(mlx5_counters_init);

Considering this global depends on mlx5_counters_init[] and is only used in this file, it should be static.

> +
> +/**
> + * Read device counters table.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param[out] stats
> + *   Counters table output buffer.
> + *
> + * @return
> + *   0 on success and stats is filled, negative on error.
> + */
> +static int
> +priv_read_dev_counters(struct priv *priv, uint64_t *stats) {
> +	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
> +	unsigned int i;
> +	struct ifreq ifr;
> +	unsigned int stats_sz = (xstats_ctrl->stats_n * sizeof(uint64_t)) +
> +				 sizeof(struct ethtool_stats);
> +	struct ethtool_stats et_stats[(stats_sz + (
> +				      sizeof(struct ethtool_stats) - 1)) /
> +				      sizeof(struct ethtool_stats)];
> +
> +	et_stats->cmd = ETHTOOL_GSTATS;
> +	et_stats->n_stats = xstats_ctrl->stats_n;
> +	ifr.ifr_data = (caddr_t)et_stats;
> +	if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
> +		WARN("unable to get statistic values");
> +		return -1;
> +	}
> +	for (i = 0; (i != xstats_n) ; ++i)
> +		stats[i] = (uint64_t)
> +			   et_stats->data[mlx5_counters_init[i].dev_table_idx];
> +	return 0;
> +}
> +
> +/**
> + * Init the strcutures to read device counters.

Typo, "strcutures".

> + *
> + * @param priv
> + *   Pointer to private structure.
> + */
> +void
> +priv_xstats_init(struct priv *priv)
> +{
> +	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
> +	unsigned int i;
> +	unsigned int j;
> +	char ifname[IF_NAMESIZE];
> +	struct ifreq ifr;
> +	struct ethtool_drvinfo drvinfo;
> +	struct ethtool_gstrings *strings = NULL;
> +	unsigned int dev_stats_n;
> +	unsigned int str_sz;
> +
> +	if (priv_get_ifname(priv, &ifname)) {
> +		WARN("unable to get interface name");
> +		return;
> +	}
> +	/* How many statistics are available. */
> +	drvinfo.cmd = ETHTOOL_GDRVINFO;
> +	ifr.ifr_data = (caddr_t)&drvinfo;
> +	if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
> +		WARN("unable to get driver info");
> +		return;
> +	}
> +	dev_stats_n = drvinfo.n_stats;
> +	if (dev_stats_n < 1) {
> +		WARN("no extended statistics available");
> +		return;
> +	}
> +	xstats_ctrl->stats_n = dev_stats_n;
> +	/* Allocate memory to grab stat names and values. */
> +	str_sz = dev_stats_n * ETH_GSTRING_LEN;
> +	strings = (struct ethtool_gstrings *)
> +		  rte_malloc("xstats_strings",
> +			     str_sz + sizeof(struct ethtool_gstrings), 0);
> +	if (!strings) {
> +		WARN("unable to allocate memory for xstats");
> +		return;
> +	}
> +	strings->cmd = ETHTOOL_GSTRINGS;
> +	strings->string_set = ETH_SS_STATS;
> +	strings->len = dev_stats_n;
> +	ifr.ifr_data = (caddr_t)strings;
> +	if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
> +		WARN("unable to get statistic names");
> +		goto free;
> +	}
> +	for (j = 0; (j != xstats_n); ++j)
> +		mlx5_counters_init[j].dev_table_idx = dev_stats_n;

As a global, mlx5_counters_init[] affects all DPDK ports that use the mlx5 PMD simultaneously. Are you sure calling priv_xstats_init() from
mlx5_dev_start() is safe?

I think this global should be const and initialized only once.

> +	for (i = 0; (i != dev_stats_n); ++i) {
> +		const char *curr_string = (const char *)
> +			&strings->data[i * ETH_GSTRING_LEN];
> +
> +		for (j = 0; (j != xstats_n); ++j) {
> +			if (!strcmp(mlx5_counters_init[j].ctr_name,
> +				    curr_string)) {
> +				mlx5_counters_init[j].dev_table_idx = i;

The above comment also applies here. The PMD instance should allocate its own mapping as a priv field if there is no other choice. You could perhaps make it part of the mlx5_xstats_ctrl structure.

> +				break;
> +			}
> +		}
> +	}
> +	for (j = 0; (j != xstats_n); ++j) {
> +		if (mlx5_counters_init[j].dev_table_idx >= dev_stats_n) {
> +			WARN("Counters are not recognized");

Displaying the name of the counter that is not recognized could help users determine what's doing on. Please make sure all info/warning/error messages are helpful enough, e.g.:

 testpmd> show port xstats 0
 ###### NIC extended statistics for port 0
 mlx5: Counters are not recognized
 testpmd> # what?

> +			goto free;
> +		}
> +	}
> +	/* Copy to shadow at first time. */
> +	assert(xstats_n <= MLX5_MAX_XSTATS);
> +	priv_read_dev_counters(priv, xstats_ctrl->shadow);
> +free:
> +	rte_free(strings);
> +}
> +
> +/**
> + * Get device extended statistics.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param[out] stats
> + *   Pointer to rte extended stats table.
> + *
> + * @return
> + *   Number of extended stats on success and stats is filled,
> + *   nagative on error.

Typo, "nagative".

> + */
> +static int
> +priv_xstats_get(struct priv *priv, struct rte_eth_xstat *stats) {
> +	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
> +	unsigned int i;
> +	uint64_t counters[xstats_n];
> +
> +	if (priv_read_dev_counters(priv, counters) < 0)
> +		return -1;
> +	for (i = 0; (i != xstats_n) ; ++i) {
> +		stats[i].id = i;
> +		stats[i].value = (uint64_t)
> +				 (counters[i] - xstats_ctrl->shadow[i]);

I understand the purpose of the shadow array in this context, however to me "shadow" implies some sort of caching is taking place. I think "init" or "base" (as the base value or something) would make its purpose clearer.

> +	}
> +	return xstats_n;
> +}
> +
> +/**
> + * Reset device extended statistics.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + */
> +static void
> +priv_xstats_reset(struct priv *priv)
> +{
> +	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
> +	unsigned int i;
> +	uint64_t counters[xstats_n];
> +
> +	if (priv_read_dev_counters(priv, counters) < 0)
> +		return;
> +	for (i = 0; (i != xstats_n) ; ++i)
> +		xstats_ctrl->shadow[i] = counters[i]; }
> +
>  /**
>   * DPDK callback to get device statistics.
>   *
> @@ -142,3 +410,77 @@
>  #endif
>  	priv_unlock(priv);
>  }
> +
> +/**
> + * DPDK callback to get extended device statistics.
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + * @param[out] stats
> + *   Stats table output buffer.
> + * @param n
> + *   The size of the stats table.
> + *
> + * @return
> + *   Number of xstats on success, negative on failure.
> + */
> +int
> +mlx5_xstats_get(struct rte_eth_dev *dev,
> +		struct rte_eth_xstat *stats, unsigned int n) {
> +	struct priv *priv = mlx5_get_priv(dev);
> +	int ret = xstats_n;
> +
> +	if (n >= xstats_n && stats) {
> +		priv_lock(priv);
> +		ret = priv_xstats_get(priv, stats);
> +		priv_unlock(priv);
> +	}
> +	return ret;
> +}
> +
> +/**
> + * DPDK callback to clear device extended statistics.
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + */
> +void
> +mlx5_xstats_reset(struct rte_eth_dev *dev) {
> +	struct priv *priv = mlx5_get_priv(dev);
> +
> +	priv_lock(priv);
> +	priv_xstats_reset(priv);
> +	priv_unlock(priv);
> +}
> +
> +/**
> + * DPDK callback to retrieve names of extended device statistics
> + *
> + * @param dev
> + *   Pointer to Ethernet device structure.
> + * @param[out] xstats_names
> + *   Block of memory to insert names into

Let's call "block of memory" a "buffer"? There is also a missing period at the end of this sentence. 

> + * @param size
> + *   Number of names.

"num" (or "n" as in the API) would make more sense than "size" here.

> + *
> + * @return
> + *   Number of xstats names.
> + */
> +int
> +mlx5_xstats_get_names(struct rte_eth_dev *dev,
> +		struct rte_eth_xstat_name *xstats_names, unsigned int size) {
> +	struct priv *priv = mlx5_get_priv(dev);
> +	unsigned int i;
> +
> +	if (size >= xstats_n && xstats_names) {
> +		priv_lock(priv);
> +		for (i = 0; (i != xstats_n) ; ++i)
> +			strcpy(xstats_names[i].name,
> +			       mlx5_counters_init[i].dpdk_name);
> +		priv_unlock(priv);
> +	}
> +	return xstats_n;
> +}
> diff --git a/drivers/net/mlx5/mlx5_trigger.c 
> b/drivers/net/mlx5/mlx5_trigger.c index 2399243..30addd2 100644
> --- a/drivers/net/mlx5/mlx5_trigger.c
> +++ b/drivers/net/mlx5/mlx5_trigger.c
> @@ -91,6 +91,7 @@
>  		priv_fdir_enable(priv);
>  	priv_dev_interrupt_handler_install(priv, dev);
>  	err = priv_flow_start(priv);
> +	priv_xstats_init(priv);
>  	priv_unlock(priv);
>  	return -err;
>  }
> --
> 1.8.3.1

--
Adrien Mazarguil
6WIND
  
Thomas Monjalon Jan. 16, 2017, 2:38 p.m. UTC | #4
Hi Shahaf,

I am not specifically interested in this thread, but if I was,
it is not sure I would make the effort to try to understand your
message (starting with a signature, an useless original header, and
some long useless listings).
I won't try to find which special sign you use to mark your reply.

We read a lot of mails in this mailing list so we must comply to
some standard efficient formatting:
- remove the useless lines
- quote original lines with one more "> "
- reply below the regarding section

Maybe you need to tune or change your mail software, I'm sorry
but it is worth to do it.
Note: some other contributors have currently the same issue as you.
Note 2: I said nothing about Outlook ;)


---- for reference, below ----

2017-01-16 13:32, Shahaf Shuler:
> 
> --Shahaf
> 
> -----Original Message-----
> From: Hanoch Haim (hhaim) [mailto:hhaim@cisco.com] 
> Sent: Thursday, January 12, 2017 11:57 AM
> To: Adrien Mazarguil <adrien.mazarguil@6wind.com>; Shahaf Shuler <shahafs@mellanox.com>
> Cc: dev@dpdk.org; Elad Persiko <eladpe@mellanox.com>
> Subject: RE: [PATCH] net/mlx5: support extended statistics
> 
> Hi Shahaf, 
> 
> 1) I would add *all* the hw counters to PF
> 
> $ethtool -S enp135s0f0
> NIC statistics:
>      rx_packets: 54
>      rx_bytes: 3240
>      tx_packets: 138
>      tx_bytes: 8280
>      tx_tso_packets: 0
>      tx_tso_bytes: 0
>      tx_tso_inner_packets: 0
>      tx_tso_inner_bytes: 0
>      rx_lro_packets: 0
>      rx_lro_bytes: 0
>      rx_csum_unnecessary: 0
>      rx_csum_none: 54
>      rx_csum_complete: 0
>      rx_csum_unnecessary_inner: 0
>      tx_csum_partial: 0
>      tx_csum_partial_inner: 0
>      tx_csum_none: 138
>      tx_queue_stopped: 0
>      tx_queue_wake: 0
>      tx_queue_dropped: 0
>      rx_sw_lro_aggregated: 0
>      rx_sw_lro_flushed: 0
>      rx_sw_lro_no_desc: 0
>      rx_wqe_err: 0
>      rx_cqe_compress_pkts: 0
>      rx_cqe_compress_blks: 0
>      rx_mpwqe_filler: 0
>      rx_mpwqe_frag: 0
>      link_down_events_phy: 0
>      rx_out_of_buffer: 0
>      rx_vport_unicast_packets: 26126070627
>      rx_vport_unicast_bytes: 1328543705864986
>      tx_vport_unicast_packets: 687072959078
>      tx_vport_unicast_bytes: 3339188884659844
>      rx_vport_multicast_packets: 0
>      rx_vport_multicast_bytes: 0
>      tx_vport_multicast_packets: 0
>      tx_vport_multicast_bytes: 0
>      rx_vport_broadcast_packets: 4543553
>      rx_vport_broadcast_bytes: 272614872
>      tx_vport_broadcast_packets: 4543637
>      tx_vport_broadcast_bytes: 272619912
>      rx_vport_rdma_unicast_packets: 430134
>      rx_vport_rdma_unicast_bytes: 32690184
>      tx_vport_rdma_unicast_packets: 0
>      tx_vport_rdma_unicast_bytes: 0
>      rx_vport_rdma_multicast_packets: 0
>      rx_vport_rdma_multicast_bytes: 0
>      tx_vport_rdma_multicast_packets: 0
>      tx_vport_rdma_multicast_bytes: 0
>      tx_packets_phy: 11756775169850
>      rx_packets_phy: 2246825269635
>      rx_crc_errors_phy: 0
>      tx_bytes_phy: 3404211507236837
>      rx_bytes_phy: 1342896754459327
>      tx_multicast_phy: 0
>      tx_broadcast_phy: 4552688
>      rx_multicast_phy: 0
>      rx_broadcast_phy: 4552604
>      rx_in_range_len_errors_phy: 0
>      rx_out_of_range_len_phy: 0
>      rx_oversize_pkts_phy: 1135778701
>      rx_symbol_err_phy: 0
>      tx_mac_control_phy: 16579953
>      rx_mac_control_phy: 16670966
>      rx_unsupported_op_phy: 0
>      rx_pause_ctrl_phy: 16670966
>      tx_pause_ctrl_phy: 16579953
>      rx_discards_phy: 12244164648
>      tx_discards_phy: 0
>      tx_errors_phy: 0
>      rx_undersize_pkts_phy: 0
>      rx_fragments_phy: 0
>      rx_jabbers_phy: 0
>      rx_64_bytes_phy: 911948813404
>      rx_65_to_127_bytes_phy: 335422614022
>      rx_128_to_255_bytes_phy: 153383903056
>      rx_256_to_511_bytes_phy: 80464779917
>      rx_512_to_1023_bytes_phy: 184356224678
>      rx_1024_to_1518_bytes_phy: 313733619509
>      rx_1519_to_2047_bytes_phy: 235669614881
>      rx_2048_to_4095_bytes_phy: 4051420173
>      rx_4096_to_8191_bytes_phy: 1011852034
>      rx_8192_to_10239_bytes_phy: 27710327250
>      time_since_last_clear_phy: 2998621833
>      symbol_errors_phy: 0
>      sync_headers_errors_phy: 0
>      edpl/bip_errors_lane0_phy: 0
>      edpl/bip_errors_lane1_phy: 0
>      edpl/bip_errors_lane2_phy: 0
>      edpl/bip_errors_lane3_phy: 0
>      fc_corrected_blocks_lane0_phy: 0
>      fc_corrected_blocks_lane1_phy: 0
>      fc_corrected_blocks_lane2_phy: 0
>      fc_corrected_blocks_lane3_phy: 0
>      fc_uncorrectable_lane0_phy: 0
>      fc_uncorrectable_lane1_phy: 0
>      fc_uncorrectable_lane2_phy: 0
>      fc_uncorrectable_lane3_phy: 0
>      rs_corrected_blocks_phy: 0
>      rs_uncorrectable_blocks_phy: 0
>      rs_no_errors_blocks_phy: 58566832662545
>      rs_single_error_blocks_phy: 0
>      rs_corrected_symbols_total_phy: 0
>      rs_corrected_symbols_lane0_phy: 0
>      rs_corrected_symbols_lane1_phy: 0
>      rs_corrected_symbols_lane2_phy: 0
>      rs_corrected_symbols_lane3_phy: 0
>      rx_prio0_bytes: 1342896754459327
>      rx_prio0_packets: 2234564434021
>      tx_prio0_bytes: 3404210446119845
>      tx_prio0_packets: 11756758589897
>      rx_prio1_bytes: 0
>      rx_prio1_packets: 0
>      tx_prio1_bytes: 0
>      tx_prio1_packets: 0
>      rx_prio2_bytes: 0
>      rx_prio2_packets: 0
>      tx_prio2_bytes: 0
>      tx_prio2_packets: 0
>      rx_prio3_bytes: 0
>      rx_prio3_packets: 0
>      tx_prio3_bytes: 0
>      tx_prio3_packets: 0
>      rx_prio4_bytes: 0
>      rx_prio4_packets: 0
>      tx_prio4_bytes: 0
>      tx_prio4_packets: 0
>      rx_prio5_bytes: 0
>      rx_prio5_packets: 0
>      tx_prio5_bytes: 0
>      tx_prio5_packets: 0
>      rx_prio6_bytes: 0
>      rx_prio6_packets: 0
>      tx_prio6_bytes: 0
>      tx_prio6_packets: 0
>      rx_prio7_bytes: 0
>      rx_prio7_packets: 0
>      tx_prio7_bytes: 0
>      tx_prio7_packets: 0
>      rx0_packets: 54
>      rx0_bytes: 3240
>      rx0_csum_complete
> [Shahaf Shuler] we will consider to insert new counters on future patches 
> 
> 2) Add the right HW counters to the VF 
> 
> tx28_nop: 0
>      tx28_queue_stopped: 0
>      tx28_queue_wake: 0
>      tx28_queue_dropped: 0
>      tx29_packets: 0
>      tx29_bytes: 0
>      tx29_tso_packets: 0
>      tx29_tso_bytes: 0
>      tx29_tso_inner_packets: 0
>      tx29_tso_inner_bytes: 0
>      tx29_csum_none: 0
>      tx29_csum_partial: 0
>      tx29_csum_partial_inner: 0
>      tx29_nop: 0
>      tx29_queue_stopped: 0
> [Shahaf Shuler] the counters are valid for both VF/PF
> 
> 3) Implement the stats_get/reset using HW counters 
> 
> .stats_get = mlx5_stats_get,
> .stats_reset = mlx5_stats_reset,
> [Shahaf Shuler] it will be supported by future commits. 
> 
> 
> Thanks,
> Hanoh
> 
> 
> -----Original Message-----
> From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> Sent: Wednesday, January 11, 2017 6:55 PM
> To: Shahaf Shuler
> Cc: dev@dpdk.org; Elad Persiko; Hanoch Haim (hhaim)
> Subject: Re: [PATCH] net/mlx5: support extended statistics
> 
> Hi Shahaf,
> 
> Thanks, I have a few comments, most of them relatively minor. Please see below.
> 
> On Wed, Jan 11, 2017 at 10:55:42AM +0200, Shahaf Shuler wrote:
> > Implement xstats_*() DPDK callbacks
> > 
> > Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> > Signed-off-by: Elad Persiko <eladpe@mellanox.com>
> > Signed-off-by: Hanoch Haim <hhaim@cisco.com>
> > ---
> >  drivers/net/mlx5/mlx5.c         |   3 +
> >  drivers/net/mlx5/mlx5.h         |  12 ++
> >  drivers/net/mlx5/mlx5_defs.h    |   3 +
> >  drivers/net/mlx5/mlx5_stats.c   | 342 ++++++++++++++++++++++++++++++++++++++++
> >  drivers/net/mlx5/mlx5_trigger.c |   1 +
> >  5 files changed, 361 insertions(+)
> > 
> > diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 
> > 6293c1f..3359d3c 100644
> > --- a/drivers/net/mlx5/mlx5.c
> > +++ b/drivers/net/mlx5/mlx5.c
> > @@ -199,6 +199,9 @@
> >  	.link_update = mlx5_link_update,
> >  	.stats_get = mlx5_stats_get,
> >  	.stats_reset = mlx5_stats_reset,
> > +	.xstats_get = mlx5_xstats_get,
> > +	.xstats_reset = mlx5_xstats_reset,
> > +	.xstats_get_names = mlx5_xstats_get_names,
> >  	.dev_infos_get = mlx5_dev_infos_get,
> >  	.dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
> >  	.vlan_filter_set = mlx5_vlan_filter_set, diff --git 
> > a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 
> > ee62e04..034a05e 100644
> > --- a/drivers/net/mlx5/mlx5.h
> > +++ b/drivers/net/mlx5/mlx5.h
> > @@ -89,6 +89,11 @@ enum {
> >  	PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a,  };
> >  
> > +struct mlx5_xstats_ctrl {
> > +	uint64_t shadow[MLX5_MAX_XSTATS];
> > +	uint16_t stats_n; /* Number of device stats. */ };
> > +
> >  struct priv {
> >  	struct rte_eth_dev *dev; /* Ethernet device. */
> >  	struct ibv_context *ctx; /* Verbs context. */ @@ -142,6 +147,7 @@ 
> > struct priv {
> >  	struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
> >  	LIST_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
> >  	uint32_t link_speed_capa; /* Link speed capabilities. */
> > +	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
> >  	rte_spinlock_t lock; /* Lock for control functions. */  };
> >  
> > @@ -250,8 +256,14 @@ int mlx5_dev_rss_reta_update(struct rte_eth_dev 
> > *,
> >  
> >  /* mlx5_stats.c */
> >  
> > +void priv_xstats_init(struct priv *);
> >  void mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *); 
> > void mlx5_stats_reset(struct rte_eth_dev *);
> > +int mlx5_xstats_get(struct rte_eth_dev *,
> > +		    struct rte_eth_xstat *, unsigned int); void 
> > +mlx5_xstats_reset(struct rte_eth_dev *); int 
> > +mlx5_xstats_get_names(struct rte_eth_dev *,
> > +			  struct rte_eth_xstat_name *, unsigned int);
> >  
> >  /* mlx5_vlan.c */
> >  
> > diff --git a/drivers/net/mlx5/mlx5_defs.h 
> > b/drivers/net/mlx5/mlx5_defs.h index b32816e..beabb70 100644
> > --- a/drivers/net/mlx5/mlx5_defs.h
> > +++ b/drivers/net/mlx5/mlx5_defs.h
> > @@ -79,4 +79,7 @@
> >  /* Alarm timeout. */
> >  #define MLX5_ALARM_TIMEOUT_US 100000
> >  
> > +/* Maximum number of extended statistics counters. */ #define 
> > +MLX5_MAX_XSTATS 32
> > +
> >  #endif /* RTE_PMD_MLX5_DEFS_H_ */
> > diff --git a/drivers/net/mlx5/mlx5_stats.c 
> > b/drivers/net/mlx5/mlx5_stats.c index f2b5781..08a7656 100644
> > --- a/drivers/net/mlx5/mlx5_stats.c
> > +++ b/drivers/net/mlx5/mlx5_stats.c
> > @@ -31,11 +31,16 @@
> >   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> >   */
> >  
> > +#include <linux/sockios.h>
> > +#include <linux/ethtool.h>
> > +
> >  /* DPDK headers don't like -pedantic. */  #ifdef PEDANTIC  #pragma 
> > GCC diagnostic ignored "-Wpedantic"
> >  #endif
> >  #include <rte_ethdev.h>
> > +#include <rte_common.h>
> > +#include <rte_malloc.h>
> >  #ifdef PEDANTIC
> >  #pragma GCC diagnostic error "-Wpedantic"
> >  #endif
> > @@ -44,6 +49,269 @@
> >  #include "mlx5_rxtx.h"
> >  #include "mlx5_defs.h"
> >  
> > +struct mlx5_counter_ctrl {
> > +	/* Name of the counter for dpdk user. */
> 
> "for dpdk user" is redundant given the purpose of this API.
> 
> > +	char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
> > +	/* Name of the counter on the device table. */
> > +	char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
> > +	/* Index in the device counters table. */
> > +	uint16_t dev_table_idx;
> > +};
> > +
> > +static struct mlx5_counter_ctrl mlx5_counters_init[] = {
> > +	{
> > +		.dpdk_name = "rx_port_unicast_bytes",
> > +		.ctr_name = "rx_vport_unicast_bytes",
> > +		.dev_table_idx = 0
> 
> Please add comma after ".dev_table_idx = 0" (same comment applies to the following definitions). Note you may leave it out entirely since unspecified fields are automatically initialized to zero.
> 
> > +	},
> > +	{
> > +		.dpdk_name = "rx_port_multicast_bytes",
> > +		.ctr_name = "rx_vport_multicast_bytes",
> > +		.dev_table_idx = 0
> > +	},
> > +	{
> > +		.dpdk_name = "rx_port_broadcast_bytes",
> > +		.ctr_name = "rx_vport_broadcast_bytes",
> > +		.dev_table_idx = 0
> > +	},
> > +	{
> > +		.dpdk_name = "rx_port_unicast_packets",
> > +		.ctr_name = "rx_vport_unicast_packets",
> > +		.dev_table_idx = 0
> > +	},
> > +	{
> > +		.dpdk_name = "rx_port_multicast_packets",
> > +		.ctr_name = "rx_vport_multicast_packets",
> > +		.dev_table_idx = 0
> > +	},
> > +	{
> > +		.dpdk_name = "rx_port_broadcast_packets",
> > +		.ctr_name = "rx_vport_broadcast_packets",
> > +		.dev_table_idx = 0
> > +	},
> > +	{
> > +		.dpdk_name = "tx_port_unicast_bytes",
> > +		.ctr_name = "tx_vport_unicast_bytes",
> > +		.dev_table_idx = 0
> > +	},
> > +	{
> > +		.dpdk_name = "tx_port_multicast_bytes",
> > +		.ctr_name = "tx_vport_multicast_bytes",
> > +		.dev_table_idx = 0
> > +	},
> > +	{
> > +		.dpdk_name = "tx_port_broadcast_bytes",
> > +		.ctr_name = "tx_vport_broadcast_bytes",
> > +		.dev_table_idx = 0
> > +	},
> > +	{
> > +		.dpdk_name = "tx_port_unicast_packets",
> > +		.ctr_name = "tx_vport_unicast_packets",
> > +		.dev_table_idx = 0
> > +	},
> > +	{
> > +		.dpdk_name = "tx_port_multicast_packets",
> > +		.ctr_name = "tx_vport_multicast_packets",
> > +		.dev_table_idx = 0
> > +	},
> > +	{
> > +		.dpdk_name = "tx_port_broadcast_packets",
> > +		.ctr_name = "tx_vport_broadcast_packets",
> > +		.dev_table_idx = 0
> > +	},
> > +	{
> > +		.dpdk_name = "rx_wqe_err",
> > +		.ctr_name = "rx_wqe_err",
> > +		.dev_table_idx = 0
> > +	},
> > +	{
> > +		.dpdk_name = "rx_crc_errors_phy",
> > +		.ctr_name = "rx_crc_errors_phy",
> > +		.dev_table_idx = 0
> > +	},
> > +	{
> > +		.dpdk_name = "rx_in_range_len_errors_phy",
> > +		.ctr_name = "rx_in_range_len_errors_phy",
> > +		.dev_table_idx = 0
> > +	},
> > +	{
> > +		.dpdk_name = "rx_symbol_err_phy",
> > +		.ctr_name = "rx_symbol_err_phy",
> > +		.dev_table_idx = 0
> > +	},
> > +	{
> > +		.dpdk_name = "tx_errors_phy",
> > +		.ctr_name = "tx_errors_phy",
> > +		.dev_table_idx = 0
> > +	},
> > +};
> > +
> > +const unsigned int xstats_n = RTE_DIM(mlx5_counters_init);
> 
> Considering this global depends on mlx5_counters_init[] and is only used in this file, it should be static.
> 
> > +
> > +/**
> > + * Read device counters table.
> > + *
> > + * @param priv
> > + *   Pointer to private structure.
> > + * @param[out] stats
> > + *   Counters table output buffer.
> > + *
> > + * @return
> > + *   0 on success and stats is filled, negative on error.
> > + */
> > +static int
> > +priv_read_dev_counters(struct priv *priv, uint64_t *stats) {
> > +	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
> > +	unsigned int i;
> > +	struct ifreq ifr;
> > +	unsigned int stats_sz = (xstats_ctrl->stats_n * sizeof(uint64_t)) +
> > +				 sizeof(struct ethtool_stats);
> > +	struct ethtool_stats et_stats[(stats_sz + (
> > +				      sizeof(struct ethtool_stats) - 1)) /
> > +				      sizeof(struct ethtool_stats)];
> > +
> > +	et_stats->cmd = ETHTOOL_GSTATS;
> > +	et_stats->n_stats = xstats_ctrl->stats_n;
> > +	ifr.ifr_data = (caddr_t)et_stats;
> > +	if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
> > +		WARN("unable to get statistic values");
> > +		return -1;
> > +	}
> > +	for (i = 0; (i != xstats_n) ; ++i)
> > +		stats[i] = (uint64_t)
> > +			   et_stats->data[mlx5_counters_init[i].dev_table_idx];
> > +	return 0;
> > +}
> > +
> > +/**
> > + * Init the strcutures to read device counters.
> 
> Typo, "strcutures".
> 
> > + *
> > + * @param priv
> > + *   Pointer to private structure.
> > + */
> > +void
> > +priv_xstats_init(struct priv *priv)
> > +{
> > +	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
> > +	unsigned int i;
> > +	unsigned int j;
> > +	char ifname[IF_NAMESIZE];
> > +	struct ifreq ifr;
> > +	struct ethtool_drvinfo drvinfo;
> > +	struct ethtool_gstrings *strings = NULL;
> > +	unsigned int dev_stats_n;
> > +	unsigned int str_sz;
> > +
> > +	if (priv_get_ifname(priv, &ifname)) {
> > +		WARN("unable to get interface name");
> > +		return;
> > +	}
> > +	/* How many statistics are available. */
> > +	drvinfo.cmd = ETHTOOL_GDRVINFO;
> > +	ifr.ifr_data = (caddr_t)&drvinfo;
> > +	if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
> > +		WARN("unable to get driver info");
> > +		return;
> > +	}
> > +	dev_stats_n = drvinfo.n_stats;
> > +	if (dev_stats_n < 1) {
> > +		WARN("no extended statistics available");
> > +		return;
> > +	}
> > +	xstats_ctrl->stats_n = dev_stats_n;
> > +	/* Allocate memory to grab stat names and values. */
> > +	str_sz = dev_stats_n * ETH_GSTRING_LEN;
> > +	strings = (struct ethtool_gstrings *)
> > +		  rte_malloc("xstats_strings",
> > +			     str_sz + sizeof(struct ethtool_gstrings), 0);
> > +	if (!strings) {
> > +		WARN("unable to allocate memory for xstats");
> > +		return;
> > +	}
> > +	strings->cmd = ETHTOOL_GSTRINGS;
> > +	strings->string_set = ETH_SS_STATS;
> > +	strings->len = dev_stats_n;
> > +	ifr.ifr_data = (caddr_t)strings;
> > +	if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
> > +		WARN("unable to get statistic names");
> > +		goto free;
> > +	}
> > +	for (j = 0; (j != xstats_n); ++j)
> > +		mlx5_counters_init[j].dev_table_idx = dev_stats_n;
> 
> As a global, mlx5_counters_init[] affects all DPDK ports that use the mlx5 PMD simultaneously. Are you sure calling priv_xstats_init() from
> mlx5_dev_start() is safe?
> 
> I think this global should be const and initialized only once.
> 
> > +	for (i = 0; (i != dev_stats_n); ++i) {
> > +		const char *curr_string = (const char *)
> > +			&strings->data[i * ETH_GSTRING_LEN];
> > +
> > +		for (j = 0; (j != xstats_n); ++j) {
> > +			if (!strcmp(mlx5_counters_init[j].ctr_name,
> > +				    curr_string)) {
> > +				mlx5_counters_init[j].dev_table_idx = i;
> 
> The above comment also applies here. The PMD instance should allocate its own mapping as a priv field if there is no other choice. You could perhaps make it part of the mlx5_xstats_ctrl structure.
> 
> > +				break;
> > +			}
> > +		}
> > +	}
> > +	for (j = 0; (j != xstats_n); ++j) {
> > +		if (mlx5_counters_init[j].dev_table_idx >= dev_stats_n) {
> > +			WARN("Counters are not recognized");
> 
> Displaying the name of the counter that is not recognized could help users determine what's doing on. Please make sure all info/warning/error messages are helpful enough, e.g.:
> 
>  testpmd> show port xstats 0
>  ###### NIC extended statistics for port 0
>  mlx5: Counters are not recognized
>  testpmd> # what?
> 
> > +			goto free;
> > +		}
> > +	}
> > +	/* Copy to shadow at first time. */
> > +	assert(xstats_n <= MLX5_MAX_XSTATS);
> > +	priv_read_dev_counters(priv, xstats_ctrl->shadow);
> > +free:
> > +	rte_free(strings);
> > +}
> > +
> > +/**
> > + * Get device extended statistics.
> > + *
> > + * @param priv
> > + *   Pointer to private structure.
> > + * @param[out] stats
> > + *   Pointer to rte extended stats table.
> > + *
> > + * @return
> > + *   Number of extended stats on success and stats is filled,
> > + *   nagative on error.
> 
> Typo, "nagative".
> 
> > + */
> > +static int
> > +priv_xstats_get(struct priv *priv, struct rte_eth_xstat *stats) {
> > +	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
> > +	unsigned int i;
> > +	uint64_t counters[xstats_n];
> > +
> > +	if (priv_read_dev_counters(priv, counters) < 0)
> > +		return -1;
> > +	for (i = 0; (i != xstats_n) ; ++i) {
> > +		stats[i].id = i;
> > +		stats[i].value = (uint64_t)
> > +				 (counters[i] - xstats_ctrl->shadow[i]);
> 
> I understand the purpose of the shadow array in this context, however to me "shadow" implies some sort of caching is taking place. I think "init" or "base" (as the base value or something) would make its purpose clearer.
> 
> > +	}
> > +	return xstats_n;
> > +}
> > +
> > +/**
> > + * Reset device extended statistics.
> > + *
> > + * @param priv
> > + *   Pointer to private structure.
> > + */
> > +static void
> > +priv_xstats_reset(struct priv *priv)
> > +{
> > +	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
> > +	unsigned int i;
> > +	uint64_t counters[xstats_n];
> > +
> > +	if (priv_read_dev_counters(priv, counters) < 0)
> > +		return;
> > +	for (i = 0; (i != xstats_n) ; ++i)
> > +		xstats_ctrl->shadow[i] = counters[i]; }
> > +
> >  /**
> >   * DPDK callback to get device statistics.
> >   *
> > @@ -142,3 +410,77 @@
> >  #endif
> >  	priv_unlock(priv);
> >  }
> > +
> > +/**
> > + * DPDK callback to get extended device statistics.
> > + *
> > + * @param dev
> > + *   Pointer to Ethernet device structure.
> > + * @param[out] stats
> > + *   Stats table output buffer.
> > + * @param n
> > + *   The size of the stats table.
> > + *
> > + * @return
> > + *   Number of xstats on success, negative on failure.
> > + */
> > +int
> > +mlx5_xstats_get(struct rte_eth_dev *dev,
> > +		struct rte_eth_xstat *stats, unsigned int n) {
> > +	struct priv *priv = mlx5_get_priv(dev);
> > +	int ret = xstats_n;
> > +
> > +	if (n >= xstats_n && stats) {
> > +		priv_lock(priv);
> > +		ret = priv_xstats_get(priv, stats);
> > +		priv_unlock(priv);
> > +	}
> > +	return ret;
> > +}
> > +
> > +/**
> > + * DPDK callback to clear device extended statistics.
> > + *
> > + * @param dev
> > + *   Pointer to Ethernet device structure.
> > + */
> > +void
> > +mlx5_xstats_reset(struct rte_eth_dev *dev) {
> > +	struct priv *priv = mlx5_get_priv(dev);
> > +
> > +	priv_lock(priv);
> > +	priv_xstats_reset(priv);
> > +	priv_unlock(priv);
> > +}
> > +
> > +/**
> > + * DPDK callback to retrieve names of extended device statistics
> > + *
> > + * @param dev
> > + *   Pointer to Ethernet device structure.
> > + * @param[out] xstats_names
> > + *   Block of memory to insert names into
> 
> Let's call "block of memory" a "buffer"? There is also a missing period at the end of this sentence. 
> 
> > + * @param size
> > + *   Number of names.
> 
> "num" (or "n" as in the API) would make more sense than "size" here.
> 
> > + *
> > + * @return
> > + *   Number of xstats names.
> > + */
> > +int
> > +mlx5_xstats_get_names(struct rte_eth_dev *dev,
> > +		struct rte_eth_xstat_name *xstats_names, unsigned int size) {
> > +	struct priv *priv = mlx5_get_priv(dev);
> > +	unsigned int i;
> > +
> > +	if (size >= xstats_n && xstats_names) {
> > +		priv_lock(priv);
> > +		for (i = 0; (i != xstats_n) ; ++i)
> > +			strcpy(xstats_names[i].name,
> > +			       mlx5_counters_init[i].dpdk_name);
> > +		priv_unlock(priv);
> > +	}
> > +	return xstats_n;
> > +}
> > diff --git a/drivers/net/mlx5/mlx5_trigger.c 
> > b/drivers/net/mlx5/mlx5_trigger.c index 2399243..30addd2 100644
> > --- a/drivers/net/mlx5/mlx5_trigger.c
> > +++ b/drivers/net/mlx5/mlx5_trigger.c
> > @@ -91,6 +91,7 @@
> >  		priv_fdir_enable(priv);
> >  	priv_dev_interrupt_handler_install(priv, dev);
> >  	err = priv_flow_start(priv);
> > +	priv_xstats_init(priv);
> >  	priv_unlock(priv);
> >  	return -err;
> >  }
> > --
> > 1.8.3.1
> 
> --
> Adrien Mazarguil
> 6WIND
  

Patch

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 6293c1f..3359d3c 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -199,6 +199,9 @@ 
 	.link_update = mlx5_link_update,
 	.stats_get = mlx5_stats_get,
 	.stats_reset = mlx5_stats_reset,
+	.xstats_get = mlx5_xstats_get,
+	.xstats_reset = mlx5_xstats_reset,
+	.xstats_get_names = mlx5_xstats_get_names,
 	.dev_infos_get = mlx5_dev_infos_get,
 	.dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
 	.vlan_filter_set = mlx5_vlan_filter_set,
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ee62e04..034a05e 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -89,6 +89,11 @@  enum {
 	PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a,
 };
 
+struct mlx5_xstats_ctrl {
+	uint64_t shadow[MLX5_MAX_XSTATS];
+	uint16_t stats_n; /* Number of device stats. */
+};
+
 struct priv {
 	struct rte_eth_dev *dev; /* Ethernet device. */
 	struct ibv_context *ctx; /* Verbs context. */
@@ -142,6 +147,7 @@  struct priv {
 	struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
 	LIST_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
+	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
 };
 
@@ -250,8 +256,14 @@  int mlx5_dev_rss_reta_update(struct rte_eth_dev *,
 
 /* mlx5_stats.c */
 
+void priv_xstats_init(struct priv *);
 void mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *);
 void mlx5_stats_reset(struct rte_eth_dev *);
+int mlx5_xstats_get(struct rte_eth_dev *,
+		    struct rte_eth_xstat *, unsigned int);
+void mlx5_xstats_reset(struct rte_eth_dev *);
+int mlx5_xstats_get_names(struct rte_eth_dev *,
+			  struct rte_eth_xstat_name *, unsigned int);
 
 /* mlx5_vlan.c */
 
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index b32816e..beabb70 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -79,4 +79,7 @@ 
 /* Alarm timeout. */
 #define MLX5_ALARM_TIMEOUT_US 100000
 
+/* Maximum number of extended statistics counters. */
+#define MLX5_MAX_XSTATS 32
+
 #endif /* RTE_PMD_MLX5_DEFS_H_ */
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index f2b5781..08a7656 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -31,11 +31,16 @@ 
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <linux/sockios.h>
+#include <linux/ethtool.h>
+
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
 #pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_ethdev.h>
+#include <rte_common.h>
+#include <rte_malloc.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
@@ -44,6 +49,269 @@ 
 #include "mlx5_rxtx.h"
 #include "mlx5_defs.h"
 
+struct mlx5_counter_ctrl {
+	/* Name of the counter for dpdk user. */
+	char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
+	/* Name of the counter on the device table. */
+	char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
+	/* Index in the device counters table. */
+	uint16_t dev_table_idx;
+};
+
+static struct mlx5_counter_ctrl mlx5_counters_init[] = {
+	{
+		.dpdk_name = "rx_port_unicast_bytes",
+		.ctr_name = "rx_vport_unicast_bytes",
+		.dev_table_idx = 0
+	},
+	{
+		.dpdk_name = "rx_port_multicast_bytes",
+		.ctr_name = "rx_vport_multicast_bytes",
+		.dev_table_idx = 0
+	},
+	{
+		.dpdk_name = "rx_port_broadcast_bytes",
+		.ctr_name = "rx_vport_broadcast_bytes",
+		.dev_table_idx = 0
+	},
+	{
+		.dpdk_name = "rx_port_unicast_packets",
+		.ctr_name = "rx_vport_unicast_packets",
+		.dev_table_idx = 0
+	},
+	{
+		.dpdk_name = "rx_port_multicast_packets",
+		.ctr_name = "rx_vport_multicast_packets",
+		.dev_table_idx = 0
+	},
+	{
+		.dpdk_name = "rx_port_broadcast_packets",
+		.ctr_name = "rx_vport_broadcast_packets",
+		.dev_table_idx = 0
+	},
+	{
+		.dpdk_name = "tx_port_unicast_bytes",
+		.ctr_name = "tx_vport_unicast_bytes",
+		.dev_table_idx = 0
+	},
+	{
+		.dpdk_name = "tx_port_multicast_bytes",
+		.ctr_name = "tx_vport_multicast_bytes",
+		.dev_table_idx = 0
+	},
+	{
+		.dpdk_name = "tx_port_broadcast_bytes",
+		.ctr_name = "tx_vport_broadcast_bytes",
+		.dev_table_idx = 0
+	},
+	{
+		.dpdk_name = "tx_port_unicast_packets",
+		.ctr_name = "tx_vport_unicast_packets",
+		.dev_table_idx = 0
+	},
+	{
+		.dpdk_name = "tx_port_multicast_packets",
+		.ctr_name = "tx_vport_multicast_packets",
+		.dev_table_idx = 0
+	},
+	{
+		.dpdk_name = "tx_port_broadcast_packets",
+		.ctr_name = "tx_vport_broadcast_packets",
+		.dev_table_idx = 0
+	},
+	{
+		.dpdk_name = "rx_wqe_err",
+		.ctr_name = "rx_wqe_err",
+		.dev_table_idx = 0
+	},
+	{
+		.dpdk_name = "rx_crc_errors_phy",
+		.ctr_name = "rx_crc_errors_phy",
+		.dev_table_idx = 0
+	},
+	{
+		.dpdk_name = "rx_in_range_len_errors_phy",
+		.ctr_name = "rx_in_range_len_errors_phy",
+		.dev_table_idx = 0
+	},
+	{
+		.dpdk_name = "rx_symbol_err_phy",
+		.ctr_name = "rx_symbol_err_phy",
+		.dev_table_idx = 0
+	},
+	{
+		.dpdk_name = "tx_errors_phy",
+		.ctr_name = "tx_errors_phy",
+		.dev_table_idx = 0
+	},
+};
+
+const unsigned int xstats_n = RTE_DIM(mlx5_counters_init);
+
+/**
+ * Read device counters table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[out] stats
+ *   Counters table output buffer.
+ *
+ * @return
+ *   0 on success and stats is filled, negative on error.
+ */
+static int
+priv_read_dev_counters(struct priv *priv, uint64_t *stats)
+{
+	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+	unsigned int i;
+	struct ifreq ifr;
+	unsigned int stats_sz = (xstats_ctrl->stats_n * sizeof(uint64_t)) +
+				 sizeof(struct ethtool_stats);
+	struct ethtool_stats et_stats[(stats_sz + (
+				      sizeof(struct ethtool_stats) - 1)) /
+				      sizeof(struct ethtool_stats)];
+
+	et_stats->cmd = ETHTOOL_GSTATS;
+	et_stats->n_stats = xstats_ctrl->stats_n;
+	ifr.ifr_data = (caddr_t)et_stats;
+	if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
+		WARN("unable to get statistic values");
+		return -1;
+	}
+	for (i = 0; (i != xstats_n) ; ++i)
+		stats[i] = (uint64_t)
+			   et_stats->data[mlx5_counters_init[i].dev_table_idx];
+	return 0;
+}
+
+/**
+ * Init the strcutures to read device counters.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+priv_xstats_init(struct priv *priv)
+{
+	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+	unsigned int i;
+	unsigned int j;
+	char ifname[IF_NAMESIZE];
+	struct ifreq ifr;
+	struct ethtool_drvinfo drvinfo;
+	struct ethtool_gstrings *strings = NULL;
+	unsigned int dev_stats_n;
+	unsigned int str_sz;
+
+	if (priv_get_ifname(priv, &ifname)) {
+		WARN("unable to get interface name");
+		return;
+	}
+	/* How many statistics are available. */
+	drvinfo.cmd = ETHTOOL_GDRVINFO;
+	ifr.ifr_data = (caddr_t)&drvinfo;
+	if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
+		WARN("unable to get driver info");
+		return;
+	}
+	dev_stats_n = drvinfo.n_stats;
+	if (dev_stats_n < 1) {
+		WARN("no extended statistics available");
+		return;
+	}
+	xstats_ctrl->stats_n = dev_stats_n;
+	/* Allocate memory to grab stat names and values. */
+	str_sz = dev_stats_n * ETH_GSTRING_LEN;
+	strings = (struct ethtool_gstrings *)
+		  rte_malloc("xstats_strings",
+			     str_sz + sizeof(struct ethtool_gstrings), 0);
+	if (!strings) {
+		WARN("unable to allocate memory for xstats");
+		return;
+	}
+	strings->cmd = ETHTOOL_GSTRINGS;
+	strings->string_set = ETH_SS_STATS;
+	strings->len = dev_stats_n;
+	ifr.ifr_data = (caddr_t)strings;
+	if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
+		WARN("unable to get statistic names");
+		goto free;
+	}
+	for (j = 0; (j != xstats_n); ++j)
+		mlx5_counters_init[j].dev_table_idx = dev_stats_n;
+	for (i = 0; (i != dev_stats_n); ++i) {
+		const char *curr_string = (const char *)
+			&strings->data[i * ETH_GSTRING_LEN];
+
+		for (j = 0; (j != xstats_n); ++j) {
+			if (!strcmp(mlx5_counters_init[j].ctr_name,
+				    curr_string)) {
+				mlx5_counters_init[j].dev_table_idx = i;
+				break;
+			}
+		}
+	}
+	for (j = 0; (j != xstats_n); ++j) {
+		if (mlx5_counters_init[j].dev_table_idx >= dev_stats_n) {
+			WARN("Counters are not recognized");
+			goto free;
+		}
+	}
+	/* Copy to shadow at first time. */
+	assert(xstats_n <= MLX5_MAX_XSTATS);
+	priv_read_dev_counters(priv, xstats_ctrl->shadow);
+free:
+	rte_free(strings);
+}
+
+/**
+ * Get device extended statistics.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[out] stats
+ *   Pointer to rte extended stats table.
+ *
+ * @return
+ *   Number of extended stats on success and stats is filled,
+ *   nagative on error.
+ */
+static int
+priv_xstats_get(struct priv *priv, struct rte_eth_xstat *stats)
+{
+	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+	unsigned int i;
+	uint64_t counters[xstats_n];
+
+	if (priv_read_dev_counters(priv, counters) < 0)
+		return -1;
+	for (i = 0; (i != xstats_n) ; ++i) {
+		stats[i].id = i;
+		stats[i].value = (uint64_t)
+				 (counters[i] - xstats_ctrl->shadow[i]);
+	}
+	return xstats_n;
+}
+
+/**
+ * Reset device extended statistics.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_xstats_reset(struct priv *priv)
+{
+	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+	unsigned int i;
+	uint64_t counters[xstats_n];
+
+	if (priv_read_dev_counters(priv, counters) < 0)
+		return;
+	for (i = 0; (i != xstats_n) ; ++i)
+		xstats_ctrl->shadow[i] = counters[i];
+}
+
 /**
  * DPDK callback to get device statistics.
  *
@@ -142,3 +410,77 @@ 
 #endif
 	priv_unlock(priv);
 }
+
+/**
+ * DPDK callback to get extended device statistics.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[out] stats
+ *   Stats table output buffer.
+ * @param n
+ *   The size of the stats table.
+ *
+ * @return
+ *   Number of xstats on success, negative on failure.
+ */
+int
+mlx5_xstats_get(struct rte_eth_dev *dev,
+		struct rte_eth_xstat *stats, unsigned int n)
+{
+	struct priv *priv = mlx5_get_priv(dev);
+	int ret = xstats_n;
+
+	if (n >= xstats_n && stats) {
+		priv_lock(priv);
+		ret = priv_xstats_get(priv, stats);
+		priv_unlock(priv);
+	}
+	return ret;
+}
+
+/**
+ * DPDK callback to clear device extended statistics.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+void
+mlx5_xstats_reset(struct rte_eth_dev *dev)
+{
+	struct priv *priv = mlx5_get_priv(dev);
+
+	priv_lock(priv);
+	priv_xstats_reset(priv);
+	priv_unlock(priv);
+}
+
+/**
+ * DPDK callback to retrieve names of extended device statistics
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[out] xstats_names
+ *   Block of memory to insert names into
+ * @param size
+ *   Number of names.
+ *
+ * @return
+ *   Number of xstats names.
+ */
+int
+mlx5_xstats_get_names(struct rte_eth_dev *dev,
+		struct rte_eth_xstat_name *xstats_names, unsigned int size)
+{
+	struct priv *priv = mlx5_get_priv(dev);
+	unsigned int i;
+
+	if (size >= xstats_n && xstats_names) {
+		priv_lock(priv);
+		for (i = 0; (i != xstats_n) ; ++i)
+			strcpy(xstats_names[i].name,
+			       mlx5_counters_init[i].dpdk_name);
+		priv_unlock(priv);
+	}
+	return xstats_n;
+}
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 2399243..30addd2 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -91,6 +91,7 @@ 
 		priv_fdir_enable(priv);
 	priv_dev_interrupt_handler_install(priv, dev);
 	err = priv_flow_start(priv);
+	priv_xstats_init(priv);
 	priv_unlock(priv);
 	return -err;
 }