[dpdk-dev,v2] net/vhost: add pmd xstats

Message ID 1473408927-40364-1-git-send-email-zhiyong.yang@intel.com
State Superseded, archived
Delegated to: Yuanhan Liu
Headers show

Commit Message

Yang, Zhiyong Sept. 9, 2016, 8:15 a.m.
This feature adds vhost pmd extended statistics from per queue perspective
for the application such as OVS etc.

The statistics counters are based on RFC 2819 and RFC 2863 as follows:

rx/tx_good_packets
rx/tx_total_bytes
rx/tx_dropped_pkts
rx/tx_broadcast_packets
rx/tx_multicast_packets
rx/tx_ucast_packets
rx/tx_undersize_errors
rx/tx_size_64_packets
rx/tx_size_65_to_127_packets;
rx/tx_size_128_to_255_packets;
rx/tx_size_256_to_511_packets;
rx/tx_size_512_to_1023_packets;
rx/tx_size_1024_to_1522_packets;
rx/tx_1523_to_max_packets;
rx/tx_errors
rx_fragmented_errors
rx_jabber_errors
rx_unknown_protos_packets;

No API is changed or added.
rte_eth_xstats_get_names() to retrieve what kinds of vhost xstats are
supported,
rte_eth_xstats_get() to retrieve vhost extended statistics,
rte_eth_xstats_reset() to reset vhost extended statistics. 

The usage of vhost pmd xstats is the same as virtio pmd xstats.
for example, when test-pmd application is running in interactive mode
vhost pmd xstats will support the two following commands:

show port xstats all | port_id will show vhost xstats
clear port xstats all | port_id will reset vhost xstats

net/virtio pmd xstats(the function virtio_update_packet_stats) is used
as reference when implementing the feature. 

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---

Changes in v2:
1. remove the compiling switch.
2. fix two code bugs.

 drivers/net/vhost/rte_eth_vhost.c | 282 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 281 insertions(+), 1 deletion(-)

Comments

Van Haaren, Harry Sept. 9, 2016, 8:40 a.m. | #1
Hi Zhiyong,

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Zhiyong Yang
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v2] net/vhost: add pmd xstats
> 
> +struct vhost_xstats {
> +	uint64_t stat[16];
> +};

Perhaps we could create an enum to access the stat array?

enum VHOST_STAT {
   ...
   VHOST_STAT_64_PKT,
   ...
};

> +	{"broadcast_packets",
> +			offsetof(struct vhost_queue, xstats.stat[8])},

I think the "magic number" 8 here could be from the enum, and would be more clear which statistic is being accessed.

> +		if (pkt_len == 64) {
> +			xstats_update->stat[1]++;

Similarly, incrementing the counters would be more representative if it looked like

xstats_update->stat[VHOST_STAT_64_PKT]++;   /* or similar */

-Harry
Yang, Zhiyong Sept. 9, 2016, 8:54 a.m. | #2
Hi, Harry:

Your idea looks very good.

Thanks
--Zhiyong
> -----Original Message-----
> From: Van Haaren, Harry
> Sent: Friday, September 9, 2016 4:41 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org
> Cc: yuanhan.liu@linux.intel.com; thomas.monjalon@6wind.com;
> "mailto:pmatilai"@redhat.com; Yang, Zhiyong <zhiyong.yang@intel.com>
> Subject: RE: [dpdk-dev] [PATCH v2] net/vhost: add pmd xstats
> 
> Hi Zhiyong,
> 
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Zhiyong Yang
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH v2] net/vhost: add pmd xstats
> >
> > +struct vhost_xstats {
> > +	uint64_t stat[16];
> > +};
> 
> Perhaps we could create an enum to access the stat array?
> 
> enum VHOST_STAT {
>    ...
>    VHOST_STAT_64_PKT,
>    ...
> };
> 
> > +	{"broadcast_packets",
> > +			offsetof(struct vhost_queue, xstats.stat[8])},
> 
> I think the "magic number" 8 here could be from the enum, and would be
> more clear which statistic is being accessed.
> 
> > +		if (pkt_len == 64) {
> > +			xstats_update->stat[1]++;
> 
> Similarly, incrementing the counters would be more representative if it
> looked like
> 
> xstats_update->stat[VHOST_STAT_64_PKT]++;   /* or similar */
> 
> -Harry
Yuanhan Liu Sept. 14, 2016, 6:20 a.m. | #3
On Fri, Sep 09, 2016 at 04:15:27PM +0800, Zhiyong Yang wrote:
> +struct vhost_xstats {
> +	uint64_t stat[16];
> +};
> +
>  struct vhost_queue {
>  	int vid;
>  	rte_atomic32_t allow_queuing;
> @@ -85,7 +89,8 @@ struct vhost_queue {
>  	uint64_t missed_pkts;
>  	uint64_t rx_bytes;
>  	uint64_t tx_bytes;

I'd suggest to put those statistic counters to vhost_stats struct,
which could simplify the xstats_reset code a bit.

And please do it in two patches, one to introduce vhost_stats, another
one to add xstats.

> -};
> +	struct vhost_xstats xstats;
> +	};

A format issue here.

>  
>  struct pmd_internal {
>  	char *dev_name;
> @@ -127,6 +132,274 @@ struct rte_vhost_vring_state {
>  
>  static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS];
>  
> +enum rte_vhostqueue_rxtx {

Don't use "rte_" prefix for internal function, vars, etc. "rte_" prefix is
reserved for those that will be exported for public use.

> +	RTE_VHOSTQUEUE_RX = 0,
> +	RTE_VHOSTQUEUE_TX = 1
> +};
> +
> +#define RTE_ETH_VHOST_XSTATS_NAME_SIZE 64

ditto.

> +
> +struct rte_vhost_xstats_name_off {

ditto.

> +	char name[RTE_ETH_VHOST_XSTATS_NAME_SIZE];
> +	uint64_t offset;
> +};
> +
> +/* [rt]_qX_ is prepended to the name string here */
> +static void
> +vhost_dev_xstats_reset(struct rte_eth_dev *dev)
> +{
> +	struct vhost_queue *vqrx = NULL;
> +	struct vhost_queue *vqtx = NULL;
> +	unsigned int i = 0;
> +
> +	for (i = 0; i < dev->data->nb_rx_queues; i++) {
> +		if (!dev->data->rx_queues[i])
> +			continue;
> +		vqrx = (struct vhost_queue *)dev->data->rx_queues[i];

Unnecessary cast.

> +		vqrx->rx_pkts = 0;
> +		vqrx->rx_bytes = 0;
> +		vqrx->missed_pkts = 0;
> +		memset(&vqrx->xstats, 0, sizeof(vqrx->xstats));
> +	}
> +	for (i = 0; i < dev->data->nb_tx_queues; i++) {
> +		if (!dev->data->tx_queues[i])
> +			continue;
> +		vqtx = (struct vhost_queue *)dev->data->tx_queues[i];
> +		vqtx->tx_pkts = 0;
> +		vqtx->tx_bytes = 0;
> +		vqtx->missed_pkts = 0;
> +		memset(&vqtx->xstats, 0, sizeof(vqtx->xstats));
> +	}
> +}
> +
> +static int
> +vhost_dev_xstats_get_names(struct rte_eth_dev *dev,
> +			   struct rte_eth_xstat_name *xstats_names,
> +			   __rte_unused unsigned int limit)

The typical way is to put __rte_unused after the key word.

> +{
> +	unsigned int i = 0;
> +	unsigned int t = 0;
> +	int count = 0;
> +	int nstats = dev->data->nb_rx_queues * VHOST_NB_RXQ_XSTATS
> +			+ dev->data->nb_tx_queues * VHOST_NB_TXQ_XSTATS;
> +
> +	if (xstats_names) {

I know you are following virtio pmd style, but you don't have to. I'd
suggest to return early for (!xstats_names) case, then we could save
one indention level for following code block.

> +		for (i = 0; i < dev->data->nb_rx_queues; i++) {
> +			struct vhost_queue *rxvq = dev->data->rx_queues[i];
> +
> +			if (!rxvq)
> +				continue;
> +			for (t = 0; t < VHOST_NB_RXQ_XSTATS; t++) {
> +				snprintf(xstats_names[count].name,
> +					 sizeof(xstats_names[count].name),
> +					 "rx_q%u_%s", i,
> +					 rte_vhost_rxq_stat_strings[t].name);
> +				count++;
> +			}
> +		}
> +		for (i = 0; i < dev->data->nb_tx_queues; i++) {
> +			struct vhost_queue *txvq = dev->data->tx_queues[i];
> +
> +			if (!txvq)
> +				continue;
> +			for (t = 0; t < VHOST_NB_TXQ_XSTATS; t++) {
> +				snprintf(xstats_names[count].name,
> +					 sizeof(xstats_names[count].name),
> +					 "tx_q%u_%s", i,
> +					 rte_vhost_txq_stat_strings[t].name);
> +				count++;
> +			}
> +		}
> +		return count;
> +	}
> +	return nstats;
> +}
> +
> +static int
> +vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
> +		     unsigned int n)
> +{
> +	unsigned int i;
> +	unsigned int t;
> +	unsigned int count = 0;
> +
> +	unsigned int nxstats = dev->data->nb_rx_queues * VHOST_NB_RXQ_XSTATS
> +				+ dev->data->nb_tx_queues * VHOST_NB_TXQ_XSTATS;
> +
> +	if (n < nxstats)
> +		return nxstats;
> +
> +	for (i = 0; i < dev->data->nb_rx_queues; i++) {
> +		struct vhost_queue *rxvq =
> +			(struct vhost_queue *)dev->data->rx_queues[i];

Unnecessary cast.

> +
> +		if (!rxvq)
> +			continue;
> +
> +		for (t = 0; t < VHOST_NB_RXQ_XSTATS; t++) {
> +			xstats[count].value = *(uint64_t *)(((char *)rxvq)
> +				+ rte_vhost_rxq_stat_strings[t].offset);
> +			count++;
> +		}
> +	}
> +
> +	for (i = 0; i < dev->data->nb_tx_queues; i++) {
> +		struct vhost_queue *txvq =
> +			(struct vhost_queue *)dev->data->tx_queues[i];
> +
> +		if (!txvq)
> +			continue;
> +
> +		for (t = 0; t < VHOST_NB_TXQ_XSTATS; t++) {
> +			xstats[count].value = *(uint64_t *)(((char *)txvq)
> +				+ rte_vhost_txq_stat_strings[t].offset);
> +			count++;
> +		}
> +	}
> +
> +	return count;
> +}
> +
> +static void
> +vhost_update_packet_xstats(struct vhost_queue *vq,
> +			   struct rte_mbuf **bufs,
> +			   uint16_t nb_rxtx,
> +			   uint16_t nb_bufs,
> +			   enum rte_vhostqueue_rxtx vqueue_rxtx)
> +{
> +	uint32_t pkt_len = 0;
> +	uint64_t i = 0;
> +	uint64_t index;
> +	struct ether_addr *ea = NULL;
> +	struct vhost_xstats *xstats_update = &vq->xstats;
> +
> +	for (i = 0; i < nb_rxtx ; i++) {
> +		pkt_len = bufs[i]->pkt_len;
> +		if (pkt_len == 64) {
> +			xstats_update->stat[1]++;
> +
Unnecessary blank line.

> +		} else if (pkt_len > 64 && pkt_len < 1024) {
> +			index = (sizeof(pkt_len) * 8)
> +				- __builtin_clz(pkt_len) - 5;
> +			xstats_update->stat[index]++;
> +		} else {
> +			if (pkt_len < 64)
> +				xstats_update->stat[0]++;
> +			else if (pkt_len <= 1522)
> +				xstats_update->stat[6]++;
> +			else if (pkt_len > 1522)
> +				xstats_update->stat[7]++;
> +		}
> +
> +		ea = rte_pktmbuf_mtod(bufs[i], struct ether_addr *);
> +		if (is_multicast_ether_addr(ea)) {
> +			if (is_broadcast_ether_addr(ea))
> +				/* broadcast++; */
> +				xstats_update->stat[8]++;
> +			else
> +				/* multicast++; */
> +				xstats_update->stat[9]++;

The comment could be avoided if you define a field in vhost_stats
struct like "broadcast" or "multicast". I don't object the way Harry
proposed tough, to use enum to access the stat array.

> +		}
> +	}
> +	/* non-multi/broadcast, multi/broadcast, including those
> +	 * that were discarded or not sent.

Hmmm, I don't follow it. You may want to reword it.

> from rfc2863

Which section and which page?

	--yliu
Yang, Zhiyong Sept. 14, 2016, 7:43 a.m. | #4
Hi, yuanhan:
Thanks so much for your detailed comments. 

> -----Original Message-----
> From: Yuanhan Liu [mailto:yuanhan.liu@linux.intel.com]
> Sent: Wednesday, September 14, 2016 2:20 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>
> Cc: dev@dpdk.org; thomas.monjalon@6wind.com; pmatilai@redhat.com
> Subject: Re: [PATCH v2] net/vhost: add pmd xstats
> 
> On Fri, Sep 09, 2016 at 04:15:27PM +0800, Zhiyong Yang wrote:
> > +struct vhost_xstats {
> > +	uint64_t stat[16];
> > +};
> > +
> >  struct vhost_queue {
> >  	int vid;
> >  	rte_atomic32_t allow_queuing;
> > @@ -85,7 +89,8 @@ struct vhost_queue {
> >  	uint64_t missed_pkts;
> >  	uint64_t rx_bytes;
> >  	uint64_t tx_bytes;
> 
> I'd suggest to put those statistic counters to vhost_stats struct, which could
> simplify the xstats_reset code a bit.
> 

I consider this point when I define it, but those statistic counters are used by pmd stats, 
not only by pmd xstats,  I'm not clear if  I can modify those code.  If permitted, 
I will do it as you said.

> And please do it in two patches, one to introduce vhost_stats, another one
> to add xstats.
> 

Ok.

> > -};
> > +	struct vhost_xstats xstats;
> > +	};
> 
> A format issue here.
> 

Naming issue? such as struct vhost_xstats vhost_stats;?

> >
> >  struct pmd_internal {
> >  	char *dev_name;
> > @@ -127,6 +132,274 @@ struct rte_vhost_vring_state {
> >
> >  static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS];
> >
> > +enum rte_vhostqueue_rxtx {
> 
> Don't use "rte_" prefix for internal function, vars, etc. "rte_" prefix is
> reserved for those that will be exported for public use.
> 

Ok, I will modify it.

> > +	RTE_VHOSTQUEUE_RX = 0,
> > +	RTE_VHOSTQUEUE_TX = 1
> > +};
> > +
> > +#define RTE_ETH_VHOST_XSTATS_NAME_SIZE 64
> 
> ditto.
> 

Ok, I will modify it.

> > +
> > +struct rte_vhost_xstats_name_off {
> 
> ditto.
> 

Ok, I will modify it.

> > +	char name[RTE_ETH_VHOST_XSTATS_NAME_SIZE];
> > +	uint64_t offset;
> > +};
> > +
> > +/* [rt]_qX_ is prepended to the name string here */ static void
> > +vhost_dev_xstats_reset(struct rte_eth_dev *dev) {
> > +	struct vhost_queue *vqrx = NULL;
> > +	struct vhost_queue *vqtx = NULL;
> > +	unsigned int i = 0;
> > +
> > +	for (i = 0; i < dev->data->nb_rx_queues; i++) {
> > +		if (!dev->data->rx_queues[i])
> > +			continue;
> > +		vqrx = (struct vhost_queue *)dev->data->rx_queues[i];
> 
> Unnecessary cast.
> 
The definition of rx_queues is  void **rx_queues; when we use it to access its
Data member,   Maybe explicit cast is needed, otherwise, we have to cast
Every time when using it.

> > +		vqrx->rx_pkts = 0;
> > +		vqrx->rx_bytes = 0;
> > +		vqrx->missed_pkts = 0;
> > +		memset(&vqrx->xstats, 0, sizeof(vqrx->xstats));
> > +	}
> > +	for (i = 0; i < dev->data->nb_tx_queues; i++) {
> > +		if (!dev->data->tx_queues[i])
> > +			continue;
> > +		vqtx = (struct vhost_queue *)dev->data->tx_queues[i];
> > +		vqtx->tx_pkts = 0;
> > +		vqtx->tx_bytes = 0;
> > +		vqtx->missed_pkts = 0;
> > +		memset(&vqtx->xstats, 0, sizeof(vqtx->xstats));
> > +	}
> > +}
> > +
> > +static int
> > +vhost_dev_xstats_get_names(struct rte_eth_dev *dev,
> > +			   struct rte_eth_xstat_name *xstats_names,
> > +			   __rte_unused unsigned int limit)
> 
> The typical way is to put __rte_unused after the key word.
> 

Ok.

> > +{
> > +	unsigned int i = 0;
> > +	unsigned int t = 0;
> > +	int count = 0;
> > +	int nstats = dev->data->nb_rx_queues * VHOST_NB_RXQ_XSTATS
> > +			+ dev->data->nb_tx_queues *
> VHOST_NB_TXQ_XSTATS;
> > +
> > +	if (xstats_names) {
> 
> I know you are following virtio pmd style, but you don't have to. I'd suggest
> to return early for (!xstats_names) case, then we could save one indention
> level for following code block.
> 

OK.

> > +		for (i = 0; i < dev->data->nb_rx_queues; i++) {
> > +			struct vhost_queue *rxvq = dev->data-
> >rx_queues[i];
> > +
> > +			if (!rxvq)
> > +				continue;
> > +			for (t = 0; t < VHOST_NB_RXQ_XSTATS; t++) {
> > +				snprintf(xstats_names[count].name,
> > +					 sizeof(xstats_names[count].name),
> > +					 "rx_q%u_%s", i,
> > +					 rte_vhost_rxq_stat_strings[t].name);
> > +				count++;
> > +			}
> > +		}
> > +		for (i = 0; i < dev->data->nb_tx_queues; i++) {
> > +			struct vhost_queue *txvq = dev->data-
> >tx_queues[i];
> > +
> > +			if (!txvq)
> > +				continue;
> > +			for (t = 0; t < VHOST_NB_TXQ_XSTATS; t++) {
> > +				snprintf(xstats_names[count].name,
> > +					 sizeof(xstats_names[count].name),
> > +					 "tx_q%u_%s", i,
> > +					 rte_vhost_txq_stat_strings[t].name);
> > +				count++;
> > +			}
> > +		}
> > +		return count;
> > +	}
> > +	return nstats;
> > +}
> > +
> > +static int
> > +vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat
> *xstats,
> > +		     unsigned int n)
> > +{
> > +	unsigned int i;
> > +	unsigned int t;
> > +	unsigned int count = 0;
> > +
> > +	unsigned int nxstats = dev->data->nb_rx_queues *
> VHOST_NB_RXQ_XSTATS
> > +				+ dev->data->nb_tx_queues *
> VHOST_NB_TXQ_XSTATS;
> > +
> > +	if (n < nxstats)
> > +		return nxstats;
> > +
> > +	for (i = 0; i < dev->data->nb_rx_queues; i++) {
> > +		struct vhost_queue *rxvq =
> > +			(struct vhost_queue *)dev->data->rx_queues[i];
> 
> Unnecessary cast.
> 

Ok. I will remove it.

> > +
> > +		if (!rxvq)
> > +			continue;
> > +
> > +		for (t = 0; t < VHOST_NB_RXQ_XSTATS; t++) {
> > +			xstats[count].value = *(uint64_t *)(((char *)rxvq)
> > +				+ rte_vhost_rxq_stat_strings[t].offset);
> > +			count++;
> > +		}
> > +	}
> > +
> > +	for (i = 0; i < dev->data->nb_tx_queues; i++) {
> > +		struct vhost_queue *txvq =
> > +			(struct vhost_queue *)dev->data->tx_queues[i];
> > +
> > +		if (!txvq)
> > +			continue;
> > +
> > +		for (t = 0; t < VHOST_NB_TXQ_XSTATS; t++) {
> > +			xstats[count].value = *(uint64_t *)(((char *)txvq)
> > +				+ rte_vhost_txq_stat_strings[t].offset);
> > +			count++;
> > +		}
> > +	}
> > +
> > +	return count;
> > +}
> > +
> > +static void
> > +vhost_update_packet_xstats(struct vhost_queue *vq,
> > +			   struct rte_mbuf **bufs,
> > +			   uint16_t nb_rxtx,
> > +			   uint16_t nb_bufs,
> > +			   enum rte_vhostqueue_rxtx vqueue_rxtx) {
> > +	uint32_t pkt_len = 0;
> > +	uint64_t i = 0;
> > +	uint64_t index;
> > +	struct ether_addr *ea = NULL;
> > +	struct vhost_xstats *xstats_update = &vq->xstats;
> > +
> > +	for (i = 0; i < nb_rxtx ; i++) {
> > +		pkt_len = bufs[i]->pkt_len;
> > +		if (pkt_len == 64) {
> > +			xstats_update->stat[1]++;
> > +
> Unnecessary blank line.
> 
Ok. I will remove it.

> > +		} else if (pkt_len > 64 && pkt_len < 1024) {
> > +			index = (sizeof(pkt_len) * 8)
> > +				- __builtin_clz(pkt_len) - 5;
> > +			xstats_update->stat[index]++;
> > +		} else {
> > +			if (pkt_len < 64)
> > +				xstats_update->stat[0]++;
> > +			else if (pkt_len <= 1522)
> > +				xstats_update->stat[6]++;
> > +			else if (pkt_len > 1522)
> > +				xstats_update->stat[7]++;
> > +		}
> > +
> > +		ea = rte_pktmbuf_mtod(bufs[i], struct ether_addr *);
> > +		if (is_multicast_ether_addr(ea)) {
> > +			if (is_broadcast_ether_addr(ea))
> > +				/* broadcast++; */
> > +				xstats_update->stat[8]++;
> > +			else
> > +				/* multicast++; */
> > +				xstats_update->stat[9]++;
> 
> The comment could be avoided if you define a field in vhost_stats struct like
> "broadcast" or "multicast". I don't object the way Harry proposed tough, to
> use enum to access the stat array.
> 

I agree with you and Harry and will do like that.

> > +		}
> > +	}
> > +	/* non-multi/broadcast, multi/broadcast, including those
> > +	 * that were discarded or not sent.
> 
> Hmmm, I don't follow it. You may want to reword it.
> 
> > from rfc2863
> 
> Which section and which page?
> 
The packets received are not considered "discarded", because receiving packets via
Memory, not by physical NIC. Mainly for the number of transmit the packets
RFC2863 page 35  ifOutUcastPkts(non-multi/broadcast)
 Page 42 ifHCOutMulticastPkts
 Page 42 ifHCOutBroadcastPkts

> 	--yliu
Yang, Zhiyong Sept. 14, 2016, 8:30 a.m. | #5
Hi, Yuanhan:
            Sorry to misunderstand your comment.

> -----Original Message-----
> From: Yuanhan Liu [mailto:yuanhan.liu@linux.intel.com]
> Sent: Wednesday, September 14, 2016 2:20 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>
> Cc: dev@dpdk.org; thomas.monjalon@6wind.com; pmatilai@redhat.com
> Subject: Re: [PATCH v2] net/vhost: add pmd xstats 
> > -};
> > +	struct vhost_xstats xstats;
> > +	};
> 
> A format issue here.
> 
Ok. I understand it  (unnecessary Tab)and will modify it.
Yuanhan Liu Sept. 18, 2016, 1:16 p.m. | #6
On Wed, Sep 14, 2016 at 07:43:36AM +0000, Yang, Zhiyong wrote:
> Hi, yuanhan:
> Thanks so much for your detailed comments. 
> 
> > -----Original Message-----
> > From: Yuanhan Liu [mailto:yuanhan.liu@linux.intel.com]
> > Sent: Wednesday, September 14, 2016 2:20 PM
> > To: Yang, Zhiyong <zhiyong.yang@intel.com>
> > Cc: dev@dpdk.org; thomas.monjalon@6wind.com; pmatilai@redhat.com
> > Subject: Re: [PATCH v2] net/vhost: add pmd xstats
> > 
> > On Fri, Sep 09, 2016 at 04:15:27PM +0800, Zhiyong Yang wrote:
> > > +struct vhost_xstats {
> > > +	uint64_t stat[16];
> > > +};
> > > +
> > >  struct vhost_queue {
> > >  	int vid;
> > >  	rte_atomic32_t allow_queuing;
> > > @@ -85,7 +89,8 @@ struct vhost_queue {
> > >  	uint64_t missed_pkts;
> > >  	uint64_t rx_bytes;
> > >  	uint64_t tx_bytes;
> > 
> > I'd suggest to put those statistic counters to vhost_stats struct, which could
> > simplify the xstats_reset code a bit.
> > 
> 
> I consider this point when I define it, but those statistic counters are used by pmd stats, 
> not only by pmd xstats,  I'm not clear if  I can modify those code.  If permitted, 
> I will do it as you said.

For sure you can modify the code :) I just would suggest to do in an
single patch, as stated before (and below).

> > > +	char name[RTE_ETH_VHOST_XSTATS_NAME_SIZE];
> > > +	uint64_t offset;
> > > +};
> > > +
> > > +/* [rt]_qX_ is prepended to the name string here */ static void
> > > +vhost_dev_xstats_reset(struct rte_eth_dev *dev) {
> > > +	struct vhost_queue *vqrx = NULL;
> > > +	struct vhost_queue *vqtx = NULL;
> > > +	unsigned int i = 0;
> > > +
> > > +	for (i = 0; i < dev->data->nb_rx_queues; i++) {
> > > +		if (!dev->data->rx_queues[i])
> > > +			continue;
> > > +		vqrx = (struct vhost_queue *)dev->data->rx_queues[i];
> > 
> > Unnecessary cast.
> > 
> The definition of rx_queues is  void **rx_queues;

Yes, but rx_queues[i] is with type "void *", so the cast is not necessary.

> > > +		}
> > > +	}
> > > +	/* non-multi/broadcast, multi/broadcast, including those
> > > +	 * that were discarded or not sent.
> > 
> > Hmmm, I don't follow it. You may want to reword it.
> > 
> > > from rfc2863
> > 
> > Which section and which page?
> > 
> The packets received are not considered "discarded", because receiving packets via
> Memory, not by physical NIC. Mainly for the number of transmit the packets

It still took me some time to understand you.

> RFC2863 page 35  ifOutUcastPkts(non-multi/broadcast)

So, by your term, unicast == non-multicast/broadcast? If so, I'd suggest
to use term "unicast" but not something else: it just introduces confusions.

And in this case, I guess you were trying to say:

    /*
     * According to RFC 2863 section ifHCOutMulticastPkts, ... and ...,
     * the counter "unicast", "multicast" and "broadcast" are also
     * increased when packets are not transmitted successfully.
     */

Well, you might still need reword it.

After taking a bit closer look at the code, I'd suggest to do the countings 
like following:

- introduce a help function to increase the "broadcast" or "multicast"
  counter, say "count_multicast_broadcast(struct vhost_stats *stats, mbuf)".

- introduce a generic function to update the generic counters: this
  function just counts those packets have been successfully transmitted
  or received.

  It also invoke above helper function for multicast counting.

  It basically looks like the function vhost_update_packet_xstats,
  execpt that it doesn't handle those failure packets: it will be
  handled in following part.

- since the case "couting multicast and broadcast with failure packts"
  only happens in the Tx side, we could do those countings only in
  eth_vhost_tx():

    nb_tx = rte_vhost_enqueue_burst(r->vid,
			r->virtqueue_id, bufs, nb_bufs);

    missed = nb_bufs - nb_tx;
    /* put above comment here */
    if (missed) {
	for_each_mbuf(mbuf) {
	    count_multicast_broadcast(&vq->stats, mbuf);
        }
    }

- there is no need to update "stat[10]" (unicast), since it can be calculated
  from other counters, meaning we could just get the right value on query.
  
  This could save some cycles.

Feel free to phone me if you have any doubts.

	--yliu
Yang, Zhiyong Sept. 19, 2016, 2:48 a.m. | #7
Hi, Yuanhan:

Thanks a lot for your comments and suggestion in so detail. I will send v3 patch
as soon as possible.

--Zhiyong

> -----Original Message-----
> From: Yuanhan Liu [mailto:yuanhan.liu@linux.intel.com]
> Sent: Sunday, September 18, 2016 9:17 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>
> Cc: dev@dpdk.org; thomas.monjalon@6wind.com; pmatilai@redhat.com
> Subject: Re: [PATCH v2] net/vhost: add pmd xstats
> 
> On Wed, Sep 14, 2016 at 07:43:36AM +0000, Yang, Zhiyong wrote:
> > Hi, yuanhan:
> > Thanks so much for your detailed comments.
> >
> > > -----Original Message-----
> > > From: Yuanhan Liu [mailto:yuanhan.liu@linux.intel.com]
> > > Sent: Wednesday, September 14, 2016 2:20 PM
> > > To: Yang, Zhiyong <zhiyong.yang@intel.com>
> > > Cc: dev@dpdk.org; thomas.monjalon@6wind.com; pmatilai@redhat.com
> > > Subject: Re: [PATCH v2] net/vhost: add pmd xstats
> > >
> > > On Fri, Sep 09, 2016 at 04:15:27PM +0800, Zhiyong Yang wrote:
> > > > +struct vhost_xstats {
> > > > +	uint64_t stat[16];
> > > > +};
> > > > +
> > > >  struct vhost_queue {
> > > >  	int vid;
> > > >  	rte_atomic32_t allow_queuing;
> > > > @@ -85,7 +89,8 @@ struct vhost_queue {
> > > >  	uint64_t missed_pkts;
> > > >  	uint64_t rx_bytes;
> > > >  	uint64_t tx_bytes;
> > >
> > > I'd suggest to put those statistic counters to vhost_stats struct,
> > > which could simplify the xstats_reset code a bit.
> > >
> >
> > I consider this point when I define it, but those statistic counters
> > are used by pmd stats, not only by pmd xstats,  I'm not clear if  I
> > can modify those code.  If permitted, I will do it as you said.
> 
> For sure you can modify the code :) I just would suggest to do in an single
> patch, as stated before (and below).
> 
Ok. I will do that.

> > > > +	char name[RTE_ETH_VHOST_XSTATS_NAME_SIZE];
> > > > +	uint64_t offset;
> > > > +};
> > > > +
> > > > +/* [rt]_qX_ is prepended to the name string here */ static void
> > > > +vhost_dev_xstats_reset(struct rte_eth_dev *dev) {
> > > > +	struct vhost_queue *vqrx = NULL;
> > > > +	struct vhost_queue *vqtx = NULL;
> > > > +	unsigned int i = 0;
> > > > +
> > > > +	for (i = 0; i < dev->data->nb_rx_queues; i++) {
> > > > +		if (!dev->data->rx_queues[i])
> > > > +			continue;
> > > > +		vqrx = (struct vhost_queue *)dev->data->rx_queues[i];
> > >
> > > Unnecessary cast.
> > >
> > The definition of rx_queues is  void **rx_queues;
> 
> Yes, but rx_queues[i] is with type "void *", so the cast is not necessary.
> 

I'm confused if void * ptr can access a member of struct directly. Or Need
I explicitly cast it when using it?  

> > > > +		}
> > > > +	}
> > > > +	/* non-multi/broadcast, multi/broadcast, including those
> > > > +	 * that were discarded or not sent.
> > >
> > > Hmmm, I don't follow it. You may want to reword it.
> > >
> > > > from rfc2863
> > >
> > > Which section and which page?
> > >
> > The packets received are not considered "discarded", because receiving
> > packets via Memory, not by physical NIC. Mainly for the number of
> > transmit the packets
> 
> It still took me some time to understand you.
> 
> > RFC2863 page 35  ifOutUcastPkts(non-multi/broadcast)
> 
> So, by your term, unicast == non-multicast/broadcast? If so, I'd suggest to
> use term "unicast" but not something else: it just introduces confusions.
> 
You are right . non-multicast/broadcast is equal to unicast.  I agree with you
And will modify it as you say.

> And in this case, I guess you were trying to say:
> 
>     /*
>      * According to RFC 2863 section ifHCOutMulticastPkts, ... and ...,
>      * the counter "unicast", "multicast" and "broadcast" are also
>      * increased when packets are not transmitted successfully.
>      */
> 

Yes.  

> Well, you might still need reword it.
> 
> After taking a bit closer look at the code, I'd suggest to do the countings like
> following:
> 
> - introduce a help function to increase the "broadcast" or "multicast"
>   counter, say "count_multicast_broadcast(struct vhost_stats *stats, mbuf)".
> 
> - introduce a generic function to update the generic counters: this
>   function just counts those packets have been successfully transmitted
>   or received.
> 
>   It also invoke above helper function for multicast counting.
> 
>   It basically looks like the function vhost_update_packet_xstats,
>   execpt that it doesn't handle those failure packets: it will be
>   handled in following part.
> 
> - since the case "couting multicast and broadcast with failure packts"
>   only happens in the Tx side, we could do those countings only in
>   eth_vhost_tx():
> 
>     nb_tx = rte_vhost_enqueue_burst(r->vid,
> 			r->virtqueue_id, bufs, nb_bufs);
> 
>     missed = nb_bufs - nb_tx;
>     /* put above comment here */
>     if (missed) {
> 	for_each_mbuf(mbuf) {
> 	    count_multicast_broadcast(&vq->stats, mbuf);
>         }
>     }
> 
> - there is no need to update "stat[10]" (unicast), since it can be calculated
>   from other counters, meaning we could just get the right value on query.
> 
>   This could save some cycles.
> 
Ok, Your comments are detailed and clear. I will consider to remove unicast 
and modify update functions.

> Feel free to phone me if you have any doubts.
> 
> 	--yliu
Yang, Zhiyong Sept. 20, 2016, 9:36 a.m. | #8
This patch set adds the vhost pmd xstats support.

Patch 1 moves all stats counters to a new defined struct vhost_stats,
in order to manage all stats counters in a unified way.

Patch 2 adds the pmd xstats support.

Changes in v3:
1. rework the vhost_update_packet_xstats and separate it into two parts.
   One function deals with the generic packets update, another one deals
   with increasing the broadcast and multicast with failure packets sent
   according to RFC2863 page42 ifHCOutMulticastPkts ifHCOutBroadcastPkts.
2. define enum vhost_stat_pkts to replace the magic numbers and enhance
   the code readability.
3. remove some unnecessary type casts and fix one format issue.

Changes in v2:
1. remove the compiling switch.
2. fix two code bugs.

---
Zhiyong Yang (2):
  net/vhost: add a new stats struct
  net/vhost: add pmd xstats

 drivers/net/vhost/rte_eth_vhost.c | 306 +++++++++++++++++++++++++++++++++++---
 1 file changed, 286 insertions(+), 20 deletions(-)

Patch

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 7539cd4..8f805f3 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -72,6 +72,10 @@  static struct ether_addr base_eth_addr = {
 	}
 };
 
+struct vhost_xstats {
+	uint64_t stat[16];
+};
+
 struct vhost_queue {
 	int vid;
 	rte_atomic32_t allow_queuing;
@@ -85,7 +89,8 @@  struct vhost_queue {
 	uint64_t missed_pkts;
 	uint64_t rx_bytes;
 	uint64_t tx_bytes;
-};
+	struct vhost_xstats xstats;
+	};
 
 struct pmd_internal {
 	char *dev_name;
@@ -127,6 +132,274 @@  struct rte_vhost_vring_state {
 
 static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS];
 
+enum rte_vhostqueue_rxtx {
+	RTE_VHOSTQUEUE_RX = 0,
+	RTE_VHOSTQUEUE_TX = 1
+};
+
+#define RTE_ETH_VHOST_XSTATS_NAME_SIZE 64
+
+struct rte_vhost_xstats_name_off {
+	char name[RTE_ETH_VHOST_XSTATS_NAME_SIZE];
+	uint64_t offset;
+};
+
+/* [rt]_qX_ is prepended to the name string here */
+static const struct rte_vhost_xstats_name_off rte_vhost_rxq_stat_strings[] = {
+	{"good_packets",
+			offsetof(struct vhost_queue, rx_pkts)},
+	{"total_bytes",
+			offsetof(struct vhost_queue, rx_bytes)},
+	{"dropped_pkts",
+			offsetof(struct vhost_queue, missed_pkts)},
+	{"broadcast_packets",
+			offsetof(struct vhost_queue, xstats.stat[8])},
+	{"multicast_packets",
+			offsetof(struct vhost_queue, xstats.stat[9])},
+	{"ucast_packets",
+			offsetof(struct vhost_queue, xstats.stat[10])},
+	{"undersize_errors",
+			offsetof(struct vhost_queue, xstats.stat[0])},
+	{"size_64_packets",
+			offsetof(struct vhost_queue, xstats.stat[1])},
+	{"size_65_to_127_packets",
+			offsetof(struct vhost_queue, xstats.stat[2])},
+	{"size_128_to_255_packets",
+			offsetof(struct vhost_queue, xstats.stat[3])},
+	{"size_256_to_511_packets",
+			offsetof(struct vhost_queue, xstats.stat[4])},
+	{"size_512_to_1023_packets",
+			offsetof(struct vhost_queue, xstats.stat[5])},
+	{"size_1024_to_1522_packets",
+			offsetof(struct vhost_queue, xstats.stat[6])},
+	{"size_1523_to_max_packets",
+			offsetof(struct vhost_queue, xstats.stat[7])},
+	{"errors",
+			offsetof(struct vhost_queue, xstats.stat[11])},
+	{"fragmented_errors",
+			offsetof(struct vhost_queue, xstats.stat[12])},
+	{"jabber_errors",
+			offsetof(struct vhost_queue, xstats.stat[13])},
+	{"unknown_protos_packets",
+			offsetof(struct vhost_queue, xstats.stat[14])},
+};
+
+/* [tx]_qX_ is prepended to the name string here */
+static const struct rte_vhost_xstats_name_off rte_vhost_txq_stat_strings[] = {
+	{"good_packets",
+			offsetof(struct vhost_queue, tx_pkts)},
+	{"total_bytes",
+			offsetof(struct vhost_queue, tx_bytes)},
+	{"dropped_pkts",
+			offsetof(struct vhost_queue, missed_pkts)},
+	{"broadcast_packets",
+			offsetof(struct vhost_queue, xstats.stat[8])},
+	{"multicast_packets",
+			offsetof(struct vhost_queue, xstats.stat[9])},
+	{"ucast_packets",
+			offsetof(struct vhost_queue, xstats.stat[10])},
+	{"size_64_packets",
+			offsetof(struct vhost_queue, xstats.stat[1])},
+	{"size_65_to_127_packets",
+			offsetof(struct vhost_queue, xstats.stat[2])},
+	{"size_128_to_255_packets",
+			offsetof(struct vhost_queue, xstats.stat[3])},
+	{"size_256_to_511_packets",
+			offsetof(struct vhost_queue, xstats.stat[4])},
+	{"size_512_to_1023_packets",
+			offsetof(struct vhost_queue, xstats.stat[5])},
+	{"size_1024_to_1522_packets",
+			offsetof(struct vhost_queue, xstats.stat[6])},
+	{"size_1523_to_max_packets",
+			offsetof(struct vhost_queue, xstats.stat[7])},
+	{"errors",
+			offsetof(struct vhost_queue, xstats.stat[11])},
+};
+
+#define VHOST_NB_RXQ_XSTATS (sizeof(rte_vhost_rxq_stat_strings) / \
+			     sizeof(rte_vhost_rxq_stat_strings[0]))
+
+#define VHOST_NB_TXQ_XSTATS (sizeof(rte_vhost_txq_stat_strings) / \
+			     sizeof(rte_vhost_txq_stat_strings[0]))
+
+static void
+vhost_dev_xstats_reset(struct rte_eth_dev *dev)
+{
+	struct vhost_queue *vqrx = NULL;
+	struct vhost_queue *vqtx = NULL;
+	unsigned int i = 0;
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		if (!dev->data->rx_queues[i])
+			continue;
+		vqrx = (struct vhost_queue *)dev->data->rx_queues[i];
+		vqrx->rx_pkts = 0;
+		vqrx->rx_bytes = 0;
+		vqrx->missed_pkts = 0;
+		memset(&vqrx->xstats, 0, sizeof(vqrx->xstats));
+	}
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		if (!dev->data->tx_queues[i])
+			continue;
+		vqtx = (struct vhost_queue *)dev->data->tx_queues[i];
+		vqtx->tx_pkts = 0;
+		vqtx->tx_bytes = 0;
+		vqtx->missed_pkts = 0;
+		memset(&vqtx->xstats, 0, sizeof(vqtx->xstats));
+	}
+}
+
+static int
+vhost_dev_xstats_get_names(struct rte_eth_dev *dev,
+			   struct rte_eth_xstat_name *xstats_names,
+			   __rte_unused unsigned int limit)
+{
+	unsigned int i = 0;
+	unsigned int t = 0;
+	int count = 0;
+	int nstats = dev->data->nb_rx_queues * VHOST_NB_RXQ_XSTATS
+			+ dev->data->nb_tx_queues * VHOST_NB_TXQ_XSTATS;
+
+	if (xstats_names) {
+		for (i = 0; i < dev->data->nb_rx_queues; i++) {
+			struct vhost_queue *rxvq = dev->data->rx_queues[i];
+
+			if (!rxvq)
+				continue;
+			for (t = 0; t < VHOST_NB_RXQ_XSTATS; t++) {
+				snprintf(xstats_names[count].name,
+					 sizeof(xstats_names[count].name),
+					 "rx_q%u_%s", i,
+					 rte_vhost_rxq_stat_strings[t].name);
+				count++;
+			}
+		}
+		for (i = 0; i < dev->data->nb_tx_queues; i++) {
+			struct vhost_queue *txvq = dev->data->tx_queues[i];
+
+			if (!txvq)
+				continue;
+			for (t = 0; t < VHOST_NB_TXQ_XSTATS; t++) {
+				snprintf(xstats_names[count].name,
+					 sizeof(xstats_names[count].name),
+					 "tx_q%u_%s", i,
+					 rte_vhost_txq_stat_strings[t].name);
+				count++;
+			}
+		}
+		return count;
+	}
+	return nstats;
+}
+
+static int
+vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
+		     unsigned int n)
+{
+	unsigned int i;
+	unsigned int t;
+	unsigned int count = 0;
+
+	unsigned int nxstats = dev->data->nb_rx_queues * VHOST_NB_RXQ_XSTATS
+				+ dev->data->nb_tx_queues * VHOST_NB_TXQ_XSTATS;
+
+	if (n < nxstats)
+		return nxstats;
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		struct vhost_queue *rxvq =
+			(struct vhost_queue *)dev->data->rx_queues[i];
+
+		if (!rxvq)
+			continue;
+
+		for (t = 0; t < VHOST_NB_RXQ_XSTATS; t++) {
+			xstats[count].value = *(uint64_t *)(((char *)rxvq)
+				+ rte_vhost_rxq_stat_strings[t].offset);
+			count++;
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		struct vhost_queue *txvq =
+			(struct vhost_queue *)dev->data->tx_queues[i];
+
+		if (!txvq)
+			continue;
+
+		for (t = 0; t < VHOST_NB_TXQ_XSTATS; t++) {
+			xstats[count].value = *(uint64_t *)(((char *)txvq)
+				+ rte_vhost_txq_stat_strings[t].offset);
+			count++;
+		}
+	}
+
+	return count;
+}
+
+static void
+vhost_update_packet_xstats(struct vhost_queue *vq,
+			   struct rte_mbuf **bufs,
+			   uint16_t nb_rxtx,
+			   uint16_t nb_bufs,
+			   enum rte_vhostqueue_rxtx vqueue_rxtx)
+{
+	uint32_t pkt_len = 0;
+	uint64_t i = 0;
+	uint64_t index;
+	struct ether_addr *ea = NULL;
+	struct vhost_xstats *xstats_update = &vq->xstats;
+
+	for (i = 0; i < nb_rxtx ; i++) {
+		pkt_len = bufs[i]->pkt_len;
+		if (pkt_len == 64) {
+			xstats_update->stat[1]++;
+
+		} else if (pkt_len > 64 && pkt_len < 1024) {
+			index = (sizeof(pkt_len) * 8)
+				- __builtin_clz(pkt_len) - 5;
+			xstats_update->stat[index]++;
+		} else {
+			if (pkt_len < 64)
+				xstats_update->stat[0]++;
+			else if (pkt_len <= 1522)
+				xstats_update->stat[6]++;
+			else if (pkt_len > 1522)
+				xstats_update->stat[7]++;
+		}
+
+		ea = rte_pktmbuf_mtod(bufs[i], struct ether_addr *);
+		if (is_multicast_ether_addr(ea)) {
+			if (is_broadcast_ether_addr(ea))
+				/* broadcast++; */
+				xstats_update->stat[8]++;
+			else
+				/* multicast++; */
+				xstats_update->stat[9]++;
+		}
+	}
+	/* non-multi/broadcast, multi/broadcast, including those
+	 * that were discarded or not sent. from rfc2863
+	 */
+	if (vqueue_rxtx == RTE_VHOSTQUEUE_RX) {
+		xstats_update->stat[10] =  vq->rx_pkts + vq->missed_pkts
+					   - (xstats_update->stat[8]
+					   + xstats_update->stat[9]);
+	} else {
+		for (i = nb_rxtx; i < nb_bufs ; i++) {
+			ea = rte_pktmbuf_mtod(bufs[i], struct ether_addr *);
+			if (is_multicast_ether_addr(ea)) {
+				if (is_broadcast_ether_addr(ea))
+					xstats_update->stat[8]++;
+				else
+					xstats_update->stat[9]++;
+			}
+		}
+		xstats_update->stat[10] =  vq->tx_pkts + vq->missed_pkts
+			- (xstats_update->stat[8] + xstats_update->stat[9]);
+	}
+}
+
 static uint16_t
 eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
@@ -152,6 +425,8 @@  eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		r->rx_bytes += bufs[i]->pkt_len;
 	}
 
+	vhost_update_packet_xstats(r, bufs, nb_rx, nb_rx, RTE_VHOSTQUEUE_RX);
+
 out:
 	rte_atomic32_set(&r->while_queuing, 0);
 
@@ -182,6 +457,8 @@  eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	for (i = 0; likely(i < nb_tx); i++)
 		r->tx_bytes += bufs[i]->pkt_len;
 
+	vhost_update_packet_xstats(r, bufs, nb_tx, nb_bufs, RTE_VHOSTQUEUE_TX);
+
 	for (i = 0; likely(i < nb_tx); i++)
 		rte_pktmbuf_free(bufs[i]);
 out:
@@ -682,6 +959,9 @@  static const struct eth_dev_ops ops = {
 	.link_update = eth_link_update,
 	.stats_get = eth_stats_get,
 	.stats_reset = eth_stats_reset,
+	.xstats_reset = vhost_dev_xstats_reset,
+	.xstats_get = vhost_dev_xstats_get,
+	.xstats_get_names = vhost_dev_xstats_get_names,
 };
 
 static int