[1/4] vhost: inroduce operation to get vDPA queue stats

Message ID 1585826793-28709-2-git-send-email-matan@mellanox.com (mailing list archive)
State Changes Requested, archived
Delegated to: Maxime Coquelin
Headers
Series vhost: support vDPA virtio queue statistics |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Matan Azrad April 2, 2020, 11:26 a.m. UTC
  The vDPA device offloads all the datapath of the vhost device to the HW
device.

In order to expose to the user traffic information this patch introduce
new API to get traffic statistics per virtio queue.

The statistics are taken directly from the vDPA driver managing the HW
device.

Signed-off-by: Matan Azrad <matan@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
 doc/guides/rel_notes/release_20_05.rst    |  4 +++
 doc/guides/vdpadevs/features/default.ini  |  1 +
 doc/guides/vdpadevs/features_overview.rst |  3 +++
 lib/librte_vhost/rte_vdpa.h               | 45 ++++++++++++++++++++++++++++++-
 lib/librte_vhost/rte_vhost_version.map    |  1 +
 lib/librte_vhost/vdpa.c                   | 14 ++++++++++
 6 files changed, 67 insertions(+), 1 deletion(-)
  

Comments

Maxime Coquelin April 15, 2020, 2:36 p.m. UTC | #1
Hi Matan,

On 4/2/20 1:26 PM, Matan Azrad wrote:
> The vDPA device offloads all the datapath of the vhost device to the HW
> device.
> 
> In order to expose to the user traffic information this patch introduce
> new API to get traffic statistics per virtio queue.
> 
> The statistics are taken directly from the vDPA driver managing the HW
> device.
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> ---
>  doc/guides/rel_notes/release_20_05.rst    |  4 +++
>  doc/guides/vdpadevs/features/default.ini  |  1 +
>  doc/guides/vdpadevs/features_overview.rst |  3 +++
>  lib/librte_vhost/rte_vdpa.h               | 45 ++++++++++++++++++++++++++++++-
>  lib/librte_vhost/rte_vhost_version.map    |  1 +
>  lib/librte_vhost/vdpa.c                   | 14 ++++++++++
>  6 files changed, 67 insertions(+), 1 deletion(-)

...

> diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h
> index 9a3deb3..d6cbf48 100644
> --- a/lib/librte_vhost/rte_vdpa.h
> +++ b/lib/librte_vhost/rte_vdpa.h
> @@ -37,6 +37,27 @@ struct rte_vdpa_dev_addr {
>  	};
>  };
>  
> +struct rte_vdpa_queue_stats {
> +	/** Number of descriptors received by device */
> +	uint64_t received_desc;
> +	/** Number of descriptors completed by the device */
> +	uint64_t completed_desc;
> +	/** Number of bad descriptors received by device */
> +	uint32_t bad_desc;
> +	/**
> +	 * Number of chained descriptors received that exceed the max allowed
> +	 * chain by device
> +	 */
> +	uint32_t exceed_max_chain;
> +	/**
> +	 * Number of times device tried to read or write buffer that is not
> +	 * registered to the device
> +	 */
> +	uint32_t invalid_buffer;
> +	/** Number of errors detected by the device */
> +	uint32_t errors;
> +};
> +

I think doing it like that, we risk to keep the rte_vdpa_get_stats API
always experimental, as every vendor will want to add their own counters
and so break the ABI.

How about implementing something similar to rte_eth_xstat?
As these stats are for debugging purpose, it would give you much more
flexibility in adding new counters as HW or firmwares evolves.

What do you think?

Thanks,
Maxime

>  /**
>   * vdpa device operations
>   */
> @@ -73,8 +94,11 @@ struct rte_vdpa_dev_ops {
>  	int (*get_notify_area)(int vid, int qid,
>  			uint64_t *offset, uint64_t *size);
>  
> +	/** Get statistics of the queue */
> +	int (*get_stats)(int did, int qid, struct rte_vdpa_queue_stats *stats);
> +
>  	/** Reserved for future extension */
> -	void *reserved[5];
> +	void *reserved[4];
>  };
>  
>  /**
> @@ -200,4 +224,23 @@ struct rte_vdpa_device *
>  __rte_experimental
>  int
>  rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Get vDPA device queue statistics.
> + *
> + * @param did
> + *  device id
> + * @param qid
> + *  queue id
> + * @param stats
> + *  queue statistics pointer.
> + * @return
> + *  0 on success, non-zero on failure.
> + */
> +__rte_experimental
> +int
> +rte_vdpa_get_stats(int did, uint16_t qid, struct rte_vdpa_queue_stats *stats);
>  #endif /* _RTE_VDPA_H_ */
> diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
> index 051d08c..c9dcff4 100644
> --- a/lib/librte_vhost/rte_vhost_version.map
> +++ b/lib/librte_vhost/rte_vhost_version.map
> @@ -38,6 +38,7 @@ EXPERIMENTAL {
>  	rte_vdpa_find_device_id;
>  	rte_vdpa_get_device;
>  	rte_vdpa_get_device_num;
> +	rte_vdpa_get_stats;
>  	rte_vhost_driver_attach_vdpa_device;
>  	rte_vhost_driver_detach_vdpa_device;
>  	rte_vhost_driver_get_vdpa_device_id;
> diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c
> index 2b86708..57900fc 100644
> --- a/lib/librte_vhost/vdpa.c
> +++ b/lib/librte_vhost/vdpa.c
> @@ -227,3 +227,17 @@ struct rte_vdpa_device *
>  		free_ind_table(idesc);
>  	return -1;
>  }
> +
> +int
> +rte_vdpa_get_stats(int did, uint16_t qid, struct rte_vdpa_queue_stats *stats)
> +{
> +	struct rte_vdpa_device *vdpa_dev;
> +
> +	vdpa_dev = rte_vdpa_get_device(did);
> +	if (!vdpa_dev)
> +		return -ENODEV;
> +
> +	RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev->ops->get_stats, -ENOTSUP);
> +
> +	return vdpa_dev->ops->get_stats(did, qid, stats);
> +}
>
  
Matan Azrad April 16, 2020, 9:06 a.m. UTC | #2
Hi Maxime

Can you point on specific vendor specific counter I suggested?
I think all of them come directly from virtio protocols.


äùâ àú Outlook òáåø Android<https://aka.ms/ghei36>
  
Maxime Coquelin April 16, 2020, 1:19 p.m. UTC | #3
Hi Matan,

On 4/16/20 11:06 AM, Matan Azrad wrote:
> Hi Maxime
> 
> Can you point on specific vendor specific counter I suggested?

No, I can't, but I think we can expect that other vendors may have other
counters they would be interested to dump.

Maybe Intel has some counters in the IFC that they could dump.
Xiao, any thoughts?

> I think all of them come directly from virtio protocols.

exceed_max_chain, for example. Doesn't the spec specify that a
descriptors chain can be as long as the size of the virtqueue?

Here it seems to indicate the device could support less.

Also, as the spec evolves, we may have new counters that comes up,
so better to have something flexible from the start IMHO to avoid ABI
breakages.

Maybe we can have some common xstats names for the Virtio related
counters define in vdpa lib, and then the vendors can specify more
vendor-specific counters if they wish?

Thanks,
Maxime
> 
> השג את Outlook עבור Android <https://aka.ms/ghei36>
> ------------------------------------------------------------------------
> *From:* Maxime Coquelin <maxime.coquelin@redhat.com>
> *Sent:* Wednesday, April 15, 2020 5:36:59 PM
> *To:* Matan Azrad <matan@mellanox.com>; dev@dpdk.org <dev@dpdk.org>
> *Cc:* Slava Ovsiienko <viacheslavo@mellanox.com>; Shahaf Shuler
> <shahafs@mellanox.com>
> *Subject:* Re: [PATCH 1/4] vhost: inroduce operation to get vDPA queue
> stats
>  
> Hi Matan,
> 
> On 4/2/20 1:26 PM, Matan Azrad wrote:
>> The vDPA device offloads all the datapath of the vhost device to the HW
>> device.
>> 
>> In order to expose to the user traffic information this patch introduce
>> new API to get traffic statistics per virtio queue.
>> 
>> The statistics are taken directly from the vDPA driver managing the HW
>> device.
>> 
>> Signed-off-by: Matan Azrad <matan@mellanox.com>
>> Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
>> ---
>>  doc/guides/rel_notes/release_20_05.rst    |  4 +++
>>  doc/guides/vdpadevs/features/default.ini  |  1 +
>>  doc/guides/vdpadevs/features_overview.rst |  3 +++
>>  lib/librte_vhost/rte_vdpa.h               | 45 ++++++++++++++++++++++++++++++-
>>  lib/librte_vhost/rte_vhost_version.map    |  1 +
>>  lib/librte_vhost/vdpa.c                   | 14 ++++++++++
>>  6 files changed, 67 insertions(+), 1 deletion(-)
> 
> ...
> 
>> diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h
>> index 9a3deb3..d6cbf48 100644
>> --- a/lib/librte_vhost/rte_vdpa.h
>> +++ b/lib/librte_vhost/rte_vdpa.h
>> @@ -37,6 +37,27 @@ struct rte_vdpa_dev_addr {
>>        };
>>  };
>>  
>> +struct rte_vdpa_queue_stats {
>> +     /** Number of descriptors received by device */
>> +     uint64_t received_desc;
>> +     /** Number of descriptors completed by the device */
>> +     uint64_t completed_desc;
>> +     /** Number of bad descriptors received by device */
>> +     uint32_t bad_desc;
>> +     /**
>> +      * Number of chained descriptors received that exceed the max allowed
>> +      * chain by device
>> +      */
>> +     uint32_t exceed_max_chain;
>> +     /**
>> +      * Number of times device tried to read or write buffer that is not
>> +      * registered to the device
>> +      */
>> +     uint32_t invalid_buffer;
>> +     /** Number of errors detected by the device */
>> +     uint32_t errors;
>> +};
>> +
> 
> I think doing it like that, we risk to keep the rte_vdpa_get_stats API
> always experimental, as every vendor will want to add their own counters
> and so break the ABI.
> 
> How about implementing something similar to rte_eth_xstat?
> As these stats are for debugging purpose, it would give you much more
> flexibility in adding new counters as HW or firmwares evolves.
> 
> What do you think?
> 
> Thanks,
> Maxime
> 
>>  /**
>>   * vdpa device operations
>>   */
>> @@ -73,8 +94,11 @@ struct rte_vdpa_dev_ops {
>>        int (*get_notify_area)(int vid, int qid,
>>                        uint64_t *offset, uint64_t *size);
>>  
>> +     /** Get statistics of the queue */
>> +     int (*get_stats)(int did, int qid, struct rte_vdpa_queue_stats *stats);
>> +
>>        /** Reserved for future extension */
>> -     void *reserved[5];
>> +     void *reserved[4];
>>  };
>>  
>>  /**
>> @@ -200,4 +224,23 @@ struct rte_vdpa_device *
>>  __rte_experimental
>>  int
>>  rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m);
>> +
>> +/**
>> + * @warning
>> + * @b EXPERIMENTAL: this API may change without prior notice
>> + *
>> + * Get vDPA device queue statistics.
>> + *
>> + * @param did
>> + *  device id
>> + * @param qid
>> + *  queue id
>> + * @param stats
>> + *  queue statistics pointer.
>> + * @return
>> + *  0 on success, non-zero on failure.
>> + */
>> +__rte_experimental
>> +int
>> +rte_vdpa_get_stats(int did, uint16_t qid, struct rte_vdpa_queue_stats *stats);
>>  #endif /* _RTE_VDPA_H_ */
>> diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
>> index 051d08c..c9dcff4 100644
>> --- a/lib/librte_vhost/rte_vhost_version.map
>> +++ b/lib/librte_vhost/rte_vhost_version.map
>> @@ -38,6 +38,7 @@ EXPERIMENTAL {
>>        rte_vdpa_find_device_id;
>>        rte_vdpa_get_device;
>>        rte_vdpa_get_device_num;
>> +     rte_vdpa_get_stats;
>>        rte_vhost_driver_attach_vdpa_device;
>>        rte_vhost_driver_detach_vdpa_device;
>>        rte_vhost_driver_get_vdpa_device_id;
>> diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c
>> index 2b86708..57900fc 100644
>> --- a/lib/librte_vhost/vdpa.c
>> +++ b/lib/librte_vhost/vdpa.c
>> @@ -227,3 +227,17 @@ struct rte_vdpa_device *
>>                free_ind_table(idesc);
>>        return -1;
>>  }
>> +
>> +int
>> +rte_vdpa_get_stats(int did, uint16_t qid, struct rte_vdpa_queue_stats *stats)
>> +{
>> +     struct rte_vdpa_device *vdpa_dev;
>> +
>> +     vdpa_dev = rte_vdpa_get_device(did);
>> +     if (!vdpa_dev)
>> +             return -ENODEV;
>> +
>> +     RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev->ops->get_stats, -ENOTSUP);
>> +
>> +     return vdpa_dev->ops->get_stats(did, qid, stats);
>> +}
>> 
> 
> 
> השג את Outlook עבור Android <https://aka.ms/ghei36>
  
Shahaf Shuler April 19, 2020, 6:18 a.m. UTC | #4
Thursday, April 16, 2020 4:20 PM, Maxime Coquelin:
> Subject: Re: [PATCH 1/4] vhost: inroduce operation to get vDPA queue stats
> 
> Hi Matan,
> 
> On 4/16/20 11:06 AM, Matan Azrad wrote:
> > Hi Maxime
> >
> > Can you point on specific vendor specific counter I suggested?
> 
> No, I can't, but I think we can expect that other vendors may have other
> counters they would be interested to dump.
> 
> Maybe Intel has some counters in the IFC that they could dump.
> Xiao, any thoughts?
> 
> > I think all of them come directly from virtio protocols.
> 
> exceed_max_chain, for example. Doesn't the spec specify that a descriptors
> chain can be as long as the size of the virtqueue?
> 
> Here it seems to indicate the device could support less.

Spec allows device to limit the max supported chain (see [1]). 

> 
> Also, as the spec evolves, we may have new counters that comes up, so
> better to have something flexible from the start IMHO to avoid ABI
> breakages.

I think there are better ways to address that, e.g.:
1. have some reserved fields for future
2. have the option to point to next item, and by that link chain of stat structures

> 
> Maybe we can have some common xstats names for the Virtio related
> counters define in vdpa lib, and then the vendors can specify more vendor-
> specific counters if they wish?

xstats are good, and we should have it to expose the vendor specific counters. The basic counters though, should be simple and vendor agnostic so that any SW/scripting layer on top of the DPDK can easily use and expose it. 
Hence I think it will be good to have the basic counters with well-defined stats structure as part of the vdpa stats API. Is the exceed_max_chain is the only counter you find odd or there are more? 

> 
> Thanks,
> Maxime

[1]
https://github.com/oasis-tcs/virtio-spec/blob/master/packed-ring.tex#L498
  
Maxime Coquelin April 20, 2020, 7:13 a.m. UTC | #5
Hi Shahaf,

On 4/19/20 8:18 AM, Shahaf Shuler wrote:
> Thursday, April 16, 2020 4:20 PM, Maxime Coquelin:
>> Subject: Re: [PATCH 1/4] vhost: inroduce operation to get vDPA queue stats
>>
>> Hi Matan,
>>
>> On 4/16/20 11:06 AM, Matan Azrad wrote:
>>> Hi Maxime
>>>
>>> Can you point on specific vendor specific counter I suggested?
>>
>> No, I can't, but I think we can expect that other vendors may have other
>> counters they would be interested to dump.
>>
>> Maybe Intel has some counters in the IFC that they could dump.
>> Xiao, any thoughts?
>>
>>> I think all of them come directly from virtio protocols.
>>
>> exceed_max_chain, for example. Doesn't the spec specify that a descriptors
>> chain can be as long as the size of the virtqueue?
>>
>> Here it seems to indicate the device could support less.
> 
> Spec allows device to limit the max supported chain (see [1]). 

Ha ok, I missed that. Please note that this is only allowed for packed
ring, it is not in the split ring part.

>>
>> Also, as the spec evolves, we may have new counters that comes up, so
>> better to have something flexible from the start IMHO to avoid ABI
>> breakages.
> 
> I think there are better ways to address that, e.g.:
> 1. have some reserved fields for future
> 2. have the option to point to next item, and by that link chain of stat structures
> 
>>
>> Maybe we can have some common xstats names for the Virtio related
>> counters define in vdpa lib, and then the vendors can specify more vendor-
>> specific counters if they wish?
> 
> xstats are good, and we should have it to expose the vendor specific counters. The basic counters though, should be simple and vendor agnostic so that any SW/scripting layer on top of the DPDK can easily use and expose it. 
> Hence I think it will be good to have the basic counters with well-defined stats structure as part of the vdpa stats API. Is the exceed_max_chain is the only counter you find odd or there are more? 

Problem is that not all the vDPA NIC will implement these counters, so
with only proposed implementation, the user cannot know whether counter
value is 0 or counter is just not implemented. For example, the Virtio
specification does not specify counters, so a full Virtio HW offload
device won't have them.

So I think the xstat is the right thing to do, with standardized names
for the standard  counters.

Regards,
Maxime
>>
>> Thanks,
>> Maxime
> 
> [1]
> https://github.com/oasis-tcs/virtio-spec/blob/master/packed-ring.tex#L498
>
  
Shahaf Shuler April 20, 2020, 3:57 p.m. UTC | #6
Monday, April 20, 2020 10:13 AM, Maxime Coquelin:
> Subject: Re: [PATCH 1/4] vhost: inroduce operation to get vDPA queue stats
> 
> Hi Shahaf,
> 
> On 4/19/20 8:18 AM, Shahaf Shuler wrote:
> > Thursday, April 16, 2020 4:20 PM, Maxime Coquelin:
> >> Subject: Re: [PATCH 1/4] vhost: inroduce operation to get vDPA queue
> >> stats
> >>
> >> Hi Matan,
> >>
> >> On 4/16/20 11:06 AM, Matan Azrad wrote:
> >>> Hi Maxime
> >>>
> >>> Can you point on specific vendor specific counter I suggested?
> >>
> >> No, I can't, but I think we can expect that other vendors may have
> >> other counters they would be interested to dump.
> >>
> >> Maybe Intel has some counters in the IFC that they could dump.
> >> Xiao, any thoughts?
> >>
> >>> I think all of them come directly from virtio protocols.
> >>
> >> exceed_max_chain, for example. Doesn't the spec specify that a
> >> descriptors chain can be as long as the size of the virtqueue?
> >>
> >> Here it seems to indicate the device could support less.
> >
> > Spec allows device to limit the max supported chain (see [1]).
> 
> Ha ok, I missed that. Please note that this is only allowed for packed ring, it is
> not in the split ring part.

On my version of spec (csprd01) it is also for split, however it was removed on the latest version not sure why. 

> 
> >>
> >> Also, as the spec evolves, we may have new counters that comes up, so
> >> better to have something flexible from the start IMHO to avoid ABI
> >> breakages.
> >
> > I think there are better ways to address that, e.g.:
> > 1. have some reserved fields for future 2. have the option to point to
> > next item, and by that link chain of stat structures
> >
> >>
> >> Maybe we can have some common xstats names for the Virtio related
> >> counters define in vdpa lib, and then the vendors can specify more
> >> vendor- specific counters if they wish?
> >
> > xstats are good, and we should have it to expose the vendor specific
> counters. The basic counters though, should be simple and vendor agnostic
> so that any SW/scripting layer on top of the DPDK can easily use and expose
> it.
> > Hence I think it will be good to have the basic counters with well-defined
> stats structure as part of the vdpa stats API. Is the exceed_max_chain is the
> only counter you find odd or there are more?
> 
> Problem is that not all the vDPA NIC will implement these counters, so with
> only proposed implementation, the user cannot know whether counter
> value is 0 or counter is just not implemented. For example, the Virtio
> specification does not specify counters, so a full Virtio HW offload device
> won't have them.

Yeah, full virtio emulated device is a good example. 
I think it is odd virtio doesn’t provide any statistics, e.g. how would the Netdev on top of the virtio device report anything? How will ppl debug? 
I think sooner or later we will need a way to expose stats. 

> 
> So I think the xstat is the right thing to do, with standardized names for the
> standard  counters.

Yeah tend to agree on this one due to the lack of spec statistics. 

> 
> Regards,
> Maxime
> >>
> >> Thanks,
> >> Maxime
> >
> > [1]
> > https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgith
> > ub.com%2Foasis-tcs%2Fvirtio-spec%2Fblob%2Fmaster%2Fpacked-
> ring.tex%23L
> >
> 498&amp;data=02%7C01%7Cshahafs%40mellanox.com%7C2fbf00c6e115488f
> 483e08
> >
> d7e4fa4940%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C6372296
> 3594175
> >
> 9512&amp;sdata=m2rPPMM%2Fen9Vkbp%2Fg5xz0MSTWYURh7woI7w5%2B
> b2Zjy8%3D&am
> > p;reserved=0
> >
  
Maxime Coquelin April 20, 2020, 4:18 p.m. UTC | #7
On 4/20/20 5:57 PM, Shahaf Shuler wrote:
> Monday, April 20, 2020 10:13 AM, Maxime Coquelin:
>> Subject: Re: [PATCH 1/4] vhost: inroduce operation to get vDPA queue stats
>>
>> Hi Shahaf,
>>
>> On 4/19/20 8:18 AM, Shahaf Shuler wrote:
>>> Thursday, April 16, 2020 4:20 PM, Maxime Coquelin:
>>>> Subject: Re: [PATCH 1/4] vhost: inroduce operation to get vDPA queue
>>>> stats
>>>>
>>>> Hi Matan,
>>>>
>>>> On 4/16/20 11:06 AM, Matan Azrad wrote:
>>>>> Hi Maxime
>>>>>
>>>>> Can you point on specific vendor specific counter I suggested?
>>>>
>>>> No, I can't, but I think we can expect that other vendors may have
>>>> other counters they would be interested to dump.
>>>>
>>>> Maybe Intel has some counters in the IFC that they could dump.
>>>> Xiao, any thoughts?
>>>>
>>>>> I think all of them come directly from virtio protocols.
>>>>
>>>> exceed_max_chain, for example. Doesn't the spec specify that a
>>>> descriptors chain can be as long as the size of the virtqueue?
>>>>
>>>> Here it seems to indicate the device could support less.
>>>
>>> Spec allows device to limit the max supported chain (see [1]).
>>
>> Ha ok, I missed that. Please note that this is only allowed for packed ring, it is
>> not in the split ring part.
> 
> On my version of spec (csprd01) it is also for split, however it was removed on the latest version not sure why. 

Problem is that older drivers may assume max chain size is the virtio
ring size.

By the way, how is the guest Virtio driver made aware of the max
chain size? Isn't that missing in the spec?

>>
>>>>
>>>> Also, as the spec evolves, we may have new counters that comes up, so
>>>> better to have something flexible from the start IMHO to avoid ABI
>>>> breakages.
>>>
>>> I think there are better ways to address that, e.g.:
>>> 1. have some reserved fields for future 2. have the option to point to
>>> next item, and by that link chain of stat structures
>>>
>>>>
>>>> Maybe we can have some common xstats names for the Virtio related
>>>> counters define in vdpa lib, and then the vendors can specify more
>>>> vendor- specific counters if they wish?
>>>
>>> xstats are good, and we should have it to expose the vendor specific
>> counters. The basic counters though, should be simple and vendor agnostic
>> so that any SW/scripting layer on top of the DPDK can easily use and expose
>> it.
>>> Hence I think it will be good to have the basic counters with well-defined
>> stats structure as part of the vdpa stats API. Is the exceed_max_chain is the
>> only counter you find odd or there are more?
>>
>> Problem is that not all the vDPA NIC will implement these counters, so with
>> only proposed implementation, the user cannot know whether counter
>> value is 0 or counter is just not implemented. For example, the Virtio
>> specification does not specify counters, so a full Virtio HW offload device
>> won't have them.
> 
> Yeah, full virtio emulated device is a good example. 
> I think it is odd virtio doesn’t provide any statistics, e.g. how would the Netdev on top of the virtio device report anything? How will ppl debug? 
> I think sooner or later we will need a way to expose stats. 

I agree.
These missing counters were not blocking when using a SW backend, but
with HW Offload I agree such counters would be welcome.

>>
>> So I think the xstat is the right thing to do, with standardized names for the
>> standard  counters.
> 
> Yeah tend to agree on this one due to the lack of spec statistics. 
> 
>>
>> Regards,
>> Maxime
>>>>
>>>> Thanks,
>>>> Maxime
>>>
>>> [1]
>>> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgith
>>> ub.com%2Foasis-tcs%2Fvirtio-spec%2Fblob%2Fmaster%2Fpacked-
>> ring.tex%23L
>>>
>> 498&amp;data=02%7C01%7Cshahafs%40mellanox.com%7C2fbf00c6e115488f
>> 483e08
>>>
>> d7e4fa4940%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C6372296
>> 3594175
>>>
>> 9512&amp;sdata=m2rPPMM%2Fen9Vkbp%2Fg5xz0MSTWYURh7woI7w5%2B
>> b2Zjy8%3D&am
>>> p;reserved=0
>>>
>
  
Shahaf Shuler April 21, 2020, 5:02 a.m. UTC | #8
Monday, April 20, 2020 7:19 PM, Maxime Coquelin:
> Cc: Slava Ovsiienko <viacheslavo@mellanox.com>
> Subject: Re: [PATCH 1/4] vhost: inroduce operation to get vDPA queue stats
> 
> 
> 
> On 4/20/20 5:57 PM, Shahaf Shuler wrote:
> > Monday, April 20, 2020 10:13 AM, Maxime Coquelin:
> >> Subject: Re: [PATCH 1/4] vhost: inroduce operation to get vDPA queue
> >> stats
> >>
> >> Hi Shahaf,
> >>
> >> On 4/19/20 8:18 AM, Shahaf Shuler wrote:
> >>> Thursday, April 16, 2020 4:20 PM, Maxime Coquelin:
> >>>> Subject: Re: [PATCH 1/4] vhost: inroduce operation to get vDPA
> >>>> queue stats
> >>>>
> >>>> Hi Matan,
> >>>>
> >>>> On 4/16/20 11:06 AM, Matan Azrad wrote:
> >>>>> Hi Maxime
> >>>>>
> >>>>> Can you point on specific vendor specific counter I suggested?
> >>>>
> >>>> No, I can't, but I think we can expect that other vendors may have
> >>>> other counters they would be interested to dump.
> >>>>
> >>>> Maybe Intel has some counters in the IFC that they could dump.
> >>>> Xiao, any thoughts?
> >>>>
> >>>>> I think all of them come directly from virtio protocols.
> >>>>
> >>>> exceed_max_chain, for example. Doesn't the spec specify that a
> >>>> descriptors chain can be as long as the size of the virtqueue?
> >>>>
> >>>> Here it seems to indicate the device could support less.
> >>>
> >>> Spec allows device to limit the max supported chain (see [1]).
> >>
> >> Ha ok, I missed that. Please note that this is only allowed for
> >> packed ring, it is not in the split ring part.
> >
> > On my version of spec (csprd01) it is also for split, however it was removed
> on the latest version not sure why.

Actually also for split queue there is this vouge paragraph -
https://github.com/oasis-tcs/virtio-spec/blob/master/split-ring.tex#L138

> 
> Problem is that older drivers may assume max chain size is the virtio ring size.
> 
> By the way, how is the guest Virtio driver made aware of the max chain size?
> Isn't that missing in the spec?

Spec gap. Doesn't say how device report, doesn’t device how device should fail and report error. 

>
  

Patch

diff --git a/doc/guides/rel_notes/release_20_05.rst b/doc/guides/rel_notes/release_20_05.rst
index c960fd2..812d19b 100644
--- a/doc/guides/rel_notes/release_20_05.rst
+++ b/doc/guides/rel_notes/release_20_05.rst
@@ -56,6 +56,10 @@  New Features
      Also, make sure to start the actual text at the margin.
      =========================================================
 
+* **Added vDPA device API to query virtio queue statistics.**
+
+  A new API has been added to query virtio queue statistics from a vDPA device.
+
 * **Updated Mellanox mlx5 driver.**
 
   Updated Mellanox mlx5 driver with new features and improvements, including:
diff --git a/doc/guides/vdpadevs/features/default.ini b/doc/guides/vdpadevs/features/default.ini
index 518e4f1..2c122a3 100644
--- a/doc/guides/vdpadevs/features/default.ini
+++ b/doc/guides/vdpadevs/features/default.ini
@@ -37,6 +37,7 @@  proto rarp           =
 proto reply ack      =
 proto host notifier  =
 proto pagefault      =
+queue statistics     =
 BSD nic_uio          =
 Linux VFIO           =
 Other kdrv           =
diff --git a/doc/guides/vdpadevs/features_overview.rst b/doc/guides/vdpadevs/features_overview.rst
index eb7eb3b..930bc87 100644
--- a/doc/guides/vdpadevs/features_overview.rst
+++ b/doc/guides/vdpadevs/features_overview.rst
@@ -96,6 +96,9 @@  proto host notifier
 proto pagefault
   Slave expose page-fault FD for migration process.
 
+queue statistics
+  Support virtio queue statistics query.
+
 BSD nic_uio
   BSD ``nic_uio`` module supported.
 
diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h
index 9a3deb3..d6cbf48 100644
--- a/lib/librte_vhost/rte_vdpa.h
+++ b/lib/librte_vhost/rte_vdpa.h
@@ -37,6 +37,27 @@  struct rte_vdpa_dev_addr {
 	};
 };
 
+struct rte_vdpa_queue_stats {
+	/** Number of descriptors received by device */
+	uint64_t received_desc;
+	/** Number of descriptors completed by the device */
+	uint64_t completed_desc;
+	/** Number of bad descriptors received by device */
+	uint32_t bad_desc;
+	/**
+	 * Number of chained descriptors received that exceed the max allowed
+	 * chain by device
+	 */
+	uint32_t exceed_max_chain;
+	/**
+	 * Number of times device tried to read or write buffer that is not
+	 * registered to the device
+	 */
+	uint32_t invalid_buffer;
+	/** Number of errors detected by the device */
+	uint32_t errors;
+};
+
 /**
  * vdpa device operations
  */
@@ -73,8 +94,11 @@  struct rte_vdpa_dev_ops {
 	int (*get_notify_area)(int vid, int qid,
 			uint64_t *offset, uint64_t *size);
 
+	/** Get statistics of the queue */
+	int (*get_stats)(int did, int qid, struct rte_vdpa_queue_stats *stats);
+
 	/** Reserved for future extension */
-	void *reserved[5];
+	void *reserved[4];
 };
 
 /**
@@ -200,4 +224,23 @@  struct rte_vdpa_device *
 __rte_experimental
 int
 rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Get vDPA device queue statistics.
+ *
+ * @param did
+ *  device id
+ * @param qid
+ *  queue id
+ * @param stats
+ *  queue statistics pointer.
+ * @return
+ *  0 on success, non-zero on failure.
+ */
+__rte_experimental
+int
+rte_vdpa_get_stats(int did, uint16_t qid, struct rte_vdpa_queue_stats *stats);
 #endif /* _RTE_VDPA_H_ */
diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
index 051d08c..c9dcff4 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -38,6 +38,7 @@  EXPERIMENTAL {
 	rte_vdpa_find_device_id;
 	rte_vdpa_get_device;
 	rte_vdpa_get_device_num;
+	rte_vdpa_get_stats;
 	rte_vhost_driver_attach_vdpa_device;
 	rte_vhost_driver_detach_vdpa_device;
 	rte_vhost_driver_get_vdpa_device_id;
diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c
index 2b86708..57900fc 100644
--- a/lib/librte_vhost/vdpa.c
+++ b/lib/librte_vhost/vdpa.c
@@ -227,3 +227,17 @@  struct rte_vdpa_device *
 		free_ind_table(idesc);
 	return -1;
 }
+
+int
+rte_vdpa_get_stats(int did, uint16_t qid, struct rte_vdpa_queue_stats *stats)
+{
+	struct rte_vdpa_device *vdpa_dev;
+
+	vdpa_dev = rte_vdpa_get_device(did);
+	if (!vdpa_dev)
+		return -ENODEV;
+
+	RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev->ops->get_stats, -ENOTSUP);
+
+	return vdpa_dev->ops->get_stats(did, qid, stats);
+}