diff mbox series

[RFC,v4,1/4] ethdev: add meter PPS profile

Message ID 20210301103532.184983-2-lizh@nvidia.com (mailing list archive)
State Superseded
Delegated to: Ferruh Yigit
Headers show
Series adds support PPS(packet per second) on meter | expand

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Li Zhang March 1, 2021, 10:35 a.m. UTC
Currently, meter algorithms only support rates specified in bytes per second (BPS).
Add a new meter srTCMp algorithm to support rates specified in packets per second (PPS),
so that traffic can be metered by packets per second.
The below structure will be extended:
rte_mtr_algorithm
rte_mtr_meter_profile
Signed-off-by: Li Zhang <lizh@nvidia.com>
---
 .../traffic_metering_and_policing.rst         |  3 +-
 doc/guides/rel_notes/release_20_11.rst        |  5 +++
 lib/librte_ethdev/rte_mtr.h                   | 32 +++++++++++++++++++
 3 files changed, 39 insertions(+), 1 deletion(-)

Comments

Cristian Dumitrescu March 1, 2021, 1:20 p.m. UTC | #1
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Li Zhang
> Sent: Monday, March 1, 2021 10:35 AM
> To: dekelp@nvidia.com; orika@nvidia.com; viacheslavo@nvidia.com;
> matan@nvidia.com
> Cc: dev@dpdk.org; thomas@monjalon.net; rasland@nvidia.com;
> mb@smartsharesystems.com; ajit.khaparde@broadcom.com
> Subject: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> 
> Currently meter algorithms only supports rate is bytes per second(BPS).
> Add this new meter srTCMp algorithm to support rate is packet per second.
> So that it can meter traffic by packet per second.
> The below structure will be extended:
> rte_mtr_algorithm
> rte_mtr_meter_profile
> Signed-off-by: Li Zhang <lizh@nvidia.com>
> ---
>  .../traffic_metering_and_policing.rst         |  3 +-
>  doc/guides/rel_notes/release_20_11.rst        |  5 +++
>  lib/librte_ethdev/rte_mtr.h                   | 32 +++++++++++++++++++
>  3 files changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/doc/guides/prog_guide/traffic_metering_and_policing.rst
> b/doc/guides/prog_guide/traffic_metering_and_policing.rst
> index 90c781eb1d..4d2405d44a 100644
> --- a/doc/guides/prog_guide/traffic_metering_and_policing.rst
> +++ b/doc/guides/prog_guide/traffic_metering_and_policing.rst
> @@ -17,7 +17,8 @@ The main features are:
>  * Part of DPDK rte_ethdev API
>  * Capability query API
>  * Metering algorithms: RFC 2697 Single Rate Three Color Marker (srTCM),
> RFC 2698
> -  and RFC 4115 Two Rate Three Color Marker (trTCM)
> +  and RFC 4115 Two Rate Three Color Marker (trTCM),
> +  Single Rate Three Color Marker, Packet based (srTCMp).
>  * Policer actions (per meter output color): recolor, drop
>  * Statistics (per policer output color)
> 
> diff --git a/doc/guides/rel_notes/release_20_11.rst
> b/doc/guides/rel_notes/release_20_11.rst
> index 7405a9864f..de04886cc9 100644
> --- a/doc/guides/rel_notes/release_20_11.rst
> +++ b/doc/guides/rel_notes/release_20_11.rst
> @@ -429,6 +429,11 @@ New Features
>    can leverage IOAT DMA channels with vhost asynchronous APIs.
>    See the :doc:`../sample_app_ug/vhost` for more details.
> 
> +* **Added support for meter PPS profile.**
> +
> +  Currently meter algorithms only supports bytes per second(BPS).
> +  Add this new meter algorithm to support packet per second (PPS) mode.
> +  So that it can meter traffic by packet per second.
> 
>  Removed Items
>  -------------
> diff --git a/lib/librte_ethdev/rte_mtr.h b/lib/librte_ethdev/rte_mtr.h
> index 916a09c5c3..f27a4b5354 100644
> --- a/lib/librte_ethdev/rte_mtr.h
> +++ b/lib/librte_ethdev/rte_mtr.h
> @@ -119,6 +119,11 @@ enum rte_mtr_algorithm {
> 
>  	/** Two Rate Three Color Marker (trTCM) - IETF RFC 4115. */
>  	RTE_MTR_TRTCM_RFC4115,
> +
> +	/** Single Rate Three Color Marker, Packet based (srTCMp).
> +	 * - - similar to IETF RFC 2697 but rate is packet per second.
> +	 */
> +	RTE_MTR_SRTCMP,
>  };
> 
>  /**
> @@ -171,6 +176,20 @@ struct rte_mtr_meter_profile {
>  			/** Excess Burst Size (EBS) (bytes). */
>  			uint64_t ebs;
>  		} trtcm_rfc4115;
> +
> +		/** Items only valid when *alg* is set to srTCMp. */
> +		struct {
> +			/** Committed Information Rate (CIR)
> +			 * (packets/second).
> +			 */
> +			uint64_t cir;
> +
> +			/** Committed Burst Size (CBS) (packets). */
> +			uint64_t cbs;
> +
> +			/** Excess Burst Size (EBS) (packets). */
> +			uint64_t ebs;
> +		} srtcmp;
>  	};
>  };
> 
> @@ -317,6 +336,13 @@ struct rte_mtr_capabilities {
>  	 */
>  	uint32_t meter_trtcm_rfc4115_n_max;
> 
> +	/** Maximum number of MTR objects that can have their meter
> configured
> +	 * to run the srTCMp algorithm. The value of 0
> +	 * indicates this metering algorithm is not supported.
> +	 * The maximum value is *n_max*.
> +	 */
> +	uint32_t meter_srtcmp_n_max;
> +
>  	/** Maximum traffic rate that can be metered by a single MTR
> object. For
>  	 * srTCM RFC 2697, this is the maximum CIR rate. For trTCM RFC 2698,
>  	 * this is the maximum PIR rate. For trTCM RFC 4115, this is the
> maximum
> @@ -342,6 +368,12 @@ struct rte_mtr_capabilities {
>  	 */
>  	int color_aware_trtcm_rfc4115_supported;
> 
> +	/**
> +	 * When non-zero, it indicates that color aware mode is supported
> for
> +	 * the srTCMp metering algorithm.
> +	 */
> +	int color_aware_srtcmp_supported;
> +
>  	/** When non-zero, it indicates that the policer packet recolor
> actions
>  	 * are supported.
>  	 * @see enum rte_mtr_policer_action
> --
> 2.21.0

Hi Li,

As specified in the MAINTAINERS file of DPDK, I am the maintainer of this API, so please make sure you add my email in the To: list of future revisions of this patch set.

Isn't this a duplicate of this other patchset that you authored as well: http://patchwork.dpdk.org/project/dpdk/patch/20210301094000.183002-2-lizh@nvidia.com/ ? Which one do you want to keep? I am pasting below my reply to this other patchset.

We had this same problem earlier for the rte_tm.h API, where people asked to add support for WRED and shaper rates specified in packets to the existing byte rate support. I am more than happy to support adding the same here, but please let's adopt the same solution here rather than invent a different approach.

Please refer to struct rte_tm_wred_params and struct rte_tm_shaper_params from rte_tm.h: the packets vs. bytes mode is explicitly specified through the use of a flag called packet_mode that is added to the WRED and shaper profile. When packet_mode is 0, the profile rates and bucket sizes are specified in bytes per second and bytes, respectively; when packet_mode is not 0, the profile rates and bucket sizes are specified in packets per second and packets, respectively. The same profile parameters are used, no need to invent additional algorithms (such as srTCM - packet mode) or profile data structures. Can we do the same here, please?

This is a quick summary of the required API changes to add support for the packet mode, they are minimal:
a) Introduce the packet_mode flag in the profile parameters data structure.
b) Change the description (comment) of the rate and bucket size parameters in the meter profile parameters data structures to reflect that their values represent either bytes or packets, depending on the value of the new flag packet_mode from the same structure.
c) Add the relevant capabilities: just search for "packet" in the rte_tm.h capabilities data structures and apply the same to the rte_mtr.h capabilities, when applicable.

Regards,
Cristian
Thomas Monjalon March 1, 2021, 3:53 p.m. UTC | #2
01/03/2021 14:20, Dumitrescu, Cristian:
> From: Li Zhang
> > As specified in the MAINTEINERS file of DPDK, I am the maintainer of this API, so please make sure you add my email in the To: list of future revisions of this patch set.

This kind of miss should be solved by using
--cc-cmd devtools/get-maintainer.sh when sending.
Li Zhang March 2, 2021, 1:27 a.m. UTC | #3
Thanks Thomas.
I will use it next time.

Regards,
Li Zhang
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Thomas Monjalon
> Sent: Monday, March 1, 2021 11:54 PM
> To: Li Zhang <lizh@nvidia.com>
> Cc: Dekel Peled <dekelp@nvidia.com>; Ori Kam <orika@nvidia.com>; Slava
> Ovsiienko <viacheslavo@nvidia.com>; Matan Azrad <matan@nvidia.com>;
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; dev@dpdk.org; Raslan
> Darawsheh <rasland@nvidia.com>; mb@smartsharesystems.com;
> ajit.khaparde@broadcom.com; Yigit, Ferruh <ferruh.yigit@intel.com>; Singh,
> Jasvinder <jasvinder.singh@intel.com>
> Subject: Re: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> 
> 01/03/2021 14:20, Dumitrescu, Cristian:
> > From: Li Zhang
> > > As specified in the MAINTEINERS file of DPDK, I am the maintainer of this
> API, so please make sure you add my email in the To: list of future revisions of
> this patch set.
> 
> This kind of miss should be solved by using --cc-cmd devtools/get-
> maintainer.sh when sending.
> 
>
Ajit Khaparde March 2, 2021, 1:46 a.m. UTC | #4
On Mon, Mar 1, 2021 at 5:20 AM Dumitrescu, Cristian
<cristian.dumitrescu@intel.com> wrote:
>
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Li Zhang
> > Sent: Monday, March 1, 2021 10:35 AM
> > To: dekelp@nvidia.com; orika@nvidia.com; viacheslavo@nvidia.com;
> > matan@nvidia.com
> > Cc: dev@dpdk.org; thomas@monjalon.net; rasland@nvidia.com;
> > mb@smartsharesystems.com; ajit.khaparde@broadcom.com
> > Subject: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> >
> > Currently meter algorithms only supports rate is bytes per second(BPS).
> > Add this new meter srTCMp algorithm to support rate is packet per second.
> > So that it can meter traffic by packet per second.
> > The below structure will be extended:
> > rte_mtr_algorithm
> > rte_mtr_meter_profile
> > Signed-off-by: Li Zhang <lizh@nvidia.com>
> > ---
> >  .../traffic_metering_and_policing.rst         |  3 +-
> >  doc/guides/rel_notes/release_20_11.rst        |  5 +++
> >  lib/librte_ethdev/rte_mtr.h                   | 32 +++++++++++++++++++
> >  3 files changed, 39 insertions(+), 1 deletion(-)
> >
> > diff --git a/doc/guides/prog_guide/traffic_metering_and_policing.rst
> > b/doc/guides/prog_guide/traffic_metering_and_policing.rst
> > index 90c781eb1d..4d2405d44a 100644
> > --- a/doc/guides/prog_guide/traffic_metering_and_policing.rst
> > +++ b/doc/guides/prog_guide/traffic_metering_and_policing.rst
> > @@ -17,7 +17,8 @@ The main features are:
> >  * Part of DPDK rte_ethdev API
> >  * Capability query API
> >  * Metering algorithms: RFC 2697 Single Rate Three Color Marker (srTCM),
> > RFC 2698
> > -  and RFC 4115 Two Rate Three Color Marker (trTCM)
> > +  and RFC 4115 Two Rate Three Color Marker (trTCM),
> > +  Single Rate Three Color Marker, Packet based (srTCMp).
> >  * Policer actions (per meter output color): recolor, drop
> >  * Statistics (per policer output color)
> >
> > diff --git a/doc/guides/rel_notes/release_20_11.rst
> > b/doc/guides/rel_notes/release_20_11.rst
> > index 7405a9864f..de04886cc9 100644
> > --- a/doc/guides/rel_notes/release_20_11.rst
> > +++ b/doc/guides/rel_notes/release_20_11.rst
> > @@ -429,6 +429,11 @@ New Features
> >    can leverage IOAT DMA channels with vhost asynchronous APIs.
> >    See the :doc:`../sample_app_ug/vhost` for more details.
> >
> > +* **Added support for meter PPS profile.**
> > +
> > +  Currently meter algorithms only supports bytes per second(BPS).
> > +  Add this new meter algorithm to support packet per second (PPS) mode.
> > +  So that it can meter traffic by packet per second.
> >
> >  Removed Items
> >  -------------
> > diff --git a/lib/librte_ethdev/rte_mtr.h b/lib/librte_ethdev/rte_mtr.h
> > index 916a09c5c3..f27a4b5354 100644
> > --- a/lib/librte_ethdev/rte_mtr.h
> > +++ b/lib/librte_ethdev/rte_mtr.h
> > @@ -119,6 +119,11 @@ enum rte_mtr_algorithm {
> >
> >       /** Two Rate Three Color Marker (trTCM) - IETF RFC 4115. */
> >       RTE_MTR_TRTCM_RFC4115,
> > +
> > +     /** Single Rate Three Color Marker, Packet based (srTCMp).
> > +      * - - similar to IETF RFC 2697 but rate is packet per second.
> > +      */
> > +     RTE_MTR_SRTCMP,
> >  };
> >
> >  /**
> > @@ -171,6 +176,20 @@ struct rte_mtr_meter_profile {
> >                       /** Excess Burst Size (EBS) (bytes). */
> >                       uint64_t ebs;
> >               } trtcm_rfc4115;
> > +
> > +             /** Items only valid when *alg* is set to srTCMp. */
> > +             struct {
> > +                     /** Committed Information Rate (CIR)
> > +                      * (packets/second).
> > +                      */
> > +                     uint64_t cir;
> > +
> > +                     /** Committed Burst Size (CBS) (packets). */
> > +                     uint64_t cbs;
> > +
> > +                     /** Excess Burst Size (EBS) (packets). */
> > +                     uint64_t ebs;
> > +             } srtcmp;
> >       };
> >  };
> >
> > @@ -317,6 +336,13 @@ struct rte_mtr_capabilities {
> >        */
> >       uint32_t meter_trtcm_rfc4115_n_max;
> >
> > +     /** Maximum number of MTR objects that can have their meter
> > configured
> > +      * to run the srTCMp algorithm. The value of 0
> > +      * indicates this metering algorithm is not supported.
> > +      * The maximum value is *n_max*.
> > +      */
> > +     uint32_t meter_srtcmp_n_max;
> > +
> >       /** Maximum traffic rate that can be metered by a single MTR
> > object. For
> >        * srTCM RFC 2697, this is the maximum CIR rate. For trTCM RFC 2698,
> >        * this is the maximum PIR rate. For trTCM RFC 4115, this is the
> > maximum
> > @@ -342,6 +368,12 @@ struct rte_mtr_capabilities {
> >        */
> >       int color_aware_trtcm_rfc4115_supported;
> >
> > +     /**
> > +      * When non-zero, it indicates that color aware mode is supported
> > for
> > +      * the srTCMp metering algorithm.
> > +      */
> > +     int color_aware_srtcmp_supported;
> > +
> >       /** When non-zero, it indicates that the policer packet recolor
> > actions
> >        * are supported.
> >        * @see enum rte_mtr_policer_action
> > --
> > 2.21.0
>
> Hi Li,
>
> As specified in the MAINTEINERS file of DPDK, I am the maintainer of this API, so please make sure you add my email in the To: list of future revisions of this patch set.
>
> Isn't this a duplicate of this other patchset that you authored as well: http://patchwork.dpdk.org/project/dpdk/patch/20210301094000.183002-2-lizh@nvidia.com/ ? Which one do you want to keep? I am pasting below my reply to this other patchset.
>
> We had this same problem earlier for the rte_tm.h API, where people asked to add support for WRED and shaper rates specified in packets to the existing byte rate support. I am more than happy to support adding the same here, but please let's adopt the same solution here rather than invent a different approach.
>
> Please refer to struct rte_tm_wred_params and struct rte_tm_shaper_params from rte_tm.h: the packets vs. bytes mode is explicitly specified through the use of a flag called packet_mode that is added to the WRED and shaper profile. When packet_mode is 0, the profile rates and bucket sizes are specified in bytes per second and bytes, respectively; when packet_mode is not 0, the profile rates and bucket sizes are specified in packets and packets per second, respectively. The same profile parameters are used, no need to invent additional algorithms (such as srTCM - packet mode) or profile data structures. Can we do the same here, please?
>
> This is a quick summary of the required API changes to add support for the packet mode, they are minimal:
> a) Introduce the packet_mode flag in the profile parameters data structure.
> b) Change the description (comment) of the rate and bucket size parameters in the meter profile parameters data structures to reflect that their values represents either bytes or packets, depending on the value of the new flag packet_mode from the same structure.
> c) Add the relevant capabilities: just search for "packet" in the rte_tm.h capabilities data structures and apply the same to the rte_mtr.h capabilities, when applicable.
Overall I think this is a better approach. And default packet_mode
will be bytes.


>
> Regards,
> Cristian
Matan Azrad March 2, 2021, 7:02 a.m. UTC | #5
Hi Cristian

Thank you for review, please see inline.

From: Dumitrescu, Cristian
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Li Zhang
<snip>
> We had this same problem earlier for the rte_tm.h API, where people asked to
> add support for WRED and shaper rates specified in packets to the existing byte
> rate support. I am more than happy to support adding the same here, but
> please let's adopt the same solution here rather than invent a different
> approach.
> 
> Please refer to struct rte_tm_wred_params and struct rte_tm_shaper_params
> from rte_tm.h: the packets vs. bytes mode is explicitly specified through the use
> of a flag called packet_mode that is added to the WRED and shaper profile.
> When packet_mode is 0, the profile rates and bucket sizes are specified in
> bytes per second and bytes, respectively; when packet_mode is not 0, the
> profile rates and bucket sizes are specified in packets and packets per second,
> respectively. The same profile parameters are used, no need to invent
> additional algorithms (such as srTCM - packet mode) or profile data structures.
> Can we do the same here, please?

This flag approach is a very intuitive suggestion and it has advantages.

The main problem with the flag approach is that it breaks ABI and API.
The profile structure size is changed due to a new field - ABI breakage.
The user must initialize the flag with zero to get old behavior - API breakage. 
 
I don't see issues with Li's suggestion. Do you think Li's suggestion has critical issues?

> This is a quick summary of the required API changes to add support for the
> packet mode, they are minimal:
> a) Introduce the packet_mode flag in the profile parameters data structure.
> b) Change the description (comment) of the rate and bucket size parameters in
> the meter profile parameters data structures to reflect that their values
> represents either bytes or packets, depending on the value of the new flag
> packet_mode from the same structure.
> c) Add the relevant capabilities: just search for "packet" in the rte_tm.h
> capabilities data structures and apply the same to the rte_mtr.h capabilities,
> when applicable.
 
> Regards,
> Cristian
Cristian Dumitrescu March 2, 2021, 12:13 p.m. UTC | #6
<snip>...

> Overall I think this is a better approach. And default packet_mode
> will be bytes.
> 

Yes, agreed.
Cristian Dumitrescu March 2, 2021, 12:29 p.m. UTC | #7
Hi Matan,

> -----Original Message-----
> From: Matan Azrad <matan@nvidia.com>
> Sent: Tuesday, March 2, 2021 7:02 AM
> To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Li Zhang
> <lizh@nvidia.com>; Dekel Peled <dekelp@nvidia.com>; Ori Kam
> <orika@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon
> <thomas@monjalon.net>; Raslan Darawsheh <rasland@nvidia.com>;
> mb@smartsharesystems.com; ajit.khaparde@broadcom.com; Yigit, Ferruh
> <ferruh.yigit@intel.com>; Singh, Jasvinder <jasvinder.singh@intel.com>
> Subject: RE: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> 
> 
> 
> Hi Cristian
> 
> Thank you for review, please see inline.
> 
> From: Dumitrescu, Cristian
> > > From: dev <dev-bounces@dpdk.org> On Behalf Of Li Zhang
> <snip>
> > We had this same problem earlier for the rte_tm.h API, where people
> asked to
> > add support for WRED and shaper rates specified in packets to the existing
> byte
> > rate support. I am more than happy to support adding the same here, but
> > please let's adopt the same solution here rather than invent a different
> > approach.
> >
> > Please refer to struct rte_tm_wred_params and struct
> rte_tm_shaper_params
> > from rte_tm.h: the packets vs. bytes mode is explicitly specified through
> the use
> > of a flag called packet_mode that is added to the WRED and shaper profile.
> > When packet_mode is 0, the profile rates and bucket sizes are specified in
> > bytes per second and bytes, respectively; when packet_mode is not 0, the
> > profile rates and bucket sizes are specified in packets and packets per
> second,
> > respectively. The same profile parameters are used, no need to invent
> > additional algorithms (such as srTCM - packet mode) or profile data
> structures.
> > Can we do the same here, please?
> 
> This flag approach is very intuitive suggestion and it has advantages.
> 
> The main problem with the flag approach is that it breaks ABI and API.
> The profile structure size is changed due to a new field - ABI breakage.
> The user must initialize the flag with zero to get old behavior - API breakage.
> 

The rte_mtr API is experimental, all the API functions are correctly marked with __rte_experimental in rte_mtr.h file, so we can safely change the API and the ABI breakage is not applicable here. Therefore, this problem does not exist, correct?

> I don't see issues with Li suggestion, Do you think Li suggestion has critical
> issues?

It is probably better to keep the rte_mtr and the rte_tm APIs aligned, it simplifies the code maintenance and improves the user experience, which always pays off in the long run. Both APIs configure token buckets in either packet mode or byte mode, and it is desirable to have them work in the same way. Also, I think we should avoid duplicating configuration data structures to support essentially the same algorithms (such as srTCM or trTCM) if we can.

The flag proposal is actually reducing the amount of work that you guys need to do to implement your proposal. There is no negative impact to your proposal and no big change, right?

> 
> > This is a quick summary of the required API changes to add support for the
> > packet mode, they are minimal:
> > a) Introduce the packet_mode flag in the profile parameters data
> structure.
> > b) Change the description (comment) of the rate and bucket size
> parameters in
> > the meter profile parameters data structures to reflect that their values
> > represents either bytes or packets, depending on the value of the new flag
> > packet_mode from the same structure.
> > c) Add the relevant capabilities: just search for "packet" in the rte_tm.h
> > capabilities data structures and apply the same to the rte_mtr.h
> capabilities,
> > when applicable.
> 
> > Regards,
> > Cristian

Regards,
Cristian
Matan Azrad March 2, 2021, 12:37 p.m. UTC | #8
HI Cristian

From: Dumitrescu, Cristian
> Hi Matan,
> 
> > -----Original Message-----
> > From: Matan Azrad <matan@nvidia.com>
> > Sent: Tuesday, March 2, 2021 7:02 AM
> > To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Li Zhang
> > <lizh@nvidia.com>; Dekel Peled <dekelp@nvidia.com>; Ori Kam
> > <orika@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> > Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon <thomas@monjalon.net>;
> > Raslan Darawsheh <rasland@nvidia.com>; mb@smartsharesystems.com;
> > ajit.khaparde@broadcom.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
> > Singh, Jasvinder <jasvinder.singh@intel.com>
> > Subject: RE: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> >
> >
> >
> > Hi Cristian
> >
> > Thank you for review, please see inline.
> >
> > From: Dumitrescu, Cristian
> > > > From: dev <dev-bounces@dpdk.org> On Behalf Of Li Zhang
> > <snip>
> > > We had this same problem earlier for the rte_tm.h API, where people
> > asked to
> > > add support for WRED and shaper rates specified in packets to the
> > > existing
> > byte
> > > rate support. I am more than happy to support adding the same here,
> > > but please let's adopt the same solution here rather than invent a
> > > different approach.
> > >
> > > Please refer to struct rte_tm_wred_params and struct
> > rte_tm_shaper_params
> > > from rte_tm.h: the packets vs. bytes mode is explicitly specified
> > > through
> > the use
> > > of a flag called packet_mode that is added to the WRED and shaper profile.
> > > When packet_mode is 0, the profile rates and bucket sizes are
> > > specified in bytes per second and bytes, respectively; when
> > > packet_mode is not 0, the profile rates and bucket sizes are
> > > specified in packets and packets per
> > second,
> > > respectively. The same profile parameters are used, no need to
> > > invent additional algorithms (such as srTCM - packet mode) or
> > > profile data
> > structures.
> > > Can we do the same here, please?
> >
> > This flag approach is very intuitive suggestion and it has advantages.
> >
> > The main problem with the flag approach is that it breaks ABI and API.
> > The profile structure size is changed due to a new field - ABI breakage.
> > The user must initialize the flag with zero to get old behavior - API breakage.
> >
> 
> The rte_mtr API is experimental, all the API functions are correctly marked
> with __rte_experimental in rte_mtr.h file, so we can safely change the API and
> the ABI breakage is not applicable here. Therefore, this problem does not exist,
> correct?

Yes, but still the meter API is not new and I know that a lot of users have used it for a long time.
Forcing them to change while we have a good solution that doesn't force it looks problematic to me.
 

> > I don't see issues with Li suggestion, Do you think Li suggestion has
> > critical issues?
> 
> It is probably better to keep the rte_mtr and the rte_tm APIs aligned, it
> simplifies the code maintenance and improves the user experience, which
> always pays off in the long run. Both APIs configure token buckets in either
> packet mode or byte mode, and it is desirable to have them work in the same
> way. Also, I think we should avoid duplicating configuration data structures for
> to support essentially the same algorithms (such as srTCM or trTCM) if we can.
> 

Yes, but I don't think this motivation is critical.

> The flag proposal is actually reducing the amount of work that you guys need to
> do to implement your proposal. There is no negative impact to your proposal
> and no big change, right?

Yes, you are right, but the implementation effort is not our concern. 


> > > This is a quick summary of the required API changes to add support
> > > for the packet mode, they are minimal:
> > > a) Introduce the packet_mode flag in the profile parameters data
> > structure.
> > > b) Change the description (comment) of the rate and bucket size
> > parameters in
> > > the meter profile parameters data structures to reflect that their
> > > values represents either bytes or packets, depending on the value of
> > > the new flag packet_mode from the same structure.
> > > c) Add the relevant capabilities: just search for "packet" in the
> > > rte_tm.h capabilities data structures and apply the same to the
> > > rte_mtr.h
> > capabilities,
> > > when applicable.
> >
> > > Regards,
> > > Cristian
> 
> Regards,
> Cristian
Cristian Dumitrescu March 2, 2021, 2:33 p.m. UTC | #9
Hi Matan,

> -----Original Message-----
> From: Matan Azrad <matan@nvidia.com>
> Sent: Tuesday, March 2, 2021 12:37 PM
> To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Li Zhang
> <lizh@nvidia.com>; Dekel Peled <dekelp@nvidia.com>; Ori Kam
> <orika@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon
> <thomas@monjalon.net>; Raslan Darawsheh <rasland@nvidia.com>;
> mb@smartsharesystems.com; ajit.khaparde@broadcom.com; Yigit, Ferruh
> <ferruh.yigit@intel.com>; Singh, Jasvinder <jasvinder.singh@intel.com>
> Subject: RE: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> 
> HI Cristian
> 
> From: Dumitrescu, Cristian
> > Hi Matan,
> >
> > > -----Original Message-----
> > > From: Matan Azrad <matan@nvidia.com>
> > > Sent: Tuesday, March 2, 2021 7:02 AM
> > > To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Li Zhang
> > > <lizh@nvidia.com>; Dekel Peled <dekelp@nvidia.com>; Ori Kam
> > > <orika@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> > > Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon
> <thomas@monjalon.net>;
> > > Raslan Darawsheh <rasland@nvidia.com>; mb@smartsharesystems.com;
> > > ajit.khaparde@broadcom.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
> > > Singh, Jasvinder <jasvinder.singh@intel.com>
> > > Subject: RE: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> > >
> > >
> > >
> > > Hi Cristian
> > >
> > > Thank you for review, please see inline.
> > >
> > > From: Dumitrescu, Cristian
> > > > > From: dev <dev-bounces@dpdk.org> On Behalf Of Li Zhang
> > > <snip>
> > > > We had this same problem earlier for the rte_tm.h API, where people
> > > asked to
> > > > add support for WRED and shaper rates specified in packets to the
> > > > existing
> > > byte
> > > > rate support. I am more than happy to support adding the same here,
> > > > but please let's adopt the same solution here rather than invent a
> > > > different approach.
> > > >
> > > > Please refer to struct rte_tm_wred_params and struct
> > > rte_tm_shaper_params
> > > > from rte_tm.h: the packets vs. bytes mode is explicitly specified
> > > > through
> > > the use
> > > > of a flag called packet_mode that is added to the WRED and shaper
> profile.
> > > > When packet_mode is 0, the profile rates and bucket sizes are
> > > > specified in bytes per second and bytes, respectively; when
> > > > packet_mode is not 0, the profile rates and bucket sizes are
> > > > specified in packets and packets per
> > > second,
> > > > respectively. The same profile parameters are used, no need to
> > > > invent additional algorithms (such as srTCM - packet mode) or
> > > > profile data
> > > structures.
> > > > Can we do the same here, please?
> > >
> > > This flag approach is very intuitive suggestion and it has advantages.
> > >
> > > The main problem with the flag approach is that it breaks ABI and API.
> > > The profile structure size is changed due to a new field - ABI breakage.
> > > The user must initialize the flag with zero to get old behavior - API
> breakage.
> > >
> >
> > The rte_mtr API is experimental, all the API functions are correctly marked
> > with __rte_experimental in rte_mtr.h file, so we can safely change the API
> and
> > the ABI breakage is not applicable here. Therefore, this problem does not
> exist,
> > correct?
> 
> Yes, but still meter is not new API and I know that a lot of user uses it for a
> long time.
> Forcing them to change while we have good solution that don't force it, looks
> me problematic.
> 

Not really, only 3 drivers are currently implementing this API.

Even to these drivers, the required changes are none or extremely small: as Ajit was also noting, as the default value of 0 continues to represent the existing byte mode, all you have to do is make sure the new flag is set to zero in the profile params structure, which is already done implicitly in most places as this structure is initialized to all-zeros.

A simple search exercise for struct rte_mtr_meter_profile is all that is needed. You also agreed the flag approach is very intuitive, hence better and nicer, with no additional work needed for you, so why not do it?

> 
> > > I don't see issues with Li suggestion, Do you think Li suggestion has
> > > critical issues?
> >
> > It is probably better to keep the rte_mtr and the rte_tm APIs aligned, it
> > simplifies the code maintenance and improves the user experience, which
> > always pays off in the long run. Both APIs configure token buckets in either
> > packet mode or byte mode, and it is desirable to have them work in the
> same
> > way. Also, I think we should avoid duplicating configuration data structures
> for
> > to support essentially the same algorithms (such as srTCM or trTCM) if we
> can.
> >
> 
> Yes, but I don't think this motivation is critical.

I really disagree. As API maintainer, making every effort to keep the APIs clear and consistent is a critical task for me. We don't want to proliferate the API data structures and parameters if there is a good way to avoid it. Especially in cases like this, when the drivers are just beginning to pick up this (still experimental) API,  we have the rare chance to make things right and therefore we should do it. Please also keep in mind that, as more features are added to the API, small corner cuts like this one that might not look like a big deal now, eventually come back as unnecessary complexity in the drivers themselves.

So, please, let's try to keep the quality of the APIs high.

> 
> > The flag proposal is actually reducing the amount of work that you guys
> need to
> > do to implement your proposal. There is no negative impact to your
> proposal
> > and no big change, right?
> 
> Yes you right, but the implementation effect is not our concern.
> 
> 
> > > > This is a quick summary of the required API changes to add support
> > > > for the packet mode, they are minimal:
> > > > a) Introduce the packet_mode flag in the profile parameters data
> > > structure.
> > > > b) Change the description (comment) of the rate and bucket size
> > > parameters in
> > > > the meter profile parameters data structures to reflect that their
> > > > values represents either bytes or packets, depending on the value of
> > > > the new flag packet_mode from the same structure.
> > > > c) Add the relevant capabilities: just search for "packet" in the
> > > > rte_tm.h capabilities data structures and apply the same to the
> > > > rte_mtr.h
> > > capabilities,
> > > > when applicable.
> > >
> > > > Regards,
> > > > Cristian
> >
> > Regards,
> > Cristian

Regards,
Cristian
Matan Azrad March 2, 2021, 6:10 p.m. UTC | #10
Hi Cristian

Good discussion, thank you for that!

From: Dumitrescu, Cristian
> Hi Matan,
> 
> > -----Original Message-----
> > From: Matan Azrad <matan@nvidia.com>
> > Sent: Tuesday, March 2, 2021 12:37 PM
> > To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Li Zhang
> > <lizh@nvidia.com>; Dekel Peled <dekelp@nvidia.com>; Ori Kam
> > <orika@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> > Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon <thomas@monjalon.net>;
> > Raslan Darawsheh <rasland@nvidia.com>; mb@smartsharesystems.com;
> > ajit.khaparde@broadcom.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
> > Singh, Jasvinder <jasvinder.singh@intel.com>
> > Subject: RE: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> >
> > HI Cristian
> >
> > From: Dumitrescu, Cristian
> > > Hi Matan,
> > >
> > > > -----Original Message-----
> > > > From: Matan Azrad <matan@nvidia.com>
> > > > Sent: Tuesday, March 2, 2021 7:02 AM
> > > > To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Li Zhang
> > > > <lizh@nvidia.com>; Dekel Peled <dekelp@nvidia.com>; Ori Kam
> > > > <orika@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> > > > Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon
> > <thomas@monjalon.net>;
> > > > Raslan Darawsheh <rasland@nvidia.com>; mb@smartsharesystems.com;
> > > > ajit.khaparde@broadcom.com; Yigit, Ferruh
> > > > <ferruh.yigit@intel.com>; Singh, Jasvinder
> > > > <jasvinder.singh@intel.com>
> > > > Subject: RE: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> > > >
> > > >
> > > >
> > > > Hi Cristian
> > > >
> > > > Thank you for review, please see inline.
> > > >
> > > > From: Dumitrescu, Cristian
> > > > > > From: dev <dev-bounces@dpdk.org> On Behalf Of Li Zhang
> > > > <snip>
> > > > > We had this same problem earlier for the rte_tm.h API, where
> > > > > people
> > > > asked to
> > > > > add support for WRED and shaper rates specified in packets to
> > > > > the existing
> > > > byte
> > > > > rate support. I am more than happy to support adding the same
> > > > > here, but please let's adopt the same solution here rather than
> > > > > invent a different approach.
> > > > >
> > > > > Please refer to struct rte_tm_wred_params and struct
> > > > rte_tm_shaper_params
> > > > > from rte_tm.h: the packets vs. bytes mode is explicitly
> > > > > specified through
> > > > the use
> > > > > of a flag called packet_mode that is added to the WRED and
> > > > > shaper
> > profile.
> > > > > When packet_mode is 0, the profile rates and bucket sizes are
> > > > > specified in bytes per second and bytes, respectively; when
> > > > > packet_mode is not 0, the profile rates and bucket sizes are
> > > > > specified in packets and packets per
> > > > second,
> > > > > respectively. The same profile parameters are used, no need to
> > > > > invent additional algorithms (such as srTCM - packet mode) or
> > > > > profile data
> > > > structures.
> > > > > Can we do the same here, please?
> > > >
> > > > This flag approach is very intuitive suggestion and it has advantages.
> > > >
> > > > The main problem with the flag approach is that it breaks ABI and API.
> > > > The profile structure size is changed due to a new field - ABI breakage.
> > > > The user must initialize the flag with zero to get old behavior -
> > > > API
> > breakage.
> > > >
> > >
> > > The rte_mtr API is experimental, all the API functions are correctly
> > > marked with __rte_experimental in rte_mtr.h file, so we can safely
> > > change the API
> > and
> > > the ABI breakage is not applicable here. Therefore, this problem
> > > does not
> > exist,
> > > correct?
> >
> > Yes, but still meter is not new API and I know that a lot of user uses
> > it for a long time.
> > Forcing them to change while we have good solution that don't force
> > it, looks me problematic.
> >
> 
> Not really, only 3 drivers are currently implementing this API.

The user is not the PMD, the PMDs are the providers.
I'm talking about all our customers, all the current DPDK-based applications like OVS and others (I am familiar with at least 4 ConnectX customer applications) which use the meter API, and I'm sure there are more around the world.
  
> Even to these drivers, the required changes are none or extremely small: as Ajit
> was also noting, as the default value of 0 continues to represent the existing
> byte mode, all you have to do is make sure the new flag is set to zero in the
> profile params structure, which is already done implicitly in most places as this
> structure is initialized to all-zeros.

Are you sure everyone in the world initializes the struct to 0? And also in this case, without a new compilation, not all of the struct will be zeros (the old size is smaller). 

> A simple search exercise for struct rte_mtr_meter_profile is all that is needed.
> You also agreed the flag approach is very intuitive, hence better and nicer, with
> no additional work needed for you, so why not do it?

Do you understand that any current application that uses the meter API must recompile its code? Some of them probably also need to set the flag to 0....
Do you also understand the potential issues for applications which are not aware of the change? Debug time, etc....

> > > > I don't see issues with Li suggestion, Do you think Li suggestion
> > > > has critical issues?
> > >
> > > It is probably better to keep the rte_mtr and the rte_tm APIs
> > > aligned, it simplifies the code maintenance and improves the user
> > > experience, which always pays off in the long run. Both APIs
> > > configure token buckets in either packet mode or byte mode, and it
> > > is desirable to have them work in the
> > same
> > > way. Also, I think we should avoid duplicating configuration data
> > > structures
> > for
> > > to support essentially the same algorithms (such as srTCM or trTCM)
> > > if we
> > can.
> > >
> >
> > Yes, but I don't think this motivation is critical.
> 
> I really disagree. As API maintainer, making every effort to keep the APIs clear
> and consistent is a critical task for me.

The new PPS profile is also clear and simple.

> We don't want to proliferate the API
> data structures and parameters if there is a good way to avoid it. Especially in
> cases like this, when the drivers are just beginning to pick up this (still
> experimental) API,  we have the rare chance to make things right and therefore
> we should do it. Please also keep in mind that, as more feature are added to
> the API, small corner cuts like this one that might not look like a big deal now,
> eventually come back as unnecessary complexity in the drivers themselves.

I don't see any complexity in the current suggestion.

> So, please, let's try to keep the quality of the APIs high.

This way the quality is also high.


Look, the flag approach is also good and does the job.
The two approaches are both clear, simple and of high quality.
I don't care which of the two to take, but I want to be sure we all understand the pros and cons.

If you understand my concern about the flag approach and still insist on taking the flag approach, we will align.

And if so, given that we are going to break the API/ABI anyway: we are going to introduce a new meter policy API soon, and there breaking the API can help; let's see in another discussion later.

One more point:
Currently, the meter_id is managed by the user; I think it is better to let the PMDs manage the meter_id.

Searching the PMD meter handler inside the PMD is very expensive for the API call rate when the meter_id is managed by the user.

Same for profile_id.

Also all the rte_flow API including the shared action API taking the PMD management approach.

What do you think?

> > > The flag proposal is actually reducing the amount of work that you
> > > guys
> > need to
> > > do to implement your proposal. There is no negative impact to your
> > proposal
> > > and no big change, right?
> >
> > Yes you right, but the implementation effect is not our concern.
> >
> >
> > > > > This is a quick summary of the required API changes to add
> > > > > support for the packet mode, they are minimal:
> > > > > a) Introduce the packet_mode flag in the profile parameters data
> > > > structure.
> > > > > b) Change the description (comment) of the rate and bucket size
> > > > parameters in
> > > > > the meter profile parameters data structures to reflect that
> > > > > their values represents either bytes or packets, depending on
> > > > > the value of the new flag packet_mode from the same structure.
> > > > > c) Add the relevant capabilities: just search for "packet" in
> > > > > the rte_tm.h capabilities data structures and apply the same to
> > > > > the rte_mtr.h
> > > > capabilities,
> > > > > when applicable.
> > > >
> > > > > Regards,
> > > > > Cristian
> > >
> > > Regards,
> > > Cristian
> 
> Regards,
> Cristian
Cristian Dumitrescu March 3, 2021, 8:35 p.m. UTC | #11
Hi Matan,

> -----Original Message-----
> From: Matan Azrad <matan@nvidia.com>
> Sent: Tuesday, March 2, 2021 6:10 PM
> To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Li Zhang
> <lizh@nvidia.com>; Dekel Peled <dekelp@nvidia.com>; Ori Kam
> <orika@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon
> <thomas@monjalon.net>; Raslan Darawsheh <rasland@nvidia.com>;
> mb@smartsharesystems.com; ajit.khaparde@broadcom.com; Yigit, Ferruh
> <ferruh.yigit@intel.com>; Singh, Jasvinder <jasvinder.singh@intel.com>
> Subject: RE: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> 
> Hi Cristian
> 
> Good discussion, thank you for that!
> 
> From: Dumitrescu, Cristian
> > Hi Matan,
> >
> > > -----Original Message-----
> > > From: Matan Azrad <matan@nvidia.com>
> > > Sent: Tuesday, March 2, 2021 12:37 PM
> > > To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Li Zhang
> > > <lizh@nvidia.com>; Dekel Peled <dekelp@nvidia.com>; Ori Kam
> > > <orika@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> > > Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon
> <thomas@monjalon.net>;
> > > Raslan Darawsheh <rasland@nvidia.com>; mb@smartsharesystems.com;
> > > ajit.khaparde@broadcom.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
> > > Singh, Jasvinder <jasvinder.singh@intel.com>
> > > Subject: RE: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> > >
> > > HI Cristian
> > >
> > > From: Dumitrescu, Cristian
> > > > Hi Matan,
> > > >
> > > > > -----Original Message-----
> > > > > From: Matan Azrad <matan@nvidia.com>
> > > > > Sent: Tuesday, March 2, 2021 7:02 AM
> > > > > To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Li Zhang
> > > > > <lizh@nvidia.com>; Dekel Peled <dekelp@nvidia.com>; Ori Kam
> > > > > <orika@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> > > > > Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon
> > > <thomas@monjalon.net>;
> > > > > Raslan Darawsheh <rasland@nvidia.com>;
> mb@smartsharesystems.com;
> > > > > ajit.khaparde@broadcom.com; Yigit, Ferruh
> > > > > <ferruh.yigit@intel.com>; Singh, Jasvinder
> > > > > <jasvinder.singh@intel.com>
> > > > > Subject: RE: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> > > > >
> > > > >
> > > > >
> > > > > Hi Cristian
> > > > >
> > > > > Thank you for review, please see inline.
> > > > >
> > > > > From: Dumitrescu, Cristian
> > > > > > > From: dev <dev-bounces@dpdk.org> On Behalf Of Li Zhang
> > > > > <snip>
> > > > > > We had this same problem earlier for the rte_tm.h API, where
> > > > > > people
> > > > > asked to
> > > > > > add support for WRED and shaper rates specified in packets to
> > > > > > the existing
> > > > > byte
> > > > > > rate support. I am more than happy to support adding the same
> > > > > > here, but please let's adopt the same solution here rather than
> > > > > > invent a different approach.
> > > > > >
> > > > > > Please refer to struct rte_tm_wred_params and struct
> > > > > rte_tm_shaper_params
> > > > > > from rte_tm.h: the packets vs. bytes mode is explicitly
> > > > > > specified through
> > > > > the use
> > > > > > of a flag called packet_mode that is added to the WRED and
> > > > > > shaper
> > > profile.
> > > > > > When packet_mode is 0, the profile rates and bucket sizes are
> > > > > > specified in bytes per second and bytes, respectively; when
> > > > > > packet_mode is not 0, the profile rates and bucket sizes are
> > > > > > specified in packets and packets per
> > > > > second,
> > > > > > respectively. The same profile parameters are used, no need to
> > > > > > invent additional algorithms (such as srTCM - packet mode) or
> > > > > > profile data
> > > > > structures.
> > > > > > Can we do the same here, please?
> > > > >
> > > > > This flag approach is very intuitive suggestion and it has advantages.
> > > > >
> > > > > The main problem with the flag approach is that it breaks ABI and API.
> > > > > The profile structure size is changed due to a new field - ABI
> breakage.
> > > > > The user must initialize the flag with zero to get old behavior -
> > > > > API
> > > breakage.
> > > > >
> > > >
> > > > The rte_mtr API is experimental, all the API functions are correctly
> > > > marked with __rte_experimental in rte_mtr.h file, so we can safely
> > > > change the API
> > > and
> > > > the ABI breakage is not applicable here. Therefore, this problem
> > > > does not
> > > exist,
> > > > correct?
> > >
> > > Yes, but still meter is not new API and I know that a lot of user uses
> > > it for a long time.
> > > Forcing them to change while we have good solution that don't force
> > > it, looks me problematic.
> > >
> >
> > Not really, only 3 drivers are currently implementing this API.
> 
> The user is not the PMD, the PMDs are the providers.
> I'm talking about all our customers, all the current DPDK based applications
> like OVS and others (I familiar with at least 4 ConnectX customer applications)
> which use the meter API and I'm sure there are more around the world.
> 
> > Even to these drivers, the required changes are none or extremely small:
> as Ajit
> > was also noting, as the default value of 0 continues to represent the
> existing
> > byte mode, all you have to do is make sure the new flag is set to zero in the
> > profile params structure, which is already done implicitly in most places as
> this
> > structure is initialized to all-zeros.
> 
> Are you sure all the world initialize the struct to 0? and also in this case,
> without new compilation, not all the struct will be zeroes(the old size is
> smaller).
> 
> > A simple search exercise for struct rte_mtr_meter_profile is all that is
> needed.
> > You also agreed the flag approach is very intuitive, hence better and nicer,
> with
> > no additional work needed for you, so why not do it?
> 
> Do you understand that any current application that use the meter API must
> recompile the code of the application? Part of them also probably need to
> set the flag to 0....
> Do you understand also the potential issues for the applications which are
> not aware to the change? Debug time, etc....
> 
> > > > > I don't see issues with Li suggestion, Do you think Li suggestion
> > > > > has critical issues?
> > > >
> > > > It is probably better to keep the rte_mtr and the rte_tm APIs
> > > > aligned, it simplifies the code maintenance and improves the user
> > > > experience, which always pays off in the long run. Both APIs
> > > > configure token buckets in either packet mode or byte mode, and it
> > > > is desirable to have them work in the
> > > same
> > > > way. Also, I think we should avoid duplicating configuration data
> > > > structures
> > > for
> > > > to support essentially the same algorithms (such as srTCM or trTCM)
> > > > if we
> > > can.
> > > >
> > >
> > > Yes, but I don't think this motivation is critical.
> >
> > I really disagree. As API maintainer, making every effort to keep the APIs
> clear
> > and consistent is a critical task for me.
> 
> New pps profile is also clear and simple.
> 
> > We don't want to proliferate the API
> > data structures and parameters if there is a good way to avoid it. Especially
> in
> > cases like this, when the drivers are just beginning to pick up this (still
> > experimental) API,  we have the rare chance to make things right and
> therefore
> > we should do it. Please also keep in mind that, as more feature are added
> to
> > the API, small corner cuts like this one that might not look like a big deal
> now,
> > eventually come back as unnecessary complexity in the drivers themselves.
> 
> I don't see a complexity in the current suggestion.
> 
> > So, please, let's try to keep the quality of the APIs high.
> 
> Also by this way is high.
> 
> 
> Look, the flag approach is also good and makes the job.
> The two approaches are clear, simple and in high quality.
> I don't care which one from the 2 to take but I want to be sure we are all
> understand the pros and cons.
> 
> If you understand my concern on flag approach and still insist to take the flag
> approach we will align.

Yes, thanks for summarizing the pros and cons, I confirm that I do understand your concerns.

Yes, sorry to disappoint you, I still think the packet_mode based approach is better for the long run, as it keeps the APIs clean and consistent. We are not adding new algorithms here, we're just adding a new mode to an existing algorithm, so IMO we should not duplicate configuration data structures and proliferate the number of algorithms artificially.

Yes, I do realize that in some limited cases the users will have to explicitly set the new packet_mode flag to zero or one, in case they need to enable the packet mode, but I think this is an acceptable cost because: (A) This API is clearly marked as experimental; (B) It is better to take a small incremental hit now to keep the APIs in good order rather than taking a big hit in a few years as more features are added in the wrong way and the APIs become unmanageable.

> 
> And if we so, and we are going to break the API\ABI, we are going to
> introduce new meter policy API soon and there, breaking API can help, lets
> see in other discussion later.
> 

Yes, as you point out API changes are unavoidable as new features are added, we have to manage the API evolution correctly.

> One more point:
> Currently, the meter_id is managed by the user, I think it is better to let the
> PMDs to manage the meter_id.
> 
> Searching the PMD meter handler inside the PMD is very expensive for the
> API call rate when the meter_id is managed by the user.
> 
> Same for profile_id.
> 
> Also all the rte_flow API including the shared action API taking the PMD
> management approach.
> 
> What do you think?
> 

Yes, we have carefully considered and discussed both approaches a few years back when the API was introduced, this is not done by accident :), there are pros and cons for each of them.

If the object IDs are generated by the driver (outputs of the API), then it is the user application that needs to keep track of them, which can be very painful. Basically, for each API object the user application needs to create its own wrapper to store this ID. We basically transfer this problem to the user app.

If the object IDs are generated by the user application (inputs into the API), then we simplify the application by removing an indirection layer. Yes, it is true that this indirection layer now moves into the driver, but we should try to make life easier for the app developers as opposed to us, the driver developers. This indirection layer in the driver can be made a bit smarter than just a slow "for" loop; the search operation can be made faster with a small bit of effort, such as keeping this list sorted based on the object ID, splitting this list into buckets (similar to a hash table), etc, right?

Having the user app provide the object ID is especially important in the case of the rte_tm API, where we have to deal with a tree of nodes, with thousands of nodes at each level. Having the app store and manage this tree of IDs is a really bad idea, as the user app would need to mirror the tree of nodes on its side for no real benefit. As an added benefit, the user can generate these IDs using a rule, such as: given the specific path through the tree, the value of the ID can be computed.

But again, as you also mention above, there is a list of pros and cons for every approach, no approach is perfect. We took this approach for the good reasons listed above.

> > > > The flag proposal is actually reducing the amount of work that you
> > > > guys
> > > need to
> > > > do to implement your proposal. There is no negative impact to your
> > > proposal
> > > > and no big change, right?
> > >
> > > Yes you right, but the implementation effect is not our concern.
> > >
> > >
> > > > > > This is a quick summary of the required API changes to add
> > > > > > support for the packet mode, they are minimal:
> > > > > > a) Introduce the packet_mode flag in the profile parameters data
> > > > > structure.
> > > > > > b) Change the description (comment) of the rate and bucket size
> > > > > parameters in
> > > > > > the meter profile parameters data structures to reflect that
> > > > > > their values represents either bytes or packets, depending on
> > > > > > the value of the new flag packet_mode from the same structure.
> > > > > > c) Add the relevant capabilities: just search for "packet" in
> > > > > > the rte_tm.h capabilities data structures and apply the same to
> > > > > > the rte_mtr.h
> > > > > capabilities,
> > > > > > when applicable.
> > > > >
> > > > > > Regards,
> > > > > > Cristian
> > > >
> > > > Regards,
> > > > Cristian
> >
> > Regards,
> > Cristian

Regards,
Cristian
Matan Azrad March 4, 2021, 6:34 a.m. UTC | #12
Hi Cristian

From: Dumitrescu, Cristian
> Hi Matan,
> 
> > -----Original Message-----
> > From: Matan Azrad <matan@nvidia.com>
> > Sent: Tuesday, March 2, 2021 6:10 PM
> > To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Li Zhang
> > <lizh@nvidia.com>; Dekel Peled <dekelp@nvidia.com>; Ori Kam
> > <orika@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> > Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon <thomas@monjalon.net>;
> > Raslan Darawsheh <rasland@nvidia.com>; mb@smartsharesystems.com;
> > ajit.khaparde@broadcom.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
> > Singh, Jasvinder <jasvinder.singh@intel.com>
> > Subject: RE: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> >
> > Hi Cristian
> >
> > Good discussion, thank you for that!
> >
> > From: Dumitrescu, Cristian
> > > Hi Matan,
> > >
> > > > -----Original Message-----
> > > > From: Matan Azrad <matan@nvidia.com>
> > > > Sent: Tuesday, March 2, 2021 12:37 PM
> > > > To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Li Zhang
> > > > <lizh@nvidia.com>; Dekel Peled <dekelp@nvidia.com>; Ori Kam
> > > > <orika@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> > > > Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon
> > <thomas@monjalon.net>;
> > > > Raslan Darawsheh <rasland@nvidia.com>; mb@smartsharesystems.com;
> > > > ajit.khaparde@broadcom.com; Yigit, Ferruh
> > > > <ferruh.yigit@intel.com>; Singh, Jasvinder
> > > > <jasvinder.singh@intel.com>
> > > > Subject: RE: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> > > >
> > > > HI Cristian
> > > >
> > > > From: Dumitrescu, Cristian
> > > > > Hi Matan,
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Matan Azrad <matan@nvidia.com>
> > > > > > Sent: Tuesday, March 2, 2021 7:02 AM
> > > > > > To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Li
> > > > > > Zhang <lizh@nvidia.com>; Dekel Peled <dekelp@nvidia.com>; Ori
> > > > > > Kam <orika@nvidia.com>; Slava Ovsiienko
> > > > > > <viacheslavo@nvidia.com>
> > > > > > Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon
> > > > <thomas@monjalon.net>;
> > > > > > Raslan Darawsheh <rasland@nvidia.com>;
> > mb@smartsharesystems.com;
> > > > > > ajit.khaparde@broadcom.com; Yigit, Ferruh
> > > > > > <ferruh.yigit@intel.com>; Singh, Jasvinder
> > > > > > <jasvinder.singh@intel.com>
> > > > > > Subject: RE: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS
> > > > > > profile
> > > > > >
> > > > > >
> > > > > >
> > > > > > Hi Cristian
> > > > > >
> > > > > > Thank you for review, please see inline.
> > > > > >
> > > > > > From: Dumitrescu, Cristian
> > > > > > > > From: dev <dev-bounces@dpdk.org> On Behalf Of Li Zhang
> > > > > > <snip>
> > > > > > > We had this same problem earlier for the rte_tm.h API, where
> > > > > > > people
> > > > > > asked to
> > > > > > > add support for WRED and shaper rates specified in packets
> > > > > > > to the existing
> > > > > > byte
> > > > > > > rate support. I am more than happy to support adding the
> > > > > > > same here, but please let's adopt the same solution here
> > > > > > > rather than invent a different approach.
> > > > > > >
> > > > > > > Please refer to struct rte_tm_wred_params and struct
> > > > > > rte_tm_shaper_params
> > > > > > > from rte_tm.h: the packets vs. bytes mode is explicitly
> > > > > > > specified through
> > > > > > the use
> > > > > > > of a flag called packet_mode that is added to the WRED and
> > > > > > > shaper
> > > > profile.
> > > > > > > When packet_mode is 0, the profile rates and bucket sizes
> > > > > > > are specified in bytes per second and bytes, respectively;
> > > > > > > when packet_mode is not 0, the profile rates and bucket
> > > > > > > sizes are specified in packets and packets per
> > > > > > second,
> > > > > > > respectively. The same profile parameters are used, no need
> > > > > > > to invent additional algorithms (such as srTCM - packet
> > > > > > > mode) or profile data
> > > > > > structures.
> > > > > > > Can we do the same here, please?
> > > > > >
> > > > > > This flag approach is very intuitive suggestion and it has advantages.
> > > > > >
> > > > > > The main problem with the flag approach is that it breaks ABI and API.
> > > > > > The profile structure size is changed due to a new field - ABI
> > breakage.
> > > > > > The user must initialize the flag with zero to get old
> > > > > > behavior - API
> > > > breakage.
> > > > > >
> > > > >
> > > > > The rte_mtr API is experimental, all the API functions are
> > > > > correctly marked with __rte_experimental in rte_mtr.h file, so
> > > > > we can safely change the API
> > > > and
> > > > > the ABI breakage is not applicable here. Therefore, this problem
> > > > > does not
> > > > exist,
> > > > > correct?
> > > >
> > > > Yes, but still meter is not new API and I know that a lot of user
> > > > uses it for a long time.
> > > > Forcing them to change while we have good solution that don't
> > > > force it, looks me problematic.
> > > >
> > >
> > > Not really, only 3 drivers are currently implementing this API.
> >
> > The user is not the PMD, the PMDs are the providers.
> > I'm talking about all our customers, all the current DPDK based
> > applications like OVS and others (I familiar with at least 4 ConnectX
> > customer applications) which use the meter API and I'm sure there are more
> around the world.
> >
> > > Even to these drivers, the required changes are none or extremely small:
> > as Ajit
> > > was also noting, as the default value of 0 continues to represent
> > > the
> > existing
> > > byte mode, all you have to do is make sure the new flag is set to
> > > zero in the profile params structure, which is already done
> > > implicitly in most places as
> > this
> > > structure is initialized to all-zeros.
> >
> > Are you sure all the world initialize the struct to 0? and also in
> > this case, without new compilation, not all the struct will be
> > zeroes(the old size is smaller).
> >
> > > A simple search exercise for struct rte_mtr_meter_profile is all
> > > that is
> > needed.
> > > You also agreed the flag approach is very intuitive, hence better
> > > and nicer,
> > with
> > > no additional work needed for you, so why not do it?
> >
> > Do you understand that any current application that use the meter API
> > must recompile the code of the application? Part of them also probably
> > need to set the flag to 0....
> > Do you understand also the potential issues for the applications which
> > are not aware to the change? Debug time, etc....
> >
> > > > > > I don't see issues with Li suggestion, Do you think Li
> > > > > > suggestion has critical issues?
> > > > >
> > > > > It is probably better to keep the rte_mtr and the rte_tm APIs
> > > > > aligned, it simplifies the code maintenance and improves the
> > > > > user experience, which always pays off in the long run. Both
> > > > > APIs configure token buckets in either packet mode or byte mode,
> > > > > and it is desirable to have them work in the
> > > > same
> > > > > way. Also, I think we should avoid duplicating configuration
> > > > > data structures
> > > > for
> > > > > to support essentially the same algorithms (such as srTCM or
> > > > > trTCM) if we
> > > > can.
> > > > >
> > > >
> > > > Yes, but I don't think this motivation is critical.
> > >
> > > I really disagree. As API maintainer, making every effort to keep
> > > the APIs
> > clear
> > > and consistent is a critical task for me.
> >
> > New pps profile is also clear and simple.
> >
> > > We don't want to proliferate the API data structures and parameters
> > > if there is a good way to avoid it. Especially
> > in
> > > cases like this, when the drivers are just beginning to pick up this
> > > (still
> > > experimental) API,  we have the rare chance to make things right and
> > therefore
> > > we should do it. Please also keep in mind that, as more feature are
> > > added
> > to
> > > the API, small corner cuts like this one that might not look like a
> > > big deal
> > now,
> > > eventually come back as unnecessary complexity in the drivers themselves.
> >
> > I don't see a complexity in the current suggestion.
> >
> > > So, please, let's try to keep the quality of the APIs high.
> >
> > Also by this way is high.
> >
> >
> > Look, the flag approach is also good and makes the job.
> > The two approaches are clear, simple and in high quality.
> > I don't care which one from the 2 to take but I want to be sure we are
> > all understand the pros and cons.
> >
> > If you understand my concern on flag approach and still insist to take
> > the flag approach we will align.
> 
> Yes, thanks for summarizing the pros and cons, I confirm that I do understand
> your concerns.
> 
> Yes, sorry to disappoint you, I still think the packet_mode based approach is
> better for the long run, as it keeps the APIs clean and consistent. We are not
> adding new algorithms here, we're just adding a new mode to an existing
> algorithm, so IMO we should not duplicate configuration data structures and
> proliferate the number of algorithms artificially.

Actually, the PPS meter is a new algorithm - you can see that the current algorithms' RFCs don't talk about PPS.

> Yes, I do realize that in some limited cases the users will have to explicitly set
> the new packet_mode flag to zero or one, in case they need to enable the
> packet mode, but I think this is an acceptable cost because: (A) This API is
> clearly marked as experimental; (B) It is better to take a small incremental hit
> now to keep the APIs in good order rather than taking a bit hit in a few years as
> more features are added in the wrong way and the APIs become
> unmanageable.

I don't think that the current suggestion is in wrong way.
In any case, you insist, we will align.

> > And if we so, and we are going to break the API\ABI, we are going to
> > introduce new meter policy API soon and there, breaking API can help,
> > lets see in other discussion later.
> >
> 
> Yes, as you point out API changes are unavoidable as new features are added,
> we have to manage the API evolution correctly.
> 
> > One more point:
> > Currently, the meter_id is managed by the user, I think it is better
> > to let the PMDs to manage the meter_id.
> >
> > Searching the PMD meter handler inside the PMD is very expensive for
> > the API call rate when the meter_id is managed by the user.
> >
> > Same for profile_id.
> >
> > Also all the rte_flow API including the shared action API taking the
> > PMD management approach.
> >
> > What do you think?
> >
> 
> Yes, we have carefully considered and discussed both approaches a few years
> back when the API was introduced, this is not done by accident :), there are
> pros and cons for each of them.
> 
> If the object IDs are generated by the driver (outputs of the API), then it is the
> user application that needs to keep track of them, which can be very painful.
> Basically, for each API object the user application needs to create its own
> wrapper to store this ID. We basically transfer this problem to the user app.

Not exactly, and not for all cases: the app gets the meter_id at the same time it decides it now.
 
> If the object IDs are generated by the user application (inputs into the API),
> then we simplify the application by removing and indirection layer. Yes, it is
> true that this indirection layer now moves into the driver, but we should try to
> make the life easier for the appl developers as opposed to us, the driver
> developers. This indirection layer in the driver can be made a bit smarter than
> just a slow "for" loop; the search operation can be made faster with a small bit
> of effort, such as keeping this list sorted based on the object ID, splitting this list
> into buckets (similar to a hash table), etc, right?

Yes, there are even better solutions than a hash table from a "rate" perspective.

But any solution costs a lot of memory just for this mapping...
When we talk about 4M meters being supported (in the next mlx5 release), it becomes an issue.
 
> Having the user app provide the object ID is especially important in the case of
> rte_tm API, where we have to deal with a tree of nodes, with thousands of
> nodes for each level. Having the app to store and manages this tree of IDs is a
> really bad idea, as the user app needs to mirror the tree of nodes on its side for
> no real benefit. As an added benefit, the user can generate these IDs using a
> rule, such as: given the specific path through the tree, the value of the ID can
> be computed.

rte_tm is not rte_mtr - I think meter is different and used differently.
For example, as far as I know, none of our DPDK meter customers (at least 5) use TREEs for meter management. OVS, for example, just randomizes a meter_id and doesn't care about it.

Also, all the rte_flow API basics works with PMD ID\handle management approach.

> But again, as you also mention above, there is a list of pros and cons for every
> approach, no approach is perfect. We took this approach for the good reasons
> listed above.

If you are familiar with TREE usage for meters, maybe we can easily combine the two approaches on this topic:

the meter_id argument can be passed by reference; if it is 0, the PMD sets it, and if not, the PMD uses it.

> > > > > The flag proposal is actually reducing the amount of work that
> > > > > you guys
> > > > need to
> > > > > do to implement your proposal. There is no negative impact to
> > > > > your
> > > > proposal
> > > > > and no big change, right?
> > > >
> > > > Yes you right, but the implementation effect is not our concern.
> > > >
> > > >
> > > > > > > This is a quick summary of the required API changes to add
> > > > > > > support for the packet mode, they are minimal:
> > > > > > > a) Introduce the packet_mode flag in the profile parameters
> > > > > > > data
> > > > > > structure.
> > > > > > > b) Change the description (comment) of the rate and bucket
> > > > > > > size
> > > > > > parameters in
> > > > > > > the meter profile parameters data structures to reflect that
> > > > > > > their values represents either bytes or packets, depending
> > > > > > > on the value of the new flag packet_mode from the same structure.
> > > > > > > c) Add the relevant capabilities: just search for "packet"
> > > > > > > in the rte_tm.h capabilities data structures and apply the
> > > > > > > same to the rte_mtr.h
> > > > > > capabilities,
> > > > > > > when applicable.
> > > > > >
> > > > > > > Regards,
> > > > > > > Cristian
> > > > >
> > > > > Regards,
> > > > > Cristian
> > >
> > > Regards,
> > > Cristian
> 
> Regards,
> Cristian
Cristian Dumitrescu March 5, 2021, 6:44 p.m. UTC | #13
> -----Original Message-----
> From: Matan Azrad <matan@nvidia.com>
> Sent: Thursday, March 4, 2021 6:34 AM
> To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Li Zhang
> <lizh@nvidia.com>; Dekel Peled <dekelp@nvidia.com>; Ori Kam
> <orika@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon
> <thomas@monjalon.net>; Raslan Darawsheh <rasland@nvidia.com>;
> mb@smartsharesystems.com; ajit.khaparde@broadcom.com; Yigit, Ferruh
> <ferruh.yigit@intel.com>; Singh, Jasvinder <jasvinder.singh@intel.com>
> Subject: RE: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> 
> Hi Cristian
> 
> From: Dumitrescu, Cristian
> > Hi Matan,
> >
> > > -----Original Message-----
> > > From: Matan Azrad <matan@nvidia.com>
> > > Sent: Tuesday, March 2, 2021 6:10 PM
> > > To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Li Zhang
> > > <lizh@nvidia.com>; Dekel Peled <dekelp@nvidia.com>; Ori Kam
> > > <orika@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> > > Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon
> <thomas@monjalon.net>;
> > > Raslan Darawsheh <rasland@nvidia.com>; mb@smartsharesystems.com;
> > > ajit.khaparde@broadcom.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
> > > Singh, Jasvinder <jasvinder.singh@intel.com>
> > > Subject: RE: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> > >
> > > Hi Cristian
> > >
> > > Good discussion, thank you for that!
> > >
> > > From: Dumitrescu, Cristian
> > > > Hi Matan,
> > > >
> > > > > -----Original Message-----
> > > > > From: Matan Azrad <matan@nvidia.com>
> > > > > Sent: Tuesday, March 2, 2021 12:37 PM
> > > > > To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Li Zhang
> > > > > <lizh@nvidia.com>; Dekel Peled <dekelp@nvidia.com>; Ori Kam
> > > > > <orika@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>
> > > > > Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon
> > > <thomas@monjalon.net>;
> > > > > Raslan Darawsheh <rasland@nvidia.com>;
> mb@smartsharesystems.com;
> > > > > ajit.khaparde@broadcom.com; Yigit, Ferruh
> > > > > <ferruh.yigit@intel.com>; Singh, Jasvinder
> > > > > <jasvinder.singh@intel.com>
> > > > > Subject: RE: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS profile
> > > > >
> > > > > HI Cristian
> > > > >
> > > > > From: Dumitrescu, Cristian
> > > > > > Hi Matan,
> > > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: Matan Azrad <matan@nvidia.com>
> > > > > > > Sent: Tuesday, March 2, 2021 7:02 AM
> > > > > > > To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Li
> > > > > > > Zhang <lizh@nvidia.com>; Dekel Peled <dekelp@nvidia.com>; Ori
> > > > > > > Kam <orika@nvidia.com>; Slava Ovsiienko
> > > > > > > <viacheslavo@nvidia.com>
> > > > > > > Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon
> > > > > <thomas@monjalon.net>;
> > > > > > > Raslan Darawsheh <rasland@nvidia.com>;
> > > mb@smartsharesystems.com;
> > > > > > > ajit.khaparde@broadcom.com; Yigit, Ferruh
> > > > > > > <ferruh.yigit@intel.com>; Singh, Jasvinder
> > > > > > > <jasvinder.singh@intel.com>
> > > > > > > Subject: RE: [dpdk-dev] [RFC v4 1/4] ethdev: add meter PPS
> > > > > > > profile
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > Hi Cristian
> > > > > > >
> > > > > > > Thank you for review, please see inline.
> > > > > > >
> > > > > > > From: Dumitrescu, Cristian
> > > > > > > > > From: dev <dev-bounces@dpdk.org> On Behalf Of Li Zhang
> > > > > > > <snip>
> > > > > > > > We had this same problem earlier for the rte_tm.h API, where
> > > > > > > > people
> > > > > > > asked to
> > > > > > > > add support for WRED and shaper rates specified in packets
> > > > > > > > to the existing
> > > > > > > byte
> > > > > > > > rate support. I am more than happy to support adding the
> > > > > > > > same here, but please let's adopt the same solution here
> > > > > > > > rather than invent a different approach.
> > > > > > > >
> > > > > > > > Please refer to struct rte_tm_wred_params and struct
> > > > > > > rte_tm_shaper_params
> > > > > > > > from rte_tm.h: the packets vs. bytes mode is explicitly
> > > > > > > > specified through
> > > > > > > the use
> > > > > > > > of a flag called packet_mode that is added to the WRED and
> > > > > > > > shaper
> > > > > profile.
> > > > > > > > When packet_mode is 0, the profile rates and bucket sizes
> > > > > > > > are specified in bytes per second and bytes, respectively;
> > > > > > > > when packet_mode is not 0, the profile rates and bucket
> > > > > > > > sizes are specified in packets and packets per
> > > > > > > second,
> > > > > > > > respectively. The same profile parameters are used, no need
> > > > > > > > to invent additional algorithms (such as srTCM - packet
> > > > > > > > mode) or profile data
> > > > > > > structures.
> > > > > > > > Can we do the same here, please?
> > > > > > >
> > > > > > > This flag approach is very intuitive suggestion and it has
> advantages.
> > > > > > >
> > > > > > > The main problem with the flag approach is that it breaks ABI and
> API.
> > > > > > > The profile structure size is changed due to a new field - ABI
> > > breakage.
> > > > > > > The user must initialize the flag with zero to get old
> > > > > > > behavior - API
> > > > > breakage.
> > > > > > >
> > > > > >
> > > > > > The rte_mtr API is experimental, all the API functions are
> > > > > > correctly marked with __rte_experimental in rte_mtr.h file, so
> > > > > > we can safely change the API
> > > > > and
> > > > > > the ABI breakage is not applicable here. Therefore, this problem
> > > > > > does not
> > > > > exist,
> > > > > > correct?
> > > > >
> > > > > Yes, but still meter is not new API and I know that a lot of user
> > > > > uses it for a long time.
> > > > > Forcing them to change while we have good solution that don't
> > > > > force it, looks me problematic.
> > > > >
> > > >
> > > > Not really, only 3 drivers are currently implementing this API.
> > >
> > > The user is not the PMD, the PMDs are the providers.
> > > I'm talking about all our customers, all the current DPDK based
> > > applications like OVS and others (I familiar with at least 4 ConnectX
> > > customer applications) which use the meter API and I'm sure there are
> more
> > around the world.
> > >
> > > > Even to these drivers, the required changes are none or extremely
> small:
> > > as Ajit
> > > > was also noting, as the default value of 0 continues to represent
> > > > the
> > > existing
> > > > byte mode, all you have to do is make sure the new flag is set to
> > > > zero in the profile params structure, which is already done
> > > > implicitly in most places as
> > > this
> > > > structure is initialized to all-zeros.
> > >
> > > Are you sure all the world initialize the struct to 0? and also in
> > > this case, without new compilation, not all the struct will be
> > > zeroes(the old size is smaller).
> > >
> > > > A simple search exercise for struct rte_mtr_meter_profile is all
> > > > that is
> > > needed.
> > > > You also agreed the flag approach is very intuitive, hence better
> > > > and nicer,
> > > with
> > > > no additional work needed for you, so why not do it?
> > >
> > > Do you understand that any current application that use the meter API
> > > must recompile the code of the application? Part of them also probably
> > > need to set the flag to 0....
> > > Do you understand also the potential issues for the applications which
> > > are not aware to the change? Debug time, etc....
> > >
> > > > > > > I don't see issues with Li suggestion, Do you think Li
> > > > > > > suggestion has critical issues?
> > > > > >
> > > > > > It is probably better to keep the rte_mtr and the rte_tm APIs
> > > > > > aligned, it simplifies the code maintenance and improves the
> > > > > > user experience, which always pays off in the long run. Both
> > > > > > APIs configure token buckets in either packet mode or byte mode,
> > > > > > and it is desirable to have them work in the
> > > > > same
> > > > > > way. Also, I think we should avoid duplicating configuration
> > > > > > data structures
> > > > > for
> > > > > > to support essentially the same algorithms (such as srTCM or
> > > > > > trTCM) if we
> > > > > can.
> > > > > >
> > > > >
> > > > > Yes, but I don't think this motivation is critical.
> > > >
> > > > I really disagree. As API maintainer, making every effort to keep
> > > > the APIs
> > > clear
> > > > and consistent is a critical task for me.
> > >
> > > New pps profile is also clear and simple.
> > >
> > > > We don't want to proliferate the API data structures and parameters
> > > > if there is a good way to avoid it. Especially
> > > in
> > > > cases like this, when the drivers are just beginning to pick up this
> > > > (still
> > > > experimental) API,  we have the rare chance to make things right and
> > > therefore
> > > > we should do it. Please also keep in mind that, as more feature are
> > > > added
> > > to
> > > > the API, small corner cuts like this one that might not look like a
> > > > big deal
> > > now,
> > > > eventually come back as unnecessary complexity in the drivers
> themselves.
> > >
> > > I don't see a complexity in the current suggestion.
> > >
> > > > So, please, let's try to keep the quality of the APIs high.
> > >
> > > Also by this way is high.
> > >
> > >
> > > Look, the flag approach is also good and makes the job.
> > > The two approaches are clear, simple and in high quality.
> > > I don't care which one from the 2 to take but I want to be sure we are
> > > all understand the pros and cons.
> > >
> > > If you understand my concern on flag approach and still insist to take
> > > the flag approach we will align.
> >
> > Yes, thanks for summarizing the pros and cons, I confirm that I do
> understand
> > your concerns.
> >
> > Yes, sorry to disappoint you, I still think the packet_mode based approach
> is
> > better for the long run, as it keeps the APIs clean and consistent. We are
> not
> > adding new algorithms here, we're just adding a new mode to an existing
> > algorithm, so IMO we should not duplicate configuration data structures
> and
> > proliferate the number of algorithms artificially.
> 
> Actually, PPS meter is a new algorithm - you can see that current algorithms
> RFCs don't talk about PPS.
> 

Yes, I know, I implemented it in librte_meter, but still, it is the same algorithm with just a different measurement unit (packet instead of byte), that's why many people (and you included :) ) still refer to it as srTCM  - RFC 2697.

> > Yes, I do realize that in some limited cases the users will have to explicitly
> set
> > the new packet_mode flag to zero or one, in case they need to enable the
> > packet mode, but I think this is an acceptable cost because: (A) This API is
> > clearly marked as experimental; (B) It is better to take a small incremental
> hit
> > now to keep the APIs in good order rather than taking a bit hit in a few
> years as
> > more features are added in the wrong way and the APIs become
> > unmanageable.
> 
> I don't think that the current suggestion is in wrong way.
> In any case, you insist, we will align.
> 

Thank you.

> > > And if we so, and we are going to break the API\ABI, we are going to
> > > introduce new meter policy API soon and there, breaking API can help,
> > > lets see in other discussion later.
> > >
> >
> > Yes, as you point out API changes are unavoidable as new features are
> added,
> > we have to manage the API evolution correctly.
> >
> > > One more point:
> > > Currently, the meter_id is managed by the user, I think it is better
> > > to let the PMDs to manage the meter_id.
> > >
> > > Searching the PMD meter handler inside the PMD is very expensive for
> > > the API call rate when the meter_id is managed by the user.
> > >
> > > Same for profile_id.
> > >
> > > Also all the rte_flow API including the shared action API taking the
> > > PMD management approach.
> > >
> > > What do you think?
> > >
> >
> > Yes, we have carefully considered and discussed both approaches a few
> years
> > back when the API was introduced, this is not done by accident :), there are
> > pros and cons for each of them.
> >
> > If the object IDs are generated by the driver (outputs of the API), then it is
> the
> > user application that needs to keep track of them, which can be very
> painful.
> > Basically, for each API object the user application needs to create its own
> > wrapper to store this ID. We basically transfer this problem to the user app.
> 
> No exactly\not for all, the app gets the meter_id in the same time it decides
> it now.
> 
> > If the object IDs are generated by the user application (inputs into the API),
> > then we simplify the application by removing and indirection layer. Yes, it is
> > true that this indirection layer now moves into the driver, but we should try
> to
> > make the life easier for the appl developers as opposed to us, the driver
> > developers. This indirection layer in the driver can be made a bit smarter
> than
> > just a slow "for" loop; the search operation can be made faster with a small
> bit
> > of effort, such as keeping this list sorted based on the object ID, splitting
> this list
> > into buckets (similar to a hash table), etc, right?
> 
> Yes, there are even better solution than hash table from "rate" perspective.
> 

I'd be very interested to hear your proposals here.

> But any solution costs a lot of memory just for this mapping...
> When we talked about 4M meters supported(in mlx5 next release) it
> becomes an issue.
> 

I thought your concern was about the speed/rate of API calls; are you saying it is not speed but memory footprint?

I would imagine that a system that enables all the 4M meters is a big beast with the most powerful CPU on the planet and many dozens of gigabytes of RAM, so a few extra megabytes for some API layers is not a concern?

> > Having the user app provide the object ID is especially important in the case
> of
> > rte_tm API, where we have to deal with a tree of nodes, with thousands of
> > nodes for each level. Having the app to store and manages this tree of IDs
> is a
> > really bad idea, as the user app needs to mirror the tree of nodes on its
> side for
> > no real benefit. As an added benefit, the user can generate these IDs using
> a
> > rule, such as: given the specific path through the tree, the value of the ID
> can
> > be computed.
> 
> rte_tm is not rte_mtr - I think meter is different and used differently.
> For example, as I know, no one from our dpdk meter customers(at least 5)
> use TREEs for meter management. OVS, for example, just randomize some
> meter_id and don't care about it.
> 

What kinds of trees? I'd be very interested to hear some proposals to make this handle mapping faster.

> Also, all the rte_flow API basics works with PMD ID\handle management
> approach.
> 

Yes, I am not saying it is wrong, none of the approaches is wrong IMO.

> > But again, as you also mention above, there is a list of pros and cons for
> every
> > approach, no approach is perfect. We took this approach for the good
> reasons
> > listed above.
> 
> If you familiar with TREE usage with meter, maybe we can combined easily
> the two approaches in this topic,
> 
> meter_id argument can be by reference, if it 0 - PMD set it, if not PMD use it.
> 

It would be good if you could elaborate here a bit, just to make sure we are on the same page.

> > > > > > The flag proposal is actually reducing the amount of work that
> > > > > > you guys
> > > > > need to
> > > > > > do to implement your proposal. There is no negative impact to
> > > > > > your
> > > > > proposal
> > > > > > and no big change, right?
> > > > >
> > > > > Yes you right, but the implementation effect is not our concern.
> > > > >
> > > > >
> > > > > > > > This is a quick summary of the required API changes to add
> > > > > > > > support for the packet mode, they are minimal:
> > > > > > > > a) Introduce the packet_mode flag in the profile parameters
> > > > > > > > data
> > > > > > > structure.
> > > > > > > > b) Change the description (comment) of the rate and bucket
> > > > > > > > size
> > > > > > > parameters in
> > > > > > > > the meter profile parameters data structures to reflect that
> > > > > > > > their values represents either bytes or packets, depending
> > > > > > > > on the value of the new flag packet_mode from the same
> structure.
> > > > > > > > c) Add the relevant capabilities: just search for "packet"
> > > > > > > > in the rte_tm.h capabilities data structures and apply the
> > > > > > > > same to the rte_mtr.h
> > > > > > > capabilities,
> > > > > > > > when applicable.
> > > > > > >
> > > > > > > > Regards,
> > > > > > > > Cristian
> > > > > >
> > > > > > Regards,
> > > > > > Cristian
> > > >
> > > > Regards,
> > > > Cristian
> >
> > Regards,
> > Cristian

Regards,
Cristian
diff mbox series

Patch

diff --git a/doc/guides/prog_guide/traffic_metering_and_policing.rst b/doc/guides/prog_guide/traffic_metering_and_policing.rst
index 90c781eb1d..4d2405d44a 100644
--- a/doc/guides/prog_guide/traffic_metering_and_policing.rst
+++ b/doc/guides/prog_guide/traffic_metering_and_policing.rst
@@ -17,7 +17,8 @@  The main features are:
 * Part of DPDK rte_ethdev API
 * Capability query API
 * Metering algorithms: RFC 2697 Single Rate Three Color Marker (srTCM), RFC 2698
-  and RFC 4115 Two Rate Three Color Marker (trTCM)
+  and RFC 4115 Two Rate Three Color Marker (trTCM),
+  Single Rate Three Color Marker, Packet based (srTCMp).
 * Policer actions (per meter output color): recolor, drop
 * Statistics (per policer output color)
 
diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst
index 7405a9864f..de04886cc9 100644
--- a/doc/guides/rel_notes/release_20_11.rst
+++ b/doc/guides/rel_notes/release_20_11.rst
@@ -429,6 +429,11 @@  New Features
   can leverage IOAT DMA channels with vhost asynchronous APIs.
   See the :doc:`../sample_app_ug/vhost` for more details.
 
+* **Added support for meter PPS profile.**
+
+  Currently, meter algorithms only support bytes per second (BPS).
+  Add a new meter algorithm to support packets per second (PPS) mode,
+  so that traffic can be metered by packets per second.
 
 Removed Items
 -------------
diff --git a/lib/librte_ethdev/rte_mtr.h b/lib/librte_ethdev/rte_mtr.h
index 916a09c5c3..f27a4b5354 100644
--- a/lib/librte_ethdev/rte_mtr.h
+++ b/lib/librte_ethdev/rte_mtr.h
@@ -119,6 +119,11 @@  enum rte_mtr_algorithm {
 
 	/** Two Rate Three Color Marker (trTCM) - IETF RFC 4115. */
 	RTE_MTR_TRTCM_RFC4115,
+
+	/** Single Rate Three Color Marker, Packet based (srTCMp).
+	 * - similar to IETF RFC 2697, but the rate is in packets per second.
+	 */
+	RTE_MTR_SRTCMP,
 };
 
 /**
@@ -171,6 +176,20 @@  struct rte_mtr_meter_profile {
 			/** Excess Burst Size (EBS) (bytes). */
 			uint64_t ebs;
 		} trtcm_rfc4115;
+
+		/** Items only valid when *alg* is set to srTCMp. */
+		struct {
+			/** Committed Information Rate (CIR)
+			 * (packets/second).
+			 */
+			uint64_t cir;
+
+			/** Committed Burst Size (CBS) (packets). */
+			uint64_t cbs;
+
+			/** Excess Burst Size (EBS) (packets). */
+			uint64_t ebs;
+		} srtcmp;
 	};
 };
 
@@ -317,6 +336,13 @@  struct rte_mtr_capabilities {
 	 */
 	uint32_t meter_trtcm_rfc4115_n_max;
 
+	/** Maximum number of MTR objects that can have their meter configured
+	 * to run the srTCMp algorithm. The value of 0
+	 * indicates this metering algorithm is not supported.
+	 * The maximum value is *n_max*.
+	 */
+	uint32_t meter_srtcmp_n_max;
+
 	/** Maximum traffic rate that can be metered by a single MTR object. For
 	 * srTCM RFC 2697, this is the maximum CIR rate. For trTCM RFC 2698,
 	 * this is the maximum PIR rate. For trTCM RFC 4115, this is the maximum
@@ -342,6 +368,12 @@  struct rte_mtr_capabilities {
 	 */
 	int color_aware_trtcm_rfc4115_supported;
 
+	/**
+	 * When non-zero, it indicates that color aware mode is supported for
+	 * the srTCMp metering algorithm.
+	 */
+	int color_aware_srtcmp_supported;
+
 	/** When non-zero, it indicates that the policer packet recolor actions
 	 * are supported.
 	 * @see enum rte_mtr_policer_action