[RFC,v3,2/2] ethdev: add API to set process to active or standby

Message ID 20221221090017.3715030-3-rongweil@nvidia.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series add API to set process to active or standby |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS

Commit Message

Rongwei Liu Dec. 21, 2022, 9 a.m. UTC
  Users may want to switch a running DPDK process to a different version,
for example during a hot upgrade.
There is a strong requirement to simplify the logic and shorten the
traffic downtime as much as possible.

This update introduces new rte_eth process role definitions: active
or standby.

The active role means rules are programmed to HW immediately, with no
change in behavior. This is the default state.
The standby role means rules are queued in the HW. If no active process is
alive, or the process switches back to active, the rules become effective
immediately.

Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
---
 doc/guides/nics/mlx5.rst               | 10 ++++
 doc/guides/rel_notes/release_22_03.rst |  5 ++
 lib/ethdev/ethdev_driver.h             | 63 ++++++++++++++++++++++++++
 lib/ethdev/rte_ethdev.c                | 41 +++++++++++++++++
 lib/ethdev/rte_ethdev.h                |  7 ++-
 lib/ethdev/version.map                 |  3 ++
 6 files changed, 128 insertions(+), 1 deletion(-)
  

Comments

Jerin Jacob Dec. 21, 2022, 9:12 a.m. UTC | #1
On Wed, Dec 21, 2022 at 2:31 PM Rongwei Liu <rongweil@nvidia.com> wrote:
>
> Users may want to change the DPDK process to different versions

Different version of DPDK? If there is any ABI change how to support this?

> such as hot upgrade.
> There is a strong requirement to simplify the logic and shorten the
> traffic downtime as much as possible.
>
> This update introduces new rte_eth process role definitions: active
> or standby.
>
> The active role means rules are programmed to HW immediately, and no

Why does it have to be specific only to rte_flow rules? If it is specific to
rte_flow, why is it in the rte_eth_process_ namespace?

Also, if we are moving the standby, What about the rule whose ABI is
changed between versions?

> behavior changed. This is the default state.
> The standby role means rules are queued in the HW. If no active roles
> alive or back to active, the rules are effective immediately.
>
> Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
> ---
>  doc/guides/nics/mlx5.rst               | 10 ++++
>  doc/guides/rel_notes/release_22_03.rst |  5 ++
>  lib/ethdev/ethdev_driver.h             | 63 ++++++++++++++++++++++++++
>  lib/ethdev/rte_ethdev.c                | 41 +++++++++++++++++
>  lib/ethdev/rte_ethdev.h                |  7 ++-
>  lib/ethdev/version.map                 |  3 ++
>  6 files changed, 128 insertions(+), 1 deletion(-)
>
> diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
> index 51f51259e3..de1fdac0a1 100644
> --- a/doc/guides/nics/mlx5.rst
> +++ b/doc/guides/nics/mlx5.rst
> @@ -2001,3 +2001,13 @@ where:
>  * ``sw_queue_id``: queue index in range [64536, 65535].
>    This range is the highest 1000 numbers.
>  * ``hw_queue_id``: queue index given by HW in queue creation.
> +
> +ethdev set process active or standby
> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> +
> +User should only program group 0 (fdb_def_rule_en=0) when ``rte_eth_process_set_active``
> +has been called and set to a standby role.
> +Group 0 is shared across different DPDK processes while the other groups are limited
> +to the current process scope.
> +The process can't move from active to standby role if preceding active application's
> +rules are still present and vice versa.
> diff --git a/doc/guides/rel_notes/release_22_03.rst b/doc/guides/rel_notes/release_22_03.rst
> index 0923707cb8..6fa48106c4 100644
> --- a/doc/guides/rel_notes/release_22_03.rst
> +++ b/doc/guides/rel_notes/release_22_03.rst
> @@ -207,6 +207,11 @@ API Changes
>  * ethdev: Old public macros and enumeration constants without ``RTE_ETH_`` prefix,
>    which are kept for backward compatibility, are marked as deprecated.
>
> +* ethdev: added a new experimental api:
> +
> +  The new API ``rte_eth_process_set_active()`` was added.
> +  If ``RTE_ETH_CAPA_PROCESS_SET_ROLE`` is not advertised, this new api is not supported.
> +
>  * cryptodev: The asymmetric session handling was modified to use a single
>    mempool object. An API ``rte_cryptodev_asym_session_pool_create`` was added
>    to create a mempool with element size big enough to hold the generic asymmetric
> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
> index 6a550cfc83..3c583bc39d 100644
> --- a/lib/ethdev/ethdev_driver.h
> +++ b/lib/ethdev/ethdev_driver.h
> @@ -179,6 +179,16 @@ struct rte_eth_dev_data {
>         pthread_mutex_t flow_ops_mutex; /**< rte_flow ops mutex */
>  } __rte_cache_aligned;
>
> +/**@{@name Different rte_eth role flag definitions which will be used
> + *  when miagrating DPDK to a different version.
> + */
> +/*
> + * Traffic coming from NIC domain rules will reach
> + * both active and standby processes.
> + */
> +#define RTE_ETH_PROCESS_NIC_DUP_WITH_STANDBY RTE_BIT32(0),
> +/**@}*/
> +
>  /**
>   * @internal
>   * The pool of *rte_eth_dev* structures. The size of the pool
> @@ -1087,6 +1097,22 @@ typedef const uint32_t *(*eth_buffer_split_supported_hdr_ptypes_get_t)(struct rt
>   */
>  typedef int (*eth_dev_priv_dump_t)(struct rte_eth_dev *dev, FILE *file);
>
> +/**
> + * @internal
> + * Set rte_eth process to active or standby role.
> + *
> + * @param dev
> + *   Port (ethdev) handle.
> + * @param active
> + *   Device (role) active or not (standby).
> + * @param flag
> + *   Role specific flag.
> + *
> + * @return
> + *   Negative value on error, 0 on success.
> + */
> +typedef int (*eth_process_set_active_t)(struct rte_eth_dev *dev, bool active, uint32_t flag);
> +
>  /**
>   * @internal Set Rx queue available descriptors threshold.
>   * @see rte_eth_rx_avail_thresh_set()
> @@ -1403,6 +1429,8 @@ struct eth_dev_ops {
>         eth_cman_config_set_t cman_config_set;
>         /** Retrieve congestion management configuration */
>         eth_cman_config_get_t cman_config_get;
> +       /** Set the whole rte_eth process to active or standby role. */
> +       eth_process_set_active_t eth_process_set_active;
>  };
>
>  /**
> @@ -2046,6 +2074,41 @@ struct rte_eth_fdir_conf {
>         struct rte_eth_fdir_flex_conf flex_conf;
>  };
>
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Set the rte_eth process to the active or standby role which affects
> + * the flow rules offloading. It doesn't allow multiple processes to be the
> + * same role unless no offload rules are set.
> + * The active process flow rules are effective immediately while the standby
> + * process rules will be matched (active) when the process becomes active or
> + * when the traffic is not matched by the active process rules.
> + * The active application will always receive traffic while the standby
> + * application will receive traffic when no matching rules are present from
> + * the active application.
> + *
> + * The application is active by default if this API is not called.
> + *
> + * When a process transforms from a standby to a active role, all preceding
> + * flow rules which are queued by hardware will be effective immediately.
> + * Before role transition, all the rules set by the active process should be
> + * flushed first.
> + *
> + * When role flag "RTE_ETH_PROCESS_NIC_DUP_WITH_STANDBY" is set, NIC domain
> + * flow rules are effective immediately even if a process is standby role.
> + *
> + * @param active
> + *   Process active (role) or not (standby).
> + * @param flag
> + *   The role flag.
> + * @return
> + *   - (>=0) Number of rte devices which have been switched successfully.
> + *   - (-EINVAL) if bad parameter.
> + */
> +__rte_experimental
> +int rte_eth_process_set_active(bool active, uint32_t flag);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index 5d5e18db1e..f19da75bfe 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -6318,6 +6318,47 @@ rte_eth_buffer_split_get_supported_hdr_ptypes(uint16_t port_id, uint32_t *ptypes
>         return j;
>  }
>
> +int rte_eth_process_set_active(bool active, uint32_t flag)
> +{
> +       struct rte_eth_dev_info dev_info = {0};
> +       uint32_t flags[RTE_MAX_ETHPORTS];
> +       struct rte_eth_dev *dev;
> +       uint16_t port_id;
> +       int ret = 0;
> +
> +       /* Check if all devices support. */
> +       RTE_ETH_FOREACH_DEV(port_id) {
> +               dev = &rte_eth_devices[port_id];
> +               if (*dev->dev_ops->dev_infos_get == NULL ||
> +                   *dev->dev_ops->eth_process_set_active == NULL)
> +                       return -ENOTSUP;
> +               if ((*dev->dev_ops->dev_infos_get)(dev, &dev_info))
> +                       return -EINVAL;
> +               if (!(dev_info.dev_capa & RTE_ETH_CAPA_PROCESS_SET_ROLE))
> +                       return -ENOTSUP;
> +       }
> +       RTE_ETH_FOREACH_DEV(port_id) {
> +               dev = &rte_eth_devices[port_id];
> +               if ((*dev->dev_ops->dev_infos_get)(dev, &dev_info))
> +                       goto err;
> +               flags[port_id] = dev_info.eth_process_flag;
> +               if ((*dev->dev_ops->eth_process_set_active)(dev, active, flag) < 0)
> +                       goto err;
> +               ret++;
> +       }
> +       return ret;
> +err:
> +       if (!ret)
> +               return 0;
> +       RTE_ETH_FOREACH_DEV(port_id) {
> +               dev = &rte_eth_devices[port_id];
> +               (*dev->dev_ops->eth_process_set_active)(dev, !active, flags[port_id]);
> +               if (--ret == 0)
> +                       break;
> +       }
> +       return 0;
> +}
> +
>  RTE_LOG_REGISTER_DEFAULT(rte_eth_dev_logtype, INFO);
>
>  RTE_INIT(ethdev_init_telemetry)
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index c129ca1eaf..d29f051d6f 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -1606,6 +1606,8 @@ struct rte_eth_conf {
>  #define RTE_ETH_DEV_CAPA_FLOW_RULE_KEEP         RTE_BIT64(3)
>  /** Device supports keeping shared flow objects across restart. */
>  #define RTE_ETH_DEV_CAPA_FLOW_SHARED_OBJECT_KEEP RTE_BIT64(4)
> +/**Device supports "rte_eth_process_set_active" callback. */
> +#define RTE_ETH_CAPA_PROCESS_SET_ROLE RTE_BIT64(5)
>  /**@}*/
>
>  /*
> @@ -1777,8 +1779,11 @@ struct rte_eth_dev_info {
>         struct rte_eth_switch_info switch_info;
>         /** Supported error handling mode. */
>         enum rte_eth_err_handle_mode err_handle_mode;
> +       /** Process specific role flag. */
> +       uint32_t eth_process_flag;
>
> -       uint64_t reserved_64s[2]; /**< Reserved for future fields */
> +       uint32_t reserved_32s[1]; /**< Reserved for future fields */
> +       uint64_t reserved_64s[1]; /**< Reserved for future fields */
>         void *reserved_ptrs[2];   /**< Reserved for future fields */
>  };
>
> diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
> index 17201fbe0f..a5503f6fde 100644
> --- a/lib/ethdev/version.map
> +++ b/lib/ethdev/version.map
> @@ -298,6 +298,9 @@ EXPERIMENTAL {
>         rte_flow_get_q_aged_flows;
>         rte_mtr_meter_policy_get;
>         rte_mtr_meter_profile_get;
> +
> +       # added in 23.03
> +       rte_eth_process_set_active;
>  };
>
>  INTERNAL {
> --
> 2.27.0
>
  
Rongwei Liu Dec. 21, 2022, 9:32 a.m. UTC | #2
HI Jerin:

BR
Rongwei

> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Wednesday, December 21, 2022 17:13
> To: Rongwei Liu <rongweil@nvidia.com>
> Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; NBU-Contact-
> Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; Ferruh Yigit
> <ferruh.yigit@amd.com>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> <rasland@nvidia.com>
> Subject: Re: [RFC v3 2/2] ethdev: add API to set process to active or standby
> 
> External email: Use caution opening links or attachments
> 
> 
> On Wed, Dec 21, 2022 at 2:31 PM Rongwei Liu <rongweil@nvidia.com> wrote:
> >
> > Users may want to change the DPDK process to different versions
> 
> Different version of DPDK? If there is any ABI change how to support this?
> 
A new member was introduced into rte_eth_dev_info, but it shouldn’t break the ABI since it uses reserved fields.
> > such as hot upgrade.
> > There is a strong requirement to simplify the logic and shorten the
> > traffic downtime as much as possible.
> >
> > This update introduces new rte_eth process role definitions: active or
> > standby.
> >
> > The active role means rules are programmed to HW immediately, and no
> 
> Why it has to be specific only to rte_flow rule? If it spedieic to rte_flow, why it
> is in rte_eth_process_ name space?
For now, this design focuses on flow rule offloading and traffic redirection.
When switching process versions, it's important to know which application receives and handles the traffic.
The change should take effect across all probed eth devices; that's why it was put under the rte_eth_process_ (for all rte_eth_dev) namespace.
> 
> Also, if we are moving the standby, What about the rule whose ABI is changed
> between versions?

As the comments mention: "Before role transition, all the rules set by the active process should be flushed first."
> > behavior changed. This is the default state.
> > The standby role means rules are queued in the HW. If no active roles
> > alive or back to active, the rules are effective immediately.
> >
> > Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
> > ---
> >  doc/guides/nics/mlx5.rst               | 10 ++++
> >  doc/guides/rel_notes/release_22_03.rst |  5 ++
> >  lib/ethdev/ethdev_driver.h             | 63 ++++++++++++++++++++++++++
> >  lib/ethdev/rte_ethdev.c                | 41 +++++++++++++++++
> >  lib/ethdev/rte_ethdev.h                |  7 ++-
> >  lib/ethdev/version.map                 |  3 ++
> >  6 files changed, 128 insertions(+), 1 deletion(-)
> >
> > diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index
> > 51f51259e3..de1fdac0a1 100644
> > --- a/doc/guides/nics/mlx5.rst
> > +++ b/doc/guides/nics/mlx5.rst
> > @@ -2001,3 +2001,13 @@ where:
> >  * ``sw_queue_id``: queue index in range [64536, 65535].
> >    This range is the highest 1000 numbers.
> >  * ``hw_queue_id``: queue index given by HW in queue creation.
> > +
> > +ethdev set process active or standby
> > +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> > +
> > +User should only program group 0 (fdb_def_rule_en=0) when
> > +``rte_eth_process_set_active`` has been called and set to a standby role.
> > +Group 0 is shared across different DPDK processes while the other
> > +groups are limited to the current process scope.
> > +The process can't move from active to standby role if preceding
> > +active application's rules are still present and vice versa.
> > diff --git a/doc/guides/rel_notes/release_22_03.rst
> > b/doc/guides/rel_notes/release_22_03.rst
> > index 0923707cb8..6fa48106c4 100644
> > --- a/doc/guides/rel_notes/release_22_03.rst
> > +++ b/doc/guides/rel_notes/release_22_03.rst
> > @@ -207,6 +207,11 @@ API Changes
> >  * ethdev: Old public macros and enumeration constants without
> ``RTE_ETH_`` prefix,
> >    which are kept for backward compatibility, are marked as deprecated.
> >
> > +* ethdev: added a new experimental api:
> > +
> > +  The new API ``rte_eth_process_set_active()`` was added.
> > +  If ``RTE_ETH_CAPA_PROCESS_SET_ROLE`` is not advertised, this new api
> is not supported.
> > +
> >  * cryptodev: The asymmetric session handling was modified to use a single
> >    mempool object. An API ``rte_cryptodev_asym_session_pool_create`` was
> added
> >    to create a mempool with element size big enough to hold the
> > generic asymmetric diff --git a/lib/ethdev/ethdev_driver.h
> > b/lib/ethdev/ethdev_driver.h index 6a550cfc83..3c583bc39d 100644
> > --- a/lib/ethdev/ethdev_driver.h
> > +++ b/lib/ethdev/ethdev_driver.h
> > @@ -179,6 +179,16 @@ struct rte_eth_dev_data {
> >         pthread_mutex_t flow_ops_mutex; /**< rte_flow ops mutex */  }
> > __rte_cache_aligned;
> >
> > +/**@{@name Different rte_eth role flag definitions which will be used
> > + *  when miagrating DPDK to a different version.
> > + */
> > +/*
> > + * Traffic coming from NIC domain rules will reach
> > + * both active and standby processes.
> > + */
> > +#define RTE_ETH_PROCESS_NIC_DUP_WITH_STANDBY RTE_BIT32(0),
> /**@}*/
> > +
> >  /**
> >   * @internal
> >   * The pool of *rte_eth_dev* structures. The size of the pool @@
> > -1087,6 +1097,22 @@ typedef const uint32_t
> *(*eth_buffer_split_supported_hdr_ptypes_get_t)(struct rt
> >   */
> >  typedef int (*eth_dev_priv_dump_t)(struct rte_eth_dev *dev, FILE
> > *file);
> >
> > +/**
> > + * @internal
> > + * Set rte_eth process to active or standby role.
> > + *
> > + * @param dev
> > + *   Port (ethdev) handle.
> > + * @param active
> > + *   Device (role) active or not (standby).
> > + * @param flag
> > + *   Role specific flag.
> > + *
> > + * @return
> > + *   Negative value on error, 0 on success.
> > + */
> > +typedef int (*eth_process_set_active_t)(struct rte_eth_dev *dev, bool
> > +active, uint32_t flag);
> > +
> >  /**
> >   * @internal Set Rx queue available descriptors threshold.
> >   * @see rte_eth_rx_avail_thresh_set() @@ -1403,6 +1429,8 @@ struct
> > eth_dev_ops {
> >         eth_cman_config_set_t cman_config_set;
> >         /** Retrieve congestion management configuration */
> >         eth_cman_config_get_t cman_config_get;
> > +       /** Set the whole rte_eth process to active or standby role. */
> > +       eth_process_set_active_t eth_process_set_active;
> >  };
> >
> >  /**
> > @@ -2046,6 +2074,41 @@ struct rte_eth_fdir_conf {
> >         struct rte_eth_fdir_flex_conf flex_conf;  };
> >
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change without prior notice
> > + *
> > + * Set the rte_eth process to the active or standby role which
> > +affects
> > + * the flow rules offloading. It doesn't allow multiple processes to
> > +be the
> > + * same role unless no offload rules are set.
> > + * The active process flow rules are effective immediately while the
> > +standby
> > + * process rules will be matched (active) when the process becomes
> > +active or
> > + * when the traffic is not matched by the active process rules.
> > + * The active application will always receive traffic while the
> > +standby
> > + * application will receive traffic when no matching rules are
> > +present from
> > + * the active application.
> > + *
> > + * The application is active by default if this API is not called.
> > + *
> > + * When a process transforms from a standby to a active role, all
> > +preceding
> > + * flow rules which are queued by hardware will be effective immediately.
> > + * Before role transition, all the rules set by the active process
> > +should be
> > + * flushed first.
> > + *
> > + * When role flag "RTE_ETH_PROCESS_NIC_DUP_WITH_STANDBY" is set,
> NIC
> > +domain
> > + * flow rules are effective immediately even if a process is standby role.
> > + *
> > + * @param active
> > + *   Process active (role) or not (standby).
> > + * @param flag
> > + *   The role flag.
> > + * @return
> > + *   - (>=0) Number of rte devices which have been switched successfully.
> > + *   - (-EINVAL) if bad parameter.
> > + */
> > +__rte_experimental
> > +int rte_eth_process_set_active(bool active, uint32_t flag);
> > +
> >  #ifdef __cplusplus
> >  }
> >  #endif
> > diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index
> > 5d5e18db1e..f19da75bfe 100644
> > --- a/lib/ethdev/rte_ethdev.c
> > +++ b/lib/ethdev/rte_ethdev.c
> > @@ -6318,6 +6318,47 @@
> rte_eth_buffer_split_get_supported_hdr_ptypes(uint16_t port_id, uint32_t
> *ptypes
> >         return j;
> >  }
> >
> > +int rte_eth_process_set_active(bool active, uint32_t flag) {
> > +       struct rte_eth_dev_info dev_info = {0};
> > +       uint32_t flags[RTE_MAX_ETHPORTS];
> > +       struct rte_eth_dev *dev;
> > +       uint16_t port_id;
> > +       int ret = 0;
> > +
> > +       /* Check if all devices support. */
> > +       RTE_ETH_FOREACH_DEV(port_id) {
> > +               dev = &rte_eth_devices[port_id];
> > +               if (*dev->dev_ops->dev_infos_get == NULL ||
> > +                   *dev->dev_ops->eth_process_set_active == NULL)
> > +                       return -ENOTSUP;
> > +               if ((*dev->dev_ops->dev_infos_get)(dev, &dev_info))
> > +                       return -EINVAL;
> > +               if (!(dev_info.dev_capa & RTE_ETH_CAPA_PROCESS_SET_ROLE))
> > +                       return -ENOTSUP;
> > +       }
> > +       RTE_ETH_FOREACH_DEV(port_id) {
> > +               dev = &rte_eth_devices[port_id];
> > +               if ((*dev->dev_ops->dev_infos_get)(dev, &dev_info))
> > +                       goto err;
> > +               flags[port_id] = dev_info.eth_process_flag;
> > +               if ((*dev->dev_ops->eth_process_set_active)(dev, active, flag) < 0)
> > +                       goto err;
> > +               ret++;
> > +       }
> > +       return ret;
> > +err:
> > +       if (!ret)
> > +               return 0;
> > +       RTE_ETH_FOREACH_DEV(port_id) {
> > +               dev = &rte_eth_devices[port_id];
> > +               (*dev->dev_ops->eth_process_set_active)(dev, !active,
> flags[port_id]);
> > +               if (--ret == 0)
> > +                       break;
> > +       }
> > +       return 0;
> > +}
> > +
> >  RTE_LOG_REGISTER_DEFAULT(rte_eth_dev_logtype, INFO);
> >
> >  RTE_INIT(ethdev_init_telemetry)
> > diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index
> > c129ca1eaf..d29f051d6f 100644
> > --- a/lib/ethdev/rte_ethdev.h
> > +++ b/lib/ethdev/rte_ethdev.h
> > @@ -1606,6 +1606,8 @@ struct rte_eth_conf {
> >  #define RTE_ETH_DEV_CAPA_FLOW_RULE_KEEP         RTE_BIT64(3)
> >  /** Device supports keeping shared flow objects across restart. */
> > #define RTE_ETH_DEV_CAPA_FLOW_SHARED_OBJECT_KEEP RTE_BIT64(4)
> > +/**Device supports "rte_eth_process_set_active" callback. */ #define
> > +RTE_ETH_CAPA_PROCESS_SET_ROLE RTE_BIT64(5)
> >  /**@}*/
> >
> >  /*
> > @@ -1777,8 +1779,11 @@ struct rte_eth_dev_info {
> >         struct rte_eth_switch_info switch_info;
> >         /** Supported error handling mode. */
> >         enum rte_eth_err_handle_mode err_handle_mode;
> > +       /** Process specific role flag. */
> > +       uint32_t eth_process_flag;
> >
> > -       uint64_t reserved_64s[2]; /**< Reserved for future fields */
> > +       uint32_t reserved_32s[1]; /**< Reserved for future fields */
> > +       uint64_t reserved_64s[1]; /**< Reserved for future fields */
> >         void *reserved_ptrs[2];   /**< Reserved for future fields */
> >  };
> >
> > diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map index
> > 17201fbe0f..a5503f6fde 100644
> > --- a/lib/ethdev/version.map
> > +++ b/lib/ethdev/version.map
> > @@ -298,6 +298,9 @@ EXPERIMENTAL {
> >         rte_flow_get_q_aged_flows;
> >         rte_mtr_meter_policy_get;
> >         rte_mtr_meter_profile_get;
> > +
> > +       # added in 23.03
> > +       rte_eth_process_set_active;
> >  };
> >
> >  INTERNAL {
> > --
> > 2.27.0
> >
  
Jerin Jacob Dec. 21, 2022, 10:59 a.m. UTC | #3
On Wed, Dec 21, 2022 at 3:02 PM Rongwei Liu <rongweil@nvidia.com> wrote:
>
> HI Jerin:
>

Hi Rongwei

> BR
> Rongwei
>
> > -----Original Message-----
> > From: Jerin Jacob <jerinjacobk@gmail.com>
> > Sent: Wednesday, December 21, 2022 17:13
> > To: Rongwei Liu <rongweil@nvidia.com>
> > Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; NBU-Contact-
> > Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; Ferruh Yigit
> > <ferruh.yigit@amd.com>; Andrew Rybchenko
> > <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> > <rasland@nvidia.com>
> > Subject: Re: [RFC v3 2/2] ethdev: add API to set process to active or standby
> >
> > External email: Use caution opening links or attachments
> >
> >
> > On Wed, Dec 21, 2022 at 2:31 PM Rongwei Liu <rongweil@nvidia.com> wrote:
> > >
> > > Users may want to change the DPDK process to different versions
> >
> > Different version of DPDK? If there is any ABI change how to support this?
> >
> There is a new member which was introduced into rte_eth_dev_info but it shouldn’t be ABI breaking since using reserved fields.

That is just for rte_eth_dev_info. What about ABI changes in
other ethdev structures and rte_flow structures across different
DPDK ABI versions?

> > > such as hot upgrade.
> > > There is a strong requirement to simplify the logic and shorten the
> > > traffic downtime as much as possible.
> > >
> > > This update introduces new rte_eth process role definitions: active or
> > > standby.
> > >
> > > The active role means rules are programmed to HW immediately, and no
> >
> > Why it has to be specific only to rte_flow rule? If it spedieic to rte_flow, why it
> > is in rte_eth_process_ name space?
> For now, this design focuses on the flow rule offloading and traffic redirection.
> When switching process version, it' important to make sure which application receives and handles the traffic.

Changing the DPDK version at runtime goes well beyond the rte_flow driver.

> The changing should be effective across all probing eth devices, that' why it was put under rte_eth_process_ (for all rte_eth_dev) name space.
> >
> > Also, if we are moving the standby, What about the rule whose ABI is changed
> > between versions?
>
> Like the comments mentioned: " Before role transition, all the rules set by the active process should be flushed first. "

What happens to existing rte_flow flow handles which were
created with version X?
Also, what if the new version Y has an ABI change in the rte_flow_pattern and
rte_flow_action structures?

For me, if a DPDK version change is needed, simply reload the
application. This API will soon bloat, and it will be a mess if we
start handling
different DPDK versions which are not ABI compatible at all.




> > > behavior changed. This is the default state.
> > > The standby role means rules are queued in the HW. If no active roles
> > > alive or back to active, the rules are effective immediately.
> > >
> > > Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
  
Rongwei Liu Dec. 21, 2022, 12:05 p.m. UTC | #4
Hi Jerin:

BR
Rongwei

> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Wednesday, December 21, 2022 19:00
> To: Rongwei Liu <rongweil@nvidia.com>
> Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; NBU-Contact-
> Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; Ferruh Yigit
> <ferruh.yigit@amd.com>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> <rasland@nvidia.com>
> Subject: Re: [RFC v3 2/2] ethdev: add API to set process to active or standby
> 
> External email: Use caution opening links or attachments
> 
> 
> On Wed, Dec 21, 2022 at 3:02 PM Rongwei Liu <rongweil@nvidia.com> wrote:
> >
> > HI Jerin:
> >
> 
> Hi Rongwei
> 
> > BR
> > Rongwei
> >
> > > -----Original Message-----
> > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > Sent: Wednesday, December 21, 2022 17:13
> > > To: Rongwei Liu <rongweil@nvidia.com>
> > > Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; NBU-Contact-
> > > Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; Ferruh Yigit
> > > <ferruh.yigit@amd.com>; Andrew Rybchenko
> > > <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> > > <rasland@nvidia.com>
> > > Subject: Re: [RFC v3 2/2] ethdev: add API to set process to active
> > > or standby
> > >
> > > External email: Use caution opening links or attachments
> > >
> > >
> > > On Wed, Dec 21, 2022 at 2:31 PM Rongwei Liu <rongweil@nvidia.com>
> wrote:
> > > >
> > > > Users may want to change the DPDK process to different versions
> > >
> > > Different version of DPDK? If there is any ABI change how to support this?
> > >
> > There is a new member which was introduced into rte_eth_dev_info but it
> shouldn’t be ABI breaking since using reserved fields.
> 
> That is just for rte_eth_dev_info. What about the ABI change in different
> ethdev structure and rte_flow structures across different DPDK ABI versions.
> 
Besides this, there is no other dependency on ABI changes.

Assume there is a DPDK process A running version v21.11, and we plan to upgrade it to
version v22.11. Let's call the v22.11 instance process B.

Now, process A has been running for a long time and has a lot of rules configured. It has the "active" role per this API definition.
Process B starts; it should call this API to set itself to the "standby" role, and then the user can program the flow rules as they want.
Different NIC vendors may have different recommendations. Nvidia suggests programming only group 0's rules in process B at this point.

The user should sync all desired configurations from process A to process B, and then process A starts to yield traffic, e.g. "delete all group 0
rules for Nvidia's NICs", or quits.
After that, process B calls this API and sets itself to the "active" role; the hot upgrade is now finished.

> > > > such as hot upgrade.
> > > > There is a strong requirement to simplify the logic and shorten
> > > > the traffic downtime as much as possible.
> > > >
> > > > This update introduces new rte_eth process role definitions:
> > > > active or standby.
> > > >
> > > > The active role means rules are programmed to HW immediately, and
> > > > no
> > >
> > > Why it has to be specific only to rte_flow rule? If it spedieic to
> > > rte_flow, why it is in rte_eth_process_ name space?
> > For now, this design focuses on the flow rule offloading and traffic
> redirection.
> > When switching process version, it' important to make sure which
> application receives and handles the traffic.
> 
> Changing the DPDK version runtime is just beyond rte_flow driver.

It's not about changing the DPDK version but upgrading DPDK from one PMD version to another.
Does the preceding example answer your question?
> 
> > The changing should be effective across all probing eth devices, that' why it
> was put under rte_eth_process_ (for all rte_eth_dev) name space.
> > >
> > > Also, if we are moving the standby, What about the rule whose ABI is
> > > changed between versions?
> >
> > Like the comments mentioned: " Before role transition, all the rules set by
> the active process should be flushed first. "
> 
> What happens to rte_flow flow handles for existing ones  which is created with
> version X?
> Also What if new version Y has ABI change in rte_flow_pattern and
> rte_flow_action structure?
> 
> For me, If DPDK version change is needed, simply reload the application. This
> API will soon bloat, and it will be a mess if to start handling Different DPDK
> version which is not ABI compatible at all.
> 
Yes, you are right. Reloading the application is the easiest way, but it may leave a long time
window during which traffic is lost: no traffic arrives at process A or process B.
We are trying to simplify the reloading logic and minimize the traffic downtime as much as possible.
The approach may differ hugely between NIC vendors, so I think it would be better if
DPDK provided an abstract API.

If process A and process B have different ABIs, it doesn't matter.
1. Calling this API from process A uses the older ABI.
2. Calling this API from process B uses the newer ABI.
The API has a per-process concept and working scope.

> 
> 
> 
> > > > behavior changed. This is the default state.
> > > > The standby role means rules are queued in the HW. If no active
> > > > roles alive or back to active, the rules are effective immediately.
> > > >
> > > > Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
  
Jerin Jacob Dec. 21, 2022, 12:44 p.m. UTC | #5
On Wed, Dec 21, 2022 at 5:35 PM Rongwei Liu <rongweil@nvidia.com> wrote:
>
> Hi Jerin:
>
> BR
> Rongwei
>
> > -----Original Message-----
> > From: Jerin Jacob <jerinjacobk@gmail.com>
> > Sent: Wednesday, December 21, 2022 19:00
> > To: Rongwei Liu <rongweil@nvidia.com>
> > Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; NBU-Contact-
> > Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; Ferruh Yigit
> > <ferruh.yigit@amd.com>; Andrew Rybchenko
> > <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> > <rasland@nvidia.com>
> > Subject: Re: [RFC v3 2/2] ethdev: add API to set process to active or standby
> >
> > External email: Use caution opening links or attachments
> >
> >
> > On Wed, Dec 21, 2022 at 3:02 PM Rongwei Liu <rongweil@nvidia.com> wrote:
> > >
> > > HI Jerin:
> > >
> >
> > Hi Rongwei
> >
> > > BR
> > > Rongwei
> > >
> > > > -----Original Message-----
> > > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > > Sent: Wednesday, December 21, 2022 17:13
> > > > To: Rongwei Liu <rongweil@nvidia.com>
> > > > Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > > > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; NBU-Contact-
> > > > Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; Ferruh Yigit
> > > > <ferruh.yigit@amd.com>; Andrew Rybchenko
> > > > <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> > > > <rasland@nvidia.com>
> > > > Subject: Re: [RFC v3 2/2] ethdev: add API to set process to active
> > > > or standby
> > > >
> > > > External email: Use caution opening links or attachments
> > > >
> > > >
> > > > On Wed, Dec 21, 2022 at 2:31 PM Rongwei Liu <rongweil@nvidia.com>
> > wrote:
> > > > >
> > > > > Users may want to change the DPDK process to different versions
> > > >
> > > > Different version of DPDK? If there is any ABI change how to support this?
> > > >
> > > There is a new member which was introduced into rte_eth_dev_info but it
> > shouldn’t be ABI breaking since using reserved fields.
> >
> > That is just for rte_eth_dev_info. What about the ABI change in different
> > ethdev structure and rte_flow structures across different DPDK ABI versions.
> >
> Besides this, there is no other ABI changes dependency.
>
> Assume there is a DPDK process A running with version v21.11 and plan to upgrade to
> version v22.11. Let' call v22.11 as process B.

OK. That's a relief. I understand the use case now.

Why not simply use the standard DPDK multiprocess model then?
The primary process acts as a server for the slow path API. A secondary process can
come and go (i.e. it can be updated at runtime)
and act as a client to update rules via the primary-secondary communication mechanism.


>
> Now, process A has been running for long time and has lot of rules configured. It' "active" role per this API definition.
> Process B starts and it should call this API and set itself to "standby" role and user can program the flow rules as they want
> and different NIC vendors may have different recommendations. Nvidia suggests only program process B with group 0' rules now.
>
> The user should sync all desired configurations from process A to process B, and process A starts to yield traffic like "delete all group 0
> rules for Nvidia' NICs" or quit.
> After that process B calls this API and set itself to "active" role, now the hot-upgrade finishes.
>
> > > > > such as hot upgrade.
> > > > > There is a strong requirement to simplify the logic and shorten
> > > > > the traffic downtime as much as possible.
> > > > >
> > > > > This update introduces new rte_eth process role definitions:
> > > > > active or standby.
> > > > >
> > > > > The active role means rules are programmed to HW immediately, and
> > > > > no
> > > >
> > > > Why it has to be specific only to rte_flow rule? If it spedieic to
> > > > rte_flow, why it is in rte_eth_process_ name space?
> > > For now, this design focuses on the flow rule offloading and traffic
> > redirection.
> > > When switching process version, it' important to make sure which
> > application receives and handles the traffic.
> >
> > Changing the DPDK version runtime is just beyond rte_flow driver.
>
> It' not about changing DPDK version but upgrading DPDK from one PMD version to another one.
> Does the preceding example answer your question?
> >
> > > The changing should be effective across all probing eth devices, that' why it
> > was put under rte_eth_process_ (for all rte_eth_dev) name space.
> > > >
> > > > Also, if we are moving the standby, What about the rule whose ABI is
> > > > changed between versions?
> > >
> > > Like the comments mentioned: " Before role transition, all the rules set by
> > the active process should be flushed first. "
> >
> > What happens to rte_flow flow handles for existing ones  which is created with
> > version X?
> > Also What if new version Y has ABI change in rte_flow_pattern and
> > rte_flow_action structure?
> >
> > For me, If DPDK version change is needed, simply reload the application. This
> > API will soon bloat, and it will be a mess if to start handling Different DPDK
> > version which is not ABI compatible at all.
> >
> Yes, you are right. Reloading the application is the easiest way but it may have a long time
> Window that traffic is lost. No traffic arrives at process A or process B.
> We are trying to simplify the reloading logic and minimize the traffic down time as much as possible.
> The approach may differentiate hugely between different NIC vendors, so I think it should be better if
> DPDK can provide an abstract API.
>
> If process A and process B are ABI different, it doesn't matter.
> 1. Call this API with process A means older ABI.
> 2. Call this API with process B means newer ABI.
> It' have process concept and working scope.
>
> >
> >
> >
> > > > > behavior changed. This is the default state.
> > > > > The standby role means rules are queued in the HW. If no active
> > > > > roles alive or back to active, the rules are effective immediately.
> > > > >
> > > > > Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
  
Rongwei Liu Dec. 21, 2022, 12:50 p.m. UTC | #6
HI Jerin:

BR
Rongwei

> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Wednesday, December 21, 2022 20:45
> To: Rongwei Liu <rongweil@nvidia.com>
> Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; NBU-Contact-
> Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; Ferruh Yigit
> <ferruh.yigit@amd.com>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> <rasland@nvidia.com>
> Subject: Re: [RFC v3 2/2] ethdev: add API to set process to active or standby
> 
> External email: Use caution opening links or attachments
> 
> 
> On Wed, Dec 21, 2022 at 5:35 PM Rongwei Liu <rongweil@nvidia.com> wrote:
> >
> > Hi Jerin:
> >
> > BR
> > Rongwei
> >
> > > -----Original Message-----
> > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > Sent: Wednesday, December 21, 2022 19:00
> > > To: Rongwei Liu <rongweil@nvidia.com>
> > > Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; NBU-Contact-
> > > Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; Ferruh Yigit
> > > <ferruh.yigit@amd.com>; Andrew Rybchenko
> > > <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> > > <rasland@nvidia.com>
> > > Subject: Re: [RFC v3 2/2] ethdev: add API to set process to active
> > > or standby
> > >
> > > External email: Use caution opening links or attachments
> > >
> > >
> > > On Wed, Dec 21, 2022 at 3:02 PM Rongwei Liu <rongweil@nvidia.com>
> wrote:
> > > >
> > > > HI Jerin:
> > > >
> > >
> > > Hi Rongwei
> > >
> > > > BR
> > > > Rongwei
> > > >
> > > > > -----Original Message-----
> > > > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > > > Sent: Wednesday, December 21, 2022 17:13
> > > > > To: Rongwei Liu <rongweil@nvidia.com>
> > > > > Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > > > > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>;
> > > > > NBU-Contact- Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>;
> > > > > Ferruh Yigit <ferruh.yigit@amd.com>; Andrew Rybchenko
> > > > > <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> > > > > <rasland@nvidia.com>
> > > > > Subject: Re: [RFC v3 2/2] ethdev: add API to set process to
> > > > > active or standby
> > > > >
> > > > > External email: Use caution opening links or attachments
> > > > >
> > > > >
> > > > > On Wed, Dec 21, 2022 at 2:31 PM Rongwei Liu
> > > > > <rongweil@nvidia.com>
> > > wrote:
> > > > > >
> > > > > > Users may want to change the DPDK process to different
> > > > > > versions
> > > > >
> > > > > Different version of DPDK? If there is any ABI change how to support
> this?
> > > > >
> > > > There is a new member which was introduced into rte_eth_dev_info
> > > > but it
> > > shouldn’t be ABI breaking since using reserved fields.
> > >
> > > That is just for rte_eth_dev_info. What about the ABI change in
> > > different ethdev structure and rte_flow structures across different DPDK
> ABI versions.
> > >
> > Besides this, there is no other ABI changes dependency.
> >
> > Assume there is a DPDK process A running with version v21.11 and plan
> > to upgrade to version v22.11. Let' call v22.11 as process B.
> 
> OK. That's a relief. I understand the use case now.
> 
> Why not simply use standard DPDK multiprocess model then.
> Primary process act as server for slow path API. Secondary process can come
> and go(aka can be updated at runtime) and use as client to update rules via
> primary-secondray communication mechanism.
> 
Just imagine if process A and process B have ABI breakage, such as different rte_flow_item_*** and rte_flow_action_*** sizes and members.
How can we quickly make primary/secondary ABI compatible across different versions?
It will be a huge effort and difficult to implement, at least in my opinion.
What do you think?
> 
> >
> > Now, process A has been running for long time and has lot of rules
> configured. It' "active" role per this API definition.
> > Process B starts and it should call this API and set itself to
> > "standby" role and user can program the flow rules as they want and
> different NIC vendors may have different recommendations. Nvidia suggests
> only program process B with group 0' rules now.
> >
> > The user should sync all desired configurations from process A to
> > process B, and process A starts to yield traffic like "delete all group 0 rules
> for Nvidia' NICs" or quit.
> > After that process B calls this API and set itself to "active" role, now the hot-
> upgrade finishes.
> >
> > > > > > such as hot upgrade.
> > > > > > There is a strong requirement to simplify the logic and
> > > > > > shorten the traffic downtime as much as possible.
> > > > > >
> > > > > > This update introduces new rte_eth process role definitions:
> > > > > > active or standby.
> > > > > >
> > > > > > The active role means rules are programmed to HW immediately,
> > > > > > and no
> > > > >
> > > > > Why it has to be specific only to rte_flow rule? If it spedieic
> > > > > to rte_flow, why it is in rte_eth_process_ name space?
> > > > For now, this design focuses on the flow rule offloading and
> > > > traffic
> > > redirection.
> > > > When switching process version, it' important to make sure which
> > > application receives and handles the traffic.
> > >
> > > Changing the DPDK version runtime is just beyond rte_flow driver.
> >
> > It' not about changing DPDK version but upgrading DPDK from one PMD
> version to another one.
> > Does the preceding example answer your question?
> > >
> > > > The changing should be effective across all probing eth devices,
> > > > that' why it
> > > was put under rte_eth_process_ (for all rte_eth_dev) name space.
> > > > >
> > > > > Also, if we are moving the standby, What about the rule whose
> > > > > ABI is changed between versions?
> > > >
> > > > Like the comments mentioned: " Before role transition, all the
> > > > rules set by
> > > the active process should be flushed first. "
> > >
> > > What happens to rte_flow flow handles for existing ones  which is
> > > created with version X?
> > > Also What if new version Y has ABI change in rte_flow_pattern and
> > > rte_flow_action structure?
> > >
> > > For me, If DPDK version change is needed, simply reload the
> > > application. This API will soon bloat, and it will be a mess if to
> > > start handling Different DPDK version which is not ABI compatible at all.
> > >
> > Yes, you are right. Reloading the application is the easiest way but
> > it may have a long time Window that traffic is lost. No traffic arrives at
> process A or process B.
> > We are trying to simplify the reloading logic and minimize the traffic down
> time as much as possible.
> > The approach may differentiate hugely between different NIC vendors,
> > so I think it should be better if DPDK can provide an abstract API.
> >
> > If process A and process B are ABI different, it doesn't matter.
> > 1. Call this API with process A means older ABI.
> > 2. Call this API with process B means newer ABI.
> > It' have process concept and working scope.
> >
> > >
> > >
> > >
> > > > > > behavior changed. This is the default state.
> > > > > > The standby role means rules are queued in the HW. If no
> > > > > > active roles alive or back to active, the rules are effective
> immediately.
> > > > > >
> > > > > > Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
  
Jerin Jacob Dec. 21, 2022, 1:12 p.m. UTC | #7
On Wed, Dec 21, 2022 at 6:20 PM Rongwei Liu <rongweil@nvidia.com> wrote:
>
> HI Jerin:
>
> BR
> Rongwei
>
> > -----Original Message-----
> > From: Jerin Jacob <jerinjacobk@gmail.com>
> > Sent: Wednesday, December 21, 2022 20:45
> > To: Rongwei Liu <rongweil@nvidia.com>
> > Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; NBU-Contact-
> > Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; Ferruh Yigit
> > <ferruh.yigit@amd.com>; Andrew Rybchenko
> > <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> > <rasland@nvidia.com>
> > Subject: Re: [RFC v3 2/2] ethdev: add API to set process to active or standby
> >
> > External email: Use caution opening links or attachments
> >
> >
> > On Wed, Dec 21, 2022 at 5:35 PM Rongwei Liu <rongweil@nvidia.com> wrote:
> > >
> > > Hi Jerin:
> > >
> > > BR
> > > Rongwei
> > >
> > > > -----Original Message-----
> > > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > > Sent: Wednesday, December 21, 2022 19:00
> > > > To: Rongwei Liu <rongweil@nvidia.com>
> > > > Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > > > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; NBU-Contact-
> > > > Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; Ferruh Yigit
> > > > <ferruh.yigit@amd.com>; Andrew Rybchenko
> > > > <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> > > > <rasland@nvidia.com>
> > > > Subject: Re: [RFC v3 2/2] ethdev: add API to set process to active
> > > > or standby
> > > >
> > > > External email: Use caution opening links or attachments
> > > >
> > > >
> > > > On Wed, Dec 21, 2022 at 3:02 PM Rongwei Liu <rongweil@nvidia.com>
> > wrote:
> > > > >
> > > > > HI Jerin:
> > > > >
> > > >
> > > > Hi Rongwei
> > > >
> > > > > BR
> > > > > Rongwei
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > > > > Sent: Wednesday, December 21, 2022 17:13
> > > > > > To: Rongwei Liu <rongweil@nvidia.com>
> > > > > > Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > > > > > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>;
> > > > > > NBU-Contact- Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>;
> > > > > > Ferruh Yigit <ferruh.yigit@amd.com>; Andrew Rybchenko
> > > > > > <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> > > > > > <rasland@nvidia.com>
> > > > > > Subject: Re: [RFC v3 2/2] ethdev: add API to set process to
> > > > > > active or standby
> > > > > >
> > > > > > External email: Use caution opening links or attachments
> > > > > >
> > > > > >
> > > > > > On Wed, Dec 21, 2022 at 2:31 PM Rongwei Liu
> > > > > > <rongweil@nvidia.com>
> > > > wrote:
> > > > > > >
> > > > > > > Users may want to change the DPDK process to different
> > > > > > > versions
> > > > > >
> > > > > > Different version of DPDK? If there is any ABI change how to support
> > this?
> > > > > >
> > > > > There is a new member which was introduced into rte_eth_dev_info
> > > > > but it
> > > > shouldn’t be ABI breaking since using reserved fields.
> > > >
> > > > That is just for rte_eth_dev_info. What about the ABI change in
> > > > different ethdev structure and rte_flow structures across different DPDK
> > ABI versions.
> > > >
> > > Besides this, there is no other ABI changes dependency.
> > >
> > > Assume there is a DPDK process A running with version v21.11 and plan
> > > to upgrade to version v22.11. Let' call v22.11 as process B.
> >
> > OK. That's a relief. I understand the use case now.
> >
> > Why not simply use standard DPDK multiprocess model then.
> > Primary process act as server for slow path API. Secondary process can come
> > and go(aka can be updated at runtime) and use as client to update rules via
> > primary-secondray communication mechanism.
> >
> Just image if process A and process B have ABI breakage like different rte_flow_item_*** and rte_flow_action_*** size and members.
> How can we quickly accommodate primary/secondary to be ABI compatible across different versions?
> It will be very huge effort and difficult to implement, at least in my opinion.
> What do you think?

Yes, it is difficult whatever approach we take.
On the other hand, the ethdev subsystem has other components like rte_tm and
other offloads, etc. We cannot simply have rte_eth_process_set_active()
and have things magically work across different DPDK versions. For example, an
rte_flow rule with a meter action depends on another HW piece in the NIC
card for doing the metering, although it is set by the flow API.
IMO, customers can simply use the standard multiprocess model if the versions
are compatible, without any special intelligence in the application.
Otherwise they can reload and start the application again,
or have special intelligence in the application to cater to the specific area
of the API they need to leverage based on the server and client DPDK versions.



> >
> > >
> > > Now, process A has been running for long time and has lot of rules
> > configured. It' "active" role per this API definition.
> > > Process B starts and it should call this API and set itself to
> > > "standby" role and user can program the flow rules as they want and
> > different NIC vendors may have different recommendations. Nvidia suggests
> > only program process B with group 0' rules now.
> > >
> > > The user should sync all desired configurations from process A to
> > > process B, and process A starts to yield traffic like "delete all group 0 rules
> > for Nvidia' NICs" or quit.
> > > After that process B calls this API and set itself to "active" role, now the hot-
> > upgrade finishes.
> > >
> > > > > > > such as hot upgrade.
> > > > > > > There is a strong requirement to simplify the logic and
> > > > > > > shorten the traffic downtime as much as possible.
> > > > > > >
> > > > > > > This update introduces new rte_eth process role definitions:
> > > > > > > active or standby.
> > > > > > >
> > > > > > > The active role means rules are programmed to HW immediately,
> > > > > > > and no
> > > > > >
> > > > > > Why it has to be specific only to rte_flow rule? If it spedieic
> > > > > > to rte_flow, why it is in rte_eth_process_ name space?
> > > > > For now, this design focuses on the flow rule offloading and
> > > > > traffic
> > > > redirection.
> > > > > When switching process version, it' important to make sure which
> > > > application receives and handles the traffic.
> > > >
> > > > Changing the DPDK version runtime is just beyond rte_flow driver.
> > >
> > > It' not about changing DPDK version but upgrading DPDK from one PMD
> > version to another one.
> > > Does the preceding example answer your question?
> > > >
> > > > > The changing should be effective across all probing eth devices,
> > > > > that' why it
> > > > was put under rte_eth_process_ (for all rte_eth_dev) name space.
> > > > > >
> > > > > > Also, if we are moving the standby, What about the rule whose
> > > > > > ABI is changed between versions?
> > > > >
> > > > > Like the comments mentioned: " Before role transition, all the
> > > > > rules set by
> > > > the active process should be flushed first. "
> > > >
> > > > What happens to rte_flow flow handles for existing ones  which is
> > > > created with version X?
> > > > Also What if new version Y has ABI change in rte_flow_pattern and
> > > > rte_flow_action structure?
> > > >
> > > > For me, If DPDK version change is needed, simply reload the
> > > > application. This API will soon bloat, and it will be a mess if to
> > > > start handling Different DPDK version which is not ABI compatible at all.
> > > >
> > > Yes, you are right. Reloading the application is the easiest way but
> > > it may have a long time Window that traffic is lost. No traffic arrives at
> > process A or process B.
> > > We are trying to simplify the reloading logic and minimize the traffic down
> > time as much as possible.
> > > The approach may differentiate hugely between different NIC vendors,
> > > so I think it should be better if DPDK can provide an abstract API.
> > >
> > > If process A and process B are ABI different, it doesn't matter.
> > > 1. Call this API with process A means older ABI.
> > > 2. Call this API with process B means newer ABI.
> > > It' have process concept and working scope.
> > >
> > > >
> > > >
> > > >
> > > > > > > behavior changed. This is the default state.
> > > > > > > The standby role means rules are queued in the HW. If no
> > > > > > > active roles alive or back to active, the rules are effective
> > immediately.
> > > > > > >
> > > > > > > Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
  
Rongwei Liu Dec. 21, 2022, 2:33 p.m. UTC | #8
HI Jerin:

BR
Rongwei

> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Wednesday, December 21, 2022 21:12
> To: Rongwei Liu <rongweil@nvidia.com>
> Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; NBU-Contact-
> Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; Ferruh Yigit
> <ferruh.yigit@amd.com>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> <rasland@nvidia.com>
> Subject: Re: [RFC v3 2/2] ethdev: add API to set process to active or standby
> 
> External email: Use caution opening links or attachments
> 
> 
> On Wed, Dec 21, 2022 at 6:20 PM Rongwei Liu <rongweil@nvidia.com> wrote:
> >
> > HI Jerin:
> >
> > BR
> > Rongwei
> >
> > > -----Original Message-----
> > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > Sent: Wednesday, December 21, 2022 20:45
> > > To: Rongwei Liu <rongweil@nvidia.com>
> > > Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; NBU-Contact-
> > > Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; Ferruh Yigit
> > > <ferruh.yigit@amd.com>; Andrew Rybchenko
> > > <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> > > <rasland@nvidia.com>
> > > Subject: Re: [RFC v3 2/2] ethdev: add API to set process to active
> > > or standby
> > >
> > > External email: Use caution opening links or attachments
> > >
> > >
> > > On Wed, Dec 21, 2022 at 5:35 PM Rongwei Liu <rongweil@nvidia.com>
> wrote:
> > > >
> > > > Hi Jerin:
> > > >
> > > > BR
> > > > Rongwei
> > > >
> > > > > -----Original Message-----
> > > > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > > > Sent: Wednesday, December 21, 2022 19:00
> > > > > To: Rongwei Liu <rongweil@nvidia.com>
> > > > > Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > > > > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>;
> > > > > NBU-Contact- Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>;
> > > > > Ferruh Yigit <ferruh.yigit@amd.com>; Andrew Rybchenko
> > > > > <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> > > > > <rasland@nvidia.com>
> > > > > Subject: Re: [RFC v3 2/2] ethdev: add API to set process to
> > > > > active or standby
> > > > >
> > > > > External email: Use caution opening links or attachments
> > > > >
> > > > >
> > > > > On Wed, Dec 21, 2022 at 3:02 PM Rongwei Liu
> > > > > <rongweil@nvidia.com>
> > > wrote:
> > > > > >
> > > > > > HI Jerin:
> > > > > >
> > > > >
> > > > > Hi Rongwei
> > > > >
> > > > > > BR
> > > > > > Rongwei
> > > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > > > > > Sent: Wednesday, December 21, 2022 17:13
> > > > > > > To: Rongwei Liu <rongweil@nvidia.com>
> > > > > > > Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > > > > > > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>;
> > > > > > > NBU-Contact- Thomas Monjalon (EXTERNAL)
> > > > > > > <thomas@monjalon.net>; Ferruh Yigit <ferruh.yigit@amd.com>;
> > > > > > > Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>;
> > > > > > > dev@dpdk.org; Raslan Darawsheh <rasland@nvidia.com>
> > > > > > > Subject: Re: [RFC v3 2/2] ethdev: add API to set process to
> > > > > > > active or standby
> > > > > > >
> > > > > > > External email: Use caution opening links or attachments
> > > > > > >
> > > > > > >
> > > > > > > On Wed, Dec 21, 2022 at 2:31 PM Rongwei Liu
> > > > > > > <rongweil@nvidia.com>
> > > > > wrote:
> > > > > > > >
> > > > > > > > Users may want to change the DPDK process to different
> > > > > > > > versions
> > > > > > >
> > > > > > > Different version of DPDK? If there is any ABI change how to
> > > > > > > support
> > > this?
> > > > > > >
> > > > > > There is a new member which was introduced into
> > > > > > rte_eth_dev_info but it
> > > > > shouldn’t be ABI breaking since using reserved fields.
> > > > >
> > > > > That is just for rte_eth_dev_info. What about the ABI change in
> > > > > different ethdev structure and rte_flow structures across
> > > > > different DPDK
> > > ABI versions.
> > > > >
> > > > Besides this, there is no other ABI changes dependency.
> > > >
> > > > Assume there is a DPDK process A running with version v21.11 and
> > > > plan to upgrade to version v22.11. Let' call v22.11 as process B.
> > >
> > > OK. That's a relief. I understand the use case now.
> > >
> > > Why not simply use standard DPDK multiprocess model then.
> > > Primary process act as server for slow path API. Secondary process
> > > can come and go(aka can be updated at runtime) and use as client to
> > > update rules via primary-secondray communication mechanism.
> > >
> > Just image if process A and process B have ABI breakage like different
> rte_flow_item_*** and rte_flow_action_*** size and members.
> > How can we quickly accommodate primary/secondary to be ABI compatible
> across different versions?
> > It will be very huge effort and difficult to implement, at least in my opinion.
> > What do you think?
> 
> Yes. it difficult what ever approach we take, On other hand, ethdev subsystem
> has other components like rte_tm and other offload etc, We can not simply
> have rte_eth_process_set_active() and things magical works across different
> DPDK versions. Example, if rte_flow rule has meter action will depend on
> another HW piece in NIC card for doing the metering but set by flow API.
> IMO, Customer can simply use standard multiprocess model if version are
> compatible without any special intelligence in application.
> Otherwise they can reload and start the application again or have special
> intelligence in application to cater the specific area of API they need to
> leverage based on server and client DPDK versions.

Thanks for the message.
IMO, we are trying to eliminate the version/ABI dependency with this newly added API.
For example, if a meter action is in the flow rules:
1. Process A has rules like "eth / ipv4 src 1.1.1.1 / meter / queue / end"
2. Process B starts with "rte_eth_process_set_active(false, flags)"

Just taking Nvidia's hardware as an example (other NIC vendors may not care whether it is group 0 or not):
If process A's rules are in group 0, users should set them one by one in process B.
Then either flush process A's rules or quit process A, and then process B calls "rte_eth_process_set_active(true, flags)".
All is set.
This avoids complex operations with the client/server model and avoids user mis-operation too.
We should avoid reloading as much as possible since reloading is very time-consuming and may take up to a few seconds.
In this time slot, there is no application to handle the traffic, and everything is lost.
End users, especially cloud service providers, are sensitive to traffic downtime.
> 
> 
> > >
> > > >
> > > > Now, process A has been running for long time and has lot of rules
> > > configured. It' "active" role per this API definition.
> > > > Process B starts and it should call this API and set itself to
> > > > "standby" role and user can program the flow rules as they want
> > > > and
> > > different NIC vendors may have different recommendations. Nvidia
> > > suggests only program process B with group 0' rules now.
> > > >
> > > > The user should sync all desired configurations from process A to
> > > > process B, and process A starts to yield traffic like "delete all
> > > > group 0 rules
> > > for Nvidia' NICs" or quit.
> > > > After that process B calls this API and set itself to "active"
> > > > role, now the hot-
> > > upgrade finishes.
> > > >
> > > > > > > > such as hot upgrade.
> > > > > > > > There is a strong requirement to simplify the logic and
> > > > > > > > shorten the traffic downtime as much as possible.
> > > > > > > >
> > > > > > > > This update introduces new rte_eth process role definitions:
> > > > > > > > active or standby.
> > > > > > > >
> > > > > > > > The active role means rules are programmed to HW
> > > > > > > > immediately, and no
> > > > > > >
> > > > > > > Why it has to be specific only to rte_flow rule? If it
> > > > > > > spedieic to rte_flow, why it is in rte_eth_process_ name space?
> > > > > > For now, this design focuses on the flow rule offloading and
> > > > > > traffic
> > > > > redirection.
> > > > > > When switching process version, it' important to make sure
> > > > > > which
> > > > > application receives and handles the traffic.
> > > > >
> > > > > Changing the DPDK version runtime is just beyond rte_flow driver.
> > > >
> > > > It' not about changing DPDK version but upgrading DPDK from one
> > > > PMD
> > > version to another one.
> > > > Does the preceding example answer your question?
> > > > >
> > > > > > The changing should be effective across all probing eth
> > > > > > devices, that' why it
> > > > > was put under rte_eth_process_ (for all rte_eth_dev) name space.
> > > > > > >
> > > > > > > Also, if we are moving the standby, What about the rule
> > > > > > > whose ABI is changed between versions?
> > > > > >
> > > > > > Like the comments mentioned: " Before role transition, all the
> > > > > > rules set by
> > > > > the active process should be flushed first. "
> > > > >
> > > > > What happens to rte_flow flow handles for existing ones  which
> > > > > is created with version X?
> > > > > Also What if new version Y has ABI change in rte_flow_pattern
> > > > > and rte_flow_action structure?
> > > > >
> > > > > For me, If DPDK version change is needed, simply reload the
> > > > > application. This API will soon bloat, and it will be a mess if
> > > > > to start handling Different DPDK version which is not ABI compatible at
> all.
> > > > >
> > > > Yes, you are right. Reloading the application is the easiest way
> > > > but it may have a long time Window that traffic is lost. No
> > > > traffic arrives at
> > > process A or process B.
> > > > We are trying to simplify the reloading logic and minimize the
> > > > traffic down
> > > time as much as possible.
> > > > The approach may differentiate hugely between different NIC
> > > > vendors, so I think it should be better if DPDK can provide an abstract API.
> > > >
> > > > If process A and process B are ABI different, it doesn't matter.
> > > > 1. Call this API with process A means older ABI.
> > > > 2. Call this API with process B means newer ABI.
> > > > It' have process concept and working scope.
> > > >
> > > > >
> > > > >
> > > > >
> > > > > > > > behavior changed. This is the default state.
> > > > > > > > The standby role means rules are queued in the HW. If no
> > > > > > > > active roles alive or back to active, the rules are
> > > > > > > > effective
> > > immediately.
> > > > > > > >
> > > > > > > > Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
  
Ori Kam Dec. 26, 2022, 4:44 p.m. UTC | #9
Hi Rongwei and Jerin,

> -----Original Message-----
> From: Rongwei Liu <rongweil@nvidia.com>
> Sent: Wednesday, 21 December 2022 16:33
> 
> HI Jerin:
> 
> BR
> Rongwei
> 
> > -----Original Message-----
> > From: Jerin Jacob <jerinjacobk@gmail.com>
> > Sent: Wednesday, December 21, 2022 21:12
> > To: Rongwei Liu <rongweil@nvidia.com>
> > Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; NBU-Contact-
> > Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; Ferruh Yigit
> > <ferruh.yigit@amd.com>; Andrew Rybchenko
> > <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> > <rasland@nvidia.com>
> > Subject: Re: [RFC v3 2/2] ethdev: add API to set process to active or
> standby
> >
> > External email: Use caution opening links or attachments
> >
> >
> > On Wed, Dec 21, 2022 at 6:20 PM Rongwei Liu <rongweil@nvidia.com>
> wrote:
> > >
> > > HI Jerin:
> > >
> > > BR
> > > Rongwei
> > >
> > > > -----Original Message-----
> > > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > > Sent: Wednesday, December 21, 2022 20:45
> > > > To: Rongwei Liu <rongweil@nvidia.com>
> > > > Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > > > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; NBU-
> Contact-
> > > > Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; Ferruh Yigit
> > > > <ferruh.yigit@amd.com>; Andrew Rybchenko
> > > > <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan Darawsheh
> > > > <rasland@nvidia.com>
> > > > Subject: Re: [RFC v3 2/2] ethdev: add API to set process to active
> > > > or standby
> > > >
> > > > External email: Use caution opening links or attachments
> > > >
> > > >
> > > > On Wed, Dec 21, 2022 at 5:35 PM Rongwei Liu <rongweil@nvidia.com>
> > wrote:
> > > > >
> > > > > Hi Jerin:
> > > > >
> > > > > BR
> > > > > Rongwei
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > > > > Sent: Wednesday, December 21, 2022 19:00
> > > > > > To: Rongwei Liu <rongweil@nvidia.com>
> > > > > > Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > > > > > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>;
> > > > > > NBU-Contact- Thomas Monjalon (EXTERNAL)
> <thomas@monjalon.net>;
> > > > > > Ferruh Yigit <ferruh.yigit@amd.com>; Andrew Rybchenko
> > > > > > <andrew.rybchenko@oktetlabs.ru>; dev@dpdk.org; Raslan
> Darawsheh
> > > > > > <rasland@nvidia.com>
> > > > > > Subject: Re: [RFC v3 2/2] ethdev: add API to set process to
> > > > > > active or standby
> > > > > >
> > > > > > External email: Use caution opening links or attachments
> > > > > >
> > > > > >
> > > > > > On Wed, Dec 21, 2022 at 3:02 PM Rongwei Liu
> > > > > > <rongweil@nvidia.com>
> > > > wrote:
> > > > > > >
> > > > > > > HI Jerin:
> > > > > > >
> > > > > >
> > > > > > Hi Rongwei
> > > > > >
> > > > > > > BR
> > > > > > > Rongwei
> > > > > > >
> > > > > > > > -----Original Message-----
> > > > > > > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > > > > > > Sent: Wednesday, December 21, 2022 17:13
> > > > > > > > To: Rongwei Liu <rongweil@nvidia.com>
> > > > > > > > Cc: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> > > > > > > > <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>;
> > > > > > > > NBU-Contact- Thomas Monjalon (EXTERNAL)
> > > > > > > > <thomas@monjalon.net>; Ferruh Yigit
> <ferruh.yigit@amd.com>;
> > > > > > > > Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>;
> > > > > > > > dev@dpdk.org; Raslan Darawsheh <rasland@nvidia.com>
> > > > > > > > Subject: Re: [RFC v3 2/2] ethdev: add API to set process to
> > > > > > > > active or standby
> > > > > > > >
> > > > > > > > External email: Use caution opening links or attachments
> > > > > > > >
> > > > > > > >
> > > > > > > > On Wed, Dec 21, 2022 at 2:31 PM Rongwei Liu
> > > > > > > > <rongweil@nvidia.com>
> > > > > > wrote:
> > > > > > > > >
> > > > > > > > > Users may want to change the DPDK process to different
> > > > > > > > > versions
> > > > > > > >
> > > > > > > > Different version of DPDK? If there is any ABI change how to
> > > > > > > > support
> > > > this?
> > > > > > > >
> > > > > > > There is a new member which was introduced into
> > > > > > > rte_eth_dev_info but it
> > > > > > shouldn’t be ABI breaking since using reserved fields.
> > > > > >
> > > > > > That is just for rte_eth_dev_info. What about the ABI change in
> > > > > > different ethdev structure and rte_flow structures across
> > > > > > different DPDK
> > > > ABI versions.
> > > > > >
> > > > > Besides this, there is no other ABI changes dependency.
> > > > >
> > > > > Assume there is a DPDK process A running with version v21.11 and
> > > > > > plan to upgrade to version v22.11. Let's call v22.11 process B.
> > > >
> > > > OK. That's a relief. I understand the use case now.
> > > >
> > > > Why not simply use standard DPDK multiprocess model then.
> > > > Primary process act as server for slow path API. Secondary process
> > > > can come and go(aka can be updated at runtime) and use as client to
> > > > > update rules via primary-secondary communication mechanism.
> > > >
> > > > Just imagine if process A and process B have ABI breakage like different
> > rte_flow_item_*** and rte_flow_action_*** size and members.
> > > How can we quickly accommodate primary/secondary to be ABI
> compatible
> > across different versions?
> > > It will be very huge effort and difficult to implement, at least in my
> opinion.
> > > What do you think?
> >
> > > Yes, it is difficult whatever approach we take. On the other hand, ethdev
> subsystem
> > has other components like rte_tm and other offload etc, We can not simply
> > have rte_eth_process_set_active() and things magical works across
> different
> > DPDK versions. Example, if rte_flow rule has meter action will depend on
> > another HW piece in NIC card for doing the metering but set by flow API.
> > IMO, Customer can simply use standard multiprocess model if version are
> > compatible without any special intelligence in application.
> > Otherwise they can reload and start the application again or have special
> > intelligence in application to cater the specific area of API they need to
> > leverage based on server and client DPDK versions.
> 
> Thanks for the message.
> IMO, we are trying to eliminate the version/ABI dependency with this new
> added API.
> For example, if meter action is in the flow rules:
> 1. Process A have rules like "eth / ipv4 src 1.1.1.1 / meter / queue / end"
> 2. Process B starts with "rte_eth_process_set_active(false, flags)"
> 
> Just give Nvidia's hardware as example (other NIC vendors may not care if
> group 0 or not)
> If the process A's rules are in group 0, users should set them one by one to
> process B.
> Then either flush process A's rules or quit process A, then process B calls with
> "rte_eth_process_set_active(true, flags)"
> All is set.
> It will avoid complex operations with client/server model and avoid user mis-
> operation too.
> We should avoid reload as much as possible since reloading is very time
> consuming and may take up to few seconds.
> In this time slot, there is no application to handle the traffic, and everything is
> lost.
> For end user especially cloud service providers, they are sensitive to the
> traffic down time.

From my viewpoint the upgrade has nothing to do with DPDK as a library,
the upgrade may be because of application upgrade.
Unless I'm missing something, this API is not meant for API/ABI it is created
to allow minimum downtime when doing upgrade of the application.
Unless I'm missing something critical, since the upgrade in any case is not
only for the DPDK but the entire app, there isn't any ABI/API issue.
  
Thomas Monjalon Jan. 15, 2023, 10:46 p.m. UTC | #10
26/12/2022 17:44, Ori Kam:
> From: Rongwei Liu <rongweil@nvidia.com>
> > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > On Wed, Dec 21, 2022 at 6:20 PM Rongwei Liu wrote:
> > > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > > > On Wed, Dec 21, 2022 at 5:35 PM Rongwei Liu wrote:
> > > > > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > > > > > On Wed, Dec 21, 2022 at 3:02 PM Rongwei Liu wrote:
> > > > > > > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > > > > > > > On Wed, Dec 21, 2022 at 2:31 PM Rongwei Liu wrote:
> > > > > > > > > >
> > > > > > > > > > Users may want to change the DPDK process to different
> > > > > > > > > > versions
> > > > > > > > >
> > > > > > > > > Different version of DPDK? If there is any ABI change how to
> > > > > > > > > support
> > > > > this?
> > > > > > > > >
> > > > > > > > There is a new member which was introduced into
> > > > > > > > rte_eth_dev_info but it
> > > > > > > shouldn’t be ABI breaking since using reserved fields.
> > > > > > >
> > > > > > > That is just for rte_eth_dev_info. What about the ABI change in
> > > > > > > different ethdev structure and rte_flow structures across
> > > > > > > different DPDK
> > > > > ABI versions.
> > > > > > >
> > > > > > Besides this, there is no other ABI changes dependency.
> > > > > >
> > > > > > Assume there is a DPDK process A running with version v21.11 and
> > > > > > > plan to upgrade to version v22.11. Let's call v22.11 process B.
> > > > >
> > > > > OK. That's a relief. I understand the use case now.
> > > > >
> > > > > Why not simply use standard DPDK multiprocess model then.
> > > > > Primary process act as server for slow path API. Secondary process
> > > > > can come and go(aka can be updated at runtime) and use as client to
> > > > > > update rules via primary-secondary communication mechanism.
> > > > >
> > > > > Just imagine if process A and process B have ABI breakage like different
> > > rte_flow_item_*** and rte_flow_action_*** size and members.
> > > > How can we quickly accommodate primary/secondary to be ABI
> > compatible
> > > across different versions?
> > > > It will be very huge effort and difficult to implement, at least in my
> > opinion.
> > > > What do you think?
> > >
> > > > Yes, it is difficult whatever approach we take. On the other hand, ethdev
> > subsystem
> > > has other components like rte_tm and other offload etc, We can not simply
> > > have rte_eth_process_set_active() and things magical works across
> > different
> > > DPDK versions. Example, if rte_flow rule has meter action will depend on
> > > another HW piece in NIC card for doing the metering but set by flow API.
> > > IMO, Customer can simply use standard multiprocess model if version are
> > > compatible without any special intelligence in application.
> > > Otherwise they can reload and start the application again or have special
> > > intelligence in application to cater the specific area of API they need to
> > > leverage based on server and client DPDK versions.
> > 
> > Thanks for the message.
> > IMO, we are trying to eliminate the version/ABI dependency with this new
> > added API.
> > For example, if meter action is in the flow rules:
> > 1. Process A have rules like "eth / ipv4 src 1.1.1.1 / meter / queue / end"
> > 2. Process B starts with "rte_eth_process_set_active(false, flags)"
> > 
> > Just give Nvidia's hardware as example (other NIC vendors may not care if
> > group 0 or not)
> > If the process A's rules are in group 0, users should set them one by one to
> > process B.
> > Then either flush process A's rules or quit process A, then process B calls with
> > "rte_eth_process_set_active(true, flags)"
> > All is set.
> > It will avoid complex operations with client/server model and avoid user mis-
> > operation too.
> > We should avoid reload as much as possible since reloading is very time
> > consuming and may take up to few seconds.
> > In this time slot, there is no application to handle the traffic, and everything is
> > lost.
> > For end user especially cloud service providers, they are sensitive to the
> > traffic down time.
> 
> From my viewpoint the upgrade has nothing to do with DPDK as a library,
> the upgrade may be because of application upgrade.
> Unless I'm missing something, this API is not meant for API/ABI it is created
> to allow minimum downtime when doing upgrade of the application.
> Unless I'm missing something critical, since the upgrade in any case is not
> only for the DPDK but the entire app, there isn't any ABI/API issue.

Yes we can consider the case of an application upgrade with the same DPDK.
The patch needs to be reworded in this more realistic direction I think.
We can also improve the usage explanations.

That said, another high level question is about the scope of the feature.
In this patch, only ethdev is targeted.
Do you think we need the same migration mechanism in other classes
like vDPA, crypto, etc?
  

Patch

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 51f51259e3..de1fdac0a1 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -2001,3 +2001,13 @@  where:
 * ``sw_queue_id``: queue index in range [64536, 65535].
   This range is the highest 1000 numbers.
 * ``hw_queue_id``: queue index given by HW in queue creation.
+
+ethdev set process active or standby
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+User should only program group 0 (fdb_def_rule_en=0) when ``rte_eth_process_set_active``
+has been called and set to a standby role.
+Group 0 is shared across different DPDK processes while the other groups are limited
+to the current process scope.
+The process can't move from active to standby role if preceding active application's
+rules are still present and vice versa.
diff --git a/doc/guides/rel_notes/release_22_03.rst b/doc/guides/rel_notes/release_22_03.rst
index 0923707cb8..6fa48106c4 100644
--- a/doc/guides/rel_notes/release_22_03.rst
+++ b/doc/guides/rel_notes/release_22_03.rst
@@ -207,6 +207,11 @@  API Changes
 * ethdev: Old public macros and enumeration constants without ``RTE_ETH_`` prefix,
   which are kept for backward compatibility, are marked as deprecated.
 
+* ethdev: added a new experimental API:
+
+  The new API ``rte_eth_process_set_active()`` was added.
+  If ``RTE_ETH_CAPA_PROCESS_SET_ROLE`` is not advertised, this new API is not supported.
+
 * cryptodev: The asymmetric session handling was modified to use a single
   mempool object. An API ``rte_cryptodev_asym_session_pool_create`` was added
   to create a mempool with element size big enough to hold the generic asymmetric
diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index 6a550cfc83..3c583bc39d 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -179,6 +179,16 @@  struct rte_eth_dev_data {
 	pthread_mutex_t flow_ops_mutex; /**< rte_flow ops mutex */
 } __rte_cache_aligned;
 
+/**@{@name Different rte_eth role flag definitions which will be used
+ *  when migrating DPDK to a different version.
+ */
+/*
+ * Traffic coming from NIC domain rules will reach
+ * both active and standby processes.
+ */
+#define RTE_ETH_PROCESS_NIC_DUP_WITH_STANDBY RTE_BIT32(0),
+/**@}*/
+
 /**
  * @internal
  * The pool of *rte_eth_dev* structures. The size of the pool
@@ -1087,6 +1097,22 @@  typedef const uint32_t *(*eth_buffer_split_supported_hdr_ptypes_get_t)(struct rt
  */
 typedef int (*eth_dev_priv_dump_t)(struct rte_eth_dev *dev, FILE *file);
 
+/**
+ * @internal
+ * Set rte_eth process to active or standby role.
+ *
+ * @param dev
+ *   Port (ethdev) handle.
+ * @param active
+ *   Device (role) active or not (standby).
+ * @param flag
+ *   Role specific flag.
+ *
+ * @return
+ *   Negative value on error, 0 on success.
+ */
+typedef int (*eth_process_set_active_t)(struct rte_eth_dev *dev, bool active, uint32_t flag);
+
 /**
  * @internal Set Rx queue available descriptors threshold.
  * @see rte_eth_rx_avail_thresh_set()
@@ -1403,6 +1429,8 @@  struct eth_dev_ops {
 	eth_cman_config_set_t cman_config_set;
 	/** Retrieve congestion management configuration */
 	eth_cman_config_get_t cman_config_get;
+	/** Set the whole rte_eth process to active or standby role. */
+	eth_process_set_active_t eth_process_set_active;
 };
 
 /**
@@ -2046,6 +2074,41 @@  struct rte_eth_fdir_conf {
 	struct rte_eth_fdir_flex_conf flex_conf;
 };
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Set the rte_eth process to the active or standby role which affects
+ * the flow rules offloading. It doesn't allow multiple processes to have the
+ * same role unless no offload rules are set.
+ * The active process flow rules are effective immediately while the standby
+ * process rules will be matched (active) when the process becomes active or
+ * when the traffic is not matched by the active process rules.
+ * The active application will always receive traffic while the standby
+ * application will receive traffic when no matching rules are present from
+ * the active application.
+ *
+ * The application is active by default if this API is not called.
+ *
+ * When a process transforms from a standby to an active role, all preceding
+ * flow rules which are queued by hardware will be effective immediately.
+ * Before role transition, all the rules set by the active process should be
+ * flushed first.
+ *
+ * When role flag "RTE_ETH_PROCESS_NIC_DUP_WITH_STANDBY" is set, NIC domain
+ * flow rules are effective immediately even if a process is standby role.
+ *
+ * @param active
+ *   Process active (role) or not (standby).
+ * @param flag
+ *   The role flag.
+ * @return
+ *   - (>=0) Number of rte devices which have been switched successfully.
+ *   - (-EINVAL) if bad parameter.
+ */
+__rte_experimental
+int rte_eth_process_set_active(bool active, uint32_t flag);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 5d5e18db1e..f19da75bfe 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -6318,6 +6318,47 @@  rte_eth_buffer_split_get_supported_hdr_ptypes(uint16_t port_id, uint32_t *ptypes
 	return j;
 }
 
+int rte_eth_process_set_active(bool active, uint32_t flag)
+{
+	struct rte_eth_dev_info dev_info = {0};
+	uint32_t flags[RTE_MAX_ETHPORTS];
+	struct rte_eth_dev *dev;
+	uint16_t port_id;
+	int ret = 0;
+
+	/* Check if all devices support. */
+	RTE_ETH_FOREACH_DEV(port_id) {
+		dev = &rte_eth_devices[port_id];
+		if (*dev->dev_ops->dev_infos_get == NULL ||
+		    *dev->dev_ops->eth_process_set_active == NULL)
+			return -ENOTSUP;
+		if ((*dev->dev_ops->dev_infos_get)(dev, &dev_info))
+			return -EINVAL;
+		if (!(dev_info.dev_capa & RTE_ETH_CAPA_PROCESS_SET_ROLE))
+			return -ENOTSUP;
+	}
+	RTE_ETH_FOREACH_DEV(port_id) {
+		dev = &rte_eth_devices[port_id];
+		if ((*dev->dev_ops->dev_infos_get)(dev, &dev_info))
+			goto err;
+		flags[port_id] = dev_info.eth_process_flag;
+		if ((*dev->dev_ops->eth_process_set_active)(dev, active, flag) < 0)
+			goto err;
+		ret++;
+	}
+	return ret;
+err:
+	if (!ret)
+		return 0;
+	RTE_ETH_FOREACH_DEV(port_id) {
+		dev = &rte_eth_devices[port_id];
+		(*dev->dev_ops->eth_process_set_active)(dev, !active, flags[port_id]);
+		if (--ret == 0)
+			break;
+	}
+	return 0;
+}
+
 RTE_LOG_REGISTER_DEFAULT(rte_eth_dev_logtype, INFO);
 
 RTE_INIT(ethdev_init_telemetry)
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index c129ca1eaf..d29f051d6f 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1606,6 +1606,8 @@  struct rte_eth_conf {
 #define RTE_ETH_DEV_CAPA_FLOW_RULE_KEEP         RTE_BIT64(3)
 /** Device supports keeping shared flow objects across restart. */
 #define RTE_ETH_DEV_CAPA_FLOW_SHARED_OBJECT_KEEP RTE_BIT64(4)
+/** Device supports "rte_eth_process_set_active" callback. */
+#define RTE_ETH_CAPA_PROCESS_SET_ROLE RTE_BIT64(5)
 /**@}*/
 
 /*
@@ -1777,8 +1779,11 @@  struct rte_eth_dev_info {
 	struct rte_eth_switch_info switch_info;
 	/** Supported error handling mode. */
 	enum rte_eth_err_handle_mode err_handle_mode;
+	/** Process specific role flag. */
+	uint32_t eth_process_flag;
 
-	uint64_t reserved_64s[2]; /**< Reserved for future fields */
+	uint32_t reserved_32s[1]; /**< Reserved for future fields */
+	uint64_t reserved_64s[1]; /**< Reserved for future fields */
 	void *reserved_ptrs[2];   /**< Reserved for future fields */
 };
 
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 17201fbe0f..a5503f6fde 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -298,6 +298,9 @@  EXPERIMENTAL {
 	rte_flow_get_q_aged_flows;
 	rte_mtr_meter_policy_get;
 	rte_mtr_meter_profile_get;
+
+	# added in 23.03
+	rte_eth_process_set_active;
 };
 
 INTERNAL {