[v2,2/2] net/mlx5: support flow aging
Checks
Commit Message
Currently, there is no flow aging check and age-out event callback
mechanism for the mlx5 driver; this patch implements them. It includes:
- Splitting the current counter container into aged and no-aged containers
to reduce memory consumption. The aged container allocates extra
memory to save the aging parameters from the user configuration.
- Aging check and age-out event callback mechanism based on the current
counter. When a flow is detected as aged-out, the RTE_ETH_EVENT_FLOW_AGED
event will be triggered to applications.
- Implement the new API: rte_flow_get_aged_flows, applications can use
this API to get aged flows.
Signed-off-by: Bill Zhou <dongz@mellanox.com>
---
v2: Moved the aging list from struct mlx5_ibv_shared to struct mlx5_priv,
so one port has one aging list. The age-out event is triggered only once
after the last call of rte_flow_get_aged_flows.
---
doc/guides/rel_notes/release_20_05.rst | 1 +
drivers/net/mlx5/mlx5.c | 86 +++---
drivers/net/mlx5/mlx5.h | 49 +++-
drivers/net/mlx5/mlx5_flow.c | 201 ++++++++++++--
drivers/net/mlx5/mlx5_flow.h | 16 +-
drivers/net/mlx5/mlx5_flow_dv.c | 361 +++++++++++++++++++++----
drivers/net/mlx5/mlx5_flow_verbs.c | 14 +-
7 files changed, 607 insertions(+), 121 deletions(-)
Comments
On 4/24/2020 6:45 PM, Bill Zhou wrote:
> Currently, there is no flow aging check and age-out event callback
> mechanism for mlx5 driver, this patch implements it. It's included:
> - Splitting the current counter container to aged or no-aged container
> since reducing memory consumption. Aged container will allocate extra
> memory to save the aging parameter from user configuration.
> - Aging check and age-out event callback mechanism based on current
> counter. When a flow be checked aged-out, RTE_ETH_EVENT_FLOW_AGED
> event will be triggered to applications.
> - Implement the new API: rte_flow_get_aged_flows, applications can use
> this API to get aged flows.
>
> Signed-off-by: Bill Zhou <dongz@mellanox.com>
Reviewed-by: Suanming Mou <suanmingm@mellanox.com>
> ---
> v2: Moving aging list from struct mlx5_ibv_shared to struct mlx5_priv,
> one port has one aging list. Update event be triggered once after last
> call of rte_flow_get_aged_flows.
> ---
> doc/guides/rel_notes/release_20_05.rst | 1 +
> drivers/net/mlx5/mlx5.c | 86 +++---
> drivers/net/mlx5/mlx5.h | 49 +++-
> drivers/net/mlx5/mlx5_flow.c | 201 ++++++++++++--
> drivers/net/mlx5/mlx5_flow.h | 16 +-
> drivers/net/mlx5/mlx5_flow_dv.c | 361 +++++++++++++++++++++----
> drivers/net/mlx5/mlx5_flow_verbs.c | 14 +-
> 7 files changed, 607 insertions(+), 121 deletions(-)
>
> diff --git a/doc/guides/rel_notes/release_20_05.rst b/doc/guides/rel_notes/release_20_05.rst
> index b124c3f287..a5ba8a4792 100644
> --- a/doc/guides/rel_notes/release_20_05.rst
> +++ b/doc/guides/rel_notes/release_20_05.rst
> @@ -141,6 +141,7 @@ New Features
> * Added support for creating Relaxed Ordering Memory Regions.
> * Added support for jumbo frame size (9K MTU) in Multi-Packet RQ mode.
> * Optimized the memory consumption of flow.
> + * Added support for flow aging based on hardware counter.
>
> * **Updated the AESNI MB crypto PMD.**
>
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index 57d76cb741..674d0ea9d3 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -437,6 +437,20 @@ mlx5_flow_id_release(struct mlx5_flow_id_pool *pool, uint32_t id)
> return 0;
> }
>
> +/**
> + * Initialize the private aging list information.
> + *
> + * @param[in] priv
> + * Pointer to the private device data structure.
> + */
> +static void
> +mlx5_flow_aging_list_init(struct mlx5_priv *priv)
> +{
> + TAILQ_INIT(&priv->aged_counters);
> + rte_spinlock_init(&priv->aged_sl);
> + rte_atomic16_set(&priv->trigger_event, 1);
> +}
> +
> /**
> * Initialize the counters management structure.
> *
> @@ -446,11 +460,14 @@ mlx5_flow_id_release(struct mlx5_flow_id_pool *pool, uint32_t id)
> static void
> mlx5_flow_counters_mng_init(struct mlx5_ibv_shared *sh)
> {
> - uint8_t i;
> + uint8_t i, age;
>
> + sh->cmng.age = 0;
> TAILQ_INIT(&sh->cmng.flow_counters);
> - for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i)
> - TAILQ_INIT(&sh->cmng.ccont[i].pool_list);
> + for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
> + for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i)
> + TAILQ_INIT(&sh->cmng.ccont[i][age].pool_list);
> + }
> }
>
> /**
> @@ -480,7 +497,7 @@ static void
> mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
> {
> struct mlx5_counter_stats_mem_mng *mng;
> - uint8_t i;
> + uint8_t i, age = 0;
> int j;
> int retries = 1024;
>
> @@ -491,36 +508,42 @@ mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
> break;
> rte_pause();
> }
> - for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) {
> - struct mlx5_flow_counter_pool *pool;
> - uint32_t batch = !!(i % 2);
>
> - if (!sh->cmng.ccont[i].pools)
> - continue;
> - pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
> - while (pool) {
> - if (batch) {
> - if (pool->min_dcs)
> - claim_zero
> - (mlx5_devx_cmd_destroy(pool->min_dcs));
> - }
> - for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
> - if (MLX5_POOL_GET_CNT(pool, j)->action)
> - claim_zero
> - (mlx5_glue->destroy_flow_action
> - (MLX5_POOL_GET_CNT(pool, j)->action));
> - if (!batch && MLX5_GET_POOL_CNT_EXT
> - (pool, j)->dcs)
> - claim_zero(mlx5_devx_cmd_destroy
> - (MLX5_GET_POOL_CNT_EXT
> - (pool, j)->dcs));
> + for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
> + for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) {
> + struct mlx5_flow_counter_pool *pool;
> + uint32_t batch = !!(i % 2);
> +
> + if (!sh->cmng.ccont[i][age].pools)
> + continue;
> + pool = TAILQ_FIRST(&sh->cmng.ccont[i][age].pool_list);
> + while (pool) {
> + if (batch) {
> + if (pool->min_dcs)
> + claim_zero
> + (mlx5_devx_cmd_destroy
> + (pool->min_dcs));
> + }
> + for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
> + if (MLX5_POOL_GET_CNT(pool, j)->action)
> + claim_zero
> + (mlx5_glue->destroy_flow_action
> + (MLX5_POOL_GET_CNT
> + (pool, j)->action));
> + if (!batch && MLX5_GET_POOL_CNT_EXT
> + (pool, j)->dcs)
> + claim_zero(mlx5_devx_cmd_destroy
> + (MLX5_GET_POOL_CNT_EXT
> + (pool, j)->dcs));
> + }
> + TAILQ_REMOVE(&sh->cmng.ccont[i][age].pool_list,
> + pool, next);
> + rte_free(pool);
> + pool = TAILQ_FIRST
> + (&sh->cmng.ccont[i][age].pool_list);
> }
> - TAILQ_REMOVE(&sh->cmng.ccont[i].pool_list, pool,
> - next);
> - rte_free(pool);
> - pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
> + rte_free(sh->cmng.ccont[i][age].pools);
> }
> - rte_free(sh->cmng.ccont[i].pools);
> }
> mng = LIST_FIRST(&sh->cmng.mem_mngs);
> while (mng) {
> @@ -3003,6 +3026,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
> goto error;
> }
> }
> + mlx5_flow_aging_list_init(priv);
> return eth_dev;
> error:
> if (priv) {
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index 51c3f33e6b..d1b358e929 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -222,13 +222,21 @@ struct mlx5_drop {
> #define MLX5_COUNTERS_PER_POOL 512
> #define MLX5_MAX_PENDING_QUERIES 4
> #define MLX5_CNT_CONTAINER_RESIZE 64
> +#define MLX5_CNT_AGE_OFFSET 0x80000000
> #define CNT_SIZE (sizeof(struct mlx5_flow_counter))
> #define CNTEXT_SIZE (sizeof(struct mlx5_flow_counter_ext))
> +#define AGE_SIZE (sizeof(struct mlx5_age_param))
>
> #define CNT_POOL_TYPE_EXT (1 << 0)
> +#define CNT_POOL_TYPE_AGE (1 << 1)
> #define IS_EXT_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_EXT)
> +#define IS_AGE_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_AGE)
> +#define MLX_CNT_IS_AGE(counter) ((counter) & MLX5_CNT_AGE_OFFSET ? 1 : 0)
> +
> #define MLX5_CNT_LEN(pool) \
> - (CNT_SIZE + (IS_EXT_POOL(pool) ? CNTEXT_SIZE : 0))
> + (CNT_SIZE + \
> + (IS_AGE_POOL(pool) ? AGE_SIZE : 0) + \
> + (IS_EXT_POOL(pool) ? CNTEXT_SIZE : 0))
> #define MLX5_POOL_GET_CNT(pool, index) \
> ((struct mlx5_flow_counter *) \
> ((char *)((pool) + 1) + (index) * (MLX5_CNT_LEN(pool))))
> @@ -242,13 +250,33 @@ struct mlx5_drop {
> */
> #define MLX5_MAKE_CNT_IDX(pi, offset) \
> ((pi) * MLX5_COUNTERS_PER_POOL + (offset) + 1)
> -#define MLX5_CNT_TO_CNT_EXT(cnt) \
> - ((struct mlx5_flow_counter_ext *)((cnt) + 1))
> +#define MLX5_CNT_TO_CNT_EXT(pool, cnt) \
> + ((struct mlx5_flow_counter_ext *)\
> + ((char *)((cnt) + 1) + \
> + (IS_AGE_POOL(pool) ? AGE_SIZE : 0)))
> #define MLX5_GET_POOL_CNT_EXT(pool, offset) \
> - MLX5_CNT_TO_CNT_EXT(MLX5_POOL_GET_CNT((pool), (offset)))
> + MLX5_CNT_TO_CNT_EXT(pool, MLX5_POOL_GET_CNT((pool), (offset)))
> +#define MLX5_CNT_TO_AGE(cnt) \
> + ((struct mlx5_age_param *)((cnt) + 1))
>
> struct mlx5_flow_counter_pool;
>
> +/*age status*/
> +enum {
> + AGE_FREE,
> + AGE_CANDIDATE, /* Counter assigned to flows. */
> + AGE_TMOUT, /* Timeout, wait for aged flows query and destroy. */
> +};
> +
> +/* Counter age parameter. */
> +struct mlx5_age_param {
> + rte_atomic16_t state; /**< Age state. */
> + uint16_t port_id; /**< Port id of the counter. */
> + uint32_t timeout:15; /**< Age timeout in unit of 0.1sec. */
> + uint32_t expire:16; /**< Expire time(0.1sec) in the future. */
> + void *context; /**< Flow counter age context. */
> +};
> +
> struct flow_counter_stats {
> uint64_t hits;
> uint64_t bytes;
> @@ -336,13 +364,14 @@ struct mlx5_pools_container {
>
> /* Counter global management structure. */
> struct mlx5_flow_counter_mng {
> - uint8_t mhi[2]; /* master \ host container index. */
> - struct mlx5_pools_container ccont[2 * 2];
> - /* 2 containers for single and for batch for double-buffer. */
> + uint8_t mhi[2][2]; /* master \ host and age \ no age container index. */
> + struct mlx5_pools_container ccont[2 * 2][2];
> + /* master \ host and age \ no age pools container. */
> struct mlx5_counters flow_counters; /* Legacy flow counter list. */
> uint8_t pending_queries;
> uint8_t batch;
> uint16_t pool_index;
> + uint8_t age;
> uint8_t query_thread_on;
> LIST_HEAD(mem_mngs, mlx5_counter_stats_mem_mng) mem_mngs;
> LIST_HEAD(stat_raws, mlx5_counter_stats_raw) free_stat_raws;
> @@ -566,6 +595,10 @@ struct mlx5_priv {
> uint8_t fdb_def_rule; /* Whether fdb jump to table 1 is configured. */
> struct mlx5_mp_id mp_id; /* ID of a multi-process process */
> LIST_HEAD(fdir, mlx5_fdir_flow) fdir_flows; /* fdir flows. */
> + struct mlx5_counters aged_counters; /* Aged flow counter list. */
> + rte_spinlock_t aged_sl; /* Aged flow counter list lock. */
> + rte_atomic16_t trigger_event;
> + /* Event be triggered once after last call of rte_flow_get_aged_flows*/
> };
>
> #define PORT_ID(priv) ((priv)->dev_data->port_id)
> @@ -764,6 +797,8 @@ int mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
> int mlx5_flow_dev_dump(struct rte_eth_dev *dev, FILE *file,
> struct rte_flow_error *error);
> void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
> +int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
> + uint32_t nb_contexts, struct rte_flow_error *error);
>
> /* mlx5_mp.c */
> int mlx5_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer);
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index cba1f23e81..c691b43722 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -24,6 +24,7 @@
> #include <rte_ether.h>
> #include <rte_ethdev_driver.h>
> #include <rte_flow.h>
> +#include <rte_cycles.h>
> #include <rte_flow_driver.h>
> #include <rte_malloc.h>
> #include <rte_ip.h>
> @@ -242,6 +243,7 @@ static const struct rte_flow_ops mlx5_flow_ops = {
> .isolate = mlx5_flow_isolate,
> .query = mlx5_flow_query,
> .dev_dump = mlx5_flow_dev_dump,
> + .get_aged_flows = mlx5_flow_get_aged_flows,
> };
>
> /* Convert FDIR request to Generic flow. */
> @@ -2531,6 +2533,8 @@ flow_drv_validate(struct rte_eth_dev *dev,
> * Pointer to the list of items.
> * @param[in] actions
> * Pointer to the list of actions.
> + * @param[in] flow_idx
> + * This memory pool index to the flow.
> * @param[out] error
> * Pointer to the error structure.
> *
> @@ -2543,14 +2547,19 @@ flow_drv_prepare(struct rte_eth_dev *dev,
> const struct rte_flow_attr *attr,
> const struct rte_flow_item items[],
> const struct rte_flow_action actions[],
> + uint32_t flow_idx,
> struct rte_flow_error *error)
> {
> const struct mlx5_flow_driver_ops *fops;
> enum mlx5_flow_drv_type type = flow->drv_type;
> + struct mlx5_flow *mlx5_flow = NULL;
>
> MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
> fops = flow_get_drv_ops(type);
> - return fops->prepare(dev, attr, items, actions, error);
> + mlx5_flow = fops->prepare(dev, attr, items, actions, error);
> + if (mlx5_flow)
> + mlx5_flow->flow_idx = flow_idx;
> + return mlx5_flow;
> }
>
> /**
> @@ -3498,6 +3507,8 @@ flow_hairpin_split(struct rte_eth_dev *dev,
> * Associated actions (list terminated by the END action).
> * @param[in] external
> * This flow rule is created by request external to PMD.
> + * @param[in] flow_idx
> + * This memory pool index to the flow.
> * @param[out] error
> * Perform verbose error reporting if not NULL.
> * @return
> @@ -3511,11 +3522,13 @@ flow_create_split_inner(struct rte_eth_dev *dev,
> const struct rte_flow_attr *attr,
> const struct rte_flow_item items[],
> const struct rte_flow_action actions[],
> - bool external, struct rte_flow_error *error)
> + bool external, uint32_t flow_idx,
> + struct rte_flow_error *error)
> {
> struct mlx5_flow *dev_flow;
>
> - dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, error);
> + dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
> + flow_idx, error);
> if (!dev_flow)
> return -rte_errno;
> dev_flow->flow = flow;
> @@ -3876,6 +3889,8 @@ flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
> * Associated actions (list terminated by the END action).
> * @param[in] external
> * This flow rule is created by request external to PMD.
> + * @param[in] flow_idx
> + * This memory pool index to the flow.
> * @param[out] error
> * Perform verbose error reporting if not NULL.
> * @return
> @@ -3888,7 +3903,8 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
> const struct rte_flow_attr *attr,
> const struct rte_flow_item items[],
> const struct rte_flow_action actions[],
> - bool external, struct rte_flow_error *error)
> + bool external, uint32_t flow_idx,
> + struct rte_flow_error *error)
> {
> struct mlx5_priv *priv = dev->data->dev_private;
> struct mlx5_dev_config *config = &priv->config;
> @@ -3908,7 +3924,7 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
> !mlx5_flow_ext_mreg_supported(dev))
> return flow_create_split_inner(dev, flow, NULL, prefix_layers,
> attr, items, actions, external,
> - error);
> + flow_idx, error);
> actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
> &encap_idx);
> if (qrss) {
> @@ -3992,7 +4008,7 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
> /* Add the unmodified original or prefix subflow. */
> ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, attr,
> items, ext_actions ? ext_actions :
> - actions, external, error);
> + actions, external, flow_idx, error);
> if (ret < 0)
> goto exit;
> MLX5_ASSERT(dev_flow);
> @@ -4055,7 +4071,7 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
> ret = flow_create_split_inner(dev, flow, &dev_flow, layers,
> &q_attr, mtr_sfx ? items :
> q_items, q_actions,
> - external, error);
> + external, flow_idx, error);
> if (ret < 0)
> goto exit;
> /* qrss ID should be freed if failed. */
> @@ -4096,6 +4112,8 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
> * Associated actions (list terminated by the END action).
> * @param[in] external
> * This flow rule is created by request external to PMD.
> + * @param[in] flow_idx
> + * This memory pool index to the flow.
> * @param[out] error
> * Perform verbose error reporting if not NULL.
> * @return
> @@ -4107,7 +4125,8 @@ flow_create_split_meter(struct rte_eth_dev *dev,
> const struct rte_flow_attr *attr,
> const struct rte_flow_item items[],
> const struct rte_flow_action actions[],
> - bool external, struct rte_flow_error *error)
> + bool external, uint32_t flow_idx,
> + struct rte_flow_error *error)
> {
> struct mlx5_priv *priv = dev->data->dev_private;
> struct rte_flow_action *sfx_actions = NULL;
> @@ -4151,7 +4170,7 @@ flow_create_split_meter(struct rte_eth_dev *dev,
> /* Add the prefix subflow. */
> ret = flow_create_split_inner(dev, flow, &dev_flow, 0, attr,
> items, pre_actions, external,
> - error);
> + flow_idx, error);
> if (ret) {
> ret = -rte_errno;
> goto exit;
> @@ -4168,7 +4187,7 @@ flow_create_split_meter(struct rte_eth_dev *dev,
> 0, &sfx_attr,
> sfx_items ? sfx_items : items,
> sfx_actions ? sfx_actions : actions,
> - external, error);
> + external, flow_idx, error);
> exit:
> if (sfx_actions)
> rte_free(sfx_actions);
> @@ -4205,6 +4224,8 @@ flow_create_split_meter(struct rte_eth_dev *dev,
> * Associated actions (list terminated by the END action).
> * @param[in] external
> * This flow rule is created by request external to PMD.
> + * @param[in] flow_idx
> + * This memory pool index to the flow.
> * @param[out] error
> * Perform verbose error reporting if not NULL.
> * @return
> @@ -4216,12 +4237,13 @@ flow_create_split_outer(struct rte_eth_dev *dev,
> const struct rte_flow_attr *attr,
> const struct rte_flow_item items[],
> const struct rte_flow_action actions[],
> - bool external, struct rte_flow_error *error)
> + bool external, uint32_t flow_idx,
> + struct rte_flow_error *error)
> {
> int ret;
>
> ret = flow_create_split_meter(dev, flow, attr, items,
> - actions, external, error);
> + actions, external, flow_idx, error);
> MLX5_ASSERT(ret <= 0);
> return ret;
> }
> @@ -4356,7 +4378,7 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
> */
> ret = flow_create_split_outer(dev, flow, attr,
> buf->entry[i].pattern,
> - p_actions_rx, external,
> + p_actions_rx, external, idx,
> error);
> if (ret < 0)
> goto error;
> @@ -4367,7 +4389,8 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
> attr_tx.ingress = 0;
> attr_tx.egress = 1;
> dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
> - actions_hairpin_tx.actions, error);
> + actions_hairpin_tx.actions,
> + idx, error);
> if (!dev_flow)
> goto error;
> dev_flow->flow = flow;
> @@ -5741,6 +5764,31 @@ mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
>
> #define MLX5_POOL_QUERY_FREQ_US 1000000
>
> +/**
> + * Get number of all validate pools.
> + *
> + * @param[in] sh
> + * Pointer to mlx5_ibv_shared object.
> + *
> + * @return
> + * The number of all validate pools.
> + */
> +static uint32_t
> +mlx5_get_all_valid_pool_count(struct mlx5_ibv_shared *sh)
> +{
> + uint8_t age, i;
> + uint32_t pools_n = 0;
> + struct mlx5_pools_container *cont;
> +
> + for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
> + for (i = 0; i < 2 ; ++i) {
> + cont = MLX5_CNT_CONTAINER(sh, i, 0, age);
> + pools_n += rte_atomic16_read(&cont->n_valid);
> + }
> + }
> + return pools_n;
> +}
> +
> /**
> * Set the periodic procedure for triggering asynchronous batch queries for all
> * the counter pools.
> @@ -5751,12 +5799,9 @@ mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
> void
> mlx5_set_query_alarm(struct mlx5_ibv_shared *sh)
> {
> - struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(sh, 0, 0);
> - uint32_t pools_n = rte_atomic16_read(&cont->n_valid);
> - uint32_t us;
> + uint32_t pools_n, us;
>
> - cont = MLX5_CNT_CONTAINER(sh, 1, 0);
> - pools_n += rte_atomic16_read(&cont->n_valid);
> + pools_n = mlx5_get_all_valid_pool_count(sh);
> us = MLX5_POOL_QUERY_FREQ_US / pools_n;
> DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
> if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
> @@ -5782,6 +5827,7 @@ mlx5_flow_query_alarm(void *arg)
> uint16_t offset;
> int ret;
> uint8_t batch = sh->cmng.batch;
> + uint8_t age = sh->cmng.age;
> uint16_t pool_index = sh->cmng.pool_index;
> struct mlx5_pools_container *cont;
> struct mlx5_pools_container *mcont;
> @@ -5790,8 +5836,8 @@ mlx5_flow_query_alarm(void *arg)
> if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
> goto set_alarm;
> next_container:
> - cont = MLX5_CNT_CONTAINER(sh, batch, 1);
> - mcont = MLX5_CNT_CONTAINER(sh, batch, 0);
> + cont = MLX5_CNT_CONTAINER(sh, batch, 1, age);
> + mcont = MLX5_CNT_CONTAINER(sh, batch, 0, age);
> /* Check if resize was done and need to flip a container. */
> if (cont != mcont) {
> if (cont->pools) {
> @@ -5801,15 +5847,22 @@ mlx5_flow_query_alarm(void *arg)
> }
> rte_cio_wmb();
> /* Flip the host container. */
> - sh->cmng.mhi[batch] ^= (uint8_t)2;
> + sh->cmng.mhi[batch][age] ^= (uint8_t)2;
> cont = mcont;
> }
> if (!cont->pools) {
> /* 2 empty containers case is unexpected. */
> - if (unlikely(batch != sh->cmng.batch))
> + if (unlikely(batch != sh->cmng.batch) &&
> + unlikely(age != sh->cmng.age)) {
> goto set_alarm;
> + }
> batch ^= 0x1;
> pool_index = 0;
> + if (batch == 0 && pool_index == 0) {
> + age ^= 0x1;
> + sh->cmng.batch = batch;
> + sh->cmng.age = age;
> + }
> goto next_container;
> }
> pool = cont->pools[pool_index];
> @@ -5852,13 +5905,76 @@ mlx5_flow_query_alarm(void *arg)
> if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
> batch ^= 0x1;
> pool_index = 0;
> + if (batch == 0 && pool_index == 0)
> + age ^= 0x1;
> }
> set_alarm:
> sh->cmng.batch = batch;
> sh->cmng.pool_index = pool_index;
> + sh->cmng.age = age;
> mlx5_set_query_alarm(sh);
> }
>
> +/**
> + * Check and callback event for new aged flow in the counter pool
> + *
> + * @param[in] pool
> + * The pointer to Current counter pool.
> + */
> +static void
> +mlx5_flow_aging_check(struct mlx5_flow_counter_pool *pool)
> +{
> + struct mlx5_priv *priv;
> + struct mlx5_flow_counter *cnt;
> + struct mlx5_age_param *age_param;
> + struct mlx5_counter_stats_raw *cur = pool->raw_hw;
> + struct mlx5_counter_stats_raw *prev = pool->raw;
> + uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10);
> + uint64_t port_mask = 0;
> + uint32_t i;
> +
> + for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
> + cnt = MLX5_POOL_GET_CNT(pool, i);
> + age_param = MLX5_CNT_TO_AGE(cnt);
> + if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE)
> + continue;
> + if (cur->data[i].hits != prev->data[i].hits) {
> + age_param->expire = curr + age_param->timeout;
> + continue;
> + }
> + if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2))
> + continue;
> + /**
> + * Hold the lock first, or if between the
> + * state AGE_TMOUT and tailq operation the
> + * release happened, the release procedure
> + * may delete a non-existent tailq node.
> + */
> + priv = rte_eth_devices[age_param->port_id].data->dev_private;
> + rte_spinlock_lock(&priv->aged_sl);
> + /* If the cpmset fails, release happens. */
> + if (rte_atomic16_cmpset((volatile uint16_t *)
> + &age_param->state,
> + AGE_CANDIDATE,
> + AGE_TMOUT) ==
> + AGE_CANDIDATE) {
> + TAILQ_INSERT_TAIL(&priv->aged_counters, cnt, next);
> + port_mask |= (1ull << age_param->port_id);
> + }
> + rte_spinlock_unlock(&priv->aged_sl);
> + }
> + for (i = 0; i < 64; i++) {
> + if (port_mask & (1ull << i)) {
> + priv = rte_eth_devices[i].data->dev_private;
> + if (!rte_atomic16_read(&priv->trigger_event))
> + continue;
> + _rte_eth_dev_callback_process(&rte_eth_devices[i],
> + RTE_ETH_EVENT_FLOW_AGED, NULL);
> + rte_atomic16_set(&priv->trigger_event, 0);
> + }
> + }
> +}
> +
> /**
> * Handler for the HW respond about ready values from an asynchronous batch
> * query. This function is probably called by the host thread.
> @@ -5883,6 +5999,8 @@ mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh,
> raw_to_free = pool->raw_hw;
> } else {
> raw_to_free = pool->raw;
> + if (IS_AGE_POOL(pool))
> + mlx5_flow_aging_check(pool);
> rte_spinlock_lock(&pool->sl);
> pool->raw = pool->raw_hw;
> rte_spinlock_unlock(&pool->sl);
> @@ -6034,3 +6152,40 @@ mlx5_flow_dev_dump(struct rte_eth_dev *dev,
> return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
> sh->tx_domain, file);
> }
> +
> +/**
> + * Get aged-out flows.
> + *
> + * @param[in] dev
> + * Pointer to the Ethernet device structure.
> + * @param[in] context
> + * The address of an array of pointers to the aged-out flows contexts.
> + * @param[in] nb_countexts
> + * The length of context array pointers.
> + * @param[out] error
> + * Perform verbose error reporting if not NULL. Initialized in case of
> + * error only.
> + *
> + * @return
> + * how many contexts get in success, otherwise negative errno value.
> + * if nb_contexts is 0, return the amount of all aged contexts.
> + * if nb_contexts is not 0 , return the amount of aged flows reported
> + * in the context array.
> + */
> +int
> +mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
> + uint32_t nb_contexts, struct rte_flow_error *error)
> +{
> + const struct mlx5_flow_driver_ops *fops;
> + struct rte_flow_attr attr = { .transfer = 0 };
> +
> + if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
> + fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
> + return fops->get_aged_flows(dev, contexts, nb_contexts,
> + error);
> + }
> + DRV_LOG(ERR,
> + "port %u get aged flows is not supported.",
> + dev->data->port_id);
> + return -ENOTSUP;
> +}
> diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> index 2a1f59698c..bf1d5beb9b 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -199,6 +199,7 @@ enum mlx5_feature_name {
> #define MLX5_FLOW_ACTION_METER (1ull << 31)
> #define MLX5_FLOW_ACTION_SET_IPV4_DSCP (1ull << 32)
> #define MLX5_FLOW_ACTION_SET_IPV6_DSCP (1ull << 33)
> +#define MLX5_FLOW_ACTION_AGE (1ull << 34)
>
> #define MLX5_FLOW_FATE_ACTIONS \
> (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | \
> @@ -650,6 +651,7 @@ struct mlx5_flow_verbs_workspace {
> /** Device flow structure. */
> struct mlx5_flow {
> struct rte_flow *flow; /**< Pointer to the main flow. */
> + uint32_t flow_idx; /**< The memory pool index to the main flow. */
> uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
> uint64_t act_flags;
> /**< Bit-fields of detected actions, see MLX5_FLOW_ACTION_*. */
> @@ -873,6 +875,11 @@ typedef int (*mlx5_flow_counter_query_t)(struct rte_eth_dev *dev,
> uint32_t cnt,
> bool clear, uint64_t *pkts,
> uint64_t *bytes);
> +typedef int (*mlx5_flow_get_aged_flows_t)
> + (struct rte_eth_dev *dev,
> + void **context,
> + uint32_t nb_contexts,
> + struct rte_flow_error *error);
> struct mlx5_flow_driver_ops {
> mlx5_flow_validate_t validate;
> mlx5_flow_prepare_t prepare;
> @@ -888,13 +895,14 @@ struct mlx5_flow_driver_ops {
> mlx5_flow_counter_alloc_t counter_alloc;
> mlx5_flow_counter_free_t counter_free;
> mlx5_flow_counter_query_t counter_query;
> + mlx5_flow_get_aged_flows_t get_aged_flows;
> };
>
>
> -#define MLX5_CNT_CONTAINER(sh, batch, thread) (&(sh)->cmng.ccont \
> - [(((sh)->cmng.mhi[batch] >> (thread)) & 0x1) * 2 + (batch)])
> -#define MLX5_CNT_CONTAINER_UNUSED(sh, batch, thread) (&(sh)->cmng.ccont \
> - [(~((sh)->cmng.mhi[batch] >> (thread)) & 0x1) * 2 + (batch)])
> +#define MLX5_CNT_CONTAINER(sh, batch, thread, age) (&(sh)->cmng.ccont \
> + [(((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 + (batch)][age])
> +#define MLX5_CNT_CONTAINER_UNUSED(sh, batch, thread, age) (&(sh)->cmng.ccont \
> + [(~((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 + (batch)][age])
>
> /* mlx5_flow.c */
>
> diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
> index 784a62c521..73a5f477f8 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -24,6 +24,7 @@
> #include <rte_flow.h>
> #include <rte_flow_driver.h>
> #include <rte_malloc.h>
> +#include <rte_cycles.h>
> #include <rte_ip.h>
> #include <rte_gre.h>
> #include <rte_vxlan.h>
> @@ -3719,6 +3720,50 @@ mlx5_flow_validate_action_meter(struct rte_eth_dev *dev,
> return 0;
> }
>
> +/**
> + * Validate the age action.
> + *
> + * @param[in] action_flags
> + * Holds the actions detected until now.
> + * @param[in] action
> + * Pointer to the age action.
> + * @param[in] dev
> + * Pointer to the Ethernet device structure.
> + * @param[out] error
> + * Pointer to error structure.
> + *
> + * @return
> + * 0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +flow_dv_validate_action_age(uint64_t action_flags,
> + const struct rte_flow_action *action,
> + struct rte_eth_dev *dev,
> + struct rte_flow_error *error)
> +{
> + struct mlx5_priv *priv = dev->data->dev_private;
> + const struct rte_flow_action_age *age = action->conf;
> +
> + if (!priv->config.devx)
> + return rte_flow_error_set(error, ENOTSUP,
> + RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> + NULL,
> + "age action not supported");
> + if (!(action->conf))
> + return rte_flow_error_set(error, EINVAL,
> + RTE_FLOW_ERROR_TYPE_ACTION, action,
> + "configuration cannot be null");
> + if (age->timeout >= UINT16_MAX / 2 / 10)
> + return rte_flow_error_set(error, ENOTSUP,
> + RTE_FLOW_ERROR_TYPE_ACTION, action,
> + "Max age time: 3275 seconds");
> + if (action_flags & MLX5_FLOW_ACTION_AGE)
> + return rte_flow_error_set(error, EINVAL,
> + RTE_FLOW_ERROR_TYPE_ACTION, NULL,
> + "Duplicate age ctions set");
> + return 0;
> +}
> +
> /**
> * Validate the modify-header IPv4 DSCP actions.
> *
> @@ -3896,14 +3941,16 @@ flow_dv_counter_get_by_idx(struct rte_eth_dev *dev,
> struct mlx5_priv *priv = dev->data->dev_private;
> struct mlx5_pools_container *cont;
> struct mlx5_flow_counter_pool *pool;
> - uint32_t batch = 0;
> + uint32_t batch = 0, age = 0;
>
> idx--;
> + age = MLX_CNT_IS_AGE(idx);
> + idx = age ? idx - MLX5_CNT_AGE_OFFSET : idx;
> if (idx >= MLX5_CNT_BATCH_OFFSET) {
> idx -= MLX5_CNT_BATCH_OFFSET;
> batch = 1;
> }
> - cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0);
> + cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
> MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cont->n);
> pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
> MLX5_ASSERT(pool);
> @@ -4023,18 +4070,21 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
> * Pointer to the Ethernet device structure.
> * @param[in] batch
> * Whether the pool is for counter that was allocated by batch command.
> + * @param[in] age
> + * Whether the pool is for Aging counter.
> *
> * @return
> * The new container pointer on success, otherwise NULL and rte_errno is set.
> */
> static struct mlx5_pools_container *
> -flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
> +flow_dv_container_resize(struct rte_eth_dev *dev,
> + uint32_t batch, uint32_t age)
> {
> struct mlx5_priv *priv = dev->data->dev_private;
> struct mlx5_pools_container *cont =
> - MLX5_CNT_CONTAINER(priv->sh, batch, 0);
> + MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
> struct mlx5_pools_container *new_cont =
> - MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0);
> + MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0, age);
> struct mlx5_counter_stats_mem_mng *mem_mng = NULL;
> uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE;
> uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize;
> @@ -4042,7 +4092,7 @@ flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
>
> /* Fallback mode has no background thread. Skip the check. */
> if (!priv->counter_fallback &&
> - cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1)) {
> + cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1, age)) {
> /* The last resize still hasn't detected by the host thread. */
> rte_errno = EAGAIN;
> return NULL;
> @@ -4085,7 +4135,7 @@ flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
> new_cont->init_mem_mng = mem_mng;
> rte_cio_wmb();
> /* Flip the master container. */
> - priv->sh->cmng.mhi[batch] ^= (uint8_t)1;
> + priv->sh->cmng.mhi[batch][age] ^= (uint8_t)1;
> return new_cont;
> }
>
> @@ -4117,7 +4167,7 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
> cnt = flow_dv_counter_get_by_idx(dev, counter, &pool);
> MLX5_ASSERT(pool);
> if (counter < MLX5_CNT_BATCH_OFFSET) {
> - cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
> + cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
> if (priv->counter_fallback)
> return mlx5_devx_cmd_flow_counter_query(cnt_ext->dcs, 0,
> 0, pkts, bytes, 0, NULL, NULL, 0);
> @@ -4150,6 +4200,8 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
> * The devX counter handle.
> * @param[in] batch
> * Whether the pool is for counter that was allocated by batch command.
> + * @param[in] age
> + * Whether the pool is for counter that was allocated for aging.
> * @param[in/out] cont_cur
> * Pointer to the container pointer, it will be update in pool resize.
> *
> @@ -4158,24 +4210,23 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
> */
> static struct mlx5_pools_container *
> flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
> - uint32_t batch)
> + uint32_t batch, uint32_t age)
> {
> struct mlx5_priv *priv = dev->data->dev_private;
> struct mlx5_flow_counter_pool *pool;
> struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
> - 0);
> + 0, age);
> int16_t n_valid = rte_atomic16_read(&cont->n_valid);
> - uint32_t size;
> + uint32_t size = sizeof(*pool);
>
> if (cont->n == n_valid) {
> - cont = flow_dv_container_resize(dev, batch);
> + cont = flow_dv_container_resize(dev, batch, age);
> if (!cont)
> return NULL;
> }
> - size = sizeof(*pool);
> size += MLX5_COUNTERS_PER_POOL * CNT_SIZE;
> - if (!batch)
> - size += MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE;
> + size += (batch ? 0 : MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE);
> + size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * AGE_SIZE);
> pool = rte_calloc(__func__, 1, size, 0);
> if (!pool) {
> rte_errno = ENOMEM;
> @@ -4187,8 +4238,8 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
> MLX5_CNT_CONTAINER_RESIZE;
> pool->raw_hw = NULL;
> pool->type = 0;
> - if (!batch)
> - pool->type |= CNT_POOL_TYPE_EXT;
> + pool->type |= (batch ? 0 : CNT_POOL_TYPE_EXT);
> + pool->type |= (!age ? 0 : CNT_POOL_TYPE_AGE);
> rte_spinlock_init(&pool->sl);
> /*
> * The generation of the new allocated counters in this pool is 0, 2 in
> @@ -4215,6 +4266,39 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
> return cont;
> }
>
> +/**
> + * Update the minimum dcs-id for aged or no-aged counter pool.
> + *
> + * @param[in] dev
> + * Pointer to the Ethernet device structure.
> + * @param[in] pool
> + * Current counter pool.
> + * @param[in] batch
> + * Whether the pool is for counter that was allocated by batch command.
> + * @param[in] age
> + * Whether the counter is for aging.
> + */
> +static void
> +flow_dv_counter_update_min_dcs(struct rte_eth_dev *dev,
> + struct mlx5_flow_counter_pool *pool,
> + uint32_t batch, uint32_t age)
> +{
> + struct mlx5_priv *priv = dev->data->dev_private;
> + struct mlx5_flow_counter_pool *other;
> + struct mlx5_pools_container *cont;
> +
> + cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, (age ^ 0x1));
Too much space.
> + other = flow_dv_find_pool_by_id(cont, pool->min_dcs->id);
> + if (!other)
> + return;
> + if (pool->min_dcs->id < other->min_dcs->id) {
> + rte_atomic64_set(&other->a64_dcs,
> + rte_atomic64_read(&pool->a64_dcs));
> + } else {
> + rte_atomic64_set(&pool->a64_dcs,
> + rte_atomic64_read(&other->a64_dcs));
> + }
> +}
> /**
> * Prepare a new counter and/or a new counter pool.
> *
> @@ -4224,6 +4308,8 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
> * Where to put the pointer of a new counter.
> * @param[in] batch
> * Whether the pool is for counter that was allocated by batch command.
> + * @param[in] age
> + * Whether the pool is for counter that was allocated for aging.
> *
> * @return
> * The counter container pointer and @p cnt_free is set on success,
> @@ -4232,7 +4318,7 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
> static struct mlx5_pools_container *
> flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
> struct mlx5_flow_counter **cnt_free,
> - uint32_t batch)
> + uint32_t batch, uint32_t age)
> {
> struct mlx5_priv *priv = dev->data->dev_private;
> struct mlx5_pools_container *cont;
> @@ -4241,7 +4327,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
> struct mlx5_flow_counter *cnt;
> uint32_t i;
>
> - cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0);
> + cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
> if (!batch) {
> /* bulk_bitmap must be 0 for single counter allocation. */
> dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
> @@ -4249,7 +4335,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
> return NULL;
> pool = flow_dv_find_pool_by_id(cont, dcs->id);
> if (!pool) {
> - cont = flow_dv_pool_create(dev, dcs, batch);
> + cont = flow_dv_pool_create(dev, dcs, batch, age);
> if (!cont) {
> mlx5_devx_cmd_destroy(dcs);
> return NULL;
> @@ -4259,6 +4345,8 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
> rte_atomic64_set(&pool->a64_dcs,
> (int64_t)(uintptr_t)dcs);
> }
> + flow_dv_counter_update_min_dcs(dev,
> + pool, batch, age);
As the above "else if" updates the min_dcs and this function name also
shows it will update the min_dcs, better to align the update in one
function.
Or rename the function a much better one to indicate it will update the
"other" pool with same id?
Not insist to.
> i = dcs->id % MLX5_COUNTERS_PER_POOL;
> cnt = MLX5_POOL_GET_CNT(pool, i);
> TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
> @@ -4273,7 +4361,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
> rte_errno = ENODATA;
> return NULL;
> }
> - cont = flow_dv_pool_create(dev, dcs, batch);
> + cont = flow_dv_pool_create(dev, dcs, batch, age);
> if (!cont) {
> mlx5_devx_cmd_destroy(dcs);
> return NULL;
> @@ -4334,13 +4422,15 @@ flow_dv_counter_shared_search(struct mlx5_pools_container *cont, uint32_t id,
> * Counter identifier.
> * @param[in] group
> * Counter flow group.
> + * @param[in] age
> + * Whether the counter was allocated for aging.
> *
> * @return
> * Index to flow counter on success, 0 otherwise and rte_errno is set.
> */
> static uint32_t
> flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
> - uint16_t group)
> + uint16_t group, uint32_t age)
> {
> struct mlx5_priv *priv = dev->data->dev_private;
> struct mlx5_flow_counter_pool *pool = NULL;
> @@ -4356,7 +4446,7 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
> */
> uint32_t batch = (group && !shared && !priv->counter_fallback) ? 1 : 0;
> struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
> - 0);
> + 0, age);
> uint32_t cnt_idx;
>
> if (!priv->config.devx) {
> @@ -4395,13 +4485,13 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
> cnt_free = NULL;
> }
> if (!cnt_free) {
> - cont = flow_dv_counter_pool_prepare(dev, &cnt_free, batch);
> + cont = flow_dv_counter_pool_prepare(dev, &cnt_free, batch, age);
> if (!cont)
> return 0;
> pool = TAILQ_FIRST(&cont->pool_list);
> }
> if (!batch)
> - cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt_free);
> + cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free);
> /* Create a DV counter action only in the first time usage. */
> if (!cnt_free->action) {
> uint16_t offset;
> @@ -4424,6 +4514,7 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
> cnt_idx = MLX5_MAKE_CNT_IDX(pool->index,
> MLX5_CNT_ARRAY_IDX(pool, cnt_free));
> cnt_idx += batch * MLX5_CNT_BATCH_OFFSET;
> + cnt_idx += age * MLX5_CNT_AGE_OFFSET;
> /* Update the counter reset values. */
> if (_flow_dv_query_count(dev, cnt_idx, &cnt_free->hits,
> &cnt_free->bytes))
> @@ -4445,6 +4536,62 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
> return cnt_idx;
> }
>
> +/**
> + * Get age param from counter index.
> + *
> + * @param[in] dev
> + * Pointer to the Ethernet device structure.
> + * @param[in] counter
> + * Index to the counter handler.
> + *
> + * @return
> + * The aging parameter specified for the counter index.
> + */
> +static struct mlx5_age_param*
> +flow_dv_counter_idx_get_age(struct rte_eth_dev *dev,
> + uint32_t counter)
> +{
> + struct mlx5_flow_counter *cnt;
> + struct mlx5_flow_counter_pool *pool = NULL;
> +
> + flow_dv_counter_get_by_idx(dev, counter, &pool);
> + counter = (counter - 1) % MLX5_COUNTERS_PER_POOL;
> + cnt = MLX5_POOL_GET_CNT(pool, counter);
> + return MLX5_CNT_TO_AGE(cnt);
> +}
> +
> +/**
> + * Remove a flow counter from aged counter list.
> + *
> + * @param[in] dev
> + * Pointer to the Ethernet device structure.
> + * @param[in] counter
> + * Index to the counter handler.
> + * @param[in] cnt
> + * Pointer to the counter handler.
> + */
> +static void
> +flow_dv_counter_remove_from_age(struct rte_eth_dev *dev,
> + uint32_t counter, struct mlx5_flow_counter *cnt)
> +{
> + struct mlx5_age_param *age_param;
> + struct mlx5_priv *priv = dev->data->dev_private;
> +
> + age_param = flow_dv_counter_idx_get_age(dev, counter);
> + if (rte_atomic16_cmpset((volatile uint16_t *)
> + &age_param->state,
> + AGE_CANDIDATE, AGE_FREE)
> + != AGE_CANDIDATE) {
> + /**
> +		 * We need the lock even if it is an age timeout,
> +		 * since the counter may still be in process.
> + */
> + rte_spinlock_lock(&priv->aged_sl);
> + TAILQ_REMOVE(&priv->aged_counters, cnt, next);
> + rte_spinlock_unlock(&priv->aged_sl);
> + }
> + rte_atomic16_set(&age_param->state, AGE_FREE);
> +}
> /**
> * Release a flow counter.
> *
> @@ -4465,10 +4612,12 @@ flow_dv_counter_release(struct rte_eth_dev *dev, uint32_t counter)
> cnt = flow_dv_counter_get_by_idx(dev, counter, &pool);
> MLX5_ASSERT(pool);
> if (counter < MLX5_CNT_BATCH_OFFSET) {
> - cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
> + cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
> if (cnt_ext && --cnt_ext->ref_cnt)
> return;
> }
> + if (IS_AGE_POOL(pool))
> + flow_dv_counter_remove_from_age(dev, counter, cnt);
> /* Put the counter in the end - the last updated one. */
> TAILQ_INSERT_TAIL(&pool->counters, cnt, next);
> /*
> @@ -5243,6 +5392,15 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
> /* Meter action will add one more TAG action. */
> rw_act_num += MLX5_ACT_NUM_SET_TAG;
> break;
> + case RTE_FLOW_ACTION_TYPE_AGE:
> + ret = flow_dv_validate_action_age(action_flags,
> + actions, dev,
> + error);
> + if (ret < 0)
> + return ret;
> + action_flags |= MLX5_FLOW_ACTION_AGE;
> + ++actions_n;
> + break;
> case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
> ret = flow_dv_validate_action_modify_ipv4_dscp
> (action_flags,
> @@ -7281,6 +7439,54 @@ flow_dv_translate_action_port_id(struct rte_eth_dev *dev,
> return 0;
> }
>
> +/**
> + * Create a counter with aging configuration.
> + *
> + * @param[in] dev
> + * Pointer to rte_eth_dev structure.
> + * @param[out] count
> + * Pointer to the counter action configuration.
> + * @param[in] age
> + * Pointer to the aging action configuration.
> + *
> + * @return
> + * Index to flow counter on success, 0 otherwise.
> + */
> +static uint32_t
> +flow_dv_translate_create_counter(struct rte_eth_dev *dev,
> + struct mlx5_flow *dev_flow,
> + const struct rte_flow_action_count *count,
> + const struct rte_flow_action_age *age)
> +{
> + uint32_t counter;
> + struct mlx5_age_param *age_param;
> +
> + counter = flow_dv_counter_alloc(dev,
> + count ? count->shared : 0,
> + count ? count->id : 0,
> + dev_flow->dv.group, !!age);
> +
> + if (!counter || age == NULL)
> + return counter;
> + age_param = flow_dv_counter_idx_get_age(dev, counter);
> +	/*
> +	 * If the application did not supply an aging context, fall back
> +	 * to the flow index so the aged-out flow can still be identified.
> +	 */
> + age_param->context = age->context ? age->context :
> + (void *)(uintptr_t)(dev_flow->flow_idx);
> + /*
> + * The counter age accuracy may have a bit delay. Have 3/4
> +	 * second bias on the timeout in order to let it age in time.
> + */
> + age_param->timeout = age->timeout * 10 - 7;
> + /* Set expire time in unit of 0.1 sec. */
> + age_param->port_id = dev->data->port_id;
> + age_param->expire = age_param->timeout +
> + rte_rdtsc() / (rte_get_tsc_hz() / 10);
> + rte_atomic16_set(&age_param->state, AGE_CANDIDATE);
> + return counter;
> +}
> /**
> * Add Tx queue matcher
> *
> @@ -7450,6 +7656,8 @@ __flow_dv_translate(struct rte_eth_dev *dev,
> (MLX5_MAX_MODIFY_NUM + 1)];
> } mhdr_dummy;
> struct mlx5_flow_dv_modify_hdr_resource *mhdr_res = &mhdr_dummy.res;
> + const struct rte_flow_action_count *count = NULL;
> + const struct rte_flow_action_age *age = NULL;
> union flow_dv_attr flow_attr = { .attr = 0 };
> uint32_t tag_be;
> union mlx5_flow_tbl_key tbl_key;
> @@ -7478,7 +7686,6 @@ __flow_dv_translate(struct rte_eth_dev *dev,
> const struct rte_flow_action_queue *queue;
> const struct rte_flow_action_rss *rss;
> const struct rte_flow_action *action = actions;
> - const struct rte_flow_action_count *count = action->conf;
> const uint8_t *rss_key;
> const struct rte_flow_action_jump *jump_data;
> const struct rte_flow_action_meter *mtr;
> @@ -7607,36 +7814,21 @@ __flow_dv_translate(struct rte_eth_dev *dev,
> action_flags |= MLX5_FLOW_ACTION_RSS;
> dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
> break;
> + case RTE_FLOW_ACTION_TYPE_AGE:
> case RTE_FLOW_ACTION_TYPE_COUNT:
> if (!dev_conf->devx) {
> - rte_errno = ENOTSUP;
> - goto cnt_err;
> - }
> - flow->counter = flow_dv_counter_alloc(dev,
> - count->shared,
> - count->id,
> - dev_flow->dv.group);
> - if (!flow->counter)
> - goto cnt_err;
> - dev_flow->dv.actions[actions_n++] =
> - (flow_dv_counter_get_by_idx(dev,
> - flow->counter, NULL))->action;
> - action_flags |= MLX5_FLOW_ACTION_COUNT;
> - break;
> -cnt_err:
> - if (rte_errno == ENOTSUP)
> return rte_flow_error_set
> (error, ENOTSUP,
> RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> NULL,
> "count action not supported");
> + }
> + /* Save information first, will apply later. */
> + if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT)
> + count = action->conf;
> else
> - return rte_flow_error_set
> - (error, rte_errno,
> - RTE_FLOW_ERROR_TYPE_ACTION,
> - action,
> - "cannot create counter"
> - " object.");
> + age = action->conf;
> + action_flags |= MLX5_FLOW_ACTION_COUNT;
> break;
> case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
> dev_flow->dv.actions[actions_n++] =
> @@ -7909,6 +8101,22 @@ __flow_dv_translate(struct rte_eth_dev *dev,
> dev_flow->dv.actions[modify_action_position] =
> handle->dvh.modify_hdr->verbs_action;
> }
> + if (action_flags & MLX5_FLOW_ACTION_COUNT) {
> + flow->counter =
> + flow_dv_translate_create_counter(dev,
> + dev_flow, count, age);
> +
> + if (!flow->counter)
> + return rte_flow_error_set
> + (error, rte_errno,
> + RTE_FLOW_ERROR_TYPE_ACTION,
> + NULL,
> + "cannot create counter"
> + " object.");
> + dev_flow->dv.actions[actions_n++] =
> + (flow_dv_counter_get_by_idx(dev,
> + flow->counter, NULL))->action;
> + }
> break;
> default:
> break;
> @@ -9169,6 +9377,58 @@ flow_dv_counter_query(struct rte_eth_dev *dev, uint32_t counter, bool clear,
> return 0;
> }
>
> +/**
> + * Get aged-out flows.
> + *
> + * @param[in] dev
> + * Pointer to the Ethernet device structure.
> + * @param[in] context
> + * The address of an array of pointers to the aged-out flows contexts.
> + * @param[in] nb_contexts
> + * The length of context array pointers.
> + * @param[out] error
> + * Perform verbose error reporting if not NULL. Initialized in case of
> + * error only.
> + *
> + * @return
> + *   How many contexts were retrieved on success, otherwise a negative
> + *   errno value.
> + *   If nb_contexts is 0, return the amount of all aged contexts.
> + *   If nb_contexts is not 0, return the amount of aged flows reported
> + *   in the context array.
> + */
> +static int
> +flow_get_aged_flows(struct rte_eth_dev *dev,
> + void **context,
> + uint32_t nb_contexts,
> + struct rte_flow_error *error)
> +{
> + struct mlx5_priv *priv = dev->data->dev_private;
> + struct mlx5_age_param *age_param;
> + struct mlx5_flow_counter *counter;
> + int nb_flows = 0;
> +
> + if (nb_contexts && !context)
> + return rte_flow_error_set(error, EINVAL,
> + RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> + NULL,
> + "Should assign at least one flow or"
> + " context to get if nb_contexts != 0");
> + rte_spinlock_lock(&priv->aged_sl);
> + TAILQ_FOREACH(counter, &priv->aged_counters, next) {
> + nb_flows++;
> + if (nb_contexts) {
> + age_param = MLX5_CNT_TO_AGE(counter);
> + context[nb_flows - 1] = age_param->context;
> + if (!(--nb_contexts))
> + break;
> + }
> + }
> + rte_spinlock_unlock(&priv->aged_sl);
> + rte_atomic16_set(&priv->trigger_event, 1);
> + return nb_flows;
> +}
> +
> /*
> * Mutex-protected thunk to lock-free __flow_dv_translate().
> */
> @@ -9235,7 +9495,7 @@ flow_dv_counter_allocate(struct rte_eth_dev *dev)
> uint32_t cnt;
>
> flow_dv_shared_lock(dev);
> - cnt = flow_dv_counter_alloc(dev, 0, 0, 1);
> + cnt = flow_dv_counter_alloc(dev, 0, 0, 1, 0);
> flow_dv_shared_unlock(dev);
> return cnt;
> }
> @@ -9266,6 +9526,7 @@ const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = {
> .counter_alloc = flow_dv_counter_allocate,
> .counter_free = flow_dv_counter_free,
> .counter_query = flow_dv_counter_query,
> + .get_aged_flows = flow_get_aged_flows,
> };
>
> #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
> diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
> index 236d665852..7efd97f547 100644
> --- a/drivers/net/mlx5/mlx5_flow_verbs.c
> +++ b/drivers/net/mlx5/mlx5_flow_verbs.c
> @@ -56,7 +56,8 @@ flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev,
> struct mlx5_flow_counter_pool **ppool)
> {
> struct mlx5_priv *priv = dev->data->dev_private;
> - struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
> + struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
> + 0);
> struct mlx5_flow_counter_pool *pool;
>
> idx--;
> @@ -151,7 +152,8 @@ static uint32_t
> flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
> {
> struct mlx5_priv *priv = dev->data->dev_private;
> - struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
> + struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
> + 0);
> struct mlx5_flow_counter_pool *pool = NULL;
> struct mlx5_flow_counter_ext *cnt_ext = NULL;
> struct mlx5_flow_counter *cnt = NULL;
> @@ -251,7 +253,7 @@ flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter)
>
> cnt = flow_verbs_counter_get_by_idx(dev, counter,
> &pool);
> - cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
> + cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
> if (--cnt_ext->ref_cnt == 0) {
> #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
> claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs));
> @@ -282,7 +284,7 @@ flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
> struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx
> (dev, flow->counter, &pool);
> struct mlx5_flow_counter_ext *cnt_ext = MLX5_CNT_TO_CNT_EXT
> - (cnt);
> + (pool, cnt);
> struct rte_flow_query_count *qc = data;
> uint64_t counters[2] = {0, 0};
> #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
> @@ -1083,12 +1085,12 @@ flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
> }
> #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
> cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
> - cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
> + cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
> counter.counter_set_handle = cnt_ext->cs->handle;
> flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
> #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
> cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
> - cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
> + cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
> counter.counters = cnt_ext->cs;
> flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
> #endif
@@ -141,6 +141,7 @@ New Features
* Added support for creating Relaxed Ordering Memory Regions.
* Added support for jumbo frame size (9K MTU) in Multi-Packet RQ mode.
* Optimized the memory consumption of flow.
+ * Added support for flow aging based on hardware counter.
* **Updated the AESNI MB crypto PMD.**
@@ -437,6 +437,20 @@ mlx5_flow_id_release(struct mlx5_flow_id_pool *pool, uint32_t id)
return 0;
}
+/**
+ * Initialize the private aging list information.
+ *
+ * @param[in] priv
+ * Pointer to the private device data structure.
+ */
+static void
+mlx5_flow_aging_list_init(struct mlx5_priv *priv)
+{
+ TAILQ_INIT(&priv->aged_counters);
+ rte_spinlock_init(&priv->aged_sl);
+ rte_atomic16_set(&priv->trigger_event, 1);
+}
+
/**
* Initialize the counters management structure.
*
@@ -446,11 +460,14 @@ mlx5_flow_id_release(struct mlx5_flow_id_pool *pool, uint32_t id)
static void
mlx5_flow_counters_mng_init(struct mlx5_ibv_shared *sh)
{
- uint8_t i;
+ uint8_t i, age;
+ sh->cmng.age = 0;
TAILQ_INIT(&sh->cmng.flow_counters);
- for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i)
- TAILQ_INIT(&sh->cmng.ccont[i].pool_list);
+ for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
+ for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i)
+ TAILQ_INIT(&sh->cmng.ccont[i][age].pool_list);
+ }
}
/**
@@ -480,7 +497,7 @@ static void
mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
{
struct mlx5_counter_stats_mem_mng *mng;
- uint8_t i;
+ uint8_t i, age = 0;
int j;
int retries = 1024;
@@ -491,36 +508,42 @@ mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
break;
rte_pause();
}
- for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) {
- struct mlx5_flow_counter_pool *pool;
- uint32_t batch = !!(i % 2);
- if (!sh->cmng.ccont[i].pools)
- continue;
- pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
- while (pool) {
- if (batch) {
- if (pool->min_dcs)
- claim_zero
- (mlx5_devx_cmd_destroy(pool->min_dcs));
- }
- for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
- if (MLX5_POOL_GET_CNT(pool, j)->action)
- claim_zero
- (mlx5_glue->destroy_flow_action
- (MLX5_POOL_GET_CNT(pool, j)->action));
- if (!batch && MLX5_GET_POOL_CNT_EXT
- (pool, j)->dcs)
- claim_zero(mlx5_devx_cmd_destroy
- (MLX5_GET_POOL_CNT_EXT
- (pool, j)->dcs));
+ for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
+ for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) {
+ struct mlx5_flow_counter_pool *pool;
+ uint32_t batch = !!(i % 2);
+
+ if (!sh->cmng.ccont[i][age].pools)
+ continue;
+ pool = TAILQ_FIRST(&sh->cmng.ccont[i][age].pool_list);
+ while (pool) {
+ if (batch) {
+ if (pool->min_dcs)
+ claim_zero
+ (mlx5_devx_cmd_destroy
+ (pool->min_dcs));
+ }
+ for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
+ if (MLX5_POOL_GET_CNT(pool, j)->action)
+ claim_zero
+ (mlx5_glue->destroy_flow_action
+ (MLX5_POOL_GET_CNT
+ (pool, j)->action));
+ if (!batch && MLX5_GET_POOL_CNT_EXT
+ (pool, j)->dcs)
+ claim_zero(mlx5_devx_cmd_destroy
+ (MLX5_GET_POOL_CNT_EXT
+ (pool, j)->dcs));
+ }
+ TAILQ_REMOVE(&sh->cmng.ccont[i][age].pool_list,
+ pool, next);
+ rte_free(pool);
+ pool = TAILQ_FIRST
+ (&sh->cmng.ccont[i][age].pool_list);
}
- TAILQ_REMOVE(&sh->cmng.ccont[i].pool_list, pool,
- next);
- rte_free(pool);
- pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
+ rte_free(sh->cmng.ccont[i][age].pools);
}
- rte_free(sh->cmng.ccont[i].pools);
}
mng = LIST_FIRST(&sh->cmng.mem_mngs);
while (mng) {
@@ -3003,6 +3026,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
goto error;
}
}
+ mlx5_flow_aging_list_init(priv);
return eth_dev;
error:
if (priv) {
@@ -222,13 +222,21 @@ struct mlx5_drop {
#define MLX5_COUNTERS_PER_POOL 512
#define MLX5_MAX_PENDING_QUERIES 4
#define MLX5_CNT_CONTAINER_RESIZE 64
+#define MLX5_CNT_AGE_OFFSET 0x80000000
#define CNT_SIZE (sizeof(struct mlx5_flow_counter))
#define CNTEXT_SIZE (sizeof(struct mlx5_flow_counter_ext))
+#define AGE_SIZE (sizeof(struct mlx5_age_param))
#define CNT_POOL_TYPE_EXT (1 << 0)
+#define CNT_POOL_TYPE_AGE (1 << 1)
#define IS_EXT_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_EXT)
+#define IS_AGE_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_AGE)
+#define MLX_CNT_IS_AGE(counter) ((counter) & MLX5_CNT_AGE_OFFSET ? 1 : 0)
+
#define MLX5_CNT_LEN(pool) \
- (CNT_SIZE + (IS_EXT_POOL(pool) ? CNTEXT_SIZE : 0))
+ (CNT_SIZE + \
+ (IS_AGE_POOL(pool) ? AGE_SIZE : 0) + \
+ (IS_EXT_POOL(pool) ? CNTEXT_SIZE : 0))
#define MLX5_POOL_GET_CNT(pool, index) \
((struct mlx5_flow_counter *) \
((char *)((pool) + 1) + (index) * (MLX5_CNT_LEN(pool))))
@@ -242,13 +250,33 @@ struct mlx5_drop {
*/
#define MLX5_MAKE_CNT_IDX(pi, offset) \
((pi) * MLX5_COUNTERS_PER_POOL + (offset) + 1)
-#define MLX5_CNT_TO_CNT_EXT(cnt) \
- ((struct mlx5_flow_counter_ext *)((cnt) + 1))
+#define MLX5_CNT_TO_CNT_EXT(pool, cnt) \
+ ((struct mlx5_flow_counter_ext *)\
+ ((char *)((cnt) + 1) + \
+ (IS_AGE_POOL(pool) ? AGE_SIZE : 0)))
#define MLX5_GET_POOL_CNT_EXT(pool, offset) \
- MLX5_CNT_TO_CNT_EXT(MLX5_POOL_GET_CNT((pool), (offset)))
+ MLX5_CNT_TO_CNT_EXT(pool, MLX5_POOL_GET_CNT((pool), (offset)))
+#define MLX5_CNT_TO_AGE(cnt) \
+ ((struct mlx5_age_param *)((cnt) + 1))
struct mlx5_flow_counter_pool;
/* Age status. */
+enum {
+ AGE_FREE,
+ AGE_CANDIDATE, /* Counter assigned to flows. */
+ AGE_TMOUT, /* Timeout, wait for aged flows query and destroy. */
+};
+
+/* Counter age parameter. */
+struct mlx5_age_param {
+ rte_atomic16_t state; /**< Age state. */
+ uint16_t port_id; /**< Port id of the counter. */
+ uint32_t timeout:15; /**< Age timeout in unit of 0.1sec. */
+ uint32_t expire:16; /**< Expire time(0.1sec) in the future. */
+ void *context; /**< Flow counter age context. */
+};
+
struct flow_counter_stats {
uint64_t hits;
uint64_t bytes;
@@ -336,13 +364,14 @@ struct mlx5_pools_container {
/* Counter global management structure. */
struct mlx5_flow_counter_mng {
- uint8_t mhi[2]; /* master \ host container index. */
- struct mlx5_pools_container ccont[2 * 2];
- /* 2 containers for single and for batch for double-buffer. */
+ uint8_t mhi[2][2]; /* master \ host and age \ no age container index. */
+ struct mlx5_pools_container ccont[2 * 2][2];
+ /* master \ host and age \ no age pools container. */
struct mlx5_counters flow_counters; /* Legacy flow counter list. */
uint8_t pending_queries;
uint8_t batch;
uint16_t pool_index;
+ uint8_t age;
uint8_t query_thread_on;
LIST_HEAD(mem_mngs, mlx5_counter_stats_mem_mng) mem_mngs;
LIST_HEAD(stat_raws, mlx5_counter_stats_raw) free_stat_raws;
@@ -566,6 +595,10 @@ struct mlx5_priv {
uint8_t fdb_def_rule; /* Whether fdb jump to table 1 is configured. */
struct mlx5_mp_id mp_id; /* ID of a multi-process process */
LIST_HEAD(fdir, mlx5_fdir_flow) fdir_flows; /* fdir flows. */
+ struct mlx5_counters aged_counters; /* Aged flow counter list. */
+ rte_spinlock_t aged_sl; /* Aged flow counter list lock. */
+ rte_atomic16_t trigger_event;
	/* Event is triggered once after the last call of rte_flow_get_aged_flows. */
};
#define PORT_ID(priv) ((priv)->dev_data->port_id)
@@ -764,6 +797,8 @@ int mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
int mlx5_flow_dev_dump(struct rte_eth_dev *dev, FILE *file,
struct rte_flow_error *error);
void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
+int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
+ uint32_t nb_contexts, struct rte_flow_error *error);
/* mlx5_mp.c */
int mlx5_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer);
@@ -24,6 +24,7 @@
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
+#include <rte_cycles.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>
@@ -242,6 +243,7 @@ static const struct rte_flow_ops mlx5_flow_ops = {
.isolate = mlx5_flow_isolate,
.query = mlx5_flow_query,
.dev_dump = mlx5_flow_dev_dump,
+ .get_aged_flows = mlx5_flow_get_aged_flows,
};
/* Convert FDIR request to Generic flow. */
@@ -2531,6 +2533,8 @@ flow_drv_validate(struct rte_eth_dev *dev,
* Pointer to the list of items.
* @param[in] actions
* Pointer to the list of actions.
+ * @param[in] flow_idx
+ * This memory pool index to the flow.
* @param[out] error
* Pointer to the error structure.
*
@@ -2543,14 +2547,19 @@ flow_drv_prepare(struct rte_eth_dev *dev,
const struct rte_flow_attr *attr,
const struct rte_flow_item items[],
const struct rte_flow_action actions[],
+ uint32_t flow_idx,
struct rte_flow_error *error)
{
const struct mlx5_flow_driver_ops *fops;
enum mlx5_flow_drv_type type = flow->drv_type;
+ struct mlx5_flow *mlx5_flow = NULL;
MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
fops = flow_get_drv_ops(type);
- return fops->prepare(dev, attr, items, actions, error);
+ mlx5_flow = fops->prepare(dev, attr, items, actions, error);
+ if (mlx5_flow)
+ mlx5_flow->flow_idx = flow_idx;
+ return mlx5_flow;
}
/**
@@ -3498,6 +3507,8 @@ flow_hairpin_split(struct rte_eth_dev *dev,
* Associated actions (list terminated by the END action).
* @param[in] external
* This flow rule is created by request external to PMD.
+ * @param[in] flow_idx
+ * This memory pool index to the flow.
* @param[out] error
* Perform verbose error reporting if not NULL.
* @return
@@ -3511,11 +3522,13 @@ flow_create_split_inner(struct rte_eth_dev *dev,
const struct rte_flow_attr *attr,
const struct rte_flow_item items[],
const struct rte_flow_action actions[],
- bool external, struct rte_flow_error *error)
+ bool external, uint32_t flow_idx,
+ struct rte_flow_error *error)
{
struct mlx5_flow *dev_flow;
- dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, error);
+ dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
+ flow_idx, error);
if (!dev_flow)
return -rte_errno;
dev_flow->flow = flow;
@@ -3876,6 +3889,8 @@ flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
* Associated actions (list terminated by the END action).
* @param[in] external
* This flow rule is created by request external to PMD.
+ * @param[in] flow_idx
+ * This memory pool index to the flow.
* @param[out] error
* Perform verbose error reporting if not NULL.
* @return
@@ -3888,7 +3903,8 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
const struct rte_flow_attr *attr,
const struct rte_flow_item items[],
const struct rte_flow_action actions[],
- bool external, struct rte_flow_error *error)
+ bool external, uint32_t flow_idx,
+ struct rte_flow_error *error)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_dev_config *config = &priv->config;
@@ -3908,7 +3924,7 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
!mlx5_flow_ext_mreg_supported(dev))
return flow_create_split_inner(dev, flow, NULL, prefix_layers,
attr, items, actions, external,
- error);
+ flow_idx, error);
actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
&encap_idx);
if (qrss) {
@@ -3992,7 +4008,7 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
/* Add the unmodified original or prefix subflow. */
ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, attr,
items, ext_actions ? ext_actions :
- actions, external, error);
+ actions, external, flow_idx, error);
if (ret < 0)
goto exit;
MLX5_ASSERT(dev_flow);
@@ -4055,7 +4071,7 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
ret = flow_create_split_inner(dev, flow, &dev_flow, layers,
&q_attr, mtr_sfx ? items :
q_items, q_actions,
- external, error);
+ external, flow_idx, error);
if (ret < 0)
goto exit;
/* qrss ID should be freed if failed. */
@@ -4096,6 +4112,8 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
* Associated actions (list terminated by the END action).
* @param[in] external
* This flow rule is created by request external to PMD.
+ * @param[in] flow_idx
+ * This memory pool index to the flow.
* @param[out] error
* Perform verbose error reporting if not NULL.
* @return
@@ -4107,7 +4125,8 @@ flow_create_split_meter(struct rte_eth_dev *dev,
const struct rte_flow_attr *attr,
const struct rte_flow_item items[],
const struct rte_flow_action actions[],
- bool external, struct rte_flow_error *error)
+ bool external, uint32_t flow_idx,
+ struct rte_flow_error *error)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct rte_flow_action *sfx_actions = NULL;
@@ -4151,7 +4170,7 @@ flow_create_split_meter(struct rte_eth_dev *dev,
/* Add the prefix subflow. */
ret = flow_create_split_inner(dev, flow, &dev_flow, 0, attr,
items, pre_actions, external,
- error);
+ flow_idx, error);
if (ret) {
ret = -rte_errno;
goto exit;
@@ -4168,7 +4187,7 @@ flow_create_split_meter(struct rte_eth_dev *dev,
0, &sfx_attr,
sfx_items ? sfx_items : items,
sfx_actions ? sfx_actions : actions,
- external, error);
+ external, flow_idx, error);
exit:
if (sfx_actions)
rte_free(sfx_actions);
@@ -4205,6 +4224,8 @@ flow_create_split_meter(struct rte_eth_dev *dev,
* Associated actions (list terminated by the END action).
* @param[in] external
* This flow rule is created by request external to PMD.
+ * @param[in] flow_idx
+ * This memory pool index to the flow.
* @param[out] error
* Perform verbose error reporting if not NULL.
* @return
@@ -4216,12 +4237,13 @@ flow_create_split_outer(struct rte_eth_dev *dev,
const struct rte_flow_attr *attr,
const struct rte_flow_item items[],
const struct rte_flow_action actions[],
- bool external, struct rte_flow_error *error)
+ bool external, uint32_t flow_idx,
+ struct rte_flow_error *error)
{
int ret;
ret = flow_create_split_meter(dev, flow, attr, items,
- actions, external, error);
+ actions, external, flow_idx, error);
MLX5_ASSERT(ret <= 0);
return ret;
}
@@ -4356,7 +4378,7 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
*/
ret = flow_create_split_outer(dev, flow, attr,
buf->entry[i].pattern,
- p_actions_rx, external,
+ p_actions_rx, external, idx,
error);
if (ret < 0)
goto error;
@@ -4367,7 +4389,8 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
attr_tx.ingress = 0;
attr_tx.egress = 1;
dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
- actions_hairpin_tx.actions, error);
+ actions_hairpin_tx.actions,
+ idx, error);
if (!dev_flow)
goto error;
dev_flow->flow = flow;
@@ -5741,6 +5764,31 @@ mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
#define MLX5_POOL_QUERY_FREQ_US 1000000
+/**
+ * Get the total number of valid counter pools across all containers.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_ibv_shared object.
+ *
+ * @return
+ *   The number of valid pools.
+ */
+static uint32_t
+mlx5_get_all_valid_pool_count(struct mlx5_ibv_shared *sh)
+{
+ uint8_t age, i;
+ uint32_t pools_n = 0;
+ struct mlx5_pools_container *cont;
+
+ for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
+		for (i = 0; i < 2; ++i) {
+ cont = MLX5_CNT_CONTAINER(sh, i, 0, age);
+ pools_n += rte_atomic16_read(&cont->n_valid);
+ }
+ }
+ return pools_n;
+}
+
/**
* Set the periodic procedure for triggering asynchronous batch queries for all
* the counter pools.
@@ -5751,12 +5799,9 @@ mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
void
mlx5_set_query_alarm(struct mlx5_ibv_shared *sh)
{
- struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(sh, 0, 0);
- uint32_t pools_n = rte_atomic16_read(&cont->n_valid);
- uint32_t us;
+ uint32_t pools_n, us;
- cont = MLX5_CNT_CONTAINER(sh, 1, 0);
- pools_n += rte_atomic16_read(&cont->n_valid);
+ pools_n = mlx5_get_all_valid_pool_count(sh);
us = MLX5_POOL_QUERY_FREQ_US / pools_n;
DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
@@ -5782,6 +5827,7 @@ mlx5_flow_query_alarm(void *arg)
uint16_t offset;
int ret;
uint8_t batch = sh->cmng.batch;
+ uint8_t age = sh->cmng.age;
uint16_t pool_index = sh->cmng.pool_index;
struct mlx5_pools_container *cont;
struct mlx5_pools_container *mcont;
@@ -5790,8 +5836,8 @@ mlx5_flow_query_alarm(void *arg)
if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
goto set_alarm;
next_container:
- cont = MLX5_CNT_CONTAINER(sh, batch, 1);
- mcont = MLX5_CNT_CONTAINER(sh, batch, 0);
+ cont = MLX5_CNT_CONTAINER(sh, batch, 1, age);
+ mcont = MLX5_CNT_CONTAINER(sh, batch, 0, age);
/* Check if resize was done and need to flip a container. */
if (cont != mcont) {
if (cont->pools) {
@@ -5801,15 +5847,22 @@ mlx5_flow_query_alarm(void *arg)
}
rte_cio_wmb();
/* Flip the host container. */
- sh->cmng.mhi[batch] ^= (uint8_t)2;
+ sh->cmng.mhi[batch][age] ^= (uint8_t)2;
cont = mcont;
}
if (!cont->pools) {
/* 2 empty containers case is unexpected. */
- if (unlikely(batch != sh->cmng.batch))
+ if (unlikely(batch != sh->cmng.batch) &&
+ unlikely(age != sh->cmng.age)) {
goto set_alarm;
+ }
batch ^= 0x1;
pool_index = 0;
+		/*
+		 * Switch to the other age once both batch containers of the
+		 * current age were found empty. Do not store batch/age back
+		 * into sh->cmng here: that re-bases the "2 empty containers"
+		 * check above and makes an all-empty container set loop
+		 * forever instead of reaching set_alarm.
+		 */
+		if (batch == 0 && pool_index == 0)
+			age ^= 0x1;
goto next_container;
}
pool = cont->pools[pool_index];
@@ -5852,13 +5905,76 @@ mlx5_flow_query_alarm(void *arg)
if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
batch ^= 0x1;
pool_index = 0;
+ if (batch == 0 && pool_index == 0)
+ age ^= 0x1;
}
set_alarm:
sh->cmng.batch = batch;
sh->cmng.pool_index = pool_index;
+ sh->cmng.age = age;
mlx5_set_query_alarm(sh);
}
+/**
+ * Check and callback event for new aged flow in the counter pool
+ *
+ * @param[in] pool
+ * The pointer to Current counter pool.
+ */
+static void
+mlx5_flow_aging_check(struct mlx5_flow_counter_pool *pool)
+{
+ struct mlx5_priv *priv;
+ struct mlx5_flow_counter *cnt;
+ struct mlx5_age_param *age_param;
+ struct mlx5_counter_stats_raw *cur = pool->raw_hw;
+ struct mlx5_counter_stats_raw *prev = pool->raw;
+ uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10);
+ uint64_t port_mask = 0;
+ uint32_t i;
+
+ for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
+ cnt = MLX5_POOL_GET_CNT(pool, i);
+ age_param = MLX5_CNT_TO_AGE(cnt);
+ if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE)
+ continue;
+ if (cur->data[i].hits != prev->data[i].hits) {
+ age_param->expire = curr + age_param->timeout;
+ continue;
+ }
+ if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2))
+ continue;
+ /**
+ * Hold the lock first, or if between the
+ * state AGE_TMOUT and tailq operation the
+ * release happened, the release procedure
+ * may delete a non-existent tailq node.
+ */
+ priv = rte_eth_devices[age_param->port_id].data->dev_private;
+ rte_spinlock_lock(&priv->aged_sl);
+		/*
+		 * If the cmpset fails, the counter is being released.
+		 * Note rte_atomic16_cmpset() returns non-zero on success
+		 * and 0 on failure, not the previous state value.
+		 */
+		if (rte_atomic16_cmpset((volatile uint16_t *)
+					&age_param->state,
+					AGE_CANDIDATE,
+					AGE_TMOUT)) {
+			TAILQ_INSERT_TAIL(&priv->aged_counters, cnt, next);
+			port_mask |= (1ull << age_param->port_id);
+		}
+ rte_spinlock_unlock(&priv->aged_sl);
+ }
+	/*
+	 * NOTE(review): the 64-bit port_mask assumes port ids < 64;
+	 * confirm RTE_MAX_ETHPORTS <= 64 or size the mask accordingly.
+	 */
+	for (i = 0; i < 64; i++) {
+ if (port_mask & (1ull << i)) {
+ priv = rte_eth_devices[i].data->dev_private;
+ if (!rte_atomic16_read(&priv->trigger_event))
+ continue;
+ _rte_eth_dev_callback_process(&rte_eth_devices[i],
+ RTE_ETH_EVENT_FLOW_AGED, NULL);
+ rte_atomic16_set(&priv->trigger_event, 0);
+ }
+ }
+}
+
/**
* Handler for the HW respond about ready values from an asynchronous batch
* query. This function is probably called by the host thread.
@@ -5883,6 +5999,8 @@ mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh,
raw_to_free = pool->raw_hw;
} else {
raw_to_free = pool->raw;
+ if (IS_AGE_POOL(pool))
+ mlx5_flow_aging_check(pool);
rte_spinlock_lock(&pool->sl);
pool->raw = pool->raw_hw;
rte_spinlock_unlock(&pool->sl);
@@ -6034,3 +6152,40 @@ mlx5_flow_dev_dump(struct rte_eth_dev *dev,
return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
sh->tx_domain, file);
}
+
+/**
+ * Get aged-out flows.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] contexts
+ *   The address of an array of pointers to the aged-out flows contexts.
+ * @param[in] nb_contexts
+ *   The length of context array pointers.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL. Initialized in case of
+ * error only.
+ *
+ * @return
+ *   the number of aged flows reported on success, otherwise a negative
+ *   errno value. If nb_contexts is 0, return the total number of aged
+ *   contexts. If nb_contexts is not 0, return the number of aged flow
+ *   contexts written into the context array.
+ */
+int
+mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
+ uint32_t nb_contexts, struct rte_flow_error *error)
+{
+ const struct mlx5_flow_driver_ops *fops;
+ struct rte_flow_attr attr = { .transfer = 0 };
+
+ if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
+ fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ return fops->get_aged_flows(dev, contexts, nb_contexts,
+ error);
+ }
+ DRV_LOG(ERR,
+ "port %u get aged flows is not supported.",
+ dev->data->port_id);
+ return -ENOTSUP;
+}
@@ -199,6 +199,7 @@ enum mlx5_feature_name {
#define MLX5_FLOW_ACTION_METER (1ull << 31)
#define MLX5_FLOW_ACTION_SET_IPV4_DSCP (1ull << 32)
#define MLX5_FLOW_ACTION_SET_IPV6_DSCP (1ull << 33)
+#define MLX5_FLOW_ACTION_AGE (1ull << 34)
#define MLX5_FLOW_FATE_ACTIONS \
(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | \
@@ -650,6 +651,7 @@ struct mlx5_flow_verbs_workspace {
/** Device flow structure. */
struct mlx5_flow {
struct rte_flow *flow; /**< Pointer to the main flow. */
+ uint32_t flow_idx; /**< The memory pool index to the main flow. */
uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
uint64_t act_flags;
/**< Bit-fields of detected actions, see MLX5_FLOW_ACTION_*. */
@@ -873,6 +875,11 @@ typedef int (*mlx5_flow_counter_query_t)(struct rte_eth_dev *dev,
uint32_t cnt,
bool clear, uint64_t *pkts,
uint64_t *bytes);
+typedef int (*mlx5_flow_get_aged_flows_t)
+ (struct rte_eth_dev *dev,
+ void **context,
+ uint32_t nb_contexts,
+ struct rte_flow_error *error);
struct mlx5_flow_driver_ops {
mlx5_flow_validate_t validate;
mlx5_flow_prepare_t prepare;
@@ -888,13 +895,14 @@ struct mlx5_flow_driver_ops {
mlx5_flow_counter_alloc_t counter_alloc;
mlx5_flow_counter_free_t counter_free;
mlx5_flow_counter_query_t counter_query;
+ mlx5_flow_get_aged_flows_t get_aged_flows;
};
-#define MLX5_CNT_CONTAINER(sh, batch, thread) (&(sh)->cmng.ccont \
- [(((sh)->cmng.mhi[batch] >> (thread)) & 0x1) * 2 + (batch)])
-#define MLX5_CNT_CONTAINER_UNUSED(sh, batch, thread) (&(sh)->cmng.ccont \
- [(~((sh)->cmng.mhi[batch] >> (thread)) & 0x1) * 2 + (batch)])
+#define MLX5_CNT_CONTAINER(sh, batch, thread, age) (&(sh)->cmng.ccont \
+ [(((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 + (batch)][age])
+#define MLX5_CNT_CONTAINER_UNUSED(sh, batch, thread, age) (&(sh)->cmng.ccont \
+ [(~((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 + (batch)][age])
/* mlx5_flow.c */
@@ -24,6 +24,7 @@
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
+#include <rte_cycles.h>
#include <rte_ip.h>
#include <rte_gre.h>
#include <rte_vxlan.h>
@@ -3719,6 +3720,50 @@ mlx5_flow_validate_action_meter(struct rte_eth_dev *dev,
return 0;
}
+/**
+ * Validate the age action.
+ *
+ * @param[in] action_flags
+ * Holds the actions detected until now.
+ * @param[in] action
+ * Pointer to the age action.
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_action_age(uint64_t action_flags,
+ const struct rte_flow_action *action,
+ struct rte_eth_dev *dev,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ const struct rte_flow_action_age *age = action->conf;
+
+ if (!priv->config.devx)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "age action not supported");
+ if (!(action->conf))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, action,
+ "configuration cannot be null");
+ if (age->timeout >= UINT16_MAX / 2 / 10)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION, action,
+ "Max age time: 3275 seconds");
+ if (action_flags & MLX5_FLOW_ACTION_AGE)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+ "Duplicate age ctions set");
+ return 0;
+}
+
/**
* Validate the modify-header IPv4 DSCP actions.
*
@@ -3896,14 +3941,16 @@ flow_dv_counter_get_by_idx(struct rte_eth_dev *dev,
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_pools_container *cont;
struct mlx5_flow_counter_pool *pool;
- uint32_t batch = 0;
+ uint32_t batch = 0, age = 0;
idx--;
+ age = MLX_CNT_IS_AGE(idx);
+ idx = age ? idx - MLX5_CNT_AGE_OFFSET : idx;
if (idx >= MLX5_CNT_BATCH_OFFSET) {
idx -= MLX5_CNT_BATCH_OFFSET;
batch = 1;
}
- cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0);
+ cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cont->n);
pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
MLX5_ASSERT(pool);
@@ -4023,18 +4070,21 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
* Pointer to the Ethernet device structure.
* @param[in] batch
* Whether the pool is for counter that was allocated by batch command.
+ * @param[in] age
+ * Whether the pool is for Aging counter.
*
* @return
* The new container pointer on success, otherwise NULL and rte_errno is set.
*/
static struct mlx5_pools_container *
-flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
+flow_dv_container_resize(struct rte_eth_dev *dev,
+ uint32_t batch, uint32_t age)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_pools_container *cont =
- MLX5_CNT_CONTAINER(priv->sh, batch, 0);
+ MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
struct mlx5_pools_container *new_cont =
- MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0);
+ MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0, age);
struct mlx5_counter_stats_mem_mng *mem_mng = NULL;
uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE;
uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize;
@@ -4042,7 +4092,7 @@ flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
/* Fallback mode has no background thread. Skip the check. */
if (!priv->counter_fallback &&
- cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1)) {
+ cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1, age)) {
/* The last resize still hasn't detected by the host thread. */
rte_errno = EAGAIN;
return NULL;
@@ -4085,7 +4135,7 @@ flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
new_cont->init_mem_mng = mem_mng;
rte_cio_wmb();
/* Flip the master container. */
- priv->sh->cmng.mhi[batch] ^= (uint8_t)1;
+ priv->sh->cmng.mhi[batch][age] ^= (uint8_t)1;
return new_cont;
}
@@ -4117,7 +4167,7 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
cnt = flow_dv_counter_get_by_idx(dev, counter, &pool);
MLX5_ASSERT(pool);
if (counter < MLX5_CNT_BATCH_OFFSET) {
- cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
+ cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
if (priv->counter_fallback)
return mlx5_devx_cmd_flow_counter_query(cnt_ext->dcs, 0,
0, pkts, bytes, 0, NULL, NULL, 0);
@@ -4150,6 +4200,8 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
* The devX counter handle.
* @param[in] batch
* Whether the pool is for counter that was allocated by batch command.
+ * @param[in] age
+ * Whether the pool is for counter that was allocated for aging.
* @param[in/out] cont_cur
* Pointer to the container pointer, it will be update in pool resize.
*
@@ -4158,24 +4210,23 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
*/
static struct mlx5_pools_container *
flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
- uint32_t batch)
+ uint32_t batch, uint32_t age)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_flow_counter_pool *pool;
struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
- 0);
+ 0, age);
int16_t n_valid = rte_atomic16_read(&cont->n_valid);
- uint32_t size;
+ uint32_t size = sizeof(*pool);
if (cont->n == n_valid) {
- cont = flow_dv_container_resize(dev, batch);
+ cont = flow_dv_container_resize(dev, batch, age);
if (!cont)
return NULL;
}
- size = sizeof(*pool);
size += MLX5_COUNTERS_PER_POOL * CNT_SIZE;
- if (!batch)
- size += MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE;
+ size += (batch ? 0 : MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE);
+ size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * AGE_SIZE);
pool = rte_calloc(__func__, 1, size, 0);
if (!pool) {
rte_errno = ENOMEM;
@@ -4187,8 +4238,8 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
MLX5_CNT_CONTAINER_RESIZE;
pool->raw_hw = NULL;
pool->type = 0;
- if (!batch)
- pool->type |= CNT_POOL_TYPE_EXT;
+ pool->type |= (batch ? 0 : CNT_POOL_TYPE_EXT);
+ pool->type |= (!age ? 0 : CNT_POOL_TYPE_AGE);
rte_spinlock_init(&pool->sl);
/*
* The generation of the new allocated counters in this pool is 0, 2 in
@@ -4215,6 +4266,39 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
return cont;
}
+/**
+ * Update the minimum dcs-id for aged or no-aged counter pool.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] pool
+ * Current counter pool.
+ * @param[in] batch
+ * Whether the pool is for counter that was allocated by batch command.
+ * @param[in] age
+ * Whether the counter is for aging.
+ */
+static void
+flow_dv_counter_update_min_dcs(struct rte_eth_dev *dev,
+ struct mlx5_flow_counter_pool *pool,
+ uint32_t batch, uint32_t age)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_flow_counter_pool *other;
+ struct mlx5_pools_container *cont;
+
+ cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, (age ^ 0x1));
+ other = flow_dv_find_pool_by_id(cont, pool->min_dcs->id);
+ if (!other)
+ return;
+ if (pool->min_dcs->id < other->min_dcs->id) {
+ rte_atomic64_set(&other->a64_dcs,
+ rte_atomic64_read(&pool->a64_dcs));
+ } else {
+ rte_atomic64_set(&pool->a64_dcs,
+ rte_atomic64_read(&other->a64_dcs));
+ }
+}
/**
* Prepare a new counter and/or a new counter pool.
*
@@ -4224,6 +4308,8 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
* Where to put the pointer of a new counter.
* @param[in] batch
* Whether the pool is for counter that was allocated by batch command.
+ * @param[in] age
+ * Whether the pool is for counter that was allocated for aging.
*
* @return
* The counter container pointer and @p cnt_free is set on success,
@@ -4232,7 +4318,7 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
static struct mlx5_pools_container *
flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
struct mlx5_flow_counter **cnt_free,
- uint32_t batch)
+ uint32_t batch, uint32_t age)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_pools_container *cont;
@@ -4241,7 +4327,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
struct mlx5_flow_counter *cnt;
uint32_t i;
- cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0);
+ cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
if (!batch) {
/* bulk_bitmap must be 0 for single counter allocation. */
dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
@@ -4249,7 +4335,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
return NULL;
pool = flow_dv_find_pool_by_id(cont, dcs->id);
if (!pool) {
- cont = flow_dv_pool_create(dev, dcs, batch);
+ cont = flow_dv_pool_create(dev, dcs, batch, age);
if (!cont) {
mlx5_devx_cmd_destroy(dcs);
return NULL;
@@ -4259,6 +4345,8 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
rte_atomic64_set(&pool->a64_dcs,
(int64_t)(uintptr_t)dcs);
}
+ flow_dv_counter_update_min_dcs(dev,
+ pool, batch, age);
i = dcs->id % MLX5_COUNTERS_PER_POOL;
cnt = MLX5_POOL_GET_CNT(pool, i);
TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
@@ -4273,7 +4361,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
rte_errno = ENODATA;
return NULL;
}
- cont = flow_dv_pool_create(dev, dcs, batch);
+ cont = flow_dv_pool_create(dev, dcs, batch, age);
if (!cont) {
mlx5_devx_cmd_destroy(dcs);
return NULL;
@@ -4334,13 +4422,15 @@ flow_dv_counter_shared_search(struct mlx5_pools_container *cont, uint32_t id,
* Counter identifier.
* @param[in] group
* Counter flow group.
+ * @param[in] age
+ * Whether the counter was allocated for aging.
*
* @return
* Index to flow counter on success, 0 otherwise and rte_errno is set.
*/
static uint32_t
flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
- uint16_t group)
+ uint16_t group, uint32_t age)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_flow_counter_pool *pool = NULL;
@@ -4356,7 +4446,7 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
*/
uint32_t batch = (group && !shared && !priv->counter_fallback) ? 1 : 0;
struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
- 0);
+ 0, age);
uint32_t cnt_idx;
if (!priv->config.devx) {
@@ -4395,13 +4485,13 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
cnt_free = NULL;
}
if (!cnt_free) {
- cont = flow_dv_counter_pool_prepare(dev, &cnt_free, batch);
+ cont = flow_dv_counter_pool_prepare(dev, &cnt_free, batch, age);
if (!cont)
return 0;
pool = TAILQ_FIRST(&cont->pool_list);
}
if (!batch)
- cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt_free);
+ cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free);
/* Create a DV counter action only in the first time usage. */
if (!cnt_free->action) {
uint16_t offset;
@@ -4424,6 +4514,7 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
cnt_idx = MLX5_MAKE_CNT_IDX(pool->index,
MLX5_CNT_ARRAY_IDX(pool, cnt_free));
cnt_idx += batch * MLX5_CNT_BATCH_OFFSET;
+ cnt_idx += age * MLX5_CNT_AGE_OFFSET;
/* Update the counter reset values. */
if (_flow_dv_query_count(dev, cnt_idx, &cnt_free->hits,
&cnt_free->bytes))
@@ -4445,6 +4536,62 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
return cnt_idx;
}
+/**
+ * Get age param from counter index.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] counter
+ * Index to the counter handler.
+ *
+ * @return
+ * The aging parameter specified for the counter index.
+ */
+static struct mlx5_age_param*
+flow_dv_counter_idx_get_age(struct rte_eth_dev *dev,
+ uint32_t counter)
+{
+ struct mlx5_flow_counter *cnt;
+ struct mlx5_flow_counter_pool *pool = NULL;
+
+ flow_dv_counter_get_by_idx(dev, counter, &pool);
+ counter = (counter - 1) % MLX5_COUNTERS_PER_POOL;
+ cnt = MLX5_POOL_GET_CNT(pool, counter);
+ return MLX5_CNT_TO_AGE(cnt);
+}
+
+/**
+ * Remove a flow counter from aged counter list.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] counter
+ * Index to the counter handler.
+ * @param[in] cnt
+ * Pointer to the counter handler.
+ */
+static void
+flow_dv_counter_remove_from_age(struct rte_eth_dev *dev,
+ uint32_t counter, struct mlx5_flow_counter *cnt)
+{
+ struct mlx5_age_param *age_param;
+ struct mlx5_priv *priv = dev->data->dev_private;
+
+ age_param = flow_dv_counter_idx_get_age(dev, counter);
+	/*
+	 * rte_atomic16_cmpset() returns non-zero on success and 0 on
+	 * failure, not the previous value; a failed exchange means the
+	 * counter already aged out and sits on the aged list.
+	 */
+	if (!rte_atomic16_cmpset((volatile uint16_t *)
+			&age_param->state,
+			AGE_CANDIDATE, AGE_FREE)) {
+ /**
+ * We need the lock even it is age timeout,
+ * since counter may still in process.
+ */
+ rte_spinlock_lock(&priv->aged_sl);
+ TAILQ_REMOVE(&priv->aged_counters, cnt, next);
+ rte_spinlock_unlock(&priv->aged_sl);
+ }
+ rte_atomic16_set(&age_param->state, AGE_FREE);
+}
/**
* Release a flow counter.
*
@@ -4465,10 +4612,12 @@ flow_dv_counter_release(struct rte_eth_dev *dev, uint32_t counter)
cnt = flow_dv_counter_get_by_idx(dev, counter, &pool);
MLX5_ASSERT(pool);
if (counter < MLX5_CNT_BATCH_OFFSET) {
- cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
+ cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
if (cnt_ext && --cnt_ext->ref_cnt)
return;
}
+ if (IS_AGE_POOL(pool))
+ flow_dv_counter_remove_from_age(dev, counter, cnt);
/* Put the counter in the end - the last updated one. */
TAILQ_INSERT_TAIL(&pool->counters, cnt, next);
/*
@@ -5243,6 +5392,15 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
/* Meter action will add one more TAG action. */
rw_act_num += MLX5_ACT_NUM_SET_TAG;
break;
+ case RTE_FLOW_ACTION_TYPE_AGE:
+ ret = flow_dv_validate_action_age(action_flags,
+ actions, dev,
+ error);
+ if (ret < 0)
+ return ret;
+ action_flags |= MLX5_FLOW_ACTION_AGE;
+ ++actions_n;
+ break;
case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
ret = flow_dv_validate_action_modify_ipv4_dscp
(action_flags,
@@ -7281,6 +7439,54 @@ flow_dv_translate_action_port_id(struct rte_eth_dev *dev,
return 0;
}
+/**
+ * Create a counter with aging configuration.
+ *
+ * @param[in] dev
+ * Pointer to rte_eth_dev structure.
+ * @param[in] dev_flow
+ *   Pointer to the mlx5_flow.
+ * @param[in] count
+ *   Pointer to the counter action configuration.
+ * @param[in] age
+ * Pointer to the aging action configuration.
+ *
+ * @return
+ * Index to flow counter on success, 0 otherwise.
+ */
+static uint32_t
+flow_dv_translate_create_counter(struct rte_eth_dev *dev,
+ struct mlx5_flow *dev_flow,
+ const struct rte_flow_action_count *count,
+ const struct rte_flow_action_age *age)
+{
+ uint32_t counter;
+ struct mlx5_age_param *age_param;
+
+ counter = flow_dv_counter_alloc(dev,
+ count ? count->shared : 0,
+ count ? count->id : 0,
+ dev_flow->dv.group, !!age);
+
+ if (!counter || age == NULL)
+ return counter;
+ age_param = flow_dv_counter_idx_get_age(dev, counter);
+	/*
+	 * If the application provided no aging context, fall back to
+	 * the flow index so a usable handle is still reported.
+	 */
+	age_param->context = age->context ? age->context :
+		(void *)(uintptr_t)(dev_flow->flow_idx);
+	/*
+	 * The counter age accuracy may have a bit delay. Have 3/4
+	 * second bias on the timeout in order to let it age in time.
+	 */
+	age_param->timeout = age->timeout * 10 - 7;
+	age_param->port_id = dev->data->port_id;
+	/* Set expire time in unit of 0.1 sec. */
+ age_param->expire = age_param->timeout +
+ rte_rdtsc() / (rte_get_tsc_hz() / 10);
+ rte_atomic16_set(&age_param->state, AGE_CANDIDATE);
+ return counter;
+}
/**
* Add Tx queue matcher
*
@@ -7450,6 +7656,8 @@ __flow_dv_translate(struct rte_eth_dev *dev,
(MLX5_MAX_MODIFY_NUM + 1)];
} mhdr_dummy;
struct mlx5_flow_dv_modify_hdr_resource *mhdr_res = &mhdr_dummy.res;
+ const struct rte_flow_action_count *count = NULL;
+ const struct rte_flow_action_age *age = NULL;
union flow_dv_attr flow_attr = { .attr = 0 };
uint32_t tag_be;
union mlx5_flow_tbl_key tbl_key;
@@ -7478,7 +7686,6 @@ __flow_dv_translate(struct rte_eth_dev *dev,
const struct rte_flow_action_queue *queue;
const struct rte_flow_action_rss *rss;
const struct rte_flow_action *action = actions;
- const struct rte_flow_action_count *count = action->conf;
const uint8_t *rss_key;
const struct rte_flow_action_jump *jump_data;
const struct rte_flow_action_meter *mtr;
@@ -7607,36 +7814,21 @@ __flow_dv_translate(struct rte_eth_dev *dev,
action_flags |= MLX5_FLOW_ACTION_RSS;
dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
break;
+ case RTE_FLOW_ACTION_TYPE_AGE:
case RTE_FLOW_ACTION_TYPE_COUNT:
if (!dev_conf->devx) {
- rte_errno = ENOTSUP;
- goto cnt_err;
- }
- flow->counter = flow_dv_counter_alloc(dev,
- count->shared,
- count->id,
- dev_flow->dv.group);
- if (!flow->counter)
- goto cnt_err;
- dev_flow->dv.actions[actions_n++] =
- (flow_dv_counter_get_by_idx(dev,
- flow->counter, NULL))->action;
- action_flags |= MLX5_FLOW_ACTION_COUNT;
- break;
-cnt_err:
- if (rte_errno == ENOTSUP)
return rte_flow_error_set
(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
NULL,
"count action not supported");
+ }
+ /* Save information first, will apply later. */
+ if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT)
+ count = action->conf;
else
- return rte_flow_error_set
- (error, rte_errno,
- RTE_FLOW_ERROR_TYPE_ACTION,
- action,
- "cannot create counter"
- " object.");
+ age = action->conf;
+ action_flags |= MLX5_FLOW_ACTION_COUNT;
break;
case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
dev_flow->dv.actions[actions_n++] =
@@ -7909,6 +8101,22 @@ __flow_dv_translate(struct rte_eth_dev *dev,
dev_flow->dv.actions[modify_action_position] =
handle->dvh.modify_hdr->verbs_action;
}
+ if (action_flags & MLX5_FLOW_ACTION_COUNT) {
+ flow->counter =
+ flow_dv_translate_create_counter(dev,
+ dev_flow, count, age);
+
+ if (!flow->counter)
+ return rte_flow_error_set
+ (error, rte_errno,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ NULL,
+ "cannot create counter"
+ " object.");
+ dev_flow->dv.actions[actions_n++] =
+ (flow_dv_counter_get_by_idx(dev,
+ flow->counter, NULL))->action;
+ }
break;
default:
break;
@@ -9169,6 +9377,58 @@ flow_dv_counter_query(struct rte_eth_dev *dev, uint32_t counter, bool clear,
return 0;
}
+/**
+ * Get aged-out flows.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] context
+ * The address of an array of pointers to the aged-out flows contexts.
+ * @param[in] nb_contexts
+ * The length of context array pointers.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL. Initialized in case of
+ * error only.
+ *
+ * @return
+ *   the number of aged flows reported on success, otherwise a negative
+ *   errno value. If nb_contexts is 0, return the total number of aged
+ *   contexts. If nb_contexts is not 0, return the number of aged flow
+ *   contexts written into the context array.
+ */
+static int
+flow_get_aged_flows(struct rte_eth_dev *dev,
+ void **context,
+ uint32_t nb_contexts,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_age_param *age_param;
+ struct mlx5_flow_counter *counter;
+ int nb_flows = 0;
+
+ if (nb_contexts && !context)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "Should assign at least one flow or"
+ " context to get if nb_contexts != 0");
+ rte_spinlock_lock(&priv->aged_sl);
+ TAILQ_FOREACH(counter, &priv->aged_counters, next) {
+ nb_flows++;
+ if (nb_contexts) {
+ age_param = MLX5_CNT_TO_AGE(counter);
+ context[nb_flows - 1] = age_param->context;
+ if (!(--nb_contexts))
+ break;
+ }
+ }
+ rte_spinlock_unlock(&priv->aged_sl);
+ rte_atomic16_set(&priv->trigger_event, 1);
+ return nb_flows;
+}
+
/*
* Mutex-protected thunk to lock-free __flow_dv_translate().
*/
@@ -9235,7 +9495,7 @@ flow_dv_counter_allocate(struct rte_eth_dev *dev)
uint32_t cnt;
flow_dv_shared_lock(dev);
- cnt = flow_dv_counter_alloc(dev, 0, 0, 1);
+ cnt = flow_dv_counter_alloc(dev, 0, 0, 1, 0);
flow_dv_shared_unlock(dev);
return cnt;
}
@@ -9266,6 +9526,7 @@ const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = {
.counter_alloc = flow_dv_counter_allocate,
.counter_free = flow_dv_counter_free,
.counter_query = flow_dv_counter_query,
+ .get_aged_flows = flow_get_aged_flows,
};
#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
@@ -56,7 +56,8 @@ flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev,
struct mlx5_flow_counter_pool **ppool)
{
struct mlx5_priv *priv = dev->data->dev_private;
- struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
+ struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
+ 0);
struct mlx5_flow_counter_pool *pool;
idx--;
@@ -151,7 +152,8 @@ static uint32_t
flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
{
struct mlx5_priv *priv = dev->data->dev_private;
- struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
+ struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
+ 0);
struct mlx5_flow_counter_pool *pool = NULL;
struct mlx5_flow_counter_ext *cnt_ext = NULL;
struct mlx5_flow_counter *cnt = NULL;
@@ -251,7 +253,7 @@ flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter)
cnt = flow_verbs_counter_get_by_idx(dev, counter,
&pool);
- cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
+ cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
if (--cnt_ext->ref_cnt == 0) {
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs));
@@ -282,7 +284,7 @@ flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx
(dev, flow->counter, &pool);
struct mlx5_flow_counter_ext *cnt_ext = MLX5_CNT_TO_CNT_EXT
- (cnt);
+ (pool, cnt);
struct rte_flow_query_count *qc = data;
uint64_t counters[2] = {0, 0};
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
@@ -1083,12 +1085,12 @@ flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
}
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
- cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
+ cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
counter.counter_set_handle = cnt_ext->cs->handle;
flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
- cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
+ cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
counter.counters = cnt_ext->cs;
flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
#endif