[v2,2/2] net/mlx5: support flow aging

Message ID 20200424104548.12655-3-dongz@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Raslan Darawsheh
Series: net/mlx5: support flow aging

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/travis-robot success Travis build: passed
ci/Intel-compilation fail Compilation issues

Commit Message

Bill Zhou April 24, 2020, 10:45 a.m. UTC
  Currently, there is no flow aging check or age-out event callback
mechanism in the mlx5 driver; this patch implements them. It includes:
- Splitting the current counter container into aged and no-aged
  containers to reduce memory consumption. The aged container allocates
  extra memory to save the aging parameters from the user configuration.
- An aging check and age-out event callback mechanism based on the
  current counter. When a flow is detected as aged-out, the
  RTE_ETH_EVENT_FLOW_AGED event is triggered to applications.
- Implementing the new API rte_flow_get_aged_flows; applications can use
  this API to get aged-out flows (see the usage sketch below).

Signed-off-by: Bill Zhou <dongz@mellanox.com>
---
v2: Moved the aging list from struct mlx5_ibv_shared to struct mlx5_priv,
so that each port has its own aging list. The update event is triggered
only once after the last call to rte_flow_get_aged_flows.
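
For reference, a minimal application-side sketch of the intended usage
(the callback body, the MAX_AGED batch size, and my_flow_ctx are
illustrative placeholders, not part of this patch; the rte_flow names are
the generic 20.05 aging API this patch hooks into):

#include <rte_ethdev.h>
#include <rte_flow.h>

#define MAX_AGED 32 /* illustrative drain batch size */

static int
aged_flow_cb(uint16_t port_id, enum rte_eth_event_type event,
	     void *cb_arg, void *ret_param)
{
	void *contexts[MAX_AGED];
	int i, n;

	RTE_SET_USED(event);
	RTE_SET_USED(cb_arg);
	RTE_SET_USED(ret_param);
	/* Drain the aged-out contexts; per the v2 note above, a new
	 * RTE_ETH_EVENT_FLOW_AGED event is triggered again only after
	 * this call.
	 */
	n = rte_flow_get_aged_flows(port_id, contexts, MAX_AGED, NULL);
	for (i = 0; i < n; i++)
		; /* destroy or recycle the flow behind contexts[i] */
	return 0;
}

static void
setup_aging(uint16_t port_id, void *my_flow_ctx)
{
	struct rte_flow_action_age age_conf = {
		.timeout = 10,          /* in seconds */
		.context = my_flow_ctx, /* reported back when aged out */
	};
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_AGE, .conf = &age_conf },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	/* ... pass "actions" to rte_flow_create() with attr/pattern ... */
	RTE_SET_USED(actions);
	rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_FLOW_AGED,
				      aged_flow_cb, NULL);
}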
---
 doc/guides/rel_notes/release_20_05.rst |   1 +
 drivers/net/mlx5/mlx5.c                |  86 +++---
 drivers/net/mlx5/mlx5.h                |  49 +++-
 drivers/net/mlx5/mlx5_flow.c           | 201 ++++++++++++--
 drivers/net/mlx5/mlx5_flow.h           |  16 +-
 drivers/net/mlx5/mlx5_flow_dv.c        | 361 +++++++++++++++++++++----
 drivers/net/mlx5/mlx5_flow_verbs.c     |  14 +-
 7 files changed, 607 insertions(+), 121 deletions(-)
  

Comments

Suanming Mou April 26, 2020, 7:07 a.m. UTC | #1
On 4/24/2020 6:45 PM, Bill Zhou wrote:
> Currently, there is no flow aging check or age-out event callback
> mechanism in the mlx5 driver; this patch implements them. It includes:
> - Splitting the current counter container into aged and no-aged
>    containers to reduce memory consumption. The aged container allocates
>    extra memory to save the aging parameters from the user configuration.
> - An aging check and age-out event callback mechanism based on the
>    current counter. When a flow is detected as aged-out, the
>    RTE_ETH_EVENT_FLOW_AGED event is triggered to applications.
> - Implementing the new API rte_flow_get_aged_flows; applications can use
>    this API to get aged-out flows.
>
> Signed-off-by: Bill Zhou <dongz@mellanox.com>
Reviewed-by: Suanming Mou <suanmingm@mellanox.com>
> ---
> v2: Moved the aging list from struct mlx5_ibv_shared to struct mlx5_priv,
> so that each port has its own aging list. The update event is triggered
> only once after the last call to rte_flow_get_aged_flows.
> ---
>   doc/guides/rel_notes/release_20_05.rst |   1 +
>   drivers/net/mlx5/mlx5.c                |  86 +++---
>   drivers/net/mlx5/mlx5.h                |  49 +++-
>   drivers/net/mlx5/mlx5_flow.c           | 201 ++++++++++++--
>   drivers/net/mlx5/mlx5_flow.h           |  16 +-
>   drivers/net/mlx5/mlx5_flow_dv.c        | 361 +++++++++++++++++++++----
>   drivers/net/mlx5/mlx5_flow_verbs.c     |  14 +-
>   7 files changed, 607 insertions(+), 121 deletions(-)
>
> diff --git a/doc/guides/rel_notes/release_20_05.rst b/doc/guides/rel_notes/release_20_05.rst
> index b124c3f287..a5ba8a4792 100644
> --- a/doc/guides/rel_notes/release_20_05.rst
> +++ b/doc/guides/rel_notes/release_20_05.rst
> @@ -141,6 +141,7 @@ New Features
>     * Added support for creating Relaxed Ordering Memory Regions.
>     * Added support for jumbo frame size (9K MTU) in Multi-Packet RQ mode.
>     * Optimized the memory consumption of flow.
> +  * Added support for flow aging based on hardware counter.
>   
>   * **Updated the AESNI MB crypto PMD.**
>   
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index 57d76cb741..674d0ea9d3 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -437,6 +437,20 @@ mlx5_flow_id_release(struct mlx5_flow_id_pool *pool, uint32_t id)
>   	return 0;
>   }
>   
> +/**
> + * Initialize the private aging list information.
> + *
> + * @param[in] priv
> + *   Pointer to the private device data structure.
> + */
> +static void
> +mlx5_flow_aging_list_init(struct mlx5_priv *priv)
> +{
> +	TAILQ_INIT(&priv->aged_counters);
> +	rte_spinlock_init(&priv->aged_sl);
> +	rte_atomic16_set(&priv->trigger_event, 1);
> +}
> +
>   /**
>    * Initialize the counters management structure.
>    *
> @@ -446,11 +460,14 @@ mlx5_flow_id_release(struct mlx5_flow_id_pool *pool, uint32_t id)
>   static void
>   mlx5_flow_counters_mng_init(struct mlx5_ibv_shared *sh)
>   {
> -	uint8_t i;
> +	uint8_t i, age;
>   
> +	sh->cmng.age = 0;
>   	TAILQ_INIT(&sh->cmng.flow_counters);
> -	for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i)
> -		TAILQ_INIT(&sh->cmng.ccont[i].pool_list);
> +	for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
> +		for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i)
> +			TAILQ_INIT(&sh->cmng.ccont[i][age].pool_list);
> +	}
>   }
>   
>   /**
> @@ -480,7 +497,7 @@ static void
>   mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
>   {
>   	struct mlx5_counter_stats_mem_mng *mng;
> -	uint8_t i;
> +	uint8_t i, age = 0;
>   	int j;
>   	int retries = 1024;
>   
> @@ -491,36 +508,42 @@ mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
>   			break;
>   		rte_pause();
>   	}
> -	for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) {
> -		struct mlx5_flow_counter_pool *pool;
> -		uint32_t batch = !!(i % 2);
>   
> -		if (!sh->cmng.ccont[i].pools)
> -			continue;
> -		pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
> -		while (pool) {
> -			if (batch) {
> -				if (pool->min_dcs)
> -					claim_zero
> -					(mlx5_devx_cmd_destroy(pool->min_dcs));
> -			}
> -			for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
> -				if (MLX5_POOL_GET_CNT(pool, j)->action)
> -					claim_zero
> -					(mlx5_glue->destroy_flow_action
> -					 (MLX5_POOL_GET_CNT(pool, j)->action));
> -				if (!batch && MLX5_GET_POOL_CNT_EXT
> -				    (pool, j)->dcs)
> -					claim_zero(mlx5_devx_cmd_destroy
> -						  (MLX5_GET_POOL_CNT_EXT
> -						  (pool, j)->dcs));
> +	for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
> +		for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) {
> +			struct mlx5_flow_counter_pool *pool;
> +			uint32_t batch = !!(i % 2);
> +
> +			if (!sh->cmng.ccont[i][age].pools)
> +				continue;
> +			pool = TAILQ_FIRST(&sh->cmng.ccont[i][age].pool_list);
> +			while (pool) {
> +				if (batch) {
> +					if (pool->min_dcs)
> +						claim_zero
> +						(mlx5_devx_cmd_destroy
> +						(pool->min_dcs));
> +				}
> +				for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
> +					if (MLX5_POOL_GET_CNT(pool, j)->action)
> +						claim_zero
> +						(mlx5_glue->destroy_flow_action
> +						 (MLX5_POOL_GET_CNT
> +						  (pool, j)->action));
> +					if (!batch && MLX5_GET_POOL_CNT_EXT
> +					    (pool, j)->dcs)
> +						claim_zero(mlx5_devx_cmd_destroy
> +							  (MLX5_GET_POOL_CNT_EXT
> +							  (pool, j)->dcs));
> +				}
> +				TAILQ_REMOVE(&sh->cmng.ccont[i][age].pool_list,
> +					pool, next);
> +				rte_free(pool);
> +				pool = TAILQ_FIRST
> +					(&sh->cmng.ccont[i][age].pool_list);
>   			}
> -			TAILQ_REMOVE(&sh->cmng.ccont[i].pool_list, pool,
> -				     next);
> -			rte_free(pool);
> -			pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
> +			rte_free(sh->cmng.ccont[i][age].pools);
>   		}
> -		rte_free(sh->cmng.ccont[i].pools);
>   	}
>   	mng = LIST_FIRST(&sh->cmng.mem_mngs);
>   	while (mng) {
> @@ -3003,6 +3026,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>   			goto error;
>   		}
>   	}
> +	mlx5_flow_aging_list_init(priv);
>   	return eth_dev;
>   error:
>   	if (priv) {
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index 51c3f33e6b..d1b358e929 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -222,13 +222,21 @@ struct mlx5_drop {
>   #define MLX5_COUNTERS_PER_POOL 512
>   #define MLX5_MAX_PENDING_QUERIES 4
>   #define MLX5_CNT_CONTAINER_RESIZE 64
> +#define MLX5_CNT_AGE_OFFSET 0x80000000
>   #define CNT_SIZE (sizeof(struct mlx5_flow_counter))
>   #define CNTEXT_SIZE (sizeof(struct mlx5_flow_counter_ext))
> +#define AGE_SIZE (sizeof(struct mlx5_age_param))
>   
>   #define CNT_POOL_TYPE_EXT	(1 << 0)
> +#define CNT_POOL_TYPE_AGE	(1 << 1)
>   #define IS_EXT_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_EXT)
> +#define IS_AGE_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_AGE)
> +#define MLX_CNT_IS_AGE(counter) ((counter) & MLX5_CNT_AGE_OFFSET ? 1 : 0)
> +
>   #define MLX5_CNT_LEN(pool) \
> -	(CNT_SIZE + (IS_EXT_POOL(pool) ? CNTEXT_SIZE : 0))
> +	(CNT_SIZE + \
> +	(IS_AGE_POOL(pool) ? AGE_SIZE : 0) + \
> +	(IS_EXT_POOL(pool) ? CNTEXT_SIZE : 0))
>   #define MLX5_POOL_GET_CNT(pool, index) \
>   	((struct mlx5_flow_counter *) \
>   	((char *)((pool) + 1) + (index) * (MLX5_CNT_LEN(pool))))
> @@ -242,13 +250,33 @@ struct mlx5_drop {
>    */
>   #define MLX5_MAKE_CNT_IDX(pi, offset) \
>   	((pi) * MLX5_COUNTERS_PER_POOL + (offset) + 1)
> -#define MLX5_CNT_TO_CNT_EXT(cnt) \
> -	((struct mlx5_flow_counter_ext *)((cnt) + 1))
> +#define MLX5_CNT_TO_CNT_EXT(pool, cnt) \
> +	((struct mlx5_flow_counter_ext *)\
> +	((char *)((cnt) + 1) + \
> +	(IS_AGE_POOL(pool) ? AGE_SIZE : 0)))
>   #define MLX5_GET_POOL_CNT_EXT(pool, offset) \
> -	MLX5_CNT_TO_CNT_EXT(MLX5_POOL_GET_CNT((pool), (offset)))
> +	MLX5_CNT_TO_CNT_EXT(pool, MLX5_POOL_GET_CNT((pool), (offset)))
> +#define MLX5_CNT_TO_AGE(cnt) \
> +	((struct mlx5_age_param *)((cnt) + 1))
>   
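The macros above imply a per-counter record layout of [mlx5_flow_counter]
[mlx5_age_param, aged pools only][mlx5_flow_counter_ext, single-allocation
pools only], stored back to back after the pool header. A standalone sketch
of the same offset arithmetic (simplified to raw sizes rather than the
driver types):

#include <stddef.h>

/* Mirrors MLX5_CNT_LEN: base counter plus optional age and ext parts. */
static size_t
cnt_len(int is_age, int is_ext, size_t cnt_sz, size_t age_sz, size_t ext_sz)
{
	return cnt_sz + (is_age ? age_sz : 0) + (is_ext ? ext_sz : 0);
}

/* Offsets inside one record of cnt_len() bytes:
 *   counter     at 0                              (MLX5_POOL_GET_CNT)
 *   age param   at cnt_sz                         (MLX5_CNT_TO_AGE)
 *   ext counter at cnt_sz + (is_age ? age_sz : 0) (MLX5_CNT_TO_CNT_EXT)
 */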
>   struct mlx5_flow_counter_pool;
>   
> +/* Age status. */
> +enum {
> +	AGE_FREE,
> +	AGE_CANDIDATE, /* Counter assigned to flows. */
> +	AGE_TMOUT, /* Timeout, wait for aged flows query and destroy. */
> +};
> +
> +/* Counter age parameter. */
> +struct mlx5_age_param {
> +	rte_atomic16_t state; /**< Age state. */
> +	uint16_t port_id; /**< Port id of the counter. */
> +	uint32_t timeout:15; /**< Age timeout in unit of 0.1sec. */
> +	uint32_t expire:16; /**< Expire time (0.1 sec) in the future. */
> +	void *context; /**< Flow counter age context. */
> +};
> +
>   struct flow_counter_stats {
>   	uint64_t hits;
>   	uint64_t bytes;
> @@ -336,13 +364,14 @@ struct mlx5_pools_container {
>   
>   /* Counter global management structure. */
>   struct mlx5_flow_counter_mng {
> -	uint8_t mhi[2]; /* master \ host container index. */
> -	struct mlx5_pools_container ccont[2 * 2];
> -	/* 2 containers for single and for batch for double-buffer. */
> +	uint8_t mhi[2][2]; /* master \ host and age \ no age container index. */
> +	struct mlx5_pools_container ccont[2 * 2][2];
> +	/* master \ host and age \ no age pools container. */
>   	struct mlx5_counters flow_counters; /* Legacy flow counter list. */
>   	uint8_t pending_queries;
>   	uint8_t batch;
>   	uint16_t pool_index;
> +	uint8_t age;
>   	uint8_t query_thread_on;
>   	LIST_HEAD(mem_mngs, mlx5_counter_stats_mem_mng) mem_mngs;
>   	LIST_HEAD(stat_raws, mlx5_counter_stats_raw) free_stat_raws;
> @@ -566,6 +595,10 @@ struct mlx5_priv {
>   	uint8_t fdb_def_rule; /* Whether fdb jump to table 1 is configured. */
>   	struct mlx5_mp_id mp_id; /* ID of a multi-process process */
>   	LIST_HEAD(fdir, mlx5_fdir_flow) fdir_flows; /* fdir flows. */
> +	struct mlx5_counters aged_counters; /* Aged flow counter list. */
> +	rte_spinlock_t aged_sl; /* Aged flow counter list lock. */
> +	rte_atomic16_t trigger_event;
> +	/* Event is triggered once after the last call of
> +	 * rte_flow_get_aged_flows. */
>   };
>   
>   #define PORT_ID(priv) ((priv)->dev_data->port_id)
> @@ -764,6 +797,8 @@ int mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
>   int mlx5_flow_dev_dump(struct rte_eth_dev *dev, FILE *file,
>   		       struct rte_flow_error *error);
>   void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
> +int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
> +			uint32_t nb_contexts, struct rte_flow_error *error);
>   
>   /* mlx5_mp.c */
>   int mlx5_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer);
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index cba1f23e81..c691b43722 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -24,6 +24,7 @@
>   #include <rte_ether.h>
>   #include <rte_ethdev_driver.h>
>   #include <rte_flow.h>
> +#include <rte_cycles.h>
>   #include <rte_flow_driver.h>
>   #include <rte_malloc.h>
>   #include <rte_ip.h>
> @@ -242,6 +243,7 @@ static const struct rte_flow_ops mlx5_flow_ops = {
>   	.isolate = mlx5_flow_isolate,
>   	.query = mlx5_flow_query,
>   	.dev_dump = mlx5_flow_dev_dump,
> +	.get_aged_flows = mlx5_flow_get_aged_flows,
>   };
>   
>   /* Convert FDIR request to Generic flow. */
> @@ -2531,6 +2533,8 @@ flow_drv_validate(struct rte_eth_dev *dev,
>    *   Pointer to the list of items.
>    * @param[in] actions
>    *   Pointer to the list of actions.
> + * @param[in] flow_idx
> + *   The memory pool index of the flow.
>    * @param[out] error
>    *   Pointer to the error structure.
>    *
> @@ -2543,14 +2547,19 @@ flow_drv_prepare(struct rte_eth_dev *dev,
>   		 const struct rte_flow_attr *attr,
>   		 const struct rte_flow_item items[],
>   		 const struct rte_flow_action actions[],
> +		 uint32_t flow_idx,
>   		 struct rte_flow_error *error)
>   {
>   	const struct mlx5_flow_driver_ops *fops;
>   	enum mlx5_flow_drv_type type = flow->drv_type;
> +	struct mlx5_flow *mlx5_flow = NULL;
>   
>   	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
>   	fops = flow_get_drv_ops(type);
> -	return fops->prepare(dev, attr, items, actions, error);
> +	mlx5_flow = fops->prepare(dev, attr, items, actions, error);
> +	if (mlx5_flow)
> +		mlx5_flow->flow_idx = flow_idx;
> +	return mlx5_flow;
>   }
>   
>   /**
> @@ -3498,6 +3507,8 @@ flow_hairpin_split(struct rte_eth_dev *dev,
>    *   Associated actions (list terminated by the END action).
>    * @param[in] external
>    *   This flow rule is created by request external to PMD.
> + * @param[in] flow_idx
> + *   The memory pool index of the flow.
>    * @param[out] error
>    *   Perform verbose error reporting if not NULL.
>    * @return
> @@ -3511,11 +3522,13 @@ flow_create_split_inner(struct rte_eth_dev *dev,
>   			const struct rte_flow_attr *attr,
>   			const struct rte_flow_item items[],
>   			const struct rte_flow_action actions[],
> -			bool external, struct rte_flow_error *error)
> +			bool external, uint32_t flow_idx,
> +			struct rte_flow_error *error)
>   {
>   	struct mlx5_flow *dev_flow;
>   
> -	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, error);
> +	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
> +		flow_idx, error);
>   	if (!dev_flow)
>   		return -rte_errno;
>   	dev_flow->flow = flow;
> @@ -3876,6 +3889,8 @@ flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
>    *   Associated actions (list terminated by the END action).
>    * @param[in] external
>    *   This flow rule is created by request external to PMD.
> + * @param[in] flow_idx
> + *   The memory pool index of the flow.
>    * @param[out] error
>    *   Perform verbose error reporting if not NULL.
>    * @return
> @@ -3888,7 +3903,8 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
>   			   const struct rte_flow_attr *attr,
>   			   const struct rte_flow_item items[],
>   			   const struct rte_flow_action actions[],
> -			   bool external, struct rte_flow_error *error)
> +			   bool external, uint32_t flow_idx,
> +			   struct rte_flow_error *error)
>   {
>   	struct mlx5_priv *priv = dev->data->dev_private;
>   	struct mlx5_dev_config *config = &priv->config;
> @@ -3908,7 +3924,7 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
>   	    !mlx5_flow_ext_mreg_supported(dev))
>   		return flow_create_split_inner(dev, flow, NULL, prefix_layers,
>   					       attr, items, actions, external,
> -					       error);
> +					       flow_idx, error);
>   	actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
>   							   &encap_idx);
>   	if (qrss) {
> @@ -3992,7 +4008,7 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
>   	/* Add the unmodified original or prefix subflow. */
>   	ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, attr,
>   				      items, ext_actions ? ext_actions :
> -				      actions, external, error);
> +				      actions, external, flow_idx, error);
>   	if (ret < 0)
>   		goto exit;
>   	MLX5_ASSERT(dev_flow);
> @@ -4055,7 +4071,7 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
>   		ret = flow_create_split_inner(dev, flow, &dev_flow, layers,
>   					      &q_attr, mtr_sfx ? items :
>   					      q_items, q_actions,
> -					      external, error);
> +					      external, flow_idx, error);
>   		if (ret < 0)
>   			goto exit;
>   		/* qrss ID should be freed if failed. */
> @@ -4096,6 +4112,8 @@ flow_create_split_metadata(struct rte_eth_dev *dev,
>    *   Associated actions (list terminated by the END action).
>    * @param[in] external
>    *   This flow rule is created by request external to PMD.
> + * @param[in] flow_idx
> + *   The memory pool index of the flow.
>    * @param[out] error
>    *   Perform verbose error reporting if not NULL.
>    * @return
> @@ -4107,7 +4125,8 @@ flow_create_split_meter(struct rte_eth_dev *dev,
>   			   const struct rte_flow_attr *attr,
>   			   const struct rte_flow_item items[],
>   			   const struct rte_flow_action actions[],
> -			   bool external, struct rte_flow_error *error)
> +			   bool external, uint32_t flow_idx,
> +			   struct rte_flow_error *error)
>   {
>   	struct mlx5_priv *priv = dev->data->dev_private;
>   	struct rte_flow_action *sfx_actions = NULL;
> @@ -4151,7 +4170,7 @@ flow_create_split_meter(struct rte_eth_dev *dev,
>   		/* Add the prefix subflow. */
>   		ret = flow_create_split_inner(dev, flow, &dev_flow, 0, attr,
>   					      items, pre_actions, external,
> -					      error);
> +					      flow_idx, error);
>   		if (ret) {
>   			ret = -rte_errno;
>   			goto exit;
> @@ -4168,7 +4187,7 @@ flow_create_split_meter(struct rte_eth_dev *dev,
>   					 0, &sfx_attr,
>   					 sfx_items ? sfx_items : items,
>   					 sfx_actions ? sfx_actions : actions,
> -					 external, error);
> +					 external, flow_idx, error);
>   exit:
>   	if (sfx_actions)
>   		rte_free(sfx_actions);
> @@ -4205,6 +4224,8 @@ flow_create_split_meter(struct rte_eth_dev *dev,
>    *   Associated actions (list terminated by the END action).
>    * @param[in] external
>    *   This flow rule is created by request external to PMD.
> + * @param[in] flow_idx
> + *   The memory pool index of the flow.
>    * @param[out] error
>    *   Perform verbose error reporting if not NULL.
>    * @return
> @@ -4216,12 +4237,13 @@ flow_create_split_outer(struct rte_eth_dev *dev,
>   			const struct rte_flow_attr *attr,
>   			const struct rte_flow_item items[],
>   			const struct rte_flow_action actions[],
> -			bool external, struct rte_flow_error *error)
> +			bool external, uint32_t flow_idx,
> +			struct rte_flow_error *error)
>   {
>   	int ret;
>   
>   	ret = flow_create_split_meter(dev, flow, attr, items,
> -					 actions, external, error);
> +					 actions, external, flow_idx, error);
>   	MLX5_ASSERT(ret <= 0);
>   	return ret;
>   }
> @@ -4356,7 +4378,7 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
>   		 */
>   		ret = flow_create_split_outer(dev, flow, attr,
>   					      buf->entry[i].pattern,
> -					      p_actions_rx, external,
> +					      p_actions_rx, external, idx,
>   					      error);
>   		if (ret < 0)
>   			goto error;
> @@ -4367,7 +4389,8 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
>   		attr_tx.ingress = 0;
>   		attr_tx.egress = 1;
>   		dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
> -					    actions_hairpin_tx.actions, error);
> +					 actions_hairpin_tx.actions,
> +					 idx, error);
>   		if (!dev_flow)
>   			goto error;
>   		dev_flow->flow = flow;
> @@ -5741,6 +5764,31 @@ mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
>   
>   #define MLX5_POOL_QUERY_FREQ_US 1000000
>   
> +/**
> + * Get the number of all valid pools.
> + *
> + * @param[in] sh
> + *   Pointer to mlx5_ibv_shared object.
> + *
> + * @return
> + *   The number of all valid pools.
> + */
> +static uint32_t
> +mlx5_get_all_valid_pool_count(struct mlx5_ibv_shared *sh)
> +{
> +	uint8_t age, i;
> +	uint32_t pools_n = 0;
> +	struct mlx5_pools_container *cont;
> +
> +	for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
> +		for (i = 0; i < 2 ; ++i) {
> +			cont = MLX5_CNT_CONTAINER(sh, i, 0, age);
> +			pools_n += rte_atomic16_read(&cont->n_valid);
> +		}
> +	}
> +	return pools_n;
> +}
> +
>   /**
>    * Set the periodic procedure for triggering asynchronous batch queries for all
>    * the counter pools.
> @@ -5751,12 +5799,9 @@ mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
>   void
>   mlx5_set_query_alarm(struct mlx5_ibv_shared *sh)
>   {
> -	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(sh, 0, 0);
> -	uint32_t pools_n = rte_atomic16_read(&cont->n_valid);
> -	uint32_t us;
> +	uint32_t pools_n, us;
>   
> -	cont = MLX5_CNT_CONTAINER(sh, 1, 0);
> -	pools_n += rte_atomic16_read(&cont->n_valid);
> +	pools_n = mlx5_get_all_valid_pool_count(sh);
>   	us = MLX5_POOL_QUERY_FREQ_US / pools_n;
>   	DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
>   	if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
> @@ -5782,6 +5827,7 @@ mlx5_flow_query_alarm(void *arg)
>   	uint16_t offset;
>   	int ret;
>   	uint8_t batch = sh->cmng.batch;
> +	uint8_t age = sh->cmng.age;
>   	uint16_t pool_index = sh->cmng.pool_index;
>   	struct mlx5_pools_container *cont;
>   	struct mlx5_pools_container *mcont;
> @@ -5790,8 +5836,8 @@ mlx5_flow_query_alarm(void *arg)
>   	if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
>   		goto set_alarm;
>   next_container:
> -	cont = MLX5_CNT_CONTAINER(sh, batch, 1);
> -	mcont = MLX5_CNT_CONTAINER(sh, batch, 0);
> +	cont = MLX5_CNT_CONTAINER(sh, batch, 1, age);
> +	mcont = MLX5_CNT_CONTAINER(sh, batch, 0, age);
>   	/* Check if resize was done and need to flip a container. */
>   	if (cont != mcont) {
>   		if (cont->pools) {
> @@ -5801,15 +5847,22 @@ mlx5_flow_query_alarm(void *arg)
>   		}
>   		rte_cio_wmb();
>   		 /* Flip the host container. */
> -		sh->cmng.mhi[batch] ^= (uint8_t)2;
> +		sh->cmng.mhi[batch][age] ^= (uint8_t)2;
>   		cont = mcont;
>   	}
>   	if (!cont->pools) {
>   		/* 2 empty containers case is unexpected. */
> -		if (unlikely(batch != sh->cmng.batch))
> +		if (unlikely(batch != sh->cmng.batch) &&
> +			unlikely(age != sh->cmng.age)) {
>   			goto set_alarm;
> +		}
>   		batch ^= 0x1;
>   		pool_index = 0;
> +		if (batch == 0 && pool_index == 0) {
> +			age ^= 0x1;
> +			sh->cmng.batch = batch;
> +			sh->cmng.age = age;
> +		}
>   		goto next_container;
>   	}
>   	pool = cont->pools[pool_index];
> @@ -5852,13 +5905,76 @@ mlx5_flow_query_alarm(void *arg)
>   	if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
>   		batch ^= 0x1;
>   		pool_index = 0;
> +		if (batch == 0 && pool_index == 0)
> +			age ^= 0x1;
>   	}
>   set_alarm:
>   	sh->cmng.batch = batch;
>   	sh->cmng.pool_index = pool_index;
> +	sh->cmng.age = age;
>   	mlx5_set_query_alarm(sh);
>   }
>   
> +/**
> + * Check for aged-out flows in the counter pool and trigger the callback.
> + *
> + * @param[in] pool
> + *   Pointer to the current counter pool.
> + */
> +static void
> +mlx5_flow_aging_check(struct mlx5_flow_counter_pool *pool)
> +{
> +	struct mlx5_priv *priv;
> +	struct mlx5_flow_counter *cnt;
> +	struct mlx5_age_param *age_param;
> +	struct mlx5_counter_stats_raw *cur = pool->raw_hw;
> +	struct mlx5_counter_stats_raw *prev = pool->raw;
> +	uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10);
> +	uint64_t port_mask = 0;
> +	uint32_t i;
> +
> +	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
> +		cnt = MLX5_POOL_GET_CNT(pool, i);
> +		age_param = MLX5_CNT_TO_AGE(cnt);
> +		if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE)
> +			continue;
> +		if (cur->data[i].hits != prev->data[i].hits) {
> +			age_param->expire = curr + age_param->timeout;
> +			continue;
> +		}
> +		if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2))
> +			continue;
> +		/**
> +		 * Hold the lock first; otherwise, if the release
> +		 * happens between setting the state to AGE_TMOUT
> +		 * and the tailq operation, the release procedure
> +		 * may delete a non-existent tailq node.
> +		 */
> +		priv = rte_eth_devices[age_param->port_id].data->dev_private;
> +		rte_spinlock_lock(&priv->aged_sl);
> +		/* If the cmpset fails, the release has happened. */
> +		if (rte_atomic16_cmpset((volatile uint16_t *)
> +					&age_param->state,
> +					AGE_CANDIDATE,
> +					AGE_TMOUT) ==
> +					AGE_CANDIDATE) {
> +			TAILQ_INSERT_TAIL(&priv->aged_counters, cnt, next);
> +			port_mask |= (1ull << age_param->port_id);
> +		}
> +		rte_spinlock_unlock(&priv->aged_sl);
> +	}
> +	for (i = 0; i < 64; i++) {
> +		if (port_mask & (1ull << i)) {
> +			priv = rte_eth_devices[i].data->dev_private;
> +			if (!rte_atomic16_read(&priv->trigger_event))
> +				continue;
> +			_rte_eth_dev_callback_process(&rte_eth_devices[i],
> +				RTE_ETH_EVENT_FLOW_AGED, NULL);
> +			rte_atomic16_set(&priv->trigger_event, 0);
> +		}
> +	}
> +}
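
The (uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2) test above is
a wrap-around-safe deadline check on free-running 0.1-second ticks. A
self-contained sketch of the same comparison:

#include <stdint.h>
#include <stdbool.h>

/* True once "now" has reached or passed "expire", modulo 2^16. */
static bool
expired(uint16_t now, uint16_t expire)
{
	return (uint16_t)(now - expire) < UINT16_MAX / 2;
}

/* expired(5, 3)     -> true  (2 ticks past the deadline)
 * expired(3, 5)     -> false (deadline 2 ticks in the future)
 * expired(2, 65530) -> true  (deadline passed across the 16-bit wrap)
 */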
> +
>   /**
>    * Handler for the HW response with ready values from an asynchronous batch
>    * query. This function is probably called by the host thread.
> @@ -5883,6 +5999,8 @@ mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh,
>   		raw_to_free = pool->raw_hw;
>   	} else {
>   		raw_to_free = pool->raw;
> +		if (IS_AGE_POOL(pool))
> +			mlx5_flow_aging_check(pool);
>   		rte_spinlock_lock(&pool->sl);
>   		pool->raw = pool->raw_hw;
>   		rte_spinlock_unlock(&pool->sl);
> @@ -6034,3 +6152,40 @@ mlx5_flow_dev_dump(struct rte_eth_dev *dev,
>   	return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
>   				       sh->tx_domain, file);
>   }
> +
> +/**
> + * Get aged-out flows.
> + *
> + * @param[in] dev
> + *   Pointer to the Ethernet device structure.
> + * @param[in] context
> + *   The address of an array of pointers to the aged-out flows contexts.
> + * @param[in] nb_contexts
> + *   The length of context array pointers.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. Initialized in case of
> + *   error only.
> + *
> + * @return
> + *   The number of contexts retrieved on success, otherwise a negative
> + *   errno value. If nb_contexts is 0, return the total number of aged-out
> + *   contexts. If nb_contexts is not 0, return the number of aged-out flows
> + *   reported in the context array.
> + */
> +int
> +mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
> +			uint32_t nb_contexts, struct rte_flow_error *error)
> +{
> +	const struct mlx5_flow_driver_ops *fops;
> +	struct rte_flow_attr attr = { .transfer = 0 };
> +
> +	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
> +		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
> +		return fops->get_aged_flows(dev, contexts, nb_contexts,
> +						    error);
> +	}
> +	DRV_LOG(ERR,
> +		"port %u get aged flows is not supported.",
> +		 dev->data->port_id);
> +	return -ENOTSUP;
> +}
> diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> index 2a1f59698c..bf1d5beb9b 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -199,6 +199,7 @@ enum mlx5_feature_name {
>   #define MLX5_FLOW_ACTION_METER (1ull << 31)
>   #define MLX5_FLOW_ACTION_SET_IPV4_DSCP (1ull << 32)
>   #define MLX5_FLOW_ACTION_SET_IPV6_DSCP (1ull << 33)
> +#define MLX5_FLOW_ACTION_AGE (1ull << 34)
>   
>   #define MLX5_FLOW_FATE_ACTIONS \
>   	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | \
> @@ -650,6 +651,7 @@ struct mlx5_flow_verbs_workspace {
>   /** Device flow structure. */
>   struct mlx5_flow {
>   	struct rte_flow *flow; /**< Pointer to the main flow. */
> +	uint32_t flow_idx; /**< The memory pool index to the main flow. */
>   	uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
>   	uint64_t act_flags;
>   	/**< Bit-fields of detected actions, see MLX5_FLOW_ACTION_*. */
> @@ -873,6 +875,11 @@ typedef int (*mlx5_flow_counter_query_t)(struct rte_eth_dev *dev,
>   					 uint32_t cnt,
>   					 bool clear, uint64_t *pkts,
>   					 uint64_t *bytes);
> +typedef int (*mlx5_flow_get_aged_flows_t)
> +					(struct rte_eth_dev *dev,
> +					 void **context,
> +					 uint32_t nb_contexts,
> +					 struct rte_flow_error *error);
>   struct mlx5_flow_driver_ops {
>   	mlx5_flow_validate_t validate;
>   	mlx5_flow_prepare_t prepare;
> @@ -888,13 +895,14 @@ struct mlx5_flow_driver_ops {
>   	mlx5_flow_counter_alloc_t counter_alloc;
>   	mlx5_flow_counter_free_t counter_free;
>   	mlx5_flow_counter_query_t counter_query;
> +	mlx5_flow_get_aged_flows_t get_aged_flows;
>   };
>   
>   
> -#define MLX5_CNT_CONTAINER(sh, batch, thread) (&(sh)->cmng.ccont \
> -	[(((sh)->cmng.mhi[batch] >> (thread)) & 0x1) * 2 + (batch)])
> -#define MLX5_CNT_CONTAINER_UNUSED(sh, batch, thread) (&(sh)->cmng.ccont \
> -	[(~((sh)->cmng.mhi[batch] >> (thread)) & 0x1) * 2 + (batch)])
> +#define MLX5_CNT_CONTAINER(sh, batch, thread, age) (&(sh)->cmng.ccont \
> +	[(((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 + (batch)][age])
> +#define MLX5_CNT_CONTAINER_UNUSED(sh, batch, thread, age) (&(sh)->cmng.ccont \
> +	[(~((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 + (batch)][age])
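
For reference, the first-dimension index computed by the reworked macros
above: mhi[batch][age] carries two flip bits (bit 0 is flipped by the user
thread on container resize, bit 1 by the query thread once it has caught
up), and the age index simply selects the second dimension. A minimal
sketch of the selection:

/* Mirrors MLX5_CNT_CONTAINER's first-dimension computation. */
static unsigned int
cont_dim0(uint8_t mhi_entry, unsigned int batch, unsigned int thread)
{
	return ((mhi_entry >> thread) & 0x1) * 2 + batch;
}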
>   
>   /* mlx5_flow.c */
>   
> diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
> index 784a62c521..73a5f477f8 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -24,6 +24,7 @@
>   #include <rte_flow.h>
>   #include <rte_flow_driver.h>
>   #include <rte_malloc.h>
> +#include <rte_cycles.h>
>   #include <rte_ip.h>
>   #include <rte_gre.h>
>   #include <rte_vxlan.h>
> @@ -3719,6 +3720,50 @@ mlx5_flow_validate_action_meter(struct rte_eth_dev *dev,
>   	return 0;
>   }
>   
> +/**
> + * Validate the age action.
> + *
> + * @param[in] action_flags
> + *   Holds the actions detected until now.
> + * @param[in] action
> + *   Pointer to the age action.
> + * @param[in] dev
> + *   Pointer to the Ethernet device structure.
> + * @param[out] error
> + *   Pointer to error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +flow_dv_validate_action_age(uint64_t action_flags,
> +			    const struct rte_flow_action *action,
> +			    struct rte_eth_dev *dev,
> +			    struct rte_flow_error *error)
> +{
> +	struct mlx5_priv *priv = dev->data->dev_private;
> +	const struct rte_flow_action_age *age = action->conf;
> +
> +	if (!priv->config.devx)
> +		return rte_flow_error_set(error, ENOTSUP,
> +					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +					  NULL,
> +					  "age action not supported");
> +	if (!(action->conf))
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION, action,
> +					  "configuration cannot be null");
> +	if (age->timeout >= UINT16_MAX / 2 / 10)
> +		return rte_flow_error_set(error, ENOTSUP,
> +					  RTE_FLOW_ERROR_TYPE_ACTION, action,
> +					  "Max age time: 3275 seconds");
> +	if (action_flags & MLX5_FLOW_ACTION_AGE)
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
> +					  "Duplicate age actions set");
> +	return 0;
> +}
> +
>   /**
>    * Validate the modify-header IPv4 DSCP actions.
>    *
> @@ -3896,14 +3941,16 @@ flow_dv_counter_get_by_idx(struct rte_eth_dev *dev,
>   	struct mlx5_priv *priv = dev->data->dev_private;
>   	struct mlx5_pools_container *cont;
>   	struct mlx5_flow_counter_pool *pool;
> -	uint32_t batch = 0;
> +	uint32_t batch = 0, age = 0;
>   
>   	idx--;
> +	age = MLX_CNT_IS_AGE(idx);
> +	idx = age ? idx - MLX5_CNT_AGE_OFFSET : idx;
>   	if (idx >= MLX5_CNT_BATCH_OFFSET) {
>   		idx -= MLX5_CNT_BATCH_OFFSET;
>   		batch = 1;
>   	}
> -	cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0);
> +	cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
>   	MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cont->n);
>   	pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
>   	MLX5_ASSERT(pool);
> @@ -4023,18 +4070,21 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
>    *   Pointer to the Ethernet device structure.
>    * @param[in] batch
>    *   Whether the pool is for counter that was allocated by batch command.
> + * @param[in] age
> + *   Whether the pool is for aging counters.
>    *
>    * @return
>    *   The new container pointer on success, otherwise NULL and rte_errno is set.
>    */
>   static struct mlx5_pools_container *
> -flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
> +flow_dv_container_resize(struct rte_eth_dev *dev,
> +				uint32_t batch, uint32_t age)
>   {
>   	struct mlx5_priv *priv = dev->data->dev_private;
>   	struct mlx5_pools_container *cont =
> -			MLX5_CNT_CONTAINER(priv->sh, batch, 0);
> +			MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
>   	struct mlx5_pools_container *new_cont =
> -			MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0);
> +			MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0, age);
>   	struct mlx5_counter_stats_mem_mng *mem_mng = NULL;
>   	uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE;
>   	uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize;
> @@ -4042,7 +4092,7 @@ flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
>   
>   	/* Fallback mode has no background thread. Skip the check. */
>   	if (!priv->counter_fallback &&
> -	    cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1)) {
> +	    cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1, age)) {
>   		/* The last resize still hasn't been detected by the host thread. */
>   		rte_errno = EAGAIN;
>   		return NULL;
> @@ -4085,7 +4135,7 @@ flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
>   	new_cont->init_mem_mng = mem_mng;
>   	rte_cio_wmb();
>   	 /* Flip the master container. */
> -	priv->sh->cmng.mhi[batch] ^= (uint8_t)1;
> +	priv->sh->cmng.mhi[batch][age] ^= (uint8_t)1;
>   	return new_cont;
>   }
>   
> @@ -4117,7 +4167,7 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
>   	cnt = flow_dv_counter_get_by_idx(dev, counter, &pool);
>   	MLX5_ASSERT(pool);
>   	if (counter < MLX5_CNT_BATCH_OFFSET) {
> -		cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
> +		cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
>   		if (priv->counter_fallback)
>   			return mlx5_devx_cmd_flow_counter_query(cnt_ext->dcs, 0,
>   					0, pkts, bytes, 0, NULL, NULL, 0);
> @@ -4150,6 +4200,8 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
>    *   The devX counter handle.
>    * @param[in] batch
>    *   Whether the pool is for counter that was allocated by batch command.
> + * @param[in] age
> + *   Whether the pool is for counter that was allocated for aging.
>    * @param[in/out] cont_cur
> + *   Pointer to the container pointer; it will be updated on pool resize.
>    *
> @@ -4158,24 +4210,23 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
>    */
>   static struct mlx5_pools_container *
>   flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
> -		    uint32_t batch)
> +		    uint32_t batch, uint32_t age)
>   {
>   	struct mlx5_priv *priv = dev->data->dev_private;
>   	struct mlx5_flow_counter_pool *pool;
>   	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
> -							       0);
> +							       0, age);
>   	int16_t n_valid = rte_atomic16_read(&cont->n_valid);
> -	uint32_t size;
> +	uint32_t size = sizeof(*pool);
>   
>   	if (cont->n == n_valid) {
> -		cont = flow_dv_container_resize(dev, batch);
> +		cont = flow_dv_container_resize(dev, batch, age);
>   		if (!cont)
>   			return NULL;
>   	}
> -	size = sizeof(*pool);
>   	size += MLX5_COUNTERS_PER_POOL * CNT_SIZE;
> -	if (!batch)
> -		size += MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE;
> +	size += (batch ? 0 : MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE);
> +	size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * AGE_SIZE);
>   	pool = rte_calloc(__func__, 1, size, 0);
>   	if (!pool) {
>   		rte_errno = ENOMEM;
> @@ -4187,8 +4238,8 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
>   						     MLX5_CNT_CONTAINER_RESIZE;
>   	pool->raw_hw = NULL;
>   	pool->type = 0;
> -	if (!batch)
> -		pool->type |= CNT_POOL_TYPE_EXT;
> +	pool->type |= (batch ? 0 :  CNT_POOL_TYPE_EXT);
> +	pool->type |= (!age ? 0 :  CNT_POOL_TYPE_AGE);
>   	rte_spinlock_init(&pool->sl);
>   	/*
>   	 * The generation of the new allocated counters in this pool is 0, 2 in
> @@ -4215,6 +4266,39 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
>   	return cont;
>   }
>   
> +/**
> + * Update the minimum dcs-id for aged or no-aged counter pool.
> + *
> + * @param[in] dev
> + *   Pointer to the Ethernet device structure.
> + * @param[in] pool
> + *   Current counter pool.
> + * @param[in] batch
> + *   Whether the pool is for counter that was allocated by batch command.
> + * @param[in] age
> + *   Whether the counter is for aging.
> + */
> +static void
> +flow_dv_counter_update_min_dcs(struct rte_eth_dev *dev,
> +			struct mlx5_flow_counter_pool *pool,
> +			uint32_t batch, uint32_t age)
> +{
> +	struct mlx5_priv *priv = dev->data->dev_private;
> +	struct mlx5_flow_counter_pool *other;
> +	struct mlx5_pools_container *cont;
> +
> +	cont = MLX5_CNT_CONTAINER(priv->sh,	batch, 0, (age ^ 0x1));
Too much space.
> +	other = flow_dv_find_pool_by_id(cont, pool->min_dcs->id);
> +	if (!other)
> +		return;
> +	if (pool->min_dcs->id < other->min_dcs->id) {
> +		rte_atomic64_set(&other->a64_dcs,
> +			rte_atomic64_read(&pool->a64_dcs));
> +	} else {
> +		rte_atomic64_set(&pool->a64_dcs,
> +			rte_atomic64_read(&other->a64_dcs));
> +	}
> +}
>   /**
>    * Prepare a new counter and/or a new counter pool.
>    *
> @@ -4224,6 +4308,8 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
>    *   Where to put the pointer of a new counter.
>    * @param[in] batch
>    *   Whether the pool is for counter that was allocated by batch command.
> + * @param[in] age
> + *   Whether the pool is for counter that was allocated for aging.
>    *
>    * @return
>    *   The counter container pointer and @p cnt_free is set on success,
> @@ -4232,7 +4318,7 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
>   static struct mlx5_pools_container *
>   flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
>   			     struct mlx5_flow_counter **cnt_free,
> -			     uint32_t batch)
> +			     uint32_t batch, uint32_t age)
>   {
>   	struct mlx5_priv *priv = dev->data->dev_private;
>   	struct mlx5_pools_container *cont;
> @@ -4241,7 +4327,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
>   	struct mlx5_flow_counter *cnt;
>   	uint32_t i;
>   
> -	cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0);
> +	cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
>   	if (!batch) {
>   		/* bulk_bitmap must be 0 for single counter allocation. */
>   		dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
> @@ -4249,7 +4335,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
>   			return NULL;
>   		pool = flow_dv_find_pool_by_id(cont, dcs->id);
>   		if (!pool) {
> -			cont = flow_dv_pool_create(dev, dcs, batch);
> +			cont = flow_dv_pool_create(dev, dcs, batch, age);
>   			if (!cont) {
>   				mlx5_devx_cmd_destroy(dcs);
>   				return NULL;
> @@ -4259,6 +4345,8 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
>   			rte_atomic64_set(&pool->a64_dcs,
>   					 (int64_t)(uintptr_t)dcs);
>   		}
> +		flow_dv_counter_update_min_dcs(dev,
> +						pool, batch, age);

As the above "else if" updates the min_dcs, and this function's name also
says it will update the min_dcs, it would be better to align the update in
one function.

Or rename the function to something better indicating that it updates the
"other" pool with the same id?

Not insisting on it.

>   		i = dcs->id % MLX5_COUNTERS_PER_POOL;
>   		cnt = MLX5_POOL_GET_CNT(pool, i);
>   		TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
> @@ -4273,7 +4361,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
>   		rte_errno = ENODATA;
>   		return NULL;
>   	}
> -	cont = flow_dv_pool_create(dev, dcs, batch);
> +	cont = flow_dv_pool_create(dev, dcs, batch, age);
>   	if (!cont) {
>   		mlx5_devx_cmd_destroy(dcs);
>   		return NULL;
> @@ -4334,13 +4422,15 @@ flow_dv_counter_shared_search(struct mlx5_pools_container *cont, uint32_t id,
>    *   Counter identifier.
>    * @param[in] group
>    *   Counter flow group.
> + * @param[in] age
> + *   Whether the counter was allocated for aging.
>    *
>    * @return
>    *   Index to flow counter on success, 0 otherwise and rte_errno is set.
>    */
>   static uint32_t
>   flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
> -		      uint16_t group)
> +		      uint16_t group, uint32_t age)
>   {
>   	struct mlx5_priv *priv = dev->data->dev_private;
>   	struct mlx5_flow_counter_pool *pool = NULL;
> @@ -4356,7 +4446,7 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
>   	 */
>   	uint32_t batch = (group && !shared && !priv->counter_fallback) ? 1 : 0;
>   	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
> -							       0);
> +							       0, age);
>   	uint32_t cnt_idx;
>   
>   	if (!priv->config.devx) {
> @@ -4395,13 +4485,13 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
>   		cnt_free = NULL;
>   	}
>   	if (!cnt_free) {
> -		cont = flow_dv_counter_pool_prepare(dev, &cnt_free, batch);
> +		cont = flow_dv_counter_pool_prepare(dev, &cnt_free, batch, age);
>   		if (!cont)
>   			return 0;
>   		pool = TAILQ_FIRST(&cont->pool_list);
>   	}
>   	if (!batch)
> -		cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt_free);
> +		cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free);
>   	/* Create a DV counter action only in the first time usage. */
>   	if (!cnt_free->action) {
>   		uint16_t offset;
> @@ -4424,6 +4514,7 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
>   	cnt_idx = MLX5_MAKE_CNT_IDX(pool->index,
>   				MLX5_CNT_ARRAY_IDX(pool, cnt_free));
>   	cnt_idx += batch * MLX5_CNT_BATCH_OFFSET;
> +	cnt_idx += age * MLX5_CNT_AGE_OFFSET;
>   	/* Update the counter reset values. */
>   	if (_flow_dv_query_count(dev, cnt_idx, &cnt_free->hits,
>   				 &cnt_free->bytes))
> @@ -4445,6 +4536,62 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
>   	return cnt_idx;
>   }
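
The returned index above packs the pool slot, the in-pool offset, and the
batch/age flags into one uint32_t; flow_dv_counter_get_by_idx() reverses
the encoding. A minimal standalone sketch (POOL_SZ and AGE_BIT mirror
MLX5_COUNTERS_PER_POOL and MLX5_CNT_AGE_OFFSET from mlx5.h; the batch
offset is the driver's MLX5_CNT_BATCH_OFFSET, taken here as a parameter):

#include <stdint.h>

#define POOL_SZ 512u        /* MLX5_COUNTERS_PER_POOL */
#define AGE_BIT 0x80000000u /* MLX5_CNT_AGE_OFFSET: the top bit */

static uint32_t
cnt_idx_encode(uint32_t pool, uint32_t offset, int batch, int age,
	       uint32_t batch_off)
{
	/* MLX5_MAKE_CNT_IDX is 1-based so that index 0 means "no counter". */
	uint32_t idx = pool * POOL_SZ + offset + 1;

	if (batch)
		idx += batch_off;
	if (age)
		idx += AGE_BIT;
	return idx;
}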
>   
> +/**
> + * Get age param from counter index.
> + *
> + * @param[in] dev
> + *   Pointer to the Ethernet device structure.
> + * @param[in] counter
> + *   Index to the counter handler.
> + *
> + * @return
> + *   The aging parameter specified for the counter index.
> + */
> +static struct mlx5_age_param*
> +flow_dv_counter_idx_get_age(struct rte_eth_dev *dev,
> +				uint32_t counter)
> +{
> +	struct mlx5_flow_counter *cnt;
> +	struct mlx5_flow_counter_pool *pool = NULL;
> +
> +	flow_dv_counter_get_by_idx(dev, counter, &pool);
> +	counter = (counter - 1) % MLX5_COUNTERS_PER_POOL;
> +	cnt = MLX5_POOL_GET_CNT(pool, counter);
> +	return MLX5_CNT_TO_AGE(cnt);
> +}
> +
> +/**
> + * Remove a flow counter from aged counter list.
> + *
> + * @param[in] dev
> + *   Pointer to the Ethernet device structure.
> + * @param[in] counter
> + *   Index to the counter handler.
> + * @param[in] cnt
> + *   Pointer to the counter handler.
> + */
> +static void
> +flow_dv_counter_remove_from_age(struct rte_eth_dev *dev,
> +				uint32_t counter, struct mlx5_flow_counter *cnt)
> +{
> +	struct mlx5_age_param *age_param;
> +	struct mlx5_priv *priv = dev->data->dev_private;
> +
> +	age_param = flow_dv_counter_idx_get_age(dev, counter);
> +	if (rte_atomic16_cmpset((volatile uint16_t *)
> +			&age_param->state,
> +			AGE_CANDIDATE, AGE_FREE)
> +			!= AGE_CANDIDATE) {
> +		/**
> +		 * We need the lock even if it is an age timeout,
> +		 * since the counter may still be in process.
> +		 */
> +		rte_spinlock_lock(&priv->aged_sl);
> +		TAILQ_REMOVE(&priv->aged_counters, cnt, next);
> +		rte_spinlock_unlock(&priv->aged_sl);
> +	}
> +	rte_atomic16_set(&age_param->state, AGE_FREE);
> +}
>   /**
>    * Release a flow counter.
>    *
> @@ -4465,10 +4612,12 @@ flow_dv_counter_release(struct rte_eth_dev *dev, uint32_t counter)
>   	cnt = flow_dv_counter_get_by_idx(dev, counter, &pool);
>   	MLX5_ASSERT(pool);
>   	if (counter < MLX5_CNT_BATCH_OFFSET) {
> -		cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
> +		cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
>   		if (cnt_ext && --cnt_ext->ref_cnt)
>   			return;
>   	}
> +	if (IS_AGE_POOL(pool))
> +		flow_dv_counter_remove_from_age(dev, counter, cnt);
>   	/* Put the counter in the end - the last updated one. */
>   	TAILQ_INSERT_TAIL(&pool->counters, cnt, next);
>   	/*
> @@ -5243,6 +5392,15 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
>   			/* Meter action will add one more TAG action. */
>   			rw_act_num += MLX5_ACT_NUM_SET_TAG;
>   			break;
> +		case RTE_FLOW_ACTION_TYPE_AGE:
> +			ret = flow_dv_validate_action_age(action_flags,
> +							  actions, dev,
> +							  error);
> +			if (ret < 0)
> +				return ret;
> +			action_flags |= MLX5_FLOW_ACTION_AGE;
> +			++actions_n;
> +			break;
>   		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
>   			ret = flow_dv_validate_action_modify_ipv4_dscp
>   							 (action_flags,
> @@ -7281,6 +7439,54 @@ flow_dv_translate_action_port_id(struct rte_eth_dev *dev,
>   	return 0;
>   }
>   
> +/**
> + * Create a counter with aging configuration.
> + *
> + * @param[in] dev
> + *   Pointer to rte_eth_dev structure.
> + * @param[out] count
> + *   Pointer to the counter action configuration.
> + * @param[in] age
> + *   Pointer to the aging action configuration.
> + *
> + * @return
> + *   Index to flow counter on success, 0 otherwise.
> + */
> +static uint32_t
> +flow_dv_translate_create_counter(struct rte_eth_dev *dev,
> +				struct mlx5_flow *dev_flow,
> +				const struct rte_flow_action_count *count,
> +				const struct rte_flow_action_age *age)
> +{
> +	uint32_t counter;
> +	struct mlx5_age_param *age_param;
> +
> +	counter = flow_dv_counter_alloc(dev,
> +				count ? count->shared : 0,
> +				count ? count->id : 0,
> +				dev_flow->dv.group, !!age);
> +
> +	if (!counter || age == NULL)
> +		return counter;
> +	age_param  = flow_dv_counter_idx_get_age(dev, counter);
> +	age_param->context = age->context ? age->context :
> +		(void *)(uintptr_t)(dev_flow->flow_idx);
> +	/*
> +	 * The counter age accuracy may have a bit of delay. Apply a 0.7
> +	 * second bias on the timeout in order to let it age in time.
> +	 */
> +	age_param->timeout = age->timeout * 10 - 7;
> +	/* Set expire time in unit of 0.1 sec. */
> +	age_param->port_id = dev->data->port_id;
> +	age_param->expire = age_param->timeout +
> +			rte_rdtsc() / (rte_get_tsc_hz() / 10);
> +	rte_atomic16_set(&age_param->state, AGE_CANDIDATE);
> +	return counter;
> +}
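
As a tiny worked sketch of the conversion above: the user timeout comes in
seconds, the driver tracks 0.1-second ticks, and the 7-tick (0.7 s) bias
makes the flow eligible for aging slightly early to absorb the periodic
query latency:

#include <stdint.h>

static uint16_t
age_timeout_ticks(uint32_t timeout_sec)
{
	/* e.g. timeout_sec = 10 -> 100 - 7 = 93 ticks = 9.3 seconds. */
	return (uint16_t)(timeout_sec * 10 - 7);
}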
>   /**
>    * Add Tx queue matcher
>    *
> @@ -7450,6 +7656,8 @@ __flow_dv_translate(struct rte_eth_dev *dev,
>   			    (MLX5_MAX_MODIFY_NUM + 1)];
>   	} mhdr_dummy;
>   	struct mlx5_flow_dv_modify_hdr_resource *mhdr_res = &mhdr_dummy.res;
> +	const struct rte_flow_action_count *count = NULL;
> +	const struct rte_flow_action_age *age = NULL;
>   	union flow_dv_attr flow_attr = { .attr = 0 };
>   	uint32_t tag_be;
>   	union mlx5_flow_tbl_key tbl_key;
> @@ -7478,7 +7686,6 @@ __flow_dv_translate(struct rte_eth_dev *dev,
>   		const struct rte_flow_action_queue *queue;
>   		const struct rte_flow_action_rss *rss;
>   		const struct rte_flow_action *action = actions;
> -		const struct rte_flow_action_count *count = action->conf;
>   		const uint8_t *rss_key;
>   		const struct rte_flow_action_jump *jump_data;
>   		const struct rte_flow_action_meter *mtr;
> @@ -7607,36 +7814,21 @@ __flow_dv_translate(struct rte_eth_dev *dev,
>   			action_flags |= MLX5_FLOW_ACTION_RSS;
>   			dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
>   			break;
> +		case RTE_FLOW_ACTION_TYPE_AGE:
>   		case RTE_FLOW_ACTION_TYPE_COUNT:
>   			if (!dev_conf->devx) {
> -				rte_errno = ENOTSUP;
> -				goto cnt_err;
> -			}
> -			flow->counter = flow_dv_counter_alloc(dev,
> -							count->shared,
> -							count->id,
> -							dev_flow->dv.group);
> -			if (!flow->counter)
> -				goto cnt_err;
> -			dev_flow->dv.actions[actions_n++] =
> -				  (flow_dv_counter_get_by_idx(dev,
> -				  flow->counter, NULL))->action;
> -			action_flags |= MLX5_FLOW_ACTION_COUNT;
> -			break;
> -cnt_err:
> -			if (rte_errno == ENOTSUP)
>   				return rte_flow_error_set
>   					      (error, ENOTSUP,
>   					       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
>   					       NULL,
>   					       "count action not supported");
> +			}
> +			/* Save information first, will apply later. */
> +			if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT)
> +				count = action->conf;
>   			else
> -				return rte_flow_error_set
> -						(error, rte_errno,
> -						 RTE_FLOW_ERROR_TYPE_ACTION,
> -						 action,
> -						 "cannot create counter"
> -						  " object.");
> +				age = action->conf;
> +			action_flags |= MLX5_FLOW_ACTION_COUNT;
>   			break;
>   		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
>   			dev_flow->dv.actions[actions_n++] =
> @@ -7909,6 +8101,22 @@ __flow_dv_translate(struct rte_eth_dev *dev,
>   				dev_flow->dv.actions[modify_action_position] =
>   					handle->dvh.modify_hdr->verbs_action;
>   			}
> +			if (action_flags & MLX5_FLOW_ACTION_COUNT) {
> +				flow->counter =
> +					flow_dv_translate_create_counter(dev,
> +						dev_flow, count, age);
> +
> +				if (!flow->counter)
> +					return rte_flow_error_set
> +						(error, rte_errno,
> +						RTE_FLOW_ERROR_TYPE_ACTION,
> +						NULL,
> +						"cannot create counter"
> +						" object.");
> +				dev_flow->dv.actions[actions_n++] =
> +					  (flow_dv_counter_get_by_idx(dev,
> +					  flow->counter, NULL))->action;
> +			}
>   			break;
>   		default:
>   			break;
> @@ -9169,6 +9377,58 @@ flow_dv_counter_query(struct rte_eth_dev *dev, uint32_t counter, bool clear,
>   	return 0;
>   }
>   
> +/**
> + * Get aged-out flows.
> + *
> + * @param[in] dev
> + *   Pointer to the Ethernet device structure.
> + * @param[in] context
> + *   The address of an array of pointers to the aged-out flows contexts.
> + * @param[in] nb_contexts
> + *   The length of context array pointers.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. Initialized in case of
> + *   error only.
> + *
> + * @return
> + *   The number of contexts retrieved on success, otherwise a negative
> + *   errno value. If nb_contexts is 0, return the total number of aged-out
> + *   contexts. If nb_contexts is not 0, return the number of aged-out flows
> + *   reported in the context array.
> + */
> +static int
> +flow_get_aged_flows(struct rte_eth_dev *dev,
> +		    void **context,
> +		    uint32_t nb_contexts,
> +		    struct rte_flow_error *error)
> +{
> +	struct mlx5_priv *priv = dev->data->dev_private;
> +	struct mlx5_age_param *age_param;
> +	struct mlx5_flow_counter *counter;
> +	int nb_flows = 0;
> +
> +	if (nb_contexts && !context)
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +					  NULL,
> +					  "Should assign at least one flow or"
> +					  " context to get if nb_contexts != 0");
> +	rte_spinlock_lock(&priv->aged_sl);
> +	TAILQ_FOREACH(counter, &priv->aged_counters, next) {
> +		nb_flows++;
> +		if (nb_contexts) {
> +			age_param = MLX5_CNT_TO_AGE(counter);
> +			context[nb_flows - 1] = age_param->context;
> +			if (!(--nb_contexts))
> +				break;
> +		}
> +	}
> +	rte_spinlock_unlock(&priv->aged_sl);
> +	rte_atomic16_set(&priv->trigger_event, 1);
> +	return nb_flows;
> +}
> +
>   /*
>    * Mutex-protected thunk to lock-free  __flow_dv_translate().
>    */
> @@ -9235,7 +9495,7 @@ flow_dv_counter_allocate(struct rte_eth_dev *dev)
>   	uint32_t cnt;
>   
>   	flow_dv_shared_lock(dev);
> -	cnt = flow_dv_counter_alloc(dev, 0, 0, 1);
> +	cnt = flow_dv_counter_alloc(dev, 0, 0, 1, 0);
>   	flow_dv_shared_unlock(dev);
>   	return cnt;
>   }
> @@ -9266,6 +9526,7 @@ const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = {
>   	.counter_alloc = flow_dv_counter_allocate,
>   	.counter_free = flow_dv_counter_free,
>   	.counter_query = flow_dv_counter_query,
> +	.get_aged_flows = flow_get_aged_flows,
>   };
>   
>   #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
> diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
> index 236d665852..7efd97f547 100644
> --- a/drivers/net/mlx5/mlx5_flow_verbs.c
> +++ b/drivers/net/mlx5/mlx5_flow_verbs.c
> @@ -56,7 +56,8 @@ flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev,
>   			      struct mlx5_flow_counter_pool **ppool)
>   {
>   	struct mlx5_priv *priv = dev->data->dev_private;
> -	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
> +	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
> +									0);
>   	struct mlx5_flow_counter_pool *pool;
>   
>   	idx--;
> @@ -151,7 +152,8 @@ static uint32_t
>   flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
>   {
>   	struct mlx5_priv *priv = dev->data->dev_private;
> -	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
> +	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
> +									0);
>   	struct mlx5_flow_counter_pool *pool = NULL;
>   	struct mlx5_flow_counter_ext *cnt_ext = NULL;
>   	struct mlx5_flow_counter *cnt = NULL;
> @@ -251,7 +253,7 @@ flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter)
>   
>   	cnt = flow_verbs_counter_get_by_idx(dev, counter,
>   					    &pool);
> -	cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
> +	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
>   	if (--cnt_ext->ref_cnt == 0) {
>   #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
>   		claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs));
> @@ -282,7 +284,7 @@ flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
>   		struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx
>   						(dev, flow->counter, &pool);
>   		struct mlx5_flow_counter_ext *cnt_ext = MLX5_CNT_TO_CNT_EXT
> -						(cnt);
> +						(pool, cnt);
>   		struct rte_flow_query_count *qc = data;
>   		uint64_t counters[2] = {0, 0};
>   #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
> @@ -1083,12 +1085,12 @@ flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
>   	}
>   #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
>   	cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
> -	cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
> +	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
>   	counter.counter_set_handle = cnt_ext->cs->handle;
>   	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
>   #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
>   	cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
> -	cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
> +	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
>   	counter.counters = cnt_ext->cs;
>   	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
>   #endif
  

Patch

diff --git a/doc/guides/rel_notes/release_20_05.rst b/doc/guides/rel_notes/release_20_05.rst
index b124c3f287..a5ba8a4792 100644
--- a/doc/guides/rel_notes/release_20_05.rst
+++ b/doc/guides/rel_notes/release_20_05.rst
@@ -141,6 +141,7 @@  New Features
   * Added support for creating Relaxed Ordering Memory Regions.
   * Added support for jumbo frame size (9K MTU) in Multi-Packet RQ mode.
   * Optimized the memory consumption of flow.
+  * Added support for flow aging based on hardware counter.
 
 * **Updated the AESNI MB crypto PMD.**
 
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 57d76cb741..674d0ea9d3 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -437,6 +437,20 @@  mlx5_flow_id_release(struct mlx5_flow_id_pool *pool, uint32_t id)
 	return 0;
 }
 
+/**
+ * Initialize the private aging list information.
+ *
+ * @param[in] priv
+ *   Pointer to the private device data structure.
+ */
+static void
+mlx5_flow_aging_list_init(struct mlx5_priv *priv)
+{
+	TAILQ_INIT(&priv->aged_counters);
+	rte_spinlock_init(&priv->aged_sl);
+	rte_atomic16_set(&priv->trigger_event, 1);
+}
+
 /**
  * Initialize the counters management structure.
  *
@@ -446,11 +460,14 @@  mlx5_flow_id_release(struct mlx5_flow_id_pool *pool, uint32_t id)
 static void
 mlx5_flow_counters_mng_init(struct mlx5_ibv_shared *sh)
 {
-	uint8_t i;
+	uint8_t i, age;
 
+	sh->cmng.age = 0;
 	TAILQ_INIT(&sh->cmng.flow_counters);
-	for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i)
-		TAILQ_INIT(&sh->cmng.ccont[i].pool_list);
+	for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
+		for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i)
+			TAILQ_INIT(&sh->cmng.ccont[i][age].pool_list);
+	}
 }
 
 /**
@@ -480,7 +497,7 @@  static void
 mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
 {
 	struct mlx5_counter_stats_mem_mng *mng;
-	uint8_t i;
+	uint8_t i, age = 0;
 	int j;
 	int retries = 1024;
 
@@ -491,36 +508,42 @@  mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
 			break;
 		rte_pause();
 	}
-	for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) {
-		struct mlx5_flow_counter_pool *pool;
-		uint32_t batch = !!(i % 2);
 
-		if (!sh->cmng.ccont[i].pools)
-			continue;
-		pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
-		while (pool) {
-			if (batch) {
-				if (pool->min_dcs)
-					claim_zero
-					(mlx5_devx_cmd_destroy(pool->min_dcs));
-			}
-			for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
-				if (MLX5_POOL_GET_CNT(pool, j)->action)
-					claim_zero
-					(mlx5_glue->destroy_flow_action
-					 (MLX5_POOL_GET_CNT(pool, j)->action));
-				if (!batch && MLX5_GET_POOL_CNT_EXT
-				    (pool, j)->dcs)
-					claim_zero(mlx5_devx_cmd_destroy
-						  (MLX5_GET_POOL_CNT_EXT
-						  (pool, j)->dcs));
+	for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
+		for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) {
+			struct mlx5_flow_counter_pool *pool;
+			uint32_t batch = !!(i % 2);
+
+			if (!sh->cmng.ccont[i][age].pools)
+				continue;
+			pool = TAILQ_FIRST(&sh->cmng.ccont[i][age].pool_list);
+			while (pool) {
+				if (batch) {
+					if (pool->min_dcs)
+						claim_zero
+						(mlx5_devx_cmd_destroy
+						(pool->min_dcs));
+				}
+				for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
+					if (MLX5_POOL_GET_CNT(pool, j)->action)
+						claim_zero
+						(mlx5_glue->destroy_flow_action
+						 (MLX5_POOL_GET_CNT
+						  (pool, j)->action));
+					if (!batch && MLX5_GET_POOL_CNT_EXT
+					    (pool, j)->dcs)
+						claim_zero(mlx5_devx_cmd_destroy
+							  (MLX5_GET_POOL_CNT_EXT
+							  (pool, j)->dcs));
+				}
+				TAILQ_REMOVE(&sh->cmng.ccont[i][age].pool_list,
+					pool, next);
+				rte_free(pool);
+				pool = TAILQ_FIRST
+					(&sh->cmng.ccont[i][age].pool_list);
 			}
-			TAILQ_REMOVE(&sh->cmng.ccont[i].pool_list, pool,
-				     next);
-			rte_free(pool);
-			pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
+			rte_free(sh->cmng.ccont[i][age].pools);
 		}
-		rte_free(sh->cmng.ccont[i].pools);
 	}
 	mng = LIST_FIRST(&sh->cmng.mem_mngs);
 	while (mng) {
@@ -3003,6 +3026,7 @@  mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			goto error;
 		}
 	}
+	mlx5_flow_aging_list_init(priv);
 	return eth_dev;
 error:
 	if (priv) {
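
The per-port aging state introduced above consists of three fields: the aged-counter list, its spinlock, and a one-shot event flag. A minimal standalone sketch of that bookkeeping (type and field names here are illustrative, not the driver's):

    #include <sys/queue.h>
    #include <rte_atomic.h>
    #include <rte_spinlock.h>

    struct aged_counter {
        TAILQ_ENTRY(aged_counter) next; /* Stands in for mlx5_flow_counter. */
    };
    TAILQ_HEAD(aged_tailq, aged_counter);

    struct port_aging {
        struct aged_tailq aged; /* Aged flow counter list. */
        rte_spinlock_t lock;    /* Protects the aged list. */
        rte_atomic16_t trigger; /* 1: event armed, 0: already fired. */
    };

    static void
    port_aging_init(struct port_aging *pa)
    {
        TAILQ_INIT(&pa->aged);
        rte_spinlock_init(&pa->lock);
        rte_atomic16_set(&pa->trigger, 1);
    }

The flag starts armed so the first age-out after probing raises RTE_ETH_EVENT_FLOW_AGED; it is re-armed only by a later rte_flow_get_aged_flows() call.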
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 51c3f33e6b..d1b358e929 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -222,13 +222,21 @@  struct mlx5_drop {
 #define MLX5_COUNTERS_PER_POOL 512
 #define MLX5_MAX_PENDING_QUERIES 4
 #define MLX5_CNT_CONTAINER_RESIZE 64
+#define MLX5_CNT_AGE_OFFSET 0x80000000
 #define CNT_SIZE (sizeof(struct mlx5_flow_counter))
 #define CNTEXT_SIZE (sizeof(struct mlx5_flow_counter_ext))
+#define AGE_SIZE (sizeof(struct mlx5_age_param))
 
 #define CNT_POOL_TYPE_EXT	(1 << 0)
+#define CNT_POOL_TYPE_AGE	(1 << 1)
 #define IS_EXT_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_EXT)
+#define IS_AGE_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_AGE)
+#define MLX5_CNT_IS_AGE(counter) ((counter) & MLX5_CNT_AGE_OFFSET ? 1 : 0)
+
 #define MLX5_CNT_LEN(pool) \
-	(CNT_SIZE + (IS_EXT_POOL(pool) ? CNTEXT_SIZE : 0))
+	(CNT_SIZE + \
+	(IS_AGE_POOL(pool) ? AGE_SIZE : 0) + \
+	(IS_EXT_POOL(pool) ? CNTEXT_SIZE : 0))
 #define MLX5_POOL_GET_CNT(pool, index) \
 	((struct mlx5_flow_counter *) \
 	((char *)((pool) + 1) + (index) * (MLX5_CNT_LEN(pool))))
@@ -242,13 +250,33 @@  struct mlx5_drop {
  */
 #define MLX5_MAKE_CNT_IDX(pi, offset) \
 	((pi) * MLX5_COUNTERS_PER_POOL + (offset) + 1)
-#define MLX5_CNT_TO_CNT_EXT(cnt) \
-	((struct mlx5_flow_counter_ext *)((cnt) + 1))
+#define MLX5_CNT_TO_CNT_EXT(pool, cnt) \
+	((struct mlx5_flow_counter_ext *)\
+	((char *)((cnt) + 1) + \
+	(IS_AGE_POOL(pool) ? AGE_SIZE : 0)))
 #define MLX5_GET_POOL_CNT_EXT(pool, offset) \
-	MLX5_CNT_TO_CNT_EXT(MLX5_POOL_GET_CNT((pool), (offset)))
+	MLX5_CNT_TO_CNT_EXT(pool, MLX5_POOL_GET_CNT((pool), (offset)))
+#define MLX5_CNT_TO_AGE(cnt) \
+	((struct mlx5_age_param *)((cnt) + 1))
 
 struct mlx5_flow_counter_pool;
 
+/* Age status. */
+enum {
+	AGE_FREE, /* Counter not assigned to any flow. */
+	AGE_CANDIDATE, /* Counter assigned to flows. */
+	AGE_TMOUT, /* Timed out, waiting for aged-flow query and destroy. */
+};
+
+/* Counter age parameter. */
+struct mlx5_age_param {
+	rte_atomic16_t state; /**< Age state. */
+	uint16_t port_id; /**< Port id of the counter. */
+	uint32_t timeout:15; /**< Age timeout in units of 0.1 sec. */
+	uint32_t expire:16; /**< Expire time (0.1 sec) in the future. */
+	void *context; /**< Flow counter age context. */
+};
+
 struct flow_counter_stats {
 	uint64_t hits;
 	uint64_t bytes;
@@ -336,13 +364,14 @@  struct mlx5_pools_container {
 
 /* Counter global management structure. */
 struct mlx5_flow_counter_mng {
-	uint8_t mhi[2]; /* master \ host container index. */
-	struct mlx5_pools_container ccont[2 * 2];
-	/* 2 containers for single and for batch for double-buffer. */
+	uint8_t mhi[2][2]; /* master \ host and age \ no age container index. */
+	struct mlx5_pools_container ccont[2 * 2][2];
+	/* master \ host and age \ no age pools container. */
 	struct mlx5_counters flow_counters; /* Legacy flow counter list. */
 	uint8_t pending_queries;
 	uint8_t batch;
 	uint16_t pool_index;
+	uint8_t age;
 	uint8_t query_thread_on;
 	LIST_HEAD(mem_mngs, mlx5_counter_stats_mem_mng) mem_mngs;
 	LIST_HEAD(stat_raws, mlx5_counter_stats_raw) free_stat_raws;
@@ -566,6 +595,10 @@  struct mlx5_priv {
 	uint8_t fdb_def_rule; /* Whether fdb jump to table 1 is configured. */
 	struct mlx5_mp_id mp_id; /* ID of a multi-process process */
 	LIST_HEAD(fdir, mlx5_fdir_flow) fdir_flows; /* fdir flows. */
+	struct mlx5_counters aged_counters; /* Aged flow counter list. */
+	rte_spinlock_t aged_sl; /* Aged flow counter list lock. */
+	rte_atomic16_t trigger_event;
+	/* Event is triggered once after the last call of rte_flow_get_aged_flows. */
 };
 
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
@@ -764,6 +797,8 @@  int mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
 int mlx5_flow_dev_dump(struct rte_eth_dev *dev, FILE *file,
 		       struct rte_flow_error *error);
 void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev);
+int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
+			uint32_t nb_contexts, struct rte_flow_error *error);
 
 /* mlx5_mp.c */
 int mlx5_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer);
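
The macros above pack up to three parts into one counter slot: the base counter, an optional age parameter, and an optional extend part, in that order; MLX5_CNT_TO_CNT_EXT() must therefore skip the age parameter in aged pools. A compilable toy model of the stride arithmetic (struct sizes are placeholders, not the real ones):

    #include <stdio.h>

    struct cnt     { char body[24]; }; /* Stands in for mlx5_flow_counter. */
    struct age     { char body[16]; }; /* Stands in for mlx5_age_param. */
    struct cnt_ext { char body[40]; }; /* Stands in for mlx5_flow_counter_ext. */

    /* Mirrors MLX5_CNT_LEN(): base, then optional age, then optional ext. */
    static size_t
    cnt_len(int is_age, int is_ext)
    {
        return sizeof(struct cnt) +
               (is_age ? sizeof(struct age) : 0) +
               (is_ext ? sizeof(struct cnt_ext) : 0);
    }

    int
    main(void)
    {
        /* Slot i of an aged+extended pool starts at i * this stride. */
        printf("aged ext stride: %zu\n", cnt_len(1, 1));
        printf("plain stride:    %zu\n", cnt_len(0, 0));
        return 0;
    }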
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index cba1f23e81..c691b43722 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -24,6 +24,7 @@ 
 #include <rte_ether.h>
 #include <rte_ethdev_driver.h>
 #include <rte_flow.h>
+#include <rte_cycles.h>
 #include <rte_flow_driver.h>
 #include <rte_malloc.h>
 #include <rte_ip.h>
@@ -242,6 +243,7 @@  static const struct rte_flow_ops mlx5_flow_ops = {
 	.isolate = mlx5_flow_isolate,
 	.query = mlx5_flow_query,
 	.dev_dump = mlx5_flow_dev_dump,
+	.get_aged_flows = mlx5_flow_get_aged_flows,
 };
 
 /* Convert FDIR request to Generic flow. */
@@ -2531,6 +2533,8 @@  flow_drv_validate(struct rte_eth_dev *dev,
  *   Pointer to the list of items.
  * @param[in] actions
  *   Pointer to the list of actions.
+ * @param[in] flow_idx
+ *   The memory pool index to the flow.
  * @param[out] error
  *   Pointer to the error structure.
  *
@@ -2543,14 +2547,19 @@  flow_drv_prepare(struct rte_eth_dev *dev,
 		 const struct rte_flow_attr *attr,
 		 const struct rte_flow_item items[],
 		 const struct rte_flow_action actions[],
+		 uint32_t flow_idx,
 		 struct rte_flow_error *error)
 {
 	const struct mlx5_flow_driver_ops *fops;
 	enum mlx5_flow_drv_type type = flow->drv_type;
+	struct mlx5_flow *mlx5_flow = NULL;
 
 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
 	fops = flow_get_drv_ops(type);
-	return fops->prepare(dev, attr, items, actions, error);
+	mlx5_flow = fops->prepare(dev, attr, items, actions, error);
+	if (mlx5_flow)
+		mlx5_flow->flow_idx = flow_idx;
+	return mlx5_flow;
 }
 
 /**
@@ -3498,6 +3507,8 @@  flow_hairpin_split(struct rte_eth_dev *dev,
  *   Associated actions (list terminated by the END action).
  * @param[in] external
  *   This flow rule is created by request external to PMD.
+ * @param[in] flow_idx
+ *   The memory pool index to the flow.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  * @return
@@ -3511,11 +3522,13 @@  flow_create_split_inner(struct rte_eth_dev *dev,
 			const struct rte_flow_attr *attr,
 			const struct rte_flow_item items[],
 			const struct rte_flow_action actions[],
-			bool external, struct rte_flow_error *error)
+			bool external, uint32_t flow_idx,
+			struct rte_flow_error *error)
 {
 	struct mlx5_flow *dev_flow;
 
-	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, error);
+	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
+		flow_idx, error);
 	if (!dev_flow)
 		return -rte_errno;
 	dev_flow->flow = flow;
@@ -3876,6 +3889,8 @@  flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
  *   Associated actions (list terminated by the END action).
  * @param[in] external
  *   This flow rule is created by request external to PMD.
+ * @param[in] flow_idx
+ *   The memory pool index to the flow.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  * @return
@@ -3888,7 +3903,8 @@  flow_create_split_metadata(struct rte_eth_dev *dev,
 			   const struct rte_flow_attr *attr,
 			   const struct rte_flow_item items[],
 			   const struct rte_flow_action actions[],
-			   bool external, struct rte_flow_error *error)
+			   bool external, uint32_t flow_idx,
+			   struct rte_flow_error *error)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_dev_config *config = &priv->config;
@@ -3908,7 +3924,7 @@  flow_create_split_metadata(struct rte_eth_dev *dev,
 	    !mlx5_flow_ext_mreg_supported(dev))
 		return flow_create_split_inner(dev, flow, NULL, prefix_layers,
 					       attr, items, actions, external,
-					       error);
+					       flow_idx, error);
 	actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
 							   &encap_idx);
 	if (qrss) {
@@ -3992,7 +4008,7 @@  flow_create_split_metadata(struct rte_eth_dev *dev,
 	/* Add the unmodified original or prefix subflow. */
 	ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, attr,
 				      items, ext_actions ? ext_actions :
-				      actions, external, error);
+				      actions, external, flow_idx, error);
 	if (ret < 0)
 		goto exit;
 	MLX5_ASSERT(dev_flow);
@@ -4055,7 +4071,7 @@  flow_create_split_metadata(struct rte_eth_dev *dev,
 		ret = flow_create_split_inner(dev, flow, &dev_flow, layers,
 					      &q_attr, mtr_sfx ? items :
 					      q_items, q_actions,
-					      external, error);
+					      external, flow_idx, error);
 		if (ret < 0)
 			goto exit;
 		/* qrss ID should be freed if failed. */
@@ -4096,6 +4112,8 @@  flow_create_split_metadata(struct rte_eth_dev *dev,
  *   Associated actions (list terminated by the END action).
  * @param[in] external
  *   This flow rule is created by request external to PMD.
+ * @param[in] flow_idx
+ *   The memory pool index to the flow.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  * @return
@@ -4107,7 +4125,8 @@  flow_create_split_meter(struct rte_eth_dev *dev,
 			   const struct rte_flow_attr *attr,
 			   const struct rte_flow_item items[],
 			   const struct rte_flow_action actions[],
-			   bool external, struct rte_flow_error *error)
+			   bool external, uint32_t flow_idx,
+			   struct rte_flow_error *error)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct rte_flow_action *sfx_actions = NULL;
@@ -4151,7 +4170,7 @@  flow_create_split_meter(struct rte_eth_dev *dev,
 		/* Add the prefix subflow. */
 		ret = flow_create_split_inner(dev, flow, &dev_flow, 0, attr,
 					      items, pre_actions, external,
-					      error);
+					      flow_idx, error);
 		if (ret) {
 			ret = -rte_errno;
 			goto exit;
@@ -4168,7 +4187,7 @@  flow_create_split_meter(struct rte_eth_dev *dev,
 					 0, &sfx_attr,
 					 sfx_items ? sfx_items : items,
 					 sfx_actions ? sfx_actions : actions,
-					 external, error);
+					 external, flow_idx, error);
 exit:
 	if (sfx_actions)
 		rte_free(sfx_actions);
@@ -4205,6 +4224,8 @@  flow_create_split_meter(struct rte_eth_dev *dev,
  *   Associated actions (list terminated by the END action).
  * @param[in] external
  *   This flow rule is created by request external to PMD.
+ * @param[in] flow_idx
+ *   The memory pool index to the flow.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  * @return
@@ -4216,12 +4237,13 @@  flow_create_split_outer(struct rte_eth_dev *dev,
 			const struct rte_flow_attr *attr,
 			const struct rte_flow_item items[],
 			const struct rte_flow_action actions[],
-			bool external, struct rte_flow_error *error)
+			bool external, uint32_t flow_idx,
+			struct rte_flow_error *error)
 {
 	int ret;
 
 	ret = flow_create_split_meter(dev, flow, attr, items,
-					 actions, external, error);
+					 actions, external, flow_idx, error);
 	MLX5_ASSERT(ret <= 0);
 	return ret;
 }
@@ -4356,7 +4378,7 @@  flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
 		 */
 		ret = flow_create_split_outer(dev, flow, attr,
 					      buf->entry[i].pattern,
-					      p_actions_rx, external,
+					      p_actions_rx, external, idx,
 					      error);
 		if (ret < 0)
 			goto error;
@@ -4367,7 +4389,8 @@  flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
 		attr_tx.ingress = 0;
 		attr_tx.egress = 1;
 		dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
-					    actions_hairpin_tx.actions, error);
+					 actions_hairpin_tx.actions,
+					 idx, error);
 		if (!dev_flow)
 			goto error;
 		dev_flow->flow = flow;
@@ -5741,6 +5764,31 @@  mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
 
 #define MLX5_POOL_QUERY_FREQ_US 1000000
 
+/**
+ * Get the number of all valid pools.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_ibv_shared object.
+ *
+ * @return
+ *   The number of all valid pools.
+ */
+static uint32_t
+mlx5_get_all_valid_pool_count(struct mlx5_ibv_shared *sh)
+{
+	uint8_t age, i;
+	uint32_t pools_n = 0;
+	struct mlx5_pools_container *cont;
+
+	for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
+		for (i = 0; i < 2; ++i) {
+			cont = MLX5_CNT_CONTAINER(sh, i, 0, age);
+			pools_n += rte_atomic16_read(&cont->n_valid);
+		}
+	}
+	return pools_n;
+}
+
 /**
  * Set the periodic procedure for triggering asynchronous batch queries for all
  * the counter pools.
@@ -5751,12 +5799,9 @@  mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
 void
 mlx5_set_query_alarm(struct mlx5_ibv_shared *sh)
 {
-	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(sh, 0, 0);
-	uint32_t pools_n = rte_atomic16_read(&cont->n_valid);
-	uint32_t us;
+	uint32_t pools_n, us;
 
-	cont = MLX5_CNT_CONTAINER(sh, 1, 0);
-	pools_n += rte_atomic16_read(&cont->n_valid);
+	pools_n = mlx5_get_all_valid_pool_count(sh);
 	us = MLX5_POOL_QUERY_FREQ_US / pools_n;
 	DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
 	if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
@@ -5782,6 +5827,7 @@  mlx5_flow_query_alarm(void *arg)
 	uint16_t offset;
 	int ret;
 	uint8_t batch = sh->cmng.batch;
+	uint8_t age = sh->cmng.age;
 	uint16_t pool_index = sh->cmng.pool_index;
 	struct mlx5_pools_container *cont;
 	struct mlx5_pools_container *mcont;
@@ -5790,8 +5836,8 @@  mlx5_flow_query_alarm(void *arg)
 	if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
 		goto set_alarm;
 next_container:
-	cont = MLX5_CNT_CONTAINER(sh, batch, 1);
-	mcont = MLX5_CNT_CONTAINER(sh, batch, 0);
+	cont = MLX5_CNT_CONTAINER(sh, batch, 1, age);
+	mcont = MLX5_CNT_CONTAINER(sh, batch, 0, age);
 	/* Check if resize was done and need to flip a container. */
 	if (cont != mcont) {
 		if (cont->pools) {
@@ -5801,15 +5847,22 @@  mlx5_flow_query_alarm(void *arg)
 		}
 		rte_cio_wmb();
 		 /* Flip the host container. */
-		sh->cmng.mhi[batch] ^= (uint8_t)2;
+		sh->cmng.mhi[batch][age] ^= (uint8_t)2;
 		cont = mcont;
 	}
 	if (!cont->pools) {
 		/* 2 empty containers case is unexpected. */
-		if (unlikely(batch != sh->cmng.batch))
+		if (unlikely(batch != sh->cmng.batch) &&
+			unlikely(age != sh->cmng.age)) {
 			goto set_alarm;
+		}
 		batch ^= 0x1;
 		pool_index = 0;
+		if (batch == 0 && pool_index == 0) {
+			age ^= 0x1;
+			sh->cmng.batch = batch;
+			sh->cmng.age = age;
+		}
 		goto next_container;
 	}
 	pool = cont->pools[pool_index];
@@ -5852,13 +5905,76 @@  mlx5_flow_query_alarm(void *arg)
 	if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
 		batch ^= 0x1;
 		pool_index = 0;
+		if (batch == 0 && pool_index == 0)
+			age ^= 0x1;
 	}
 set_alarm:
 	sh->cmng.batch = batch;
 	sh->cmng.pool_index = pool_index;
+	sh->cmng.age = age;
 	mlx5_set_query_alarm(sh);
 }
 
+/**
+ * Check for newly aged flows in the counter pool and trigger the event.
+ *
+ * @param[in] pool
+ *   Pointer to the current counter pool.
+ */
+static void
+mlx5_flow_aging_check(struct mlx5_flow_counter_pool *pool)
+{
+	struct mlx5_priv *priv;
+	struct mlx5_flow_counter *cnt;
+	struct mlx5_age_param *age_param;
+	struct mlx5_counter_stats_raw *cur = pool->raw_hw;
+	struct mlx5_counter_stats_raw *prev = pool->raw;
+	uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10);
+	uint64_t port_mask = 0;
+	uint32_t i;
+
+	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
+		cnt = MLX5_POOL_GET_CNT(pool, i);
+		age_param = MLX5_CNT_TO_AGE(cnt);
+		if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE)
+			continue;
+		if (cur->data[i].hits != prev->data[i].hits) {
+			age_param->expire = curr + age_param->timeout;
+			continue;
+		}
+		if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2))
+			continue;
+		/**
+		 * Hold the lock first: if the release happens
+		 * between setting AGE_TMOUT and the tailq
+		 * operation, the release procedure may delete
+		 * a non-existent tailq node.
+		 */
+		priv = rte_eth_devices[age_param->port_id].data->dev_private;
+		rte_spinlock_lock(&priv->aged_sl);
+		/* If the cmpset fails, the release has happened. */
+		if (rte_atomic16_cmpset((volatile uint16_t *)
+					&age_param->state,
+					AGE_CANDIDATE,
+					AGE_TMOUT) ==
+					AGE_CANDIDATE) {
+			TAILQ_INSERT_TAIL(&priv->aged_counters, cnt, next);
+			port_mask |= (1ull << age_param->port_id);
+		}
+		rte_spinlock_unlock(&priv->aged_sl);
+	}
+	for (i = 0; i < 64; i++) {
+		if (port_mask & (1ull << i)) {
+			priv = rte_eth_devices[i].data->dev_private;
+			if (!rte_atomic16_read(&priv->trigger_event))
+				continue;
+			_rte_eth_dev_callback_process(&rte_eth_devices[i],
+				RTE_ETH_EVENT_FLOW_AGED, NULL);
+			rte_atomic16_set(&priv->trigger_event, 0);
+		}
+	}
+}
+
 /**
  * Handler for the HW respond about ready values from an asynchronous batch
  * query. This function is probably called by the host thread.
@@ -5883,6 +5999,8 @@  mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh,
 		raw_to_free = pool->raw_hw;
 	} else {
 		raw_to_free = pool->raw;
+		if (IS_AGE_POOL(pool))
+			mlx5_flow_aging_check(pool);
 		rte_spinlock_lock(&pool->sl);
 		pool->raw = pool->raw_hw;
 		rte_spinlock_unlock(&pool->sl);
@@ -6034,3 +6152,40 @@  mlx5_flow_dev_dump(struct rte_eth_dev *dev,
 	return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
 				       sh->tx_domain, file);
 }
+
+/**
+ * Get aged-out flows.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] contexts
+ *   The address of an array of pointers to the aged-out flow contexts.
+ * @param[in] nb_contexts
+ *   The length of the context array.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   The number of aged-out contexts returned on success, otherwise a
+ *   negative errno value. If nb_contexts is 0, return the total number
+ *   of aged-out contexts; otherwise return the number of aged-out flow
+ *   contexts reported in the context array.
+ */
+int
+mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
+			uint32_t nb_contexts, struct rte_flow_error *error)
+{
+	const struct mlx5_flow_driver_ops *fops;
+	struct rte_flow_attr attr = { .transfer = 0 };
+
+	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
+		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+		return fops->get_aged_flows(dev, contexts, nb_contexts,
+						    error);
+	}
+	DRV_LOG(ERR,
+		"port %u get aged flows is not supported.",
+		 dev->data->port_id);
+	return -ENOTSUP;
+}
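
For reference, an application consumes this API from the RTE_ETH_EVENT_FLOW_AGED callback. A minimal sketch, assuming an illustrative batch size MAX_AGED and that the contexts were set by the application in the AGE action:

    #include <rte_ethdev.h>
    #include <rte_flow.h>

    #define MAX_AGED 64 /* Illustrative batch size. */

    static int
    aged_flow_cb(uint16_t port_id, enum rte_eth_event_type event,
                 void *cb_arg __rte_unused, void *ret_param __rte_unused)
    {
        void *contexts[MAX_AGED];
        struct rte_flow_error error;
        int n, i;

        if (event != RTE_ETH_EVENT_FLOW_AGED)
            return 0;
        /* Drain the aged list; the PMD re-arms the event only after
         * rte_flow_get_aged_flows() has been called. */
        do {
            n = rte_flow_get_aged_flows(port_id, contexts,
                                        MAX_AGED, &error);
            for (i = 0; i < n; i++) {
                /* contexts[i]: the AGE action context, or the
                 * PMD flow index when none was provided. */
            }
        } while (n == MAX_AGED);
        return 0;
    }

Registered once per port with rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_FLOW_AGED, aged_flow_cb, NULL).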
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 2a1f59698c..bf1d5beb9b 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -199,6 +199,7 @@  enum mlx5_feature_name {
 #define MLX5_FLOW_ACTION_METER (1ull << 31)
 #define MLX5_FLOW_ACTION_SET_IPV4_DSCP (1ull << 32)
 #define MLX5_FLOW_ACTION_SET_IPV6_DSCP (1ull << 33)
+#define MLX5_FLOW_ACTION_AGE (1ull << 34)
 
 #define MLX5_FLOW_FATE_ACTIONS \
 	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | \
@@ -650,6 +651,7 @@  struct mlx5_flow_verbs_workspace {
 /** Device flow structure. */
 struct mlx5_flow {
 	struct rte_flow *flow; /**< Pointer to the main flow. */
+	uint32_t flow_idx; /**< The memory pool index to the main flow. */
 	uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
 	uint64_t act_flags;
 	/**< Bit-fields of detected actions, see MLX5_FLOW_ACTION_*. */
@@ -873,6 +875,11 @@  typedef int (*mlx5_flow_counter_query_t)(struct rte_eth_dev *dev,
 					 uint32_t cnt,
 					 bool clear, uint64_t *pkts,
 					 uint64_t *bytes);
+typedef int (*mlx5_flow_get_aged_flows_t)
+					(struct rte_eth_dev *dev,
+					 void **context,
+					 uint32_t nb_contexts,
+					 struct rte_flow_error *error);
 struct mlx5_flow_driver_ops {
 	mlx5_flow_validate_t validate;
 	mlx5_flow_prepare_t prepare;
@@ -888,13 +895,14 @@  struct mlx5_flow_driver_ops {
 	mlx5_flow_counter_alloc_t counter_alloc;
 	mlx5_flow_counter_free_t counter_free;
 	mlx5_flow_counter_query_t counter_query;
+	mlx5_flow_get_aged_flows_t get_aged_flows;
 };
 
 
-#define MLX5_CNT_CONTAINER(sh, batch, thread) (&(sh)->cmng.ccont \
-	[(((sh)->cmng.mhi[batch] >> (thread)) & 0x1) * 2 + (batch)])
-#define MLX5_CNT_CONTAINER_UNUSED(sh, batch, thread) (&(sh)->cmng.ccont \
-	[(~((sh)->cmng.mhi[batch] >> (thread)) & 0x1) * 2 + (batch)])
+#define MLX5_CNT_CONTAINER(sh, batch, thread, age) (&(sh)->cmng.ccont \
+	[(((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 + (batch)][age])
+#define MLX5_CNT_CONTAINER_UNUSED(sh, batch, thread, age) (&(sh)->cmng.ccont \
+	[(~((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 + (batch)][age])
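
The mhi[batch][age] byte keeps two flip bits per (batch, age) plane: bit 0 selects the master container (flipped on resize) and bit 1 the host copy (flipped by the query thread). A toy model of the first-dimension index these macros compute (not driver code):

    #include <stdint.h>
    #include <stdio.h>

    static int
    ccont_dim0(uint8_t mhi[2][2], int batch, int thread, int age)
    {
        /* Thread 0 reads the master bit, thread 1 the host bit. */
        return ((mhi[batch][age] >> thread) & 0x1) * 2 + batch;
    }

    int
    main(void)
    {
        uint8_t mhi[2][2] = { { 0, 0 }, { 0, 0 } };

        mhi[1][1] ^= 1; /* Resize flipped the master of (batch=1, age=1). */
        printf("ccont[%d][1]\n", ccont_dim0(mhi, 1, 0, 1)); /* -> ccont[3][1] */
        return 0;
    }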
 
 /* mlx5_flow.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 784a62c521..73a5f477f8 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -24,6 +24,7 @@ 
 #include <rte_flow.h>
 #include <rte_flow_driver.h>
 #include <rte_malloc.h>
+#include <rte_cycles.h>
 #include <rte_ip.h>
 #include <rte_gre.h>
 #include <rte_vxlan.h>
@@ -3719,6 +3720,50 @@  mlx5_flow_validate_action_meter(struct rte_eth_dev *dev,
 	return 0;
 }
 
+/**
+ * Validate the age action.
+ *
+ * @param[in] action_flags
+ *   Holds the actions detected until now.
+ * @param[in] action
+ *   Pointer to the age action.
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_action_age(uint64_t action_flags,
+			    const struct rte_flow_action *action,
+			    struct rte_eth_dev *dev,
+			    struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	const struct rte_flow_action_age *age = action->conf;
+
+	if (!priv->config.devx)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "age action not supported");
+	if (!(action->conf))
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, action,
+					  "configuration cannot be null");
+	if (age->timeout >= UINT16_MAX / 2 / 10)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, action,
+					  "Max age time: 3275 seconds");
+	if (action_flags & MLX5_FLOW_ACTION_AGE)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "Duplicate age ctions set");
+	return 0;
+}
+
 /**
  * Validate the modify-header IPv4 DSCP actions.
  *
@@ -3896,14 +3941,16 @@  flow_dv_counter_get_by_idx(struct rte_eth_dev *dev,
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_pools_container *cont;
 	struct mlx5_flow_counter_pool *pool;
-	uint32_t batch = 0;
+	uint32_t batch = 0, age = 0;
 
 	idx--;
+	age = MLX5_CNT_IS_AGE(idx);
+	idx = age ? idx - MLX5_CNT_AGE_OFFSET : idx;
 	if (idx >= MLX5_CNT_BATCH_OFFSET) {
 		idx -= MLX5_CNT_BATCH_OFFSET;
 		batch = 1;
 	}
-	cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0);
+	cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
 	MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cont->n);
 	pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
 	MLX5_ASSERT(pool);
@@ -4023,18 +4070,21 @@  flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
  *   Pointer to the Ethernet device structure.
  * @param[in] batch
  *   Whether the pool is for counter that was allocated by batch command.
+ * @param[in] age
+ *   Whether the pool is for aging counters.
  *
  * @return
  *   The new container pointer on success, otherwise NULL and rte_errno is set.
  */
 static struct mlx5_pools_container *
-flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
+flow_dv_container_resize(struct rte_eth_dev *dev,
+				uint32_t batch, uint32_t age)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_pools_container *cont =
-			MLX5_CNT_CONTAINER(priv->sh, batch, 0);
+			MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
 	struct mlx5_pools_container *new_cont =
-			MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0);
+			MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0, age);
 	struct mlx5_counter_stats_mem_mng *mem_mng = NULL;
 	uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE;
 	uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize;
@@ -4042,7 +4092,7 @@  flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
 
 	/* Fallback mode has no background thread. Skip the check. */
 	if (!priv->counter_fallback &&
-	    cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1)) {
+	    cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1, age)) {
 		/* The last resize still hasn't detected by the host thread. */
 		rte_errno = EAGAIN;
 		return NULL;
@@ -4085,7 +4135,7 @@  flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
 	new_cont->init_mem_mng = mem_mng;
 	rte_cio_wmb();
 	 /* Flip the master container. */
-	priv->sh->cmng.mhi[batch] ^= (uint8_t)1;
+	priv->sh->cmng.mhi[batch][age] ^= (uint8_t)1;
 	return new_cont;
 }
 
@@ -4117,7 +4167,7 @@  _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
 	cnt = flow_dv_counter_get_by_idx(dev, counter, &pool);
 	MLX5_ASSERT(pool);
 	if (counter < MLX5_CNT_BATCH_OFFSET) {
-		cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
+		cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
 		if (priv->counter_fallback)
 			return mlx5_devx_cmd_flow_counter_query(cnt_ext->dcs, 0,
 					0, pkts, bytes, 0, NULL, NULL, 0);
@@ -4150,6 +4200,8 @@  _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
  *   The devX counter handle.
  * @param[in] batch
  *   Whether the pool is for counter that was allocated by batch command.
+ * @param[in] age
+ *   Whether the pool is for counters allocated for aging.
  * @param[in/out] cont_cur
  *   Pointer to the container pointer, it will be update in pool resize.
  *
@@ -4158,24 +4210,23 @@  _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
  */
 static struct mlx5_pools_container *
 flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
-		    uint32_t batch)
+		    uint32_t batch, uint32_t age)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_counter_pool *pool;
 	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
-							       0);
+							       0, age);
 	int16_t n_valid = rte_atomic16_read(&cont->n_valid);
-	uint32_t size;
+	uint32_t size = sizeof(*pool);
 
 	if (cont->n == n_valid) {
-		cont = flow_dv_container_resize(dev, batch);
+		cont = flow_dv_container_resize(dev, batch, age);
 		if (!cont)
 			return NULL;
 	}
-	size = sizeof(*pool);
 	size += MLX5_COUNTERS_PER_POOL * CNT_SIZE;
-	if (!batch)
-		size += MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE;
+	size += (batch ? 0 : MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE);
+	size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * AGE_SIZE);
 	pool = rte_calloc(__func__, 1, size, 0);
 	if (!pool) {
 		rte_errno = ENOMEM;
@@ -4187,8 +4238,8 @@  flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
 						     MLX5_CNT_CONTAINER_RESIZE;
 	pool->raw_hw = NULL;
 	pool->type = 0;
-	if (!batch)
-		pool->type |= CNT_POOL_TYPE_EXT;
+	pool->type |= (batch ? 0 :  CNT_POOL_TYPE_EXT);
+	pool->type |= (!age ? 0 :  CNT_POOL_TYPE_AGE);
 	rte_spinlock_init(&pool->sl);
 	/*
 	 * The generation of the new allocated counters in this pool is 0, 2 in
@@ -4215,6 +4266,39 @@  flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
 	return cont;
 }
 
+/**
+ * Update the minimum dcs-id for aged or no-aged counter pool.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] pool
+ *   Current counter pool.
+ * @param[in] batch
+ *   Whether the pool is for counter that was allocated by batch command.
+ * @param[in] age
+ *   Whether the counter is for aging.
+ */
+static void
+flow_dv_counter_update_min_dcs(struct rte_eth_dev *dev,
+			struct mlx5_flow_counter_pool *pool,
+			uint32_t batch, uint32_t age)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_flow_counter_pool *other;
+	struct mlx5_pools_container *cont;
+
+	cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, (age ^ 0x1));
+	other = flow_dv_find_pool_by_id(cont, pool->min_dcs->id);
+	if (!other)
+		return;
+	if (pool->min_dcs->id < other->min_dcs->id) {
+		rte_atomic64_set(&other->a64_dcs,
+			rte_atomic64_read(&pool->a64_dcs));
+	} else {
+		rte_atomic64_set(&pool->a64_dcs,
+			rte_atomic64_read(&other->a64_dcs));
+	}
+}
 /**
  * Prepare a new counter and/or a new counter pool.
  *
@@ -4224,6 +4308,8 @@  flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
  *   Where to put the pointer of a new counter.
  * @param[in] batch
  *   Whether the pool is for counter that was allocated by batch command.
+ * @param[in] age
+ *   Whether the pool is for counters allocated for aging.
  *
  * @return
  *   The counter container pointer and @p cnt_free is set on success,
@@ -4232,7 +4318,7 @@  flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
 static struct mlx5_pools_container *
 flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
 			     struct mlx5_flow_counter **cnt_free,
-			     uint32_t batch)
+			     uint32_t batch, uint32_t age)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_pools_container *cont;
@@ -4241,7 +4327,7 @@  flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
 	struct mlx5_flow_counter *cnt;
 	uint32_t i;
 
-	cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0);
+	cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
 	if (!batch) {
 		/* bulk_bitmap must be 0 for single counter allocation. */
 		dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
@@ -4249,7 +4335,7 @@  flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
 			return NULL;
 		pool = flow_dv_find_pool_by_id(cont, dcs->id);
 		if (!pool) {
-			cont = flow_dv_pool_create(dev, dcs, batch);
+			cont = flow_dv_pool_create(dev, dcs, batch, age);
 			if (!cont) {
 				mlx5_devx_cmd_destroy(dcs);
 				return NULL;
@@ -4259,6 +4345,8 @@  flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
 			rte_atomic64_set(&pool->a64_dcs,
 					 (int64_t)(uintptr_t)dcs);
 		}
+		flow_dv_counter_update_min_dcs(dev,
+						pool, batch, age);
 		i = dcs->id % MLX5_COUNTERS_PER_POOL;
 		cnt = MLX5_POOL_GET_CNT(pool, i);
 		TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
@@ -4273,7 +4361,7 @@  flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
 		rte_errno = ENODATA;
 		return NULL;
 	}
-	cont = flow_dv_pool_create(dev, dcs, batch);
+	cont = flow_dv_pool_create(dev, dcs, batch, age);
 	if (!cont) {
 		mlx5_devx_cmd_destroy(dcs);
 		return NULL;
@@ -4334,13 +4422,15 @@  flow_dv_counter_shared_search(struct mlx5_pools_container *cont, uint32_t id,
  *   Counter identifier.
  * @param[in] group
  *   Counter flow group.
+ * @param[in] age
+ *   Whether the counter was allocated for aging.
  *
  * @return
  *   Index to flow counter on success, 0 otherwise and rte_errno is set.
  */
 static uint32_t
 flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
-		      uint16_t group)
+		      uint16_t group, uint32_t age)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_counter_pool *pool = NULL;
@@ -4356,7 +4446,7 @@  flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
 	 */
 	uint32_t batch = (group && !shared && !priv->counter_fallback) ? 1 : 0;
 	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
-							       0);
+							       0, age);
 	uint32_t cnt_idx;
 
 	if (!priv->config.devx) {
@@ -4395,13 +4485,13 @@  flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
 		cnt_free = NULL;
 	}
 	if (!cnt_free) {
-		cont = flow_dv_counter_pool_prepare(dev, &cnt_free, batch);
+		cont = flow_dv_counter_pool_prepare(dev, &cnt_free, batch, age);
 		if (!cont)
 			return 0;
 		pool = TAILQ_FIRST(&cont->pool_list);
 	}
 	if (!batch)
-		cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt_free);
+		cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free);
 	/* Create a DV counter action only in the first time usage. */
 	if (!cnt_free->action) {
 		uint16_t offset;
@@ -4424,6 +4514,7 @@  flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
 	cnt_idx = MLX5_MAKE_CNT_IDX(pool->index,
 				MLX5_CNT_ARRAY_IDX(pool, cnt_free));
 	cnt_idx += batch * MLX5_CNT_BATCH_OFFSET;
+	cnt_idx += age * MLX5_CNT_AGE_OFFSET;
 	/* Update the counter reset values. */
 	if (_flow_dv_query_count(dev, cnt_idx, &cnt_free->hits,
 				 &cnt_free->bytes))
@@ -4445,6 +4536,62 @@  flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
 	return cnt_idx;
 }
 
+/**
+ * Get age param from counter index.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] counter
+ *   Index to the counter handler.
+ *
+ * @return
+ *   The aging parameter specified for the counter index.
+ */
+static struct mlx5_age_param *
+flow_dv_counter_idx_get_age(struct rte_eth_dev *dev,
+				uint32_t counter)
+{
+	struct mlx5_flow_counter *cnt;
+	struct mlx5_flow_counter_pool *pool = NULL;
+
+	flow_dv_counter_get_by_idx(dev, counter, &pool);
+	counter = (counter - 1) % MLX5_COUNTERS_PER_POOL;
+	cnt = MLX5_POOL_GET_CNT(pool, counter);
+	return MLX5_CNT_TO_AGE(cnt);
+}
+
+/**
+ * Remove a flow counter from aged counter list.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] counter
+ *   Index to the counter handler.
+ * @param[in] cnt
+ *   Pointer to the counter handler.
+ */
+static void
+flow_dv_counter_remove_from_age(struct rte_eth_dev *dev,
+				uint32_t counter, struct mlx5_flow_counter *cnt)
+{
+	struct mlx5_age_param *age_param;
+	struct mlx5_priv *priv = dev->data->dev_private;
+
+	age_param = flow_dv_counter_idx_get_age(dev, counter);
+	if (rte_atomic16_cmpset((volatile uint16_t *)
+			&age_param->state,
+			AGE_CANDIDATE, AGE_FREE)
+			!= AGE_CANDIDATE) {
+		/**
+		 * We need the lock even on age timeout,
+		 * since the counter may still be in use.
+		 */
+		rte_spinlock_lock(&priv->aged_sl);
+		TAILQ_REMOVE(&priv->aged_counters, cnt, next);
+		rte_spinlock_unlock(&priv->aged_sl);
+	}
+	rte_atomic16_set(&age_param->state, AGE_FREE);
+}
 /**
  * Release a flow counter.
  *
@@ -4465,10 +4612,12 @@  flow_dv_counter_release(struct rte_eth_dev *dev, uint32_t counter)
 	cnt = flow_dv_counter_get_by_idx(dev, counter, &pool);
 	MLX5_ASSERT(pool);
 	if (counter < MLX5_CNT_BATCH_OFFSET) {
-		cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
+		cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
 		if (cnt_ext && --cnt_ext->ref_cnt)
 			return;
 	}
+	if (IS_AGE_POOL(pool))
+		flow_dv_counter_remove_from_age(dev, counter, cnt);
 	/* Put the counter in the end - the last updated one. */
 	TAILQ_INSERT_TAIL(&pool->counters, cnt, next);
 	/*
@@ -5243,6 +5392,15 @@  flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 			/* Meter action will add one more TAG action. */
 			rw_act_num += MLX5_ACT_NUM_SET_TAG;
 			break;
+		case RTE_FLOW_ACTION_TYPE_AGE:
+			ret = flow_dv_validate_action_age(action_flags,
+							  actions, dev,
+							  error);
+			if (ret < 0)
+				return ret;
+			action_flags |= MLX5_FLOW_ACTION_AGE;
+			++actions_n;
+			break;
 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
 			ret = flow_dv_validate_action_modify_ipv4_dscp
 							 (action_flags,
@@ -7281,6 +7439,54 @@  flow_dv_translate_action_port_id(struct rte_eth_dev *dev,
 	return 0;
 }
 
+/**
+ * Create a counter with aging configuration.
+ *
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
+ * @param[in] dev_flow
+ *   Pointer to the mlx5_flow structure.
+ * @param[in] count
+ *   Pointer to the counter action configuration.
+ * @param[in] age
+ *   Pointer to the aging action configuration.
+ *
+ * @return
+ *   Index to flow counter on success, 0 otherwise.
+ */
+static uint32_t
+flow_dv_translate_create_counter(struct rte_eth_dev *dev,
+				struct mlx5_flow *dev_flow,
+				const struct rte_flow_action_count *count,
+				const struct rte_flow_action_age *age)
+{
+	uint32_t counter;
+	struct mlx5_age_param *age_param;
+
+	counter = flow_dv_counter_alloc(dev,
+				count ? count->shared : 0,
+				count ? count->id : 0,
+				dev_flow->dv.group, !!age);
+
+	if (!counter || age == NULL)
+		return counter;
+	age_param = flow_dv_counter_idx_get_age(dev, counter);
+	age_param->context = age->context ? age->context :
+		(void *)(uintptr_t)(dev_flow->flow_idx);
+	/*
+	 * The counter age accuracy may have a bit of delay. Have a 0.7
+	 * second bias on the timeout in order to let it age in time.
+	 */
+	age_param->timeout = age->timeout * 10 - 7;
+	age_param->port_id = dev->data->port_id;
+	/* Set the expire time in units of 0.1 sec. */
+	age_param->expire = age_param->timeout +
+			rte_rdtsc() / (rte_get_tsc_hz() / 10);
+	rte_atomic16_set(&age_param->state, AGE_CANDIDATE);
+	return counter;
+}
 /**
  * Add Tx queue matcher
  *
@@ -7450,6 +7656,8 @@  __flow_dv_translate(struct rte_eth_dev *dev,
 			    (MLX5_MAX_MODIFY_NUM + 1)];
 	} mhdr_dummy;
 	struct mlx5_flow_dv_modify_hdr_resource *mhdr_res = &mhdr_dummy.res;
+	const struct rte_flow_action_count *count = NULL;
+	const struct rte_flow_action_age *age = NULL;
 	union flow_dv_attr flow_attr = { .attr = 0 };
 	uint32_t tag_be;
 	union mlx5_flow_tbl_key tbl_key;
@@ -7478,7 +7686,6 @@  __flow_dv_translate(struct rte_eth_dev *dev,
 		const struct rte_flow_action_queue *queue;
 		const struct rte_flow_action_rss *rss;
 		const struct rte_flow_action *action = actions;
-		const struct rte_flow_action_count *count = action->conf;
 		const uint8_t *rss_key;
 		const struct rte_flow_action_jump *jump_data;
 		const struct rte_flow_action_meter *mtr;
@@ -7607,36 +7814,21 @@  __flow_dv_translate(struct rte_eth_dev *dev,
 			action_flags |= MLX5_FLOW_ACTION_RSS;
 			dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
 			break;
+		case RTE_FLOW_ACTION_TYPE_AGE:
 		case RTE_FLOW_ACTION_TYPE_COUNT:
 			if (!dev_conf->devx) {
-				rte_errno = ENOTSUP;
-				goto cnt_err;
-			}
-			flow->counter = flow_dv_counter_alloc(dev,
-							count->shared,
-							count->id,
-							dev_flow->dv.group);
-			if (!flow->counter)
-				goto cnt_err;
-			dev_flow->dv.actions[actions_n++] =
-				  (flow_dv_counter_get_by_idx(dev,
-				  flow->counter, NULL))->action;
-			action_flags |= MLX5_FLOW_ACTION_COUNT;
-			break;
-cnt_err:
-			if (rte_errno == ENOTSUP)
 				return rte_flow_error_set
 					      (error, ENOTSUP,
 					       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 					       NULL,
 					       "count action not supported");
+			}
+			/* Save the information first; it is applied later. */
+			if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT)
+				count = action->conf;
 			else
-				return rte_flow_error_set
-						(error, rte_errno,
-						 RTE_FLOW_ERROR_TYPE_ACTION,
-						 action,
-						 "cannot create counter"
-						  " object.");
+				age = action->conf;
+			action_flags |= MLX5_FLOW_ACTION_COUNT;
 			break;
 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
 			dev_flow->dv.actions[actions_n++] =
@@ -7909,6 +8101,22 @@  __flow_dv_translate(struct rte_eth_dev *dev,
 				dev_flow->dv.actions[modify_action_position] =
 					handle->dvh.modify_hdr->verbs_action;
 			}
+			if (action_flags & MLX5_FLOW_ACTION_COUNT) {
+				flow->counter =
+					flow_dv_translate_create_counter(dev,
+						dev_flow, count, age);
+
+				if (!flow->counter)
+					return rte_flow_error_set
+						(error, rte_errno,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						NULL,
+						"cannot create counter"
+						" object.");
+				dev_flow->dv.actions[actions_n++] =
+					  (flow_dv_counter_get_by_idx(dev,
+					  flow->counter, NULL))->action;
+			}
 			break;
 		default:
 			break;
@@ -9169,6 +9377,58 @@  flow_dv_counter_query(struct rte_eth_dev *dev, uint32_t counter, bool clear,
 	return 0;
 }
 
+/**
+ * Get aged-out flows.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] context
+ *   The address of an array of pointers to the aged-out flow contexts.
+ * @param[in] nb_contexts
+ *   The length of the context array.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   The number of aged-out contexts returned on success, otherwise a
+ *   negative errno value. If nb_contexts is 0, return the total number
+ *   of aged-out contexts; otherwise return the number of aged-out flow
+ *   contexts reported in the context array.
+ */
+static int
+flow_get_aged_flows(struct rte_eth_dev *dev,
+		    void **context,
+		    uint32_t nb_contexts,
+		    struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_age_param *age_param;
+	struct mlx5_flow_counter *counter;
+	int nb_flows = 0;
+
+	if (nb_contexts && !context)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "Should assign at least one flow or"
+					  " context to get if nb_contexts != 0");
+	rte_spinlock_lock(&priv->aged_sl);
+	TAILQ_FOREACH(counter, &priv->aged_counters, next) {
+		nb_flows++;
+		if (nb_contexts) {
+			age_param = MLX5_CNT_TO_AGE(counter);
+			context[nb_flows - 1] = age_param->context;
+			if (!(--nb_contexts))
+				break;
+		}
+	}
+	rte_spinlock_unlock(&priv->aged_sl);
+	rte_atomic16_set(&priv->trigger_event, 1);
+	return nb_flows;
+}
+
 /*
  * Mutex-protected thunk to lock-free  __flow_dv_translate().
  */
@@ -9235,7 +9495,7 @@  flow_dv_counter_allocate(struct rte_eth_dev *dev)
 	uint32_t cnt;
 
 	flow_dv_shared_lock(dev);
-	cnt = flow_dv_counter_alloc(dev, 0, 0, 1);
+	cnt = flow_dv_counter_alloc(dev, 0, 0, 1, 0);
 	flow_dv_shared_unlock(dev);
 	return cnt;
 }
@@ -9266,6 +9526,7 @@  const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = {
 	.counter_alloc = flow_dv_counter_allocate,
 	.counter_free = flow_dv_counter_free,
 	.counter_query = flow_dv_counter_query,
+	.get_aged_flows = flow_get_aged_flows,
 };
 
 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
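
The aging check above keeps both the current time and the expiry in uint16_t tenths of a second, so the comparison must tolerate wrap-around. A standalone sketch of that window test:

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the test in mlx5_flow_aging_check(): aged out only when
     * curr has passed expire by less than half the uint16_t range. */
    static int
    aged_out(uint16_t curr, uint16_t expire)
    {
        return (uint16_t)(curr - expire) < UINT16_MAX / 2;
    }

    int
    main(void)
    {
        printf("%d\n", aged_out(10, 5));    /* 1: expired */
        printf("%d\n", aged_out(5, 10));    /* 0: still in the future */
        printf("%d\n", aged_out(3, 65533)); /* 1: expired across wrap */
        return 0;
    }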
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 236d665852..7efd97f547 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -56,7 +56,8 @@  flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev,
 			      struct mlx5_flow_counter_pool **ppool)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
+	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
+									0);
 	struct mlx5_flow_counter_pool *pool;
 
 	idx--;
@@ -151,7 +152,8 @@  static uint32_t
 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
+	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
+									0);
 	struct mlx5_flow_counter_pool *pool = NULL;
 	struct mlx5_flow_counter_ext *cnt_ext = NULL;
 	struct mlx5_flow_counter *cnt = NULL;
@@ -251,7 +253,7 @@  flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter)
 
 	cnt = flow_verbs_counter_get_by_idx(dev, counter,
 					    &pool);
-	cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
+	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
 	if (--cnt_ext->ref_cnt == 0) {
 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
 		claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs));
@@ -282,7 +284,7 @@  flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
 		struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx
 						(dev, flow->counter, &pool);
 		struct mlx5_flow_counter_ext *cnt_ext = MLX5_CNT_TO_CNT_EXT
-						(cnt);
+						(pool, cnt);
 		struct rte_flow_query_count *qc = data;
 		uint64_t counters[2] = {0, 0};
 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
@@ -1083,12 +1085,12 @@  flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
 	}
 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
 	cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
-	cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
+	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
 	counter.counter_set_handle = cnt_ext->cs->handle;
 	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
 	cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
-	cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt);
+	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
 	counter.counters = cnt_ext->cs;
 	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
 #endif
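
End to end, an application opts a flow into aging simply by adding the AGE action. A hedged sketch (pattern and attributes supplied by the caller; the 30-second timeout is arbitrary and must stay below the 3275-second limit validated above):

    #include <rte_flow.h>

    static struct rte_flow *
    create_aged_flow(uint16_t port_id, void *app_ctx,
                     const struct rte_flow_attr *attr,
                     const struct rte_flow_item pattern[],
                     struct rte_flow_error *error)
    {
        struct rte_flow_action_age age = {
            .timeout = 30,      /* Seconds without traffic. */
            .context = app_ctx, /* Reported back by rte_flow_get_aged_flows(). */
        };
        struct rte_flow_action actions[] = {
            { .type = RTE_FLOW_ACTION_TYPE_AGE, .conf = &age },
            { .type = RTE_FLOW_ACTION_TYPE_DROP },
            { .type = RTE_FLOW_ACTION_TYPE_END },
        };

        return rte_flow_create(port_id, attr, pattern, actions, error);
    }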