From patchwork Mon Apr 13 14:53:41 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bill Zhou X-Patchwork-Id: 68273 X-Patchwork-Delegate: rasland@nvidia.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 23CC5A0577; Mon, 13 Apr 2020 16:53:46 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 7F3A41B53; Mon, 13 Apr 2020 16:53:44 +0200 (CEST) Received: from git-send-mailer.rdmz.labs.mlnx (unknown [37.142.13.130]) by dpdk.org (Postfix) with ESMTP id 373A2F12 for ; Mon, 13 Apr 2020 16:53:43 +0200 (CEST) From: Dong Zhou To: matan@mellanox.com, dongz@mellanox.com, orika@mellanox.com, shahafs@mellanox.com, viacheslavo@mellanox.com, john.mcnamara@intel.com, marko.kovacevic@intel.com Cc: dev@dpdk.org Date: Mon, 13 Apr 2020 17:53:41 +0300 Message-Id: <20200413145342.2212-2-dongz@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20200413145342.2212-1-dongz@mellanox.com> References: <20200316125205.22169-1-dongz@mellanox.com> <20200413145342.2212-1-dongz@mellanox.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH 1/2] net/mlx5: modify ext-counter memory allocation X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Currently, a counter pool for non-batch counters needs memory for 512 ext-counters, which is allocated as one separate array placed after the memory of the 512 basic counters. This makes it hard to get the ext-counter pointer from the corresponding basic-counter pointer. It also makes it hard to add other potential types of per-counter memory. So, allocate each ext-counter together with its basic counter, as a single piece of memory. 
The same will apply to any further additional type of counter memory. This way, one piece of memory contains all the types of memory for one counter, and each type is easy to reach by using an offset. Signed-off-by: Dong Zhou --- drivers/net/mlx5/mlx5.c | 4 ++-- drivers/net/mlx5/mlx5.h | 21 ++++++++++++++++----- drivers/net/mlx5/mlx5_flow_dv.c | 27 +++++++++++++++------------ drivers/net/mlx5/mlx5_flow_verbs.c | 16 ++++++++-------- 4 files changed, 41 insertions(+), 27 deletions(-) diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 293d316413..3d21cffbd0 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -390,10 +390,10 @@ mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh) (mlx5_devx_cmd_destroy(pool->min_dcs)); } for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) { - if (pool->counters_raw[j].action) + if (MLX5_POOL_GET_CNT(pool, j)->action) claim_zero (mlx5_glue->destroy_flow_action - (pool->counters_raw[j].action)); + (MLX5_POOL_GET_CNT(pool, j)->action)); if (!batch && MLX5_GET_POOL_CNT_EXT (pool, j)->dcs) claim_zero(mlx5_devx_cmd_destroy diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index fccfe47341..2e8c745c06 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -240,6 +240,18 @@ struct mlx5_drop { #define MLX5_COUNTERS_PER_POOL 512 #define MLX5_MAX_PENDING_QUERIES 4 #define MLX5_CNT_CONTAINER_RESIZE 64 +#define CNT_SIZE (sizeof(struct mlx5_flow_counter)) +#define CNTEXT_SIZE (sizeof(struct mlx5_flow_counter_ext)) + +#define CNT_POOL_TYPE_EXT (1 << 0) +#define IS_EXT_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_EXT) +#define MLX5_CNT_LEN(pool) \ + (CNT_SIZE + (IS_EXT_POOL((pool)) ? 
CNTEXT_SIZE : 0)) +#define MLX5_POOL_GET_CNT(pool, index) \ + ((struct mlx5_flow_counter *) \ + ((char *)((pool) + 1) + (index) * (MLX5_CNT_LEN(pool)))) +#define MLX5_CNT_ARRAY_IDX(pool, cnt) \ + ((int)(((char *)(cnt) - (char *)((pool) + 1)) / MLX5_CNT_LEN((pool)))) \ /* * The pool index and offset of counter in the pool array makes up the * counter index. In case the counter is from pool 0 and offset 0, it @@ -248,11 +260,10 @@ struct mlx5_drop { */ #define MLX5_MAKE_CNT_IDX(pi, offset) \ ((pi) * MLX5_COUNTERS_PER_POOL + (offset) + 1) -#define MLX5_CNT_TO_CNT_EXT(pool, cnt) (&((struct mlx5_flow_counter_ext *) \ - ((pool) + 1))[((cnt) - (pool)->counters_raw)]) +#define MLX5_CNT_TO_CNT_EXT(cnt) \ + ((struct mlx5_flow_counter_ext *)((cnt) + 1)) #define MLX5_GET_POOL_CNT_EXT(pool, offset) \ - (&((struct mlx5_flow_counter_ext *) \ - ((pool) + 1))[offset]) + MLX5_CNT_TO_CNT_EXT(MLX5_POOL_GET_CNT((pool), (offset))) struct mlx5_flow_counter_pool; @@ -305,10 +316,10 @@ struct mlx5_flow_counter_pool { rte_atomic64_t start_query_gen; /* Query start round. */ rte_atomic64_t end_query_gen; /* Query end round. */ uint32_t index; /* Pool index in container. */ + uint32_t type: 2; rte_spinlock_t sl; /* The pool lock. */ struct mlx5_counter_stats_raw *raw; struct mlx5_counter_stats_raw *raw_hw; /* The raw on HW working. */ - struct mlx5_flow_counter counters_raw[MLX5_COUNTERS_PER_POOL]; /* The pool counters memory. 
*/ }; diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 18ea577f8c..aa8a774f77 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -3854,7 +3854,7 @@ flow_dv_counter_get_by_idx(struct rte_eth_dev *dev, MLX5_ASSERT(pool); if (ppool) *ppool = pool; - return &pool->counters_raw[idx % MLX5_COUNTERS_PER_POOL]; + return MLX5_POOL_GET_CNT(pool, idx % MLX5_COUNTERS_PER_POOL); } /** @@ -4062,7 +4062,7 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts, cnt = flow_dv_counter_get_by_idx(dev, counter, &pool); MLX5_ASSERT(pool); if (counter < MLX5_CNT_BATCH_OFFSET) { - cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt); + cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt); if (priv->counter_fallback) return mlx5_devx_cmd_flow_counter_query(cnt_ext->dcs, 0, 0, pkts, bytes, 0, NULL, NULL, 0); @@ -4078,7 +4078,7 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts, *pkts = 0; *bytes = 0; } else { - offset = cnt - &pool->counters_raw[0]; + offset = MLX5_CNT_ARRAY_IDX(pool, cnt); *pkts = rte_be_to_cpu_64(pool->raw->data[offset].hits); *bytes = rte_be_to_cpu_64(pool->raw->data[offset].bytes); } @@ -4118,9 +4118,9 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs, return NULL; } size = sizeof(*pool); + size += MLX5_COUNTERS_PER_POOL * CNT_SIZE; if (!batch) - size += MLX5_COUNTERS_PER_POOL * - sizeof(struct mlx5_flow_counter_ext); + size += MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE; pool = rte_calloc(__func__, 1, size, 0); if (!pool) { rte_errno = ENOMEM; @@ -4131,6 +4131,9 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs, pool->raw = cont->init_mem_mng->raws + n_valid % MLX5_CNT_CONTAINER_RESIZE; pool->raw_hw = NULL; + pool->type = 0; + if (!batch) + pool->type |= CNT_POOL_TYPE_EXT; rte_spinlock_init(&pool->sl); /* * The generation of the new allocated counters in this pool is 0, 2 in @@ -4202,7 +4205,7 @@ 
flow_dv_counter_pool_prepare(struct rte_eth_dev *dev, (int64_t)(uintptr_t)dcs); } i = dcs->id % MLX5_COUNTERS_PER_POOL; - cnt = &pool->counters_raw[i]; + cnt = MLX5_POOL_GET_CNT(pool, i); TAILQ_INSERT_HEAD(&pool->counters, cnt, next); MLX5_GET_POOL_CNT_EXT(pool, i)->dcs = dcs; *cnt_free = cnt; @@ -4222,10 +4225,10 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev, } pool = TAILQ_FIRST(&cont->pool_list); for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) { - cnt = &pool->counters_raw[i]; + cnt = MLX5_POOL_GET_CNT(pool, i); TAILQ_INSERT_HEAD(&pool->counters, cnt, next); } - *cnt_free = &pool->counters_raw[0]; + *cnt_free = MLX5_POOL_GET_CNT(pool, 0); return cont; } @@ -4343,14 +4346,14 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, pool = TAILQ_FIRST(&cont->pool_list); } if (!batch) - cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free); + cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt_free); /* Create a DV counter action only in the first time usage. */ if (!cnt_free->action) { uint16_t offset; struct mlx5_devx_obj *dcs; if (batch) { - offset = cnt_free - &pool->counters_raw[0]; + offset = MLX5_CNT_ARRAY_IDX(pool, cnt_free); dcs = pool->min_dcs; } else { offset = 0; @@ -4364,7 +4367,7 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, } } cnt_idx = MLX5_MAKE_CNT_IDX(pool->index, - (cnt_free - pool->counters_raw)); + MLX5_CNT_ARRAY_IDX(pool, cnt_free)); cnt_idx += batch * MLX5_CNT_BATCH_OFFSET; /* Update the counter reset values. 
*/ if (_flow_dv_query_count(dev, cnt_idx, &cnt_free->hits, @@ -4407,7 +4410,7 @@ flow_dv_counter_release(struct rte_eth_dev *dev, uint32_t counter) cnt = flow_dv_counter_get_by_idx(dev, counter, &pool); MLX5_ASSERT(pool); if (counter < MLX5_CNT_BATCH_OFFSET) { - cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt); + cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt); if (cnt_ext && --cnt_ext->ref_cnt) return; } diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c index ef4d7a3620..1a5c880221 100644 --- a/drivers/net/mlx5/mlx5_flow_verbs.c +++ b/drivers/net/mlx5/mlx5_flow_verbs.c @@ -64,7 +64,7 @@ flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev, MLX5_ASSERT(pool); if (ppool) *ppool = pool; - return &pool->counters_raw[idx % MLX5_COUNTERS_PER_POOL]; + return MLX5_POOL_GET_CNT(pool, idx % MLX5_COUNTERS_PER_POOL); } /** @@ -207,16 +207,16 @@ flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id) if (!pool) return 0; for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) { - cnt = &pool->counters_raw[i]; + cnt = MLX5_POOL_GET_CNT(pool, i); TAILQ_INSERT_HEAD(&pool->counters, cnt, next); } - cnt = &pool->counters_raw[0]; + cnt = MLX5_POOL_GET_CNT(pool, 0); cont->pools[n_valid] = pool; pool_idx = n_valid; rte_atomic16_add(&cont->n_valid, 1); TAILQ_INSERT_HEAD(&cont->pool_list, pool, next); } - i = cnt - pool->counters_raw; + i = MLX5_CNT_ARRAY_IDX(pool, cnt); cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i); cnt_ext->id = id; cnt_ext->shared = shared; @@ -251,7 +251,7 @@ flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter) cnt = flow_verbs_counter_get_by_idx(dev, counter, &pool); - cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt); + cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt); if (--cnt_ext->ref_cnt == 0) { #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs)); @@ -282,7 +282,7 @@ flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused, struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx (dev, 
flow->counter, &pool); struct mlx5_flow_counter_ext *cnt_ext = MLX5_CNT_TO_CNT_EXT - (pool, cnt); + (cnt); struct rte_flow_query_count *qc = data; uint64_t counters[2] = {0, 0}; #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) @@ -1090,12 +1090,12 @@ flow_verbs_translate_action_count(struct mlx5_flow *dev_flow, } #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool); - cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt); + cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt); counter.counter_set_handle = cnt_ext->cs->handle; flow_verbs_spec_add(&dev_flow->verbs, &counter, size); #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45) cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool); - cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt); + cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt); counter.counters = cnt_ext->cs; flow_verbs_spec_add(&dev_flow->verbs, &counter, size); #endif From patchwork Mon Apr 13 14:53:42 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bill Zhou X-Patchwork-Id: 68275 X-Patchwork-Delegate: rasland@nvidia.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id C8D7EA0577; Mon, 13 Apr 2020 16:54:02 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 18189199BC; Mon, 13 Apr 2020 16:53:47 +0200 (CEST) Received: from git-send-mailer.rdmz.labs.mlnx (unknown [37.142.13.130]) by dpdk.org (Postfix) with ESMTP id 3EFAA293C for ; Mon, 13 Apr 2020 16:53:43 +0200 (CEST) From: Dong Zhou To: matan@mellanox.com, dongz@mellanox.com, orika@mellanox.com, shahafs@mellanox.com, viacheslavo@mellanox.com, john.mcnamara@intel.com, marko.kovacevic@intel.com Cc: dev@dpdk.org Date: Mon, 13 Apr 2020 17:53:42 +0300 Message-Id: <20200413145342.2212-3-dongz@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: 
<20200413145342.2212-1-dongz@mellanox.com> References: <20200316125205.22169-1-dongz@mellanox.com> <20200413145342.2212-1-dongz@mellanox.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH 2/2] net/mlx5: support flow aging X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Currently, the mlx5 driver has no flow aging check and no age-out event callback mechanism; this patch implements them. It includes: - Splitting the current counter container into aged and non-aged containers in order to reduce memory consumption. The aged container allocates extra memory to save the aging parameters from the user configuration. - An aging check and age-out event callback mechanism based on the current counters. When a flow is found to be aged out, the RTE_ETH_EVENT_FLOW_AGED event is triggered to applications. - Implementing the new API rte_flow_get_aged_flows, which applications can use to get the aged flows. Signed-off-by: Dong Zhou --- doc/guides/rel_notes/release_20_05.rst | 1 + drivers/net/mlx5/mlx5.c | 30 ++- drivers/net/mlx5/mlx5.h | 46 +++- drivers/net/mlx5/mlx5_flow.c | 147 ++++++++++- drivers/net/mlx5/mlx5_flow.h | 15 +- drivers/net/mlx5/mlx5_flow_dv.c | 321 +++++++++++++++++++++---- drivers/net/mlx5/mlx5_flow_verbs.c | 14 +- 7 files changed, 494 insertions(+), 80 deletions(-) diff --git a/doc/guides/rel_notes/release_20_05.rst b/doc/guides/rel_notes/release_20_05.rst index 6b3cd8cda7..51f79019c1 100644 --- a/doc/guides/rel_notes/release_20_05.rst +++ b/doc/guides/rel_notes/release_20_05.rst @@ -63,6 +63,7 @@ New Features * Added support for matching on IPv4 Time To Live and IPv6 Hop Limit. * Added support for creating Relaxed Ordering Memory Regions. * Added support for jumbo frame size (9K MTU) in Multi-Packet RQ mode. + * Added support for flow aging based on hardware counter. 
* **Updated the Intel ice driver.** diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 3d21cffbd0..bb99166511 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -331,11 +331,16 @@ mlx5_flow_id_release(struct mlx5_flow_id_pool *pool, uint32_t id) static void mlx5_flow_counters_mng_init(struct mlx5_ibv_shared *sh) { - uint8_t i; + uint8_t i, age; TAILQ_INIT(&sh->cmng.flow_counters); - for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) - TAILQ_INIT(&sh->cmng.ccont[i].pool_list); + for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) { + for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) + TAILQ_INIT(&sh->cmng.ccont[i][age].pool_list); + } + sh->cmng.age = 0; + TAILQ_INIT(&sh->cmng.aged_counters); + rte_spinlock_init(&sh->cmng.aged_sl); } /** @@ -365,7 +370,7 @@ static void mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh) { struct mlx5_counter_stats_mem_mng *mng; - uint8_t i; + uint8_t i, age = 0; int j; int retries = 1024; @@ -376,13 +381,14 @@ mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh) break; rte_pause(); } +age_again: for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) { struct mlx5_flow_counter_pool *pool; uint32_t batch = !!(i % 2); - if (!sh->cmng.ccont[i].pools) + if (!sh->cmng.ccont[i][age].pools) continue; - pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list); + pool = TAILQ_FIRST(&sh->cmng.ccont[i][age].pool_list); while (pool) { if (batch) { if (pool->min_dcs) @@ -400,12 +406,16 @@ mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh) (MLX5_GET_POOL_CNT_EXT (pool, j)->dcs)); } - TAILQ_REMOVE(&sh->cmng.ccont[i].pool_list, pool, - next); + TAILQ_REMOVE(&sh->cmng.ccont[i][age].pool_list, + pool, next); rte_free(pool); - pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list); + pool = TAILQ_FIRST(&sh->cmng.ccont[i][age].pool_list); } - rte_free(sh->cmng.ccont[i].pools); + rte_free(sh->cmng.ccont[i][age].pools); + } + if (!age) { + age = 1; + goto age_again; } mng = LIST_FIRST(&sh->cmng.mem_mngs); while (mng) { diff --git 
a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 2e8c745c06..03a5b5a7c5 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -240,13 +240,21 @@ struct mlx5_drop { #define MLX5_COUNTERS_PER_POOL 512 #define MLX5_MAX_PENDING_QUERIES 4 #define MLX5_CNT_CONTAINER_RESIZE 64 +#define MLX5_CNT_AGE_OFFSET 0x80000000 #define CNT_SIZE (sizeof(struct mlx5_flow_counter)) #define CNTEXT_SIZE (sizeof(struct mlx5_flow_counter_ext)) +#define AGE_SIZE (sizeof(struct mlx5_age_param)) #define CNT_POOL_TYPE_EXT (1 << 0) +#define CNT_POOL_TYPE_AGE (1 << 1) #define IS_EXT_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_EXT) +#define IS_AGE_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_AGE) +#define MLX_CNT_IS_AGE(counter) ((counter) & MLX5_CNT_AGE_OFFSET ? 1 : 0) + #define MLX5_CNT_LEN(pool) \ - (CNT_SIZE + (IS_EXT_POOL((pool)) ? CNTEXT_SIZE : 0)) + (CNT_SIZE + \ + (IS_AGE_POOL((pool)) ? AGE_SIZE : 0) + \ + (IS_EXT_POOL((pool)) ? CNTEXT_SIZE : 0)) #define MLX5_POOL_GET_CNT(pool, index) \ ((struct mlx5_flow_counter *) \ ((char *)((pool) + 1) + (index) * (MLX5_CNT_LEN(pool)))) @@ -260,13 +268,33 @@ struct mlx5_drop { */ #define MLX5_MAKE_CNT_IDX(pi, offset) \ ((pi) * MLX5_COUNTERS_PER_POOL + (offset) + 1) -#define MLX5_CNT_TO_CNT_EXT(cnt) \ - ((struct mlx5_flow_counter_ext *)((cnt) + 1)) +#define MLX5_CNT_TO_CNT_EXT(pool, cnt) \ + ((struct mlx5_flow_counter_ext *)\ + ((char *)((cnt) + 1) + \ + (IS_AGE_POOL(pool) ? AGE_SIZE : 0))) #define MLX5_GET_POOL_CNT_EXT(pool, offset) \ - MLX5_CNT_TO_CNT_EXT(MLX5_POOL_GET_CNT((pool), (offset))) + MLX5_CNT_TO_CNT_EXT(pool, MLX5_POOL_GET_CNT((pool), (offset))) +#define MLX5_CNT_TO_AGE(cnt) \ + ((struct mlx5_age_param *)((cnt) + 1)) struct mlx5_flow_counter_pool; +/*age status*/ +enum { + AGE_FREE, + AGE_CANDIDATE, /* Counter assigned to flows. */ + AGE_TMOUT, /* Timeout, wait for aged flows query and destroy. */ +}; + +/* Counter age parameter. */ +struct mlx5_age_param { + rte_atomic16_t state; /**< Age state. 
*/ + uint32_t timeout:15; /**< Age timeout in unit of 0.1sec. */ + uint32_t expire:16; /**< Expire time(0.1sec) in the future. */ + uint16_t port_id; /**< Port id of the counter. */ + void *context; /**< Flow counter age context. */ +}; + struct flow_counter_stats { uint64_t hits; uint64_t bytes; @@ -355,13 +383,15 @@ struct mlx5_pools_container { /* Counter global management structure. */ struct mlx5_flow_counter_mng { - uint8_t mhi[2]; /* master \ host container index. */ - struct mlx5_pools_container ccont[2 * 2]; - /* 2 containers for single and for batch for double-buffer. */ + uint8_t mhi[2][2]; /* master \ host container index. */ + struct mlx5_pools_container ccont[2 * 2][2]; + struct mlx5_counters aged_counters; /* Aged flow counter list. */ + rte_spinlock_t aged_sl; /* Aged flow counter list lock. */ struct mlx5_counters flow_counters; /* Legacy flow counter list. */ uint8_t pending_queries; uint8_t batch; uint16_t pool_index; + uint8_t age; uint8_t query_thread_on; LIST_HEAD(mem_mngs, mlx5_counter_stats_mem_mng) mem_mngs; LIST_HEAD(stat_raws, mlx5_counter_stats_raw) free_stat_raws; @@ -792,6 +822,8 @@ int mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt, bool clear, uint64_t *pkts, uint64_t *bytes); int mlx5_flow_dev_dump(struct rte_eth_dev *dev, FILE *file, struct rte_flow_error *error); +int mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts, + uint32_t nb_contexts, struct rte_flow_error *error); /* mlx5_mp.c */ void mlx5_mp_req_start_rxtx(struct rte_eth_dev *dev); diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index c44bc1f526..58d6b8a9c5 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -242,6 +243,7 @@ static const struct rte_flow_ops mlx5_flow_ops = { .isolate = mlx5_flow_isolate, .query = mlx5_flow_query, .dev_dump = mlx5_flow_dev_dump, + .get_aged_flows = mlx5_flow_get_aged_flows, }; /* 
Convert FDIR request to Generic flow. */ @@ -5586,6 +5588,31 @@ mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt, #define MLX5_POOL_QUERY_FREQ_US 1000000 +/** + * Get number of all validate pools. + * + * @param[in] sh + * Pointer to mlx5_ibv_shared object. + * + * @return + * The number of all validate pools. + */ +static uint32_t +mlx5_get_all_valid_pool_count(struct mlx5_ibv_shared *sh) +{ + uint8_t age, i; + uint32_t pools_n = 0; + struct mlx5_pools_container *cont; + + for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) { + for (i = 0; i < 2 ; i++) { + cont = MLX5_CNT_CONTAINER(sh, i, 0, age); + pools_n += rte_atomic16_read(&cont->n_valid); + } + } + return pools_n; +} + /** * Set the periodic procedure for triggering asynchronous batch queries for all * the counter pools. @@ -5596,12 +5623,9 @@ mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt, void mlx5_set_query_alarm(struct mlx5_ibv_shared *sh) { - struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(sh, 0, 0); - uint32_t pools_n = rte_atomic16_read(&cont->n_valid); - uint32_t us; + uint32_t pools_n, us; - cont = MLX5_CNT_CONTAINER(sh, 1, 0); - pools_n += rte_atomic16_read(&cont->n_valid); + pools_n = mlx5_get_all_valid_pool_count(sh); us = MLX5_POOL_QUERY_FREQ_US / pools_n; DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us); if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) { @@ -5627,6 +5651,7 @@ mlx5_flow_query_alarm(void *arg) uint16_t offset; int ret; uint8_t batch = sh->cmng.batch; + uint8_t age = sh->cmng.age; uint16_t pool_index = sh->cmng.pool_index; struct mlx5_pools_container *cont; struct mlx5_pools_container *mcont; @@ -5635,8 +5660,8 @@ mlx5_flow_query_alarm(void *arg) if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES) goto set_alarm; next_container: - cont = MLX5_CNT_CONTAINER(sh, batch, 1); - mcont = MLX5_CNT_CONTAINER(sh, batch, 0); + cont = MLX5_CNT_CONTAINER(sh, batch, 1, age); + mcont = MLX5_CNT_CONTAINER(sh, batch, 0, age); /* Check if 
resize was done and need to flip a container. */ if (cont != mcont) { if (cont->pools) { @@ -5646,15 +5671,22 @@ mlx5_flow_query_alarm(void *arg) } rte_cio_wmb(); /* Flip the host container. */ - sh->cmng.mhi[batch] ^= (uint8_t)2; + sh->cmng.mhi[batch][age] ^= (uint8_t)2; cont = mcont; } if (!cont->pools) { /* 2 empty containers case is unexpected. */ - if (unlikely(batch != sh->cmng.batch)) + if (unlikely(batch != sh->cmng.batch) && + unlikely(age != sh->cmng.age)) { goto set_alarm; + } batch ^= 0x1; pool_index = 0; + if (batch == 0 && pool_index == 0) { + age ^= 0x1; + sh->cmng.batch = batch; + sh->cmng.age = age; + } goto next_container; } pool = cont->pools[pool_index]; @@ -5697,13 +5729,65 @@ mlx5_flow_query_alarm(void *arg) if (pool_index >= rte_atomic16_read(&cont->n_valid)) { batch ^= 0x1; pool_index = 0; + if (batch == 0 && pool_index == 0) + age ^= 0x1; } set_alarm: sh->cmng.batch = batch; sh->cmng.pool_index = pool_index; + sh->cmng.age = age; mlx5_set_query_alarm(sh); } +static void +mlx5_flow_aging_check(struct mlx5_ibv_shared *sh, + struct mlx5_flow_counter_pool *pool) +{ + struct mlx5_flow_counter *cnt; + struct mlx5_age_param *age_param; + struct mlx5_counter_stats_raw *cur = pool->raw_hw; + struct mlx5_counter_stats_raw *prev = pool->raw; + uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10); + uint64_t port_mask = 0; + uint32_t i; + + for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) { + cnt = MLX5_POOL_GET_CNT(pool, i); + age_param = MLX5_CNT_TO_AGE(cnt); + if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE) + continue; + if (cur->data[i].hits != prev->data[i].hits) { + age_param->expire = curr + age_param->timeout; + continue; + } + if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2)) + continue; + /** + * Hold the lock first, or if between the + * state AGE_TMOUT and tailq operation the + * release happened, the release procedure + * may delete a non-existent tailq node. 
+ */ + rte_spinlock_lock(&sh->cmng.aged_sl); + /* If the cpmset fails, release happens. */ + if (rte_atomic16_cmpset((volatile uint16_t *) + &age_param->state, + AGE_CANDIDATE, + AGE_TMOUT) == + AGE_CANDIDATE) { + TAILQ_INSERT_TAIL(&sh->cmng.aged_counters, cnt, next); + port_mask |= (1 << age_param->port_id); + } + rte_spinlock_unlock(&sh->cmng.aged_sl); + } + + for (i = 0; i < 64; i++) { + if (port_mask & (1ull << i)) + _rte_eth_dev_callback_process(&rte_eth_devices[i], + RTE_ETH_EVENT_FLOW_AGED, NULL); + } +} + /** * Handler for the HW respond about ready values from an asynchronous batch * query. This function is probably called by the host thread. @@ -5728,6 +5812,14 @@ mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh, raw_to_free = pool->raw_hw; } else { raw_to_free = pool->raw; + /** + * The the registered flow aged callback in age trigger + * function may hold the pool spinlock in case concurrent + * access to the aged flows tailq. So put the age trigger + * call out of the pool spinlock to avoid deadlock. + */ + if (IS_AGE_POOL(pool)) + mlx5_flow_aging_check(sh, pool); rte_spinlock_lock(&pool->sl); pool->raw = pool->raw_hw; rte_spinlock_unlock(&pool->sl); @@ -5876,3 +5968,40 @@ mlx5_flow_dev_dump(struct rte_eth_dev *dev, return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain, sh->tx_domain, file); } + +/** + * Get aged-out flows. + * + * @param[in] dev + * Pointer to the Ethernet device structure. + * @param[in] context + * The address of an array of pointers to the aged-out flows contexts. + * @param[in] nb_countexts + * The length of context array pointers. + * @param[out] error + * Perform verbose error reporting if not NULL. Initialized in case of + * error only. + * + * @return + * how many contexts get in success, otherwise negative errno value. + * if nb_contexts is 0, return the amount of all aged contexts. + * if nb_contexts is not 0 , return the amount of aged flows reported + * in the context array. 
+ */ +int +mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts, + uint32_t nb_contexts, struct rte_flow_error *error) +{ + const struct mlx5_flow_driver_ops *fops; + struct rte_flow_attr attr = { .transfer = 0 }; + + if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { + fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); + return fops->get_aged_flows(dev, contexts, nb_contexts, + error); + } + DRV_LOG(ERR, + "port %u get aged flows is not supported.", + dev->data->port_id); + return -ENOTSUP; +} diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index daa1f84145..eb6dff204f 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -199,6 +199,7 @@ enum mlx5_feature_name { #define MLX5_FLOW_ACTION_METER (1ull << 31) #define MLX5_FLOW_ACTION_SET_IPV4_DSCP (1ull << 32) #define MLX5_FLOW_ACTION_SET_IPV6_DSCP (1ull << 33) +#define MLX5_FLOW_ACTION_AGE (1ull << 34) #define MLX5_FLOW_FATE_ACTIONS \ (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | \ @@ -788,6 +789,11 @@ typedef int (*mlx5_flow_counter_query_t)(struct rte_eth_dev *dev, uint32_t cnt, bool clear, uint64_t *pkts, uint64_t *bytes); +typedef int (*mlx5_flow_get_aged_flows_t) + (struct rte_eth_dev *dev, + void **context, + uint32_t nb_contexts, + struct rte_flow_error *error); struct mlx5_flow_driver_ops { mlx5_flow_validate_t validate; mlx5_flow_prepare_t prepare; @@ -803,13 +809,14 @@ struct mlx5_flow_driver_ops { mlx5_flow_counter_alloc_t counter_alloc; mlx5_flow_counter_free_t counter_free; mlx5_flow_counter_query_t counter_query; + mlx5_flow_get_aged_flows_t get_aged_flows; }; -#define MLX5_CNT_CONTAINER(sh, batch, thread) (&(sh)->cmng.ccont \ - [(((sh)->cmng.mhi[batch] >> (thread)) & 0x1) * 2 + (batch)]) -#define MLX5_CNT_CONTAINER_UNUSED(sh, batch, thread) (&(sh)->cmng.ccont \ - [(~((sh)->cmng.mhi[batch] >> (thread)) & 0x1) * 2 + (batch)]) +#define MLX5_CNT_CONTAINER(sh, batch, thread, age) (&(sh)->cmng.ccont \ + [(((sh)->cmng.mhi[batch][age] >> 
(thread)) & 0x1) * 2 + (batch)][age]) +#define MLX5_CNT_CONTAINER_UNUSED(sh, batch, thread, age) (&(sh)->cmng.ccont \ + [(~((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 + (batch)][age]) /* mlx5_flow.c */ diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index aa8a774f77..5ec6de08bd 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -3664,6 +3665,50 @@ mlx5_flow_validate_action_meter(struct rte_eth_dev *dev, return 0; } +/** + * Validate the age action. + * + * @param[in] action_flags + * Holds the actions detected until now. + * @param[in] action + * Pointer to the age action. + * @param[in] dev + * Pointer to the Ethernet device structure. + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +flow_dv_validate_action_age(uint64_t action_flags, + const struct rte_flow_action *action, + struct rte_eth_dev *dev, + struct rte_flow_error *error) +{ + struct mlx5_priv *priv = dev->data->dev_private; + const struct rte_flow_action_age *age = action->conf; + + if (!priv->config.devx) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, + "age action not supported"); + if (!(action->conf)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "configuration cannot be null"); + if (age->timeout >= UINT16_MAX / 2 / 10) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "Max age time: 3270 seconds"); + if (action_flags & MLX5_FLOW_ACTION_AGE) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "Duplicate age ctions set"); + return 0; +} + /** * Validate the modify-header IPv4 DSCP actions. 
* @@ -3841,14 +3886,16 @@ flow_dv_counter_get_by_idx(struct rte_eth_dev *dev, struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_pools_container *cont; struct mlx5_flow_counter_pool *pool; - uint32_t batch = 0; + uint32_t batch = 0, age = 0; idx--; + age = MLX_CNT_IS_AGE(idx); + idx = age ? idx - MLX5_CNT_AGE_OFFSET : idx; if (idx >= MLX5_CNT_BATCH_OFFSET) { idx -= MLX5_CNT_BATCH_OFFSET; batch = 1; } - cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0); + cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age); MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cont->n); pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL]; MLX5_ASSERT(pool); @@ -3968,18 +4015,21 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n) * Pointer to the Ethernet device structure. * @param[in] batch * Whether the pool is for counter that was allocated by batch command. + * @param[in] age + * Whether the pool is for Aging counter. * * @return * The new container pointer on success, otherwise NULL and rte_errno is set. */ static struct mlx5_pools_container * -flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch) +flow_dv_container_resize(struct rte_eth_dev *dev, + uint32_t batch, uint32_t age) { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_pools_container *cont = - MLX5_CNT_CONTAINER(priv->sh, batch, 0); + MLX5_CNT_CONTAINER(priv->sh, batch, 0, age); struct mlx5_pools_container *new_cont = - MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0); + MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0, age); struct mlx5_counter_stats_mem_mng *mem_mng = NULL; uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE; uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize; @@ -3987,7 +4037,7 @@ flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch) /* Fallback mode has no background thread. Skip the check. 
*/ if (!priv->counter_fallback && - cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1)) { + cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1, age)) { /* The last resize still hasn't detected by the host thread. */ rte_errno = EAGAIN; return NULL; @@ -4030,7 +4080,7 @@ flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch) new_cont->init_mem_mng = mem_mng; rte_cio_wmb(); /* Flip the master container. */ - priv->sh->cmng.mhi[batch] ^= (uint8_t)1; + priv->sh->cmng.mhi[batch][age] ^= (uint8_t)1; return new_cont; } @@ -4062,7 +4112,7 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts, cnt = flow_dv_counter_get_by_idx(dev, counter, &pool); MLX5_ASSERT(pool); if (counter < MLX5_CNT_BATCH_OFFSET) { - cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt); + cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt); if (priv->counter_fallback) return mlx5_devx_cmd_flow_counter_query(cnt_ext->dcs, 0, 0, pkts, bytes, 0, NULL, NULL, 0); @@ -4103,17 +4153,17 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts, */ static struct mlx5_pools_container * flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs, - uint32_t batch) + uint32_t batch, uint32_t age) { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_flow_counter_pool *pool; struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch, - 0); + 0, age); int16_t n_valid = rte_atomic16_read(&cont->n_valid); uint32_t size; if (cont->n == n_valid) { - cont = flow_dv_container_resize(dev, batch); + cont = flow_dv_container_resize(dev, batch, age); if (!cont) return NULL; } @@ -4121,6 +4171,8 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs, size += MLX5_COUNTERS_PER_POOL * CNT_SIZE; if (!batch) size += MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE; + if (age) + size += MLX5_COUNTERS_PER_POOL * AGE_SIZE; pool = rte_calloc(__func__, 1, size, 0); if (!pool) { rte_errno = ENOMEM; @@ -4134,6 +4186,8 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct 
mlx5_devx_obj *dcs, pool->type = 0; if (!batch) pool->type |= CNT_POOL_TYPE_EXT; + if (age) + pool->type |= CNT_POOL_TYPE_AGE; rte_spinlock_init(&pool->sl); /* * The generation of the new allocated counters in this pool is 0, 2 in @@ -4160,6 +4214,27 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs, return cont; } +static void +flow_dv_counter_update_min_dcs(struct rte_eth_dev *dev, + struct mlx5_flow_counter_pool *pool, + uint32_t batch, uint32_t age) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_flow_counter_pool *other; + struct mlx5_pools_container *cont; + + cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, (age ^ 0x1)); + other = flow_dv_find_pool_by_id(cont, pool->min_dcs->id); + if (!other) + return; + if (pool->min_dcs->id < other->min_dcs->id) { + rte_atomic64_set(&other->a64_dcs, + rte_atomic64_read(&pool->a64_dcs)); + } else { + rte_atomic64_set(&pool->a64_dcs, + rte_atomic64_read(&other->a64_dcs)); + } +} /** * Prepare a new counter and/or a new counter pool. * @@ -4177,7 +4252,7 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs, static struct mlx5_pools_container * flow_dv_counter_pool_prepare(struct rte_eth_dev *dev, struct mlx5_flow_counter **cnt_free, - uint32_t batch) + uint32_t batch, uint32_t age) { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_pools_container *cont; @@ -4186,7 +4261,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev, struct mlx5_flow_counter *cnt; uint32_t i; - cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0); + cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age); if (!batch) { /* bulk_bitmap must be 0 for single counter allocation. 
*/ dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0); @@ -4194,7 +4269,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev, return NULL; pool = flow_dv_find_pool_by_id(cont, dcs->id); if (!pool) { - cont = flow_dv_pool_create(dev, dcs, batch); + cont = flow_dv_pool_create(dev, dcs, batch, age); if (!cont) { mlx5_devx_cmd_destroy(dcs); return NULL; @@ -4204,6 +4279,8 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev, rte_atomic64_set(&pool->a64_dcs, (int64_t)(uintptr_t)dcs); } + flow_dv_counter_update_min_dcs(dev, + pool, batch, age); i = dcs->id % MLX5_COUNTERS_PER_POOL; cnt = MLX5_POOL_GET_CNT(pool, i); TAILQ_INSERT_HEAD(&pool->counters, cnt, next); @@ -4218,7 +4295,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev, rte_errno = ENODATA; return NULL; } - cont = flow_dv_pool_create(dev, dcs, batch); + cont = flow_dv_pool_create(dev, dcs, batch, age); if (!cont) { mlx5_devx_cmd_destroy(dcs); return NULL; @@ -4285,7 +4362,7 @@ flow_dv_counter_shared_search(struct mlx5_pools_container *cont, uint32_t id, */ static uint32_t flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, - uint16_t group) + uint16_t group, uint32_t age) { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_flow_counter_pool *pool = NULL; @@ -4301,7 +4378,7 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, */ uint32_t batch = (group && !shared && !priv->counter_fallback) ? 
1 : 0; struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch, - 0); + 0, age); uint32_t cnt_idx; if (!priv->config.devx) { @@ -4340,13 +4417,13 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, cnt_free = NULL; } if (!cnt_free) { - cont = flow_dv_counter_pool_prepare(dev, &cnt_free, batch); + cont = flow_dv_counter_pool_prepare(dev, &cnt_free, batch, age); if (!cont) return 0; pool = TAILQ_FIRST(&cont->pool_list); } if (!batch) - cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt_free); + cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free); /* Create a DV counter action only in the first time usage. */ if (!cnt_free->action) { uint16_t offset; @@ -4369,6 +4446,7 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, cnt_idx = MLX5_MAKE_CNT_IDX(pool->index, MLX5_CNT_ARRAY_IDX(pool, cnt_free)); cnt_idx += batch * MLX5_CNT_BATCH_OFFSET; + cnt_idx += age * MLX5_CNT_AGE_OFFSET; /* Update the counter reset values. */ if (_flow_dv_query_count(dev, cnt_idx, &cnt_free->hits, &cnt_free->bytes)) @@ -4390,6 +4468,60 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, return cnt_idx; } +/** + * Get age param from counter index. + * + * @param[in] dev + * Pointer to the Ethernet device structure. + * @param[in] counter + * Index to the counter handler. + */ +static struct mlx5_age_param* +flow_dv_counter_idx_get_age(struct rte_eth_dev *dev, + uint32_t counter) +{ + struct mlx5_flow_counter *cnt; + struct mlx5_flow_counter_pool *pool = NULL; + + flow_dv_counter_get_by_idx(dev, counter, &pool); + counter = (counter - 1) % MLX5_COUNTERS_PER_POOL; + cnt = MLX5_POOL_GET_CNT(pool, counter); + return MLX5_CNT_TO_AGE(cnt); +} + +/** + * Remove a flow counter from aged counter list. + * + * @param[in] dev + * Pointer to the Ethernet device structure. + * @param[in] counter + * Index to the counter handler. + * @param[in] cnt + * Pointer to the counter handler. 
+ */ +static void +flow_dv_counter_remove_from_age(struct rte_eth_dev *dev, + uint32_t counter, struct mlx5_flow_counter *cnt) +{ + struct mlx5_age_param *age_param; + struct mlx5_priv *priv = dev->data->dev_private; + + age_param = flow_dv_counter_idx_get_age(dev, counter); + if (rte_atomic16_cmpset((volatile uint16_t *) + &age_param->state, + AGE_CANDIDATE, AGE_FREE) + != AGE_CANDIDATE) { + /** + * We need the lock even if it is age timeout, + * since the counter may still be in process. + */ + rte_spinlock_lock(&priv->sh->cmng.aged_sl); + TAILQ_REMOVE(&priv->sh->cmng.aged_counters, + cnt, next); + rte_spinlock_unlock(&priv->sh->cmng.aged_sl); + } + rte_atomic16_set(&age_param->state, AGE_FREE); +} /** * Release a flow counter. * @@ -4410,10 +4542,12 @@ flow_dv_counter_release(struct rte_eth_dev *dev, uint32_t counter) cnt = flow_dv_counter_get_by_idx(dev, counter, &pool); MLX5_ASSERT(pool); if (counter < MLX5_CNT_BATCH_OFFSET) { - cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt); + cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt); if (cnt_ext && --cnt_ext->ref_cnt) return; } + if (IS_AGE_POOL(pool)) + flow_dv_counter_remove_from_age(dev, counter, cnt); /* Put the counter in the end - the last updated one.
*/ TAILQ_INSERT_TAIL(&pool->counters, cnt, next); /* @@ -5153,6 +5287,15 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, action_flags |= MLX5_FLOW_ACTION_METER; ++actions_n; break; + case RTE_FLOW_ACTION_TYPE_AGE: + ret = flow_dv_validate_action_age(action_flags, + actions, dev, + error); + if (ret < 0) + return ret; + action_flags |= MLX5_FLOW_ACTION_AGE; + ++actions_n; + break; case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP: ret = flow_dv_validate_action_modify_ipv4_dscp (action_flags, @@ -7164,6 +7307,41 @@ flow_dv_translate_action_port_id(struct rte_eth_dev *dev, return 0; } +static uint32_t +flow_dv_translate_create_counter(struct rte_eth_dev *dev, + struct mlx5_flow *dev_flow, + const struct rte_flow_action_count *count, + const struct rte_flow_action_age *age) +{ + uint32_t counter; + struct mlx5_age_param *age_param; + + counter = flow_dv_counter_alloc(dev, + count ? count->shared : 0, + count ? count->id : 0, + dev_flow->dv.group, + age ? 1 : 0); + + if (!counter || age == NULL) + return counter; + age_param = flow_dv_counter_idx_get_age(dev, counter); + /* + * Use the context given in the age action; when it is not set, + * fall back to the flow pointer. + */ + age_param->context = age->context ? age->context : dev_flow->flow; + /* + * The counter age accuracy may have a bit delay. Have 0.7 second + * bias on the timeout (kept in 0.1 sec units) to let it age in time. + */ + age_param->timeout = age->timeout * 10 - 7; + /* Set expire time in unit of 0.1 sec.
*/ + age_param->port_id = dev->data->port_id; + age_param->expire = age_param->timeout + + rte_rdtsc() / (rte_get_tsc_hz() / 10); + rte_atomic16_set(&age_param->state, AGE_CANDIDATE); + return counter; +} /** * Add Tx queue matcher * @@ -7328,6 +7506,8 @@ __flow_dv_translate(struct rte_eth_dev *dev, (MLX5_MAX_MODIFY_NUM + 1)]; } mhdr_dummy; struct mlx5_flow_dv_modify_hdr_resource *mhdr_res = &mhdr_dummy.res; + const struct rte_flow_action_count *count = NULL; + const struct rte_flow_action_age *age = NULL; union flow_dv_attr flow_attr = { .attr = 0 }; uint32_t tag_be; union mlx5_flow_tbl_key tbl_key; @@ -7356,7 +7536,6 @@ __flow_dv_translate(struct rte_eth_dev *dev, const struct rte_flow_action_queue *queue; const struct rte_flow_action_rss *rss; const struct rte_flow_action *action = actions; - const struct rte_flow_action_count *count = action->conf; const uint8_t *rss_key; const struct rte_flow_action_jump *jump_data; const struct rte_flow_action_meter *mtr; @@ -7477,36 +7656,21 @@ __flow_dv_translate(struct rte_eth_dev *dev, */ action_flags |= MLX5_FLOW_ACTION_RSS; break; + case RTE_FLOW_ACTION_TYPE_AGE: case RTE_FLOW_ACTION_TYPE_COUNT: if (!dev_conf->devx) { - rte_errno = ENOTSUP; - goto cnt_err; - } - flow->counter = flow_dv_counter_alloc(dev, - count->shared, - count->id, - dev_flow->dv.group); - if (!flow->counter) - goto cnt_err; - dev_flow->dv.actions[actions_n++] = - (flow_dv_counter_get_by_idx(dev, - flow->counter, NULL))->action; - action_flags |= MLX5_FLOW_ACTION_COUNT; - break; -cnt_err: - if (rte_errno == ENOTSUP) return rte_flow_error_set (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "count action not supported"); + } + /* Save information first, will apply later. 
*/ + if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) + count = action->conf; else - return rte_flow_error_set - (error, rte_errno, - RTE_FLOW_ERROR_TYPE_ACTION, - action, - "cannot create counter" - " object."); + age = action->conf; + action_flags |= MLX5_FLOW_ACTION_COUNT; break; case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN: dev_flow->dv.actions[actions_n++] = @@ -7766,6 +7930,22 @@ __flow_dv_translate(struct rte_eth_dev *dev, dev_flow->dv.actions[modify_action_position] = handle->dvh.modify_hdr->verbs_action; } + if (action_flags & MLX5_FLOW_ACTION_COUNT) { + flow->counter = + flow_dv_translate_create_counter(dev, + dev_flow, count, age); + + if (!flow->counter) + return rte_flow_error_set + (error, rte_errno, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "cannot create counter" + " object."); + dev_flow->dv.actions[actions_n++] = + (flow_dv_counter_get_by_idx(dev, + flow->counter, NULL))->action; + } break; default: break; @@ -8947,6 +9127,58 @@ flow_dv_counter_query(struct rte_eth_dev *dev, uint32_t counter, bool clear, return 0; } +/** + * Get aged-out flows. + * + * @param[in] dev + * Pointer to the Ethernet device structure. + * @param[in] context + * The address of an array of pointers to the aged-out flows contexts. + * @param[in] nb_contexts + * The length of context array pointers. + * @param[out] error + * Perform verbose error reporting if not NULL. Initialized in case of + * error only. + * + * @return + * how many contexts get in success, otherwise negative errno value. + * if nb_contexts is 0, return the amount of all aged contexts. + * if nb_contexts is not 0 , return the amount of aged flows reported + * in the context array. 
+ * @note: walks the aged counter list while holding the aged_sl lock + */ +static int +flow_get_aged_flows(struct rte_eth_dev *dev, + void **context, + uint32_t nb_contexts, + struct rte_flow_error *error) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_counters *aged_tq = &priv->sh->cmng.aged_counters; + struct mlx5_age_param *age_param; + struct mlx5_flow_counter *counter; + int nb_flows = 0; + + if (nb_contexts && !context) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, + "Should assign at least one flow or" + " context to get if nb_contexts != 0"); + rte_spinlock_lock(&priv->sh->cmng.aged_sl); + TAILQ_FOREACH(counter, aged_tq, next) { + nb_flows++; + if (nb_contexts) { + age_param = MLX5_CNT_TO_AGE(counter); + context[nb_flows - 1] = age_param->context; + if (!(--nb_contexts)) + break; + } + } + rte_spinlock_unlock(&priv->sh->cmng.aged_sl); + return nb_flows; +} + /* * Mutex-protected thunk to lock-free __flow_dv_translate(). */ @@ -9013,7 +9245,7 @@ flow_dv_counter_allocate(struct rte_eth_dev *dev) uint32_t cnt; flow_dv_shared_lock(dev); - cnt = flow_dv_counter_alloc(dev, 0, 0, 1); + cnt = flow_dv_counter_alloc(dev, 0, 0, 1, 0); flow_dv_shared_unlock(dev); return cnt; } @@ -9044,6 +9276,7 @@ const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = { .counter_alloc = flow_dv_counter_allocate, .counter_free = flow_dv_counter_free, .counter_query = flow_dv_counter_query, + .get_aged_flows = flow_get_aged_flows, }; #endif /* HAVE_IBV_FLOW_DV_SUPPORT */ diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c index 1a5c880221..7cf38195bd 100644 --- a/drivers/net/mlx5/mlx5_flow_verbs.c +++ b/drivers/net/mlx5/mlx5_flow_verbs.c @@ -56,7 +56,8 @@ flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev, struct mlx5_flow_counter_pool **ppool) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0); + struct mlx5_pools_container *cont =
MLX5_CNT_CONTAINER(priv->sh, 0, 0, + 0); struct mlx5_flow_counter_pool *pool; idx--; @@ -151,7 +152,8 @@ static uint32_t flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0); + struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0, + 0); struct mlx5_flow_counter_pool *pool = NULL; struct mlx5_flow_counter_ext *cnt_ext = NULL; struct mlx5_flow_counter *cnt = NULL; @@ -251,7 +253,7 @@ flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter) cnt = flow_verbs_counter_get_by_idx(dev, counter, &pool); - cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt); + cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt); if (--cnt_ext->ref_cnt == 0) { #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs)); @@ -282,7 +284,7 @@ flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused, struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx (dev, flow->counter, &pool); struct mlx5_flow_counter_ext *cnt_ext = MLX5_CNT_TO_CNT_EXT - (cnt); + (pool, cnt); struct rte_flow_query_count *qc = data; uint64_t counters[2] = {0, 0}; #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) @@ -1090,12 +1092,12 @@ flow_verbs_translate_action_count(struct mlx5_flow *dev_flow, } #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool); - cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt); + cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt); counter.counter_set_handle = cnt_ext->cs->handle; flow_verbs_spec_add(&dev_flow->verbs, &counter, size); #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45) cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool); - cnt_ext = MLX5_CNT_TO_CNT_EXT(cnt); + cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt); counter.counters = cnt_ext->cs; flow_verbs_spec_add(&dev_flow->verbs, &counter, size); #endif