From patchwork Tue Oct 6 11:38:48 2020
X-Patchwork-Submitter: Suanming Mou
X-Patchwork-Id: 79749
X-Patchwork-Delegate: rasland@nvidia.com
From: Suanming Mou
To: viacheslavo@nvidia.com, matan@nvidia.com
Cc: rasland@nvidia.com, dev@dpdk.org
Date: Tue, 6 Oct 2020 19:38:48 +0800
Message-Id: <1601984333-304464-2-git-send-email-suanmingm@nvidia.com>
X-Mailer: git-send-email 1.8.3.1
In-Reply-To: <1601984333-304464-1-git-send-email-suanmingm@nvidia.com>
References: <1601984333-304464-1-git-send-email-suanmingm@nvidia.com>
Subject: [dpdk-dev] [PATCH 1/6] net/mlx5: locate aging pools in the general container

Commit [1] introduced a separate container for the aging counter pools. To save container memory, the aging counter pools can be located in the general pool container instead. This patch locates the aging counter pools in the general pool container and removes the aging container management.
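To make the layout change easier to follow, here is a minimal, self-contained C sketch of the scheme (an editorial illustration, not the mlx5 driver code; struct container, counter_alloc() and counter_release() are invented names for this example): one container keeps a separate free list per counter type, so aging counters live in the same pools as plain counters but are allocated and released only through the list that matches their type.

/*
 * Editorial sketch, not driver code: one container, one free list per
 * counter type.  Aging counters share the container with plain counters;
 * only the free list they are taken from differs.
 */
#include <stdio.h>
#include <sys/queue.h>

enum counter_type {
	COUNTER_TYPE_ORIGIN,    /* plain flow counters */
	COUNTER_TYPE_AGE,       /* counters used for flow aging */
	COUNTER_TYPE_MAX,
};

struct counter {
	TAILQ_ENTRY(counter) next;
	unsigned int id;
};

TAILQ_HEAD(counter_list, counter);

struct container {
	/* One free list per type instead of one container per type. */
	struct counter_list free[COUNTER_TYPE_MAX];
};

static void
container_init(struct container *c)
{
	int i;

	for (i = 0; i < COUNTER_TYPE_MAX; i++)
		TAILQ_INIT(&c->free[i]);
}

static void
counter_release(struct container *c, struct counter *cnt,
		enum counter_type type)
{
	/* A released counter goes back to the list matching its type. */
	TAILQ_INSERT_TAIL(&c->free[type], cnt, next);
}

static struct counter *
counter_alloc(struct container *c, enum counter_type type)
{
	/* Allocation only looks at the free list of the requested type. */
	struct counter *cnt = TAILQ_FIRST(&c->free[type]);

	if (cnt != NULL)
		TAILQ_REMOVE(&c->free[type], cnt, next);
	return cnt;
}

int
main(void)
{
	struct container c;
	struct counter a = { .id = 1 }, b = { .id = 2 };

	container_init(&c);
	counter_release(&c, &a, COUNTER_TYPE_ORIGIN);
	counter_release(&c, &b, COUNTER_TYPE_AGE);
	/* Both counters share the container, but an aging counter never
	 * satisfies a plain allocation and vice versa. */
	printf("origin counter id: %u\n",
	       counter_alloc(&c, COUNTER_TYPE_ORIGIN)->id);
	printf("age counter id:    %u\n",
	       counter_alloc(&c, COUNTER_TYPE_AGE)->id);
	return 0;
}

In the actual patch this corresponds to the counters[MLX5_COUNTER_TYPE_MAX] array added to struct mlx5_pools_container, indexed by MLX5_COUNTER_TYPE_ORIGIN or MLX5_COUNTER_TYPE_AGE.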
[1] commit fd143711a6ea ("net/mlx5: separate aging counter pool range") Signed-off-by: Suanming Mou --- drivers/net/mlx5/mlx5.c | 7 ++-- drivers/net/mlx5/mlx5.h | 17 +++++---- drivers/net/mlx5/mlx5_flow.c | 19 +++------- drivers/net/mlx5/mlx5_flow_dv.c | 78 ++++++++++++++++++-------------------- drivers/net/mlx5/mlx5_flow_verbs.c | 4 +- 5 files changed, 57 insertions(+), 68 deletions(-) diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 01ead6e..5e3569d 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -458,7 +458,7 @@ struct mlx5_flow_id_pool * static void mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh) { - int i; + int i, j; memset(&sh->cmng, 0, sizeof(sh->cmng)); TAILQ_INIT(&sh->cmng.flow_counters); @@ -468,7 +468,8 @@ struct mlx5_flow_id_pool * sh->cmng.ccont[i].last_pool_idx = POOL_IDX_INVALID; TAILQ_INIT(&sh->cmng.ccont[i].pool_list); rte_spinlock_init(&sh->cmng.ccont[i].resize_sl); - TAILQ_INIT(&sh->cmng.ccont[i].counters); + for (j = 0; j < MLX5_COUNTER_TYPE_MAX; j++) + TAILQ_INIT(&sh->cmng.ccont[i].counters[j]); rte_spinlock_init(&sh->cmng.ccont[i].csl); } } @@ -513,7 +514,7 @@ struct mlx5_flow_id_pool * } for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) { struct mlx5_flow_counter_pool *pool; - uint32_t batch = !!(i > 1); + uint32_t batch = (i == MLX5_CCONT_TYPE_BATCH); if (!sh->cmng.ccont[i].pools) continue; diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index bd91e16..27c8f45 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -270,7 +270,6 @@ struct mlx5_drop { #define MLX5_COUNTERS_PER_POOL 512 #define MLX5_MAX_PENDING_QUERIES 4 #define MLX5_CNT_CONTAINER_RESIZE 64 -#define MLX5_CNT_AGE_OFFSET 0x80000000 #define CNT_SIZE (sizeof(struct mlx5_flow_counter)) #define CNTEXT_SIZE (sizeof(struct mlx5_flow_counter_ext)) #define AGE_SIZE (sizeof(struct mlx5_age_param)) @@ -279,7 +278,6 @@ struct mlx5_drop { #define CNT_POOL_TYPE_AGE (1 << 1) #define IS_EXT_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_EXT) #define IS_AGE_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_AGE) -#define MLX_CNT_IS_AGE(counter) ((counter) & MLX5_CNT_AGE_OFFSET ? 1 : 0) #define MLX5_CNT_LEN(pool) \ (CNT_SIZE + \ (IS_AGE_POOL(pool) ? AGE_SIZE : 0) + \ @@ -322,17 +320,20 @@ enum { AGE_TMOUT, /* Timeout, wait for rte_flow_get_aged_flows and destroy. */ }; -#define MLX5_CNT_CONTAINER(sh, batch, age) (&(sh)->cmng.ccont \ - [(batch) * 2 + (age)]) +#define MLX5_CNT_CONTAINER(sh, batch) (&(sh)->cmng.ccont[batch]) enum { MLX5_CCONT_TYPE_SINGLE, - MLX5_CCONT_TYPE_SINGLE_FOR_AGE, MLX5_CCONT_TYPE_BATCH, - MLX5_CCONT_TYPE_BATCH_FOR_AGE, MLX5_CCONT_TYPE_MAX, }; +enum mlx5_counter_type { + MLX5_COUNTER_TYPE_ORIGIN, + MLX5_COUNTER_TYPE_AGE, + MLX5_COUNTER_TYPE_MAX, +}; + /* Counter age parameter. */ struct mlx5_age_param { rte_atomic16_t state; /**< Age state. */ @@ -427,7 +428,8 @@ struct mlx5_pools_container { int max_id; /* The maximum counter ID in the pools. */ rte_spinlock_t resize_sl; /* The resize lock. */ rte_spinlock_t csl; /* The counter free list lock. */ - struct mlx5_counters counters; /* Free counter list. */ + struct mlx5_counters counters[MLX5_COUNTER_TYPE_MAX]; + /* Free counter list. */ struct mlx5_counter_pools pool_list; /* Counter pool list. */ struct mlx5_flow_counter_pool **pools; /* Counter pool array. 
*/ struct mlx5_counter_stats_mem_mng *mem_mng; @@ -441,7 +443,6 @@ struct mlx5_flow_counter_mng { uint8_t pending_queries; uint8_t batch; uint16_t pool_index; - uint8_t age; uint8_t query_thread_on; LIST_HEAD(mem_mngs, mlx5_counter_stats_mem_mng) mem_mngs; LIST_HEAD(stat_raws, mlx5_counter_stats_raw) free_stat_raws; diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index ffa7646..db7fc8f 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -5940,7 +5940,6 @@ struct mlx5_meter_domains_infos * uint16_t offset; int ret; uint8_t batch = sh->cmng.batch; - uint8_t age = sh->cmng.age; uint16_t pool_index = sh->cmng.pool_index; struct mlx5_pools_container *cont; struct mlx5_flow_counter_pool *pool; @@ -5949,7 +5948,7 @@ struct mlx5_meter_domains_infos * if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES) goto set_alarm; next_container: - cont = MLX5_CNT_CONTAINER(sh, batch, age); + cont = MLX5_CNT_CONTAINER(sh, batch); rte_spinlock_lock(&cont->resize_sl); if (!cont->pools) { rte_spinlock_unlock(&cont->resize_sl); @@ -5958,11 +5957,6 @@ struct mlx5_meter_domains_infos * goto set_alarm; batch ^= 0x1; pool_index = 0; - if (batch == 0 && pool_index == 0) { - age ^= 0x1; - sh->cmng.batch = batch; - sh->cmng.age = age; - } goto next_container; } pool = cont->pools[pool_index]; @@ -6011,13 +6005,10 @@ struct mlx5_meter_domains_infos * if (pool_index >= rte_atomic16_read(&cont->n_valid)) { batch ^= 0x1; pool_index = 0; - if (batch == 0 && pool_index == 0) - age ^= 0x1; } set_alarm: sh->cmng.batch = batch; sh->cmng.pool_index = pool_index; - sh->cmng.age = age; mlx5_set_query_alarm(sh); } @@ -6103,10 +6094,12 @@ struct mlx5_meter_domains_infos * struct mlx5_flow_counter_pool *pool = (struct mlx5_flow_counter_pool *)(uintptr_t)async_id; struct mlx5_counter_stats_raw *raw_to_free; - uint8_t age = !!IS_AGE_POOL(pool); uint8_t query_gen = pool->query_gen ^ 1; struct mlx5_pools_container *cont = - MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool), age); + MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool)); + enum mlx5_counter_type cnt_type = + IS_AGE_POOL(pool) ? MLX5_COUNTER_TYPE_AGE : + MLX5_COUNTER_TYPE_ORIGIN; if (unlikely(status)) { raw_to_free = pool->raw_hw; @@ -6121,7 +6114,7 @@ struct mlx5_meter_domains_infos * rte_io_wmb(); if (!TAILQ_EMPTY(&pool->counters[query_gen])) { rte_spinlock_lock(&cont->csl); - TAILQ_CONCAT(&cont->counters, + TAILQ_CONCAT(&cont->counters[cnt_type], &pool->counters[query_gen], next); rte_spinlock_unlock(&cont->csl); } diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 79fdf34..1bd3899 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -4170,16 +4170,14 @@ struct field_modify_info modify_tcp[] = { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_pools_container *cont; struct mlx5_flow_counter_pool *pool; - uint32_t batch = 0, age = 0; + uint32_t batch = 0; idx--; - age = MLX_CNT_IS_AGE(idx); - idx = age ? idx - MLX5_CNT_AGE_OFFSET : idx; if (idx >= MLX5_CNT_BATCH_OFFSET) { idx -= MLX5_CNT_BATCH_OFFSET; batch = 1; } - cont = MLX5_CNT_CONTAINER(priv->sh, batch, age); + cont = MLX5_CNT_CONTAINER(priv->sh, batch); MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cont->n); pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL]; MLX5_ASSERT(pool); @@ -4332,19 +4330,15 @@ struct field_modify_info modify_tcp[] = { * Pointer to the Ethernet device structure. * @param[in] batch * Whether the pool is for counter that was allocated by batch command. 
- * @param[in] age - * Whether the pool is for Aging counter. * * @return * 0 on success, otherwise negative errno value and rte_errno is set. */ static int -flow_dv_container_resize(struct rte_eth_dev *dev, - uint32_t batch, uint32_t age) +flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch, - age); + struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch); struct mlx5_counter_stats_mem_mng *mem_mng = NULL; void *old_pools = cont->pools; uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE; @@ -4462,12 +4456,11 @@ struct field_modify_info modify_tcp[] = { { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_flow_counter_pool *pool; - struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch, - age); + struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch); int16_t n_valid = rte_atomic16_read(&cont->n_valid); uint32_t size = sizeof(*pool); - if (cont->n == n_valid && flow_dv_container_resize(dev, batch, age)) + if (cont->n == n_valid && flow_dv_container_resize(dev, batch)) return NULL; size += MLX5_COUNTERS_PER_POOL * CNT_SIZE; size += (batch ? 0 : MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE); @@ -4595,10 +4588,12 @@ struct field_modify_info modify_tcp[] = { struct mlx5_devx_obj *last_min_dcs; struct mlx5_devx_obj *dcs = NULL; struct mlx5_flow_counter *cnt; + enum mlx5_counter_type cnt_type = + age ? MLX5_COUNTER_TYPE_AGE : MLX5_COUNTER_TYPE_ORIGIN; uint32_t add2other; uint32_t i; - cont = MLX5_CNT_CONTAINER(priv->sh, batch, age); + cont = MLX5_CNT_CONTAINER(priv->sh, batch); if (!batch) { retry: add2other = 0; @@ -4607,24 +4602,19 @@ struct field_modify_info modify_tcp[] = { if (!dcs) return NULL; pool = flow_dv_find_pool_by_id(cont, dcs->id); - /* Check if counter belongs to exist pool ID range. */ - if (!pool) { - pool = flow_dv_find_pool_by_id - (MLX5_CNT_CONTAINER - (priv->sh, batch, (age ^ 0x1)), dcs->id); - /* - * Pool eixsts, counter will be added to the other - * container, need to reallocate it later. - */ - if (pool) { - add2other = 1; - } else { - pool = flow_dv_pool_create(dev, dcs, batch, - age); - if (!pool) { - mlx5_devx_cmd_destroy(dcs); - return NULL; - } + /* + * If pool eixsts but with other type, counter will be added + * to the other pool, need to reallocate new counter in the + * ragne with same type later. + */ + if (pool && ((!!IS_AGE_POOL(pool)) != age)) { + add2other = 1; + } else if (!pool) { + pool = flow_dv_pool_create(dev, dcs, batch, + age); + if (!pool) { + mlx5_devx_cmd_destroy(dcs); + return NULL; } } if ((dcs->id < pool->min_dcs->id || @@ -4692,7 +4682,7 @@ struct field_modify_info modify_tcp[] = { TAILQ_INSERT_HEAD(&tmp_tq, cnt, next); } rte_spinlock_lock(&cont->csl); - TAILQ_CONCAT(&cont->counters, &tmp_tq, next); + TAILQ_CONCAT(&cont->counters[cnt_type], &tmp_tq, next); rte_spinlock_unlock(&cont->csl); *cnt_free = MLX5_POOL_GET_CNT(pool, 0); (*cnt_free)->pool = pool; @@ -4765,8 +4755,9 @@ struct field_modify_info modify_tcp[] = { * shared counters from the single container. */ uint32_t batch = (group && !shared && !priv->counter_fallback) ? 1 : 0; - struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch, - age); + struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch); + enum mlx5_counter_type cnt_type = + age ? 
MLX5_COUNTER_TYPE_AGE : MLX5_COUNTER_TYPE_ORIGIN; uint32_t cnt_idx; if (!priv->config.devx) { @@ -4789,9 +4780,9 @@ struct field_modify_info modify_tcp[] = { } /* Get free counters from container. */ rte_spinlock_lock(&cont->csl); - cnt_free = TAILQ_FIRST(&cont->counters); + cnt_free = TAILQ_FIRST(&cont->counters[cnt_type]); if (cnt_free) - TAILQ_REMOVE(&cont->counters, cnt_free, next); + TAILQ_REMOVE(&cont->counters[cnt_type], cnt_free, next); rte_spinlock_unlock(&cont->csl); if (!cnt_free && !flow_dv_counter_pool_prepare(dev, &cnt_free, batch, age)) @@ -4822,7 +4813,6 @@ struct field_modify_info modify_tcp[] = { cnt_idx = MLX5_MAKE_CNT_IDX(pool->index, MLX5_CNT_ARRAY_IDX(pool, cnt_free)); cnt_idx += batch * MLX5_CNT_BATCH_OFFSET; - cnt_idx += age * MLX5_CNT_AGE_OFFSET; /* Update the counter reset values. */ if (_flow_dv_query_count(dev, cnt_idx, &cnt_free->hits, &cnt_free->bytes)) @@ -4847,7 +4837,7 @@ struct field_modify_info modify_tcp[] = { if (cnt_free) { cnt_free->pool = pool; rte_spinlock_lock(&cont->csl); - TAILQ_INSERT_TAIL(&cont->counters, cnt_free, next); + TAILQ_INSERT_TAIL(&cont->counters[cnt_type], cnt_free, next); rte_spinlock_unlock(&cont->csl); } return 0; @@ -4926,6 +4916,7 @@ struct field_modify_info modify_tcp[] = { struct mlx5_flow_counter_pool *pool = NULL; struct mlx5_flow_counter *cnt; struct mlx5_flow_counter_ext *cnt_ext = NULL; + enum mlx5_counter_type cnt_type; if (!counter) return; @@ -4954,12 +4945,15 @@ struct field_modify_info modify_tcp[] = { * function both operate with the different list. * */ - if (!priv->counter_fallback) + if (!priv->counter_fallback) { TAILQ_INSERT_TAIL(&pool->counters[pool->query_gen], cnt, next); - else + } else { + cnt_type = IS_AGE_POOL(pool) ? MLX5_COUNTER_TYPE_AGE : + MLX5_COUNTER_TYPE_ORIGIN; TAILQ_INSERT_TAIL(&((MLX5_CNT_CONTAINER - (priv->sh, 0, 0))->counters), + (priv->sh, 0))->counters[cnt_type]), cnt, next); + } } /** diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c index 62c18b8..2f3035a 100644 --- a/drivers/net/mlx5/mlx5_flow_verbs.c +++ b/drivers/net/mlx5/mlx5_flow_verbs.c @@ -159,7 +159,7 @@ struct mlx5_flow_counter_pool **ppool) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0); + struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0); struct mlx5_flow_counter_pool *pool; idx--; @@ -254,7 +254,7 @@ flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0); + struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0); struct mlx5_flow_counter_pool *pool = NULL; struct mlx5_flow_counter_ext *cnt_ext = NULL; struct mlx5_flow_counter *cnt = NULL; From patchwork Tue Oct 6 11:38:49 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Suanming Mou X-Patchwork-Id: 79746 X-Patchwork-Delegate: rasland@nvidia.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 311A4A04BB; Tue, 6 Oct 2020 13:39:10 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 8B75E1023; Tue, 6 Oct 2020 13:39:07 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by dpdk.org (Postfix) with ESMTP id 
818FAF3E for ; Tue, 6 Oct 2020 13:39:05 +0200 (CEST)
From: Suanming Mou
To: viacheslavo@nvidia.com, matan@nvidia.com
Cc: rasland@nvidia.com, dev@dpdk.org
Date: Tue, 6 Oct 2020 19:38:49 +0800
Message-Id: <1601984333-304464-3-git-send-email-suanmingm@nvidia.com>
X-Mailer: git-send-email 1.8.3.1
In-Reply-To: <1601984333-304464-1-git-send-email-suanmingm@nvidia.com>
References: <1601984333-304464-1-git-send-email-suanmingm@nvidia.com>
Subject: [dpdk-dev] [PATCH 2/6] net/mlx5: optimize shared counter memory

Currently, when a counter is allocated, the counter list entry memory is not used until the counter is released and added back to the counter container free list. In this case, if a counter is allocated as a shared counter, the shared information can be saved to the counter list entry memory. This adjustment saves memory for the shared counters. Note that, for now, a shared counter can only be a single counter, since a shared counter may be applied to both root and non-root tables, and batch counters with offset are not supported in the root table by some old OFED versions. Batch counters with offset are fully supported in the current OFED. This commit is also the initial change to allow batch counters with offset to be shared in a later change.

Signed-off-by: Suanming Mou --- drivers/net/mlx5/mlx5.h | 33 +++++++++------- drivers/net/mlx5/mlx5_flow_dv.c | 78 +++++++++++++++----------------------- drivers/net/mlx5/mlx5_flow_verbs.c | 60 +++++++++++++++++------------ 3 files changed, 86 insertions(+), 85 deletions(-) diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 27c8f45..fe6bd88 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -270,6 +270,10 @@ struct mlx5_drop { #define MLX5_COUNTERS_PER_POOL 512 #define MLX5_MAX_PENDING_QUERIES 4 #define MLX5_CNT_CONTAINER_RESIZE 64 +#define MLX5_CNT_SHARED_OFFSET 0x80000000 +#define IS_SHARED_CNT(cnt) (!!((cnt) & MLX5_CNT_SHARED_OFFSET)) +#define IS_BATCH_CNT(cnt) (((cnt) & (MLX5_CNT_SHARED_OFFSET - 1)) >= \ + MLX5_CNT_BATCH_OFFSET) #define CNT_SIZE (sizeof(struct mlx5_flow_counter)) #define CNTEXT_SIZE (sizeof(struct mlx5_flow_counter_ext)) #define AGE_SIZE (sizeof(struct mlx5_age_param)) @@ -348,11 +352,21 @@ struct flow_counter_stats { uint64_t bytes; }; +/* Shared counters information for counters. */ +struct mlx5_flow_counter_shared { + uint32_t ref_cnt; /**< Reference counter. */ + uint32_t id; /**< User counter ID. */ +}; + struct mlx5_flow_counter_pool; /* Generic counters information. */ struct mlx5_flow_counter { - TAILQ_ENTRY(mlx5_flow_counter) next; - /**< Pointer to the next flow counter structure. */ + union { + TAILQ_ENTRY(mlx5_flow_counter) next; + /**< Pointer to the next flow counter structure. */ + struct mlx5_flow_counter_shared shared_info; + /**< Shared counter information. */ + }; union { uint64_t hits; /**< Reset value of hits packets. */ struct mlx5_flow_counter_pool *pool; /**< Counter pool.
*/ @@ -361,22 +375,15 @@ struct mlx5_flow_counter { void *action; /**< Pointer to the dv action. */ }; -/* Extend counters information for none batch counters. */ +/* Extend counters information for none batch fallback counters. */ struct mlx5_flow_counter_ext { - uint32_t shared:1; /**< Share counter ID with other flow rules. */ - uint32_t batch: 1; uint32_t skipped:1; /* This counter is skipped or not. */ - /**< Whether the counter was allocated by batch command. */ - uint32_t ref_cnt:29; /**< Reference counter. */ - uint32_t id; /**< User counter ID. */ - union { /**< Holds the counters for the rule. */ #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) - struct ibv_counter_set *cs; + struct ibv_counter_set *cs; #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45) - struct ibv_counters *cs; + struct ibv_counters *cs; #endif - struct mlx5_devx_obj *dcs; /**< Counter Devx object. */ - }; + struct mlx5_devx_obj *dcs; /**< Counter Devx object. */ }; TAILQ_HEAD(mlx5_counters, mlx5_flow_counter); diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 1bd3899..10be990 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -4172,8 +4172,9 @@ struct field_modify_info modify_tcp[] = { struct mlx5_flow_counter_pool *pool; uint32_t batch = 0; - idx--; - if (idx >= MLX5_CNT_BATCH_OFFSET) { + /* Decrease to original index and clear shared bit. */ + idx = (idx - 1) & (MLX5_CNT_SHARED_OFFSET - 1); + if (IS_BATCH_CNT(idx)) { idx -= MLX5_CNT_BATCH_OFFSET; batch = 1; } @@ -4408,7 +4409,7 @@ struct field_modify_info modify_tcp[] = { cnt = flow_dv_counter_get_by_idx(dev, counter, &pool); MLX5_ASSERT(pool); - if (counter < MLX5_CNT_BATCH_OFFSET) { + if (!IS_BATCH_CNT(counter)) { cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt); if (priv->counter_fallback) return mlx5_devx_cmd_flow_counter_query(cnt_ext->dcs, 0, @@ -4696,29 +4697,19 @@ struct field_modify_info modify_tcp[] = { * Pointer to the Ethernet device structure. * @param[in] id * The shared counter ID to search. - * @param[out] ppool - * mlx5 flow counter pool in the container, * * @return - * NULL if not existed, otherwise pointer to the shared extend counter. + * 0 if not existed, otherwise shared counter index. */ -static struct mlx5_flow_counter_ext * -flow_dv_counter_shared_search(struct rte_eth_dev *dev, uint32_t id, - struct mlx5_flow_counter_pool **ppool) +static uint32_t +flow_dv_counter_shared_search(struct rte_eth_dev *dev, uint32_t id) { struct mlx5_priv *priv = dev->data->dev_private; union mlx5_l3t_data data; - uint32_t cnt_idx; - if (mlx5_l3t_get_entry(priv->sh->cnt_id_tbl, id, &data) || !data.dword) - return NULL; - cnt_idx = data.dword; - /* - * Shared counters don't have age info. The counter extend is after - * the counter datat structure. 
- */ - return (struct mlx5_flow_counter_ext *) - ((flow_dv_counter_get_by_idx(dev, cnt_idx, ppool)) + 1); + if (mlx5_l3t_get_entry(priv->sh->cnt_id_tbl, id, &data)) + return 0; + return data.dword; } /** @@ -4765,16 +4756,15 @@ struct field_modify_info modify_tcp[] = { return 0; } if (shared) { - cnt_ext = flow_dv_counter_shared_search(dev, id, &pool); - if (cnt_ext) { - if (cnt_ext->ref_cnt + 1 == 0) { + cnt_idx = flow_dv_counter_shared_search(dev, id); + if (cnt_idx) { + cnt_free = flow_dv_counter_get_by_idx(dev, cnt_idx, + NULL); + if (cnt_free->shared_info.ref_cnt + 1 == 0) { rte_errno = E2BIG; return 0; } - cnt_ext->ref_cnt++; - cnt_idx = pool->index * MLX5_COUNTERS_PER_POOL + - (cnt_ext->dcs->id % MLX5_COUNTERS_PER_POOL) - + 1; + cnt_free->shared_info.ref_cnt++; return cnt_idx; } } @@ -4817,17 +4807,15 @@ struct field_modify_info modify_tcp[] = { if (_flow_dv_query_count(dev, cnt_idx, &cnt_free->hits, &cnt_free->bytes)) goto err; - if (cnt_ext) { - cnt_ext->shared = shared; - cnt_ext->ref_cnt = 1; - cnt_ext->id = id; - if (shared) { - union mlx5_l3t_data data; - - data.dword = cnt_idx; - if (mlx5_l3t_set_entry(priv->sh->cnt_id_tbl, id, &data)) - return 0; - } + if (shared) { + union mlx5_l3t_data data; + + data.dword = cnt_idx; + if (mlx5_l3t_set_entry(priv->sh->cnt_id_tbl, id, &data)) + goto err; + cnt_free->shared_info.ref_cnt = 1; + cnt_free->shared_info.id = id; + cnt_idx |= MLX5_CNT_SHARED_OFFSET; } if (!priv->counter_fallback && !priv->sh->cmng.query_thread_on) /* Start the asynchronous batch query by the host thread. */ @@ -4915,22 +4903,18 @@ struct field_modify_info modify_tcp[] = { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_flow_counter_pool *pool = NULL; struct mlx5_flow_counter *cnt; - struct mlx5_flow_counter_ext *cnt_ext = NULL; enum mlx5_counter_type cnt_type; if (!counter) return; cnt = flow_dv_counter_get_by_idx(dev, counter, &pool); MLX5_ASSERT(pool); - if (counter < MLX5_CNT_BATCH_OFFSET) { - cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt); - if (cnt_ext) { - if (--cnt_ext->ref_cnt) - return; - if (cnt_ext->shared) - mlx5_l3t_clear_entry(priv->sh->cnt_id_tbl, - cnt_ext->id); - } + + if (IS_SHARED_CNT(counter)) { + if (--cnt->shared_info.ref_cnt) + return; + mlx5_l3t_clear_entry(priv->sh->cnt_id_tbl, + cnt->shared_info.id); } if (IS_AGE_POOL(pool)) flow_dv_counter_remove_from_age(dev, counter, cnt); diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c index 2f3035a..0463bea 100644 --- a/drivers/net/mlx5/mlx5_flow_verbs.c +++ b/drivers/net/mlx5/mlx5_flow_verbs.c @@ -162,7 +162,7 @@ struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0); struct mlx5_flow_counter_pool *pool; - idx--; + idx = (idx - 1) & (MLX5_CNT_SHARED_OFFSET - 1); pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL]; MLX5_ASSERT(pool); if (ppool) @@ -258,22 +258,21 @@ struct mlx5_flow_counter_pool *pool = NULL; struct mlx5_flow_counter_ext *cnt_ext = NULL; struct mlx5_flow_counter *cnt = NULL; + union mlx5_l3t_data data; uint32_t n_valid = rte_atomic16_read(&cont->n_valid); - uint32_t pool_idx; + uint32_t pool_idx, cnt_idx; uint32_t i; int ret; - if (shared) { - for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) { - pool = cont->pools[pool_idx]; - for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) { - cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i); - if (cnt_ext->shared && cnt_ext->id == id) { - cnt_ext->ref_cnt++; - return MLX5_MAKE_CNT_IDX(pool_idx, i); - } - } + if (shared && !mlx5_l3t_get_entry(priv->sh->cnt_id_tbl, id, &data) && + data.dword) { + cnt = 
flow_verbs_counter_get_by_idx(dev, data.dword, NULL); + if (cnt->shared_info.ref_cnt + 1 == 0) { + rte_errno = E2BIG; + return 0; } + cnt->shared_info.ref_cnt++; + return data.dword; } for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) { pool = cont->pools[pool_idx]; @@ -322,17 +321,23 @@ TAILQ_INSERT_HEAD(&cont->pool_list, pool, next); } i = MLX5_CNT_ARRAY_IDX(pool, cnt); + cnt_idx = MLX5_MAKE_CNT_IDX(pool_idx, i); + if (shared) { + data.dword = cnt_idx; + if (mlx5_l3t_set_entry(priv->sh->cnt_id_tbl, id, &data)) + return 0; + cnt->shared_info.ref_cnt = 1; + cnt->shared_info.id = id; + cnt_idx |= MLX5_CNT_SHARED_OFFSET; + } cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i); - cnt_ext->id = id; - cnt_ext->shared = shared; - cnt_ext->ref_cnt = 1; cnt->hits = 0; cnt->bytes = 0; /* Create counter with Verbs. */ ret = flow_verbs_counter_create(dev, cnt_ext); if (!ret) { TAILQ_REMOVE(&pool->counters[0], cnt, next); - return MLX5_MAKE_CNT_IDX(pool_idx, i); + return cnt_idx; } /* Some error occurred in Verbs library. */ rte_errno = -ret; @@ -350,23 +355,28 @@ static void flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter) { + struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_flow_counter_pool *pool; struct mlx5_flow_counter *cnt; struct mlx5_flow_counter_ext *cnt_ext; - cnt = flow_verbs_counter_get_by_idx(dev, counter, - &pool); + cnt = flow_verbs_counter_get_by_idx(dev, counter, &pool); + if (IS_SHARED_CNT(counter)) { + if (--cnt->shared_info.ref_cnt) + return; + mlx5_l3t_clear_entry(priv->sh->cnt_id_tbl, + cnt->shared_info.id); + } cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt); - if (--cnt_ext->ref_cnt == 0) { #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) - claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs)); - cnt_ext->cs = NULL; + claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs)); + cnt_ext->cs = NULL; #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45) - claim_zero(mlx5_glue->destroy_counters(cnt_ext->cs)); - cnt_ext->cs = NULL; + claim_zero(mlx5_glue->destroy_counters(cnt_ext->cs)); + cnt_ext->cs = NULL; #endif - TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next); - } + (void)cnt_ext; + TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next); } /** From patchwork Tue Oct 6 11:38:50 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Suanming Mou X-Patchwork-Id: 79747 X-Patchwork-Delegate: rasland@nvidia.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id BCCAEA04BB; Tue, 6 Oct 2020 13:39:29 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 2F8982B82; Tue, 6 Oct 2020 13:39:09 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by dpdk.org (Postfix) with ESMTP id 8AFD01C01 for ; Tue, 6 Oct 2020 13:39:05 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from suanmingm@nvidia.com) with SMTP; 6 Oct 2020 14:39:03 +0300 Received: from nvidia.com (mtbc-r640-04.mtbc.labs.mlnx [10.75.70.9]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 096BcuYO014182; Tue, 6 Oct 2020 14:39:01 +0300 From: Suanming Mou To: viacheslavo@nvidia.com, matan@nvidia.com Cc: rasland@nvidia.com, dev@dpdk.org Date: Tue, 6 Oct 2020 19:38:50 +0800 Message-Id: <1601984333-304464-4-git-send-email-suanmingm@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: 
<1601984333-304464-1-git-send-email-suanmingm@nvidia.com>
References: <1601984333-304464-1-git-send-email-suanmingm@nvidia.com>
Subject: [dpdk-dev] [PATCH 3/6] net/mlx5: remove single counter container

A flow counter which was allocated by a batch API could not be assigned to a flow in the root table (group 0) with old rdma-core versions. Hence, a root table flow counter required a PMD mechanism to manage counters which were allocated singly. Currently, batch counters are supported in the root table when the rdma-core version includes the MLX5_FLOW_ACTION_COUNTER_OFFSET enum and the kernel driver includes the MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET enum. When the PMD uses the rdma-core API to assign a batch counter with an invalid counter offset to a root table flow, it gets an error only if batch counter assignment in the root table is supported. Performing this trial at initialization time helps to detect that support. If the support is detected, remove the management of the single counter container from the fast counter mechanism. Otherwise, move the counter mechanism to fallback mode.

Signed-off-by: Suanming Mou --- drivers/net/mlx5/linux/mlx5_os.c | 36 +++- drivers/net/mlx5/mlx5.c | 39 ++--- drivers/net/mlx5/mlx5.h | 27 +-- drivers/net/mlx5/mlx5_flow.c | 152 ++++++++++------- drivers/net/mlx5/mlx5_flow.h | 2 + drivers/net/mlx5/mlx5_flow_dv.c | 337 +++++++++++++------------------ drivers/net/mlx5/mlx5_flow_verbs.c | 26 +-- 7 files changed, 273 insertions(+), 346 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 188a6d4..43d173b 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -512,6 +512,32 @@ } /** + * DV flow counter mode detect and config. + * + * @param dev + * Pointer to rte_eth_dev structure. + * + */ +static void +mlx5_flow_counter_mode_config(struct rte_eth_dev *dev) +{ + struct mlx5_priv *priv = dev->data->dev_private; + + /* If devx is not supported, counters are not working. */ + if (!priv->config.devx) + return; + priv->counter_fallback = 0; + if (!priv->config.hca_attr.flow_counters_dump || + (mlx5_flow_discover_counter_offset_support(dev) == -ENOTSUP)) + priv->counter_fallback = 1; +#ifndef HAVE_IBV_DEVX_ASYNC + priv->counter_fallback = 1; +#endif + if (priv->counter_fallback) + DRV_LOG(INFO, "Use fall-back DV counter management"); +} + +/** * Spawn an Ethernet device from Verbs information. * * @param dpdk_dev @@ -979,19 +1005,11 @@ DRV_LOG(INFO, "Rx CQE padding is enabled"); } if (config->devx) { - priv->counter_fallback = 0; err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config->hca_attr); if (err) { err = -err; goto error; } - if (!config->hca_attr.flow_counters_dump) - priv->counter_fallback = 1; -#ifndef HAVE_IBV_DEVX_ASYNC - priv->counter_fallback = 1; -#endif - if (priv->counter_fallback) - DRV_LOG(INFO, "Use fall-back DV counter management"); /* Check for LRO support.
*/ if (config->dest_tir && config->hca_attr.lro_cap && config->dv_flow_en) { @@ -1364,6 +1382,8 @@ goto error; } } + if (priv->config.dv_flow_en) + mlx5_flow_counter_mode_config(eth_dev); return eth_dev; error: if (priv) { diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 5e3569d..96cebba 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -458,20 +458,18 @@ struct mlx5_flow_id_pool * static void mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh) { - int i, j; + int i; memset(&sh->cmng, 0, sizeof(sh->cmng)); TAILQ_INIT(&sh->cmng.flow_counters); - for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) { - sh->cmng.ccont[i].min_id = MLX5_CNT_BATCH_OFFSET; - sh->cmng.ccont[i].max_id = -1; - sh->cmng.ccont[i].last_pool_idx = POOL_IDX_INVALID; - TAILQ_INIT(&sh->cmng.ccont[i].pool_list); - rte_spinlock_init(&sh->cmng.ccont[i].resize_sl); - for (j = 0; j < MLX5_COUNTER_TYPE_MAX; j++) - TAILQ_INIT(&sh->cmng.ccont[i].counters[j]); - rte_spinlock_init(&sh->cmng.ccont[i].csl); - } + sh->cmng.min_id = MLX5_CNT_BATCH_OFFSET; + sh->cmng.max_id = -1; + sh->cmng.last_pool_idx = POOL_IDX_INVALID; + TAILQ_INIT(&sh->cmng.pool_list); + rte_spinlock_init(&sh->cmng.resize_sl); + for (i = 0; i < MLX5_COUNTER_TYPE_MAX; i++) + TAILQ_INIT(&sh->cmng.counters[i]); + rte_spinlock_init(&sh->cmng.csl); } /** @@ -501,7 +499,6 @@ struct mlx5_flow_id_pool * mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh) { struct mlx5_counter_stats_mem_mng *mng; - int i; int j; int retries = 1024; @@ -512,15 +509,13 @@ struct mlx5_flow_id_pool * break; rte_pause(); } - for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) { + + if (sh->cmng.pools) { struct mlx5_flow_counter_pool *pool; - uint32_t batch = (i == MLX5_CCONT_TYPE_BATCH); - if (!sh->cmng.ccont[i].pools) - continue; - pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list); + pool = TAILQ_FIRST(&sh->cmng.pool_list); while (pool) { - if (batch && pool->min_dcs) + if (!IS_EXT_POOL(pool) && pool->min_dcs) claim_zero(mlx5_devx_cmd_destroy (pool->min_dcs)); for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) { @@ -529,17 +524,17 @@ struct mlx5_flow_id_pool * (mlx5_glue->destroy_flow_action (MLX5_POOL_GET_CNT (pool, j)->action)); - if (!batch && MLX5_GET_POOL_CNT_EXT + if (IS_EXT_POOL(pool) && MLX5_GET_POOL_CNT_EXT (pool, j)->dcs) claim_zero(mlx5_devx_cmd_destroy (MLX5_GET_POOL_CNT_EXT (pool, j)->dcs)); } - TAILQ_REMOVE(&sh->cmng.ccont[i].pool_list, pool, next); + TAILQ_REMOVE(&sh->cmng.pool_list, pool, next); mlx5_free(pool); - pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list); + pool = TAILQ_FIRST(&sh->cmng.pool_list); } - mlx5_free(sh->cmng.ccont[i].pools); + mlx5_free(sh->cmng.pools); } mng = LIST_FIRST(&sh->cmng.mem_mngs); while (mng) { diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index fe6bd88..a3d4ad9 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -280,8 +280,10 @@ struct mlx5_drop { #define MLX5_AGING_TIME_DELAY 7 #define CNT_POOL_TYPE_EXT (1 << 0) #define CNT_POOL_TYPE_AGE (1 << 1) + #define IS_EXT_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_EXT) #define IS_AGE_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_AGE) + #define MLX5_CNT_LEN(pool) \ (CNT_SIZE + \ (IS_AGE_POOL(pool) ? AGE_SIZE : 0) + \ @@ -324,14 +326,6 @@ enum { AGE_TMOUT, /* Timeout, wait for rte_flow_get_aged_flows and destroy. 
*/ }; -#define MLX5_CNT_CONTAINER(sh, batch) (&(sh)->cmng.ccont[batch]) - -enum { - MLX5_CCONT_TYPE_SINGLE, - MLX5_CCONT_TYPE_BATCH, - MLX5_CCONT_TYPE_MAX, -}; - enum mlx5_counter_type { MLX5_COUNTER_TYPE_ORIGIN, MLX5_COUNTER_TYPE_AGE, @@ -377,7 +371,6 @@ struct mlx5_flow_counter { /* Extend counters information for none batch fallback counters. */ struct mlx5_flow_counter_ext { - uint32_t skipped:1; /* This counter is skipped or not. */ #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) struct ibv_counter_set *cs; #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45) @@ -397,9 +390,8 @@ struct mlx5_flow_counter_pool { rte_atomic64_t a64_dcs; }; /* The devx object of the minimum counter ID. */ - uint32_t index:28; /* Pool index in container. */ + uint32_t index:29; /* Pool index in container. */ uint32_t type:2; /* Memory type behind the counter array. */ - uint32_t skip_cnt:1; /* Pool contains skipped counter. */ volatile uint32_t query_gen:1; /* Query round. */ rte_spinlock_t sl; /* The pool lock. */ struct mlx5_counter_stats_raw *raw; @@ -419,15 +411,14 @@ struct mlx5_counter_stats_mem_mng { /* Raw memory structure for the counter statistics values of a pool. */ struct mlx5_counter_stats_raw { LIST_ENTRY(mlx5_counter_stats_raw) next; - int min_dcs_id; struct mlx5_counter_stats_mem_mng *mem_mng; volatile struct flow_counter_stats *data; }; TAILQ_HEAD(mlx5_counter_pools, mlx5_flow_counter_pool); -/* Container structure for counter pools. */ -struct mlx5_pools_container { +/* Counter global management structure. */ +struct mlx5_flow_counter_mng { rte_atomic16_t n_valid; /* Number of valid pools. */ uint16_t n; /* Number of pools. */ uint16_t last_pool_idx; /* Last used pool index */ @@ -441,14 +432,8 @@ struct mlx5_pools_container { struct mlx5_flow_counter_pool **pools; /* Counter pool array. */ struct mlx5_counter_stats_mem_mng *mem_mng; /* Hold the memory management for the next allocated pools raws. */ -}; - -/* Counter global management structure. */ -struct mlx5_flow_counter_mng { - struct mlx5_pools_container ccont[MLX5_CCONT_TYPE_MAX]; struct mlx5_counters flow_counters; /* Legacy flow counter list. */ uint8_t pending_queries; - uint8_t batch; uint16_t pool_index; uint8_t query_thread_on; LIST_HEAD(mem_mngs, mlx5_counter_stats_mem_mng) mem_mngs; @@ -838,6 +823,8 @@ struct mlx5_priv { struct mlx5_flow_meters flow_meters; /* MTR list. */ uint8_t skip_default_rss_reta; /* Skip configuration of default reta. */ uint8_t fdb_def_rule; /* Whether fdb jump to table 1 is configured. */ + void *cnt_action; /* Counter action to validate invalid offset. */ + struct mlx5_devx_obj *cnt_dcs; /* Counter validate devx object. */ struct mlx5_mp_id mp_id; /* ID of a multi-process process */ LIST_HEAD(fdir, mlx5_fdir_flow) fdir_flows; /* fdir flows. */ }; diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index db7fc8f..c280f56 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -5883,26 +5883,6 @@ struct mlx5_meter_domains_infos * #define MLX5_POOL_QUERY_FREQ_US 1000000 /** - * Get number of all validate pools. - * - * @param[in] sh - * Pointer to mlx5_dev_ctx_shared object. - * - * @return - * The number of all validate pools. 
- */ -static uint32_t -mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh) -{ - int i; - uint32_t pools_n = 0; - - for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) - pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid); - return pools_n; -} - -/** * Set the periodic procedure for triggering asynchronous batch queries for all * the counter pools. * @@ -5914,7 +5894,7 @@ struct mlx5_meter_domains_infos * { uint32_t pools_n, us; - pools_n = mlx5_get_all_valid_pool_count(sh); + pools_n = rte_atomic16_read(&sh->cmng.n_valid); us = MLX5_POOL_QUERY_FREQ_US / pools_n; DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us); if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) { @@ -5936,31 +5916,20 @@ struct mlx5_meter_domains_infos * mlx5_flow_query_alarm(void *arg) { struct mlx5_dev_ctx_shared *sh = arg; - struct mlx5_devx_obj *dcs; - uint16_t offset; int ret; - uint8_t batch = sh->cmng.batch; uint16_t pool_index = sh->cmng.pool_index; - struct mlx5_pools_container *cont; + struct mlx5_flow_counter_mng *cmng = &sh->cmng; struct mlx5_flow_counter_pool *pool; - int cont_loop = MLX5_CCONT_TYPE_MAX; if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES) goto set_alarm; -next_container: - cont = MLX5_CNT_CONTAINER(sh, batch); - rte_spinlock_lock(&cont->resize_sl); - if (!cont->pools) { - rte_spinlock_unlock(&cont->resize_sl); - /* Check if all the containers are empty. */ - if (unlikely(--cont_loop == 0)) - goto set_alarm; - batch ^= 0x1; - pool_index = 0; - goto next_container; + rte_spinlock_lock(&cmng->resize_sl); + if (!cmng->pools) { + rte_spinlock_unlock(&cmng->resize_sl); + goto set_alarm; } - pool = cont->pools[pool_index]; - rte_spinlock_unlock(&cont->resize_sl); + pool = cmng->pools[pool_index]; + rte_spinlock_unlock(&cmng->resize_sl); if (pool->raw_hw) /* There is a pool query in progress. */ goto set_alarm; @@ -5969,14 +5938,6 @@ struct mlx5_meter_domains_infos * if (!pool->raw_hw) /* No free counter statistics raw memory. */ goto set_alarm; - dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read - (&pool->a64_dcs); - if (dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1)) { - /* Pool without valid counter. */ - pool->raw_hw = NULL; - goto next_pool; - } - offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL; /* * Identify the counters released between query trigger and query * handle more effiecntly. The counter released in this gap period @@ -5984,11 +5945,12 @@ struct mlx5_meter_domains_infos * * will not be taken into account. 
*/ pool->query_gen++; - ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL - - offset, NULL, NULL, + ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0, + MLX5_COUNTERS_PER_POOL, + NULL, NULL, pool->raw_hw->mem_mng->dm->id, (void *)(uintptr_t) - (pool->raw_hw->data + offset), + pool->raw_hw->data, sh->devx_comp, (uint64_t)(uintptr_t)pool); if (ret) { @@ -5997,17 +5959,12 @@ struct mlx5_meter_domains_infos * pool->raw_hw = NULL; goto set_alarm; } - pool->raw_hw->min_dcs_id = dcs->id; LIST_REMOVE(pool->raw_hw, next); sh->cmng.pending_queries++; -next_pool: pool_index++; - if (pool_index >= rte_atomic16_read(&cont->n_valid)) { - batch ^= 0x1; + if (pool_index >= rte_atomic16_read(&cmng->n_valid)) pool_index = 0; - } set_alarm: - sh->cmng.batch = batch; sh->cmng.pool_index = pool_index; mlx5_set_query_alarm(sh); } @@ -6095,8 +6052,7 @@ struct mlx5_meter_domains_infos * (struct mlx5_flow_counter_pool *)(uintptr_t)async_id; struct mlx5_counter_stats_raw *raw_to_free; uint8_t query_gen = pool->query_gen ^ 1; - struct mlx5_pools_container *cont = - MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool)); + struct mlx5_flow_counter_mng *cmng = &sh->cmng; enum mlx5_counter_type cnt_type = IS_AGE_POOL(pool) ? MLX5_COUNTER_TYPE_AGE : MLX5_COUNTER_TYPE_ORIGIN; @@ -6113,10 +6069,10 @@ struct mlx5_meter_domains_infos * /* Be sure the new raw counters data is updated in memory. */ rte_io_wmb(); if (!TAILQ_EMPTY(&pool->counters[query_gen])) { - rte_spinlock_lock(&cont->csl); - TAILQ_CONCAT(&cont->counters[cnt_type], + rte_spinlock_lock(&cmng->csl); + TAILQ_CONCAT(&cmng->counters[cnt_type], &pool->counters[query_gen], next); - rte_spinlock_unlock(&cont->csl); + rte_spinlock_unlock(&cmng->csl); } } LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next); @@ -6238,6 +6194,80 @@ struct mlx5_meter_domains_infos * } /** + * Validate if batch counter supported in root table. + * + * @param[in] dev + * Pointer to the Ethernet device structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_flow_discover_counter_offset_support(struct rte_eth_dev *dev) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct rte_flow_attr attr = { + .group = 0, + .ingress = 1, + }; + struct rte_flow_item items[] = { + [0] = { + .type = RTE_FLOW_ITEM_TYPE_END, + }, + }; + struct rte_flow_action actions[] = { + [0] = { + .type = (enum rte_flow_action_type) + MLX5_RTE_FLOW_ACTION_TYPE_COUNT, + }, + [1] = { + .type = RTE_FLOW_ACTION_TYPE_JUMP, + .conf = &(struct rte_flow_action_jump){ + .group = 1, + }, + }, + [2] = { + .type = RTE_FLOW_ACTION_TYPE_END, + }, + }; + int ret = 0; + uint32_t flow_idx; + struct rte_flow_error error; + + flow_idx = flow_list_create(dev, NULL, &attr, items, + actions, true, &error); + /* + * If batch counter with offset is not supported, the driver will not + * validate the invalid offset value, flow create will successfully. + * In this case, it means batch counter is not supported in root table. + * + * Otherwise, if flow create failed with other cases, report error + * message. + */ + if (flow_idx) { + flow_list_destroy(dev, NULL, flow_idx); + DRV_LOG(WARNING, "Batch counter is not supported in root " + "table. 
Switch to fallback mode."); + rte_errno = ENOTSUP; + ret = -rte_errno; + } else { + if (errno != EINVAL) + DRV_LOG(ERR, "Counter may not work correctly as " + "validate fail with unknown reason."); + ret = 0; + } + if (priv->cnt_action) { + mlx5_flow_os_destroy_flow_action(priv->cnt_action); + priv->cnt_action = NULL; + } + if (priv->cnt_dcs) { + mlx5_devx_cmd_destroy(priv->cnt_dcs); + priv->cnt_dcs = NULL; + } + return ret; +} + +/** * Dump flow raw hw data to file * * @param[in] dev diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index 279daf2..344634f 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -35,6 +35,7 @@ enum mlx5_rte_flow_action_type { MLX5_RTE_FLOW_ACTION_TYPE_MARK, MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS, + MLX5_RTE_FLOW_ACTION_TYPE_COUNT, }; /* Matches on selected register. */ @@ -1069,4 +1070,5 @@ int mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev, const struct rte_flow_attr *attr); int mlx5_flow_meter_flush(struct rte_eth_dev *dev, struct rte_mtr_error *error); +int mlx5_flow_discover_counter_offset_support(struct rte_eth_dev *dev); #endif /* RTE_PMD_MLX5_FLOW_H_ */ diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 10be990..43d8ea8 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -4168,19 +4168,13 @@ struct field_modify_info modify_tcp[] = { struct mlx5_flow_counter_pool **ppool) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_pools_container *cont; + struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng; struct mlx5_flow_counter_pool *pool; - uint32_t batch = 0; /* Decrease to original index and clear shared bit. */ idx = (idx - 1) & (MLX5_CNT_SHARED_OFFSET - 1); - if (IS_BATCH_CNT(idx)) { - idx -= MLX5_CNT_BATCH_OFFSET; - batch = 1; - } - cont = MLX5_CNT_CONTAINER(priv->sh, batch); - MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cont->n); - pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL]; + MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cmng->n); + pool = cmng->pools[idx / MLX5_COUNTERS_PER_POOL]; MLX5_ASSERT(pool); if (ppool) *ppool = pool; @@ -4212,8 +4206,8 @@ struct field_modify_info modify_tcp[] = { /** * Get a pool by devx counter ID. * - * @param[in] cont - * Pointer to the counter container. + * @param[in] cmng + * Pointer to the counter management. * @param[in] id * The counter devx ID. * @@ -4221,25 +4215,25 @@ struct field_modify_info modify_tcp[] = { * The counter pool pointer if exists, NULL otherwise, */ static struct mlx5_flow_counter_pool * -flow_dv_find_pool_by_id(struct mlx5_pools_container *cont, int id) +flow_dv_find_pool_by_id(struct mlx5_flow_counter_mng *cmng, int id) { uint32_t i; /* Check last used pool. */ - if (cont->last_pool_idx != POOL_IDX_INVALID && - flow_dv_is_counter_in_pool(cont->pools[cont->last_pool_idx], id)) - return cont->pools[cont->last_pool_idx]; + if (cmng->last_pool_idx != POOL_IDX_INVALID && + flow_dv_is_counter_in_pool(cmng->pools[cmng->last_pool_idx], id)) + return cmng->pools[cmng->last_pool_idx]; /* ID out of range means no suitable pool in the container. */ - if (id > cont->max_id || id < cont->min_id) + if (id > cmng->max_id || id < cmng->min_id) return NULL; /* * Find the pool from the end of the container, since mostly counter * ID is sequence increasing, and the last pool should be the needed * one. 
*/ - i = rte_atomic16_read(&cont->n_valid); + i = rte_atomic16_read(&cmng->n_valid); while (i--) { - struct mlx5_flow_counter_pool *pool = cont->pools[i]; + struct mlx5_flow_counter_pool *pool = cmng->pools[i]; if (flow_dv_is_counter_in_pool(pool, id)) return pool; @@ -4329,20 +4323,18 @@ struct field_modify_info modify_tcp[] = { * * @param[in] dev * Pointer to the Ethernet device structure. - * @param[in] batch - * Whether the pool is for counter that was allocated by batch command. * * @return * 0 on success, otherwise negative errno value and rte_errno is set. */ static int -flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch) +flow_dv_container_resize(struct rte_eth_dev *dev) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch); + struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng; struct mlx5_counter_stats_mem_mng *mem_mng = NULL; - void *old_pools = cont->pools; - uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE; + void *old_pools = cmng->pools; + uint32_t resize = cmng->n + MLX5_CNT_CONTAINER_RESIZE; uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize; void *pools = mlx5_malloc(MLX5_MEM_ZERO, mem_size, 0, SOCKET_ID_ANY); @@ -4351,7 +4343,7 @@ struct field_modify_info modify_tcp[] = { return -ENOMEM; } if (old_pools) - memcpy(pools, old_pools, cont->n * + memcpy(pools, old_pools, cmng->n * sizeof(struct mlx5_flow_counter_pool *)); /* * Fallback mode query the counter directly, no background query @@ -4372,11 +4364,11 @@ struct field_modify_info modify_tcp[] = { MLX5_CNT_CONTAINER_RESIZE + i, next); } - rte_spinlock_lock(&cont->resize_sl); - cont->n = resize; - cont->mem_mng = mem_mng; - cont->pools = pools; - rte_spinlock_unlock(&cont->resize_sl); + rte_spinlock_lock(&cmng->resize_sl); + cmng->n = resize; + cmng->mem_mng = mem_mng; + cmng->pools = pools; + rte_spinlock_unlock(&cmng->resize_sl); if (old_pools) mlx5_free(old_pools); return 0; @@ -4409,27 +4401,16 @@ struct field_modify_info modify_tcp[] = { cnt = flow_dv_counter_get_by_idx(dev, counter, &pool); MLX5_ASSERT(pool); - if (!IS_BATCH_CNT(counter)) { + if (priv->counter_fallback) { cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt); - if (priv->counter_fallback) - return mlx5_devx_cmd_flow_counter_query(cnt_ext->dcs, 0, + return mlx5_devx_cmd_flow_counter_query(cnt_ext->dcs, 0, 0, pkts, bytes, 0, NULL, NULL, 0); } rte_spinlock_lock(&pool->sl); - /* - * The single counters allocation may allocate smaller ID than the - * current allocated in parallel to the host reading. - * In this case the new counter values must be reported as 0. - */ - if (unlikely(cnt_ext && cnt_ext->dcs->id < pool->raw->min_dcs_id)) { - *pkts = 0; - *bytes = 0; - } else { - offset = MLX5_CNT_ARRAY_IDX(pool, cnt); - *pkts = rte_be_to_cpu_64(pool->raw->data[offset].hits); - *bytes = rte_be_to_cpu_64(pool->raw->data[offset].bytes); - } + offset = MLX5_CNT_ARRAY_IDX(pool, cnt); + *pkts = rte_be_to_cpu_64(pool->raw->data[offset].hits); + *bytes = rte_be_to_cpu_64(pool->raw->data[offset].bytes); rte_spinlock_unlock(&pool->sl); return 0; } @@ -4441,8 +4422,6 @@ struct field_modify_info modify_tcp[] = { * Pointer to the Ethernet device structure. * @param[out] dcs * The devX counter handle. - * @param[in] batch - * Whether the pool is for counter that was allocated by batch command. * @param[in] age * Whether the pool is for counter that was allocated for aging. 
* @param[in/out] cont_cur @@ -4453,123 +4432,63 @@ struct field_modify_info modify_tcp[] = { */ static struct mlx5_flow_counter_pool * flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs, - uint32_t batch, uint32_t age) + uint32_t age) { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_flow_counter_pool *pool; - struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch); - int16_t n_valid = rte_atomic16_read(&cont->n_valid); + struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng; + int16_t n_valid = rte_atomic16_read(&cmng->n_valid); + uint32_t fallback = priv->counter_fallback; uint32_t size = sizeof(*pool); - if (cont->n == n_valid && flow_dv_container_resize(dev, batch)) + if (cmng->n == n_valid && flow_dv_container_resize(dev)) return NULL; size += MLX5_COUNTERS_PER_POOL * CNT_SIZE; - size += (batch ? 0 : MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE); + size += (!fallback ? 0 : MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE); size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * AGE_SIZE); pool = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY); if (!pool) { rte_errno = ENOMEM; return NULL; } - pool->min_dcs = dcs; - if (!priv->counter_fallback) - pool->raw = cont->mem_mng->raws + n_valid % + if (!fallback) { + pool->min_dcs = dcs; + pool->raw = cmng->mem_mng->raws + n_valid % MLX5_CNT_CONTAINER_RESIZE; + } pool->raw_hw = NULL; pool->type = 0; - pool->type |= (batch ? 0 : CNT_POOL_TYPE_EXT); + pool->type |= (!fallback ? 0 : CNT_POOL_TYPE_EXT); pool->type |= (!age ? 0 : CNT_POOL_TYPE_AGE); pool->query_gen = 0; rte_spinlock_init(&pool->sl); TAILQ_INIT(&pool->counters[0]); TAILQ_INIT(&pool->counters[1]); - TAILQ_INSERT_HEAD(&cont->pool_list, pool, next); + TAILQ_INSERT_HEAD(&cmng->pool_list, pool, next); pool->index = n_valid; - cont->pools[n_valid] = pool; - if (!batch) { + cmng->pools[n_valid] = pool; + if (fallback) { int base = RTE_ALIGN_FLOOR(dcs->id, MLX5_COUNTERS_PER_POOL); - if (base < cont->min_id) - cont->min_id = base; - if (base > cont->max_id) - cont->max_id = base + MLX5_COUNTERS_PER_POOL - 1; - cont->last_pool_idx = pool->index; + if (base < cmng->min_id) + cmng->min_id = base; + if (base > cmng->max_id) + cmng->max_id = base + MLX5_COUNTERS_PER_POOL - 1; + cmng->last_pool_idx = pool->index; } /* Pool initialization must be updated before host thread access. */ rte_io_wmb(); - rte_atomic16_add(&cont->n_valid, 1); + rte_atomic16_add(&cmng->n_valid, 1); return pool; } /** - * Restore skipped counters in the pool. - * - * As counter pool query requires the first counter dcs - * ID start with 4 alinged, if the pool counters with - * min_dcs ID are not aligned with 4, the counters will - * be skipped. - * Once other min_dcs ID less than these skipped counter - * dcs ID appears, the skipped counters will be safe to - * use. - * Should be called when min_dcs is updated. - * - * @param[in] pool - * Current counter pool. - * @param[in] last_min_dcs - * Last min_dcs. - */ -static void -flow_dv_counter_restore(struct mlx5_flow_counter_pool *pool, - struct mlx5_devx_obj *last_min_dcs) -{ - struct mlx5_flow_counter_ext *cnt_ext; - uint32_t offset, new_offset; - uint32_t skip_cnt = 0; - uint32_t i; - - if (!pool->skip_cnt) - return; - /* - * If last min_dcs is not valid. The skipped counter may even after - * last min_dcs, set the offset to the whole pool. 
- */ - if (last_min_dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1)) - offset = MLX5_COUNTERS_PER_POOL; - else - offset = last_min_dcs->id % MLX5_COUNTERS_PER_POOL; - new_offset = pool->min_dcs->id % MLX5_COUNTERS_PER_POOL; - /* - * Check the counters from 1 to the last_min_dcs range. Counters - * before new min_dcs indicates pool still has skipped counters. - * Counters be skipped after new min_dcs will be ready to use. - * Offset 0 counter must be empty or min_dcs, start from 1. - */ - for (i = 1; i < offset; i++) { - cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i); - if (cnt_ext->skipped) { - if (i > new_offset) { - cnt_ext->skipped = 0; - TAILQ_INSERT_TAIL - (&pool->counters[pool->query_gen], - MLX5_POOL_GET_CNT(pool, i), next); - } else { - skip_cnt++; - } - } - } - if (!skip_cnt) - pool->skip_cnt = 0; -} - -/** * Prepare a new counter and/or a new counter pool. * * @param[in] dev * Pointer to the Ethernet device structure. * @param[out] cnt_free * Where to put the pointer of a new counter. - * @param[in] batch - * Whether the pool is for counter that was allocated by batch command. * @param[in] age * Whether the pool is for counter that was allocated for aging. * @@ -4580,87 +4499,36 @@ struct field_modify_info modify_tcp[] = { static struct mlx5_flow_counter_pool * flow_dv_counter_pool_prepare(struct rte_eth_dev *dev, struct mlx5_flow_counter **cnt_free, - uint32_t batch, uint32_t age) + uint32_t age) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_pools_container *cont; + struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng; struct mlx5_flow_counter_pool *pool; struct mlx5_counters tmp_tq; - struct mlx5_devx_obj *last_min_dcs; struct mlx5_devx_obj *dcs = NULL; struct mlx5_flow_counter *cnt; enum mlx5_counter_type cnt_type = age ? MLX5_COUNTER_TYPE_AGE : MLX5_COUNTER_TYPE_ORIGIN; - uint32_t add2other; + uint32_t fallback = priv->counter_fallback; uint32_t i; - cont = MLX5_CNT_CONTAINER(priv->sh, batch); - if (!batch) { -retry: - add2other = 0; + if (fallback) { /* bulk_bitmap must be 0 for single counter allocation. */ dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0); if (!dcs) return NULL; - pool = flow_dv_find_pool_by_id(cont, dcs->id); - /* - * If pool eixsts but with other type, counter will be added - * to the other pool, need to reallocate new counter in the - * ragne with same type later. - */ - if (pool && ((!!IS_AGE_POOL(pool)) != age)) { - add2other = 1; - } else if (!pool) { - pool = flow_dv_pool_create(dev, dcs, batch, - age); + pool = flow_dv_find_pool_by_id(cmng, dcs->id); + if (!pool) { + pool = flow_dv_pool_create(dev, dcs, age); if (!pool) { mlx5_devx_cmd_destroy(dcs); return NULL; } } - if ((dcs->id < pool->min_dcs->id || - pool->min_dcs->id & - (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1)) && - !(dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1))) { - /* - * Update the pool min_dcs only if current dcs is - * valid and exist min_dcs is not valid or greater - * than new dcs. - */ - last_min_dcs = pool->min_dcs; - rte_atomic64_set(&pool->a64_dcs, - (int64_t)(uintptr_t)dcs); - /* - * Restore any skipped counters if the new min_dcs - * ID is smaller or min_dcs is not valid. 
- */ - if (dcs->id < last_min_dcs->id || - last_min_dcs->id & - (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1)) - flow_dv_counter_restore(pool, last_min_dcs); - } i = dcs->id % MLX5_COUNTERS_PER_POOL; cnt = MLX5_POOL_GET_CNT(pool, i); cnt->pool = pool; MLX5_GET_POOL_CNT_EXT(pool, i)->dcs = dcs; - /* - * If min_dcs is not valid, it means the new allocated dcs - * also fail to become the valid min_dcs, just skip it. - * Or if min_dcs is valid, and new dcs ID is smaller than - * min_dcs, but not become the min_dcs, also skip it. - */ - if (pool->min_dcs->id & - (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1) || - dcs->id < pool->min_dcs->id) { - MLX5_GET_POOL_CNT_EXT(pool, i)->skipped = 1; - pool->skip_cnt = 1; - goto retry; - } - if (add2other) { - TAILQ_INSERT_TAIL(&pool->counters[pool->query_gen], - cnt, next); - goto retry; - } *cnt_free = cnt; return pool; } @@ -4671,7 +4539,7 @@ struct field_modify_info modify_tcp[] = { rte_errno = ENODATA; return NULL; } - pool = flow_dv_pool_create(dev, dcs, batch, age); + pool = flow_dv_pool_create(dev, dcs, age); if (!pool) { mlx5_devx_cmd_destroy(dcs); return NULL; @@ -4682,9 +4550,9 @@ struct field_modify_info modify_tcp[] = { cnt->pool = pool; TAILQ_INSERT_HEAD(&tmp_tq, cnt, next); } - rte_spinlock_lock(&cont->csl); - TAILQ_CONCAT(&cont->counters[cnt_type], &tmp_tq, next); - rte_spinlock_unlock(&cont->csl); + rte_spinlock_lock(&cmng->csl); + TAILQ_CONCAT(&cmng->counters[cnt_type], &tmp_tq, next); + rte_spinlock_unlock(&cmng->csl); *cnt_free = MLX5_POOL_GET_CNT(pool, 0); (*cnt_free)->pool = pool; return pool; @@ -4721,8 +4589,6 @@ struct field_modify_info modify_tcp[] = { * Indicate if this counter is shared with other flows. * @param[in] id * Counter identifier. - * @param[in] group - * Counter flow group. * @param[in] age * Whether the counter was allocated for aging. * @@ -4731,22 +4597,14 @@ struct field_modify_info modify_tcp[] = { */ static uint32_t flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, - uint16_t group, uint32_t age) + uint32_t age) { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_flow_counter_pool *pool = NULL; struct mlx5_flow_counter *cnt_free = NULL; struct mlx5_flow_counter_ext *cnt_ext = NULL; - /* - * Currently group 0 flow counter cannot be assigned to a flow if it is - * not the first one in the batch counter allocation, so it is better - * to allocate counters one by one for these flows in a separate - * container. - * A counter can be shared between different groups so need to take - * shared counters from the single container. - */ - uint32_t batch = (group && !shared && !priv->counter_fallback) ? 1 : 0; - struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch); + uint32_t fallback = priv->counter_fallback; + struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng; enum mlx5_counter_type cnt_type = age ? MLX5_COUNTER_TYPE_AGE : MLX5_COUNTER_TYPE_ORIGIN; uint32_t cnt_idx; @@ -4769,16 +4627,15 @@ struct field_modify_info modify_tcp[] = { } } /* Get free counters from container. 
*/ - rte_spinlock_lock(&cont->csl); - cnt_free = TAILQ_FIRST(&cont->counters[cnt_type]); + rte_spinlock_lock(&cmng->csl); + cnt_free = TAILQ_FIRST(&cmng->counters[cnt_type]); if (cnt_free) - TAILQ_REMOVE(&cont->counters[cnt_type], cnt_free, next); - rte_spinlock_unlock(&cont->csl); - if (!cnt_free && !flow_dv_counter_pool_prepare(dev, &cnt_free, - batch, age)) + TAILQ_REMOVE(&cmng->counters[cnt_type], cnt_free, next); + rte_spinlock_unlock(&cmng->csl); + if (!cnt_free && !flow_dv_counter_pool_prepare(dev, &cnt_free, age)) goto err; pool = cnt_free->pool; - if (!batch) + if (fallback) cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free); /* Create a DV counter action only in the first time usage. */ if (!cnt_free->action) { @@ -4786,7 +4643,7 @@ struct field_modify_info modify_tcp[] = { struct mlx5_devx_obj *dcs; int ret; - if (batch) { + if (!fallback) { offset = MLX5_CNT_ARRAY_IDX(pool, cnt_free); dcs = pool->min_dcs; } else { @@ -4802,7 +4659,6 @@ struct field_modify_info modify_tcp[] = { } cnt_idx = MLX5_MAKE_CNT_IDX(pool->index, MLX5_CNT_ARRAY_IDX(pool, cnt_free)); - cnt_idx += batch * MLX5_CNT_BATCH_OFFSET; /* Update the counter reset values. */ if (_flow_dv_query_count(dev, cnt_idx, &cnt_free->hits, &cnt_free->bytes)) @@ -4817,16 +4673,16 @@ struct field_modify_info modify_tcp[] = { cnt_free->shared_info.id = id; cnt_idx |= MLX5_CNT_SHARED_OFFSET; } - if (!priv->counter_fallback && !priv->sh->cmng.query_thread_on) + if (!fallback && !priv->sh->cmng.query_thread_on) /* Start the asynchronous batch query by the host thread. */ mlx5_set_query_alarm(priv->sh); return cnt_idx; err: if (cnt_free) { cnt_free->pool = pool; - rte_spinlock_lock(&cont->csl); - TAILQ_INSERT_TAIL(&cont->counters[cnt_type], cnt_free, next); - rte_spinlock_unlock(&cont->csl); + rte_spinlock_lock(&cmng->csl); + TAILQ_INSERT_TAIL(&cmng->counters[cnt_type], cnt_free, next); + rte_spinlock_unlock(&cmng->csl); } return 0; } @@ -4909,7 +4765,6 @@ struct field_modify_info modify_tcp[] = { return; cnt = flow_dv_counter_get_by_idx(dev, counter, &pool); MLX5_ASSERT(pool); - if (IS_SHARED_CNT(counter)) { if (--cnt->shared_info.ref_cnt) return; @@ -4934,13 +4789,43 @@ struct field_modify_info modify_tcp[] = { } else { cnt_type = IS_AGE_POOL(pool) ? MLX5_COUNTER_TYPE_AGE : MLX5_COUNTER_TYPE_ORIGIN; - TAILQ_INSERT_TAIL(&((MLX5_CNT_CONTAINER - (priv->sh, 0))->counters[cnt_type]), + TAILQ_INSERT_TAIL(&priv->sh->cmng.counters[cnt_type], cnt, next); } } /** + * Create a counter action with invalid offset. + * + * @param[in] dev + * Pointer to the Ethernet device structure. + * + * @return + * Counter action pointer if success, NULL otherwise. + */ +static void* +flow_dv_counter_create_invalid(struct rte_eth_dev *dev) +{ + struct mlx5_priv *priv = dev->data->dev_private; + int ret; + + if (!priv->cnt_dcs) { + priv->cnt_dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, + 0x4); + if (!priv->cnt_dcs) + return NULL; + } + if (!priv->cnt_action) { + ret = mlx5_flow_os_create_flow_action_count(priv->cnt_dcs->obj, + UINT16_MAX, + &priv->cnt_action); + if (ret) + return NULL; + } + return priv->cnt_action; +} + +/** * Verify the @p attributes will be correctly understood by the NIC and store * them in the @p flow if everything is correct. 
* @@ -5781,6 +5666,8 @@ struct field_modify_info modify_tcp[] = { action_flags |= MLX5_FLOW_ACTION_SET_IPV6_DSCP; rw_act_num += MLX5_ACT_NUM_SET_DSCP; break; + case MLX5_RTE_FLOW_ACTION_TYPE_COUNT: + break; default: return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, @@ -7988,8 +7875,7 @@ struct field_modify_info modify_tcp[] = { counter = flow_dv_counter_alloc(dev, count ? count->shared : 0, - count ? count->id : 0, - dev_flow->dv.group, !!age); + count ? count->id : 0, !!age); if (!counter || age == NULL) return counter; age_param = flow_dv_counter_idx_get_age(dev, counter); @@ -8359,6 +8245,13 @@ struct field_modify_info modify_tcp[] = { age = action->conf; action_flags |= MLX5_FLOW_ACTION_COUNT; break; + case MLX5_RTE_FLOW_ACTION_TYPE_COUNT: + if (flow_dv_counter_create_invalid(dev) == NULL) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "count action not supported"); + dev_flow->dv.actions[actions_n++] = priv->cnt_action; + break; case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN: dev_flow->dv.actions[actions_n++] = priv->sh->pop_vlan_action; @@ -10112,7 +10005,7 @@ struct field_modify_info modify_tcp[] = { uint32_t cnt; flow_dv_shared_lock(dev); - cnt = flow_dv_counter_alloc(dev, 0, 0, 1, 0); + cnt = flow_dv_counter_alloc(dev, 0, 0, 0); flow_dv_shared_unlock(dev); return cnt; } diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c index 0463bea..f3b0e89 100644 --- a/drivers/net/mlx5/mlx5_flow_verbs.c +++ b/drivers/net/mlx5/mlx5_flow_verbs.c @@ -159,11 +159,11 @@ struct mlx5_flow_counter_pool **ppool) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0); + struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng; struct mlx5_flow_counter_pool *pool; idx = (idx - 1) & (MLX5_CNT_SHARED_OFFSET - 1); - pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL]; + pool = cmng->pools[idx / MLX5_COUNTERS_PER_POOL]; MLX5_ASSERT(pool); if (ppool) *ppool = pool; @@ -254,12 +254,12 @@ flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0); + struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng; struct mlx5_flow_counter_pool *pool = NULL; struct mlx5_flow_counter_ext *cnt_ext = NULL; struct mlx5_flow_counter *cnt = NULL; union mlx5_l3t_data data; - uint32_t n_valid = rte_atomic16_read(&cont->n_valid); + uint32_t n_valid = rte_atomic16_read(&cmng->n_valid); uint32_t pool_idx, cnt_idx; uint32_t i; int ret; @@ -275,7 +275,7 @@ return data.dword; } for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) { - pool = cont->pools[pool_idx]; + pool = cmng->pools[pool_idx]; if (!pool) continue; cnt = TAILQ_FIRST(&pool->counters[0]); @@ -286,7 +286,7 @@ struct mlx5_flow_counter_pool **pools; uint32_t size; - if (n_valid == cont->n) { + if (n_valid == cmng->n) { /* Resize the container pool array. 
*/ size = sizeof(struct mlx5_flow_counter_pool *) * (n_valid + MLX5_CNT_CONTAINER_RESIZE); @@ -295,13 +295,13 @@ if (!pools) return 0; if (n_valid) { - memcpy(pools, cont->pools, + memcpy(pools, cmng->pools, sizeof(struct mlx5_flow_counter_pool *) * n_valid); - mlx5_free(cont->pools); + mlx5_free(cmng->pools); } - cont->pools = pools; - cont->n += MLX5_CNT_CONTAINER_RESIZE; + cmng->pools = pools; + cmng->n += MLX5_CNT_CONTAINER_RESIZE; } /* Allocate memory for new pool*/ size = sizeof(*pool) + (sizeof(*cnt_ext) + sizeof(*cnt)) * @@ -315,10 +315,10 @@ TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next); } cnt = MLX5_POOL_GET_CNT(pool, 0); - cont->pools[n_valid] = pool; + cmng->pools[n_valid] = pool; pool_idx = n_valid; - rte_atomic16_add(&cont->n_valid, 1); - TAILQ_INSERT_HEAD(&cont->pool_list, pool, next); + rte_atomic16_add(&cmng->n_valid, 1); + TAILQ_INSERT_HEAD(&cmng->pool_list, pool, next); } i = MLX5_CNT_ARRAY_IDX(pool, cnt); cnt_idx = MLX5_MAKE_CNT_IDX(pool_idx, i); From patchwork Tue Oct 6 11:38:51 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Suanming Mou X-Patchwork-Id: 79752 X-Patchwork-Delegate: rasland@nvidia.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 4F494A04BB; Tue, 6 Oct 2020 13:41:15 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id B06A01B677; Tue, 6 Oct 2020 13:39:16 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by dpdk.org (Postfix) with ESMTP id 1114A2C54 for ; Tue, 6 Oct 2020 13:39:09 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from suanmingm@nvidia.com) with SMTP; 6 Oct 2020 14:39:04 +0300 Received: from nvidia.com (mtbc-r640-04.mtbc.labs.mlnx [10.75.70.9]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 096BcuYP014182; Tue, 6 Oct 2020 14:39:03 +0300 From: Suanming Mou To: viacheslavo@nvidia.com, matan@nvidia.com Cc: rasland@nvidia.com, dev@dpdk.org Date: Tue, 6 Oct 2020 19:38:51 +0800 Message-Id: <1601984333-304464-5-git-send-email-suanmingm@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1601984333-304464-1-git-send-email-suanmingm@nvidia.com> References: <1601984333-304464-1-git-send-email-suanmingm@nvidia.com> Subject: [dpdk-dev] [PATCH 4/6] net/mlx5: synchronize flow counter pool creation X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Currently, counter operations are not thread safe because the counter pools' array resize is not protected. This commit protects the counter pools' array resize with a spinlock. The counter pool statistic memory allocation is moved to the host thread to keep the critical section small, since that statistic memory is only needed at query time. The pools' array still has to be resized by the user threads, because a new pool may be used by other rte_flow APIs before the host thread completes the resize; if the pool were not saved to the counter management pools' array, the counter memory belonging to it could not be found. The pool raw statistic memory is filled in by the host thread. The shared counters will be protected in another commit.
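For illustration only, here is a minimal, self-contained sketch of the locking scheme described above. It uses POSIX spinlocks and plain calloc() in place of rte_spinlock_t and mlx5_malloc(), and the names (pool_mng, pool_register, RESIZE_STEP) are hypothetical, not the driver's API. The array is grown and the new pool is published inside one critical section, so a concurrent reader sees either the old length or a fully registered pool; the per-pool statistics memory stays NULL and is attached later by the query (host) thread.

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define RESIZE_STEP 64

struct pool {
	uint16_t index; /* Slot in the manager's pools array. */
	void *raw;      /* Statistics memory, attached later by the host thread. */
};

struct pool_mng {
	pthread_spinlock_t sl; /* Protects n, n_valid and pools; init with pthread_spin_init(). */
	uint16_t n;            /* Allocated slots. */
	uint16_t n_valid;      /* Registered pools. */
	struct pool **pools;   /* Growable pool array. */
};

/* Create and register a new pool; grow the array under the lock when it is full. */
static struct pool *
pool_register(struct pool_mng *mng)
{
	struct pool *pool = calloc(1, sizeof(*pool));

	if (!pool)
		return NULL;
	pthread_spin_lock(&mng->sl);
	if (mng->n_valid == mng->n) {
		uint16_t n = mng->n + RESIZE_STEP;
		struct pool **pools = calloc(n, sizeof(*pools));

		if (!pools) {
			pthread_spin_unlock(&mng->sl);
			free(pool);
			return NULL;
		}
		if (mng->pools) {
			memcpy(pools, mng->pools, mng->n * sizeof(*pools));
			free(mng->pools);
		}
		mng->pools = pools;
		mng->n = n;
	}
	pool->index = mng->n_valid;
	pool->raw = NULL; /* Filled in later, outside the critical section. */
	mng->pools[mng->n_valid++] = pool;
	pthread_spin_unlock(&mng->sl);
	return pool;
}

A reader that snapshots n_valid under the same lock can then walk the array without racing a resize, and a pool whose raw statistics pointer is still NULL simply reports zeroed counters until the host thread fills it in, which matches the behavior this patch gives _flow_dv_query_count().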
Signed-off-by: Suanming Mou --- drivers/net/mlx5/mlx5.c | 12 ++- drivers/net/mlx5/mlx5.h | 11 ++- drivers/net/mlx5/mlx5_flow.c | 127 ++++++++++++++++++++++++++++++-- drivers/net/mlx5/mlx5_flow_dv.c | 146 ++++++------------------------------- drivers/net/mlx5/mlx5_flow_verbs.c | 5 +- 5 files changed, 160 insertions(+), 141 deletions(-) diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 96cebba..79c5563 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -465,8 +465,7 @@ struct mlx5_flow_id_pool * sh->cmng.min_id = MLX5_CNT_BATCH_OFFSET; sh->cmng.max_id = -1; sh->cmng.last_pool_idx = POOL_IDX_INVALID; - TAILQ_INIT(&sh->cmng.pool_list); - rte_spinlock_init(&sh->cmng.resize_sl); + rte_spinlock_init(&sh->cmng.pool_update_sl); for (i = 0; i < MLX5_COUNTER_TYPE_MAX; i++) TAILQ_INIT(&sh->cmng.counters[i]); rte_spinlock_init(&sh->cmng.csl); @@ -499,7 +498,7 @@ struct mlx5_flow_id_pool * mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh) { struct mlx5_counter_stats_mem_mng *mng; - int j; + int i, j; int retries = 1024; rte_errno = 0; @@ -512,9 +511,10 @@ struct mlx5_flow_id_pool * if (sh->cmng.pools) { struct mlx5_flow_counter_pool *pool; + int n_valid = sh->cmng.n_valid; - pool = TAILQ_FIRST(&sh->cmng.pool_list); - while (pool) { + for (i = 0; i < n_valid; ++i) { + pool = sh->cmng.pools[i]; if (!IS_EXT_POOL(pool) && pool->min_dcs) claim_zero(mlx5_devx_cmd_destroy (pool->min_dcs)); @@ -530,9 +530,7 @@ struct mlx5_flow_id_pool * (MLX5_GET_POOL_CNT_EXT (pool, j)->dcs)); } - TAILQ_REMOVE(&sh->cmng.pool_list, pool, next); mlx5_free(pool); - pool = TAILQ_FIRST(&sh->cmng.pool_list); } mlx5_free(sh->cmng.pools); } diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index a3d4ad9..8c951e2 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -395,7 +395,11 @@ struct mlx5_flow_counter_pool { volatile uint32_t query_gen:1; /* Query round. */ rte_spinlock_t sl; /* The pool lock. */ struct mlx5_counter_stats_raw *raw; - struct mlx5_counter_stats_raw *raw_hw; /* The raw on HW working. */ + union { + struct rte_eth_dev *dev; /* The counter pool create device. */ + struct mlx5_counter_stats_raw *raw_hw; + /* The raw on HW working. */ + }; }; struct mlx5_counter_stats_raw; @@ -419,16 +423,15 @@ struct mlx5_counter_stats_raw { /* Counter global management structure. */ struct mlx5_flow_counter_mng { - rte_atomic16_t n_valid; /* Number of valid pools. */ + volatile uint16_t n_valid; /* Number of valid pools. */ uint16_t n; /* Number of pools. */ uint16_t last_pool_idx; /* Last used pool index */ int min_id; /* The minimum counter ID in the pools. */ int max_id; /* The maximum counter ID in the pools. */ - rte_spinlock_t resize_sl; /* The resize lock. */ + rte_spinlock_t pool_update_sl; /* The pool update lock. */ rte_spinlock_t csl; /* The counter free list lock. */ struct mlx5_counters counters[MLX5_COUNTER_TYPE_MAX]; /* Free counter list. */ - struct mlx5_counter_pools pool_list; /* Counter pool list. */ struct mlx5_flow_counter_pool **pools; /* Counter pool array. */ struct mlx5_counter_stats_mem_mng *mem_mng; /* Hold the memory management for the next allocated pools raws. 
*/ diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index c280f56..a9664b8 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #include "mlx5_flow.h" #include "mlx5_flow_os.h" #include "mlx5_rxtx.h" +#include "mlx5_common_os.h" /** Device flow drivers. */ extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops; @@ -5880,6 +5882,116 @@ struct mlx5_meter_domains_infos * return -ENOTSUP; } +/** + * Allocate a new memory for the counter values wrapped by all the needed + * management. + * + * @param[in] dev + * Pointer to the Ethernet device structure. + * @param[in] raws_n + * The raw memory areas - each one for MLX5_COUNTERS_PER_POOL counters. + * + * @return + * 0 on success, a negative errno value otherwise. + */ +static int +mlx5_flow_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_dev_ctx_shared *sh = priv->sh; + struct mlx5_devx_mkey_attr mkey_attr; + struct mlx5_counter_stats_mem_mng *mem_mng; + volatile struct flow_counter_stats *raw_data; + int size = (sizeof(struct flow_counter_stats) * + MLX5_COUNTERS_PER_POOL + + sizeof(struct mlx5_counter_stats_raw)) * raws_n + + sizeof(struct mlx5_counter_stats_mem_mng); + size_t pgsize = rte_mem_page_size(); + if (pgsize == (size_t)-1) { + DRV_LOG(ERR, "Failed to get mem page size"); + rte_errno = ENOMEM; + return -ENOMEM; + } + uint8_t *mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, + SOCKET_ID_ANY); + int i; + + if (!mem) { + rte_errno = ENOMEM; + return -ENOMEM; + } + mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1; + size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n; + mem_mng->umem = mlx5_glue->devx_umem_reg(sh->ctx, mem, size, + IBV_ACCESS_LOCAL_WRITE); + if (!mem_mng->umem) { + rte_errno = errno; + mlx5_free(mem); + return -rte_errno; + } + mkey_attr.addr = (uintptr_t)mem; + mkey_attr.size = size; + mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem); + mkey_attr.pd = sh->pdn; + mkey_attr.log_entity_size = 0; + mkey_attr.pg_access = 0; + mkey_attr.klm_array = NULL; + mkey_attr.klm_num = 0; + if (priv->config.hca_attr.relaxed_ordering_write && + priv->config.hca_attr.relaxed_ordering_read && + !haswell_broadwell_cpu) + mkey_attr.relaxed_ordering = 1; + mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr); + if (!mem_mng->dm) { + mlx5_glue->devx_umem_dereg(mem_mng->umem); + rte_errno = errno; + mlx5_free(mem); + return -rte_errno; + } + mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size); + raw_data = (volatile struct flow_counter_stats *)mem; + for (i = 0; i < raws_n; ++i) { + mem_mng->raws[i].mem_mng = mem_mng; + mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL; + } + for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i) + LIST_INSERT_HEAD(&priv->sh->cmng.free_stat_raws, + mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i, + next); + LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next); + priv->sh->cmng.mem_mng = mem_mng; + return 0; +} + +/** + * Set the statistic memory to the new counter pool. + * + * @param[in] cmng + * Pointer to the counter management. + * @param[in] pool + * Pointer to the pool to set the statistic memory. + * + * @return + * 0 on success, a negative errno value otherwise. 
+ */ +static int +mlx5_flow_set_counter_stat_mem(struct mlx5_flow_counter_mng *cmng, + struct mlx5_flow_counter_pool *pool) +{ + /* Resize statistic memory once used out. */ + if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) && + mlx5_flow_create_counter_stat_mem_mng(pool->dev, + MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES)) { + DRV_LOG(ERR, "Cannot resize counter stat mem."); + return -1; + } + MLX5_ASSERT(pool->index < n_valid); + pool->raw = cmng->mem_mng->raws + pool->index % + MLX5_CNT_CONTAINER_RESIZE; + pool->raw_hw = NULL; + return 0; +} + #define MLX5_POOL_QUERY_FREQ_US 1000000 /** @@ -5894,7 +6006,7 @@ struct mlx5_meter_domains_infos * { uint32_t pools_n, us; - pools_n = rte_atomic16_read(&sh->cmng.n_valid); + pools_n = sh->cmng.n_valid; us = MLX5_POOL_QUERY_FREQ_US / pools_n; DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us); if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) { @@ -5920,16 +6032,21 @@ struct mlx5_meter_domains_infos * uint16_t pool_index = sh->cmng.pool_index; struct mlx5_flow_counter_mng *cmng = &sh->cmng; struct mlx5_flow_counter_pool *pool; + int n_valid; if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES) goto set_alarm; - rte_spinlock_lock(&cmng->resize_sl); + rte_spinlock_lock(&cmng->pool_update_sl); if (!cmng->pools) { - rte_spinlock_unlock(&cmng->resize_sl); + rte_spinlock_unlock(&cmng->pool_update_sl); goto set_alarm; } pool = cmng->pools[pool_index]; - rte_spinlock_unlock(&cmng->resize_sl); + n_valid = cmng->n_valid; + rte_spinlock_unlock(&cmng->pool_update_sl); + /* Set the statistic memory to the new created pool. */ + if ((!pool->raw && mlx5_flow_set_counter_stat_mem(cmng, pool))) + goto set_alarm; if (pool->raw_hw) /* There is a pool query in progress. */ goto set_alarm; @@ -5962,7 +6079,7 @@ struct mlx5_meter_domains_infos * LIST_REMOVE(pool->raw_hw, next); sh->cmng.pending_queries++; pool_index++; - if (pool_index >= rte_atomic16_read(&cmng->n_valid)) + if (pool_index >= n_valid) pool_index = 0; set_alarm: sh->cmng.pool_index = pool_index; diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 43d8ea8..31d7fe4 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -4231,7 +4231,7 @@ struct field_modify_info modify_tcp[] = { * ID is sequence increasing, and the last pool should be the needed * one. */ - i = rte_atomic16_read(&cmng->n_valid); + i = cmng->n_valid; while (i--) { struct mlx5_flow_counter_pool *pool = cmng->pools[i]; @@ -4242,83 +4242,6 @@ struct field_modify_info modify_tcp[] = { } /** - * Allocate a new memory for the counter values wrapped by all the needed - * management. - * - * @param[in] dev - * Pointer to the Ethernet device structure. - * @param[in] raws_n - * The raw memory areas - each one for MLX5_COUNTERS_PER_POOL counters. - * - * @return - * The new memory management pointer on success, otherwise NULL and rte_errno - * is set. 
- */ -static struct mlx5_counter_stats_mem_mng * -flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n) -{ - struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_dev_ctx_shared *sh = priv->sh; - struct mlx5_devx_mkey_attr mkey_attr; - struct mlx5_counter_stats_mem_mng *mem_mng; - volatile struct flow_counter_stats *raw_data; - int size = (sizeof(struct flow_counter_stats) * - MLX5_COUNTERS_PER_POOL + - sizeof(struct mlx5_counter_stats_raw)) * raws_n + - sizeof(struct mlx5_counter_stats_mem_mng); - size_t pgsize = rte_mem_page_size(); - if (pgsize == (size_t)-1) { - DRV_LOG(ERR, "Failed to get mem page size"); - rte_errno = ENOMEM; - return NULL; - } - uint8_t *mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, - SOCKET_ID_ANY); - int i; - - if (!mem) { - rte_errno = ENOMEM; - return NULL; - } - mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1; - size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n; - mem_mng->umem = mlx5_glue->devx_umem_reg(sh->ctx, mem, size, - IBV_ACCESS_LOCAL_WRITE); - if (!mem_mng->umem) { - rte_errno = errno; - mlx5_free(mem); - return NULL; - } - mkey_attr.addr = (uintptr_t)mem; - mkey_attr.size = size; - mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem); - mkey_attr.pd = sh->pdn; - mkey_attr.log_entity_size = 0; - mkey_attr.pg_access = 0; - mkey_attr.klm_array = NULL; - mkey_attr.klm_num = 0; - if (priv->config.hca_attr.relaxed_ordering_write && - priv->config.hca_attr.relaxed_ordering_read && - !haswell_broadwell_cpu) - mkey_attr.relaxed_ordering = 1; - mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr); - if (!mem_mng->dm) { - mlx5_glue->devx_umem_dereg(mem_mng->umem); - rte_errno = errno; - mlx5_free(mem); - return NULL; - } - mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size); - raw_data = (volatile struct flow_counter_stats *)mem; - for (i = 0; i < raws_n; ++i) { - mem_mng->raws[i].mem_mng = mem_mng; - mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL; - } - LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next); - return mem_mng; -} - -/** * Resize a counter container. * * @param[in] dev @@ -4332,7 +4255,6 @@ struct field_modify_info modify_tcp[] = { { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng; - struct mlx5_counter_stats_mem_mng *mem_mng = NULL; void *old_pools = cmng->pools; uint32_t resize = cmng->n + MLX5_CNT_CONTAINER_RESIZE; uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize; @@ -4345,30 +4267,8 @@ struct field_modify_info modify_tcp[] = { if (old_pools) memcpy(pools, old_pools, cmng->n * sizeof(struct mlx5_flow_counter_pool *)); - /* - * Fallback mode query the counter directly, no background query - * resources are needed. 
- */ - if (!priv->counter_fallback) { - int i; - - mem_mng = flow_dv_create_counter_stat_mem_mng(dev, - MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES); - if (!mem_mng) { - mlx5_free(pools); - return -ENOMEM; - } - for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i) - LIST_INSERT_HEAD(&priv->sh->cmng.free_stat_raws, - mem_mng->raws + - MLX5_CNT_CONTAINER_RESIZE + - i, next); - } - rte_spinlock_lock(&cmng->resize_sl); cmng->n = resize; - cmng->mem_mng = mem_mng; cmng->pools = pools; - rte_spinlock_unlock(&cmng->resize_sl); if (old_pools) mlx5_free(old_pools); return 0; @@ -4406,11 +4306,15 @@ struct field_modify_info modify_tcp[] = { return mlx5_devx_cmd_flow_counter_query(cnt_ext->dcs, 0, 0, pkts, bytes, 0, NULL, NULL, 0); } - rte_spinlock_lock(&pool->sl); - offset = MLX5_CNT_ARRAY_IDX(pool, cnt); - *pkts = rte_be_to_cpu_64(pool->raw->data[offset].hits); - *bytes = rte_be_to_cpu_64(pool->raw->data[offset].bytes); + if (!pool->raw) { + *pkts = 0; + *bytes = 0; + } else { + offset = MLX5_CNT_ARRAY_IDX(pool, cnt); + *pkts = rte_be_to_cpu_64(pool->raw->data[offset].hits); + *bytes = rte_be_to_cpu_64(pool->raw->data[offset].bytes); + } rte_spinlock_unlock(&pool->sl); return 0; } @@ -4437,12 +4341,9 @@ struct field_modify_info modify_tcp[] = { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_flow_counter_pool *pool; struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng; - int16_t n_valid = rte_atomic16_read(&cmng->n_valid); uint32_t fallback = priv->counter_fallback; uint32_t size = sizeof(*pool); - if (cmng->n == n_valid && flow_dv_container_resize(dev)) - return NULL; size += MLX5_COUNTERS_PER_POOL * CNT_SIZE; size += (!fallback ? 0 : MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE); size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * AGE_SIZE); @@ -4451,23 +4352,25 @@ struct field_modify_info modify_tcp[] = { rte_errno = ENOMEM; return NULL; } - if (!fallback) { - pool->min_dcs = dcs; - pool->raw = cmng->mem_mng->raws + n_valid % - MLX5_CNT_CONTAINER_RESIZE; - } - pool->raw_hw = NULL; + pool->raw = NULL; pool->type = 0; - pool->type |= (!fallback ? 0 : CNT_POOL_TYPE_EXT); pool->type |= (!age ? 0 : CNT_POOL_TYPE_AGE); pool->query_gen = 0; + pool->min_dcs = dcs; rte_spinlock_init(&pool->sl); TAILQ_INIT(&pool->counters[0]); TAILQ_INIT(&pool->counters[1]); - TAILQ_INSERT_HEAD(&cmng->pool_list, pool, next); - pool->index = n_valid; - cmng->pools[n_valid] = pool; - if (fallback) { + rte_spinlock_lock(&cmng->pool_update_sl); + pool->index = cmng->n_valid; + if (pool->index == cmng->n && flow_dv_container_resize(dev)) { + mlx5_free(pool); + rte_spinlock_unlock(&cmng->pool_update_sl); + return NULL; + } + cmng->pools[pool->index] = pool; + pool->dev = dev; + cmng->n_valid++; + if (unlikely(fallback)) { int base = RTE_ALIGN_FLOOR(dcs->id, MLX5_COUNTERS_PER_POOL); if (base < cmng->min_id) @@ -4475,10 +4378,9 @@ struct field_modify_info modify_tcp[] = { if (base > cmng->max_id) cmng->max_id = base + MLX5_COUNTERS_PER_POOL - 1; cmng->last_pool_idx = pool->index; + pool->type |= CNT_POOL_TYPE_EXT; } - /* Pool initialization must be updated before host thread access. 
*/ - rte_io_wmb(); - rte_atomic16_add(&cmng->n_valid, 1); + rte_spinlock_unlock(&cmng->pool_update_sl); return pool; } diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c index f3b0e89..69ecc27 100644 --- a/drivers/net/mlx5/mlx5_flow_verbs.c +++ b/drivers/net/mlx5/mlx5_flow_verbs.c @@ -259,7 +259,7 @@ struct mlx5_flow_counter_ext *cnt_ext = NULL; struct mlx5_flow_counter *cnt = NULL; union mlx5_l3t_data data; - uint32_t n_valid = rte_atomic16_read(&cmng->n_valid); + uint32_t n_valid = cmng->n_valid; uint32_t pool_idx, cnt_idx; uint32_t i; int ret; @@ -317,8 +317,7 @@ cnt = MLX5_POOL_GET_CNT(pool, 0); cmng->pools[n_valid] = pool; pool_idx = n_valid; - rte_atomic16_add(&cmng->n_valid, 1); - TAILQ_INSERT_HEAD(&cmng->pool_list, pool, next); + cmng->n_valid++; } i = MLX5_CNT_ARRAY_IDX(pool, cnt); cnt_idx = MLX5_MAKE_CNT_IDX(pool_idx, i); From patchwork Tue Oct 6 11:38:52 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Suanming Mou X-Patchwork-Id: 79750 X-Patchwork-Delegate: rasland@nvidia.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 7C9C0A04BB; Tue, 6 Oct 2020 13:40:36 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id D8CA81B3E7; Tue, 6 Oct 2020 13:39:13 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by dpdk.org (Postfix) with ESMTP id 1101F2C28 for ; Tue, 6 Oct 2020 13:39:09 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from suanmingm@nvidia.com) with SMTP; 6 Oct 2020 14:39:05 +0300 Received: from nvidia.com (mtbc-r640-04.mtbc.labs.mlnx [10.75.70.9]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 096BcuYQ014182; Tue, 6 Oct 2020 14:39:04 +0300 From: Suanming Mou To: viacheslavo@nvidia.com, matan@nvidia.com Cc: rasland@nvidia.com, dev@dpdk.org Date: Tue, 6 Oct 2020 19:38:52 +0800 Message-Id: <1601984333-304464-6-git-send-email-suanmingm@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1601984333-304464-1-git-send-email-suanmingm@nvidia.com> References: <1601984333-304464-1-git-send-email-suanmingm@nvidia.com> Subject: [dpdk-dev] [PATCH 5/6] net/mlx5: make three level table thread safe X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This commit adds thread safety support to the three-level table by using a spinlock and a reference counter for each table entry.
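For illustration only, a minimal sketch of the per-entry reference counting this commit introduces, flattened to a single fixed-size array with a pthread mutex instead of the three-level layout and rte_spinlock_t; the names (l3t_like_tbl, tbl_get, tbl_set, tbl_clear) are hypothetical. A lookup takes a reference on a populated entry, a set either publishes the data or fails with EEXIST while returning the already-stored value with an extra reference, and a clear only wipes the entry once the last reference is dropped, mirroring the new mlx5_l3t_get_entry/mlx5_l3t_set_entry/mlx5_l3t_clear_entry semantics.

#include <errno.h>
#include <pthread.h>
#include <stdint.h>

#define TBL_SIZE 256

struct entry {
	uint32_t data;    /* 0 means "unset". */
	uint32_t ref_cnt; /* References held on this entry. */
};

struct l3t_like_tbl {
	pthread_mutex_t lock; /* Init with PTHREAD_MUTEX_INITIALIZER; serializes all entry ops. */
	struct entry e[TBL_SIZE];
};

/* Look up an entry; a hit takes a reference. Returns -1 when unset. */
static int32_t
tbl_get(struct l3t_like_tbl *t, uint32_t idx, uint32_t *data)
{
	struct entry *e = &t->e[idx % TBL_SIZE];
	int32_t ret = -1;

	pthread_mutex_lock(&t->lock);
	if (e->data) {
		*data = e->data;
		e->ref_cnt++;
		ret = 0;
	}
	pthread_mutex_unlock(&t->lock);
	return ret;
}

/*
 * Publish data; if another thread already set the entry, return -1 with
 * errno = EEXIST, hand back the stored value and take a reference on it.
 */
static int32_t
tbl_set(struct l3t_like_tbl *t, uint32_t idx, uint32_t *data)
{
	struct entry *e = &t->e[idx % TBL_SIZE];
	int32_t ret = 0;

	pthread_mutex_lock(&t->lock);
	if (e->data) {
		*data = e->data;
		e->ref_cnt++;
		errno = EEXIST;
		ret = -1;
	} else {
		e->data = *data;
		e->ref_cnt = 1;
	}
	pthread_mutex_unlock(&t->lock);
	return ret;
}

/* Drop one reference; the entry is cleared only when the count reaches zero. */
static int32_t
tbl_clear(struct l3t_like_tbl *t, uint32_t idx)
{
	struct entry *e = &t->e[idx % TBL_SIZE];
	int32_t left;

	pthread_mutex_lock(&t->lock);
	left = e->ref_cnt ? (int32_t)--e->ref_cnt : -1;
	if (left == 0)
		e->data = 0;
	pthread_mutex_unlock(&t->lock);
	return left;
}

A caller such as the shared-counter allocation in the next patch can then try the set first and, on EEXIST, release its freshly created object and reuse the value returned in data.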
Signed-off-by: Suanming Mou --- drivers/net/mlx5/mlx5_utils.c | 144 +++++++++++++++++++++++++++++++----------- drivers/net/mlx5/mlx5_utils.h | 52 ++++++++++----- 2 files changed, 142 insertions(+), 54 deletions(-) diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c index fefe833..f3c259d 100644 --- a/drivers/net/mlx5/mlx5_utils.c +++ b/drivers/net/mlx5/mlx5_utils.c @@ -551,26 +551,23 @@ struct mlx5_l3t_tbl * tbl->type = type; switch (type) { case MLX5_L3T_TYPE_WORD: - l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word) + - sizeof(uint16_t) * MLX5_L3T_ET_SIZE; + l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word); l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w"; break; case MLX5_L3T_TYPE_DWORD: - l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword) + - sizeof(uint32_t) * MLX5_L3T_ET_SIZE; + l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword); l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw"; break; case MLX5_L3T_TYPE_QWORD: - l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword) + - sizeof(uint64_t) * MLX5_L3T_ET_SIZE; + l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword); l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw"; break; default: - l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr) + - sizeof(void *) * MLX5_L3T_ET_SIZE; + l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr); l3t_ip_cfg.type = "mlx5_l3t_e_tbl_tpr"; break; } + rte_spinlock_init(&tbl->sl); tbl->eip = mlx5_ipool_create(&l3t_ip_cfg); if (!tbl->eip) { rte_errno = ENOMEM; @@ -620,46 +617,63 @@ struct mlx5_l3t_tbl * mlx5_free(tbl); } -uint32_t +int32_t mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx, union mlx5_l3t_data *data) { struct mlx5_l3t_level_tbl *g_tbl, *m_tbl; + struct mlx5_l3t_entry_word *w_e_tbl; + struct mlx5_l3t_entry_dword *dw_e_tbl; + struct mlx5_l3t_entry_qword *qw_e_tbl; + struct mlx5_l3t_entry_ptr *ptr_e_tbl; void *e_tbl; uint32_t entry_idx; + int32_t ret = -1; + rte_spinlock_lock(&tbl->sl); g_tbl = tbl->tbl; if (!g_tbl) - return -1; + goto out; m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK]; if (!m_tbl) - return -1; + goto out; e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK]; if (!e_tbl) - return -1; + goto out; + ret = 0; entry_idx = idx & MLX5_L3T_ET_MASK; switch (tbl->type) { case MLX5_L3T_TYPE_WORD: - data->word = ((struct mlx5_l3t_entry_word *)e_tbl)->entry - [entry_idx]; + w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl; + data->word = w_e_tbl->entry[entry_idx].data; + if (w_e_tbl->entry[entry_idx].data) + w_e_tbl->entry[entry_idx].ref_cnt++; break; case MLX5_L3T_TYPE_DWORD: - data->dword = ((struct mlx5_l3t_entry_dword *)e_tbl)->entry - [entry_idx]; + dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl; + data->dword = dw_e_tbl->entry[entry_idx].data; + if (dw_e_tbl->entry[entry_idx].data) + dw_e_tbl->entry[entry_idx].ref_cnt++; break; case MLX5_L3T_TYPE_QWORD: - data->qword = ((struct mlx5_l3t_entry_qword *)e_tbl)->entry - [entry_idx]; + qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl; + data->qword = qw_e_tbl->entry[entry_idx].data; + if (qw_e_tbl->entry[entry_idx].data) + qw_e_tbl->entry[entry_idx].ref_cnt++; break; default: - data->ptr = ((struct mlx5_l3t_entry_ptr *)e_tbl)->entry - [entry_idx]; + ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl; + data->ptr = ptr_e_tbl->entry[entry_idx].data; + if (ptr_e_tbl->entry[entry_idx].data) + ptr_e_tbl->entry[entry_idx].ref_cnt++; break; } - return 0; +out: + rte_spinlock_unlock(&tbl->sl); + return ret; } -void +int32_t mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx) { struct mlx5_l3t_level_tbl 
*g_tbl, *m_tbl; @@ -670,36 +684,54 @@ struct mlx5_l3t_tbl * void *e_tbl; uint32_t entry_idx; uint64_t ref_cnt; + int32_t ret = -1; + rte_spinlock_lock(&tbl->sl); g_tbl = tbl->tbl; if (!g_tbl) - return; + goto out; m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK]; if (!m_tbl) - return; + goto out; e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK]; if (!e_tbl) - return; + goto out; entry_idx = idx & MLX5_L3T_ET_MASK; switch (tbl->type) { case MLX5_L3T_TYPE_WORD: w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl; - w_e_tbl->entry[entry_idx] = 0; + MLX5_ASSERT(w_e_tbl->entry[entry_idx].ref_cnt); + ret = --w_e_tbl->entry[entry_idx].ref_cnt; + if (ret) + goto out; + w_e_tbl->entry[entry_idx].data = 0; ref_cnt = --w_e_tbl->ref_cnt; break; case MLX5_L3T_TYPE_DWORD: dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl; - dw_e_tbl->entry[entry_idx] = 0; + MLX5_ASSERT(dw_e_tbl->entry[entry_idx].ref_cnt); + ret = --dw_e_tbl->entry[entry_idx].ref_cnt; + if (ret) + goto out; + dw_e_tbl->entry[entry_idx].data = 0; ref_cnt = --dw_e_tbl->ref_cnt; break; case MLX5_L3T_TYPE_QWORD: qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl; - qw_e_tbl->entry[entry_idx] = 0; + MLX5_ASSERT(qw_e_tbl->entry[entry_idx].ref_cnt); + ret = --qw_e_tbl->entry[entry_idx].ref_cnt; + if (ret) + goto out; + qw_e_tbl->entry[entry_idx].data = 0; ref_cnt = --qw_e_tbl->ref_cnt; break; default: ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl; - ptr_e_tbl->entry[entry_idx] = NULL; + MLX5_ASSERT(ptr_e_tbl->entry[entry_idx].ref_cnt); + ret = --ptr_e_tbl->entry[entry_idx].ref_cnt; + if (ret) + goto out; + ptr_e_tbl->entry[entry_idx].data = NULL; ref_cnt = --ptr_e_tbl->ref_cnt; break; } @@ -718,9 +750,12 @@ struct mlx5_l3t_tbl * } } } +out: + rte_spinlock_unlock(&tbl->sl); + return ret; } -uint32_t +int32_t mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx, union mlx5_l3t_data *data) { @@ -731,8 +766,10 @@ struct mlx5_l3t_tbl * struct mlx5_l3t_entry_ptr *ptr_e_tbl; void *e_tbl; uint32_t entry_idx, tbl_idx = 0; + int32_t ret = -1; /* Check the global table, create it if empty. 
*/ + rte_spinlock_lock(&tbl->sl); g_tbl = tbl->tbl; if (!g_tbl) { g_tbl = mlx5_malloc(MLX5_MEM_ZERO, @@ -741,7 +778,7 @@ struct mlx5_l3t_tbl * SOCKET_ID_ANY); if (!g_tbl) { rte_errno = ENOMEM; - return -1; + goto out; } tbl->tbl = g_tbl; } @@ -757,7 +794,7 @@ struct mlx5_l3t_tbl * SOCKET_ID_ANY); if (!m_tbl) { rte_errno = ENOMEM; - return -1; + goto out; } g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] = m_tbl; @@ -772,7 +809,7 @@ struct mlx5_l3t_tbl * e_tbl = mlx5_ipool_zmalloc(tbl->eip, &tbl_idx); if (!e_tbl) { rte_errno = ENOMEM; - return -1; + goto out; } ((struct mlx5_l3t_entry_word *)e_tbl)->idx = tbl_idx; m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] = @@ -783,24 +820,55 @@ struct mlx5_l3t_tbl * switch (tbl->type) { case MLX5_L3T_TYPE_WORD: w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl; - w_e_tbl->entry[entry_idx] = data->word; + if (w_e_tbl->entry[entry_idx].data) { + data->word = w_e_tbl->entry[entry_idx].data; + w_e_tbl->entry[entry_idx].ref_cnt++; + rte_errno = EEXIST; + goto out; + } + w_e_tbl->entry[entry_idx].data = data->word; + w_e_tbl->entry[entry_idx].ref_cnt = 1; w_e_tbl->ref_cnt++; break; case MLX5_L3T_TYPE_DWORD: dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl; - dw_e_tbl->entry[entry_idx] = data->dword; + if (dw_e_tbl->entry[entry_idx].data) { + data->dword = dw_e_tbl->entry[entry_idx].data; + dw_e_tbl->entry[entry_idx].ref_cnt++; + rte_errno = EEXIST; + goto out; + } + dw_e_tbl->entry[entry_idx].data = data->dword; + dw_e_tbl->entry[entry_idx].ref_cnt = 1; dw_e_tbl->ref_cnt++; break; case MLX5_L3T_TYPE_QWORD: qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl; - qw_e_tbl->entry[entry_idx] = data->qword; + if (qw_e_tbl->entry[entry_idx].data) { + data->qword = qw_e_tbl->entry[entry_idx].data; + qw_e_tbl->entry[entry_idx].ref_cnt++; + rte_errno = EEXIST; + goto out; + } + qw_e_tbl->entry[entry_idx].data = data->qword; + qw_e_tbl->entry[entry_idx].ref_cnt = 1; qw_e_tbl->ref_cnt++; break; default: ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl; - ptr_e_tbl->entry[entry_idx] = data->ptr; + if (ptr_e_tbl->entry[entry_idx].data) { + data->ptr = ptr_e_tbl->entry[entry_idx].data; + ptr_e_tbl->entry[entry_idx].ref_cnt++; + rte_errno = EEXIST; + goto out; + } + ptr_e_tbl->entry[entry_idx].data = data->ptr; + ptr_e_tbl->entry[entry_idx].ref_cnt = 1; ptr_e_tbl->ref_cnt++; break; } - return 0; + ret = 0; +out: + rte_spinlock_unlock(&tbl->sl); + return ret; } diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h index f078bdc..0da4961 100644 --- a/drivers/net/mlx5/mlx5_utils.h +++ b/drivers/net/mlx5/mlx5_utils.h @@ -118,29 +118,41 @@ struct mlx5_l3t_level_tbl { struct mlx5_l3t_entry_word { uint32_t idx; /* Table index. */ uint64_t ref_cnt; /* Table ref_cnt. */ - uint16_t entry[]; /* Entry array. */ -}; + struct { + uint16_t data; + uint32_t ref_cnt; + } entry[MLX5_L3T_ET_SIZE]; /* Entry array */ +} __rte_packed; /* L3 double word entry table data structure. */ struct mlx5_l3t_entry_dword { uint32_t idx; /* Table index. */ uint64_t ref_cnt; /* Table ref_cnt. */ - uint32_t entry[]; /* Entry array. */ -}; + struct { + uint32_t data; + int32_t ref_cnt; + } entry[MLX5_L3T_ET_SIZE]; /* Entry array */ +} __rte_packed; /* L3 quad word entry table data structure. */ struct mlx5_l3t_entry_qword { uint32_t idx; /* Table index. */ uint64_t ref_cnt; /* Table ref_cnt. */ - uint64_t entry[]; /* Entry array. 
*/ -}; + struct { + uint64_t data; + uint32_t ref_cnt; + } entry[MLX5_L3T_ET_SIZE]; /* Entry array */ +} __rte_packed; /* L3 pointer entry table data structure. */ struct mlx5_l3t_entry_ptr { uint32_t idx; /* Table index. */ uint64_t ref_cnt; /* Table ref_cnt. */ - void *entry[]; /* Entry array. */ -}; + struct { + void *data; + uint32_t ref_cnt; + } entry[MLX5_L3T_ET_SIZE]; /* Entry array */ +} __rte_packed; /* L3 table data structure. */ struct mlx5_l3t_tbl { @@ -148,6 +160,7 @@ struct mlx5_l3t_tbl { struct mlx5_indexed_pool *eip; /* Table index pool handles. */ struct mlx5_l3t_level_tbl *tbl; /* Global table index. */ + rte_spinlock_t sl; /* The table lock. */ }; /* @@ -535,32 +548,39 @@ struct mlx5_indexed_pool * * 0 if success, -1 on error. */ -uint32_t mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx, +int32_t mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx, union mlx5_l3t_data *data); /** - * This function clears the index entry from Three-level table. + * This function decreases and clear index entry if reference + * counter is 0 from Three-level table. * * @param tbl * Pointer to the l3t. * @param idx * Index to the entry. + * + * @return + * The remaining reference count, 0 means entry be cleared, -1 on error. */ -void mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx); +int32_t mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx); /** - * This function gets the index entry from Three-level table. + * This function sets the index entry to Three-level table. + * If the entry is already set, the EEXIST errno will be given, and + * the set data will be filled to the data. * - * @param tbl + * @param tbl[in] * Pointer to the l3t. - * @param idx + * @param idx[in] * Index to the entry. - * @param data + * @param data[in/out] * Pointer to the memory which contains the entry data save to l3t. + * If the entry is already set, the set data will be filled. * * @return * 0 if success, -1 on error. 
*/ -uint32_t mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx, +int32_t mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx, union mlx5_l3t_data *data); /* From patchwork Tue Oct 6 11:38:53 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Suanming Mou X-Patchwork-Id: 79751 X-Patchwork-Delegate: rasland@nvidia.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 058ACA04BB; Tue, 6 Oct 2020 13:40:53 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 4C71F1B65F; Tue, 6 Oct 2020 13:39:15 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by dpdk.org (Postfix) with ESMTP id 1168C2C6E for ; Tue, 6 Oct 2020 13:39:09 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from suanmingm@nvidia.com) with SMTP; 6 Oct 2020 14:39:07 +0300 Received: from nvidia.com (mtbc-r640-04.mtbc.labs.mlnx [10.75.70.9]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 096BcuYR014182; Tue, 6 Oct 2020 14:39:06 +0300 From: Suanming Mou To: viacheslavo@nvidia.com, matan@nvidia.com Cc: rasland@nvidia.com, dev@dpdk.org Date: Tue, 6 Oct 2020 19:38:53 +0800 Message-Id: <1601984333-304464-7-git-send-email-suanmingm@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1601984333-304464-1-git-send-email-suanmingm@nvidia.com> References: <1601984333-304464-1-git-send-email-suanmingm@nvidia.com> Subject: [dpdk-dev] [PATCH 6/6] net/mlx5: make shared counters thread safe X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" The shared counters save the counter index to the three-level table. Since the three-level table now supports multi-threaded operations, the shared counters can take advantage of it to become thread safe. When multiple threads save a counter with the same ID to the same table entry at the same time, only one succeeds; the others get the EEXIST errno while the entry reference counter is increased. In that case, the duplicate counters they created are released. Signed-off-by: Suanming Mou --- drivers/net/mlx5/mlx5.h | 1 - drivers/net/mlx5/mlx5_flow_dv.c | 62 +++++++++++--------------------------- drivers/net/mlx5/mlx5_flow_verbs.c | 19 +++--------- 3 files changed, 21 insertions(+), 61 deletions(-) diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 8c951e2..6e0b2e2 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -348,7 +348,6 @@ struct flow_counter_stats { /* Shared counters information for counters. */ struct mlx5_flow_counter_shared { - uint32_t ref_cnt; /**< Reference counter. */ uint32_t id; /**< User counter ID. */ }; diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 31d7fe4..3adb905 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -4461,28 +4461,6 @@ struct field_modify_info modify_tcp[] = { } /** - * Search for existed shared counter. - * - * @param[in] dev - * Pointer to the Ethernet device structure. - * @param[in] id - * The shared counter ID to search. - * - * @return - * 0 if not existed, otherwise shared counter index.
- */ -static uint32_t -flow_dv_counter_shared_search(struct rte_eth_dev *dev, uint32_t id) -{ - struct mlx5_priv *priv = dev->data->dev_private; - union mlx5_l3t_data data; - - if (mlx5_l3t_get_entry(priv->sh->cnt_id_tbl, id, &data)) - return 0; - return data.dword; -} - -/** * Allocate a flow counter. * * @param[in] dev @@ -4510,24 +4488,15 @@ struct field_modify_info modify_tcp[] = { enum mlx5_counter_type cnt_type = age ? MLX5_COUNTER_TYPE_AGE : MLX5_COUNTER_TYPE_ORIGIN; uint32_t cnt_idx; + union mlx5_l3t_data data; if (!priv->config.devx) { rte_errno = ENOTSUP; return 0; } - if (shared) { - cnt_idx = flow_dv_counter_shared_search(dev, id); - if (cnt_idx) { - cnt_free = flow_dv_counter_get_by_idx(dev, cnt_idx, - NULL); - if (cnt_free->shared_info.ref_cnt + 1 == 0) { - rte_errno = E2BIG; - return 0; - } - cnt_free->shared_info.ref_cnt++; - return cnt_idx; - } - } + if (shared && !mlx5_l3t_get_entry(priv->sh->cnt_id_tbl, id, &data) && + data.dword) + return data.dword; /* Get free counters from container. */ rte_spinlock_lock(&cmng->csl); cnt_free = TAILQ_FIRST(&cmng->counters[cnt_type]); @@ -4566,12 +4535,18 @@ struct field_modify_info modify_tcp[] = { &cnt_free->bytes)) goto err; if (shared) { - union mlx5_l3t_data data; - data.dword = cnt_idx; - if (mlx5_l3t_set_entry(priv->sh->cnt_id_tbl, id, &data)) + if (mlx5_l3t_set_entry(priv->sh->cnt_id_tbl, id, &data)) { + if (rte_errno == EEXIST) { + cnt_free->pool = pool; + rte_spinlock_lock(&cmng->csl); + TAILQ_INSERT_TAIL(&cmng->counters[cnt_type], + cnt_free, next); + rte_spinlock_unlock(&cmng->csl); + return data.dword; + } goto err; - cnt_free->shared_info.ref_cnt = 1; + } cnt_free->shared_info.id = id; cnt_idx |= MLX5_CNT_SHARED_OFFSET; } @@ -4667,12 +4642,9 @@ struct field_modify_info modify_tcp[] = { return; cnt = flow_dv_counter_get_by_idx(dev, counter, &pool); MLX5_ASSERT(pool); - if (IS_SHARED_CNT(counter)) { - if (--cnt->shared_info.ref_cnt) - return; - mlx5_l3t_clear_entry(priv->sh->cnt_id_tbl, - cnt->shared_info.id); - } + if (IS_SHARED_CNT(counter) && + mlx5_l3t_clear_entry(priv->sh->cnt_id_tbl, cnt->shared_info.id)) + return; if (IS_AGE_POOL(pool)) flow_dv_counter_remove_from_age(dev, counter, cnt); cnt->pool = pool; diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c index 69ecc27..f351a68 100644 --- a/drivers/net/mlx5/mlx5_flow_verbs.c +++ b/drivers/net/mlx5/mlx5_flow_verbs.c @@ -265,15 +265,8 @@ int ret; if (shared && !mlx5_l3t_get_entry(priv->sh->cnt_id_tbl, id, &data) && - data.dword) { - cnt = flow_verbs_counter_get_by_idx(dev, data.dword, NULL); - if (cnt->shared_info.ref_cnt + 1 == 0) { - rte_errno = E2BIG; - return 0; - } - cnt->shared_info.ref_cnt++; + data.dword) return data.dword; - } for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) { pool = cmng->pools[pool_idx]; if (!pool) @@ -325,7 +318,6 @@ data.dword = cnt_idx; if (mlx5_l3t_set_entry(priv->sh->cnt_id_tbl, id, &data)) return 0; - cnt->shared_info.ref_cnt = 1; cnt->shared_info.id = id; cnt_idx |= MLX5_CNT_SHARED_OFFSET; } @@ -360,12 +352,9 @@ struct mlx5_flow_counter_ext *cnt_ext; cnt = flow_verbs_counter_get_by_idx(dev, counter, &pool); - if (IS_SHARED_CNT(counter)) { - if (--cnt->shared_info.ref_cnt) - return; - mlx5_l3t_clear_entry(priv->sh->cnt_id_tbl, - cnt->shared_info.id); - } + if (IS_SHARED_CNT(counter) && + mlx5_l3t_clear_entry(priv->sh->cnt_id_tbl, cnt->shared_info.id)) + return; cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt); #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) 
claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs));