[v2,01/11] net/mlx5: allocate local DR rule action buffers

Message ID 20240229115157.201671-2-dsosnowski@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Series net/mlx5: flow insertion performance improvements

Checks

Context        Check    Description
ci/checkpatch  success  coding style OK

Commit Message

Dariusz Sosnowski Feb. 29, 2024, 11:51 a.m. UTC
The goal of this patch is to remove the unnecessary copying of
precalculated mlx5dr_rule_action structures used to create HWS flow rules.

Before this patch, an array of these structures was calculated during
template table creation, one for each actions template used.
Each of these structures contained either a full or a partial action
definition, depending on the mask configuration.
During flow creation, this array was copied onto the stack and then passed
to mlx5dr_rule_create().
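
For reference, the per-flow copy removed by this patch looked roughly as
follows; this is a simplified sketch assembled from the lines the diff
below removes, not verbatim driver code:

	/* Pre-patch: every flow insertion filled a stack buffer from the
	 * precalculated actions template before building unmasked actions. */
	struct mlx5dr_rule_action rule_acts[MLX5_HW_MAX_ACTS];

	rte_memcpy(rule_acts, hw_acts->rule_acts,
		   sizeof(*rule_acts) * at->dr_actions_num);
	/* ... construct unmasked actions, then pass rule_acts to
	 * mlx5dr_rule_create() ... */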

This patch removes this copy by implementing the following (see the sketch
after the list):

- Allocate an array of mlx5dr_rule_action structures for each actions
  template and queue.
- Populate them with precalculated data from the relevant actions templates.
- During flow creation, construct unmasked actions in the array dedicated
  to the specific queue and actions template.
- Pass this buffer directly to mlx5dr_rule_create().
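
A minimal sketch of the resulting per-(actions template, queue) buffer
lookup, mirroring flow_hw_get_dr_action_buffer() in the diff below; the
concrete counts (2 actions templates, 4 queues) are hypothetical:

	/* rule_acts[] is a flat array used as a 2-D array indexed by
	 * (actions template, queue) in row-major order. With nb_queue == 4,
	 * actions template 1 on queue 2 maps to flat index 1 * 4 + 2 == 6. */
	uint32_t offset = action_template_index * priv->nb_queue + queue;
	struct mlx5dr_rule_action *buf = &table->rule_acts[offset].acts[0];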

Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
Acked-by: Ori Kam <orika@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    | 13 +++++++++
 drivers/net/mlx5/mlx5_flow_hw.c | 51 +++++++++++++++++++++++++++++----
 2 files changed, 59 insertions(+), 5 deletions(-)
  

Patch

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 7aa24f7c52..02af0a08fa 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -1566,6 +1566,10 @@ struct mlx5_matcher_info {
 	uint32_t refcnt;
 };
 
+struct mlx5_dr_rule_action_container {
+	struct mlx5dr_rule_action acts[MLX5_HW_MAX_ACTS];
+} __rte_cache_aligned;
+
 struct rte_flow_template_table {
 	LIST_ENTRY(rte_flow_template_table) next;
 	struct mlx5_flow_group *grp; /* The group rte_flow_template_table uses. */
@@ -1585,6 +1589,15 @@ struct rte_flow_template_table {
 	uint32_t refcnt; /* Table reference counter. */
 	struct mlx5_tbl_multi_pattern_ctx mpctx;
 	struct mlx5dr_matcher_attr matcher_attr;
+	/**
+	 * Variable length array of containers containing precalculated templates of DR actions
+	 * arrays. This array is allocated at template table creation time and contains
+	 * one container per each queue, per each actions template.
+	 * Essentially rule_acts is a 2-dimensional array indexed with (AT index, queue) pair.
+	 * Each container will provide a local "queue buffer" to work on for flow creation
+	 * operations when using a given actions template.
+	 */
+	struct mlx5_dr_rule_action_container rule_acts[];
 };
 
 static __rte_always_inline struct mlx5dr_matcher *
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index 9620b7f576..ef91a23a9b 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -2512,6 +2512,34 @@ __flow_hw_actions_translate(struct rte_eth_dev *dev,
 				  "fail to create rte table");
 }
 
+static __rte_always_inline struct mlx5dr_rule_action *
+flow_hw_get_dr_action_buffer(struct mlx5_priv *priv,
+			     struct rte_flow_template_table *table,
+			     uint8_t action_template_index,
+			     uint32_t queue)
+{
+	uint32_t offset = action_template_index * priv->nb_queue + queue;
+
+	return &table->rule_acts[offset].acts[0];
+}
+
+static void
+flow_hw_populate_rule_acts_caches(struct rte_eth_dev *dev,
+				  struct rte_flow_template_table *table,
+				  uint8_t at_idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	uint32_t q;
+
+	for (q = 0; q < priv->nb_queue; ++q) {
+		struct mlx5dr_rule_action *rule_acts =
+				flow_hw_get_dr_action_buffer(priv, table, at_idx, q);
+
+		rte_memcpy(rule_acts, table->ats[at_idx].acts.rule_acts,
+			   sizeof(table->ats[at_idx].acts.rule_acts));
+	}
+}
+
 /**
  * Translate rte_flow actions to DR action.
  *
@@ -2539,6 +2567,7 @@ flow_hw_actions_translate(struct rte_eth_dev *dev,
 						tbl->ats[i].action_template,
 						&tbl->mpctx, error))
 			goto err;
+		flow_hw_populate_rule_acts_caches(dev, tbl, i);
 	}
 	ret = mlx5_tbl_multi_pattern_process(dev, tbl, &tbl->mpctx.segments[0],
 					     rte_log2_u32(tbl->cfg.attr.nb_flows),
@@ -2928,7 +2957,6 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 	struct mlx5_aso_mtr *aso_mtr;
 	struct mlx5_multi_pattern_segment *mp_segment = NULL;
 
-	rte_memcpy(rule_acts, hw_acts->rule_acts, sizeof(*rule_acts) * at->dr_actions_num);
 	attr.group = table->grp->group_id;
 	ft_flag = mlx5_hw_act_flag[!!table->grp->group_id][table->type];
 	if (table->type == MLX5DR_TABLE_TYPE_FDB) {
@@ -3335,7 +3363,7 @@ flow_hw_async_flow_create(struct rte_eth_dev *dev,
 		.user_data = user_data,
 		.burst = attr->postpone,
 	};
-	struct mlx5dr_rule_action rule_acts[MLX5_HW_MAX_ACTS];
+	struct mlx5dr_rule_action *rule_acts;
 	struct rte_flow_hw *flow = NULL;
 	struct mlx5_hw_q_job *job = NULL;
 	const struct rte_flow_item *rule_items;
@@ -3358,6 +3386,7 @@ flow_hw_async_flow_create(struct rte_eth_dev *dev,
 	mlx5_ipool_malloc(table->resource, &res_idx);
 	if (!res_idx)
 		goto error;
+	rule_acts = flow_hw_get_dr_action_buffer(priv, table, action_template_index, queue);
 	/*
 	 * Set the table here in order to know the destination table
 	 * when free the flow afterward.
@@ -3479,7 +3508,7 @@ flow_hw_async_flow_create_by_index(struct rte_eth_dev *dev,
 		.user_data = user_data,
 		.burst = attr->postpone,
 	};
-	struct mlx5dr_rule_action rule_acts[MLX5_HW_MAX_ACTS];
+	struct mlx5dr_rule_action *rule_acts;
 	struct rte_flow_hw *flow = NULL;
 	struct mlx5_hw_q_job *job = NULL;
 	uint32_t flow_idx = 0;
@@ -3501,6 +3530,7 @@ flow_hw_async_flow_create_by_index(struct rte_eth_dev *dev,
 	mlx5_ipool_malloc(table->resource, &res_idx);
 	if (!res_idx)
 		goto error;
+	rule_acts = flow_hw_get_dr_action_buffer(priv, table, action_template_index, queue);
 	/*
 	 * Set the table here in order to know the destination table
 	 * when free the flow afterwards.
@@ -3610,7 +3640,7 @@ flow_hw_async_flow_update(struct rte_eth_dev *dev,
 		.user_data = user_data,
 		.burst = attr->postpone,
 	};
-	struct mlx5dr_rule_action rule_acts[MLX5_HW_MAX_ACTS];
+	struct mlx5dr_rule_action *rule_acts;
 	struct rte_flow_hw *of = (struct rte_flow_hw *)flow;
 	struct rte_flow_hw *nf;
 	struct rte_flow_template_table *table = of->table;
@@ -3628,6 +3658,7 @@ flow_hw_async_flow_update(struct rte_eth_dev *dev,
 		goto error;
 	nf = job->upd_flow;
 	memset(nf, 0, sizeof(struct rte_flow_hw));
+	rule_acts = flow_hw_get_dr_action_buffer(priv, table, action_template_index, queue);
 	/*
 	 * Set the table here in order to know the destination table
 	 * when free the flow afterwards.
@@ -4354,6 +4385,7 @@ mlx5_hw_build_template_table(struct rte_eth_dev *dev,
 			i++;
 			goto at_error;
 		}
+		flow_hw_populate_rule_acts_caches(dev, tbl, i);
 	}
 	tbl->nb_action_templates = nb_action_templates;
 	if (mlx5_is_multi_pattern_active(&tbl->mpctx)) {
@@ -4442,6 +4474,7 @@ flow_hw_table_create(struct rte_eth_dev *dev,
 	uint32_t i = 0, max_tpl = MLX5_HW_TBL_MAX_ITEM_TEMPLATE;
 	uint32_t nb_flows = rte_align32pow2(attr->nb_flows);
 	bool port_started = !!dev->data->dev_started;
+	size_t tbl_mem_size;
 	int err;
 
 	/* HWS layer accepts only 1 item template with root table. */
@@ -4461,8 +4494,16 @@ flow_hw_table_create(struct rte_eth_dev *dev,
 		rte_errno = EINVAL;
 		goto error;
 	}
+	/*
+	 * Amount of memory required for rte_flow_template_table struct:
+	 * - Size of the struct itself.
+	 * - VLA of DR rule action containers at the end =
+	 *     number of actions templates * number of queues * size of DR rule actions container.
+	 */
+	tbl_mem_size = sizeof(*tbl);
+	tbl_mem_size += nb_action_templates * priv->nb_queue * sizeof(tbl->rule_acts[0]);
 	/* Allocate the table memory. */
-	tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*tbl), 0, rte_socket_id());
+	tbl = mlx5_malloc(MLX5_MEM_ZERO, tbl_mem_size, RTE_CACHE_LINE_SIZE, rte_socket_id());
 	if (!tbl)
 		goto error;
 	tbl->cfg = *table_cfg;
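
For illustration, with hypothetical counts of 2 actions templates and 4
queues, the sizing computed in the last hunk above expands to the struct
itself plus 2 * 4 == 8 trailing containers; each container is cache
aligned, presumably so that buffers used by different queues do not share
cache lines:

	/* Hypothetical sizing: 2 actions templates, 4 queues. */
	size_t tbl_mem_size = sizeof(*tbl)
			    + 2 * 4 * sizeof(tbl->rule_acts[0]);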