[v2,02/25] net/mlx5: use thread specific flow workspace
diff mbox series

Message ID 1603437295-119083-3-git-send-email-suanmingm@nvidia.com
State Superseded
Delegated to: Raslan Darawsheh
Headers show
Series
  • *net/mlx5: support multiple-thread flow operations
Related show

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Suanming Mou Oct. 23, 2020, 7:14 a.m. UTC
From: Xueming Li <xuemingl@nvidia.com>

As part of multi-thread flow support, this patch moves the intermediate
flow data into thread-specific storage, called the flow workspace. The
workspace is allocated per thread and destroyed at the end of the thread's
life cycle.

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c   |   5 --
 drivers/net/mlx5/mlx5.c            |   2 -
 drivers/net/mlx5/mlx5.h            |   6 --
 drivers/net/mlx5/mlx5_flow.c       | 159 +++++++++++++++++++++++++++----------
 drivers/net/mlx5/mlx5_flow.h       |  15 +++-
 drivers/net/mlx5/mlx5_flow_dv.c    |  41 +++++-----
 drivers/net/mlx5/mlx5_flow_verbs.c |  24 +++---
 7 files changed, 166 insertions(+), 86 deletions(-)

Patch
diff mbox series

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 40f9446..1313dee 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1431,11 +1431,6 @@ 
 			err = ENOTSUP;
 			goto error;
 	}
-	/*
-	 * Allocate the buffer for flow creating, just once.
-	 * The allocation must be done before any flow creating.
-	 */
-	mlx5_flow_alloc_intermediate(eth_dev);
 	/* Query availability of metadata reg_c's. */
 	err = mlx5_flow_discover_mreg_c(eth_dev);
 	if (err < 0) {
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index e1df11f..faf947f 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1402,8 +1402,6 @@  struct mlx5_dev_ctx_shared *
 	 */
 	mlx5_flow_list_flush(dev, &priv->flows, true);
 	mlx5_flow_meter_flush(dev, NULL);
-	/* Free the intermediate buffers for flow creation. */
-	mlx5_flow_free_intermediate(dev);
 	/* Prevent crashes when queues are still in use. */
 	dev->rx_pkt_burst = removed_rx_burst;
 	dev->tx_pkt_burst = removed_tx_burst;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c9d5d71..bfb0c28 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -847,10 +847,6 @@  struct mlx5_priv {
 	struct mlx5_drop drop_queue; /* Flow drop queues. */
 	uint32_t flows; /* RTE Flow rules. */
 	uint32_t ctrl_flows; /* Control flow rules. */
-	void *inter_flows; /* Intermediate resources for flow creation. */
-	void *rss_desc; /* Intermediate rss description resources. */
-	int flow_idx; /* Intermediate device flow index. */
-	int flow_nested_idx; /* Intermediate device flow index, nested. */
 	struct mlx5_obj_ops obj_ops; /* HW objects operations. */
 	LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
 	LIST_HEAD(rxqobj, mlx5_rxq_obj) rxqsobj; /* Verbs/DevX Rx queues. */
@@ -1074,8 +1070,6 @@  int mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
 void mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list);
 int mlx5_flow_start_default(struct rte_eth_dev *dev);
 void mlx5_flow_stop_default(struct rte_eth_dev *dev);
-void mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev);
-void mlx5_flow_free_intermediate(struct rte_eth_dev *dev);
 int mlx5_flow_verify(struct rte_eth_dev *dev);
 int mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev, uint32_t queue);
 int mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index d7243a8..2f2b97f 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -634,6 +634,13 @@  struct mlx5_flow_tunnel_info {
 	},
 };
 
+/* Key of thread specific flow workspace data. */
+static pthread_key_t key_workspace;
+
+/* One-time initialization control for the flow workspace key. */
+static pthread_once_t key_workspace_init = PTHREAD_ONCE_INIT;
+
+
 /**
  * Translate tag ID to register.
  *
@@ -5005,6 +5012,46 @@  struct mlx5_flow_tunnel_info {
 }
 
 /**
+ * Adjust flow RSS workspace if needed.
+ *
+ * Grows the queue array of @p rss_desc when @p nrssq_num exceeds the
+ * capacity recorded in the workspace. On allocation failure the existing
+ * queue array and its recorded capacity are left untouched.
+ *
+ * @param wks
+ *   Pointer to thread flow work space.
+ * @param rss_desc
+ *   Pointer to RSS descriptor.
+ * @param[in] nrssq_num
+ *   New RSS queue number.
+ *
+ * @return
+ *   0 on success, -1 otherwise and rte_errno is set.
+ */
+static int
+flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
+			  struct mlx5_flow_rss_desc *rss_desc,
+			  uint32_t nrssq_num)
+{
+	bool fidx = !!wks->flow_idx;
+	uint32_t new_num;
+	uint16_t *queue;
+
+	if (likely(nrssq_num <= wks->rssq_num[fidx]))
+		return 0;
+	new_num = RTE_ALIGN(nrssq_num, 2);
+	/* Keep the old pointer until realloc() succeeds to avoid a leak. */
+	queue = realloc(rss_desc->queue, sizeof(rss_desc->queue[0]) * new_num);
+	if (!queue) {
+		rte_errno = ENOMEM;
+		return -1;
+	}
+	rss_desc->queue = queue;
+	wks->rssq_num[fidx] = new_num;
+	return 0;
+}
+
+/**
  * Create a flow and add it to @p list.
  *
  * @param dev
@@ -5056,8 +5095,7 @@  struct mlx5_flow_tunnel_info {
 		uint8_t buffer[2048];
 	} items_tx;
 	struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
-	struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
-					      priv->rss_desc)[!!priv->flow_idx];
+	struct mlx5_flow_rss_desc *rss_desc;
 	const struct rte_flow_action *p_actions_rx = actions;
 	uint32_t i;
 	uint32_t idx = 0;
@@ -5065,8 +5103,12 @@  struct mlx5_flow_tunnel_info {
 	uint32_t hairpin_id = 0;
 	struct rte_flow_attr attr_tx = { .priority = 0 };
 	struct rte_flow_attr attr_factor = {0};
+	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
+	bool fidx = !!wks->flow_idx;
 	int ret;
 
+	MLX5_ASSERT(wks);
+	rss_desc = &wks->rss_desc[fidx];
 	memcpy((void *)&attr_factor, (const void *)attr, sizeof(*attr));
 	if (external)
 		attr_factor.group *= MLX5_FLOW_TABLE_FACTOR;
@@ -5095,9 +5137,11 @@  struct mlx5_flow_tunnel_info {
 		flow->hairpin_flow_id = hairpin_id;
 	MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
 		    flow->drv_type < MLX5_FLOW_TYPE_MAX);
-	memset(rss_desc, 0, sizeof(*rss_desc));
+	memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
 	rss = flow_get_rss_action(p_actions_rx);
 	if (rss) {
+		if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
+			return 0;
 		/*
 		 * The following information is required by
 		 * mlx5_flow_hashfields_adjust() in advance.
@@ -5125,9 +5169,9 @@  struct mlx5_flow_tunnel_info {
 	 * need to be translated before another calling.
 	 * No need to use ping-pong buffer to save memory here.
 	 */
-	if (priv->flow_idx) {
-		MLX5_ASSERT(!priv->flow_nested_idx);
-		priv->flow_nested_idx = priv->flow_idx;
+	if (fidx) {
+		MLX5_ASSERT(!wks->flow_nested_idx);
+		wks->flow_nested_idx = fidx;
 	}
 	for (i = 0; i < buf->entries; ++i) {
 		/*
@@ -5192,9 +5236,9 @@  struct mlx5_flow_tunnel_info {
 			     flow, next);
 	flow_rxq_flags_set(dev, flow);
 	/* Nested flow creation index recovery. */
-	priv->flow_idx = priv->flow_nested_idx;
-	if (priv->flow_nested_idx)
-		priv->flow_nested_idx = 0;
+	wks->flow_idx = wks->flow_nested_idx;
+	if (wks->flow_nested_idx)
+		wks->flow_nested_idx = 0;
 	return idx;
 error:
 	MLX5_ASSERT(flow);
@@ -5209,9 +5253,9 @@  struct mlx5_flow_tunnel_info {
 		mlx5_flow_id_release(priv->sh->flow_id_pool,
 				     hairpin_id);
 	rte_errno = ret;
-	priv->flow_idx = priv->flow_nested_idx;
-	if (priv->flow_nested_idx)
-		priv->flow_nested_idx = 0;
+	wks->flow_idx = wks->flow_nested_idx;
+	if (wks->flow_nested_idx)
+		wks->flow_nested_idx = 0;
 	return 0;
 }
 
@@ -5493,48 +5537,75 @@  struct rte_flow *
 }
 
 /**
- * Allocate intermediate resources for flow creation.
- *
- * @param dev
- *   Pointer to Ethernet device.
+ * Release key of thread specific flow workspace data.
  */
-void
-mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev)
+static void
+flow_release_workspace(void *data)
 {
-	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_flow_workspace *wks = data;
 
-	if (!priv->inter_flows) {
-		priv->inter_flows = mlx5_malloc(MLX5_MEM_ZERO,
-				    MLX5_NUM_MAX_DEV_FLOWS *
-				    sizeof(struct mlx5_flow) +
-				    (sizeof(struct mlx5_flow_rss_desc) +
-				    sizeof(uint16_t) * UINT16_MAX) * 2, 0,
-				    SOCKET_ID_ANY);
-		if (!priv->inter_flows) {
-			DRV_LOG(ERR, "can't allocate intermediate memory.");
-			return;
-		}
-	}
-	priv->rss_desc = &((struct mlx5_flow *)priv->inter_flows)
-			 [MLX5_NUM_MAX_DEV_FLOWS];
-	/* Reset the index. */
-	priv->flow_idx = 0;
-	priv->flow_nested_idx = 0;
+	if (!wks)
+		return;
+	free(wks->rss_desc[0].queue);
+	free(wks->rss_desc[1].queue);
+	free(wks);
+}
+
+/**
+ * Initialize key of thread specific flow workspace data.
+ */
+static void
+flow_alloc_workspace(void)
+{
+	if (pthread_key_create(&key_workspace, flow_release_workspace))
+		DRV_LOG(ERR, "Can't create flow workspace data thread key.");
 }
 
 /**
- * Free intermediate resources for flows.
+ * Get thread specific flow workspace.
  *
- * @param dev
- *   Pointer to Ethernet device.
+ * @return pointer to thread specific flow workspace data, NULL on error.
  */
-void
-mlx5_flow_free_intermediate(struct rte_eth_dev *dev)
+struct mlx5_flow_workspace*
+mlx5_flow_get_thread_workspace(void)
 {
-	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_flow_workspace *data;
 
-	mlx5_free(priv->inter_flows);
-	priv->inter_flows = NULL;
+	if (pthread_once(&key_workspace_init, flow_alloc_workspace)) {
+		DRV_LOG(ERR, "Failed to init flow workspace data thread key.");
+		return NULL;
+	}
+	data = pthread_getspecific(key_workspace);
+	if (!data) {
+		data = calloc(1, sizeof(*data));
+		if (!data) {
+			DRV_LOG(ERR, "Failed to allocate flow workspace "
+				"memory.");
+			return NULL;
+		}
+		data->rss_desc[0].queue = calloc(1,
+				sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
+		if (!data->rss_desc[0].queue)
+			goto err;
+		data->rss_desc[1].queue = calloc(1,
+				sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
+		if (!data->rss_desc[1].queue)
+			goto err;
+		data->rssq_num[0] = MLX5_RSSQ_DEFAULT_NUM;
+		data->rssq_num[1] = MLX5_RSSQ_DEFAULT_NUM;
+		if (pthread_setspecific(key_workspace, data)) {
+			DRV_LOG(ERR, "Failed to set flow workspace to thread.");
+			goto err;
+		}
+	}
+	return data;
+err:
+	if (data->rss_desc[0].queue)
+		free(data->rss_desc[0].queue);
+	if (data->rss_desc[1].queue)
+		free(data->rss_desc[1].queue);
+	free(data);
+	return NULL;
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index b4be476..9bc7608 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -73,6 +73,9 @@  enum mlx5_feature_name {
 	MLX5_MTR_SFX,
 };
 
+/* Default number of queue entries allocated per workspace RSS descriptor. */
+#define MLX5_RSSQ_DEFAULT_NUM 16
+
 /* Pattern outer Layer bits. */
 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
@@ -596,7 +599,7 @@  struct mlx5_flow_rss_desc {
 	uint32_t queue_num; /**< Number of entries in @p queue. */
 	uint64_t types; /**< Specific RSS hash types (see ETH_RSS_*). */
 	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
-	uint16_t queue[]; /**< Destination queues to redirect traffic to. */
+	uint16_t *queue; /**< Destination queues. */
 };
 
 /* PMD flow priority for tunnel */
@@ -929,6 +932,15 @@  struct rte_flow {
 	uint16_t meter; /**< Holds flow meter id. */
 } __rte_packed;
 
+/* Per-thread flow workspace holding intermediate flow creation data. */
+struct mlx5_flow_workspace {
+	struct mlx5_flow flows[MLX5_NUM_MAX_DEV_FLOWS]; /* Device flow slots. */
+	struct mlx5_flow_rss_desc rss_desc[2]; /* Selected by !!flow_(nested_)idx. */
+	uint32_t rssq_num[2]; /* Allocated queue entries per rss_desc slot. */
+	int flow_idx; /* Intermediate device flow index (next free slot). */
+	int flow_nested_idx; /* Intermediate device flow index, nested. */
+};
+
 typedef int (*mlx5_flow_validate_t)(struct rte_eth_dev *dev,
 				    const struct rte_flow_attr *attr,
 				    const struct rte_flow_item items[],
@@ -1003,6 +1015,7 @@  struct mlx5_flow_driver_ops {
 
 /* mlx5_flow.c */
 
+struct mlx5_flow_workspace *mlx5_flow_get_thread_workspace(void);
 struct mlx5_flow_id_pool *mlx5_flow_id_pool_alloc(uint32_t max_id);
 void mlx5_flow_id_pool_release(struct mlx5_flow_id_pool *pool);
 uint32_t mlx5_flow_id_get(struct mlx5_flow_id_pool *pool, uint32_t *id);
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 15cd34e..df36a24 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -6139,9 +6139,11 @@  struct field_modify_info modify_tcp[] = {
 	struct mlx5_flow *dev_flow;
 	struct mlx5_flow_handle *dev_handle;
 	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
 
+	MLX5_ASSERT(wks);
 	/* In case of corrupting the memory. */
-	if (priv->flow_idx >= MLX5_NUM_MAX_DEV_FLOWS) {
+	if (wks->flow_idx >= MLX5_NUM_MAX_DEV_FLOWS) {
 		rte_flow_error_set(error, ENOSPC,
 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
 				   "not free temporary device flow");
@@ -6155,8 +6157,8 @@  struct field_modify_info modify_tcp[] = {
 				   "not enough memory to create flow handle");
 		return NULL;
 	}
-	/* No multi-thread supporting. */
-	dev_flow = &((struct mlx5_flow *)priv->inter_flows)[priv->flow_idx++];
+	MLX5_ASSERT(wks->flow_idx + 1 < RTE_DIM(wks->flows));
+	dev_flow = &wks->flows[wks->flow_idx++];
 	dev_flow->handle = dev_handle;
 	dev_flow->handle_idx = handle_idx;
 	/*
@@ -8779,11 +8781,12 @@  struct field_modify_info modify_tcp[] = {
 	const struct rte_flow_action_queue *queue;
 	struct mlx5_flow_sub_actions_list *sample_act;
 	struct mlx5_flow_sub_actions_idx *sample_idx;
-	struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
-					      priv->rss_desc)
-					      [!!priv->flow_nested_idx];
+	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
+	struct mlx5_flow_rss_desc *rss_desc;
 	uint64_t action_flags = 0;
 
+	MLX5_ASSERT(wks);
+	rss_desc = &wks->rss_desc[!!wks->flow_nested_idx];
 	sample_act = &res->sample_act;
 	sample_idx = &res->sample_idx;
 	sample_action = (const struct rte_flow_action_sample *)action->conf;
@@ -8985,18 +8988,18 @@  struct field_modify_info modify_tcp[] = {
 			     uint64_t action_flags,
 			     struct rte_flow_error *error)
 {
-	struct mlx5_priv *priv = dev->data->dev_private;
 	/* update normal path action resource into last index of array */
 	uint32_t dest_index = MLX5_MAX_DEST_NUM - 1;
 	struct mlx5_flow_sub_actions_list *sample_act =
 					&mdest_res->sample_act[dest_index];
-	struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
-					      priv->rss_desc)
-					      [!!priv->flow_nested_idx];
+	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
+	struct mlx5_flow_rss_desc *rss_desc;
 	uint32_t normal_idx = 0;
 	struct mlx5_hrxq *hrxq;
 	uint32_t hrxq_idx;
 
+	MLX5_ASSERT(wks);
+	rss_desc = &wks->rss_desc[!!wks->flow_nested_idx];
 	if (num_of_dest > 1) {
 		if (sample_act->action_flags & MLX5_FLOW_ACTION_QUEUE) {
 			/* Handle QP action for mirroring */
@@ -9086,9 +9089,8 @@  struct field_modify_info modify_tcp[] = {
 	struct mlx5_dev_config *dev_conf = &priv->config;
 	struct rte_flow *flow = dev_flow->flow;
 	struct mlx5_flow_handle *handle = dev_flow->handle;
-	struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
-					      priv->rss_desc)
-					      [!!priv->flow_nested_idx];
+	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
+	struct mlx5_flow_rss_desc *rss_desc;
 	uint64_t item_flags = 0;
 	uint64_t last_item = 0;
 	uint64_t action_flags = 0;
@@ -9128,6 +9130,8 @@  struct field_modify_info modify_tcp[] = {
 	uint32_t table;
 	int ret = 0;
 
+	MLX5_ASSERT(wks);
+	rss_desc = &wks->rss_desc[!!wks->flow_nested_idx];
 	memset(&mdest_res, 0, sizeof(struct mlx5_flow_dv_dest_array_resource));
 	memset(&sample_res, 0, sizeof(struct mlx5_flow_dv_sample_resource));
 	mhdr_res->ft_type = attr->egress ? MLX5DV_FLOW_TABLE_TYPE_NIC_TX :
@@ -9974,9 +9978,11 @@  struct field_modify_info modify_tcp[] = {
 	int n;
 	int err;
 	int idx;
+	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
 
-	for (idx = priv->flow_idx - 1; idx >= priv->flow_nested_idx; idx--) {
-		dev_flow = &((struct mlx5_flow *)priv->inter_flows)[idx];
+	MLX5_ASSERT(wks);
+	for (idx = wks->flow_idx - 1; idx >= wks->flow_nested_idx; idx--) {
+		dev_flow = &wks->flows[idx];
 		dv = &dev_flow->dv;
 		dh = dev_flow->handle;
 		dv_h = &dh->dvh;
@@ -10008,9 +10014,8 @@  struct field_modify_info modify_tcp[] = {
 			   !dv_h->rix_sample && !dv_h->rix_dest_array) {
 			struct mlx5_hrxq *hrxq;
 			uint32_t hrxq_idx;
-			struct mlx5_flow_rss_desc *rss_desc =
-				&((struct mlx5_flow_rss_desc *)priv->rss_desc)
-				[!!priv->flow_nested_idx];
+			struct mlx5_flow_rss_desc *rss_desc = &wks->rss_desc
+					[!!wks->flow_nested_idx];
 
 			MLX5_ASSERT(rss_desc->queue_num);
 			hrxq_idx = mlx5_hrxq_get(dev, rss_desc->key,
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 710622c..a498d89 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -1623,7 +1623,9 @@ 
 	struct mlx5_flow *dev_flow;
 	struct mlx5_flow_handle *dev_handle;
 	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
 
+	MLX5_ASSERT(wks);
 	size += flow_verbs_get_actions_size(actions);
 	size += flow_verbs_get_items_size(items);
 	if (size > MLX5_VERBS_MAX_SPEC_ACT_SIZE) {
@@ -1633,7 +1635,7 @@ 
 		return NULL;
 	}
 	/* In case of corrupting the memory. */
-	if (priv->flow_idx >= MLX5_NUM_MAX_DEV_FLOWS) {
+	if (wks->flow_idx >= MLX5_NUM_MAX_DEV_FLOWS) {
 		rte_flow_error_set(error, ENOSPC,
 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
 				   "not free temporary device flow");
@@ -1647,8 +1649,8 @@ 
 				   "not enough memory to create flow handle");
 		return NULL;
 	}
-	/* No multi-thread supporting. */
-	dev_flow = &((struct mlx5_flow *)priv->inter_flows)[priv->flow_idx++];
+	MLX5_ASSERT(wks->flow_idx + 1 < RTE_DIM(wks->flows));
+	dev_flow = &wks->flows[wks->flow_idx++];
 	dev_flow->handle = dev_handle;
 	dev_flow->handle_idx = handle_idx;
 	/* Memcpy is used, only size needs to be cleared to 0. */
@@ -1692,10 +1694,11 @@ 
 	uint64_t priority = attr->priority;
 	uint32_t subpriority = 0;
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
-					      priv->rss_desc)
-					      [!!priv->flow_nested_idx];
+	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
+	struct mlx5_flow_rss_desc *rss_desc;
 
+	MLX5_ASSERT(wks);
+	rss_desc = &wks->rss_desc[!!wks->flow_nested_idx];
 	if (priority == MLX5_FLOW_PRIO_RSVD)
 		priority = priv->config.flow_prio - 1;
 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
@@ -1951,9 +1954,11 @@ 
 	uint32_t dev_handles;
 	int err;
 	int idx;
+	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
 
-	for (idx = priv->flow_idx - 1; idx >= priv->flow_nested_idx; idx--) {
-		dev_flow = &((struct mlx5_flow *)priv->inter_flows)[idx];
+	MLX5_ASSERT(wks);
+	for (idx = wks->flow_idx - 1; idx >= wks->flow_nested_idx; idx--) {
+		dev_flow = &wks->flows[idx];
 		handle = dev_flow->handle;
 		if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
 			hrxq = mlx5_drop_action_create(dev);
@@ -1967,8 +1972,7 @@ 
 		} else {
 			uint32_t hrxq_idx;
 			struct mlx5_flow_rss_desc *rss_desc =
-				&((struct mlx5_flow_rss_desc *)priv->rss_desc)
-				[!!priv->flow_nested_idx];
+				&wks->rss_desc[!!wks->flow_nested_idx];
 
 			MLX5_ASSERT(rss_desc->queue_num);
 			hrxq_idx = mlx5_hrxq_get(dev, rss_desc->key,