diff mbox series

[v1,1/4] net/mlx5: support meter action in meter policy

Message ID 20210706131450.30917-2-shunh@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Headers show
Series Add support for meter hierarchy | expand

Checks

Context Check Description
ci/iol-testing warning apply patch failure
ci/checkpatch warning coding style issues

Commit Message

Shun Hao July 6, 2021, 1:14 p.m. UTC
This makes the meter policy support meter action. So multiple meters
can be chained as a meter hierarchy.

Only termination meter is allowed as the last meter in a hierarchy,
and there're two cases:
1. The last meter has non-RSS policy, can directly create sub-policy
and color rules during each meter's policy creation.
2. The last meter has RSS policy, don't create sub-policy/rules when
creating meter policy. Only when a RTE flow is using the meter hierarchy,
will iterate all meters of the hierarchy and create needed sub-
policies and color rules for them.

Signed-off-by: Shun Hao <shunh@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/net/mlx5/mlx5.h            |  12 ++
 drivers/net/mlx5/mlx5_flow.c       |  71 +++++---
 drivers/net/mlx5/mlx5_flow.h       |   5 +
 drivers/net/mlx5/mlx5_flow_dv.c    | 270 ++++++++++++++++++++++++-----
 drivers/net/mlx5/mlx5_flow_meter.c |  43 ++++-
 5 files changed, 332 insertions(+), 69 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 0f4b239142..0c555f0b1f 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -677,6 +677,12 @@  struct mlx5_meter_policy_action_container {
 		/* Jump/drop action per color. */
 		uint16_t queue;
 		/* Queue action configuration. */
+		struct {
+			uint32_t next_mtr_id;
+			/* The next meter id. */
+			void *next_sub_policy;
+			/* Next meter's sub-policy. */
+		};
 	};
 };
 
@@ -694,6 +700,8 @@  struct mlx5_flow_meter_policy {
 	/* Rule applies to transfer domain. */
 	uint32_t is_queue:1;
 	/* Is queue action in policy table. */
+	uint32_t is_hierarchy:1;
+	/* Is meter action in policy table. */
 	rte_spinlock_t sl;
 	uint32_t ref_cnt;
 	/* Use count. */
@@ -712,6 +720,7 @@  struct mlx5_flow_meter_policy {
 #define MLX5_MTR_SUB_POLICY_NUM_SHIFT  3
 #define MLX5_MTR_SUB_POLICY_NUM_MASK  0x7
 #define MLX5_MTRS_DEFAULT_RULE_PRIORITY 0xFFFF
+#define MLX5_MTR_CHAIN_MAX_NUM 8
 
 /* Flow meter default policy parameter structure.
  * Policy index 0 is reserved by default policy table.
@@ -1669,6 +1678,9 @@  struct mlx5_flow_meter_policy *mlx5_flow_meter_policy_find
 		(struct rte_eth_dev *dev,
 		uint32_t policy_id,
 		uint32_t *policy_idx);
+struct mlx5_flow_meter_policy *
+mlx5_flow_meter_hierarchy_get_final_policy(struct rte_eth_dev *dev,
+					struct mlx5_flow_meter_policy *policy);
 int mlx5_flow_meter_flush(struct rte_eth_dev *dev,
 			  struct rte_mtr_error *error);
 void mlx5_flow_meter_rxq_flush(struct rte_eth_dev *dev);
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index c27f6197a0..6c4bfde098 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -3492,10 +3492,18 @@  flow_get_rss_action(struct rte_eth_dev *dev,
 			const struct rte_flow_action_meter *mtr = actions->conf;
 
 			fm = mlx5_flow_meter_find(priv, mtr->mtr_id, &mtr_idx);
-			if (fm) {
+			if (fm && !fm->def_policy) {
 				policy = mlx5_flow_meter_policy_find(dev,
 						fm->policy_id, NULL);
-				if (policy && policy->is_rss)
+				MLX5_ASSERT(policy);
+				if (policy->is_hierarchy) {
+					policy =
+				mlx5_flow_meter_hierarchy_get_final_policy(dev,
+									policy);
+					if (!policy)
+						return NULL;
+				}
+				if (policy->is_rss)
 					rss =
 				policy->act_cnt[RTE_COLOR_GREEN].rss->conf;
 			}
@@ -4564,8 +4572,8 @@  flow_create_split_inner(struct rte_eth_dev *dev,
  *   Pointer to Ethernet device.
  * @param[in] flow
  *   Parent flow structure pointer.
- * @param[in] policy_id;
- *   Meter Policy id.
+ * @param wks
+ *   Pointer to thread flow work space.
  * @param[in] attr
  *   Flow rule attributes.
  * @param[in] items
@@ -4579,31 +4587,22 @@  flow_create_split_inner(struct rte_eth_dev *dev,
 static struct mlx5_flow_meter_sub_policy *
 get_meter_sub_policy(struct rte_eth_dev *dev,
 		     struct rte_flow *flow,
-		     uint32_t policy_id,
+		     struct mlx5_flow_workspace *wks,
 		     const struct rte_flow_attr *attr,
 		     const struct rte_flow_item items[],
 		     struct rte_flow_error *error)
 {
 	struct mlx5_flow_meter_policy *policy;
+	struct mlx5_flow_meter_policy *final_policy;
 	struct mlx5_flow_meter_sub_policy *sub_policy = NULL;
 
-	policy = mlx5_flow_meter_policy_find(dev, policy_id, NULL);
-	if (!policy) {
-		rte_flow_error_set(error, EINVAL,
-				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-				   "Failed to find Meter Policy.");
-		goto exit;
-	}
-	if (policy->is_rss ||
-		(policy->is_queue &&
-	!policy->sub_policys[MLX5_MTR_DOMAIN_INGRESS][0]->rix_hrxq[0])) {
-		struct mlx5_flow_workspace *wks =
-				mlx5_flow_get_thread_workspace();
+	policy = wks->policy;
+	final_policy = policy->is_hierarchy ? wks->final_policy : policy;
+	if (final_policy->is_rss || final_policy->is_queue) {
 		struct mlx5_flow_rss_desc rss_desc_v[MLX5_MTR_RTE_COLORS];
 		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS] = {0};
 		uint32_t i;
 
-		MLX5_ASSERT(wks);
 		/**
 		 * This is a tmp dev_flow,
 		 * no need to register any matcher for it in translate.
@@ -4613,9 +4612,9 @@  get_meter_sub_policy(struct rte_eth_dev *dev,
 			struct mlx5_flow dev_flow = {0};
 			struct mlx5_flow_handle dev_handle = { {0} };
 
-			if (policy->is_rss) {
+			if (final_policy->is_rss) {
 				const void *rss_act =
-					policy->act_cnt[i].rss->conf;
+					final_policy->act_cnt[i].rss->conf;
 				struct rte_flow_action rss_actions[2] = {
 					[0] = {
 					.type = RTE_FLOW_ACTION_TYPE_RSS,
@@ -4656,7 +4655,7 @@  get_meter_sub_policy(struct rte_eth_dev *dev,
 				rss_desc_v[i].key_len = 0;
 				rss_desc_v[i].hash_fields = 0;
 				rss_desc_v[i].queue =
-					&policy->act_cnt[i].queue;
+					&final_policy->act_cnt[i].queue;
 				rss_desc_v[i].queue_num = 1;
 			}
 			rss_desc[i] = &rss_desc_v[i];
@@ -4696,8 +4695,8 @@  get_meter_sub_policy(struct rte_eth_dev *dev,
  *   Pointer to Ethernet device.
  * @param[in] flow
  *   Parent flow structure pointer.
- * @param[in] fm
- *   Pointer to flow meter structure.
+ * @param wks
+ *   Pointer to thread flow work space.
  * @param[in] attr
  *   Flow rule attributes.
  * @param[in] items
@@ -4721,7 +4720,7 @@  get_meter_sub_policy(struct rte_eth_dev *dev,
 static int
 flow_meter_split_prep(struct rte_eth_dev *dev,
 		      struct rte_flow *flow,
-		      struct mlx5_flow_meter_info *fm,
+		      struct mlx5_flow_workspace *wks,
 		      const struct rte_flow_attr *attr,
 		      const struct rte_flow_item items[],
 		      struct rte_flow_item sfx_items[],
@@ -4732,6 +4731,7 @@  flow_meter_split_prep(struct rte_eth_dev *dev,
 		      struct rte_flow_error *error)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_flow_meter_info *fm = wks->fm;
 	struct rte_flow_action *tag_action = NULL;
 	struct rte_flow_item *tag_item;
 	struct mlx5_rte_flow_action_set_tag *set_tag;
@@ -4856,9 +4856,8 @@  flow_meter_split_prep(struct rte_eth_dev *dev,
 		struct mlx5_flow_tbl_data_entry *tbl_data;
 
 		if (!fm->def_policy) {
-			sub_policy = get_meter_sub_policy(dev, flow,
-							  fm->policy_id, attr,
-							  items, error);
+			sub_policy = get_meter_sub_policy(dev, flow, wks,
+							  attr, items, error);
 			if (!sub_policy)
 				return -rte_errno;
 		} else {
@@ -5746,6 +5745,22 @@  flow_create_split_meter(struct rte_eth_dev *dev,
 		}
 		MLX5_ASSERT(wks);
 		wks->fm = fm;
+		if (!fm->def_policy) {
+			wks->policy = mlx5_flow_meter_policy_find(dev,
+								  fm->policy_id,
+								  NULL);
+			MLX5_ASSERT(wks->policy);
+			if (wks->policy->is_hierarchy) {
+				wks->final_policy =
+				mlx5_flow_meter_hierarchy_get_final_policy(dev,
+								wks->policy);
+				if (!wks->final_policy)
+					return rte_flow_error_set(error,
+					EINVAL,
+					RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+				"Failed to find terminal policy of hierarchy.");
+			}
+		}
 		/*
 		 * If it isn't default-policy Meter, and
 		 * 1. There's no action in flow to change
@@ -5776,7 +5791,7 @@  flow_create_split_meter(struct rte_eth_dev *dev,
 			pre_actions = sfx_actions + 1;
 		else
 			pre_actions = sfx_actions + actions_n;
-		ret = flow_meter_split_prep(dev, flow, fm, &sfx_attr,
+		ret = flow_meter_split_prep(dev, flow, wks, &sfx_attr,
 					    items, sfx_items, actions,
 					    sfx_actions, pre_actions,
 					    (set_mtr_reg ? &mtr_flow_id : NULL),
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 2f2aa962f9..09d6d609db 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -454,6 +454,7 @@  enum mlx5_flow_fate_type {
 	MLX5_FLOW_FATE_DROP,
 	MLX5_FLOW_FATE_DEFAULT_MISS,
 	MLX5_FLOW_FATE_SHARED_RSS,
+	MLX5_FLOW_FATE_MTR,
 	MLX5_FLOW_FATE_MAX,
 };
 
@@ -1102,6 +1103,10 @@  struct mlx5_flow_workspace {
 	uint32_t rssq_num; /* Allocated queue num in rss_desc. */
 	uint32_t flow_idx; /* Intermediate device flow index. */
 	struct mlx5_flow_meter_info *fm; /* Pointer to the meter in flow. */
+	struct mlx5_flow_meter_policy *policy;
+	/* The meter policy used by meter in flow. */
+	struct mlx5_flow_meter_policy *final_policy;
+	/* The final policy when meter policy is hierarchy. */
 	uint32_t skip_matcher_reg:1;
 	/* Indicates if need to skip matcher register in translate. */
 };
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 75ef6216ac..d34f5214a8 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -7864,6 +7864,8 @@  flow_dv_prepare(struct rte_eth_dev *dev,
 
 	MLX5_ASSERT(wks);
 	wks->skip_matcher_reg = 0;
+	wks->policy = NULL;
+	wks->final_policy = NULL;
 	/* In case of corrupting the memory. */
 	if (wks->flow_idx >= MLX5_NUM_MAX_DEV_FLOWS) {
 		rte_flow_error_set(error, ENOSPC,
@@ -15028,6 +15030,37 @@  __flow_dv_create_domain_policy_acts(struct rte_eth_dev *dev,
 				action_flags |= MLX5_FLOW_ACTION_JUMP;
 				break;
 			}
+			case RTE_FLOW_ACTION_TYPE_METER:
+			{
+				const struct rte_flow_action_meter *mtr;
+				struct mlx5_flow_meter_info *next_fm;
+				struct mlx5_flow_meter_policy *next_policy;
+				uint32_t next_mtr_idx = 0;
+
+				mtr = act->conf;
+				next_fm = mlx5_flow_meter_find(priv,
+							mtr->mtr_id,
+							&next_mtr_idx);
+				if (!next_fm)
+					return -rte_mtr_error_set(error, EINVAL,
+						RTE_MTR_ERROR_TYPE_MTR_ID, NULL,
+						"Fail to find next meter.");
+				if (next_fm->def_policy)
+					return -rte_mtr_error_set(error, EINVAL,
+						RTE_MTR_ERROR_TYPE_MTR_ID, NULL,
+				"Hierarchy only supports termination meter.");
+				next_policy = mlx5_flow_meter_policy_find(dev,
+						next_fm->policy_id, NULL);
+				MLX5_ASSERT(next_policy);
+				act_cnt->fate_action = MLX5_FLOW_FATE_MTR;
+				act_cnt->next_mtr_id = next_fm->meter_id;
+				act_cnt->next_sub_policy = NULL;
+				mtr_policy->is_hierarchy = 1;
+				mtr_policy->dev = next_policy->dev;
+				action_flags |=
+				MLX5_FLOW_ACTION_METER_WITH_TERMINATED_POLICY;
+				break;
+			}
 			default:
 				return -rte_mtr_error_set(error, ENOTSUP,
 					  RTE_MTR_ERROR_TYPE_METER_POLICY,
@@ -15563,7 +15596,14 @@  __flow_dv_create_policy_acts_rules(struct rte_eth_dev *dev,
 	struct mlx5_flow_dv_tag_resource *tag;
 	struct mlx5_flow_dv_port_id_action_resource *port_action;
 	struct mlx5_hrxq *hrxq;
-	uint8_t egress, transfer;
+	struct mlx5_flow_meter_info *next_fm = NULL;
+	struct mlx5_flow_meter_policy *next_policy;
+	struct mlx5_flow_meter_sub_policy *next_sub_policy;
+	struct mlx5_flow_tbl_data_entry *tbl_data;
+	struct rte_flow_error error;
+	uint8_t egress = (domain == MLX5_MTR_DOMAIN_EGRESS) ? 1 : 0;
+	uint8_t transfer = (domain == MLX5_MTR_DOMAIN_TRANSFER) ? 1 : 0;
+	bool mtr_first = egress || (transfer && priv->representor_id != 0xffff);
 	bool match_src_port = false;
 	int i;
 
@@ -15578,13 +15618,39 @@  __flow_dv_create_policy_acts_rules(struct rte_eth_dev *dev,
 			acts[i].actions_n = 1;
 			continue;
 		}
+		if (mtr_policy->act_cnt[i].fate_action == MLX5_FLOW_FATE_MTR) {
+			struct rte_flow_attr attr = {
+				.transfer = transfer
+			};
+
+			next_fm = mlx5_flow_meter_find(priv,
+					mtr_policy->act_cnt[i].next_mtr_id,
+					NULL);
+			if (!next_fm) {
+				DRV_LOG(ERR,
+					"Failed to get next hierarchy meter.");
+				goto err_exit;
+			}
+			if (mlx5_flow_meter_attach(priv, next_fm,
+						   &attr, &error)) {
+				DRV_LOG(ERR, "%s", error.message);
+				next_fm = NULL;
+				goto err_exit;
+			}
+			/* Meter action must be the first for TX. */
+			if (mtr_first) {
+				acts[i].dv_actions[acts[i].actions_n] =
+					next_fm->meter_action;
+				acts[i].actions_n++;
+			}
+		}
 		if (mtr_policy->act_cnt[i].rix_mark) {
 			tag = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_TAG],
 					mtr_policy->act_cnt[i].rix_mark);
 			if (!tag) {
 				DRV_LOG(ERR, "Failed to find "
 				"mark action for policy.");
-				return -1;
+				goto err_exit;
 			}
 			acts[i].dv_actions[acts[i].actions_n] =
 						tag->action;
@@ -15604,7 +15670,7 @@  __flow_dv_create_policy_acts_rules(struct rte_eth_dev *dev,
 				if (!port_action) {
 					DRV_LOG(ERR, "Failed to find "
 						"port action for policy.");
-					return -1;
+					goto err_exit;
 				}
 				acts[i].dv_actions[acts[i].actions_n] =
 				port_action->action;
@@ -15626,12 +15692,42 @@  __flow_dv_create_policy_acts_rules(struct rte_eth_dev *dev,
 				if (!hrxq) {
 					DRV_LOG(ERR, "Failed to find "
 						"queue action for policy.");
-					return -1;
+					goto err_exit;
 				}
 				acts[i].dv_actions[acts[i].actions_n] =
 				hrxq->action;
 				acts[i].actions_n++;
 				break;
+			case MLX5_FLOW_FATE_MTR:
+				if (!next_fm) {
+					DRV_LOG(ERR,
+						"No next hierarchy meter.");
+					goto err_exit;
+				}
+				if (!mtr_first) {
+					acts[i].dv_actions[acts[i].actions_n] =
+							next_fm->meter_action;
+					acts[i].actions_n++;
+				}
+				if (mtr_policy->act_cnt[i].next_sub_policy) {
+					next_sub_policy =
+					mtr_policy->act_cnt[i].next_sub_policy;
+				} else {
+					next_policy =
+						mlx5_flow_meter_policy_find(dev,
+						next_fm->policy_id, NULL);
+					MLX5_ASSERT(next_policy);
+					next_sub_policy =
+					next_policy->sub_policys[domain][0];
+				}
+				tbl_data =
+					container_of(next_sub_policy->tbl_rsc,
+					struct mlx5_flow_tbl_data_entry, tbl);
+				acts[i].dv_actions[acts[i].actions_n++] =
+							tbl_data->jump.action;
+				if (mtr_policy->act_cnt[i].modify_hdr)
+					match_src_port = !!transfer;
+				break;
 			default:
 				/*Queue action do nothing*/
 				break;
@@ -15644,9 +15740,13 @@  __flow_dv_create_policy_acts_rules(struct rte_eth_dev *dev,
 				egress, transfer, match_src_port, acts)) {
 		DRV_LOG(ERR,
 		"Failed to create policy rules per domain.");
-		return -1;
+		goto err_exit;
 	}
 	return 0;
+err_exit:
+	if (next_fm)
+		mlx5_flow_meter_detach(priv, next_fm);
+	return -1;
 }
 
 /**
@@ -15956,22 +16056,12 @@  flow_dv_create_mtr_tbls(struct rte_eth_dev *dev,
 	return -1;
 }
 
-/**
- * Find the policy table for prefix table with RSS.
- *
- * @param[in] dev
- *   Pointer to Ethernet device.
- * @param[in] mtr_policy
- *   Pointer to meter policy table.
- * @param[in] rss_desc
- *   Pointer to rss_desc
- * @return
- *   Pointer to table set on success, NULL otherwise and rte_errno is set.
- */
 static struct mlx5_flow_meter_sub_policy *
-flow_dv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
+__flow_dv_meter_get_rss_sub_policy(struct rte_eth_dev *dev,
 		struct mlx5_flow_meter_policy *mtr_policy,
-		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS])
+		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS],
+		struct mlx5_flow_meter_sub_policy *next_sub_policy,
+		bool *is_reuse)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_meter_sub_policy *sub_policy = NULL;
@@ -16013,6 +16103,7 @@  flow_dv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
 			rte_spinlock_unlock(&mtr_policy->sl);
 			for (j = 0; j < MLX5_MTR_RTE_COLORS; j++)
 				mlx5_hrxq_release(dev, hrxq_idx[j]);
+			*is_reuse = true;
 			return mtr_policy->sub_policys[domain][i];
 		}
 	}
@@ -16038,24 +16129,30 @@  flow_dv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
 		if (!rss_desc[i])
 			continue;
 		sub_policy->rix_hrxq[i] = hrxq_idx[i];
-		/*
-		 * Overwrite the last action from
-		 * RSS action to Queue action.
-		 */
-		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
-			      hrxq_idx[i]);
-		if (!hrxq) {
-			DRV_LOG(ERR, "Failed to create policy hrxq");
-			goto rss_sub_policy_error;
-		}
-		act_cnt = &mtr_policy->act_cnt[i];
-		if (act_cnt->rix_mark || act_cnt->modify_hdr) {
-			memset(&dh, 0, sizeof(struct mlx5_flow_handle));
-			if (act_cnt->rix_mark)
-				dh.mark = 1;
-			dh.fate_action = MLX5_FLOW_FATE_QUEUE;
-			dh.rix_hrxq = hrxq_idx[i];
-			flow_drv_rxq_flags_set(dev, &dh);
+		if (mtr_policy->is_hierarchy) {
+			act_cnt = &mtr_policy->act_cnt[i];
+			act_cnt->next_sub_policy = next_sub_policy;
+			mlx5_hrxq_release(dev, hrxq_idx[i]);
+		} else {
+			/*
+			 * Overwrite the last action from
+			 * RSS action to Queue action.
+			 */
+			hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
+				hrxq_idx[i]);
+			if (!hrxq) {
+				DRV_LOG(ERR, "Failed to create policy hrxq");
+				goto rss_sub_policy_error;
+			}
+			act_cnt = &mtr_policy->act_cnt[i];
+			if (act_cnt->rix_mark || act_cnt->modify_hdr) {
+				memset(&dh, 0, sizeof(struct mlx5_flow_handle));
+				if (act_cnt->rix_mark)
+					dh.mark = 1;
+				dh.fate_action = MLX5_FLOW_FATE_QUEUE;
+				dh.rix_hrxq = hrxq_idx[i];
+				flow_drv_rxq_flags_set(dev, &dh);
+			}
 		}
 	}
 	if (__flow_dv_create_policy_acts_rules(dev, mtr_policy,
@@ -16079,6 +16176,7 @@  flow_dv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
 			(MLX5_MTR_SUB_POLICY_NUM_SHIFT * domain);
 	}
 	rte_spinlock_unlock(&mtr_policy->sl);
+	*is_reuse = false;
 	return sub_policy;
 rss_sub_policy_error:
 	if (sub_policy) {
@@ -16093,13 +16191,105 @@  flow_dv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
 					sub_policy->idx);
 		}
 	}
-	if (sub_policy_idx)
-		mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MTR_POLICY],
-			sub_policy_idx);
 	rte_spinlock_unlock(&mtr_policy->sl);
 	return NULL;
 }
 
+/**
+ * Get the RSS sub-policy of a meter policy, walking its meter hierarchy.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] mtr_policy
+ *   Pointer to meter policy table (first policy of the hierarchy, if any).
+ * @param[in] rss_desc
+ *   Per-color RSS descriptors, handed to every sub-policy lookup.
+ * @return
+ *   Sub-policy of the first policy on success, NULL otherwise.
+ */
+static struct mlx5_flow_meter_sub_policy *
+flow_dv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
+		struct mlx5_flow_meter_policy *mtr_policy,
+		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS])
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_flow_meter_sub_policy *sub_policy = NULL;
+	struct mlx5_flow_meter_info *next_fm;
+	struct mlx5_flow_meter_policy *next_policy;
+	struct mlx5_flow_meter_sub_policy *next_sub_policy = NULL;
+	struct mlx5_flow_meter_policy *policies[MLX5_MTR_CHAIN_MAX_NUM];
+	struct mlx5_flow_meter_sub_policy *sub_policies[MLX5_MTR_CHAIN_MAX_NUM];
+	uint32_t domain = MLX5_MTR_DOMAIN_INGRESS;
+	bool reuse_sub_policy;
+	uint32_t i = 0;
+	uint32_t j = 0;
+
+	while (true) {
+		/* Iterate hierarchy to get all policies in this hierarchy. */
+		policies[i++] = mtr_policy;
+		if (!mtr_policy->is_hierarchy)
+			break;
+		if (i >= MLX5_MTR_CHAIN_MAX_NUM) {
+			DRV_LOG(ERR, "Exceed max meter number in hierarchy.");
+			return NULL;
+		}
+		next_fm = mlx5_flow_meter_find(priv,
+			mtr_policy->act_cnt[RTE_COLOR_GREEN].next_mtr_id, NULL);
+		if (!next_fm) {
+			DRV_LOG(ERR, "Failed to get next meter in hierarchy.");
+			return NULL;
+		}
+		next_policy =
+			mlx5_flow_meter_policy_find(dev, next_fm->policy_id,
+						    NULL);
+		MLX5_ASSERT(next_policy);
+		mtr_policy = next_policy;
+	}
+	while (i) {
+		/**
+		 * From last policy to the first one in hierarchy,
+		 * create/get the sub policy for each of them.
+		 */
+		sub_policy = __flow_dv_meter_get_rss_sub_policy(dev,
+							policies[--i],
+							rss_desc,
+							next_sub_policy,
+							&reuse_sub_policy);
+		if (!sub_policy) {
+			DRV_LOG(ERR, "Failed to get the sub policy.");
+			goto err_exit;
+		}
+		if (!reuse_sub_policy)
+			sub_policies[j++] = sub_policy;
+		next_sub_policy = sub_policy;
+	}
+	return sub_policy;
+err_exit:
+	while (j) {
+		uint16_t sub_policy_num;
+
+		sub_policy = sub_policies[--j];
+		mtr_policy = sub_policy->main_policy;
+		__flow_dv_destroy_sub_policy_rules(dev, sub_policy);
+		if (sub_policy != mtr_policy->sub_policys[domain][0]) {
+			sub_policy_num = (mtr_policy->sub_policy_num >>
+				(MLX5_MTR_SUB_POLICY_NUM_SHIFT * domain)) &
+				MLX5_MTR_SUB_POLICY_NUM_MASK;
+			mtr_policy->sub_policys[domain][sub_policy_num - 1] =
+									NULL;
+			sub_policy_num--;
+			mtr_policy->sub_policy_num &=
+				~(MLX5_MTR_SUB_POLICY_NUM_MASK <<
+				  (MLX5_MTR_SUB_POLICY_NUM_SHIFT * domain));
+			mtr_policy->sub_policy_num |=
+			(sub_policy_num & MLX5_MTR_SUB_POLICY_NUM_MASK) <<
+			(MLX5_MTR_SUB_POLICY_NUM_SHIFT * domain);
+			mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MTR_POLICY],
+					sub_policy->idx);
+		}
+	}
+	return NULL;
+}
 
 /**
  * Destroy the sub policy table with RX queue.
diff --git a/drivers/net/mlx5/mlx5_flow_meter.c b/drivers/net/mlx5/mlx5_flow_meter.c
index 6f962a8d52..03f7e120e1 100644
--- a/drivers/net/mlx5/mlx5_flow_meter.c
+++ b/drivers/net/mlx5/mlx5_flow_meter.c
@@ -529,6 +529,37 @@  mlx5_flow_meter_policy_find(struct rte_eth_dev *dev,
 	return NULL;
 }
 
+/**
+ * Walk a meter hierarchy and return the policy of its terminating meter.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] policy
+ *   Policy to start from; returned as-is when it is not a hierarchy.
+ *
+ * @return
+ *   The last policy in the chain, NULL if a next meter is missing/default.
+ */
+struct mlx5_flow_meter_policy *
+mlx5_flow_meter_hierarchy_get_final_policy(struct rte_eth_dev *dev,
+					struct mlx5_flow_meter_policy *policy)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_flow_meter_policy *cur = policy;
+
+	while (cur->is_hierarchy) {
+		uint32_t id = cur->act_cnt[RTE_COLOR_GREEN].next_mtr_id;
+		struct mlx5_flow_meter_info *fm;
+
+		fm = mlx5_flow_meter_find(priv, id, NULL);
+		if (fm == NULL || fm->def_policy)
+			return NULL;
+		cur = mlx5_flow_meter_policy_find(dev, fm->policy_id, NULL);
+		MLX5_ASSERT(cur);
+	}
+	return cur;
+}
+
 /**
  * Callback to check MTR policy action validate
  *
@@ -650,6 +681,7 @@  mlx5_flow_meter_policy_add(struct rte_eth_dev *dev,
 	uint16_t sub_policy_num;
 	uint8_t domain_bitmap = 0;
 	union mlx5_l3t_data data;
+	bool skip_rule = false;
 
 	if (!priv->mtr_en)
 		return -rte_mtr_error_set(error, ENOTSUP,
@@ -759,7 +791,16 @@  mlx5_flow_meter_policy_add(struct rte_eth_dev *dev,
 					policy->actions, error);
 	if (ret)
 		goto policy_add_err;
-	if (!is_rss && !mtr_policy->is_queue) {
+	if (mtr_policy->is_hierarchy) {
+		struct mlx5_flow_meter_policy *final_policy;
+
+		final_policy =
+		mlx5_flow_meter_hierarchy_get_final_policy(dev, mtr_policy);
+		if (!final_policy)
+			goto policy_add_err;
+		skip_rule = (final_policy->is_rss || final_policy->is_queue);
+	}
+	if (!is_rss && !mtr_policy->is_queue && !skip_rule) {
 		/* Create policy rules in HW. */
 		ret = mlx5_flow_create_policy_rules(dev, mtr_policy);
 		if (ret)