[dpdk-dev,12/18] net/mlx5: support tunnel RSS level

Message ID 20180226150947.107179-13-xuemingl@mellanox.com
State Superseded, archived

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/checkpatch success coding style OK

Commit Message

Xueming(Steven) Li Feb. 26, 2018, 3:09 p.m.
The rte_flow tunnel RSS level gives the user a choice to perform the RSS
hash calculation on inner or outer fields. This patch makes the mlx5 PMD
support the rte_flow tunnel RSS level. Testpmd flow command examples:

GRE tunnel with inner TCP, UDP and IP RSS:
  flow create 0 ingress pattern eth  / ipv4 proto is 47 / gre / end
actions rss queues 1 2 end level 1 / end

GRE tunnel rule with a specific inner pattern and the same RSS queues as
the above rule:
  flow create 0 ingress pattern eth  / ipv4 proto is 47 / gre / ipv4 /
tcp / end actions rss queues 1 2 end level 1 / end

GRE tunnel rule with outer IP RSS:
  flow create 0 ingress pattern eth  / ipv4 proto is 47 / gre / end
actions rss queues 1 2 end level 0 / end
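
For reference, a minimal C sketch of the first (inner RSS) rule above through
the rte_flow API. It assumes the rss_level field added to struct
rte_eth_rss_conf earlier in this series and the struct rte_flow_action_rss
layout (rss_conf/num/queue[]) current at the time of this series; the helper
name, port id, queue ids and the omitted IPv4 "proto is 47" spec/mask are
illustrative only, and error handling is left out:

  #include <stdlib.h>
  #include <rte_ethdev.h>
  #include <rte_flow.h>

  static struct rte_flow *
  create_inner_rss_gre_flow(uint16_t port_id)
  {
          /* Hash on the inner headers of tunnelled packets. */
          struct rte_eth_rss_conf rss_conf = {
                  .rss_key = NULL,      /* keep the device default RSS key */
                  .rss_key_len = 0,
                  .rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP,
                  .rss_level = 1,       /* 1: inner RSS, 0: outer RSS */
          };
          /* struct rte_flow_action_rss ends with a flexible queue[] array. */
          struct rte_flow_action_rss *rss =
                  malloc(sizeof(*rss) + 2 * sizeof(uint16_t));
          rss->rss_conf = &rss_conf;
          rss->num = 2;
          rss->queue[0] = 1;
          rss->queue[1] = 2;
          /* eth / ipv4 / gre pattern; the IPv4 "proto is 47" match would be
           * expressed through an item spec/mask, omitted for brevity. */
          struct rte_flow_item pattern[] = {
                  { .type = RTE_FLOW_ITEM_TYPE_ETH },
                  { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
                  { .type = RTE_FLOW_ITEM_TYPE_GRE },
                  { .type = RTE_FLOW_ITEM_TYPE_END },
          };
          struct rte_flow_action actions[] = {
                  { .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = rss },
                  { .type = RTE_FLOW_ACTION_TYPE_END },
          };
          struct rte_flow_attr attr = { .ingress = 1 };
          struct rte_flow_error flow_err;

          return rte_flow_create(port_id, &attr, pattern, actions, &flow_err);
  }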

Signed-off-by: Xueming Li <xuemingl@mellanox.com>
---
 drivers/net/mlx5/mlx5_flow.c | 242 +++++++++++++++++++++++++++++++------------
 drivers/net/mlx5/mlx5_rxq.c  |  50 ++++++++-
 drivers/net/mlx5/mlx5_rxtx.h |   7 +-
 3 files changed, 226 insertions(+), 73 deletions(-)

Patch

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 0c45228..1e9e3d6 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -105,6 +105,7 @@  enum hash_rxq_type {
 	HASH_RXQ_UDPV6,
 	HASH_RXQ_IPV6,
 	HASH_RXQ_ETH,
+	HASH_RXQ_TUNNEL,
 };
 
 /* Initialization data for hash RX queue. */
@@ -441,6 +442,7 @@  struct mlx5_flow_parse {
 	struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
 	uint8_t rss_key[40]; /**< copy of the RSS key. */
 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
+	enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
 	uint32_t tunnel; /* Tunnel type as RTE_PTYPE_TUNNEL_XXX. */
 	struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
 	struct {
@@ -448,6 +450,7 @@  struct mlx5_flow_parse {
 		/**< Pointer to Verbs attributes. */
 		unsigned int offset;
 		/**< Current position or total size of the attribute. */
+		uint64_t hash_fields; /**< verbs hash fields. */
 	} queue[RTE_DIM(hash_rxq_init)];
 };
 
@@ -579,7 +582,7 @@  struct ibv_spec_header {
 	if (rss_conf) {
 		if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
 			return EINVAL;
-		if (rss_conf->rss_key_len != 40)
+		if (rss_conf->rss_key_len > 0 && rss_conf->rss_key_len != 40)
 			return EINVAL;
 		if (rss_conf->rss_key_len && rss_conf->rss_key) {
 			parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
@@ -587,7 +590,9 @@  struct ibv_spec_header {
 			       rss_conf->rss_key_len);
 			parser->rss_conf.rss_key = parser->rss_key;
 		}
-		parser->rss_conf.rss_hf = rss_conf->rss_hf;
+		if (rss_conf->rss_hf)
+			parser->rss_conf.rss_hf = rss_conf->rss_hf;
+		parser->rss_conf.rss_level = rss_conf->rss_level;
 	}
 	return 0;
 }
@@ -759,6 +764,13 @@  struct ibv_spec_header {
 						   "wrong RSS configuration");
 				return -rte_errno;
 			}
+			if (parser->rss_conf.rss_level > 1) {
+				rte_flow_error_set(error, EINVAL,
+						   RTE_FLOW_ERROR_TYPE_ACTION,
+						   actions,
+						   "wrong RSS level");
+				return -rte_errno;
+			}
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
 			const struct rte_flow_action_mark *mark =
 				(const struct rte_flow_action_mark *)
@@ -875,10 +887,19 @@  struct ibv_spec_header {
 				rte_flow_error_set(error, ENOTSUP,
 					RTE_FLOW_ERROR_TYPE_ITEM,
 					items,
-					"Cannot support hw tunnel checksum "
+					"Cannot support tunnel checksum "
 					"offloads");
+			if (!priv->config.tunnel_en &&
+			    parser->rss_conf.rss_level)
+				rte_flow_error_set(error, ENOTSUP,
+					RTE_FLOW_ERROR_TYPE_ITEM,
+					items,
+					"Cannot support tunnel inner RSS");
 			parser->inner = IBV_FLOW_SPEC_INNER;
 			parser->tunnel = rte_ptype[items->type];
+			parser->layer = HASH_RXQ_TUNNEL;
+		} else {
+			parser->layer = HASH_RXQ_ETH;
 		}
 		if (parser->drop) {
 			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
@@ -957,8 +978,12 @@  struct ibv_spec_header {
 priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
 {
 	unsigned int i;
+	uint32_t inner = parser->inner;
 
 	(void)priv;
+	/* Don't create extra flows for outer RSS. */
+	if (parser->tunnel && !parser->rss_conf.rss_level)
+		return;
 	/*
 	 * Fill missing layers in verbs specifications, or compute the correct
 	 * offset to allocate the memory space for the attributes and
@@ -969,23 +994,25 @@  struct ibv_spec_header {
 			struct ibv_flow_spec_ipv4_ext ipv4;
 			struct ibv_flow_spec_ipv6 ipv6;
 			struct ibv_flow_spec_tcp_udp udp_tcp;
+			struct ibv_flow_spec_eth eth;
 		} specs;
 		void *dst;
 		uint16_t size;
 
 		if (i == parser->layer)
 			continue;
-		if (parser->layer == HASH_RXQ_ETH) {
+		if (parser->layer == HASH_RXQ_ETH ||
+		    parser->layer == HASH_RXQ_TUNNEL) {
 			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
 				size = sizeof(struct ibv_flow_spec_ipv4_ext);
 				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
-					.type = IBV_FLOW_SPEC_IPV4_EXT,
+					.type = inner | IBV_FLOW_SPEC_IPV4_EXT,
 					.size = size,
 				};
 			} else {
 				size = sizeof(struct ibv_flow_spec_ipv6);
 				specs.ipv6 = (struct ibv_flow_spec_ipv6){
-					.type = IBV_FLOW_SPEC_IPV6,
+					.type = inner | IBV_FLOW_SPEC_IPV6,
 					.size = size,
 				};
 			}
@@ -1002,7 +1029,7 @@  struct ibv_spec_header {
 		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
 			size = sizeof(struct ibv_flow_spec_tcp_udp);
 			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
-				.type = ((i == HASH_RXQ_UDPV4 ||
+				.type = inner | ((i == HASH_RXQ_UDPV4 ||
 					  i == HASH_RXQ_UDPV6) ?
 					 IBV_FLOW_SPEC_UDP :
 					 IBV_FLOW_SPEC_TCP),
@@ -1025,25 +1052,72 @@  struct ibv_spec_header {
  *
  * @param priv
  *   Pointer to private structure.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
  * @param[in, out] parser
  *   Internal parser structure.
  */
 static int
-priv_flow_convert_rss(struct priv *priv, struct mlx5_flow_parse *parser)
+priv_flow_convert_rss(struct priv *priv, struct rte_flow_error *error,
+		      struct mlx5_flow_parse *parser)
 {
-	const unsigned int ipv4 =
+	unsigned int ipv4 =
 		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
 	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
 	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
 	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
 	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
-	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
+	enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
 	unsigned int i;
+	int found = 0;
 
 	(void)priv;
-	if (parser->layer == HASH_RXQ_ETH)
+	/*
+	 * Outer RSS.
+	 * HASH_RXQ_ETH is the only rule since tunnel packet match this
+	 * rule must match outer pattern.
+	 */
+	if (parser->tunnel && !parser->rss_conf.rss_level) {
+		/* Remove flows other than default. */
+		for (i = 0; i != hash_rxq_init_n - 1; ++i) {
+			rte_free(parser->queue[i].ibv_attr);
+			parser->queue[i].ibv_attr = NULL;
+		}
+		ipv4 = hash_rxq_init[parser->out_layer].ip_version == MLX5_IPV4;
+		ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
+		if (hash_rxq_init[parser->out_layer].dpdk_rss_hf &
+		    parser->rss_conf.rss_hf) {
+			parser->queue[HASH_RXQ_ETH].hash_fields =
+				hash_rxq_init[parser->out_layer].hash_fields;
+		} else if (ip && (hash_rxq_init[ip].dpdk_rss_hf &
+		    parser->rss_conf.rss_hf)) {
+			parser->queue[HASH_RXQ_ETH].hash_fields =
+				hash_rxq_init[ip].hash_fields;
+		} else if (parser->rss_conf.rss_hf) {
+			rte_flow_error_set(error, EINVAL,
+					   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					   "rss outer hash function not match pattern");
+			return -rte_errno;
+		}
 		return 0;
-	/* This layer becomes useless as the pattern define under layers. */
+	}
+	if (parser->layer == HASH_RXQ_ETH || parser->layer == HASH_RXQ_TUNNEL) {
+		/* Remove unused flows according to hash function. */
+		for (i = 0; i != hash_rxq_init_n - 1; ++i) {
+			if (!parser->queue[i].ibv_attr)
+				continue;
+			if (hash_rxq_init[i].dpdk_rss_hf &
+			    parser->rss_conf.rss_hf) {
+				parser->queue[i].hash_fields =
+					hash_rxq_init[i].hash_fields;
+				continue;
+			}
+			rte_free(parser->queue[i].ibv_attr);
+			parser->queue[i].ibv_attr = NULL;
+		}
+		return 0;
+	}
+	/* Remove ETH layer flow. */
 	rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
 	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
 	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
@@ -1053,9 +1127,54 @@  struct ibv_spec_header {
 		rte_free(parser->queue[i].ibv_attr);
 		parser->queue[i].ibv_attr = NULL;
 	}
-	/* Remove impossible flow according to the RSS configuration. */
-	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
-	    parser->rss_conf.rss_hf) {
+	/*
+	 * Keep L4 flows as IP pattern has to support L4 RSS.
+	 * Otherwise, only keep the flow that match the pattern.
+	 */
+	if (parser->layer != ip) {
+		/* Only keep the flow that match the pattern. */
+		for (i = hmin; i != (hmax + 1); ++i) {
+			if (i == parser->layer)
+				continue;
+			rte_free(parser->queue[i].ibv_attr);
+			parser->queue[i].ibv_attr = NULL;
+		}
+	}
+	if (parser->rss_conf.rss_hf) {
+		/* Remove impossible flow according to the RSS configuration. */
+		for (i = hmin; i != (hmax + 1); ++i) {
+			if (!parser->queue[i].ibv_attr)
+				continue;
+			if (parser->rss_conf.rss_hf &
+			    hash_rxq_init[i].dpdk_rss_hf) {
+				parser->queue[i].hash_fields =
+					hash_rxq_init[i].hash_fields;
+				found = 1;
+				continue;
+			}
+			/* L4 flow could be used for L3 RSS. */
+			if (i == parser->layer && i < ip &&
+			    (hash_rxq_init[ip].dpdk_rss_hf &
+			     parser->rss_conf.rss_hf)) {
+				parser->queue[i].hash_fields =
+					hash_rxq_init[ip].hash_fields;
+				found = 1;
+				continue;
+			}
+			/* L3 flow and L4 hash: non-rss L3 flow. */
+			if (i == parser->layer && i == ip && found)
+				/* IP pattern and L4 HF. */
+				continue;
+			rte_free(parser->queue[i].ibv_attr);
+			parser->queue[i].ibv_attr = NULL;
+		}
+		if (!found) {
+			rte_flow_error_set(error, EINVAL,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   NULL, "rss hash function not match pattern");
+			return -rte_errno;
+		}
+	} else {
 		/* Remove any other flow. */
 		for (i = hmin; i != (hmax + 1); ++i) {
 			if ((i == parser->layer) ||
@@ -1064,8 +1183,6 @@  struct ibv_spec_header {
 			rte_free(parser->queue[i].ibv_attr);
 			parser->queue[i].ibv_attr = NULL;
 		}
-	} else if (!parser->queue[ip].ibv_attr) {
-		/* no RSS possible with the current configuration. */
 		parser->queues_n = 1;
 	}
 	return 0;
@@ -1142,10 +1259,6 @@  struct ibv_spec_header {
 				hash_rxq_init[i].flow_priority;
 			unsigned int offset;
 
-			if (!(parser->rss_conf.rss_hf &
-			      hash_rxq_init[i].dpdk_rss_hf) &&
-			    (i != HASH_RXQ_ETH))
-				continue;
 			offset = parser->queue[i].offset;
 			parser->queue[i].ibv_attr =
 				priv_flow_convert_allocate(priv, priority,
@@ -1158,6 +1271,7 @@  struct ibv_spec_header {
 	/* Third step. Conversion parse, fill the specifications. */
 	parser->inner = 0;
 	parser->tunnel = 0;
+	parser->layer = HASH_RXQ_ETH;
 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
 			continue;
@@ -1174,19 +1288,12 @@  struct ibv_spec_header {
 			goto exit_free;
 		}
 	}
-	if (parser->mark)
-		mlx5_flow_create_flag_mark(parser, parser->mark_id);
-	if (parser->count && parser->create) {
-		mlx5_flow_create_count(priv, parser);
-		if (!parser->cs)
-			goto exit_count_error;
-	}
 	/*
 	 * Last step. Complete missing specification to reach the RSS
 	 * configuration.
 	 */
 	if (!parser->drop) {
-		ret = priv_flow_convert_rss(priv, parser);
+		ret = priv_flow_convert_rss(priv, error, parser);
 		if (ret)
 			goto exit_free;
 		priv_flow_convert_finalise(priv, parser);
@@ -1195,6 +1302,13 @@  struct ibv_spec_header {
 			attr->priority +
 			hash_rxq_init[parser->layer].flow_priority;
 	}
+	if (parser->mark)
+		mlx5_flow_create_flag_mark(parser, parser->mark_id);
+	if (parser->count && parser->create) {
+		mlx5_flow_create_count(priv, parser);
+		if (!parser->cs)
+			goto exit_count_error;
+	}
 exit_free:
 	/* Only verification is expected, all resources should be released. */
 	if (!parser->create) {
@@ -1242,17 +1356,11 @@  struct ibv_spec_header {
 	for (i = 0; i != hash_rxq_init_n; ++i) {
 		if (!parser->queue[i].ibv_attr)
 			continue;
-		/* Specification must be the same l3 type or none. */
-		if (parser->layer == HASH_RXQ_ETH ||
-		    (hash_rxq_init[parser->layer].ip_version ==
-		     hash_rxq_init[i].ip_version) ||
-		    (hash_rxq_init[i].ip_version == 0)) {
-			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
-					parser->queue[i].offset);
-			memcpy(dst, src, size);
-			++parser->queue[i].ibv_attr->num_of_specs;
-			parser->queue[i].offset += size;
-		}
+		dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
+				parser->queue[i].offset);
+		memcpy(dst, src, size);
+		++parser->queue[i].ibv_attr->num_of_specs;
+		parser->queue[i].offset += size;
 	}
 }
 
@@ -1280,9 +1388,7 @@  struct ibv_spec_header {
 		.size = eth_size,
 	};
 
-	/* Don't update layer for the inner pattern. */
-	if (!parser->inner)
-		parser->layer = HASH_RXQ_ETH;
+	parser->layer = HASH_RXQ_ETH;
 	if (spec) {
 		unsigned int i;
 
@@ -1369,9 +1475,7 @@  struct ibv_spec_header {
 		.size = ipv4_size,
 	};
 
-	/* Don't update layer for the inner pattern. */
-	if (!parser->inner)
-		parser->layer = HASH_RXQ_IPV4;
+	parser->layer = HASH_RXQ_IPV4;
 	if (spec) {
 		if (!mask)
 			mask = default_mask;
@@ -1421,9 +1525,7 @@  struct ibv_spec_header {
 		.size = ipv6_size,
 	};
 
-	/* Don't update layer for the inner pattern. */
-	if (!parser->inner)
-		parser->layer = HASH_RXQ_IPV6;
+	parser->layer = HASH_RXQ_IPV6;
 	if (spec) {
 		unsigned int i;
 		uint32_t vtc_flow_val;
@@ -1493,13 +1595,10 @@  struct ibv_spec_header {
 		.size = udp_size,
 	};
 
-	/* Don't update layer for the inner pattern. */
-	if (!parser->inner) {
-		if (parser->layer == HASH_RXQ_IPV4)
-			parser->layer = HASH_RXQ_UDPV4;
-		else
-			parser->layer = HASH_RXQ_UDPV6;
-	}
+	if (parser->layer == HASH_RXQ_IPV4)
+		parser->layer = HASH_RXQ_UDPV4;
+	else
+		parser->layer = HASH_RXQ_UDPV6;
 	if (spec) {
 		if (!mask)
 			mask = default_mask;
@@ -1539,13 +1638,10 @@  struct ibv_spec_header {
 		.size = tcp_size,
 	};
 
-	/* Don't update layer for the inner pattern. */
-	if (!parser->inner) {
-		if (parser->layer == HASH_RXQ_IPV4)
-			parser->layer = HASH_RXQ_TCPV4;
-		else
-			parser->layer = HASH_RXQ_TCPV6;
-	}
+	if (parser->layer == HASH_RXQ_IPV4)
+		parser->layer = HASH_RXQ_TCPV4;
+	else
+		parser->layer = HASH_RXQ_TCPV6;
 	if (spec) {
 		if (!mask)
 			mask = default_mask;
@@ -1592,6 +1688,8 @@  struct ibv_spec_header {
 	id.vni[0] = 0;
 	parser->inner = IBV_FLOW_SPEC_INNER;
 	parser->tunnel = rte_ptype[item->type];
+	parser->out_layer = parser->layer;
+	parser->layer = HASH_RXQ_TUNNEL;
 	if (spec) {
 		if (!mask)
 			mask = default_mask;
@@ -1642,6 +1740,8 @@  struct ibv_spec_header {
 	(void)default_mask;
 	parser->inner = IBV_FLOW_SPEC_INNER;
 	parser->tunnel = rte_ptype[item->type];
+	parser->out_layer = parser->layer;
+	parser->layer = HASH_RXQ_TUNNEL;
 	mlx5_flow_create_copy(parser, &tunnel, size);
 	return 0;
 }
@@ -1805,7 +1905,7 @@  struct ibv_spec_header {
 			continue;
 		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
 		parser->queue[i].ibv_attr = NULL;
-		hash_fields = hash_rxq_init[i].hash_fields;
+		hash_fields = parser->queue[i].hash_fields;
 		if (!priv->dev->data->dev_started)
 			continue;
 		flow->frxq[i].hrxq =
@@ -1815,7 +1915,8 @@  struct ibv_spec_header {
 					   hash_fields,
 					   parser->queues,
 					   parser->queues_n,
-					   parser->tunnel);
+					   parser->tunnel,
+					   parser->rss_conf.rss_level);
 		if (flow->frxq[i].hrxq)
 			continue;
 		flow->frxq[i].hrxq =
@@ -1825,7 +1926,8 @@  struct ibv_spec_header {
 					   hash_fields,
 					   parser->queues,
 					   parser->queues_n,
-					   parser->tunnel);
+					   parser->tunnel,
+					   parser->rss_conf.rss_level);
 		if (!flow->frxq[i].hrxq) {
 			rte_flow_error_set(error, ENOMEM,
 					   RTE_FLOW_ERROR_TYPE_HANDLE,
@@ -1885,7 +1987,7 @@  struct ibv_spec_header {
 		}
 		DEBUG("%p type %d QP %p ibv_flow %p",
 		      (void *)flow, i,
-		      (void *)flow->frxq[i].hrxq,
+		      (void *)flow->frxq[i].hrxq->qp,
 		      (void *)flow->frxq[i].ibv_flow);
 	}
 	for (i = 0; i != parser->queues_n; ++i) {
@@ -2385,7 +2487,8 @@  struct rte_flow *
 						   hash_rxq_init[i].hash_fields,
 						   (*flow->queues),
 						   flow->queues_n,
-						   flow->tunnel);
+						   flow->tunnel,
+						   flow->rss_conf.rss_level);
 			if (flow->frxq[i].hrxq)
 				goto flow_create;
 			flow->frxq[i].hrxq =
@@ -2394,7 +2497,8 @@  struct rte_flow *
 						   hash_rxq_init[i].hash_fields,
 						   (*flow->queues),
 						   flow->queues_n,
-						   flow->tunnel);
+						   flow->tunnel,
+						   flow->rss_conf.rss_level);
 			if (!flow->frxq[i].hrxq) {
 				DEBUG("Flow %p cannot be applied",
 				      (void *)flow);
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 09ef189..470fd2a 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1327,6 +1327,8 @@  struct mlx5_ind_table_ibv*
  *   Number of queues.
  * @param tunnel
  *   Tunnel type.
+ * @param rss_level
+ *   RSS hash on tunnel level, 0: outer most, 1: inner.
  *
  * @return
  *   An hash Rx queue on success.
@@ -1334,11 +1336,14 @@  struct mlx5_ind_table_ibv*
 struct mlx5_hrxq*
 mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
 		   uint64_t hash_fields, uint16_t queues[], uint16_t queues_n,
-		   uint32_t tunnel)
+		   uint32_t tunnel, uint8_t rss_level)
 {
 	struct mlx5_hrxq *hrxq;
 	struct mlx5_ind_table_ibv *ind_tbl;
 	struct ibv_qp *qp;
+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+	struct mlx5dv_qp_init_attr qp_init_attr = {0};
+#endif
 
 	queues_n = hash_fields ? queues_n : 1;
 	ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
@@ -1346,6 +1351,40 @@  struct mlx5_hrxq*
 		ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n);
 	if (!ind_tbl)
 		return NULL;
+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+	if (tunnel) {
+		qp_init_attr.comp_mask =
+				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
+		qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
+	}
+	qp = mlx5dv_create_qp(
+		priv->ctx,
+		&(struct ibv_qp_init_attr_ex){
+			.qp_type = IBV_QPT_RAW_PACKET,
+			.comp_mask =
+				IBV_QP_INIT_ATTR_PD |
+				IBV_QP_INIT_ATTR_IND_TABLE |
+				IBV_QP_INIT_ATTR_RX_HASH,
+			.rx_hash_conf = (struct ibv_rx_hash_conf){
+				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
+				.rx_hash_key_len = rss_key_len,
+				.rx_hash_key = rss_key,
+				.rx_hash_fields_mask = hash_fields |
+					(tunnel && rss_level ?
+					(uint32_t)IBV_RX_HASH_INNER : 0),
+			},
+			.rwq_ind_tbl = ind_tbl->ind_table,
+			.pd = priv->pd,
+		},
+		&qp_init_attr);
+	DEBUG("mxl5dv_create_qp(): %p, hash_fields: %lx ind_tbl:%p dv_attr: "
+	      "comp_mask: %lx create_flags:%x",
+	      (void *)qp, hash_fields | (tunnel && rss_level ?
+	      (uint32_t)IBV_RX_HASH_INNER : 0), (void *)ind_tbl,
+	      qp_init_attr.comp_mask, qp_init_attr.create_flags);
+	if (!qp)
+		goto error;
+#else
 	qp = mlx5_glue->create_qp_ex
 		(priv->ctx,
 		 &(struct ibv_qp_init_attr_ex){
@@ -1363,6 +1402,7 @@  struct mlx5_hrxq*
 			.rwq_ind_tbl = ind_tbl->ind_table,
 			.pd = priv->pd,
 		 });
+#endif
 	if (!qp)
 		goto error;
 	hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
@@ -1373,6 +1413,7 @@  struct mlx5_hrxq*
 	hrxq->rss_key_len = rss_key_len;
 	hrxq->hash_fields = hash_fields;
 	hrxq->tunnel = tunnel;
+	hrxq->rss_level = rss_level;
 	memcpy(hrxq->rss_key, rss_key, rss_key_len);
 	rte_atomic32_inc(&hrxq->refcnt);
 	LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
@@ -1380,6 +1421,7 @@  struct mlx5_hrxq*
 	      (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
 	return hrxq;
 error:
+	ERROR("%p: Error creating Hash Rx queue", (void *)priv);
 	mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
 	if (qp)
 		claim_zero(mlx5_glue->destroy_qp(qp));
@@ -1400,6 +1442,8 @@  struct mlx5_hrxq*
  *   Number of queues.
  * @param tunnel
  *   Tunnel type.
+ * @param rss_level
+ *   RSS hash on tunnel level, 0: outer most, 1: inner
  *
  * @return
  *   An hash Rx queue on success.
@@ -1407,7 +1451,7 @@  struct mlx5_hrxq*
 struct mlx5_hrxq*
 mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
 		   uint64_t hash_fields, uint16_t queues[], uint16_t queues_n,
-		   uint32_t tunnel)
+		   uint32_t tunnel, uint8_t rss_level)
 {
 	struct mlx5_hrxq *hrxq;
 
@@ -1423,6 +1467,8 @@  struct mlx5_hrxq*
 			continue;
 		if (hrxq->tunnel != tunnel)
 			continue;
+		if (hrxq->rss_level != rss_level)
+			continue;
 		ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
 		if (!ind_tbl)
 			continue;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 7763641..626ebe8 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -146,6 +146,7 @@  struct mlx5_hrxq {
 	struct ibv_qp *qp; /* Verbs queue pair. */
 	uint64_t hash_fields; /* Verbs Hash fields. */
 	uint32_t tunnel; /* Tunnel type. */
+	uint8_t rss_level; /* RSS on tunnel level. */
 	uint8_t rss_key_len; /* Hash key length in bytes. */
 	uint8_t rss_key[]; /* Hash key. */
 };
@@ -243,9 +244,11 @@  struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_get(struct priv *,
 int mlx5_priv_ind_table_ibv_release(struct priv *, struct mlx5_ind_table_ibv *);
 int mlx5_priv_ind_table_ibv_verify(struct priv *);
 struct mlx5_hrxq *mlx5_priv_hrxq_new(struct priv *, uint8_t *, uint8_t,
-				     uint64_t, uint16_t [], uint16_t, uint32_t);
+				     uint64_t, uint16_t [], uint16_t,
+				     uint32_t, uint8_t);
 struct mlx5_hrxq *mlx5_priv_hrxq_get(struct priv *, uint8_t *, uint8_t,
-				     uint64_t, uint16_t [], uint16_t, uint32_t);
+				     uint64_t, uint16_t [], uint16_t,
+				     uint32_t, uint8_t);
 int mlx5_priv_hrxq_release(struct priv *, struct mlx5_hrxq *);
 int mlx5_priv_hrxq_ibv_verify(struct priv *);
 uint64_t mlx5_priv_get_rx_port_offloads(struct priv *);