diff mbox series

[v2,2/9] net/hns3: maximize the queue number

Message ID 20200929120117.50394-3-huwei013@chinasoftinc.com (mailing list archive)
State Accepted, archived
Delegated to: Ferruh Yigit
Headers show
Series updates and fixes for hns3 PMD driver | expand

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Wei Hu (Xavier) Sept. 29, 2020, 12:01 p.m. UTC
From: "Wei Hu (Xavier)" <xavier.huwei@huawei.com>

The maximum number of queues for the hns3 PF and VF drivers is 64 on the
hns3 network engine with revision_id equal to 0x21. On the hns3 network
engine with revision_id equal to 0x30, the hns3 PF PMD driver can support up
to 1280 queues, and the hns3 VF PMD driver can support up to 128 queues.

The following points need to be modified to support maximizing queue number
and maintain better compatibility:
1) Maximizing the number of queues for hns3 PF and VF PMD driver
   In the current version, VF is not supported when the PF is driven by the
   hns3 PMD driver. If the maximum queue number allocated to the PF PMD
   driver is less than the total tqps_num allocated to this port, all
   remaining queues are mapped to VF functions, which is unreasonable. This
   is fixed so that all remaining queues are mapped to the PF function.

   Use RTE_LIBRTE_HNS3_MAX_TQP_NUM_PER_PF, which comes from the
   configuration file, to limit the queue number allocated to the PF device
   on hns3 network engines with revision_id greater than 0x30. The PF device
   still keeps the maximum of 64 queues on the hns3 network engine with
   revision_id equal to 0x21.

   Remove the restriction imposed by the macro HNS3_MAX_TQP_NUM_PER_FUNC on
   the maximum number of queues in the hns3 VF PMD driver and use the value
   allocated by the hns3 PF kernel netdev driver instead.

2) According to the queue number allocated to PF device, a variable array
   for Rx and Tx queue is dynamically allocated to record the statistics of
   Rx and Tx queues during the .dev_init ops implementation function.
3) Add an extended field in hns3_pf_res_cmd to support the case where the
   number of queues exceeds 1024.
4) Use a new register base address for an Rx or Tx queue if the queue_id of
   that queue is greater than or equal to 1024.
5) Remove queue id mask and use all bits of actual queue_id as the queue_id
   to configure hardware.
6) Currently, bits 0~9 of qset_id in hns3_nq_to_qs_link_cmd are used to
   record the actual qset id and bit 10 is used as the VLD bit when
   configuring hardware. So bits 11~15 also need to be used when the actual
   qset_id is greater than or equal to 1024.
7) The number of queue sets differs between network engines. It is used to
   calculate the group number that is configured to hardware in the
   backpressure configuration.
8) Add checks on the number of Rx and Tx queues configured by the user when
   mapping queues to TCs. The number of Rx queues under a single TC must
   not be greater than the rss_size_max supported by a single TC. Rx and Tx
   queues are allocated evenly to every TC, so the numbers of Rx and Tx
   queues must each be an integer multiple of the number of TCs; otherwise
   the redundant queues would not be usable.
9) When creating flow table rules with the rte_flow API, we can specify the
   queue that matching packets enter by its queue number. Currently, the
   driver uses bits 0~9 to record the queue_id. So it is necessary to add
   one extended bit to record the queue_id and configure it to hardware if
   the queue_id is greater than or equal to 1024.

Signed-off-by: Huisong Li <lihuisong@huawei.com>
Signed-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
---
v1 -> v2: fix typo. replace 'fpr'  with 'for'.
---
 config/rte_config.h               |   3 +
 drivers/net/hns3/hns3_cmd.h       |   7 +-
 drivers/net/hns3/hns3_dcb.c       | 138 +++++++++++++++++++++++++++++++-------
 drivers/net/hns3/hns3_dcb.h       |  21 +++---
 drivers/net/hns3/hns3_ethdev.c    | 100 +++++++++++++++++++--------
 drivers/net/hns3/hns3_ethdev.h    |  30 ++++++++-
 drivers/net/hns3/hns3_ethdev_vf.c |  59 +++++++++-------
 drivers/net/hns3/hns3_fdir.c      |   5 ++
 drivers/net/hns3/hns3_regs.c      |   2 +-
 drivers/net/hns3/hns3_regs.h      |   3 +
 drivers/net/hns3/hns3_rxtx.c      |  28 ++++++--
 drivers/net/hns3/hns3_rxtx.h      |   2 +
 drivers/net/hns3/hns3_stats.c     |  73 +++++++++++++++-----
 drivers/net/hns3/hns3_stats.h     |   6 +-
 14 files changed, 362 insertions(+), 115 deletions(-)
diff mbox series

Patch

diff --git a/config/rte_config.h b/config/rte_config.h
index 0bae630..03d90d7 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -117,6 +117,9 @@ 
 /* fm10k defines */
 #define RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE 1
 
+/* hns3 defines */
+#define RTE_LIBRTE_HNS3_MAX_TQP_NUM_PER_PF 256
+
 /* i40e defines */
 #define RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC 1
 #undef RTE_LIBRTE_I40E_16BYTE_RX_DESC
diff --git a/drivers/net/hns3/hns3_cmd.h b/drivers/net/hns3/hns3_cmd.h
index dd50484..ecca75c 100644
--- a/drivers/net/hns3/hns3_cmd.h
+++ b/drivers/net/hns3/hns3_cmd.h
@@ -9,7 +9,6 @@ 
 #define HNS3_CMDQ_RX_INVLD_B		0
 #define HNS3_CMDQ_RX_OUTVLD_B		1
 #define HNS3_CMD_DESC_ALIGNMENT		4096
-#define HNS3_QUEUE_ID_MASK		0x1ff
 #define HNS3_CMD_FLAG_NEXT		BIT(2)
 
 struct hns3_hw;
@@ -388,7 +387,8 @@  struct hns3_pf_res_cmd {
 	uint16_t pf_own_fun_number;
 	uint16_t tx_buf_size;
 	uint16_t dv_buf_size;
-	uint16_t tqp_num_ext;
+	/* number of queues that exceed 1024 */
+	uint16_t ext_tqp_num;
 	uint16_t roh_pf_intr_vector_number;
 	uint32_t rsv[1];
 };
@@ -671,7 +671,6 @@  struct hns3_config_mac_speed_dup_cmd {
 	uint8_t rsv[22];
 };
 
-#define HNS3_RING_ID_MASK		GENMASK(9, 0)
 #define HNS3_TQP_ENABLE_B		0
 
 #define HNS3_MAC_CFG_AN_EN_B		0
@@ -835,7 +834,7 @@  struct hns3_dev_specs_0_cmd {
 	uint32_t max_tm_rate;
 };
 
-#define HNS3_MAX_TQP_NUM_PER_FUNC	64
+#define HNS3_MAX_TQP_NUM_HIP08_PF	64
 #define HNS3_DEFAULT_TX_BUF		0x4000    /* 16k  bytes */
 #define HNS3_TOTAL_PKT_BUF		0x108000  /* 1.03125M bytes */
 #define HNS3_DEFAULT_DV			0xA000    /* 40k byte */
diff --git a/drivers/net/hns3/hns3_dcb.c b/drivers/net/hns3/hns3_dcb.c
index c1be49e..fecedff 100644
--- a/drivers/net/hns3/hns3_dcb.c
+++ b/drivers/net/hns3/hns3_dcb.c
@@ -576,21 +576,31 @@  hns3_dcb_pri_shaper_cfg(struct hns3_hw *hw)
 	return ret;
 }
 
-void
+static int
 hns3_set_rss_size(struct hns3_hw *hw, uint16_t nb_rx_q)
 {
 	struct hns3_rss_conf *rss_cfg = &hw->rss_info;
 	uint16_t rx_qnum_per_tc;
+	uint16_t used_rx_queues;
 	int i;
 
 	rx_qnum_per_tc = nb_rx_q / hw->num_tc;
-	rx_qnum_per_tc = RTE_MIN(hw->rss_size_max, rx_qnum_per_tc);
-	if (hw->alloc_rss_size != rx_qnum_per_tc) {
-		hns3_info(hw, "rss size changes from %u to %u",
-			  hw->alloc_rss_size, rx_qnum_per_tc);
-		hw->alloc_rss_size = rx_qnum_per_tc;
+	if (rx_qnum_per_tc > hw->rss_size_max) {
+		hns3_err(hw, "rx queue number of per tc (%u) is greater than "
+			 "value (%u) hardware supported.",
+			 rx_qnum_per_tc, hw->rss_size_max);
+		return -EINVAL;
 	}
-	hw->used_rx_queues = hw->num_tc * hw->alloc_rss_size;
+
+	used_rx_queues = hw->num_tc * rx_qnum_per_tc;
+	if (used_rx_queues != nb_rx_q) {
+		hns3_err(hw, "rx queue number (%u) configured must be an "
+			 "integral multiple of valid tc number (%u).",
+			 nb_rx_q, hw->num_tc);
+		return -EINVAL;
+	}
+	hw->alloc_rss_size = rx_qnum_per_tc;
+	hw->used_rx_queues = used_rx_queues;
 
 	/*
 	 * When rss size is changed, we need to update rss redirection table
@@ -604,15 +614,29 @@  hns3_set_rss_size(struct hns3_hw *hw, uint16_t nb_rx_q)
 			rss_cfg->rss_indirection_tbl[i] =
 							i % hw->alloc_rss_size;
 	}
+
+	return 0;
 }
 
-void
-hns3_tc_queue_mapping_cfg(struct hns3_hw *hw, uint16_t nb_queue)
+static int
+hns3_tc_queue_mapping_cfg(struct hns3_hw *hw, uint16_t nb_tx_q)
 {
 	struct hns3_tc_queue_info *tc_queue;
+	uint16_t used_tx_queues;
+	uint16_t tx_qnum_per_tc;
 	uint8_t i;
 
-	hw->tx_qnum_per_tc = nb_queue / hw->num_tc;
+	tx_qnum_per_tc = nb_tx_q / hw->num_tc;
+	used_tx_queues = hw->num_tc * tx_qnum_per_tc;
+	if (used_tx_queues != nb_tx_q) {
+		hns3_err(hw, "tx queue number (%u) configured must be an "
+			 "integral multiple of valid tc number (%u).",
+			 nb_tx_q, hw->num_tc);
+		return -EINVAL;
+	}
+
+	hw->used_tx_queues = used_tx_queues;
+	hw->tx_qnum_per_tc = tx_qnum_per_tc;
 	for (i = 0; i < HNS3_MAX_TC_NUM; i++) {
 		tc_queue = &hw->tc_queue[i];
 		if (hw->hw_tc_map & BIT(i) && i < hw->num_tc) {
@@ -628,22 +652,39 @@  hns3_tc_queue_mapping_cfg(struct hns3_hw *hw, uint16_t nb_queue)
 			tc_queue->tc = 0;
 		}
 	}
-	hw->used_tx_queues = hw->num_tc * hw->tx_qnum_per_tc;
+
+	return 0;
 }
 
-static void
+int
+hns3_queue_to_tc_mapping(struct hns3_hw *hw, uint16_t nb_rx_q, uint16_t nb_tx_q)
+{
+	int ret;
+
+	ret = hns3_set_rss_size(hw, nb_rx_q);
+	if (ret)
+		return ret;
+
+	return hns3_tc_queue_mapping_cfg(hw, nb_tx_q);
+}
+
+static int
 hns3_dcb_update_tc_queue_mapping(struct hns3_hw *hw, uint16_t nb_rx_q,
 				 uint16_t nb_tx_q)
 {
 	struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw);
 	struct hns3_pf *pf = &hns->pf;
+	int ret;
 
 	hw->num_tc = hw->dcb_info.num_tc;
-	hns3_set_rss_size(hw, nb_rx_q);
-	hns3_tc_queue_mapping_cfg(hw, nb_tx_q);
+	ret = hns3_queue_to_tc_mapping(hw, nb_rx_q, nb_tx_q);
+	if (ret)
+		return ret;
 
 	if (!hns->is_vf)
 		memcpy(pf->prio_tc, hw->dcb_info.prio_tc, HNS3_MAX_USER_PRIO);
+
+	return 0;
 }
 
 int
@@ -886,13 +927,35 @@  hns3_q_to_qs_map_cfg(struct hns3_hw *hw, uint16_t q_id, uint16_t qs_id)
 {
 	struct hns3_nq_to_qs_link_cmd *map;
 	struct hns3_cmd_desc desc;
+	uint16_t tmp_qs_id = 0;
+	uint16_t qs_id_l;
+	uint16_t qs_id_h;
 
 	hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_TM_NQ_TO_QS_LINK, false);
 
 	map = (struct hns3_nq_to_qs_link_cmd *)desc.data;
 
 	map->nq_id = rte_cpu_to_le_16(q_id);
-	map->qset_id = rte_cpu_to_le_16(qs_id | HNS3_DCB_Q_QS_LINK_VLD_MSK);
+
+	/*
+	 * Network engine with revision_id 0x21 uses 0~9 bit of qs_id to
+	 * configure qset_id. So we need to convert qs_id to the follow
+	 * format to support qset_id > 1024.
+	 * qs_id: | 15 | 14 ~ 10 |  9 ~ 0   |
+	 *            /         / \         \
+	 *           /         /   \         \
+	 * qset_id: | 15 ~ 11 |  10 |  9 ~ 0  |
+	 *          | qs_id_h | vld | qs_id_l |
+	 */
+	qs_id_l = hns3_get_field(qs_id, HNS3_DCB_QS_ID_L_MSK,
+				 HNS3_DCB_QS_ID_L_S);
+	qs_id_h = hns3_get_field(qs_id, HNS3_DCB_QS_ID_H_MSK,
+				 HNS3_DCB_QS_ID_H_S);
+	hns3_set_field(tmp_qs_id, HNS3_DCB_QS_ID_L_MSK, HNS3_DCB_QS_ID_L_S,
+		       qs_id_l);
+	hns3_set_field(tmp_qs_id, HNS3_DCB_QS_ID_H_EXT_MSK,
+		       HNS3_DCB_QS_ID_H_EXT_S, qs_id_h);
+	map->qset_id = rte_cpu_to_le_16(tmp_qs_id | HNS3_DCB_Q_QS_LINK_VLD_MSK);
 
 	return hns3_cmd_send(hw, &desc, 1);
 }
@@ -1291,7 +1354,7 @@  hns3_dcb_cfg_validate(struct hns3_adapter *hns, uint8_t *tc, bool *changed)
 		*changed = true;
 }
 
-static void
+static int
 hns3_dcb_info_cfg(struct hns3_adapter *hns)
 {
 	struct rte_eth_dcb_rx_conf *dcb_rx_conf;
@@ -1299,6 +1362,7 @@  hns3_dcb_info_cfg(struct hns3_adapter *hns)
 	struct hns3_hw *hw = &hns->hw;
 	uint8_t tc_bw, bw_rest;
 	uint8_t i, j;
+	int ret;
 
 	dcb_rx_conf = &hw->data->dev_conf.rx_adv_conf.dcb_rx_conf;
 	pf->local_max_tc = (uint8_t)dcb_rx_conf->nb_tcs;
@@ -1338,8 +1402,12 @@  hns3_dcb_info_cfg(struct hns3_adapter *hns)
 	for (i = 0; i < HNS3_MAX_USER_PRIO; i++)
 		hw->dcb_info.prio_tc[i] = dcb_rx_conf->dcb_tc[i];
 
-	hns3_dcb_update_tc_queue_mapping(hw, hw->data->nb_rx_queues,
-					 hw->data->nb_tx_queues);
+	ret = hns3_dcb_update_tc_queue_mapping(hw, hw->data->nb_rx_queues,
+					       hw->data->nb_tx_queues);
+	if (ret)
+		hns3_err(hw, "update tc queue mapping failed, ret = %d.", ret);
+
+	return ret;
 }
 
 static int
@@ -1378,9 +1446,8 @@  hns3_dcb_info_update(struct hns3_adapter *hns, uint8_t num_tc)
 		hw->dcb_info.num_tc = 1;
 	}
 	hw->hw_tc_map = bit_map;
-	hns3_dcb_info_cfg(hns);
 
-	return 0;
+	return hns3_dcb_info_cfg(hns);
 }
 
 static int
@@ -1505,6 +1572,7 @@  hns3_dcb_init(struct hns3_hw *hw)
 {
 	struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw);
 	struct hns3_pf *pf = &hns->pf;
+	uint16_t default_tqp_num;
 	int ret;
 
 	PMD_INIT_FUNC_TRACE();
@@ -1525,11 +1593,24 @@  hns3_dcb_init(struct hns3_hw *hw)
 
 		ret = hns3_dcb_info_init(hw);
 		if (ret) {
-			hns3_err(hw, "dcb info init failed: %d", ret);
+			hns3_err(hw, "dcb info init failed, ret = %d.", ret);
+			return ret;
+		}
+
+		/*
+		 * The number of queues configured by default cannot exceed
+		 * the maximum number of queues for a single TC.
+		 */
+		default_tqp_num = RTE_MIN(hw->rss_size_max,
+					  hw->tqps_num / hw->dcb_info.num_tc);
+		ret = hns3_dcb_update_tc_queue_mapping(hw, default_tqp_num,
+						       default_tqp_num);
+		if (ret) {
+			hns3_err(hw,
+				 "update tc queue mapping failed, ret = %d.",
+				 ret);
 			return ret;
 		}
-		hns3_dcb_update_tc_queue_mapping(hw, hw->tqps_num,
-						 hw->tqps_num);
 	}
 
 	/*
@@ -1541,7 +1622,7 @@  hns3_dcb_init(struct hns3_hw *hw)
 	 */
 	ret = hns3_dcb_init_hw(hw);
 	if (ret) {
-		hns3_err(hw, "dcb init hardware failed: %d", ret);
+		hns3_err(hw, "dcb init hardware failed, ret = %d.", ret);
 		return ret;
 	}
 
@@ -1556,10 +1637,15 @@  hns3_update_queue_map_configure(struct hns3_adapter *hns)
 	uint16_t nb_tx_q = hw->data->nb_tx_queues;
 	int ret;
 
-	hns3_dcb_update_tc_queue_mapping(hw, nb_rx_q, nb_tx_q);
+	ret = hns3_dcb_update_tc_queue_mapping(hw, nb_rx_q, nb_tx_q);
+	if (ret) {
+		hns3_err(hw, "failed to update tc queue mapping, ret = %d.",
+			 ret);
+		return ret;
+	}
 	ret = hns3_q_to_qs_map(hw);
 	if (ret)
-		hns3_err(hw, "failed to map nq to qs! ret = %d", ret);
+		hns3_err(hw, "failed to map nq to qs, ret = %d.", ret);
 
 	return ret;
 }
diff --git a/drivers/net/hns3/hns3_dcb.h b/drivers/net/hns3/hns3_dcb.h
index 557d88b..05c9786 100644
--- a/drivers/net/hns3/hns3_dcb.h
+++ b/drivers/net/hns3/hns3_dcb.h
@@ -52,6 +52,12 @@  struct hns3_qs_to_pri_link_cmd {
 	uint16_t rsvd;
 	uint8_t priority;
 #define HNS3_DCB_QS_PRI_LINK_VLD_MSK	BIT(0)
+#define HNS3_DCB_QS_ID_L_MSK		GENMASK(9, 0)
+#define HNS3_DCB_QS_ID_L_S		0
+#define HNS3_DCB_QS_ID_H_MSK		GENMASK(14, 10)
+#define HNS3_DCB_QS_ID_H_S		10
+#define HNS3_DCB_QS_ID_H_EXT_S		11
+#define HNS3_DCB_QS_ID_H_EXT_MSK	GENMASK(15, 11)
 	uint8_t link_vld;
 	uint8_t rsvd1[18];
 };
@@ -89,11 +95,12 @@  struct hns3_pg_shapping_cmd {
 	uint32_t rsvd1[4];
 };
 
-#define HNS3_BP_GRP_NUM		32
+#define HNS3_BP_GRP_NUM			32
 #define HNS3_BP_SUB_GRP_ID_S		0
 #define HNS3_BP_SUB_GRP_ID_M		GENMASK(4, 0)
 #define HNS3_BP_GRP_ID_S		5
 #define HNS3_BP_GRP_ID_M		GENMASK(9, 5)
+
 struct hns3_bp_to_qs_map_cmd {
 	uint8_t tc_id;
 	uint8_t rsvd[2];
@@ -165,15 +172,13 @@  int hns3_dcb_init_hw(struct hns3_hw *hw);
 
 int hns3_dcb_info_init(struct hns3_hw *hw);
 
-int
-hns3_fc_enable(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf);
-
-int
-hns3_dcb_pfc_enable(struct rte_eth_dev *dev, struct rte_eth_pfc_conf *pfc_conf);
+int hns3_fc_enable(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf);
 
-void hns3_set_rss_size(struct hns3_hw *hw, uint16_t nb_rx_q);
+int hns3_dcb_pfc_enable(struct rte_eth_dev *dev,
+			struct rte_eth_pfc_conf *pfc_conf);
 
-void hns3_tc_queue_mapping_cfg(struct hns3_hw *hw, uint16_t nb_queue);
+int hns3_queue_to_tc_mapping(struct hns3_hw *hw, uint16_t nb_rx_q,
+			     uint16_t nb_tx_q);
 
 int hns3_dcb_cfg_update(struct hns3_adapter *hns);
 
diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c
index c45b1b3..6e0f52b 100644
--- a/drivers/net/hns3/hns3_ethdev.c
+++ b/drivers/net/hns3/hns3_ethdev.c
@@ -2660,6 +2660,49 @@  hns3_query_function_status(struct hns3_hw *hw)
 }
 
 static int
+hns3_get_pf_max_tqp_num(struct hns3_hw *hw)
+{
+	struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw);
+	struct hns3_pf *pf = &hns->pf;
+
+	if (pf->tqp_config_mode == HNS3_FLEX_MAX_TQP_NUM_MODE) {
+		/*
+		 * The total_tqps_num obtained from firmware is maximum tqp
+		 * numbers of this port, which should be used for PF and VFs.
+		 * There is no need for pf to have so many tqp numbers in
+		 * most cases. RTE_LIBRTE_HNS3_MAX_TQP_NUM_PER_PF,
+		 * coming from config file, is assigned to maximum queue number
+		 * for the PF of this port by user. So users can modify the
+		 * maximum queue number of PF according to their own application
+		 * scenarios, which is more flexible to use. In addition, many
+		 * memories can be saved due to allocating queue statistics
+		 * room according to the actual number of queues required. The
+		 * maximum queue number of PF for network engine with
+		 * revision_id greater than 0x30 is assigned by config file.
+		 */
+		if (RTE_LIBRTE_HNS3_MAX_TQP_NUM_PER_PF <= 0) {
+			hns3_err(hw, "RTE_LIBRTE_HNS3_MAX_TQP_NUM_PER_PF(%d) "
+				 "must be greater than 0.",
+				 RTE_LIBRTE_HNS3_MAX_TQP_NUM_PER_PF);
+			return -EINVAL;
+		}
+
+		hw->tqps_num = RTE_MIN(RTE_LIBRTE_HNS3_MAX_TQP_NUM_PER_PF,
+				       hw->total_tqps_num);
+	} else {
+		/*
+		 * Due to the limitation on the number of PF interrupts
+		 * available, the maximum queue number assigned to PF on
+		 * the network engine with revision_id 0x21 is 64.
+		 */
+		hw->tqps_num = RTE_MIN(hw->total_tqps_num,
+				       HNS3_MAX_TQP_NUM_HIP08_PF);
+	}
+
+	return 0;
+}
+
+static int
 hns3_query_pf_resource(struct hns3_hw *hw)
 {
 	struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw);
@@ -2676,9 +2719,13 @@  hns3_query_pf_resource(struct hns3_hw *hw)
 	}
 
 	req = (struct hns3_pf_res_cmd *)desc.data;
-	hw->total_tqps_num = rte_le_to_cpu_16(req->tqp_num);
+	hw->total_tqps_num = rte_le_to_cpu_16(req->tqp_num) +
+			     rte_le_to_cpu_16(req->ext_tqp_num);
+	ret = hns3_get_pf_max_tqp_num(hw);
+	if (ret)
+		return ret;
+
 	pf->pkt_buf_size = rte_le_to_cpu_16(req->buf_size) << HNS3_BUF_UNIT_S;
-	hw->tqps_num = RTE_MIN(hw->total_tqps_num, HNS3_MAX_TQP_NUM_PER_FUNC);
 	pf->func_num = rte_le_to_cpu_16(req->pf_own_fun_number);
 
 	if (req->tx_buf_size)
@@ -2902,7 +2949,9 @@  hns3_query_dev_specifications(struct hns3_hw *hw)
 static int
 hns3_get_capability(struct hns3_hw *hw)
 {
+	struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw);
 	struct rte_pci_device *pci_dev;
+	struct hns3_pf *pf = &hns->pf;
 	struct rte_eth_dev *eth_dev;
 	uint16_t device_id;
 	uint8_t revision;
@@ -2936,6 +2985,7 @@  hns3_get_capability(struct hns3_hw *hw)
 		hw->tso_mode = HNS3_TSO_SW_CAL_PSEUDO_H_CSUM;
 		hw->vlan_mode = HNS3_SW_SHIFT_AND_DISCARD_MODE;
 		hw->min_tx_pkt_len = HNS3_HIP08_MIN_TX_PKT_LEN;
+		pf->tqp_config_mode = HNS3_FIXED_MAX_TQP_NUM_MODE;
 		return 0;
 	}
 
@@ -2953,6 +3003,7 @@  hns3_get_capability(struct hns3_hw *hw)
 	hw->tso_mode = HNS3_TSO_HW_CAL_PSEUDO_H_CSUM;
 	hw->vlan_mode = HNS3_HW_SHIFT_AND_DISCARD_MODE;
 	hw->min_tx_pkt_len = HNS3_HIP09_MIN_TX_PKT_LEN;
+	pf->tqp_config_mode = HNS3_FLEX_MAX_TQP_NUM_MODE;
 
 	return 0;
 }
@@ -3048,7 +3099,7 @@  hns3_get_configuration(struct hns3_hw *hw)
 
 	ret = hns3_get_board_configuration(hw);
 	if (ret)
-		PMD_INIT_LOG(ERR, "Failed to get board configuration: %d", ret);
+		PMD_INIT_LOG(ERR, "failed to get board configuration: %d", ret);
 
 	return ret;
 }
@@ -3081,29 +3132,18 @@  hns3_map_tqps_to_func(struct hns3_hw *hw, uint16_t func_id, uint16_t tqp_pid,
 static int
 hns3_map_tqp(struct hns3_hw *hw)
 {
-	uint16_t tqps_num = hw->total_tqps_num;
-	uint16_t func_id;
-	uint16_t tqp_id;
-	bool is_pf;
-	int num;
 	int ret;
 	int i;
 
 	/*
-	 * In current version VF is not supported when PF is driven by DPDK
-	 * driver, so we allocate tqps to PF as much as possible.
+	 * In current version, VF is not supported when PF is driven by DPDK
+	 * driver, so we assign total tqps_num tqps allocated to this port
+	 * to PF.
 	 */
-	tqp_id = 0;
-	num = DIV_ROUND_UP(hw->total_tqps_num, HNS3_MAX_TQP_NUM_PER_FUNC);
-	for (func_id = HNS3_PF_FUNC_ID; func_id < num; func_id++) {
-		is_pf = func_id == HNS3_PF_FUNC_ID ? true : false;
-		for (i = 0;
-		     i < HNS3_MAX_TQP_NUM_PER_FUNC && tqp_id < tqps_num; i++) {
-			ret = hns3_map_tqps_to_func(hw, func_id, tqp_id++, i,
-						    is_pf);
-			if (ret)
-				return ret;
-		}
+	for (i = 0; i < hw->total_tqps_num; i++) {
+		ret = hns3_map_tqps_to_func(hw, HNS3_PF_FUNC_ID, i, i, true);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
@@ -4558,17 +4598,21 @@  hns3_init_pf(struct rte_eth_dev *eth_dev)
 		goto err_get_config;
 	}
 
+	ret = hns3_tqp_stats_init(hw);
+	if (ret)
+		goto err_get_config;
+
 	ret = hns3_init_hardware(hns);
 	if (ret) {
 		PMD_INIT_LOG(ERR, "Failed to init hardware: %d", ret);
-		goto err_get_config;
+		goto err_init_hw;
 	}
 
 	/* Initialize flow director filter list & hash */
 	ret = hns3_fdir_filter_init(hns);
 	if (ret) {
 		PMD_INIT_LOG(ERR, "Failed to alloc hashmap for fdir: %d", ret);
-		goto err_hw_init;
+		goto err_fdir;
 	}
 
 	hns3_set_default_rss_args(hw);
@@ -4577,16 +4621,17 @@  hns3_init_pf(struct rte_eth_dev *eth_dev)
 	if (ret) {
 		PMD_INIT_LOG(ERR, "fail to enable hw error interrupts: %d",
 			     ret);
-		goto err_fdir;
+		goto err_enable_intr;
 	}
 
 	return 0;
 
-err_fdir:
+err_enable_intr:
 	hns3_fdir_filter_uninit(hns);
-err_hw_init:
+err_fdir:
 	hns3_uninit_umv_space(hw);
-
+err_init_hw:
+	hns3_tqp_stats_uninit(hw);
 err_get_config:
 	hns3_pf_disable_irq0(hw);
 	rte_intr_disable(&pci_dev->intr_handle);
@@ -4618,6 +4663,7 @@  hns3_uninit_pf(struct rte_eth_dev *eth_dev)
 	hns3_promisc_uninit(hw);
 	hns3_fdir_filter_uninit(hns);
 	hns3_uninit_umv_space(hw);
+	hns3_tqp_stats_uninit(hw);
 	hns3_pf_disable_irq0(hw);
 	rte_intr_disable(&pci_dev->intr_handle);
 	hns3_intr_unregister(&pci_dev->intr_handle, hns3_interrupt_handler,
diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h
index 3c5ccc7..22c6a15 100644
--- a/drivers/net/hns3/hns3_ethdev.h
+++ b/drivers/net/hns3/hns3_ethdev.h
@@ -134,9 +134,9 @@  enum hns3_fc_status {
 };
 
 struct hns3_tc_queue_info {
-	uint8_t	tqp_offset;     /* TQP offset from base TQP */
-	uint8_t	tqp_count;      /* Total TQPs */
-	uint8_t	tc;             /* TC index */
+	uint16_t tqp_offset;    /* TQP offset from base TQP */
+	uint16_t tqp_count;     /* Total TQPs */
+	uint8_t tc;             /* TC index */
 	bool enable;            /* If this TC is enable or not */
 };
 
@@ -661,11 +661,35 @@  struct hns3_ptype_table {
 	uint32_t ol4table[HNS3_OL4TBL_NUM];
 };
 
+#define HNS3_FIXED_MAX_TQP_NUM_MODE		0
+#define HNS3_FLEX_MAX_TQP_NUM_MODE		1
+
 struct hns3_pf {
 	struct hns3_adapter *adapter;
 	bool is_main_pf;
 	uint16_t func_num; /* num functions of this pf, include pf and vfs */
 
+	/*
+	 * tqp_config mode
+	 * tqp_config_mode value range:
+	 *	HNS3_FIXED_MAX_TQP_NUM_MODE,
+	 *	HNS3_FLEX_MAX_TQP_NUM_MODE
+	 *
+	 * - HNS3_FIXED_MAX_TQP_NUM_MODE
+	 *   There is a limitation on the number of pf interrupts available for
+	 *   on some versions of network engines. In this case, the maximum
+	 *   queue number of pf can not be greater than the interrupt number,
+	 *   such as pf of network engine with revision_id 0x21. So the maximum
+	 *   number of queues must be fixed.
+	 *
+	 * - HNS3_FLEX_MAX_TQP_NUM_MODE
+	 *   In this mode, the maximum queue number of pf has not any constraint
+	 *   and comes from the macro RTE_LIBRTE_HNS3_MAX_TQP_NUM_PER_PF
+	 *   in the config file. Users can modify the macro according to their
+	 *   own application scenarios, which is more flexible to use.
+	 */
+	uint8_t tqp_config_mode;
+
 	uint32_t pkt_buf_size; /* Total pf buf size for tx/rx */
 	uint32_t tx_buf_size; /* Tx buffer size for each TC */
 	uint32_t dv_buf_size; /* Dv buffer size for each TC */
diff --git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c
index 4c73441..0b949e9 100644
--- a/drivers/net/hns3/hns3_ethdev_vf.c
+++ b/drivers/net/hns3/hns3_ethdev_vf.c
@@ -1191,20 +1191,21 @@  hns3vf_get_capability(struct hns3_hw *hw)
 static int
 hns3vf_check_tqp_info(struct hns3_hw *hw)
 {
-	uint16_t tqps_num;
+	if (hw->tqps_num == 0) {
+		PMD_INIT_LOG(ERR, "Get invalid tqps_num(0) from PF.");
+		return -EINVAL;
+	}
 
-	tqps_num = hw->tqps_num;
-	if (tqps_num > HNS3_MAX_TQP_NUM_PER_FUNC || tqps_num == 0) {
-		PMD_INIT_LOG(ERR, "Get invalid tqps_num(%u) from PF. valid "
-				  "range: 1~%d",
-			     tqps_num, HNS3_MAX_TQP_NUM_PER_FUNC);
+	if (hw->rss_size_max == 0) {
+		PMD_INIT_LOG(ERR, "Get invalid rss_size_max(0) from PF.");
 		return -EINVAL;
 	}
 
-	hw->alloc_rss_size = RTE_MIN(hw->rss_size_max, hw->tqps_num);
+	hw->tqps_num = RTE_MIN(hw->rss_size_max, hw->tqps_num);
 
 	return 0;
 }
+
 static int
 hns3vf_get_port_base_vlan_filter_state(struct hns3_hw *hw)
 {
@@ -1295,6 +1296,7 @@  hns3vf_get_tc_info(struct hns3_hw *hw)
 {
 	uint8_t resp_msg;
 	int ret;
+	int i;
 
 	ret = hns3_send_mbx_msg(hw, HNS3_MBX_GET_TCINFO, 0, NULL, 0,
 				true, &resp_msg, sizeof(resp_msg));
@@ -1306,6 +1308,11 @@  hns3vf_get_tc_info(struct hns3_hw *hw)
 
 	hw->hw_tc_map = resp_msg;
 
+	for (i = 0; i < HNS3_MAX_TC_NUM; i++) {
+		if (hw->hw_tc_map & BIT(i))
+			hw->num_tc++;
+	}
+
 	return 0;
 }
 
@@ -1366,17 +1373,10 @@  hns3vf_get_configuration(struct hns3_hw *hw)
 }
 
 static int
-hns3vf_set_tc_info(struct hns3_adapter *hns)
+hns3vf_set_tc_queue_mapping(struct hns3_adapter *hns, uint16_t nb_rx_q,
+			    uint16_t nb_tx_q)
 {
 	struct hns3_hw *hw = &hns->hw;
-	uint16_t nb_rx_q = hw->data->nb_rx_queues;
-	uint16_t nb_tx_q = hw->data->nb_tx_queues;
-	uint8_t i;
-
-	hw->num_tc = 0;
-	for (i = 0; i < HNS3_MAX_TC_NUM; i++)
-		if (hw->hw_tc_map & BIT(i))
-			hw->num_tc++;
 
 	if (nb_rx_q < hw->num_tc) {
 		hns3_err(hw, "number of Rx queues(%d) is less than tcs(%d).",
@@ -1390,10 +1390,7 @@  hns3vf_set_tc_info(struct hns3_adapter *hns)
 		return -EINVAL;
 	}
 
-	hns3_set_rss_size(hw, nb_rx_q);
-	hns3_tc_queue_mapping_cfg(hw, nb_tx_q);
-
-	return 0;
+	return hns3_queue_to_tc_mapping(hw, nb_rx_q, nb_tx_q);
 }
 
 static void
@@ -1783,20 +1780,33 @@  hns3vf_init_vf(struct rte_eth_dev *eth_dev)
 		goto err_get_config;
 	}
 
+	ret = hns3_tqp_stats_init(hw);
+	if (ret)
+		goto err_get_config;
+
+	ret = hns3vf_set_tc_queue_mapping(hns, hw->tqps_num, hw->tqps_num);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "failed to set tc info, ret = %d.", ret);
+		goto err_set_tc_queue;
+	}
+
 	ret = hns3vf_clear_vport_list(hw);
 	if (ret) {
 		PMD_INIT_LOG(ERR, "Failed to clear tbl list: %d", ret);
-		goto err_get_config;
+		goto err_set_tc_queue;
 	}
 
 	ret = hns3vf_init_hardware(hns);
 	if (ret)
-		goto err_get_config;
+		goto err_set_tc_queue;
 
 	hns3_set_default_rss_args(hw);
 
 	return 0;
 
+err_set_tc_queue:
+	hns3_tqp_stats_uninit(hw);
+
 err_get_config:
 	hns3vf_disable_irq0(hw);
 	rte_intr_disable(&pci_dev->intr_handle);
@@ -1825,6 +1835,7 @@  hns3vf_uninit_vf(struct rte_eth_dev *eth_dev)
 	(void)hns3_config_gro(hw, false);
 	(void)hns3vf_set_alive(hw, false);
 	(void)hns3vf_set_promisc_mode(hw, false, false, false);
+	hns3_tqp_stats_uninit(hw);
 	hns3vf_disable_irq0(hw);
 	rte_intr_disable(&pci_dev->intr_handle);
 	hns3_intr_unregister(&pci_dev->intr_handle, hns3vf_interrupt_handler,
@@ -2004,9 +2015,11 @@  static int
 hns3vf_do_start(struct hns3_adapter *hns, bool reset_queue)
 {
 	struct hns3_hw *hw = &hns->hw;
+	uint16_t nb_rx_q = hw->data->nb_rx_queues;
+	uint16_t nb_tx_q = hw->data->nb_tx_queues;
 	int ret;
 
-	ret = hns3vf_set_tc_info(hns);
+	ret = hns3vf_set_tc_queue_mapping(hns, nb_rx_q, nb_tx_q);
 	if (ret)
 		return ret;
 
diff --git a/drivers/net/hns3/hns3_fdir.c b/drivers/net/hns3/hns3_fdir.c
index e6a065b..79e3028 100644
--- a/drivers/net/hns3/hns3_fdir.c
+++ b/drivers/net/hns3/hns3_fdir.c
@@ -46,6 +46,8 @@ 
 #define HNS3_FD_AD_QUEUE_REGION_SIZE_M	GENMASK(20, 17)
 #define HNS3_FD_AD_COUNTER_HIGH_BIT	7
 #define HNS3_FD_AD_COUNTER_HIGH_BIT_B	26
+#define HNS3_FD_AD_QUEUE_ID_HIGH_BIT	10
+#define HNS3_FD_AD_QUEUE_ID_HIGH_BIT_B	21
 
 enum HNS3_PORT_TYPE {
 	HOST_PORT,
@@ -437,6 +439,9 @@  static int hns3_fd_ad_config(struct hns3_hw *hw, int loc,
 	/* set extend bit if counter_id is in [128 ~ 255] */
 	if (action->counter_id & BIT(HNS3_FD_AD_COUNTER_HIGH_BIT))
 		hns3_set_bit(ad_data, HNS3_FD_AD_COUNTER_HIGH_BIT_B, 1);
+	/* set extend bit if queue id > 1024 */
+	if (action->queue_id & BIT(HNS3_FD_AD_QUEUE_ID_HIGH_BIT))
+		hns3_set_bit(ad_data, HNS3_FD_AD_QUEUE_ID_HIGH_BIT_B, 1);
 	ad_data <<= HNS3_FD_AD_DATA_S;
 	hns3_set_bit(ad_data, HNS3_FD_AD_DROP_B, action->drop_packet);
 	if (action->nb_queues == 1)
diff --git a/drivers/net/hns3/hns3_regs.c b/drivers/net/hns3/hns3_regs.c
index 63c8602..a76f42c 100644
--- a/drivers/net/hns3/hns3_regs.c
+++ b/drivers/net/hns3/hns3_regs.c
@@ -295,7 +295,7 @@  hns3_direct_access_regs(struct hns3_hw *hw, uint32_t *data)
 	reg_um = sizeof(ring_reg_addrs) / sizeof(uint32_t);
 	separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE;
 	for (j = 0; j < hw->tqps_num; j++) {
-		reg_offset = HNS3_TQP_REG_OFFSET + HNS3_TQP_REG_SIZE * j;
+		reg_offset = hns3_get_tqp_reg_offset(j);
 		for (i = 0; i < reg_um; i++)
 			*data++ = hns3_read_dev(hw,
 						ring_reg_addrs[i] + reg_offset);
diff --git a/drivers/net/hns3/hns3_regs.h b/drivers/net/hns3/hns3_regs.h
index 5cf924e..d83c3b3 100644
--- a/drivers/net/hns3/hns3_regs.h
+++ b/drivers/net/hns3/hns3_regs.h
@@ -89,6 +89,9 @@ 
 #define HNS3_TQP_REG_OFFSET			0x80000
 #define HNS3_TQP_REG_SIZE			0x200
 
+#define HNS3_TQP_EXT_REG_OFFSET			0x100
+#define HNS3_MIN_EXTEND_QUEUE_ID		1024
+
 /* bar registers for tqp interrupt */
 #define HNS3_TQP_INTR_CTRL_REG			0x20000
 #define HNS3_TQP_INTR_GL0_REG			0x20100
diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c
index 930aa28..e3f0db4 100644
--- a/drivers/net/hns3/hns3_rxtx.c
+++ b/drivers/net/hns3/hns3_rxtx.c
@@ -405,7 +405,7 @@  hns3_tqp_enable(struct hns3_hw *hw, uint16_t queue_id, bool enable)
 	req = (struct hns3_cfg_com_tqp_queue_cmd *)desc.data;
 
 	hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_CFG_COM_TQP_QUEUE, false);
-	req->tqp_id = rte_cpu_to_le_16(queue_id & HNS3_RING_ID_MASK);
+	req->tqp_id = rte_cpu_to_le_16(queue_id);
 	req->stream_id = 0;
 	hns3_set_bit(req->enable, HNS3_TQP_ENABLE_B, enable ? 1 : 0);
 
@@ -426,7 +426,7 @@  hns3_send_reset_tqp_cmd(struct hns3_hw *hw, uint16_t queue_id, bool enable)
 	hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_RESET_TQP_QUEUE, false);
 
 	req = (struct hns3_reset_tqp_queue_cmd *)desc.data;
-	req->tqp_id = rte_cpu_to_le_16(queue_id & HNS3_RING_ID_MASK);
+	req->tqp_id = rte_cpu_to_le_16(queue_id);
 	hns3_set_bit(req->reset_req, HNS3_TQP_RESET_B, enable ? 1 : 0);
 
 	ret = hns3_cmd_send(hw, &desc, 1);
@@ -446,7 +446,7 @@  hns3_get_reset_status(struct hns3_hw *hw, uint16_t queue_id)
 	hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_RESET_TQP_QUEUE, true);
 
 	req = (struct hns3_reset_tqp_queue_cmd *)desc.data;
-	req->tqp_id = rte_cpu_to_le_16(queue_id & HNS3_RING_ID_MASK);
+	req->tqp_id = rte_cpu_to_le_16(queue_id);
 
 	ret = hns3_cmd_send(hw, &desc, 1);
 	if (ret) {
@@ -1341,6 +1341,22 @@  hns3_rx_queue_conf_check(struct hns3_hw *hw, const struct rte_eth_rxconf *conf,
 	return 0;
 }
 
+uint32_t
+hns3_get_tqp_reg_offset(uint16_t queue_id)
+{
+	uint32_t reg_offset;
+
+	/* Need an extend offset to config queue > 1024 */
+	if (queue_id < HNS3_MIN_EXTEND_QUEUE_ID)
+		reg_offset = HNS3_TQP_REG_OFFSET + queue_id * HNS3_TQP_REG_SIZE;
+	else
+		reg_offset = HNS3_TQP_REG_OFFSET + HNS3_TQP_EXT_REG_OFFSET +
+			     (queue_id - HNS3_MIN_EXTEND_QUEUE_ID) *
+			     HNS3_TQP_REG_SIZE;
+
+	return reg_offset;
+}
+
 int
 hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
 		    unsigned int socket_id, const struct rte_eth_rxconf *conf,
@@ -1422,6 +1438,8 @@  hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
 	rxq->configured = true;
 	rxq->io_base = (void *)((char *)hw->io_base + HNS3_TQP_REG_OFFSET +
 				idx * HNS3_TQP_REG_SIZE);
+	rxq->io_base = (void *)((char *)hw->io_base +
+					hns3_get_tqp_reg_offset(idx));
 	rxq->io_head_reg = (volatile void *)((char *)rxq->io_base +
 			   HNS3_RING_RX_HEAD_REG);
 	rxq->rx_buf_len = rx_buf_size;
@@ -2183,8 +2201,8 @@  hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
 		txq->pvid_sw_shift_en = false;
 	txq->max_non_tso_bd_num = hw->max_non_tso_bd_num;
 	txq->configured = true;
-	txq->io_base = (void *)((char *)hw->io_base + HNS3_TQP_REG_OFFSET +
-				idx * HNS3_TQP_REG_SIZE);
+	txq->io_base = (void *)((char *)hw->io_base +
+						hns3_get_tqp_reg_offset(idx));
 	txq->io_tail_reg = (volatile void *)((char *)txq->io_base +
 					     HNS3_RING_TX_TAIL_REG);
 	txq->min_tx_pkt_len = hw->min_tx_pkt_len;
diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h
index d7d70f6..cdfe115 100644
--- a/drivers/net/hns3/hns3_rxtx.h
+++ b/drivers/net/hns3/hns3_rxtx.h
@@ -661,4 +661,6 @@  void hns3_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 		       struct rte_eth_rxq_info *qinfo);
 void hns3_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 		       struct rte_eth_txq_info *qinfo);
+uint32_t hns3_get_tqp_reg_offset(uint16_t idx);
+
 #endif /* _HNS3_RXTX_H_ */
diff --git a/drivers/net/hns3/hns3_stats.c b/drivers/net/hns3/hns3_stats.c
index e8846b9..8c3c7cc 100644
--- a/drivers/net/hns3/hns3_stats.c
+++ b/drivers/net/hns3/hns3_stats.c
@@ -330,6 +330,8 @@  static const struct hns3_xstats_name_offset hns3_tx_queue_strings[] = {
 #define HNS3_FIX_NUM_STATS (HNS3_NUM_MAC_STATS + HNS3_NUM_ERROR_INT_XSTATS + \
 			    HNS3_NUM_RESET_XSTATS)
 
+static void hns3_tqp_stats_clear(struct hns3_hw *hw);
+
 /*
  * Query all the MAC statistics data of Network ICL command ,opcode id: 0x0034.
  * This command is used before send 'query_mac_stat command', the descriptor
@@ -456,8 +458,7 @@  hns3_update_tqp_stats(struct hns3_hw *hw)
 		hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_QUERY_RX_STATUS,
 					  true);
 
-		desc.data[0] = rte_cpu_to_le_32((uint32_t)i &
-						HNS3_QUEUE_ID_MASK);
+		desc.data[0] = rte_cpu_to_le_32((uint32_t)i);
 		ret = hns3_cmd_send(hw, &desc, 1);
 		if (ret) {
 			hns3_err(hw, "Failed to query RX No.%d queue stat: %d",
@@ -471,8 +472,7 @@  hns3_update_tqp_stats(struct hns3_hw *hw)
 		hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_QUERY_TX_STATUS,
 					  true);
 
-		desc.data[0] = rte_cpu_to_le_32((uint32_t)i &
-						HNS3_QUEUE_ID_MASK);
+		desc.data[0] = rte_cpu_to_le_32((uint32_t)i);
 		ret = hns3_cmd_send(hw, &desc, 1);
 		if (ret) {
 			hns3_err(hw, "Failed to query TX No.%d queue stat: %d",
@@ -553,7 +553,6 @@  hns3_stats_reset(struct rte_eth_dev *eth_dev)
 {
 	struct hns3_adapter *hns = eth_dev->data->dev_private;
 	struct hns3_hw *hw = &hns->hw;
-	struct hns3_tqp_stats *stats = &hw->tqp_stats;
 	struct hns3_cmd_desc desc_reset;
 	struct hns3_rx_queue *rxq;
 	struct hns3_tx_queue *txq;
@@ -561,14 +560,13 @@  hns3_stats_reset(struct rte_eth_dev *eth_dev)
 	int ret;
 
 	/*
-	 * If this is a reset xstats is NULL, and we have cleared the
-	 * registers by reading them.
+	 * Note: Reading hardware statistics of rx/tx queue packet number
+	 * will clear them.
 	 */
 	for (i = 0; i < hw->tqps_num; i++) {
 		hns3_cmd_setup_basic_desc(&desc_reset, HNS3_OPC_QUERY_RX_STATUS,
 					  true);
-		desc_reset.data[0] = rte_cpu_to_le_32((uint32_t)i &
-						      HNS3_QUEUE_ID_MASK);
+		desc_reset.data[0] = rte_cpu_to_le_32((uint32_t)i);
 		ret = hns3_cmd_send(hw, &desc_reset, 1);
 		if (ret) {
 			hns3_err(hw, "Failed to reset RX No.%d queue stat: %d",
@@ -578,8 +576,7 @@  hns3_stats_reset(struct rte_eth_dev *eth_dev)
 
 		hns3_cmd_setup_basic_desc(&desc_reset, HNS3_OPC_QUERY_TX_STATUS,
 					  true);
-		desc_reset.data[0] = rte_cpu_to_le_32((uint32_t)i &
-						      HNS3_QUEUE_ID_MASK);
+		desc_reset.data[0] = rte_cpu_to_le_32((uint32_t)i);
 		ret = hns3_cmd_send(hw, &desc_reset, 1);
 		if (ret) {
 			hns3_err(hw, "Failed to reset TX No.%d queue stat: %d",
@@ -614,7 +611,7 @@  hns3_stats_reset(struct rte_eth_dev *eth_dev)
 		}
 	}
 
-	memset(stats, 0, sizeof(struct hns3_tqp_stats));
+	hns3_tqp_stats_clear(hw);
 
 	return 0;
 }
@@ -668,8 +665,7 @@  hns3_get_queue_stats(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 	/* Get rx queue stats */
 	for (j = 0; j < dev->data->nb_rx_queues; j++) {
 		for (i = 0; i < HNS3_NUM_RX_QUEUE_STATS; i++) {
-			reg_offset = HNS3_TQP_REG_OFFSET +
-					HNS3_TQP_REG_SIZE * j;
+			reg_offset = hns3_get_tqp_reg_offset(j);
 			xstats[*count].value = hns3_read_dev(hw,
 				reg_offset + hns3_rx_queue_strings[i].offset);
 			xstats[*count].id = *count;
@@ -680,8 +676,7 @@  hns3_get_queue_stats(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 	/* Get tx queue stats */
 	for (j = 0; j < dev->data->nb_tx_queues; j++) {
 		for (i = 0; i < HNS3_NUM_TX_QUEUE_STATS; i++) {
-			reg_offset = HNS3_TQP_REG_OFFSET +
-					HNS3_TQP_REG_SIZE * j;
+			reg_offset = hns3_get_tqp_reg_offset(j);
 			xstats[*count].value = hns3_read_dev(hw,
 				reg_offset + hns3_tx_queue_strings[i].offset);
 			xstats[*count].id = *count;
@@ -1071,3 +1066,49 @@  hns3_dev_xstats_reset(struct rte_eth_dev *dev)
 
 	return 0;
 }
+
+int
+hns3_tqp_stats_init(struct hns3_hw *hw) /* allocate Rx/Tx counter arrays */
+{
+	struct hns3_tqp_stats *tqp_stats = &hw->tqp_stats;
+
+	tqp_stats->rcb_rx_ring_pktnum = rte_zmalloc("hns3_rx_ring_pkt_num",
+					 sizeof(uint64_t) * hw->tqps_num, 0);
+	if (tqp_stats->rcb_rx_ring_pktnum == NULL) {
+		hns3_err(hw, "failed to allocate rx_ring pkt_num.");
+		return -ENOMEM; /* nothing allocated yet, no cleanup */
+	}
+
+	tqp_stats->rcb_tx_ring_pktnum = rte_zmalloc("hns3_tx_ring_pkt_num",
+					 sizeof(uint64_t) * hw->tqps_num, 0);
+	if (tqp_stats->rcb_tx_ring_pktnum == NULL) {
+		hns3_err(hw, "failed to allocate tx_ring pkt_num.");
+		rte_free(tqp_stats->rcb_rx_ring_pktnum); /* undo Rx alloc */
+		tqp_stats->rcb_rx_ring_pktnum = NULL; /* no stale pointer */
+		return -ENOMEM;
+	}
+
+	return 0; /* both arrays hold hw->tqps_num zeroed entries */
+}
+
+void
+hns3_tqp_stats_uninit(struct hns3_hw *hw) /* release both counter arrays */
+{
+	struct hns3_tqp_stats *tqp_stats = &hw->tqp_stats;
+
+	rte_free(tqp_stats->rcb_rx_ring_pktnum); /* NULL is a no-op */
+	tqp_stats->rcb_rx_ring_pktnum = NULL; /* guards a repeated call */
+	rte_free(tqp_stats->rcb_tx_ring_pktnum);
+	tqp_stats->rcb_tx_ring_pktnum = NULL;
+}
+
+static void
+hns3_tqp_stats_clear(struct hns3_hw *hw) /* zero totals and both arrays */
+{
+	struct hns3_tqp_stats *stats = &hw->tqp_stats;
+
+	stats->rcb_rx_ring_pktnum_rcd = 0; /* accumulated totals */
+	stats->rcb_tx_ring_pktnum_rcd = 0;
+	/* NOTE(review): assumes both arrays are allocated - confirm callers */
+	memset(stats->rcb_rx_ring_pktnum, 0, sizeof(uint64_t) * hw->tqps_num);
+	memset(stats->rcb_tx_ring_pktnum, 0, sizeof(uint64_t) * hw->tqps_num);
+}
diff --git a/drivers/net/hns3/hns3_stats.h b/drivers/net/hns3/hns3_stats.h
index 07570cb..9fcd5f9 100644
--- a/drivers/net/hns3/hns3_stats.h
+++ b/drivers/net/hns3/hns3_stats.h
@@ -14,8 +14,8 @@ 
 struct hns3_tqp_stats {
 	uint64_t rcb_tx_ring_pktnum_rcd; /* Total num of transmitted packets */
 	uint64_t rcb_rx_ring_pktnum_rcd; /* Total num of received packets */
-	uint64_t rcb_tx_ring_pktnum[HNS3_MAX_TQP_NUM_PER_FUNC];
-	uint64_t rcb_rx_ring_pktnum[HNS3_MAX_TQP_NUM_PER_FUNC];
+	uint64_t *rcb_rx_ring_pktnum; /* Rx pkts per queue, tqps_num entries */
+	uint64_t *rcb_tx_ring_pktnum; /* Tx pkts per queue, tqps_num entries */
 };
 
 /* mac stats, Statistics counters collected by the MAC, opcode id: 0x0032 */
@@ -149,5 +149,7 @@  int hns3_dev_xstats_get_names_by_id(struct rte_eth_dev *dev,
 				    uint32_t size);
 int hns3_stats_reset(struct rte_eth_dev *dev);
 void hns3_error_int_stats_add(struct hns3_adapter *hns, const char *err);
+int hns3_tqp_stats_init(struct hns3_hw *hw);
+void hns3_tqp_stats_uninit(struct hns3_hw *hw);
 
 #endif /* _HNS3_STATS_H_ */