From patchwork Mon Sep 7 09:08:18 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Wei Hu (Xavier)" X-Patchwork-Id: 76653 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 90D30A04B9; Mon, 7 Sep 2020 11:09:12 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 82AB91C0D5; Mon, 7 Sep 2020 11:09:07 +0200 (CEST) Received: from mail.chinasoftinc.com (unknown [114.113.233.8]) by dpdk.org (Postfix) with ESMTP id 97D7E1BE85 for ; Mon, 7 Sep 2020 11:09:05 +0200 (CEST) Received: from localhost.localdomain (65.49.108.226) by INCCAS002.ito.icss (10.168.0.60) with Microsoft SMTP Server id 14.3.487.0; Mon, 7 Sep 2020 17:09:01 +0800 From: "Wei Hu (Xavier)" To: CC: Date: Mon, 7 Sep 2020 17:08:18 +0800 Message-ID: <20200907090825.1761-2-huwei013@chinasoftinc.com> X-Mailer: git-send-email 2.9.5 In-Reply-To: <20200907090825.1761-1-huwei013@chinasoftinc.com> References: <20200907090825.1761-1-huwei013@chinasoftinc.com> MIME-Version: 1.0 X-Originating-IP: [65.49.108.226] Subject: [dpdk-dev] [PATCH 1/8] net/hns3: report Rx free threshold X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: "Wei Hu (Xavier)" This patch reports the .rx_free_thresh value in the .dev_infos_get ops implementation functions hns3_dev_infos_get and hns3vf_dev_infos_get. In addition, the nb_rx_hold member of struct hns3_rx_queue is renamed to rx_free_hold and comments are added to improve code readability. 
Signed-off-by: Chengwen Feng Signed-off-by: Wei Hu (Xavier) --- drivers/net/hns3/hns3_ethdev.c | 2 ++ drivers/net/hns3/hns3_ethdev_vf.c | 2 ++ drivers/net/hns3/hns3_rxtx.c | 30 ++++++++++++------------------ drivers/net/hns3/hns3_rxtx.h | 9 ++++++--- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c index 4d5fa94..6fa34e8 100644 --- a/drivers/net/hns3/hns3_ethdev.c +++ b/drivers/net/hns3/hns3_ethdev.c @@ -2501,12 +2501,14 @@ hns3_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info) }; info->default_rxconf = (struct rte_eth_rxconf) { + .rx_free_thresh = HNS3_DEFAULT_RX_FREE_THRESH, /* * If there are no available Rx buffer descriptors, incoming * packets are always dropped by hardware based on hns3 network * engine. */ .rx_drop_en = 1, + .offloads = 0, }; info->vmdq_queue_num = 0; diff --git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c index 7fd0e6a..7226cc5 100644 --- a/drivers/net/hns3/hns3_ethdev_vf.c +++ b/drivers/net/hns3/hns3_ethdev_vf.c @@ -944,12 +944,14 @@ hns3vf_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info) }; info->default_rxconf = (struct rte_eth_rxconf) { + .rx_free_thresh = HNS3_DEFAULT_RX_FREE_THRESH, /* * If there are no available Rx buffer descriptors, incoming * packets are always dropped by hardware based on hns3 network * engine. 
*/ .rx_drop_en = 1, + .offloads = 0, }; info->vmdq_queue_num = 0; diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c index 308d0a6..fe2a7a4 100644 --- a/drivers/net/hns3/hns3_rxtx.c +++ b/drivers/net/hns3/hns3_rxtx.c @@ -652,8 +652,7 @@ hns3_dev_rx_queue_start(struct hns3_adapter *hns, uint16_t idx) } rxq->next_to_use = 0; - rxq->next_to_clean = 0; - rxq->nb_rx_hold = 0; + rxq->rx_free_hold = 0; hns3_init_rx_queue_hw(rxq); return 0; @@ -667,8 +666,7 @@ hns3_fake_rx_queue_start(struct hns3_adapter *hns, uint16_t idx) rxq = (struct hns3_rx_queue *)hw->fkq_data.rx_queues[idx]; rxq->next_to_use = 0; - rxq->next_to_clean = 0; - rxq->nb_rx_hold = 0; + rxq->rx_free_hold = 0; hns3_init_rx_queue_hw(rxq); } @@ -1303,10 +1301,8 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, rxq->hns = hns; rxq->mb_pool = mp; - if (conf->rx_free_thresh <= 0) - rxq->rx_free_thresh = DEFAULT_RX_FREE_THRESH; - else - rxq->rx_free_thresh = conf->rx_free_thresh; + rxq->rx_free_thresh = (conf->rx_free_thresh > 0) ? 
+ conf->rx_free_thresh : HNS3_DEFAULT_RX_FREE_THRESH; rxq->rx_deferred_start = conf->rx_deferred_start; rx_entry_len = sizeof(struct hns3_entry) * rxq->nb_rx_desc; @@ -1319,8 +1315,7 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, } rxq->next_to_use = 0; - rxq->next_to_clean = 0; - rxq->nb_rx_hold = 0; + rxq->rx_free_hold = 0; rxq->pkt_first_seg = NULL; rxq->pkt_last_seg = NULL; rxq->port_id = dev->data->port_id; @@ -1656,11 +1651,11 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) nb_rx_bd = 0; rxq = rx_queue; - rx_id = rxq->next_to_clean; + rx_id = rxq->next_to_use; rx_ring = rxq->rx_ring; + sw_ring = rxq->sw_ring; first_seg = rxq->pkt_first_seg; last_seg = rxq->pkt_last_seg; - sw_ring = rxq->sw_ring; while (nb_rx < nb_pkts) { rxdp = &rx_ring[rx_id]; @@ -1843,16 +1838,15 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) first_seg = NULL; } - rxq->next_to_clean = rx_id; + rxq->next_to_use = rx_id; rxq->pkt_first_seg = first_seg; rxq->pkt_last_seg = last_seg; - nb_rx_bd = nb_rx_bd + rxq->nb_rx_hold; - if (nb_rx_bd > rxq->rx_free_thresh) { - hns3_clean_rx_buffers(rxq, nb_rx_bd); - nb_rx_bd = 0; + rxq->rx_free_hold += nb_rx_bd; + if (rxq->rx_free_hold > rxq->rx_free_thresh) { + hns3_clean_rx_buffers(rxq, rxq->rx_free_hold); + rxq->rx_free_hold = 0; } - rxq->nb_rx_hold = nb_rx_bd; return nb_rx; } diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h index 4b3269b..a2d6514 100644 --- a/drivers/net/hns3/hns3_rxtx.h +++ b/drivers/net/hns3/hns3_rxtx.h @@ -10,6 +10,7 @@ #define HNS3_DEFAULT_RING_DESC 1024 #define HNS3_ALIGN_RING_DESC 32 #define HNS3_RING_BASE_ALIGN 128 +#define HNS3_DEFAULT_RX_FREE_THRESH 32 #define HNS3_512_BD_BUF_SIZE 512 #define HNS3_1K_BD_BUF_SIZE 1024 @@ -243,12 +244,14 @@ struct hns3_rx_queue { uint16_t queue_id; uint16_t port_id; uint16_t nb_rx_desc; - uint16_t nb_rx_hold; - uint16_t rx_tail; - uint16_t next_to_clean; uint16_t next_to_use; 
uint16_t rx_buf_len; + /* + * threshold for the number of BDs waited to passed to hardware. If the + * number exceeds the threshold, driver will pass these BDs to hardware. + */ uint16_t rx_free_thresh; + uint16_t rx_free_hold; /* num of BDs waited to passed to hardware */ /* * port based vlan configuration state. From patchwork Mon Sep 7 09:08:19 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Wei Hu (Xavier)" X-Patchwork-Id: 76654 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 23558A04B9; Mon, 7 Sep 2020 11:09:21 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id BD6D01C0DC; Mon, 7 Sep 2020 11:09:08 +0200 (CEST) Received: from mail.chinasoftinc.com (unknown [114.113.233.8]) by dpdk.org (Postfix) with ESMTP id 9B76C1C0D2 for ; Mon, 7 Sep 2020 11:09:06 +0200 (CEST) Received: from localhost.localdomain (65.49.108.226) by INCCAS002.ito.icss (10.168.0.60) with Microsoft SMTP Server id 14.3.487.0; Mon, 7 Sep 2020 17:09:03 +0800 From: "Wei Hu (Xavier)" To: CC: Date: Mon, 7 Sep 2020 17:08:19 +0800 Message-ID: <20200907090825.1761-3-huwei013@chinasoftinc.com> X-Mailer: git-send-email 2.9.5 In-Reply-To: <20200907090825.1761-1-huwei013@chinasoftinc.com> References: <20200907090825.1761-1-huwei013@chinasoftinc.com> MIME-Version: 1.0 X-Originating-IP: [65.49.108.226] Subject: [dpdk-dev] [PATCH 2/8] net/hns3: reduce address calculation in Rx X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: "Wei Hu (Xavier)" This patch adds the internal function named hns3_write_reg_opt to avoid performance loss 
from address calculation during register access in the '.rx_pkt_burst' ops implementation function named hns3_recv_pkts. In addition, because the hardware of the hns3 network engine always accesses registers in little-endian mode, the driver should call rte_cpu_to_le_32 to convert data to little-endian before writing a register and call rte_le_to_cpu_32 to convert data after reading from a register. The driver encapsulates this data conversion in the register read/write functions listed below: hns3_write_reg hns3_write_reg_opt hns3_read_reg Therefore, callers of these functions do not need to convert the data again. Signed-off-by: Chengwen Feng Signed-off-by: Wei Hu (Xavier) --- drivers/net/hns3/hns3_ethdev.h | 29 +++++++++++++++++++++++++++-- drivers/net/hns3/hns3_rxtx.c | 14 +++----------- drivers/net/hns3/hns3_rxtx.h | 1 + 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h index 9e49e28..3cb0535 100644 --- a/drivers/net/hns3/hns3_ethdev.h +++ b/drivers/net/hns3/hns3_ethdev.h @@ -708,14 +708,39 @@ struct hns3_adapter { #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +/* + * Because hardware always access register in little-endian mode based on hns3 + * network engine, so driver should also call rte_cpu_to_le_32 to convert data + * in little-endian mode before writing register and call rte_le_to_cpu_32 to + * convert data after reading from register. + * + * Here the driver encapsulates the data conversion operation in the register + * read/write operation function as below: + * hns3_write_reg + * hns3_write_reg_opt + * hns3_read_reg + * Therefore, when calling these functions, conversion is not required again. 
+ */ static inline void hns3_write_reg(void *base, uint32_t reg, uint32_t value) { - rte_write32(value, (volatile void *)((char *)base + reg)); + rte_write32(rte_cpu_to_le_32(value), + (volatile void *)((char *)base + reg)); +} + +/* + * The optimized function for writing registers used in the '.rx_pkt_burst' and + * '.tx_pkt_burst' ops implementation function. + */ +static inline void hns3_write_reg_opt(volatile void *addr, uint32_t value) +{ + rte_io_wmb(); + rte_write32_relaxed(rte_cpu_to_le_32(value), addr); } static inline uint32_t hns3_read_reg(void *base, uint32_t reg) { - return rte_read32((volatile void *)((char *)base + reg)); + uint32_t read_val = rte_read32((volatile void *)((char *)base + reg)); + return rte_le_to_cpu_32(read_val); } #define hns3_write_dev(a, reg, value) \ diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c index fe2a7a4..703b12a 100644 --- a/drivers/net/hns3/hns3_rxtx.c +++ b/drivers/net/hns3/hns3_rxtx.c @@ -1323,6 +1323,8 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, rxq->configured = true; rxq->io_base = (void *)((char *)hw->io_base + HNS3_TQP_REG_OFFSET + idx * HNS3_TQP_REG_SIZE); + rxq->io_head_reg = (volatile void *)((char *)rxq->io_base + + HNS3_RING_RX_HEAD_REG); rxq->rx_buf_len = rx_buf_size; rxq->l2_errors = 0; rxq->pkt_len_errors = 0; @@ -1472,16 +1474,6 @@ hns3_dev_supported_ptypes_get(struct rte_eth_dev *dev) return NULL; } -static void -hns3_clean_rx_buffers(struct hns3_rx_queue *rxq, int count) -{ - rxq->next_to_use += count; - if (rxq->next_to_use >= rxq->nb_rx_desc) - rxq->next_to_use -= rxq->nb_rx_desc; - - hns3_write_dev(rxq, HNS3_RING_RX_HEAD_REG, count); -} - static int hns3_handle_bdinfo(struct hns3_rx_queue *rxq, struct rte_mbuf *rxm, uint32_t bd_base_info, uint32_t l234_info, @@ -1844,7 +1836,7 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rxq->rx_free_hold += nb_rx_bd; if (rxq->rx_free_hold > rxq->rx_free_thresh) { - 
hns3_clean_rx_buffers(rxq, rxq->rx_free_hold); + hns3_write_reg_opt(rxq->io_head_reg, rxq->rx_free_hold); rxq->rx_free_hold = 0; } diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h index a2d6514..c1a34e2 100644 --- a/drivers/net/hns3/hns3_rxtx.h +++ b/drivers/net/hns3/hns3_rxtx.h @@ -231,6 +231,7 @@ struct hns3_entry { struct hns3_rx_queue { void *io_base; + volatile void *io_head_reg; struct hns3_adapter *hns; struct rte_mempool *mb_pool; struct hns3_desc *rx_ring; From patchwork Mon Sep 7 09:08:20 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Wei Hu (Xavier)" X-Patchwork-Id: 76655 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 4A395A04B9; Mon, 7 Sep 2020 11:09:30 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 037D01C0BE; Mon, 7 Sep 2020 11:09:14 +0200 (CEST) Received: from mail.chinasoftinc.com (unknown [114.113.233.8]) by dpdk.org (Postfix) with ESMTP id 7E7B41C10F for ; Mon, 7 Sep 2020 11:09:10 +0200 (CEST) Received: from localhost.localdomain (65.49.108.226) by INCCAS002.ito.icss (10.168.0.60) with Microsoft SMTP Server id 14.3.487.0; Mon, 7 Sep 2020 17:09:05 +0800 From: "Wei Hu (Xavier)" To: CC: Date: Mon, 7 Sep 2020 17:08:20 +0800 Message-ID: <20200907090825.1761-4-huwei013@chinasoftinc.com> X-Mailer: git-send-email 2.9.5 In-Reply-To: <20200907090825.1761-1-huwei013@chinasoftinc.com> References: <20200907090825.1761-1-huwei013@chinasoftinc.com> MIME-Version: 1.0 X-Originating-IP: [65.49.108.226] Subject: [dpdk-dev] [PATCH 3/8] net/hns3: add simple Rx process function X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: 
List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: "Wei Hu (Xavier)" This patch adds a simple Rx process function and supports choosing the Rx function according to the actual Rx offload capabilities. Signed-off-by: Chengwen Feng Signed-off-by: Wei Hu (Xavier) Signed-off-by: Huisong Li --- drivers/net/hns3/hns3_ethdev.c | 7 +- drivers/net/hns3/hns3_ethdev.h | 21 ++ drivers/net/hns3/hns3_ethdev_vf.c | 11 +- drivers/net/hns3/hns3_rxtx.c | 538 +++++++++++++++++++++++--------------- drivers/net/hns3/hns3_rxtx.h | 130 ++++++++- 5 files changed, 489 insertions(+), 218 deletions(-) diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c index 6fa34e8..5d612f1 100644 --- a/drivers/net/hns3/hns3_ethdev.c +++ b/drivers/net/hns3/hns3_ethdev.c @@ -2351,6 +2351,8 @@ hns3_dev_configure(struct rte_eth_dev *dev) if (ret) goto cfg_err; + hns->rx_simple_allowed = true; + hns3_init_rx_ptype_tble(dev); hw->adapter_state = HNS3_NIC_CONFIGURED; return 0; @@ -4746,6 +4748,7 @@ hns3_dev_start(struct rte_eth_dev *dev) hw->adapter_state = HNS3_NIC_STARTED; rte_spinlock_unlock(&hw->lock); + hns3_rx_scattered_calc(dev); hns3_set_rxtx_function(dev); hns3_mp_req_start_rxtx(dev); rte_eal_alarm_set(HNS3_SERVICE_INTERVAL, hns3_service_handler, dev); @@ -4844,6 +4847,7 @@ hns3_dev_stop(struct rte_eth_dev *dev) hns3_dev_release_mbufs(hns); hw->adapter_state = HNS3_NIC_CONFIGURED; } + hns3_rx_scattered_reset(dev); rte_eal_alarm_cancel(hns3_service_handler, dev); rte_spinlock_unlock(&hw->lock); } @@ -5514,6 +5518,7 @@ hns3_reset_service(void *param) } static const struct eth_dev_ops hns3_eth_dev_ops = { + .dev_configure = hns3_dev_configure, .dev_start = hns3_dev_start, .dev_stop = hns3_dev_stop, .dev_close = hns3_dev_close, @@ -5539,7 +5544,7 @@ static const struct eth_dev_ops hns3_eth_dev_ops = { .rx_queue_intr_disable = hns3_dev_rx_queue_intr_disable, .rxq_info_get = hns3_rxq_info_get, .txq_info_get = hns3_txq_info_get, - .dev_configure = hns3_dev_configure, + .rx_burst_mode_get = 
hns3_rx_burst_mode_get, .flow_ctrl_get = hns3_flow_ctrl_get, .flow_ctrl_set = hns3_flow_ctrl_set, .priority_flow_ctrl_set = hns3_priority_flow_ctrl_set, diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h index 3cb0535..d93c5b2 100644 --- a/drivers/net/hns3/hns3_ethdev.h +++ b/drivers/net/hns3/hns3_ethdev.h @@ -433,6 +433,7 @@ struct hns3_hw { uint16_t tqps_num; /* num task queue pairs of this function */ uint16_t intr_tqps_num; /* num queue pairs mapping interrupt */ uint16_t rss_size_max; /* HW defined max RSS task queue */ + uint16_t rx_buf_len; /* hold min hardware rx buf len */ uint16_t num_tx_desc; /* desc num of per tx queue */ uint16_t num_rx_desc; /* desc num of per rx queue */ uint32_t mng_entry_num; /* number of manager table entry */ @@ -575,6 +576,23 @@ struct hns3_mp_param { /* Key string for IPC. */ #define HNS3_MP_NAME "net_hns3_mp" +#define HNS3_L2TBL_NUM 4 +#define HNS3_L3TBL_NUM 16 +#define HNS3_L4TBL_NUM 16 +#define HNS3_OL3TBL_NUM 16 +#define HNS3_OL4TBL_NUM 16 + +struct hns3_ptype_table { + uint32_t l2table[HNS3_L2TBL_NUM]; + uint32_t l3table[HNS3_L3TBL_NUM]; + uint32_t l4table[HNS3_L4TBL_NUM]; + uint32_t inner_l2table[HNS3_L2TBL_NUM]; + uint32_t inner_l3table[HNS3_L3TBL_NUM]; + uint32_t inner_l4table[HNS3_L4TBL_NUM]; + uint32_t ol3table[HNS3_OL3TBL_NUM]; + uint32_t ol4table[HNS3_OL4TBL_NUM]; +}; + struct hns3_pf { struct hns3_adapter *adapter; bool is_main_pf; @@ -623,6 +641,9 @@ struct hns3_adapter { struct hns3_pf pf; struct hns3_vf vf; }; + + bool rx_simple_allowed; + struct hns3_ptype_table ptype_tbl __rte_cache_min_aligned; }; #define HNS3_DEV_SUPPORT_DCB_B 0x0 diff --git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c index 7226cc5..0f155d8 100644 --- a/drivers/net/hns3/hns3_ethdev_vf.c +++ b/drivers/net/hns3/hns3_ethdev_vf.c @@ -745,7 +745,8 @@ hns3vf_init_ring_with_vector(struct hns3_hw *hw) static int hns3vf_dev_configure(struct rte_eth_dev *dev) { - struct hns3_hw *hw = 
HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; struct hns3_rss_conf *rss_cfg = &hw->rss_info; struct rte_eth_conf *conf = &dev->data->dev_conf; enum rte_eth_rx_mq_mode mq_mode = conf->rxmode.mq_mode; @@ -820,6 +821,9 @@ hns3vf_dev_configure(struct rte_eth_dev *dev) if (ret) goto cfg_err; + hns->rx_simple_allowed = true; + hns3_init_rx_ptype_tble(dev); + hw->adapter_state = HNS3_NIC_CONFIGURED; return 0; @@ -1875,6 +1879,7 @@ hns3vf_dev_stop(struct rte_eth_dev *dev) hns3_dev_release_mbufs(hns); hw->adapter_state = HNS3_NIC_CONFIGURED; } + hns3_rx_scattered_reset(dev); rte_eal_alarm_cancel(hns3vf_service_handler, dev); rte_spinlock_unlock(&hw->lock); } @@ -2111,6 +2116,7 @@ hns3vf_dev_start(struct rte_eth_dev *dev) hw->adapter_state = HNS3_NIC_STARTED; rte_spinlock_unlock(&hw->lock); + hns3_rx_scattered_calc(dev); hns3_set_rxtx_function(dev); hns3_mp_req_start_rxtx(dev); rte_eal_alarm_set(HNS3VF_SERVICE_INTERVAL, hns3vf_service_handler, dev); @@ -2508,6 +2514,7 @@ hns3vf_reinit_dev(struct hns3_adapter *hns) } static const struct eth_dev_ops hns3vf_eth_dev_ops = { + .dev_configure = hns3vf_dev_configure, .dev_start = hns3vf_dev_start, .dev_stop = hns3vf_dev_stop, .dev_close = hns3vf_dev_close, @@ -2533,7 +2540,7 @@ static const struct eth_dev_ops hns3vf_eth_dev_ops = { .rx_queue_intr_disable = hns3_dev_rx_queue_intr_disable, .rxq_info_get = hns3_rxq_info_get, .txq_info_get = hns3_txq_info_get, - .dev_configure = hns3vf_dev_configure, + .rx_burst_mode_get = hns3_rx_burst_mode_get, .mac_addr_add = hns3vf_add_mac_addr, .mac_addr_remove = hns3vf_remove_mac_addr, .mac_addr_set = hns3vf_set_default_mac_addr, diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c index 703b12a..38ad454 100644 --- a/drivers/net/hns3/hns3_rxtx.c +++ b/drivers/net/hns3/hns3_rxtx.c @@ -30,7 +30,7 @@ #include "hns3_logs.h" #define HNS3_CFG_DESC_NUM(num) ((num) / 8 - 1) -#define 
DEFAULT_RX_FREE_THRESH 32 +#define HNS3_RX_RING_PREFECTH_MASK 3 static void hns3_rx_queue_release_mbufs(struct hns3_rx_queue *rxq) @@ -38,13 +38,20 @@ hns3_rx_queue_release_mbufs(struct hns3_rx_queue *rxq) uint16_t i; /* Note: Fake rx queue will not enter here */ - if (rxq->sw_ring) { - for (i = 0; i < rxq->nb_rx_desc; i++) { - if (rxq->sw_ring[i].mbuf) { - rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); - rxq->sw_ring[i].mbuf = NULL; - } - } + if (rxq->sw_ring == NULL) + return; + + for (i = 0; i < rxq->nb_rx_desc; i++) + if (rxq->sw_ring[i].mbuf) + rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + + for (i = 0; i < rxq->bulk_mbuf_num; i++) + rte_pktmbuf_free_seg(rxq->bulk_mbuf[i]); + rxq->bulk_mbuf_num = 0; + + if (rxq->pkt_first_seg) { + rte_pktmbuf_free(rxq->pkt_first_seg); + rxq->pkt_first_seg = NULL; } } @@ -653,6 +660,8 @@ hns3_dev_rx_queue_start(struct hns3_adapter *hns, uint16_t idx) rxq->next_to_use = 0; rxq->rx_free_hold = 0; + rxq->pkt_first_seg = NULL; + rxq->pkt_last_seg = NULL; hns3_init_rx_queue_hw(rxq); return 0; @@ -1243,6 +1252,33 @@ hns3_rx_buf_len_calc(struct rte_mempool *mp, uint16_t *rx_buf_len) return 0; } +static int +hns3_rx_queue_conf_check(struct hns3_hw *hw, const struct rte_eth_rxconf *conf, + struct rte_mempool *mp, uint16_t nb_desc, + uint16_t *buf_size) +{ + if (nb_desc > HNS3_MAX_RING_DESC || nb_desc < HNS3_MIN_RING_DESC || + nb_desc % HNS3_ALIGN_RING_DESC) { + hns3_err(hw, "Number (%u) of rx descriptors is invalid", + nb_desc); + return -EINVAL; + } + + if (conf->rx_drop_en == 0) + hns3_warn(hw, "if no descriptors available, packets are always " + "dropped and rx_drop_en (1) is fixed on"); + + if (hns3_rx_buf_len_calc(mp, buf_size)) { + hns3_err(hw, "rxq mbufs' data room size (%u) is not enough! 
" + "minimal data room size (%u).", + rte_pktmbuf_data_room_size(mp), + HNS3_MIN_BD_BUF_SIZE + RTE_PKTMBUF_HEADROOM); + return -EINVAL; + } + + return 0; +} + int hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, unsigned int socket_id, const struct rte_eth_rxconf *conf, @@ -1254,24 +1290,16 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, struct hns3_rx_queue *rxq; uint16_t rx_buf_size; int rx_entry_len; + int ret; if (dev->data->dev_started) { hns3_err(hw, "rx_queue_setup after dev_start no supported"); return -EINVAL; } - if (nb_desc > HNS3_MAX_RING_DESC || nb_desc < HNS3_MIN_RING_DESC || - nb_desc % HNS3_ALIGN_RING_DESC) { - hns3_err(hw, "Number (%u) of rx descriptors is invalid", - nb_desc); - return -EINVAL; - } - - if (conf->rx_drop_en == 0) - hns3_warn(hw, "if there are no available Rx descriptors," - "incoming packets are always dropped. input parameter" - " conf->rx_drop_en(%u) is uneffective.", - conf->rx_drop_en); + ret = hns3_rx_queue_conf_check(hw, conf, mp, nb_desc, &rx_buf_size); + if (ret) + return ret; if (dev->data->rx_queues[idx]) { hns3_rx_queue_release(dev->data->rx_queues[idx]); @@ -1284,14 +1312,6 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, q_info.type = "hns3 RX queue"; q_info.ring_name = "rx_ring"; - if (hns3_rx_buf_len_calc(mp, &rx_buf_size)) { - hns3_err(hw, "rxq mbufs' data room size:%u is not enough! " - "minimal data room size:%u.", - rte_pktmbuf_data_room_size(mp), - HNS3_MIN_BD_BUF_SIZE + RTE_PKTMBUF_HEADROOM); - return -EINVAL; - } - rxq = hns3_alloc_rxq_and_dma_zone(dev, &q_info); if (rxq == NULL) { hns3_err(hw, @@ -1300,6 +1320,7 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, } rxq->hns = hns; + rxq->ptype_tbl = &hns->ptype_tbl; rxq->mb_pool = mp; rxq->rx_free_thresh = (conf->rx_free_thresh > 0) ? 
conf->rx_free_thresh : HNS3_DEFAULT_RX_FREE_THRESH; @@ -1339,6 +1360,8 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, else rxq->crc_len = 0; + rxq->bulk_mbuf_num = 0; + rte_spinlock_lock(&hw->lock); dev->data->rx_queues[idx] = rxq; rte_spinlock_unlock(&hw->lock); @@ -1346,104 +1369,40 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, return 0; } -static inline uint32_t -rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint32_t ol_info) +void +hns3_rx_scattered_reset(struct rte_eth_dev *dev) { -#define HNS3_L2TBL_NUM 4 -#define HNS3_L3TBL_NUM 16 -#define HNS3_L4TBL_NUM 16 -#define HNS3_OL3TBL_NUM 16 -#define HNS3_OL4TBL_NUM 16 - uint32_t pkt_type = 0; - uint32_t l2id, l3id, l4id; - uint32_t ol3id, ol4id; - - static const uint32_t l2table[HNS3_L2TBL_NUM] = { - RTE_PTYPE_L2_ETHER, - RTE_PTYPE_L2_ETHER_QINQ, - RTE_PTYPE_L2_ETHER_VLAN, - RTE_PTYPE_L2_ETHER_VLAN - }; - - static const uint32_t l3table[HNS3_L3TBL_NUM] = { - RTE_PTYPE_L3_IPV4, - RTE_PTYPE_L3_IPV6, - RTE_PTYPE_L2_ETHER_ARP, - RTE_PTYPE_L2_ETHER, - RTE_PTYPE_L3_IPV4_EXT, - RTE_PTYPE_L3_IPV6_EXT, - RTE_PTYPE_L2_ETHER_LLDP, - 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; - - static const uint32_t l4table[HNS3_L4TBL_NUM] = { - RTE_PTYPE_L4_UDP, - RTE_PTYPE_L4_TCP, - RTE_PTYPE_TUNNEL_GRE, - RTE_PTYPE_L4_SCTP, - RTE_PTYPE_L4_IGMP, - RTE_PTYPE_L4_ICMP, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; - - static const uint32_t inner_l2table[HNS3_L2TBL_NUM] = { - RTE_PTYPE_INNER_L2_ETHER, - RTE_PTYPE_INNER_L2_ETHER_VLAN, - RTE_PTYPE_INNER_L2_ETHER_QINQ, - 0 - }; + struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; - static const uint32_t inner_l3table[HNS3_L3TBL_NUM] = { - RTE_PTYPE_INNER_L3_IPV4, - RTE_PTYPE_INNER_L3_IPV6, - 0, - RTE_PTYPE_INNER_L2_ETHER, - RTE_PTYPE_INNER_L3_IPV4_EXT, - RTE_PTYPE_INNER_L3_IPV6_EXT, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; + hw->rx_buf_len = 0; + dev->data->scattered_rx = false; +} - static const uint32_t 
inner_l4table[HNS3_L4TBL_NUM] = { - RTE_PTYPE_INNER_L4_UDP, - RTE_PTYPE_INNER_L4_TCP, - RTE_PTYPE_TUNNEL_GRE, - RTE_PTYPE_INNER_L4_SCTP, - RTE_PTYPE_L4_IGMP, - RTE_PTYPE_INNER_L4_ICMP, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; +void +hns3_rx_scattered_calc(struct rte_eth_dev *dev) +{ + struct rte_eth_conf *dev_conf = &dev->data->dev_conf; + struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; + struct hns3_rx_queue *rxq; + uint32_t queue_id; - static const uint32_t ol3table[HNS3_OL3TBL_NUM] = { - RTE_PTYPE_L3_IPV4, - RTE_PTYPE_L3_IPV6, - 0, 0, - RTE_PTYPE_L3_IPV4_EXT, - RTE_PTYPE_L3_IPV6_EXT, - 0, 0, 0, 0, 0, 0, 0, 0, 0, - RTE_PTYPE_UNKNOWN - }; + if (dev->data->rx_queues == NULL) + return; - static const uint32_t ol4table[HNS3_OL4TBL_NUM] = { - 0, - RTE_PTYPE_TUNNEL_VXLAN, - RTE_PTYPE_TUNNEL_NVGRE, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; + for (queue_id = 0; queue_id < dev->data->nb_rx_queues; queue_id++) { + rxq = dev->data->rx_queues[queue_id]; + if (hw->rx_buf_len == 0) + hw->rx_buf_len = rxq->rx_buf_len; + else + hw->rx_buf_len = RTE_MIN(hw->rx_buf_len, + rxq->rx_buf_len); + } - l2id = hns3_get_field(pkt_info, HNS3_RXD_STRP_TAGP_M, - HNS3_RXD_STRP_TAGP_S); - l3id = hns3_get_field(pkt_info, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S); - l4id = hns3_get_field(pkt_info, HNS3_RXD_L4ID_M, HNS3_RXD_L4ID_S); - ol3id = hns3_get_field(ol_info, HNS3_RXD_OL3ID_M, HNS3_RXD_OL3ID_S); - ol4id = hns3_get_field(ol_info, HNS3_RXD_OL4ID_M, HNS3_RXD_OL4ID_S); - - if (ol4table[ol4id]) - pkt_type |= (inner_l2table[l2id] | inner_l3table[l3id] | - inner_l4table[l4id] | ol3table[ol3id] | - ol4table[ol4id]); - else - pkt_type |= (l2table[l2id] | l3table[l3id] | l4table[l4id]); - return pkt_type; + if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_SCATTER || + dev_conf->rxmode.max_rx_pkt_len > hw->rx_buf_len) + dev->data->scattered_rx = true; } const uint32_t * @@ -1468,81 +1427,69 @@ hns3_dev_supported_ptypes_get(struct rte_eth_dev *dev) RTE_PTYPE_UNKNOWN }; - if 
(dev->rx_pkt_burst == hns3_recv_pkts) + if (dev->rx_pkt_burst == hns3_recv_pkts || + dev->rx_pkt_burst == hns3_recv_scattered_pkts) return ptypes; return NULL; } -static int -hns3_handle_bdinfo(struct hns3_rx_queue *rxq, struct rte_mbuf *rxm, - uint32_t bd_base_info, uint32_t l234_info, - uint32_t *cksum_err) +void +hns3_init_rx_ptype_tble(struct rte_eth_dev *dev) { - uint32_t tmp = 0; - - if (unlikely(l234_info & BIT(HNS3_RXD_L2E_B))) { - rxq->l2_errors++; - return -EINVAL; - } - - if (unlikely(rxm->pkt_len == 0 || - (l234_info & BIT(HNS3_RXD_TRUNCAT_B)))) { - rxq->pkt_len_errors++; - return -EINVAL; - } - - if (bd_base_info & BIT(HNS3_RXD_L3L4P_B)) { - if (unlikely(l234_info & BIT(HNS3_RXD_L3E_B))) { - rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD; - rxq->l3_csum_erros++; - tmp |= HNS3_L3_CKSUM_ERR; - } - - if (unlikely(l234_info & BIT(HNS3_RXD_L4E_B))) { - rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD; - rxq->l4_csum_erros++; - tmp |= HNS3_L4_CKSUM_ERR; - } - - if (unlikely(l234_info & BIT(HNS3_RXD_OL3E_B))) { - rxq->ol3_csum_erros++; - tmp |= HNS3_OUTER_L3_CKSUM_ERR; - } - - if (unlikely(l234_info & BIT(HNS3_RXD_OL4E_B))) { - rxm->ol_flags |= PKT_RX_OUTER_L4_CKSUM_BAD; - rxq->ol4_csum_erros++; - tmp |= HNS3_OUTER_L4_CKSUM_ERR; - } - } - *cksum_err = tmp; - - return 0; -} - -static void -hns3_rx_set_cksum_flag(struct rte_mbuf *rxm, uint64_t packet_type, - const uint32_t cksum_err) -{ - if (unlikely((packet_type & RTE_PTYPE_TUNNEL_MASK))) { - if (likely(packet_type & RTE_PTYPE_INNER_L3_MASK) && - (cksum_err & HNS3_L3_CKSUM_ERR) == 0) - rxm->ol_flags |= PKT_RX_IP_CKSUM_GOOD; - if (likely(packet_type & RTE_PTYPE_INNER_L4_MASK) && - (cksum_err & HNS3_L4_CKSUM_ERR) == 0) - rxm->ol_flags |= PKT_RX_L4_CKSUM_GOOD; - if (likely(packet_type & RTE_PTYPE_L4_MASK) && - (cksum_err & HNS3_OUTER_L4_CKSUM_ERR) == 0) - rxm->ol_flags |= PKT_RX_OUTER_L4_CKSUM_GOOD; - } else { - if (likely(packet_type & RTE_PTYPE_L3_MASK) && - (cksum_err & HNS3_L3_CKSUM_ERR) == 0) - rxm->ol_flags |= 
PKT_RX_IP_CKSUM_GOOD; - if (likely(packet_type & RTE_PTYPE_L4_MASK) && - (cksum_err & HNS3_L4_CKSUM_ERR) == 0) - rxm->ol_flags |= PKT_RX_L4_CKSUM_GOOD; - } + struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_ptype_table *tbl = &hns->ptype_tbl; + + memset(tbl, 0, sizeof(*tbl)); + + tbl->l2table[0] = RTE_PTYPE_L2_ETHER; + tbl->l2table[1] = RTE_PTYPE_L2_ETHER_QINQ; + tbl->l2table[2] = RTE_PTYPE_L2_ETHER_VLAN; + tbl->l2table[3] = RTE_PTYPE_L2_ETHER_VLAN; + + tbl->l3table[0] = RTE_PTYPE_L3_IPV4; + tbl->l3table[1] = RTE_PTYPE_L3_IPV6; + tbl->l3table[2] = RTE_PTYPE_L2_ETHER_ARP; + tbl->l3table[3] = RTE_PTYPE_L2_ETHER; + tbl->l3table[4] = RTE_PTYPE_L3_IPV4_EXT; + tbl->l3table[5] = RTE_PTYPE_L3_IPV6_EXT; + tbl->l3table[6] = RTE_PTYPE_L2_ETHER_LLDP; + + tbl->l4table[0] = RTE_PTYPE_L4_UDP; + tbl->l4table[1] = RTE_PTYPE_L4_TCP; + tbl->l4table[2] = RTE_PTYPE_TUNNEL_GRE; + tbl->l4table[3] = RTE_PTYPE_L4_SCTP; + tbl->l4table[4] = RTE_PTYPE_L4_IGMP; + tbl->l4table[5] = RTE_PTYPE_L4_ICMP; + + tbl->inner_l2table[0] = RTE_PTYPE_INNER_L2_ETHER; + tbl->inner_l2table[1] = RTE_PTYPE_INNER_L2_ETHER_VLAN; + tbl->inner_l2table[2] = RTE_PTYPE_INNER_L2_ETHER_QINQ; + + tbl->inner_l3table[0] = RTE_PTYPE_INNER_L3_IPV4; + tbl->inner_l3table[1] = RTE_PTYPE_INNER_L3_IPV6; + tbl->inner_l3table[2] = 0; + tbl->inner_l3table[3] = RTE_PTYPE_INNER_L2_ETHER; + tbl->inner_l3table[4] = RTE_PTYPE_INNER_L3_IPV4_EXT; + tbl->inner_l3table[5] = RTE_PTYPE_INNER_L3_IPV6_EXT; + + tbl->inner_l4table[0] = RTE_PTYPE_INNER_L4_UDP; + tbl->inner_l4table[1] = RTE_PTYPE_INNER_L4_TCP; + tbl->inner_l4table[2] = RTE_PTYPE_TUNNEL_GRE; + tbl->inner_l4table[3] = RTE_PTYPE_INNER_L4_SCTP; + tbl->inner_l4table[4] = RTE_PTYPE_L4_IGMP; + tbl->inner_l4table[5] = RTE_PTYPE_INNER_L4_ICMP; + + tbl->ol3table[0] = RTE_PTYPE_L3_IPV4; + tbl->ol3table[1] = RTE_PTYPE_L3_IPV6; + tbl->ol3table[2] = 0; + tbl->ol3table[3] = 0; + tbl->ol3table[4] = RTE_PTYPE_L3_IPV4_EXT; + tbl->ol3table[5] = RTE_PTYPE_L3_IPV6_EXT; + + 
tbl->ol4table[0] = 0; + tbl->ol4table[1] = RTE_PTYPE_TUNNEL_VXLAN; + tbl->ol4table[2] = RTE_PTYPE_TUNNEL_NVGRE; } static inline void @@ -1612,6 +1559,23 @@ recalculate_data_len(struct rte_mbuf *first_seg, struct rte_mbuf *last_seg, rxm->data_len = (uint16_t)(data_len - crc_len); } +static inline struct rte_mbuf * +hns3_rx_alloc_buffer(struct hns3_rx_queue *rxq) +{ + int ret; + + if (likely(rxq->bulk_mbuf_num > 0)) + return rxq->bulk_mbuf[--rxq->bulk_mbuf_num]; + + ret = rte_mempool_get_bulk(rxq->mb_pool, (void **)rxq->bulk_mbuf, + HNS3_BULK_ALLOC_MBUF_NUM); + if (likely(ret == 0)) { + rxq->bulk_mbuf_num = HNS3_BULK_ALLOC_MBUF_NUM; + return rxq->bulk_mbuf[--rxq->bulk_mbuf_num]; + } else + return rte_mbuf_raw_alloc(rxq->mb_pool); +} + uint16_t hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) { @@ -1620,6 +1584,119 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) struct hns3_rx_queue *rxq; /* RX queue */ struct hns3_entry *sw_ring; struct hns3_entry *rxe; + struct hns3_desc rxd; + struct rte_mbuf *nmb; /* pointer of the new mbuf */ + struct rte_mbuf *rxm; + uint32_t bd_base_info; + uint32_t cksum_err; + uint32_t l234_info; + uint32_t ol_info; + uint64_t dma_addr; + uint16_t nb_rx_bd; + uint16_t nb_rx; + uint16_t rx_id; + int ret; + + nb_rx = 0; + nb_rx_bd = 0; + rxq = rx_queue; + rx_ring = rxq->rx_ring; + sw_ring = rxq->sw_ring; + rx_id = rxq->next_to_use; + + while (nb_rx < nb_pkts) { + rxdp = &rx_ring[rx_id]; + bd_base_info = rte_le_to_cpu_32(rxdp->rx.bd_base_info); + if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B)))) + break; + + rxd = rxdp[(bd_base_info & (1u << HNS3_RXD_VLD_B)) - + (1u << HNS3_RXD_VLD_B)]; + + nmb = hns3_rx_alloc_buffer(rxq); + if (unlikely(nmb == NULL)) { + uint16_t port_id; + + port_id = rxq->port_id; + rte_eth_devices[port_id].data->rx_mbuf_alloc_failed++; + break; + } + + nb_rx_bd++; + rxe = &sw_ring[rx_id]; + rx_id++; + if (unlikely(rx_id == rxq->nb_rx_desc)) + rx_id = 0; + + 
rte_prefetch0(sw_ring[rx_id].mbuf); + if ((rx_id & HNS3_RX_RING_PREFECTH_MASK) == 0) { + rte_prefetch0(&rx_ring[rx_id]); + rte_prefetch0(&sw_ring[rx_id]); + } + + rxm = rxe->mbuf; + rxe->mbuf = nmb; + + dma_addr = rte_mbuf_data_iova_default(nmb); + rxdp->addr = rte_cpu_to_le_64(dma_addr); + rxdp->rx.bd_base_info = 0; + + rxm->data_off = RTE_PKTMBUF_HEADROOM; + rxm->pkt_len = (uint16_t)(rte_le_to_cpu_16(rxd.rx.pkt_len)) - + rxq->crc_len; + rxm->data_len = rxm->pkt_len; + rxm->port = rxq->port_id; + rxm->hash.rss = rte_le_to_cpu_32(rxd.rx.rss_hash); + rxm->ol_flags = PKT_RX_RSS_HASH; + if (unlikely(bd_base_info & BIT(HNS3_RXD_LUM_B))) { + rxm->hash.fdir.hi = + rte_le_to_cpu_16(rxd.rx.fd_id); + rxm->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID; + } + rxm->nb_segs = 1; + rxm->next = NULL; + + /* Load remained descriptor data and extract necessary fields */ + l234_info = rte_le_to_cpu_32(rxd.rx.l234_info); + ol_info = rte_le_to_cpu_32(rxd.rx.ol_info); + ret = hns3_handle_bdinfo(rxq, rxm, bd_base_info, + l234_info, &cksum_err); + if (unlikely(ret)) + goto pkt_err; + + rxm->packet_type = hns3_rx_calc_ptype(rxq, l234_info, ol_info); + + if (likely(bd_base_info & BIT(HNS3_RXD_L3L4P_B))) + hns3_rx_set_cksum_flag(rxm, rxm->packet_type, + cksum_err); + hns3_rxd_to_vlan_tci(rxq, rxm, l234_info, &rxd); + + rx_pkts[nb_rx++] = rxm; + continue; +pkt_err: + rte_pktmbuf_free(rxm); + } + + rxq->next_to_use = rx_id; + rxq->rx_free_hold += nb_rx_bd; + if (rxq->rx_free_hold > rxq->rx_free_thresh) { + hns3_write_reg_opt(rxq->io_head_reg, rxq->rx_free_hold); + rxq->rx_free_hold = 0; + } + + return nb_rx; +} + +uint16_t +hns3_recv_scattered_pkts(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + volatile struct hns3_desc *rx_ring; /* RX ring (desc) */ + volatile struct hns3_desc *rxdp; /* pointer of the current desc */ + struct hns3_rx_queue *rxq; /* RX queue */ + struct hns3_entry *sw_ring; + struct hns3_entry *rxe; struct rte_mbuf *first_seg; struct rte_mbuf *last_seg; 
struct hns3_desc rxd; @@ -1632,9 +1709,7 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) uint32_t gro_size; uint32_t ol_info; uint64_t dma_addr; - uint16_t data_len; uint16_t nb_rx_bd; - uint16_t pkt_len; uint16_t nb_rx; uint16_t rx_id; int ret; @@ -1652,8 +1727,9 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) while (nb_rx < nb_pkts) { rxdp = &rx_ring[rx_id]; bd_base_info = rte_le_to_cpu_32(rxdp->rx.bd_base_info); - if (unlikely(!hns3_get_bit(bd_base_info, HNS3_RXD_VLD_B))) + if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B)))) break; + /* * The interactive process between software and hardware of * receiving a new packet in hns3 network engine: @@ -1716,7 +1792,7 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rxd = rxdp[(bd_base_info & (1u << HNS3_RXD_VLD_B)) - (1u << HNS3_RXD_VLD_B)]; - nmb = rte_mbuf_raw_alloc(rxq->mb_pool); + nmb = hns3_rx_alloc_buffer(rxq); if (unlikely(nmb == NULL)) { dev = &rte_eth_devices[rxq->port_id]; dev->data->rx_mbuf_alloc_failed++; @@ -1730,7 +1806,7 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rx_id = 0; rte_prefetch0(sw_ring[rx_id].mbuf); - if ((rx_id & 0x3) == 0) { + if ((rx_id & HNS3_RX_RING_PREFECTH_MASK) == 0) { rte_prefetch0(&rx_ring[rx_id]); rte_prefetch0(&sw_ring[rx_id]); } @@ -1742,15 +1818,6 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rxdp->rx.bd_base_info = 0; rxdp->addr = dma_addr; - /* - * Load remained descriptor data and extract necessary fields. - * Data size from buffer description may contains CRC len, - * packet len should subtract it. 
- */ - data_len = (uint16_t)(rte_le_to_cpu_16(rxd.rx.size)); - l234_info = rte_le_to_cpu_32(rxd.rx.l234_info); - ol_info = rte_le_to_cpu_32(rxd.rx.ol_info); - if (first_seg == NULL) { first_seg = rxm; first_seg->nb_segs = 1; @@ -1760,10 +1827,11 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) } rxm->data_off = RTE_PKTMBUF_HEADROOM; - rxm->data_len = data_len; + rxm->data_len = rte_le_to_cpu_16(rxd.rx.size); - if (!hns3_get_bit(bd_base_info, HNS3_RXD_FE_B)) { + if (!(bd_base_info & BIT(HNS3_RXD_FE_B))) { last_seg = rxm; + rxm->next = NULL; continue; } @@ -1772,8 +1840,7 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) * buffer description may contains CRC len, packet len should * subtract it, same as data len. */ - pkt_len = (uint16_t)(rte_le_to_cpu_16(rxd.rx.pkt_len)); - first_seg->pkt_len = pkt_len; + first_seg->pkt_len = rte_le_to_cpu_16(rxd.rx.pkt_len); /* * This is the last buffer of the received packet. If the CRC @@ -1789,15 +1856,15 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) if (unlikely(rxq->crc_len > 0)) { first_seg->pkt_len -= rxq->crc_len; recalculate_data_len(first_seg, last_seg, rxm, rxq, - data_len); + rxm->data_len); } first_seg->port = rxq->port_id; first_seg->hash.rss = rte_le_to_cpu_32(rxd.rx.rss_hash); first_seg->ol_flags = PKT_RX_RSS_HASH; - if (unlikely(hns3_get_bit(bd_base_info, HNS3_RXD_LUM_B))) { + if (unlikely(bd_base_info & BIT(HNS3_RXD_LUM_B))) { first_seg->hash.fdir.hi = - rte_le_to_cpu_32(rxd.rx.fd_id); + rte_le_to_cpu_16(rxd.rx.fd_id); first_seg->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID; } @@ -1808,13 +1875,15 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) first_seg->tso_segsz = gro_size; } + l234_info = rte_le_to_cpu_32(rxd.rx.l234_info); + ol_info = rte_le_to_cpu_32(rxd.rx.ol_info); ret = hns3_handle_bdinfo(rxq, first_seg, bd_base_info, l234_info, &cksum_err); if (unlikely(ret)) goto pkt_err; - 
first_seg->packet_type = rxd_pkt_info_to_pkt_type(l234_info, - ol_info); + first_seg->packet_type = hns3_rx_calc_ptype(rxq, + l234_info, ol_info); if (bd_base_info & BIT(HNS3_RXD_L3L4P_B)) hns3_rx_set_cksum_flag(first_seg, @@ -1844,6 +1913,46 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) } int +hns3_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id, + struct rte_eth_burst_mode *mode) +{ + static const struct { + eth_rx_burst_t pkt_burst; + const char *info; + } burst_infos[] = { + { hns3_recv_pkts, "Scalar" }, + { hns3_recv_scattered_pkts, "Scalar Scattered" }, + }; + + eth_rx_burst_t pkt_burst = dev->rx_pkt_burst; + int ret = -EINVAL; + unsigned int i; + + for (i = 0; i < RTE_DIM(burst_infos); i++) { + if (pkt_burst == burst_infos[i].pkt_burst) { + snprintf(mode->info, sizeof(mode->info), "%s", + burst_infos[i].info); + ret = 0; + break; + } + } + + return ret; +} + +static eth_rx_burst_t +hns3_get_rx_function(struct rte_eth_dev *dev) +{ + struct hns3_adapter *hns = dev->data->dev_private; + uint64_t offloads = dev->data->dev_conf.rxmode.offloads; + + if (hns->rx_simple_allowed && !dev->data->scattered_rx && + (offloads & DEV_RX_OFFLOAD_TCP_LRO) == 0) + return hns3_recv_pkts; + + return hns3_recv_scattered_pkts; +} +int hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, unsigned int socket_id, const struct rte_eth_txconf *conf) { @@ -1932,7 +2041,8 @@ hns3_tx_free_useless_buffer(struct hns3_tx_queue *txq) struct hns3_desc *desc = &txq->tx_ring[tx_next_clean]; struct rte_mbuf *mbuf; - while ((!hns3_get_bit(desc->tx.tp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B)) && + while ((!(desc->tx.tp_fe_sc_vld_ra_ri & + rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))) && tx_next_use != tx_next_clean) { mbuf = tx_bak_pkt->mbuf; if (mbuf) { @@ -2818,7 +2928,7 @@ void hns3_set_rxtx_function(struct rte_eth_dev *eth_dev) if (hns->hw.adapter_state == HNS3_NIC_STARTED && rte_atomic16_read(&hns->hw.reset.resetting) == 0) { 
- eth_dev->rx_pkt_burst = hns3_recv_pkts; + eth_dev->rx_pkt_burst = hns3_get_rx_function(eth_dev); eth_dev->tx_pkt_burst = hns3_xmit_pkts; eth_dev->tx_pkt_prepare = hns3_prep_pkts; } else { diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h index c1a34e2..3d3f0a0 100644 --- a/drivers/net/hns3/hns3_rxtx.h +++ b/drivers/net/hns3/hns3_rxtx.h @@ -10,6 +10,8 @@ #define HNS3_DEFAULT_RING_DESC 1024 #define HNS3_ALIGN_RING_DESC 32 #define HNS3_RING_BASE_ALIGN 128 +#define HNS3_BULK_ALLOC_MBUF_NUM 32 + #define HNS3_DEFAULT_RX_FREE_THRESH 32 #define HNS3_512_BD_BUF_SIZE 512 @@ -233,6 +235,7 @@ struct hns3_rx_queue { void *io_base; volatile void *io_head_reg; struct hns3_adapter *hns; + struct hns3_ptype_table *ptype_tbl; struct rte_mempool *mb_pool; struct hns3_desc *rx_ring; uint64_t rx_ring_phys_addr; /* RX ring DMA address */ @@ -245,13 +248,13 @@ struct hns3_rx_queue { uint16_t queue_id; uint16_t port_id; uint16_t nb_rx_desc; - uint16_t next_to_use; uint16_t rx_buf_len; /* * threshold for the number of BDs waited to passed to hardware. If the * number exceeds the threshold, driver will pass these BDs to hardware. 
*/ uint16_t rx_free_thresh; + uint16_t next_to_use; /* index of next BD to be polled */ uint16_t rx_free_hold; /* num of BDs waited to passed to hardware */ /* @@ -272,6 +275,9 @@ struct hns3_rx_queue { uint64_t l4_csum_erros; uint64_t ol3_csum_erros; uint64_t ol4_csum_erros; + + struct rte_mbuf *bulk_mbuf[HNS3_BULK_ALLOC_MBUF_NUM]; + uint16_t bulk_mbuf_num; }; struct hns3_tx_queue { @@ -380,6 +386,120 @@ enum hns3_cksum_status { HNS3_OUTER_L4_CKSUM_ERR = 8 }; +static inline int +hns3_handle_bdinfo(struct hns3_rx_queue *rxq, struct rte_mbuf *rxm, + uint32_t bd_base_info, uint32_t l234_info, + uint32_t *cksum_err) +{ +#define L2E_TRUNC_ERR_FLAG (BIT(HNS3_RXD_L2E_B) | \ + BIT(HNS3_RXD_TRUNCAT_B)) +#define CHECKSUM_ERR_FLAG (BIT(HNS3_RXD_L3E_B) | \ + BIT(HNS3_RXD_L4E_B) | \ + BIT(HNS3_RXD_OL3E_B) | \ + BIT(HNS3_RXD_OL4E_B)) + + uint32_t tmp = 0; + + /* + * If the packet length is bigger than the MTU when receiving with the + * non-scattered algorithm, the first n BDs lack the FE bit; handle that. + * Note: we don't need to add a statistics counter, because the last BD, + * which has the FE bit, will have the HNS3_RXD_L2E_B bit marked. 
+ */ + if (unlikely((bd_base_info & BIT(HNS3_RXD_FE_B)) == 0)) + return -EINVAL; + + if (unlikely((l234_info & L2E_TRUNC_ERR_FLAG) || rxm->pkt_len == 0)) { + if (l234_info & BIT(HNS3_RXD_L2E_B)) + rxq->l2_errors++; + else + rxq->pkt_len_errors++; + return -EINVAL; + } + + if (bd_base_info & BIT(HNS3_RXD_L3L4P_B)) { + if (likely((l234_info & CHECKSUM_ERR_FLAG) == 0)) { + *cksum_err = 0; + return 0; + } + + if (unlikely(l234_info & BIT(HNS3_RXD_L3E_B))) { + rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD; + rxq->l3_csum_erros++; + tmp |= HNS3_L3_CKSUM_ERR; + } + + if (unlikely(l234_info & BIT(HNS3_RXD_L4E_B))) { + rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD; + rxq->l4_csum_erros++; + tmp |= HNS3_L4_CKSUM_ERR; + } + + if (unlikely(l234_info & BIT(HNS3_RXD_OL3E_B))) { + rxq->ol3_csum_erros++; + tmp |= HNS3_OUTER_L3_CKSUM_ERR; + } + + if (unlikely(l234_info & BIT(HNS3_RXD_OL4E_B))) { + rxm->ol_flags |= PKT_RX_OUTER_L4_CKSUM_BAD; + rxq->ol4_csum_erros++; + tmp |= HNS3_OUTER_L4_CKSUM_ERR; + } + } + *cksum_err = tmp; + + return 0; +} + +static inline void +hns3_rx_set_cksum_flag(struct rte_mbuf *rxm, const uint64_t packet_type, + const uint32_t cksum_err) +{ + if (unlikely((packet_type & RTE_PTYPE_TUNNEL_MASK))) { + if (likely(packet_type & RTE_PTYPE_INNER_L3_MASK) && + (cksum_err & HNS3_L3_CKSUM_ERR) == 0) + rxm->ol_flags |= PKT_RX_IP_CKSUM_GOOD; + if (likely(packet_type & RTE_PTYPE_INNER_L4_MASK) && + (cksum_err & HNS3_L4_CKSUM_ERR) == 0) + rxm->ol_flags |= PKT_RX_L4_CKSUM_GOOD; + if (likely(packet_type & RTE_PTYPE_L4_MASK) && + (cksum_err & HNS3_OUTER_L4_CKSUM_ERR) == 0) + rxm->ol_flags |= PKT_RX_OUTER_L4_CKSUM_GOOD; + } else { + if (likely(packet_type & RTE_PTYPE_L3_MASK) && + (cksum_err & HNS3_L3_CKSUM_ERR) == 0) + rxm->ol_flags |= PKT_RX_IP_CKSUM_GOOD; + if (likely(packet_type & RTE_PTYPE_L4_MASK) && + (cksum_err & HNS3_L4_CKSUM_ERR) == 0) + rxm->ol_flags |= PKT_RX_L4_CKSUM_GOOD; + } +} + +static inline uint32_t +hns3_rx_calc_ptype(struct hns3_rx_queue *rxq, const uint32_t l234_info, 
+ const uint32_t ol_info) +{ + const struct hns3_ptype_table *const ptype_tbl = rxq->ptype_tbl; + uint32_t l2id, l3id, l4id; + uint32_t ol3id, ol4id; + + ol4id = hns3_get_field(ol_info, HNS3_RXD_OL4ID_M, HNS3_RXD_OL4ID_S); + ol3id = hns3_get_field(ol_info, HNS3_RXD_OL3ID_M, HNS3_RXD_OL3ID_S); + l2id = hns3_get_field(l234_info, HNS3_RXD_STRP_TAGP_M, + HNS3_RXD_STRP_TAGP_S); + l3id = hns3_get_field(l234_info, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S); + l4id = hns3_get_field(l234_info, HNS3_RXD_L4ID_M, HNS3_RXD_L4ID_S); + + if (unlikely(ptype_tbl->ol4table[ol4id])) + return ptype_tbl->inner_l2table[l2id] | + ptype_tbl->inner_l3table[l3id] | + ptype_tbl->inner_l4table[l4id] | + ptype_tbl->ol3table[ol3id] | ptype_tbl->ol4table[ol4id]; + else + return ptype_tbl->l2table[l2id] | ptype_tbl->l3table[l3id] | + ptype_tbl->l4table[l4id]; +} + void hns3_dev_rx_queue_release(void *queue); void hns3_dev_tx_queue_release(void *queue); void hns3_free_all_queues(struct rte_eth_dev *dev); @@ -398,11 +518,17 @@ int hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, unsigned int socket, const struct rte_eth_txconf *conf); uint16_t hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); +uint16_t hns3_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); +int hns3_rx_burst_mode_get(struct rte_eth_dev *dev, + __rte_unused uint16_t queue_id, + struct rte_eth_burst_mode *mode); uint16_t hns3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); uint16_t hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); const uint32_t *hns3_dev_supported_ptypes_get(struct rte_eth_dev *dev); +void hns3_init_rx_ptype_tble(struct rte_eth_dev *dev); void hns3_set_rxtx_function(struct rte_eth_dev *eth_dev); void hns3_set_queue_intr_gl(struct hns3_hw *hw, uint16_t queue_id, uint8_t gl_idx, uint16_t gl_value); @@ -415,6 +541,8 @@ int hns3_set_fake_rx_or_tx_queues(struct rte_eth_dev *dev, 
uint16_t nb_rx_q, int hns3_config_gro(struct hns3_hw *hw, bool en); int hns3_restore_gro_conf(struct hns3_hw *hw); void hns3_update_all_queues_pvid_state(struct hns3_hw *hw); +void hns3_rx_scattered_reset(struct rte_eth_dev *dev); +void hns3_rx_scattered_calc(struct rte_eth_dev *dev); void hns3_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, struct rte_eth_rxq_info *qinfo); void hns3_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, From patchwork Mon Sep 7 09:08:21 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Wei Hu (Xavier)" X-Patchwork-Id: 76656 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id D7C62A04B9; Mon, 7 Sep 2020 11:09:40 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 4ED121C115; Mon, 7 Sep 2020 11:09:15 +0200 (CEST) Received: from mail.chinasoftinc.com (unknown [114.113.233.8]) by dpdk.org (Postfix) with ESMTP id 8CFD21C10F for ; Mon, 7 Sep 2020 11:09:12 +0200 (CEST) Received: from localhost.localdomain (65.49.108.226) by INCCAS002.ito.icss (10.168.0.60) with Microsoft SMTP Server id 14.3.487.0; Mon, 7 Sep 2020 17:09:08 +0800 From: "Wei Hu (Xavier)" To: CC: Date: Mon, 7 Sep 2020 17:08:21 +0800 Message-ID: <20200907090825.1761-5-huwei013@chinasoftinc.com> X-Mailer: git-send-email 2.9.5 In-Reply-To: <20200907090825.1761-1-huwei013@chinasoftinc.com> References: <20200907090825.1761-1-huwei013@chinasoftinc.com> MIME-Version: 1.0 X-Originating-IP: [65.49.108.226] Subject: [dpdk-dev] [PATCH 4/8] net/hns3: add simple Tx process function X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: 
dev-bounces@dpdk.org Sender: "dev" From: "Wei Hu (Xavier)" This patch adds a simple Tx process function. When multi-segment packets are not needed, which means that no Tx offload other than DEV_TX_OFFLOAD_MBUF_FAST_FREE is set, the simple Tx process function can be used. Signed-off-by: Huisong Li Signed-off-by: Wei Hu (Xavier) Signed-off-by: Chengwen Feng --- drivers/net/hns3/hns3_ethdev.c | 6 + drivers/net/hns3/hns3_ethdev.h | 1 + drivers/net/hns3/hns3_ethdev_vf.c | 6 + drivers/net/hns3/hns3_rxtx.c | 260 +++++++++++++++++++++++++++++++++++--- drivers/net/hns3/hns3_rxtx.h | 34 +++++ 5 files changed, 292 insertions(+), 15 deletions(-) diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c index 5d612f1..8701994 100644 --- a/drivers/net/hns3/hns3_ethdev.c +++ b/drivers/net/hns3/hns3_ethdev.c @@ -2352,6 +2352,7 @@ hns3_dev_configure(struct rte_eth_dev *dev) goto cfg_err; hns->rx_simple_allowed = true; + hns->tx_simple_allowed = true; hns3_init_rx_ptype_tble(dev); hw->adapter_state = HNS3_NIC_CONFIGURED; @@ -2512,6 +2513,10 @@ hns3_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info) .rx_drop_en = 1, .offloads = 0, }; + info->default_txconf = (struct rte_eth_txconf) { + .tx_rs_thresh = HNS3_DEFAULT_TX_RS_THRESH, + .offloads = 0, + }; info->vmdq_queue_num = 0; @@ -5545,6 +5550,7 @@ static const struct eth_dev_ops hns3_eth_dev_ops = { .rxq_info_get = hns3_rxq_info_get, .txq_info_get = hns3_txq_info_get, .rx_burst_mode_get = hns3_rx_burst_mode_get, + .tx_burst_mode_get = hns3_tx_burst_mode_get, .flow_ctrl_get = hns3_flow_ctrl_get, .flow_ctrl_set = hns3_flow_ctrl_set, .priority_flow_ctrl_set = hns3_priority_flow_ctrl_set, diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h index d93c5b2..ef85034 100644 --- a/drivers/net/hns3/hns3_ethdev.h +++ b/drivers/net/hns3/hns3_ethdev.h @@ -643,6 +643,7 @@ struct hns3_adapter { }; bool rx_simple_allowed; + bool tx_simple_allowed; struct hns3_ptype_table ptype_tbl __rte_cache_min_aligned; }; diff 
--git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c index 0f155d8..915b896 100644 --- a/drivers/net/hns3/hns3_ethdev_vf.c +++ b/drivers/net/hns3/hns3_ethdev_vf.c @@ -822,6 +822,7 @@ hns3vf_dev_configure(struct rte_eth_dev *dev) goto cfg_err; hns->rx_simple_allowed = true; + hns->tx_simple_allowed = true; hns3_init_rx_ptype_tble(dev); hw->adapter_state = HNS3_NIC_CONFIGURED; @@ -957,6 +958,10 @@ hns3vf_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info) .rx_drop_en = 1, .offloads = 0, }; + info->default_txconf = (struct rte_eth_txconf) { + .tx_rs_thresh = HNS3_DEFAULT_TX_RS_THRESH, + .offloads = 0, + }; info->vmdq_queue_num = 0; @@ -2541,6 +2546,7 @@ static const struct eth_dev_ops hns3vf_eth_dev_ops = { .rxq_info_get = hns3_rxq_info_get, .txq_info_get = hns3_txq_info_get, .rx_burst_mode_get = hns3_rx_burst_mode_get, + .tx_burst_mode_get = hns3_tx_burst_mode_get, .mac_addr_add = hns3vf_add_mac_addr, .mac_addr_remove = hns3vf_remove_mac_addr, .mac_addr_set = hns3vf_set_default_mac_addr, diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c index 38ad454..08a3dcd 100644 --- a/drivers/net/hns3/hns3_rxtx.c +++ b/drivers/net/hns3/hns3_rxtx.c @@ -1952,27 +1952,72 @@ hns3_get_rx_function(struct rte_eth_dev *dev) return hns3_recv_scattered_pkts; } + +static int +hns3_tx_queue_conf_check(struct hns3_hw *hw, const struct rte_eth_txconf *conf, + uint16_t nb_desc, uint16_t *tx_rs_thresh, + uint16_t *tx_free_thresh, uint16_t idx) +{ +#define HNS3_TX_RS_FREE_THRESH_GAP 8 + uint16_t rs_thresh, free_thresh, fast_free_thresh; + + if (nb_desc > HNS3_MAX_RING_DESC || nb_desc < HNS3_MIN_RING_DESC || + nb_desc % HNS3_ALIGN_RING_DESC) { + hns3_err(hw, "number (%u) of tx descriptors is invalid", + nb_desc); + return -EINVAL; + } + + rs_thresh = (conf->tx_rs_thresh > 0) ? + conf->tx_rs_thresh : HNS3_DEFAULT_TX_RS_THRESH; + free_thresh = (conf->tx_free_thresh > 0) ? 
+ conf->tx_free_thresh : HNS3_DEFAULT_TX_FREE_THRESH; + if (rs_thresh + free_thresh > nb_desc || nb_desc % rs_thresh || + rs_thresh >= nb_desc - HNS3_TX_RS_FREE_THRESH_GAP || + free_thresh >= nb_desc - HNS3_TX_RS_FREE_THRESH_GAP) { + hns3_err(hw, "tx_rs_thresh (%d) tx_free_thresh (%d) nb_desc " + "(%d) of tx descriptors for port=%d queue=%d check " + "fail!", + rs_thresh, free_thresh, nb_desc, hw->data->port_id, + idx); + return -EINVAL; + } + + if (conf->tx_free_thresh == 0) { + /* Fast free Tx memory buffer to improve cache hit rate */ + fast_free_thresh = nb_desc - rs_thresh; + if (fast_free_thresh >= + HNS3_TX_FAST_FREE_AHEAD + HNS3_DEFAULT_TX_FREE_THRESH) + free_thresh = fast_free_thresh - + HNS3_TX_FAST_FREE_AHEAD; + } + + *tx_rs_thresh = rs_thresh; + *tx_free_thresh = free_thresh; + return 0; +} + int hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, unsigned int socket_id, const struct rte_eth_txconf *conf) { struct hns3_adapter *hns = dev->data->dev_private; + uint16_t tx_rs_thresh, tx_free_thresh; struct hns3_hw *hw = &hns->hw; struct hns3_queue_info q_info; struct hns3_tx_queue *txq; int tx_entry_len; + int ret; if (dev->data->dev_started) { hns3_err(hw, "tx_queue_setup after dev_start no supported"); return -EINVAL; } - if (nb_desc > HNS3_MAX_RING_DESC || nb_desc < HNS3_MIN_RING_DESC || - nb_desc % HNS3_ALIGN_RING_DESC) { - hns3_err(hw, "Number (%u) of tx descriptors is invalid", - nb_desc); - return -EINVAL; - } + ret = hns3_tx_queue_conf_check(hw, conf, nb_desc, + &tx_rs_thresh, &tx_free_thresh, idx); + if (ret) + return ret; if (dev->data->tx_queues[idx] != NULL) { hns3_tx_queue_release(dev->data->tx_queues[idx]); @@ -2005,11 +2050,15 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, txq->next_to_use = 0; txq->next_to_clean = 0; txq->tx_bd_ready = txq->nb_tx_desc - 1; + txq->tx_free_thresh = tx_free_thresh; + txq->tx_rs_thresh = tx_rs_thresh; txq->port_id = dev->data->port_id; txq->pvid_state = 
hw->port_base_vlan_cfg.state; txq->configured = true; txq->io_base = (void *)((char *)hw->io_base + HNS3_TQP_REG_OFFSET + idx * HNS3_TQP_REG_SIZE); + txq->io_tail_reg = (volatile void *)((char *)txq->io_base + + HNS3_RING_TX_TAIL_REG); txq->min_tx_pkt_len = hw->min_tx_pkt_len; txq->over_length_pkt_cnt = 0; txq->exceed_limit_bd_pkt_cnt = 0; @@ -2024,12 +2073,6 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, return 0; } -static inline void -hns3_queue_xmit(struct hns3_tx_queue *txq, uint32_t buf_num) -{ - hns3_write_dev(txq, HNS3_RING_TX_TAIL_REG, buf_num); -} - static void hns3_tx_free_useless_buffer(struct hns3_tx_queue *txq) { @@ -2798,6 +2841,154 @@ hns3_check_non_tso_pkt(uint16_t nb_buf, struct rte_mbuf **m_seg, return 0; } +static inline void +hns3_tx_free_buffer_simple(struct hns3_tx_queue *txq) +{ + struct hns3_entry *tx_entry; + struct hns3_desc *desc; + uint16_t tx_next_clean; + int i; + + while (1) { + if (HNS3_GET_TX_QUEUE_PEND_BD_NUM(txq) < txq->tx_rs_thresh) + break; + + /* + * All mbufs can be released only when the VLD bits of all + * descriptors in a batch are cleared. 
+ */ + tx_next_clean = (txq->next_to_clean + txq->tx_rs_thresh - 1) % + txq->nb_tx_desc; + desc = &txq->tx_ring[tx_next_clean]; + for (i = 0; i < txq->tx_rs_thresh; i++) { + if (rte_le_to_cpu_16(desc->tx.tp_fe_sc_vld_ra_ri) & + BIT(HNS3_TXD_VLD_B)) + return; + desc--; + } + + tx_entry = &txq->sw_ring[txq->next_to_clean]; + + for (i = 0; i < txq->tx_rs_thresh; i++) + rte_prefetch0((tx_entry + i)->mbuf); + for (i = 0; i < txq->tx_rs_thresh; i++, tx_entry++) { + rte_mempool_put(tx_entry->mbuf->pool, tx_entry->mbuf); + tx_entry->mbuf = NULL; + } + + txq->next_to_clean = (tx_next_clean + 1) % txq->nb_tx_desc; + txq->tx_bd_ready += txq->tx_rs_thresh; + } +} + +static inline void +hns3_tx_backup_1mbuf(struct hns3_entry *tx_entry, struct rte_mbuf **pkts) +{ + tx_entry->mbuf = pkts[0]; +} + +static inline void +hns3_tx_backup_4mbuf(struct hns3_entry *tx_entry, struct rte_mbuf **pkts) +{ + hns3_tx_backup_1mbuf(&tx_entry[0], &pkts[0]); + hns3_tx_backup_1mbuf(&tx_entry[1], &pkts[1]); + hns3_tx_backup_1mbuf(&tx_entry[2], &pkts[2]); + hns3_tx_backup_1mbuf(&tx_entry[3], &pkts[3]); +} + +static inline void +hns3_tx_setup_4bd(struct hns3_desc *txdp, struct rte_mbuf **pkts) +{ +#define PER_LOOP_NUM 4 + const uint16_t bd_flag = BIT(HNS3_TXD_VLD_B) | BIT(HNS3_TXD_FE_B); + uint64_t dma_addr; + uint32_t i; + + for (i = 0; i < PER_LOOP_NUM; i++, txdp++, pkts++) { + dma_addr = rte_mbuf_data_iova(*pkts); + txdp->addr = rte_cpu_to_le_64(dma_addr); + txdp->tx.send_size = rte_cpu_to_le_16((*pkts)->data_len); + txdp->tx.paylen = 0; + txdp->tx.type_cs_vlan_tso_len = 0; + txdp->tx.ol_type_vlan_len_msec = 0; + txdp->tx.tp_fe_sc_vld_ra_ri = rte_cpu_to_le_16(bd_flag); + } +} + +static inline void +hns3_tx_setup_1bd(struct hns3_desc *txdp, struct rte_mbuf **pkts) +{ + const uint16_t bd_flag = BIT(HNS3_TXD_VLD_B) | BIT(HNS3_TXD_FE_B); + uint64_t dma_addr; + + dma_addr = rte_mbuf_data_iova(*pkts); + txdp->addr = rte_cpu_to_le_64(dma_addr); + txdp->tx.send_size = rte_cpu_to_le_16((*pkts)->data_len); + 
txdp->tx.paylen = 0; + txdp->tx.type_cs_vlan_tso_len = 0; + txdp->tx.ol_type_vlan_len_msec = 0; + txdp->tx.tp_fe_sc_vld_ra_ri = rte_cpu_to_le_16(bd_flag); +} + +static inline void +hns3_tx_fill_hw_ring(struct hns3_tx_queue *txq, + struct rte_mbuf **pkts, + uint16_t nb_pkts) +{ +#define PER_LOOP_NUM 4 +#define PER_LOOP_MASK (PER_LOOP_NUM - 1) + struct hns3_desc *txdp = &txq->tx_ring[txq->next_to_use]; + struct hns3_entry *tx_entry = &txq->sw_ring[txq->next_to_use]; + const uint32_t mainpart = (nb_pkts & ((uint32_t)~PER_LOOP_MASK)); + const uint32_t leftover = (nb_pkts & ((uint32_t)PER_LOOP_MASK)); + uint32_t i; + + for (i = 0; i < mainpart; i += PER_LOOP_NUM) { + hns3_tx_backup_4mbuf(tx_entry + i, pkts + i); + hns3_tx_setup_4bd(txdp + i, pkts + i); + } + if (unlikely(leftover > 0)) { + for (i = 0; i < leftover; i++) { + hns3_tx_backup_1mbuf(tx_entry + mainpart + i, + pkts + mainpart + i); + hns3_tx_setup_1bd(txdp + mainpart + i, + pkts + mainpart + i); + } + } +} + +uint16_t +hns3_xmit_pkts_simple(void *tx_queue, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + struct hns3_tx_queue *txq = tx_queue; + uint16_t nb_tx = 0; + + hns3_tx_free_buffer_simple(txq); + + nb_pkts = RTE_MIN(txq->tx_bd_ready, nb_pkts); + if (unlikely(nb_pkts == 0)) { + if (txq->tx_bd_ready == 0) + txq->queue_full_cnt++; + return 0; + } + + txq->tx_bd_ready -= nb_pkts; + if (txq->next_to_use + nb_pkts > txq->nb_tx_desc) { + nb_tx = txq->nb_tx_desc - txq->next_to_use; + hns3_tx_fill_hw_ring(txq, tx_pkts, nb_tx); + txq->next_to_use = 0; + } + + hns3_tx_fill_hw_ring(txq, tx_pkts + nb_tx, nb_pkts - nb_tx); + txq->next_to_use += nb_pkts - nb_tx; + + hns3_write_reg_opt(txq->io_tail_reg, nb_pkts); + + return nb_pkts; +} + uint16_t hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { @@ -2909,11 +3100,47 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) end_of_tx: if (likely(nb_tx)) - hns3_queue_xmit(txq, nb_hold); + 
hns3_write_reg_opt(txq->io_tail_reg, nb_hold); return nb_tx; } +int +hns3_tx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id, + struct rte_eth_burst_mode *mode) +{ + eth_tx_burst_t pkt_burst = dev->tx_pkt_burst; + const char *info = NULL; + + if (pkt_burst == hns3_xmit_pkts_simple) + info = "Scalar Simple"; + else if (pkt_burst == hns3_xmit_pkts) + info = "Scalar"; + + if (info == NULL) + return -EINVAL; + + snprintf(mode->info, sizeof(mode->info), "%s", info); + + return 0; +} + +static eth_tx_burst_t +hns3_get_tx_function(struct rte_eth_dev *dev, eth_tx_prep_t *prep) +{ + uint64_t offloads = dev->data->dev_conf.txmode.offloads; + struct hns3_adapter *hns = dev->data->dev_private; + + if (hns->tx_simple_allowed && + offloads == (offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE)) { + *prep = NULL; + return hns3_xmit_pkts_simple; + } + + *prep = hns3_prep_pkts; + return hns3_xmit_pkts; +} + static uint16_t hns3_dummy_rxtx_burst(void *dpdk_txq __rte_unused, struct rte_mbuf **pkts __rte_unused, @@ -2925,12 +3152,13 @@ hns3_dummy_rxtx_burst(void *dpdk_txq __rte_unused, void hns3_set_rxtx_function(struct rte_eth_dev *eth_dev) { struct hns3_adapter *hns = eth_dev->data->dev_private; + eth_tx_prep_t prep = NULL; if (hns->hw.adapter_state == HNS3_NIC_STARTED && rte_atomic16_read(&hns->hw.reset.resetting) == 0) { eth_dev->rx_pkt_burst = hns3_get_rx_function(eth_dev); - eth_dev->tx_pkt_burst = hns3_xmit_pkts; - eth_dev->tx_pkt_prepare = hns3_prep_pkts; + eth_dev->tx_pkt_burst = hns3_get_tx_function(eth_dev, &prep); + eth_dev->tx_pkt_prepare = prep; } else { eth_dev->rx_pkt_burst = hns3_dummy_rxtx_burst; eth_dev->tx_pkt_burst = hns3_dummy_rxtx_burst; @@ -2966,5 +3194,7 @@ hns3_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, qinfo->nb_desc = txq->nb_tx_desc; qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads; + qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh; + qinfo->conf.tx_free_thresh = txq->tx_free_thresh; qinfo->conf.tx_deferred_start = 
txq->tx_deferred_start; } diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h index 3d3f0a0..9933494 100644 --- a/drivers/net/hns3/hns3_rxtx.h +++ b/drivers/net/hns3/hns3_rxtx.h @@ -13,6 +13,9 @@ #define HNS3_BULK_ALLOC_MBUF_NUM 32 #define HNS3_DEFAULT_RX_FREE_THRESH 32 +#define HNS3_DEFAULT_TX_FREE_THRESH 32 +#define HNS3_DEFAULT_TX_RS_THRESH 32 +#define HNS3_TX_FAST_FREE_AHEAD 64 #define HNS3_512_BD_BUF_SIZE 512 #define HNS3_1K_BD_BUF_SIZE 1024 @@ -282,6 +285,7 @@ struct hns3_rx_queue { struct hns3_tx_queue { void *io_base; + volatile void *io_tail_reg; struct hns3_adapter *hns; struct hns3_desc *tx_ring; uint64_t tx_ring_phys_addr; /* TX ring DMA address */ @@ -291,10 +295,32 @@ struct hns3_tx_queue { uint16_t queue_id; uint16_t port_id; uint16_t nb_tx_desc; + /* + * index of next BD whose corresponding rte_mbuf can be released by + * driver. + */ uint16_t next_to_clean; + /* index of next BD to be filled by driver to send packet */ uint16_t next_to_use; + /* num of remaining BDs ready to be filled by driver to send packet */ uint16_t tx_bd_ready; + /* threshold for free tx buffer if available BDs less than this value */ + uint16_t tx_free_thresh; + + /* + * For better performance in tx datapath, releasing mbuf in batches is + * required. + * Only checking the VLD bit of the last descriptor in a batch of the + * thresh descriptors does not mean that these descriptors are all sent + * by hardware successfully. So we need to check that the VLD bits of + * all descriptors are cleared. and then free all mbufs in the batch. + * - tx_rs_thresh + * Number of mbufs released at a time. + + */ + uint16_t tx_rs_thresh; + /* * port based vlan configuration state. 
* value range: HNS3_PORT_BASE_VLAN_DISABLE / HNS3_PORT_BASE_VLAN_ENABLE @@ -360,6 +386,9 @@ struct hns3_tx_queue { uint64_t pkt_padding_fail_cnt; }; +#define HNS3_GET_TX_QUEUE_PEND_BD_NUM(txq) \ + ((txq)->nb_tx_desc - 1 - (txq)->tx_bd_ready) + struct hns3_queue_info { const char *type; /* point to queue memory name */ const char *ring_name; /* point to hardware ring name */ @@ -525,8 +554,13 @@ int hns3_rx_burst_mode_get(struct rte_eth_dev *dev, struct rte_eth_burst_mode *mode); uint16_t hns3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t hns3_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); uint16_t hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +int hns3_tx_burst_mode_get(struct rte_eth_dev *dev, + __rte_unused uint16_t queue_id, + struct rte_eth_burst_mode *mode); const uint32_t *hns3_dev_supported_ptypes_get(struct rte_eth_dev *dev); void hns3_init_rx_ptype_tble(struct rte_eth_dev *dev); void hns3_set_rxtx_function(struct rte_eth_dev *eth_dev); From patchwork Mon Sep 7 09:08:22 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Wei Hu (Xavier)" X-Patchwork-Id: 76657 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 28F5CA04B9; Mon, 7 Sep 2020 11:09:53 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id BB3711C120; Mon, 7 Sep 2020 11:09:20 +0200 (CEST) Received: from mail.chinasoftinc.com (unknown [114.113.233.8]) by dpdk.org (Postfix) with ESMTP id 75BF61BF8A for ; Mon, 7 Sep 2020 11:09:18 +0200 (CEST) Received: from localhost.localdomain (65.49.108.226) by INCCAS002.ito.icss (10.168.0.60) with Microsoft SMTP Server id 14.3.487.0; Mon, 7 Sep 2020 17:09:11 
+0800 From: "Wei Hu (Xavier)" To: CC: Date: Mon, 7 Sep 2020 17:08:22 +0800 Message-ID: <20200907090825.1761-6-huwei013@chinasoftinc.com> X-Mailer: git-send-email 2.9.5 In-Reply-To: <20200907090825.1761-1-huwei013@chinasoftinc.com> References: <20200907090825.1761-1-huwei013@chinasoftinc.com> MIME-Version: 1.0 X-Originating-IP: [65.49.108.226] Subject: [dpdk-dev] [PATCH 5/8] net/hns3: add vector Tx burst with NEON instructions X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: "Wei Hu (Xavier)" This patch adds NEON vector instructions to optimize Tx burst process. Signed-off-by: Huisong Li Signed-off-by: Wei Hu (Xavier) Signed-off-by: Chengwen Feng --- config/common_base | 1 + config/common_linux | 1 + drivers/net/hns3/Makefile | 5 +++ drivers/net/hns3/hns3_ethdev.c | 2 + drivers/net/hns3/hns3_ethdev.h | 2 + drivers/net/hns3/hns3_ethdev_vf.c | 2 + drivers/net/hns3/hns3_rxtx.c | 33 ++++++++++++++ drivers/net/hns3/hns3_rxtx.h | 20 ++++++++- drivers/net/hns3/hns3_rxtx_vec.c | 47 ++++++++++++++++++++ drivers/net/hns3/hns3_rxtx_vec.h | 57 ++++++++++++++++++++++++ drivers/net/hns3/hns3_rxtx_vec_neon.h | 81 +++++++++++++++++++++++++++++++++++ drivers/net/hns3/meson.build | 4 ++ 12 files changed, 254 insertions(+), 1 deletion(-) create mode 100644 drivers/net/hns3/hns3_rxtx_vec.c create mode 100644 drivers/net/hns3/hns3_rxtx_vec.h create mode 100644 drivers/net/hns3/hns3_rxtx_vec_neon.h diff --git a/config/common_base b/config/common_base index fbf0ee7..af1dea6 100644 --- a/config/common_base +++ b/config/common_base @@ -292,6 +292,7 @@ CONFIG_RTE_LIBRTE_HINIC_PMD=n # Compile burst-oriented HNS3 PMD driver # CONFIG_RTE_LIBRTE_HNS3_PMD=n +CONFIG_RTE_LIBRTE_HNS3_INC_VECTOR=n # # Compile Pensando IONIC PMD driver diff --git a/config/common_linux b/config/common_linux index 8168106..e88a404 100644 
--- a/config/common_linux +++ b/config/common_linux @@ -66,3 +66,4 @@ CONFIG_RTE_LIBRTE_HINIC_PMD=y # Hisilicon HNS3 PMD driver # CONFIG_RTE_LIBRTE_HNS3_PMD=y +CONFIG_RTE_LIBRTE_HNS3_INC_VECTOR=y diff --git a/drivers/net/hns3/Makefile b/drivers/net/hns3/Makefile index d7798a4..d08d8fa 100644 --- a/drivers/net/hns3/Makefile +++ b/drivers/net/hns3/Makefile @@ -30,6 +30,11 @@ SRCS-$(CONFIG_RTE_LIBRTE_HNS3_PMD) += hns3_ethdev_vf.c SRCS-$(CONFIG_RTE_LIBRTE_HNS3_PMD) += hns3_cmd.c SRCS-$(CONFIG_RTE_LIBRTE_HNS3_PMD) += hns3_mbx.c SRCS-$(CONFIG_RTE_LIBRTE_HNS3_PMD) += hns3_rxtx.c + +ifeq ($(CONFIG_RTE_ARCH_ARM64),y) +SRCS-$(CONFIG_RTE_LIBRTE_HNS3_INC_VECTOR) += hns3_rxtx_vec.c +endif + SRCS-$(CONFIG_RTE_LIBRTE_HNS3_PMD) += hns3_rss.c SRCS-$(CONFIG_RTE_LIBRTE_HNS3_PMD) += hns3_flow.c SRCS-$(CONFIG_RTE_LIBRTE_HNS3_PMD) += hns3_fdir.c diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c index 8701994..68239f5 100644 --- a/drivers/net/hns3/hns3_ethdev.c +++ b/drivers/net/hns3/hns3_ethdev.c @@ -2353,6 +2353,8 @@ hns3_dev_configure(struct rte_eth_dev *dev) hns->rx_simple_allowed = true; hns->tx_simple_allowed = true; + hns->tx_vec_allowed = true; + hns3_init_rx_ptype_tble(dev); hw->adapter_state = HNS3_NIC_CONFIGURED; diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h index ef85034..098b6ce 100644 --- a/drivers/net/hns3/hns3_ethdev.h +++ b/drivers/net/hns3/hns3_ethdev.h @@ -644,6 +644,8 @@ struct hns3_adapter { bool rx_simple_allowed; bool tx_simple_allowed; + bool tx_vec_allowed; + struct hns3_ptype_table ptype_tbl __rte_cache_min_aligned; }; diff --git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c index 915b896..f3e6aea 100644 --- a/drivers/net/hns3/hns3_ethdev_vf.c +++ b/drivers/net/hns3/hns3_ethdev_vf.c @@ -823,6 +823,8 @@ hns3vf_dev_configure(struct rte_eth_dev *dev) hns->rx_simple_allowed = true; hns->tx_simple_allowed = true; + hns->tx_vec_allowed = true; + hns3_init_rx_ptype_tble(dev); 
hw->adapter_state = HNS3_NIC_CONFIGURED; diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c index 08a3dcd..a537fbe 100644 --- a/drivers/net/hns3/hns3_rxtx.c +++ b/drivers/net/hns3/hns3_rxtx.c @@ -95,6 +95,8 @@ hns3_tx_queue_release(void *queue) rte_memzone_free(txq->mz); if (txq->sw_ring) rte_free(txq->sw_ring); + if (txq->free) + rte_free(txq->free); rte_free(txq); } } @@ -1020,6 +1022,7 @@ hns3_fake_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, /* Don't need alloc sw_ring, because upper applications don't use it */ txq->sw_ring = NULL; + txq->free = NULL; txq->hns = hns; txq->tx_deferred_start = false; @@ -2052,6 +2055,15 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, txq->tx_bd_ready = txq->nb_tx_desc - 1; txq->tx_free_thresh = tx_free_thresh; txq->tx_rs_thresh = tx_rs_thresh; + txq->free = rte_zmalloc_socket("hns3 TX mbuf free array", + sizeof(struct rte_mbuf *) * txq->tx_rs_thresh, + RTE_CACHE_LINE_SIZE, socket_id); + if (!txq->free) { + hns3_err(hw, "failed to allocate tx mbuf free array!"); + hns3_tx_queue_release(txq); + return -ENOMEM; + } + txq->port_id = dev->data->port_id; txq->pvid_state = hw->port_base_vlan_cfg.state; txq->configured = true; @@ -3105,6 +3117,20 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) return nb_tx; } +int __rte_weak +hns3_tx_check_vec_support(__rte_unused struct rte_eth_dev *dev) +{ + return -ENOTSUP; +} + +uint16_t __rte_weak +hns3_xmit_pkts_vec(__rte_unused void *tx_queue, + __rte_unused struct rte_mbuf **tx_pkts, + __rte_unused uint16_t nb_pkts) +{ + return 0; +} + int hns3_tx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id, struct rte_eth_burst_mode *mode) @@ -3116,6 +3142,8 @@ hns3_tx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id, info = "Scalar Simple"; else if (pkt_burst == hns3_xmit_pkts) info = "Scalar"; + else if (pkt_burst == hns3_xmit_pkts_vec) + info = "Vector Neon"; if (info 
== NULL) return -EINVAL; @@ -3131,6 +3159,11 @@ hns3_get_tx_function(struct rte_eth_dev *dev, eth_tx_prep_t *prep) uint64_t offloads = dev->data->dev_conf.txmode.offloads; struct hns3_adapter *hns = dev->data->dev_private; + if (hns->tx_vec_allowed && hns3_tx_check_vec_support(dev) == 0) { + *prep = NULL; + return hns3_xmit_pkts_vec; + } + if (hns->tx_simple_allowed && offloads == (offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE)) { *prep = NULL; diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h index 9933494..c5a510b 100644 --- a/drivers/net/hns3/hns3_rxtx.h +++ b/drivers/net/hns3/hns3_rxtx.h @@ -17,6 +17,10 @@ #define HNS3_DEFAULT_TX_RS_THRESH 32 #define HNS3_TX_FAST_FREE_AHEAD 64 +#define HNS3_UINT8_BIT 8 +#define HNS3_UINT16_BIT 16 +#define HNS3_UINT32_BIT 32 + #define HNS3_512_BD_BUF_SIZE 512 #define HNS3_1K_BD_BUF_SIZE 1024 #define HNS3_2K_BD_BUF_SIZE 2048 @@ -132,6 +136,13 @@ #define HNS3_L3_LEN_UNIT 2UL #define HNS3_L4_LEN_UNIT 2UL +#define HNS3_TXD_DEFAULT_BDTYPE 0 +#define HNS3_TXD_VLD_CMD (0x1 << HNS3_TXD_VLD_B) +#define HNS3_TXD_FE_CMD (0x1 << HNS3_TXD_FE_B) +#define HNS3_TXD_DEFAULT_VLD_FE_BDTYPE \ + (HNS3_TXD_VLD_CMD | HNS3_TXD_FE_CMD | HNS3_TXD_DEFAULT_BDTYPE) +#define HNS3_TXD_SEND_SIZE_SHIFT 16 + enum hns3_pkt_l2t_type { HNS3_L2_TYPE_UNICAST, HNS3_L2_TYPE_MULTICAST, @@ -317,9 +328,13 @@ struct hns3_tx_queue { * all descriptors are cleared. and then free all mbufs in the batch. * - tx_rs_thresh * Number of mbufs released at a time. - + * + * - free + * Tx mbuf free array used for temporarily holding the addresses of mbufs + * being released back to the mempool when freeing mbufs in batches. */ uint16_t tx_rs_thresh; + struct rte_mbuf **free; /* * port based vlan configuration state.
@@ -558,6 +573,8 @@ uint16_t hns3_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); uint16_t hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t hns3_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); int hns3_tx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id, struct rte_eth_burst_mode *mode); @@ -577,6 +594,7 @@ int hns3_restore_gro_conf(struct hns3_hw *hw); void hns3_update_all_queues_pvid_state(struct hns3_hw *hw); void hns3_rx_scattered_reset(struct rte_eth_dev *dev); void hns3_rx_scattered_calc(struct rte_eth_dev *dev); +int hns3_tx_check_vec_support(struct rte_eth_dev *dev); void hns3_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, struct rte_eth_rxq_info *qinfo); void hns3_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, diff --git a/drivers/net/hns3/hns3_rxtx_vec.c b/drivers/net/hns3/hns3_rxtx_vec.c new file mode 100644 index 0000000..1154b6f --- /dev/null +++ b/drivers/net/hns3/hns3_rxtx_vec.c @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Hisilicon Limited. 
+ */ + +#include +#include + +#include "hns3_ethdev.h" +#include "hns3_rxtx.h" +#include "hns3_rxtx_vec.h" + +#if defined RTE_ARCH_ARM64 +#include "hns3_rxtx_vec_neon.h" +#endif + +int +hns3_tx_check_vec_support(struct rte_eth_dev *dev) +{ + struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode; + + /* Only support DEV_TX_OFFLOAD_MBUF_FAST_FREE */ + if (txmode->offloads != DEV_TX_OFFLOAD_MBUF_FAST_FREE) + return -ENOTSUP; + + return 0; +} + +uint16_t +hns3_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct hns3_tx_queue *txq = (struct hns3_tx_queue *)tx_queue; + uint16_t nb_tx = 0; + + while (nb_pkts) { + uint16_t ret, new_burst; + + new_burst = RTE_MIN(nb_pkts, txq->tx_rs_thresh); + ret = hns3_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx], + new_burst); + nb_tx += ret; + nb_pkts -= ret; + if (ret < new_burst) + break; + } + + return nb_tx; +} diff --git a/drivers/net/hns3/hns3_rxtx_vec.h b/drivers/net/hns3/hns3_rxtx_vec.h new file mode 100644 index 0000000..90679bf --- /dev/null +++ b/drivers/net/hns3/hns3_rxtx_vec.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Hisilicon Limited. + */ + +#ifndef _HNS3_RXTX_VEC_H_ +#define _HNS3_RXTX_VEC_H_ + +#include "hns3_rxtx.h" +#include "hns3_ethdev.h" + +static inline void +hns3_tx_free_buffers(struct hns3_tx_queue *txq) +{ + struct rte_mbuf **free = txq->free; + struct hns3_entry *tx_entry; + struct hns3_desc *tx_desc; + struct rte_mbuf *m; + int nb_free = 0; + int i; + + /* + * All mbufs can be released only when the VLD bits of all + * descriptors in a batch are cleared. 
+ */ + tx_desc = &txq->tx_ring[txq->next_to_clean]; + for (i = 0; i < txq->tx_rs_thresh; i++, tx_desc++) { + if (tx_desc->tx.tp_fe_sc_vld_ra_ri & + rte_le_to_cpu_16(BIT(HNS3_TXD_VLD_B))) + return; + } + + tx_entry = &txq->sw_ring[txq->next_to_clean]; + for (i = 0; i < txq->tx_rs_thresh; i++, tx_entry++) { + m = rte_pktmbuf_prefree_seg(tx_entry->mbuf); + tx_entry->mbuf = NULL; + + if (m == NULL) + continue; + + if (nb_free && m->pool != free[0]->pool) { + rte_mempool_put_bulk(free[0]->pool, (void **)free, + nb_free); + nb_free = 0; + } + free[nb_free++] = m; + } + + if (nb_free) + rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free); + + /* Update numbers of available descriptor due to buffer freed */ + txq->tx_bd_ready += txq->tx_rs_thresh; + txq->next_to_clean += txq->tx_rs_thresh; + if (txq->next_to_clean >= txq->nb_tx_desc) + txq->next_to_clean = 0; +} +#endif /* _HNS3_RXTX_VEC_H_ */ diff --git a/drivers/net/hns3/hns3_rxtx_vec_neon.h b/drivers/net/hns3/hns3_rxtx_vec_neon.h new file mode 100644 index 0000000..2bd2b35 --- /dev/null +++ b/drivers/net/hns3/hns3_rxtx_vec_neon.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Hisilicon Limited. 
+ */ + +#ifndef _HNS3_RXTX_VEC_NEON_H_ +#define _HNS3_RXTX_VEC_NEON_H_ + +#include + +#pragma GCC diagnostic ignored "-Wcast-qual" + +static inline void +hns3_vec_tx(volatile struct hns3_desc *desc, struct rte_mbuf *pkt) +{ + uint64x2_t val1 = { pkt->buf_iova + pkt->data_off, + ((uint64_t)pkt->data_len) << HNS3_TXD_SEND_SIZE_SHIFT }; + uint64x2_t val2 = { 0, + ((uint64_t)HNS3_TXD_DEFAULT_VLD_FE_BDTYPE) << HNS3_UINT32_BIT }; + vst1q_u64((uint64_t *)&desc->addr, val1); + vst1q_u64((uint64_t *)&desc->tx.outer_vlan_tag, val2); +} + +static uint16_t +hns3_xmit_fixed_burst_vec(void *__restrict tx_queue, + struct rte_mbuf **__restrict tx_pkts, + uint16_t nb_pkts) +{ + struct hns3_tx_queue *txq = (struct hns3_tx_queue *)tx_queue; + volatile struct hns3_desc *tx_desc; + struct hns3_entry *tx_entry; + uint16_t next_to_use; + uint16_t nb_commit; + uint16_t nb_tx; + uint16_t n, i; + + if (txq->tx_bd_ready < txq->tx_free_thresh) + hns3_tx_free_buffers(txq); + + nb_commit = RTE_MIN(txq->tx_bd_ready, nb_pkts); + if (unlikely(nb_commit == 0)) { + txq->queue_full_cnt++; + return 0; + } + nb_tx = nb_commit; + + next_to_use = txq->next_to_use; + tx_desc = &txq->tx_ring[next_to_use]; + tx_entry = &txq->sw_ring[next_to_use]; + + /* + * We need to deal with n descriptors first for better performance, + * if nb_commit is greater than the difference between txq->nb_tx_desc + * and next_to_use in sw_ring and tx_ring. 
+ */ + n = txq->nb_tx_desc - next_to_use; + if (nb_commit >= n) { + for (i = 0; i < n; i++, tx_pkts++, tx_desc++) { + hns3_vec_tx(tx_desc, *tx_pkts); + tx_entry[i].mbuf = *tx_pkts; + } + + nb_commit -= n; + next_to_use = 0; + tx_desc = &txq->tx_ring[next_to_use]; + tx_entry = &txq->sw_ring[next_to_use]; + } + + for (i = 0; i < nb_commit; i++, tx_pkts++, tx_desc++) { + hns3_vec_tx(tx_desc, *tx_pkts); + tx_entry[i].mbuf = *tx_pkts; + } + + next_to_use += nb_commit; + txq->next_to_use = next_to_use; + txq->tx_bd_ready -= nb_tx; + + hns3_write_reg_opt(txq->io_tail_reg, nb_tx); + + return nb_tx; +} +#endif /* _HNS3_RXTX_VEC_NEON_H_ */ diff --git a/drivers/net/hns3/meson.build b/drivers/net/hns3/meson.build index e01e6ce..19aee71 100644 --- a/drivers/net/hns3/meson.build +++ b/drivers/net/hns3/meson.build @@ -27,4 +27,8 @@ sources = files('hns3_cmd.c', 'hns3_stats.c', 'hns3_mp.c') +if (dpdk_conf.has('RTE_ARCH_ARM64')) + sources += files('hns3_rxtx_vec.c') +endif + deps += ['hash'] From patchwork Mon Sep 7 09:08:23 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Wei Hu (Xavier)" X-Patchwork-Id: 76658 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id A14E0A04B9; Mon, 7 Sep 2020 11:10:04 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 487C31C11C; Mon, 7 Sep 2020 11:09:23 +0200 (CEST) Received: from mail.chinasoftinc.com (unknown [114.113.233.8]) by dpdk.org (Postfix) with ESMTP id B74A41BF8A for ; Mon, 7 Sep 2020 11:09:19 +0200 (CEST) Received: from localhost.localdomain (65.49.108.226) by INCCAS002.ito.icss (10.168.0.60) with Microsoft SMTP Server id 14.3.487.0; Mon, 7 Sep 2020 17:09:15 +0800 From: "Wei Hu (Xavier)" To: CC: Date: Mon, 7 Sep 2020 17:08:23 +0800 Message-ID: 
<20200907090825.1761-7-huwei013@chinasoftinc.com> X-Mailer: git-send-email 2.9.5 In-Reply-To: <20200907090825.1761-1-huwei013@chinasoftinc.com> References: <20200907090825.1761-1-huwei013@chinasoftinc.com> MIME-Version: 1.0 X-Originating-IP: [65.49.108.226] Subject: [dpdk-dev] [PATCH 6/8] net/hns3: add vector Rx burst with NEON instructions X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: "Wei Hu (Xavier)" This patch adds NEON vector instructions to optimize Rx burst process. Signed-off-by: Chengwen Feng Signed-off-by: Wei Hu (Xavier) Signed-off-by: Huisong Li --- drivers/net/hns3/hns3_ethdev.c | 1 + drivers/net/hns3/hns3_ethdev.h | 1 + drivers/net/hns3/hns3_ethdev_vf.c | 1 + drivers/net/hns3/hns3_rxtx.c | 94 +++++++++++++++- drivers/net/hns3/hns3_rxtx.h | 35 +++++- drivers/net/hns3/hns3_rxtx_vec.c | 167 ++++++++++++++++++++++++++++ drivers/net/hns3/hns3_rxtx_vec.h | 20 ++++ drivers/net/hns3/hns3_rxtx_vec_neon.h | 203 ++++++++++++++++++++++++++++++++++ 8 files changed, 514 insertions(+), 8 deletions(-) diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c index 68239f5..0727c6d 100644 --- a/drivers/net/hns3/hns3_ethdev.c +++ b/drivers/net/hns3/hns3_ethdev.c @@ -2352,6 +2352,7 @@ hns3_dev_configure(struct rte_eth_dev *dev) goto cfg_err; hns->rx_simple_allowed = true; + hns->rx_vec_allowed = true; hns->tx_simple_allowed = true; hns->tx_vec_allowed = true; diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h index 098b6ce..fd6a9f9 100644 --- a/drivers/net/hns3/hns3_ethdev.h +++ b/drivers/net/hns3/hns3_ethdev.h @@ -643,6 +643,7 @@ struct hns3_adapter { }; bool rx_simple_allowed; + bool rx_vec_allowed; bool tx_simple_allowed; bool tx_vec_allowed; diff --git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c index 
f3e6aea..93f2c93 100644 --- a/drivers/net/hns3/hns3_ethdev_vf.c +++ b/drivers/net/hns3/hns3_ethdev_vf.c @@ -822,6 +822,7 @@ hns3vf_dev_configure(struct rte_eth_dev *dev) goto cfg_err; hns->rx_simple_allowed = true; + hns->rx_vec_allowed = true; hns->tx_simple_allowed = true; hns->tx_vec_allowed = true; diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c index a537fbe..03d69b1 100644 --- a/drivers/net/hns3/hns3_rxtx.c +++ b/drivers/net/hns3/hns3_rxtx.c @@ -41,9 +41,19 @@ hns3_rx_queue_release_mbufs(struct hns3_rx_queue *rxq) if (rxq->sw_ring == NULL) return; - for (i = 0; i < rxq->nb_rx_desc; i++) - if (rxq->sw_ring[i].mbuf) - rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + if (rxq->rx_rearm_nb == 0) { + for (i = 0; i < rxq->nb_rx_desc; i++) { + if (rxq->sw_ring[i].mbuf != NULL) + rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + } + } else { + for (i = rxq->next_to_use; + i != rxq->rx_rearm_start; + i = (i + 1) % rxq->nb_rx_desc) { + if (rxq->sw_ring[i].mbuf != NULL) + rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + } + } for (i = 0; i < rxq->bulk_mbuf_num; i++) rte_pktmbuf_free_seg(rxq->bulk_mbuf[i]); @@ -661,10 +671,13 @@ hns3_dev_rx_queue_start(struct hns3_adapter *hns, uint16_t idx) } rxq->next_to_use = 0; + rxq->rx_rearm_start = 0; rxq->rx_free_hold = 0; + rxq->rx_rearm_nb = 0; rxq->pkt_first_seg = NULL; rxq->pkt_last_seg = NULL; hns3_init_rx_queue_hw(rxq); + hns3_rxq_vec_setup(rxq); return 0; } @@ -678,6 +691,8 @@ hns3_fake_rx_queue_start(struct hns3_adapter *hns, uint16_t idx) rxq = (struct hns3_rx_queue *)hw->fkq_data.rx_queues[idx]; rxq->next_to_use = 0; rxq->rx_free_hold = 0; + rxq->rx_rearm_start = 0; + rxq->rx_rearm_nb = 0; hns3_init_rx_queue_hw(rxq); } @@ -860,6 +875,40 @@ hns3_stop_queues(struct hns3_adapter *hns, bool reset_queue) return 0; } +/* + * Iterate over all Rx Queue, and call the callback() function for each Rx + * queue. + * + * @param[in] dev + * The target eth dev. 
+ * @param[in] callback + * The function to call for each queue. + * If the callback function returns nonzero, iteration stops and its value is returned. + * @param[in] arg + * The arguments to provide the callback function with. + * + * @return + * 0 on success; -EINVAL or the callback's nonzero return value otherwise. + */ +int +hns3_rxq_iterate(struct rte_eth_dev *dev, + int (*callback)(struct hns3_rx_queue *, void *), void *arg) +{ + uint32_t i; + int ret; + + if (dev->data->rx_queues == NULL) + return -EINVAL; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + ret = callback(dev->data->rx_queues[i], arg); + if (ret != 0) + return ret; + } + + return 0; +} + static void* hns3_alloc_rxq_and_dma_zone(struct rte_eth_dev *dev, struct hns3_queue_info *q_info) @@ -880,7 +929,13 @@ hns3_alloc_rxq_and_dma_zone(struct rte_eth_dev *dev, /* Allocate rx ring hardware descriptors. */ rxq->queue_id = q_info->idx; rxq->nb_rx_desc = q_info->nb_desc; - rx_desc = rxq->nb_rx_desc * sizeof(struct hns3_desc); + + /* + * Allocate a little more memory because rx vector functions + * don't check boundaries each time.
+ */ + rx_desc = (rxq->nb_rx_desc + HNS3_DEFAULT_RX_BURST) * + sizeof(struct hns3_desc); rx_mz = rte_eth_dma_zone_reserve(dev, q_info->ring_name, q_info->idx, rx_desc, HNS3_RING_BASE_ALIGN, q_info->socket_id); @@ -1329,7 +1384,8 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, conf->rx_free_thresh : HNS3_DEFAULT_RX_FREE_THRESH; rxq->rx_deferred_start = conf->rx_deferred_start; - rx_entry_len = sizeof(struct hns3_entry) * rxq->nb_rx_desc; + rx_entry_len = (rxq->nb_rx_desc + HNS3_DEFAULT_RX_BURST) * + sizeof(struct hns3_entry); rxq->sw_ring = rte_zmalloc_socket("hns3 RX sw ring", rx_entry_len, RTE_CACHE_LINE_SIZE, socket_id); if (rxq->sw_ring == NULL) { @@ -1340,6 +1396,8 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, rxq->next_to_use = 0; rxq->rx_free_hold = 0; + rxq->rx_rearm_start = 0; + rxq->rx_rearm_nb = 0; rxq->pkt_first_seg = NULL; rxq->pkt_last_seg = NULL; rxq->port_id = dev->data->port_id; @@ -1431,7 +1489,8 @@ hns3_dev_supported_ptypes_get(struct rte_eth_dev *dev) }; if (dev->rx_pkt_burst == hns3_recv_pkts || - dev->rx_pkt_burst == hns3_recv_scattered_pkts) + dev->rx_pkt_burst == hns3_recv_scattered_pkts || + dev->rx_pkt_burst == hns3_recv_pkts_vec) return ptypes; return NULL; @@ -1915,6 +1974,25 @@ hns3_recv_scattered_pkts(void *rx_queue, return nb_rx; } +void __rte_weak +hns3_rxq_vec_setup(__rte_unused struct hns3_rx_queue *rxq) +{ +} + +int __rte_weak +hns3_rx_check_vec_support(__rte_unused struct rte_eth_dev *dev) +{ + return -ENOTSUP; +} + +uint16_t __rte_weak +hns3_recv_pkts_vec(__rte_unused void *tx_queue, + __rte_unused struct rte_mbuf **tx_pkts, + __rte_unused uint16_t nb_pkts) +{ + return 0; +} + int hns3_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id, struct rte_eth_burst_mode *mode) @@ -1925,6 +2003,7 @@ hns3_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id, } burst_infos[] = { { hns3_recv_pkts, "Scalar" }, { 
hns3_recv_scattered_pkts, "Scalar Scattered" }, + { hns3_recv_pkts_vec, "Vector Neon" }, }; eth_rx_burst_t pkt_burst = dev->rx_pkt_burst; @@ -1949,6 +2028,9 @@ hns3_get_rx_function(struct rte_eth_dev *dev) struct hns3_adapter *hns = dev->data->dev_private; uint64_t offloads = dev->data->dev_conf.rxmode.offloads; + if (hns->rx_vec_allowed && hns3_rx_check_vec_support(dev) == 0) + return hns3_recv_pkts_vec; + if (hns->rx_simple_allowed && !dev->data->scattered_rx && (offloads & DEV_RX_OFFLOAD_TCP_LRO) == 0) return hns3_recv_pkts; diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h index c5a510b..a629be9 100644 --- a/drivers/net/hns3/hns3_rxtx.h +++ b/drivers/net/hns3/hns3_rxtx.h @@ -17,6 +17,18 @@ #define HNS3_DEFAULT_TX_RS_THRESH 32 #define HNS3_TX_FAST_FREE_AHEAD 64 +#define HNS3_DEFAULT_RX_BURST 32 +#if (HNS3_DEFAULT_RX_BURST > 64) +#error "PMD HNS3: HNS3_DEFAULT_RX_BURST must <= 64\n" +#endif +#define HNS3_DEFAULT_DESCS_PER_LOOP 4 +#define HNS3_SVE_DEFAULT_DESCS_PER_LOOP 8 +#if (HNS3_DEFAULT_DESCS_PER_LOOP > HNS3_SVE_DEFAULT_DESCS_PER_LOOP) +#define HNS3_VECTOR_RX_OFFSET_TABLE_LEN HNS3_DEFAULT_DESCS_PER_LOOP +#else +#define HNS3_VECTOR_RX_OFFSET_TABLE_LEN HNS3_SVE_DEFAULT_DESCS_PER_LOOP +#endif +#define HNS3_DEFAULT_RXQ_REARM_THRESH 64 #define HNS3_UINT8_BIT 8 #define HNS3_UINT16_BIT 16 #define HNS3_UINT32_BIT 32 @@ -236,7 +248,13 @@ struct hns3_desc { uint16_t ot_vlan_tag; }; }; - uint32_t bd_base_info; + union { + uint32_t bd_base_info; + struct { + uint16_t bdtype_vld_udp0; + uint16_t fe_lum_crcp_l3l4p; + }; + }; } rx; }; } __rte_packed; @@ -270,7 +288,8 @@ struct hns3_rx_queue { uint16_t rx_free_thresh; uint16_t next_to_use; /* index of next BD to be polled */ uint16_t rx_free_hold; /* num of BDs waited to passed to hardware */ - + uint16_t rx_rearm_start; /* index of BD that driver re-arming from */ + uint16_t rx_rearm_nb; /* number of remaining BDs to be re-armed */ /* * port based vlan configuration state. 
* value range: HNS3_PORT_BASE_VLAN_DISABLE / HNS3_PORT_BASE_VLAN_ENABLE @@ -292,6 +311,11 @@ struct hns3_rx_queue { struct rte_mbuf *bulk_mbuf[HNS3_BULK_ALLOC_MBUF_NUM]; uint16_t bulk_mbuf_num; + + /* offset_table: used for vector, to solve execute re-order problem */ + uint8_t offset_table[HNS3_VECTOR_RX_OFFSET_TABLE_LEN + 1]; + uint64_t mbuf_initializer; /* value to init mbufs used with vector rx */ + struct rte_mbuf fake_mbuf; /* fake mbuf used with vector rx */ }; struct hns3_tx_queue { @@ -554,6 +578,8 @@ int hns3_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id); void hns3_enable_all_queues(struct hns3_hw *hw, bool en); int hns3_start_queues(struct hns3_adapter *hns, bool reset_queue); int hns3_stop_queues(struct hns3_adapter *hns, bool reset_queue); +int hns3_rxq_iterate(struct rte_eth_dev *dev, + int (*callback)(struct hns3_rx_queue *, void *), void *arg); void hns3_dev_release_mbufs(struct hns3_adapter *hns); int hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, unsigned int socket, const struct rte_eth_rxconf *conf, @@ -564,9 +590,12 @@ uint16_t hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); uint16_t hns3_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); +uint16_t hns3_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); int hns3_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id, struct rte_eth_burst_mode *mode); +int hns3_rx_check_vec_support(struct rte_eth_dev *dev); uint16_t hns3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); uint16_t hns3_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, @@ -594,7 +623,9 @@ int hns3_restore_gro_conf(struct hns3_hw *hw); void hns3_update_all_queues_pvid_state(struct hns3_hw *hw); void hns3_rx_scattered_reset(struct rte_eth_dev *dev); void hns3_rx_scattered_calc(struct rte_eth_dev *dev); +int 
hns3_rx_check_vec_support(struct rte_eth_dev *dev); int hns3_tx_check_vec_support(struct rte_eth_dev *dev); +void hns3_rxq_vec_setup(struct hns3_rx_queue *rxq); void hns3_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, struct rte_eth_rxq_info *qinfo); void hns3_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, diff --git a/drivers/net/hns3/hns3_rxtx_vec.c b/drivers/net/hns3/hns3_rxtx_vec.c index 1154b6f..a26c83d 100644 --- a/drivers/net/hns3/hns3_rxtx_vec.c +++ b/drivers/net/hns3/hns3_rxtx_vec.c @@ -45,3 +45,170 @@ hns3_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) return nb_tx; } + +static inline void +hns3_rxq_rearm_mbuf(struct hns3_rx_queue *rxq) +{ +#define REARM_LOOP_STEP_NUM 4 + struct hns3_entry *rxep = &rxq->sw_ring[rxq->rx_rearm_start]; + struct hns3_desc *rxdp = rxq->rx_ring + rxq->rx_rearm_start; + uint64_t dma_addr; + int i; + + if (unlikely(rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep, + HNS3_DEFAULT_RXQ_REARM_THRESH) < 0)) { + rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++; + return; + } + + for (i = 0; i < HNS3_DEFAULT_RXQ_REARM_THRESH; i += REARM_LOOP_STEP_NUM, + rxep += REARM_LOOP_STEP_NUM, rxdp += REARM_LOOP_STEP_NUM) { + if (likely(i < + HNS3_DEFAULT_RXQ_REARM_THRESH - REARM_LOOP_STEP_NUM)) { + rte_prefetch_non_temporal(rxep[4].mbuf); + rte_prefetch_non_temporal(rxep[5].mbuf); + rte_prefetch_non_temporal(rxep[6].mbuf); + rte_prefetch_non_temporal(rxep[7].mbuf); + } + + dma_addr = rte_mbuf_data_iova_default(rxep[0].mbuf); + rxdp[0].addr = rte_cpu_to_le_64(dma_addr); + rxdp[0].rx.bd_base_info = 0; + + dma_addr = rte_mbuf_data_iova_default(rxep[1].mbuf); + rxdp[1].addr = rte_cpu_to_le_64(dma_addr); + rxdp[1].rx.bd_base_info = 0; + + dma_addr = rte_mbuf_data_iova_default(rxep[2].mbuf); + rxdp[2].addr = rte_cpu_to_le_64(dma_addr); + rxdp[2].rx.bd_base_info = 0; + + dma_addr = rte_mbuf_data_iova_default(rxep[3].mbuf); + rxdp[3].addr = rte_cpu_to_le_64(dma_addr); + rxdp[3].rx.bd_base_info = 0; 
+ } + + rxq->rx_rearm_start += HNS3_DEFAULT_RXQ_REARM_THRESH; + if (rxq->rx_rearm_start >= rxq->nb_rx_desc) + rxq->rx_rearm_start = 0; + + rxq->rx_rearm_nb -= HNS3_DEFAULT_RXQ_REARM_THRESH; + + hns3_write_reg_opt(rxq->io_head_reg, HNS3_DEFAULT_RXQ_REARM_THRESH); +} + +uint16_t +hns3_recv_pkts_vec(void *__restrict rx_queue, + struct rte_mbuf **__restrict rx_pkts, + uint16_t nb_pkts) +{ + struct hns3_rx_queue *rxq = rx_queue; + struct hns3_desc *rxdp = &rxq->rx_ring[rxq->next_to_use]; + uint64_t bd_err_mask; /* bit mask indicating which pkts are in error */ + uint16_t nb_rx; + + nb_pkts = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST); + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_DEFAULT_DESCS_PER_LOOP); + + rte_prefetch_non_temporal(rxdp); + + if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH) + hns3_rxq_rearm_mbuf(rxq); + + if (unlikely(!(rxdp->rx.bd_base_info & + rte_cpu_to_le_32(1u << HNS3_RXD_VLD_B)))) + return 0; + + rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 0].mbuf); + rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 1].mbuf); + rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 2].mbuf); + rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 3].mbuf); + + bd_err_mask = 0; + nb_rx = hns3_recv_burst_vec(rxq, rx_pkts, nb_pkts, &bd_err_mask); + if (unlikely(bd_err_mask)) + nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, bd_err_mask); + + return nb_rx; +} + +static void +hns3_rxq_vec_setup_rearm_data(struct hns3_rx_queue *rxq) +{ + uintptr_t p; + struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */ + + mb_def.nb_segs = 1; + mb_def.data_off = RTE_PKTMBUF_HEADROOM; + mb_def.port = rxq->port_id; + rte_mbuf_refcnt_set(&mb_def, 1); + + /* prevent compiler reordering: rearm_data covers previous fields */ + rte_compiler_barrier(); + p = (uintptr_t)&mb_def.rearm_data; + rxq->mbuf_initializer = *(uint64_t *)p; +} + +void +hns3_rxq_vec_setup(struct hns3_rx_queue *rxq) +{ + struct hns3_entry *sw_ring = &rxq->sw_ring[rxq->nb_rx_desc]; + unsigned int i; + +
memset(&rxq->rx_ring[rxq->nb_rx_desc], 0, + sizeof(struct hns3_desc) * HNS3_DEFAULT_RX_BURST); + + memset(&rxq->fake_mbuf, 0, sizeof(rxq->fake_mbuf)); + for (i = 0; i < HNS3_DEFAULT_RX_BURST; i++) + sw_ring[i].mbuf = &rxq->fake_mbuf; + + hns3_rxq_vec_setup_rearm_data(rxq); + + memset(rxq->offset_table, 0, sizeof(rxq->offset_table)); +} + +#ifndef RTE_LIBRTE_IEEE1588 +static int +hns3_rxq_vec_check(struct hns3_rx_queue *rxq, void *arg) +{ + uint32_t min_vec_bds = HNS3_DEFAULT_RXQ_REARM_THRESH + + HNS3_DEFAULT_RX_BURST; + + if (rxq->nb_rx_desc < min_vec_bds) + return -ENOTSUP; + + if (rxq->nb_rx_desc % HNS3_DEFAULT_RXQ_REARM_THRESH) + return -ENOTSUP; + + RTE_SET_USED(arg); + return 0; +} +#endif + +int +hns3_rx_check_vec_support(struct rte_eth_dev *dev) +{ +#ifndef RTE_LIBRTE_IEEE1588 + struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf; + struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; + uint64_t offloads_mask = DEV_RX_OFFLOAD_TCP_LRO | + DEV_RX_OFFLOAD_VLAN; + + if (dev->data->scattered_rx) + return -ENOTSUP; + + if (fconf->mode != RTE_FDIR_MODE_NONE) + return -ENOTSUP; + + if (rxmode->offloads & offloads_mask) + return -ENOTSUP; + + if (hns3_rxq_iterate(dev, hns3_rxq_vec_check, NULL) != 0) + return -ENOTSUP; + + return 0; +#else + RTE_SET_USED(dev); + return -ENOTSUP; +#endif +} diff --git a/drivers/net/hns3/hns3_rxtx_vec.h b/drivers/net/hns3/hns3_rxtx_vec.h index 90679bf..c6df36d 100644 --- a/drivers/net/hns3/hns3_rxtx_vec.h +++ b/drivers/net/hns3/hns3_rxtx_vec.h @@ -54,4 +54,24 @@ hns3_tx_free_buffers(struct hns3_tx_queue *txq) if (txq->next_to_clean >= txq->nb_tx_desc) txq->next_to_clean = 0; } + +static inline uint16_t +hns3_rx_reassemble_pkts(struct rte_mbuf **rx_pkts, + uint16_t nb_pkts, + uint64_t pkt_err_mask) +{ + uint16_t count, i; + uint64_t mask; + + count = 0; + for (i = 0; i < nb_pkts; i++) { + mask = ((uint64_t)1u) << i; + if (pkt_err_mask & mask) + rte_pktmbuf_free_seg(rx_pkts[i]); + else + rx_pkts[count++] = rx_pkts[i]; + } 
+ + return count; +} #endif /* _HNS3_RXTX_VEC_H_ */ diff --git a/drivers/net/hns3/hns3_rxtx_vec_neon.h b/drivers/net/hns3/hns3_rxtx_vec_neon.h index 2bd2b35..700ee8d 100644 --- a/drivers/net/hns3/hns3_rxtx_vec_neon.h +++ b/drivers/net/hns3/hns3_rxtx_vec_neon.h @@ -78,4 +78,207 @@ hns3_xmit_fixed_burst_vec(void *__restrict tx_queue, return nb_tx; } + +static inline uint32_t +hns3_desc_parse_field(struct hns3_rx_queue *rxq, + struct hns3_entry *sw_ring, + struct hns3_desc *rxdp, + uint32_t bd_vld_num) +{ + uint32_t l234_info, ol_info, bd_base_info; + struct rte_mbuf *pkt; + uint32_t retcode = 0; + uint32_t cksum_err; + int ret, i; + + for (i = 0; i < (int)bd_vld_num; i++) { + pkt = sw_ring[i].mbuf; + + /* init rte_mbuf.rearm_data last 64-bit */ + pkt->ol_flags = PKT_RX_RSS_HASH; + + l234_info = rxdp[i].rx.l234_info; + ol_info = rxdp[i].rx.ol_info; + bd_base_info = rxdp[i].rx.bd_base_info; + ret = hns3_handle_bdinfo(rxq, pkt, bd_base_info, + l234_info, &cksum_err); + if (unlikely(ret)) { + retcode |= 1u << i; + continue; + } + + pkt->packet_type = hns3_rx_calc_ptype(rxq, l234_info, ol_info); + if (likely(bd_base_info & BIT(HNS3_RXD_L3L4P_B))) + hns3_rx_set_cksum_flag(pkt, pkt->packet_type, + cksum_err); + } + + return retcode; +} + +static inline uint16_t +hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq, + struct rte_mbuf **__restrict rx_pkts, + uint16_t nb_pkts, + uint64_t *bd_err_mask) +{ + uint16_t rx_id = rxq->next_to_use; + struct hns3_entry *sw_ring = &rxq->sw_ring[rx_id]; + struct hns3_desc *rxdp = &rxq->rx_ring[rx_id]; + uint32_t bd_valid_num, parse_retcode; + uint16_t nb_rx = 0; + int pos, offset; + + /* mask to shuffle from desc to mbuf's rx_descriptor_fields1 */ + uint8x16_t shuf_rx_desc_fields_msk = { + 0xff, 0xff, 0xff, 0xff, /* packet type init zero */ + 22, 23, 0xff, 0xff, /* rx.pkt_len to rte_mbuf.pkt_len */ + 20, 21, /* size to rte_mbuf.data_len */ + 0xff, 0xff, /* rte_mbuf.vlan_tci init zero */ + 8, 9, 10, 11, /* rx.rss_hash to 
rte_mbuf.hash.rss */ + }; + + uint16x8_t crc_adjust = { + 0, 0, /* ignore pkt_type field */ + rxq->crc_len, /* sub crc on pkt_len */ + 0, /* ignore high-16bits of pkt_len */ + rxq->crc_len, /* sub crc on data_len */ + 0, 0, 0, /* ignore non-length fields */ + }; + + for (pos = 0; pos < nb_pkts; pos += HNS3_DEFAULT_DESCS_PER_LOOP, + rxdp += HNS3_DEFAULT_DESCS_PER_LOOP) { + uint64x2x2_t descs[HNS3_DEFAULT_DESCS_PER_LOOP]; + uint8x16x2_t pkt_mbuf1, pkt_mbuf2, pkt_mbuf3, pkt_mbuf4; + uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4; + uint64x2_t mbp1, mbp2; + uint16x4_t bd_vld = {0}; + uint16x8_t tmp; + uint64_t stat; + + /* calc how many bd valid */ + bd_vld = vset_lane_u16(rxdp[0].rx.bdtype_vld_udp0, bd_vld, 0); + bd_vld = vset_lane_u16(rxdp[1].rx.bdtype_vld_udp0, bd_vld, 1); + bd_vld = vset_lane_u16(rxdp[2].rx.bdtype_vld_udp0, bd_vld, 2); + bd_vld = vset_lane_u16(rxdp[3].rx.bdtype_vld_udp0, bd_vld, 3); + + /* load 2 mbuf pointer */ + mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]); + + bd_vld = vshl_n_u16(bd_vld, + HNS3_UINT16_BIT - 1 - HNS3_RXD_VLD_B); + bd_vld = vreinterpret_u16_s16( + vshr_n_s16(vreinterpret_s16_u16(bd_vld), + HNS3_UINT16_BIT - 1)); + stat = ~vget_lane_u64(vreinterpret_u64_u16(bd_vld), 0); + + /* load 2 mbuf pointer again */ + mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]); + + if (likely(stat == 0)) + bd_valid_num = HNS3_DEFAULT_DESCS_PER_LOOP; + else + bd_valid_num = __builtin_ctzl(stat) / HNS3_UINT16_BIT; + if (bd_valid_num == 0) + break; + + /* use offset to control below data load oper ordering */ + offset = rxq->offset_table[bd_valid_num]; + + /* store 2 mbuf pointer into rx_pkts */ + vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1); + + /* read first two descs */ + descs[0] = vld2q_u64((uint64_t *)(rxdp + offset)); + descs[1] = vld2q_u64((uint64_t *)(rxdp + offset + 1)); + + /* store 2 mbuf pointer into rx_pkts again */ + vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2); + + /* read remains two descs */ + descs[2] = vld2q_u64((uint64_t *)(rxdp + offset 
+ 2)); + descs[3] = vld2q_u64((uint64_t *)(rxdp + offset + 3)); + + pkt_mbuf1.val[0] = vreinterpretq_u8_u64(descs[0].val[0]); + pkt_mbuf1.val[1] = vreinterpretq_u8_u64(descs[0].val[1]); + pkt_mbuf2.val[0] = vreinterpretq_u8_u64(descs[1].val[0]); + pkt_mbuf2.val[1] = vreinterpretq_u8_u64(descs[1].val[1]); + + /* pkt 1,2 convert format from desc to pktmbuf */ + pkt_mb1 = vqtbl2q_u8(pkt_mbuf1, shuf_rx_desc_fields_msk); + pkt_mb2 = vqtbl2q_u8(pkt_mbuf2, shuf_rx_desc_fields_msk); + + /* store the first 8 bytes of pkt 1,2 mbuf's rearm_data */ + *(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data = + rxq->mbuf_initializer; + *(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data = + rxq->mbuf_initializer; + + /* pkt 1,2 remove crc */ + tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust); + pkt_mb1 = vreinterpretq_u8_u16(tmp); + tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust); + pkt_mb2 = vreinterpretq_u8_u16(tmp); + + pkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]); + pkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]); + pkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]); + pkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]); + + /* pkt 3,4 convert format from desc to pktmbuf */ + pkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_rx_desc_fields_msk); + pkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_rx_desc_fields_msk); + + /* pkt 1,2 save to rx_pkts mbuf */ + vst1q_u8((void *)&sw_ring[pos + 0].mbuf->rx_descriptor_fields1, + pkt_mb1); + vst1q_u8((void *)&sw_ring[pos + 1].mbuf->rx_descriptor_fields1, + pkt_mb2); + + /* pkt 3,4 remove crc */ + tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust); + pkt_mb3 = vreinterpretq_u8_u16(tmp); + tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust); + pkt_mb4 = vreinterpretq_u8_u16(tmp); + + /* store the first 8 bytes of pkt 3,4 mbuf's rearm_data */ + *(uint64_t *)&sw_ring[pos + 2].mbuf->rearm_data = + rxq->mbuf_initializer; + *(uint64_t *)&sw_ring[pos + 3].mbuf->rearm_data = + rxq->mbuf_initializer; + + /* pkt 
3,4 save to rx_pkts mbuf */ + vst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1, + pkt_mb3); + vst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1, + pkt_mb4); + + rte_prefetch_non_temporal(rxdp + HNS3_DEFAULT_DESCS_PER_LOOP); + + parse_retcode = hns3_desc_parse_field(rxq, &sw_ring[pos], + &rxdp[offset], bd_valid_num); + if (unlikely(parse_retcode)) + (*bd_err_mask) |= ((uint64_t)parse_retcode) << pos; + + rte_prefetch0(sw_ring[pos + + HNS3_DEFAULT_DESCS_PER_LOOP + 0].mbuf); + rte_prefetch0(sw_ring[pos + + HNS3_DEFAULT_DESCS_PER_LOOP + 1].mbuf); + rte_prefetch0(sw_ring[pos + + HNS3_DEFAULT_DESCS_PER_LOOP + 2].mbuf); + rte_prefetch0(sw_ring[pos + + HNS3_DEFAULT_DESCS_PER_LOOP + 3].mbuf); + + nb_rx += bd_valid_num; + if (bd_valid_num < HNS3_DEFAULT_DESCS_PER_LOOP) + break; + } + + rxq->rx_rearm_nb += nb_rx; + rxq->next_to_use += nb_rx; + if (rxq->next_to_use >= rxq->nb_rx_desc) + rxq->next_to_use = 0; + + return nb_rx; +} #endif /* _HNS3_RXTX_VEC_NEON_H_ */ From patchwork Mon Sep 7 09:08:24 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Wei Hu (Xavier)" X-Patchwork-Id: 76659 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 27DFEA04B9; Mon, 7 Sep 2020 11:10:14 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 7A2761C11A; Mon, 7 Sep 2020 11:09:24 +0200 (CEST) Received: from mail.chinasoftinc.com (unknown [114.113.233.8]) by dpdk.org (Postfix) with ESMTP id 04F341C122 for ; Mon, 7 Sep 2020 11:09:21 +0200 (CEST) Received: from localhost.localdomain (65.49.108.226) by INCCAS002.ito.icss (10.168.0.60) with Microsoft SMTP Server id 14.3.487.0; Mon, 7 Sep 2020 17:09:18 +0800 From: "Wei Hu (Xavier)" To: CC: Date: Mon, 7 Sep 2020 17:08:24 +0800 
Message-ID: <20200907090825.1761-8-huwei013@chinasoftinc.com> X-Mailer: git-send-email 2.9.5 In-Reply-To: <20200907090825.1761-1-huwei013@chinasoftinc.com> References: <20200907090825.1761-1-huwei013@chinasoftinc.com> MIME-Version: 1.0 X-Originating-IP: [65.49.108.226] Subject: [dpdk-dev] [PATCH 7/8] net/hns3: add restriction on setting VF MTU X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: "Wei Hu (Xavier)" When Rx of scattered packets is off, the hns3 PMD driver may use the vector Rx function or the simple Rx function. If the input MTU is increased and the maximum length of received packets is greater than the length of a buffer for Rx packets, the hardware network engine needs to use multiple BDs and buffers to store these packets. This causes problems if the vector Rx function or the simple Rx function is still being used to receive packets. Therefore, when Rx of scattered packets is off and the device is started, it is not permitted to increase the MTU such that the maximum length of Rx packets exceeds the Rx buffer length. Signed-off-by: Chengwen Feng Signed-off-by: Wei Hu (Xavier) --- drivers/net/hns3/hns3_ethdev_vf.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c index 93f2c93..44e51b5 100644 --- a/drivers/net/hns3/hns3_ethdev_vf.c +++ b/drivers/net/hns3/hns3_ethdev_vf.c @@ -871,6 +871,25 @@ hns3vf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) return -EIO; } + /* + * when Rx of scattered packets is off, we have some possibility of + * using vector Rx process function or simple Rx functions in hns3 PMD + * driver.
If the input MTU is increased and the maximum length of + * received packets is greater than the length of a buffer for Rx + * packet, the hardware network engine needs to use multiple BDs and + * buffers to store these packets. This will cause problems when still + * using vector Rx process function or simple Rx function to receiving + * packets. So, when Rx of scattered packets is off and device is + * started, it is not permitted to increase MTU so that the maximum + * length of Rx packets is greater than Rx buffer length. + */ + if (dev->data->dev_started && !dev->data->scattered_rx && + frame_size > hw->rx_buf_len) { + hns3_err(hw, "failed to set mtu because current is " + "not scattered rx mode"); + return -EOPNOTSUPP; + } + rte_spinlock_lock(&hw->lock); ret = hns3vf_config_mtu(hw, mtu); if (ret) { From patchwork Mon Sep 7 09:08:25 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Wei Hu (Xavier)" X-Patchwork-Id: 76660 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 6679EA04B9; Mon, 7 Sep 2020 11:10:22 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 7E1C91C135; Mon, 7 Sep 2020 11:09:25 +0200 (CEST) Received: from mail.chinasoftinc.com (unknown [114.113.233.8]) by dpdk.org (Postfix) with ESMTP id E59131C12D for ; Mon, 7 Sep 2020 11:09:23 +0200 (CEST) Received: from localhost.localdomain (65.49.108.226) by INCCAS002.ito.icss (10.168.0.60) with Microsoft SMTP Server id 14.3.487.0; Mon, 7 Sep 2020 17:09:20 +0800 From: "Wei Hu (Xavier)" To: CC: Date: Mon, 7 Sep 2020 17:08:25 +0800 Message-ID: <20200907090825.1761-9-huwei013@chinasoftinc.com> X-Mailer: git-send-email 2.9.5 In-Reply-To: <20200907090825.1761-1-huwei013@chinasoftinc.com> References: 
<20200907090825.1761-1-huwei013@chinasoftinc.com> MIME-Version: 1.0 X-Originating-IP: [65.49.108.226] Subject: [dpdk-dev] [PATCH 8/8] net/hns3: fix segfault when Tx multiple buffer packets X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Chengchang Tang Currently, there is a possibility that segmentation faults occur when sending packets whose payloads are stored in multiple buffers based on hns3 network engine. The related core dump information is as follows: Program terminated with signal 11, Segmentation fault. 0 hns3_reassemble_tx_pkts 2512 temp = temp->next; Missing separate debuginfos, use: (gdb) bt 0 hns3_reassemble_tx_pkts 1 0x0000000000969c60 in hns3_check_non_tso_pkt 2 0x000000000096adbc in hns3_xmit_pkts 3 0x000000000050d4d0 in rte_eth_tx_burst 4 0x000000000050fca4 in pkt_burst_transmit 5 0x00000000004ca6b8 in run_pkt_fwd_on_lcore 6 0x00000000004ca7fc in start_pkt_forward_on_core 7 0x00000000006975a4 in eal_thread_loop 8 0x0000ffffa6f7fc48 in start_thread 9 0x0000ffffa6ed1600 in thread_start The root cause is that the hns3 PMD driver invokes the rte_pktmbuf_free_seg API function to release the same rte_mbuf multiple times. The rte_mbuf pointer is not set to NULL in the internal function hns3_rx_queue_release_mbufs, which is invoked during queue setup, stop and close. As a result, the rte_mbufs in Rx queues are released repeatedly when the user application sets up queues or stops/starts the device multiple times. Probably for performance reasons, the DPDK mempool lib does not check for repeated rte_mbuf releases. The addresses of released rte_mbufs are stored directly into the per-lcore cache of the mempool. This causes the same rte_mbuf to be obtained from the mempool repeatedly when calling the rte_mempool_get_bulk API function. Ultimately, this leads to a NULL pointer access in the PMD driver.
This patch fixes this problem by setting the released mbuf pointer to NULL in the internal function named hns3_rx_queue_release_mbufs. The other internal function, named hns3_reassemble_tx_pkts, is also optimized to avoid a similar problem. Fixes: bba636698316 ("net/hns3: support Rx/Tx and related operations") Cc: stable@dpdk.org Signed-off-by: Chengchang Tang Signed-off-by: Wei Hu (Xavier) Signed-off-by: Chengwen Feng --- drivers/net/hns3/hns3_rxtx.c | 61 +++++++++++++++++--------------------------- 1 file changed, 23 insertions(+), 38 deletions(-) diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c index 03d69b1..1a1f828 100644 --- a/drivers/net/hns3/hns3_rxtx.c +++ b/drivers/net/hns3/hns3_rxtx.c @@ -43,15 +43,19 @@ hns3_rx_queue_release_mbufs(struct hns3_rx_queue *rxq) if (rxq->rx_rearm_nb == 0) { for (i = 0; i < rxq->nb_rx_desc; i++) { - if (rxq->sw_ring[i].mbuf != NULL) + if (rxq->sw_ring[i].mbuf != NULL) { rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + rxq->sw_ring[i].mbuf = NULL; + } } } else { for (i = rxq->next_to_use; i != rxq->rx_rearm_start; i = (i + 1) % rxq->nb_rx_desc) { - if (rxq->sw_ring[i].mbuf != NULL) + if (rxq->sw_ring[i].mbuf != NULL) { rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + rxq->sw_ring[i].mbuf = NULL; + } } } @@ -2368,37 +2372,24 @@ hns3_fill_first_desc(struct hns3_tx_queue *txq, struct hns3_desc *desc, } } -static int -hns3_tx_alloc_mbufs(struct hns3_tx_queue *txq, struct rte_mempool *mb_pool, - uint16_t nb_new_buf, struct rte_mbuf **alloc_mbuf) +static inline int +hns3_tx_alloc_mbufs(struct rte_mempool *mb_pool, uint16_t nb_new_buf, + struct rte_mbuf **alloc_mbuf) { - struct rte_mbuf *new_mbuf = NULL; - struct rte_eth_dev *dev; - struct rte_mbuf *temp; - struct hns3_hw *hw; +#define MAX_NON_TSO_BD_PER_PKT 18 + struct rte_mbuf *pkt_segs[MAX_NON_TSO_BD_PER_PKT]; uint16_t i; /* Allocate enough mbufs */ - for (i = 0; i < nb_new_buf; i++) { - temp = rte_pktmbuf_alloc(mb_pool); - if (unlikely(temp == NULL)) { - dev =
&rte_eth_devices[txq->port_id]; - hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); - hns3_err(hw, "Failed to alloc TX mbuf port_id=%d," - "queue_id=%d in reassemble tx pkts.", - txq->port_id, txq->queue_id); - rte_pktmbuf_free(new_mbuf); - return -ENOMEM; - } - temp->next = new_mbuf; - new_mbuf = temp; - } - - if (new_mbuf == NULL) + if (rte_mempool_get_bulk(mb_pool, (void **)pkt_segs, nb_new_buf)) return -ENOMEM; - new_mbuf->nb_segs = nb_new_buf; - *alloc_mbuf = new_mbuf; + for (i = 0; i < nb_new_buf - 1; i++) + pkt_segs[i]->next = pkt_segs[i + 1]; + + pkt_segs[nb_new_buf - 1]->next = NULL; + pkt_segs[0]->nb_segs = nb_new_buf; + *alloc_mbuf = pkt_segs[0]; return 0; } @@ -2418,10 +2409,8 @@ hns3_pktmbuf_copy_hdr(struct rte_mbuf *new_pkt, struct rte_mbuf *old_pkt) } static int -hns3_reassemble_tx_pkts(void *tx_queue, struct rte_mbuf *tx_pkt, - struct rte_mbuf **new_pkt) +hns3_reassemble_tx_pkts(struct rte_mbuf *tx_pkt, struct rte_mbuf **new_pkt) { - struct hns3_tx_queue *txq = tx_queue; struct rte_mempool *mb_pool; struct rte_mbuf *new_mbuf; struct rte_mbuf *temp_new; @@ -2433,7 +2422,6 @@ hns3_reassemble_tx_pkts(void *tx_queue, struct rte_mbuf *tx_pkt, uint16_t len_s; uint16_t len_d; uint16_t len; - uint16_t i; int ret; char *s; char *d; @@ -2449,7 +2437,7 @@ hns3_reassemble_tx_pkts(void *tx_queue, struct rte_mbuf *tx_pkt, last_buf_len = buf_size; /* Allocate enough mbufs */ - ret = hns3_tx_alloc_mbufs(txq, mb_pool, nb_new_buf, &new_mbuf); + ret = hns3_tx_alloc_mbufs(mb_pool, nb_new_buf, &new_mbuf); if (ret) return ret; @@ -2458,12 +2446,9 @@ hns3_reassemble_tx_pkts(void *tx_queue, struct rte_mbuf *tx_pkt, s = rte_pktmbuf_mtod(temp, char *); len_s = rte_pktmbuf_data_len(temp); temp_new = new_mbuf; - for (i = 0; i < nb_new_buf; i++) { + while (temp != NULL && temp_new != NULL) { d = rte_pktmbuf_mtod(temp_new, char *); - if (i < nb_new_buf - 1) - buf_len = buf_size; - else - buf_len = last_buf_len; + buf_len = temp_new->next == NULL ? 
last_buf_len : buf_size; len_d = buf_len; while (len_d) { @@ -2924,7 +2909,7 @@ hns3_check_non_tso_pkt(uint16_t nb_buf, struct rte_mbuf **m_seg, if (unlikely(nb_buf > HNS3_MAX_NON_TSO_BD_PER_PKT)) { txq->exceed_limit_bd_pkt_cnt++; - ret = hns3_reassemble_tx_pkts(txq, tx_pkt, &new_pkt); + ret = hns3_reassemble_tx_pkts(tx_pkt, &new_pkt); if (ret) { txq->exceed_limit_bd_reassem_fail++; return ret;