[03/19] net/txgbe: fix Tx hang on queue disable

Message ID 20240618071150.21564-4-jiawenwu@trustnetic.com (mailing list archive)
State Accepted, archived
Delegated to: Ferruh Yigit
Headers
Series Wangxun fixes and supports |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Jiawen Wu June 18, 2024, 7:11 a.m. UTC
The problem of Tx hang also occurs on Wangxun 10Gb NICs, when stop device
under heavy traffic.

refer to commit ac6c5e9af56a ("net/ngbe: fix Tx hang on queue disable")

Disable PCIe bus master to clear BME when stop hardware, and verify there
are no pending requests. Move disabling Tx queue after disabling PCIe bus
master to ensure that there are no packets left to cause Tx hang.

Fixes: b1f596677d8e ("net/txgbe: support device start")
Cc: stable@dpdk.org

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
---
 drivers/net/txgbe/base/meson.build   |  2 +-
 drivers/net/txgbe/base/txgbe_hw.c    | 60 +++++++++++++++++++++++-----
 drivers/net/txgbe/base/txgbe_hw.h    |  1 +
 drivers/net/txgbe/base/txgbe_osdep.h |  1 +
 drivers/net/txgbe/base/txgbe_regs.h  |  3 ++
 drivers/net/txgbe/base/txgbe_type.h  |  1 +
 drivers/net/txgbe/txgbe_ethdev.c     |  7 ++++
 7 files changed, 65 insertions(+), 10 deletions(-)
  

Patch

diff --git a/drivers/net/txgbe/base/meson.build b/drivers/net/txgbe/base/meson.build
index a81d6890fe..4cf90a394a 100644
--- a/drivers/net/txgbe/base/meson.build
+++ b/drivers/net/txgbe/base/meson.build
@@ -22,6 +22,6 @@  foreach flag: error_cflags
 endforeach
 
 base_lib = static_library('txgbe_base', sources,
-    dependencies: [static_rte_eal, static_rte_net],
+    dependencies: [static_rte_eal, static_rte_net, static_rte_bus_pci],
     c_args: c_args)
 base_objs = base_lib.extract_all_objects(recursive: true)
diff --git a/drivers/net/txgbe/base/txgbe_hw.c b/drivers/net/txgbe/base/txgbe_hw.c
index d19fd0065d..7094551fee 100644
--- a/drivers/net/txgbe/base/txgbe_hw.c
+++ b/drivers/net/txgbe/base/txgbe_hw.c
@@ -462,7 +462,7 @@  void txgbe_set_lan_id_multi_port(struct txgbe_hw *hw)
  **/
 s32 txgbe_stop_hw(struct txgbe_hw *hw)
 {
-	u32 reg_val;
+	s32 status = 0;
 	u16 i;
 
 	/*
@@ -484,16 +484,26 @@  s32 txgbe_stop_hw(struct txgbe_hw *hw)
 	wr32(hw, TXGBE_ICR(0), TXGBE_ICR_MASK);
 	wr32(hw, TXGBE_ICR(1), TXGBE_ICR_MASK);
 
-	/* Disable the transmit unit.  Each queue must be disabled. */
-	for (i = 0; i < hw->mac.max_tx_queues; i++)
-		wr32(hw, TXGBE_TXCFG(i), TXGBE_TXCFG_FLUSH);
+	wr32(hw, TXGBE_BMECTL, 0x3);
 
 	/* Disable the receive unit by stopping each queue */
-	for (i = 0; i < hw->mac.max_rx_queues; i++) {
-		reg_val = rd32(hw, TXGBE_RXCFG(i));
-		reg_val &= ~TXGBE_RXCFG_ENA;
-		wr32(hw, TXGBE_RXCFG(i), reg_val);
-	}
+	for (i = 0; i < hw->mac.max_rx_queues; i++)
+		wr32(hw, TXGBE_RXCFG(i), 0);
+
+	/* flush all queues disables */
+	txgbe_flush(hw);
+	msec_delay(2);
+
+	/* Prevent the PCI-E bus from hanging by disabling PCI-E master
+	 * access and verify no pending requests
+	 */
+	status = txgbe_set_pcie_master(hw, false);
+	if (status)
+		return status;
+
+	/* Disable the transmit unit.  Each queue must be disabled. */
+	for (i = 0; i < hw->mac.max_tx_queues; i++)
+		wr32(hw, TXGBE_TXCFG(i), 0);
 
 	/* flush all queues disables */
 	txgbe_flush(hw);
@@ -1174,6 +1184,38 @@  void txgbe_fc_autoneg(struct txgbe_hw *hw)
 	}
 }
 
+s32 txgbe_set_pcie_master(struct txgbe_hw *hw, bool enable)
+{
+	struct rte_pci_device *pci_dev = (struct rte_pci_device *)hw->back;
+	s32 status = 0;
+	u32 i;
+
+	if (rte_pci_set_bus_master(pci_dev, enable) < 0) {
+		DEBUGOUT("Cannot configure PCI bus master.");
+		return -1;
+	}
+
+	if (enable)
+		goto out;
+
+	/* Exit if master requests are blocked */
+	if (!(rd32(hw, TXGBE_BMEPEND)))
+		goto out;
+
+	/* Poll for master request bit to clear */
+	for (i = 0; i < TXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) {
+		usec_delay(100);
+		if (!(rd32(hw, TXGBE_BMEPEND)))
+			goto out;
+	}
+
+	DEBUGOUT("PCIe transaction pending bit also did not clear.");
+	status = TXGBE_ERR_MASTER_REQUESTS_PENDING;
+
+out:
+	return status;
+}
+
 /**
  *  txgbe_acquire_swfw_sync - Acquire SWFW semaphore
  *  @hw: pointer to hardware structure
diff --git a/drivers/net/txgbe/base/txgbe_hw.h b/drivers/net/txgbe/base/txgbe_hw.h
index 7031589f7c..4bf9da2d4c 100644
--- a/drivers/net/txgbe/base/txgbe_hw.h
+++ b/drivers/net/txgbe/base/txgbe_hw.h
@@ -40,6 +40,7 @@  s32 txgbe_setup_fc(struct txgbe_hw *hw);
 s32 txgbe_validate_mac_addr(u8 *mac_addr);
 s32 txgbe_acquire_swfw_sync(struct txgbe_hw *hw, u32 mask);
 void txgbe_release_swfw_sync(struct txgbe_hw *hw, u32 mask);
+s32 txgbe_set_pcie_master(struct txgbe_hw *hw, bool enable);
 
 s32 txgbe_get_san_mac_addr(struct txgbe_hw *hw, u8 *san_mac_addr);
 s32 txgbe_set_san_mac_addr(struct txgbe_hw *hw, u8 *san_mac_addr);
diff --git a/drivers/net/txgbe/base/txgbe_osdep.h b/drivers/net/txgbe/base/txgbe_osdep.h
index 4fce355000..62d16a6abb 100644
--- a/drivers/net/txgbe/base/txgbe_osdep.h
+++ b/drivers/net/txgbe/base/txgbe_osdep.h
@@ -19,6 +19,7 @@ 
 #include <rte_config.h>
 #include <rte_io.h>
 #include <rte_ether.h>
+#include <bus_pci_driver.h>
 
 #include "../txgbe_logs.h"
 
diff --git a/drivers/net/txgbe/base/txgbe_regs.h b/drivers/net/txgbe/base/txgbe_regs.h
index 79290a7afe..86896d11dc 100644
--- a/drivers/net/txgbe/base/txgbe_regs.h
+++ b/drivers/net/txgbe/base/txgbe_regs.h
@@ -1236,6 +1236,9 @@  enum txgbe_5tuple_protocol {
 #define TXGBE_TCPTMR                    0x000170
 #define TXGBE_ITRSEL                    0x000180
 
+#define TXGBE_BMECTL                    0x012020
+#define TXGBE_BMEPEND                   0x000168
+
 /* P2V Mailbox */
 #define TXGBE_MBMEM(i)           (0x005000 + 0x40 * (i)) /* 0-63 */
 #define TXGBE_MBCTL(i)           (0x000600 + 4 * (i)) /* 0-63 */
diff --git a/drivers/net/txgbe/base/txgbe_type.h b/drivers/net/txgbe/base/txgbe_type.h
index 75e839b7de..f52736cae9 100644
--- a/drivers/net/txgbe/base/txgbe_type.h
+++ b/drivers/net/txgbe/base/txgbe_type.h
@@ -29,6 +29,7 @@ 
 #define TXGBE_FDIRCMD_CMD_POLL			10
 #define TXGBE_VF_INIT_TIMEOUT	200 /* Number of retries to clear RSTI */
 #define TXGBE_SPI_TIMEOUT	10000
+#define TXGBE_PCI_MASTER_DISABLE_TIMEOUT	800
 
 #define TXGBE_ALIGN		128 /* as intel did */
 
diff --git a/drivers/net/txgbe/txgbe_ethdev.c b/drivers/net/txgbe/txgbe_ethdev.c
index fa68a5d2ca..121dccb5eb 100644
--- a/drivers/net/txgbe/txgbe_ethdev.c
+++ b/drivers/net/txgbe/txgbe_ethdev.c
@@ -601,6 +601,7 @@  eth_txgbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused)
 	hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
 
 	/* Vendor and Device ID need to be set before init of shared code */
+	hw->back = pci_dev;
 	hw->device_id = pci_dev->id.device_id;
 	hw->vendor_id = pci_dev->id.vendor_id;
 	if (pci_dev->id.subsystem_vendor_id == PCI_VENDOR_ID_WANGXUN) {
@@ -1717,6 +1718,8 @@  txgbe_dev_start(struct rte_eth_dev *dev)
 	hw->mac.get_link_status = true;
 	hw->dev_start = true;
 
+	txgbe_set_pcie_master(hw, true);
+
 	/* workaround for GPIO intr lost when mng_veto bit is set */
 	if (txgbe_check_reset_blocked(hw))
 		txgbe_reinit_gpio_intr(hw);
@@ -1980,6 +1983,8 @@  txgbe_dev_stop(struct rte_eth_dev *dev)
 	adapter->rss_reta_updated = 0;
 	wr32m(hw, TXGBE_LEDCTL, 0xFFFFFFFF, TXGBE_LEDCTL_SEL_MASK);
 
+	txgbe_set_pcie_master(hw, true);
+
 	hw->adapter_stopped = true;
 	dev->data->dev_started = 0;
 	hw->dev_start = false;
@@ -2062,6 +2067,8 @@  txgbe_dev_close(struct rte_eth_dev *dev)
 
 	txgbe_dev_free_queues(dev);
 
+	txgbe_set_pcie_master(hw, false);
+
 	/* reprogram the RAR[0] in case user changed it. */
 	txgbe_set_rar(hw, 0, hw->mac.addr, 0, true);