[v2,03/13] net/bnxt: handle reset notify async event from FW
diff mbox series

Message ID 20190830163537.32704-4-ajit.khaparde@broadcom.com
State Changes Requested, archived
Delegated to: Ferruh Yigit
Headers show
Series
  • bnxt patchset to support device error recovery
Related show

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Ajit Khaparde Aug. 30, 2019, 4:35 p.m. UTC
From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

When a FW upgrade is initiated, the currently running FW instance
issues a HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY async
notification to the driver. On receiving this notification, the PMD
shall quiesce itself and then poll the FW with the HWRM_VER_GET
command at regular intervals.

Once the VER_GET command succeeds, the driver should go through
the rediscovery process and re-initialize the device.

Also register with FW for the reset notify async event.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        |  13 ++++
 drivers/net/bnxt/bnxt_cpr.c    |  16 +++++
 drivers/net/bnxt/bnxt_cpr.h    |   1 +
 drivers/net/bnxt/bnxt_ethdev.c | 109 ++++++++++++++++++++++++++++-----
 drivers/net/bnxt/bnxt_hwrm.c   |  39 +++++++++---
 drivers/net/bnxt/bnxt_hwrm.h   |   2 +
 6 files changed, 157 insertions(+), 23 deletions(-)

Patch
diff mbox series

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 37b4c717d..8797b032e 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -333,6 +333,16 @@  struct bnxt_ctx_mem_info {
 	struct bnxt_ctx_pg_info *tqm_mem[BNXT_MAX_TC_Q];
 };
 
+/* Maximum Firmware Reset bail out value in milliseconds */
+#define BNXT_MAX_FW_RESET_TIMEOUT	6000
+/* Minimum time required for the firmware readiness in milliseconds */
+#define BNXT_MIN_FW_READY_TIMEOUT	2000
+/* Frequency for the firmware readiness check in milliseconds */
+#define BNXT_FW_READY_WAIT_INTERVAL	100
+
+#define US_PER_MS			1000
+#define NS_PER_US			1000
+
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
 struct bnxt {
 	void				*bar0;
@@ -463,6 +473,9 @@  struct bnxt {
 	struct bnxt_ptp_cfg     *ptp_cfg;
 	uint16_t		vf_resv_strategy;
 	struct bnxt_ctx_mem_info        *ctx;
+
+	uint16_t		fw_reset_min_msecs;
+	uint16_t		fw_reset_max_msecs;
 };
 
 int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete);
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 655bcf1a8..62a16d2ed 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -4,6 +4,7 @@ 
  */
 
 #include <rte_malloc.h>
+#include <rte_alarm.h>
 
 #include "bnxt.h"
 #include "bnxt_cpr.h"
@@ -40,6 +41,21 @@  void bnxt_handle_async_event(struct bnxt *bp,
 	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED:
 		PMD_DRV_LOG(INFO, "Port conn async event\n");
 		break;
+	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY:
+		/* timestamp_lo/hi values are in units of 100ms */
+		bp->fw_reset_max_msecs = async_cmp->timestamp_hi ?
+			rte_le_to_cpu_16(async_cmp->timestamp_hi) * 100 :
+			BNXT_MAX_FW_RESET_TIMEOUT;
+		bp->fw_reset_min_msecs = async_cmp->timestamp_lo ?
+			async_cmp->timestamp_lo * 100 :
+			BNXT_MIN_FW_READY_TIMEOUT;
+		PMD_DRV_LOG(INFO,
+			    "Firmware non-fatal reset event received\n");
+
+		bp->flags |= BNXT_FLAG_FW_RESET;
+		rte_eal_alarm_set(US_PER_MS, bnxt_dev_reset_and_resume,
+				  (void *)bp);
+		break;
 	default:
 		PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id);
 		break;
diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index 8c6a34b61..f48293b96 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h
@@ -106,5 +106,6 @@  struct bnxt;
 void bnxt_handle_async_event(struct bnxt *bp, struct cmpl_base *cmp);
 void bnxt_handle_fwd_req(struct bnxt *bp, struct cmpl_base *cmp);
 int bnxt_event_hwrm_resp_handler(struct bnxt *bp, struct cmpl_base *cmp);
+void bnxt_dev_reset_and_resume(void *arg);
 
 #endif
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 33ff4a5a7..e545802ce 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -11,6 +11,7 @@ 
 #include <rte_ethdev_pci.h>
 #include <rte_malloc.h>
 #include <rte_cycles.h>
+#include <rte_alarm.h>
 
 #include "bnxt.h"
 #include "bnxt_cpr.h"
@@ -166,6 +167,8 @@  static int bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask);
 static void bnxt_print_link_info(struct rte_eth_dev *eth_dev);
 static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
 static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
+static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev);
+static int bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev);
 
 int is_bnxt_in_error(struct bnxt *bp)
 {
@@ -201,19 +204,25 @@  static uint16_t  bnxt_rss_hash_tbl_size(const struct bnxt *bp)
 	return bnxt_rss_ctxts(bp) * BNXT_RSS_ENTRIES_PER_CTX_THOR;
 }
 
-static void bnxt_free_mem(struct bnxt *bp)
+static void bnxt_free_mem(struct bnxt *bp, bool reconfig)
 {
 	bnxt_free_filter_mem(bp);
 	bnxt_free_vnic_attributes(bp);
 	bnxt_free_vnic_mem(bp);
 
-	bnxt_free_stats(bp);
-	bnxt_free_tx_rings(bp);
-	bnxt_free_rx_rings(bp);
+	/* tx/rx rings are configured as part of *_queue_setup callbacks.
+	 * If the number of rings change across fw update,
+	 * we don't have much choice except to warn the user.
+	 */
+	if (!reconfig) {
+		bnxt_free_stats(bp);
+		bnxt_free_tx_rings(bp);
+		bnxt_free_rx_rings(bp);
+	}
 	bnxt_free_async_cp_ring(bp);
 }
 
-static int bnxt_alloc_mem(struct bnxt *bp)
+static int bnxt_alloc_mem(struct bnxt *bp, bool reconfig)
 {
 	int rc;
 
@@ -244,7 +253,7 @@  static int bnxt_alloc_mem(struct bnxt *bp)
 	return 0;
 
 alloc_mem_err:
-	bnxt_free_mem(bp);
+	bnxt_free_mem(bp, reconfig);
 	return rc;
 }
 
@@ -3483,6 +3492,71 @@  static const struct eth_dev_ops bnxt_dev_ops = {
 	.timesync_read_tx_timestamp = bnxt_timesync_read_tx_timestamp,
 };
 
+static void bnxt_dev_cleanup(struct bnxt *bp)
+{
+	bnxt_set_hwrm_link_config(bp, false);
+	bp->link_info.link_up = 0;
+	if (bp->dev_stopped == 0)
+		bnxt_dev_stop_op(bp->eth_dev);
+	/* true = reconfig: rings, stats and HWRM resources are not freed */
+	bnxt_uninit_resources(bp, true);
+}
+
+static void bnxt_dev_recover(void *arg)
+{
+	struct bnxt *bp = arg;
+	int timeout = bp->fw_reset_max_msecs;
+	int rc = 0;
+
+	do {
+		rc = bnxt_hwrm_ver_get(bp);
+		if (rc == 0)
+			break;
+		rte_delay_ms(BNXT_FW_READY_WAIT_INTERVAL);
+		timeout -= BNXT_FW_READY_WAIT_INTERVAL;
+	} while (rc && timeout > 0);
+	/* "timeout > 0" stops polling even if the window overshoots zero */
+	if (rc) {
+		PMD_DRV_LOG(ERR, "FW is not Ready after reset\n");
+		goto err;
+	}
+	/* FW answered VER_GET; redo the init undone by bnxt_dev_cleanup() */
+	rc = bnxt_init_resources(bp, true);
+	if (rc) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to initialize resources after reset\n");
+		goto err;
+	}
+	/* clear reset flag as the device is initialized now */
+	bp->flags &= ~BNXT_FLAG_FW_RESET;
+
+	rc = bnxt_dev_start_op(bp->eth_dev);
+	if (rc) {
+		PMD_DRV_LOG(ERR, "Failed to start port after reset\n");
+		goto err;
+	}
+
+	PMD_DRV_LOG(INFO, "Recovered from FW reset\n");
+	return;
+err:
+	bp->flags |= BNXT_FLAG_FATAL_ERROR;
+	bnxt_uninit_resources(bp, false);
+	PMD_DRV_LOG(ERR, "Failed to recover from FW reset\n");
+}
+
+void bnxt_dev_reset_and_resume(void *arg)
+{
+	struct bnxt *bp = arg;
+	int rc;
+
+	bnxt_dev_cleanup(bp);
+	/* Defer bnxt_dev_recover() by fw_reset_min_msecs (alarm takes us) */
+	rc = rte_eal_alarm_set(US_PER_MS * bp->fw_reset_min_msecs,
+			       bnxt_dev_recover, (void *)bp);
+	if (rc)
+		PMD_DRV_LOG(ERR, "Error setting recovery alarm\n");
+}
+
 static bool bnxt_vf_pciid(uint16_t id)
 {
 	if (id == BROADCOM_DEV_ID_57304_VF ||
@@ -3998,7 +4072,7 @@  static int bnxt_init_fw(struct bnxt *bp)
 	return 0;
 }
 
-static int bnxt_init_resources(struct bnxt *bp)
+static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev)
 {
 	int rc;
 
@@ -4006,9 +4080,11 @@  static int bnxt_init_resources(struct bnxt *bp)
 	if (rc)
 		return rc;
 
-	rc = bnxt_setup_mac_addr(bp->eth_dev);
-	if (rc)
-		return rc;
+	if (!reconfig_dev) {
+		rc = bnxt_setup_mac_addr(bp->eth_dev);
+		if (rc)
+			return rc;
+	}
 
 	bnxt_config_vf_req_fwd(bp);
 
@@ -4035,7 +4111,7 @@  static int bnxt_init_resources(struct bnxt *bp)
 		}
 	}
 
-	rc = bnxt_alloc_mem(bp);
+	rc = bnxt_alloc_mem(bp, reconfig_dev);
 	if (rc)
 		return rc;
 
@@ -4109,7 +4185,7 @@  bnxt_dev_init(struct rte_eth_dev *eth_dev)
 			    "Failed to allocate hwrm resource rc: %x\n", rc);
 		goto error_free;
 	}
-	rc = bnxt_init_resources(bp);
+	rc = bnxt_init_resources(bp, false);
 	if (rc)
 		goto error_free;
 
@@ -4130,18 +4206,19 @@  bnxt_dev_init(struct rte_eth_dev *eth_dev)
 }
 
 static int
-bnxt_uninit_resources(struct bnxt *bp)
+bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
 {
 	int rc;
 
 	bnxt_disable_int(bp);
 	bnxt_free_int(bp);
-	bnxt_free_mem(bp);
+	bnxt_free_mem(bp, reconfig_dev);
 	bnxt_hwrm_func_buf_unrgtr(bp);
 	rc = bnxt_hwrm_func_driver_unregister(bp, 0);
 	bp->flags &= ~BNXT_FLAG_REGISTERED;
 	bnxt_free_ctx_mem(bp);
-	bnxt_free_hwrm_resources(bp);
+	if (!reconfig_dev)
+		bnxt_free_hwrm_resources(bp);
 
 	return rc;
 }
@@ -4157,7 +4234,7 @@  bnxt_dev_uninit(struct rte_eth_dev *eth_dev)
 
 	PMD_DRV_LOG(DEBUG, "Calling Device uninit\n");
 
-	rc = bnxt_uninit_resources(bp);
+	rc = bnxt_uninit_resources(bp, false);
 
 	if (bp->grp_info != NULL) {
 		rte_free(bp->grp_info);
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 24a5a0914..b27dbe87e 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -26,7 +26,7 @@ 
 
 #include <rte_io.h>
 
-#define HWRM_CMD_TIMEOUT		6000000
+#define HWRM_SHORT_CMD_TIMEOUT		50000
 #define HWRM_SPEC_CODE_1_8_3		0x10803
 #define HWRM_VERSION_1_9_1		0x10901
 #define HWRM_VERSION_1_9_2		0x10903
@@ -97,6 +97,14 @@  static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
 		GRCPF_REG_KONG_CHANNEL_OFFSET : GRCPF_REG_CHIMP_CHANNEL_OFFSET;
 	uint16_t mb_trigger_offset = use_kong_mb ?
 		GRCPF_REG_KONG_COMM_TRIGGER : GRCPF_REG_CHIMP_COMM_TRIGGER;
+	uint32_t timeout;
+
+	/* Do not send HWRM commands to firmware in error state */
+	if (bp->flags & BNXT_FLAG_FATAL_ERROR)
+		return 0;
+
+	/* For VER_GET command, set timeout as 50ms */
+	timeout = HWRM_SHORT_CMD_TIMEOUT;
 
 	if (bp->flags & BNXT_FLAG_SHORT_CMD ||
 	    msg_len > bp->max_req_len) {
@@ -139,7 +147,7 @@  static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
 	rte_write32(1, bar);
 
 	/* Poll for the valid bit */
-	for (i = 0; i < HWRM_CMD_TIMEOUT; i++) {
+	for (i = 0; i < timeout; i++) {
 		/* Sanity check on the resp->resp_len */
 		rte_rmb();
 		if (resp->resp_len && resp->resp_len <= bp->max_resp_len) {
@@ -151,7 +159,12 @@  static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
 		rte_delay_us(1);
 	}
 
-	if (i >= HWRM_CMD_TIMEOUT) {
+	if (i >= timeout) {
+		/* Suppress VER_GET timeout messages during reset recovery */
+		if (bp->flags & BNXT_FLAG_FW_RESET &&
+		    rte_cpu_to_le_16(req->req_type) == HWRM_VER_GET)
+			return -ETIMEDOUT;
+
 		PMD_DRV_LOG(ERR, "Error(timeout) sending msg 0x%04x\n",
 			    req->req_type);
 		return -ETIMEDOUT;
@@ -657,12 +670,15 @@  int bnxt_hwrm_func_reset(struct bnxt *bp)
 int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 {
 	int rc;
+	uint32_t flags = 0;
 	struct hwrm_func_drv_rgtr_input req = {.req_type = 0 };
 	struct hwrm_func_drv_rgtr_output *resp = bp->hwrm_cmd_resp_addr;
 
 	if (bp->flags & BNXT_FLAG_REGISTERED)
 		return 0;
 
+	flags = HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_HOT_RESET_SUPPORT;
+
 	HWRM_PREP(req, FUNC_DRV_RGTR, BNXT_USE_CHIMP_MB);
 	req.enables = rte_cpu_to_le_32(HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_VER |
 			HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_ASYNC_EVENT_FWD);
@@ -683,14 +699,16 @@  int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 		 * this HWRM sniffer list in FW because DPDK PF driver does
 		 * not support this.
 		 */
-		req.flags =
-		rte_cpu_to_le_32(HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_FWD_NONE_MODE);
+		flags |= HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_FWD_NONE_MODE;
 	}
 
+	req.flags = rte_cpu_to_le_32(flags);
+
 	req.async_event_fwd[0] |=
 		rte_cpu_to_le_32(ASYNC_CMPL_EVENT_ID_LINK_STATUS_CHANGE |
 				 ASYNC_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED |
-				 ASYNC_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE);
+				 ASYNC_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE |
+				 ASYNC_CMPL_EVENT_ID_RESET_NOTIFY);
 	req.async_event_fwd[1] |=
 		rte_cpu_to_le_32(ASYNC_CMPL_EVENT_ID_PF_DRVR_UNLOAD |
 				 ASYNC_CMPL_EVENT_ID_VF_CFG_CHANGE);
@@ -837,7 +855,10 @@  int bnxt_hwrm_ver_get(struct bnxt *bp)
 
 	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
-	HWRM_CHECK_RESULT();
+	if (bp->flags & BNXT_FLAG_FW_RESET)
+		HWRM_CHECK_RESULT_SILENT();
+	else
+		HWRM_CHECK_RESULT();
 
 	PMD_DRV_LOG(INFO, "%d.%d.%d:%d.%d.%d\n",
 		resp->hwrm_intf_maj_8b, resp->hwrm_intf_min_8b,
@@ -2685,6 +2706,10 @@  int bnxt_hwrm_func_qcfg(struct bnxt *bp, uint16_t *mtu)
 	if (BNXT_VF(bp) && (flags & HWRM_FUNC_QCFG_OUTPUT_FLAGS_TRUSTED_VF)) {
 		bp->flags |= BNXT_FLAG_TRUSTED_VF_EN;
 		PMD_DRV_LOG(INFO, "Trusted VF cap enabled\n");
+	} else if (BNXT_VF(bp) &&
+		   !(flags & HWRM_FUNC_QCFG_OUTPUT_FLAGS_TRUSTED_VF)) {
+		bp->flags &= ~BNXT_FLAG_TRUSTED_VF_EN;
+		PMD_DRV_LOG(INFO, "Trusted VF cap disabled\n");
 	}
 
 	if (mtu)
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index c882fc2a1..a03620532 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -21,6 +21,8 @@  struct bnxt_cp_ring_info;
 	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED)
 #define ASYNC_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE	\
 	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE)
+#define ASYNC_CMPL_EVENT_ID_RESET_NOTIFY \
+	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY)
 #define ASYNC_CMPL_EVENT_ID_PF_DRVR_UNLOAD	\
 	(1 << (HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD - 32))
 #define ASYNC_CMPL_EVENT_ID_VF_CFG_CHANGE	\