@@ -924,6 +924,8 @@ port_infos_display(portid_t port_id)
}
if (dev_info.err_handle_mode == RTE_ETH_ERROR_HANDLE_MODE_PASSIVE)
printf("Device error handling mode: passive\n");
+ else if (dev_info.err_handle_mode == RTE_ETH_ERROR_HANDLE_MODE_PROACTIVE)
+ printf("Device error handling mode: proactive\n");
}
void
@@ -627,3 +627,42 @@ by application.
The PMD itself should not call rte_eth_dev_reset(). The PMD can trigger
the application to handle reset event. It is duty of application to
handle all synchronization before it calls rte_eth_dev_reset().
+
+The above error handling mode is known as ``RTE_ETH_ERROR_HANDLE_MODE_PASSIVE``.
+
+Proactive Error Handling Mode
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If the PMD supports ``RTE_ETH_ERROR_HANDLE_MODE_PROACTIVE``, it means that once
+hardware or firmware errors are detected, the PMD will try to recover from
+them. During this process, the PMD sets the data path pointers to dummy
+functions (which prevents crashes), and also makes sure that control path
+operations fail with return code -EBUSY.
+
+Also during this process, from the perspective of the application, services
+are affected. For example, the Rx/Tx burst APIs cannot receive and send
+packets, and the control plane APIs return failure.
+
+In some service scenarios, the application needs to be aware of the event to
+determine whether to migrate services. So three events are introduced:
+
+* RTE_ETH_EVENT_ERR_RECOVERING: used to notify the application that the PMD
+ detected an error and the recovery is being started. Upon receiving the
+ event, the application should not invoke any control path APIs until
+ receiving RTE_ETH_EVENT_RECOVERY_SUCCESS or RTE_ETH_EVENT_RECOVERY_FAILED event.
+
+
+* RTE_ETH_EVENT_RECOVERY_SUCCESS: used to notify the application that the port
+ has recovered successfully from the error; the PMD has already re-configured
+ the port to the state prior to the error.
+
+* RTE_ETH_EVENT_RECOVERY_FAILED: used to notify the application that the port
+ has failed to recover from the error and is not usable anymore. The
+ application should close the port.
+
+.. note::
+ * Before the PMD reports the recovery result, the PMD may report the
+ ``RTE_ETH_EVENT_ERR_RECOVERING`` event again, because a larger error
+ may occur during the recovery.
+ * The error handling mode supported by the PMD can be reported through
+ the ``rte_eth_dev_info_get`` API.
@@ -55,6 +55,18 @@ New Features
Also, make sure to start the actual text at the margin.
=======================================================
+* **Added proactive error handling mode for ethdev.**
+
+ Added proactive error handling mode for ethdev, and three events were
+ introduced:
+
+ * Added new event: ``RTE_ETH_EVENT_ERR_RECOVERING`` for the PMD to report
+ that the port is recovering from an error.
+ * Added new event: ``RTE_ETH_EVENT_RECOVERY_SUCCESS`` for the PMD to report
+ that the port has recovered successfully from an error.
+ * Added new event: ``RTE_ETH_EVENT_RECOVERY_FAILED`` for the PMD to report
+ that the port has failed to recover from an error.
+
Removed Items
-------------
@@ -1859,6 +1859,12 @@ enum rte_eth_err_handle_mode {
* application invoke @see rte_eth_dev_reset to recover the port.
*/
RTE_ETH_ERROR_HANDLE_MODE_PASSIVE,
+ /** Proactive error handling, after the PMD detects that a reset is
+ * required, the PMD reports @see RTE_ETH_EVENT_ERR_RECOVERING event,
+ * and does recovery internally, and finally reports the recovery result
+ * event (@see RTE_ETH_EVENT_RECOVERY_*).
+ */
+ RTE_ETH_ERROR_HANDLE_MODE_PROACTIVE,
};
/**
@@ -3944,6 +3950,33 @@ enum rte_eth_event_type {
* @see rte_eth_rx_avail_thresh_set()
*/
RTE_ETH_EVENT_RX_AVAIL_THRESH,
+ /** Port recovering from a hardware or firmware error.
+ * If PMD supports proactive error recovery, it should trigger this
+ * event to notify application that it detected an error and the
+ * recovery is being started. Upon receiving the event, the application
+ * should not invoke any control path APIs (such as
+ * rte_eth_dev_configure/rte_eth_dev_stop...) until receiving
+ * RTE_ETH_EVENT_RECOVERY_SUCCESS or RTE_ETH_EVENT_RECOVERY_FAILED
+ * event.
+ * The PMD will set the data path pointers to dummy functions, and
+ * re-set the data path pointers to non-dummy functions before reporting
+ * RTE_ETH_EVENT_RECOVERY_SUCCESS event. It means that the application
+ * cannot send or receive any packets during this period.
+ * @note Before the PMD reports the recovery result, the PMD may report
+ * the RTE_ETH_EVENT_ERR_RECOVERING event again, because a larger error
+ * may occur during the recovery.
+ */
+ RTE_ETH_EVENT_ERR_RECOVERING,
+ /** Port has recovered successfully from the error.
+ * The PMD has already re-configured the port to the state prior to the
+ * error.
+ */
+ RTE_ETH_EVENT_RECOVERY_SUCCESS,
+ /** Port has failed to recover from the error.
+ * It means that the port is not usable anymore. The application
+ * should close the port.
+ */
+ RTE_ETH_EVENT_RECOVERY_FAILED,
RTE_ETH_EVENT_MAX /**< max value of this enum */
};