[1/1] net/failsafe: link_update request crashing at boot

Message ID 20211021115139.2634-1-vipul.ashri@oracle.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series [1/1] net/failsafe: link_update request crashing at boot |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/github-robot: build fail github build: failed
ci/iol-testing fail build patch failure

Commit Message

Vipul Ashri Oct. 21, 2021, 11:51 a.m. UTC
  From: Vipul Ashri <vipul.ashri@oracle.com>

The failsafe driver crashed when an early link_update request was
sent during boot-time initialization.
Debugging showed that the failsafe device itself was valid, but its
sub-devices were still being initialized. The SUBOPS macro expands
to [partial_dev]->dev_ops->link_update(), and executing it triggered
the crash because dev_ops == 0. A similar crash was seen in
failsafe_eth_dev_close().

The failsafe driver needs a separate validity check for sub-devices,
similar to "RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);", which
is called in almost every eth_dev function.

Fixes: a46f8d5 ("net/failsafe: add fail-safe PMD")
Cc: stable@dpdk.org
Signed-off-by: Vipul Ashri <vipul.ashri@oracle.com>
---
 drivers/net/failsafe/failsafe_ops.c     | 45 +++++++++++++++++++++++--
 drivers/net/failsafe/failsafe_private.h |  6 ++++
 2 files changed, 49 insertions(+), 2 deletions(-)
  

Patch

diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index d0030af061..f0c5e40fd5 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -371,7 +371,8 @@  fs_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 		close(rxq->event_fd);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		if (ETH(sdev)->data->rx_queues != NULL &&
-		    ETH(sdev)->data->rx_queues[rxq->qid] != NULL)
+			ETH(sdev)->data->rx_queues[rxq->qid] != NULL &&
+			SUBDEV_VALID_PORTID(sdev)) {
 			SUBOPS(sdev, rx_queue_release)(ETH(sdev), rxq->qid);
 	}
 	dev->data->rx_queues[rxq->qid] = NULL;
@@ -405,6 +406,12 @@  fs_rx_queue_setup(struct rte_eth_dev *dev,
 	fs_lock(dev, 0);
 	if (rx_conf->rx_deferred_start) {
 		FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
+			if (!SUBDEV_VALID_PORTID(sdev)) {
+				ERROR("fs_rx_queue_setup: Invalid sub-device "
+					"port_id=%u\n", PORT_ID(sdev));
+				fs_unlock(dev, 0);
+				return -ENODEV;
+			}
 			if (SUBOPS(sdev, rx_queue_start) == NULL) {
 				ERROR("Rx queue deferred start is not "
 					"supported for subdevice %d", i);
@@ -548,7 +555,8 @@  fs_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		if (ETH(sdev)->data->tx_queues != NULL &&
-		    ETH(sdev)->data->tx_queues[txq->qid] != NULL)
+			ETH(sdev)->data->tx_queues[txq->qid] != NULL &&
+			SUBDEV_VALID_PORTID(sdev)) {
 			SUBOPS(sdev, tx_queue_release)(ETH(sdev), txq->qid);
 	}
 	dev->data->tx_queues[txq->qid] = NULL;
@@ -571,6 +579,12 @@  fs_tx_queue_setup(struct rte_eth_dev *dev,
 	fs_lock(dev, 0);
 	if (tx_conf->tx_deferred_start) {
 		FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
+			if (!SUBDEV_VALID_PORTID(sdev)) {
+				ERROR("fs_tx_queue_setup: Invalid sub-device "
+					"port_id=%u\n", PORT_ID(sdev));
+				fs_unlock(dev, 0);
+				return -ENODEV;
+			}
 			if (SUBOPS(sdev, tx_queue_start) == NULL) {
 				ERROR("Tx queue deferred start is not "
 					"supported for subdevice %d", i);
@@ -645,6 +659,12 @@  failsafe_eth_dev_close(struct rte_eth_dev *dev)
 	fs_lock(dev, 0);
 	failsafe_hotplug_alarm_cancel(dev);
 	if (PRIV(dev)->state == DEV_STARTED) {
+		if (!rte_eth_dev_is_valid_port(dev->data->port_id)) {
+			ERROR("failsafe_eth_dev_close: Invalid sub-device "
+				"port_id=%u\n", dev->data->port_id);
+			fs_unlock(dev, 0);
+			return -ENODEV;
+		}
 		ret = dev->dev_ops->dev_stop(dev);
 		if (ret != 0) {
 			fs_unlock(dev, 0);
@@ -827,6 +847,12 @@  fs_link_update(struct rte_eth_dev *dev,
 
 	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+		if (!SUBDEV_VALID_PORTID(sdev)) {
+			ERROR("fs_link_update: Invalid Sub-device "
+				"port_id=%u\n", PORT_ID(sdev));
+			fs_unlock(dev, 0);
+			return -ENODEV;
+		}
 		DEBUG("Calling link_update on sub_device %d", i);
 		ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
 		if (ret && ret != -1 && sdev->remove == 0 &&
@@ -1251,6 +1277,15 @@  fs_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 		goto unlock;
 	}
 	edev = ETH(sdev);
+
+	if (!SUBDEV_VALID_PORTID(sdev)) {
+		ERROR("fs_dev_supported_ptypes_get: "
+			"Invalid TX_SUBDEV port_id=%u\n", PORT_ID(sdev));
+		rte_errno = -ENODEV;
+		ret = NULL;
+		goto unlock;
+	}
+
 	/* ENOTSUP: counts as no supported ptypes */
 	if (SUBOPS(sdev, dev_supported_ptypes_get) == NULL) {
 		ret = NULL;
@@ -1326,6 +1361,12 @@  fs_flow_ctrl_get(struct rte_eth_dev *dev,
 		ret = 0;
 		goto unlock;
 	}
+	if (!SUBDEV_VALID_PORTID(sdev)) {
+		ERROR("fs_flow_ctrl_get_get: Invalid TX_SUBDEV "
+			"port_id=%u\n", PORT_ID(sdev));
+		ret = -ENODEV;
+		goto unlock;
+	}
 	if (SUBOPS(sdev, flow_ctrl_get) == NULL) {
 		ret = -ENOTSUP;
 		goto unlock;
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index cd39d103c6..95eb31f9e6 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -308,6 +308,12 @@  extern int failsafe_mac_from_arg;
 	 : (PRIV(dev)->subs[PRIV(dev)->subs_tx].state < DEV_PROBED ? NULL \
 	 : &PRIV(dev)->subs[PRIV(dev)->subs_tx]))
 
+/**
+ * Check whether the port ID of a fail-safe sub-device is valid.
+ */
+#define SUBDEV_VALID_PORTID(s) \
+    rte_eth_dev_is_valid_port(PORT_ID(s))
+
 /**
  * s:   (struct sub_device *)
  * ops: (struct eth_dev_ops) member