[v10,4/8] bus/pci: implement sigbus handler ops

Message ID 1534502916-31636-5-git-send-email-jia.guo@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series hotplug failure handle mechanism |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Guo, Jia Aug. 17, 2018, 10:48 a.m. UTC
  This patch implements the ops for the PCI bus sigbus handler. It finds the
PCI device that is being hotplugged out and calls the relevant ops of the
memory failure handler to handle the failure of the device.

Signed-off-by: Jeff Guo <jia.guo@intel.com>
Acked-by: Shaopeng He <shaopeng.he@intel.com>
---
v10->v9:
refine doc.
---
 drivers/bus/pci/pci_common.c | 53 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
  

Patch

diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index 759ccc3..b8f3244 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -405,6 +405,36 @@  pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
 	return NULL;
 }
 
+/**
+ * find the device which encounter the failure, by iterate all device on
+ * PCI bus to check if the memory failure address is located in the range
+ * of the BARs of any device.
+ */
+static struct rte_pci_device *
+pci_find_device_by_addr(const void *failure_addr)
+{
+	struct rte_pci_device *pdev = NULL;
+	int i;
+
+	FOREACH_DEVICE_ON_PCIBUS(pdev) {
+		for (i = 0; i != RTE_DIM(pdev->mem_resource); i++) {
+			if ((uint64_t)(uintptr_t)failure_addr >=
+			    (uint64_t)(uintptr_t)pdev->mem_resource[i].addr &&
+			    (uint64_t)(uintptr_t)failure_addr <
+			    (uint64_t)(uintptr_t)pdev->mem_resource[i].addr +
+			    pdev->mem_resource[i].len) {
+				RTE_LOG(INFO, EAL, "Failure address "
+					"%16.16"PRIx64" belongs to "
+					"device %s!\n",
+					(uint64_t)(uintptr_t)failure_addr,
+					pdev->device.name);
+				return pdev;
+			}
+		}
+	}
+	return NULL;
+}
+
 static int
 pci_memory_failure_handler(struct rte_device *dev)
 {
@@ -433,6 +463,28 @@  pci_memory_failure_handler(struct rte_device *dev)
 }
 
 static int
+pci_sigbus_handler(const void *failure_addr)
+{
+	struct rte_pci_device *pdev = NULL;
+	int ret = 0;
+
+	pdev = pci_find_device_by_addr(failure_addr);
+	if (!pdev) {
+		/* It is a generic sigbus error, no bus would handle it. */
+		ret = 1;
+	} else {
+		/* The sigbus error is caused of hotplug-out. */
+		ret = pci_memory_failure_handler(&pdev->device);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "Failed to handle failure for "
+				"device %s", pdev->name);
+			ret = -1;
+		}
+	}
+	return ret;
+}
+
+static int
 pci_plug(struct rte_device *dev)
 {
 	return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev));
@@ -463,6 +515,7 @@  struct rte_pci_bus rte_pci_bus = {
 		.parse = pci_parse,
 		.get_iommu_class = rte_pci_get_iommu_class,
 		.memory_failure_handler = pci_memory_failure_handler,
+		.sigbus_handler = pci_sigbus_handler,
 	},
 	.device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
 	.driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),