[v9,7/7] igb_uio: fix unexpected remove issue for hotplug

Message ID 1531305717-15504-8-git-send-email-jia.guo@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series hotplug failure handle mechanism |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Guo, Jia July 11, 2018, 10:41 a.m. UTC
  When device be hotplug out, the pci resource will be released in kernel,
the uio fd will disappear, and the irq will be released. At this time,
if igb uio driver still try to access or release these resource, it will
cause kernel crash.

On the other hand, uio_remove will be called unexpectedly before
uio_release. The uio_remove procedure will free resources which are needed
by uio_release. So there is no chance to disable interrupt which is defined
inside uio_release procedure. This will affect later usage of interrupt.

So the case of unexpectedly removal by hot unplug should be identify and
correspondingly processed.

This patch propose to add enum rte_udev_state in struct rte_uio_pci_dev,
that will keep the state of uio device as probed/opened/released/removed.

This patch also checks kobject’s remove_uevent_sent state to detect the
unexpectedly removal status which means hot unplug. Once hot unplug be
detected, it will call uio_release as soon as possible and set the uio
status to be “removed”. After that, uio will check this status in
uio_release function, if uio have already been removed, it will only free
the dirty uio resource.

Signed-off-by: Jeff Guo <jia.guo@intel.com>
---
v9->v8:
refine commit log to be more readable.
---
 kernel/linux/igb_uio/igb_uio.c | 69 +++++++++++++++++++++++++++++++++---------
 1 file changed, 55 insertions(+), 14 deletions(-)
  

Comments

He, Shaopeng July 12, 2018, 1:57 a.m. UTC | #1
> -----Original Message-----
> From: Guo, Jia
> Sent: Wednesday, July 11, 2018 6:42 PM
> 
> When device be hotplug out, the pci resource will be released in kernel,
> the uio fd will disappear, and the irq will be released. At this time,
> if igb uio driver still try to access or release these resource, it will
> cause kernel crash.
> 
> On the other hand, uio_remove will be called unexpectedly before
> uio_release. The uio_remove procedure will free resources which are needed
> by uio_release. So there is no chance to disable interrupt which is defined
> inside uio_release procedure. This will affect later usage of interrupt.
> 
> So the case of unexpectedly removal by hot unplug should be identify and
> correspondingly processed.
> 
> This patch propose to add enum rte_udev_state in struct rte_uio_pci_dev,
> that will keep the state of uio device as probed/opened/released/removed.
> 
> This patch also checks kobject’s remove_uevent_sent state to detect the
> unexpectedly removal status which means hot unplug. Once hot unplug be
> detected, it will call uio_release as soon as possible and set the uio
> status to be “removed”. After that, uio will check this status in
> uio_release function, if uio have already been removed, it will only free
> the dirty uio resource.
> 
> Signed-off-by: Jeff Guo <jia.guo@intel.com>

Even though we prefer vfio than igb_uio as vfio is safer and more standard way
of accessing devices, it's still good to have this bug-fixing to avoiding kernel crash
and memory leak. 

Later on, we might further enhance igb_uio by introducing similar mechanism
which vfio-pci currently uses (send event to up-layer application in the middle of 
pci remove process), so up-layer application can close this device more gracefully.
Or, we can suggest to use vfio, and leave igb_uio as it is.

Thanks,
--Shaopeng

Acked-by: Shaopeng He <shaopeng.he@intel.com>
  

Patch

diff --git a/kernel/linux/igb_uio/igb_uio.c b/kernel/linux/igb_uio/igb_uio.c
index 3398eac..d126371 100644
--- a/kernel/linux/igb_uio/igb_uio.c
+++ b/kernel/linux/igb_uio/igb_uio.c
@@ -19,6 +19,14 @@ 
 
 #include "compat.h"
 
+/* uio pci device state */
+enum rte_udev_state {
+	RTE_UDEV_PROBED,
+	RTE_UDEV_OPENNED,
+	RTE_UDEV_RELEASED,
+	RTE_UDEV_REMOVED,
+};
+
 /**
  * A structure describing the private information for a uio device.
  */
@@ -28,6 +36,7 @@  struct rte_uio_pci_dev {
 	enum rte_intr_mode mode;
 	struct mutex lock;
 	int refcnt;
+	enum rte_udev_state state;
 };
 
 static int wc_activate;
@@ -309,6 +318,17 @@  igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev)
 #endif
 }
 
+/* Unmap previously ioremap'd resources */
+static void
+igbuio_pci_release_iomem(struct uio_info *info)
+{
+	int i;
+
+	for (i = 0; i < MAX_UIO_MAPS; i++) {
+		if (info->mem[i].internal_addr)
+			iounmap(info->mem[i].internal_addr);
+	}
+}
 
 /**
  * This gets called while opening uio device file.
@@ -331,20 +351,35 @@  igbuio_pci_open(struct uio_info *info, struct inode *inode)
 
 	/* enable interrupts */
 	err = igbuio_pci_enable_interrupts(udev);
-	mutex_unlock(&udev->lock);
 	if (err) {
 		dev_err(&dev->dev, "Enable interrupt fails\n");
+		pci_clear_master(dev);
+		mutex_unlock(&udev->lock);
 		return err;
 	}
+	udev->state = RTE_UDEV_OPENNED;
+	mutex_unlock(&udev->lock);
 	return 0;
 }
 
+/**
+ * This gets called while closing uio device file.
+ */
 static int
 igbuio_pci_release(struct uio_info *info, struct inode *inode)
 {
 	struct rte_uio_pci_dev *udev = info->priv;
 	struct pci_dev *dev = udev->pdev;
 
+	if (udev->state == RTE_UDEV_REMOVED) {
+		mutex_destroy(&udev->lock);
+		igbuio_pci_release_iomem(&udev->info);
+		pci_disable_device(dev);
+		pci_set_drvdata(dev, NULL);
+		kfree(udev);
+		return 0;
+	}
+
 	mutex_lock(&udev->lock);
 	if (--udev->refcnt > 0) {
 		mutex_unlock(&udev->lock);
@@ -356,7 +391,7 @@  igbuio_pci_release(struct uio_info *info, struct inode *inode)
 
 	/* stop the device from further DMA */
 	pci_clear_master(dev);
-
+	udev->state = RTE_UDEV_RELEASED;
 	mutex_unlock(&udev->lock);
 	return 0;
 }
@@ -414,18 +449,6 @@  igbuio_pci_setup_ioport(struct pci_dev *dev, struct uio_info *info,
 	return 0;
 }
 
-/* Unmap previously ioremap'd resources */
-static void
-igbuio_pci_release_iomem(struct uio_info *info)
-{
-	int i;
-
-	for (i = 0; i < MAX_UIO_MAPS; i++) {
-		if (info->mem[i].internal_addr)
-			iounmap(info->mem[i].internal_addr);
-	}
-}
-
 static int
 igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info)
 {
@@ -562,6 +585,9 @@  igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 			 (unsigned long long)map_dma_addr, map_addr);
 	}
 
+	mutex_lock(&udev->lock);
+	udev->state = RTE_UDEV_PROBED;
+	mutex_unlock(&udev->lock);
 	return 0;
 
 fail_remove_group:
@@ -579,6 +605,21 @@  static void
 igbuio_pci_remove(struct pci_dev *dev)
 {
 	struct rte_uio_pci_dev *udev = pci_get_drvdata(dev);
+	struct pci_dev *pdev = udev->pdev;
+	int ret;
+
+	/* handle unexpected removal */
+	if (udev->state == RTE_UDEV_OPENNED ||
+	    (&pdev->dev.kobj)->state_remove_uevent_sent == 1) {
+		dev_notice(&dev->dev, "Unexpected removal!\n");
+		ret = igbuio_pci_release(&udev->info, NULL);
+		if (ret)
+			return;
+		mutex_lock(&udev->lock);
+		udev->state = RTE_UDEV_REMOVED;
+		mutex_unlock(&udev->lock);
+		return;
+	}
 
 	mutex_destroy(&udev->lock);
 	sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);