[v1,5/5] vfio: enable vfio hotplug by req notifier handler

Message ID 1534503091-31910-6-git-send-email-jia.guo@intel.com
State Superseded, archived
Delegated to: Thomas Monjalon
Headers show
Series
  • Enable hotplug in vfio
Related show

Checks

Context Check Description
ci/Intel-compilation fail apply issues
ci/checkpatch success coding style OK

Commit Message

Jeff Guo Aug. 17, 2018, 10:51 a.m.
This patch will add req notifier processing to enable hotplug for vfio.
When device is be hotplug-out, the vfio kernel module will sent req
notifier to request user space to release the allocated resources. It
could use the bus failure mechanism to trigger the currently hotplug
procedure of user space. After that, vfio kernel module will detect the
device disappear, and then release the kernel resource of the device.

Signed-off-by: Jeff Guo <jia.guo@intel.com>
---
 drivers/bus/pci/linux/pci_vfio.c | 104 +++++++++++++++++++++++++++++++++++++++
 drivers/bus/pci/pci_common.c     |  10 ++++
 lib/librte_ethdev/rte_ethdev.c   |   1 +
 3 files changed, 115 insertions(+)

Comments

Burakov, Anatoly Sept. 26, 2018, 12:28 p.m. | #1
On 17-Aug-18 11:51 AM, Jeff Guo wrote:
> This patch will add req notifier processing to enable hotplug for vfio.
> When device is be hotplug-out, the vfio kernel module will sent req
> notifier to request user space to release the allocated resources. It
> could use the bus failure mechanism to trigger the currently hotplug
> procedure of user space. After that, vfio kernel module will detect the
> device disappear, and then release the kernel resource of the device.
> 
> Signed-off-by: Jeff Guo <jia.guo@intel.com>
> ---

<snip>

> +	 * resources before resources be released in kernel, so it can directly
> +	 * call the bus failure handler to use the specific mechanism in
> +	 * user space to handle it.
> +	 */
> +	ret = bus->memory_failure_handler(device);
> +	if (ret) {
> +		RTE_LOG(ERR, EAL, "Can not handle failure for "
> +			"device (%s)\n", device->name);
> +		return;

Return is not needed here :)

Otherwise, LGTM
Jeff Guo Sept. 29, 2018, 5:51 a.m. | #2
Anatoly, thanks for your review, please see comment as below.

On 9/26/2018 8:28 PM, Burakov, Anatoly wrote:
> On 17-Aug-18 11:51 AM, Jeff Guo wrote:
>> This patch will add req notifier processing to enable hotplug for vfio.
>> When device is be hotplug-out, the vfio kernel module will sent req
>> notifier to request user space to release the allocated resources. It
>> could use the bus failure mechanism to trigger the currently hotplug
>> procedure of user space. After that, vfio kernel module will detect the
>> device disappear, and then release the kernel resource of the device.
>>
>> Signed-off-by: Jeff Guo <jia.guo@intel.com>
>> ---
>
> <snip>
>
>> +     * resources before resources be released in kernel, so it can 
>> directly
>> +     * call the bus failure handler to use the specific mechanism in
>> +     * user space to handle it.
>> +     */
>> +    ret = bus->memory_failure_handler(device);
>> +    if (ret) {
>> +        RTE_LOG(ERR, EAL, "Can not handle failure for "
>> +            "device (%s)\n", device->name);
>> +        return;
>
> Return is not needed here :)
>
> Otherwise, LGTM
>

yes, will modify it.

Patch

diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index 686386d..8a14f92 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -17,6 +17,8 @@ 
 #include <rte_eal_memconfig.h>
 #include <rte_malloc.h>
 #include <rte_vfio.h>
+#include <rte_eal.h>
+#include <rte_bus.h>
 
 #include "eal_filesystem.h"
 
@@ -277,6 +279,92 @@  pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
 	return -1;
 }
 
+static void
+pci_vfio_req_handler(void *param)
+{
+	struct rte_bus *bus;
+	int ret;
+	struct rte_device *device = (struct rte_device *)param;
+
+	bus = rte_bus_find_by_device(device);
+	if (bus == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n",
+			device->name);
+		return;
+	}
+
+	/**
+	 * vfio kernel module request user space to release allocated
+	 * resources before resources be released in kernel, so it can directly
+	 * call the bus failure handler to use the specific mechanism in
+	 * user space to handle it.
+	 */
+	ret = bus->memory_failure_handler(device);
+	if (ret) {
+		RTE_LOG(ERR, EAL, "Can not handle failure for "
+			"device (%s)\n", device->name);
+		return;
+	}
+}
+
+/* enable notifier (only enable req now) */
+static int
+pci_vfio_enable_notifier(struct rte_pci_device *dev, int vfio_dev_fd)
+{
+	int ret;
+	int fd = -1;
+
+	/* set up an eventfd for req notifier */
+	fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot set up eventfd, "
+				"error %i (%s)\n", errno, strerror(errno));
+		return -1;
+	}
+
+	dev->req_notifier_handler.fd = fd;
+	dev->req_notifier_handler.type = RTE_INTR_HANDLE_VFIO_REQ;
+	dev->req_notifier_handler.vfio_dev_fd = vfio_dev_fd;
+	ret = rte_intr_callback_register(&dev->req_notifier_handler,
+					 pci_vfio_req_handler,
+					 (void *)&dev->device);
+	if (ret) {
+		RTE_LOG(ERR, EAL, "Fail to register req notifier handler.\n");
+		return -1;
+	}
+
+	ret = rte_intr_enable(&dev->req_notifier_handler);
+	if (ret) {
+		RTE_LOG(ERR, EAL, "Fail to enable req notifier.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*disable notifier (only disable req now) */
+static int
+pci_vfio_disable_notifier(struct rte_pci_device *dev)
+{
+	int ret;
+
+	ret = rte_intr_callback_unregister(&dev->req_notifier_handler,
+					 pci_vfio_req_handler,
+					 (void *)&dev->device);
+	if (ret) {
+		RTE_LOG(ERR, EAL, "fail to unregister req notifier handler.\n");
+		return -1;
+	}
+
+	ret = rte_intr_enable(&dev->req_notifier_handler);
+	if (ret) {
+		RTE_LOG(ERR, EAL, "fail to disable req notifier.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
 static int
 pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
 {
@@ -430,6 +518,7 @@  pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 	struct pci_map *maps;
 
 	dev->intr_handle.fd = -1;
+	dev->req_notifier_handler.fd = -1;
 
 	/* store PCI address string */
 	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
@@ -521,6 +610,11 @@  pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 		goto err_vfio_res;
 	}
 
+	if (pci_vfio_enable_notifier(dev, vfio_dev_fd) != 0) {
+		RTE_LOG(ERR, EAL, "Error setting up notifier!\n");
+		return -1;
+	}
+
 	TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);
 
 	return 0;
@@ -546,6 +640,7 @@  pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
 	struct pci_map *maps;
 
 	dev->intr_handle.fd = -1;
+	dev->req_notifier_handler.fd = -1;
 
 	/* store PCI address string */
 	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
@@ -586,6 +681,7 @@  pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
 
 	/* we need save vfio_dev_fd, so it can be used during release */
 	dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
+	dev->req_notifier_handler.vfio_dev_fd = vfio_dev_fd;
 
 	return 0;
 err_vfio_dev_fd:
@@ -658,12 +754,20 @@  pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
 	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
 			loc->domain, loc->bus, loc->devid, loc->function);
 
+	pci_vfio_disable_notifier(dev);
+
 	if (close(dev->intr_handle.fd) < 0) {
 		RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n",
 			pci_addr);
 		return -1;
 	}
 
+	if (close(dev->req_notifier_handler.fd) < 0) {
+		RTE_LOG(INFO, EAL, "Error when closing req eventfd file "
+			"descriptor for %s\n", pci_addr);
+		return -1;
+	}
+
 	if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) {
 		RTE_LOG(ERR, EAL, "  %s cannot unset bus mastering for PCI device!\n",
 				pci_addr);
diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index b8f3244..21a7397 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -446,6 +446,16 @@  pci_memory_failure_handler(struct rte_device *dev)
 		return -1;
 
 	switch (pdev->kdrv) {
+	case RTE_KDRV_VFIO:
+		/**
+		 * vfio kernel module guaranty the pci device would not be
+		 * deleted until the user space release the resource, so no
+		 * need to remap mmio resource here, just directly notify
+		 * the req event to the user space to handle it.
+		 */
+		rte_dev_event_callback_process(dev->name,
+					       RTE_DEV_EVENT_REQ);
+		break;
 	case RTE_KDRV_IGB_UIO:
 	case RTE_KDRV_UIO_GENERIC:
 	case RTE_KDRV_NIC_UIO:
diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index da1e9a6..74694c4 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -4424,6 +4424,7 @@  eth_dev_event_callback(const char *device_name, enum rte_dev_event_type type,
 	struct rte_eth_dev *eth_dev = (struct rte_eth_dev *)arg;
 
 	switch (type) {
+	case RTE_DEV_EVENT_REQ:
 	case RTE_DEV_EVENT_REMOVE:
 		RTE_ETHDEV_LOG(INFO, "The device %s has been removed!\n",
 			       device_name);