@@ -97,6 +97,29 @@ rte_pci_unmap_device(struct rte_pci_device *dev)
}
}
+/* re-map pci device */
+int
+rte_pci_remap_device(struct rte_pci_device *dev)
+{
+ int ret;
+
+ if (dev == NULL)
+ return -EINVAL;
+
+ switch (dev->kdrv) {
+ case RTE_KDRV_NIC_UIO:
+ ret = pci_uio_remap_resource(dev);
+ break;
+ default:
+ RTE_LOG(DEBUG, EAL,
+ " Not managed by a supported kernel driver, skipped\n");
+ ret = 1;
+ break;
+ }
+
+ return ret;
+}
+
void
pci_uio_free_resource(struct rte_pci_device *dev,
struct mapped_pci_resource *uio_res)
@@ -116,6 +116,38 @@ rte_pci_unmap_device(struct rte_pci_device *dev)
}
}
+/* Map pci device */
+int
+rte_pci_remap_device(struct rte_pci_device *dev)
+{
+ int ret = -1;
+
+ if (dev == NULL)
+ return -EINVAL;
+
+ switch (dev->kdrv) {
+ case RTE_KDRV_VFIO:
+#ifdef VFIO_PRESENT
+ /* no thing to do */
+#endif
+ break;
+ case RTE_KDRV_IGB_UIO:
+ case RTE_KDRV_UIO_GENERIC:
+ if (rte_eal_using_phys_addrs()) {
+ /* map resources for devices that use uio */
+ ret = pci_uio_remap_resource(dev);
+ }
+ break;
+ default:
+ RTE_LOG(DEBUG, EAL,
+ " Not managed by a supported kernel driver, skipped\n");
+ ret = 1;
+ break;
+ }
+
+ return ret;
+}
+
void *
pci_find_max_end_va(void)
{
@@ -357,6 +389,8 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
rte_pci_add_device(dev);
}
+ dev->device.state = RTE_DEV_PARSED;
+ TAILQ_INIT(&(dev->device.uev_cbs));
return 0;
}
@@ -282,6 +282,7 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
if (rc > 0)
/* positive value means driver doesn't support it */
continue;
+ dev->device.state = RTE_DEV_PROBED;
return 0;
}
return 1;
@@ -481,6 +482,7 @@ rte_pci_insert_device(struct rte_pci_device *exist_pci_dev,
void
rte_pci_remove_device(struct rte_pci_device *pci_dev)
{
+ RTE_LOG(DEBUG, EAL, " rte_pci_remove_device for device list\n");
TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next);
}
@@ -522,6 +524,25 @@ pci_find_device_by_name(const struct rte_device *start,
}
static int
+pci_remap_device(struct rte_device *dev)
+{
+ struct rte_pci_device *pdev;
+ int ret;
+
+ if (dev == NULL)
+ return -EINVAL;
+
+ pdev = RTE_DEV_TO_PCI(dev);
+
+ /* remap resources for devices that use igb_uio */
+ ret = rte_pci_remap_device(pdev);
+ if (ret != 0)
+ RTE_LOG(ERR, EAL, "failed to remap device %s",
+ dev->name);
+ return ret;
+}
+
+static int
pci_plug(struct rte_device *dev)
{
return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev));
@@ -552,6 +573,7 @@ struct rte_pci_bus rte_pci_bus = {
.unplug = pci_unplug,
.parse = pci_parse,
.get_iommu_class = rte_pci_get_iommu_class,
+ .remap_device = pci_remap_device,
},
.device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
.driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
@@ -147,6 +147,34 @@ pci_uio_unmap(struct mapped_pci_resource *uio_res)
}
}
+/* remap the PCI resource of a PCI device in private virtual memory */
+int
+pci_uio_remap_resource(struct rte_pci_device *dev)
+{
+ int i;
+ uint64_t phaddr;
+ void *map_address;
+
+ /* Map all BARs */
+ for (i = 0; i != PCI_MAX_RESOURCE; i++) {
+ /* skip empty BAR */
+ phaddr = dev->mem_resource[i].phys_addr;
+ if (phaddr == 0)
+ continue;
+ map_address = pci_map_private_resource(
+ dev->mem_resource[i].addr, 0,
+ (size_t)dev->mem_resource[i].len);
+ if (map_address == MAP_FAILED)
+ goto error;
+ memset(map_address, 0xFF, (size_t)dev->mem_resource[i].len);
+ dev->mem_resource[i].addr = map_address;
+ }
+
+ return 0;
+error:
+ return -1;
+}
+
static struct mapped_pci_resource *
pci_uio_find_resource(struct rte_pci_device *dev)
{
@@ -202,6 +202,18 @@ void pci_uio_free_resource(struct rte_pci_device *dev,
struct mapped_pci_resource *uio_res);
/**
+ * remap the pci uio resource..
+ *
+ * @param dev
+ * Point to the struct rte pci device.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+pci_uio_remap_resource(struct rte_pci_device *dev);
+
+/**
* Map device memory to uio resource
*
* This function is private to EAL.
@@ -52,6 +52,8 @@ extern "C" {
#include <sys/queue.h>
#include <stdint.h>
#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
@@ -197,6 +199,15 @@ int rte_pci_map_device(struct rte_pci_device *dev);
void rte_pci_unmap_device(struct rte_pci_device *dev);
/**
+ * Remap this device
+ *
+ * @param dev
+ * A pointer to a rte_pci_device structure describing the device
+ * to use
+ */
+int rte_pci_remap_device(struct rte_pci_device *dev);
+
+/**
* Dump the content of the PCI bus.
*
* @param f
@@ -323,7 +323,6 @@ vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
return NULL;
}
-
static struct rte_device *
vdev_find_device_by_name(const struct rte_device *start,
rte_dev_cmp_name_t cmp_name,
@@ -343,6 +342,13 @@ vdev_find_device_by_name(const struct rte_device *start,
}
static int
+vdev_remap_device(struct rte_device *dev)
+{
+ RTE_SET_USED(dev);
+ return 0;
+}
+
+static int
vdev_plug(struct rte_device *dev)
{
return vdev_probe_all_drivers(RTE_DEV_TO_VDEV(dev));
@@ -362,6 +368,7 @@ static struct rte_bus rte_vdev_bus = {
.plug = vdev_plug,
.unplug = vdev_unplug,
.parse = vdev_parse,
+ .remap_device = vdev_remap_device,
};
RTE_REGISTER_BUS(vdev, rte_vdev_bus);
@@ -54,6 +54,7 @@ rte_bus_register(struct rte_bus *bus)
RTE_VERIFY(bus->find_device_by_name);
/* Buses supporting driver plug also require unplug. */
RTE_VERIFY(!bus->plug || bus->unplug);
+ RTE_VERIFY(bus->remap_device);
TAILQ_INSERT_TAIL(&rte_bus_list, bus, next);
RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name);
@@ -196,6 +196,22 @@ typedef int (*rte_bus_unplug_t)(struct rte_device *dev);
typedef int (*rte_bus_parse_t)(const char *name, void *addr);
/**
+ * Implementation specific remap function which is responsible for
+ * remmaping devices on that bus from original share memory resource
+ * to a private memory resource for the sake of device has been removal,
+ * when detect the device removal event invoke from the kernel side,
+ * prior to call this function before any operation for device hw.
+ *
+ * @param dev
+ * Device pointer that was returned by a previous call to find_device.
+ *
+ * @return
+ * 0 on success.
+ * !0 on error.
+ */
+typedef int (*rte_bus_remap_device_t)(struct rte_device *dev);
+
+/**
* Bus scan policies
*/
enum rte_bus_scan_mode {
@@ -239,6 +255,7 @@ struct rte_bus {
rte_bus_plug_t plug; /**< Probe single device for drivers */
rte_bus_unplug_t unplug; /**< Remove single device from driver */
rte_bus_parse_t parse; /**< Parse a device name */
+ rte_bus_remap_device_t remap_device; /**< remap a device */
struct rte_bus_conf conf; /**< Bus configuration */
rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */
};
@@ -32,6 +32,12 @@ bool service_no_init = true;
#define DEV_EV_MNT_SERVICE_NAME "device_event_monitor_service"
+static void sig_handler(int signum)
+{
+ if (signum == SIGINT || signum == SIGTERM)
+ rte_dev_monitor_stop();
+}
+
static int
dev_monitor_fd_new(void)
{
@@ -168,6 +174,7 @@ dev_uev_process(struct epoll_event *events, int nfds)
struct rte_bus *bus;
struct rte_device *dev;
struct rte_eal_uevent uevent;
+ int ret;
int i;
for (i = 0; i < nfds; i++) {
@@ -187,9 +194,17 @@ dev_uev_process(struct epoll_event *events, int nfds)
if ((!dev) || dev->state == RTE_DEV_UNDEFINED)
return 0;
- return(_rte_dev_callback_process(dev,
- RTE_DEV_EVENT_REMOVE,
- NULL));
+ dev->state = RTE_DEV_FAULT;
+
+ /**
+ * remap the resource to be fake
+ * before user's removal processing
+ */
+ ret = bus->remap_device(dev);
+ if (!ret)
+ return(_rte_dev_callback_process(dev,
+ RTE_DEV_EVENT_REMOVE,
+ NULL));
} else if (uevent.type == RTE_DEV_EVENT_ADD) {
if (dev == NULL) {
return(_rte_dev_callback_process(NULL,
@@ -215,12 +230,26 @@ dev_uev_process(struct epoll_event *events, int nfds)
*/
static int32_t dev_uev_monitoring(__rte_unused void *arg)
{
+ struct sigaction act;
+ sigset_t mask;
int netlink_fd = -1;
struct epoll_event ep_kernel;
int fd_ep = -1;
service_exit = false;
+ /* set signal handlers */
+ memset(&act, 0x00, sizeof(struct sigaction));
+ act.sa_handler = sig_handler;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_RESTART;
+ sigaction(SIGINT, &act, NULL);
+ sigaction(SIGTERM, &act, NULL);
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGINT);
+ sigaddset(&mask, SIGTERM);
+ sigprocmask(SIG_UNBLOCK, &mask, NULL);
+
fd_ep = epoll_create1(EPOLL_CLOEXEC);
if (fd_ep < 0) {
RTE_LOG(ERR, EAL, "error creating epoll fd: %m\n");
@@ -354,6 +354,12 @@ igbuio_pci_release(struct uio_info *info, struct inode *inode)
struct rte_uio_pci_dev *udev = info->priv;
struct pci_dev *dev = udev->pdev;
+ /* check if device have been remove before release */
+ if ((&dev->dev.kobj)->state_remove_uevent_sent == 1) {
+ pr_info("The device have been removed\n");
+ return -1;
+ }
+
/* disable interrupts */
igbuio_pci_disable_interrupts(udev);
@@ -172,6 +172,26 @@ rte_pci_addr_parse(const char *str, struct rte_pci_addr *addr)
return -1;
}
+/* map a private resource from an address*/
+void *
+pci_map_private_resource(void *requested_addr, off_t offset, size_t size)
+{
+ void *mapaddr;
+
+ mapaddr = mmap(requested_addr, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+ if (mapaddr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "%s(): cannot mmap(%p, 0x%lx, 0x%lx): "
+ "%s (%p)\n",
+ __func__, requested_addr,
+ (unsigned long)size, (unsigned long)offset,
+ strerror(errno), mapaddr);
+ } else
+ RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr);
+
+ return mapaddr;
+}
/* map a particular resource from a file */
void *
@@ -227,6 +227,23 @@ int rte_pci_addr_cmp(const struct rte_pci_addr *addr,
int rte_pci_addr_parse(const char *str, struct rte_pci_addr *addr);
/**
+ * @internal
+ * Map to a particular private resource.
+ *
+ * @param requested_addr
+ * The starting address for the new mapping range.
+ * @param offset
+ * The offset for the mapping range.
+ * @param size
+ * The size for the mapping range.
+ * @return
+ * - On success, the function returns a pointer to the mapped area.
+ * - On error, the value MAP_FAILED is returned.
+ */
+void *pci_map_private_resource(void *requested_addr, off_t offset,
+ size_t size);
+
+/**
* Map a particular resource from a file.
*
* @param requested_addr