[dpdk-dev,v5,1/2] eal: add uevent monitor for hot plug

Message ID 1505880732-16539-2-git-send-email-jia.guo@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation fail Compilation issues

Commit Message

Guo, Jia Sept. 20, 2017, 4:12 a.m. UTC
This patch aim to add a general uevent mechanism in eal device layer,
to enable all kernel object hot plug monitoring, so user could use these
API to monitor and read out the device status info sent from the kernel
side, then corresponding to handle it, such as detach or attach the
device, and even benefit to use it for do smoothly fail safe work.

1) About uevent monitoring:
a: add one epolling to poll the netlink socket, to monitor the uevent of
   the device, add device_state in struct of rte_device, to identify the
   device state machine.
b: add enum of rte_eal_dev_event_type and struct of rte_eal_uevent.
c: add below API in rte eal device common layer.
   rte_eal_dev_monitor_enable
   rte_dev_callback_register
   rte_dev_callback_unregister
   _rte_dev_callback_process
   rte_dev_bind_driver
   rte_dev_monitor_start
   rte_dev_monitor_stop

2) About failure handler, use pci uio for example,
   add pci_remap_device in bus layer and below function to process it:
   rte_pci_remap_device
   pci_uio_remap_resource
   pci_map_private_resource

Signed-off-by: Jeff Guo <jia.guo@intel.com>
---
v5->v4:
1.Move uevent monitor epolling from eal interrupt to eal device layer.
2.Redefine the eal device API for common, and distinguish between linux and bsd  
3.Add failure handler helper api in bus layer.Add function of find device by name. 
4.Replace of individual fd bind with single device, use a common fd to polling all device.
5.Add to register hot insertion monitoring and process, add function to auto bind driver befor user add device
6.Refine some coding style and typos issue
---
 lib/librte_eal/bsdapp/eal/eal_dev.c                |  64 ++++
 .../bsdapp/eal/include/exec-env/rte_dev.h          | 105 ++++++
 lib/librte_eal/common/eal_common_bus.c             |  31 ++
 lib/librte_eal/common/eal_common_dev.c             | 223 ++++++++++++-
 lib/librte_eal/common/eal_common_pci.c             |  69 +++-
 lib/librte_eal/common/eal_common_pci_uio.c         |  31 +-
 lib/librte_eal/common/eal_common_vdev.c            |  29 +-
 lib/librte_eal/common/eal_private.h                |  13 +-
 lib/librte_eal/common/include/rte_bus.h            |  52 +++
 lib/librte_eal/common/include/rte_dev.h            | 102 +++++-
 lib/librte_eal/common/include/rte_pci.h            |  26 ++
 lib/librte_eal/linuxapp/eal/Makefile               |   3 +-
 lib/librte_eal/linuxapp/eal/eal_dev.c              | 351 +++++++++++++++++++++
 lib/librte_eal/linuxapp/eal/eal_pci.c              |  33 ++
 .../linuxapp/eal/include/exec-env/rte_dev.h        | 105 ++++++
 15 files changed, 1228 insertions(+), 9 deletions(-)
 create mode 100644 lib/librte_eal/bsdapp/eal/eal_dev.c
 create mode 100644 lib/librte_eal/bsdapp/eal/include/exec-env/rte_dev.h
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_dev.c
 create mode 100644 lib/librte_eal/linuxapp/eal/include/exec-env/rte_dev.h
  

Patch

diff --git a/lib/librte_eal/bsdapp/eal/eal_dev.c b/lib/librte_eal/bsdapp/eal/eal_dev.c
new file mode 100644
index 0000000..6ea9a74
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_dev.c
@@ -0,0 +1,64 @@ 
+/*-
+ *   Copyright(c) 2010-2017 Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/queue.h>
+#include <sys/signalfd.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdbool.h>
+
+#include <rte_malloc.h>
+#include <rte_bus.h>
+#include <rte_dev.h>
+#include <rte_devargs.h>
+#include <rte_debug.h>
+#include <rte_log.h>
+
+#include "eal_thread.h"
+
+int
+rte_dev_monitor_start(void)
+{
+	return -1;
+}
+
+int
+rte_dev_monitor_stop(void)
+{
+	return -1;
+}
diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dev.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dev.h
new file mode 100644
index 0000000..7d2c3c3
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dev.h
@@ -0,0 +1,105 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_DEV_H_
+#error "don't include this file directly, please include generic <rte_dev.h>"
+#endif
+
+#ifndef _RTE_LINUXAPP_DEV_H_
+#define _RTE_LINUXAPP_DEV_H_
+
+#include <stdio.h>
+
+#include <rte_dev.h>
+
+#define RTE_EAL_UEVENT_MSG_LEN 4096
+
+enum uev_subsystem {
+	UEV_SUBSYSTEM_UIO,
+	UEV_SUBSYSTEM_VFIO,
+	UEV_SUBSYSTEM_PCI,
+	UEV_SUBSYSTEM_MAX
+};
+
+enum uev_monitor_netlink_group {
+	UEV_MONITOR_KERNEL,
+	UEV_MONITOR_UDEV,
+};
+
+/**
+ * The device event type.
+ */
+enum rte_eal_dev_event_type {
+	RTE_EAL_DEV_EVENT_UNKNOWN,	/**< unknown event type */
+	RTE_EAL_DEV_EVENT_ADD,		/**< device adding event */
+	RTE_EAL_DEV_EVENT_REMOVE,
+					/**< device removing event */
+	RTE_EAL_DEV_EVENT_CHANGE,
+					/**< device status change event */
+	RTE_EAL_DEV_EVENT_MOVE,		/**< device sys path move event */
+	RTE_EAL_DEV_EVENT_ONLINE,	/**< device online event */
+	RTE_EAL_DEV_EVENT_OFFLINE,	/**< device offline event */
+	RTE_EAL_DEV_EVENT_MAX		/**< max value of this enum */
+};
+
+struct rte_eal_uevent {
+	enum rte_eal_dev_event_type type;	/**< device event type */
+	int subsystem;				/**< subsystem id */
+	char *devname;				/**< device name */
+	enum uev_monitor_netlink_group group;	/**< device netlink group */
+};
+
+/**
+ * Start the device uevent monitoring.
+ *
+ * @param none
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_dev_monitor_start(void);
+
+/**
+ * Stop the device uevent monitoring .
+ *
+ * @param none
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+
+int
+rte_dev_monitor_stop(void);
+
+#endif /* _RTE_LINUXAPP_DEV_H_ */
diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c
index 08bec2d..e8ed396 100644
--- a/lib/librte_eal/common/eal_common_bus.c
+++ b/lib/librte_eal/common/eal_common_bus.c
@@ -50,8 +50,10 @@  rte_bus_register(struct rte_bus *bus)
 	RTE_VERIFY(bus->scan);
 	RTE_VERIFY(bus->probe);
 	RTE_VERIFY(bus->find_device);
+	RTE_VERIFY(bus->find_device_by_name);
 	/* Buses supporting driver plug also require unplug. */
 	RTE_VERIFY(!bus->plug || bus->unplug);
+	RTE_VERIFY(bus->remap_device);
 
 	TAILQ_INSERT_TAIL(&rte_bus_list, bus, next);
 	RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name);
@@ -174,6 +176,15 @@  cmp_rte_device(const struct rte_device *dev1, const void *_dev2)
 }
 
 static int
+cmp_rte_device_name(const char *dev_name1, const void *_dev_name2)
+{
+	const char *dev_name2 = _dev_name2;
+
+	return strcmp(dev_name1, dev_name2);
+}
+
+
+static int
 bus_find_device(const struct rte_bus *bus, const void *_dev)
 {
 	struct rte_device *dev;
@@ -182,6 +193,26 @@  bus_find_device(const struct rte_bus *bus, const void *_dev)
 	return dev == NULL;
 }
 
+static struct rte_device *
+bus_find_device_by_name(const struct rte_bus *bus, const void *_dev_name)
+{
+	struct rte_device *dev;
+
+	dev = bus->find_device_by_name(NULL, cmp_rte_device_name, _dev_name);
+	return dev;
+}
+
+
+struct rte_device *
+
+rte_bus_find_device(const struct rte_bus *bus, const void *_dev_name)
+{
+	struct rte_device *dev;
+
+	dev = bus_find_device_by_name(bus, _dev_name);
+	return dev;
+}
+
 struct rte_bus *
 rte_bus_find_by_device(const struct rte_device *dev)
 {
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index e251275..9ebdd6a 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -36,15 +36,40 @@ 
 #include <string.h>
 #include <inttypes.h>
 #include <sys/queue.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
 
 #include <rte_bus.h>
 #include <rte_dev.h>
 #include <rte_devargs.h>
 #include <rte_debug.h>
 #include <rte_log.h>
+#include <rte_spinlock.h>
+#include <rte_malloc.h>
 
 #include "eal_private.h"
 
+/* spinlock for device callbacks */
+static rte_spinlock_t rte_dev_cb_lock = RTE_SPINLOCK_INITIALIZER;
+
+/**
+ * The user application callback description.
+ *
+ * It contains callback address to be registered by user application,
+ * the pointer to the parameters for callback, and the event type.
+ */
+struct rte_eal_dev_callback {
+	TAILQ_ENTRY(rte_eal_dev_callback) next; /**< Callbacks list */
+	rte_eal_dev_cb_fn cb_fn;                /**< Callback address */
+	void *cb_arg;                           /**< Parameter for callback */
+	void *ret_param;                        /**< Return parameter */
+	enum rte_eal_dev_event_type event;          /**< device event type */
+	uint32_t active;                        /**< Callback is executing */
+};
+
+struct rte_eal_dev_cb_list uev_cbs;
+
 static int cmp_detached_dev_name(const struct rte_device *dev,
 	const void *_name)
 {
@@ -53,7 +78,6 @@  static int cmp_detached_dev_name(const struct rte_device *dev,
 	/* skip attached devices */
 	if (dev->driver != NULL)
 		return 1;
-
 	return strcmp(dev->name, name);
 }
 
@@ -244,3 +268,200 @@  int rte_eal_hotplug_remove(const char *busname, const char *devname)
 	rte_eal_devargs_remove(busname, devname);
 	return ret;
 }
+
+int
+rte_eal_dev_monitor_enable(void)
+{
+	int ret;
+
+	if (TAILQ_EMPTY(&uev_cbs))
+		TAILQ_INIT(&uev_cbs);
+
+	ret = rte_dev_monitor_start();
+	if (ret)
+		RTE_LOG(ERR, EAL, "Can not init device monitor\n");
+	return ret;
+}
+
+int
+rte_dev_callback_register(enum rte_eal_dev_event_type event,
+			rte_eal_dev_cb_fn cb_fn, void *cb_arg)
+{
+	struct rte_eal_dev_callback *user_cb;
+
+	if (!cb_fn)
+		return -EINVAL;
+
+	rte_spinlock_lock(&rte_dev_cb_lock);
+
+	TAILQ_FOREACH(user_cb, &uev_cbs, next) {
+		if (user_cb->cb_fn == cb_fn &&
+			user_cb->cb_arg == cb_arg &&
+			user_cb->event == event) {
+			break;
+		}
+	}
+
+	/* create a new callback. */
+	if (user_cb == NULL) {
+		/* allocate a new interrupt callback entity */
+		user_cb = rte_zmalloc("eal device event",
+					sizeof(*user_cb), 0);
+		if (user_cb == NULL) {
+			RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+			return -ENOMEM;
+		}
+		user_cb->cb_fn = cb_fn;
+		user_cb->cb_arg = cb_arg;
+		user_cb->event = event;
+		TAILQ_INSERT_TAIL(&uev_cbs, user_cb, next);
+	}
+
+	rte_spinlock_unlock(&rte_dev_cb_lock);
+	return 0;
+}
+
+int
+rte_dev_callback_unregister(enum rte_eal_dev_event_type event,
+			rte_eal_dev_cb_fn cb_fn, void *cb_arg)
+{
+	int ret;
+	struct rte_eal_dev_callback *cb, *next;
+
+	if (!cb_fn)
+		return -EINVAL;
+
+	rte_spinlock_lock(&rte_dev_cb_lock);
+
+	ret = 0;
+	for (cb = TAILQ_FIRST(&uev_cbs); cb != NULL; cb = next) {
+
+		next = TAILQ_NEXT(cb, next);
+
+		if (cb->cb_fn != cb_fn || cb->event != event ||
+				(cb->cb_arg != (void *)-1 &&
+				cb->cb_arg != cb_arg))
+			continue;
+
+		/*
+		 * if this callback is not executing right now,
+		 * then remove it.
+		 */
+		if (cb->active == 0) {
+			TAILQ_REMOVE(&(uev_cbs), cb, next);
+			rte_free(cb);
+		} else {
+			ret = -EAGAIN;
+		}
+	}
+
+	rte_spinlock_unlock(&rte_dev_cb_lock);
+	return ret;
+}
+
+int
+_rte_dev_callback_process(enum rte_eal_dev_event_type event,
+			void *cb_arg, void *ret_param)
+{
+	struct rte_eal_dev_callback *cb_lst;
+	struct rte_eal_dev_callback dev_cb;
+	int rc = 0;
+
+	rte_spinlock_lock(&rte_dev_cb_lock);
+	TAILQ_FOREACH(cb_lst, &(uev_cbs), next) {
+		if (cb_lst->cb_fn == NULL || cb_lst->event != event)
+			continue;
+		dev_cb = *cb_lst;
+		cb_lst->active = 1;
+		if (cb_arg != NULL)
+			dev_cb.cb_arg = cb_arg;
+		if (ret_param != NULL)
+			dev_cb.ret_param = ret_param;
+
+		rte_spinlock_unlock(&rte_dev_cb_lock);
+		rc = dev_cb.cb_fn(dev_cb.event,
+				dev_cb.cb_arg, dev_cb.ret_param);
+		rte_spinlock_lock(&rte_dev_cb_lock);
+		cb_lst->active = 0;
+	}
+	rte_spinlock_unlock(&rte_dev_cb_lock);
+	return rc;
+}
+
+int
+rte_dev_bind_driver(const char *dev_name, const char *drv_type) {
+	char drv_bind_path[PATH_MAX];
+	char drv_override_path[PATH_MAX]; /* contains the /dev/uioX */
+	int drv_override_fd, drv_bind_fd;
+
+	RTE_SET_USED(drv_type);
+
+	snprintf(drv_override_path, sizeof(drv_override_path),
+		"/sys/bus/pci/devices/%s/driver_override", dev_name);
+
+	/* specify the driver for a device by writing to driver_override */
+	drv_override_fd = open(drv_override_path, O_WRONLY);
+	if (drv_override_fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+			drv_override_path, strerror(errno));
+		goto err;
+	}
+
+	if (write(drv_override_fd, drv_type, sizeof(drv_type)) < 0) {
+		RTE_LOG(ERR, EAL,
+			"Error: bind failed - Cannot write "
+			"driver %s to device %s\n", drv_type, dev_name);
+		goto err;
+	}
+	close(drv_override_fd);
+
+
+	snprintf(drv_bind_path, sizeof(drv_bind_path),
+		"/sys/bus/pci/drivers/%s/bind", drv_type);
+
+	/* do the bind by writing device to the specific driver  */
+	drv_bind_fd = open(drv_bind_path, O_APPEND | O_WRONLY);
+	if (drv_bind_fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+			drv_bind_path, strerror(errno));
+		goto err;
+	}
+
+	if (write(drv_bind_fd, dev_name, sizeof(dev_name)) < 0) {
+		RTE_LOG(ERR, EAL,
+		"Error: bind failed - Cannot write device %s "
+		"to driver %s \n", dev_name, drv_type);
+		goto err;
+	}
+	close(drv_bind_fd);
+
+	snprintf(drv_override_path, sizeof(drv_override_path),
+		"/sys/bus/pci/devices/%s/driver_override", dev_name);
+
+	/**
+	 * Before unbinding it, overwrite driver_override with empty
+	 * string so that the device can be bound to any other
+	 * driver
+	 */
+	drv_override_fd = open(drv_override_path, O_WRONLY);
+	if (drv_override_fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+			drv_override_path, strerror(errno));
+		goto err;
+	}
+
+	drv_type = "\00";
+
+	if (write(drv_override_fd, drv_type, sizeof(drv_type)) < 0) {
+		RTE_LOG(ERR, EAL,
+			"Error: bind failed - Cannot write "
+			"driver %s to device %s\n", drv_type, dev_name);
+		goto err;
+	}
+	close(drv_override_fd);
+	return 0;
+err:
+	close(drv_override_fd);
+	close(drv_bind_fd);
+	return -1;
+}
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
index 52fd38c..439852b 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -110,6 +110,27 @@  pci_name_set(struct rte_pci_device *dev)
 		dev->device.name = dev->name;
 }
 
+/* map a private resource from an address*/
+void *
+pci_map_private_resource(void *requested_addr, off_t offset, size_t size)
+{
+	void *mapaddr;
+
+	mapaddr = mmap(requested_addr, size,
+			   PROT_READ | PROT_WRITE,
+			   MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+	if (mapaddr == MAP_FAILED) {
+		RTE_LOG(ERR, EAL, "%s(): cannot mmap(%p, 0x%lx, 0x%lx): "
+			"%s (%p)\n",
+			__func__, requested_addr,
+			(unsigned long)size, (unsigned long)offset,
+			strerror(errno), mapaddr);
+	} else
+		RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
+
+	return mapaddr;
+}
+
 /* map a particular resource from a file */
 void *
 pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
@@ -285,9 +306,10 @@  rte_pci_detach_dev(struct rte_pci_device *dev)
 	RTE_LOG(DEBUG, EAL, "  remove driver: %x:%x %s\n", dev->id.vendor_id,
 			dev->id.device_id, dr->driver.name);
 
-	if (dr->remove && (dr->remove(dev) < 0))
-		return -1;	/* negative value is an error */
-
+	if (dev->device.state != DEVICE_FAULT) {
+		if (dr->remove && (dr->remove(dev) < 0))
+			return -1;	/* negative value is an error */
+	}
 	/* clear driver structure */
 	dev->driver = NULL;
 
@@ -538,6 +560,7 @@  pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
 			start = NULL; /* starting point found */
 			continue;
 		}
+
 		if (cmp(&dev->device, data) == 0)
 			return &dev->device;
 	}
@@ -545,6 +568,44 @@  pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
 	return NULL;
 }
 
+static struct rte_device *
+pci_find_device_by_name(const struct rte_device *start,
+		rte_dev_cmp_name_t cmp_name,
+		const void *data)
+{
+	struct rte_pci_device *dev;
+
+	FOREACH_DEVICE_ON_PCIBUS(dev) {
+		if (start && &dev->device == start) {
+			start = NULL; /* starting point found */
+			continue;
+		}
+		if (cmp_name(dev->device.name, data) == 0)
+			return &dev->device;
+	}
+
+	return NULL;
+}
+
+static int
+pci_remap_device(struct rte_device *dev)
+{
+	struct rte_pci_device *pdev;
+	int ret;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	pdev = RTE_DEV_TO_PCI(dev);
+
+	/* remap resources for devices that use igb_uio */
+	ret = rte_pci_remap_device(pdev);
+	if (ret != 0)
+		RTE_LOG(ERR, EAL, "failed to remap device %s",
+			dev->name);
+	return ret;
+}
+
 static int
 pci_plug(struct rte_device *dev)
 {
@@ -569,9 +630,11 @@  struct rte_pci_bus rte_pci_bus = {
 		.scan = rte_pci_scan,
 		.probe = rte_pci_probe,
 		.find_device = pci_find_device,
+		.find_device_by_name = pci_find_device_by_name,
 		.plug = pci_plug,
 		.unplug = pci_unplug,
 		.parse = pci_parse,
+		.remap_device = pci_remap_device,
 	},
 	.device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
 	.driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
diff --git a/lib/librte_eal/common/eal_common_pci_uio.c b/lib/librte_eal/common/eal_common_pci_uio.c
index 367a681..cb12349 100644
--- a/lib/librte_eal/common/eal_common_pci_uio.c
+++ b/lib/librte_eal/common/eal_common_pci_uio.c
@@ -174,6 +174,34 @@  pci_uio_unmap(struct mapped_pci_resource *uio_res)
 	}
 }
 
+/* remap the PCI resource of a PCI device in private virtual memory */
+int
+pci_uio_remap_resource(struct rte_pci_device *dev)
+{
+	int i;
+	uint64_t phaddr;
+	void *map_address;
+
+	/* Map all BARs */
+	for (i = 0; i != PCI_MAX_RESOURCE; i++) {
+		/* skip empty BAR */
+		phaddr = dev->mem_resource[i].phys_addr;
+		if (phaddr == 0)
+			continue;
+		map_address = pci_map_private_resource(
+				dev->mem_resource[i].addr, 0,
+				(size_t)dev->mem_resource[i].len);
+		if (map_address == MAP_FAILED)
+			goto error;
+		memset(map_address, 0xFF, (size_t)dev->mem_resource[i].len);
+		dev->mem_resource[i].addr = map_address;
+	}
+
+	return 0;
+error:
+	return -1;
+}
+
 static struct mapped_pci_resource *
 pci_uio_find_resource(struct rte_pci_device *dev)
 {
@@ -222,7 +250,8 @@  pci_uio_unmap_resource(struct rte_pci_device *dev)
 	rte_free(uio_res);
 
 	/* close fd if in primary process */
-	close(dev->intr_handle.fd);
+	if (dev->device.state != DEVICE_FAULT)
+		close(dev->intr_handle.fd);
 	if (dev->intr_handle.uio_cfg_fd >= 0) {
 		close(dev->intr_handle.uio_cfg_fd);
 		dev->intr_handle.uio_cfg_fd = -1;
diff --git a/lib/librte_eal/common/eal_common_vdev.c b/lib/librte_eal/common/eal_common_vdev.c
index f7e547a..ebed0b7 100644
--- a/lib/librte_eal/common/eal_common_vdev.c
+++ b/lib/librte_eal/common/eal_common_vdev.c
@@ -303,7 +303,7 @@  vdev_probe(void)
 
 static struct rte_device *
 vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
-		 const void *data)
+		const void *data)
 {
 	struct rte_vdev_device *dev;
 
@@ -318,6 +318,31 @@  vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
 	return NULL;
 }
 
+static struct rte_device *
+vdev_find_device_by_name(const struct rte_device *start,
+		rte_dev_cmp_name_t cmp_name,
+		const void *data)
+{
+	struct rte_vdev_device *dev;
+
+	TAILQ_FOREACH(dev, &vdev_device_list, next) {
+		if (start && &dev->device == start) {
+			start = NULL;
+			continue;
+		}
+		if (cmp_name(dev->device.name, data) == 0)
+			return &dev->device;
+	}
+	return NULL;
+}
+
+static int
+vdev_remap_device(struct rte_device *dev)
+{
+	RTE_SET_USED(dev);
+	return 0;
+}
+
 static int
 vdev_plug(struct rte_device *dev)
 {
@@ -334,9 +359,11 @@  static struct rte_bus rte_vdev_bus = {
 	.scan = vdev_scan,
 	.probe = vdev_probe,
 	.find_device = vdev_find_device,
+	.find_device_by_name = vdev_find_device_by_name,
 	.plug = vdev_plug,
 	.unplug = vdev_unplug,
 	.parse = vdev_parse,
+	.remap_device = vdev_remap_device,
 };
 
 RTE_REGISTER_BUS(vdev, rte_vdev_bus);
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 597d82e..4da42be 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -222,6 +222,18 @@  void pci_uio_free_resource(struct rte_pci_device *dev,
 		struct mapped_pci_resource *uio_res);
 
 /**
+ * remap the pci uio resource..
+ *
+ * @param dev
+ *   Point to the struct rte pci device.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+pci_uio_remap_resource(struct rte_pci_device *dev);
+
+/**
  * Map device memory to uio resource
  *
  * This function is private to EAL.
@@ -354,5 +366,4 @@  bool rte_eal_using_phys_addrs(void);
  *   NULL if no bus is able to parse this device.
  */
 struct rte_bus *rte_bus_find_by_device_name(const char *str);
-
 #endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index c79368d..3514014 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -107,6 +107,34 @@  typedef struct rte_device *
 			 const void *data);
 
 /**
+ * Device iterator to find a device on a bus.
+ *
+ * This function returns an rte_device if one of those held by the bus
+ * matches the data passed as parameter.
+ *
+ * If the comparison function returns zero this function should stop iterating
+ * over any more devices. To continue a search the device of a previous search
+ * can be passed via the start parameter.
+ *
+ * @param cmp
+ *	the device name comparison function.
+ *
+ * @param data
+ *	Data to compare each device against.
+ *
+ * @param start
+ *	starting point for the iteration
+ *
+ * @return
+ *	The first device matching the data, NULL if none exists.
+ */
+typedef struct rte_device *
+(*rte_bus_find_device_by_name_t)(const struct rte_device *start,
+			 rte_dev_cmp_name_t cmp,
+			 const void *data);
+
+
+/**
  * Implementation specific probe function which is responsible for linking
  * devices on that bus with applicable drivers.
  *
@@ -153,6 +181,20 @@  typedef int (*rte_bus_unplug_t)(struct rte_device *dev);
 typedef int (*rte_bus_parse_t)(const char *name, void *addr);
 
 /**
+ * Implementation specific remap function which is responsible for remmaping
+ * devices on that bus from original share memory resource to a private memory
+ * resource for the sake of device has been removal.
+ *
+ * @param dev
+ *	Device pointer that was returned by a previous call to find_device.
+ *
+ * @return
+ *	0 on success.
+ *	!0 on error.
+ */
+typedef int (*rte_bus_remap_device_t)(struct rte_device *dev);
+
+/**
  * Bus scan policies
  */
 enum rte_bus_scan_mode {
@@ -177,9 +219,12 @@  struct rte_bus {
 	rte_bus_scan_t scan;         /**< Scan for devices attached to bus */
 	rte_bus_probe_t probe;       /**< Probe devices on bus */
 	rte_bus_find_device_t find_device; /**< Find a device on the bus */
+	rte_bus_find_device_by_name_t find_device_by_name;
+				     /**< Find a device on the bus */
 	rte_bus_plug_t plug;         /**< Probe single device for drivers */
 	rte_bus_unplug_t unplug;     /**< Remove single device from driver */
 	rte_bus_parse_t parse;       /**< Parse a device name */
+	rte_bus_remap_device_t remap_device;       /**< remap a device */
 	struct rte_bus_conf conf;    /**< Bus configuration */
 };
 
@@ -276,6 +321,13 @@  struct rte_bus *rte_bus_find(const struct rte_bus *start, rte_bus_cmp_t cmp,
 struct rte_bus *rte_bus_find_by_device(const struct rte_device *dev);
 
 /**
+ * Find the registered bus for a particular device.
+ */
+struct rte_device *rte_bus_find_device(const struct rte_bus *bus,
+				const void *dev_name);
+
+
+/**
  * Find the registered bus for a given name.
  */
 struct rte_bus *rte_bus_find_by_name(const char *busname);
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index 5386d3a..9cea6bf 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -52,6 +52,15 @@  extern "C" {
 #include <rte_config.h>
 #include <rte_log.h>
 
+#include <exec-env/rte_dev.h>
+
+typedef int (*rte_eal_dev_cb_fn)(enum rte_eal_dev_event_type event,
+					void *cb_arg, void *ret_param);
+
+struct rte_eal_dev_callback;
+/** @internal Structure to keep track of registered callbacks */
+TAILQ_HEAD(rte_eal_dev_cb_list, rte_eal_dev_callback);
+
 __attribute__((format(printf, 2, 0)))
 static inline void
 rte_pmd_debug_trace(const char *func_name, const char *fmt, ...)
@@ -154,6 +163,13 @@  struct rte_driver {
 
 #define RTE_DEV_NAME_MAX_LEN (32)
 
+enum device_state {
+	DEVICE_UNDEFINED,
+	DEVICE_FAULT,
+	DEVICE_PARSED,
+	DEVICE_PROBED,
+};
+
 /**
  * A structure describing a generic device.
  */
@@ -163,6 +179,7 @@  struct rte_device {
 	const struct rte_driver *driver;/**< Associated driver */
 	int numa_node;                /**< NUMA node connection */
 	struct rte_devargs *devargs;  /**< Device user arguments */
+	enum device_state state;  /**< Device state */
 };
 
 /**
@@ -267,6 +284,8 @@  int rte_eal_hotplug_remove(const char *busname, const char *devname);
  */
 typedef int (*rte_dev_cmp_t)(const struct rte_device *dev, const void *data);
 
+typedef int (*rte_dev_cmp_name_t)(const char *dev_name, const void *data);
+
 #define RTE_PMD_EXPORT_NAME_ARRAY(n, idx) n##idx[]
 
 #define RTE_PMD_EXPORT_NAME(name, idx) \
@@ -312,4 +331,85 @@  __attribute__((used)) = str
 }
 #endif
 
-#endif /* _RTE_VDEV_H_ */
+/**
+ * It enable the device event monitoring for a specific event.
+ *
+ * @param none
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_eal_dev_monitor_enable(void);
+/**
+ * It registers the callback for the specific event. Multiple
+ * callbacks cal be registered at the same time.
+ * @param event
+ *  The device event type.
+ * @param cb_fn
+ *  callback address.
+ * @param cb_arg
+ *  address of parameter for callback.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+int rte_dev_callback_register(enum rte_eal_dev_event_type event,
+			rte_eal_dev_cb_fn cb_fn, void *cb_arg);
+
+/**
+ * It unregisters the callback according to the specified event.
+ *
+ * @param event
+ *  The event type which corresponding to the callback.
+ * @param cb_fn
+ *  callback address.
+ *  address of parameter for callback, (void *)-1 means to remove all
+ *  registered which has the same callback address.
+ *
+ * @return
+ *  - On success, return the number of callback entities removed.
+ *  - On failure, a negative value.
+ */
+int rte_dev_callback_unregister(enum rte_eal_dev_event_type event,
+			rte_eal_dev_cb_fn cb_fn, void *cb_arg);
+
+/**
+ * @internal Executes all the user application registered callbacks for
+ * the specific device. It is for DPDK internal user only. User
+ * application should not call it directly.
+ *
+ * @param event
+ *  The device event type.
+ * @param cb_arg
+ *  callback parameter.
+ * @param ret_param
+ *  To pass data back to user application.
+ *  This allows the user application to decide if a particular function
+ *  is permitted or not.
+ *
+ * @return
+ *  - On success, return zero.
+ *  - On failure, a negative value.
+ */
+int
+_rte_dev_callback_process(enum rte_eal_dev_event_type event,
+			void *cb_arg, void *ret_param);
+
+/**
+ * It can be used to bind a device to a specific type of driver.
+ *
+ * @param dev_name
+ *  The device name.
+ * @param drv_type
+ *  The specific driver's type.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+int
+rte_dev_bind_driver(const char *dev_name, const char *drv_type);
+
+#endif /* _RTE_DEV_H_ */
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index 8b12339..b33d53e 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -393,6 +393,32 @@  int rte_pci_map_device(struct rte_pci_device *dev);
 void rte_pci_unmap_device(struct rte_pci_device *dev);
 
 /**
+ * Remap this device
+ *
+ * @param dev
+ *   A pointer to a rte_pci_device structure describing the device
+ *   to use
+ */
+int rte_pci_remap_device(struct rte_pci_device *dev);
+
+/**
+ * @internal
+ * Map to a particular private resource.
+ *
+ * @param requested_addr
+ *      The starting address for the new mapping range.
+ * @param offset
+ *      The offset for the mapping range.
+ * @param size
+ *      The size for the mapping range.
+ * @return
+ *   - On success, the function returns a pointer to the mapped area.
+ *   - On error, the value MAP_FAILED is returned.
+ */
+void *pci_map_private_resource(void *requested_addr, off_t offset,
+		size_t size);
+
+/**
  * @internal
  * Map a particular resource from a file.
  *
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 90bca4d..34cf8b0 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -73,6 +73,7 @@  SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_dev.c
 
 # from common dir
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c
@@ -130,7 +131,7 @@  ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
 CFLAGS_eal_thread.o += -Wno-return-type
 endif
 
-INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h
+INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h rte_dev.h
 
 SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \
 	$(addprefix include/exec-env/,$(INC))
diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c
new file mode 100644
index 0000000..9478d29
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
@@ -0,0 +1,351 @@ 
+/*-
+ *   Copyright(c) 2010-2017 Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/queue.h>
+#include <sys/signalfd.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdbool.h>
+
+#include <rte_malloc.h>
+#include <rte_bus.h>
+#include <rte_dev.h>
+#include <rte_devargs.h>
+#include <rte_debug.h>
+#include <rte_log.h>
+
+#include "eal_thread.h"
+
+/* uev monitoring thread */
+static pthread_t uev_monitor_thread;
+
+bool udev_exit = true;
+
+bool no_request_thread = true;
+
+static void sig_handler(int signum)
+{
+	if (signum == SIGINT || signum == SIGTERM)
+		rte_dev_monitor_stop();
+}
+
+static int
+dev_monitor_fd_new(void)
+{
+
+	int uevent_fd;
+
+	uevent_fd = socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC |
+			SOCK_NONBLOCK,
+			NETLINK_KOBJECT_UEVENT);
+	if (uevent_fd < 0) {
+		RTE_LOG(ERR, EAL, "create uevent fd failed\n");
+		return -1;
+	}
+	return uevent_fd;
+}
+
+static int
+dev_monitor_enable(int netlink_fd)
+{
+	struct sockaddr_nl addr;
+	int ret;
+	int size = 64 * 1024;
+	int nonblock = 1;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.nl_family = AF_NETLINK;
+	addr.nl_pid = 0;
+	addr.nl_groups = 0xffffffff;
+
+	if (bind(netlink_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
+		RTE_LOG(ERR, EAL, "bind failed\n");
+		goto err;
+	}
+
+	setsockopt(netlink_fd, SOL_SOCKET, SO_PASSCRED, &size, sizeof(size));
+
+	ret = ioctl(netlink_fd, FIONBIO, &nonblock);
+	if (ret != 0) {
+		RTE_LOG(ERR, EAL, "ioctl(FIONBIO) failed\n");
+		goto err;
+	}
+	return 0;
+err:
+	close(netlink_fd);
+	return -1;
+}
+
+static int
+dev_uev_parse(const char *buf, struct rte_eal_uevent *event)
+{
+	char action[RTE_EAL_UEVENT_MSG_LEN];
+	char subsystem[RTE_EAL_UEVENT_MSG_LEN];
+	char dev_path[RTE_EAL_UEVENT_MSG_LEN];
+	char pci_slot_name[RTE_EAL_UEVENT_MSG_LEN];
+	int i = 0;
+
+	memset(action, 0, RTE_EAL_UEVENT_MSG_LEN);
+	memset(subsystem, 0, RTE_EAL_UEVENT_MSG_LEN);
+	memset(dev_path, 0, RTE_EAL_UEVENT_MSG_LEN);
+	memset(pci_slot_name, 0, RTE_EAL_UEVENT_MSG_LEN);
+
+	while (i < RTE_EAL_UEVENT_MSG_LEN) {
+		for (; i < RTE_EAL_UEVENT_MSG_LEN; i++) {
+			if (*buf)
+				break;
+			buf++;
+		}
+		if (!strncmp(buf, "libudev", 7)) {
+			buf += 7;
+			i += 7;
+			event->group = UEV_MONITOR_UDEV;
+		}
+		if (!strncmp(buf, "ACTION=", 7)) {
+			buf += 7;
+			i += 7;
+			snprintf(action, sizeof(action), "%s", buf);
+		} else if (!strncmp(buf, "DEVPATH=", 8)) {
+			buf += 8;
+			i += 8;
+			snprintf(dev_path, sizeof(dev_path), "%s", buf);
+		} else if (!strncmp(buf, "SUBSYSTEM=", 10)) {
+			buf += 10;
+			i += 10;
+			snprintf(subsystem, sizeof(subsystem), "%s", buf);
+		} else if (!strncmp(buf, "PCI_SLOT_NAME=", 14)) {
+			buf += 14;
+			i += 14;
+			snprintf(pci_slot_name, sizeof(subsystem), "%s", buf);
+		}
+		for (; i < RTE_EAL_UEVENT_MSG_LEN; i++) {
+			if (*buf == '\0')
+				break;
+			buf++;
+		}
+	}
+
+	if (!strncmp(subsystem, "pci", 3))
+		event->subsystem = UEV_SUBSYSTEM_PCI;
+	if (!strncmp(action, "add", 3))
+		event->type = RTE_EAL_DEV_EVENT_ADD;
+	if (!strncmp(action, "remove", 6))
+		event->type = RTE_EAL_DEV_EVENT_REMOVE;
+	event->devname = pci_slot_name;
+
+	return 0;
+}
+
+static int
+dev_uev_receive(int fd, struct rte_eal_uevent *uevent)
+{
+	int ret;
+	char buf[RTE_EAL_UEVENT_MSG_LEN];
+
+	memset(uevent, 0, sizeof(struct rte_eal_uevent));
+	memset(buf, 0, RTE_EAL_UEVENT_MSG_LEN);
+
+	ret = recv(fd, buf, RTE_EAL_UEVENT_MSG_LEN - 1, MSG_DONTWAIT);
+	if (ret < 0) {
+		RTE_LOG(ERR, EAL,
+		"Socket read error(%d): %s\n",
+		errno, strerror(errno));
+		return -1;
+	} else if (ret == 0)
+		/* connection closed */
+		return -1;
+
+	return dev_uev_parse(buf, uevent);
+}
+
+static int
+dev_uev_process(struct epoll_event *events, int nfds)
+{
+	struct rte_bus *bus;
+	struct rte_device *dev;
+	struct rte_eal_uevent uevent;
+	int ret;
+	int i;
+
+	for (i = 0; i < nfds; i++) {
+		/**
+		 * check device uevent from kernel side, no need to check
+		 * uevent from udev.
+		 */
+		if ((dev_uev_receive(events[i].data.fd, &uevent)) ||
+			(uevent.group == UEV_MONITOR_UDEV))
+			return 0;
+		if (uevent.subsystem == UEV_SUBSYSTEM_PCI) {
+			bus = rte_bus_find_by_name("pci");
+			dev = rte_bus_find_device(bus, uevent.devname);
+			if (uevent.type == RTE_EAL_DEV_EVENT_REMOVE) {
+				if (!dev)
+					return 0;
+				dev->state = DEVICE_FAULT;
+				/**
+				 * remap the resource to be fake
+				 * before user's removal processing
+				 */
+				ret = bus->remap_device(dev);
+				if (!ret)
+					return(_rte_dev_callback_process(
+					  RTE_EAL_DEV_EVENT_REMOVE,
+					  NULL, NULL));
+			} else if (uevent.type == RTE_EAL_DEV_EVENT_ADD) {
+				if (dev == NULL) {
+					/**
+					 * bind the drvier to the device
+					 * before user's add processing
+					 */
+					rte_dev_bind_driver(
+						uevent.devname,
+						"igb_uio");
+					return(_rte_dev_callback_process(
+					  RTE_EAL_DEV_EVENT_ADD,
+					  uevent.devname, NULL));
+				}
+			}
+		}
+	}
+	return 0;
+}
+
+/**
+ * It builds/rebuilds up the epoll file descriptor with all the
+ * file descriptors being waited on. Then handles the interrupts.
+ *
+ * @param arg
+ *  pointer. (unused)
+ *
+ * @return
+ *  never return;
+ */
+static __attribute__((noreturn)) void *
+dev_uev_monitoring(__rte_unused void *arg)
+{
+	struct sigaction act;
+	sigset_t mask;
+	int netlink_fd;
+	struct epoll_event ep_kernel;
+	int fd_ep;
+
+	udev_exit = false;
+
+	/* set signal handlers */
+	memset(&act, 0x00, sizeof(struct sigaction));
+	act.sa_handler = sig_handler;
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = SA_RESTART;
+	sigaction(SIGINT, &act, NULL);
+	sigaction(SIGTERM, &act, NULL);
+	sigemptyset(&mask);
+	sigaddset(&mask, SIGINT);
+	sigaddset(&mask, SIGTERM);
+	sigprocmask(SIG_UNBLOCK, &mask, NULL);
+
+	fd_ep = epoll_create1(EPOLL_CLOEXEC);
+	if (fd_ep < 0) {
+		RTE_LOG(ERR, EAL, "error creating epoll fd: %m\n");
+		goto out;
+	}
+
+	netlink_fd = dev_monitor_fd_new();
+
+	if (dev_monitor_enable(netlink_fd) < 0) {
+		RTE_LOG(ERR, EAL, "error subscribing to kernel events\n");
+		goto out;
+	}
+
+	memset(&ep_kernel, 0, sizeof(struct epoll_event));
+	ep_kernel.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+	ep_kernel.data.fd = netlink_fd;
+	if (epoll_ctl(fd_ep, EPOLL_CTL_ADD, netlink_fd,
+		&ep_kernel) < 0) {
+		RTE_LOG(ERR, EAL, "error addding fd to epoll: %m\n");
+		goto out;
+	}
+
+	while (!udev_exit) {
+		int fdcount;
+		struct epoll_event ev[1];
+
+		fdcount = epoll_wait(fd_ep, ev, 1, -1);
+		if (fdcount < 0) {
+			if (errno != EINTR)
+				RTE_LOG(ERR, EAL, "error receiving uevent "
+					"message: %m\n");
+				continue;
+			}
+
+		/* epoll_wait has at least one fd ready to read */
+		if (dev_uev_process(ev, fdcount) < 0) {
+			if (errno != EINTR)
+				RTE_LOG(ERR, EAL, "error processing uevent "
+					"message: %m\n");
+		}
+	}
+out:
+	if (fd_ep >= 0)
+		close(fd_ep);
+	if (netlink_fd >= 0)
+		close(netlink_fd);
+	rte_panic("uev monitoring fail\n");
+}
+
+int
+rte_dev_monitor_start(void)
+{
+	int ret;
+
+	if (!no_request_thread)
+		return 0;
+	no_request_thread = false;
+
+	/* create the host thread to wait/handle the uevent from kernel */
+	ret = pthread_create(&uev_monitor_thread, NULL,
+		dev_uev_monitoring, NULL);
+	return ret;
+}
+
+int
+rte_dev_monitor_stop(void)
+{
+	udev_exit = true;
+	no_request_thread = true;
+	return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 8951ce7..6438047 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -141,6 +141,39 @@  rte_pci_unmap_device(struct rte_pci_device *dev)
 	}
 }
 
+/* Map pci device */
+int
+rte_pci_remap_device(struct rte_pci_device *dev)
+{
+	int ret = -1;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	/* try mapping the NIC resources using VFIO if it exists */
+	switch (dev->kdrv) {
+	case RTE_KDRV_VFIO:
+#ifdef VFIO_PRESENT
+		/* no thing to do */
+#endif
+		break;
+	case RTE_KDRV_IGB_UIO:
+	case RTE_KDRV_UIO_GENERIC:
+		if (rte_eal_using_phys_addrs()) {
+			/* map resources for devices that use uio */
+			ret = pci_uio_remap_resource(dev);
+		}
+		break;
+	default:
+		RTE_LOG(DEBUG, EAL,
+			"  Not managed by a supported kernel driver, skipped\n");
+		ret = 1;
+		break;
+	}
+
+	return ret;
+}
+
 void *
 pci_find_max_end_va(void)
 {
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dev.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dev.h
new file mode 100644
index 0000000..7d2c3c3
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dev.h
@@ -0,0 +1,105 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_DEV_H_
+#error "don't include this file directly, please include generic <rte_dev.h>"
+#endif
+
+#ifndef _RTE_LINUXAPP_DEV_H_
+#define _RTE_LINUXAPP_DEV_H_
+
+#include <stdio.h>
+
+#include <rte_dev.h>
+
+#define RTE_EAL_UEVENT_MSG_LEN 4096
+
+enum uev_subsystem {
+	UEV_SUBSYSTEM_UIO,
+	UEV_SUBSYSTEM_VFIO,
+	UEV_SUBSYSTEM_PCI,
+	UEV_SUBSYSTEM_MAX
+};
+
+enum uev_monitor_netlink_group {
+	UEV_MONITOR_KERNEL,
+	UEV_MONITOR_UDEV,
+};
+
+/**
+ * The device event type.
+ */
+enum rte_eal_dev_event_type {
+	RTE_EAL_DEV_EVENT_UNKNOWN,	/**< unknown event type */
+	RTE_EAL_DEV_EVENT_ADD,		/**< device adding event */
+	RTE_EAL_DEV_EVENT_REMOVE,
+					/**< device removing event */
+	RTE_EAL_DEV_EVENT_CHANGE,
+					/**< device status change event */
+	RTE_EAL_DEV_EVENT_MOVE,		/**< device sys path move event */
+	RTE_EAL_DEV_EVENT_ONLINE,	/**< device online event */
+	RTE_EAL_DEV_EVENT_OFFLINE,	/**< device offline event */
+	RTE_EAL_DEV_EVENT_MAX		/**< max value of this enum */
+};
+
+struct rte_eal_uevent {
+	enum rte_eal_dev_event_type type;	/**< device event type */
+	int subsystem;				/**< subsystem id */
+	char *devname;				/**< device name */
+	enum uev_monitor_netlink_group group;	/**< device netlink group */
+};
+
+/**
+ * Start the device uevent monitoring.
+ *
+ * @param none
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_dev_monitor_start(void);
+
+/**
+ * Stop the device uevent monitoring .
+ *
+ * @param none
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+
+int
+rte_dev_monitor_stop(void);
+
+#endif /* _RTE_LINUXAPP_DEV_H_ */