[V4,6/9] eal: add failure handle mechanism for hot plug

Message ID 1530268248-7328-7-git-send-email-jia.guo@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series [V4,1/9] bus: introduce hotplug failure handler |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation fail apply issues

Commit Message

Guo, Jia June 29, 2018, 10:30 a.m. UTC
  This patch introduces a failure handler mechanism to handle device
hot plug removal event.

First register sigbus handler, once sigbus error be captured, will
check the failure address and accordingly remap the invalid memory
for the corresponding device. Bese on this mechanism, it could
guaranty the application not to be crash when hot unplug devices.

Signed-off-by: Jeff Guo <jia.guo@intel.com>
---
v4->v3:
split patches to be small and clear.
---
 lib/librte_eal/linuxapp/eal/eal_dev.c | 88 ++++++++++++++++++++++++++++++++++-
 1 file changed, 87 insertions(+), 1 deletion(-)
  

Comments

Ananyev, Konstantin June 29, 2018, 10:49 a.m. UTC | #1
Hi Jeff,

> 
> This patch introduces a failure handler mechanism to handle device
> hot plug removal event.
> 
> First register sigbus handler, once sigbus error be captured, will
> check the failure address and accordingly remap the invalid memory
> for the corresponding device. Bese on this mechanism, it could
> guaranty the application not to be crash when hot unplug devices.
> 
> Signed-off-by: Jeff Guo <jia.guo@intel.com>
> ---
> v4->v3:
> split patches to be small and clear.
> ---
>  lib/librte_eal/linuxapp/eal/eal_dev.c | 88 ++++++++++++++++++++++++++++++++++-
>  1 file changed, 87 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c
> index 1cf6aeb..c9dddab 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_dev.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
> @@ -4,6 +4,8 @@
> 
>  #include <string.h>
>  #include <unistd.h>
> +#include <fcntl.h>
> +#include <signal.h>
>  #include <sys/socket.h>
>  #include <linux/netlink.h>
> 
> @@ -14,15 +16,24 @@
>  #include <rte_malloc.h>
>  #include <rte_interrupts.h>
>  #include <rte_alarm.h>
> +#include <rte_bus.h>
> +#include <rte_eal.h>
> +#include <rte_spinlock.h>
> +#include <rte_errno.h>
> 
>  #include "eal_private.h"
> 
>  static struct rte_intr_handle intr_handle = {.fd = -1 };
>  static bool monitor_started;
> 
> +extern struct rte_bus_list rte_bus_list;
> +
>  #define EAL_UEV_MSG_LEN 4096
>  #define EAL_UEV_MSG_ELEM_LEN 128
> 
> +/* spinlock for device failure process */
> +static rte_spinlock_t dev_failure_lock = RTE_SPINLOCK_INITIALIZER;
> +
>  static void dev_uev_handler(__rte_unused void *param);
> 
>  /* identify the system layer which reports this event. */
> @@ -33,6 +44,34 @@ enum eal_dev_event_subsystem {
>  	EAL_DEV_EVENT_SUBSYSTEM_MAX
>  };
> 
> +static void sigbus_handler(int signum __rte_unused, siginfo_t *info,
> +				void *ctx __rte_unused)
> +{
> +	int ret;
> +
> +	RTE_LOG(DEBUG, EAL, "Thread[%d] catch SIGBUS, fault address:%p\n",
> +		(int)pthread_self(), info->si_addr);
> +
> +	rte_spinlock_lock(&dev_failure_lock);
> +	ret = rte_bus_sigbus_handler(info->si_addr);
> +	rte_spinlock_unlock(&dev_failure_lock);
> +	if (!ret)
> +		RTE_LOG(INFO, EAL,
> +			"Success to handle SIGBUS error for hotplug!\n");
> +	else
> +		rte_exit(EXIT_FAILURE,
> +			 "A generic SIGBUS error, (rte_errno: %s)!",
> +			 strerror(rte_errno));
> +}

As I said in comments for previous versions:
I think we need to distinguish why do we fail -
 1) address doesn't belong to any device,
 2) we failed to remap
For 1) we probably need to call previous sigbus handler.

> +
> +static int cmp_dev_name(const struct rte_device *dev,
> +	const void *_name)
> +{
> +	const char *name = _name;
> +
> +	return strcmp(dev->name, name);
> +}
> +
>  static int
>  dev_uev_socket_fd_create(void)
>  {
> @@ -147,6 +186,9 @@ dev_uev_handler(__rte_unused void *param)
>  	struct rte_dev_event uevent;
>  	int ret;
>  	char buf[EAL_UEV_MSG_LEN];
> +	struct rte_bus *bus;
> +	struct rte_device *dev;
> +	const char *busname;
> 
>  	memset(&uevent, 0, sizeof(struct rte_dev_event));
>  	memset(buf, 0, EAL_UEV_MSG_LEN);
> @@ -171,13 +213,48 @@ dev_uev_handler(__rte_unused void *param)
>  	RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
>  		uevent.devname, uevent.type, uevent.subsystem);
> 
> -	if (uevent.devname)
> +	switch (uevent.subsystem) {
> +	case EAL_DEV_EVENT_SUBSYSTEM_PCI:
> +	case EAL_DEV_EVENT_SUBSYSTEM_UIO:
> +		busname = "pci";
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (uevent.devname) {
> +		if (uevent.type == RTE_DEV_EVENT_REMOVE) {
> +			bus = rte_bus_find_by_name(busname);
> +			if (bus == NULL) {
> +				RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
> +					busname);
> +				return;
> +			}
> +			dev = bus->find_device(NULL, cmp_dev_name,
> +					       uevent.devname);
> +			if (dev == NULL) {
> +				RTE_LOG(ERR, EAL, "Cannot find device (%s) on "
> +					"bus (%s)\n", uevent.devname, busname);
> +				return;
> +			}
> +			rte_spinlock_lock(&dev_failure_lock);
> +			ret = bus->hotplug_handler(dev);
> +			rte_spinlock_unlock(&dev_failure_lock);

Ok, but this function is executed from interrupt thread, correct?
What would happen if user would do dev-detach() at the same time and dev would not be valid anymore?
Shouldn't we have a lock (per bus?) that we would grab before find_device() and release after hotplug_handler? 
Though in that case we probably need to revisit other bus ops too.

> +			if (ret) {
> +				RTE_LOG(ERR, EAL, "Can not handle hotplug for "
> +					"device (%s)\n", dev->name);
> +				return;
> +			}
> +		}
>  		dev_callback_process(uevent.devname, uevent.type);
> +	}
>  }
> 
>  int __rte_experimental
>  rte_dev_event_monitor_start(void)
>  {
> +	sigset_t mask;
> +	struct sigaction action;
>  	int ret;
> 
>  	if (monitor_started)
> @@ -197,6 +274,14 @@ rte_dev_event_monitor_start(void)
>  		return -1;
>  	}
> 
> +	/* register sigbus handler */
> +	sigemptyset(&mask);
> +	sigaddset(&mask, SIGBUS);
> +	action.sa_flags = SA_SIGINFO;
> +	action.sa_mask = mask;
> +	action.sa_sigaction = sigbus_handler;
> +	sigaction(SIGBUS, &action, NULL);
> +

I still think we have to save (and restore at monitor_stop) previous sigbus handler.

>  	monitor_started = true;
> 
>  	return 0;
> @@ -220,5 +305,6 @@ rte_dev_event_monitor_stop(void)
>  	close(intr_handle.fd);
>  	intr_handle.fd = -1;
>  	monitor_started = false;
> +
>  	return 0;
>  }
> --
> 2.7.4
  
Guo, Jia June 29, 2018, 11:15 a.m. UTC | #2
hi,konstantin


On 6/29/2018 6:49 PM, Ananyev, Konstantin wrote:
> Hi Jeff,
>
>> This patch introduces a failure handler mechanism to handle device
>> hot plug removal event.
>>
>> First register sigbus handler, once sigbus error be captured, will
>> check the failure address and accordingly remap the invalid memory
>> for the corresponding device. Bese on this mechanism, it could
>> guaranty the application not to be crash when hot unplug devices.
>>
>> Signed-off-by: Jeff Guo <jia.guo@intel.com>
>> ---
>> v4->v3:
>> split patches to be small and clear.
>> ---
>>   lib/librte_eal/linuxapp/eal/eal_dev.c | 88 ++++++++++++++++++++++++++++++++++-
>>   1 file changed, 87 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c
>> index 1cf6aeb..c9dddab 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal_dev.c
>> +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
>> @@ -4,6 +4,8 @@
>>
>>   #include <string.h>
>>   #include <unistd.h>
>> +#include <fcntl.h>
>> +#include <signal.h>
>>   #include <sys/socket.h>
>>   #include <linux/netlink.h>
>>
>> @@ -14,15 +16,24 @@
>>   #include <rte_malloc.h>
>>   #include <rte_interrupts.h>
>>   #include <rte_alarm.h>
>> +#include <rte_bus.h>
>> +#include <rte_eal.h>
>> +#include <rte_spinlock.h>
>> +#include <rte_errno.h>
>>
>>   #include "eal_private.h"
>>
>>   static struct rte_intr_handle intr_handle = {.fd = -1 };
>>   static bool monitor_started;
>>
>> +extern struct rte_bus_list rte_bus_list;
>> +
>>   #define EAL_UEV_MSG_LEN 4096
>>   #define EAL_UEV_MSG_ELEM_LEN 128
>>
>> +/* spinlock for device failure process */
>> +static rte_spinlock_t dev_failure_lock = RTE_SPINLOCK_INITIALIZER;
>> +
>>   static void dev_uev_handler(__rte_unused void *param);
>>
>>   /* identify the system layer which reports this event. */
>> @@ -33,6 +44,34 @@ enum eal_dev_event_subsystem {
>>   	EAL_DEV_EVENT_SUBSYSTEM_MAX
>>   };
>>
>> +static void sigbus_handler(int signum __rte_unused, siginfo_t *info,
>> +				void *ctx __rte_unused)
>> +{
>> +	int ret;
>> +
>> +	RTE_LOG(DEBUG, EAL, "Thread[%d] catch SIGBUS, fault address:%p\n",
>> +		(int)pthread_self(), info->si_addr);
>> +
>> +	rte_spinlock_lock(&dev_failure_lock);
>> +	ret = rte_bus_sigbus_handler(info->si_addr);
>> +	rte_spinlock_unlock(&dev_failure_lock);
>> +	if (!ret)
>> +		RTE_LOG(INFO, EAL,
>> +			"Success to handle SIGBUS error for hotplug!\n");
>> +	else
>> +		rte_exit(EXIT_FAILURE,
>> +			 "A generic SIGBUS error, (rte_errno: %s)!",
>> +			 strerror(rte_errno));
>> +}
> As I said in comments for previous versions:
> I think we need to distinguish why do we fail -
>   1) address doesn't belong to any device,
>   2) we failed to remap
> For 1) we probably need to call previous sigbus handler.

i know your point, but i think what ever 1) or 2), we should also need 
to call previous sigbus handler to show exception of the memory error, 
to cut down any other after try to run.
and for the previous sigbus handler, i still not find a explicit call to 
use it, i think it the sigbus handler could be restore but only will use 
when next error occur, right?
if so, do you think i just use a rte_exit to replace this origin handler 
is make sense?

>> +
>> +static int cmp_dev_name(const struct rte_device *dev,
>> +	const void *_name)
>> +{
>> +	const char *name = _name;
>> +
>> +	return strcmp(dev->name, name);
>> +}
>> +
>>   static int
>>   dev_uev_socket_fd_create(void)
>>   {
>> @@ -147,6 +186,9 @@ dev_uev_handler(__rte_unused void *param)
>>   	struct rte_dev_event uevent;
>>   	int ret;
>>   	char buf[EAL_UEV_MSG_LEN];
>> +	struct rte_bus *bus;
>> +	struct rte_device *dev;
>> +	const char *busname;
>>
>>   	memset(&uevent, 0, sizeof(struct rte_dev_event));
>>   	memset(buf, 0, EAL_UEV_MSG_LEN);
>> @@ -171,13 +213,48 @@ dev_uev_handler(__rte_unused void *param)
>>   	RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
>>   		uevent.devname, uevent.type, uevent.subsystem);
>>
>> -	if (uevent.devname)
>> +	switch (uevent.subsystem) {
>> +	case EAL_DEV_EVENT_SUBSYSTEM_PCI:
>> +	case EAL_DEV_EVENT_SUBSYSTEM_UIO:
>> +		busname = "pci";
>> +		break;
>> +	default:
>> +		break;
>> +	}
>> +
>> +	if (uevent.devname) {
>> +		if (uevent.type == RTE_DEV_EVENT_REMOVE) {
>> +			bus = rte_bus_find_by_name(busname);
>> +			if (bus == NULL) {
>> +				RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
>> +					busname);
>> +				return;
>> +			}
>> +			dev = bus->find_device(NULL, cmp_dev_name,
>> +					       uevent.devname);
>> +			if (dev == NULL) {
>> +				RTE_LOG(ERR, EAL, "Cannot find device (%s) on "
>> +					"bus (%s)\n", uevent.devname, busname);
>> +				return;
>> +			}
>> +			rte_spinlock_lock(&dev_failure_lock);
>> +			ret = bus->hotplug_handler(dev);
>> +			rte_spinlock_unlock(&dev_failure_lock);
> Ok, but this function is executed from interrupt thread, correct?

yes.

> What would happen if user would do dev-detach() at the same time and dev would not be valid anymore?
> Shouldn't we have a lock (per bus?) that we would grab before find_device() and release after hotplug_handler?
> Though in that case we probably need to revisit other bus ops too.

make sense, i think should be the case and need lock any bus ops here to 
sync.

>> +			if (ret) {
>> +				RTE_LOG(ERR, EAL, "Can not handle hotplug for "
>> +					"device (%s)\n", dev->name);
>> +				return;
>> +			}
>> +		}
>>   		dev_callback_process(uevent.devname, uevent.type);
>> +	}
>>   }
>>
>>   int __rte_experimental
>>   rte_dev_event_monitor_start(void)
>>   {
>> +	sigset_t mask;
>> +	struct sigaction action;
>>   	int ret;
>>
>>   	if (monitor_started)
>> @@ -197,6 +274,14 @@ rte_dev_event_monitor_start(void)
>>   		return -1;
>>   	}
>>
>> +	/* register sigbus handler */
>> +	sigemptyset(&mask);
>> +	sigaddset(&mask, SIGBUS);
>> +	action.sa_flags = SA_SIGINFO;
>> +	action.sa_mask = mask;
>> +	action.sa_sigaction = sigbus_handler;
>> +	sigaction(SIGBUS, &action, NULL);
>> +
> I still think we have to save (and restore at monitor_stop) previous sigbus handler.

ok, i think i missing here, if monitor_stop, definitely should restore 
the previous sigbus handler.

>>   	monitor_started = true;
>>
>>   	return 0;
>> @@ -220,5 +305,6 @@ rte_dev_event_monitor_stop(void)
>>   	close(intr_handle.fd);
>>   	intr_handle.fd = -1;
>>   	monitor_started = false;
>> +
>>   	return 0;
>>   }
>> --
>> 2.7.4
  
Ananyev, Konstantin June 29, 2018, 12:06 p.m. UTC | #3
> -----Original Message-----
> From: Guo, Jia
> Sent: Friday, June 29, 2018 12:15 PM
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; stephen@networkplumber.org; Richardson, Bruce
> <bruce.richardson@intel.com>; Yigit, Ferruh <ferruh.yigit@intel.com>; gaetan.rivet@6wind.com; Wu, Jingjing
> <jingjing.wu@intel.com>; thomas@monjalon.net; motih@mellanox.com; matan@mellanox.com; Van Haaren, Harry
> <harry.van.haaren@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; He, Shaopeng <shaopeng.he@intel.com>; Iremonger, Bernard
> <bernard.iremonger@intel.com>
> Cc: jblunck@infradead.org; shreyansh.jain@nxp.com; dev@dpdk.org; Zhang, Helin <helin.zhang@intel.com>
> Subject: Re: [PATCH V4 6/9] eal: add failure handle mechanism for hot plug
> 
> hi,konstantin
> 
> 
> On 6/29/2018 6:49 PM, Ananyev, Konstantin wrote:
> > Hi Jeff,
> >
> >> This patch introduces a failure handler mechanism to handle device
> >> hot plug removal event.
> >>
> >> First register sigbus handler, once sigbus error be captured, will
> >> check the failure address and accordingly remap the invalid memory
> >> for the corresponding device. Bese on this mechanism, it could
> >> guaranty the application not to be crash when hot unplug devices.
> >>
> >> Signed-off-by: Jeff Guo <jia.guo@intel.com>
> >> ---
> >> v4->v3:
> >> split patches to be small and clear.
> >> ---
> >>   lib/librte_eal/linuxapp/eal/eal_dev.c | 88 ++++++++++++++++++++++++++++++++++-
> >>   1 file changed, 87 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c
> >> index 1cf6aeb..c9dddab 100644
> >> --- a/lib/librte_eal/linuxapp/eal/eal_dev.c
> >> +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
> >> @@ -4,6 +4,8 @@
> >>
> >>   #include <string.h>
> >>   #include <unistd.h>
> >> +#include <fcntl.h>
> >> +#include <signal.h>
> >>   #include <sys/socket.h>
> >>   #include <linux/netlink.h>
> >>
> >> @@ -14,15 +16,24 @@
> >>   #include <rte_malloc.h>
> >>   #include <rte_interrupts.h>
> >>   #include <rte_alarm.h>
> >> +#include <rte_bus.h>
> >> +#include <rte_eal.h>
> >> +#include <rte_spinlock.h>
> >> +#include <rte_errno.h>
> >>
> >>   #include "eal_private.h"
> >>
> >>   static struct rte_intr_handle intr_handle = {.fd = -1 };
> >>   static bool monitor_started;
> >>
> >> +extern struct rte_bus_list rte_bus_list;
> >> +
> >>   #define EAL_UEV_MSG_LEN 4096
> >>   #define EAL_UEV_MSG_ELEM_LEN 128
> >>
> >> +/* spinlock for device failure process */
> >> +static rte_spinlock_t dev_failure_lock = RTE_SPINLOCK_INITIALIZER;
> >> +
> >>   static void dev_uev_handler(__rte_unused void *param);
> >>
> >>   /* identify the system layer which reports this event. */
> >> @@ -33,6 +44,34 @@ enum eal_dev_event_subsystem {
> >>   	EAL_DEV_EVENT_SUBSYSTEM_MAX
> >>   };
> >>
> >> +static void sigbus_handler(int signum __rte_unused, siginfo_t *info,
> >> +				void *ctx __rte_unused)
> >> +{
> >> +	int ret;
> >> +
> >> +	RTE_LOG(DEBUG, EAL, "Thread[%d] catch SIGBUS, fault address:%p\n",
> >> +		(int)pthread_self(), info->si_addr);
> >> +
> >> +	rte_spinlock_lock(&dev_failure_lock);
> >> +	ret = rte_bus_sigbus_handler(info->si_addr);
> >> +	rte_spinlock_unlock(&dev_failure_lock);
> >> +	if (!ret)
> >> +		RTE_LOG(INFO, EAL,
> >> +			"Success to handle SIGBUS error for hotplug!\n");
> >> +	else
> >> +		rte_exit(EXIT_FAILURE,
> >> +			 "A generic SIGBUS error, (rte_errno: %s)!",
> >> +			 strerror(rte_errno));
> >> +}
> > As I said in comments for previous versions:
> > I think we need to distinguish why do we fail -
> >   1) address doesn't belong to any device,
> >   2) we failed to remap
> > For 1) we probably need to call previous sigbus handler.
> 
> i know your point, but i think what ever 1) or 2), we should also need
> to call previous sigbus handler to show exception of the memory error,
> to cut down any other after try to run.

I don't agree.
If 1) - that error doesn't belong to us (DPDK), but user app might know how to handle it.
So we just invoke previously saved previous (if any) sigbus handler.
for 2) - there is not much we can do but rte_exit(). 

> and for the previous sigbus handler, i still not find a explicit call to
> use it, i think it the sigbus handler could be restore but only will use
> when next error occur, right?
> if so, do you think i just use a rte_exit to replace this origin handler
> is make sense?
> 
> >> +
> >> +static int cmp_dev_name(const struct rte_device *dev,
> >> +	const void *_name)
> >> +{
> >> +	const char *name = _name;
> >> +
> >> +	return strcmp(dev->name, name);
> >> +}
> >> +
> >>   static int
> >>   dev_uev_socket_fd_create(void)
> >>   {
> >> @@ -147,6 +186,9 @@ dev_uev_handler(__rte_unused void *param)
> >>   	struct rte_dev_event uevent;
> >>   	int ret;
> >>   	char buf[EAL_UEV_MSG_LEN];
> >> +	struct rte_bus *bus;
> >> +	struct rte_device *dev;
> >> +	const char *busname;
> >>
> >>   	memset(&uevent, 0, sizeof(struct rte_dev_event));
> >>   	memset(buf, 0, EAL_UEV_MSG_LEN);
> >> @@ -171,13 +213,48 @@ dev_uev_handler(__rte_unused void *param)
> >>   	RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
> >>   		uevent.devname, uevent.type, uevent.subsystem);
> >>
> >> -	if (uevent.devname)
> >> +	switch (uevent.subsystem) {
> >> +	case EAL_DEV_EVENT_SUBSYSTEM_PCI:
> >> +	case EAL_DEV_EVENT_SUBSYSTEM_UIO:
> >> +		busname = "pci";
> >> +		break;
> >> +	default:
> >> +		break;
> >> +	}
> >> +
> >> +	if (uevent.devname) {
> >> +		if (uevent.type == RTE_DEV_EVENT_REMOVE) {
> >> +			bus = rte_bus_find_by_name(busname);
> >> +			if (bus == NULL) {
> >> +				RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
> >> +					busname);
> >> +				return;
> >> +			}
> >> +			dev = bus->find_device(NULL, cmp_dev_name,
> >> +					       uevent.devname);
> >> +			if (dev == NULL) {
> >> +				RTE_LOG(ERR, EAL, "Cannot find device (%s) on "
> >> +					"bus (%s)\n", uevent.devname, busname);
> >> +				return;
> >> +			}
> >> +			rte_spinlock_lock(&dev_failure_lock);
> >> +			ret = bus->hotplug_handler(dev);
> >> +			rte_spinlock_unlock(&dev_failure_lock);
> > Ok, but this function is executed from interrupt thread, correct?
> 
> yes.
> 
> > What would happen if user would do dev-detach() at the same time and dev would not be valid anymore?
> > Shouldn't we have a lock (per bus?) that we would grab before find_device() and release after hotplug_handler?
> > Though in that case we probably need to revisit other bus ops too.
> 
> make sense, i think should be the case and need lock any bus ops here to
> sync.
> 
> >> +			if (ret) {
> >> +				RTE_LOG(ERR, EAL, "Can not handle hotplug for "
> >> +					"device (%s)\n", dev->name);
> >> +				return;
> >> +			}
> >> +		}
> >>   		dev_callback_process(uevent.devname, uevent.type);
> >> +	}
> >>   }
> >>
> >>   int __rte_experimental
> >>   rte_dev_event_monitor_start(void)
> >>   {
> >> +	sigset_t mask;
> >> +	struct sigaction action;
> >>   	int ret;
> >>
> >>   	if (monitor_started)
> >> @@ -197,6 +274,14 @@ rte_dev_event_monitor_start(void)
> >>   		return -1;
> >>   	}
> >>
> >> +	/* register sigbus handler */
> >> +	sigemptyset(&mask);
> >> +	sigaddset(&mask, SIGBUS);
> >> +	action.sa_flags = SA_SIGINFO;
> >> +	action.sa_mask = mask;
> >> +	action.sa_sigaction = sigbus_handler;
> >> +	sigaction(SIGBUS, &action, NULL);
> >> +
> > I still think we have to save (and restore at monitor_stop) previous sigbus handler.
> 
> ok, i think i missing here, if monitor_stop, definitely should restore
> the previous sigbus handler.
> 
> >>   	monitor_started = true;
> >>
> >>   	return 0;
> >> @@ -220,5 +305,6 @@ rte_dev_event_monitor_stop(void)
> >>   	close(intr_handle.fd);
> >>   	intr_handle.fd = -1;
> >>   	monitor_started = false;
> >> +
> >>   	return 0;
> >>   }
> >> --
> >> 2.7.4
  

Patch

diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c
index 1cf6aeb..c9dddab 100644
--- a/lib/librte_eal/linuxapp/eal/eal_dev.c
+++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
@@ -4,6 +4,8 @@ 
 
 #include <string.h>
 #include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
 #include <sys/socket.h>
 #include <linux/netlink.h>
 
@@ -14,15 +16,24 @@ 
 #include <rte_malloc.h>
 #include <rte_interrupts.h>
 #include <rte_alarm.h>
+#include <rte_bus.h>
+#include <rte_eal.h>
+#include <rte_spinlock.h>
+#include <rte_errno.h>
 
 #include "eal_private.h"
 
 static struct rte_intr_handle intr_handle = {.fd = -1 };
 static bool monitor_started;
 
+extern struct rte_bus_list rte_bus_list;
+
 #define EAL_UEV_MSG_LEN 4096
 #define EAL_UEV_MSG_ELEM_LEN 128
 
+/* spinlock for device failure process */
+static rte_spinlock_t dev_failure_lock = RTE_SPINLOCK_INITIALIZER;
+
 static void dev_uev_handler(__rte_unused void *param);
 
 /* identify the system layer which reports this event. */
@@ -33,6 +44,34 @@  enum eal_dev_event_subsystem {
 	EAL_DEV_EVENT_SUBSYSTEM_MAX
 };
 
+static void sigbus_handler(int signum __rte_unused, siginfo_t *info,
+				void *ctx __rte_unused)
+{
+	int ret;
+
+	RTE_LOG(DEBUG, EAL, "Thread[%d] catch SIGBUS, fault address:%p\n",
+		(int)pthread_self(), info->si_addr);
+
+	rte_spinlock_lock(&dev_failure_lock);
+	ret = rte_bus_sigbus_handler(info->si_addr);
+	rte_spinlock_unlock(&dev_failure_lock);
+	if (!ret)
+		RTE_LOG(INFO, EAL,
+			"Success to handle SIGBUS error for hotplug!\n");
+	else
+		rte_exit(EXIT_FAILURE,
+			 "A generic SIGBUS error, (rte_errno: %s)!",
+			 strerror(rte_errno));
+}
+
+static int cmp_dev_name(const struct rte_device *dev,
+	const void *_name)
+{
+	const char *name = _name;
+
+	return strcmp(dev->name, name);
+}
+
 static int
 dev_uev_socket_fd_create(void)
 {
@@ -147,6 +186,9 @@  dev_uev_handler(__rte_unused void *param)
 	struct rte_dev_event uevent;
 	int ret;
 	char buf[EAL_UEV_MSG_LEN];
+	struct rte_bus *bus;
+	struct rte_device *dev;
+	const char *busname;
 
 	memset(&uevent, 0, sizeof(struct rte_dev_event));
 	memset(buf, 0, EAL_UEV_MSG_LEN);
@@ -171,13 +213,48 @@  dev_uev_handler(__rte_unused void *param)
 	RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
 		uevent.devname, uevent.type, uevent.subsystem);
 
-	if (uevent.devname)
+	switch (uevent.subsystem) {
+	case EAL_DEV_EVENT_SUBSYSTEM_PCI:
+	case EAL_DEV_EVENT_SUBSYSTEM_UIO:
+		busname = "pci";
+		break;
+	default:
+		break;
+	}
+
+	if (uevent.devname) {
+		if (uevent.type == RTE_DEV_EVENT_REMOVE) {
+			bus = rte_bus_find_by_name(busname);
+			if (bus == NULL) {
+				RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
+					busname);
+				return;
+			}
+			dev = bus->find_device(NULL, cmp_dev_name,
+					       uevent.devname);
+			if (dev == NULL) {
+				RTE_LOG(ERR, EAL, "Cannot find device (%s) on "
+					"bus (%s)\n", uevent.devname, busname);
+				return;
+			}
+			rte_spinlock_lock(&dev_failure_lock);
+			ret = bus->hotplug_handler(dev);
+			rte_spinlock_unlock(&dev_failure_lock);
+			if (ret) {
+				RTE_LOG(ERR, EAL, "Can not handle hotplug for "
+					"device (%s)\n", dev->name);
+				return;
+			}
+		}
 		dev_callback_process(uevent.devname, uevent.type);
+	}
 }
 
 int __rte_experimental
 rte_dev_event_monitor_start(void)
 {
+	sigset_t mask;
+	struct sigaction action;
 	int ret;
 
 	if (monitor_started)
@@ -197,6 +274,14 @@  rte_dev_event_monitor_start(void)
 		return -1;
 	}
 
+	/* register sigbus handler */
+	sigemptyset(&mask);
+	sigaddset(&mask, SIGBUS);
+	action.sa_flags = SA_SIGINFO;
+	action.sa_mask = mask;
+	action.sa_sigaction = sigbus_handler;
+	sigaction(SIGBUS, &action, NULL);
+
 	monitor_started = true;
 
 	return 0;
@@ -220,5 +305,6 @@  rte_dev_event_monitor_stop(void)
 	close(intr_handle.fd);
 	intr_handle.fd = -1;
 	monitor_started = false;
+
 	return 0;
 }