[dpdk-dev,V21,2/4] eal: add failure handle mechanism for hot plug

Message ID 1525344524-26946-3-git-send-email-jia.guo@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK

Commit Message

Guo, Jia May 3, 2018, 10:48 a.m. UTC
  This patch introduces a failure handler mechanism to handle device
hot unplug event. When device be hot plug out, the device resource
become invalid, if this resource is still be unexpected read/write,
system will crash. This patch let eal help application to handle
this fault, when sigbus error occur, check the failure address and
accordingly remap the invalid memory for the corresponding device,
that could guaranty the application not to be shut down when hot plug.

Signed-off-by: Jeff Guo <jia.guo@intel.com>
---
v21->v20:
sync failure hanlde to fix multiple process issue
---
 lib/librte_eal/linuxapp/eal/eal_dev.c | 154 +++++++++++++++++++++++++++++++++-
 1 file changed, 153 insertions(+), 1 deletion(-)
  

Comments

Ananyev, Konstantin May 4, 2018, 3:56 p.m. UTC | #1
Hi Jeff,

> 
> This patch introduces a failure handler mechanism to handle device
> hot unplug event. When device be hot plug out, the device resource
> become invalid, if this resource is still be unexpected read/write,
> system will crash. This patch let eal help application to handle
> this fault, when sigbus error occur, check the failure address and
> accordingly remap the invalid memory for the corresponding device,
> that could guaranty the application not to be shut down when hot plug.
> 
> Signed-off-by: Jeff Guo <jia.guo@intel.com>
> ---
> v21->v20:
> sync failure hanlde to fix multiple process issue
> ---
>  lib/librte_eal/linuxapp/eal/eal_dev.c | 154 +++++++++++++++++++++++++++++++++-
>  1 file changed, 153 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c
> index 1cf6aeb..3067f39 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_dev.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
> @@ -4,6 +4,8 @@
> 
>  #include <string.h>
>  #include <unistd.h>
> +#include <fcntl.h>
> +#include <signal.h>
>  #include <sys/socket.h>
>  #include <linux/netlink.h>
> 
> @@ -14,15 +16,27 @@
>  #include <rte_malloc.h>
>  #include <rte_interrupts.h>
>  #include <rte_alarm.h>
> +#include <rte_bus.h>
> +#include <rte_eal.h>
> +#include <rte_spinlock.h>
> 
>  #include "eal_private.h"
> 
>  static struct rte_intr_handle intr_handle = {.fd = -1 };
>  static bool monitor_started;
> 
> +extern struct rte_bus_list rte_bus_list;
> +
>  #define EAL_UEV_MSG_LEN 4096
>  #define EAL_UEV_MSG_ELEM_LEN 128
> 
> +/* spinlock for device failure process */
> +static rte_spinlock_t dev_failure_lock = RTE_SPINLOCK_INITIALIZER;
> +
> +static struct sigaction sigbus_action_old;
> +
> +static int sigbus_need_recover;
> +
>  static void dev_uev_handler(__rte_unused void *param);
> 
>  /* identify the system layer which reports this event. */
> @@ -34,6 +48,93 @@ enum eal_dev_event_subsystem {
>  };
> 
>  static int
> +dev_uev_failure_process(struct rte_device *dev, void *dev_addr)
> +{
> +	struct rte_bus *bus;
> +	int ret = 0;
> +
> +	if (!dev && !dev_addr) {
> +		return -EINVAL;
> +	} else if (dev) {
> +		bus = rte_bus_find_by_device_name(dev->name);
> +		if (bus->handle_hot_unplug) {
> +			/**
> +			 * call bus ops to handle hot unplug.
> +			 */
> +			ret = bus->handle_hot_unplug(dev, dev_addr);
> +			if (ret) {
> +				RTE_LOG(ERR, EAL,
> +					"Cannot handle hot unplug "
> +					"for device %s "
> +					"on the bus %s.\n ",
> +					dev->name, bus->name);
> +			}
> +		} else {
> +			RTE_LOG(ERR, EAL,
> +				"Not support handle hot unplug for bus %s!\n",
> +				bus->name);
> +			ret = -ENOTSUP;
> +		}
> +	} else {
> +		TAILQ_FOREACH(bus, &rte_bus_list, next) {
> +			if (bus->handle_hot_unplug) {
> +				/**
> +				 * call bus ops to handle hot unplug.
> +				 */
> +				ret = bus->handle_hot_unplug(dev, dev_addr);
> +				if (ret)
> +					RTE_LOG(ERR, EAL,
> +						"Cannot handle hot unplug "
> +						"for the device "
> +						"on the bus %s!\n", bus->name);
> +				else
> +					break;
> +			} else {
> +				RTE_LOG(ERR, EAL,
> +					"Not support handle hot unplug "
> +					"for bus %s!\n", bus->name);
> +				ret = -ENOTSUP;
> +			}
> +		}
> +	}
> +	return ret;
> +}
> +
> +static void
> +sigbus_action_recover(void)
> +{
> +	if (sigbus_need_recover) {
> +		sigaction(SIGBUS, &sigbus_action_old, NULL);
> +		sigbus_need_recover = 0;
> +	}
> +}
> +
> +static void sigbus_handler(int signum __rte_unused, siginfo_t *info,
> +				void *ctx __rte_unused)
> +{
> +	int ret;
> +
> +	RTE_LOG(DEBUG, EAL, "Thread[%d] catch SIGBUS, fault address:%p\n",
> +		(int)pthread_self(), info->si_addr);
> +	rte_spinlock_lock(&dev_failure_lock);
> +	ret = dev_uev_failure_process(NULL, info->si_addr);
> +	rte_spinlock_unlock(&dev_failure_lock);
> +	if (!ret)
> +		RTE_LOG(DEBUG, EAL,
> +			"Success to handle SIGBUS error for hot unplug!\n");
> +	else
> +		rte_exit(EXIT_FAILURE, "exit for SIGBUS error!");

I still think we have to distinguish here 2 cases:
1) failure addr is not belong to any dpdk devices
2) failure addr does belong to dpdk device, but we fail to remap it.

For 1) we probably need to call previous sigbus handler.
For 2) we probably can only do exit().

> +}
> +
> +static int cmp_dev_name(const struct rte_device *dev,
> +	const void *_name)
> +{
> +	const char *name = _name;
> +
> +	return strcmp(dev->name, name);
> +}
> +
> +static int
>  dev_uev_socket_fd_create(void)
>  {
>  	struct sockaddr_nl addr;
> @@ -147,6 +248,9 @@ dev_uev_handler(__rte_unused void *param)
>  	struct rte_dev_event uevent;
>  	int ret;
>  	char buf[EAL_UEV_MSG_LEN];
> +	struct rte_bus *bus;
> +	struct rte_device *dev;
> +	const char *busname;
> 
>  	memset(&uevent, 0, sizeof(struct rte_dev_event));
>  	memset(buf, 0, EAL_UEV_MSG_LEN);
> @@ -171,13 +275,50 @@ dev_uev_handler(__rte_unused void *param)
>  	RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
>  		uevent.devname, uevent.type, uevent.subsystem);
> 
> -	if (uevent.devname)
> +	switch (uevent.subsystem) {
> +	case EAL_DEV_EVENT_SUBSYSTEM_PCI:
> +	case EAL_DEV_EVENT_SUBSYSTEM_UIO:
> +		busname = "pci";
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (uevent.devname) {
> +		if (uevent.type == RTE_DEV_EVENT_REMOVE) {
> +			bus = rte_bus_find_by_name(busname);
> +			if (bus == NULL) {
> +				RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
> +					uevent.devname);
> +				return;
> +			}
> +			dev = bus->find_device(NULL, cmp_dev_name,
> +					       uevent.devname);
> +			if (dev == NULL) {
> +				RTE_LOG(ERR, EAL,
> +					"Cannot find unplugged device (%s)\n",
> +					uevent.devname);
> +				return;
> +			}
> +			rte_spinlock_lock(&dev_failure_lock);
> +			ret = dev_uev_failure_process(dev, NULL);
> +			rte_spinlock_unlock(&dev_failure_lock);

That's interrupt thread, right?
I wonder could it happen that user will call device_detach() at the same moment?
Konstantin


> +			if (ret) {
> +				RTE_LOG(ERR, EAL, "Driver cannot remap the "
> +					"device (%s)\n",
> +					dev->name);
> +				return;
> +			}
> +		}
>  		dev_callback_process(uevent.devname, uevent.type);
> +	}
>  }
> 
>  int __rte_experimental
>  rte_dev_event_monitor_start(void)
>  {
> +	sigset_t mask;
> +	struct sigaction action;
>  	int ret;
> 
>  	if (monitor_started)
> @@ -197,6 +338,14 @@ rte_dev_event_monitor_start(void)
>  		return -1;
>  	}
> 
> +	/* register sigbus handler */
> +	sigemptyset(&mask);
> +	sigaddset(&mask, SIGBUS);
> +	action.sa_flags = SA_SIGINFO;
> +	action.sa_mask = mask;
> +	action.sa_sigaction = sigbus_handler;
> +	sigbus_need_recover = !sigaction(SIGBUS, &action, &sigbus_action_old);
> +
>  	monitor_started = true;
> 
>  	return 0;
> @@ -217,8 +366,11 @@ rte_dev_event_monitor_stop(void)
>  		return ret;
>  	}
> 
> +	sigbus_action_recover();
> +
>  	close(intr_handle.fd);
>  	intr_handle.fd = -1;
>  	monitor_started = false;
> +
>  	return 0;
>  }
> --
> 2.7.4
  
Guo, Jia May 8, 2018, 2:57 p.m. UTC | #2
On 5/4/2018 11:56 PM, Ananyev, Konstantin wrote:
> Hi Jeff,
>
>> This patch introduces a failure handler mechanism to handle device
>> hot unplug event. When device be hot plug out, the device resource
>> become invalid, if this resource is still be unexpected read/write,
>> system will crash. This patch let eal help application to handle
>> this fault, when sigbus error occur, check the failure address and
>> accordingly remap the invalid memory for the corresponding device,
>> that could guaranty the application not to be shut down when hot plug.
>>
>> Signed-off-by: Jeff Guo <jia.guo@intel.com>
>> ---
>> v21->v20:
>> sync failure hanlde to fix multiple process issue
>> ---
>>   lib/librte_eal/linuxapp/eal/eal_dev.c | 154 +++++++++++++++++++++++++++++++++-
>>   1 file changed, 153 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c
>> index 1cf6aeb..3067f39 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal_dev.c
>> +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
>> @@ -4,6 +4,8 @@
>>
>>   #include <string.h>
>>   #include <unistd.h>
>> +#include <fcntl.h>
>> +#include <signal.h>
>>   #include <sys/socket.h>
>>   #include <linux/netlink.h>
>>
>> @@ -14,15 +16,27 @@
>>   #include <rte_malloc.h>
>>   #include <rte_interrupts.h>
>>   #include <rte_alarm.h>
>> +#include <rte_bus.h>
>> +#include <rte_eal.h>
>> +#include <rte_spinlock.h>
>>
>>   #include "eal_private.h"
>>
>>   static struct rte_intr_handle intr_handle = {.fd = -1 };
>>   static bool monitor_started;
>>
>> +extern struct rte_bus_list rte_bus_list;
>> +
>>   #define EAL_UEV_MSG_LEN 4096
>>   #define EAL_UEV_MSG_ELEM_LEN 128
>>
>> +/* spinlock for device failure process */
>> +static rte_spinlock_t dev_failure_lock = RTE_SPINLOCK_INITIALIZER;
>> +
>> +static struct sigaction sigbus_action_old;
>> +
>> +static int sigbus_need_recover;
>> +
>>   static void dev_uev_handler(__rte_unused void *param);
>>
>>   /* identify the system layer which reports this event. */
>> @@ -34,6 +48,93 @@ enum eal_dev_event_subsystem {
>>   };
>>
>>   static int
>> +dev_uev_failure_process(struct rte_device *dev, void *dev_addr)
>> +{
>> +	struct rte_bus *bus;
>> +	int ret = 0;
>> +
>> +	if (!dev && !dev_addr) {
>> +		return -EINVAL;
>> +	} else if (dev) {
>> +		bus = rte_bus_find_by_device_name(dev->name);
>> +		if (bus->handle_hot_unplug) {
>> +			/**
>> +			 * call bus ops to handle hot unplug.
>> +			 */
>> +			ret = bus->handle_hot_unplug(dev, dev_addr);
>> +			if (ret) {
>> +				RTE_LOG(ERR, EAL,
>> +					"Cannot handle hot unplug "
>> +					"for device %s "
>> +					"on the bus %s.\n ",
>> +					dev->name, bus->name);
>> +			}
>> +		} else {
>> +			RTE_LOG(ERR, EAL,
>> +				"Not support handle hot unplug for bus %s!\n",
>> +				bus->name);
>> +			ret = -ENOTSUP;
>> +		}
>> +	} else {
>> +		TAILQ_FOREACH(bus, &rte_bus_list, next) {
>> +			if (bus->handle_hot_unplug) {
>> +				/**
>> +				 * call bus ops to handle hot unplug.
>> +				 */
>> +				ret = bus->handle_hot_unplug(dev, dev_addr);
>> +				if (ret)
>> +					RTE_LOG(ERR, EAL,
>> +						"Cannot handle hot unplug "
>> +						"for the device "
>> +						"on the bus %s!\n", bus->name);
>> +				else
>> +					break;
>> +			} else {
>> +				RTE_LOG(ERR, EAL,
>> +					"Not support handle hot unplug "
>> +					"for bus %s!\n", bus->name);
>> +				ret = -ENOTSUP;
>> +			}
>> +		}
>> +	}
>> +	return ret;
>> +}
>> +
>> +static void
>> +sigbus_action_recover(void)
>> +{
>> +	if (sigbus_need_recover) {
>> +		sigaction(SIGBUS, &sigbus_action_old, NULL);
>> +		sigbus_need_recover = 0;
>> +	}
>> +}
>> +
>> +static void sigbus_handler(int signum __rte_unused, siginfo_t *info,
>> +				void *ctx __rte_unused)
>> +{
>> +	int ret;
>> +
>> +	RTE_LOG(DEBUG, EAL, "Thread[%d] catch SIGBUS, fault address:%p\n",
>> +		(int)pthread_self(), info->si_addr);
>> +	rte_spinlock_lock(&dev_failure_lock);
>> +	ret = dev_uev_failure_process(NULL, info->si_addr);
>> +	rte_spinlock_unlock(&dev_failure_lock);
>> +	if (!ret)
>> +		RTE_LOG(DEBUG, EAL,
>> +			"Success to handle SIGBUS error for hot unplug!\n");
>> +	else
>> +		rte_exit(EXIT_FAILURE, "exit for SIGBUS error!");
> I still think we have to distinguish here 2 cases:
> 1) failure addr is not belong to any dpdk devices
> 2) failure addr does belong to dpdk device, but we fail to remap it.
>
> For 1) we probably need to call previous sigbus handler.
> For 2) we probably can only do exit().

i think the previous sigbus handler is just a exception of sigbus error 
and exit out of the process, so i think should use one way to handler 
1)+2) should be fine, do you agree with that? or you could find any 
chance to
call any other sigbus handler at this positoin?
>> +}
>> +
>> +static int cmp_dev_name(const struct rte_device *dev,
>> +	const void *_name)
>> +{
>> +	const char *name = _name;
>> +
>> +	return strcmp(dev->name, name);
>> +}
>> +
>> +static int
>>   dev_uev_socket_fd_create(void)
>>   {
>>   	struct sockaddr_nl addr;
>> @@ -147,6 +248,9 @@ dev_uev_handler(__rte_unused void *param)
>>   	struct rte_dev_event uevent;
>>   	int ret;
>>   	char buf[EAL_UEV_MSG_LEN];
>> +	struct rte_bus *bus;
>> +	struct rte_device *dev;
>> +	const char *busname;
>>
>>   	memset(&uevent, 0, sizeof(struct rte_dev_event));
>>   	memset(buf, 0, EAL_UEV_MSG_LEN);
>> @@ -171,13 +275,50 @@ dev_uev_handler(__rte_unused void *param)
>>   	RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
>>   		uevent.devname, uevent.type, uevent.subsystem);
>>
>> -	if (uevent.devname)
>> +	switch (uevent.subsystem) {
>> +	case EAL_DEV_EVENT_SUBSYSTEM_PCI:
>> +	case EAL_DEV_EVENT_SUBSYSTEM_UIO:
>> +		busname = "pci";
>> +		break;
>> +	default:
>> +		break;
>> +	}
>> +
>> +	if (uevent.devname) {
>> +		if (uevent.type == RTE_DEV_EVENT_REMOVE) {
>> +			bus = rte_bus_find_by_name(busname);
>> +			if (bus == NULL) {
>> +				RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
>> +					uevent.devname);
>> +				return;
>> +			}
>> +			dev = bus->find_device(NULL, cmp_dev_name,
>> +					       uevent.devname);
>> +			if (dev == NULL) {
>> +				RTE_LOG(ERR, EAL,
>> +					"Cannot find unplugged device (%s)\n",
>> +					uevent.devname);
>> +				return;
>> +			}
>> +			rte_spinlock_lock(&dev_failure_lock);
>> +			ret = dev_uev_failure_process(dev, NULL);
>> +			rte_spinlock_unlock(&dev_failure_lock);
> That's interrupt thread, right?
> I wonder could it happen that user will call device_detach() at the same moment?
> Konstantin

it is in interrupt thread, and user will call device_detach after failure process, you concern about twice or more device detach? i don't think is there any problem here.

>> +			if (ret) {
>> +				RTE_LOG(ERR, EAL, "Driver cannot remap the "
>> +					"device (%s)\n",
>> +					dev->name);
>> +				return;
>> +			}
>> +		}
>>   		dev_callback_process(uevent.devname, uevent.type);
>> +	}
>>   }
>>
>>   int __rte_experimental
>>   rte_dev_event_monitor_start(void)
>>   {
>> +	sigset_t mask;
>> +	struct sigaction action;
>>   	int ret;
>>
>>   	if (monitor_started)
>> @@ -197,6 +338,14 @@ rte_dev_event_monitor_start(void)
>>   		return -1;
>>   	}
>>
>> +	/* register sigbus handler */
>> +	sigemptyset(&mask);
>> +	sigaddset(&mask, SIGBUS);
>> +	action.sa_flags = SA_SIGINFO;
>> +	action.sa_mask = mask;
>> +	action.sa_sigaction = sigbus_handler;
>> +	sigbus_need_recover = !sigaction(SIGBUS, &action, &sigbus_action_old);
>> +
>>   	monitor_started = true;
>>
>>   	return 0;
>> @@ -217,8 +366,11 @@ rte_dev_event_monitor_stop(void)
>>   		return ret;
>>   	}
>>
>> +	sigbus_action_recover();
>> +
>>   	close(intr_handle.fd);
>>   	intr_handle.fd = -1;
>>   	monitor_started = false;
>> +
>>   	return 0;
>>   }
>> --
>> 2.7.4
  
Ananyev, Konstantin May 8, 2018, 3:19 p.m. UTC | #3
> -----Original Message-----
> From: Guo, Jia
> Sent: Tuesday, May 8, 2018 3:57 PM
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; stephen@networkplumber.org; Richardson, Bruce
> <bruce.richardson@intel.com>; Yigit, Ferruh <ferruh.yigit@intel.com>; gaetan.rivet@6wind.com; Wu, Jingjing
> <jingjing.wu@intel.com>; thomas@monjalon.net; motih@mellanox.com; matan@mellanox.com; Van Haaren, Harry
> <harry.van.haaren@intel.com>; Tan, Jianfeng <jianfeng.tan@intel.com>
> Cc: jblunck@infradead.org; shreyansh.jain@nxp.com; dev@dpdk.org; Zhang, Helin <helin.zhang@intel.com>
> Subject: Re: [PATCH V21 2/4] eal: add failure handle mechanism for hot plug
> 
> 
> 
> On 5/4/2018 11:56 PM, Ananyev, Konstantin wrote:
> > Hi Jeff,
> >
> >> This patch introduces a failure handler mechanism to handle device
> >> hot unplug event. When device be hot plug out, the device resource
> >> become invalid, if this resource is still be unexpected read/write,
> >> system will crash. This patch let eal help application to handle
> >> this fault, when sigbus error occur, check the failure address and
> >> accordingly remap the invalid memory for the corresponding device,
> >> that could guaranty the application not to be shut down when hot plug.
> >>
> >> Signed-off-by: Jeff Guo <jia.guo@intel.com>
> >> ---
> >> v21->v20:
> >> sync failure hanlde to fix multiple process issue
> >> ---
> >>   lib/librte_eal/linuxapp/eal/eal_dev.c | 154 +++++++++++++++++++++++++++++++++-
> >>   1 file changed, 153 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c
> >> index 1cf6aeb..3067f39 100644
> >> --- a/lib/librte_eal/linuxapp/eal/eal_dev.c
> >> +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
> >> @@ -4,6 +4,8 @@
> >>
> >>   #include <string.h>
> >>   #include <unistd.h>
> >> +#include <fcntl.h>
> >> +#include <signal.h>
> >>   #include <sys/socket.h>
> >>   #include <linux/netlink.h>
> >>
> >> @@ -14,15 +16,27 @@
> >>   #include <rte_malloc.h>
> >>   #include <rte_interrupts.h>
> >>   #include <rte_alarm.h>
> >> +#include <rte_bus.h>
> >> +#include <rte_eal.h>
> >> +#include <rte_spinlock.h>
> >>
> >>   #include "eal_private.h"
> >>
> >>   static struct rte_intr_handle intr_handle = {.fd = -1 };
> >>   static bool monitor_started;
> >>
> >> +extern struct rte_bus_list rte_bus_list;
> >> +
> >>   #define EAL_UEV_MSG_LEN 4096
> >>   #define EAL_UEV_MSG_ELEM_LEN 128
> >>
> >> +/* spinlock for device failure process */
> >> +static rte_spinlock_t dev_failure_lock = RTE_SPINLOCK_INITIALIZER;
> >> +
> >> +static struct sigaction sigbus_action_old;
> >> +
> >> +static int sigbus_need_recover;
> >> +
> >>   static void dev_uev_handler(__rte_unused void *param);
> >>
> >>   /* identify the system layer which reports this event. */
> >> @@ -34,6 +48,93 @@ enum eal_dev_event_subsystem {
> >>   };
> >>
> >>   static int
> >> +dev_uev_failure_process(struct rte_device *dev, void *dev_addr)
> >> +{
> >> +	struct rte_bus *bus;
> >> +	int ret = 0;
> >> +
> >> +	if (!dev && !dev_addr) {
> >> +		return -EINVAL;
> >> +	} else if (dev) {
> >> +		bus = rte_bus_find_by_device_name(dev->name);
> >> +		if (bus->handle_hot_unplug) {
> >> +			/**
> >> +			 * call bus ops to handle hot unplug.
> >> +			 */
> >> +			ret = bus->handle_hot_unplug(dev, dev_addr);
> >> +			if (ret) {
> >> +				RTE_LOG(ERR, EAL,
> >> +					"Cannot handle hot unplug "
> >> +					"for device %s "
> >> +					"on the bus %s.\n ",
> >> +					dev->name, bus->name);
> >> +			}
> >> +		} else {
> >> +			RTE_LOG(ERR, EAL,
> >> +				"Not support handle hot unplug for bus %s!\n",
> >> +				bus->name);
> >> +			ret = -ENOTSUP;
> >> +		}
> >> +	} else {
> >> +		TAILQ_FOREACH(bus, &rte_bus_list, next) {
> >> +			if (bus->handle_hot_unplug) {
> >> +				/**
> >> +				 * call bus ops to handle hot unplug.
> >> +				 */
> >> +				ret = bus->handle_hot_unplug(dev, dev_addr);
> >> +				if (ret)
> >> +					RTE_LOG(ERR, EAL,
> >> +						"Cannot handle hot unplug "
> >> +						"for the device "
> >> +						"on the bus %s!\n", bus->name);
> >> +				else
> >> +					break;
> >> +			} else {
> >> +				RTE_LOG(ERR, EAL,
> >> +					"Not support handle hot unplug "
> >> +					"for bus %s!\n", bus->name);
> >> +				ret = -ENOTSUP;
> >> +			}
> >> +		}
> >> +	}
> >> +	return ret;
> >> +}
> >> +
> >> +static void
> >> +sigbus_action_recover(void)
> >> +{
> >> +	if (sigbus_need_recover) {
> >> +		sigaction(SIGBUS, &sigbus_action_old, NULL);
> >> +		sigbus_need_recover = 0;
> >> +	}
> >> +}
> >> +
> >> +static void sigbus_handler(int signum __rte_unused, siginfo_t *info,
> >> +				void *ctx __rte_unused)
> >> +{
> >> +	int ret;
> >> +
> >> +	RTE_LOG(DEBUG, EAL, "Thread[%d] catch SIGBUS, fault address:%p\n",
> >> +		(int)pthread_self(), info->si_addr);
> >> +	rte_spinlock_lock(&dev_failure_lock);
> >> +	ret = dev_uev_failure_process(NULL, info->si_addr);
> >> +	rte_spinlock_unlock(&dev_failure_lock);
> >> +	if (!ret)
> >> +		RTE_LOG(DEBUG, EAL,
> >> +			"Success to handle SIGBUS error for hot unplug!\n");
> >> +	else
> >> +		rte_exit(EXIT_FAILURE, "exit for SIGBUS error!");
> > I still think we have to distinguish here 2 cases:
> > 1) failure addr is not belong to any dpdk devices
> > 2) failure addr does belong to dpdk device, but we fail to remap it.
> >
> > For 1) we probably need to call previous sigbus handler.
> > For 2) we probably can only do exit().
> 
> i think the previous sigbus handler is just a exception of sigbus error
> and exit out of the process, so i think should use one way to handler
> 1)+2) should be fine, do you agree with that? or you could find any
> chance to
> call any other sigbus handler at this positoin?

I think application can have its own sigbus handler installed (same as we do).

> >> +}
> >> +
> >> +static int cmp_dev_name(const struct rte_device *dev,
> >> +	const void *_name)
> >> +{
> >> +	const char *name = _name;
> >> +
> >> +	return strcmp(dev->name, name);
> >> +}
> >> +
> >> +static int
> >>   dev_uev_socket_fd_create(void)
> >>   {
> >>   	struct sockaddr_nl addr;
> >> @@ -147,6 +248,9 @@ dev_uev_handler(__rte_unused void *param)
> >>   	struct rte_dev_event uevent;
> >>   	int ret;
> >>   	char buf[EAL_UEV_MSG_LEN];
> >> +	struct rte_bus *bus;
> >> +	struct rte_device *dev;
> >> +	const char *busname;
> >>
> >>   	memset(&uevent, 0, sizeof(struct rte_dev_event));
> >>   	memset(buf, 0, EAL_UEV_MSG_LEN);
> >> @@ -171,13 +275,50 @@ dev_uev_handler(__rte_unused void *param)
> >>   	RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
> >>   		uevent.devname, uevent.type, uevent.subsystem);
> >>
> >> -	if (uevent.devname)
> >> +	switch (uevent.subsystem) {
> >> +	case EAL_DEV_EVENT_SUBSYSTEM_PCI:
> >> +	case EAL_DEV_EVENT_SUBSYSTEM_UIO:
> >> +		busname = "pci";
> >> +		break;
> >> +	default:
> >> +		break;
> >> +	}
> >> +
> >> +	if (uevent.devname) {
> >> +		if (uevent.type == RTE_DEV_EVENT_REMOVE) {
> >> +			bus = rte_bus_find_by_name(busname);
> >> +			if (bus == NULL) {
> >> +				RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
> >> +					uevent.devname);
> >> +				return;
> >> +			}
> >> +			dev = bus->find_device(NULL, cmp_dev_name,
> >> +					       uevent.devname);
> >> +			if (dev == NULL) {
> >> +				RTE_LOG(ERR, EAL,
> >> +					"Cannot find unplugged device (%s)\n",
> >> +					uevent.devname);
> >> +				return;
> >> +			}
> >> +			rte_spinlock_lock(&dev_failure_lock);
> >> +			ret = dev_uev_failure_process(dev, NULL);
> >> +			rte_spinlock_unlock(&dev_failure_lock);
> > That's interrupt thread, right?
> > I wonder could it happen that user will call device_detach() at the same moment?
> > Konstantin
> 
> it is in interrupt thread, and user will call device_detach after failure process, you concern about twice or more device detach? i
> don't think is there any problem here.

Ok, but user can call device_detach() on his own, without waiting for failure to happen, right?


> 
> >> +			if (ret) {
> >> +				RTE_LOG(ERR, EAL, "Driver cannot remap the "
> >> +					"device (%s)\n",
> >> +					dev->name);
> >> +				return;
> >> +			}
> >> +		}
> >>   		dev_callback_process(uevent.devname, uevent.type);
> >> +	}
> >>   }
> >>
> >>   int __rte_experimental
> >>   rte_dev_event_monitor_start(void)
> >>   {
> >> +	sigset_t mask;
> >> +	struct sigaction action;
> >>   	int ret;
> >>
> >>   	if (monitor_started)
> >> @@ -197,6 +338,14 @@ rte_dev_event_monitor_start(void)
> >>   		return -1;
> >>   	}
> >>
> >> +	/* register sigbus handler */
> >> +	sigemptyset(&mask);
> >> +	sigaddset(&mask, SIGBUS);
> >> +	action.sa_flags = SA_SIGINFO;
> >> +	action.sa_mask = mask;
> >> +	action.sa_sigaction = sigbus_handler;
> >> +	sigbus_need_recover = !sigaction(SIGBUS, &action, &sigbus_action_old);
> >> +
> >>   	monitor_started = true;
> >>
> >>   	return 0;
> >> @@ -217,8 +366,11 @@ rte_dev_event_monitor_stop(void)
> >>   		return ret;
> >>   	}
> >>
> >> +	sigbus_action_recover();
> >> +
> >>   	close(intr_handle.fd);
> >>   	intr_handle.fd = -1;
> >>   	monitor_started = false;
> >> +
> >>   	return 0;
> >>   }
> >> --
> >> 2.7.4
  

Patch

diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c
index 1cf6aeb..3067f39 100644
--- a/lib/librte_eal/linuxapp/eal/eal_dev.c
+++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
@@ -4,6 +4,8 @@ 
 
 #include <string.h>
 #include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
 #include <sys/socket.h>
 #include <linux/netlink.h>
 
@@ -14,15 +16,27 @@ 
 #include <rte_malloc.h>
 #include <rte_interrupts.h>
 #include <rte_alarm.h>
+#include <rte_bus.h>
+#include <rte_eal.h>
+#include <rte_spinlock.h>
 
 #include "eal_private.h"
 
 static struct rte_intr_handle intr_handle = {.fd = -1 };
 static bool monitor_started;
 
+extern struct rte_bus_list rte_bus_list;
+
 #define EAL_UEV_MSG_LEN 4096
 #define EAL_UEV_MSG_ELEM_LEN 128
 
+/* spinlock for device failure process */
+static rte_spinlock_t dev_failure_lock = RTE_SPINLOCK_INITIALIZER;
+
+static struct sigaction sigbus_action_old;
+
+static int sigbus_need_recover;
+
 static void dev_uev_handler(__rte_unused void *param);
 
 /* identify the system layer which reports this event. */
@@ -34,6 +48,93 @@  enum eal_dev_event_subsystem {
 };
 
 static int
+dev_uev_failure_process(struct rte_device *dev, void *dev_addr)
+{
+	struct rte_bus *bus;
+	int ret = 0;
+
+	if (!dev && !dev_addr) {
+		return -EINVAL;
+	} else if (dev) {
+		bus = rte_bus_find_by_device_name(dev->name);
+		if (bus->handle_hot_unplug) {
+			/**
+			 * call bus ops to handle hot unplug.
+			 */
+			ret = bus->handle_hot_unplug(dev, dev_addr);
+			if (ret) {
+				RTE_LOG(ERR, EAL,
+					"Cannot handle hot unplug "
+					"for device %s "
+					"on the bus %s.\n ",
+					dev->name, bus->name);
+			}
+		} else {
+			RTE_LOG(ERR, EAL,
+				"Not support handle hot unplug for bus %s!\n",
+				bus->name);
+			ret = -ENOTSUP;
+		}
+	} else {
+		TAILQ_FOREACH(bus, &rte_bus_list, next) {
+			if (bus->handle_hot_unplug) {
+				/**
+				 * call bus ops to handle hot unplug.
+				 */
+				ret = bus->handle_hot_unplug(dev, dev_addr);
+				if (ret)
+					RTE_LOG(ERR, EAL,
+						"Cannot handle hot unplug "
+						"for the device "
+						"on the bus %s!\n", bus->name);
+				else
+					break;
+			} else {
+				RTE_LOG(ERR, EAL,
+					"Not support handle hot unplug "
+					"for bus %s!\n", bus->name);
+				ret = -ENOTSUP;
+			}
+		}
+	}
+	return ret;
+}
+
+static void
+sigbus_action_recover(void)
+{
+	if (sigbus_need_recover) {
+		sigaction(SIGBUS, &sigbus_action_old, NULL);
+		sigbus_need_recover = 0;
+	}
+}
+
+static void sigbus_handler(int signum __rte_unused, siginfo_t *info,
+				void *ctx __rte_unused)
+{
+	int ret;
+
+	RTE_LOG(DEBUG, EAL, "Thread[%d] catch SIGBUS, fault address:%p\n",
+		(int)pthread_self(), info->si_addr);
+	rte_spinlock_lock(&dev_failure_lock);
+	ret = dev_uev_failure_process(NULL, info->si_addr);
+	rte_spinlock_unlock(&dev_failure_lock);
+	if (!ret)
+		RTE_LOG(DEBUG, EAL,
+			"Success to handle SIGBUS error for hot unplug!\n");
+	else
+		rte_exit(EXIT_FAILURE, "exit for SIGBUS error!");
+}
+
+static int cmp_dev_name(const struct rte_device *dev,
+	const void *_name)
+{
+	const char *name = _name;
+
+	return strcmp(dev->name, name);
+}
+
+static int
 dev_uev_socket_fd_create(void)
 {
 	struct sockaddr_nl addr;
@@ -147,6 +248,9 @@  dev_uev_handler(__rte_unused void *param)
 	struct rte_dev_event uevent;
 	int ret;
 	char buf[EAL_UEV_MSG_LEN];
+	struct rte_bus *bus;
+	struct rte_device *dev;
+	const char *busname;
 
 	memset(&uevent, 0, sizeof(struct rte_dev_event));
 	memset(buf, 0, EAL_UEV_MSG_LEN);
@@ -171,13 +275,50 @@  dev_uev_handler(__rte_unused void *param)
 	RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
 		uevent.devname, uevent.type, uevent.subsystem);
 
-	if (uevent.devname)
+	switch (uevent.subsystem) {
+	case EAL_DEV_EVENT_SUBSYSTEM_PCI:
+	case EAL_DEV_EVENT_SUBSYSTEM_UIO:
+		busname = "pci";
+		break;
+	default:
+		break;
+	}
+
+	if (uevent.devname) {
+		if (uevent.type == RTE_DEV_EVENT_REMOVE) {
+			bus = rte_bus_find_by_name(busname);
+			if (bus == NULL) {
+				RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
+					uevent.devname);
+				return;
+			}
+			dev = bus->find_device(NULL, cmp_dev_name,
+					       uevent.devname);
+			if (dev == NULL) {
+				RTE_LOG(ERR, EAL,
+					"Cannot find unplugged device (%s)\n",
+					uevent.devname);
+				return;
+			}
+			rte_spinlock_lock(&dev_failure_lock);
+			ret = dev_uev_failure_process(dev, NULL);
+			rte_spinlock_unlock(&dev_failure_lock);
+			if (ret) {
+				RTE_LOG(ERR, EAL, "Driver cannot remap the "
+					"device (%s)\n",
+					dev->name);
+				return;
+			}
+		}
 		dev_callback_process(uevent.devname, uevent.type);
+	}
 }
 
 int __rte_experimental
 rte_dev_event_monitor_start(void)
 {
+	sigset_t mask;
+	struct sigaction action;
 	int ret;
 
 	if (monitor_started)
@@ -197,6 +338,14 @@  rte_dev_event_monitor_start(void)
 		return -1;
 	}
 
+	/* register sigbus handler */
+	sigemptyset(&mask);
+	sigaddset(&mask, SIGBUS);
+	action.sa_flags = SA_SIGINFO;
+	action.sa_mask = mask;
+	action.sa_sigaction = sigbus_handler;
+	sigbus_need_recover = !sigaction(SIGBUS, &action, &sigbus_action_old);
+
 	monitor_started = true;
 
 	return 0;
@@ -217,8 +366,11 @@  rte_dev_event_monitor_stop(void)
 		return ret;
 	}
 
+	sigbus_action_recover();
+
 	close(intr_handle.fd);
 	intr_handle.fd = -1;
 	monitor_started = false;
+
 	return 0;
 }