[dpdk-dev,V20,2/4] eal: add failure handler mechanism for hot plug

Message ID 1524058689-4954-3-git-send-email-jia.guo@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation fail Compilation issues

Commit Message

Guo, Jia April 18, 2018, 1:38 p.m. UTC
  This patch introduces a failure handler mechanism to handle device
hot unplug event. When device be hot plug out, the device resource
become invalid, if this resource is still be unexpected read/write,
system will crash. This patch let eal help application to handle
this fault, when sigbus error occur, check the failure address and
accordingly remap the invalid memory for the corresponding device,
that could guaranty the application not to be shut down when hot plug.

Signed-off-by: Jeff Guo <jia.guo@intel.com>
---
v20->v19:
refine the logic of remapping for multiple device.
---
 doc/guides/rel_notes/release_18_05.rst  |   6 ++
 lib/librte_eal/common/include/rte_dev.h |  11 +++
 lib/librte_eal/linuxapp/eal/eal_dev.c   | 124 +++++++++++++++++++++++++++++++-
 lib/librte_eal/rte_eal_version.map      |   1 +
 4 files changed, 141 insertions(+), 1 deletion(-)
  

Comments

Qi Zhang April 19, 2018, 1:30 a.m. UTC | #1
Hi Jeff

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Jeff Guo
> Sent: Wednesday, April 18, 2018 9:38 PM
> To: stephen@networkplumber.org; Richardson, Bruce
> <bruce.richardson@intel.com>; Yigit, Ferruh <ferruh.yigit@intel.com>;
> Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> gaetan.rivet@6wind.com; Wu, Jingjing <jingjing.wu@intel.com>;
> thomas@monjalon.net; motih@mellanox.com; matan@mellanox.com; Van
> Haaren, Harry <harry.van.haaren@intel.com>; Tan, Jianfeng
> <jianfeng.tan@intel.com>
> Cc: jblunck@infradead.org; shreyansh.jain@nxp.com; dev@dpdk.org; Guo,
> Jia <jia.guo@intel.com>; Zhang, Helin <helin.zhang@intel.com>
> Subject: [dpdk-dev] [PATCH V20 2/4] eal: add failure handler mechanism for
> hot plug
> 
> This patch introduces a failure handler mechanism to handle device hot
> unplug event. When device be hot plug out, the device resource become
> invalid, if this resource is still be unexpected read/write, system will crash.
> This patch let eal help application to handle this fault, when sigbus error
> occur, check the failure address and accordingly remap the invalid memory
> for the corresponding device, that could guaranty the application not to be
> shut down when hot plug.
> 
> Signed-off-by: Jeff Guo <jia.guo@intel.com>
> ---
> v20->v19:
> refine the logic of remapping for multiple device.
> ---
>  doc/guides/rel_notes/release_18_05.rst  |   6 ++
>  lib/librte_eal/common/include/rte_dev.h |  11 +++
>  lib/librte_eal/linuxapp/eal/eal_dev.c   | 124
> +++++++++++++++++++++++++++++++-
>  lib/librte_eal/rte_eal_version.map      |   1 +
>  4 files changed, 141 insertions(+), 1 deletion(-)
> 
> diff --git a/doc/guides/rel_notes/release_18_05.rst
> b/doc/guides/rel_notes/release_18_05.rst
> index a018ef5..a4ea9af 100644
> --- a/doc/guides/rel_notes/release_18_05.rst
> +++ b/doc/guides/rel_notes/release_18_05.rst
> @@ -70,6 +70,12 @@ New Features
> 
>    Linux uevent is supported as backend of this device event notification
> framework.
> 
> +* **Added hot plug failure handler.**
> +
> +  Added a failure handler machenism to handle hot unplug device.
> +
> +  * ``rte_dev_handle_hot_unplug`` for handle hot unplug device failure.
> +
> 
>  API Changes
>  -----------
> diff --git a/lib/librte_eal/common/include/rte_dev.h
> b/lib/librte_eal/common/include/rte_dev.h
> index 0955e9a..9933131 100644
> --- a/lib/librte_eal/common/include/rte_dev.h
> +++ b/lib/librte_eal/common/include/rte_dev.h
> @@ -360,4 +360,15 @@ rte_dev_event_monitor_start(void);
>  int __rte_experimental
>  rte_dev_event_monitor_stop(void);
> 
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * It can be used to handle the device signal bus error. when signal
> +bus error
> + * occur, the handler would check the failure address to find the
> +corresponding
> + * device and remap the memory resource of the device, that would
> +guaranty
> + * the system not crash when the device be hot unplug.
> + */
> +void __rte_experimental
> +rte_dev_handle_hot_unplug(void);
>  #endif /* _RTE_DEV_H_ */
> diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c
> b/lib/librte_eal/linuxapp/eal/eal_dev.c
> index 9478a39..33e7026 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_dev.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
> @@ -4,6 +4,8 @@
> 
>  #include <string.h>
>  #include <unistd.h>
> +#include <fcntl.h>
> +#include <signal.h>
>  #include <sys/socket.h>
>  #include <linux/netlink.h>
> 
> @@ -13,12 +15,16 @@
>  #include <rte_malloc.h>
>  #include <rte_interrupts.h>
>  #include <rte_alarm.h>
> +#include <rte_bus.h>
> +#include <rte_eal.h>
> 
>  #include "eal_private.h"
> 
>  static struct rte_intr_handle intr_handle = {.fd = -1 };  static bool
> monitor_started;
> 
> +extern struct rte_bus_list rte_bus_list;
> +
>  #define EAL_UEV_MSG_LEN 4096
>  #define EAL_UEV_MSG_ELEM_LEN 128
> 
> @@ -33,6 +39,68 @@ enum eal_dev_event_subsystem {  };
> 
>  static int
> +dev_uev_failure_process(struct rte_device *dev, void *dev_addr) {
> +	struct rte_bus *bus;
> +	int ret = 0;
> +
> +	if (!dev && !dev_addr) {
> +		return -EINVAL;
> +	} else if (dev) {
> +		bus = rte_bus_find_by_device_name(dev->name);
> +		if (bus->handle_hot_unplug) {
> +			/**
> +			 * call bus ops to handle hot unplug.
> +			 */
> +			ret = bus->handle_hot_unplug(dev, dev_addr);
> +			if (ret) {
> +				RTE_LOG(ERR, EAL,
> +					"It cannot handle hot unplug "
> +					"for device (%s) "
> +					"on the bus.\n ",
> +					dev->name);
> +			}
> +		}
> +	} else {
> +		TAILQ_FOREACH(bus, &rte_bus_list, next) {
> +			if (bus->handle_hot_unplug) {
> +				/**
> +				 * call bus ops to handle hot unplug.
> +				 */
> +				ret = bus->handle_hot_unplug(dev, dev_addr);
> +				if (ret) {
> +					RTE_LOG(ERR, EAL,
> +						"It cannot handle hot unplug "
> +						"for the device "
> +						"on the bus.\n ");
> +				}
> +			}
> +		}
> +	}
> +	return ret;
> +}
> +
> +static void sigbus_handler(int signum __rte_unused, siginfo_t *info,
> +				void *ctx __rte_unused)
> +{
> +	int ret;
> +
> +	RTE_LOG(ERR, EAL, "SIGBUS error, fault address:%p\n", info->si_addr);
> +	ret = dev_uev_failure_process(NULL, info->si_addr);
> +	if (!ret)
> +		RTE_LOG(DEBUG, EAL,
> +			"SIGBUS error is because of hot unplug!\n"); }
> +
> +static int cmp_dev_name(const struct rte_device *dev,
> +	const void *_name)
> +{
> +	const char *name = _name;
> +
> +	return strcmp(dev->name, name);
> +}
> +
> +static int
>  dev_uev_socket_fd_create(void)
>  {
>  	struct sockaddr_nl addr;
> @@ -146,6 +214,9 @@ dev_uev_handler(__rte_unused void *param)
>  	struct rte_dev_event uevent;
>  	int ret;
>  	char buf[EAL_UEV_MSG_LEN];
> +	struct rte_bus *bus;
> +	struct rte_device *dev;
> +	const char *busname;
> 
>  	memset(&uevent, 0, sizeof(struct rte_dev_event));
>  	memset(buf, 0, EAL_UEV_MSG_LEN);
> @@ -170,8 +241,41 @@ dev_uev_handler(__rte_unused void *param)
>  	RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d,
> subsystem:%d)\n",
>  		uevent.devname, uevent.type, uevent.subsystem);
> 
> -	if (uevent.devname)
> +	switch (uevent.subsystem) {
> +	case EAL_DEV_EVENT_SUBSYSTEM_PCI:
> +	case EAL_DEV_EVENT_SUBSYSTEM_UIO:
> +		busname = "pci";
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (uevent.devname) {
> +		if (uevent.type == RTE_DEV_EVENT_REMOVE) {
> +			bus = rte_bus_find_by_name(busname);
> +			if (bus == NULL) {
> +				RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
> +					uevent.devname);
> +				return;
> +			}
> +			dev = bus->find_device(NULL, cmp_dev_name,
> +					       uevent.devname);
> +			if (dev == NULL) {
> +				RTE_LOG(ERR, EAL,
> +					"Cannot find unplugged device (%s)\n",
> +					uevent.devname);
> +				return;
> +			}
> +			ret = dev_uev_failure_process(dev, NULL);
> +			if (ret) {
> +				RTE_LOG(ERR, EAL, "Driver cannot remap the "
> +					"device (%s)\n",
> +					dev->name);
> +				return;
> +			}
> +		}
>  		dev_callback_process(uevent.devname, uevent.type);
> +	}
>  }
> 
>  int __rte_experimental
> @@ -216,8 +320,26 @@ rte_dev_event_monitor_stop(void)
>  		return ret;
>  	}
> 
> +	/* recover sigbus. */
> +	sigaction(SIGBUS, NULL, NULL);
> +
>  	close(intr_handle.fd);
>  	intr_handle.fd = -1;
>  	monitor_started = false;
> +
>  	return 0;
>  }
> +
> +void __rte_experimental
> +rte_dev_handle_hot_unplug(void)
> +{
> +	struct sigaction act;
> +
> +	/* set sigbus handler for hotplug. */
> +	memset(&act, 0x00, sizeof(struct sigaction));
> +	act.sa_sigaction = sigbus_handler;
> +	sigemptyset(&act.sa_mask);
> +	sigaddset(&act.sa_mask, SIGBUS);
> +	act.sa_flags = SA_SIGINFO;
> +	sigaction(SIGBUS, &act, NULL);
> +}

Not sure if it's necessary to expose this API, 
it can be invoked in rte_dev_event_monitor_start, 
since register a sigbus handler looks like an init step when user device to enable hotplug

Regards
Qi

> diff --git a/lib/librte_eal/rte_eal_version.map
> b/lib/librte_eal/rte_eal_version.map
> index d02d80b..39a0213 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -217,6 +217,7 @@ EXPERIMENTAL {
>  	rte_dev_event_callback_unregister;
>  	rte_dev_event_monitor_start;
>  	rte_dev_event_monitor_stop;
> +	rte_dev_handle_hot_unplug;
>  	rte_eal_cleanup;
>  	rte_eal_devargs_insert;
>  	rte_eal_devargs_parse;
> --
> 2.7.4
  
Ananyev, Konstantin April 20, 2018, 11:14 a.m. UTC | #2
> This patch introduces a failure handler mechanism to handle device
> hot unplug event. When device be hot plug out, the device resource
> become invalid, if this resource is still be unexpected read/write,
> system will crash. This patch let eal help application to handle
> this fault, when sigbus error occur, check the failure address and
> accordingly remap the invalid memory for the corresponding device,
> that could guaranty the application not to be shut down when hot plug.
> 
> Signed-off-by: Jeff Guo <jia.guo@intel.com>
> ---
> v20->v19:
> refine the logic of remapping for multiple device.
> ---
>  doc/guides/rel_notes/release_18_05.rst  |   6 ++
>  lib/librte_eal/common/include/rte_dev.h |  11 +++
>  lib/librte_eal/linuxapp/eal/eal_dev.c   | 124 +++++++++++++++++++++++++++++++-
>  lib/librte_eal/rte_eal_version.map      |   1 +
>  4 files changed, 141 insertions(+), 1 deletion(-)
> 
> diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
> index a018ef5..a4ea9af 100644
> --- a/doc/guides/rel_notes/release_18_05.rst
> +++ b/doc/guides/rel_notes/release_18_05.rst
> @@ -70,6 +70,12 @@ New Features
> 
>    Linux uevent is supported as backend of this device event notification framework.
> 
> +* **Added hot plug failure handler.**
> +
> +  Added a failure handler machenism to handle hot unplug device.
> +
> +  * ``rte_dev_handle_hot_unplug`` for handle hot unplug device failure.
> +
> 
>  API Changes
>  -----------
> diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
> index 0955e9a..9933131 100644
> --- a/lib/librte_eal/common/include/rte_dev.h
> +++ b/lib/librte_eal/common/include/rte_dev.h
> @@ -360,4 +360,15 @@ rte_dev_event_monitor_start(void);
>  int __rte_experimental
>  rte_dev_event_monitor_stop(void);
> 
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * It can be used to handle the device signal bus error. when signal bus error
> + * occur, the handler would check the failure address to find the corresponding
> + * device and remap the memory resource of the device, that would guaranty
> + * the system not crash when the device be hot unplug.
> + */
> +void __rte_experimental
> +rte_dev_handle_hot_unplug(void);
>  #endif /* _RTE_DEV_H_ */
> diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c
> index 9478a39..33e7026 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_dev.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
> @@ -4,6 +4,8 @@
> 
>  #include <string.h>
>  #include <unistd.h>
> +#include <fcntl.h>
> +#include <signal.h>
>  #include <sys/socket.h>
>  #include <linux/netlink.h>
> 
> @@ -13,12 +15,16 @@
>  #include <rte_malloc.h>
>  #include <rte_interrupts.h>
>  #include <rte_alarm.h>
> +#include <rte_bus.h>
> +#include <rte_eal.h>
> 
>  #include "eal_private.h"
> 
>  static struct rte_intr_handle intr_handle = {.fd = -1 };
>  static bool monitor_started;
> 
> +extern struct rte_bus_list rte_bus_list;
> +
>  #define EAL_UEV_MSG_LEN 4096
>  #define EAL_UEV_MSG_ELEM_LEN 128
> 
> @@ -33,6 +39,68 @@ enum eal_dev_event_subsystem {
>  };
> 
>  static int
> +dev_uev_failure_process(struct rte_device *dev, void *dev_addr)
> +{
> +	struct rte_bus *bus;
> +	int ret = 0;
> +
> +	if (!dev && !dev_addr) {
> +		return -EINVAL;
> +	} else if (dev) {
> +		bus = rte_bus_find_by_device_name(dev->name);
> +		if (bus->handle_hot_unplug) {
> +			/**
> +			 * call bus ops to handle hot unplug.
> +			 */
> +			ret = bus->handle_hot_unplug(dev, dev_addr);
> +			if (ret) {
> +				RTE_LOG(ERR, EAL,
> +					"It cannot handle hot unplug "
> +					"for device (%s) "
> +					"on the bus.\n ",
> +					dev->name);
> +			}
> +		}


You would retrun 0 if bus->handle_hot_unplug == NULL.
Is that intended?
Shouldn't be I think.

> +	} else {
> +		TAILQ_FOREACH(bus, &rte_bus_list, next) {
> +			if (bus->handle_hot_unplug) {
> +				/**
> +				 * call bus ops to handle hot unplug.
> +				 */
> +				ret = bus->handle_hot_unplug(dev, dev_addr);
> +				if (ret) {
> +					RTE_LOG(ERR, EAL,
> +						"It cannot handle hot unplug "
> +						"for the device "
> +						"on the bus.\n ");
> +				}

So how we would know what happened here:
That address doesn't belong to that bus or unplug_handler failed?
Should we separate search and unplug ops?
Another question - shouldn't we break out of loop if bus->handle_hot_unplug()
returns 0?
Otherwise you can return error value even when unplug handled worked correctly.


> +			}
> +		}
> +	}
> +	return ret;
> +}
> +
> +static void sigbus_handler(int signum __rte_unused, siginfo_t *info,
> +				void *ctx __rte_unused)
> +{
> +	int ret;
> +
> +	RTE_LOG(ERR, EAL, "SIGBUS error, fault address:%p\n", info->si_addr);
> +	ret = dev_uev_failure_process(NULL, info->si_addr);

As now you can try to mmap/munmap same address from two or more different threads
you probably need some synchronization here.
Something simple as spinlock seems to be enough here. 
We might have one per device or might be even a global one would be ok here.

> +	if (!ret)
> +		RTE_LOG(DEBUG, EAL,
> +			"SIGBUS error is because of hot unplug!\n");
> +}
> +
> +static int cmp_dev_name(const struct rte_device *dev,
> +	const void *_name)
> +{
> +	const char *name = _name;
> +
> +	return strcmp(dev->name, name);
> +}

Is it really worth a separate function?

> +
> +static int
>  dev_uev_socket_fd_create(void)
>  {
>  	struct sockaddr_nl addr;
> @@ -146,6 +214,9 @@ dev_uev_handler(__rte_unused void *param)
>  	struct rte_dev_event uevent;
>  	int ret;
>  	char buf[EAL_UEV_MSG_LEN];
> +	struct rte_bus *bus;
> +	struct rte_device *dev;
> +	const char *busname;
> 
>  	memset(&uevent, 0, sizeof(struct rte_dev_event));
>  	memset(buf, 0, EAL_UEV_MSG_LEN);
> @@ -170,8 +241,41 @@ dev_uev_handler(__rte_unused void *param)
>  	RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
>  		uevent.devname, uevent.type, uevent.subsystem);
> 
> -	if (uevent.devname)
> +	switch (uevent.subsystem) {
> +	case EAL_DEV_EVENT_SUBSYSTEM_PCI:
> +	case EAL_DEV_EVENT_SUBSYSTEM_UIO:
> +		busname = "pci";
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (uevent.devname) {
> +		if (uevent.type == RTE_DEV_EVENT_REMOVE) {
> +			bus = rte_bus_find_by_name(busname);
> +			if (bus == NULL) {
> +				RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
> +					uevent.devname);
> +				return;
> +			}
> +			dev = bus->find_device(NULL, cmp_dev_name,
> +					       uevent.devname);
> +			if (dev == NULL) {
> +				RTE_LOG(ERR, EAL,
> +					"Cannot find unplugged device (%s)\n",
> +					uevent.devname);
> +				return;
> +			}
> +			ret = dev_uev_failure_process(dev, NULL);
> +			if (ret) {
> +				RTE_LOG(ERR, EAL, "Driver cannot remap the "
> +					"device (%s)\n",
> +					dev->name);
> +				return;
> +			}
> +		}
>  		dev_callback_process(uevent.devname, uevent.type);
> +	}
>  }
> 
>  int __rte_experimental
> @@ -216,8 +320,26 @@ rte_dev_event_monitor_stop(void)
>  		return ret;
>  	}
> 
> +	/* recover sigbus. */
> +	sigaction(SIGBUS, NULL, NULL);
> +

Probably better to restore previous action.

>  	close(intr_handle.fd);
>  	intr_handle.fd = -1;
>  	monitor_started = false;
> +
>  	return 0;
>  }
> +
> +void __rte_experimental
> +rte_dev_handle_hot_unplug(void)
> +{
> +	struct sigaction act;
> +
> +	/* set sigbus handler for hotplug. */
> +	memset(&act, 0x00, sizeof(struct sigaction));
> +	act.sa_sigaction = sigbus_handler;
> +	sigemptyset(&act.sa_mask);
> +	sigaddset(&act.sa_mask, SIGBUS);
> +	act.sa_flags = SA_SIGINFO;
> +	sigaction(SIGBUS, &act, NULL);
> +}
> diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
> index d02d80b..39a0213 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -217,6 +217,7 @@ EXPERIMENTAL {
>  	rte_dev_event_callback_unregister;
>  	rte_dev_event_monitor_start;
>  	rte_dev_event_monitor_stop;
> +	rte_dev_handle_hot_unplug;
>  	rte_eal_cleanup;
>  	rte_eal_devargs_insert;
>  	rte_eal_devargs_parse;
> --
> 2.7.4
  
Ananyev, Konstantin April 20, 2018, 4:16 p.m. UTC | #3
> > +
> > +static void sigbus_handler(int signum __rte_unused, siginfo_t *info,
> > +				void *ctx __rte_unused)
> > +{
> > +	int ret;
> > +
> > +	RTE_LOG(ERR, EAL, "SIGBUS error, fault address:%p\n", info->si_addr);
> > +	ret = dev_uev_failure_process(NULL, info->si_addr);
> 
> As now you can try to mmap/munmap same address from two or more different threads
> you probably need some synchronization here.
> Something simple as spinlock seems to be enough here.
> We might have one per device or might be even a global one would be ok here.
> 
> > +	if (!ret)
> > +		RTE_LOG(DEBUG, EAL,
> > +			"SIGBUS error is because of hot unplug!\n");

Also if sigbus handler wasn't able to fix things - failure addr doesn't belong to 
any devices, or remaping fails - we probably should invoke previously installed handler
or just apply default action.
Konstantin

> > +}
> > +
  
Guo, Jia May 3, 2018, 3:13 a.m. UTC | #4
On 4/20/2018 7:14 PM, Ananyev, Konstantin wrote:
>
>> This patch introduces a failure handler mechanism to handle device
>> hot unplug event. When device be hot plug out, the device resource
>> become invalid, if this resource is still be unexpected read/write,
>> system will crash. This patch let eal help application to handle
>> this fault, when sigbus error occur, check the failure address and
>> accordingly remap the invalid memory for the corresponding device,
>> that could guaranty the application not to be shut down when hot plug.
>>
>> Signed-off-by: Jeff Guo <jia.guo@intel.com>
>> ---
>> v20->v19:
>> refine the logic of remapping for multiple device.
>> ---
>>   doc/guides/rel_notes/release_18_05.rst  |   6 ++
>>   lib/librte_eal/common/include/rte_dev.h |  11 +++
>>   lib/librte_eal/linuxapp/eal/eal_dev.c   | 124 +++++++++++++++++++++++++++++++-
>>   lib/librte_eal/rte_eal_version.map      |   1 +
>>   4 files changed, 141 insertions(+), 1 deletion(-)
>>
>> diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
>> index a018ef5..a4ea9af 100644
>> --- a/doc/guides/rel_notes/release_18_05.rst
>> +++ b/doc/guides/rel_notes/release_18_05.rst
>> @@ -70,6 +70,12 @@ New Features
>>
>>     Linux uevent is supported as backend of this device event notification framework.
>>
>> +* **Added hot plug failure handler.**
>> +
>> +  Added a failure handler machenism to handle hot unplug device.
>> +
>> +  * ``rte_dev_handle_hot_unplug`` for handle hot unplug device failure.
>> +
>>
>>   API Changes
>>   -----------
>> diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
>> index 0955e9a..9933131 100644
>> --- a/lib/librte_eal/common/include/rte_dev.h
>> +++ b/lib/librte_eal/common/include/rte_dev.h
>> @@ -360,4 +360,15 @@ rte_dev_event_monitor_start(void);
>>   int __rte_experimental
>>   rte_dev_event_monitor_stop(void);
>>
>> +/**
>> + * @warning
>> + * @b EXPERIMENTAL: this API may change without prior notice
>> + *
>> + * It can be used to handle the device signal bus error. when signal bus error
>> + * occur, the handler would check the failure address to find the corresponding
>> + * device and remap the memory resource of the device, that would guaranty
>> + * the system not crash when the device be hot unplug.
>> + */
>> +void __rte_experimental
>> +rte_dev_handle_hot_unplug(void);
>>   #endif /* _RTE_DEV_H_ */
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c
>> index 9478a39..33e7026 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal_dev.c
>> +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
>> @@ -4,6 +4,8 @@
>>
>>   #include <string.h>
>>   #include <unistd.h>
>> +#include <fcntl.h>
>> +#include <signal.h>
>>   #include <sys/socket.h>
>>   #include <linux/netlink.h>
>>
>> @@ -13,12 +15,16 @@
>>   #include <rte_malloc.h>
>>   #include <rte_interrupts.h>
>>   #include <rte_alarm.h>
>> +#include <rte_bus.h>
>> +#include <rte_eal.h>
>>
>>   #include "eal_private.h"
>>
>>   static struct rte_intr_handle intr_handle = {.fd = -1 };
>>   static bool monitor_started;
>>
>> +extern struct rte_bus_list rte_bus_list;
>> +
>>   #define EAL_UEV_MSG_LEN 4096
>>   #define EAL_UEV_MSG_ELEM_LEN 128
>>
>> @@ -33,6 +39,68 @@ enum eal_dev_event_subsystem {
>>   };
>>
>>   static int
>> +dev_uev_failure_process(struct rte_device *dev, void *dev_addr)
>> +{
>> +	struct rte_bus *bus;
>> +	int ret = 0;
>> +
>> +	if (!dev && !dev_addr) {
>> +		return -EINVAL;
>> +	} else if (dev) {
>> +		bus = rte_bus_find_by_device_name(dev->name);
>> +		if (bus->handle_hot_unplug) {
>> +			/**
>> +			 * call bus ops to handle hot unplug.
>> +			 */
>> +			ret = bus->handle_hot_unplug(dev, dev_addr);
>> +			if (ret) {
>> +				RTE_LOG(ERR, EAL,
>> +					"It cannot handle hot unplug "
>> +					"for device (%s) "
>> +					"on the bus.\n ",
>> +					dev->name);
>> +			}
>> +		}
>
> You would retrun 0 if bus->handle_hot_unplug == NULL.
> Is that intended?
> Shouldn't be I think.
shouldn't be. will modify it.
>> +	} else {
>> +		TAILQ_FOREACH(bus, &rte_bus_list, next) {
>> +			if (bus->handle_hot_unplug) {
>> +				/**
>> +				 * call bus ops to handle hot unplug.
>> +				 */
>> +				ret = bus->handle_hot_unplug(dev, dev_addr);
>> +				if (ret) {
>> +					RTE_LOG(ERR, EAL,
>> +						"It cannot handle hot unplug "
>> +						"for the device "
>> +						"on the bus.\n ");
>> +				}
> So how we would know what happened here:
> That address doesn't belong to that bus or unplug_handler failed?
> Should we separate search and unplug ops?
> Another question - shouldn't we break out of loop if bus->handle_hot_unplug()
> returns 0?
> Otherwise you can return error value even when unplug handled worked correctly.
>
you are right here.
>> +			}
>> +		}
>> +	}
>> +	return ret;
>> +}
>> +
>> +static void sigbus_handler(int signum __rte_unused, siginfo_t *info,
>> +				void *ctx __rte_unused)
>> +{
>> +	int ret;
>> +
>> +	RTE_LOG(ERR, EAL, "SIGBUS error, fault address:%p\n", info->si_addr);
>> +	ret = dev_uev_failure_process(NULL, info->si_addr);
> As now you can try to mmap/munmap same address from two or more different threads
> you probably need some synchronization here.
> Something simple as spinlock seems to be enough here.
> We might have one per device or might be even a global one would be ok here.
i think global one and synchronization would be fine.
>> +	if (!ret)
>> +		RTE_LOG(DEBUG, EAL,
>> +			"SIGBUS error is because of hot unplug!\n");
>> +}
>> +
>> +static int cmp_dev_name(const struct rte_device *dev,
>> +	const void *_name)
>> +{
>> +	const char *name = _name;
>> +
>> +	return strcmp(dev->name, name);
>> +}
> Is it really worth a separate function?
i think that would be the bus ops struct of rte_bus_find_device_t usage 
here.
>> +
>> +static int
>>   dev_uev_socket_fd_create(void)
>>   {
>>   	struct sockaddr_nl addr;
>> @@ -146,6 +214,9 @@ dev_uev_handler(__rte_unused void *param)
>>   	struct rte_dev_event uevent;
>>   	int ret;
>>   	char buf[EAL_UEV_MSG_LEN];
>> +	struct rte_bus *bus;
>> +	struct rte_device *dev;
>> +	const char *busname;
>>
>>   	memset(&uevent, 0, sizeof(struct rte_dev_event));
>>   	memset(buf, 0, EAL_UEV_MSG_LEN);
>> @@ -170,8 +241,41 @@ dev_uev_handler(__rte_unused void *param)
>>   	RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
>>   		uevent.devname, uevent.type, uevent.subsystem);
>>
>> -	if (uevent.devname)
>> +	switch (uevent.subsystem) {
>> +	case EAL_DEV_EVENT_SUBSYSTEM_PCI:
>> +	case EAL_DEV_EVENT_SUBSYSTEM_UIO:
>> +		busname = "pci";
>> +		break;
>> +	default:
>> +		break;
>> +	}
>> +
>> +	if (uevent.devname) {
>> +		if (uevent.type == RTE_DEV_EVENT_REMOVE) {
>> +			bus = rte_bus_find_by_name(busname);
>> +			if (bus == NULL) {
>> +				RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
>> +					uevent.devname);
>> +				return;
>> +			}
>> +			dev = bus->find_device(NULL, cmp_dev_name,
>> +					       uevent.devname);
>> +			if (dev == NULL) {
>> +				RTE_LOG(ERR, EAL,
>> +					"Cannot find unplugged device (%s)\n",
>> +					uevent.devname);
>> +				return;
>> +			}
>> +			ret = dev_uev_failure_process(dev, NULL);
>> +			if (ret) {
>> +				RTE_LOG(ERR, EAL, "Driver cannot remap the "
>> +					"device (%s)\n",
>> +					dev->name);
>> +				return;
>> +			}
>> +		}
>>   		dev_callback_process(uevent.devname, uevent.type);
>> +	}
>>   }
>>
>>   int __rte_experimental
>> @@ -216,8 +320,26 @@ rte_dev_event_monitor_stop(void)
>>   		return ret;
>>   	}
>>
>> +	/* recover sigbus. */
>> +	sigaction(SIGBUS, NULL, NULL);
>> +
> Probably better to restore previous action.
correct, restore the previous sigbus action so that no affect other.
>>   	close(intr_handle.fd);
>>   	intr_handle.fd = -1;
>>   	monitor_started = false;
>> +
>>   	return 0;
>>   }
>> +
>> +void __rte_experimental
>> +rte_dev_handle_hot_unplug(void)
>> +{
>> +	struct sigaction act;
>> +
>> +	/* set sigbus handler for hotplug. */
>> +	memset(&act, 0x00, sizeof(struct sigaction));
>> +	act.sa_sigaction = sigbus_handler;
>> +	sigemptyset(&act.sa_mask);
>> +	sigaddset(&act.sa_mask, SIGBUS);
>> +	act.sa_flags = SA_SIGINFO;
>> +	sigaction(SIGBUS, &act, NULL);
>> +}
>> diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
>> index d02d80b..39a0213 100644
>> --- a/lib/librte_eal/rte_eal_version.map
>> +++ b/lib/librte_eal/rte_eal_version.map
>> @@ -217,6 +217,7 @@ EXPERIMENTAL {
>>   	rte_dev_event_callback_unregister;
>>   	rte_dev_event_monitor_start;
>>   	rte_dev_event_monitor_stop;
>> +	rte_dev_handle_hot_unplug;
>>   	rte_eal_cleanup;
>>   	rte_eal_devargs_insert;
>>   	rte_eal_devargs_parse;
>> --
>> 2.7.4
  
Guo, Jia May 3, 2018, 3:17 a.m. UTC | #5
On 4/21/2018 12:16 AM, Ananyev, Konstantin wrote:
>>> +
>>> +static void sigbus_handler(int signum __rte_unused, siginfo_t *info,
>>> +				void *ctx __rte_unused)
>>> +{
>>> +	int ret;
>>> +
>>> +	RTE_LOG(ERR, EAL, "SIGBUS error, fault address:%p\n", info->si_addr);
>>> +	ret = dev_uev_failure_process(NULL, info->si_addr);
>> As now you can try to mmap/munmap same address from two or more different threads
>> you probably need some synchronization here.
>> Something simple as spinlock seems to be enough here.
>> We might have one per device or might be even a global one would be ok here.
>>
>>> +	if (!ret)
>>> +		RTE_LOG(DEBUG, EAL,
>>> +			"SIGBUS error is because of hot unplug!\n");
> Also if sigbus handler wasn't able to fix things - failure addr doesn't belong to
> any devices, or remaping fails - we probably should invoke previously installed handler
> or just apply default action.
> Konstantin
i think just exception here by exit for apply default action, and info 
that is a normal sigbus error should be ok.
>>> +}
>>> +
  

Patch

diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
index a018ef5..a4ea9af 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -70,6 +70,12 @@  New Features
 
   Linux uevent is supported as backend of this device event notification framework.
 
+* **Added hot plug failure handler.**
+
+  Added a failure handler machenism to handle hot unplug device.
+
+  * ``rte_dev_handle_hot_unplug`` for handle hot unplug device failure.
+
 
 API Changes
 -----------
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index 0955e9a..9933131 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -360,4 +360,15 @@  rte_dev_event_monitor_start(void);
 int __rte_experimental
 rte_dev_event_monitor_stop(void);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * It can be used to handle the device signal bus error. when signal bus error
+ * occur, the handler would check the failure address to find the corresponding
+ * device and remap the memory resource of the device, that would guaranty
+ * the system not crash when the device be hot unplug.
+ */
+void __rte_experimental
+rte_dev_handle_hot_unplug(void);
 #endif /* _RTE_DEV_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c
index 9478a39..33e7026 100644
--- a/lib/librte_eal/linuxapp/eal/eal_dev.c
+++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
@@ -4,6 +4,8 @@ 
 
 #include <string.h>
 #include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
 #include <sys/socket.h>
 #include <linux/netlink.h>
 
@@ -13,12 +15,16 @@ 
 #include <rte_malloc.h>
 #include <rte_interrupts.h>
 #include <rte_alarm.h>
+#include <rte_bus.h>
+#include <rte_eal.h>
 
 #include "eal_private.h"
 
 static struct rte_intr_handle intr_handle = {.fd = -1 };
 static bool monitor_started;
 
+extern struct rte_bus_list rte_bus_list;
+
 #define EAL_UEV_MSG_LEN 4096
 #define EAL_UEV_MSG_ELEM_LEN 128
 
@@ -33,6 +39,68 @@  enum eal_dev_event_subsystem {
 };
 
 static int
+dev_uev_failure_process(struct rte_device *dev, void *dev_addr)
+{
+	struct rte_bus *bus;
+	int ret = 0;
+
+	if (!dev && !dev_addr) {
+		return -EINVAL;
+	} else if (dev) {
+		bus = rte_bus_find_by_device_name(dev->name);
+		if (bus->handle_hot_unplug) {
+			/**
+			 * call bus ops to handle hot unplug.
+			 */
+			ret = bus->handle_hot_unplug(dev, dev_addr);
+			if (ret) {
+				RTE_LOG(ERR, EAL,
+					"It cannot handle hot unplug "
+					"for device (%s) "
+					"on the bus.\n ",
+					dev->name);
+			}
+		}
+	} else {
+		TAILQ_FOREACH(bus, &rte_bus_list, next) {
+			if (bus->handle_hot_unplug) {
+				/**
+				 * call bus ops to handle hot unplug.
+				 */
+				ret = bus->handle_hot_unplug(dev, dev_addr);
+				if (ret) {
+					RTE_LOG(ERR, EAL,
+						"It cannot handle hot unplug "
+						"for the device "
+						"on the bus.\n ");
+				}
+			}
+		}
+	}
+	return ret;
+}
+
+static void sigbus_handler(int signum __rte_unused, siginfo_t *info,
+				void *ctx __rte_unused)
+{
+	int ret;
+
+	RTE_LOG(ERR, EAL, "SIGBUS error, fault address:%p\n", info->si_addr);
+	ret = dev_uev_failure_process(NULL, info->si_addr);
+	if (!ret)
+		RTE_LOG(DEBUG, EAL,
+			"SIGBUS error is because of hot unplug!\n");
+}
+
+static int cmp_dev_name(const struct rte_device *dev,
+	const void *_name)
+{
+	const char *name = _name;
+
+	return strcmp(dev->name, name);
+}
+
+static int
 dev_uev_socket_fd_create(void)
 {
 	struct sockaddr_nl addr;
@@ -146,6 +214,9 @@  dev_uev_handler(__rte_unused void *param)
 	struct rte_dev_event uevent;
 	int ret;
 	char buf[EAL_UEV_MSG_LEN];
+	struct rte_bus *bus;
+	struct rte_device *dev;
+	const char *busname;
 
 	memset(&uevent, 0, sizeof(struct rte_dev_event));
 	memset(buf, 0, EAL_UEV_MSG_LEN);
@@ -170,8 +241,41 @@  dev_uev_handler(__rte_unused void *param)
 	RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
 		uevent.devname, uevent.type, uevent.subsystem);
 
-	if (uevent.devname)
+	switch (uevent.subsystem) {
+	case EAL_DEV_EVENT_SUBSYSTEM_PCI:
+	case EAL_DEV_EVENT_SUBSYSTEM_UIO:
+		busname = "pci";
+		break;
+	default:
+		break;
+	}
+
+	if (uevent.devname) {
+		if (uevent.type == RTE_DEV_EVENT_REMOVE) {
+			bus = rte_bus_find_by_name(busname);
+			if (bus == NULL) {
+				RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
+					uevent.devname);
+				return;
+			}
+			dev = bus->find_device(NULL, cmp_dev_name,
+					       uevent.devname);
+			if (dev == NULL) {
+				RTE_LOG(ERR, EAL,
+					"Cannot find unplugged device (%s)\n",
+					uevent.devname);
+				return;
+			}
+			ret = dev_uev_failure_process(dev, NULL);
+			if (ret) {
+				RTE_LOG(ERR, EAL, "Driver cannot remap the "
+					"device (%s)\n",
+					dev->name);
+				return;
+			}
+		}
 		dev_callback_process(uevent.devname, uevent.type);
+	}
 }
 
 int __rte_experimental
@@ -216,8 +320,26 @@  rte_dev_event_monitor_stop(void)
 		return ret;
 	}
 
+	/* recover sigbus. */
+	sigaction(SIGBUS, NULL, NULL);
+
 	close(intr_handle.fd);
 	intr_handle.fd = -1;
 	monitor_started = false;
+
 	return 0;
 }
+
+void __rte_experimental
+rte_dev_handle_hot_unplug(void)
+{
+	struct sigaction act;
+
+	/* set sigbus handler for hotplug. */
+	memset(&act, 0x00, sizeof(struct sigaction));
+	act.sa_sigaction = sigbus_handler;
+	sigemptyset(&act.sa_mask);
+	sigaddset(&act.sa_mask, SIGBUS);
+	act.sa_flags = SA_SIGINFO;
+	sigaction(SIGBUS, &act, NULL);
+}
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index d02d80b..39a0213 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -217,6 +217,7 @@  EXPERIMENTAL {
 	rte_dev_event_callback_unregister;
 	rte_dev_event_monitor_start;
 	rte_dev_event_monitor_stop;
+	rte_dev_handle_hot_unplug;
 	rte_eal_cleanup;
 	rte_eal_devargs_insert;
 	rte_eal_devargs_parse;