So far, about hot plug in dpdk, we already have hot plug add/remove
api and fail-safe driver to offload the fail-safe work from the app
user. But there are still lack of a general event api, since the interrupt
event, which hot plug related with, is diversity between each device and
driver, such as mlx4, pci driver and others.
Use the hot removal event for example, pci drivers not all exposure the
remove interrupt, so in order to make user to easy use the hot plug feature
for pci driver, something must be done to detect the remove event at the
kernel level and offer a new line of interrupt to the user land.
Base on the uevent of kobject mechanism in kernel, we could use it to
benefit for monitoring the hot plug status of the device which not only
uio/vfio of pci bus devices, but also other, such as cpu/usb/pci-express
bus devices.
The idea is comming as bellow.
a.The uevent message form FD monitoring which will be useful.
remove@/devices/pci0000:80/0000:80:02.2/0000:82:00.0/0000:83:03.0/0000:84:00.2/uio/uio2
ACTION=remove
DEVPATH=/devices/pci0000:80/0000:80:02.2/0000:82:00.0/0000:83:03.0/0000:84:00.2/uio/uio2
SUBSYSTEM=uio
MAJOR=243
MINOR=2
DEVNAME=uio2
SEQNUM=11366
b.add uevent monitoring machanism:
add several general api to enable uevent monitoring.
c.add common uevent handler and uevent failure handler
uevent of device should be handler at bus or device layer, and the memory read
and write failure when hot removal should be handle correctly before detach behaviors.
d.show example how to use uevent monitor
enable uevent monitoring in testpmd or fail-safe to show usage.
patchset history:
v8->v7:
1.use rte_service to replace pthread management.
2.fix defind issue and copyright issue
3.fix some lock issue
v7->v6:
1.modify vdev part according to the vdev rework
2.re-define and split the func into common and bus specific code
3.fix some incorrect issue.
4.fix the system hung after send packcet issue.
v6->v5:
1.add hot plug policy, in eal, default handle to prepare hot plug work for
all pci device, then let app to manage to deside which device need to
hot plug.
2.modify to manage event callback in each device.
3.fix some system hung issue when igb_uio release.
4.modify the pci part to the bus-pci base on the bus rework.
5.add hot plug policy in app, show example to use hotplug list to manage
to deside which device need to hot plug.
v5->v4:
1.Move uevent monitor epolling from eal interrupt to eal device layer.
2.Redefine the eal device API for common, and distinguish between linux and bsd
3.Add failure handler helper api in bus layer.Add function of find device by name.
4.Replace of individual fd bind with single device, use a common fd to polling all device.
5.Add to register hot insertion monitoring and process, add function to auto bind driver befor user add device
6.Refine some coding style and typos issue
7.add new callback to process hot insertion
v4->v3:
1.move uevent monitor api from eal interrupt to eal device layer.
2.create uevent type and struct in eal device.
3.move uevent handler for each driver to eal layer.
4.add uevent failure handler to process signal fault issue.
5.add example for request and use uevent monitoring in testpmd.
v3->v2:
1.refine some return error
2.refine the string searching logic to avoid memory issue
v2->v1:
1.remove global variables of hotplug_fd, add uevent_fd
in rte_intr_handle to let each pci device self maintain it fd,
to fix dual device fd issue.
2.refine some typo error.
Jeff Guo (3):
eal: add uevent monitor for hot plug
igb_uio: fix device removal issuse for hotplug
app/testpmd: use uevent to monitor hotplug
app/test-pmd/testpmd.c | 178 ++++++++++
app/test-pmd/testpmd.h | 9 +
drivers/bus/pci/bsd/pci.c | 30 ++
drivers/bus/pci/linux/pci.c | 87 +++++
drivers/bus/pci/pci_common.c | 43 +++
drivers/bus/pci/pci_common_uio.c | 28 ++
drivers/bus/pci/private.h | 12 +
drivers/bus/pci/rte_bus_pci.h | 25 ++
drivers/bus/vdev/vdev.c | 36 ++
lib/librte_eal/bsdapp/eal/eal_dev.c | 37 ++
.../bsdapp/eal/include/exec-env/rte_dev.h | 39 +++
lib/librte_eal/common/eal_common_bus.c | 30 ++
lib/librte_eal/common/eal_common_dev.c | 160 +++++++++
lib/librte_eal/common/include/rte_bus.h | 71 ++++
lib/librte_eal/common/include/rte_dev.h | 128 +++++++
lib/librte_eal/linuxapp/eal/Makefile | 3 +-
lib/librte_eal/linuxapp/eal/eal_dev.c | 375 +++++++++++++++++++++
.../linuxapp/eal/include/exec-env/rte_dev.h | 39 +++
lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 6 +
lib/librte_pci/rte_pci.c | 20 ++
lib/librte_pci/rte_pci.h | 17 +
21 files changed, 1372 insertions(+), 1 deletion(-)
create mode 100644 lib/librte_eal/bsdapp/eal/eal_dev.c
create mode 100644 lib/librte_eal/bsdapp/eal/include/exec-env/rte_dev.h
create mode 100644 lib/librte_eal/linuxapp/eal/eal_dev.c
create mode 100644 lib/librte_eal/linuxapp/eal/include/exec-env/rte_dev.h
@@ -401,6 +401,8 @@ uint8_t bitrate_enabled;
struct gro_status gro_ports[RTE_MAX_ETHPORTS];
uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
+static struct hotplug_request_list hp_list;
+
/* Forward function declarations */
static void map_port_queue_stats_mapping_registers(portid_t pi,
struct rte_port *port);
@@ -408,6 +410,13 @@ static void check_all_ports_link_status(uint32_t port_mask);
static int eth_event_callback(portid_t port_id,
enum rte_eth_event_type type,
void *param, void *ret_param);
+static int eth_uevent_callback(enum rte_eal_dev_event_type type,
+ void *param, void *ret_param);
+static int eth_uevent_callback_register(portid_t pid);
+static int in_hotplug_list(const char *dev_name);
+
+static int hotplug_list_add(const char *dev_name,
+ enum rte_eal_dev_event_type event);
/*
* Check if all the ports are started.
@@ -1757,6 +1766,31 @@ reset_port(portid_t pid)
printf("Done\n");
}
+static int
+eth_uevent_callback_register(portid_t pid) {
+ int diag;
+ struct rte_eth_dev *dev;
+ enum rte_eal_dev_event_type dev_event_type;
+
+ /* register the uevent callback */
+ dev = &rte_eth_devices[pid];
+ for (dev_event_type = RTE_EAL_DEV_EVENT_ADD;
+ dev_event_type < RTE_EAL_DEV_EVENT_CHANGE;
+ dev_event_type++) {
+ diag = rte_dev_callback_register(dev->device, dev_event_type,
+ eth_uevent_callback,
+ (void *)(intptr_t)pid);
+ if (diag) {
+ printf("Failed to setup uevent callback for"
+ " device event %d\n",
+ dev_event_type);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
void
attach_port(char *identifier)
{
@@ -1773,6 +1807,8 @@ attach_port(char *identifier)
if (rte_eth_dev_attach(identifier, &pi))
return;
+ eth_uevent_callback_register(pi);
+
socket_id = (unsigned)rte_eth_dev_socket_id(pi);
/* if socket_id is invalid, set to 0 */
if (check_socket_id(socket_id) < 0)
@@ -1784,6 +1820,8 @@ attach_port(char *identifier)
ports[pi].port_status = RTE_PORT_STOPPED;
+ hotplug_list_add(identifier, RTE_EAL_DEV_EVENT_REMOVE);
+
printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
printf("Done\n");
}
@@ -1810,6 +1848,9 @@ detach_port(portid_t port_id)
nb_ports = rte_eth_dev_count();
+ hotplug_list_add(rte_eth_devices[port_id].device->name,
+ RTE_EAL_DEV_EVENT_ADD);
+
printf("Port '%s' is detached. Now total ports is %d\n",
name, nb_ports);
printf("Done\n");
@@ -1833,6 +1874,9 @@ pmd_test_exit(void)
close_port(pt_id);
}
}
+
+ rte_dev_monitor_stop();
+
printf("\nBye...\n");
}
@@ -1917,6 +1961,49 @@ rmv_event_callback(void *arg)
dev->device->name);
}
+static void
+rmv_uevent_callback(void *arg)
+{
+ char name[RTE_ETH_NAME_MAX_LEN];
+ uint8_t port_id = (intptr_t)arg;
+
+ rte_eal_alarm_cancel(rmv_uevent_callback, arg);
+
+ RTE_ETH_VALID_PORTID_OR_RET(port_id);
+ printf("removing port id:%u\n", port_id);
+
+ if (!in_hotplug_list(rte_eth_devices[port_id].device->name))
+ return;
+
+ stop_packet_forwarding();
+
+ stop_port(port_id);
+ close_port(port_id);
+ if (rte_eth_dev_detach(port_id, name)) {
+ RTE_LOG(ERR, USER1, "Failed to detach port '%s'\n", name);
+ return;
+ }
+
+ nb_ports = rte_eth_dev_count();
+
+ printf("Port '%s' is detached. Now total ports is %d\n",
+ name, nb_ports);
+}
+
+static void
+add_uevent_callback(void *arg)
+{
+ char *dev_name = (char *)arg;
+
+ rte_eal_alarm_cancel(add_uevent_callback, arg);
+
+ if (!in_hotplug_list(dev_name))
+ return;
+
+ RTE_LOG(ERR, EAL, "add device: %s\n", dev_name);
+ attach_port(dev_name);
+}
+
/* This function is used by the interrupt thread */
static int
eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
@@ -1959,6 +2046,88 @@ eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
}
static int
+in_hotplug_list(const char *dev_name)
+{
+ struct hotplug_request *hp_request = NULL;
+
+ TAILQ_FOREACH(hp_request, &hp_list, next) {
+ if (!strcmp(hp_request->dev_name, dev_name))
+ break;
+ }
+
+ if (hp_request)
+ return 1;
+
+ return 0;
+}
+
+static int
+hotplug_list_add(const char *dev_name, enum rte_eal_dev_event_type event)
+{
+ struct hotplug_request *hp_request;
+
+ hp_request = rte_zmalloc("hoplug request",
+ sizeof(*hp_request), 0);
+ if (hp_request == NULL) {
+ fprintf(stderr, "%s can not alloc memory\n",
+ __func__);
+ return -ENOMEM;
+ }
+
+ hp_request->dev_name = dev_name;
+ hp_request->event = event;
+
+ TAILQ_INSERT_TAIL(&hp_list, hp_request, next);
+
+ return 0;
+}
+
+/* This function is used by the interrupt thread */
+static int
+eth_uevent_callback(enum rte_eal_dev_event_type type, void *arg,
+ void *ret_param)
+{
+ static const char * const event_desc[] = {
+ [RTE_EAL_DEV_EVENT_UNKNOWN] = "Unknown",
+ [RTE_EAL_DEV_EVENT_ADD] = "add",
+ [RTE_EAL_DEV_EVENT_REMOVE] = "remove",
+ };
+ static char *device_name;
+
+ RTE_SET_USED(ret_param);
+
+ if (type >= RTE_EAL_DEV_EVENT_MAX) {
+ fprintf(stderr, "%s called upon invalid event %d\n",
+ __func__, type);
+ fflush(stderr);
+ } else if (event_print_mask & (UINT32_C(1) << type)) {
+ printf("%s event\n",
+ event_desc[type]);
+ fflush(stdout);
+ }
+
+ switch (type) {
+ case RTE_EAL_DEV_EVENT_REMOVE:
+ if (rte_eal_alarm_set(100000,
+ rmv_uevent_callback, arg))
+ fprintf(stderr, "Could not set up deferred "
+ "device removal\n");
+ break;
+ case RTE_EAL_DEV_EVENT_ADD:
+ device_name = malloc(strlen((const char *)arg) + 1);
+ strcpy(device_name, arg);
+ if (rte_eal_alarm_set(500000,
+ add_uevent_callback, device_name))
+ fprintf(stderr, "Could not set up deferred "
+ "device add\n");
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int
set_tx_queue_stats_mapping_registers(portid_t port_id, struct rte_port *port)
{
uint16_t i;
@@ -2438,6 +2607,15 @@ main(int argc, char** argv)
nb_rxq, nb_txq);
init_config();
+
+ /* enable hot plug monitoring */
+ TAILQ_INIT(&hp_list);
+ rte_eal_dev_monitor_enable();
+ RTE_ETH_FOREACH_DEV(port_id) {
+ hotplug_list_add(rte_eth_devices[port_id].device->name,
+ RTE_EAL_DEV_EVENT_REMOVE);
+ eth_uevent_callback_register(port_id);
+ }
if (start_port(RTE_PORT_ALL) != 0)
rte_exit(EXIT_FAILURE, "Start ports failed\n");
@@ -92,6 +92,15 @@ typedef uint16_t streamid_t;
#define TM_MODE 0
#endif
+struct hotplug_request {
+ TAILQ_ENTRY(hotplug_request) next; /**< Callbacks list */
+ const char *dev_name; /* request device name */
+ enum rte_eal_dev_event_type event; /**< device event type */
+};
+
+/** @internal Structure to keep track of registered callbacks */
+TAILQ_HEAD(hotplug_request_list, hotplug_request);
+
enum {
PORT_TOPOLOGY_PAIRED,
PORT_TOPOLOGY_CHAINED,