> -----Original Message-----
> From: Guo, Jia
> Sent: Thursday, June 29, 2017 12:38 PM
> To: Zhang, Helin <helin.zhang@intel.com>; Wu, Jingjing
> <jingjing.wu@intel.com>
> Cc: dev@dpdk.org; Guo, Jia <jia.guo@intel.com>
> Subject: [PATCH v3 1/2] eal: add uevent api for hot plug
>
> From: "Guo, Jia" <jia.guo@intel.com>
>
> This patch adds a "uevent_fd" field to the "rte_intr_handle" structure to
> enable kernel object uevent monitoring, and adds two uevent APIs to the rte
> eal interrupt code, "rte_uevent_connect" and "rte_uevent_get", so that any
> driver can use them to monitor and read uevents and then handle them
> accordingly, for example by detaching or attaching the device.
>
> Signed-off-by: Guo, Jia <jia.guo@intel.com>
Looks fine to me.
Reviewed-by: Jingjing Wu <jingjing.wu@intel.com>
@@ -117,6 +117,7 @@
dev->intr_handle.fd = -1;
dev->intr_handle.uio_cfg_fd = -1;
+ dev->intr_handle.uevent_fd = -1;
dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
/* secondary processes - use already recorded details */
@@ -227,7 +228,10 @@
close(dev->intr_handle.uio_cfg_fd);
dev->intr_handle.uio_cfg_fd = -1;
}
-
+ if (dev->intr_handle.uevent_fd >= 0) {
+ close(dev->intr_handle.uevent_fd);
+ dev->intr_handle.uevent_fd = -1;
+ }
dev->intr_handle.fd = -1;
dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
}
@@ -65,6 +65,10 @@
#include <rte_errno.h>
#include <rte_spinlock.h>
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <sys/epoll.h>
+
#include "eal_private.h"
#include "eal_vfio.h"
#include "eal_thread.h"
@@ -669,10 +673,13 @@ struct rte_intr_source {
RTE_SET_USED(r);
return -1;
}
+
rte_spinlock_lock(&intr_lock);
TAILQ_FOREACH(src, &intr_sources, next)
- if (src->intr_handle.fd ==
- events[n].data.fd)
+ if ((src->intr_handle.fd ==
+ events[n].data.fd) ||
+ (src->intr_handle.uevent_fd ==
+ events[n].data.fd))
break;
if (src == NULL){
rte_spinlock_unlock(&intr_lock);
@@ -858,7 +865,24 @@ static __attribute__((noreturn)) void *
}
else
numfds++;
+
+ /**
+ * add device uevent file descriptor
+ * into wait list for uevent monitoring.
+ */
+ ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+ ev.data.fd = src->intr_handle.uevent_fd;
+ if (epoll_ctl(pfd, EPOLL_CTL_ADD,
+ src->intr_handle.uevent_fd, &ev) < 0){
+ rte_panic("Error adding uevent_fd %d epoll_ctl"
+ ", %s\n",
+ src->intr_handle.uevent_fd,
+ strerror(errno));
+ } else
+ numfds++;
}
+
+
rte_spinlock_unlock(&intr_lock);
/* serve the interrupt */
eal_intr_handle_interrupts(pfd, numfds);
@@ -1255,3 +1279,111 @@ static __attribute__((noreturn)) void *
return 0;
}
+
+/*
+ * Open a NETLINK_KOBJECT_UEVENT datagram socket, switch it to non-blocking
+ * mode and bind it with every bit of the multicast group mask set.
+ *
+ * Returns the socket descriptor on success, -1 on failure. The descriptor
+ * is closed on every failure path after it has been created.
+ */
+int
+rte_uevent_connect(void)
+{
+	struct sockaddr_nl nl_addr;
+	int rcvbuf_size = 64 * 1024;
+	int enable = 1;
+	int sock;
+
+	sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
+	if (sock < 0)
+		return -1;
+
+	/* the result is intentionally not checked: a failure here is not fatal */
+	setsockopt(sock, SOL_SOCKET, SO_RCVBUFFORCE,
+		   &rcvbuf_size, sizeof(rcvbuf_size));
+
+	if (ioctl(sock, FIONBIO, &enable) != 0) {
+		RTE_LOG(ERR, EAL,
+			"ioctl(FIONBIO) failed\n");
+		goto fail;
+	}
+
+	memset(&nl_addr, 0, sizeof(nl_addr));
+	nl_addr.nl_family = AF_NETLINK;
+	nl_addr.nl_pid = 0;
+	nl_addr.nl_groups = 0xffffffff;
+
+	if (bind(sock, (struct sockaddr *) &nl_addr, sizeof(nl_addr)) < 0)
+		goto fail;
+
+	return sock;
+
+fail:
+	close(sock);
+	return -1;
+}
+
+/*
+ * Decode a raw uevent message into an rte_uevent.
+ *
+ * The message is a sequence of NUL-separated "KEY=value" fields. Only
+ * ACTION= and SUBSYSTEM= are examined; when a key appears more than once
+ * the last occurrence wins.
+ *
+ * @param buf
+ *   Message buffer. It must be RTE_UEVENT_MSG_LEN bytes long; the scan
+ *   never reads beyond that length, and a trailing field with no NUL
+ *   terminator inside the buffer is ignored.
+ * @param event
+ *   Filled in on success only; left untouched on failure.
+ * @return
+ *   0 when the subsystem begins with "uio" and the action begins with
+ *   "add" or "remove"; -1 for any other subsystem or action.
+ */
+static int
+parse_event(const char *buf, struct rte_uevent *event)
+{
+	const char *end = buf + RTE_UEVENT_MSG_LEN;
+	const char *action = "";
+	const char *subsystem = "";
+	enum rte_uevent_action act;
+
+	while (buf < end) {
+		const char *nul;
+
+		/* skip the NUL separator(s) in front of the next field */
+		if (*buf == '\0') {
+			buf++;
+			continue;
+		}
+
+		/*
+		 * Find the end of this field without leaving the buffer, so
+		 * that the string functions below never read past it.
+		 */
+		nul = memchr(buf, '\0', (size_t)(end - buf));
+		if (nul == NULL)
+			break;
+
+		if (!strncmp(buf, "ACTION=", 7))
+			action = buf + 7;
+		else if (!strncmp(buf, "SUBSYSTEM=", 10))
+			subsystem = buf + 10;
+
+		buf = nul + 1;
+	}
+
+	if (strncmp(subsystem, "uio", 3))
+		return -1;
+
+	/*
+	 * Reject actions other than add/remove instead of returning success
+	 * with event->action never written.
+	 */
+	if (!strncmp(action, "add", 3))
+		act = RTE_UEVENT_ADD;
+	else if (!strncmp(action, "remove", 6))
+		act = RTE_UEVENT_REMOVE;
+	else
+		return -1;
+
+	event->subsystem = RTE_UEVENT_SUBSYSTEM_UIO;
+	event->action = act;
+	return 0;
+}
+
+/*
+ * Receive one message from fd without blocking and decode it into *uevent.
+ *
+ * *uevent is zeroed first. Returns the result of parse_event() when data
+ * was received, and -1 when recv() fails (the error is logged) or returns
+ * zero bytes.
+ */
+int
+rte_uevent_get(int fd, struct rte_uevent *uevent)
+{
+	char msg[RTE_UEVENT_MSG_LEN];
+	int nbytes;
+
+	memset(uevent, 0, sizeof(*uevent));
+	memset(msg, 0, sizeof(msg));
+
+	/* leave the last byte untouched so the message stays NUL-terminated */
+	nbytes = recv(fd, msg, sizeof(msg) - 1, MSG_DONTWAIT);
+	if (nbytes < 0) {
+		RTE_LOG(ERR, EAL,
+			"Socket read error(%d): %s\n",
+			errno, strerror(errno));
+		return -1;
+	}
+
+	/* connection closed */
+	if (nbytes == 0)
+		return -1;
+
+	return parse_event(msg, uevent);
+}
@@ -231,6 +231,10 @@
close(dev->intr_handle.uio_cfg_fd);
dev->intr_handle.uio_cfg_fd = -1;
}
+ if (dev->intr_handle.uevent_fd >= 0) {
+ close(dev->intr_handle.uevent_fd);
+ dev->intr_handle.uevent_fd = -1;
+ }
if (dev->intr_handle.fd >= 0) {
close(dev->intr_handle.fd);
dev->intr_handle.fd = -1;
@@ -276,6 +280,8 @@
goto error;
}
+ dev->intr_handle.uevent_fd = rte_uevent_connect();
+
if (dev->kdrv == RTE_KDRV_IGB_UIO)
dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
else {
@@ -90,6 +90,7 @@ struct rte_intr_handle {
for uio_pci_generic */
};
int fd; /**< interrupt event file descriptor */
+ int uevent_fd; /**< uevent file descriptor */
enum rte_intr_handle_type type; /**< handle type */
uint32_t max_intr; /**< max interrupt requested */
uint32_t nb_efd; /**< number of available efd(event fd) */
@@ -99,6 +100,19 @@ struct rte_intr_handle {
int *intr_vec; /**< intr vector number array */
};
+#define RTE_UEVENT_MSG_LEN 4096 /**< size in bytes of the buffers used to receive and parse a uevent */
+#define RTE_UEVENT_SUBSYSTEM_UIO 1 /**< subsystem id stored in rte_uevent for "uio" uevents */
+
+enum rte_uevent_action { /* action carried by a uevent */
+ RTE_UEVENT_ADD = 0, /**< uevent type of device add */
+ RTE_UEVENT_REMOVE = 1, /**< uevent type of device remove */
+};
+
+struct rte_uevent { /* decoded uevent, as filled in by rte_uevent_get() */
+ enum rte_uevent_action action; /**< uevent action type */
+ int subsystem; /**< subsystem id, e.g. RTE_UEVENT_SUBSYSTEM_UIO */
+};
+
#define RTE_EPOLL_PER_THREAD -1 /**< to hint using per thread epfd */
/**
@@ -236,4 +250,27 @@ struct rte_intr_handle {
int
rte_intr_cap_multiple(struct rte_intr_handle *intr_handle);
+/**
+ * Read a uevent from the given file descriptor.
+ *
+ * @param fd
+ * The fd which the uevent associated to
+ * @param uevent
+ * Pointer to the uevent which read from the monitoring fd.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+rte_uevent_get(int fd, struct rte_uevent *uevent);
+
+/**
+ * Open a non-blocking netlink socket for receiving kernel uevents.
+ * @return
+ * - On success, the connected uevent fd.
+ * - On failure, a negative value.
+ */
+int
+rte_uevent_connect(void);
+
#endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */