@@ -713,18 +713,8 @@ static int rte_eal_vfio_setup(void)
return -1;
vfio_enabled = rte_vfio_is_enabled("vfio");
- if (vfio_enabled) {
-
- /* if we are primary process, create a thread to communicate with
- * secondary processes. the thread will use a socket to wait for
- * requests from secondary process to send open file descriptors,
- * because VFIO does not allow multiple open descriptors on a group or
- * VFIO container.
- */
- if (internal_config.process_type == RTE_PROC_PRIMARY &&
- vfio_mp_sync_setup() < 0)
- return -1;
- }
+ if (vfio_enabled && vfio_mp_sync_setup() < 0)
+ return -1;
return 0;
}
@@ -68,9 +68,11 @@ int
vfio_get_group_fd(int iommu_group_no)
{
int i;
+ int ret;
int vfio_group_fd;
char filename[PATH_MAX];
struct vfio_group *cur_grp;
+ struct vfio_mp_param p;
/* check if we already have the group descriptor open */
for (i = 0; i < VFIO_MAX_GROUPS; i++)
@@ -129,51 +131,27 @@ vfio_get_group_fd(int iommu_group_no)
vfio_cfg.vfio_active_groups++;
return vfio_group_fd;
}
- /* if we're in a secondary process, request group fd from the primary
- * process via our socket
- */
- else {
- int socket_fd, ret;
+ /* For secondary process, request group fd from the primary */
- socket_fd = vfio_mp_sync_connect_to_primary();
+ p.req = SOCKET_REQ_GROUP;
+ p.group_no = iommu_group_no;
- if (socket_fd < 0) {
- RTE_LOG(ERR, EAL, " cannot connect to primary process!\n");
- return -1;
- }
- if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) {
- RTE_LOG(ERR, EAL, " cannot request container fd!\n");
- close(socket_fd);
- return -1;
- }
- if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) {
- RTE_LOG(ERR, EAL, " cannot send group number!\n");
- close(socket_fd);
- return -1;
- }
- ret = vfio_mp_sync_receive_request(socket_fd);
- switch (ret) {
- case SOCKET_NO_FD:
- close(socket_fd);
- return 0;
- case SOCKET_OK:
- vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd);
- /* if we got the fd, store it and return it */
- if (vfio_group_fd > 0) {
- close(socket_fd);
- cur_grp->group_no = iommu_group_no;
- cur_grp->fd = vfio_group_fd;
- vfio_cfg.vfio_active_groups++;
- return vfio_group_fd;
- }
- /* fall-through on error */
- default:
- RTE_LOG(ERR, EAL, " cannot get container fd!\n");
- close(socket_fd);
- return -1;
- }
+ ret = rte_eal_mp_sendmsg("vfio", &p, sizeof(p), NULL, 0, 1);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, " cannot request group fd!\n");
+ cur_grp->group_no = -1;
+ } else if (ret > 0) {
+ /*
+ * ret is what this function returns as the group fd, so record it
+ * in the slot too. A result of 0 carries no descriptor and must
+ * not claim a slot or bump the active group count.
+ */
+ cur_grp->group_no = iommu_group_no;
+ cur_grp->fd = ret;
+ vfio_cfg.vfio_active_groups++;
}
- return -1;
+
+ return ret;
}
@@ -229,11 +201,13 @@ int
clear_group(int vfio_group_fd)
{
int i;
- int socket_fd, ret;
+ struct vfio_mp_param p;
+
+ /* reject untracked fds first: both process types index vfio_groups[i] */
+ i = get_vfio_group_idx(vfio_group_fd);
+ if (i < 0)
+ return -1;
if (internal_config.process_type == RTE_PROC_PRIMARY) {
- i = get_vfio_group_idx(vfio_group_fd);
- if (i < 0)
- return -1;
vfio_cfg.vfio_groups[i].group_no = -1;
@@ -243,44 +216,20 @@ clear_group(int vfio_group_fd)
return 0;
}
- /* This is just for SECONDARY processes */
- socket_fd = vfio_mp_sync_connect_to_primary();
-
- if (socket_fd < 0) {
- RTE_LOG(ERR, EAL, " cannot connect to primary process!\n");
- return -1;
- }
+ p.req = SOCKET_CLR_GROUP;
+ p.group_no = vfio_cfg.vfio_groups[i].group_no;
- if (vfio_mp_sync_send_request(socket_fd, SOCKET_CLR_GROUP) < 0) {
- RTE_LOG(ERR, EAL, " cannot request container fd!\n");
- close(socket_fd);
+ if (rte_eal_mp_sendmsg("vfio", &p, sizeof(p), NULL, 0, 1) < 0) {
+ RTE_LOG(ERR, EAL, "request primary to clear group fd, failed!\n");
return -1;
}
- if (vfio_mp_sync_send_request(socket_fd, vfio_group_fd) < 0) {
- RTE_LOG(ERR, EAL, " cannot send group fd!\n");
- close(socket_fd);
- return -1;
- }
+ vfio_cfg.vfio_groups[i].group_no = -1;
+ vfio_cfg.vfio_groups[i].fd = -1;
+ vfio_cfg.vfio_groups[i].devices = 0;
+ vfio_cfg.vfio_active_groups--;
- ret = vfio_mp_sync_receive_request(socket_fd);
- switch (ret) {
- case SOCKET_NO_FD:
- RTE_LOG(ERR, EAL, " BAD VFIO group fd!\n");
- close(socket_fd);
- break;
- case SOCKET_OK:
- close(socket_fd);
- return 0;
- case SOCKET_ERR:
- RTE_LOG(ERR, EAL, " Socket error\n");
- close(socket_fd);
- break;
- default:
- RTE_LOG(ERR, EAL, " UNKNOWN reply, %d\n", ret);
- close(socket_fd);
- }
- return -1;
+ return 0;
}
int
@@ -590,6 +539,7 @@ int
vfio_get_container_fd(void)
{
int ret, vfio_container_fd;
+ struct vfio_mp_param p;
/* if we're in a primary process, try to open the container */
if (internal_config.process_type == RTE_PROC_PRIMARY) {
@@ -620,34 +570,17 @@ vfio_get_container_fd(void)
}
return vfio_container_fd;
- } else {
- /*
- * if we're in a secondary process, request container fd from the
- * primary process via our socket
- */
- int socket_fd;
-
- socket_fd = vfio_mp_sync_connect_to_primary();
- if (socket_fd < 0) {
- RTE_LOG(ERR, EAL, " cannot connect to primary process!\n");
- return -1;
- }
- if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) {
- RTE_LOG(ERR, EAL, " cannot request container fd!\n");
- close(socket_fd);
- return -1;
- }
- vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd);
- if (vfio_container_fd < 0) {
- RTE_LOG(ERR, EAL, " cannot get container fd!\n");
- close(socket_fd);
- return -1;
- }
- close(socket_fd);
- return vfio_container_fd;
}
- return -1;
+ /* For secondary process, request container fd from primary process */
+
+ p.req = SOCKET_REQ_CONTAINER;
+
+ ret = rte_eal_mp_sendmsg("vfio", &p, sizeof(p), NULL, 0, 1);
+ if (ret < 0)
+ RTE_LOG(ERR, EAL, " cannot request container fd!\n");
+
+ return ret;
}
int
@@ -117,15 +117,6 @@ struct vfio_iommu_spapr_tce_info {
#define VFIO_MAX_GROUPS 64
/*
- * Function prototypes for VFIO multiprocess sync functions
- */
-int vfio_mp_sync_send_request(int socket, int req);
-int vfio_mp_sync_receive_request(int socket);
-int vfio_mp_sync_send_fd(int socket, int fd);
-int vfio_mp_sync_receive_fd(int socket);
-int vfio_mp_sync_connect_to_primary(void);
-
-/*
* we don't need to store device fd's anywhere since they can be obtained from
* the group fd via an ioctl() call.
*/
@@ -190,6 +181,12 @@ int vfio_mp_sync_setup(void);
#define SOCKET_NO_FD 0x1
#define SOCKET_ERR 0xFF
+/* Message body exchanged under the "vfio" multi-process action name. */
+struct vfio_mp_param {
+ int req; /* request code: SOCKET_REQ_GROUP, SOCKET_REQ_CONTAINER or SOCKET_CLR_GROUP */
+ int result; /* reply status set by the primary handler: SOCKET_OK, SOCKET_NO_FD or SOCKET_ERR */
+ int group_no; /* IOMMU group number the request or reply refers to */
+};
+
#endif /* VFIO_PRESENT */
#endif /* EAL_VFIO_H_ */
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,31 +31,11 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <string.h>
-#include <fcntl.h>
-#include <sys/socket.h>
-#include <pthread.h>
-
-/* sys/un.h with __USE_MISC uses strlen, which is unsafe */
-#ifdef __USE_MISC
-#define REMOVED_USE_MISC
-#undef __USE_MISC
-#endif
-#include <sys/un.h>
-/* make sure we redefine __USE_MISC only if it was previously undefined */
-#ifdef REMOVED_USE_MISC
-#define __USE_MISC
-#undef REMOVED_USE_MISC
-#endif
-
#include <rte_log.h>
-#include <rte_eal_memconfig.h>
-#include <rte_malloc.h>
#include <rte_vfio.h>
+#include <rte_eal.h>
-#include "eal_filesystem.h"
#include "eal_vfio.h"
-#include "eal_thread.h"
/**
* @file
@@ -66,360 +46,94 @@
#ifdef VFIO_PRESENT
-#define SOCKET_PATH_FMT "%s/.%s_mp_socket"
-#define CMSGLEN (CMSG_LEN(sizeof(int)))
-#define FD_TO_CMSGHDR(fd, chdr) \
- do {\
- (chdr).cmsg_len = CMSGLEN;\
- (chdr).cmsg_level = SOL_SOCKET;\
- (chdr).cmsg_type = SCM_RIGHTS;\
- memcpy((chdr).__cmsg_data, &(fd), sizeof(fd));\
- } while (0)
-#define CMSGHDR_TO_FD(chdr, fd) \
- memcpy(&(fd), (chdr).__cmsg_data, sizeof(fd))
-
-static pthread_t socket_thread;
-static int mp_socket_fd;
-
-
-/* get socket path (/var/run if root, $HOME otherwise) */
-static void
-get_socket_path(char *buffer, int bufsz)
-{
- const char *dir = "/var/run";
- const char *home_dir = getenv("HOME");
-
- if (getuid() != 0 && home_dir != NULL)
- dir = home_dir;
-
- /* use current prefix as file path */
- snprintf(buffer, bufsz, SOCKET_PATH_FMT, dir,
- internal_config.hugefile_prefix);
-}
-
-
-
-/*
- * data flow for socket comm protocol:
- * 1. client sends SOCKET_REQ_CONTAINER or SOCKET_REQ_GROUP
- * 1a. in case of SOCKET_REQ_GROUP, client also then sends group number
- * 2. server receives message
- * 2a. in case of invalid group, SOCKET_ERR is sent back to client
- * 2b. in case of unbound group, SOCKET_NO_FD is sent back to client
- * 2c. in case of valid group, SOCKET_OK is sent and immediately followed by fd
- *
- * in case of any error, socket is closed.
- */
-
-/* send a request, return -1 on error */
-int
-vfio_mp_sync_send_request(int socket, int req)
-{
- struct msghdr hdr;
- struct iovec iov;
- int buf;
- int ret;
-
- memset(&hdr, 0, sizeof(hdr));
-
- buf = req;
-
- hdr.msg_iov = &iov;
- hdr.msg_iovlen = 1;
- iov.iov_base = (char *) &buf;
- iov.iov_len = sizeof(buf);
-
- ret = sendmsg(socket, &hdr, 0);
- if (ret < 0)
- return -1;
- return 0;
-}
-
-/* receive a request and return it */
-int
-vfio_mp_sync_receive_request(int socket)
-{
- int buf;
- struct msghdr hdr;
- struct iovec iov;
- int ret, req;
-
- memset(&hdr, 0, sizeof(hdr));
-
- buf = SOCKET_ERR;
-
- hdr.msg_iov = &iov;
- hdr.msg_iovlen = 1;
- iov.iov_base = (char *) &buf;
- iov.iov_len = sizeof(buf);
-
- ret = recvmsg(socket, &hdr, 0);
- if (ret < 0)
- return -1;
-
- req = buf;
-
- return req;
-}
-
-/* send OK in message, fd in control message */
-int
-vfio_mp_sync_send_fd(int socket, int fd)
-{
- int buf;
- struct msghdr hdr;
- struct cmsghdr *chdr;
- char chdr_buf[CMSGLEN];
- struct iovec iov;
- int ret;
-
- chdr = (struct cmsghdr *) chdr_buf;
- memset(chdr, 0, sizeof(chdr_buf));
- memset(&hdr, 0, sizeof(hdr));
-
- hdr.msg_iov = &iov;
- hdr.msg_iovlen = 1;
- iov.iov_base = (char *) &buf;
- iov.iov_len = sizeof(buf);
- hdr.msg_control = chdr;
- hdr.msg_controllen = CMSGLEN;
-
- buf = SOCKET_OK;
- FD_TO_CMSGHDR(fd, *chdr);
-
- ret = sendmsg(socket, &hdr, 0);
- if (ret < 0)
- return -1;
- return 0;
-}
-
-/* receive OK in message, fd in control message */
-int
-vfio_mp_sync_receive_fd(int socket)
-{
- int buf;
- struct msghdr hdr;
- struct cmsghdr *chdr;
- char chdr_buf[CMSGLEN];
- struct iovec iov;
- int ret, req, fd;
-
- buf = SOCKET_ERR;
-
- chdr = (struct cmsghdr *) chdr_buf;
- memset(chdr, 0, sizeof(chdr_buf));
- memset(&hdr, 0, sizeof(hdr));
-
- hdr.msg_iov = &iov;
- hdr.msg_iovlen = 1;
- iov.iov_base = (char *) &buf;
- iov.iov_len = sizeof(buf);
- hdr.msg_control = chdr;
- hdr.msg_controllen = CMSGLEN;
-
- ret = recvmsg(socket, &hdr, 0);
- if (ret < 0)
- return -1;
-
- req = buf;
-
- if (req != SOCKET_OK)
- return -1;
-
- CMSGHDR_TO_FD(*chdr, fd);
-
- return fd;
-}
-
-/* connect socket_fd in secondary process to the primary process's socket */
-int
-vfio_mp_sync_connect_to_primary(void)
+/*
+ * Primary-side handler for "vfio" requests: answers SOCKET_REQ_GROUP and
+ * SOCKET_REQ_CONTAINER by sending a reply under the same action name,
+ * attaching the descriptor only when one was obtained.
+ * Returns 0 once a known request was answered, -1 on a malformed or
+ * unknown one.
+ */
+static int
+vfio_mp_primary(const void *params, int len,
+ int fd[] __rte_unused, int fds_num __rte_unused)
{
- struct sockaddr_un addr;
- socklen_t sockaddr_len;
- int socket_fd;
+ int fds[1];
+ const struct vfio_mp_param *p = params;
+ struct vfio_mp_param r;
- /* set up a socket */
- socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
- if (socket_fd < 0) {
- RTE_LOG(ERR, EAL, "Failed to create socket!\n");
+ if (len != sizeof(*p)) {
+ RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
return -1;
}
- get_socket_path(addr.sun_path, sizeof(addr.sun_path));
- addr.sun_family = AF_UNIX;
-
- sockaddr_len = sizeof(struct sockaddr_un);
-
- if (connect(socket_fd, (struct sockaddr *) &addr, sockaddr_len) == 0)
- return socket_fd;
-
- /* if connect failed */
- close(socket_fd);
- return -1;
-}
-
-
-
-/*
- * socket listening thread for primary process
- */
-static __attribute__((noreturn)) void *
-vfio_mp_sync_thread(void __rte_unused * arg)
-{
- int ret, fd, vfio_data;
-
- /* wait for requests on the socket */
- for (;;) {
- int conn_sock;
- struct sockaddr_un addr;
- socklen_t sockaddr_len = sizeof(addr);
-
- /* this is a blocking call */
- conn_sock = accept(mp_socket_fd, (struct sockaddr *) &addr,
- &sockaddr_len);
-
- /* just restart on error */
- if (conn_sock == -1)
- continue;
-
- /* set socket to linger after close */
- struct linger l;
- l.l_onoff = 1;
- l.l_linger = 60;
-
- if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) < 0)
- RTE_LOG(WARNING, EAL, "Cannot set SO_LINGER option "
- "on listen socket (%s)\n", strerror(errno));
-
- ret = vfio_mp_sync_receive_request(conn_sock);
-
- switch (ret) {
- case SOCKET_REQ_CONTAINER:
- fd = vfio_get_container_fd();
- if (fd < 0)
- vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
- else
- vfio_mp_sync_send_fd(conn_sock, fd);
- if (fd >= 0)
- close(fd);
- break;
- case SOCKET_REQ_GROUP:
- /* wait for group number */
- vfio_data = vfio_mp_sync_receive_request(conn_sock);
- if (vfio_data < 0) {
- close(conn_sock);
- continue;
- }
-
- fd = vfio_get_group_fd(vfio_data);
-
- if (fd < 0)
- vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
+ switch (p->req) {
+ case SOCKET_REQ_GROUP:
+ r.req = SOCKET_REQ_GROUP;
+ r.group_no = p->group_no;
+ fds[0] = vfio_get_group_fd(p->group_no);
+ if (fds[0] < 0) {
+ r.result = SOCKET_ERR;
+ rte_eal_mp_sendmsg("vfio", &r, sizeof(r), NULL, 0, 0);
+ } else if (fds[0] == 0) {
/* if VFIO group exists but isn't bound to VFIO driver */
- else if (fd == 0)
- vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD);
+ r.result = SOCKET_NO_FD;
+ rte_eal_mp_sendmsg("vfio", &r, sizeof(r), NULL, 0, 0);
+ } else {
/* if group exists and is bound to VFIO driver */
- else {
- vfio_mp_sync_send_request(conn_sock, SOCKET_OK);
- vfio_mp_sync_send_fd(conn_sock, fd);
- }
- break;
- case SOCKET_CLR_GROUP:
- /* wait for group fd */
- vfio_data = vfio_mp_sync_receive_request(conn_sock);
- if (vfio_data < 0) {
- close(conn_sock);
- continue;
- }
-
- ret = clear_group(vfio_data);
-
- if (ret < 0)
- vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD);
- else
- vfio_mp_sync_send_request(conn_sock, SOCKET_OK);
- break;
- default:
- vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
- break;
+ r.result = SOCKET_OK;
+ rte_eal_mp_sendmsg("vfio", &r, sizeof(r), fds, 1, 0);
}
- close(conn_sock);
+ break;
+ case SOCKET_REQ_CONTAINER:
+ r.req = SOCKET_REQ_CONTAINER;
+ /* no group is involved; do not send an uninitialised field */
+ r.group_no = -1;
+ fds[0] = vfio_get_container_fd();
+ if (fds[0] < 0) {
+ /* never attach an invalid descriptor to the reply */
+ r.result = SOCKET_ERR;
+ rte_eal_mp_sendmsg("vfio", &r, sizeof(r), NULL, 0, 0);
+ } else {
+ r.result = SOCKET_OK;
+ rte_eal_mp_sendmsg("vfio", &r, sizeof(r), fds, 1, 0);
+ /*
+ * NOTE(review): the previous implementation closed this fd
+ * after sending it; confirm whether it must be closed here
+ * too, to avoid keeping one descriptor open per request.
+ */
+ }
+ break;
+ default:
+ /*
+ * NOTE(review): clear_group() sends SOCKET_CLR_GROUP under this
+ * action name but no case handles it, so it ends up here -
+ * confirm intent.
+ */
+ RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
+ return -1;
}
+
+ return 0;
}
+/*
+ * Handler for "vfio" replies in a non-primary process.
+ * Returns the received descriptor for a successful group or container
+ * reply, 0 for a SOCKET_NO_FD group reply, and -1 for anything else.
+ */
static int
-vfio_mp_sync_socket_setup(void)
+vfio_mp_secondary(const void *params, int len, int fds[],
+ int fds_num)
{
- int ret, socket_fd;
- struct sockaddr_un addr;
- socklen_t sockaddr_len;
-
- /* set up a socket */
- socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
- if (socket_fd < 0) {
- RTE_LOG(ERR, EAL, "Failed to create socket!\n");
- return -1;
- }
-
- get_socket_path(addr.sun_path, sizeof(addr.sun_path));
- addr.sun_family = AF_UNIX;
-
- sockaddr_len = sizeof(struct sockaddr_un);
+ const struct vfio_mp_param *p = params;
- unlink(addr.sun_path);
-
- ret = bind(socket_fd, (struct sockaddr *) &addr, sockaddr_len);
- if (ret) {
- RTE_LOG(ERR, EAL, "Failed to bind socket: %s!\n", strerror(errno));
- close(socket_fd);
+ if (len != sizeof(*p)) {
+ RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
return -1;
}
- ret = listen(socket_fd, 50);
- if (ret) {
- RTE_LOG(ERR, EAL, "Failed to listen: %s!\n", strerror(errno));
- close(socket_fd);
+ switch (p->req) {
+ case SOCKET_REQ_GROUP:
+ switch (p->result) {
+ case SOCKET_NO_FD:
+ return 0;
+ case SOCKET_OK:
+ /* accept the reply only if exactly one usable fd came with it */
+ if (fds_num == 1 && fds[0] > 0)
+ return fds[0];
+ /* fall-through on error */
+ default:
+ RTE_LOG(ERR, EAL, " cannot get group fd!\n");
+ return -1;
+ }
+ case SOCKET_REQ_CONTAINER:
+ if (fds_num == 1 && fds[0] > 0)
+ return fds[0];
return -1;
+ default:
+ RTE_LOG(ERR, EAL, "Invalid req!\n");
}
-
- /* save the socket in local configuration */
- mp_socket_fd = socket_fd;
-
- return 0;
+ return -1;
}
-/*
- * set up a local socket and tell it to listen for incoming connections
- */
+/*
+ * Register the "vfio" handler matching this process's role: vfio_mp_primary
+ * in the primary process, vfio_mp_secondary otherwise. Returns whatever
+ * rte_eal_mp_action_register() returns.
+ */
int
vfio_mp_sync_setup(void)
{
- int ret;
- char thread_name[RTE_MAX_THREAD_NAME_LEN];
+ rte_eal_mp_t action;
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ action = vfio_mp_primary;
+ else
+ action = vfio_mp_secondary;
- if (vfio_mp_sync_socket_setup() < 0) {
- RTE_LOG(ERR, EAL, "Failed to set up local socket!\n");
- return -1;
- }
-
- ret = pthread_create(&socket_thread, NULL,
- vfio_mp_sync_thread, NULL);
- if (ret) {
- RTE_LOG(ERR, EAL,
- "Failed to create thread for communication with secondary processes!\n");
- close(mp_socket_fd);
- return -1;
- }
-
- /* Set thread_name for aid in debugging. */
- snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "vfio-sync");
- ret = rte_thread_setname(socket_thread, thread_name);
- if (ret)
- RTE_LOG(DEBUG, EAL,
- "Failed to set thread name for secondary processes!\n");
-
- return 0;
+ return rte_eal_mp_action_register("vfio", action);
}
#endif