diff mbox series

[v2,4/9] vfio_user: implement DMA table and socket address API

Message ID 20210114061411.39166-5-chenbo.xia@intel.com (mailing list archive)
State Deferred
Delegated to: Thomas Monjalon
Headers show
Series Introduce vfio-user library | expand

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Chenbo Xia Jan. 14, 2021, 6:14 a.m. UTC
This patch introduces an API called rte_vfio_user_get_mem_table()
for emulated devices to acquire the DMA memory table from the
vfio-user library.

Notify operations are also introduced to notify the emulated
devices of several events. In addition, a socket address API is
introduced so that notify callbacks can translate a device ID into
the corresponding socket address.

Signed-off-by: Chenbo Xia <chenbo.xia@intel.com>
Signed-off-by: Xiuchun Lu <xiuchun.lu@intel.com>
---
 lib/librte_vfio_user/rte_vfio_user.h    |  77 ++++-
 lib/librte_vfio_user/version.map        |   2 +
 lib/librte_vfio_user/vfio_user_base.h   |   2 +
 lib/librte_vfio_user/vfio_user_server.c | 375 +++++++++++++++++++++++-
 lib/librte_vfio_user/vfio_user_server.h |   3 +
 5 files changed, 451 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/lib/librte_vfio_user/rte_vfio_user.h b/lib/librte_vfio_user/rte_vfio_user.h
index 117e994cc6..f575017bdf 100644
--- a/lib/librte_vfio_user/rte_vfio_user.h
+++ b/lib/librte_vfio_user/rte_vfio_user.h
@@ -5,10 +5,52 @@ 
 #ifndef _RTE_VFIO_USER_H
 #define _RTE_VFIO_USER_H
 
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
 #include <linux/vfio.h>
+#include <sys/types.h>
 
 #include <rte_compat.h>
 
+#define RTE_VUSER_MAX_DMA 256
+
+/*
+ * Callbacks through which the library notifies the emulated device.
+ * The callbacks are invoked from the library's message/socket handling;
+ * none of their return values is checked by the callers in this patch.
+ * NOTE(review): destroy_device is called on disconnect without a NULL
+ * check, unlike the other callbacks - it looks mandatory; confirm.
+ */
+struct rte_vfio_user_notify_ops {
+	/* Add device: called once the device becomes ready, i.e. when its
+	 * DMA memory table holds at least one entry.
+	 */
+	int (*new_device)(int dev_id);
+	/* Remove device: called when reading from the connection fails */
+	void (*destroy_device)(int dev_id);
+	/* Update device status: called after a DMA map/unmap message has
+	 * been handled for a device that is already ready.
+	 */
+	int (*update_status)(int dev_id);
+	/* Lock or unlock data path: called with lock = 1 before a DMA map,
+	 * DMA unmap or device reset message is handled and with lock = 0
+	 * afterwards.
+	 */
+	int (*lock_dp)(int dev_id, int lock);
+	/* Reset device: called after the DMA memory table was cleared */
+	int (*reset_device)(int dev_id);
+};
+
+/* One DMA region as carried in the payload of DMA map/unmap messages. */
+struct rte_vfio_user_mem_reg {
+	/* Start address of the region (presumably guest physical address) */
+	uint64_t gpa;
+	/* Length of the region in bytes */
+	uint64_t size;
+	/* Offset of the region inside the file referred to by its fd */
+	uint64_t fd_offset;
+	/* Passed unchanged as the protection argument of mmap() */
+	uint32_t protection;	/* attributes in <sys/mman.h> */
+/* When set in flags, the message carries a file descriptor for the region */
+#define RTE_VUSER_MEM_MAPPABLE	(0x1 << 0)
+	uint32_t flags;
+};
+
+/* One entry of the DMA memory table built from a mapped DMA region. */
+struct rte_vfio_user_mtb_entry {
+	/* Start address and length, copied from the DMA map request */
+	uint64_t gpa;
+	uint64_t size;
+	/* mmap_addr + fd_offset of the request; 0 when the region has no fd */
+	uint64_t host_user_addr;
+	/* Address returned by mmap(); NULL when the region has no fd */
+	void	 *mmap_addr;
+	/* Length of the mapping (fd_offset + size); 0 when not mapped */
+	uint64_t mmap_size;
+	/* File descriptor backing the mapping; -1 when the region has no fd */
+	int fd;
+};
+
+/*
+ * DMA memory table of a device. The library inserts entries in ascending
+ * gpa order and rejects regions that overlap an existing entry.
+ */
+struct rte_vfio_user_mem {
+	/* Number of valid entries at the start of entry[] */
+	uint32_t entry_num;
+	struct rte_vfio_user_mtb_entry entry[RTE_VUSER_MAX_DMA];
+};
+
 struct rte_vfio_user_reg_info;
 
 typedef ssize_t (*rte_vfio_user_reg_acc_t)(struct rte_vfio_user_reg_info *reg,
@@ -32,6 +74,8 @@  struct rte_vfio_user_regions {
  *	*rte_vfio_user_register
  *	*rte_vfio_user_unregister
  *	*rte_vfio_user_start
+ *	*rte_vfio_get_sock_addr
+ *	*rte_vfio_user_get_mem_table
  *	*rte_vfio_user_set_dev_info
  *	*rte_vfio_user_set_reg_info
  */
@@ -41,12 +85,15 @@  struct rte_vfio_user_regions {
  *
  * @param sock_addr
  *   Unix domain socket address
+ * @param ops
+ *   Notify ops for the device
  * @return
  *   0 on success, -1 on failure
  */
 __rte_experimental
 int
-rte_vfio_user_register(const char *sock_addr);
+rte_vfio_user_register(const char *sock_addr,
+	const struct rte_vfio_user_notify_ops *ops);
 
 /**
  * Unregister a vfio-user device.
@@ -73,6 +120,18 @@  __rte_experimental
 int
 rte_vfio_user_start(const char *sock_addr);
 
+/**
+ * Get the memory table of a vfio-user device.
+ *
+ * The returned table is owned by the library and must not be freed by
+ * the caller. Its content changes when DMA map/unmap or device reset
+ * messages are handled, and it is released when the device is
+ * unregistered.
+ *
+ * @param dev_id
+ *   Vfio-user device ID
+ * @return
+ *   Pointer to memory table on success, NULL on failure
+ */
+__rte_experimental
+const struct rte_vfio_user_mem *
+rte_vfio_user_get_mem_table(int dev_id);
+
 /**
  * Set the device information for a vfio-user device.
  *
@@ -113,4 +172,20 @@  int
 rte_vfio_user_set_reg_info(const char *sock_addr,
 	struct rte_vfio_user_regions *reg);
 
+/**
+ * Get the socket address for a vfio-user device.
+ *
+ * The address is copied as a NUL-terminated string and is silently
+ * truncated when the buffer is too small to hold it.
+ *
+ * @param dev_id
+ *   Vfio-user device ID
+ * @param[out] buf
+ *   Buffer to store socket address
+ * @param len
+ *   The length of the buffer in bytes, must be greater than 0
+ * @return
+ *   0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vfio_get_sock_addr(int dev_id, char *buf, size_t len);
+
 #endif
diff --git a/lib/librte_vfio_user/version.map b/lib/librte_vfio_user/version.map
index 0f4f5acba5..3a50b5ef0e 100644
--- a/lib/librte_vfio_user/version.map
+++ b/lib/librte_vfio_user/version.map
@@ -4,6 +4,8 @@  EXPERIMENTAL {
 	rte_vfio_user_register;
 	rte_vfio_user_unregister;
 	rte_vfio_user_start;
+	rte_vfio_get_sock_addr;
+	rte_vfio_user_get_mem_table;
 	rte_vfio_user_set_dev_info;
 	rte_vfio_user_set_reg_info;
 
diff --git a/lib/librte_vfio_user/vfio_user_base.h b/lib/librte_vfio_user/vfio_user_base.h
index f92886b56a..dd13170298 100644
--- a/lib/librte_vfio_user/vfio_user_base.h
+++ b/lib/librte_vfio_user/vfio_user_base.h
@@ -9,6 +9,7 @@ 
 
 #include "rte_vfio_user.h"
 
+#define VFIO_USER_MSG_MAX_NREG 8
 #define VFIO_USER_VERSION_MAJOR 1
 #define VFIO_USER_VERSION_MINOR 0
 #define VFIO_USER_MAX_RSVD 512
@@ -79,6 +80,7 @@  struct vfio_user_msg {
 	uint32_t err;				/* Valid in reply, optional */
 	union {
 		struct vfio_user_version ver;
+		struct rte_vfio_user_mem_reg memory[VFIO_USER_MSG_MAX_NREG];
 		struct vfio_device_info dev_info;
 		struct vfio_user_reg reg_info;
 		struct vfio_user_reg_rw reg_rw;
diff --git a/lib/librte_vfio_user/vfio_user_server.c b/lib/librte_vfio_user/vfio_user_server.c
index aab923e727..9e98b4ec81 100644
--- a/lib/librte_vfio_user/vfio_user_server.c
+++ b/lib/librte_vfio_user/vfio_user_server.c
@@ -7,6 +7,7 @@ 
 #include <pthread.h>
 #include <inttypes.h>
 #include <sys/socket.h>
+#include <sys/mman.h>
 #include <sys/un.h>
 
 #include "vfio_user_server.h"
@@ -40,6 +41,217 @@  vfio_user_negotiate_version(struct vfio_user_server *dev,
 		return -ENOTSUP;
 }
 
+/*
+ * Fill a memory table entry from a DMA region description.
+ *
+ * When fd is not -1 the file is mapped from offset 0 for
+ * fd_offset + size bytes, so that host_user_addr (mapping start plus
+ * fd_offset) points at the first byte of the region. When fd is -1 the
+ * entry only records gpa and size.
+ *
+ * Returns 0 on success, -EINVAL when the region has zero size or
+ * fd_offset + size wraps around, or when mmap() fails.
+ */
+static int
+mmap_one_region(struct rte_vfio_user_mtb_entry *entry,
+	struct rte_vfio_user_mem_reg *memory, int fd)
+{
+	if (fd == -1) {
+		/* Region without backing fd: nothing to map */
+		entry->fd = -1;
+		entry->host_user_addr = 0;
+		entry->mmap_addr = NULL;
+		entry->mmap_size = 0;
+	} else {
+		/* Same test as "fd_offset >= -size" on unsigned 64-bit */
+		if (memory->size == 0 ||
+			memory->fd_offset > UINT64_MAX - memory->size) {
+			VFIO_USER_LOG(ERR, "memory fd_offset and size overflow\n");
+			return -EINVAL;
+		}
+
+		entry->mmap_size = memory->fd_offset + memory->size;
+		entry->mmap_addr = mmap(NULL, entry->mmap_size,
+			memory->protection, MAP_SHARED, fd, 0);
+		if (entry->mmap_addr == MAP_FAILED) {
+			VFIO_USER_LOG(ERR, "Failed to mmap dma region\n");
+			return -EINVAL;
+		}
+
+		entry->fd = fd;
+		entry->host_user_addr =
+			(uint64_t)entry->mmap_addr + memory->fd_offset;
+	}
+
+	entry->size = memory->size;
+	entry->gpa = memory->gpa;
+
+	return 0;
+}
+
+/*
+ * Insert a DMA region into the memory table, keeping the table sorted by
+ * ascending gpa.
+ *
+ * Returns 0 on success, or a negative errno value:
+ *   -EBUSY  the table is full,
+ *   -EEXIST an entry with the same gpa and size already exists,
+ *   -EINVAL the region overlaps an existing entry,
+ *   or the error returned by mmap_one_region().
+ * The fd is not closed on failure; it stays owned by the caller.
+ *
+ * The return type is int (not uint32_t) because negative errno values
+ * are returned and the caller tests for "ret < 0".
+ */
+static int
+add_one_region(struct rte_vfio_user_mem *mem,
+	struct rte_vfio_user_mem_reg *memory, int fd)
+{
+	struct rte_vfio_user_mtb_entry *entry;
+	struct rte_vfio_user_mtb_entry ent;
+	uint32_t num = mem->entry_num, i;
+	int err;
+
+	if (num >= RTE_VUSER_MAX_DMA) {
+		VFIO_USER_LOG(ERR, "Add mem region failed, reach max!\n");
+		return -EBUSY;
+	}
+
+	/* Find the insert position i and reject duplicates/overlaps */
+	for (i = 0; i < num; i++) {
+		entry = &mem->entry[i];
+
+		if (memory->gpa == entry->gpa &&
+			memory->size == entry->size)
+			return -EEXIST;
+
+		/* New region lies entirely after this entry */
+		if (memory->gpa > entry->gpa &&
+			memory->gpa >= entry->gpa + entry->size)
+			continue;
+
+		/* New region lies entirely before this entry */
+		if (memory->gpa < entry->gpa &&
+			memory->gpa + memory->size <= entry->gpa)
+			break;
+
+		return -EINVAL;
+	}
+
+	err = mmap_one_region(&ent, memory, fd);
+	if (err)
+		return err;
+
+	/* Open a slot at position i by shifting the tail up by one */
+	memmove(&mem->entry[i + 1], &mem->entry[i],
+		(num - i) * sizeof(mem->entry[0]));
+	mem->entry[i] = ent;
+	mem->entry_num++;
+
+	VFIO_USER_LOG(DEBUG, "DMA MAP(gpa: 0x%" PRIx64 ", sz: 0x%" PRIx64
+			", hva: 0x%" PRIx64 ", ma: 0x%" PRIx64
+			", msz: 0x%" PRIx64 ", fd: %d)\n", ent.gpa,
+			ent.size, ent.host_user_addr, (uint64_t)ent.mmap_addr,
+			ent.mmap_size, ent.fd);
+	return 0;
+}
+
+/*
+ * Remove the entry whose gpa and size match the given region from the
+ * memory table and release its resources.
+ *
+ * For a mapped entry the mapping is removed and its file descriptor is
+ * closed: once the entry has left the table nothing else can release the
+ * descriptor, so not closing it here would leak it. The entries behind
+ * the removed one are shifted down so the table stays contiguous and
+ * sorted. An error is logged when no matching entry exists.
+ */
+static void
+del_one_region(struct rte_vfio_user_mem *mem,
+	struct rte_vfio_user_mem_reg *memory)
+{
+	struct rte_vfio_user_mtb_entry ent;
+	uint32_t num = mem->entry_num, i;
+
+	if (num == 0) {
+		VFIO_USER_LOG(ERR, "Delete mem region failed (No region exists)!\n");
+		return;
+	}
+
+	for (i = 0; i < num; i++) {
+		if (memory->gpa == mem->entry[i].gpa &&
+			memory->size == mem->entry[i].size)
+			break;
+	}
+
+	if (i == num) {
+		VFIO_USER_LOG(ERR, "Failed to find the region for dma unmap!\n");
+		return;
+	}
+
+	/* Keep a copy: the slot is overwritten below but still logged */
+	ent = mem->entry[i];
+
+	if (ent.mmap_addr != NULL) {
+		munmap(ent.mmap_addr, ent.mmap_size);
+		if (ent.fd >= 0)
+			close(ent.fd);
+	}
+
+	/* Close the gap and clear the slot that became unused */
+	memmove(&mem->entry[i], &mem->entry[i + 1],
+		(num - 1 - i) * sizeof(mem->entry[0]));
+	memset(&mem->entry[num - 1], 0, sizeof(mem->entry[0]));
+	mem->entry_num--;
+
+	VFIO_USER_LOG(DEBUG, "DMA UNMAP(gpa: 0x%" PRIx64
+		", sz: 0x%" PRIx64 ", hva: 0x%" PRIx64
+		", ma: 0x%" PRIx64", msz: 0x%" PRIx64
+		", fd: %d)\n", ent.gpa, ent.size,
+		ent.host_user_addr,
+		(uint64_t)ent.mmap_addr, ent.mmap_size,
+		ent.fd);
+}
+
+/*
+ * Handle a DMA map message: add every region of the payload to the
+ * device's memory table.
+ *
+ * The payload is an array of struct rte_vfio_user_mem_reg. Regions
+ * flagged RTE_VUSER_MEM_MAPPABLE consume the message's file descriptors
+ * in order; other regions have no descriptor. If one region cannot be
+ * added, the regions already added from this message are removed again
+ * and the descriptors that were not taken over by the table are closed.
+ *
+ * Returns 0 on success or a negative errno value. No file descriptors
+ * are sent back in the reply.
+ */
+static int
+vfio_user_dma_map(struct vfio_user_server *dev, struct vfio_user_msg *msg)
+{
+	struct rte_vfio_user_mem_reg *memory = msg->payload.memory;
+	const uint32_t max_region = sizeof(msg->payload.memory) /
+		sizeof(msg->payload.memory[0]);
+	uint32_t region_num, expected_fd = 0;
+	/* used_fd: number of fds owned by successfully added regions */
+	uint32_t i, j, used_fd = 0;
+	int mappable, fd;
+	int ret = 0;
+
+	if ((msg->size - VFIO_USER_MSG_HDR_SIZE) % sizeof(*memory) != 0) {
+		VFIO_USER_LOG(ERR, "Invalid msg size for dma map\n");
+		vfio_user_close_msg_fds(msg);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	region_num = (msg->size - VFIO_USER_MSG_HDR_SIZE)
+		/ sizeof(struct rte_vfio_user_mem_reg);
+
+	/* Never read past the region array of the payload */
+	if (region_num > max_region) {
+		VFIO_USER_LOG(ERR, "Too many regions for dma map\n");
+		vfio_user_close_msg_fds(msg);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	for (i = 0; i < region_num; i++) {
+		if (memory[i].flags & RTE_VUSER_MEM_MAPPABLE)
+			expected_fd++;
+	}
+
+	if (vfio_user_check_msg_fdnum(msg, expected_fd) != 0) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	for (i = 0; i < region_num; i++) {
+		mappable = (memory[i].flags & RTE_VUSER_MEM_MAPPABLE) != 0;
+		fd = mappable ? msg->fds[used_fd] : -1;
+
+		ret = add_one_region(dev->mem, memory + i, fd);
+		if (ret < 0) {
+			VFIO_USER_LOG(ERR, "Failed to add dma map\n");
+			break;
+		}
+
+		if (mappable)
+			used_fd++;
+	}
+
+	if (i != region_num) {
+		/* Roll back the regions added from this message */
+		for (j = 0; j < i; j++)
+			del_one_region(dev->mem, memory + j);
+
+		/*
+		 * Close the fds that never made it into the table: the one
+		 * of the failed region (if any) and those of all regions
+		 * after it. fds are indexed by fd position, not by region.
+		 */
+		for (j = used_fd; j < expected_fd; j++)
+			close(msg->fds[j]);
+	}
+err:
+	/* Do not reply fds back */
+	msg->fd_num = 0;
+	return ret;
+}
+
+/*
+ * Handle a DMA unmap message: remove every region of the payload from
+ * the device's memory table.
+ *
+ * The message must not carry file descriptors and its payload must be a
+ * whole number of struct rte_vfio_user_mem_reg that fits the payload's
+ * region array. Regions that are not found are only logged by
+ * del_one_region(); they do not fail the request.
+ *
+ * Returns 0 on success, -EINVAL on a malformed message.
+ */
+static int
+vfio_user_dma_unmap(struct vfio_user_server *dev, struct vfio_user_msg *msg)
+{
+	struct rte_vfio_user_mem_reg *memory = msg->payload.memory;
+	const uint32_t max_region = sizeof(msg->payload.memory) /
+		sizeof(msg->payload.memory[0]);
+	uint32_t region_num, i;
+
+	if (vfio_user_check_msg_fdnum(msg, 0) != 0)
+		return -EINVAL;
+
+	if ((msg->size - VFIO_USER_MSG_HDR_SIZE) % sizeof(*memory) != 0) {
+		VFIO_USER_LOG(ERR, "Invalid msg size for dma unmap\n");
+		return -EINVAL;
+	}
+
+	region_num = (msg->size - VFIO_USER_MSG_HDR_SIZE)
+		/ sizeof(struct rte_vfio_user_mem_reg);
+
+	/* Never read past the region array of the payload */
+	if (region_num > max_region) {
+		VFIO_USER_LOG(ERR, "Too many regions for dma unmap\n");
+		return -EINVAL;
+	}
+
+	/* Remove each region of the message, not just the first one */
+	for (i = 0; i < region_num; i++)
+		del_one_region(dev->mem, memory + i);
+
+	return 0;
+}
+
 static int
 vfio_user_device_get_info(struct vfio_user_server *dev,
 	struct vfio_user_msg *msg)
@@ -178,11 +390,65 @@  vfio_user_region_write(struct vfio_user_server *dev,
 	return 0;
 }
 
+/*
+ * Release every entry of a memory table and reset the table to empty.
+ * Entries with a non-zero host_user_addr are unmapped and their file
+ * descriptor is closed; afterwards the whole table is zeroed.
+ */
+static inline void
+vfio_user_destroy_mem_entries(struct rte_vfio_user_mem *mem)
+{
+	struct rte_vfio_user_mtb_entry *cur;
+	uint32_t idx = 0;
+
+	while (idx < mem->entry_num) {
+		cur = &mem->entry[idx++];
+
+		/* Entries that were never mapped hold no resources */
+		if (cur->host_user_addr == 0)
+			continue;
+
+		munmap(cur->mmap_addr, cur->mmap_size);
+		close(cur->fd);
+	}
+
+	memset(mem, 0, sizeof(*mem));
+}
+
+/*
+ * Release all entries of the device's memory table and free the table
+ * itself. Does nothing when the device has no table.
+ */
+static inline void
+vfio_user_destroy_mem(struct vfio_user_server *dev)
+{
+	if (dev->mem == NULL)
+		return;
+
+	vfio_user_destroy_mem_entries(dev->mem);
+	free(dev->mem);
+	dev->mem = NULL;
+}
+
+/*
+ * Handle a device reset message.
+ *
+ * The message must not carry file descriptors and the device info must
+ * advertise VFIO_DEVICE_FLAGS_RESET. On reset the memory table is
+ * emptied, the device is marked not ready and the reset_device callback
+ * is invoked if one was registered (its return value is not used).
+ *
+ * Returns 0 on success, -EINVAL on unexpected file descriptors,
+ * -ENOTSUP when the device does not support reset.
+ */
+static int
+vfio_user_device_reset(struct vfio_user_server *dev,
+	struct vfio_user_msg *msg)
+{
+	if (vfio_user_check_msg_fdnum(msg, 0) != 0)
+		return -EINVAL;
+
+	if ((dev->dev_info->flags & VFIO_DEVICE_FLAGS_RESET) == 0)
+		return -ENOTSUP;
+
+	vfio_user_destroy_mem_entries(dev->mem);
+	dev->is_ready = 0;
+
+	if (dev->ops->reset_device != NULL)
+		dev->ops->reset_device(dev->dev_id);
+
+	return 0;
+}
+
 static vfio_user_msg_handler_t vfio_user_msg_handlers[VFIO_USER_MAX] = {
 	[VFIO_USER_NONE] = NULL,
 	[VFIO_USER_VERSION] = vfio_user_negotiate_version,
-	[VFIO_USER_DMA_MAP] = NULL,
-	[VFIO_USER_DMA_UNMAP] = NULL,
+	[VFIO_USER_DMA_MAP] = vfio_user_dma_map,
+	[VFIO_USER_DMA_UNMAP] = vfio_user_dma_unmap,
 	[VFIO_USER_DEVICE_GET_INFO] = vfio_user_device_get_info,
 	[VFIO_USER_DEVICE_GET_REGION_INFO] = vfio_user_device_get_reg_info,
 	[VFIO_USER_DEVICE_GET_IRQ_INFO] = NULL,
@@ -192,7 +458,7 @@  static vfio_user_msg_handler_t vfio_user_msg_handlers[VFIO_USER_MAX] = {
 	[VFIO_USER_DMA_READ] = NULL,
 	[VFIO_USER_DMA_WRITE] = NULL,
 	[VFIO_USER_VM_INTERRUPT] = NULL,
-	[VFIO_USER_DEVICE_RESET] = NULL,
+	[VFIO_USER_DEVICE_RESET] = vfio_user_device_reset,
 };
 
 static struct vfio_user_server_socket *
@@ -534,6 +800,21 @@  vfio_user_get_device(int dev_id)
 	return dev;
 }
 
+/*
+ * Check whether the device is ready and latch the result in is_ready.
+ *
+ * vfio-user does not define when a device is ready yet, so for now a
+ * device counts as ready as soon as its DMA memory table has at least
+ * one entry.
+ *
+ * Returns 1 (and sets dev->is_ready) when ready, 0 otherwise.
+ */
+static inline int
+vfio_user_is_ready(struct vfio_user_server *dev)
+{
+	if (dev->mem->entry_num == 0)
+		return 0;
+
+	dev->is_ready = 1;
+	return 1;
+}
+
 static int
 vfio_user_message_handler(int dev_id, int fd)
 {
@@ -541,6 +822,7 @@  vfio_user_message_handler(int dev_id, int fd)
 	struct vfio_user_msg msg;
 	uint32_t cmd;
 	int ret = 0;
+	int dev_locked = 0;
 
 	dev = vfio_user_get_device(dev_id);
 	if (!dev)
@@ -569,6 +851,17 @@  vfio_user_message_handler(int dev_id, int fd)
 		return -1;
 	}
 
+	/*
+	 * Below messages should lock the data path upon receiving
+	 * to avoid errors in data path handling
+	 */
+	if ((cmd == VFIO_USER_DMA_MAP || cmd == VFIO_USER_DMA_UNMAP ||
+		cmd == VFIO_USER_DEVICE_RESET)
+		&& dev->ops->lock_dp) {
+		dev->ops->lock_dp(dev_id, 1);
+		dev_locked = 1;
+	}
+
 	if (vfio_user_msg_handlers[cmd])
 		ret = vfio_user_msg_handlers[cmd](dev, &msg);
 	else {
@@ -601,7 +894,18 @@  vfio_user_message_handler(int dev_id, int fd)
 		}
 	}
 
+	if (!dev->is_ready) {
+		if (vfio_user_is_ready(dev) && dev->ops->new_device)
+			dev->ops->new_device(dev_id);
+	} else {
+		if ((cmd == VFIO_USER_DMA_MAP || cmd == VFIO_USER_DMA_UNMAP)
+			&& dev->ops->update_status)
+			dev->ops->update_status(dev_id);
+	}
+
 handle_end:
+	if (dev_locked)
+		dev->ops->lock_dp(dev_id, 0);
 	return ret;
 }
 
@@ -619,8 +923,12 @@  vfio_user_sock_read(int fd, void *data)
 		close(fd);
 		sk->conn_fd = -1;
 		dev = vfio_user_get_device(dev_id);
-		if (dev)
+		if (dev) {
+			dev->ops->destroy_device(dev_id);
+			vfio_user_destroy_mem_entries(dev->mem);
+			dev->is_ready = 0;
 			dev->msg_id = 0;
+		}
 	}
 
 	return ret;
@@ -752,13 +1060,14 @@  vfio_user_start_server(struct vfio_user_server_socket *sk)
 }
 
 int
-rte_vfio_user_register(const char *sock_addr)
+rte_vfio_user_register(const char *sock_addr,
+	const struct rte_vfio_user_notify_ops *ops)
 {
 	struct vfio_user_server_socket *sk;
 	struct vfio_user_server *dev;
 	int dev_id;
 
-	if (!sock_addr)
+	if (!sock_addr || !ops)
 		return -1;
 
 	sk = vfio_user_create_sock(sock_addr);
@@ -776,11 +1085,22 @@  rte_vfio_user_register(const char *sock_addr)
 
 	dev = vfio_user_get_device(dev_id);
 
+	dev->mem = malloc(sizeof(struct rte_vfio_user_mem));
+	if (!dev->mem) {
+		VFIO_USER_LOG(ERR, "Failed to alloc vfio_user_mem\n");
+		goto err_mem;
+	}
+	memset(dev->mem, 0, sizeof(struct rte_vfio_user_mem));
+
 	dev->ver.major = VFIO_USER_VERSION_MAJOR;
 	dev->ver.minor = VFIO_USER_VERSION_MINOR;
+	dev->ops = ops;
+	dev->is_ready = 0;
 
 	return 0;
 
+err_mem:
+	vfio_user_del_device(dev);
 err_add_dev:
 	vfio_user_delete_sock(sk);
 exit:
@@ -818,7 +1138,7 @@  rte_vfio_user_unregister(const char *sock_addr)
 			"device not found.\n");
 		return -1;
 	}
-
+	vfio_user_destroy_mem(dev);
 	vfio_user_del_device(dev);
 
 	return 0;
@@ -940,3 +1260,44 @@  rte_vfio_user_set_reg_info(const char *sock_addr,
 
 	return 0;
 }
+
+/*
+ * Copy the socket address of device dev_id into buf.
+ *
+ * At most len bytes are written, including the terminating NUL; a longer
+ * address is silently truncated. A NULL buffer or a zero length is
+ * rejected, as there would be no room for the terminating NUL.
+ *
+ * Returns 0 on success, -1 if the arguments are invalid or the device
+ * does not exist.
+ */
+int
+rte_vfio_get_sock_addr(int dev_id, char *buf, size_t len)
+{
+	struct vfio_user_server *dev;
+
+	if (!buf || len == 0) {
+		VFIO_USER_LOG(ERR, "Failed to get sock address: "
+			"invalid buffer.\n");
+		return -1;
+	}
+
+	dev = vfio_user_get_device(dev_id);
+	if (!dev) {
+		VFIO_USER_LOG(ERR, "Failed to get sock address:"
+			"device %d not found.\n", dev_id);
+		return -1;
+	}
+
+	len = len > sizeof(dev->sock_addr) ?
+		sizeof(dev->sock_addr) : len;
+	strncpy(buf, dev->sock_addr, len);
+	/* strncpy() does not terminate the string on truncation */
+	buf[len - 1] = '\0';
+
+	return 0;
+}
+
+/*
+ * Look up device dev_id and return its DMA memory table.
+ *
+ * The table stays owned by the device. Returns NULL, after logging an
+ * error, when the device does not exist or has no table allocated.
+ */
+const struct rte_vfio_user_mem *
+rte_vfio_user_get_mem_table(int dev_id)
+{
+	struct vfio_user_server *dev = vfio_user_get_device(dev_id);
+
+	if (dev == NULL) {
+		VFIO_USER_LOG(ERR, "Failed to get memory table:"
+			"device %d not found.\n", dev_id);
+		return NULL;
+	}
+
+	if (dev->mem != NULL)
+		return dev->mem;
+
+	VFIO_USER_LOG(ERR, "Failed to get memory table for device %d:"
+		"memory table not allocated.\n", dev_id);
+	return NULL;
+}
diff --git a/lib/librte_vfio_user/vfio_user_server.h b/lib/librte_vfio_user/vfio_user_server.h
index 4e7337113c..0b20ab4e3a 100644
--- a/lib/librte_vfio_user/vfio_user_server.h
+++ b/lib/librte_vfio_user/vfio_user_server.h
@@ -11,11 +11,14 @@ 
 
 struct vfio_user_server {
 	int dev_id;
+	/* Set to 1 once the DMA memory table holds at least one entry;
+	 * cleared again on device reset and when the connection is lost.
+	 */
+	int is_ready;
 	int started;
 	int conn_fd;
 	uint32_t msg_id;
 	char sock_addr[PATH_MAX];
+	/* Notify callbacks passed to rte_vfio_user_register() */
+	const struct rte_vfio_user_notify_ops *ops;
 	struct vfio_user_version ver;
+	/* DMA memory table, allocated when the device is registered and
+	 * freed when it is unregistered.
+	 */
+	struct rte_vfio_user_mem *mem;
 	struct vfio_device_info *dev_info;
 	struct rte_vfio_user_regions *reg;
 };