[v11,5/9] vhost: checkout the resubmit inflight information

Message ID 20191009204837.65039-6-jin.yu@intel.com
State Accepted
Delegated to: Maxime Coquelin
Headers show
Series
  • vhost: support inflight share memory protocol feature
Related show

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/checkpatch success coding style OK

Commit Message

Jin Yu Oct. 9, 2019, 8:48 p.m.
This patch shows how to checkout the inflight ring and construct
the resubmit information also include destroying resubmit info.

Signed-off-by: Lin Li <lilin24@baidu.com>
Signed-off-by: Xun Ni <nixun@baidu.com>
Signed-off-by: Yu Zhang <zhangyu31@baidu.com>
Signed-off-by: Jin Yu <jin.yu@intel.com>
---
 lib/librte_vhost/rte_vhost.h  |  19 +++
 lib/librte_vhost/vhost.c      |  29 ++++-
 lib/librte_vhost/vhost.h      |   9 ++
 lib/librte_vhost/vhost_user.c | 217 +++++++++++++++++++++++++++++++++-
 4 files changed, 271 insertions(+), 3 deletions(-)

Comments

Maxime Coquelin Oct. 11, 2019, 10:09 a.m. | #1
On 10/9/19 10:48 PM, Jin Yu wrote:
> This patch shows how to checkout the inflight ring and construct
> the resubmit information also include destroying resubmit info.
> 
> Signed-off-by: Lin Li <lilin24@baidu.com>
> Signed-off-by: Xun Ni <nixun@baidu.com>
> Signed-off-by: Yu Zhang <zhangyu31@baidu.com>
> Signed-off-by: Jin Yu <jin.yu@intel.com>
> ---
>  lib/librte_vhost/rte_vhost.h  |  19 +++
>  lib/librte_vhost/vhost.c      |  29 ++++-
>  lib/librte_vhost/vhost.h      |   9 ++
>  lib/librte_vhost/vhost_user.c | 217 +++++++++++++++++++++++++++++++++-
>  4 files changed, 271 insertions(+), 3 deletions(-)
> 

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Patch

diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index 5241e36e8..95e4d720e 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -145,6 +145,25 @@  struct rte_vhost_inflight_info_packed {
 	struct rte_vhost_inflight_desc_packed desc[0];
 };
 
+struct rte_vhost_resubmit_desc {
+	uint16_t index;
+	uint64_t counter;
+};
+
+struct rte_vhost_resubmit_info {
+	struct rte_vhost_resubmit_desc *resubmit_list;
+	uint16_t resubmit_num;
+};
+
+struct rte_vhost_ring_inflight {
+	union {
+		struct rte_vhost_inflight_info_split *inflight_split;
+		struct rte_vhost_inflight_info_packed *inflight_packed;
+	};
+
+	struct rte_vhost_resubmit_info *resubmit_inflight;
+};
+
 struct rte_vhost_vring {
 	union {
 		struct vring_desc *desc;
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 981837b5d..f8660edbf 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -242,6 +242,31 @@  cleanup_vq(struct vhost_virtqueue *vq, int destroy)
 		close(vq->kickfd);
 }
 
+void
+cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+	if (!(dev->protocol_features &
+	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
+		return;
+
+	if (vq_is_packed(dev)) {
+		if (vq->inflight_packed)
+			vq->inflight_packed = NULL;
+	} else {
+		if (vq->inflight_split)
+			vq->inflight_split = NULL;
+	}
+
+	if (vq->resubmit_inflight) {
+		if (vq->resubmit_inflight->resubmit_list) {
+			free(vq->resubmit_inflight->resubmit_list);
+			vq->resubmit_inflight->resubmit_list = NULL;
+		}
+		free(vq->resubmit_inflight);
+		vq->resubmit_inflight = NULL;
+	}
+}
+
 /*
  * Unmap any memory, close any file descriptors and
  * free any memory owned by a device.
@@ -253,8 +278,10 @@  cleanup_device(struct virtio_net *dev, int destroy)
 
 	vhost_backend_cleanup(dev);
 
-	for (i = 0; i < dev->nr_vring; i++)
+	for (i = 0; i < dev->nr_vring; i++) {
 		cleanup_vq(dev->virtqueue[i], destroy);
+		cleanup_vq_inflight(dev, dev->virtqueue[i]);
+	}
 }
 
 void
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index d67ba849a..ab95999c4 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -128,6 +128,14 @@  struct vhost_virtqueue {
 	/* Physical address of used ring, for logging */
 	uint64_t		log_guest_addr;
 
+	/* inflight share memory info */
+	union {
+		struct rte_vhost_inflight_info_split *inflight_split;
+		struct rte_vhost_inflight_info_packed *inflight_packed;
+	};
+	struct rte_vhost_resubmit_info *resubmit_inflight;
+	uint64_t		global_counter;
+
 	uint16_t		nr_zmbuf;
 	uint16_t		zmbuf_size;
 	uint16_t		last_zmbuf_idx;
@@ -474,6 +482,7 @@  void vhost_destroy_device(int);
 void vhost_destroy_device_notify(struct virtio_net *dev);
 
 void cleanup_vq(struct vhost_virtqueue *vq, int destroy);
+void cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq);
 void free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq);
 
 int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx);
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index a7bc42050..5cc16cd91 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -331,6 +331,7 @@  vhost_user_set_features(struct virtio_net **pdev, struct VhostUserMsg *msg,
 
 			dev->virtqueue[dev->nr_vring] = NULL;
 			cleanup_vq(vq, 1);
+			cleanup_vq_inflight(dev, vq);
 			free_vq(dev, vq);
 		}
 	}
@@ -1338,10 +1339,11 @@  vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
 {
 	uint64_t mmap_size, mmap_offset;
 	uint16_t num_queues, queue_size;
-	uint32_t pervq_inflight_size;
 	struct virtio_net *dev = *pdev;
+	uint32_t pervq_inflight_size;
+	struct vhost_virtqueue *vq;
 	void *addr;
-	int fd;
+	int fd, i;
 
 	fd = msg->fds[0];
 	if (msg->size != sizeof(msg->payload.inflight) || fd < 0) {
@@ -1402,6 +1404,18 @@  vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
 	dev->inflight_info->addr = addr;
 	dev->inflight_info->size = mmap_size;
 
+	for (i = 0; i < num_queues; i++) {
+		vq = dev->virtqueue[i];
+		if (vq_is_packed(dev)) {
+			vq->inflight_packed = addr;
+			vq->inflight_packed->desc_num = queue_size;
+		} else {
+			vq->inflight_split = addr;
+			vq->inflight_split->desc_num = queue_size;
+		}
+		addr = (void *)((char *)addr + pervq_inflight_size);
+	}
+
 	return RTE_VHOST_MSG_RESULT_OK;
 }
 
@@ -1441,6 +1455,191 @@  static int vhost_user_set_vring_err(struct virtio_net **pdev __rte_unused,
 	return RTE_VHOST_MSG_RESULT_OK;
 }
 
+static int
+resubmit_desc_compare(const void *a, const void *b)
+{
+	const struct rte_vhost_resubmit_desc *desc0 = a;
+	const struct rte_vhost_resubmit_desc *desc1 = b;
+
+	if (desc1->counter > desc0->counter)
+		return 1;
+
+	return -1;
+}
+
+static int
+vhost_check_queue_inflights_split(struct virtio_net *dev,
+				  struct vhost_virtqueue *vq)
+{
+	uint16_t i;
+	uint16_t resubmit_num = 0, last_io, num;
+	struct vring_used *used = vq->used;
+	struct rte_vhost_resubmit_info *resubmit;
+	struct rte_vhost_inflight_info_split *inflight_split;
+
+	if (!(dev->protocol_features &
+	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
+		return RTE_VHOST_MSG_RESULT_OK;
+
+	if ((!vq->inflight_split))
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	if (!vq->inflight_split->version) {
+		vq->inflight_split->version = INFLIGHT_VERSION;
+		return RTE_VHOST_MSG_RESULT_OK;
+	}
+
+	if (vq->resubmit_inflight)
+		return RTE_VHOST_MSG_RESULT_OK;
+
+	inflight_split = vq->inflight_split;
+	vq->global_counter = 0;
+	last_io = inflight_split->last_inflight_io;
+
+	if (inflight_split->used_idx != used->idx) {
+		inflight_split->desc[last_io].inflight = 0;
+		rte_smp_mb();
+		inflight_split->used_idx = used->idx;
+	}
+
+	for (i = 0; i < inflight_split->desc_num; i++) {
+		if (inflight_split->desc[i].inflight == 1)
+			resubmit_num++;
+	}
+
+	vq->last_avail_idx += resubmit_num;
+
+	if (resubmit_num) {
+		resubmit  = calloc(1, sizeof(struct rte_vhost_resubmit_info));
+		if (!resubmit) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"failed to allocate memory for resubmit info.\n");
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+
+		resubmit->resubmit_list = calloc(resubmit_num,
+			sizeof(struct rte_vhost_resubmit_desc));
+		if (!resubmit->resubmit_list) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"failed to allocate memory for inflight desc.\n");
+			free(resubmit);
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+
+		num = 0;
+		for (i = 0; i < vq->inflight_split->desc_num; i++) {
+			if (vq->inflight_split->desc[i].inflight == 1) {
+				resubmit->resubmit_list[num].index = i;
+				resubmit->resubmit_list[num].counter =
+					inflight_split->desc[i].counter;
+				num++;
+			}
+		}
+		resubmit->resubmit_num = num;
+
+		if (resubmit->resubmit_num > 1)
+			qsort(resubmit->resubmit_list, resubmit->resubmit_num,
+			      sizeof(struct rte_vhost_resubmit_desc),
+			      resubmit_desc_compare);
+
+		vq->global_counter = resubmit->resubmit_list[0].counter + 1;
+		vq->resubmit_inflight = resubmit;
+	}
+
+	return RTE_VHOST_MSG_RESULT_OK;
+}
+
+static int
+vhost_check_queue_inflights_packed(struct virtio_net *dev,
+				   struct vhost_virtqueue *vq)
+{
+	uint16_t i;
+	uint16_t resubmit_num = 0, old_used_idx, num;
+	struct rte_vhost_resubmit_info *resubmit;
+	struct rte_vhost_inflight_info_packed *inflight_packed;
+
+	if (!(dev->protocol_features &
+	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
+		return RTE_VHOST_MSG_RESULT_OK;
+
+	if ((!vq->inflight_packed))
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	if (!vq->inflight_packed->version) {
+		vq->inflight_packed->version = INFLIGHT_VERSION;
+		return RTE_VHOST_MSG_RESULT_OK;
+	}
+
+	if (vq->resubmit_inflight)
+		return RTE_VHOST_MSG_RESULT_OK;
+
+	inflight_packed = vq->inflight_packed;
+	vq->global_counter = 0;
+	old_used_idx = inflight_packed->old_used_idx;
+
+	if (inflight_packed->used_idx != old_used_idx) {
+		if (inflight_packed->desc[old_used_idx].inflight == 0) {
+			inflight_packed->old_used_idx =
+				inflight_packed->used_idx;
+			inflight_packed->old_used_wrap_counter =
+				inflight_packed->used_wrap_counter;
+			inflight_packed->old_free_head =
+				inflight_packed->free_head;
+		} else {
+			inflight_packed->used_idx =
+				inflight_packed->old_used_idx;
+			inflight_packed->used_wrap_counter =
+				inflight_packed->old_used_wrap_counter;
+			inflight_packed->free_head =
+				inflight_packed->old_free_head;
+		}
+	}
+
+	for (i = 0; i < inflight_packed->desc_num; i++) {
+		if (inflight_packed->desc[i].inflight == 1)
+			resubmit_num++;
+	}
+
+	if (resubmit_num) {
+		resubmit = calloc(1, sizeof(struct rte_vhost_resubmit_info));
+		if (resubmit == NULL) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"failed to allocate memory for resubmit info.\n");
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+
+		resubmit->resubmit_list = calloc(resubmit_num,
+			sizeof(struct rte_vhost_resubmit_desc));
+		if (resubmit->resubmit_list == NULL) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"failed to allocate memory for resubmit desc.\n");
+			free(resubmit);
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+
+		num = 0;
+		for (i = 0; i < inflight_packed->desc_num; i++) {
+			if (vq->inflight_packed->desc[i].inflight == 1) {
+				resubmit->resubmit_list[num].index = i;
+				resubmit->resubmit_list[num].counter =
+					inflight_packed->desc[i].counter;
+				num++;
+			}
+		}
+		resubmit->resubmit_num = num;
+
+		if (resubmit->resubmit_num > 1)
+			qsort(resubmit->resubmit_list, resubmit->resubmit_num,
+			      sizeof(struct rte_vhost_resubmit_desc),
+			      resubmit_desc_compare);
+
+		vq->global_counter = resubmit->resubmit_list[0].counter + 1;
+		vq->resubmit_inflight = resubmit;
+	}
+
+	return RTE_VHOST_MSG_RESULT_OK;
+}
+
 static int
 vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg,
 			int main_fd __rte_unused)
@@ -1482,6 +1681,20 @@  vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg,
 		close(vq->kickfd);
 	vq->kickfd = file.fd;
 
+	if (vq_is_packed(dev)) {
+		if (vhost_check_queue_inflights_packed(dev, vq)) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"failed to inflights for vq: %d\n", file.index);
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+	} else {
+		if (vhost_check_queue_inflights_split(dev, vq)) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"failed to inflights for vq: %d\n", file.index);
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+	}
+
 	return RTE_VHOST_MSG_RESULT_OK;
 }