[dpdk-dev,v7,3/8] vhost: vring queue setup for multiple queue support

Message ID 1445399294-18826-4-git-send-email-yuanhan.liu@linux.intel.com (mailing list archive)
State Changes Requested, archived
Headers

Commit Message

Yuanhan Liu Oct. 21, 2015, 3:48 a.m. UTC
  All queue pairs, including the default (the first) queue pair,
are allocated dynamically, when a vring_call message is received
for the first time for a specific queue pair.

This is refactoring work for enabling vhost-user multiple queue;
it should not break anything, as it makes no functional changes:
we don't support mq set, so there is at most one queue pair.

This patch is based on Changchun's patch.

Signed-off-by: Ouyang Changchun <changchun.ouyang@intel.com>
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
---
 lib/librte_vhost/rte_virtio_net.h             |   3 +-
 lib/librte_vhost/vhost_user/virtio-net-user.c |  44 ++++----
 lib/librte_vhost/virtio-net.c                 | 144 ++++++++++++++++----------
 3 files changed, 114 insertions(+), 77 deletions(-)
  

Comments

Stephen Hemminger Oct. 21, 2015, 4:45 a.m. UTC | #1
On Wed, 21 Oct 2015 11:48:09 +0800
Yuanhan Liu <yuanhan.liu@linux.intel.com> wrote:

>  struct virtio_net {
> -	struct vhost_virtqueue	*virtqueue[VIRTIO_QNUM];	/**< Contains all virtqueue information. */
> +	struct vhost_virtqueue	*virtqueue[VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX];	/**< Contains all virtqueue information. */
>  	struct virtio_memory	*mem;		/**< QEMU memory and memory region information. */

Since the virtqueue array takes up space, why not put it at the end of
the struct? That way offsets are smaller and all the early fields will
be in adjacent cache lines.
  
Huawei Xie Oct. 22, 2015, 9:49 a.m. UTC | #2
On 10/21/2015 11:48 AM, Yuanhan Liu wrote:
> All queue pairs, including the default (the first) queue pair,
> are allocated dynamically, when a vring_call message is received
> first time for a specific queue pair.
>
> This is a refactor work for enabling vhost-user multiple queue;
> it should not break anything as it does no functional changes:
> we don't support mq set, so there is only one mq at max.
>
> This patch is based on Changchun's patch.
>
[...]
>  
>  void
> @@ -290,13 +298,9 @@ user_get_vring_base(struct vhost_device_ctx ctx,
>  	 * sent and only sent in vhost_vring_stop.
>  	 * TODO: cleanup the vring, it isn't usable since here.
>  	 */
> -	if ((dev->virtqueue[VIRTIO_RXQ]->kickfd) >= 0) {
> -		close(dev->virtqueue[VIRTIO_RXQ]->kickfd);
> -		dev->virtqueue[VIRTIO_RXQ]->kickfd = -1;
> -	}
> -	if ((dev->virtqueue[VIRTIO_TXQ]->kickfd) >= 0) {
> -		close(dev->virtqueue[VIRTIO_TXQ]->kickfd);
> -		dev->virtqueue[VIRTIO_TXQ]->kickfd = -1;
> +	if ((dev->virtqueue[state->index]->kickfd) >= 0) {
> +		close(dev->virtqueue[state->index]->kickfd);
> +		dev->virtqueue[state->index]->kickfd = -1;
>  	}
Since we change the behavior here, it would be better to list it in the
commit message as well.

>  
>  
> @@ -680,13 +704,21 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>  {
>  	struct virtio_net *dev;
>  	struct vhost_virtqueue *vq;
> +	uint32_t cur_qp_idx = file->index / VIRTIO_QNUM;
>  
>  	dev = get_device(ctx);
>  	if (dev == NULL)
>  		return -1;
>  
> +	/* alloc vring queue pair if it is a new queue pair */
> +	if (cur_qp_idx + 1 > dev->virt_qp_nb) {
> +		if (alloc_vring_queue_pair(dev, cur_qp_idx) < 0)
> +			return -1;
> +	}
> +
Here we rely on the fact that this set_vring_call message is sent in the
continuous ascending order of queue idx 0, 1, 2, ...

>  	/* file->index refers to the queue index. The txq is 1, rxq is 0. */
>  	vq = dev->virtqueue[file->index];
> +	assert(vq != NULL);
>  
If we don't allocate the queue until we receive the first vring message,
it would be better to add a comment that we rely on this fact.
Could we add a vhost-user message to tell us the number of queues QEMU
allocates before the vring message?
>  	if (vq->callfd >= 0)
>  		close(vq->callfd);
  
Yuanhan Liu Oct. 22, 2015, 11:30 a.m. UTC | #3
On Thu, Oct 22, 2015 at 09:49:58AM +0000, Xie, Huawei wrote:
> On 10/21/2015 11:48 AM, Yuanhan Liu wrote:
> > All queue pairs, including the default (the first) queue pair,
> > are allocated dynamically, when a vring_call message is received
> > first time for a specific queue pair.
> >
> > This is a refactor work for enabling vhost-user multiple queue;
> > it should not break anything as it does no functional changes:
> > we don't support mq set, so there is only one mq at max.
> >
> > This patch is based on Changchun's patch.
> >
> [...]
> >  
> >  void
> > @@ -290,13 +298,9 @@ user_get_vring_base(struct vhost_device_ctx ctx,
> >  	 * sent and only sent in vhost_vring_stop.
> >  	 * TODO: cleanup the vring, it isn't usable since here.
> >  	 */
> > -	if ((dev->virtqueue[VIRTIO_RXQ]->kickfd) >= 0) {
> > -		close(dev->virtqueue[VIRTIO_RXQ]->kickfd);
> > -		dev->virtqueue[VIRTIO_RXQ]->kickfd = -1;
> > -	}
> > -	if ((dev->virtqueue[VIRTIO_TXQ]->kickfd) >= 0) {
> > -		close(dev->virtqueue[VIRTIO_TXQ]->kickfd);
> > -		dev->virtqueue[VIRTIO_TXQ]->kickfd = -1;
> > +	if ((dev->virtqueue[state->index]->kickfd) >= 0) {
> > +		close(dev->virtqueue[state->index]->kickfd);
> > +		dev->virtqueue[state->index]->kickfd = -1;
> >  	}
> Since we change the behavior here, better list in the commit message as
> well.

I checked the code again, and found I should not change that:
GET_VRING_BASE is sent per virt queue pair.

BTW, it's wrong to do this kind of stuff here; we need to fix
it in the future.

> 
> >  
> >  
> > @@ -680,13 +704,21 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
> >  {
> >  	struct virtio_net *dev;
> >  	struct vhost_virtqueue *vq;
> > +	uint32_t cur_qp_idx = file->index / VIRTIO_QNUM;
> >  
> >  	dev = get_device(ctx);
> >  	if (dev == NULL)
> >  		return -1;
> >  
> > +	/* alloc vring queue pair if it is a new queue pair */
> > +	if (cur_qp_idx + 1 > dev->virt_qp_nb) {
> > +		if (alloc_vring_queue_pair(dev, cur_qp_idx) < 0)
> > +			return -1;
> > +	}
> > +
> Here we rely on the fact that this set_vring_call message is sent in the
> continuous ascending order of queue idx 0, 1, 2, ...

That's true.

> 
> >  	/* file->index refers to the queue index. The txq is 1, rxq is 0. */
> >  	vq = dev->virtqueue[file->index];
> > +	assert(vq != NULL);
> >  
> If we allocate the queue until the we receive the first vring message,
> better add comment that we rely on this fact.

Will do that.

> Could we add the vhost-user message to tell us the queue number QEMU
> allocates before vring message?

We may need to do that. But it's too late to make it into v2.2.

	--yliu

> >  	if (vq->callfd >= 0)
> >  		close(vq->callfd);
>
  

Patch

diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index e3a21e5..5dd6493 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -96,7 +96,7 @@  struct vhost_virtqueue {
  * Device structure contains all configuration information relating to the device.
  */
 struct virtio_net {
-	struct vhost_virtqueue	*virtqueue[VIRTIO_QNUM];	/**< Contains all virtqueue information. */
+	struct vhost_virtqueue	*virtqueue[VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX];	/**< Contains all virtqueue information. */
 	struct virtio_memory	*mem;		/**< QEMU memory and memory region information. */
 	uint64_t		features;	/**< Negotiated feature set. */
 	uint64_t		protocol_features;	/**< Negotiated protocol feature set. */
@@ -104,6 +104,7 @@  struct virtio_net {
 	uint32_t		flags;		/**< Device flags. Only used to check if device is running on data core. */
 #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
 	char			ifname[IF_NAME_SZ];	/**< Name of the tap device or socket path. */
+	uint32_t		virt_qp_nb;	/**< number of queue pair we have allocated */
 	void			*priv;		/**< private context */
 } __rte_cache_aligned;
 
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 360254e..e83d279 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -206,25 +206,33 @@  err_mmap:
 }
 
 static int
+vq_is_ready(struct vhost_virtqueue *vq)
+{
+	return vq && vq->desc   &&
+	       vq->kickfd != -1 &&
+	       vq->callfd != -1;
+}
+
+static int
 virtio_is_ready(struct virtio_net *dev)
 {
 	struct vhost_virtqueue *rvq, *tvq;
+	uint32_t i;
 
-	/* mq support in future.*/
-	rvq = dev->virtqueue[VIRTIO_RXQ];
-	tvq = dev->virtqueue[VIRTIO_TXQ];
-	if (rvq && tvq && rvq->desc && tvq->desc &&
-		(rvq->kickfd != -1) &&
-		(rvq->callfd != -1) &&
-		(tvq->kickfd != -1) &&
-		(tvq->callfd != -1)) {
-		RTE_LOG(INFO, VHOST_CONFIG,
-			"virtio is now ready for processing.\n");
-		return 1;
+	for (i = 0; i < dev->virt_qp_nb; i++) {
+		rvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ];
+		tvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ];
+
+		if (!vq_is_ready(rvq) || !vq_is_ready(tvq)) {
+			RTE_LOG(INFO, VHOST_CONFIG,
+				"virtio is not ready for processing.\n");
+			return 0;
+		}
 	}
+
 	RTE_LOG(INFO, VHOST_CONFIG,
-		"virtio isn't ready for processing.\n");
-	return 0;
+		"virtio is now ready for processing.\n");
+	return 1;
 }
 
 void
@@ -290,13 +298,9 @@  user_get_vring_base(struct vhost_device_ctx ctx,
 	 * sent and only sent in vhost_vring_stop.
 	 * TODO: cleanup the vring, it isn't usable since here.
 	 */
-	if ((dev->virtqueue[VIRTIO_RXQ]->kickfd) >= 0) {
-		close(dev->virtqueue[VIRTIO_RXQ]->kickfd);
-		dev->virtqueue[VIRTIO_RXQ]->kickfd = -1;
-	}
-	if ((dev->virtqueue[VIRTIO_TXQ]->kickfd) >= 0) {
-		close(dev->virtqueue[VIRTIO_TXQ]->kickfd);
-		dev->virtqueue[VIRTIO_TXQ]->kickfd = -1;
+	if ((dev->virtqueue[state->index]->kickfd) >= 0) {
+		close(dev->virtqueue[state->index]->kickfd);
+		dev->virtqueue[state->index]->kickfd = -1;
 	}
 
 	return 0;
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index deac6b9..57fb7b1 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -36,6 +36,7 @@ 
 #include <stddef.h>
 #include <stdint.h>
 #include <stdlib.h>
+#include <assert.h>
 #include <sys/mman.h>
 #include <unistd.h>
 #ifdef RTE_LIBRTE_VHOST_NUMA
@@ -178,6 +179,15 @@  add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
 
 }
 
+static void
+cleanup_vq(struct vhost_virtqueue *vq)
+{
+	if (vq->callfd >= 0)
+		close(vq->callfd);
+	if (vq->kickfd >= 0)
+		close(vq->kickfd);
+}
+
 /*
  * Unmap any memory, close any file descriptors and
  * free any memory owned by a device.
@@ -185,6 +195,8 @@  add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
 static void
 cleanup_device(struct virtio_net *dev)
 {
+	uint32_t i;
+
 	/* Unmap QEMU memory file if mapped. */
 	if (dev->mem) {
 		munmap((void *)(uintptr_t)dev->mem->mapped_address,
@@ -192,15 +204,10 @@  cleanup_device(struct virtio_net *dev)
 		free(dev->mem);
 	}
 
-	/* Close any event notifiers opened by device. */
-	if (dev->virtqueue[VIRTIO_RXQ]->callfd >= 0)
-		close(dev->virtqueue[VIRTIO_RXQ]->callfd);
-	if (dev->virtqueue[VIRTIO_RXQ]->kickfd >= 0)
-		close(dev->virtqueue[VIRTIO_RXQ]->kickfd);
-	if (dev->virtqueue[VIRTIO_TXQ]->callfd >= 0)
-		close(dev->virtqueue[VIRTIO_TXQ]->callfd);
-	if (dev->virtqueue[VIRTIO_TXQ]->kickfd >= 0)
-		close(dev->virtqueue[VIRTIO_TXQ]->kickfd);
+	for (i = 0; i < dev->virt_qp_nb; i++) {
+		cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ]);
+		cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ]);
+	}
 }
 
 /*
@@ -209,9 +216,11 @@  cleanup_device(struct virtio_net *dev)
 static void
 free_device(struct virtio_net_config_ll *ll_dev)
 {
-	/* Free any malloc'd memory */
-	rte_free(ll_dev->dev.virtqueue[VIRTIO_RXQ]);
-	rte_free(ll_dev->dev.virtqueue[VIRTIO_TXQ]);
+	uint32_t i;
+
+	for (i = 0; i < ll_dev->dev.virt_qp_nb; i++)
+		rte_free(ll_dev->dev.virtqueue[i * VIRTIO_QNUM]);
+
 	rte_free(ll_dev);
 }
 
@@ -244,6 +253,50 @@  rm_config_ll_entry(struct virtio_net_config_ll *ll_dev,
 	}
 }
 
+static void
+init_vring_queue(struct vhost_virtqueue *vq)
+{
+	memset(vq, 0, sizeof(struct vhost_virtqueue));
+
+	vq->kickfd = -1;
+	vq->callfd = -1;
+
+	/* Backends are set to -1 indicating an inactive device. */
+	vq->backend = -1;
+}
+
+static void
+init_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
+{
+	init_vring_queue(dev->virtqueue[qp_idx * VIRTIO_QNUM + VIRTIO_RXQ]);
+	init_vring_queue(dev->virtqueue[qp_idx * VIRTIO_QNUM + VIRTIO_TXQ]);
+}
+
+static int
+alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
+{
+	struct vhost_virtqueue *virtqueue = NULL;
+	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
+	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
+
+	virtqueue = rte_malloc(NULL,
+			       sizeof(struct vhost_virtqueue) * VIRTIO_QNUM, 0);
+	if (virtqueue == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to allocate memory for virt qp:%d.\n", qp_idx);
+		return -1;
+	}
+
+	dev->virtqueue[virt_rx_q_idx] = virtqueue;
+	dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ;
+
+	init_vring_queue_pair(dev, qp_idx);
+
+	dev->virt_qp_nb += 1;
+
+	return 0;
+}
+
 /*
  *  Initialise all variables in device structure.
  */
@@ -251,6 +304,7 @@  static void
 init_device(struct virtio_net *dev)
 {
 	uint64_t vq_offset;
+	uint32_t i;
 
 	/*
 	 * Virtqueues have already been malloced so
@@ -261,17 +315,9 @@  init_device(struct virtio_net *dev)
 	/* Set everything to 0. */
 	memset((void *)(uintptr_t)((uint64_t)(uintptr_t)dev + vq_offset), 0,
 		(sizeof(struct virtio_net) - (size_t)vq_offset));
-	memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct vhost_virtqueue));
-	memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct vhost_virtqueue));
-
-	dev->virtqueue[VIRTIO_RXQ]->kickfd = -1;
-	dev->virtqueue[VIRTIO_RXQ]->callfd = -1;
-	dev->virtqueue[VIRTIO_TXQ]->kickfd = -1;
-	dev->virtqueue[VIRTIO_TXQ]->callfd = -1;
 
-	/* Backends are set to -1 indicating an inactive device. */
-	dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
-	dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
+	for (i = 0; i < dev->virt_qp_nb; i++)
+		init_vring_queue_pair(dev, i);
 }
 
 /*
@@ -283,7 +329,6 @@  static int
 new_device(struct vhost_device_ctx ctx)
 {
 	struct virtio_net_config_ll *new_ll_dev;
-	struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;
 
 	/* Setup device and virtqueues. */
 	new_ll_dev = rte_malloc(NULL, sizeof(struct virtio_net_config_ll), 0);
@@ -294,28 +339,6 @@  new_device(struct vhost_device_ctx ctx)
 		return -1;
 	}
 
-	virtqueue_rx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
-	if (virtqueue_rx == NULL) {
-		rte_free(new_ll_dev);
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to allocate memory for rxq.\n",
-			ctx.fh);
-		return -1;
-	}
-
-	virtqueue_tx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
-	if (virtqueue_tx == NULL) {
-		rte_free(virtqueue_rx);
-		rte_free(new_ll_dev);
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to allocate memory for txq.\n",
-			ctx.fh);
-		return -1;
-	}
-
-	new_ll_dev->dev.virtqueue[VIRTIO_RXQ] = virtqueue_rx;
-	new_ll_dev->dev.virtqueue[VIRTIO_TXQ] = virtqueue_tx;
-
 	/* Initialise device and virtqueues. */
 	init_device(&new_ll_dev->dev);
 
@@ -437,6 +460,8 @@  static int
 set_features(struct vhost_device_ctx ctx, uint64_t *pu)
 {
 	struct virtio_net *dev;
+	uint16_t vhost_hlen;
+	uint16_t i;
 
 	dev = get_device(ctx);
 	if (dev == NULL)
@@ -444,27 +469,26 @@  set_features(struct vhost_device_ctx ctx, uint64_t *pu)
 	if (*pu & ~VHOST_FEATURES)
 		return -1;
 
-	/* Store the negotiated feature list for the device. */
 	dev->features = *pu;
-
-	/* Set the vhost_hlen depending on if VIRTIO_NET_F_MRG_RXBUF is set. */
 	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
 		LOG_DEBUG(VHOST_CONFIG,
 			"(%"PRIu64") Mergeable RX buffers enabled\n",
 			dev->device_fh);
-		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
-			sizeof(struct virtio_net_hdr_mrg_rxbuf);
-		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
-			sizeof(struct virtio_net_hdr_mrg_rxbuf);
+		vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 	} else {
 		LOG_DEBUG(VHOST_CONFIG,
 			"(%"PRIu64") Mergeable RX buffers disabled\n",
 			dev->device_fh);
-		dev->virtqueue[VIRTIO_RXQ]->vhost_hlen =
-			sizeof(struct virtio_net_hdr);
-		dev->virtqueue[VIRTIO_TXQ]->vhost_hlen =
-			sizeof(struct virtio_net_hdr);
+		vhost_hlen = sizeof(struct virtio_net_hdr);
+	}
+
+	for (i = 0; i < dev->virt_qp_nb; i++) {
+		uint16_t base_idx = i * VIRTIO_QNUM;
+
+		dev->virtqueue[base_idx + VIRTIO_RXQ]->vhost_hlen = vhost_hlen;
+		dev->virtqueue[base_idx + VIRTIO_TXQ]->vhost_hlen = vhost_hlen;
 	}
+
 	return 0;
 }
 
@@ -680,13 +704,21 @@  set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
 {
 	struct virtio_net *dev;
 	struct vhost_virtqueue *vq;
+	uint32_t cur_qp_idx = file->index / VIRTIO_QNUM;
 
 	dev = get_device(ctx);
 	if (dev == NULL)
 		return -1;
 
+	/* alloc vring queue pair if it is a new queue pair */
+	if (cur_qp_idx + 1 > dev->virt_qp_nb) {
+		if (alloc_vring_queue_pair(dev, cur_qp_idx) < 0)
+			return -1;
+	}
+
 	/* file->index refers to the queue index. The txq is 1, rxq is 0. */
 	vq = dev->virtqueue[file->index];
+	assert(vq != NULL);
 
 	if (vq->callfd >= 0)
 		close(vq->callfd);