[dpdk-dev,v5,resend,03/12] vhost: vring queue setup for multiple queue support

Message ID 1442589061-19225-4-git-send-email-yuanhan.liu@linux.intel.com (mailing list archive)
State Superseded, archived

Commit Message

Yuanhan Liu Sept. 18, 2015, 3:10 p.m. UTC
  All queue pairs, including the default (the first) queue pair,
are allocated dynamically, when a vring_call message is received
the first time for a specific queue pair.

This is refactoring work for enabling vhost-user multiple queue
support; it should not break anything, as it makes no functional
changes: we don't support mq set yet, so there is at most one
queue pair.

This patch is based on Changchun's patch.

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
 lib/librte_vhost/rte_virtio_net.h             |   3 +-
 lib/librte_vhost/vhost_user/virtio-net-user.c |  44 +++++-----
 lib/librte_vhost/virtio-net.c                 | 121 ++++++++++++++++----------
 3 files changed, 102 insertions(+), 66 deletions(-)
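
As a reading aid for the diff reviewed below: each queue pair occupies
two consecutive slots of the virtqueue array, and a pair is allocated
the first time a vring message arrives for it. A minimal standalone
sketch of the index arithmetic and the grow-on-first-use check follows
(constant values as in rte_virtio_net.h, where VIRTIO_RXQ is 0,
VIRTIO_TXQ is 1 and VIRTIO_QNUM is 2):

    #include <stdio.h>

    #define VIRTIO_RXQ  0
    #define VIRTIO_TXQ  1
    #define VIRTIO_QNUM 2   /* vrings per queue pair */

    int main(void)
    {
        unsigned int virt_qp_nb = 1;  /* queue pairs allocated so far */
        unsigned int vring_idx  = 5;  /* e.g. from VHOST_USER_SET_VRING_CALL */

        /* vring 5 belongs to queue pair 2, whose vrings are slots 4 and 5 */
        unsigned int qp_idx = vring_idx / VIRTIO_QNUM;
        printf("qp %u: rxq slot %u, txq slot %u\n", qp_idx,
               qp_idx * VIRTIO_QNUM + VIRTIO_RXQ,
               qp_idx * VIRTIO_QNUM + VIRTIO_TXQ);

        /* allocate the pair on first use, as set_vring_call() does */
        if (qp_idx + 1 > virt_qp_nb)
            printf("qp %u not allocated yet: alloc_vring_queue_pair()\n",
                   qp_idx);
        return 0;
    }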
  

Comments

Marcel Apfelbaum Sept. 20, 2015, 1:58 p.m. UTC | #1
On 09/18/2015 06:10 PM, Yuanhan Liu wrote:
> All queue pairs, including the default (the first) queue pair,
> are allocated dynamically, when a vring_call message is received
> first time for a specific queue pair.
>
> This is a refactor work for enabling vhost-user multiple queue;
> it should not break anything as it does no functional changes:
> we don't support mq set, so there is only one mq at max.
>
> This patch is based on Changchun's patch.
>
> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> ---
>   lib/librte_vhost/rte_virtio_net.h             |   3 +-
>   lib/librte_vhost/vhost_user/virtio-net-user.c |  44 +++++-----
>   lib/librte_vhost/virtio-net.c                 | 121 ++++++++++++++++----------
>   3 files changed, 102 insertions(+), 66 deletions(-)
>
> diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
> index e3a21e5..5dd6493 100644
> --- a/lib/librte_vhost/rte_virtio_net.h
> +++ b/lib/librte_vhost/rte_virtio_net.h
> @@ -96,7 +96,7 @@ struct vhost_virtqueue {
>    * Device structure contains all configuration information relating to the device.
>    */
>   struct virtio_net {
> -	struct vhost_virtqueue	*virtqueue[VIRTIO_QNUM];	/**< Contains all virtqueue information. */
> +	struct vhost_virtqueue	*virtqueue[VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX];	/**< Contains all virtqueue information. */
>   	struct virtio_memory	*mem;		/**< QEMU memory and memory region information. */
>   	uint64_t		features;	/**< Negotiated feature set. */
>   	uint64_t		protocol_features;	/**< Negotiated protocol feature set. */
> @@ -104,6 +104,7 @@ struct virtio_net {
>   	uint32_t		flags;		/**< Device flags. Only used to check if device is running on data core. */
>   #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
>   	char			ifname[IF_NAME_SZ];	/**< Name of the tap device or socket path. */
> +	uint32_t		virt_qp_nb;	/**< number of queue pair we have allocated */
>   	void			*priv;		/**< private context */
>   } __rte_cache_aligned;
>
> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
> index 360254e..e83d279 100644
> --- a/lib/librte_vhost/vhost_user/virtio-net-user.c
> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
> @@ -206,25 +206,33 @@ err_mmap:
>   }
>

Hi,

>   static int
> +vq_is_ready(struct vhost_virtqueue *vq)
> +{
> +	return vq && vq->desc   &&
> +	       vq->kickfd != -1 &&
> +	       vq->callfd != -1;

  kickfd and callfd are unsigned

> +}
> +
> +static int
>   virtio_is_ready(struct virtio_net *dev)
>   {
>   	struct vhost_virtqueue *rvq, *tvq;
> +	uint32_t i;
>
> -	/* mq support in future.*/
> -	rvq = dev->virtqueue[VIRTIO_RXQ];
> -	tvq = dev->virtqueue[VIRTIO_TXQ];
> -	if (rvq && tvq && rvq->desc && tvq->desc &&
> -		(rvq->kickfd != -1) &&
> -		(rvq->callfd != -1) &&
> -		(tvq->kickfd != -1) &&
> -		(tvq->callfd != -1)) {
> -		RTE_LOG(INFO, VHOST_CONFIG,
> -			"virtio is now ready for processing.\n");
> -		return 1;
> +	for (i = 0; i < dev->virt_qp_nb; i++) {
> +		rvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ];
> +		tvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ];
> +
> +		if (!vq_is_ready(rvq) || !vq_is_ready(tvq)) {
> +			RTE_LOG(INFO, VHOST_CONFIG,
> +				"virtio is not ready for processing.\n");
> +			return 0;
> +		}
>   	}
> +
>   	RTE_LOG(INFO, VHOST_CONFIG,
> -		"virtio isn't ready for processing.\n");
> -	return 0;
> +		"virtio is now ready for processing.\n");
> +	return 1;
>   }
>
>   void
> @@ -290,13 +298,9 @@ user_get_vring_base(struct vhost_device_ctx ctx,
>   	 * sent and only sent in vhost_vring_stop.
>   	 * TODO: cleanup the vring, it isn't usable since here.
>   	 */
> -	if ((dev->virtqueue[VIRTIO_RXQ]->kickfd) >= 0) {
> -		close(dev->virtqueue[VIRTIO_RXQ]->kickfd);
> -		dev->virtqueue[VIRTIO_RXQ]->kickfd = -1;
> -	}
> -	if ((dev->virtqueue[VIRTIO_TXQ]->kickfd) >= 0) {
> -		close(dev->virtqueue[VIRTIO_TXQ]->kickfd);
> -		dev->virtqueue[VIRTIO_TXQ]->kickfd = -1;
> +	if ((dev->virtqueue[state->index]->kickfd) >= 0) {

always >= 0

> +		close(dev->virtqueue[state->index]->kickfd);
> +		dev->virtqueue[state->index]->kickfd = -1;

again unsigned

>   	}
>
>   	return 0;
> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
> index deac6b9..643a92e 100644
> --- a/lib/librte_vhost/virtio-net.c
> +++ b/lib/librte_vhost/virtio-net.c
> @@ -36,6 +36,7 @@
>   #include <stddef.h>
>   #include <stdint.h>
>   #include <stdlib.h>
> +#include <assert.h>
>   #include <sys/mman.h>
>   #include <unistd.h>
>   #ifdef RTE_LIBRTE_VHOST_NUMA
> @@ -178,6 +179,15 @@ add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
>
>   }
>
> +static void
> +cleanup_vq(struct vhost_virtqueue *vq)
> +{
> +	if (vq->callfd >= 0)
> +		close(vq->callfd);
> +	if (vq->kickfd >= 0)
> +		close(vq->kickfd);

both always >=0

> +}
> +
>   /*
>    * Unmap any memory, close any file descriptors and
>    * free any memory owned by a device.
> @@ -185,6 +195,8 @@ add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
>   static void
>   cleanup_device(struct virtio_net *dev)
>   {
> +	uint32_t i;
> +
>   	/* Unmap QEMU memory file if mapped. */
>   	if (dev->mem) {
>   		munmap((void *)(uintptr_t)dev->mem->mapped_address,
> @@ -192,15 +204,10 @@ cleanup_device(struct virtio_net *dev)
>   		free(dev->mem);
>   	}
>
> -	/* Close any event notifiers opened by device. */
> -	if (dev->virtqueue[VIRTIO_RXQ]->callfd >= 0)
> -		close(dev->virtqueue[VIRTIO_RXQ]->callfd);
> -	if (dev->virtqueue[VIRTIO_RXQ]->kickfd >= 0)
> -		close(dev->virtqueue[VIRTIO_RXQ]->kickfd);
> -	if (dev->virtqueue[VIRTIO_TXQ]->callfd >= 0)
> -		close(dev->virtqueue[VIRTIO_TXQ]->callfd);
> -	if (dev->virtqueue[VIRTIO_TXQ]->kickfd >= 0)
> -		close(dev->virtqueue[VIRTIO_TXQ]->kickfd);
> +	for (i = 0; i < dev->virt_qp_nb; i++) {
> +		cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ]);
> +		cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ]);
> +	}
>   }
>
>   /*
> @@ -209,9 +216,11 @@ cleanup_device(struct virtio_net *dev)
>   static void
>   free_device(struct virtio_net_config_ll *ll_dev)
>   {
> -	/* Free any malloc'd memory */
> -	rte_free(ll_dev->dev.virtqueue[VIRTIO_RXQ]);
> -	rte_free(ll_dev->dev.virtqueue[VIRTIO_TXQ]);
> +	uint32_t i;
> +
> +	for (i = 0; i < ll_dev->dev.virt_qp_nb; i++)
> +		rte_free(ll_dev->dev.virtqueue[i * VIRTIO_QNUM]);
> +
>   	rte_free(ll_dev);
>   }
>
> @@ -244,6 +253,50 @@ rm_config_ll_entry(struct virtio_net_config_ll *ll_dev,
>   	}
>   }
>
> +static void
> +init_vring_queue(struct vhost_virtqueue *vq)
> +{
> +	memset(vq, 0, sizeof(struct vhost_virtqueue));
> +
> +	vq->kickfd = -1;
> +	vq->callfd = -1;

same here

> +
> +	/* Backends are set to -1 indicating an inactive device. */
> +	vq->backend = -1;
> +}
> +
> +static void
> +init_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
> +{
> +	init_vring_queue(dev->virtqueue[qp_idx * VIRTIO_QNUM + VIRTIO_RXQ]);
> +	init_vring_queue(dev->virtqueue[qp_idx * VIRTIO_QNUM + VIRTIO_TXQ]);
> +}
> +
> +static int
> +alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
> +{
> +	struct vhost_virtqueue *virtqueue = NULL;
> +	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
> +	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
> +
> +	virtqueue = rte_malloc(NULL,
> +			       sizeof(struct vhost_virtqueue) * VIRTIO_QNUM, 0);
> +	if (virtqueue == NULL) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"Failed to allocate memory for virt qp:%d.\n", qp_idx);
> +		return -1;
> +	}
> +
> +	dev->virtqueue[virt_rx_q_idx] = virtqueue;
> +	dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ;
> +
> +	init_vring_queue_pair(dev, qp_idx);
> +
> +	dev->virt_qp_nb += 1;
> +
> +	return 0;
> +}
> +
>   /*
>    *  Initialise all variables in device structure.
>    */
> @@ -251,6 +304,7 @@ static void
>   init_device(struct virtio_net *dev)
>   {
>   	uint64_t vq_offset;
> +	uint32_t i;
>
>   	/*
>   	 * Virtqueues have already been malloced so
> @@ -261,17 +315,9 @@ init_device(struct virtio_net *dev)
>   	/* Set everything to 0. */
>   	memset((void *)(uintptr_t)((uint64_t)(uintptr_t)dev + vq_offset), 0,
>   		(sizeof(struct virtio_net) - (size_t)vq_offset));
> -	memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct vhost_virtqueue));
> -	memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct vhost_virtqueue));
>
> -	dev->virtqueue[VIRTIO_RXQ]->kickfd = -1;
> -	dev->virtqueue[VIRTIO_RXQ]->callfd = -1;
> -	dev->virtqueue[VIRTIO_TXQ]->kickfd = -1;
> -	dev->virtqueue[VIRTIO_TXQ]->callfd = -1;
> -
> -	/* Backends are set to -1 indicating an inactive device. */
> -	dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
> -	dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
> +	for (i = 0; i < dev->virt_qp_nb; i++)
> +		init_vring_queue_pair(dev, i);
>   }
>
>   /*
> @@ -283,7 +329,6 @@ static int
>   new_device(struct vhost_device_ctx ctx)
>   {
>   	struct virtio_net_config_ll *new_ll_dev;
> -	struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;
>
>   	/* Setup device and virtqueues. */
>   	new_ll_dev = rte_malloc(NULL, sizeof(struct virtio_net_config_ll), 0);
> @@ -294,28 +339,6 @@ new_device(struct vhost_device_ctx ctx)
>   		return -1;
>   	}
>
> -	virtqueue_rx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
> -	if (virtqueue_rx == NULL) {
> -		rte_free(new_ll_dev);
> -		RTE_LOG(ERR, VHOST_CONFIG,
> -			"(%"PRIu64") Failed to allocate memory for rxq.\n",
> -			ctx.fh);
> -		return -1;
> -	}
> -
> -	virtqueue_tx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
> -	if (virtqueue_tx == NULL) {
> -		rte_free(virtqueue_rx);
> -		rte_free(new_ll_dev);
> -		RTE_LOG(ERR, VHOST_CONFIG,
> -			"(%"PRIu64") Failed to allocate memory for txq.\n",
> -			ctx.fh);
> -		return -1;
> -	}
> -
> -	new_ll_dev->dev.virtqueue[VIRTIO_RXQ] = virtqueue_rx;
> -	new_ll_dev->dev.virtqueue[VIRTIO_TXQ] = virtqueue_tx;
> -
>   	/* Initialise device and virtqueues. */
>   	init_device(&new_ll_dev->dev);
>
> @@ -680,13 +703,21 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>   {
>   	struct virtio_net *dev;
>   	struct vhost_virtqueue *vq;
> +	uint32_t cur_qp_idx = file->index / VIRTIO_QNUM;
>
>   	dev = get_device(ctx);
>   	if (dev == NULL)
>   		return -1;
>
> +	/* alloc vring queue pair if it is a new queue pair */
> +	if (cur_qp_idx + 1 > dev->virt_qp_nb) {
> +		if (alloc_vring_queue_pair(dev, cur_qp_idx) < 0)
> +			return -1;
> +	}
> +
>   	/* file->index refers to the queue index. The txq is 1, rxq is 0. */
>   	vq = dev->virtqueue[file->index];
> +	assert(vq != NULL);
>
>   	if (vq->callfd >= 0)
>   		close(vq->callfd);
>


I hope I helped,
Thanks,
Marcel
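
A side note on the diff above: alloc_vring_queue_pair() obtains both
vrings of a pair from a single rte_malloc() call, which is why
free_device() passes only the slot at i * VIRTIO_QNUM (the block's base
address) to rte_free(). A self-contained sketch of that convention,
with plain malloc()/free() standing in for rte_malloc()/rte_free() and
the struct reduced to a stub:

    #include <stdlib.h>

    #define VIRTIO_RXQ  0
    #define VIRTIO_TXQ  1
    #define VIRTIO_QNUM 2

    struct vhost_virtqueue { int kickfd, callfd, backend; };

    /* One allocation covers both vrings of the pair ... */
    int
    alloc_pair(struct vhost_virtqueue **virtqueue, unsigned int qp_idx)
    {
        struct vhost_virtqueue *pair;

        pair = malloc(sizeof(*pair) * VIRTIO_QNUM);
        if (pair == NULL)
            return -1;

        virtqueue[qp_idx * VIRTIO_QNUM + VIRTIO_RXQ] = pair;
        virtqueue[qp_idx * VIRTIO_QNUM + VIRTIO_TXQ] = pair + VIRTIO_TXQ;
        return 0;
    }

    /* ... so only the RX slot holds the base address that free() needs. */
    void
    free_pair(struct vhost_virtqueue **virtqueue, unsigned int qp_idx)
    {
        free(virtqueue[qp_idx * VIRTIO_QNUM]);
    }

    int main(void)
    {
        struct vhost_virtqueue *virtqueue[4] = { 0 };

        if (alloc_pair(virtqueue, 1) == 0)
            free_pair(virtqueue, 1);
        return 0;
    }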
  
Yuanhan Liu Sept. 21, 2015, 2:06 a.m. UTC | #2
On Sun, Sep 20, 2015 at 04:58:42PM +0300, Marcel Apfelbaum wrote:
> On 09/18/2015 06:10 PM, Yuanhan Liu wrote:
> >[...]
> >diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
> >index 360254e..e83d279 100644
> >--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
> >+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
> >@@ -206,25 +206,33 @@ err_mmap:
> >  }
> >
> 
> Hi,
> 
> >  static int
> >+vq_is_ready(struct vhost_virtqueue *vq)
> >+{
> >+	return vq && vq->desc   &&
> >+	       vq->kickfd != -1 &&
> >+	       vq->callfd != -1;
> 
>  kickfd and callfd are unsigned

Hi,

I made 4 cleanup patches a few weeks ago, including the patch
that defines kickfd and callfd as int type, and they have already
been ACKed by Huawei Xie and Changchun Ouyang. It's likely that
they will be merged, hence I made this patchset based on them.

This also answers the question from your other email: why the
patchset can't be applied.

Sorry for not pointing that out; I assumed Thomas (cc'ed) would
apply them soon. And thanks for the review, anyway.

	--yliu
  
Michael S. Tsirkin Sept. 21, 2015, 9:07 a.m. UTC | #3
On Sun, Sep 20, 2015 at 04:58:42PM +0300, Marcel Apfelbaum wrote:
> On 09/18/2015 06:10 PM, Yuanhan Liu wrote:
> >[...]
> >diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
> >index 360254e..e83d279 100644
> >--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
> >+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
> >@@ -206,25 +206,33 @@ err_mmap:
> >  }
> >
> 
> Hi,
> 
> >  static int
> >+vq_is_ready(struct vhost_virtqueue *vq)
> >+{
> >+	return vq && vq->desc   &&
> >+	       vq->kickfd != -1 &&
> >+	       vq->callfd != -1;
> 
>  kickfd and callfd are unsigned

That's probably a bug.
File descriptors are signed, and -1 is what QEMU uses to mean "nop".
This comparison converts -1 to unsigned int, so it happens to work.
The >= 0 checks below won't work.

I think the fd types need to be fixed.
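
To illustrate Michael's point, here is a standalone sketch (not from
the patch) with kickfd standing in for the unsigned field:

    #include <stdio.h>

    int main(void)
    {
        unsigned int kickfd = -1;   /* the sentinel is stored as UINT_MAX */

        /* Works by accident: -1 is converted to unsigned int, so both
         * sides compare as UINT_MAX and the "unset" check still holds. */
        printf("kickfd != -1 -> %d\n", kickfd != -1);   /* prints 0 */

        /* Broken: an unsigned value is always >= 0, so guards like
         * "if (vq->kickfd >= 0) close(vq->kickfd);" also run for the
         * -1 sentinel, and close(-1) fails. */
        printf("kickfd >= 0  -> %d\n", kickfd >= 0);    /* prints 1 */

        return 0;
    }

Declaring the fields as plain int (as the cleanup series Yuanhan
mentions above reportedly does) makes both comparisons behave as
intended.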


  
Marcel Apfelbaum Sept. 21, 2015, 5:56 p.m. UTC | #4
On 09/21/2015 05:06 AM, Yuanhan Liu wrote:
> On Sun, Sep 20, 2015 at 04:58:42PM +0300, Marcel Apfelbaum wrote:
>> On 09/18/2015 06:10 PM, Yuanhan Liu wrote:
>>> [...]
>>> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
>>> index 360254e..e83d279 100644
>>> --- a/lib/librte_vhost/vhost_user/virtio-net-user.c
>>> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
>>> @@ -206,25 +206,33 @@ err_mmap:
>>>   }
>>>
>>
>> Hi,
>>
>>>   static int
>>> +vq_is_ready(struct vhost_virtqueue *vq)
>>> +{
>>> +	return vq && vq->desc   &&
>>> +	       vq->kickfd != -1 &&
>>> +	       vq->callfd != -1;
>>
>>   kickfd and callfd are unsigned
>
> Hi,
>
> I have made 4 cleanup patches few weeks before, including the patch
> to define kickfd and callfd as int type, and they have already got
> the ACK from Huawei Xie, and Chuangchun Ouyang. It's likely that
> they will be merged, hence I made this patchset based on them.
>
> This will also answer the question from your another email: can't
> apply.

Hi,
Thank you for the response, it makes sense now.

I have another issue; maybe you can help.
I have some problems making it work with the OVS/DPDK backend and the virtio-net driver in the guest.

I am using a simple setup:
     http://wiki.qemu.org/Features/vhost-user-ovs-dpdk
that connects 2 VMs using OVS's dpdkvhostuser ports (regular virtio-net driver in guest, not the PMD driver).

The setup worked fine with the previous DPDK MQ implementation (v4); however, on this one the traffic stops
once I set queues=n in the guest. (virtio-net uses only one queue when the guest starts, even if QEMU has multiple queues.)

Three steps are required in order to enable multiple queues (the first two in OVS, the third in the guest):
1. Apply the following patch:
  - https://www.mail-archive.com/dev@openvswitch.org/msg49198.html
  - It needs merging (I think)
2. Configure ovs for multiqueue:
  - ovs-vsctl set Open_vSwitch . other_config:n-dpdk-rxqs=<queues nr, the same as QEMU>
  - ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=<cpu mask for rx queues, say 0xff00>
3. In order to set queues=n in the guest, use:
  - ethtool -L eth0 combined <queues nr, the same as QEMU>

Any pointers/ideas would be appreciated.

Thank you,
Marcel



  
Yuanhan Liu Sept. 22, 2015, 7:31 a.m. UTC | #5
On Mon, Sep 21, 2015 at 08:56:30PM +0300, Marcel Apfelbaum wrote:
> On 09/21/2015 05:06 AM, Yuanhan Liu wrote:
> >On Sun, Sep 20, 2015 at 04:58:42PM +0300, Marcel Apfelbaum wrote:
> >>On 09/18/2015 06:10 PM, Yuanhan Liu wrote:
> >>>[...]
> >>>diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
> >>>index 360254e..e83d279 100644
> >>>--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
> >>>+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
> >>>@@ -206,25 +206,33 @@ err_mmap:
> >>>  }
> >>>
> >>
> >>Hi,
> >>
> >>>  static int
> >>>+vq_is_ready(struct vhost_virtqueue *vq)
> >>>+{
> >>>+	return vq && vq->desc   &&
> >>>+	       vq->kickfd != -1 &&
> >>>+	       vq->callfd != -1;
> >>
> >>  kickfd and callfd are unsigned
> >
> >Hi,
> >
> >I have made 4 cleanup patches few weeks before, including the patch
> >to define kickfd and callfd as int type, and they have already got
> >the ACK from Huawei Xie, and Chuangchun Ouyang. It's likely that
> >they will be merged, hence I made this patchset based on them.
> >
> >This will also answer the question from your another email: can't
> >apply.
> 
> Hi,
> Thank you for the response, it makes sense now.
> 
> T have another issue, maybe you can help.
> I have some problems making it work with OVS/DPDK backend and virtio-net driver in guest.
> 
> I am using a simple setup:
>     http://wiki.qemu.org/Features/vhost-user-ovs-dpdk
> that connects 2 VMs using OVS's dpdkvhostuser ports (regular virtio-net driver in guest, not the PMD driver).
> 
> The setup worked fine with the prev DPDK MQ implementation (V4), however on this one the traffic stops
> once I set queues=n in guest.

Hi,

Could you be more specific about that? It would also be helpful if you
could tell me which steps you took for testing, besides the setup steps
you mentioned in the QEMU wiki and in this email.

I did some rough testing based on your test guide, and I indeed found
an issue: the IP address assigned by "ifconfig" disappears soon the
first few times; after about 2 or 3 resets, it never changes.

(Well, I saw that quite a few times before while trying different QEMU
net devices. So, it might be a system configuration issue, or something
else?)

Besides that, it works: I can wget a big file from the host, for example.

	--yliu

> >>>
> >>>  	/* Setup device and virtqueues. */
> >>>  	new_ll_dev = rte_malloc(NULL, sizeof(struct virtio_net_config_ll), 0);
> >>>@@ -294,28 +339,6 @@ new_device(struct vhost_device_ctx ctx)
> >>>  		return -1;
> >>>  	}
> >>>
> >>>-	virtqueue_rx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
> >>>-	if (virtqueue_rx == NULL) {
> >>>-		rte_free(new_ll_dev);
> >>>-		RTE_LOG(ERR, VHOST_CONFIG,
> >>>-			"(%"PRIu64") Failed to allocate memory for rxq.\n",
> >>>-			ctx.fh);
> >>>-		return -1;
> >>>-	}
> >>>-
> >>>-	virtqueue_tx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
> >>>-	if (virtqueue_tx == NULL) {
> >>>-		rte_free(virtqueue_rx);
> >>>-		rte_free(new_ll_dev);
> >>>-		RTE_LOG(ERR, VHOST_CONFIG,
> >>>-			"(%"PRIu64") Failed to allocate memory for txq.\n",
> >>>-			ctx.fh);
> >>>-		return -1;
> >>>-	}
> >>>-
> >>>-	new_ll_dev->dev.virtqueue[VIRTIO_RXQ] = virtqueue_rx;
> >>>-	new_ll_dev->dev.virtqueue[VIRTIO_TXQ] = virtqueue_tx;
> >>>-
> >>>  	/* Initialise device and virtqueues. */
> >>>  	init_device(&new_ll_dev->dev);
> >>>
> >>>@@ -680,13 +703,21 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
> >>>  {
> >>>  	struct virtio_net *dev;
> >>>  	struct vhost_virtqueue *vq;
> >>>+	uint32_t cur_qp_idx = file->index / VIRTIO_QNUM;
> >>>
> >>>  	dev = get_device(ctx);
> >>>  	if (dev == NULL)
> >>>  		return -1;
> >>>
> >>>+	/* alloc vring queue pair if it is a new queue pair */
> >>>+	if (cur_qp_idx + 1 > dev->virt_qp_nb) {
> >>>+		if (alloc_vring_queue_pair(dev, cur_qp_idx) < 0)
> >>>+			return -1;
> >>>+	}
> >>>+
> >>>  	/* file->index refers to the queue index. The txq is 1, rxq is 0. */
> >>>  	vq = dev->virtqueue[file->index];
> >>>+	assert(vq != NULL);
> >>>
> >>>  	if (vq->callfd >= 0)
> >>>  		close(vq->callfd);
> >>>
> >>
> >>
> >>I hope I helped,
> >>Thanks,
> >>Marcel
> >>
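A minimal standalone sketch of the unsigned-descriptor pitfall flagged in the
review above (standard C only; not part of the patch):

    #include <stdio.h>

    int main(void)
    {
        unsigned int kickfd = -1;   /* -1 wraps to UINT_MAX on assignment */

        /* Always true for an unsigned type, so a "close() if valid"
         * guard written this way filters nothing out. */
        if (kickfd >= 0)
            printf("kickfd >= 0 is always true\n");

        /* The != -1 test still behaves: the -1 literal is converted
         * to UINT_MAX before the comparison. */
        if (kickfd != -1)
            printf("not reached\n");
        else
            printf("kickfd != -1 correctly matches the sentinel\n");

        return 0;
    }

The cleanup series mentioned later in this thread addresses exactly this by
redefining kickfd and callfd as int.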
  
Marcel Apfelbaum Sept. 22, 2015, 8:10 a.m. UTC | #6
On 09/22/2015 10:31 AM, Yuanhan Liu wrote:
> On Mon, Sep 21, 2015 at 08:56:30PM +0300, Marcel Apfelbaum wrote:
[...]
>>>
>>> Hi,
>>>
> >>> I made 4 cleanup patches a few weeks ago, including the patch
> >>> to define kickfd and callfd as int type, and they have already got
> >>> the ACK from Huawei Xie and Changchun Ouyang. It's likely that
>>> they will be merged, hence I made this patchset based on them.
>>>
>>> This will also answer the question from your another email: can't
>>> apply.
>>
>> Hi,
>> Thank you for the response, it makes sense now.
>>
> >> I have another issue, maybe you can help.
>> I have some problems making it work with OVS/DPDK backend and virtio-net driver in guest.
>>
>> I am using a simple setup:
>>      http://wiki.qemu.org/Features/vhost-user-ovs-dpdk
>> that connects 2 VMs using OVS's dpdkvhostuser ports (regular virtio-net driver in guest, not the PMD driver).
>>
>> The setup worked fine with the prev DPDK MQ implementation (V4), however on this one the traffic stops
>> once I set queues=n in guest.
>
> Hi,
>
> Could you be more specific about that? It also would be helpful if you
> could tell me the steps, besides those setup steps you mentioned in the
> qemu wiki and this email, you did for testing.
>

Hi,
Thank you for your help.

I am sorry the wiki is not enough, I'll be happy to add all the missing parts.
In the meantime maybe you can tell me where the problem is, I also suggest to
post here the output of journalctl command.

We only need a regular machine and we want traffic between 2 VMs. I'll try to summarize the steps:

1. Be sure you have enough hugepages enabled (2M pages are enough) and mounted.
2. Configure and start OVS following the wiki
    - we only want one bridge with 2 dpdkvhostuser ports.
3. Start VMs using the wiki command line
    - check journalctl for possible errors. You can use
         journalctl  --since `date +%T --date="-10 minutes"`
      to see only last 10 minutes.
4. Configure the guests' IPs.
    - Disable the Network Manager as described below in the mail.
5. At this point you should be able to ping between guests.

Please let me know if you have any problem until this point.
I'll be happy to help. Please point any special steps you made that
are not in the WIKI. The journalctl logs would also help.

Does the ping between VMs work now?

If yes, please let me know and I'll go over MQ enabling.

> I had a very rough testing based on your test guides, I indeed found
> an issue: the IP address assigned by "ifconfig" disappears soon in the
> first few times and after about 2 or 3 times reset, it never changes.
>
> (well, I saw that quite few times before while trying different QEMU
> net devices. So, it might be a system configuration issue, or something
> else?)
>

You are right, this is a guest config issue, I think you should disable NetworkManager
for static IP addresses. Please use only the virtio-net device.

You can try this:
sudo systemctl stop NetworkManager
sudo systemctl disable NetworkManager


> Besides that, it works, say, I can wget a big file from host.
>

The target here is traffic between 2 VMs.
We want to be able to ping (for example) between VMs when MQ > 1 is enabled on both guests:
- ethtool -L eth0 combined <queues nr, the same as QEMU>

Thank you again for the involvement, this is very much appreciated!
Marcel

> 	--yliu
>
>> (virtio-net uses only one queue when the guest starts, even if QEMU has multiple queues).
>>
>> Two steps are required in order to enable multiple queues in OVS.
>> 1. Apply the following patch:
>>   - https://www.mail-archive.com/dev@openvswitch.org/msg49198.html
>>   - It needs merging (I think)
>> 2. Configure ovs for multiqueue:
>>   - ovs-vsctl set Open_vSwitch . other_config:n-dpdk-rxqs=<queues nr, the same as QEMU>
>>   - ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=<cpu mask for rx queues, say 0xff00>
>> 3. In order to set queues=n in guest use:
>>   - ethtool -L eth0 combined <queues nr, the same as QEMU>
>>
>> Any pointers/ideas would be appreciated.
>>
>> Thank you,
>> Marcel
>>
>>
>>
>>>
[...]
  
Yuanhan Liu Sept. 22, 2015, 8:34 a.m. UTC | #7
On Tue, Sep 22, 2015 at 11:10:13AM +0300, Marcel Apfelbaum wrote:
> On 09/22/2015 10:31 AM, Yuanhan Liu wrote:
> >On Mon, Sep 21, 2015 at 08:56:30PM +0300, Marcel Apfelbaum wrote:
> [...]
> >>>
> >>>Hi,
> >>>
> >>>I made 4 cleanup patches a few weeks ago, including the patch
> >>>to define kickfd and callfd as int type, and they have already got
> >>>the ACK from Huawei Xie and Changchun Ouyang. It's likely that
> >>>they will be merged, hence I made this patchset based on them.
> >>>
> >>>This will also answer the question from your another email: can't
> >>>apply.
> >>
> >>Hi,
> >>Thank you for the response, it makes sense now.
> >>
> >>I have another issue, maybe you can help.
> >>I have some problems making it work with OVS/DPDK backend and virtio-net driver in guest.
> >>
> >>I am using a simple setup:
> >>     http://wiki.qemu.org/Features/vhost-user-ovs-dpdk
> >>that connects 2 VMs using OVS's dpdkvhostuser ports (regular virtio-net driver in guest, not the PMD driver).
> >>
> >>The setup worked fine with the prev DPDK MQ implementation (V4), however on this one the traffic stops
> >>once I set queues=n in guest.
> >
> >Hi,
> >
> >Could you be more specific about that? It also would be helpful if you
> >could tell me the steps, besides those setup steps you mentioned in the
> >qemu wiki and this email, you did for testing.
> >
> 
> Hi,
> Thank you for your help.
> 
> I am sorry the wiki is not enough, I'll be happy to add all the missing parts.
> In the meantime maybe you can tell me where the problem is, I also suggest to
> post here the output of journalctl command.
> 
> We only need a regular machine and we want traffic between 2 VMs. I'll try to summarize the steps:
> 
> 1. Be sure you have enough hugepages enabled (2M pages are enough) and mounted.
> 2. Configure and start OVS following the wiki
>    - we only want one bridge with 2 dpdkvhostuser ports.
> 3. Start VMs using the wiki command line
>    - check journalctl for possible errors. You can use
>         journalctl  --since `date +%T --date="-10 minutes"`
>      to see only last 10 minutes.
> 4. Configure the guests' IPs.
>    - Disable the Network Manager as described below in the mail.
> 5. At this point you should be able to ping between guests.
> 
> Please let me know if you have any problem until this point.
> I'll be happy to help. Please point any special steps you made that
> are not in the WIKI. The journalctl logs would also help.
> 
> Does the ping between VMs work now?

Yes, it works, too. I can ping the other vm inside a vm.

    [root@dpdk-kvm ~]# ethtool -l eth0
    Channel parameters for eth0:
    Pre-set maximums:
    RX:             0
    TX:             0
    Other:          0
    Combined:       2
    Current hardware settings:
    RX:             0
    TX:             0
    Other:          0
    Combined:       2

    [root@dpdk-kvm ~]# ifconfig eth0
    eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
            inet 192.168.100.11  netmask 255.255.255.0  broadcast 192.168.100.255
            inet6 fe80::5054:ff:fe12:3459  prefixlen 64  scopeid 0x20<link>
            ether 52:54:00:12:34:59  txqueuelen 1000  (Ethernet)
            RX packets 56  bytes 5166 (5.0 KiB)
            RX errors 0  dropped 0  overruns 0  frame 0
            TX packets 84  bytes 8303 (8.1 KiB)
            TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
    
    [root@dpdk-kvm ~]# ping 192.168.100.10
    PING 192.168.100.10 (192.168.100.10) 56(84) bytes of data.
    64 bytes from 192.168.100.10: icmp_seq=1 ttl=64 time=0.213 ms
    64 bytes from 192.168.100.10: icmp_seq=2 ttl=64 time=0.094 ms
    64 bytes from 192.168.100.10: icmp_seq=3 ttl=64 time=0.246 ms
    64 bytes from 192.168.100.10: icmp_seq=4 ttl=64 time=0.153 ms
    64 bytes from 192.168.100.10: icmp_seq=5 ttl=64 time=0.104 ms
    ^C
> 
> If yes, please let me know and I'll go over MQ enabling.

I'm just wondering why it doesn't work on your side.

> 
> >I had a very rough testing based on your test guides, I indeed found
> >an issue: the IP address assigned by "ifconfig" disappears soon in the
> >first few times and after about 2 or 3 times reset, it never changes.
> >
> >(well, I saw that quite few times before while trying different QEMU
> >net devices. So, it might be a system configuration issue, or something
> >else?)
> >
> 
> You are right, this is a guest config issue, I think you should disable NetworkManager

Yeah, I figured it out by myself, and it worked when I hardcoded it at
/etc/sysconfig/network-scripts/ifcfg-eth0.

> for static IP addresses. Please use only the virtio-net device.
> 
> >>You can try this:
> sudo systemctl stop NetworkManager
> sudo systemctl disable NetworkManager

Thanks for the info and tip!

> 
> >Besides that, it works, say, I can wget a big file from host.
> >
> 
> The target here is traffic between 2 VMs.
> We want to be able to ping (for example) between VMs when MQ > 1 is enabled on both guests:
> - ethtool -L eth0 combined <queues nr, the same as QEMU>

As you can see from my command log, I did so and it worked :)

> 
> Thank you again for the involvement, this is very much appreciated!

Welcome! I need to fix it if there is a bug.

	--yliu
  
Marcel Apfelbaum Sept. 22, 2015, 8:47 a.m. UTC | #8
On 09/22/2015 11:34 AM, Yuanhan Liu wrote:
> On Tue, Sep 22, 2015 at 11:10:13AM +0300, Marcel Apfelbaum wrote:
>> On 09/22/2015 10:31 AM, Yuanhan Liu wrote:
>>> On Mon, Sep 21, 2015 at 08:56:30PM +0300, Marcel Apfelbaum wrote:
>> [...]
>>>>>
>>>>> Hi,
>>>>>
>>>>> I made 4 cleanup patches a few weeks ago, including the patch
>>>>> to define kickfd and callfd as int type, and they have already got
>>>>> the ACK from Huawei Xie and Changchun Ouyang. It's likely that
>>>>> they will be merged, hence I made this patchset based on them.
>>>>>
>>>>> This will also answer the question from your another email: can't
>>>>> apply.
>>>>
>>>> Hi,
>>>> Thank you for the response, it makes sense now.
>>>>
>>>> I have another issue, maybe you can help.
>>>> I have some problems making it work with OVS/DPDK backend and virtio-net driver in guest.
>>>>
>>>> I am using a simple setup:
>>>>      http://wiki.qemu.org/Features/vhost-user-ovs-dpdk
>>>> that connects 2 VMs using OVS's dpdkvhostuser ports (regular virtio-net driver in guest, not the PMD driver).
>>>>
>>>> The setup worked fine with the prev DPDK MQ implementation (V4), however on this one the traffic stops
>>>> once I set queues=n in guest.
>>>
>>> Hi,
>>>
>>> Could you be more specific about that? It also would be helpful if you
>>> could tell me the steps, besides those setup steps you mentioned in the
>>> qemu wiki and this email, you did for testing.
>>>
>>
>> Hi,
>> Thank you for your help.
>>
>> I am sorry the wiki is not enough, I'll be happy to add all the missing parts.
>> In the meantime maybe you can tell me where the problem is, I also suggest to
>> post here the output of journalctl command.
>>
>> We only need a regular machine and we want traffic between 2 VMs. I'll try to summarize the steps:
>>
>> 1. Be sure you have enough hugepages enabled (2M pages are enough) and mounted.
>> 2. Configure and start OVS following the wiki
>>     - we only want one bridge with 2 dpdkvhostuser ports.
>> 3. Start VMs using the wiki command line
>>     - check journalctl for possible errors. You can use
>>          journalctl  --since `date +%T --date="-10 minutes"`
>>       to see only last 10 minutes.
>> 4. Configure the guests' IPs.
>>     - Disable the Network Manager as described below in the mail.
>> 5. At this point you should be able to ping between guests.
>>
>> Please let me know if you have any problem until this point.
>> I'll be happy to help. Please point any special steps you made that
>> are not in the WIKI. The journalctl logs would also help.
>>
>> Does the ping between VMs work now?
>
> Yes, it works, too. I can ping the other vm inside a vm.
>
>      [root@dpdk-kvm ~]# ethtool -l eth0
>      Channel parameters for eth0:
>      Pre-set maximums:
>      RX:             0
>      TX:             0
>      Other:          0
>      Combined:       2
>      Current hardware settings:
>      RX:             0
>      TX:             0
>      Other:          0
>      Combined:       2
>
>      [root@dpdk-kvm ~]# ifconfig eth0
>      eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
>              inet 192.168.100.11  netmask 255.255.255.0  broadcast 192.168.100.255
>              inet6 fe80::5054:ff:fe12:3459  prefixlen 64  scopeid 0x20<link>
>              ether 52:54:00:12:34:59  txqueuelen 1000  (Ethernet)
>              RX packets 56  bytes 5166 (5.0 KiB)
>              RX errors 0  dropped 0  overruns 0  frame 0
>              TX packets 84  bytes 8303 (8.1 KiB)
>              TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
>
>      [root@dpdk-kvm ~]# ping 192.168.100.10
>      PING 192.168.100.10 (192.168.100.10) 56(84) bytes of data.
>      64 bytes from 192.168.100.10: icmp_seq=1 ttl=64 time=0.213 ms
>      64 bytes from 192.168.100.10: icmp_seq=2 ttl=64 time=0.094 ms
>      64 bytes from 192.168.100.10: icmp_seq=3 ttl=64 time=0.246 ms
>      64 bytes from 192.168.100.10: icmp_seq=4 ttl=64 time=0.153 ms
>      64 bytes from 192.168.100.10: icmp_seq=5 ttl=64 time=0.104 ms
>      ^C
>>
>> If yes, please let me know and I'll go over MQ enabling.
>
> I'm just wondering why it doesn't work on your side.

Hi,

This is also working for me, but without enabling MQ (ethtool -L eth0 combined n, n>1).
The problem starts when I apply the patches and enable MQ (it needs a slightly different QEMU command line).

>
>>
>>> I had a very rough testing based on your test guides, I indeed found
>>> an issue: the IP address assigned by "ifconfig" disappears soon in the
>>> first few times and after about 2 or 3 times reset, it never changes.
>>>
>>> (well, I saw that quite few times before while trying different QEMU
>>> net devices. So, it might be a system configuration issue, or something
>>> else?)
>>>
>>
>> You are right, this is a guest config issue, I think you should disable NetworkManager
>
> Yeah, I figured it out by myself, and it worked when I hardcoded it at
> /etc/sysconfig/network-scripts/ifcfg-eth0.
>
>> for static IP addresses. Please use only the virtio-net device.
>>
>> You can try this:
>> sudo systemctl stop NetworkManager
>> sudo systemctl disable NetworkManager
>
> Thanks for the info and tip!
>
>>
>>> Besides that, it works, say, I can wget a big file from host.
>>>
>>
>> The target here is traffic between 2 VMs.
>> We want to be able to ping (for example) between VMs when MQ > 1 is enabled on both guests:
>> - ethtool -L eth0 combined <queues nr, the same as QEMU>
>
> As you can see from my command log, I did so and it worked :)
>

Let me understand, it worked after applying MQ patches on all 3 projects (DPDK, QEMU and OVS)?
It worked with MQ enabled? MQ >1 ?

You can be sure by using the following command in one of the VMs:
   cat /proc/interrupts | grep virtio
and see that you have interrupts for all virtio0-input.0/1/...


Thanks,
Marcel

>>
>> Thank you again for the involvement, this is very much appreciated!
>
> Welcome! I need to fix it if there is a bug.
>
> 	--yliu
>
  
Yuanhan Liu Sept. 22, 2015, 9:21 a.m. UTC | #9
On Tue, Sep 22, 2015 at 11:47:34AM +0300, Marcel Apfelbaum wrote:
> On 09/22/2015 11:34 AM, Yuanhan Liu wrote:
> >On Tue, Sep 22, 2015 at 11:10:13AM +0300, Marcel Apfelbaum wrote:
> >>On 09/22/2015 10:31 AM, Yuanhan Liu wrote:
> >>>On Mon, Sep 21, 2015 at 08:56:30PM +0300, Marcel Apfelbaum wrote:
> >>[...]
> >>>>>
> >>>>>Hi,
> >>>>>
> >>>>>I made 4 cleanup patches a few weeks ago, including the patch
> >>>>>to define kickfd and callfd as int type, and they have already got
> >>>>>the ACK from Huawei Xie and Changchun Ouyang. It's likely that
> >>>>>they will be merged, hence I made this patchset based on them.
> >>>>>
> >>>>>This will also answer the question from your another email: can't
> >>>>>apply.
> >>>>
> >>>>Hi,
> >>>>Thank you for the response, it makes sense now.
> >>>>
> >>>>I have another issue, maybe you can help.
> >>>>I have some problems making it work with OVS/DPDK backend and virtio-net driver in guest.
> >>>>
> >>>>I am using a simple setup:
> >>>>     http://wiki.qemu.org/Features/vhost-user-ovs-dpdk
> >>>>that connects 2 VMs using OVS's dpdkvhostuser ports (regular virtio-net driver in guest, not the PMD driver).
> >>>>
> >>>>The setup worked fine with the prev DPDK MQ implementation (V4), however on this one the traffic stops
> >>>>once I set queues=n in guest.
> >>>
> >>>Hi,
> >>>
> >>>Could you be more specific about that? It also would be helpful if you
> >>>could tell me the steps, besides those setup steps you mentioned in the
> >>>qemu wiki and this email, you did for testing.
> >>>
> >>
> >>Hi,
> >>Thank you for your help.
> >>
> >>I am sorry the wiki is not enough, I'll be happy to add all the missing parts.
> >>In the meantime maybe you can tell me where the problem is, I also suggest to
> >>post here the output of journalctl command.
> >>
> >>We only need a regular machine and we want traffic between 2 VMs. I'll try to summarize the steps:
> >>
> >>1. Be sure you have enough hugepages enabled (2M pages are enough) and mounted.
> >>2. Configure and start OVS following the wiki
> >>    - we only want one bridge with 2 dpdkvhostuser ports.
> >>3. Start VMs using the wiki command line
> >>    - check journalctl for possible errors. You can use
> >>         journalctl  --since `date +%T --date="-10 minutes"`
> >>      to see only last 10 minutes.
> >>4. Configure the guests' IPs.
> >>    - Disable the Network Manager as described below in the mail.
> >>5. At this point you should be able to ping between guests.
> >>
> >>Please let me know if you have any problem until this point.
> >>I'll be happy to help. Please point any special steps you made that
> >>are not in the WIKI. The journalctl logs would also help.
> >>
> >>Does the ping between VMs work now?
> >
> >Yes, it works, too. I can ping the other vm inside a vm.
> >
> >     [root@dpdk-kvm ~]# ethtool -l eth0
> >     Channel parameters for eth0:
> >     Pre-set maximums:
> >     RX:             0
> >     TX:             0
> >     Other:          0
> >     Combined:       2
> >     Current hardware settings:
> >     RX:             0
> >     TX:             0
> >     Other:          0
> >     Combined:       2
> >
> >     [root@dpdk-kvm ~]# ifconfig eth0
> >     eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
> >             inet 192.168.100.11  netmask 255.255.255.0  broadcast 192.168.100.255
> >             inet6 fe80::5054:ff:fe12:3459  prefixlen 64  scopeid 0x20<link>
> >             ether 52:54:00:12:34:59  txqueuelen 1000  (Ethernet)
> >             RX packets 56  bytes 5166 (5.0 KiB)
> >             RX errors 0  dropped 0  overruns 0  frame 0
> >             TX packets 84  bytes 8303 (8.1 KiB)
> >             TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
> >
> >     [root@dpdk-kvm ~]# ping 192.168.100.10
> >     PING 192.168.100.10 (192.168.100.10) 56(84) bytes of data.
> >     64 bytes from 192.168.100.10: icmp_seq=1 ttl=64 time=0.213 ms
> >     64 bytes from 192.168.100.10: icmp_seq=2 ttl=64 time=0.094 ms
> >     64 bytes from 192.168.100.10: icmp_seq=3 ttl=64 time=0.246 ms
> >     64 bytes from 192.168.100.10: icmp_seq=4 ttl=64 time=0.153 ms
> >     64 bytes from 192.168.100.10: icmp_seq=5 ttl=64 time=0.104 ms
> >     ^C
> >>
> >>If yes, please let me know and I'll go over MQ enabling.
> >
> >I'm just wondering why it doesn't work on your side.
> 
> Hi,
> 
> >>This is also working for me, but without enabling MQ (ethtool -L eth0 combined n, n>1).
> >>The problem starts when I apply the patches and enable MQ (it needs a slightly different QEMU command line).
> 
> >
> >>
> >>>I had a very rough testing based on your test guides, I indeed found
> >>>an issue: the IP address assigned by "ifconfig" disappears soon in the
> >>>first few times and after about 2 or 3 times reset, it never changes.
> >>>
> >>>(well, I saw that quite few times before while trying different QEMU
> >>>net devices. So, it might be a system configuration issue, or something
> >>>else?)
> >>>
> >>
> >>You are right, this is a guest config issue, I think you should disable NetworkManager
> >
> >Yeah, I figured it out by myself, and it worked when I hardcoded it at
> >/etc/sysconfig/network-scripts/ifcfg-eth0.
> >
> >>for static IP addresses. Please use only the virtio-net device.
> >>
> >>You can try this:
> >>sudo systemctl stop NetworkManager
> >>sudo systemctl disable NetworkManager
> >
> >Thanks for the info and tip!
> >
> >>
> >>>Besides that, it works, say, I can wget a big file from host.
> >>>
> >>
> >>The target here is traffic between 2 VMs.
> >>We want to be able to ping (for example) between VMs when MQ > 1 is enabled on both guests:
> >>- ethtool -L eth0 combined <queues nr, the same as QEMU>
> >
> >As you can see from my command log, I did so and it worked :)
> >
> 
> Let me understand, it worked after applying MQ patches on all 3 projects (DPDK, QEMU and OVS)?
> It worked with MQ enabled? MQ >1 ?

Yes, however, I tried a few more times this time, and found it sometimes
worked, and sometimes not. Sounds like there is a bug somewhere.

> 
> You can be sure by using the following command in one of the VMs:
>   cat /proc/interrupts | grep virtio
> and see that you have interrupts for all virtio0-input.0/1/...

     [root@dpdk-kvm ~]# cat /proc/interrupts | grep virtio
     24:        0        0    PCI-MSI-edge       virtio0-config
     25:      425        0    PCI-MSI-edge       virtio0-virtqueues


BTW, I have seen some warnings from ovs:

    2015-09-22T02:08:58Z|00003|ofproto_dpif_upcall(pmd45)|WARN|upcall_cb failure: ukey installation fails
    
    2015-09-22T02:11:05Z|00003|ofproto_dpif_upcall(pmd44)|WARN|Dropped 29 log messages in last 127 seconds (most recently, 82 seconds ago) due to excessive rate
    2015-09-22T02:11:05Z|00004|ofproto_dpif_upcall(pmd44)|WARN|upcall_cb failure: ukey installation fails
    2015-09-22T02:12:17Z|00005|ofproto_dpif_upcall(pmd44)|WARN|Dropped 11 log messages in last 32 seconds (most recently, 14 seconds ago) due to excessive rate
    2015-09-22T02:12:17Z|00006|ofproto_dpif_upcall(pmd44)|WARN|upcall_cb failure: ukey installation fails
    2015-09-22T02:14:59Z|00007|ofproto_dpif_upcall(pmd44)|WARN|Dropped 2 log messages in last 161 seconds (most recently, 161 seconds ago) due to excessive rate
    2015-09-22T02:14:59Z|00008|ofproto_dpif_upcall(pmd44)|WARN|upcall_cb failure: ukey installation fails
    

Does that look abnormal to you?

Anyway, I'll check here if there is anything I can fix.

	--yliu
  
Marcel Apfelbaum Sept. 22, 2015, 10:06 a.m. UTC | #10
On 09/22/2015 12:21 PM, Yuanhan Liu wrote:
> On Tue, Sep 22, 2015 at 11:47:34AM +0300, Marcel Apfelbaum wrote:
>> On 09/22/2015 11:34 AM, Yuanhan Liu wrote:
>>> On Tue, Sep 22, 2015 at 11:10:13AM +0300, Marcel Apfelbaum wrote:
>>>> On 09/22/2015 10:31 AM, Yuanhan Liu wrote:
>>>>> On Mon, Sep 21, 2015 at 08:56:30PM +0300, Marcel Apfelbaum wrote:
>>>> [...]
>>>>>>>
>>>>>>> Hi,
>>>>>>>
>>>>>>> I made 4 cleanup patches a few weeks ago, including the patch
>>>>>>> to define kickfd and callfd as int type, and they have already got
>>>>>>> the ACK from Huawei Xie and Changchun Ouyang. It's likely that
>>>>>>> they will be merged, hence I made this patchset based on them.
>>>>>>>
>>>>>>> This will also answer the question from your another email: can't
>>>>>>> apply.
>>>>>>
>>>>>> Hi,
>>>>>> Thank you for the response, it makes sense now.
>>>>>>
>>>>>> I have another issue, maybe you can help.
>>>>>> I have some problems making it work with OVS/DPDK backend and virtio-net driver in guest.
>>>>>>
>>>>>> I am using a simple setup:
>>>>>>      http://wiki.qemu.org/Features/vhost-user-ovs-dpdk
>>>>>> that connects 2 VMs using OVS's dpdkvhostuser ports (regular virtio-net driver in guest, not the PMD driver).
>>>>>>
>>>>>> The setup worked fine with the prev DPDK MQ implementation (V4), however on this one the traffic stops
>>>>>> once I set queues=n in guest.
>>>>>
>>>>> Hi,
>>>>>
>>>>> Could you be more specific about that? It also would be helpful if you
>>>>> could tell me the steps, besides those setup steps you mentioned in the
>>>>> qemu wiki and this email, you did for testing.
>>>>>
>>>>
>>>> Hi,
>>>> Thank you for your help.
>>>>
>>>> I am sorry the wiki is not enough, I'll be happy to add all the missing parts.
>>>> In the meantime maybe you can tell me where the problem is, I also suggest to
>>>> post here the output of journalctl command.
>>>>
>>>> We only need a regular machine and we want traffic between 2 VMs. I'll try to summarize the steps:
>>>>
>>>> 1. Be sure you have enough hugepages enabled (2M pages are enough) and mounted.
>>>> 2. Configure and start OVS following the wiki
>>>>     - we only want one bridge with 2 dpdkvhostuser ports.
>>>> 3. Start VMs using the wiki command line
>>>>     - check journalctl for possible errors. You can use
>>>>          journalctl  --since `date +%T --date="-10 minutes"`
>>>>       to see only last 10 minutes.
>>>> 4. Configure the guests' IPs.
>>>>     - Disable the Network Manager as described below in the mail.
>>>> 5. At this point you should be able to ping between guests.
>>>>
>>>> Please let me know if you have any problem until this point.
>>>> I'll be happy to help. Please point any special steps you made that
>>>> are not in the WIKI. The journalctl logs would also help.
>>>>
>>>> Does the ping between VMs work now?
>>>
>>> Yes, it works, too. I can ping the other vm inside a vm.
>>>
>>>      [root@dpdk-kvm ~]# ethtool -l eth0
>>>      Channel parameters for eth0:
>>>      Pre-set maximums:
>>>      RX:             0
>>>      TX:             0
>>>      Other:          0
>>>      Combined:       2
>>>      Current hardware settings:
>>>      RX:             0
>>>      TX:             0
>>>      Other:          0
>>>      Combined:       2
>>>
>>>      [root@dpdk-kvm ~]# ifconfig eth0
>>>      eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
>>>              inet 192.168.100.11  netmask 255.255.255.0  broadcast 192.168.100.255
>>>              inet6 fe80::5054:ff:fe12:3459  prefixlen 64  scopeid 0x20<link>
>>>              ether 52:54:00:12:34:59  txqueuelen 1000  (Ethernet)
>>>              RX packets 56  bytes 5166 (5.0 KiB)
>>>              RX errors 0  dropped 0  overruns 0  frame 0
>>>              TX packets 84  bytes 8303 (8.1 KiB)
>>>              TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
>>>
>>>      [root@dpdk-kvm ~]# ping 192.168.100.10
>>>      PING 192.168.100.10 (192.168.100.10) 56(84) bytes of data.
>>>      64 bytes from 192.168.100.10: icmp_seq=1 ttl=64 time=0.213 ms
>>>      64 bytes from 192.168.100.10: icmp_seq=2 ttl=64 time=0.094 ms
>>>      64 bytes from 192.168.100.10: icmp_seq=3 ttl=64 time=0.246 ms
>>>      64 bytes from 192.168.100.10: icmp_seq=4 ttl=64 time=0.153 ms
>>>      64 bytes from 192.168.100.10: icmp_seq=5 ttl=64 time=0.104 ms
>>>      ^C
>>>>
>>>> If yes, please let me know and I'll go over MQ enabling.
>>>
>>> I'm just wondering why it doesn't work on your side.
>>
>> Hi,
>>
>> This is also working for me, but without enabling MQ (ethtool -L eth0 combined n, n>1).
>> The problem starts when I apply the patches and enable MQ (it needs a slightly different QEMU command line).
>>
>>>
>>>>
>>>>> I had a very rough testing based on your test guides, I indeed found
>>>>> an issue: the IP address assigned by "ifconfig" disappears soon in the
>>>>> first few times and after about 2 or 3 times reset, it never changes.
>>>>>
>>>>> (well, I saw that quite few times before while trying different QEMU
>>>>> net devices. So, it might be a system configuration issue, or something
>>>>> else?)
>>>>>
>>>>
>>>> You are right, this is a guest config issue, I think you should disable NetworkManager
>>>
>>> Yeah, I figured it out by myself, and it worked when I hardcoded it at
>>> /etc/sysconfig/network-scripts/ifcfg-eth0.
>>>
>>>> for static IP addresses. Please use only the virtio-net device.
>>>>
>>>> You can try this:
>>>> sudo systemctl stop NetworkManager
>>>> sudo systemctl disable NetworkManager
>>>
>>> Thanks for the info and tip!
>>>
>>>>
>>>>> Besides that, it works, say, I can wget a big file from host.
>>>>>
>>>>
>>>> The target here is traffic between 2 VMs.
>>>> We want to be able to ping (for example) between VMs when MQ > 1 is enabled on both guests:
>>>> - ethtool -L eth0 combined <queues nr, the same as QEMU>
>>>
>>> As you can see from my command log, I did so and it worked :)
>>>
>>
>> Let me understand, it worked after applying MQ patches on all 3 projects (DPDK, QEMU and OVS)?
>> It worked with MQ enabled? MQ >1 ?
>
> Yes, however, I tried a few more times this time, and found it sometimes
> worked, and sometimes not. Sounds like there is a bug somewhere.
>

Yes, I've been hunting it since you submitted the series :)

>>
>> You can be sure by using the following command in one of the VMs:
>>    cat /proc/interrupts | grep virtio
>> and see that you have interrupts for all virtio0-input.0/1/...
>
>       [root@dpdk-kvm ~]# cat /proc/interrupts | grep virtio
>       24:        0        0    PCI-MSI-edge       virtio0-config
>       25:      425        0    PCI-MSI-edge       virtio0-virtqueues
>

Here it shows that MQ is not enabled in the guest.
For queues=2 in qemu commandline and  'ethtool -L eth0 combined 2' in guest you should see:

  24:          0          0          0          0   PCI-MSI 65536-edge      virtio0-config
  25:         32          0         14          0   PCI-MSI 65537-edge      virtio0-input.0
  26:          1          0          0          0   PCI-MSI 65538-edge      virtio0-output.0
  27:         53          0          0          0   PCI-MSI 65539-edge      virtio0-input.1
  28:          1          0          0          0   PCI-MSI 65540-edge      virtio0-output.1


So, you are very close to reproducing the MQ bug:
Please ensure:
1. You have applied MQ patches to QEMU/DPDK
2. You apply the MQ patch to *OVS*:
    https://www.mail-archive.com/dev@openvswitch.org/msg49198.html
    - It does not apply cleanly; just remove the chunk with the "if" statement that fails to compile
3. Configure OVS for 2 queues:
   - ovs-vsctl set Open_vSwitch . other_config:n-dpdk-rxqs=2
   - ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xff00
4. Enable MQ on virtio-net device:
    -netdev type=vhost-user,id=mynet1,chardev=char0,vhostforce,queues=2 \
    -device virtio-net-pci,netdev=mynet1,mac=52:54:00:02:d9:$2,mq=on,vectors=8 \

At this stage you should still have ping working between VMs.

However, when running on both VMs:
    ethtool -L eth0 combined 2
traffic stops...

Thanks again for the help!

>
> BTW, I have seen some warnings from ovs:
>
>      2015-09-22T02:08:58Z|00003|ofproto_dpif_upcall(pmd45)|WARN|upcall_cb failure: ukey installation fails
>
>      2015-09-22T02:11:05Z|00003|ofproto_dpif_upcall(pmd44)|WARN|Dropped 29 log messages in last 127 seconds (most recently, 82 seconds ago) due to excessive rate
>      2015-09-22T02:11:05Z|00004|ofproto_dpif_upcall(pmd44)|WARN|upcall_cb failure: ukey installation fails
>      2015-09-22T02:12:17Z|00005|ofproto_dpif_upcall(pmd44)|WARN|Dropped 11 log messages in last 32 seconds (most recently, 14 seconds ago) due to excessive rate
>      2015-09-22T02:12:17Z|00006|ofproto_dpif_upcall(pmd44)|WARN|upcall_cb failure: ukey installation fails
>      2015-09-22T02:14:59Z|00007|ofproto_dpif_upcall(pmd44)|WARN|Dropped 2 log messages in last 161 seconds (most recently, 161 seconds ago) due to excessive rate
>      2015-09-22T02:14:59Z|00008|ofproto_dpif_upcall(pmd44)|WARN|upcall_cb failure: ukey installation fails
>
>
> Does that look abnormal to you?

Nope, but since you have ping between VMs it should not bother you.
>
> Anyway, I'll check here if there is anything I can fix.
Thanks!!!

Marcel
>
> 	--yliu
>
  
Yuanhan Liu Sept. 22, 2015, 2:22 p.m. UTC | #11
On Tue, Sep 22, 2015 at 01:06:17PM +0300, Marcel Apfelbaum wrote:
> On 09/22/2015 12:21 PM, Yuanhan Liu wrote:
> >On Tue, Sep 22, 2015 at 11:47:34AM +0300, Marcel Apfelbaum wrote:
> >>On 09/22/2015 11:34 AM, Yuanhan Liu wrote:
> >>>On Tue, Sep 22, 2015 at 11:10:13AM +0300, Marcel Apfelbaum wrote:
> >>>>On 09/22/2015 10:31 AM, Yuanhan Liu wrote:
> >>>>>On Mon, Sep 21, 2015 at 08:56:30PM +0300, Marcel Apfelbaum wrote:
> >>>>[...]
> >>>>>>>
> >>>>>>>Hi,
> >>>>>>>
> >>>>>>>I made 4 cleanup patches a few weeks ago, including the patch
> >>>>>>>to define kickfd and callfd as int type, and they have already got
> >>>>>>>the ACK from Huawei Xie and Changchun Ouyang. It's likely that
> >>>>>>>they will be merged, hence I made this patchset based on them.
> >>>>>>>
> >>>>>>>This will also answer the question from your another email: can't
> >>>>>>>apply.
> >>>>>>
> >>>>>>Hi,
> >>>>>>Thank you for the response, it makes sense now.
> >>>>>>
> >>>>>>I have another issue, maybe you can help.
> >>>>>>I have some problems making it work with OVS/DPDK backend and virtio-net driver in guest.
> >>>>>>
> >>>>>>I am using a simple setup:
> >>>>>>     http://wiki.qemu.org/Features/vhost-user-ovs-dpdk
> >>>>>>that connects 2 VMs using OVS's dpdkvhostuser ports (regular virtio-net driver in guest, not the PMD driver).
> >>>>>>
> >>>>>>The setup worked fine with the prev DPDK MQ implementation (V4), however on this one the traffic stops
> >>>>>>once I set queues=n in guest.
> >>>>>
> >>>>>Hi,
> >>>>>
> >>>>>Could you be more specific about that? It also would be helpful if you
> >>>>>could tell me the steps, besides those setup steps you mentioned in the
> >>>>>qemu wiki and this email, you did for testing.
> >>>>>
> >>>>
> >>>>Hi,
> >>>>Thank you for your help.
> >>>>
> >>>>I am sorry the wiki is not enough, I'll be happy to add all the missing parts.
> >>>>In the meantime maybe you can tell me where the problem is, I also suggest to
> >>>>post here the output of journalctl command.
> >>>>
> >>>>We only need a regular machine and we want traffic between 2 VMs. I'll try to summarize the steps:
> >>>>
> >>>>1. Be sure you have enough hugepages enabled (2M pages are enough) and mounted.
> >>>>2. Configure and start OVS following the wiki
> >>>>    - we only want one bridge with 2 dpdkvhostuser ports.
> >>>>3. Start VMs using the wiki command line
> >>>>    - check journalctl for possible errors. You can use
> >>>>         journalctl  --since `date +%T --date="-10 minutes"`
> >>>>      to see only last 10 minutes.
> >>>>4. Configure the guests' IPs.
> >>>>    - Disable the Network Manager as described below in the mail.
> >>>>5. At this point you should be able to ping between guests.
> >>>>
> >>>>Please let me know if you have any problem until this point.
> >>>>I'll be happy to help. Please point any special steps you made that
> >>>>are not in the WIKI. The journalctl logs would also help.
> >>>>
> >>>>Does the ping between VMs work now?
> >>>
> >>>Yes, it works, too. I can ping the other vm inside a vm.
> >>>
> >>>     [root@dpdk-kvm ~]# ethtool -l eth0
> >>>     Channel parameters for eth0:
> >>>     Pre-set maximums:
> >>>     RX:             0
> >>>     TX:             0
> >>>     Other:          0
> >>>     Combined:       2
> >>>     Current hardware settings:
> >>>     RX:             0
> >>>     TX:             0
> >>>     Other:          0
> >>>     Combined:       2
> >>>
> >>>     [root@dpdk-kvm ~]# ifconfig eth0
> >>>     eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
> >>>             inet 192.168.100.11  netmask 255.255.255.0  broadcast 192.168.100.255
> >>>             inet6 fe80::5054:ff:fe12:3459  prefixlen 64  scopeid 0x20<link>
> >>>             ether 52:54:00:12:34:59  txqueuelen 1000  (Ethernet)
> >>>             RX packets 56  bytes 5166 (5.0 KiB)
> >>>             RX errors 0  dropped 0  overruns 0  frame 0
> >>>             TX packets 84  bytes 8303 (8.1 KiB)
> >>>             TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
> >>>
> >>>     [root@dpdk-kvm ~]# ping 192.168.100.10
> >>>     PING 192.168.100.10 (192.168.100.10) 56(84) bytes of data.
> >>>     64 bytes from 192.168.100.10: icmp_seq=1 ttl=64 time=0.213 ms
> >>>     64 bytes from 192.168.100.10: icmp_seq=2 ttl=64 time=0.094 ms
> >>>     64 bytes from 192.168.100.10: icmp_seq=3 ttl=64 time=0.246 ms
> >>>     64 bytes from 192.168.100.10: icmp_seq=4 ttl=64 time=0.153 ms
> >>>     64 bytes from 192.168.100.10: icmp_seq=5 ttl=64 time=0.104 ms
> >>>     ^C
> >>>>
> >>>>If yes, please let me know and I'll go over MQ enabling.
> >>>
> >>>I'm just wondering why it doesn't work on your side.
> >>
> >>Hi,
> >>
> >>This is also working for me, but without enabling MQ (ethtool -L eth0 combined n, n>1).
> >>The problem starts when I apply the patches and enable MQ (it needs a slightly different QEMU command line).
> >>
> >>>
> >>>>
> >>>>>I had a very rough testing based on your test guides, I indeed found
> >>>>>an issue: the IP address assigned by "ifconfig" disappears soon in the
> >>>>>first few times and after about 2 or 3 times reset, it never changes.
> >>>>>
> >>>>>(well, I saw that quite few times before while trying different QEMU
> >>>>>net devices. So, it might be a system configuration issue, or something
> >>>>>else?)
> >>>>>
> >>>>
> >>>>You are right, this is a guest config issue, I think you should disable NetworkManager
> >>>
> >>>Yeah, I figured it out by myself, and it worked when I hardcoded it at
> >>>/etc/sysconfig/network-scripts/ifcfg-eth0.
> >>>
> >>>>for static IP addresses. Please use only the virtio-net device.
> >>>>
> >>>>You can try this:
> >>>>sudo systemctl stop NetworkManager
> >>>>sudo systemctl disable NetworkManager
> >>>
> >>>Thanks for the info and tip!
> >>>
> >>>>
> >>>>>Besides that, it works, say, I can wget a big file from host.
> >>>>>
> >>>>
> >>>>The target here is traffic between 2 VMs.
> >>>>We want to be able to ping (for example) between VMs when MQ > 1 is enabled on both guests:
> >>>>- ethtool -L eth0 combined <queues nr, the same as QEMU>
> >>>
> >>>As you can see from my command log, I did so and it worked :)
> >>>
> >>
> >>Let me understand, it worked after applying MQ patches on all 3 projects (DPDK, QEMU and OVS)?
> >>It worked with MQ enabled? MQ >1 ?
> >
> >>Yes, however, I tried a few more times this time, and found it sometimes
> >worked, and sometimes not. Sounds like there is a bug somewhere.

I put two quick debug printfs in the OVS DPDK code, and found out why it
sometimes works and sometimes not: when all data goes through the first
queue, it works; otherwise, it fails.
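
A hypothetical reconstruction of what those two printfs may have looked like;
the helper name and arguments are assumptions for illustration, not the actual
OVS code:

    #include <stdio.h>

    /* Sketch of the debug printout; dir is "TX" or "RX" and op is
     * "dequeued" or "enqueued", matching the traces below. */
    static void
    dbg_vhost(const char *dir, const char *sock, unsigned int qp_index,
              unsigned int asked, const char *op, unsigned int done)
    {
        fprintf(stderr, ":: %s: vhost-dev: %s, qp_index: %u, asked: %u, %s: %u\n",
                dir, sock, qp_index, asked, op, done);
    }

    int main(void)
    {
        dbg_vhost("TX", "/var/run/openvswitch/vhost-user2", 0, 32, "dequeued", 1);
        dbg_vhost("RX", "/var/run/openvswitch/vhost-user1", 0, 1, "enqueued", 1);
        return 0;
    }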

:: TX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 0, asked: 32, dequeued: 1
:: RX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 0, asked: 1, enqueued: 1
:: TX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 0, asked: 32, dequeued: 1
:: RX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 0, asked: 1, enqueued: 1
:: TX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 0, asked: 32, dequeued: 1
:: RX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 0, asked: 1, enqueued: 1
:: TX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 0, asked: 32, dequeued: 1
:: RX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 0, asked: 1, enqueued: 1
:: TX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 0, asked: 32, dequeued: 1
:: RX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 0, asked: 1, enqueued: 1
:: TX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 0, asked: 32, dequeued: 1
:: RX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 0, asked: 1, enqueued: 1


And the failed ones:

:: TX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 1, asked: 32, dequeued: 1
:: RX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 1, asked: 1, enqueued: 1
:: TX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 1, asked: 32, dequeued: 1
:: RX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 1, asked: 1, enqueued: 1
:: TX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 1, asked: 32, dequeued: 1
:: RX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 1, asked: 1, enqueued: 1

You can see that vhost-user1 never transfers packets back, hence ping
didn't work.

And if you run ifconfig in VM-1, you will find packet drops.


---

I then spent some time figuring out why the packet drops happened,
and found that the vq->vhost_hlen for the second (and above) queue
pairs is set wrongly: that's why it failed if packets are not
transferred by the first queue.
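
A minimal standalone model of the bug, assuming the fix is to propagate the
negotiated virtio-net header size to queue pairs allocated on demand (the
names mirror the patch, but this is a sketch, not the actual DPDK fix):

    #include <stdio.h>
    #include <stdlib.h>

    #define VIRTIO_QNUM 2
    #define VIRTIO_RXQ  0
    #define VIRTIO_TXQ  1

    struct vhost_virtqueue {
        size_t vhost_hlen;    /* virtio-net header size used on this ring */
    };

    struct virtio_net {
        struct vhost_virtqueue *virtqueue[8];
        size_t vhost_hlen;    /* negotiated once per device */
        unsigned int virt_qp_nb;
    };

    /* Allocate one RX/TX pair on its first vring_call; the key lines
     * inherit dev->vhost_hlen instead of leaving the new rings zeroed. */
    static int
    alloc_vring_queue_pair(struct virtio_net *dev, unsigned int qp_idx)
    {
        struct vhost_virtqueue *vq = calloc(VIRTIO_QNUM, sizeof(*vq));

        if (vq == NULL)
            return -1;
        vq[VIRTIO_RXQ].vhost_hlen = dev->vhost_hlen;
        vq[VIRTIO_TXQ].vhost_hlen = dev->vhost_hlen;
        dev->virtqueue[qp_idx * VIRTIO_QNUM + VIRTIO_RXQ] = &vq[VIRTIO_RXQ];
        dev->virtqueue[qp_idx * VIRTIO_QNUM + VIRTIO_TXQ] = &vq[VIRTIO_TXQ];
        dev->virt_qp_nb++;
        return 0;
    }

    int main(void)
    {
        struct virtio_net dev = { .vhost_hlen = 12 }; /* mergeable rx-buf header */

        alloc_vring_queue_pair(&dev, 0);
        alloc_vring_queue_pair(&dev, 1);  /* second pair, created on demand */
        printf("qp 1 RX vhost_hlen: %zu\n",
               dev.virtqueue[1 * VIRTIO_QNUM + VIRTIO_RXQ]->vhost_hlen);
        return 0;
    }

Without the two vhost_hlen assignments, the second pair would keep
vhost_hlen == 0 from calloc() and mis-parse every packet header, matching
the drops described above.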

What's "ironic" is that while making this patchset, I am somehow 
aware of that I missed it, and I had planned to fix it.  But I
just forgot it, and then it takes me (as well as you) some time
to figure it out, in a more painful way.

So, thank you a lot for your testing as well as the effort to
guide me on the OVS DPDK test.

It's proven to work after the fix (at least in my testing), but
it's late here and I'm gonna send a new version tomorrow, also
addressing some other comments. Please do more testing then :)


	--yliu

> >
> 
> Yes, I've been hunting it since you submitted the series :)
> 
> >>
> >>You can be sure by using the following command in one of the VMs:
> >>   cat /proc/interrupts | grep virtio
> >>and see that you have interrupts for all virtio0-input.0/1/...
> >
> >      [root@dpdk-kvm ~]# cat /proc/interrupts | grep virtio
> >      24:        0        0    PCI-MSI-edge       virtio0-config
> >      25:      425        0    PCI-MSI-edge       virtio0-virtqueues
> >
> 
> Here it shows that MQ is not enabled in the guest.
> For queues=2 in qemu commandline and  'ethtool -L eth0 combined 2' in guest you should see:
> 
>  24:          0          0          0          0   PCI-MSI 65536-edge      virtio0-config
>  25:         32          0         14          0   PCI-MSI 65537-edge      virtio0-input.0
>  26:          1          0          0          0   PCI-MSI 65538-edge      virtio0-output.0
>  27:         53          0          0          0   PCI-MSI 65539-edge      virtio0-input.1
>  28:          1          0          0          0   PCI-MSI 65540-edge      virtio0-output.1
> 
> 
> So, you are very close to reproducing the MQ bug:
> Please ensure:
> 1. You have applied MQ patches to QEMU/DPDK
> 2. You apply the MQ patch to *OVS*:
>    https://www.mail-archive.com/dev@openvswitch.org/msg49198.html
>    - It does not apply cleanly; just remove the chunk with the "if" statement that fails to compile
> 3. Configure OVS for 2 queues:
>   - ovs-vsctl set Open_vSwitch . other_config:n-dpdk-rxqs=2
>   - ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xff00
> 4. Enable MQ on virtio-net device:
>    -netdev type=vhost-user,id=mynet1,chardev=char0,vhostforce,queues=2 \
>    -device virtio-net-pci,netdev=mynet1,mac=52:54:00:02:d9:$2,mq=on,vectors=8 \
> 
> At this stage you should still have ping working between VMs.
> 
> However, when running on both VMs:
>    ethtool -L eth0 combined 2
> traffic stops...
> 
> Thanks again for the help!
> 
> >
> >BTW, I have seen some warnings from ovs:
> >
> >     2015-09-22T02:08:58Z|00003|ofproto_dpif_upcall(pmd45)|WARN|upcall_cb failure: ukey installation fails
> >
> >     2015-09-22T02:11:05Z|00003|ofproto_dpif_upcall(pmd44)|WARN|Dropped 29 log messages in last 127 seconds (most recently, 82 seconds ago) due to excessive rate
> >     2015-09-22T02:11:05Z|00004|ofproto_dpif_upcall(pmd44)|WARN|upcall_cb failure: ukey installation fails
> >     2015-09-22T02:12:17Z|00005|ofproto_dpif_upcall(pmd44)|WARN|Dropped 11 log messages in last 32 seconds (most recently, 14 seconds ago) due to excessive rate
> >     2015-09-22T02:12:17Z|00006|ofproto_dpif_upcall(pmd44)|WARN|upcall_cb failure: ukey installation fails
> >     2015-09-22T02:14:59Z|00007|ofproto_dpif_upcall(pmd44)|WARN|Dropped 2 log messages in last 161 seconds (most recently, 161 seconds ago) due to excessive rate
> >     2015-09-22T02:14:59Z|00008|ofproto_dpif_upcall(pmd44)|WARN|upcall_cb failure: ukey installation fails
> >
> >
> >Does that look abnormal to you?
> 
> Nope, but since you have ping between VMs it should not bother you.
> >
> >Anyway, I'll check here if there is anything I can fix.
> Thanks!!!
> 
> Marcel
> >
> >	--yliu
> >
  
Marcel Apfelbaum Sept. 22, 2015, 2:51 p.m. UTC | #12
On 09/22/2015 05:22 PM, Yuanhan Liu wrote:
> On Tue, Sep 22, 2015 at 01:06:17PM +0300, Marcel Apfelbaum wrote:
>> On 09/22/2015 12:21 PM, Yuanhan Liu wrote:
>>> On Tue, Sep 22, 2015 at 11:47:34AM +0300, Marcel Apfelbaum wrote:
>>>> On 09/22/2015 11:34 AM, Yuanhan Liu wrote:
>>>>> On Tue, Sep 22, 2015 at 11:10:13AM +0300, Marcel Apfelbaum wrote:
>>>>>> On 09/22/2015 10:31 AM, Yuanhan Liu wrote:
>>>>>>> On Mon, Sep 21, 2015 at 08:56:30PM +0300, Marcel Apfelbaum wrote:
>>>>>> [...]
>>>>>>>>>
>>>>>>>>> Hi,
>>>>>>>>>
>>>>>>>>> I made 4 cleanup patches a few weeks ago, including the patch
>>>>>>>>> to define kickfd and callfd as int type, and they have already got
>>>>>>>>> the ACK from Huawei Xie and Changchun Ouyang. It's likely that
>>>>>>>>> they will be merged, hence I made this patchset based on them.
>>>>>>>>>
>>>>>>>>> This will also answer the question from your another email: can't
>>>>>>>>> apply.
>>>>>>>>
>>>>>>>> Hi,
>>>>>>>> Thank you for the response, it makes sense now.
>>>>>>>>
>>>>>>>> I have another issue, maybe you can help.
>>>>>>>> I have some problems making it work with OVS/DPDK backend and virtio-net driver in guest.
>>>>>>>>
>>>>>>>> I am using a simple setup:
>>>>>>>>      http://wiki.qemu.org/Features/vhost-user-ovs-dpdk
>>>>>>>> that connects 2 VMs using OVS's dpdkvhostuser ports (regular virtio-net driver in guest, not the PMD driver).
>>>>>>>>
>>>>>>>> The setup worked fine with the prev DPDK MQ implementation (V4), however on this one the traffic stops
>>>>>>>> once I set queues=n in guest.
>>>>>>>
>>>>>>> Hi,
>>>>>>>
>>>>>>> Could you be more specific about that? It also would be helpful if you
>>>>>>> could tell me the steps, besides those setup steps you mentioned in the
>>>>>>> qemu wiki and this email, you did for testing.
>>>>>>>
>>>>>>
>>>>>> Hi,
>>>>>> Thank you for your help.
>>>>>>
>>>>>> I am sorry the wiki is not enough, I'll be happy to add all the missing parts.
>>>>>> In the meantime maybe you can tell me where the problem is, I also suggest to
>>>>>> post here the output of journalctl command.
>>>>>>
>>>>>> We only need a regular machine and we want traffic between 2 VMs. I'll try to summarize the steps:
>>>>>>
>>>>>> 1. Be sure you have enough hugepages enabled (2M pages are enough) and mounted.
>>>>>> 2. Configure and start OVS following the wiki
>>>>>>     - we only want one bridge with 2 dpdkvhostuser ports.
>>>>>> 3. Start VMs using the wiki command line
>>>>>>     - check journalctl for possible errors. You can use
>>>>>>          journalctl  --since `date +%T --date="-10 minutes"`
>>>>>>       to see only last 10 minutes.
>>>>>> 4. Configure the guests' IPs.
>>>>>>     - Disable the Network Manager as described below in the mail.
>>>>>> 5. At this point you should be able to ping between guests.
>>>>>>
>>>>>> Please let me know if you have any problem until this point.
>>>>>> I'll be happy to help. Please point any special steps you made that
>>>>>> are not in the WIKI. The journalctl logs would also help.
>>>>>>
>>>>>> Does the ping between VMs work now?
>>>>>
>>>>> Yes, it works, too. I can ping the other vm inside a vm.
>>>>>
>>>>>      [root@dpdk-kvm ~]# ethtool -l eth0
>>>>>      Channel parameters for eth0:
>>>>>      Pre-set maximums:
>>>>>      RX:             0
>>>>>      TX:             0
>>>>>      Other:          0
>>>>>      Combined:       2
>>>>>      Current hardware settings:
>>>>>      RX:             0
>>>>>      TX:             0
>>>>>      Other:          0
>>>>>      Combined:       2
>>>>>
>>>>>      [root@dpdk-kvm ~]# ifconfig eth0
>>>>>      eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
>>>>>              inet 192.168.100.11  netmask 255.255.255.0  broadcast 192.168.100.255
>>>>>              inet6 fe80::5054:ff:fe12:3459  prefixlen 64  scopeid 0x20<link>
>>>>>              ether 52:54:00:12:34:59  txqueuelen 1000  (Ethernet)
>>>>>              RX packets 56  bytes 5166 (5.0 KiB)
>>>>>              RX errors 0  dropped 0  overruns 0  frame 0
>>>>>              TX packets 84  bytes 8303 (8.1 KiB)
>>>>>              TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
>>>>>
>>>>>      [root@dpdk-kvm ~]# ping 192.168.100.10
>>>>>      PING 192.168.100.10 (192.168.100.10) 56(84) bytes of data.
>>>>>      64 bytes from 192.168.100.10: icmp_seq=1 ttl=64 time=0.213 ms
>>>>>      64 bytes from 192.168.100.10: icmp_seq=2 ttl=64 time=0.094 ms
>>>>>      64 bytes from 192.168.100.10: icmp_seq=3 ttl=64 time=0.246 ms
>>>>>      64 bytes from 192.168.100.10: icmp_seq=4 ttl=64 time=0.153 ms
>>>>>      64 bytes from 192.168.100.10: icmp_seq=5 ttl=64 time=0.104 ms
>>>>>      ^C
>>>>>>
>>>>>> If yes, please let me know and I'll go over MQ enabling.
>>>>>
>>>>> I'm just wondering why it doesn't work on your side.
>>>>
>>>> Hi,
>>>>
>>>> This is also working for me, but without enabling MQ (ethtool -L eth0 combined n, n>1).
>>>> The problem starts when I apply the patches and enable MQ (it needs a slightly different QEMU command line).
>>>>
>>>>>
>>>>>>
>>>>>>> I had a very rough testing based on your test guides, I indeed found
>>>>>>> an issue: the IP address assigned by "ifconfig" disappears soon in the
>>>>>>> first few times and after about 2 or 3 times reset, it never changes.
>>>>>>>
>>>>>>> (well, I saw that quite few times before while trying different QEMU
>>>>>>> net devices. So, it might be a system configuration issue, or something
>>>>>>> else?)
>>>>>>>
>>>>>>
>>>>>> You are right, this is a guest config issue, I think you should disable NetworkManager
>>>>>
>>>>> Yeah, I figured it out by myself, and it worked when I hardcoded it at
>>>>> /etc/sysconfig/network-scripts/ifcfg-eth0.
>>>>>
>>>>>> for static IP addresses. Please use only the virtio-net device.
>>>>>>
>>>>>> You can try this:
>>>>>> sudo systemctl stop NetworkManager
>>>>>> sudo systemctl disable NetworkManager
>>>>>
>>>>> Thanks for the info and tip!
>>>>>
>>>>>>
>>>>>>> Besides that, it works, say, I can wget a big file from host.
>>>>>>>
>>>>>>
>>>>>> The target here is traffic between 2 VMs.
>>>>>> We want to be able to ping (for example) between VMs when MQ > 1 is enabled on both guests:
>>>>>> - ethtool -L eth0 combined <queues nr, the same as QEMU>
>>>>>
>>>>> As you can see from my command log, I did so and it worked :)
>>>>>
>>>>
>>>> Let me understand, it worked after applying MQ patches on all 3 projects (DPDK, QEMU and OVS)?
>>>> It worked with MQ enabled? MQ >1 ?
>>>
>>> Yes, however, I tried a few more times this time, and found it sometimes
>>> worked, and sometimes not. Sounds like there is a bug somewhere.
>
> I put two quick debug printfs in the OVS DPDK code, and found out why it
> sometimes works and sometimes not: when all data goes through the first
> queue, it works; otherwise, it fails.
>

I saw this, yes, but I couldn't understand why.

> :: TX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 0, asked: 32, dequeued: 1
> :: RX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 0, asked: 1, enqueued: 1
> :: TX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 0, asked: 32, dequeued: 1
> :: RX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 0, asked: 1, enqueued: 1
> :: TX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 0, asked: 32, dequeued: 1
> :: RX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 0, asked: 1, enqueued: 1
> :: TX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 0, asked: 32, dequeued: 1
> :: RX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 0, asked: 1, enqueued: 1
> :: TX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 0, asked: 32, dequeued: 1
> :: RX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 0, asked: 1, enqueued: 1
> :: TX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 0, asked: 32, dequeued: 1
> :: RX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 0, asked: 1, enqueued: 1
>
>
> And the failed ones:
>
> :: TX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 1, asked: 32, dequeued: 1
> :: RX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 1, asked: 1, enqueued: 1
> :: TX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 1, asked: 32, dequeued: 1
> :: RX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 1, asked: 1, enqueued: 1
> :: TX: vhost-dev: /var/run/openvswitch/vhost-user2, qp_index: 1, asked: 32, dequeued: 1
> :: RX: vhost-dev: /var/run/openvswitch/vhost-user1, qp_index: 1, asked: 1, enqueued: 1
>
> You can see that vhost-user1 never transfers packets back, hence ping
> didn't work.
>
> And if you run ifconfig in VM-1, you will find packet drops.
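
A minimal sketch of what such per-queue-pair debug counters could look like;
the wrapper below is hypothetical (not the actual OVS code) and assumes the
qp_index-to-vring mapping used by this patchset:

    #include <stdio.h>
    #include <rte_mbuf.h>
    #include <rte_mempool.h>
    #include <rte_virtio_net.h>   /* VIRTIO_QNUM, rte_vhost_dequeue_burst() */

    /* Hypothetical debug wrapper: log what a vhost TX burst asked for vs.
     * what it actually dequeued, per queue pair.  The RX side would wrap
     * rte_vhost_enqueue_burst() with VIRTIO_RXQ the same way. */
    static uint16_t
    debug_vhost_tx(struct virtio_net *dev, uint16_t qp_index,
                   struct rte_mempool *mp, struct rte_mbuf **pkts,
                   uint16_t asked)
    {
        uint16_t queue_id = qp_index * VIRTIO_QNUM + VIRTIO_TXQ;
        uint16_t dequeued = rte_vhost_dequeue_burst(dev, queue_id, mp,
                                                    pkts, asked);

        printf(":: TX: vhost-dev: %s, qp_index: %u, asked: %u, dequeued: %u\n",
               dev->ifname, qp_index, asked, dequeued);
        return dequeued;
    }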
>
>
> ---
>
> I then spent some time figuring out why the packet drops happened,
> and found that vq->vhost_hlen for the second (and later) queue
> pairs is set wrongly: that's why it fails if packets are not
> transferred through the first queue.
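
vhost_hlen is the length of the virtio-net header the backend strips or
prepends on each packet: sizeof(struct virtio_net_hdr), or the
mergeable-rx-buffer variant when VIRTIO_NET_F_MRG_RXBUF is negotiated. A
sketch of how a fix could propagate it to every allocated queue pair rather
than only the first, based on the fields this patchset introduces and not
necessarily the exact code of the next revision:

    /* Sketch: apply the negotiated virtio-net header size to all queue
     * pairs, not just queue pair 0 (assumes the usual virtio-net.c
     * includes, e.g. <linux/virtio_net.h>). */
    static void
    set_vhost_hlen_all_qps(struct virtio_net *dev)
    {
        uint32_t hlen, i;

        if (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
            hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        else
            hlen = sizeof(struct virtio_net_hdr);

        for (i = 0; i < dev->virt_qp_nb; i++) {
            uint32_t base = i * VIRTIO_QNUM;

            dev->virtqueue[base + VIRTIO_RXQ]->vhost_hlen = hlen;
            dev->virtqueue[base + VIRTIO_TXQ]->vhost_hlen = hlen;
        }
    }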

I saw that sometimes, with DEBUG data = ON, it shows HEADER = 0;
sadly, my virtio knowledge is rather limited and I couldn't make
the connection with vhost_hlen.

Since I saw it wasn't going anywhere for a while, I asked for your help :)

>
> What's "ironic" is that while making this patchset, I am somehow
> aware of that I missed it, and I had planned to fix it.  But I
> just forgot it, and then it takes me (as well as you) some time
> to figure it out, in a more painful way.

Same old, same old ...
>
> So, thank you a lot for your testing, as well as for the effort
> to guide me through the OVS DPDK test.
>
No problem. Thank you for investing your time in it!

> It's proven to work after the fix (at least in my testing), but
> it's late here, so I'm gonna send a new version tomorrow, also
> addressing some other review comments. Please do more testing then :)
>

That's very good news!
Tomorrow we have a holiday, but the day after that I'll try it for sure.

Thanks again and good night!
Marcel

>
> 	--yliu
>
>>>
>>
>> Yes, I've been hunting it since you submitted the series :)
>>
>>>>
>>>> You can make sure by running the following command in one of the VMs:
>>>>    cat /proc/interrupts | grep virtio
>>>> and seeing that you have interrupts for all virtio0-input.0/1/...
>>>
>>>       [root@dpdk-kvm ~]# cat /proc/interrupts | grep virtio
>>>       24:        0        0    PCI-MSI-edge       virtio0-config
>>>       25:      425        0    PCI-MSI-edge       virtio0-virtqueues
>>>
>>
>> Here it shows that MQ is not enabled in the guest.
>> With queues=2 on the QEMU command line and 'ethtool -L eth0 combined 2' in the guest, you should see:
>>
>>   24:          0          0          0          0   PCI-MSI 65536-edge      virtio0-config
>>   25:         32          0         14          0   PCI-MSI 65537-edge      virtio0-input.0
>>   26:          1          0          0          0   PCI-MSI 65538-edge      virtio0-output.0
>>   27:         53          0          0          0   PCI-MSI 65539-edge      virtio0-input.1
>>   28:          1          0          0          0   PCI-MSI 65540-edge      virtio0-output.1
>>
>>
>> So, you are very close to reproducing the MQ bug.
>> Please ensure:
>> 1. You have applied the MQ patches to QEMU/DPDK.
>> 2. You have applied the MQ patch to *OVS*:
>>     https://www.mail-archive.com/dev@openvswitch.org/msg49198.html
>>     - It does not apply cleanly; just remove the chunk with the "if" statement that fails to compile.
>> 3. Configure OVS for 2 queues:
>>    - ovs-vsctl set Open_vSwitch . other_config:n-dpdk-rxqs=2
>>    - ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xff00
>> 4. Enable MQ on virtio-net device:
>>     -netdev type=vhost-user,id=mynet1,chardev=char0,vhostforce,queues=2 \
>>     -device virtio-net-pci,netdev=mynet1,mac=52:54:00:02:d9:$2,mq=on,vectors=8 \
>>
>> At this stage you should still have ping working between the VMs.
>>
>> However, when running on both VMs:
>>     ethtool -L eth0 combined 2
>> traffic stops... (that is the point at which the guests actually start
>> using the second queue pair)
>>
>> Thanks again for the help!
>>
>>>
>>> BTW, I have seen some warnings from OVS:
>>>
>>>      2015-09-22T02:08:58Z|00003|ofproto_dpif_upcall(pmd45)|WARN|upcall_cb failure: ukey installation fails
>>>
>>>      2015-09-22T02:11:05Z|00003|ofproto_dpif_upcall(pmd44)|WARN|Dropped 29 log messages in last 127 seconds (most recently, 82 seconds ago) due to excessive rate
>>>      2015-09-22T02:11:05Z|00004|ofproto_dpif_upcall(pmd44)|WARN|upcall_cb failure: ukey installation fails
>>>      2015-09-22T02:12:17Z|00005|ofproto_dpif_upcall(pmd44)|WARN|Dropped 11 log messages in last 32 seconds (most recently, 14 seconds ago) due to excessive rate
>>>      2015-09-22T02:12:17Z|00006|ofproto_dpif_upcall(pmd44)|WARN|upcall_cb failure: ukey installation fails
>>>      2015-09-22T02:14:59Z|00007|ofproto_dpif_upcall(pmd44)|WARN|Dropped 2 log messages in last 161 seconds (most recently, 161 seconds ago) due to excessive rate
>>>      2015-09-22T02:14:59Z|00008|ofproto_dpif_upcall(pmd44)|WARN|upcall_cb failure: ukey installation fails
>>>
>>>
>>> Does that look abnormal to you?
>>
>> Nope, but since you have ping between the VMs it should not bother you.
>>>
>>> Anyway, I'll check here if there is anything I can fix.
>> Thanks!!!
>>
>> Marcel
>>>
>>> 	--yliu
>>>
  

Patch
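
The diff below flattens the per-device virtqueue array so that each queue
pair occupies two consecutive slots, allocated on demand. The indexing
convention it relies on throughout (VIRTIO_QNUM is 2, VIRTIO_RXQ is 0,
VIRTIO_TXQ is 1), shown as a small standalone sketch:

    /* Queue pair qp_idx maps to two consecutive vring slots: */
    uint32_t rx_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;  /* even slot: RX */
    uint32_t tx_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;  /* odd slot:  TX */

    /* e.g. qp_idx 0 -> vrings 0 (RX) and 1 (TX);
     *      qp_idx 1 -> vrings 2 (RX) and 3 (TX).
     * set_vring_call() recovers the pair index from a vring index with
     * file->index / VIRTIO_QNUM. */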

diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index e3a21e5..5dd6493 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -96,7 +96,7 @@  struct vhost_virtqueue {
  * Device structure contains all configuration information relating to the device.
  */
 struct virtio_net {
-	struct vhost_virtqueue	*virtqueue[VIRTIO_QNUM];	/**< Contains all virtqueue information. */
+	struct vhost_virtqueue	*virtqueue[VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX];	/**< Contains all virtqueue information. */
 	struct virtio_memory	*mem;		/**< QEMU memory and memory region information. */
 	uint64_t		features;	/**< Negotiated feature set. */
 	uint64_t		protocol_features;	/**< Negotiated protocol feature set. */
@@ -104,6 +104,7 @@  struct virtio_net {
 	uint32_t		flags;		/**< Device flags. Only used to check if device is running on data core. */
 #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
 	char			ifname[IF_NAME_SZ];	/**< Name of the tap device or socket path. */
+	uint32_t		virt_qp_nb;	/**< number of queue pair we have allocated */
 	void			*priv;		/**< private context */
 } __rte_cache_aligned;
 
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 360254e..e83d279 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -206,25 +206,33 @@  err_mmap:
 }
 
 static int
+vq_is_ready(struct vhost_virtqueue *vq)
+{
+	return vq && vq->desc   &&
+	       vq->kickfd != -1 &&
+	       vq->callfd != -1;
+}
+
+static int
 virtio_is_ready(struct virtio_net *dev)
 {
 	struct vhost_virtqueue *rvq, *tvq;
+	uint32_t i;
 
-	/* mq support in future.*/
-	rvq = dev->virtqueue[VIRTIO_RXQ];
-	tvq = dev->virtqueue[VIRTIO_TXQ];
-	if (rvq && tvq && rvq->desc && tvq->desc &&
-		(rvq->kickfd != -1) &&
-		(rvq->callfd != -1) &&
-		(tvq->kickfd != -1) &&
-		(tvq->callfd != -1)) {
-		RTE_LOG(INFO, VHOST_CONFIG,
-			"virtio is now ready for processing.\n");
-		return 1;
+	for (i = 0; i < dev->virt_qp_nb; i++) {
+		rvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ];
+		tvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ];
+
+		if (!vq_is_ready(rvq) || !vq_is_ready(tvq)) {
+			RTE_LOG(INFO, VHOST_CONFIG,
+				"virtio is not ready for processing.\n");
+			return 0;
+		}
 	}
+
 	RTE_LOG(INFO, VHOST_CONFIG,
-		"virtio isn't ready for processing.\n");
-	return 0;
+		"virtio is now ready for processing.\n");
+	return 1;
 }
 
 void
@@ -290,13 +298,9 @@  user_get_vring_base(struct vhost_device_ctx ctx,
 	 * sent and only sent in vhost_vring_stop.
 	 * TODO: cleanup the vring, it isn't usable since here.
 	 */
-	if ((dev->virtqueue[VIRTIO_RXQ]->kickfd) >= 0) {
-		close(dev->virtqueue[VIRTIO_RXQ]->kickfd);
-		dev->virtqueue[VIRTIO_RXQ]->kickfd = -1;
-	}
-	if ((dev->virtqueue[VIRTIO_TXQ]->kickfd) >= 0) {
-		close(dev->virtqueue[VIRTIO_TXQ]->kickfd);
-		dev->virtqueue[VIRTIO_TXQ]->kickfd = -1;
+	if ((dev->virtqueue[state->index]->kickfd) >= 0) {
+		close(dev->virtqueue[state->index]->kickfd);
+		dev->virtqueue[state->index]->kickfd = -1;
 	}
 
 	return 0;
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index deac6b9..643a92e 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -36,6 +36,7 @@ 
 #include <stddef.h>
 #include <stdint.h>
 #include <stdlib.h>
+#include <assert.h>
 #include <sys/mman.h>
 #include <unistd.h>
 #ifdef RTE_LIBRTE_VHOST_NUMA
@@ -178,6 +179,15 @@  add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
 
 }
 
+static void
+cleanup_vq(struct vhost_virtqueue *vq)
+{
+	if (vq->callfd >= 0)
+		close(vq->callfd);
+	if (vq->kickfd >= 0)
+		close(vq->kickfd);
+}
+
 /*
  * Unmap any memory, close any file descriptors and
  * free any memory owned by a device.
@@ -185,6 +195,8 @@  add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
 static void
 cleanup_device(struct virtio_net *dev)
 {
+	uint32_t i;
+
 	/* Unmap QEMU memory file if mapped. */
 	if (dev->mem) {
 		munmap((void *)(uintptr_t)dev->mem->mapped_address,
@@ -192,15 +204,10 @@  cleanup_device(struct virtio_net *dev)
 		free(dev->mem);
 	}
 
-	/* Close any event notifiers opened by device. */
-	if (dev->virtqueue[VIRTIO_RXQ]->callfd >= 0)
-		close(dev->virtqueue[VIRTIO_RXQ]->callfd);
-	if (dev->virtqueue[VIRTIO_RXQ]->kickfd >= 0)
-		close(dev->virtqueue[VIRTIO_RXQ]->kickfd);
-	if (dev->virtqueue[VIRTIO_TXQ]->callfd >= 0)
-		close(dev->virtqueue[VIRTIO_TXQ]->callfd);
-	if (dev->virtqueue[VIRTIO_TXQ]->kickfd >= 0)
-		close(dev->virtqueue[VIRTIO_TXQ]->kickfd);
+	for (i = 0; i < dev->virt_qp_nb; i++) {
+		cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ]);
+		cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ]);
+	}
 }
 
 /*
@@ -209,9 +216,11 @@  cleanup_device(struct virtio_net *dev)
 static void
 free_device(struct virtio_net_config_ll *ll_dev)
 {
-	/* Free any malloc'd memory */
-	rte_free(ll_dev->dev.virtqueue[VIRTIO_RXQ]);
-	rte_free(ll_dev->dev.virtqueue[VIRTIO_TXQ]);
+	uint32_t i;
+
+	for (i = 0; i < ll_dev->dev.virt_qp_nb; i++)
+		rte_free(ll_dev->dev.virtqueue[i * VIRTIO_QNUM]);
+
 	rte_free(ll_dev);
 }
 
@@ -244,6 +253,50 @@  rm_config_ll_entry(struct virtio_net_config_ll *ll_dev,
 	}
 }
 
+static void
+init_vring_queue(struct vhost_virtqueue *vq)
+{
+	memset(vq, 0, sizeof(struct vhost_virtqueue));
+
+	vq->kickfd = -1;
+	vq->callfd = -1;
+
+	/* Backends are set to -1 indicating an inactive device. */
+	vq->backend = -1;
+}
+
+static void
+init_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
+{
+	init_vring_queue(dev->virtqueue[qp_idx * VIRTIO_QNUM + VIRTIO_RXQ]);
+	init_vring_queue(dev->virtqueue[qp_idx * VIRTIO_QNUM + VIRTIO_TXQ]);
+}
+
+static int
+alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
+{
+	struct vhost_virtqueue *virtqueue = NULL;
+	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
+	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
+
+	virtqueue = rte_malloc(NULL,
+			       sizeof(struct vhost_virtqueue) * VIRTIO_QNUM, 0);
+	if (virtqueue == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to allocate memory for virt qp:%d.\n", qp_idx);
+		return -1;
+	}
+
+	dev->virtqueue[virt_rx_q_idx] = virtqueue;
+	dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ;
+
+	init_vring_queue_pair(dev, qp_idx);
+
+	dev->virt_qp_nb += 1;
+
+	return 0;
+}
+
 /*
  *  Initialise all variables in device structure.
  */
@@ -251,6 +304,7 @@  static void
 init_device(struct virtio_net *dev)
 {
 	uint64_t vq_offset;
+	uint32_t i;
 
 	/*
 	 * Virtqueues have already been malloced so
@@ -261,17 +315,9 @@  init_device(struct virtio_net *dev)
 	/* Set everything to 0. */
 	memset((void *)(uintptr_t)((uint64_t)(uintptr_t)dev + vq_offset), 0,
 		(sizeof(struct virtio_net) - (size_t)vq_offset));
-	memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct vhost_virtqueue));
-	memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct vhost_virtqueue));
 
-	dev->virtqueue[VIRTIO_RXQ]->kickfd = -1;
-	dev->virtqueue[VIRTIO_RXQ]->callfd = -1;
-	dev->virtqueue[VIRTIO_TXQ]->kickfd = -1;
-	dev->virtqueue[VIRTIO_TXQ]->callfd = -1;
-
-	/* Backends are set to -1 indicating an inactive device. */
-	dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
-	dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
+	for (i = 0; i < dev->virt_qp_nb; i++)
+		init_vring_queue_pair(dev, i);
 }
 
 /*
@@ -283,7 +329,6 @@  static int
 new_device(struct vhost_device_ctx ctx)
 {
 	struct virtio_net_config_ll *new_ll_dev;
-	struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;
 
 	/* Setup device and virtqueues. */
 	new_ll_dev = rte_malloc(NULL, sizeof(struct virtio_net_config_ll), 0);
@@ -294,28 +339,6 @@  new_device(struct vhost_device_ctx ctx)
 		return -1;
 	}
 
-	virtqueue_rx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
-	if (virtqueue_rx == NULL) {
-		rte_free(new_ll_dev);
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to allocate memory for rxq.\n",
-			ctx.fh);
-		return -1;
-	}
-
-	virtqueue_tx = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
-	if (virtqueue_tx == NULL) {
-		rte_free(virtqueue_rx);
-		rte_free(new_ll_dev);
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to allocate memory for txq.\n",
-			ctx.fh);
-		return -1;
-	}
-
-	new_ll_dev->dev.virtqueue[VIRTIO_RXQ] = virtqueue_rx;
-	new_ll_dev->dev.virtqueue[VIRTIO_TXQ] = virtqueue_tx;
-
 	/* Initialise device and virtqueues. */
 	init_device(&new_ll_dev->dev);
 
@@ -680,13 +703,21 @@  set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
 {
 	struct virtio_net *dev;
 	struct vhost_virtqueue *vq;
+	uint32_t cur_qp_idx = file->index / VIRTIO_QNUM;
 
 	dev = get_device(ctx);
 	if (dev == NULL)
 		return -1;
 
+	/* alloc vring queue pair if it is a new queue pair */
+	if (cur_qp_idx + 1 > dev->virt_qp_nb) {
+		if (alloc_vring_queue_pair(dev, cur_qp_idx) < 0)
+			return -1;
+	}
+
 	/* file->index refers to the queue index. The txq is 1, rxq is 0. */
 	vq = dev->virtqueue[file->index];
+	assert(vq != NULL);
 
 	if (vq->callfd >= 0)
 		close(vq->callfd);