[dpdk-dev,2/2] vhost: realloc virtio_net and virtqueue to the same node of vring desc table
Commit Message
When we get the address of vring descriptor table in VHOST_SET_VRING_ADDR message,
will try to reallocate virtio_net device and virtqueue to the same numa node.
Signed-off-by: Huawei Xie <huawei.xie@intel.com>
---
config/common_linuxapp | 1 +
lib/librte_vhost/Makefile | 4 ++
lib/librte_vhost/virtio-net.c | 93 +++++++++++++++++++++++++++++++++++++++++++
mk/rte.app.mk | 3 ++
4 files changed, 101 insertions(+)
Comments
2015-06-05 11:13, Huawei Xie:
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -92,6 +92,9 @@ endif # ! CONFIG_RTE_BUILD_COMBINE_LIBS
>
> _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += -lpcap
>
> +ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
> +_LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST) += -lnuma
> +
> ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),n)
> _LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST) += -lfuse
> endif
An endif is missing.
2015-06-17 18:47, Thomas Monjalon:
> 2015-06-05 11:13, Huawei Xie:
> > --- a/mk/rte.app.mk
> > +++ b/mk/rte.app.mk
> > @@ -92,6 +92,9 @@ endif # ! CONFIG_RTE_BUILD_COMBINE_LIBS
> >
> > _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += -lpcap
> >
> > +ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
> > +_LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST) += -lnuma
> > +
> > ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),n)
> > _LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST) += -lfuse
> > endif
>
> An endif is missing.
After adding the endif and enabling the NUMA option, these errors appear:
lib/librte_vhost/virtio-net.c:535:21: error: ‘new_vq’ may be used uninitialized in this function
lib/librte_vhost/virtio-net.c:547:63: error: ‘new_ll_dev’ may be used uninitialized in this function
Tommy,
I won't review the code, but given it doesn't build, I can guess how it has
been reviewed.
Acked-by line is valuable only if the review is carefully done.
This patch series go back to lowest merge priority.
@@ -421,6 +421,7 @@ CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
#
CONFIG_RTE_LIBRTE_VHOST=n
CONFIG_RTE_LIBRTE_VHOST_USER=y
+CONFIG_RTE_LIBRTE_VHOST_NUMA=n
CONFIG_RTE_LIBRTE_VHOST_DEBUG=n
#
@@ -46,6 +46,10 @@ CFLAGS += -I vhost_cuse -lfuse
LDFLAGS += -lfuse
endif
+ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
+LDFLAGS += -lnuma
+endif
+
# all source are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := virtio-net.c vhost_rxtx.c
ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
@@ -38,6 +38,9 @@
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>
+#ifdef RTE_LIBRTE_VHOST_NUMA
+#include <numaif.h>
+#endif
#include <sys/socket.h>
@@ -481,6 +484,93 @@ set_vring_num(struct vhost_device_ctx ctx, struct vhost_vring_state *state)
}
/*
+ * Reallocate virtio_det and vhost_virtqueue data structure to make them on the
+ * same numa node as the memory of vring descriptor.
+ */
+#ifdef RTE_LIBRTE_VHOST_NUMA
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index)
+{
+ int oldnode, newnode;
+ struct virtio_net_config_ll *old_ll_dev, *new_ll_dev;
+ struct vhost_virtqueue *old_vq, *new_vq;
+ int ret;
+ int realloc_dev = 0, realloc_vq = 0;
+
+ old_ll_dev = (struct virtio_net_config_ll *)dev;
+ old_vq = dev->virtqueue[index];
+
+ ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc,
+ MPOL_F_NODE | MPOL_F_ADDR);
+ ret = ret | get_mempolicy(&oldnode, NULL, 0, old_ll_dev,
+ MPOL_F_NODE | MPOL_F_ADDR);
+ if (ret) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Unable to get vring desc or dev numa information.\n");
+ return dev;
+ }
+ if (oldnode != newnode)
+ realloc_dev = 1;
+
+ ret = get_mempolicy(&oldnode, NULL, 0, old_vq,
+ MPOL_F_NODE | MPOL_F_ADDR);
+ if (ret) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Unable to get vq numa information.\n");
+ return dev;
+ }
+ if (oldnode != newnode)
+ realloc_vq = 1;
+
+ if (realloc_dev == 0 && realloc_vq == 0)
+ return dev;
+
+ if (realloc_dev)
+ new_ll_dev = rte_malloc_socket(NULL,
+ sizeof(struct virtio_net_config_ll), 0, newnode);
+ if (realloc_vq)
+ new_vq = rte_malloc_socket(NULL,
+ sizeof(struct vhost_virtqueue), 0, newnode);
+ if (!new_ll_dev || !new_vq) {
+ if (new_ll_dev)
+ rte_free(new_ll_dev);
+ if (new_vq)
+ rte_free(new_vq);
+ return dev;
+ }
+
+ if (realloc_vq)
+ memcpy(new_vq, old_vq, sizeof(*new_vq));
+ if (realloc_dev)
+ memcpy(new_ll_dev, old_ll_dev, sizeof(*new_ll_dev));
+ (new_ll_dev ? new_ll_dev : old_ll_dev)->dev.virtqueue[index] =
+ new_vq ? new_vq : old_vq;
+ if (realloc_vq)
+ rte_free(old_vq);
+ if (realloc_dev) {
+ if (ll_root == old_ll_dev)
+ ll_root = new_ll_dev;
+ else {
+ struct virtio_net_config_ll *prev = ll_root;
+ while (prev->next != old_ll_dev)
+ prev = prev->next;
+ prev->next = new_ll_dev;
+ new_ll_dev->next = old_ll_dev->next;
+ }
+ rte_free(old_ll_dev);
+ }
+
+ return &new_ll_dev->dev;
+}
+#else
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index __rte_unused)
+{
+ return dev;
+}
+#endif
+
+/*
* Called from CUSE IOCTL: VHOST_SET_VRING_ADDR
* The virtio device sends us the desc, used and avail ring addresses.
* This function then converts these to our address space.
@@ -508,6 +598,9 @@ set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr)
return -1;
}
+ dev = numa_realloc(dev, addr->index);
+ vq = dev->virtqueue[addr->index];
+
vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev,
addr->avail_user_addr);
if (vq->avail == 0) {
@@ -92,6 +92,9 @@ endif # ! CONFIG_RTE_BUILD_COMBINE_LIBS
_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += -lpcap
+ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
+_LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST) += -lnuma
+
ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),n)
_LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST) += -lfuse
endif