From patchwork Fri Jun 5 03:13:25 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Huawei Xie X-Patchwork-Id: 5154 Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [IPv6:::1]) by dpdk.org (Postfix) with ESMTP id 84F49C324; Fri, 5 Jun 2015 05:13:45 +0200 (CEST) Received: from mga14.intel.com (mga14.intel.com [192.55.52.115]) by dpdk.org (Postfix) with ESMTP id E6C005A06 for ; Fri, 5 Jun 2015 05:13:42 +0200 (CEST) Received: from orsmga001.jf.intel.com ([10.7.209.18]) by fmsmga103.fm.intel.com with ESMTP; 04 Jun 2015 20:13:42 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.13,555,1427785200"; d="scan'208";a="705676009" Received: from shvmail01.sh.intel.com ([10.239.29.42]) by orsmga001.jf.intel.com with ESMTP; 04 Jun 2015 20:13:39 -0700 Received: from shecgisg003.sh.intel.com (shecgisg003.sh.intel.com [10.239.29.90]) by shvmail01.sh.intel.com with ESMTP id t553DZdL031498; Fri, 5 Jun 2015 11:13:35 +0800 Received: from shecgisg003.sh.intel.com (localhost [127.0.0.1]) by shecgisg003.sh.intel.com (8.13.6/8.13.6/SuSE Linux 0.8) with ESMTP id t553DX4e000645; Fri, 5 Jun 2015 11:13:35 +0800 Received: (from hxie5@localhost) by shecgisg003.sh.intel.com (8.13.6/8.13.6/Submit) id t553DWOg000641; Fri, 5 Jun 2015 11:13:32 +0800 From: Huawei Xie To: dev@dpdk.org Date: Fri, 5 Jun 2015 11:13:25 +0800 Message-Id: <1433474005-597-3-git-send-email-huawei.xie@intel.com> X-Mailer: git-send-email 1.7.4.1 In-Reply-To: <1433474005-597-1-git-send-email-huawei.xie@intel.com> References: <1432778661-25428-1-git-send-email-huawei.xie@intel.com> <1433474005-597-1-git-send-email-huawei.xie@intel.com> Subject: [dpdk-dev] [PATCH 2/2] vhost: realloc virtio_net and virtqueue to the same node of vring desc table X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: 
List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" When we get the address of the vring descriptor table in the VHOST_SET_VRING_ADDR message, try to reallocate the virtio_net device and the virtqueue on the same NUMA node as that table. Signed-off-by: Huawei Xie --- config/common_linuxapp | 1 + lib/librte_vhost/Makefile | 4 ++ lib/librte_vhost/virtio-net.c | 93 +++++++++++++++++++++++++++++++++++++++++++ mk/rte.app.mk | 3 ++ 4 files changed, 101 insertions(+) diff --git a/config/common_linuxapp b/config/common_linuxapp index 0078dc9..4ace24e 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -421,6 +421,7 @@ CONFIG_RTE_KNI_VHOST_DEBUG_TX=n # CONFIG_RTE_LIBRTE_VHOST=n CONFIG_RTE_LIBRTE_VHOST_USER=y +CONFIG_RTE_LIBRTE_VHOST_NUMA=n CONFIG_RTE_LIBRTE_VHOST_DEBUG=n # diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile index a8645a6..6681f22 100644 --- a/lib/librte_vhost/Makefile +++ b/lib/librte_vhost/Makefile @@ -46,6 +46,10 @@ CFLAGS += -I vhost_cuse -lfuse LDFLAGS += -lfuse endif +ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y) +LDFLAGS += -lnuma +endif + # all source are stored in SRCS-y SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := virtio-net.c vhost_rxtx.c ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y) diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c index 19b74d6..8a80f5e 100644 --- a/lib/librte_vhost/virtio-net.c +++ b/lib/librte_vhost/virtio-net.c @@ -38,6 +38,9 @@ #include #include #include +#ifdef RTE_LIBRTE_VHOST_NUMA +#include <numaif.h> +#endif #include @@ -481,6 +484,93 @@ set_vring_num(struct vhost_device_ctx ctx, struct vhost_vring_state *state) } /* + * Reallocate the virtio_net and vhost_virtqueue structures so they are on the + * same NUMA node as the memory of the vring descriptor table. 
+ */
+#ifdef RTE_LIBRTE_VHOST_NUMA
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index)
+{
+	int oldnode, newnode;
+	/* new_* stay NULL unless the matching realloc_* flag is set. */
+	struct virtio_net_config_ll *old_ll_dev, *new_ll_dev = NULL;
+	struct vhost_virtqueue *old_vq, *new_vq = NULL;
+	int ret, realloc_dev = 0, realloc_vq = 0;
+
+	old_ll_dev = (struct virtio_net_config_ll *)dev;
+	old_vq = dev->virtqueue[index];
+	/* newnode: node of the desc table; oldnode: node of dev. */
+	ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc,
+			MPOL_F_NODE | MPOL_F_ADDR);
+	ret = ret | get_mempolicy(&oldnode, NULL, 0, old_ll_dev,
+			MPOL_F_NODE | MPOL_F_ADDR);
+	if (ret) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Unable to get vring desc or dev numa information.\n");
+		return dev;
+	}
+	if (oldnode != newnode)
+		realloc_dev = 1;
+	/* Repeat the comparison for the virtqueue structure itself. */
+	ret = get_mempolicy(&oldnode, NULL, 0, old_vq,
+			MPOL_F_NODE | MPOL_F_ADDR);
+	if (ret) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Unable to get vq numa information.\n");
+		return dev;
+	}
+	if (oldnode != newnode)
+		realloc_vq = 1;
+
+	if (realloc_dev == 0 && realloc_vq == 0)
+		return dev;
+
+	if (realloc_dev)
+		new_ll_dev = rte_malloc_socket(NULL,
+			sizeof(struct virtio_net_config_ll), 0, newnode);
+	if (realloc_vq)
+		new_vq = rte_malloc_socket(NULL,
+			sizeof(struct vhost_virtqueue), 0, newnode);
+	/* Only a requested allocation can fail; rte_free(NULL) is a no-op. */
+	if ((realloc_dev && !new_ll_dev) || (realloc_vq && !new_vq)) {
+		rte_free(new_ll_dev);
+		rte_free(new_vq);
+		return dev;
+	}
+	/* Copy to the new node, re-link, then free the old copies. */
+	if (realloc_vq)
+		memcpy(new_vq, old_vq, sizeof(*new_vq));
+	if (realloc_dev)
+		memcpy(new_ll_dev, old_ll_dev, sizeof(*new_ll_dev));
+	(new_ll_dev ? new_ll_dev : old_ll_dev)->dev.virtqueue[index] =
+		new_vq ? new_vq : old_vq;
+	if (realloc_vq)
+		rte_free(old_vq);
+	if (realloc_dev) {
+		if (ll_root == old_ll_dev)
+			ll_root = new_ll_dev;
+		else {
+			struct virtio_net_config_ll *prev = ll_root;
+			while (prev->next != old_ll_dev)
+				prev = prev->next;
+			prev->next = new_ll_dev;
+			new_ll_dev->next = old_ll_dev->next;
+		}
+		rte_free(old_ll_dev);
+	}
+
+	/* dev only changes when the device itself was reallocated. */
+	return realloc_dev ? &new_ll_dev->dev : dev;
+}
+#else
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index __rte_unused)
+{
+	return dev;
+}
+#endif
+
+/*
  * Called from CUSE IOCTL: VHOST_SET_VRING_ADDR
  * The virtio device sends us the desc, used and avail ring addresses.
  * This function then converts these to our address space.
@@ -508,6 +598,9 @@ set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr)
 		return -1;
 	}
 
+	dev = numa_realloc(dev, addr->index);
+	vq = dev->virtqueue[addr->index];
+
 	vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev,
 			addr->avail_user_addr);
 	if (vq->avail == 0) {
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 1a2043a..5aba56a 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -92,6 +92,9 @@ endif # ! CONFIG_RTE_BUILD_COMBINE_LIBS
 
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += -lpcap
 
+ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
+_LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST) += -lnuma
+endif
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),n)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST) += -lfuse
 endif