From patchwork Mon Apr 15 07:56:23 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xiao Wang X-Patchwork-Id: 52788 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 981231B10E; Mon, 15 Apr 2019 10:12:02 +0200 (CEST) Received: from mga17.intel.com (mga17.intel.com [192.55.52.151]) by dpdk.org (Postfix) with ESMTP id C41871B104 for ; Mon, 15 Apr 2019 10:12:00 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga004.fm.intel.com ([10.253.24.48]) by fmsmga107.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 Apr 2019 01:12:00 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,352,1549958400"; d="scan'208";a="161962037" Received: from dpdk-xiao-1.sh.intel.com ([10.67.111.145]) by fmsmga004.fm.intel.com with ESMTP; 15 Apr 2019 01:11:58 -0700 From: Xiao Wang To: maxime.coquelin@redhat.com Cc: dev@dpdk.org, tiwei.bie@intel.com, zhihong.wang@intel.com, zhe.wan@intel.com, Xiao Wang Date: Mon, 15 Apr 2019 15:56:23 +0800 Message-Id: <20190415075625.109948-2-xiao.w.wang@intel.com> X-Mailer: git-send-email 2.15.1 In-Reply-To: <20190415075625.109948-1-xiao.w.wang@intel.com> References: <20190415075625.109948-1-xiao.w.wang@intel.com> Subject: [dpdk-dev] [PATCH 1/3] net/ifc: do not relay for Tx queue X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Relaying the Tx queue doesn't provide any benefit: the packet buffers in the Tx queue are read-only, so there is no dirty page logging for Tx buffers. This change further reduces CPU usage and improves throughput. 
Signed-off-by: Xiao Wang --- drivers/net/ifc/ifcvf_vdpa.c | 88 +++++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/drivers/net/ifc/ifcvf_vdpa.c b/drivers/net/ifc/ifcvf_vdpa.c index 921a7e058..619cdc245 100644 --- a/drivers/net/ifc/ifcvf_vdpa.c +++ b/drivers/net/ifc/ifcvf_vdpa.c @@ -66,6 +66,8 @@ struct ifcvf_internal { bool sw_fallback_running; /* mediated vring for sw fallback */ struct vring m_vring[IFCVF_MAX_QUEUES * 2]; + /* eventfd for used ring interrupt */ + int intr_fd[IFCVF_MAX_QUEUES * 2]; }; struct internal_list { @@ -334,7 +336,7 @@ vdpa_ifcvf_stop(struct ifcvf_internal *internal) #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \ sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1)) static int -vdpa_enable_vfio_intr(struct ifcvf_internal *internal) +vdpa_enable_vfio_intr(struct ifcvf_internal *internal, bool m_rx) { int ret; uint32_t i, nr_vring; @@ -342,6 +344,7 @@ vdpa_enable_vfio_intr(struct ifcvf_internal *internal) struct vfio_irq_set *irq_set; int *fd_ptr; struct rte_vhost_vring vring; + int fd; nr_vring = rte_vhost_get_vring_num(internal->vid); @@ -355,9 +358,22 @@ vdpa_enable_vfio_intr(struct ifcvf_internal *internal) fd_ptr = (int *)&irq_set->data; fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = internal->pdev->intr_handle.fd; + for (i = 0; i < nr_vring; i++) + internal->intr_fd[i] = -1; + for (i = 0; i < nr_vring; i++) { rte_vhost_get_vhost_vring(internal->vid, i, &vring); fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd; + if ((i & 1) == 0 && m_rx == true) { + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + DRV_LOG(ERR, "can't setup eventfd: %s", + strerror(errno)); + return -1; + } + internal->intr_fd[i] = fd; + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd; + } } ret = ioctl(internal->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); @@ -374,6 +390,7 @@ static int vdpa_disable_vfio_intr(struct ifcvf_internal *internal) { int ret; + uint32_t i, nr_vring; char 
irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; struct vfio_irq_set *irq_set; @@ -384,6 +401,13 @@ vdpa_disable_vfio_intr(struct ifcvf_internal *internal) irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; irq_set->start = 0; + nr_vring = rte_vhost_get_vring_num(internal->vid); + for (i = 0; i < nr_vring; i++) { + if (internal->intr_fd[i] >= 0) + close(internal->intr_fd[i]); + internal->intr_fd[i] = -1; + } + ret = ioctl(internal->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { DRV_LOG(ERR, "Error disabling MSI-X interrupts: %s", @@ -505,7 +529,7 @@ update_datapath(struct ifcvf_internal *internal) if (ret) goto err; - ret = vdpa_enable_vfio_intr(internal); + ret = vdpa_enable_vfio_intr(internal, 0); if (ret) goto err; @@ -591,9 +615,19 @@ m_ifcvf_start(struct ifcvf_internal *internal) } hw->vring[i].avail = gpa; - hw->vring[i].used = m_vring_iova + - (char *)internal->m_vring[i].used - - (char *)internal->m_vring[i].desc; + /* Direct I/O for Tx queue, relay for Rx queue */ + if (i & 1) { + gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used); + if (gpa == 0) { + DRV_LOG(ERR, "Fail to get GPA for used ring."); + return -1; + } + hw->vring[i].used = gpa; + } else { + hw->vring[i].used = m_vring_iova + + (char *)internal->m_vring[i].used - + (char *)internal->m_vring[i].desc; + } hw->vring[i].size = vq.size; @@ -647,35 +681,6 @@ m_ifcvf_stop(struct ifcvf_internal *internal) return 0; } -static int -m_enable_vfio_intr(struct ifcvf_internal *internal) -{ - uint32_t nr_vring; - struct rte_intr_handle *intr_handle = &internal->pdev->intr_handle; - int ret; - - nr_vring = rte_vhost_get_vring_num(internal->vid); - - ret = rte_intr_efd_enable(intr_handle, nr_vring); - if (ret) - return -1; - - ret = rte_intr_enable(intr_handle); - if (ret) - return -1; - - return 0; -} - -static void -m_disable_vfio_intr(struct ifcvf_internal *internal) -{ - struct rte_intr_handle *intr_handle = &internal->pdev->intr_handle; - - rte_intr_efd_disable(intr_handle); - rte_intr_disable(intr_handle); -} - 
static void update_used_ring(struct ifcvf_internal *internal, uint16_t qid) { @@ -689,7 +694,6 @@ vring_relay(void *arg) int i, vid, epfd, fd, nfds; struct ifcvf_internal *internal = (struct ifcvf_internal *)arg; struct rte_vhost_vring vring; - struct rte_intr_handle *intr_handle; uint16_t qid, q_num; struct epoll_event events[IFCVF_MAX_QUEUES * 4]; struct epoll_event ev; @@ -722,12 +726,12 @@ vring_relay(void *arg) } } - intr_handle = &internal->pdev->intr_handle; - for (qid = 0; qid < q_num; qid++) { + for (qid = 0; qid < q_num; qid += 2) { ev.events = EPOLLIN | EPOLLPRI; + /* leave a flag to mark it's for interrupt */ ev.data.u64 = 1 | qid << 1 | - (uint64_t)intr_handle->efds[qid] << 32; - if (epoll_ctl(epfd, EPOLL_CTL_ADD, intr_handle->efds[qid], &ev) + (uint64_t)internal->intr_fd[qid] << 32; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev) < 0) { DRV_LOG(ERR, "epoll add error: %s", strerror(errno)); return NULL; @@ -824,7 +828,7 @@ ifcvf_sw_fallback_switchover(struct ifcvf_internal *internal) goto error; /* set up interrupt for interrupt relay */ - ret = m_enable_vfio_intr(internal); + ret = vdpa_enable_vfio_intr(internal, 1); if (ret) goto unmap; @@ -847,7 +851,7 @@ ifcvf_sw_fallback_switchover(struct ifcvf_internal *internal) stop_vf: m_ifcvf_stop(internal); unset_intr: - m_disable_vfio_intr(internal); + vdpa_disable_vfio_intr(internal); unmap: ifcvf_dma_map(internal, 0); error: @@ -903,7 +907,7 @@ ifcvf_dev_close(int vid) m_ifcvf_stop(internal); /* remove interrupt setting */ - m_disable_vfio_intr(internal); + vdpa_disable_vfio_intr(internal); /* unset DMA map for guest memory */ ifcvf_dma_map(internal, 0); From patchwork Mon Apr 15 07:56:24 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xiao Wang X-Patchwork-Id: 52789 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from 
[92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id BA4D51B116; Mon, 15 Apr 2019 10:12:07 +0200 (CEST) Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by dpdk.org (Postfix) with ESMTP id 3D99A1B115 for ; Mon, 15 Apr 2019 10:12:06 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga004.fm.intel.com ([10.253.24.48]) by fmsmga101.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 Apr 2019 01:12:05 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,352,1549958400"; d="scan'208";a="161962067" Received: from dpdk-xiao-1.sh.intel.com ([10.67.111.145]) by fmsmga004.fm.intel.com with ESMTP; 15 Apr 2019 01:12:04 -0700 From: Xiao Wang To: maxime.coquelin@redhat.com Cc: dev@dpdk.org, tiwei.bie@intel.com, zhihong.wang@intel.com, zhe.wan@intel.com, Xiao Wang Date: Mon, 15 Apr 2019 15:56:24 +0800 Message-Id: <20190415075625.109948-3-xiao.w.wang@intel.com> X-Mailer: git-send-email 2.15.1 In-Reply-To: <20190415075625.109948-1-xiao.w.wang@intel.com> References: <20190415075625.109948-1-xiao.w.wang@intel.com> Subject: [dpdk-dev] [PATCH 2/3] net/ifc: fix mediated vring initialization X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" The mediated vring's indexes should be set to their initial values before the HW datapath is enabled. 
Fixes: 4bb531e152d3 ("net/ifc: support SW assisted VDPA live migration") Signed-off-by: Xiao Wang --- drivers/net/ifc/ifcvf_vdpa.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ifc/ifcvf_vdpa.c b/drivers/net/ifc/ifcvf_vdpa.c index 619cdc245..9e729ff72 100644 --- a/drivers/net/ifc/ifcvf_vdpa.c +++ b/drivers/net/ifc/ifcvf_vdpa.c @@ -631,6 +631,10 @@ m_ifcvf_start(struct ifcvf_internal *internal) hw->vring[i].size = vq.size; + rte_vhost_get_vring_base(vid, i, + &internal->m_vring[i].avail->idx, + &internal->m_vring[i].used->idx); + rte_vhost_get_vring_base(vid, i, &hw->vring[i].last_avail_idx, &hw->vring[i].last_used_idx); @@ -702,11 +706,6 @@ vring_relay(void *arg) vid = internal->vid; q_num = rte_vhost_get_vring_num(vid); - /* prepare the mediated vring */ - for (qid = 0; qid < q_num; qid++) - rte_vhost_get_vring_base(vid, qid, - &internal->m_vring[qid].avail->idx, - &internal->m_vring[qid].used->idx); /* add notify fd and interrupt fd to epoll */ epfd = epoll_create(IFCVF_MAX_QUEUES * 2); From patchwork Mon Apr 15 07:56:25 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xiao Wang X-Patchwork-Id: 52790 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 341121B128; Mon, 15 Apr 2019 10:12:12 +0200 (CEST) Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by dpdk.org (Postfix) with ESMTP id 84BEF1B11A for ; Mon, 15 Apr 2019 10:12:10 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga004.fm.intel.com ([10.253.24.48]) by fmsmga101.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 Apr 2019 01:12:10 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,352,1549958400"; d="scan'208";a="161962082" Received: from 
dpdk-xiao-1.sh.intel.com ([10.67.111.145]) by fmsmga004.fm.intel.com with ESMTP; 15 Apr 2019 01:12:09 -0700 From: Xiao Wang To: maxime.coquelin@redhat.com Cc: dev@dpdk.org, tiwei.bie@intel.com, zhihong.wang@intel.com, zhe.wan@intel.com, Xiao Wang Date: Mon, 15 Apr 2019 15:56:25 +0800 Message-Id: <20190415075625.109948-4-xiao.w.wang@intel.com> X-Mailer: git-send-email 2.15.1 In-Reply-To: <20190415075625.109948-1-xiao.w.wang@intel.com> References: <20190415075625.109948-1-xiao.w.wang@intel.com> Subject: [dpdk-dev] [PATCH 3/3] net/ifc: fix used ring update X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" The vring relay thread is created after HW datapath start and is canceled before HW datapath stop, so we need to take care of the ring update when the relay thread is not on duty. Fixes: 4bb531e152d3 ("net/ifc: support SW assisted VDPA live migration") Signed-off-by: Xiao Wang --- drivers/net/ifc/ifcvf_vdpa.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ifc/ifcvf_vdpa.c b/drivers/net/ifc/ifcvf_vdpa.c index 9e729ff72..e59084034 100644 --- a/drivers/net/ifc/ifcvf_vdpa.c +++ b/drivers/net/ifc/ifcvf_vdpa.c @@ -81,6 +81,8 @@ static struct internal_list_head internal_list = static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER; +static void update_used_ring(struct ifcvf_internal *internal, uint16_t qid); + static struct internal_list * find_internal_resource_by_did(int did) { @@ -666,6 +668,10 @@ m_ifcvf_stop(struct ifcvf_internal *internal) ifcvf_stop_hw(hw); for (i = 0; i < hw->nr_vring; i++) { + /* synchronize remaining new used entries if any */ + if ((i & 1) == 0) + update_used_ring(internal, i); + rte_vhost_get_vhost_vring(vid, i, &vq); len = IFCVF_USED_RING_LEN(vq.size); rte_vhost_log_used_vring(vid, i, 0, len); @@ -735,6 +741,7 @@ vring_relay(void 
*arg) DRV_LOG(ERR, "epoll add error: %s", strerror(errno)); return NULL; } + update_used_ring(internal, qid); } /* start relay with a first kick */