From patchwork Thu Aug 31 09:50:23 2017
X-Patchwork-Submitter: Maxime Coquelin <maxime.coquelin@redhat.com>
X-Patchwork-Id: 28141
X-Patchwork-Delegate: yuanhan.liu@linux.intel.com
From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: dev@dpdk.org, yliu@fridaylinux.org, jfreiman@redhat.com, tiwei.bie@intel.com
Cc: mst@redhat.com, vkaplans@redhat.com, jasowang@redhat.com,
 Maxime Coquelin <maxime.coquelin@redhat.com>
Date: Thu, 31 Aug 2017 11:50:23 +0200
Message-Id: <20170831095023.21037-22-maxime.coquelin@redhat.com>
In-Reply-To: <20170831095023.21037-1-maxime.coquelin@redhat.com>
References: <20170831095023.21037-1-maxime.coquelin@redhat.com>
Subject: [dpdk-dev] [PATCH 21/21] vhost: iotlb: reduce iotlb read lock usage

Prior to this patch, the iotlb cache's read/write lock was read-locked at
every guest IOVA to application VA translation, i.e. at least once per
packet with indirect descriptors off, and twice per packet with indirect
descriptors on.

The problem is that rte_rwlock_read_lock() makes use of atomic operations,
which are costly.

This patch introduces iotlb lock helpers so that a full burst can be
protected by taking the lock only once, which reduces the number of atomic
operations by up to 64 per burst with indirect descriptors.
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/iotlb.c      | 22 +++++++++++-----------
 lib/librte_vhost/iotlb.h      | 14 ++++++++++++++
 lib/librte_vhost/vhost.h      |  1 +
 lib/librte_vhost/virtio_net.c | 22 ++++++++++++++++++++++
 4 files changed, 48 insertions(+), 11 deletions(-)

diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
index d014bfe98..7dca95281 100644
--- a/lib/librte_vhost/iotlb.c
+++ b/lib/librte_vhost/iotlb.c
@@ -55,14 +55,14 @@ static void vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
 {
 	struct vhost_iotlb_entry *node, *temp_node;
 
-	rte_rwlock_write_lock(&vq->iotlb_lock);
+	rte_rwlock_write_lock(&vq->iotlb_pending_lock);
 
 	TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
 		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
 		rte_mempool_put(vq->iotlb_pool, node);
 	}
 
-	rte_rwlock_write_unlock(&vq->iotlb_lock);
+	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
 }
 
 int vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
@@ -71,7 +71,7 @@ int vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
 	struct vhost_iotlb_entry *node;
 	int found = 0;
 
-	rte_rwlock_read_lock(&vq->iotlb_lock);
+	rte_rwlock_read_lock(&vq->iotlb_pending_lock);
 
 	TAILQ_FOREACH(node, &vq->iotlb_pending_list, next) {
 		if ((node->iova == iova) && (node->perm == perm)) {
@@ -80,7 +80,7 @@ int vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
 		}
 	}
 
-	rte_rwlock_read_unlock(&vq->iotlb_lock);
+	rte_rwlock_read_unlock(&vq->iotlb_pending_lock);
 
 	return found;
 }
@@ -105,11 +105,11 @@ void vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq,
 	node->iova = iova;
 	node->perm = perm;
 
-	rte_rwlock_write_lock(&vq->iotlb_lock);
+	rte_rwlock_write_lock(&vq->iotlb_pending_lock);
 
 	TAILQ_INSERT_TAIL(&vq->iotlb_pending_list, node, next);
 
-	rte_rwlock_write_unlock(&vq->iotlb_lock);
+	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
 }
 
 static void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
@@ -117,7 +117,8 @@ static void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
 {
 	struct vhost_iotlb_entry *node, *temp_node;
 
-	/* .iotlb_lock already locked by the caller */
+	rte_rwlock_write_lock(&vq->iotlb_pending_lock);
+
 	TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
 		if (node->iova < iova)
 			continue;
@@ -128,6 +129,8 @@ static void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
 		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
 		rte_mempool_put(vq->iotlb_pool, node);
 	}
+
+	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
 }
 
 static void vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
@@ -226,8 +229,6 @@ uint64_t vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
 	if (unlikely(!*size))
 		goto out;
 
-	rte_rwlock_read_lock(&vq->iotlb_lock);
-
 	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
 		/* List sorted by iova */
 		if (unlikely(iova < node->iova))
@@ -252,8 +253,6 @@ uint64_t vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
 			break;
 	}
 
-	rte_rwlock_read_unlock(&vq->iotlb_lock);
-
 out:
 	/* Only part of the requested chunk is mapped */
 	if (unlikely(mapped < *size))
@@ -285,6 +284,7 @@ int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
 		socket = 0;
 
 	rte_rwlock_init(&vq->iotlb_lock);
+	rte_rwlock_init(&vq->iotlb_pending_lock);
 
 	TAILQ_INIT(&vq->iotlb_list);
 	TAILQ_INIT(&vq->iotlb_pending_list);
diff --git a/lib/librte_vhost/iotlb.h b/lib/librte_vhost/iotlb.h
index 4be1f7e85..d70c05a70 100644
--- a/lib/librte_vhost/iotlb.h
+++ b/lib/librte_vhost/iotlb.h
@@ -34,6 +34,20 @@
 #define _VHOST_IOTLB_H_
 
 #include "vhost.h"
+
+
+static __rte_always_inline void
+vhost_user_iotlb_rd_lock(struct vhost_virtqueue *vq)
+{
+	rte_rwlock_read_lock(&vq->iotlb_lock);
+}
+
+static __rte_always_inline void
+vhost_user_iotlb_rd_unlock(struct vhost_virtqueue *vq)
+{
+	rte_rwlock_read_unlock(&vq->iotlb_lock);
+}
+
 void vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
 					uint64_t uaddr, uint64_t size,
 					uint8_t perm);
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 52bbc9a1c..008fc2ada 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -119,6 +119,7 @@ struct vhost_virtqueue {
 	struct vhost_vring_addr ring_addrs;
 
 	rte_rwlock_t	iotlb_lock;
+	rte_rwlock_t	iotlb_pending_lock;
 	struct rte_mempool *iotlb_pool;
 	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list;
 	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 1bd21330e..799e12d2c 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -45,6 +45,7 @@
 #include
 #include
 
+#include "iotlb.h"
 #include "vhost.h"
 
 #define MAX_PKT_BURST 32
@@ -306,6 +307,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	}
 
 	rte_prefetch0(&vq->desc[desc_indexes[0]]);
+
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_lock(vq);
+
 	for (i = 0; i < count; i++) {
 		uint16_t desc_idx = desc_indexes[i];
 		int err;
@@ -338,6 +343,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 			rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
 	}
 
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_unlock(vq);
+
 	rte_smp_wmb();
 
 	*(volatile uint16_t *)&vq->used->idx += count;
@@ -574,6 +582,10 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 	vq->shadow_used_idx = 0;
 
 	avail_head = *((volatile uint16_t *)&vq->avail->idx);
+
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_lock(vq);
+
 	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
 
@@ -600,6 +612,9 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 		vq->last_avail_idx += num_buffers;
 	}
 
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_unlock(vq);
+
 	if (likely(vq->shadow_used_idx)) {
 		flush_shadow_used_ring(dev, vq);
 
@@ -1143,6 +1158,10 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
 	/* Prefetch descriptor index. */
 	rte_prefetch0(&vq->desc[desc_indexes[0]]);
+
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_lock(vq);
+
 	for (i = 0; i < count; i++) {
 		struct vring_desc *desc;
 		uint16_t sz, idx;
@@ -1206,6 +1225,9 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 			TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
 		}
 	}
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_unlock(vq);
+
 	vq->last_avail_idx += i;
 
 	if (likely(dev->dequeue_zero_copy == 0)) {
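
For reviewers unfamiliar with the new helpers, below the diff is a minimal,
self-contained sketch (not part of the patch) of the pattern the virtio_net.c
hunks apply: take the iotlb read lock once around a whole burst, and only when
VIRTIO_F_IOMMU_PLATFORM has been negotiated, instead of at every IOVA to VA
translation. The demo_* names are hypothetical stand-ins; only the
rte_rwlock_*() calls and the feature-bit gating mirror the real code, and the
sketch builds only against DPDK headers (rte_rwlock.h).

/*
 * Sketch only, not part of the patch. demo_* types and functions are
 * hypothetical stand-ins for the vhost virtqueue and datapath.
 */
#include <stdint.h>
#include <rte_rwlock.h>

#define VIRTIO_F_IOMMU_PLATFORM 33	/* virtio feature bit */

struct demo_virtqueue {
	rte_rwlock_t iotlb_lock;	/* protects iotlb cache lookups */
};

/* Stand-in for per-descriptor work that needs IOVA to VA translation. */
static void
demo_translate_and_copy(struct demo_virtqueue *vq, uint16_t idx)
{
	(void)vq;
	(void)idx;
}

static void
demo_process_burst(struct demo_virtqueue *vq, uint64_t features, uint16_t count)
{
	uint16_t i;

	/*
	 * One read-lock/unlock pair per burst instead of at least one per
	 * packet (two with indirect descriptors), so a 32-packet burst
	 * saves up to 64 lock acquisitions.
	 */
	if (features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		rte_rwlock_read_lock(&vq->iotlb_lock);

	for (i = 0; i < count; i++)
		demo_translate_and_copy(vq, i);

	if (features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		rte_rwlock_read_unlock(&vq->iotlb_lock);
}

The feature-bit gating keeps the non-IOMMU datapath free of any rwlock cost.
The separate iotlb_pending_lock added by this patch appears to exist so that
an IOTLB miss can still queue a pending entry while the burst holds the
iotlb_lock read lock.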