From patchwork Wed Apr 5 12:40:52 2023
X-Patchwork-Submitter: Eelco Chaudron
X-Patchwork-Id: 125798
X-Patchwork-Delegate: thomas@monjalon.net
From: Eelco Chaudron
To: maxime.coquelin@redhat.com, chenbo.xia@intel.com
Cc: dev@dpdk.org
Subject: [PATCH v2 1/3] vhost: Change vhost_virtqueue access lock to a read/write one.
Date: Wed, 5 Apr 2023 14:40:52 +0200
Message-Id: <168069842756.833254.13949971045184398628.stgit@ebuild.local>
In-Reply-To: <168069838578.833254.4856666346839028593.stgit@ebuild.local>
References: <168069838578.833254.4856666346839028593.stgit@ebuild.local>

This change will allow the vhost interrupt datapath handling to be split
between two processes without one of them holding an explicit lock.
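To illustrate the locking model this conversion enables: the regular
enqueue/dequeue paths and rte_vhost_vring_call() now take the per-virtqueue
access_lock shared, so a separate interrupt-injection context can run
alongside the packet-processing thread, while control-path operations
(vhost-user message handling, statistics, async channel setup) keep exclusive
access. A minimal sketch of that pattern using the rte_rwlock API; the
vq_sketch structure and both functions are illustrative placeholders, not the
actual lib/vhost code:

#include <rte_rwlock.h>

struct vq_sketch {
	rte_rwlock_t access_lock;
	/* ... ring state ... */
};

static void
vq_sketch_init(struct vq_sketch *vq)
{
	rte_rwlock_init(&vq->access_lock);
}

/* Data path: shared (read) lock, several contexts may hold it at once. */
static void
datapath_poll(struct vq_sketch *vq)
{
	rte_rwlock_read_lock(&vq->access_lock);
	/* process descriptors, possibly kick the guest */
	rte_rwlock_read_unlock(&vq->access_lock);
}

/* Control path: exclusive (write) lock, e.g. vhost-user message handling. */
static void
control_path_reconfigure(struct vq_sketch *vq)
{
	rte_rwlock_write_lock(&vq->access_lock);
	/* translate/invalidate rings, register async channel, ... */
	rte_rwlock_write_unlock(&vq->access_lock);
}

The new rte_rwlock_write_is_locked() helper added below exists so that
vq_assert_lock() can keep asserting that control-path callers hold the lock
exclusively.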
Signed-off-by: Eelco Chaudron --- lib/eal/include/generic/rte_rwlock.h | 17 ++++++ lib/vhost/vhost.c | 46 +++++++++-------- lib/vhost/vhost.h | 4 +- lib/vhost/vhost_user.c | 14 +++-- lib/vhost/virtio_net.c | 90 +++++++++++++++++----------------- 5 files changed, 94 insertions(+), 77 deletions(-) diff --git a/lib/eal/include/generic/rte_rwlock.h b/lib/eal/include/generic/rte_rwlock.h index d45c22c189..3a5f0e568f 100644 --- a/lib/eal/include/generic/rte_rwlock.h +++ b/lib/eal/include/generic/rte_rwlock.h @@ -236,6 +236,23 @@ rte_rwlock_write_unlock(rte_rwlock_t *rwl) __atomic_fetch_sub(&rwl->cnt, RTE_RWLOCK_WRITE, __ATOMIC_RELEASE); } +/** + * Test if the write lock is taken. + * + * @param rwl + * A pointer to a rwlock structure. + * @return + * 1 if the write lock is currently taken; 0 otherwise. + */ +static inline int +rte_rwlock_write_is_locked(rte_rwlock_t *rwl) +{ + if (__atomic_load_n(&rwl->cnt, __ATOMIC_RELAXED) & RTE_RWLOCK_WRITE) + return 1; + + return 0; +} + /** * Try to execute critical section in a hardware memory transaction, if it * fails or not available take a read lock diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c index ef37943817..74bdbfd810 100644 --- a/lib/vhost/vhost.c +++ b/lib/vhost/vhost.c @@ -393,9 +393,9 @@ free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq) else rte_free(vq->shadow_used_split); - rte_spinlock_lock(&vq->access_lock); + rte_rwlock_write_lock(&vq->access_lock); vhost_free_async_mem(vq); - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_write_unlock(&vq->access_lock); rte_free(vq->batch_copy_elems); vhost_user_iotlb_destroy(vq); rte_free(vq->log_cache); @@ -630,7 +630,7 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx) dev->virtqueue[i] = vq; init_vring_queue(dev, vq, i); - rte_spinlock_init(&vq->access_lock); + rte_rwlock_init(&vq->access_lock); vq->avail_wrap_counter = 1; vq->used_wrap_counter = 1; vq->signalled_used_valid = false; @@ -1305,14 +1305,14 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx) if (!vq) return -1; - rte_spinlock_lock(&vq->access_lock); + rte_rwlock_read_lock(&vq->access_lock); if (vq_is_packed(dev)) vhost_vring_call_packed(dev, vq); else vhost_vring_call_split(dev, vq); - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_read_unlock(&vq->access_lock); return 0; } @@ -1334,7 +1334,7 @@ rte_vhost_vring_call_nonblock(int vid, uint16_t vring_idx) if (!vq) return -1; - if (!rte_spinlock_trylock(&vq->access_lock)) + if (rte_rwlock_read_trylock(&vq->access_lock)) return -EAGAIN; if (vq_is_packed(dev)) @@ -1342,7 +1342,7 @@ rte_vhost_vring_call_nonblock(int vid, uint16_t vring_idx) else vhost_vring_call_split(dev, vq); - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_read_unlock(&vq->access_lock); return 0; } @@ -1365,7 +1365,7 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id) if (!vq) return 0; - rte_spinlock_lock(&vq->access_lock); + rte_rwlock_write_lock(&vq->access_lock); if (unlikely(!vq->enabled || vq->avail == NULL)) goto out; @@ -1373,7 +1373,7 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id) ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx; out: - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_write_unlock(&vq->access_lock); return ret; } @@ -1457,12 +1457,12 @@ rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable) if (!vq) return -1; - rte_spinlock_lock(&vq->access_lock); + rte_rwlock_write_lock(&vq->access_lock); vq->notif_enable = enable; ret = vhost_enable_guest_notification(dev, vq, enable); - 
rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_write_unlock(&vq->access_lock); return ret; } @@ -1520,7 +1520,7 @@ rte_vhost_rx_queue_count(int vid, uint16_t qid) if (vq == NULL) return 0; - rte_spinlock_lock(&vq->access_lock); + rte_rwlock_write_lock(&vq->access_lock); if (unlikely(!vq->enabled || vq->avail == NULL)) goto out; @@ -1528,7 +1528,7 @@ rte_vhost_rx_queue_count(int vid, uint16_t qid) ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx; out: - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_write_unlock(&vq->access_lock); return ret; } @@ -1757,9 +1757,9 @@ rte_vhost_async_channel_register(int vid, uint16_t queue_id) if (unlikely(vq == NULL || !dev->async_copy || dev->vdpa_dev != NULL)) return -1; - rte_spinlock_lock(&vq->access_lock); + rte_rwlock_write_lock(&vq->access_lock); ret = async_channel_register(dev, vq); - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_write_unlock(&vq->access_lock); return ret; } @@ -1804,7 +1804,7 @@ rte_vhost_async_channel_unregister(int vid, uint16_t queue_id) if (vq == NULL) return ret; - if (!rte_spinlock_trylock(&vq->access_lock)) { + if (rte_rwlock_write_trylock(&vq->access_lock)) { VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to unregister async channel, virtqueue busy.\n"); return ret; @@ -1821,7 +1821,7 @@ rte_vhost_async_channel_unregister(int vid, uint16_t queue_id) ret = 0; } - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_write_unlock(&vq->access_lock); return ret; } @@ -1954,7 +1954,7 @@ rte_vhost_async_get_inflight(int vid, uint16_t queue_id) if (vq == NULL) return ret; - if (!rte_spinlock_trylock(&vq->access_lock)) { + if (rte_rwlock_write_trylock(&vq->access_lock)) { VHOST_LOG_CONFIG(dev->ifname, DEBUG, "failed to check in-flight packets. virtqueue busy.\n"); return ret; @@ -1963,7 +1963,7 @@ rte_vhost_async_get_inflight(int vid, uint16_t queue_id) if (vq->async) ret = vq->async->pkts_inflight_n; - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_write_unlock(&vq->access_lock); return ret; } @@ -2084,13 +2084,13 @@ rte_vhost_vring_stats_get(int vid, uint16_t queue_id, vq = dev->virtqueue[queue_id]; - rte_spinlock_lock(&vq->access_lock); + rte_rwlock_write_lock(&vq->access_lock); for (i = 0; i < VHOST_NB_VQ_STATS; i++) { stats[i].value = *(uint64_t *)(((char *)vq) + vhost_vq_stat_strings[i].offset); stats[i].id = i; } - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_write_unlock(&vq->access_lock); return VHOST_NB_VQ_STATS; } @@ -2111,9 +2111,9 @@ int rte_vhost_vring_stats_reset(int vid, uint16_t queue_id) vq = dev->virtqueue[queue_id]; - rte_spinlock_lock(&vq->access_lock); + rte_rwlock_write_lock(&vq->access_lock); memset(&vq->stats, 0, sizeof(vq->stats)); - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_write_unlock(&vq->access_lock); return 0; } diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index 8fdab13c70..5c939ef06f 100644 --- a/lib/vhost/vhost.h +++ b/lib/vhost/vhost.h @@ -277,7 +277,7 @@ struct vhost_virtqueue { bool access_ok; bool ready; - rte_spinlock_t access_lock; + rte_rwlock_t access_lock; union { @@ -517,7 +517,7 @@ static inline void vq_assert_lock__(struct virtio_net *dev, struct vhost_virtqueue *vq, const char *func) __rte_assert_exclusive_lock(&vq->access_lock) { - if (unlikely(!rte_spinlock_is_locked(&vq->access_lock))) + if (unlikely(!rte_rwlock_write_is_locked(&vq->access_lock))) rte_panic("VHOST_CONFIG: (%s) %s() called without access lock taken.\n", dev->ifname, func); } diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index 
d60e39b6bc..c9454ce3d9 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -399,7 +399,7 @@ vhost_user_set_features(struct virtio_net **pdev, cleanup_vq_inflight(dev, vq); /* vhost_user_lock_all_queue_pairs locked all qps */ vq_assert_lock(dev, vq); - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_write_unlock(&vq->access_lock); free_vq(dev, vq); } } @@ -2649,10 +2649,10 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, len, imsg->perm); if (is_vring_iotlb(dev, vq, imsg)) { - rte_spinlock_lock(&vq->access_lock); + rte_rwlock_write_lock(&vq->access_lock); translate_ring_addresses(&dev, &vq); *pdev = dev; - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_write_unlock(&vq->access_lock); } } break; @@ -2667,9 +2667,9 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, imsg->size); if (is_vring_iotlb(dev, vq, imsg)) { - rte_spinlock_lock(&vq->access_lock); + rte_rwlock_write_lock(&vq->access_lock); vring_invalidate(dev, vq); - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_write_unlock(&vq->access_lock); } } break; @@ -3030,7 +3030,7 @@ vhost_user_lock_all_queue_pairs(struct virtio_net *dev) struct vhost_virtqueue *vq = dev->virtqueue[i]; if (vq) { - rte_spinlock_lock(&vq->access_lock); + rte_rwlock_write_lock(&vq->access_lock); vq_num++; } i++; @@ -3048,7 +3048,7 @@ vhost_user_unlock_all_queue_pairs(struct virtio_net *dev) struct vhost_virtqueue *vq = dev->virtqueue[i]; if (vq) { - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_write_unlock(&vq->access_lock); vq_num++; } i++; diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c index be28ea5151..267faaa4fd 100644 --- a/lib/vhost/virtio_net.c +++ b/lib/vhost/virtio_net.c @@ -55,7 +55,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring) static inline void vhost_queue_stats_update(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts, uint16_t count) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) { struct virtqueue_stats *stats = &vq->stats; int i; @@ -102,7 +102,7 @@ static __rte_always_inline int64_t vhost_async_dma_transfer_one(struct virtio_net *dev, struct vhost_virtqueue *vq, int16_t dma_id, uint16_t vchan_id, uint16_t flag_idx, struct vhost_iov_iter *pkt) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) { struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id]; uint16_t ring_mask = dma_info->ring_mask; @@ -152,7 +152,7 @@ static __rte_always_inline uint16_t vhost_async_dma_transfer(struct virtio_net *dev, struct vhost_virtqueue *vq, int16_t dma_id, uint16_t vchan_id, uint16_t head_idx, struct vhost_iov_iter *pkts, uint16_t nr_pkts) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) { struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id]; int64_t ret, nr_copies = 0; @@ -454,7 +454,7 @@ vhost_async_shadow_enqueue_packed_batch(struct vhost_virtqueue *vq, static __rte_always_inline void vhost_async_shadow_dequeue_packed_batch(struct vhost_virtqueue *vq, uint16_t *ids) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) { uint16_t i; struct vhost_async *async = vq->async; @@ -1131,7 +1131,7 @@ static __rte_always_inline int async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf *m, uint32_t mbuf_offset, uint64_t buf_iova, uint32_t cpy_len, bool to_desc) - 
__rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { struct vhost_async *async = vq->async; @@ -1211,7 +1211,7 @@ static __rte_always_inline int mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf *m, struct buf_vector *buf_vec, uint16_t nr_vec, uint16_t num_buffers, bool is_async) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { uint32_t vec_idx = 0; @@ -1341,7 +1341,7 @@ vhost_enqueue_single_packed(struct virtio_net *dev, struct rte_mbuf *pkt, struct buf_vector *buf_vec, uint16_t *nr_descs) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { uint16_t nr_vec = 0; @@ -1403,7 +1403,7 @@ vhost_enqueue_single_packed(struct virtio_net *dev, static __rte_noinline uint32_t virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts, uint32_t count) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { uint32_t pkt_idx = 0; @@ -1635,7 +1635,7 @@ static __rte_always_inline int16_t virtio_dev_rx_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf *pkt) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { struct buf_vector buf_vec[BUF_VECTOR_MAX]; @@ -1661,7 +1661,7 @@ virtio_dev_rx_packed(struct virtio_net *dev, struct vhost_virtqueue *__rte_restrict vq, struct rte_mbuf **__rte_restrict pkts, uint32_t count) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { uint32_t pkt_idx = 0; @@ -1701,7 +1701,7 @@ virtio_dev_rx(struct virtio_net *dev, struct vhost_virtqueue *vq, uint32_t nb_tx = 0; VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__); - rte_spinlock_lock(&vq->access_lock); + rte_rwlock_read_lock(&vq->access_lock); if (unlikely(!vq->enabled)) goto out_access_unlock; @@ -1727,7 +1727,7 @@ virtio_dev_rx(struct virtio_net *dev, struct vhost_virtqueue *vq, vhost_user_iotlb_rd_unlock(vq); out_access_unlock: - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_read_unlock(&vq->access_lock); return nb_tx; } @@ -1760,7 +1760,7 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id, static __rte_always_inline uint16_t async_get_first_inflight_pkt_idx(struct vhost_virtqueue *vq) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) { struct vhost_async *async = vq->async; @@ -2165,7 +2165,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue static __rte_always_inline void write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) { struct vhost_async *async = vq->async; uint16_t nr_left = n_descs; @@ -2198,7 +2198,7 @@ write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs) static __rte_always_inline void write_back_completed_descs_packed(struct vhost_virtqueue *vq, uint16_t n_buffers) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) { struct vhost_async *async = vq->async; uint16_t from = 
async->last_buffer_idx_packed; @@ -2263,7 +2263,7 @@ write_back_completed_descs_packed(struct vhost_virtqueue *vq, static __rte_always_inline uint16_t vhost_poll_enqueue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, uint16_t vchan_id) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) { struct vhost_async *async = vq->async; struct async_inflight_info *pkts_info = async->pkts_info; @@ -2357,7 +2357,7 @@ rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, vq = dev->virtqueue[queue_id]; - if (!rte_spinlock_trylock(&vq->access_lock)) { + if (rte_rwlock_read_trylock(&vq->access_lock)) { VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: virtqueue %u is busy.\n", __func__, queue_id); @@ -2377,7 +2377,7 @@ rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id, vq->stats.inflight_completed += n_pkts_cpl; out: - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_read_unlock(&vq->access_lock); return n_pkts_cpl; } @@ -2465,7 +2465,7 @@ rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts, vq = dev->virtqueue[queue_id]; - if (!rte_spinlock_trylock(&vq->access_lock)) { + if (rte_rwlock_read_trylock(&vq->access_lock)) { VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: virtqueue %u is busy.\n", __func__, queue_id); return 0; @@ -2495,7 +2495,7 @@ rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts, vq->stats.inflight_completed += n_pkts_cpl; out_access_unlock: - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_read_unlock(&vq->access_lock); return n_pkts_cpl; } @@ -2516,7 +2516,7 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, struct vhost_virtqueue *vq, return 0; } - rte_spinlock_lock(&vq->access_lock); + rte_rwlock_write_lock(&vq->access_lock); if (unlikely(!vq->enabled || !vq->async)) goto out_access_unlock; @@ -2544,7 +2544,7 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, struct vhost_virtqueue *vq, vhost_user_iotlb_rd_unlock(vq); out_access_unlock: - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_write_unlock(&vq->access_lock); return nb_tx; } @@ -2866,7 +2866,7 @@ desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, struct buf_vector *buf_vec, uint16_t nr_vec, struct rte_mbuf *m, struct rte_mempool *mbuf_pool, bool legacy_ol_flags, uint16_t slot_idx, bool is_async) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { uint32_t buf_avail, buf_offset, buf_len; @@ -3073,7 +3073,7 @@ static uint16_t virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, bool legacy_ol_flags) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { uint16_t i; @@ -3178,7 +3178,7 @@ static uint16_t virtio_dev_tx_split_legacy(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true); @@ -3189,7 +3189,7 @@ static uint16_t virtio_dev_tx_split_compliant(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) - 
__rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false); @@ -3393,7 +3393,7 @@ vhost_dequeue_single_packed(struct virtio_net *dev, uint16_t *buf_id, uint16_t *desc_count, bool legacy_ol_flags) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { struct buf_vector buf_vec[BUF_VECTOR_MAX]; @@ -3443,7 +3443,7 @@ virtio_dev_tx_single_packed(struct virtio_net *dev, struct rte_mempool *mbuf_pool, struct rte_mbuf *pkts, bool legacy_ol_flags) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { @@ -3475,7 +3475,7 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct rte_mbuf **__rte_restrict pkts, uint32_t count, bool legacy_ol_flags) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { uint32_t pkt_idx = 0; @@ -3520,7 +3520,7 @@ static uint16_t virtio_dev_tx_packed_legacy(struct virtio_net *dev, struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, struct rte_mbuf **__rte_restrict pkts, uint32_t count) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true); @@ -3531,7 +3531,7 @@ static uint16_t virtio_dev_tx_packed_compliant(struct virtio_net *dev, struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, struct rte_mbuf **__rte_restrict pkts, uint32_t count) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false); @@ -3566,7 +3566,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, vq = dev->virtqueue[queue_id]; - if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0)) + if (unlikely(rte_rwlock_read_trylock(&vq->access_lock) != 0)) return 0; if (unlikely(!vq->enabled)) { @@ -3636,7 +3636,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, vhost_user_iotlb_rd_unlock(vq); out_access_unlock: - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_read_unlock(&vq->access_lock); if (unlikely(rarp_mbuf != NULL)) count += 1; @@ -3648,7 +3648,7 @@ static __rte_always_inline uint16_t async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) { uint16_t start_idx, from, i; uint16_t nr_cpl_pkts = 0; @@ -3695,7 +3695,7 @@ static __rte_always_inline uint16_t virtio_dev_tx_async_split(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { static bool allocerr_warned; @@ -3844,7 +3844,7 @@ virtio_dev_tx_async_split_legacy(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, 
int16_t dma_id, uint16_t vchan_id) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { return virtio_dev_tx_async_split(dev, vq, mbuf_pool, @@ -3857,7 +3857,7 @@ virtio_dev_tx_async_split_compliant(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, int16_t dma_id, uint16_t vchan_id) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { return virtio_dev_tx_async_split(dev, vq, mbuf_pool, @@ -3867,7 +3867,7 @@ virtio_dev_tx_async_split_compliant(struct virtio_net *dev, static __rte_always_inline void vhost_async_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, uint16_t buf_id, uint16_t count) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) { struct vhost_async *async = vq->async; uint16_t idx = async->buffer_idx_packed; @@ -3889,7 +3889,7 @@ virtio_dev_tx_async_single_packed(struct virtio_net *dev, struct rte_mbuf *pkts, uint16_t slot_idx, bool legacy_ol_flags) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { int err; @@ -3942,7 +3942,7 @@ virtio_dev_tx_async_packed_batch(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts, uint16_t slot_idx, uint16_t dma_id, uint16_t vchan_id) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { uint16_t avail_idx = vq->last_avail_idx; @@ -4000,7 +4000,7 @@ static __rte_always_inline uint16_t virtio_dev_tx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, uint16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { uint32_t pkt_idx = 0; @@ -4111,7 +4111,7 @@ static uint16_t virtio_dev_tx_async_packed_legacy(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, uint16_t dma_id, uint16_t vchan_id) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { return virtio_dev_tx_async_packed(dev, vq, mbuf_pool, @@ -4123,7 +4123,7 @@ static uint16_t virtio_dev_tx_async_packed_compliant(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, uint16_t dma_id, uint16_t vchan_id) - __rte_exclusive_locks_required(&vq->access_lock) + __rte_shared_locks_required(&vq->access_lock) __rte_shared_locks_required(&vq->iotlb_lock) { return virtio_dev_tx_async_packed(dev, vq, mbuf_pool, @@ -4173,7 +4173,7 @@ rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id, vq = dev->virtqueue[queue_id]; - if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0)) + if (unlikely(rte_rwlock_read_trylock(&vq->access_lock) != 0)) return 0; if (unlikely(vq->enabled == 0)) { @@ -4255,7 +4255,7 @@ rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id, vhost_user_iotlb_rd_unlock(vq); out_access_unlock: - rte_spinlock_unlock(&vq->access_lock); + rte_rwlock_read_unlock(&vq->access_lock); if (unlikely(rarp_mbuf != NULL)) 
count += 1;

From patchwork Wed Apr 5 12:41:26 2023
X-Patchwork-Submitter: Eelco Chaudron
X-Patchwork-Id: 125799
X-Patchwork-Delegate: thomas@monjalon.net
From: Eelco Chaudron
To: maxime.coquelin@redhat.com, chenbo.xia@intel.com
Cc: dev@dpdk.org
Subject: [PATCH v2 2/3] vhost: make the guest_notifications statistic counter atomic.
Date: Wed, 5 Apr 2023 14:41:26 +0200
Message-Id: <168069846863.833254.7655588319565934173.stgit@ebuild.local>
In-Reply-To: <168069838578.833254.4856666346839028593.stgit@ebuild.local>
References: <168069838578.833254.4856666346839028593.stgit@ebuild.local>

Making the guest_notifications statistic counter atomic allows it to be
safely incremented while holding the read access_lock.
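The reasoning, sketched below: once the notification path runs with only the
shared (read) access_lock held, two threads may bump the counter at the same
time, so the increment has to be an atomic read-modify-write; the stats
get/reset paths still take the write lock, so plain accesses stay safe there.
A small sketch under those assumptions; the structure and function names are
illustrative, not the actual lib/vhost types:

#include <stdint.h>

struct vq_stats_sketch {
	uint64_t guest_notifications; /* updated under the shared (read) lock */
};

/* Data path, read lock held: concurrent updaters are possible, so use an
 * atomic RMW; relaxed ordering is enough for a statistics counter. */
static void
count_notification(struct vq_stats_sketch *s)
{
	__atomic_fetch_add(&s->guest_notifications, 1, __ATOMIC_RELAXED);
}

/* Control path, write lock held: no reader-side updater can be inside the
 * critical section, so a plain read (or a memset() to reset) is safe. */
static uint64_t
notifications_snapshot(const struct vq_stats_sketch *s)
{
	return s->guest_notifications;
}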
Signed-off-by: Eelco Chaudron --- lib/vhost/vhost.c | 8 ++++++++ lib/vhost/vhost.h | 9 ++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c index 74bdbfd810..8ff6434c93 100644 --- a/lib/vhost/vhost.c +++ b/lib/vhost/vhost.c @@ -2086,6 +2086,10 @@ rte_vhost_vring_stats_get(int vid, uint16_t queue_id, rte_rwlock_write_lock(&vq->access_lock); for (i = 0; i < VHOST_NB_VQ_STATS; i++) { + /* + * No need to read the atomic counters atomically here, as the + * write access_lock taken above prevents them from being updated. + */ stats[i].value = *(uint64_t *)(((char *)vq) + vhost_vq_stat_strings[i].offset); stats[i].id = i; @@ -2112,6 +2116,10 @@ int rte_vhost_vring_stats_reset(int vid, uint16_t queue_id) vq = dev->virtqueue[queue_id]; rte_rwlock_write_lock(&vq->access_lock); + /* + * No need to reset the atomic counters atomically here, as the + * write access_lock taken above prevents them from being updated. + */ memset(&vq->stats, 0, sizeof(vq->stats)); rte_rwlock_write_unlock(&vq->access_lock); diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index 5c939ef06f..37609c7c8d 100644 --- a/lib/vhost/vhost.h +++ b/lib/vhost/vhost.h @@ -135,11 +135,12 @@ struct virtqueue_stats { uint64_t broadcast; /* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */ uint64_t size_bins[8]; - uint64_t guest_notifications; uint64_t iotlb_hits; uint64_t iotlb_misses; uint64_t inflight_submitted; uint64_t inflight_completed; + /* Counters below are atomic, and should be incremented as such. */ + uint64_t guest_notifications; }; /** @@ -907,7 +908,8 @@ vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq) unlikely(!signalled_used_valid)) { eventfd_write(vq->callfd, (eventfd_t) 1); if (dev->flags & VIRTIO_DEV_STATS_ENABLED) - vq->stats.guest_notifications++; + __atomic_fetch_add(&vq->stats.guest_notifications, + 1, __ATOMIC_RELAXED); if (dev->notify_ops->guest_notified) dev->notify_ops->guest_notified(dev->vid); } @@ -917,7 +919,8 @@ vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq) && (vq->callfd >= 0)) { eventfd_write(vq->callfd, (eventfd_t)1); if (dev->flags & VIRTIO_DEV_STATS_ENABLED) - vq->stats.guest_notifications++; + __atomic_fetch_add(&vq->stats.guest_notifications, + 1, __ATOMIC_RELAXED); if (dev->notify_ops->guest_notified) dev->notify_ops->guest_notified(dev->vid); }

From patchwork Wed Apr 5 12:41:57 2023
X-Patchwork-Submitter: Eelco Chaudron
X-Patchwork-Id: 125800
X-Patchwork-Delegate: thomas@monjalon.net
From: Eelco Chaudron
To: maxime.coquelin@redhat.com, chenbo.xia@intel.com
Cc: dev@dpdk.org
Subject: [PATCH v2 3/3] vhost: add device op to offload the interrupt kick
Date: Wed, 5 Apr 2023 14:41:57 +0200
Message-Id: <168069850278.833254.17836553051894685658.stgit@ebuild.local>
In-Reply-To: <168069838578.833254.4856666346839028593.stgit@ebuild.local>
References: <168069838578.833254.4856666346839028593.stgit@ebuild.local>

This patch adds an operation callback which gets called every time the
library wants to call eventfd_write(). This eventfd_write() call could
result in a system call, which could potentially block the PMD thread.
The callback function can decide whether it's ok to handle the
eventfd_write() now or have the newly introduced function,
rte_vhost_notify_guest(), called at a later time. This can be used by
third-party applications, like OVS, to avoid system calls being issued
from the PMD threads.

Signed-off-by: Eelco Chaudron --- lib/vhost/meson.build | 2 + lib/vhost/rte_vhost.h | 23 +++++++++++++++- lib/vhost/socket.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++--- lib/vhost/version.map | 9 ++++++ lib/vhost/vhost.c | 38 ++++++++++++++++++++++++++ lib/vhost/vhost.h | 65 +++++++++++++++++++++++++++++++------------- 6 files changed, 184 insertions(+), 25 deletions(-) diff --git a/lib/vhost/meson.build b/lib/vhost/meson.build index 197a51d936..e93ba6b078 100644 --- a/lib/vhost/meson.build +++ b/lib/vhost/meson.build @@ -39,3 +39,5 @@ driver_sdk_headers = files( 'vdpa_driver.h', ) deps += ['ethdev', 'cryptodev', 'hash', 'pci', 'dmadev'] + +use_function_versioning = true diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h index 58a5d4be92..7a10bc36cf 100644 --- a/lib/vhost/rte_vhost.h +++ b/lib/vhost/rte_vhost.h @@ -298,7 +298,13 @@ struct rte_vhost_device_ops { */ void (*guest_notified)(int vid); - void *reserved[1]; /**< Reserved for future extension */ + /** + * If this callback is registered, notification to the guest can + * be handled by the front-end calling rte_vhost_notify_guest(). + * If it's not handled, 'false' should be returned. This can be used + * to remove the "slow" eventfd_write() syscall from the datapath.
+ */ + bool (*guest_notify)(int vid, uint16_t queue_id); }; /** @@ -433,6 +439,21 @@ void rte_vhost_log_used_vring(int vid, uint16_t vring_idx, int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable); +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice. + * + * Inject the offloaded interrupt into the vhost device's queue. For more + * details see the 'guest_notify' vhost device operation. + * + * @param vid + * vhost device ID + * @param queue_id + * virtio queue index + */ +__rte_experimental +void rte_vhost_notify_guest(int vid, uint16_t queue_id); + /** * Register vhost driver. path could be different for multiple * instance support. diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c index 669c322e12..787d6bacf8 100644 --- a/lib/vhost/socket.c +++ b/lib/vhost/socket.c @@ -15,6 +15,7 @@ #include #include +#include #include #include "fd_man.h" @@ -43,6 +44,7 @@ struct vhost_user_socket { bool async_copy; bool net_compliant_ol_flags; bool stats_enabled; + bool alloc_notify_ops; /* * The "supported_features" indicates the feature bits the @@ -846,6 +848,14 @@ vhost_user_socket_mem_free(struct vhost_user_socket *vsocket) vsocket->path = NULL; } + if (vsocket && vsocket->alloc_notify_ops) { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-qual" + free((struct rte_vhost_device_ops *)vsocket->notify_ops); +#pragma GCC diagnostic pop + vsocket->notify_ops = NULL; + } + if (vsocket) { free(vsocket); vsocket = NULL; @@ -1099,21 +1109,75 @@ rte_vhost_driver_unregister(const char *path) /* * Register ops so that we can add/remove device to data core. */ -int -rte_vhost_driver_callback_register(const char *path, - struct rte_vhost_device_ops const * const ops) +static int +rte_vhost_driver_callback_register__(const char *path, + struct rte_vhost_device_ops const * const ops, bool ops_allocated) { struct vhost_user_socket *vsocket; pthread_mutex_lock(&vhost_user.mutex); vsocket = find_vhost_user_socket(path); - if (vsocket) + if (vsocket) { + if (vsocket->alloc_notify_ops) { + vsocket->alloc_notify_ops = false; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-qual" + free((struct rte_vhost_device_ops *)vsocket->notify_ops); +#pragma GCC diagnostic pop + } vsocket->notify_ops = ops; + if (ops_allocated) + vsocket->alloc_notify_ops = true; + } pthread_mutex_unlock(&vhost_user.mutex); return vsocket ? 0 : -1; } +int __vsym +rte_vhost_driver_callback_register_v24(const char *path, + struct rte_vhost_device_ops const * const ops) +{ + return rte_vhost_driver_callback_register__(path, ops, false); +} + +int __vsym +rte_vhost_driver_callback_register_v23(const char *path, + struct rte_vhost_device_ops const * const ops) +{ + int ret; + + /* + * Although the ops structure is a const structure, we do need to + * override the guest_notify operation. This is because with the + * previous APIs it was "reserved" and if any garbage value was passed, + * it could crash the application. + */ + if (ops && ops->guest_notify) { + struct rte_vhost_device_ops *new_ops; + + new_ops = malloc(sizeof(*new_ops)); + if (new_ops == NULL) + return -1; + + memcpy(new_ops, ops, sizeof(*new_ops)); + new_ops->guest_notify = NULL; + + ret = rte_vhost_driver_callback_register__(path, new_ops, true); + } else { + ret = rte_vhost_driver_callback_register__(path, ops, false); + } + + return ret; +} + +/* Mark the v23 function as the old version, and v24 as the default version.
*/ +VERSION_SYMBOL(rte_vhost_driver_callback_register, _v23, 23); +BIND_DEFAULT_SYMBOL(rte_vhost_driver_callback_register, _v24, 24); +MAP_STATIC_SYMBOL(int rte_vhost_driver_callback_register(const char *path, + struct rte_vhost_device_ops const * const ops), + rte_vhost_driver_callback_register_v24); + struct rte_vhost_device_ops const * vhost_driver_callback_get(const char *path) { diff --git a/lib/vhost/version.map b/lib/vhost/version.map index d322a4a888..7bcbfd12cf 100644 --- a/lib/vhost/version.map +++ b/lib/vhost/version.map @@ -64,6 +64,12 @@ DPDK_23 { local: *; }; +DPDK_24 { + global: + + rte_vhost_driver_callback_register; +} DPDK_23; + EXPERIMENTAL { global: @@ -98,6 +104,9 @@ EXPERIMENTAL { # added in 22.11 rte_vhost_async_dma_unconfigure; rte_vhost_vring_call_nonblock; + + # added in 23.07 + rte_vhost_notify_guest; }; INTERNAL { diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c index 8ff6434c93..79e88f986e 100644 --- a/lib/vhost/vhost.c +++ b/lib/vhost/vhost.c @@ -44,6 +44,10 @@ static const struct vhost_vq_stats_name_off vhost_vq_stat_strings[] = { {"size_1024_1518_packets", offsetof(struct vhost_virtqueue, stats.size_bins[6])}, {"size_1519_max_packets", offsetof(struct vhost_virtqueue, stats.size_bins[7])}, {"guest_notifications", offsetof(struct vhost_virtqueue, stats.guest_notifications)}, + {"guest_notifications_offloaded", offsetof(struct vhost_virtqueue, + stats.guest_notifications_offloaded)}, + {"guest_notifications_error", offsetof(struct vhost_virtqueue, + stats.guest_notifications_error)}, {"iotlb_hits", offsetof(struct vhost_virtqueue, stats.iotlb_hits)}, {"iotlb_misses", offsetof(struct vhost_virtqueue, stats.iotlb_misses)}, {"inflight_submitted", offsetof(struct vhost_virtqueue, stats.inflight_submitted)}, @@ -1467,6 +1471,40 @@ rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable) return ret; } +void +rte_vhost_notify_guest(int vid, uint16_t queue_id) +{ + struct virtio_net *dev = get_device(vid); + struct vhost_virtqueue *vq; + + if (!dev || queue_id >= VHOST_MAX_VRING) + return; + + vq = dev->virtqueue[queue_id]; + if (!vq) + return; + + rte_rwlock_read_lock(&vq->access_lock); + + if (vq->callfd >= 0) { + int ret = eventfd_write(vq->callfd, (eventfd_t)1); + + if (ret) { + if (dev->flags & VIRTIO_DEV_STATS_ENABLED) + __atomic_fetch_add(&vq->stats.guest_notifications_error, + 1, __ATOMIC_RELAXED); + } else { + if (dev->flags & VIRTIO_DEV_STATS_ENABLED) + __atomic_fetch_add(&vq->stats.guest_notifications, + 1, __ATOMIC_RELAXED); + if (dev->notify_ops->guest_notified) + dev->notify_ops->guest_notified(dev->vid); + } + } + + rte_rwlock_read_unlock(&vq->access_lock); +} + void rte_vhost_log_write(int vid, uint64_t addr, uint64_t len) { diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index 37609c7c8d..0ab75b78b5 100644 --- a/lib/vhost/vhost.h +++ b/lib/vhost/vhost.h @@ -141,6 +141,8 @@ struct virtqueue_stats { uint64_t inflight_completed; /* Counters below are atomic, and should be incremented as such. 
*/ uint64_t guest_notifications; + uint64_t guest_notifications_offloaded; + uint64_t guest_notifications_error; }; /** @@ -884,6 +886,35 @@ vhost_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old) return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old); } +static __rte_always_inline void +vhost_vring_inject_irq(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + int ret; + + if (dev->notify_ops->guest_notify && + dev->notify_ops->guest_notify(dev->vid, vq->index)) { + if (dev->flags & VIRTIO_DEV_STATS_ENABLED) + __atomic_fetch_add(&vq->stats.guest_notifications_offloaded, + 1, __ATOMIC_RELAXED); + return; + } + + ret = eventfd_write(vq->callfd, (eventfd_t) 1); + if (ret) { + if (dev->flags & VIRTIO_DEV_STATS_ENABLED) + __atomic_fetch_add(&vq->stats.guest_notifications_error, + 1, __ATOMIC_RELAXED); + return; + } + + if (dev->flags & VIRTIO_DEV_STATS_ENABLED) + __atomic_fetch_add(&vq->stats.guest_notifications, + 1, __ATOMIC_RELAXED); + if (dev->notify_ops->guest_notified) + dev->notify_ops->guest_notified(dev->vid); +} + + static __rte_always_inline void vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq) { @@ -903,26 +934,16 @@ vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq) "%s: used_event_idx=%d, old=%d, new=%d\n", __func__, vhost_used_event(vq), old, new); - if ((vhost_need_event(vhost_used_event(vq), new, old) && - (vq->callfd >= 0)) || - unlikely(!signalled_used_valid)) { - eventfd_write(vq->callfd, (eventfd_t) 1); - if (dev->flags & VIRTIO_DEV_STATS_ENABLED) - __atomic_fetch_add(&vq->stats.guest_notifications, - 1, __ATOMIC_RELAXED); - if (dev->notify_ops->guest_notified) - dev->notify_ops->guest_notified(dev->vid); + if ((vhost_need_event(vhost_used_event(vq), new, old) || + unlikely(!signalled_used_valid)) && + vq->callfd >= 0) { + vhost_vring_inject_irq(dev, vq); } } else { /* Kick the guest if necessary. */ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT) && (vq->callfd >= 0)) { - eventfd_write(vq->callfd, (eventfd_t)1); - if (dev->flags & VIRTIO_DEV_STATS_ENABLED) - __atomic_fetch_add(&vq->stats.guest_notifications, - 1, __ATOMIC_RELAXED); - if (dev->notify_ops->guest_notified) - dev->notify_ops->guest_notified(dev->vid); + vhost_vring_inject_irq(dev, vq); } } } @@ -974,11 +995,8 @@ vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq) if (vhost_need_event(off, new, old)) kick = true; kick: - if (kick) { - eventfd_write(vq->callfd, (eventfd_t)1); - if (dev->notify_ops->guest_notified) - dev->notify_ops->guest_notified(dev->vid); - } + if (kick && vq->callfd >= 0) + vhost_vring_inject_irq(dev, vq); } static __rte_always_inline void @@ -1017,4 +1035,11 @@ mbuf_is_consumed(struct rte_mbuf *m) uint64_t hua_to_alignment(struct rte_vhost_memory *mem, void *ptr); void mem_set_dump(void *ptr, size_t size, bool enable, uint64_t alignment); + +/* Versioned functions */ +int rte_vhost_driver_callback_register_v23(const char *path, + struct rte_vhost_device_ops const * const ops); +int rte_vhost_driver_callback_register_v24(const char *path, + struct rte_vhost_device_ops const * const ops); + #endif /* _VHOST_NET_CDEV_H_ */
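To make the intended use of the new callback concrete, below is a rough sketch
of how an application such as OVS might hook into it. Only struct
rte_vhost_device_ops, the guest_notify callback,
rte_vhost_driver_callback_register() and rte_vhost_notify_guest() come from
this series; app_defer_kick() and the surrounding functions are hypothetical
application code:

#include <stdbool.h>
#include <stdint.h>
#include <rte_vhost.h>

/* Hypothetical application-side queue of deferred guest kicks. */
extern void app_defer_kick(int vid, uint16_t queue_id);

/*
 * Called by the vhost library from the PMD thread whenever it wants to
 * notify the guest. Returning true means "handled": the library skips the
 * eventfd_write(), and the application promises to call
 * rte_vhost_notify_guest() for this vid/queue later, from another thread.
 */
static bool
app_guest_notify(int vid, uint16_t queue_id)
{
	app_defer_kick(vid, queue_id);
	return true;
}

static const struct rte_vhost_device_ops app_ops = {
	/* .new_device, .destroy_device, ... as before */
	.guest_notify = app_guest_notify,
};

static int
app_register_vhost_callbacks(const char *path)
{
	/* Typically called right after rte_vhost_driver_register(path, flags). */
	return rte_vhost_driver_callback_register(path, &app_ops);
}

/* Dedicated (non-PMD) thread draining the deferred kicks. */
static void
app_kick_thread_handle(int vid, uint16_t queue_id)
{
	/* Performs the actual eventfd_write() on behalf of the datapath. */
	rte_vhost_notify_guest(vid, queue_id);
}

Returning false from guest_notify keeps today's behaviour: the library falls
back to issuing the eventfd_write() itself and accounts it under the
guest_notifications counter.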