[RFC,v3,7/8] vhost: annotate IOTLB locks

Message ID 20220411110013.18624-8-david.marchand@redhat.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series vhost lock annotations |

Commit Message

David Marchand April 11, 2022, 11 a.m. UTC
  This change simply annotates existing paths of the code leading to
manipulations of the IOTLB r/w locks.

clang does not support conditionally held locks, so always take iotlb
locks regardless of VIRTIO_F_IOMMU_PLATFORM feature.

vdpa and vhost_crypto code are annotated though they end up not taking
a IOTLB lock and have been marked with a FIXME.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/vhost/iotlb.h        |  8 +++++++
 lib/vhost/vdpa.c         |  1 +
 lib/vhost/vhost.c        | 11 +++++----
 lib/vhost/vhost.h        | 22 +++++++++++++-----
 lib/vhost/vhost_crypto.c |  7 ++++++
 lib/vhost/virtio_net.c   | 49 ++++++++++++++++++++++++++++++----------
 6 files changed, 75 insertions(+), 23 deletions(-)
  

Comments

Maxime Coquelin April 22, 2022, 7:46 a.m. UTC | #1
On 4/11/22 13:00, David Marchand wrote:
> This change simply annotates existing paths of the code leading to
> manipulations of the IOTLB r/w locks.
> 
> clang does not support conditionally held locks, so always take iotlb
> locks regardless of VIRTIO_F_IOMMU_PLATFORM feature.
> 
> vdpa and vhost_crypto code are annotated though they end up not taking
> a IOTLB lock and have been marked with a FIXME.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
>   lib/vhost/iotlb.h        |  8 +++++++
>   lib/vhost/vdpa.c         |  1 +
>   lib/vhost/vhost.c        | 11 +++++----
>   lib/vhost/vhost.h        | 22 +++++++++++++-----
>   lib/vhost/vhost_crypto.c |  7 ++++++
>   lib/vhost/virtio_net.c   | 49 ++++++++++++++++++++++++++++++----------
>   6 files changed, 75 insertions(+), 23 deletions(-)
> 


I agree with the change. I don't expect performance impact of taking the
lock unconditionally, because there won't be cache line sharing since it
is per-vq lock and the locking cost will be offset by removing the
feature check.

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime
  

Patch

diff --git a/lib/vhost/iotlb.h b/lib/vhost/iotlb.h
index 8d0ff7473b..8b97d308d1 100644
--- a/lib/vhost/iotlb.h
+++ b/lib/vhost/iotlb.h
@@ -11,24 +11,32 @@ 
 
 static __rte_always_inline void
 vhost_user_iotlb_rd_lock(struct vhost_virtqueue *vq)
+	__rte_shared_lock_function(vq->iotlb_lock)
+	__rte_no_thread_safety_analysis
 {
 	rte_rwlock_read_lock(&vq->iotlb_lock);
 }
 
 static __rte_always_inline void
 vhost_user_iotlb_rd_unlock(struct vhost_virtqueue *vq)
+	__rte_unlock_function(vq->iotlb_lock)
+	__rte_no_thread_safety_analysis
 {
 	rte_rwlock_read_unlock(&vq->iotlb_lock);
 }
 
 static __rte_always_inline void
 vhost_user_iotlb_wr_lock(struct vhost_virtqueue *vq)
+	__rte_exclusive_lock_function(vq->iotlb_lock)
+	__rte_no_thread_safety_analysis
 {
 	rte_rwlock_write_lock(&vq->iotlb_lock);
 }
 
 static __rte_always_inline void
 vhost_user_iotlb_wr_unlock(struct vhost_virtqueue *vq)
+	__rte_unlock_function(vq->iotlb_lock)
+	__rte_no_thread_safety_analysis
 {
 	rte_rwlock_write_unlock(&vq->iotlb_lock);
 }
diff --git a/lib/vhost/vdpa.c b/lib/vhost/vdpa.c
index a3f9f8f072..22b8f48b39 100644
--- a/lib/vhost/vdpa.c
+++ b/lib/vhost/vdpa.c
@@ -133,6 +133,7 @@  rte_vdpa_unregister_device(struct rte_vdpa_device *dev)
 
 int
 rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m)
+	__rte_no_thread_safety_analysis /* FIXME: requires iotlb_lock? */
 {
 	struct virtio_net *dev = get_device(vid);
 	uint16_t idx, idx_m, desc_id;
diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index 79236df000..f96e8e0376 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -383,6 +383,7 @@  free_device(struct virtio_net *dev)
 
 static __rte_always_inline int
 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
 		return 0;
@@ -434,6 +435,7 @@  translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
 /* Caller should have iotlb_lock read-locked */
 static int
 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint64_t req_size, size;
 
@@ -473,6 +475,7 @@  vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
 /* Caller should have iotlb_lock read-locked */
 static int
 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint64_t req_size, size;
 
@@ -527,10 +530,9 @@  vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
 }
 
 void
-vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+vring_invalidate(struct virtio_net *dev __rte_unused, struct vhost_virtqueue *vq)
 {
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_wr_lock(vq);
+	vhost_user_iotlb_wr_lock(vq);
 
 	vq->access_ok = false;
 	vq->desc = NULL;
@@ -538,8 +540,7 @@  vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
 	vq->used = NULL;
 	vq->log_guest_addr = 0;
 
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_wr_unlock(vq);
+	vhost_user_iotlb_wr_unlock(vq);
 }
 
 static void
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index c2065e5324..b5724c6d2a 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -525,12 +525,15 @@  void __vhost_log_cache_write(struct virtio_net *dev,
 		uint64_t addr, uint64_t len);
 void __vhost_log_cache_write_iova(struct virtio_net *dev,
 		struct vhost_virtqueue *vq,
-		uint64_t iova, uint64_t len);
+		uint64_t iova, uint64_t len)
+	__rte_shared_locks_required(vq->iotlb_lock);
 void __vhost_log_cache_sync(struct virtio_net *dev,
 		struct vhost_virtqueue *vq);
+
 void __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len);
 void __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
-			    uint64_t iova, uint64_t len);
+			    uint64_t iova, uint64_t len)
+	__rte_shared_locks_required(vq->iotlb_lock);
 
 static __rte_always_inline void
 vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
@@ -580,6 +583,7 @@  vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
 static __rte_always_inline void
 vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			   uint64_t iova, uint64_t len)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	if (likely(!(dev->features & (1ULL << VHOST_F_LOG_ALL))))
 		return;
@@ -593,6 +597,7 @@  vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
 static __rte_always_inline void
 vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			   uint64_t iova, uint64_t len)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	if (likely(!(dev->features & (1ULL << VHOST_F_LOG_ALL))))
 		return;
@@ -796,18 +801,23 @@  struct rte_vhost_device_ops const *vhost_driver_callback_get(const char *path);
 void vhost_backend_cleanup(struct virtio_net *dev);
 
 uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
-			uint64_t iova, uint64_t *len, uint8_t perm);
+			uint64_t iova, uint64_t *len, uint8_t perm)
+	__rte_shared_locks_required(vq->iotlb_lock);
 void *vhost_alloc_copy_ind_table(struct virtio_net *dev,
 			struct vhost_virtqueue *vq,
-			uint64_t desc_addr, uint64_t desc_len);
-int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
+			uint64_t desc_addr, uint64_t desc_len)
+	__rte_shared_locks_required(vq->iotlb_lock);
+int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+	__rte_shared_locks_required(vq->iotlb_lock);
 uint64_t translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		uint64_t log_addr);
+		uint64_t log_addr)
+	__rte_shared_locks_required(vq->iotlb_lock);
 void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq);
 
 static __rte_always_inline uint64_t
 vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			uint64_t iova, uint64_t *len, uint8_t perm)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
 		return rte_vhost_va_from_guest_pa(dev->mem, iova, len);
diff --git a/lib/vhost/vhost_crypto.c b/lib/vhost/vhost_crypto.c
index b1c0eb6a0f..7f8d11ad9f 100644
--- a/lib/vhost/vhost_crypto.c
+++ b/lib/vhost/vhost_crypto.c
@@ -506,6 +506,7 @@  static __rte_always_inline struct virtio_crypto_inhdr *
 reach_inhdr(struct vhost_crypto_data_req *vc_req,
 		struct vhost_crypto_desc *head,
 		uint32_t max_n_descs)
+	__rte_shared_locks_required(vc_req->vq->iotlb_lock)
 {
 	struct virtio_crypto_inhdr *inhdr;
 	struct vhost_crypto_desc *last = head + (max_n_descs - 1);
@@ -552,6 +553,7 @@  static __rte_always_inline void *
 get_data_ptr(struct vhost_crypto_data_req *vc_req,
 		struct vhost_crypto_desc *cur_desc,
 		uint8_t perm)
+	__rte_shared_locks_required(vc_req->vq->iotlb_lock)
 {
 	void *data;
 	uint64_t dlen = cur_desc->len;
@@ -570,6 +572,7 @@  copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req,
 		struct vhost_crypto_desc *head,
 		struct vhost_crypto_desc **cur_desc,
 		uint32_t size, uint32_t max_n_descs)
+	__rte_shared_locks_required(vc_req->vq->iotlb_lock)
 {
 	struct vhost_crypto_desc *desc = *cur_desc;
 	uint64_t remain, addr, dlen, len;
@@ -718,6 +721,7 @@  prepare_write_back_data(struct vhost_crypto_data_req *vc_req,
 		uint32_t offset,
 		uint64_t write_back_len,
 		uint32_t max_n_descs)
+	__rte_shared_locks_required(vc_req->vq->iotlb_lock)
 {
 	struct vhost_crypto_writeback_data *wb_data, *head;
 	struct vhost_crypto_desc *desc = *cur_desc;
@@ -838,6 +842,7 @@  prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 		struct virtio_crypto_cipher_data_req *cipher,
 		struct vhost_crypto_desc *head,
 		uint32_t max_n_descs)
+	__rte_shared_locks_required(vc_req->vq->iotlb_lock)
 {
 	struct vhost_crypto_desc *desc = head;
 	struct vhost_crypto_writeback_data *ewb = NULL;
@@ -990,6 +995,7 @@  prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 		struct virtio_crypto_alg_chain_data_req *chain,
 		struct vhost_crypto_desc *head,
 		uint32_t max_n_descs)
+	__rte_shared_locks_required(vc_req->vq->iotlb_lock)
 {
 	struct vhost_crypto_desc *desc = head, *digest_desc;
 	struct vhost_crypto_writeback_data *ewb = NULL, *ewb2 = NULL;
@@ -1172,6 +1178,7 @@  vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
 		struct vhost_virtqueue *vq, struct rte_crypto_op *op,
 		struct vring_desc *head, struct vhost_crypto_desc *descs,
 		uint16_t desc_idx)
+	__rte_no_thread_safety_analysis /* FIXME: requires iotlb_lock? */
 {
 	struct vhost_crypto_data_req *vc_req = rte_mbuf_to_priv(op->sym->m_src);
 	struct rte_cryptodev_sym_session *session;
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 0c40d5a7a5..8a3e3aa297 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -180,6 +180,7 @@  vhost_async_dma_check_completed(struct virtio_net *dev, int16_t dma_id, uint16_t
 
 static inline void
 do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	struct batch_copy_elem *elem = vq->batch_copy_elems;
 	uint16_t count = vq->batch_copy_nb_elems;
@@ -526,6 +527,7 @@  vhost_shadow_enqueue_single_packed(struct virtio_net *dev,
 				   uint16_t *id,
 				   uint16_t *count,
 				   uint16_t num_buffers)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers);
 
@@ -607,6 +609,7 @@  static __rte_always_inline int
 map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		struct buf_vector *buf_vec, uint16_t *vec_idx,
 		uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint16_t vec_id = *vec_idx;
 
@@ -644,6 +647,7 @@  fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			 uint32_t avail_idx, uint16_t *vec_idx,
 			 struct buf_vector *buf_vec, uint16_t *desc_chain_head,
 			 uint32_t *desc_chain_len, uint8_t perm)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
 	uint16_t vec_id = *vec_idx;
@@ -727,6 +731,7 @@  reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 				uint32_t size, struct buf_vector *buf_vec,
 				uint16_t *num_buffers, uint16_t avail_head,
 				uint16_t *nr_vec)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint16_t cur_idx;
 	uint16_t vec_idx = 0;
@@ -777,6 +782,7 @@  fill_vec_buf_packed_indirect(struct virtio_net *dev,
 			struct vhost_virtqueue *vq,
 			struct vring_packed_desc *desc, uint16_t *vec_idx,
 			struct buf_vector *buf_vec, uint32_t *len, uint8_t perm)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint16_t i;
 	uint32_t nr_descs;
@@ -835,6 +841,7 @@  fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 				uint16_t avail_idx, uint16_t *desc_count,
 				struct buf_vector *buf_vec, uint16_t *vec_idx,
 				uint16_t *buf_id, uint32_t *len, uint8_t perm)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	bool wrap_counter = vq->avail_wrap_counter;
 	struct vring_packed_desc *descs = vq->desc_packed;
@@ -900,6 +907,7 @@  static __rte_noinline void
 copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		struct buf_vector *buf_vec,
 		struct virtio_net_hdr_mrg_rxbuf *hdr)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint64_t len;
 	uint64_t remain = dev->vhost_hlen;
@@ -1036,6 +1044,7 @@  static __rte_always_inline void
 sync_mbuf_to_desc_seg(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		struct rte_mbuf *m, uint32_t mbuf_offset,
 		uint64_t buf_addr, uint64_t buf_iova, uint32_t cpy_len)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
 
@@ -1061,6 +1070,7 @@  mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		struct rte_mbuf *m, struct buf_vector *buf_vec,
 		uint16_t nr_vec, uint16_t num_buffers, bool is_async)
 	__rte_exclusive_locks_required(vq->access_lock)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint32_t vec_idx = 0;
 	uint32_t mbuf_offset, mbuf_avail;
@@ -1191,6 +1201,7 @@  vhost_enqueue_single_packed(struct virtio_net *dev,
 			    struct buf_vector *buf_vec,
 			    uint16_t *nr_descs)
 	__rte_exclusive_locks_required(vq->access_lock)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint16_t nr_vec = 0;
 	uint16_t avail_idx = vq->last_avail_idx;
@@ -1252,6 +1263,7 @@  static __rte_noinline uint32_t
 virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	struct rte_mbuf **pkts, uint32_t count)
 	__rte_exclusive_locks_required(vq->access_lock)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint32_t pkt_idx = 0;
 	uint16_t num_buffers;
@@ -1309,6 +1321,7 @@  virtio_dev_rx_sync_batch_check(struct virtio_net *dev,
 			   struct rte_mbuf **pkts,
 			   uint64_t *desc_addrs,
 			   uint64_t *lens)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	bool wrap_counter = vq->avail_wrap_counter;
 	struct vring_packed_desc *descs = vq->desc_packed;
@@ -1360,6 +1373,7 @@  virtio_dev_rx_batch_packed_copy(struct virtio_net *dev,
 			   struct rte_mbuf **pkts,
 			   uint64_t *desc_addrs,
 			   uint64_t *lens)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 	struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE];
@@ -1401,6 +1415,7 @@  static __rte_always_inline int
 virtio_dev_rx_sync_batch_packed(struct virtio_net *dev,
 			   struct vhost_virtqueue *vq,
 			   struct rte_mbuf **pkts)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint64_t desc_addrs[PACKED_BATCH_SIZE];
 	uint64_t lens[PACKED_BATCH_SIZE];
@@ -1423,6 +1438,7 @@  virtio_dev_rx_single_packed(struct virtio_net *dev,
 			    struct vhost_virtqueue *vq,
 			    struct rte_mbuf *pkt)
 	__rte_exclusive_locks_required(vq->access_lock)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	struct buf_vector buf_vec[BUF_VECTOR_MAX];
 	uint16_t nr_descs = 0;
@@ -1449,6 +1465,7 @@  virtio_dev_rx_packed(struct virtio_net *dev,
 		     struct rte_mbuf **__rte_restrict pkts,
 		     uint32_t count)
 	__rte_exclusive_locks_required(vq->access_lock)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint32_t pkt_idx = 0;
 
@@ -1501,8 +1518,7 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	if (unlikely(!vq->enabled))
 		goto out_access_unlock;
 
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_lock(vq);
+	vhost_user_iotlb_rd_lock(vq);
 
 	if (unlikely(!vq->access_ok))
 		if (unlikely(vring_translate(dev, vq) < 0))
@@ -1518,8 +1534,7 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 		nb_tx = virtio_dev_rx_split(dev, vq, pkts, count);
 
 out:
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_unlock(vq);
+	vhost_user_iotlb_rd_unlock(vq);
 
 out_access_unlock:
 	rte_spinlock_unlock(&vq->access_lock);
@@ -1595,6 +1610,7 @@  virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue
 		uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count,
 		int16_t dma_id, uint16_t vchan_id)
 	__rte_exclusive_locks_required(vq->access_lock)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	struct buf_vector buf_vec[BUF_VECTOR_MAX];
 	uint32_t pkt_idx = 0;
@@ -1700,6 +1716,7 @@  vhost_enqueue_async_packed(struct virtio_net *dev,
 			    uint16_t *nr_descs,
 			    uint16_t *nr_buffers)
 	__rte_exclusive_locks_required(vq->access_lock)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint16_t nr_vec = 0;
 	uint16_t avail_idx = vq->last_avail_idx;
@@ -1758,6 +1775,7 @@  static __rte_always_inline int16_t
 virtio_dev_rx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			    struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers)
 	__rte_exclusive_locks_required(vq->access_lock)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	struct buf_vector buf_vec[BUF_VECTOR_MAX];
 
@@ -1805,6 +1823,7 @@  virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue
 		uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count,
 		int16_t dma_id, uint16_t vchan_id)
 	__rte_exclusive_locks_required(vq->access_lock)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint32_t pkt_idx = 0;
 	uint32_t remained = count;
@@ -2154,8 +2173,7 @@  virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
 	if (unlikely(!vq->enabled || !vq->async))
 		goto out_access_unlock;
 
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_lock(vq);
+	vhost_user_iotlb_rd_lock(vq);
 
 	if (unlikely(!vq->access_ok))
 		if (unlikely(vring_translate(dev, vq) < 0))
@@ -2173,8 +2191,7 @@  virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
 				pkts, count, dma_id, vchan_id);
 
 out:
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_unlock(vq);
+	vhost_user_iotlb_rd_unlock(vq);
 
 out_access_unlock:
 	rte_spinlock_unlock(&vq->access_lock);
@@ -2697,6 +2714,7 @@  virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
 	bool legacy_ol_flags)
 	__rte_exclusive_locks_required(vq->access_lock)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint16_t i;
 	uint16_t free_entries;
@@ -2793,6 +2811,7 @@  virtio_dev_tx_split_legacy(struct virtio_net *dev,
 	struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
 	struct rte_mbuf **pkts, uint16_t count)
 	__rte_exclusive_locks_required(vq->access_lock)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true);
 }
@@ -2803,6 +2822,7 @@  virtio_dev_tx_split_compliant(struct virtio_net *dev,
 	struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
 	struct rte_mbuf **pkts, uint16_t count)
 	__rte_exclusive_locks_required(vq->access_lock)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false);
 }
@@ -2814,6 +2834,7 @@  vhost_reserve_avail_batch_packed(struct virtio_net *dev,
 				 uint16_t avail_idx,
 				 uintptr_t *desc_addrs,
 				 uint16_t *ids)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	bool wrap = vq->avail_wrap_counter;
 	struct vring_packed_desc *descs = vq->desc_packed;
@@ -2883,6 +2904,7 @@  virtio_dev_tx_batch_packed(struct virtio_net *dev,
 			   struct vhost_virtqueue *vq,
 			   struct rte_mbuf **pkts,
 			   bool legacy_ol_flags)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint16_t avail_idx = vq->last_avail_idx;
 	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
@@ -2929,6 +2951,7 @@  vhost_dequeue_single_packed(struct virtio_net *dev,
 			    uint16_t *buf_id,
 			    uint16_t *desc_count,
 			    bool legacy_ol_flags)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	struct buf_vector buf_vec[BUF_VECTOR_MAX];
 	uint32_t buf_len;
@@ -2972,6 +2995,7 @@  virtio_dev_tx_single_packed(struct virtio_net *dev,
 			    struct rte_mempool *mbuf_pool,
 			    struct rte_mbuf *pkts,
 			    bool legacy_ol_flags)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 
 	uint16_t buf_id, desc_count = 0;
@@ -3003,6 +3027,7 @@  virtio_dev_tx_packed(struct virtio_net *dev,
 		     uint32_t count,
 		     bool legacy_ol_flags)
 	__rte_exclusive_locks_required(vq->access_lock)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	uint32_t pkt_idx = 0;
 
@@ -3047,6 +3072,7 @@  virtio_dev_tx_packed_legacy(struct virtio_net *dev,
 	struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool,
 	struct rte_mbuf **__rte_restrict pkts, uint32_t count)
 	__rte_exclusive_locks_required(vq->access_lock)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true);
 }
@@ -3057,6 +3083,7 @@  virtio_dev_tx_packed_compliant(struct virtio_net *dev,
 	struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool,
 	struct rte_mbuf **__rte_restrict pkts, uint32_t count)
 	__rte_exclusive_locks_required(vq->access_lock)
+	__rte_shared_locks_required(vq->iotlb_lock)
 {
 	return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false);
 }
@@ -3096,8 +3123,7 @@  rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		goto out_access_unlock;
 	}
 
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_lock(vq);
+	vhost_user_iotlb_rd_lock(vq);
 
 	if (unlikely(!vq->access_ok))
 		if (unlikely(vring_translate(dev, vq) < 0)) {
@@ -3153,8 +3179,7 @@  rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	}
 
 out:
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_unlock(vq);
+	vhost_user_iotlb_rd_unlock(vq);
 
 out_access_unlock:
 	rte_spinlock_unlock(&vq->access_lock);