From patchwork Thu Jul 5 21:07:33 2018
X-Patchwork-Submitter: Maxime Coquelin <maxime.coquelin@redhat.com>
X-Patchwork-Id: 42435
X-Patchwork-Delegate: maxime.coquelin@redhat.com
From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: tiwei.bie@intel.com, zhihong.wang@intel.com, jfreimann@redhat.com,
	dev@dpdk.org
Cc: mst@redhat.com, jasowang@redhat.com, wexu@redhat.com,
	Maxime Coquelin <maxime.coquelin@redhat.com>
Date: Thu, 5 Jul 2018 23:07:33 +0200
Message-Id: <20180705210741.17974-8-maxime.coquelin@redhat.com>
In-Reply-To: <20180705210741.17974-1-maxime.coquelin@redhat.com>
References: <20180705210741.17974-1-maxime.coquelin@redhat.com>
Subject: [dpdk-dev] [PATCH v8 07/15] vhost: extract split ring handling from
	Rx and Tx functions

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/virtio_net.c | 238 +++++++++++++++++++++++-------------------
 1 file changed, 129 insertions(+), 109 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index b5f4f14df..7705c853b 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -226,13 +226,13 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 }
 
 static __rte_always_inline int
-fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
-			 uint32_t avail_idx, uint32_t *vec_idx,
+fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			 uint32_t avail_idx, uint16_t *vec_idx,
 			 struct buf_vector *buf_vec, uint16_t *desc_chain_head,
 			 uint16_t *desc_chain_len, uint8_t perm)
 {
 	uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
-	uint32_t vec_id = *vec_idx;
+	uint16_t vec_id = *vec_idx;
 	uint32_t len = 0;
 	uint64_t dlen, desc_avail, desc_iova;
 	struct vring_desc *descs = vq->desc;
@@ -323,13 +323,13 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
  * Returns -1 on fail, 0 on success
  */
 static inline int
-reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
+reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 				uint32_t size, struct buf_vector *buf_vec,
 				uint16_t *num_buffers, uint16_t avail_head,
 				uint16_t *nr_vec)
 {
 	uint16_t cur_idx;
-	uint32_t vec_idx = 0;
+	uint16_t vec_idx = 0;
 	uint16_t max_tries, tries = 0;
 
 	uint16_t head_idx = 0;
@@ -347,7 +347,8 @@ reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		if (unlikely(cur_idx == avail_head))
 			return -1;
 
-		if (unlikely(fill_vec_buf(dev, vq, cur_idx, &vec_idx, buf_vec,
+		if (unlikely(fill_vec_buf_split(dev, vq, cur_idx,
+						&vec_idx, buf_vec,
 						&head_idx, &len,
 						VHOST_ACCESS_RW) < 0))
 			return -1;
@@ -534,48 +535,22 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 }
 
 static __rte_always_inline uint32_t
-virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
+virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	struct rte_mbuf **pkts, uint32_t count)
 {
-	struct vhost_virtqueue *vq;
 	uint32_t pkt_idx = 0;
 	uint16_t num_buffers;
 	struct buf_vector buf_vec[BUF_VECTOR_MAX];
 	uint16_t avail_head;
 
-	VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
-	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
-		RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
-			dev->vid, __func__, queue_id);
-		return 0;
-	}
-
-	vq = dev->virtqueue[queue_id];
-
-	rte_spinlock_lock(&vq->access_lock);
-
-	if (unlikely(vq->enabled == 0))
-		goto out_access_unlock;
-
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_lock(vq);
-
-	if (unlikely(vq->access_ok == 0))
-		if (unlikely(vring_translate(dev, vq) < 0))
-			goto out;
-
-	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
-	if (count == 0)
-		goto out;
-
 	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
-
 	avail_head = *((volatile uint16_t *)&vq->avail->idx);
+
 	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
 		uint16_t nr_vec = 0;
 
-		if (unlikely(reserve_avail_buf(dev, vq,
+		if (unlikely(reserve_avail_buf_split(dev, vq,
 						pkt_len, buf_vec, &num_buffers,
 						avail_head, &nr_vec) < 0)) {
 			VHOST_LOG_DEBUG(VHOST_DATA,
@@ -608,6 +583,42 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 		vhost_vring_call(dev, vq);
 	}
 
+	return pkt_idx;
+}
+
+static __rte_always_inline uint32_t
+virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
+	struct rte_mbuf **pkts, uint32_t count)
+{
+	struct vhost_virtqueue *vq;
+
+	VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
+		RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+			dev->vid, __func__, queue_id);
+		return 0;
+	}
+
+	vq = dev->virtqueue[queue_id];
+
+	rte_spinlock_lock(&vq->access_lock);
+
+	if (unlikely(vq->enabled == 0))
+		goto out_access_unlock;
+
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_lock(vq);
+
+	if (unlikely(vq->access_ok == 0))
+		if (unlikely(vring_translate(dev, vq) < 0))
+			goto out;
+
+	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
+	if (count == 0)
+		goto out;
+
+	count = virtio_dev_rx_split(dev, vq, pkts, count);
+
 out:
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
 		vhost_user_iotlb_rd_unlock(vq);
@@ -615,7 +626,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 out_access_unlock:
 	rte_spinlock_unlock(&vq->access_lock);
 
-	return pkt_idx;
+	return count;
 }
 
 uint16_t
@@ -1020,48 +1031,13 @@ restore_mbuf(struct rte_mbuf *m)
 	}
 }
 
-uint16_t
-rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
+static __rte_always_inline uint16_t
+virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
 {
-	struct virtio_net *dev;
-	struct rte_mbuf *rarp_mbuf = NULL;
-	struct vhost_virtqueue *vq;
-	uint32_t i = 0;
+	uint16_t i;
 	uint16_t free_entries;
 
-	dev = get_device(vid);
-	if (!dev)
-		return 0;
-
-	if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
-		RTE_LOG(ERR, VHOST_DATA,
-			"(%d) %s: built-in vhost net backend is disabled.\n",
-			dev->vid, __func__);
-		return 0;
-	}
-
-	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
-		RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
-			dev->vid, __func__, queue_id);
-		return 0;
-	}
-
-	vq = dev->virtqueue[queue_id];
-
-	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
-		return 0;
-
-	if (unlikely(vq->enabled == 0))
-		goto out_access_unlock;
-
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_lock(vq);
-
-	if (unlikely(vq->access_ok == 0))
-		if (unlikely(vring_translate(dev, vq) < 0))
-			goto out;
-
 	if (unlikely(dev->dequeue_zero_copy)) {
 		struct zcopy_mbuf *zmbuf, *next;
 		int nr_updated = 0;
@@ -1088,39 +1064,10 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
 	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
 
-	/*
-	 * Construct a RARP broadcast packet, and inject it to the "pkts"
-	 * array, to looks like that guest actually send such packet.
-	 *
-	 * Check user_send_rarp() for more information.
-	 *
-	 * broadcast_rarp shares a cacheline in the virtio_net structure
-	 * with some fields that are accessed during enqueue and
-	 * rte_atomic16_cmpset() causes a write if using cmpxchg. This could
-	 * result in false sharing between enqueue and dequeue.
-	 *
-	 * Prevent unnecessary false sharing by reading broadcast_rarp first
-	 * and only performing cmpset if the read indicates it is likely to
-	 * be set.
-	 */
-
-	if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) &&
-			rte_atomic16_cmpset((volatile uint16_t *)
-				&dev->broadcast_rarp.cnt, 1, 0))) {
-
-		rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
-		if (rarp_mbuf == NULL) {
-			RTE_LOG(ERR, VHOST_DATA,
-				"Failed to make RARP packet.\n");
-			return 0;
-		}
-		count -= 1;
-	}
-
 	free_entries = *((volatile uint16_t *)&vq->avail->idx) -
 			vq->last_avail_idx;
 	if (free_entries == 0)
-		goto out;
+		return 0;
 
 	VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
 
@@ -1132,10 +1079,10 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	for (i = 0; i < count; i++) {
 		struct buf_vector buf_vec[BUF_VECTOR_MAX];
 		uint16_t head_idx, dummy_len;
-		uint32_t nr_vec = 0;
+		uint16_t nr_vec = 0;
 		int err;
 
-		if (unlikely(fill_vec_buf(dev, vq,
+		if (unlikely(fill_vec_buf_split(dev, vq,
 						vq->last_avail_idx + i,
 						&nr_vec, buf_vec,
 						&head_idx, &dummy_len,
@@ -1194,6 +1141,79 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		vhost_vring_call(dev, vq);
 	}
 
+	return i;
+}
+
+uint16_t
+rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
+	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+{
+	struct virtio_net *dev;
+	struct rte_mbuf *rarp_mbuf = NULL;
+	struct vhost_virtqueue *vq;
+
+	dev = get_device(vid);
+	if (!dev)
+		return 0;
+
+	if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
+		RTE_LOG(ERR, VHOST_DATA,
+			"(%d) %s: built-in vhost net backend is disabled.\n",
+			dev->vid, __func__);
+		return 0;
+	}
+
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
+		RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+			dev->vid, __func__, queue_id);
+		return 0;
+	}
+
+	vq = dev->virtqueue[queue_id];
+
+	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
+		return 0;
+
+	if (unlikely(vq->enabled == 0))
+		goto out_access_unlock;
+
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_lock(vq);
+
+	if (unlikely(vq->access_ok == 0))
+		if (unlikely(vring_translate(dev, vq) < 0))
+			goto out;
+
+	/*
+	 * Construct a RARP broadcast packet, and inject it to the "pkts"
+	 * array, to looks like that guest actually send such packet.
+	 *
+	 * Check user_send_rarp() for more information.
+	 *
+	 * broadcast_rarp shares a cacheline in the virtio_net structure
+	 * with some fields that are accessed during enqueue and
+	 * rte_atomic16_cmpset() causes a write if using cmpxchg. This could
+	 * result in false sharing between enqueue and dequeue.
+	 *
+	 * Prevent unnecessary false sharing by reading broadcast_rarp first
+	 * and only performing cmpset if the read indicates it is likely to
+	 * be set.
+	 */
+	if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) &&
+			rte_atomic16_cmpset((volatile uint16_t *)
+				&dev->broadcast_rarp.cnt, 1, 0))) {
+
+		rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
+		if (rarp_mbuf == NULL) {
+			RTE_LOG(ERR, VHOST_DATA,
+				"Failed to make RARP packet.\n");
+			return 0;
+		}
+		count -= 1;
+	}
+
+	count = virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count);
+
 out:
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
 		vhost_user_iotlb_rd_unlock(vq);
@@ -1206,10 +1226,10 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		 * Inject it to the head of "pkts" array, so that switch's mac
 		 * learning table will get updated first.
 		 */
-		memmove(&pkts[1], pkts, i * sizeof(struct rte_mbuf *));
+		memmove(&pkts[1], pkts, count * sizeof(struct rte_mbuf *));
 		pkts[0] = rarp_mbuf;
-		i += 1;
+		count += 1;
 	}
 
-	return i;
+	return count;
 }