get:
Show a patch.

patch:
Update a patch. Only the fields supplied in the request body are changed (a partial update).

put:
Update a patch. All writable fields are replaced (a full update).
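
A minimal sketch of driving this endpoint from Python, assuming the third-party requests library; the token value is a placeholder (generate a real one in your Patchwork profile), writes require maintainer rights on the project, and valid state names depend on the instance's configuration:

import requests

BASE = "https://patches.dpdk.org/api"
TOKEN = "REPLACE_ME"  # placeholder, not a real API token

# get: show a patch (read access needs no authentication)
resp = requests.get(f"{BASE}/patches/61851/")
resp.raise_for_status()
patch = resp.json()
print(patch["name"], patch["state"])

# patch: update only the fields supplied in the body (here, the state)
resp = requests.patch(
    f"{BASE}/patches/61851/",
    headers={"Authorization": f"Token {TOKEN}"},
    json={"state": "accepted"},
)
resp.raise_for_status()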

GET /api/patches/61851/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 61851,
    "url": "https://patches.dpdk.org/api/patches/61851/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/20191024160832.14543-13-yong.liu@intel.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20191024160832.14543-13-yong.liu@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20191024160832.14543-13-yong.liu@intel.com",
    "date": "2019-10-24T16:08:31",
    "name": "[v9,12/13] vhost: optimize packed ring dequeue",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "0808a5fc75042dfff5f4c0dfcc99bb4f87db15cf",
    "submitter": {
        "id": 17,
        "url": "https://patches.dpdk.org/api/people/17/?format=api",
        "name": "Marvin Liu",
        "email": "yong.liu@intel.com"
    },
    "delegate": {
        "id": 2642,
        "url": "https://patches.dpdk.org/api/users/2642/?format=api",
        "username": "mcoquelin",
        "first_name": "Maxime",
        "last_name": "Coquelin",
        "email": "maxime.coquelin@redhat.com"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/20191024160832.14543-13-yong.liu@intel.com/mbox/",
    "series": [
        {
            "id": 7033,
            "url": "https://patches.dpdk.org/api/series/7033/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=7033",
            "date": "2019-10-24T16:08:19",
            "name": "vhost packed ring performance optimization",
            "version": 9,
            "mbox": "https://patches.dpdk.org/series/7033/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/61851/comments/",
    "check": "success",
    "checks": "https://patches.dpdk.org/api/patches/61851/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id 3D5701E8AB;\n\tThu, 24 Oct 2019 10:29:40 +0200 (CEST)",
            "from mga11.intel.com (mga11.intel.com [192.55.52.93])\n\tby dpdk.org (Postfix) with ESMTP id 9D53D1E568\n\tfor <dev@dpdk.org>; Thu, 24 Oct 2019 10:28:54 +0200 (CEST)",
            "from fmsmga002.fm.intel.com ([10.253.24.26])\n\tby fmsmga102.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t24 Oct 2019 01:28:54 -0700",
            "from npg-dpdk-virtual-marvin-dev.sh.intel.com ([10.67.119.142])\n\tby fmsmga002.fm.intel.com with ESMTP; 24 Oct 2019 01:28:52 -0700"
        ],
        "X-Amp-Result": "SKIPPED(no attachment in message)",
        "X-Amp-File-Uploaded": "False",
        "X-ExtLoop1": "1",
        "X-IronPort-AV": "E=Sophos;i=\"5.68,224,1569308400\"; d=\"scan'208\";a=\"228431127\"",
        "From": "Marvin Liu <yong.liu@intel.com>",
        "To": "maxime.coquelin@redhat.com, tiwei.bie@intel.com, zhihong.wang@intel.com, \n\tstephen@networkplumber.org, gavin.hu@arm.com",
        "Cc": "dev@dpdk.org,\n\tMarvin Liu <yong.liu@intel.com>",
        "Date": "Fri, 25 Oct 2019 00:08:31 +0800",
        "Message-Id": "<20191024160832.14543-13-yong.liu@intel.com>",
        "X-Mailer": "git-send-email 2.17.1",
        "In-Reply-To": "<20191024160832.14543-1-yong.liu@intel.com>",
        "References": "<20191021220813.55236-1-yong.liu@intel.com>\n\t<20191024160832.14543-1-yong.liu@intel.com>",
        "Subject": "[dpdk-dev] [PATCH v9 12/13] vhost: optimize packed ring dequeue",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Optimize vhost device packed ring dequeue function by splitting batch\nand single functions. No-chained and direct descriptors will be handled\nby batch and other will be handled by single as before.\n\nSigned-off-by: Marvin Liu <yong.liu@intel.com>\nReviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>\n---\n lib/librte_vhost/virtio_net.c | 236 ++++++++++------------------------\n 1 file changed, 67 insertions(+), 169 deletions(-)",
    "diff": "diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c\nindex 0243573a3..ab6726996 100644\n--- a/lib/librte_vhost/virtio_net.c\n+++ b/lib/librte_vhost/virtio_net.c\n@@ -201,68 +201,6 @@ vhost_flush_enqueue_batch_packed(struct virtio_net *dev,\n \tvq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);\n }\n \n-static __rte_always_inline void\n-flush_shadow_used_ring_packed(struct virtio_net *dev,\n-\t\t\tstruct vhost_virtqueue *vq)\n-{\n-\tint i;\n-\tuint16_t used_idx = vq->last_used_idx;\n-\tuint16_t head_idx = vq->last_used_idx;\n-\tuint16_t head_flags = 0;\n-\n-\t/* Split loop in two to save memory barriers */\n-\tfor (i = 0; i < vq->shadow_used_idx; i++) {\n-\t\tvq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id;\n-\t\tvq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len;\n-\n-\t\tused_idx += vq->shadow_used_packed[i].count;\n-\t\tif (used_idx >= vq->size)\n-\t\t\tused_idx -= vq->size;\n-\t}\n-\n-\tfor (i = 0; i < vq->shadow_used_idx; i++) {\n-\t\tuint16_t flags;\n-\n-\t\tif (vq->shadow_used_packed[i].len)\n-\t\t\tflags = VRING_DESC_F_WRITE;\n-\t\telse\n-\t\t\tflags = 0;\n-\n-\t\tif (vq->used_wrap_counter) {\n-\t\t\tflags |= VRING_DESC_F_USED;\n-\t\t\tflags |= VRING_DESC_F_AVAIL;\n-\t\t} else {\n-\t\t\tflags &= ~VRING_DESC_F_USED;\n-\t\t\tflags &= ~VRING_DESC_F_AVAIL;\n-\t\t}\n-\n-\t\tif (i > 0) {\n-\t\t\tvq->desc_packed[vq->last_used_idx].flags = flags;\n-\n-\t\t\tvhost_log_cache_used_vring(dev, vq,\n-\t\t\t\t\tvq->last_used_idx *\n-\t\t\t\t\tsizeof(struct vring_packed_desc),\n-\t\t\t\t\tsizeof(struct vring_packed_desc));\n-\t\t} else {\n-\t\t\thead_idx = vq->last_used_idx;\n-\t\t\thead_flags = flags;\n-\t\t}\n-\n-\t\tvq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count);\n-\t}\n-\n-\t__atomic_store_n(&vq->desc_packed[head_idx].flags, head_flags,\n-\t\t\t __ATOMIC_RELEASE);\n-\n-\tvhost_log_cache_used_vring(dev, vq,\n-\t\t\t\thead_idx *\n-\t\t\t\tsizeof(struct vring_packed_desc),\n-\t\t\t\tsizeof(struct vring_packed_desc));\n-\n-\tvq->shadow_used_idx = 0;\n-\tvhost_log_cache_sync(dev, vq);\n-}\n-\n static __rte_always_inline void\n vhost_shadow_dequeue_batch_packed(struct virtio_net *dev,\n \t\t\t\t  struct vhost_virtqueue *vq,\n@@ -335,17 +273,6 @@ vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq,\n \tvq_inc_last_used_packed(vq, count);\n }\n \n-static __rte_always_inline void\n-update_shadow_used_ring_packed(struct vhost_virtqueue *vq,\n-\t\t\t uint16_t desc_idx, uint32_t len, uint16_t count)\n-{\n-\tuint16_t i = vq->shadow_used_idx++;\n-\n-\tvq->shadow_used_packed[i].id  = desc_idx;\n-\tvq->shadow_used_packed[i].len = len;\n-\tvq->shadow_used_packed[i].count = count;\n-}\n-\n static inline void\n do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)\n {\n@@ -403,7 +330,7 @@ vhost_shadow_enqueue_single_packed(struct virtio_net *dev,\n \t}\n }\n \n-static __rte_unused void\n+static __rte_always_inline void\n vhost_flush_dequeue_packed(struct virtio_net *dev,\n \t\t\t   struct vhost_virtqueue *vq)\n {\n@@ -1893,7 +1820,7 @@ vhost_reserve_avail_batch_packed(struct virtio_net *dev,\n \treturn -1;\n }\n \n-static __rte_unused int\n+static __rte_always_inline int\n virtio_dev_tx_batch_packed(struct virtio_net *dev,\n \t\t\t   struct vhost_virtqueue *vq,\n \t\t\t   struct rte_mempool *mbuf_pool,\n@@ -1961,7 +1888,7 @@ vhost_dequeue_single_packed(struct virtio_net *dev,\n \treturn 0;\n }\n \n-static __rte_unused int\n+static __rte_always_inline int\n virtio_dev_tx_single_packed(struct virtio_net *dev,\n \t\t\t 
   struct vhost_virtqueue *vq,\n \t\t\t    struct rte_mempool *mbuf_pool,\n@@ -1981,7 +1908,7 @@ virtio_dev_tx_single_packed(struct virtio_net *dev,\n \treturn 0;\n }\n \n-static __rte_unused int\n+static __rte_always_inline int\n virtio_dev_tx_batch_packed_zmbuf(struct virtio_net *dev,\n \t\t\t\t struct vhost_virtqueue *vq,\n \t\t\t\t struct rte_mempool *mbuf_pool,\n@@ -2030,7 +1957,7 @@ virtio_dev_tx_batch_packed_zmbuf(struct virtio_net *dev,\n \treturn -1;\n }\n \n-static __rte_unused int\n+static __rte_always_inline int\n virtio_dev_tx_single_packed_zmbuf(struct virtio_net *dev,\n \t\t\t\t  struct vhost_virtqueue *vq,\n \t\t\t\t  struct rte_mempool *mbuf_pool,\n@@ -2061,7 +1988,7 @@ virtio_dev_tx_single_packed_zmbuf(struct virtio_net *dev,\n \treturn 0;\n }\n \n-static __rte_unused void\n+static __rte_always_inline void\n free_zmbuf(struct vhost_virtqueue *vq)\n {\n \tstruct zcopy_mbuf *next = NULL;\n@@ -2102,111 +2029,77 @@ free_zmbuf(struct vhost_virtqueue *vq)\n }\n \n static __rte_noinline uint16_t\n-virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,\n-\tstruct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)\n+virtio_dev_tx_packed_zmbuf(struct virtio_net *dev,\n+\t\t\t   struct vhost_virtqueue *vq,\n+\t\t\t   struct rte_mempool *mbuf_pool,\n+\t\t\t   struct rte_mbuf **pkts,\n+\t\t\t   uint32_t count)\n {\n-\tuint16_t i;\n-\n-\tif (unlikely(dev->dequeue_zero_copy)) {\n-\t\tstruct zcopy_mbuf *zmbuf, *next;\n-\n-\t\tfor (zmbuf = TAILQ_FIRST(&vq->zmbuf_list);\n-\t\t     zmbuf != NULL; zmbuf = next) {\n-\t\t\tnext = TAILQ_NEXT(zmbuf, next);\n+\tuint32_t pkt_idx = 0;\n+\tuint32_t remained = count;\n \n-\t\t\tif (mbuf_is_consumed(zmbuf->mbuf)) {\n-\t\t\t\tupdate_shadow_used_ring_packed(vq,\n-\t\t\t\t\t\tzmbuf->desc_idx,\n-\t\t\t\t\t\t0,\n-\t\t\t\t\t\tzmbuf->desc_count);\n+\tfree_zmbuf(vq);\n \n-\t\t\t\tTAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);\n-\t\t\t\trestore_mbuf(zmbuf->mbuf);\n-\t\t\t\trte_pktmbuf_free(zmbuf->mbuf);\n-\t\t\t\tput_zmbuf(zmbuf);\n-\t\t\t\tvq->nr_zmbuf -= 1;\n+\tdo {\n+\t\tif (remained >= PACKED_BATCH_SIZE) {\n+\t\t\tif (!virtio_dev_tx_batch_packed_zmbuf(dev, vq,\n+\t\t\t\tmbuf_pool, &pkts[pkt_idx])) {\n+\t\t\t\tpkt_idx += PACKED_BATCH_SIZE;\n+\t\t\t\tremained -= PACKED_BATCH_SIZE;\n+\t\t\t\tcontinue;\n \t\t\t}\n \t\t}\n \n-\t\tif (likely(vq->shadow_used_idx)) {\n-\t\t\tflush_shadow_used_ring_packed(dev, vq);\n-\t\t\tvhost_vring_call_packed(dev, vq);\n-\t\t}\n-\t}\n-\n-\tVHOST_LOG_DEBUG(VHOST_DATA, \"(%d) %s\\n\", dev->vid, __func__);\n-\n-\tcount = RTE_MIN(count, MAX_PKT_BURST);\n-\tVHOST_LOG_DEBUG(VHOST_DATA, \"(%d) about to dequeue %u buffers\\n\",\n-\t\t\tdev->vid, count);\n-\n-\tfor (i = 0; i < count; i++) {\n-\t\tstruct buf_vector buf_vec[BUF_VECTOR_MAX];\n-\t\tuint16_t buf_id;\n-\t\tuint32_t buf_len;\n-\t\tuint16_t desc_count, nr_vec = 0;\n-\t\tint err;\n-\n-\t\tif (unlikely(fill_vec_buf_packed(dev, vq,\n-\t\t\t\t\t\tvq->last_avail_idx, &desc_count,\n-\t\t\t\t\t\tbuf_vec, &nr_vec,\n-\t\t\t\t\t\t&buf_id, &buf_len,\n-\t\t\t\t\t\tVHOST_ACCESS_RO) < 0))\n+\t\tif (virtio_dev_tx_single_packed_zmbuf(dev, vq, mbuf_pool,\n+\t\t\t\t\t\t      &pkts[pkt_idx]))\n \t\t\tbreak;\n+\t\tpkt_idx++;\n+\t\tremained--;\n \n-\t\tif (likely(dev->dequeue_zero_copy == 0))\n-\t\t\tupdate_shadow_used_ring_packed(vq, buf_id, 0,\n-\t\t\t\t\tdesc_count);\n+\t} while (remained);\n \n-\t\tpkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len);\n-\t\tif (unlikely(pkts[i] == NULL))\n-\t\t\tbreak;\n-\n-\t\terr = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, 
pkts[i],\n-\t\t\t\tmbuf_pool);\n-\t\tif (unlikely(err)) {\n-\t\t\trte_pktmbuf_free(pkts[i]);\n-\t\t\tbreak;\n-\t\t}\n+\tif (pkt_idx)\n+\t\tvhost_vring_call_packed(dev, vq);\n \n-\t\tif (unlikely(dev->dequeue_zero_copy)) {\n-\t\t\tstruct zcopy_mbuf *zmbuf;\n+\treturn pkt_idx;\n+}\n \n-\t\t\tzmbuf = get_zmbuf(vq);\n-\t\t\tif (!zmbuf) {\n-\t\t\t\trte_pktmbuf_free(pkts[i]);\n-\t\t\t\tbreak;\n-\t\t\t}\n-\t\t\tzmbuf->mbuf = pkts[i];\n-\t\t\tzmbuf->desc_idx = buf_id;\n-\t\t\tzmbuf->desc_count = desc_count;\n+static __rte_noinline uint16_t\n+virtio_dev_tx_packed(struct virtio_net *dev,\n+\t\t     struct vhost_virtqueue *vq,\n+\t\t     struct rte_mempool *mbuf_pool,\n+\t\t     struct rte_mbuf **pkts,\n+\t\t     uint32_t count)\n+{\n+\tuint32_t pkt_idx = 0;\n+\tuint32_t remained = count;\n \n-\t\t\t/*\n-\t\t\t * Pin lock the mbuf; we will check later to see\n-\t\t\t * whether the mbuf is freed (when we are the last\n-\t\t\t * user) or not. If that's the case, we then could\n-\t\t\t * update the used ring safely.\n-\t\t\t */\n-\t\t\trte_mbuf_refcnt_update(pkts[i], 1);\n+\tdo {\n+\t\trte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);\n \n-\t\t\tvq->nr_zmbuf += 1;\n-\t\t\tTAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);\n+\t\tif (remained >= PACKED_BATCH_SIZE) {\n+\t\t\tif (!virtio_dev_tx_batch_packed(dev, vq, mbuf_pool,\n+\t\t\t\t\t\t\t&pkts[pkt_idx])) {\n+\t\t\t\tvhost_flush_dequeue_packed(dev, vq);\n+\t\t\t\tpkt_idx += PACKED_BATCH_SIZE;\n+\t\t\t\tremained -= PACKED_BATCH_SIZE;\n+\t\t\t\tcontinue;\n+\t\t\t}\n \t\t}\n \n-\t\tvq_inc_last_avail_packed(vq, desc_count);\n-\t}\n+\t\tif (virtio_dev_tx_single_packed(dev, vq, mbuf_pool,\n+\t\t\t\t\t\t&pkts[pkt_idx]))\n+\t\t\tbreak;\n+\t\tvhost_flush_dequeue_packed(dev, vq);\n+\t\tpkt_idx++;\n+\t\tremained--;\n \n-\tif (likely(dev->dequeue_zero_copy == 0)) {\n+\t} while (remained);\n+\n+\tif (vq->shadow_used_idx)\n \t\tdo_data_copy_dequeue(vq);\n-\t\tif (unlikely(i < count))\n-\t\t\tvq->shadow_used_idx = i;\n-\t\tif (likely(vq->shadow_used_idx)) {\n-\t\t\tflush_shadow_used_ring_packed(dev, vq);\n-\t\t\tvhost_vring_call_packed(dev, vq);\n-\t\t}\n-\t}\n \n-\treturn i;\n+\treturn pkt_idx;\n }\n \n uint16_t\n@@ -2282,9 +2175,14 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,\n \t\tcount -= 1;\n \t}\n \n-\tif (vq_is_packed(dev))\n-\t\tcount = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count);\n-\telse\n+\tif (vq_is_packed(dev)) {\n+\t\tif (unlikely(dev->dequeue_zero_copy))\n+\t\t\tcount = virtio_dev_tx_packed_zmbuf(dev, vq, mbuf_pool,\n+\t\t\t\t\t\t\t   pkts, count);\n+\t\telse\n+\t\t\tcount = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts,\n+\t\t\t\t\t\t     count);\n+\t} else\n \t\tcount = virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count);\n \n out:\n",
    "prefixes": [
        "v9",
        "12/13"
    ]
}
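
The "mbox" field above exposes the patch as a raw mbox-format email. A hedged sketch of fetching it and applying it to a local checkout; the requests library, the output filename, and the clone directory "dpdk" are assumptions (the clone URL comes from the project's scm_url field):

import os
import subprocess

import requests

MBOX_URL = "https://patches.dpdk.org/project/dpdk/patch/20191024160832.14543-13-yong.liu@intel.com/mbox/"

# Download the raw patch email.
resp = requests.get(MBOX_URL)
resp.raise_for_status()

with open("patch-61851.mbox", "wb") as f:
    f.write(resp.content)

# Apply with git am; "dpdk" is an assumed path to a local clone of
# git://dpdk.org/dpdk.
subprocess.run(
    ["git", "-C", "dpdk", "am", os.path.abspath("patch-61851.mbox")],
    check=True,
)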