get:
Show a patch.

patch:
Partially update a patch (only the fields supplied in the request body are changed).

put:
Update a patch (full update; all writable fields are replaced).

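For reference, a minimal sketch of driving this endpoint from a script, assuming the Python "requests" library and Patchwork's token authentication for write access (the token value, target state, and variable names below are placeholders, not part of the API response shown here):

import requests

BASE = "https://patches.dpdk.org/api"
PATCH_ID = 97145
TOKEN = "<your-patchwork-api-token>"  # placeholder; only needed for PATCH/PUT

# get: Show a patch (read access is anonymous).
resp = requests.get(f"{BASE}/patches/{PATCH_ID}/")
resp.raise_for_status()
patch = resp.json()
print(patch["name"], "->", patch["state"])

# patch: Partially update a patch (requires a token with maintainer rights).
resp = requests.patch(
    f"{BASE}/patches/{PATCH_ID}/",
    headers={"Authorization": f"Token {TOKEN}"},
    json={"state": "accepted"},
)
resp.raise_for_status()

The example request and response below show what the GET call above returns for this particular patch.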
GET /api/patches/97145/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 97145,
    "url": "https://patches.dpdk.org/api/patches/97145/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/1629463466-450012-1-git-send-email-jiayu.hu@intel.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<1629463466-450012-1-git-send-email-jiayu.hu@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/1629463466-450012-1-git-send-email-jiayu.hu@intel.com",
    "date": "2021-08-20T12:44:26",
    "name": "vhost: remove copy threshold for async vhost",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "ddca9ab597eaa572d8029f26e01f166a9229ec9e",
    "submitter": {
        "id": 539,
        "url": "https://patches.dpdk.org/api/people/539/?format=api",
        "name": "Hu, Jiayu",
        "email": "jiayu.hu@intel.com"
    },
    "delegate": {
        "id": 2642,
        "url": "https://patches.dpdk.org/api/users/2642/?format=api",
        "username": "mcoquelin",
        "first_name": "Maxime",
        "last_name": "Coquelin",
        "email": "maxime.coquelin@redhat.com"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/1629463466-450012-1-git-send-email-jiayu.hu@intel.com/mbox/",
    "series": [
        {
            "id": 18367,
            "url": "https://patches.dpdk.org/api/series/18367/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=18367",
            "date": "2021-08-20T12:44:26",
            "name": "vhost: remove copy threshold for async vhost",
            "version": 1,
            "mbox": "https://patches.dpdk.org/series/18367/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/97145/comments/",
    "check": "warning",
    "checks": "https://patches.dpdk.org/api/patches/97145/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id D4C8CA0C4B;\n\tFri, 20 Aug 2021 08:17:45 +0200 (CEST)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 582F64013F;\n\tFri, 20 Aug 2021 08:17:45 +0200 (CEST)",
            "from mga06.intel.com (mga06.intel.com [134.134.136.31])\n by mails.dpdk.org (Postfix) with ESMTP id E704D4003D\n for <dev@dpdk.org>; Fri, 20 Aug 2021 08:17:42 +0200 (CEST)",
            "from fmsmga006.fm.intel.com ([10.253.24.20])\n by orsmga104.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 19 Aug 2021 23:17:41 -0700",
            "from npg_dpdk_virtio_jiayuhu_07.sh.intel.com ([10.67.119.25])\n by fmsmga006.fm.intel.com with ESMTP; 19 Aug 2021 23:17:39 -0700"
        ],
        "X-IronPort-AV": [
            "E=McAfee;i=\"6200,9189,10081\"; a=\"277738290\"",
            "E=Sophos;i=\"5.84,336,1620716400\"; d=\"scan'208\";a=\"277738290\"",
            "E=Sophos;i=\"5.84,336,1620716400\"; d=\"scan'208\";a=\"679869390\""
        ],
        "X-ExtLoop1": "1",
        "From": "Jiayu Hu <jiayu.hu@intel.com>",
        "To": "dev@dpdk.org",
        "Cc": "maxime.coquelin@redhat.com, chenbo.xia@intel.com,\n david.marchand@redhat.com, Jiayu Hu <jiayu.hu@intel.com>,\n Cheng Jiang <cheng1.jiang@intel.com>",
        "Date": "Fri, 20 Aug 2021 08:44:26 -0400",
        "Message-Id": "<1629463466-450012-1-git-send-email-jiayu.hu@intel.com>",
        "X-Mailer": "git-send-email 2.7.4",
        "Subject": "[dpdk-dev] [PATCH] vhost: remove copy threshold for async vhost",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Copy threshold is introduced in async vhost data path to select\nthe appropriate copy engine to do copies for higher efficiency.\nHowever, it may cause packets out-of-order, and it also causes\ndata path performance unpredictable.\n\nTherefore, this patch removes copy threshold support in async vhost\ndata path.\n\nSigned-off-by: Jiayu Hu <jiayu.hu@intel.com>\nSigned-off-by: Cheng Jiang <cheng1.jiang@intel.com>\n---\n doc/guides/prog_guide/vhost_lib.rst |   7 -\n examples/vhost/main.c               |  22 +-\n lib/vhost/rte_vhost_async.h         |  22 +-\n lib/vhost/vhost.c                   |   6 +-\n lib/vhost/vhost.h                   |   1 -\n lib/vhost/virtio_net.c              | 439 +++++++++---------------------------\n 6 files changed, 116 insertions(+), 381 deletions(-)",
    "diff": "diff --git a/doc/guides/prog_guide/vhost_lib.rst b/doc/guides/prog_guide/vhost_lib.rst\nindex 8874033..171e009 100644\n--- a/doc/guides/prog_guide/vhost_lib.rst\n+++ b/doc/guides/prog_guide/vhost_lib.rst\n@@ -235,13 +235,6 @@ The following is an overview of some key Vhost API functions:\n     Currently, only ``RTE_VHOST_ASYNC_INORDER`` capable device is\n     supported by vhost.\n \n-  * ``async_threshold``\n-\n-    The copy length (in bytes) below which CPU copy will be used even if\n-    applications call async vhost APIs to enqueue/dequeue data.\n-\n-    Typical value is 256~1024 depending on the async device capability.\n-\n   Applications must provide following ``ops`` callbacks for vhost lib to\n   work with the async copy devices:\n \ndiff --git a/examples/vhost/main.c b/examples/vhost/main.c\nindex bc3d71c..a4a8214 100644\n--- a/examples/vhost/main.c\n+++ b/examples/vhost/main.c\n@@ -891,17 +891,11 @@ drain_vhost(struct vhost_dev *vdev)\n \tif (builtin_net_driver) {\n \t\tret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);\n \t} else if (async_vhost_driver) {\n-\t\tuint32_t cpu_cpl_nr = 0;\n \t\tuint16_t enqueue_fail = 0;\n-\t\tstruct rte_mbuf *m_cpu_cpl[nr_xmit];\n \n \t\tcomplete_async_pkts(vdev);\n-\t\tret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,\n-\t\t\t\t\tm, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);\n-\t\t__atomic_add_fetch(&vdev->pkts_inflight, ret - cpu_cpl_nr, __ATOMIC_SEQ_CST);\n-\n-\t\tif (cpu_cpl_nr)\n-\t\t\tfree_pkts(m_cpu_cpl, cpu_cpl_nr);\n+\t\tret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ, m, nr_xmit);\n+\t\t__atomic_add_fetch(&vdev->pkts_inflight, ret, __ATOMIC_SEQ_CST);\n \n \t\tenqueue_fail = nr_xmit - ret;\n \t\tif (enqueue_fail)\n@@ -1222,19 +1216,12 @@ drain_eth_rx(struct vhost_dev *vdev)\n \t\tenqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,\n \t\t\t\t\t\tpkts, rx_count);\n \t} else if (async_vhost_driver) {\n-\t\tuint32_t cpu_cpl_nr = 0;\n \t\tuint16_t enqueue_fail = 0;\n-\t\tstruct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];\n \n \t\tcomplete_async_pkts(vdev);\n \t\tenqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,\n-\t\t\t\t\tVIRTIO_RXQ, pkts, rx_count,\n-\t\t\t\t\tm_cpu_cpl, &cpu_cpl_nr);\n-\t\t__atomic_add_fetch(&vdev->pkts_inflight, enqueue_count - cpu_cpl_nr,\n-\t\t\t\t\t__ATOMIC_SEQ_CST);\n-\n-\t\tif (cpu_cpl_nr)\n-\t\t\tfree_pkts(m_cpu_cpl, cpu_cpl_nr);\n+\t\t\t\t\tVIRTIO_RXQ, pkts, rx_count);\n+\t\t__atomic_add_fetch(&vdev->pkts_inflight, enqueue_count, __ATOMIC_SEQ_CST);\n \n \t\tenqueue_fail = rx_count - enqueue_count;\n \t\tif (enqueue_fail)\n@@ -1495,7 +1482,6 @@ new_device(int vid)\n \t\t\t\tioat_check_completed_copies_cb;\n \n \t\t\tconfig.features = RTE_VHOST_ASYNC_INORDER;\n-\t\t\tconfig.async_threshold = 256;\n \n \t\t\treturn rte_vhost_async_channel_register(vid, VIRTIO_RXQ,\n \t\t\t\tconfig, &channel_ops);\ndiff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h\nindex b25ff44..ad71555 100644\n--- a/lib/vhost/rte_vhost_async.h\n+++ b/lib/vhost/rte_vhost_async.h\n@@ -103,7 +103,6 @@ enum {\n  *  async channel configuration\n  */\n struct rte_vhost_async_config {\n-\tuint32_t async_threshold;\n \tuint32_t features;\n \tuint32_t rsvd[2];\n };\n@@ -182,13 +181,9 @@ int rte_vhost_async_channel_unregister_thread_unsafe(int vid,\n \t\tuint16_t queue_id);\n \n /**\n- * This function submits enqueue data to async engine. Successfully\n- * enqueued packets can be transfer completed or being occupied by DMA\n- * engines, when this API returns. 
Transfer completed packets are returned\n- * in comp_pkts, so users need to guarantee its size is greater than or\n- * equal to the size of pkts; for packets that are successfully enqueued\n- * but not transfer completed, users should poll transfer status by\n- * rte_vhost_poll_enqueue_completed().\n+ * This function submits enqueue packets to async copy engine. Users\n+ * need to poll transfer status by rte_vhost_poll_enqueue_completed()\n+ * for successfully enqueued packets.\n  *\n  * @param vid\n  *  id of vhost device to enqueue data\n@@ -198,19 +193,12 @@ int rte_vhost_async_channel_unregister_thread_unsafe(int vid,\n  *  array of packets to be enqueued\n  * @param count\n  *  packets num to be enqueued\n- * @param comp_pkts\n- *  empty array to get transfer completed packets. Users need to\n- *  guarantee its size is greater than or equal to that of pkts\n- * @param comp_count\n- *  num of packets that are transfer completed, when this API returns.\n- *  If no packets are transfer completed, its value is set to 0.\n  * @return\n- *  num of packets enqueued, including in-flight and transfer completed\n+ *  num of packets enqueued\n  */\n __rte_experimental\n uint16_t rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,\n-\t\tstruct rte_mbuf **pkts, uint16_t count,\n-\t\tstruct rte_mbuf **comp_pkts, uint32_t *comp_count);\n+\t\tstruct rte_mbuf **pkts, uint16_t count);\n \n /**\n  * This function checks async completion status for a specific vhost\ndiff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c\nindex 355ff37..996287c 100644\n--- a/lib/vhost/vhost.c\n+++ b/lib/vhost/vhost.c\n@@ -1621,7 +1621,6 @@ int rte_vhost_extern_callback_register(int vid,\n \n static __rte_always_inline int\n async_channel_register(int vid, uint16_t queue_id,\n-\t\tstruct rte_vhost_async_config config,\n \t\tstruct rte_vhost_async_channel_ops *ops)\n {\n \tstruct virtio_net *dev = get_device(vid);\n@@ -1693,7 +1692,6 @@ async_channel_register(int vid, uint16_t queue_id,\n \n \tvq->async_ops.check_completed_copies = ops->check_completed_copies;\n \tvq->async_ops.transfer_data = ops->transfer_data;\n-\tvq->async_threshold = config.async_threshold;\n \n \tvq->async_registered = true;\n \n@@ -1732,7 +1730,7 @@ rte_vhost_async_channel_register(int vid, uint16_t queue_id,\n \t\treturn -1;\n \n \trte_spinlock_lock(&vq->access_lock);\n-\tret = async_channel_register(vid, queue_id, config, ops);\n+\tret = async_channel_register(vid, queue_id, ops);\n \trte_spinlock_unlock(&vq->access_lock);\n \n \treturn ret;\n@@ -1768,7 +1766,7 @@ rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id,\n \t\tops->transfer_data == NULL))\n \t\treturn -1;\n \n-\treturn async_channel_register(vid, queue_id, config, ops);\n+\treturn async_channel_register(vid, queue_id, ops);\n }\n \n int\ndiff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h\nindex d98ca8a..1e56311 100644\n--- a/lib/vhost/vhost.h\n+++ b/lib/vhost/vhost.h\n@@ -219,7 +219,6 @@ struct vhost_virtqueue {\n \n \t/* vq async features */\n \tbool\t\tasync_registered;\n-\tuint32_t\tasync_threshold;\n \n \tint\t\t\tnotif_enable;\n #define VIRTIO_UNINITIALIZED_NOTIF\t(-1)\ndiff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c\nindex 8549afb..f6127c7 100644\n--- a/lib/vhost/virtio_net.c\n+++ b/lib/vhost/virtio_net.c\n@@ -965,17 +965,16 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,\n \t\t\tstruct rte_vhost_iov_iter *src_it,\n \t\t\tstruct rte_vhost_iov_iter *dst_it)\n {\n+\tstruct rte_mbuf *hdr_mbuf;\n+\tstruct 
virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;\n+\tuint64_t buf_addr, buf_iova;\n+\tuint64_t hdr_addr;\n+\tuint64_t mapped_len;\n \tuint32_t vec_idx = 0;\n \tuint32_t mbuf_offset, mbuf_avail;\n \tuint32_t buf_offset, buf_avail;\n-\tuint64_t buf_addr, buf_iova, buf_len;\n-\tuint32_t cpy_len, cpy_threshold;\n-\tuint64_t hdr_addr;\n-\tstruct rte_mbuf *hdr_mbuf;\n-\tstruct batch_copy_elem *batch_copy = vq->batch_copy_elems;\n-\tstruct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;\n+\tuint32_t cpy_len, buf_len;\n \tint error = 0;\n-\tuint64_t mapped_len;\n \n \tuint32_t tlen = 0;\n \tint tvec_idx = 0;\n@@ -986,8 +985,6 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,\n \t\tgoto out;\n \t}\n \n-\tcpy_threshold = vq->async_threshold;\n-\n \tbuf_addr = buf_vec[vec_idx].buf_addr;\n \tbuf_iova = buf_vec[vec_idx].buf_iova;\n \tbuf_len = buf_vec[vec_idx].buf_len;\n@@ -1037,7 +1034,7 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,\n \t\t\tbuf_len = buf_vec[vec_idx].buf_len;\n \n \t\t\tbuf_offset = 0;\n-\t\t\tbuf_avail  = buf_len;\n+\t\t\tbuf_avail = buf_len;\n \t\t}\n \n \t\t/* done with current mbuf, get the next one */\n@@ -1045,7 +1042,7 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,\n \t\t\tm = m->next;\n \n \t\t\tmbuf_offset = 0;\n-\t\t\tmbuf_avail  = rte_pktmbuf_data_len(m);\n+\t\t\tmbuf_avail = rte_pktmbuf_data_len(m);\n \t\t}\n \n \t\tif (hdr_addr) {\n@@ -1069,18 +1066,20 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,\n \n \t\tcpy_len = RTE_MIN(buf_avail, mbuf_avail);\n \n-\t\twhile (unlikely(cpy_len && cpy_len >= cpy_threshold)) {\n+\t\twhile (unlikely(cpy_len)) {\n \t\t\thpa = (void *)(uintptr_t)gpa_to_first_hpa(dev,\n \t\t\t\t\tbuf_iova + buf_offset,\n \t\t\t\t\tcpy_len, &mapped_len);\n-\n-\t\t\tif (unlikely(!hpa || mapped_len < cpy_threshold))\n-\t\t\t\tbreak;\n+\t\t\tif (unlikely(!hpa)) {\n+\t\t\t\tVHOST_LOG_DATA(ERR, \"(%d) %s: failed to get hpa.\\n\",\n+\t\t\t\tdev->vid, __func__);\n+\t\t\t\terror = -1;\n+\t\t\t\tgoto out;\n+\t\t\t}\n \n \t\t\tasync_fill_vec(src_iovec + tvec_idx,\n \t\t\t\t(void *)(uintptr_t)rte_pktmbuf_iova_offset(m,\n \t\t\t\tmbuf_offset), (size_t)mapped_len);\n-\n \t\t\tasync_fill_vec(dst_iovec + tvec_idx,\n \t\t\t\t\thpa, (size_t)mapped_len);\n \n@@ -1092,45 +1091,11 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,\n \t\t\tbuf_offset += (uint32_t)mapped_len;\n \t\t\ttvec_idx++;\n \t\t}\n-\n-\t\tif (likely(cpy_len)) {\n-\t\t\tif (unlikely(vq->batch_copy_nb_elems >= vq->size)) {\n-\t\t\t\trte_memcpy(\n-\t\t\t\t(void *)((uintptr_t)(buf_addr + buf_offset)),\n-\t\t\t\trte_pktmbuf_mtod_offset(m, void *, mbuf_offset),\n-\t\t\t\tcpy_len);\n-\n-\t\t\t\tPRINT_PACKET(dev,\n-\t\t\t\t\t(uintptr_t)(buf_addr + buf_offset),\n-\t\t\t\t\tcpy_len, 0);\n-\t\t\t} else {\n-\t\t\t\tbatch_copy[vq->batch_copy_nb_elems].dst =\n-\t\t\t\t(void *)((uintptr_t)(buf_addr + buf_offset));\n-\t\t\t\tbatch_copy[vq->batch_copy_nb_elems].src =\n-\t\t\t\trte_pktmbuf_mtod_offset(m, void *, mbuf_offset);\n-\t\t\t\tbatch_copy[vq->batch_copy_nb_elems].log_addr =\n-\t\t\t\t\tbuf_iova + buf_offset;\n-\t\t\t\tbatch_copy[vq->batch_copy_nb_elems].len =\n-\t\t\t\t\tcpy_len;\n-\t\t\t\tvq->batch_copy_nb_elems++;\n-\t\t\t}\n-\n-\t\t\tmbuf_avail  -= cpy_len;\n-\t\t\tmbuf_offset += cpy_len;\n-\t\t\tbuf_avail  -= cpy_len;\n-\t\t\tbuf_offset += cpy_len;\n-\t\t}\n-\n \t}\n \n+\tasync_fill_iter(src_it, tlen, src_iovec, tvec_idx);\n+\tasync_fill_iter(dst_it, tlen, dst_iovec, tvec_idx);\n 
out:\n-\tif (tlen) {\n-\t\tasync_fill_iter(src_it, tlen, src_iovec, tvec_idx);\n-\t\tasync_fill_iter(dst_it, tlen, dst_iovec, tvec_idx);\n-\t} else {\n-\t\tsrc_it->count = 0;\n-\t}\n-\n \treturn error;\n }\n \n@@ -1303,67 +1268,6 @@ virtio_dev_rx_sync_batch_check(struct virtio_net *dev,\n \treturn 0;\n }\n \n-static __rte_always_inline int\n-virtio_dev_rx_async_batch_check(struct virtio_net *dev,\n-\t\t\t   struct vhost_virtqueue *vq,\n-\t\t\t   struct rte_mbuf **pkts,\n-\t\t\t   uint64_t *desc_addrs,\n-\t\t\t   uint64_t *lens)\n-{\n-\tbool wrap_counter = vq->avail_wrap_counter;\n-\tstruct vring_packed_desc *descs = vq->desc_packed;\n-\tuint16_t avail_idx = vq->last_avail_idx;\n-\tuint16_t used_idx = vq->last_used_idx;\n-\tuint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);\n-\tuint32_t cpy_threshold = vq->async_threshold;\n-\tuint16_t i;\n-\n-\tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n-\t\tif (unlikely(pkts[i]->data_len >= cpy_threshold))\n-\t\t\treturn -1;\n-\t}\n-\n-\tif (unlikely(avail_idx & PACKED_BATCH_MASK))\n-\t\treturn -1;\n-\n-\tif (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))\n-\t\treturn -1;\n-\n-\tif (unlikely((used_idx + PACKED_BATCH_SIZE) > vq->size))\n-\t\treturn -1;\n-\n-\tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n-\t\tif (unlikely(pkts[i]->next != NULL))\n-\t\t\treturn -1;\n-\t\tif (unlikely(!desc_is_avail(&descs[avail_idx + i],\n-\t\t\t\t\t    wrap_counter)))\n-\t\t\treturn -1;\n-\t}\n-\n-\tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)\n-\t\tlens[i] = descs[avail_idx + i].len;\n-\n-\tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n-\t\tif (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset)))\n-\t\t\treturn -1;\n-\t}\n-\n-\tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)\n-\t\tdesc_addrs[i] = vhost_iova_to_vva(dev, vq,\n-\t\t\t\t\t\t  descs[avail_idx + i].addr,\n-\t\t\t\t\t\t  &lens[i],\n-\t\t\t\t\t\t  VHOST_ACCESS_RW);\n-\n-\tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n-\t\tif (unlikely(!desc_addrs[i]))\n-\t\t\treturn -1;\n-\t\tif (unlikely(lens[i] != descs[avail_idx + i].len))\n-\t\t\treturn -1;\n-\t}\n-\n-\treturn 0;\n-}\n-\n static __rte_always_inline void\n virtio_dev_rx_batch_packed_copy(struct virtio_net *dev,\n \t\t\t   struct vhost_virtqueue *vq,\n@@ -1428,32 +1332,6 @@ virtio_dev_rx_sync_batch_packed(struct virtio_net *dev,\n \treturn 0;\n }\n \n-static __rte_always_inline int\n-virtio_dev_rx_async_batch_packed(struct virtio_net *dev,\n-\t\t\t   struct vhost_virtqueue *vq,\n-\t\t\t   struct rte_mbuf **pkts,\n-\t\t\t   struct rte_mbuf **comp_pkts, uint32_t *pkt_done)\n-{\n-\tuint16_t i;\n-\tuint64_t desc_addrs[PACKED_BATCH_SIZE];\n-\tuint64_t lens[PACKED_BATCH_SIZE];\n-\n-\tif (virtio_dev_rx_async_batch_check(dev, vq, pkts, desc_addrs, lens) == -1)\n-\t\treturn -1;\n-\n-\tvirtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens);\n-\n-\tif (vq->shadow_used_idx) {\n-\t\tdo_data_copy_enqueue(dev, vq);\n-\t\tvhost_flush_enqueue_shadow_packed(dev, vq);\n-\t}\n-\n-\tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)\n-\t\tcomp_pkts[(*pkt_done)++] = pkts[i];\n-\n-\treturn 0;\n-}\n-\n static __rte_always_inline int16_t\n virtio_dev_rx_single_packed(struct virtio_net *dev,\n \t\t\t    struct vhost_virtqueue *vq,\n@@ -1625,12 +1503,11 @@ store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring,\n static __rte_noinline uint32_t\n virtio_dev_rx_async_submit_split(struct virtio_net *dev,\n \tstruct vhost_virtqueue *vq, uint16_t queue_id,\n-\tstruct rte_mbuf **pkts, uint32_t count,\n-\tstruct 
rte_mbuf **comp_pkts, uint32_t *comp_count)\n+\tstruct rte_mbuf **pkts, uint32_t count)\n {\n+\tstruct buf_vector buf_vec[BUF_VECTOR_MAX];\n \tuint32_t pkt_idx = 0, pkt_burst_idx = 0;\n \tuint16_t num_buffers;\n-\tstruct buf_vector buf_vec[BUF_VECTOR_MAX];\n \tuint16_t avail_head;\n \n \tstruct rte_vhost_iov_iter *it_pool = vq->it_pool;\n@@ -1638,17 +1515,11 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,\n \tstruct rte_vhost_async_desc tdes[MAX_PKT_BURST];\n \tstruct iovec *src_iovec = vec_pool;\n \tstruct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);\n-\tuint16_t slot_idx = 0;\n-\tuint16_t segs_await = 0;\n-\tuint16_t iovec_idx = 0, it_idx = 0;\n \tstruct async_inflight_info *pkts_info = vq->async_pkts_info;\n \tuint32_t n_pkts = 0, pkt_err = 0;\n-\tuint32_t num_async_pkts = 0, num_done_pkts = 0;\n \tint32_t n_xfer;\n-\tstruct {\n-\t\tuint16_t pkt_idx;\n-\t\tuint16_t last_avail_idx;\n-\t} async_pkts_log[MAX_PKT_BURST];\n+\tuint16_t segs_await = 0;\n+\tuint16_t iovec_idx = 0, it_idx = 0, slot_idx = 0;\n \n \t/*\n \t * The ordering between avail index and desc reads need to be enforced.\n@@ -1682,38 +1553,16 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,\n \t\t\tbreak;\n \t\t}\n \n-\t\tslot_idx = (vq->async_pkts_idx + num_async_pkts) &\n-\t\t\t(vq->size - 1);\n-\t\tif (it_pool[it_idx].count) {\n-\t\t\tuint16_t from, to;\n-\n-\t\t\tasync_fill_desc(&tdes[pkt_burst_idx++],\n-\t\t\t\t&it_pool[it_idx], &it_pool[it_idx + 1]);\n-\t\t\tpkts_info[slot_idx].descs = num_buffers;\n-\t\t\tpkts_info[slot_idx].mbuf = pkts[pkt_idx];\n-\t\t\tasync_pkts_log[num_async_pkts].pkt_idx = pkt_idx;\n-\t\t\tasync_pkts_log[num_async_pkts++].last_avail_idx =\n-\t\t\t\tvq->last_avail_idx;\n-\n-\t\t\tiovec_idx += it_pool[it_idx].nr_segs;\n-\t\t\tit_idx += 2;\n-\n-\t\t\tsegs_await += it_pool[it_idx].nr_segs;\n-\n-\t\t\t/**\n-\t\t\t * recover shadow used ring and keep DMA-occupied\n-\t\t\t * descriptors.\n-\t\t\t */\n-\t\t\tfrom = vq->shadow_used_idx - num_buffers;\n-\t\t\tto = vq->async_desc_idx_split & (vq->size - 1);\n+\t\tasync_fill_desc(&tdes[pkt_burst_idx++], &it_pool[it_idx],\n+\t\t\t\t&it_pool[it_idx + 1]);\n \n-\t\t\tstore_dma_desc_info_split(vq->shadow_used_split,\n-\t\t\t\t\tvq->async_descs_split, vq->size, from, to, num_buffers);\n+\t\tslot_idx = (vq->async_pkts_idx + pkt_idx) & (vq->size - 1);\n+\t\tpkts_info[slot_idx].descs = num_buffers;\n+\t\tpkts_info[slot_idx].mbuf = pkts[pkt_idx];\n \n-\t\t\tvq->async_desc_idx_split += num_buffers;\n-\t\t\tvq->shadow_used_idx -= num_buffers;\n-\t\t} else\n-\t\t\tcomp_pkts[num_done_pkts++] = pkts[pkt_idx];\n+\t\tiovec_idx += it_pool[it_idx].nr_segs;\n+\t\tsegs_await += it_pool[it_idx].nr_segs;\n+\t\tit_idx += 2;\n \n \t\tvq->last_avail_idx += num_buffers;\n \n@@ -1727,7 +1576,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,\n \t\t\tBUF_VECTOR_MAX))) {\n \t\t\tn_xfer = vq->async_ops.transfer_data(dev->vid,\n \t\t\t\t\tqueue_id, tdes, 0, pkt_burst_idx);\n-\t\t\tif (n_xfer >= 0) {\n+\t\t\tif (likely(n_xfer >= 0)) {\n \t\t\t\tn_pkts = n_xfer;\n \t\t\t} else {\n \t\t\t\tVHOST_LOG_DATA(ERR,\n@@ -1738,9 +1587,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,\n \n \t\t\tiovec_idx = 0;\n \t\t\tit_idx = 0;\n-\n \t\t\tsegs_await = 0;\n-\t\t\tvq->async_pkts_inflight_n += n_pkts;\n \n \t\t\tif (unlikely(n_pkts < pkt_burst_idx)) {\n \t\t\t\t/*\n@@ -1749,6 +1596,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,\n \t\t\t\t * completion\n \t\t\t\t */\n \t\t\t\tpkt_err = pkt_burst_idx - 
n_pkts;\n+\t\t\t\tpkt_idx++;\n \t\t\t\tpkt_burst_idx = 0;\n \t\t\t\tbreak;\n \t\t\t}\n@@ -1759,7 +1607,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,\n \n \tif (pkt_burst_idx) {\n \t\tn_xfer = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx);\n-\t\tif (n_xfer >= 0) {\n+\t\tif (likely(n_xfer >= 0)) {\n \t\t\tn_pkts = n_xfer;\n \t\t} else {\n \t\t\tVHOST_LOG_DATA(ERR, \"(%d) %s: failed to transfer data for queue id %d.\\n\",\n@@ -1767,40 +1615,39 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,\n \t\t\tn_pkts = 0;\n \t\t}\n \n-\t\tvq->async_pkts_inflight_n += n_pkts;\n-\n \t\tif (unlikely(n_pkts < pkt_burst_idx))\n \t\t\tpkt_err = pkt_burst_idx - n_pkts;\n \t}\n \n-\tdo_data_copy_enqueue(dev, vq);\n-\n \tif (unlikely(pkt_err)) {\n \t\tuint16_t num_descs = 0;\n \n-\t\tnum_async_pkts -= pkt_err;\n-\t\t/* calculate the sum of descriptors of DMA-error packets. */\n+\t\t/* update number of completed packets */\n+\t\tpkt_idx -= pkt_err;\n+\n+\t\t/* calculate the sum of descriptors to revert */\n \t\twhile (pkt_err-- > 0) {\n \t\t\tnum_descs += pkts_info[slot_idx & (vq->size - 1)].descs;\n \t\t\tslot_idx--;\n \t\t}\n-\t\tvq->async_desc_idx_split -= num_descs;\n+\n \t\t/* recover shadow used ring and available ring */\n-\t\tvq->shadow_used_idx -= (vq->last_avail_idx -\n-\t\t\t\tasync_pkts_log[num_async_pkts].last_avail_idx -\n-\t\t\t\tnum_descs);\n-\t\tvq->last_avail_idx =\n-\t\t\tasync_pkts_log[num_async_pkts].last_avail_idx;\n-\t\tpkt_idx = async_pkts_log[num_async_pkts].pkt_idx;\n-\t\tnum_done_pkts = pkt_idx - num_async_pkts;\n+\t\tvq->shadow_used_idx -= num_descs;\n+\t\tvq->last_avail_idx -= num_descs;\n \t}\n \n-\tvq->async_pkts_idx += num_async_pkts;\n-\t*comp_count = num_done_pkts;\n-\n+\t/* keep used descriptors */\n \tif (likely(vq->shadow_used_idx)) {\n-\t\tflush_shadow_used_ring_split(dev, vq);\n-\t\tvhost_vring_call_split(dev, vq);\n+\t\tuint16_t to = vq->async_desc_idx_split & (vq->size - 1);\n+\n+\t\tstore_dma_desc_info_split(vq->shadow_used_split,\n+\t\t\t\tvq->async_descs_split, vq->size, 0, to,\n+\t\t\t\tvq->shadow_used_idx);\n+\n+\t\tvq->async_desc_idx_split += vq->shadow_used_idx;\n+\t\tvq->async_pkts_idx += pkt_idx;\n+\t\tvq->async_pkts_inflight_n += pkt_idx;\n+\t\tvq->shadow_used_idx = 0;\n \t}\n \n \treturn pkt_idx;\n@@ -1862,13 +1709,12 @@ vhost_update_used_packed(struct vhost_virtqueue *vq,\n }\n \n static __rte_always_inline int\n-vhost_enqueue_async_single_packed(struct virtio_net *dev,\n+vhost_enqueue_async_packed(struct virtio_net *dev,\n \t\t\t    struct vhost_virtqueue *vq,\n \t\t\t    struct rte_mbuf *pkt,\n \t\t\t    struct buf_vector *buf_vec,\n \t\t\t    uint16_t *nr_descs,\n \t\t\t    uint16_t *nr_buffers,\n-\t\t\t    struct vring_packed_desc *async_descs,\n \t\t\t    struct iovec *src_iovec, struct iovec *dst_iovec,\n \t\t\t    struct rte_vhost_iov_iter *src_it,\n \t\t\t    struct rte_vhost_iov_iter *dst_it)\n@@ -1909,28 +1755,15 @@ vhost_enqueue_async_single_packed(struct virtio_net *dev,\n \t\tbuffer_buf_id[*nr_buffers] = buf_id;\n \t\tbuffer_desc_count[*nr_buffers] = desc_count;\n \t\t*nr_buffers += 1;\n-\n \t\t*nr_descs += desc_count;\n \t\tavail_idx += desc_count;\n \t\tif (avail_idx >= vq->size)\n \t\t\tavail_idx -= vq->size;\n \t}\n \n-\tif (async_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, src_iovec, dst_iovec,\n-\t\t\tsrc_it, dst_it) < 0)\n+\tif (unlikely(async_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, src_iovec, dst_iovec,\n+\t\t\tsrc_it, dst_it) < 0))\n \t\treturn -1;\n-\t/* 
store descriptors for DMA */\n-\tif (avail_idx >= *nr_descs) {\n-\t\trte_memcpy(async_descs, &vq->desc_packed[vq->last_avail_idx],\n-\t\t\t*nr_descs * sizeof(struct vring_packed_desc));\n-\t} else {\n-\t\tuint16_t nr_copy = vq->size - vq->last_avail_idx;\n-\n-\t\trte_memcpy(async_descs, &vq->desc_packed[vq->last_avail_idx],\n-\t\t\tnr_copy * sizeof(struct vring_packed_desc));\n-\t\trte_memcpy(async_descs + nr_copy, vq->desc_packed,\n-\t\t\t(*nr_descs - nr_copy) * sizeof(struct vring_packed_desc));\n-\t}\n \n \tvhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, buffer_desc_count, *nr_buffers);\n \n@@ -1938,16 +1771,15 @@ vhost_enqueue_async_single_packed(struct virtio_net *dev,\n }\n \n static __rte_always_inline int16_t\n-virtio_dev_rx_async_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,\n+virtio_dev_rx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,\n \t\t\t    struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers,\n-\t\t\t    struct vring_packed_desc *async_descs,\n \t\t\t    struct iovec *src_iovec, struct iovec *dst_iovec,\n \t\t\t    struct rte_vhost_iov_iter *src_it, struct rte_vhost_iov_iter *dst_it)\n {\n \tstruct buf_vector buf_vec[BUF_VECTOR_MAX];\n \n-\tif (unlikely(vhost_enqueue_async_single_packed(dev, vq, pkt, buf_vec, nr_descs, nr_buffers,\n-\t\t\t\t\t\t async_descs, src_iovec, dst_iovec,\n+\tif (unlikely(vhost_enqueue_async_packed(dev, vq, pkt, buf_vec, nr_descs, nr_buffers,\n+\t\t\t\t\t\t src_iovec, dst_iovec,\n \t\t\t\t\t\t src_it, dst_it) < 0)) {\n \t\tVHOST_LOG_DATA(DEBUG, \"(%d) failed to get enough desc from vring\\n\", dev->vid);\n \t\treturn -1;\n@@ -1960,15 +1792,13 @@ virtio_dev_rx_async_single_packed(struct virtio_net *dev, struct vhost_virtqueue\n }\n \n static __rte_always_inline void\n-dma_error_handler_packed(struct vhost_virtqueue *vq, struct vring_packed_desc *async_descs,\n-\t\t\tuint16_t async_descs_idx, uint16_t slot_idx, uint32_t nr_err,\n-\t\t\tuint32_t *pkt_idx, uint32_t *num_async_pkts, uint32_t *num_done_pkts)\n+dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx,\n+\t\t\tuint32_t nr_err, uint32_t *pkt_idx)\n {\n \tuint16_t descs_err = 0;\n \tuint16_t buffers_err = 0;\n \tstruct async_inflight_info *pkts_info = vq->async_pkts_info;\n \n-\t*num_async_pkts -= nr_err;\n \t*pkt_idx -= nr_err;\n \t/* calculate the sum of buffers and descs of DMA-error packets. 
*/\n \twhile (nr_err-- > 0) {\n@@ -1977,113 +1807,59 @@ dma_error_handler_packed(struct vhost_virtqueue *vq, struct vring_packed_desc *a\n \t\tslot_idx--;\n \t}\n \n-\tvq->async_buffer_idx_packed -= buffers_err;\n-\n \tif (vq->last_avail_idx >= descs_err) {\n \t\tvq->last_avail_idx -= descs_err;\n-\n-\t\trte_memcpy(&vq->desc_packed[vq->last_avail_idx],\n-\t\t\t&async_descs[async_descs_idx - descs_err],\n-\t\t\tdescs_err * sizeof(struct vring_packed_desc));\n \t} else {\n-\t\tuint16_t nr_copy;\n-\n \t\tvq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err;\n-\t\tnr_copy = vq->size - vq->last_avail_idx;\n-\t\trte_memcpy(&vq->desc_packed[vq->last_avail_idx],\n-\t\t\t&async_descs[async_descs_idx - descs_err],\n-\t\t\tnr_copy * sizeof(struct vring_packed_desc));\n-\t\tdescs_err -= nr_copy;\n-\t\trte_memcpy(&vq->desc_packed[0], &async_descs[async_descs_idx - descs_err],\n-\t\t\tdescs_err * sizeof(struct vring_packed_desc));\n \t\tvq->avail_wrap_counter ^= 1;\n \t}\n \n-\t*num_done_pkts = *pkt_idx - *num_async_pkts;\n+\tvq->shadow_used_idx -= buffers_err;\n }\n \n static __rte_noinline uint32_t\n virtio_dev_rx_async_submit_packed(struct virtio_net *dev,\n \tstruct vhost_virtqueue *vq, uint16_t queue_id,\n-\tstruct rte_mbuf **pkts, uint32_t count,\n-\tstruct rte_mbuf **comp_pkts, uint32_t *comp_count)\n+\tstruct rte_mbuf **pkts, uint32_t count)\n {\n \tuint32_t pkt_idx = 0, pkt_burst_idx = 0;\n \tuint32_t remained = count;\n-\tuint16_t async_descs_idx = 0;\n+\tint32_t n_xfer;\n \tuint16_t num_buffers;\n \tuint16_t num_descs;\n-\tint32_t n_xfer;\n \n \tstruct rte_vhost_iov_iter *it_pool = vq->it_pool;\n \tstruct iovec *vec_pool = vq->vec_pool;\n \tstruct rte_vhost_async_desc tdes[MAX_PKT_BURST];\n \tstruct iovec *src_iovec = vec_pool;\n \tstruct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);\n+\tstruct async_inflight_info *pkts_info = vq->async_pkts_info;\n+\tuint32_t n_pkts = 0, pkt_err = 0;\n \tuint16_t slot_idx = 0;\n \tuint16_t segs_await = 0;\n \tuint16_t iovec_idx = 0, it_idx = 0;\n-\tstruct async_inflight_info *pkts_info = vq->async_pkts_info;\n-\tuint32_t n_pkts = 0, pkt_err = 0;\n-\tuint32_t num_async_pkts = 0, num_done_pkts = 0;\n-\tstruct vring_packed_desc async_descs[vq->size];\n \n \tdo {\n \t\trte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);\n-\t\tif (remained >= PACKED_BATCH_SIZE) {\n-\t\t\tif (!virtio_dev_rx_async_batch_packed(dev, vq,\n-\t\t\t\t&pkts[pkt_idx], comp_pkts, &num_done_pkts)) {\n-\t\t\t\tpkt_idx += PACKED_BATCH_SIZE;\n-\t\t\t\tremained -= PACKED_BATCH_SIZE;\n-\t\t\t\tcontinue;\n-\t\t\t}\n-\t\t}\n \n \t\tnum_buffers = 0;\n \t\tnum_descs = 0;\n-\t\tif (unlikely(virtio_dev_rx_async_single_packed(dev, vq, pkts[pkt_idx],\n+\t\tif (unlikely(virtio_dev_rx_async_packed(dev, vq, pkts[pkt_idx],\n \t\t\t\t\t\t&num_descs, &num_buffers,\n-\t\t\t\t\t\t&async_descs[async_descs_idx],\n \t\t\t\t\t\t&src_iovec[iovec_idx], &dst_iovec[iovec_idx],\n \t\t\t\t\t\t&it_pool[it_idx], &it_pool[it_idx + 1]) < 0))\n \t\t\tbreak;\n \n-\t\tVHOST_LOG_DATA(DEBUG, \"(%d) current index %d | end index %d\\n\",\n-\t\t\tdev->vid, vq->last_avail_idx,\n-\t\t\tvq->last_avail_idx + num_descs);\n-\n-\t\tslot_idx = (vq->async_pkts_idx + num_async_pkts) % vq->size;\n-\t\tif (it_pool[it_idx].count) {\n-\t\t\tuint16_t from;\n-\n-\t\t\tasync_descs_idx += num_descs;\n-\t\t\tasync_fill_desc(&tdes[pkt_burst_idx++],\n-\t\t\t\t&it_pool[it_idx], &it_pool[it_idx + 1]);\n-\t\t\tpkts_info[slot_idx].descs = num_descs;\n-\t\t\tpkts_info[slot_idx].nr_buffers = num_buffers;\n-\t\t\tpkts_info[slot_idx].mbuf 
= pkts[pkt_idx];\n-\t\t\tnum_async_pkts++;\n-\t\t\tiovec_idx += it_pool[it_idx].nr_segs;\n-\t\t\tit_idx += 2;\n-\n-\t\t\tsegs_await += it_pool[it_idx].nr_segs;\n-\n-\t\t\t/**\n-\t\t\t * recover shadow used ring and keep DMA-occupied\n-\t\t\t * descriptors.\n-\t\t\t */\n-\t\t\tfrom = vq->shadow_used_idx - num_buffers;\n-\t\t\tstore_dma_desc_info_packed(vq->shadow_used_packed,\n-\t\t\t\t\tvq->async_buffers_packed, vq->size, from,\n-\t\t\t\t\tvq->async_buffer_idx_packed, num_buffers);\n-\n-\t\t\tvq->async_buffer_idx_packed += num_buffers;\n-\t\t\tif (vq->async_buffer_idx_packed >= vq->size)\n-\t\t\t\tvq->async_buffer_idx_packed -= vq->size;\n-\t\t\tvq->shadow_used_idx -= num_buffers;\n-\t\t} else {\n-\t\t\tcomp_pkts[num_done_pkts++] = pkts[pkt_idx];\n-\t\t}\n+\t\tslot_idx = (vq->async_pkts_idx + pkt_idx) % vq->size;\n+\n+\t\tasync_fill_desc(&tdes[pkt_burst_idx++], &it_pool[it_idx],\n+\t\t\t\t&it_pool[it_idx + 1]);\n+\t\tpkts_info[slot_idx].descs = num_descs;\n+\t\tpkts_info[slot_idx].nr_buffers = num_buffers;\n+\t\tpkts_info[slot_idx].mbuf = pkts[pkt_idx];\n+\t\tiovec_idx += it_pool[it_idx].nr_segs;\n+\t\tsegs_await += it_pool[it_idx].nr_segs;\n+\t\tit_idx += 2;\n \n \t\tpkt_idx++;\n \t\tremained--;\n@@ -2098,7 +1874,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,\n \t\t\t((VHOST_MAX_ASYNC_VEC >> 1) - segs_await < BUF_VECTOR_MAX))) {\n \t\t\tn_xfer = vq->async_ops.transfer_data(dev->vid,\n \t\t\t\t\tqueue_id, tdes, 0, pkt_burst_idx);\n-\t\t\tif (n_xfer >= 0) {\n+\t\t\tif (likely(n_xfer >= 0)) {\n \t\t\t\tn_pkts = n_xfer;\n \t\t\t} else {\n \t\t\t\tVHOST_LOG_DATA(ERR,\n@@ -2110,7 +1886,6 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,\n \t\t\tiovec_idx = 0;\n \t\t\tit_idx = 0;\n \t\t\tsegs_await = 0;\n-\t\t\tvq->async_pkts_inflight_n += n_pkts;\n \n \t\t\tif (unlikely(n_pkts < pkt_burst_idx)) {\n \t\t\t\t/*\n@@ -2129,7 +1904,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,\n \n \tif (pkt_burst_idx) {\n \t\tn_xfer = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx);\n-\t\tif (n_xfer >= 0) {\n+\t\tif (likely(n_xfer >= 0)) {\n \t\t\tn_pkts = n_xfer;\n \t\t} else {\n \t\t\tVHOST_LOG_DATA(ERR, \"(%d) %s: failed to transfer data for queue id %d.\\n\",\n@@ -2137,25 +1912,29 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,\n \t\t\tn_pkts = 0;\n \t\t}\n \n-\t\tvq->async_pkts_inflight_n += n_pkts;\n-\n \t\tif (unlikely(n_pkts < pkt_burst_idx))\n \t\t\tpkt_err = pkt_burst_idx - n_pkts;\n \t}\n \n-\tdo_data_copy_enqueue(dev, vq);\n-\n \tif (unlikely(pkt_err))\n-\t\tdma_error_handler_packed(vq, async_descs, async_descs_idx, slot_idx, pkt_err,\n-\t\t\t\t\t&pkt_idx, &num_async_pkts, &num_done_pkts);\n-\tvq->async_pkts_idx += num_async_pkts;\n-\tif (vq->async_pkts_idx >= vq->size)\n-\t\tvq->async_pkts_idx -= vq->size;\n-\t*comp_count = num_done_pkts;\n+\t\tdma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx);\n \n \tif (likely(vq->shadow_used_idx)) {\n-\t\tvhost_flush_enqueue_shadow_packed(dev, vq);\n-\t\tvhost_vring_call_packed(dev, vq);\n+\t\t/* keep used descriptors. 
*/\n+\t\tstore_dma_desc_info_packed(vq->shadow_used_packed, vq->async_buffers_packed,\n+\t\t\t\t\tvq->size, 0, vq->async_buffer_idx_packed,\n+\t\t\t\t\tvq->shadow_used_idx);\n+\n+\t\tvq->async_buffer_idx_packed += vq->shadow_used_idx;\n+\t\tif (vq->async_buffer_idx_packed >= vq->size)\n+\t\t\tvq->async_buffer_idx_packed -= vq->size;\n+\n+\t\tvq->async_pkts_idx += pkt_idx;\n+\t\tif (vq->async_pkts_idx >= vq->size)\n+\t\t\tvq->async_pkts_idx -= vq->size;\n+\n+\t\tvq->shadow_used_idx = 0;\n+\t\tvq->async_pkts_inflight_n += pkt_idx;\n \t}\n \n \treturn pkt_idx;\n@@ -2219,14 +1998,13 @@ vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,\n \t\tstruct rte_mbuf **pkts, uint16_t count)\n {\n \tstruct vhost_virtqueue *vq;\n+\tstruct async_inflight_info *pkts_info;\n+\tint32_t n_cpl;\n \tuint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;\n \tuint16_t start_idx, pkts_idx, vq_size;\n-\tstruct async_inflight_info *pkts_info;\n \tuint16_t from, i;\n-\tint32_t n_cpl;\n \n \tvq = dev->virtqueue[queue_id];\n-\n \tpkts_idx = vq->async_pkts_idx % vq->size;\n \tpkts_info = vq->async_pkts_info;\n \tvq_size = vq->size;\n@@ -2236,7 +2014,7 @@ vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,\n \tif (count > vq->async_last_pkts_n) {\n \t\tn_cpl = vq->async_ops.check_completed_copies(dev->vid,\n \t\t\tqueue_id, 0, count - vq->async_last_pkts_n);\n-\t\tif (n_cpl >= 0) {\n+\t\tif (likely(n_cpl >= 0)) {\n \t\t\tn_pkts_cpl = n_cpl;\n \t\t} else {\n \t\t\tVHOST_LOG_DATA(ERR,\n@@ -2245,9 +2023,9 @@ vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,\n \t\t\tn_pkts_cpl = 0;\n \t\t}\n \t}\n-\tn_pkts_cpl += vq->async_last_pkts_n;\n \n-\tn_pkts_put = RTE_MIN(count, n_pkts_cpl);\n+\tn_pkts_cpl += vq->async_last_pkts_n;\n+\tn_pkts_put = RTE_MIN(n_pkts_cpl, count);\n \tif (unlikely(n_pkts_put == 0)) {\n \t\tvq->async_last_pkts_n = n_pkts_cpl;\n \t\treturn 0;\n@@ -2266,7 +2044,6 @@ vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,\n \t\t\tpkts[i] = pkts_info[from].mbuf;\n \t\t}\n \t}\n-\n \tvq->async_last_pkts_n = n_pkts_cpl - n_pkts_put;\n \tvq->async_pkts_inflight_n -= n_pkts_put;\n \n@@ -2303,7 +2080,7 @@ rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,\n \tstruct vhost_virtqueue *vq;\n \tuint16_t n_pkts_cpl = 0;\n \n-\tif (!dev)\n+\tif (unlikely(!dev))\n \t\treturn 0;\n \n \tVHOST_LOG_DATA(DEBUG, \"(%d) %s\\n\", dev->vid, __func__);\n@@ -2363,8 +2140,7 @@ rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,\n \n static __rte_always_inline uint32_t\n virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,\n-\tstruct rte_mbuf **pkts, uint32_t count,\n-\tstruct rte_mbuf **comp_pkts, uint32_t *comp_count)\n+\tstruct rte_mbuf **pkts, uint32_t count)\n {\n \tstruct vhost_virtqueue *vq;\n \tuint32_t nb_tx = 0;\n@@ -2395,13 +2171,11 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,\n \t\tgoto out;\n \n \tif (vq_is_packed(dev))\n-\t\tnb_tx = virtio_dev_rx_async_submit_packed(dev,\n-\t\t\t\tvq, queue_id, pkts, count, comp_pkts,\n-\t\t\t\tcomp_count);\n+\t\tnb_tx = virtio_dev_rx_async_submit_packed(dev, vq, queue_id,\n+\t\t\t\tpkts, count);\n \telse\n-\t\tnb_tx = virtio_dev_rx_async_submit_split(dev,\n-\t\t\t\tvq, queue_id, pkts, count, comp_pkts,\n-\t\t\t\tcomp_count);\n+\t\tnb_tx = virtio_dev_rx_async_submit_split(dev, vq, queue_id,\n+\t\t\t\tpkts, count);\n \n out:\n \tif (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))\n@@ -2415,12 +2189,10 @@ 
virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,\n \n uint16_t\n rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,\n-\t\tstruct rte_mbuf **pkts, uint16_t count,\n-\t\tstruct rte_mbuf **comp_pkts, uint32_t *comp_count)\n+\t\tstruct rte_mbuf **pkts, uint16_t count)\n {\n \tstruct virtio_net *dev = get_device(vid);\n \n-\t*comp_count = 0;\n \tif (!dev)\n \t\treturn 0;\n \n@@ -2431,8 +2203,7 @@ rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,\n \t\treturn 0;\n \t}\n \n-\treturn virtio_dev_rx_async_submit(dev, queue_id, pkts, count, comp_pkts,\n-\t\t\tcomp_count);\n+\treturn virtio_dev_rx_async_submit(dev, queue_id, pkts, count);\n }\n \n static inline bool\n",
    "prefixes": []
}
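As a usage note, fields such as "mbox", "comments", and "checks" in the response are plain URLs that can be fetched directly. A hedged sketch, again assuming the "requests" library (the output file name is arbitrary): it downloads the patch in mbox format so it can be applied locally with "git am".

import requests

# Re-fetch the patch shown above and follow its "mbox" link.
patch = requests.get("https://patches.dpdk.org/api/patches/97145/").json()

mbox = requests.get(patch["mbox"])
mbox.raise_for_status()

# Save the raw mbox; it can then be applied with: git am 97145.mbox
with open("97145.mbox", "wb") as f:
    f.write(mbox.content)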