get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/92234/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 92234,
    "url": "http://patches.dpdk.org/api/patches/92234/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20210427080335.20246-3-Cheng1.jiang@intel.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20210427080335.20246-3-Cheng1.jiang@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20210427080335.20246-3-Cheng1.jiang@intel.com",
    "date": "2021-04-27T08:03:33",
    "name": "[v9,2/4] vhost: add support for packed ring in async vhost",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "6f73cccf3d927e82a9b3bca1f9ce17d1d30468db",
    "submitter": {
        "id": 1530,
        "url": "http://patches.dpdk.org/api/people/1530/?format=api",
        "name": "Jiang, Cheng1",
        "email": "Cheng1.jiang@intel.com"
    },
    "delegate": {
        "id": 2642,
        "url": "http://patches.dpdk.org/api/users/2642/?format=api",
        "username": "mcoquelin",
        "first_name": "Maxime",
        "last_name": "Coquelin",
        "email": "maxime.coquelin@redhat.com"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20210427080335.20246-3-Cheng1.jiang@intel.com/mbox/",
    "series": [
        {
            "id": 16691,
            "url": "http://patches.dpdk.org/api/series/16691/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=16691",
            "date": "2021-04-27T08:03:31",
            "name": "add support for packed ring in async vhost",
            "version": 9,
            "mbox": "http://patches.dpdk.org/series/16691/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/92234/comments/",
    "check": "success",
    "checks": "http://patches.dpdk.org/api/patches/92234/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id A82CDA0548;\n\tTue, 27 Apr 2021 10:18:00 +0200 (CEST)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 591A04120D;\n\tTue, 27 Apr 2021 10:17:51 +0200 (CEST)",
            "from mga11.intel.com (mga11.intel.com [192.55.52.93])\n by mails.dpdk.org (Postfix) with ESMTP id 2980641203\n for <dev@dpdk.org>; Tue, 27 Apr 2021 10:17:48 +0200 (CEST)",
            "from fmsmga002.fm.intel.com ([10.253.24.26])\n by fmsmga102.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 27 Apr 2021 01:17:48 -0700",
            "from dpdk_jiangcheng.sh.intel.com ([10.67.119.112])\n by fmsmga002.fm.intel.com with ESMTP; 27 Apr 2021 01:17:46 -0700"
        ],
        "IronPort-SDR": [
            "\n niq2l7u1+uuQ/ug85OJW0gqt1WnLfRHqiIeU8U4zFh/LLep3PKi5W26WUEx5kGwXE70sEa5Dse\n byURziioSIaQ==",
            "\n z1NoFAUEVNiVuSCGaUEc83dxSKR9Kr2ui+Acfhb4yd44r/qr/bNn1T+g+dIv8g7dV6jIZbQIvQ\n vZkltm4A5d2g=="
        ],
        "X-IronPort-AV": [
            "E=McAfee;i=\"6200,9189,9966\"; a=\"193285063\"",
            "E=Sophos;i=\"5.82,254,1613462400\"; d=\"scan'208\";a=\"193285063\"",
            "E=Sophos;i=\"5.82,254,1613462400\"; d=\"scan'208\";a=\"457521405\""
        ],
        "X-ExtLoop1": "1",
        "From": "Cheng Jiang <Cheng1.jiang@intel.com>",
        "To": "maxime.coquelin@redhat.com,\n\tchenbo.xia@intel.com",
        "Cc": "dev@dpdk.org, jiayu.hu@intel.com, yvonnex.yang@intel.com,\n yinan.wang@intel.com, yong.liu@intel.com,\n Cheng Jiang <Cheng1.jiang@intel.com>",
        "Date": "Tue, 27 Apr 2021 08:03:33 +0000",
        "Message-Id": "<20210427080335.20246-3-Cheng1.jiang@intel.com>",
        "X-Mailer": "git-send-email 2.29.2",
        "In-Reply-To": "<20210427080335.20246-1-Cheng1.jiang@intel.com>",
        "References": "<20210317085426.10119-1-Cheng1.jiang@intel.com>\n <20210427080335.20246-1-Cheng1.jiang@intel.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "Subject": "[dpdk-dev] [PATCH v9 2/4] vhost: add support for packed ring in\n async vhost",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "For now async vhost data path only supports split ring. This patch\nenables packed ring in async vhost data path to make async vhost\ncompatible with virtio 1.1 spec.\n\nSigned-off-by: Cheng Jiang <Cheng1.jiang@intel.com>\n---\n lib/vhost/rte_vhost_async.h |   1 +\n lib/vhost/vhost.c           |  79 +++++--\n lib/vhost/vhost.h           |  15 +-\n lib/vhost/virtio_net.c      | 442 ++++++++++++++++++++++++++++++++++--\n 4 files changed, 489 insertions(+), 48 deletions(-)",
    "diff": "diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h\nindex c855ff875e..6faa31f5ad 100644\n--- a/lib/vhost/rte_vhost_async.h\n+++ b/lib/vhost/rte_vhost_async.h\n@@ -89,6 +89,7 @@ struct rte_vhost_async_channel_ops {\n struct async_inflight_info {\n \tstruct rte_mbuf *mbuf;\n \tuint16_t descs; /* num of descs inflight */\n+\tuint16_t nr_buffers; /* num of buffers inflight for packed ring */\n };\n \n /**\ndiff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c\nindex a70fe01d8f..2e3f9eb095 100644\n--- a/lib/vhost/vhost.c\n+++ b/lib/vhost/vhost.c\n@@ -340,17 +340,17 @@ cleanup_device(struct virtio_net *dev, int destroy)\n static void\n vhost_free_async_mem(struct vhost_virtqueue *vq)\n {\n-\tif (vq->async_pkts_info)\n-\t\trte_free(vq->async_pkts_info);\n-\tif (vq->async_descs_split)\n-\t\trte_free(vq->async_descs_split);\n-\tif (vq->it_pool)\n-\t\trte_free(vq->it_pool);\n-\tif (vq->vec_pool)\n-\t\trte_free(vq->vec_pool);\n+\trte_free(vq->async_pkts_info);\n \n-\tvq->async_pkts_info = NULL;\n+\trte_free(vq->async_buffers_packed);\n+\tvq->async_buffers_packed = NULL;\n+\trte_free(vq->async_descs_split);\n \tvq->async_descs_split = NULL;\n+\n+\trte_free(vq->it_pool);\n+\trte_free(vq->vec_pool);\n+\n+\tvq->async_pkts_info = NULL;\n \tvq->it_pool = NULL;\n \tvq->vec_pool = NULL;\n }\n@@ -360,10 +360,10 @@ free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)\n {\n \tif (vq_is_packed(dev))\n \t\trte_free(vq->shadow_used_packed);\n-\telse {\n+\telse\n \t\trte_free(vq->shadow_used_split);\n-\t\tvhost_free_async_mem(vq);\n-\t}\n+\n+\tvhost_free_async_mem(vq);\n \trte_free(vq->batch_copy_elems);\n \tif (vq->iotlb_pool)\n \t\trte_mempool_free(vq->iotlb_pool);\n@@ -1626,10 +1626,9 @@ int rte_vhost_async_channel_register(int vid, uint16_t queue_id,\n \tif (unlikely(vq == NULL || !dev->async_copy))\n \t\treturn -1;\n \n-\t/* packed queue is not supported */\n-\tif (unlikely(vq_is_packed(dev) || !f.async_inorder)) {\n+\tif (unlikely(!f.async_inorder)) {\n \t\tVHOST_LOG_CONFIG(ERR,\n-\t\t\t\"async copy is not supported on packed queue or non-inorder mode \"\n+\t\t\t\"async copy is not supported on non-inorder mode \"\n \t\t\t\"(vid %d, qid: %d)\\n\", vid, queue_id);\n \t\treturn -1;\n \t}\n@@ -1661,24 +1660,60 @@ int rte_vhost_async_channel_register(int vid, uint16_t queue_id,\n \tvq->async_pkts_info = rte_malloc_socket(NULL,\n \t\t\tvq->size * sizeof(struct async_inflight_info),\n \t\t\tRTE_CACHE_LINE_SIZE, node);\n+\tif (!vq->async_pkts_info) {\n+\t\tvhost_free_async_mem(vq);\n+\t\tVHOST_LOG_CONFIG(ERR,\n+\t\t\t\"async register failed: cannot allocate memory for async_pkts_info \"\n+\t\t\t\"(vid %d, qid: %d)\\n\", vid, queue_id);\n+\t\tgoto reg_out;\n+\t}\n+\n \tvq->it_pool = rte_malloc_socket(NULL,\n \t\t\tVHOST_MAX_ASYNC_IT * sizeof(struct rte_vhost_iov_iter),\n \t\t\tRTE_CACHE_LINE_SIZE, node);\n+\tif (!vq->it_pool) {\n+\t\tvhost_free_async_mem(vq);\n+\t\tVHOST_LOG_CONFIG(ERR,\n+\t\t\t\"async register failed: cannot allocate memory for it_pool \"\n+\t\t\t\"(vid %d, qid: %d)\\n\", vid, queue_id);\n+\t\tgoto reg_out;\n+\t}\n+\n \tvq->vec_pool = rte_malloc_socket(NULL,\n \t\t\tVHOST_MAX_ASYNC_VEC * sizeof(struct iovec),\n \t\t\tRTE_CACHE_LINE_SIZE, node);\n-\tvq->async_descs_split = rte_malloc_socket(NULL,\n-\t\t\tvq->size * sizeof(struct vring_used_elem),\n-\t\t\tRTE_CACHE_LINE_SIZE, node);\n-\tif (!vq->async_descs_split || !vq->async_pkts_info ||\n-\t\t!vq->it_pool || !vq->vec_pool) {\n+\tif (!vq->vec_pool) {\n \t\tvhost_free_async_mem(vq);\n \t\tVHOST_LOG_CONFIG(ERR,\n-\t\t\t\t\"async register failed: cannot allocate memory for vq data \"\n-\t\t\t\t\"(vid %d, qid: %d)\\n\", vid, queue_id);\n+\t\t\t\"async register failed: cannot allocate memory for vec_pool \"\n+\t\t\t\"(vid %d, qid: %d)\\n\", vid, queue_id);\n \t\tgoto reg_out;\n \t}\n \n+\tif (vq_is_packed(dev)) {\n+\t\tvq->async_buffers_packed = rte_malloc_socket(NULL,\n+\t\t\tvq->size * sizeof(struct vring_used_elem_packed),\n+\t\t\tRTE_CACHE_LINE_SIZE, node);\n+\t\tif (!vq->async_buffers_packed) {\n+\t\t\tvhost_free_async_mem(vq);\n+\t\t\tVHOST_LOG_CONFIG(ERR,\n+\t\t\t\t\"async register failed: cannot allocate memory for async buffers \"\n+\t\t\t\t\"(vid %d, qid: %d)\\n\", vid, queue_id);\n+\t\t\tgoto reg_out;\n+\t\t}\n+\t} else {\n+\t\tvq->async_descs_split = rte_malloc_socket(NULL,\n+\t\t\tvq->size * sizeof(struct vring_used_elem),\n+\t\t\tRTE_CACHE_LINE_SIZE, node);\n+\t\tif (!vq->async_descs_split) {\n+\t\t\tvhost_free_async_mem(vq);\n+\t\t\tVHOST_LOG_CONFIG(ERR,\n+\t\t\t\t\"async register failed: cannot allocate memory for async descs \"\n+\t\t\t\t\"(vid %d, qid: %d)\\n\", vid, queue_id);\n+\t\t\tgoto reg_out;\n+\t\t}\n+\t}\n+\n \tvq->async_ops.check_completed_copies = ops->check_completed_copies;\n \tvq->async_ops.transfer_data = ops->transfer_data;\n \ndiff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h\nindex f628714c24..b303635645 100644\n--- a/lib/vhost/vhost.h\n+++ b/lib/vhost/vhost.h\n@@ -201,9 +201,18 @@ struct vhost_virtqueue {\n \tuint16_t\tasync_pkts_idx;\n \tuint16_t\tasync_pkts_inflight_n;\n \tuint16_t\tasync_last_pkts_n;\n-\tstruct vring_used_elem  *async_descs_split;\n-\tuint16_t async_desc_idx;\n-\tuint16_t last_async_desc_idx;\n+\tunion {\n+\t\tstruct vring_used_elem  *async_descs_split;\n+\t\tstruct vring_used_elem_packed *async_buffers_packed;\n+\t};\n+\tunion {\n+\t\tuint16_t async_desc_idx_split;\n+\t\tuint16_t async_buffer_idx_packed;\n+\t};\n+\tunion {\n+\t\tuint16_t last_async_desc_idx_split;\n+\t\tuint16_t last_async_buffer_idx_packed;\n+\t};\n \n \t/* vq async features */\n \tbool\t\tasync_inorder;\ndiff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c\nindex 438bdafd14..5d540e5599 100644\n--- a/lib/vhost/virtio_net.c\n+++ b/lib/vhost/virtio_net.c\n@@ -363,14 +363,14 @@ vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq,\n }\n \n static __rte_always_inline void\n-vhost_shadow_enqueue_single_packed(struct virtio_net *dev,\n-\t\t\t\t   struct vhost_virtqueue *vq,\n-\t\t\t\t   uint32_t len[],\n-\t\t\t\t   uint16_t id[],\n-\t\t\t\t   uint16_t count[],\n+vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq,\n+\t\t\t\t   uint32_t *len,\n+\t\t\t\t   uint16_t *id,\n+\t\t\t\t   uint16_t *count,\n \t\t\t\t   uint16_t num_buffers)\n {\n \tuint16_t i;\n+\n \tfor (i = 0; i < num_buffers; i++) {\n \t\t/* enqueue shadow flush action aligned with batch num */\n \t\tif (!vq->shadow_used_idx)\n@@ -382,6 +382,17 @@ vhost_shadow_enqueue_single_packed(struct virtio_net *dev,\n \t\tvq->shadow_aligned_idx += count[i];\n \t\tvq->shadow_used_idx++;\n \t}\n+}\n+\n+static __rte_always_inline void\n+vhost_shadow_enqueue_single_packed(struct virtio_net *dev,\n+\t\t\t\t   struct vhost_virtqueue *vq,\n+\t\t\t\t   uint32_t *len,\n+\t\t\t\t   uint16_t *id,\n+\t\t\t\t   uint16_t *count,\n+\t\t\t\t   uint16_t num_buffers)\n+{\n+\tvhost_shadow_enqueue_packed(vq, len, id, count, num_buffers);\n \n \tif (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) {\n \t\tdo_data_copy_enqueue(dev, vq);\n@@ -1474,6 +1485,23 @@ store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem\n \t}\n }\n \n+static __rte_always_inline void\n+store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring,\n+\t\tstruct vring_used_elem_packed *d_ring,\n+\t\tuint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count)\n+{\n+\tuint16_t elem_size = sizeof(struct vring_used_elem_packed);\n+\n+\tif (d_idx + count <= ring_size) {\n+\t\trte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);\n+\t} else {\n+\t\tuint16_t size = ring_size - d_idx;\n+\n+\t\trte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);\n+\t\trte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size);\n+\t}\n+}\n+\n static __rte_noinline uint32_t\n virtio_dev_rx_async_submit_split(struct virtio_net *dev,\n \tstruct vhost_virtqueue *vq, uint16_t queue_id,\n@@ -1556,12 +1584,12 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,\n \t\t\t * descriptors.\n \t\t\t */\n \t\t\tfrom = vq->shadow_used_idx - num_buffers;\n-\t\t\tto = vq->async_desc_idx & (vq->size - 1);\n+\t\t\tto = vq->async_desc_idx_split & (vq->size - 1);\n \n \t\t\tstore_dma_desc_info_split(vq->shadow_used_split,\n \t\t\t\t\tvq->async_descs_split, vq->size, from, to, num_buffers);\n \n-\t\t\tvq->async_desc_idx += num_buffers;\n+\t\t\tvq->async_desc_idx_split += num_buffers;\n \t\t\tvq->shadow_used_idx -= num_buffers;\n \t\t} else\n \t\t\tcomp_pkts[num_done_pkts++] = pkts[pkt_idx];\n@@ -1619,7 +1647,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,\n \t\t\tnum_descs += pkts_info[slot_idx & (vq->size - 1)].descs;\n \t\t\tslot_idx--;\n \t\t}\n-\t\tvq->async_desc_idx -= num_descs;\n+\t\tvq->async_desc_idx_split -= num_descs;\n \t\t/* recover shadow used ring and available ring */\n \t\tvq->shadow_used_idx -= (vq->last_avail_idx -\n \t\t\t\tasync_pkts_log[num_async_pkts].last_avail_idx -\n@@ -1641,6 +1669,330 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,\n \treturn pkt_idx;\n }\n \n+static __rte_always_inline void\n+vhost_update_used_packed(struct vhost_virtqueue *vq,\n+\t\t\tstruct vring_used_elem_packed *shadow_ring,\n+\t\t\tuint16_t count)\n+{\n+\tint i;\n+\tuint16_t used_idx = vq->last_used_idx;\n+\tuint16_t head_idx = vq->last_used_idx;\n+\tuint16_t head_flags = 0;\n+\n+\tif (count == 0)\n+\t\treturn;\n+\n+\t/* Split loop in two to save memory barriers */\n+\tfor (i = 0; i < count; i++) {\n+\t\tvq->desc_packed[used_idx].id = shadow_ring[i].id;\n+\t\tvq->desc_packed[used_idx].len = shadow_ring[i].len;\n+\n+\t\tused_idx += shadow_ring[i].count;\n+\t\tif (used_idx >= vq->size)\n+\t\t\tused_idx -= vq->size;\n+\t}\n+\n+\t/* The ordering for storing desc flags needs to be enforced. */\n+\trte_atomic_thread_fence(__ATOMIC_RELEASE);\n+\n+\tfor (i = 0; i < count; i++) {\n+\t\tuint16_t flags;\n+\n+\t\tif (vq->shadow_used_packed[i].len)\n+\t\t\tflags = VRING_DESC_F_WRITE;\n+\t\telse\n+\t\t\tflags = 0;\n+\n+\t\tif (vq->used_wrap_counter) {\n+\t\t\tflags |= VRING_DESC_F_USED;\n+\t\t\tflags |= VRING_DESC_F_AVAIL;\n+\t\t} else {\n+\t\t\tflags &= ~VRING_DESC_F_USED;\n+\t\t\tflags &= ~VRING_DESC_F_AVAIL;\n+\t\t}\n+\n+\t\tif (i > 0) {\n+\t\t\tvq->desc_packed[vq->last_used_idx].flags = flags;\n+\t\t} else {\n+\t\t\thead_idx = vq->last_used_idx;\n+\t\t\thead_flags = flags;\n+\t\t}\n+\n+\t\tvq_inc_last_used_packed(vq, shadow_ring[i].count);\n+\t}\n+\n+\tvq->desc_packed[head_idx].flags = head_flags;\n+}\n+\n+static __rte_always_inline int\n+vhost_enqueue_async_single_packed(struct virtio_net *dev,\n+\t\t\t    struct vhost_virtqueue *vq,\n+\t\t\t    struct rte_mbuf *pkt,\n+\t\t\t    struct buf_vector *buf_vec,\n+\t\t\t    uint16_t *nr_descs,\n+\t\t\t    uint16_t *nr_buffers,\n+\t\t\t    struct vring_packed_desc *async_descs,\n+\t\t\t    struct iovec *src_iovec, struct iovec *dst_iovec,\n+\t\t\t    struct rte_vhost_iov_iter *src_it,\n+\t\t\t    struct rte_vhost_iov_iter *dst_it)\n+{\n+\tuint16_t nr_vec = 0;\n+\tuint16_t avail_idx = vq->last_avail_idx;\n+\tuint16_t max_tries, tries = 0;\n+\tuint16_t buf_id = 0;\n+\tuint32_t len = 0;\n+\tuint16_t desc_count = 0;\n+\tuint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf);\n+\tuint32_t buffer_len[vq->size];\n+\tuint16_t buffer_buf_id[vq->size];\n+\tuint16_t buffer_desc_count[vq->size];\n+\n+\tif (rxvq_is_mergeable(dev))\n+\t\tmax_tries = vq->size - 1;\n+\telse\n+\t\tmax_tries = 1;\n+\n+\twhile (size > 0) {\n+\t\t/*\n+\t\t * if we tried all available ring items, and still\n+\t\t * can't get enough buf, it means something abnormal\n+\t\t * happened.\n+\t\t */\n+\t\tif (unlikely(++tries > max_tries))\n+\t\t\treturn -1;\n+\n+\t\tif (unlikely(fill_vec_buf_packed(dev, vq, avail_idx, &desc_count, buf_vec, &nr_vec,\n+\t\t\t\t\t\t&buf_id, &len, VHOST_ACCESS_RW) < 0))\n+\t\t\treturn -1;\n+\n+\t\tlen = RTE_MIN(len, size);\n+\t\tsize -= len;\n+\n+\t\tbuffer_len[*nr_buffers] = len;\n+\t\tbuffer_buf_id[*nr_buffers] = buf_id;\n+\t\tbuffer_desc_count[*nr_buffers] = desc_count;\n+\t\t*nr_buffers += 1;\n+\n+\t\t*nr_descs += desc_count;\n+\t\tavail_idx += desc_count;\n+\t\tif (avail_idx >= vq->size)\n+\t\t\tavail_idx -= vq->size;\n+\t}\n+\n+\tif (async_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, src_iovec, dst_iovec,\n+\t\t\tsrc_it, dst_it) < 0)\n+\t\treturn -1;\n+\t/* store descriptors for DMA */\n+\tif (avail_idx >= *nr_descs) {\n+\t\trte_memcpy(async_descs, &vq->desc_packed[vq->last_avail_idx],\n+\t\t\t*nr_descs * sizeof(struct vring_packed_desc));\n+\t} else {\n+\t\tuint16_t nr_copy = vq->size - vq->last_avail_idx;\n+\n+\t\trte_memcpy(async_descs, &vq->desc_packed[vq->last_avail_idx],\n+\t\t\tnr_copy * sizeof(struct vring_packed_desc));\n+\t\trte_memcpy(async_descs + nr_copy, vq->desc_packed,\n+\t\t\t(*nr_descs - nr_copy) * sizeof(struct vring_packed_desc));\n+\t}\n+\n+\tvhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, buffer_desc_count, *nr_buffers);\n+\n+\treturn 0;\n+}\n+\n+static __rte_always_inline int16_t\n+virtio_dev_rx_async_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,\n+\t\t\t    struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers,\n+\t\t\t    struct vring_packed_desc *async_descs,\n+\t\t\t    struct iovec *src_iovec, struct iovec *dst_iovec,\n+\t\t\t    struct rte_vhost_iov_iter *src_it, struct rte_vhost_iov_iter *dst_it)\n+{\n+\tstruct buf_vector buf_vec[BUF_VECTOR_MAX];\n+\n+\tif (unlikely(vhost_enqueue_async_single_packed(dev, vq, pkt, buf_vec, nr_descs, nr_buffers,\n+\t\t\t\t\t\t async_descs, src_iovec, dst_iovec,\n+\t\t\t\t\t\t src_it, dst_it) < 0)) {\n+\t\tVHOST_LOG_DATA(DEBUG, \"(%d) failed to get enough desc from vring\\n\", dev->vid);\n+\t\treturn -1;\n+\t}\n+\n+\tVHOST_LOG_DATA(DEBUG, \"(%d) current index %d | end index %d\\n\",\n+\t\t\tdev->vid, vq->last_avail_idx, vq->last_avail_idx + *nr_descs);\n+\n+\treturn 0;\n+}\n+\n+static __rte_always_inline void\n+dma_error_handler_packed(struct vhost_virtqueue *vq, struct vring_packed_desc *async_descs,\n+\t\t\tuint16_t async_descs_idx, uint16_t slot_idx, uint32_t nr_err,\n+\t\t\tuint32_t *pkt_idx, uint32_t *num_async_pkts, uint32_t *num_done_pkts)\n+{\n+\tuint16_t descs_err = 0;\n+\tuint16_t buffers_err = 0;\n+\tstruct async_inflight_info *pkts_info = vq->async_pkts_info;\n+\n+\t*num_async_pkts -= nr_err;\n+\t*pkt_idx -= nr_err;\n+\t/* calculate the sum of buffers and descs of DMA-error packets. */\n+\twhile (nr_err-- > 0) {\n+\t\tdescs_err += pkts_info[slot_idx % vq->size].descs;\n+\t\tbuffers_err += pkts_info[slot_idx % vq->size].nr_buffers;\n+\t\tslot_idx--;\n+\t}\n+\n+\tvq->async_buffer_idx_packed -= buffers_err;\n+\n+\tif (vq->last_avail_idx >= descs_err) {\n+\t\tvq->last_avail_idx -= descs_err;\n+\n+\t\trte_memcpy(&vq->desc_packed[vq->last_avail_idx],\n+\t\t\t&async_descs[async_descs_idx - descs_err],\n+\t\t\tdescs_err * sizeof(struct vring_packed_desc));\n+\t} else {\n+\t\tuint16_t nr_copy;\n+\n+\t\tvq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err;\n+\t\tnr_copy = vq->size - vq->last_avail_idx;\n+\t\trte_memcpy(&vq->desc_packed[vq->last_avail_idx],\n+\t\t\t&async_descs[async_descs_idx - descs_err],\n+\t\t\tnr_copy * sizeof(struct vring_packed_desc));\n+\t\tdescs_err -= nr_copy;\n+\t\trte_memcpy(&vq->desc_packed[0], &async_descs[async_descs_idx - descs_err],\n+\t\t\tdescs_err * sizeof(struct vring_packed_desc));\n+\t\tvq->avail_wrap_counter ^= 1;\n+\t}\n+\n+\t*num_done_pkts = *pkt_idx - *num_async_pkts;\n+}\n+\n+static __rte_noinline uint32_t\n+virtio_dev_rx_async_submit_packed(struct virtio_net *dev,\n+\tstruct vhost_virtqueue *vq, uint16_t queue_id,\n+\tstruct rte_mbuf **pkts, uint32_t count,\n+\tstruct rte_mbuf **comp_pkts, uint32_t *comp_count)\n+{\n+\tuint32_t pkt_idx = 0, pkt_burst_idx = 0;\n+\tuint16_t async_descs_idx = 0;\n+\tuint16_t num_buffers;\n+\tuint16_t num_descs;\n+\n+\tstruct rte_vhost_iov_iter *it_pool = vq->it_pool;\n+\tstruct iovec *vec_pool = vq->vec_pool;\n+\tstruct rte_vhost_async_desc tdes[MAX_PKT_BURST];\n+\tstruct iovec *src_iovec = vec_pool;\n+\tstruct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);\n+\tuint16_t slot_idx = 0;\n+\tuint16_t segs_await = 0;\n+\tuint16_t iovec_idx = 0, it_idx = 0;\n+\tstruct async_inflight_info *pkts_info = vq->async_pkts_info;\n+\tuint32_t n_pkts = 0, pkt_err = 0;\n+\tuint32_t num_async_pkts = 0, num_done_pkts = 0;\n+\tstruct vring_packed_desc async_descs[vq->size];\n+\n+\trte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);\n+\n+\tfor (pkt_idx = 0; pkt_idx < count; pkt_idx++) {\n+\t\tnum_buffers = 0;\n+\t\tnum_descs = 0;\n+\n+\t\tif (unlikely(virtio_dev_rx_async_single_packed(dev, vq, pkts[pkt_idx],\n+\t\t\t\t\t\t&num_descs, &num_buffers,\n+\t\t\t\t\t\t&async_descs[async_descs_idx],\n+\t\t\t\t\t\t&src_iovec[iovec_idx], &dst_iovec[iovec_idx],\n+\t\t\t\t\t\t&it_pool[it_idx], &it_pool[it_idx + 1]) < 0))\n+\t\t\tbreak;\n+\n+\t\tVHOST_LOG_DATA(DEBUG, \"(%d) current index %d | end index %d\\n\",\n+\t\t\tdev->vid, vq->last_avail_idx,\n+\t\t\tvq->last_avail_idx + num_descs);\n+\n+\t\tslot_idx = (vq->async_pkts_idx + num_async_pkts) % vq->size;\n+\t\tif (it_pool[it_idx].count) {\n+\t\t\tuint16_t from, to;\n+\n+\t\t\tasync_descs_idx += num_descs;\n+\t\t\tasync_fill_desc(&tdes[pkt_burst_idx++],\n+\t\t\t\t&it_pool[it_idx], &it_pool[it_idx + 1]);\n+\t\t\tpkts_info[slot_idx].descs = num_descs;\n+\t\t\tpkts_info[slot_idx].nr_buffers = num_buffers;\n+\t\t\tpkts_info[slot_idx].mbuf = pkts[pkt_idx];\n+\t\t\tnum_async_pkts++;\n+\t\t\tiovec_idx += it_pool[it_idx].nr_segs;\n+\t\t\tit_idx += 2;\n+\n+\t\t\tsegs_await += it_pool[it_idx].nr_segs;\n+\n+\t\t\t/**\n+\t\t\t * recover shadow used ring and keep DMA-occupied\n+\t\t\t * descriptors.\n+\t\t\t */\n+\t\t\tfrom = vq->shadow_used_idx - num_buffers;\n+\t\t\tto = vq->async_buffer_idx_packed % vq->size;\n+\t\t\tstore_dma_desc_info_packed(vq->shadow_used_packed,\n+\t\t\t\t\tvq->async_buffers_packed, vq->size, from, to, num_buffers);\n+\n+\t\t\tvq->async_buffer_idx_packed += num_buffers;\n+\t\t\tvq->shadow_used_idx -= num_buffers;\n+\t\t} else {\n+\t\t\tcomp_pkts[num_done_pkts++] = pkts[pkt_idx];\n+\t\t}\n+\n+\t\tvq_inc_last_avail_packed(vq, num_descs);\n+\n+\t\t/*\n+\t\t * conditions to trigger async device transfer:\n+\t\t * - buffered packet number reaches transfer threshold\n+\t\t * - unused async iov number is less than max vhost vector\n+\t\t */\n+\t\tif (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||\n+\t\t\t((VHOST_MAX_ASYNC_VEC >> 1) - segs_await < BUF_VECTOR_MAX))) {\n+\t\t\tn_pkts = vq->async_ops.transfer_data(dev->vid, queue_id,\n+\t\t\t\ttdes, 0, pkt_burst_idx);\n+\t\t\tiovec_idx = 0;\n+\t\t\tit_idx = 0;\n+\t\t\tsegs_await = 0;\n+\t\t\tvq->async_pkts_inflight_n += n_pkts;\n+\n+\t\t\tif (unlikely(n_pkts < pkt_burst_idx)) {\n+\t\t\t\t/*\n+\t\t\t\t * log error packets number here and do actual\n+\t\t\t\t * error processing when applications poll\n+\t\t\t\t * completion\n+\t\t\t\t */\n+\t\t\t\tpkt_err = pkt_burst_idx - n_pkts;\n+\t\t\t\tpkt_burst_idx = 0;\n+\t\t\t\tpkt_idx++;\n+\t\t\t\tbreak;\n+\t\t\t}\n+\n+\t\t\tpkt_burst_idx = 0;\n+\t\t}\n+\t}\n+\n+\tif (pkt_burst_idx) {\n+\t\tn_pkts = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx);\n+\t\tvq->async_pkts_inflight_n += n_pkts;\n+\n+\t\tif (unlikely(n_pkts < pkt_burst_idx))\n+\t\t\tpkt_err = pkt_burst_idx - n_pkts;\n+\t}\n+\n+\tdo_data_copy_enqueue(dev, vq);\n+\n+\tif (unlikely(pkt_err))\n+\t\tdma_error_handler_packed(vq, async_descs, async_descs_idx, slot_idx, pkt_err,\n+\t\t\t\t\t&pkt_idx, &num_async_pkts, &num_done_pkts);\n+\tvq->async_pkts_idx += num_async_pkts;\n+\t*comp_count = num_done_pkts;\n+\n+\tif (likely(vq->shadow_used_idx)) {\n+\t\tvhost_flush_enqueue_shadow_packed(dev, vq);\n+\t\tvhost_vring_call_packed(dev, vq);\n+\t}\n+\n+\treturn pkt_idx;\n+}\n+\n static __rte_always_inline void\n write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs)\n {\n@@ -1649,7 +2001,7 @@ write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs)\n \tuint16_t to, from;\n \n \tdo {\n-\t\tfrom = vq->last_async_desc_idx & (vq->size - 1);\n+\t\tfrom = vq->last_async_desc_idx_split & (vq->size - 1);\n \t\tnr_copy = nr_left + from <= vq->size ? nr_left : vq->size - from;\n \t\tto = vq->last_used_idx & (vq->size - 1);\n \n@@ -1665,18 +2017,41 @@ write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs)\n \t\t\t\t\t(nr_copy - size) * sizeof(struct vring_used_elem));\n \t\t}\n \n-\t\tvq->last_async_desc_idx += nr_copy;\n+\t\tvq->last_async_desc_idx_split += nr_copy;\n \t\tvq->last_used_idx += nr_copy;\n \t\tnr_left -= nr_copy;\n \t} while (nr_left > 0);\n }\n \n+static __rte_always_inline void\n+write_back_completed_descs_packed(struct vhost_virtqueue *vq,\n+\t\t\t\tuint16_t n_buffers)\n+{\n+\tuint16_t nr_left = n_buffers;\n+\tuint16_t from, to;\n+\n+\tdo {\n+\t\tfrom = vq->last_async_buffer_idx_packed % vq->size;\n+\t\tto = (from + nr_left) % vq->size;\n+\t\tif (to > from) {\n+\t\t\tvhost_update_used_packed(vq, vq->async_buffers_packed + from, to - from);\n+\t\t\tvq->last_async_buffer_idx_packed += nr_left;\n+\t\t\tnr_left = 0;\n+\t\t} else {\n+\t\t\tvhost_update_used_packed(vq, vq->async_buffers_packed + from,\n+\t\t\t\tvq->size - from);\n+\t\t\tvq->last_async_buffer_idx_packed += vq->size - from;\n+\t\t\tnr_left -= vq->size - from;\n+\t\t}\n+\t} while (nr_left > 0);\n+}\n+\n uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,\n \t\tstruct rte_mbuf **pkts, uint16_t count)\n {\n \tstruct virtio_net *dev = get_device(vid);\n \tstruct vhost_virtqueue *vq;\n-\tuint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0;\n+\tuint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;\n \tuint16_t start_idx, pkts_idx, vq_size;\n \tstruct async_inflight_info *pkts_info;\n \tuint16_t from, i;\n@@ -1701,7 +2076,7 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,\n \n \trte_spinlock_lock(&vq->access_lock);\n \n-\tpkts_idx = vq->async_pkts_idx & (vq->size - 1);\n+\tpkts_idx = vq->async_pkts_idx % vq->size;\n \tpkts_info = vq->async_pkts_info;\n \tvq_size = vq->size;\n \tstart_idx = virtio_dev_rx_async_get_info_idx(pkts_idx,\n@@ -1718,21 +2093,41 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,\n \t\tgoto done;\n \t}\n \n-\tfor (i = 0; i < n_pkts_put; i++) {\n-\t\tfrom = (start_idx + i) & (vq_size - 1);\n-\t\tn_descs += pkts_info[from].descs;\n-\t\tpkts[i] = pkts_info[from].mbuf;\n+\tif (vq_is_packed(dev)) {\n+\t\tfor (i = 0; i < n_pkts_put; i++) {\n+\t\t\tfrom = (start_idx + i) & (vq_size - 1);\n+\t\t\tn_buffers += pkts_info[from].nr_buffers;\n+\t\t\tpkts[i] = pkts_info[from].mbuf;\n+\t\t}\n+\t} else {\n+\t\tfor (i = 0; i < n_pkts_put; i++) {\n+\t\t\tfrom = (start_idx + i) & (vq_size - 1);\n+\t\t\tn_descs += pkts_info[from].descs;\n+\t\t\tpkts[i] = pkts_info[from].mbuf;\n+\t\t}\n \t}\n+\n \tvq->async_last_pkts_n = n_pkts_cpl - n_pkts_put;\n \tvq->async_pkts_inflight_n -= n_pkts_put;\n \n \tif (likely(vq->enabled && vq->access_ok)) {\n-\t\twrite_back_completed_descs_split(vq, n_descs);\n+\t\tif (vq_is_packed(dev)) {\n+\t\t\twrite_back_completed_descs_packed(vq, n_buffers);\n \n-\t\t__atomic_add_fetch(&vq->used->idx, n_descs, __ATOMIC_RELEASE);\n-\t\tvhost_vring_call_split(dev, vq);\n-\t} else\n-\t\tvq->last_async_desc_idx += n_descs;\n+\t\t\tvhost_vring_call_packed(dev, vq);\n+\t\t} else {\n+\t\t\twrite_back_completed_descs_split(vq, n_descs);\n+\n+\t\t\t__atomic_add_fetch(&vq->used->idx, n_descs,\n+\t\t\t\t\t__ATOMIC_RELEASE);\n+\t\t\tvhost_vring_call_split(dev, vq);\n+\t\t}\n+\t} else {\n+\t\tif (vq_is_packed(dev))\n+\t\t\tvq->last_async_buffer_idx_packed += n_buffers;\n+\t\telse\n+\t\t\tvq->last_async_desc_idx_split += n_descs;\n+\t}\n \n done:\n \trte_spinlock_unlock(&vq->access_lock);\n@@ -1773,9 +2168,10 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,\n \tif (count == 0)\n \t\tgoto out;\n \n-\t/* TODO: packed queue not implemented */\n \tif (vq_is_packed(dev))\n-\t\tnb_tx = 0;\n+\t\tnb_tx = virtio_dev_rx_async_submit_packed(dev,\n+\t\t\t\tvq, queue_id, pkts, count, comp_pkts,\n+\t\t\t\tcomp_count);\n \telse\n \t\tnb_tx = virtio_dev_rx_async_submit_split(dev,\n \t\t\t\tvq, queue_id, pkts, count, comp_pkts,\n",
    "prefixes": [
        "v9",
        "2/4"
    ]
}