get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/115562/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 115562,
    "url": "https://patches.dpdk.org/api/patches/115562/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/20220829005658.84590-1-wenwux.ma@intel.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20220829005658.84590-1-wenwux.ma@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20220829005658.84590-1-wenwux.ma@intel.com",
    "date": "2022-08-29T00:56:58",
    "name": "[v4] vhost: support CPU copy for small packets",
    "commit_ref": null,
    "pull_url": null,
    "state": "new",
    "archived": false,
    "hash": "7c3f09efb317a0792e94391dbfa1576a09225af5",
    "submitter": {
        "id": 2163,
        "url": "https://patches.dpdk.org/api/people/2163/?format=api",
        "name": "Ma, WenwuX",
        "email": "wenwux.ma@intel.com"
    },
    "delegate": {
        "id": 2642,
        "url": "https://patches.dpdk.org/api/users/2642/?format=api",
        "username": "mcoquelin",
        "first_name": "Maxime",
        "last_name": "Coquelin",
        "email": "maxime.coquelin@redhat.com"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/20220829005658.84590-1-wenwux.ma@intel.com/mbox/",
    "series": [
        {
            "id": 24434,
            "url": "https://patches.dpdk.org/api/series/24434/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=24434",
            "date": "2022-08-29T00:56:58",
            "name": "[v4] vhost: support CPU copy for small packets",
            "version": 4,
            "mbox": "https://patches.dpdk.org/series/24434/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/115562/comments/",
    "check": "success",
    "checks": "https://patches.dpdk.org/api/patches/115562/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 79992A0545;\n\tMon, 29 Aug 2022 02:58:29 +0200 (CEST)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 0995C40DDA;\n\tMon, 29 Aug 2022 02:58:29 +0200 (CEST)",
            "from mga12.intel.com (mga12.intel.com [192.55.52.136])\n by mails.dpdk.org (Postfix) with ESMTP id 9767C4069F\n for <dev@dpdk.org>; Mon, 29 Aug 2022 02:58:27 +0200 (CEST)",
            "from orsmga008.jf.intel.com ([10.7.209.65])\n by fmsmga106.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 28 Aug 2022 17:58:26 -0700",
            "from unknown (HELO localhost.localdomain) ([10.239.252.251])\n by orsmga008-auth.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 28 Aug 2022 17:58:22 -0700"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/simple;\n d=intel.com; i=@intel.com; q=dns/txt; s=Intel;\n t=1661734707; x=1693270707;\n h=from:to:cc:subject:date:message-id:in-reply-to:\n references:mime-version:content-transfer-encoding;\n bh=YSIffM5/RmLNy/wZAkZsPypiAR1/qT7jzjoPne8bAmY=;\n b=ICQhEHyvP1Mq7gpsAfZngPxMMiwc9nLzYdcl8vPXdnxbdX+uXgO5Z8oS\n UmqnghXTiFWsRfidDtWLlD/7ciFL9Hzu3INZefqAADRi2JjtcRW/8x6sm\n +WLEp/R6LKfCvXubNKH7ETqKpW0lFIlhP1bDfuvM4I3Q+zdcaghk/NVpB\n ZvGBIJdun38wy/LQUW75F79Vwwy0TFiI5yfPM8Wlk7hp62ZFMYmyTTEvv\n cyDxSqrGYdF3AE2dq0hqKq9v4Y4YC5+xsgwNwfNr/der5fz6u+FOgcMFg\n Ciwc9TtGYktWmwnCWcl9XOecl6DmFeHYU2mghzIhvmOWRdWTQ1v7hyhoP g==;",
        "X-IronPort-AV": [
            "E=McAfee;i=\"6500,9779,10453\"; a=\"274532581\"",
            "E=Sophos;i=\"5.93,271,1654585200\"; d=\"scan'208\";a=\"274532581\"",
            "E=Sophos;i=\"5.93,271,1654585200\"; d=\"scan'208\";a=\"640729571\""
        ],
        "From": "Wenwu Ma <wenwux.ma@intel.com>",
        "To": "maxime.coquelin@redhat.com,\n\tchenbo.xia@intel.com,\n\tdev@dpdk.org",
        "Cc": "sunil.pai.g@intel.com, jiayu.hu@intel.com, yinan.wang@intel.com,\n xingguang.he@intel.com, xuan.ding@intel.com, cheng1.jiang@intel.com,\n yuanx.wang@intel.com, Wenwu Ma <wenwux.ma@intel.com>",
        "Subject": "[PATCH v4] vhost: support CPU copy for small packets",
        "Date": "Mon, 29 Aug 2022 08:56:58 +0800",
        "Message-Id": "<20220829005658.84590-1-wenwux.ma@intel.com>",
        "X-Mailer": "git-send-email 2.25.1",
        "In-Reply-To": "<20220812064517.272530-1-wenwux.ma@intel.com>",
        "References": "<20220812064517.272530-1-wenwux.ma@intel.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "Offloading small packets to DMA degrades throughput 10%~20%,\nand this is because DMA offloading is not free and DMA is not\ngood at processing small packets. In addition, control plane\npackets are usually small, and assign those packets to DMA will\nsignificantly increase latency, which may cause timeout like\nTCP handshake packets. Therefore, this patch use CPU to perform\nsmall copies in vhost.\n\nSigned-off-by: Wenwu Ma <wenwux.ma@intel.com>\n---\nv4:\n* fix coding style issues\nv3:\n* compare threshold with entire packet length\nv2:\n* fix CI build error\n---\n lib/vhost/vhost.h      |  7 ++--\n lib/vhost/virtio_net.c | 73 +++++++++++++++++++++++++++++++++---------\n 2 files changed, 62 insertions(+), 18 deletions(-)",
    "diff": "diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h\nindex 40fac3b7c6..8a7d90f737 100644\n--- a/lib/vhost/vhost.h\n+++ b/lib/vhost/vhost.h\n@@ -142,8 +142,10 @@ struct virtqueue_stats {\n  * iovec\n  */\n struct vhost_iovec {\n-\tvoid *src_addr;\n-\tvoid *dst_addr;\n+\tvoid *src_iov_addr;\n+\tvoid *dst_iov_addr;\n+\tvoid *src_virt_addr;\n+\tvoid *dst_virt_addr;\n \tsize_t len;\n };\n \n@@ -155,6 +157,7 @@ struct vhost_iov_iter {\n \tstruct vhost_iovec *iov;\n \t/** number of iovec in this iterator */\n \tunsigned long nr_segs;\n+\tunsigned long nr_len;\n };\n \n struct async_dma_vchan_info {\ndiff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c\nindex 35fa4670fd..cf796183a0 100644\n--- a/lib/vhost/virtio_net.c\n+++ b/lib/vhost/virtio_net.c\n@@ -26,6 +26,8 @@\n \n #define MAX_BATCH_LEN 256\n \n+#define CPU_COPY_THRESHOLD_LEN 256\n+\n static __rte_always_inline uint16_t\n async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq,\n \t\tstruct rte_mbuf **pkts, uint16_t count, int16_t dma_id,\n@@ -119,8 +121,8 @@ vhost_async_dma_transfer_one(struct virtio_net *dev, struct vhost_virtqueue *vq,\n \t\treturn -1;\n \n \tfor (i = 0; i < nr_segs; i++) {\n-\t\tcopy_idx = rte_dma_copy(dma_id, vchan_id, (rte_iova_t)iov[i].src_addr,\n-\t\t\t\t(rte_iova_t)iov[i].dst_addr, iov[i].len, RTE_DMA_OP_FLAG_LLC);\n+\t\tcopy_idx = rte_dma_copy(dma_id, vchan_id, (rte_iova_t)iov[i].src_iov_addr,\n+\t\t\t\t(rte_iova_t)iov[i].dst_iov_addr, iov[i].len, RTE_DMA_OP_FLAG_LLC);\n \t\t/**\n \t\t * Since all memory is pinned and DMA vChannel\n \t\t * ring has enough space, failure should be a\n@@ -149,6 +151,22 @@ vhost_async_dma_transfer_one(struct virtio_net *dev, struct vhost_virtqueue *vq,\n \treturn nr_segs;\n }\n \n+static __rte_always_inline int64_t\n+vhost_async_cpu_transfer_one(struct vhost_virtqueue *vq, uint16_t flag_idx,\n+\t\tstruct vhost_iov_iter *pkt)\n+{\n+\tuint16_t i;\n+\tstruct vhost_iovec *iov = pkt->iov;\n+\tuint32_t nr_segs = pkt->nr_segs;\n+\n+\tfor (i = 0; i < nr_segs; i++)\n+\t\trte_memcpy(iov[i].dst_virt_addr, iov[i].src_virt_addr, iov[i].len);\n+\n+\tvq->async->pkts_cmpl_flag[flag_idx] = true;\n+\n+\treturn 0;\n+}\n+\n static __rte_always_inline uint16_t\n vhost_async_dma_transfer(struct virtio_net *dev, struct vhost_virtqueue *vq,\n \t\tint16_t dma_id, uint16_t vchan_id, uint16_t head_idx,\n@@ -161,8 +179,13 @@ vhost_async_dma_transfer(struct virtio_net *dev, struct vhost_virtqueue *vq,\n \trte_spinlock_lock(&dma_info->dma_lock);\n \n \tfor (pkt_idx = 0; pkt_idx < nr_pkts; pkt_idx++) {\n-\t\tret = vhost_async_dma_transfer_one(dev, vq, dma_id, vchan_id, head_idx,\n-\t\t\t\t&pkts[pkt_idx]);\n+\t\tif (pkts[pkt_idx].nr_len > CPU_COPY_THRESHOLD_LEN) {\n+\t\t\tret = vhost_async_dma_transfer_one(dev, vq, dma_id, vchan_id, head_idx,\n+\t\t\t\t\t&pkts[pkt_idx]);\n+\t\t} else {\n+\t\t\tret = vhost_async_cpu_transfer_one(vq, head_idx, &pkts[pkt_idx]);\n+\t\t}\n+\n \t\tif (unlikely(ret < 0))\n \t\t\tbreak;\n \n@@ -1002,13 +1025,14 @@ async_iter_initialize(struct virtio_net *dev, struct vhost_async *async)\n \titer = async->iov_iter + async->iter_idx;\n \titer->iov = async->iovec + async->iovec_idx;\n \titer->nr_segs = 0;\n+\titer->nr_len = 0;\n \n \treturn 0;\n }\n \n static __rte_always_inline int\n async_iter_add_iovec(struct virtio_net *dev, struct vhost_async *async,\n-\t\tvoid *src, void *dst, size_t len)\n+\t\tvoid *src_iova, void *dst_iova, void *src_addr, void *dst_addr, size_t len)\n {\n \tstruct vhost_iov_iter *iter;\n \tstruct vhost_iovec *iovec;\n@@ -1027,8 +1051,10 @@ async_iter_add_iovec(struct virtio_net *dev, struct vhost_async *async,\n \titer = async->iov_iter + async->iter_idx;\n \tiovec = async->iovec + async->iovec_idx;\n \n-\tiovec->src_addr = src;\n-\tiovec->dst_addr = dst;\n+\tiovec->src_iov_addr = src_iova;\n+\tiovec->dst_iov_addr = dst_iova;\n+\tiovec->src_virt_addr = src_addr;\n+\tiovec->dst_virt_addr = dst_addr;\n \tiovec->len = len;\n \n \titer->nr_segs++;\n@@ -1051,6 +1077,7 @@ async_iter_cancel(struct vhost_async *async)\n \titer = async->iov_iter + async->iter_idx;\n \tasync->iovec_idx -= iter->nr_segs;\n \titer->nr_segs = 0;\n+\titer->nr_len = 0;\n \titer->iov = NULL;\n }\n \n@@ -1064,13 +1091,18 @@ async_iter_reset(struct vhost_async *async)\n static __rte_always_inline int\n async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq,\n \t\tstruct rte_mbuf *m, uint32_t mbuf_offset,\n-\t\tuint64_t buf_iova, uint32_t cpy_len, bool to_desc)\n+\t\tuint64_t buf_iova, uint64_t buf_addr, uint32_t cpy_len, bool to_desc)\n {\n \tstruct vhost_async *async = vq->async;\n \tuint64_t mapped_len;\n \tuint32_t buf_offset = 0;\n-\tvoid *src, *dst;\n+\tvoid *src_iova, *dst_iova;\n+\tvoid *src_addr, *dst_addr;\n \tvoid *host_iova;\n+\tstruct vhost_iov_iter *iter;\n+\n+\titer = async->iov_iter + async->iter_idx;\n+\titer->nr_len += cpy_len;\n \n \twhile (cpy_len) {\n \t\thost_iova = (void *)(uintptr_t)gpa_to_first_hpa(dev,\n@@ -1083,14 +1115,21 @@ async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq,\n \t\t}\n \n \t\tif (to_desc) {\n-\t\t\tsrc = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset);\n-\t\t\tdst = host_iova;\n+\t\t\tsrc_iova = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset);\n+\t\t\tdst_iova = host_iova;\n+\n+\t\t\tsrc_addr = rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);\n+\t\t\tdst_addr = (void *)(uintptr_t)(buf_addr + buf_offset);\n \t\t} else {\n-\t\t\tsrc = host_iova;\n-\t\t\tdst = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset);\n+\t\t\tsrc_iova = host_iova;\n+\t\t\tdst_iova = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset);\n+\n+\t\t\tsrc_addr = (void *)(uintptr_t)(buf_addr + buf_offset);\n+\t\t\tdst_addr = rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);\n \t\t}\n \n-\t\tif (unlikely(async_iter_add_iovec(dev, async, src, dst, (size_t)mapped_len)))\n+\t\tif (unlikely(async_iter_add_iovec(dev, async, src_iova, dst_iova,\n+\t\t\t\t\t\tsrc_addr, dst_addr, (size_t)mapped_len)))\n \t\t\treturn -1;\n \n \t\tcpy_len -= (uint32_t)mapped_len;\n@@ -1239,7 +1278,8 @@ mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,\n \n \t\tif (is_async) {\n \t\t\tif (async_fill_seg(dev, vq, m, mbuf_offset,\n-\t\t\t\t\t   buf_iova + buf_offset, cpy_len, true) < 0)\n+\t\t\t\t\t   buf_iova + buf_offset, buf_addr + buf_offset,\n+\t\t\t\t\t   cpy_len, true) < 0)\n \t\t\t\tgoto error;\n \t\t} else {\n \t\t\tsync_fill_seg(dev, vq, m, mbuf_offset,\n@@ -2737,7 +2777,8 @@ desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,\n \n \t\tif (is_async) {\n \t\t\tif (async_fill_seg(dev, vq, cur, mbuf_offset,\n-\t\t\t\t\t   buf_iova + buf_offset, cpy_len, false) < 0)\n+\t\t\t\t\t   buf_iova + buf_offset, buf_addr + buf_offset,\n+\t\t\t\t\t   cpy_len, false) < 0)\n \t\t\t\tgoto error;\n \t\t} else if (likely(hdr && cur == m)) {\n \t\t\trte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset),\n",
    "prefixes": [
        "v4"
    ]
}