get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/80112/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 80112,
    "url": "http://patches.dpdk.org/api/patches/80112/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20201009081410.63944-5-yong.liu@intel.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20201009081410.63944-5-yong.liu@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20201009081410.63944-5-yong.liu@intel.com",
    "date": "2020-10-09T08:14:09",
    "name": "[v3,4/5] vhost: add packed ring vectorized dequeue",
    "commit_ref": null,
    "pull_url": null,
    "state": "rejected",
    "archived": true,
    "hash": "69375a1545060edc81ea1fbf68c5ca77162036f9",
    "submitter": {
        "id": 17,
        "url": "http://patches.dpdk.org/api/people/17/?format=api",
        "name": "Marvin Liu",
        "email": "yong.liu@intel.com"
    },
    "delegate": {
        "id": 2642,
        "url": "http://patches.dpdk.org/api/users/2642/?format=api",
        "username": "mcoquelin",
        "first_name": "Maxime",
        "last_name": "Coquelin",
        "email": "maxime.coquelin@redhat.com"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20201009081410.63944-5-yong.liu@intel.com/mbox/",
    "series": [
        {
            "id": 12813,
            "url": "http://patches.dpdk.org/api/series/12813/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=12813",
            "date": "2020-10-09T08:14:05",
            "name": "vhost add vectorized data path",
            "version": 3,
            "mbox": "http://patches.dpdk.org/series/12813/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/80112/comments/",
    "check": "success",
    "checks": "http://patches.dpdk.org/api/patches/80112/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from dpdk.org (dpdk.org [92.243.14.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 11ADFA04BC;\n\tFri,  9 Oct 2020 10:21:55 +0200 (CEST)",
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id E63321C1F2;\n\tFri,  9 Oct 2020 10:20:42 +0200 (CEST)",
            "from mga12.intel.com (mga12.intel.com [192.55.52.136])\n by dpdk.org (Postfix) with ESMTP id 9E2D31C1E5\n for <dev@dpdk.org>; Fri,  9 Oct 2020 10:20:40 +0200 (CEST)",
            "from orsmga005.jf.intel.com ([10.7.209.41])\n by fmsmga106.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 09 Oct 2020 01:20:38 -0700",
            "from npg-dpdk-virtual-marvin-dev.sh.intel.com ([10.67.119.56])\n by orsmga005.jf.intel.com with ESMTP; 09 Oct 2020 01:20:35 -0700"
        ],
        "IronPort-SDR": [
            "\n zPmeE7LSUfcIGCPdP3dB2CZL+hJu0BKA3T/JYZ19jdYMFgJgKNmY0JaDIEYVzPVhV7AZwLi5in\n enzKpmmsRneA==",
            "\n k53vc21eC5edO39TggCPsKzo+qcoPgvyj/AUuRCcP+szderzhSPv+bUaKnTycVQ0MLi4bJ9zOD\n Lkbwci8GbkaA=="
        ],
        "X-IronPort-AV": [
            "E=McAfee;i=\"6000,8403,9768\"; a=\"144778919\"",
            "E=Sophos;i=\"5.77,354,1596524400\"; d=\"scan'208\";a=\"144778919\"",
            "E=Sophos;i=\"5.77,354,1596524400\"; d=\"scan'208\";a=\"528833366\""
        ],
        "X-Amp-Result": "SKIPPED(no attachment in message)",
        "X-Amp-File-Uploaded": "False",
        "X-ExtLoop1": "1",
        "From": "Marvin Liu <yong.liu@intel.com>",
        "To": "maxime.coquelin@redhat.com, chenbo.xia@intel.com, zhihong.wang@intel.com",
        "Cc": "dev@dpdk.org,\n\tMarvin Liu <yong.liu@intel.com>",
        "Date": "Fri,  9 Oct 2020 16:14:09 +0800",
        "Message-Id": "<20201009081410.63944-5-yong.liu@intel.com>",
        "X-Mailer": "git-send-email 2.17.1",
        "In-Reply-To": "<20201009081410.63944-1-yong.liu@intel.com>",
        "References": "<20200819032414.51430-2-yong.liu@intel.com>\n <20201009081410.63944-1-yong.liu@intel.com>",
        "Subject": "[dpdk-dev] [PATCH v3 4/5] vhost: add packed ring vectorized dequeue",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Optimize vhost packed ring dequeue path with SIMD instructions. Four\ndescriptors status check and writeback are batched handled with AVX512\ninstructions. Address translation operations are also accelerated by\nAVX512 instructions.\n\nIf platform or compiler not support vectorization, will fallback to\ndefault path.\n\nSigned-off-by: Marvin Liu <yong.liu@intel.com>",
    "diff": "diff --git a/lib/librte_vhost/meson.build b/lib/librte_vhost/meson.build\nindex cc9aa65c6..5eadcbae4 100644\n--- a/lib/librte_vhost/meson.build\n+++ b/lib/librte_vhost/meson.build\n@@ -8,6 +8,22 @@ endif\n if has_libnuma == 1\n \tdpdk_conf.set10('RTE_LIBRTE_VHOST_NUMA', true)\n endif\n+\n+if arch_subdir == 'x86'\n+        if not machine_args.contains('-mno-avx512f')\n+                if cc.has_argument('-mavx512f') and cc.has_argument('-mavx512vl') and cc.has_argument('-mavx512bw')\n+                        cflags += ['-DCC_AVX512_SUPPORT']\n+                        vhost_avx512_lib = static_library('vhost_avx512_lib',\n+                                              'virtio_net_avx.c',\n+                                              dependencies: [static_rte_eal, static_rte_mempool,\n+                                                  static_rte_mbuf, static_rte_ethdev, static_rte_net],\n+                                              include_directories: includes,\n+                                              c_args: [cflags, '-mavx512f', '-mavx512bw', '-mavx512vl'])\n+                        objs += vhost_avx512_lib.extract_objects('virtio_net_avx.c')\n+                endif\n+        endif\n+endif\n+\n if (toolchain == 'gcc' and cc.version().version_compare('>=8.3.0'))\n \tcflags += '-DVHOST_GCC_UNROLL_PRAGMA'\n elif (toolchain == 'clang' and cc.version().version_compare('>=3.7.0'))\ndiff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h\nindex a19fe9423..b270c424b 100644\n--- a/lib/librte_vhost/vhost.h\n+++ b/lib/librte_vhost/vhost.h\n@@ -1100,4 +1100,15 @@ virtio_dev_pktmbuf_alloc(struct virtio_net *dev, struct rte_mempool *mp,\n \n \treturn NULL;\n }\n+\n+int\n+vhost_reserve_avail_batch_packed_avx(struct virtio_net *dev,\n+\t\t\t\t struct vhost_virtqueue *vq,\n+\t\t\t\t struct rte_mempool *mbuf_pool,\n+\t\t\t\t struct rte_mbuf **pkts,\n+\t\t\t\t uint16_t avail_idx,\n+\t\t\t\t uintptr_t *desc_addrs,\n+\t\t\t\t uint16_t *ids);\n+\n+\n #endif /* _VHOST_NET_CDEV_H_ */\ndiff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c\nindex 9757ed053..3bc6b9b20 100644\n--- a/lib/librte_vhost/virtio_net.c\n+++ b/lib/librte_vhost/virtio_net.c\n@@ -2136,6 +2136,28 @@ vhost_reserve_avail_batch_packed(struct virtio_net *dev,\n \treturn -1;\n }\n \n+static __rte_always_inline int\n+vhost_handle_avail_batch_packed(struct virtio_net *dev,\n+\t\t\t\t struct vhost_virtqueue *vq,\n+\t\t\t\t struct rte_mempool *mbuf_pool,\n+\t\t\t\t struct rte_mbuf **pkts,\n+\t\t\t\t uint16_t avail_idx,\n+\t\t\t\t uintptr_t *desc_addrs,\n+\t\t\t\t uint16_t *ids)\n+{\n+#ifdef CC_AVX512_SUPPORT\n+\tif (unlikely(dev->vectorized))\n+\t\treturn vhost_reserve_avail_batch_packed_avx(dev, vq, mbuf_pool,\n+\t\t\t\tpkts, avail_idx, desc_addrs, ids);\n+\telse\n+\t\treturn vhost_reserve_avail_batch_packed(dev, vq, mbuf_pool,\n+\t\t\t\tpkts, avail_idx, desc_addrs, ids);\n+#else\n+\treturn vhost_reserve_avail_batch_packed(dev, vq, mbuf_pool, pkts,\n+\t\t\tavail_idx, desc_addrs, ids);\n+#endif\n+}\n+\n static __rte_always_inline int\n virtio_dev_tx_batch_packed(struct virtio_net *dev,\n \t\t\t   struct vhost_virtqueue *vq,\n@@ -2148,8 +2170,9 @@ virtio_dev_tx_batch_packed(struct virtio_net *dev,\n \tuint16_t ids[PACKED_BATCH_SIZE];\n \tuint16_t i;\n \n-\tif (vhost_reserve_avail_batch_packed(dev, vq, mbuf_pool, pkts,\n-\t\t\t\t\t     avail_idx, desc_addrs, ids))\n+\n+\tif (vhost_handle_avail_batch_packed(dev, vq, mbuf_pool, pkts,\n+\t\tavail_idx, desc_addrs, ids))\n \t\treturn -1;\n \n \tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)\ndiff --git a/lib/librte_vhost/virtio_net_avx.c b/lib/librte_vhost/virtio_net_avx.c\nnew file mode 100644\nindex 000000000..e10b2a285\n--- /dev/null\n+++ b/lib/librte_vhost/virtio_net_avx.c\n@@ -0,0 +1,184 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2010-2016 Intel Corporation\n+ */\n+#include <stdint.h>\n+\n+#include \"vhost.h\"\n+\n+#define BYTE_SIZE 8\n+/* reference count offset in mbuf rearm data */\n+#define REFCNT_BITS_OFFSET ((offsetof(struct rte_mbuf, refcnt) - \\\n+\toffsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE)\n+/* segment number offset in mbuf rearm data */\n+#define SEG_NUM_BITS_OFFSET ((offsetof(struct rte_mbuf, nb_segs) - \\\n+\toffsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE)\n+\n+/* default rearm data */\n+#define DEFAULT_REARM_DATA (1ULL << SEG_NUM_BITS_OFFSET | \\\n+\t1ULL << REFCNT_BITS_OFFSET)\n+\n+#define DESC_FLAGS_SHORT_OFFSET (offsetof(struct vring_packed_desc, flags) / \\\n+\tsizeof(uint16_t))\n+\n+#define DESC_FLAGS_SHORT_SIZE (sizeof(struct vring_packed_desc) / \\\n+\tsizeof(uint16_t))\n+#define BATCH_FLAGS_MASK (1 << DESC_FLAGS_SHORT_OFFSET | \\\n+\t1 << (DESC_FLAGS_SHORT_OFFSET + DESC_FLAGS_SHORT_SIZE) | \\\n+\t1 << (DESC_FLAGS_SHORT_OFFSET + DESC_FLAGS_SHORT_SIZE * 2)  | \\\n+\t1 << (DESC_FLAGS_SHORT_OFFSET + DESC_FLAGS_SHORT_SIZE * 3))\n+\n+#define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \\\n+\toffsetof(struct vring_packed_desc, len)) * BYTE_SIZE)\n+\n+#define PACKED_FLAGS_MASK ((0ULL | VRING_DESC_F_AVAIL | VRING_DESC_F_USED) \\\n+\t<< FLAGS_BITS_OFFSET)\n+#define PACKED_AVAIL_FLAG ((0ULL | VRING_DESC_F_AVAIL) << FLAGS_BITS_OFFSET)\n+#define PACKED_AVAIL_FLAG_WRAP ((0ULL | VRING_DESC_F_USED) << \\\n+\tFLAGS_BITS_OFFSET)\n+\n+#define DESC_FLAGS_POS 0xaa\n+#define MBUF_LENS_POS 0x6666\n+\n+int\n+vhost_reserve_avail_batch_packed_avx(struct virtio_net *dev,\n+\t\t\t\t struct vhost_virtqueue *vq,\n+\t\t\t\t struct rte_mempool *mbuf_pool,\n+\t\t\t\t struct rte_mbuf **pkts,\n+\t\t\t\t uint16_t avail_idx,\n+\t\t\t\t uintptr_t *desc_addrs,\n+\t\t\t\t uint16_t *ids)\n+{\n+\tstruct vring_packed_desc *descs = vq->desc_packed;\n+\tuint32_t descs_status;\n+\tvoid *desc_addr;\n+\tuint16_t i;\n+\tuint8_t cmp_low, cmp_high, cmp_result;\n+\tuint64_t lens[PACKED_BATCH_SIZE];\n+\tstruct virtio_net_hdr *hdr;\n+\n+\tif (unlikely(avail_idx & PACKED_BATCH_MASK))\n+\t\treturn -1;\n+\tif (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))\n+\t\treturn -1;\n+\n+\t/* load 4 descs */\n+\tdesc_addr = &vq->desc_packed[avail_idx];\n+\t__m512i desc_vec = _mm512_loadu_si512(desc_addr);\n+\n+\t/* burst check four status */\n+\t__m512i avail_flag_vec;\n+\tif (vq->avail_wrap_counter)\n+#if defined(RTE_ARCH_I686)\n+\t\tavail_flag_vec = _mm512_set4_epi64(PACKED_AVAIL_FLAG, 0x0,\n+\t\t\t\t\tPACKED_FLAGS_MASK, 0x0);\n+#else\n+\t\tavail_flag_vec = _mm512_maskz_set1_epi64(DESC_FLAGS_POS,\n+\t\t\t\t\tPACKED_AVAIL_FLAG);\n+\n+#endif\n+\telse\n+#if defined(RTE_ARCH_I686)\n+\t\tavail_flag_vec = _mm512_set4_epi64(PACKED_AVAIL_FLAG_WRAP,\n+\t\t\t\t\t0x0, PACKED_AVAIL_FLAG_WRAP, 0x0);\n+#else\n+\t\tavail_flag_vec = _mm512_maskz_set1_epi64(DESC_FLAGS_POS,\n+\t\t\t\t\tPACKED_AVAIL_FLAG_WRAP);\n+#endif\n+\n+\tdescs_status = _mm512_cmp_epu16_mask(desc_vec, avail_flag_vec,\n+\t\t_MM_CMPINT_NE);\n+\tif (descs_status & BATCH_FLAGS_MASK)\n+\t\treturn -1;\n+\n+\tif (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {\n+\t\tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\t\tuint64_t size = (uint64_t)descs[avail_idx + i].len;\n+\t\t\tdesc_addrs[i] = __vhost_iova_to_vva(dev, vq,\n+\t\t\t\tdescs[avail_idx + i].addr, &size,\n+\t\t\t\tVHOST_ACCESS_RO);\n+\n+\t\t\tif (!desc_addrs[i])\n+\t\t\t\tgoto free_buf;\n+\t\t\tlens[i] = descs[avail_idx + i].len;\n+\t\t\trte_prefetch0((void *)(uintptr_t)desc_addrs[i]);\n+\n+\t\t\tpkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool,\n+\t\t\t\t\tlens[i]);\n+\t\t\tif (!pkts[i])\n+\t\t\t\tgoto free_buf;\n+\t\t}\n+\t} else {\n+\t\t/* check buffer fit into one region & translate address */\n+\t\tstruct mem_regions_range *range = dev->regions_range;\n+\t\t__m512i regions_low_addrs =\n+\t\t\t_mm512_loadu_si512((void *)&range->regions_low_addrs);\n+\t\t__m512i regions_high_addrs =\n+\t\t\t_mm512_loadu_si512((void *)&range->regions_high_addrs);\n+\t\tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\t\tuint64_t addr_low = descs[avail_idx + i].addr;\n+\t\t\tuint64_t addr_high = addr_low +\n+\t\t\t\t\t\tdescs[avail_idx + i].len;\n+\t\t\t__m512i low_addr_vec = _mm512_set1_epi64(addr_low);\n+\t\t\t__m512i high_addr_vec = _mm512_set1_epi64(addr_high);\n+\n+\t\t\tcmp_low = _mm512_cmp_epi64_mask(low_addr_vec,\n+\t\t\t\t\tregions_low_addrs, _MM_CMPINT_NLT);\n+\t\t\tcmp_high = _mm512_cmp_epi64_mask(high_addr_vec,\n+\t\t\t\t\tregions_high_addrs, _MM_CMPINT_LT);\n+\t\t\tcmp_result = cmp_low & cmp_high;\n+\t\t\tint index = __builtin_ctz(cmp_result);\n+\t\t\tif (unlikely((uint32_t)index >= dev->mem->nregions))\n+\t\t\t\tgoto free_buf;\n+\n+\t\t\tdesc_addrs[i] = addr_low +\n+\t\t\t\tdev->mem->regions[index].host_user_addr -\n+\t\t\t\tdev->mem->regions[index].guest_phys_addr;\n+\t\t\tlens[i] = descs[avail_idx + i].len;\n+\t\t\trte_prefetch0((void *)(uintptr_t)desc_addrs[i]);\n+\n+\t\t\tpkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool,\n+\t\t\t\t\tlens[i]);\n+\t\t\tif (!pkts[i])\n+\t\t\t\tgoto free_buf;\n+\t\t}\n+\t}\n+\n+\tif (virtio_net_with_host_offload(dev)) {\n+\t\tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\t\thdr = (struct virtio_net_hdr *)(desc_addrs[i]);\n+\t\t\tvhost_dequeue_offload(hdr, pkts[i]);\n+\t\t}\n+\t}\n+\n+\tif (virtio_net_is_inorder(dev)) {\n+\t\tids[PACKED_BATCH_SIZE - 1] =\n+\t\t\tdescs[avail_idx + PACKED_BATCH_SIZE - 1].id;\n+\t} else {\n+\t\tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)\n+\t\t\tids[i] = descs[avail_idx + i].id;\n+\t}\n+\n+\tuint64_t addrs[PACKED_BATCH_SIZE << 1];\n+\t/* store mbuf data_len, pkt_len */\n+\tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\taddrs[i << 1] = (uint64_t)pkts[i]->rx_descriptor_fields1;\n+\t\taddrs[(i << 1) + 1] = (uint64_t)pkts[i]->rx_descriptor_fields1\n+\t\t\t\t\t+ sizeof(uint64_t);\n+\t}\n+\n+\t/* save pkt_len and data_len into mbufs */\n+\t__m512i value_vec = _mm512_maskz_shuffle_epi32(MBUF_LENS_POS, desc_vec,\n+\t\t\t\t\t0xAA);\n+\t__m512i offsets_vec = _mm512_maskz_set1_epi32(MBUF_LENS_POS,\n+\t\t\t\t\t(uint32_t)-12);\n+\tvalue_vec = _mm512_add_epi32(value_vec, offsets_vec);\n+\t__m512i vindex = _mm512_loadu_si512((void *)addrs);\n+\t_mm512_i64scatter_epi64(0, vindex, value_vec, 1);\n+\n+\treturn 0;\n+free_buf:\n+\tfor (i = 0; i < PACKED_BATCH_SIZE; i++)\n+\t\trte_pktmbuf_free(pkts[i]);\n+\n+\treturn -1;\n+}\n",
    "prefixes": [
        "v3",
        "4/5"
    ]
}