get:
Show a patch.

patch:
Partially update a patch (only the fields supplied are modified).

put:
Fully update a patch (the supplied representation replaces the existing one).

GET /api/patches/80113/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 80113,
    "url": "http://patches.dpdk.org/api/patches/80113/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20201009081410.63944-6-yong.liu@intel.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20201009081410.63944-6-yong.liu@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20201009081410.63944-6-yong.liu@intel.com",
    "date": "2020-10-09T08:14:10",
    "name": "[v3,5/5] vhost: add packed ring vectorized enqueue",
    "commit_ref": null,
    "pull_url": null,
    "state": "rejected",
    "archived": true,
    "hash": "23e3274203c47a0057fc9cc95b75d65e8b1b161d",
    "submitter": {
        "id": 17,
        "url": "http://patches.dpdk.org/api/people/17/?format=api",
        "name": "Marvin Liu",
        "email": "yong.liu@intel.com"
    },
    "delegate": {
        "id": 2642,
        "url": "http://patches.dpdk.org/api/users/2642/?format=api",
        "username": "mcoquelin",
        "first_name": "Maxime",
        "last_name": "Coquelin",
        "email": "maxime.coquelin@redhat.com"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20201009081410.63944-6-yong.liu@intel.com/mbox/",
    "series": [
        {
            "id": 12813,
            "url": "http://patches.dpdk.org/api/series/12813/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=12813",
            "date": "2020-10-09T08:14:05",
            "name": "vhost add vectorized data path",
            "version": 3,
            "mbox": "http://patches.dpdk.org/series/12813/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/80113/comments/",
    "check": "success",
    "checks": "http://patches.dpdk.org/api/patches/80113/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from dpdk.org (dpdk.org [92.243.14.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id C5E74A04BC;\n\tFri,  9 Oct 2020 10:22:14 +0200 (CEST)",
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id 490D11C1F8;\n\tFri,  9 Oct 2020 10:20:44 +0200 (CEST)",
            "from mga12.intel.com (mga12.intel.com [192.55.52.136])\n by dpdk.org (Postfix) with ESMTP id E99181C1F3\n for <dev@dpdk.org>; Fri,  9 Oct 2020 10:20:41 +0200 (CEST)",
            "from orsmga005.jf.intel.com ([10.7.209.41])\n by fmsmga106.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 09 Oct 2020 01:20:41 -0700",
            "from npg-dpdk-virtual-marvin-dev.sh.intel.com ([10.67.119.56])\n by orsmga005.jf.intel.com with ESMTP; 09 Oct 2020 01:20:38 -0700"
        ],
        "IronPort-SDR": [
            "\n /HeHOQWh1wPA4C2LIXHMAzff1TjR0Xmjw3oekKv909eXcqO1QFq4S5op61hN1ivsbAPZa0seWf\n r+Zf3ghhpMiQ==",
            "\n cCEDoaTXEMH6LOWvCKO508veWXp0bjIJAwBkt+XhJbtt7oFmxyx9n6TDZeL3VrI2m5SvEuPI53\n 10rpgLQa/yjA=="
        ],
        "X-IronPort-AV": [
            "E=McAfee;i=\"6000,8403,9768\"; a=\"144778924\"",
            "E=Sophos;i=\"5.77,354,1596524400\"; d=\"scan'208\";a=\"144778924\"",
            "E=Sophos;i=\"5.77,354,1596524400\"; d=\"scan'208\";a=\"528833399\""
        ],
        "X-Amp-Result": "SKIPPED(no attachment in message)",
        "X-Amp-File-Uploaded": "False",
        "X-ExtLoop1": "1",
        "From": "Marvin Liu <yong.liu@intel.com>",
        "To": "maxime.coquelin@redhat.com, chenbo.xia@intel.com, zhihong.wang@intel.com",
        "Cc": "dev@dpdk.org,\n\tMarvin Liu <yong.liu@intel.com>",
        "Date": "Fri,  9 Oct 2020 16:14:10 +0800",
        "Message-Id": "<20201009081410.63944-6-yong.liu@intel.com>",
        "X-Mailer": "git-send-email 2.17.1",
        "In-Reply-To": "<20201009081410.63944-1-yong.liu@intel.com>",
        "References": "<20200819032414.51430-2-yong.liu@intel.com>\n <20201009081410.63944-1-yong.liu@intel.com>",
        "Subject": "[dpdk-dev] [PATCH v3 5/5] vhost: add packed ring vectorized enqueue",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Optimize vhost packed ring enqueue path with SIMD instructions. Four\ndescriptors status and length are batched handled with AVX512\ninstructions. Address translation operations are also accelerated\nby AVX512 instructions.\n\nSigned-off-by: Marvin Liu <yong.liu@intel.com>",
    "diff": "diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h\nindex b270c424b..84dc289e9 100644\n--- a/lib/librte_vhost/vhost.h\n+++ b/lib/librte_vhost/vhost.h\n@@ -1110,5 +1110,9 @@ vhost_reserve_avail_batch_packed_avx(struct virtio_net *dev,\n \t\t\t\t uintptr_t *desc_addrs,\n \t\t\t\t uint16_t *ids);\n \n+int\n+virtio_dev_rx_batch_packed_avx(struct virtio_net *dev,\n+\t\t\t       struct vhost_virtqueue *vq,\n+\t\t\t       struct rte_mbuf **pkts);\n \n #endif /* _VHOST_NET_CDEV_H_ */\ndiff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c\nindex 3bc6b9b20..3e49c88ac 100644\n--- a/lib/librte_vhost/virtio_net.c\n+++ b/lib/librte_vhost/virtio_net.c\n@@ -1354,6 +1354,22 @@ virtio_dev_rx_single_packed(struct virtio_net *dev,\n \treturn 0;\n }\n \n+static __rte_always_inline int\n+virtio_dev_rx_handle_batch_packed(struct virtio_net *dev,\n+\t\t\t   struct vhost_virtqueue *vq,\n+\t\t\t   struct rte_mbuf **pkts)\n+\n+{\n+#ifdef CC_AVX512_SUPPORT\n+\tif (unlikely(dev->vectorized))\n+\t\treturn virtio_dev_rx_batch_packed_avx(dev, vq, pkts);\n+\telse\n+\t\treturn virtio_dev_rx_batch_packed(dev, vq, pkts);\n+#else\n+\treturn virtio_dev_rx_batch_packed(dev, vq, pkts);\n+#endif\n+}\n+\n static __rte_noinline uint32_t\n virtio_dev_rx_packed(struct virtio_net *dev,\n \t\t     struct vhost_virtqueue *__rte_restrict vq,\n@@ -1367,8 +1383,8 @@ virtio_dev_rx_packed(struct virtio_net *dev,\n \t\trte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);\n \n \t\tif (remained >= PACKED_BATCH_SIZE) {\n-\t\t\tif (!virtio_dev_rx_batch_packed(dev, vq,\n-\t\t\t\t\t\t\t&pkts[pkt_idx])) {\n+\t\t\tif (!virtio_dev_rx_handle_batch_packed(dev, vq,\n+\t\t\t\t&pkts[pkt_idx])) {\n \t\t\t\tpkt_idx += PACKED_BATCH_SIZE;\n \t\t\t\tremained -= PACKED_BATCH_SIZE;\n \t\t\t\tcontinue;\ndiff --git a/lib/librte_vhost/virtio_net_avx.c b/lib/librte_vhost/virtio_net_avx.c\nindex e10b2a285..aa47b15ae 100644\n--- a/lib/librte_vhost/virtio_net_avx.c\n+++ 
b/lib/librte_vhost/virtio_net_avx.c\n@@ -35,9 +35,15 @@\n #define PACKED_AVAIL_FLAG ((0ULL | VRING_DESC_F_AVAIL) << FLAGS_BITS_OFFSET)\n #define PACKED_AVAIL_FLAG_WRAP ((0ULL | VRING_DESC_F_USED) << \\\n \tFLAGS_BITS_OFFSET)\n+#define PACKED_WRITE_AVAIL_FLAG (PACKED_AVAIL_FLAG | \\\n+\t((0ULL | VRING_DESC_F_WRITE) << FLAGS_BITS_OFFSET))\n+#define PACKED_WRITE_AVAIL_FLAG_WRAP (PACKED_AVAIL_FLAG_WRAP | \\\n+\t((0ULL | VRING_DESC_F_WRITE) << FLAGS_BITS_OFFSET))\n \n #define DESC_FLAGS_POS 0xaa\n #define MBUF_LENS_POS 0x6666\n+#define DESC_LENS_POS 0x4444\n+#define DESC_LENS_FLAGS_POS 0xB0B0B0B0\n \n int\n vhost_reserve_avail_batch_packed_avx(struct virtio_net *dev,\n@@ -182,3 +188,157 @@ vhost_reserve_avail_batch_packed_avx(struct virtio_net *dev,\n \n \treturn -1;\n }\n+\n+int\n+virtio_dev_rx_batch_packed_avx(struct virtio_net *dev,\n+\t\t\t       struct vhost_virtqueue *vq,\n+\t\t\t       struct rte_mbuf **pkts)\n+{\n+\tstruct vring_packed_desc *descs = vq->desc_packed;\n+\tuint16_t avail_idx = vq->last_avail_idx;\n+\tuint64_t desc_addrs[PACKED_BATCH_SIZE];\n+\tuint32_t buf_offset = dev->vhost_hlen;\n+\tuint32_t desc_status;\n+\tuint64_t lens[PACKED_BATCH_SIZE];\n+\tuint16_t i;\n+\tvoid *desc_addr;\n+\tuint8_t cmp_low, cmp_high, cmp_result;\n+\n+\tif (unlikely(avail_idx & PACKED_BATCH_MASK))\n+\t\treturn -1;\n+\tif (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))\n+\t\treturn -1;\n+\n+\t/* check refcnt and nb_segs */\n+\t__m256i mbuf_ref = _mm256_set1_epi64x(DEFAULT_REARM_DATA);\n+\n+\t/* load four mbufs rearm data */\n+\t__m256i mbufs = _mm256_set_epi64x(\n+\t\t\t\t*pkts[3]->rearm_data,\n+\t\t\t\t*pkts[2]->rearm_data,\n+\t\t\t\t*pkts[1]->rearm_data,\n+\t\t\t\t*pkts[0]->rearm_data);\n+\n+\tuint16_t cmp = _mm256_cmpneq_epu16_mask(mbufs, mbuf_ref);\n+\tif (cmp & MBUF_LENS_POS)\n+\t\treturn -1;\n+\n+\t/* check desc status */\n+\tdesc_addr = &vq->desc_packed[avail_idx];\n+\t__m512i desc_vec = _mm512_loadu_si512(desc_addr);\n+\n+\t__m512i 
avail_flag_vec;\n+\t__m512i used_flag_vec;\n+\tif (vq->avail_wrap_counter) {\n+#if defined(RTE_ARCH_I686)\n+\t\tavail_flag_vec = _mm512_set4_epi64(PACKED_WRITE_AVAIL_FLAG,\n+\t\t\t\t\t0x0, PACKED_WRITE_AVAIL_FLAG, 0x0);\n+\t\tused_flag_vec = _mm512_set4_epi64(PACKED_FLAGS_MASK, 0x0,\n+\t\t\t\t\tPACKED_FLAGS_MASK, 0x0);\n+#else\n+\t\tavail_flag_vec = _mm512_maskz_set1_epi64(DESC_FLAGS_POS,\n+\t\t\t\t\tPACKED_WRITE_AVAIL_FLAG);\n+\t\tused_flag_vec = _mm512_maskz_set1_epi64(DESC_FLAGS_POS,\n+\t\t\t\t\tPACKED_FLAGS_MASK);\n+#endif\n+\t} else {\n+#if defined(RTE_ARCH_I686)\n+\t\tavail_flag_vec = _mm512_set4_epi64(\n+\t\t\t\t\tPACKED_WRITE_AVAIL_FLAG_WRAP, 0x0,\n+\t\t\t\t\tPACKED_WRITE_AVAIL_FLAG, 0x0);\n+\t\tused_flag_vec = _mm512_set4_epi64(0x0, 0x0, 0x0, 0x0);\n+#else\n+\t\tavail_flag_vec = _mm512_maskz_set1_epi64(DESC_FLAGS_POS,\n+\t\t\t\t\tPACKED_WRITE_AVAIL_FLAG_WRAP);\n+\t\tused_flag_vec = _mm512_setzero_epi32();\n+#endif\n+\t}\n+\n+\tdesc_status = _mm512_mask_cmp_epu16_mask(BATCH_FLAGS_MASK, desc_vec,\n+\t\t\t\tavail_flag_vec, _MM_CMPINT_NE);\n+\tif (desc_status)\n+\t\treturn -1;\n+\n+\tif (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {\n+\t\tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\t\tuint64_t size = (uint64_t)descs[avail_idx + i].len;\n+\t\t\tdesc_addrs[i] = __vhost_iova_to_vva(dev, vq,\n+\t\t\t\tdescs[avail_idx + i].addr, &size,\n+\t\t\t\tVHOST_ACCESS_RW);\n+\n+\t\t\tif (!desc_addrs[i])\n+\t\t\t\treturn -1;\n+\n+\t\t\trte_prefetch0(rte_pktmbuf_mtod_offset(pkts[i], void *,\n+\t\t\t\t\t0));\n+\t\t}\n+\t} else {\n+\t\t/* check buffer fit into one region & translate address */\n+\t\tstruct mem_regions_range *range = dev->regions_range;\n+\t\t__m512i regions_low_addrs =\n+\t\t\t_mm512_loadu_si512((void *)&range->regions_low_addrs);\n+\t\t__m512i regions_high_addrs =\n+\t\t\t_mm512_loadu_si512((void *)&range->regions_high_addrs);\n+\t\tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\t\tuint64_t addr_low = descs[avail_idx + 
i].addr;\n+\t\t\tuint64_t addr_high = addr_low +\n+\t\t\t\t\t\tdescs[avail_idx + i].len;\n+\t\t\t__m512i low_addr_vec = _mm512_set1_epi64(addr_low);\n+\t\t\t__m512i high_addr_vec = _mm512_set1_epi64(addr_high);\n+\n+\t\t\tcmp_low = _mm512_cmp_epi64_mask(low_addr_vec,\n+\t\t\t\t\tregions_low_addrs, _MM_CMPINT_NLT);\n+\t\t\tcmp_high = _mm512_cmp_epi64_mask(high_addr_vec,\n+\t\t\t\t\tregions_high_addrs, _MM_CMPINT_LT);\n+\t\t\tcmp_result = cmp_low & cmp_high;\n+\t\t\tint index = __builtin_ctz(cmp_result);\n+\t\t\tif (unlikely((uint32_t)index >= dev->mem->nregions))\n+\t\t\t\treturn -1;\n+\n+\t\t\tdesc_addrs[i] = addr_low +\n+\t\t\t\tdev->mem->regions[index].host_user_addr -\n+\t\t\t\tdev->mem->regions[index].guest_phys_addr;\n+\t\t\trte_prefetch0(rte_pktmbuf_mtod_offset(pkts[i], void *,\n+\t\t\t\t\t0));\n+\t\t}\n+\t}\n+\n+\t/* check length is enough */\n+\t__m512i pkt_lens = _mm512_set_epi32(\n+\t\t\t0, pkts[3]->pkt_len, 0, 0,\n+\t\t\t0, pkts[2]->pkt_len, 0, 0,\n+\t\t\t0, pkts[1]->pkt_len, 0, 0,\n+\t\t\t0, pkts[0]->pkt_len, 0, 0);\n+\n+\t__m512i mbuf_len_offset = _mm512_maskz_set1_epi32(DESC_LENS_POS,\n+\t\t\t\t\tdev->vhost_hlen);\n+\t__m512i buf_len_vec = _mm512_add_epi32(pkt_lens, mbuf_len_offset);\n+\tuint16_t lens_cmp = _mm512_mask_cmp_epu32_mask(DESC_LENS_POS,\n+\t\t\t\tdesc_vec, buf_len_vec, _MM_CMPINT_LT);\n+\tif (lens_cmp)\n+\t\treturn -1;\n+\n+\tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\trte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset),\n+\t\t\t   rte_pktmbuf_mtod_offset(pkts[i], void *, 0),\n+\t\t\t   pkts[i]->pkt_len);\n+\t}\n+\n+\tif (unlikely((dev->features & (1ULL << VHOST_F_LOG_ALL)))) {\n+\t\tvhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\t\tlens[i] = descs[avail_idx + i].len;\n+\t\t\tvhost_log_cache_write_iova(dev, vq,\n+\t\t\t\tdescs[avail_idx + i].addr, lens[i]);\n+\t\t}\n+\t}\n+\n+\tvq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);\n+\tvq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);\n+\t/* save len and flags, 
skip addr and id */\n+\t__m512i desc_updated = _mm512_mask_add_epi16(desc_vec,\n+\t\t\t\t\tDESC_LENS_FLAGS_POS, buf_len_vec,\n+\t\t\t\t\tused_flag_vec);\n+\t_mm512_storeu_si512(desc_addr, desc_updated);\n+\n+\treturn 0;\n+}\n",
    "prefixes": [
        "v3",
        "5/5"
    ]
}