get:
Show a patch.

patch:
Partially update a patch (only the supplied fields change).

put:
Update a patch.
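
The response below was produced by the GET form of this endpoint. As a minimal Python sketch of the read and write operations (assuming the third-party `requests` package; the token value is a hypothetical placeholder, Patchwork write access requires authentication, and which fields are writable depends on your project permissions):

    import requests

    URL = "https://patches.dpdk.org/api/patches/68545/"

    # GET needs no authentication for a public project.
    patch = requests.get(URL).json()
    print(patch["name"], patch["state"], patch["archived"])

    # PATCH sends only the fields being changed; PUT would resend the
    # full writable representation. "YOUR_TOKEN" is a placeholder, and
    # treating "state"/"archived" as writable assumes maintainer rights.
    resp = requests.patch(
        URL,
        headers={"Authorization": "Token YOUR_TOKEN"},
        json={"state": "superseded", "archived": True},
    )
    resp.raise_for_status()

PATCH with only the changed fields is usually the safer choice here, since PUT obliges the client to resend every writable field of the record.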

GET /api/patches/68545/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 68545,
    "url": "https://patches.dpdk.org/api/patches/68545/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/20200415164733.75416-6-yong.liu@intel.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20200415164733.75416-6-yong.liu@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20200415164733.75416-6-yong.liu@intel.com",
    "date": "2020-04-15T16:47:30",
    "name": "[v4,5/8] net/virtio: add vectorized packed ring Tx datapath",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "6eb4e7c6ad777b1b5fdbea76630f9af2bcd5c4f7",
    "submitter": {
        "id": 17,
        "url": "https://patches.dpdk.org/api/people/17/?format=api",
        "name": "Marvin Liu",
        "email": "yong.liu@intel.com"
    },
    "delegate": {
        "id": 2642,
        "url": "https://patches.dpdk.org/api/users/2642/?format=api",
        "username": "mcoquelin",
        "first_name": "Maxime",
        "last_name": "Coquelin",
        "email": "maxime.coquelin@redhat.com"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/20200415164733.75416-6-yong.liu@intel.com/mbox/",
    "series": [
        {
            "id": 9391,
            "url": "https://patches.dpdk.org/api/series/9391/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=9391",
            "date": "2020-04-15T16:47:25",
            "name": "add packed ring vectorized datapath",
            "version": 4,
            "mbox": "https://patches.dpdk.org/series/9391/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/68545/comments/",
    "check": "fail",
    "checks": "https://patches.dpdk.org/api/patches/68545/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from dpdk.org (dpdk.org [92.243.14.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 38F06A0563;\n\tWed, 15 Apr 2020 11:14:24 +0200 (CEST)",
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id CD2D51D676;\n\tWed, 15 Apr 2020 11:13:33 +0200 (CEST)",
            "from mga09.intel.com (mga09.intel.com [134.134.136.24])\n by dpdk.org (Postfix) with ESMTP id 3ED581D661\n for <dev@dpdk.org>; Wed, 15 Apr 2020 11:13:28 +0200 (CEST)",
            "from orsmga004.jf.intel.com ([10.7.209.38])\n by orsmga102.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 15 Apr 2020 02:13:27 -0700",
            "from npg-dpdk-virtual-marvin-dev.sh.intel.com ([10.67.119.58])\n by orsmga004.jf.intel.com with ESMTP; 15 Apr 2020 02:13:25 -0700"
        ],
        "IronPort-SDR": [
            "\n 4uUqQbEHXH4M2LU04rZ5+Kbn2vOa4wgK9xJl8uL798NXh+y4kd/ireXtmfOK3uCEumLgBK6acK\n KEjCxc3t4A5Q==",
            "\n gDS2cc6ZdD4px1Q0TABnP44ofyRCjujptqeT/gryEv3k4+uEGsJRyaVQrxu4rKnQirEsrs6tQu\n nGEpYz5J4qsw=="
        ],
        "X-Amp-Result": "SKIPPED(no attachment in message)",
        "X-Amp-File-Uploaded": "False",
        "X-ExtLoop1": "1",
        "X-IronPort-AV": "E=Sophos;i=\"5.72,386,1580803200\"; d=\"scan'208\";a=\"400250869\"",
        "From": "Marvin Liu <yong.liu@intel.com>",
        "To": "maxime.coquelin@redhat.com, xiaolong.ye@intel.com, zhihong.wang@intel.com",
        "Cc": "harry.van.haaren@intel.com, dev@dpdk.org, Marvin Liu <yong.liu@intel.com>",
        "Date": "Thu, 16 Apr 2020 00:47:30 +0800",
        "Message-Id": "<20200415164733.75416-6-yong.liu@intel.com>",
        "X-Mailer": "git-send-email 2.17.1",
        "In-Reply-To": "<20200415164733.75416-1-yong.liu@intel.com>",
        "References": "<20200313174230.74661-1-yong.liu@intel.com>\n <20200415164733.75416-1-yong.liu@intel.com>",
        "Subject": "[dpdk-dev] [PATCH v4 5/8] net/virtio: add vectorized packed ring Tx\n\tdatapath",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Optimize packed ring Tx datapath alike Rx datapath. Split Tx datapath\ninto batch and single Tx functions. Batch function further optimized by\nvector instructions.\n\nSigned-off-by: Marvin Liu <yong.liu@intel.com>",
    "diff": "diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h\nindex 10e39670e..c9aaef0af 100644\n--- a/drivers/net/virtio/virtio_ethdev.h\n+++ b/drivers/net/virtio/virtio_ethdev.h\n@@ -107,6 +107,9 @@ uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\n uint16_t virtio_recv_pkts_packed_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\n \t\tuint16_t nb_pkts);\n \n+uint16_t virtio_xmit_pkts_packed_vec(void *tx_queue, struct rte_mbuf **tx_pkts,\n+\t\tuint16_t nb_pkts);\n+\n int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);\n \n void virtio_interrupt_handler(void *param);\ndiff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c\nindex 1d8135f4f..58c7778f4 100644\n--- a/drivers/net/virtio/virtio_rxtx.c\n+++ b/drivers/net/virtio/virtio_rxtx.c\n@@ -2174,3 +2174,11 @@ virtio_recv_pkts_packed_vec(void __rte_unused *rx_queue,\n {\n \treturn 0;\n }\n+\n+__rte_weak uint16_t\n+virtio_xmit_pkts_packed_vec(void __rte_unused *tx_queue,\n+\t\t\t    struct rte_mbuf __rte_unused **tx_pkts,\n+\t\t\t    uint16_t __rte_unused nb_pkts)\n+{\n+\treturn 0;\n+}\ndiff --git a/drivers/net/virtio/virtio_rxtx_packed_avx.c b/drivers/net/virtio/virtio_rxtx_packed_avx.c\nindex f2976b98f..732256c86 100644\n--- a/drivers/net/virtio/virtio_rxtx_packed_avx.c\n+++ b/drivers/net/virtio/virtio_rxtx_packed_avx.c\n@@ -15,6 +15,21 @@\n #include \"virtio_pci.h\"\n #include \"virtqueue.h\"\n \n+/* reference count offset in mbuf rearm data */\n+#define REF_CNT_OFFSET 16\n+/* segment number offset in mbuf rearm data */\n+#define SEG_NUM_OFFSET 32\n+\n+#define DEFAULT_REARM_DATA (1ULL << SEG_NUM_OFFSET | \\\n+\t\t\t  1ULL << REF_CNT_OFFSET)\n+/* id offset in packed ring desc higher 64bits */\n+#define ID_OFFSET 32\n+/* flag offset in packed ring desc higher 64bits */\n+#define FLAG_OFFSET 48\n+\n+/* net hdr short size mask */\n+#define NET_HDR_MASK 0x3F\n+\n #define PACKED_FLAGS_MASK (1ULL << 55 | 1ULL << 63)\n \n #define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \\\n@@ -41,6 +56,47 @@\n \tfor (iter = val; iter < num; iter++)\n #endif\n \n+static void\n+virtio_xmit_cleanup_packed_vec(struct virtqueue *vq)\n+{\n+\tstruct vring_packed_desc *desc = vq->vq_packed.ring.desc;\n+\tstruct vq_desc_extra *dxp;\n+\tuint16_t used_idx, id, curr_id, free_cnt = 0;\n+\tuint16_t size = vq->vq_nentries;\n+\tstruct rte_mbuf *mbufs[size];\n+\tuint16_t nb_mbuf = 0, i;\n+\n+\tused_idx = vq->vq_used_cons_idx;\n+\n+\tif (!desc_is_used(&desc[used_idx], vq))\n+\t\treturn;\n+\n+\tid = desc[used_idx].id;\n+\n+\tdo {\n+\t\tcurr_id = used_idx;\n+\t\tdxp = &vq->vq_descx[used_idx];\n+\t\tused_idx += dxp->ndescs;\n+\t\tfree_cnt += dxp->ndescs;\n+\n+\t\tif (dxp->cookie != NULL) {\n+\t\t\tmbufs[nb_mbuf] = dxp->cookie;\n+\t\t\tdxp->cookie = NULL;\n+\t\t\tnb_mbuf++;\n+\t\t}\n+\n+\t\tif (used_idx >= size) {\n+\t\t\tused_idx -= size;\n+\t\t\tvq->vq_packed.used_wrap_counter ^= 1;\n+\t\t}\n+\t} while (curr_id != id);\n+\n+\tfor (i = 0; i < nb_mbuf; i++)\n+\t\trte_pktmbuf_free(mbufs[i]);\n+\n+\tvq->vq_used_cons_idx = used_idx;\n+\tvq->vq_free_cnt += free_cnt;\n+}\n \n static inline void\n virtio_update_batch_stats(struct virtnet_stats *stats,\n@@ -54,6 +110,229 @@ virtio_update_batch_stats(struct virtnet_stats *stats,\n \tstats->bytes += pkt_len3;\n \tstats->bytes += pkt_len4;\n }\n+\n+static inline int\n+virtqueue_enqueue_batch_packed_vec(struct virtnet_tx *txvq,\n+\t\t\t\t   struct rte_mbuf **tx_pkts)\n+{\n+\tstruct virtqueue *vq = txvq->vq;\n+\tuint16_t head_size = 
vq->hw->vtnet_hdr_size;\n+\tuint16_t idx = vq->vq_avail_idx;\n+\tstruct virtio_net_hdr *hdr;\n+\tuint16_t i, cmp;\n+\n+\tif (vq->vq_avail_idx & PACKED_BATCH_MASK)\n+\t\treturn -1;\n+\n+\t/* Load four mbufs rearm data */\n+\t__m256i mbufs = _mm256_set_epi64x(\n+\t\t\t*tx_pkts[3]->rearm_data,\n+\t\t\t*tx_pkts[2]->rearm_data,\n+\t\t\t*tx_pkts[1]->rearm_data,\n+\t\t\t*tx_pkts[0]->rearm_data);\n+\n+\t/* refcnt=1 and nb_segs=1 */\n+\t__m256i mbuf_ref = _mm256_set1_epi64x(DEFAULT_REARM_DATA);\n+\t__m256i head_rooms = _mm256_set1_epi16(head_size);\n+\n+\t/* Check refcnt and nb_segs */\n+\tcmp = _mm256_cmpneq_epu16_mask(mbufs, mbuf_ref);\n+\tif (cmp & 0x6666)\n+\t\treturn -1;\n+\n+\t/* Check headroom is enough */\n+\tcmp = _mm256_mask_cmp_epu16_mask(0x1111, mbufs, head_rooms,\n+\t\t\t_MM_CMPINT_LT);\n+\tif (unlikely(cmp))\n+\t\treturn -1;\n+\n+\t__m512i dxps = _mm512_set_epi64(\n+\t\t\t0x1, (uint64_t)tx_pkts[3],\n+\t\t\t0x1, (uint64_t)tx_pkts[2],\n+\t\t\t0x1, (uint64_t)tx_pkts[1],\n+\t\t\t0x1, (uint64_t)tx_pkts[0]);\n+\n+\t_mm512_storeu_si512((void *)&vq->vq_descx[idx], dxps);\n+\n+\tvirtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\ttx_pkts[i]->data_off -= head_size;\n+\t\ttx_pkts[i]->data_len += head_size;\n+\t}\n+\n+#ifdef RTE_VIRTIO_USER\n+\t__m512i descs_base = _mm512_set_epi64(\n+\t\t\ttx_pkts[3]->data_len,\n+\t\t\t(uint64_t)(*(uintptr_t *)((uintptr_t)tx_pkts[3])),\n+\t\t\ttx_pkts[2]->data_len,\n+\t\t\t(uint64_t)(*(uintptr_t *)((uintptr_t)tx_pkts[2])),\n+\t\t\ttx_pkts[1]->data_len,\n+\t\t\t(uint64_t)(*(uintptr_t *)((uintptr_t)tx_pkts[1])),\n+\t\t\ttx_pkts[0]->data_len,\n+\t\t\t(uint64_t)(*(uintptr_t *)((uintptr_t)tx_pkts[0])));\n+#else\n+\t__m512i descs_base = _mm512_set_epi64(\n+\t\t\ttx_pkts[3]->data_len, tx_pkts[3]->buf_iova,\n+\t\t\ttx_pkts[2]->data_len, tx_pkts[2]->buf_iova,\n+\t\t\ttx_pkts[1]->data_len, tx_pkts[1]->buf_iova,\n+\t\t\ttx_pkts[0]->data_len, tx_pkts[0]->buf_iova);\n+#endif\n+\n+\t/* id offset and data offset */\n+\t__m512i data_offsets = _mm512_set_epi64(\n+\t\t\t(uint64_t)3 << ID_OFFSET, tx_pkts[3]->data_off,\n+\t\t\t(uint64_t)2 << ID_OFFSET, tx_pkts[2]->data_off,\n+\t\t\t(uint64_t)1 << ID_OFFSET, tx_pkts[1]->data_off,\n+\t\t\t0, tx_pkts[0]->data_off);\n+\n+\t__m512i new_descs = _mm512_add_epi64(descs_base, data_offsets);\n+\n+\tuint64_t flags_temp = (uint64_t)idx << ID_OFFSET |\n+\t\t(uint64_t)vq->vq_packed.cached_flags << FLAG_OFFSET;\n+\n+\t/* flags offset and guest virtual address offset */\n+#ifdef RTE_VIRTIO_USER\n+\t__m128i flag_offset = _mm_set_epi64x(flags_temp, (uint64_t)vq->offset);\n+#else\n+\t__m128i flag_offset = _mm_set_epi64x(flags_temp, 0);\n+#endif\n+\t__m512i flag_offsets = _mm512_broadcast_i32x4(flag_offset);\n+\n+\t__m512i descs = _mm512_add_epi64(new_descs, flag_offsets);\n+\n+\tif (!vq->hw->has_tx_offload) {\n+\t\t__m128i mask = _mm_set1_epi16(0xFFFF);\n+\t\tvirtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\t\thdr = rte_pktmbuf_mtod_offset(tx_pkts[i],\n+\t\t\t\t\tstruct virtio_net_hdr *, -head_size);\n+\t\t\t__m128i v_hdr = _mm_loadu_si128((void *)hdr);\n+\t\t\tif (unlikely(_mm_mask_test_epi16_mask(NET_HDR_MASK,\n+\t\t\t\t\t\t\tv_hdr, mask))) {\n+\t\t\t\t__m128i all_zero = _mm_setzero_si128();\n+\t\t\t\t_mm_mask_storeu_epi16((void *)hdr,\n+\t\t\t\t\t\tNET_HDR_MASK, all_zero);\n+\t\t\t}\n+\t\t}\n+\t} else {\n+\t\tvirtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\t\thdr = rte_pktmbuf_mtod_offset(tx_pkts[i],\n+\t\t\t\t\tstruct virtio_net_hdr *, -head_size);\n+\t\t\tvirtqueue_xmit_offload(hdr, tx_pkts[i], 
true);\n+\t\t}\n+\t}\n+\n+\t/* Enqueue Packet buffers */\n+\trte_smp_wmb();\n+\t_mm512_storeu_si512((void *)&vq->vq_packed.ring.desc[idx], descs);\n+\n+\tvirtio_update_batch_stats(&txvq->stats, tx_pkts[0]->pkt_len,\n+\t\t\ttx_pkts[1]->pkt_len, tx_pkts[2]->pkt_len,\n+\t\t\ttx_pkts[3]->pkt_len);\n+\n+\tvq->vq_avail_idx += PACKED_BATCH_SIZE;\n+\tvq->vq_free_cnt -= PACKED_BATCH_SIZE;\n+\n+\tif (vq->vq_avail_idx >= vq->vq_nentries) {\n+\t\tvq->vq_avail_idx -= vq->vq_nentries;\n+\t\tvq->vq_packed.cached_flags ^=\n+\t\t\tVRING_PACKED_DESC_F_AVAIL_USED;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+static inline int\n+virtqueue_enqueue_single_packed_vec(struct virtnet_tx *txvq,\n+\t\t\t\t    struct rte_mbuf *txm)\n+{\n+\tstruct virtqueue *vq = txvq->vq;\n+\tstruct virtio_hw *hw = vq->hw;\n+\tuint16_t hdr_size = hw->vtnet_hdr_size;\n+\tuint16_t slots, can_push;\n+\tint16_t need;\n+\n+\t/* How many main ring entries are needed to this Tx?\n+\t * any_layout => number of segments\n+\t * default    => number of segments + 1\n+\t */\n+\tcan_push = rte_mbuf_refcnt_read(txm) == 1 &&\n+\t\t   RTE_MBUF_DIRECT(txm) &&\n+\t\t   txm->nb_segs == 1 &&\n+\t\t   rte_pktmbuf_headroom(txm) >= hdr_size;\n+\n+\tslots = txm->nb_segs + !can_push;\n+\tneed = slots - vq->vq_free_cnt;\n+\n+\t/* Positive value indicates it need free vring descriptors */\n+\tif (unlikely(need > 0)) {\n+\t\tvirtio_xmit_cleanup_packed_vec(vq);\n+\t\tneed = slots - vq->vq_free_cnt;\n+\t\tif (unlikely(need > 0)) {\n+\t\t\tPMD_TX_LOG(ERR,\n+\t\t\t\t   \"No free tx descriptors to transmit\");\n+\t\t\treturn -1;\n+\t\t}\n+\t}\n+\n+\t/* Enqueue Packet buffers */\n+\tvirtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push, 1);\n+\n+\ttxvq->stats.bytes += txm->pkt_len;\n+\treturn 0;\n+}\n+\n+uint16_t\n+virtio_xmit_pkts_packed_vec(void *tx_queue, struct rte_mbuf **tx_pkts,\n+\t\t\tuint16_t nb_pkts)\n+{\n+\tstruct virtnet_tx *txvq = tx_queue;\n+\tstruct virtqueue *vq = txvq->vq;\n+\tstruct virtio_hw *hw = vq->hw;\n+\tuint16_t nb_tx = 0;\n+\tuint16_t remained;\n+\n+\tif (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))\n+\t\treturn nb_tx;\n+\n+\tif (unlikely(nb_pkts < 1))\n+\t\treturn nb_pkts;\n+\n+\tPMD_TX_LOG(DEBUG, \"%d packets to xmit\", nb_pkts);\n+\n+\tif (vq->vq_free_cnt <= vq->vq_nentries - vq->vq_free_thresh)\n+\t\tvirtio_xmit_cleanup_packed_vec(vq);\n+\n+\tremained = RTE_MIN(nb_pkts, vq->vq_free_cnt);\n+\n+\twhile (remained) {\n+\t\tif (remained >= PACKED_BATCH_SIZE) {\n+\t\t\tif (!virtqueue_enqueue_batch_packed_vec(txvq,\n+\t\t\t\t\t\t&tx_pkts[nb_tx])) {\n+\t\t\t\tnb_tx += PACKED_BATCH_SIZE;\n+\t\t\t\tremained -= PACKED_BATCH_SIZE;\n+\t\t\t\tcontinue;\n+\t\t\t}\n+\t\t}\n+\t\tif (!virtqueue_enqueue_single_packed_vec(txvq,\n+\t\t\t\t\ttx_pkts[nb_tx])) {\n+\t\t\tnb_tx++;\n+\t\t\tremained--;\n+\t\t\tcontinue;\n+\t\t}\n+\t\tbreak;\n+\t};\n+\n+\ttxvq->stats.packets += nb_tx;\n+\n+\tif (likely(nb_tx)) {\n+\t\tif (unlikely(virtqueue_kick_prepare_packed(vq))) {\n+\t\t\tvirtqueue_notify(vq);\n+\t\t\tPMD_TX_LOG(DEBUG, \"Notified backend after xmit\");\n+\t\t}\n+\t}\n+\n+\treturn nb_tx;\n+}\n+\n /* Optionally fill offload information in structure */\n static inline int\n virtio_vec_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)\n",
    "prefixes": [
        "v4",
        "5/8"
    ]
}
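
As a consumption example, a minimal sketch of how a client might use this payload (same assumed `requests` dependency; the `mbox`, `series`, and `checks` URLs are taken verbatim from the fields above, the output filename is arbitrary, and the per-check `context`/`state` fields are assumed from Patchwork's check serialization):

    import requests

    patch = requests.get("https://patches.dpdk.org/api/patches/68545/").json()

    # The "mbox" link serves the raw patch email, suitable for `git am`.
    with open("v4-5-8-virtio-tx-vec.mbox", "w") as f:
        f.write(requests.get(patch["mbox"]).text)

    # A patch can belong to one or more series; v4 of this one is id 9391.
    for series in patch["series"]:
        print(series["version"], series["name"])

    # "check" above is only the aggregate result; per-CI detail sits
    # behind the "checks" URL.
    for check in requests.get(patch["checks"]).json():
        print(check["context"], check["state"])

Since the aggregate field here is "check": "fail", fetching the per-context listing is the natural next step before acting on the patch or its series.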