Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/84261/?format=api
https://patches.dpdk.org/api/patches/84261/?format=api", "web_url": "https://patches.dpdk.org/project/dpdk/patch/20201117100635.27690-4-joyce.kong@arm.com/", "project": { "id": 1, "url": "https://patches.dpdk.org/api/projects/1/?format=api", "name": "DPDK", "link_name": "dpdk", "list_id": "dev.dpdk.org", "list_email": "dev@dpdk.org", "web_url": "http://core.dpdk.org", "scm_url": "git://dpdk.org/dpdk", "webscm_url": "http://git.dpdk.org/dpdk", "list_archive_url": "https://inbox.dpdk.org/dev", "list_archive_url_format": "https://inbox.dpdk.org/dev/{}", "commit_url_format": "" }, "msgid": "<20201117100635.27690-4-joyce.kong@arm.com>", "list_archive_url": "https://inbox.dpdk.org/dev/20201117100635.27690-4-joyce.kong@arm.com", "date": "2020-11-17T10:06:34", "name": "[v1,3/4] net/virtio: add vectorized packed ring Tx NEON path", "commit_ref": null, "pull_url": null, "state": "accepted", "archived": true, "hash": "f9728ba99bfbfff51127868efa5800cab533b1ae", "submitter": { "id": 970, "url": "https://patches.dpdk.org/api/people/970/?format=api", "name": "Joyce Kong", "email": "joyce.kong@arm.com" }, "delegate": { "id": 2642, "url": "https://patches.dpdk.org/api/users/2642/?format=api", "username": "mcoquelin", "first_name": "Maxime", "last_name": "Coquelin", "email": "maxime.coquelin@redhat.com" }, "mbox": "https://patches.dpdk.org/project/dpdk/patch/20201117100635.27690-4-joyce.kong@arm.com/mbox/", "series": [ { "id": 13920, "url": "https://patches.dpdk.org/api/series/13920/?format=api", "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=13920", "date": "2020-11-17T10:06:31", "name": "Vectorize packed ring RX/TX path with NEON", "version": 1, "mbox": "https://patches.dpdk.org/series/13920/mbox/" } ], "comments": "https://patches.dpdk.org/api/patches/84261/comments/", "check": "success", "checks": "https://patches.dpdk.org/api/patches/84261/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<dev-bounces@dpdk.org>", "X-Original-To": "patchwork@inbox.dpdk.org", "Delivered-To": "patchwork@inbox.dpdk.org", "Received": [ "from dpdk.org (dpdk.org [92.243.14.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id CF224A04DB;\n\tTue, 17 Nov 2020 11:08:13 +0100 (CET)", "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id 2FA1BC8F4;\n\tTue, 17 Nov 2020 11:07:26 +0100 (CET)", "from foss.arm.com (foss.arm.com [217.140.110.172])\n by dpdk.org (Postfix) with ESMTP id 5C66AC8DA\n for <dev@dpdk.org>; Tue, 17 Nov 2020 11:07:25 +0100 (CET)", "from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14])\n by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id BCBA41477;\n Tue, 17 Nov 2020 02:07:23 -0800 (PST)", "from net-arm-thunderx2-03.shanghai.arm.com\n (net-arm-thunderx2-03.shanghai.arm.com [10.169.208.206])\n by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 454D93F718;\n Tue, 17 Nov 2020 02:07:21 -0800 (PST)" ], "From": "Joyce Kong <joyce.kong@arm.com>", "To": "maxime.coquelin@redhat.com, chenbo.xia@intel.com, jerinj@marvell.com,\n ruifeng.wang@arm.com, honnappa.nagarahalli@arm.com", "Cc": "dev@dpdk.org,\n\tnd@arm.com", "Date": "Tue, 17 Nov 2020 18:06:34 +0800", "Message-Id": "<20201117100635.27690-4-joyce.kong@arm.com>", "X-Mailer": "git-send-email 2.28.0", "In-Reply-To": "<20201117100635.27690-1-joyce.kong@arm.com>", "References": "<20200911120906.45995-1-joyce.kong@arm.com>\n <20201117100635.27690-1-joyce.kong@arm.com>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "Subject": "[dpdk-dev] [PATCH v1 3/4] net/virtio: add vectorized packed ring Tx\n\tNEON path", "X-BeenThere": "dev@dpdk.org", "X-Mailman-Version": "2.1.15", "Precedence": "list", "List-Id": "DPDK patches and discussions <dev.dpdk.org>", "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>", "List-Archive": "<http://mails.dpdk.org/archives/dev/>", "List-Post": "<mailto:dev@dpdk.org>", "List-Help": "<mailto:dev-request@dpdk.org?subject=help>", "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>", "Errors-To": "dev-bounces@dpdk.org", "Sender": "\"dev\" <dev-bounces@dpdk.org>" }, "content": "Optimize packed ring Tx batch path with NEON instructions.\n\nSigned-off-by: Joyce Kong <joyce.kong@arm.com>\nReviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>\n---\n drivers/net/virtio/virtio_rxtx_packed.h | 6 +-\n drivers/net/virtio/virtio_rxtx_packed_neon.h | 143 +++++++++++++++++++\n 2 files changed, 148 insertions(+), 1 deletion(-)", "diff": "diff --git a/drivers/net/virtio/virtio_rxtx_packed.h b/drivers/net/virtio/virtio_rxtx_packed.h\nindex 8f5198ad7..016b6fb24 100644\n--- a/drivers/net/virtio/virtio_rxtx_packed.h\n+++ b/drivers/net/virtio/virtio_rxtx_packed.h\n@@ -28,6 +28,8 @@\n /* flag bits offset in packed ring desc from ID */\n #define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \\\n \toffsetof(struct vring_packed_desc, id)) * BYTE_SIZE)\n+#define FLAGS_LEN_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \\\n+\toffsetof(struct vring_packed_desc, len)) * BYTE_SIZE)\n #endif\n \n #define PACKED_FLAGS_MASK ((0ULL | VRING_PACKED_DESC_F_AVAIL_USED) << \\\n@@ -36,13 +38,15 @@\n /* reference count offset in mbuf rearm data */\n #define REFCNT_BITS_OFFSET ((offsetof(struct rte_mbuf, refcnt) - \\\n \toffsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE)\n+\n+#ifdef CC_AVX512_SUPPORT\n /* segment number offset in mbuf rearm data */\n #define SEG_NUM_BITS_OFFSET ((offsetof(struct rte_mbuf, nb_segs) - \\\n \toffsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE)\n-\n /* default rearm data */\n #define DEFAULT_REARM_DATA (1ULL << SEG_NUM_BITS_OFFSET | \\\n \t1ULL << REFCNT_BITS_OFFSET)\n+#endif\n \n /* id bits offset in packed ring desc higher 64bits */\n #define ID_BITS_OFFSET ((offsetof(struct vring_packed_desc, id) - \\\ndiff --git a/drivers/net/virtio/virtio_rxtx_packed_neon.h b/drivers/net/virtio/virtio_rxtx_packed_neon.h\nindex fb1e49909..041f771ea 100644\n--- a/drivers/net/virtio/virtio_rxtx_packed_neon.h\n+++ b/drivers/net/virtio/virtio_rxtx_packed_neon.h\n@@ -16,6 +16,149 @@\n #include \"virtio_rxtx_packed.h\"\n #include \"virtqueue.h\"\n \n+static inline int\n+virtqueue_enqueue_batch_packed_vec(struct virtnet_tx *txvq,\n+\t\t\t\t struct rte_mbuf **tx_pkts)\n+{\n+\tstruct virtqueue *vq = txvq->vq;\n+\tuint16_t head_size = vq->hw->vtnet_hdr_size;\n+\tuint16_t idx = vq->vq_avail_idx;\n+\tstruct virtio_net_hdr *hdr;\n+\tstruct vq_desc_extra *dxp;\n+\tstruct vring_packed_desc *p_desc;\n+\tuint16_t i;\n+\n+\tif (idx & PACKED_BATCH_MASK)\n+\t\treturn -1;\n+\n+\tif (unlikely((idx + PACKED_BATCH_SIZE) > vq->vq_nentries))\n+\t\treturn -1;\n+\n+\t/* Map four refcnt and nb_segs from mbufs to one NEON register. */\n+\tuint8x16_t ref_seg_msk = {\n+\t\t2, 3, 4, 5,\n+\t\t10, 11, 12, 13,\n+\t\t18, 19, 20, 21,\n+\t\t26, 27, 28, 29\n+\t};\n+\n+\t/* Map four data_off from mbufs to one NEON register. */\n+\tuint8x8_t data_msk = {\n+\t\t0, 1,\n+\t\t8, 9,\n+\t\t16, 17,\n+\t\t24, 25\n+\t};\n+\n+\tuint16x8_t net_hdr_msk = {\n+\t\t0xFFFF, 0xFFFF,\n+\t\t0, 0, 0, 0\n+\t};\n+\n+\tuint16x4_t pkts[PACKED_BATCH_SIZE];\n+\tuint8x16x2_t mbuf;\n+\t/* Load four mbufs rearm data. */\n+\tRTE_BUILD_BUG_ON(REFCNT_BITS_OFFSET >= 64);\n+\tpkts[0] = vld1_u16((uint16_t *)&tx_pkts[0]->rearm_data);\n+\tpkts[1] = vld1_u16((uint16_t *)&tx_pkts[1]->rearm_data);\n+\tpkts[2] = vld1_u16((uint16_t *)&tx_pkts[2]->rearm_data);\n+\tpkts[3] = vld1_u16((uint16_t *)&tx_pkts[3]->rearm_data);\n+\n+\tmbuf.val[0] = vreinterpretq_u8_u16(vcombine_u16(pkts[0], pkts[1]));\n+\tmbuf.val[1] = vreinterpretq_u8_u16(vcombine_u16(pkts[2], pkts[3]));\n+\n+\t/* refcnt = 1 and nb_segs = 1 */\n+\tuint32x4_t def_ref_seg = vdupq_n_u32(0x10001);\n+\t/* Check refcnt and nb_segs. */\n+\tuint32x4_t ref_seg = vreinterpretq_u32_u8(vqtbl2q_u8(mbuf, ref_seg_msk));\n+\tpoly128_t cmp1 = vreinterpretq_p128_u32(~vceqq_u32(ref_seg, def_ref_seg));\n+\tif (unlikely(cmp1))\n+\t\treturn -1;\n+\n+\t/* Check headroom is enough. */\n+\tuint16x4_t head_rooms = vdup_n_u16(head_size);\n+\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=\n+\t\t\t offsetof(struct rte_mbuf, rearm_data));\n+\tuint16x4_t data_offset = vreinterpret_u16_u8(vqtbl2_u8(mbuf, data_msk));\n+\tuint64x1_t cmp2 = vreinterpret_u64_u16(vclt_u16(data_offset, head_rooms));\n+\tif (unlikely(vget_lane_u64(cmp2, 0)))\n+\t\treturn -1;\n+\n+\tvirtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\tdxp = &vq->vq_descx[idx + i];\n+\t\tdxp->ndescs = 1;\n+\t\tdxp->cookie = tx_pkts[i];\n+\t}\n+\n+\tvirtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\ttx_pkts[i]->data_off -= head_size;\n+\t\ttx_pkts[i]->data_len += head_size;\n+\t}\n+\n+\tuint64x2x2_t desc[PACKED_BATCH_SIZE / 2];\n+\tuint64x2_t base_addr0 = {\n+\t\tVIRTIO_MBUF_ADDR(tx_pkts[0], vq) + tx_pkts[0]->data_off,\n+\t\tVIRTIO_MBUF_ADDR(tx_pkts[1], vq) + tx_pkts[1]->data_off\n+\t};\n+\tuint64x2_t base_addr1 = {\n+\t\tVIRTIO_MBUF_ADDR(tx_pkts[2], vq) + tx_pkts[2]->data_off,\n+\t\tVIRTIO_MBUF_ADDR(tx_pkts[3], vq) + tx_pkts[3]->data_off\n+\t};\n+\n+\tdesc[0].val[0] = base_addr0;\n+\tdesc[1].val[0] = base_addr1;\n+\n+\tuint64_t flags = (uint64_t)vq->vq_packed.cached_flags << FLAGS_LEN_BITS_OFFSET;\n+\tuint64x2_t tx_desc0 = {\n+\t\tflags | (uint64_t)idx << ID_BITS_OFFSET | tx_pkts[0]->data_len,\n+\t\tflags | (uint64_t)(idx + 1) << ID_BITS_OFFSET | tx_pkts[1]->data_len\n+\t};\n+\n+\tuint64x2_t tx_desc1 = {\n+\t\tflags | (uint64_t)(idx + 2) << ID_BITS_OFFSET | tx_pkts[2]->data_len,\n+\t\tflags | (uint64_t)(idx + 3) << ID_BITS_OFFSET | tx_pkts[3]->data_len\n+\t};\n+\n+\tdesc[0].val[1] = tx_desc0;\n+\tdesc[1].val[1] = tx_desc1;\n+\n+\tif (!vq->hw->has_tx_offload) {\n+\t\tvirtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\t\thdr = rte_pktmbuf_mtod_offset(tx_pkts[i],\n+\t\t\t\t\tstruct virtio_net_hdr *, -head_size);\n+\t\t\t/* Clear net hdr. */\n+\t\t\tuint16x8_t v_hdr = vld1q_u16((void *)hdr);\n+\t\t\tvst1q_u16((void *)hdr, vandq_u16(v_hdr, net_hdr_msk));\n+\t\t}\n+\t} else {\n+\t\tvirtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\t\thdr = rte_pktmbuf_mtod_offset(tx_pkts[i],\n+\t\t\t\t\tstruct virtio_net_hdr *, -head_size);\n+\t\t\tvirtqueue_xmit_offload(hdr, tx_pkts[i], true);\n+\t\t}\n+\t}\n+\n+\t/* Enqueue packet buffers. */\n+\tp_desc = &vq->vq_packed.ring.desc[idx];\n+\tvst2q_u64((uint64_t *)p_desc, desc[0]);\n+\tvst2q_u64((uint64_t *)(p_desc + 2), desc[1]);\n+\n+\tvirtio_update_batch_stats(&txvq->stats, tx_pkts[0]->pkt_len,\n+\t\t\ttx_pkts[1]->pkt_len, tx_pkts[2]->pkt_len,\n+\t\t\ttx_pkts[3]->pkt_len);\n+\n+\tvq->vq_avail_idx += PACKED_BATCH_SIZE;\n+\tvq->vq_free_cnt -= PACKED_BATCH_SIZE;\n+\n+\tif (vq->vq_avail_idx >= vq->vq_nentries) {\n+\t\tvq->vq_avail_idx -= vq->vq_nentries;\n+\t\tvq->vq_packed.cached_flags ^=\n+\t\t\tVRING_PACKED_DESC_F_AVAIL_USED;\n+\t}\n+\n+\treturn 0;\n+}\n+\n static inline uint16_t\n virtqueue_dequeue_batch_packed_vec(struct virtnet_rx *rxvq,\n \t\t\t\t struct rte_mbuf **rx_pkts)\n", "prefixes": [ "v1", "3/4" ] }{ "id": 84261, "url": "