Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/1472/?format=api
http://patches.dpdk.org/api/patches/1472/?format=api", "web_url": "http://patches.dpdk.org/project/dpdk/patch/1416792142-23132-8-git-send-email-chaozhu@linux.vnet.ibm.com/", "project": { "id": 1, "url": "http://patches.dpdk.org/api/projects/1/?format=api", "name": "DPDK", "link_name": "dpdk", "list_id": "dev.dpdk.org", "list_email": "dev@dpdk.org", "web_url": "http://core.dpdk.org", "scm_url": "git://dpdk.org/dpdk", "webscm_url": "http://git.dpdk.org/dpdk", "list_archive_url": "https://inbox.dpdk.org/dev", "list_archive_url_format": "https://inbox.dpdk.org/dev/{}", "commit_url_format": "" }, "msgid": "<1416792142-23132-8-git-send-email-chaozhu@linux.vnet.ibm.com>", "list_archive_url": "https://inbox.dpdk.org/dev/1416792142-23132-8-git-send-email-chaozhu@linux.vnet.ibm.com", "date": "2014-11-24T01:22:15", "name": "[dpdk-dev,v3,07/14] Add vector memcpy for IBM Power architecture", "commit_ref": null, "pull_url": null, "state": "superseded", "archived": true, "hash": "bde2a34e6ce68b75b4ed8c223e03a758bc226535", "submitter": { "id": 114, "url": "http://patches.dpdk.org/api/people/114/?format=api", "name": "Chao Zhu", "email": "chaozhu@linux.vnet.ibm.com" }, "delegate": null, "mbox": "http://patches.dpdk.org/project/dpdk/patch/1416792142-23132-8-git-send-email-chaozhu@linux.vnet.ibm.com/mbox/", "series": [], "comments": "http://patches.dpdk.org/api/patches/1472/comments/", "check": "pending", "checks": "http://patches.dpdk.org/api/patches/1472/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<dev-bounces@dpdk.org>", "X-Original-To": "patchwork@dpdk.org", "Delivered-To": "patchwork@dpdk.org", "Received": [ "from [92.243.14.124] (localhost [IPv6:::1])\n\tby dpdk.org (Postfix) with ESMTP id 70B308025;\n\tSun, 23 Nov 2014 14:11:46 +0100 (CET)", "from e23smtp08.au.ibm.com (e23smtp08.au.ibm.com [202.81.31.141])\n\tby dpdk.org (Postfix) with ESMTP id BB2187FEA\n\tfor <dev@dpdk.org>; Sun, 23 Nov 2014 14:11:34 +0100 (CET)", "from /spool/local\n\tby e23smtp08.au.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use\n\tOnly! Violators will be prosecuted\n\tfor <dev@dpdk.org> from <chaozhu@linux.vnet.ibm.com>;\n\tSun, 23 Nov 2014 23:22:16 +1000", "from d23dlp03.au.ibm.com (202.81.31.214)\n\tby e23smtp08.au.ibm.com (202.81.31.205) with IBM ESMTP SMTP Gateway:\n\tAuthorized Use Only! Violators will be prosecuted; \n\tSun, 23 Nov 2014 23:22:14 +1000", "from d23relay10.au.ibm.com (d23relay10.au.ibm.com [9.190.26.77])\n\tby d23dlp03.au.ibm.com (Postfix) with ESMTP id B907C3578055\n\tfor <dev@dpdk.org>; Mon, 24 Nov 2014 00:22:13 +1100 (EST)", "from d23av03.au.ibm.com (d23av03.au.ibm.com [9.190.234.97])\n\tby d23relay10.au.ibm.com (8.14.9/8.14.9/NCO v10.0) with ESMTP id\n\tsANDM5YE25559096 for <dev@dpdk.org>; Mon, 24 Nov 2014 00:22:13 +1100", "from d23av03.au.ibm.com (localhost [127.0.0.1])\n\tby d23av03.au.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id\n\tsANDLfui017424 for <dev@dpdk.org>; Mon, 24 Nov 2014 00:21:41 +1100", "from os_controller.crl.ibm.com ([9.186.57.97])\n\tby d23av03.au.ibm.com (8.14.4/8.14.4/NCO v10.0 AVin) with ESMTP id\n\tsANDLYfw017172 for <dev@dpdk.org>; Mon, 24 Nov 2014 00:21:40 +1100" ], "From": "Chao Zhu <chaozhu@linux.vnet.ibm.com>", "To": "dev@dpdk.org", "Date": "Sun, 23 Nov 2014 20:22:15 -0500", "Message-Id": "<1416792142-23132-8-git-send-email-chaozhu@linux.vnet.ibm.com>", "X-Mailer": "git-send-email 1.7.1", "In-Reply-To": "<1416792142-23132-1-git-send-email-chaozhu@linux.vnet.ibm.com>", "References": "<1416792142-23132-1-git-send-email-chaozhu@linux.vnet.ibm.com>", "X-TM-AS-MML": "disable", "X-Content-Scanned": "Fidelis XPS MAILER", "x-cbid": "14112313-0029-0000-0000-000000A742B2", "Subject": "[dpdk-dev] [PATCH v3 07/14] Add vector memcpy for IBM Power\n\tarchitecture", "X-BeenThere": "dev@dpdk.org", "X-Mailman-Version": "2.1.15", "Precedence": "list", "List-Id": "patches and discussions about DPDK <dev.dpdk.org>", "List-Unsubscribe": "<http://dpdk.org/ml/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>", "List-Archive": "<http://dpdk.org/ml/archives/dev/>", "List-Post": "<mailto:dev@dpdk.org>", "List-Help": "<mailto:dev-request@dpdk.org?subject=help>", "List-Subscribe": "<http://dpdk.org/ml/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>", "Errors-To": "dev-bounces@dpdk.org", "Sender": "\"dev\" <dev-bounces@dpdk.org>" }, "content": "The SSE based memory copy in DPDK only support x86. This patch adds\naltivec based memory copy functions for IBM Power architecture. This\npatch includes altivec.h which requires GCC version>= 4.8.\n\nSigned-off-by: Chao Zhu <chaozhu@linux.vnet.ibm.com>\n---\n .../common/include/arch/ppc_64/rte_memcpy.h | 224 ++++++++++++++++++++\n 1 files changed, 224 insertions(+), 0 deletions(-)\n create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h", "diff": "diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h\nnew file mode 100644\nindex 0000000..b9b8ddc\n--- /dev/null\n+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h\n@@ -0,0 +1,224 @@\n+/*\n+ * BSD LICENSE\n+ *\n+ * Copyright (C) IBM Corporation 2014.\n+ *\n+ * Redistribution and use in source and binary forms, with or without\n+ * modification, are permitted provided that the following conditions\n+ * are met:\n+ *\n+ * * Redistributions of source code must retain the above copyright\n+ * notice, this list of conditions and the following disclaimer.\n+ * * Redistributions in binary form must reproduce the above copyright\n+ * notice, this list of conditions and the following disclaimer in\n+ * the documentation and/or other materials provided with the\n+ * distribution.\n+ * * Neither the name of IBM Corporation nor the names of its\n+ * contributors may be used to endorse or promote products derived\n+ * from this software without specific prior written permission.\n+ *\n+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ * \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+#ifndef _RTE_MEMCPY_PPC_64_H_\n+#define _RTE_MEMCPY_PPC_64_H_\n+\n+#include <stdint.h>\n+#include <string.h>\n+/*To include altivec.h, GCC version must >= 4.8 */\n+#include <altivec.h>\n+\n+#ifdef __cplusplus\n+extern \"C\" {\n+#endif\n+\n+#include \"generic/rte_memcpy.h\"\n+\n+static inline void\n+rte_mov16(uint8_t *dst, const uint8_t *src)\n+{\n+\tvec_vsx_st(vec_vsx_ld(0, src), 0, dst);\n+}\n+\n+static inline void\n+rte_mov32(uint8_t *dst, const uint8_t *src)\n+{\n+\tvec_vsx_st(vec_vsx_ld(0, src), 0, dst);\n+\tvec_vsx_st(vec_vsx_ld(16, src), 16, dst);\n+}\n+\n+static inline void\n+rte_mov48(uint8_t *dst, const uint8_t *src)\n+{\n+\tvec_vsx_st(vec_vsx_ld(0, src), 0, dst);\n+\tvec_vsx_st(vec_vsx_ld(16, src), 16, dst);\n+\tvec_vsx_st(vec_vsx_ld(32, src), 32, dst);\n+}\n+\n+static inline void\n+rte_mov64(uint8_t *dst, const uint8_t *src)\n+{\n+\tvec_vsx_st(vec_vsx_ld(0, src), 0, dst);\n+\tvec_vsx_st(vec_vsx_ld(16, src), 16, dst);\n+\tvec_vsx_st(vec_vsx_ld(32, src), 32, dst);\n+\tvec_vsx_st(vec_vsx_ld(48, src), 48, dst);\n+}\n+\n+static inline void\n+rte_mov128(uint8_t *dst, const uint8_t *src)\n+{\n+\tvec_vsx_st(vec_vsx_ld(0, src), 0, dst);\n+\tvec_vsx_st(vec_vsx_ld(16, src), 16, dst);\n+\tvec_vsx_st(vec_vsx_ld(32, src), 32, dst);\n+\tvec_vsx_st(vec_vsx_ld(48, src), 48, dst);\n+\tvec_vsx_st(vec_vsx_ld(64, src), 64, dst);\n+\tvec_vsx_st(vec_vsx_ld(80, src), 80, dst);\n+\tvec_vsx_st(vec_vsx_ld(96, src), 96, dst);\n+\tvec_vsx_st(vec_vsx_ld(112, src), 112, dst);\n+}\n+\n+static inline void\n+rte_mov256(uint8_t *dst, const uint8_t *src)\n+{\n+\trte_mov128(dst, src);\n+\trte_mov128(dst + 128, src + 128);\n+}\n+\n+#define rte_memcpy(dst, src, n) \\\n+\t((__builtin_constant_p(n)) ? \\\n+\tmemcpy((dst), (src), (n)) : \\\n+\trte_memcpy_func((dst), (src), (n)))\n+\n+static inline void *\n+rte_memcpy_func(void *dst, const void *src, size_t n)\n+{\n+\tvoid *ret = dst;\n+\n+\t/* We can't copy < 16 bytes using XMM registers so do it manually. */\n+\tif (n < 16) {\n+\t\tif (n & 0x01) {\n+\t\t\t*(uint8_t *)dst = *(const uint8_t *)src;\n+\t\t\tdst = (uint8_t *)dst + 1;\n+\t\t\tsrc = (const uint8_t *)src + 1;\n+\t\t}\n+\t\tif (n & 0x02) {\n+\t\t\t*(uint16_t *)dst = *(const uint16_t *)src;\n+\t\t\tdst = (uint16_t *)dst + 1;\n+\t\t\tsrc = (const uint16_t *)src + 1;\n+\t\t}\n+\t\tif (n & 0x04) {\n+\t\t\t*(uint32_t *)dst = *(const uint32_t *)src;\n+\t\t\tdst = (uint32_t *)dst + 1;\n+\t\t\tsrc = (const uint32_t *)src + 1;\n+\t\t}\n+\t\tif (n & 0x08) {\n+\t\t\t*(uint64_t *)dst = *(const uint64_t *)src;\n+\t\t}\n+\t\treturn ret;\n+\t}\n+\n+\t/* Special fast cases for <= 128 bytes */\n+\tif (n <= 32) {\n+\t\trte_mov16((uint8_t *)dst, (const uint8_t *)src);\n+\t\trte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);\n+\t\treturn ret;\n+\t}\n+\n+\tif (n <= 64) {\n+\t\trte_mov32((uint8_t *)dst, (const uint8_t *)src);\n+\t\trte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n);\n+\t\treturn ret;\n+\t}\n+\n+\tif (n <= 128) {\n+\t\trte_mov64((uint8_t *)dst, (const uint8_t *)src);\n+\t\trte_mov64((uint8_t *)dst - 64 + n, (const uint8_t *)src - 64 + n);\n+\t\treturn ret;\n+\t}\n+\n+\t/*\n+\t * For large copies > 128 bytes. This combination of 256, 64 and 16 byte\n+\t * copies was found to be faster than doing 128 and 32 byte copies as\n+\t * well.\n+\t */\n+\tfor ( ; n >= 256; n -= 256) {\n+\t\trte_mov256((uint8_t *)dst, (const uint8_t *)src);\n+\t\tdst = (uint8_t *)dst + 256;\n+\t\tsrc = (const uint8_t *)src + 256;\n+\t}\n+\n+\t/*\n+\t * We split the remaining bytes (which will be less than 256) into\n+\t * 64byte (2^6) chunks.\n+\t * Using incrementing integers in the case labels of a switch statement\n+\t * enourages the compiler to use a jump table. To get incrementing\n+\t * integers, we shift the 2 relevant bits to the LSB position to first\n+\t * get decrementing integers, and then subtract.\n+\t */\n+\tswitch (3 - (n >> 6)) {\n+\tcase 0x00:\n+\t\trte_mov64((uint8_t *)dst, (const uint8_t *)src);\n+\t\tn -= 64;\n+\t\tdst = (uint8_t *)dst + 64;\n+\t\tsrc = (const uint8_t *)src + 64; /* fallthrough */\n+\tcase 0x01:\n+\t\trte_mov64((uint8_t *)dst, (const uint8_t *)src);\n+\t\tn -= 64;\n+\t\tdst = (uint8_t *)dst + 64;\n+\t\tsrc = (const uint8_t *)src + 64; /* fallthrough */\n+\tcase 0x02:\n+\t\trte_mov64((uint8_t *)dst, (const uint8_t *)src);\n+\t\tn -= 64;\n+\t\tdst = (uint8_t *)dst + 64;\n+\t\tsrc = (const uint8_t *)src + 64; /* fallthrough */\n+\tdefault:\n+\t\t;\n+\t}\n+\n+\t/*\n+\t * We split the remaining bytes (which will be less than 64) into\n+\t * 16byte (2^4) chunks, using the same switch structure as above.\n+\t */\n+\tswitch (3 - (n >> 4)) {\n+\tcase 0x00:\n+\t\trte_mov16((uint8_t *)dst, (const uint8_t *)src);\n+\t\tn -= 16;\n+\t\tdst = (uint8_t *)dst + 16;\n+\t\tsrc = (const uint8_t *)src + 16; /* fallthrough */\n+\tcase 0x01:\n+\t\trte_mov16((uint8_t *)dst, (const uint8_t *)src);\n+\t\tn -= 16;\n+\t\tdst = (uint8_t *)dst + 16;\n+\t\tsrc = (const uint8_t *)src + 16; /* fallthrough */\n+\tcase 0x02:\n+\t\trte_mov16((uint8_t *)dst, (const uint8_t *)src);\n+\t\tn -= 16;\n+\t\tdst = (uint8_t *)dst + 16;\n+\t\tsrc = (const uint8_t *)src + 16; /* fallthrough */\n+\tdefault:\n+\t\t;\n+\t}\n+\n+\t/* Copy any remaining bytes, without going beyond end of buffers */\n+\tif (n != 0) {\n+\t\trte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);\n+\t}\n+\treturn ret;\n+}\n+\n+#ifdef __cplusplus\n+}\n+#endif\n+\n+#endif /* _RTE_MEMCPY_PPC_64_H_ */\n+\n", "prefixes": [ "dpdk-dev", "v3", "07/14" ] }{ "id": 1472, "url": "