get:
Show a patch.

patch:
Partially update a patch; only the fields supplied in the request are changed.

put:
Update a patch, replacing all writable fields.
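
These endpoints can also be driven from a script. The snippet below is a minimal sketch, not part of the API response that follows: it uses the Python requests library to read this patch and to change its state with a partial PATCH update. The token value is a placeholder, write access requires maintainer rights on the project, and the exact authentication scheme may vary per Patchwork deployment.

# Sketch only: hypothetical token and state value, adjust for your instance.
import requests

BASE = "https://patches.dpdk.org/api"
TOKEN = "0123456789abcdef"  # placeholder; real tokens come from your Patchwork profile

# get: show a patch
patch = requests.get(f"{BASE}/patches/24226/").json()
print(patch["name"], patch["state"])

# patch: partial update -- only the supplied fields change
resp = requests.patch(
    f"{BASE}/patches/24226/",
    headers={"Authorization": f"Token {TOKEN}"},  # assumed token-based auth
    json={"state": "accepted"},
)
resp.raise_for_status()

The raw response below is what a plain GET on this endpoint returns.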

GET /api/patches/24226/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 24226,
    "url": "https://patches.dpdk.org/api/patches/24226/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/1494494708-20642-6-git-send-email-jianbo.liu@linaro.org/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<1494494708-20642-6-git-send-email-jianbo.liu@linaro.org>",
    "list_archive_url": "https://inbox.dpdk.org/dev/1494494708-20642-6-git-send-email-jianbo.liu@linaro.org",
    "date": "2017-05-11T09:25:06",
    "name": "[dpdk-dev,v3,5/7] examples/l3fwd: add neon support for l3fwd",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "72d55b998b5a8685d8faea0fdc1fe20accc444d4",
    "submitter": {
        "id": 380,
        "url": "https://patches.dpdk.org/api/people/380/?format=api",
        "name": "Jianbo Liu",
        "email": "jianbo.liu@linaro.org"
    },
    "delegate": null,
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/1494494708-20642-6-git-send-email-jianbo.liu@linaro.org/mbox/",
    "series": [],
    "comments": "https://patches.dpdk.org/api/patches/24226/comments/",
    "check": "success",
    "checks": "https://patches.dpdk.org/api/patches/24226/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [IPv6:::1])\n\tby dpdk.org (Postfix) with ESMTP id EEC5169D8;\n\tThu, 11 May 2017 11:25:37 +0200 (CEST)",
            "from foss.arm.com (foss.arm.com [217.140.101.70])\n\tby dpdk.org (Postfix) with ESMTP id 04DCF282\n\tfor <dev@dpdk.org>; Thu, 11 May 2017 11:25:26 +0200 (CEST)",
            "from usa-sjc-imap-foss1.foss.arm.com (unknown [10.72.51.249])\n\tby usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 73F3A80D;\n\tThu, 11 May 2017 02:25:25 -0700 (PDT)",
            "from localhost.localdomain.com (usa-sjc-imap-foss1.foss.arm.com\n\t[10.72.51.249])\n\tby usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id\n\t9EDA63F220; Thu, 11 May 2017 02:25:24 -0700 (PDT)"
        ],
        "From": "Jianbo Liu <jianbo.liu@linaro.org>",
        "To": "dev@dpdk.org, tomasz.kantecki@intel.com, jerin.jacob@caviumnetworks.com, \n\tashwin.sekhar@caviumnetworks.com",
        "Cc": "Jianbo Liu <jianbo.liu@linaro.org>",
        "Date": "Thu, 11 May 2017 17:25:06 +0800",
        "Message-Id": "<1494494708-20642-6-git-send-email-jianbo.liu@linaro.org>",
        "X-Mailer": "git-send-email 1.8.3.1",
        "In-Reply-To": "<1494494708-20642-1-git-send-email-jianbo.liu@linaro.org>",
        "References": "<1493709255-8887-1-git-send-email-jianbo.liu@linaro.org>\n\t<1494494708-20642-1-git-send-email-jianbo.liu@linaro.org>",
        "Subject": "[dpdk-dev] [PATCH v3 5/7] examples/l3fwd: add neon support for l3fwd",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<http://dpdk.org/ml/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<http://dpdk.org/ml/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Use ARM NEON intrinsics to accelerate l3 fowarding.\n\nSigned-off-by: Jianbo Liu <jianbo.liu@linaro.org>\n---\n examples/l3fwd/l3fwd_em.c            |   4 +-\n examples/l3fwd/l3fwd_em_hlm.h        |  17 ++-\n examples/l3fwd/l3fwd_em_hlm_neon.h   |  74 ++++++++++\n examples/l3fwd/l3fwd_em_sequential.h |  18 ++-\n examples/l3fwd/l3fwd_lpm.c           |   4 +-\n examples/l3fwd/l3fwd_lpm_neon.h      | 193 ++++++++++++++++++++++++++\n examples/l3fwd/l3fwd_neon.h          | 259 +++++++++++++++++++++++++++++++++++\n 7 files changed, 563 insertions(+), 6 deletions(-)\n create mode 100644 examples/l3fwd/l3fwd_em_hlm_neon.h\n create mode 100644 examples/l3fwd/l3fwd_lpm_neon.h\n create mode 100644 examples/l3fwd/l3fwd_neon.h",
    "diff": "diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c\nindex ba844b2..da96cfd 100644\n--- a/examples/l3fwd/l3fwd_em.c\n+++ b/examples/l3fwd/l3fwd_em.c\n@@ -328,7 +328,7 @@ struct ipv6_l3fwd_em_route {\n \treturn (uint8_t)((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]);\n }\n \n-#if defined(__SSE4_1__)\n+#if defined(__SSE4_1__) || defined(RTE_MACHINE_CPUFLAG_NEON)\n #if defined(NO_HASH_MULTI_LOOKUP)\n #include \"l3fwd_em_sequential.h\"\n #else\n@@ -709,7 +709,7 @@ struct ipv6_l3fwd_em_route {\n \t\t\tif (nb_rx == 0)\n \t\t\t\tcontinue;\n \n-#if defined(__SSE4_1__)\n+#if defined(__SSE4_1__) || defined(RTE_MACHINE_CPUFLAG_NEON)\n \t\t\tl3fwd_em_send_packets(nb_rx, pkts_burst,\n \t\t\t\t\t\t\tportid, qconf);\n #else\ndiff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h\nindex 636dea4..b9163e3 100644\n--- a/examples/l3fwd/l3fwd_em_hlm.h\n+++ b/examples/l3fwd/l3fwd_em_hlm.h\n@@ -35,8 +35,13 @@\n #ifndef __L3FWD_EM_HLM_H__\n #define __L3FWD_EM_HLM_H__\n \n+#if defined(__SSE4_1__)\n #include \"l3fwd_sse.h\"\n #include \"l3fwd_em_hlm_sse.h\"\n+#elif defined(RTE_MACHINE_CPUFLAG_NEON)\n+#include \"l3fwd_neon.h\"\n+#include \"l3fwd_em_hlm_neon.h\"\n+#endif\n \n static inline __attribute__((always_inline)) void\n em_get_dst_port_ipv4x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],\n@@ -238,7 +243,7 @@ static inline __attribute__((always_inline)) uint16_t\n l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,\n \t\tuint8_t portid, struct lcore_conf *qconf)\n {\n-\tint32_t j;\n+\tint32_t i, j, pos;\n \tuint16_t dst_port[MAX_PKT_BURST];\n \n \t/*\n@@ -247,6 +252,11 @@ static inline __attribute__((always_inline)) uint16_t\n \t */\n \tint32_t n = RTE_ALIGN_FLOOR(nb_rx, 8);\n \n+\tfor (j = 0; j < 8 && j < nb_rx; j++) {\n+\t\trte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],\n+\t\t\t\t\t       struct ether_hdr *) + 1);\n+\t}\n+\n \tfor (j = 0; j < n; j += 8) {\n \n \t\tuint32_t pkt_type =\n@@ -263,6 +273,11 @@ static inline __attribute__((always_inline)) uint16_t\n \t\tuint32_t tcp_or_udp = pkt_type &\n \t\t\t(RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);\n \n+\t\tfor (i = 0, pos = j + 8; i < 8 && pos < nb_rx; i++, pos++) {\n+\t\t\trte_prefetch0(rte_pktmbuf_mtod(pkts_burst[pos],\n+\t\t\t\t\t\t       struct ether_hdr *) + 1);\n+\t\t}\n+\n \t\tif (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) {\n \n \t\t\tem_get_dst_port_ipv4x8(qconf, &pkts_burst[j], portid,\ndiff --git a/examples/l3fwd/l3fwd_em_hlm_neon.h b/examples/l3fwd/l3fwd_em_hlm_neon.h\nnew file mode 100644\nindex 0000000..dae1acf\n--- /dev/null\n+++ b/examples/l3fwd/l3fwd_em_hlm_neon.h\n@@ -0,0 +1,74 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright(c) 2016 Intel Corporation. 
All rights reserved.\n+ *   Copyright(c) 2017, Linaro Limited\n+ *   All rights reserved.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the name of Intel Corporation nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+#ifndef __L3FWD_EM_HLM_NEON_H__\n+#define __L3FWD_EM_HLM_NEON_H__\n+\n+#include <arm_neon.h>\n+\n+static inline void\n+get_ipv4_5tuple(struct rte_mbuf *m0, int32x4_t mask0,\n+\t\tunion ipv4_5tuple_host *key)\n+{\n+\tint32x4_t tmpdata0 = vld1q_s32(rte_pktmbuf_mtod_offset(m0, int32_t *,\n+\t\t\t\tsizeof(struct ether_hdr) +\n+\t\t\t\toffsetof(struct ipv4_hdr, time_to_live)));\n+\n+\tkey->xmm = vandq_s32(tmpdata0, mask0);\n+}\n+\n+static inline void\n+get_ipv6_5tuple(struct rte_mbuf *m0, int32x4_t mask0,\n+\t\tint32x4_t mask1, union ipv6_5tuple_host *key)\n+{\n+\tint32x4_t tmpdata0 = vld1q_s32(\n+\t\t\trte_pktmbuf_mtod_offset(m0, int *,\n+\t\t\t\tsizeof(struct ether_hdr) +\n+\t\t\t\toffsetof(struct ipv6_hdr, payload_len)));\n+\n+\tint32x4_t tmpdata1 = vld1q_s32(\n+\t\t\trte_pktmbuf_mtod_offset(m0, int *,\n+\t\t\t\tsizeof(struct ether_hdr) +\n+\t\t\t\toffsetof(struct ipv6_hdr, payload_len) + 8));\n+\n+\tint32x4_t tmpdata2 = vld1q_s32(\n+\t\t\trte_pktmbuf_mtod_offset(m0, int *,\n+\t\t\t\tsizeof(struct ether_hdr) +\n+\t\t\t\toffsetof(struct ipv6_hdr, payload_len) + 16));\n+\n+\tkey->xmm[0] = vandq_s32(tmpdata0, mask0);\n+\tkey->xmm[1] = tmpdata1;\n+\tkey->xmm[2] = vandq_s32(tmpdata2, mask1);\n+}\n+#endif /* __L3FWD_EM_HLM_NEON_H__ */\ndiff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h\nindex c0a9725..2b3ec16 100644\n--- a/examples/l3fwd/l3fwd_em_sequential.h\n+++ b/examples/l3fwd/l3fwd_em_sequential.h\n@@ -43,7 +43,11 @@\n  * compilation time.\n  */\n \n+#if defined(__SSE4_1__)\n #include \"l3fwd_sse.h\"\n+#elif defined(RTE_MACHINE_CPUFLAG_NEON)\n+#include \"l3fwd_neon.h\"\n+#endif\n \n static inline __attribute__((always_inline)) uint16_t\n em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,\n@@ -101,11 +105,21 @@ static inline 
__attribute__((always_inline)) uint16_t\n l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,\n \t\t\tuint8_t portid, struct lcore_conf *qconf)\n {\n-\tint32_t j;\n+\tint32_t i, j;\n \tuint16_t dst_port[MAX_PKT_BURST];\n \n-\tfor (j = 0; j < nb_rx; j++)\n+\tif (nb_rx > 0) {\n+\t\trte_prefetch0(rte_pktmbuf_mtod(pkts_burst[0],\n+\t\t\t\t\t       struct ether_hdr *) + 1);\n+\t}\n+\n+\tfor (i = 1, j = 0; j < nb_rx; i++, j++) {\n+\t\tif (i < nb_rx) {\n+\t\t\trte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i],\n+\t\t\t\t\t\t       struct ether_hdr *) + 1);\n+\t\t}\n \t\tdst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);\n+\t}\n \n \tsend_packets_multi(qconf, pkts_burst, dst_port, nb_rx);\n }\ndiff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c\nindex fc554fc..ddef250 100644\n--- a/examples/l3fwd/l3fwd_lpm.c\n+++ b/examples/l3fwd/l3fwd_lpm.c\n@@ -189,6 +189,8 @@ static inline __attribute__((always_inline)) uint16_t\n \n #if defined(__SSE4_1__)\n #include \"l3fwd_lpm_sse.h\"\n+#elif defined(RTE_MACHINE_CPUFLAG_NEON)\n+#include \"l3fwd_lpm_neon.h\"\n #else\n #include \"l3fwd_lpm.h\"\n #endif\n@@ -261,7 +263,7 @@ static inline __attribute__((always_inline)) uint16_t\n \t\t\tif (nb_rx == 0)\n \t\t\t\tcontinue;\n \n-#if defined(__SSE4_1__)\n+#if defined(__SSE4_1__) || defined(RTE_MACHINE_CPUFLAG_NEON)\n \t\t\tl3fwd_lpm_send_packets(nb_rx, pkts_burst,\n \t\t\t\t\t\tportid, qconf);\n #else\ndiff --git a/examples/l3fwd/l3fwd_lpm_neon.h b/examples/l3fwd/l3fwd_lpm_neon.h\nnew file mode 100644\nindex 0000000..baedbfe\n--- /dev/null\n+++ b/examples/l3fwd/l3fwd_lpm_neon.h\n@@ -0,0 +1,193 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.\n+ *   Copyright(c) 2017, Linaro Limited\n+ *   All rights reserved.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the name of Intel Corporation nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+#ifndef __L3FWD_LPM_NEON_H__\n+#define __L3FWD_LPM_NEON_H__\n+\n+#include <arm_neon.h>\n+\n+#include \"l3fwd_neon.h\"\n+\n+/*\n+ * Read packet_type and destination IPV4 addresses from 4 mbufs.\n+ */\n+static inline void\n+processx4_step1(struct rte_mbuf *pkt[FWDSTEP],\n+\t\tint32x4_t *dip,\n+\t\tuint32_t *ipv4_flag)\n+{\n+\tstruct ipv4_hdr *ipv4_hdr;\n+\tstruct ether_hdr *eth_hdr;\n+\tint32_t dst[FWDSTEP];\n+\n+\teth_hdr = rte_pktmbuf_mtod(pkt[0], struct ether_hdr *);\n+\tipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);\n+\tdst[0] = ipv4_hdr->dst_addr;\n+\tipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4;\n+\n+\teth_hdr = rte_pktmbuf_mtod(pkt[1], struct ether_hdr *);\n+\tipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);\n+\tdst[1] = ipv4_hdr->dst_addr;\n+\tipv4_flag[0] &= pkt[1]->packet_type;\n+\n+\teth_hdr = rte_pktmbuf_mtod(pkt[2], struct ether_hdr *);\n+\tipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);\n+\tdst[2] = ipv4_hdr->dst_addr;\n+\tipv4_flag[0] &= pkt[2]->packet_type;\n+\n+\teth_hdr = rte_pktmbuf_mtod(pkt[3], struct ether_hdr *);\n+\tipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);\n+\tdst[3] = ipv4_hdr->dst_addr;\n+\tipv4_flag[0] &= pkt[3]->packet_type;\n+\n+\tdip[0] = vld1q_s32(dst);\n+}\n+\n+/*\n+ * Lookup into LPM for destination port.\n+ * If lookup fails, use incoming port (portid) as destination port.\n+ */\n+static inline void\n+processx4_step2(const struct lcore_conf *qconf,\n+\t\tint32x4_t dip,\n+\t\tuint32_t ipv4_flag,\n+\t\tuint8_t portid,\n+\t\tstruct rte_mbuf *pkt[FWDSTEP],\n+\t\tuint16_t dprt[FWDSTEP])\n+{\n+\trte_xmm_t dst;\n+\n+\tdip = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(dip)));\n+\n+\t/* if all 4 packets are IPV4. */\n+\tif (likely(ipv4_flag)) {\n+\t\trte_lpm_lookupx4(qconf->ipv4_lookup_struct, dip, dst.u32,\n+\t\t\tportid);\n+\t\t/* get rid of unused upper 16 bit for each dport. 
*/\n+\t\tvst1_s16((int16_t *)dprt, vqmovn_s32(dst.x));\n+\t} else {\n+\t\tdst.x = dip;\n+\t\tdprt[0] = lpm_get_dst_port_with_ipv4(qconf, pkt[0],\n+\t\t\t\t\t\t     dst.u32[0], portid);\n+\t\tdprt[1] = lpm_get_dst_port_with_ipv4(qconf, pkt[1],\n+\t\t\t\t\t\t     dst.u32[1], portid);\n+\t\tdprt[2] = lpm_get_dst_port_with_ipv4(qconf, pkt[2],\n+\t\t\t\t\t\t     dst.u32[2], portid);\n+\t\tdprt[3] = lpm_get_dst_port_with_ipv4(qconf, pkt[3],\n+\t\t\t\t\t\t     dst.u32[3], portid);\n+\t}\n+}\n+\n+/*\n+ * Buffer optimized handling of packets, invoked\n+ * from main_loop.\n+ */\n+static inline void\n+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,\n+\t\t\tuint8_t portid, struct lcore_conf *qconf)\n+{\n+\tint32_t i = 0, j = 0;\n+\tuint16_t dst_port[MAX_PKT_BURST];\n+\tint32x4_t dip;\n+\tuint32_t ipv4_flag;\n+\tconst int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);\n+\tconst int32_t m = nb_rx % FWDSTEP;\n+\n+\tif (k) {\n+\t\tfor (i = 0; i < FWDSTEP; i++) {\n+\t\t\trte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i],\n+\t\t\t\t\t\tstruct ether_hdr *) + 1);\n+\t\t}\n+\n+\t\tfor (j = 0; j != k - FWDSTEP; j += FWDSTEP) {\n+\t\t\tfor (i = 0; i < FWDSTEP; i++) {\n+\t\t\t\trte_prefetch0(rte_pktmbuf_mtod(\n+\t\t\t\t\t\tpkts_burst[j + i + FWDSTEP],\n+\t\t\t\t\t\tstruct ether_hdr *) + 1);\n+\t\t\t}\n+\n+\t\t\tprocessx4_step1(&pkts_burst[j], &dip, &ipv4_flag);\n+\t\t\tprocessx4_step2(qconf, dip, ipv4_flag, portid,\n+\t\t\t\t\t&pkts_burst[j], &dst_port[j]);\n+\t\t}\n+\n+\t\tprocessx4_step1(&pkts_burst[j], &dip, &ipv4_flag);\n+\t\tprocessx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j],\n+\t\t\t\t&dst_port[j]);\n+\n+\t\tj += FWDSTEP;\n+\t}\n+\n+\tif (m) {\n+\t\t/* Prefetch last up to 3 packets one by one */\n+\t\tswitch (m) {\n+\t\tcase 3:\n+\t\t\trte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],\n+\t\t\t\t\t\tstruct ether_hdr *) + 1);\n+\t\t\tj++;\n+\t\t\t/* fallthrough */\n+\t\tcase 2:\n+\t\t\trte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],\n+\t\t\t\t\t\tstruct ether_hdr *) + 1);\n+\t\t\tj++;\n+\t\t\t/* fallthrough */\n+\t\tcase 1:\n+\t\t\trte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],\n+\t\t\t\t\t\tstruct ether_hdr *) + 1);\n+\t\t\tj++;\n+\t\t}\n+\n+\t\tj -= m;\n+\t\t/* Classify last up to 3 packets one by one */\n+\t\tswitch (m) {\n+\t\tcase 3:\n+\t\t\tdst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],\n+\t\t\t\t\t\t       portid);\n+\t\t\tj++;\n+\t\t\t/* fallthrough */\n+\t\tcase 2:\n+\t\t\tdst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],\n+\t\t\t\t\t\t       portid);\n+\t\t\tj++;\n+\t\t\t/* fallthrough */\n+\t\tcase 1:\n+\t\t\tdst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j],\n+\t\t\t\t\t\t       portid);\n+\t\t}\n+\t}\n+\n+\tsend_packets_multi(qconf, pkts_burst, dst_port, nb_rx);\n+}\n+\n+#endif /* __L3FWD_LPM_NEON_H__ */\ndiff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h\nnew file mode 100644\nindex 0000000..7a91afc\n--- /dev/null\n+++ b/examples/l3fwd/l3fwd_neon.h\n@@ -0,0 +1,259 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright(c) 2016 Intel Corporation. 
All rights reserved.\n+ *   Copyright(c) 2017, Linaro Limited\n+ *   All rights reserved.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the name of Intel Corporation nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+\n+#ifndef _L3FWD_NEON_H_\n+#define _L3FWD_NEON_H_\n+\n+#include \"l3fwd.h\"\n+#include \"l3fwd_common.h\"\n+\n+/*\n+ * Update source and destination MAC addresses in the ethernet header.\n+ * Perform RFC1812 checks and updates for IPV4 packets.\n+ */\n+static inline void\n+processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])\n+{\n+\tuint32x4_t te[FWDSTEP];\n+\tuint32x4_t ve[FWDSTEP];\n+\tuint32_t *p[FWDSTEP];\n+\n+\tp[0] = rte_pktmbuf_mtod(pkt[0], uint32_t *);\n+\tp[1] = rte_pktmbuf_mtod(pkt[1], uint32_t *);\n+\tp[2] = rte_pktmbuf_mtod(pkt[2], uint32_t *);\n+\tp[3] = rte_pktmbuf_mtod(pkt[3], uint32_t *);\n+\n+\tve[0] = vreinterpretq_u32_s32(val_eth[dst_port[0]]);\n+\tte[0] = vld1q_u32(p[0]);\n+\n+\tve[1] = vreinterpretq_u32_s32(val_eth[dst_port[1]]);\n+\tte[1] = vld1q_u32(p[1]);\n+\n+\tve[2] = vreinterpretq_u32_s32(val_eth[dst_port[2]]);\n+\tte[2] = vld1q_u32(p[2]);\n+\n+\tve[3] = vreinterpretq_u32_s32(val_eth[dst_port[3]]);\n+\tte[3] = vld1q_u32(p[3]);\n+\n+\t/* Update last 4 bytes */\n+\tve[0] = vsetq_lane_u32(vgetq_lane_u32(te[0], 3), ve[0], 3);\n+\tve[1] = vsetq_lane_u32(vgetq_lane_u32(te[1], 3), ve[1], 3);\n+\tve[2] = vsetq_lane_u32(vgetq_lane_u32(te[2], 3), ve[2], 3);\n+\tve[3] = vsetq_lane_u32(vgetq_lane_u32(te[3], 3), ve[3], 3);\n+\n+\tvst1q_u32(p[0], ve[0]);\n+\tvst1q_u32(p[1], ve[1]);\n+\tvst1q_u32(p[2], ve[2]);\n+\tvst1q_u32(p[3], ve[3]);\n+\n+\trfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1),\n+\t\t&dst_port[0], pkt[0]->packet_type);\n+\trfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1),\n+\t\t&dst_port[1], pkt[1]->packet_type);\n+\trfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1),\n+\t\t&dst_port[2], pkt[2]->packet_type);\n+\trfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1),\n+\t\t&dst_port[3], 
pkt[3]->packet_type);\n+}\n+\n+/*\n+ * Group consecutive packets with the same destination port in bursts of 4.\n+ * Suppose we have array of destionation ports:\n+ * dst_port[] = {a, b, c, d,, e, ... }\n+ * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.\n+ * We doing 4 comparisions at once and the result is 4 bit mask.\n+ * This mask is used as an index into prebuild array of pnum values.\n+ */\n+static inline uint16_t *\n+port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1,\n+\t     uint16x8_t dp2)\n+{\n+\tunion {\n+\t\tuint16_t u16[FWDSTEP + 1];\n+\t\tuint64_t u64;\n+\t} *pnum = (void *)pn;\n+\n+\tint32_t v;\n+\tuint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0};\n+\n+\tdp1 = vceqq_u16(dp1, dp2);\n+\tdp1 = vandq_u16(dp1, mask);\n+\tv = vaddvq_u16(dp1);\n+\n+\t/* update last port counter. */\n+\tlp[0] += gptbl[v].lpv;\n+\n+\t/* if dest port value has changed. */\n+\tif (v != GRPMSK) {\n+\t\tpnum->u64 = gptbl[v].pnum;\n+\t\tpnum->u16[FWDSTEP] = 1;\n+\t\tlp = pnum->u16 + gptbl[v].idx;\n+\t}\n+\n+\treturn lp;\n+}\n+\n+/**\n+ * Process one packet:\n+ * Update source and destination MAC addresses in the ethernet header.\n+ * Perform RFC1812 checks and updates for IPV4 packets.\n+ */\n+static inline void\n+process_packet(struct rte_mbuf *pkt, uint16_t *dst_port)\n+{\n+\tstruct ether_hdr *eth_hdr;\n+\tuint32x4_t te, ve;\n+\n+\teth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);\n+\n+\tte = vld1q_u32((uint32_t *)eth_hdr);\n+\tve = vreinterpretq_u32_s32(val_eth[dst_port[0]]);\n+\n+\n+\trfc1812_process((struct ipv4_hdr *)(eth_hdr + 1), dst_port,\n+\t\t\tpkt->packet_type);\n+\n+\tve = vcopyq_lane_u32(ve, 3, te, 3);\n+\tvst1q_u32((uint32_t *)eth_hdr, ve);\n+}\n+\n+/**\n+ * Send packets burst from pkts_burst to the ports in dst_port array\n+ */\n+static inline __attribute__((always_inline)) void\n+send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,\n+\t\tuint16_t dst_port[MAX_PKT_BURST], int nb_rx)\n+{\n+\tint32_t k;\n+\tint j = 0;\n+\tuint16_t dlp;\n+\tuint16_t *lp;\n+\tuint16_t pnum[MAX_PKT_BURST + 1];\n+\n+\t/*\n+\t * Finish packet processing and group consecutive\n+\t * packets with the same destination port.\n+\t */\n+\tk = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);\n+\tif (k != 0) {\n+\t\tuint16x8_t dp1, dp2;\n+\n+\t\tlp = pnum;\n+\t\tlp[0] = 1;\n+\n+\t\tprocessx4_step3(pkts_burst, dst_port);\n+\n+\t\t/* dp1: <d[0], d[1], d[2], d[3], ... > */\n+\t\tdp1 = vld1q_u16(dst_port);\n+\n+\t\tfor (j = FWDSTEP; j != k; j += FWDSTEP) {\n+\t\t\tprocessx4_step3(&pkts_burst[j], &dst_port[j]);\n+\n+\t\t\t/*\n+\t\t\t * dp2:\n+\t\t\t * <d[j-3], d[j-2], d[j-1], d[j], ... >\n+\t\t\t */\n+\t\t\tdp2 = vld1q_u16(&dst_port[j - FWDSTEP + 1]);\n+\t\t\tlp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);\n+\n+\t\t\t/*\n+\t\t\t * dp1:\n+\t\t\t * <d[j], d[j+1], d[j+2], d[j+3], ... >\n+\t\t\t */\n+\t\t\tdp1 = vextq_u16(dp1, dp1, FWDSTEP - 1);\n+\t\t}\n+\n+\t\t/*\n+\t\t * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... >\n+\t\t */\n+\t\tdp2 = vextq_u16(dp1, dp1, 1);\n+\t\tdp2 = vsetq_lane_u16(vgetq_lane_u16(dp2, 2), dp2, 3);\n+\t\tlp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);\n+\n+\t\t/*\n+\t\t * remove values added by the last repeated\n+\t\t * dst port.\n+\t\t */\n+\t\tlp[0]--;\n+\t\tdlp = dst_port[j - 1];\n+\t} else {\n+\t\t/* set dlp and lp to the never used values. */\n+\t\tdlp = BAD_PORT - 1;\n+\t\tlp = pnum + MAX_PKT_BURST;\n+\t}\n+\n+\t/* Process up to last 3 packets one by one. 
*/\n+\tswitch (nb_rx % FWDSTEP) {\n+\tcase 3:\n+\t\tprocess_packet(pkts_burst[j], dst_port + j);\n+\t\tGROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);\n+\t\tj++;\n+\t\t/* fallthrough */\n+\tcase 2:\n+\t\tprocess_packet(pkts_burst[j], dst_port + j);\n+\t\tGROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);\n+\t\tj++;\n+\t\t/* fallthrough */\n+\tcase 1:\n+\t\tprocess_packet(pkts_burst[j], dst_port + j);\n+\t\tGROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);\n+\t\tj++;\n+\t}\n+\n+\t/*\n+\t * Send packets out, through destination port.\n+\t * Consecutive packets with the same destination port\n+\t * are already grouped together.\n+\t * If destination port for the packet equals BAD_PORT,\n+\t * then free the packet without sending it out.\n+\t */\n+\tfor (j = 0; j < nb_rx; j += k) {\n+\n+\t\tint32_t m;\n+\t\tuint16_t pn;\n+\n+\t\tpn = dst_port[j];\n+\t\tk = pnum[j];\n+\n+\t\tif (likely(pn != BAD_PORT))\n+\t\t\tsend_packetsx4(qconf, pn, pkts_burst + j, k);\n+\t\telse\n+\t\t\tfor (m = j; m != j + k; m++)\n+\t\t\t\trte_pktmbuf_free(pkts_burst[m]);\n+\n+\t}\n+}\n+\n+#endif /* _L3FWD_NEON_H_ */\n",
    "prefixes": [
        "dpdk-dev",
        "v3",
        "5/7"
    ]
}
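
As an illustration of how the fields above might be consumed (a hedged sketch, not an official client), the script below re-fetches the record, reports the state and aggregate check result, and saves the raw patch from the mbox URL so it can be applied with git am:

# Sketch only: relies on fields present in the response above.
import requests

patch = requests.get("https://patches.dpdk.org/api/patches/24226/").json()

print("state:", patch["state"])   # "superseded" in the response above
print("check:", patch["check"])   # aggregate CI result, "success" above

# "mbox" points at the raw patch in mbox format, suitable for "git am"
mbox = requests.get(patch["mbox"])
mbox.raise_for_status()
with open("24226.mbox", "wb") as f:
    f.write(mbox.content)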