From patchwork Mon Apr 8 18:24:18 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Aaron Conole X-Patchwork-Id: 52437 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id EA7D54CC7; Mon, 8 Apr 2019 20:24:28 +0200 (CEST) Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28]) by dpdk.org (Postfix) with ESMTP id CA6382C55 for ; Mon, 8 Apr 2019 20:24:23 +0200 (CEST) Received: from smtp.corp.redhat.com (int-mx03.intmail.prod.int.phx2.redhat.com [10.5.11.13]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 31A2E882EF; Mon, 8 Apr 2019 18:24:23 +0000 (UTC) Received: from dhcp-25.97.bos.redhat.com (unknown [10.18.25.61]) by smtp.corp.redhat.com (Postfix) with ESMTP id 9B643665C4; Mon, 8 Apr 2019 18:24:22 +0000 (UTC) From: Aaron Conole To: dev@dpdk.org Cc: Konstantin Ananyev , Jerin Jacob , Gavin Hu Date: Mon, 8 Apr 2019 14:24:18 -0400 Message-Id: <20190408182420.4398-2-aconole@redhat.com> In-Reply-To: <20190408182420.4398-1-aconole@redhat.com> References: <20190408182420.4398-1-aconole@redhat.com> MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.79 on 10.5.11.13 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.28]); Mon, 08 Apr 2019 18:24:23 +0000 (UTC) Subject: [dpdk-dev] [PATCH 1/3] acl: fix arm argument types X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Compiler complains of argument type mismatch, like: ../lib/librte_acl/acl_run_neon.h: In function ‘transition4’: ../lib/librte_acl/acl_run_neon.h:115:2: note: use 
-flax-vector-conversions to permit conversions between vectors with differing element types or numbers of subparts node_type = vbicq_s32(tr_hi_lo.val[0], index_msk); ^ ../lib/librte_acl/acl_run_neon.h:115:41: error: incompatible type for argument 2 of ‘vbicq_s32’ Signed-off-by: Aaron Conole --- lib/librte_acl/acl_run_neon.h | 46 ++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/lib/librte_acl/acl_run_neon.h b/lib/librte_acl/acl_run_neon.h index 01b9766d8..4a8e4b681 100644 --- a/lib/librte_acl/acl_run_neon.h +++ b/lib/librte_acl/acl_run_neon.h @@ -112,37 +112,41 @@ transition4(int32x4_t next_input, const uint64_t *trans, uint64_t transitions[]) index_msk = vld1q_u32((const uint32_t *)&neon_acl_const.xmm_index_mask); /* Calc node type and node addr */ - node_type = vbicq_s32(tr_hi_lo.val[0], index_msk); - addr = vandq_s32(tr_hi_lo.val[0], index_msk); + node_type = (uint32x4_t) vbicq_s32(tr_hi_lo.val[0], + (int32x4_t)index_msk); + addr = (uint32x4_t) vandq_s32(tr_hi_lo.val[0], (int32x4_t) index_msk); /* t = 0 */ - t = veorq_s32(node_type, node_type); + t = veorq_s32((int32x4_t)node_type, (int32x4_t)node_type); /* mask for DFA type(0) nodes */ - dfa_msk = vceqq_u32(node_type, t); + dfa_msk = vceqq_u32(node_type, (uint32x4_t)t); - mask = vld1q_s32((const int32_t *)&neon_acl_const.xmm_shuffle_input); - in = vqtbl1q_u8((uint8x16_t)next_input, (uint8x16_t)mask); + mask = (uint32x4_t) + vld1q_s32((const int32_t *)&neon_acl_const.xmm_shuffle_input); + in = (int32x4_t) vqtbl1q_u8((uint8x16_t)next_input, (uint8x16_t)mask); /* DFA calculations. 
*/ - r = vshrq_n_u32(in, 30); /* div by 64 */ - mask = vld1q_s32((const int32_t *)&neon_acl_const.range_base); - r = vaddq_u8(r, mask); - t = vshrq_n_u32(in, 24); - r = vqtbl1q_u8((uint8x16_t)tr_hi_lo.val[1], (uint8x16_t)r); - dfa_ofs = vsubq_s32(t, r); + r = (int32x4_t) vshrq_n_u32((uint32x4_t) in, 30); /* div by 64 */ + mask = (uint32x4_t) + vld1q_s32((const int32_t *)&neon_acl_const.range_base); + r = (int32x4_t) vaddq_u8((uint8x16_t)r, (uint8x16_t)mask); + t = (int32x4_t) vshrq_n_u32((uint32x4_t)in, 24); + r = (int32x4_t) vqtbl1q_u8((uint8x16_t)tr_hi_lo.val[1], (uint8x16_t)r); + dfa_ofs = (uint32x4_t) vsubq_s32(t, r); /* QUAD/SINGLE calculations. */ - t = vcgtq_s8(in, tr_hi_lo.val[1]); - t = vabsq_s8(t); - t = vpaddlq_u8(t); - quad_ofs = vpaddlq_u16(t); + t = (int32x4_t) vcgtq_s8((int8x16_t)in, (int8x16_t)tr_hi_lo.val[1]); + t = (int32x4_t) vabsq_s8((int8x16_t)t); + t = (int32x4_t) vpaddlq_u8((uint8x16_t)t); + quad_ofs = vpaddlq_u16((uint16x8_t)t); /* blend DFA and QUAD/SINGLE. */ - t = vbslq_u8(dfa_msk, dfa_ofs, quad_ofs); + t = (int32x4_t) vbslq_u8((uint8x16_t)dfa_msk, (uint8x16_t)dfa_ofs, + (uint8x16_t)quad_ofs); /* calculate address for next transitions */ - addr = vaddq_u32(addr, t); + addr = vaddq_u32(addr, (uint32x4_t)t); /* Fill next transitions */ transitions[0] = trans[vgetq_lane_u32(addr, 0)]; @@ -150,7 +154,7 @@ transition4(int32x4_t next_input, const uint64_t *trans, uint64_t transitions[]) transitions[2] = trans[vgetq_lane_u32(addr, 2)]; transitions[3] = trans[vgetq_lane_u32(addr, 3)]; - return vshrq_n_u32(next_input, CHAR_BIT); + return (int32x4_t) vshrq_n_u32((uint32x4_t)next_input, CHAR_BIT); } /* @@ -179,6 +183,9 @@ search_neon_8(const struct rte_acl_ctx *ctx, const uint8_t **data, acl_match_check_x4(0, ctx, parms, &flows, &index_array[0]); acl_match_check_x4(4, ctx, parms, &flows, &index_array[4]); + memset(&input0, 0, sizeof(input0)); + memset(&input1, 0, sizeof(input1)); + while (flows.started > 0) { /* Gather 4 bytes of input data for each 
stream. */ input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input0, 0); @@ -240,6 +247,7 @@ search_neon_4(const struct rte_acl_ctx *ctx, const uint8_t **data, /* Check for any matches. */ acl_match_check_x4(0, ctx, parms, &flows, index_array); + memset(&input, 0, sizeof(input)); while (flows.started > 0) { /* Gather 4 bytes of input data for each stream. */ input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input, 0); From patchwork Mon Apr 8 18:24:19 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Aaron Conole X-Patchwork-Id: 52438 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id C81254F91; Mon, 8 Apr 2019 20:24:31 +0200 (CEST) Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28]) by dpdk.org (Postfix) with ESMTP id 6CD5F2BCE for ; Mon, 8 Apr 2019 20:24:24 +0200 (CEST) Received: from smtp.corp.redhat.com (int-mx03.intmail.prod.int.phx2.redhat.com [10.5.11.13]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id D0AA930917AB; Mon, 8 Apr 2019 18:24:23 +0000 (UTC) Received: from dhcp-25.97.bos.redhat.com (unknown [10.18.25.61]) by smtp.corp.redhat.com (Postfix) with ESMTP id 535E56092E; Mon, 8 Apr 2019 18:24:23 +0000 (UTC) From: Aaron Conole To: dev@dpdk.org Cc: Konstantin Ananyev , Jerin Jacob , Gavin Hu Date: Mon, 8 Apr 2019 14:24:19 -0400 Message-Id: <20190408182420.4398-3-aconole@redhat.com> In-Reply-To: <20190408182420.4398-1-aconole@redhat.com> References: <20190408182420.4398-1-aconole@redhat.com> MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.79 on 10.5.11.13 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.41]); Mon, 08 Apr 2019 18:24:23 +0000 (UTC) Subject: [dpdk-dev] [PATCH 2/3] acl: 
update the build for multi-arch X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" For the introduction of the meson build, the build file for the ACL library architecture specific files was not ported. This means the compiler didn't know about the optimized versions when building the RTE_ACL library for each architecture. Now hook up the different architectures by checking the architecture build environment and including the right objects. Weak symbols aren't working with this commit but will get fixed to properly select the right runtime version in a future commit. Signed-off-by: Aaron Conole --- lib/librte_acl/meson.build | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build index 2207dbafe..03c19e4e5 100644 --- a/lib/librte_acl/meson.build +++ b/lib/librte_acl/meson.build @@ -27,5 +27,8 @@ if arch_subdir == 'x86' objs += avx2_tmplib.extract_objects('acl_run_avx2.c') cflags += '-DCC_AVX2_SUPPORT' endif - +elif arch_subdir == 'arm' and host_machine.cpu_family().startswith('aarch64') + sources += files('acl_run_neon.c') +elif arch_subdir == 'ppc_64' + sources += files('acl_run_altivec.c') endif From patchwork Mon Apr 8 18:24:20 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Aaron Conole X-Patchwork-Id: 52439 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 2B9815323; Mon, 8 Apr 2019 20:24:34 +0200 (CEST) Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28]) by dpdk.org (Postfix) with ESMTP id 1A93E2BCE for ; Mon, 8 Apr 2019 20:24:25 +0200 (CEST) Received: from
smtp.corp.redhat.com (int-mx03.intmail.prod.int.phx2.redhat.com [10.5.11.13]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 7C3584DB00; Mon, 8 Apr 2019 18:24:24 +0000 (UTC) Received: from dhcp-25.97.bos.redhat.com (unknown [10.18.25.61]) by smtp.corp.redhat.com (Postfix) with ESMTP id F387364051; Mon, 8 Apr 2019 18:24:23 +0000 (UTC) From: Aaron Conole To: dev@dpdk.org Cc: Konstantin Ananyev , Jerin Jacob , Gavin Hu Date: Mon, 8 Apr 2019 14:24:20 -0400 Message-Id: <20190408182420.4398-4-aconole@redhat.com> In-Reply-To: <20190408182420.4398-1-aconole@redhat.com> References: <20190408182420.4398-1-aconole@redhat.com> MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.79 on 10.5.11.13 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.29]); Mon, 08 Apr 2019 18:24:24 +0000 (UTC) Subject: [dpdk-dev] [PATCH 3/3] acl: adjust the tests X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This makes the tests pass, and also ensures that on platforms where the testing is supported, we can properly test the implementation specific code. One edge case is when we run on x86_64 systems that don't support AVX2, but where the compiler can generate such instructions. That could be an enhancement in the future, but for now at least the tests will pass. 
Signed-off-by: Aaron Conole --- app/test/test_acl.c | 62 +++++++++++++-------------------- lib/librte_acl/Makefile | 1 + lib/librte_acl/acl_run_notsup.c | 46 ++++++++++++++++++++++++ lib/librte_acl/meson.build | 4 +-- 4 files changed, 73 insertions(+), 40 deletions(-) create mode 100644 lib/librte_acl/acl_run_notsup.c diff --git a/app/test/test_acl.c b/app/test/test_acl.c index b1f75d1bc..c44faa251 100644 --- a/app/test/test_acl.c +++ b/app/test/test_acl.c @@ -408,6 +408,9 @@ test_classify(void) return -1; } + /* Always use the scalar testing for now. */ + rte_acl_set_ctx_classify(acx, RTE_ACL_CLASSIFY_SCALAR); + ret = 0; for (i = 0; i != TEST_CLASSIFY_ITER; i++) { @@ -547,6 +550,7 @@ test_build_ports_range(void) for (i = 0; i != RTE_DIM(test_data); i++) data[i] = (uint8_t *)&test_data[i]; + rte_acl_set_ctx_classify(acx, RTE_ACL_CLASSIFY_SCALAR); for (i = 0; i != RTE_DIM(test_rules); i++) { rte_acl_reset(acx); ret = test_classify_buid(acx, test_rules, i + 1); @@ -911,6 +915,8 @@ test_convert_rules(const char *desc, return -1; } + rte_acl_set_ctx_classify(acx, RTE_ACL_CLASSIFY_SCALAR); + rc = convert_rules(acx, convert, acl_test_rules, RTE_DIM(acl_test_rules)); if (rc != 0) @@ -1352,7 +1358,7 @@ test_invalid_parameters(void) struct rte_acl_param param; struct rte_acl_ctx *acx; struct rte_acl_ipv4vlan_rule rule; - int result; + int i, result; uint32_t layout[RTE_ACL_IPV4VLAN_NUM] = {0}; @@ -1513,45 +1519,25 @@ test_invalid_parameters(void) return -1; } - /* SSE classify test */ - - /* cover zero categories in classify (should not fail) */ - result = rte_acl_classify(acx, NULL, NULL, 0, 0); - if (result != 0) { - printf("Line %i: SSE classify with zero categories " - "failed!\n", __LINE__); - rte_acl_free(acx); - return -1; - } - - /* cover invalid but positive categories in classify */ - result = rte_acl_classify(acx, NULL, NULL, 0, 3); - if (result == 0) { - printf("Line %i: SSE classify with 3 categories " - "should have failed!\n", __LINE__); - rte_acl_free(acx); 
- return -1; - } - - /* scalar classify test */ + for (i = RTE_ACL_CLASSIFY_DEFAULT; i < RTE_ACL_CLASSIFY_NUM; ++i) { + rte_acl_set_ctx_classify(acx, i); /* set up the classify code */ - /* cover zero categories in classify (should not fail) */ - result = rte_acl_classify_alg(acx, NULL, NULL, 0, 0, - RTE_ACL_CLASSIFY_SCALAR); - if (result != 0) { - printf("Line %i: Scalar classify with zero categories " - "failed!\n", __LINE__); - rte_acl_free(acx); - return -1; - } + /* cover zero categories in classify (should not fail) */ + result = rte_acl_classify(acx, NULL, NULL, 0, 0); + if (result != 0 && result != -ENOTSUP) { + printf("AGL: %d, ACL classify with zero categories failed: %d!\n", + i, result); + return -1; + } - /* cover invalid but positive categories in classify */ - result = rte_acl_classify(acx, NULL, NULL, 0, 3); - if (result == 0) { - printf("Line %i: Scalar classify with 3 categories " - "should have failed!\n", __LINE__); - rte_acl_free(acx); - return -1; + /* cover invalid but positive categories in classify */ + result = rte_acl_classify(acx, NULL, NULL, 0, 3); + /* we don't check for -ENOTSUP here, since it is a failure */ + if (result == 0) { + printf("AGL: %d, ACL classify with 3 categories should fail!\n", + i); + return -1; + } } /* free ACL context */ diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile index ea5edf00a..c5dfdb832 100644 --- a/lib/librte_acl/Makefile +++ b/lib/librte_acl/Makefile @@ -21,6 +21,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_ACL) += rte_acl.c SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_bld.c SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_gen.c SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_scalar.c +SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_notsup.c ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),) SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_neon.c diff --git a/lib/librte_acl/acl_run_notsup.c b/lib/librte_acl/acl_run_notsup.c new file mode 100644 index 000000000..2bcc6e67f --- /dev/null +++ b/lib/librte_acl/acl_run_notsup.c @@ 
-0,0 +1,46 @@ +#include +#include "acl.h" + +/* + * If the compiler doesn't support AVX2 instructions, + * then the dummy one would be used instead for AVX2 classify method. + */ +int __rte_weak +rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx, + __rte_unused const uint8_t **data, + __rte_unused uint32_t *results, + __rte_unused uint32_t num, + __rte_unused uint32_t categories) +{ + return -ENOTSUP; +} + +int __rte_weak +rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx, + __rte_unused const uint8_t **data, + __rte_unused uint32_t *results, + __rte_unused uint32_t num, + __rte_unused uint32_t categories) +{ + return -ENOTSUP; +} + +int __rte_weak +rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx, + __rte_unused const uint8_t **data, + __rte_unused uint32_t *results, + __rte_unused uint32_t num, + __rte_unused uint32_t categories) +{ + return -ENOTSUP; +} + +int __rte_weak +rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx, + __rte_unused const uint8_t **data, + __rte_unused uint32_t *results, + __rte_unused uint32_t num, + __rte_unused uint32_t categories) +{ + return -ENOTSUP; +} diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build index 03c19e4e5..fc8689aa9 100644 --- a/lib/librte_acl/meson.build +++ b/lib/librte_acl/meson.build @@ -2,8 +2,8 @@ # Copyright(c) 2017 Intel Corporation version = 2 -sources = files('acl_bld.c', 'acl_gen.c', 'acl_run_scalar.c', - 'rte_acl.c', 'tb_mem.c') +sources = files('acl_bld.c', 'acl_gen.c', 'acl_run_notsup.c', + 'acl_run_scalar.c', 'rte_acl.c', 'tb_mem.c') headers = files('rte_acl.h', 'rte_acl_osdep.h') if arch_subdir == 'x86'