From patchwork Fri Aug 7 16:28:26 2020
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "Ananyev, Konstantin" <konstantin.ananyev@intel.com>
X-Patchwork-Id: 75314
X-Patchwork-Delegate: thomas@monjalon.net
Return-Path: <dev-bounces@dpdk.org>
X-Original-To: patchwork@inbox.dpdk.org
Delivered-To: patchwork@inbox.dpdk.org
Received: from dpdk.org (dpdk.org [92.243.14.124])
 by inbox.dpdk.org (Postfix) with ESMTP id 4D69BA04B0;
 Fri, 7 Aug 2020 18:29:25 +0200 (CEST)
Received: from [92.243.14.124] (localhost [127.0.0.1])
 by dpdk.org (Postfix) with ESMTP id B40161C0C2;
 Fri, 7 Aug 2020 18:28:59 +0200 (CEST)
Received: from mga02.intel.com (mga02.intel.com [134.134.136.20])
 by dpdk.org (Postfix) with ESMTP id 861921C0BC
 for <dev@dpdk.org>; Fri, 7 Aug 2020 18:28:58 +0200 (CEST)
IronPort-SDR: XBFACAqq8vpD1VmmC/Qv7JixxPipMNY1eYcFZi9u+20WjxJShh8VauX99UTLZ+WKycJpx8DfRF
 fLW30Oa7FZXw==
X-IronPort-AV: E=McAfee;i="6000,8403,9706"; a="141003400"
X-IronPort-AV: E=Sophos;i="5.75,446,1589266800"; d="scan'208";a="141003400"
X-Amp-Result: SKIPPED(no attachment in message)
X-Amp-File-Uploaded: False
Received: from orsmga008.jf.intel.com ([10.7.209.65])
 by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;
 07 Aug 2020 09:28:58 -0700
IronPort-SDR: F+B6XTHL0Z+4S6QajgiT+avQVQEs1MsWQuYrZ5ynFxlENU/tRVTflYy7wzKbDkrRWT925LmvqH
 kL1y4bpI+A0Q==
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.75,446,1589266800"; d="scan'208";a="323799717"
Received: from sivswdev08.ir.intel.com ([10.237.217.47])
 by orsmga008.jf.intel.com with ESMTP; 07 Aug 2020 09:28:56 -0700
From: Konstantin Ananyev <konstantin.ananyev@intel.com>
To: dev@dpdk.org
Cc: jerinj@marvell.com, ruifeng.wang@arm.com, vladimir.medvedkin@intel.com,
 Konstantin Ananyev <konstantin.ananyev@intel.com>
Date: Fri, 7 Aug 2020 17:28:26 +0100
Message-Id: <20200807162829.11690-5-konstantin.ananyev@intel.com>
X-Mailer: git-send-email 2.18.0
In-Reply-To: <20200807162829.11690-1-konstantin.ananyev@intel.com>
References: <20200807162829.11690-1-konstantin.ananyev@intel.com>
Subject: [dpdk-dev] [PATCH 20.11 4/7] acl: add infrastructure to support
 AVX512 classify
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: DPDK patches and discussions
List-Unsubscribe: ,
List-Archive:
List-Post:
List-Help:
List-Subscribe: ,
Errors-To: dev-bounces@dpdk.org
Sender: "dev" <dev-bounces@dpdk.org>

Add necessary changes to support new AVX512 specific ACL classify
algorithm:
- changes in meson.build and Makefile to check that build tools
  (compiler, assembler, etc.) do properly support AVX512.
- dummy rte_acl_classify_avx512() for targets where AVX512
  implementation couldn't be properly supported.

Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
 config/x86/meson.build          |  3 ++-
 lib/librte_acl/Makefile         | 26 ++++++++++++++++++++++
 lib/librte_acl/acl.h            |  4 ++++
 lib/librte_acl/acl_run_avx512.c | 17 ++++++++++++++
 lib/librte_acl/meson.build      | 39 +++++++++++++++++++++++++++++++++
 lib/librte_acl/rte_acl.c        | 17 ++++++++++++++
 lib/librte_acl/rte_acl.h        |  1 +
 7 files changed, 106 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_acl/acl_run_avx512.c

diff --git a/config/x86/meson.build b/config/x86/meson.build
index 6ec020ef6..c5626e914 100644
--- a/config/x86/meson.build
+++ b/config/x86/meson.build
@@ -23,7 +23,8 @@ foreach f:base_flags
 endforeach
 
 optional_flags = ['AES', 'PCLMUL',
-		'AVX', 'AVX2', 'AVX512F',
+		'AVX', 'AVX2',
+		'AVX512F', 'AVX512VL', 'AVX512CD', 'AVX512BW',
 		'RDRND', 'RDSEED']
 foreach f:optional_flags
 	if cc.get_define('__@0@__'.format(f), args: machine_args) == '1'
diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile
index f4332b044..8bd469c2b 100644
--- a/lib/librte_acl/Makefile
+++ b/lib/librte_acl/Makefile
@@ -58,6 +58,32 @@ ifeq ($(CC_AVX2_SUPPORT), 1)
 	CFLAGS_rte_acl.o += -DCC_AVX2_SUPPORT
 endif
 
+# compile AVX512 version if:
+# we are building 64-bit binary AND binutils can generate proper code
+ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
+
+	BINUTIL_OK=$(shell AS=as; \
+		$(RTE_SDK)/buildtools/binutils-avx512-check.sh && \
+		echo 1)
+	ifeq ($(BINUTIL_OK), 1)
+
+		# If the compiler supports AVX512 instructions,
+		# then add support for AVX512 classify method.
+
+		CC_AVX512_FLAGS=$(shell $(CC) \
+			-mavx512f -mavx512vl -mavx512cd -mavx512bw \
+			-dM -E - </dev/null 2>&1 | grep AVX512 | wc -l)
+		ifeq ($(CC_AVX512_FLAGS), 4)
+			CFLAGS_acl_run_avx512.o += -mavx512f
+			CFLAGS_acl_run_avx512.o += -mavx512vl
+			CFLAGS_acl_run_avx512.o += -mavx512cd
+			CFLAGS_acl_run_avx512.o += -mavx512bw
+			SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_avx512.c
+			CFLAGS_rte_acl.o += -DCC_AVX512_SUPPORT
+		endif
+	endif
+endif
+
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_ACL)-include := rte_acl_osdep.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_ACL)-include += rte_acl.h
diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h
index 39d45a0c2..2022cf253 100644
--- a/lib/librte_acl/acl.h
+++ b/lib/librte_acl/acl.h
@@ -201,6 +201,10 @@ int
 rte_acl_classify_avx2(const struct rte_acl_ctx *ctx, const uint8_t **data,
 	uint32_t *results, uint32_t num, uint32_t categories);
 
+int
+rte_acl_classify_avx512(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories);
+
 int
 rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data,
 	uint32_t *results, uint32_t num, uint32_t categories);
diff --git a/lib/librte_acl/acl_run_avx512.c b/lib/librte_acl/acl_run_avx512.c
new file mode 100644
index 000000000..67274989d
--- /dev/null
+++ b/lib/librte_acl/acl_run_avx512.c
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include "acl_run_sse.h"
+
+int
+rte_acl_classify_avx512(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories)
+{
+	if (num >= MAX_SEARCHES_SSE8)
+		return search_sse_8(ctx, data, results, num, categories);
+	if (num >= MAX_SEARCHES_SSE4)
+		return search_sse_4(ctx, data, results, num, categories);
+
+	return rte_acl_classify_scalar(ctx, data, results, num, categories);
+}
diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build
index d1e2c184c..b2fd61cad 100644
--- a/lib/librte_acl/meson.build
+++ b/lib/librte_acl/meson.build
@@ -27,6 +27,45 @@ if dpdk_conf.has('RTE_ARCH_X86')
 		cflags += '-DCC_AVX2_SUPPORT'
 	endif
 
+	# compile AVX512 version if:
+	# we are building 64-bit binary AND binutils can generate proper code
+
+	if dpdk_conf.has('RTE_ARCH_X86_64') and binutils_ok.returncode() == 0
+
+		# compile AVX512 version if either:
+		# a. we have AVX512 supported in minimum instruction set
+		#    baseline
+		# b. it's not minimum instruction set, but supported by
+		#    compiler
+		#
+		# in former case, just add avx512 C file to files list
+		# in latter case, compile c file to static lib, using correct
+		# compiler flags, and then have the .o file from static lib
+		# linked into main lib.
+
+		if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX512F') and \
+			dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX512VL') and \
+			dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX512CD') and \
+			dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX512BW')
+
+			sources += files('acl_run_avx512.c')
+			cflags += '-DCC_AVX512_SUPPORT'
+
+		elif cc.has_multi_arguments('-mavx512f', '-mavx512vl',
+					'-mavx512cd', '-mavx512bw')
+
+			avx512_tmplib = static_library('avx512_tmp',
+				'acl_run_avx512.c',
+				dependencies: static_rte_eal,
+				c_args: cflags +
+					['-mavx512f', '-mavx512vl',
+					 '-mavx512cd', '-mavx512bw'])
+			objs += avx512_tmplib.extract_objects(
+					'acl_run_avx512.c')
+			cflags += '-DCC_AVX512_SUPPORT'
+		endif
+	endif
+
 elif dpdk_conf.has('RTE_ARCH_ARM') or dpdk_conf.has('RTE_ARCH_ARM64')
 	cflags += '-flax-vector-conversions'
 	sources += files('acl_run_neon.c')
diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
index 715b02359..71b4afb08 100644
--- a/lib/librte_acl/rte_acl.c
+++ b/lib/librte_acl/rte_acl.c
@@ -16,6 +16,22 @@ static struct rte_tailq_elem rte_acl_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_acl_tailq)
 
+#ifndef CC_AVX512_SUPPORT
+/*
+ * If the compiler doesn't support AVX512 instructions,
+ * then the dummy one would be used instead for AVX512 classify method.
+ */
+int
+rte_acl_classify_avx512(__rte_unused const struct rte_acl_ctx *ctx,
+	__rte_unused const uint8_t **data,
+	__rte_unused uint32_t *results,
+	__rte_unused uint32_t num,
+	__rte_unused uint32_t categories)
+{
+	return -ENOTSUP;
+}
+#endif
+
 #ifndef CC_AVX2_SUPPORT
 /*
  * If the compiler doesn't support AVX2 instructions,
@@ -77,6 +93,7 @@ static const rte_acl_classify_t classify_fns[] = {
 	[RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2,
 	[RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon,
 	[RTE_ACL_CLASSIFY_ALTIVEC] = rte_acl_classify_altivec,
+	[RTE_ACL_CLASSIFY_AVX512] = rte_acl_classify_avx512,
 };
 
 /* by default, use always available scalar code path. */
diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h
index b814423a6..6f39042fc 100644
--- a/lib/librte_acl/rte_acl.h
+++ b/lib/librte_acl/rte_acl.h
@@ -241,6 +241,7 @@ enum rte_acl_classify_alg {
 	RTE_ACL_CLASSIFY_AVX2 = 3, /**< requires AVX2 support. */
 	RTE_ACL_CLASSIFY_NEON = 4, /**< requires NEON support. */
 	RTE_ACL_CLASSIFY_ALTIVEC = 5, /**< requires ALTIVEC support. */
+	RTE_ACL_CLASSIFY_AVX512 = 6, /**< requires AVX512 support. */
 };
 
 /**