From patchwork Thu Mar 6 19:08:33 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andre Muezerie X-Patchwork-Id: 152272 X-Patchwork-Delegate: david.marchand@redhat.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id A13D04635D; Thu, 6 Mar 2025 20:08:46 +0100 (CET) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 41AF540DCB; Thu, 6 Mar 2025 20:08:42 +0100 (CET) Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by mails.dpdk.org (Postfix) with ESMTP id 8B03440B94 for ; Thu, 6 Mar 2025 20:08:39 +0100 (CET) Received: by linux.microsoft.com (Postfix, from userid 1213) id C31EC211049E; Thu, 6 Mar 2025 11:08:38 -0800 (PST) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com C31EC211049E DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.microsoft.com; s=default; t=1741288118; bh=MyKJVfms2OfTIx55C6iZwuiWEeC3r/l0Pg0ht9FwdNM=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=kQHZdueMRqJQof9fjmWH39hVtbVDXTEuBZ/1tQFrzH0uX6B7T2FrY1rqCZsgotRnR QcZnEfqi6qKYcc6BxsBwQj/YQAQTEzQFnFjwGW3HpPJNjxSzeg//eAr8QMOiNI50MG sHLVyypupx3G+UxUKJPVGjYiW3GMB/7XbPDfpIgc= From: Andre Muezerie To: andremue@linux.microsoft.com Cc: dev@dpdk.org Subject: [PATCH v3 1/2] config: allow AVX512 instructions to be used with MSVC Date: Thu, 6 Mar 2025 11:08:33 -0800 Message-Id: <1741288114-15179-2-git-send-email-andremue@linux.microsoft.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1741288114-15179-1-git-send-email-andremue@linux.microsoft.com> References: <1740707537-10517-1-git-send-email-andremue@linux.microsoft.com> <1741288114-15179-1-git-send-email-andremue@linux.microsoft.com> X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions 
List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Up to now MSVC has been used with the default mode, which uses SSE2 instructions for scalar floating-point and vector calculations. https://learn.microsoft.com/en-us/cpp/build/reference/arch-x64?view=msvc-170 This patch allows users to specify the CPU for which the generated code should be optimized, in the same way it's done for GCC: by passing the CPU name. When no explicit CPU name is passed, 'native' is assumed (like it happens with GCC) and the code will be optimized for the same CPU type used to compile the code. MSVC does not provide this functionality natively, so logic was added to a new meson.build file under config/x86/msvc to handle these differences, detecting which instruction sets are supported by the CPU(s), passing the best options to MSVC and setting the correct macros (like __AVX512F__) so that the DPDK code can rely on them like it is done with GCC. Signed-off-by: Andre Muezerie --- config/x86/meson.build | 1 + config/x86/msvc/meson.build | 287 ++++++++++++++++++++++++++++++++++++ 2 files changed, 288 insertions(+) create mode 100644 config/x86/msvc/meson.build diff --git a/config/x86/meson.build b/config/x86/meson.build index c3564b0011..ab21ebf396 100644 --- a/config/x86/meson.build +++ b/config/x86/meson.build @@ -49,6 +49,7 @@ endif dpdk_conf.set('RTE_MAX_NUMA_NODES', 32) if is_ms_compiler + subdir('msvc') subdir_done() endif diff --git a/config/x86/msvc/meson.build b/config/x86/msvc/meson.build new file mode 100644 index 0000000000..646c9a8515 --- /dev/null +++ b/config/x86/msvc/meson.build @@ -0,0 +1,287 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2025 Microsoft Corporation + +cc_avx2_flags = ['/arch:AVX2'] +cc_avx512_flags = ['/arch:AVX512'] +cc_has_avx512 = true + +cpuid_code = ''' + #include <stdio.h> + #include <stdint.h> + #include <intrin.h> + + uint32_t f1_ECX = 0; + uint32_t f1_EDX = 0; + uint32_t f7_EBX = 0; + uint32_t f7_ECX = 0; + 
void get_support_flags() + { + int ids_max; + int data[4]; + + /* + * Calling __cpuid with 0x0 as the function_id argument + * gets the number of the highest valid function ID. + */ + __cpuid(data, 0); + ids_max = data[0]; + + if (1 <= ids_max) { + __cpuidex(data, 1, 0); + f1_ECX = data[2]; + f1_EDX = data[3]; + + if (7 <= ids_max) { + __cpuidex(data, 7, 0); + f7_EBX = data[1]; + f7_ECX = data[2]; + } + } + } + + int get_instruction_support() + { + get_support_flags(); + + #ifdef SSE3 + return (f1_ECX & (1UL << 0)) ? 1 : 0; + #endif + #ifdef PCLMUL + return (f1_ECX & (1UL << 1)) ? 1 : 0; + #endif + #ifdef SSSE3 + return (f1_ECX & (1UL << 9)) ? 1 : 0; + #endif + #ifdef SSE4_1 + return (f1_ECX & (1UL << 19)) ? 1 : 0; + #endif + #ifdef SSE4_2 + return (f1_ECX & (1UL << 20)) ? 1 : 0; + #endif + #ifdef AES + return (f1_ECX & (1UL << 25)) ? 1 : 0; + #endif + #ifdef AVX + return (f1_ECX & (1UL << 28)) ? 1 : 0; + #endif + #ifdef RDRND + return (f1_ECX & (1UL << 30)) ? 1 : 0; + #endif + #ifdef SSE + return (f1_EDX & (1UL << 25)) ? 1 : 0; + #endif + #ifdef SSE2 + return (f1_EDX & (1UL << 26)) ? 1 : 0; + #endif + #ifdef AVX2 + return (f7_EBX & (1UL << 5)) ? 1 : 0; + #endif + #ifdef AVX512F + return (f7_EBX & (1UL << 16)) ? 1 : 0; + #endif + #ifdef AVX512DQ + return (f7_EBX & (1UL << 17)) ? 1 : 0; + #endif + #ifdef RDSEED + return (f7_EBX & (1UL << 18)) ? 1 : 0; + #endif + #ifdef AVX512IFMA + return (f7_EBX & (1UL << 21)) ? 1 : 0; + #endif + #ifdef AVX512CD + return (f7_EBX & (1UL << 28)) ? 1 : 0; + #endif + #ifdef AVX512BW + return (f7_EBX & (1UL << 30)) ? 1 : 0; + #endif + #ifdef AVX512VL + return (f7_EBX & (1UL << 31)) ? 1 : 0; + #endif + #ifdef GFNI + return (f7_ECX & (1UL << 8)) ? 1 : 0; + #endif + #ifdef VPCLMULQDQ + return (f7_ECX & (1UL << 10)) ? 
1 : 0; + #endif + + return -1; + } + + int main(int argc, char *argv[]) + { + int res = get_instruction_support(); + if (res == -1) { + printf("Unknown instruction set"); + return -1; + } + printf("%d", res); + + return 0; + } +''' + +# The data in the table below can be found here: +# https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html +# A tool to easily update this table can be found under devtools/dump-cpu-flags. +# The table only contains CPUs that have SSE4.2, as this instruction set is required by DPDK. +# That means that in addition to the instruction sets mentioned in the table, all these CPUs +# also have ['SSE', 'SSE2', 'SSE3', 'SSSE3', 'SSE4_1', 'SSE4_2'] +cpu_type_to_flags = { + 'x86-64-v2': [], + 'x86-64-v3': ['AVX', 'AVX2'], + 'x86-64-v4': ['AVX', 'AVX2', 'AVX512F', 'AVX512VL', 'AVX512BW', 'AVX512DQ', 'AVX512CD'], + 'nehalem': [], + 'corei7': [], + 'westmere': ['PCLMUL'], + 'sandybridge': ['AVX', 'PCLMUL'], + 'corei7-avx': ['AVX', 'PCLMUL'], + 'ivybridge': ['AVX', 'PCLMUL', 'RDRND'], + 'core-avx-i': ['AVX', 'PCLMUL', 'RDRND'], + 'haswell': ['AVX', 'PCLMUL', 'RDRND', 'AVX2'], + 'core-avx2': ['AVX', 'PCLMUL', 'RDRND', 'AVX2'], + 'broadwell': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED'], + 'skylake': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES'], + 'skylake-avx512': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'AVX512F', 'AVX512VL', 'AVX512BW', 'AVX512DQ', 'AVX512CD'], + 'cascadelake': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'AVX512F', 'AVX512VL', 'AVX512BW', 'AVX512DQ', 'AVX512CD'], + 'cannonlake': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'AVX512F', 'AVX512VL', 'AVX512BW', 'AVX512DQ', 'AVX512CD', 'AVX512IFMA'], + 'cooperlake': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'AVX512F', 'AVX512VL', 'AVX512BW', 'AVX512DQ', 'AVX512CD'], + 'icelake-client': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'AVX512F', 'AVX512VL', 'AVX512BW', 'AVX512DQ', 'AVX512CD', 'AVX512IFMA', 'GFNI'], + 
'icelake-server': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'AVX512F', 'AVX512VL', 'AVX512BW', 'AVX512DQ', 'AVX512CD', 'AVX512IFMA', 'GFNI'], + 'tigerlake': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'AVX512F', 'AVX512VL', 'AVX512BW', 'AVX512DQ', 'AVX512CD', 'AVX512IFMA', 'GFNI'], + 'rocketlake': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'AVX512F', 'AVX512VL', 'AVX512BW', 'AVX512DQ', 'AVX512CD', 'AVX512IFMA', 'GFNI'], + 'alderlake': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'GFNI'], + 'raptorlake': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'GFNI'], + 'meteorlake': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'GFNI'], + 'gracemont': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'GFNI'], + 'arrowlake': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'GFNI'], + 'arrowlake-s': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'GFNI'], + 'lunarlake': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'GFNI'], + 'pantherlake': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'GFNI'], + 'sapphirerapids': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'AVX512F', 'AVX512VL', 'AVX512BW', 'AVX512DQ', 'AVX512CD', 'AVX512IFMA', 'GFNI'], + 'emeraldrapids': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'AVX512F', 'AVX512VL', 'AVX512BW', 'AVX512DQ', 'AVX512CD', 'AVX512IFMA', 'GFNI'], + 'graniterapids': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'AVX512F', 'AVX512VL', 'AVX512BW', 'AVX512DQ', 'AVX512CD', 'AVX512IFMA', 'GFNI'], + 'graniterapids-d': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'AVX512F', 'AVX512VL', 'AVX512BW', 'AVX512DQ', 'AVX512CD', 'AVX512IFMA', 'GFNI'], + 'diamondrapids': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'AVX512F', 'AVX512VL', 
'AVX512BW', 'AVX512DQ', 'AVX512CD', 'AVX512IFMA', 'GFNI'], + 'silvermont': ['PCLMUL', 'RDRND'], + 'slm': ['PCLMUL', 'RDRND'], + 'goldmont': ['PCLMUL', 'RDRND', 'RDSEED', 'AES'], + 'goldmont-plus': ['PCLMUL', 'RDRND', 'RDSEED', 'AES'], + 'tremont': ['PCLMUL', 'RDRND', 'RDSEED', 'AES', 'GFNI'], + 'sierraforest': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'GFNI'], + 'grandridge': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'GFNI'], +'clearwaterforest': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'GFNI'], + 'bdver1': ['AVX', 'PCLMUL', 'AES'], + 'bdver2': ['AVX', 'PCLMUL', 'AES'], + 'bdver3': ['AVX', 'PCLMUL', 'AES'], + 'bdver4': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'AES'], + 'znver1': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES'], + 'znver2': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES'], + 'znver3': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ'], + 'znver4': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'AVX512F', 'AVX512VL', 'AVX512BW', 'AVX512DQ', 'AVX512CD', 'AVX512IFMA', 'GFNI'], + 'znver5': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'AVX512F', 'AVX512VL', 'AVX512BW', 'AVX512DQ', 'AVX512CD', 'AVX512IFMA', 'GFNI'], + 'btver2': ['AVX', 'PCLMUL', 'AES'], + 'lujiazui': ['PCLMUL', 'RDRND', 'RDSEED', 'AES'], + 'yongfeng': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES'], + 'shijidadao': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES'], +} + +# Determine cpu_flags for a given configuration. +# SSE instructions up to 4.2 are required for DPDK. +cpu_flags = ['SSE', 'SSE2', 'SSE3', 'SSSE3', 'SSE4_1', 'SSE4_2'] + +message('cpu_instruction_set: @0@'.format(cpu_instruction_set)) + +if cpu_instruction_set == '' + # Nothing to do as cpu_flags already holds all the required flags. +elif cpu_instruction_set == 'native' + # MSVC behaves differently than GCC regarding supported instruction sets. 
+ # While GCC will create macros like __AVX512F__ when such instruction set is + # supported by the current CPU, MSVC does not do that. MSVC will create that + # macro when parameter /arch:AVX512 is passed to the compiler, even when the + # CPU does not have that instruction set (by design). So there's a need to + # look at CPUID flags to figure out what is really supported by the CPU, so + # that the correct /arch value can be passed to the compiler. + # The macros also need to be explicitly defined, as /arch will not create all + # macros GCC creates under the same conditions. + # As an example, /arch:AVX512 creates __AVX512BW__, but does not create __SSE2__. + # More details available here: + # https://learn.microsoft.com/en-us/cpp/preprocessor/predefined-macros + + optional_flags = [ + 'PCLMUL', + 'AES', + 'AVX', + 'RDRND', + 'AVX2', + 'AVX512F', + 'AVX512BW', + 'AVX512DQ', + 'AVX512VL', + 'AVX512CD', + 'AVX512IFMA', + 'GFNI', + 'RDSEED', + 'VPCLMULQDQ', + ] + foreach f:optional_flags + result = cc.run(cpuid_code, args: '-D@0@'.format(f), + name: 'instruction set @0@'.format(f)) + has_instr_set = result.returncode() == 0 and result.stdout() == '1' + if has_instr_set + cpu_flags += f + endif + message('Target has @0@: @1@'.format(f, has_instr_set)) + endforeach +else + # An explicit cpu_instruction_set was provided. Get cpu_flags + # from cpu_type_to_flags table. + if cpu_instruction_set not in cpu_type_to_flags + error('CPU not known or not supported. Please update the table with known CPUs if needed.') + endif + cpu_flags += cpu_type_to_flags[cpu_instruction_set] +endif + +# Now that all cpu_flags are known, set compile_time_cpuflags and also +# machine_args to ensure that the instruction set #defines (like __SSE2__) +# are always present in the preprocessor. 
+message('cpu_flags: @0@'.format(cpu_flags)) + +foreach flag:cpu_flags + machine_args += '/D__@0@__'.format(flag) + if flag == 'PCLMUL' + flag = 'PCLMULQDQ' + elif flag == 'RDRND' + flag = 'RDRAND' + endif + compile_time_cpuflags += ['RTE_CPUFLAG_' + flag] +endforeach + +# Per https://learn.microsoft.com/en-us/cpp/build/reference/arch-x64?view=msvc-170 +# option '/arch:AVX512' enables all five flags used in the expression below. +target_has_avx512 = ('AVX512F' in cpu_flags and + 'AVX512BW' in cpu_flags and + 'AVX512DQ' in cpu_flags and + 'AVX512CD' in cpu_flags and + 'AVX512VL' in cpu_flags) + +# Decide which instruction sets should be used by the compiler. +# With MSVC, intrinsic functions are always enabled. However, for the +# compiler to use an extended instruction set for automatically +# generated code "/arch" needs to be passed. So we instruct the compiler +# to use the largest set that is supported by the CPU. It is implied that +# smaller sets than the largest selected are included, as described here: +# https://learn.microsoft.com/en-us/cpp/build/reference/arch-x64?view=msvc-170 +if target_has_avx512 + machine_args += ['/arch:AVX512'] +elif 'RTE_CPUFLAG_AVX2' in compile_time_cpuflags + machine_args += ['/arch:AVX2'] +elif 'RTE_CPUFLAG_AVX' in compile_time_cpuflags + machine_args += ['/arch:AVX'] +else + # SSE4.2 is expected to always be available + machine_args += ['/arch:SSE4.2'] +endif + +message('machine_args: @0@'.format(machine_args)) +message('compile_time_cpuflags: @0@'.format(compile_time_cpuflags)) From patchwork Thu Mar 6 19:08:34 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Andre Muezerie X-Patchwork-Id: 152273 X-Patchwork-Delegate: david.marchand@redhat.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) 
with ESMTP id 80FDD4635D; Thu, 6 Mar 2025 20:08:53 +0100 (CET) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 475C540DD2; Thu, 6 Mar 2025 20:08:43 +0100 (CET) Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by mails.dpdk.org (Postfix) with ESMTP id A192540BA4 for ; Thu, 6 Mar 2025 20:08:39 +0100 (CET) Received: by linux.microsoft.com (Postfix, from userid 1213) id CF899211049F; Thu, 6 Mar 2025 11:08:38 -0800 (PST) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com CF899211049F DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.microsoft.com; s=default; t=1741288118; bh=ZiWTeS26a+HZHgs+4ksaMudS3mPKwePPnU9rNdpZBwM=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=FzgWh7K3unbX8amGj7eQ64hetpgdC0l0SlKBwiGCC8a2llW86yAucZcrB6a+Xpsjo nks4WMsFSIfS8EdjU6XP+OWNI7jyS57yyuRBJk9eSyu/kUXbqEkJi3+ORHpvq4sHhm ou93PArHKZb16caHCuTXUgcy9rJQfKsycjlf4mcI= From: Andre Muezerie To: andremue@linux.microsoft.com Cc: dev@dpdk.org Subject: [PATCH v3 2/2] devtools/dump-cpu-flags: add tool to update CPU flags table Date: Thu, 6 Mar 2025 11:08:34 -0800 Message-Id: <1741288114-15179-3-git-send-email-andremue@linux.microsoft.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1741288114-15179-1-git-send-email-andremue@linux.microsoft.com> References: <1740707537-10517-1-git-send-email-andremue@linux.microsoft.com> <1741288114-15179-1-git-send-email-andremue@linux.microsoft.com> MIME-Version: 1.0 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org This patchset allows users to specify the CPU for which the generated code should be optimized for by passing the CPU name. MSVC does not provide this functionality natively, so logic was added. 
This additional logic relies on a table which stores instruction set availability (like AVX512F) for different CPUs. To make it easier to update this table a new devtool is introduced with this patch. The new tool generates the table entries for all CPUs listed in an input file using a recent version of the compiler, which has all the information needed. This enormously reduces the amount of work needed to update the table in msvc/meson.build and makes the process much less error prone. Signed-off-by: Andre Muezerie --- devtools/dump-cpu-flags/README.md | 25 +++++ devtools/dump-cpu-flags/cpu-names.txt | 120 +++++++++++++++++++++ devtools/dump-cpu-flags/dump-cpu-flags.cpp | 119 ++++++++++++++++++++ devtools/dump-cpu-flags/dump-cpu-flags.py | 41 +++++++ 4 files changed, 305 insertions(+) create mode 100644 devtools/dump-cpu-flags/README.md create mode 100644 devtools/dump-cpu-flags/cpu-names.txt create mode 100644 devtools/dump-cpu-flags/dump-cpu-flags.cpp create mode 100644 devtools/dump-cpu-flags/dump-cpu-flags.py diff --git a/devtools/dump-cpu-flags/README.md b/devtools/dump-cpu-flags/README.md new file mode 100644 index 0000000000..3db69f9f8f --- /dev/null +++ b/devtools/dump-cpu-flags/README.md @@ -0,0 +1,25 @@ +# Generating updated CPU flags + +File `config\x86\msvc\meson.build` has a table with flags indicating instruction set support for a variety of CPU types. + +Script `dump-cpu-flags.py` can be used to generate updated entries for this table. + +The CPU names are stored in file `cpu-names.txt`, which is consumed by `dump-cpu-flags.py`. The formatting used in that file is described at the top of the file itself. + +The script relies on the information embedded in the g++ compiler. This means that an updated table can automatically be generated by switching to a newer version of the compiler. This avoids the need to manually edit the entries, which is error prone. With the script the table entries can just be copied and pasted into `meson.build`. 
The only thing that might need to be done is adding new CPU names to cpu-names.txt, when new CPUs are released. + +**NOTE**: CPUs not known to the compiler will result in errors, which can be ignored (`dump-cpu-flags.py` will ignore these errors and continue). For best results use the latest g++ compiler available. + +Below is a sample output, where an error was logged because the compiler did not know about a CPU named ‘raptorlake’. + +```sh +$ ./dump-cpu-flags.py + 'x86-64-v2': [], + 'x86-64-v3': ['AVX', 'AVX2'], + 'x86-64-v4': ['AVX', 'AVX2', 'AVX512F', 'AVX512VL', 'AVX512BW', 'AVX512DQ', 'AVX512CD'], + 'alderlake': ['AVX', 'PCLMUL', 'RDRND', 'AVX2', 'RDSEED', 'AES', 'VPCLMULQDQ', 'GFNI'], +cc1plus: error: bad value (‘raptorlake’) for ‘-march=’ switch +cc1plus: note: valid arguments to ‘-march=’ switch are: nocona core2 nehalem corei7 westmere sandybridge... + 'silvermont': ['PCLMUL', 'RDRND'], + 'slm': ['PCLMUL', 'RDRND'], +``` \ No newline at end of file diff --git a/devtools/dump-cpu-flags/cpu-names.txt b/devtools/dump-cpu-flags/cpu-names.txt new file mode 100644 index 0000000000..5ceaf05c0d --- /dev/null +++ b/devtools/dump-cpu-flags/cpu-names.txt @@ -0,0 +1,120 @@ +# This file is consumed by dump-cpu-flags.py. It should contain CPU names, +# one per line. When the given CPU has a 32 bit architecture, it must be +# indicated so by appending ", 32" to the line. +# Always use the latest compiler available, otherwise it might not know +# about some CPUs listed here. 
+# The latest CPU names can be obtained from: +# https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html +# + +x86-64 +x86-64-v2 +x86-64-v3 +x86-64-v4 +i386, 32 +i486, 32 +i586, 32 +pentium, 32 +lakemont, 32 +pentium-mmx, 32 +pentiumpro, 32 +i686, 32 +pentium2, 32 +pentium3, 32 +pentium3m, 32 +pentium-m, 32 +pentium4, 32 +pentium4m, 32 +prescott, 32 +nocona +core2 +nehalem +corei7 +westmere +sandybridge +corei7-avx +ivybridge +core-avx-i +haswell +core-avx2 +broadwell +skylake +skylake-avx512 +cascadelake +cannonlake +cooperlake +icelake-client +icelake-server +tigerlake +rocketlake +alderlake +raptorlake +meteorlake +gracemont +arrowlake +arrowlake-s +lunarlake +pantherlake +sapphirerapids +emeraldrapids +graniterapids +graniterapids-d +diamondrapids +bonnell +atom +silvermont +slm +goldmont +goldmont-plus +tremont +sierraforest +grandridge +clearwaterforest +k6, 32 +k6-2, 32 +k6-3, 32 +athlon, 32 +athlon-tbird, 32 +athlon-4, 32 +athlon-xp, 32 +athlon-mp, 32 +k8 +opteron +athlon64 +athlon-fx +k8-sse3 +opteron-sse3 +athlon64-sse3 +amdfam10 +barcelona +bdver1 +bdver2 +bdver3 +bdver4 +znver1 +znver2 +znver3 +znver4 +znver5 +btver1 +btver2 +winchip-c6, 32 +winchip2, 32 +c3, 32 +c3-2, 32 +c7, 32 +samuel-2, 32 +nehemiah, 32 +esther, 32 +eden-x2 +eden-x4 +nano +nano-1000 +nano-2000 +nano-3000 +nano-x2 +nano-x4 +lujiazui +yongfeng +shijidadao +geode, 32 diff --git a/devtools/dump-cpu-flags/dump-cpu-flags.cpp b/devtools/dump-cpu-flags/dump-cpu-flags.cpp new file mode 100644 index 0000000000..3bd89c29e0 --- /dev/null +++ b/devtools/dump-cpu-flags/dump-cpu-flags.cpp @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2025 Microsoft Corporation + */ + +#include <iostream> +#include <iomanip> +#include <string> +#include <vector> + +enum option { + FILTER_OMIT_SSE_SETS = 1, +}; + +std::vector<std::string> get_cpu_flags(option options) +{ + std::vector<std::string> cpu_flags; + + if (!(options & FILTER_OMIT_SSE_SETS)) { +#ifdef __SSE__ + cpu_flags.push_back("SSE"); +#endif +#ifdef __SSE2__ + 
cpu_flags.push_back("SSE2"); +#endif +#ifdef __SSE3__ + cpu_flags.push_back("SSE3"); +#endif +#ifdef __SSSE3__ + cpu_flags.push_back("SSSE3"); +#endif +#ifdef __SSE4_1__ + cpu_flags.push_back("SSE4_1"); +#endif +#ifdef __SSE4_2__ + cpu_flags.push_back("SSE4_2"); +#endif + } + +#ifdef __AVX__ + cpu_flags.push_back("AVX"); +#endif +#ifdef __PCLMUL__ + cpu_flags.push_back("PCLMUL"); +#endif +#ifdef __RDRND__ + cpu_flags.push_back("RDRND"); +#endif +#ifdef __AVX2__ + cpu_flags.push_back("AVX2"); +#endif +#ifdef __RDSEED__ + cpu_flags.push_back("RDSEED"); +#endif +#ifdef __AES__ + cpu_flags.push_back("AES"); +#endif +#ifdef __VPCLMULQDQ__ + cpu_flags.push_back("VPCLMULQDQ"); +#endif +#ifdef __AVX512F__ + cpu_flags.push_back("AVX512F"); +#endif +#ifdef __AVX512VL__ + cpu_flags.push_back("AVX512VL"); +#endif +#ifdef __AVX512BW__ + cpu_flags.push_back("AVX512BW"); +#endif +#ifdef __AVX512DQ__ + cpu_flags.push_back("AVX512DQ"); +#endif +#ifdef __AVX512CD__ + cpu_flags.push_back("AVX512CD"); +#endif +#ifdef __AVX512IFMA__ + cpu_flags.push_back("AVX512IFMA"); +#endif +#ifdef __GFNI__ + cpu_flags.push_back("GFNI"); +#endif + return cpu_flags; +} + +void dump_cpu_flags(const std::string &cpu_name, const std::vector<std::string> &cpu_flags) +{ + std::string cpu_name_quoted = std::string("'") + cpu_name + "'"; + std::cout << std::setw(18) << cpu_name_quoted << ": ["; + for (size_t i = 0; i < cpu_flags.size(); ++i) { + if (i > 0) + std::cout << ", "; + + std::cout << "'" << cpu_flags[i] << "'"; + } + std::cout << "],\n"; +} + +bool does_cpu_meet_dpdk_requirements() +{ +#ifdef __SSE4_2__ + return true; +#endif + + return false; +} + +int main(int argc, char *argv[]) +{ + if (argc < 2) { + std::cout << "Usage: " << argv[0] << " <cpu_name>\n"; + return -1; + } + + if (does_cpu_meet_dpdk_requirements()) { + std::vector<std::string> cpu_flags = get_cpu_flags(FILTER_OMIT_SSE_SETS); + dump_cpu_flags(argv[1], cpu_flags); + } + + return 0; +} diff --git a/devtools/dump-cpu-flags/dump-cpu-flags.py 
b/devtools/dump-cpu-flags/dump-cpu-flags.py new file mode 100644 index 0000000000..660a4a6699 --- /dev/null +++ b/devtools/dump-cpu-flags/dump-cpu-flags.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2025 Microsoft Corporation + +""" +This script generates a table which lists the flags indicating which instruction sets are +supported for each CPU type. +The CPU names are stored in file cpu-names.txt, which is consumed by this script. +The script relies on the information embedded in the g++ compiler. This means that an updated +table can automatically be generated by switching to a newer version of the compiler. +The only thing that might need to be done is adding new CPU names to cpu-names.txt, when new +CPUs are released in the market. + +NOTE: CPUs not known to the compiler will result in errors, which can be ignored (this script +will ignore these errors and continue). For best results use the latest g++ compiler available. +""" + +import subprocess + +with open("cpu-names.txt", "r") as file: + for line in file: + line = line.strip() + if line.startswith("#") or line == "": + continue + + words = line.split(",") + cpu_name = words[0].strip() + if len(words) > 1: + nbits = words[1].strip() + else: + nbits = "" + + if nbits == "32": + result = subprocess.run(["g++", "dump-cpu-flags.cpp", "-o", + "dump-cpu-flags", f"-march={cpu_name}", "-m32"]) + else: + result = subprocess.run(["g++", "dump-cpu-flags.cpp", "-o", + "dump-cpu-flags", f"-march={cpu_name}"]) + + if result.returncode == 0: + subprocess.run(["./dump-cpu-flags", cpu_name])