From 9720e0de99c288d08778ad4d3bc00734280fa23b Mon Sep 17 00:00:00 2001 From: Jan Kotas Date: Fri, 31 May 2024 15:33:40 -0700 Subject: [PATCH 1/2] Delete VectorT size constants from minipal/cpufeatures Vector policy is JIT/EE interface level concern. It should not live in the PAL. --- .../Compiler/HardwareIntrinsicHelpers.cs | 17 +- src/coreclr/vm/codeman.cpp | 19 +- src/native/minipal/cpufeatures.c | 256 +++++++++--------- src/native/minipal/cpufeatures.h | 14 +- 4 files changed, 140 insertions(+), 166 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index 98d06568878dd4..d053a11f63e4ec 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -81,12 +81,9 @@ private static class XArchIntrinsicConstants public const int Avx512Vbmi = 0x800000; public const int Avx512Vbmi_vl = 0x1000000; public const int Serialize = 0x2000000; - public const int VectorT128 = 0x4000000; - public const int VectorT256 = 0x8000000; - public const int VectorT512 = 0x10000000; - public const int Avx10v1 = 0x20000000; - public const int Avx10v1_v256 = 0x40000000; - public const int Avx10v1_v512 = unchecked((int)0x80000000); + public const int Avx10v1 = 0x4000000; + public const int Avx10v1_v256 = 0x8000000; + public const int Avx10v1_v512 = 0x10000000; public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { @@ -228,9 +225,9 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_X86Base_X64 => 0, // Vector Sizes - InstructionSet.X64_VectorT128 => VectorT128, - InstructionSet.X64_VectorT256 => VectorT256, - InstructionSet.X64_VectorT512 => VectorT512, + InstructionSet.X64_VectorT128 => 0, + InstructionSet.X64_VectorT256 => Avx2, + InstructionSet.X64_VectorT512 => Avx512f, _ => throw new NotSupportedException(((InstructionSet_X64)instructionSet).ToString()) }; @@ -310,7 +307,7 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.ARM64_Sve_Arm64 => Sve, // Vector Sizes - InstructionSet.ARM64_VectorT128 => VectorT128, + InstructionSet.ARM64_VectorT128 => AdvSimd, _ => throw new NotSupportedException(((InstructionSet_ARM64)instructionSet).ToString()) }; diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index ea682d05325b9a..d115a22850a742 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1258,29 +1258,18 @@ void EEJitManager::SetCpuInfo() int cpuFeatures = minipal_getcpufeatures(); #if defined(TARGET_X86) || defined(TARGET_AMD64) - -#if defined(TARGET_X86) && !defined(TARGET_WINDOWS) - // Linux may still support no SSE/SSE2 for 32-bit - if ((cpuFeatures & XArchIntrinsicConstants_VectorT128) == 0) - { - EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("SSE and SSE2 processor support required.")); - } -#else - _ASSERTE((cpuFeatures & XArchIntrinsicConstants_VectorT128) != 0); -#endif - CPUCompileFlags.Set(InstructionSet_VectorT128); // Get the maximum bitwidth of Vector, rounding down to the nearest multiple of 128-bits uint32_t maxVectorTBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_MaxVectorTBitWidth) / 128) * 128; - if (((cpuFeatures & XArchIntrinsicConstants_VectorT256) != 0) && ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256))) + if (((cpuFeatures & XArchIntrinsicConstants_Avx2) != 0) && ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256))) { // We allow 256-bit Vector by default CPUCompileFlags.Set(InstructionSet_VectorT256); } - if (((cpuFeatures & XArchIntrinsicConstants_VectorT512) != 0) && (maxVectorTBitWidth >= 512)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512f) != 0) && (maxVectorTBitWidth >= 512)) { // We require 512-bit Vector to be opt-in CPUCompileFlags.Set(InstructionSet_VectorT512); @@ -1458,12 +1447,12 @@ void EEJitManager::SetCpuInfo() #if !defined(TARGET_WINDOWS) // Linux may still support no AdvSimd - if ((cpuFeatures & ARM64IntrinsicConstants_VectorT128) == 0) + if ((cpuFeatures & ARM64IntrinsicConstants_AdvSimd) == 0) { EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("AdvSimd processor support required.")); } #else - _ASSERTE((cpuFeatures & ARM64IntrinsicConstants_VectorT128) != 0); + _ASSERTE((cpuFeatures & ARM64IntrinsicConstants_AdvSimd) != 0); #endif CPUCompileFlags.Set(InstructionSet_VectorT128); diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index a2ff83222140cd..1af0f86f18243a 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "cpufeatures.h" #include "cpuid.h" @@ -154,159 +155,150 @@ int minipal_getcpufeatures(void) __cpuid(cpuidInfo, 0x00000000); uint32_t maxCpuId = (uint32_t)cpuidInfo[CPUID_EAX]; + assert(maxCpuId >= 1); - if (maxCpuId >= 1) - { - __cpuid(cpuidInfo, 0x00000001); + __cpuid(cpuidInfo, 0x00000001); - const int requiredBaselineEdxFlags = (1 << 25) // SSE - | (1 << 26); // SSE2 + assert((cpuidInfo[CPUID_EDX] & (1 << 25)) != 0); // SSE + assert((cpuidInfo[CPUID_EDX] & (1 << 26)) != 0); // SSE2 - if ((cpuidInfo[CPUID_EDX] & requiredBaselineEdxFlags) == requiredBaselineEdxFlags) - { - result |= XArchIntrinsicConstants_VectorT128; + if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI + { + result |= XArchIntrinsicConstants_Aes; + } - if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI - { - result |= XArchIntrinsicConstants_Aes; - } + if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // PCLMULQDQ + { + result |= XArchIntrinsicConstants_Pclmulqdq; + } - if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // PCLMULQDQ - { - result |= XArchIntrinsicConstants_Pclmulqdq; - } + if ((cpuidInfo[CPUID_ECX] & (1 << 0)) != 0) // SSE3 + { + result |= XArchIntrinsicConstants_Sse3; + + if ((cpuidInfo[CPUID_ECX] & (1 << 9)) != 0) // SSSE3 + { + result |= XArchIntrinsicConstants_Ssse3; - if ((cpuidInfo[CPUID_ECX] & (1 << 0)) != 0) // SSE3 + if ((cpuidInfo[CPUID_ECX] & (1 << 19)) != 0) // SSE4.1 { - result |= XArchIntrinsicConstants_Sse3; + result |= XArchIntrinsicConstants_Sse41; - if ((cpuidInfo[CPUID_ECX] & (1 << 9)) != 0) // SSSE3 + if ((cpuidInfo[CPUID_ECX] & (1 << 20)) != 0) // SSE4.2 { - result |= XArchIntrinsicConstants_Ssse3; + result |= XArchIntrinsicConstants_Sse42; - if ((cpuidInfo[CPUID_ECX] & (1 << 19)) != 0) // SSE4.1 + if ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0) // MOVBE { - result |= XArchIntrinsicConstants_Sse41; + result |= XArchIntrinsicConstants_Movbe; + } + + if ((cpuidInfo[CPUID_ECX] & (1 << 23)) != 0) // POPCNT + { + result |= XArchIntrinsicConstants_Popcnt; + } - if ((cpuidInfo[CPUID_ECX] & (1 << 20)) != 0) // SSE4.2 + const int requiredAvxEcxFlags = (1 << 27) // OSXSAVE + | (1 << 28); // AVX + + if ((cpuidInfo[CPUID_ECX] & requiredAvxEcxFlags) == requiredAvxEcxFlags) + { + if (IsAvxEnabled() && (xmmYmmStateSupport() == 1)) // XGETBV == 11 { - result |= XArchIntrinsicConstants_Sse42; + result |= XArchIntrinsicConstants_Avx; - if ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0) // MOVBE + if ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0) // FMA { - result |= XArchIntrinsicConstants_Movbe; + result |= XArchIntrinsicConstants_Fma; } - if ((cpuidInfo[CPUID_ECX] & (1 << 23)) != 0) // POPCNT + if (maxCpuId >= 0x07) { - result |= XArchIntrinsicConstants_Popcnt; - } - - const int requiredAvxEcxFlags = (1 << 27) // OSXSAVE - | (1 << 28); // AVX + __cpuidex(cpuidInfo, 0x00000007, 0x00000000); - if ((cpuidInfo[CPUID_ECX] & requiredAvxEcxFlags) == requiredAvxEcxFlags) - { - if (IsAvxEnabled() && (xmmYmmStateSupport() == 1)) // XGETBV == 11 + if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2 { - result |= XArchIntrinsicConstants_Avx; - - if ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0) // FMA - { - result |= XArchIntrinsicConstants_Fma; - } + result |= XArchIntrinsicConstants_Avx2; - if (maxCpuId >= 0x07) + if (IsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 { - __cpuidex(cpuidInfo, 0x00000007, 0x00000000); - - if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2 + if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F { - result |= XArchIntrinsicConstants_Avx2; - result |= XArchIntrinsicConstants_VectorT256; + result |= XArchIntrinsicConstants_Avx512f; + + bool isAVX512_VLSupported = false; + if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL + { + result |= XArchIntrinsicConstants_Avx512f_vl; + isAVX512_VLSupported = true; + } - if (IsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 + if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) // AVX512BW { - if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F + result |= XArchIntrinsicConstants_Avx512bw; + if (isAVX512_VLSupported) // AVX512BW_VL { - result |= XArchIntrinsicConstants_Avx512f; - result |= XArchIntrinsicConstants_VectorT512; - - bool isAVX512_VLSupported = false; - if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL - { - result |= XArchIntrinsicConstants_Avx512f_vl; - isAVX512_VLSupported = true; - } - - if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) // AVX512BW - { - result |= XArchIntrinsicConstants_Avx512bw; - if (isAVX512_VLSupported) // AVX512BW_VL - { - result |= XArchIntrinsicConstants_Avx512bw_vl; - } - } - - if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) // AVX512CD - { - result |= XArchIntrinsicConstants_Avx512cd; - if (isAVX512_VLSupported) // AVX512CD_VL - { - result |= XArchIntrinsicConstants_Avx512cd_vl; - } - } - - if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) // AVX512DQ - { - result |= XArchIntrinsicConstants_Avx512dq; - if (isAVX512_VLSupported) // AVX512DQ_VL - { - result |= XArchIntrinsicConstants_Avx512dq_vl; - } - } - - if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI - { - result |= XArchIntrinsicConstants_Avx512Vbmi; - if (isAVX512_VLSupported) // AVX512VBMI_VL - { - result |= XArchIntrinsicConstants_Avx512Vbmi_vl; - } - } + result |= XArchIntrinsicConstants_Avx512bw_vl; } } - __cpuidex(cpuidInfo, 0x00000007, 0x00000001); + if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) // AVX512CD + { + result |= XArchIntrinsicConstants_Avx512cd; + if (isAVX512_VLSupported) // AVX512CD_VL + { + result |= XArchIntrinsicConstants_Avx512cd_vl; + } + } - if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI + if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) // AVX512DQ { - result |= XArchIntrinsicConstants_AvxVnni; + result |= XArchIntrinsicConstants_Avx512dq; + if (isAVX512_VLSupported) // AVX512DQ_VL + { + result |= XArchIntrinsicConstants_Avx512dq_vl; + } } - if ((cpuidInfo[CPUID_EDX] & (1 << 19)) != 0) // Avx10 + if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI { - __cpuidex(cpuidInfo, 0x00000024, 0x00000000); - if((cpuidInfo[CPUID_EBX] & 0xFF) >= 1) // Avx10v1 - CPUID.(EAX=24H, ECX=00H):EBX[7:0] >= 1 + result |= XArchIntrinsicConstants_Avx512Vbmi; + if (isAVX512_VLSupported) // AVX512VBMI_VL { - if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) - { - result |= XArchIntrinsicConstants_Avx10v1; - } - - if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) - { - result |= XArchIntrinsicConstants_Avx10v1_V256; - } - - if ((cpuidInfo[CPUID_EBX] & (1 << 18)) != 0) - { - result |= XArchIntrinsicConstants_Avx10v1_V512; - } + result |= XArchIntrinsicConstants_Avx512Vbmi_vl; } } } } + + __cpuidex(cpuidInfo, 0x00000007, 0x00000001); + + if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI + { + result |= XArchIntrinsicConstants_AvxVnni; + } + + if ((cpuidInfo[CPUID_EDX] & (1 << 19)) != 0) // Avx10 + { + __cpuidex(cpuidInfo, 0x00000024, 0x00000000); + if((cpuidInfo[CPUID_EBX] & 0xFF) >= 1) // Avx10v1 - CPUID.(EAX=24H, ECX=00H):EBX[7:0] >= 1 + { + if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) + { + result |= XArchIntrinsicConstants_Avx10v1; + } + + if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) + { + result |= XArchIntrinsicConstants_Avx10v1_V256; + } + + if ((cpuidInfo[CPUID_EBX] & (1 << 18)) != 0) + { + result |= XArchIntrinsicConstants_Avx10v1_V512; + } + } + } } } } @@ -314,25 +306,25 @@ int minipal_getcpufeatures(void) } } } + } - if (maxCpuId >= 0x07) - { - __cpuidex(cpuidInfo, 0x00000007, 0x00000000); + if (maxCpuId >= 0x07) + { + __cpuidex(cpuidInfo, 0x00000007, 0x00000000); - if ((cpuidInfo[CPUID_EBX] & (1 << 3)) != 0) // BMI1 - { - result |= XArchIntrinsicConstants_Bmi1; - } + if ((cpuidInfo[CPUID_EBX] & (1 << 3)) != 0) // BMI1 + { + result |= XArchIntrinsicConstants_Bmi1; + } - if ((cpuidInfo[CPUID_EBX] & (1 << 8)) != 0) // BMI2 - { - result |= XArchIntrinsicConstants_Bmi2; - } + if ((cpuidInfo[CPUID_EBX] & (1 << 8)) != 0) // BMI2 + { + result |= XArchIntrinsicConstants_Bmi2; + } - if ((cpuidInfo[CPUID_EDX] & (1 << 14)) != 0) - { - result |= XArchIntrinsicConstants_Serialize; // SERIALIZE - } + if ((cpuidInfo[CPUID_EDX] & (1 << 14)) != 0) + { + result |= XArchIntrinsicConstants_Serialize; // SERIALIZE } } @@ -382,7 +374,7 @@ int minipal_getcpufeatures(void) result |= ARM64IntrinsicConstants_Sha256; if (hwCap & HWCAP_ASIMD) - result |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128; + result |= ARM64IntrinsicConstants_AdvSimd; if (hwCap & HWCAP_ASIMDRDM) result |= ARM64IntrinsicConstants_Rdm; @@ -427,13 +419,13 @@ int minipal_getcpufeatures(void) // Every ARM64 CPU should support SIMD and FP // If the OS have no function to query for CPU capabilities we set just these - result |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128; + result |= ARM64IntrinsicConstants_AdvSimd; #endif // HAVE_AUXV_HWCAP_H #endif // HOST_UNIX #if defined(HOST_WINDOWS) // FP and SIMD support are enabled by default - result |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128; + result |= ARM64IntrinsicConstants_AdvSimd; if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index 472ce178339613..62aa1c75256a84 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -37,12 +37,9 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Avx512Vbmi = 0x800000, XArchIntrinsicConstants_Avx512Vbmi_vl = 0x1000000, XArchIntrinsicConstants_Serialize = 0x2000000, - XArchIntrinsicConstants_VectorT128 = 0x4000000, - XArchIntrinsicConstants_VectorT256 = 0x8000000, - XArchIntrinsicConstants_VectorT512 = 0x10000000, - XArchIntrinsicConstants_Avx10v1 = 0x20000000, - XArchIntrinsicConstants_Avx10v1_V256 = 0x40000000, - XArchIntrinsicConstants_Avx10v1_V512 = 0x80000000, + XArchIntrinsicConstants_Avx10v1 = 0x4000000, + XArchIntrinsicConstants_Avx10v1_V256 = 0x8000000, + XArchIntrinsicConstants_Avx10v1_V512 = 0x10000000, }; #endif // HOST_X86 || HOST_AMD64 @@ -58,9 +55,8 @@ enum ARM64IntrinsicConstants ARM64IntrinsicConstants_Sha256 = 0x0040, ARM64IntrinsicConstants_Atomics = 0x0080, ARM64IntrinsicConstants_Rcpc = 0x0100, - ARM64IntrinsicConstants_VectorT128 = 0x0200, - ARM64IntrinsicConstants_Rcpc2 = 0x0400, - ARM64IntrinsicConstants_Sve = 0x0800, + ARM64IntrinsicConstants_Rcpc2 = 0x0200, + ARM64IntrinsicConstants_Sve = 0x0400, }; #include From 3f965348df06f01dcca2a822093647964382e2fa Mon Sep 17 00:00:00 2001 From: Jan Kotas Date: Fri, 31 May 2024 22:37:35 -0700 Subject: [PATCH 2/2] Fix arm64 --- .../tools/Common/Compiler/HardwareIntrinsicHelpers.cs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index d053a11f63e4ec..bef78e07ac7f06 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -246,9 +246,8 @@ private static class Arm64IntrinsicConstants public const int Sha256 = 0x0040; public const int Atomics = 0x0080; public const int Rcpc = 0x0100; - public const int VectorT128 = 0x0200; - public const int Rcpc2 = 0x0400; - public const int Sve = 0x0800; + public const int Rcpc2 = 0x0200; + public const int Sve = 0x0400; public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) {