From 53be3c6bfd4add9c7a452bae9a89d05d78247237 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 7 May 2023 06:29:39 -0700 Subject: [PATCH 01/19] Expose DOTNET_MaxVectorTBitWidth and an undocumented DOTNET_PreferredVectorBitWidth --- .../coreclr/botr/vectors-and-intrinsics.md | 4 +- docs/design/coreclr/jit/ryujit-overview.md | 2 +- src/coreclr/inc/clrconfigvalues.h | 12 +- src/coreclr/inc/corinfo.h | 20 ++ src/coreclr/inc/corinfoinstructionset.h | 259 ++++++++------ src/coreclr/inc/corjit.h | 10 +- src/coreclr/inc/icorjitinfoimpl_generated.h | 3 + src/coreclr/inc/jiteeversionguid.h | 10 +- src/coreclr/jit/ICorJitInfo_names_generated.h | 1 + .../jit/ICorJitInfo_wrapper_generated.hpp | 8 + src/coreclr/jit/compiler.cpp | 92 +++-- src/coreclr/jit/compiler.h | 104 ++++-- src/coreclr/jit/ee_il_dll.cpp | 44 ++- src/coreclr/jit/ee_il_dll.hpp | 2 +- src/coreclr/jit/hwintrinsicxarch.cpp | 107 +++--- src/coreclr/jit/importercalls.cpp | 5 +- src/coreclr/jit/jitconfigvalues.h | 10 +- src/coreclr/jit/jitee.h | 2 +- src/coreclr/jit/lclvars.cpp | 4 +- src/coreclr/jit/simd.cpp | 8 +- src/coreclr/jit/simdashwintrinsic.cpp | 89 ++++- src/coreclr/jit/simdashwintrinsic.h | 10 +- src/coreclr/jit/simdashwintrinsiclistarm64.h | 8 + src/coreclr/jit/simdashwintrinsiclistxarch.h | 8 + .../Microsoft.NETCore.Native.targets | 1 + .../nativeaot/Runtime/IntrinsicConstants.h | 1 + src/coreclr/nativeaot/Runtime/startup.cpp | 78 +++-- src/coreclr/nativeaot/docs/optimizing.md | 1 + src/coreclr/pal/src/misc/jitsupport.cpp | 3 + .../Common/Compiler/InstructionSetSupport.cs | 81 ++++- .../tools/Common/InstructionSetHelpers.cs | 26 +- .../Runtime/ReadyToRunInstructionSetHelper.cs | 7 + .../tools/Common/JitInterface/CorInfoImpl.cs | 10 +- .../JitInterface/CorInfoImpl_generated.cs | 121 ++++--- .../JitInterface/CorInfoInstructionSet.cs | 316 ++++++++++++------ .../tools/Common/JitInterface/CorInfoTypes.cs | 16 + .../ThunkGenerator/InstructionSetDesc.txt | 77 +++-- 
.../ThunkGenerator/InstructionSetGenerator.cs | 2 +- .../ThunkGenerator/ThunkInput.txt | 2 + .../Compiler/HardwareIntrinsicHelpers.Aot.cs | 3 + .../Compiler/ReadyToRunCodegenCompilation.cs | 8 +- .../JitInterface/CorInfoImpl.ReadyToRun.cs | 2 +- .../aot/ILCompiler/ILCompilerRootCommand.cs | 3 + src/coreclr/tools/aot/ILCompiler/Program.cs | 2 +- .../aot/crossgen2/Crossgen2RootCommand.cs | 3 + src/coreclr/tools/aot/crossgen2/Program.cs | 2 +- .../aot/crossgen2/Properties/Resources.resx | 3 + .../aot/jitinterface/jitinterface_generated.h | 9 + .../tools/aot/jitinterface/jitwrapper.cpp | 6 +- .../superpmi-shared/icorjitcompilerimpl.h | 5 - .../tools/superpmi/superpmi-shared/lwmlist.h | 1 + .../superpmi-shared/methodcontext.cpp | 32 +- .../superpmi/superpmi-shared/methodcontext.h | 7 +- .../icorjitcompiler.cpp | 5 - .../superpmi-shim-collector/icorjitinfo.cpp | 7 + .../superpmi-shim-counter/icorjitcompiler.cpp | 6 - .../icorjitinfo_generated.cpp | 7 + .../superpmi-shim-simple/icorjitcompiler.cpp | 5 - .../icorjitinfo_generated.cpp | 6 + .../tools/superpmi/superpmi/icorjitinfo.cpp | 6 + src/coreclr/vm/cgensys.h | 11 - src/coreclr/vm/codeman.cpp | 109 ++++-- src/coreclr/vm/codeman.h | 24 +- src/coreclr/vm/jitinterface.cpp | 40 ++- src/coreclr/vm/jitinterface.h | 16 +- src/coreclr/vm/methodtablebuilder.cpp | 23 +- .../src/System/Numerics/Plane.cs | 8 +- .../SmokeTests/HardwareIntrinsics/Program.cs | 135 ++++++-- .../HardwareIntrinsics/X64Baseline.csproj | 2 +- .../HardwareIntrinsics/x64Avx.csproj | 39 +++ .../{x64Vex.csproj => x64Avx2.csproj} | 2 +- .../x64Avx2_VectorT128.csproj | 40 +++ .../HardwareIntrinsics/x64Avx512.csproj | 33 ++ .../x64Avx512_VectorT128.csproj | 34 ++ .../{x64NonVex.csproj => x64Sse42.csproj} | 2 +- 75 files changed, 1576 insertions(+), 634 deletions(-) create mode 100644 src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx.csproj rename src/tests/nativeaot/SmokeTests/HardwareIntrinsics/{x64Vex.csproj => x64Avx2.csproj} (91%) create mode 100644 
src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx2_VectorT128.csproj create mode 100644 src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx512.csproj create mode 100644 src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx512_VectorT128.csproj rename src/tests/nativeaot/SmokeTests/HardwareIntrinsics/{x64NonVex.csproj => x64Sse42.csproj} (83%) diff --git a/docs/design/coreclr/botr/vectors-and-intrinsics.md b/docs/design/coreclr/botr/vectors-and-intrinsics.md index fac5a0800c9bee..a90a3d68c724d8 100644 --- a/docs/design/coreclr/botr/vectors-and-intrinsics.md +++ b/docs/design/coreclr/botr/vectors-and-intrinsics.md @@ -194,5 +194,5 @@ While the above api exists, it is not expected that general purpose code within |`compExactlyDependsOn(isa)`| Use when making a decision to use or not use an instruction set when the decision will affect the semantics of the generated code. Should never be used in an assert. | Return whether or not an instruction set is supported. Calls notifyInstructionSetUsage with the result of that computation. |`compOpportunisticallyDependsOn(isa)`| Use when making an opportunistic decision to use or not use an instruction set. Use when the instruction set usage is a "nice to have optimization opportunity", but do not use when a false result may change the semantics of the program. Should never be used in an assert. | Return whether or not an instruction set is supported. Calls notifyInstructionSetUsage if the instruction set is supported. |`compIsaSupportedDebugOnly(isa)` | Use to assert whether or not an instruction set is supported | Return whether or not an instruction set is supported. Does not report anything. Only available in debug builds. -|`getSIMDVectorRegisterByteLength()` | Use to get the size of a `Vector` value. | Determine the size of the `Vector` type. If on the architecture the size may vary depending on whatever rules. 
Use `compExactlyDependsOn` to perform the queries so that the size is consistent between compile time and runtime. -|`maxSIMDStructBytes()`| Get the maximum number of bytes that might be used in a SIMD type during this compilation. | Query the set of instruction sets supported, and determine the largest simd type supported. Use `compOpportunisticallyDependsOn` to perform the queries so that the maximum size needed is the only one recorded. +|`getVectorTByteLength()` | Use to get the size of a `Vector` value. | Determine the size of the `Vector` type. If on the architecture the size may vary depending on whatever rules. Use `compExactlyDependsOn` to perform the queries so that the size is consistent between compile time and runtime. +|`getMaxVectorByteLength()`| Get the maximum number of bytes that might be used in a SIMD type during this compilation. | Query the set of instruction sets supported, and determine the largest simd type supported. Use `compOpportunisticallyDependsOn` to perform the queries so that the maximum size needed is the only one recorded. diff --git a/docs/design/coreclr/jit/ryujit-overview.md b/docs/design/coreclr/jit/ryujit-overview.md index e38e487058c744..a696dca8fb3edf 100644 --- a/docs/design/coreclr/jit/ryujit-overview.md +++ b/docs/design/coreclr/jit/ryujit-overview.md @@ -34,7 +34,7 @@ The following are the key methods on this interface: It returns a pointer to the code, its size, and additional GC, EH and (optionally) debug info. * `getVersionIdentifier` is the mechanism by which the JIT/EE interface is versioned. There is a single GUID (manually generated) which the JIT and EE must agree on. - * `getMaxIntrinsicSIMDVectorLength` communicates to the EE the largest SIMD vector length that the JIT can support. + * `getMaxVectorTBitWidth` communicates to the EE the maximum width, in bits, that Vector is allowed to be. * `ICorJitInfo` – this is the interface that the EE implements.
It has many methods defined on it that allow the JIT to look up metadata tokens, traverse type signatures, compute field and vtable offsets, find method entry points, construct string literals, etc. This bulk of this interface is inherited from `ICorDynamicInfo` which is defined in diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 853d985df3a330..607fd30d4de23a 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -357,12 +357,6 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitRegisterFP, W("JitRegisterFP"), 3, "Control RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitELTHookEnabled, W("JitELTHookEnabled"), 0, "On ARM, setting this will emit Enter/Leave/TailCall callbacks") RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitMemStats, W("JitMemStats"), 0, "Display JIT memory usage statistics") RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitVNMapSelBudget, W("JitVNMapSelBudget"), 100, "Max # of MapSelect's considered for a particular top-level invocation.") -#if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64) -#define EXTERNAL_FeatureSIMD_Default 1 -#else // !(defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64)) -#define EXTERNAL_FeatureSIMD_Default 0 -#endif // !(defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64)) -RETAIL_CONFIG_DWORD_INFO(INTERNAL_SIMD16ByteOnly, W("SIMD16ByteOnly"), 0, "Limit maximum SIMD vector length to 16 bytes (used by x64_arm64_altjit)") RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TrackDynamicMethodDebugInfo, W("TrackDynamicMethodDebugInfo"), 0, "Specifies whether debug info should be generated and tracked for dynamic methods") #ifdef FEATURE_MULTICOREJIT @@ -745,15 +739,17 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_GDBJitEmitDebugFrame, W("GDBJitEmitDebugFrame" #endif #endif +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_MaxVectorTBitWidth, W("MaxVectorTBitWidth"), 0, "The maximum width, in bits, that Vector is allowed to be. 
A value less than 128 is treated as the system default.") + // // Hardware Intrinsic ISAs; keep in sync with jitconfigvalues.h // #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) //TODO: should implement LoongArch64's features. //TODO-RISCV64-CQ: should implement RISCV64's features. -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 0, "Allows Base+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 0, "Allows Base+ hardware intrinsics to be disabled") #else -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1, "Allows Base+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1, "Allows Base+ hardware intrinsics to be disabled") #endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(TARGET_AMD64) || defined(TARGET_X86) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 93f67680284498..52bcaba8272cee 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -317,6 +317,24 @@ struct SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR } }; +// Represents information about an XARCH CPU +union CORINFO_XARCH_CPU +{ + struct { + uint32_t SteppingId : 4; + uint32_t Model : 4; + uint32_t FamilyId : 4; + uint32_t ProcessorType : 2; + uint32_t IsAuthenticAmd : 1; // Unused bits in the CPUID result + uint32_t IsGenuineIntel : 1; // Unused bits in the CPUID result + uint32_t ExtendedModelId : 4; + uint32_t ExtendedFamilyId : 8; + uint32_t Reserved : 4; // Unused bits in the CPUID result + }; + + uint32_t Value; +}; + // StructFloadFieldInfoFlags: used on LoongArch64 architecture by `getLoongArch64PassStructInRegisterFlags` and // `getRISCV64PassStructInRegisterFlags` API to convey struct argument passing information. 
// @@ -3027,6 +3045,8 @@ class ICorStaticInfo virtual uint32_t getLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls) = 0; virtual uint32_t getRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls) = 0; + + virtual void getXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfo) = 0; }; /***************************************************************************** diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 8462ab33413ef9..19950134867e8b 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -14,7 +14,7 @@ enum CORINFO_InstructionSet { InstructionSet_ILLEGAL = 0, - InstructionSet_NONE = 63, + InstructionSet_NONE = 127, #ifdef TARGET_ARM64 InstructionSet_ArmBase=1, InstructionSet_AdvSimd=2, @@ -27,16 +27,17 @@ enum CORINFO_InstructionSet InstructionSet_Atomics=9, InstructionSet_Vector64=10, InstructionSet_Vector128=11, - InstructionSet_Dczva=12, - InstructionSet_Rcpc=13, - InstructionSet_ArmBase_Arm64=14, - InstructionSet_AdvSimd_Arm64=15, - InstructionSet_Aes_Arm64=16, - InstructionSet_Crc32_Arm64=17, - InstructionSet_Dp_Arm64=18, - InstructionSet_Rdm_Arm64=19, - InstructionSet_Sha1_Arm64=20, - InstructionSet_Sha256_Arm64=21, + InstructionSet_VectorT128=12, + InstructionSet_Dczva=13, + InstructionSet_Rcpc=14, + InstructionSet_ArmBase_Arm64=15, + InstructionSet_AdvSimd_Arm64=16, + InstructionSet_Aes_Arm64=17, + InstructionSet_Crc32_Arm64=18, + InstructionSet_Dp_Arm64=19, + InstructionSet_Rdm_Arm64=20, + InstructionSet_Sha1_Arm64=21, + InstructionSet_Sha256_Arm64=22, #endif // TARGET_ARM64 #ifdef TARGET_AMD64 InstructionSet_X86Base=1, @@ -58,48 +59,51 @@ enum CORINFO_InstructionSet InstructionSet_Vector128=17, InstructionSet_Vector256=18, InstructionSet_Vector512=19, - InstructionSet_AVXVNNI=20, - InstructionSet_MOVBE=21, - InstructionSet_X86Serialize=22, - InstructionSet_AVX512F=23, - InstructionSet_AVX512F_VL=24, - InstructionSet_AVX512BW=25, - 
InstructionSet_AVX512BW_VL=26, - InstructionSet_AVX512CD=27, - InstructionSet_AVX512CD_VL=28, - InstructionSet_AVX512DQ=29, - InstructionSet_AVX512DQ_VL=30, - InstructionSet_AVX512VBMI=31, - InstructionSet_AVX512VBMI_VL=32, - InstructionSet_X86Base_X64=33, - InstructionSet_SSE_X64=34, - InstructionSet_SSE2_X64=35, - InstructionSet_SSE3_X64=36, - InstructionSet_SSSE3_X64=37, - InstructionSet_SSE41_X64=38, - InstructionSet_SSE42_X64=39, - InstructionSet_AVX_X64=40, - InstructionSet_AVX2_X64=41, - InstructionSet_AES_X64=42, - InstructionSet_BMI1_X64=43, - InstructionSet_BMI2_X64=44, - InstructionSet_FMA_X64=45, - InstructionSet_LZCNT_X64=46, - InstructionSet_PCLMULQDQ_X64=47, - InstructionSet_POPCNT_X64=48, - InstructionSet_AVXVNNI_X64=49, - InstructionSet_MOVBE_X64=50, - InstructionSet_X86Serialize_X64=51, - InstructionSet_AVX512F_X64=52, - InstructionSet_AVX512F_VL_X64=53, - InstructionSet_AVX512BW_X64=54, - InstructionSet_AVX512BW_VL_X64=55, - InstructionSet_AVX512CD_X64=56, - InstructionSet_AVX512CD_VL_X64=57, - InstructionSet_AVX512DQ_X64=58, - InstructionSet_AVX512DQ_VL_X64=59, - InstructionSet_AVX512VBMI_X64=60, - InstructionSet_AVX512VBMI_VL_X64=61, + InstructionSet_VectorT128=20, + InstructionSet_VectorT256=21, + InstructionSet_VectorT512=22, + InstructionSet_AVXVNNI=23, + InstructionSet_MOVBE=24, + InstructionSet_X86Serialize=25, + InstructionSet_AVX512F=26, + InstructionSet_AVX512F_VL=27, + InstructionSet_AVX512BW=28, + InstructionSet_AVX512BW_VL=29, + InstructionSet_AVX512CD=30, + InstructionSet_AVX512CD_VL=31, + InstructionSet_AVX512DQ=32, + InstructionSet_AVX512DQ_VL=33, + InstructionSet_AVX512VBMI=34, + InstructionSet_AVX512VBMI_VL=35, + InstructionSet_X86Base_X64=36, + InstructionSet_SSE_X64=37, + InstructionSet_SSE2_X64=38, + InstructionSet_SSE3_X64=39, + InstructionSet_SSSE3_X64=40, + InstructionSet_SSE41_X64=41, + InstructionSet_SSE42_X64=42, + InstructionSet_AVX_X64=43, + InstructionSet_AVX2_X64=44, + InstructionSet_AES_X64=45, + 
InstructionSet_BMI1_X64=46, + InstructionSet_BMI2_X64=47, + InstructionSet_FMA_X64=48, + InstructionSet_LZCNT_X64=49, + InstructionSet_PCLMULQDQ_X64=50, + InstructionSet_POPCNT_X64=51, + InstructionSet_AVXVNNI_X64=52, + InstructionSet_MOVBE_X64=53, + InstructionSet_X86Serialize_X64=54, + InstructionSet_AVX512F_X64=55, + InstructionSet_AVX512F_VL_X64=56, + InstructionSet_AVX512BW_X64=57, + InstructionSet_AVX512BW_VL_X64=58, + InstructionSet_AVX512CD_X64=59, + InstructionSet_AVX512CD_VL_X64=60, + InstructionSet_AVX512DQ_X64=61, + InstructionSet_AVX512DQ_VL_X64=62, + InstructionSet_AVX512VBMI_X64=63, + InstructionSet_AVX512VBMI_VL_X64=64, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -121,48 +125,51 @@ enum CORINFO_InstructionSet InstructionSet_Vector128=17, InstructionSet_Vector256=18, InstructionSet_Vector512=19, - InstructionSet_AVXVNNI=20, - InstructionSet_MOVBE=21, - InstructionSet_X86Serialize=22, - InstructionSet_AVX512F=23, - InstructionSet_AVX512F_VL=24, - InstructionSet_AVX512BW=25, - InstructionSet_AVX512BW_VL=26, - InstructionSet_AVX512CD=27, - InstructionSet_AVX512CD_VL=28, - InstructionSet_AVX512DQ=29, - InstructionSet_AVX512DQ_VL=30, - InstructionSet_AVX512VBMI=31, - InstructionSet_AVX512VBMI_VL=32, - InstructionSet_X86Base_X64=33, - InstructionSet_SSE_X64=34, - InstructionSet_SSE2_X64=35, - InstructionSet_SSE3_X64=36, - InstructionSet_SSSE3_X64=37, - InstructionSet_SSE41_X64=38, - InstructionSet_SSE42_X64=39, - InstructionSet_AVX_X64=40, - InstructionSet_AVX2_X64=41, - InstructionSet_AES_X64=42, - InstructionSet_BMI1_X64=43, - InstructionSet_BMI2_X64=44, - InstructionSet_FMA_X64=45, - InstructionSet_LZCNT_X64=46, - InstructionSet_PCLMULQDQ_X64=47, - InstructionSet_POPCNT_X64=48, - InstructionSet_AVXVNNI_X64=49, - InstructionSet_MOVBE_X64=50, - InstructionSet_X86Serialize_X64=51, - InstructionSet_AVX512F_X64=52, - InstructionSet_AVX512F_VL_X64=53, - InstructionSet_AVX512BW_X64=54, - InstructionSet_AVX512BW_VL_X64=55, - 
InstructionSet_AVX512CD_X64=56, - InstructionSet_AVX512CD_VL_X64=57, - InstructionSet_AVX512DQ_X64=58, - InstructionSet_AVX512DQ_VL_X64=59, - InstructionSet_AVX512VBMI_X64=60, - InstructionSet_AVX512VBMI_VL_X64=61, + InstructionSet_VectorT128=20, + InstructionSet_VectorT256=21, + InstructionSet_VectorT512=22, + InstructionSet_AVXVNNI=23, + InstructionSet_MOVBE=24, + InstructionSet_X86Serialize=25, + InstructionSet_AVX512F=26, + InstructionSet_AVX512F_VL=27, + InstructionSet_AVX512BW=28, + InstructionSet_AVX512BW_VL=29, + InstructionSet_AVX512CD=30, + InstructionSet_AVX512CD_VL=31, + InstructionSet_AVX512DQ=32, + InstructionSet_AVX512DQ_VL=33, + InstructionSet_AVX512VBMI=34, + InstructionSet_AVX512VBMI_VL=35, + InstructionSet_X86Base_X64=36, + InstructionSet_SSE_X64=37, + InstructionSet_SSE2_X64=38, + InstructionSet_SSE3_X64=39, + InstructionSet_SSSE3_X64=40, + InstructionSet_SSE41_X64=41, + InstructionSet_SSE42_X64=42, + InstructionSet_AVX_X64=43, + InstructionSet_AVX2_X64=44, + InstructionSet_AES_X64=45, + InstructionSet_BMI1_X64=46, + InstructionSet_BMI2_X64=47, + InstructionSet_FMA_X64=48, + InstructionSet_LZCNT_X64=49, + InstructionSet_PCLMULQDQ_X64=50, + InstructionSet_POPCNT_X64=51, + InstructionSet_AVXVNNI_X64=52, + InstructionSet_MOVBE_X64=53, + InstructionSet_X86Serialize_X64=54, + InstructionSet_AVX512F_X64=55, + InstructionSet_AVX512F_VL_X64=56, + InstructionSet_AVX512BW_X64=57, + InstructionSet_AVX512BW_VL_X64=58, + InstructionSet_AVX512CD_X64=59, + InstructionSet_AVX512CD_VL_X64=60, + InstructionSet_AVX512DQ_X64=61, + InstructionSet_AVX512DQ_VL_X64=62, + InstructionSet_AVX512VBMI_X64=63, + InstructionSet_AVX512VBMI_VL_X64=64, #endif // TARGET_X86 }; @@ -170,7 +177,7 @@ enum CORINFO_InstructionSet struct CORINFO_InstructionSetFlags { private: - static const int32_t FlagsFieldCount = 1; + static const int32_t FlagsFieldCount = 2; static const int32_t BitsPerFlagsField = sizeof(uint64_t) * 8; uint64_t _flags[FlagsFieldCount] = { }; @@ -404,6 +411,8 @@ 
inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_Vector64); if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_AdvSimd)) resultflags.RemoveInstructionSet(InstructionSet_Vector128); + if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_AdvSimd)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT128); #endif // TARGET_ARM64 #ifdef TARGET_AMD64 if (resultflags.HasInstructionSet(InstructionSet_X86Base) && !resultflags.HasInstructionSet(InstructionSet_X86Base_X64)) @@ -558,6 +567,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_Vector256); if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_Vector512); + if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT128); + if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT256); + if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT512); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) @@ -566,24 +581,40 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins 
resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_FMA)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512F); if (resultflags.HasInstructionSet(InstructionSet_AVX512F_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512CD) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_AVX512CD); + if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512BW) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_AVX512BW); + if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ); + if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ)) + 
resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI); + if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); + if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512F); #endif // TARGET_AMD64 #ifdef TARGET_X86 if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) @@ -622,6 +653,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_Vector256); if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_Vector512); + if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + 
resultflags.RemoveInstructionSet(InstructionSet_VectorT128); + if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT256); + if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT512); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) @@ -630,24 +667,40 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_FMA)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512F); if (resultflags.HasInstructionSet(InstructionSet_AVX512F_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512CD) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_AVX512CD); + if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); if 
(resultflags.HasInstructionSet(InstructionSet_AVX512BW) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_AVX512BW); + if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ); + if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI); + if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); + if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL)) + 
resultflags.RemoveInstructionSet(InstructionSet_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512F); #endif // TARGET_X86 } while (!oldflags.Equals(resultflags)); @@ -702,6 +755,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Vector64"; case InstructionSet_Vector128 : return "Vector128"; + case InstructionSet_VectorT128 : + return "VectorT128"; case InstructionSet_Dczva : return "Dczva"; case InstructionSet_Rcpc : @@ -778,6 +833,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Vector256"; case InstructionSet_Vector512 : return "Vector512"; + case InstructionSet_VectorT128 : + return "VectorT128"; + case InstructionSet_VectorT256 : + return "VectorT256"; + case InstructionSet_VectorT512 : + return "VectorT512"; case InstructionSet_AVXVNNI : return "AVXVNNI"; case InstructionSet_AVXVNNI_X64 : @@ -870,6 +931,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Vector256"; case InstructionSet_Vector512 : return "Vector512"; + case InstructionSet_VectorT128 : + return "VectorT128"; + case InstructionSet_VectorT256 : + return "VectorT256"; + case InstructionSet_VectorT512 : + return "VectorT512"; case InstructionSet_AVXVNNI : return "AVXVNNI"; case InstructionSet_MOVBE : diff --git a/src/coreclr/inc/corjit.h b/src/coreclr/inc/corjit.h index 0f29b274f1b03d..bc0b856a23c1ac 100644 --- a/src/coreclr/inc/corjit.h +++ b/src/coreclr/inc/corjit.h @@ -209,10 +209,10 @@ class ICorJitCompiler GUID* versionIdentifier /* OUT */ ) = 0; - // When the EE loads the System.Numerics.Vectors assembly, it asks the JIT what length (in bytes) of - // SIMD vector it supports as an intrinsic type. Zero means that the JIT does not support SIMD - // intrinsics, so the EE should use the default size (i.e. 
the size of the IL implementation). - virtual unsigned getMaxIntrinsicSIMDVectorLength(CORJIT_FLAGS cpuCompileFlags) { return 0; } + // When the EE loads the Vector type, it asks the JIT for the maximum width, in bits, that + // Vector is allowed to be. Zero means the JIT doesn't support SIMD intrinsics, so the + // EE should use the default size (i.e. the size of the IL implementation). + virtual unsigned getMaxVectorTBitWidth(CORJIT_FLAGS cpuCompileFlags) { return 0; } // Some JIT's may support multiple OSs. This api provides a means to specify to the JIT what OS it should // be trying to compile. This api does not produce any errors, any errors are to be generated by the @@ -340,7 +340,7 @@ class ICorJitInfo : public ICorDynamicInfo // // SAMPLE_INTERVAL must be >= SIZE. SAMPLE_INTERVAL / SIZE // gives the average number of calls between table updates. - // + // struct HandleHistogram32 { enum diff --git a/src/coreclr/inc/icorjitinfoimpl_generated.h b/src/coreclr/inc/icorjitinfoimpl_generated.h index cf4c94498c33a1..99eb40c7552a76 100644 --- a/src/coreclr/inc/icorjitinfoimpl_generated.h +++ b/src/coreclr/inc/icorjitinfoimpl_generated.h @@ -542,6 +542,9 @@ uint32_t getRISCV64PassStructInRegisterFlags( uint32_t getThreadTLSIndex( void** ppIndirection) override; +void getXarchCpuInfo( + CORINFO_XARCH_CPU* xarchCpuInfoPtr) override; + const void* getInlinedCallFrameVptr( void** ppIndirection) override; diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index d539e04c6b168d..ebcf4919cb6afb 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 4e6355a0-3844-45e2-8cef-082c18217e14 */ - 0x4e6355a0, - 0x3844, - 0x45e2, - {0x8c, 0xef, 0x8, 0x2c, 0x18, 0x21, 0x7e, 0x14} +constexpr GUID JITEEVersionIdentifier = { /* fda2f9dd-6b3e-4ecd-a7b8-79e5edf1f072 */ + 0xfda2f9dd, + 
0x6b3e, + 0x4ecd, + {0xa7, 0xb8, 0x79, 0xe5, 0xed, 0xf1, 0xf0, 0x72} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/jit/ICorJitInfo_names_generated.h b/src/coreclr/jit/ICorJitInfo_names_generated.h index cb5db87194525c..517496d0e68cb5 100644 --- a/src/coreclr/jit/ICorJitInfo_names_generated.h +++ b/src/coreclr/jit/ICorJitInfo_names_generated.h @@ -136,6 +136,7 @@ DEF_CLR_API(getSystemVAmd64PassStructInRegisterDescriptor) DEF_CLR_API(getLoongArch64PassStructInRegisterFlags) DEF_CLR_API(getRISCV64PassStructInRegisterFlags) DEF_CLR_API(getThreadTLSIndex) +DEF_CLR_API(getXarchCpuInfo) DEF_CLR_API(getInlinedCallFrameVptr) DEF_CLR_API(getAddrOfCaptureThreadGlobal) DEF_CLR_API(getHelperFtn) diff --git a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp index b819fc12fbce13..8e07bd0bd70c75 100644 --- a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp +++ b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp @@ -1297,6 +1297,14 @@ uint32_t WrapICorJitInfo::getThreadTLSIndex( return temp; } +void WrapICorJitInfo::getXarchCpuInfo( + CORINFO_XARCH_CPU* xarchCpuInfoPtr) +{ + API_ENTER(getXarchCpuInfo); + wrapHnd->getXarchCpuInfo(xarchCpuInfoPtr); + API_LEAVE(getXarchCpuInfo); +} + const void* WrapICorJitInfo::getInlinedCallFrameVptr( void** ppIndirection) { diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index bc1b7b0d0dbb46..9673ee08a47c98 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2295,6 +2295,10 @@ void Compiler::compSetProcessor() // don't actually exist. The JIT is in charge of adding those and ensuring // the total sum of flags is still valid. 
#if defined(TARGET_XARCH) + // Get the preferred vector bitwidth, rounding down to the nearest multiple of 128-bits + uint32_t preferredVectorBitWidth = (JitConfig.PreferredVectorBitWidth() / 128) * 128; + uint32_t preferredVectorByteLength = preferredVectorBitWidth / 8; + if (instructionSetFlags.HasInstructionSet(InstructionSet_SSE)) { instructionSetFlags.AddInstructionSet(InstructionSet_Vector128); @@ -2305,50 +2309,66 @@ void Compiler::compSetProcessor() instructionSetFlags.AddInstructionSet(InstructionSet_Vector256); } - // x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL - // These have been shipped together historically and at the time of this writing - // there exists no hardware which doesn't support the entire feature set. To simplify - // the overall JIT implementation, we currently require the entire set of ISAs to be - // supported and disable AVX512 support otherwise. - - if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL) && - instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL) && - instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL)) + if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F)) { + // x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL + // These have been shipped together historically and at the time of this writing + // there exists no hardware which doesn't support the entire feature set. To simplify + // the overall JIT implementation, we currently require the entire set of ISAs to be + // supported and disable AVX512 support otherwise. 
+ + assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F)); + assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL)); assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW)); + assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL)); assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD)); + assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL)); assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL)); + assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL)); instructionSetFlags.AddInstructionSet(InstructionSet_Vector512); - } - else - { - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512VBMI); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); -#ifdef TARGET_AMD64 - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_X64); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL_X64); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_X64); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_VL_X64); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_X64); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_VL_X64); 
- instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_X64); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL_X64); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512VBMI_X64); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL_X64); -#endif // TARGET_AMD64 + if (preferredVectorByteLength == 0) + { + CORINFO_XARCH_CPU xarchCpuInfo; + eeGetXarchCpuInfo(&xarchCpuInfo); + + if (xarchCpuInfo.IsGenuineIntel) + { + // Some architectures can experience frequency throttling when executing + // 512-bit width instructions. To account for this we set the + // default preferred vector width to 256-bits in some scenarios. Power + // users can override this with `DOTNET_PreferredVectorBitWidth=512` to + // allow using such instructions where hardware support is available. + + if (xarchCpuInfo.FamilyId == 0x06) + { + if (xarchCpuInfo.ExtendedModelId == 0x05) + { + if (xarchCpuInfo.Model == 0x05) + { + // * Skylake (Server) + // * Cascade Lake + // * Cooper Lake + + preferredVectorByteLength = 32; + } + } + else if (xarchCpuInfo.ExtendedModelId == 0x06) + { + if (xarchCpuInfo.Model == 0x06) + { + // * Cannon Lake + + preferredVectorByteLength = 32; + } + } + } + } + } } + + opts.preferredVectorByteLength = preferredVectorByteLength; #elif defined(TARGET_ARM64) if (instructionSetFlags.HasInstructionSet(InstructionSet_AdvSimd)) { diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 91d4350e0afda1..d2c0836f942ff2 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8110,6 +8110,10 @@ class Compiler bool eeRunWithSPMIErrorTrapImp(void (*function)(void*), void* param); +#if defined(TARGET_XARCH) + void eeGetXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfo); +#endif // TARGET_XARCH + // Utility functions static CORINFO_METHOD_HANDLE eeFindHelper(unsigned helper); @@ -8675,36 +8679,50 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX //
Get the number of bytes in a System.Numeric.Vector for the current compilation. // Note - cannot be used for System.Runtime.Intrinsic - unsigned getSIMDVectorRegisterByteLength() + unsigned getVectorTByteLength() { + // We need to report the ISA dependency to the VM so that scenarios + // such as R2R work correctly for larger vector sizes, so we always + // do `compExactlyDependsOn` for such cases. + #if defined(TARGET_XARCH) - if (compExactlyDependsOn(InstructionSet_AVX2)) + if (compExactlyDependsOn(InstructionSet_VectorT256)) { - // TODO-XArch-AVX512 : Return ZMM_REGSIZE_BYTES once Vector supports AVX512. return YMM_REGSIZE_BYTES; } - else + else if (compOpportunisticallyDependsOn(InstructionSet_VectorT128)) { return XMM_REGSIZE_BYTES; } + else + { + return 0; + } #elif defined(TARGET_ARM64) - return FP_REGSIZE_BYTES; + if (compOpportunisticallyDependsOn(InstructionSet_VectorT128)) + { + return FP_REGSIZE_BYTES; + } + else + { + return 0; + } #else - assert(!"getSIMDVectorRegisterByteLength() unimplemented on target arch"); + assert(!"getVectorTByteLength() unimplemented on target arch"); unreached(); #endif } // The minimum and maximum possible number of bytes in a SIMD vector. 
- // maxSIMDStructBytes + // getMaxVectorByteLength // The minimum SIMD size supported by System.Numeric.Vectors or System.Runtime.Intrinsic // Arm.AdvSimd: 16-byte Vector and Vector128 // X86.SSE: 16-byte Vector and Vector128 // X86.AVX: 16-byte Vector and Vector256 // X86.AVX2: 32-byte Vector and Vector256 // X86.AVX512F: 32-byte Vector and Vector512 - unsigned int maxSIMDStructBytes() const + unsigned int getMaxVectorByteLength() const { #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) if (compOpportunisticallyDependsOn(InstructionSet_AVX)) @@ -8720,16 +8738,35 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX } else { + assert(compIsaSupportedDebugOnly(InstructionSet_SSE)); return XMM_REGSIZE_BYTES; } #elif defined(TARGET_ARM64) + assert(compIsaSupportedDebugOnly(InstructionSet_AdvSimd)); return FP_REGSIZE_BYTES; #else - assert(!"maxSIMDStructBytes() unimplemented on target arch"); + assert(!"getMaxVectorByteLength() unimplemented on target arch"); unreached(); #endif } + //------------------------------------------------------------------------ + // getPreferredVectorByteLength: Gets the preferred length, in bytes, to use for vectorization + // + unsigned int getPreferredVectorByteLength() const + { +#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) + uint32_t preferredVectorByteLength = opts.preferredVectorByteLength; + + if (preferredVectorByteLength != 0) + { + return min(getMaxVectorByteLength(), preferredVectorByteLength); + } +#endif // FEATURE_HW_INTRINSICS && TARGET_XARCH + + return getMaxVectorByteLength(); + } + //------------------------------------------------------------------------ // roundUpSIMDSize: rounds the given size up to the nearest SIMD size // available on the target. 
Examples on XARCH: @@ -8748,19 +8785,22 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX unsigned int roundUpSIMDSize(unsigned size) { #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) - unsigned maxSimdSize = maxSIMDStructBytes(); - assert(maxSimdSize <= ZMM_REGSIZE_BYTES); - if (size <= XMM_REGSIZE_BYTES && maxSimdSize > XMM_REGSIZE_BYTES) + unsigned maxSize = getPreferredVectorByteLength(); + assert(maxSize <= ZMM_REGSIZE_BYTES); + + if ((size <= XMM_REGSIZE_BYTES) && (maxSize > XMM_REGSIZE_BYTES)) { return XMM_REGSIZE_BYTES; } - if (size <= YMM_REGSIZE_BYTES && maxSimdSize > YMM_REGSIZE_BYTES) + + if ((size <= YMM_REGSIZE_BYTES) && (maxSize > YMM_REGSIZE_BYTES)) { return YMM_REGSIZE_BYTES; } - return maxSimdSize; + + return maxSize; #elif defined(TARGET_ARM64) - assert(maxSIMDStructBytes() == FP_REGSIZE_BYTES); + assert(getMaxVectorByteLength() == FP_REGSIZE_BYTES); return FP_REGSIZE_BYTES; #else assert(!"roundUpSIMDSize() unimplemented on target arch"); @@ -8783,30 +8823,33 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX unsigned int roundDownSIMDSize(unsigned size) { #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) - unsigned maxSimdSize = maxSIMDStructBytes(); - assert(maxSimdSize <= ZMM_REGSIZE_BYTES); - if (size >= maxSimdSize) + unsigned maxSize = getPreferredVectorByteLength(); + assert(maxSize <= ZMM_REGSIZE_BYTES); + + if (size >= maxSize) { // Size is bigger than max SIMD size the current target supports - return maxSimdSize; + return maxSize; } - if (size >= YMM_REGSIZE_BYTES && maxSimdSize >= YMM_REGSIZE_BYTES) + + if ((size >= YMM_REGSIZE_BYTES) && (maxSize >= YMM_REGSIZE_BYTES)) { // Size is >= YMM but not enough for ZMM -> YMM return YMM_REGSIZE_BYTES; } + // Return 0 if size is even less than XMM, otherwise - XMM - return size >= XMM_REGSIZE_BYTES ? XMM_REGSIZE_BYTES : 0; + return (size >= XMM_REGSIZE_BYTES) ? 
XMM_REGSIZE_BYTES : 0; #elif defined(TARGET_ARM64) - assert(maxSIMDStructBytes() == FP_REGSIZE_BYTES); - return size >= FP_REGSIZE_BYTES ? FP_REGSIZE_BYTES : 0; + assert(getMaxVectorByteLength() == FP_REGSIZE_BYTES); + return (size >= FP_REGSIZE_BYTES) ? FP_REGSIZE_BYTES : 0; #else assert(!"roundDownSIMDSize() unimplemented on target arch"); unreached(); #endif } - unsigned int minSIMDStructBytes() + unsigned int getMinVectorByteLength() { return emitTypeSize(TYP_SIMD8); } @@ -8889,8 +8932,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #if defined(FEATURE_SIMD) if (canUseSimd) { - maxRegSize = maxSIMDStructBytes(); + maxRegSize = getPreferredVectorByteLength(); + #if defined(TARGET_XARCH) + assert(maxRegSize <= ZMM_REGSIZE_BYTES); threshold = maxRegSize; #elif defined(TARGET_ARM64) // ldp/stp instructions can load/store two 16-byte vectors at once, e.g.: @@ -8948,7 +8993,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX bool structSizeMightRepresentSIMDType(size_t structSize) { #ifdef FEATURE_SIMD - return (structSize >= minSIMDStructBytes()) && (structSize <= maxSIMDStructBytes()); + return (structSize >= getMinVectorByteLength()) && (structSize <= getMaxVectorByteLength()); #else return false; #endif // FEATURE_SIMD @@ -9250,7 +9295,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX struct Options { - JitFlags* jitFlags; // all flags passed from the EE + // all flags passed from the EE + JitFlags* jitFlags; // The instruction sets that the compiler is allowed to emit. CORINFO_InstructionSetFlags compSupportsISA; @@ -9276,6 +9322,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX bool compUseCMOV; +#if defined(TARGET_XARCH) + uint32_t preferredVectorByteLength; +#endif // TARGET_XARCH + // optimize maximally and/or favor speed over size? 
#define DEFAULT_MIN_OPTS_CODE_SIZE 60000 diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index cc5ff2999b4fce..4f3fc02d33429e 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -315,35 +315,37 @@ void CILJit::setTargetOS(CORINFO_OS os) } /***************************************************************************** - * Determine the maximum length of SIMD vector supported by this JIT. + * Get the maximum width, in bits, that Vector is allowed to be. */ - -unsigned CILJit::getMaxIntrinsicSIMDVectorLength(CORJIT_FLAGS cpuCompileFlags) +unsigned CILJit::getMaxVectorTBitWidth(CORJIT_FLAGS cpuCompileFlags) { JitFlags jitFlags; jitFlags.SetFromFlags(cpuCompileFlags); -#ifdef FEATURE_SIMD +#if defined(FEATURE_SIMD) + CORINFO_InstructionSetFlags instructionSetFlags = cpuCompileFlags.GetInstructionSetFlags(); + #if defined(TARGET_XARCH) - if (!jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT) && - jitFlags.GetInstructionSetFlags().HasInstructionSet(InstructionSet_AVX2)) + if (instructionSetFlags.HasInstructionSet(InstructionSet_VectorT256)) { - if (GetJitTls() != nullptr && JitTls::GetCompiler() != nullptr) + if ((GetJitTls() != nullptr) && (JitTls::GetCompiler() != nullptr)) { - JITDUMP("getMaxIntrinsicSIMDVectorLength: returning 32\n"); + JITDUMP("getMaxVectorTBitWidth: returning 256\n"); } - return 32; + return 256; } #endif // defined(TARGET_XARCH) - if (GetJitTls() != nullptr && JitTls::GetCompiler() != nullptr) + assert(instructionSetFlags.HasInstructionSet(InstructionSet_VectorT128)); + + if ((GetJitTls() != nullptr) && (JitTls::GetCompiler() != nullptr)) { - JITDUMP("getMaxIntrinsicSIMDVectorLength: returning 16\n"); + JITDUMP("getMaxVectorTBitWidth: returning 128\n"); } - return 16; + return 128; #else // !FEATURE_SIMD - if (GetJitTls() != nullptr && JitTls::GetCompiler() != nullptr) + if ((GetJitTls() != nullptr) && (JitTls::GetCompiler() != nullptr)) { - JITDUMP("getMaxIntrinsicSIMDVectorLength: returning 0\n");
+ JITDUMP("getMaxVectorTBitWidth: returning 0\n"); } return 0; #endif // !FEATURE_SIMD @@ -1430,3 +1432,17 @@ unsigned Compiler::eeTryGetClassSize(CORINFO_CLASS_HANDLE clsHnd) } #endif // !DEBUG + +#if defined(TARGET_XARCH) + //------------------------------------------------------------------------ + // eeGetXarchCpuInfo: Gets the XARCH CPU information for the JIT + // + // Arguments: + // xarchCpuInfoPtr -- pointer to the struct that receives the cpu info + // + void Compiler::eeGetXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr) + { + info.compCompHnd->getXarchCpuInfo(xarchCpuInfoPtr); + } +#endif // TARGET_XARCH + diff --git a/src/coreclr/jit/ee_il_dll.hpp b/src/coreclr/jit/ee_il_dll.hpp index 162d5479dff49a..d26bbec8ce8fac 100644 --- a/src/coreclr/jit/ee_il_dll.hpp +++ b/src/coreclr/jit/ee_il_dll.hpp @@ -17,7 +17,7 @@ class CILJit : public ICorJitCompiler void getVersionIdentifier(GUID* versionIdentifier /* OUT */ ); - unsigned getMaxIntrinsicSIMDVectorLength(CORJIT_FLAGS cpuCompileFlags); + unsigned getMaxVectorTBitWidth(CORJIT_FLAGS cpuCompileFlags); void setTargetOS(CORINFO_OS os); }; diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 9959e39a41b1b8..0cfa182757c3e1 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -745,23 +745,28 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); - if (getSIMDVectorRegisterByteLength() == YMM_REGSIZE_BYTES) + uint32_t vectorTByteLength = getVectorTByteLength(); + + if (vectorTByteLength == YMM_REGSIZE_BYTES) { // Vector is TYP_SIMD32, so we should treat this as a call to Vector128.ToVector256 return impSpecialIntrinsic(NI_Vector128_ToVector256, clsHnd, method, sig, simdBaseJitType, retType, simdSize); } + else if (vectorTByteLength == XMM_REGSIZE_BYTES) + { + // We fold away the cast here, as it only exists to satisfy + // the type system.
It is safe to do this here since the retNode type + // and the signature return type are both the same TYP_SIMD. - assert(getSIMDVectorRegisterByteLength() == XMM_REGSIZE_BYTES); - - // We fold away the cast here, as it only exists to satisfy - // the type system. It is safe to do this here since the retNode type - // and the signature return type are both the same TYP_SIMD. - - retNode = impSIMDPopStack(); - SetOpLclRelatedToSIMDIntrinsic(retNode); - assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); - + retNode = impSIMDPopStack(); + SetOpLclRelatedToSIMDIntrinsic(retNode); + assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); + } + else + { + assert(vectorTByteLength == 0); + } break; } @@ -873,7 +878,9 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); - if (getSIMDVectorRegisterByteLength() == YMM_REGSIZE_BYTES) + uint32_t vectorTByteLength = getVectorTByteLength(); + + if (vectorTByteLength == YMM_REGSIZE_BYTES) { // We fold away the cast here, as it only exists to satisfy // the type system. 
It is safe to do this here since the retNode type @@ -885,27 +892,30 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - - assert(getSIMDVectorRegisterByteLength() == XMM_REGSIZE_BYTES); - - if (compExactlyDependsOn(InstructionSet_AVX)) + else if (vectorTByteLength == XMM_REGSIZE_BYTES) { - // We support Vector256 but Vector is only 16-bytes, so we should - // treat this method as a call to Vector256.GetLower or Vector128.ToVector256 - - if (intrinsic == NI_Vector256_AsVector) - { - return impSpecialIntrinsic(NI_Vector256_GetLower, clsHnd, method, sig, simdBaseJitType, retType, - simdSize); - } - else + if (compExactlyDependsOn(InstructionSet_AVX)) { - assert(intrinsic == NI_Vector256_AsVector256); - return impSpecialIntrinsic(NI_Vector128_ToVector256, clsHnd, method, sig, simdBaseJitType, retType, - 16); + // We support Vector256 but Vector is only 16-bytes, so we should + // treat this method as a call to Vector256.GetLower or Vector128.ToVector256 + + if (intrinsic == NI_Vector256_AsVector) + { + return impSpecialIntrinsic(NI_Vector256_GetLower, clsHnd, method, sig, simdBaseJitType, retType, + simdSize); + } + else + { + assert(intrinsic == NI_Vector256_AsVector256); + return impSpecialIntrinsic(NI_Vector128_ToVector256, clsHnd, method, sig, simdBaseJitType, retType, + 16); + } } } - + else + { + assert(vectorTByteLength == 0); + } break; } @@ -914,7 +924,9 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); - if (getSIMDVectorRegisterByteLength() == YMM_REGSIZE_BYTES) + uint32_t vectorTByteLength = getVectorTByteLength(); + + if (vectorTByteLength == YMM_REGSIZE_BYTES) { assert(IsBaselineVector512IsaSupported()); // We support Vector512 but Vector is only 32-bytes, so we should @@ -933,25 +945,30 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } break; } - - assert(getSIMDVectorRegisterByteLength() == XMM_REGSIZE_BYTES); - if 
(compExactlyDependsOn(InstructionSet_AVX512F)) + else if (vectorTByteLength == XMM_REGSIZE_BYTES) { - // We support Vector512 but Vector is only 16-bytes, so we should - // treat this method as a call to Vector512.GetLower128 or Vector128.ToVector512 - - if (intrinsic == NI_Vector512_AsVector) + if (compExactlyDependsOn(InstructionSet_AVX512F)) { - return impSpecialIntrinsic(NI_Vector512_GetLower128, clsHnd, method, sig, simdBaseJitType, retType, - simdSize); - } - else - { - assert(intrinsic == NI_Vector512_AsVector512); - return impSpecialIntrinsic(NI_Vector128_ToVector512, clsHnd, method, sig, simdBaseJitType, retType, - 16); + // We support Vector512 but Vector is only 16-bytes, so we should + // treat this method as a call to Vector512.GetLower128 or Vector128.ToVector512 + + if (intrinsic == NI_Vector512_AsVector) + { + return impSpecialIntrinsic(NI_Vector512_GetLower128, clsHnd, method, sig, simdBaseJitType, retType, + simdSize); + } + else + { + assert(intrinsic == NI_Vector512_AsVector512); + return impSpecialIntrinsic(NI_Vector128_ToVector512, clsHnd, method, sig, simdBaseJitType, retType, + 16); + } } } + else + { + assert(vectorTByteLength == 0); + } break; } diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index ce7d2f286db33e..4707bdd5d1f966 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -8270,10 +8270,7 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) CORINFO_SIG_INFO sig; info.compCompHnd->getMethodSig(method, &sig); - int sizeOfVectorT = getSIMDVectorRegisterByteLength(); - - result = SimdAsHWIntrinsicInfo::lookupId(this, &sig, className, methodName, enclosingClassName, - sizeOfVectorT); + result = SimdAsHWIntrinsicInfo::lookupId(this, &sig, className, methodName, enclosingClassName); #endif // FEATURE_HW_INTRINSICS if (result == NI_Illegal) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 
578edeb50ccb51..ed8a9c3bf8ac67 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -309,10 +309,18 @@ CONFIG_INTEGER(JitStressEvexEncoding, W("JitStressEvexEncoding"), 0) // Enable E // clang-format off +CONFIG_INTEGER(PreferredVectorBitWidth, W("PreferredVectorBitWidth"), 0) // The preferred width, in bits, to use for any implicit vectorization emitted. A value less than 128 is treated as the system default. + // // Hardware Intrinsic ISAs; keep in sync with clrconfigvalues.h // -CONFIG_INTEGER(EnableHWIntrinsic, W("EnableHWIntrinsic"), 1) // Allows Base+ hardware intrinsics to be disabled +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +//TODO: should implement LoongArch64's features. +//TODO-RISCV64-CQ: should implement RISCV64's features. +CONFIG_INTEGER(EnableHWIntrinsic, W("EnableHWIntrinsic"), 0) // Allows Base+ hardware intrinsics to be disabled +#else +CONFIG_INTEGER(EnableHWIntrinsic, W("EnableHWIntrinsic"), 1) // Allows Base+ hardware intrinsics to be disabled +#endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(TARGET_AMD64) || defined(TARGET_X86) CONFIG_INTEGER(EnableAES, W("EnableAES"), 1) // Allows AES+ hardware intrinsics to be disabled diff --git a/src/coreclr/jit/jitee.h b/src/coreclr/jit/jitee.h index b55719f56e8250..40373485c127db 100644 --- a/src/coreclr/jit/jitee.h +++ b/src/coreclr/jit/jitee.h @@ -227,6 +227,6 @@ class JitFlags } private: - unsigned __int64 m_jitFlags; + uint64_t m_jitFlags; CORINFO_InstructionSetFlags m_instructionSetFlags; }; diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 60dc1828c1a4cd..4f445b1cbf78db 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1732,8 +1732,8 @@ bool Compiler::StructPromotionHelper::CanPromoteStructType(CORINFO_CLASS_HANDLE structPromotionInfo = lvaStructPromotionInfo(typeHnd); #if defined(FEATURE_SIMD) - // maxSIMDStructBytes() represents the size of the largest 
primitive type that we can struct promote. - const unsigned maxSize = MAX_NumOfFieldsInPromotableStruct * compiler->maxSIMDStructBytes(); + // getMaxVectorByteLength() represents the size of the largest primitive type that we can struct promote. + const unsigned maxSize = MAX_NumOfFieldsInPromotableStruct * compiler->getMaxVectorByteLength(); #else // !FEATURE_SIMD // sizeof(double) represents the size of the largest primitive type that we can struct promote. const unsigned maxSize = MAX_NumOfFieldsInPromotableStruct * sizeof(double); diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 88e1dd6fd14def..26f2f84442d406 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -233,8 +233,6 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH { JITDUMP(" Found type Vector\n"); m_simdHandleCache->VectorHandle = typeHnd; - - size = getSIMDVectorRegisterByteLength(); break; } @@ -299,8 +297,12 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH } JITDUMP(" Found Vector<%s>\n", varTypeName(JitType2PreciseVarType(simdBaseJitType))); + size = getVectorTByteLength(); - size = getSIMDVectorRegisterByteLength(); + if (size == 0) + { + return CORINFO_TYPE_UNDEF; + } break; } diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index 5136b4abcd0a1e..c7b4950c5525dc 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -44,10 +44,11 @@ const SimdAsHWIntrinsicInfo& SimdAsHWIntrinsicInfo::lookup(NamedIntrinsic id) // lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet // // Arguments: +// comp -- The compiler +// sig -- The signature of the intrinsic // className -- The name of the class associated with the SimdIntrinsic to lookup // methodName -- The name of the method associated with the SimdIntrinsic to lookup // enclosingClassName -- The name of the enclosing class -// 
sizeOfVectorT -- The size of Vector in bytes // // Return Value: // The NamedIntrinsic associated with methodName and classId @@ -55,10 +56,9 @@ NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(Compiler* comp, CORINFO_SIG_INFO* sig, const char* className, const char* methodName, - const char* enclosingClassName, - int sizeOfVectorT) + const char* enclosingClassName) { - SimdAsHWIntrinsicClassId classId = lookupClassId(className, enclosingClassName, sizeOfVectorT); + SimdAsHWIntrinsicClassId classId = lookupClassId(comp, className, enclosingClassName); if (classId == SimdAsHWIntrinsicClassId::Unknown) { @@ -74,11 +74,42 @@ NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(Compiler* comp, isInstanceMethod = true; } - if (strcmp(methodName, "get_IsHardwareAccelerated") == 0) + if (classId == SimdAsHWIntrinsicClassId::Vector) { - return comp->IsBaselineSimdIsaSupported() ? NI_IsSupported_True : NI_IsSupported_False; + // We want to avoid doing anything that would unnecessarily trigger a recorded dependency against Vector + // so we duplicate a few checks here to ensure this works smoothly for the static Vector class. + + assert(!isInstanceMethod); + + if (strcmp(methodName, "get_IsHardwareAccelerated") == 0) + { + return comp->IsBaselineSimdIsaSupported() ? 
NI_IsSupported_True : NI_IsSupported_False; + } + + var_types retType = JITtype2varType(sig->retType); + CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; + + if (retType == TYP_STRUCT) + { + argClass = sig->retTypeSigClass; + } + else if (numArgs != 0) + { + argClass = comp->info.compCompHnd->getArgClass(sig, sig->args); + } + + const char* argNamespaceName; + const char* argClassName = comp->getClassNameFromMetadata(argClass, &argNamespaceName); + + classId = lookupClassId(comp, argClassName, nullptr); + + assert(classId != SimdAsHWIntrinsicClassId::Unknown); + assert(classId != SimdAsHWIntrinsicClassId::Vector); } + assert(strcmp(methodName, "get_IsHardwareAccelerated") != 0); + for (int i = 0; i < (NI_SIMD_AS_HWINTRINSIC_END - NI_SIMD_AS_HWINTRINSIC_START - 1); i++) { const SimdAsHWIntrinsicInfo& intrinsicInfo = simdAsHWIntrinsicInfoArray[i]; @@ -113,15 +144,15 @@ NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(Compiler* comp, // lookupClassId: Gets the SimdAsHWIntrinsicClassId for a given class name and enclsoing class name // // Arguments: +// comp -- The compiler // className -- The name of the class associated with the SimdAsHWIntrinsicClassId to lookup // enclosingClassName -- The name of the enclosing class -// sizeOfVectorT -- The size of Vector in bytes // // Return Value: // The SimdAsHWIntrinsicClassId associated with className and enclosingClassName -SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(const char* className, - const char* enclosingClassName, - int sizeOfVectorT) +SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(Compiler* comp, + const char* className, + const char* enclosingClassName) { assert(className != nullptr); @@ -159,7 +190,11 @@ SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(const char* classN className += 6; - if (strcmp(className, "2") == 0) + if (className[0] == '\0') + { + return SimdAsHWIntrinsicClassId::Vector; + } + else if 
(strcmp(className, "2") == 0) { return SimdAsHWIntrinsicClassId::Vector2; } @@ -171,17 +206,25 @@ SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(const char* classN { return SimdAsHWIntrinsicClassId::Vector4; } - else if ((className[0] == '\0') || (strcmp(className, "`1") == 0)) + else if (strcmp(className, "`1") == 0) { + uint32_t vectorTByteLength = comp->getVectorTByteLength(); + #if defined(TARGET_XARCH) - if (sizeOfVectorT == 32) + if (vectorTByteLength == 32) { return SimdAsHWIntrinsicClassId::VectorT256; } #endif // TARGET_XARCH - assert(sizeOfVectorT == 16); - return SimdAsHWIntrinsicClassId::VectorT128; + if (vectorTByteLength == 16) + { + return SimdAsHWIntrinsicClassId::VectorT128; + } + else + { + return SimdAsHWIntrinsicClassId::Unknown; + } } break; } @@ -654,6 +697,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, break; } + case NI_Quaternion_WithElement: + case NI_Vector2_WithElement: + case NI_Vector3_WithElement: + case NI_Vector4_WithElement: case NI_VectorT128_WithElement: case NI_VectorT256_WithElement: { @@ -735,6 +782,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, break; } + case NI_Quaternion_WithElement: + case NI_Vector2_WithElement: + case NI_Vector3_WithElement: + case NI_Vector4_WithElement: case NI_VectorT128_WithElement: { assert(numArgs == 3); @@ -1467,9 +1518,13 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } case NI_Quaternion_get_Item: + case NI_Quaternion_GetElement: case NI_Vector2_get_Item: + case NI_Vector2_GetElement: case NI_Vector3_get_Item: + case NI_Vector3_GetElement: case NI_Vector4_get_Item: + case NI_Vector4_GetElement: case NI_VectorT128_get_Item: case NI_VectorT128_GetElement: #if defined(TARGET_XARCH) @@ -1969,6 +2024,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, break; } + case NI_Quaternion_WithElement: + case NI_Vector2_WithElement: + case NI_Vector3_WithElement: + case 
NI_Vector4_WithElement: case NI_VectorT128_WithElement: #if defined(TARGET_XARCH) case NI_VectorT256_WithElement: diff --git a/src/coreclr/jit/simdashwintrinsic.h b/src/coreclr/jit/simdashwintrinsic.h index 7bce4330ae6ade..9cec169acb8ce6 100644 --- a/src/coreclr/jit/simdashwintrinsic.h +++ b/src/coreclr/jit/simdashwintrinsic.h @@ -9,6 +9,7 @@ enum class SimdAsHWIntrinsicClassId Unknown, Plane, Quaternion, + Vector, Vector2, Vector3, Vector4, @@ -77,11 +78,10 @@ struct SimdAsHWIntrinsicInfo CORINFO_SIG_INFO* sig, const char* className, const char* methodName, - const char* enclosingClassName, - int sizeOfVectorT); - static SimdAsHWIntrinsicClassId lookupClassId(const char* className, - const char* enclosingClassName, - int sizeOfVectorT); + const char* enclosingClassName); + static SimdAsHWIntrinsicClassId lookupClassId(Compiler* comp, + const char* className, + const char* enclosingClassName); // Member lookup diff --git a/src/coreclr/jit/simdashwintrinsiclistarm64.h b/src/coreclr/jit/simdashwintrinsiclistarm64.h index 83b5a1138c92c0..344fb7ebc6c322 100644 --- a/src/coreclr/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/jit/simdashwintrinsiclistarm64.h @@ -58,6 +58,7 @@ SIMD_AS_HWINTRINSIC_ID(Quaternion, Dot, SIMD_AS_HWINTRINSIC_ID(Quaternion, get_Identity, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_get_Identity, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, get_Item, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_get_Item, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::BaseTypeFromThisArg) SIMD_AS_HWINTRINSIC_ID(Quaternion, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Quaternion, GetElement, 2, {NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_GetElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, Inverse, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_Inverse, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, Length, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_Length, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Quaternion, LengthSquared, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_LengthSquared, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) @@ -71,6 +72,7 @@ SIMD_AS_HWINTRINSIC_ID(Quaternion, op_Multiply, SIMD_AS_HWINTRINSIC_ID(Quaternion, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_op_Subtraction, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, op_UnaryNegation, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_op_UnaryNegation, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, Subtract, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Quaternion, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA ID Name NumArg Instructions Flags @@ -91,6 +93,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector2, get_One, SIMD_AS_HWINTRINSIC_ID(Vector2, get_UnitX, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_UnitX, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, get_UnitY, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_UnitY, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, GetElement, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_GetElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, Length, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Length, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector2, LengthSquared, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_LengthSquared, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector2, Lerp, 3, {NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Lerp, NI_Illegal}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1 | SimdAsHWIntrinsicFlag::SpillSideEffectsOp2) @@ -108,6 +111,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector2, op_Subtraction, SIMD_AS_HWINTRINSIC_ID(Vector2, op_UnaryNegation, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_op_UnaryNegation, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_NM(Vector2, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, Subtract, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA ID Name NumArg Instructions Flags @@ -130,6 +134,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, get_UnitX, SIMD_AS_HWINTRINSIC_ID(Vector3, get_UnitY, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_UnitY, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_UnitZ, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_UnitZ, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, GetElement, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_GetElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, Length, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Length, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector3, LengthSquared, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_LengthSquared, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector3, Lerp, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Lerp, NI_Illegal}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1 | SimdAsHWIntrinsicFlag::SpillSideEffectsOp2) @@ -147,6 +152,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, op_Subtraction, SIMD_AS_HWINTRINSIC_ID(Vector3, op_UnaryNegation, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_op_UnaryNegation, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_NM(Vector3, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, Subtract, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Vector3_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA ID Name NumArg Instructions Flags @@ -171,6 +177,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector4, get_UnitY, SIMD_AS_HWINTRINSIC_ID(Vector4, get_UnitZ, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_UnitZ, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_UnitW, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_UnitW, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, GetElement, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_GetElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, Length, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Length, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) 
SIMD_AS_HWINTRINSIC_ID(Vector4, LengthSquared, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_LengthSquared, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector4, Lerp, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Lerp, NI_Illegal}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1 | SimdAsHWIntrinsicFlag::SpillSideEffectsOp2) @@ -188,6 +195,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector4, op_Subtraction, SIMD_AS_HWINTRINSIC_ID(Vector4, op_UnaryNegation, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_op_UnaryNegation, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_NM(Vector4, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, Subtract, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* diff --git 
a/src/coreclr/jit/simdashwintrinsiclistxarch.h b/src/coreclr/jit/simdashwintrinsiclistxarch.h index 5756db4798648c..598712e86254d4 100644 --- a/src/coreclr/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/jit/simdashwintrinsiclistxarch.h @@ -58,6 +58,7 @@ SIMD_AS_HWINTRINSIC_ID(Quaternion, Dot, SIMD_AS_HWINTRINSIC_ID(Quaternion, get_Identity, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_get_Identity, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, get_Item, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_get_Item, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::BaseTypeFromThisArg) SIMD_AS_HWINTRINSIC_ID(Quaternion, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Quaternion, GetElement, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_GetElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, Inverse, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_Inverse, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, Length, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_Length, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Quaternion, LengthSquared, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_LengthSquared, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) @@ -71,6 +72,7 @@ SIMD_AS_HWINTRINSIC_ID(Quaternion, op_Multiply, SIMD_AS_HWINTRINSIC_ID(Quaternion, op_Subtraction, 2, {NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_op_Subtraction, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, op_UnaryNegation, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_op_UnaryNegation, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, Subtract, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Quaternion, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA ID Name NumArg Instructions Flags @@ -91,6 +93,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector2, get_One, SIMD_AS_HWINTRINSIC_ID(Vector2, get_UnitX, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_UnitX, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, get_UnitY, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_UnitY, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, GetElement, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_GetElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, Length, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Length, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector2, LengthSquared, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_LengthSquared, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector2, Lerp, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Lerp, NI_Illegal}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1 | SimdAsHWIntrinsicFlag::SpillSideEffectsOp2) @@ -108,6 +111,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector2, op_Subtraction, SIMD_AS_HWINTRINSIC_ID(Vector2, op_UnaryNegation, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_op_UnaryNegation, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_NM(Vector2, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, Subtract, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA ID Name NumArg Instructions Flags @@ -130,6 +134,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, get_UnitX, SIMD_AS_HWINTRINSIC_ID(Vector3, get_UnitY, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_UnitY, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_UnitZ, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_UnitZ, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, GetElement, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_GetElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, Length, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Length, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector3, LengthSquared, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_LengthSquared, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector3, Lerp, 3, {NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Lerp, NI_Illegal}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1 | SimdAsHWIntrinsicFlag::SpillSideEffectsOp2) @@ -147,6 +152,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, op_Subtraction, SIMD_AS_HWINTRINSIC_ID(Vector3, op_UnaryNegation, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_op_UnaryNegation, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_NM(Vector3, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, Subtract, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA ID Name NumArg Instructions Flags @@ -171,6 +177,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector4, get_UnitY, SIMD_AS_HWINTRINSIC_ID(Vector4, get_UnitZ, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_UnitZ, NI_Illegal}, SimdAsHWIntrinsicFlag::None) 
SIMD_AS_HWINTRINSIC_ID(Vector4, get_UnitW, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_UnitW, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, GetElement, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_GetElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, Length, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Length, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector4, LengthSquared, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_LengthSquared, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector4, Lerp, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Lerp, NI_Illegal}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1 | SimdAsHWIntrinsicFlag::SpillSideEffectsOp2) @@ -188,6 +195,7 @@ SIMD_AS_HWINTRINSIC_ID(Vector4, op_Subtraction, SIMD_AS_HWINTRINSIC_ID(Vector4, op_UnaryNegation, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_op_UnaryNegation, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_NM(Vector4, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, Subtract, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Subtract, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA ID Name NumArg Instructions Flags diff --git a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.targets b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.targets index fbcadcc71b5977..294e496c7ff592 100644 --- a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.targets +++ b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.targets @@ -228,6 +228,7 @@ The .NET Foundation licenses this file to you under the MIT license. 
+ diff --git a/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h b/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h index 67ba547488e960..ad7d2e11ee69ac 100644 --- a/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h +++ b/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h @@ -34,6 +34,7 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Avx512dq_vl = 0x400000, XArchIntrinsicConstants_Avx512Vbmi = 0x800000, XArchIntrinsicConstants_Avx512Vbmi_vl = 0x1000000, + XArchIntrinsicConstants_Serialize = 0x2000000, }; #endif //HOST_X86 || HOST_AMD64 diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp index 177e6c792cd0b6..bde9e3d50a08bb 100644 --- a/src/coreclr/nativeaot/Runtime/startup.cpp +++ b/src/coreclr/nativeaot/Runtime/startup.cpp @@ -197,51 +197,57 @@ bool DetectCPUFeatures() { __cpuid(cpuidInfo, 0x00000001); - if (((cpuidInfo[CPUID_EDX] & (1 << 25)) != 0) && ((cpuidInfo[CPUID_EDX] & (1 << 26)) != 0)) // SSE & SSE2 + const int requiredBaselineEdxFlags = (1 << 25) // SSE + | (1 << 26); // SSE2 + + if ((cpuidInfo[CPUID_EDX] & requiredBaselineEdxFlags) == requiredBaselineEdxFlags) { - if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI + if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI { g_cpuFeatures |= XArchIntrinsicConstants_Aes; } - if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // PCLMULQDQ + if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // PCLMULQDQ { g_cpuFeatures |= XArchIntrinsicConstants_Pclmulqdq; } - if ((cpuidInfo[CPUID_ECX] & (1 << 0)) != 0) // SSE3 + if ((cpuidInfo[CPUID_ECX] & (1 << 0)) != 0) // SSE3 { g_cpuFeatures |= XArchIntrinsicConstants_Sse3; - if ((cpuidInfo[CPUID_ECX] & (1 << 9)) != 0) // SSSE3 + if ((cpuidInfo[CPUID_ECX] & (1 << 9)) != 0) // SSSE3 { g_cpuFeatures |= XArchIntrinsicConstants_Ssse3; - if ((cpuidInfo[CPUID_ECX] & (1 << 19)) != 0) // SSE4.1 + if ((cpuidInfo[CPUID_ECX] & (1 << 19)) != 0) // SSE4.1 { g_cpuFeatures |= XArchIntrinsicConstants_Sse41; - if ((cpuidInfo[CPUID_ECX] & (1 
<< 20)) != 0) // SSE4.2 + if ((cpuidInfo[CPUID_ECX] & (1 << 20)) != 0) // SSE4.2 { g_cpuFeatures |= XArchIntrinsicConstants_Sse42; - if ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0) // MOVBE + if ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0) // MOVBE { g_cpuFeatures |= XArchIntrinsicConstants_Movbe; } - if ((cpuidInfo[CPUID_ECX] & (1 << 23)) != 0) // POPCNT + if ((cpuidInfo[CPUID_ECX] & (1 << 23)) != 0) // POPCNT { g_cpuFeatures |= XArchIntrinsicConstants_Popcnt; } - if (((cpuidInfo[CPUID_ECX] & (1 << 27)) != 0) && ((cpuidInfo[CPUID_ECX] & (1 << 28)) != 0)) // OSXSAVE & AVX + const int requiredAvxEcxFlags = (1 << 27) // OSXSAVE + | (1 << 28); // AVX + + if ((cpuidInfo[CPUID_ECX] & requiredAvxEcxFlags) == requiredAvxEcxFlags) { - if (PalIsAvxEnabled() && (xmmYmmStateSupport() == 1)) + if (PalIsAvxEnabled() && (xmmYmmStateSupport() == 1)) // XGETBV == 11 { g_cpuFeatures |= XArchIntrinsicConstants_Avx; - if ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0) // FMA + if ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0) // FMA { g_cpuFeatures |= XArchIntrinsicConstants_Fma; } @@ -250,66 +256,67 @@ bool DetectCPUFeatures() { __cpuidex(cpuidInfo, 0x00000007, 0x00000000); - if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2 + if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2 { g_cpuFeatures |= XArchIntrinsicConstants_Avx2; - __cpuidex(cpuidInfo, 0x00000007, 0x00000001); - if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI - { - g_cpuFeatures |= XArchIntrinsicConstants_AvxVnni; - } - - if (PalIsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 + if (PalIsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 { - if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F + if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F { g_cpuFeatures |= XArchIntrinsicConstants_Avx512f; bool isAVX512_VLSupported = false; - if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL + if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL { g_cpuFeatures |= 
XArchIntrinsicConstants_Avx512f_vl; isAVX512_VLSupported = true; } - if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) // AVX512BW + if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) // AVX512BW { g_cpuFeatures |= XArchIntrinsicConstants_Avx512bw; - if (isAVX512_VLSupported) + if (isAVX512_VLSupported) // AVX512BW_VL { g_cpuFeatures |= XArchIntrinsicConstants_Avx512bw_vl; } } - if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) // AVX512CD + if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) // AVX512CD { g_cpuFeatures |= XArchIntrinsicConstants_Avx512cd; - if (isAVX512_VLSupported) + if (isAVX512_VLSupported) // AVX512CD_VL { g_cpuFeatures |= XArchIntrinsicConstants_Avx512cd_vl; } } - if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) // AVX512DQ + if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) // AVX512DQ { g_cpuFeatures |= XArchIntrinsicConstants_Avx512dq; - if (isAVX512_VLSupported) + if (isAVX512_VLSupported) // AVX512DQ_VL { g_cpuFeatures |= XArchIntrinsicConstants_Avx512dq_vl; } } - if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI + if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI { g_cpuFeatures |= XArchIntrinsicConstants_Avx512Vbmi; - if (isAVX512_VLSupported) + if (isAVX512_VLSupported) // AVX512VBMI_VL { g_cpuFeatures |= XArchIntrinsicConstants_Avx512Vbmi_vl; } } } } + + __cpuidex(cpuidInfo, 0x00000007, 0x00000001); + + if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI + { + g_cpuFeatures |= XArchIntrinsicConstants_AvxVnni; + } } } } @@ -324,15 +331,20 @@ bool DetectCPUFeatures() { __cpuidex(cpuidInfo, 0x00000007, 0x00000000); - if ((cpuidInfo[CPUID_EBX] & (1 << 3)) != 0) // BMI1 + if ((cpuidInfo[CPUID_EBX] & (1 << 3)) != 0) // BMI1 { g_cpuFeatures |= XArchIntrinsicConstants_Bmi1; } - if ((cpuidInfo[CPUID_EBX] & (1 << 8)) != 0) // BMI2 + if ((cpuidInfo[CPUID_EBX] & (1 << 8)) != 0) // BMI2 { g_cpuFeatures |= XArchIntrinsicConstants_Bmi2; } + + if ((cpuidInfo[CPUID_EDX] & (1 << 14)) != 0) + { + g_cpuFeatures |= XArchIntrinsicConstants_Serialize; // SERIALIZE + } 
} } @@ -343,7 +355,7 @@ bool DetectCPUFeatures() { __cpuid(cpuidInfo, 0x80000001); - if ((cpuidInfo[CPUID_ECX] & (1 << 5)) != 0) // LZCNT + if ((cpuidInfo[CPUID_ECX] & (1 << 5)) != 0) // LZCNT { g_cpuFeatures |= XArchIntrinsicConstants_Lzcnt; } diff --git a/src/coreclr/nativeaot/docs/optimizing.md b/src/coreclr/nativeaot/docs/optimizing.md index 9f2c9ae70d12a8..9a90eb97c21286 100644 --- a/src/coreclr/nativeaot/docs/optimizing.md +++ b/src/coreclr/nativeaot/docs/optimizing.md @@ -40,4 +40,5 @@ Since `PublishTrimmed` is implied to be true with Native AOT, some framework fea * `Speed`: when generating optimized code, favor code execution speed. * `Size`: when generating optimized code, favor smaller code size. * ``: By default, the compiler targets the minimum instruction set supported by the target OS and architecture. This option allows targeting newer instruction sets for better performance. The native binary will require the instruction sets to be supported by the hardware in order to run. For example, `avx2,bmi2,fma,pclmul,popcnt,aes` will produce binary that takes advantage of instruction sets that are typically present on current Intel and AMD processors. Run `ilc --help` for the full list of available instruction sets. `ilc` can be executed from the NativeAOT package in your local nuget cache e.g. `%USERPROFILE%\.nuget\packages\runtime.win-x64.microsoft.dotnet.ilcompiler\8.0.0-...\tools\ilc.exe` on Windows or `~/.nuget/packages/runtime.linux-arm64.microsoft.dotnet.ilcompiler/8.0.0-.../tools/ilc` on Linux. +* `<IlcMaxVectorTBitWidth>`: By default, the compiler targets a `Vector<T>` size of `16` or `32` bytes, depending on the underlying instruction sets supported. This option allows specifying a different maximum bit width. For example, by default on x64 hardware `Vector<T>` will be 16 bytes. However, if `AVX2` is targeted then `Vector<T>` will automatically grow to 32 bytes; setting `<IlcMaxVectorTBitWidth>128</IlcMaxVectorTBitWidth>` would keep the size at 16 bytes.
Alternatively, even if `AVX512F` is targeted, `Vector<T>` will not grow larger than 32 bytes by default; setting `<IlcMaxVectorTBitWidth>512</IlcMaxVectorTBitWidth>` would allow it to grow to 64 bytes. diff --git a/src/coreclr/pal/src/misc/jitsupport.cpp b/src/coreclr/pal/src/misc/jitsupport.cpp index f7ca5c36e71217..fbf94ad76b0ac4 100644 --- a/src/coreclr/pal/src/misc/jitsupport.cpp +++ b/src/coreclr/pal/src/misc/jitsupport.cpp @@ -235,7 +235,10 @@ PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags) #endif #ifdef HWCAP_ASIMD if (hwCap & HWCAP_ASIMD) + { flags->Set(InstructionSet_AdvSimd); + flags->Set(InstructionSet_VectorT128); + } #endif #ifdef HWCAP_ASIMDRDM if (hwCap & HWCAP_ASIMDRDM) diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index 6d124bda4673c6..53bd63d23c3bfb 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -96,18 +96,33 @@ public SimdVectorLength GetVectorTSimdVector() { if ((_targetArchitecture == TargetArchitecture.X64) || (_targetArchitecture == TargetArchitecture.X86)) { - Debug.Assert(InstructionSet.X64_AVX2 == InstructionSet.X86_AVX2); - Debug.Assert(InstructionSet.X64_SSE2 == InstructionSet.X86_SSE2); - if (IsInstructionSetSupported(InstructionSet.X86_AVX2)) + Debug.Assert(InstructionSet.X64_VectorT512 == InstructionSet.X86_VectorT512); + Debug.Assert(InstructionSet.X64_VectorT256 == InstructionSet.X86_VectorT256); + Debug.Assert(InstructionSet.X64_VectorT128 == InstructionSet.X86_VectorT128); + + if (IsInstructionSetSupported(InstructionSet.X64_VectorT256)) + { return SimdVectorLength.Vector256Bit; - else if (IsInstructionSetExplicitlyUnsupported(InstructionSet.X86_AVX2) && IsInstructionSetSupported(InstructionSet.X64_SSE2)) + } + else if (IsInstructionSetSupported(InstructionSet.X64_VectorT128)) + { return SimdVectorLength.Vector128Bit; + } else + { return SimdVectorLength.None; + } } else if (_targetArchitecture ==
TargetArchitecture.ARM64) { - return SimdVectorLength.Vector128Bit; + if (IsInstructionSetSupported(InstructionSet.ARM64_VectorT128)) + { + return SimdVectorLength.Vector128Bit; + } + else + { + return SimdVectorLength.None; + } } else if (_targetArchitecture == TargetArchitecture.ARM) { @@ -245,9 +260,10 @@ public bool RemoveInstructionSetSupport(string instructionSet) /// Seal modifications to instruction set support /// /// returns "false" if instruction set isn't valid on this architecture - public bool ComputeInstructionSetFlags(out InstructionSetFlags supportedInstructionSets, - out InstructionSetFlags unsupportedInstructionSets, - Action invalidInstructionSetImplication) + public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, + out InstructionSetFlags supportedInstructionSets, + out InstructionSetFlags unsupportedInstructionSets, + Action invalidInstructionSetImplication) { supportedInstructionSets = new InstructionSetFlags(); unsupportedInstructionSets = new InstructionSetFlags(); @@ -288,6 +304,55 @@ public bool ComputeInstructionSetFlags(out InstructionSetFlags supportedInstruct } } + switch (_architecture) + { + case TargetArchitecture.X64: + case TargetArchitecture.X86: + { + Debug.Assert(InstructionSet.X86_AVX512F == InstructionSet.X64_AVX512F); + Debug.Assert(InstructionSet.X86_AVX2 == InstructionSet.X64_AVX2); + Debug.Assert(InstructionSet.X86_SSE2 == InstructionSet.X64_SSE2); + + Debug.Assert(InstructionSet.X86_VectorT512 == InstructionSet.X64_VectorT512); + Debug.Assert(InstructionSet.X86_VectorT256 == InstructionSet.X64_VectorT256); + Debug.Assert(InstructionSet.X86_VectorT128 == InstructionSet.X64_VectorT128); + + // Unlike for the JIT, we cannot default to enabling Vector to the below sizes + // as it may fail to launch in the case where `--verify-type-and-field-layout` + // was specified. So instead, only enable Vector when we have an explicit width. 
+ + if (maxVectorTBitWidth >= 128) + { + supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT128); + } + + if (supportedInstructionSets.HasInstructionSet(InstructionSet.X86_AVX2)) + { + if (maxVectorTBitWidth >= 256) + { + supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT256); + } + + if (supportedInstructionSets.HasInstructionSet(InstructionSet.X86_AVX512F)) + { + if (maxVectorTBitWidth >= 512) + { + supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT512); + } + } + } + break; + } + + case TargetArchitecture.ARM64: + { + Debug.Assert(supportedInstructionSets.HasInstructionSet(InstructionSet.ARM64_AdvSimd)); + supportedInstructionSets.AddInstructionSet(InstructionSet.ARM64_VectorT128); + break; + } + } + + return true; } } diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index 8aa5705916dede..f6c4f3a6e1d26a 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -11,7 +11,7 @@ namespace System.CommandLine { internal static partial class Helpers { - public static InstructionSetSupport ConfigureInstructionSetSupport(string instructionSet, TargetArchitecture targetArchitecture, TargetOS targetOS, + public static InstructionSetSupport ConfigureInstructionSetSupport(string instructionSet, int maxVectorTBitWidth, TargetArchitecture targetArchitecture, TargetOS targetOS, string mustNotBeMessage, string invalidImplicationMessage) { InstructionSetSupportBuilder instructionSetSupportBuilder = new(targetArchitecture); @@ -74,7 +74,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru } } - instructionSetSupportBuilder.ComputeInstructionSetFlags(out var supportedInstructionSet, out var unsupportedInstructionSet, + instructionSetSupportBuilder.ComputeInstructionSetFlags(maxVectorTBitWidth, out var supportedInstructionSet, out var unsupportedInstructionSet, 
(string specifiedInstructionSet, string impliedInstructionSet) => throw new CommandLineException(string.Format(invalidImplicationMessage, specifiedInstructionSet, impliedInstructionSet))); @@ -93,16 +93,33 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("movbe"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("popcnt"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lzcnt"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("serialize"); // If AVX was enabled, we can opportunistically enable instruction sets which use the VEX encodings Debug.Assert(InstructionSet.X64_AVX == InstructionSet.X86_AVX); if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX)) { + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx2"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("fma"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi2"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnni"); } + + Debug.Assert(InstructionSet.X64_AVX512F == InstructionSet.X86_AVX512F); + if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512F)) + { + Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512F_VL)); + Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512BW)); + Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)); + Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512CD)); + Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512CD_VL)); + Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512DQ)); + 
Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL)); + + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vbmi"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vbmi_vl"); + } } else if (targetArchitecture == TargetArchitecture.ARM64) { @@ -111,9 +128,12 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha1"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha2"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lse"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("dotprod"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rdma"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rcpc"); } - optimisticInstructionSetSupportBuilder.ComputeInstructionSetFlags(out var optimisticInstructionSet, out _, + optimisticInstructionSetSupportBuilder.ComputeInstructionSetFlags(maxVectorTBitWidth, out var optimisticInstructionSet, out _, (string specifiedInstructionSet, string impliedInstructionSet) => throw new NotSupportedException()); optimisticInstructionSet.Remove(unsupportedInstructionSet); optimisticInstructionSet.Add(supportedInstructionSet); diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index b387eedd89f2b7..39f01526068b31 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -42,6 +42,7 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.ARM64_Atomics: return ReadyToRunInstructionSet.Atomics; case InstructionSet.ARM64_Vector64: return null; case InstructionSet.ARM64_Vector128: return null; + case 
InstructionSet.ARM64_VectorT128: return null; case InstructionSet.ARM64_Dczva: return null; case InstructionSet.ARM64_Rcpc: return ReadyToRunInstructionSet.Rcpc; @@ -88,6 +89,9 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_Vector128: return null; case InstructionSet.X64_Vector256: return null; case InstructionSet.X64_Vector512: return null; + case InstructionSet.X64_VectorT128: return null; + case InstructionSet.X64_VectorT256: return null; + case InstructionSet.X64_VectorT512: return null; case InstructionSet.X64_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni; case InstructionSet.X64_AVXVNNI_X64: return ReadyToRunInstructionSet.AvxVnni; case InstructionSet.X64_MOVBE: return ReadyToRunInstructionSet.Movbe; @@ -158,6 +162,9 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_Vector128: return null; case InstructionSet.X86_Vector256: return null; case InstructionSet.X86_Vector512: return null; + case InstructionSet.X86_VectorT128: return null; + case InstructionSet.X86_VectorT256: return null; + case InstructionSet.X86_VectorT512: return null; case InstructionSet.X86_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni; case InstructionSet.X86_AVXVNNI_X64: return null; case InstructionSet.X86_MOVBE: return ReadyToRunInstructionSet.Movbe; diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index 6d3806f2987682..a77622bb7acb3d 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -149,7 +149,7 @@ private static extern CorJitResult JitCompileMethod(out IntPtr exception, ref CORINFO_METHOD_INFO info, uint flags, out IntPtr nativeEntry, out uint codeSize); [DllImport(JitSupportLibrary)] - private static extern uint GetMaxIntrinsicSIMDVectorLength(IntPtr jit, CORJIT_FLAGS* flags); + private static extern uint GetMaxVectorTBitWidth(IntPtr jit, CORJIT_FLAGS* flags); 
[DllImport(JitSupportLibrary)] private static extern IntPtr AllocException([MarshalAs(UnmanagedType.LPWStr)]string message, int messageLength); @@ -3263,6 +3263,14 @@ private uint getRISCV64PassStructInRegisterFlags(CORINFO_CLASS_STRUCT_* cls) return RISCV64PassStructInRegister.GetRISCV64PassStructInRegisterFlags(typeDesc); } +#pragma warning disable CA1822 // Mark members as static + private void getXarchCpuInfo(ref CORINFO_XARCH_CPU xarchCpuInfoPtr) + { + // We can't assume a CPU for AOT compilation so return the default + xarchCpuInfoPtr = default; + } +#pragma warning restore CA1822 // Mark members as static + private uint getThreadTLSIndex(ref void* ppIndirection) { throw new NotImplementedException("getThreadTLSIndex"); } private void* getInlinedCallFrameVptr(ref void* ppIndirection) diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs index 3c7f632ef8e7b0..b9ef19be411995 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs @@ -1964,6 +1964,20 @@ private static uint _getThreadTLSIndex(IntPtr thisHandle, IntPtr* ppException, v } } + [UnmanagedCallersOnly] + private static void _getXarchCpuInfo(IntPtr thisHandle, IntPtr* ppException, CORINFO_XARCH_CPU* xarchCpuInfoPtr) + { + var _this = GetThis(thisHandle); + try + { + _this.getXarchCpuInfo(ref *xarchCpuInfoPtr); + } + catch (Exception ex) + { + *ppException = _this.AllocException(ex); + } + } + [UnmanagedCallersOnly] private static void* _getInlinedCallFrameVptr(IntPtr thisHandle, IntPtr* ppException, void** ppIndirection) { @@ -2730,7 +2744,7 @@ private static uint _getJitFlags(IntPtr thisHandle, IntPtr* ppException, CORJIT_ private static IntPtr GetUnmanagedCallbacks() { - void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 184); + void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 185); callbacks[0] 
= (delegate* unmanaged)&_isIntrinsic; callbacks[1] = (delegate* unmanaged)&_getMethodAttribs; @@ -2864,58 +2878,59 @@ private static IntPtr GetUnmanagedCallbacks() callbacks[129] = (delegate* unmanaged)&_getLoongArch64PassStructInRegisterFlags; callbacks[130] = (delegate* unmanaged)&_getRISCV64PassStructInRegisterFlags; callbacks[131] = (delegate* unmanaged)&_getThreadTLSIndex; - callbacks[132] = (delegate* unmanaged)&_getInlinedCallFrameVptr; - callbacks[133] = (delegate* unmanaged)&_getAddrOfCaptureThreadGlobal; - callbacks[134] = (delegate* unmanaged)&_getHelperFtn; - callbacks[135] = (delegate* unmanaged)&_getFunctionEntryPoint; - callbacks[136] = (delegate* unmanaged)&_getFunctionFixedEntryPoint; - callbacks[137] = (delegate* unmanaged)&_getMethodSync; - callbacks[138] = (delegate* unmanaged)&_getLazyStringLiteralHelper; - callbacks[139] = (delegate* unmanaged)&_embedModuleHandle; - callbacks[140] = (delegate* unmanaged)&_embedClassHandle; - callbacks[141] = (delegate* unmanaged)&_embedMethodHandle; - callbacks[142] = (delegate* unmanaged)&_embedFieldHandle; - callbacks[143] = (delegate* unmanaged)&_embedGenericHandle; - callbacks[144] = (delegate* unmanaged)&_getLocationOfThisType; - callbacks[145] = (delegate* unmanaged)&_getAddressOfPInvokeTarget; - callbacks[146] = (delegate* unmanaged)&_GetCookieForPInvokeCalliSig; - callbacks[147] = (delegate* unmanaged)&_canGetCookieForPInvokeCalliSig; - callbacks[148] = (delegate* unmanaged)&_getJustMyCodeHandle; - callbacks[149] = (delegate* unmanaged)&_GetProfilingHandle; - callbacks[150] = (delegate* unmanaged)&_getCallInfo; - callbacks[151] = (delegate* unmanaged)&_canAccessFamily; - callbacks[152] = (delegate* unmanaged)&_isRIDClassDomainID; - callbacks[153] = (delegate* unmanaged)&_getClassDomainID; - callbacks[154] = (delegate* unmanaged)&_getStaticFieldContent; - callbacks[155] = (delegate* unmanaged)&_getObjectContent; - callbacks[156] = (delegate* unmanaged)&_getStaticFieldCurrentClass; - callbacks[157] = 
(delegate* unmanaged)&_getVarArgsHandle; - callbacks[158] = (delegate* unmanaged)&_canGetVarArgsHandle; - callbacks[159] = (delegate* unmanaged)&_constructStringLiteral; - callbacks[160] = (delegate* unmanaged)&_emptyStringLiteral; - callbacks[161] = (delegate* unmanaged)&_getFieldThreadLocalStoreID; - callbacks[162] = (delegate* unmanaged)&_GetDelegateCtor; - callbacks[163] = (delegate* unmanaged)&_MethodCompileComplete; - callbacks[164] = (delegate* unmanaged)&_getTailCallHelpers; - callbacks[165] = (delegate* unmanaged)&_convertPInvokeCalliToCall; - callbacks[166] = (delegate* unmanaged)&_notifyInstructionSetUsage; - callbacks[167] = (delegate* unmanaged)&_updateEntryPointForTailCall; - callbacks[168] = (delegate* unmanaged)&_allocMem; - callbacks[169] = (delegate* unmanaged)&_reserveUnwindInfo; - callbacks[170] = (delegate* unmanaged)&_allocUnwindInfo; - callbacks[171] = (delegate* unmanaged)&_allocGCInfo; - callbacks[172] = (delegate* unmanaged)&_setEHcount; - callbacks[173] = (delegate* unmanaged)&_setEHinfo; - callbacks[174] = (delegate* unmanaged)&_logMsg; - callbacks[175] = (delegate* unmanaged)&_doAssert; - callbacks[176] = (delegate* unmanaged)&_reportFatalError; - callbacks[177] = (delegate* unmanaged)&_getPgoInstrumentationResults; - callbacks[178] = (delegate* unmanaged)&_allocPgoInstrumentationBySchema; - callbacks[179] = (delegate* unmanaged)&_recordCallSite; - callbacks[180] = (delegate* unmanaged)&_recordRelocation; - callbacks[181] = (delegate* unmanaged)&_getRelocTypeHint; - callbacks[182] = (delegate* unmanaged)&_getExpectedTargetArchitecture; - callbacks[183] = (delegate* unmanaged)&_getJitFlags; + callbacks[132] = (delegate* unmanaged)&_getXarchCpuInfo; + callbacks[133] = (delegate* unmanaged)&_getInlinedCallFrameVptr; + callbacks[134] = (delegate* unmanaged)&_getAddrOfCaptureThreadGlobal; + callbacks[135] = (delegate* unmanaged)&_getHelperFtn; + callbacks[136] = (delegate* unmanaged)&_getFunctionEntryPoint; + callbacks[137] = (delegate* 
unmanaged)&_getFunctionFixedEntryPoint; + callbacks[138] = (delegate* unmanaged)&_getMethodSync; + callbacks[139] = (delegate* unmanaged)&_getLazyStringLiteralHelper; + callbacks[140] = (delegate* unmanaged)&_embedModuleHandle; + callbacks[141] = (delegate* unmanaged)&_embedClassHandle; + callbacks[142] = (delegate* unmanaged)&_embedMethodHandle; + callbacks[143] = (delegate* unmanaged)&_embedFieldHandle; + callbacks[144] = (delegate* unmanaged)&_embedGenericHandle; + callbacks[145] = (delegate* unmanaged)&_getLocationOfThisType; + callbacks[146] = (delegate* unmanaged)&_getAddressOfPInvokeTarget; + callbacks[147] = (delegate* unmanaged)&_GetCookieForPInvokeCalliSig; + callbacks[148] = (delegate* unmanaged)&_canGetCookieForPInvokeCalliSig; + callbacks[149] = (delegate* unmanaged)&_getJustMyCodeHandle; + callbacks[150] = (delegate* unmanaged)&_GetProfilingHandle; + callbacks[151] = (delegate* unmanaged)&_getCallInfo; + callbacks[152] = (delegate* unmanaged)&_canAccessFamily; + callbacks[153] = (delegate* unmanaged)&_isRIDClassDomainID; + callbacks[154] = (delegate* unmanaged)&_getClassDomainID; + callbacks[155] = (delegate* unmanaged)&_getStaticFieldContent; + callbacks[156] = (delegate* unmanaged)&_getObjectContent; + callbacks[157] = (delegate* unmanaged)&_getStaticFieldCurrentClass; + callbacks[158] = (delegate* unmanaged)&_getVarArgsHandle; + callbacks[159] = (delegate* unmanaged)&_canGetVarArgsHandle; + callbacks[160] = (delegate* unmanaged)&_constructStringLiteral; + callbacks[161] = (delegate* unmanaged)&_emptyStringLiteral; + callbacks[162] = (delegate* unmanaged)&_getFieldThreadLocalStoreID; + callbacks[163] = (delegate* unmanaged)&_GetDelegateCtor; + callbacks[164] = (delegate* unmanaged)&_MethodCompileComplete; + callbacks[165] = (delegate* unmanaged)&_getTailCallHelpers; + callbacks[166] = (delegate* unmanaged)&_convertPInvokeCalliToCall; + callbacks[167] = (delegate* unmanaged)&_notifyInstructionSetUsage; + callbacks[168] = (delegate* 
unmanaged)&_updateEntryPointForTailCall; + callbacks[169] = (delegate* unmanaged)&_allocMem; + callbacks[170] = (delegate* unmanaged)&_reserveUnwindInfo; + callbacks[171] = (delegate* unmanaged)&_allocUnwindInfo; + callbacks[172] = (delegate* unmanaged)&_allocGCInfo; + callbacks[173] = (delegate* unmanaged)&_setEHcount; + callbacks[174] = (delegate* unmanaged)&_setEHinfo; + callbacks[175] = (delegate* unmanaged)&_logMsg; + callbacks[176] = (delegate* unmanaged)&_doAssert; + callbacks[177] = (delegate* unmanaged)&_reportFatalError; + callbacks[178] = (delegate* unmanaged)&_getPgoInstrumentationResults; + callbacks[179] = (delegate* unmanaged)&_allocPgoInstrumentationBySchema; + callbacks[180] = (delegate* unmanaged)&_recordCallSite; + callbacks[181] = (delegate* unmanaged)&_recordRelocation; + callbacks[182] = (delegate* unmanaged)&_getRelocTypeHint; + callbacks[183] = (delegate* unmanaged)&_getExpectedTargetArchitecture; + callbacks[184] = (delegate* unmanaged)&_getJitFlags; return (IntPtr)callbacks; } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 73538c68172891..05debe11e908e4 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -15,7 +15,7 @@ namespace Internal.JitInterface public enum InstructionSet { ILLEGAL = 0, - NONE = 63, + NONE = 127, ARM64_ArmBase = InstructionSet_ARM64.ArmBase, ARM64_AdvSimd = InstructionSet_ARM64.AdvSimd, ARM64_Aes = InstructionSet_ARM64.Aes, @@ -27,6 +27,7 @@ public enum InstructionSet ARM64_Atomics = InstructionSet_ARM64.Atomics, ARM64_Vector64 = InstructionSet_ARM64.Vector64, ARM64_Vector128 = InstructionSet_ARM64.Vector128, + ARM64_VectorT128 = InstructionSet_ARM64.VectorT128, ARM64_Dczva = InstructionSet_ARM64.Dczva, ARM64_Rcpc = InstructionSet_ARM64.Rcpc, ARM64_ArmBase_Arm64 = InstructionSet_ARM64.ArmBase_Arm64, @@ -56,6 +57,9 @@ public 
enum InstructionSet X64_Vector128 = InstructionSet_X64.Vector128, X64_Vector256 = InstructionSet_X64.Vector256, X64_Vector512 = InstructionSet_X64.Vector512, + X64_VectorT128 = InstructionSet_X64.VectorT128, + X64_VectorT256 = InstructionSet_X64.VectorT256, + X64_VectorT512 = InstructionSet_X64.VectorT512, X64_AVXVNNI = InstructionSet_X64.AVXVNNI, X64_MOVBE = InstructionSet_X64.MOVBE, X64_X86Serialize = InstructionSet_X64.X86Serialize, @@ -117,6 +121,9 @@ public enum InstructionSet X86_Vector128 = InstructionSet_X86.Vector128, X86_Vector256 = InstructionSet_X86.Vector256, X86_Vector512 = InstructionSet_X86.Vector512, + X86_VectorT128 = InstructionSet_X86.VectorT128, + X86_VectorT256 = InstructionSet_X86.VectorT256, + X86_VectorT512 = InstructionSet_X86.VectorT512, X86_AVXVNNI = InstructionSet_X86.AVXVNNI, X86_MOVBE = InstructionSet_X86.MOVBE, X86_X86Serialize = InstructionSet_X86.X86Serialize, @@ -175,16 +182,17 @@ public enum InstructionSet_ARM64 Atomics = 9, Vector64 = 10, Vector128 = 11, - Dczva = 12, - Rcpc = 13, - ArmBase_Arm64 = 14, - AdvSimd_Arm64 = 15, - Aes_Arm64 = 16, - Crc32_Arm64 = 17, - Dp_Arm64 = 18, - Rdm_Arm64 = 19, - Sha1_Arm64 = 20, - Sha256_Arm64 = 21, + VectorT128 = 12, + Dczva = 13, + Rcpc = 14, + ArmBase_Arm64 = 15, + AdvSimd_Arm64 = 16, + Aes_Arm64 = 17, + Crc32_Arm64 = 18, + Dp_Arm64 = 19, + Rdm_Arm64 = 20, + Sha1_Arm64 = 21, + Sha256_Arm64 = 22, } public enum InstructionSet_X64 @@ -210,48 +218,51 @@ public enum InstructionSet_X64 Vector128 = 17, Vector256 = 18, Vector512 = 19, - AVXVNNI = 20, - MOVBE = 21, - X86Serialize = 22, - AVX512F = 23, - AVX512F_VL = 24, - AVX512BW = 25, - AVX512BW_VL = 26, - AVX512CD = 27, - AVX512CD_VL = 28, - AVX512DQ = 29, - AVX512DQ_VL = 30, - AVX512VBMI = 31, - AVX512VBMI_VL = 32, - X86Base_X64 = 33, - SSE_X64 = 34, - SSE2_X64 = 35, - SSE3_X64 = 36, - SSSE3_X64 = 37, - SSE41_X64 = 38, - SSE42_X64 = 39, - AVX_X64 = 40, - AVX2_X64 = 41, - AES_X64 = 42, - BMI1_X64 = 43, - BMI2_X64 = 44, - FMA_X64 = 45, - LZCNT_X64 
= 46, - PCLMULQDQ_X64 = 47, - POPCNT_X64 = 48, - AVXVNNI_X64 = 49, - MOVBE_X64 = 50, - X86Serialize_X64 = 51, - AVX512F_X64 = 52, - AVX512F_VL_X64 = 53, - AVX512BW_X64 = 54, - AVX512BW_VL_X64 = 55, - AVX512CD_X64 = 56, - AVX512CD_VL_X64 = 57, - AVX512DQ_X64 = 58, - AVX512DQ_VL_X64 = 59, - AVX512VBMI_X64 = 60, - AVX512VBMI_VL_X64 = 61, + VectorT128 = 20, + VectorT256 = 21, + VectorT512 = 22, + AVXVNNI = 23, + MOVBE = 24, + X86Serialize = 25, + AVX512F = 26, + AVX512F_VL = 27, + AVX512BW = 28, + AVX512BW_VL = 29, + AVX512CD = 30, + AVX512CD_VL = 31, + AVX512DQ = 32, + AVX512DQ_VL = 33, + AVX512VBMI = 34, + AVX512VBMI_VL = 35, + X86Base_X64 = 36, + SSE_X64 = 37, + SSE2_X64 = 38, + SSE3_X64 = 39, + SSSE3_X64 = 40, + SSE41_X64 = 41, + SSE42_X64 = 42, + AVX_X64 = 43, + AVX2_X64 = 44, + AES_X64 = 45, + BMI1_X64 = 46, + BMI2_X64 = 47, + FMA_X64 = 48, + LZCNT_X64 = 49, + PCLMULQDQ_X64 = 50, + POPCNT_X64 = 51, + AVXVNNI_X64 = 52, + MOVBE_X64 = 53, + X86Serialize_X64 = 54, + AVX512F_X64 = 55, + AVX512F_VL_X64 = 56, + AVX512BW_X64 = 57, + AVX512BW_VL_X64 = 58, + AVX512CD_X64 = 59, + AVX512CD_VL_X64 = 60, + AVX512DQ_X64 = 61, + AVX512DQ_VL_X64 = 62, + AVX512VBMI_X64 = 63, + AVX512VBMI_VL_X64 = 64, } public enum InstructionSet_X86 @@ -277,53 +288,56 @@ public enum InstructionSet_X86 Vector128 = 17, Vector256 = 18, Vector512 = 19, - AVXVNNI = 20, - MOVBE = 21, - X86Serialize = 22, - AVX512F = 23, - AVX512F_VL = 24, - AVX512BW = 25, - AVX512BW_VL = 26, - AVX512CD = 27, - AVX512CD_VL = 28, - AVX512DQ = 29, - AVX512DQ_VL = 30, - AVX512VBMI = 31, - AVX512VBMI_VL = 32, - X86Base_X64 = 33, - SSE_X64 = 34, - SSE2_X64 = 35, - SSE3_X64 = 36, - SSSE3_X64 = 37, - SSE41_X64 = 38, - SSE42_X64 = 39, - AVX_X64 = 40, - AVX2_X64 = 41, - AES_X64 = 42, - BMI1_X64 = 43, - BMI2_X64 = 44, - FMA_X64 = 45, - LZCNT_X64 = 46, - PCLMULQDQ_X64 = 47, - POPCNT_X64 = 48, - AVXVNNI_X64 = 49, - MOVBE_X64 = 50, - X86Serialize_X64 = 51, - AVX512F_X64 = 52, - AVX512F_VL_X64 = 53, - AVX512BW_X64 = 54, - 
AVX512BW_VL_X64 = 55, - AVX512CD_X64 = 56, - AVX512CD_VL_X64 = 57, - AVX512DQ_X64 = 58, - AVX512DQ_VL_X64 = 59, - AVX512VBMI_X64 = 60, - AVX512VBMI_VL_X64 = 61, + VectorT128 = 20, + VectorT256 = 21, + VectorT512 = 22, + AVXVNNI = 23, + MOVBE = 24, + X86Serialize = 25, + AVX512F = 26, + AVX512F_VL = 27, + AVX512BW = 28, + AVX512BW_VL = 29, + AVX512CD = 30, + AVX512CD_VL = 31, + AVX512DQ = 32, + AVX512DQ_VL = 33, + AVX512VBMI = 34, + AVX512VBMI_VL = 35, + X86Base_X64 = 36, + SSE_X64 = 37, + SSE2_X64 = 38, + SSE3_X64 = 39, + SSSE3_X64 = 40, + SSE41_X64 = 41, + SSE42_X64 = 42, + AVX_X64 = 43, + AVX2_X64 = 44, + AES_X64 = 45, + BMI1_X64 = 46, + BMI2_X64 = 47, + FMA_X64 = 48, + LZCNT_X64 = 49, + PCLMULQDQ_X64 = 50, + POPCNT_X64 = 51, + AVXVNNI_X64 = 52, + MOVBE_X64 = 53, + X86Serialize_X64 = 54, + AVX512F_X64 = 55, + AVX512F_VL_X64 = 56, + AVX512BW_X64 = 57, + AVX512BW_VL_X64 = 58, + AVX512CD_X64 = 59, + AVX512CD_VL_X64 = 60, + AVX512DQ_X64 = 61, + AVX512DQ_VL_X64 = 62, + AVX512VBMI_X64 = 63, + AVX512VBMI_VL_X64 = 64, } public unsafe struct InstructionSetFlags : IEnumerable { - private const int FlagsFieldCount = 1; + private const int FlagsFieldCount = 2; private const int BitsPerFlagsField = 64; private fixed ulong _flags[FlagsFieldCount]; public IEnumerable ARM64Flags => this.Select((x) => (InstructionSet_ARM64)x); @@ -444,6 +458,7 @@ public static InstructionSet ConvertToImpliedInstructionSetForVectorInstructionS { case InstructionSet.ARM64_Vector64: return InstructionSet.ARM64_AdvSimd; case InstructionSet.ARM64_Vector128: return InstructionSet.ARM64_AdvSimd; + case InstructionSet.ARM64_VectorT128: return InstructionSet.ARM64_AdvSimd; } break; case TargetArchitecture.X64: @@ -452,6 +467,9 @@ public static InstructionSet ConvertToImpliedInstructionSetForVectorInstructionS case InstructionSet.X64_Vector128: return InstructionSet.X64_SSE; case InstructionSet.X64_Vector256: return InstructionSet.X64_AVX; case InstructionSet.X64_Vector512: return 
InstructionSet.X64_AVX512F; + case InstructionSet.X64_VectorT128: return InstructionSet.X64_SSE2; + case InstructionSet.X64_VectorT256: return InstructionSet.X64_AVX2; + case InstructionSet.X64_VectorT512: return InstructionSet.X64_AVX512F; } break; case TargetArchitecture.X86: @@ -460,6 +478,9 @@ public static InstructionSet ConvertToImpliedInstructionSetForVectorInstructionS case InstructionSet.X86_Vector128: return InstructionSet.X86_SSE; case InstructionSet.X86_Vector256: return InstructionSet.X86_AVX; case InstructionSet.X86_Vector512: return InstructionSet.X86_AVX512F; + case InstructionSet.X86_VectorT128: return InstructionSet.X86_SSE2; + case InstructionSet.X86_VectorT256: return InstructionSet.X86_AVX2; + case InstructionSet.X86_VectorT512: return InstructionSet.X86_AVX512F; } break; } @@ -527,6 +548,8 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd); if (resultflags.HasInstructionSet(InstructionSet.ARM64_Vector128)) resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd); + if (resultflags.HasInstructionSet(InstructionSet.ARM64_VectorT128)) + resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd); break; case TargetArchitecture.X64: @@ -682,6 +705,12 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX); if (resultflags.HasInstructionSet(InstructionSet.X64_Vector512)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT128)) + resultflags.AddInstructionSet(InstructionSet.X64_SSE2); + if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT256)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI)) 
resultflags.AddInstructionSet(InstructionSet.X64_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X64_MOVBE)) @@ -690,24 +719,40 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X64_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X64_FMA); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL)) + 
resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL); break; case TargetArchitecture.X86: @@ -747,6 +792,12 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_AVX); if (resultflags.HasInstructionSet(InstructionSet.X86_Vector512)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT128)) + resultflags.AddInstructionSet(InstructionSet.X86_SSE2); + if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT256)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNI)) resultflags.AddInstructionSet(InstructionSet.X86_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X86_MOVBE)) @@ -755,24 +806,40 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X86_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X86_FMA); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); if 
(resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ_VL)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD_VL); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ_VL); break; } } while (!oldflags.Equals(resultflags)); @@ -830,6 +897,8 @@ 
private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.ARM64_Vector64); if (resultflags.HasInstructionSet(InstructionSet.ARM64_AdvSimd)) resultflags.AddInstructionSet(InstructionSet.ARM64_Vector128); + if (resultflags.HasInstructionSet(InstructionSet.ARM64_AdvSimd)) + resultflags.AddInstructionSet(InstructionSet.ARM64_VectorT128); break; case TargetArchitecture.X64: @@ -927,6 +996,12 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_Vector256); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X64_Vector512); + if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) + resultflags.AddInstructionSet(InstructionSet.X64_VectorT128); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X64_VectorT256); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X64_VectorT512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42)) @@ -935,24 +1010,40 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X64_FMA)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD); + if 
(resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); break; case TargetArchitecture.X86: @@ -992,6 +1083,12 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe 
resultflags.AddInstructionSet(InstructionSet.X86_Vector256); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X86_Vector512); + if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2)) + resultflags.AddInstructionSet(InstructionSet.X86_VectorT128); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X86_VectorT256); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X86_VectorT512); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42)) @@ -1000,24 +1097,40 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X86_FMA)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) 
resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ_VL)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); break; } } while (!oldflags.Equals(resultflags)); @@ -1035,8 +1148,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe { ("x86-x64-v3", TargetArchitecture.X86), "x86-x64-v2 avx2 bmi bmi2 lzcnt movbe fma" }, { ("skylake", TargetArchitecture.X64), "x86-x64-v3" }, { ("skylake", TargetArchitecture.X86), "x86-x64-v3" }, - { ("x86-x64-v4", TargetArchitecture.X64), "x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl" }, - { ("x86-x64-v4", TargetArchitecture.X86), "x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl" }, + { ("x86-x64-v4", TargetArchitecture.X64), "x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl avx512dq avx512dq_vl" }, + { ("x86-x64-v4", 
TargetArchitecture.X86), "x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl avx512dq avx512dq_vl" }, { ("armv8-a", TargetArchitecture.ARM64), "neon" }, { ("armv8.1-a", TargetArchitecture.ARM64), "armv8-a lse crc rdma" }, { ("armv8.2-a", TargetArchitecture.ARM64), "armv8.1-a" }, @@ -1086,6 +1199,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("lse", "", InstructionSet.ARM64_Atomics, true); yield return new InstructionSetInfo("Vector64", "", InstructionSet.ARM64_Vector64, false); yield return new InstructionSetInfo("Vector128", "", InstructionSet.ARM64_Vector128, false); + yield return new InstructionSetInfo("VectorT128", "", InstructionSet.ARM64_VectorT128, false); yield return new InstructionSetInfo("Dczva", "", InstructionSet.ARM64_Dczva, false); yield return new InstructionSetInfo("rcpc", "", InstructionSet.ARM64_Rcpc, true); break; @@ -1110,6 +1224,9 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("Vector128", "", InstructionSet.X64_Vector128, false); yield return new InstructionSetInfo("Vector256", "", InstructionSet.X64_Vector256, false); yield return new InstructionSetInfo("Vector512", "", InstructionSet.X64_Vector512, false); + yield return new InstructionSetInfo("VectorT128", "", InstructionSet.X64_VectorT128, false); + yield return new InstructionSetInfo("VectorT256", "", InstructionSet.X64_VectorT256, false); + yield return new InstructionSetInfo("VectorT512", "", InstructionSet.X64_VectorT512, false); yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X64_AVXVNNI, true); yield return new InstructionSetInfo("movbe", "Movbe", InstructionSet.X64_MOVBE, true); yield return new InstructionSetInfo("serialize", "X86Serialize", InstructionSet.X64_X86Serialize, true); @@ -1145,6 +1262,9 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("Vector128", "", 
InstructionSet.X86_Vector128, false); yield return new InstructionSetInfo("Vector256", "", InstructionSet.X86_Vector256, false); yield return new InstructionSetInfo("Vector512", "", InstructionSet.X86_Vector512, false); + yield return new InstructionSetInfo("VectorT128", "", InstructionSet.X86_VectorT128, false); + yield return new InstructionSetInfo("VectorT256", "", InstructionSet.X86_VectorT256, false); + yield return new InstructionSetInfo("VectorT512", "", InstructionSet.X86_VectorT512, false); yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X86_AVXVNNI, true); yield return new InstructionSetInfo("movbe", "Movbe", InstructionSet.X86_MOVBE, true); yield return new InstructionSetInfo("serialize", "X86Serialize", InstructionSet.X86_X86Serialize, true); diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index cedbbda36b7046..e250915f1d3df2 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -1208,6 +1208,22 @@ public struct SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR public byte eightByteOffsets1; }; + public struct CORINFO_XARCH_CPU + { + public uint Value; + + // The native version is a union with the following fields: + // uint32_t SteppingId : 4; + // uint32_t Model : 4; + // uint32_t FamilyId : 4; + // uint32_t ProcessorType : 2; + // uint32_t IsAuthenticAmd : 1; // Unused bits in the CPUID result + // uint32_t IsGenuineIntel : 1; // Unused bits in the CPUID result + // uint32_t ExtendedModelId : 4; + // uint32_t ExtendedFamilyId : 8; + // uint32_t Reserved : 4; // Unused bits in the CPUID result + }; + // StructFloadFieldInfoFlags: used on LoongArch64 architecture by `getLoongArch64PassStructInRegisterFlags` and // `getRISCV64PassStructInRegisterFlags` API to convey struct argument passing information. 
// diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index fa4107640b494a..b9c4580b701dc8 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -44,6 +44,9 @@ instructionset ,X86 ,Popcnt , ,15 ,POPCNT instructionset ,X86 , , , ,Vector128 , instructionset ,X86 , , , ,Vector256 , instructionset ,X86 , , , ,Vector512 , +instructionset ,X86 , , , ,VectorT128 , +instructionset ,X86 , , , ,VectorT256 , +instructionset ,X86 , , , ,VectorT512 , instructionset ,X86 ,AvxVnni , ,25 ,AVXVNNI ,avxvnni instructionset ,X86 ,Movbe , ,27 ,MOVBE ,movbe instructionset ,X86 ,X86Serialize , ,28 ,X86Serialize ,serialize @@ -92,6 +95,9 @@ instructionset64bit,X86 ,AVX512VBMI_VL vectorinstructionset,X86 ,Vector128 vectorinstructionset,X86 ,Vector256 vectorinstructionset,X86 ,Vector512 +vectorinstructionset,X86 ,VectorT128 +vectorinstructionset,X86 ,VectorT256 +vectorinstructionset,X86 ,VectorT512 implication ,X86 ,SSE ,X86Base implication ,X86 ,SSE2 ,SSE @@ -111,41 +117,60 @@ implication ,X86 ,POPCNT ,SSE42 implication ,X86 ,Vector128 ,SSE implication ,X86 ,Vector256 ,AVX implication ,X86 ,Vector512 ,AVX512F +implication ,X86 ,VectorT128 ,SSE2 +implication ,X86 ,VectorT256 ,AVX2 +implication ,X86 ,VectorT512 ,AVX512F implication ,X86 ,AVXVNNI ,AVX2 implication ,X86 ,MOVBE ,SSE42 implication ,X86 ,X86Serialize ,X86Base implication ,X86 ,AVX512F ,AVX2 +implication ,X86 ,AVX512F ,FMA implication ,X86 ,AVX512F_VL ,AVX512F implication ,X86 ,AVX512CD ,AVX512F +implication ,X86 ,AVX512CD_VL ,AVX512CD implication ,X86 ,AVX512CD_VL ,AVX512F_VL implication ,X86 ,AVX512BW ,AVX512F +implication ,X86 ,AVX512BW_VL ,AVX512BW implication ,X86 ,AVX512BW_VL ,AVX512F_VL implication ,X86 ,AVX512DQ ,AVX512F +implication ,X86 ,AVX512DQ_VL ,AVX512DQ implication ,X86 ,AVX512DQ_VL 
,AVX512F_VL implication ,X86 ,AVX512VBMI ,AVX512BW +implication ,X86 ,AVX512VBMI_VL ,AVX512VBMI implication ,X86 ,AVX512VBMI_VL ,AVX512BW_VL +; While the AVX-512 ISAs can be individually lit-up, they really +; need the following 5 to be fully functional without adding +; significant complexity into the JIT. Additionally, unlike AVX/AVX2 +; there was never really any hardware that didn't provide all 5 at +; once, with the notable exception being Knights Landing which +; provided a similar, but not identical, feature set. +implication ,X86 ,AVX512F ,AVX512BW_VL +implication ,X86 ,AVX512F ,AVX512CD_VL +implication ,X86 ,AVX512F ,AVX512DQ_VL + ; Definition of X64 instruction sets definearch ,X64 ,64Bit ,X64 copyinstructionsets,X86 ,X64 ; Definition of Arm64 instruction sets -definearch ,ARM64 ,64Bit ,Arm64 - -instructionset ,ARM64 ,ArmBase , ,16 ,ArmBase ,base -instructionset ,ARM64 ,AdvSimd , ,17 ,AdvSimd ,neon -instructionset ,ARM64 ,Aes , ,9 ,Aes ,aes -instructionset ,ARM64 ,Crc32 , ,18 ,Crc32 ,crc -instructionset ,ARM64 ,Dp , ,23 ,Dp ,dotprod -instructionset ,ARM64 ,Rdm , ,24 ,Rdm ,rdma -instructionset ,ARM64 ,Sha1 , ,19 ,Sha1 ,sha1 -instructionset ,ARM64 ,Sha256 , ,20 ,Sha256 ,sha2 -instructionset ,ARM64 , ,Atomics ,21 ,Atomics ,lse -instructionset ,ARM64 , , , ,Vector64 , -instructionset ,ARM64 , , , ,Vector128, -instructionset ,ARM64 , , , ,Dczva , -instructionset ,ARM64 , ,Rcpc ,26 ,Rcpc ,rcpc +definearch ,ARM64 ,64Bit ,Arm64 + +instructionset ,ARM64 ,ArmBase , ,16 ,ArmBase ,base +instructionset ,ARM64 ,AdvSimd , ,17 ,AdvSimd ,neon +instructionset ,ARM64 ,Aes , ,9 ,Aes ,aes +instructionset ,ARM64 ,Crc32 , ,18 ,Crc32 ,crc +instructionset ,ARM64 ,Dp , ,23 ,Dp ,dotprod +instructionset ,ARM64 ,Rdm , ,24 ,Rdm ,rdma +instructionset ,ARM64 ,Sha1 , ,19 ,Sha1 ,sha1 +instructionset ,ARM64 ,Sha256 , ,20 ,Sha256 ,sha2 +instructionset ,ARM64 , ,Atomics ,21 ,Atomics ,lse +instructionset ,ARM64 , , , ,Vector64 , +instructionset ,ARM64 , , , ,Vector128 , +instructionset ,ARM64
, , , ,VectorT128 , +instructionset ,ARM64 , , , ,Dczva , +instructionset ,ARM64 , ,Rcpc ,26 ,Rcpc ,rcpc instructionset64bit,ARM64 ,ArmBase instructionset64bit,ARM64 ,AdvSimd @@ -158,16 +183,18 @@ instructionset64bit,ARM64 ,Sha256 vectorinstructionset,ARM64,Vector64 vectorinstructionset,ARM64,Vector128 +vectorinstructionset,ARM64,VectorT128 -implication ,ARM64 ,AdvSimd ,ArmBase -implication ,ARM64 ,Aes ,ArmBase -implication ,ARM64 ,Crc32 ,ArmBase -implication ,ARM64 ,Dp ,AdvSimd -implication ,ARM64 ,Rdm ,AdvSimd -implication ,ARM64 ,Sha1 ,ArmBase -implication ,ARM64 ,Sha256 ,ArmBase -implication ,ARM64 ,Vector64 ,AdvSimd -implication ,ARM64 ,Vector128 ,AdvSimd +implication ,ARM64 ,AdvSimd ,ArmBase +implication ,ARM64 ,Aes ,ArmBase +implication ,ARM64 ,Crc32 ,ArmBase +implication ,ARM64 ,Dp ,AdvSimd +implication ,ARM64 ,Rdm ,AdvSimd +implication ,ARM64 ,Sha1 ,ArmBase +implication ,ARM64 ,Sha256 ,ArmBase +implication ,ARM64 ,Vector64 ,AdvSimd +implication ,ARM64 ,Vector128 ,AdvSimd +implication ,ARM64 ,VectorT128 ,AdvSimd ; ,name and aliases ,archs ,lower baselines included by implication @@ -176,7 +203,7 @@ instructionsetgroup ,x86-x64 ,X64 X86 ,sse2 instructionsetgroup ,x86-x64-v2 ,X64 X86 ,sse4.2 popcnt instructionsetgroup ,x86-x64-v3 ,X64 X86 ,x86-x64-v2 avx2 bmi bmi2 lzcnt movbe fma instructionsetgroup ,skylake ,X64 X86 ,x86-x64-v3 -instructionsetgroup ,x86-x64-v4 ,X64 X86 ,x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl +instructionsetgroup ,x86-x64-v4 ,X64 X86 ,x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl avx512dq avx512dq_vl instructionsetgroup ,armv8-a ,ARM64 ,neon instructionsetgroup ,armv8.1-a ,ARM64 ,armv8-a lse crc rdma diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs index bc218f33d34b59..07b48ffa1e384c 100644 --- 
a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs @@ -94,7 +94,7 @@ public InstructionSetImplication(string architecture, InstructionSetImplication private Dictionary _64BitVariantArchitectureJitNameSuffix = new Dictionary(); // This represents the number of flags fields we currently track - private const int FlagsFieldCount = 1; + private const int FlagsFieldCount = 2; private void ArchitectureEncountered(string arch) { diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt index b9f8aa66f259eb..3dba43bace4369 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt @@ -73,6 +73,7 @@ const wchar_t*,String uint32_t**,ref uint* unsigned*,ref uint CORJIT_FLAGS*,ref CORJIT_FLAGS +CORINFO_XARCH_CPU*,ref CORINFO_XARCH_CPU CORINFO_CONST_LOOKUP*,ref CORINFO_CONST_LOOKUP CORINFO_LOOKUP*,ref CORINFO_LOOKUP CORINFO_LOOKUP_KIND*,ref CORINFO_LOOKUP_KIND @@ -291,6 +292,7 @@ FUNCTIONS uint32_t getLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd); uint32_t getRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd); uint32_t getThreadTLSIndex(void **ppIndirection); + void getXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr); const void * getInlinedCallFrameVptr(void **ppIndirection); int32_t * getAddrOfCaptureThreadGlobal(void **ppIndirection); void* getHelperFtn (CorInfoHelpFunc ftnNum, void **ppIndirection); diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs index 1411ca7b6c7941..b7e4ee14e57ac5 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs +++ 
b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs @@ -111,6 +111,7 @@ private static class XArchIntrinsicConstants public const int Avx512dq_vl = 0x400000; public const int Avx512Vbmi = 0x800000; public const int Avx512Vbmi_vl = 0x1000000; + public const int Serialize = 0x2000000; public static int FromInstructionSet(InstructionSet instructionSet) { @@ -170,6 +171,8 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_AVX512VBMI_X64 => Avx512Vbmi, InstructionSet.X64_AVX512VBMI_VL => Avx512Vbmi_vl, InstructionSet.X64_AVX512VBMI_VL_X64 => Avx512Vbmi_vl, + InstructionSet.X64_X86Serialize => Serialize, + InstructionSet.X64_X86Serialize_X64 => Serialize, // SSE and SSE2 are baseline ISAs - they're always available InstructionSet.X64_SSE => 0, diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs index 8b1b8d7c30e2e0..3644241dcf85b7 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs @@ -475,12 +475,18 @@ private bool IsLayoutFixedInCurrentVersionBubbleInternal(TypeDesc type) return true; } - if (!(type is MetadataType defType)) + if (type is not MetadataType defType) { // Non metadata backed types have layout defined in all version bubbles return true; } + if (VectorOfTFieldLayoutAlgorithm.IsVectorOfTType(defType)) + { + // Vector always needs a layout check + return false; + } + if (!NodeFactory.CompilationModuleGroup.VersionsWithModule(defType.Module)) { // Valuetypes with non-versionable attribute are candidates for fixed layout. Reject the rest. 
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index f73c55cd0dc679..2d78d29ef77d9d 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -3053,7 +3053,7 @@ private bool getStringChar(CORINFO_OBJECT_STRUCT_* strObj, int index, ushort* va { return false; } - + private CORINFO_OBJECT_STRUCT_* getRuntimeTypePointer(CORINFO_CLASS_STRUCT_* cls) { return null; diff --git a/src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs b/src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs index e90dfd7256ef41..b9737866321615 100644 --- a/src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs +++ b/src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs @@ -112,6 +112,8 @@ internal sealed class ILCompilerRootCommand : RootCommand }, true, "Maximum number of threads to use during compilation"); public Option InstructionSet { get; } = new(new[] { "--instruction-set" }, "Instruction set to allow or disallow"); + public Option MaxVectorTBitWidth { get; } = + new(new[] { "--max-vectort-bitwidth" }, "Maximum width, in bits, that Vector is allowed to be"); public Option Guard { get; } = new(new[] { "--guard" }, "Enable mitigations. 
Options: 'cf': CFG (Control Flow Guard, Windows only)"); public Option Dehydrate { get; } = @@ -210,6 +212,7 @@ public ILCompilerRootCommand(string[] args) : base(".NET Native IL Compiler") AddOption(RuntimeOptions); AddOption(Parallelism); AddOption(InstructionSet); + AddOption(MaxVectorTBitWidth); AddOption(Guard); AddOption(Dehydrate); AddOption(PreinitStatics); diff --git a/src/coreclr/tools/aot/ILCompiler/Program.cs b/src/coreclr/tools/aot/ILCompiler/Program.cs index 2a9aff55141987..f7863637979376 100644 --- a/src/coreclr/tools/aot/ILCompiler/Program.cs +++ b/src/coreclr/tools/aot/ILCompiler/Program.cs @@ -77,7 +77,7 @@ public int Run() TargetArchitecture targetArchitecture = Get(_command.TargetArchitecture); TargetOS targetOS = Get(_command.TargetOS); - InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), targetArchitecture, targetOS, + InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), Get(_command.MaxVectorTBitWidth), targetArchitecture, targetOS, "Unrecognized instruction set {0}", "Unsupported combination of instruction sets: {0}/{1}"); string systemModuleName = Get(_command.SystemModuleName); diff --git a/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs b/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs index e3918372e42dc9..ef384a1abece68 100644 --- a/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs +++ b/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs @@ -23,6 +23,8 @@ internal class Crossgen2RootCommand : RootCommand new(new[] { "--reference", "-r" }, result => Helpers.BuildPathDictionary(result.Tokens, false), true, SR.ReferenceFiles); public Option InstructionSet { get; } = new(new[] { "--instruction-set" }, SR.InstructionSets); + public Option MaxVectorTBitWidth { get; } = + new(new[] { "--max-vectort-bitwidth" }, SR.MaxVectorTBitWidths); public Option MibcFilePaths { get; } = new(new[] { 
"--mibc", "-m" }, Array.Empty, SR.MibcFiles); public Option OutputFilePath { get; } = @@ -193,6 +195,7 @@ public Crossgen2RootCommand(string[] args) : base(SR.Crossgen2BannerText) AddOption(UnrootedInputFilePaths); AddOption(ReferenceFilePaths); AddOption(InstructionSet); + AddOption(MaxVectorTBitWidth); AddOption(MibcFilePaths); AddOption(OutputFilePath); AddOption(CompositeRootPath); diff --git a/src/coreclr/tools/aot/crossgen2/Program.cs b/src/coreclr/tools/aot/crossgen2/Program.cs index b044eea0b47438..99a493c0c6906c 100644 --- a/src/coreclr/tools/aot/crossgen2/Program.cs +++ b/src/coreclr/tools/aot/crossgen2/Program.cs @@ -76,7 +76,7 @@ public int Run() TargetArchitecture targetArchitecture = Get(_command.TargetArchitecture); TargetOS targetOS = Get(_command.TargetOS); - InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), targetArchitecture, targetOS, + InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), Get(_command.MaxVectorTBitWidth), targetArchitecture, targetOS, SR.InstructionSetMustNotBe, SR.InstructionSetInvalidImplication); SharedGenericsMode genericsMode = SharedGenericsMode.CanonicalReferenceTypes; var targetDetails = new TargetDetails(targetArchitecture, targetOS, Crossgen2RootCommand.IsArmel ? TargetAbi.NativeAotArmel : TargetAbi.NativeAot, instructionSetSupport.GetVectorTSimdVector()); diff --git a/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx b/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx index b899f77a8f1147..a737ea6aeb7706 100644 --- a/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx +++ b/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx @@ -180,6 +180,9 @@ Instruction set '{0}' implies support for instruction set '{1}' + + The maximum width, in bits, for System.Numerics.Vector<T>. For example '128', '256', or '512'. 
+ Input files without automatic rooting of all methods diff --git a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h index 62db21d2232e23..f09f9b3cbc467d 100644 --- a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h +++ b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h @@ -143,6 +143,7 @@ struct JitInterfaceCallbacks uint32_t (* getLoongArch64PassStructInRegisterFlags)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE structHnd); uint32_t (* getRISCV64PassStructInRegisterFlags)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE structHnd); uint32_t (* getThreadTLSIndex)(void * thisHandle, CorInfoExceptionClass** ppException, void** ppIndirection); + void (* getXarchCpuInfo)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_XARCH_CPU* xarchCpuInfoPtr); const void* (* getInlinedCallFrameVptr)(void * thisHandle, CorInfoExceptionClass** ppException, void** ppIndirection); int32_t* (* getAddrOfCaptureThreadGlobal)(void * thisHandle, CorInfoExceptionClass** ppException, void** ppIndirection); void* (* getHelperFtn)(void * thisHandle, CorInfoExceptionClass** ppException, CorInfoHelpFunc ftnNum, void** ppIndirection); @@ -1477,6 +1478,14 @@ class JitInterfaceWrapper : public ICorJitInfo return temp; } + virtual void getXarchCpuInfo( + CORINFO_XARCH_CPU* xarchCpuInfoPtr) +{ + CorInfoExceptionClass* pException = nullptr; + _callbacks->getXarchCpuInfo(_thisHandle, &pException, xarchCpuInfoPtr); + if (pException != nullptr) throw pException; +} + virtual const void* getInlinedCallFrameVptr( void** ppIndirection) { diff --git a/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp b/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp index 4b763aec27fbfb..fed4cacdbaad72 100644 --- a/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp +++ b/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp @@ -51,9 +51,7 @@ DLL_EXPORT void 
JitProcessShutdownWork(ICorJitCompiler * pJit) return pJit->ProcessShutdownWork(nullptr); } -DLL_EXPORT unsigned GetMaxIntrinsicSIMDVectorLength( - ICorJitCompiler * pJit, - CORJIT_FLAGS * flags) +DLL_EXPORT unsigned GetMaxVectorTBitWidth(ICorJitCompiler * pJit, CORJIT_FLAGS * flags) { - return pJit->getMaxIntrinsicSIMDVectorLength(*flags); + return pJit->getMaxVectorTBitWidth(*flags); } diff --git a/src/coreclr/tools/superpmi/superpmi-shared/icorjitcompilerimpl.h b/src/coreclr/tools/superpmi/superpmi-shared/icorjitcompilerimpl.h index 49cfb3b7a1482f..66f7e0992a300c 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/icorjitcompilerimpl.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/icorjitcompilerimpl.h @@ -42,11 +42,6 @@ void ProcessShutdownWork(ICorStaticInfo* info); /* {}; */ void getVersionIdentifier(GUID* versionIdentifier /* OUT */ ); -// When the EE loads the System.Numerics.Vectors assembly, it asks the JIT what length (in bytes) of -// SIMD vector it supports as an intrinsic type. Zero means that the JIT does not support SIMD -// intrinsics, so the EE should use the default size (i.e. the size of the IL implementation). -unsigned getMaxIntrinsicSIMDVectorLength(CORJIT_FLAGS cpuCompileFlags); /* { return 0; } */ - // Some JIT's may support multiple OSs. This api provides a means to specify to the JIT what OS it should // be trying to compile. This api does not produce any errors, any errors are to be generated by the // the compileMethod call, which will call back into the VM to ensure bits are correctly setup. 
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h b/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h index 19a0600b3f582b..8c5ad83c977bc7 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h @@ -129,6 +129,7 @@ LWM(GetStringConfigValue, DWORD, DWORD) LWM(GetSystemVAmd64PassStructInRegisterDescriptor, DWORDLONG, Agnostic_GetSystemVAmd64PassStructInRegisterDescriptor) LWM(GetLoongArch64PassStructInRegisterFlags, DWORDLONG, DWORD) LWM(GetRISCV64PassStructInRegisterFlags, DWORDLONG, DWORD) +LWM(GetXarchCpuInfo, DWORD, DWORD) LWM(GetTailCallHelpers, Agnostic_GetTailCallHelpers, Agnostic_CORINFO_TAILCALL_HELPERS) LWM(UpdateEntryPointForTailCall, Agnostic_CORINFO_CONST_LOOKUP, Agnostic_CORINFO_CONST_LOOKUP) LWM(GetThreadTLSIndex, DWORD, DLD) diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 469ff6f6d56d9e..6a9a18baba0482 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -6388,12 +6388,10 @@ void MethodContext::recGetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HAN GetLoongArch64PassStructInRegisterFlags->Add(key, value); DEBUG_REC(dmpGetLoongArch64PassStructInRegisterFlags(key, value)); } - void MethodContext::dmpGetLoongArch64PassStructInRegisterFlags(DWORDLONG key, DWORD value) { printf("GetLoongArch64PassStructInRegisterFlags key %016" PRIX64 " value-%08X", key, value); } - DWORD MethodContext::repGetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd) { DWORDLONG key = CastHandle(structHnd); @@ -6413,12 +6411,10 @@ void MethodContext::recGetRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE GetRISCV64PassStructInRegisterFlags->Add(key, value); DEBUG_REC(dmpGetRISCV64PassStructInRegisterFlags(key, value)); } - void 
MethodContext::dmpGetRISCV64PassStructInRegisterFlags(DWORDLONG key, DWORD value) { printf("GetRISCV64PassStructInRegisterFlags key %016" PRIX64 " value-%08X", key, value); } - DWORD MethodContext::repGetRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd) { DWORDLONG key = CastHandle(structHnd); @@ -6428,6 +6424,34 @@ DWORD MethodContext::repGetRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE return value; } +void MethodContext::recgetXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr) +{ + if (GetXarchCpuInfo == nullptr) + GetXarchCpuInfo = new LightWeightMap<DWORD, DWORD>(); + + DWORD key = 0; + DWORD value = static_cast<DWORD>(xarchCpuInfoPtr->Value); + + GetXarchCpuInfo->Add(key, value); + DEBUG_REC(dmpGetXarchCpuInfo(key, value)); +} +void MethodContext::dmpGetXarchCpuInfo(DWORD key, DWORD value) +{ + printf("getXarchCpuInfo key %u, value %u", key, value); +} +void MethodContext::repGetXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr) +{ + DWORD key; + DWORD value; + + key = 0; + value = LookupByKeyOrMissNoMessage(GetXarchCpuInfo, key); + + DEBUG_REP(dmpGetXarchCpuInfo(key, value)); + + xarchCpuInfoPtr->Value = static_cast<uint32_t>(value); +} + void MethodContext::recGetRelocTypeHint(void* target, WORD result) { if (GetRelocTypeHint == nullptr) diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h index 53b3b02f745eaa..1c7f0ec6f45353 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h @@ -580,7 +580,7 @@ class MethodContext void recGetIsClassInitedFlagAddress(CORINFO_CLASS_HANDLE cls, CORINFO_CONST_LOOKUP* addr, int* offset, bool result); void dmpGetIsClassInitedFlagAddress(DWORDLONG key, const Agnostic_GetIsClassInitedFlagAddress& value); bool repGetIsClassInitedFlagAddress(CORINFO_CLASS_HANDLE cls, CORINFO_CONST_LOOKUP* addr, int* offset); - + void recGetStaticBaseAddress(CORINFO_CLASS_HANDLE cls, bool isGc,
CORINFO_CONST_LOOKUP* addr, bool result); void dmpGetStaticBaseAddress(DLD key, const Agnostic_GetStaticBaseAddress& value); bool repGetStaticBaseAddress(CORINFO_CLASS_HANDLE cls, bool isGc, CORINFO_CONST_LOOKUP* addr); @@ -807,6 +807,10 @@ class MethodContext void dmpGetRISCV64PassStructInRegisterFlags(DWORDLONG key, DWORD value); DWORD repGetRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd); + void recgetXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr); + void dmpGetXarchCpuInfo(DWORD key, DWORD value); + void repGetXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr); + void recGetRelocTypeHint(void* target, WORD result); void dmpGetRelocTypeHint(DWORDLONG key, DWORD value); WORD repGetRelocTypeHint(void* target); @@ -1188,6 +1192,7 @@ enum mcPackets Packet_GetThreadLocalStaticBlocksInfo = 208, Packet_GetRISCV64PassStructInRegisterFlags = 209, Packet_GetObjectContent = 210, + Packet_GetXarchCpuInfo = 211, }; void SetDebugDumpVariables(); diff --git a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitcompiler.cpp b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitcompiler.cpp index 0ef956bd62787d..331361a3e500b3 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitcompiler.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitcompiler.cpp @@ -158,8 +158,3 @@ void interceptor_ICJC::getVersionIdentifier(GUID* versionIdentifier /* OUT */) { original_ICorJitCompiler->getVersionIdentifier(versionIdentifier); } - -unsigned interceptor_ICJC::getMaxIntrinsicSIMDVectorLength(CORJIT_FLAGS cpuCompileFlags) -{ - return original_ICorJitCompiler->getMaxIntrinsicSIMDVectorLength(cpuCompileFlags); -} diff --git a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp index 96249dde36c0da..d87104da17b84f 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp +++ 
b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp @@ -1488,6 +1488,13 @@ uint32_t interceptor_ICJI::getRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HAN return temp; } +void interceptor_ICJI::getXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr) +{ + mc->cr->AddCall("getXarchCpuInfo"); + original_ICorJitInfo->getXarchCpuInfo(xarchCpuInfoPtr); + mc->recgetXarchCpuInfo(xarchCpuInfoPtr); +} + // Stuff on ICorDynamicInfo uint32_t interceptor_ICJI::getThreadTLSIndex(void** ppIndirection) { diff --git a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitcompiler.cpp b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitcompiler.cpp index 1f1f1d210ef6e1..f67c5cbf6863bb 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitcompiler.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitcompiler.cpp @@ -40,9 +40,3 @@ void interceptor_ICJC::getVersionIdentifier(GUID* versionIdentifier /* OUT */) mcs->AddCall("getVersionIdentifier"); original_ICorJitCompiler->getVersionIdentifier(versionIdentifier); } - -unsigned interceptor_ICJC::getMaxIntrinsicSIMDVectorLength(CORJIT_FLAGS cpuCompileFlags) -{ - mcs->AddCall("getMaxIntrinsicSIMDVectorLength"); - return original_ICorJitCompiler->getMaxIntrinsicSIMDVectorLength(cpuCompileFlags); -} diff --git a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp index 58dc80abec8e1b..e1625c127c9c37 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp @@ -1061,6 +1061,13 @@ uint32_t interceptor_ICJI::getThreadTLSIndex( return original_ICorJitInfo->getThreadTLSIndex(ppIndirection); } +void interceptor_ICJI::getXarchCpuInfo( + CORINFO_XARCH_CPU* xarchCpuInfoPtr) +{ + mcs->AddCall("getXarchCpuInfo"); + original_ICorJitInfo->getXarchCpuInfo(xarchCpuInfoPtr); +} + const void* 
interceptor_ICJI::getInlinedCallFrameVptr( void** ppIndirection) { diff --git a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitcompiler.cpp b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitcompiler.cpp index 53442ced042e17..5f266c0fc96e9f 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitcompiler.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitcompiler.cpp @@ -35,8 +35,3 @@ void interceptor_ICJC::getVersionIdentifier(GUID* versionIdentifier /* OUT */) { original_ICorJitCompiler->getVersionIdentifier(versionIdentifier); } - -unsigned interceptor_ICJC::getMaxIntrinsicSIMDVectorLength(CORJIT_FLAGS cpuCompileFlags) -{ - return original_ICorJitCompiler->getMaxIntrinsicSIMDVectorLength(cpuCompileFlags); -} diff --git a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp index a9dced511ddce6..da79ab622c62a4 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp @@ -929,6 +929,12 @@ uint32_t interceptor_ICJI::getThreadTLSIndex( return original_ICorJitInfo->getThreadTLSIndex(ppIndirection); } +void interceptor_ICJI::getXarchCpuInfo( + CORINFO_XARCH_CPU* xarchCpuInfoPtr) +{ + original_ICorJitInfo->getXarchCpuInfo(xarchCpuInfoPtr); +} + const void* interceptor_ICJI::getInlinedCallFrameVptr( void** ppIndirection) { diff --git a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp index 9915b4699eeebc..49086d8597a104 100644 --- a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp +++ b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp @@ -1299,6 +1299,12 @@ uint32_t MyICJI::getRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE struct return jitInstance->mc->repGetRISCV64PassStructInRegisterFlags(structHnd); } +void MyICJI::getXarchCpuInfo(CORINFO_XARCH_CPU* 
xarchCpuInfoPtr) +{ + jitInstance->mc->cr->AddCall("getXarchCpuInfo"); + return jitInstance->mc->repGetXarchCpuInfo(xarchCpuInfoPtr); +} + // Stuff on ICorDynamicInfo uint32_t MyICJI::getThreadTLSIndex(void** ppIndirection) { diff --git a/src/coreclr/vm/cgensys.h b/src/coreclr/vm/cgensys.h index b7cc4c715a51b0..52d95adc57f99a 100644 --- a/src/coreclr/vm/cgensys.h +++ b/src/coreclr/vm/cgensys.h @@ -97,17 +97,6 @@ extern "C" DWORD xmmYmmStateSupport(); extern "C" DWORD avx512StateSupport(); #endif -inline bool TargetHasAVXSupport() -{ -#if (defined(TARGET_X86) || defined(TARGET_AMD64)) - int cpuInfo[4]; - __cpuid(cpuInfo, 0x00000001); // All x86/AMD64 targets support cpuid. - const int CPUID_ECX = 2; - return ((cpuInfo[CPUID_ECX] & (1 << 28)) != 0); // The AVX feature is ECX bit 28. -#endif // (defined(TARGET_X86) || defined(TARGET_AMD64)) - return false; -} - #ifdef DACCESS_COMPILE // Used by dac/strike to make sense of non-jit/non-jit-helper call targets diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 5be7e91f7e0622..2131a0460617e8 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1357,6 +1357,9 @@ void EEJitManager::SetCpuInfo() } #endif // TARGET_X86 + // Get the maximum bitwidth of Vector<T>, rounding down to the nearest multiple of 128-bits + uint32_t maxVectorTBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_MaxVectorTBitWidth) / 128) * 128; + #if defined(TARGET_X86) || defined(TARGET_AMD64) CPUCompileFlags.Set(InstructionSet_X86Base); @@ -1444,6 +1447,8 @@ void EEJitManager::SetCpuInfo() // LZCNT - ECX bit 5 // synchronously updating VM and JIT.
+ CORINFO_XARCH_CPU xarchCpuInfo = {}; + int cpuidInfo[4]; const int CPUID_EAX = 0; @@ -1454,58 +1459,82 @@ void EEJitManager::SetCpuInfo() __cpuid(cpuidInfo, 0x00000000); uint32_t maxCpuId = static_cast<uint32_t>(cpuidInfo[CPUID_EAX]); + if (cpuidInfo[CPUID_EBX] == 0x756E6547) // Genu + { + xarchCpuInfo.IsGenuineIntel = (cpuidInfo[CPUID_EDX] == 0x49656E69) // ineI + && (cpuidInfo[CPUID_ECX] == 0x6C65746E); // ntel + } + else if (cpuidInfo[CPUID_EBX] == 0x68747541) // Auth + { + xarchCpuInfo.IsAuthenticAmd = (cpuidInfo[CPUID_EDX] == 0x69746E65) // enti + && (cpuidInfo[CPUID_ECX] == 0x444D4163); // cAMD + } + if (maxCpuId >= 1) { __cpuid(cpuidInfo, 0x00000001); - if (((cpuidInfo[CPUID_EDX] & (1 << 25)) != 0) && ((cpuidInfo[CPUID_EDX] & (1 << 26)) != 0)) // SSE & SSE2 + // Mask off bits 14/15 since they are "reserved" on the CPUID side + // this allows us to reuse those bits to track if the CPU is AMD or + // Intel and keep everything in 1x uint32_t + + xarchCpuInfo.Value |= (cpuidInfo[CPUID_EAX] & ~(0x3 << 14)); + + const int requiredBaselineEdxFlags = (1 << 25) // SSE + | (1 << 26); // SSE2 + + if ((cpuidInfo[CPUID_EDX] & requiredBaselineEdxFlags) == requiredBaselineEdxFlags) { CPUCompileFlags.Set(InstructionSet_SSE); CPUCompileFlags.Set(InstructionSet_SSE2); + CPUCompileFlags.Set(InstructionSet_VectorT128); - if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI + if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI { CPUCompileFlags.Set(InstructionSet_AES); } - if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // PCLMULQDQ + if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // PCLMULQDQ { CPUCompileFlags.Set(InstructionSet_PCLMULQDQ); } - if ((cpuidInfo[CPUID_ECX] & (1 << 0)) != 0) // SSE3 + if ((cpuidInfo[CPUID_ECX] & (1 << 0)) != 0) // SSE3 { CPUCompileFlags.Set(InstructionSet_SSE3); - if ((cpuidInfo[CPUID_ECX] & (1 << 9)) != 0) // SSSE3 + if ((cpuidInfo[CPUID_ECX] & (1 << 9)) != 0) // SSSE3 { CPUCompileFlags.Set(InstructionSet_SSSE3); - if ((cpuidInfo[CPUID_ECX] & (1 << 19)) != 0)
// SSE4.1 + if ((cpuidInfo[CPUID_ECX] & (1 << 19)) != 0) // SSE4.1 { CPUCompileFlags.Set(InstructionSet_SSE41); - if ((cpuidInfo[CPUID_ECX] & (1 << 20)) != 0) // SSE4.2 + if ((cpuidInfo[CPUID_ECX] & (1 << 20)) != 0) // SSE4.2 { CPUCompileFlags.Set(InstructionSet_SSE42); - if ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0) // MOVBE + if ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0) // MOVBE { CPUCompileFlags.Set(InstructionSet_MOVBE); } - if ((cpuidInfo[CPUID_ECX] & (1 << 23)) != 0) // POPCNT + if ((cpuidInfo[CPUID_ECX] & (1 << 23)) != 0) // POPCNT { CPUCompileFlags.Set(InstructionSet_POPCNT); } - if (((cpuidInfo[CPUID_ECX] & (1 << 27)) != 0) && ((cpuidInfo[CPUID_ECX] & (1 << 28)) != 0)) // OSXSAVE & AVX + const int requiredAvxEcxFlags = (1 << 27) // OSXSAVE + | (1 << 28); // AVX + + if ((cpuidInfo[CPUID_ECX] & requiredAvxEcxFlags) == requiredAvxEcxFlags) { - if(DoesOSSupportAVX() && (xmmYmmStateSupport() == 1)) // XGETBV == 11 + if(DoesOSSupportAVX() && (xmmYmmStateSupport() == 1)) // XGETBV == 11 { CPUCompileFlags.Set(InstructionSet_AVX); - if ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0) // FMA + if ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0) // FMA { CPUCompileFlags.Set(InstructionSet_FMA); } @@ -1514,54 +1543,66 @@ void EEJitManager::SetCpuInfo() { __cpuidex(cpuidInfo, 0x00000007, 0x00000000); - if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2 + if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2 { CPUCompileFlags.Set(InstructionSet_AVX2); - if (DoesOSSupportAVX512() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 + if ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256)) { - if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F + // We allow 256-bit Vector by default + CPUCompileFlags.Set(InstructionSet_VectorT256); + } + + if (DoesOSSupportAVX512() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 + { + if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F { CPUCompileFlags.Set(InstructionSet_AVX512F); + if (maxVectorTBitWidth >= 512) + { + // 
We require opt-in for 512-bit Vector + CPUCompileFlags.Set(InstructionSet_VectorT512); + } + bool isAVX512_VLSupported = false; - if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL + if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL { CPUCompileFlags.Set(InstructionSet_AVX512F_VL); isAVX512_VLSupported = true; } - if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) // AVX512BW + if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) // AVX512BW { CPUCompileFlags.Set(InstructionSet_AVX512BW); - if (isAVX512_VLSupported) // AVX512BW_VL + if (isAVX512_VLSupported) // AVX512BW_VL { CPUCompileFlags.Set(InstructionSet_AVX512BW_VL); } } - if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) // AVX512CD + if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) // AVX512CD { CPUCompileFlags.Set(InstructionSet_AVX512CD); - if (isAVX512_VLSupported) // AVX512CD_VL + if (isAVX512_VLSupported) // AVX512CD_VL { CPUCompileFlags.Set(InstructionSet_AVX512CD_VL); } } - if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) // AVX512DQ + if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) // AVX512DQ { CPUCompileFlags.Set(InstructionSet_AVX512DQ); - if (isAVX512_VLSupported) // AVX512DQ_VL + if (isAVX512_VLSupported) // AVX512DQ_VL { CPUCompileFlags.Set(InstructionSet_AVX512DQ_VL); } } - if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI + if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI { CPUCompileFlags.Set(InstructionSet_AVX512VBMI); - if (isAVX512_VLSupported) // AVX512VBMI_VL + if (isAVX512_VLSupported) // AVX512VBMI_VL { CPUCompileFlags.Set(InstructionSet_AVX512VBMI_VL); } @@ -1571,7 +1612,7 @@ void EEJitManager::SetCpuInfo() __cpuidex(cpuidInfo, 0x00000007, 0x00000001); - if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI + if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI { CPUCompileFlags.Set(InstructionSet_AVXVNNI); } @@ -1583,30 +1624,25 @@ void EEJitManager::SetCpuInfo() } } } - - if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_SIMD16ByteOnly) != 0) - { - 
CPUCompileFlags.Clear(InstructionSet_AVX2); - } } if (maxCpuId >= 0x07) { __cpuidex(cpuidInfo, 0x00000007, 0x00000000); - if ((cpuidInfo[CPUID_EBX] & (1 << 3)) != 0) // BMI1 + if ((cpuidInfo[CPUID_EBX] & (1 << 3)) != 0) // BMI1 { CPUCompileFlags.Set(InstructionSet_BMI1); } - if ((cpuidInfo[CPUID_EBX] & (1 << 8)) != 0) // BMI2 + if ((cpuidInfo[CPUID_EBX] & (1 << 8)) != 0) // BMI2 { CPUCompileFlags.Set(InstructionSet_BMI2); } if ((cpuidInfo[CPUID_EDX] & (1 << 14)) != 0) { - CPUCompileFlags.Set(InstructionSet_X86Serialize); // SERIALIZE + CPUCompileFlags.Set(InstructionSet_X86Serialize); // SERIALIZE } } } @@ -1618,7 +1654,7 @@ void EEJitManager::SetCpuInfo() { __cpuid(cpuidInfo, 0x80000001); - if ((cpuidInfo[CPUID_ECX] & (1 << 5)) != 0) // LZCNT + if ((cpuidInfo[CPUID_ECX] & (1 << 5)) != 0) // LZCNT { CPUCompileFlags.Set(InstructionSet_LZCNT); } @@ -1650,6 +1686,7 @@ void EEJitManager::SetCpuInfo() // FP and SIMD support are enabled by default CPUCompileFlags.Set(InstructionSet_ArmBase); CPUCompileFlags.Set(InstructionSet_AdvSimd); + CPUCompileFlags.Set(InstructionSet_VectorT128); // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE (30) if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) @@ -1836,7 +1873,8 @@ void EEJitManager::SetCpuInfo() // We need to additionally check that EXTERNAL_EnableSSE3_4 is set, as that // is a prexisting config flag that controls the SSE3+ ISAs - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3) || !CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3_4)) + if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3) || + !CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3_4)) { CPUCompileFlags.Clear(InstructionSet_SSE3); } @@ -1860,7 +1898,6 @@ void EEJitManager::SetCpuInfo() { CPUCompileFlags.Clear(InstructionSet_X86Serialize); } - #elif defined(TARGET_ARM64) if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic)) { diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h index 
8e547e4cc17cef..dbd73029c0e06e 100644 --- a/src/coreclr/vm/codeman.h +++ b/src/coreclr/vm/codeman.h @@ -936,7 +936,7 @@ class RangeSectionMap { // Upgrade to non-collectible #ifdef _DEBUG - TADDR initialValue = + TADDR initialValue = #endif InterlockedCompareExchangeT(&_ptr, ptr - 1, ptr); assert(initialValue == ptr || initialValue == (ptr - 1)); @@ -1052,7 +1052,7 @@ class RangeSectionMap auto levelNew = static_cast<decltype(&(outerLevel->VolatileLoad(NULL))[0])>(AllocateLevel()); if (levelNew == NULL) return NULL; - + if (!outerLevel->Install(levelNew, collectible)) { // Handle race where another thread grew the table @@ -1118,7 +1118,7 @@ class RangeSectionMap auto rangeSectionL3 = rangeSectionL3Ptr->VolatileLoadWithoutBarrier(pLockState); if (rangeSectionL3 == NULL) return NULL; - + auto rangeSectionL2Ptr = &((*rangeSectionL3)[EffectiveBitsForLevel(address, 3)]); if (level == 2) return rangeSectionL2Ptr; @@ -1172,7 +1172,7 @@ class RangeSectionMap // Account for the range not starting at the beginning of a last level fragment rangeSize += pRangeSection->_range.RangeStart() & (bytesAtLastLevel - 1); - + uintptr_t fragmentCount = ((rangeSize - 1) / bytesAtLastLevel) + 1; return fragmentCount; } @@ -1415,7 +1415,7 @@ class RangeSectionMap else { // Since the fragment linked lists are sorted such that the collectible ones are always after the non-collectible ones, this should never happen. - assert(!seenCollectibleRangeList); + assert(!seenCollectibleRangeList); } #endif entryInMapToUpdate = &(entryInMapToUpdate->VolatileLoadWithoutBarrier(pLockState))->pRangeSectionFragmentNext; @@ -1456,7 +1456,7 @@ class RangeSectionMap if (foundMeaningfulValue) break; - + // This level is completely empty. Free it, and then null out the pointer to it.
pointerToLevelData->Uninstall(); free((void*)rawData); @@ -1999,6 +1999,10 @@ protected : private: CORJIT_FLAGS m_CPUCompileFlags; +#if defined(TARGET_X86) || defined(TARGET_AMD64) + CORINFO_XARCH_CPU m_xarchCpuInfo; +#endif // TARGET_X86 || TARGET_AMD64 + #if !defined DACCESS_COMPILE void SetCpuInfo(); #endif @@ -2010,6 +2014,14 @@ protected : return m_CPUCompileFlags; } +#if defined(TARGET_X86) || defined(TARGET_AMD64) + inline void getXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfo) + { + LIMITED_METHOD_CONTRACT; + *xarchCpuInfo = m_xarchCpuInfo; + } +#endif // TARGET_X86 || TARGET_AMD64 + private: bool m_storeRichDebugInfo; diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 62a4400c7d86c0..4341b4b107c590 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1786,7 +1786,7 @@ uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field) typeIndex = AppDomain::GetCurrentDomain()->GetThreadStaticTypeIndex(fieldDesc->GetEnclosingMethodTable()); assert(typeIndex != TypeIDProvider::INVALID_TYPE_ID); - + EE_TO_JIT_TRANSITION(); return typeIndex; } @@ -1808,7 +1808,7 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->offsetOfThreadLocalStoragePointer = offsetof(_TEB, ThreadLocalStoragePointer); pInfo->offsetOfThreadStaticBlocks = CEEInfo::ThreadLocalOffset(&t_threadStaticBlocks); pInfo->offsetOfMaxThreadStaticBlocks = CEEInfo::ThreadLocalOffset(&t_maxThreadStaticBlocks); - + JIT_TO_EE_TRANSITION_LEAF(); } #else @@ -1838,7 +1838,7 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->offsetOfThreadLocalStoragePointer = 0; pInfo->offsetOfThreadStaticBlocks = 0; pInfo->offsetOfMaxThreadStaticBlocks = 0; - + JIT_TO_EE_TRANSITION_LEAF(); } #endif // HOST_WINDOWS @@ -2515,6 +2515,33 @@ bool CEEInfo::getSystemVAmd64PassStructInRegisterDescriptor( #endif // !defined(UNIX_AMD64_ABI_ITF) } 
+/*********************************************************************/ +void CEEInfo::getXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr) +{ + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + MODE_PREEMPTIVE; + } CONTRACTL_END; + + JIT_TO_EE_TRANSITION_LEAF(); + +#if defined(TARGET_X86) || defined(TARGET_AMD64) + *xarchCpuInfoPtr = m_xarchCpuInfo; +#else + *xarchCpuInfoPtr = {}; +#endif + + EE_TO_JIT_TRANSITION_LEAF(); +} + +void CEEInfo::setXarchCpuInfo(const CORINFO_XARCH_CPU& xarchCpuInfo) +{ + LIMITED_METHOD_CONTRACT; + + m_xarchCpuInfo = xarchCpuInfo; +} + /*********************************************************************/ unsigned CEEInfo::getClassNumInstanceFields (CORINFO_CLASS_HANDLE clsHnd) { @@ -2545,7 +2572,6 @@ unsigned CEEInfo::getClassNumInstanceFields (CORINFO_CLASS_HANDLE clsHnd) return result; } - CorInfoType CEEInfo::asCorInfoType (CORINFO_CLASS_HANDLE clsHnd) { CONTRACTL { @@ -12568,6 +12594,12 @@ CorJitResult invokeCompileMethodHelper(EEJitManager *jitMgr, bool samplingEnabled = (s_stackSamplingEnabled.val(CLRConfig::UNSUPPORTED_StackSamplingEnabled) != 0); #endif +#if defined(TARGET_X86) || defined(TARGET_AMD64) + CORINFO_XARCH_CPU xarchCpuInfo; + ExecutionManager::GetEEJitManager()->getXarchCpuInfo(&xarchCpuInfo); + comp->setXarchCpuInfo(xarchCpuInfo); +#endif // TARGET_X86 || TARGET_AMD64 + #if defined(ALLOW_SXS_JIT) if (FAILED(ret) && jitMgr->m_alternateJit #ifdef FEATURE_STACK_SAMPLING diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index 73778a1ca6baba..fc116cf4625c3c 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -533,6 +533,10 @@ class CEEInfo : public ICorJitInfo void setJitFlags(const CORJIT_FLAGS& jitFlags); +#if defined(TARGET_X86) || defined(TARGET_AMD64) + void setXarchCpuInfo(const CORINFO_XARCH_CPU& xarchCpuInfo); +#endif // TARGET_X86 || TARGET_AMD64 + private: // Shrinking these buffers drastically reduces the amount of stack space // required for each instance of the 
interpreter, and thereby reduces SOs. @@ -586,10 +590,14 @@ class CEEInfo : public ICorJitInfo #endif protected: - SArray* m_pJitHandles; // GC handles used by JIT - MethodDesc* m_pMethodBeingCompiled; // Top-level method being compiled - Thread * m_pThread; // Cached current thread for faster JIT-EE transitions - CORJIT_FLAGS m_jitFlags; + SArray* m_pJitHandles; // GC handles used by JIT + MethodDesc* m_pMethodBeingCompiled; // Top-level method being compiled + Thread * m_pThread; // Cached current thread for faster JIT-EE transitions + CORJIT_FLAGS m_jitFlags; + +#if defined(TARGET_X86) || defined(TARGET_AMD64) + CORINFO_XARCH_CPU m_xarchCpuInfo; +#endif // TARGET_X86 || TARGET_AMD64 CORINFO_METHOD_HANDLE getMethodBeingCompiled() { diff --git a/src/coreclr/vm/methodtablebuilder.cpp b/src/coreclr/vm/methodtablebuilder.cpp index 147b2221fad472..756956c064cffc 100644 --- a/src/coreclr/vm/methodtablebuilder.cpp +++ b/src/coreclr/vm/methodtablebuilder.cpp @@ -1135,31 +1135,38 @@ BOOL MethodTableBuilder::CheckIfSIMDAndUpdateSize() LPCUTF8 className; LPCUTF8 nameSpace; + if (FAILED(GetMDImport()->GetNameOfTypeDef(bmtInternal->pType->GetTypeDefToken(), &className, &nameSpace))) return false; if (strcmp(className, "Vector`1") != 0 || strcmp(nameSpace, "System.Numerics") != 0) return false; - if (!TargetHasAVXSupport()) - return false; - EEJitManager *jitMgr = ExecutionManager::GetEEJitManager(); + if (jitMgr->LoadJIT()) { CORJIT_FLAGS cpuCompileFlags = jitMgr->GetCPUCompileFlags(); - unsigned intrinsicSIMDVectorLength = jitMgr->m_jit->getMaxIntrinsicSIMDVectorLength(cpuCompileFlags); - if (intrinsicSIMDVectorLength != 0) + + uint32_t maxVectorTBitWidth = jitMgr->m_jit->getMaxVectorTBitWidth(cpuCompileFlags); + uint32_t numInstanceFieldBytes = maxVectorTBitWidth / 8; + + if (numInstanceFieldBytes != 0) { - bmtFP->NumInstanceFieldBytes = intrinsicSIMDVectorLength; + _ASSERTE((numInstanceFieldBytes * 8) == maxVectorTBitWidth); + _ASSERTE((numInstanceFieldBytes >= 16) && 
((numInstanceFieldBytes % 16) == 0)); + + bmtFP->NumInstanceFieldBytes = numInstanceFieldBytes; + if (HasLayout()) { - GetLayoutInfo()->m_cbManagedSize = intrinsicSIMDVectorLength; + GetLayoutInfo()->m_cbManagedSize = numInstanceFieldBytes; } - return true; } + return true; } #endif // defined(TARGET_X86) || defined(TARGET_AMD64) + return false; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Plane.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Plane.cs index 57c15f4cff4c19..a4636a6e76a15f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Plane.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Plane.cs @@ -61,7 +61,7 @@ public Plane(Vector4 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Plane CreateFromVertices(Vector3 point1, Vector3 point2, Vector3 point3) { - if (Vector.IsHardwareAccelerated) + if (Vector128.IsHardwareAccelerated) { Vector3 a = point2 - point1; Vector3 b = point3 - point1; @@ -126,7 +126,7 @@ public static float Dot(Plane plane, Vector4 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float DotCoordinate(Plane plane, Vector3 value) { - if (Vector.IsHardwareAccelerated) + if (Vector128.IsHardwareAccelerated) { return Vector3.Dot(plane.Normal, value) + plane.D; } @@ -146,7 +146,7 @@ public static float DotCoordinate(Plane plane, Vector3 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float DotNormal(Plane plane, Vector3 value) { - if (Vector.IsHardwareAccelerated) + if (Vector128.IsHardwareAccelerated) { return Vector3.Dot(plane.Normal, value); } @@ -164,7 +164,7 @@ public static float DotNormal(Plane plane, Vector3 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Plane Normalize(Plane value) { - if (Vector.IsHardwareAccelerated) + if (Vector128.IsHardwareAccelerated) { float normalLengthSquared = value.Normal.LengthSquared(); if (MathF.Abs(normalLengthSquared - 1.0f) < 
NormalizeEpsilon) diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index 06f6a027c94dda..aa3ca7b1e3d2d4 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -50,39 +50,77 @@ static int Main() // // The test is compiled with multiple defines to test this. -#if BASELINE_INTRINSICS bool vectorsAccelerated = true; - int byteVectorLength = 16; - bool? Sse2AndBelow = true; + bool? Sse12 = true; + +#if BASELINE_INTRINSICS bool? Sse3Group = null; bool? AesLzPcl = null; bool? Sse4142 = null; bool? PopCnt = null; - bool? Avx12 = false; - bool? FmaBmi12 = false; + bool? Avx1 = false; + bool? Avx2 = false; + bool? Fma = null; + bool? Bmi12 = null; bool? Avxvnni = false; -#elif NON_VEX_INTRINSICS - bool vectorsAccelerated = true; - int byteVectorLength = 16; - bool? Sse2AndBelow = true; + bool? Avx512Group = false; + bool? Avx512Vbmi = false; +#elif SSE42_INTRINSICS bool? Sse3Group = true; bool? AesLzPcl = null; bool? Sse4142 = true; bool? PopCnt = null; - bool? Avx12 = false; - bool? FmaBmi12 = false; + bool? Avx1 = false; + bool? Avx2 = false; + bool? Fma = null; + bool? Bmi12 = null; bool? Avxvnni = false; -#elif VEX_INTRINSICS - bool vectorsAccelerated = true; - int byteVectorLength = 32; - bool? Sse2AndBelow = true; + bool? Avx512Group = false; + bool? Avx512Vbmi = false; +#elif AVX_INTRINSIC bool? Sse3Group = true; bool? AesLzPcl = null; bool? Sse4142 = true; bool? PopCnt = null; - bool? Avx12 = true; - bool? FmaBmi12 = null; + bool? Avx1 = true; + bool? Avx2 = false; + bool? Fma = null; + bool? Bmi12 = null; bool? Avxvnni = null; + bool? Avx512Group = false; + bool? Avx512Vbmi = false; +#elif AVX2_INTRINSICS + bool? Sse3Group = true; + bool? AesLzPcl = null; + bool? Sse4142 = true; + bool? PopCnt = null; + bool? Avx1 = true; + bool? Avx2 = true; + bool? Fma = null; + bool? Bmi12 = null; + bool? 
Avxvnni = null; + bool? Avx512Group = false; + bool? Avx512Vbmi = false; +#elif AVX512_INTRINSICS + bool? Sse3Group = true; + bool? AesLzPcl = null; + bool? Sse4142 = true; + bool? PopCnt = null; + bool? Avx1 = true; + bool? Avx2 = true; + bool? Fma = true; + bool? Bmi12 = null; + bool? Avxvnni = null; + bool? Avx512Group = true; + bool? Avx512Vbmi = null; +#else +#error Who dis? +#endif + +#if VECTORT128_INTRINSICS + int byteVectorLength = 16; +#elif VECTORT256_INTRINSICS + int byteVectorLength = 32; #else #error Who dis? #endif @@ -97,11 +135,11 @@ static int Main() throw new Exception($"Unexpected vector length - expected {byteVectorLength}, got {Vector.Count}"); } - Check("Sse", Sse2AndBelow, &SseIsSupported, Sse.IsSupported, () => Sse.Subtract(Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); - Check("Sse.X64", Sse2AndBelow, &SseX64IsSupported, Sse.X64.IsSupported, () => Sse.X64.ConvertToInt64WithTruncation(Vector128.Zero) == 0); + Check("Sse", Sse12, &SseIsSupported, Sse.IsSupported, () => Sse.Subtract(Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); + Check("Sse.X64", Sse12, &SseX64IsSupported, Sse.X64.IsSupported, () => Sse.X64.ConvertToInt64WithTruncation(Vector128.Zero) == 0); - Check("Sse2", Sse2AndBelow, &Sse2IsSupported, Sse2.IsSupported, () => Sse2.Extract(Vector128.Zero, 0) == 0); - Check("Sse2.X64", Sse2AndBelow, &Sse2X64IsSupported, Sse2.X64.IsSupported, () => Sse2.X64.ConvertToInt64(Vector128.Zero) == 0); + Check("Sse2", Sse12, &Sse2IsSupported, Sse2.IsSupported, () => Sse2.Extract(Vector128.Zero, 0) == 0); + Check("Sse2.X64", Sse12, &Sse2X64IsSupported, Sse2.X64.IsSupported, () => Sse2.X64.ConvertToInt64(Vector128.Zero) == 0); Check("Sse3", Sse3Group, &Sse3IsSupported, Sse3.IsSupported, () => Sse3.MoveHighAndDuplicate(Vector128.Zero).Equals(Vector128.Zero)); Check("Sse3.X64", Sse3Group, &Sse3X64IsSupported, Sse3.X64.IsSupported, null); @@ -118,20 +156,20 @@ static int Main() Check("Aes", AesLzPcl, &AesIsSupported, Aes.IsSupported, 
() => Aes.KeygenAssist(Vector128.Zero, 0).Equals(Vector128.Create((byte)99))); Check("Aes.X64", AesLzPcl, &AesX64IsSupported, Aes.X64.IsSupported, null); - Check("Avx", Avx12, &AvxIsSupported, Avx.IsSupported, () => Avx.Add(Vector256.Zero, Vector256.Zero).Equals(Vector256.Zero)); - Check("Avx.X64", Avx12, &AvxX64IsSupported, Avx.X64.IsSupported, null); + Check("Avx", Avx1, &AvxIsSupported, Avx.IsSupported, () => Avx.Add(Vector256.Zero, Vector256.Zero).Equals(Vector256.Zero)); + Check("Avx.X64", Avx1, &AvxX64IsSupported, Avx.X64.IsSupported, null); - Check("Avx2", Avx12, &Avx2IsSupported, Avx2.IsSupported, () => Avx2.Abs(Vector256.Zero).Equals(Vector256.Zero)); - Check("Avx2.X64", Avx12, &Avx2X64IsSupported, Avx2.X64.IsSupported, null); + Check("Avx2", Avx2, &Avx2IsSupported, Avx2.IsSupported, () => Avx2.Abs(Vector256.Zero).Equals(Vector256.Zero)); + Check("Avx2.X64", Avx2, &Avx2X64IsSupported, Avx2.X64.IsSupported, null); - Check("Bmi1", FmaBmi12, &Bmi1IsSupported, Bmi1.IsSupported, () => Bmi1.AndNot(0, 0) == 0); - Check("Bmi1.X64", FmaBmi12, &Bmi1X64IsSupported, Bmi1.X64.IsSupported, () => Bmi1.X64.AndNot(0, 0) == 0); + Check("Bmi1", Bmi12, &Bmi1IsSupported, Bmi1.IsSupported, () => Bmi1.AndNot(0, 0) == 0); + Check("Bmi1.X64", Bmi12, &Bmi1X64IsSupported, Bmi1.X64.IsSupported, () => Bmi1.X64.AndNot(0, 0) == 0); - Check("Bmi2", FmaBmi12, &Bmi2IsSupported, Bmi2.IsSupported, () => Bmi2.MultiplyNoFlags(0, 0) == 0); - Check("Bmi2.X64", FmaBmi12, &Bmi2X64IsSupported, Bmi2.X64.IsSupported, () => Bmi2.X64.MultiplyNoFlags(0, 0) == 0); + Check("Bmi2", Bmi12, &Bmi2IsSupported, Bmi2.IsSupported, () => Bmi2.MultiplyNoFlags(0, 0) == 0); + Check("Bmi2.X64", Bmi12, &Bmi2X64IsSupported, Bmi2.X64.IsSupported, () => Bmi2.X64.MultiplyNoFlags(0, 0) == 0); - Check("Fma", FmaBmi12, &FmaIsSupported, Fma.IsSupported, () => Fma.MultiplyAdd(Vector128.Zero, Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); - Check("Fma.X64", FmaBmi12, &FmaX64IsSupported, Fma.X64.IsSupported, null); + 
Check("Fma", Fma, &FmaIsSupported, Fma.IsSupported, () => Fma.MultiplyAdd(Vector128.Zero, Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); + Check("Fma.X64", Fma, &FmaX64IsSupported, Fma.X64.IsSupported, null); Check("Lzcnt", AesLzPcl, &LzcntIsSupported, Lzcnt.IsSupported, () => Lzcnt.LeadingZeroCount(0) == 32); Check("Lzcnt.X64", AesLzPcl, &LzcntX64IsSupported, Lzcnt.X64.IsSupported, () => Lzcnt.X64.LeadingZeroCount(0) == 64); @@ -145,6 +183,26 @@ static int Main() Check("AvxVnni", Avxvnni, &AvxVnniIsSupported, AvxVnni.IsSupported, () => AvxVnni.MultiplyWideningAndAdd(Vector128.Zero, Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); Check("AvxVnni.X64", Avxvnni, &AvxVnniX64IsSupported, AvxVnni.X64.IsSupported, null); + Check("Avx512F", Avx512Group, &Avx512FIsSupported, Avx512F.IsSupported, () => Avx512F.Abs(Vector512.Zero).Equals(Vector512.Zero)); + Check("Avx512F.VL", Avx512Group, &Avx512FVLIsSupported, Avx512F.VL.IsSupported, null); + Check("Avx512F.X64", Avx512Group, &Avx512FX64IsSupported, Avx512F.X64.IsSupported, null); + + Check("Avx512BW", Avx512Group, &Avx512BWIsSupported, Avx512BW.IsSupported, () => Avx512F.Abs(Vector512.Zero).Equals(Vector512.Zero)); + Check("Avx512BW.VL", Avx512Group, &Avx512BWVLIsSupported, Avx512BW.VL.IsSupported, null); + Check("Avx512BW.X64", Avx512Group, &Avx512BWX64IsSupported, Avx512BW.X64.IsSupported, null); + + Check("Avx512CD", Avx512Group, &Avx512CDIsSupported, Avx512CD.IsSupported, null); + Check("Avx512CD.VL", Avx512Group, &Avx512CDVLIsSupported, Avx512CD.VL.IsSupported, null); + Check("Avx512CD.X64", Avx512Group, &Avx512CDX64IsSupported, Avx512CD.X64.IsSupported, null); + + Check("Avx512DQ", Avx512Group, &Avx512DQIsSupported, Avx512DQ.IsSupported, () => Avx512F.And(Vector512.Zero, Vector512.Zero).Equals(Vector512.Zero)); + Check("Avx512DQ.VL", Avx512Group, &Avx512DQVLIsSupported, Avx512DQ.VL.IsSupported, null); + Check("Avx512DQ.X64", Avx512Group, &Avx512DQX64IsSupported, Avx512DQ.X64.IsSupported, null); + + 
Check("Avx512Vbmi", Avx512Group, &Avx512VbmiIsSupported, Avx512Vbmi.IsSupported, () => Avx512F.PermuteVar64x8(Vector512.Zero, Vector512.Zero).Equals(Vector512.Zero)); + Check("Avx512Vbmi.VL", Avx512Group, &Avx512VbmiVLIsSupported, Avx512Vbmi.VL.IsSupported, null); + Check("Avx512Vbmi.X64", Avx512Group, &Avx512VbmiX64IsSupported, Avx512Vbmi.X64.IsSupported, null); + return s_success ? 100 : 1; } @@ -183,6 +241,21 @@ static int Main() static bool PopcntX64IsSupported() => Popcnt.X64.IsSupported; static bool AvxVnniIsSupported() => AvxVnni.IsSupported; static bool AvxVnniX64IsSupported() => AvxVnni.X64.IsSupported; + static bool Avx512FIsSupported() => Avx512F.IsSupported; + static bool Avx512FVLIsSupported() => Avx512F.VL.IsSupported; + static bool Avx512FX64IsSupported() => Avx512F.X64.IsSupported; + static bool Avx512BWIsSupported() => Avx512BW.IsSupported; + static bool Avx512BWVLIsSupported() => Avx512BW.VL.IsSupported; + static bool Avx512BWX64IsSupported() => Avx512BW.X64.IsSupported; + static bool Avx512CDIsSupported() => Avx512CD.IsSupported; + static bool Avx512CDVLIsSupported() => Avx512CD.VL.IsSupported; + static bool Avx512CDX64IsSupported() => Avx512CD.X64.IsSupported; + static bool Avx512DQIsSupported() => Avx512DQ.IsSupported; + static bool Avx512DQVLIsSupported() => Avx512DQ.VL.IsSupported; + static bool Avx512DQX64IsSupported() => Avx512DQ.X64.IsSupported; + static bool Avx512VbmiIsSupported() => Avx512Vbmi.IsSupported; + static bool Avx512VbmiVLIsSupported() => Avx512Vbmi.VL.IsSupported; + static bool Avx512VbmiX64IsSupported() => Avx512Vbmi.X64.IsSupported; static bool IsConstantTrue(delegate* code) { diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Baseline.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Baseline.csproj index e49eb84629044d..9e5d0c79e268e1 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Baseline.csproj +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Baseline.csproj @@ 
-5,7 +5,7 @@ 0 true true - $(DefineConstants);BASELINE_INTRINSICS + $(DefineConstants);BASELINE_INTRINSICS;VECTORT128_INTRINSICS diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx.csproj new file mode 100644 index 00000000000000..1ccf630c3620c6 --- /dev/null +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx.csproj @@ -0,0 +1,39 @@ + + + Exe + BuildAndRun + 0 + true + true + $(DefineConstants);AVX_INTRINSICS;VECTORT128_INTRINSICS + + + + + + + + /dev/null + if [ $? -ne 0 ]; then + echo No support for AVX, test not applicable. + exit 0 + fi + fi + if [[ "$OSTYPE" == "linux"* ]]; then + if ! grep -q '^flags.*avx' /proc/cpuinfo 2>/dev/null; then + echo No support for AVX, test not applicable. + exit 0 + fi + fi +]]> + + + + + + diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Vex.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx2.csproj similarity index 91% rename from src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Vex.csproj rename to src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx2.csproj index 983436eab7d79f..626f88edc7278d 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Vex.csproj +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx2.csproj @@ -5,7 +5,7 @@ 0 true true - $(DefineConstants);VEX_INTRINSICS + $(DefineConstants);AVX2_INTRINSICS;VECTORT256_INTRINSICS diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx2_VectorT128.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx2_VectorT128.csproj new file mode 100644 index 00000000000000..e292f10ab66a07 --- /dev/null +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx2_VectorT128.csproj @@ -0,0 +1,40 @@ + + + Exe + BuildAndRun + 0 + true + true + $(DefineConstants);AVX2_INTRINSICS;VECTORT128_INTRINSICS + + + + + + + + + /dev/null + if [ $? -ne 0 ]; then + echo No support for AVX2, test not applicable. 
+ exit 0 + fi + fi + if [[ "$OSTYPE" == "linux"* ]]; then + if ! grep -q '^flags.*avx2' /proc/cpuinfo 2>/dev/null; then + echo No support for AVX2, test not applicable. + exit 0 + fi + fi +]]> + + + + + + diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx512.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx512.csproj new file mode 100644 index 00000000000000..c45d11276c9676 --- /dev/null +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx512.csproj @@ -0,0 +1,33 @@ + + + Exe + BuildAndRun + 0 + true + true + $(DefineConstants);AVX512_INTRINSICS;VECTORT256_INTRINSICS + + + + + + + + /dev/null; then + echo No support for AVX512, test not applicable. + exit 0 + fi + fi +]]> + + + + + + diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx512_VectorT128.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx512_VectorT128.csproj new file mode 100644 index 00000000000000..4928bafbce048f --- /dev/null +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx512_VectorT128.csproj @@ -0,0 +1,34 @@ + + + Exe + BuildAndRun + 0 + true + true + $(DefineConstants);AVX512_INTRINSICS;VECTORT128_INTRINSICS + + + + + + + + + /dev/null; then + echo No support for AVX512, test not applicable. 
+ exit 0 + fi + fi +]]> + + + + + + diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64NonVex.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Sse42.csproj similarity index 83% rename from src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64NonVex.csproj rename to src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Sse42.csproj index 5e8d35d67bf3da..bf1a725b11a44d 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64NonVex.csproj +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Sse42.csproj @@ -5,7 +5,7 @@ 0 true true - $(DefineConstants);NON_VEX_INTRINSICS + $(DefineConstants);SSE42_INTRINSICS;VECTORT128_INTRINSICS From a3a21afba70e740d36b1b2cbc05ba75b2d968ea6 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 7 May 2023 13:53:20 -0700 Subject: [PATCH 02/19] Ensure SPMI keeps a getMaxVectorTBitWidth implementation --- .../tools/superpmi/superpmi-shared/icorjitcompilerimpl.h | 5 +++++ .../superpmi/superpmi-shim-collector/icorjitcompiler.cpp | 5 +++++ .../superpmi/superpmi-shim-counter/icorjitcompiler.cpp | 6 ++++++ .../tools/superpmi/superpmi-shim-simple/icorjitcompiler.cpp | 5 +++++ 4 files changed, 21 insertions(+) diff --git a/src/coreclr/tools/superpmi/superpmi-shared/icorjitcompilerimpl.h b/src/coreclr/tools/superpmi/superpmi-shared/icorjitcompilerimpl.h index 66f7e0992a300c..fe34513b2fff61 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/icorjitcompilerimpl.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/icorjitcompilerimpl.h @@ -42,6 +42,11 @@ void ProcessShutdownWork(ICorStaticInfo* info); /* {}; */ void getVersionIdentifier(GUID* versionIdentifier /* OUT */ ); +// When the EE loads Vector type, it asks the JIT what the maximum width, in bits, that +// Vector is allowed to be. Zero means the JIT doesn't support SIMD intrinsics, so the +// EE should use the default size (i.e. the size of the IL implementation). 
+unsigned getMaxVectorTBitWidth(CORJIT_FLAGS cpuCompileFlags); /* { return 0; } */ + // Some JIT's may support multiple OSs. This api provides a means to specify to the JIT what OS it should // be trying to compile. This api does not produce any errors, any errors are to be generated by the // the compileMethod call, which will call back into the VM to ensure bits are correctly setup. diff --git a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitcompiler.cpp b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitcompiler.cpp index 331361a3e500b3..2f88a8de6bda6c 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitcompiler.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitcompiler.cpp @@ -158,3 +158,8 @@ void interceptor_ICJC::getVersionIdentifier(GUID* versionIdentifier /* OUT */) { original_ICorJitCompiler->getVersionIdentifier(versionIdentifier); } + +unsigned interceptor_ICJC::getMaxVectorTBitWidth(CORJIT_FLAGS cpuCompileFlags) +{ + return original_ICorJitCompiler->getMaxVectorTBitWidth(cpuCompileFlags); +} diff --git a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitcompiler.cpp b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitcompiler.cpp index f67c5cbf6863bb..289e9fe65e7202 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitcompiler.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitcompiler.cpp @@ -40,3 +40,9 @@ void interceptor_ICJC::getVersionIdentifier(GUID* versionIdentifier /* OUT */) mcs->AddCall("getVersionIdentifier"); original_ICorJitCompiler->getVersionIdentifier(versionIdentifier); } + +unsigned interceptor_ICJC::getMaxVectorTBitWidth(CORJIT_FLAGS cpuCompileFlags) +{ + mcs->AddCall("getMaxVectorTBitWidth"); + return original_ICorJitCompiler->getMaxVectorTBitWidth(cpuCompileFlags); +} diff --git a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitcompiler.cpp b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitcompiler.cpp index 
5f266c0fc96e9f..0f07055fc9bbf7 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitcompiler.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitcompiler.cpp @@ -35,3 +35,8 @@ void interceptor_ICJC::getVersionIdentifier(GUID* versionIdentifier /* OUT */) { original_ICorJitCompiler->getVersionIdentifier(versionIdentifier); } + +unsigned interceptor_ICJC::getMaxVectorTBitWidth(CORJIT_FLAGS cpuCompileFlags) +{ + return original_ICorJitCompiler->getMaxVectorTBitWidth(cpuCompileFlags); +} From 84f06808c0194391aa339fe3e0828ebafb9042d9 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 8 May 2023 06:40:56 -0700 Subject: [PATCH 03/19] Fix the non-xarch vm build --- src/coreclr/vm/jitinterface.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 4341b4b107c590..bedb847e06d86f 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -2535,12 +2535,14 @@ void CEEInfo::getXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr) EE_TO_JIT_TRANSITION_LEAF(); } +#if defined(TARGET_X86) || defined(TARGET_AMD64) void CEEInfo::setXarchCpuInfo(const CORINFO_XARCH_CPU& xarchCpuInfo) { LIMITED_METHOD_CONTRACT; m_xarchCpuInfo = xarchCpuInfo; } +#endif // TARGET_X86 || TARGET_AMD64 /*********************************************************************/ unsigned CEEInfo::getClassNumInstanceFields (CORINFO_CLASS_HANDLE clsHnd) From beacbc5b1376ebdf611aae6c132cf3ea26d68cdf Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 8 May 2023 06:57:31 -0700 Subject: [PATCH 04/19] Remove getMaxVectorTBitWidth from the JIT/EE interface, it's no longer needed --- docs/design/coreclr/jit/ryujit-overview.md | 1 - src/coreclr/inc/corjit.h | 5 --- src/coreclr/jit/ee_il_dll.cpp | 37 ------------------- src/coreclr/jit/ee_il_dll.hpp | 2 - .../tools/aot/jitinterface/jitwrapper.cpp | 5 --- .../superpmi-shared/icorjitcompilerimpl.h | 5 --- .../icorjitcompiler.cpp | 5 --- 
.../superpmi-shim-counter/icorjitcompiler.cpp | 6 --- .../superpmi-shim-simple/icorjitcompiler.cpp | 5 --- src/coreclr/vm/methodtablebuilder.cpp | 26 +++++++------ 10 files changed, 14 insertions(+), 83 deletions(-) diff --git a/docs/design/coreclr/jit/ryujit-overview.md b/docs/design/coreclr/jit/ryujit-overview.md index a696dca8fb3edf..7f8cf8d553338d 100644 --- a/docs/design/coreclr/jit/ryujit-overview.md +++ b/docs/design/coreclr/jit/ryujit-overview.md @@ -34,7 +34,6 @@ The following are the key methods on this interface: It returns a pointer to the code, its size, and additional GC, EH and (optionally) debug info. * `getVersionIdentifier` is the mechanism by which the JIT/EE interface is versioned. There is a single GUID (manually generated) which the JIT and EE must agree on. - * `getMaxVectorTBitWidth` communicates to the EE the the maximum width, in bits, that Vector is allowed to be. * `ICorJitInfo` – this is the interface that the EE implements. It has many methods defined on it that allow the JIT to look up metadata tokens, traverse type signatures, compute field and vtable offsets, find method entry points, construct string literals, etc. This bulk of this interface is inherited from `ICorDynamicInfo` which is defined in diff --git a/src/coreclr/inc/corjit.h b/src/coreclr/inc/corjit.h index bc0b856a23c1ac..c48bf938da09df 100644 --- a/src/coreclr/inc/corjit.h +++ b/src/coreclr/inc/corjit.h @@ -209,11 +209,6 @@ class ICorJitCompiler GUID* versionIdentifier /* OUT */ ) = 0; - // When the EE loads Vector type, it asks the JIT what the maximum width, in bits, that - // Vector is allowed to be. Zero means the JIT doesn't support SIMD intrinsics, so the - // EE should use the default size (i.e. the size of the IL implementation). - virtual unsigned getMaxVectorTBitWidth(CORJIT_FLAGS cpuCompileFlags) { return 0; } - // Some JIT's may support multiple OSs. This api provides a means to specify to the JIT what OS it should // be trying to compile. 
This api does not produce any errors, any errors are to be generated by the // the compileMethod call, which will call back into the VM to ensure bits are correctly setup. diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index 4f3fc02d33429e..80674aaebb6f7c 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -314,43 +314,6 @@ void CILJit::setTargetOS(CORINFO_OS os) #endif } -/***************************************************************************** - * Get the maximum width, in bytes, that Vector is allowed to be. - */ -unsigned CILJit::getMaxVectorTBitWidth(CORJIT_FLAGS cpuCompileFlags) -{ - JitFlags jitFlags; - jitFlags.SetFromFlags(cpuCompileFlags); - -#if defined(FEATURE_SIMD) - CORINFO_InstructionSetFlags instructionSetFlags = cpuCompileFlags.GetInstructionSetFlags(); - -#if defined(TARGET_XARCH) - if (instructionSetFlags.HasInstructionSet(InstructionSet_VectorT256)) - { - if ((GetJitTls() != nullptr) && (JitTls::GetCompiler() != nullptr)) - { - JITDUMP("getMaxVectorTBitWidth: returning 256\n"); - } - return 256; - } -#endif // defined(TARGET_XARCH) - assert(instructionSetFlags.HasInstructionSet(InstructionSet_VectorT128)); - - if ((GetJitTls() != nullptr) && (JitTls::GetCompiler() != nullptr)) - { - JITDUMP("getMaxVectorTBitWidth: returning 128\n"); - } - return 128; -#else // !FEATURE_SIMD - if ((GetJitTls() != nullptr) && (JitTls::GetCompiler() != nullptr)) - { - JITDUMP("getMaxVectorTBitWidth: returning 0\n"); - } - return 0; -#endif // !FEATURE_SIMD -} - //------------------------------------------------------------------------ // eeGetArgSize: Returns the number of bytes required for the given type argument // including padding after the actual value. 
diff --git a/src/coreclr/jit/ee_il_dll.hpp b/src/coreclr/jit/ee_il_dll.hpp index d26bbec8ce8fac..c8a580fa723a66 100644 --- a/src/coreclr/jit/ee_il_dll.hpp +++ b/src/coreclr/jit/ee_il_dll.hpp @@ -17,8 +17,6 @@ class CILJit : public ICorJitCompiler void getVersionIdentifier(GUID* versionIdentifier /* OUT */ ); - unsigned getMaxVectorTBitWidth(CORJIT_FLAGS cpuCompileFlags); - void setTargetOS(CORINFO_OS os); }; diff --git a/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp b/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp index fed4cacdbaad72..1a091b2fab1977 100644 --- a/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp +++ b/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp @@ -50,8 +50,3 @@ DLL_EXPORT void JitProcessShutdownWork(ICorJitCompiler * pJit) { return pJit->ProcessShutdownWork(nullptr); } - -DLL_EXPORT unsigned GetMaxVectorTBitWidth(ICorJitCompiler * pJit, CORJIT_FLAGS * flags) -{ - return pJit->getMaxVectorTBitWidth(*flags); -} diff --git a/src/coreclr/tools/superpmi/superpmi-shared/icorjitcompilerimpl.h b/src/coreclr/tools/superpmi/superpmi-shared/icorjitcompilerimpl.h index fe34513b2fff61..66f7e0992a300c 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/icorjitcompilerimpl.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/icorjitcompilerimpl.h @@ -42,11 +42,6 @@ void ProcessShutdownWork(ICorStaticInfo* info); /* {}; */ void getVersionIdentifier(GUID* versionIdentifier /* OUT */ ); -// When the EE loads Vector type, it asks the JIT what the maximum width, in bits, that -// Vector is allowed to be. Zero means the JIT doesn't support SIMD intrinsics, so the -// EE should use the default size (i.e. the size of the IL implementation). -unsigned getMaxVectorTBitWidth(CORJIT_FLAGS cpuCompileFlags); /* { return 0; } */ - // Some JIT's may support multiple OSs. This api provides a means to specify to the JIT what OS it should // be trying to compile. 
This api does not produce any errors, any errors are to be generated by the // the compileMethod call, which will call back into the VM to ensure bits are correctly setup. diff --git a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitcompiler.cpp b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitcompiler.cpp index 2f88a8de6bda6c..331361a3e500b3 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitcompiler.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitcompiler.cpp @@ -158,8 +158,3 @@ void interceptor_ICJC::getVersionIdentifier(GUID* versionIdentifier /* OUT */) { original_ICorJitCompiler->getVersionIdentifier(versionIdentifier); } - -unsigned interceptor_ICJC::getMaxVectorTBitWidth(CORJIT_FLAGS cpuCompileFlags) -{ - return original_ICorJitCompiler->getMaxVectorTBitWidth(cpuCompileFlags); -} diff --git a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitcompiler.cpp b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitcompiler.cpp index 289e9fe65e7202..f67c5cbf6863bb 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitcompiler.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitcompiler.cpp @@ -40,9 +40,3 @@ void interceptor_ICJC::getVersionIdentifier(GUID* versionIdentifier /* OUT */) mcs->AddCall("getVersionIdentifier"); original_ICorJitCompiler->getVersionIdentifier(versionIdentifier); } - -unsigned interceptor_ICJC::getMaxVectorTBitWidth(CORJIT_FLAGS cpuCompileFlags) -{ - mcs->AddCall("getMaxVectorTBitWidth"); - return original_ICorJitCompiler->getMaxVectorTBitWidth(cpuCompileFlags); -} diff --git a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitcompiler.cpp b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitcompiler.cpp index 0f07055fc9bbf7..5f266c0fc96e9f 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitcompiler.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitcompiler.cpp @@ -35,8 +35,3 @@ void 
interceptor_ICJC::getVersionIdentifier(GUID* versionIdentifier /* OUT */) { original_ICorJitCompiler->getVersionIdentifier(versionIdentifier); } - -unsigned interceptor_ICJC::getMaxVectorTBitWidth(CORJIT_FLAGS cpuCompileFlags) -{ - return original_ICorJitCompiler->getMaxVectorTBitWidth(cpuCompileFlags); -} diff --git a/src/coreclr/vm/methodtablebuilder.cpp b/src/coreclr/vm/methodtablebuilder.cpp index 756956c064cffc..9e559deae6366b 100644 --- a/src/coreclr/vm/methodtablebuilder.cpp +++ b/src/coreclr/vm/methodtablebuilder.cpp @@ -1146,22 +1146,24 @@ BOOL MethodTableBuilder::CheckIfSIMDAndUpdateSize() if (jitMgr->LoadJIT()) { - CORJIT_FLAGS cpuCompileFlags = jitMgr->GetCPUCompileFlags(); + CORJIT_FLAGS CPUCompileFlags = jitMgr->GetCPUCompileFlags(); + uint32_t numInstanceFieldBytes = 16; - uint32_t maxVectorTBitWidth = jitMgr->m_jit->getMaxVectorTBitWidth(cpuCompileFlags); - uint32_t numInstanceFieldBytes = maxVectorTBitWidth / 8; - - if (numInstanceFieldBytes != 0) + if (CPUCompileFlags.IsSet(InstructionSet_VectorT512)) + { + // TODO-XARCH: The JIT needs to be updated to support 64-byte Vector + numInstanceFieldBytes = 32; + } + else if (CPUCompileFlags.IsSet(InstructionSet_VectorT256)) { - _ASSERTE((numInstanceFieldBytes * 8) == maxVectorTBitWidth); - _ASSERTE((numInstanceFieldBytes >= 16) && ((numInstanceFieldBytes % 16) == 0)); + numInstanceFieldBytes = 32; + } - bmtFP->NumInstanceFieldBytes = numInstanceFieldBytes; + bmtFP->NumInstanceFieldBytes = numInstanceFieldBytes; - if (HasLayout()) - { - GetLayoutInfo()->m_cbManagedSize = numInstanceFieldBytes; - } + if (HasLayout()) + { + GetLayoutInfo()->m_cbManagedSize = numInstanceFieldBytes; } return true; } From 52b055fa7d624d4d8f46ac5fd5237dc439c4f531 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 8 May 2023 09:14:43 -0700 Subject: [PATCH 05/19] Move SetCpuInfo down into the EEJitManager constructor --- src/coreclr/vm/codeman.cpp | 4 ++-- src/coreclr/vm/methodtablebuilder.cpp | 29 
+++++++++++++-------------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 2131a0460617e8..68d90c4e6eafb4 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1256,6 +1256,8 @@ EEJitManager::EEJitManager() m_storeRichDebugInfo = false; m_cleanupList = NULL; + + SetCpuInfo(); } #if defined(TARGET_X86) || defined(TARGET_AMD64) @@ -2233,8 +2235,6 @@ BOOL EEJitManager::LoadJIT() if (IsJitLoaded()) return TRUE; - SetCpuInfo(); - m_storeRichDebugInfo = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_RichDebugInfo) != 0; ICorJitCompiler* newJitCompiler = NULL; diff --git a/src/coreclr/vm/methodtablebuilder.cpp b/src/coreclr/vm/methodtablebuilder.cpp index 9e559deae6366b..45c585da6d2196 100644 --- a/src/coreclr/vm/methodtablebuilder.cpp +++ b/src/coreclr/vm/methodtablebuilder.cpp @@ -1142,32 +1142,31 @@ BOOL MethodTableBuilder::CheckIfSIMDAndUpdateSize() if (strcmp(className, "Vector`1") != 0 || strcmp(nameSpace, "System.Numerics") != 0) return false; - EEJitManager *jitMgr = ExecutionManager::GetEEJitManager(); + CORJIT_FLAGS CPUCompileFlags = ExecutionManager::GetEEJitManager()->GetCPUCompileFlags(); + uint32_t numInstanceFieldBytes = 16; - if (jitMgr->LoadJIT()) + if (CPUCompileFlags.IsSet(InstructionSet_VectorT512)) { - CORJIT_FLAGS CPUCompileFlags = jitMgr->GetCPUCompileFlags(); - uint32_t numInstanceFieldBytes = 16; - - if (CPUCompileFlags.IsSet(InstructionSet_VectorT512)) - { - // TODO-XARCH: The JIT needs to be updated to support 64-byte Vector - numInstanceFieldBytes = 32; - } - else if (CPUCompileFlags.IsSet(InstructionSet_VectorT256)) - { - numInstanceFieldBytes = 32; - } + // TODO-XARCH: The JIT needs to be updated to support 64-byte Vector + numInstanceFieldBytes = 32; + } + else if (CPUCompileFlags.IsSet(InstructionSet_VectorT256)) + { + numInstanceFieldBytes = 32; + } + if (numInstanceFieldBytes != 16) + { bmtFP->NumInstanceFieldBytes = 
numInstanceFieldBytes; if (HasLayout()) { GetLayoutInfo()->m_cbManagedSize = numInstanceFieldBytes; } + return true; } -#endif // defined(TARGET_X86) || defined(TARGET_AMD64) +#endif // TARGET_X86 || TARGET_AMD64 return false; } From 8ce51125aa96982abd7119028ea9f9cac9b7147d Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 8 May 2023 09:32:12 -0700 Subject: [PATCH 06/19] Remove getXarchCpuInfo in favor of passing `JIT_FLAG_VECTOR512_THROTTLING` --- src/coreclr/inc/corinfo.h | 20 --- src/coreclr/inc/corjitflags.h | 6 +- src/coreclr/inc/icorjitinfoimpl_generated.h | 3 - src/coreclr/jit/ICorJitInfo_names_generated.h | 1 - .../jit/ICorJitInfo_wrapper_generated.hpp | 8 -- src/coreclr/jit/compiler.cpp | 42 +----- src/coreclr/jit/compiler.h | 10 +- src/coreclr/jit/ee_il_dll.cpp | 14 -- src/coreclr/jit/hwintrinsicxarch.cpp | 12 +- src/coreclr/jit/jitee.h | 6 +- src/coreclr/jit/simdashwintrinsic.cpp | 6 +- .../tools/Common/JitInterface/CorInfoImpl.cs | 8 -- .../JitInterface/CorInfoImpl_generated.cs | 121 ++++++++---------- .../ThunkGenerator/ThunkInput.txt | 2 - .../aot/jitinterface/jitinterface_generated.h | 9 -- .../tools/superpmi/superpmi-shared/lwmlist.h | 1 - .../superpmi-shared/methodcontext.cpp | 28 ---- .../superpmi/superpmi-shared/methodcontext.h | 5 - .../superpmi-shim-collector/icorjitinfo.cpp | 7 - .../icorjitinfo_generated.cpp | 7 - .../icorjitinfo_generated.cpp | 6 - .../tools/superpmi/superpmi/icorjitinfo.cpp | 6 - src/coreclr/vm/codeman.cpp | 39 +++++- src/coreclr/vm/codeman.h | 30 +++-- src/coreclr/vm/jitinterface.cpp | 35 ----- src/coreclr/vm/jitinterface.h | 8 -- 26 files changed, 135 insertions(+), 305 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 52bcaba8272cee..93f67680284498 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -317,24 +317,6 @@ struct SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR } }; -// Represents information about an XARCH CPU -union CORINFO_XARCH_CPU -{ - 
struct { - uint32_t SteppingId : 4; - uint32_t Model : 4; - uint32_t FamilyId : 4; - uint32_t ProcessorType : 2; - uint32_t IsAuthenticAmd : 1; // Unused bits in the CPUID result - uint32_t IsGenuineIntel : 1; // Unused bits in the CPUID result - uint32_t ExtendedModelId : 4; - uint32_t ExtendedFamilyId : 8; - uint32_t Reserved : 4; // Unused bits in the CPUID result - }; - - uint32_t Value; -}; - // StructFloadFieldInfoFlags: used on LoongArch64 architecture by `getLoongArch64PassStructInRegisterFlags` and // `getRISCV64PassStructInRegisterFlags` API to convey struct argument passing information. // @@ -3045,8 +3027,6 @@ class ICorStaticInfo virtual uint32_t getLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls) = 0; virtual uint32_t getRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls) = 0; - - virtual void getXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfo) = 0; }; /***************************************************************************** diff --git a/src/coreclr/inc/corjitflags.h b/src/coreclr/inc/corjitflags.h index 4d511aa6f74705..d209ec1e928c2d 100644 --- a/src/coreclr/inc/corjitflags.h +++ b/src/coreclr/inc/corjitflags.h @@ -93,9 +93,11 @@ class CORJIT_FLAGS #if defined(TARGET_ARM) CORJIT_FLAG_SOFTFP_ABI = 43, // On ARM should enable armel calling convention -#else // !defined(TARGET_ARM) +#elif defined(TARGET_X86) || defined(TARGET_AMD64) + CORJIT_FLAG_VECTOR512_THROTTLING = 43, // On Xarch, 512-bit vector usage may incur CPU frequency throttling +#else CORJIT_FLAG_UNUSED16 = 43, -#endif // !defined(TARGET_ARM) +#endif CORJIT_FLAG_UNUSED17 = 44, CORJIT_FLAG_UNUSED18 = 45, diff --git a/src/coreclr/inc/icorjitinfoimpl_generated.h b/src/coreclr/inc/icorjitinfoimpl_generated.h index 99eb40c7552a76..cf4c94498c33a1 100644 --- a/src/coreclr/inc/icorjitinfoimpl_generated.h +++ b/src/coreclr/inc/icorjitinfoimpl_generated.h @@ -542,9 +542,6 @@ uint32_t getRISCV64PassStructInRegisterFlags( uint32_t getThreadTLSIndex( void** ppIndirection) 
override; -void getXarchCpuInfo( - CORINFO_XARCH_CPU* xarchCpuInfoPtr) override; - const void* getInlinedCallFrameVptr( void** ppIndirection) override; diff --git a/src/coreclr/jit/ICorJitInfo_names_generated.h b/src/coreclr/jit/ICorJitInfo_names_generated.h index 517496d0e68cb5..cb5db87194525c 100644 --- a/src/coreclr/jit/ICorJitInfo_names_generated.h +++ b/src/coreclr/jit/ICorJitInfo_names_generated.h @@ -136,7 +136,6 @@ DEF_CLR_API(getSystemVAmd64PassStructInRegisterDescriptor) DEF_CLR_API(getLoongArch64PassStructInRegisterFlags) DEF_CLR_API(getRISCV64PassStructInRegisterFlags) DEF_CLR_API(getThreadTLSIndex) -DEF_CLR_API(getXarchCpuInfo) DEF_CLR_API(getInlinedCallFrameVptr) DEF_CLR_API(getAddrOfCaptureThreadGlobal) DEF_CLR_API(getHelperFtn) diff --git a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp index 8e07bd0bd70c75..b819fc12fbce13 100644 --- a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp +++ b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp @@ -1297,14 +1297,6 @@ uint32_t WrapICorJitInfo::getThreadTLSIndex( return temp; } -void WrapICorJitInfo::getXarchCpuInfo( - CORINFO_XARCH_CPU* xarchCpuInfoPtr) -{ - API_ENTER(getXarchCpuInfo); - wrapHnd->getXarchCpuInfo(xarchCpuInfoPtr); - API_LEAVE(getXarchCpuInfo); -} - const void* WrapICorJitInfo::getInlinedCallFrameVptr( void** ppIndirection) { diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 9673ee08a47c98..970b6e019250c1 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2328,43 +2328,15 @@ void Compiler::compSetProcessor() instructionSetFlags.AddInstructionSet(InstructionSet_Vector512); - if (preferredVectorByteLength == 0) + if ((preferredVectorByteLength == 0) && jitFlags.IsSet(JitFlags::JIT_FLAG_VECTOR512_THROTTLING)) { - CORINFO_XARCH_CPU xarchCpuInfo; - eeGetXarchCpuInfo(&xarchCpuInfo); + // Some architectures can experience frequency throttling when executing + // executing 512-bit width 
instructions. To account for this we set the + // default preferred vector width to 256-bits in some scenarios. Power + // users can override this with `DOTNET_PreferredVectorBitWidth=512` to + // allow using such instructions where hardware support is available. - if (xarchCpuInfo.IsGenuineIntel) - { - // Some architectures can experience frequency throttling when executing - // executing 512-bit width instructions. To account for this we set the - // default preferred vector width to 256-bits in some scenarios. Power - // users can override this with `DOTNET_PreferredVectorBitWith=512` to - // allow using such instructions where hardware support is available. - - if (xarchCpuInfo.FamilyId == 0x06) - { - if (xarchCpuInfo.ExtendedModelId == 0x05) - { - if (xarchCpuInfo.Model == 0x05) - { - // * Skylake (Server) - // * Cascade Lake - // * Cooper Lake - - preferredVectorByteLength = 32; - } - } - else if (xarchCpuInfo.ExtendedModelId == 0x06) - { - if (xarchCpuInfo.Model == 0x06) - { - // * Cannon Lake - - preferredVectorByteLength = 32; - } - } - } - } + preferredVectorByteLength = 256 / 8; } } diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index d2c0836f942ff2..8d83dbe3608b0d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8110,10 +8110,6 @@ class Compiler bool eeRunWithSPMIErrorTrapImp(void (*function)(void*), void* param); -#if defined(TARGET_XARCH) - void eeGetXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfo); -#endif // TARGET_XARCH - // Utility functions static CORINFO_METHOD_HANDLE eeFindHelper(unsigned helper); @@ -8681,9 +8677,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // Note - cannot be used for System.Runtime.Intrinsic unsigned getVectorTByteLength() { - // We need to report the ISA dependency to the VM so that scenarios - // such as R2R work correctly for larger vector sizes, so we always - // do `compExactlyDependsOn` for such cases.
+// We need to report the ISA dependency to the VM so that scenarios +// such as R2R work correctly for larger vector sizes, so we always +// do `compExactlyDependsOn` for such cases. #if defined(TARGET_XARCH) if (compExactlyDependsOn(InstructionSet_VectorT256)) diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index 80674aaebb6f7c..5d7c5b9b9c4eb9 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -1395,17 +1395,3 @@ unsigned Compiler::eeTryGetClassSize(CORINFO_CLASS_HANDLE clsHnd) } #endif // !DEBUG - -#if defined(TARGET_XARCH) - //------------------------------------------------------------------------ - // eeGetXarchCpuInfo: Gets the XARCH CPU information for the JIT - // - // Arguments: - // xarchCpuInfoPtr -- pointer to the struct that recieves the cpu info - // - void Compiler::eeGetXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr) - { - info.compCompHnd->getXarchCpuInfo(xarchCpuInfoPtr); - } -#endif // TARGET_XARCH - diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 0cfa182757c3e1..07a82eaccf2079 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -907,8 +907,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, else { assert(intrinsic == NI_Vector256_AsVector256); - return impSpecialIntrinsic(NI_Vector128_ToVector256, clsHnd, method, sig, simdBaseJitType, retType, - 16); + return impSpecialIntrinsic(NI_Vector128_ToVector256, clsHnd, method, sig, simdBaseJitType, + retType, 16); } } } @@ -954,14 +954,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (intrinsic == NI_Vector512_AsVector) { - return impSpecialIntrinsic(NI_Vector512_GetLower128, clsHnd, method, sig, simdBaseJitType, retType, - simdSize); + return impSpecialIntrinsic(NI_Vector512_GetLower128, clsHnd, method, sig, simdBaseJitType, + retType, simdSize); } else { assert(intrinsic == NI_Vector512_AsVector512); - return 
impSpecialIntrinsic(NI_Vector128_ToVector512, clsHnd, method, sig, simdBaseJitType, retType, - 16); + return impSpecialIntrinsic(NI_Vector128_ToVector512, clsHnd, method, sig, simdBaseJitType, + retType, 16); } } } diff --git a/src/coreclr/jit/jitee.h b/src/coreclr/jit/jitee.h index 40373485c127db..b7c6bb4fd60a02 100644 --- a/src/coreclr/jit/jitee.h +++ b/src/coreclr/jit/jitee.h @@ -83,9 +83,11 @@ class JitFlags #if defined(TARGET_ARM) JIT_FLAG_SOFTFP_ABI = 43, // On ARM should enable armel calling convention -#else // !defined(TARGET_ARM) +#elif defined(TARGET_XARCH) + JIT_FLAG_VECTOR512_THROTTLING = 43, // On Xarch, 512-bit vector usage may incur CPU frequency throttling +#else JIT_FLAG_UNUSED16 = 43, -#endif // !defined(TARGET_ARM) +#endif JIT_FLAG_UNUSED17 = 44, JIT_FLAG_UNUSED18 = 45, diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index c7b4950c5525dc..e176a896480be9 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -86,9 +86,9 @@ NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(Compiler* comp, return comp->IsBaselineSimdIsaSupported() ? 
NI_IsSupported_True : NI_IsSupported_False; } - var_types retType = JITtype2varType(sig->retType); - CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; + var_types retType = JITtype2varType(sig->retType); + CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; if (retType == TYP_STRUCT) { diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index a77622bb7acb3d..ddf825c829195e 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -3263,14 +3263,6 @@ private uint getRISCV64PassStructInRegisterFlags(CORINFO_CLASS_STRUCT_* cls) return RISCV64PassStructInRegister.GetRISCV64PassStructInRegisterFlags(typeDesc); } -#pragma warning disable CA1822 // Mark members as static - private void getXarchCpuInfo(ref CORINFO_XARCH_CPU xarchCpuInfoPtr) - { - // We can't assume a CPU for AOT compilation so return the default - xarchCpuInfoPtr = default; - } -#pragma warning restore CA1822 // Mark members as static - private uint getThreadTLSIndex(ref void* ppIndirection) { throw new NotImplementedException("getThreadTLSIndex"); } private void* getInlinedCallFrameVptr(ref void* ppIndirection) diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs index b9ef19be411995..3c7f632ef8e7b0 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs @@ -1964,20 +1964,6 @@ private static uint _getThreadTLSIndex(IntPtr thisHandle, IntPtr* ppException, v } } - [UnmanagedCallersOnly] - private static void _getXarchCpuInfo(IntPtr thisHandle, IntPtr* ppException, CORINFO_XARCH_CPU* xarchCpuInfoPtr) - { - var _this = GetThis(thisHandle); - try - { - _this.getXarchCpuInfo(ref *xarchCpuInfoPtr); - } - 
catch (Exception ex) - { - *ppException = _this.AllocException(ex); - } - } - [UnmanagedCallersOnly] private static void* _getInlinedCallFrameVptr(IntPtr thisHandle, IntPtr* ppException, void** ppIndirection) { @@ -2744,7 +2730,7 @@ private static uint _getJitFlags(IntPtr thisHandle, IntPtr* ppException, CORJIT_ private static IntPtr GetUnmanagedCallbacks() { - void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 185); + void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 184); callbacks[0] = (delegate* unmanaged)&_isIntrinsic; callbacks[1] = (delegate* unmanaged)&_getMethodAttribs; @@ -2878,59 +2864,58 @@ private static IntPtr GetUnmanagedCallbacks() callbacks[129] = (delegate* unmanaged)&_getLoongArch64PassStructInRegisterFlags; callbacks[130] = (delegate* unmanaged)&_getRISCV64PassStructInRegisterFlags; callbacks[131] = (delegate* unmanaged)&_getThreadTLSIndex; - callbacks[132] = (delegate* unmanaged)&_getXarchCpuInfo; - callbacks[133] = (delegate* unmanaged)&_getInlinedCallFrameVptr; - callbacks[134] = (delegate* unmanaged)&_getAddrOfCaptureThreadGlobal; - callbacks[135] = (delegate* unmanaged)&_getHelperFtn; - callbacks[136] = (delegate* unmanaged)&_getFunctionEntryPoint; - callbacks[137] = (delegate* unmanaged)&_getFunctionFixedEntryPoint; - callbacks[138] = (delegate* unmanaged)&_getMethodSync; - callbacks[139] = (delegate* unmanaged)&_getLazyStringLiteralHelper; - callbacks[140] = (delegate* unmanaged)&_embedModuleHandle; - callbacks[141] = (delegate* unmanaged)&_embedClassHandle; - callbacks[142] = (delegate* unmanaged)&_embedMethodHandle; - callbacks[143] = (delegate* unmanaged)&_embedFieldHandle; - callbacks[144] = (delegate* unmanaged)&_embedGenericHandle; - callbacks[145] = (delegate* unmanaged)&_getLocationOfThisType; - callbacks[146] = (delegate* unmanaged)&_getAddressOfPInvokeTarget; - callbacks[147] = (delegate* unmanaged)&_GetCookieForPInvokeCalliSig; - callbacks[148] = (delegate* 
unmanaged)&_canGetCookieForPInvokeCalliSig; - callbacks[149] = (delegate* unmanaged)&_getJustMyCodeHandle; - callbacks[150] = (delegate* unmanaged)&_GetProfilingHandle; - callbacks[151] = (delegate* unmanaged)&_getCallInfo; - callbacks[152] = (delegate* unmanaged)&_canAccessFamily; - callbacks[153] = (delegate* unmanaged)&_isRIDClassDomainID; - callbacks[154] = (delegate* unmanaged)&_getClassDomainID; - callbacks[155] = (delegate* unmanaged)&_getStaticFieldContent; - callbacks[156] = (delegate* unmanaged)&_getObjectContent; - callbacks[157] = (delegate* unmanaged)&_getStaticFieldCurrentClass; - callbacks[158] = (delegate* unmanaged)&_getVarArgsHandle; - callbacks[159] = (delegate* unmanaged)&_canGetVarArgsHandle; - callbacks[160] = (delegate* unmanaged)&_constructStringLiteral; - callbacks[161] = (delegate* unmanaged)&_emptyStringLiteral; - callbacks[162] = (delegate* unmanaged)&_getFieldThreadLocalStoreID; - callbacks[163] = (delegate* unmanaged)&_GetDelegateCtor; - callbacks[164] = (delegate* unmanaged)&_MethodCompileComplete; - callbacks[165] = (delegate* unmanaged)&_getTailCallHelpers; - callbacks[166] = (delegate* unmanaged)&_convertPInvokeCalliToCall; - callbacks[167] = (delegate* unmanaged)&_notifyInstructionSetUsage; - callbacks[168] = (delegate* unmanaged)&_updateEntryPointForTailCall; - callbacks[169] = (delegate* unmanaged)&_allocMem; - callbacks[170] = (delegate* unmanaged)&_reserveUnwindInfo; - callbacks[171] = (delegate* unmanaged)&_allocUnwindInfo; - callbacks[172] = (delegate* unmanaged)&_allocGCInfo; - callbacks[173] = (delegate* unmanaged)&_setEHcount; - callbacks[174] = (delegate* unmanaged)&_setEHinfo; - callbacks[175] = (delegate* unmanaged)&_logMsg; - callbacks[176] = (delegate* unmanaged)&_doAssert; - callbacks[177] = (delegate* unmanaged)&_reportFatalError; - callbacks[178] = (delegate* unmanaged)&_getPgoInstrumentationResults; - callbacks[179] = (delegate* unmanaged)&_allocPgoInstrumentationBySchema; - callbacks[180] = (delegate* 
unmanaged)&_recordCallSite; - callbacks[181] = (delegate* unmanaged)&_recordRelocation; - callbacks[182] = (delegate* unmanaged)&_getRelocTypeHint; - callbacks[183] = (delegate* unmanaged)&_getExpectedTargetArchitecture; - callbacks[184] = (delegate* unmanaged)&_getJitFlags; + callbacks[132] = (delegate* unmanaged)&_getInlinedCallFrameVptr; + callbacks[133] = (delegate* unmanaged)&_getAddrOfCaptureThreadGlobal; + callbacks[134] = (delegate* unmanaged)&_getHelperFtn; + callbacks[135] = (delegate* unmanaged)&_getFunctionEntryPoint; + callbacks[136] = (delegate* unmanaged)&_getFunctionFixedEntryPoint; + callbacks[137] = (delegate* unmanaged)&_getMethodSync; + callbacks[138] = (delegate* unmanaged)&_getLazyStringLiteralHelper; + callbacks[139] = (delegate* unmanaged)&_embedModuleHandle; + callbacks[140] = (delegate* unmanaged)&_embedClassHandle; + callbacks[141] = (delegate* unmanaged)&_embedMethodHandle; + callbacks[142] = (delegate* unmanaged)&_embedFieldHandle; + callbacks[143] = (delegate* unmanaged)&_embedGenericHandle; + callbacks[144] = (delegate* unmanaged)&_getLocationOfThisType; + callbacks[145] = (delegate* unmanaged)&_getAddressOfPInvokeTarget; + callbacks[146] = (delegate* unmanaged)&_GetCookieForPInvokeCalliSig; + callbacks[147] = (delegate* unmanaged)&_canGetCookieForPInvokeCalliSig; + callbacks[148] = (delegate* unmanaged)&_getJustMyCodeHandle; + callbacks[149] = (delegate* unmanaged)&_GetProfilingHandle; + callbacks[150] = (delegate* unmanaged)&_getCallInfo; + callbacks[151] = (delegate* unmanaged)&_canAccessFamily; + callbacks[152] = (delegate* unmanaged)&_isRIDClassDomainID; + callbacks[153] = (delegate* unmanaged)&_getClassDomainID; + callbacks[154] = (delegate* unmanaged)&_getStaticFieldContent; + callbacks[155] = (delegate* unmanaged)&_getObjectContent; + callbacks[156] = (delegate* unmanaged)&_getStaticFieldCurrentClass; + callbacks[157] = (delegate* unmanaged)&_getVarArgsHandle; + callbacks[158] = (delegate* unmanaged)&_canGetVarArgsHandle; + 
callbacks[159] = (delegate* unmanaged)&_constructStringLiteral; + callbacks[160] = (delegate* unmanaged)&_emptyStringLiteral; + callbacks[161] = (delegate* unmanaged)&_getFieldThreadLocalStoreID; + callbacks[162] = (delegate* unmanaged)&_GetDelegateCtor; + callbacks[163] = (delegate* unmanaged)&_MethodCompileComplete; + callbacks[164] = (delegate* unmanaged)&_getTailCallHelpers; + callbacks[165] = (delegate* unmanaged)&_convertPInvokeCalliToCall; + callbacks[166] = (delegate* unmanaged)&_notifyInstructionSetUsage; + callbacks[167] = (delegate* unmanaged)&_updateEntryPointForTailCall; + callbacks[168] = (delegate* unmanaged)&_allocMem; + callbacks[169] = (delegate* unmanaged)&_reserveUnwindInfo; + callbacks[170] = (delegate* unmanaged)&_allocUnwindInfo; + callbacks[171] = (delegate* unmanaged)&_allocGCInfo; + callbacks[172] = (delegate* unmanaged)&_setEHcount; + callbacks[173] = (delegate* unmanaged)&_setEHinfo; + callbacks[174] = (delegate* unmanaged)&_logMsg; + callbacks[175] = (delegate* unmanaged)&_doAssert; + callbacks[176] = (delegate* unmanaged)&_reportFatalError; + callbacks[177] = (delegate* unmanaged)&_getPgoInstrumentationResults; + callbacks[178] = (delegate* unmanaged)&_allocPgoInstrumentationBySchema; + callbacks[179] = (delegate* unmanaged)&_recordCallSite; + callbacks[180] = (delegate* unmanaged)&_recordRelocation; + callbacks[181] = (delegate* unmanaged)&_getRelocTypeHint; + callbacks[182] = (delegate* unmanaged)&_getExpectedTargetArchitecture; + callbacks[183] = (delegate* unmanaged)&_getJitFlags; return (IntPtr)callbacks; } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt index 3dba43bace4369..b9f8aa66f259eb 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt @@ -73,7 +73,6 @@ const wchar_t*,String uint32_t**,ref uint* unsigned*,ref uint 
CORJIT_FLAGS*,ref CORJIT_FLAGS -CORINFO_XARCH_CPU*,ref CORINFO_XARCH_CPU CORINFO_CONST_LOOKUP*,ref CORINFO_CONST_LOOKUP CORINFO_LOOKUP*,ref CORINFO_LOOKUP CORINFO_LOOKUP_KIND*,ref CORINFO_LOOKUP_KIND @@ -292,7 +291,6 @@ FUNCTIONS uint32_t getLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd); uint32_t getRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd); uint32_t getThreadTLSIndex(void **ppIndirection); - void getXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr); const void * getInlinedCallFrameVptr(void **ppIndirection); int32_t * getAddrOfCaptureThreadGlobal(void **ppIndirection); void* getHelperFtn (CorInfoHelpFunc ftnNum, void **ppIndirection); diff --git a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h index f09f9b3cbc467d..62db21d2232e23 100644 --- a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h +++ b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h @@ -143,7 +143,6 @@ struct JitInterfaceCallbacks uint32_t (* getLoongArch64PassStructInRegisterFlags)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE structHnd); uint32_t (* getRISCV64PassStructInRegisterFlags)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE structHnd); uint32_t (* getThreadTLSIndex)(void * thisHandle, CorInfoExceptionClass** ppException, void** ppIndirection); - void (* getXarchCpuInfo)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_XARCH_CPU* xarchCpuInfoPtr); const void* (* getInlinedCallFrameVptr)(void * thisHandle, CorInfoExceptionClass** ppException, void** ppIndirection); int32_t* (* getAddrOfCaptureThreadGlobal)(void * thisHandle, CorInfoExceptionClass** ppException, void** ppIndirection); void* (* getHelperFtn)(void * thisHandle, CorInfoExceptionClass** ppException, CorInfoHelpFunc ftnNum, void** ppIndirection); @@ -1478,14 +1477,6 @@ class JitInterfaceWrapper : public ICorJitInfo 
return temp; } - virtual void getXarchCpuInfo( - CORINFO_XARCH_CPU* xarchCpuInfoPtr) -{ - CorInfoExceptionClass* pException = nullptr; - _callbacks->getXarchCpuInfo(_thisHandle, &pException, xarchCpuInfoPtr); - if (pException != nullptr) throw pException; -} - virtual const void* getInlinedCallFrameVptr( void** ppIndirection) { diff --git a/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h b/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h index 8c5ad83c977bc7..19a0600b3f582b 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h @@ -129,7 +129,6 @@ LWM(GetStringConfigValue, DWORD, DWORD) LWM(GetSystemVAmd64PassStructInRegisterDescriptor, DWORDLONG, Agnostic_GetSystemVAmd64PassStructInRegisterDescriptor) LWM(GetLoongArch64PassStructInRegisterFlags, DWORDLONG, DWORD) LWM(GetRISCV64PassStructInRegisterFlags, DWORDLONG, DWORD) -LWM(GetXarchCpuInfo, DWORD, DWORD) LWM(GetTailCallHelpers, Agnostic_GetTailCallHelpers, Agnostic_CORINFO_TAILCALL_HELPERS) LWM(UpdateEntryPointForTailCall, Agnostic_CORINFO_CONST_LOOKUP, Agnostic_CORINFO_CONST_LOOKUP) LWM(GetThreadTLSIndex, DWORD, DLD) diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 6a9a18baba0482..95dc7bd7d60b46 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -6424,34 +6424,6 @@ DWORD MethodContext::repGetRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE return value; } -void MethodContext::recgetXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr) -{ - if (GetXarchCpuInfo == nullptr) - GetXarchCpuInfo = new LightWeightMap(); - - DWORD key = 0; - DWORD value = static_cast(xarchCpuInfoPtr->Value); - - GetXarchCpuInfo->Add(key, value); - DEBUG_REC(dmpGetXarchCpuInfo(key, value)); -} -void MethodContext::dmpGetXarchCpuInfo(DWORD key, DWORD value) -{ - 
printf("getXarchCpuInfo key %u, value %u", key, value); -} -void MethodContext::repGetXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr) -{ - DWORD key; - DWORD value; - - key = 0; - value = LookupByKeyOrMissNoMessage(GetXarchCpuInfo, key); - - DEBUG_REP(dmpGetXarchCpuInfo(key, value)); - - xarchCpuInfoPtr->Value = static_cast(value); -} - void MethodContext::recGetRelocTypeHint(void* target, WORD result) { if (GetRelocTypeHint == nullptr) diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h index 1c7f0ec6f45353..9bd410dbd61689 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h @@ -807,10 +807,6 @@ class MethodContext void dmpGetRISCV64PassStructInRegisterFlags(DWORDLONG key, DWORD value); DWORD repGetRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd); - void recgetXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr); - void dmpGetXarchCpuInfo(DWORD key, DWORD value); - void repGetXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr); - void recGetRelocTypeHint(void* target, WORD result); void dmpGetRelocTypeHint(DWORDLONG key, DWORD value); WORD repGetRelocTypeHint(void* target); @@ -1192,7 +1188,6 @@ enum mcPackets Packet_GetThreadLocalStaticBlocksInfo = 208, Packet_GetRISCV64PassStructInRegisterFlags = 209, Packet_GetObjectContent = 210, - Packet_GetXarchCpuInfo = 211, }; void SetDebugDumpVariables(); diff --git a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp index d87104da17b84f..96249dde36c0da 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp @@ -1488,13 +1488,6 @@ uint32_t interceptor_ICJI::getRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HAN return temp; } -void 
interceptor_ICJI::getXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr) -{ - mc->cr->AddCall("getXarchCpuInfo"); - original_ICorJitInfo->getXarchCpuInfo(xarchCpuInfoPtr); - mc->recgetXarchCpuInfo(xarchCpuInfoPtr); -} - // Stuff on ICorDynamicInfo uint32_t interceptor_ICJI::getThreadTLSIndex(void** ppIndirection) { diff --git a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp index e1625c127c9c37..58dc80abec8e1b 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp @@ -1061,13 +1061,6 @@ uint32_t interceptor_ICJI::getThreadTLSIndex( return original_ICorJitInfo->getThreadTLSIndex(ppIndirection); } -void interceptor_ICJI::getXarchCpuInfo( - CORINFO_XARCH_CPU* xarchCpuInfoPtr) -{ - mcs->AddCall("getXarchCpuInfo"); - original_ICorJitInfo->getXarchCpuInfo(xarchCpuInfoPtr); -} - const void* interceptor_ICJI::getInlinedCallFrameVptr( void** ppIndirection) { diff --git a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp index da79ab622c62a4..a9dced511ddce6 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp @@ -929,12 +929,6 @@ uint32_t interceptor_ICJI::getThreadTLSIndex( return original_ICorJitInfo->getThreadTLSIndex(ppIndirection); } -void interceptor_ICJI::getXarchCpuInfo( - CORINFO_XARCH_CPU* xarchCpuInfoPtr) -{ - original_ICorJitInfo->getXarchCpuInfo(xarchCpuInfoPtr); -} - const void* interceptor_ICJI::getInlinedCallFrameVptr( void** ppIndirection) { diff --git a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp index 49086d8597a104..9915b4699eeebc 100644 --- 
a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp +++ b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp @@ -1299,12 +1299,6 @@ uint32_t MyICJI::getRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE struct return jitInstance->mc->repGetRISCV64PassStructInRegisterFlags(structHnd); } -void MyICJI::getXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr) -{ - jitInstance->mc->cr->AddCall("getXarchCpuInfo"); - return jitInstance->mc->repGetXarchCpuInfo(xarchCpuInfoPtr); -} - // Stuff on ICorDynamicInfo uint32_t MyICJI::getThreadTLSIndex(void** ppIndirection) { diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 68d90c4e6eafb4..5030c295593cf5 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1449,7 +1449,7 @@ void EEJitManager::SetCpuInfo() // LZCNT - ECX bit 5 // synchronously updating VM and JIT. - CORINFO_XARCH_CPU xarchCpuInfo = {}; + XarchCpuInfo xarchCpuInfo = {}; int cpuidInfo[4]; @@ -1752,7 +1752,7 @@ void EEJitManager::SetCpuInfo() // Now that we've queried the actual hardware support, we need to adjust what is actually supported based // on some externally available config switches that exist so users can test code for downlevel hardware. -#if defined(TARGET_AMD64) || defined(TARGET_X86) +#if defined(TARGET_X86) || defined(TARGET_AMD64) if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic)) { CPUCompileFlags.Clear(InstructionSet_X86Base); @@ -1969,6 +1969,41 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.EnsureValidInstructionSetSupport(); m_CPUCompileFlags = CPUCompileFlags; + +#if defined(TARGET_X86) || defined(TARGET_AMD64) + if (xarchCpuInfo.IsGenuineIntel) + { + // Some architectures can experience frequency throttling when executing + // executing 512-bit width instructions. To account for this we set the + // default preferred vector width to 256-bits in some scenarios. 
Power + // users can override this with `DOTNET_PreferredVectorBitWidth=512` to + // allow using such instructions where hardware support is available. + + if (xarchCpuInfo.FamilyId == 0x06) + { + if (xarchCpuInfo.ExtendedModelId == 0x05) + { + if (xarchCpuInfo.Model == 0x05) + { + // * Skylake (Server) + // * Cascade Lake + // * Cooper Lake + + m_CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_VECTOR512_THROTTLING); + } + } + else if (xarchCpuInfo.ExtendedModelId == 0x06) + { + if (xarchCpuInfo.Model == 0x06) + { + // * Cannon Lake + + m_CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_VECTOR512_THROTTLING); + } + } + } + } +#endif // TARGET_X86 || TARGET_AMD64 } // Define some data that we can use to get a better idea of what happened when we get a Watson dump that indicates the JIT failed to load. diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h index dbd73029c0e06e..e07dea6730b644 100644 --- a/src/coreclr/vm/codeman.h +++ b/src/coreclr/vm/codeman.h @@ -1999,10 +1999,6 @@ protected : private: CORJIT_FLAGS m_CPUCompileFlags; -#if defined(TARGET_X86) || defined(TARGET_AMD64) - CORINFO_XARCH_CPU m_xarchCpuInfo; -#endif // TARGET_X86 || TARGET_AMD64 - #if !defined DACCESS_COMPILE void SetCpuInfo(); #endif @@ -2014,14 +2010,6 @@ protected : return m_CPUCompileFlags; } -#if defined(TARGET_X86) || defined(TARGET_AMD64) - inline void getXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfo) - { - LIMITED_METHOD_CONTRACT; - *xarchCpuInfo = m_xarchCpuInfo; - } -#endif // TARGET_X86 || TARGET_AMD64 - private: bool m_storeRichDebugInfo; @@ -2674,4 +2662,22 @@ class EECodeInfo void ThrowOutOfMemoryWithinRange(); +// Represents information about an XARCH CPU +union XarchCpuInfo +{ + struct { + uint32_t SteppingId : 4; + uint32_t Model : 4; + uint32_t FamilyId : 4; + uint32_t ProcessorType : 2; + uint32_t IsAuthenticAmd : 1; // Unused bits in the CPUID result + uint32_t IsGenuineIntel : 1; // Unused bits in the CPUID result + uint32_t ExtendedModelId : 4; + uint32_t
ExtendedFamilyId : 8; + uint32_t Reserved : 4; // Unused bits in the CPUID result + }; + + uint32_t Value; +}; + #endif // !__CODEMAN_HPP__ diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index bedb847e06d86f..f085fc59950ce0 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -2515,35 +2515,6 @@ bool CEEInfo::getSystemVAmd64PassStructInRegisterDescriptor( #endif // !defined(UNIX_AMD64_ABI_ITF) } -/*********************************************************************/ -void CEEInfo::getXarchCpuInfo(CORINFO_XARCH_CPU* xarchCpuInfoPtr) -{ - CONTRACTL { - NOTHROW; - GC_NOTRIGGER; - MODE_PREEMPTIVE; - } CONTRACTL_END; - - JIT_TO_EE_TRANSITION_LEAF(); - -#if defined(TARGET_X86) || defined(TARGET_AMD64) - *xarchCpuInfoPtr = m_xarchCpuInfo; -#else - *xarchCpuInfoPtr = {}; -#endif - - EE_TO_JIT_TRANSITION_LEAF(); -} - -#if defined(TARGET_X86) || defined(TARGET_AMD64) -void CEEInfo::setXarchCpuInfo(const CORINFO_XARCH_CPU& xarchCpuInfo) -{ - LIMITED_METHOD_CONTRACT; - - m_xarchCpuInfo = xarchCpuInfo; -} -#endif // TARGET_X86 || TARGET_AMD64 - /*********************************************************************/ unsigned CEEInfo::getClassNumInstanceFields (CORINFO_CLASS_HANDLE clsHnd) { @@ -12596,12 +12567,6 @@ CorJitResult invokeCompileMethodHelper(EEJitManager *jitMgr, bool samplingEnabled = (s_stackSamplingEnabled.val(CLRConfig::UNSUPPORTED_StackSamplingEnabled) != 0); #endif -#if defined(TARGET_X86) || defined(TARGET_AMD64) - CORINFO_XARCH_CPU xarchCpuInfo; - ExecutionManager::GetEEJitManager()->getXarchCpuInfo(&xarchCpuInfo); - comp->setXarchCpuInfo(xarchCpuInfo); -#endif // TARGET_X86 || TARGET_AMD64 - #if defined(ALLOW_SXS_JIT) if (FAILED(ret) && jitMgr->m_alternateJit #ifdef FEATURE_STACK_SAMPLING diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index fc116cf4625c3c..2ad51cc126af13 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -533,10 +533,6 
@@ class CEEInfo : public ICorJitInfo void setJitFlags(const CORJIT_FLAGS& jitFlags); -#if defined(TARGET_X86) || defined(TARGET_AMD64) - void setXarchCpuInfo(const CORINFO_XARCH_CPU& xarchCpuInfo); -#endif // TARGET_X86 || TARGET_AMD64 - private: // Shrinking these buffers drastically reduces the amount of stack space // required for each instance of the interpreter, and thereby reduces SOs. @@ -595,10 +591,6 @@ class CEEInfo : public ICorJitInfo Thread * m_pThread; // Cached current thread for faster JIT-EE transitions CORJIT_FLAGS m_jitFlags; -#if defined(TARGET_X86) || defined(TARGET_AMD64) - CORINFO_XARCH_CPU m_xarchCpuInfo; -#endif // TARGET_X86 || TARGET_AMD64 - CORINFO_METHOD_HANDLE getMethodBeingCompiled() { LIMITED_METHOD_CONTRACT; From a026b450df208bbcd3b5f5f219c7d445e00ad549 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 8 May 2023 09:43:24 -0700 Subject: [PATCH 07/19] Make sure CORINFO_XARCH_CPU is fully removed --- .../tools/Common/JitInterface/CorInfoTypes.cs | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index e250915f1d3df2..cedbbda36b7046 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -1208,22 +1208,6 @@ public struct SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR public byte eightByteOffsets1; }; - public struct CORINFO_XARCH_CPU - { - public uint Value; - - // The native version is a union with the following fields: - // uint32_t SteppingId : 4; - // uint32_t Model : 4; - // uint32_t FamilyId : 4; - // uint32_t ProcessorType : 2; - // uint32_t IsAuthenticAmd : 1; // Unused bits in the CPUID result - // uint32_t IsGenuineIntel : 1; // Unused bits in the CPUID result - // uint32_t ExtendedModelId : 4; - // uint32_t ExtendedFamilyId : 8; - // uint32_t Reserved : 4; // Unused bits in the CPUID result - }; - // 
StructFloadFieldInfoFlags: used on LoongArch64 architecture by `getLoongArch64PassStructInRegisterFlags` and // `getRISCV64PassStructInRegisterFlags` API to convey struct argument passing information. // From 3d8feff338fb7259e61f3439df1438e0739af1ec Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 8 May 2023 10:26:42 -0700 Subject: [PATCH 08/19] Have ENCODE_VERIFY_TYPE_LAYOUT not fail-fast for Vector size differences --- src/coreclr/inc/readytorun.h | 1 + src/coreclr/jit/compiler.h | 11 +++-- .../Common/Compiler/InstructionSetSupport.cs | 15 ++---- .../Internal/Runtime/ReadyToRunConstants.cs | 1 + .../TypeSystem/Common/DefType.FieldLayout.cs | 24 ++++++++++ .../TypeSystem/Common/FieldLayoutAlgorithm.cs | 1 + .../Common/MetadataFieldLayoutAlgorithm.cs | 47 +++++++++++++++---- .../TypeSystem/Interop/IL/MarshalHelpers.cs | 6 +++ .../Compiler/VectorOfTFieldLayoutAlgorithm.cs | 2 + .../ReadyToRun/TypeFixupSignature.cs | 7 ++- .../Compiler/ReadyToRunCompilerContext.cs | 3 ++ src/coreclr/vm/jitinterface.cpp | 26 +++++++--- 12 files changed, 112 insertions(+), 32 deletions(-) diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h index e138058b3666a1..6e861a4f64055a 100644 --- a/src/coreclr/inc/readytorun.h +++ b/src/coreclr/inc/readytorun.h @@ -167,6 +167,7 @@ enum ReadyToRunTypeLayoutFlags READYTORUN_LAYOUT_Alignment_Native = 0x04, READYTORUN_LAYOUT_GCLayout = 0x08, READYTORUN_LAYOUT_GCLayout_Empty = 0x10, + READYTORUN_LAYOUT_IsOrContainsVectorT = 0x20, }; enum ReadyToRunVirtualFunctionOverrideFlags diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 8d83dbe3608b0d..5d352a18a66787 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8677,16 +8677,17 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // Note - cannot be used for System.Runtime.Intrinsic unsigned getVectorTByteLength() { -// We need to report the ISA dependency to the VM so that scenarios -// such as 
R2R work correctly for larger vector sizes, so we always -// do `compExactlyDependsOn` for such cases. + // We need to report the ISA dependency to the VM so that scenarios + // such as R2R work correctly for larger vector sizes, so we always + // do `compExactlyDependsOn` for such cases. + CLANG_FORMAT_COMMENT_ANCHOR; #if defined(TARGET_XARCH) if (compExactlyDependsOn(InstructionSet_VectorT256)) { return YMM_REGSIZE_BYTES; } - else if (compOpportunisticallyDependsOn(InstructionSet_VectorT128)) + else if (compExactlyDependsOn(InstructionSet_VectorT128)) { return XMM_REGSIZE_BYTES; } @@ -8695,7 +8696,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return 0; } #elif defined(TARGET_ARM64) - if (compOpportunisticallyDependsOn(InstructionSet_VectorT128)) + if (compExactlyDependsOn(InstructionSet_VectorT128)) { return FP_REGSIZE_BYTES; } diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index 53bd63d23c3bfb..af3ecfcdffbb9d 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -317,25 +317,19 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, Debug.Assert(InstructionSet.X86_VectorT256 == InstructionSet.X64_VectorT256); Debug.Assert(InstructionSet.X86_VectorT128 == InstructionSet.X64_VectorT128); - // Unlike for the JIT, we cannot default to enabling Vector to the below sizes - // as it may fail to launch in the case where `--verify-type-and-field-layout` - // was specified. So instead, only enable Vector when we have an explicit width. 
- - if (maxVectorTBitWidth >= 128) - { - supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT128); - } + Debug.Assert((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 128)); + supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT128); if (supportedInstructionSets.HasInstructionSet(InstructionSet.X86_AVX2)) { - if (maxVectorTBitWidth >= 256) + if ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256)) { supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT256); } if (supportedInstructionSets.HasInstructionSet(InstructionSet.X86_AVX512F)) { - if (maxVectorTBitWidth >= 512) + if ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 512)) { supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT512); } @@ -347,6 +341,7 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, case TargetArchitecture.ARM64: { Debug.Assert(supportedInstructionSets.HasInstructionSet(InstructionSet.ARM64_AdvSimd)); + Debug.Assert((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 128)); supportedInstructionSets.AddInstructionSet(InstructionSet.ARM64_VectorT128); break; } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs index f46904cab3e095..de2ad640abaa19 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs @@ -70,6 +70,7 @@ public enum ReadyToRunTypeLayoutFlags : byte READYTORUN_LAYOUT_Alignment_Native = 0x04, READYTORUN_LAYOUT_GCLayout = 0x08, READYTORUN_LAYOUT_GCLayout_Empty = 0x10, + READYTORUN_LAYOUT_IsOrContainsVectorT = 0x20, } [Flags] diff --git a/src/coreclr/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs b/src/coreclr/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs index 99305804f53519..fb4cd6bf17157f 100644 --- a/src/coreclr/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs +++ 
b/src/coreclr/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs @@ -73,6 +73,11 @@ private static class FieldLayoutFlags /// True if the type transitively has an Int128 in it or is an Int128 /// public const int IsInt128OrHasInt128Fields = 0x800; + + /// + /// True if the type transitively has a Vector in it or is Vector + /// + public const int IsVectorTOrHasVectorTFields = 0x1000; } private sealed class StaticBlockInfo @@ -153,6 +158,21 @@ public virtual bool IsInt128OrHasInt128Fields } } + /// + /// Is a type Vector or transitively have any fields of a type Vector. + /// + public virtual bool IsVectorTOrHasVectorTFields + { + get + { + if (!_fieldLayoutFlags.HasFlags(FieldLayoutFlags.ComputedInstanceTypeLayout)) + { + ComputeInstanceLayout(InstanceLayoutKind.TypeAndFields); + } + return _fieldLayoutFlags.HasFlags(FieldLayoutFlags.IsVectorTOrHasVectorTFields); + } + } + /// /// The number of bytes required to hold a field of this type /// @@ -451,6 +471,10 @@ public void ComputeInstanceLayout(InstanceLayoutKind layoutKind) { _fieldLayoutFlags.AddFlags(FieldLayoutFlags.IsInt128OrHasInt128Fields); } + if (computedLayout.IsVectorTOrHasVectorTFields) + { + _fieldLayoutFlags.AddFlags(FieldLayoutFlags.IsVectorTOrHasVectorTFields); + } if (computedLayout.Offsets != null) { diff --git a/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs b/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs index 53388c915b85d8..31a46ec47f6416 100644 --- a/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs +++ b/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs @@ -84,6 +84,7 @@ public struct ComputedInstanceFieldLayout public bool LayoutAbiStable; // Is the layout stable such that it can safely be used in function calling conventions public bool IsAutoLayoutOrHasAutoLayoutFields; public bool IsInt128OrHasInt128Fields; + public bool IsVectorTOrHasVectorTFields; /// /// If Offsets is non-null, then all field based layout 
is complete. diff --git a/src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs b/src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs index 0fc9064fb00a81..d56bfa19bbf210 100644 --- a/src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs +++ b/src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs @@ -110,6 +110,7 @@ out instanceByteSizeAndAlignment LayoutAbiStable = true, IsAutoLayoutOrHasAutoLayoutFields = false, IsInt128OrHasInt128Fields = false, + IsVectorTOrHasVectorTFields = false, }; if (numInstanceFields > 0) @@ -211,7 +212,7 @@ public override ComputedStaticFieldLayout ComputeStaticFieldLayout(DefType defTy } ref StaticsBlock block = ref GetStaticsBlockForField(ref result, field); - SizeAndAlignment sizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout: false, context.Target.DefaultPackingSize, out bool _, out bool _, out bool _); + SizeAndAlignment sizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout: false, context.Target.DefaultPackingSize, out bool _, out bool _, out bool _, out bool _); block.Size = LayoutInt.AlignUp(block.Size, sizeAndAlignment.Alignment, context.Target); result.Offsets[index] = new FieldAndOffset(field, block.Size); @@ -303,18 +304,27 @@ protected ComputedInstanceFieldLayout ComputeExplicitFieldLayout(MetadataType ty int fieldOrdinal = 0; bool layoutAbiStable = true; bool hasAutoLayoutField = false; - bool hasInt128Field = type.BaseType == null ? 
false : type.BaseType.IsInt128OrHasInt128Fields; + bool hasInt128Field = false; + bool hasVectorTField = false; + + if (type.BaseType is not null) + { + hasInt128Field = type.BaseType.IsInt128OrHasInt128Fields; + hasVectorTField = type.BaseType.IsVectorTOrHasVectorTFields; + } foreach (var fieldAndOffset in layoutMetadata.Offsets) { TypeDesc fieldType = fieldAndOffset.Field.FieldType; - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType.UnderlyingType, hasLayout: true, packingSize, out bool fieldLayoutAbiStable, out bool fieldHasAutoLayout, out bool fieldHasInt128Field); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType.UnderlyingType, hasLayout: true, packingSize, out bool fieldLayoutAbiStable, out bool fieldHasAutoLayout, out bool fieldHasInt128Field, out bool fieldHasVectorTField); if (!fieldLayoutAbiStable) layoutAbiStable = false; if (fieldHasAutoLayout) hasAutoLayoutField = true; if (fieldHasInt128Field) hasInt128Field = true; + if (fieldHasVectorTField) + hasVectorTField = true; largestAlignmentRequired = LayoutInt.Max(fieldSizeAndAlignment.Alignment, largestAlignmentRequired); @@ -367,6 +377,7 @@ protected ComputedInstanceFieldLayout ComputeExplicitFieldLayout(MetadataType ty { IsAutoLayoutOrHasAutoLayoutFields = hasAutoLayoutField, IsInt128OrHasInt128Fields = hasInt128Field, + IsVectorTOrHasVectorTFields = hasVectorTField, }; computedLayout.FieldAlignment = instanceSizeAndAlignment.Alignment; computedLayout.FieldSize = instanceSizeAndAlignment.Size; @@ -402,20 +413,29 @@ protected ComputedInstanceFieldLayout ComputeSequentialFieldLayout(MetadataType int packingSize = ComputePackingSize(type, layoutMetadata); bool layoutAbiStable = true; bool hasAutoLayoutField = false; - bool hasInt128Field = type.BaseType == null ? 
false : type.BaseType.IsInt128OrHasInt128Fields; + bool hasInt128Field = false; + bool hasVectorTField = false; + + if (type.BaseType is not null) + { + hasInt128Field = type.BaseType.IsInt128OrHasInt128Fields; + hasVectorTField = type.BaseType.IsVectorTOrHasVectorTFields; + } foreach (var field in type.GetFields()) { if (field.IsStatic) continue; - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType.UnderlyingType, hasLayout: true, packingSize, out bool fieldLayoutAbiStable, out bool fieldHasAutoLayout, out bool fieldHasInt128Field); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType.UnderlyingType, hasLayout: true, packingSize, out bool fieldLayoutAbiStable, out bool fieldHasAutoLayout, out bool fieldHasInt128Field, out bool fieldHasVectorTField); if (!fieldLayoutAbiStable) layoutAbiStable = false; if (fieldHasAutoLayout) hasAutoLayoutField = true; if (fieldHasInt128Field) hasInt128Field = true; + if (fieldHasVectorTField) + hasVectorTField = true; largestAlignmentRequirement = LayoutInt.Max(fieldSizeAndAlignment.Alignment, largestAlignmentRequirement); @@ -443,6 +463,7 @@ protected ComputedInstanceFieldLayout ComputeSequentialFieldLayout(MetadataType { IsAutoLayoutOrHasAutoLayoutFields = hasAutoLayoutField, IsInt128OrHasInt128Fields = hasInt128Field, + IsVectorTOrHasVectorTFields = hasVectorTField, }; computedLayout.FieldAlignment = instanceSizeAndAlignment.Alignment; computedLayout.FieldSize = instanceSizeAndAlignment.Size; @@ -517,6 +538,7 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, int instanceGCPointerFieldsCount = 0; int[] instanceNonGCPointerFieldsCount = new int[maxLog2Size + 1]; bool hasInt128Field = false; + bool hasVectorTField = false; foreach (var field in type.GetFields()) { @@ -531,6 +553,8 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, instanceValueClassFieldCount++; if (((DefType)fieldType).IsInt128OrHasInt128Fields) 
hasInt128Field = true; + if (((DefType)fieldType).IsVectorTOrHasVectorTFields) + hasVectorTField = true; } else if (fieldType.IsGCPointer) { @@ -540,7 +564,7 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, { Debug.Assert(fieldType.IsPrimitive || fieldType.IsPointer || fieldType.IsFunctionPointer || fieldType.IsEnum || fieldType.IsByRef); - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout, packingSize, out bool _, out bool _, out bool _); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout, packingSize, out bool _, out bool _, out bool _, out bool _); instanceNonGCPointerFieldsCount[CalculateLog2(fieldSizeAndAlignment.Size.AsInt)]++; } } @@ -577,7 +601,7 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, TypeDesc fieldType = field.FieldType; - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout, packingSize, out bool fieldLayoutAbiStable, out bool _, out bool _); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout, packingSize, out bool fieldLayoutAbiStable, out bool _, out bool _, out bool _); if (!fieldLayoutAbiStable) layoutAbiStable = false; @@ -747,7 +771,7 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, for (int i = 0; i < instanceValueClassFieldsArr.Length; i++) { // Align the cumulative field offset to the indeterminate value - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(instanceValueClassFieldsArr[i].FieldType, hasLayout, packingSize, out bool fieldLayoutAbiStable, out bool _, out bool _); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(instanceValueClassFieldsArr[i].FieldType, hasLayout, packingSize, out bool fieldLayoutAbiStable, out bool _, out bool _, out bool _); if (!fieldLayoutAbiStable) layoutAbiStable = false; @@ -804,6 +828,7 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, { 
IsAutoLayoutOrHasAutoLayoutFields = true, IsInt128OrHasInt128Fields = hasInt128Field, + IsVectorTOrHasVectorTFields = hasVectorTField, }; computedLayout.FieldAlignment = instanceSizeAndAlignment.Alignment; computedLayout.FieldSize = instanceSizeAndAlignment.Size; @@ -817,7 +842,7 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, private static void PlaceInstanceField(FieldDesc field, bool hasLayout, int packingSize, FieldAndOffset[] offsets, ref LayoutInt instanceFieldPos, ref int fieldOrdinal, LayoutInt offsetBias) { - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType, hasLayout, packingSize, out bool _, out bool _, out bool _); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType, hasLayout, packingSize, out bool _, out bool _, out bool _, out bool _); instanceFieldPos = AlignUpInstanceFieldOffset(instanceFieldPos, fieldSizeAndAlignment.Alignment, field.Context.Target); offsets[fieldOrdinal] = new FieldAndOffset(field, instanceFieldPos + offsetBias); @@ -877,12 +902,13 @@ public LayoutInt CalculateFieldBaseOffset(MetadataType type, bool requiresAlign8 return cumulativeInstanceFieldPos; } - private static SizeAndAlignment ComputeFieldSizeAndAlignment(TypeDesc fieldType, bool hasLayout, int packingSize, out bool layoutAbiStable, out bool fieldTypeHasAutoLayout, out bool fieldTypeHasInt128Field) + private static SizeAndAlignment ComputeFieldSizeAndAlignment(TypeDesc fieldType, bool hasLayout, int packingSize, out bool layoutAbiStable, out bool fieldTypeHasAutoLayout, out bool fieldTypeHasInt128Field, out bool fieldTypeHasVectorTField) { SizeAndAlignment result; layoutAbiStable = true; fieldTypeHasAutoLayout = true; fieldTypeHasInt128Field = false; + fieldTypeHasVectorTField = false; if (fieldType.IsDefType) { @@ -894,6 +920,7 @@ private static SizeAndAlignment ComputeFieldSizeAndAlignment(TypeDesc fieldType, layoutAbiStable = defType.LayoutAbiStable; fieldTypeHasAutoLayout = 
defType.IsAutoLayoutOrHasAutoLayoutFields; fieldTypeHasInt128Field = defType.IsInt128OrHasInt128Fields; + fieldTypeHasVectorTField = defType.IsVectorTOrHasVectorTFields; } else { diff --git a/src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs b/src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs index cb80143d94250d..3aa6ca2db11a16 100644 --- a/src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs +++ b/src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs @@ -422,6 +422,12 @@ internal static MarshallerKind GetMarshallerKind( return MarshallerKind.Invalid; } + if (!isField && ((DefType)type).IsVectorTOrHasVectorTFields) + { + // Vector types or structs that contain them cannot be passed by value + return MarshallerKind.Invalid; + } + if (MarshalUtils.IsBlittableType(type)) { if (nativeType != NativeTypeKind.Default && nativeType != NativeTypeKind.Struct) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/VectorOfTFieldLayoutAlgorithm.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/VectorOfTFieldLayoutAlgorithm.cs index 1a1eef14d55820..cfa4dc2524815b 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/VectorOfTFieldLayoutAlgorithm.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/VectorOfTFieldLayoutAlgorithm.cs @@ -25,6 +25,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType defTyp TargetDetails targetDetails = defType.Context.Target; ComputedInstanceFieldLayout layoutFromMetadata = _fallbackAlgorithm.ComputeInstanceLayout(defType, layoutKind); + layoutFromMetadata.IsVectorTOrHasVectorTFields = true; LayoutInt instanceFieldSize; @@ -53,6 +54,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType defTyp FieldAlignment = layoutFromMetadata.FieldAlignment, FieldSize = instanceFieldSize, Offsets = layoutFromMetadata.Offsets, + IsVectorTOrHasVectorTFields = true, }; } diff --git 
a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs index a0b9e801f2495d..992562cf630000 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs @@ -76,6 +76,11 @@ private static void EncodeTypeLayout(ObjectDataSignatureBuilder dataBuilder, Typ flags |= ReadyToRunTypeLayoutFlags.READYTORUN_LAYOUT_HFA; } + if (defType.IsVectorTOrHasVectorTFields) + { + flags |= ReadyToRunTypeLayoutFlags.READYTORUN_LAYOUT_IsOrContainsVectorT; + } + dataBuilder.EmitUInt((uint)flags); dataBuilder.EmitUInt((uint)size); @@ -92,7 +97,7 @@ private static void EncodeTypeLayout(ObjectDataSignatureBuilder dataBuilder, Typ }; dataBuilder.EmitUInt((uint)hfaElementType); } - + if (alignment != pointerSize) { dataBuilder.EmitUInt((uint)alignment); diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs index 88a74a9ebc2f00..c052e336620f86 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs @@ -217,6 +217,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType type, ByteCountAlignment = LayoutInt.Indeterminate, Offsets = fieldsAndOffsets.ToArray(), LayoutAbiStable = false, + IsVectorTOrHasVectorTFields = true, }; return instanceLayout; } @@ -235,6 +236,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType type, FieldSize = layoutFromSimilarIntrinsicVector.FieldSize, Offsets = layoutFromMetadata.Offsets, LayoutAbiStable = _vectorAbiIsStable, + IsVectorTOrHasVectorTFields = 
true, }; #else return new ComputedInstanceFieldLayout @@ -245,6 +247,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType type, FieldSize = layoutFromSimilarIntrinsicVector.FieldSize, Offsets = layoutFromMetadata.Offsets, LayoutAbiStable = _vectorAbiIsStable, + IsVectorTOrHasVectorTFields = true, }; #endif } diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index f085fc59950ce0..15eb0405d077e1 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -13392,7 +13392,7 @@ void ComputeGCRefMap(MethodTable * pMT, BYTE * pGCRefMap, size_t cbGCRefMap) // - Alignment // - Position of GC references // -BOOL TypeLayoutCheck(MethodTable * pMT, PCCOR_SIGNATURE pBlob, BOOL printDiff) +BOOL TypeLayoutCheck(MethodTable * pMT, PCCOR_SIGNATURE pBlob, BOOL printDiff, BOOL* isOrContainsVectorT) { STANDARD_VM_CONTRACT; @@ -13402,6 +13402,10 @@ BOOL TypeLayoutCheck(MethodTable * pMT, PCCOR_SIGNATURE pBlob, BOOL printDiff) uint32_t dwFlags; IfFailThrow(p.GetData(&dwFlags)); + // We need to track Vector because its size is expected to vary + // and we'll need to fail the load without failing the verification + *isOrContainsVectorT = (dwFlags & READYTORUN_LAYOUT_IsOrContainsVectorT) != 0; + BOOL result = TRUE; // Size is checked unconditionally @@ -13415,9 +13419,12 @@ BOOL TypeLayoutCheck(MethodTable * pMT, PCCOR_SIGNATURE pBlob, BOOL printDiff) { result = FALSE; - DefineFullyQualifiedNameForClass(); - printf("Type %s: expected size 0x%08x, actual size 0x%08x\n", - GetFullyQualifiedNameForClass(pMT), dwExpectedSize, dwActualSize); + if ((dwFlags & READYTORUN_LAYOUT_IsOrContainsVectorT) == 0) + { + DefineFullyQualifiedNameForClass(); + printf("Type %s: expected size 0x%08x, actual size 0x%08x\n", + GetFullyQualifiedNameForClass(pMT), dwExpectedSize, dwActualSize); + } } else { @@ -13964,12 +13971,19 @@ BOOL LoadDynamicInfoEntry(Module *currentModule, MethodTable * pMT = th.AsMethodTable(); 
_ASSERTE(pMT->IsValueType()); - if (!TypeLayoutCheck(pMT, pBlob, /* printDiff */ kind == ENCODE_VERIFY_TYPE_LAYOUT)) + BOOL isOrContainsVectorT = FALSE; + + if (!TypeLayoutCheck(pMT, pBlob, /* printDiff */ kind == ENCODE_VERIFY_TYPE_LAYOUT, &isOrContainsVectorT)) { if (kind == ENCODE_CHECK_TYPE_LAYOUT) { return FALSE; } + else if (isOrContainsVectorT) + { + // We expect Vector to vary in size, don't fail + return FALSE; + } else { // Verification failures are failfast events @@ -13982,7 +13996,7 @@ BOOL LoadDynamicInfoEntry(Module *currentModule, { _ASSERTE_MSG(false, fatalErrorString.GetUTF8()); // Run through the type layout logic again, after the assert, makes debugging easy - TypeLayoutCheck(pMT, pBlob, /* printDiff */ TRUE); + TypeLayoutCheck(pMT, pBlob, /* printDiff */ TRUE, &isOrContainsVectorT); } #endif From 9e73c1990e48606359eec63604b688e1fc3bdba2 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 12 May 2023 08:45:43 -0700 Subject: [PATCH 09/19] Only encode types containing Vector as check, not verify --- src/coreclr/inc/readytorun.h | 1 - .../Internal/Runtime/ReadyToRunConstants.cs | 1 - .../ReadyToRun/TypeFixupSignature.cs | 17 ++++++------ src/coreclr/vm/jitinterface.cpp | 26 +++++-------------- 4 files changed, 15 insertions(+), 30 deletions(-) diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h index 6e861a4f64055a..e138058b3666a1 100644 --- a/src/coreclr/inc/readytorun.h +++ b/src/coreclr/inc/readytorun.h @@ -167,7 +167,6 @@ enum ReadyToRunTypeLayoutFlags READYTORUN_LAYOUT_Alignment_Native = 0x04, READYTORUN_LAYOUT_GCLayout = 0x08, READYTORUN_LAYOUT_GCLayout_Empty = 0x10, - READYTORUN_LAYOUT_IsOrContainsVectorT = 0x20, }; enum ReadyToRunVirtualFunctionOverrideFlags diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs index 7d78ad0b763b95..731fc1407b5f36 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs 
+++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs @@ -70,7 +70,6 @@ public enum ReadyToRunTypeLayoutFlags : byte READYTORUN_LAYOUT_Alignment_Native = 0x04, READYTORUN_LAYOUT_GCLayout = 0x08, READYTORUN_LAYOUT_GCLayout_Empty = 0x10, - READYTORUN_LAYOUT_IsOrContainsVectorT = 0x20, } [Flags] diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs index 992562cf630000..e379651ff5e2cb 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs @@ -36,14 +36,20 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false) if (!relocsOnly) { + ReadyToRunFixupKind fixupKind = _fixupKind; dataBuilder.AddSymbol(this); + if ((fixupKind == ReadyToRunFixupKind.Verify_TypeLayout) && ((MetadataType)_typeDesc).IsVectorTOrHasVectorTFields) + { + fixupKind = ReadyToRunFixupKind.Check_TypeLayout; + } + IEcmaModule targetModule = factory.SignatureContext.GetTargetModule(_typeDesc); - SignatureContext innerContext = dataBuilder.EmitFixup(factory, _fixupKind, targetModule, factory.SignatureContext); + SignatureContext innerContext = dataBuilder.EmitFixup(factory, fixupKind, targetModule, factory.SignatureContext); dataBuilder.EmitTypeSignature(_typeDesc, innerContext); - if ((_fixupKind == ReadyToRunFixupKind.Check_TypeLayout) || - (_fixupKind == ReadyToRunFixupKind.Verify_TypeLayout)) + if ((fixupKind == ReadyToRunFixupKind.Check_TypeLayout) || + (fixupKind == ReadyToRunFixupKind.Verify_TypeLayout)) { EncodeTypeLayout(dataBuilder, _typeDesc); } @@ -76,11 +82,6 @@ private static void EncodeTypeLayout(ObjectDataSignatureBuilder dataBuilder, Typ flags |= ReadyToRunTypeLayoutFlags.READYTORUN_LAYOUT_HFA; } - if 
(defType.IsVectorTOrHasVectorTFields) - { - flags |= ReadyToRunTypeLayoutFlags.READYTORUN_LAYOUT_IsOrContainsVectorT; - } - dataBuilder.EmitUInt((uint)flags); dataBuilder.EmitUInt((uint)size); diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 2c20d937b73afb..246d155481afe0 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -13243,7 +13243,7 @@ void ComputeGCRefMap(MethodTable * pMT, BYTE * pGCRefMap, size_t cbGCRefMap) // - Alignment // - Position of GC references // -BOOL TypeLayoutCheck(MethodTable * pMT, PCCOR_SIGNATURE pBlob, BOOL printDiff, BOOL* isOrContainsVectorT) +BOOL TypeLayoutCheck(MethodTable * pMT, PCCOR_SIGNATURE pBlob, BOOL printDiff) { STANDARD_VM_CONTRACT; @@ -13253,10 +13253,6 @@ BOOL TypeLayoutCheck(MethodTable * pMT, PCCOR_SIGNATURE pBlob, BOOL printDiff, B uint32_t dwFlags; IfFailThrow(p.GetData(&dwFlags)); - // We need to track Vector because its size is expected to vary - // and we'll need to fail the load without failing the verification - *isOrContainsVectorT = (dwFlags & READYTORUN_LAYOUT_IsOrContainsVectorT) != 0; - BOOL result = TRUE; // Size is checked unconditionally @@ -13270,12 +13266,9 @@ BOOL TypeLayoutCheck(MethodTable * pMT, PCCOR_SIGNATURE pBlob, BOOL printDiff, B { result = FALSE; - if ((dwFlags & READYTORUN_LAYOUT_IsOrContainsVectorT) == 0) - { - DefineFullyQualifiedNameForClass(); - printf("Type %s: expected size 0x%08x, actual size 0x%08x\n", - GetFullyQualifiedNameForClass(pMT), dwExpectedSize, dwActualSize); - } + DefineFullyQualifiedNameForClass(); + printf("Type %s: expected size 0x%08x, actual size 0x%08x\n", + GetFullyQualifiedNameForClass(pMT), dwExpectedSize, dwActualSize); } else { @@ -13821,19 +13814,12 @@ BOOL LoadDynamicInfoEntry(Module *currentModule, MethodTable * pMT = th.AsMethodTable(); _ASSERTE(pMT->IsValueType()); - BOOL isOrContainsVectorT = FALSE; - - if (!TypeLayoutCheck(pMT, pBlob, /* printDiff */ kind == ENCODE_VERIFY_TYPE_LAYOUT, 
&isOrContainsVectorT)) + if (!TypeLayoutCheck(pMT, pBlob, /* printDiff */ kind == ENCODE_VERIFY_TYPE_LAYOUT)) { if (kind == ENCODE_CHECK_TYPE_LAYOUT) { return FALSE; } - else if (isOrContainsVectorT) - { - // We expect Vector to vary in size, don't fail - return FALSE; - } else { // Verification failures are failfast events @@ -13846,7 +13832,7 @@ BOOL LoadDynamicInfoEntry(Module *currentModule, { _ASSERTE_MSG(false, fatalErrorString.GetUTF8()); // Run through the type layout logic again, after the assert, makes debugging easy - TypeLayoutCheck(pMT, pBlob, /* printDiff */ TRUE, &isOrContainsVectorT); + TypeLayoutCheck(pMT, pBlob, /* printDiff */ TRUE); } #endif From ade7fc7c88724600b889f01a61cc607be12f5b79 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 19 May 2023 07:53:17 -0700 Subject: [PATCH 10/19] Remove changes that were extracted to separate PRs --- .../coreclr/botr/vectors-and-intrinsics.md | 2 +- src/coreclr/inc/corinfoinstructionset.h | 130 ++++++-------- src/coreclr/inc/corjitflags.h | 2 +- src/coreclr/jit/compiler.cpp | 43 ++++- src/coreclr/jit/compiler.h | 23 +-- src/coreclr/jit/hwintrinsicxarch.cpp | 24 +-- src/coreclr/jit/importercalls.cpp | 5 +- src/coreclr/jit/jitee.h | 2 +- src/coreclr/jit/simd.cpp | 4 +- src/coreclr/jit/simdashwintrinsic.cpp | 81 ++------- src/coreclr/jit/simdashwintrinsic.h | 10 +- src/coreclr/jit/simdashwintrinsiclistarm64.h | 8 - src/coreclr/jit/simdashwintrinsiclistxarch.h | 8 - .../nativeaot/Runtime/IntrinsicConstants.h | 1 - src/coreclr/nativeaot/Runtime/startup.cpp | 78 ++++---- .../tools/Common/InstructionSetHelpers.cs | 20 --- .../Runtime/ReadyToRunInstructionSetHelper.cs | 14 +- .../tools/Common/JitInterface/CorInfoImpl.cs | 3 - .../JitInterface/CorInfoInstructionSet.cs | 166 ++++++------------ .../ThunkGenerator/InstructionSetDesc.txt | 28 +-- .../Compiler/HardwareIntrinsicHelpers.Aot.cs | 3 - .../superpmi-shared/methodcontext.cpp | 4 + .../superpmi/superpmi-shared/methodcontext.h | 2 +- 
src/coreclr/vm/codeman.cpp | 40 +---- src/coreclr/vm/codeman.h | 30 +--- src/coreclr/vm/jitinterface.cpp | 12 +- src/coreclr/vm/jitinterface.h | 8 +- .../src/System/Numerics/Plane.cs | 8 +- .../SmokeTests/HardwareIntrinsics/Program.cs | 135 ++++---------- .../HardwareIntrinsics/X64Baseline.csproj | 2 +- .../HardwareIntrinsics/x64Avx.csproj | 39 ---- .../x64Avx2_VectorT128.csproj | 40 ----- .../HardwareIntrinsics/x64Avx512.csproj | 33 ---- .../x64Avx512_VectorT128.csproj | 34 ---- .../{x64Sse42.csproj => x64NonVex.csproj} | 2 +- .../{x64Avx2.csproj => x64Vex.csproj} | 2 +- 36 files changed, 284 insertions(+), 762 deletions(-) delete mode 100644 src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx.csproj delete mode 100644 src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx2_VectorT128.csproj delete mode 100644 src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx512.csproj delete mode 100644 src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx512_VectorT128.csproj rename src/tests/nativeaot/SmokeTests/HardwareIntrinsics/{x64Sse42.csproj => x64NonVex.csproj} (83%) rename src/tests/nativeaot/SmokeTests/HardwareIntrinsics/{x64Avx2.csproj => x64Vex.csproj} (91%) diff --git a/docs/design/coreclr/botr/vectors-and-intrinsics.md b/docs/design/coreclr/botr/vectors-and-intrinsics.md index 6b15c16981c93f..1ae6f17e8e7e9c 100644 --- a/docs/design/coreclr/botr/vectors-and-intrinsics.md +++ b/docs/design/coreclr/botr/vectors-and-intrinsics.md @@ -169,5 +169,5 @@ While the above api exists, it is not expected that general purpose code within |`compExactlyDependsOn(isa)`| Use when making a decision to use or not use an instruction set when the decision will affect the semantics of the generated code. Should never be used in an assert. | Return whether or not an instruction set is supported. Calls notifyInstructionSetUsage with the result of that computation. 
|`compOpportunisticallyDependsOn(isa)`| Use when making an opportunistic decision to use or not use an instruction set. Use when the instruction set usage is a "nice to have optimization opportunity", but do not use when a false result may change the semantics of the program. Should never be used in an assert. | Return whether or not an instruction set is supported. Calls notifyInstructionSetUsage if the instruction set is supported. |`compIsaSupportedDebugOnly(isa)` | Use to assert whether or not an instruction set is supported | Return whether or not an instruction set is supported. Does not report anything. Only available in debug builds. -|`getVectorTByteLength()` | Use to get the size of a `Vector` value. | Determine the size of the `Vector` type. If on the architecture the size may vary depending on whatever rules. Use `compExactlyDependsOn` to perform the queries so that the size is consistent between compile time and runtime. +|`getSIMDVectorRegisterByteLength()` | Use to get the size of a `Vector` value. | Determine the size of the `Vector` type. If on the architecture the size may vary depending on whatever rules. Use `compExactlyDependsOn` to perform the queries so that the size is consistent between compile time and runtime. |`getMaxVectorByteLength()`| Get the maximum number of bytes that might be used in a SIMD type during this compilation. | Query the set of instruction sets supported, and determine the largest simd type supported. Use `compOpportunisticallyDependsOn` to perform the queries so that the maximum size needed is the only one recorded. 
diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 19950134867e8b..4d08b27a7f74d3 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -27,9 +27,9 @@ enum CORINFO_InstructionSet InstructionSet_Atomics=9, InstructionSet_Vector64=10, InstructionSet_Vector128=11, - InstructionSet_VectorT128=12, - InstructionSet_Dczva=13, - InstructionSet_Rcpc=14, + InstructionSet_Dczva=12, + InstructionSet_Rcpc=13, + InstructionSet_VectorT128=14, InstructionSet_ArmBase_Arm64=15, InstructionSet_AdvSimd_Arm64=16, InstructionSet_Aes_Arm64=17, @@ -59,22 +59,22 @@ enum CORINFO_InstructionSet InstructionSet_Vector128=17, InstructionSet_Vector256=18, InstructionSet_Vector512=19, - InstructionSet_VectorT128=20, - InstructionSet_VectorT256=21, - InstructionSet_VectorT512=22, - InstructionSet_AVXVNNI=23, - InstructionSet_MOVBE=24, - InstructionSet_X86Serialize=25, - InstructionSet_AVX512F=26, - InstructionSet_AVX512F_VL=27, - InstructionSet_AVX512BW=28, - InstructionSet_AVX512BW_VL=29, - InstructionSet_AVX512CD=30, - InstructionSet_AVX512CD_VL=31, - InstructionSet_AVX512DQ=32, - InstructionSet_AVX512DQ_VL=33, - InstructionSet_AVX512VBMI=34, - InstructionSet_AVX512VBMI_VL=35, + InstructionSet_AVXVNNI=20, + InstructionSet_MOVBE=21, + InstructionSet_X86Serialize=22, + InstructionSet_AVX512F=23, + InstructionSet_AVX512F_VL=24, + InstructionSet_AVX512BW=25, + InstructionSet_AVX512BW_VL=26, + InstructionSet_AVX512CD=27, + InstructionSet_AVX512CD_VL=28, + InstructionSet_AVX512DQ=29, + InstructionSet_AVX512DQ_VL=30, + InstructionSet_AVX512VBMI=31, + InstructionSet_AVX512VBMI_VL=32, + InstructionSet_VectorT128=33, + InstructionSet_VectorT256=34, + InstructionSet_VectorT512=35, InstructionSet_X86Base_X64=36, InstructionSet_SSE_X64=37, InstructionSet_SSE2_X64=38, @@ -125,22 +125,22 @@ enum CORINFO_InstructionSet InstructionSet_Vector128=17, InstructionSet_Vector256=18, InstructionSet_Vector512=19, - 
InstructionSet_VectorT128=20, - InstructionSet_VectorT256=21, - InstructionSet_VectorT512=22, - InstructionSet_AVXVNNI=23, - InstructionSet_MOVBE=24, - InstructionSet_X86Serialize=25, - InstructionSet_AVX512F=26, - InstructionSet_AVX512F_VL=27, - InstructionSet_AVX512BW=28, - InstructionSet_AVX512BW_VL=29, - InstructionSet_AVX512CD=30, - InstructionSet_AVX512CD_VL=31, - InstructionSet_AVX512DQ=32, - InstructionSet_AVX512DQ_VL=33, - InstructionSet_AVX512VBMI=34, - InstructionSet_AVX512VBMI_VL=35, + InstructionSet_AVXVNNI=20, + InstructionSet_MOVBE=21, + InstructionSet_X86Serialize=22, + InstructionSet_AVX512F=23, + InstructionSet_AVX512F_VL=24, + InstructionSet_AVX512BW=25, + InstructionSet_AVX512BW_VL=26, + InstructionSet_AVX512CD=27, + InstructionSet_AVX512CD_VL=28, + InstructionSet_AVX512DQ=29, + InstructionSet_AVX512DQ_VL=30, + InstructionSet_AVX512VBMI=31, + InstructionSet_AVX512VBMI_VL=32, + InstructionSet_VectorT128=33, + InstructionSet_VectorT256=34, + InstructionSet_VectorT512=35, InstructionSet_X86Base_X64=36, InstructionSet_SSE_X64=37, InstructionSet_SSE2_X64=38, @@ -581,40 +581,24 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_FMA)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F); if (resultflags.HasInstructionSet(InstructionSet_AVX512F_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512CD) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_AVX512CD); - if 
(resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512BW) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) 
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F); #endif // TARGET_AMD64 #ifdef TARGET_X86 if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) @@ -667,40 +651,24 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_FMA)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F); if (resultflags.HasInstructionSet(InstructionSet_AVX512F_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512CD) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) 
resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512BW) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && 
!resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F); #endif // TARGET_X86 } while (!oldflags.Equals(resultflags)); @@ -755,12 +723,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Vector64"; case InstructionSet_Vector128 : return "Vector128"; - case InstructionSet_VectorT128 : - return "VectorT128"; case InstructionSet_Dczva : return "Dczva"; case InstructionSet_Rcpc : return "Rcpc"; + case InstructionSet_VectorT128 : + return "VectorT128"; #endif // TARGET_ARM64 #ifdef TARGET_AMD64 case InstructionSet_X86Base : @@ -833,12 +801,6 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Vector256"; case InstructionSet_Vector512 : return "Vector512"; - case InstructionSet_VectorT128 : - return "VectorT128"; - case InstructionSet_VectorT256 : - return "VectorT256"; - case InstructionSet_VectorT512 : - return "VectorT512"; case InstructionSet_AVXVNNI : return "AVXVNNI"; case InstructionSet_AVXVNNI_X64 : @@ -891,6 +853,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "AVX512VBMI_VL"; case InstructionSet_AVX512VBMI_VL_X64 : return "AVX512VBMI_VL_X64"; + case InstructionSet_VectorT128 : + return "VectorT128"; + case InstructionSet_VectorT256 : + return "VectorT256"; + case InstructionSet_VectorT512 : + return "VectorT512"; #endif // TARGET_AMD64 #ifdef TARGET_X86 case InstructionSet_X86Base : @@ -931,12 +899,6 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Vector256"; case InstructionSet_Vector512 : return "Vector512"; - case InstructionSet_VectorT128 : - return "VectorT128"; - case InstructionSet_VectorT256 : - return "VectorT256"; - case 
InstructionSet_VectorT512 : - return "VectorT512"; case InstructionSet_AVXVNNI : return "AVXVNNI"; case InstructionSet_MOVBE : @@ -963,6 +925,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "AVX512VBMI"; case InstructionSet_AVX512VBMI_VL : return "AVX512VBMI_VL"; + case InstructionSet_VectorT128 : + return "VectorT128"; + case InstructionSet_VectorT256 : + return "VectorT256"; + case InstructionSet_VectorT512 : + return "VectorT512"; #endif // TARGET_X86 default: diff --git a/src/coreclr/inc/corjitflags.h b/src/coreclr/inc/corjitflags.h index f3eb888adb73d1..cc4ad7900e2435 100644 --- a/src/coreclr/inc/corjitflags.h +++ b/src/coreclr/inc/corjitflags.h @@ -91,7 +91,7 @@ class CORJIT_FLAGS CORJIT_FLAG_VECTOR512_THROTTLING = 43, // On Xarch, 512-bit vector usage may incur CPU frequency throttling #else CORJIT_FLAG_UNUSED16 = 43, -#endif +#endif // !defined(TARGET_ARM) CORJIT_FLAG_UNUSED17 = 44, CORJIT_FLAG_UNUSED18 = 45, diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index fa7c2caa26d645..1d7fbbffeab829 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2281,16 +2281,21 @@ void Compiler::compSetProcessor() instructionSetFlags.AddInstructionSet(InstructionSet_Vector256); } - if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F)) + // x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL + // These have been shipped together historically and at the time of this writing + // there exists no hardware which doesn't support the entire feature set. To simplify + // the overall JIT implementation, we currently require the entire set of ISAs to be + // supported and disable AVX512 support otherwise. 
+ + if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL) && + instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL) && + instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL)) { - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL)); assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL)); assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL)); assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL)); + assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F)); + assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL)); instructionSetFlags.AddInstructionSet(InstructionSet_Vector512); @@ -2305,6 +2310,32 @@ void Compiler::compSetProcessor() preferredVectorByteLength = 256; } } + else + { + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512VBMI); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); + +#ifdef TARGET_AMD64 + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_X64); + 
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL_X64); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_X64); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_VL_X64); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_X64); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_VL_X64); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_X64); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL_X64); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512VBMI_X64); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL_X64); +#endif // TARGET_AMD64 + } opts.preferredVectorByteLength = preferredVectorByteLength; #elif defined(TARGET_ARM64) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 2df123bf879ee4..1e83ec89e4ba33 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8642,7 +8642,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // Get the number of bytes in a System.Numeric.Vector for the current compilation. 
// Note - cannot be used for System.Runtime.Intrinsic - unsigned getVectorTByteLength() + unsigned getSIMDVectorRegisterByteLength() { // We need to report the ISA dependency to the VM so that scenarios // such as R2R work correctly for larger vector sizes, so we always @@ -8672,7 +8672,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return 0; } #else - assert(!"getVectorTByteLength() unimplemented on target arch"); + assert(!"getSIMDVectorRegisterByteLength() unimplemented on target arch"); unreached(); #endif } @@ -8717,11 +8717,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX //------------------------------------------------------------------------ // getPreferredVectorByteLength: Gets the preferred length, in bytes, to use for vectorization // -<<<<<<< HEAD - unsigned int getPreferredVectorByteLength() const -======= uint32_t getPreferredVectorByteLength() const ->>>>>>> dotnet/main { #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) uint32_t preferredVectorByteLength = opts.preferredVectorByteLength; @@ -8753,11 +8749,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX uint32_t roundUpSIMDSize(unsigned size) { #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) -<<<<<<< HEAD - unsigned maxSize = getPreferredVectorByteLength(); -======= uint32_t maxSize = getPreferredVectorByteLength(); ->>>>>>> dotnet/main assert(maxSize <= ZMM_REGSIZE_BYTES); if ((size <= XMM_REGSIZE_BYTES) && (maxSize > XMM_REGSIZE_BYTES)) @@ -8795,11 +8787,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX uint32_t roundDownSIMDSize(unsigned size) { #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) -<<<<<<< HEAD - unsigned maxSize = getPreferredVectorByteLength(); -======= uint32_t maxSize = getPreferredVectorByteLength(); ->>>>>>> dotnet/main assert(maxSize <= ZMM_REGSIZE_BYTES); if (size >= maxSize) @@ -8825,11 +8813,7 @@ 
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #endif } -<<<<<<< HEAD - unsigned int getMinVectorByteLength() -======= uint32_t getMinVectorByteLength() ->>>>>>> dotnet/main { return emitTypeSize(TYP_SIMD8); } @@ -9275,8 +9259,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX struct Options { - // all flags passed from the EE - JitFlags* jitFlags; + JitFlags* jitFlags; // all flags passed from the EE // The instruction sets that the compiler is allowed to emit. CORINFO_InstructionSetFlags compSupportsISA; diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 0bb44bcb8a0927..c156faf3b1cd0c 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -776,15 +776,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); - uint32_t vectorTByteLength = getVectorTByteLength(); - - if (vectorTByteLength == YMM_REGSIZE_BYTES) + if (getSIMDVectorRegisterByteLength() == YMM_REGSIZE_BYTES) { // Vector is TYP_SIMD32, so we should treat this as a call to Vector128.ToVector256 return impSpecialIntrinsic(NI_Vector128_ToVector256, clsHnd, method, sig, simdBaseJitType, retType, simdSize); } - else if (vectorTByteLength == XMM_REGSIZE_BYTES) + else if (getSIMDVectorRegisterByteLength() == XMM_REGSIZE_BYTES) { // We fold away the cast here, as it only exists to satisfy // the type system. 
It is safe to do this here since the retNode type @@ -796,7 +794,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } else { - assert(vectorTByteLength == 0); + assert(getSIMDVectorRegisterByteLength() == 0); } break; } @@ -909,9 +907,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); - uint32_t vectorTByteLength = getVectorTByteLength(); - - if (vectorTByteLength == YMM_REGSIZE_BYTES) + if (getSIMDVectorRegisterByteLength() == YMM_REGSIZE_BYTES) { // We fold away the cast here, as it only exists to satisfy // the type system. It is safe to do this here since the retNode type @@ -923,7 +919,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - else if (vectorTByteLength == XMM_REGSIZE_BYTES) + else if (getSIMDVectorRegisterByteLength() == XMM_REGSIZE_BYTES) { if (compExactlyDependsOn(InstructionSet_AVX)) { @@ -945,7 +941,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } else { - assert(vectorTByteLength == 0); + assert(getSIMDVectorRegisterByteLength() == 0); } break; } @@ -955,9 +951,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); - uint32_t vectorTByteLength = getVectorTByteLength(); - - if (vectorTByteLength == YMM_REGSIZE_BYTES) + if (getSIMDVectorRegisterByteLength() == YMM_REGSIZE_BYTES) { assert(IsBaselineVector512IsaSupported()); // We support Vector512 but Vector is only 32-bytes, so we should @@ -976,7 +970,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } break; } - else if (vectorTByteLength == XMM_REGSIZE_BYTES) + else if (getSIMDVectorRegisterByteLength() == XMM_REGSIZE_BYTES) { if (compExactlyDependsOn(InstructionSet_AVX512F)) { @@ -998,7 +992,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } else { - assert(vectorTByteLength == 0); + assert(getSIMDVectorRegisterByteLength() == 0); } break; } diff --git a/src/coreclr/jit/importercalls.cpp 
b/src/coreclr/jit/importercalls.cpp index a676724e4bd5dd..02dfcd04779b75 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -8332,7 +8332,10 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) CORINFO_SIG_INFO sig; info.compCompHnd->getMethodSig(method, &sig); - result = SimdAsHWIntrinsicInfo::lookupId(this, &sig, className, methodName, enclosingClassName); + int sizeOfVectorT = getSIMDVectorRegisterByteLength(); + + result = SimdAsHWIntrinsicInfo::lookupId(this, &sig, className, methodName, enclosingClassName, + sizeOfVectorT); #endif // FEATURE_HW_INTRINSICS if (result == NI_Illegal) diff --git a/src/coreclr/jit/jitee.h b/src/coreclr/jit/jitee.h index 30abb7134b50ac..7c3089833bc332 100644 --- a/src/coreclr/jit/jitee.h +++ b/src/coreclr/jit/jitee.h @@ -212,6 +212,6 @@ class JitFlags } private: - uint64_t m_jitFlags; + unsigned __int64 m_jitFlags; CORINFO_InstructionSetFlags m_instructionSetFlags; }; diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 26f2f84442d406..331897add9c439 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -233,6 +233,8 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH { JITDUMP(" Found type Vector\n"); m_simdHandleCache->VectorHandle = typeHnd; + + size = getSIMDVectorRegisterByteLength(); break; } @@ -297,7 +299,7 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH } JITDUMP(" Found Vector<%s>\n", varTypeName(JitType2PreciseVarType(simdBaseJitType))); - size = getVectorTByteLength(); + size = getSIMDVectorRegisterByteLength(); if (size == 0) { diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index 0c21b59e994e96..cddaa8efdcbb00 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -44,11 +44,10 @@ const SimdAsHWIntrinsicInfo& SimdAsHWIntrinsicInfo::lookup(NamedIntrinsic id) // 
lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet // // Arguments: -// comp -- The compiler -// sig -- The signature of the intrinsic // className -- The name of the class associated with the SimdIntrinsic to lookup // methodName -- The name of the method associated with the SimdIntrinsic to lookup // enclosingClassName -- The name of the enclosing class +// sizeOfVectorT -- The size of Vector in bytes // // Return Value: // The NamedIntrinsic associated with methodName and classId @@ -56,9 +55,10 @@ NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(Compiler* comp, CORINFO_SIG_INFO* sig, const char* className, const char* methodName, - const char* enclosingClassName) + const char* enclosingClassName, + int sizeOfVectorT) { - SimdAsHWIntrinsicClassId classId = lookupClassId(comp, className, enclosingClassName); + SimdAsHWIntrinsicClassId classId = lookupClassId(className, enclosingClassName, sizeOfVectorT); if (classId == SimdAsHWIntrinsicClassId::Unknown) { @@ -74,42 +74,11 @@ NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(Compiler* comp, isInstanceMethod = true; } - if (classId == SimdAsHWIntrinsicClassId::Vector) + if (strcmp(methodName, "get_IsHardwareAccelerated") == 0) { - // We want to avoid doing anything that would unnecessarily trigger a recorded dependency against Vector - // so we duplicate a few checks here to ensure this works smoothly for the static Vector class. - - assert(!isInstanceMethod); - - if (strcmp(methodName, "get_IsHardwareAccelerated") == 0) - { - return comp->IsBaselineSimdIsaSupported() ? 
NI_IsSupported_True : NI_IsSupported_False; - } - - var_types retType = JITtype2varType(sig->retType); - CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - - if (retType == TYP_STRUCT) - { - argClass = sig->retTypeSigClass; - } - else if (numArgs != 0) - { - argClass = comp->info.compCompHnd->getArgClass(sig, sig->args); - } - - const char* argNamespaceName; - const char* argClassName = comp->getClassNameFromMetadata(argClass, &argNamespaceName); - - classId = lookupClassId(comp, argClassName, nullptr); - - assert(classId != SimdAsHWIntrinsicClassId::Unknown); - assert(classId != SimdAsHWIntrinsicClassId::Vector); + return comp->IsBaselineSimdIsaSupported() ? NI_IsSupported_True : NI_IsSupported_False; } - assert(strcmp(methodName, "get_IsHardwareAccelerated") != 0); - for (int i = 0; i < (NI_SIMD_AS_HWINTRINSIC_END - NI_SIMD_AS_HWINTRINSIC_START - 1); i++) { const SimdAsHWIntrinsicInfo& intrinsicInfo = simdAsHWIntrinsicInfoArray[i]; @@ -144,15 +113,15 @@ NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(Compiler* comp, // lookupClassId: Gets the SimdAsHWIntrinsicClassId for a given class name and enclsoing class name // // Arguments: -// comp -- The compiler // className -- The name of the class associated with the SimdAsHWIntrinsicClassId to lookup // enclosingClassName -- The name of the enclosing class +// sizeOfVectorT -- The size of Vector in bytes // // Return Value: // The SimdAsHWIntrinsicClassId associated with className and enclosingClassName -SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(Compiler* comp, - const char* className, - const char* enclosingClassName) +SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(const char* className, + const char* enclosingClassName, + int sizeOfVectorT) { assert(className != nullptr); @@ -190,11 +159,7 @@ SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(Compiler* comp, className += 6; - if (className[0] == '\0') - { - return 
SimdAsHWIntrinsicClassId::Vector; - } - else if (strcmp(className, "2") == 0) + if (strcmp(className, "2") == 0) { return SimdAsHWIntrinsicClassId::Vector2; } @@ -206,18 +171,16 @@ SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(Compiler* comp, { return SimdAsHWIntrinsicClassId::Vector4; } - else if (strcmp(className, "`1") == 0) + else if ((className[0] == '\0') || (strcmp(className, "`1") == 0)) { - uint32_t vectorTByteLength = comp->getVectorTByteLength(); - #if defined(TARGET_XARCH) - if (vectorTByteLength == 32) + if (sizeOfVectorT == 32) { return SimdAsHWIntrinsicClassId::VectorT256; } #endif // TARGET_XARCH - if (vectorTByteLength == 16) + if (sizeOfVectorT == 16) { return SimdAsHWIntrinsicClassId::VectorT128; } @@ -698,10 +661,6 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, break; } - case NI_Quaternion_WithElement: - case NI_Vector2_WithElement: - case NI_Vector3_WithElement: - case NI_Vector4_WithElement: case NI_VectorT128_WithElement: case NI_VectorT256_WithElement: { @@ -783,10 +742,6 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, break; } - case NI_Quaternion_WithElement: - case NI_Vector2_WithElement: - case NI_Vector3_WithElement: - case NI_Vector4_WithElement: case NI_VectorT128_WithElement: { assert(numArgs == 3); @@ -1535,13 +1490,9 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } case NI_Quaternion_get_Item: - case NI_Quaternion_GetElement: case NI_Vector2_get_Item: - case NI_Vector2_GetElement: case NI_Vector3_get_Item: - case NI_Vector3_GetElement: case NI_Vector4_get_Item: - case NI_Vector4_GetElement: case NI_VectorT128_get_Item: case NI_VectorT128_GetElement: #if defined(TARGET_XARCH) @@ -2041,10 +1992,6 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, break; } - case NI_Quaternion_WithElement: - case NI_Vector2_WithElement: - case NI_Vector3_WithElement: - case NI_Vector4_WithElement: case 
NI_VectorT128_WithElement: #if defined(TARGET_XARCH) case NI_VectorT256_WithElement: diff --git a/src/coreclr/jit/simdashwintrinsic.h b/src/coreclr/jit/simdashwintrinsic.h index 9cec169acb8ce6..7bce4330ae6ade 100644 --- a/src/coreclr/jit/simdashwintrinsic.h +++ b/src/coreclr/jit/simdashwintrinsic.h @@ -9,7 +9,6 @@ enum class SimdAsHWIntrinsicClassId Unknown, Plane, Quaternion, - Vector, Vector2, Vector3, Vector4, @@ -78,10 +77,11 @@ struct SimdAsHWIntrinsicInfo CORINFO_SIG_INFO* sig, const char* className, const char* methodName, - const char* enclosingClassName); - static SimdAsHWIntrinsicClassId lookupClassId(Compiler* comp, - const char* className, - const char* enclosingClassName); + const char* enclosingClassName, + int sizeOfVectorT); + static SimdAsHWIntrinsicClassId lookupClassId(const char* className, + const char* enclosingClassName, + int sizeOfVectorT); // Member lookup diff --git a/src/coreclr/jit/simdashwintrinsiclistarm64.h b/src/coreclr/jit/simdashwintrinsiclistarm64.h index 344fb7ebc6c322..83b5a1138c92c0 100644 --- a/src/coreclr/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/jit/simdashwintrinsiclistarm64.h @@ -58,7 +58,6 @@ SIMD_AS_HWINTRINSIC_ID(Quaternion, Dot, SIMD_AS_HWINTRINSIC_ID(Quaternion, get_Identity, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_get_Identity, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, get_Item, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_get_Item, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::BaseTypeFromThisArg) SIMD_AS_HWINTRINSIC_ID(Quaternion, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Quaternion, GetElement, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_GetElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, Inverse, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_Inverse, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, Length, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_Length, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Quaternion, LengthSquared, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_LengthSquared, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) @@ -72,7 +71,6 @@ SIMD_AS_HWINTRINSIC_ID(Quaternion, op_Multiply, SIMD_AS_HWINTRINSIC_ID(Quaternion, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_op_Subtraction, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, op_UnaryNegation, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_op_UnaryNegation, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, Subtract, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Quaternion, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA ID Name NumArg Instructions Flags @@ -93,7 +91,6 @@ SIMD_AS_HWINTRINSIC_ID(Vector2, get_One, SIMD_AS_HWINTRINSIC_ID(Vector2, get_UnitX, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_UnitX, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, get_UnitY, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_UnitY, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector2, GetElement, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_GetElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, Length, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Length, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector2, LengthSquared, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_LengthSquared, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector2, Lerp, 3, {NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Lerp, NI_Illegal}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1 | SimdAsHWIntrinsicFlag::SpillSideEffectsOp2) @@ -111,7 +108,6 @@ SIMD_AS_HWINTRINSIC_ID(Vector2, op_Subtraction, SIMD_AS_HWINTRINSIC_ID(Vector2, op_UnaryNegation, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_op_UnaryNegation, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_NM(Vector2, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, Subtract, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector2, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA ID Name NumArg Instructions Flags @@ -134,7 +130,6 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, get_UnitX, SIMD_AS_HWINTRINSIC_ID(Vector3, get_UnitY, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_UnitY, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_UnitZ, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_UnitZ, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector3, GetElement, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_GetElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, Length, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Length, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector3, LengthSquared, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_LengthSquared, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector3, Lerp, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Lerp, NI_Illegal}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1 | SimdAsHWIntrinsicFlag::SpillSideEffectsOp2) @@ -152,7 +147,6 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, op_Subtraction, SIMD_AS_HWINTRINSIC_ID(Vector3, op_UnaryNegation, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_op_UnaryNegation, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_NM(Vector3, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, Subtract, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Vector3_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector3, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA ID Name NumArg Instructions Flags @@ -177,7 +171,6 @@ SIMD_AS_HWINTRINSIC_ID(Vector4, get_UnitY, SIMD_AS_HWINTRINSIC_ID(Vector4, get_UnitZ, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_UnitZ, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_UnitW, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_UnitW, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector4, GetElement, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_GetElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, Length, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Length, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) 
SIMD_AS_HWINTRINSIC_ID(Vector4, LengthSquared, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_LengthSquared, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector4, Lerp, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Lerp, NI_Illegal}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1 | SimdAsHWIntrinsicFlag::SpillSideEffectsOp2) @@ -195,7 +188,6 @@ SIMD_AS_HWINTRINSIC_ID(Vector4, op_Subtraction, SIMD_AS_HWINTRINSIC_ID(Vector4, op_UnaryNegation, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_op_UnaryNegation, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_NM(Vector4, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, Subtract, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector4, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* diff --git 
a/src/coreclr/jit/simdashwintrinsiclistxarch.h b/src/coreclr/jit/simdashwintrinsiclistxarch.h index 598712e86254d4..5756db4798648c 100644 --- a/src/coreclr/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/jit/simdashwintrinsiclistxarch.h @@ -58,7 +58,6 @@ SIMD_AS_HWINTRINSIC_ID(Quaternion, Dot, SIMD_AS_HWINTRINSIC_ID(Quaternion, get_Identity, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_get_Identity, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, get_Item, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_get_Item, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::BaseTypeFromThisArg) SIMD_AS_HWINTRINSIC_ID(Quaternion, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Quaternion, GetElement, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_GetElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, Inverse, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_Inverse, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, Length, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_Length, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Quaternion, LengthSquared, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_LengthSquared, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) @@ -72,7 +71,6 @@ SIMD_AS_HWINTRINSIC_ID(Quaternion, op_Multiply, SIMD_AS_HWINTRINSIC_ID(Quaternion, op_Subtraction, 2, {NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_op_Subtraction, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, op_UnaryNegation, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_op_UnaryNegation, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Quaternion, Subtract, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Quaternion, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Quaternion_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA ID Name NumArg Instructions Flags @@ -93,7 +91,6 @@ SIMD_AS_HWINTRINSIC_ID(Vector2, get_One, SIMD_AS_HWINTRINSIC_ID(Vector2, get_UnitX, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_UnitX, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, get_UnitY, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_UnitY, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector2, GetElement, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_GetElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, Length, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Length, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector2, LengthSquared, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_LengthSquared, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector2, Lerp, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Lerp, NI_Illegal}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1 | SimdAsHWIntrinsicFlag::SpillSideEffectsOp2) @@ -111,7 +108,6 @@ SIMD_AS_HWINTRINSIC_ID(Vector2, op_Subtraction, SIMD_AS_HWINTRINSIC_ID(Vector2, op_UnaryNegation, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_op_UnaryNegation, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_NM(Vector2, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector2, Subtract, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector2, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA ID Name NumArg Instructions Flags @@ -134,7 +130,6 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, get_UnitX, SIMD_AS_HWINTRINSIC_ID(Vector3, get_UnitY, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_UnitY, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_UnitZ, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_UnitZ, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector3, GetElement, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_GetElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, Length, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Length, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector3, LengthSquared, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_LengthSquared, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector3, Lerp, 3, {NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Lerp, NI_Illegal}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1 | SimdAsHWIntrinsicFlag::SpillSideEffectsOp2) @@ -152,7 +147,6 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, op_Subtraction, SIMD_AS_HWINTRINSIC_ID(Vector3, op_UnaryNegation, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_op_UnaryNegation, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_NM(Vector3, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, Subtract, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector3, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA ID Name NumArg Instructions Flags @@ -177,7 +171,6 @@ SIMD_AS_HWINTRINSIC_ID(Vector4, get_UnitY, SIMD_AS_HWINTRINSIC_ID(Vector4, get_UnitZ, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_UnitZ, NI_Illegal}, SimdAsHWIntrinsicFlag::None) 
SIMD_AS_HWINTRINSIC_ID(Vector4, get_UnitW, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_UnitW, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector4, GetElement, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_GetElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, Length, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Length, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector4, LengthSquared, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_LengthSquared, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) SIMD_AS_HWINTRINSIC_ID(Vector4, Lerp, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Lerp, NI_Illegal}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1 | SimdAsHWIntrinsicFlag::SpillSideEffectsOp2) @@ -195,7 +188,6 @@ SIMD_AS_HWINTRINSIC_ID(Vector4, op_Subtraction, SIMD_AS_HWINTRINSIC_ID(Vector4, op_UnaryNegation, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_op_UnaryNegation, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_NM(Vector4, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector4, Subtract, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Subtract, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(Vector4, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // ISA ID Name NumArg Instructions Flags diff --git a/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h b/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h index ad7d2e11ee69ac..67ba547488e960 100644 --- a/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h +++ b/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h @@ -34,7 +34,6 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Avx512dq_vl = 0x400000, XArchIntrinsicConstants_Avx512Vbmi = 0x800000, XArchIntrinsicConstants_Avx512Vbmi_vl = 0x1000000, - XArchIntrinsicConstants_Serialize = 0x2000000, }; #endif //HOST_X86 || HOST_AMD64 diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp index 34947b9a32f67c..ed44c9e948a485 100644 --- a/src/coreclr/nativeaot/Runtime/startup.cpp +++ b/src/coreclr/nativeaot/Runtime/startup.cpp @@ -197,57 +197,51 @@ bool DetectCPUFeatures() { __cpuid(cpuidInfo, 0x00000001); - const int requiredBaselineEdxFlags = (1 << 25) // SSE - | (1 << 26); // SSE2 - - if ((cpuidInfo[CPUID_EDX] & requiredBaselineEdxFlags) == requiredBaselineEdxFlags) + if (((cpuidInfo[CPUID_EDX] & (1 << 25)) != 0) && ((cpuidInfo[CPUID_EDX] & (1 << 26)) != 0)) // SSE & SSE2 { - if 
((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI + if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI { g_cpuFeatures |= XArchIntrinsicConstants_Aes; } - if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // PCLMULQDQ + if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // PCLMULQDQ { g_cpuFeatures |= XArchIntrinsicConstants_Pclmulqdq; } - if ((cpuidInfo[CPUID_ECX] & (1 << 0)) != 0) // SSE3 + if ((cpuidInfo[CPUID_ECX] & (1 << 0)) != 0) // SSE3 { g_cpuFeatures |= XArchIntrinsicConstants_Sse3; - if ((cpuidInfo[CPUID_ECX] & (1 << 9)) != 0) // SSSE3 + if ((cpuidInfo[CPUID_ECX] & (1 << 9)) != 0) // SSSE3 { g_cpuFeatures |= XArchIntrinsicConstants_Ssse3; - if ((cpuidInfo[CPUID_ECX] & (1 << 19)) != 0) // SSE4.1 + if ((cpuidInfo[CPUID_ECX] & (1 << 19)) != 0) // SSE4.1 { g_cpuFeatures |= XArchIntrinsicConstants_Sse41; - if ((cpuidInfo[CPUID_ECX] & (1 << 20)) != 0) // SSE4.2 + if ((cpuidInfo[CPUID_ECX] & (1 << 20)) != 0) // SSE4.2 { g_cpuFeatures |= XArchIntrinsicConstants_Sse42; - if ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0) // MOVBE + if ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0) // MOVBE { g_cpuFeatures |= XArchIntrinsicConstants_Movbe; } - if ((cpuidInfo[CPUID_ECX] & (1 << 23)) != 0) // POPCNT + if ((cpuidInfo[CPUID_ECX] & (1 << 23)) != 0) // POPCNT { g_cpuFeatures |= XArchIntrinsicConstants_Popcnt; } - const int requiredAvxEcxFlags = (1 << 27) // OSXSAVE - | (1 << 28); // AVX - - if ((cpuidInfo[CPUID_ECX] & requiredAvxEcxFlags) == requiredAvxEcxFlags) + if (((cpuidInfo[CPUID_ECX] & (1 << 27)) != 0) && ((cpuidInfo[CPUID_ECX] & (1 << 28)) != 0)) // OSXSAVE & AVX { - if (PalIsAvxEnabled() && (xmmYmmStateSupport() == 1)) // XGETBV == 11 + if (PalIsAvxEnabled() && (xmmYmmStateSupport() == 1)) { g_cpuFeatures |= XArchIntrinsicConstants_Avx; - if ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0) // FMA + if ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0) // FMA { g_cpuFeatures |= XArchIntrinsicConstants_Fma; } @@ -256,67 +250,66 @@ bool DetectCPUFeatures() { __cpuidex(cpuidInfo, 0x00000007, 
0x00000000); - if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2 + if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2 { g_cpuFeatures |= XArchIntrinsicConstants_Avx2; - if (PalIsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 + __cpuidex(cpuidInfo, 0x00000007, 0x00000001); + if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI + { + g_cpuFeatures |= XArchIntrinsicConstants_AvxVnni; + } + + if (PalIsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 { - if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F + if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F { g_cpuFeatures |= XArchIntrinsicConstants_Avx512f; bool isAVX512_VLSupported = false; - if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL + if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL { g_cpuFeatures |= XArchIntrinsicConstants_Avx512f_vl; isAVX512_VLSupported = true; } - if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) // AVX512BW + if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) // AVX512BW { g_cpuFeatures |= XArchIntrinsicConstants_Avx512bw; - if (isAVX512_VLSupported) // AVX512BW_VL + if (isAVX512_VLSupported) { g_cpuFeatures |= XArchIntrinsicConstants_Avx512bw_vl; } } - if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) // AVX512CD + if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) // AVX512CD { g_cpuFeatures |= XArchIntrinsicConstants_Avx512cd; - if (isAVX512_VLSupported) // AVX512CD_VL + if (isAVX512_VLSupported) { g_cpuFeatures |= XArchIntrinsicConstants_Avx512cd_vl; } } - if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) // AVX512DQ + if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) // AVX512DQ { g_cpuFeatures |= XArchIntrinsicConstants_Avx512dq; - if (isAVX512_VLSupported) // AVX512DQ_VL + if (isAVX512_VLSupported) { g_cpuFeatures |= XArchIntrinsicConstants_Avx512dq_vl; } } - if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI + if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI { g_cpuFeatures |= XArchIntrinsicConstants_Avx512Vbmi; 
- if (isAVX512_VLSupported) // AVX512VBMI_VL + if (isAVX512_VLSupported) { g_cpuFeatures |= XArchIntrinsicConstants_Avx512Vbmi_vl; } } } } - - __cpuidex(cpuidInfo, 0x00000007, 0x00000001); - - if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI - { - g_cpuFeatures |= XArchIntrinsicConstants_AvxVnni; - } } } } @@ -331,20 +324,15 @@ bool DetectCPUFeatures() { __cpuidex(cpuidInfo, 0x00000007, 0x00000000); - if ((cpuidInfo[CPUID_EBX] & (1 << 3)) != 0) // BMI1 + if ((cpuidInfo[CPUID_EBX] & (1 << 3)) != 0) // BMI1 { g_cpuFeatures |= XArchIntrinsicConstants_Bmi1; } - if ((cpuidInfo[CPUID_EBX] & (1 << 8)) != 0) // BMI2 + if ((cpuidInfo[CPUID_EBX] & (1 << 8)) != 0) // BMI2 { g_cpuFeatures |= XArchIntrinsicConstants_Bmi2; } - - if ((cpuidInfo[CPUID_EDX] & (1 << 14)) != 0) - { - g_cpuFeatures |= XArchIntrinsicConstants_Serialize; // SERIALIZE - } } } @@ -355,7 +343,7 @@ bool DetectCPUFeatures() { __cpuid(cpuidInfo, 0x80000001); - if ((cpuidInfo[CPUID_ECX] & (1 << 5)) != 0) // LZCNT + if ((cpuidInfo[CPUID_ECX] & (1 << 5)) != 0) // LZCNT { g_cpuFeatures |= XArchIntrinsicConstants_Lzcnt; } diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index f6c4f3a6e1d26a..307f49ca791c51 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -93,33 +93,16 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("movbe"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("popcnt"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lzcnt"); - optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("serialize"); // If AVX was enabled, we can opportunistically enable instruction sets which use the VEX encodings Debug.Assert(InstructionSet.X64_AVX == InstructionSet.X86_AVX); if 
(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX)) { - optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx2"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("fma"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi2"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnni"); } - - Debug.Assert(InstructionSet.X64_AVX512F == InstructionSet.X86_AVX512F); - if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512F)) - { - Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512F_VL)); - Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512BW)); - Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)); - Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512CD)); - Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512CD_VL)); - Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512DQ)); - Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL)); - - optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vbmi"); - optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vbmi_vl"); - } } else if (targetArchitecture == TargetArchitecture.ARM64) { @@ -128,9 +111,6 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha1"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha2"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lse"); - optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("dotprod"); - optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rdma"); - 
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rcpc"); } optimisticInstructionSetSupportBuilder.ComputeInstructionSetFlags(maxVectorTBitWidth, out var optimisticInstructionSet, out _, diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 39f01526068b31..19898dfc4dcd8d 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -42,9 +42,9 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.ARM64_Atomics: return ReadyToRunInstructionSet.Atomics; case InstructionSet.ARM64_Vector64: return null; case InstructionSet.ARM64_Vector128: return null; - case InstructionSet.ARM64_VectorT128: return null; case InstructionSet.ARM64_Dczva: return null; case InstructionSet.ARM64_Rcpc: return ReadyToRunInstructionSet.Rcpc; + case InstructionSet.ARM64_VectorT128: return null; default: throw new Exception("Unknown instruction set"); } @@ -89,9 +89,6 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_Vector128: return null; case InstructionSet.X64_Vector256: return null; case InstructionSet.X64_Vector512: return null; - case InstructionSet.X64_VectorT128: return null; - case InstructionSet.X64_VectorT256: return null; - case InstructionSet.X64_VectorT512: return null; case InstructionSet.X64_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni; case InstructionSet.X64_AVXVNNI_X64: return ReadyToRunInstructionSet.AvxVnni; case InstructionSet.X64_MOVBE: return ReadyToRunInstructionSet.Movbe; @@ -118,6 +115,9 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_AVX512VBMI_X64: return ReadyToRunInstructionSet.Avx512Vbmi; case InstructionSet.X64_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL; case InstructionSet.X64_AVX512VBMI_VL_X64: return 
ReadyToRunInstructionSet.Avx512Vbmi_VL; + case InstructionSet.X64_VectorT128: return null; + case InstructionSet.X64_VectorT256: return null; + case InstructionSet.X64_VectorT512: return null; default: throw new Exception("Unknown instruction set"); } @@ -162,9 +162,6 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_Vector128: return null; case InstructionSet.X86_Vector256: return null; case InstructionSet.X86_Vector512: return null; - case InstructionSet.X86_VectorT128: return null; - case InstructionSet.X86_VectorT256: return null; - case InstructionSet.X86_VectorT512: return null; case InstructionSet.X86_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni; case InstructionSet.X86_AVXVNNI_X64: return null; case InstructionSet.X86_MOVBE: return ReadyToRunInstructionSet.Movbe; @@ -191,6 +188,9 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_AVX512VBMI_X64: return null; case InstructionSet.X86_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL; case InstructionSet.X86_AVX512VBMI_VL_X64: return null; + case InstructionSet.X86_VectorT128: return null; + case InstructionSet.X86_VectorT256: return null; + case InstructionSet.X86_VectorT512: return null; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index b5337a5bf5c377..30f0b145a18641 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -148,9 +148,6 @@ private static extern CorJitResult JitCompileMethod(out IntPtr exception, IntPtr jit, IntPtr thisHandle, IntPtr callbacks, ref CORINFO_METHOD_INFO info, uint flags, out IntPtr nativeEntry, out uint codeSize); - [DllImport(JitSupportLibrary)] - private static extern uint GetMaxVectorTBitWidth(IntPtr jit, CORJIT_FLAGS* flags); - [DllImport(JitSupportLibrary)] private static extern IntPtr 
AllocException([MarshalAs(UnmanagedType.LPWStr)]string message, int messageLength); diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index fd13fb87570361..8a5948c69d6507 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -27,9 +27,9 @@ public enum InstructionSet ARM64_Atomics = InstructionSet_ARM64.Atomics, ARM64_Vector64 = InstructionSet_ARM64.Vector64, ARM64_Vector128 = InstructionSet_ARM64.Vector128, - ARM64_VectorT128 = InstructionSet_ARM64.VectorT128, ARM64_Dczva = InstructionSet_ARM64.Dczva, ARM64_Rcpc = InstructionSet_ARM64.Rcpc, + ARM64_VectorT128 = InstructionSet_ARM64.VectorT128, ARM64_ArmBase_Arm64 = InstructionSet_ARM64.ArmBase_Arm64, ARM64_AdvSimd_Arm64 = InstructionSet_ARM64.AdvSimd_Arm64, ARM64_Aes_Arm64 = InstructionSet_ARM64.Aes_Arm64, @@ -57,9 +57,6 @@ public enum InstructionSet X64_Vector128 = InstructionSet_X64.Vector128, X64_Vector256 = InstructionSet_X64.Vector256, X64_Vector512 = InstructionSet_X64.Vector512, - X64_VectorT128 = InstructionSet_X64.VectorT128, - X64_VectorT256 = InstructionSet_X64.VectorT256, - X64_VectorT512 = InstructionSet_X64.VectorT512, X64_AVXVNNI = InstructionSet_X64.AVXVNNI, X64_MOVBE = InstructionSet_X64.MOVBE, X64_X86Serialize = InstructionSet_X64.X86Serialize, @@ -73,6 +70,9 @@ public enum InstructionSet X64_AVX512DQ_VL = InstructionSet_X64.AVX512DQ_VL, X64_AVX512VBMI = InstructionSet_X64.AVX512VBMI, X64_AVX512VBMI_VL = InstructionSet_X64.AVX512VBMI_VL, + X64_VectorT128 = InstructionSet_X64.VectorT128, + X64_VectorT256 = InstructionSet_X64.VectorT256, + X64_VectorT512 = InstructionSet_X64.VectorT512, X64_X86Base_X64 = InstructionSet_X64.X86Base_X64, X64_SSE_X64 = InstructionSet_X64.SSE_X64, X64_SSE2_X64 = InstructionSet_X64.SSE2_X64, @@ -121,9 +121,6 @@ public enum InstructionSet X86_Vector128 = InstructionSet_X86.Vector128, 
X86_Vector256 = InstructionSet_X86.Vector256, X86_Vector512 = InstructionSet_X86.Vector512, - X86_VectorT128 = InstructionSet_X86.VectorT128, - X86_VectorT256 = InstructionSet_X86.VectorT256, - X86_VectorT512 = InstructionSet_X86.VectorT512, X86_AVXVNNI = InstructionSet_X86.AVXVNNI, X86_MOVBE = InstructionSet_X86.MOVBE, X86_X86Serialize = InstructionSet_X86.X86Serialize, @@ -137,6 +134,9 @@ public enum InstructionSet X86_AVX512DQ_VL = InstructionSet_X86.AVX512DQ_VL, X86_AVX512VBMI = InstructionSet_X86.AVX512VBMI, X86_AVX512VBMI_VL = InstructionSet_X86.AVX512VBMI_VL, + X86_VectorT128 = InstructionSet_X86.VectorT128, + X86_VectorT256 = InstructionSet_X86.VectorT256, + X86_VectorT512 = InstructionSet_X86.VectorT512, X86_X86Base_X64 = InstructionSet_X86.X86Base_X64, X86_SSE_X64 = InstructionSet_X86.SSE_X64, X86_SSE2_X64 = InstructionSet_X86.SSE2_X64, @@ -182,9 +182,9 @@ public enum InstructionSet_ARM64 Atomics = 9, Vector64 = 10, Vector128 = 11, - VectorT128 = 12, - Dczva = 13, - Rcpc = 14, + Dczva = 12, + Rcpc = 13, + VectorT128 = 14, ArmBase_Arm64 = 15, AdvSimd_Arm64 = 16, Aes_Arm64 = 17, @@ -218,22 +218,22 @@ public enum InstructionSet_X64 Vector128 = 17, Vector256 = 18, Vector512 = 19, - VectorT128 = 20, - VectorT256 = 21, - VectorT512 = 22, - AVXVNNI = 23, - MOVBE = 24, - X86Serialize = 25, - AVX512F = 26, - AVX512F_VL = 27, - AVX512BW = 28, - AVX512BW_VL = 29, - AVX512CD = 30, - AVX512CD_VL = 31, - AVX512DQ = 32, - AVX512DQ_VL = 33, - AVX512VBMI = 34, - AVX512VBMI_VL = 35, + AVXVNNI = 20, + MOVBE = 21, + X86Serialize = 22, + AVX512F = 23, + AVX512F_VL = 24, + AVX512BW = 25, + AVX512BW_VL = 26, + AVX512CD = 27, + AVX512CD_VL = 28, + AVX512DQ = 29, + AVX512DQ_VL = 30, + AVX512VBMI = 31, + AVX512VBMI_VL = 32, + VectorT128 = 33, + VectorT256 = 34, + VectorT512 = 35, X86Base_X64 = 36, SSE_X64 = 37, SSE2_X64 = 38, @@ -288,22 +288,22 @@ public enum InstructionSet_X86 Vector128 = 17, Vector256 = 18, Vector512 = 19, - VectorT128 = 20, - VectorT256 = 21, - VectorT512 = 22, 
- AVXVNNI = 23, - MOVBE = 24, - X86Serialize = 25, - AVX512F = 26, - AVX512F_VL = 27, - AVX512BW = 28, - AVX512BW_VL = 29, - AVX512CD = 30, - AVX512CD_VL = 31, - AVX512DQ = 32, - AVX512DQ_VL = 33, - AVX512VBMI = 34, - AVX512VBMI_VL = 35, + AVXVNNI = 20, + MOVBE = 21, + X86Serialize = 22, + AVX512F = 23, + AVX512F_VL = 24, + AVX512BW = 25, + AVX512BW_VL = 26, + AVX512CD = 27, + AVX512CD_VL = 28, + AVX512DQ = 29, + AVX512DQ_VL = 30, + AVX512VBMI = 31, + AVX512VBMI_VL = 32, + VectorT128 = 33, + VectorT256 = 34, + VectorT512 = 35, X86Base_X64 = 36, SSE_X64 = 37, SSE2_X64 = 38, @@ -719,40 +719,24 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X64_AVX2); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X64_FMA); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); - 
if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL); break; case TargetArchitecture.X86: @@ -806,40 +790,24 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X86_AVX2); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X86_FMA); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); if 
(resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ_VL); break; } } while (!oldflags.Equals(resultflags)); @@ -1010,40 +978,24 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X64_FMA)) - 
resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); - if 
(resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); break; case TargetArchitecture.X86: @@ -1097,40 +1049,24 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X86_FMA)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI); - if 
(resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); break; } } while (!oldflags.Equals(resultflags)); @@ -1148,8 +1084,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe { ("x86-x64-v3", TargetArchitecture.X86), "x86-x64-v2 avx2 bmi bmi2 lzcnt movbe fma" }, { ("skylake", TargetArchitecture.X64), "x86-x64-v3" }, { ("skylake", TargetArchitecture.X86), "x86-x64-v3" }, - { ("x86-x64-v4", TargetArchitecture.X64), "x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl avx512dq avx512dq_vl" }, - { ("x86-x64-v4", TargetArchitecture.X86), "x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl avx512dq avx512dq_vl" }, + { ("x86-x64-v4", TargetArchitecture.X64), "x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl" }, + { ("x86-x64-v4", TargetArchitecture.X86), "x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl" }, { ("armv8-a", TargetArchitecture.ARM64), "neon" }, { ("armv8.1-a", TargetArchitecture.ARM64), "armv8-a lse crc rdma" }, { ("armv8.2-a", TargetArchitecture.ARM64), "armv8.1-a" }, @@ -1199,9 +1135,9 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("lse", "", InstructionSet.ARM64_Atomics, true); yield return new InstructionSetInfo("Vector64", "", InstructionSet.ARM64_Vector64, false); yield return new 
InstructionSetInfo("Vector128", "", InstructionSet.ARM64_Vector128, false); - yield return new InstructionSetInfo("VectorT128", "", InstructionSet.ARM64_VectorT128, false); yield return new InstructionSetInfo("Dczva", "", InstructionSet.ARM64_Dczva, false); yield return new InstructionSetInfo("rcpc", "", InstructionSet.ARM64_Rcpc, true); + yield return new InstructionSetInfo("VectorT128", "", InstructionSet.ARM64_VectorT128, false); break; case TargetArchitecture.X64: @@ -1224,9 +1160,6 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("Vector128", "", InstructionSet.X64_Vector128, false); yield return new InstructionSetInfo("Vector256", "", InstructionSet.X64_Vector256, false); yield return new InstructionSetInfo("Vector512", "", InstructionSet.X64_Vector512, false); - yield return new InstructionSetInfo("VectorT128", "", InstructionSet.X64_VectorT128, false); - yield return new InstructionSetInfo("VectorT256", "", InstructionSet.X64_VectorT256, false); - yield return new InstructionSetInfo("VectorT512", "", InstructionSet.X64_VectorT512, false); yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X64_AVXVNNI, true); yield return new InstructionSetInfo("movbe", "Movbe", InstructionSet.X64_MOVBE, true); yield return new InstructionSetInfo("serialize", "X86Serialize", InstructionSet.X64_X86Serialize, true); @@ -1240,6 +1173,9 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X64_AVX512DQ_VL, true); yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X64_AVX512VBMI, true); yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X64_AVX512VBMI_VL, true); + yield return new InstructionSetInfo("VectorT128", "", InstructionSet.X64_VectorT128, false); + yield return new InstructionSetInfo("VectorT256", "", InstructionSet.X64_VectorT256, false); + yield 
return new InstructionSetInfo("VectorT512", "", InstructionSet.X64_VectorT512, false); break; case TargetArchitecture.X86: @@ -1262,9 +1198,6 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("Vector128", "", InstructionSet.X86_Vector128, false); yield return new InstructionSetInfo("Vector256", "", InstructionSet.X86_Vector256, false); yield return new InstructionSetInfo("Vector512", "", InstructionSet.X86_Vector512, false); - yield return new InstructionSetInfo("VectorT128", "", InstructionSet.X86_VectorT128, false); - yield return new InstructionSetInfo("VectorT256", "", InstructionSet.X86_VectorT256, false); - yield return new InstructionSetInfo("VectorT512", "", InstructionSet.X86_VectorT512, false); yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X86_AVXVNNI, true); yield return new InstructionSetInfo("movbe", "Movbe", InstructionSet.X86_MOVBE, true); yield return new InstructionSetInfo("serialize", "X86Serialize", InstructionSet.X86_X86Serialize, true); @@ -1278,6 +1211,9 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X86_AVX512DQ_VL, true); yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X86_AVX512VBMI, true); yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X86_AVX512VBMI_VL, true); + yield return new InstructionSetInfo("VectorT128", "", InstructionSet.X86_VectorT128, false); + yield return new InstructionSetInfo("VectorT256", "", InstructionSet.X86_VectorT256, false); + yield return new InstructionSetInfo("VectorT512", "", InstructionSet.X86_VectorT512, false); break; } } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index bea91bf4909d3a..560b7b09f394a6 100644 --- 
a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -44,9 +44,6 @@ instructionset ,X86 ,Popcnt , ,15 ,POPCNT instructionset ,X86 , , , ,Vector128 , instructionset ,X86 , , , ,Vector256 , instructionset ,X86 , , , ,Vector512 , -instructionset ,X86 , , , ,VectorT128 , -instructionset ,X86 , , , ,VectorT256 , -instructionset ,X86 , , , ,VectorT512 , instructionset ,X86 ,AvxVnni , ,25 ,AVXVNNI ,avxvnni instructionset ,X86 ,Movbe , ,27 ,MOVBE ,movbe instructionset ,X86 ,X86Serialize , ,28 ,X86Serialize ,serialize @@ -60,7 +57,9 @@ instructionset ,X86 ,Avx512DQ , ,35 ,AVX512DQ instructionset ,X86 ,Avx512DQ_VL , ,36 ,AVX512DQ_VL ,avx512dq_vl instructionset ,X86 ,Avx512Vbmi , ,37 ,AVX512VBMI ,avx512vbmi instructionset ,X86 ,Avx512Vbmi_VL , ,38 ,AVX512VBMI_VL ,avx512vbmi_vl - +instructionset ,X86 , , , ,VectorT128 , +instructionset ,X86 , , , ,VectorT256 , +instructionset ,X86 , , , ,VectorT512 , instructionset64bit,X86 ,X86Base instructionset64bit,X86 ,SSE @@ -124,38 +123,23 @@ implication ,X86 ,AVXVNNI ,AVX2 implication ,X86 ,MOVBE ,SSE42 implication ,X86 ,X86Serialize ,X86Base implication ,X86 ,AVX512F ,AVX2 -implication ,X86 ,AVX512F ,FMA implication ,X86 ,AVX512F_VL ,AVX512F implication ,X86 ,AVX512CD ,AVX512F -implication ,X86 ,AVX512CD_VL ,AVX512CD implication ,X86 ,AVX512CD_VL ,AVX512F_VL implication ,X86 ,AVX512BW ,AVX512F -implication ,X86 ,AVX512BW_VL ,AVX512BW implication ,X86 ,AVX512BW_VL ,AVX512F_VL implication ,X86 ,AVX512DQ ,AVX512F -implication ,X86 ,AVX512DQ_VL ,AVX512DQ implication ,X86 ,AVX512DQ_VL ,AVX512F_VL implication ,X86 ,AVX512VBMI ,AVX512BW -implication ,X86 ,AVX512VBMI_VL ,AVX512VBMI implication ,X86 ,AVX512VBMI_VL ,AVX512BW_VL -; While the AVX-512 ISAs can be individually lit-up, they really -; need the 5 following to be fully functional without adding -; significant complexity into the JIT. 
Additionally, unlike AVX/AVX2 -; there was never really any hardware that didn't provide all 5 at -; once, with the notable exception being Knight's Landing which -; provided a similar but not quite the same feature. -implication ,X86 ,AVX512F ,AVX512BW_VL -implication ,X86 ,AVX512F ,AVX512CD_VL -implication ,X86 ,AVX512F ,AVX512DQ_VL - ; Definition of X64 instruction sets definearch ,X64 ,64Bit ,X64, X64 copyinstructionsets,X86 ,X64 ; Definition of Arm64 instruction sets -definearch ,ARM64 ,64Bit ,Arm64 +definearch ,ARM64 ,64Bit ,Arm64, Arm64 instructionset ,ARM64 ,ArmBase , ,16 ,ArmBase ,base instructionset ,ARM64 ,AdvSimd , ,17 ,AdvSimd ,neon @@ -168,9 +152,9 @@ instructionset ,ARM64 ,Sha256 , ,20 ,Sha256 instructionset ,ARM64 , ,Atomics ,21 ,Atomics ,lse instructionset ,ARM64 , , , ,Vector64 , instructionset ,ARM64 , , , ,Vector128 , -instructionset ,ARM64 , , , ,VectorT128 , instructionset ,ARM64 , , , ,Dczva , instructionset ,ARM64 , ,Rcpc ,26 ,Rcpc ,rcpc +instructionset ,ARM64 , , , ,VectorT128 , instructionset64bit,ARM64 ,ArmBase instructionset64bit,ARM64 ,AdvSimd @@ -203,7 +187,7 @@ instructionsetgroup ,x86-x64 ,X64 X86 ,sse2 instructionsetgroup ,x86-x64-v2 ,X64 X86 ,sse4.2 popcnt instructionsetgroup ,x86-x64-v3 ,X64 X86 ,x86-x64-v2 avx2 bmi bmi2 lzcnt movbe fma instructionsetgroup ,skylake ,X64 X86 ,x86-x64-v3 -instructionsetgroup ,x86-x64-v4 ,X64 X86 ,x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl avx512dq avx512dq_vl +instructionsetgroup ,x86-x64-v4 ,X64 X86 ,x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl instructionsetgroup ,armv8-a ,ARM64 ,neon instructionsetgroup ,armv8.1-a ,ARM64 ,armv8-a lse crc rdma diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs index b7e4ee14e57ac5..1411ca7b6c7941 100644 --- 
a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs @@ -111,7 +111,6 @@ private static class XArchIntrinsicConstants public const int Avx512dq_vl = 0x400000; public const int Avx512Vbmi = 0x800000; public const int Avx512Vbmi_vl = 0x1000000; - public const int Serialize = 0x2000000; public static int FromInstructionSet(InstructionSet instructionSet) { @@ -171,8 +170,6 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_AVX512VBMI_X64 => Avx512Vbmi, InstructionSet.X64_AVX512VBMI_VL => Avx512Vbmi_vl, InstructionSet.X64_AVX512VBMI_VL_X64 => Avx512Vbmi_vl, - InstructionSet.X64_X86Serialize => Serialize, - InstructionSet.X64_X86Serialize_X64 => Serialize, // SSE and SSE2 are baseline ISAs - they're always available InstructionSet.X64_SSE => 0, diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 78add98bf5499b..aa491393c414bd 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -6341,10 +6341,12 @@ void MethodContext::recGetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HAN GetLoongArch64PassStructInRegisterFlags->Add(key, value); DEBUG_REC(dmpGetLoongArch64PassStructInRegisterFlags(key, value)); } + void MethodContext::dmpGetLoongArch64PassStructInRegisterFlags(DWORDLONG key, DWORD value) { printf("GetLoongArch64PassStructInRegisterFlags key %016" PRIX64 " value-%08X", key, value); } + DWORD MethodContext::repGetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd) { DWORDLONG key = CastHandle(structHnd); @@ -6364,10 +6366,12 @@ void MethodContext::recGetRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE GetRISCV64PassStructInRegisterFlags->Add(key, value); DEBUG_REC(dmpGetRISCV64PassStructInRegisterFlags(key, 
value)); } + void MethodContext::dmpGetRISCV64PassStructInRegisterFlags(DWORDLONG key, DWORD value) { printf("GetRISCV64PassStructInRegisterFlags key %016" PRIX64 " value-%08X", key, value); } + DWORD MethodContext::repGetRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd) { DWORDLONG key = CastHandle(structHnd); diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h index 814ae29b281265..e6dacd31f2a3ca 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h @@ -580,7 +580,7 @@ class MethodContext void recGetIsClassInitedFlagAddress(CORINFO_CLASS_HANDLE cls, CORINFO_CONST_LOOKUP* addr, int* offset, bool result); void dmpGetIsClassInitedFlagAddress(DWORDLONG key, const Agnostic_GetIsClassInitedFlagAddress& value); bool repGetIsClassInitedFlagAddress(CORINFO_CLASS_HANDLE cls, CORINFO_CONST_LOOKUP* addr, int* offset); - + void recGetStaticBaseAddress(CORINFO_CLASS_HANDLE cls, bool isGc, CORINFO_CONST_LOOKUP* addr, bool result); void dmpGetStaticBaseAddress(DLD key, const Agnostic_GetStaticBaseAddress& value); bool repGetStaticBaseAddress(CORINFO_CLASS_HANDLE cls, bool isGc, CORINFO_CONST_LOOKUP* addr); diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 7088fbee5d6b0c..afb7ab5b7748ea 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1520,10 +1520,7 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_POPCNT); } - const int requiredAvxEcxFlags = (1 << 27) // OSXSAVE - | (1 << 28); // AVX - - if ((cpuidInfo[CPUID_ECX] & requiredAvxEcxFlags) == requiredAvxEcxFlags) + if (((cpuidInfo[CPUID_ECX] & (1 << 27)) != 0) && ((cpuidInfo[CPUID_ECX] & (1 << 28)) != 0)) // OSXSAVE & AVX { if(DoesOSSupportAVX() && (xmmYmmStateSupport() == 1)) // XGETBV == 11 { @@ -1986,41 +1983,6 @@ void EEJitManager::SetCpuInfo() #endif // TARGET_X86 || TARGET_AMD64 
m_CPUCompileFlags = CPUCompileFlags; - -#if defined(TARGET_X86) || defined(TARGET_AMD64) - if (xarchCpuInfo.IsGenuineIntel) - { - // Some architectures can experience frequency throttling when executing - // executing 512-bit width instructions. To account for this we set the - // default preferred vector width to 256-bits in some scenarios. Power - // users can override this with `DOTNET_PreferredVectorBitWith=512` to - // allow using such instructions where hardware support is available. - - if (xarchCpuInfo.FamilyId == 0x06) - { - if (xarchCpuInfo.ExtendedModelId == 0x05) - { - if (xarchCpuInfo.Model == 0x05) - { - // * Skylake (Server) - // * Cascade Lake - // * Cooper Lake - - CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_VECTOR512_THROTTLING); - } - } - else if (xarchCpuInfo.ExtendedModelId == 0x06) - { - if (xarchCpuInfo.Model == 0x06) - { - // * Cannon Lake - - CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_VECTOR512_THROTTLING); - } - } - } - } -#endif // TARGET_X86 || TARGET_AMD64 } // Define some data that we can use to get a better idea of what happened when we get a Watson dump that indicates the JIT failed to load. 
diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h index e07dea6730b644..8e547e4cc17cef 100644 --- a/src/coreclr/vm/codeman.h +++ b/src/coreclr/vm/codeman.h @@ -936,7 +936,7 @@ class RangeSectionMap { // Upgrade to non-collectible #ifdef _DEBUG - TADDR initialValue = + TADDR initialValue = #endif InterlockedCompareExchangeT(&_ptr, ptr - 1, ptr); assert(initialValue == ptr || initialValue == (ptr - 1)); @@ -1052,7 +1052,7 @@ class RangeSectionMap auto levelNew = static_castVolatileLoad(NULL))[0])>(AllocateLevel()); if (levelNew == NULL) return NULL; - + if (!outerLevel->Install(levelNew, collectible)) { // Handle race where another thread grew the table @@ -1118,7 +1118,7 @@ class RangeSectionMap auto rangeSectionL3 = rangeSectionL3Ptr->VolatileLoadWithoutBarrier(pLockState); if (rangeSectionL3 == NULL) return NULL; - + auto rangeSectionL2Ptr = &((*rangeSectionL3)[EffectiveBitsForLevel(address, 3)]); if (level == 2) return rangeSectionL2Ptr; @@ -1172,7 +1172,7 @@ class RangeSectionMap // Account for the range not starting at the beginning of a last level fragment rangeSize += pRangeSection->_range.RangeStart() & (bytesAtLastLevel - 1); - + uintptr_t fragmentCount = ((rangeSize - 1) / bytesAtLastLevel) + 1; return fragmentCount; } @@ -1415,7 +1415,7 @@ class RangeSectionMap else { // Since the fragment linked lists are sorted such that the collectible ones are always after the non-collectible ones, this should never happen. - assert(!seenCollectibleRangeList); + assert(!seenCollectibleRangeList); } #endif entryInMapToUpdate = &(entryInMapToUpdate->VolatileLoadWithoutBarrier(pLockState))->pRangeSectionFragmentNext; @@ -1456,7 +1456,7 @@ class RangeSectionMap if (foundMeaningfulValue) break; - + // This level is completely empty. Free it, and then null out the pointer to it. 
pointerToLevelData->Uninstall(); free((void*)rawData); @@ -2662,22 +2662,4 @@ class EECodeInfo void ThrowOutOfMemoryWithinRange(); -// Represents information about an XARCH CPU -union XarchCpuInfo -{ - struct { - uint32_t SteppingId : 4; - uint32_t Model : 4; - uint32_t FamilyId : 4; - uint32_t ProcessorType : 2; - uint32_t IsAuthenticAmd : 1; // Unused bits in the CPUID result - uint32_t IsGenuineIntel : 1; // Unused bits in the CPUID result - uint32_t ExtendedModelId : 4; - uint32_t ExtendedFamilyId : 8; - uint32_t Reserved : 4; // Unused bits in the CPUID result - }; - - uint32_t Value; -}; - #endif // !__CODEMAN_HPP__ diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 48ce54869509dd..85d904b566cdd5 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1800,7 +1800,7 @@ uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isG } assert(typeIndex != TypeIDProvider::INVALID_TYPE_ID); - + EE_TO_JIT_TRANSITION(); return typeIndex; } @@ -1820,7 +1820,6 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->tlsIndex.accessType = IAT_VALUE; pInfo->offsetOfThreadLocalStoragePointer = offsetof(_TEB, ThreadLocalStoragePointer); - if (isGCType) { pInfo->offsetOfThreadStaticBlocks = CEEInfo::ThreadLocalOffset(&t_GCThreadStaticBlocks); @@ -1831,9 +1830,9 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->offsetOfThreadStaticBlocks = CEEInfo::ThreadLocalOffset(&t_NonGCThreadStaticBlocks); pInfo->offsetOfMaxThreadStaticBlocks = CEEInfo::ThreadLocalOffset(&t_NonGCMaxThreadStaticBlocks); } - + pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); - + JIT_TO_EE_TRANSITION_LEAF(); } #else @@ -1865,7 +1864,7 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo->offsetOfThreadStaticBlocks = 0; pInfo->offsetOfMaxThreadStaticBlocks = 0; pInfo->offsetOfGCDataPointer = 0; 
- + JIT_TO_EE_TRANSITION_LEAF(); } #endif // HOST_WINDOWS @@ -2572,6 +2571,7 @@ unsigned CEEInfo::getClassNumInstanceFields (CORINFO_CLASS_HANDLE clsHnd) return result; } + CorInfoType CEEInfo::asCorInfoType (CORINFO_CLASS_HANDLE clsHnd) { CONTRACTL { @@ -13295,7 +13295,7 @@ BOOL TypeLayoutCheck(MethodTable * pMT, PCCOR_SIGNATURE pBlob, BOOL printDiff) DefineFullyQualifiedNameForClass(); printf("Type %s: expected size 0x%08x, actual size 0x%08x\n", - GetFullyQualifiedNameForClass(pMT), dwExpectedSize, dwActualSize); + GetFullyQualifiedNameForClass(pMT), dwExpectedSize, dwActualSize); } else { diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index 2ad51cc126af13..73778a1ca6baba 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -586,10 +586,10 @@ class CEEInfo : public ICorJitInfo #endif protected: - SArray* m_pJitHandles; // GC handles used by JIT - MethodDesc* m_pMethodBeingCompiled; // Top-level method being compiled - Thread * m_pThread; // Cached current thread for faster JIT-EE transitions - CORJIT_FLAGS m_jitFlags; + SArray* m_pJitHandles; // GC handles used by JIT + MethodDesc* m_pMethodBeingCompiled; // Top-level method being compiled + Thread * m_pThread; // Cached current thread for faster JIT-EE transitions + CORJIT_FLAGS m_jitFlags; CORINFO_METHOD_HANDLE getMethodBeingCompiled() { diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Plane.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Plane.cs index a4636a6e76a15f..57c15f4cff4c19 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Plane.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Plane.cs @@ -61,7 +61,7 @@ public Plane(Vector4 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Plane CreateFromVertices(Vector3 point1, Vector3 point2, Vector3 point3) { - if (Vector128.IsHardwareAccelerated) + if (Vector.IsHardwareAccelerated) { Vector3 a = point2 - point1; 
Vector3 b = point3 - point1; @@ -126,7 +126,7 @@ public static float Dot(Plane plane, Vector4 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float DotCoordinate(Plane plane, Vector3 value) { - if (Vector128.IsHardwareAccelerated) + if (Vector.IsHardwareAccelerated) { return Vector3.Dot(plane.Normal, value) + plane.D; } @@ -146,7 +146,7 @@ public static float DotCoordinate(Plane plane, Vector3 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float DotNormal(Plane plane, Vector3 value) { - if (Vector128.IsHardwareAccelerated) + if (Vector.IsHardwareAccelerated) { return Vector3.Dot(plane.Normal, value); } @@ -164,7 +164,7 @@ public static float DotNormal(Plane plane, Vector3 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Plane Normalize(Plane value) { - if (Vector128.IsHardwareAccelerated) + if (Vector.IsHardwareAccelerated) { float normalLengthSquared = value.Normal.LengthSquared(); if (MathF.Abs(normalLengthSquared - 1.0f) < NormalizeEpsilon) diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index aa3ca7b1e3d2d4..06f6a027c94dda 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -50,77 +50,39 @@ static int Main() // // The test is compiled with multiple defines to test this. - bool vectorsAccelerated = true; - bool? Sse12 = true; - #if BASELINE_INTRINSICS + bool vectorsAccelerated = true; + int byteVectorLength = 16; + bool? Sse2AndBelow = true; bool? Sse3Group = null; bool? AesLzPcl = null; bool? Sse4142 = null; bool? PopCnt = null; - bool? Avx1 = false; - bool? Avx2 = false; - bool? Fma = null; - bool? Bmi12 = null; + bool? Avx12 = false; + bool? FmaBmi12 = false; bool? Avxvnni = false; - bool? Avx512Group = false; - bool? 
Avx512Vbmi = false; -#elif SSE42_INTRINSICS +#elif NON_VEX_INTRINSICS + bool vectorsAccelerated = true; + int byteVectorLength = 16; + bool? Sse2AndBelow = true; bool? Sse3Group = true; bool? AesLzPcl = null; bool? Sse4142 = true; bool? PopCnt = null; - bool? Avx1 = false; - bool? Avx2 = false; - bool? Fma = null; - bool? Bmi12 = null; + bool? Avx12 = false; + bool? FmaBmi12 = false; bool? Avxvnni = false; - bool? Avx512Group = false; - bool? Avx512Vbmi = false; -#elif AVX_INTRINSIC - bool? Sse3Group = true; - bool? AesLzPcl = null; - bool? Sse4142 = true; - bool? PopCnt = null; - bool? Avx1 = true; - bool? Avx2 = false; - bool? Fma = null; - bool? Bmi12 = null; - bool? Avxvnni = null; - bool? Avx512Group = false; - bool? Avx512Vbmi = false; -#elif AVX2_INTRINSICS - bool? Sse3Group = true; - bool? AesLzPcl = null; - bool? Sse4142 = true; - bool? PopCnt = null; - bool? Avx1 = true; - bool? Avx2 = true; - bool? Fma = null; - bool? Bmi12 = null; - bool? Avxvnni = null; - bool? Avx512Group = false; - bool? Avx512Vbmi = false; -#elif AVX512_INTRINSICS +#elif VEX_INTRINSICS + bool vectorsAccelerated = true; + int byteVectorLength = 32; + bool? Sse2AndBelow = true; bool? Sse3Group = true; bool? AesLzPcl = null; bool? Sse4142 = true; bool? PopCnt = null; - bool? Avx1 = true; - bool? Avx2 = true; - bool? Fma = true; - bool? Bmi12 = null; + bool? Avx12 = true; + bool? FmaBmi12 = null; bool? Avxvnni = null; - bool? Avx512Group = true; - bool? Avx512Vbmi = null; -#else -#error Who dis? -#endif - -#if VECTORT128_INTRINSICS - int byteVectorLength = 16; -#elif VECTORT256_INTRINSICS - int byteVectorLength = 32; #else #error Who dis? 
#endif @@ -135,11 +97,11 @@ static int Main() throw new Exception($"Unexpected vector length - expected {byteVectorLength}, got {Vector.Count}"); } - Check("Sse", Sse12, &SseIsSupported, Sse.IsSupported, () => Sse.Subtract(Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); - Check("Sse.X64", Sse12, &SseX64IsSupported, Sse.X64.IsSupported, () => Sse.X64.ConvertToInt64WithTruncation(Vector128.Zero) == 0); + Check("Sse", Sse2AndBelow, &SseIsSupported, Sse.IsSupported, () => Sse.Subtract(Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); + Check("Sse.X64", Sse2AndBelow, &SseX64IsSupported, Sse.X64.IsSupported, () => Sse.X64.ConvertToInt64WithTruncation(Vector128.Zero) == 0); - Check("Sse2", Sse12, &Sse2IsSupported, Sse2.IsSupported, () => Sse2.Extract(Vector128.Zero, 0) == 0); - Check("Sse2.X64", Sse12, &Sse2X64IsSupported, Sse2.X64.IsSupported, () => Sse2.X64.ConvertToInt64(Vector128.Zero) == 0); + Check("Sse2", Sse2AndBelow, &Sse2IsSupported, Sse2.IsSupported, () => Sse2.Extract(Vector128.Zero, 0) == 0); + Check("Sse2.X64", Sse2AndBelow, &Sse2X64IsSupported, Sse2.X64.IsSupported, () => Sse2.X64.ConvertToInt64(Vector128.Zero) == 0); Check("Sse3", Sse3Group, &Sse3IsSupported, Sse3.IsSupported, () => Sse3.MoveHighAndDuplicate(Vector128.Zero).Equals(Vector128.Zero)); Check("Sse3.X64", Sse3Group, &Sse3X64IsSupported, Sse3.X64.IsSupported, null); @@ -156,20 +118,20 @@ static int Main() Check("Aes", AesLzPcl, &AesIsSupported, Aes.IsSupported, () => Aes.KeygenAssist(Vector128.Zero, 0).Equals(Vector128.Create((byte)99))); Check("Aes.X64", AesLzPcl, &AesX64IsSupported, Aes.X64.IsSupported, null); - Check("Avx", Avx1, &AvxIsSupported, Avx.IsSupported, () => Avx.Add(Vector256.Zero, Vector256.Zero).Equals(Vector256.Zero)); - Check("Avx.X64", Avx1, &AvxX64IsSupported, Avx.X64.IsSupported, null); + Check("Avx", Avx12, &AvxIsSupported, Avx.IsSupported, () => Avx.Add(Vector256.Zero, Vector256.Zero).Equals(Vector256.Zero)); + Check("Avx.X64", Avx12, &AvxX64IsSupported, 
Avx.X64.IsSupported, null); - Check("Avx2", Avx2, &Avx2IsSupported, Avx2.IsSupported, () => Avx2.Abs(Vector256.Zero).Equals(Vector256.Zero)); - Check("Avx2.X64", Avx2, &Avx2X64IsSupported, Avx2.X64.IsSupported, null); + Check("Avx2", Avx12, &Avx2IsSupported, Avx2.IsSupported, () => Avx2.Abs(Vector256.Zero).Equals(Vector256.Zero)); + Check("Avx2.X64", Avx12, &Avx2X64IsSupported, Avx2.X64.IsSupported, null); - Check("Bmi1", Bmi12, &Bmi1IsSupported, Bmi1.IsSupported, () => Bmi1.AndNot(0, 0) == 0); - Check("Bmi1.X64", Bmi12, &Bmi1X64IsSupported, Bmi1.X64.IsSupported, () => Bmi1.X64.AndNot(0, 0) == 0); + Check("Bmi1", FmaBmi12, &Bmi1IsSupported, Bmi1.IsSupported, () => Bmi1.AndNot(0, 0) == 0); + Check("Bmi1.X64", FmaBmi12, &Bmi1X64IsSupported, Bmi1.X64.IsSupported, () => Bmi1.X64.AndNot(0, 0) == 0); - Check("Bmi2", Bmi12, &Bmi2IsSupported, Bmi2.IsSupported, () => Bmi2.MultiplyNoFlags(0, 0) == 0); - Check("Bmi2.X64", Bmi12, &Bmi2X64IsSupported, Bmi2.X64.IsSupported, () => Bmi2.X64.MultiplyNoFlags(0, 0) == 0); + Check("Bmi2", FmaBmi12, &Bmi2IsSupported, Bmi2.IsSupported, () => Bmi2.MultiplyNoFlags(0, 0) == 0); + Check("Bmi2.X64", FmaBmi12, &Bmi2X64IsSupported, Bmi2.X64.IsSupported, () => Bmi2.X64.MultiplyNoFlags(0, 0) == 0); - Check("Fma", Fma, &FmaIsSupported, Fma.IsSupported, () => Fma.MultiplyAdd(Vector128.Zero, Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); - Check("Fma.X64", Fma, &FmaX64IsSupported, Fma.X64.IsSupported, null); + Check("Fma", FmaBmi12, &FmaIsSupported, Fma.IsSupported, () => Fma.MultiplyAdd(Vector128.Zero, Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); + Check("Fma.X64", FmaBmi12, &FmaX64IsSupported, Fma.X64.IsSupported, null); Check("Lzcnt", AesLzPcl, &LzcntIsSupported, Lzcnt.IsSupported, () => Lzcnt.LeadingZeroCount(0) == 32); Check("Lzcnt.X64", AesLzPcl, &LzcntX64IsSupported, Lzcnt.X64.IsSupported, () => Lzcnt.X64.LeadingZeroCount(0) == 64); @@ -183,26 +145,6 @@ static int Main() Check("AvxVnni", Avxvnni, &AvxVnniIsSupported, 
AvxVnni.IsSupported, () => AvxVnni.MultiplyWideningAndAdd(Vector128.Zero, Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); Check("AvxVnni.X64", Avxvnni, &AvxVnniX64IsSupported, AvxVnni.X64.IsSupported, null); - Check("Avx512F", Avx512Group, &Avx512FIsSupported, Avx512F.IsSupported, () => Avx512F.Abs(Vector512.Zero).Equals(Vector512.Zero)); - Check("Avx512F.VL", Avx512Group, &Avx512FVLIsSupported, Avx512F.VL.IsSupported, null); - Check("Avx512F.X64", Avx512Group, &Avx512FX64IsSupported, Avx512F.X64.IsSupported, null); - - Check("Avx512BW", Avx512Group, &Avx512BWIsSupported, Avx512BW.IsSupported, () => Avx512F.Abs(Vector512.Zero).Equals(Vector512.Zero)); - Check("Avx512BW.VL", Avx512Group, &Avx512BWVLIsSupported, Avx512BW.VL.IsSupported, null); - Check("Avx512BW.X64", Avx512Group, &Avx512BWX64IsSupported, Avx512BW.X64.IsSupported, null); - - Check("Avx512CD", Avx512Group, &Avx512CDIsSupported, Avx512CD.IsSupported, null); - Check("Avx512CD.VL", Avx512Group, &Avx512CDVLIsSupported, Avx512CD.VL.IsSupported, null); - Check("Avx512CD.X64", Avx512Group, &Avx512CDX64IsSupported, Avx512CD.X64.IsSupported, null); - - Check("Avx512DQ", Avx512Group, &Avx512DQIsSupported, Avx512DQ.IsSupported, () => Avx512F.And(Vector512.Zero, Vector512.Zero).Equals(Vector512.Zero)); - Check("Avx512DQ.VL", Avx512Group, &Avx512DQVLIsSupported, Avx512DQ.VL.IsSupported, null); - Check("Avx512DQ.X64", Avx512Group, &Avx512DQX64IsSupported, Avx512DQ.X64.IsSupported, null); - - Check("Avx512Vbmi", Avx512Group, &Avx512VbmiIsSupported, Avx512Vbmi.IsSupported, () => Avx512F.PermuteVar64x8(Vector512.Zero, Vector512.Zero).Equals(Vector512.Zero)); - Check("Avx512Vbmi.VL", Avx512Group, &Avx512VbmiVLIsSupported, Avx512Vbmi.VL.IsSupported, null); - Check("Avx512Vbmi.X64", Avx512Group, &Avx512VbmiX64IsSupported, Avx512Vbmi.X64.IsSupported, null); - return s_success ? 
100 : 1; } @@ -241,21 +183,6 @@ static int Main() static bool PopcntX64IsSupported() => Popcnt.X64.IsSupported; static bool AvxVnniIsSupported() => AvxVnni.IsSupported; static bool AvxVnniX64IsSupported() => AvxVnni.X64.IsSupported; - static bool Avx512FIsSupported() => Avx512F.IsSupported; - static bool Avx512FVLIsSupported() => Avx512F.VL.IsSupported; - static bool Avx512FX64IsSupported() => Avx512F.X64.IsSupported; - static bool Avx512BWIsSupported() => Avx512BW.IsSupported; - static bool Avx512BWVLIsSupported() => Avx512BW.VL.IsSupported; - static bool Avx512BWX64IsSupported() => Avx512BW.X64.IsSupported; - static bool Avx512CDIsSupported() => Avx512CD.IsSupported; - static bool Avx512CDVLIsSupported() => Avx512CD.VL.IsSupported; - static bool Avx512CDX64IsSupported() => Avx512CD.X64.IsSupported; - static bool Avx512DQIsSupported() => Avx512DQ.IsSupported; - static bool Avx512DQVLIsSupported() => Avx512DQ.VL.IsSupported; - static bool Avx512DQX64IsSupported() => Avx512DQ.X64.IsSupported; - static bool Avx512VbmiIsSupported() => Avx512Vbmi.IsSupported; - static bool Avx512VbmiVLIsSupported() => Avx512Vbmi.VL.IsSupported; - static bool Avx512VbmiX64IsSupported() => Avx512Vbmi.X64.IsSupported; static bool IsConstantTrue(delegate* code) { diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Baseline.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Baseline.csproj index 9e5d0c79e268e1..e49eb84629044d 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Baseline.csproj +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Baseline.csproj @@ -5,7 +5,7 @@ 0 true true - $(DefineConstants);BASELINE_INTRINSICS;VECTORT128_INTRINSICS + $(DefineConstants);BASELINE_INTRINSICS diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx.csproj deleted file mode 100644 index 1ccf630c3620c6..00000000000000 --- 
a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx.csproj +++ /dev/null @@ -1,39 +0,0 @@ - - - Exe - BuildAndRun - 0 - true - true - $(DefineConstants);AVX_INTRINSICS;VECTORT128_INTRINSICS - - - - - - - - /dev/null - if [ $? -ne 0 ]; then - echo No support for AVX, test not applicable. - exit 0 - fi - fi - if [[ "$OSTYPE" == "linux"* ]]; then - if ! grep -q '^flags.*avx' /proc/cpuinfo 2>/dev/null; then - echo No support for AVX, test not applicable. - exit 0 - fi - fi -]]> - - - - - - diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx2_VectorT128.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx2_VectorT128.csproj deleted file mode 100644 index e292f10ab66a07..00000000000000 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx2_VectorT128.csproj +++ /dev/null @@ -1,40 +0,0 @@ - - - Exe - BuildAndRun - 0 - true - true - $(DefineConstants);AVX2_INTRINSICS;VECTORT128_INTRINSICS - - - - - - - - - /dev/null - if [ $? -ne 0 ]; then - echo No support for AVX2, test not applicable. - exit 0 - fi - fi - if [[ "$OSTYPE" == "linux"* ]]; then - if ! grep -q '^flags.*avx2' /proc/cpuinfo 2>/dev/null; then - echo No support for AVX2, test not applicable. - exit 0 - fi - fi -]]> - - - - - - diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx512.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx512.csproj deleted file mode 100644 index c45d11276c9676..00000000000000 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx512.csproj +++ /dev/null @@ -1,33 +0,0 @@ - - - Exe - BuildAndRun - 0 - true - true - $(DefineConstants);AVX512_INTRINSICS;VECTORT256_INTRINSICS - - - - - - - - /dev/null; then - echo No support for AVX512, test not applicable. 
- exit 0 - fi - fi -]]> - - - - - - diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx512_VectorT128.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx512_VectorT128.csproj deleted file mode 100644 index 4928bafbce048f..00000000000000 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx512_VectorT128.csproj +++ /dev/null @@ -1,34 +0,0 @@ - - - Exe - BuildAndRun - 0 - true - true - $(DefineConstants);AVX512_INTRINSICS;VECTORT128_INTRINSICS - - - - - - - - - /dev/null; then - echo No support for AVX512, test not applicable. - exit 0 - fi - fi -]]> - - - - - - diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Sse42.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64NonVex.csproj similarity index 83% rename from src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Sse42.csproj rename to src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64NonVex.csproj index bf1a725b11a44d..5e8d35d67bf3da 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Sse42.csproj +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64NonVex.csproj @@ -5,7 +5,7 @@ 0 true true - $(DefineConstants);SSE42_INTRINSICS;VECTORT128_INTRINSICS + $(DefineConstants);NON_VEX_INTRINSICS diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx2.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Vex.csproj similarity index 91% rename from src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx2.csproj rename to src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Vex.csproj index 626f88edc7278d..983436eab7d79f 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Avx2.csproj +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/x64Vex.csproj @@ -5,7 +5,7 @@ 0 true true - $(DefineConstants);AVX2_INTRINSICS;VECTORT256_INTRINSICS + $(DefineConstants);VEX_INTRINSICS From e96eca9c1f47cb1be6e93a898c4f582de63c8882 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 19 May 2023 11:19:52 
-0700 Subject: [PATCH 11/19] Ensure that the optimistic flags are a strict superset of the supported flags --- .../tools/Common/Compiler/InstructionSetSupport.cs | 13 +++++++++++-- src/coreclr/tools/Common/InstructionSetHelpers.cs | 7 ++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index af3ecfcdffbb9d..b99a3b2ce42665 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -198,15 +198,24 @@ public static InstructionSetFlags GetNonSpecifiableInstructionSetsForArch(Target return s_nonSpecifiableInstructionSets[architecture]; } - private readonly SortedSet _supportedInstructionSets = new SortedSet(); - private readonly SortedSet _unsupportedInstructionSets = new SortedSet(); + private readonly SortedSet _supportedInstructionSets; + private readonly SortedSet _unsupportedInstructionSets; private readonly TargetArchitecture _architecture; public InstructionSetSupportBuilder(TargetArchitecture architecture) { + _supportedInstructionSets = new SortedSet(); + _unsupportedInstructionSets = new SortedSet(); _architecture = architecture; } + public InstructionSetSupportBuilder(InstructionSetSupportBuilder other) + { + _supportedInstructionSets = new SortedSet(other._supportedInstructionSets); + _unsupportedInstructionSets = new SortedSet(other._unsupportedInstructionSets); + _architecture = other._architecture; + } + /// /// Add a supported instruction set to the specified list. 
/// diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index 307f49ca791c51..bf0f438a54485b 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -78,7 +78,12 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru (string specifiedInstructionSet, string impliedInstructionSet) => throw new CommandLineException(string.Format(invalidImplicationMessage, specifiedInstructionSet, impliedInstructionSet))); - InstructionSetSupportBuilder optimisticInstructionSetSupportBuilder = new InstructionSetSupportBuilder(targetArchitecture); + // Due to expansion by implication, the optimistic set is most often a pure superset of the supported set + // + // However, there are some gaps in cases like Arm64 neon where none of the optimistic sets imply it. Likewise, + // the optimistic set would be missing the explicitly unsupported sets. So we effectively clone the list and + // tack on the additional optimistic bits after. This ensures the optimistic set remains an accurate superset + InstructionSetSupportBuilder optimisticInstructionSetSupportBuilder = new InstructionSetSupportBuilder(instructionSetSupportBuilder); // Optimistically assume some instruction sets are present. 
if (targetArchitecture == TargetArchitecture.X86 || targetArchitecture == TargetArchitecture.X64) From 92c0307915bc869b1f70d6c9e5c3b0e092496976 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 19 May 2023 14:08:06 -0700 Subject: [PATCH 12/19] Make VectorT128/256/512 proper instruction sets and only allow one to be active at a time --- src/coreclr/inc/corinfoinstructionset.h | 31 ++++--- src/coreclr/inc/readytoruninstructionset.h | 3 + src/coreclr/jit/compiler.h | 4 + .../Common/Compiler/InstructionSetSupport.cs | 33 ++++--- .../Runtime/ReadyToRunInstructionSet.cs | 3 + .../Runtime/ReadyToRunInstructionSetHelper.cs | 14 +-- .../JitInterface/CorInfoInstructionSet.cs | 90 +++++++++++-------- .../ThunkGenerator/InstructionSetDesc.txt | 19 ++-- src/coreclr/vm/codeman.cpp | 8 +- 9 files changed, 117 insertions(+), 88 deletions(-) diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 4d08b27a7f74d3..7a628c98344af9 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -567,12 +567,6 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_Vector256); if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_Vector512); - if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) - resultflags.RemoveInstructionSet(InstructionSet_VectorT128); - if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) - resultflags.RemoveInstructionSet(InstructionSet_VectorT256); - if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_VectorT512); if 
(resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) @@ -599,6 +593,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); + if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT128); + if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT256); + if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT512); #endif // TARGET_AMD64 #ifdef TARGET_X86 if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) @@ -637,12 +637,6 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_Vector256); if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) resultflags.RemoveInstructionSet(InstructionSet_Vector512); - if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) - resultflags.RemoveInstructionSet(InstructionSet_VectorT128); - if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) - 
resultflags.RemoveInstructionSet(InstructionSet_VectorT256); - if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_VectorT512); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) @@ -669,6 +663,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); + if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT128); + if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT256); + if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT512); #endif // TARGET_X86 } while (!oldflags.Equals(resultflags)); @@ -961,6 +961,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Sha256: return InstructionSet_Sha256; case READYTORUN_INSTRUCTION_Atomics: return InstructionSet_Atomics; case READYTORUN_INSTRUCTION_Rcpc: return InstructionSet_Rcpc; + case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; #endif // TARGET_ARM64 #ifdef TARGET_AMD64 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; @@ -992,6 +993,9 @@ 
inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL; case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI; case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL; + case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; + case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; + case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; #endif // TARGET_AMD64 #ifdef TARGET_X86 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; @@ -1023,6 +1027,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL; case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI; case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL; + case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; + case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; + case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; #endif // TARGET_X86 default: diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index faf4a5028cc1c9..0a9a78e03f6c15 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -47,6 +47,9 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Avx512DQ_VL=36, READYTORUN_INSTRUCTION_Avx512Vbmi=37, READYTORUN_INSTRUCTION_Avx512Vbmi_VL=38, + READYTORUN_INSTRUCTION_VectorT128=39, + READYTORUN_INSTRUCTION_VectorT256=40, + READYTORUN_INSTRUCTION_VectorT512=41, }; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 1e83ec89e4ba33..1ae3b2ce079ae7 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8650,8 +8650,12 @@ 
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX CLANG_FORMAT_COMMENT_ANCHOR; #if defined(TARGET_XARCH) + // TODO-XArch: Add support for 512-bit Vector + assert(!compIsaSupportedDebugOnly(InstructionSet_VectorT512)); + if (compExactlyDependsOn(InstructionSet_VectorT256)) { + assert(!compIsaSupportedDebugOnly(InstructionSet_VectorT128)); return YMM_REGSIZE_BYTES; } else if (compExactlyDependsOn(InstructionSet_VectorT128)) diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index b99a3b2ce42665..46528387fec8ca 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -96,12 +96,16 @@ public SimdVectorLength GetVectorTSimdVector() { if ((_targetArchitecture == TargetArchitecture.X64) || (_targetArchitecture == TargetArchitecture.X86)) { - Debug.Assert(InstructionSet.X64_VectorT512 == InstructionSet.X86_VectorT512); - Debug.Assert(InstructionSet.X64_VectorT256 == InstructionSet.X86_VectorT256); Debug.Assert(InstructionSet.X64_VectorT128 == InstructionSet.X86_VectorT128); + Debug.Assert(InstructionSet.X64_VectorT256 == InstructionSet.X86_VectorT256); + Debug.Assert(InstructionSet.X64_VectorT512 == InstructionSet.X86_VectorT512); + + // TODO-XArch: Add support for 512-bit Vector + Debug.Assert(!IsInstructionSetSupported(InstructionSet.X64_VectorT512)); if (IsInstructionSetSupported(InstructionSet.X64_VectorT256)) { + Debug.Assert(!IsInstructionSetSupported(InstructionSet.X64_VectorT128)); return SimdVectorLength.Vector256Bit; } else if (IsInstructionSetSupported(InstructionSet.X64_VectorT128)) @@ -318,14 +322,18 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, case TargetArchitecture.X64: case TargetArchitecture.X86: { - Debug.Assert(InstructionSet.X86_AVX512F == InstructionSet.X64_AVX512F); - Debug.Assert(InstructionSet.X86_AVX2 == InstructionSet.X64_AVX2); 
Debug.Assert(InstructionSet.X86_SSE2 == InstructionSet.X64_SSE2); + Debug.Assert(InstructionSet.X86_AVX2 == InstructionSet.X64_AVX2); + Debug.Assert(InstructionSet.X86_AVX512F == InstructionSet.X64_AVX512F); - Debug.Assert(InstructionSet.X86_VectorT512 == InstructionSet.X64_VectorT512); - Debug.Assert(InstructionSet.X86_VectorT256 == InstructionSet.X64_VectorT256); Debug.Assert(InstructionSet.X86_VectorT128 == InstructionSet.X64_VectorT128); + Debug.Assert(InstructionSet.X86_VectorT256 == InstructionSet.X64_VectorT256); + Debug.Assert(InstructionSet.X86_VectorT512 == InstructionSet.X64_VectorT512); + + // We only want one size supported for Vector and we want the other sizes explicitly + // unsupported to ensure we throw away the given methods if runtime picks a larger size + Debug.Assert(supportedInstructionSets.HasInstructionSet(InstructionSet.X86_SSE2)); Debug.Assert((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 128)); supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT128); @@ -333,16 +341,14 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, { if ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256)) { + supportedInstructionSets.RemoveInstructionSet(InstructionSet.X86_VectorT128); supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT256); - } - if (supportedInstructionSets.HasInstructionSet(InstructionSet.X86_AVX512F)) - { - if ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 512)) - { - supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT512); - } + unsupportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT128); + unsupportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT512); } + + // TODO-XArch: Add support for 512-bit Vector } break; } @@ -356,7 +362,6 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, } } - return true; } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs 
b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index 0068f0be007764..32b60ecbcda7da 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -50,6 +50,9 @@ public enum ReadyToRunInstructionSet Avx512DQ_VL=36, Avx512Vbmi=37, Avx512Vbmi_VL=38, + VectorT128=39, + VectorT256=40, + VectorT512=41, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 19898dfc4dcd8d..f593808be32989 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -44,7 +44,7 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.ARM64_Vector128: return null; case InstructionSet.ARM64_Dczva: return null; case InstructionSet.ARM64_Rcpc: return ReadyToRunInstructionSet.Rcpc; - case InstructionSet.ARM64_VectorT128: return null; + case InstructionSet.ARM64_VectorT128: return ReadyToRunInstructionSet.VectorT128; default: throw new Exception("Unknown instruction set"); } @@ -115,9 +115,9 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_AVX512VBMI_X64: return ReadyToRunInstructionSet.Avx512Vbmi; case InstructionSet.X64_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL; case InstructionSet.X64_AVX512VBMI_VL_X64: return ReadyToRunInstructionSet.Avx512Vbmi_VL; - case InstructionSet.X64_VectorT128: return null; - case InstructionSet.X64_VectorT256: return null; - case InstructionSet.X64_VectorT512: return null; + case InstructionSet.X64_VectorT128: return ReadyToRunInstructionSet.VectorT128; + case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256; + case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512; default: throw new 
Exception("Unknown instruction set"); } @@ -188,9 +188,9 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_AVX512VBMI_X64: return null; case InstructionSet.X86_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL; case InstructionSet.X86_AVX512VBMI_VL_X64: return null; - case InstructionSet.X86_VectorT128: return null; - case InstructionSet.X86_VectorT256: return null; - case InstructionSet.X86_VectorT512: return null; + case InstructionSet.X86_VectorT128: return ReadyToRunInstructionSet.VectorT128; + case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256; + case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 8a5948c69d6507..386249deb8270e 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -458,7 +458,6 @@ public static InstructionSet ConvertToImpliedInstructionSetForVectorInstructionS { case InstructionSet.ARM64_Vector64: return InstructionSet.ARM64_AdvSimd; case InstructionSet.ARM64_Vector128: return InstructionSet.ARM64_AdvSimd; - case InstructionSet.ARM64_VectorT128: return InstructionSet.ARM64_AdvSimd; } break; case TargetArchitecture.X64: @@ -467,9 +466,6 @@ public static InstructionSet ConvertToImpliedInstructionSetForVectorInstructionS case InstructionSet.X64_Vector128: return InstructionSet.X64_SSE; case InstructionSet.X64_Vector256: return InstructionSet.X64_AVX; case InstructionSet.X64_Vector512: return InstructionSet.X64_AVX512F; - case InstructionSet.X64_VectorT128: return InstructionSet.X64_SSE2; - case InstructionSet.X64_VectorT256: return InstructionSet.X64_AVX2; - case InstructionSet.X64_VectorT512: return InstructionSet.X64_AVX512F; } break; case 
TargetArchitecture.X86: @@ -478,9 +474,6 @@ public static InstructionSet ConvertToImpliedInstructionSetForVectorInstructionS case InstructionSet.X86_Vector128: return InstructionSet.X86_SSE; case InstructionSet.X86_Vector256: return InstructionSet.X86_AVX; case InstructionSet.X86_Vector512: return InstructionSet.X86_AVX512F; - case InstructionSet.X86_VectorT128: return InstructionSet.X86_SSE2; - case InstructionSet.X86_VectorT256: return InstructionSet.X86_AVX2; - case InstructionSet.X86_VectorT512: return InstructionSet.X86_AVX512F; } break; } @@ -705,12 +698,6 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX); if (resultflags.HasInstructionSet(InstructionSet.X64_Vector512)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT128)) - resultflags.AddInstructionSet(InstructionSet.X64_SSE2); - if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT256)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX2); - if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI)) resultflags.AddInstructionSet(InstructionSet.X64_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X64_MOVBE)) @@ -737,6 +724,12 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT128)) + resultflags.AddInstructionSet(InstructionSet.X64_SSE2); + if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT256)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX2); + if 
(resultflags.HasInstructionSet(InstructionSet.X64_VectorT512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); break; case TargetArchitecture.X86: @@ -776,12 +769,6 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_AVX); if (resultflags.HasInstructionSet(InstructionSet.X86_Vector512)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT128)) - resultflags.AddInstructionSet(InstructionSet.X86_SSE2); - if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT256)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX2); - if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNI)) resultflags.AddInstructionSet(InstructionSet.X86_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X86_MOVBE)) @@ -808,6 +795,12 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL); + if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT128)) + resultflags.AddInstructionSet(InstructionSet.X86_SSE2); + if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT256)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); break; } } while (!oldflags.Equals(resultflags)); @@ -964,12 +957,6 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_Vector256); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) 
resultflags.AddInstructionSet(InstructionSet.X64_Vector512); - if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) - resultflags.AddInstructionSet(InstructionSet.X64_VectorT128); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) - resultflags.AddInstructionSet(InstructionSet.X64_VectorT256); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X64_VectorT512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42)) @@ -996,6 +983,12 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) + resultflags.AddInstructionSet(InstructionSet.X64_VectorT128); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X64_VectorT256); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X64_VectorT512); break; case TargetArchitecture.X86: @@ -1035,12 +1028,6 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_Vector256); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X86_Vector512); - if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2)) - resultflags.AddInstructionSet(InstructionSet.X86_VectorT128); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) - resultflags.AddInstructionSet(InstructionSet.X86_VectorT256); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) - 
resultflags.AddInstructionSet(InstructionSet.X86_VectorT512); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42)) @@ -1067,6 +1054,12 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL); + if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2)) + resultflags.AddInstructionSet(InstructionSet.X86_VectorT128); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X86_VectorT256); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X86_VectorT512); break; } } while (!oldflags.Equals(resultflags)); @@ -1137,7 +1130,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("Vector128", "", InstructionSet.ARM64_Vector128, false); yield return new InstructionSetInfo("Dczva", "", InstructionSet.ARM64_Dczva, false); yield return new InstructionSetInfo("rcpc", "", InstructionSet.ARM64_Rcpc, true); - yield return new InstructionSetInfo("VectorT128", "", InstructionSet.ARM64_VectorT128, false); + yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.ARM64_VectorT128, true); break; case TargetArchitecture.X64: @@ -1173,9 +1166,9 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X64_AVX512DQ_VL, true); yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X64_AVX512VBMI, true); yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X64_AVX512VBMI_VL, true); - yield return new 
InstructionSetInfo("VectorT128", "", InstructionSet.X64_VectorT128, false); - yield return new InstructionSetInfo("VectorT256", "", InstructionSet.X64_VectorT256, false); - yield return new InstructionSetInfo("VectorT512", "", InstructionSet.X64_VectorT512, false); + yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X64_VectorT128, true); + yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X64_VectorT256, true); + yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X64_VectorT512, true); break; case TargetArchitecture.X86: @@ -1211,9 +1204,9 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X86_AVX512DQ_VL, true); yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X86_AVX512VBMI, true); yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X86_AVX512VBMI_VL, true); - yield return new InstructionSetInfo("VectorT128", "", InstructionSet.X86_VectorT128, false); - yield return new InstructionSetInfo("VectorT256", "", InstructionSet.X86_VectorT256, false); - yield return new InstructionSetInfo("VectorT512", "", InstructionSet.X86_VectorT512, false); + yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X86_VectorT128, true); + yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X86_VectorT256, true); + yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X86_VectorT512, true); break; } } @@ -1488,6 +1481,9 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.ARM64_Sha256; } + case "VectorT128": + { return InstructionSet.ARM64_VectorT128; } + } break; @@ -1654,6 +1650,15 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.X64_AVX512VBMI; } 
+ case "VectorT128": + { return InstructionSet.X64_VectorT128; } + + case "VectorT256": + { return InstructionSet.X64_VectorT256; } + + case "VectorT512": + { return InstructionSet.X64_VectorT512; } + } break; @@ -1748,6 +1753,15 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.X86_AVX512VBMI; } + case "VectorT128": + { return InstructionSet.X86_VectorT128; } + + case "VectorT256": + { return InstructionSet.X86_VectorT256; } + + case "VectorT512": + { return InstructionSet.X86_VectorT512; } + } break; diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 560b7b09f394a6..7d371ba7d5bce9 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -57,9 +57,9 @@ instructionset ,X86 ,Avx512DQ , ,35 ,AVX512DQ instructionset ,X86 ,Avx512DQ_VL , ,36 ,AVX512DQ_VL ,avx512dq_vl instructionset ,X86 ,Avx512Vbmi , ,37 ,AVX512VBMI ,avx512vbmi instructionset ,X86 ,Avx512Vbmi_VL , ,38 ,AVX512VBMI_VL ,avx512vbmi_vl -instructionset ,X86 , , , ,VectorT128 , -instructionset ,X86 , , , ,VectorT256 , -instructionset ,X86 , , , ,VectorT512 , +instructionset ,X86 ,VectorT128 , ,39 ,VectorT128 ,vectort128 +instructionset ,X86 ,VectorT256 , ,40 ,VectorT256 ,vectort256 +instructionset ,X86 ,VectorT512 , ,41 ,VectorT512 ,vectort512 instructionset64bit,X86 ,X86Base instructionset64bit,X86 ,SSE @@ -94,9 +94,6 @@ instructionset64bit,X86 ,AVX512VBMI_VL vectorinstructionset,X86 ,Vector128 vectorinstructionset,X86 ,Vector256 vectorinstructionset,X86 ,Vector512 -vectorinstructionset,X86 ,VectorT128 -vectorinstructionset,X86 ,VectorT256 -vectorinstructionset,X86 ,VectorT512 implication ,X86 ,SSE ,X86Base implication ,X86 ,SSE2 ,SSE @@ -116,9 +113,6 @@ implication ,X86 ,POPCNT ,SSE42 implication ,X86 ,Vector128 
,SSE implication ,X86 ,Vector256 ,AVX implication ,X86 ,Vector512 ,AVX512F -implication ,X86 ,VectorT128 ,SSE2 -implication ,X86 ,VectorT256 ,AVX2 -implication ,X86 ,VectorT512 ,AVX512F implication ,X86 ,AVXVNNI ,AVX2 implication ,X86 ,MOVBE ,SSE42 implication ,X86 ,X86Serialize ,X86Base @@ -132,6 +126,9 @@ implication ,X86 ,AVX512DQ ,AVX512F implication ,X86 ,AVX512DQ_VL ,AVX512F_VL implication ,X86 ,AVX512VBMI ,AVX512BW implication ,X86 ,AVX512VBMI_VL ,AVX512BW_VL +implication ,X86 ,VectorT128 ,SSE2 +implication ,X86 ,VectorT256 ,AVX2 +implication ,X86 ,VectorT512 ,AVX512F ; Definition of X64 instruction sets definearch ,X64 ,64Bit ,X64, X64 @@ -154,7 +151,7 @@ instructionset ,ARM64 , , , ,Vector64 instructionset ,ARM64 , , , ,Vector128 , instructionset ,ARM64 , , , ,Dczva , instructionset ,ARM64 , ,Rcpc ,26 ,Rcpc ,rcpc -instructionset ,ARM64 , , , ,VectorT128 , +instructionset ,ARM64 ,VectorT128 , ,39 ,VectorT128 ,vectort128 instructionset64bit,ARM64 ,ArmBase instructionset64bit,ARM64 ,AdvSimd @@ -167,7 +164,6 @@ instructionset64bit,ARM64 ,Sha256 vectorinstructionset,ARM64,Vector64 vectorinstructionset,ARM64,Vector128 -vectorinstructionset,ARM64,VectorT128 implication ,ARM64 ,AdvSimd ,ArmBase implication ,ARM64 ,Aes ,ArmBase @@ -180,7 +176,6 @@ implication ,ARM64 ,Vector64 ,AdvSimd implication ,ARM64 ,Vector128 ,AdvSimd implication ,ARM64 ,VectorT128 ,AdvSimd - ; ,name and aliases ,archs ,lower baselines included by implication ; instructionsetgroup ,x86-x64 ,X64 X86 ,sse2 diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index afb7ab5b7748ea..9261a44c5ec55e 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1542,6 +1542,7 @@ void EEJitManager::SetCpuInfo() if ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256)) { // We allow 256-bit Vector by default + CPUCompileFlags.Clear(InstructionSet_VectorT128); CPUCompileFlags.Set(InstructionSet_VectorT256); } @@ -1551,11 +1552,8 @@ void EEJitManager::SetCpuInfo() { 
CPUCompileFlags.Set(InstructionSet_AVX512F); - if (maxVectorTBitWidth >= 512) - { - // We require opt-in for 512-bit Vector - CPUCompileFlags.Set(InstructionSet_VectorT512); - } + // TODO-XArch: Add support for 512-bit Vector + assert(!CPUCompileFlags.IsSet(InstructionSet_VectorT512)); bool isAVX512_VLSupported = false; if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL From 9eeefd7270b3d3f355542c0645374a60ed54d185 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 20 May 2023 10:01:34 -0700 Subject: [PATCH 13/19] Don't allow avxvnni to be "optimistic" since that brings in avx2 --- src/coreclr/tools/Common/InstructionSetHelpers.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index bf0f438a54485b..29912d775c6f22 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -103,10 +103,13 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru Debug.Assert(InstructionSet.X64_AVX == InstructionSet.X86_AVX); if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX)) { + // TODO: Enable optimistic usage of AVX2 once we validate it doesn't break Vector usage + // optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx2"); + // optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnni"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("fma"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi2"); - optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnni"); } } else if (targetArchitecture == TargetArchitecture.ARM64) From 079e9b0bfab17b6a5b8f95fe2cf6138d489f7be7 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 21 May 2023 07:10:07 -0700 Subject: [PATCH 14/19] Ensure 
we handle HWIntrinsics being disabled --- src/coreclr/jit/compiler.h | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 6b7d47e2072416..a8e4f3191b68ee 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8696,25 +8696,33 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX uint32_t getMaxVectorByteLength() const { #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) { - if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) - { - return ZMM_REGSIZE_BYTES; - } - else - { - return YMM_REGSIZE_BYTES; - } + return ZMM_REGSIZE_BYTES; } - else + else if (compOpportunisticallyDependsOn(InstructionSet_AVX)) + { + return YMM_REGSIZE_BYTES; + } + else if (compOpportunisticallyDependsOn(InstructionSet_SSE)) { - assert(compIsaSupportedDebugOnly(InstructionSet_SSE)); return XMM_REGSIZE_BYTES; } + else + { + assert((JitConfig.EnableHWIntrinsic() == 0) || (JitConfig.EnableSSE() == 0)); + return 0; + } #elif defined(TARGET_ARM64) - assert(compIsaSupportedDebugOnly(InstructionSet_AdvSimd)); - return FP_REGSIZE_BYTES; + if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd)) + { + return FP_REGSIZE_BYTES; + } + else + { + assert((JitConfig.EnableHWIntrinsic() == 0) || (JitConfig.EnableArm64AdvSimd() == 0)); + return 0; + } #else assert(!"getMaxVectorByteLength() unimplemented on target arch"); unreached(); From 76c33aa26ae403b206e1c9fddfe9d22116c1636f Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 21 May 2023 11:39:50 -0700 Subject: [PATCH 15/19] Ensure that the Vector size ISAs are covered by FromInstructionSet --- .../Compiler/HardwareIntrinsicHelpers.Aot.cs | 87 +++++++++++++++---- 1 file changed, 68 insertions(+), 19 deletions(-) diff --git 
a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs index 1411ca7b6c7941..9de888f03a0802 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs @@ -102,15 +102,18 @@ private static class XArchIntrinsicConstants public const int AvxVnni = 0x2000; public const int Movbe = 0x4000; public const int Avx512f = 0x8000; - public const int Avx512f_vl = 0x10000; - public const int Avx512bw = 0x20000; - public const int Avx512bw_vl = 0x40000; - public const int Avx512cd = 0x80000; - public const int Avx512cd_vl = 0x100000; - public const int Avx512dq = 0x200000; - public const int Avx512dq_vl = 0x400000; - public const int Avx512Vbmi = 0x800000; - public const int Avx512Vbmi_vl = 0x1000000; + public const int Avx512f_vl = 0x0001_0000; + public const int Avx512bw = 0x0002_0000; + public const int Avx512bw_vl = 0x0004_0000; + public const int Avx512cd = 0x0008_0000; + public const int Avx512cd_vl = 0x0010_0000; + public const int Avx512dq = 0x0020_0000; + public const int Avx512dq_vl = 0x0040_0000; + public const int Avx512Vbmi = 0x0080_0000; + public const int Avx512Vbmi_vl = 0x0100_0000; + public const int VectorT128 = 0x0200_0000; + public const int VectorT256 = 0x0400_0000; + public const int VectorT512 = 0x0800_0000; public static int FromInstructionSet(InstructionSet instructionSet) { @@ -120,65 +123,96 @@ public static int FromInstructionSet(InstructionSet instructionSet) return instructionSet switch { + // Baseline ISAs - they're always available + InstructionSet.X64_SSE => 0, + InstructionSet.X64_SSE_X64 => 0, + + InstructionSet.X64_SSE2 => 0, + InstructionSet.X64_SSE2_X64 => 0, + + InstructionSet.X64_X86Base => 0, + InstructionSet.X64_X86Base_X64 => 0, + + // Optional ISAs - only available via opt-in or opportunistic 
light-up InstructionSet.X64_AES => Aes, InstructionSet.X64_AES_X64 => Aes, + InstructionSet.X64_PCLMULQDQ => Pclmulqdq, InstructionSet.X64_PCLMULQDQ_X64 => Pclmulqdq, + InstructionSet.X64_SSE3 => Sse3, InstructionSet.X64_SSE3_X64 => Sse3, + InstructionSet.X64_SSSE3 => Ssse3, InstructionSet.X64_SSSE3_X64 => Ssse3, + InstructionSet.X64_SSE41 => Sse41, InstructionSet.X64_SSE41_X64 => Sse41, + InstructionSet.X64_SSE42 => Sse42, InstructionSet.X64_SSE42_X64 => Sse42, + InstructionSet.X64_POPCNT => Popcnt, InstructionSet.X64_POPCNT_X64 => Popcnt, + InstructionSet.X64_AVX => Avx, InstructionSet.X64_AVX_X64 => Avx, + InstructionSet.X64_FMA => Fma, InstructionSet.X64_FMA_X64 => Fma, + InstructionSet.X64_AVX2 => Avx2, InstructionSet.X64_AVX2_X64 => Avx2, + InstructionSet.X64_BMI1 => Bmi1, InstructionSet.X64_BMI1_X64 => Bmi1, + InstructionSet.X64_BMI2 => Bmi2, InstructionSet.X64_BMI2_X64 => Bmi2, + InstructionSet.X64_LZCNT => Lzcnt, InstructionSet.X64_LZCNT_X64 => Lzcnt, + InstructionSet.X64_AVXVNNI => AvxVnni, InstructionSet.X64_AVXVNNI_X64 => AvxVnni, + InstructionSet.X64_MOVBE => Movbe, InstructionSet.X64_MOVBE_X64 => Movbe, + InstructionSet.X64_AVX512F => Avx512f, InstructionSet.X64_AVX512F_X64 => Avx512f, + InstructionSet.X64_AVX512F_VL => Avx512f_vl, InstructionSet.X64_AVX512F_VL_X64 => Avx512f_vl, + InstructionSet.X64_AVX512BW => Avx512bw, InstructionSet.X64_AVX512BW_X64 => Avx512bw, + InstructionSet.X64_AVX512BW_VL => Avx512bw_vl, InstructionSet.X64_AVX512BW_VL_X64 => Avx512bw_vl, + InstructionSet.X64_AVX512CD => Avx512cd, InstructionSet.X64_AVX512CD_X64 => Avx512cd, + InstructionSet.X64_AVX512CD_VL => Avx512cd_vl, InstructionSet.X64_AVX512CD_VL_X64 => Avx512cd_vl, + InstructionSet.X64_AVX512DQ => Avx512dq, InstructionSet.X64_AVX512DQ_X64 => Avx512dq, + InstructionSet.X64_AVX512DQ_VL => Avx512dq_vl, InstructionSet.X64_AVX512DQ_VL_X64 => Avx512dq_vl, + InstructionSet.X64_AVX512VBMI => Avx512Vbmi, InstructionSet.X64_AVX512VBMI_X64 => Avx512Vbmi, + 
InstructionSet.X64_AVX512VBMI_VL => Avx512Vbmi_vl, InstructionSet.X64_AVX512VBMI_VL_X64 => Avx512Vbmi_vl, - // SSE and SSE2 are baseline ISAs - they're always available - InstructionSet.X64_SSE => 0, - InstructionSet.X64_SSE_X64 => 0, - InstructionSet.X64_SSE2 => 0, - InstructionSet.X64_SSE2_X64 => 0, - - InstructionSet.X64_X86Base => 0, - InstructionSet.X64_X86Base_X64 => 0, + // Vector Sizes + InstructionSet.X64_VectorT128 => VectorT128, + InstructionSet.X64_VectorT256 => VectorT256, + InstructionSet.X64_VectorT512 => VectorT512, _ => throw new NotSupportedException(((InstructionSet_X64)instructionSet).ToString()) }; @@ -196,30 +230,45 @@ private static class Arm64IntrinsicConstants public const int Sha256 = 0x0040; public const int Atomics = 0x0080; public const int Rcpc = 0x0100; + public const int VectorT128 = 0x0200; public static int FromInstructionSet(InstructionSet instructionSet) { return instructionSet switch { + + // Baseline ISAs - they're always available + InstructionSet.ARM64_ArmBase => 0, + InstructionSet.ARM64_ArmBase_Arm64 => 0, + InstructionSet.ARM64_AdvSimd => AdvSimd, InstructionSet.ARM64_AdvSimd_Arm64 => AdvSimd, + + // Optional ISAs - only available via opt-in or opportunistic light-up InstructionSet.ARM64_Aes => Aes, InstructionSet.ARM64_Aes_Arm64 => Aes, + InstructionSet.ARM64_Crc32 => Crc32, InstructionSet.ARM64_Crc32_Arm64 => Crc32, + InstructionSet.ARM64_Dp => Dp, InstructionSet.ARM64_Dp_Arm64 => Dp, + InstructionSet.ARM64_Rdm => Rdm, InstructionSet.ARM64_Rdm_Arm64 => Rdm, + InstructionSet.ARM64_Sha1 => Sha1, InstructionSet.ARM64_Sha1_Arm64 => Sha1, + InstructionSet.ARM64_Sha256 => Sha256, InstructionSet.ARM64_Sha256_Arm64 => Sha256, + InstructionSet.ARM64_Atomics => Atomics, + InstructionSet.ARM64_Rcpc => Rcpc, - InstructionSet.ARM64_ArmBase => 0, - InstructionSet.ARM64_ArmBase_Arm64 => 0, + // Vector Sizes + InstructionSet.ARM64_VectorT128 => VectorT128, _ => throw new 
NotSupportedException(((InstructionSet_ARM64)instructionSet).ToString()) }; From 3b84fb001bbf755ad9db4319ab36707e8711ebf6 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 4 Jun 2023 08:42:54 -0700 Subject: [PATCH 16/19] Ensure that `getMaxVectorByteLength` being 0 is handled --- src/coreclr/jit/lclvars.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index ff91e429c321eb..80d9493daca427 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1757,7 +1757,8 @@ bool Compiler::StructPromotionHelper::CanPromoteStructType(CORINFO_CLASS_HANDLE #if defined(FEATURE_SIMD) // getMaxVectorByteLength() represents the size of the largest primitive type that we can struct promote. - const unsigned maxSize = MAX_NumOfFieldsInPromotableStruct * compiler->getMaxVectorByteLength(); + const unsigned maxSize = + MAX_NumOfFieldsInPromotableStruct * max(compiler->getMaxVectorByteLength(), sizeof(double)); #else // !FEATURE_SIMD // sizeof(double) represents the size of the largest primitive type that we can struct promote. 
const unsigned maxSize = MAX_NumOfFieldsInPromotableStruct * sizeof(double); From 69e496a4d6ee6231b723279c7c4b40885e4146fa Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 4 Jun 2023 09:28:03 -0700 Subject: [PATCH 17/19] Ensure NAOT startup can correctly check for the VectorT size bits --- src/coreclr/nativeaot/Runtime/IntrinsicConstants.h | 4 ++++ src/coreclr/nativeaot/Runtime/startup.cpp | 4 ++++ src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp | 4 ++-- src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h b/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h index ad7d2e11ee69ac..41ec8dec9c3d02 100644 --- a/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h +++ b/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h @@ -35,6 +35,9 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Avx512Vbmi = 0x800000, XArchIntrinsicConstants_Avx512Vbmi_vl = 0x1000000, XArchIntrinsicConstants_Serialize = 0x2000000, + XArchIntrinsicConstants_VectorT128 = 0x4000000, + XArchIntrinsicConstants_VectorT256 = 0x8000000, + XArchIntrinsicConstants_VectorT512 = 0x10000000, }; #endif //HOST_X86 || HOST_AMD64 @@ -50,6 +53,7 @@ enum ARM64IntrinsicConstants ARM64IntrinsicConstants_Sha256 = 0x0040, ARM64IntrinsicConstants_Atomics = 0x0080, ARM64IntrinsicConstants_Rcpc = 0x0100, + ARM64IntrinsicConstants_VectorT128 = 0x0200, }; // Bit position for the ARM64IntrinsicConstants_Atomics flags, to be used with tbz / tbnz instructions diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp index 7ce983600df46d..58740157c49609 100644 --- a/src/coreclr/nativeaot/Runtime/startup.cpp +++ b/src/coreclr/nativeaot/Runtime/startup.cpp @@ -202,6 +202,8 @@ bool DetectCPUFeatures() if ((cpuidInfo[CPUID_EDX] & requiredBaselineEdxFlags) == requiredBaselineEdxFlags) { + g_cpuFeatures |= XArchIntrinsicConstants_VectorT128; + if 
((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI { g_cpuFeatures |= XArchIntrinsicConstants_Aes; @@ -259,12 +261,14 @@ bool DetectCPUFeatures() if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2 { g_cpuFeatures |= XArchIntrinsicConstants_Avx2; + g_cpuFeatures |= XArchIntrinsicConstants_VectorT256; if (PalIsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 { if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F { g_cpuFeatures |= XArchIntrinsicConstants_Avx512f; + g_cpuFeatures |= XArchIntrinsicConstants_VectorT512; bool isAVX512_VLSupported = false; if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL diff --git a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp index a856be48f4ab8c..69ee3da64a36d1 100644 --- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp @@ -1446,7 +1446,7 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags) #endif #ifdef HWCAP_ASIMD if (hwCap & HWCAP_ASIMD) - *flags |= ARM64IntrinsicConstants_AdvSimd; + *flags |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128; #endif #ifdef HWCAP_ASIMDRDM if (hwCap & HWCAP_ASIMDRDM) @@ -1545,7 +1545,7 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags) // Every ARM64 CPU should support SIMD and FP // If the OS have no function to query for CPU capabilities we set just these - *flags |= ARM64IntrinsicConstants_AdvSimd; + *flags |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128; #endif // HAVE_AUXV_HWCAP_H } #endif diff --git a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp index c7b1f3e313fa39..0c591bd8a89860 100644 --- a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp @@ -796,7 +796,7 @@ REDHAWK_PALIMPORT void 
REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags) #endif // FP and SIMD support are enabled by default - *flags |= ARM64IntrinsicConstants_AdvSimd; + *flags |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128; if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { From b0deccd8c82430911d1b688a1857c0529379a877 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 4 Jun 2023 14:37:01 -0700 Subject: [PATCH 18/19] Have BlkOpKindUnroll account for SIMD being disabled --- src/coreclr/jit/codegenxarch.cpp | 8 +++---- src/coreclr/jit/lsraxarch.cpp | 39 ++++++++++++++++++++------------ 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 3d6291e7c11c36..6d7a973ad07520 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3401,15 +3401,15 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node) assert(srcOffset < (INT32_MAX - static_cast(size))); assert(dstOffset < (INT32_MAX - static_cast(size))); - if (size >= XMM_REGSIZE_BYTES) + // Get the largest SIMD register available if the size is large enough + unsigned regSize = compiler->roundDownSIMDSize(size); + + if ((size >= regSize) && (regSize > 0)) { regNumber tempReg = node->GetSingleTempReg(RBM_ALLFLOAT); instruction simdMov = simdUnalignedMovIns(); - // Get the largest SIMD register available if the size is large enough - unsigned regSize = compiler->roundDownSIMDSize(size); - auto emitSimdMovs = [&]() { if (srcLclNum != BAD_VAR_NUM) { diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 765de402b90e23..08ae13c6cff2f3 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1166,7 +1166,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // The return value will be on the X87 stack, and we will need to move it. 
dstCandidates = allRegs(registerType); #else // !TARGET_X86 - dstCandidates = RBM_FLOATRET; + dstCandidates = RBM_FLOATRET; #endif // !TARGET_X86 } else @@ -1378,12 +1378,10 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) { case GenTreeBlk::BlkOpKindUnroll: { -#ifdef TARGET_AMD64 - const bool canUse16BytesSimdMov = !blkNode->IsOnHeapAndContainsReferences(); - const bool willUseSimdMov = canUse16BytesSimdMov && (size >= 16); -#else - const bool willUseSimdMov = (size >= 16); -#endif + const bool canUse16BytesSimdMov = + !blkNode->IsOnHeapAndContainsReferences() && compiler->IsBaselineSimdIsaSupported(); + const bool willUseSimdMov = canUse16BytesSimdMov && (size >= XMM_REGSIZE_BYTES); + if (willUseSimdMov) { buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates()); @@ -1440,8 +1438,26 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) break; case GenTreeBlk::BlkOpKindUnroll: - if ((size % XMM_REGSIZE_BYTES) != 0) + { + unsigned regSize = compiler->roundDownSIMDSize(size); + unsigned remainder = size; + + if ((size >= regSize) && (regSize > 0)) + { + // We need a float temporary if we're doing SIMD operations + + buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates()); + SetContainsAVXFlags(size); + + remainder %= regSize; + } + + if ((remainder > 0) && ((regSize == 0) || (isPow2(remainder) && (remainder <= REGSIZE_BYTES)))) { + // We need an int temporary if we're not doing SIMD operations + // or if we are but the remainder is a power of 2 and no larger than the + // size of a register + regMaskTP regMask = availableIntRegs; #ifdef TARGET_X86 if ((size & 1) != 0) @@ -1453,13 +1469,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) #endif internalIntDef = buildInternalIntRegisterDefForNode(blkNode, regMask); } - - if (size >= XMM_REGSIZE_BYTES) - { - buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates()); - SetContainsAVXFlags(size); - } break; + } case GenTreeBlk::BlkOpKindUnrollMemmove: { From
b7b26d763e1ae8036b4f31b6bd267a8ff7341b68 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 4 Jun 2023 18:45:34 -0700 Subject: [PATCH 19/19] Ensure InstructionSet_VectorT128 is set in the fallback path for PAL_GetJitCpuCapabilityFlags --- src/coreclr/pal/src/misc/jitsupport.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/pal/src/misc/jitsupport.cpp b/src/coreclr/pal/src/misc/jitsupport.cpp index fbf94ad76b0ac4..30426290043514 100644 --- a/src/coreclr/pal/src/misc/jitsupport.cpp +++ b/src/coreclr/pal/src/misc/jitsupport.cpp @@ -295,6 +295,7 @@ PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags) // Set baseline flags if OS has not exposed mechanism for us to determine CPU capabilities flags->Set(InstructionSet_ArmBase); flags->Set(InstructionSet_AdvSimd); + flags->Set(InstructionSet_VectorT128); // flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_FP); #endif // HAVE_AUXV_HWCAP_H }