From cef20486b047f853faea329923678d4878184605 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Mon, 3 Jun 2019 11:33:19 +0200 Subject: [PATCH 1/7] Allow pregenerating all HW intrinsics in CoreLib This is a follow up to #24689 that lets us pregenerate all hardware intrinsics in CoreLib. We ensures the potentially unsupported code will never be reachable at runtime on CPUs that don't support it by not reporting the `IsSupported` property as intrinsic in crossgen. This ensures the support checks are always JITted. JITting the support checks is very cheap. There is cost in the form of an extra call and failure to do constant propagation of the return value, but the cost is negligible in practice and gets eliminated once the tiered JIT tiers the method up. We only do this in CoreLib because user code could technically not guard intrinsic use in `IsSupported` checks and pregenerating the code could lead to illegal instruction traps at runtime (instead of `PlatformNotSupportedException` throws) - it's a bad user experience. --- src/vm/methodtablebuilder.cpp | 6 ++--- src/zap/zapinfo.cpp | 49 +++++++++++++++++++++++++++++++++-- src/zap/zapper.cpp | 25 ++++++++++++++++-- 3 files changed, 72 insertions(+), 8 deletions(-) diff --git a/src/vm/methodtablebuilder.cpp b/src/vm/methodtablebuilder.cpp index 31c4b0a5ee8e..61fd1deba1aa 100644 --- a/src/vm/methodtablebuilder.cpp +++ b/src/vm/methodtablebuilder.cpp @@ -1520,15 +1520,13 @@ MethodTableBuilder::BuildMethodTableThrowing( { #if defined(CROSSGEN_COMPILE) #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) - if ((!IsNgenPDBCompilationProcess() + if (!IsNgenPDBCompilationProcess() && GetAppDomain()->ToCompilationDomain()->GetTargetModule() != g_pObjectClass->GetModule()) - || (strcmp(className, "Sse") != 0 && strcmp(className, "Sse2") != 0)) #endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) { // Disable AOT compiling for managed implementation of hardware intrinsics. // We specially treat them here to ensure correct ISA features are set during compilation - // The only exception to this rule are SSE and SSE2 intrinsics in CoreLib - we can - // safely expand those because we require them to be always available. + // The only exception to this rule are intrinsics in CoreLib. COMPlusThrow(kTypeLoadException, IDS_EE_HWINTRINSIC_NGEN_DISALLOWED); } #endif // defined(CROSSGEN_COMPILE) diff --git a/src/zap/zapinfo.cpp b/src/zap/zapinfo.cpp index 23fb3362c864..2edc73325643 100644 --- a/src/zap/zapinfo.cpp +++ b/src/zap/zapinfo.cpp @@ -441,7 +441,7 @@ void ZapInfo::CompileMethod() // this they can add the hint and reduce the perf cost at runtime. m_pImage->m_pPreloader->PrePrepareMethodIfNecessary(m_currentMethodHandle); - DWORD methodAttribs = getMethodAttribs(m_currentMethodHandle); + DWORD methodAttribs = m_pEEJitInfo->getMethodAttribs(m_currentMethodHandle); if (methodAttribs & CORINFO_FLG_AGGRESSIVE_OPT) { // Skip methods marked with MethodImplOptions.AggressiveOptimization, they will be jitted instead. In the future, @@ -450,6 +450,24 @@ void ZapInfo::CompileMethod() return; } +#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) + if (methodAttribs & CORINFO_FLG_JIT_INTRINSIC) + { + // Skip generating hardware intrinsic method bodies. + // + // The actual method bodies are only reachable via reflection/delegates, but we don't know what the + // implementation should do (whether it can do the actual intrinsic thing, or whether it should throw + // a PlatformNotSupportedException). + + const char* namespaceName; + getMethodNameFromMetadata(m_currentMethodHandle, nullptr, &namespaceName, nullptr); + if (strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0) + { + return; + } + } +#endif + m_jitFlags = ComputeJitFlags(m_currentMethodHandle); #ifdef FEATURE_READYTORUN_COMPILER @@ -2086,6 +2104,30 @@ void ZapInfo::GetProfilingHandle(BOOL *pbHookFunction, *pbIndirectedHandles = TRUE; } +DWORD FilterMethodAttribsForIsSupported(DWORD attribs, CORINFO_METHOD_HANDLE ftn, ICorDynamicInfo* pJitInfo) +{ +#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) + if (attribs & CORINFO_FLG_JIT_INTRINSIC) + { + // Do not report the get_IsSupported method as an intrinsic. This will turn the call into a regular + // call. We also make sure none of the hardware intrinsic method bodies get pregenerated in crossgen + // (see ZapInfo::CompileMethod) but get JITted instead. The JITted method will have the correct + // answer for the CPU the code is running on. + + const char* namespaceName; + const char* methodName = pJitInfo->getMethodNameFromMetadata(ftn, nullptr, &namespaceName, nullptr); + + if (strcmp(methodName, "get_IsSupported") == 0 + && strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0) + { + attribs &= ~CORINFO_FLG_JIT_INTRINSIC; + } + } +#endif + + return attribs; +} + //return a callable stub that will do the virtual or interface call @@ -2111,6 +2153,8 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken, (CORINFO_CALLINFO_FLAGS)(flags | CORINFO_CALLINFO_KINDONLY), pResult); + pResult->methodFlags = FilterMethodAttribsForIsSupported(pResult->methodFlags, pResult->hMethod, m_pEEJitInfo); + #ifdef FEATURE_READYTORUN_COMPILER if (IsReadyToRunCompilation()) { @@ -3681,7 +3725,8 @@ unsigned ZapInfo::getMethodHash(CORINFO_METHOD_HANDLE ftn) DWORD ZapInfo::getMethodAttribs(CORINFO_METHOD_HANDLE ftn) { - return m_pEEJitInfo->getMethodAttribs(ftn); + DWORD result = m_pEEJitInfo->getMethodAttribs(ftn); + return FilterMethodAttribsForIsSupported(result, ftn, m_pEEJitInfo); } void ZapInfo::setMethodAttribs(CORINFO_METHOD_HANDLE ftn, CorInfoMethodRuntimeFlags attribs) diff --git a/src/zap/zapper.cpp b/src/zap/zapper.cpp index 5e5d190346ee..22102faac0d2 100644 --- a/src/zap/zapper.cpp +++ b/src/zap/zapper.cpp @@ -1189,11 +1189,32 @@ void Zapper::InitializeCompilerFlags(CORCOMPILE_VERSION_INFO * pVersionInfo) #endif // _TARGET_X86_ #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) - // If we're compiling CoreLib, allow RyuJIT to generate SIMD code so that we can expand some - // of the hardware intrinsics. + // If we're crossgenning CoreLib, allow generating all intrinsics. The generated code might + // not actually be supported by the processor at runtime so we compensate for it by + // not letting the get_IsSupported method to be intrinsically expanded in crossgen + // (see special handling around CORINFO_FLG_JIT_INTRINSIC in ZapInfo). + // That way the actual support checks will always be jitted. + // We only do this for CoreLib because forgetting to wrap intrinsics under IsSupported + // checks can lead to illegal instruction traps (instead of a nice managed exception). if (m_pEECompileInfo->GetAssemblyModule(m_hAssembly) == m_pEECompileInfo->GetLoaderModuleForMscorlib()) { m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_FEATURE_SIMD); + +#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_AES); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_PCLMULQDQ); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE3); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSSE3); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE41); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE42); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_POPCNT); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_FMA); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX2); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_BMI1); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_BMI2); + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_LZCNT); +#endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) } #endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) From b4b824437ad17885289f6dc5c639f9464d7dbf35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Tue, 4 Jun 2019 17:27:31 +0200 Subject: [PATCH 2/7] Review feedback --- src/vm/methodtablebuilder.cpp | 6 +++-- src/zap/zapinfo.cpp | 44 ++++++++++++++++++++++++++++------- src/zap/zapper.cpp | 6 ++--- 3 files changed, 42 insertions(+), 14 deletions(-) diff --git a/src/vm/methodtablebuilder.cpp b/src/vm/methodtablebuilder.cpp index 61fd1deba1aa..578df650cf2f 100644 --- a/src/vm/methodtablebuilder.cpp +++ b/src/vm/methodtablebuilder.cpp @@ -1520,13 +1520,15 @@ MethodTableBuilder::BuildMethodTableThrowing( { #if defined(CROSSGEN_COMPILE) #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) - if (!IsNgenPDBCompilationProcess() + if ((!IsNgenPDBCompilationProcess() && GetAppDomain()->ToCompilationDomain()->GetTargetModule() != g_pObjectClass->GetModule()) + || strcmp(className, "Avx") == 0 || strcmp(className, "Avx2") == 0 || strcmp(className, "Fma") == 0) #endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) { // Disable AOT compiling for managed implementation of hardware intrinsics. // We specially treat them here to ensure correct ISA features are set during compilation - // The only exception to this rule are intrinsics in CoreLib. + // The only exception to this rule are intrinsics in CoreLib outside those that enable + // VEX encoding. COMPlusThrow(kTypeLoadException, IDS_EE_HWINTRINSIC_NGEN_DISALLOWED); } #endif // defined(CROSSGEN_COMPILE) diff --git a/src/zap/zapinfo.cpp b/src/zap/zapinfo.cpp index 2edc73325643..b557f61c3926 100644 --- a/src/zap/zapinfo.cpp +++ b/src/zap/zapinfo.cpp @@ -455,13 +455,14 @@ void ZapInfo::CompileMethod() { // Skip generating hardware intrinsic method bodies. // - // The actual method bodies are only reachable via reflection/delegates, but we don't know what the - // implementation should do (whether it can do the actual intrinsic thing, or whether it should throw - // a PlatformNotSupportedException). + // We don't know what the implementation should do (whether it can do the actual intrinsic thing, or whether + // it should throw a PlatformNotSupportedException). const char* namespaceName; getMethodNameFromMetadata(m_currentMethodHandle, nullptr, &namespaceName, nullptr); - if (strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0) + if (strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0 + || strcmp(namespaceName, "System.Runtime.Intrinsics.Arm.Arm64") == 0 + || strcmp(namespaceName, "System.Runtime.Intrinsics") == 0) { return; } @@ -2106,7 +2107,6 @@ void ZapInfo::GetProfilingHandle(BOOL *pbHookFunction, DWORD FilterMethodAttribsForIsSupported(DWORD attribs, CORINFO_METHOD_HANDLE ftn, ICorDynamicInfo* pJitInfo) { -#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) if (attribs & CORINFO_FLG_JIT_INTRINSIC) { // Do not report the get_IsSupported method as an intrinsic. This will turn the call into a regular @@ -2115,15 +2115,41 @@ DWORD FilterMethodAttribsForIsSupported(DWORD attribs, CORINFO_METHOD_HANDLE ftn // answer for the CPU the code is running on. const char* namespaceName; - const char* methodName = pJitInfo->getMethodNameFromMetadata(ftn, nullptr, &namespaceName, nullptr); + const char* className; + const char* enclosingClassName; + const char* methodName = pJitInfo->getMethodNameFromMetadata(ftn, &className, &namespaceName, &enclosingClassName); + + // If it's not the IsSupported method, we're done + if (strcmp(methodName, "get_IsSupported") != 0) + return attribs; + + bool isX86intrinsic = strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0; - if (strcmp(methodName, "get_IsSupported") == 0 - && strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0) +#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) + // If it's IsSupported on Sse/Sse2, we can expand unconditionally since this is reliably + // available everywhere. + if (isX86intrinsic + && ( + strcmp(className, "Sse") == 0 || strcmp(className, "Sse2") == 0 + || ( + strcmp(className, "X64") == 0 + && ( + strcmp(enclosingClassName, "Sse") == 0 || strcmp(enclosingClassName, "Sse2") + ) + ) + ) + ) + return attribs; +#endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) + + // If it's another get_IsSupported method on some other ISA class, do not report as intrinsic + if (isX86intrinsic + || strcmp(namespaceName, "System.Runtime.Intrinsics.Arm.Arm64") == 0 + || strcmp(namespaceName, "System.Runtime.Intrinsics") == 0) { attribs &= ~CORINFO_FLG_JIT_INTRINSIC; } } -#endif return attribs; } diff --git a/src/zap/zapper.cpp b/src/zap/zapper.cpp index 22102faac0d2..7568e451c518 100644 --- a/src/zap/zapper.cpp +++ b/src/zap/zapper.cpp @@ -1208,9 +1208,9 @@ void Zapper::InitializeCompilerFlags(CORCOMPILE_VERSION_INFO * pVersionInfo) m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE41); m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE42); m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_POPCNT); - m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX); - m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_FMA); - m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX2); + // Leaving out CORJIT_FLAGS::CORJIT_FLAG_USE_AVX, CORJIT_FLAGS::CORJIT_FLAG_USE_FMA + // CORJIT_FLAGS::CORJIT_FLAG_USE_AVX2 on purpose - these enable JIT to use VEX encoding + // and that's not safe. m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_BMI1); m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_BMI2); m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_LZCNT); From 5c3974f04918d666e81ef364014b4201c0835d80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Tue, 4 Jun 2019 18:29:03 +0200 Subject: [PATCH 3/7] I hate C --- src/zap/zapinfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zap/zapinfo.cpp b/src/zap/zapinfo.cpp index b557f61c3926..c809ad52d2c1 100644 --- a/src/zap/zapinfo.cpp +++ b/src/zap/zapinfo.cpp @@ -2134,7 +2134,7 @@ DWORD FilterMethodAttribsForIsSupported(DWORD attribs, CORINFO_METHOD_HANDLE ftn || ( strcmp(className, "X64") == 0 && ( - strcmp(enclosingClassName, "Sse") == 0 || strcmp(enclosingClassName, "Sse2") + strcmp(enclosingClassName, "Sse") == 0 || strcmp(enclosingClassName, "Sse2") == 0 ) ) ) From da02afea30dd59ee54f59b387a97b50fac9dc04f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Mon, 10 Jun 2019 13:08:59 +0200 Subject: [PATCH 4/7] Also mark get_IsSupported as noinlining --- src/zap/zapinfo.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/zap/zapinfo.cpp b/src/zap/zapinfo.cpp index 8e6c957ff2a1..2baee42a57c2 100644 --- a/src/zap/zapinfo.cpp +++ b/src/zap/zapinfo.cpp @@ -2145,6 +2145,10 @@ DWORD FilterMethodAttribsForIsSupported(DWORD attribs, CORINFO_METHOD_HANDLE ftn || strcmp(namespaceName, "System.Runtime.Intrinsics") == 0) { attribs &= ~CORINFO_FLG_JIT_INTRINSIC; + + // The intrinsic just recursively calls itself in IL so it wouldn't inline anyway, + // but let's make that explicit. + attribs |= CORINFO_FLG_DONT_INLINE; } } From 6b5b161733fb19ee92853b724cb10a1de395b12c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Wed, 12 Jun 2019 16:56:09 +0200 Subject: [PATCH 5/7] Review feedback --- src/vm/methodtablebuilder.cpp | 5 +-- src/zap/zapinfo.cpp | 84 +++++++++++++++++++++++++---------- src/zap/zapper.cpp | 9 ++-- 3 files changed, 65 insertions(+), 33 deletions(-) diff --git a/src/vm/methodtablebuilder.cpp b/src/vm/methodtablebuilder.cpp index 578df650cf2f..b4b068bebad4 100644 --- a/src/vm/methodtablebuilder.cpp +++ b/src/vm/methodtablebuilder.cpp @@ -1521,14 +1521,11 @@ MethodTableBuilder::BuildMethodTableThrowing( #if defined(CROSSGEN_COMPILE) #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) if ((!IsNgenPDBCompilationProcess() - && GetAppDomain()->ToCompilationDomain()->GetTargetModule() != g_pObjectClass->GetModule()) - || strcmp(className, "Avx") == 0 || strcmp(className, "Avx2") == 0 || strcmp(className, "Fma") == 0) + && GetAppDomain()->ToCompilationDomain()->GetTargetModule() != g_pObjectClass->GetModule())) #endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) { // Disable AOT compiling for managed implementation of hardware intrinsics. // We specially treat them here to ensure correct ISA features are set during compilation - // The only exception to this rule are intrinsics in CoreLib outside those that enable - // VEX encoding. COMPlusThrow(kTypeLoadException, IDS_EE_HWINTRINSIC_NGEN_DISALLOWED); } #endif // defined(CROSSGEN_COMPILE) diff --git a/src/zap/zapinfo.cpp b/src/zap/zapinfo.cpp index 2baee42a57c2..671c8572582e 100644 --- a/src/zap/zapinfo.cpp +++ b/src/zap/zapinfo.cpp @@ -461,6 +461,8 @@ void ZapInfo::CompileMethod() || strcmp(namespaceName, "System.Runtime.Intrinsics.Arm.Arm64") == 0 || strcmp(namespaceName, "System.Runtime.Intrinsics") == 0) { + if (m_zapper->m_pOpt->m_verbose) + m_zapper->Info(W("Skipped due to being a hardware intrinsic\n")); return; } } @@ -2102,30 +2104,28 @@ void ZapInfo::GetProfilingHandle(BOOL *pbHookFunction, *pbIndirectedHandles = TRUE; } -DWORD FilterMethodAttribsForIsSupported(DWORD attribs, CORINFO_METHOD_HANDLE ftn, ICorDynamicInfo* pJitInfo) +// +// This strips the CORINFO_FLG_JIT_INTRINSIC flag from some of the hardware intrinsic methods. +// +DWORD FilterHardwareIntrinsicMethodAttribs(DWORD attribs, CORINFO_METHOD_HANDLE ftn, ICorDynamicInfo* pJitInfo) { if (attribs & CORINFO_FLG_JIT_INTRINSIC) { - // Do not report the get_IsSupported method as an intrinsic. This will turn the call into a regular - // call. We also make sure none of the hardware intrinsic method bodies get pregenerated in crossgen - // (see ZapInfo::CompileMethod) but get JITted instead. The JITted method will have the correct - // answer for the CPU the code is running on. - + // Figure out which intrinsic we are dealing with. const char* namespaceName; const char* className; const char* enclosingClassName; const char* methodName = pJitInfo->getMethodNameFromMetadata(ftn, &className, &namespaceName, &enclosingClassName); - // If it's not the IsSupported method, we're done - if (strcmp(methodName, "get_IsSupported") != 0) - return attribs; - - bool isX86intrinsic = strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0; + // Is this the get_IsSupported method that checks whether intrinsic is supported? + bool fIsGetIsSupportedMethod = strcmp(methodName, "get_IsSupported") == 0; #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) - // If it's IsSupported on Sse/Sse2, we can expand unconditionally since this is reliably + bool fIsX86intrinsic = strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0; + + // If it's anything related to Sse/Sse2, we can expand unconditionally since this is reliably // available everywhere. - if (isX86intrinsic + if (fIsX86intrinsic && ( strcmp(className, "Sse") == 0 || strcmp(className, "Sse2") == 0 || ( @@ -2136,19 +2136,55 @@ DWORD FilterMethodAttribsForIsSupported(DWORD attribs, CORINFO_METHOD_HANDLE ftn ) ) ) + { return attribs; -#endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) + } - // If it's another get_IsSupported method on some other ISA class, do not report as intrinsic - if (isX86intrinsic - || strcmp(namespaceName, "System.Runtime.Intrinsics.Arm.Arm64") == 0 - || strcmp(namespaceName, "System.Runtime.Intrinsics") == 0) + // If it's an intrinsic that requires VEX encoding, do not report as intrinsic + // to force this to become a regular method call. + // We don't allow RyuJIT to use VEX encoding at AOT compilation time, so these + // cannot be pregenerated. Not reporting them as intrinsic will make sure + // it will do the right thing at runtime (the called method will be JITted). + // It will be slower, but correct. + if (fIsX86intrinsic + && ( + strcmp(className, "Avx") == 0 || strcmp(className, "Fma") == 0 || strcmp(className, "Avx2") == 0 || strcmp(className, "Bmi1") == 0 || strcmp(className, "Bmi2") == 0 + || ( + strcmp(className, "X64") == 0 + && ( + strcmp(enclosingClassName, "Bmi1") == 0 || strcmp(enclosingClassName, "Bmi2") == 0 + ) + ) + ) + ) { - attribs &= ~CORINFO_FLG_JIT_INTRINSIC; + // We do want the IsSupported for VEX instructions to be recognized as intrinsic so that the + // potentially worse quality code doesn't actually run until tiered JIT starts + // kicking in and recompiling methods. Reporting this as intrinsic makes RyuJIT expand it + // into `return false`. + if (fIsGetIsSupportedMethod) + return attribs; + + // Treat as a regular method call (into a JITted method). + return (attribs & ~CORINFO_FLG_JIT_INTRINSIC) | CORINFO_FLG_DONT_INLINE; + } - // The intrinsic just recursively calls itself in IL so it wouldn't inline anyway, - // but let's make that explicit. - attribs |= CORINFO_FLG_DONT_INLINE; +#endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) + + // Do not report the get_IsSupported method as an intrinsic. This will turn the call into a regular + // call. We also make sure none of the hardware intrinsic method bodies get pregenerated in crossgen + // (see ZapInfo::CompileMethod) but get JITted instead. The JITted method will have the correct + // answer for the CPU the code is running on. + if (fIsGetIsSupportedMethod && +#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) + fIsX86intrinsic || +#elif _TARGET_ARM64_ + strcmp(namespaceName, "System.Runtime.Intrinsics.Arm.Arm64") == 0 || +#endif + strcmp(namespaceName, "System.Runtime.Intrinsics") == 0) + { + // Treat as a regular method call (into a JITted method). + return (attribs & ~CORINFO_FLG_JIT_INTRINSIC) | CORINFO_FLG_DONT_INLINE; } } @@ -2180,7 +2216,7 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken, (CORINFO_CALLINFO_FLAGS)(flags | CORINFO_CALLINFO_KINDONLY), pResult); - pResult->methodFlags = FilterMethodAttribsForIsSupported(pResult->methodFlags, pResult->hMethod, m_pEEJitInfo); + pResult->methodFlags = FilterHardwareIntrinsicMethodAttribs(pResult->methodFlags, pResult->hMethod, m_pEEJitInfo); #ifdef FEATURE_READYTORUN_COMPILER if (IsReadyToRunCompilation()) @@ -3753,7 +3789,7 @@ unsigned ZapInfo::getMethodHash(CORINFO_METHOD_HANDLE ftn) DWORD ZapInfo::getMethodAttribs(CORINFO_METHOD_HANDLE ftn) { DWORD result = m_pEEJitInfo->getMethodAttribs(ftn); - return FilterMethodAttribsForIsSupported(result, ftn, m_pEEJitInfo); + return FilterHardwareIntrinsicMethodAttribs(result, ftn, m_pEEJitInfo); } void ZapInfo::setMethodAttribs(CORINFO_METHOD_HANDLE ftn, CorInfoMethodRuntimeFlags attribs) diff --git a/src/zap/zapper.cpp b/src/zap/zapper.cpp index 7568e451c518..e511b5726c11 100644 --- a/src/zap/zapper.cpp +++ b/src/zap/zapper.cpp @@ -1189,7 +1189,7 @@ void Zapper::InitializeCompilerFlags(CORCOMPILE_VERSION_INFO * pVersionInfo) #endif // _TARGET_X86_ #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) - // If we're crossgenning CoreLib, allow generating all intrinsics. The generated code might + // If we're crossgenning CoreLib, allow generating non-VEX intrinsics. The generated code might // not actually be supported by the processor at runtime so we compensate for it by // not letting the get_IsSupported method to be intrinsically expanded in crossgen // (see special handling around CORINFO_FLG_JIT_INTRINSIC in ZapInfo). @@ -1209,10 +1209,9 @@ void Zapper::InitializeCompilerFlags(CORCOMPILE_VERSION_INFO * pVersionInfo) m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE42); m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_POPCNT); // Leaving out CORJIT_FLAGS::CORJIT_FLAG_USE_AVX, CORJIT_FLAGS::CORJIT_FLAG_USE_FMA - // CORJIT_FLAGS::CORJIT_FLAG_USE_AVX2 on purpose - these enable JIT to use VEX encoding - // and that's not safe. - m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_BMI1); - m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_BMI2); + // CORJIT_FLAGS::CORJIT_FLAG_USE_AVX2, CORJIT_FLAGS::CORJIT_FLAG_USE_BMI1, + // CORJIT_FLAGS::CORJIT_FLAG_USE_BMI2 on purpose - these require VEX encodings + // and the JIT doesn't support generating code for methods with mixed encodings. m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_LZCNT); #endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) } From db4cb30c7dbe2f28e7a408a93089bfecccd95054 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Wed, 12 Jun 2019 17:36:28 +0200 Subject: [PATCH 6/7] oops --- src/zap/zapinfo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zap/zapinfo.cpp b/src/zap/zapinfo.cpp index 671c8572582e..d63a3680853d 100644 --- a/src/zap/zapinfo.cpp +++ b/src/zap/zapinfo.cpp @@ -2175,13 +2175,13 @@ DWORD FilterHardwareIntrinsicMethodAttribs(DWORD attribs, CORINFO_METHOD_HANDLE // call. We also make sure none of the hardware intrinsic method bodies get pregenerated in crossgen // (see ZapInfo::CompileMethod) but get JITted instead. The JITted method will have the correct // answer for the CPU the code is running on. - if (fIsGetIsSupportedMethod && + if (fIsGetIsSupportedMethod && ( #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) fIsX86intrinsic || #elif _TARGET_ARM64_ strcmp(namespaceName, "System.Runtime.Intrinsics.Arm.Arm64") == 0 || #endif - strcmp(namespaceName, "System.Runtime.Intrinsics") == 0) + strcmp(namespaceName, "System.Runtime.Intrinsics") == 0)) { // Treat as a regular method call (into a JITted method). return (attribs & ~CORINFO_FLG_JIT_INTRINSIC) | CORINFO_FLG_DONT_INLINE; From d9d0c842157588933ba299104020b48337c52666 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Wed, 12 Jun 2019 21:40:46 +0200 Subject: [PATCH 7/7] Update comments --- src/zap/zapinfo.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/zap/zapinfo.cpp b/src/zap/zapinfo.cpp index d63a3680853d..142956ea8648 100644 --- a/src/zap/zapinfo.cpp +++ b/src/zap/zapinfo.cpp @@ -438,6 +438,8 @@ void ZapInfo::CompileMethod() // this they can add the hint and reduce the perf cost at runtime. m_pImage->m_pPreloader->PrePrepareMethodIfNecessary(m_currentMethodHandle); + // Retrieve method attributes from EEJitInfo - the ZapInfo's version updates + // some of the flags related to hardware intrinsics but we don't want that. DWORD methodAttribs = m_pEEJitInfo->getMethodAttribs(m_currentMethodHandle); if (methodAttribs & CORINFO_FLG_AGGRESSIVE_OPT) { @@ -2123,8 +2125,8 @@ DWORD FilterHardwareIntrinsicMethodAttribs(DWORD attribs, CORINFO_METHOD_HANDLE #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) bool fIsX86intrinsic = strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0; - // If it's anything related to Sse/Sse2, we can expand unconditionally since this is reliably - // available everywhere. + // If it's anything related to Sse/Sse2, we can expand unconditionally since this is a baseline + // requirement of CoreCLR. if (fIsX86intrinsic && ( strcmp(className, "Sse") == 0 || strcmp(className, "Sse2") == 0 @@ -2165,14 +2167,15 @@ DWORD FilterHardwareIntrinsicMethodAttribs(DWORD attribs, CORINFO_METHOD_HANDLE if (fIsGetIsSupportedMethod) return attribs; - // Treat as a regular method call (into a JITted method). + // Treat other intrinsic methods as a regular method call (into a JITted method). return (attribs & ~CORINFO_FLG_JIT_INTRINSIC) | CORINFO_FLG_DONT_INLINE; } #endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) - // Do not report the get_IsSupported method as an intrinsic. This will turn the call into a regular - // call. We also make sure none of the hardware intrinsic method bodies get pregenerated in crossgen + // Do not report the get_IsSupported method as an intrinsic if it's an intrinsic on the architecture + // we are targeting. This will turn the call into a regular call. + // We also make sure none of the hardware intrinsic method bodies get pregenerated in crossgen // (see ZapInfo::CompileMethod) but get JITted instead. The JITted method will have the correct // answer for the CPU the code is running on. if (fIsGetIsSupportedMethod && (