diff --git a/src/vm/methodtablebuilder.cpp b/src/vm/methodtablebuilder.cpp
index 31c4b0a5ee8e..b4b068bebad4 100644
--- a/src/vm/methodtablebuilder.cpp
+++ b/src/vm/methodtablebuilder.cpp
@@ -1521,14 +1521,11 @@ MethodTableBuilder::BuildMethodTableThrowing(
 #if defined(CROSSGEN_COMPILE)
 #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
             if ((!IsNgenPDBCompilationProcess()
-                && GetAppDomain()->ToCompilationDomain()->GetTargetModule() != g_pObjectClass->GetModule())
-                || (strcmp(className, "Sse") != 0 && strcmp(className, "Sse2") != 0))
+                && GetAppDomain()->ToCompilationDomain()->GetTargetModule() != g_pObjectClass->GetModule()))
 #endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
             {
                 // Disable AOT compiling for managed implementation of hardware intrinsics.
                 // We specially treat them here to ensure correct ISA features are set during compilation
-                // The only exception to this rule are SSE and SSE2 intrinsics in CoreLib - we can
-                // safely expand those because we require them to be always available.
                 COMPlusThrow(kTypeLoadException, IDS_EE_HWINTRINSIC_NGEN_DISALLOWED);
             }
 #endif // defined(CROSSGEN_COMPILE)
diff --git a/src/zap/zapinfo.cpp b/src/zap/zapinfo.cpp
index f57556b8304d..142956ea8648 100644
--- a/src/zap/zapinfo.cpp
+++ b/src/zap/zapinfo.cpp
@@ -438,7 +438,9 @@ void ZapInfo::CompileMethod()
     // this they can add the hint and reduce the perf cost at runtime.
     m_pImage->m_pPreloader->PrePrepareMethodIfNecessary(m_currentMethodHandle);
 
-    DWORD methodAttribs = getMethodAttribs(m_currentMethodHandle);
+    // Retrieve method attributes from EEJitInfo - the ZapInfo's version updates
+    // some of the flags related to hardware intrinsics but we don't want that.
+    DWORD methodAttribs = m_pEEJitInfo->getMethodAttribs(m_currentMethodHandle);
     if (methodAttribs & CORINFO_FLG_AGGRESSIVE_OPT)
     {
         // Skip methods marked with MethodImplOptions.AggressiveOptimization, they will be jitted instead. In the future,
@@ -447,6 +449,27 @@ void ZapInfo::CompileMethod()
         return;
     }
 
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+    if (methodAttribs & CORINFO_FLG_JIT_INTRINSIC)
+    {
+        // Skip generating hardware intrinsic method bodies.
+        //
+        // We don't know what the implementation should do (whether it can do the actual intrinsic thing, or whether
+        // it should throw a PlatformNotSupportedException).
+
+        const char* namespaceName;
+        getMethodNameFromMetadata(m_currentMethodHandle, nullptr, &namespaceName, nullptr);
+        if (strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0
+            || strcmp(namespaceName, "System.Runtime.Intrinsics.Arm.Arm64") == 0
+            || strcmp(namespaceName, "System.Runtime.Intrinsics") == 0)
+        {
+            if (m_zapper->m_pOpt->m_verbose)
+                m_zapper->Info(W("Skipped due to being a hardware intrinsic\n"));
+            return;
+        }
+    }
+#endif
+
     m_jitFlags = ComputeJitFlags(m_currentMethodHandle);
 
 #ifdef FEATURE_READYTORUN_COMPILER
@@ -2083,6 +2106,94 @@ void ZapInfo::GetProfilingHandle(BOOL *pbHookFunction,
     *pbIndirectedHandles = TRUE;
 }
 
+// Adjusts the attributes reported to the JIT for hardware intrinsic methods: strips
+// CORINFO_FLG_JIT_INTRINSIC (and adds CORINFO_FLG_DONT_INLINE) where the call must stay a regular
+// call into a JITted method; attributes of every other method are returned unchanged.
+DWORD FilterHardwareIntrinsicMethodAttribs(DWORD attribs, CORINFO_METHOD_HANDLE ftn, ICorDynamicInfo* pJitInfo)
+{
+    if (attribs & CORINFO_FLG_JIT_INTRINSIC)
+    {
+        // Figure out which intrinsic we are dealing with.
+        const char* namespaceName;
+        const char* className;
+        const char* enclosingClassName;
+        const char* methodName = pJitInfo->getMethodNameFromMetadata(ftn, &className, &namespaceName, &enclosingClassName);
+
+        // Is this the get_IsSupported method that checks whether the intrinsic is supported?
+        bool fIsGetIsSupportedMethod = strcmp(methodName, "get_IsSupported") == 0;
+
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+        bool fIsX86intrinsic = strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0;
+
+        // If it's anything related to Sse/Sse2, we can expand unconditionally since this is a baseline
+        // requirement of CoreCLR.
+        if (fIsX86intrinsic
+            && (
+                strcmp(className, "Sse") == 0 || strcmp(className, "Sse2") == 0
+                || (
+                    strcmp(className, "X64") == 0
+                    && (
+                        strcmp(enclosingClassName, "Sse") == 0 || strcmp(enclosingClassName, "Sse2") == 0
+                    )
+                )
+            )
+        )
+        {
+            return attribs;
+        }
+
+        // If it's an intrinsic that requires VEX encoding, do not report as intrinsic
+        // to force this to become a regular method call.
+        // We don't allow RyuJIT to use VEX encoding at AOT compilation time, so these
+        // cannot be pregenerated. Not reporting them as intrinsic will make sure
+        // it will do the right thing at runtime (the called method will be JITted).
+        // It will be slower, but correct.
+        if (fIsX86intrinsic
+            && (
+                strcmp(className, "Avx") == 0 || strcmp(className, "Fma") == 0 || strcmp(className, "Avx2") == 0 || strcmp(className, "Bmi1") == 0 || strcmp(className, "Bmi2") == 0
+                || (
+                    strcmp(className, "X64") == 0
+                    && (
+                        strcmp(enclosingClassName, "Bmi1") == 0 || strcmp(enclosingClassName, "Bmi2") == 0
+                    )
+                )
+            )
+        )
+        {
+            // We do want the IsSupported for VEX instructions to be recognized as intrinsic so that the
+            // potentially worse quality code doesn't actually run until tiered JIT starts
+            // kicking in and recompiling methods. Reporting this as intrinsic makes RyuJIT expand it
+            // into `return false`.
+            if (fIsGetIsSupportedMethod)
+                return attribs;
+
+            // Treat other intrinsic methods as a regular method call (into a JITted method).
+            return (attribs & ~CORINFO_FLG_JIT_INTRINSIC) | CORINFO_FLG_DONT_INLINE;
+        }
+
+#endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+
+        // Do not report the get_IsSupported method as an intrinsic if it's an intrinsic on the architecture
+        // we are targeting. This will turn the call into a regular call.
+        // We also make sure none of the hardware intrinsic method bodies get pregenerated in crossgen
+        // (see ZapInfo::CompileMethod) but get JITted instead. The JITted method will have the correct
+        // answer for the CPU the code is running on.
+        if (fIsGetIsSupportedMethod && (
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+            fIsX86intrinsic ||
+#elif defined(_TARGET_ARM64_)
+            strcmp(namespaceName, "System.Runtime.Intrinsics.Arm.Arm64") == 0 ||
+#endif
+            strcmp(namespaceName, "System.Runtime.Intrinsics") == 0))
+        {
+            // Treat as a regular method call (into a JITted method).
+            return (attribs & ~CORINFO_FLG_JIT_INTRINSIC) | CORINFO_FLG_DONT_INLINE;
+        }
+    }
+
+    return attribs;
+}
+
 //return a callable stub that will do the virtual or interface call
 
 
@@ -2108,6 +2219,8 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken,
                               (CORINFO_CALLINFO_FLAGS)(flags | CORINFO_CALLINFO_KINDONLY),
                               pResult);
 
+    pResult->methodFlags = FilterHardwareIntrinsicMethodAttribs(pResult->methodFlags, pResult->hMethod, m_pEEJitInfo);
+
 #ifdef FEATURE_READYTORUN_COMPILER
     if (IsReadyToRunCompilation())
     {
@@ -3678,7 +3791,8 @@ unsigned ZapInfo::getMethodHash(CORINFO_METHOD_HANDLE ftn)
 
 DWORD ZapInfo::getMethodAttribs(CORINFO_METHOD_HANDLE ftn)
 {
-    return m_pEEJitInfo->getMethodAttribs(ftn);
+    DWORD result = m_pEEJitInfo->getMethodAttribs(ftn);
+    return FilterHardwareIntrinsicMethodAttribs(result, ftn, m_pEEJitInfo);
 }
 
 void ZapInfo::setMethodAttribs(CORINFO_METHOD_HANDLE ftn, CorInfoMethodRuntimeFlags attribs)
diff --git a/src/zap/zapper.cpp b/src/zap/zapper.cpp
index 5e5d190346ee..e511b5726c11 100644
--- a/src/zap/zapper.cpp
+++ b/src/zap/zapper.cpp
@@ -1189,11 +1189,31 @@ void Zapper::InitializeCompilerFlags(CORCOMPILE_VERSION_INFO * pVersionInfo)
 #endif // _TARGET_X86_
 
 #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
-    // If we're compiling CoreLib, allow RyuJIT to generate SIMD code so that we can expand some
-    // of the hardware intrinsics.
+    // If we're crossgenning CoreLib, allow generating non-VEX intrinsics. The generated code might
+    // not actually be supported by the processor at runtime so we compensate for it by
+    // not letting the get_IsSupported method be intrinsically expanded in crossgen
+    // (see special handling around CORINFO_FLG_JIT_INTRINSIC in ZapInfo).
+    // That way the actual support checks will always be jitted.
+    // We only do this for CoreLib because forgetting to wrap intrinsics under IsSupported
+    // checks can lead to illegal instruction traps (instead of a nice managed exception).
     if (m_pEECompileInfo->GetAssemblyModule(m_hAssembly) == m_pEECompileInfo->GetLoaderModuleForMscorlib())
     {
         m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_FEATURE_SIMD);
+
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+        m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_AES);
+        m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_PCLMULQDQ);
+        m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE3);
+        m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSSE3);
+        m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE41);
+        m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE42);
+        m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_POPCNT);
+        // Leaving out CORJIT_FLAGS::CORJIT_FLAG_USE_AVX, CORJIT_FLAGS::CORJIT_FLAG_USE_FMA,
+        // CORJIT_FLAGS::CORJIT_FLAG_USE_AVX2, CORJIT_FLAGS::CORJIT_FLAG_USE_BMI1,
+        // CORJIT_FLAGS::CORJIT_FLAG_USE_BMI2 on purpose - these require VEX encodings
+        // and the JIT doesn't support generating code for methods with mixed encodings.
+        m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_LZCNT);
+#endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
     }
 #endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
 