-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Allow pregenerating most HW intrinsics in CoreLib #24917
Changes from all commits
cef2048
b4b8244
5c3974f
aa19230
da02afe
6b5b161
db4cb30
d9d0c84
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -438,7 +438,9 @@ void ZapInfo::CompileMethod() | |
| // this they can add the hint and reduce the perf cost at runtime. | ||
| m_pImage->m_pPreloader->PrePrepareMethodIfNecessary(m_currentMethodHandle); | ||
|
|
||
| DWORD methodAttribs = getMethodAttribs(m_currentMethodHandle); | ||
| // Retrieve method attributes from EEJitInfo - the ZapInfo's version updates | ||
| // some of the flags related to hardware intrinsics but we don't want that. | ||
| DWORD methodAttribs = m_pEEJitInfo->getMethodAttribs(m_currentMethodHandle); | ||
| if (methodAttribs & CORINFO_FLG_AGGRESSIVE_OPT) | ||
| { | ||
| // Skip methods marked with MethodImplOptions.AggressiveOptimization, they will be jitted instead. In the future, | ||
|
|
@@ -447,6 +449,27 @@ void ZapInfo::CompileMethod() | |
| return; | ||
| } | ||
|
|
||
| #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) | ||
| if (methodAttribs & CORINFO_FLG_JIT_INTRINSIC) | ||
| { | ||
| // Skip generating hardware intrinsic method bodies. | ||
| // | ||
| // We don't know what the implementation should do (whether it can do the actual intrinsic thing, or whether | ||
| // it should throw a PlatformNotSupportedException). | ||
|
|
||
| const char* namespaceName; | ||
| getMethodNameFromMetadata(m_currentMethodHandle, nullptr, &namespaceName, nullptr); | ||
| if (strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0 | ||
| || strcmp(namespaceName, "System.Runtime.Intrinsics.Arm.Arm64") == 0 | ||
| || strcmp(namespaceName, "System.Runtime.Intrinsics") == 0) | ||
| { | ||
| if (m_zapper->m_pOpt->m_verbose) | ||
| m_zapper->Info(W("Skipped due to being a hardware intrinsic\n")); | ||
| return; | ||
| } | ||
| } | ||
| #endif | ||
|
|
||
| m_jitFlags = ComputeJitFlags(m_currentMethodHandle); | ||
|
|
||
| #ifdef FEATURE_READYTORUN_COMPILER | ||
|
|
@@ -2083,6 +2106,94 @@ void ZapInfo::GetProfilingHandle(BOOL *pbHookFunction, | |
| *pbIndirectedHandles = TRUE; | ||
| } | ||
|
|
||
| // | ||
| // This strips the CORINFO_FLG_JIT_INTRINSIC flag from some of the hardware intrinsic methods. | ||
| // | ||
| DWORD FilterHardwareIntrinsicMethodAttribs(DWORD attribs, CORINFO_METHOD_HANDLE ftn, ICorDynamicInfo* pJitInfo) | ||
| { | ||
| if (attribs & CORINFO_FLG_JIT_INTRINSIC) | ||
| { | ||
| // Figure out which intrinsic we are dealing with. | ||
| const char* namespaceName; | ||
| const char* className; | ||
| const char* enclosingClassName; | ||
| const char* methodName = pJitInfo->getMethodNameFromMetadata(ftn, &className, &namespaceName, &enclosingClassName); | ||
|
|
||
| // Is this the get_IsSupported method that checks whether intrinsic is supported? | ||
| bool fIsGetIsSupportedMethod = strcmp(methodName, "get_IsSupported") == 0; | ||
|
|
||
| #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) | ||
| bool fIsX86intrinsic = strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0; | ||
|
|
||
| // If it's anything related to Sse/Sse2, we can expand unconditionally since this is a baseline | ||
| // requirement of CoreCLR. | ||
| if (fIsX86intrinsic | ||
| && ( | ||
| strcmp(className, "Sse") == 0 || strcmp(className, "Sse2") == 0 | ||
| || ( | ||
| strcmp(className, "X64") == 0 | ||
| && ( | ||
| strcmp(enclosingClassName, "Sse") == 0 || strcmp(enclosingClassName, "Sse2") == 0 | ||
| ) | ||
| ) | ||
| ) | ||
| ) | ||
| { | ||
| return attribs; | ||
| } | ||
|
|
||
| // If it's an intrinsic that requires VEX encoding, do not report as intrinsic | ||
| // to force this to become a regular method call. | ||
| // We don't allow RyuJIT to use VEX encoding at AOT compilation time, so these | ||
| // cannot be pregenerated. Not reporting them as intrinsic will make sure | ||
| // it will do the right thing at runtime (the called method will be JITted). | ||
| // It will be slower, but correct. | ||
| if (fIsX86intrinsic | ||
| && ( | ||
| strcmp(className, "Avx") == 0 || strcmp(className, "Fma") == 0 || strcmp(className, "Avx2") == 0 || strcmp(className, "Bmi1") == 0 || strcmp(className, "Bmi2") == 0 | ||
| || ( | ||
| strcmp(className, "X64") == 0 | ||
| && ( | ||
| strcmp(enclosingClassName, "Bmi1") == 0 || strcmp(enclosingClassName, "Bmi2") == 0 | ||
| ) | ||
| ) | ||
| ) | ||
| ) | ||
| { | ||
| // We do want the IsSupported for VEX instructions to be recognized as intrinsic so that the | ||
| // potentially worse quality code doesn't actually run until tiered JIT starts | ||
| // kicking in and recompiling methods. Reporting this as intrinsic makes RyuJIT expand it | ||
| // into `return false`. | ||
| if (fIsGetIsSupportedMethod) | ||
| return attribs; | ||
|
|
||
| // Treat other intrinsic methods as a regular method call (into a JITted method). | ||
| return (attribs & ~CORINFO_FLG_JIT_INTRINSIC) | CORINFO_FLG_DONT_INLINE; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this going to be correct? The calls are recursive and the So, if it is recursive and
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What we want is the caller to treat these as regular, non-inline method calls and for the JIT to not AOT the method bodies of the intrinsics themselves.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't that what The recursive call within the actual intrinsic method body will never go through here because we don't AOT compile them.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have a compiler check for "is compiling for R2R/crossgen/AOT", right? It might be simpler to just update the logic in bool isIsaSupported = comp->compSupports(isa) && comp->compSupportsHWIntrinsic(isa);
if (strcmp(methodName, "get_IsSupported") == 0)
{
return isIsaSupported ? NI_IsSupported_True : NI_IsSupported_False;
}
else if (!isIsaSupported)
{
+ if (isCompilingForAOT)
+ {
+ return nullptr;
+ }
return NI_Throw_PlatformNotSupportedException;
}
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe that would allow us to treat the AVX/AVX2/FMA/BMI1/BMI2 code paths as normal (you don't need to mark them as The existing logic for not expanding method bodies would kick-in here: 6b5b161#diff-04a590b55693db432b3a24d0d9a59cceR451 And the added check would ensure that not supported ISAs (which these won't be due to us not setting the compiler flags in the zapper here: 6b5b161#diff-efefdb418a1b8be4f78c7c9aedf91cb4L1212) will return There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @tannergooding is correct here - the intrinsics are recognized during importing, prior to the call being considered for inlining. I think it makes some sense to unconditionally fail to expand (i.e.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
But this line strips the intrinsic bit from the method. It should be treated as a simple method call, no? That's what we want for BMI and friends - keep IsSupported untouched and reported as an intrinsic so that it expands to
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The problem ends up coming in when we go to AOT those methods. You stripped the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Attributes in that spot are retrieved from the standard JitInterface - we don't call the Precompiling method bodies on e.g. the Bmi1 class would be wrong.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, ok. That definitely wasn't clear to me. A comment calling it out might be helpful. |
||
| } | ||
|
|
||
| #endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) | ||
|
|
||
| // Do not report the get_IsSupported method as an intrinsic if it's an intrinsic on the architecture | ||
| // we are targeting. This will turn the call into a regular call. | ||
| // We also make sure none of the hardware intrinsic method bodies get pregenerated in crossgen | ||
| // (see ZapInfo::CompileMethod) but get JITted instead. The JITted method will have the correct | ||
| // answer for the CPU the code is running on. | ||
| if (fIsGetIsSupportedMethod && ( | ||
| #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) | ||
| fIsX86intrinsic || | ||
| #elif _TARGET_ARM64_ | ||
| strcmp(namespaceName, "System.Runtime.Intrinsics.Arm.Arm64") == 0 || | ||
| #endif | ||
| strcmp(namespaceName, "System.Runtime.Intrinsics") == 0)) | ||
| { | ||
| // Treat as a regular method call (into a JITted method). | ||
| return (attribs & ~CORINFO_FLG_JIT_INTRINSIC) | CORINFO_FLG_DONT_INLINE; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same concern as I called out above. Intrinsics are special and are handled in the importer before inlining checks and the recursive calls are dependent on It might be better to just fixup the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That makes sense to me, though note that the JIT doesn't know whether it's SPC.dll or not.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is that actually a concern now that we have a mechanism to handle functions which consume AVX/AVX2/FMA/BMI1/BMI2 methods, so things work even if they are called without an I believe the "worst case" scenario is someone doesn't use the check and calls, for example
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is also the chance for subtle bugs, if users don't provide software fallbacks, but I think it might be better to just have that be upfront in the initial release. If we ever want to make this more generally available, users will always have that scenario. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right - I don't think it is a concern that we don't specially handle SPC.dll here.
But the code won't work in any event. I guess if they were actually relying on getting PNSE it might. I suppose we could trap the hardware exception and throw PNSE, but I'm not sure that complication is justified.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree, I think it would be better to encourage people to have the appropriate |
||
| } | ||
| } | ||
|
|
||
| return attribs; | ||
| } | ||
|
|
||
| //return a callable stub that will do the virtual or interface call | ||
|
|
||
|
|
||
|
|
@@ -2108,6 +2219,8 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken, | |
| (CORINFO_CALLINFO_FLAGS)(flags | CORINFO_CALLINFO_KINDONLY), | ||
| pResult); | ||
|
|
||
| pResult->methodFlags = FilterHardwareIntrinsicMethodAttribs(pResult->methodFlags, pResult->hMethod, m_pEEJitInfo); | ||
|
|
||
| #ifdef FEATURE_READYTORUN_COMPILER | ||
| if (IsReadyToRunCompilation()) | ||
| { | ||
|
|
@@ -3678,7 +3791,8 @@ unsigned ZapInfo::getMethodHash(CORINFO_METHOD_HANDLE ftn) | |
|
|
||
| DWORD ZapInfo::getMethodAttribs(CORINFO_METHOD_HANDLE ftn) | ||
| { | ||
| return m_pEEJitInfo->getMethodAttribs(ftn); | ||
| DWORD result = m_pEEJitInfo->getMethodAttribs(ftn); | ||
| return FilterHardwareIntrinsicMethodAttribs(result, ftn, m_pEEJitInfo); | ||
| } | ||
|
|
||
| void ZapInfo::setMethodAttribs(CORINFO_METHOD_HANDLE ftn, CorInfoMethodRuntimeFlags attribs) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1189,11 +1189,31 @@ void Zapper::InitializeCompilerFlags(CORCOMPILE_VERSION_INFO * pVersionInfo) | |
| #endif // _TARGET_X86_ | ||
|
|
||
| #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) | ||
| // If we're compiling CoreLib, allow RyuJIT to generate SIMD code so that we can expand some | ||
| // of the hardware intrinsics. | ||
| // If we're crossgenning CoreLib, allow generating non-VEX intrinsics. The generated code might | ||
| // not actually be supported by the processor at runtime so we compensate for it by | ||
| // not letting the get_IsSupported method to be intrinsically expanded in crossgen | ||
| // (see special handling around CORINFO_FLG_JIT_INTRINSIC in ZapInfo). | ||
| // That way the actual support checks will always be jitted. | ||
| // We only do this for CoreLib because forgetting to wrap intrinsics under IsSupported | ||
| // checks can lead to illegal instruction traps (instead of a nice managed exception). | ||
|
MichalStrehovsky marked this conversation as resolved.
|
||
| if (m_pEECompileInfo->GetAssemblyModule(m_hAssembly) == m_pEECompileInfo->GetLoaderModuleForMscorlib()) | ||
| { | ||
| m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_FEATURE_SIMD); | ||
|
|
||
| #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could potentially cause problems. The JIT currently decides to emit the VEX or non-VEX encoding based on whether or not AVX is supported. This isn't going to correctly handle that difference for an SSE exclusive vs an AVX checked code path, which will prevent the generated code from running on some downstream processors.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we keep the "do not crossgen" behavior for AVX/FMA/AVX2, would that fix the issue?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the safest thing to do for now would be to effectively treat the AOT codegen as if only That should mitigate the startup time regressions and throughput should get fairly quickly resolved by rejitting the hot methods. Anything else is going to have larger potential for a bug tail or may impact perf in interesting ways (hurting benchmark numbers, for example) and likely needs more investigating before we settle on a solution. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I confess that this hadn't occurred to me. I'd like to see us have the ability to include runtime-checked dual-version code, but I'm pretty sure the JIT can't generate both SSE and VEX encodings in the same method.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I agree. Long term, we should be able to support a static AOT target or one of a couple forms of dynamic dispatch. The two most common, that I am aware of, are cached CPUID checks (which result in a branch per ISA target) and changing the method target (which involves fixing up which target the method points to). But, those are post 3.0 goals and likely require some fixups to support generating both VEX and non-VEX code paths, for example. |
||
| m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_AES); | ||
| m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_PCLMULQDQ); | ||
| m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE3); | ||
| m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSSE3); | ||
| m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE41); | ||
| m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE42); | ||
| m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_POPCNT); | ||
| // Leaving out CORJIT_FLAGS::CORJIT_FLAG_USE_AVX, CORJIT_FLAGS::CORJIT_FLAG_USE_FMA | ||
| // CORJIT_FLAGS::CORJIT_FLAG_USE_AVX2, CORJIT_FLAGS::CORJIT_FLAG_USE_BMI1, | ||
| // CORJIT_FLAGS::CORJIT_FLAG_USE_BMI2 on purpose - these require VEX encodings | ||
| // and the JIT doesn't support generating code for methods with mixed encodings. | ||
| m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_LZCNT); | ||
| #endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) | ||
| } | ||
| #endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.