From d79f664875c4b1926be751a94e7d3669ca7dd588 Mon Sep 17 00:00:00 2001
From: Clinton Ingram
Date: Wed, 14 May 2025 15:57:02 -0700
Subject: [PATCH 1/4] allow any baseline intrinsics in JIT

---
 src/coreclr/jit/compiler.cpp                |  51 +++++-----
 src/coreclr/jit/compiler.h                  |  48 +--------
 src/coreclr/jit/decomposelongs.cpp          |   2 -
 src/coreclr/jit/gentree.cpp                 | 104 +------------------
 src/coreclr/jit/hwintrinsic.cpp             |   7 +-
 src/coreclr/jit/hwintrinsiccodegenxarch.cpp |   1 -
 src/coreclr/jit/importercalls.cpp           | 100 ++++++++----------
 src/coreclr/jit/lowerarmarch.cpp            |   4 +-
 src/coreclr/jit/lowerxarch.cpp              | 107 ++------------------
 src/coreclr/jit/morph.cpp                   |  39 ++-----
 10 files changed, 97 insertions(+), 366 deletions(-)

diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 6ac4ca679fa611..2f33269de17aa1 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -1980,11 +1980,15 @@ void Compiler::compSetProcessor()
     opts.compSupportsISAReported.Reset();
     opts.compSupportsISAExactly.Reset();
 
-// The VM will set the ISA flags depending on actual hardware support
-// and any specified config switches specified by the user. The exception
-// here is for certain "artificial ISAs" such as Vector64/128/256 where they
-// don't actually exist. The JIT is in charge of adding those and ensuring
-// the total sum of flags is still valid.
+// The VM will set the ISA flags depending on actual hardware support and any
+// config values specified by the user. Config may have caused the VM to exclude
+// baseline ISAs from the supported set. We force their inclusion here so that
+// JIT code can use them unconditionally, but we will honor the config when
+// resolving managed HWIntrinsic methods.
+//
+// We also take care of adding the virtual vector ISAs (i.e. Vector64/128/256/512)
+// here, based on a combination of hardware ISA support and config values.
+
 #if defined(TARGET_XARCH)
     // If the VM passed in a virtual vector ISA, it was done to communicate PreferredVectorBitWidth.
     // No check is done for the validity of the value, since it will be clamped to max supported by
@@ -2015,10 +2019,11 @@ void Compiler::compSetProcessor()
                 !instructionSetFlags.HasInstructionSet(InstructionSet_Vector256) &&
                 !instructionSetFlags.HasInstructionSet(InstructionSet_Vector512));
 
-    if (instructionSetFlags.HasInstructionSet(InstructionSet_X86Base))
-    {
-        instructionSetFlags.AddInstructionSet(InstructionSet_Vector128);
-    }
+    instructionSetFlags.AddInstructionSet(InstructionSet_Vector128);
+    instructionSetFlags.AddInstructionSet(InstructionSet_X86Base);
+#ifdef TARGET_AMD64
+    instructionSetFlags.AddInstructionSet(InstructionSet_X86Base_X64);
+#endif // TARGET_AMD64
 
     if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX))
     {
@@ -2030,11 +2035,12 @@ void Compiler::compSetProcessor()
         instructionSetFlags.AddInstructionSet(InstructionSet_Vector512);
     }
 #elif defined(TARGET_ARM64)
-    if (instructionSetFlags.HasInstructionSet(InstructionSet_AdvSimd))
-    {
-        instructionSetFlags.AddInstructionSet(InstructionSet_Vector64);
-        instructionSetFlags.AddInstructionSet(InstructionSet_Vector128);
-    }
+    instructionSetFlags.AddInstructionSet(InstructionSet_Vector64);
+    instructionSetFlags.AddInstructionSet(InstructionSet_Vector128);
+    instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase);
+    instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase_Arm64);
+    instructionSetFlags.AddInstructionSet(InstructionSet_AdvSimd);
+    instructionSetFlags.AddInstructionSet(InstructionSet_AdvSimd_Arm64);
 #endif // TARGET_ARM64
 
     assert(instructionSetFlags.Equals(EnsureInstructionSetFlagsAreValid(instructionSetFlags)));
@@ -6011,11 +6017,8 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
         }
     }
 
-    if (JitConfig.EnableHWIntrinsic() != 0)
-    {
-        instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase);
-        instructionSetFlags.AddInstructionSet(InstructionSet_AdvSimd);
-    }
+    instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase);
+    instructionSetFlags.AddInstructionSet(InstructionSet_AdvSimd);
 
     if (JitConfig.EnableArm64Aes() != 0)
     {
@@ -6084,10 +6087,7 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
         }
     }
 
-    if (JitConfig.EnableHWIntrinsic() != 0)
-    {
-        instructionSetFlags.AddInstructionSet(InstructionSet_X86Base);
-    }
+    instructionSetFlags.AddInstructionSet(InstructionSet_X86Base);
 
     if (JitConfig.EnableSSE3() != 0)
     {
@@ -6197,10 +6197,7 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
         instructionSetFlags.AddInstructionSet(InstructionSet_APX);
     }
 #elif defined(TARGET_RISCV64)
-    if (JitConfig.EnableHWIntrinsic() != 0)
-    {
-        instructionSetFlags.AddInstructionSet(InstructionSet_RiscV64Base);
-    }
+    instructionSetFlags.AddInstructionSet(InstructionSet_RiscV64Base);
 
     if (JitConfig.EnableRiscV64Zba() != 0)
     {
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 0c2e8a0e103e20..70ae13c480a721 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -8915,43 +8915,20 @@ class Compiler
     bool IsBaselineSimdIsaSupported()
     {
 #ifdef FEATURE_SIMD
-#if defined(TARGET_XARCH)
-        CORINFO_InstructionSet minimumIsa = InstructionSet_X86Base;
-#elif defined(TARGET_ARM64)
-        CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd;
+#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
+        return true;
 #elif defined(TARGET_LOONGARCH64)
         // TODO: supporting SIMD feature for LoongArch64.
         assert(!"unimplemented yet on LA");
-        CORINFO_InstructionSet minimumIsa = 0;
+        return false;
 #else
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64 && !TARGET_LOONGARCH64
-
-        return compOpportunisticallyDependsOn(minimumIsa);
 #else
         return false;
 #endif
     }
 
-#if defined(DEBUG)
-    bool IsBaselineSimdIsaSupportedDebugOnly()
-    {
-#ifdef FEATURE_SIMD
-#if defined(TARGET_XARCH)
-        CORINFO_InstructionSet minimumIsa = InstructionSet_X86Base;
-#elif defined(TARGET_ARM64)
-        CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd;
-#else
-#error Unsupported platform
-#endif // !TARGET_XARCH && !TARGET_ARM64
-
-        return compIsaSupportedDebugOnly(minimumIsa);
-#else
-        return false;
-#endif // FEATURE_SIMD
-    }
-#endif // DEBUG
-
     bool isIntrinsicType(CORINFO_CLASS_HANDLE clsHnd)
     {
         return info.compCompHnd->isIntrinsicType(clsHnd);
@@ -9232,29 +9209,12 @@ class Compiler
         {
             return YMM_REGSIZE_BYTES;
         }
-        else if (compOpportunisticallyDependsOn(InstructionSet_X86Base))
-        {
-            return XMM_REGSIZE_BYTES;
-        }
         else
         {
-            // TODO: We should be returning 0 here, but there are a number of
-            // places that don't quite get handled correctly in that scenario
-
             return XMM_REGSIZE_BYTES;
         }
 #elif defined(TARGET_ARM64)
-        if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd))
-        {
-            return FP_REGSIZE_BYTES;
-        }
-        else
-        {
-            // TODO: We should be returning 0 here, but there are a number of
-            // places that don't quite get handled correctly in that scenario
-
-            return FP_REGSIZE_BYTES;
-        }
+        return FP_REGSIZE_BYTES;
 #else
         assert(!"getMaxVectorByteLength() unimplemented on target arch");
         unreached();
diff --git a/src/coreclr/jit/decomposelongs.cpp b/src/coreclr/jit/decomposelongs.cpp
index 4cd474c9338d80..f4915ac33ef3db 100644
--- a/src/coreclr/jit/decomposelongs.cpp
+++ b/src/coreclr/jit/decomposelongs.cpp
@@ -1970,8 +1970,6 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicToScalar(LIR::Use& use, GenTreeHWIn
     }
     else
     {
-        assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_X86Base));
-
        GenTree* thirtyTwo = m_compiler->gtNewIconNode(32);
        GenTree* shift     = m_compiler->gtNewSimdBinOpNode(GT_RSZ, op1->TypeGet(), simdTmpVar, thirtyTwo,
                                                            node->GetSimdBaseJitType(), simdSize);
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 0dad8ba0b82562..9cdde0f844f28f 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -20735,8 +20735,6 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types ty
 
 GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -20841,8 +20839,6 @@ GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType si
 GenTree* Compiler::gtNewSimdBinOpNode(
     genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -21475,8 +21471,6 @@ GenTree* Compiler::gtNewSimdBinOpNode(
 GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -21570,8 +21564,6 @@ GenTree* Compiler::gtNewSimdCvtNode(var_types type,
     assert(varTypeIsFloating(simdSourceBaseType));
     assert(varTypeIsIntegral(simdTargetBaseType));
 
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
 #if defined(TARGET_XARCH)
     assert(compIsaSupportedDebugOnly(InstructionSet_AVX512) ||
            ((simdTargetBaseType == TYP_INT) &&
            ((simdSize == 16 && compIsaSupportedDebugOnly(InstructionSet_SSE41)) ||
@@ -21710,8 +21702,6 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type,
     assert(varTypeIsFloating(simdSourceBaseType));
     assert(varTypeIsIntegral(simdTargetBaseType));
 
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     // Generate intrinsic needed for conversion
     NamedIntrinsic hwIntrinsicID = NI_Illegal;
@@ -21956,8 +21946,6 @@ GenTree* Compiler::gtNewSimdCvtVectorToMaskNode(var_types type,
 GenTree* Compiler::gtNewSimdCmpOpNode(
     genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -22253,7 +22241,6 @@ GenTree* Compiler::gtNewSimdCmpOpNode(
 GenTree* Compiler::gtNewSimdCmpOpAllNode(
     genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
     assert(type == TYP_INT);
 
     var_types simdType = getSIMDTypeForSize(simdSize);
@@ -22392,7 +22379,6 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(
 GenTree* Compiler::gtNewSimdCmpOpAnyNode(
     genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
     assert(type == TYP_INT);
 
     var_types simdType = getSIMDTypeForSize(simdSize);
@@ -22527,8 +22513,6 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(
 GenTree* Compiler::gtNewSimdCndSelNode(
     var_types type, GenTree* op1, GenTree* op2, GenTree* op3, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -23127,8 +23111,6 @@ GenTree* Compiler::gtNewSimdCreateSequenceNode(
 GenTree* Compiler::gtNewSimdDotProdNode(
     var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     var_types simdType = getSIMDTypeForSize(simdSize);
     assert(varTypeIsSIMD(simdType));
@@ -23171,8 +23153,6 @@ GenTree* Compiler::gtNewSimdDotProdNode(
 GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -23249,8 +23229,6 @@ GenTree* Compiler::gtNewSimdFmaNode(
         intrinsic = NI_FMA_MultiplyAdd;
     }
 #elif defined(TARGET_ARM64)
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     if (simdBaseType == TYP_DOUBLE)
     {
         intrinsic = (simdSize == 8) ? NI_AdvSimd_FusedMultiplyAddScalar : NI_AdvSimd_Arm64_FusedMultiplyAdd;
@@ -23305,7 +23283,7 @@ GenTree* Compiler::gtNewSimdGetElementNode(
         case TYP_SHORT:
         case TYP_USHORT:
         {
-            assert(compIsaSupportedDebugOnly(InstructionSet_X86Base));
+            // Supported by baseline ISA requirement
             break;
         }
@@ -23520,8 +23498,6 @@ GenTree* Compiler::gtNewSimdIsEvenIntegerNode(var_types   type,
                                               CorInfoType simdBaseJitType,
                                               unsigned    simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -23549,8 +23525,6 @@ GenTree* Compiler::gtNewSimdIsEvenIntegerNode(var_types   type,
 //
 GenTree* Compiler::gtNewSimdIsFiniteNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -23602,8 +23576,6 @@ GenTree* Compiler::gtNewSimdIsFiniteNode(var_types type, GenTree* op1, CorInfoTy
 //
 GenTree* Compiler::gtNewSimdIsInfinityNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -23635,8 +23607,6 @@ GenTree* Compiler::gtNewSimdIsInfinityNode(var_types type, GenTree* op1, CorInfo
 //
 GenTree* Compiler::gtNewSimdIsIntegerNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -23677,8 +23647,6 @@ GenTree* Compiler::gtNewSimdIsIntegerNode(var_types type, GenTree* op1, CorInfoT
 //
 GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -23710,8 +23678,6 @@ GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType
 //
 GenTree* Compiler::gtNewSimdIsNegativeNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -23754,8 +23720,6 @@ GenTree* Compiler::gtNewSimdIsNegativeInfinityNode(var_types   type,
                                                    CorInfoType simdBaseJitType,
                                                    unsigned    simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -23804,8 +23768,6 @@ GenTree* Compiler::gtNewSimdIsNegativeInfinityNode(var_types   type,
 //
 GenTree* Compiler::gtNewSimdIsNormalNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -23869,8 +23831,6 @@ GenTree* Compiler::gtNewSimdIsOddIntegerNode(var_types   type,
                                              CorInfoType simdBaseJitType,
                                              unsigned    simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -23898,8 +23858,6 @@ GenTree* Compiler::gtNewSimdIsOddIntegerNode(var_types   type,
 //
 GenTree* Compiler::gtNewSimdIsPositiveNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -23942,8 +23900,6 @@ GenTree* Compiler::gtNewSimdIsPositiveInfinityNode(var_types   type,
                                                    CorInfoType simdBaseJitType,
                                                    unsigned    simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -23995,8 +23951,6 @@ GenTree* Compiler::gtNewSimdIsSubnormalNode(var_types   type,
                                             CorInfoType simdBaseJitType,
                                             unsigned    simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -24054,8 +24008,6 @@ GenTree* Compiler::gtNewSimdIsSubnormalNode(var_types   type,
 //
 GenTree* Compiler::gtNewSimdIsZeroNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -24111,8 +24063,6 @@ GenTree* Compiler::gtNewSimdLoadAlignedNode(var_types   type,
                                             unsigned    simdSize)
 {
 #if defined(TARGET_XARCH)
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -24170,8 +24120,6 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(var_types   type,
                                                 unsigned    simdSize)
 {
 #if defined(TARGET_XARCH)
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -24247,8 +24195,6 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(var_types   type,
 GenTree* Compiler::gtNewSimdMaxNode(
     var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -24297,8 +24243,6 @@ GenTree* Compiler::gtNewSimdMaxNode(
 GenTree* Compiler::gtNewSimdMaxNativeNode(
     var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -24491,8 +24435,6 @@ GenTree* Compiler::gtNewSimdMaxNativeNode(
 GenTree* Compiler::gtNewSimdMinNode(
     var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -24541,8 +24483,6 @@ GenTree* Compiler::gtNewSimdMinNode(
 GenTree* Compiler::gtNewSimdMinNativeNode(
     var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -24731,8 +24671,6 @@ GenTree* Compiler::gtNewSimdMinNativeNode(
 GenTree* Compiler::gtNewSimdNarrowNode(
     var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -25246,8 +25184,6 @@ GenTree* Compiler::gtNewSimdNarrowNode(
 //
 GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -25312,8 +25248,6 @@ GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, CorInfoType
 GenTree* Compiler::gtNewSimdShuffleVariableNode(
     var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize, bool isShuffleNative)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -25929,8 +25863,6 @@ GenTree* Compiler::gtNewSimdShuffleVariableNode(
 GenTree* Compiler::gtNewSimdShuffleNode(
     var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize, bool isShuffleNative)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -26517,8 +26449,6 @@ GenTree* Compiler::gtNewSimdShuffleNode(
 GenTree* Compiler::gtNewSimdSqrtNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -26603,8 +26533,6 @@ GenTree* Compiler::gtNewSimdStoreNode(GenTree* op1, GenTree* op2, CorInfoType si
 GenTree* Compiler::gtNewSimdStoreAlignedNode(GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
 #if defined(TARGET_XARCH)
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(op1 != nullptr);
     assert(op2 != nullptr);
@@ -26662,8 +26590,6 @@ GenTree* Compiler::gtNewSimdStoreNonTemporalNode(GenTree*    op1,
                                                  unsigned    simdSize)
 {
 #if defined(TARGET_XARCH)
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(op1 != nullptr);
     assert(op2 != nullptr);
@@ -26705,8 +26631,6 @@ GenTree* Compiler::gtNewSimdStoreNonTemporalNode(GenTree*    op1,
 GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     var_types simdType = getSIMDTypeForSize(simdSize);
     assert(varTypeIsSIMD(simdType));
@@ -26775,7 +26699,6 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si
     {
         if (simdBaseType == TYP_FLOAT)
         {
-            assert(compIsaSupportedDebugOnly(InstructionSet_X86Base));
             GenTree* op1Shuffled = fgMakeMultiUse(&op1);
 
             if (compOpportunisticallyDependsOn(InstructionSet_AVX))
@@ -26793,7 +26716,6 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si
             }
             else
             {
-                assert(compIsaSupportedDebugOnly(InstructionSet_X86Base));
                 // The shuffle below gives us [0, 1, 2, 3] -> [1, 0, 3, 2]
                 op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op1Shuffled, gtNewIconNode((int)0b10110001, TYP_INT),
                                                NI_X86Base_Shuffle, simdBaseJitType, simdSize);
@@ -26813,7 +26735,6 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si
         }
         else
         {
-            assert(compIsaSupportedDebugOnly(InstructionSet_X86Base));
             GenTree* op1Shuffled = fgMakeMultiUse(&op1);
 
             if (compOpportunisticallyDependsOn(InstructionSet_AVX))
@@ -26991,7 +26912,6 @@ GenTree* Compiler::gtNewSimdTernaryLogicNode(var_types   type,
 //
 GenTree* Compiler::gtNewSimdToScalarNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
     assert(varTypeIsArithmetic(type));
 
     assert(op1 != nullptr);
@@ -27048,8 +26968,6 @@ GenTree* Compiler::gtNewSimdToScalarNode(var_types type, GenTree* op1, CorInfoTy
 //
 GenTree* Compiler::gtNewSimdTruncNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -27098,8 +27016,6 @@ GenTree* Compiler::gtNewSimdTruncNode(var_types type, GenTree* op1, CorInfoType
 GenTree* Compiler::gtNewSimdUnOpNode(
     genTreeOps op, var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -27195,8 +27111,6 @@ GenTree* Compiler::gtNewSimdUnOpNode(
 GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -27409,8 +27323,6 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo
 GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
 {
-    assert(IsBaselineSimdIsaSupportedDebugOnly());
-
     assert(varTypeIsSIMD(type));
     assert(getSIMDTypeForSize(simdSize) == type);
@@ -27666,7 +27578,7 @@ GenTree* Compiler::gtNewSimdWithElementNode(
         case TYP_FLOAT:
         case TYP_SHORT:
         case TYP_USHORT:
-            assert(compIsaSupportedDebugOnly(InstructionSet_X86Base));
+            // Supported by baseline ISA requirement
             break;
 
         default:
@@ -29277,7 +29189,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp(
 #endif // TARGET_ARM64
 
         assert(!isScalar || varTypeIsFloating(simdBaseType));
-        assert(comp->IsBaselineSimdIsaSupportedDebugOnly());
     }
 
     assert(op1 != nullptr);
@@ -29382,7 +29293,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
 #endif // TARGET_ARM64
 
         assert(!isScalar || varTypeIsFloating(simdBaseType));
-        assert(comp->IsBaselineSimdIsaSupportedDebugOnly());
     }
 
     NamedIntrinsic id = NI_Illegal;
@@ -29592,7 +29502,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
             }
             else
             {
-                assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
                 id = NI_X86Base_ShiftLeftLogical;
             }
         }
@@ -29610,7 +29519,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
             }
             else if (varTypeIsInt(op2))
             {
-                assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
                 id = NI_X86Base_ShiftLeftLogical;
             }
             else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX2))
@@ -29687,7 +29595,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
             }
             else if (varTypeIsShort(simdBaseType))
             {
-                assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
                 id = NI_X86Base_MultiplyLow;
             }
 #elif defined(TARGET_ARM64)
@@ -29833,7 +29740,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
             }
             else
             {
-                assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
                 id = NI_X86Base_ShiftRightArithmetic;
             }
         }
@@ -29851,7 +29757,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
             }
             else if (varTypeIsInt(op2))
             {
-                assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
                 id = NI_X86Base_ShiftRightArithmetic;
             }
             else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX2))
@@ -29901,7 +29806,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
             }
             else
             {
-                assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
                 id = NI_X86Base_ShiftRightLogical;
             }
         }
@@ -29919,7 +29823,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
             }
             else if (varTypeIsInt(op2))
             {
-                assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
                 id = NI_X86Base_ShiftRightLogical;
             }
             else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX2))
@@ -30093,7 +29996,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
 #endif // TARGET_ARM64
 
         assert(!isScalar || varTypeIsFloating(simdBaseType));
-        assert(comp->IsBaselineSimdIsaSupportedDebugOnly());
     }
 
     NamedIntrinsic id = NI_Illegal;
@@ -30208,7 +30110,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
             }
             else
             {
-                assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
                 id = NI_X86Base_CompareGreaterThan;
             }
         }
@@ -30302,7 +30203,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
             }
             else
             {
-                assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
                 id = NI_X86Base_CompareLessThan;
             }
         }
diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp
index 24c0d4e533d461..a0f40ddc857dfa 100644
--- a/src/coreclr/jit/hwintrinsic.cpp
+++ b/src/coreclr/jit/hwintrinsic.cpp
@@ -1195,7 +1195,8 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp,
         return NI_Illegal;
     }
 
-    bool isIsaSupported = comp->compSupportsHWIntrinsic(isa);
+    bool isHWIntrinsicEnabled = (JitConfig.EnableHWIntrinsic() != 0);
+    bool isIsaSupported       = isHWIntrinsicEnabled && comp->compSupportsHWIntrinsic(isa);
 
     bool     isHardwareAcceleratedProp = false;
     bool     isSupportedProp           = false;
     uint32_t vectorByteLength          = 0;
@@ -1300,7 +1301,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp,
 
     if (isa == InstructionSet_Vector128)
     {
-        if (!comp->IsBaselineSimdIsaSupported())
+        if (!isHWIntrinsicEnabled)
        {
             return NI_Illegal;
         }
@@ -1330,7 +1331,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp,
 #elif defined(TARGET_ARM64)
     else if (isa == InstructionSet_Vector64)
     {
-        if (!comp->IsBaselineSimdIsaSupported())
+        if (!isHWIntrinsicEnabled)
         {
             return NI_Illegal;
         }
diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
index 8005cfa8922097..3bfb01712995fc 100644
--- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
@@ -1808,7 +1808,6 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
     regNumber targetReg = node->GetRegNum();
     var_types baseType  = node->GetSimdBaseType();
 
-    assert(compiler->compIsaSupportedDebugOnly(InstructionSet_X86Base));
     assert((baseType >= TYP_BYTE) && (baseType <= TYP_DOUBLE));
 
     GenTree* op1 = (node->GetOperandCount() >= 1) ? node->Op(1) : nullptr;
diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp
index 1f4438ede7e143..fb8fb191814b78 100644
--- a/src/coreclr/jit/importercalls.cpp
+++ b/src/coreclr/jit/importercalls.cpp
@@ -4296,11 +4296,10 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
 #ifdef FEATURE_HW_INTRINSICS
         case NI_System_Math_FusedMultiplyAdd:
         {
+            assert(varTypeIsFloating(callType));
 #ifdef TARGET_XARCH
             if (compOpportunisticallyDependsOn(InstructionSet_FMA))
             {
-                assert(varTypeIsFloating(callType));
-
                 // We are constructing a chain of intrinsics similar to:
                 //    return FMA.MultiplyAddScalar(
                 //        Vector128.CreateScalarUnsafe(x),
@@ -4323,39 +4322,34 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
                 break;
             }
 #elif defined(TARGET_ARM64)
-            if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd))
-            {
-                assert(varTypeIsFloating(callType));
-
-                // We are constructing a chain of intrinsics similar to:
-                //    return AdvSimd.FusedMultiplyAddScalar(
-                //        Vector64.Create{ScalarUnsafe}(z),
-                //        Vector64.Create{ScalarUnsafe}(y),
-                //        Vector64.Create{ScalarUnsafe}(x)
-                //    ).ToScalar();
-
-                impSpillSideEffect(true, stackState.esStackDepth -
-                                             3 DEBUGARG("Spilling op1 side effects for FusedMultiplyAdd"));
-
-                impSpillSideEffect(true, stackState.esStackDepth -
-                                             2 DEBUGARG("Spilling op2 side effects for FusedMultiplyAdd"));
-
-                GenTree* op3 = impImplicitR4orR8Cast(impPopStack().val, callType);
-                GenTree* op2 = impImplicitR4orR8Cast(impPopStack().val, callType);
-                GenTree* op1 = impImplicitR4orR8Cast(impPopStack().val, callType);
-
-                op3 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op3, callJitType, 8);
-                op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op2, callJitType, 8);
-                op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op1, callJitType, 8);
-
-                // Note that AdvSimd.FusedMultiplyAddScalar(op1,op2,op3) corresponds to op1 + op2 * op3
-                // while Math{F}.FusedMultiplyAddScalar(op1,op2,op3) corresponds to op1 * op2 + op3
-                retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op3, op2, op1, NI_AdvSimd_FusedMultiplyAddScalar,
-                                                   callJitType, 8);
-
-                retNode = gtNewSimdToScalarNode(callType, retNode, callJitType, 8);
-                break;
-            }
+            // We are constructing a chain of intrinsics similar to:
+            //    return AdvSimd.FusedMultiplyAddScalar(
+            //        Vector64.Create{ScalarUnsafe}(z),
+            //        Vector64.Create{ScalarUnsafe}(y),
+            //        Vector64.Create{ScalarUnsafe}(x)
+            //    ).ToScalar();
+
+            impSpillSideEffect(true, stackState.esStackDepth -
+                                         3 DEBUGARG("Spilling op1 side effects for FusedMultiplyAdd"));
+
+            impSpillSideEffect(true, stackState.esStackDepth -
+                                         2 DEBUGARG("Spilling op2 side effects for FusedMultiplyAdd"));
+
+            GenTree* op3 = impImplicitR4orR8Cast(impPopStack().val, callType);
+            GenTree* op2 = impImplicitR4orR8Cast(impPopStack().val, callType);
+            GenTree* op1 = impImplicitR4orR8Cast(impPopStack().val, callType);
+
+            op3 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op3, callJitType, 8);
+            op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op2, callJitType, 8);
+            op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op1, callJitType, 8);
+
+            // Note that AdvSimd.FusedMultiplyAddScalar(op1,op2,op3) corresponds to op1 + op2 * op3
+            // while Math{F}.FusedMultiplyAddScalar(op1,op2,op3) corresponds to op1 * op2 + op3
+            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op3, op2, op1, NI_AdvSimd_FusedMultiplyAddScalar,
+                                               callJitType, 8);
+
+            retNode = gtNewSimdToScalarNode(callType, retNode, callJitType, 8);
+            break;
 #endif
 
             // TODO-CQ-XArch: Ideally we would create a GT_INTRINSIC node for fma, however, that currently
@@ -5571,7 +5565,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
     GenTree* op1 = nullptr;
 
 #if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
-    if ((intrinsic == NI_PRIMITIVE_ConvertToIntegerNative) && IsBaselineSimdIsaSupported())
+    if (intrinsic == NI_PRIMITIVE_ConvertToIntegerNative)
     {
         NamedIntrinsic hwIntrinsicId = NI_Illegal;
 
@@ -5783,7 +5777,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
         hwintrinsic = varTypeIsLong(baseType) ? NI_LZCNT_X64_LeadingZeroCount : NI_LZCNT_LeadingZeroCount;
         result      = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic);
     }
-    else if (compOpportunisticallyDependsOn(InstructionSet_X86Base))
+    else
     {
         // Pop the value from the stack
         impPopStack();
@@ -5824,15 +5818,12 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
         result = gtNewLclvNode(tmp, baseType);
     }
 #elif defined(TARGET_ARM64)
-    if (compOpportunisticallyDependsOn(InstructionSet_ArmBase))
-    {
-        // Pop the value from the stack
-        impPopStack();
+    // Pop the value from the stack
+    impPopStack();
 
-        hwintrinsic = varTypeIsLong(baseType) ? NI_ArmBase_Arm64_LeadingZeroCount : NI_ArmBase_LeadingZeroCount;
-        result      = gtNewScalarHWIntrinsicNode(TYP_INT, op1, hwintrinsic);
-        baseType    = TYP_INT;
-    }
+    hwintrinsic = varTypeIsLong(baseType) ? NI_ArmBase_Arm64_LeadingZeroCount : NI_ArmBase_LeadingZeroCount;
+    result      = gtNewScalarHWIntrinsicNode(TYP_INT, op1, hwintrinsic);
+    baseType    = TYP_INT;
 #endif // TARGET_*
 #endif // FEATURE_HW_INTRINSICS
@@ -5967,10 +5958,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
         result      = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic);
     }
 #elif defined(TARGET_ARM64)
-    if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd))
-    {
-        // TODO-ARM64-CQ: PopCount should be handled as an intrinsic for non-constant cases
-    }
+    // TODO-ARM64-CQ: PopCount should be handled as an intrinsic for non-constant cases
 #endif // TARGET_*
 #endif // FEATURE_HW_INTRINSICS
@@ -6130,7 +6118,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
         hwintrinsic = varTypeIsLong(baseType) ? NI_BMI1_X64_TrailingZeroCount : NI_BMI1_TrailingZeroCount;
         result      = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic);
     }
-    else if (compOpportunisticallyDependsOn(InstructionSet_X86Base))
+    else
     {
         // Pop the value from the stack
         impPopStack();
@@ -6169,19 +6157,15 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
         result = gtNewLclvNode(tmp, baseType);
     }
 #elif defined(TARGET_ARM64)
-    if (compOpportunisticallyDependsOn(InstructionSet_ArmBase))
-    {
-        // Pop the value from the stack
-        impPopStack();
+    // Pop the value from the stack
+    impPopStack();
 
-        hwintrinsic =
-            varTypeIsLong(baseType) ? NI_ArmBase_Arm64_ReverseElementBits : NI_ArmBase_ReverseElementBits;
-        op1 = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic);
+    hwintrinsic = varTypeIsLong(baseType) ? NI_ArmBase_Arm64_ReverseElementBits : NI_ArmBase_ReverseElementBits;
+    op1         = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic);
 
-        hwintrinsic = varTypeIsLong(baseType) ? NI_ArmBase_Arm64_LeadingZeroCount : NI_ArmBase_LeadingZeroCount;
-        result = gtNewScalarHWIntrinsicNode(TYP_INT, op1, hwintrinsic);
-        baseType = TYP_INT;
-    }
+    hwintrinsic = varTypeIsLong(baseType) ? NI_ArmBase_Arm64_LeadingZeroCount : NI_ArmBase_LeadingZeroCount;
+    result      = gtNewScalarHWIntrinsicNode(TYP_INT, op1, hwintrinsic);
+    baseType    = TYP_INT;
 #endif // TARGET_*
 #endif // FEATURE_HW_INTRINSICS
@@ -7934,6 +7918,7 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName)
         case NI_System_Math_Abs:
         case NI_System_Math_Ceiling:
         case NI_System_Math_Floor:
+        case NI_System_Math_FusedMultiplyAdd:
         case NI_System_Math_Max:
         case NI_System_Math_Min:
         case NI_System_Math_MultiplyAddEstimate:
@@ -7944,9 +7929,6 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName)
         case NI_System_Math_Truncate:
             return true;
 
-        case NI_System_Math_FusedMultiplyAdd:
-            return compOpportunisticallyDependsOn(InstructionSet_AdvSimd);
-
         default:
             return false;
     }
@@ -9900,7 +9882,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
     }
 
 #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
-    if (!isMagnitude && compOpportunisticallyDependsOn(InstructionSet_X86Base))
+    if (!isMagnitude)
     {
         bool needsFixup = false;
         bool canHandle  = false;
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index a1fe92920ae2cc..8f510e9b73a468 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -3503,7 +3503,7 @@ bool Lowering::TryLowerAddSubToMulLongOp(GenTreeOp* op, GenTree** next)
     if (!comp->opts.OptimizationEnabled())
         return false;
 
-    if (!comp->compOpportunisticallyDependsOn(InstructionSet_ArmBase_Arm64))
+    if (!comp->IsBaselineSimdIsaSupported())
         return false;
 
     if (op->isContained())
@@ -3609,7 +3609,7 @@ bool Lowering::TryLowerNegToMulLongOp(GenTreeOp* op, GenTree** next)
     if (!comp->opts.OptimizationEnabled())
         return false;
 
-    if (!comp->compOpportunisticallyDependsOn(InstructionSet_ArmBase_Arm64))
+    if (!comp->IsBaselineSimdIsaSupported())
         return false;
 
     if (op->isContained())
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index ea115c4cc20d63..23c5a6a2d3b939 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -915,9 +915,6 @@ void Lowering::LowerCast(GenTree* tree)
     }
     else
     {
-        assert(comp->IsBaselineSimdIsaSupportedDebugOnly());
-        assert(!TargetArchitecture::Is64Bit || comp->compIsaSupportedDebugOnly(InstructionSet_X86Base_X64));
-
         // We need to fix up NaN as well as handle possible overflow. Signed conversions
         // return int/long.MinValue for any overflow, which is correct for saturation of
         // negative, but the result must be replaced with MaxValue for positive overflow.
@@ -953,14 +950,14 @@ void Lowering::LowerCast(GenTree* tree)
         if (srcType == TYP_FLOAT)
         {
             maxFloatSimdVal->f32[0] = 4294967296.0f;
-            convertIntrinsic        = comp->compOpportunisticallyDependsOn(InstructionSet_X86Base_X64)
+            convertIntrinsic        = TargetArchitecture::Is64Bit
                                           ? NI_X86Base_X64_ConvertToInt64WithTruncation
                                           : NI_X86Base_ConvertToVector128Int32WithTruncation;
         }
         else
         {
             maxFloatSimdVal->f64[0] = 4294967296.0;
-            convertIntrinsic        = comp->compOpportunisticallyDependsOn(InstructionSet_X86Base_X64)
+            convertIntrinsic        = TargetArchitecture::Is64Bit
                                           ? NI_X86Base_X64_ConvertToInt64WithTruncation
                                           : NI_X86Base_ConvertToVector128Int32WithTruncation;
         }
@@ -1531,22 +1528,9 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
                     op2 = userIntrin->Op(1);
                 }
 
-                NamedIntrinsic intrinsic = NI_Illegal;
-
-                if (comp->IsBaselineSimdIsaSupported())
-                {
-                    intrinsic = GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(comp, GT_AND_NOT, op1, op2,
-                                                                             simdBaseType, simdSize, false);
-                }
-                else
-                {
-                    // We need to ensure we optimize even if SSE2 is disabled
-
-                    assert(simdBaseType == TYP_FLOAT);
-                    assert(simdSize <= 16);
-
-                    intrinsic = NI_X86Base_AndNot;
-                }
+                NamedIntrinsic intrinsic =
+                    GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(comp, GT_AND_NOT, op1, op2,
+                                                                 simdBaseType, simdSize, false);
 
                 userIntrin->ResetHWIntrinsicId(intrinsic, comp, op1, op2);
 
@@ -4289,8 +4273,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
                 break;
             }
 
-            assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
-
             // We will be constructing the following parts:
             //          ...
             //          /--*  tmp1 simd16
@@ -4344,8 +4326,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
             //   tmp1 = Sse2.UnpackLow(tmp1, tmp2);
             //   ...
 
-            assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
-
             node->Op(1) = tmp1;
             LIR::Use tmp1Use(BlockRange(), &node->Op(1), node);
             ReplaceWithLclVar(tmp1Use);
@@ -4376,8 +4356,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
             //   ...
             //   return Sse2.Shuffle(tmp1, 0x00);
 
-            assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
-
             idx = comp->gtNewIconNode(0x00, TYP_INT);
             BlockRange().InsertAfter(tmp1, idx);
 
@@ -4425,8 +4403,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
             //   var tmp2 = tmp1;
             //   return Sse.Shuffle(tmp1, tmp2, 0x00);
 
-            assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
-
             node->Op(1) = tmp1;
             LIR::Use tmp1Use(BlockRange(), &node->Op(1), node);
             ReplaceWithLclVar(tmp1Use);
@@ -4463,8 +4439,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
                 break;
             }
 
-            assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
-
             // We will be constructing the following parts:
             //          ...
             //          /--*  tmp1 simd16
@@ -4594,7 +4568,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
 
             if ((simdBaseType == TYP_SHORT) || (simdBaseType == TYP_USHORT))
             {
-                assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
                 insIntrinsic = NI_X86Base_Insert;
             }
             else if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
@@ -4654,7 +4627,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
             }
 
             assert((simdBaseType != TYP_SHORT) && (simdBaseType != TYP_USHORT));
-            assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
 
             GenTree* op[16];
             op[0] = tmp1;
@@ -4881,8 +4853,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
             //   tmp2 = Sse.UnpackLow(opP, opQ);
             //   return Sse.MoveLowToHigh(tmp1, tmp2);
 
-            assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
-
             GenTree* op[4];
             op[0] = tmp1;
 
@@ -4948,8 +4918,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
             //   var tmp2 = Vector128.CreateScalarUnsafe(op2);
             //   return Sse.UnpackLow(tmp1, tmp2);
 
-            assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
-
             tmp2 = InsertNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, simdBaseJitType, 16);
             LowerNode(tmp2);
 
@@ -5241,37 +5209,6 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node)
         }
     }
 
-    switch (simdBaseType)
-    {
-        case TYP_BYTE:
-        case TYP_UBYTE:
-        case TYP_INT:
-        case TYP_UINT:
-#if defined(TARGET_AMD64)
-        case TYP_LONG:
-        case TYP_ULONG:
-#endif // TARGET_AMD64
-        {
-            // Using software fallback if simdBaseType is not supported by hardware
-            assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41));
-            break;
-        }
-
-        case TYP_DOUBLE:
-        case TYP_FLOAT:
-        case TYP_SHORT:
-        case TYP_USHORT:
-        {
-            assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
-            break;
-        }
-
-        default:
-        {
-            unreached();
-        }
-    }
-
     // Remove the index node up front to simplify downstream logic
     BlockRange().Remove(op2);
 
@@ -5394,6 +5331,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node)
         case TYP_LONG:
         case TYP_ULONG:
         {
+            assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41_X64));
             resIntrinsic = NI_SSE41_X64_Extract;
             break;
         }
@@ -5411,6 +5349,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node)
         case TYP_INT:
         case TYP_UINT:
         {
+            assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41));
             resIntrinsic = NI_SSE41_Extract;
             break;
         }
@@ -5510,32 +5449,6 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
 
     assert((0 <= imm8) && (imm8 < count));
 
-    switch (simdBaseType)
-    {
-        // Using software fallback if simdBaseType is not supported by hardware
-        case TYP_BYTE:
-        case TYP_UBYTE:
-        case TYP_INT:
-        case TYP_UINT:
-            assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41));
-            break;
-
-        case TYP_LONG:
-        case TYP_ULONG:
-            assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41_X64));
-            break;
-
-        case TYP_DOUBLE:
-        case TYP_FLOAT:
-        case TYP_SHORT:
-        case TYP_USHORT:
-            assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
-            break;
-
-        default:
-            unreached();
-    }
-
     // Remove the index node up front to simplify downstream logic
     BlockRange().Remove(op2);
 
@@ -5715,6 +5628,8 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
         case TYP_LONG:
         case TYP_ULONG:
         {
+            assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41_X64));
+
             idx = comp->gtNewIconNode(imm8);
             BlockRange().InsertBefore(result, idx);
             result->ChangeHWIntrinsicId(NI_SSE41_X64_Insert, op1, op3, idx);
@@ -5861,6 +5776,8 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
         case TYP_INT:
         case TYP_UINT:
        {
+            assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41));
+
             idx = comp->gtNewIconNode(imm8);
             BlockRange().InsertBefore(result, idx);
             result->ChangeHWIntrinsicId(NI_SSE41_Insert, op1, op3, idx);
@@ -6065,8 +5982,6 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
     }
     else
     {
-        assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base));
-
         switch (simdBaseType)
         {
             case TYP_SHORT:
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index bede7afff1c93e..d3c0ae95c3db5f 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -294,20 +294,14 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
         if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
         {
             if (srcType == TYP_FLOAT
-#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
-                // Arm64: src = float, dst is overflow conversion.
+#ifdef TARGET_64BIT
+                // 64-bit: src = float, dst is overflow conversion.
                 // This goes through helper and hence src needs to be converted to double.
                 && tree->gtOverflow()
-#elif defined(TARGET_AMD64)
-                // Amd64: src = float, dst = overflow conversion or SSE2 is not enabled
-                && (tree->gtOverflow() || !IsBaselineSimdIsaSupported())
-#elif defined(TARGET_ARM)
-                // Arm: src = float, dst = int64/uint64 or overflow conversion.
-                && (tree->gtOverflow() || varTypeIsLong(dstType))
 #else
-                // x86: src = float, dst = int64/uint64 or overflow conversion or SSE2 is not enabled
-                && (tree->gtOverflow() || varTypeIsLong(dstType) || !IsBaselineSimdIsaSupported())
-#endif
+                // 32-bit: src = float, dst = int64/uint64 or overflow conversion.
+                && (tree->gtOverflow() || varTypeIsLong(dstType))
+#endif // TARGET_64BIT
                 )
             {
                 oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE);
@@ -328,39 +322,24 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
         {
             if (!tree->gtOverflow())
             {
-// ARM64 and LoongArch64 optimize all non-overflow checking conversions
-#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
+#ifdef TARGET_64BIT
                 return nullptr;
 #else
-#if defined(TARGET_XARCH)
-                if (IsBaselineSimdIsaSupported() && (!varTypeIsLong(dstType) || TargetArchitecture::Is64Bit))
+                if (!varTypeIsLong(dstType))
                 {
                     return nullptr;
                 }
-#endif // TARGET_XARCH
+
                 switch (dstType)
                 {
-                    case TYP_INT:
-#ifdef TARGET_XARCH
-                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
-#endif // TARGET_XARCH
-                        return nullptr;
-
-                    case TYP_UINT:
-#if defined(TARGET_ARM)
-                        return nullptr;
-#endif
-                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
-
                     case TYP_LONG:
                         return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
+
                     case TYP_ULONG:
                         return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
 
                     default:
                         unreached();
                 }
-#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64
+#endif // TARGET_64BIT
             }
             else
             {
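To make the policy in PATCH 1/4 concrete, here is a minimal standalone C++ sketch of the split it creates. This is illustrative only, not coreclr code: the enum values, struct, and member names are invented stand-ins for the JIT's instruction-set flags and for HWIntrinsicInfo::lookupId. Baseline ISAs are forced into the JIT's working set so internal codegen can rely on them unconditionally, while the user-facing EnableHWIntrinsic config is honored only when resolving managed intrinsics.

    #include <cstdint>

    // Hypothetical stand-ins for the JIT's ISA flag bits (names invented).
    enum : uint64_t
    {
        ISA_X86Base   = 1ull << 0,
        ISA_Vector128 = 1ull << 1,
        ISA_AVX       = 1ull << 2,
        ISA_Vector256 = 1ull << 3,
    };

    struct JitModel
    {
        uint64_t isaFlags;          // ISA set reported by the VM (already config-filtered)
        bool     enableHWIntrinsic; // DOTNET_EnableHWIntrinsic config value

        void SetProcessor()
        {
            // Force the baseline in, even if config stripped it from the VM's
            // set; the Vector128 "virtual" ISA rides on top of the baseline.
            isaFlags |= ISA_X86Base | ISA_Vector128;

            // Wider virtual vector ISAs still derive from real hardware ISAs.
            if ((isaFlags & ISA_AVX) != 0)
            {
                isaFlags |= ISA_Vector256;
            }
        }

        // Mirrors the new gate in HWIntrinsicInfo::lookupId: a managed
        // intrinsic resolves only when the config switch is on AND the ISA
        // is supported; otherwise the software fallback is used, even though
        // the JIT itself keeps emitting SIMD internally.
        bool ResolvesManagedIntrinsic(uint64_t isa) const
        {
            return enableHWIntrinsic && ((isaFlags & isa) == isa);
        }
    };
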
From 7fe032263e4d405ca1b24fe77741bed2d627a4a4 Mon Sep 17 00:00:00 2001
From: Clinton Ingram
Date: Sun, 1 Jun 2025 11:53:01 -0700
Subject: [PATCH 2/4] formatting

---
 src/coreclr/jit/compiler.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 2f33269de17aa1..51cb6cd880578e 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -1980,14 +1980,14 @@ void Compiler::compSetProcessor()
     opts.compSupportsISAReported.Reset();
     opts.compSupportsISAExactly.Reset();
 
-// The VM will set the ISA flags depending on actual hardware support and any
-// config values specified by the user. Config may have caused the VM to exclude
-// baseline ISAs from the supported set. We force their inclusion here so that
-// JIT code can use them unconditionally, but we will honor the config when
-// resolving managed HWIntrinsic methods.
-//
-// We also take care of adding the virtual vector ISAs (i.e. Vector64/128/256/512)
-// here, based on a combination of hardware ISA support and config values.
+    // The VM will set the ISA flags depending on actual hardware support and any
+    // config values specified by the user. Config may have caused the VM to exclude
+    // baseline ISAs from the supported set. We force their inclusion here so that
+    // JIT code can use them unconditionally, but we will honor the config when
+    // resolving managed HWIntrinsic methods.
+    //
+    // We also take care of adding the virtual vector ISAs (i.e. Vector64/128/256/512)
+    // here, based on a combination of hardware ISA support and config values.
 
 #if defined(TARGET_XARCH)
     // If the VM passed in a virtual vector ISA, it was done to communicate PreferredVectorBitWidth.

From 26dce5346bd3e3548e9f512b17cec343a2553c0e Mon Sep 17 00:00:00 2001
From: Clinton Ingram
Date: Fri, 6 Jun 2025 13:55:15 -0700
Subject: [PATCH 3/4] remove IsBaselineSimdIsaSupported

---
 src/coreclr/jit/codegenarm64.cpp |  6 ++--
 src/coreclr/jit/codegenxarch.cpp |  2 +-
 src/coreclr/jit/compiler.h       | 19 +------------
 src/coreclr/jit/importer.cpp     |  4 +--
 src/coreclr/jit/lclmorph.cpp     |  7 ++---
 src/coreclr/jit/lower.cpp        | 48 +++++++++++++++++---------------
 src/coreclr/jit/lowerarmarch.cpp |  6 ----
 src/coreclr/jit/lowerxarch.cpp   |  2 +-
 src/coreclr/jit/lsraarmarch.cpp  |  2 +-
 src/coreclr/jit/lsraxarch.cpp    |  2 +-
 10 files changed, 38 insertions(+), 60 deletions(-)

diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index 5ffb79df1f8448..0f32b6aa4c4e0b 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -3613,7 +3613,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
     // On ARM64, SIMD loads/stores provide 8-byte atomicity guarantees when aligned to 8 bytes.
     regNumber tmpSimdReg1 = REG_NA;
     regNumber tmpSimdReg2 = REG_NA;
-    if ((slots >= 4) && compiler->IsBaselineSimdIsaSupported())
+    if (slots >= 4)
     {
         tmpSimdReg1 = internalRegisters.Extract(cpObjNode, RBM_ALLFLOAT);
         tmpSimdReg2 = internalRegisters.Extract(cpObjNode, RBM_ALLFLOAT);
@@ -3644,8 +3644,8 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
         // Copy at least two slots at a time
         if (nonGcSlots >= 2)
         {
-            // Do 4 slots at a time if SIMD is supported
-            if ((nonGcSlots >= 4) && compiler->IsBaselineSimdIsaSupported())
+            // Do 4 slots at a time with SIMD instructions
+            if (nonGcSlots >= 4)
             {
                 // We need SIMD temp regs now
                 tmp1 = tmpSimdReg1;
diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 4e26fef17a6a5f..7688c032a0456b 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -3302,7 +3302,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
     // INITBLK zeroes a struct that contains GC pointers and can be observed by
     // other threads (i.e. when dstAddr is not an address of a local).
     // For example, this can happen when initializing a struct field of an object.
-    const bool canUse16BytesSimdMov = !node->IsOnHeapAndContainsReferences() && compiler->IsBaselineSimdIsaSupported();
+    const bool canUse16BytesSimdMov = !node->IsOnHeapAndContainsReferences();
     const bool willUseSimdMov       = canUse16BytesSimdMov && (size >= XMM_REGSIZE_BYTES);
 
     if (!src->isContained())
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 70ae13c480a721..c86c4f816cc06a 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -8912,23 +8912,6 @@ class Compiler
     XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     */
 
-    bool IsBaselineSimdIsaSupported()
-    {
-#ifdef FEATURE_SIMD
-#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
-        return true;
-#elif defined(TARGET_LOONGARCH64)
-        // TODO: supporting SIMD feature for LoongArch64.
-        assert(!"unimplemented yet on LA");
-        return false;
-#else
-#error Unsupported platform
-#endif // !TARGET_XARCH && !TARGET_ARM64 && !TARGET_LOONGARCH64
-#else
-        return false;
-#endif
-    }
-
     bool isIntrinsicType(CORINFO_CLASS_HANDLE clsHnd)
     {
         return info.compCompHnd->isIntrinsicType(clsHnd);
@@ -9414,7 +9397,7 @@ class Compiler
         assert(size > 0);
         var_types result = TYP_UNDEF;
 #ifdef FEATURE_SIMD
-        if (IsBaselineSimdIsaSupported() && (roundDownSIMDSize(size) > 0))
+        if (roundDownSIMDSize(size) > 0)
         {
             return getSIMDTypeForSize(roundDownSIMDSize(size));
         }
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 05c4b8d57ecddd..280be70ebdfdcf 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -3888,7 +3888,7 @@ GenTree* Compiler::impImportStaticReadOnlyField(CORINFO_FIELD_HANDLE field, CORI
 #ifdef FEATURE_SIMD
     // First, let's check whether field is a SIMD vector and import it as GT_CNS_VEC
     int simdWidth = getSIMDTypeSizeInBytes(fieldClsHnd);
-    if ((simdWidth > 0) && IsBaselineSimdIsaSupported())
+    if (simdWidth > 0)
     {
         assert((totalSize <= 64) && (totalSize <= MaxStructSize));
         var_types simdType = getSIMDTypeForSize(simdWidth);
@@ -3906,7 +3906,7 @@ GenTree* Compiler::impImportStaticReadOnlyField(CORINFO_FIELD_HANDLE field, CORI
         else
 #endif // TARGET_XARCH
         {
-            // SIMD8, SIMD12, SIMD16 are covered by IsBaselineSimdIsaSupported check
+            // SIMD8, SIMD12, SIMD16 are covered by baseline ISA requirement
             assert((simdType == TYP_SIMD8) || (simdType == TYP_SIMD12) || (simdType == TYP_SIMD16));
         }
diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp
index 1dd9ff67d16e4d..405e131c60505f 100644
--- a/src/coreclr/jit/lclmorph.cpp
+++ b/src/coreclr/jit/lclmorph.cpp
@@ -1943,14 +1943,14 @@ class LocalAddressVisitor final : public GenTreeVisitor
 
         if (indir->TypeIs(TYP_FLOAT))
         {
-            if (((offset % genTypeSize(TYP_FLOAT)) == 0) && m_compiler->IsBaselineSimdIsaSupported())
+            if ((offset % genTypeSize(TYP_FLOAT)) == 0)
             {
                 return isDef ? IndirTransform::WithElement : IndirTransform::GetElement;
             }
         }
         else if (indir->TypeIs(TYP_SIMD12))
         {
-            if ((offset == 0) && (varDsc->TypeGet() == TYP_SIMD16) && m_compiler->IsBaselineSimdIsaSupported())
+            if ((offset == 0) && (varDsc->TypeGet() == TYP_SIMD16))
             {
                 return isDef ? IndirTransform::WithElement : IndirTransform::GetElement;
             }
@@ -1958,8 +1958,7 @@ class LocalAddressVisitor final : public GenTreeVisitor
 #ifdef TARGET_ARM64
         else if (indir->TypeIs(TYP_SIMD8))
         {
-            if ((varDsc->TypeGet() == TYP_SIMD16) && ((offset % 8) == 0) &&
-                m_compiler->IsBaselineSimdIsaSupported())
+            if ((varDsc->TypeGet() == TYP_SIMD16) && ((offset % 8) == 0))
             {
                 return isDef ? IndirTransform::WithElement : IndirTransform::GetElement;
             }
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index eeb742740c9b77..9518915080dc34 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -2395,9 +2395,10 @@ bool Lowering::LowerCallMemcmp(GenTreeCall* call, GenTree** next)
         GenTree* lArg = call->gtArgs.GetUserArgByIndex(0)->GetNode();
         GenTree* rArg = call->gtArgs.GetUserArgByIndex(1)->GetNode();
 
-        ssize_t MaxUnrollSize = comp->IsBaselineSimdIsaSupported() ? 32 : 16;
+        ssize_t MaxUnrollSize = 16;
 
-#if defined(FEATURE_SIMD) && defined(TARGET_XARCH)
+#ifdef FEATURE_SIMD
+#ifdef TARGET_XARCH
         if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512))
         {
             MaxUnrollSize = 128;
@@ -2407,7 +2408,12 @@ bool Lowering::LowerCallMemcmp(GenTreeCall* call, GenTree** next)
             // We need AVX2 for NI_Vector256_op_Equality, fallback to Vector128 if only AVX is available
             MaxUnrollSize = 64;
         }
-#endif
+        else
+#endif // TARGET_XARCH
+        {
+            MaxUnrollSize = 32;
+        }
+#endif // FEATURE_SIMD
 
         if (cnsSize <= MaxUnrollSize)
         {
@@ -10023,29 +10029,25 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind)
 #if defined(FEATURE_HW_INTRINSICS)
             case TYP_LONG:
             case TYP_REF:
-                if (comp->IsBaselineSimdIsaSupported())
+                // TLDR: we should be here only if one of the conditions is true:
+                // 1) Both GT_INDs have GTF_IND_ALLOW_NON_ATOMIC flag
+                // 2) ARM64: Data is at least 8-byte aligned
+                // 3) AMD64: Data is at least 16-byte aligned on AMD/Intel with AVX+
+                //
+                newType = TYP_SIMD16;
+                if ((oldType == TYP_REF) &&
+                    (!currData.value->IsIntegralConst(0) || !prevData.value->IsIntegralConst(0)))
                 {
-                    // TLDR: we should be here only if one of the conditions is true:
-                    // 1) Both GT_INDs have GTF_IND_ALLOW_NON_ATOMIC flag
-                    // 2) ARM64: Data is at least 8-byte aligned
-                    // 3) AMD64: Data is at least 16-byte aligned on AMD/Intel with AVX+
+                    // For TYP_REF we only support null values. In theory, we can also support frozen handles, e.g.:
                     //
-                    newType = TYP_SIMD16;
-                    if ((oldType == TYP_REF) &&
-                        (!currData.value->IsIntegralConst(0) || !prevData.value->IsIntegralConst(0)))
-                    {
-                        // For TYP_REF we only support null values. In theory, we can also support frozen handles, e.g.:
-                        //
-                        //   arr[1] = "hello";
-                        //   arr[0] = "world";
-                        //
-                        // but we don't want to load managed references into SIMD registers (we can only do so
-                        // when we can issue a nongc region for a block)
-                        return;
-                    }
-                    break;
+                    //   arr[1] = "hello";
+                    //   arr[0] = "world";
+                    //
+                    // but we don't want to load managed references into SIMD registers (we can only do so
+                    // when we can issue a nongc region for a block)
+                    return;
                 }
-                return;
+                break;
 
 #if defined(TARGET_AMD64)
             case TYP_SIMD16:
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index 8f510e9b73a468..9a09d3b0b1ac6f 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -3503,9 +3503,6 @@ bool Lowering::TryLowerAddSubToMulLongOp(GenTreeOp* op, GenTree** next)
     if (!comp->opts.OptimizationEnabled())
         return false;
 
-    if (!comp->IsBaselineSimdIsaSupported())
-        return false;
-
     if (op->isContained())
         return false;
@@ -3609,9 +3606,6 @@ bool Lowering::TryLowerNegToMulLongOp(GenTreeOp* op, GenTree** next)
     if (!comp->opts.OptimizationEnabled())
         return false;
 
-    if (!comp->IsBaselineSimdIsaSupported())
-        return false;
-
     if (op->isContained())
         return false;
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 23c5a6a2d3b939..956b31c048d61f 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -407,7 +407,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
 
             ssize_t fill = src->AsIntCon()->IconValue() & 0xFF;
 
-            const bool canUseSimd = !blkNode->IsOnHeapAndContainsReferences() && comp->IsBaselineSimdIsaSupported();
+            const bool canUseSimd = !blkNode->IsOnHeapAndContainsReferences();
             if (size > comp->getUnrollThreshold(Compiler::UnrollKind::Memset, canUseSimd))
             {
                 // It turns out we can't use SIMD so the default threshold is too big
diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp
index 13fc2bd1ede079..4f103cd3f61b46 100644
--- a/src/coreclr/jit/lsraarmarch.cpp
+++ b/src/coreclr/jit/lsraarmarch.cpp
@@ -597,7 +597,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
         buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
     }
 
-    if (size >= 4 * REGSIZE_BYTES && compiler->IsBaselineSimdIsaSupported())
+    if (size >= 4 * REGSIZE_BYTES)
     {
         // We can use 128-bit SIMD ldp/stp for larger block sizes
         buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp
index e1dca325f3ab8a..c3824430d6a649 100644
--- a/src/coreclr/jit/lsraxarch.cpp
+++ b/src/coreclr/jit/lsraxarch.cpp
@@ -1415,7 +1415,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
     {
         case GenTreeBlk::BlkOpKindUnroll:
         {
-            bool willUseSimdMov = compiler->IsBaselineSimdIsaSupported() && (size >= XMM_REGSIZE_BYTES);
+            bool willUseSimdMov = (size >= XMM_REGSIZE_BYTES);
             if (willUseSimdMov && blkNode->IsOnHeapAndContainsReferences())
             {
                 ClassLayout* layout = blkNode->GetLayout();
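The LowerCallMemcmp change in PATCH 3/4 picks the memcmp unroll budget purely from the widest available vector comparison, since baseline 16-byte SIMD no longer needs a check. A simplified model of that selection follows — the enum is an invented stand-in for the JIT's compOpportunisticallyDependsOn queries, and the byte values mirror the diff (128 for AVX512, 64 for AVX2's Vector256 equality, otherwise 32; 16 would apply only without FEATURE_SIMD).

    #include <cstddef>

    // Stand-in for the widest ISA the compiler can opportunistically use.
    enum class WidestIsa { Baseline, Avx2, Avx512 };

    // Returns the largest constant-size memcmp the lowering will unroll,
    // mirroring the MaxUnrollSize values in the PATCH 3/4 lower.cpp hunk.
    ptrdiff_t MaxMemcmpUnrollSize(WidestIsa isa)
    {
        switch (isa)
        {
            case WidestIsa::Avx512: return 128; // four 32-byte compares
            case WidestIsa::Avx2:   return 64;  // Vector256 equality available
            default:                return 32;  // two baseline 16-byte compares
        }
    }
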
From 4de2edd3c29e2c5266b8ab7d2b491f1168709b53 Mon Sep 17 00:00:00 2001
From: Clinton Ingram
Date: Fri, 6 Jun 2025 14:03:24 -0700
Subject: [PATCH 4/4] update comments

---
 src/coreclr/jit/compiler.cpp | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 51cb6cd880578e..e6366fb1559689 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -1981,13 +1981,13 @@ void Compiler::compSetProcessor()
     opts.compSupportsISAExactly.Reset();
 
     // The VM will set the ISA flags depending on actual hardware support and any
-    // config values specified by the user. Config may have caused the VM to exclude
-    // baseline ISAs from the supported set. We force their inclusion here so that
-    // JIT code can use them unconditionally, but we will honor the config when
-    // resolving managed HWIntrinsic methods.
+    // config values specified by the user. Config may cause the VM to omit baseline
+    // ISAs from the supported set. We force their inclusion here so that JIT code
+    // can use them unconditionally, but we will honor the config when resolving
+    // managed HWIntrinsic methods.
     //
     // We also take care of adding the virtual vector ISAs (i.e. Vector64/128/256/512)
-    // here, based on a combination of hardware ISA support and config values.
+    // here, based on the combination of hardware ISA support and config values.
 
 #if defined(TARGET_XARCH)
     // If the VM passed in a virtual vector ISA, it was done to communicate PreferredVectorBitWidth.
@@ -2019,12 +2019,15 @@ void Compiler::compSetProcessor()
                 !instructionSetFlags.HasInstructionSet(InstructionSet_Vector256) &&
                 !instructionSetFlags.HasInstructionSet(InstructionSet_Vector512));
 
-    instructionSetFlags.AddInstructionSet(InstructionSet_Vector128);
+    // Ensure required baseline ISAs are supported in JIT code, even if not passed in by the VM.
     instructionSetFlags.AddInstructionSet(InstructionSet_X86Base);
 #ifdef TARGET_AMD64
     instructionSetFlags.AddInstructionSet(InstructionSet_X86Base_X64);
 #endif // TARGET_AMD64
 
+    // We can now add the virtual vector ISAs as appropriate. Vector128 is part of the required baseline.
+    instructionSetFlags.AddInstructionSet(InstructionSet_Vector128);
+
     if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX))
     {
         instructionSetFlags.AddInstructionSet(InstructionSet_Vector256);
@@ -2035,12 +2038,15 @@ void Compiler::compSetProcessor()
         instructionSetFlags.AddInstructionSet(InstructionSet_Vector512);
     }
 #elif defined(TARGET_ARM64)
-    instructionSetFlags.AddInstructionSet(InstructionSet_Vector64);
-    instructionSetFlags.AddInstructionSet(InstructionSet_Vector128);
+    // Ensure required baseline ISAs are supported in JIT code, even if not passed in by the VM.
     instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase);
     instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase_Arm64);
     instructionSetFlags.AddInstructionSet(InstructionSet_AdvSimd);
     instructionSetFlags.AddInstructionSet(InstructionSet_AdvSimd_Arm64);
+
+    // Add virtual vector ISAs. These are both supported as part of the required baseline.
+    instructionSetFlags.AddInstructionSet(InstructionSet_Vector64);
+    instructionSetFlags.AddInstructionSet(InstructionSet_Vector128);
 #endif // TARGET_ARM64
 
     assert(instructionSetFlags.Equals(EnsureInstructionSetFlagsAreValid(instructionSetFlags)));
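After PATCH 3/4, helpers like the compiler.h getPreferredVectorType hunk no longer gate on IsBaselineSimdIsaSupported; they simply round the requested size down to an available vector width. The sketch below is a hedged standalone model of that rounding, not the JIT's roundDownSIMDSize implementation — the function name and the maxWidth parameter (a stand-in for getMaxVectorByteLength(), which now always reports at least 16 bytes) are invented for illustration.

    #include <cstdio>

    // Pick the largest available vector width not exceeding the requested
    // size, or 0 when even the 16-byte baseline does not fit.
    unsigned RoundDownSimdSize(unsigned size, unsigned maxWidth)
    {
        for (unsigned width = maxWidth; width >= 16; width /= 2)
        {
            if (size >= width)
            {
                return width;
            }
        }
        return 0;
    }

    int main()
    {
        // With AVX enabled (32-byte max), a 40-byte block uses 32-byte
        // vectors, while a 20-byte block falls back to the 16-byte baseline.
        printf("%u %u\n", RoundDownSimdSize(40, 32), RoundDownSimdSize(20, 32)); // prints: 32 16
        return 0;
    }
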