diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index b1f0404f5f82d5..1ba4369e342368 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -3613,7 +3613,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) // On ARM64, SIMD loads/stores provide 8-byte atomicity guarantees when aligned to 8 bytes. regNumber tmpSimdReg1 = REG_NA; regNumber tmpSimdReg2 = REG_NA; - if ((slots >= 4) && compiler->IsBaselineSimdIsaSupported()) + if (slots >= 4) { tmpSimdReg1 = internalRegisters.Extract(cpObjNode, RBM_ALLFLOAT); tmpSimdReg2 = internalRegisters.Extract(cpObjNode, RBM_ALLFLOAT); @@ -3644,8 +3644,8 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) // Copy at least two slots at a time if (nonGcSlots >= 2) { - // Do 4 slots at a time if SIMD is supported - if ((nonGcSlots >= 4) && compiler->IsBaselineSimdIsaSupported()) + // Do 4 slots at a time with SIMD instructions + if (nonGcSlots >= 4) { // We need SIMD temp regs now tmp1 = tmpSimdReg1; diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 8716d2eb2256dc..be3326e83606f0 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3302,7 +3302,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) // INITBLK zeroes a struct that contains GC pointers and can be observed by // other threads (i.e. when dstAddr is not an address of a local). // For example, this can happen when initializing a struct field of an object. - const bool canUse16BytesSimdMov = !node->IsOnHeapAndContainsReferences() && compiler->IsBaselineSimdIsaSupported(); + const bool canUse16BytesSimdMov = !node->IsOnHeapAndContainsReferences(); const bool willUseSimdMov = canUse16BytesSimdMov && (size >= XMM_REGSIZE_BYTES); if (!src->isContained()) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 473862398440c1..a311028ddd9a76 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -1920,11 +1920,15 @@ void Compiler::compSetProcessor() opts.compSupportsISAReported.Reset(); opts.compSupportsISAExactly.Reset(); -// The VM will set the ISA flags depending on actual hardware support -// and any specified config switches specified by the user. The exception -// here is for certain "artificial ISAs" such as Vector64/128/256 where they -// don't actually exist. The JIT is in charge of adding those and ensuring -// the total sum of flags is still valid. + // The VM will set the ISA flags depending on actual hardware support and any + // config values specified by the user. Config may cause the VM to omit baseline + // ISAs from the supported set. We force their inclusion here so that JIT code + // can use them unconditionally, but we will honor the config when resolving + // managed HWIntrinsic methods. + // + // We also take care of adding the virtual vector ISAs (i.e. Vector64/128/256/512) + // here, based on the combination of hardware ISA support and config values. + #if defined(TARGET_XARCH) // If the VM passed in a virtual vector ISA, it was done to communicate PreferredVectorBitWidth. // No check is done for the validity of the value, since it will be clamped to max supported by @@ -1955,10 +1959,14 @@ void Compiler::compSetProcessor() !instructionSetFlags.HasInstructionSet(InstructionSet_Vector256) && !instructionSetFlags.HasInstructionSet(InstructionSet_Vector512)); - if (instructionSetFlags.HasInstructionSet(InstructionSet_X86Base)) - { - instructionSetFlags.AddInstructionSet(InstructionSet_Vector128); - } + // Ensure required baseline ISAs are supported in JIT code, even if not passed in by the VM. + instructionSetFlags.AddInstructionSet(InstructionSet_X86Base); +#ifdef TARGET_AMD64 + instructionSetFlags.AddInstructionSet(InstructionSet_X86Base_X64); +#endif // TARGET_AMD64 + + // We can now add the virtual vector ISAs as appropriate. Vector128 is part of the required baseline. + instructionSetFlags.AddInstructionSet(InstructionSet_Vector128); if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX)) { @@ -1970,11 +1978,15 @@ void Compiler::compSetProcessor() instructionSetFlags.AddInstructionSet(InstructionSet_Vector512); } #elif defined(TARGET_ARM64) - if (instructionSetFlags.HasInstructionSet(InstructionSet_AdvSimd)) - { - instructionSetFlags.AddInstructionSet(InstructionSet_Vector64); - instructionSetFlags.AddInstructionSet(InstructionSet_Vector128); - } + // Ensure required baseline ISAs are supported in JIT code, even if not passed in by the VM. + instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase); + instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase_Arm64); + instructionSetFlags.AddInstructionSet(InstructionSet_AdvSimd); + instructionSetFlags.AddInstructionSet(InstructionSet_AdvSimd_Arm64); + + // Add virtual vector ISAs. These are both supported as part of the required baseline. + instructionSetFlags.AddInstructionSet(InstructionSet_Vector64); + instructionSetFlags.AddInstructionSet(InstructionSet_Vector128); #endif // TARGET_ARM64 assert(instructionSetFlags.Equals(EnsureInstructionSetFlagsAreValid(instructionSetFlags))); @@ -5956,11 +5968,8 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, } } - if (JitConfig.EnableHWIntrinsic() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase); - instructionSetFlags.AddInstructionSet(InstructionSet_AdvSimd); - } + instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase); + instructionSetFlags.AddInstructionSet(InstructionSet_AdvSimd); if (JitConfig.EnableArm64Aes() != 0) { @@ -6029,10 +6038,7 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, } } - if (JitConfig.EnableHWIntrinsic() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_X86Base); - } + instructionSetFlags.AddInstructionSet(InstructionSet_X86Base); if (JitConfig.EnableSSE3() != 0) { @@ -6142,10 +6148,7 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, instructionSetFlags.AddInstructionSet(InstructionSet_APX); } #elif defined(TARGET_RISCV64) - if (JitConfig.EnableHWIntrinsic() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_RiscV64Base); - } + instructionSetFlags.AddInstructionSet(InstructionSet_RiscV64Base); if (JitConfig.EnableRiscV64Zba() != 0) { diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 2e2bf632fa9996..56a9a1eb543c70 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8928,46 +8928,6 @@ class Compiler XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ - bool IsBaselineSimdIsaSupported() - { -#ifdef FEATURE_SIMD -#if defined(TARGET_XARCH) - CORINFO_InstructionSet minimumIsa = InstructionSet_X86Base; -#elif defined(TARGET_ARM64) - CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd; -#elif defined(TARGET_LOONGARCH64) - // TODO: supporting SIMD feature for LoongArch64. - assert(!"unimplemented yet on LA"); - CORINFO_InstructionSet minimumIsa = 0; -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 && !TARGET_LOONGARCH64 - - return compOpportunisticallyDependsOn(minimumIsa); -#else - return false; -#endif - } - -#if defined(DEBUG) - bool IsBaselineSimdIsaSupportedDebugOnly() - { -#ifdef FEATURE_SIMD -#if defined(TARGET_XARCH) - CORINFO_InstructionSet minimumIsa = InstructionSet_X86Base; -#elif defined(TARGET_ARM64) - CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd; -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 - - return compIsaSupportedDebugOnly(minimumIsa); -#else - return false; -#endif // FEATURE_SIMD - } -#endif // DEBUG - bool isIntrinsicType(CORINFO_CLASS_HANDLE clsHnd) { return info.compCompHnd->isIntrinsicType(clsHnd); @@ -9248,29 +9208,12 @@ class Compiler { return YMM_REGSIZE_BYTES; } - else if (compOpportunisticallyDependsOn(InstructionSet_X86Base)) - { - return XMM_REGSIZE_BYTES; - } else { - // TODO: We should be returning 0 here, but there are a number of - // places that don't quite get handled correctly in that scenario - return XMM_REGSIZE_BYTES; } #elif defined(TARGET_ARM64) - if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd)) - { - return FP_REGSIZE_BYTES; - } - else - { - // TODO: We should be returning 0 here, but there are a number of - // places that don't quite get handled correctly in that scenario - - return FP_REGSIZE_BYTES; - } + return FP_REGSIZE_BYTES; #else assert(!"getMaxVectorByteLength() unimplemented on target arch"); unreached(); @@ -9470,7 +9413,7 @@ class Compiler assert(size > 0); var_types result = TYP_UNDEF; #ifdef FEATURE_SIMD - if (IsBaselineSimdIsaSupported() && (roundDownSIMDSize(size) > 0)) + if (roundDownSIMDSize(size) > 0) { return getSIMDTypeForSize(roundDownSIMDSize(size)); } diff --git a/src/coreclr/jit/decomposelongs.cpp b/src/coreclr/jit/decomposelongs.cpp index 46280dfbe33868..afe6625c10dc12 100644 --- a/src/coreclr/jit/decomposelongs.cpp +++ b/src/coreclr/jit/decomposelongs.cpp @@ -1970,8 +1970,6 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicToScalar(LIR::Use& use, GenTreeHWIn } else { - assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_X86Base)); - GenTree* thirtyTwo = m_compiler->gtNewIconNode(32); GenTree* shift = m_compiler->gtNewSimdBinOpNode(GT_RSZ, op1->TypeGet(), simdTmpVar, thirtyTwo, node->GetSimdBaseJitType(), simdSize); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index d806f84ad16c9c..2c4aafcce230f1 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20705,8 +20705,6 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types ty GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -20811,8 +20809,6 @@ GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType si GenTree* Compiler::gtNewSimdBinOpNode( genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -21445,8 +21441,6 @@ GenTree* Compiler::gtNewSimdBinOpNode( GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -21540,8 +21534,6 @@ GenTree* Compiler::gtNewSimdCvtNode(var_types type, assert(varTypeIsFloating(simdSourceBaseType)); assert(varTypeIsIntegral(simdTargetBaseType)); - assert(IsBaselineSimdIsaSupportedDebugOnly()); - #if defined(TARGET_XARCH) assert(compIsaSupportedDebugOnly(InstructionSet_AVX512) || ((simdTargetBaseType == TYP_INT) && ((simdSize == 16 && compIsaSupportedDebugOnly(InstructionSet_SSE41)) || @@ -21680,8 +21672,6 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, assert(varTypeIsFloating(simdSourceBaseType)); assert(varTypeIsIntegral(simdTargetBaseType)); - assert(IsBaselineSimdIsaSupportedDebugOnly()); - // Generate intrinsic needed for conversion NamedIntrinsic hwIntrinsicID = NI_Illegal; @@ -21926,8 +21916,6 @@ GenTree* Compiler::gtNewSimdCvtVectorToMaskNode(var_types type, GenTree* Compiler::gtNewSimdCmpOpNode( genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -22223,7 +22211,6 @@ GenTree* Compiler::gtNewSimdCmpOpNode( GenTree* Compiler::gtNewSimdCmpOpAllNode( genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); assert(type == TYP_INT); var_types simdType = getSIMDTypeForSize(simdSize); @@ -22362,7 +22349,6 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( GenTree* Compiler::gtNewSimdCmpOpAnyNode( genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); assert(type == TYP_INT); var_types simdType = getSIMDTypeForSize(simdSize); @@ -22497,8 +22483,6 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( GenTree* Compiler::gtNewSimdCndSelNode( var_types type, GenTree* op1, GenTree* op2, GenTree* op3, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23097,8 +23081,6 @@ GenTree* Compiler::gtNewSimdCreateSequenceNode( GenTree* Compiler::gtNewSimdDotProdNode( var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - var_types simdType = getSIMDTypeForSize(simdSize); assert(varTypeIsSIMD(simdType)); @@ -23141,8 +23123,6 @@ GenTree* Compiler::gtNewSimdDotProdNode( GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23219,8 +23199,6 @@ GenTree* Compiler::gtNewSimdFmaNode( intrinsic = NI_FMA_MultiplyAdd; } #elif defined(TARGET_ARM64) - assert(IsBaselineSimdIsaSupportedDebugOnly()); - if (simdBaseType == TYP_DOUBLE) { intrinsic = (simdSize == 8) ? NI_AdvSimd_FusedMultiplyAddScalar : NI_AdvSimd_Arm64_FusedMultiplyAdd; @@ -23275,7 +23253,7 @@ GenTree* Compiler::gtNewSimdGetElementNode( case TYP_SHORT: case TYP_USHORT: { - assert(compIsaSupportedDebugOnly(InstructionSet_X86Base)); + // Supported by baseline ISA requirement break; } @@ -23490,8 +23468,6 @@ GenTree* Compiler::gtNewSimdIsEvenIntegerNode(var_types type, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23519,8 +23495,6 @@ GenTree* Compiler::gtNewSimdIsEvenIntegerNode(var_types type, // GenTree* Compiler::gtNewSimdIsFiniteNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23572,8 +23546,6 @@ GenTree* Compiler::gtNewSimdIsFiniteNode(var_types type, GenTree* op1, CorInfoTy // GenTree* Compiler::gtNewSimdIsInfinityNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23605,8 +23577,6 @@ GenTree* Compiler::gtNewSimdIsInfinityNode(var_types type, GenTree* op1, CorInfo // GenTree* Compiler::gtNewSimdIsIntegerNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23647,8 +23617,6 @@ GenTree* Compiler::gtNewSimdIsIntegerNode(var_types type, GenTree* op1, CorInfoT // GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23680,8 +23648,6 @@ GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, CorInfoType // GenTree* Compiler::gtNewSimdIsNegativeNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23724,8 +23690,6 @@ GenTree* Compiler::gtNewSimdIsNegativeInfinityNode(var_types type, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23774,8 +23738,6 @@ GenTree* Compiler::gtNewSimdIsNegativeInfinityNode(var_types type, // GenTree* Compiler::gtNewSimdIsNormalNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23839,8 +23801,6 @@ GenTree* Compiler::gtNewSimdIsOddIntegerNode(var_types type, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23868,8 +23828,6 @@ GenTree* Compiler::gtNewSimdIsOddIntegerNode(var_types type, // GenTree* Compiler::gtNewSimdIsPositiveNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23912,8 +23870,6 @@ GenTree* Compiler::gtNewSimdIsPositiveInfinityNode(var_types type, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -23965,8 +23921,6 @@ GenTree* Compiler::gtNewSimdIsSubnormalNode(var_types type, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24024,8 +23978,6 @@ GenTree* Compiler::gtNewSimdIsSubnormalNode(var_types type, // GenTree* Compiler::gtNewSimdIsZeroNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24081,8 +24033,6 @@ GenTree* Compiler::gtNewSimdLoadAlignedNode(var_types type, unsigned simdSize) { #if defined(TARGET_XARCH) - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24140,8 +24090,6 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(var_types type, unsigned simdSize) { #if defined(TARGET_XARCH) - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24217,8 +24165,6 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(var_types type, GenTree* Compiler::gtNewSimdMaxNode( var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24300,8 +24246,6 @@ GenTree* Compiler::gtNewSimdMaxNode( GenTree* Compiler::gtNewSimdMaxNativeNode( var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24494,8 +24438,6 @@ GenTree* Compiler::gtNewSimdMaxNativeNode( GenTree* Compiler::gtNewSimdMinNode( var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24577,8 +24519,6 @@ GenTree* Compiler::gtNewSimdMinNode( GenTree* Compiler::gtNewSimdMinNativeNode( var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24767,8 +24707,6 @@ GenTree* Compiler::gtNewSimdMinNativeNode( GenTree* Compiler::gtNewSimdNarrowNode( var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -25282,8 +25220,6 @@ GenTree* Compiler::gtNewSimdNarrowNode( // GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -25348,8 +25284,6 @@ GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, CorInfoType GenTree* Compiler::gtNewSimdShuffleVariableNode( var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize, bool isShuffleNative) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -25965,8 +25899,6 @@ GenTree* Compiler::gtNewSimdShuffleVariableNode( GenTree* Compiler::gtNewSimdShuffleNode( var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize, bool isShuffleNative) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -26553,8 +26485,6 @@ GenTree* Compiler::gtNewSimdShuffleNode( GenTree* Compiler::gtNewSimdSqrtNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -26639,8 +26569,6 @@ GenTree* Compiler::gtNewSimdStoreNode(GenTree* op1, GenTree* op2, CorInfoType si GenTree* Compiler::gtNewSimdStoreAlignedNode(GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { #if defined(TARGET_XARCH) - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(op1 != nullptr); assert(op2 != nullptr); @@ -26698,8 +26626,6 @@ GenTree* Compiler::gtNewSimdStoreNonTemporalNode(GenTree* op1, unsigned simdSize) { #if defined(TARGET_XARCH) - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(op1 != nullptr); assert(op2 != nullptr); @@ -26741,8 +26667,6 @@ GenTree* Compiler::gtNewSimdStoreNonTemporalNode(GenTree* op1, GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - var_types simdType = getSIMDTypeForSize(simdSize); assert(varTypeIsSIMD(simdType)); @@ -26811,7 +26735,6 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { if (simdBaseType == TYP_FLOAT) { - assert(compIsaSupportedDebugOnly(InstructionSet_X86Base)); GenTree* op1Shuffled = fgMakeMultiUse(&op1); if (compOpportunisticallyDependsOn(InstructionSet_AVX)) @@ -26829,7 +26752,6 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si } else { - assert(compIsaSupportedDebugOnly(InstructionSet_X86Base)); // The shuffle below gives us [0, 1, 2, 3] -> [1, 0, 3, 2] op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op1Shuffled, gtNewIconNode((int)0b10110001, TYP_INT), NI_X86Base_Shuffle, simdBaseJitType, simdSize); @@ -26849,7 +26771,6 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si } else { - assert(compIsaSupportedDebugOnly(InstructionSet_X86Base)); GenTree* op1Shuffled = fgMakeMultiUse(&op1); if (compOpportunisticallyDependsOn(InstructionSet_AVX)) @@ -27027,7 +26948,6 @@ GenTree* Compiler::gtNewSimdTernaryLogicNode(var_types type, // GenTree* Compiler::gtNewSimdToScalarNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); assert(varTypeIsArithmetic(type)); assert(op1 != nullptr); @@ -27084,8 +27004,6 @@ GenTree* Compiler::gtNewSimdToScalarNode(var_types type, GenTree* op1, CorInfoTy // GenTree* Compiler::gtNewSimdTruncNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -27134,8 +27052,6 @@ GenTree* Compiler::gtNewSimdTruncNode(var_types type, GenTree* op1, CorInfoType GenTree* Compiler::gtNewSimdUnOpNode( genTreeOps op, var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -27231,8 +27147,6 @@ GenTree* Compiler::gtNewSimdUnOpNode( GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -27445,8 +27359,6 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineSimdIsaSupportedDebugOnly()); - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -27702,7 +27614,7 @@ GenTree* Compiler::gtNewSimdWithElementNode( case TYP_FLOAT: case TYP_SHORT: case TYP_USHORT: - assert(compIsaSupportedDebugOnly(InstructionSet_X86Base)); + // Supported by baseline ISA requirement break; default: @@ -29313,7 +29225,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp( #endif // TARGET_ARM64 assert(!isScalar || varTypeIsFloating(simdBaseType)); - assert(comp->IsBaselineSimdIsaSupportedDebugOnly()); } assert(op1 != nullptr); @@ -29418,7 +29329,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, #endif // TARGET_ARM64 assert(!isScalar || varTypeIsFloating(simdBaseType)); - assert(comp->IsBaselineSimdIsaSupportedDebugOnly()); } NamedIntrinsic id = NI_Illegal; @@ -29628,7 +29538,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); id = NI_X86Base_ShiftLeftLogical; } } @@ -29646,7 +29555,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else if (varTypeIsInt(op2)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); id = NI_X86Base_ShiftLeftLogical; } else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX2)) @@ -29723,7 +29631,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else if (varTypeIsShort(simdBaseType)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); id = NI_X86Base_MultiplyLow; } #elif defined(TARGET_ARM64) @@ -29869,7 +29776,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); id = NI_X86Base_ShiftRightArithmetic; } } @@ -29887,7 +29793,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else if (varTypeIsInt(op2)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); id = NI_X86Base_ShiftRightArithmetic; } else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX2)) @@ -29937,7 +29842,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); id = NI_X86Base_ShiftRightLogical; } } @@ -29955,7 +29859,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else if (varTypeIsInt(op2)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); id = NI_X86Base_ShiftRightLogical; } else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX2)) @@ -30129,7 +30032,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, #endif // TARGET_ARM64 assert(!isScalar || varTypeIsFloating(simdBaseType)); - assert(comp->IsBaselineSimdIsaSupportedDebugOnly()); } NamedIntrinsic id = NI_Illegal; @@ -30244,7 +30146,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); id = NI_X86Base_CompareGreaterThan; } } @@ -30338,7 +30239,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); id = NI_X86Base_CompareLessThan; } } diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index eae728b2e09685..c69f97afa5caab 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1195,7 +1195,8 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, return NI_Illegal; } - bool isIsaSupported = comp->compSupportsHWIntrinsic(isa); + bool isHWIntrinsicEnabled = (JitConfig.EnableHWIntrinsic() != 0); + bool isIsaSupported = isHWIntrinsicEnabled && comp->compSupportsHWIntrinsic(isa); bool isHardwareAcceleratedProp = false; bool isSupportedProp = false; uint32_t vectorByteLength = 0; @@ -1300,7 +1301,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, if (isa == InstructionSet_Vector128) { - if (!comp->IsBaselineSimdIsaSupported()) + if (!isHWIntrinsicEnabled) { return NI_Illegal; } @@ -1330,7 +1331,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, #elif defined(TARGET_ARM64) else if (isa == InstructionSet_Vector64) { - if (!comp->IsBaselineSimdIsaSupported()) + if (!isHWIntrinsicEnabled) { return NI_Illegal; } diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 33f6d15dc9f843..78bed523d9d0bc 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -1804,7 +1804,6 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) regNumber targetReg = node->GetRegNum(); var_types baseType = node->GetSimdBaseType(); - assert(compiler->compIsaSupportedDebugOnly(InstructionSet_X86Base)); assert((baseType >= TYP_BYTE) && (baseType <= TYP_DOUBLE)); GenTree* op1 = (node->GetOperandCount() >= 1) ? node->Op(1) : nullptr; diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 2c46b32ba7ab5f..95b14dfd186cc7 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -3889,7 +3889,7 @@ GenTree* Compiler::impImportStaticReadOnlyField(CORINFO_FIELD_HANDLE field, CORI #ifdef FEATURE_SIMD // First, let's check whether field is a SIMD vector and import it as GT_CNS_VEC int simdWidth = getSIMDTypeSizeInBytes(fieldClsHnd); - if ((simdWidth > 0) && IsBaselineSimdIsaSupported()) + if (simdWidth > 0) { assert((totalSize <= 64) && (totalSize <= MaxStructSize)); var_types simdType = getSIMDTypeForSize(simdWidth); @@ -3907,7 +3907,7 @@ GenTree* Compiler::impImportStaticReadOnlyField(CORINFO_FIELD_HANDLE field, CORI else #endif // TARGET_XARCH { - // SIMD8, SIMD12, SIMD16 are covered by IsBaselineSimdIsaSupported check + // SIMD8, SIMD12, SIMD16 are covered by baseline ISA requirement assert((simdType == TYP_SIMD8) || (simdType == TYP_SIMD12) || (simdType == TYP_SIMD16)); } diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 588679a612fa92..58180e8a9793f9 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -4296,11 +4296,10 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, #ifdef FEATURE_HW_INTRINSICS case NI_System_Math_FusedMultiplyAdd: { + assert(varTypeIsFloating(callType)); #ifdef TARGET_XARCH if (compOpportunisticallyDependsOn(InstructionSet_FMA)) { - assert(varTypeIsFloating(callType)); - // We are constructing a chain of intrinsics similar to: // return FMA.MultiplyAddScalar( // Vector128.CreateScalarUnsafe(x), @@ -4323,39 +4322,34 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, break; } #elif defined(TARGET_ARM64) - if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd)) - { - assert(varTypeIsFloating(callType)); + // We are constructing a chain of intrinsics similar to: + // return AdvSimd.FusedMultiplyAddScalar( + // Vector64.Create{ScalarUnsafe}(z), + // Vector64.Create{ScalarUnsafe}(y), + // Vector64.Create{ScalarUnsafe}(x) + // ).ToScalar(); - // We are constructing a chain of intrinsics similar to: - // return AdvSimd.FusedMultiplyAddScalar( - // Vector64.Create{ScalarUnsafe}(z), - // Vector64.Create{ScalarUnsafe}(y), - // Vector64.Create{ScalarUnsafe}(x) - // ).ToScalar(); + impSpillSideEffect(true, stackState.esStackDepth - + 3 DEBUGARG("Spilling op1 side effects for FusedMultiplyAdd")); - impSpillSideEffect(true, stackState.esStackDepth - - 3 DEBUGARG("Spilling op1 side effects for FusedMultiplyAdd")); + impSpillSideEffect(true, stackState.esStackDepth - + 2 DEBUGARG("Spilling op2 side effects for FusedMultiplyAdd")); - impSpillSideEffect(true, stackState.esStackDepth - - 2 DEBUGARG("Spilling op2 side effects for FusedMultiplyAdd")); + GenTree* op3 = impImplicitR4orR8Cast(impPopStack().val, callType); + GenTree* op2 = impImplicitR4orR8Cast(impPopStack().val, callType); + GenTree* op1 = impImplicitR4orR8Cast(impPopStack().val, callType); - GenTree* op3 = impImplicitR4orR8Cast(impPopStack().val, callType); - GenTree* op2 = impImplicitR4orR8Cast(impPopStack().val, callType); - GenTree* op1 = impImplicitR4orR8Cast(impPopStack().val, callType); + op3 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op3, callJitType, 8); + op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op2, callJitType, 8); + op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op1, callJitType, 8); - op3 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op3, callJitType, 8); - op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op2, callJitType, 8); - op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD8, op1, callJitType, 8); + // Note that AdvSimd.FusedMultiplyAddScalar(op1,op2,op3) corresponds to op1 + op2 * op3 + // while Math{F}.FusedMultiplyAddScalar(op1,op2,op3) corresponds to op1 * op2 + op3 + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op3, op2, op1, NI_AdvSimd_FusedMultiplyAddScalar, + callJitType, 8); - // Note that AdvSimd.FusedMultiplyAddScalar(op1,op2,op3) corresponds to op1 + op2 * op3 - // while Math{F}.FusedMultiplyAddScalar(op1,op2,op3) corresponds to op1 * op2 + op3 - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op3, op2, op1, NI_AdvSimd_FusedMultiplyAddScalar, - callJitType, 8); - - retNode = gtNewSimdToScalarNode(callType, retNode, callJitType, 8); - break; - } + retNode = gtNewSimdToScalarNode(callType, retNode, callJitType, 8); + break; #endif // TODO-CQ-XArch: Ideally we would create a GT_INTRINSIC node for fma, however, that currently @@ -5569,7 +5563,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, GenTree* op1 = nullptr; #if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) - if ((intrinsic == NI_PRIMITIVE_ConvertToIntegerNative) && IsBaselineSimdIsaSupported()) + if (intrinsic == NI_PRIMITIVE_ConvertToIntegerNative) { NamedIntrinsic hwIntrinsicId = NI_Illegal; @@ -5781,7 +5775,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, hwintrinsic = varTypeIsLong(baseType) ? NI_LZCNT_X64_LeadingZeroCount : NI_LZCNT_LeadingZeroCount; result = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic); } - else if (compOpportunisticallyDependsOn(InstructionSet_X86Base)) + else { // Pop the value from the stack impPopStack(); @@ -5822,15 +5816,12 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, result = gtNewLclvNode(tmp, baseType); } #elif defined(TARGET_ARM64) - if (compOpportunisticallyDependsOn(InstructionSet_ArmBase)) - { - // Pop the value from the stack - impPopStack(); + // Pop the value from the stack + impPopStack(); - hwintrinsic = varTypeIsLong(baseType) ? NI_ArmBase_Arm64_LeadingZeroCount : NI_ArmBase_LeadingZeroCount; - result = gtNewScalarHWIntrinsicNode(TYP_INT, op1, hwintrinsic); - baseType = TYP_INT; - } + hwintrinsic = varTypeIsLong(baseType) ? NI_ArmBase_Arm64_LeadingZeroCount : NI_ArmBase_LeadingZeroCount; + result = gtNewScalarHWIntrinsicNode(TYP_INT, op1, hwintrinsic); + baseType = TYP_INT; #endif // TARGET_* #endif // FEATURE_HW_INTRINSICS @@ -5965,10 +5956,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, result = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic); } #elif defined(TARGET_ARM64) - if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd)) - { - // TODO-ARM64-CQ: PopCount should be handled as an intrinsic for non-constant cases - } + // TODO-ARM64-CQ: PopCount should be handled as an intrinsic for non-constant cases #endif // TARGET_* #endif // FEATURE_HW_INTRINSICS @@ -6128,7 +6116,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, hwintrinsic = varTypeIsLong(baseType) ? NI_BMI1_X64_TrailingZeroCount : NI_BMI1_TrailingZeroCount; result = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic); } - else if (compOpportunisticallyDependsOn(InstructionSet_X86Base)) + else { // Pop the value from the stack impPopStack(); @@ -6167,19 +6155,15 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, result = gtNewLclvNode(tmp, baseType); } #elif defined(TARGET_ARM64) - if (compOpportunisticallyDependsOn(InstructionSet_ArmBase)) - { - // Pop the value from the stack - impPopStack(); + // Pop the value from the stack + impPopStack(); - hwintrinsic = - varTypeIsLong(baseType) ? NI_ArmBase_Arm64_ReverseElementBits : NI_ArmBase_ReverseElementBits; - op1 = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic); + hwintrinsic = varTypeIsLong(baseType) ? NI_ArmBase_Arm64_ReverseElementBits : NI_ArmBase_ReverseElementBits; + op1 = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic); - hwintrinsic = varTypeIsLong(baseType) ? NI_ArmBase_Arm64_LeadingZeroCount : NI_ArmBase_LeadingZeroCount; - result = gtNewScalarHWIntrinsicNode(TYP_INT, op1, hwintrinsic); - baseType = TYP_INT; - } + hwintrinsic = varTypeIsLong(baseType) ? NI_ArmBase_Arm64_LeadingZeroCount : NI_ArmBase_LeadingZeroCount; + result = gtNewScalarHWIntrinsicNode(TYP_INT, op1, hwintrinsic); + baseType = TYP_INT; #endif // TARGET_* #endif // FEATURE_HW_INTRINSICS @@ -7948,6 +7932,7 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName) case NI_System_Math_Abs: case NI_System_Math_Ceiling: case NI_System_Math_Floor: + case NI_System_Math_FusedMultiplyAdd: case NI_System_Math_Max: case NI_System_Math_Min: case NI_System_Math_MultiplyAddEstimate: @@ -7958,9 +7943,6 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName) case NI_System_Math_Truncate: return true; - case NI_System_Math_FusedMultiplyAdd: - return compOpportunisticallyDependsOn(InstructionSet_AdvSimd); - default: return false; } @@ -9914,7 +9896,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method, } #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) - if (!isMagnitude && compOpportunisticallyDependsOn(InstructionSet_X86Base)) + if (!isMagnitude) { bool needsFixup = false; bool canHandle = false; diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index a82f2186101f18..7366dfe9bce1fd 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -1943,14 +1943,14 @@ class LocalAddressVisitor final : public GenTreeVisitor if (indir->TypeIs(TYP_FLOAT)) { - if (((offset % genTypeSize(TYP_FLOAT)) == 0) && m_compiler->IsBaselineSimdIsaSupported()) + if ((offset % genTypeSize(TYP_FLOAT)) == 0) { return isDef ? IndirTransform::WithElement : IndirTransform::GetElement; } } else if (indir->TypeIs(TYP_SIMD12)) { - if ((offset == 0) && varDsc->TypeIs(TYP_SIMD16) && m_compiler->IsBaselineSimdIsaSupported()) + if ((offset == 0) && varDsc->TypeIs(TYP_SIMD16)) { return isDef ? IndirTransform::WithElement : IndirTransform::GetElement; } @@ -1958,7 +1958,7 @@ class LocalAddressVisitor final : public GenTreeVisitor #ifdef TARGET_ARM64 else if (indir->TypeIs(TYP_SIMD8)) { - if (varDsc->TypeIs(TYP_SIMD16) && ((offset % 8) == 0) && m_compiler->IsBaselineSimdIsaSupported()) + if (varDsc->TypeIs(TYP_SIMD16) && ((offset % 8) == 0)) { return isDef ? IndirTransform::WithElement : IndirTransform::GetElement; } diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 292c1dfe844162..b30230b64adc18 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2648,9 +2648,10 @@ bool Lowering::LowerCallMemcmp(GenTreeCall* call, GenTree** next) GenTree* lArg = call->gtArgs.GetUserArgByIndex(0)->GetNode(); GenTree* rArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); - ssize_t MaxUnrollSize = comp->IsBaselineSimdIsaSupported() ? 32 : 16; + ssize_t MaxUnrollSize = 16; -#if defined(FEATURE_SIMD) && defined(TARGET_XARCH) +#ifdef FEATURE_SIMD +#ifdef TARGET_XARCH if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { MaxUnrollSize = 128; @@ -2660,7 +2661,12 @@ bool Lowering::LowerCallMemcmp(GenTreeCall* call, GenTree** next) // We need AVX2 for NI_Vector256_op_Equality, fallback to Vector128 if only AVX is available MaxUnrollSize = 64; } -#endif + else +#endif // TARGET_XARCH + { + MaxUnrollSize = 32; + } +#endif // FEATURE_SIMD if (cnsSize <= MaxUnrollSize) { @@ -10273,29 +10279,25 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind) #if defined(FEATURE_HW_INTRINSICS) case TYP_LONG: case TYP_REF: - if (comp->IsBaselineSimdIsaSupported()) + // TLDR: we should be here only if one of the conditions is true: + // 1) Both GT_INDs have GTF_IND_ALLOW_NON_ATOMIC flag + // 2) ARM64: Data is at least 8-byte aligned + // 3) AMD64: Data is at least 16-byte aligned on AMD/Intel with AVX+ + // + newType = TYP_SIMD16; + if ((oldType == TYP_REF) && + (!currData.value->IsIntegralConst(0) || !prevData.value->IsIntegralConst(0))) { - // TLDR: we should be here only if one of the conditions is true: - // 1) Both GT_INDs have GTF_IND_ALLOW_NON_ATOMIC flag - // 2) ARM64: Data is at least 8-byte aligned - // 3) AMD64: Data is at least 16-byte aligned on AMD/Intel with AVX+ + // For TYP_REF we only support null values. In theory, we can also support frozen handles, e.g.: // - newType = TYP_SIMD16; - if ((oldType == TYP_REF) && - (!currData.value->IsIntegralConst(0) || !prevData.value->IsIntegralConst(0))) - { - // For TYP_REF we only support null values. In theory, we can also support frozen handles, e.g.: - // - // arr[1] = "hello"; - // arr[0] = "world"; - // - // but we don't want to load managed references into SIMD registers (we can only do so - // when we can issue a nongc region for a block) - return; - } - break; + // arr[1] = "hello"; + // arr[0] = "world"; + // + // but we don't want to load managed references into SIMD registers (we can only do so + // when we can issue a nongc region for a block) + return; } - return; + break; #if defined(TARGET_AMD64) case TYP_SIMD16: diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 8b2b1114cc2cdc..222fe046963cf1 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -3503,9 +3503,6 @@ bool Lowering::TryLowerAddSubToMulLongOp(GenTreeOp* op, GenTree** next) if (!comp->opts.OptimizationEnabled()) return false; - if (!comp->compOpportunisticallyDependsOn(InstructionSet_ArmBase_Arm64)) - return false; - if (op->isContained()) return false; @@ -3609,9 +3606,6 @@ bool Lowering::TryLowerNegToMulLongOp(GenTreeOp* op, GenTree** next) if (!comp->opts.OptimizationEnabled()) return false; - if (!comp->compOpportunisticallyDependsOn(InstructionSet_ArmBase_Arm64)) - return false; - if (op->isContained()) return false; diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 0ff71753e89899..5dde5c6ccaedc0 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -407,7 +407,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) ssize_t fill = src->AsIntCon()->IconValue() & 0xFF; - const bool canUseSimd = !blkNode->IsOnHeapAndContainsReferences() && comp->IsBaselineSimdIsaSupported(); + const bool canUseSimd = !blkNode->IsOnHeapAndContainsReferences(); if (size > comp->getUnrollThreshold(Compiler::UnrollKind::Memset, canUseSimd)) { // It turns out we can't use SIMD so the default threshold is too big @@ -903,9 +903,6 @@ void Lowering::LowerCast(GenTree* tree) } else { - assert(comp->IsBaselineSimdIsaSupportedDebugOnly()); - assert(!TargetArchitecture::Is64Bit || comp->compIsaSupportedDebugOnly(InstructionSet_X86Base_X64)); - // We need to fix up NaN as well as handle possible overflow. Signed conversions // return int/long.MinValue for any overflow, which is correct for saturation of // negative, but the result must be replaced with MaxValue for positive overflow. @@ -941,14 +938,14 @@ void Lowering::LowerCast(GenTree* tree) if (srcType == TYP_FLOAT) { maxFloatSimdVal->f32[0] = 4294967296.0f; - convertIntrinsic = comp->compOpportunisticallyDependsOn(InstructionSet_X86Base_X64) + convertIntrinsic = TargetArchitecture::Is64Bit ? NI_X86Base_X64_ConvertToInt64WithTruncation : NI_X86Base_ConvertToVector128Int32WithTruncation; } else { maxFloatSimdVal->f64[0] = 4294967296.0; - convertIntrinsic = comp->compOpportunisticallyDependsOn(InstructionSet_X86Base_X64) + convertIntrinsic = TargetArchitecture::Is64Bit ? NI_X86Base_X64_ConvertToInt64WithTruncation : NI_X86Base_ConvertToVector128Int32WithTruncation; } @@ -1519,22 +1516,9 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) op2 = userIntrin->Op(1); } - NamedIntrinsic intrinsic = NI_Illegal; - - if (comp->IsBaselineSimdIsaSupported()) - { - intrinsic = GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(comp, GT_AND_NOT, op1, op2, - simdBaseType, simdSize, false); - } - else - { - // We need to ensure we optimize even if SSE2 is disabled - - assert(simdBaseType == TYP_FLOAT); - assert(simdSize <= 16); - - intrinsic = NI_X86Base_AndNot; - } + NamedIntrinsic intrinsic = + GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(comp, GT_AND_NOT, op1, op2, simdBaseType, + simdSize, false); userIntrin->ResetHWIntrinsicId(intrinsic, comp, op1, op2); @@ -4277,8 +4261,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) break; } - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); - // We will be constructing the following parts: // ... // /--* tmp1 simd16 @@ -4332,8 +4314,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp1 = Sse2.UnpackLow(tmp1, tmp2); // ... - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); - node->Op(1) = tmp1; LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); @@ -4364,8 +4344,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // ... // return Sse2.Shuffle(tmp1, 0x00); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); - idx = comp->gtNewIconNode(0x00, TYP_INT); BlockRange().InsertAfter(tmp1, idx); @@ -4413,8 +4391,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // var tmp2 = tmp1; // return Sse.Shuffle(tmp1, tmp2, 0x00); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); - node->Op(1) = tmp1; LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); @@ -4451,8 +4427,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) break; } - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); - // We will be constructing the following parts: // ... // /--* tmp1 simd16 @@ -4582,7 +4556,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) if ((simdBaseType == TYP_SHORT) || (simdBaseType == TYP_USHORT)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); insIntrinsic = NI_X86Base_Insert; } else if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) @@ -4642,7 +4615,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) } assert((simdBaseType != TYP_SHORT) && (simdBaseType != TYP_USHORT)); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); GenTree* op[16]; op[0] = tmp1; @@ -4869,8 +4841,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp2 = Sse.UnpackLow(opP, opQ); // return Sse.MoveLowToHigh(tmp1, tmp2); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); - GenTree* op[4]; op[0] = tmp1; @@ -4936,8 +4906,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // var tmp2 = Vector128.CreateScalarUnsafe(op2); // return Sse.UnpackLow(tmp1, tmp2); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); - tmp2 = InsertNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, simdBaseJitType, 16); LowerNode(tmp2); @@ -5229,37 +5197,6 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) } } - switch (simdBaseType) - { - case TYP_BYTE: - case TYP_UBYTE: - case TYP_INT: - case TYP_UINT: -#if defined(TARGET_AMD64) - case TYP_LONG: - case TYP_ULONG: -#endif // TARGET_AMD64 - { - // Using software fallback if simdBaseType is not supported by hardware - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41)); - break; - } - - case TYP_DOUBLE: - case TYP_FLOAT: - case TYP_SHORT: - case TYP_USHORT: - { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); - break; - } - - default: - { - unreached(); - } - } - // Remove the index node up front to simplify downstream logic BlockRange().Remove(op2); @@ -5382,6 +5319,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) case TYP_LONG: case TYP_ULONG: { + assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41_X64)); resIntrinsic = NI_SSE41_X64_Extract; break; } @@ -5399,6 +5337,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) case TYP_INT: case TYP_UINT: { + assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41)); resIntrinsic = NI_SSE41_Extract; break; } @@ -5498,32 +5437,6 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) assert((0 <= imm8) && (imm8 < count)); - switch (simdBaseType) - { - // Using software fallback if simdBaseType is not supported by hardware - case TYP_BYTE: - case TYP_UBYTE: - case TYP_INT: - case TYP_UINT: - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41)); - break; - - case TYP_LONG: - case TYP_ULONG: - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41_X64)); - break; - - case TYP_DOUBLE: - case TYP_FLOAT: - case TYP_SHORT: - case TYP_USHORT: - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); - break; - - default: - unreached(); - } - // Remove the index node up front to simplify downstream logic BlockRange().Remove(op2); @@ -5703,6 +5616,8 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) case TYP_LONG: case TYP_ULONG: { + assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41_X64)); + idx = comp->gtNewIconNode(imm8); BlockRange().InsertBefore(result, idx); result->ChangeHWIntrinsicId(NI_SSE41_X64_Insert, op1, op3, idx); @@ -5849,6 +5764,8 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) case TYP_INT: case TYP_UINT: { + assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE41)); + idx = comp->gtNewIconNode(imm8); BlockRange().InsertBefore(result, idx); result->ChangeHWIntrinsicId(NI_SSE41_Insert, op1, op3, idx); @@ -6053,8 +5970,6 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); - switch (simdBaseType) { case TYP_SHORT: diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index c076c5d6061d11..0c06c07f8e325f 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -485,7 +485,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); } - if (size >= 4 * REGSIZE_BYTES && compiler->IsBaselineSimdIsaSupported()) + if (size >= 4 * REGSIZE_BYTES) { // We can use 128-bit SIMD ldp/stp for larger block sizes buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates()); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 344585d192eb6a..1e9763c6c5fe4f 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1415,7 +1415,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) { case GenTreeBlk::BlkOpKindUnroll: { - bool willUseSimdMov = compiler->IsBaselineSimdIsaSupported() && (size >= XMM_REGSIZE_BYTES); + bool willUseSimdMov = (size >= XMM_REGSIZE_BYTES); if (willUseSimdMov && blkNode->IsOnHeapAndContainsReferences()) { ClassLayout* layout = blkNode->GetLayout(); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index fc9dd838c24161..a0425a44b6ea3c 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -294,20 +294,14 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType)) { if (srcType == TYP_FLOAT -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // Arm64: src = float, dst is overflow conversion. +#ifdef TARGET_64BIT + // 64-bit: src = float, dst is overflow conversion. // This goes through helper and hence src needs to be converted to double. && tree->gtOverflow() -#elif defined(TARGET_AMD64) - // Amd64: src = float, dst = overflow conversion or SSE2 is not enabled - && (tree->gtOverflow() || !IsBaselineSimdIsaSupported()) -#elif defined(TARGET_ARM) - // Arm: src = float, dst = int64/uint64 or overflow conversion. - && (tree->gtOverflow() || varTypeIsLong(dstType)) #else - // x86: src = float, dst = int64/uint64 or overflow conversion or SSE2 is not enabled - && (tree->gtOverflow() || varTypeIsLong(dstType) || !IsBaselineSimdIsaSupported()) -#endif + // 32-bit: src = float, dst = int64/uint64 or overflow conversion. + && (tree->gtOverflow() || varTypeIsLong(dstType)) +#endif // TARGET_64BIT ) { oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE); @@ -328,39 +322,24 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) { if (!tree->gtOverflow()) { -// ARM64 and LoongArch64 optimize all non-overflow checking conversions -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#ifdef TARGET_64BIT return nullptr; #else -#if defined(TARGET_XARCH) - if (IsBaselineSimdIsaSupported() && (!varTypeIsLong(dstType) || TargetArchitecture::Is64Bit)) + if (!varTypeIsLong(dstType)) { return nullptr; } -#endif // TARGET_XARCH + switch (dstType) { - case TYP_INT: -#ifdef TARGET_XARCH - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper); -#endif // TARGET_XARCH - return nullptr; - - case TYP_UINT: -#if defined(TARGET_ARM) - return nullptr; -#endif - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper); - case TYP_LONG: return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper); - case TYP_ULONG: return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper); default: unreached(); } -#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 +#endif // TARGET_64BIT } else {