diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index d085ef712a40fb..2755328b8271ac 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -2093,6 +2093,44 @@ bool Compiler::notifyInstructionSetUsage(CORINFO_InstructionSet isa, bool suppor
     return info.compCompHnd->notifyInstructionSetUsage(isa, supported);
 }
 
+//------------------------------------------------------------------------
+// setBaselineISAsSupported: Add the target's baseline hardware intrinsic ISAs
+//    to the supported ISA set held in the compiler options.
+//
+// Notes:
+//    Starts from opts.compSupportsISA and adds X86Base/SSE/SSE2 on xarch (plus
+//    their X64 variants on AMD64), or ArmBase/AdvSimd, their Arm64 variants and
+//    Vector64 on ARM64. Vector128 is added on every FEATURE_HW_INTRINSICS target.
+//    The result is passed to opts.setSupportedISAs. This is a no-op when
+//    FEATURE_HW_INTRINSICS is not defined.
+//
+void Compiler::setBaselineISAsSupported()
+{
+#ifdef FEATURE_HW_INTRINSICS
+    CORINFO_InstructionSetFlags supportedISAs = opts.compSupportsISA;
+
+#if defined(TARGET_XARCH)
+    supportedISAs.AddInstructionSet(InstructionSet_X86Base);
+    supportedISAs.AddInstructionSet(InstructionSet_SSE);
+    supportedISAs.AddInstructionSet(InstructionSet_SSE2);
+#if defined(TARGET_AMD64)
+    supportedISAs.AddInstructionSet(InstructionSet_X86Base_X64);
+    supportedISAs.AddInstructionSet(InstructionSet_SSE_X64);
+    supportedISAs.AddInstructionSet(InstructionSet_SSE2_X64);
+#endif // TARGET_AMD64
+#elif defined(TARGET_ARM64)
+    supportedISAs.AddInstructionSet(InstructionSet_ArmBase);
+    supportedISAs.AddInstructionSet(InstructionSet_AdvSimd);
+    supportedISAs.AddInstructionSet(InstructionSet_ArmBase_Arm64);
+    supportedISAs.AddInstructionSet(InstructionSet_AdvSimd_Arm64);
+    supportedISAs.AddInstructionSet(InstructionSet_Vector64);
+#endif // TARGET_XARCH || TARGET_ARM64
+    supportedISAs.AddInstructionSet(InstructionSet_Vector128);
+
+    opts.setSupportedISAs(supportedISAs);
+#endif // FEATURE_HW_INTRINSICS
+}
+
 #ifdef PROFILING_SUPPORTED
 // A Dummy routine to receive Enter/Leave/Tailcall profiler callbacks.
// These are used when DOTNET_JitEltHookEnabled=1 diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index de0f6afbdc8fc5..9558e3527dff8c 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9661,6 +9661,7 @@ class Compiler #endif // DEBUG bool notifyInstructionSetUsage(CORINFO_InstructionSet isa, bool supported) const; + void setBaselineISAsSupported(); // Answer the question: Is a particular ISA allowed to be used implicitly by optimizations? // The result of this api call will exactly match the target machine diff --git a/src/coreclr/jit/decomposelongs.cpp b/src/coreclr/jit/decomposelongs.cpp index ddc55e633292fa..639ec3df69bbed 100644 --- a/src/coreclr/jit/decomposelongs.cpp +++ b/src/coreclr/jit/decomposelongs.cpp @@ -1979,8 +1979,6 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicToScalar(LIR::Use& use, GenTreeHWIn } else { - assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - GenTree* thirtyTwo = m_compiler->gtNewIconNode(32); GenTree* shift = m_compiler->gtNewSimdBinOpNode(GT_RSZ, op1->TypeGet(), simdTmpVar, thirtyTwo, node->GetSimdBaseJitType(), simdSize); diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index a4f22c16ec2c6f..24aede54066303 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -1827,7 +1827,6 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) regNumber targetReg = node->GetRegNum(); var_types baseType = node->GetSimdBaseType(); - assert(compiler->compIsaSupportedDebugOnly(InstructionSet_SSE)); assert((baseType >= TYP_BYTE) && (baseType <= TYP_DOUBLE)); GenTree* op1 = (node->GetOperandCount() >= 1) ? 
node->Op(1) : nullptr; diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 868718636d3454..ad663148800dbc 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -915,9 +915,6 @@ void Lowering::LowerCast(GenTree* tree) } else { - assert(comp->IsBaselineSimdIsaSupportedDebugOnly()); - assert(!TargetArchitecture::Is64Bit || comp->compIsaSupportedDebugOnly(InstructionSet_SSE2_X64)); - // We need to fix up NaN as well as handle possible overflow. Signed conversions // return int/long.MinValue for any overflow, which is correct for saturation of // negative, but the result must be replaced with MaxValue for positive overflow. @@ -953,16 +950,14 @@ void Lowering::LowerCast(GenTree* tree) if (srcType == TYP_FLOAT) { maxFloatSimdVal->f32[0] = 4294967296.0f; - convertIntrinsic = comp->compOpportunisticallyDependsOn(InstructionSet_SSE_X64) - ? NI_SSE_X64_ConvertToInt64WithTruncation - : NI_SSE2_ConvertToVector128Int32WithTruncation; + convertIntrinsic = TargetArchitecture::Is64Bit ? NI_SSE_X64_ConvertToInt64WithTruncation + : NI_SSE2_ConvertToVector128Int32WithTruncation; } else { maxFloatSimdVal->f64[0] = 4294967296.0; - convertIntrinsic = comp->compOpportunisticallyDependsOn(InstructionSet_SSE2_X64) - ? NI_SSE2_X64_ConvertToInt64WithTruncation - : NI_SSE2_ConvertToVector128Int32WithTruncation; + convertIntrinsic = TargetArchitecture::Is64Bit ? NI_SSE2_X64_ConvertToInt64WithTruncation + : NI_SSE2_ConvertToVector128Int32WithTruncation; } break; } @@ -4448,8 +4443,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) break; } - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - // We will be constructing the following parts: // ... // /--* tmp1 simd16 @@ -4503,8 +4496,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp1 = Sse2.UnpackLow(tmp1, tmp2); // ... 
- assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - node->Op(1) = tmp1; LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); @@ -4535,8 +4526,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // ... // return Sse2.Shuffle(tmp1, 0x00); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - idx = comp->gtNewIconNode(0x00, TYP_INT); BlockRange().InsertAfter(tmp1, idx); @@ -4584,8 +4573,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // var tmp2 = tmp1; // return Sse.Shuffle(tmp1, tmp2, 0x00); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE)); - node->Op(1) = tmp1; LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); @@ -4622,8 +4609,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) break; } - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - // We will be constructing the following parts: // ... // /--* tmp1 simd16 @@ -4753,7 +4738,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) if ((simdBaseType == TYP_SHORT) || (simdBaseType == TYP_USHORT)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); insIntrinsic = NI_SSE2_Insert; } else if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) @@ -4813,7 +4797,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) } assert((simdBaseType != TYP_SHORT) && (simdBaseType != TYP_USHORT)); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); GenTree* op[16]; op[0] = tmp1; @@ -5040,8 +5023,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp2 = Sse.UnpackLow(opP, opQ); // return Sse.MoveLowToHigh(tmp1, tmp2); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE)); - GenTree* op[4]; op[0] = tmp1; @@ -5105,8 +5086,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // var tmp2 = Vector128.CreateScalarUnsafe(op2); // return 
Sse.UnpackLow(tmp1, tmp2); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - tmp2 = InsertNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, simdBaseJitType, 16); LowerNode(tmp2); @@ -5419,7 +5398,6 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) case TYP_SHORT: case TYP_USHORT: { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); break; } @@ -6235,8 +6213,6 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - switch (simdBaseType) { case TYP_SHORT: diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 74a4e34d34e5f8..16e9605e964d0f 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -294,20 +294,14 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType)) { if (srcType == TYP_FLOAT -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // Arm64: src = float, dst is overflow conversion. +#if defined(TARGET_64BIT) + // 64-bit: src = float, dst is overflow conversion. // This goes through helper and hence src needs to be converted to double. && tree->gtOverflow() -#elif defined(TARGET_AMD64) - // Amd64: src = float, dst = overflow conversion or SSE2 is not enabled - && (tree->gtOverflow() || !IsBaselineSimdIsaSupported()) -#elif defined(TARGET_ARM) - // Arm: src = float, dst = int64/uint64 or overflow conversion. - && (tree->gtOverflow() || varTypeIsLong(dstType)) #else - // x86: src = float, dst = int64/uint64 or overflow conversion or SSE2 is not enabled - && (tree->gtOverflow() || varTypeIsLong(dstType) || !IsBaselineSimdIsaSupported()) -#endif + // 32-bit: src = float, dst = int64/uint64 or overflow conversion. 
+            && (tree->gtOverflow() || varTypeIsLong(dstType))
+#endif // TARGET_64BIT
         )
         {
             oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE);
@@ -328,39 +322,24 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
         {
             if (!tree->gtOverflow())
             {
-// ARM64 and LoongArch64 optimize all non-overflow checking conversions
-#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
+#ifdef TARGET_64BIT
                 return nullptr;
 #else
-#if defined(TARGET_XARCH)
-                if (IsBaselineSimdIsaSupported() && (!varTypeIsLong(dstType) || TargetArchitecture::Is64Bit))
+                if (!varTypeIsLong(dstType))
                 {
                     return nullptr;
                 }
-#endif // TARGET_XARCH
+
                 switch (dstType)
                 {
-                    case TYP_INT:
-#ifdef TARGET_XARCH
-                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
-#endif // TARGET_XARCH
-                        return nullptr;
-
-                    case TYP_UINT:
-#if defined(TARGET_ARM)
-                        return nullptr;
-#endif
-                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
-
                     case TYP_LONG:
                         return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
-
                     case TYP_ULONG:
                         return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
                     default:
                         unreached();
                 }
-#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64
+#endif // TARGET_64BIT
             }
             else
             {
diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp
index 1c7d31f9eb400d..d15be0c9699b1b 100644
--- a/src/coreclr/jit/rationalize.cpp
+++ b/src/coreclr/jit/rationalize.cpp
@@ -914,6 +914,11 @@ PhaseStatus Rationalizer::DoPhase()
         block->bbStmtList = nullptr;
 
         assert(BlockRange().CheckLIR(comp, true));
     }
 
+    // Every block is now in LIR: allow unrestricted use of baseline HWIntrinsic ISAs
+    // from here on. This is done once, after the block loop, so that no block is
+    // rationalized under the widened ISA set and the call is not repeated per block.
+    comp->setBaselineISAsSupported();
+
     comp->compRationalIRForm = true;