From 8ac975e20425617d445aa24d7bfe09dd465b7247 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Wed, 14 May 2025 15:57:02 -0700 Subject: [PATCH 1/3] allow any baseline intrinsics in lowering --- src/coreclr/jit/hwintrinsic.h | 1 + src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 6 +-- src/coreclr/jit/hwintrinsicxarch.cpp | 33 +++++++++++++ src/coreclr/jit/lowerxarch.cpp | 51 +++++++++++---------- src/coreclr/jit/morph.cpp | 39 ++++------------ src/coreclr/jit/rationalize.cpp | 4 ++ 6 files changed, 78 insertions(+), 56 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index d936d579d8e25a..edcb84ce10144e 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -546,6 +546,7 @@ struct HWIntrinsicInfo static bool isScalarIsa(CORINFO_InstructionSet isa); #ifdef TARGET_XARCH + static bool isBaselineIsa(CORINFO_InstructionSet isa); static bool isAVX2GatherIntrinsic(NamedIntrinsic id); static FloatComparisonMode lookupFloatComparisonModeForSwappedArgs(FloatComparisonMode comparison); static NamedIntrinsic lookupIdForFloatComparisonMode(NamedIntrinsic intrinsic, diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index a4f22c16ec2c6f..b2b825abc5a261 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -403,8 +403,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GenTree* embMaskNode = nullptr; GenTree* embMaskOp = nullptr; - // We need to validate that other phases of the compiler haven't introduced unsupported intrinsics - assert(compiler->compIsaSupportedDebugOnly(isa)); + // We need to validate that other phases of the compiler haven't introduced unsupported intrinsics. + // We allow an exception for baseline intrinsics to be introduced unconditionally in LIR. + assert(compiler->compIsaSupportedDebugOnly(isa) || HWIntrinsicInfo::isBaselineIsa(isa)); assert(HWIntrinsicInfo::RequiresCodegen(intrinsicId)); assert(!HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsicId) || !varTypeIsSmall(node->GetSimdBaseType())); @@ -1827,7 +1828,6 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) regNumber targetReg = node->GetRegNum(); var_types baseType = node->GetSimdBaseType(); - assert(compiler->compIsaSupportedDebugOnly(InstructionSet_SSE)); assert((baseType >= TYP_BYTE) && (baseType <= TYP_DOUBLE)); GenTree* op1 = (node->GetOperandCount() >= 1) ? 
node->Op(1) : nullptr; diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 106a0b952fe139..6c46fe2dd07854 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -855,6 +855,39 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic } } +//------------------------------------------------------------------------ +// isBaselineIsa: Gets a value that indicates whether the InstructionSet is +// part of the required hardware support for this platform +// +// Arguments: +// isa - The InstructionSet to check +// +// Return Value: +// true if isa is part of the baseline; otherwise, false +bool HWIntrinsicInfo::isBaselineIsa(CORINFO_InstructionSet isa) +{ + switch (isa) + { + case InstructionSet_X86Base: + case InstructionSet_SSE: + case InstructionSet_SSE2: +#ifdef TARGET_AMD64 + case InstructionSet_X86Base_X64: + case InstructionSet_SSE_X64: + case InstructionSet_SSE2_X64: +#endif // TARGET_AMD64 + case InstructionSet_Vector128: + { + return true; + } + + default: + { + return false; + } + } +} + //------------------------------------------------------------------------ // isFullyImplementedIsa: Gets a value that indicates whether the InstructionSet is fully implemented // diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 868718636d3454..7da5a199035839 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -868,6 +868,11 @@ void Lowering::LowerCast(GenTree* tree) { // If we don't have AVX10v2 saturating conversion instructions for // floating->integral, we have to handle the saturation logic here. + // + // Since this implements ordinary casts, we bend the normal rules around ISA support + // for HWIntrinsics and assume the baseline ISA set (SSE2 and below) is available. + // For this reason, we eschew most gentree convenience methods (e.g. gtNewSimdBinOpNode) + // and create the HWIntrinsic nodes explicitly, as most helpers assert ISA support. JITDUMP("LowerCast before:\n"); DISPTREERANGE(BlockRange(), tree); @@ -904,8 +909,8 @@ void Lowering::LowerCast(GenTree* tree) GenTree* zero = comp->gtNewZeroConNode(TYP_SIMD16); GenTree* fixupVal = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, srcVector, zero, maxScalarIntrinsic, srcBaseType, 16); - - GenTree* toScalar = comp->gtNewSimdToScalarNode(srcType, fixupVal, srcBaseType, 16); + GenTree* toScalar = + comp->gtNewSimdHWIntrinsicNode(srcType, fixupVal, NI_Vector128_ToScalar, srcBaseType, 16); castRange.InsertAtEnd(zero); castRange.InsertAtEnd(fixupVal); @@ -915,9 +920,6 @@ void Lowering::LowerCast(GenTree* tree) } else { - assert(comp->IsBaselineSimdIsaSupportedDebugOnly()); - assert(!TargetArchitecture::Is64Bit || comp->compIsaSupportedDebugOnly(InstructionSet_SSE2_X64)); - // We need to fix up NaN as well as handle possible overflow. Signed conversions // return int/long.MinValue for any overflow, which is correct for saturation of // negative, but the result must be replaced with MaxValue for positive overflow. @@ -953,16 +955,14 @@ void Lowering::LowerCast(GenTree* tree) if (srcType == TYP_FLOAT) { maxFloatSimdVal->f32[0] = 4294967296.0f; - convertIntrinsic = comp->compOpportunisticallyDependsOn(InstructionSet_SSE_X64) - ? NI_SSE_X64_ConvertToInt64WithTruncation - : NI_SSE2_ConvertToVector128Int32WithTruncation; + convertIntrinsic = TargetArchitecture::Is64Bit ? 
NI_SSE_X64_ConvertToInt64WithTruncation + : NI_SSE2_ConvertToVector128Int32WithTruncation; } else { maxFloatSimdVal->f64[0] = 4294967296.0; - convertIntrinsic = comp->compOpportunisticallyDependsOn(InstructionSet_SSE2_X64) - ? NI_SSE2_X64_ConvertToInt64WithTruncation - : NI_SSE2_ConvertToVector128Int32WithTruncation; + convertIntrinsic = TargetArchitecture::Is64Bit ? NI_SSE2_X64_ConvertToInt64WithTruncation + : NI_SSE2_ConvertToVector128Int32WithTruncation; } break; } @@ -1023,6 +1023,7 @@ void Lowering::LowerCast(GenTree* tree) // var fixupVal = Sse.And(srcVec, nanMask); // convertResult = Sse.ConvertToInt32WithTruncation(fixupVal); + NamedIntrinsic andIntrinsic = (srcType == TYP_FLOAT) ? NI_SSE_And : NI_SSE2_And; NamedIntrinsic compareNaNIntrinsic = (srcType == TYP_FLOAT) ? NI_SSE_CompareScalarOrdered : NI_SSE2_CompareScalarOrdered; @@ -1033,8 +1034,9 @@ void Lowering::LowerCast(GenTree* tree) castRange.InsertAtEnd(srcClone); castRange.InsertAtEnd(nanMask); - srcClone = comp->gtClone(srcVector); - GenTree* fixupVal = comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, nanMask, srcClone, srcBaseType, 16); + srcClone = comp->gtClone(srcVector); + GenTree* fixupVal = + comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, nanMask, srcClone, andIntrinsic, srcBaseType, 16); castRange.InsertAtEnd(srcClone); castRange.InsertAtEnd(fixupVal); @@ -1120,15 +1122,16 @@ void Lowering::LowerCast(GenTree* tree) // This creates the equivalent of the following C# code: // floorVal = ((srcVector.AsUInt64() >>> 21) << 21).AsDouble(); - GenTree* twentyOne = comp->gtNewIconNode(21); - GenTree* rightShift = comp->gtNewSimdBinOpNode(GT_RSZ, TYP_SIMD16, floorVal, twentyOne, - CORINFO_TYPE_ULONG, 16); + GenTree* twentyOne = comp->gtNewIconNode(21); + GenTree* rightShift = + comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, floorVal, twentyOne, + NI_SSE2_ShiftRightLogical, CORINFO_TYPE_ULONG, 16); castRange.InsertAtEnd(twentyOne); castRange.InsertAtEnd(rightShift); twentyOne = comp->gtClone(twentyOne); - floorVal = comp->gtNewSimdBinOpNode(GT_LSH, TYP_SIMD16, rightShift, twentyOne, - CORINFO_TYPE_ULONG, 16); + floorVal = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, rightShift, twentyOne, + NI_SSE2_ShiftLeftLogical, CORINFO_TYPE_ULONG, 16); castRange.InsertAtEnd(twentyOne); castRange.InsertAtEnd(floorVal); } @@ -1191,21 +1194,23 @@ void Lowering::LowerCast(GenTree* tree) GenTree* thirtyOne = comp->gtNewIconNode(31); GenTree* mask = - comp->gtNewSimdBinOpNode(GT_RSH, TYP_SIMD16, result, thirtyOne, CORINFO_TYPE_INT, 16); + comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, result, thirtyOne, + NI_SSE2_ShiftRightArithmetic, CORINFO_TYPE_INT, 16); GenTree* andMask = - comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, mask, negated, dstBaseType, 16); + comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, mask, negated, NI_SSE2_And, dstBaseType, 16); castRange.InsertAtEnd(thirtyOne); castRange.InsertAtEnd(mask); castRange.InsertAtEnd(andMask); - convertResult = - comp->gtNewSimdBinOpNode(GT_OR, TYP_SIMD16, andMask, resultClone, dstBaseType, 16); + convertResult = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, andMask, resultClone, NI_SSE2_Or, + dstBaseType, 16); } // Because the results are in a SIMD register, we need to ToScalar() them out. 
castRange.InsertAtEnd(convertResult); - convertResult = comp->gtNewSimdToScalarNode(TYP_INT, convertResult, dstBaseType, 16); + convertResult = comp->gtNewSimdHWIntrinsicNode(TYP_INT, convertResult, NI_Vector128_ToScalar, + dstBaseType, 16); } else { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 74a4e34d34e5f8..16e9605e964d0f 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -294,20 +294,14 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType)) { if (srcType == TYP_FLOAT -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // Arm64: src = float, dst is overflow conversion. +#if defined(TARGET_64BIT) + // 64-bit: src = float, dst is overflow conversion. // This goes through helper and hence src needs to be converted to double. && tree->gtOverflow() -#elif defined(TARGET_AMD64) - // Amd64: src = float, dst = overflow conversion or SSE2 is not enabled - && (tree->gtOverflow() || !IsBaselineSimdIsaSupported()) -#elif defined(TARGET_ARM) - // Arm: src = float, dst = int64/uint64 or overflow conversion. - && (tree->gtOverflow() || varTypeIsLong(dstType)) #else - // x86: src = float, dst = int64/uint64 or overflow conversion or SSE2 is not enabled - && (tree->gtOverflow() || varTypeIsLong(dstType) || !IsBaselineSimdIsaSupported()) -#endif + // 32-bit: src = float, dst = int64/uint64 or overflow conversion. + && (tree->gtOverflow() || varTypeIsLong(dstType)) +#endif // TARGET_64BIT ) { oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE); @@ -328,39 +322,24 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) { if (!tree->gtOverflow()) { -// ARM64 and LoongArch64 optimize all non-overflow checking conversions -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#ifdef TARGET_64BIT return nullptr; #else -#if defined(TARGET_XARCH) - if (IsBaselineSimdIsaSupported() && (!varTypeIsLong(dstType) || TargetArchitecture::Is64Bit)) + if (!varTypeIsLong(dstType)) { return nullptr; } -#endif // TARGET_XARCH + switch (dstType) { - case TYP_INT: -#ifdef TARGET_XARCH - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper); -#endif // TARGET_XARCH - return nullptr; - - case TYP_UINT: -#if defined(TARGET_ARM) - return nullptr; -#endif - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper); - case TYP_LONG: return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper); - case TYP_ULONG: return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper); default: unreached(); } -#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 +#endif // TARGET_64BIT } else { diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index 1c7d31f9eb400d..3df50d5e570f30 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -826,6 +826,10 @@ Compiler::fgWalkResult Rationalizer::RationalizeVisitor::PreOrderVisit(GenTree** #if defined(FEATURE_HW_INTRINSICS) else if (node->OperIsHWIntrinsic()) { + // All intrinsics introduced in HIR must be explicitly supported. 
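+        // (The baseline exemption in genHWIntrinsic applies only to intrinsics introduced later, in LIR;
+        // here, in HIR, the full ISA support check applies.)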
+ NamedIntrinsic intrinsicId = node->AsHWIntrinsic()->GetHWIntrinsicId(); + assert(m_compiler->compIsaSupportedDebugOnly(HWIntrinsicInfo::lookupIsa(intrinsicId))); + if (node->AsHWIntrinsic()->IsUserCall()) { m_rationalizer.RewriteHWIntrinsicAsUserCall(use, this->m_ancestors); From c89077ef04802d4881d56f7aa3621dfc65ba59cc Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Thu, 15 May 2025 12:59:54 -0700 Subject: [PATCH 2/3] partial revert 8ac975e2 --- src/coreclr/jit/hwintrinsic.h | 1 - src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 5 ++- src/coreclr/jit/hwintrinsicxarch.cpp | 33 ------------------ src/coreclr/jit/lowerxarch.cpp | 38 ++++++++------------- src/coreclr/jit/rationalize.cpp | 4 --- 5 files changed, 16 insertions(+), 65 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index edcb84ce10144e..d936d579d8e25a 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -546,7 +546,6 @@ struct HWIntrinsicInfo static bool isScalarIsa(CORINFO_InstructionSet isa); #ifdef TARGET_XARCH - static bool isBaselineIsa(CORINFO_InstructionSet isa); static bool isAVX2GatherIntrinsic(NamedIntrinsic id); static FloatComparisonMode lookupFloatComparisonModeForSwappedArgs(FloatComparisonMode comparison); static NamedIntrinsic lookupIdForFloatComparisonMode(NamedIntrinsic intrinsic, diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index b2b825abc5a261..24aede54066303 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -403,9 +403,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GenTree* embMaskNode = nullptr; GenTree* embMaskOp = nullptr; - // We need to validate that other phases of the compiler haven't introduced unsupported intrinsics. - // We allow an exception for baseline intrinsics to be introduced unconditionally in LIR. 
- assert(compiler->compIsaSupportedDebugOnly(isa) || HWIntrinsicInfo::isBaselineIsa(isa)); + // We need to validate that other phases of the compiler haven't introduced unsupported intrinsics + assert(compiler->compIsaSupportedDebugOnly(isa)); assert(HWIntrinsicInfo::RequiresCodegen(intrinsicId)); assert(!HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsicId) || !varTypeIsSmall(node->GetSimdBaseType())); diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 6c46fe2dd07854..106a0b952fe139 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -855,39 +855,6 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic } } -//------------------------------------------------------------------------ -// isBaselineIsa: Gets a value that indicates whether the InstructionSet is -// part of the required hardware support for this platform -// -// Arguments: -// isa - The InstructionSet to check -// -// Return Value: -// true if isa is part of the baseline; otherwise, false -bool HWIntrinsicInfo::isBaselineIsa(CORINFO_InstructionSet isa) -{ - switch (isa) - { - case InstructionSet_X86Base: - case InstructionSet_SSE: - case InstructionSet_SSE2: -#ifdef TARGET_AMD64 - case InstructionSet_X86Base_X64: - case InstructionSet_SSE_X64: - case InstructionSet_SSE2_X64: -#endif // TARGET_AMD64 - case InstructionSet_Vector128: - { - return true; - } - - default: - { - return false; - } - } -} - //------------------------------------------------------------------------ // isFullyImplementedIsa: Gets a value that indicates whether the InstructionSet is fully implemented // diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 7da5a199035839..15e4750fd8f824 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -868,11 +868,6 @@ void Lowering::LowerCast(GenTree* tree) { // If we don't have AVX10v2 saturating conversion instructions for // floating->integral, we have to handle the saturation logic here. - // - // Since this implements ordinary casts, we bend the normal rules around ISA support - // for HWIntrinsics and assume the baseline ISA set (SSE2 and below) is available. - // For this reason, we eschew most gentree convenience methods (e.g. gtNewSimdBinOpNode) - // and create the HWIntrinsic nodes explicitly, as most helpers assert ISA support. JITDUMP("LowerCast before:\n"); DISPTREERANGE(BlockRange(), tree); @@ -909,8 +904,8 @@ void Lowering::LowerCast(GenTree* tree) GenTree* zero = comp->gtNewZeroConNode(TYP_SIMD16); GenTree* fixupVal = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, srcVector, zero, maxScalarIntrinsic, srcBaseType, 16); - GenTree* toScalar = - comp->gtNewSimdHWIntrinsicNode(srcType, fixupVal, NI_Vector128_ToScalar, srcBaseType, 16); + + GenTree* toScalar = comp->gtNewSimdToScalarNode(srcType, fixupVal, srcBaseType, 16); castRange.InsertAtEnd(zero); castRange.InsertAtEnd(fixupVal); @@ -1023,7 +1018,6 @@ void Lowering::LowerCast(GenTree* tree) // var fixupVal = Sse.And(srcVec, nanMask); // convertResult = Sse.ConvertToInt32WithTruncation(fixupVal); - NamedIntrinsic andIntrinsic = (srcType == TYP_FLOAT) ? NI_SSE_And : NI_SSE2_And; NamedIntrinsic compareNaNIntrinsic = (srcType == TYP_FLOAT) ? 
NI_SSE_CompareScalarOrdered : NI_SSE2_CompareScalarOrdered; @@ -1034,9 +1028,8 @@ void Lowering::LowerCast(GenTree* tree) castRange.InsertAtEnd(srcClone); castRange.InsertAtEnd(nanMask); - srcClone = comp->gtClone(srcVector); - GenTree* fixupVal = - comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, nanMask, srcClone, andIntrinsic, srcBaseType, 16); + srcClone = comp->gtClone(srcVector); + GenTree* fixupVal = comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, nanMask, srcClone, srcBaseType, 16); castRange.InsertAtEnd(srcClone); castRange.InsertAtEnd(fixupVal); @@ -1122,16 +1115,15 @@ void Lowering::LowerCast(GenTree* tree) // This creates the equivalent of the following C# code: // floorVal = ((srcVector.AsUInt64() >>> 21) << 21).AsDouble(); - GenTree* twentyOne = comp->gtNewIconNode(21); - GenTree* rightShift = - comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, floorVal, twentyOne, - NI_SSE2_ShiftRightLogical, CORINFO_TYPE_ULONG, 16); + GenTree* twentyOne = comp->gtNewIconNode(21); + GenTree* rightShift = comp->gtNewSimdBinOpNode(GT_RSZ, TYP_SIMD16, floorVal, twentyOne, + CORINFO_TYPE_ULONG, 16); castRange.InsertAtEnd(twentyOne); castRange.InsertAtEnd(rightShift); twentyOne = comp->gtClone(twentyOne); - floorVal = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, rightShift, twentyOne, - NI_SSE2_ShiftLeftLogical, CORINFO_TYPE_ULONG, 16); + floorVal = comp->gtNewSimdBinOpNode(GT_LSH, TYP_SIMD16, rightShift, twentyOne, + CORINFO_TYPE_ULONG, 16); castRange.InsertAtEnd(twentyOne); castRange.InsertAtEnd(floorVal); } @@ -1194,23 +1186,21 @@ void Lowering::LowerCast(GenTree* tree) GenTree* thirtyOne = comp->gtNewIconNode(31); GenTree* mask = - comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, result, thirtyOne, - NI_SSE2_ShiftRightArithmetic, CORINFO_TYPE_INT, 16); + comp->gtNewSimdBinOpNode(GT_RSH, TYP_SIMD16, result, thirtyOne, CORINFO_TYPE_INT, 16); GenTree* andMask = - comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, mask, negated, NI_SSE2_And, dstBaseType, 16); + comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, mask, negated, dstBaseType, 16); castRange.InsertAtEnd(thirtyOne); castRange.InsertAtEnd(mask); castRange.InsertAtEnd(andMask); - convertResult = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, andMask, resultClone, NI_SSE2_Or, - dstBaseType, 16); + convertResult = + comp->gtNewSimdBinOpNode(GT_OR, TYP_SIMD16, andMask, resultClone, dstBaseType, 16); } // Because the results are in a SIMD register, we need to ToScalar() them out. castRange.InsertAtEnd(convertResult); - convertResult = comp->gtNewSimdHWIntrinsicNode(TYP_INT, convertResult, NI_Vector128_ToScalar, - dstBaseType, 16); + convertResult = comp->gtNewSimdToScalarNode(TYP_INT, convertResult, dstBaseType, 16); } else { diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index 3df50d5e570f30..1c7d31f9eb400d 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -826,10 +826,6 @@ Compiler::fgWalkResult Rationalizer::RationalizeVisitor::PreOrderVisit(GenTree** #if defined(FEATURE_HW_INTRINSICS) else if (node->OperIsHWIntrinsic()) { - // All intrinsics introduced in HIR must be explicitly supported. 
- NamedIntrinsic intrinsicId = node->AsHWIntrinsic()->GetHWIntrinsicId(); - assert(m_compiler->compIsaSupportedDebugOnly(HWIntrinsicInfo::lookupIsa(intrinsicId))); - if (node->AsHWIntrinsic()->IsUserCall()) { m_rationalizer.RewriteHWIntrinsicAsUserCall(use, this->m_ancestors); From d5a89657f2dce5786249947840497bc90fb99ad2 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Thu, 15 May 2025 17:41:45 -0700 Subject: [PATCH 3/3] add baseline ISAs to supported set in rationalize --- src/coreclr/jit/compiler.cpp | 27 +++++++++++++++++++++++++++ src/coreclr/jit/compiler.h | 1 + src/coreclr/jit/decomposelongs.cpp | 2 -- src/coreclr/jit/lowerxarch.cpp | 19 ------------------- src/coreclr/jit/rationalize.cpp | 3 +++ 5 files changed, 31 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index d085ef712a40fb..2755328b8271ac 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2093,6 +2093,33 @@ bool Compiler::notifyInstructionSetUsage(CORINFO_InstructionSet isa, bool suppor return info.compCompHnd->notifyInstructionSetUsage(isa, supported); } +void Compiler::setBaselineISAsSupported() +{ +#ifdef FEATURE_HW_INTRINSICS + CORINFO_InstructionSetFlags supportedISAs = opts.compSupportsISA; + +#if defined(TARGET_XARCH) + supportedISAs.AddInstructionSet(InstructionSet_X86Base); + supportedISAs.AddInstructionSet(InstructionSet_SSE); + supportedISAs.AddInstructionSet(InstructionSet_SSE2); +#if defined(TARGET_AMD64) + supportedISAs.AddInstructionSet(InstructionSet_X86Base_X64); + supportedISAs.AddInstructionSet(InstructionSet_SSE_X64); + supportedISAs.AddInstructionSet(InstructionSet_SSE2_X64); +#endif // TARGET_AMD64 +#elif defined(TARGET_ARM64) + supportedISAs.AddInstructionSet(InstructionSet_ArmBase); + supportedISAs.AddInstructionSet(InstructionSet_AdvSimd); + supportedISAs.AddInstructionSet(InstructionSet_ArmBase_Arm64); + supportedISAs.AddInstructionSet(InstructionSet_AdvSimd_Arm64); + supportedISAs.AddInstructionSet(InstructionSet_Vector64); +#endif + supportedISAs.AddInstructionSet(InstructionSet_Vector128); + + opts.setSupportedISAs(supportedISAs); +#endif // FEATURE_HW_INTRINSICS +} + #ifdef PROFILING_SUPPORTED // A Dummy routine to receive Enter/Leave/Tailcall profiler callbacks. // These are used when DOTNET_JitEltHookEnabled=1 diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index de0f6afbdc8fc5..9558e3527dff8c 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9661,6 +9661,7 @@ class Compiler #endif // DEBUG bool notifyInstructionSetUsage(CORINFO_InstructionSet isa, bool supported) const; + void setBaselineISAsSupported(); // Answer the question: Is a particular ISA allowed to be used implicitly by optimizations? 
// The result of this api call will exactly match the target machine diff --git a/src/coreclr/jit/decomposelongs.cpp b/src/coreclr/jit/decomposelongs.cpp index ddc55e633292fa..639ec3df69bbed 100644 --- a/src/coreclr/jit/decomposelongs.cpp +++ b/src/coreclr/jit/decomposelongs.cpp @@ -1979,8 +1979,6 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicToScalar(LIR::Use& use, GenTreeHWIn } else { - assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - GenTree* thirtyTwo = m_compiler->gtNewIconNode(32); GenTree* shift = m_compiler->gtNewSimdBinOpNode(GT_RSZ, op1->TypeGet(), simdTmpVar, thirtyTwo, node->GetSimdBaseJitType(), simdSize); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 15e4750fd8f824..ad663148800dbc 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -4443,8 +4443,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) break; } - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - // We will be constructing the following parts: // ... // /--* tmp1 simd16 @@ -4498,8 +4496,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp1 = Sse2.UnpackLow(tmp1, tmp2); // ... - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - node->Op(1) = tmp1; LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); @@ -4530,8 +4526,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // ... // return Sse2.Shuffle(tmp1, 0x00); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - idx = comp->gtNewIconNode(0x00, TYP_INT); BlockRange().InsertAfter(tmp1, idx); @@ -4579,8 +4573,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // var tmp2 = tmp1; // return Sse.Shuffle(tmp1, tmp2, 0x00); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE)); - node->Op(1) = tmp1; LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); @@ -4617,8 +4609,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) break; } - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - // We will be constructing the following parts: // ... 
// /--* tmp1 simd16 @@ -4748,7 +4738,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) if ((simdBaseType == TYP_SHORT) || (simdBaseType == TYP_USHORT)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); insIntrinsic = NI_SSE2_Insert; } else if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) @@ -4808,7 +4797,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) } assert((simdBaseType != TYP_SHORT) && (simdBaseType != TYP_USHORT)); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); GenTree* op[16]; op[0] = tmp1; @@ -5035,8 +5023,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp2 = Sse.UnpackLow(opP, opQ); // return Sse.MoveLowToHigh(tmp1, tmp2); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE)); - GenTree* op[4]; op[0] = tmp1; @@ -5100,8 +5086,6 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // var tmp2 = Vector128.CreateScalarUnsafe(op2); // return Sse.UnpackLow(tmp1, tmp2); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - tmp2 = InsertNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, simdBaseJitType, 16); LowerNode(tmp2); @@ -5414,7 +5398,6 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) case TYP_SHORT: case TYP_USHORT: { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); break; } @@ -6230,8 +6213,6 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - switch (simdBaseType) { case TYP_SHORT: diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index 1c7d31f9eb400d..d15be0c9699b1b 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -914,6 +914,9 @@ PhaseStatus Rationalizer::DoPhase() block->bbStmtList = nullptr; assert(BlockRange().CheckLIR(comp, true)); + + // Allow unrestricted use of baseline HWIntrinsic ISAs in LIR. + comp->setBaselineISAsSupported(); } comp->compRationalIRForm = true;
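
A sketch of what the final shape enables, for review context: with the baseline ISAs folded into
opts.compSupportsISA at the end of rationalization, LIR phases can use the gentree convenience
helpers (which assert ISA support internally) to emit SSE2-level nodes unconditionally, rather
than hand-constructing GenTreeHWIntrinsic nodes as the interim first patch did in LowerCast. The
helper below is hypothetical and not part of this series; it only reuses calls that appear in
these diffs (gtNewIconNode, gtNewSimdBinOpNode, BlockRange().InsertAfter) and assumes 'vec' is
already linked into the block range.

// Hypothetical LIR helper: shift each ulong lane of a SIMD16 value right by
// 'amount' bits. No compOpportunisticallyDependsOn check is needed because
// SSE2 is in the baseline set registered by setBaselineISAsSupported().
GenTree* Lowering::EmitBaselineShiftRight(GenTree* vec, ssize_t amount)
{
    GenTree* shiftBy = comp->gtNewIconNode(amount);
    BlockRange().InsertAfter(vec, shiftBy);

    // For ulong lanes, GT_RSZ maps to NI_SSE2_ShiftRightLogical; the helper's
    // internal compIsaSupportedDebugOnly assert now holds by construction.
    GenTree* shift = comp->gtNewSimdBinOpNode(GT_RSZ, TYP_SIMD16, vec, shiftBy, CORINFO_TYPE_ULONG, 16);
    BlockRange().InsertAfter(shiftBy, shift);
    return shift;
}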