diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index d7a0086ed9bb90..cf02f3ee4f2c75 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -3354,10 +3354,13 @@ class Compiler
                                                    NamedIntrinsic hwIntrinsicID);
     GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(
         var_types type, GenTree* op1, GenTree* op2, GenTree* op3, NamedIntrinsic hwIntrinsicID);
-    CorInfoType getBaseJitTypeFromArgIfNeeded(NamedIntrinsic intrinsic,
+    var_types getRetTypeAndBaseJitTypeFromSig(NamedIntrinsic intrinsic,
                                               CORINFO_CLASS_HANDLE clsHnd,
                                               CORINFO_SIG_INFO* sig,
-                                              CorInfoType simdBaseJitType);
+                                              CorInfoType* simdBaseJitType);
+    CorInfoType getBaseJitTypeFromArgIfNeeded(NamedIntrinsic intrinsic,
+                                              CORINFO_SIG_INFO* sig,
+                                              CorInfoType simdBaseJitType);

 #ifdef TARGET_ARM64
     GenTreeFieldList* gtConvertTableOpToFieldList(GenTree* op, unsigned fieldCount);
@@ -3615,6 +3618,9 @@ class Compiler
     GenTree* gtFoldExprCall(GenTreeCall* call);
     GenTree* gtFoldTypeCompare(GenTree* tree);
     GenTree* gtFoldTypeEqualityCall(bool isEq, GenTree* op1, GenTree* op2);
+#if defined(FEATURE_HW_INTRINSICS)
+    GenTree* gtFoldHWIntrinsicCall(GenTreeCall* call, NamedIntrinsic intrinsic);
+#endif // FEATURE_HW_INTRINSICS

     // Options to control behavior of gtTryRemoveBoxUpstreamEffects
     enum BoxRemovalOptions
@@ -4573,6 +4579,9 @@ class Compiler
                              bool mustExpand);

 #ifdef FEATURE_HW_INTRINSICS
+    static bool isSupportedBaseType(NamedIntrinsic intrinsic, CorInfoType baseJitType);
+    bool IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, var_types simdBaseType) const;
+
     GenTree* impHWIntrinsic(NamedIntrinsic intrinsic,
                             CORINFO_CLASS_HANDLE clsHnd,
                             CORINFO_METHOD_HANDLE method,
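The compiler.h hunks replace the old single-purpose query with a split contract: the return value now carries the intrinsic's return type, the SIMD base type comes back through an out parameter, and TYP_UNKNOWN acts as the "do not expand" sentinel. A minimal standalone sketch of that contract, using toy enums rather than the real JIT types:

```cpp
// Toy model (not the real JIT types) of the new query contract: the return
// value carries the intrinsic's return type, the out parameter carries the
// SIMD base type, and TYP_UNKNOWN is the "do not expand" sentinel.
#include <cstdio>

enum VarType : int { TYP_UNKNOWN, TYP_SIMD16 };
enum BaseJitType : int { BASE_UNDEF, BASE_FLOAT };

static VarType getRetTypeAndBaseJitTypeFromSig(bool baseTypeSupported, BaseJitType* simdBaseJitType)
{
    *simdBaseJitType = baseTypeSupported ? BASE_FLOAT : BASE_UNDEF;
    return baseTypeSupported ? TYP_SIMD16 : TYP_UNKNOWN;
}

int main()
{
    BaseJitType baseJitType = BASE_UNDEF;
    VarType     retType     = getRetTypeAndBaseJitTypeFromSig(true, &baseJitType);

    if (retType == TYP_UNKNOWN)
    {
        puts("bail out: treat as an ordinary call");
        return 0;
    }
    printf("expand: retType=%d baseJitType=%d\n", retType, baseJitType);
    return 0;
}
```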
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 2b9ec27eb8228c..b9fc8c860f8ca7 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -13684,6 +13684,13 @@ GenTree* Compiler::gtFoldExprCall(GenTreeCall* call)
     // Check for a new-style jit intrinsic.
     const NamedIntrinsic ni = lookupNamedIntrinsic(call->gtCallMethHnd);

+#if defined(FEATURE_HW_INTRINSICS)
+    if ((ni > NI_HW_INTRINSIC_START) && (ni < NI_SIMD_AS_HWINTRINSIC_END))
+    {
+        return gtFoldHWIntrinsicCall(call, ni);
+    }
+#endif // FEATURE_HW_INTRINSICS
+
     switch (ni)
     {
         case NI_System_Enum_HasFlag:
@@ -13756,6 +13763,103 @@ GenTree* Compiler::gtFoldTypeEqualityCall(bool isEq, GenTree* op1, GenTree* op2)
     return compare;
 }

+#if defined(FEATURE_HW_INTRINSICS)
+//------------------------------------------------------------------------
+// gtFoldHWIntrinsicCall: Fold a call to a hardware intrinsic API or return the original call
+//
+// Arguments:
+//    call      -- the call node to attempt to fold
+//    intrinsic -- the ID of the intrinsic represented by the call
+//
+// Returns:
+//    call if no folding happened.
+//    An alternative tree if folding happened.
+//
+GenTree* Compiler::gtFoldHWIntrinsicCall(GenTreeCall* call, NamedIntrinsic intrinsic)
+{
+    assert((intrinsic > NI_HW_INTRINSIC_START) && (intrinsic < NI_SIMD_AS_HWINTRINSIC_END));
+
+    if (intrinsic > NI_SIMD_AS_HWINTRINSIC_START)
+    {
+        // TODO-CQ: Handle SIMD_AS_HWINTRINSIC
+        return call;
+    }
+
+    CORINFO_CLASS_HANDLE  clsHnd = NO_CLASS_HANDLE;
+    CORINFO_METHOD_HANDLE method = call->gtCallMethHnd;
+
+    CORINFO_SIG_INFO sig;
+    eeGetMethodSig(method, &sig);
+
+    int         numArgs         = sig.numArgs;
+    CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF;
+    var_types   retType         = getRetTypeAndBaseJitTypeFromSig(intrinsic, clsHnd, &sig, &simdBaseJitType);
+    GenTree*    retNode         = call;
+
+    if (retType == TYP_UNKNOWN)
+    {
+        return retNode;
+    }
+
+    HWIntrinsicCategory    category = HWIntrinsicInfo::lookupCategory(intrinsic);
+    CORINFO_InstructionSet isa      = HWIntrinsicInfo::lookupIsa(intrinsic);
+
+    // Immediately return if the category is other than scalar/special and this is not a supported base type.
+    if ((category != HW_Category_Special) && (category != HW_Category_Scalar) && !HWIntrinsicInfo::isScalarIsa(isa) &&
+        !isSupportedBaseType(intrinsic, simdBaseJitType))
+    {
+        return retNode;
+    }
+
+    var_types simdBaseType = TYP_UNKNOWN;
+
+    if (simdBaseJitType != CORINFO_TYPE_UNDEF)
+    {
+        simdBaseType = JitType2PreciseVarType(simdBaseJitType);
+        assert(varTypeIsArithmetic(simdBaseType));
+    }
+
+    const unsigned simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, &sig);
+
+    switch (intrinsic)
+    {
+        case NI_Vector128_Shuffle:
+#if defined(TARGET_XARCH)
+        case NI_Vector256_Shuffle:
+        case NI_Vector512_Shuffle:
+#elif defined(TARGET_ARM64)
+        case NI_Vector64_Shuffle:
+#endif
+        {
+            GenTree* op2 = call->gtArgs.GetUserArgByIndex(1)->GetNode();
+
+            if (!op2->IsVectorConst() || !IsValidForShuffle(op2->AsVecCon(), simdSize, simdBaseType))
+            {
+                // TODO-CQ: Handling non-constant indices is a bit more complex
+                break;
+            }
+
+            GenTree* op1 = call->gtArgs.GetUserArgByIndex(0)->GetNode();
+            retNode      = gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize);
+
+            if (call->gtArgs.HasRetBuffer())
+            {
+                GenTree* retBuf = call->gtArgs.GetRetBufferArg()->GetNode();
+                retNode         = gtNewStoreIndNode(retType, retBuf, retNode);
+            }
+            break;
+        }
+
+        default:
+        {
+            break;
+        }
+    }
+
+    return retNode;
+}
+#endif // FEATURE_HW_INTRINSICS
+
 /*****************************************************************************
  *
  *  Some comparisons can be folded:
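For reference, the shuffle fold above ultimately encodes a simple element-wise rule. A standalone model of it (not JIT code), assuming the documented Vector128&lt;T&gt;.Shuffle behavior that an out-of-range index zeroes the destination element:

```cpp
// Standalone model (not JIT code) of the semantics gtNewSimdShuffleNode
// encodes once the index vector is a constant:
//   out[i] = (idx[i] < n) ? src[idx[i]] : 0
#include <cstdint>
#include <cstdio>

static void shuffleModel(const int32_t* src, const uint32_t* idx, int32_t* out, uint32_t n)
{
    for (uint32_t i = 0; i < n; i++)
    {
        out[i] = (idx[i] < n) ? src[idx[i]] : 0; // out-of-range index zeroes the lane
    }
}

int main()
{
    int32_t  src[4] = {10, 20, 30, 40};
    uint32_t idx[4] = {3, 2, 1, 9}; // 9 is out of range, so that element becomes 0
    int32_t  out[4];

    shuffleModel(src, idx, out, 4);
    printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); // prints: 40 30 20 0
    return 0;
}
```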
@@ -18269,6 +18373,79 @@ unsigned GenTreeVecCon::ElementCount(unsigned simdSize, var_types simdBaseType)
 {
     return simdSize / genTypeSize(simdBaseType);
 }
+
+bool Compiler::IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, var_types simdBaseType) const
+{
+#if defined(TARGET_XARCH)
+    size_t elementSize  = genTypeSize(simdBaseType);
+    size_t elementCount = simdSize / elementSize;
+
+    if (simdSize == 32)
+    {
+        if (!compOpportunisticallyDependsOn(InstructionSet_AVX2))
+        {
+            // While we could accelerate some functions on hardware with only AVX support
+            // it's likely not worth it overall given that IsHardwareAccelerated reports false
+            return false;
+        }
+        else if ((varTypeIsByte(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI_VL)) ||
+                 (varTypeIsShort(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512BW_VL)))
+        {
+            bool crossLane = false;
+
+            for (size_t index = 0; index < elementCount; index++)
+            {
+                uint64_t value = vecCon->GetIntegralVectorConstElement(index, simdBaseType);
+
+                if (value >= elementCount)
+                {
+                    continue;
+                }
+
+                if (index < (elementCount / 2))
+                {
+                    if (value >= (elementCount / 2))
+                    {
+                        crossLane = true;
+                        break;
+                    }
+                }
+                else if (value < (elementCount / 2))
+                {
+                    crossLane = true;
+                    break;
+                }
+            }
+
+            if (crossLane)
+            {
+                // TODO-XARCH-CQ: We should emulate cross-lane shuffling for byte/sbyte and short/ushort
+                return false;
+            }
+        }
+    }
+    else if (simdSize == 64)
+    {
+        if (varTypeIsByte(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI))
+        {
+            // TYP_BYTE, TYP_UBYTE need AVX512VBMI.
+            return false;
+        }
+    }
+    else
+    {
+        assert(simdSize == 16);
+
+        if (varTypeIsSmall(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_SSSE3))
+        {
+            // TYP_BYTE, TYP_UBYTE, TYP_SHORT, and TYP_USHORT need SSSE3 to be shuffled
+            return false;
+        }
+    }
+#endif // TARGET_XARCH
+
+    return true;
+}
 #endif // FEATURE_HW_INTRINSICS

 //------------------------------------------------------------------------
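The index/value comparisons in IsValidForShuffle implement a lane-crossing test: for 32-byte vectors of byte/short data, AVX2 shuffles operate within each 128-bit lane, so any element sourced from the other half forces a fallback unless the AVX512 VBMI/BW VL forms are available. An equivalent standalone model (not JIT code) with a worked example:

```cpp
// Standalone model (not JIT code) of the lane-crossing test above.
#include <cstddef>
#include <cstdint>
#include <cstdio>

static bool crossesLane(const uint64_t* idx, size_t elementCount)
{
    const size_t half = elementCount / 2;
    for (size_t i = 0; i < elementCount; i++)
    {
        if (idx[i] >= elementCount)
        {
            continue; // out-of-range index just zeroes the element; no data moves
        }
        const bool srcHigh = (idx[i] >= half);
        const bool dstHigh = (i >= half);
        if (srcHigh != dstHigh)
        {
            return true; // element would have to cross the 128-bit boundary
        }
    }
    return false;
}

int main()
{
    const uint64_t identity[8] = {0, 1, 2, 3, 4, 5, 6, 7}; // 8-element stand-in
    const uint64_t reverse[8]  = {7, 6, 5, 4, 3, 2, 1, 0};
    printf("identity crosses lanes: %d\n", crossesLane(identity, 8)); // 0
    printf("reverse crosses lanes:  %d\n", crossesLane(reverse, 8));  // 1
    return 0;
}
```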
diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp
index e6b0e5fa72ffb1..ea4ea72b30626e 100644
--- a/src/coreclr/jit/hwintrinsic.cpp
+++ b/src/coreclr/jit/hwintrinsic.cpp
@@ -355,22 +355,136 @@ const TernaryLogicInfo& TernaryLogicInfo::lookup(uint8_t control)
 #endif // TARGET_XARCH

 //------------------------------------------------------------------------
-// getBaseJitTypeFromArgIfNeeded: Get simdBaseJitType of intrinsic from 1st or 2nd argument depending on the flag
+// getRetTypeAndBaseJitTypeFromSig: Get retType and simdBaseJitType of intrinsic from signature
 //
 // Arguments:
 //    intrinsic       -- id of the intrinsic function.
 //    clsHnd          -- class handle containing the intrinsic function.
-//    method          -- method handle of the intrinsic function.
 //    sig             -- signature of the intrinsic call.
-//    simdBaseJitType -- Predetermined simdBaseJitType, could be CORINFO_TYPE_UNDEF
+//    simdBaseJitType -- [Out] The determined simdBaseJitType, could be CORINFO_TYPE_UNDEF
 //
 // Return Value:
-//    The basetype of intrinsic of it can be fetched from 1st or 2nd argument, else return baseType unmodified.
+//    The retType of the intrinsic as fetched from the signature, or TYP_UNKNOWN if its base type is unsupported
 //
-CorInfoType Compiler::getBaseJitTypeFromArgIfNeeded(NamedIntrinsic intrinsic,
+var_types Compiler::getRetTypeAndBaseJitTypeFromSig(NamedIntrinsic intrinsic,
                                                     CORINFO_CLASS_HANDLE clsHnd,
                                                     CORINFO_SIG_INFO* sig,
-                                                    CorInfoType simdBaseJitType)
+                                                    CorInfoType* simdBaseJitType)
+{
+    assert(sig != nullptr);
+    assert(simdBaseJitType != nullptr);
+
+    HWIntrinsicCategory    category = HWIntrinsicInfo::lookupCategory(intrinsic);
+    CORINFO_InstructionSet isa      = HWIntrinsicInfo::lookupIsa(intrinsic);
+    var_types              retType  = genActualType(JITtype2varType(sig->retType));
+
+    if (retType == TYP_STRUCT)
+    {
+        unsigned int sizeBytes;
+        *simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
+
+        if (HWIntrinsicInfo::IsMultiReg(intrinsic))
+        {
+            assert(sizeBytes == 0);
+        }
+
+#ifdef TARGET_ARM64
+        else if ((intrinsic == NI_AdvSimd_LoadAndInsertScalar) || (intrinsic == NI_AdvSimd_Arm64_LoadAndInsertScalar))
+        {
+            CorInfoType pSimdBaseJitType = CORINFO_TYPE_UNDEF;
+            var_types   retFieldType     = impNormStructType(sig->retTypeSigClass, &pSimdBaseJitType);
+
+            if (retFieldType == TYP_STRUCT)
+            {
+                CORINFO_CLASS_HANDLE structType;
+                unsigned int         sizeBytes = 0;
+
+                // LoadAndInsertScalar that returns 2, 3, or 4 vectors
+                assert(pSimdBaseJitType == CORINFO_TYPE_UNDEF);
+                unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(sig->retTypeSigClass);
+                assert(fieldCount > 1);
+                CORINFO_FIELD_HANDLE fieldHandle = info.compCompHnd->getFieldInClass(sig->retTypeClass, 0);
+                CorInfoType          fieldType   = info.compCompHnd->getFieldType(fieldHandle, &structType);
+                *simdBaseJitType                 = getBaseJitTypeAndSizeOfSIMDType(structType, &sizeBytes);
+                switch (fieldCount)
+                {
+                    case 2:
+                        intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x2
+                                                   : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2;
+                        break;
+                    case 3:
+                        intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x3
+                                                   : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3;
+                        break;
+                    case 4:
+                        intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x4
+                                                   : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4;
+                        break;
+                    default:
+                        assert(!"unsupported");
+                }
+            }
+            else
+            {
+                assert((retFieldType == TYP_SIMD8) || (retFieldType == TYP_SIMD16));
+                assert(isSupportedBaseType(intrinsic, *simdBaseJitType));
+                retType = getSIMDTypeForSize(sizeBytes);
+            }
+        }
+#endif
+        else
+        {
+            // We want to return early here for cases where retType was TYP_STRUCT as per the method signature,
+            // rather than deferring the decision until after getting the simdBaseJitType of the arg.
+            if (!isSupportedBaseType(intrinsic, *simdBaseJitType))
+            {
+                return TYP_UNKNOWN;
+            }
+
+            assert(sizeBytes != 0);
+            retType = getSIMDTypeForSize(sizeBytes);
+        }
+    }
+
+    *simdBaseJitType = getBaseJitTypeFromArgIfNeeded(intrinsic, sig, *simdBaseJitType);
+
+    if (*simdBaseJitType == CORINFO_TYPE_UNDEF)
+    {
+        if ((category == HW_Category_Scalar) || HWIntrinsicInfo::isScalarIsa(isa))
+        {
+            *simdBaseJitType = sig->retType;
+
+            if (*simdBaseJitType == CORINFO_TYPE_VOID)
+            {
+                *simdBaseJitType = CORINFO_TYPE_UNDEF;
+            }
+        }
+        else
+        {
+            unsigned int sizeBytes;
+
+            *simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(clsHnd, &sizeBytes);
+            assert((category == HW_Category_Special) || (category == HW_Category_Helper) || (sizeBytes != 0));
+        }
+    }
+
+    return retType;
+}
+
+//------------------------------------------------------------------------
+// getBaseJitTypeFromArgIfNeeded: Get simdBaseJitType of intrinsic from 1st or 2nd argument depending on the flag
+//
+// Arguments:
+//    intrinsic       -- id of the intrinsic function.
+//    sig             -- signature of the intrinsic call.
+//    simdBaseJitType -- Predetermined simdBaseJitType, could be CORINFO_TYPE_UNDEF
+//
+// Return Value:
+//    The basetype of the intrinsic if it can be fetched from the 1st or 2nd argument; else return baseType unmodified.
+//
+CorInfoType Compiler::getBaseJitTypeFromArgIfNeeded(NamedIntrinsic intrinsic,
+                                                    CORINFO_SIG_INFO* sig,
+                                                    CorInfoType simdBaseJitType)
 {
     if (HWIntrinsicInfo::BaseTypeFromSecondArg(intrinsic) || HWIntrinsicInfo::BaseTypeFromFirstArg(intrinsic))
     {
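The LoadAndInsertScalar remapping in the function above is a two-key selection: the returned struct's field count picks the x2/x3/x4 variant, and the vector size picks Vector64 versus Vector128. A standalone model (the strings stand in for the NI_AdvSimd_* intrinsic IDs):

```cpp
// Standalone model (not JIT code) of the LoadAndInsertScalar remapping:
// field count (2, 3, or 4 vectors) selects the xN variant, vector size
// (8 bytes => Vector64) selects the width.
#include <cstdio>

static const char* selectLoadAndInsertScalar(unsigned fieldCount, unsigned sizeBytes)
{
    const bool isVector64 = (sizeBytes == 8);
    switch (fieldCount)
    {
        case 2:
            return isVector64 ? "LoadAndInsertScalarVector64x2" : "LoadAndInsertScalarVector128x2";
        case 3:
            return isVector64 ? "LoadAndInsertScalarVector64x3" : "LoadAndInsertScalarVector128x3";
        case 4:
            return isVector64 ? "LoadAndInsertScalarVector64x4" : "LoadAndInsertScalarVector128x4";
        default:
            return nullptr; // mirrors the "unsupported" assert in the JIT code
    }
}

int main()
{
    printf("%s\n", selectLoadAndInsertScalar(3, 16)); // LoadAndInsertScalarVector128x3
    return 0;
}
```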
@@ -932,7 +1046,7 @@ static bool impIsTableDrivenHWIntrinsic(NamedIntrinsic intrinsicId, HWIntrinsicC
 // Return Value:
 //    returns true if the baseType is supported for given intrinsic.
 //
-static bool isSupportedBaseType(NamedIntrinsic intrinsic, CorInfoType baseJitType)
+bool Compiler::isSupportedBaseType(NamedIntrinsic intrinsic, CorInfoType baseJitType)
 {
     if (baseJitType == CORINFO_TYPE_UNDEF)
     {
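Making isSupportedBaseType a static member rather than a file-local static is what lets the new folding path in gentree.cpp call it: a TU-local static has internal linkage and is invisible outside hwintrinsic.cpp. A minimal sketch of the pattern, with hypothetical file names in the comments:

```cpp
// Minimal sketch (not the real JIT sources) of the linkage change: a static
// member declared in a shared header is callable from every translation unit
// that includes it, unlike a file-local static.

// shared.h
struct Compiler
{
    static bool isSupportedBaseType(int intrinsic, int baseJitType);
};

// hwintrinsic.cpp
bool Compiler::isSupportedBaseType(int intrinsic, int baseJitType)
{
    return baseJitType != 0; // placeholder check
}

// gentree.cpp (a different TU) can now call it directly:
bool foldPathCheck(int intrinsic, int baseJitType)
{
    return Compiler::isSupportedBaseType(intrinsic, baseJitType);
}
```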
@@ -1151,98 +1265,13 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
     HWIntrinsicCategory    category = HWIntrinsicInfo::lookupCategory(intrinsic);
     CORINFO_InstructionSet isa      = HWIntrinsicInfo::lookupIsa(intrinsic);
     int                    numArgs  = sig->numArgs;
-    var_types              retType  = genActualType(JITtype2varType(sig->retType));
     CorInfoType            simdBaseJitType = CORINFO_TYPE_UNDEF;
+    var_types              retType         = getRetTypeAndBaseJitTypeFromSig(intrinsic, clsHnd, sig, &simdBaseJitType);
     GenTree*               retNode = nullptr;

-    if (retType == TYP_STRUCT)
-    {
-        unsigned int sizeBytes;
-        simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
-
-        if (HWIntrinsicInfo::IsMultiReg(intrinsic))
-        {
-            assert(sizeBytes == 0);
-        }
-
-#ifdef TARGET_ARM64
-        else if ((intrinsic == NI_AdvSimd_LoadAndInsertScalar) || (intrinsic == NI_AdvSimd_Arm64_LoadAndInsertScalar))
-        {
-            CorInfoType pSimdBaseJitType = CORINFO_TYPE_UNDEF;
-            var_types   retFieldType     = impNormStructType(sig->retTypeSigClass, &pSimdBaseJitType);
-
-            if (retFieldType == TYP_STRUCT)
-            {
-                CORINFO_CLASS_HANDLE structType;
-                unsigned int         sizeBytes = 0;
-
-                // LoadAndInsertScalar that returns 2,3 or 4 vectors
-                assert(pSimdBaseJitType == CORINFO_TYPE_UNDEF);
-                unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(sig->retTypeSigClass);
-                assert(fieldCount > 1);
-                CORINFO_FIELD_HANDLE fieldHandle = info.compCompHnd->getFieldInClass(sig->retTypeClass, 0);
-                CorInfoType          fieldType   = info.compCompHnd->getFieldType(fieldHandle, &structType);
-                simdBaseJitType                  = getBaseJitTypeAndSizeOfSIMDType(structType, &sizeBytes);
-                switch (fieldCount)
-                {
-                    case 2:
-                        intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x2
-                                                   : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2;
-                        break;
-                    case 3:
-                        intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x3
-                                                   : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3;
-                        break;
-                    case 4:
-                        intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x4
-                                                   : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4;
-                        break;
-                    default:
-                        assert("unsupported");
-                }
-            }
-            else
-            {
-                assert((retFieldType == TYP_SIMD8) || (retFieldType == TYP_SIMD16));
-                assert(isSupportedBaseType(intrinsic, simdBaseJitType));
-                retType = getSIMDTypeForSize(sizeBytes);
-            }
-        }
-#endif
-        else
-        {
-            // We want to return early here for cases where retType was TYP_STRUCT as per method signature and
-            // rather than deferring the decision after getting the simdBaseJitType of arg.
-            if (!isSupportedBaseType(intrinsic, simdBaseJitType))
-            {
-                return nullptr;
-            }
-
-            assert(sizeBytes != 0);
-            retType = getSIMDTypeForSize(sizeBytes);
-        }
-    }
-
-    simdBaseJitType = getBaseJitTypeFromArgIfNeeded(intrinsic, clsHnd, sig, simdBaseJitType);
-
-    if (simdBaseJitType == CORINFO_TYPE_UNDEF)
+    if (retType == TYP_UNKNOWN)
     {
-        if ((category == HW_Category_Scalar) || HWIntrinsicInfo::isScalarIsa(isa))
-        {
-            simdBaseJitType = sig->retType;
-
-            if (simdBaseJitType == CORINFO_TYPE_VOID)
-            {
-                simdBaseJitType = CORINFO_TYPE_UNDEF;
-            }
-        }
-        else
-        {
-            unsigned int sizeBytes;
-
-            simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(clsHnd, &sizeBytes);
-            assert((category == HW_Category_Special) || (category == HW_Category_Helper) || (sizeBytes != 0));
-        }
+        return nullptr;
     }

     // Immediately return if the category is other than scalar/special and this is not a supported base type.
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index 5a30c54fe78f55..a1e05b87407187 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -1867,7 +1867,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

             GenTree* indices = impStackTop(0).val;

-            if (!indices->IsVectorConst())
+            if (!indices->IsVectorConst() || !IsValidForShuffle(indices->AsVecCon(), simdSize, simdBaseType))
             {
                 // TODO-ARM64-CQ: Handling non-constant indices is a bit more complex
                 break;
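Both importer guards depend on short-circuit evaluation: IsVectorConst() must be tested first so that AsVecCon() only ever runs on an actual vector constant, which is why the combined condition uses || rather than &&. A standalone illustration with toy node types (not the JIT's):

```cpp
// Standalone illustration (toy node types, not the JIT's) of the guard order:
// the right side of || is only evaluated when the left side is false, so the
// downcast is provably safe when it runs.
#include <cstdio>

struct Node
{
    virtual ~Node() = default;
    virtual bool isConst() const { return false; }
};

struct ConstNode : Node
{
    bool validForShuffle = false;
    bool isConst() const override { return true; }
};

static bool shouldFallBack(const Node* n)
{
    // The cast only executes when isConst() returned true.
    return !n->isConst() || !static_cast<const ConstNode*>(n)->validForShuffle;
}

int main()
{
    Node      nonConst;
    ConstNode good;
    good.validForShuffle = true;
    ConstNode bad;

    printf("%d %d %d\n", shouldFallBack(&nonConst), shouldFallBack(&good), shouldFallBack(&bad)); // 1 0 1
    return 0;
}
```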
diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp
index 8aaee01f4c41c7..d6fd83efa4396a 100644
--- a/src/coreclr/jit/hwintrinsicxarch.cpp
+++ b/src/coreclr/jit/hwintrinsicxarch.cpp
@@ -2888,79 +2888,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

             GenTree* indices = impStackTop(0).val;

-            if (!indices->IsVectorConst())
+            if (!indices->IsVectorConst() || !IsValidForShuffle(indices->AsVecCon(), simdSize, simdBaseType))
             {
                 // TODO-XARCH-CQ: Handling non-constant indices is a bit more complex
                 break;
             }

-            size_t elementSize  = genTypeSize(simdBaseType);
-            size_t elementCount = simdSize / elementSize;
-
-            if (simdSize == 32)
-            {
-                if (!compOpportunisticallyDependsOn(InstructionSet_AVX2))
-                {
-                    // While we could accelerate some functions on hardware with only AVX support
-                    // it's likely not worth it overall given that IsHardwareAccelerated reports false
-                    break;
-                }
-                else if ((varTypeIsByte(simdBaseType) &&
-                          !compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI_VL)) ||
-                         (varTypeIsShort(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512BW_VL)))
-                {
-                    bool crossLane = false;
-
-                    for (size_t index = 0; index < elementCount; index++)
-                    {
-                        uint64_t value = indices->GetIntegralVectorConstElement(index, simdBaseType);
-
-                        if (value >= elementCount)
-                        {
-                            continue;
-                        }
-
-                        if (index < (elementCount / 2))
-                        {
-                            if (value >= (elementCount / 2))
-                            {
-                                crossLane = true;
-                                break;
-                            }
-                        }
-                        else if (value < (elementCount / 2))
-                        {
-                            crossLane = true;
-                            break;
-                        }
-                    }
-
-                    if (crossLane)
-                    {
-                        // TODO-XARCH-CQ: We should emulate cross-lane shuffling for byte/sbyte and short/ushort
-                        break;
-                    }
-                }
-            }
-            else if (simdSize == 64)
-            {
-                if (varTypeIsByte(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI))
-                {
-                    // TYP_BYTE, TYP_UBYTE need AVX512VBMI.
-                    break;
-                }
-            }
-            else
-            {
-                assert(simdSize == 16);
-
-                if (varTypeIsSmall(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_SSSE3))
-                {
-                    // TYP_BYTE, TYP_UBYTE, TYP_SHORT, and TYP_USHORT need SSSE3 to be able to shuffle any operation
-                    break;
-                }
-            }
-
             if (sig->numArgs == 2)
             {
                 op2 = impSIMDPopStack();
diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp
index 85bd5d8dfbd166..9a4ce5f4a15dec 100644
--- a/src/coreclr/jit/importercalls.cpp
+++ b/src/coreclr/jit/importercalls.cpp
@@ -3040,6 +3040,8 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,

         static_assert_no_msg((NI_HW_INTRINSIC_END + 1) == NI_SIMD_AS_HWINTRINSIC_START);

+        GenTree* hwintrinsic = nullptr;
+
         if (ni < NI_HW_INTRINSIC_END)
         {
             assert(ni > NI_HW_INTRINSIC_START);
@@ -3068,14 +3070,12 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
                 }
             }

-            GenTree* hwintrinsic = impHWIntrinsic(ni, clsHnd, method, sig, mustExpand);
+            hwintrinsic = impHWIntrinsic(ni, clsHnd, method, sig, mustExpand);

             if (mustExpand && (hwintrinsic == nullptr))
             {
-                return impUnsupportedNamedIntrinsic(CORINFO_HELP_THROW_NOT_IMPLEMENTED, method, sig, mustExpand);
+                hwintrinsic = impUnsupportedNamedIntrinsic(CORINFO_HELP_THROW_NOT_IMPLEMENTED, method, sig, mustExpand);
             }
-
-            return hwintrinsic;
         }
         else
         {
@@ -3083,9 +3083,19 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,

             if (isIntrinsic)
             {
-                return impSimdAsHWIntrinsic(ni, clsHnd, method, sig, newobjThis, mustExpand);
+                hwintrinsic = impSimdAsHWIntrinsic(ni, clsHnd, method, sig, newobjThis, mustExpand);
             }
         }
+
+        if ((hwintrinsic == nullptr) && (isSpecialIntrinsic != nullptr))
+        {
+            // We want to report this as special so later phases, such as morph,
+            // can try to expand the intrinsic again in case this first expansion
+            // failed.
+
+            *isSpecialIntrinsic = true;
+        }
+
+        return hwintrinsic;
     }
 #endif // FEATURE_HW_INTRINSICS
@@ -7270,6 +7280,21 @@ void Compiler::impMarkInlineCandidateHelper(GenTreeCall* call,
         return;
     }

+#if defined(FEATURE_HW_INTRINSICS)
+    /* Ignore hwintrinsic calls */
+    if (call->IsSpecialIntrinsic())
+    {
+        const NamedIntrinsic ni = lookupNamedIntrinsic(call->gtCallMethHnd);
+
+        if ((ni > NI_HW_INTRINSIC_START) && (ni < NI_SIMD_AS_HWINTRINSIC_END))
+        {
+            assert(!call->IsGuardedDevirtualizationCandidate());
+            inlineResult->NoteFatal(InlineObservation::CALLSITE_IS_CALL_TO_HELPER);
+            return;
+        }
+    }
+#endif // FEATURE_HW_INTRINSICS
+
     /* Ignore indirect calls */
     if (call->gtCallType == CT_INDIRECT)
     {
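Taken together, the importercalls.cpp hunks set up a retry pipeline: a hardware-intrinsic call that could not be expanded at import time is reported as a special intrinsic, kept out of inline-candidate marking, and offered to gtFoldHWIntrinsicCall again in later phases, by which point inlining may have made the shuffle indices constant. A toy model (not JIT code) of that flow:

```cpp
// Toy model (not JIT code) of the retry pipeline: import-time expansion can
// fail, the call is tagged "special", and the fold pass gets a second chance.
#include <cstdio>

struct Call
{
    bool expandedAtImport = false;
    bool special          = false;
};

static void importPhase(Call& call, bool indicesConstant)
{
    call.expandedAtImport = indicesConstant;
    call.special          = !call.expandedAtImport; // report as special for later phases
}

static bool morphFoldPhase(const Call& call, bool indicesConstant)
{
    return call.special && indicesConstant; // second expansion attempt
}

int main()
{
    Call call;
    importPhase(call, /* indicesConstant */ false);          // import sees a variable index vector
    bool folded = morphFoldPhase(call, /* constant now */ true); // constant after inlining
    printf("folded in morph: %d\n", folded); // 1
    return 0;
}
```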