diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h
index d6ed324deb242a..5cef7d5aa12f25 100644
--- a/src/coreclr/jit/emitxarch.h
+++ b/src/coreclr/jit/emitxarch.h
@@ -193,7 +193,7 @@ bool IsDstDstSrcAVXInstruction(instruction ins);
 bool IsDstSrcSrcAVXInstruction(instruction ins);
 bool HasRegularWideForm(instruction ins);
 bool HasRegularWideImmediateForm(instruction ins);
-bool DoesWriteZeroFlag(instruction ins);
+static bool DoesWriteZeroFlag(instruction ins);
 bool DoesWriteSignFlag(instruction ins);
 bool DoesResetOverflowAndCarryFlags(instruction ins);
 bool IsFlagsAlwaysModified(instrDesc* id);
diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h
index 43bd543c599f1d..4551ed9e4e1ce1 100644
--- a/src/coreclr/jit/hwintrinsic.h
+++ b/src/coreclr/jit/hwintrinsic.h
@@ -577,6 +577,25 @@ struct HWIntrinsicInfo
         return lookup(id).ins[type - TYP_BYTE];
     }
 
+    static instruction lookupIns(GenTreeHWIntrinsic* intrinsicNode)
+    {
+        assert(intrinsicNode != nullptr);
+
+        NamedIntrinsic intrinsic = intrinsicNode->GetHWIntrinsicId();
+        var_types      type      = TYP_UNKNOWN;
+
+        if (lookupCategory(intrinsic) == HW_Category_Scalar)
+        {
+            type = intrinsicNode->TypeGet();
+        }
+        else
+        {
+            type = intrinsicNode->GetSimdBaseType();
+        }
+
+        return lookupIns(intrinsic, type);
+    }
+
     static HWIntrinsicCategory lookupCategory(NamedIntrinsic id)
     {
         return lookup(id).category;
diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h
index 458f919fe27935..08d4c4a8c8675d 100644
--- a/src/coreclr/jit/instrsxarch.h
+++ b/src/coreclr/jit/instrsxarch.h
@@ -595,7 +595,7 @@ INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE
 INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT
 INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Extract Lowest Set Isolated Bit
 INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Get Mask Up to Lowest Set Bit
-INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Reset Lowest Set Bit
+INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Reset Lowest Set Bit
 INST3(bextr, "bextr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_Flags_IsDstDstSrcAVXInstruction) // Bit Field Extract
 
 // BMI2
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index ae5a41c52e6827..e7d4c238a26115 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -139,7 +139,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
         case GT_AND:
         case GT_OR:
         case GT_XOR:
-            return LowerBinaryArithmetic(node->AsOp());
+            return LowerBinaryArithmeticCommon(node->AsOp());
 
         case GT_MUL:
         case GT_MULHI:
@@ -2708,10 +2708,16 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp)
 
     if (op2->IsIntegralConst(0) && (op1->gtNext == op2) && (op2->gtNext == cmp) &&
 #ifdef TARGET_XARCH
-        op1->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG))
+        (op1->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG)
+#ifdef FEATURE_HW_INTRINSICS
+         || (op1->OperIs(GT_HWINTRINSIC) &&
+             emitter::DoesWriteZeroFlag(HWIntrinsicInfo::lookupIns(op1->AsHWIntrinsic())))
+#endif // FEATURE_HW_INTRINSICS
+             )
 #else // TARGET_ARM64
-        op1->OperIs(GT_AND, GT_ADD, GT_SUB))
+        op1->OperIs(GT_AND, GT_ADD, GT_SUB)
 #endif
+            )
     {
         op1->gtFlags |= GTF_SET_FLAGS;
         op1->SetUnusedValue();
@@ -5122,7 +5128,7 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node)
 }
 
 //------------------------------------------------------------------------
-// LowerBinaryArithmetic: lowers the given binary arithmetic node.
+// LowerBinaryArithmeticCommon: lowers the given binary arithmetic node.
 //
 // Recognizes opportunities for using target-independent "combined" nodes
 // (currently AND_NOT on ARMArch). Performs containment checks.
@@ -5133,41 +5139,39 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node)
 // Returns:
 //    The next node to lower.
 //
-GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* node)
+GenTree* Lowering::LowerBinaryArithmeticCommon(GenTreeOp* binOp)
 {
     // TODO-CQ-XArch: support BMI2 "andn" in codegen and condition
     // this logic on the support for the instruction set on XArch.
     CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef TARGET_ARMARCH
-    if (comp->opts.OptimizationEnabled() && node->OperIs(GT_AND))
+    if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND))
     {
         GenTree* opNode  = nullptr;
         GenTree* notNode = nullptr;
-        if (node->gtGetOp1()->OperIs(GT_NOT))
+        if (binOp->gtGetOp1()->OperIs(GT_NOT))
         {
-            notNode = node->gtGetOp1();
-            opNode  = node->gtGetOp2();
+            notNode = binOp->gtGetOp1();
+            opNode  = binOp->gtGetOp2();
         }
-        else if (node->gtGetOp2()->OperIs(GT_NOT))
+        else if (binOp->gtGetOp2()->OperIs(GT_NOT))
        {
-            notNode = node->gtGetOp2();
-            opNode  = node->gtGetOp1();
+            notNode = binOp->gtGetOp2();
+            opNode  = binOp->gtGetOp1();
         }
 
         if (notNode != nullptr)
         {
-            node->gtOp1 = opNode;
-            node->gtOp2 = notNode->AsUnOp()->gtGetOp1();
-            node->ChangeOper(GT_AND_NOT);
+            binOp->gtOp1 = opNode;
+            binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1();
+            binOp->ChangeOper(GT_AND_NOT);
             BlockRange().Remove(notNode);
         }
     }
-#endif // TARGET_ARMARCH
-
-    ContainCheckBinary(node);
+#endif
 
-    return node->gtNext;
+    return LowerBinaryArithmetic(binOp);
 }
 
 //------------------------------------------------------------------------
diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h
index ed0ecc56619708..26c728190a7982 100644
--- a/src/coreclr/jit/lower.h
+++ b/src/coreclr/jit/lower.h
@@ -297,7 +297,8 @@ class Lowering final : public Phase
     void LowerStoreIndir(GenTreeStoreInd* node);
     GenTree* LowerAdd(GenTreeOp* node);
     GenTree* LowerMul(GenTreeOp* mul);
-    GenTree* LowerBinaryArithmetic(GenTreeOp* node);
+    GenTree* LowerBinaryArithmeticCommon(GenTreeOp* binOp);
+    GenTree* LowerBinaryArithmetic(GenTreeOp* binOp);
     bool LowerUnsignedDivOrMod(GenTreeOp* divMod);
     GenTree* LowerConstIntDivOrMod(GenTree* node);
     GenTree* LowerSignedDivOrMod(GenTree* node);
@@ -343,6 +344,7 @@ class Lowering final : public Phase
     void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node);
     void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node);
     void LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node);
+    GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* binOp);
 #elif defined(TARGET_ARM64)
     bool IsValidConstForMovImm(GenTreeHWIntrinsic* node);
     void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node);
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index 58301b37b887b7..ad74d3c888d741 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -281,6 +281,22 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul)
     return mul->gtNext;
 }
 
+//------------------------------------------------------------------------
+// LowerBinaryArithmetic: lowers the given binary arithmetic node.
+//
+// Arguments:
+//    binOp - the arithmetic node to lower
+//
+// Returns:
+//    The next node to lower.
+//
+GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
+{
+    ContainCheckBinary(binOp);
+
+    return binOp->gtNext;
+}
+
 //------------------------------------------------------------------------
 // LowerBlockStore: Lower a block store node
 //
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index c8076c4525f2df..d85197d2b605fb 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -159,6 +159,33 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul)
     return mul->gtNext;
 }
 
+//------------------------------------------------------------------------
+// LowerBinaryArithmetic: lowers the given binary arithmetic node.
+//
+// Arguments:
+//    binOp - the arithmetic node to lower
+//
+// Returns:
+//    The next node to lower.
+//
+GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
+{
+#ifdef FEATURE_HW_INTRINSICS
+    if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND) && varTypeIsIntegral(binOp))
+    {
+        GenTree* blsrNode = TryLowerAndOpToResetLowestSetBit(binOp);
+        if (blsrNode != nullptr)
+        {
+            return blsrNode->gtNext;
+        }
+    }
+#endif
+
+    ContainCheckBinary(binOp);
+
+    return binOp->gtNext;
+}
+
 //------------------------------------------------------------------------
 // LowerBlockStore: Lower a block store node
 //
@@ -3693,6 +3720,84 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node)
         LowerNode(cast);
     }
 }
+
+//----------------------------------------------------------------------------------------------
+// Lowering::TryLowerAndOpToResetLowestSetBit: Lowers a tree AND(X, ADD(X, -1)) to HWIntrinsic::ResetLowestSetBit
+//
+// Arguments:
+//    andNode - GT_AND node of integral type
+//
+// Return Value:
+//    Returns the replacement node if one is created, else nullptr indicating no replacement
+//
+GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode)
+{
+    assert(andNode->OperIs(GT_AND) && varTypeIsIntegral(andNode));
+
+    GenTree* op1 = andNode->gtGetOp1();
+    if (!op1->OperIs(GT_LCL_VAR) || comp->lvaGetDesc(op1->AsLclVar())->IsAddressExposed())
+    {
+        return nullptr;
+    }
+
+    GenTree* op2 = andNode->gtGetOp2();
+    if (!op2->OperIs(GT_ADD))
+    {
+        return nullptr;
+    }
+
+    GenTree* addOp2 = op2->gtGetOp2();
+    if (!addOp2->IsIntegralConst(-1))
+    {
+        return nullptr;
+    }
+
+    GenTree* addOp1 = op2->gtGetOp1();
+    if (!addOp1->OperIs(GT_LCL_VAR) || (addOp1->AsLclVar()->GetLclNum() != op1->AsLclVar()->GetLclNum()))
+    {
+        return nullptr;
+    }
+
+    NamedIntrinsic intrinsic;
+    if (op1->TypeIs(TYP_LONG) && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64))
+    {
+        intrinsic = NamedIntrinsic::NI_BMI1_X64_ResetLowestSetBit;
+    }
+    else if (comp->compOpportunisticallyDependsOn(InstructionSet_BMI1))
+    {
+        intrinsic = NamedIntrinsic::NI_BMI1_ResetLowestSetBit;
+    }
+    else
+    {
+        return nullptr;
+    }
+
+    LIR::Use use;
+    if (!BlockRange().TryGetUse(andNode, &use))
+    {
+        return nullptr;
+    }
+
+    GenTreeHWIntrinsic* blsrNode = comp->gtNewScalarHWIntrinsicNode(andNode->TypeGet(), op1, intrinsic);
+
+    JITDUMP("Lower: optimize AND(X, ADD(X, -1))\n");
+    DISPNODE(andNode);
+    JITDUMP("to:\n");
+    DISPNODE(blsrNode);
+
+    use.ReplaceWith(blsrNode);
+
+    BlockRange().InsertBefore(andNode, blsrNode);
+    BlockRange().Remove(andNode);
+    BlockRange().Remove(op2);
+    BlockRange().Remove(addOp1);
+    BlockRange().Remove(addOp2);
+
+    ContainCheckHWIntrinsic(blsrNode);
+
+    return blsrNode;
+}
+
 #endif // FEATURE_HW_INTRINSICS
 
 //----------------------------------------------------------------------------------------------