From 95902877ac187e70cdd926b24ccb52659cbfadeb Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Thu, 6 Jan 2022 16:29:46 +0000 Subject: [PATCH 01/19] add lowering to blsr instruction --- src/coreclr/jit/lower.cpp | 40 ++--------------- src/coreclr/jit/lower.h | 4 +- src/coreclr/jit/lowerarmarch.cpp | 40 +++++++++++++++++ src/coreclr/jit/lowerxarch.cpp | 77 ++++++++++++++++++++++++++++++++ 4 files changed, 124 insertions(+), 37 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index ae5a41c52e6827..c9b2d88c38855a 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -139,7 +139,7 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_AND: case GT_OR: case GT_XOR: - return LowerBinaryArithmetic(node->AsOp()); + return LowerBinaryArithmeticCommon(node->AsOp()); case GT_MUL: case GT_MULHI: @@ -5122,7 +5122,7 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) } //------------------------------------------------------------------------ -// LowerBinaryArithmetic: lowers the given binary arithmetic node. +// LowerBinaryArithmeticCommon: lowers the given binary arithmetic node. // // Recognizes opportunities for using target-independent "combined" nodes // (currently AND_NOT on ARMArch). Performs containment checks. @@ -5133,41 +5133,9 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) // Returns: // The next node to lower. // -GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* node) +GenTree* Lowering::LowerBinaryArithmeticCommon(GenTreeOp* node) { - // TODO-CQ-XArch: support BMI2 "andn" in codegen and condition - // this logic on the support for the instruction set on XArch. - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef TARGET_ARMARCH - if (comp->opts.OptimizationEnabled() && node->OperIs(GT_AND)) - { - GenTree* opNode = nullptr; - GenTree* notNode = nullptr; - if (node->gtGetOp1()->OperIs(GT_NOT)) - { - notNode = node->gtGetOp1(); - opNode = node->gtGetOp2(); - } - else if (node->gtGetOp2()->OperIs(GT_NOT)) - { - notNode = node->gtGetOp2(); - opNode = node->gtGetOp1(); - } - - if (notNode != nullptr) - { - node->gtOp1 = opNode; - node->gtOp2 = notNode->AsUnOp()->gtGetOp1(); - node->ChangeOper(GT_AND_NOT); - BlockRange().Remove(notNode); - } - } -#endif // TARGET_ARMARCH - - ContainCheckBinary(node); - - return node->gtNext; + return LowerBinaryArithmetic(node); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index ed0ecc56619708..18ad71489edd59 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -297,7 +297,8 @@ class Lowering final : public Phase void LowerStoreIndir(GenTreeStoreInd* node); GenTree* LowerAdd(GenTreeOp* node); GenTree* LowerMul(GenTreeOp* mul); - GenTree* LowerBinaryArithmetic(GenTreeOp* node); + GenTree* LowerBinaryArithmeticCommon(GenTreeOp* node); + GenTree* LowerBinaryArithmetic(GenTreeOp* binOp); bool LowerUnsignedDivOrMod(GenTreeOp* divMod); GenTree* LowerConstIntDivOrMod(GenTree* node); GenTree* LowerSignedDivOrMod(GenTree* node); @@ -343,6 +344,7 @@ class Lowering final : public Phase void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node); void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node); void LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node); + bool LowerAndOpToResetLowestSetBit(GenTree* node); #elif defined(TARGET_ARM64) bool IsValidConstForMovImm(GenTreeHWIntrinsic* node); void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node); diff --git a/src/coreclr/jit/lowerarmarch.cpp 
b/src/coreclr/jit/lowerarmarch.cpp index 58301b37b887b7..21eb9cbf98d288 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -281,6 +281,46 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) return mul->gtNext; } +//------------------------------------------------------------------------ +// LowerBinaryArithmetic: lowers the given binary arithmetic node. +// +// Arguments: +// node - the arithmetic node to lower +// +// Returns: +// The next node to lower. +// +GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) +{ + if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND)) + { + GenTree* opNode = nullptr; + GenTree* notNode = nullptr; + if (binOp->gtGetOp1()->OperIs(GT_NOT)) + { + notNode = binOp->gtGetOp1(); + opNode = binOp->gtGetOp2(); + } + else if (binOp->gtGetOp2()->OperIs(GT_NOT)) + { + notNode = binOp->gtGetOp2(); + opNode = binOp->gtGetOp1(); + } + + if (notNode != nullptr) + { + binOp->gtOp1 = opNode; + binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1(); + binOp->ChangeOper(GT_AND_NOT); + BlockRange().Remove(notNode); + } + } + + ContainCheckBinary(binOp); + + return binOp->gtNext; +} + //------------------------------------------------------------------------ // LowerBlockStore: Lower a block store node // diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index c8076c4525f2df..e7eb992ef7de2d 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -159,6 +159,33 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) return mul->gtNext; } +//------------------------------------------------------------------------ +// LowerBinaryArithmetic: lowers the given binary arithmetic node. +// +// Arguments: +// node - the arithmetic node to lower +// +// Returns: +// The next node to lower. +// +GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) +{ + // TODO-CQ-XArch: support BMI2 "andn" in codegen and condition + // this logic on the support for the instruction set on XArch. 
+ CLANG_FORMAT_COMMENT_ANCHOR; + + if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND) && varTypeIsIntegral(binOp)) + { + if (!LowerAndOpToResetLowestSetBit(binOp)) + { + // if the node was lowered to a HWIntrinsic skip the containment check + ContainCheckBinary(binOp); + } + } + + return binOp->gtNext; +} + //------------------------------------------------------------------------ // LowerBlockStore: Lower a block store node // @@ -3693,6 +3720,56 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) LowerNode(cast); } } + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicToScalar: Lowers a tree AND(X, SUB(X, 1) to HWIntrinsic::ResetLowestSetBit +// Returns true if the node has been replaced +// +// Parameters +// tree - GentreePtr of binOp +// +bool Lowering::LowerAndOpToResetLowestSetBit(GenTree* node) +{ + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + if (op1->OperIs(GT_LCL_VAR) && op2->OperIs(GT_ADD)) + { + GenTree* op21 = op2->gtGetOp1(); + GenTree* op22 = op2->gtGetOp2(); + if (op22->IsCnsIntOrI() && op22->AsIntCon()->IconValue() == -1 && op21->OperIs(GT_LCL_VAR) && + !comp->lvaGetDesc(op1->AsLclVar())->IsAddressExposed() && + op21->AsLclVar()->GetLclNum() == op1->AsLclVar()->GetLclNum() && + ((op1->TypeGet() == TYP_LONG && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64)) || + comp->compOpportunisticallyDependsOn(InstructionSet_BMI1))) // if op1->TypeGet()!=TYP_LONG then it must be TYP_INT + { + GenTreeHWIntrinsic* replacementNode = + comp->gtNewScalarHWIntrinsicNode(node->TypeGet(), op1, NamedIntrinsic::NI_BMI1_ResetLowestSetBit); + + JITDUMP("Lower: optimize AND(X, SUB(X, 1): "); + DISPNODE(node); + JITDUMP("Replaced with: "); + DISPNODE(replacementNode); + LIR::Use use; + if (BlockRange().TryGetUse(node, &use)) + { + use.ReplaceWith(replacementNode); + } + else + { + op1->SetUnusedValue(); + } + BlockRange().InsertBefore(node, replacementNode); + BlockRange().Remove(node); + BlockRange().Remove(op2); + BlockRange().Remove(op21); + BlockRange().Remove(op22); + JITDUMP("Remove [%06u], [%06u]\n", node->gtTreeID, node->gtTreeID); + return true; + } + } + return false; +} + #endif // FEATURE_HW_INTRINSICS //---------------------------------------------------------------------------------------------- From 770164612c4c6e7b33697d3a4f4685b80fcb134f Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Fri, 7 Jan 2022 16:32:55 +0000 Subject: [PATCH 02/19] fixup next for replacement node and clear containment --- src/coreclr/jit/lower.h | 2 +- src/coreclr/jit/lowerxarch.cpp | 40 ++++++++++++++++++++++------------ 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 18ad71489edd59..79f516453dfd70 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -344,7 +344,7 @@ class Lowering final : public Phase void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node); void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node); void LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node); - bool LowerAndOpToResetLowestSetBit(GenTree* node); + GenTree* LowerAndOpToResetLowestSetBit(GenTree* node); #elif defined(TARGET_ARM64) bool IsValidConstForMovImm(GenTreeHWIntrinsic* node); void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index e7eb992ef7de2d..6df1f8076c07b2 100644 --- 
a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -174,18 +174,24 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) // this logic on the support for the instruction set on XArch. CLANG_FORMAT_COMMENT_ANCHOR; + GenTree* replacement = nullptr; if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND) && varTypeIsIntegral(binOp)) { - if (!LowerAndOpToResetLowestSetBit(binOp)) - { - // if the node was lowered to a HWIntrinsic skip the containment check - ContainCheckBinary(binOp); - } + replacement = LowerAndOpToResetLowestSetBit(binOp); } - return binOp->gtNext; + if (replacement == nullptr) + { + ContainCheckBinary(binOp); + return binOp->gtNext; + } + else + { + return replacement->gtNext; + } } + //------------------------------------------------------------------------ // LowerBlockStore: Lower a block store node // @@ -3723,12 +3729,12 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) //---------------------------------------------------------------------------------------------- // Lowering::LowerHWIntrinsicToScalar: Lowers a tree AND(X, SUB(X, 1) to HWIntrinsic::ResetLowestSetBit -// Returns true if the node has been replaced +// Returns the recplacement node if one is created else nullptr indicating no replacement // // Parameters // tree - GentreePtr of binOp // -bool Lowering::LowerAndOpToResetLowestSetBit(GenTree* node) +GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTree* node) { GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2(); @@ -3739,13 +3745,15 @@ bool Lowering::LowerAndOpToResetLowestSetBit(GenTree* node) if (op22->IsCnsIntOrI() && op22->AsIntCon()->IconValue() == -1 && op21->OperIs(GT_LCL_VAR) && !comp->lvaGetDesc(op1->AsLclVar())->IsAddressExposed() && op21->AsLclVar()->GetLclNum() == op1->AsLclVar()->GetLclNum() && - ((op1->TypeGet() == TYP_LONG && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64)) || - comp->compOpportunisticallyDependsOn(InstructionSet_BMI1))) // if op1->TypeGet()!=TYP_LONG then it must be TYP_INT + ( + (op1->TypeGet() == TYP_LONG && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64)) || + comp->compOpportunisticallyDependsOn(InstructionSet_BMI1))// if op1->TypeGet()!=TYP_LONG then it must be TYP_INT + ) { GenTreeHWIntrinsic* replacementNode = - comp->gtNewScalarHWIntrinsicNode(node->TypeGet(), op1, NamedIntrinsic::NI_BMI1_ResetLowestSetBit); + comp->gtNewScalarHWIntrinsicNode(op1->TypeGet(), op1, op1->TypeGet()==TYP_INT ? NamedIntrinsic::NI_BMI1_ResetLowestSetBit : NamedIntrinsic::NI_BMI1_X64_ResetLowestSetBit); - JITDUMP("Lower: optimize AND(X, SUB(X, 1): "); + JITDUMP("Lower: optimize AND(X, SUB(X, 1)): "); DISPNODE(node); JITDUMP("Replaced with: "); DISPNODE(replacementNode); @@ -3758,16 +3766,20 @@ bool Lowering::LowerAndOpToResetLowestSetBit(GenTree* node) { op1->SetUnusedValue(); } + op1->ClearContained(); BlockRange().InsertBefore(node, replacementNode); BlockRange().Remove(node); BlockRange().Remove(op2); BlockRange().Remove(op21); BlockRange().Remove(op22); JITDUMP("Remove [%06u], [%06u]\n", node->gtTreeID, node->gtTreeID); - return true; + + ContainCheckHWIntrinsic(replacementNode); + + return replacementNode; } } - return false; + return nullptr; } #endif // FEATURE_HW_INTRINSICS From b9bc69bfd539dc196c00d5aa24ed16332c8dbc1f Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Sat, 8 Jan 2022 02:30:03 +0000 Subject: [PATCH 03/19] add check for user being boolean to prevent optimization of LEA, TEST case. 
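For context on this commit: the new guard looks at the user of the AND node. When the only consumer of AND(X, ADD(X, -1)) is an equality test (EQ/NE), rewriting to BLSR yields a "blsr, test" pair, while leaving the plain AND in place lets OptimizeConstCompare produce the cheaper "lea, test" sequence. A minimal C++ sketch of the two source shapes involved (illustrative only, not part of the patch; the function names are hypothetical):

    // Shape the rewrite targets: the value of x & (x - 1) is consumed directly.
    // x & (x - 1) clears the lowest set bit of x, which is exactly what blsr computes.
    unsigned ClearLowestSetBit(unsigned x)
    {
        return x & (x - 1);
    }

    // Shape this commit excludes: the result only feeds a comparison with zero,
    // the classic "x is zero or a power of two" test. Skipping the BLSR rewrite
    // here keeps the cheaper lea + test lowering for the comparison.
    bool IsZeroOrPowerOfTwo(unsigned x)
    {
        return (x & (x - 1)) == 0;
    }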
--- src/coreclr/jit/lowerxarch.cpp | 51 ++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 6df1f8076c07b2..4187f00c171cd8 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3747,36 +3747,45 @@ GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTree* node) op21->AsLclVar()->GetLclNum() == op1->AsLclVar()->GetLclNum() && ( (op1->TypeGet() == TYP_LONG && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64)) || - comp->compOpportunisticallyDependsOn(InstructionSet_BMI1))// if op1->TypeGet()!=TYP_LONG then it must be TYP_INT + comp->compOpportunisticallyDependsOn(InstructionSet_BMI1)) // if op1->TypeGet()!=TYP_LONG then it must be TYP_INT ) { - GenTreeHWIntrinsic* replacementNode = - comp->gtNewScalarHWIntrinsicNode(op1->TypeGet(), op1, op1->TypeGet()==TYP_INT ? NamedIntrinsic::NI_BMI1_ResetLowestSetBit : NamedIntrinsic::NI_BMI1_X64_ResetLowestSetBit); - - JITDUMP("Lower: optimize AND(X, SUB(X, 1)): "); - DISPNODE(node); - JITDUMP("Replaced with: "); - DISPNODE(replacementNode); LIR::Use use; if (BlockRange().TryGetUse(node, &use)) { - use.ReplaceWith(replacementNode); + GenTree* user = use.User(); + if (user != nullptr && !user->OperIs(GT_EQ, GT_NE)) + { + GenTreeHWIntrinsic* replacementNode = + comp->gtNewScalarHWIntrinsicNode(op1->TypeGet(), op1, + op1->TypeGet() == TYP_INT + ? NamedIntrinsic::NI_BMI1_ResetLowestSetBit + : NamedIntrinsic::NI_BMI1_X64_ResetLowestSetBit); + + JITDUMP("Lower: optimize AND(X, SUB(X, 1)): "); + DISPNODE(node); + JITDUMP("Replaced with: "); + DISPNODE(replacementNode); + + use.ReplaceWith(replacementNode); + + op1->ClearContained(); + BlockRange().InsertBefore(node, replacementNode); + BlockRange().Remove(node); + BlockRange().Remove(op2); + BlockRange().Remove(op21); + BlockRange().Remove(op22); + JITDUMP("Remove [%06u], [%06u]\n", node->gtTreeID, node->gtTreeID); + + ContainCheckHWIntrinsic(replacementNode); + + return replacementNode; + } } else { - op1->SetUnusedValue(); + node->SetUnusedValue(); } - op1->ClearContained(); - BlockRange().InsertBefore(node, replacementNode); - BlockRange().Remove(node); - BlockRange().Remove(op2); - BlockRange().Remove(op21); - BlockRange().Remove(op22); - JITDUMP("Remove [%06u], [%06u]\n", node->gtTreeID, node->gtTreeID); - - ContainCheckHWIntrinsic(replacementNode); - - return replacementNode; } } return nullptr; From 94596e4ea85b7248c69cf08c62146f513628cea3 Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Sat, 8 Jan 2022 22:25:44 +0000 Subject: [PATCH 04/19] add comment and rename variables --- src/coreclr/jit/lowerxarch.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 4187f00c171cd8..1ffbc446f08556 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3728,7 +3728,7 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) } //---------------------------------------------------------------------------------------------- -// Lowering::LowerHWIntrinsicToScalar: Lowers a tree AND(X, SUB(X, 1) to HWIntrinsic::ResetLowestSetBit +// Lowering::LowerHWIntrinsicToScalar: Lowers a tree AND(X, ADD(X, -1) to HWIntrinsic::ResetLowestSetBit // Returns the recplacement node if one is created else nullptr indicating no replacement // // Parameters @@ -3738,13 +3738,12 @@ GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTree* 
node) { GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2(); - if (op1->OperIs(GT_LCL_VAR) && op2->OperIs(GT_ADD)) + if (op1->OperIs(GT_LCL_VAR) && op2->OperIs(GT_ADD) && !comp->lvaGetDesc(op1->AsLclVar())->IsAddressExposed() ) { - GenTree* op21 = op2->gtGetOp1(); - GenTree* op22 = op2->gtGetOp2(); - if (op22->IsCnsIntOrI() && op22->AsIntCon()->IconValue() == -1 && op21->OperIs(GT_LCL_VAR) && - !comp->lvaGetDesc(op1->AsLclVar())->IsAddressExposed() && - op21->AsLclVar()->GetLclNum() == op1->AsLclVar()->GetLclNum() && + GenTree* addOp1 = op2->gtGetOp1(); + GenTree* addOp2 = op2->gtGetOp2(); + if (addOp2->IsIntegralConst(-1) && addOp1->OperIs(GT_LCL_VAR) && + addOp1->AsLclVar()->GetLclNum() == op1->AsLclVar()->GetLclNum() && ( (op1->TypeGet() == TYP_LONG && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64)) || comp->compOpportunisticallyDependsOn(InstructionSet_BMI1)) // if op1->TypeGet()!=TYP_LONG then it must be TYP_INT @@ -3753,6 +3752,9 @@ GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTree* node) LIR::Use use; if (BlockRange().TryGetUse(node, &use)) { + // If we allow the use of BSLR where the parent is NE or EQ it will generate "blsr, test" + // if we prevent the use of BSLR here then the OptimizeConstCompare lowering may generate + // TEST_NE or TEST_EQ leading to a cheaper "lea, test" GenTree* user = use.User(); if (user != nullptr && !user->OperIs(GT_EQ, GT_NE)) { @@ -3762,7 +3764,7 @@ GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTree* node) ? NamedIntrinsic::NI_BMI1_ResetLowestSetBit : NamedIntrinsic::NI_BMI1_X64_ResetLowestSetBit); - JITDUMP("Lower: optimize AND(X, SUB(X, 1)): "); + JITDUMP("Lower: optimize AND(X, ADD(X, -1)): "); DISPNODE(node); JITDUMP("Replaced with: "); DISPNODE(replacementNode); @@ -3773,8 +3775,8 @@ GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTree* node) BlockRange().InsertBefore(node, replacementNode); BlockRange().Remove(node); BlockRange().Remove(op2); - BlockRange().Remove(op21); - BlockRange().Remove(op22); + BlockRange().Remove(addOp1); + BlockRange().Remove(addOp2); JITDUMP("Remove [%06u], [%06u]\n", node->gtTreeID, node->gtTreeID); ContainCheckHWIntrinsic(replacementNode); From 0dcf0f70c27399ab4c15d0c67f3bbf1c357df862 Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Sun, 9 Jan 2022 03:16:33 +0000 Subject: [PATCH 05/19] ifdef optimization on HWIntrinsics feature and fix formatting --- src/coreclr/jit/lowerxarch.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 1ffbc446f08556..9a2e6eb6b698e9 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -175,10 +175,12 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) CLANG_FORMAT_COMMENT_ANCHOR; GenTree* replacement = nullptr; +#ifdef FEATURE_HW_INTRINSICS if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND) && varTypeIsIntegral(binOp)) { replacement = LowerAndOpToResetLowestSetBit(binOp); } +#endif if (replacement == nullptr) { @@ -191,7 +193,6 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) } } - //------------------------------------------------------------------------ // LowerBlockStore: Lower a block store node // @@ -3733,21 +3734,21 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) // // Parameters // tree - GentreePtr of binOp -// +// GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTree* node) { GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2(); - if 
(op1->OperIs(GT_LCL_VAR) && op2->OperIs(GT_ADD) && !comp->lvaGetDesc(op1->AsLclVar())->IsAddressExposed() ) + if (op1->OperIs(GT_LCL_VAR) && op2->OperIs(GT_ADD) && !comp->lvaGetDesc(op1->AsLclVar())->IsAddressExposed()) { GenTree* addOp1 = op2->gtGetOp1(); GenTree* addOp2 = op2->gtGetOp2(); if (addOp2->IsIntegralConst(-1) && addOp1->OperIs(GT_LCL_VAR) && addOp1->AsLclVar()->GetLclNum() == op1->AsLclVar()->GetLclNum() && - ( - (op1->TypeGet() == TYP_LONG && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64)) || - comp->compOpportunisticallyDependsOn(InstructionSet_BMI1)) // if op1->TypeGet()!=TYP_LONG then it must be TYP_INT - ) + ((op1->TypeGet() == TYP_LONG && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64)) || + comp->compOpportunisticallyDependsOn(InstructionSet_BMI1)) // if op1->TypeGet()!=TYP_LONG then it must be + // TYP_INT + ) { LIR::Use use; if (BlockRange().TryGetUse(node, &use)) From 92449730d29dc0c6b0d7daa7694fe83ea335c2c2 Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Sun, 9 Jan 2022 14:10:23 +0000 Subject: [PATCH 06/19] address reviewer feedback --- src/coreclr/jit/lowerxarch.cpp | 82 +++++++++++++++++----------------- 1 file changed, 40 insertions(+), 42 deletions(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 9a2e6eb6b698e9..6c2bd0f68cb7f0 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -174,23 +174,19 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) // this logic on the support for the instruction set on XArch. CLANG_FORMAT_COMMENT_ANCHOR; - GenTree* replacement = nullptr; #ifdef FEATURE_HW_INTRINSICS if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND) && varTypeIsIntegral(binOp)) { - replacement = LowerAndOpToResetLowestSetBit(binOp); + GenTree* blsrNode = LowerAndOpToResetLowestSetBit(binOp); + if (blsrNode != nullptr) + { + return blsrNode->gtNext; + } } #endif - if (replacement == nullptr) - { - ContainCheckBinary(binOp); - return binOp->gtNext; - } - else - { - return replacement->gtNext; - } + ContainCheckBinary(binOp); + return binOp->gtNext; } //------------------------------------------------------------------------ @@ -3729,68 +3725,70 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) } //---------------------------------------------------------------------------------------------- -// Lowering::LowerHWIntrinsicToScalar: Lowers a tree AND(X, ADD(X, -1) to HWIntrinsic::ResetLowestSetBit -// Returns the recplacement node if one is created else nullptr indicating no replacement +// Lowering::LowerAndOpToResetLowestSetBit: Lowers a tree AND(X, ADD(X, -1) to HWIntrinsic::ResetLowestSetBit // -// Parameters -// tree - GentreePtr of binOp +// Arguments: +// andNode - GentreePtr of binOp // -GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTree* node) +// Return Value: +// Returns the replacement node if one is created else nullptr indicating no replacement +// +// Assumptions: +// andNode is GT_AND and (TYP_INT or TYP_LONG) +// +GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTree* andNode) { - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); + assert(andNode->OperIs(GT_AND) && varTypeIsIntegral(andNode)); + + GenTree* op1 = andNode->gtGetOp1(); + GenTree* op2 = andNode->gtGetOp2(); if (op1->OperIs(GT_LCL_VAR) && op2->OperIs(GT_ADD) && !comp->lvaGetDesc(op1->AsLclVar())->IsAddressExposed()) { GenTree* addOp1 = op2->gtGetOp1(); GenTree* addOp2 = op2->gtGetOp2(); if (addOp2->IsIntegralConst(-1) && 
addOp1->OperIs(GT_LCL_VAR) && - addOp1->AsLclVar()->GetLclNum() == op1->AsLclVar()->GetLclNum() && - ((op1->TypeGet() == TYP_LONG && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64)) || - comp->compOpportunisticallyDependsOn(InstructionSet_BMI1)) // if op1->TypeGet()!=TYP_LONG then it must be - // TYP_INT + (addOp1->AsLclVar()->GetLclNum() == op1->AsLclVar()->GetLclNum()) && + ((op1->TypeIs(TYP_LONG) && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64)) || + comp->compOpportunisticallyDependsOn(InstructionSet_BMI1)) ) { LIR::Use use; - if (BlockRange().TryGetUse(node, &use)) + if (BlockRange().TryGetUse(andNode, &use)) { // If we allow the use of BSLR where the parent is NE or EQ it will generate "blsr, test" - // if we prevent the use of BSLR here then the OptimizeConstCompare lowering may generate - // TEST_NE or TEST_EQ leading to a cheaper "lea, test" + // if we prevent the use of BSLR here then "OptimizeConstCompare" may generate TEST_NE or + // TEST_EQ leading to a cheaper "lea, test" GenTree* user = use.User(); - if (user != nullptr && !user->OperIs(GT_EQ, GT_NE)) + if (!user->OperIs(GT_EQ, GT_NE)) { - GenTreeHWIntrinsic* replacementNode = - comp->gtNewScalarHWIntrinsicNode(op1->TypeGet(), op1, - op1->TypeGet() == TYP_INT + GenTreeHWIntrinsic* blsrNode = + comp->gtNewScalarHWIntrinsicNode(andNode->TypeGet(), op1, + andNode->TypeIs(TYP_INT) ? NamedIntrinsic::NI_BMI1_ResetLowestSetBit : NamedIntrinsic::NI_BMI1_X64_ResetLowestSetBit); JITDUMP("Lower: optimize AND(X, ADD(X, -1)): "); - DISPNODE(node); + DISPNODE(andNode); JITDUMP("Replaced with: "); - DISPNODE(replacementNode); + DISPNODE(blsrNode); - use.ReplaceWith(replacementNode); + use.ReplaceWith(blsrNode); - op1->ClearContained(); - BlockRange().InsertBefore(node, replacementNode); - BlockRange().Remove(node); + BlockRange().InsertBefore(andNode, blsrNode); + BlockRange().Remove(andNode); BlockRange().Remove(op2); BlockRange().Remove(addOp1); BlockRange().Remove(addOp2); - JITDUMP("Remove [%06u], [%06u]\n", node->gtTreeID, node->gtTreeID); + JITDUMP("Remove [%06u], [%06u]\n", andNode->gtTreeID, andNode->gtTreeID); - ContainCheckHWIntrinsic(replacementNode); + ContainCheckHWIntrinsic(blsrNode); - return replacementNode; + return blsrNode; } } - else - { - node->SetUnusedValue(); - } } } + return nullptr; } From 309acf22b2e160326df75570ab137fb91df49580 Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Sun, 9 Jan 2022 14:25:37 +0000 Subject: [PATCH 07/19] move arm ANDN back up to common --- src/coreclr/jit/lower.cpp | 34 ++++++++++++++++++++++++++++++-- src/coreclr/jit/lowerarmarch.cpp | 24 ---------------------- src/coreclr/jit/lowerxarch.cpp | 5 +---- 3 files changed, 33 insertions(+), 30 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index c9b2d88c38855a..9a12617a6c3216 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -5133,9 +5133,39 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) // Returns: // The next node to lower. // -GenTree* Lowering::LowerBinaryArithmeticCommon(GenTreeOp* node) +GenTree* Lowering::LowerBinaryArithmeticCommon(GenTreeOp* binOp) { - return LowerBinaryArithmetic(node); + // TODO-CQ-XArch: support BMI2 "andn" in codegen and condition + // this logic on the support for the instruction set on XArch. 
+ CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef TARGET_ARMARCH + if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND)) + { + GenTree* opNode = nullptr; + GenTree* notNode = nullptr; + if (binOp->gtGetOp1()->OperIs(GT_NOT)) + { + notNode = binOp->gtGetOp1(); + opNode = binOp->gtGetOp2(); + } + else if (binOp->gtGetOp2()->OperIs(GT_NOT)) + { + notNode = binOp->gtGetOp2(); + opNode = binOp->gtGetOp1(); + } + + if (notNode != nullptr) + { + binOp->gtOp1 = opNode; + binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1(); + binOp->ChangeOper(GT_AND_NOT); + BlockRange().Remove(notNode); + } + } +#endif + + return LowerBinaryArithmetic(binOp); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 21eb9cbf98d288..ad74d3c888d741 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -292,30 +292,6 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) // GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) { - if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND)) - { - GenTree* opNode = nullptr; - GenTree* notNode = nullptr; - if (binOp->gtGetOp1()->OperIs(GT_NOT)) - { - notNode = binOp->gtGetOp1(); - opNode = binOp->gtGetOp2(); - } - else if (binOp->gtGetOp2()->OperIs(GT_NOT)) - { - notNode = binOp->gtGetOp2(); - opNode = binOp->gtGetOp1(); - } - - if (notNode != nullptr) - { - binOp->gtOp1 = opNode; - binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1(); - binOp->ChangeOper(GT_AND_NOT); - BlockRange().Remove(notNode); - } - } - ContainCheckBinary(binOp); return binOp->gtNext; diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 6c2bd0f68cb7f0..4e049a9b39c553 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -170,10 +170,6 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) // GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) { - // TODO-CQ-XArch: support BMI2 "andn" in codegen and condition - // this logic on the support for the instruction set on XArch. 
- CLANG_FORMAT_COMMENT_ANCHOR; - #ifdef FEATURE_HW_INTRINSICS if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND) && varTypeIsIntegral(binOp)) { @@ -186,6 +182,7 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) #endif ContainCheckBinary(binOp); + return binOp->gtNext; } From b0209b0b6cfbc7f4e668d83e2f77a835b2f6f7df Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Sun, 9 Jan 2022 16:31:18 +0000 Subject: [PATCH 08/19] more review feedback --- src/coreclr/jit/lower.h | 4 ++-- src/coreclr/jit/lowerxarch.cpp | 9 ++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 79f516453dfd70..fe6ee8080e6053 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -297,7 +297,7 @@ class Lowering final : public Phase void LowerStoreIndir(GenTreeStoreInd* node); GenTree* LowerAdd(GenTreeOp* node); GenTree* LowerMul(GenTreeOp* mul); - GenTree* LowerBinaryArithmeticCommon(GenTreeOp* node); + GenTree* LowerBinaryArithmeticCommon(GenTreeOp* binOp); GenTree* LowerBinaryArithmetic(GenTreeOp* binOp); bool LowerUnsignedDivOrMod(GenTreeOp* divMod); GenTree* LowerConstIntDivOrMod(GenTree* node); @@ -344,7 +344,7 @@ class Lowering final : public Phase void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node); void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node); void LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node); - GenTree* LowerAndOpToResetLowestSetBit(GenTree* node); + GenTree* LowerAndOpToResetLowestSetBit(GenTreeOp* binOp); #elif defined(TARGET_ARM64) bool IsValidConstForMovImm(GenTreeHWIntrinsic* node); void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 4e049a9b39c553..a6b5a22a19dd3f 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3730,10 +3730,7 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) // Return Value: // Returns the replacement node if one is created else nullptr indicating no replacement // -// Assumptions: -// andNode is GT_AND and (TYP_INT or TYP_LONG) -// -GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTree* andNode) +GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTreeOp* andNode) { assert(andNode->OperIs(GT_AND) && varTypeIsIntegral(andNode)); @@ -3746,8 +3743,7 @@ GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTree* andNode) if (addOp2->IsIntegralConst(-1) && addOp1->OperIs(GT_LCL_VAR) && (addOp1->AsLclVar()->GetLclNum() == op1->AsLclVar()->GetLclNum()) && ((op1->TypeIs(TYP_LONG) && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64)) || - comp->compOpportunisticallyDependsOn(InstructionSet_BMI1)) - ) + comp->compOpportunisticallyDependsOn(InstructionSet_BMI1))) { LIR::Use use; if (BlockRange().TryGetUse(andNode, &use)) @@ -3776,7 +3772,6 @@ GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTree* andNode) BlockRange().Remove(op2); BlockRange().Remove(addOp1); BlockRange().Remove(addOp2); - JITDUMP("Remove [%06u], [%06u]\n", andNode->gtTreeID, andNode->gtTreeID); ContainCheckHWIntrinsic(blsrNode); From 5abeeaa074a91468e6fe929514c9126ed1586b43 Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Sun, 9 Jan 2022 22:22:13 +0000 Subject: [PATCH 09/19] feedback --- src/coreclr/jit/lowerxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index a6b5a22a19dd3f..fda27de6b3be4a 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ 
b/src/coreclr/jit/lowerxarch.cpp @@ -3725,7 +3725,7 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) // Lowering::LowerAndOpToResetLowestSetBit: Lowers a tree AND(X, ADD(X, -1) to HWIntrinsic::ResetLowestSetBit // // Arguments: -// andNode - GentreePtr of binOp +// andNode - GT_AND node of integral type // // Return Value: // Returns the replacement node if one is created else nullptr indicating no replacement From 3ee610cbe04c66ed649345b4dbad3b430392296c Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Mon, 10 Jan 2022 21:57:05 +0000 Subject: [PATCH 10/19] add check for second user op being constant --- src/coreclr/jit/lowerxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index fda27de6b3be4a..d276aeac2c5d5e 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3752,7 +3752,7 @@ GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTreeOp* andNode) // if we prevent the use of BSLR here then "OptimizeConstCompare" may generate TEST_NE or // TEST_EQ leading to a cheaper "lea, test" GenTree* user = use.User(); - if (!user->OperIs(GT_EQ, GT_NE)) + if (!(user->OperIs(GT_EQ, GT_NE) && user->gtGetOp2()->IsIntegralConst())) { GenTreeHWIntrinsic* blsrNode = comp->gtNewScalarHWIntrinsicNode(andNode->TypeGet(), op1, From fc51924380e5e8b36b69f21524a7af154154ac8e Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Tue, 11 Jan 2022 00:04:26 +0000 Subject: [PATCH 11/19] add jitdump newlines --- src/coreclr/jit/lowerxarch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index d276aeac2c5d5e..cbce5ebda6e259 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3760,9 +3760,9 @@ GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTreeOp* andNode) ? NamedIntrinsic::NI_BMI1_ResetLowestSetBit : NamedIntrinsic::NI_BMI1_X64_ResetLowestSetBit); - JITDUMP("Lower: optimize AND(X, ADD(X, -1)): "); + JITDUMP("Lower: optimize AND(X, ADD(X, -1))\n"); DISPNODE(andNode); - JITDUMP("Replaced with: "); + JITDUMP("to:\n"); DISPNODE(blsrNode); use.ReplaceWith(blsrNode); From 0615e3d57ac8e9aa047ee2d79acbdff4696db210 Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Thu, 13 Jan 2022 01:16:55 +0000 Subject: [PATCH 12/19] remove lower equality check and set blsr instruction flags so that they can be detected and avoided. 
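Why this works: blsr sets the zero flag from its result, so once the instruction table records its flag effects (the Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF entry below), OptimizeConstCompare can fold a following compare-against-zero into the flags the blsr already produced, and the EQ/NE guard added earlier in the series is no longer needed. A hedged C++ illustration of code that benefits (purely an example of the source pattern, not part of the patch; the function name is hypothetical):

    // Popcount-style loop: each x &= x - 1 clears the lowest set bit and is a candidate
    // for NI_BMI1_ResetLowestSetBit. With blsr known to write ZF, the x != 0 loop test
    // can reuse the flags set by the blsr instead of requiring a separate test instruction.
    int CountSetBits(unsigned x)
    {
        int count = 0;
        while (x != 0)
        {
            x &= x - 1;
            ++count;
        }
        return count;
    }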
--- src/coreclr/jit/emitxarch.h | 2 +- src/coreclr/jit/instrsxarch.h | 2 +- src/coreclr/jit/lower.cpp | 9 ++++++-- src/coreclr/jit/lowerxarch.cpp | 41 ++++++++++++++-------------------- 4 files changed, 26 insertions(+), 28 deletions(-) diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index d6ed324deb242a..5cef7d5aa12f25 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -193,7 +193,7 @@ bool IsDstDstSrcAVXInstruction(instruction ins); bool IsDstSrcSrcAVXInstruction(instruction ins); bool HasRegularWideForm(instruction ins); bool HasRegularWideImmediateForm(instruction ins); -bool DoesWriteZeroFlag(instruction ins); +static bool DoesWriteZeroFlag(instruction ins); bool DoesWriteSignFlag(instruction ins); bool DoesResetOverflowAndCarryFlags(instruction ins); bool IsFlagsAlwaysModified(instrDesc* id); diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 458f919fe27935..08d4c4a8c8675d 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -595,7 +595,7 @@ INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Extract Lowest Set Isolated Bit INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Get Mask Up to Lowest Set Bit -INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Reset Lowest Set Bit +INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Reset Lowest Set Bit INST3(bextr, "bextr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_Flags_IsDstDstSrcAVXInstruction) // Bit Field Extract // BMI2 diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 9a12617a6c3216..9d45555c2df4eb 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2708,10 +2708,15 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) if (op2->IsIntegralConst(0) && (op1->gtNext == op2) && (op2->gtNext == cmp) && #ifdef TARGET_XARCH - op1->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG)) + op1->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG) +#ifdef FEATURE_HW_INTRINSICS + || emitter::DoesWriteZeroFlag( + HWIntrinsicInfo::lookupIns(op1->AsHWIntrinsic()->GetHWIntrinsicId(), op1->TypeGet())) +#endif #else // TARGET_ARM64 - op1->OperIs(GT_AND, GT_ADD, GT_SUB)) + op1->OperIs(GT_AND, GT_ADD, GT_SUB) #endif + ) { op1->gtFlags |= GTF_SET_FLAGS; op1->SetUnusedValue(); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index cbce5ebda6e259..e8ecee79005d6a 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3748,35 +3748,28 @@ GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTreeOp* andNode) LIR::Use use; if (BlockRange().TryGetUse(andNode, &use)) { - // If we allow the use of BSLR where the parent is NE or EQ it will generate "blsr, test" - // if we prevent the use of BSLR here then "OptimizeConstCompare" may generate TEST_NE or - // TEST_EQ leading to a cheaper "lea, test" - GenTree* user = use.User(); - if (!(user->OperIs(GT_EQ, GT_NE) && user->gtGetOp2()->IsIntegralConst())) - { - GenTreeHWIntrinsic* blsrNode = - 
comp->gtNewScalarHWIntrinsicNode(andNode->TypeGet(), op1, - andNode->TypeIs(TYP_INT) - ? NamedIntrinsic::NI_BMI1_ResetLowestSetBit - : NamedIntrinsic::NI_BMI1_X64_ResetLowestSetBit); + GenTreeHWIntrinsic* blsrNode = + comp->gtNewScalarHWIntrinsicNode(andNode->TypeGet(), op1, + andNode->TypeIs(TYP_INT) + ? NamedIntrinsic::NI_BMI1_ResetLowestSetBit + : NamedIntrinsic::NI_BMI1_X64_ResetLowestSetBit); - JITDUMP("Lower: optimize AND(X, ADD(X, -1))\n"); - DISPNODE(andNode); - JITDUMP("to:\n"); - DISPNODE(blsrNode); + JITDUMP("Lower: optimize AND(X, ADD(X, -1))\n"); + DISPNODE(andNode); + JITDUMP("to:\n"); + DISPNODE(blsrNode); - use.ReplaceWith(blsrNode); + use.ReplaceWith(blsrNode); - BlockRange().InsertBefore(andNode, blsrNode); - BlockRange().Remove(andNode); - BlockRange().Remove(op2); - BlockRange().Remove(addOp1); - BlockRange().Remove(addOp2); + BlockRange().InsertBefore(andNode, blsrNode); + BlockRange().Remove(andNode); + BlockRange().Remove(op2); + BlockRange().Remove(addOp1); + BlockRange().Remove(addOp2); - ContainCheckHWIntrinsic(blsrNode); + ContainCheckHWIntrinsic(blsrNode); - return blsrNode; - } + return blsrNode; } } } From 66088724612eb5dfb1a58be75c291cc840a17300 Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Thu, 13 Jan 2022 03:22:33 +0000 Subject: [PATCH 13/19] add parens and tidy --- src/coreclr/jit/lower.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 9d45555c2df4eb..3c0d3234470ed9 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2708,11 +2708,12 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) if (op2->IsIntegralConst(0) && (op1->gtNext == op2) && (op2->gtNext == cmp) && #ifdef TARGET_XARCH - op1->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG) + (op1->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG) #ifdef FEATURE_HW_INTRINSICS - || emitter::DoesWriteZeroFlag( - HWIntrinsicInfo::lookupIns(op1->AsHWIntrinsic()->GetHWIntrinsicId(), op1->TypeGet())) -#endif + || emitter::DoesWriteZeroFlag( + HWIntrinsicInfo::lookupIns(op1->AsHWIntrinsic()->GetHWIntrinsicId(), op1->TypeGet())) +#endif // FEATURE_HW_INTRINSICS + ) #else // TARGET_ARM64 op1->OperIs(GT_AND, GT_ADD, GT_SUB) #endif From be87205123b673f544e7822a9687f1f6d631f0bd Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Thu, 13 Jan 2022 09:01:12 +0000 Subject: [PATCH 14/19] correct missing IsOper(GT_HWINTRINSIC) check --- src/coreclr/jit/lower.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 3c0d3234470ed9..19bfdda502f041 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2710,8 +2710,9 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) #ifdef TARGET_XARCH (op1->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG) #ifdef FEATURE_HW_INTRINSICS - || emitter::DoesWriteZeroFlag( - HWIntrinsicInfo::lookupIns(op1->AsHWIntrinsic()->GetHWIntrinsicId(), op1->TypeGet())) + || (op1->OperIs(GT_HWINTRINSIC) && + emitter::DoesWriteZeroFlag( + HWIntrinsicInfo::lookupIns(op1->AsHWIntrinsic()->GetHWIntrinsicId(), op1->TypeGet()))) #endif // FEATURE_HW_INTRINSICS ) #else // TARGET_ARM64 From 1ccaab9a75f0c7addae3977e058389b3a8e0e3e9 Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Thu, 13 Jan 2022 10:42:19 +0000 Subject: [PATCH 15/19] jit-format --- src/coreclr/jit/lower.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 
19bfdda502f041..6cee8f877d279d 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2712,7 +2712,7 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) #ifdef FEATURE_HW_INTRINSICS || (op1->OperIs(GT_HWINTRINSIC) && emitter::DoesWriteZeroFlag( - HWIntrinsicInfo::lookupIns(op1->AsHWIntrinsic()->GetHWIntrinsicId(), op1->TypeGet()))) + HWIntrinsicInfo::lookupIns(op1->AsHWIntrinsic()->GetHWIntrinsicId(), op1->TypeGet()))) #endif // FEATURE_HW_INTRINSICS ) #else // TARGET_ARM64 From 8b93b157ea9620df88b507d076f74a64ce447b93 Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Sat, 15 Jan 2022 21:42:27 +0000 Subject: [PATCH 16/19] reformatting as suggested by EgorBo --- src/coreclr/jit/lowerxarch.cpp | 80 ++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 32 deletions(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index e8ecee79005d6a..ef1ee453c35c3f 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3735,46 +3735,62 @@ GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTreeOp* andNode) assert(andNode->OperIs(GT_AND) && varTypeIsIntegral(andNode)); GenTree* op1 = andNode->gtGetOp1(); + if (!op1->OperIs(GT_LCL_VAR) || comp->lvaGetDesc(op1->AsLclVar())->IsAddressExposed()) + { + return nullptr; + } + GenTree* op2 = andNode->gtGetOp2(); - if (op1->OperIs(GT_LCL_VAR) && op2->OperIs(GT_ADD) && !comp->lvaGetDesc(op1->AsLclVar())->IsAddressExposed()) + GenTree* addOp2 = op2->gtGetOp2(); + if (!op2->OperIs(GT_ADD) || !addOp2->IsIntegralConst(-1)) { - GenTree* addOp1 = op2->gtGetOp1(); - GenTree* addOp2 = op2->gtGetOp2(); - if (addOp2->IsIntegralConst(-1) && addOp1->OperIs(GT_LCL_VAR) && - (addOp1->AsLclVar()->GetLclNum() == op1->AsLclVar()->GetLclNum()) && - ((op1->TypeIs(TYP_LONG) && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64)) || - comp->compOpportunisticallyDependsOn(InstructionSet_BMI1))) - { - LIR::Use use; - if (BlockRange().TryGetUse(andNode, &use)) - { - GenTreeHWIntrinsic* blsrNode = - comp->gtNewScalarHWIntrinsicNode(andNode->TypeGet(), op1, - andNode->TypeIs(TYP_INT) - ? 
NamedIntrinsic::NI_BMI1_ResetLowestSetBit - : NamedIntrinsic::NI_BMI1_X64_ResetLowestSetBit); + return nullptr; + } - JITDUMP("Lower: optimize AND(X, ADD(X, -1))\n"); - DISPNODE(andNode); - JITDUMP("to:\n"); - DISPNODE(blsrNode); + GenTree* addOp1 = op2->gtGetOp1(); + if (!addOp1->OperIs(GT_LCL_VAR) || (addOp1->AsLclVar()->GetLclNum() != op1->AsLclVar()->GetLclNum())) + { + return nullptr; + } - use.ReplaceWith(blsrNode); + NamedIntrinsic intrinsic; + if (op1->TypeIs(TYP_LONG) && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64)) + { + intrinsic = NamedIntrinsic::NI_BMI1_X64_ResetLowestSetBit; + } + else if (comp->compOpportunisticallyDependsOn(InstructionSet_BMI1)) + { + intrinsic = NamedIntrinsic::NI_BMI1_ResetLowestSetBit; + } + else + { + return nullptr; + } - BlockRange().InsertBefore(andNode, blsrNode); - BlockRange().Remove(andNode); - BlockRange().Remove(op2); - BlockRange().Remove(addOp1); - BlockRange().Remove(addOp2); + LIR::Use use; + if (!BlockRange().TryGetUse(andNode, &use)) + { + return nullptr; + } - ContainCheckHWIntrinsic(blsrNode); + GenTreeHWIntrinsic* blsrNode = comp->gtNewScalarHWIntrinsicNode(andNode->TypeGet(), op1, intrinsic); - return blsrNode; - } - } - } + JITDUMP("Lower: optimize AND(X, ADD(X, -1))\n"); + DISPNODE(andNode); + JITDUMP("to:\n"); + DISPNODE(blsrNode); + + use.ReplaceWith(blsrNode); + + BlockRange().InsertBefore(andNode, blsrNode); + BlockRange().Remove(andNode); + BlockRange().Remove(op2); + BlockRange().Remove(addOp1); + BlockRange().Remove(addOp2); + + ContainCheckHWIntrinsic(blsrNode); - return nullptr; + return blsrNode; } #endif // FEATURE_HW_INTRINSICS From 8e30ced9cf5918c9a4c9e6d3d27974a9344f3658 Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Sat, 15 Jan 2022 22:21:47 +0000 Subject: [PATCH 17/19] clang formatting is stupid --- src/coreclr/jit/lowerxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index ef1ee453c35c3f..8e064d3f2237e5 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3740,7 +3740,7 @@ GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTreeOp* andNode) return nullptr; } - GenTree* op2 = andNode->gtGetOp2(); + GenTree* op2 = andNode->gtGetOp2(); GenTree* addOp2 = op2->gtGetOp2(); if (!op2->OperIs(GT_ADD) || !addOp2->IsIntegralConst(-1)) { From c8f3765c978f142d0a6957fddf9900bfa60a2a4f Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Sun, 16 Jan 2022 22:02:22 +0000 Subject: [PATCH 18/19] fix ordering of and check and add lookupIns(HWIntrinsic) overload --- src/coreclr/jit/hwintrinsic.h | 19 +++++++++++++++++++ src/coreclr/jit/lower.cpp | 3 +-- src/coreclr/jit/lowerxarch.cpp | 9 +++++++-- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 43bd543c599f1d..f7c3acc13613a1 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -577,6 +577,25 @@ struct HWIntrinsicInfo return lookup(id).ins[type - TYP_BYTE]; } + static instruction lookupIns(GenTreeHWIntrinsic* intrinsicNode) + { + assert(intrinsicNode != nullptr); + + NamedIntrinsic intrinsic = intrinsicNode->GetHWIntrinsicId(); + var_types type = var_types::TYP_UNKNOWN; + + if (lookupCategory(intrinsic) == HWIntrinsicCategory::HW_Category_Scalar) + { + type = intrinsicNode->TypeGet(); + } + else + { + type = intrinsicNode->GetSimdBaseType(); + } + + return lookupIns(intrinsic, type); + } + static HWIntrinsicCategory lookupCategory(NamedIntrinsic id) 
{ return lookup(id).category; diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 6cee8f877d279d..e7d4c238a26115 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2711,8 +2711,7 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) (op1->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG) #ifdef FEATURE_HW_INTRINSICS || (op1->OperIs(GT_HWINTRINSIC) && - emitter::DoesWriteZeroFlag( - HWIntrinsicInfo::lookupIns(op1->AsHWIntrinsic()->GetHWIntrinsicId(), op1->TypeGet()))) + emitter::DoesWriteZeroFlag(HWIntrinsicInfo::lookupIns(op1->AsHWIntrinsic()))) #endif // FEATURE_HW_INTRINSICS ) #else // TARGET_ARM64 diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 8e064d3f2237e5..67684a11d604c3 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3740,9 +3740,14 @@ GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTreeOp* andNode) return nullptr; } - GenTree* op2 = andNode->gtGetOp2(); + GenTree* op2 = andNode->gtGetOp2(); + if (!op2->OperIs(GT_ADD)) + { + return nullptr; + } + GenTree* addOp2 = op2->gtGetOp2(); - if (!op2->OperIs(GT_ADD) || !addOp2->IsIntegralConst(-1)) + if (!addOp2->IsIntegralConst(-1)) { return nullptr; } From 75c7110498f34e5da02f7af2c590ea4011bf93b8 Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Wed, 19 Jan 2022 15:46:19 +0000 Subject: [PATCH 19/19] rename to prefix Try and remove unscoped enum prefixes --- src/coreclr/jit/hwintrinsic.h | 4 ++-- src/coreclr/jit/lower.h | 2 +- src/coreclr/jit/lowerxarch.cpp | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index f7c3acc13613a1..4551ed9e4e1ce1 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -582,9 +582,9 @@ struct HWIntrinsicInfo assert(intrinsicNode != nullptr); NamedIntrinsic intrinsic = intrinsicNode->GetHWIntrinsicId(); - var_types type = var_types::TYP_UNKNOWN; + var_types type = TYP_UNKNOWN; - if (lookupCategory(intrinsic) == HWIntrinsicCategory::HW_Category_Scalar) + if (lookupCategory(intrinsic) == HW_Category_Scalar) { type = intrinsicNode->TypeGet(); } diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index fe6ee8080e6053..26c728190a7982 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -344,7 +344,7 @@ class Lowering final : public Phase void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node); void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node); void LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node); - GenTree* LowerAndOpToResetLowestSetBit(GenTreeOp* binOp); + GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* binOp); #elif defined(TARGET_ARM64) bool IsValidConstForMovImm(GenTreeHWIntrinsic* node); void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 67684a11d604c3..d85197d2b605fb 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -173,7 +173,7 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) #ifdef FEATURE_HW_INTRINSICS if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND) && varTypeIsIntegral(binOp)) { - GenTree* blsrNode = LowerAndOpToResetLowestSetBit(binOp); + GenTree* blsrNode = TryLowerAndOpToResetLowestSetBit(binOp); if (blsrNode != nullptr) { return blsrNode->gtNext; @@ -3722,7 +3722,7 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) } 
//---------------------------------------------------------------------------------------------- -// Lowering::LowerAndOpToResetLowestSetBit: Lowers a tree AND(X, ADD(X, -1) to HWIntrinsic::ResetLowestSetBit +// Lowering::TryLowerAndOpToResetLowestSetBit: Lowers a tree AND(X, ADD(X, -1) to HWIntrinsic::ResetLowestSetBit // // Arguments: // andNode - GT_AND node of integral type @@ -3730,7 +3730,7 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) // Return Value: // Returns the replacement node if one is created else nullptr indicating no replacement // -GenTree* Lowering::LowerAndOpToResetLowestSetBit(GenTreeOp* andNode) +GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode) { assert(andNode->OperIs(GT_AND) && varTypeIsIntegral(andNode));
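Where the series ends up: TryLowerAndOpToResetLowestSetBit matches an AND whose operands are a non-address-exposed local and that same local plus -1 and, when BMI1 (or BMI1_X64 for TYP_LONG operands) is available, replaces the whole subtree with a scalar ResetLowestSetBit intrinsic that codegen emits as blsr. A schematic before/after of the IR shape (a hand-drawn sketch, not actual JITDUMP output; V01 is a placeholder local number):

    // Before lowering:
    //   AND       int
    //     LCL_VAR V01
    //     ADD     int
    //       LCL_VAR V01
    //       CNS_INT -1
    //
    // After TryLowerAndOpToResetLowestSetBit (assuming BMI1 is available):
    //   HWINTRINSIC int NI_BMI1_ResetLowestSetBit
    //     LCL_VAR V01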