From 3aa62af2e64fdb271b8e4d52d3fa8ea21bb77375 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 5 Dec 2022 11:16:23 +0000 Subject: [PATCH 01/31] Create AND chains in bool optimizer pass --- src/coreclr/jit/codegenarm64.cpp | 16 +- src/coreclr/jit/codegenarmarch.cpp | 2 +- src/coreclr/jit/codegenlinear.cpp | 39 +++- src/coreclr/jit/compiler.h | 6 + src/coreclr/jit/fgopt.cpp | 19 ++ src/coreclr/jit/gentree.cpp | 4 +- src/coreclr/jit/gentree.h | 5 +- src/coreclr/jit/ifconversion.cpp | 17 +- src/coreclr/jit/lower.cpp | 28 ++- src/coreclr/jit/lowerarmarch.cpp | 52 +++-- src/coreclr/jit/lsraarm64.cpp | 27 ++- src/coreclr/jit/morph.cpp | 2 +- src/coreclr/jit/optimizer.cpp | 193 +++++++++++++++++- .../JIT/opt/Compares/compareAnd2Chains.cs | 131 ++++++++++++ .../JIT/opt/Compares/compareAnd2Chains.csproj | 9 +- .../JIT/opt/Compares/compareAnd3Chains.cs | 17 ++ .../JIT/opt/Compares/compareAnd3Chains.csproj | 12 +- src/tests/JIT/opt/Compares/compares.cs | 58 +++--- 18 files changed, 555 insertions(+), 82 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 020d472ef1fe03..94b5ae87efa59b 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2519,12 +2519,12 @@ void CodeGen::genCodeForBinary(GenTreeOp* tree) GenTree* op1 = tree->gtGetOp1(); GenTree* op2 = tree->gtGetOp2(); - // The arithmetic node must be sitting in a register (since it's not contained) - assert(targetReg != REG_NA); - // Handles combined operations: 'madd', 'msub' if (op2->OperIs(GT_MUL) && op2->isContained()) { + // The arithmetic node must be sitting in a register (since it's not contained) + assert(targetReg != REG_NA); + // In the future, we might consider enabling this for floating-point "unsafe" math. assert(varTypeIsIntegral(tree)); @@ -2720,11 +2720,17 @@ void CodeGen::genCodeForBinary(GenTreeOp* tree) assert(chain); // Move the result from flags into a register. 
- inst_SETCC(cond, tree->TypeGet(), targetReg); - genProduceReg(tree); + if (targetReg != REG_NA) + { + inst_SETCC(cond, tree->TypeGet(), targetReg); + genProduceReg(tree); + } return; } + // The arithmetic node must be sitting in a register (since it's not contained) + assert(targetReg != REG_NA); + instruction ins = genGetInsForOper(tree->OperGet(), targetType); if ((tree->gtFlags & GTF_SET_FLAGS) != 0) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index dfc29a621d4652..e149122344cf32 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -214,7 +214,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_OR: case GT_XOR: - case GT_AND: case GT_AND_NOT: assert(varTypeIsIntegralOrI(treeNode)); @@ -227,6 +226,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_SUB_HI: #endif // !defined(TARGET_64BIT) + case GT_AND: case GT_ADD: case GT_SUB: case GT_MUL: diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index b510ce4a558dc5..2e01a8999d038c 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2592,8 +2592,43 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) assert(compiler->compCurBB->bbJumpKind == BBJ_COND); assert(jtrue->OperIs(GT_JTRUE)); - GenTreeOp* relop = jtrue->gtGetOp1()->AsOp(); - GenCondition condition = GenCondition::FromRelop(relop); + GenTreeOp* relop = jtrue->gtGetOp1()->AsOp(); + GenCondition condition; + + // Operands should never be contained inside a jtrue. + assert(!relop->isContained()); + +#if defined(TARGET_ARM64) + if (relop->OperIs(GT_AND)) + { + if ((relop->gtFlags & GTF_SET_FLAGS) == 0) + { + // The condition was generated into a register. 
+ regNumber reg = relop->GetRegNum(); + emitAttr attr = emitActualTypeSize(relop->TypeGet()); + GetEmitter()->emitIns_J_R(INS_cbnz, attr, compiler->compCurBB->bbJumpDest, reg); + return; + } + else + { + // Find the last contained compare in the chain. + GenTreeOp* lastCompare = relop->gtGetOp2()->AsOp(); + assert(lastCompare->isContained()); + while (!lastCompare->OperIsCompare()) + { + assert(lastCompare->OperIs(GT_AND)); + lastCompare = lastCompare->gtGetOp2()->AsOp(); + assert(lastCompare->isContained()); + } + condition = GenCondition::FromRelop(lastCompare); + } + } + else +#endif + { + assert(relop->OperIsCompare()); + condition = GenCondition::FromRelop(relop); + } if (condition.PreferSwap()) { diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 1299b34bb431e8..472b439278b470 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2870,6 +2870,12 @@ class Compiler // before they have been set.) bool gtComplexityExceeds(GenTree* tree, unsigned limit); + // Can the test condition be reversed without creating a new node. + bool gtCanReverseCondSimple(GenTree* tree) + { + return (tree->OperIsCompare() || tree->OperIs(GT_JCC, GT_SETCC) || tree->OperIs(GT_JCMP)); + } + GenTree* gtReverseCond(GenTree* tree); static bool gtHasRef(GenTree* tree, unsigned lclNum); diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index f26a27ce30b0fb..7c50338e853d8a 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5172,6 +5172,18 @@ bool Compiler::fgReorderBlocks(bool useProfile) } } + // Cannot handle cases where reversing creates a new node. 
+ if (bPrev->bbJumpKind == BBJ_COND) + { + Statement* const condTestStmt = bPrev->lastStmt(); + GenTree* const condTest = condTestStmt->GetRootNode(); + noway_assert(condTest->gtOper == GT_JTRUE); + if (!gtCanReverseCondSimple(condTest->gtGetOp1())) + { + reorderBlock = false; + } + } + if (reorderBlock == false) { // @@ -6163,6 +6175,13 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication, bool isPhase) } } + // Cannot handle cases where reversing creates a new node. + GenTree* last = block->lastNode(); + if (last->OperGet() == GT_JTRUE && !gtCanReverseCondSimple(last->gtGetOp1())) + { + optimizeJump = false; + } + if (optimizeJump && isJumpToJoinFree) { // In the join free case, we also need to move bDest right after bNext diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 50b22b34037d0d..6fd72084266154 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -10668,6 +10668,7 @@ void Compiler::gtDispNode(GenTree* tree, IndentStack* indentStack, _In_ _In_opt_ case GT_TEST_EQ: case GT_TEST_NE: case GT_SELECT: + case GT_AND: if (tree->gtFlags & GTF_RELOP_NAN_UN) { printf("N"); @@ -17132,8 +17133,7 @@ bool GenTree::canBeContained() const return false; } - // It is not possible for nodes that do not produce values or that are not containable values to be contained. - if (!IsValue() || ((DebugOperKind() & DBK_NOCONTAIN) != 0) || (OperIsHWIntrinsic() && !isContainableHWIntrinsic())) + if (((DebugOperKind() & DBK_NOCONTAIN) != 0) || (OperIsHWIntrinsic() && !isContainableHWIntrinsic())) { return false; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index ea8731460bf7c9..123e8d2844a5ba 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1089,8 +1089,8 @@ struct GenTree if (gtType == TYP_VOID) { // These are the only operators which can produce either VOID or non-VOID results. 
- assert(OperIs(GT_NOP, GT_CALL, GT_COMMA) || OperIsCompare() || OperIsLong() || OperIsHWIntrinsic() || - IsCnsVec()); + assert(OperIs(GT_NOP, GT_CALL, GT_COMMA, GT_AND) || OperIsCompare() || OperIsLong() || + OperIsHWIntrinsic() || IsCnsVec()); return false; } @@ -2008,7 +2008,6 @@ struct GenTree void SetContained() { - assert(IsValue()); gtFlags |= GTF_CONTAINED; assert(isContained()); } diff --git a/src/coreclr/jit/ifconversion.cpp b/src/coreclr/jit/ifconversion.cpp index 292d915384ae47..6a26c629841e82 100644 --- a/src/coreclr/jit/ifconversion.cpp +++ b/src/coreclr/jit/ifconversion.cpp @@ -558,10 +558,11 @@ bool OptIfConversionDsc::optIfConvert() } // Verify the test block ends with a condition that we can manipulate. + // Assuming that an AND connected to a JTRUE is always a valid chain. GenTree* last = m_startBlock->lastStmt()->GetRootNode(); noway_assert(last->OperIs(GT_JTRUE)); m_cond = last->gtGetOp1(); - if (!m_cond->OperIsCompare()) + if (!m_cond->OperIsCompare() && !m_cond->OperIs(GT_AND)) { return false; } @@ -668,22 +669,16 @@ bool OptIfConversionDsc::optIfConvert() { if (m_doElseConversion) { - selectTrueInput = m_elseOperation.node->gtGetOp2(); - selectFalseInput = m_thenOperation.node->gtGetOp2(); + selectTrueInput = m_elseOperation.node->gtGetOp2(); } else { - // Invert the condition (to help matching condition codes back to CIL). - GenTree* revCond = m_comp->gtReverseCond(m_cond); - assert(m_cond == revCond); // Ensure `gtReverseCond` did not create a new node. - // Duplicate the destination of the Then assignment. 
assert(m_thenOperation.node->gtGetOp1()->IsLocal()); - selectFalseInput = m_comp->gtCloneExpr(m_thenOperation.node->gtGetOp1()); - selectFalseInput->gtFlags &= GTF_EMPTY; - - selectTrueInput = m_thenOperation.node->gtGetOp2(); + selectTrueInput = m_comp->gtCloneExpr(m_thenOperation.node->gtGetOp1()); + selectTrueInput->gtFlags &= GTF_EMPTY; } + selectFalseInput = m_thenOperation.node->gtGetOp2(); // Pick the type as the type of the local, which should always be compatible even for implicit coercions. selectType = genActualType(m_thenOperation.node->gtGetOp1()); diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 93df4f3c690bfc..29975b6ca5b9f3 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7164,10 +7164,30 @@ void Lowering::ContainCheckRet(GenTreeUnOp* ret) // void Lowering::ContainCheckJTrue(GenTreeOp* node) { - // The compare does not need to be generated into a register. - GenTree* cmp = node->gtGetOp1(); - cmp->gtType = TYP_VOID; - cmp->gtFlags |= GTF_SET_FLAGS; + GenTree* op1 = node->gtGetOp1(); + + if (op1->OperIsCompare()) + { + // The compare does not need to be generated into a register. + op1->gtType = TYP_VOID; + op1->gtFlags |= GTF_SET_FLAGS; + } +#if defined(TARGET_ARM64) + else if (op1->OperIs(GT_AND)) + { + // If the second op of the AND is contained, then the AND does not need to be generated + // into a register. 
+ if (op1->gtGetOp2()->isContained()) + { + op1->gtType = TYP_VOID; + op1->gtFlags |= GTF_SET_FLAGS; + } + } +#endif + else + { + assert(false); + } } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 70bb2b2957dfc3..af91956c288677 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2281,7 +2281,8 @@ bool Lowering::IsValidCompareChain(GenTree* child, GenTree* parent) return IsValidCompareChain(child->AsOp()->gtGetOp2(), child) && IsValidCompareChain(child->AsOp()->gtGetOp1(), child); } - else if (child->OperIsCmpCompare() && varTypeIsIntegral(child->gtGetOp1()) && varTypeIsIntegral(child->gtGetOp2())) + else if (child->OperIsCmpCompare() && varTypeIsIntegralOrI(child->gtGetOp1()) && + varTypeIsIntegralOrI(child->gtGetOp2())) { // Can the child compare be contained. return IsSafeToContainMem(parent, child); @@ -2307,7 +2308,6 @@ bool Lowering::IsValidCompareChain(GenTree* child, GenTree* parent) bool Lowering::ContainCheckCompareChain(GenTree* child, GenTree* parent, GenTree** startOfChain) { assert(parent->OperIs(GT_AND) || parent->OperIs(GT_SELECT)); - *startOfChain = nullptr; // Nothing found yet. if (parent->isContainedCompareChainSegment(child)) { @@ -2320,7 +2320,7 @@ bool Lowering::ContainCheckCompareChain(GenTree* child, GenTree* parent, GenTree if (child->OperIs(GT_AND)) { // If Op2 is not contained, then try to contain it. - if (!child->isContainedCompareChainSegment(child->AsOp()->gtGetOp2())) + if (!child->isContainedCompareChainSegment(child->gtGetOp2())) { if (!ContainCheckCompareChain(child->gtGetOp2(), child, startOfChain)) { @@ -2328,15 +2328,41 @@ bool Lowering::ContainCheckCompareChain(GenTree* child, GenTree* parent, GenTree return false; } } + else + { + // Start of the chain is unknown. 
+ *startOfChain = nullptr; + + if (child->gtGetOp2()->OperIsCmpCompare()) + { + // Ensure the children of the compare are contained correctly. + child->gtGetOp2()->gtGetOp1()->ClearContained(); + child->gtGetOp2()->gtGetOp2()->ClearContained(); + ContainCheckConditionalCompare(child->gtGetOp2()->AsOp()); + } + } // If Op1 is not contained, then try to contain it. - if (!child->isContainedCompareChainSegment(child->AsOp()->gtGetOp1())) + if (!child->isContainedCompareChainSegment(child->gtGetOp1())) { if (!ContainCheckCompareChain(child->gtGetOp1(), child, startOfChain)) { return false; } } + else + { + // Start of the chain is unknown. + *startOfChain = nullptr; + + if (child->gtGetOp1()->OperIsCmpCompare()) + { + // Ensure the children of the compare are contained correctly. + child->gtGetOp1()->gtGetOp1()->ClearContained(); + child->gtGetOp1()->gtGetOp2()->ClearContained(); + ContainCheckConditionalCompare(child->gtGetOp1()->AsOp()); + } + } // Contain the AND. child->SetContained(); @@ -2382,17 +2408,15 @@ void Lowering::ContainCheckCompareChainForAnd(GenTree* tree) // only be contained if Op2 is contained. if (ContainCheckCompareChain(tree->AsOp()->gtGetOp2(), tree, &startOfChain)) { - if (ContainCheckCompareChain(tree->AsOp()->gtGetOp1(), tree, &startOfChain)) + ContainCheckCompareChain(tree->AsOp()->gtGetOp1(), tree, &startOfChain); + + // The earliest node in the chain will be generated as a standard compare. + if (startOfChain != nullptr) { - // If op1 is the start of a chain, then it'll be generated as a standard compare. - if (startOfChain != nullptr) - { - // The earliest node in the chain will be generated as a standard compare. 
- assert(startOfChain->OperIsCmpCompare()); - startOfChain->AsOp()->gtGetOp1()->ClearContained(); - startOfChain->AsOp()->gtGetOp2()->ClearContained(); - ContainCheckCompare(startOfChain->AsOp()); - } + assert(startOfChain->OperIsCmpCompare()); + startOfChain->AsOp()->gtGetOp1()->ClearContained(); + startOfChain->AsOp()->gtGetOp2()->ClearContained(); + ContainCheckCompare(startOfChain->AsOp()); } } diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index f8e5e2fec3548f..30d770a18322ca 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -241,9 +241,17 @@ int LinearScan::BuildNode(GenTree* tree) break; case GT_JTRUE: - srcCount = 0; + { + GenTree* op1 = tree->gtGetOp1(); + srcCount = 0; + if ((op1->gtFlags & GTF_SET_FLAGS) == 0) + { + assert(op1->OperIs(GT_AND)); + srcCount = BuildOperandUses(op1); + } assert(dstCount == 0); - break; + } + break; case GT_JMP: srcCount = 0; @@ -287,7 +295,6 @@ int LinearScan::BuildNode(GenTree* tree) } FALLTHROUGH; - case GT_AND: case GT_AND_NOT: case GT_OR: case GT_XOR: @@ -300,6 +307,20 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree); break; + case GT_AND: + srcCount = BuildBinaryUses(tree->AsOp()); + if ((tree->gtFlags & GTF_SET_FLAGS) != 0) + { + assert(tree->TypeGet() == TYP_VOID); + assert(dstCount == 0); + } + else + { + assert(dstCount == 1); + BuildDef(tree); + } + break; + case GT_BFIZ: assert(tree->gtGetOp1()->OperIs(GT_CAST)); srcCount = BuildOperandUses(tree->gtGetOp1()->gtGetOp1()); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 21d2117447da79..af45f55220001e 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -8916,7 +8916,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA noway_assert(op1); - if (op1->OperIsCompare()) + if (op1->OperIsCompare() || op1->OperIs(GT_AND)) { /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does not need to materialize the 
result as a 0 or 1. */ diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index db27df6119c87e..745527231b11ee 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9081,6 +9081,7 @@ class OptBoolsDsc public: bool optOptimizeBoolsCondBlock(); + bool optOptimizeCompareChainCondBlock(); bool optOptimizeBoolsReturnBlock(BasicBlock* b3); #ifdef DEBUG void optOptimizeBoolsGcStress(); @@ -9316,6 +9317,188 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() return true; } +//----------------------------------------------------------------------------- +// optOptimizeCompareChainCondBlock: Create AND chain when both m_b1 and m_b2 are BBJ_COND. +// +// Returns: +// true if chain optimization is done and m_b1 and m_b2 are folded into m_b1, else false. +// +// Assumptions: +// m_b1 and m_b2 are set on entry. +// +// Notes: +// +// This aims to reduce the number of conditional jumps by joining cases when multiple +// conditions gate the execution of a block. For example: +// If ( a > b || c == d) { x = y; } +// Will become the following. Note that the second condition is inverted. +// +// ------------ BB01 -> BB03 (cond), succs={BB02,BB03} +// * JTRUE +// \--* GE a,b +// +// ------------ BB02 -> BB04 (cond), preds={BB01} succs={BB03,BB04} +// * JTRUE +// \--* NE c,d +// +// ------------ BB03, preds={BB01, BB02} succs={BB04} +// * ASG x,y +// +// These operands will be combined into a single AND chain in the first block (with the first +// condition inverted). +// +// ------------ BB01 -> BB03 (cond), succs={BB03,BB04} +// * JTRUE +// \--* AND +// +--* LT a,b +// \--* NE c,d +// +// ------------ BB03, preds={BB01} succs={BB04} +// * ASG x,y +// +// +// This will also work for statements with else cases: +// If ( a > b || c == d) { x = y; } else { x = z; } +// Here BB04 will contain the else ASG. Both BB03 and BB04 will unconditionally jump to BB05. 
+// +// ------------ BB01 -> BB03 (cond), succs={BB03,BB04} +// * JTRUE +// \--* AND +// +--* LT a,b +// \--* NE c,d +// +// ------------ BB03, preds={BB01} succs={BB05} +// * ASG x,y +// +// ------------ BB04, preds={BB01} succs={BB05} +// * ASG x,z +// +// +// Multiple conditions can be chained together. This is due to the optimization reverse +// iterating through the blocks. For example: +// If ( a > b || c == d || e < f ) { x = y; } +// The first pass will combine "c == d" and "e < f" into a chain. The second pass will then +// combine the "a > b" with the earlier chain, giving: +// +// ------------ BB01 -> BB03 (cond), succs={BB03,BB04} +// * JTRUE +// \--* AND +// +--* AND +// +--* NE c,d +// +--* GE e,f +// \--* LT a,b +// +// ------------ BB03, preds={BB01} succs={BB04} +// * ASG x,y +// +// +// Conditions connected by && are not yet checked for. For example: +// If ( a > b && c == d ) { x = y; } +// +bool OptBoolsDsc::optOptimizeCompareChainCondBlock() +{ + assert(m_b1 != nullptr && m_b2 != nullptr && m_b3 == nullptr); + m_t3 = nullptr; + + if (!(m_b1->bbNext == m_b2 && m_b1->bbJumpDest == m_b2->bbNext && m_b1->bbJumpDest->bbNext == m_b2->bbJumpDest)) + { + return false; + } + + Statement* const s1 = optOptimizeBoolsChkBlkCond(); + if (s1 == nullptr) + { + return false; + } + Statement* s2 = m_b2->firstStmt(); + + assert(m_testInfo1.testTree->gtOper == GT_JTRUE); + GenTree* cond1 = m_testInfo1.testTree->AsOp()->gtOp1; + + assert(m_testInfo2.testTree->gtOper == GT_JTRUE); + GenTree* cond2 = m_testInfo2.testTree->AsOp()->gtOp1; + + // Ensure both conditions are suitable. + if (!cond1->OperIsCmpCompare()) + { + return false; + } + if (!(cond2->OperIsCmpCompare() || cond2->OperIs(GT_AND))) + { + return false; + } + + // Ensure there are no additional side effects. 
+ if ((cond1->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0 || + (cond2->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0) + { + return false; + } + + // Integer compares only for now (until support for Arm64 fccmp instruction is added) + if (varTypeIsFloating(cond1->gtGetOp1()->TypeGet()) || varTypeIsFloating(cond2->gtGetOp1()->TypeGet())) + { + return false; + } + + // Avoid cases where the compare will be optimized better later: + // * cmp(and(x, y), 0) will be turned into a TEST_ opcode. + // * Compares against zero will be optimized with cbz. + GenTree* cond1Op2 = cond1->gtGetOp2(); + GenTree* cond2Op2 = cond2->gtGetOp2(); + if ((cond1Op2->IsIntegralConst() && cond1Op2->AsIntCon()->IconValue() == 0) || + (cond2Op2->IsIntegralConst() && cond2Op2->AsIntCon()->IconValue() == 0)) + { + return false; + } + + // Remove the first JTRUE statement. + constexpr bool isUnlink = true; + m_comp->fgRemoveStmt(m_b1, s1 DEBUGARG(isUnlink)); + + // Invert the first condition. + GenTree* revCond = m_comp->gtReverseCond(cond1); + assert(cond1 == revCond); // Ensure `gtReverseCond` did not create a new node. + + // Create a chain. + GenTree* newchain = m_comp->gtNewOperNode(GT_AND, TYP_INT, cond1, cond2); + newchain->AsOp()->gtFlags |= (cond1->gtFlags & GTF_ALL_EFFECT); + newchain->AsOp()->gtFlags |= (cond2->gtFlags & GTF_ALL_EFFECT); + cond1->gtFlags &= ~GTF_RELOP_JMP_USED; + cond2->gtFlags &= ~GTF_RELOP_JMP_USED; + newchain->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); + + // Wire the chain into the second block + m_testInfo2.testTree->AsOp()->gtOp1 = newchain; + m_testInfo2.testTree->AsOp()->gtFlags |= (newchain->gtFlags & GTF_ALL_EFFECT); + m_comp->gtSetEvalOrder(m_testInfo2.testTree); + m_comp->fgSetStmtSeq(s2); + + // Update the flow. + m_comp->fgRemoveAllRefPreds(m_b1->bbJumpDest, m_b1); + m_b1->bbJumpKind = BBJ_NONE; + + // Fixup flags. + m_b2->bbFlags |= (m_b1->bbFlags & BBF_COPY_PROPAGATE); + + // Join the two blocks. 
This is done now to ensure that additional conditions can be chained. + if (m_comp->fgCanCompactBlocks(m_b1, m_b2)) + { + m_comp->fgCompactBlocks(m_b1, m_b2); + } + +#ifdef DEBUG + if (m_comp->verbose) + { + printf("\nCombined conditions " FMT_BB " and " FMT_BB " into AND chain :\n", m_b1->bbNum, m_b2->bbNum); + m_comp->fgDumpBlock(m_b1); + printf("\n"); + } +#endif + + return true; +} + //----------------------------------------------------------------------------- // optOptimizeBoolsChkBlkCond: Checks block conditions if it can be boolean optimized // @@ -10067,7 +10250,8 @@ PhaseStatus Compiler::optOptimizeBools() numPasses++; change = false; - for (BasicBlock* const b1 : Blocks()) + // Reverse iterate through the blocks. + for (BasicBlock* b1 = fgLastBB; b1 != nullptr; b1 = b1->bbPrev) { // We're only interested in conditional jumps here @@ -10108,6 +10292,13 @@ PhaseStatus Compiler::optOptimizeBools() change = true; numCond++; } +#ifdef TARGET_ARM64 + else if (optBoolsDsc.optOptimizeCompareChainCondBlock()) + { + change = true; + numCond++; + } +#endif } else if (b2->bbJumpKind == BBJ_RETURN) { diff --git a/src/tests/JIT/opt/Compares/compareAnd2Chains.cs b/src/tests/JIT/opt/Compares/compareAnd2Chains.cs index 44ce6c88f6004e..b7e1c8e940dabe 100644 --- a/src/tests/JIT/opt/Compares/compareAnd2Chains.cs +++ b/src/tests/JIT/opt/Compares/compareAnd2Chains.cs @@ -178,6 +178,122 @@ public class ComparisonTestAnd2Chains public static bool Ge_double_2(double a1, double a2) => a1 >= 5.5 & a2 >= 5.5; + [MethodImpl(MethodImplOptions.NoInlining)] + public static void consume(T a1, T a2) {} + + // If conditions that are consumed. 
+ + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Lt_byte_2_consume(byte a1, byte a2) { + //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #10 + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #11, nc, ge + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ge + if (a1 < 10 || a2 < 11) { a1 = 10; } + consume(a1, a2); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Le_short_2_consume(short a1, short a2) { + //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #10 + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #12, nzc, gt + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, gt + if (a1 <= 10 || a2 <= 12) { a1 = 10; } + consume(a1, a2); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Gt_int_2_consume(int a1, int a2) { + //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #10 + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #13, 0, le + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, le + if (a1 > 10 || a2 > 13) { a1 = 10; } + consume(a1, a2); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Ge_long_2_consume(long a1, long a2) { + //ARM64-FULL-LINE: cmp {{x[0-9]+}}, #10 + //ARM64-FULL-LINE-NEXT: ccmp {{x[0-9]+}}, #14, z, lt + //ARM64-FULL-LINE-NEXT: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lt + if (a1 >= 10 || a2 >= 14) { a1 = 10; } + consume(a1, a2); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Eq_ushort_2_consume(ushort a1, ushort a2) { + //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #10 + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #15, z, ne + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ne + if (a1 == 10 || a2 == 15) { a1 = 10; } + consume(a1, a2); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Ne_uint_2_consume(uint a1, uint a2) { + //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #10 + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #16, 0, eq + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, eq + if (a1 != 10 || 
a2 != 16) { a1 = 10; } + consume(a1, a2); + } + + /* If/Else conditions that consume. */ + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Le_else_byte_2_consume(byte a1, byte a2) + { + //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #11 + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #22, nzc, gt + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, gt + if (a1 <= 11 || a2 <= 22) { a1 = 20; } else { a1 = 200; } + consume(a1, a2); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Gt_else_short_2_consume(short a1, short a2) { + //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #11 + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #23, 0, le + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, le + if (a1 > 11 || a2 > 23) { a1 = 20; } else { a1 = 200; } + consume(a1, a2); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Ge_else_int_2_consume(int a1, int a2) { + //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #11 + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #24, z, lt + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lt + if (a1 >= 11 || a2 >= 24) { a1 = 20; } else { a1 = 200; } + consume(a1, a2); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Eq_else_long_2_consume(long a1, long a2) { + //ARM64-FULL-LINE: cmp {{x[0-9]+}}, #11 + //ARM64-FULL-LINE-NEXT: ccmp {{x[0-9]+}}, #25, z, ne + //ARM64-FULL-LINE-NEXT: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ne + if (a1 == 11 || a2 == 25) { a1 = 20; } else { a1 = 200; } + consume(a1, a2); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Ne_else_ushort_2_consume(ushort a1, ushort a2) { + //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #11 + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #26, 0, eq + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, eq + if (a1 != 11 || a2 != 26) { a1 = 20; } else { a1 = 200; } + consume(a1, a2); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void 
Lt_else_uint_2_consume(uint a1, uint a2) { + //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #11 + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #27, 0, hs + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, hs + if (a1 < 11 || a2 < 27) { a1 = 20; } else { a1 = 200; } + consume(a1, a2); + } + [MethodImpl(MethodImplOptions.NoInlining)] public static int Main() { @@ -457,6 +573,21 @@ public static int Main() return 101; } + Lt_byte_2_consume(10, 11); + Le_short_2_consume(12, 13); + Gt_int_2_consume(14, 15); + Ge_long_2_consume(16, 17); + Eq_ushort_2_consume(18, 19); + Ne_uint_2_consume(20, 21); + + Le_else_byte_2_consume(10, 11); + Le_else_byte_2_consume(12, 13); + Gt_else_short_2_consume(14, 15); + Ge_else_int_2_consume(16, 17); + Eq_else_long_2_consume(18, 19); + Ne_else_ushort_2_consume(20, 21); + Lt_else_uint_2_consume(22, 23); + Console.WriteLine("PASSED"); return 100; } diff --git a/src/tests/JIT/opt/Compares/compareAnd2Chains.csproj b/src/tests/JIT/opt/Compares/compareAnd2Chains.csproj index 5e5fbae5cb863b..42a89c8384d74e 100644 --- a/src/tests/JIT/opt/Compares/compareAnd2Chains.csproj +++ b/src/tests/JIT/opt/Compares/compareAnd2Chains.csproj @@ -3,10 +3,15 @@ Exe - PdbOnly + None True - + + true + + + + diff --git a/src/tests/JIT/opt/Compares/compareAnd3Chains.cs b/src/tests/JIT/opt/Compares/compareAnd3Chains.cs index 2f6dcfa84f6e9a..feabefce22a503 100644 --- a/src/tests/JIT/opt/Compares/compareAnd3Chains.cs +++ b/src/tests/JIT/opt/Compares/compareAnd3Chains.cs @@ -178,6 +178,21 @@ public class ComparisonTestAnd3Chains public static bool Ge_double_3(double a1, double a2, double a3) => a1 >= 5.5 & a2 >= 5.5 & a3 >= 5.5; + [MethodImpl(MethodImplOptions.NoInlining)] + public static void consume(T a1, T a2, T a3) {} + + // If conditions that are consumed. 
+ + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Le_byte_3_consume(byte a1, byte a2, byte a3) { + //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #11 + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #12, nzc, gt + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #10, nzc, gt + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, gt + if (a1 <= 10 || a2 <= 11 || a3 <= 12) { a1 = 10; } + consume(a1, a2, a3); + } + [MethodImpl(MethodImplOptions.NoInlining)] public static int Main() { @@ -457,6 +472,8 @@ public static int Main() return 101; } + Le_byte_3_consume(101, 102, 103); + Console.WriteLine("PASSED"); return 100; } diff --git a/src/tests/JIT/opt/Compares/compareAnd3Chains.csproj b/src/tests/JIT/opt/Compares/compareAnd3Chains.csproj index 5e5fbae5cb863b..6e57bab578a715 100644 --- a/src/tests/JIT/opt/Compares/compareAnd3Chains.csproj +++ b/src/tests/JIT/opt/Compares/compareAnd3Chains.csproj @@ -1,12 +1,16 @@ Exe - - - PdbOnly + None True + True - + + true + + + + diff --git a/src/tests/JIT/opt/Compares/compares.cs b/src/tests/JIT/opt/Compares/compares.cs index 9a83ee566f79c3..db6e82c67169f1 100644 --- a/src/tests/JIT/opt/Compares/compares.cs +++ b/src/tests/JIT/opt/Compares/compares.cs @@ -82,7 +82,7 @@ public static void consume(T a1, T a2) {} [MethodImpl(MethodImplOptions.NoInlining)] public static void Eq_byte_consume(byte a1, byte a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, eq + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ne if (a1 == a2) { a1 = 10; } consume(a1, a2); } @@ -91,7 +91,7 @@ public static void Eq_byte_consume(byte a1, byte a2) { public static void Ne_short_consume(short a1, short a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ne + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, eq if (a1 != a2) { a1 = 11; } consume(a1, a2); } @@ -100,7 
+100,7 @@ public static void Ne_short_consume(short a1, short a2) public static void Lt_int_consume(int a1, int a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lt + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ge if (a1 < a2) { a1 = 12; } consume(a1, a2); } @@ -109,7 +109,7 @@ public static void Lt_int_consume(int a1, int a2) public static void Le_long_consume(long a1, long a2) { //ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, le + //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, gt if (a1 <= a2) { a1 = 13; } consume(a1, a2); } @@ -118,7 +118,7 @@ public static void Le_long_consume(long a1, long a2) public static void Gt_ushort_consume(ushort a1, ushort a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, gt + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, le if (a1 > a2) { a1 = 14; } consume(a1, a2); } @@ -127,7 +127,7 @@ public static void Gt_ushort_consume(ushort a1, ushort a2) public static void Ge_uint_consume(uint a1, uint a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ge + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lt if (a1 >= a2) { a1 = 15; } consume(a1, a2); } @@ -136,7 +136,7 @@ public static void Ge_uint_consume(uint a1, uint a2) public static void Eq_ulong_consume(ulong a1, ulong a2) { //ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, eq + //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ne if (a1 == a2) { a1 = 16; } consume(a1, a2); } @@ -145,7 +145,7 @@ public static void Eq_ulong_consume(ulong a1, ulong a2) public static void Ne_float_int_consume(float f1, float f2, int a1, int a2) { 
//ARM64-FULL-LINE: fcmp {{s[0-9]+}}, {{s[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ne + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, eq if (f1 != f2) { a1 = 17; } consume(a1, a2); } @@ -154,7 +154,7 @@ public static void Ne_float_int_consume(float f1, float f2, int a1, int a2) public static void Lt_double_long_consume(double f1, double f2, long a1, long a2) { //ARM64-FULL-LINE: fcmp {{d[0-9]+}}, {{d[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{x[0-31]}}, {{x[0-31]}}, {{x[0-31]}}, lt + //ARM64-NEXT-FULL-LINE: csel {{x[0-31]}}, {{x[0-31]}}, {{x[0-31]}}, ge if (f1 < f2) { a1 = 18; } consume(a1, a2); } @@ -163,7 +163,7 @@ public static void Lt_double_long_consume(double f1, double f2, long a1, long a2 public static void Eq_double_long_consume(double f1, double f2, long a1, long a2) { //ARM64-FULL-LINE: fcmp {{d[0-9]+}}, {{d[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{x[0-31]}}, {{x[0-31]}}, {{x[0-31]}}, eq + //ARM64-NEXT-FULL-LINE: csel {{x[0-31]}}, {{x[0-31]}}, {{x[0-31]}}, ne if (f1 == f2) { a1 = 18; } consume(a1, a2); } @@ -172,7 +172,7 @@ public static void Eq_double_long_consume(double f1, double f2, long a1, long a2 public static void Ne_double_int_consume(double f1, double f2, int a1, int a2) { //ARM64-FULL-LINE: fcmp {{d[0-9]+}}, {{d[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ne + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, eq if (f1 != f2) { a1 = 18; } consume(a1, a2); } @@ -183,7 +183,7 @@ public static void Ne_double_int_consume(double f1, double f2, int a1, int a2) public static void Ne_else_byte_consume(byte a1, byte a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ne + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, eq if (a1 != a2) { a1 = 10; } else { a1 = 100; } consume(a1, a2); } @@ -192,7 +192,7 @@ public static void Ne_else_byte_consume(byte a1, 
byte a2) public static void Lt_else_short_consume(short a1, short a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lt + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ge if (a1 < a2) { a1 = 11; } else { a1 = 101; } consume(a1, a2); } @@ -201,7 +201,7 @@ public static void Lt_else_short_consume(short a1, short a2) public static void Le_else_int_consume(int a1, int a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, le + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, gt if (a1 <= a2) { a1 = 12; } else { a1 = 102; } consume(a1, a2); } @@ -210,7 +210,7 @@ public static void Le_else_int_consume(int a1, int a2) public static void Gt_else_long_consume(long a1, long a2) { //ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, gt + //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, le if (a1 > a2) { a1 = 13; } else { a1 = 103; } consume(a1, a2); } @@ -219,7 +219,7 @@ public static void Gt_else_long_consume(long a1, long a2) public static void Ge_else_ushort_consume(ushort a1, ushort a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ge + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lt if (a1 >= a2) { a1 = 14; } else { a1 = 104; } consume(a1, a2); } @@ -228,7 +228,7 @@ public static void Ge_else_ushort_consume(ushort a1, ushort a2) public static void Eq_else_uint_consume(uint a1, uint a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, eq + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ne if (a1 == a2) { a1 = 15; } else { a1 = 105; } consume(a1, a2); } @@ -237,7 +237,7 @@ public static void Eq_else_uint_consume(uint a1, uint 
a2) public static void Ne_else_ulong_consume(ulong a1, ulong a2) { //ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ne + //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, eq if (a1 != a2) { a1 = 16; } else { a1 = 106; } consume(a1, a2); } @@ -246,7 +246,7 @@ public static void Ne_else_ulong_consume(ulong a1, ulong a2) public static void Lt_else_float_int_consume(float f1, float f2, int a1, int a2) { //ARM64-FULL-LINE: fcmp {{s[0-9]+}}, {{s[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lt + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ge if (f1 < f2) { a1 = 17; } else { a1 = 107; } consume(a1, a2); } @@ -255,7 +255,7 @@ public static void Lt_else_float_int_consume(float f1, float f2, int a1, int a2) public static void Le_else_double_int_consume(double f1, double f2, int a1, int a2) { //ARM64-FULL-LINE: fcmp {{d[0-9]+}}, {{d[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, le + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, gt if (f1 <= f2) { a1 = 18; } else { a1 = 108; } consume(a1, a2); } @@ -266,7 +266,7 @@ public static void Le_else_double_int_consume(double f1, double f2, int a1, int public static byte Lt_else_byte_return(byte a1, byte a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lt + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ge return (a1 < a2) ? (byte)10 : (byte)100; } @@ -274,7 +274,7 @@ public static byte Lt_else_byte_return(byte a1, byte a2) public static short Le_else_short_return(short a1, short a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, le + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, gt return (a1 <= a2) ? 
(short)11 : (short)101; } @@ -282,7 +282,7 @@ public static short Le_else_short_return(short a1, short a2) public static int Gt_else_int_return(int a1, int a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, gt + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, le return (a1 > a2) ? (int)12 : (int)102; } @@ -290,7 +290,7 @@ public static int Gt_else_int_return(int a1, int a2) public static long Ge_else_long_return(long a1, long a2) { //ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ge + //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lt return (a1 >= a2) ? (long)13 : (long)103; } @@ -298,7 +298,7 @@ public static long Ge_else_long_return(long a1, long a2) public static ushort Eq_else_ushort_return(ushort a1, ushort a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, eq + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ne return (a1 == a2) ? (ushort)14 : (ushort)104; } @@ -306,7 +306,7 @@ public static ushort Eq_else_ushort_return(ushort a1, ushort a2) public static uint Ne_else_uint_return(uint a1, uint a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ne + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, eq return (a1 != a2) ? (uint)15 : (uint)105; } @@ -314,7 +314,7 @@ public static uint Ne_else_uint_return(uint a1, uint a2) public static ulong Lt_else_ulong_return(ulong a1, ulong a2) { //ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lt + //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ge return (a1 < a2) ? 
(ulong)16 : (ulong)106; } @@ -322,7 +322,7 @@ public static ulong Lt_else_ulong_return(ulong a1, ulong a2) public static int Le_else_float_int_return(float a1, float a2) { //ARM64-FULL-LINE: fcmp {{s[0-9]+}}, {{s[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, le + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, gt return (a1 <= a2) ? 17 : 107; } @@ -330,7 +330,7 @@ public static int Le_else_float_int_return(float a1, float a2) public static int Gt_else_double_int_return(double a1, double a2) { //ARM64-FULL-LINE: fcmp {{d[0-9]+}}, {{d[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, gt + //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, le return (a1 > a2) ? 18 : 108; } From 48b68ecd77dfd5c7d1504c7a01d4760eb9be34a0 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 26 Jan 2023 15:07:07 +0000 Subject: [PATCH 02/31] Remove contained and morph checks --- src/coreclr/jit/gentree.h | 1 + src/coreclr/jit/morph.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 123e8d2844a5ba..196658eaf92e34 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -2008,6 +2008,7 @@ struct GenTree void SetContained() { + assert(IsValue()); gtFlags |= GTF_CONTAINED; assert(isContained()); } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index af45f55220001e..21d2117447da79 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -8916,7 +8916,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA noway_assert(op1); - if (op1->OperIsCompare() || op1->OperIs(GT_AND)) + if (op1->OperIsCompare()) { /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does not need to materialize the result as a 0 or 1. 
*/ From 56ff63109ea5af95ada1cd0bcd274ea641a190e9 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 26 Jan 2023 17:18:47 +0000 Subject: [PATCH 03/31] Add GT_ANDFLAGS node --- src/coreclr/jit/codegen.h | 1 + src/coreclr/jit/codegenarm64.cpp | 60 ++++++++++++++++++++++++------ src/coreclr/jit/codegenarmarch.cpp | 7 +++- src/coreclr/jit/codegenlinear.cpp | 36 +++++++++--------- src/coreclr/jit/gentree.cpp | 1 - src/coreclr/jit/gentree.h | 3 +- src/coreclr/jit/gtlist.h | 4 ++ src/coreclr/jit/lower.cpp | 1 + src/coreclr/jit/lsraarm64.cpp | 15 ++------ 9 files changed, 85 insertions(+), 43 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index aa3fbefad70039..f30654cfb661c2 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -887,6 +887,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genCodeForContainedCompareChain(GenTree* tree, bool* inchain, GenCondition* prevCond); #endif void genCodeForSelect(GenTreeOp* select); + void genCodeForAndFlags(GenTreeOp* select); void genIntrinsic(GenTreeIntrinsic* treeNode); void genPutArgStk(GenTreePutArgStk* treeNode); void genPutArgReg(GenTreeOp* tree); diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 94b5ae87efa59b..0e046f183c2d55 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2519,12 +2519,12 @@ void CodeGen::genCodeForBinary(GenTreeOp* tree) GenTree* op1 = tree->gtGetOp1(); GenTree* op2 = tree->gtGetOp2(); + // The arithmetic node must be sitting in a register (since it's not contained) + assert(targetReg != REG_NA); + // Handles combined operations: 'madd', 'msub' if (op2->OperIs(GT_MUL) && op2->isContained()) { - // The arithmetic node must be sitting in a register (since it's not contained) - assert(targetReg != REG_NA); - // In the future, we might consider enabling this for floating-point "unsafe" math. 
assert(varTypeIsIntegral(tree)); @@ -2720,17 +2720,11 @@ void CodeGen::genCodeForBinary(GenTreeOp* tree) assert(chain); // Move the result from flags into a register. - if (targetReg != REG_NA) - { - inst_SETCC(cond, tree->TypeGet(), targetReg); - genProduceReg(tree); - } + inst_SETCC(cond, tree->TypeGet(), targetReg); + genProduceReg(tree); return; } - // The arithmetic node must be sitting in a register (since it's not contained) - assert(targetReg != REG_NA); - instruction ins = genGetInsForOper(tree->OperGet(), targetType); if ((tree->gtFlags & GTF_SET_FLAGS) != 0) @@ -4790,6 +4784,50 @@ void CodeGen::genCodeForSelect(GenTreeOp* tree) genProduceReg(tree); } +//------------------------------------------------------------------------ +// genCodeForAndFlags: Generates code for ANDFLAGS statement. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForAndFlags(GenTreeOp* tree) +{ + var_types targetType = tree->TypeGet(); + emitter* emit = GetEmitter(); + + assert(tree->OperIs(GT_ANDFLAGS)); + assert(tree->GetRegNum() == REG_NA); + + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + + if (tree->isContainedCompareChainSegment(op2)) + { + GenCondition cond; + bool chain = false; + + JITDUMP("Generating compare chain:\n"); + if (op1->isContained()) + { + // Generate Op1 into flags. + genCodeForContainedCompareChain(op1, &chain, &cond); + assert(chain); + } + else + { + // Op1 is not contained, move it from a register into flags. + emit->emitIns_R_I(INS_cmp, emitActualTypeSize(op1), op1->GetRegNum(), 0); + cond = GenCondition::NE; + chain = true; + } + + // Gen Op2 into flags. + genCodeForContainedCompareChain(op2, &chain, &cond); + assert(chain); + return; + } +} + //------------------------------------------------------------------------ // genCodeForJumpCompare: Generates code for jmpCompare statement. 
// diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index e149122344cf32..d5c7813f1c40a1 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -214,6 +214,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_OR: case GT_XOR: + case GT_AND: case GT_AND_NOT: assert(varTypeIsIntegralOrI(treeNode)); @@ -226,7 +227,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_SUB_HI: #endif // !defined(TARGET_64BIT) - case GT_AND: case GT_ADD: case GT_SUB: case GT_MUL: @@ -366,6 +366,11 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_SELECT: genCodeForSelect(treeNode->AsConditional()); break; + + case GT_ANDFLAGS: + genConsumeOperands(treeNode->AsOp()); + genCodeForAndFlags(treeNode->AsOp()); + break; #endif case GT_JTRUE: diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 2e01a8999d038c..dcbcb93afa9376 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2601,27 +2601,27 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) #if defined(TARGET_ARM64) if (relop->OperIs(GT_AND)) { - if ((relop->gtFlags & GTF_SET_FLAGS) == 0) - { - // The condition was generated into a register. - regNumber reg = relop->GetRegNum(); - emitAttr attr = emitActualTypeSize(relop->TypeGet()); - GetEmitter()->emitIns_J_R(INS_cbnz, attr, compiler->compCurBB->bbJumpDest, reg); - return; - } - else + // The condition was generated into a register. + assert(relop->gtType != TYP_VOID); + regNumber reg = relop->GetRegNum(); + assert(reg != REG_NA); + emitAttr attr = emitActualTypeSize(relop->TypeGet()); + GetEmitter()->emitIns_J_R(INS_cbnz, attr, compiler->compCurBB->bbJumpDest, reg); + return; + } + else if (relop->OperIs(GT_ANDFLAGS)) + { + // Find the last contained compare in the chain. 
+ assert(relop->gtType == TYP_VOID); + GenTreeOp* lastCompare = relop->gtGetOp2()->AsOp(); + assert(lastCompare->isContained()); + while (!lastCompare->OperIsCompare()) { - // Find the last contained compare in the chain. - GenTreeOp* lastCompare = relop->gtGetOp2()->AsOp(); + assert(lastCompare->OperIs(GT_AND)); + lastCompare = lastCompare->gtGetOp2()->AsOp(); assert(lastCompare->isContained()); - while (!lastCompare->OperIsCompare()) - { - assert(lastCompare->OperIs(GT_AND)); - lastCompare = lastCompare->gtGetOp2()->AsOp(); - assert(lastCompare->isContained()); - } - condition = GenCondition::FromRelop(lastCompare); } + condition = GenCondition::FromRelop(lastCompare); } else #endif diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 6fd72084266154..15479d99ef4a41 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -10668,7 +10668,6 @@ void Compiler::gtDispNode(GenTree* tree, IndentStack* indentStack, _In_ _In_opt_ case GT_TEST_EQ: case GT_TEST_NE: case GT_SELECT: - case GT_AND: if (tree->gtFlags & GTF_RELOP_NAN_UN) { printf("N"); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 196658eaf92e34..f4228ba2a2ffb1 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -913,7 +913,8 @@ struct GenTree // Node and its child in isolation form a contained compare chain. bool isContainedCompareChainSegment(GenTree* child) const { - return (OperIs(GT_AND) && child->isContained() && (child->OperIs(GT_AND) || child->OperIsCmpCompare())); + return ((OperIs(GT_AND) || OperIs(GT_ANDFLAGS)) && child->isContained() && + (child->OperIs(GT_AND) || child->OperIsCmpCompare())); } bool isContainedFltOrDblImmed() const diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index a0650f1d0dc05b..61b750f307ceed 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -242,6 +242,10 @@ GTNODE(SETCC , GenTreeCC ,0,GTK_LEAF|DBK_NOTHIR) // The XARCH BT instruction. 
Like CMP, this sets the condition flags (CF to be precise) and does not produce a value. GTNODE(BT , GenTreeOp ,0,(GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)) #endif +// Sets the condition flags according to the combined results of it's children. +#if defined(TARGET_ARM64) +GTNODE(ANDFLAGS , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR) +#endif //----------------------------------------------------------------------------- // Other nodes that look like unary/binary operators: diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 29975b6ca5b9f3..8c3998301f3a31 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7179,6 +7179,7 @@ void Lowering::ContainCheckJTrue(GenTreeOp* node) // into a register. if (op1->gtGetOp2()->isContained()) { + op1->gtOper = GT_ANDFLAGS; op1->gtType = TYP_VOID; op1->gtFlags |= GTF_SET_FLAGS; } diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 30d770a18322ca..a1129d0b02b1b5 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -307,18 +307,11 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree); break; - case GT_AND: + case GT_ANDFLAGS: srcCount = BuildBinaryUses(tree->AsOp()); - if ((tree->gtFlags & GTF_SET_FLAGS) != 0) - { - assert(tree->TypeGet() == TYP_VOID); - assert(dstCount == 0); - } - else - { - assert(dstCount == 1); - BuildDef(tree); - } + assert(dstCount == 0); + assert((tree->gtFlags & GTF_SET_FLAGS) != 0); + assert(tree->TypeGet() == TYP_VOID); break; case GT_BFIZ: From a919a13a9077f038224bad0e11ab069e661568ae Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 30 Jan 2023 09:20:14 +0000 Subject: [PATCH 04/31] Fix X64 build --- src/coreclr/jit/gentree.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index f4228ba2a2ffb1..7002518f05a1cd 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -910,12 +910,14 @@ struct GenTree return isContained() && 
IsCnsIntOrI() && !isUsedFromSpillTemp(); } +#if defined(TARGET_ARM64) // Node and its child in isolation form a contained compare chain. bool isContainedCompareChainSegment(GenTree* child) const { return ((OperIs(GT_AND) || OperIs(GT_ANDFLAGS)) && child->isContained() && (child->OperIs(GT_AND) || child->OperIsCmpCompare())); } +#endif bool isContainedFltOrDblImmed() const { From 893cbb5d343f10feb5a1fd7ee6a3ffcae8271cba Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 1 Feb 2023 12:33:10 +0000 Subject: [PATCH 05/31] Review fixups --- src/coreclr/jit/codegenarm64.cpp | 40 +++++++++++++++----------------- src/coreclr/jit/gtlist.h | 2 +- src/coreclr/jit/lower.cpp | 4 ++-- src/coreclr/jit/lsraarm64.cpp | 2 +- src/coreclr/jit/optimizer.cpp | 13 +++++------ 5 files changed, 29 insertions(+), 32 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 0e046f183c2d55..37c0081373ee28 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -4801,31 +4801,29 @@ void CodeGen::genCodeForAndFlags(GenTreeOp* tree) GenTree* op1 = tree->gtGetOp1(); GenTree* op2 = tree->gtGetOp2(); - if (tree->isContainedCompareChainSegment(op2)) - { - GenCondition cond; - bool chain = false; + assert (tree->isContainedCompareChainSegment(op2)); - JITDUMP("Generating compare chain:\n"); - if (op1->isContained()) - { - // Generate Op1 into flags. - genCodeForContainedCompareChain(op1, &chain, &cond); - assert(chain); - } - else - { - // Op1 is not contained, move it from a register into flags. - emit->emitIns_R_I(INS_cmp, emitActualTypeSize(op1), op1->GetRegNum(), 0); - cond = GenCondition::NE; - chain = true; - } + GenCondition cond; + bool chain = false; - // Gen Op2 into flags. - genCodeForContainedCompareChain(op2, &chain, &cond); + JITDUMP("Generating compare chain:\n"); + if (op1->isContained()) + { + // Generate Op1 into flags. 
+ genCodeForContainedCompareChain(op1, &chain, &cond); assert(chain); - return; } + else + { + // Op1 is not contained, move it from a register into flags. + emit->emitIns_R_I(INS_cmp, emitActualTypeSize(op1), op1->GetRegNum(), 0); + cond = GenCondition::NE; + chain = true; + } + + // Gen Op2 into flags. + genCodeForContainedCompareChain(op2, &chain, &cond); + assert(chain); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index 61b750f307ceed..a2a7b5ee1795b2 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -242,7 +242,7 @@ GTNODE(SETCC , GenTreeCC ,0,GTK_LEAF|DBK_NOTHIR) // The XARCH BT instruction. Like CMP, this sets the condition flags (CF to be precise) and does not produce a value. GTNODE(BT , GenTreeOp ,0,(GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)) #endif -// Sets the condition flags according to the combined results of it's children. +// Sets the condition flags according to the combined results of its children. #if defined(TARGET_ARM64) GTNODE(ANDFLAGS , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR) #endif diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 8c3998301f3a31..f257e7a1863ffd 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7179,7 +7179,7 @@ void Lowering::ContainCheckJTrue(GenTreeOp* node) // into a register. 
if (op1->gtGetOp2()->isContained()) { - op1->gtOper = GT_ANDFLAGS; + op1->SetOper(GT_ANDFLAGS); op1->gtType = TYP_VOID; op1->gtFlags |= GTF_SET_FLAGS; } @@ -7187,7 +7187,7 @@ void Lowering::ContainCheckJTrue(GenTreeOp* node) #endif else { - assert(false); + unreached(); } } diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index a1129d0b02b1b5..cc9ce33b7c2902 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -311,7 +311,7 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = BuildBinaryUses(tree->AsOp()); assert(dstCount == 0); assert((tree->gtFlags & GTF_SET_FLAGS) != 0); - assert(tree->TypeGet() == TYP_VOID); + assert(tree->TypeIs(TYP_VOID)); break; case GT_BFIZ: diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 745527231b11ee..a316cbd9553e1c 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9335,7 +9335,7 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() // // ------------ BB01 -> BB03 (cond), succs={BB02,BB03} // * JTRUE -// \--* GE a,b +// \--* GT a,b // // ------------ BB02 -> BB04 (cond), preds={BB01} succs={BB03,BB04} // * JTRUE @@ -9350,7 +9350,7 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() // ------------ BB01 -> BB03 (cond), succs={BB03,BB04} // * JTRUE // \--* AND -// +--* LT a,b +// +--* LE a,b // \--* NE c,d // // ------------ BB03, preds={BB01} succs={BB04} @@ -9412,11 +9412,10 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() } Statement* s2 = m_b2->firstStmt(); - assert(m_testInfo1.testTree->gtOper == GT_JTRUE); - GenTree* cond1 = m_testInfo1.testTree->AsOp()->gtOp1; - - assert(m_testInfo2.testTree->gtOper == GT_JTRUE); - GenTree* cond2 = m_testInfo2.testTree->AsOp()->gtOp1; + assert(m_testInfo1.testTree->OperIs(GT_JTRUE)); + GenTree* cond1 = m_testInfo1.testTree->gtGetOp1(); + assert(m_testInfo2.testTree->OperIs(GT_JTRUE)); + GenTree* cond2 = m_testInfo2.testTree->gtGetOp1(); // Ensure both conditions are suitable. 
if (!cond1->OperIsCmpCompare()) From 31c8560fc7f960975610d9a763d56096e7e67c73 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 3 Feb 2023 12:18:13 +0000 Subject: [PATCH 06/31] Add CCMP_EQ/CCMP_NE nodes --- src/coreclr/jit/codegen.h | 2 +- src/coreclr/jit/codegenarm64.cpp | 92 ++++++++++++++++++++++++++---- src/coreclr/jit/codegenarmarch.cpp | 5 +- src/coreclr/jit/codegenlinear.cpp | 13 +++-- src/coreclr/jit/gentree.h | 17 +++++- src/coreclr/jit/gtlist.h | 3 +- src/coreclr/jit/ifconversion.cpp | 3 +- src/coreclr/jit/lower.cpp | 14 +---- src/coreclr/jit/lower.h | 4 +- src/coreclr/jit/lowerarmarch.cpp | 56 ++++++++---------- src/coreclr/jit/lsraarm64.cpp | 11 +--- src/coreclr/jit/lsrabuild.cpp | 8 +-- src/coreclr/jit/optimizer.cpp | 63 ++++++++++++++++---- 13 files changed, 199 insertions(+), 92 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index f30654cfb661c2..2464327f4f8e78 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -887,7 +887,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genCodeForContainedCompareChain(GenTree* tree, bool* inchain, GenCondition* prevCond); #endif void genCodeForSelect(GenTreeOp* select); - void genCodeForAndFlags(GenTreeOp* select); + void genCodeForConditionalCompare(GenTreeOp* select); void genIntrinsic(GenTreeIntrinsic* treeNode); void genPutArgStk(GenTreePutArgStk* treeNode); void genPutArgReg(GenTreeOp* tree); diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 37c0081373ee28..2eeca07bcc4bca 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -4664,7 +4664,7 @@ void CodeGen::genCodeForContainedCompareChain(GenTree* tree, bool* inChain, GenC { assert(tree->isContained()); - if (tree->OperIs(GT_AND)) + if (tree->OperIs(GT_AND) || tree->OperIsConditionalCompare()) { GenTree* op1 = tree->gtGetOp1(); GenTree* op2 = tree->gtGetOp2(); @@ -4734,28 +4734,71 @@ void 
CodeGen::genCodeForSelect(GenTreeOp* tree) assert(genTypeSize(op1Type) == genTypeSize(op2Type)); GenCondition prevCond; - genConsumeRegs(opcond); if (opcond->isContained()) { // Generate the contained condition. if (opcond->OperIsCompare()) { + genConsumeRegs(opcond); genCodeForCompare(opcond->AsOp()); prevCond = GenCondition::FromRelop(opcond); } else { - // Condition is a compare chain. Try to contain it. - assert(opcond->OperIs(GT_AND)); + assert(opcond->OperIsConditionalCompare()); + + // Condition is a compare chain. Generate it. bool chain = false; JITDUMP("Generating compare chain:\n"); - genCodeForContainedCompareChain(opcond, &chain, &prevCond); - assert(chain); + + GenTree* op1 = opcond->gtGetOp1(); + GenTree* op2 = opcond->gtGetOp2(); + + genConsumeRegs(op1); + genConsumeRegs(op2); + + assert(op2->isContained()); + + // If Op1 is contained, generate into flags. + if (op1->isContained()) + { + genCodeForContainedCompareChain(op1, &chain, &prevCond); + assert(chain); + assert(op2->isContained()); + } + // If Op2 is contained, generate into flags. + if (op2->isContained()) + { + genCodeForContainedCompareChain(op2, &chain, &prevCond); + assert(chain); + } + // If nothing was contained, put the result of op2 into flags. + else + { + emitter* emit = GetEmitter(); + emit->emitIns_R_I(INS_cmp, emitActualTypeSize(op1), op2->GetRegNum(), 0); + prevCond = GenCondition::NE; + } + + if (opcond->OperIs(GT_CCMP_NE)) + { + prevCond = GenCondition::Reverse(prevCond); + } } + // else + // { + // // Condition is a compare chain. Try to contain it. + // assert(opcond->OperIs(GT_AND)); + // bool chain = false; + // JITDUMP("Generating compare chain:\n"); + // genCodeForContainedCompareChain(opcond, &chain, &prevCond); + // assert(chain); + // } } else { // Condition has been generated into a register - move it into flags. 
+ genConsumeRegs(opcond); emit->emitIns_R_I(INS_cmp, emitActualTypeSize(opcond), opcond->GetRegNum(), 0); prevCond = GenCondition::NE; } @@ -4785,23 +4828,22 @@ void CodeGen::genCodeForSelect(GenTreeOp* tree) } //------------------------------------------------------------------------ -// genCodeForAndFlags: Generates code for ANDFLAGS statement. +// genCodeForConditionalCompare: Generates code for CCMP node. // // Arguments: // tree - the node // -void CodeGen::genCodeForAndFlags(GenTreeOp* tree) +void CodeGen::genCodeForConditionalCompare(GenTreeOp* tree) { var_types targetType = tree->TypeGet(); emitter* emit = GetEmitter(); - assert(tree->OperIs(GT_ANDFLAGS)); - assert(tree->GetRegNum() == REG_NA); + assert(tree->OperIsConditionalCompare()); GenTree* op1 = tree->gtGetOp1(); GenTree* op2 = tree->gtGetOp2(); - assert (tree->isContainedCompareChainSegment(op2)); + assert(tree->isContainedCompareChainSegment(op2)); GenCondition cond; bool chain = false; @@ -4821,9 +4863,37 @@ void CodeGen::genCodeForAndFlags(GenTreeOp* tree) chain = true; } + // AHTODO: not sure this is always true + assert(op2->isContained()); + // Gen Op2 into flags. genCodeForContainedCompareChain(op2, &chain, &cond); assert(chain); + + // Are we evaluating this into a register? + regNumber targetReg = tree->GetRegNum(); + if (targetReg != REG_NA) + { + // AHTODO: merge this into helper function with genCodeForJumpTrue() + // Find the last contained compare in the chain. 
+ GenCondition condition; + GenTreeOp* lastCompare = tree->gtGetOp2()->AsOp(); + assert(lastCompare->isContained()); + while (!lastCompare->OperIsCompare()) + { + assert(lastCompare->OperIs(GT_AND) || lastCompare->OperIsConditionalCompare()); + lastCompare = lastCompare->gtGetOp2()->AsOp(); + assert(lastCompare->isContained()); + } + condition = GenCondition::FromRelop(lastCompare); + if (tree->OperIs(GT_CCMP_NE)) + { + condition = GenCondition::Reverse(condition); + } + + inst_SETCC(condition, tree->TypeGet(), targetReg); + genProduceReg(tree); + } } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index d5c7813f1c40a1..881273c0010567 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -367,9 +367,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genCodeForSelect(treeNode->AsConditional()); break; - case GT_ANDFLAGS: + case GT_CCMP_EQ: + case GT_CCMP_NE: genConsumeOperands(treeNode->AsOp()); - genCodeForAndFlags(treeNode->AsOp()); + genCodeForConditionalCompare(treeNode->AsOp()); break; #endif diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index dcbcb93afa9376..8e30f3382e173f 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1610,10 +1610,10 @@ void CodeGen::genConsumeRegs(GenTree* tree) assert(cast->isContained()); genConsumeAddress(cast->CastOp()); } - else if (tree->OperIsCompare() || tree->OperIs(GT_AND)) + else if (tree->OperIsCompare() || tree->OperIs(GT_AND) || tree->OperIsConditionalCompare()) { // Compares can be contained by a SELECT. - // ANDs and Cmp Compares may be contained in a chain. + // Compares, ANDs and conditional compares may be contained in a chain. 
genConsumeRegs(tree->gtGetOp1()); genConsumeRegs(tree->gtGetOp2()); } @@ -2601,6 +2601,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) #if defined(TARGET_ARM64) if (relop->OperIs(GT_AND)) { + assert(false); // The condition was generated into a register. assert(relop->gtType != TYP_VOID); regNumber reg = relop->GetRegNum(); @@ -2609,7 +2610,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) GetEmitter()->emitIns_J_R(INS_cbnz, attr, compiler->compCurBB->bbJumpDest, reg); return; } - else if (relop->OperIs(GT_ANDFLAGS)) + else if (relop->OperIsConditionalCompare()) { // Find the last contained compare in the chain. assert(relop->gtType == TYP_VOID); @@ -2617,11 +2618,15 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) assert(lastCompare->isContained()); while (!lastCompare->OperIsCompare()) { - assert(lastCompare->OperIs(GT_AND)); + assert(lastCompare->OperIs(GT_AND) || lastCompare->OperIsConditionalCompare()); lastCompare = lastCompare->gtGetOp2()->AsOp(); assert(lastCompare->isContained()); } condition = GenCondition::FromRelop(lastCompare); + if (relop->OperIs(GT_CCMP_NE)) + { + condition = GenCondition::Reverse(condition); + } } else #endif diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 7002518f05a1cd..823fec8c425bab 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -914,8 +914,9 @@ struct GenTree // Node and its child in isolation form a contained compare chain. bool isContainedCompareChainSegment(GenTree* child) const { - return ((OperIs(GT_AND) || OperIs(GT_ANDFLAGS)) && child->isContained() && - (child->OperIs(GT_AND) || child->OperIsCmpCompare())); + // AHTODO: Not sure we need all of these? 
+ return ((OperIs(GT_AND) || OperIsConditionalCompare()) && child->isContained() && + (child->OperIs(GT_AND) || child->OperIsCmpCompare() || child->OperIsConditionalCompare())); } #endif @@ -1093,7 +1094,7 @@ struct GenTree { // These are the only operators which can produce either VOID or non-VOID results. assert(OperIs(GT_NOP, GT_CALL, GT_COMMA, GT_AND) || OperIsCompare() || OperIsLong() || - OperIsHWIntrinsic() || IsCnsVec()); + OperIsHWIntrinsic() || IsCnsVec() || OperIsConditionalCompare()); return false; } @@ -1379,6 +1380,16 @@ struct GenTree return OperIsConditional(OperGet()); } + static bool OperIsConditionalCompare(genTreeOps gtOper) + { + return (GT_CCMP_EQ == gtOper || GT_CCMP_NE == gtOper); + } + + bool OperIsConditionalCompare() const + { + return OperIsConditionalCompare(OperGet()); + } + static bool OperIsCC(genTreeOps gtOper) { return (gtOper == GT_JCC) || (gtOper == GT_SETCC); diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index a2a7b5ee1795b2..190d99703c9a35 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -244,7 +244,8 @@ GTNODE(BT , GenTreeOp ,0,(GTK_BINOP|GTK_NOVALUE|DBK_NOTHI #endif // Sets the condition flags according to the combined results of its children. #if defined(TARGET_ARM64) -GTNODE(ANDFLAGS , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR) +GTNODE(CCMP_EQ , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) +GTNODE(CCMP_NE , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) #endif //----------------------------------------------------------------------------- diff --git a/src/coreclr/jit/ifconversion.cpp b/src/coreclr/jit/ifconversion.cpp index 6a26c629841e82..b2d786bbe88567 100644 --- a/src/coreclr/jit/ifconversion.cpp +++ b/src/coreclr/jit/ifconversion.cpp @@ -558,11 +558,10 @@ bool OptIfConversionDsc::optIfConvert() } // Verify the test block ends with a condition that we can manipulate. - // Assuming that an AND connected to a JTRUE is always a valid chain. 
GenTree* last = m_startBlock->lastStmt()->GetRootNode(); noway_assert(last->OperIs(GT_JTRUE)); m_cond = last->gtGetOp1(); - if (!m_cond->OperIsCompare() && !m_cond->OperIs(GT_AND)) + if (!m_cond->OperIsCompare()) { return false; } diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index f257e7a1863ffd..86064f19191b8e 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2868,15 +2868,6 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon(); ssize_t op2Value = op2->IconValue(); -#ifdef TARGET_ARM64 - // Do not optimise further if op1 has a contained chain. - if (op1->OperIs(GT_AND) && - (op1->isContainedCompareChainSegment(op1->gtGetOp1()) || op1->isContainedCompareChainSegment(op1->gtGetOp2()))) - { - return cmp; - } -#endif - #ifdef TARGET_XARCH var_types op1Type = op1->TypeGet(); if (IsContainableMemoryOp(op1) && varTypeIsSmall(op1Type) && FitsIn(op1Type, op2Value)) @@ -7173,13 +7164,12 @@ void Lowering::ContainCheckJTrue(GenTreeOp* node) op1->gtFlags |= GTF_SET_FLAGS; } #if defined(TARGET_ARM64) - else if (op1->OperIs(GT_AND)) + else if (op1->OperIsConditionalCompare()) { - // If the second op of the AND is contained, then the AND does not need to be generated + // If the second op of the CCMP is contained, then the CCMP does not need to be generated // into a register. 
if (op1->gtGetOp2()->isContained()) { - op1->SetOper(GT_ANDFLAGS); op1->gtType = TYP_VOID; op1->gtFlags |= GTF_SET_FLAGS; } diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 23aef6fbaea2ed..0200cceb002b12 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -88,8 +88,8 @@ class Lowering final : public Phase #ifdef TARGET_ARM64 bool IsValidCompareChain(GenTree* child, GenTree* parent); bool ContainCheckCompareChain(GenTree* child, GenTree* parent, GenTree** earliestValid); - void ContainCheckCompareChainForAnd(GenTree* tree); - void ContainCheckConditionalCompare(GenTreeOp* cmp); + bool ContainCheckCompareChainForAnd(GenTree* tree); + void ContainCheckChainedCompare(GenTreeOp* cmp); void ContainCheckNeg(GenTreeOp* neg); #endif void ContainCheckSelect(GenTreeOp* select); diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index af91956c288677..81a13653429443 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2243,6 +2243,17 @@ void Lowering::ContainCheckCast(GenTreeCast* node) // void Lowering::ContainCheckCompare(GenTreeOp* cmp) { + if (cmp->OperIs(GT_TEST_EQ, GT_TEST_NE)) + { + if (ContainCheckCompareChainForAnd(cmp)) + { + // Turn the chain into a CCMP node + JITDUMP("Switching node to CCMP:\n"); + cmp->SetOper(cmp->OperIs(GT_TEST_EQ) ? GT_CCMP_NE : GT_CCMP_EQ); + DISPNODE(cmp); + } + } + CheckImmedAndMakeContained(cmp, cmp->gtOp2); } @@ -2268,14 +2279,12 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp) // bool Lowering::IsValidCompareChain(GenTree* child, GenTree* parent) { - assert(parent->OperIs(GT_AND) || parent->OperIs(GT_SELECT)); - if (parent->isContainedCompareChainSegment(child)) { // Already have a chain. return true; } - else if (child->OperIs(GT_AND)) + else if (child->OperIs(GT_AND) || child->OperIsConditionalCompare()) { // Count both sides. 
return IsValidCompareChain(child->AsOp()->gtGetOp2(), child) && @@ -2307,8 +2316,6 @@ bool Lowering::IsValidCompareChain(GenTree* child, GenTree* parent) // bool Lowering::ContainCheckCompareChain(GenTree* child, GenTree* parent, GenTree** startOfChain) { - assert(parent->OperIs(GT_AND) || parent->OperIs(GT_SELECT)); - if (parent->isContainedCompareChainSegment(child)) { // Already have a contained chain. @@ -2317,7 +2324,7 @@ bool Lowering::ContainCheckCompareChain(GenTree* child, GenTree* parent, GenTree // Can the child be contained. else if (IsSafeToContainMem(parent, child)) { - if (child->OperIs(GT_AND)) + if (child->OperIs(GT_AND) || child->OperIsConditionalCompare()) { // If Op2 is not contained, then try to contain it. if (!child->isContainedCompareChainSegment(child->gtGetOp2())) @@ -2338,7 +2345,7 @@ bool Lowering::ContainCheckCompareChain(GenTree* child, GenTree* parent, GenTree // Ensure the children of the compare are contained correctly. child->gtGetOp2()->gtGetOp1()->ClearContained(); child->gtGetOp2()->gtGetOp2()->ClearContained(); - ContainCheckConditionalCompare(child->gtGetOp2()->AsOp()); + ContainCheckChainedCompare(child->gtGetOp2()->AsOp()); } } @@ -2360,7 +2367,7 @@ bool Lowering::ContainCheckCompareChain(GenTree* child, GenTree* parent, GenTree // Ensure the children of the compare are contained correctly. child->gtGetOp1()->gtGetOp1()->ClearContained(); child->gtGetOp1()->gtGetOp2()->ClearContained(); - ContainCheckConditionalCompare(child->gtGetOp1()->AsOp()); + ContainCheckChainedCompare(child->gtGetOp1()->AsOp()); } } @@ -2375,7 +2382,7 @@ bool Lowering::ContainCheckCompareChain(GenTree* child, GenTree* parent, GenTree // Ensure the children of the compare are contained correctly. 
child->AsOp()->gtGetOp1()->ClearContained(); child->AsOp()->gtGetOp2()->ClearContained(); - ContainCheckConditionalCompare(child->AsOp()); + ContainCheckChainedCompare(child->AsOp()); *startOfChain = child; return true; } @@ -2390,13 +2397,13 @@ bool Lowering::ContainCheckCompareChain(GenTree* child, GenTree* parent, GenTree // Arguments: // node - pointer to the node // -void Lowering::ContainCheckCompareChainForAnd(GenTree* tree) +bool Lowering::ContainCheckCompareChainForAnd(GenTree* tree) { - assert(tree->OperIs(GT_AND)); + assert(tree->OperIs(GT_AND) || tree->OperIs(GT_TEST_EQ) || tree->OperIs(GT_TEST_NE)); if (!comp->opts.OptimizationEnabled()) { - return; + return false; } // First check there is a valid chain. @@ -2422,16 +2429,18 @@ void Lowering::ContainCheckCompareChainForAnd(GenTree* tree) JITDUMP("Lowered `AND` chain:\n"); DISPTREE(tree); + return true; } + return false; } //------------------------------------------------------------------------ -// ContainCheckConditionalCompare: determine whether the source of a compare within a compare chain should be contained. +// ContainCheckChainedCompare: determine whether the source of a compare within a compare chain should be contained. // // Arguments: // node - pointer to the node // -void Lowering::ContainCheckConditionalCompare(GenTreeOp* cmp) +void Lowering::ContainCheckChainedCompare(GenTreeOp* cmp) { assert(cmp->OperIsCmpCompare()); GenTree* op2 = cmp->gtOp2; @@ -2469,29 +2478,14 @@ void Lowering::ContainCheckSelect(GenTreeOp* node) GenTree* op1 = node->gtOp1; GenTree* op2 = node->gtOp2; - if (cond->OperIsCompare()) + // All compare node types (including TEST_ and CCMP) are containable. + if (cond->OperIsCompare() || cond->OperIsConditionalCompare()) { - // All compare node types (including TEST_) are containable. if (IsSafeToContainMem(node, cond)) { cond->AsOp()->SetContained(); } } - else - { - // Check for a compare chain and try to contain it. 
- GenTree* startOfChain = nullptr; - ContainCheckCompareChain(cond, node, &startOfChain); - - if (startOfChain != nullptr) - { - // The earliest node in the chain will be generated as a standard compare. - assert(startOfChain->OperIsCmpCompare()); - startOfChain->AsOp()->gtGetOp1()->ClearContained(); - startOfChain->AsOp()->gtGetOp2()->ClearContained(); - ContainCheckCompare(startOfChain->AsOp()); - } - } if (op1->IsIntegralConst(0)) { diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index cc9ce33b7c2902..053baaefc2577a 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -246,7 +246,7 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = 0; if ((op1->gtFlags & GTF_SET_FLAGS) == 0) { - assert(op1->OperIs(GT_AND)); + assert(op1->OperIsConditionalCompare()); srcCount = BuildOperandUses(op1); } assert(dstCount == 0); @@ -307,13 +307,6 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree); break; - case GT_ANDFLAGS: - srcCount = BuildBinaryUses(tree->AsOp()); - assert(dstCount == 0); - assert((tree->gtFlags & GTF_SET_FLAGS) != 0); - assert(tree->TypeIs(TYP_VOID)); - break; - case GT_BFIZ: assert(tree->gtGetOp1()->OperIs(GT_CAST)); srcCount = BuildOperandUses(tree->gtGetOp1()->gtGetOp1()); @@ -421,6 +414,8 @@ int LinearScan::BuildNode(GenTree* tree) case GT_TEST_EQ: case GT_TEST_NE: case GT_JCMP: + case GT_CCMP_EQ: + case GT_CCMP_NE: srcCount = BuildCmp(tree); break; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 3908f1998792a9..fd5128dbdd08b3 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -3225,11 +3225,11 @@ int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates) } #endif // FEATURE_HW_INTRINSICS #ifdef TARGET_ARM64 - if (node->OperIs(GT_MUL) || node->OperIsCompare() || node->OperIs(GT_AND)) + if (node->OperIs(GT_MUL) || node->OperIsCompare() || node->OperIs(GT_AND) || node->OperIsConditionalCompare()) { // MUL can be 
contained for madd or msub on arm64. - // Compares can be contained by a SELECT. - // ANDs and Cmp Compares may be contained in a chain. + // Compares and ConditionalCompares can be contained by a SELECT. + // ANDs, Cmp Compares and ConditionalCompares may be contained in a chain. return BuildBinaryUses(node->AsOp(), candidates); } if (node->OperIs(GT_NEG, GT_CAST, GT_LSH, GT_RSH, GT_RSZ)) @@ -4075,7 +4075,7 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree) // int LinearScan::BuildCmp(GenTree* tree) { - assert(tree->OperIsCompare() || tree->OperIs(GT_CMP) || tree->OperIs(GT_JCMP)); + assert(tree->OperIsCompare() || tree->OperIs(GT_CMP, GT_JCMP) || tree->OperIsConditionalCompare()); regMaskTP dstCandidates = RBM_NONE; regMaskTP op1Candidates = RBM_NONE; regMaskTP op2Candidates = RBM_NONE; diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index a316cbd9553e1c..be4b8d07bc61cc 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9090,8 +9090,9 @@ class OptBoolsDsc private: Statement* optOptimizeBoolsChkBlkCond(); GenTree* optIsBoolComp(OptTestInfo* pOptTest); - bool optOptimizeBoolsChkTypeCostCond(); - void optOptimizeBoolsUpdateTrees(); + bool optOptimizeBoolsChkTypeCostCond(); + void optOptimizeBoolsUpdateTrees(); + inline bool ConditionIsTest(GenTree* condition, bool* isOptBool); }; //----------------------------------------------------------------------------- @@ -9317,6 +9318,26 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() return true; } +inline bool OptBoolsDsc::ConditionIsTest(GenTree* condition, bool* isOptBool) +{ + GenTree* condOp1 = condition->gtGetOp1(); + GenTree* condOp2 = condition->gtGetOp2(); + + if (condition->OperIs(GT_EQ, GT_NE) && condOp2->IsIntegralConst() && condOp2->AsIntCon()->IconValue() == 0 && + condOp1->OperIs(GT_AND)) + { + if (condOp1->gtGetOp1()->OperIsCompare() && condOp1->gtGetOp2()->OperIsCompare()) + { + // Found chained conditions previously optimized via optimize bools. 
+ *isOptBool = true; + return false; + } + // Found a TEST_EQ or TEST_NE equivalent. + return true; + } + return false; +} + //----------------------------------------------------------------------------- // optOptimizeCompareChainCondBlock: Create AND chain when when both m_b1 and m_b2 are BBJ_COND. // @@ -9443,14 +9464,23 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() // Avoid cases where the compare will be optimized better later: // * cmp(and(x, y), 0) will be turned into a TEST_ opcode. // * Compares against zero will be optimized with cbz. - GenTree* cond1Op2 = cond1->gtGetOp2(); - GenTree* cond2Op2 = cond2->gtGetOp2(); - if ((cond1Op2->IsIntegralConst() && cond1Op2->AsIntCon()->IconValue() == 0) || - (cond2Op2->IsIntegralConst() && cond2Op2->AsIntCon()->IconValue() == 0)) + // Make sure to avoid matching previous optimize bool cases. + bool op1IsCondChain = false; + bool op2IsCondChain = false; + if (ConditionIsTest(cond1, &op1IsCondChain) || ConditionIsTest(cond2, &op2IsCondChain)) { return false; } + GenTree* newchain = nullptr; + + // If a previous optimize bools happened, then reuse the AND operand. + if (op2IsCondChain) + { + newchain = cond2; + cond2 = cond2->gtGetOp1(); + } + // Remove the first JTRUE statement. constexpr bool isUnlink = true; m_comp->fgRemoveStmt(m_b1, s1 DEBUGARG(isUnlink)); @@ -9459,13 +9489,24 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() GenTree* revCond = m_comp->gtReverseCond(cond1); assert(cond1 == revCond); // Ensure `gtReverseCond` did not create a new node. - // Create a chain. 
- GenTree* newchain = m_comp->gtNewOperNode(GT_AND, TYP_INT, cond1, cond2); - newchain->AsOp()->gtFlags |= (cond1->gtFlags & GTF_ALL_EFFECT); - newchain->AsOp()->gtFlags |= (cond2->gtFlags & GTF_ALL_EFFECT); + // AND the two conditions together + GenTree* andconds = m_comp->gtNewOperNode(GT_AND, TYP_INT, cond1, cond2); + andconds->AsOp()->gtFlags |= (cond1->gtFlags & GTF_ALL_EFFECT); + andconds->AsOp()->gtFlags |= (cond2->gtFlags & GTF_ALL_EFFECT); cond1->gtFlags &= ~GTF_RELOP_JMP_USED; cond2->gtFlags &= ~GTF_RELOP_JMP_USED; - newchain->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); + andconds->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); + + // Add a NE condition onto the front of the AND. + if (op2IsCondChain) + { + newchain->AsOp()->gtOp1 = andconds; + newchain->AsOp()->gtFlags |= (andconds->gtFlags & GTF_ALL_EFFECT); + } + else + { + newchain = m_comp->gtNewOperNode(GT_NE, TYP_INT, andconds, m_comp->gtNewZeroConNode(TYP_INT)); + } // Wire the chain into the second block m_testInfo2.testTree->AsOp()->gtOp1 = newchain; From 60341f3f0e556250a970833618bcd1cf80bb0895 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 6 Feb 2023 17:04:38 +0000 Subject: [PATCH 07/31] Various cleanups Change-Id: I1ce4e36c07b7706d804bbcf097b7c72dbf3d0ffe --- src/coreclr/jit/codegenarm64.cpp | 32 +++++++------------------------ src/coreclr/jit/codegenlinear.cpp | 13 +------------ src/coreclr/jit/gentree.h | 9 ++++++--- src/coreclr/jit/ifconversion.cpp | 14 ++++++++++---- src/coreclr/jit/lowerarmarch.cpp | 2 ++ src/coreclr/jit/lsraarm64.cpp | 13 +++---------- src/coreclr/jit/lsrabuild.cpp | 2 +- 7 files changed, 30 insertions(+), 55 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 2eeca07bcc4bca..a9386679fe2410 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -4757,43 +4757,25 @@ void CodeGen::genCodeForSelect(GenTreeOp* tree) genConsumeRegs(op1); genConsumeRegs(op2); - 
assert(op2->isContained()); - - // If Op1 is contained, generate into flags. + // If Op1 is contained, generate it into flags. if (op1->isContained()) { genCodeForContainedCompareChain(op1, &chain, &prevCond); assert(chain); assert(op2->isContained()); } - // If Op2 is contained, generate into flags. - if (op2->isContained()) - { - genCodeForContainedCompareChain(op2, &chain, &prevCond); - assert(chain); - } - // If nothing was contained, put the result of op2 into flags. - else - { - emitter* emit = GetEmitter(); - emit->emitIns_R_I(INS_cmp, emitActualTypeSize(op1), op2->GetRegNum(), 0); - prevCond = GenCondition::NE; - } + // Generate op2 into flags. + assert(op2->isContained()); + genCodeForContainedCompareChain(op2, &chain, &prevCond); + assert(chain); + + // Reverse condition for NE. if (opcond->OperIs(GT_CCMP_NE)) { prevCond = GenCondition::Reverse(prevCond); } } - // else - // { - // // Condition is a compare chain. Try to contain it. - // assert(opcond->OperIs(GT_AND)); - // bool chain = false; - // JITDUMP("Generating compare chain:\n"); - // genCodeForContainedCompareChain(opcond, &chain, &prevCond); - // assert(chain); - // } } else { diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 8e30f3382e173f..be0801a536a2cc 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2599,18 +2599,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) assert(!relop->isContained()); #if defined(TARGET_ARM64) - if (relop->OperIs(GT_AND)) - { - assert(false); - // The condition was generated into a register. - assert(relop->gtType != TYP_VOID); - regNumber reg = relop->GetRegNum(); - assert(reg != REG_NA); - emitAttr attr = emitActualTypeSize(relop->TypeGet()); - GetEmitter()->emitIns_J_R(INS_cbnz, attr, compiler->compCurBB->bbJumpDest, reg); - return; - } - else if (relop->OperIsConditionalCompare()) + if (relop->OperIsConditionalCompare()) { // Find the last contained compare in the chain. 
assert(relop->gtType == TYP_VOID); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 823fec8c425bab..9318e5efe3c8ce 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -914,7 +914,6 @@ struct GenTree // Node and its child in isolation form a contained compare chain. bool isContainedCompareChainSegment(GenTree* child) const { - // AHTODO: Not sure we need all of these? return ((OperIs(GT_AND) || OperIsConditionalCompare()) && child->isContained() && (child->OperIs(GT_AND) || child->OperIsCmpCompare() || child->OperIsConditionalCompare())); } @@ -1093,8 +1092,8 @@ struct GenTree if (gtType == TYP_VOID) { // These are the only operators which can produce either VOID or non-VOID results. - assert(OperIs(GT_NOP, GT_CALL, GT_COMMA, GT_AND) || OperIsCompare() || OperIsLong() || - OperIsHWIntrinsic() || IsCnsVec() || OperIsConditionalCompare()); + assert(OperIs(GT_NOP, GT_CALL, GT_COMMA) || OperIsCompare() || OperIsLong() || OperIsHWIntrinsic() || + IsCnsVec() || OperIsConditionalCompare()); return false; } @@ -1382,7 +1381,11 @@ struct GenTree static bool OperIsConditionalCompare(genTreeOps gtOper) { +#if defined(TARGET_ARM64) return (GT_CCMP_EQ == gtOper || GT_CCMP_NE == gtOper); +#else + return false; +#endif } bool OperIsConditionalCompare() const diff --git a/src/coreclr/jit/ifconversion.cpp b/src/coreclr/jit/ifconversion.cpp index b2d786bbe88567..292d915384ae47 100644 --- a/src/coreclr/jit/ifconversion.cpp +++ b/src/coreclr/jit/ifconversion.cpp @@ -668,16 +668,22 @@ bool OptIfConversionDsc::optIfConvert() { if (m_doElseConversion) { - selectTrueInput = m_elseOperation.node->gtGetOp2(); + selectTrueInput = m_elseOperation.node->gtGetOp2(); + selectFalseInput = m_thenOperation.node->gtGetOp2(); } else { + // Invert the condition (to help matching condition codes back to CIL). + GenTree* revCond = m_comp->gtReverseCond(m_cond); + assert(m_cond == revCond); // Ensure `gtReverseCond` did not create a new node. 
+ // Duplicate the destination of the Then assignment. assert(m_thenOperation.node->gtGetOp1()->IsLocal()); - selectTrueInput = m_comp->gtCloneExpr(m_thenOperation.node->gtGetOp1()); - selectTrueInput->gtFlags &= GTF_EMPTY; + selectFalseInput = m_comp->gtCloneExpr(m_thenOperation.node->gtGetOp1()); + selectFalseInput->gtFlags &= GTF_EMPTY; + + selectTrueInput = m_thenOperation.node->gtGetOp2(); } - selectFalseInput = m_thenOperation.node->gtGetOp2(); // Pick the type as the type of the local, which should always be compatible even for implicit coercions. selectType = genActualType(m_thenOperation.node->gtGetOp1()); diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 81a13653429443..c77e603c327d41 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2243,6 +2243,7 @@ void Lowering::ContainCheckCast(GenTreeCast* node) // void Lowering::ContainCheckCompare(GenTreeOp* cmp) { +#if defined(TARGET_ARM64) if (cmp->OperIs(GT_TEST_EQ, GT_TEST_NE)) { if (ContainCheckCompareChainForAnd(cmp)) @@ -2253,6 +2254,7 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp) DISPNODE(cmp); } } +#endif CheckImmedAndMakeContained(cmp, cmp->gtOp2); } diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 053baaefc2577a..d2a2bf533c96b7 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -241,17 +241,10 @@ int LinearScan::BuildNode(GenTree* tree) break; case GT_JTRUE: - { - GenTree* op1 = tree->gtGetOp1(); - srcCount = 0; - if ((op1->gtFlags & GTF_SET_FLAGS) == 0) - { - assert(op1->OperIsConditionalCompare()); - srcCount = BuildOperandUses(op1); - } + srcCount = 0; + assert((tree->gtGetOp1()->gtFlags & GTF_SET_FLAGS) != 0); assert(dstCount == 0); - } - break; + break; case GT_JMP: srcCount = 0; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index fd5128dbdd08b3..83e5f4891cc7c9 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ 
b/src/coreclr/jit/lsrabuild.cpp @@ -3225,7 +3225,7 @@ int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates) } #endif // FEATURE_HW_INTRINSICS #ifdef TARGET_ARM64 - if (node->OperIs(GT_MUL) || node->OperIsCompare() || node->OperIs(GT_AND) || node->OperIsConditionalCompare()) + if (node->OperIs(GT_MUL, GT_AND) || node->OperIsCompare() || node->OperIsConditionalCompare()) { // MUL can be contained for madd or msub on arm64. // Compares and ConditionalCompares can be contained by a SELECT. From 177c92aa51751832658699f3c2262e59bfd39839 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 6 Feb 2023 17:43:32 +0000 Subject: [PATCH 08/31] remove fgopt changes --- src/coreclr/jit/compiler.h | 6 ------ src/coreclr/jit/fgopt.cpp | 19 ------------------- src/coreclr/jit/gentree.cpp | 10 ++++++++++ 3 files changed, 10 insertions(+), 25 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 472b439278b470..1299b34bb431e8 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2870,12 +2870,6 @@ class Compiler // before they have been set.) bool gtComplexityExceeds(GenTree* tree, unsigned limit); - // Can the test condition be reversed without creating a new node. - bool gtCanReverseCondSimple(GenTree* tree) - { - return (tree->OperIsCompare() || tree->OperIs(GT_JCC, GT_SETCC) || tree->OperIs(GT_JCMP)); - } - GenTree* gtReverseCond(GenTree* tree); static bool gtHasRef(GenTree* tree, unsigned lclNum); diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 7c50338e853d8a..f26a27ce30b0fb 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5172,18 +5172,6 @@ bool Compiler::fgReorderBlocks(bool useProfile) } } - // Cannot handle cases where reversing creates a new node. 
- if (bPrev->bbJumpKind == BBJ_COND) - { - Statement* const condTestStmt = bPrev->lastStmt(); - GenTree* const condTest = condTestStmt->GetRootNode(); - noway_assert(condTest->gtOper == GT_JTRUE); - if (!gtCanReverseCondSimple(condTest->gtGetOp1())) - { - reorderBlock = false; - } - } - if (reorderBlock == false) { // @@ -6175,13 +6163,6 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication, bool isPhase) } } - // Cannot handle cases where reversing creates a new node. - GenTree* last = block->lastNode(); - if (last->OperGet() == GT_JTRUE && !gtCanReverseCondSimple(last->gtGetOp1())) - { - optimizeJump = false; - } - if (optimizeJump && isJumpToJoinFree) { // In the join free case, we also need to move bDest right after bNext diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 15479d99ef4a41..605078b4e31347 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3454,6 +3454,16 @@ GenTree* Compiler::gtReverseCond(GenTree* tree) // tbz <=> tbnz tree->gtFlags ^= GTF_JCMP_EQ; } +#if defined(TARGET_ARM64) + else if (tree->OperIs(GT_CCMP_EQ)) + { + tree->SetOper(GT_CCMP_NE); + } + else if (tree->OperIs(GT_CCMP_NE)) + { + tree->SetOper(GT_CCMP_EQ); + } +#endif else { tree = gtNewOperNode(GT_NOT, TYP_INT, tree); From 62dabf4815b869cd81333afb1a9365c5ffc8b7ab Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 7 Feb 2023 15:18:04 +0000 Subject: [PATCH 09/31] restore lsraarm64 GT_AND change Change-Id: I1f42be7533c76ba4698f212d187e20996d6a43e1 --- src/coreclr/jit/lsraarm64.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index d2a2bf533c96b7..c17c900c7d579b 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -288,6 +288,7 @@ int LinearScan::BuildNode(GenTree* tree) } FALLTHROUGH; + case GT_AND: case GT_AND_NOT: case GT_OR: case GT_XOR: From ca38951807f8e0a0d5e6bb4426698f4f56536163 Mon Sep 17 00:00:00 2001 From: Alan Hayward 
Date: Tue, 14 Feb 2023 12:26:58 +0000 Subject: [PATCH 10/31] Make GT_CCMP_ into conditional nodes Change-Id: I09cdae25f7db5872011ef6720b787f51eb20153d --- src/coreclr/jit/codegen.h | 4 +- src/coreclr/jit/codegenarm64.cpp | 179 ++++++++++++----------------- src/coreclr/jit/codegenarmarch.cpp | 7 +- src/coreclr/jit/codegenlinear.cpp | 42 +++---- src/coreclr/jit/compiler.hpp | 8 ++ src/coreclr/jit/gentree.cpp | 39 ++++++- src/coreclr/jit/gentree.h | 38 +++++- src/coreclr/jit/gtlist.h | 14 ++- src/coreclr/jit/gtstructs.h | 2 +- src/coreclr/jit/ifconversion.cpp | 14 +-- src/coreclr/jit/lower.cpp | 107 ++++++++++++----- src/coreclr/jit/lower.h | 1 + src/coreclr/jit/lowerarmarch.cpp | 39 ++++--- src/coreclr/jit/lsra.h | 1 + src/coreclr/jit/lsraarm64.cpp | 10 +- src/coreclr/jit/lsraarmarch.cpp | 21 ++-- src/coreclr/jit/lsrabuild.cpp | 33 +++++- 17 files changed, 340 insertions(+), 219 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 2464327f4f8e78..8d6fe156423a0c 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -883,11 +883,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genCkfinite(GenTree* treeNode); void genCodeForCompare(GenTreeOp* tree); #ifdef TARGET_ARM64 - void genCodeForConditionalCompare(GenTreeOp* tree, GenCondition prevCond); + void genCodeForChainedCompare(GenTreeOp* tree, GenCondition prevCond); void genCodeForContainedCompareChain(GenTree* tree, bool* inchain, GenCondition* prevCond); + void genCodeForConditionalCompare(GenTreeConditional* tree); #endif void genCodeForSelect(GenTreeOp* select); - void genCodeForConditionalCompare(GenTreeOp* select); void genIntrinsic(GenTreeIntrinsic* treeNode); void genPutArgStk(GenTreePutArgStk* treeNode); void genPutArgReg(GenTreeOp* tree); diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index a9386679fe2410..ae50f0c8a4f510 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ 
b/src/coreclr/jit/codegenarm64.cpp @@ -4539,6 +4539,8 @@ void CodeGen::genCkfinite(GenTree* treeNode) // Arguments: // tree - the node // +// Assumptions: The registers for tree have already been consumed. +// void CodeGen::genCodeForCompare(GenTreeOp* tree) { regNumber targetReg = tree->GetRegNum(); @@ -4600,13 +4602,13 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) } //------------------------------------------------------------------------ -// genCodeForConditionalCompare: Produce code for a compare that's dependent on a previous compare. +// genCodeForChainedCompare: Produce code for a compare that's dependent on a previous compare. // // Arguments: -// tree - a compare node (GT_EQ etc) +// tree - a conditional compare node (GT_EQ etc) // cond - the condition of the previous generated compare. // -void CodeGen::genCodeForConditionalCompare(GenTreeOp* tree, GenCondition prevCond) +void CodeGen::genCodeForChainedCompare(GenTreeOp* tree, GenCondition prevCond) { emitter* emit = GetEmitter(); @@ -4703,7 +4705,7 @@ void CodeGen::genCodeForContainedCompareChain(GenTree* tree, bool* inChain, GenC { // Within the chain. Use a conditional compare (which is // dependent on the previous emitted compare). - genCodeForConditionalCompare(tree->AsOp(), *prevCond); + genCodeForChainedCompare(tree->AsOp(), *prevCond); } *inChain = true; @@ -4711,6 +4713,73 @@ void CodeGen::genCodeForContainedCompareChain(GenTree* tree, bool* inChain, GenC } } +//------------------------------------------------------------------------ +// genCodeForConditionalCompare: Produce code for a conditional compare. 
+// +// Arguments: +// tree - the node +// +// +void CodeGen::genCodeForConditionalCompare(GenTreeConditional* tree) +{ + assert(tree->OperIsConditionalCompare()); + emitter* emit = GetEmitter(); + + GenTree* opcond = tree->gtCond; + GenTree* op1 = tree->gtOp1; + GenTree* op2 = tree->gtOp2; + var_types op1Type = genActualType(op1->TypeGet()); + var_types op2Type = genActualType(op2->TypeGet()); + emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); + regNumber targetReg = tree->GetRegNum(); + + // No float support or swapping op1 and op2 to generate cmp reg, imm. + assert(!varTypeIsFloating(op2Type)); + assert(!op1->isContainedIntOrIImmed()); + + // ConditionalCompare relies on flags, therefore the condition must be contained. + assert(opcond->isContained()); + + if (opcond->OperIsCompare()) + { + genConsumeRegs(opcond); + genCodeForCompare(opcond->AsOp()); + } + else + { + assert(opcond->OperIsConditionalCompare()); + genCodeForConditionalCompare(opcond->AsConditional()); + } + + // For the ccmp flags, invert the condition of the compare. + insCflags cflags = InsCflagsForCcmp(GenCondition::FromRelop(tree)); + + // For the condition, use the opcond. + GenCondition prevCond = GenCondition::FromRelop(opcond); + const GenConditionDesc& prevDesc = GenConditionDesc::Get(prevCond); + insCond prevInsCond = JumpKindToInsCond(prevDesc.jumpKind1); + + regNumber srcReg1 = genConsumeReg(op1); + + if (op2->isContainedIntOrIImmed()) + { + GenTreeIntConCommon* intConst = op2->AsIntConCommon(); + emit->emitIns_R_I_FLAGS_COND(INS_ccmp, cmpSize, srcReg1, (int)intConst->IconValue(), cflags, prevInsCond); + } + else + { + regNumber srcReg2 = genConsumeReg(op2); + emit->emitIns_R_R_FLAGS_COND(INS_ccmp, cmpSize, srcReg1, srcReg2, cflags, prevInsCond); + } + + // Are we evaluating this into a register? 
+ if (targetReg != REG_NA) + { + inst_SETCC(GenCondition::FromRelop(tree), tree->TypeGet(), targetReg); + genProduceReg(tree); + } +} + //------------------------------------------------------------------------ // genCodeForSelect: Produce code for a GT_SELECT node. // @@ -4734,6 +4803,7 @@ void CodeGen::genCodeForSelect(GenTreeOp* tree) assert(genTypeSize(op1Type) == genTypeSize(op2Type)); GenCondition prevCond; + if (opcond->isContained()) { // Generate the contained condition. @@ -4741,41 +4811,13 @@ void CodeGen::genCodeForSelect(GenTreeOp* tree) { genConsumeRegs(opcond); genCodeForCompare(opcond->AsOp()); - prevCond = GenCondition::FromRelop(opcond); } else { assert(opcond->OperIsConditionalCompare()); - - // Condition is a compare chain. Generate it. - bool chain = false; - JITDUMP("Generating compare chain:\n"); - - GenTree* op1 = opcond->gtGetOp1(); - GenTree* op2 = opcond->gtGetOp2(); - - genConsumeRegs(op1); - genConsumeRegs(op2); - - // If Op1 is contained, generate it into flags. - if (op1->isContained()) - { - genCodeForContainedCompareChain(op1, &chain, &prevCond); - assert(chain); - assert(op2->isContained()); - } - - // Generate op2 into flags. - assert(op2->isContained()); - genCodeForContainedCompareChain(op2, &chain, &prevCond); - assert(chain); - - // Reverse condition for NE. - if (opcond->OperIs(GT_CCMP_NE)) - { - prevCond = GenCondition::Reverse(prevCond); - } + genCodeForConditionalCompare(opcond->AsConditional()); } + prevCond = GenCondition::FromRelop(opcond); } else { @@ -4809,75 +4851,6 @@ void CodeGen::genCodeForSelect(GenTreeOp* tree) genProduceReg(tree); } -//------------------------------------------------------------------------ -// genCodeForConditionalCompare: Generates code for CCMP node. 
-// -// Arguments: -// tree - the node -// -void CodeGen::genCodeForConditionalCompare(GenTreeOp* tree) -{ - var_types targetType = tree->TypeGet(); - emitter* emit = GetEmitter(); - - assert(tree->OperIsConditionalCompare()); - - GenTree* op1 = tree->gtGetOp1(); - GenTree* op2 = tree->gtGetOp2(); - - assert(tree->isContainedCompareChainSegment(op2)); - - GenCondition cond; - bool chain = false; - - JITDUMP("Generating compare chain:\n"); - if (op1->isContained()) - { - // Generate Op1 into flags. - genCodeForContainedCompareChain(op1, &chain, &cond); - assert(chain); - } - else - { - // Op1 is not contained, move it from a register into flags. - emit->emitIns_R_I(INS_cmp, emitActualTypeSize(op1), op1->GetRegNum(), 0); - cond = GenCondition::NE; - chain = true; - } - - // AHTODO: not sure this is always true - assert(op2->isContained()); - - // Gen Op2 into flags. - genCodeForContainedCompareChain(op2, &chain, &cond); - assert(chain); - - // Are we evaluating this into a register? - regNumber targetReg = tree->GetRegNum(); - if (targetReg != REG_NA) - { - // AHTODO: merge this into helper function with genCodeForJumpTrue() - // Find the last contained compare in the chain. - GenCondition condition; - GenTreeOp* lastCompare = tree->gtGetOp2()->AsOp(); - assert(lastCompare->isContained()); - while (!lastCompare->OperIsCompare()) - { - assert(lastCompare->OperIs(GT_AND) || lastCompare->OperIsConditionalCompare()); - lastCompare = lastCompare->gtGetOp2()->AsOp(); - assert(lastCompare->isContained()); - } - condition = GenCondition::FromRelop(lastCompare); - if (tree->OperIs(GT_CCMP_NE)) - { - condition = GenCondition::Reverse(condition); - } - - inst_SETCC(condition, tree->TypeGet(), targetReg); - genProduceReg(tree); - } -} - //------------------------------------------------------------------------ // genCodeForJumpCompare: Generates code for jmpCompare statement. 
// diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 881273c0010567..504c5b07188cec 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -369,8 +369,11 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_CCMP_EQ: case GT_CCMP_NE: - genConsumeOperands(treeNode->AsOp()); - genCodeForConditionalCompare(treeNode->AsOp()); + case GT_CCMP_LT: + case GT_CCMP_LE: + case GT_CCMP_GE: + case GT_CCMP_GT: + genCodeForConditionalCompare(treeNode->AsConditional()); break; #endif diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index be0801a536a2cc..839c45d3562ed3 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1610,13 +1610,21 @@ void CodeGen::genConsumeRegs(GenTree* tree) assert(cast->isContained()); genConsumeAddress(cast->CastOp()); } - else if (tree->OperIsCompare() || tree->OperIs(GT_AND) || tree->OperIsConditionalCompare()) + else if (tree->OperIsCompare() || tree->OperIs(GT_AND)) { // Compares can be contained by a SELECT. - // Compares, ANDs and conditional compares may be contained in a chain. + // Compares and ANDs may be contained in a chain. genConsumeRegs(tree->gtGetOp1()); genConsumeRegs(tree->gtGetOp2()); } + else if (tree->OperIsConditionalCompare()) + { + assert(false); + // Conditional compares may be contained. + genConsumeRegs(tree->AsConditional()->gtCond); + genConsumeRegs(tree->AsConditional()->gtOp1); + genConsumeRegs(tree->AsConditional()->gtOp2); + } #endif else if (tree->OperIsLocalRead()) { @@ -2592,37 +2600,13 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) assert(compiler->compCurBB->bbJumpKind == BBJ_COND); assert(jtrue->OperIs(GT_JTRUE)); - GenTreeOp* relop = jtrue->gtGetOp1()->AsOp(); - GenCondition condition; + GenTree* relop = jtrue->gtGetOp1(); // Operands should never be contained inside a jtrue. 
assert(!relop->isContained()); -#if defined(TARGET_ARM64) - if (relop->OperIsConditionalCompare()) - { - // Find the last contained compare in the chain. - assert(relop->gtType == TYP_VOID); - GenTreeOp* lastCompare = relop->gtGetOp2()->AsOp(); - assert(lastCompare->isContained()); - while (!lastCompare->OperIsCompare()) - { - assert(lastCompare->OperIs(GT_AND) || lastCompare->OperIsConditionalCompare()); - lastCompare = lastCompare->gtGetOp2()->AsOp(); - assert(lastCompare->isContained()); - } - condition = GenCondition::FromRelop(lastCompare); - if (relop->OperIs(GT_CCMP_NE)) - { - condition = GenCondition::Reverse(condition); - } - } - else -#endif - { - assert(relop->OperIsCompare()); - condition = GenCondition::FromRelop(relop); - } + assert(relop->OperIsCompare() || relop->OperIsConditionalCompare()); + GenCondition condition = GenCondition::FromRelop(relop); if (condition.PreferSwap()) { diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 41c1ae5ebefbbc..d951961a4d5fe0 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4271,6 +4271,14 @@ void GenTree::VisitOperands(TVisitor visitor) } case GT_SELECT: +#if defined(TARGET_ARM64) + case GT_CCMP_EQ: + case GT_CCMP_NE: + case GT_CCMP_LT: + case GT_CCMP_LE: + case GT_CCMP_GE: + case GT_CCMP_GT: +#endif { GenTreeConditional* const cond = this->AsConditional(); if (visitor(cond->gtCond) == VisitResult::Abort) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 605078b4e31347..a6e6c74e2d9002 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3455,13 +3455,16 @@ GenTree* Compiler::gtReverseCond(GenTree* tree) tree->gtFlags ^= GTF_JCMP_EQ; } #if defined(TARGET_ARM64) - else if (tree->OperIs(GT_CCMP_EQ)) + else if (tree->OperIsConditionalCompare()) { - tree->SetOper(GT_CCMP_NE); - } - else if (tree->OperIs(GT_CCMP_NE)) - { - tree->SetOper(GT_CCMP_EQ); + genTreeOps cmpOper = 
GenTree::OperCovertConditionalCompareToCompare(tree->OperGet()); + genTreeOps revOper = GenTree::ReverseRelop(cmpOper); + tree->SetOper(GenTree::OperCovertCompareToConditionalCompare(revOper)); + + if (varTypeIsFloating(tree->AsConditional()->gtOp1->TypeGet())) + { + tree->gtFlags ^= GTF_RELOP_NAN_UN; + } } #endif else @@ -6376,6 +6379,14 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse) } case GT_SELECT: +#if defined(TARGET_ARM64) + case GT_CCMP_EQ: + case GT_CCMP_NE: + case GT_CCMP_LT: + case GT_CCMP_LE: + case GT_CCMP_GE: + case GT_CCMP_GT: +#endif { GenTreeConditional* const conditional = this->AsConditional(); if (operand == conditional->gtCond) @@ -9649,6 +9660,14 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) return; case GT_SELECT: +#if defined(TARGET_ARM64) + case GT_CCMP_EQ: + case GT_CCMP_NE: + case GT_CCMP_LT: + case GT_CCMP_LE: + case GT_CCMP_GE: + case GT_CCMP_GT: +#endif m_edge = &m_node->AsConditional()->gtCond; assert(*m_edge != nullptr); m_advance = &GenTreeUseEdgeIterator::AdvanceConditional; @@ -12336,6 +12355,14 @@ void Compiler::gtDispTree(GenTree* tree, break; case GT_SELECT: +#if defined(TARGET_ARM64) + case GT_CCMP_EQ: + case GT_CCMP_NE: + case GT_CCMP_LT: + case GT_CCMP_LE: + case GT_CCMP_GE: + case GT_CCMP_GT: +#endif gtDispCommonEndLine(tree); if (!topOnly) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 9318e5efe3c8ce..3995a4d6a0ab81 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1371,7 +1371,13 @@ struct GenTree static bool OperIsConditional(genTreeOps gtOper) { +#if defined(TARGET_ARM64) + static_assert_no_msg( + AreContiguous(GT_SELECT, GT_CCMP_EQ, GT_CCMP_NE, GT_CCMP_LT, GT_CCMP_LE, GT_CCMP_GE, GT_CCMP_GT)); + return (GT_SELECT <= gtOper) && (gtOper <= GT_CCMP_GT); +#else return (GT_SELECT == gtOper); +#endif } bool OperIsConditional() const @@ -1382,7 +1388,8 @@ struct GenTree static bool OperIsConditionalCompare(genTreeOps gtOper) { #if 
defined(TARGET_ARM64) - return (GT_CCMP_EQ == gtOper || GT_CCMP_NE == gtOper); + static_assert_no_msg(AreContiguous(GT_CCMP_EQ, GT_CCMP_NE, GT_CCMP_LT, GT_CCMP_LE, GT_CCMP_GE, GT_CCMP_GT)); + return (GT_CCMP_EQ <= gtOper) && (gtOper <= GT_CCMP_GT); #else return false; #endif @@ -1393,6 +1400,20 @@ struct GenTree return OperIsConditionalCompare(OperGet()); } +#if defined(TARGET_ARM64) + static genTreeOps OperCovertCompareToConditionalCompare(genTreeOps oper) + { + assert(OperIsCmpCompare(oper)); + return (genTreeOps)(oper - GT_EQ + GT_CCMP_EQ); + } + + static genTreeOps OperCovertConditionalCompareToCompare(genTreeOps oper) + { + assert(OperIsConditionalCompare(oper)); + return (genTreeOps)(oper - GT_CCMP_EQ + GT_EQ); + } +#endif + static bool OperIsCC(genTreeOps gtOper) { return (gtOper == GT_JCC) || (gtOper == GT_SETCC); @@ -8409,9 +8430,13 @@ struct GenCondition static GenCondition FromRelop(GenTree* relop) { - assert(relop->OperIsCompare()); + assert(relop->OperIsCompare() || relop->OperIsConditionalCompare()); - if (varTypeIsFloating(relop->gtGetOp1())) + if (relop->OperIsConditionalCompare()) + { + return FromConditionalRelop(relop); + } + else if (varTypeIsFloating(relop->gtGetOp1())) { return FromFloatRelop(relop); } @@ -8473,6 +8498,13 @@ struct GenCondition return GenCondition(static_cast(code)); } + static GenCondition FromConditionalRelop(GenTree* relop) + { + assert(relop->OperIsConditionalCompare()); + assert(!varTypeIsFloating(relop->AsConditional()->gtOp1) && !varTypeIsFloating(relop->AsConditional()->gtOp2)); + return FromIntegralRelop((genTreeOps)(relop->OperGet() - GT_CCMP_EQ + GT_EQ), relop->IsUnsigned()); + } + static GenCondition Reverse(GenCondition condition) { // clang-format off diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index 190d99703c9a35..cdf98e41d25c94 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -149,6 +149,15 @@ GTNODE(TEST_NE , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Conditional select 
with 3 operands: condition, true value, false value GTNODE(SELECT , GenTreeConditional ,0,GTK_SPECIAL) +// Sets the condition flags according to the combined results of its children. +#if defined(TARGET_ARM64) +GTNODE(CCMP_EQ , GenTreeConditional ,0,GTK_SPECIAL|DBK_NOTHIR) +GTNODE(CCMP_NE , GenTreeConditional ,0,GTK_SPECIAL|DBK_NOTHIR) +GTNODE(CCMP_LT , GenTreeConditional ,0,GTK_SPECIAL|DBK_NOTHIR) +GTNODE(CCMP_LE , GenTreeConditional ,0,GTK_SPECIAL|DBK_NOTHIR) +GTNODE(CCMP_GE , GenTreeConditional ,0,GTK_SPECIAL|DBK_NOTHIR) +GTNODE(CCMP_GT , GenTreeConditional ,0,GTK_SPECIAL|DBK_NOTHIR) +#endif GTNODE(COMMA , GenTreeOp ,0,GTK_BINOP|DBK_NOTLIR) GTNODE(QMARK , GenTreeQmark ,0,GTK_BINOP|GTK_EXOP|DBK_NOTLIR) @@ -242,11 +251,6 @@ GTNODE(SETCC , GenTreeCC ,0,GTK_LEAF|DBK_NOTHIR) // The XARCH BT instruction. Like CMP, this sets the condition flags (CF to be precise) and does not produce a value. GTNODE(BT , GenTreeOp ,0,(GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)) #endif -// Sets the condition flags according to the combined results of its children. 
-#if defined(TARGET_ARM64) -GTNODE(CCMP_EQ , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) -GTNODE(CCMP_NE , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) -#endif //----------------------------------------------------------------------------- // Other nodes that look like unary/binary operators: diff --git a/src/coreclr/jit/gtstructs.h b/src/coreclr/jit/gtstructs.h index 0321f51a95c00b..b4e75f911560c4 100644 --- a/src/coreclr/jit/gtstructs.h +++ b/src/coreclr/jit/gtstructs.h @@ -97,7 +97,7 @@ GTSTRUCT_1(PhiArg , GT_PHI_ARG) GTSTRUCT_1(Phi , GT_PHI) GTSTRUCT_1(StoreInd , GT_STOREIND) GTSTRUCT_N(Indir , GT_STOREIND, GT_IND, GT_NULLCHECK, GT_BLK, GT_STORE_BLK, GT_OBJ, GT_STORE_OBJ, GT_STORE_DYN_BLK) -GTSTRUCT_N(Conditional , GT_SELECT) +GTSTRUCT_N(Conditional , GT_SELECT, GT_CCMP_EQ, GT_CCMP_NE, GT_CCMP_LT, GT_CCMP_LE, GT_CCMP_GE, GT_CCMP_GT) #if FEATURE_ARG_SPLIT GTSTRUCT_2_SPECIAL(PutArgStk, GT_PUTARG_STK, GT_PUTARG_SPLIT) GTSTRUCT_1(PutArgSplit , GT_PUTARG_SPLIT) diff --git a/src/coreclr/jit/ifconversion.cpp b/src/coreclr/jit/ifconversion.cpp index 292d915384ae47..b2d786bbe88567 100644 --- a/src/coreclr/jit/ifconversion.cpp +++ b/src/coreclr/jit/ifconversion.cpp @@ -668,22 +668,16 @@ bool OptIfConversionDsc::optIfConvert() { if (m_doElseConversion) { - selectTrueInput = m_elseOperation.node->gtGetOp2(); - selectFalseInput = m_thenOperation.node->gtGetOp2(); + selectTrueInput = m_elseOperation.node->gtGetOp2(); } else { - // Invert the condition (to help matching condition codes back to CIL). - GenTree* revCond = m_comp->gtReverseCond(m_cond); - assert(m_cond == revCond); // Ensure `gtReverseCond` did not create a new node. - // Duplicate the destination of the Then assignment. 
assert(m_thenOperation.node->gtGetOp1()->IsLocal()); - selectFalseInput = m_comp->gtCloneExpr(m_thenOperation.node->gtGetOp1()); - selectFalseInput->gtFlags &= GTF_EMPTY; - - selectTrueInput = m_thenOperation.node->gtGetOp2(); + selectTrueInput = m_comp->gtCloneExpr(m_thenOperation.node->gtGetOp1()); + selectTrueInput->gtFlags &= GTF_EMPTY; } + selectFalseInput = m_thenOperation.node->gtGetOp2(); // Pick the type as the type of the local, which should always be compatible even for implicit coercions. selectType = genActualType(m_thenOperation.node->gtGetOp1()); diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 86064f19191b8e..859a0e7edb4499 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2868,6 +2868,16 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon(); ssize_t op2Value = op2->IconValue(); +// #ifdef TARGET_ARM64 +// // Do not optimise further if op1 has a contained chain. +// if (op1->OperIs(GT_AND) && +// (op1->isContainedCompareChainSegment(op1->gtGetOp1()) || +// op1->isContainedCompareChainSegment(op1->gtGetOp2()))) +// { +// return cmp; +// } +// #endif + #ifdef TARGET_XARCH var_types op1Type = op1->TypeGet(); if (IsContainableMemoryOp(op1) && varTypeIsSmall(op1Type) && FitsIn(op1Type, op2Value)) @@ -2937,6 +2947,8 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) cmp->AsOp()->gtOp1 = castOp; BlockRange().Remove(cast); + JITDUMP("Removed cast\n"); + DISPTREERANGE(BlockRange(), cmp); } } } @@ -2989,6 +3001,8 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) BlockRange().Remove(cmp->gtGetOp2()); BlockRange().Remove(cmp); + JITDUMP("Removed cast\n"); + DISPTREERANGE(BlockRange(), cmp); return next; } } @@ -3038,6 +3052,8 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) } } #endif + JITDUMP("Bashed compare to test:\n"); + DISPTREERANGE(BlockRange(), cmp); } } @@ -3206,6 +3222,48 @@ GenTree* Lowering::LowerCompare(GenTree* cmp) } } #endif 
// TARGET_XARCH + +#if defined(TARGET_ARM64) + // Detect TEST(CMP1, CMP2) and replace with CCMP2(CMP1). + if (cmp->OperIs(GT_TEST_EQ, GT_TEST_NE)) + { + GenTree* op1 = cmp->AsOp()->gtGetOp1(); + GenTree* op2 = cmp->AsOp()->gtGetOp2(); + + if (op1->OperIsCmpCompare() && op2->OperIsCmpCompare()) + { + // Get the equivalant CCMP oper. + genTreeOps cmpOper = op2->gtOper; + if (cmp->OperIs(GT_TEST_EQ)) + { + cmpOper = GenTree::ReverseRelop(cmpOper); + } + genTreeOps ccmpOper = GenTree::OperCovertCompareToConditionalCompare(cmpOper); + + // Create a ccmp node, insert it and update the use. + GenTreeConditional* ccmp = comp->gtNewConditionalNode(ccmpOper, op1, op2->AsOp()->gtGetOp1(), + op2->AsOp()->gtGetOp2(), op2->gtType); + BlockRange().InsertAfter(op2, ccmp); + LIR::Use useOfCmp; + bool gotUse = BlockRange().TryGetUse(cmp, &useOfCmp); + assert(gotUse); + useOfCmp.ReplaceWith(ccmp); + LowerNode(ccmp); + + // Remove the old nodes. + BlockRange().Remove(cmp); + BlockRange().Remove(op2); + + ContainCheckConditionalCompare(ccmp); + + JITDUMP("Bashed TEST to CCMP:\n"); + DISPTREERANGE(BlockRange(), ccmp); + + return ccmp->gtNext; + } + } +#endif + ContainCheckCompare(cmp->AsOp()); return cmp->gtNext; } @@ -3226,13 +3284,13 @@ GenTree* Lowering::LowerCompare(GenTree* cmp) GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) { #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) - GenTree* relop = jtrue->gtGetOp1(); - GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); + GenTree* relop = jtrue->gtGetOp1(); - if ((relop->gtNext == jtrue) && relopOp2->IsCnsIntOrI()) + if ((relop->gtNext == jtrue) && relop->OperIsCompare() && relop->AsOp()->gtGetOp2()->IsCnsIntOrI()) { - bool useJCMP = false; - GenTreeFlags flags = GTF_EMPTY; + bool useJCMP = false; + GenTreeFlags flags = GTF_EMPTY; + GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); #if defined(TARGET_LOONGARCH64) if (relop->OperIs(GT_EQ, GT_NE)) @@ -6591,7 +6649,7 @@ void Lowering::CheckCallArg(GenTree* arg) break; default: - 
assert(arg->OperIsPutArg()); + // assert(arg->OperIsPutArg()); break; } } @@ -6976,6 +7034,15 @@ void Lowering::ContainCheckNode(GenTree* node) ContainCheckSelect(node->AsConditional()); break; + case GT_CCMP_EQ: + case GT_CCMP_NE: + case GT_CCMP_LT: + case GT_CCMP_LE: + case GT_CCMP_GE: + case GT_CCMP_GT: + ContainCheckConditionalCompare(node->AsConditional()); + break; + case GT_ADD: case GT_SUB: #if !defined(TARGET_64BIT) @@ -7155,30 +7222,10 @@ void Lowering::ContainCheckRet(GenTreeUnOp* ret) // void Lowering::ContainCheckJTrue(GenTreeOp* node) { - GenTree* op1 = node->gtGetOp1(); - - if (op1->OperIsCompare()) - { - // The compare does not need to be generated into a register. - op1->gtType = TYP_VOID; - op1->gtFlags |= GTF_SET_FLAGS; - } -#if defined(TARGET_ARM64) - else if (op1->OperIsConditionalCompare()) - { - // If the second op of the CCMP is contained, then the CCMP does not need to be generated - // into a register. - if (op1->gtGetOp2()->isContained()) - { - op1->gtType = TYP_VOID; - op1->gtFlags |= GTF_SET_FLAGS; - } - } -#endif - else - { - unreached(); - } + // The compare does not need to be generated into a register. 
+ GenTree* cmp = node->gtGetOp1(); + cmp->gtType = TYP_VOID; + cmp->gtFlags |= GTF_SET_FLAGS; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 0200cceb002b12..c39c5816adbc6d 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -91,6 +91,7 @@ class Lowering final : public Phase bool ContainCheckCompareChainForAnd(GenTree* tree); void ContainCheckChainedCompare(GenTreeOp* cmp); void ContainCheckNeg(GenTreeOp* neg); + void ContainCheckConditionalCompare(GenTreeConditional* ccmp); #endif void ContainCheckSelect(GenTreeOp* select); void ContainCheckBitCast(GenTree* node); diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index c77e603c327d41..ba42a753261a8e 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2243,19 +2243,6 @@ void Lowering::ContainCheckCast(GenTreeCast* node) // void Lowering::ContainCheckCompare(GenTreeOp* cmp) { -#if defined(TARGET_ARM64) - if (cmp->OperIs(GT_TEST_EQ, GT_TEST_NE)) - { - if (ContainCheckCompareChainForAnd(cmp)) - { - // Turn the chain into a CCMP node - JITDUMP("Switching node to CCMP:\n"); - cmp->SetOper(cmp->OperIs(GT_TEST_EQ) ? GT_CCMP_NE : GT_CCMP_EQ); - DISPNODE(cmp); - } - } -#endif - CheckImmedAndMakeContained(cmp, cmp->gtOp2); } @@ -2458,6 +2445,30 @@ void Lowering::ContainCheckChainedCompare(GenTreeOp* cmp) } } +//------------------------------------------------------------------------ +// ContainCheckConditionalCompare : determine whether the source of a conditional compare should +// be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckConditionalCompare(GenTreeConditional* ccmp) +{ + if (!comp->opts.OptimizationEnabled()) + { + return; + } + + // Always try to contain the condition to prevent setting flags. 
+ if (IsSafeToContainMem(ccmp, ccmp->gtCond)) + { + ccmp->gtCond->SetContained(); + } + + // Do the same containing as a standard compare + CheckImmedAndMakeContained(ccmp, ccmp->gtOp2); +} + #endif // TARGET_ARM64 //------------------------------------------------------------------------ @@ -2485,7 +2496,7 @@ void Lowering::ContainCheckSelect(GenTreeOp* node) { if (IsSafeToContainMem(node, cond)) { - cond->AsOp()->SetContained(); + cond->SetContained(); } } diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index cfbd74487f4947..6c1f268bac4b3f 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1848,6 +1848,7 @@ class LinearScan : public LinearScanInterface int BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskTP candidates = RBM_NONE); #endif // !TARGET_XARCH int BuildSelect(GenTreeOp* select); + int BuildConditionalUses(GenTreeConditional* node, regMaskTP candidates); // This is the main entry point for building the RefPositions for a node. // These methods return the number of sources. 
int BuildNode(GenTree* tree); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index c17c900c7d579b..ad725a70de8b9c 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -408,8 +408,6 @@ int LinearScan::BuildNode(GenTree* tree) case GT_TEST_EQ: case GT_TEST_NE: case GT_JCMP: - case GT_CCMP_EQ: - case GT_CCMP_NE: srcCount = BuildCmp(tree); break; @@ -775,7 +773,13 @@ int LinearScan::BuildNode(GenTree* tree) break; case GT_SELECT: - assert(dstCount == 1); + case GT_CCMP_EQ: + case GT_CCMP_NE: + case GT_CCMP_LT: + case GT_CCMP_LE: + case GT_CCMP_GE: + case GT_CCMP_GT: + // AHTODO: Rename this to Buildconditional srcCount = BuildSelect(tree->AsConditional()); break; diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 084861949d7039..5f6e98e1d6ce40 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -823,22 +823,27 @@ int LinearScan::BuildCast(GenTreeCast* cast) } //------------------------------------------------------------------------ -// BuildSelect: Build RefPositions for a GT_SELECT node. +// BuildSelect: Build RefPositions for a conditional node. // // Arguments: -// select - The GT_SELECT node +// select - The conditional node // // Return Value: // The number of sources consumed by this node. 
// -int LinearScan::BuildSelect(GenTreeOp* select) +int LinearScan::BuildSelect(GenTreeOp* tree) { - assert(select->OperIs(GT_SELECT)); + assert(tree->OperIsConditional()); + GenTreeConditional* conditional = tree->AsConditional(); - int srcCount = BuildOperandUses(select->AsConditional()->gtCond); - srcCount += BuildOperandUses(select->gtOp1); - srcCount += BuildOperandUses(select->gtOp2); - BuildDef(select); + int srcCount = BuildOperandUses(conditional->gtCond); + srcCount += BuildOperandUses(conditional->gtOp1); + srcCount += BuildOperandUses(conditional->gtOp2); + + if (tree->TypeGet() != TYP_VOID) + { + BuildDef(conditional); + } return srcCount; } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 83e5f4891cc7c9..4ca285fd4388b9 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1765,7 +1765,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc // Currently produce is unused, but need to strengthen an assert to check if produce is // as expected. See https://github.com/dotnet/runtime/issues/8678 int produce = newDefListCount - oldDefListCount; - assert((consume == 0) || (ComputeAvailableSrcCount(tree) == consume)); + // assert((consume == 0) || (ComputeAvailableSrcCount(tree) == consume)); // If we are constraining registers, modify all the RefPositions we've just built to specify the // minimum reg count required. @@ -3225,14 +3225,19 @@ int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates) } #endif // FEATURE_HW_INTRINSICS #ifdef TARGET_ARM64 - if (node->OperIs(GT_MUL, GT_AND) || node->OperIsCompare() || node->OperIsConditionalCompare()) + if (node->OperIs(GT_MUL, GT_AND) || node->OperIsCompare()) { // MUL can be contained for madd or msub on arm64. // Compares and ConditionalCompares can be contained by a SELECT. // ANDs, Cmp Compares and ConditionalCompares may be contained in a chain. 
return BuildBinaryUses(node->AsOp(), candidates); } - if (node->OperIs(GT_NEG, GT_CAST, GT_LSH, GT_RSH, GT_RSZ)) + else if (node->OperIsConditionalCompare()) + { + // ConditionalCompares can be contained by a SELECT. + return BuildConditionalUses(node->AsConditional(), candidates); + } + else if (node->OperIs(GT_NEG, GT_CAST, GT_LSH, GT_RSH, GT_RSZ)) { // NEG can be contained for mneg on arm64 // CAST and LSH for ADD with sign/zero extension @@ -3429,6 +3434,28 @@ int LinearScan::BuildCastUses(GenTreeCast* cast, regMaskTP candidates) return 1; } +//------------------------------------------------------------------------ +// BuildConditionalUses: Get the RefInfoListNodes for the operands of the +// given node, and build uses for them. +// +// Arguments: +// node - a GenTreeConditional +// +// Return Value: +// The number of actual register operands. +// +// Notes: +// The operands must already have been processed by buildRefPositionsForNode, and their +// RefInfoListNodes placed in the defList. 
+// +int LinearScan::BuildConditionalUses(GenTreeConditional* node, regMaskTP candidates) +{ + int srcCount = BuildOperandUses(node->gtCond, candidates); + srcCount += BuildOperandUses(node->gtOp1, candidates); + srcCount += BuildOperandUses(node->gtOp2, candidates); + return srcCount; +} + //------------------------------------------------------------------------ // BuildStoreLocDef: Build a definition RefPosition for a local store // From 8323f71fe7e706eb96b835d1c68296aab45f250a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 8 Mar 2023 09:56:53 +0000 Subject: [PATCH 11/31] Remove lowering/codegen changes --- .../api/v1/query/client-vscode/query.json | 1 - src/coreclr/jit/codegenarm64.cpp | 72 +------------------ src/coreclr/jit/codegenlinear.cpp | 8 --- src/coreclr/jit/compiler.hpp | 8 --- src/coreclr/jit/gentree.cpp | 40 +---------- src/coreclr/jit/gentree.h | 52 +------------- src/coreclr/jit/gtlist.h | 9 --- src/coreclr/jit/gtstructs.h | 2 +- src/coreclr/jit/ifconversion.cpp | 4 +- src/coreclr/jit/lower.cpp | 59 +-------------- src/coreclr/jit/lower.h | 1 - src/coreclr/jit/lowerarmarch.cpp | 27 +------ src/coreclr/jit/lsra.h | 1 - src/coreclr/jit/lsraarm64.cpp | 8 +-- src/coreclr/jit/lsraarmarch.cpp | 6 +- src/coreclr/jit/lsrabuild.cpp | 31 +------- 16 files changed, 17 insertions(+), 312 deletions(-) delete mode 100644 src/coreclr/jit/build/.cmake/api/v1/query/client-vscode/query.json diff --git a/src/coreclr/jit/build/.cmake/api/v1/query/client-vscode/query.json b/src/coreclr/jit/build/.cmake/api/v1/query/client-vscode/query.json deleted file mode 100644 index 82bb964246a197..00000000000000 --- a/src/coreclr/jit/build/.cmake/api/v1/query/client-vscode/query.json +++ /dev/null @@ -1 +0,0 @@ -{"requests":[{"kind":"cache","version":2},{"kind":"codemodel","version":2},{"kind":"toolchains","version":1},{"kind":"cmakeFiles","version":1}]} \ No newline at end of file diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 
713b15c4da835c..c3a5e9e544b9b3 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -4504,8 +4504,6 @@ void CodeGen::genCkfinite(GenTree* treeNode) // Arguments: // tree - the node // -// Assumptions: The registers for tree have already been consumed. -// void CodeGen::genCodeForCompare(GenTreeOp* tree) { regNumber targetReg = tree->GetRegNum(); @@ -4585,7 +4583,7 @@ void CodeGen::genCodeForJTrue(GenTreeOp* jtrue) // genCodeForConditionalCompare: Produce code for a compare that's dependent on a previous compare. // // Arguments: -// tree - a conditional compare node (GT_EQ etc) +// tree - a compare node (GT_EQ etc) // cond - the condition of the previous generated compare. // void CodeGen::genCodeForCCMP(GenTreeCCMP* ccmp) @@ -4621,73 +4619,6 @@ void CodeGen::genCodeForCCMP(GenTreeCCMP* ccmp) } } -//------------------------------------------------------------------------ -// genCodeForConditionalCompare: Produce code for a conditional compare. -// -// Arguments: -// tree - the node -// -// -void CodeGen::genCodeForConditionalCompare(GenTreeConditional* tree) -{ - assert(tree->OperIsConditionalCompare()); - emitter* emit = GetEmitter(); - - GenTree* opcond = tree->gtCond; - GenTree* op1 = tree->gtOp1; - GenTree* op2 = tree->gtOp2; - var_types op1Type = genActualType(op1->TypeGet()); - var_types op2Type = genActualType(op2->TypeGet()); - emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); - regNumber targetReg = tree->GetRegNum(); - - // No float support or swapping op1 and op2 to generate cmp reg, imm. - assert(!varTypeIsFloating(op2Type)); - assert(!op1->isContainedIntOrIImmed()); - - // ConditionalCompare relies on flags, therefore the condition must be contained. 
- assert(opcond->isContained()); - - if (opcond->OperIsCompare()) - { - genConsumeRegs(opcond); - genCodeForCompare(opcond->AsOp()); - } - else - { - assert(opcond->OperIsConditionalCompare()); - genCodeForConditionalCompare(opcond->AsConditional()); - } - - // For the ccmp flags, invert the condition of the compare. - insCflags cflags = InsCflagsForCcmp(GenCondition::FromRelop(tree)); - - // For the condition, use the opcond. - GenCondition prevCond = GenCondition::FromRelop(opcond); - const GenConditionDesc& prevDesc = GenConditionDesc::Get(prevCond); - insCond prevInsCond = JumpKindToInsCond(prevDesc.jumpKind1); - - regNumber srcReg1 = genConsumeReg(op1); - - if (op2->isContainedIntOrIImmed()) - { - GenTreeIntConCommon* intConst = op2->AsIntConCommon(); - emit->emitIns_R_I_FLAGS_COND(INS_ccmp, cmpSize, srcReg1, (int)intConst->IconValue(), cflags, prevInsCond); - } - else - { - regNumber srcReg2 = genConsumeReg(op2); - emit->emitIns_R_R_FLAGS_COND(INS_ccmp, cmpSize, srcReg1, srcReg2, cflags, prevInsCond); - } - - // Are we evaluating this into a register? - if (targetReg != REG_NA) - { - inst_SETCC(GenCondition::FromRelop(tree), tree->TypeGet(), targetReg); - genProduceReg(tree); - } -} - //------------------------------------------------------------------------ // genCodeForSelect: Produce code for a GT_SELECT node. // @@ -4720,7 +4651,6 @@ void CodeGen::genCodeForSelect(GenTreeOp* tree) if (opcond != nullptr) { // Condition has been generated into a register - move it into flags. 
- genConsumeRegs(opcond); emit->emitIns_R_I(INS_cmp, emitActualTypeSize(opcond), opcond->GetRegNum(), 0); cond = GenCondition::NE; } diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 1ddd8874e3d432..bf402bc0538881 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1624,14 +1624,6 @@ void CodeGen::genConsumeRegs(GenTree* tree) genConsumeRegs(tree->gtGetOp1()); genConsumeRegs(tree->gtGetOp2()); } - else if (tree->OperIsConditionalCompare()) - { - assert(false); - // Conditional compares may be contained. - genConsumeRegs(tree->AsConditional()->gtCond); - genConsumeRegs(tree->AsConditional()->gtOp1); - genConsumeRegs(tree->AsConditional()->gtOp2); - } #endif else if (tree->OperIsLocalRead()) { diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 8bd04dcf5e8059..94dcf70963a413 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4259,14 +4259,6 @@ void GenTree::VisitOperands(TVisitor visitor) } case GT_SELECT: -#if defined(TARGET_ARM64) - case GT_CCMP_EQ: - case GT_CCMP_NE: - case GT_CCMP_LT: - case GT_CCMP_LE: - case GT_CCMP_GE: - case GT_CCMP_GT: -#endif { GenTreeConditional* const cond = this->AsConditional(); if (visitor(cond->gtCond) == VisitResult::Abort) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 5c25c56d01ba80..2fc111827f6edd 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3475,19 +3475,6 @@ GenTree* Compiler::gtReverseCond(GenTree* tree) // tbz <=> tbnz tree->gtFlags ^= GTF_JCMP_EQ; } -#if defined(TARGET_ARM64) - else if (tree->OperIsConditionalCompare()) - { - genTreeOps cmpOper = GenTree::OperCovertConditionalCompareToCompare(tree->OperGet()); - genTreeOps revOper = GenTree::ReverseRelop(cmpOper); - tree->SetOper(GenTree::OperCovertCompareToConditionalCompare(revOper)); - - if (varTypeIsFloating(tree->AsConditional()->gtOp1->TypeGet())) - { - tree->gtFlags ^= 
GTF_RELOP_NAN_UN; - } - } -#endif else { tree = gtNewOperNode(GT_NOT, TYP_INT, tree); @@ -6397,14 +6384,6 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse) } case GT_SELECT: -#if defined(TARGET_ARM64) - case GT_CCMP_EQ: - case GT_CCMP_NE: - case GT_CCMP_LT: - case GT_CCMP_LE: - case GT_CCMP_GE: - case GT_CCMP_GT: -#endif { GenTreeConditional* const conditional = this->AsConditional(); if (operand == conditional->gtCond) @@ -9716,14 +9695,6 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) return; case GT_SELECT: -#if defined(TARGET_ARM64) - case GT_CCMP_EQ: - case GT_CCMP_NE: - case GT_CCMP_LT: - case GT_CCMP_LE: - case GT_CCMP_GE: - case GT_CCMP_GT: -#endif m_edge = &m_node->AsConditional()->gtCond; assert(*m_edge != nullptr); m_advance = &GenTreeUseEdgeIterator::AdvanceConditional; @@ -12446,14 +12417,6 @@ void Compiler::gtDispTree(GenTree* tree, break; case GT_SELECT: -#if defined(TARGET_ARM64) - case GT_CCMP_EQ: - case GT_CCMP_NE: - case GT_CCMP_LT: - case GT_CCMP_LE: - case GT_CCMP_GE: - case GT_CCMP_GT: -#endif gtDispCommonEndLine(tree); if (!topOnly) @@ -17283,7 +17246,8 @@ bool GenTree::canBeContained() const return false; } - if (((DebugOperKind() & DBK_NOCONTAIN) != 0) || (OperIsHWIntrinsic() && !isContainableHWIntrinsic())) + // It is not possible for nodes that do not produce values or that are not containable values to be contained. + if (!IsValue() || ((DebugOperKind() & DBK_NOCONTAIN) != 0) || (OperIsHWIntrinsic() && !isContainableHWIntrinsic())) { return false; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index ad69d8a5779eeb..4da96031e89920 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1068,7 +1068,7 @@ struct GenTree { // These are the only operators which can produce either VOID or non-VOID results. 
assert(OperIs(GT_NOP, GT_CALL, GT_COMMA) || OperIsCompare() || OperIsLong() || OperIsHWIntrinsic() || - IsCnsVec() || OperIsConditionalCompare()); + IsCnsVec()); return false; } @@ -1355,13 +1355,7 @@ struct GenTree static bool OperIsConditional(genTreeOps gtOper) { -#if defined(TARGET_ARM64) - static_assert_no_msg( - AreContiguous(GT_SELECT, GT_CCMP_EQ, GT_CCMP_NE, GT_CCMP_LT, GT_CCMP_LE, GT_CCMP_GE, GT_CCMP_GT)); - return (GT_SELECT <= gtOper) && (gtOper <= GT_CCMP_GT); -#else return (GT_SELECT == gtOper); -#endif } bool OperIsConditional() const @@ -1369,35 +1363,6 @@ struct GenTree return OperIsConditional(OperGet()); } - static bool OperIsConditionalCompare(genTreeOps gtOper) - { -#if defined(TARGET_ARM64) - static_assert_no_msg(AreContiguous(GT_CCMP_EQ, GT_CCMP_NE, GT_CCMP_LT, GT_CCMP_LE, GT_CCMP_GE, GT_CCMP_GT)); - return (GT_CCMP_EQ <= gtOper) && (gtOper <= GT_CCMP_GT); -#else - return false; -#endif - } - - bool OperIsConditionalCompare() const - { - return OperIsConditionalCompare(OperGet()); - } - -#if defined(TARGET_ARM64) - static genTreeOps OperCovertCompareToConditionalCompare(genTreeOps oper) - { - assert(OperIsCmpCompare(oper)); - return (genTreeOps)(oper - GT_EQ + GT_CCMP_EQ); - } - - static genTreeOps OperCovertConditionalCompareToCompare(genTreeOps oper) - { - assert(OperIsConditionalCompare(oper)); - return (genTreeOps)(oper - GT_CCMP_EQ + GT_EQ); - } -#endif - static bool OperIsCC(genTreeOps gtOper) { return (gtOper == GT_JCC) || (gtOper == GT_SETCC); @@ -8660,13 +8625,9 @@ struct GenCondition static GenCondition FromRelop(GenTree* relop) { - assert(relop->OperIsCompare() || relop->OperIsConditionalCompare()); + assert(relop->OperIsCompare()); - if (relop->OperIsConditionalCompare()) - { - return FromConditionalRelop(relop); - } - else if (varTypeIsFloating(relop->gtGetOp1())) + if (varTypeIsFloating(relop->gtGetOp1())) { return FromFloatRelop(relop); } @@ -8734,13 +8695,6 @@ struct GenCondition return GenCondition(static_cast(code)); } - 
static GenCondition FromConditionalRelop(GenTree* relop) - { - assert(relop->OperIsConditionalCompare()); - assert(!varTypeIsFloating(relop->AsConditional()->gtOp1) && !varTypeIsFloating(relop->AsConditional()->gtOp2)); - return FromIntegralRelop((genTreeOps)(relop->OperGet() - GT_CCMP_EQ + GT_EQ), relop->IsUnsigned()); - } - static GenCondition Reverse(GenCondition condition) { // clang-format off diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index c03a1db4242b04..13c5a70dfbc55c 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -149,15 +149,6 @@ GTNODE(BITTEST_NE , GenTreeOp ,0,(GTK_BINOP|DBK_NOTHIR)) // Conditional select with 3 operands: condition, true value, false value GTNODE(SELECT , GenTreeConditional ,0,GTK_SPECIAL) -// Sets the condition flags according to the combined results of its children. -#if defined(TARGET_ARM64) -GTNODE(CCMP_EQ , GenTreeConditional ,0,GTK_SPECIAL|DBK_NOTHIR) -GTNODE(CCMP_NE , GenTreeConditional ,0,GTK_SPECIAL|DBK_NOTHIR) -GTNODE(CCMP_LT , GenTreeConditional ,0,GTK_SPECIAL|DBK_NOTHIR) -GTNODE(CCMP_LE , GenTreeConditional ,0,GTK_SPECIAL|DBK_NOTHIR) -GTNODE(CCMP_GE , GenTreeConditional ,0,GTK_SPECIAL|DBK_NOTHIR) -GTNODE(CCMP_GT , GenTreeConditional ,0,GTK_SPECIAL|DBK_NOTHIR) -#endif GTNODE(COMMA , GenTreeOp ,0,GTK_BINOP|DBK_NOTLIR) GTNODE(QMARK , GenTreeQmark ,0,GTK_BINOP|GTK_EXOP|DBK_NOTLIR) diff --git a/src/coreclr/jit/gtstructs.h b/src/coreclr/jit/gtstructs.h index ec90c0faf27325..b64ff20f64e7a2 100644 --- a/src/coreclr/jit/gtstructs.h +++ b/src/coreclr/jit/gtstructs.h @@ -97,7 +97,7 @@ GTSTRUCT_1(PhiArg , GT_PHI_ARG) GTSTRUCT_1(Phi , GT_PHI) GTSTRUCT_1(StoreInd , GT_STOREIND) GTSTRUCT_N(Indir , GT_STOREIND, GT_IND, GT_NULLCHECK, GT_BLK, GT_STORE_BLK, GT_OBJ, GT_STORE_OBJ, GT_STORE_DYN_BLK) -GTSTRUCT_N(Conditional , GT_SELECT, GT_CCMP_EQ, GT_CCMP_NE, GT_CCMP_LT, GT_CCMP_LE, GT_CCMP_GE, GT_CCMP_GT) +GTSTRUCT_N(Conditional , GT_SELECT) #if FEATURE_ARG_SPLIT GTSTRUCT_2_SPECIAL(PutArgStk, 
GT_PUTARG_STK, GT_PUTARG_SPLIT) GTSTRUCT_1(PutArgSplit , GT_PUTARG_SPLIT) diff --git a/src/coreclr/jit/ifconversion.cpp b/src/coreclr/jit/ifconversion.cpp index 3f011a5360c96f..d7194d36c0df12 100644 --- a/src/coreclr/jit/ifconversion.cpp +++ b/src/coreclr/jit/ifconversion.cpp @@ -689,7 +689,8 @@ bool OptIfConversionDsc::optIfConvert() { if (m_doElseConversion) { - selectTrueInput = m_elseOperation.node->gtGetOp2(); + selectTrueInput = m_elseOperation.node->gtGetOp2(); + selectFalseInput = m_thenOperation.node->gtGetOp2(); } else { @@ -700,7 +701,6 @@ bool OptIfConversionDsc::optIfConvert() selectFalseInput = m_thenOperation.node->gtGetOp2(); } - selectFalseInput = m_thenOperation.node->gtGetOp2(); // Pick the type as the type of the local, which should always be compatible even for implicit coercions. selectType = genActualType(m_thenOperation.node->gtGetOp1()); diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 5bf890e6f9a856..c25922157de2fe 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3134,8 +3134,6 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) cmp->AsOp()->gtOp1 = castOp; BlockRange().Remove(cast); - JITDUMP("Removed cast\n"); - DISPTREERANGE(BlockRange(), cmp); } } } @@ -3188,8 +3186,6 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) BlockRange().Remove(cmp->gtGetOp2()); BlockRange().Remove(cmp); - JITDUMP("Removed cast\n"); - DISPTREERANGE(BlockRange(), cmp); return next; } } @@ -3239,8 +3235,6 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) } } #endif - JITDUMP("Bashed compare to test:\n"); - DISPTREERANGE(BlockRange(), cmp); } } @@ -3341,48 +3335,6 @@ GenTree* Lowering::LowerCompare(GenTree* cmp) } } #endif // TARGET_XARCH - -#if defined(TARGET_ARM64) - // Detect TEST(CMP1, CMP2) and replace with CCMP2(CMP1). 
- if (cmp->OperIs(GT_TEST_EQ, GT_TEST_NE)) - { - GenTree* op1 = cmp->AsOp()->gtGetOp1(); - GenTree* op2 = cmp->AsOp()->gtGetOp2(); - - if (op1->OperIsCmpCompare() && op2->OperIsCmpCompare()) - { - // Get the equivalant CCMP oper. - genTreeOps cmpOper = op2->gtOper; - if (cmp->OperIs(GT_TEST_EQ)) - { - cmpOper = GenTree::ReverseRelop(cmpOper); - } - genTreeOps ccmpOper = GenTree::OperCovertCompareToConditionalCompare(cmpOper); - - // Create a ccmp node, insert it and update the use. - GenTreeConditional* ccmp = comp->gtNewConditionalNode(ccmpOper, op1, op2->AsOp()->gtGetOp1(), - op2->AsOp()->gtGetOp2(), op2->gtType); - BlockRange().InsertAfter(op2, ccmp); - LIR::Use useOfCmp; - bool gotUse = BlockRange().TryGetUse(cmp, &useOfCmp); - assert(gotUse); - useOfCmp.ReplaceWith(ccmp); - LowerNode(ccmp); - - // Remove the old nodes. - BlockRange().Remove(cmp); - BlockRange().Remove(op2); - - ContainCheckConditionalCompare(ccmp); - - JITDUMP("Bashed TEST to CCMP:\n"); - DISPTREERANGE(BlockRange(), ccmp); - - return ccmp->gtNext; - } - } -#endif - ContainCheckCompare(cmp->AsOp()); return cmp->gtNext; } @@ -6959,7 +6911,7 @@ void Lowering::CheckCallArg(GenTree* arg) break; default: - // assert(arg->OperIsPutArg()); + assert(arg->OperIsPutArg()); break; } } @@ -7334,15 +7286,6 @@ void Lowering::ContainCheckNode(GenTree* node) ContainCheckSelect(node->AsConditional()); break; - case GT_CCMP_EQ: - case GT_CCMP_NE: - case GT_CCMP_LT: - case GT_CCMP_LE: - case GT_CCMP_GE: - case GT_CCMP_GT: - ContainCheckConditionalCompare(node->AsConditional()); - break; - case GT_ADD: case GT_SUB: #if !defined(TARGET_64BIT) diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 872ddd78fca2f8..2a7eb25ca9c7d9 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -89,7 +89,6 @@ class Lowering final : public Phase insCflags TruthifyingFlags(GenCondition cond); void ContainCheckConditionalCompare(GenTreeCCMP* ccmp); void ContainCheckNeg(GenTreeOp* neg); - void 
ContainCheckConditionalCompare(GenTreeConditional* ccmp); #endif void ContainCheckSelect(GenTreeOp* select); void ContainCheckBitCast(GenTree* node); diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index fd70994adba8ad..51a1a50df3a15d 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2404,11 +2404,10 @@ insCflags Lowering::TruthifyingFlags(GenCondition condition) NO_WAY("unexpected condition type"); return INS_FLAGS_NONE; } - return false; } //------------------------------------------------------------------------ -// ContainCheckChainedCompare: determine whether the source of a compare within a compare chain should be contained. +// ContainCheckConditionalCompare: determine whether the source of a compare within a compare chain should be contained. // // Arguments: // node - pointer to the node @@ -2428,30 +2427,6 @@ void Lowering::ContainCheckConditionalCompare(GenTreeCCMP* cmp) } } -//------------------------------------------------------------------------ -// ContainCheckConditionalCompare : determine whether the source of a conditional compare should -// be contained. -// -// Arguments: -// node - pointer to the node -// -void Lowering::ContainCheckConditionalCompare(GenTreeConditional* ccmp) -{ - if (!comp->opts.OptimizationEnabled()) - { - return; - } - - // Always try to contain the condition to prevent setting flags. 
- if (IsSafeToContainMem(ccmp, ccmp->gtCond)) - { - ccmp->gtCond->SetContained(); - } - - // Do the same containing as a standard compare - CheckImmedAndMakeContained(ccmp, ccmp->gtOp2); -} - #endif // TARGET_ARM64 //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 46d5c6121b302e..d28a8d521d632b 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1854,7 +1854,6 @@ class LinearScan : public LinearScanInterface inline regMaskTP BuildEvexIncompatibleMask(GenTree* tree); #endif // !TARGET_XARCH int BuildSelect(GenTreeOp* select); - int BuildConditionalUses(GenTreeConditional* node, regMaskTP candidates); // This is the main entry point for building the RefPositions for a node. // These methods return the number of sources. int BuildNode(GenTree* tree); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 9b1f3cc03f30ea..0f7c3b4d9f1a7c 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -775,13 +775,7 @@ int LinearScan::BuildNode(GenTree* tree) break; case GT_SELECT: - case GT_CCMP_EQ: - case GT_CCMP_NE: - case GT_CCMP_LT: - case GT_CCMP_LE: - case GT_CCMP_GE: - case GT_CCMP_GT: - // AHTODO: Rename this to Buildconditional + assert(dstCount == 1); srcCount = BuildSelect(tree->AsConditional()); break; case GT_SELECTCC: diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index d7580effeadee1..bc3cf04c235946 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -823,15 +823,15 @@ int LinearScan::BuildCast(GenTreeCast* cast) } //------------------------------------------------------------------------ -// BuildSelect: Build RefPositions for a conditional node. +// BuildSelect: Build RefPositions for a GT_SELECT node. 
// // Arguments: -// select - The conditional node +// select - The GT_SELECT node // // Return Value: // The number of sources consumed by this node. // -int LinearScan::BuildSelect(GenTreeOp* tree) +int LinearScan::BuildSelect(GenTreeOp* select) { assert(select->OperIs(GT_SELECT, GT_SELECTCC)); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 4ff33a00b36d63..14f142d7908faa 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1765,7 +1765,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc // Currently produce is unused, but need to strengthen an assert to check if produce is // as expected. See https://github.com/dotnet/runtime/issues/8678 int produce = newDefListCount - oldDefListCount; - // assert((consume == 0) || (ComputeAvailableSrcCount(tree) == consume)); + assert((consume == 0) || (ComputeAvailableSrcCount(tree) == consume)); // If we are constraining registers, modify all the RefPositions we've just built to specify the // minimum reg count required. @@ -3239,12 +3239,7 @@ int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates) // ANDs may be contained in a chain. return BuildBinaryUses(node->AsOp(), candidates); } - else if (node->OperIsConditionalCompare()) - { - // ConditionalCompares can be contained by a SELECT. - return BuildConditionalUses(node->AsConditional(), candidates); - } - else if (node->OperIs(GT_NEG, GT_CAST, GT_LSH, GT_RSH, GT_RSZ)) + if (node->OperIs(GT_NEG, GT_CAST, GT_LSH, GT_RSH, GT_RSZ)) { // NEG can be contained for mneg on arm64 // CAST and LSH for ADD with sign/zero extension @@ -3467,28 +3462,6 @@ int LinearScan::BuildCastUses(GenTreeCast* cast, regMaskTP candidates) return 1; } -//------------------------------------------------------------------------ -// BuildConditionalUses: Get the RefInfoListNodes for the operands of the -// given node, and build uses for them. 
-// -// Arguments: -// node - a GenTreeConditional -// -// Return Value: -// The number of actual register operands. -// -// Notes: -// The operands must already have been processed by buildRefPositionsForNode, and their -// RefInfoListNodes placed in the defList. -// -int LinearScan::BuildConditionalUses(GenTreeConditional* node, regMaskTP candidates) -{ - int srcCount = BuildOperandUses(node->gtCond, candidates); - srcCount += BuildOperandUses(node->gtOp1, candidates); - srcCount += BuildOperandUses(node->gtOp2, candidates); - return srcCount; -} - //------------------------------------------------------------------------ // BuildStoreLocDef: Build a definition RefPosition for a local store // From bef86f009916cdc7a0a4800f65e6665e0982077c Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 8 Mar 2023 15:52:54 +0000 Subject: [PATCH 12/31] update header --- src/coreclr/jit/optimizer.cpp | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 3a318add36ab4c..aa857a58b6e5b9 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9389,9 +9389,11 @@ inline bool OptBoolsDsc::ConditionIsTest(GenTree* condition, bool* isOptBool) // // ------------ BB01 -> BB03 (cond), succs={BB03,BB04} // * JTRUE -// \--* AND -// +--* LE a,b -// \--* NE c,d +// \--* NE +// +--* AND +// | +--* LE a,b +// | \--* NE c,d +// \--* CNS_INT 0 // // ------------ BB03, preds={BB01} succs={BB04} // * ASG x,y @@ -9403,9 +9405,11 @@ inline bool OptBoolsDsc::ConditionIsTest(GenTree* condition, bool* isOptBool) // // ------------ BB01 -> BB03 (cond), succs={BB03,BB04} // * JTRUE -// \--* AND -// +--* LT a,b -// \--* NE c,d +// \--* NE +// +--* AND +// | +--* LE a,b +// | \--* NE c,d +// \--* CNS_INT 0 // // ------------ BB03, preds={BB01} succs={BB05} // * ASG x,y @@ -9422,11 +9426,13 @@ inline bool OptBoolsDsc::ConditionIsTest(GenTree* condition, bool* isOptBool) // 
// ------------ BB01 -> BB03 (cond), succs={BB03,BB04} // * JTRUE -// \--* AND +// \--* NE // +--* AND -// +--* NE c,d -// +--* GE e,f -// \--* LT a,b +// | +--* AND +// | | +--* NE c,d +// | | \--* GE e,f +// | \--* LT a,b +// \--* CNS_INT 0 // // ------------ BB03, preds={BB01} succs={BB04} // * ASG x,y From 3bdceebc7fa9b2d539541a801d87090e2603b029 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 9 Mar 2023 11:19:27 +0000 Subject: [PATCH 13/31] Add costing with stress overrides --- src/coreclr/jit/compiler.h | 1 + src/coreclr/jit/optimizer.cpp | 108 +++++++++++++++++++++++++--------- 2 files changed, 80 insertions(+), 29 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 6010e312e49d7f..440bce639d54bc 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9757,6 +9757,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX STRESS_MODE(MERGED_RETURNS) \ STRESS_MODE(BB_PROFILE) \ STRESS_MODE(OPT_BOOLS_GC) \ + STRESS_MODE(OPT_BOOLS_COMPARE_CHAIN_COST) \ STRESS_MODE(REMORPH_TREES) \ STRESS_MODE(64RSLT_MUL) \ STRESS_MODE(DO_WHILE_LOOPS) \ diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index aa857a58b6e5b9..9d35c3245e8e74 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9111,7 +9111,7 @@ class OptBoolsDsc GenTree* optIsBoolComp(OptTestInfo* pOptTest); bool optOptimizeBoolsChkTypeCostCond(); void optOptimizeBoolsUpdateTrees(); - inline bool ConditionIsTest(GenTree* condition, bool* isOptBool); + inline bool FindCompareChain(GenTree* condition, bool* isTestCondition); }; //----------------------------------------------------------------------------- @@ -9337,23 +9337,44 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() return true; } -inline bool OptBoolsDsc::ConditionIsTest(GenTree* condition, bool* isOptBool) +//----------------------------------------------------------------------------- +// FindCompareChain: Check 
if the given condition is a compare chain. +// +// Arguments: +// condition: Condition to check. +// isTestCondition: Returns true if condition is a EQ/NE(AND(...),0) but is not a compare chain. +// +// Returns: +// true if chain optimization is a compare chain. +// +// Assumptions: +// m_b1 and m_b2 are set on entry. +// + +inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondition) { GenTree* condOp1 = condition->gtGetOp1(); GenTree* condOp2 = condition->gtGetOp2(); + *isTestCondition = false; + if (condition->OperIs(GT_EQ, GT_NE) && condOp2->IsIntegralConst() && condOp2->AsIntCon()->IconValue() == 0 && condOp1->OperIs(GT_AND)) { - if (condOp1->gtGetOp1()->OperIsCompare() && condOp1->gtGetOp2()->OperIsCompare()) + // Found a test condition. Does it contain a compare chain? + + // Only test that the second operand of AND ends with a compare operation, as this will be + // the condition the new link in the chain will connect with. + // We are allowing for the first operand of the not be a valid chain, as this would require + // a full recursive search through the children. + if (condOp1->gtGetOp2()->OperIsCmpCompare()) { - // Found chained conditions previously optimized via optimize bools. - *isOptBool = true; - return false; + return true; } - // Found a TEST_EQ or TEST_NE equivalent. - return true; + + *isTestCondition = true; } + return false; } @@ -9384,8 +9405,8 @@ inline bool OptBoolsDsc::ConditionIsTest(GenTree* condition, bool* isOptBool) // ------------ BB03, preds={BB01, BB02} succs={BB04} // * ASG x,y // -// These operands will be combined into a single AND chain in the first block (with the first -// condition inverted). +// These operands will be combined into a single AND in the first block (with the first +// condition inverted), wrapped by the test condition (NE(...,0)). 
// // ------------ BB01 -> BB03 (cond), succs={BB03,BB04} // * JTRUE @@ -9422,7 +9443,8 @@ inline bool OptBoolsDsc::ConditionIsTest(GenTree* condition, bool* isOptBool) // iterating through the blocks. For example: // If ( a > b || c == d || e < f ) { x = y; } // The first pass will combine "c == d" and "e < f" into a chain. The second pass will then -// combine the "a > b" with the earlier chain, giving: +// combine the "a > b" with the earlier chain. Where possible, the new condition is placed +// within the test condition (NE(...,0)). // // ------------ BB01 -> BB03 (cond), succs={BB03,BB04} // * JTRUE @@ -9443,6 +9465,10 @@ inline bool OptBoolsDsc::ConditionIsTest(GenTree* condition, bool* isOptBool) // bool OptBoolsDsc::optOptimizeCompareChainCondBlock() { + if (m_comp->verbose) + { + JITDUMP("here\n"); + } assert(m_b1 != nullptr && m_b2 != nullptr && m_b3 == nullptr); m_t3 = nullptr; @@ -9464,11 +9490,7 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() GenTree* cond2 = m_testInfo2.testTree->gtGetOp1(); // Ensure both conditions are suitable. - if (!cond1->OperIsCmpCompare()) - { - return false; - } - if (!(cond2->OperIsCmpCompare() || cond2->OperIs(GT_AND))) + if (!cond1->OperIsCompare() || !cond2->OperIsCompare()) { return false; } @@ -9486,23 +9508,51 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() return false; } + // Check for previously optimized compare chains. + bool op1IsTestCond; + bool op2IsTestCond; + bool op1IsCondChain = FindCompareChain(cond1, &op1IsTestCond); + bool op2IsCondChain = FindCompareChain(cond2, &op2IsTestCond); + // Don't support combining multiple chains. Allowing this would give minimal benefit, as + // costing checks would disallow most instances. + if (op1IsCondChain && op2IsCondChain) + { + return false; + } + // Avoid cases where the compare will be optimized better later: // * cmp(and(x, y), 0) will be turned into a TEST_ opcode. // * Compares against zero will be optimized with cbz. 
- // Make sure to avoid matching previous optimize bool cases. - bool op1IsCondChain = false; - bool op2IsCondChain = false; - if (ConditionIsTest(cond1, &op1IsCondChain) || ConditionIsTest(cond2, &op2IsCondChain)) + if (op1IsTestCond || op2IsTestCond) { return false; } - GenTree* newchain = nullptr; + // Combining conditions means that all conditions are always fully evaluated. + // Put a limit on the max size that can be combined. + if (!m_comp->compStressCompile(Compiler::STRESS_OPT_BOOLS_COMPARE_CHAIN_COST, 25)) + { + int op1Cost = cond1->GetCostEx(); + int op2Cost = cond2->GetCostEx(); + int maxOp1Cost = op1IsCondChain ? 35 : 7; + int maxOp2Cost = op2IsCondChain ? 35 : 7; + + // Cost to allow for chain size of three. + if (op1Cost > maxOp1Cost || op2Cost > maxOp2Cost) + { + JITDUMP("Skipping CompareChainCond that will evaluate conditions unconditionally at costs %d,%d\n", + op1Cost, op2Cost); + return false; + } + } + + GenTree* testcondition = nullptr; - // If a previous optimize bools happened, then reuse the AND operand. + // If a previous optimize bools happened for op2, then reuse the test condition. + // Cannot reuse for op1, as the condition needs reversing. if (op2IsCondChain) { - newchain = cond2; + testcondition = cond2; cond2 = cond2->gtGetOp1(); } @@ -9522,20 +9572,20 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() cond2->gtFlags &= ~GTF_RELOP_JMP_USED; andconds->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); - // Add a NE condition onto the front of the AND. + // Add a test condition onto the front of the AND (or resuse an exisiting one). 
if (op2IsCondChain) { - newchain->AsOp()->gtOp1 = andconds; - newchain->AsOp()->gtFlags |= (andconds->gtFlags & GTF_ALL_EFFECT); + testcondition->AsOp()->gtOp1 = andconds; + testcondition->AsOp()->gtFlags |= (andconds->gtFlags & GTF_ALL_EFFECT); } else { - newchain = m_comp->gtNewOperNode(GT_NE, TYP_INT, andconds, m_comp->gtNewZeroConNode(TYP_INT)); + testcondition = m_comp->gtNewOperNode(GT_NE, TYP_INT, andconds, m_comp->gtNewZeroConNode(TYP_INT)); } // Wire the chain into the second block - m_testInfo2.testTree->AsOp()->gtOp1 = newchain; - m_testInfo2.testTree->AsOp()->gtFlags |= (newchain->gtFlags & GTF_ALL_EFFECT); + m_testInfo2.testTree->AsOp()->gtOp1 = testcondition; + m_testInfo2.testTree->AsOp()->gtFlags |= (testcondition->gtFlags & GTF_ALL_EFFECT); m_comp->gtSetEvalOrder(m_testInfo2.testTree); m_comp->fgSetStmtSeq(s2); From adf6b9990a2567f401d9eef78de5c0c2b29a18f5 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 9 Mar 2023 12:46:59 +0000 Subject: [PATCH 14/31] Better cbz comment --- src/coreclr/jit/optimizer.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 9d35c3245e8e74..8d0ecfca294d34 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9465,10 +9465,6 @@ inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondit // bool OptBoolsDsc::optOptimizeCompareChainCondBlock() { - if (m_comp->verbose) - { - JITDUMP("here\n"); - } assert(m_b1 != nullptr && m_b2 != nullptr && m_b3 == nullptr); m_t3 = nullptr; @@ -9520,8 +9516,9 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() return false; } - // Avoid cases where the compare will be optimized better later: - // * cmp(and(x, y), 0) will be turned into a TEST_ opcode. + // Specifically for Arm64, avoid cases where optimizations in lowering will produce better + // code than optimizing here. 
Specificially: + // * cmp(and(...), 0) will be turned into a TEST_ opcode. // * Compares against zero will be optimized with cbz. if (op1IsTestCond || op2IsTestCond) { From 5b99a4d29a0587deae515a501d2bbecd2ab4910b Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 9 Mar 2023 12:48:37 +0000 Subject: [PATCH 15/31] Use fgRemoveRefPred --- src/coreclr/jit/optimizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 8d0ecfca294d34..694438f13e65d1 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9587,7 +9587,7 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() m_comp->fgSetStmtSeq(s2); // Update the flow. - m_comp->fgRemoveAllRefPreds(m_b1->bbJumpDest, m_b1); + m_comp->fgRemoveRefPred(m_b1->bbJumpDest, m_b1); m_b1->bbJumpKind = BBJ_NONE; // Fixup flags. From e8ebd2ae05003c66c37546b58364c4a444f7467d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 9 Mar 2023 12:56:39 +0000 Subject: [PATCH 16/31] Allow reversed conditions in tests --- .../JIT/opt/Compares/compareAnd2Chains.cs | 48 +++++++++---------- .../JIT/opt/Compares/compareAnd3Chains.cs | 6 +-- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/tests/JIT/opt/Compares/compareAnd2Chains.cs b/src/tests/JIT/opt/Compares/compareAnd2Chains.cs index b7e1c8e940dabe..ba6b355778bafa 100644 --- a/src/tests/JIT/opt/Compares/compareAnd2Chains.cs +++ b/src/tests/JIT/opt/Compares/compareAnd2Chains.cs @@ -186,8 +186,8 @@ public static void consume(T a1, T a2) {} [MethodImpl(MethodImplOptions.NoInlining)] public static void Lt_byte_2_consume(byte a1, byte a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #10 - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #11, nc, ge - //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ge + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #11, nc, {{ge|lt}} + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{ge|lt}} if (a1 < 10 || a2 
< 11) { a1 = 10; } consume(a1, a2); } @@ -195,8 +195,8 @@ public static void Lt_byte_2_consume(byte a1, byte a2) { [MethodImpl(MethodImplOptions.NoInlining)] public static void Le_short_2_consume(short a1, short a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #10 - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #12, nzc, gt - //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, gt + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #12, nzc, {{gt|le}} + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{gt|le}} if (a1 <= 10 || a2 <= 12) { a1 = 10; } consume(a1, a2); } @@ -204,8 +204,8 @@ public static void Le_short_2_consume(short a1, short a2) { [MethodImpl(MethodImplOptions.NoInlining)] public static void Gt_int_2_consume(int a1, int a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #10 - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #13, 0, le - //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, le + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #13, 0, {{le|gt}} + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{le|gt}} if (a1 > 10 || a2 > 13) { a1 = 10; } consume(a1, a2); } @@ -213,8 +213,8 @@ public static void Gt_int_2_consume(int a1, int a2) { [MethodImpl(MethodImplOptions.NoInlining)] public static void Ge_long_2_consume(long a1, long a2) { //ARM64-FULL-LINE: cmp {{x[0-9]+}}, #10 - //ARM64-FULL-LINE-NEXT: ccmp {{x[0-9]+}}, #14, z, lt - //ARM64-FULL-LINE-NEXT: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lt + //ARM64-FULL-LINE-NEXT: ccmp {{x[0-9]+}}, #14, z, {{lt|ge}} + //ARM64-FULL-LINE-NEXT: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{lt|ge}} if (a1 >= 10 || a2 >= 14) { a1 = 10; } consume(a1, a2); } @@ -222,8 +222,8 @@ public static void Ge_long_2_consume(long a1, long a2) { [MethodImpl(MethodImplOptions.NoInlining)] public static void Eq_ushort_2_consume(ushort a1, ushort a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #10 - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #15, z, ne - //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, 
{{w[0-9]+}}, {{w[0-9]+}}, ne + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #15, z, {{ne|eq}} + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{ne|eq}} if (a1 == 10 || a2 == 15) { a1 = 10; } consume(a1, a2); } @@ -231,8 +231,8 @@ public static void Eq_ushort_2_consume(ushort a1, ushort a2) { [MethodImpl(MethodImplOptions.NoInlining)] public static void Ne_uint_2_consume(uint a1, uint a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #10 - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #16, 0, eq - //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, eq + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #16, 0, {{eq|ne}} + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{eq|ne}} if (a1 != 10 || a2 != 16) { a1 = 10; } consume(a1, a2); } @@ -243,8 +243,8 @@ public static void Ne_uint_2_consume(uint a1, uint a2) { public static void Le_else_byte_2_consume(byte a1, byte a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #11 - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #22, nzc, gt - //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, gt + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #22, nzc, {{gt|le}} + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{gt|le}} if (a1 <= 11 || a2 <= 22) { a1 = 20; } else { a1 = 200; } consume(a1, a2); } @@ -252,8 +252,8 @@ public static void Le_else_byte_2_consume(byte a1, byte a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Gt_else_short_2_consume(short a1, short a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #11 - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #23, 0, le - //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, le + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #23, 0, {{le|gt}} + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{le|gt}} if (a1 > 11 || a2 > 23) { a1 = 20; } else { a1 = 200; } consume(a1, a2); } @@ -261,8 +261,8 @@ public static void Gt_else_short_2_consume(short a1, short a2) { 
[MethodImpl(MethodImplOptions.NoInlining)] public static void Ge_else_int_2_consume(int a1, int a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #11 - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #24, z, lt - //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lt + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #24, z, {{lt|ge}} + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{lt|ge}} if (a1 >= 11 || a2 >= 24) { a1 = 20; } else { a1 = 200; } consume(a1, a2); } @@ -270,8 +270,8 @@ public static void Ge_else_int_2_consume(int a1, int a2) { [MethodImpl(MethodImplOptions.NoInlining)] public static void Eq_else_long_2_consume(long a1, long a2) { //ARM64-FULL-LINE: cmp {{x[0-9]+}}, #11 - //ARM64-FULL-LINE-NEXT: ccmp {{x[0-9]+}}, #25, z, ne - //ARM64-FULL-LINE-NEXT: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ne + //ARM64-FULL-LINE-NEXT: ccmp {{x[0-9]+}}, #25, z, {{ne|eq}} + //ARM64-FULL-LINE-NEXT: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{ne|eq}} if (a1 == 11 || a2 == 25) { a1 = 20; } else { a1 = 200; } consume(a1, a2); } @@ -279,8 +279,8 @@ public static void Eq_else_long_2_consume(long a1, long a2) { [MethodImpl(MethodImplOptions.NoInlining)] public static void Ne_else_ushort_2_consume(ushort a1, ushort a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #11 - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #26, 0, eq - //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, eq + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #26, 0, {{eq|ne}} + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{eq|ne}} if (a1 != 11 || a2 != 26) { a1 = 20; } else { a1 = 200; } consume(a1, a2); } @@ -288,8 +288,8 @@ public static void Ne_else_ushort_2_consume(ushort a1, ushort a2) { [MethodImpl(MethodImplOptions.NoInlining)] public static void Lt_else_uint_2_consume(uint a1, uint a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #11 - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #27, 0, hs - //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, 
hs + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #27, 0, {{hs|lo}} + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{hs|lo}} if (a1 < 11 || a2 < 27) { a1 = 20; } else { a1 = 200; } consume(a1, a2); } diff --git a/src/tests/JIT/opt/Compares/compareAnd3Chains.cs b/src/tests/JIT/opt/Compares/compareAnd3Chains.cs index feabefce22a503..04e0f513eaf368 100644 --- a/src/tests/JIT/opt/Compares/compareAnd3Chains.cs +++ b/src/tests/JIT/opt/Compares/compareAnd3Chains.cs @@ -186,9 +186,9 @@ public static void consume(T a1, T a2, T a3) {} [MethodImpl(MethodImplOptions.NoInlining)] public static void Le_byte_3_consume(byte a1, byte a2, byte a3) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #11 - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #12, nzc, gt - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #10, nzc, gt - //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, gt + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #12, nzc, {{gt|le}} + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #10, nzc, {{gt|le}} + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{gt|le}} if (a1 <= 10 || a2 <= 11 || a3 <= 12) { a1 = 10; } consume(a1, a2, a3); } From 748252f39d6e95426a7a32cdca62c521506883d3 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 9 Mar 2023 15:11:00 +0000 Subject: [PATCH 17/31] Move optimize bools pass to a new file --- src/coreclr/jit/CMakeLists.txt | 1 + src/coreclr/jit/optimizebools.cpp | 1417 +++++++++++++++++++++++++++++ src/coreclr/jit/optimizer.cpp | 1401 ---------------------------- 3 files changed, 1418 insertions(+), 1401 deletions(-) create mode 100644 src/coreclr/jit/optimizebools.cpp diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 480bfdc045e16c..c44585cbbd63ee 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -150,6 +150,7 @@ set( JIT_SOURCES morphblock.cpp objectalloc.cpp optcse.cpp + optimizebools.cpp optimizer.cpp patchpoint.cpp phase.cpp diff --git 
a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp new file mode 100644 index 00000000000000..51857df782f5a2 --- /dev/null +++ b/src/coreclr/jit/optimizebools.cpp @@ -0,0 +1,1417 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX optOptimizeBools XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +//----------------------------------------------------------------------------- +// OptTestInfo: Member of OptBoolsDsc struct used to test if a GT_JTRUE or GT_RETURN node +// is a boolean comparison +// +struct OptTestInfo +{ + Statement* testStmt; // Last statement of the basic block + GenTree* testTree; // The root node of the testStmt (GT_JTRUE or GT_RETURN). + GenTree* compTree; // The compare node (i.e. GT_EQ or GT_NE node) of the testTree + bool isBool; // If the compTree is boolean expression +}; + +//----------------------------------------------------------------------------- +// OptBoolsDsc: Descriptor used for Boolean Optimization +// +class OptBoolsDsc +{ +public: + OptBoolsDsc(BasicBlock* b1, BasicBlock* b2, Compiler* comp) + { + m_b1 = b1; + m_b2 = b2; + m_b3 = nullptr; + m_comp = comp; + } + +private: + BasicBlock* m_b1; // The first basic block with the BBJ_COND conditional jump type + BasicBlock* m_b2; // The next basic block of m_b1. Either BBJ_COND or BBJ_RETURN type + BasicBlock* m_b3; // m_b1->bbJumpDest. Null if m_b2 is not a return block. 
+ + Compiler* m_comp; // The pointer to the Compiler instance + + OptTestInfo m_testInfo1; // The first test info + OptTestInfo m_testInfo2; // The second test info + GenTree* m_t3; // The root node of the first statement of m_b3 + + GenTree* m_c1; // The first operand of m_testInfo1.compTree + GenTree* m_c2; // The first operand of m_testInfo2.compTree + + bool m_sameTarget; // if m_b1 and m_b2 jumps to the same destination + + genTreeOps m_foldOp; // The fold operator (e.g., GT_AND or GT_OR) + var_types m_foldType; // The type of the folded tree + genTreeOps m_cmpOp; // The comparison operator (e.g., GT_EQ or GT_NE) + +public: + bool optOptimizeBoolsCondBlock(); + bool optOptimizeCompareChainCondBlock(); + bool optOptimizeBoolsReturnBlock(BasicBlock* b3); +#ifdef DEBUG + void optOptimizeBoolsGcStress(); +#endif + +private: + Statement* optOptimizeBoolsChkBlkCond(); + GenTree* optIsBoolComp(OptTestInfo* pOptTest); + bool optOptimizeBoolsChkTypeCostCond(); + void optOptimizeBoolsUpdateTrees(); + inline bool FindCompareChain(GenTree* condition, bool* isTestCondition); +}; + +//----------------------------------------------------------------------------- +// optOptimizeBoolsCondBlock: Optimize boolean when bbJumpKind of both m_b1 and m_b2 are BBJ_COND +// +// Returns: +// true if boolean optimization is done and m_b1 and m_b2 are folded into m_b1, else false. +// +// Notes: +// m_b1 and m_b2 are set on entry. 
+// +// Case 1: if b1.bbJumpDest == b2.bbJumpDest, it transforms +// B1 : brtrue(t1, Bx) +// B2 : brtrue(t2, Bx) +// B3 : +// to +// B1 : brtrue(t1|t2, BX) +// B3 : +// +// For example, (x == 0 && y == 0 && z == 0) generates +// B1: GT_JTRUE (BBJ_COND), jump to B4 +// B2: GT_JTRUE (BBJ_COND), jump to B4 +// B3: GT_RETURN (BBJ_RETURN) +// B4: GT_RETURN (BBJ_RETURN) +// and B1 and B2 are folded into B1: +// B1: GT_JTRUE (BBJ_COND), jump to B4 +// B3: GT_RETURN (BBJ_RETURN) +// B4: GT_RETURN (BBJ_RETURN) +// +// Case 2: if B1.bbJumpDest == B2->bbNext, it transforms +// B1 : brtrue(t1, B3) +// B2 : brtrue(t2, Bx) +// B3 : +// to +// B1 : brtrue((!t1) && t2, Bx) +// B3 : +// +bool OptBoolsDsc::optOptimizeBoolsCondBlock() +{ + assert(m_b1 != nullptr && m_b2 != nullptr && m_b3 == nullptr); + + // Check if m_b1 and m_b2 jump to the same target and get back pointers to m_testInfo1 and t2 tree nodes + + m_t3 = nullptr; + + // Check if m_b1 and m_b2 have the same bbJumpDest + + if (m_b1->bbJumpDest == m_b2->bbJumpDest) + { + // Given the following sequence of blocks : + // B1: brtrue(t1, BX) + // B2: brtrue(t2, BX) + // B3: + // we will try to fold it to : + // B1: brtrue(t1|t2, BX) + // B3: + + m_sameTarget = true; + } + else if (m_b1->bbJumpDest == m_b2->bbNext) + { + // Given the following sequence of blocks : + // B1: brtrue(t1, B3) + // B2: brtrue(t2, BX) + // B3: + // we will try to fold it to : + // B1: brtrue((!t1)&&t2, BX) + // B3: + + m_sameTarget = false; + } + else + { + return false; + } + + Statement* const s1 = optOptimizeBoolsChkBlkCond(); + if (s1 == nullptr) + { + return false; + } + + // Find the branch conditions of m_b1 and m_b2 + + m_c1 = optIsBoolComp(&m_testInfo1); + if (m_c1 == nullptr) + { + return false; + } + + m_c2 = optIsBoolComp(&m_testInfo2); + if (m_c2 == nullptr) + { + return false; + } + + // Find the type and cost conditions of m_testInfo1 and m_testInfo2 + + if (!optOptimizeBoolsChkTypeCostCond()) + { + return false; + } + + // Get the 
fold operator and the comparison operator + + genTreeOps foldOp; + genTreeOps cmpOp; + var_types foldType = m_c1->TypeGet(); + if (varTypeIsGC(foldType)) + { + foldType = TYP_I_IMPL; + } + + assert(m_testInfo1.compTree->OperIs(GT_EQ, GT_NE, GT_LT, GT_GE)); + + if (m_sameTarget) + { + // Both conditions must be the same + + if (m_testInfo1.compTree->gtOper != m_testInfo2.compTree->gtOper) + { + return false; + } + + if (m_testInfo1.compTree->gtOper == GT_EQ) + { + // t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0 + // So we will branch to BX if (c1&c2)==0 + + foldOp = GT_AND; + cmpOp = GT_EQ; + } + else if (m_testInfo1.compTree->gtOper == GT_LT) + { + // t1:c1<0 t2:c2<0 ==> Branch to BX if either value < 0 + // So we will branch to BX if (c1|c2)<0 + + foldOp = GT_OR; + cmpOp = GT_LT; + } + else if (m_testInfo1.compTree->gtOper == GT_GE) + { + return false; + } + else + { + // t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0 + // So we will branch to BX if (c1|c2)!=0 + + foldOp = GT_OR; + cmpOp = GT_NE; + } + } + else + { + if (m_testInfo1.compTree->gtOper == m_testInfo2.compTree->gtOper) + { + return false; + } + + if (m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_NE) + { + // t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0 + // So we will branch to BX if (c1&c2)!=0 + + foldOp = GT_AND; + cmpOp = GT_NE; + } + else if (m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_GE) + { + // t1:c1<0 t2:c2>=0 ==> Branch to BX if both values >= 0 + // So we will branch to BX if (c1|c2)>=0 + + foldOp = GT_OR; + cmpOp = GT_GE; + } + else if (m_testInfo1.compTree->gtOper == GT_GE) + { + return false; + } + else if (m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_EQ) + { + // t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0 + // So we will branch to BX if (c1|c2)==0 + + foldOp = GT_OR; + cmpOp = GT_EQ; + } + else + { + return false; + } + } + + // Anding requires 
both values to be 0 or 1 + + if ((foldOp == GT_AND) && (!m_testInfo1.isBool || !m_testInfo2.isBool)) + { + return false; + } + + // + // Now update the trees + // + + m_foldOp = foldOp; + m_foldType = foldType; + m_cmpOp = cmpOp; + + optOptimizeBoolsUpdateTrees(); + +#ifdef DEBUG + if (m_comp->verbose) + { + printf("Folded %sboolean conditions of " FMT_BB " and " FMT_BB " to :\n", m_c2->OperIsLeaf() ? "" : "non-leaf ", + m_b1->bbNum, m_b2->bbNum); + m_comp->gtDispStmt(s1); + printf("\n"); + } +#endif + + // Return true to continue the bool optimization for the rest of the BB chain + return true; +} + +//----------------------------------------------------------------------------- +// FindCompareChain: Check if the given condition is a compare chain. +// +// Arguments: +// condition: Condition to check. +// isTestCondition: Returns true if condition is an EQ/NE(AND(...),0) but is not a compare chain. +// +// Returns: +// true if the condition is a compare chain. +// +// Assumptions: +// m_b1 and m_b2 are set on entry. +// + +inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondition) +{ + GenTree* condOp1 = condition->gtGetOp1(); + GenTree* condOp2 = condition->gtGetOp2(); + + *isTestCondition = false; + + if (condition->OperIs(GT_EQ, GT_NE) && condOp2->IsIntegralConst() && condOp2->AsIntCon()->IconValue() == 0 && + condOp1->OperIs(GT_AND)) + { + // Found a test condition. Does it contain a compare chain? + + // Only test that the second operand of AND ends with a compare operation, as this will be + // the condition the new link in the chain will connect with. + // We are allowing for the first operand of the AND to not be a valid chain, as this would require + // a full recursive search through the children. 
+ if (condOp1->gtGetOp2()->OperIsCmpCompare()) + { + return true; + } + + *isTestCondition = true; + } + + return false; +} + +//----------------------------------------------------------------------------- +// optOptimizeCompareChainCondBlock: Create AND chain when both m_b1 and m_b2 are BBJ_COND. +// +// Returns: +// true if chain optimization is done and m_b1 and m_b2 are folded into m_b1, else false. +// +// Assumptions: +// m_b1 and m_b2 are set on entry. +// +// Notes: +// +// This aims to reduce the number of conditional jumps by joining cases when multiple +// conditions gate the execution of a block. For example: +// If ( a > b || c == d) { x = y; } +// Will become the following. Note that the second condition is inverted. +// +// ------------ BB01 -> BB03 (cond), succs={BB02,BB03} +// * JTRUE +// \--* GT a,b +// +// ------------ BB02 -> BB04 (cond), preds={BB01} succs={BB03,BB04} +// * JTRUE +// \--* NE c,d +// +// ------------ BB03, preds={BB01, BB02} succs={BB04} +// * ASG x,y +// +// These operands will be combined into a single AND in the first block (with the first +// condition inverted), wrapped by the test condition (NE(...,0)). +// +// ------------ BB01 -> BB03 (cond), succs={BB03,BB04} +// * JTRUE +// \--* NE +// +--* AND +// | +--* LE a,b +// | \--* NE c,d +// \--* CNS_INT 0 +// +// ------------ BB03, preds={BB01} succs={BB04} +// * ASG x,y +// +// +// This will also work for statements with else cases: +// If ( a > b || c == d) { x = y; } else { x = z; } +// Here BB04 will contain the else ASG. Both BB03 and BB04 will unconditionally jump to BB05. +// +// ------------ BB01 -> BB03 (cond), succs={BB03,BB04} +// * JTRUE +// \--* NE +// +--* AND +// | +--* LE a,b +// | \--* NE c,d +// \--* CNS_INT 0 +// +// ------------ BB03, preds={BB01} succs={BB05} +// * ASG x,y +// +// ------------ BB04, preds={BB01} succs={BB05} +// * ASG x,z +// +// +// Multiple conditions can be chained together. 
This is due to the optimization reverse +// iterating through the blocks. For example: +// If ( a > b || c == d || e < f ) { x = y; } +// The first pass will combine "c == d" and "e < f" into a chain. The second pass will then +// combine the "a > b" with the earlier chain. Where possible, the new condition is placed +// within the test condition (NE(...,0)). +// +// ------------ BB01 -> BB03 (cond), succs={BB03,BB04} +// * JTRUE +// \--* NE +// +--* AND +// | +--* AND +// | | +--* NE c,d +// | | \--* GE e,f +// | \--* LT a,b +// \--* CNS_INT 0 +// +// ------------ BB03, preds={BB01} succs={BB04} +// * ASG x,y +// +// +// Conditions connected by && are not yet checked for. For example: +// If ( a > b && c == d ) { x = y; } +// +bool OptBoolsDsc::optOptimizeCompareChainCondBlock() +{ + assert(m_b1 != nullptr && m_b2 != nullptr && m_b3 == nullptr); + m_t3 = nullptr; + + if (!(m_b1->bbNext == m_b2 && m_b1->bbJumpDest == m_b2->bbNext && m_b1->bbJumpDest->bbNext == m_b2->bbJumpDest)) + { + return false; + } + + Statement* const s1 = optOptimizeBoolsChkBlkCond(); + if (s1 == nullptr) + { + return false; + } + Statement* s2 = m_b2->firstStmt(); + + assert(m_testInfo1.testTree->OperIs(GT_JTRUE)); + GenTree* cond1 = m_testInfo1.testTree->gtGetOp1(); + assert(m_testInfo2.testTree->OperIs(GT_JTRUE)); + GenTree* cond2 = m_testInfo2.testTree->gtGetOp1(); + + // Ensure both conditions are suitable. + if (!cond1->OperIsCompare() || !cond2->OperIsCompare()) + { + return false; + } + + // Ensure there are no additional side effects. + if ((cond1->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0 || + (cond2->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0) + { + return false; + } + + // Integer compares only for now (until support for Arm64 fccmp instruction is added) + if (varTypeIsFloating(cond1->gtGetOp1()->TypeGet()) || varTypeIsFloating(cond2->gtGetOp1()->TypeGet())) + { + return false; + } + + // Check for previously optimized compare chains. 
+ bool op1IsTestCond; + bool op2IsTestCond; + bool op1IsCondChain = FindCompareChain(cond1, &op1IsTestCond); + bool op2IsCondChain = FindCompareChain(cond2, &op2IsTestCond); + // Don't support combining multiple chains. Allowing this would give minimal benefit, as + // costing checks would disallow most instances. + if (op1IsCondChain && op2IsCondChain) + { + return false; + } + + // Specifically for Arm64, avoid cases where optimizations in lowering will produce better + // code than optimizing here. Specifically: + // * cmp(and(...), 0) will be turned into a TEST_ opcode. + // * Compares against zero will be optimized with cbz. + if (op1IsTestCond || op2IsTestCond) + { + return false; + } + + // Combining conditions means that all conditions are always fully evaluated. + // Put a limit on the max size that can be combined. + if (!m_comp->compStressCompile(Compiler::STRESS_OPT_BOOLS_COMPARE_CHAIN_COST, 25)) + { + int op1Cost = cond1->GetCostEx(); + int op2Cost = cond2->GetCostEx(); + int maxOp1Cost = op1IsCondChain ? 35 : 7; + int maxOp2Cost = op2IsCondChain ? 35 : 7; + + // Cost to allow for chain size of three. + if (op1Cost > maxOp1Cost || op2Cost > maxOp2Cost) + { + JITDUMP("Skipping CompareChainCond that will evaluate conditions unconditionally at costs %d,%d\n", op1Cost, + op2Cost); + return false; + } + } + + GenTree* testcondition = nullptr; + + // If a previous optimize bools happened for op2, then reuse the test condition. + // Cannot reuse for op1, as the condition needs reversing. + if (op2IsCondChain) + { + testcondition = cond2; + cond2 = cond2->gtGetOp1(); + } + + // Remove the first JTRUE statement. + constexpr bool isUnlink = true; + m_comp->fgRemoveStmt(m_b1, s1 DEBUGARG(isUnlink)); + + // Invert the first condition. + GenTree* revCond = m_comp->gtReverseCond(cond1); + assert(cond1 == revCond); // Ensure `gtReverseCond` did not create a new node. 
+ + // AND the two conditions together + GenTree* andconds = m_comp->gtNewOperNode(GT_AND, TYP_INT, cond1, cond2); + andconds->AsOp()->gtFlags |= (cond1->gtFlags & GTF_ALL_EFFECT); + andconds->AsOp()->gtFlags |= (cond2->gtFlags & GTF_ALL_EFFECT); + cond1->gtFlags &= ~GTF_RELOP_JMP_USED; + cond2->gtFlags &= ~GTF_RELOP_JMP_USED; + andconds->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); + + // Add a test condition onto the front of the AND (or reuse an existing one). + if (op2IsCondChain) + { + testcondition->AsOp()->gtOp1 = andconds; + testcondition->AsOp()->gtFlags |= (andconds->gtFlags & GTF_ALL_EFFECT); + } + else + { + testcondition = m_comp->gtNewOperNode(GT_NE, TYP_INT, andconds, m_comp->gtNewZeroConNode(TYP_INT)); + } + + // Wire the chain into the second block + m_testInfo2.testTree->AsOp()->gtOp1 = testcondition; + m_testInfo2.testTree->AsOp()->gtFlags |= (testcondition->gtFlags & GTF_ALL_EFFECT); + m_comp->gtSetEvalOrder(m_testInfo2.testTree); + m_comp->fgSetStmtSeq(s2); + + // Update the flow. + m_comp->fgRemoveRefPred(m_b1->bbJumpDest, m_b1); + m_b1->bbJumpKind = BBJ_NONE; + + // Fixup flags. + m_b2->bbFlags |= (m_b1->bbFlags & BBF_COPY_PROPAGATE); + + // Join the two blocks. This is done now to ensure that additional conditions can be chained. + if (m_comp->fgCanCompactBlocks(m_b1, m_b2)) + { + m_comp->fgCompactBlocks(m_b1, m_b2); + } + +#ifdef DEBUG + if (m_comp->verbose) + { + printf("\nCombined conditions " FMT_BB " and " FMT_BB " into AND chain :\n", m_b1->bbNum, m_b2->bbNum); + m_comp->fgDumpBlock(m_b1); + printf("\n"); + } +#endif + + return true; +} + +//----------------------------------------------------------------------------- +// optOptimizeBoolsChkBlkCond: Checks block conditions if it can be boolean optimized +// +// Return: +// If all conditions pass, returns the last statement of m_b1, else return nullptr. 
+// +// Notes: +// This method checks if the second (and third block for cond/return/return case) contains only one statement, +// and checks if tree operators are of the right type, e.g, GT_JTRUE, GT_RETURN. +// +// On entry, m_b1, m_b2 are set and m_b3 is set for cond/return/return case. +// If it passes all the conditions, m_testInfo1.testTree, m_testInfo2.testTree and m_t3 are set +// to the root nodes of m_b1, m_b2 and m_b3 each. +// SameTarget is also updated to true if m_b1 and m_b2 jump to the same destination. +// +Statement* OptBoolsDsc::optOptimizeBoolsChkBlkCond() +{ + assert(m_b1 != nullptr && m_b2 != nullptr); + + bool optReturnBlock = false; + if (m_b3 != nullptr) + { + optReturnBlock = true; + } + + // Find the block conditions of m_b1 and m_b2 + + if (m_b2->countOfInEdges() > 1 || (optReturnBlock && m_b3->countOfInEdges() > 1)) + { + return nullptr; + } + + // Find the condition for the first block + + Statement* s1 = m_b1->lastStmt(); + + GenTree* testTree1 = s1->GetRootNode(); + assert(testTree1->gtOper == GT_JTRUE); + + // The second and the third block must contain a single statement + + Statement* s2 = m_b2->firstStmt(); + if (s2->GetPrevStmt() != s2) + { + return nullptr; + } + + GenTree* testTree2 = s2->GetRootNode(); + + if (!optReturnBlock) + { + assert(testTree2->gtOper == GT_JTRUE); + } + else + { + if (testTree2->gtOper != GT_RETURN) + { + return nullptr; + } + + Statement* s3 = m_b3->firstStmt(); + if (s3->GetPrevStmt() != s3) + { + return nullptr; + } + + GenTree* testTree3 = s3->GetRootNode(); + if (testTree3->gtOper != GT_RETURN) + { + return nullptr; + } + + if (!varTypeIsIntegral(testTree2->TypeGet()) || !varTypeIsIntegral(testTree3->TypeGet())) + { + return nullptr; + } + + // The third block is Return with "CNS_INT int 0/1" + if (testTree3->AsOp()->gtOp1->gtOper != GT_CNS_INT) + { + return nullptr; + } + + if (testTree3->AsOp()->gtOp1->gtType != TYP_INT) + { + return nullptr; + } + + m_t3 = testTree3; + } + + 
m_testInfo1.testStmt = s1; + m_testInfo1.testTree = testTree1; + m_testInfo2.testStmt = s2; + m_testInfo2.testTree = testTree2; + + return s1; +} + +//----------------------------------------------------------------------------- +// optOptimizeBoolsChkTypeCostCond: Checks if type conditions meet the folding condition, and +// if cost to fold is not too expensive +// +// Return: +// True if it meets type conditions and cost conditions. Else false. +// +bool OptBoolsDsc::optOptimizeBoolsChkTypeCostCond() +{ + assert(m_testInfo1.compTree->OperIs(GT_EQ, GT_NE, GT_LT, GT_GE) && m_testInfo1.compTree->AsOp()->gtOp1 == m_c1); + assert(m_testInfo2.compTree->OperIs(GT_EQ, GT_NE, GT_LT, GT_GE) && m_testInfo2.compTree->AsOp()->gtOp1 == m_c2); + + // + // Leave out floats where the bit-representation is more complicated + // - there are two representations for 0. + // + if (varTypeIsFloating(m_c1->TypeGet()) || varTypeIsFloating(m_c2->TypeGet())) + { + return false; + } + + // Make sure the types involved are of the same sizes + if (genTypeSize(m_c1->TypeGet()) != genTypeSize(m_c2->TypeGet())) + { + return false; + } + if (genTypeSize(m_testInfo1.compTree->TypeGet()) != genTypeSize(m_testInfo2.compTree->TypeGet())) + { + return false; + } +#ifdef TARGET_ARMARCH + // Skip the small operand which we cannot encode. 
+ if (varTypeIsSmall(m_c1->TypeGet())) + return false; +#endif + // The second condition must not contain side effects + // + if (m_c2->gtFlags & GTF_GLOB_EFFECT) + { + return false; + } + + // The second condition must not be too expensive + // + if (m_c2->GetCostEx() > 12) + { + return false; + } + + return true; +} + +//----------------------------------------------------------------------------- +// optOptimizeBoolsUpdateTrees: Fold the trees based on fold type and comparison type, +// update the edges, unlink removed blocks and update loop table +// +void OptBoolsDsc::optOptimizeBoolsUpdateTrees() +{ + assert(m_b1 != nullptr && m_b2 != nullptr); + + bool optReturnBlock = false; + if (m_b3 != nullptr) + { + optReturnBlock = true; + } + + assert(m_foldOp != NULL && m_foldType != NULL && m_c1 != nullptr && m_c2 != nullptr); + + GenTree* cmpOp1 = m_comp->gtNewOperNode(m_foldOp, m_foldType, m_c1, m_c2); + if (m_testInfo1.isBool && m_testInfo2.isBool) + { + // When we 'OR'/'AND' two booleans, the result is boolean as well + cmpOp1->gtFlags |= GTF_BOOLEAN; + } + + GenTree* t1Comp = m_testInfo1.compTree; + t1Comp->SetOper(m_cmpOp); + t1Comp->AsOp()->gtOp1 = cmpOp1; + t1Comp->AsOp()->gtOp2->gtType = m_foldType; // Could have been varTypeIsGC() + if (optReturnBlock) + { + // Update tree when m_b1 is BBJ_COND and m_b2 and m_b3 are GT_RETURN (BBJ_RETURN) + t1Comp->AsOp()->gtOp2->AsIntCon()->gtIconVal = 0; + m_testInfo1.testTree->gtOper = GT_RETURN; + m_testInfo1.testTree->gtType = m_testInfo2.testTree->gtType; + + // Update the return count of flow graph + assert(m_comp->fgReturnCount >= 2); + --m_comp->fgReturnCount; + } + +#if FEATURE_SET_FLAGS + // For comparisons against zero we will have the GTF_SET_FLAGS set + // and this can cause an assert to fire in fgMoveOpsLeft(GenTree* tree) + // during the CSE phase. 
+ // + // So make sure to clear any GTF_SET_FLAGS bit on these operations + // as they are no longer feeding directly into a comparisons against zero + + // Make sure that the GTF_SET_FLAGS bit is cleared. + // Fix 388436 ARM JitStress WP7 + m_c1->gtFlags &= ~GTF_SET_FLAGS; + m_c2->gtFlags &= ~GTF_SET_FLAGS; + + // The new top level node that we just created does feed directly into + // a comparison against zero, so set the GTF_SET_FLAGS bit so that + // we generate an instruction that sets the flags, which allows us + // to omit the cmp with zero instruction. + + // Request that the codegen for cmpOp1 sets the condition flags + // when it generates the code for cmpOp1. + // + cmpOp1->gtRequestSetFlags(); +#endif + + // Recost/rethread the tree if necessary + // + if (m_comp->fgNodeThreading != NodeThreading::None) + { + m_comp->gtSetStmtInfo(m_testInfo1.testStmt); + m_comp->fgSetStmtSeq(m_testInfo1.testStmt); + } + + if (!optReturnBlock) + { + // Update edges if m_b1: BBJ_COND and m_b2: BBJ_COND + + FlowEdge* edge1 = m_comp->fgGetPredForBlock(m_b1->bbJumpDest, m_b1); + FlowEdge* edge2; + + if (m_sameTarget) + { + edge2 = m_comp->fgGetPredForBlock(m_b2->bbJumpDest, m_b2); + } + else + { + edge2 = m_comp->fgGetPredForBlock(m_b2->bbNext, m_b2); + + m_comp->fgRemoveRefPred(m_b1->bbJumpDest, m_b1); + + m_b1->bbJumpDest = m_b2->bbJumpDest; + + m_comp->fgAddRefPred(m_b2->bbJumpDest, m_b1); + } + + assert(edge1 != nullptr); + assert(edge2 != nullptr); + + weight_t edgeSumMin = edge1->edgeWeightMin() + edge2->edgeWeightMin(); + weight_t edgeSumMax = edge1->edgeWeightMax() + edge2->edgeWeightMax(); + if ((edgeSumMax >= edge1->edgeWeightMax()) && (edgeSumMax >= edge2->edgeWeightMax())) + { + edge1->setEdgeWeights(edgeSumMin, edgeSumMax, m_b1->bbJumpDest); + } + else + { + edge1->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT, m_b1->bbJumpDest); + } + } + + /* Modify the target of the conditional jump and update bbRefs and bbPreds */ + + if (optReturnBlock) + { + 
m_b1->bbJumpDest = nullptr; + m_b1->bbJumpKind = BBJ_RETURN; +#ifdef DEBUG + m_b1->bbJumpSwt = m_b2->bbJumpSwt; +#endif + assert(m_b2->bbJumpKind == BBJ_RETURN); + assert(m_b1->bbNext == m_b2); + assert(m_b3 != nullptr); + } + else + { + assert(m_b1->bbJumpKind == BBJ_COND); + assert(m_b2->bbJumpKind == BBJ_COND); + assert(m_b1->bbJumpDest == m_b2->bbJumpDest); + assert(m_b1->bbNext == m_b2); + assert(m_b2->bbNext != nullptr); + } + + if (!optReturnBlock) + { + // Update bbRefs and bbPreds + // + // Replace pred 'm_b2' for 'm_b2->bbNext' with 'm_b1' + // Remove pred 'm_b2' for 'm_b2->bbJumpDest' + m_comp->fgReplacePred(m_b2->bbNext, m_b2, m_b1); + m_comp->fgRemoveRefPred(m_b2->bbJumpDest, m_b2); + } + + // Get rid of the second block + + m_comp->fgUnlinkBlock(m_b2); + m_b2->bbFlags |= BBF_REMOVED; + // If m_b2 was the last block of a try or handler, update the EH table. + m_comp->ehUpdateForDeletedBlock(m_b2); + + if (optReturnBlock) + { + // Get rid of the third block + m_comp->fgUnlinkBlock(m_b3); + m_b3->bbFlags |= BBF_REMOVED; + // If m_b3 was the last block of a try or handler, update the EH table. + m_comp->ehUpdateForDeletedBlock(m_b3); + } + + // Update loop table + m_comp->fgUpdateLoopsAfterCompacting(m_b1, m_b2); + if (optReturnBlock) + { + m_comp->fgUpdateLoopsAfterCompacting(m_b1, m_b3); + } + + // Update IL range of first block + m_b1->bbCodeOffsEnd = optReturnBlock ? m_b3->bbCodeOffsEnd : m_b2->bbCodeOffsEnd; +} + +//----------------------------------------------------------------------------- +// optOptimizeBoolsReturnBlock: Optimize boolean when m_b1 is BBJ_COND and m_b2 and m_b3 are BBJ_RETURN +// +// Arguments: +// b3: Pointer to basic block b3 +// +// Returns: +// true if boolean optimization is done and m_b1, m_b2 and m_b3 are folded into m_b1, else false. +// +// Notes: +// m_b1, m_b2 and m_b3 of OptBoolsDsc are set on entry. 
+// +// if B1.bbJumpDest == b3, it transforms +// B1 : brtrue(t1, B3) +// B2 : ret(t2) +// B3 : ret(0) +// to +// B1 : ret((!t1) && t2) +// +// For example, (x==0 && y==0) generates: +// B1: GT_JTRUE (BBJ_COND), jumps to B3 +// B2: GT_RETURN (BBJ_RETURN) +// B3: GT_RETURN (BBJ_RETURN), +// and it is folded into +// B1: GT_RETURN (BBJ_RETURN) +// +bool OptBoolsDsc::optOptimizeBoolsReturnBlock(BasicBlock* b3) +{ + assert(m_b1 != nullptr && m_b2 != nullptr); + + // m_b3 is set for cond/return/return case + m_b3 = b3; + + m_sameTarget = false; + Statement* const s1 = optOptimizeBoolsChkBlkCond(); + if (s1 == nullptr) + { + return false; + } + + // Find the branch conditions of m_b1 and m_b2 + + m_c1 = optIsBoolComp(&m_testInfo1); + if (m_c1 == nullptr) + { + return false; + } + + m_c2 = optIsBoolComp(&m_testInfo2); + if (m_c2 == nullptr) + { + return false; + } + + // Find the type and cost conditions of m_testInfo1 and m_testInfo2 + + if (!optOptimizeBoolsChkTypeCostCond()) + { + return false; + } + + // Get the fold operator (m_foldOp, e.g., GT_OR/GT_AND) and + // the comparison operator (m_cmpOp, e.g., GT_EQ/GT_NE/GT_GE/GT_LT) + + var_types foldType = m_c1->TypeGet(); + if (varTypeIsGC(foldType)) + { + foldType = TYP_I_IMPL; + } + m_foldType = foldType; + + m_foldOp = GT_NONE; + m_cmpOp = GT_NONE; + + genTreeOps foldOp; + genTreeOps cmpOp; + + ssize_t it1val = m_testInfo1.compTree->AsOp()->gtOp2->AsIntCon()->gtIconVal; + ssize_t it2val = m_testInfo2.compTree->AsOp()->gtOp2->AsIntCon()->gtIconVal; + ssize_t it3val = m_t3->AsOp()->gtOp1->AsIntCon()->gtIconVal; + + if ((m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_EQ) && + (it1val == 0 && it2val == 0 && it3val == 0)) + { + // Case: x == 0 && y == 0 + // t1:c1!=0 t2:c2==0 t3:c3==0 + // ==> true if (c1|c2)==0 + foldOp = GT_OR; + cmpOp = GT_EQ; + } + else if ((m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_NE) && + (it1val == 0 && it2val == 0 && it3val == 0)) + { 
+ // Case: x == 1 && y ==1 + // t1:c1!=1 t2:c2==1 t3:c3==0 is reversed from optIsBoolComp() to: t1:c1==0 t2:c2!=0 t3:c3==0 + // ==> true if (c1&c2)!=0 + foldOp = GT_AND; + cmpOp = GT_NE; + } + else if ((m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_GE) && + (it1val == 0 && it2val == 0 && it3val == 0)) + { + // Case: x >= 0 && y >= 0 + // t1:c1<0 t2:c2>=0 t3:c3==0 + // ==> true if (c1|c2)>=0 + + foldOp = GT_OR; + cmpOp = GT_GE; + } + else if ((m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_EQ) && + (it1val == 0 && it2val == 0 && it3val == 1)) + { + // Case: x == 0 || y == 0 + // t1:c1==0 t2:c2==0 t3:c3==1 + // ==> true if (c1&c2)==0 + foldOp = GT_AND; + cmpOp = GT_EQ; + } + else if ((m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_NE) && + (it1val == 0 && it2val == 0 && it3val == 1)) + { + // Case: x == 1 || y == 1 + // t1:c1==1 t2:c2==1 t3:c3==1 is reversed from optIsBoolComp() to: t1:c1!=0 t2:c2!=0 t3:c3==1 + // ==> true if (c1|c2)!=0 + foldOp = GT_OR; + cmpOp = GT_NE; + } + else if ((m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_LT) && + (it1val == 0 && it2val == 0 && it3val == 1)) + { + // Case: x < 0 || y < 0 + // t1:c1<0 t2:c2<0 t3:c3==1 + // ==> true if (c1|c2)<0 + + foldOp = GT_OR; + cmpOp = GT_LT; + } + else + { + // Require NOT operation for operand(s). Do Not fold. 
+ return false; + } + + if ((foldOp == GT_AND || (cmpOp == GT_NE && foldOp != GT_OR)) && (!m_testInfo1.isBool || !m_testInfo2.isBool)) + { + // x == 1 && y == 1: Skip cases where x or y is greater than 1, e.g., x=3, y=1 + // x == 0 || y == 0: Skip cases where x and y have opposite bits set, e.g., x=2, y=1 + // x == 1 || y == 1: Skip cases where either x or y is greater than 1, e.g., x=2, y=0 + return false; + } + + m_foldOp = foldOp; + m_cmpOp = cmpOp; + + // Now update the trees + + optOptimizeBoolsUpdateTrees(); + +#ifdef DEBUG + if (m_comp->verbose) + { + printf("Folded %sboolean conditions of " FMT_BB ", " FMT_BB " and " FMT_BB " to :\n", + m_c2->OperIsLeaf() ? "" : "non-leaf ", m_b1->bbNum, m_b2->bbNum, m_b3->bbNum); + m_comp->gtDispStmt(s1); + printf("\n"); + } +#endif + + // Return true to continue the bool optimization for the rest of the BB chain + return true; +} + +//----------------------------------------------------------------------------- +// optOptimizeBoolsGcStress: Replace x==null with (x|x)==0 if x is a GC-type. +// This will stress code-gen and the emitter to make sure they support such trees. 
+// +#ifdef DEBUG + +void OptBoolsDsc::optOptimizeBoolsGcStress() +{ + if (!m_comp->compStressCompile(m_comp->STRESS_OPT_BOOLS_GC, 20)) + { + return; + } + + assert(m_b1->bbJumpKind == BBJ_COND); + Statement* const stmt = m_b1->lastStmt(); + GenTree* const cond = stmt->GetRootNode(); + + assert(cond->gtOper == GT_JTRUE); + + OptTestInfo test; + test.testStmt = stmt; + test.testTree = cond; + + GenTree* comparand = optIsBoolComp(&test); + + if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet())) + { + return; + } + GenTree* relop = test.compTree; + bool isBool = test.isBool; + + if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF)) + { + return; + } + + GenTree* comparandClone = m_comp->gtCloneExpr(comparand); + + noway_assert(relop->AsOp()->gtOp1 == comparand); + genTreeOps oper = m_comp->compStressCompile(m_comp->STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND; + relop->AsOp()->gtOp1 = m_comp->gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone); + + // Comparand type is already checked, and we have const int, there is no harm + // morphing it into a TYP_I_IMPL. + noway_assert(relop->AsOp()->gtOp2->gtOper == GT_CNS_INT); + relop->AsOp()->gtOp2->gtType = TYP_I_IMPL; + + // Recost/rethread the tree if necessary + // + if (m_comp->fgNodeThreading != NodeThreading::None) + { + m_comp->gtSetStmtInfo(test.testStmt); + m_comp->fgSetStmtSeq(test.testStmt); + } +} + +#endif + +//----------------------------------------------------------------------------- +// optIsBoolComp: Function used by folding of boolean conditionals +// +// Arguments: +// pOptTest The test info for the test tree +// +// Return: +// On success, return the first operand (gtOp1) of compTree, else return nullptr. +// +// Notes: +// On entry, testTree is set. +// On success, compTree is set to the compare node (i.e. GT_EQ or GT_NE or GT_LT or GT_GE) of the testTree. +// isBool is set to true if the comparand (i.e., operand 1 of compTree) is boolean. Otherwise, false. 
+// +// Given a GT_JTRUE or GT_RETURN node, this method checks if it is a boolean comparison +// of the form "if (boolVal ==/!=/>=/< 0/1)". This is translated into +// a GT_EQ/GT_NE/GT_GE/GT_LT node with "opr1" being a boolean lclVar and "opr2" the const 0/1. +// +// When isBool == true, if the comparison was against a 1 (i.e., true) +// then we morph the tree by reversing the GT_EQ/GT_NE/GT_GE/GT_LT and change the 1 to 0. +// +GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest) +{ + pOptTest->isBool = false; + + assert(pOptTest->testTree->gtOper == GT_JTRUE || pOptTest->testTree->gtOper == GT_RETURN); + GenTree* cond = pOptTest->testTree->AsOp()->gtOp1; + + // The condition must be "!= 0" or "== 0" or >=0 or <0 + // we don't optimize unsigned < and >= operations + if (!cond->OperIs(GT_EQ, GT_NE) && (!cond->OperIs(GT_LT, GT_GE) || cond->IsUnsigned())) + { + return nullptr; + } + + // Return the compare node to the caller + + pOptTest->compTree = cond; + + // Get hold of the comparands + + GenTree* opr1 = cond->AsOp()->gtOp1; + GenTree* opr2 = cond->AsOp()->gtOp2; + + if (opr2->gtOper != GT_CNS_INT) + { + return nullptr; + } + + if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1)) + { + return nullptr; + } + + ssize_t ival2 = opr2->AsIntCon()->gtIconVal; + + // Is the value a boolean? + // We can either have a boolean expression (marked GTF_BOOLEAN) or + // a local variable that is marked as being boolean (lvIsBoolean) + + if (opr1->gtFlags & GTF_BOOLEAN) + { + pOptTest->isBool = true; + } + else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1))) + { + pOptTest->isBool = true; + } + else if (opr1->gtOper == GT_LCL_VAR) + { + // is it a boolean local variable? + + unsigned lclNum = opr1->AsLclVarCommon()->GetLclNum(); + noway_assert(lclNum < m_comp->lvaCount); + + if (m_comp->lvaTable[lclNum].lvIsBoolean) + { + pOptTest->isBool = true; + } + } + + // Was our comparison against the constant 1 (i.e. 
true) + if (ival2 == 1) + { + // If this is a boolean expression tree we can reverse the relop + // and change the true to false. + if (pOptTest->isBool) + { + m_comp->gtReverseCond(cond); + opr2->AsIntCon()->gtIconVal = 0; + } + else + { + return nullptr; + } + } + + return opr1; +} + +//----------------------------------------------------------------------------- +// optOptimizeBools: Folds boolean conditionals for GT_JTRUE/GT_RETURN nodes +// +// Returns: +// suitable phase status +// +// Notes: +// If the operand of GT_JTRUE/GT_RETURN node is GT_EQ/GT_NE/GT_GE/GT_LT of the form +// "if (boolVal ==/!=/>=/< 0/1)", the GT_EQ/GT_NE/GT_GE/GT_LT nodes are translated into a +// GT_EQ/GT_NE/GT_GE/GT_LT node with +// "op1" being a boolean GT_OR/GT_AND lclVar and +// "op2" the const 0/1. +// For example, the folded tree for the below boolean optimization is shown below: +// Case 1: (x == 0 && y ==0) => (x | y) == 0 +// * RETURN int +// \--* EQ int +// +--* OR int +// | +--* LCL_VAR int V00 arg0 +// | \--* LCL_VAR int V01 arg1 +// \--* CNS_INT int 0 +// +// Case 2: (x == null && y == null) ==> (x | y) == 0 +// * RETURN int +// \-- * EQ int +// + -- * OR long +// | +-- * LCL_VAR ref V00 arg0 +// | \-- * LCL_VAR ref V01 arg1 +// \-- * CNS_INT long 0 +// +// Case 3: (x == 0 && y == 0 && z == 0) ==> ((x | y) | z) == 0 +// * RETURN int +// \-- * EQ int +// + -- * OR int +// | +-- * OR int +// | | +-- * LCL_VAR int V00 arg0 +// | | \-- * LCL_VAR int V01 arg1 +// | \-- * LCL_VAR int V02 arg2 +// \-- * CNS_INT int 0 +// +// Case 4: (x == 0 && y == 0 && z == 0 && w == 0) ==> (((x | y) | z) | w) == 0 +// * RETURN int +// \-- * EQ int +// + * OR int +// | +--* OR int +// | | +--* OR int +// | | | +--* LCL_VAR int V00 arg0 +// | | | \--* LCL_VAR int V01 arg1 +// | | \--* LCL_VAR int V02 arg2 +// | \--* LCL_VAR int V03 arg3 +// \--* CNS_INT int 0 +// +// Case 5: (x != 0 && y != 0) => (x | y) != 0 +// * RETURN int +// \--* NE int +// +--* OR int +// | +--* LCL_VAR int V00 arg0 +// | 
\--* LCL_VAR int V01 arg1 +// \--* CNS_INT int 0 +// +// Case 6: (x >= 0 && y >= 0) => (x | y) >= 0 +// * RETURN int +// \--* GE int +// +--* OR int +// | +--* LCL_VAR int V00 arg0 +// | \--* LCL_VAR int V01 arg1 +// \--* CNS_INT int 0 +// +// Case 7: (x < 0 || y < 0) => (x & y) < 0 +// * RETURN int +// \--* LT int +// +--* AND int +// | +--* LCL_VAR int V00 arg0 +// | \--* LCL_VAR int V01 arg1 +// \--* CNS_INT int 0 +// +// Patterns that are not optimized include (x == 1 && y == 1), (x == 1 || y == 1), +// (x == 0 || y == 0) because currently their comptree is not marked as boolean expression. +// When m_foldOp == GT_AND or m_cmpOp == GT_NE, both compTrees must be boolean expression +// in order to skip below cases when compTree is not boolean expression: +// - x == 1 && y == 1 ==> (x&y)!=0: Skip cases where x or y is greater than 1, e.g., x=3, y=1 +// - x == 1 || y == 1 ==> (x|y)!=0: Skip cases where either x or y is greater than 1, e.g., x=2, y=0 +// - x == 0 || y == 0 ==> (x&y)==0: Skip cases where x and y have opposite bits set, e.g., x=2, y=1 +// +PhaseStatus Compiler::optOptimizeBools() +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In optOptimizeBools()\n"); + } +#endif + bool change = false; + unsigned numCond = 0; + unsigned numReturn = 0; + unsigned numPasses = 0; + unsigned stress = false; + + do + { + numPasses++; + change = false; + + // Reverse iterate through the blocks. + for (BasicBlock* b1 = fgLastBB; b1 != nullptr; b1 = b1->bbPrev) + { + // We're only interested in conditional jumps here + + if (b1->bbJumpKind != BBJ_COND) + { + continue; + } + + // If there is no next block, we're done + + BasicBlock* b2 = b1->bbNext; + if (b2 == nullptr) + { + break; + } + + // The next block must not be marked as BBF_DONT_REMOVE + if (b2->bbFlags & BBF_DONT_REMOVE) + { + continue; + } + + OptBoolsDsc optBoolsDsc(b1, b2, this); + + // The next block needs to be a condition or return block. 
+ + if (b2->bbJumpKind == BBJ_COND) + { + if ((b1->bbJumpDest != b2->bbJumpDest) && (b1->bbJumpDest != b2->bbNext)) + { + continue; + } + + // When it is conditional jumps + + if (optBoolsDsc.optOptimizeBoolsCondBlock()) + { + change = true; + numCond++; + } +#ifdef TARGET_ARM64 + else if (optBoolsDsc.optOptimizeCompareChainCondBlock()) + { + change = true; + numCond++; + } +#endif + } + else if (b2->bbJumpKind == BBJ_RETURN) + { + // Set b3 to b1 jump destination + BasicBlock* b3 = b1->bbJumpDest; + + // b3 must not be marked as BBF_DONT_REMOVE + + if (b3->bbFlags & BBF_DONT_REMOVE) + { + continue; + } + + // b3 must be RETURN type + + if (b3->bbJumpKind != BBJ_RETURN) + { + continue; + } + + if (optBoolsDsc.optOptimizeBoolsReturnBlock(b3)) + { + change = true; + numReturn++; + } + } + else + { +#ifdef DEBUG + optBoolsDsc.optOptimizeBoolsGcStress(); + stress = true; +#endif + } + } + } while (change); + + JITDUMP("\noptimized %u BBJ_COND cases, %u BBJ_RETURN cases in %u passes\n", numCond, numReturn, numPasses); + + const bool modified = stress || ((numCond + numReturn) > 0); + return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; +} diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 694438f13e65d1..ea5481529ce467 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9052,1407 +9052,6 @@ ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTree* mul, GenTree** pIndex DEB return scale; } -//----------------------------------------------------------------------------- -// OptTestInfo: Member of OptBoolsDsc struct used to test if a GT_JTRUE or GT_RETURN node -// is a boolean comparison -// -struct OptTestInfo -{ - Statement* testStmt; // Last statement of the basic block - GenTree* testTree; // The root node of the testStmt (GT_JTRUE or GT_RETURN). - GenTree* compTree; // The compare node (i.e. 
GT_EQ or GT_NE node) of the testTree - bool isBool; // If the compTree is boolean expression -}; - -//----------------------------------------------------------------------------- -// OptBoolsDsc: Descriptor used for Boolean Optimization -// -class OptBoolsDsc -{ -public: - OptBoolsDsc(BasicBlock* b1, BasicBlock* b2, Compiler* comp) - { - m_b1 = b1; - m_b2 = b2; - m_b3 = nullptr; - m_comp = comp; - } - -private: - BasicBlock* m_b1; // The first basic block with the BBJ_COND conditional jump type - BasicBlock* m_b2; // The next basic block of m_b1. Either BBJ_COND or BBJ_RETURN type - BasicBlock* m_b3; // m_b1->bbJumpDest. Null if m_b2 is not a return block. - - Compiler* m_comp; // The pointer to the Compiler instance - - OptTestInfo m_testInfo1; // The first test info - OptTestInfo m_testInfo2; // The second test info - GenTree* m_t3; // The root node of the first statement of m_b3 - - GenTree* m_c1; // The first operand of m_testInfo1.compTree - GenTree* m_c2; // The first operand of m_testInfo2.compTree - - bool m_sameTarget; // if m_b1 and m_b2 jumps to the same destination - - genTreeOps m_foldOp; // The fold operator (e.g., GT_AND or GT_OR) - var_types m_foldType; // The type of the folded tree - genTreeOps m_cmpOp; // The comparison operator (e.g., GT_EQ or GT_NE) - -public: - bool optOptimizeBoolsCondBlock(); - bool optOptimizeCompareChainCondBlock(); - bool optOptimizeBoolsReturnBlock(BasicBlock* b3); -#ifdef DEBUG - void optOptimizeBoolsGcStress(); -#endif - -private: - Statement* optOptimizeBoolsChkBlkCond(); - GenTree* optIsBoolComp(OptTestInfo* pOptTest); - bool optOptimizeBoolsChkTypeCostCond(); - void optOptimizeBoolsUpdateTrees(); - inline bool FindCompareChain(GenTree* condition, bool* isTestCondition); -}; - -//----------------------------------------------------------------------------- -// optOptimizeBoolsCondBlock: Optimize boolean when bbJumpKind of both m_b1 and m_b2 are BBJ_COND -// -// Returns: -// true if boolean optimization is done and 
m_b1 and m_b2 are folded into m_b1, else false. -// -// Notes: -// m_b1 and m_b2 are set on entry. -// -// Case 1: if b1.bbJumpDest == b2.bbJumpDest, it transforms -// B1 : brtrue(t1, Bx) -// B2 : brtrue(t2, Bx) -// B3 : -// to -// B1 : brtrue(t1|t2, BX) -// B3 : -// -// For example, (x == 0 && y == 0 && z == 0) generates -// B1: GT_JTRUE (BBJ_COND), jump to B4 -// B2: GT_JTRUE (BBJ_COND), jump to B4 -// B3: GT_RETURN (BBJ_RETURN) -// B4: GT_RETURN (BBJ_RETURN) -// and B1 and B2 are folded into B1: -// B1: GT_JTRUE (BBJ_COND), jump to B4 -// B3: GT_RETURN (BBJ_RETURN) -// B4: GT_RETURN (BBJ_RETURN) -// -// Case 2: if B1.bbJumpDest == B2->bbNext, it transforms -// B1 : brtrue(t1, B3) -// B2 : brtrue(t2, Bx) -// B3 : -// to -// B1 : brtrue((!t1) && t2, Bx) -// B3 : -// -bool OptBoolsDsc::optOptimizeBoolsCondBlock() -{ - assert(m_b1 != nullptr && m_b2 != nullptr && m_b3 == nullptr); - - // Check if m_b1 and m_b2 jump to the same target and get back pointers to m_testInfo1 and t2 tree nodes - - m_t3 = nullptr; - - // Check if m_b1 and m_b2 have the same bbJumpDest - - if (m_b1->bbJumpDest == m_b2->bbJumpDest) - { - // Given the following sequence of blocks : - // B1: brtrue(t1, BX) - // B2: brtrue(t2, BX) - // B3: - // we will try to fold it to : - // B1: brtrue(t1|t2, BX) - // B3: - - m_sameTarget = true; - } - else if (m_b1->bbJumpDest == m_b2->bbNext) - { - // Given the following sequence of blocks : - // B1: brtrue(t1, B3) - // B2: brtrue(t2, BX) - // B3: - // we will try to fold it to : - // B1: brtrue((!t1)&&t2, BX) - // B3: - - m_sameTarget = false; - } - else - { - return false; - } - - Statement* const s1 = optOptimizeBoolsChkBlkCond(); - if (s1 == nullptr) - { - return false; - } - - // Find the branch conditions of m_b1 and m_b2 - - m_c1 = optIsBoolComp(&m_testInfo1); - if (m_c1 == nullptr) - { - return false; - } - - m_c2 = optIsBoolComp(&m_testInfo2); - if (m_c2 == nullptr) - { - return false; - } - - // Find the type and cost conditions of m_testInfo1 and 
m_testInfo2 - - if (!optOptimizeBoolsChkTypeCostCond()) - { - return false; - } - - // Get the fold operator and the comparison operator - - genTreeOps foldOp; - genTreeOps cmpOp; - var_types foldType = m_c1->TypeGet(); - if (varTypeIsGC(foldType)) - { - foldType = TYP_I_IMPL; - } - - assert(m_testInfo1.compTree->OperIs(GT_EQ, GT_NE, GT_LT, GT_GE)); - - if (m_sameTarget) - { - // Both conditions must be the same - - if (m_testInfo1.compTree->gtOper != m_testInfo2.compTree->gtOper) - { - return false; - } - - if (m_testInfo1.compTree->gtOper == GT_EQ) - { - // t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0 - // So we will branch to BX if (c1&c2)==0 - - foldOp = GT_AND; - cmpOp = GT_EQ; - } - else if (m_testInfo1.compTree->gtOper == GT_LT) - { - // t1:c1<0 t2:c2<0 ==> Branch to BX if either value < 0 - // So we will branch to BX if (c1|c2)<0 - - foldOp = GT_OR; - cmpOp = GT_LT; - } - else if (m_testInfo1.compTree->gtOper == GT_GE) - { - return false; - } - else - { - // t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0 - // So we will branch to BX if (c1|c2)!=0 - - foldOp = GT_OR; - cmpOp = GT_NE; - } - } - else - { - if (m_testInfo1.compTree->gtOper == m_testInfo2.compTree->gtOper) - { - return false; - } - - if (m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_NE) - { - // t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0 - // So we will branch to BX if (c1&c2)!=0 - - foldOp = GT_AND; - cmpOp = GT_NE; - } - else if (m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_GE) - { - // t1:c1<0 t2:c2>=0 ==> Branch to BX if both values >= 0 - // So we will branch to BX if (c1|c2)>=0 - - foldOp = GT_OR; - cmpOp = GT_GE; - } - else if (m_testInfo1.compTree->gtOper == GT_GE) - { - return false; - } - else if (m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_EQ) - { - // t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0 - // So we will branch to BX if (c1|c2)==0 - - 
foldOp = GT_OR; - cmpOp = GT_EQ; - } - else - { - return false; - } - } - - // Anding requires both values to be 0 or 1 - - if ((foldOp == GT_AND) && (!m_testInfo1.isBool || !m_testInfo2.isBool)) - { - return false; - } - - // - // Now update the trees - // - - m_foldOp = foldOp; - m_foldType = foldType; - m_cmpOp = cmpOp; - - optOptimizeBoolsUpdateTrees(); - -#ifdef DEBUG - if (m_comp->verbose) - { - printf("Folded %sboolean conditions of " FMT_BB " and " FMT_BB " to :\n", m_c2->OperIsLeaf() ? "" : "non-leaf ", - m_b1->bbNum, m_b2->bbNum); - m_comp->gtDispStmt(s1); - printf("\n"); - } -#endif - - // Return true to continue the bool optimization for the rest of the BB chain - return true; -} - -//----------------------------------------------------------------------------- -// FindCompareChain: Check if the given condition is a compare chain. -// -// Arguments: -// condition: Condition to check. -// isTestCondition: Returns true if condition is a EQ/NE(AND(...),0) but is not a compare chain. -// -// Returns: -// true if chain optimization is a compare chain. -// -// Assumptions: -// m_b1 and m_b2 are set on entry. -// - -inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondition) -{ - GenTree* condOp1 = condition->gtGetOp1(); - GenTree* condOp2 = condition->gtGetOp2(); - - *isTestCondition = false; - - if (condition->OperIs(GT_EQ, GT_NE) && condOp2->IsIntegralConst() && condOp2->AsIntCon()->IconValue() == 0 && - condOp1->OperIs(GT_AND)) - { - // Found a test condition. Does it contain a compare chain? - - // Only test that the second operand of AND ends with a compare operation, as this will be - // the condition the new link in the chain will connect with. - // We are allowing for the first operand of the not be a valid chain, as this would require - // a full recursive search through the children. 
- if (condOp1->gtGetOp2()->OperIsCmpCompare()) - { - return true; - } - - *isTestCondition = true; - } - - return false; -} - -//----------------------------------------------------------------------------- -// optOptimizeCompareChainCondBlock: Create AND chain when when both m_b1 and m_b2 are BBJ_COND. -// -// Returns: -// true if chain optimization is done and m_b1 and m_b2 are folded into m_b1, else false. -// -// Assumptions: -// m_b1 and m_b2 are set on entry. -// -// Notes: -// -// This aims to reduced the number of conditional jumps by joining cases when multiple -// conditions gate the execution of a block. For example: -// If ( a > b || c == d) { x = y; } -// Will become the following. Note that the second condition is inverted. -// -// ------------ BB01 -> BB03 (cond), succs={BB02,BB03} -// * JTRUE -// \--* GT a,b -// -// ------------ BB02 -> BB04 (cond), preds={BB01} succs={BB03,BB04} -// * JTRUE -// \--* NE c,d -// -// ------------ BB03, preds={BB01, BB02} succs={BB04} -// * ASG x,y -// -// These operands will be combined into a single AND in the first block (with the first -// condition inverted), wrapped by the test condition (NE(...,0)). -// -// ------------ BB01 -> BB03 (cond), succs={BB03,BB04} -// * JTRUE -// \--* NE -// +--* AND -// | +--* LE a,b -// | \--* NE c,d -// \--* CNS_INT 0 -// -// ------------ BB03, preds={BB01} succs={BB04} -// * ASG x,y -// -// -// This will also work for statements with else cases: -// If ( a > b || c == d) { x = y; } else { x = z; } -// Here BB04 will contain the else ASG. Both BB04 and BB05 will unconditionally jump to BB05. -// -// ------------ BB01 -> BB03 (cond), succs={BB03,BB04} -// * JTRUE -// \--* NE -// +--* AND -// | +--* LE a,b -// | \--* NE c,d -// \--* CNS_INT 0 -// -// ------------ BB03, preds={BB01} succs={BB05} -// * ASG x,y -// -// ------------ BB04, preds={BB01} succs={BB05} -// * ASG x,z -// -// -// Multiple conditions can be chained together. 
This is due to the optimization reverse -// iterating through the blocks. For example: -// If ( a > b || c == d || e < f ) { x = y; } -// The first pass will combine "c == d" and "e < f" into a chain. The second pass will then -// combine the "a > b" with the earlier chain. Where possible, the new condition is placed -// within the test condition (NE(...,0)). -// -// ------------ BB01 -> BB03 (cond), succs={BB03,BB04} -// * JTRUE -// \--* NE -// +--* AND -// | +--* AND -// | | +--* NE c,d -// | | \--* GE e,f -// | \--* LT a,b -// \--* CNS_INT 0 -// -// ------------ BB03, preds={BB01} succs={BB04} -// * ASG x,y -// -// -// Conditions connected by && are not yet checked for. For example: -// If ( a > b && c == d ) { x = y; } -// -bool OptBoolsDsc::optOptimizeCompareChainCondBlock() -{ - assert(m_b1 != nullptr && m_b2 != nullptr && m_b3 == nullptr); - m_t3 = nullptr; - - if (!(m_b1->bbNext == m_b2 && m_b1->bbJumpDest == m_b2->bbNext && m_b1->bbJumpDest->bbNext == m_b2->bbJumpDest)) - { - return false; - } - - Statement* const s1 = optOptimizeBoolsChkBlkCond(); - if (s1 == nullptr) - { - return false; - } - Statement* s2 = m_b2->firstStmt(); - - assert(m_testInfo1.testTree->OperIs(GT_JTRUE)); - GenTree* cond1 = m_testInfo1.testTree->gtGetOp1(); - assert(m_testInfo2.testTree->OperIs(GT_JTRUE)); - GenTree* cond2 = m_testInfo2.testTree->gtGetOp1(); - - // Ensure both conditions are suitable. - if (!cond1->OperIsCompare() || !cond2->OperIsCompare()) - { - return false; - } - - // Ensure there are no additional side effects. - if ((cond1->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0 || - (cond2->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0) - { - return false; - } - - // Integer compares only for now (until support for Arm64 fccmp instruction is added) - if (varTypeIsFloating(cond1->gtGetOp1()->TypeGet()) || varTypeIsFloating(cond2->gtGetOp1()->TypeGet())) - { - return false; - } - - // Check for previously optimized compare chains. 
- bool op1IsTestCond; - bool op2IsTestCond; - bool op1IsCondChain = FindCompareChain(cond1, &op1IsTestCond); - bool op2IsCondChain = FindCompareChain(cond2, &op2IsTestCond); - // Don't support combining multiple chains. Allowing this would give minimal benefit, as - // costing checks would disallow most instances. - if (op1IsCondChain && op2IsCondChain) - { - return false; - } - - // Specifically for Arm64, avoid cases where optimizations in lowering will produce better - // code than optimizing here. Specificially: - // * cmp(and(...), 0) will be turned into a TEST_ opcode. - // * Compares against zero will be optimized with cbz. - if (op1IsTestCond || op2IsTestCond) - { - return false; - } - - // Combining conditions means that all conditions are always fully evaluated. - // Put a limit on the max size that can be combined. - if (!m_comp->compStressCompile(Compiler::STRESS_OPT_BOOLS_COMPARE_CHAIN_COST, 25)) - { - int op1Cost = cond1->GetCostEx(); - int op2Cost = cond2->GetCostEx(); - int maxOp1Cost = op1IsCondChain ? 35 : 7; - int maxOp2Cost = op2IsCondChain ? 35 : 7; - - // Cost to allow for chain size of three. - if (op1Cost > maxOp1Cost || op2Cost > maxOp2Cost) - { - JITDUMP("Skipping CompareChainCond that will evaluate conditions unconditionally at costs %d,%d\n", - op1Cost, op2Cost); - return false; - } - } - - GenTree* testcondition = nullptr; - - // If a previous optimize bools happened for op2, then reuse the test condition. - // Cannot reuse for op1, as the condition needs reversing. - if (op2IsCondChain) - { - testcondition = cond2; - cond2 = cond2->gtGetOp1(); - } - - // Remove the first JTRUE statement. - constexpr bool isUnlink = true; - m_comp->fgRemoveStmt(m_b1, s1 DEBUGARG(isUnlink)); - - // Invert the first condition. - GenTree* revCond = m_comp->gtReverseCond(cond1); - assert(cond1 == revCond); // Ensure `gtReverseCond` did not create a new node. 
- - // AND the two conditions together - GenTree* andconds = m_comp->gtNewOperNode(GT_AND, TYP_INT, cond1, cond2); - andconds->AsOp()->gtFlags |= (cond1->gtFlags & GTF_ALL_EFFECT); - andconds->AsOp()->gtFlags |= (cond2->gtFlags & GTF_ALL_EFFECT); - cond1->gtFlags &= ~GTF_RELOP_JMP_USED; - cond2->gtFlags &= ~GTF_RELOP_JMP_USED; - andconds->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); - - // Add a test condition onto the front of the AND (or resuse an exisiting one). - if (op2IsCondChain) - { - testcondition->AsOp()->gtOp1 = andconds; - testcondition->AsOp()->gtFlags |= (andconds->gtFlags & GTF_ALL_EFFECT); - } - else - { - testcondition = m_comp->gtNewOperNode(GT_NE, TYP_INT, andconds, m_comp->gtNewZeroConNode(TYP_INT)); - } - - // Wire the chain into the second block - m_testInfo2.testTree->AsOp()->gtOp1 = testcondition; - m_testInfo2.testTree->AsOp()->gtFlags |= (testcondition->gtFlags & GTF_ALL_EFFECT); - m_comp->gtSetEvalOrder(m_testInfo2.testTree); - m_comp->fgSetStmtSeq(s2); - - // Update the flow. - m_comp->fgRemoveRefPred(m_b1->bbJumpDest, m_b1); - m_b1->bbJumpKind = BBJ_NONE; - - // Fixup flags. - m_b2->bbFlags |= (m_b1->bbFlags & BBF_COPY_PROPAGATE); - - // Join the two blocks. This is done now to ensure that additional conditions can be chained. - if (m_comp->fgCanCompactBlocks(m_b1, m_b2)) - { - m_comp->fgCompactBlocks(m_b1, m_b2); - } - -#ifdef DEBUG - if (m_comp->verbose) - { - printf("\nCombined conditions " FMT_BB " and " FMT_BB " into AND chain :\n", m_b1->bbNum, m_b2->bbNum); - m_comp->fgDumpBlock(m_b1); - printf("\n"); - } -#endif - - return true; -} - -//----------------------------------------------------------------------------- -// optOptimizeBoolsChkBlkCond: Checks block conditions if it can be boolean optimized -// -// Return: -// If all conditions pass, returns the last statement of m_b1, else return nullptr. 
-// -// Notes: -// This method checks if the second (and third block for cond/return/return case) contains only one statement, -// and checks if tree operators are of the right type, e.g, GT_JTRUE, GT_RETURN. -// -// On entry, m_b1, m_b2 are set and m_b3 is set for cond/return/return case. -// If it passes all the conditions, m_testInfo1.testTree, m_testInfo2.testTree and m_t3 are set -// to the root nodes of m_b1, m_b2 and m_b3 each. -// SameTarget is also updated to true if m_b1 and m_b2 jump to the same destination. -// -Statement* OptBoolsDsc::optOptimizeBoolsChkBlkCond() -{ - assert(m_b1 != nullptr && m_b2 != nullptr); - - bool optReturnBlock = false; - if (m_b3 != nullptr) - { - optReturnBlock = true; - } - - // Find the block conditions of m_b1 and m_b2 - - if (m_b2->countOfInEdges() > 1 || (optReturnBlock && m_b3->countOfInEdges() > 1)) - { - return nullptr; - } - - // Find the condition for the first block - - Statement* s1 = m_b1->lastStmt(); - - GenTree* testTree1 = s1->GetRootNode(); - assert(testTree1->gtOper == GT_JTRUE); - - // The second and the third block must contain a single statement - - Statement* s2 = m_b2->firstStmt(); - if (s2->GetPrevStmt() != s2) - { - return nullptr; - } - - GenTree* testTree2 = s2->GetRootNode(); - - if (!optReturnBlock) - { - assert(testTree2->gtOper == GT_JTRUE); - } - else - { - if (testTree2->gtOper != GT_RETURN) - { - return nullptr; - } - - Statement* s3 = m_b3->firstStmt(); - if (s3->GetPrevStmt() != s3) - { - return nullptr; - } - - GenTree* testTree3 = s3->GetRootNode(); - if (testTree3->gtOper != GT_RETURN) - { - return nullptr; - } - - if (!varTypeIsIntegral(testTree2->TypeGet()) || !varTypeIsIntegral(testTree3->TypeGet())) - { - return nullptr; - } - - // The third block is Return with "CNS_INT int 0/1" - if (testTree3->AsOp()->gtOp1->gtOper != GT_CNS_INT) - { - return nullptr; - } - - if (testTree3->AsOp()->gtOp1->gtType != TYP_INT) - { - return nullptr; - } - - m_t3 = testTree3; - } - - 
m_testInfo1.testStmt = s1; - m_testInfo1.testTree = testTree1; - m_testInfo2.testStmt = s2; - m_testInfo2.testTree = testTree2; - - return s1; -} - -//----------------------------------------------------------------------------- -// optOptimizeBoolsChkTypeCostCond: Checks if type conditions meet the folding condition, and -// if cost to fold is not too expensive -// -// Return: -// True if it meets type conditions and cost conditions. Else false. -// -bool OptBoolsDsc::optOptimizeBoolsChkTypeCostCond() -{ - assert(m_testInfo1.compTree->OperIs(GT_EQ, GT_NE, GT_LT, GT_GE) && m_testInfo1.compTree->AsOp()->gtOp1 == m_c1); - assert(m_testInfo2.compTree->OperIs(GT_EQ, GT_NE, GT_LT, GT_GE) && m_testInfo2.compTree->AsOp()->gtOp1 == m_c2); - - // - // Leave out floats where the bit-representation is more complicated - // - there are two representations for 0. - // - if (varTypeIsFloating(m_c1->TypeGet()) || varTypeIsFloating(m_c2->TypeGet())) - { - return false; - } - - // Make sure the types involved are of the same sizes - if (genTypeSize(m_c1->TypeGet()) != genTypeSize(m_c2->TypeGet())) - { - return false; - } - if (genTypeSize(m_testInfo1.compTree->TypeGet()) != genTypeSize(m_testInfo2.compTree->TypeGet())) - { - return false; - } -#ifdef TARGET_ARMARCH - // Skip the small operand which we cannot encode. 
- if (varTypeIsSmall(m_c1->TypeGet())) - return false; -#endif - // The second condition must not contain side effects - // - if (m_c2->gtFlags & GTF_GLOB_EFFECT) - { - return false; - } - - // The second condition must not be too expensive - // - if (m_c2->GetCostEx() > 12) - { - return false; - } - - return true; -} - -//----------------------------------------------------------------------------- -// optOptimizeBoolsUpdateTrees: Fold the trees based on fold type and comparison type, -// update the edges, unlink removed blocks and update loop table -// -void OptBoolsDsc::optOptimizeBoolsUpdateTrees() -{ - assert(m_b1 != nullptr && m_b2 != nullptr); - - bool optReturnBlock = false; - if (m_b3 != nullptr) - { - optReturnBlock = true; - } - - assert(m_foldOp != NULL && m_foldType != NULL && m_c1 != nullptr && m_c2 != nullptr); - - GenTree* cmpOp1 = m_comp->gtNewOperNode(m_foldOp, m_foldType, m_c1, m_c2); - if (m_testInfo1.isBool && m_testInfo2.isBool) - { - // When we 'OR'/'AND' two booleans, the result is boolean as well - cmpOp1->gtFlags |= GTF_BOOLEAN; - } - - GenTree* t1Comp = m_testInfo1.compTree; - t1Comp->SetOper(m_cmpOp); - t1Comp->AsOp()->gtOp1 = cmpOp1; - t1Comp->AsOp()->gtOp2->gtType = m_foldType; // Could have been varTypeIsGC() - if (optReturnBlock) - { - // Update tree when m_b1 is BBJ_COND and m_b2 and m_b3 are GT_RETURN (BBJ_RETURN) - t1Comp->AsOp()->gtOp2->AsIntCon()->gtIconVal = 0; - m_testInfo1.testTree->gtOper = GT_RETURN; - m_testInfo1.testTree->gtType = m_testInfo2.testTree->gtType; - - // Update the return count of flow graph - assert(m_comp->fgReturnCount >= 2); - --m_comp->fgReturnCount; - } - -#if FEATURE_SET_FLAGS - // For comparisons against zero we will have the GTF_SET_FLAGS set - // and this can cause an assert to fire in fgMoveOpsLeft(GenTree* tree) - // during the CSE phase. 
- // - // So make sure to clear any GTF_SET_FLAGS bit on these operations - // as they are no longer feeding directly into a comparisons against zero - - // Make sure that the GTF_SET_FLAGS bit is cleared. - // Fix 388436 ARM JitStress WP7 - m_c1->gtFlags &= ~GTF_SET_FLAGS; - m_c2->gtFlags &= ~GTF_SET_FLAGS; - - // The new top level node that we just created does feed directly into - // a comparison against zero, so set the GTF_SET_FLAGS bit so that - // we generate an instruction that sets the flags, which allows us - // to omit the cmp with zero instruction. - - // Request that the codegen for cmpOp1 sets the condition flags - // when it generates the code for cmpOp1. - // - cmpOp1->gtRequestSetFlags(); -#endif - - // Recost/rethread the tree if necessary - // - if (m_comp->fgNodeThreading != NodeThreading::None) - { - m_comp->gtSetStmtInfo(m_testInfo1.testStmt); - m_comp->fgSetStmtSeq(m_testInfo1.testStmt); - } - - if (!optReturnBlock) - { - // Update edges if m_b1: BBJ_COND and m_b2: BBJ_COND - - FlowEdge* edge1 = m_comp->fgGetPredForBlock(m_b1->bbJumpDest, m_b1); - FlowEdge* edge2; - - if (m_sameTarget) - { - edge2 = m_comp->fgGetPredForBlock(m_b2->bbJumpDest, m_b2); - } - else - { - edge2 = m_comp->fgGetPredForBlock(m_b2->bbNext, m_b2); - - m_comp->fgRemoveRefPred(m_b1->bbJumpDest, m_b1); - - m_b1->bbJumpDest = m_b2->bbJumpDest; - - m_comp->fgAddRefPred(m_b2->bbJumpDest, m_b1); - } - - assert(edge1 != nullptr); - assert(edge2 != nullptr); - - weight_t edgeSumMin = edge1->edgeWeightMin() + edge2->edgeWeightMin(); - weight_t edgeSumMax = edge1->edgeWeightMax() + edge2->edgeWeightMax(); - if ((edgeSumMax >= edge1->edgeWeightMax()) && (edgeSumMax >= edge2->edgeWeightMax())) - { - edge1->setEdgeWeights(edgeSumMin, edgeSumMax, m_b1->bbJumpDest); - } - else - { - edge1->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT, m_b1->bbJumpDest); - } - } - - /* Modify the target of the conditional jump and update bbRefs and bbPreds */ - - if (optReturnBlock) - { - 
m_b1->bbJumpDest = nullptr; - m_b1->bbJumpKind = BBJ_RETURN; -#ifdef DEBUG - m_b1->bbJumpSwt = m_b2->bbJumpSwt; -#endif - assert(m_b2->bbJumpKind == BBJ_RETURN); - assert(m_b1->bbNext == m_b2); - assert(m_b3 != nullptr); - } - else - { - assert(m_b1->bbJumpKind == BBJ_COND); - assert(m_b2->bbJumpKind == BBJ_COND); - assert(m_b1->bbJumpDest == m_b2->bbJumpDest); - assert(m_b1->bbNext == m_b2); - assert(m_b2->bbNext != nullptr); - } - - if (!optReturnBlock) - { - // Update bbRefs and bbPreds - // - // Replace pred 'm_b2' for 'm_b2->bbNext' with 'm_b1' - // Remove pred 'm_b2' for 'm_b2->bbJumpDest' - m_comp->fgReplacePred(m_b2->bbNext, m_b2, m_b1); - m_comp->fgRemoveRefPred(m_b2->bbJumpDest, m_b2); - } - - // Get rid of the second block - - m_comp->fgUnlinkBlock(m_b2); - m_b2->bbFlags |= BBF_REMOVED; - // If m_b2 was the last block of a try or handler, update the EH table. - m_comp->ehUpdateForDeletedBlock(m_b2); - - if (optReturnBlock) - { - // Get rid of the third block - m_comp->fgUnlinkBlock(m_b3); - m_b3->bbFlags |= BBF_REMOVED; - // If m_b3 was the last block of a try or handler, update the EH table. - m_comp->ehUpdateForDeletedBlock(m_b3); - } - - // Update loop table - m_comp->fgUpdateLoopsAfterCompacting(m_b1, m_b2); - if (optReturnBlock) - { - m_comp->fgUpdateLoopsAfterCompacting(m_b1, m_b3); - } - - // Update IL range of first block - m_b1->bbCodeOffsEnd = optReturnBlock ? m_b3->bbCodeOffsEnd : m_b2->bbCodeOffsEnd; -} - -//----------------------------------------------------------------------------- -// optOptimizeBoolsReturnBlock: Optimize boolean when m_b1 is BBJ_COND and m_b2 and m_b3 are BBJ_RETURN -// -// Arguments: -// b3: Pointer to basic block b3 -// -// Returns: -// true if boolean optimization is done and m_b1, m_b2 and m_b3 are folded into m_b1, else false. -// -// Notes: -// m_b1, m_b2 and m_b3 of OptBoolsDsc are set on entry. 
-// -// if B1.bbJumpDest == b3, it transforms -// B1 : brtrue(t1, B3) -// B2 : ret(t2) -// B3 : ret(0) -// to -// B1 : ret((!t1) && t2) -// -// For example, (x==0 && y==0) generates: -// B1: GT_JTRUE (BBJ_COND), jumps to B3 -// B2: GT_RETURN (BBJ_RETURN) -// B3: GT_RETURN (BBJ_RETURN), -// and it is folded into -// B1: GT_RETURN (BBJ_RETURN) -// -bool OptBoolsDsc::optOptimizeBoolsReturnBlock(BasicBlock* b3) -{ - assert(m_b1 != nullptr && m_b2 != nullptr); - - // m_b3 is set for cond/return/return case - m_b3 = b3; - - m_sameTarget = false; - Statement* const s1 = optOptimizeBoolsChkBlkCond(); - if (s1 == nullptr) - { - return false; - } - - // Find the branch conditions of m_b1 and m_b2 - - m_c1 = optIsBoolComp(&m_testInfo1); - if (m_c1 == nullptr) - { - return false; - } - - m_c2 = optIsBoolComp(&m_testInfo2); - if (m_c2 == nullptr) - { - return false; - } - - // Find the type and cost conditions of m_testInfo1 and m_testInfo2 - - if (!optOptimizeBoolsChkTypeCostCond()) - { - return false; - } - - // Get the fold operator (m_foldOp, e.g., GT_OR/GT_AND) and - // the comparison operator (m_cmpOp, e.g., GT_EQ/GT_NE/GT_GE/GT_LT) - - var_types foldType = m_c1->TypeGet(); - if (varTypeIsGC(foldType)) - { - foldType = TYP_I_IMPL; - } - m_foldType = foldType; - - m_foldOp = GT_NONE; - m_cmpOp = GT_NONE; - - genTreeOps foldOp; - genTreeOps cmpOp; - - ssize_t it1val = m_testInfo1.compTree->AsOp()->gtOp2->AsIntCon()->gtIconVal; - ssize_t it2val = m_testInfo2.compTree->AsOp()->gtOp2->AsIntCon()->gtIconVal; - ssize_t it3val = m_t3->AsOp()->gtOp1->AsIntCon()->gtIconVal; - - if ((m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_EQ) && - (it1val == 0 && it2val == 0 && it3val == 0)) - { - // Case: x == 0 && y == 0 - // t1:c1!=0 t2:c2==0 t3:c3==0 - // ==> true if (c1|c2)==0 - foldOp = GT_OR; - cmpOp = GT_EQ; - } - else if ((m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_NE) && - (it1val == 0 && it2val == 0 && it3val == 0)) - { 
- // Case: x == 1 && y ==1 - // t1:c1!=1 t2:c2==1 t3:c3==0 is reversed from optIsBoolComp() to: t1:c1==0 t2:c2!=0 t3:c3==0 - // ==> true if (c1&c2)!=0 - foldOp = GT_AND; - cmpOp = GT_NE; - } - else if ((m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_GE) && - (it1val == 0 && it2val == 0 && it3val == 0)) - { - // Case: x >= 0 && y >= 0 - // t1:c1<0 t2:c2>=0 t3:c3==0 - // ==> true if (c1|c2)>=0 - - foldOp = GT_OR; - cmpOp = GT_GE; - } - else if ((m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_EQ) && - (it1val == 0 && it2val == 0 && it3val == 1)) - { - // Case: x == 0 || y == 0 - // t1:c1==0 t2:c2==0 t3:c3==1 - // ==> true if (c1&c2)==0 - foldOp = GT_AND; - cmpOp = GT_EQ; - } - else if ((m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_NE) && - (it1val == 0 && it2val == 0 && it3val == 1)) - { - // Case: x == 1 || y == 1 - // t1:c1==1 t2:c2==1 t3:c3==1 is reversed from optIsBoolComp() to: t1:c1!=0 t2:c2!=0 t3:c3==1 - // ==> true if (c1|c2)!=0 - foldOp = GT_OR; - cmpOp = GT_NE; - } - else if ((m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_LT) && - (it1val == 0 && it2val == 0 && it3val == 1)) - { - // Case: x < 0 || y < 0 - // t1:c1<0 t2:c2<0 t3:c3==1 - // ==> true if (c1|c2)<0 - - foldOp = GT_OR; - cmpOp = GT_LT; - } - else - { - // Require NOT operation for operand(s). Do Not fold. 
- return false; - } - - if ((foldOp == GT_AND || (cmpOp == GT_NE && foldOp != GT_OR)) && (!m_testInfo1.isBool || !m_testInfo2.isBool)) - { - // x == 1 && y == 1: Skip cases where x or y is greater than 1, e.g., x=3, y=1 - // x == 0 || y == 0: Skip cases where x and y have opposite bits set, e.g., x=2, y=1 - // x == 1 || y == 1: Skip cases where either x or y is greater than 1, e.g., x=2, y=0 - return false; - } - - m_foldOp = foldOp; - m_cmpOp = cmpOp; - - // Now update the trees - - optOptimizeBoolsUpdateTrees(); - -#ifdef DEBUG - if (m_comp->verbose) - { - printf("Folded %sboolean conditions of " FMT_BB ", " FMT_BB " and " FMT_BB " to :\n", - m_c2->OperIsLeaf() ? "" : "non-leaf ", m_b1->bbNum, m_b2->bbNum, m_b3->bbNum); - m_comp->gtDispStmt(s1); - printf("\n"); - } -#endif - - // Return true to continue the bool optimization for the rest of the BB chain - return true; -} - -//----------------------------------------------------------------------------- -// optOptimizeBoolsGcStress: Replace x==null with (x|x)==0 if x is a GC-type. -// This will stress code-gen and the emitter to make sure they support such trees. 
-// -#ifdef DEBUG - -void OptBoolsDsc::optOptimizeBoolsGcStress() -{ - if (!m_comp->compStressCompile(m_comp->STRESS_OPT_BOOLS_GC, 20)) - { - return; - } - - assert(m_b1->bbJumpKind == BBJ_COND); - Statement* const stmt = m_b1->lastStmt(); - GenTree* const cond = stmt->GetRootNode(); - - assert(cond->gtOper == GT_JTRUE); - - OptTestInfo test; - test.testStmt = stmt; - test.testTree = cond; - - GenTree* comparand = optIsBoolComp(&test); - - if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet())) - { - return; - } - GenTree* relop = test.compTree; - bool isBool = test.isBool; - - if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF)) - { - return; - } - - GenTree* comparandClone = m_comp->gtCloneExpr(comparand); - - noway_assert(relop->AsOp()->gtOp1 == comparand); - genTreeOps oper = m_comp->compStressCompile(m_comp->STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND; - relop->AsOp()->gtOp1 = m_comp->gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone); - - // Comparand type is already checked, and we have const int, there is no harm - // morphing it into a TYP_I_IMPL. - noway_assert(relop->AsOp()->gtOp2->gtOper == GT_CNS_INT); - relop->AsOp()->gtOp2->gtType = TYP_I_IMPL; - - // Recost/rethread the tree if necessary - // - if (m_comp->fgNodeThreading != NodeThreading::None) - { - m_comp->gtSetStmtInfo(test.testStmt); - m_comp->fgSetStmtSeq(test.testStmt); - } -} - -#endif - -//----------------------------------------------------------------------------- -// optIsBoolComp: Function used by folding of boolean conditionals -// -// Arguments: -// pOptTest The test info for the test tree -// -// Return: -// On success, return the first operand (gtOp1) of compTree, else return nullptr. -// -// Notes: -// On entry, testTree is set. -// On success, compTree is set to the compare node (i.e. GT_EQ or GT_NE or GT_LT or GT_GE) of the testTree. -// isBool is set to true if the comparand (i.e., operand 1 of compTree is boolean. Otherwise, false. 
-// -// Given a GT_JTRUE or GT_RETURN node, this method checks if it is a boolean comparison -// of the form "if (boolVal ==/!=/>=/< 0/1)".This is translated into -// a GT_EQ/GT_NE/GT_GE/GT_LT node with "opr1" being a boolean lclVar and "opr2" the const 0/1. -// -// When isBool == true, if the comparison was against a 1 (i.e true) -// then we morph the tree by reversing the GT_EQ/GT_NE/GT_GE/GT_LT and change the 1 to 0. -// -GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest) -{ - pOptTest->isBool = false; - - assert(pOptTest->testTree->gtOper == GT_JTRUE || pOptTest->testTree->gtOper == GT_RETURN); - GenTree* cond = pOptTest->testTree->AsOp()->gtOp1; - - // The condition must be "!= 0" or "== 0" or >=0 or <0 - // we don't optimize unsigned < and >= operations - if (!cond->OperIs(GT_EQ, GT_NE) && (!cond->OperIs(GT_LT, GT_GE) || cond->IsUnsigned())) - { - return nullptr; - } - - // Return the compare node to the caller - - pOptTest->compTree = cond; - - // Get hold of the comparands - - GenTree* opr1 = cond->AsOp()->gtOp1; - GenTree* opr2 = cond->AsOp()->gtOp2; - - if (opr2->gtOper != GT_CNS_INT) - { - return nullptr; - } - - if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1)) - { - return nullptr; - } - - ssize_t ival2 = opr2->AsIntCon()->gtIconVal; - - // Is the value a boolean? - // We can either have a boolean expression (marked GTF_BOOLEAN) or - // a local variable that is marked as being boolean (lvIsBoolean) - - if (opr1->gtFlags & GTF_BOOLEAN) - { - pOptTest->isBool = true; - } - else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1))) - { - pOptTest->isBool = true; - } - else if (opr1->gtOper == GT_LCL_VAR) - { - // is it a boolean local variable? - - unsigned lclNum = opr1->AsLclVarCommon()->GetLclNum(); - noway_assert(lclNum < m_comp->lvaCount); - - if (m_comp->lvaTable[lclNum].lvIsBoolean) - { - pOptTest->isBool = true; - } - } - - // Was our comparison against the constant 1 (i.e. 
true) - if (ival2 == 1) - { - // If this is a boolean expression tree we can reverse the relop - // and change the true to false. - if (pOptTest->isBool) - { - m_comp->gtReverseCond(cond); - opr2->AsIntCon()->gtIconVal = 0; - } - else - { - return nullptr; - } - } - - return opr1; -} - -//----------------------------------------------------------------------------- -// optOptimizeBools: Folds boolean conditionals for GT_JTRUE/GT_RETURN nodes -// -// Returns: -// suitable phase status -// -// Notes: -// If the operand of GT_JTRUE/GT_RETURN node is GT_EQ/GT_NE/GT_GE/GT_LT of the form -// "if (boolVal ==/!=/>=/< 0/1)", the GT_EQ/GT_NE/GT_GE/GT_LT nodes are translated into a -// GT_EQ/GT_NE/GT_GE/GT_LT node with -// "op1" being a boolean GT_OR/GT_AND lclVar and -// "op2" the const 0/1. -// For example, the folded tree for the below boolean optimization is shown below: -// Case 1: (x == 0 && y ==0) => (x | y) == 0 -// * RETURN int -// \--* EQ int -// +--* OR int -// | +--* LCL_VAR int V00 arg0 -// | \--* LCL_VAR int V01 arg1 -// \--* CNS_INT int 0 -// -// Case 2: (x == null && y == null) ==> (x | y) == 0 -// * RETURN int -// \-- * EQ int -// + -- * OR long -// | +-- * LCL_VAR ref V00 arg0 -// | \-- * LCL_VAR ref V01 arg1 -// \-- * CNS_INT long 0 -// -// Case 3: (x == 0 && y == 0 && z == 0) ==> ((x | y) | z) == 0 -// * RETURN int -// \-- * EQ int -// + -- * OR int -// | +-- * OR int -// | | +-- * LCL_VAR int V00 arg0 -// | | \-- * LCL_VAR int V01 arg1 -// | \-- * LCL_VAR int V02 arg2 -// \-- * CNS_INT int 0 -// -// Case 4: (x == 0 && y == 0 && z == 0 && w == 0) ==> (((x | y) | z) | w) == 0 -// * RETURN int -// \-- * EQ int -// + * OR int -// | +--* OR int -// | | +--* OR int -// | | | +--* LCL_VAR int V00 arg0 -// | | | \--* LCL_VAR int V01 arg1 -// | | \--* LCL_VAR int V02 arg2 -// | \--* LCL_VAR int V03 arg3 -// \--* CNS_INT int 0 -// -// Case 5: (x != 0 && y != 0) => (x | y) != 0 -// * RETURN int -// \--* NE int -// +--* OR int -// | +--* LCL_VAR int V00 arg0 -// | 
\--* LCL_VAR int V01 arg1 -// \--* CNS_INT int 0 -// -// Case 6: (x >= 0 && y >= 0) => (x | y) >= 0 -// * RETURN int -// \--* GE int -// +--* OR int -// | +--* LCL_VAR int V00 arg0 -// | \--* LCL_VAR int V01 arg1 -// \--* CNS_INT int 0 -// -// Case 7: (x < 0 || y < 0) => (x & y) < 0 -// * RETURN int -// \--* LT int -// +--* AND int -// | +--* LCL_VAR int V00 arg0 -// | \--* LCL_VAR int V01 arg1 -// \--* CNS_INT int 0 -// -// Patterns that are not optimized include (x == 1 && y == 1), (x == 1 || y == 1), -// (x == 0 || y == 0) because currently their comptree is not marked as boolean expression. -// When m_foldOp == GT_AND or m_cmpOp == GT_NE, both compTrees must be boolean expression -// in order to skip below cases when compTree is not boolean expression: -// - x == 1 && y == 1 ==> (x&y)!=0: Skip cases where x or y is greater than 1, e.g., x=3, y=1 -// - x == 1 || y == 1 ==> (x|y)!=0: Skip cases where either x or y is greater than 1, e.g., x=2, y=0 -// - x == 0 || y == 0 ==> (x&y)==0: Skip cases where x and y have opposite bits set, e.g., x=2, y=1 -// -PhaseStatus Compiler::optOptimizeBools() -{ -#ifdef DEBUG - if (verbose) - { - printf("*************** In optOptimizeBools()\n"); - } -#endif - bool change = false; - unsigned numCond = 0; - unsigned numReturn = 0; - unsigned numPasses = 0; - unsigned stress = false; - - do - { - numPasses++; - change = false; - - // Reverse iterate through the blocks. - for (BasicBlock* b1 = fgLastBB; b1 != nullptr; b1 = b1->bbPrev) - { - // We're only interested in conditional jumps here - - if (b1->bbJumpKind != BBJ_COND) - { - continue; - } - - // If there is no next block, we're done - - BasicBlock* b2 = b1->bbNext; - if (b2 == nullptr) - { - break; - } - - // The next block must not be marked as BBF_DONT_REMOVE - if (b2->bbFlags & BBF_DONT_REMOVE) - { - continue; - } - - OptBoolsDsc optBoolsDsc(b1, b2, this); - - // The next block needs to be a condition or return block. 
- - if (b2->bbJumpKind == BBJ_COND) - { - if ((b1->bbJumpDest != b2->bbJumpDest) && (b1->bbJumpDest != b2->bbNext)) - { - continue; - } - - // When it is conditional jumps - - if (optBoolsDsc.optOptimizeBoolsCondBlock()) - { - change = true; - numCond++; - } -#ifdef TARGET_ARM64 - else if (optBoolsDsc.optOptimizeCompareChainCondBlock()) - { - change = true; - numCond++; - } -#endif - } - else if (b2->bbJumpKind == BBJ_RETURN) - { - // Set b3 to b1 jump destination - BasicBlock* b3 = b1->bbJumpDest; - - // b3 must not be marked as BBF_DONT_REMOVE - - if (b3->bbFlags & BBF_DONT_REMOVE) - { - continue; - } - - // b3 must be RETURN type - - if (b3->bbJumpKind != BBJ_RETURN) - { - continue; - } - - if (optBoolsDsc.optOptimizeBoolsReturnBlock(b3)) - { - change = true; - numReturn++; - } - } - else - { -#ifdef DEBUG - optBoolsDsc.optOptimizeBoolsGcStress(); - stress = true; -#endif - } - } - } while (change); - - JITDUMP("\noptimized %u BBJ_COND cases, %u BBJ_RETURN cases in %u passes\n", numCond, numReturn, numPasses); - - const bool modified = stress || ((numCond + numReturn) > 0); - return modified ? 
PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; -} - typedef JitHashTable, unsigned> LclVarRefCounts; //------------------------------------------------------------------------------------------ From 6a94a6ee92539dbcbe2a4f20aecf5b9eefaef0cd Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 9 Mar 2023 17:13:51 +0000 Subject: [PATCH 18/31] Improve cbz detection --- src/coreclr/jit/optimizebools.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index 51857df782f5a2..e2e6ceae0ba365 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -321,8 +321,7 @@ inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondit *isTestCondition = false; - if (condition->OperIs(GT_EQ, GT_NE) && condOp2->IsIntegralConst() && condOp2->AsIntCon()->IconValue() == 0 && - condOp1->OperIs(GT_AND)) + if (condition->OperIs(GT_EQ, GT_NE) && condOp2->IsIntegralConst() && condOp2->AsIntCon()->IconValue() == 0) { // Found a test condition. Does it contain a compare chain? @@ -330,7 +329,7 @@ inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondit // the condition the new link in the chain will connect with. // We are allowing for the first operand of the not be a valid chain, as this would require // a full recursive search through the children. 
- if (condOp1->gtGetOp2()->OperIsCmpCompare()) + if (condOp1->OperIs(GT_AND) && condOp1->gtGetOp2()->OperIsCmpCompare()) { return true; } From 8846bc399c801aa10f2de803e4b6e3116dd4173d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 13 Mar 2023 11:45:19 +0000 Subject: [PATCH 19/31] Move optbools back into optimizer --- src/coreclr/jit/CMakeLists.txt | 1 - src/coreclr/jit/optimizebools.cpp | 1416 ----------------------------- src/coreclr/jit/optimizer.cpp | 1401 ++++++++++++++++++++++++++++ 3 files changed, 1401 insertions(+), 1417 deletions(-) delete mode 100644 src/coreclr/jit/optimizebools.cpp diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index c44585cbbd63ee..480bfdc045e16c 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -150,7 +150,6 @@ set( JIT_SOURCES morphblock.cpp objectalloc.cpp optcse.cpp - optimizebools.cpp optimizer.cpp patchpoint.cpp phase.cpp diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp deleted file mode 100644 index e2e6ceae0ba365..00000000000000 --- a/src/coreclr/jit/optimizebools.cpp +++ /dev/null @@ -1,1416 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XX XX -XX optOptimizeBools XX -XX XX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -*/ - -#include "jitpch.h" -#ifdef _MSC_VER -#pragma hdrstop -#endif - -//----------------------------------------------------------------------------- -// OptTestInfo: Member of OptBoolsDsc struct used to test if a GT_JTRUE or GT_RETURN node -// is a boolean comparison -// -struct OptTestInfo -{ - Statement* testStmt; // Last statement of the basic block - GenTree* testTree; // The root node of the testStmt (GT_JTRUE or GT_RETURN). - GenTree* compTree; // The compare node (i.e. GT_EQ or GT_NE node) of the testTree - bool isBool; // If the compTree is boolean expression -}; - -//----------------------------------------------------------------------------- -// OptBoolsDsc: Descriptor used for Boolean Optimization -// -class OptBoolsDsc -{ -public: - OptBoolsDsc(BasicBlock* b1, BasicBlock* b2, Compiler* comp) - { - m_b1 = b1; - m_b2 = b2; - m_b3 = nullptr; - m_comp = comp; - } - -private: - BasicBlock* m_b1; // The first basic block with the BBJ_COND conditional jump type - BasicBlock* m_b2; // The next basic block of m_b1. Either BBJ_COND or BBJ_RETURN type - BasicBlock* m_b3; // m_b1->bbJumpDest. Null if m_b2 is not a return block. 
- - Compiler* m_comp; // The pointer to the Compiler instance - - OptTestInfo m_testInfo1; // The first test info - OptTestInfo m_testInfo2; // The second test info - GenTree* m_t3; // The root node of the first statement of m_b3 - - GenTree* m_c1; // The first operand of m_testInfo1.compTree - GenTree* m_c2; // The first operand of m_testInfo2.compTree - - bool m_sameTarget; // if m_b1 and m_b2 jumps to the same destination - - genTreeOps m_foldOp; // The fold operator (e.g., GT_AND or GT_OR) - var_types m_foldType; // The type of the folded tree - genTreeOps m_cmpOp; // The comparison operator (e.g., GT_EQ or GT_NE) - -public: - bool optOptimizeBoolsCondBlock(); - bool optOptimizeCompareChainCondBlock(); - bool optOptimizeBoolsReturnBlock(BasicBlock* b3); -#ifdef DEBUG - void optOptimizeBoolsGcStress(); -#endif - -private: - Statement* optOptimizeBoolsChkBlkCond(); - GenTree* optIsBoolComp(OptTestInfo* pOptTest); - bool optOptimizeBoolsChkTypeCostCond(); - void optOptimizeBoolsUpdateTrees(); - inline bool FindCompareChain(GenTree* condition, bool* isTestCondition); -}; - -//----------------------------------------------------------------------------- -// optOptimizeBoolsCondBlock: Optimize boolean when bbJumpKind of both m_b1 and m_b2 are BBJ_COND -// -// Returns: -// true if boolean optimization is done and m_b1 and m_b2 are folded into m_b1, else false. -// -// Notes: -// m_b1 and m_b2 are set on entry. 
-// -// Case 1: if b1.bbJumpDest == b2.bbJumpDest, it transforms -// B1 : brtrue(t1, Bx) -// B2 : brtrue(t2, Bx) -// B3 : -// to -// B1 : brtrue(t1|t2, BX) -// B3 : -// -// For example, (x == 0 && y == 0 && z == 0) generates -// B1: GT_JTRUE (BBJ_COND), jump to B4 -// B2: GT_JTRUE (BBJ_COND), jump to B4 -// B3: GT_RETURN (BBJ_RETURN) -// B4: GT_RETURN (BBJ_RETURN) -// and B1 and B2 are folded into B1: -// B1: GT_JTRUE (BBJ_COND), jump to B4 -// B3: GT_RETURN (BBJ_RETURN) -// B4: GT_RETURN (BBJ_RETURN) -// -// Case 2: if B1.bbJumpDest == B2->bbNext, it transforms -// B1 : brtrue(t1, B3) -// B2 : brtrue(t2, Bx) -// B3 : -// to -// B1 : brtrue((!t1) && t2, Bx) -// B3 : -// -bool OptBoolsDsc::optOptimizeBoolsCondBlock() -{ - assert(m_b1 != nullptr && m_b2 != nullptr && m_b3 == nullptr); - - // Check if m_b1 and m_b2 jump to the same target and get back pointers to m_testInfo1 and t2 tree nodes - - m_t3 = nullptr; - - // Check if m_b1 and m_b2 have the same bbJumpDest - - if (m_b1->bbJumpDest == m_b2->bbJumpDest) - { - // Given the following sequence of blocks : - // B1: brtrue(t1, BX) - // B2: brtrue(t2, BX) - // B3: - // we will try to fold it to : - // B1: brtrue(t1|t2, BX) - // B3: - - m_sameTarget = true; - } - else if (m_b1->bbJumpDest == m_b2->bbNext) - { - // Given the following sequence of blocks : - // B1: brtrue(t1, B3) - // B2: brtrue(t2, BX) - // B3: - // we will try to fold it to : - // B1: brtrue((!t1)&&t2, BX) - // B3: - - m_sameTarget = false; - } - else - { - return false; - } - - Statement* const s1 = optOptimizeBoolsChkBlkCond(); - if (s1 == nullptr) - { - return false; - } - - // Find the branch conditions of m_b1 and m_b2 - - m_c1 = optIsBoolComp(&m_testInfo1); - if (m_c1 == nullptr) - { - return false; - } - - m_c2 = optIsBoolComp(&m_testInfo2); - if (m_c2 == nullptr) - { - return false; - } - - // Find the type and cost conditions of m_testInfo1 and m_testInfo2 - - if (!optOptimizeBoolsChkTypeCostCond()) - { - return false; - } - - // Get the 
fold operator and the comparison operator - - genTreeOps foldOp; - genTreeOps cmpOp; - var_types foldType = m_c1->TypeGet(); - if (varTypeIsGC(foldType)) - { - foldType = TYP_I_IMPL; - } - - assert(m_testInfo1.compTree->OperIs(GT_EQ, GT_NE, GT_LT, GT_GE)); - - if (m_sameTarget) - { - // Both conditions must be the same - - if (m_testInfo1.compTree->gtOper != m_testInfo2.compTree->gtOper) - { - return false; - } - - if (m_testInfo1.compTree->gtOper == GT_EQ) - { - // t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0 - // So we will branch to BX if (c1&c2)==0 - - foldOp = GT_AND; - cmpOp = GT_EQ; - } - else if (m_testInfo1.compTree->gtOper == GT_LT) - { - // t1:c1<0 t2:c2<0 ==> Branch to BX if either value < 0 - // So we will branch to BX if (c1|c2)<0 - - foldOp = GT_OR; - cmpOp = GT_LT; - } - else if (m_testInfo1.compTree->gtOper == GT_GE) - { - return false; - } - else - { - // t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0 - // So we will branch to BX if (c1|c2)!=0 - - foldOp = GT_OR; - cmpOp = GT_NE; - } - } - else - { - if (m_testInfo1.compTree->gtOper == m_testInfo2.compTree->gtOper) - { - return false; - } - - if (m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_NE) - { - // t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0 - // So we will branch to BX if (c1&c2)!=0 - - foldOp = GT_AND; - cmpOp = GT_NE; - } - else if (m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_GE) - { - // t1:c1<0 t2:c2>=0 ==> Branch to BX if both values >= 0 - // So we will branch to BX if (c1|c2)>=0 - - foldOp = GT_OR; - cmpOp = GT_GE; - } - else if (m_testInfo1.compTree->gtOper == GT_GE) - { - return false; - } - else if (m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_EQ) - { - // t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0 - // So we will branch to BX if (c1|c2)==0 - - foldOp = GT_OR; - cmpOp = GT_EQ; - } - else - { - return false; - } - } - - // Anding requires 
both values to be 0 or 1 - - if ((foldOp == GT_AND) && (!m_testInfo1.isBool || !m_testInfo2.isBool)) - { - return false; - } - - // - // Now update the trees - // - - m_foldOp = foldOp; - m_foldType = foldType; - m_cmpOp = cmpOp; - - optOptimizeBoolsUpdateTrees(); - -#ifdef DEBUG - if (m_comp->verbose) - { - printf("Folded %sboolean conditions of " FMT_BB " and " FMT_BB " to :\n", m_c2->OperIsLeaf() ? "" : "non-leaf ", - m_b1->bbNum, m_b2->bbNum); - m_comp->gtDispStmt(s1); - printf("\n"); - } -#endif - - // Return true to continue the bool optimization for the rest of the BB chain - return true; -} - -//----------------------------------------------------------------------------- -// FindCompareChain: Check if the given condition is a compare chain. -// -// Arguments: -// condition: Condition to check. -// isTestCondition: Returns true if condition is a EQ/NE(AND(...),0) but is not a compare chain. -// -// Returns: -// true if chain optimization is a compare chain. -// -// Assumptions: -// m_b1 and m_b2 are set on entry. -// - -inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondition) -{ - GenTree* condOp1 = condition->gtGetOp1(); - GenTree* condOp2 = condition->gtGetOp2(); - - *isTestCondition = false; - - if (condition->OperIs(GT_EQ, GT_NE) && condOp2->IsIntegralConst() && condOp2->AsIntCon()->IconValue() == 0) - { - // Found a test condition. Does it contain a compare chain? - - // Only test that the second operand of AND ends with a compare operation, as this will be - // the condition the new link in the chain will connect with. - // We are allowing for the first operand of the not be a valid chain, as this would require - // a full recursive search through the children. 
- if (condOp1->OperIs(GT_AND) && condOp1->gtGetOp2()->OperIsCmpCompare()) - { - return true; - } - - *isTestCondition = true; - } - - return false; -} - -//----------------------------------------------------------------------------- -// optOptimizeCompareChainCondBlock: Create AND chain when when both m_b1 and m_b2 are BBJ_COND. -// -// Returns: -// true if chain optimization is done and m_b1 and m_b2 are folded into m_b1, else false. -// -// Assumptions: -// m_b1 and m_b2 are set on entry. -// -// Notes: -// -// This aims to reduced the number of conditional jumps by joining cases when multiple -// conditions gate the execution of a block. For example: -// If ( a > b || c == d) { x = y; } -// Will become the following. Note that the second condition is inverted. -// -// ------------ BB01 -> BB03 (cond), succs={BB02,BB03} -// * JTRUE -// \--* GT a,b -// -// ------------ BB02 -> BB04 (cond), preds={BB01} succs={BB03,BB04} -// * JTRUE -// \--* NE c,d -// -// ------------ BB03, preds={BB01, BB02} succs={BB04} -// * ASG x,y -// -// These operands will be combined into a single AND in the first block (with the first -// condition inverted), wrapped by the test condition (NE(...,0)). -// -// ------------ BB01 -> BB03 (cond), succs={BB03,BB04} -// * JTRUE -// \--* NE -// +--* AND -// | +--* LE a,b -// | \--* NE c,d -// \--* CNS_INT 0 -// -// ------------ BB03, preds={BB01} succs={BB04} -// * ASG x,y -// -// -// This will also work for statements with else cases: -// If ( a > b || c == d) { x = y; } else { x = z; } -// Here BB04 will contain the else ASG. Both BB04 and BB05 will unconditionally jump to BB05. -// -// ------------ BB01 -> BB03 (cond), succs={BB03,BB04} -// * JTRUE -// \--* NE -// +--* AND -// | +--* LE a,b -// | \--* NE c,d -// \--* CNS_INT 0 -// -// ------------ BB03, preds={BB01} succs={BB05} -// * ASG x,y -// -// ------------ BB04, preds={BB01} succs={BB05} -// * ASG x,z -// -// -// Multiple conditions can be chained together. 
This is due to the optimization reverse -// iterating through the blocks. For example: -// If ( a > b || c == d || e < f ) { x = y; } -// The first pass will combine "c == d" and "e < f" into a chain. The second pass will then -// combine the "a > b" with the earlier chain. Where possible, the new condition is placed -// within the test condition (NE(...,0)). -// -// ------------ BB01 -> BB03 (cond), succs={BB03,BB04} -// * JTRUE -// \--* NE -// +--* AND -// | +--* AND -// | | +--* NE c,d -// | | \--* GE e,f -// | \--* LT a,b -// \--* CNS_INT 0 -// -// ------------ BB03, preds={BB01} succs={BB04} -// * ASG x,y -// -// -// Conditions connected by && are not yet checked for. For example: -// If ( a > b && c == d ) { x = y; } -// -bool OptBoolsDsc::optOptimizeCompareChainCondBlock() -{ - assert(m_b1 != nullptr && m_b2 != nullptr && m_b3 == nullptr); - m_t3 = nullptr; - - if (!(m_b1->bbNext == m_b2 && m_b1->bbJumpDest == m_b2->bbNext && m_b1->bbJumpDest->bbNext == m_b2->bbJumpDest)) - { - return false; - } - - Statement* const s1 = optOptimizeBoolsChkBlkCond(); - if (s1 == nullptr) - { - return false; - } - Statement* s2 = m_b2->firstStmt(); - - assert(m_testInfo1.testTree->OperIs(GT_JTRUE)); - GenTree* cond1 = m_testInfo1.testTree->gtGetOp1(); - assert(m_testInfo2.testTree->OperIs(GT_JTRUE)); - GenTree* cond2 = m_testInfo2.testTree->gtGetOp1(); - - // Ensure both conditions are suitable. - if (!cond1->OperIsCompare() || !cond2->OperIsCompare()) - { - return false; - } - - // Ensure there are no additional side effects. - if ((cond1->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0 || - (cond2->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0) - { - return false; - } - - // Integer compares only for now (until support for Arm64 fccmp instruction is added) - if (varTypeIsFloating(cond1->gtGetOp1()->TypeGet()) || varTypeIsFloating(cond2->gtGetOp1()->TypeGet())) - { - return false; - } - - // Check for previously optimized compare chains. 
- bool op1IsTestCond; - bool op2IsTestCond; - bool op1IsCondChain = FindCompareChain(cond1, &op1IsTestCond); - bool op2IsCondChain = FindCompareChain(cond2, &op2IsTestCond); - // Don't support combining multiple chains. Allowing this would give minimal benefit, as - // costing checks would disallow most instances. - if (op1IsCondChain && op2IsCondChain) - { - return false; - } - - // Specifically for Arm64, avoid cases where optimizations in lowering will produce better - // code than optimizing here. Specificially: - // * cmp(and(...), 0) will be turned into a TEST_ opcode. - // * Compares against zero will be optimized with cbz. - if (op1IsTestCond || op2IsTestCond) - { - return false; - } - - // Combining conditions means that all conditions are always fully evaluated. - // Put a limit on the max size that can be combined. - if (!m_comp->compStressCompile(Compiler::STRESS_OPT_BOOLS_COMPARE_CHAIN_COST, 25)) - { - int op1Cost = cond1->GetCostEx(); - int op2Cost = cond2->GetCostEx(); - int maxOp1Cost = op1IsCondChain ? 35 : 7; - int maxOp2Cost = op2IsCondChain ? 35 : 7; - - // Cost to allow for chain size of three. - if (op1Cost > maxOp1Cost || op2Cost > maxOp2Cost) - { - JITDUMP("Skipping CompareChainCond that will evaluate conditions unconditionally at costs %d,%d\n", op1Cost, - op2Cost); - return false; - } - } - - GenTree* testcondition = nullptr; - - // If a previous optimize bools happened for op2, then reuse the test condition. - // Cannot reuse for op1, as the condition needs reversing. - if (op2IsCondChain) - { - testcondition = cond2; - cond2 = cond2->gtGetOp1(); - } - - // Remove the first JTRUE statement. - constexpr bool isUnlink = true; - m_comp->fgRemoveStmt(m_b1, s1 DEBUGARG(isUnlink)); - - // Invert the first condition. - GenTree* revCond = m_comp->gtReverseCond(cond1); - assert(cond1 == revCond); // Ensure `gtReverseCond` did not create a new node. 
- - // AND the two conditions together - GenTree* andconds = m_comp->gtNewOperNode(GT_AND, TYP_INT, cond1, cond2); - andconds->AsOp()->gtFlags |= (cond1->gtFlags & GTF_ALL_EFFECT); - andconds->AsOp()->gtFlags |= (cond2->gtFlags & GTF_ALL_EFFECT); - cond1->gtFlags &= ~GTF_RELOP_JMP_USED; - cond2->gtFlags &= ~GTF_RELOP_JMP_USED; - andconds->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); - - // Add a test condition onto the front of the AND (or resuse an exisiting one). - if (op2IsCondChain) - { - testcondition->AsOp()->gtOp1 = andconds; - testcondition->AsOp()->gtFlags |= (andconds->gtFlags & GTF_ALL_EFFECT); - } - else - { - testcondition = m_comp->gtNewOperNode(GT_NE, TYP_INT, andconds, m_comp->gtNewZeroConNode(TYP_INT)); - } - - // Wire the chain into the second block - m_testInfo2.testTree->AsOp()->gtOp1 = testcondition; - m_testInfo2.testTree->AsOp()->gtFlags |= (testcondition->gtFlags & GTF_ALL_EFFECT); - m_comp->gtSetEvalOrder(m_testInfo2.testTree); - m_comp->fgSetStmtSeq(s2); - - // Update the flow. - m_comp->fgRemoveRefPred(m_b1->bbJumpDest, m_b1); - m_b1->bbJumpKind = BBJ_NONE; - - // Fixup flags. - m_b2->bbFlags |= (m_b1->bbFlags & BBF_COPY_PROPAGATE); - - // Join the two blocks. This is done now to ensure that additional conditions can be chained. - if (m_comp->fgCanCompactBlocks(m_b1, m_b2)) - { - m_comp->fgCompactBlocks(m_b1, m_b2); - } - -#ifdef DEBUG - if (m_comp->verbose) - { - printf("\nCombined conditions " FMT_BB " and " FMT_BB " into AND chain :\n", m_b1->bbNum, m_b2->bbNum); - m_comp->fgDumpBlock(m_b1); - printf("\n"); - } -#endif - - return true; -} - -//----------------------------------------------------------------------------- -// optOptimizeBoolsChkBlkCond: Checks block conditions if it can be boolean optimized -// -// Return: -// If all conditions pass, returns the last statement of m_b1, else return nullptr. 
-// -// Notes: -// This method checks if the second (and third block for cond/return/return case) contains only one statement, -// and checks if tree operators are of the right type, e.g, GT_JTRUE, GT_RETURN. -// -// On entry, m_b1, m_b2 are set and m_b3 is set for cond/return/return case. -// If it passes all the conditions, m_testInfo1.testTree, m_testInfo2.testTree and m_t3 are set -// to the root nodes of m_b1, m_b2 and m_b3 each. -// SameTarget is also updated to true if m_b1 and m_b2 jump to the same destination. -// -Statement* OptBoolsDsc::optOptimizeBoolsChkBlkCond() -{ - assert(m_b1 != nullptr && m_b2 != nullptr); - - bool optReturnBlock = false; - if (m_b3 != nullptr) - { - optReturnBlock = true; - } - - // Find the block conditions of m_b1 and m_b2 - - if (m_b2->countOfInEdges() > 1 || (optReturnBlock && m_b3->countOfInEdges() > 1)) - { - return nullptr; - } - - // Find the condition for the first block - - Statement* s1 = m_b1->lastStmt(); - - GenTree* testTree1 = s1->GetRootNode(); - assert(testTree1->gtOper == GT_JTRUE); - - // The second and the third block must contain a single statement - - Statement* s2 = m_b2->firstStmt(); - if (s2->GetPrevStmt() != s2) - { - return nullptr; - } - - GenTree* testTree2 = s2->GetRootNode(); - - if (!optReturnBlock) - { - assert(testTree2->gtOper == GT_JTRUE); - } - else - { - if (testTree2->gtOper != GT_RETURN) - { - return nullptr; - } - - Statement* s3 = m_b3->firstStmt(); - if (s3->GetPrevStmt() != s3) - { - return nullptr; - } - - GenTree* testTree3 = s3->GetRootNode(); - if (testTree3->gtOper != GT_RETURN) - { - return nullptr; - } - - if (!varTypeIsIntegral(testTree2->TypeGet()) || !varTypeIsIntegral(testTree3->TypeGet())) - { - return nullptr; - } - - // The third block is Return with "CNS_INT int 0/1" - if (testTree3->AsOp()->gtOp1->gtOper != GT_CNS_INT) - { - return nullptr; - } - - if (testTree3->AsOp()->gtOp1->gtType != TYP_INT) - { - return nullptr; - } - - m_t3 = testTree3; - } - - 
m_testInfo1.testStmt = s1; - m_testInfo1.testTree = testTree1; - m_testInfo2.testStmt = s2; - m_testInfo2.testTree = testTree2; - - return s1; -} - -//----------------------------------------------------------------------------- -// optOptimizeBoolsChkTypeCostCond: Checks if type conditions meet the folding condition, and -// if cost to fold is not too expensive -// -// Return: -// True if it meets type conditions and cost conditions. Else false. -// -bool OptBoolsDsc::optOptimizeBoolsChkTypeCostCond() -{ - assert(m_testInfo1.compTree->OperIs(GT_EQ, GT_NE, GT_LT, GT_GE) && m_testInfo1.compTree->AsOp()->gtOp1 == m_c1); - assert(m_testInfo2.compTree->OperIs(GT_EQ, GT_NE, GT_LT, GT_GE) && m_testInfo2.compTree->AsOp()->gtOp1 == m_c2); - - // - // Leave out floats where the bit-representation is more complicated - // - there are two representations for 0. - // - if (varTypeIsFloating(m_c1->TypeGet()) || varTypeIsFloating(m_c2->TypeGet())) - { - return false; - } - - // Make sure the types involved are of the same sizes - if (genTypeSize(m_c1->TypeGet()) != genTypeSize(m_c2->TypeGet())) - { - return false; - } - if (genTypeSize(m_testInfo1.compTree->TypeGet()) != genTypeSize(m_testInfo2.compTree->TypeGet())) - { - return false; - } -#ifdef TARGET_ARMARCH - // Skip the small operand which we cannot encode. 
- if (varTypeIsSmall(m_c1->TypeGet())) - return false; -#endif - // The second condition must not contain side effects - // - if (m_c2->gtFlags & GTF_GLOB_EFFECT) - { - return false; - } - - // The second condition must not be too expensive - // - if (m_c2->GetCostEx() > 12) - { - return false; - } - - return true; -} - -//----------------------------------------------------------------------------- -// optOptimizeBoolsUpdateTrees: Fold the trees based on fold type and comparison type, -// update the edges, unlink removed blocks and update loop table -// -void OptBoolsDsc::optOptimizeBoolsUpdateTrees() -{ - assert(m_b1 != nullptr && m_b2 != nullptr); - - bool optReturnBlock = false; - if (m_b3 != nullptr) - { - optReturnBlock = true; - } - - assert(m_foldOp != NULL && m_foldType != NULL && m_c1 != nullptr && m_c2 != nullptr); - - GenTree* cmpOp1 = m_comp->gtNewOperNode(m_foldOp, m_foldType, m_c1, m_c2); - if (m_testInfo1.isBool && m_testInfo2.isBool) - { - // When we 'OR'/'AND' two booleans, the result is boolean as well - cmpOp1->gtFlags |= GTF_BOOLEAN; - } - - GenTree* t1Comp = m_testInfo1.compTree; - t1Comp->SetOper(m_cmpOp); - t1Comp->AsOp()->gtOp1 = cmpOp1; - t1Comp->AsOp()->gtOp2->gtType = m_foldType; // Could have been varTypeIsGC() - if (optReturnBlock) - { - // Update tree when m_b1 is BBJ_COND and m_b2 and m_b3 are GT_RETURN (BBJ_RETURN) - t1Comp->AsOp()->gtOp2->AsIntCon()->gtIconVal = 0; - m_testInfo1.testTree->gtOper = GT_RETURN; - m_testInfo1.testTree->gtType = m_testInfo2.testTree->gtType; - - // Update the return count of flow graph - assert(m_comp->fgReturnCount >= 2); - --m_comp->fgReturnCount; - } - -#if FEATURE_SET_FLAGS - // For comparisons against zero we will have the GTF_SET_FLAGS set - // and this can cause an assert to fire in fgMoveOpsLeft(GenTree* tree) - // during the CSE phase. 
- // - // So make sure to clear any GTF_SET_FLAGS bit on these operations - // as they are no longer feeding directly into a comparisons against zero - - // Make sure that the GTF_SET_FLAGS bit is cleared. - // Fix 388436 ARM JitStress WP7 - m_c1->gtFlags &= ~GTF_SET_FLAGS; - m_c2->gtFlags &= ~GTF_SET_FLAGS; - - // The new top level node that we just created does feed directly into - // a comparison against zero, so set the GTF_SET_FLAGS bit so that - // we generate an instruction that sets the flags, which allows us - // to omit the cmp with zero instruction. - - // Request that the codegen for cmpOp1 sets the condition flags - // when it generates the code for cmpOp1. - // - cmpOp1->gtRequestSetFlags(); -#endif - - // Recost/rethread the tree if necessary - // - if (m_comp->fgNodeThreading != NodeThreading::None) - { - m_comp->gtSetStmtInfo(m_testInfo1.testStmt); - m_comp->fgSetStmtSeq(m_testInfo1.testStmt); - } - - if (!optReturnBlock) - { - // Update edges if m_b1: BBJ_COND and m_b2: BBJ_COND - - FlowEdge* edge1 = m_comp->fgGetPredForBlock(m_b1->bbJumpDest, m_b1); - FlowEdge* edge2; - - if (m_sameTarget) - { - edge2 = m_comp->fgGetPredForBlock(m_b2->bbJumpDest, m_b2); - } - else - { - edge2 = m_comp->fgGetPredForBlock(m_b2->bbNext, m_b2); - - m_comp->fgRemoveRefPred(m_b1->bbJumpDest, m_b1); - - m_b1->bbJumpDest = m_b2->bbJumpDest; - - m_comp->fgAddRefPred(m_b2->bbJumpDest, m_b1); - } - - assert(edge1 != nullptr); - assert(edge2 != nullptr); - - weight_t edgeSumMin = edge1->edgeWeightMin() + edge2->edgeWeightMin(); - weight_t edgeSumMax = edge1->edgeWeightMax() + edge2->edgeWeightMax(); - if ((edgeSumMax >= edge1->edgeWeightMax()) && (edgeSumMax >= edge2->edgeWeightMax())) - { - edge1->setEdgeWeights(edgeSumMin, edgeSumMax, m_b1->bbJumpDest); - } - else - { - edge1->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT, m_b1->bbJumpDest); - } - } - - /* Modify the target of the conditional jump and update bbRefs and bbPreds */ - - if (optReturnBlock) - { - 
m_b1->bbJumpDest = nullptr; - m_b1->bbJumpKind = BBJ_RETURN; -#ifdef DEBUG - m_b1->bbJumpSwt = m_b2->bbJumpSwt; -#endif - assert(m_b2->bbJumpKind == BBJ_RETURN); - assert(m_b1->bbNext == m_b2); - assert(m_b3 != nullptr); - } - else - { - assert(m_b1->bbJumpKind == BBJ_COND); - assert(m_b2->bbJumpKind == BBJ_COND); - assert(m_b1->bbJumpDest == m_b2->bbJumpDest); - assert(m_b1->bbNext == m_b2); - assert(m_b2->bbNext != nullptr); - } - - if (!optReturnBlock) - { - // Update bbRefs and bbPreds - // - // Replace pred 'm_b2' for 'm_b2->bbNext' with 'm_b1' - // Remove pred 'm_b2' for 'm_b2->bbJumpDest' - m_comp->fgReplacePred(m_b2->bbNext, m_b2, m_b1); - m_comp->fgRemoveRefPred(m_b2->bbJumpDest, m_b2); - } - - // Get rid of the second block - - m_comp->fgUnlinkBlock(m_b2); - m_b2->bbFlags |= BBF_REMOVED; - // If m_b2 was the last block of a try or handler, update the EH table. - m_comp->ehUpdateForDeletedBlock(m_b2); - - if (optReturnBlock) - { - // Get rid of the third block - m_comp->fgUnlinkBlock(m_b3); - m_b3->bbFlags |= BBF_REMOVED; - // If m_b3 was the last block of a try or handler, update the EH table. - m_comp->ehUpdateForDeletedBlock(m_b3); - } - - // Update loop table - m_comp->fgUpdateLoopsAfterCompacting(m_b1, m_b2); - if (optReturnBlock) - { - m_comp->fgUpdateLoopsAfterCompacting(m_b1, m_b3); - } - - // Update IL range of first block - m_b1->bbCodeOffsEnd = optReturnBlock ? m_b3->bbCodeOffsEnd : m_b2->bbCodeOffsEnd; -} - -//----------------------------------------------------------------------------- -// optOptimizeBoolsReturnBlock: Optimize boolean when m_b1 is BBJ_COND and m_b2 and m_b3 are BBJ_RETURN -// -// Arguments: -// b3: Pointer to basic block b3 -// -// Returns: -// true if boolean optimization is done and m_b1, m_b2 and m_b3 are folded into m_b1, else false. -// -// Notes: -// m_b1, m_b2 and m_b3 of OptBoolsDsc are set on entry. 
-// -// if B1.bbJumpDest == b3, it transforms -// B1 : brtrue(t1, B3) -// B2 : ret(t2) -// B3 : ret(0) -// to -// B1 : ret((!t1) && t2) -// -// For example, (x==0 && y==0) generates: -// B1: GT_JTRUE (BBJ_COND), jumps to B3 -// B2: GT_RETURN (BBJ_RETURN) -// B3: GT_RETURN (BBJ_RETURN), -// and it is folded into -// B1: GT_RETURN (BBJ_RETURN) -// -bool OptBoolsDsc::optOptimizeBoolsReturnBlock(BasicBlock* b3) -{ - assert(m_b1 != nullptr && m_b2 != nullptr); - - // m_b3 is set for cond/return/return case - m_b3 = b3; - - m_sameTarget = false; - Statement* const s1 = optOptimizeBoolsChkBlkCond(); - if (s1 == nullptr) - { - return false; - } - - // Find the branch conditions of m_b1 and m_b2 - - m_c1 = optIsBoolComp(&m_testInfo1); - if (m_c1 == nullptr) - { - return false; - } - - m_c2 = optIsBoolComp(&m_testInfo2); - if (m_c2 == nullptr) - { - return false; - } - - // Find the type and cost conditions of m_testInfo1 and m_testInfo2 - - if (!optOptimizeBoolsChkTypeCostCond()) - { - return false; - } - - // Get the fold operator (m_foldOp, e.g., GT_OR/GT_AND) and - // the comparison operator (m_cmpOp, e.g., GT_EQ/GT_NE/GT_GE/GT_LT) - - var_types foldType = m_c1->TypeGet(); - if (varTypeIsGC(foldType)) - { - foldType = TYP_I_IMPL; - } - m_foldType = foldType; - - m_foldOp = GT_NONE; - m_cmpOp = GT_NONE; - - genTreeOps foldOp; - genTreeOps cmpOp; - - ssize_t it1val = m_testInfo1.compTree->AsOp()->gtOp2->AsIntCon()->gtIconVal; - ssize_t it2val = m_testInfo2.compTree->AsOp()->gtOp2->AsIntCon()->gtIconVal; - ssize_t it3val = m_t3->AsOp()->gtOp1->AsIntCon()->gtIconVal; - - if ((m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_EQ) && - (it1val == 0 && it2val == 0 && it3val == 0)) - { - // Case: x == 0 && y == 0 - // t1:c1!=0 t2:c2==0 t3:c3==0 - // ==> true if (c1|c2)==0 - foldOp = GT_OR; - cmpOp = GT_EQ; - } - else if ((m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_NE) && - (it1val == 0 && it2val == 0 && it3val == 0)) - { 
- // Case: x == 1 && y ==1 - // t1:c1!=1 t2:c2==1 t3:c3==0 is reversed from optIsBoolComp() to: t1:c1==0 t2:c2!=0 t3:c3==0 - // ==> true if (c1&c2)!=0 - foldOp = GT_AND; - cmpOp = GT_NE; - } - else if ((m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_GE) && - (it1val == 0 && it2val == 0 && it3val == 0)) - { - // Case: x >= 0 && y >= 0 - // t1:c1<0 t2:c2>=0 t3:c3==0 - // ==> true if (c1|c2)>=0 - - foldOp = GT_OR; - cmpOp = GT_GE; - } - else if ((m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_EQ) && - (it1val == 0 && it2val == 0 && it3val == 1)) - { - // Case: x == 0 || y == 0 - // t1:c1==0 t2:c2==0 t3:c3==1 - // ==> true if (c1&c2)==0 - foldOp = GT_AND; - cmpOp = GT_EQ; - } - else if ((m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_NE) && - (it1val == 0 && it2val == 0 && it3val == 1)) - { - // Case: x == 1 || y == 1 - // t1:c1==1 t2:c2==1 t3:c3==1 is reversed from optIsBoolComp() to: t1:c1!=0 t2:c2!=0 t3:c3==1 - // ==> true if (c1|c2)!=0 - foldOp = GT_OR; - cmpOp = GT_NE; - } - else if ((m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_LT) && - (it1val == 0 && it2val == 0 && it3val == 1)) - { - // Case: x < 0 || y < 0 - // t1:c1<0 t2:c2<0 t3:c3==1 - // ==> true if (c1|c2)<0 - - foldOp = GT_OR; - cmpOp = GT_LT; - } - else - { - // Require NOT operation for operand(s). Do Not fold. 
- return false; - } - - if ((foldOp == GT_AND || (cmpOp == GT_NE && foldOp != GT_OR)) && (!m_testInfo1.isBool || !m_testInfo2.isBool)) - { - // x == 1 && y == 1: Skip cases where x or y is greater than 1, e.g., x=3, y=1 - // x == 0 || y == 0: Skip cases where x and y have opposite bits set, e.g., x=2, y=1 - // x == 1 || y == 1: Skip cases where either x or y is greater than 1, e.g., x=2, y=0 - return false; - } - - m_foldOp = foldOp; - m_cmpOp = cmpOp; - - // Now update the trees - - optOptimizeBoolsUpdateTrees(); - -#ifdef DEBUG - if (m_comp->verbose) - { - printf("Folded %sboolean conditions of " FMT_BB ", " FMT_BB " and " FMT_BB " to :\n", - m_c2->OperIsLeaf() ? "" : "non-leaf ", m_b1->bbNum, m_b2->bbNum, m_b3->bbNum); - m_comp->gtDispStmt(s1); - printf("\n"); - } -#endif - - // Return true to continue the bool optimization for the rest of the BB chain - return true; -} - -//----------------------------------------------------------------------------- -// optOptimizeBoolsGcStress: Replace x==null with (x|x)==0 if x is a GC-type. -// This will stress code-gen and the emitter to make sure they support such trees. 
-// -#ifdef DEBUG - -void OptBoolsDsc::optOptimizeBoolsGcStress() -{ - if (!m_comp->compStressCompile(m_comp->STRESS_OPT_BOOLS_GC, 20)) - { - return; - } - - assert(m_b1->bbJumpKind == BBJ_COND); - Statement* const stmt = m_b1->lastStmt(); - GenTree* const cond = stmt->GetRootNode(); - - assert(cond->gtOper == GT_JTRUE); - - OptTestInfo test; - test.testStmt = stmt; - test.testTree = cond; - - GenTree* comparand = optIsBoolComp(&test); - - if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet())) - { - return; - } - GenTree* relop = test.compTree; - bool isBool = test.isBool; - - if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF)) - { - return; - } - - GenTree* comparandClone = m_comp->gtCloneExpr(comparand); - - noway_assert(relop->AsOp()->gtOp1 == comparand); - genTreeOps oper = m_comp->compStressCompile(m_comp->STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND; - relop->AsOp()->gtOp1 = m_comp->gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone); - - // Comparand type is already checked, and we have const int, there is no harm - // morphing it into a TYP_I_IMPL. - noway_assert(relop->AsOp()->gtOp2->gtOper == GT_CNS_INT); - relop->AsOp()->gtOp2->gtType = TYP_I_IMPL; - - // Recost/rethread the tree if necessary - // - if (m_comp->fgNodeThreading != NodeThreading::None) - { - m_comp->gtSetStmtInfo(test.testStmt); - m_comp->fgSetStmtSeq(test.testStmt); - } -} - -#endif - -//----------------------------------------------------------------------------- -// optIsBoolComp: Function used by folding of boolean conditionals -// -// Arguments: -// pOptTest The test info for the test tree -// -// Return: -// On success, return the first operand (gtOp1) of compTree, else return nullptr. -// -// Notes: -// On entry, testTree is set. -// On success, compTree is set to the compare node (i.e. GT_EQ or GT_NE or GT_LT or GT_GE) of the testTree. -// isBool is set to true if the comparand (i.e., operand 1 of compTree is boolean. Otherwise, false. 
-// -// Given a GT_JTRUE or GT_RETURN node, this method checks if it is a boolean comparison -// of the form "if (boolVal ==/!=/>=/< 0/1)".This is translated into -// a GT_EQ/GT_NE/GT_GE/GT_LT node with "opr1" being a boolean lclVar and "opr2" the const 0/1. -// -// When isBool == true, if the comparison was against a 1 (i.e true) -// then we morph the tree by reversing the GT_EQ/GT_NE/GT_GE/GT_LT and change the 1 to 0. -// -GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest) -{ - pOptTest->isBool = false; - - assert(pOptTest->testTree->gtOper == GT_JTRUE || pOptTest->testTree->gtOper == GT_RETURN); - GenTree* cond = pOptTest->testTree->AsOp()->gtOp1; - - // The condition must be "!= 0" or "== 0" or >=0 or <0 - // we don't optimize unsigned < and >= operations - if (!cond->OperIs(GT_EQ, GT_NE) && (!cond->OperIs(GT_LT, GT_GE) || cond->IsUnsigned())) - { - return nullptr; - } - - // Return the compare node to the caller - - pOptTest->compTree = cond; - - // Get hold of the comparands - - GenTree* opr1 = cond->AsOp()->gtOp1; - GenTree* opr2 = cond->AsOp()->gtOp2; - - if (opr2->gtOper != GT_CNS_INT) - { - return nullptr; - } - - if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1)) - { - return nullptr; - } - - ssize_t ival2 = opr2->AsIntCon()->gtIconVal; - - // Is the value a boolean? - // We can either have a boolean expression (marked GTF_BOOLEAN) or - // a local variable that is marked as being boolean (lvIsBoolean) - - if (opr1->gtFlags & GTF_BOOLEAN) - { - pOptTest->isBool = true; - } - else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1))) - { - pOptTest->isBool = true; - } - else if (opr1->gtOper == GT_LCL_VAR) - { - // is it a boolean local variable? - - unsigned lclNum = opr1->AsLclVarCommon()->GetLclNum(); - noway_assert(lclNum < m_comp->lvaCount); - - if (m_comp->lvaTable[lclNum].lvIsBoolean) - { - pOptTest->isBool = true; - } - } - - // Was our comparison against the constant 1 (i.e. 
true) - if (ival2 == 1) - { - // If this is a boolean expression tree we can reverse the relop - // and change the true to false. - if (pOptTest->isBool) - { - m_comp->gtReverseCond(cond); - opr2->AsIntCon()->gtIconVal = 0; - } - else - { - return nullptr; - } - } - - return opr1; -} - -//----------------------------------------------------------------------------- -// optOptimizeBools: Folds boolean conditionals for GT_JTRUE/GT_RETURN nodes -// -// Returns: -// suitable phase status -// -// Notes: -// If the operand of GT_JTRUE/GT_RETURN node is GT_EQ/GT_NE/GT_GE/GT_LT of the form -// "if (boolVal ==/!=/>=/< 0/1)", the GT_EQ/GT_NE/GT_GE/GT_LT nodes are translated into a -// GT_EQ/GT_NE/GT_GE/GT_LT node with -// "op1" being a boolean GT_OR/GT_AND lclVar and -// "op2" the const 0/1. -// For example, the folded tree for the below boolean optimization is shown below: -// Case 1: (x == 0 && y ==0) => (x | y) == 0 -// * RETURN int -// \--* EQ int -// +--* OR int -// | +--* LCL_VAR int V00 arg0 -// | \--* LCL_VAR int V01 arg1 -// \--* CNS_INT int 0 -// -// Case 2: (x == null && y == null) ==> (x | y) == 0 -// * RETURN int -// \-- * EQ int -// + -- * OR long -// | +-- * LCL_VAR ref V00 arg0 -// | \-- * LCL_VAR ref V01 arg1 -// \-- * CNS_INT long 0 -// -// Case 3: (x == 0 && y == 0 && z == 0) ==> ((x | y) | z) == 0 -// * RETURN int -// \-- * EQ int -// + -- * OR int -// | +-- * OR int -// | | +-- * LCL_VAR int V00 arg0 -// | | \-- * LCL_VAR int V01 arg1 -// | \-- * LCL_VAR int V02 arg2 -// \-- * CNS_INT int 0 -// -// Case 4: (x == 0 && y == 0 && z == 0 && w == 0) ==> (((x | y) | z) | w) == 0 -// * RETURN int -// \-- * EQ int -// + * OR int -// | +--* OR int -// | | +--* OR int -// | | | +--* LCL_VAR int V00 arg0 -// | | | \--* LCL_VAR int V01 arg1 -// | | \--* LCL_VAR int V02 arg2 -// | \--* LCL_VAR int V03 arg3 -// \--* CNS_INT int 0 -// -// Case 5: (x != 0 && y != 0) => (x | y) != 0 -// * RETURN int -// \--* NE int -// +--* OR int -// | +--* LCL_VAR int V00 arg0 -// | 
\--* LCL_VAR int V01 arg1 -// \--* CNS_INT int 0 -// -// Case 6: (x >= 0 && y >= 0) => (x | y) >= 0 -// * RETURN int -// \--* GE int -// +--* OR int -// | +--* LCL_VAR int V00 arg0 -// | \--* LCL_VAR int V01 arg1 -// \--* CNS_INT int 0 -// -// Case 7: (x < 0 || y < 0) => (x & y) < 0 -// * RETURN int -// \--* LT int -// +--* AND int -// | +--* LCL_VAR int V00 arg0 -// | \--* LCL_VAR int V01 arg1 -// \--* CNS_INT int 0 -// -// Patterns that are not optimized include (x == 1 && y == 1), (x == 1 || y == 1), -// (x == 0 || y == 0) because currently their comptree is not marked as boolean expression. -// When m_foldOp == GT_AND or m_cmpOp == GT_NE, both compTrees must be boolean expression -// in order to skip below cases when compTree is not boolean expression: -// - x == 1 && y == 1 ==> (x&y)!=0: Skip cases where x or y is greater than 1, e.g., x=3, y=1 -// - x == 1 || y == 1 ==> (x|y)!=0: Skip cases where either x or y is greater than 1, e.g., x=2, y=0 -// - x == 0 || y == 0 ==> (x&y)==0: Skip cases where x and y have opposite bits set, e.g., x=2, y=1 -// -PhaseStatus Compiler::optOptimizeBools() -{ -#ifdef DEBUG - if (verbose) - { - printf("*************** In optOptimizeBools()\n"); - } -#endif - bool change = false; - unsigned numCond = 0; - unsigned numReturn = 0; - unsigned numPasses = 0; - unsigned stress = false; - - do - { - numPasses++; - change = false; - - // Reverse iterate through the blocks. - for (BasicBlock* b1 = fgLastBB; b1 != nullptr; b1 = b1->bbPrev) - { - // We're only interested in conditional jumps here - - if (b1->bbJumpKind != BBJ_COND) - { - continue; - } - - // If there is no next block, we're done - - BasicBlock* b2 = b1->bbNext; - if (b2 == nullptr) - { - break; - } - - // The next block must not be marked as BBF_DONT_REMOVE - if (b2->bbFlags & BBF_DONT_REMOVE) - { - continue; - } - - OptBoolsDsc optBoolsDsc(b1, b2, this); - - // The next block needs to be a condition or return block. 
- - if (b2->bbJumpKind == BBJ_COND) - { - if ((b1->bbJumpDest != b2->bbJumpDest) && (b1->bbJumpDest != b2->bbNext)) - { - continue; - } - - // When it is conditional jumps - - if (optBoolsDsc.optOptimizeBoolsCondBlock()) - { - change = true; - numCond++; - } -#ifdef TARGET_ARM64 - else if (optBoolsDsc.optOptimizeCompareChainCondBlock()) - { - change = true; - numCond++; - } -#endif - } - else if (b2->bbJumpKind == BBJ_RETURN) - { - // Set b3 to b1 jump destination - BasicBlock* b3 = b1->bbJumpDest; - - // b3 must not be marked as BBF_DONT_REMOVE - - if (b3->bbFlags & BBF_DONT_REMOVE) - { - continue; - } - - // b3 must be RETURN type - - if (b3->bbJumpKind != BBJ_RETURN) - { - continue; - } - - if (optBoolsDsc.optOptimizeBoolsReturnBlock(b3)) - { - change = true; - numReturn++; - } - } - else - { -#ifdef DEBUG - optBoolsDsc.optOptimizeBoolsGcStress(); - stress = true; -#endif - } - } - } while (change); - - JITDUMP("\noptimized %u BBJ_COND cases, %u BBJ_RETURN cases in %u passes\n", numCond, numReturn, numPasses); - - const bool modified = stress || ((numCond + numReturn) > 0); - return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; -} diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index ea5481529ce467..a3e47922c52211 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9052,6 +9052,1407 @@ ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTree* mul, GenTree** pIndex DEB return scale; } +//----------------------------------------------------------------------------- +// OptTestInfo: Member of OptBoolsDsc struct used to test if a GT_JTRUE or GT_RETURN node +// is a boolean comparison +// +struct OptTestInfo +{ + Statement* testStmt; // Last statement of the basic block + GenTree* testTree; // The root node of the testStmt (GT_JTRUE or GT_RETURN). + GenTree* compTree; // The compare node (i.e. 
GT_EQ or GT_NE node) of the testTree + bool isBool; // If the compTree is boolean expression +}; + +//----------------------------------------------------------------------------- +// OptBoolsDsc: Descriptor used for Boolean Optimization +// +class OptBoolsDsc +{ +public: + OptBoolsDsc(BasicBlock* b1, BasicBlock* b2, Compiler* comp) + { + m_b1 = b1; + m_b2 = b2; + m_b3 = nullptr; + m_comp = comp; + } + +private: + BasicBlock* m_b1; // The first basic block with the BBJ_COND conditional jump type + BasicBlock* m_b2; // The next basic block of m_b1. Either BBJ_COND or BBJ_RETURN type + BasicBlock* m_b3; // m_b1->bbJumpDest. Null if m_b2 is not a return block. + + Compiler* m_comp; // The pointer to the Compiler instance + + OptTestInfo m_testInfo1; // The first test info + OptTestInfo m_testInfo2; // The second test info + GenTree* m_t3; // The root node of the first statement of m_b3 + + GenTree* m_c1; // The first operand of m_testInfo1.compTree + GenTree* m_c2; // The first operand of m_testInfo2.compTree + + bool m_sameTarget; // if m_b1 and m_b2 jumps to the same destination + + genTreeOps m_foldOp; // The fold operator (e.g., GT_AND or GT_OR) + var_types m_foldType; // The type of the folded tree + genTreeOps m_cmpOp; // The comparison operator (e.g., GT_EQ or GT_NE) + +public: + bool optOptimizeBoolsCondBlock(); + bool optOptimizeCompareChainCondBlock(); + bool optOptimizeBoolsReturnBlock(BasicBlock* b3); +#ifdef DEBUG + void optOptimizeBoolsGcStress(); +#endif + +private: + Statement* optOptimizeBoolsChkBlkCond(); + GenTree* optIsBoolComp(OptTestInfo* pOptTest); + bool optOptimizeBoolsChkTypeCostCond(); + void optOptimizeBoolsUpdateTrees(); + inline bool FindCompareChain(GenTree* condition, bool* isTestCondition); +}; + +//----------------------------------------------------------------------------- +// optOptimizeBoolsCondBlock: Optimize boolean when bbJumpKind of both m_b1 and m_b2 are BBJ_COND +// +// Returns: +// true if boolean optimization is done and 
m_b1 and m_b2 are folded into m_b1, else false. +// +// Notes: +// m_b1 and m_b2 are set on entry. +// +// Case 1: if b1.bbJumpDest == b2.bbJumpDest, it transforms +// B1 : brtrue(t1, Bx) +// B2 : brtrue(t2, Bx) +// B3 : +// to +// B1 : brtrue(t1|t2, BX) +// B3 : +// +// For example, (x == 0 && y == 0 && z == 0) generates +// B1: GT_JTRUE (BBJ_COND), jump to B4 +// B2: GT_JTRUE (BBJ_COND), jump to B4 +// B3: GT_RETURN (BBJ_RETURN) +// B4: GT_RETURN (BBJ_RETURN) +// and B1 and B2 are folded into B1: +// B1: GT_JTRUE (BBJ_COND), jump to B4 +// B3: GT_RETURN (BBJ_RETURN) +// B4: GT_RETURN (BBJ_RETURN) +// +// Case 2: if B1.bbJumpDest == B2->bbNext, it transforms +// B1 : brtrue(t1, B3) +// B2 : brtrue(t2, Bx) +// B3 : +// to +// B1 : brtrue((!t1) && t2, Bx) +// B3 : +// +bool OptBoolsDsc::optOptimizeBoolsCondBlock() +{ + assert(m_b1 != nullptr && m_b2 != nullptr && m_b3 == nullptr); + + // Check if m_b1 and m_b2 jump to the same target and get back pointers to m_testInfo1 and t2 tree nodes + + m_t3 = nullptr; + + // Check if m_b1 and m_b2 have the same bbJumpDest + + if (m_b1->bbJumpDest == m_b2->bbJumpDest) + { + // Given the following sequence of blocks : + // B1: brtrue(t1, BX) + // B2: brtrue(t2, BX) + // B3: + // we will try to fold it to : + // B1: brtrue(t1|t2, BX) + // B3: + + m_sameTarget = true; + } + else if (m_b1->bbJumpDest == m_b2->bbNext) + { + // Given the following sequence of blocks : + // B1: brtrue(t1, B3) + // B2: brtrue(t2, BX) + // B3: + // we will try to fold it to : + // B1: brtrue((!t1)&&t2, BX) + // B3: + + m_sameTarget = false; + } + else + { + return false; + } + + Statement* const s1 = optOptimizeBoolsChkBlkCond(); + if (s1 == nullptr) + { + return false; + } + + // Find the branch conditions of m_b1 and m_b2 + + m_c1 = optIsBoolComp(&m_testInfo1); + if (m_c1 == nullptr) + { + return false; + } + + m_c2 = optIsBoolComp(&m_testInfo2); + if (m_c2 == nullptr) + { + return false; + } + + // Find the type and cost conditions of m_testInfo1 and 
m_testInfo2 + + if (!optOptimizeBoolsChkTypeCostCond()) + { + return false; + } + + // Get the fold operator and the comparison operator + + genTreeOps foldOp; + genTreeOps cmpOp; + var_types foldType = m_c1->TypeGet(); + if (varTypeIsGC(foldType)) + { + foldType = TYP_I_IMPL; + } + + assert(m_testInfo1.compTree->OperIs(GT_EQ, GT_NE, GT_LT, GT_GE)); + + if (m_sameTarget) + { + // Both conditions must be the same + + if (m_testInfo1.compTree->gtOper != m_testInfo2.compTree->gtOper) + { + return false; + } + + if (m_testInfo1.compTree->gtOper == GT_EQ) + { + // t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0 + // So we will branch to BX if (c1&c2)==0 + + foldOp = GT_AND; + cmpOp = GT_EQ; + } + else if (m_testInfo1.compTree->gtOper == GT_LT) + { + // t1:c1<0 t2:c2<0 ==> Branch to BX if either value < 0 + // So we will branch to BX if (c1|c2)<0 + + foldOp = GT_OR; + cmpOp = GT_LT; + } + else if (m_testInfo1.compTree->gtOper == GT_GE) + { + return false; + } + else + { + // t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0 + // So we will branch to BX if (c1|c2)!=0 + + foldOp = GT_OR; + cmpOp = GT_NE; + } + } + else + { + if (m_testInfo1.compTree->gtOper == m_testInfo2.compTree->gtOper) + { + return false; + } + + if (m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_NE) + { + // t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0 + // So we will branch to BX if (c1&c2)!=0 + + foldOp = GT_AND; + cmpOp = GT_NE; + } + else if (m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_GE) + { + // t1:c1<0 t2:c2>=0 ==> Branch to BX if both values >= 0 + // So we will branch to BX if (c1|c2)>=0 + + foldOp = GT_OR; + cmpOp = GT_GE; + } + else if (m_testInfo1.compTree->gtOper == GT_GE) + { + return false; + } + else if (m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_EQ) + { + // t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0 + // So we will branch to BX if (c1|c2)==0 + + 
foldOp = GT_OR; + cmpOp = GT_EQ; + } + else + { + return false; + } + } + + // Anding requires both values to be 0 or 1 + + if ((foldOp == GT_AND) && (!m_testInfo1.isBool || !m_testInfo2.isBool)) + { + return false; + } + + // + // Now update the trees + // + + m_foldOp = foldOp; + m_foldType = foldType; + m_cmpOp = cmpOp; + + optOptimizeBoolsUpdateTrees(); + +#ifdef DEBUG + if (m_comp->verbose) + { + printf("Folded %sboolean conditions of " FMT_BB " and " FMT_BB " to :\n", m_c2->OperIsLeaf() ? "" : "non-leaf ", + m_b1->bbNum, m_b2->bbNum); + m_comp->gtDispStmt(s1); + printf("\n"); + } +#endif + + // Return true to continue the bool optimization for the rest of the BB chain + return true; +} + +//----------------------------------------------------------------------------- +// FindCompareChain: Check if the given condition is a compare chain. +// +// Arguments: +// condition: Condition to check. +// isTestCondition: Returns true if condition is a EQ/NE(AND(...),0) but is not a compare chain. +// +// Returns: +// true if chain optimization is a compare chain. +// +// Assumptions: +// m_b1 and m_b2 are set on entry. +// + +inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondition) +{ + GenTree* condOp1 = condition->gtGetOp1(); + GenTree* condOp2 = condition->gtGetOp2(); + + *isTestCondition = false; + + if (condition->OperIs(GT_EQ, GT_NE) && condOp2->IsIntegralConst() && condOp2->AsIntCon()->IconValue() == 0) + { + // Found a test condition. Does it contain a compare chain? + + // Only test that the second operand of AND ends with a compare operation, as this will be + // the condition the new link in the chain will connect with. + // We are allowing for the first operand of the not be a valid chain, as this would require + // a full recursive search through the children. 
+ + if (condOp1->OperIs(GT_AND) && condOp1->gtGetOp2()->OperIsCmpCompare()) + { + return true; + } + + *isTestCondition = true; + } + + return false; +} + +//----------------------------------------------------------------------------- +// optOptimizeCompareChainCondBlock: Create AND chain when both m_b1 and m_b2 are BBJ_COND. +// +// Returns: +// true if chain optimization is done and m_b1 and m_b2 are folded into m_b1, else false. +// +// Assumptions: +// m_b1 and m_b2 are set on entry. +// +// Notes: +// +// This aims to reduce the number of conditional jumps by joining cases when multiple +// conditions gate the execution of a block. For example: +// If ( a > b || c == d) { x = y; } +// Will become the following. Note that the second condition is inverted. +// +// ------------ BB01 -> BB03 (cond), succs={BB02,BB03} +// * JTRUE +// \--* GT a,b +// +// ------------ BB02 -> BB04 (cond), preds={BB01} succs={BB03,BB04} +// * JTRUE +// \--* NE c,d +// +// ------------ BB03, preds={BB01, BB02} succs={BB04} +// * ASG x,y +// +// These operands will be combined into a single AND in the first block (with the first +// condition inverted), wrapped by the test condition (NE(...,0)). +// +// ------------ BB01 -> BB03 (cond), succs={BB03,BB04} +// * JTRUE +// \--* NE +// +--* AND +// | +--* LE a,b +// | \--* NE c,d +// \--* CNS_INT 0 +// +// ------------ BB03, preds={BB01} succs={BB04} +// * ASG x,y +// +// +// This will also work for statements with else cases: +// If ( a > b || c == d) { x = y; } else { x = z; } +// Here BB04 will contain the else ASG. Both BB03 and BB04 will unconditionally jump to BB05. +// +// ------------ BB01 -> BB03 (cond), succs={BB03,BB04} +// * JTRUE +// \--* NE +// +--* AND +// | +--* LE a,b +// | \--* NE c,d +// \--* CNS_INT 0 +// +// ------------ BB03, preds={BB01} succs={BB05} +// * ASG x,y +// +// ------------ BB04, preds={BB01} succs={BB05} +// * ASG x,z +// +// +// Multiple conditions can be chained together. 
This is due to the optimization reverse +// iterating through the blocks. For example: +// If ( a > b || c == d || e < f ) { x = y; } +// The first pass will combine "c == d" and "e < f" into a chain. The second pass will then +// combine the "a > b" with the earlier chain. Where possible, the new condition is placed +// within the test condition (NE(...,0)). +// +// ------------ BB01 -> BB03 (cond), succs={BB03,BB04} +// * JTRUE +// \--* NE +// +--* AND +// | +--* AND +// | | +--* NE c,d +// | | \--* GE e,f +// | \--* LT a,b +// \--* CNS_INT 0 +// +// ------------ BB03, preds={BB01} succs={BB04} +// * ASG x,y +// +// +// Conditions connected by && are not yet checked for. For example: +// If ( a > b && c == d ) { x = y; } +// +bool OptBoolsDsc::optOptimizeCompareChainCondBlock() +{ + assert(m_b1 != nullptr && m_b2 != nullptr && m_b3 == nullptr); + m_t3 = nullptr; + + if (!(m_b1->bbNext == m_b2 && m_b1->bbJumpDest == m_b2->bbNext && m_b1->bbJumpDest->bbNext == m_b2->bbJumpDest)) + { + return false; + } + + Statement* const s1 = optOptimizeBoolsChkBlkCond(); + if (s1 == nullptr) + { + return false; + } + Statement* s2 = m_b2->firstStmt(); + + assert(m_testInfo1.testTree->OperIs(GT_JTRUE)); + GenTree* cond1 = m_testInfo1.testTree->gtGetOp1(); + assert(m_testInfo2.testTree->OperIs(GT_JTRUE)); + GenTree* cond2 = m_testInfo2.testTree->gtGetOp1(); + + // Ensure both conditions are suitable. + if (!cond1->OperIsCompare() || !cond2->OperIsCompare()) + { + return false; + } + + // Ensure there are no additional side effects. + if ((cond1->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0 || + (cond2->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0) + { + return false; + } + + // Integer compares only for now (until support for Arm64 fccmp instruction is added) + if (varTypeIsFloating(cond1->gtGetOp1()->TypeGet()) || varTypeIsFloating(cond2->gtGetOp1()->TypeGet())) + { + return false; + } + + // Check for previously optimized compare chains. 
+ bool op1IsTestCond; + bool op2IsTestCond; + bool op1IsCondChain = FindCompareChain(cond1, &op1IsTestCond); + bool op2IsCondChain = FindCompareChain(cond2, &op2IsTestCond); + // Don't support combining multiple chains. Allowing this would give minimal benefit, as + // costing checks would disallow most instances. + if (op1IsCondChain && op2IsCondChain) + { + return false; + } + + // Specifically for Arm64, avoid cases where optimizations in lowering will produce better + // code than optimizing here. Specifically: + // * cmp(and(...), 0) will be turned into a TEST_ opcode. + // * Compares against zero will be optimized with cbz. + if (op1IsTestCond || op2IsTestCond) + { + return false; + } + + // Combining conditions means that all conditions are always fully evaluated. + // Put a limit on the max size that can be combined. + if (!m_comp->compStressCompile(Compiler::STRESS_OPT_BOOLS_COMPARE_CHAIN_COST, 25)) + { + int op1Cost = cond1->GetCostEx(); + int op2Cost = cond2->GetCostEx(); + int maxOp1Cost = op1IsCondChain ? 35 : 7; + int maxOp2Cost = op2IsCondChain ? 35 : 7; + + // Cost to allow for chain size of three. + if (op1Cost > maxOp1Cost || op2Cost > maxOp2Cost) + { + JITDUMP("Skipping CompareChainCond that will evaluate conditions unconditionally at costs %d,%d\n", op1Cost, + op2Cost); + return false; + } + } + + GenTree* testcondition = nullptr; + + // If a previous optimize bools happened for op2, then reuse the test condition. + // Cannot reuse for op1, as the condition needs reversing. + if (op2IsCondChain) + { + testcondition = cond2; + cond2 = cond2->gtGetOp1(); + } + + // Remove the first JTRUE statement. + constexpr bool isUnlink = true; + m_comp->fgRemoveStmt(m_b1, s1 DEBUGARG(isUnlink)); + + // Invert the first condition. + GenTree* revCond = m_comp->gtReverseCond(cond1); + assert(cond1 == revCond); // Ensure `gtReverseCond` did not create a new node. 
+ + // AND the two conditions together + GenTree* andconds = m_comp->gtNewOperNode(GT_AND, TYP_INT, cond1, cond2); + andconds->AsOp()->gtFlags |= (cond1->gtFlags & GTF_ALL_EFFECT); + andconds->AsOp()->gtFlags |= (cond2->gtFlags & GTF_ALL_EFFECT); + cond1->gtFlags &= ~GTF_RELOP_JMP_USED; + cond2->gtFlags &= ~GTF_RELOP_JMP_USED; + andconds->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); + + // Add a test condition onto the front of the AND (or resuse an exisiting one). + if (op2IsCondChain) + { + testcondition->AsOp()->gtOp1 = andconds; + testcondition->AsOp()->gtFlags |= (andconds->gtFlags & GTF_ALL_EFFECT); + } + else + { + testcondition = m_comp->gtNewOperNode(GT_NE, TYP_INT, andconds, m_comp->gtNewZeroConNode(TYP_INT)); + } + + // Wire the chain into the second block + m_testInfo2.testTree->AsOp()->gtOp1 = testcondition; + m_testInfo2.testTree->AsOp()->gtFlags |= (testcondition->gtFlags & GTF_ALL_EFFECT); + m_comp->gtSetEvalOrder(m_testInfo2.testTree); + m_comp->fgSetStmtSeq(s2); + + // Update the flow. + m_comp->fgRemoveRefPred(m_b1->bbJumpDest, m_b1); + m_b1->bbJumpKind = BBJ_NONE; + + // Fixup flags. + m_b2->bbFlags |= (m_b1->bbFlags & BBF_COPY_PROPAGATE); + + // Join the two blocks. This is done now to ensure that additional conditions can be chained. + if (m_comp->fgCanCompactBlocks(m_b1, m_b2)) + { + m_comp->fgCompactBlocks(m_b1, m_b2); + } + +#ifdef DEBUG + if (m_comp->verbose) + { + printf("\nCombined conditions " FMT_BB " and " FMT_BB " into AND chain :\n", m_b1->bbNum, m_b2->bbNum); + m_comp->fgDumpBlock(m_b1); + printf("\n"); + } +#endif + + return true; +} + +//----------------------------------------------------------------------------- +// optOptimizeBoolsChkBlkCond: Checks block conditions if it can be boolean optimized +// +// Return: +// If all conditions pass, returns the last statement of m_b1, else return nullptr. 
+// +// Notes: +// This method checks if the second (and third block for cond/return/return case) contains only one statement, +// and checks if tree operators are of the right type, e.g, GT_JTRUE, GT_RETURN. +// +// On entry, m_b1, m_b2 are set and m_b3 is set for cond/return/return case. +// If it passes all the conditions, m_testInfo1.testTree, m_testInfo2.testTree and m_t3 are set +// to the root nodes of m_b1, m_b2 and m_b3 each. +// SameTarget is also updated to true if m_b1 and m_b2 jump to the same destination. +// +Statement* OptBoolsDsc::optOptimizeBoolsChkBlkCond() +{ + assert(m_b1 != nullptr && m_b2 != nullptr); + + bool optReturnBlock = false; + if (m_b3 != nullptr) + { + optReturnBlock = true; + } + + // Find the block conditions of m_b1 and m_b2 + + if (m_b2->countOfInEdges() > 1 || (optReturnBlock && m_b3->countOfInEdges() > 1)) + { + return nullptr; + } + + // Find the condition for the first block + + Statement* s1 = m_b1->lastStmt(); + + GenTree* testTree1 = s1->GetRootNode(); + assert(testTree1->gtOper == GT_JTRUE); + + // The second and the third block must contain a single statement + + Statement* s2 = m_b2->firstStmt(); + if (s2->GetPrevStmt() != s2) + { + return nullptr; + } + + GenTree* testTree2 = s2->GetRootNode(); + + if (!optReturnBlock) + { + assert(testTree2->gtOper == GT_JTRUE); + } + else + { + if (testTree2->gtOper != GT_RETURN) + { + return nullptr; + } + + Statement* s3 = m_b3->firstStmt(); + if (s3->GetPrevStmt() != s3) + { + return nullptr; + } + + GenTree* testTree3 = s3->GetRootNode(); + if (testTree3->gtOper != GT_RETURN) + { + return nullptr; + } + + if (!varTypeIsIntegral(testTree2->TypeGet()) || !varTypeIsIntegral(testTree3->TypeGet())) + { + return nullptr; + } + + // The third block is Return with "CNS_INT int 0/1" + if (testTree3->AsOp()->gtOp1->gtOper != GT_CNS_INT) + { + return nullptr; + } + + if (testTree3->AsOp()->gtOp1->gtType != TYP_INT) + { + return nullptr; + } + + m_t3 = testTree3; + } + + 
m_testInfo1.testStmt = s1; + m_testInfo1.testTree = testTree1; + m_testInfo2.testStmt = s2; + m_testInfo2.testTree = testTree2; + + return s1; +} + +//----------------------------------------------------------------------------- +// optOptimizeBoolsChkTypeCostCond: Checks if type conditions meet the folding condition, and +// if cost to fold is not too expensive +// +// Return: +// True if it meets type conditions and cost conditions. Else false. +// +bool OptBoolsDsc::optOptimizeBoolsChkTypeCostCond() +{ + assert(m_testInfo1.compTree->OperIs(GT_EQ, GT_NE, GT_LT, GT_GE) && m_testInfo1.compTree->AsOp()->gtOp1 == m_c1); + assert(m_testInfo2.compTree->OperIs(GT_EQ, GT_NE, GT_LT, GT_GE) && m_testInfo2.compTree->AsOp()->gtOp1 == m_c2); + + // + // Leave out floats where the bit-representation is more complicated + // - there are two representations for 0. + // + if (varTypeIsFloating(m_c1->TypeGet()) || varTypeIsFloating(m_c2->TypeGet())) + { + return false; + } + + // Make sure the types involved are of the same sizes + if (genTypeSize(m_c1->TypeGet()) != genTypeSize(m_c2->TypeGet())) + { + return false; + } + if (genTypeSize(m_testInfo1.compTree->TypeGet()) != genTypeSize(m_testInfo2.compTree->TypeGet())) + { + return false; + } +#ifdef TARGET_ARMARCH + // Skip the small operand which we cannot encode. 
+ if (varTypeIsSmall(m_c1->TypeGet())) + return false; +#endif + // The second condition must not contain side effects + // + if (m_c2->gtFlags & GTF_GLOB_EFFECT) + { + return false; + } + + // The second condition must not be too expensive + // + if (m_c2->GetCostEx() > 12) + { + return false; + } + + return true; +} + +//----------------------------------------------------------------------------- +// optOptimizeBoolsUpdateTrees: Fold the trees based on fold type and comparison type, +// update the edges, unlink removed blocks and update loop table +// +void OptBoolsDsc::optOptimizeBoolsUpdateTrees() +{ + assert(m_b1 != nullptr && m_b2 != nullptr); + + bool optReturnBlock = false; + if (m_b3 != nullptr) + { + optReturnBlock = true; + } + + assert(m_foldOp != NULL && m_foldType != NULL && m_c1 != nullptr && m_c2 != nullptr); + + GenTree* cmpOp1 = m_comp->gtNewOperNode(m_foldOp, m_foldType, m_c1, m_c2); + if (m_testInfo1.isBool && m_testInfo2.isBool) + { + // When we 'OR'/'AND' two booleans, the result is boolean as well + cmpOp1->gtFlags |= GTF_BOOLEAN; + } + + GenTree* t1Comp = m_testInfo1.compTree; + t1Comp->SetOper(m_cmpOp); + t1Comp->AsOp()->gtOp1 = cmpOp1; + t1Comp->AsOp()->gtOp2->gtType = m_foldType; // Could have been varTypeIsGC() + if (optReturnBlock) + { + // Update tree when m_b1 is BBJ_COND and m_b2 and m_b3 are GT_RETURN (BBJ_RETURN) + t1Comp->AsOp()->gtOp2->AsIntCon()->gtIconVal = 0; + m_testInfo1.testTree->gtOper = GT_RETURN; + m_testInfo1.testTree->gtType = m_testInfo2.testTree->gtType; + + // Update the return count of flow graph + assert(m_comp->fgReturnCount >= 2); + --m_comp->fgReturnCount; + } + +#if FEATURE_SET_FLAGS + // For comparisons against zero we will have the GTF_SET_FLAGS set + // and this can cause an assert to fire in fgMoveOpsLeft(GenTree* tree) + // during the CSE phase. 
+ // + // So make sure to clear any GTF_SET_FLAGS bit on these operations + // as they are no longer feeding directly into a comparisons against zero + + // Make sure that the GTF_SET_FLAGS bit is cleared. + // Fix 388436 ARM JitStress WP7 + m_c1->gtFlags &= ~GTF_SET_FLAGS; + m_c2->gtFlags &= ~GTF_SET_FLAGS; + + // The new top level node that we just created does feed directly into + // a comparison against zero, so set the GTF_SET_FLAGS bit so that + // we generate an instruction that sets the flags, which allows us + // to omit the cmp with zero instruction. + + // Request that the codegen for cmpOp1 sets the condition flags + // when it generates the code for cmpOp1. + // + cmpOp1->gtRequestSetFlags(); +#endif + + // Recost/rethread the tree if necessary + // + if (m_comp->fgNodeThreading != NodeThreading::None) + { + m_comp->gtSetStmtInfo(m_testInfo1.testStmt); + m_comp->fgSetStmtSeq(m_testInfo1.testStmt); + } + + if (!optReturnBlock) + { + // Update edges if m_b1: BBJ_COND and m_b2: BBJ_COND + + FlowEdge* edge1 = m_comp->fgGetPredForBlock(m_b1->bbJumpDest, m_b1); + FlowEdge* edge2; + + if (m_sameTarget) + { + edge2 = m_comp->fgGetPredForBlock(m_b2->bbJumpDest, m_b2); + } + else + { + edge2 = m_comp->fgGetPredForBlock(m_b2->bbNext, m_b2); + + m_comp->fgRemoveRefPred(m_b1->bbJumpDest, m_b1); + + m_b1->bbJumpDest = m_b2->bbJumpDest; + + m_comp->fgAddRefPred(m_b2->bbJumpDest, m_b1); + } + + assert(edge1 != nullptr); + assert(edge2 != nullptr); + + weight_t edgeSumMin = edge1->edgeWeightMin() + edge2->edgeWeightMin(); + weight_t edgeSumMax = edge1->edgeWeightMax() + edge2->edgeWeightMax(); + if ((edgeSumMax >= edge1->edgeWeightMax()) && (edgeSumMax >= edge2->edgeWeightMax())) + { + edge1->setEdgeWeights(edgeSumMin, edgeSumMax, m_b1->bbJumpDest); + } + else + { + edge1->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT, m_b1->bbJumpDest); + } + } + + /* Modify the target of the conditional jump and update bbRefs and bbPreds */ + + if (optReturnBlock) + { + 
m_b1->bbJumpDest = nullptr; + m_b1->bbJumpKind = BBJ_RETURN; +#ifdef DEBUG + m_b1->bbJumpSwt = m_b2->bbJumpSwt; +#endif + assert(m_b2->bbJumpKind == BBJ_RETURN); + assert(m_b1->bbNext == m_b2); + assert(m_b3 != nullptr); + } + else + { + assert(m_b1->bbJumpKind == BBJ_COND); + assert(m_b2->bbJumpKind == BBJ_COND); + assert(m_b1->bbJumpDest == m_b2->bbJumpDest); + assert(m_b1->bbNext == m_b2); + assert(m_b2->bbNext != nullptr); + } + + if (!optReturnBlock) + { + // Update bbRefs and bbPreds + // + // Replace pred 'm_b2' for 'm_b2->bbNext' with 'm_b1' + // Remove pred 'm_b2' for 'm_b2->bbJumpDest' + m_comp->fgReplacePred(m_b2->bbNext, m_b2, m_b1); + m_comp->fgRemoveRefPred(m_b2->bbJumpDest, m_b2); + } + + // Get rid of the second block + + m_comp->fgUnlinkBlock(m_b2); + m_b2->bbFlags |= BBF_REMOVED; + // If m_b2 was the last block of a try or handler, update the EH table. + m_comp->ehUpdateForDeletedBlock(m_b2); + + if (optReturnBlock) + { + // Get rid of the third block + m_comp->fgUnlinkBlock(m_b3); + m_b3->bbFlags |= BBF_REMOVED; + // If m_b3 was the last block of a try or handler, update the EH table. + m_comp->ehUpdateForDeletedBlock(m_b3); + } + + // Update loop table + m_comp->fgUpdateLoopsAfterCompacting(m_b1, m_b2); + if (optReturnBlock) + { + m_comp->fgUpdateLoopsAfterCompacting(m_b1, m_b3); + } + + // Update IL range of first block + m_b1->bbCodeOffsEnd = optReturnBlock ? m_b3->bbCodeOffsEnd : m_b2->bbCodeOffsEnd; +} + +//----------------------------------------------------------------------------- +// optOptimizeBoolsReturnBlock: Optimize boolean when m_b1 is BBJ_COND and m_b2 and m_b3 are BBJ_RETURN +// +// Arguments: +// b3: Pointer to basic block b3 +// +// Returns: +// true if boolean optimization is done and m_b1, m_b2 and m_b3 are folded into m_b1, else false. +// +// Notes: +// m_b1, m_b2 and m_b3 of OptBoolsDsc are set on entry. 
+// +// if B1.bbJumpDest == b3, it transforms +// B1 : brtrue(t1, B3) +// B2 : ret(t2) +// B3 : ret(0) +// to +// B1 : ret((!t1) && t2) +// +// For example, (x==0 && y==0) generates: +// B1: GT_JTRUE (BBJ_COND), jumps to B3 +// B2: GT_RETURN (BBJ_RETURN) +// B3: GT_RETURN (BBJ_RETURN), +// and it is folded into +// B1: GT_RETURN (BBJ_RETURN) +// +bool OptBoolsDsc::optOptimizeBoolsReturnBlock(BasicBlock* b3) +{ + assert(m_b1 != nullptr && m_b2 != nullptr); + + // m_b3 is set for cond/return/return case + m_b3 = b3; + + m_sameTarget = false; + Statement* const s1 = optOptimizeBoolsChkBlkCond(); + if (s1 == nullptr) + { + return false; + } + + // Find the branch conditions of m_b1 and m_b2 + + m_c1 = optIsBoolComp(&m_testInfo1); + if (m_c1 == nullptr) + { + return false; + } + + m_c2 = optIsBoolComp(&m_testInfo2); + if (m_c2 == nullptr) + { + return false; + } + + // Find the type and cost conditions of m_testInfo1 and m_testInfo2 + + if (!optOptimizeBoolsChkTypeCostCond()) + { + return false; + } + + // Get the fold operator (m_foldOp, e.g., GT_OR/GT_AND) and + // the comparison operator (m_cmpOp, e.g., GT_EQ/GT_NE/GT_GE/GT_LT) + + var_types foldType = m_c1->TypeGet(); + if (varTypeIsGC(foldType)) + { + foldType = TYP_I_IMPL; + } + m_foldType = foldType; + + m_foldOp = GT_NONE; + m_cmpOp = GT_NONE; + + genTreeOps foldOp; + genTreeOps cmpOp; + + ssize_t it1val = m_testInfo1.compTree->AsOp()->gtOp2->AsIntCon()->gtIconVal; + ssize_t it2val = m_testInfo2.compTree->AsOp()->gtOp2->AsIntCon()->gtIconVal; + ssize_t it3val = m_t3->AsOp()->gtOp1->AsIntCon()->gtIconVal; + + if ((m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_EQ) && + (it1val == 0 && it2val == 0 && it3val == 0)) + { + // Case: x == 0 && y == 0 + // t1:c1!=0 t2:c2==0 t3:c3==0 + // ==> true if (c1|c2)==0 + foldOp = GT_OR; + cmpOp = GT_EQ; + } + else if ((m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_NE) && + (it1val == 0 && it2val == 0 && it3val == 0)) + { 
+ // Case: x == 1 && y ==1 + // t1:c1!=1 t2:c2==1 t3:c3==0 is reversed from optIsBoolComp() to: t1:c1==0 t2:c2!=0 t3:c3==0 + // ==> true if (c1&c2)!=0 + foldOp = GT_AND; + cmpOp = GT_NE; + } + else if ((m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_GE) && + (it1val == 0 && it2val == 0 && it3val == 0)) + { + // Case: x >= 0 && y >= 0 + // t1:c1<0 t2:c2>=0 t3:c3==0 + // ==> true if (c1|c2)>=0 + + foldOp = GT_OR; + cmpOp = GT_GE; + } + else if ((m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_EQ) && + (it1val == 0 && it2val == 0 && it3val == 1)) + { + // Case: x == 0 || y == 0 + // t1:c1==0 t2:c2==0 t3:c3==1 + // ==> true if (c1&c2)==0 + foldOp = GT_AND; + cmpOp = GT_EQ; + } + else if ((m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_NE) && + (it1val == 0 && it2val == 0 && it3val == 1)) + { + // Case: x == 1 || y == 1 + // t1:c1==1 t2:c2==1 t3:c3==1 is reversed from optIsBoolComp() to: t1:c1!=0 t2:c2!=0 t3:c3==1 + // ==> true if (c1|c2)!=0 + foldOp = GT_OR; + cmpOp = GT_NE; + } + else if ((m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_LT) && + (it1val == 0 && it2val == 0 && it3val == 1)) + { + // Case: x < 0 || y < 0 + // t1:c1<0 t2:c2<0 t3:c3==1 + // ==> true if (c1|c2)<0 + + foldOp = GT_OR; + cmpOp = GT_LT; + } + else + { + // Require NOT operation for operand(s). Do Not fold. 
+ return false; + } + + if ((foldOp == GT_AND || (cmpOp == GT_NE && foldOp != GT_OR)) && (!m_testInfo1.isBool || !m_testInfo2.isBool)) + { + // x == 1 && y == 1: Skip cases where x or y is greater than 1, e.g., x=3, y=1 + // x == 0 || y == 0: Skip cases where x and y have opposite bits set, e.g., x=2, y=1 + // x == 1 || y == 1: Skip cases where either x or y is greater than 1, e.g., x=2, y=0 + return false; + } + + m_foldOp = foldOp; + m_cmpOp = cmpOp; + + // Now update the trees + + optOptimizeBoolsUpdateTrees(); + +#ifdef DEBUG + if (m_comp->verbose) + { + printf("Folded %sboolean conditions of " FMT_BB ", " FMT_BB " and " FMT_BB " to :\n", + m_c2->OperIsLeaf() ? "" : "non-leaf ", m_b1->bbNum, m_b2->bbNum, m_b3->bbNum); + m_comp->gtDispStmt(s1); + printf("\n"); + } +#endif + + // Return true to continue the bool optimization for the rest of the BB chain + return true; +} + +//----------------------------------------------------------------------------- +// optOptimizeBoolsGcStress: Replace x==null with (x|x)==0 if x is a GC-type. +// This will stress code-gen and the emitter to make sure they support such trees. 
+// +#ifdef DEBUG + +void OptBoolsDsc::optOptimizeBoolsGcStress() +{ + if (!m_comp->compStressCompile(m_comp->STRESS_OPT_BOOLS_GC, 20)) + { + return; + } + + assert(m_b1->bbJumpKind == BBJ_COND); + Statement* const stmt = m_b1->lastStmt(); + GenTree* const cond = stmt->GetRootNode(); + + assert(cond->gtOper == GT_JTRUE); + + OptTestInfo test; + test.testStmt = stmt; + test.testTree = cond; + + GenTree* comparand = optIsBoolComp(&test); + + if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet())) + { + return; + } + GenTree* relop = test.compTree; + bool isBool = test.isBool; + + if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF)) + { + return; + } + + GenTree* comparandClone = m_comp->gtCloneExpr(comparand); + + noway_assert(relop->AsOp()->gtOp1 == comparand); + genTreeOps oper = m_comp->compStressCompile(m_comp->STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND; + relop->AsOp()->gtOp1 = m_comp->gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone); + + // Comparand type is already checked, and we have const int, there is no harm + // morphing it into a TYP_I_IMPL. + noway_assert(relop->AsOp()->gtOp2->gtOper == GT_CNS_INT); + relop->AsOp()->gtOp2->gtType = TYP_I_IMPL; + + // Recost/rethread the tree if necessary + // + if (m_comp->fgNodeThreading != NodeThreading::None) + { + m_comp->gtSetStmtInfo(test.testStmt); + m_comp->fgSetStmtSeq(test.testStmt); + } +} + +#endif + +//----------------------------------------------------------------------------- +// optIsBoolComp: Function used by folding of boolean conditionals +// +// Arguments: +// pOptTest The test info for the test tree +// +// Return: +// On success, return the first operand (gtOp1) of compTree, else return nullptr. +// +// Notes: +// On entry, testTree is set. +// On success, compTree is set to the compare node (i.e. GT_EQ or GT_NE or GT_LT or GT_GE) of the testTree. +// isBool is set to true if the comparand (i.e., operand 1 of compTree is boolean. Otherwise, false. 
+// +// Given a GT_JTRUE or GT_RETURN node, this method checks if it is a boolean comparison +// of the form "if (boolVal ==/!=/>=/< 0/1)".This is translated into +// a GT_EQ/GT_NE/GT_GE/GT_LT node with "opr1" being a boolean lclVar and "opr2" the const 0/1. +// +// When isBool == true, if the comparison was against a 1 (i.e true) +// then we morph the tree by reversing the GT_EQ/GT_NE/GT_GE/GT_LT and change the 1 to 0. +// +GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest) +{ + pOptTest->isBool = false; + + assert(pOptTest->testTree->gtOper == GT_JTRUE || pOptTest->testTree->gtOper == GT_RETURN); + GenTree* cond = pOptTest->testTree->AsOp()->gtOp1; + + // The condition must be "!= 0" or "== 0" or >=0 or <0 + // we don't optimize unsigned < and >= operations + if (!cond->OperIs(GT_EQ, GT_NE) && (!cond->OperIs(GT_LT, GT_GE) || cond->IsUnsigned())) + { + return nullptr; + } + + // Return the compare node to the caller + + pOptTest->compTree = cond; + + // Get hold of the comparands + + GenTree* opr1 = cond->AsOp()->gtOp1; + GenTree* opr2 = cond->AsOp()->gtOp2; + + if (opr2->gtOper != GT_CNS_INT) + { + return nullptr; + } + + if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1)) + { + return nullptr; + } + + ssize_t ival2 = opr2->AsIntCon()->gtIconVal; + + // Is the value a boolean? + // We can either have a boolean expression (marked GTF_BOOLEAN) or + // a local variable that is marked as being boolean (lvIsBoolean) + + if (opr1->gtFlags & GTF_BOOLEAN) + { + pOptTest->isBool = true; + } + else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1))) + { + pOptTest->isBool = true; + } + else if (opr1->gtOper == GT_LCL_VAR) + { + // is it a boolean local variable? + + unsigned lclNum = opr1->AsLclVarCommon()->GetLclNum(); + noway_assert(lclNum < m_comp->lvaCount); + + if (m_comp->lvaTable[lclNum].lvIsBoolean) + { + pOptTest->isBool = true; + } + } + + // Was our comparison against the constant 1 (i.e. 
true) + if (ival2 == 1) + { + // If this is a boolean expression tree we can reverse the relop + // and change the true to false. + if (pOptTest->isBool) + { + m_comp->gtReverseCond(cond); + opr2->AsIntCon()->gtIconVal = 0; + } + else + { + return nullptr; + } + } + + return opr1; +} + +//----------------------------------------------------------------------------- +// optOptimizeBools: Folds boolean conditionals for GT_JTRUE/GT_RETURN nodes +// +// Returns: +// suitable phase status +// +// Notes: +// If the operand of GT_JTRUE/GT_RETURN node is GT_EQ/GT_NE/GT_GE/GT_LT of the form +// "if (boolVal ==/!=/>=/< 0/1)", the GT_EQ/GT_NE/GT_GE/GT_LT nodes are translated into a +// GT_EQ/GT_NE/GT_GE/GT_LT node with +// "op1" being a boolean GT_OR/GT_AND lclVar and +// "op2" the const 0/1. +// For example, the folded tree for the below boolean optimization is shown below: +// Case 1: (x == 0 && y ==0) => (x | y) == 0 +// * RETURN int +// \--* EQ int +// +--* OR int +// | +--* LCL_VAR int V00 arg0 +// | \--* LCL_VAR int V01 arg1 +// \--* CNS_INT int 0 +// +// Case 2: (x == null && y == null) ==> (x | y) == 0 +// * RETURN int +// \-- * EQ int +// + -- * OR long +// | +-- * LCL_VAR ref V00 arg0 +// | \-- * LCL_VAR ref V01 arg1 +// \-- * CNS_INT long 0 +// +// Case 3: (x == 0 && y == 0 && z == 0) ==> ((x | y) | z) == 0 +// * RETURN int +// \-- * EQ int +// + -- * OR int +// | +-- * OR int +// | | +-- * LCL_VAR int V00 arg0 +// | | \-- * LCL_VAR int V01 arg1 +// | \-- * LCL_VAR int V02 arg2 +// \-- * CNS_INT int 0 +// +// Case 4: (x == 0 && y == 0 && z == 0 && w == 0) ==> (((x | y) | z) | w) == 0 +// * RETURN int +// \-- * EQ int +// + * OR int +// | +--* OR int +// | | +--* OR int +// | | | +--* LCL_VAR int V00 arg0 +// | | | \--* LCL_VAR int V01 arg1 +// | | \--* LCL_VAR int V02 arg2 +// | \--* LCL_VAR int V03 arg3 +// \--* CNS_INT int 0 +// +// Case 5: (x != 0 && y != 0) => (x | y) != 0 +// * RETURN int +// \--* NE int +// +--* OR int +// | +--* LCL_VAR int V00 arg0 +// | 
\--* LCL_VAR int V01 arg1 +// \--* CNS_INT int 0 +// +// Case 6: (x >= 0 && y >= 0) => (x | y) >= 0 +// * RETURN int +// \--* GE int +// +--* OR int +// | +--* LCL_VAR int V00 arg0 +// | \--* LCL_VAR int V01 arg1 +// \--* CNS_INT int 0 +// +// Case 7: (x < 0 || y < 0) => (x | y) < 0 +// * RETURN int +// \--* LT int +// +--* OR int +// | +--* LCL_VAR int V00 arg0 +// | \--* LCL_VAR int V01 arg1 +// \--* CNS_INT int 0 +// +// Patterns that are not optimized include (x == 1 && y == 1), (x == 1 || y == 1), +// (x == 0 || y == 0) because currently their comptree is not marked as boolean expression. +// When m_foldOp == GT_AND or m_cmpOp == GT_NE, both compTrees must be boolean expression +// in order to skip below cases when compTree is not boolean expression: +// - x == 1 && y == 1 ==> (x&y)!=0: Skip cases where x or y is greater than 1, e.g., x=3, y=1 +// - x == 1 || y == 1 ==> (x|y)!=0: Skip cases where either x or y is greater than 1, e.g., x=2, y=0 +// - x == 0 || y == 0 ==> (x&y)==0: Skip cases where x and y have opposite bits set, e.g., x=2, y=1 +// +PhaseStatus Compiler::optOptimizeBools() +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In optOptimizeBools()\n"); + } +#endif + bool change = false; + unsigned numCond = 0; + unsigned numReturn = 0; + unsigned numPasses = 0; + unsigned stress = false; + + do + { + numPasses++; + change = false; + + // Reverse iterate through the blocks. 
+ for (BasicBlock* b1 = fgLastBB; b1 != nullptr; b1 = b1->bbPrev) + { + // We're only interested in conditional jumps here + + if (b1->bbJumpKind != BBJ_COND) + { + continue; + } + + // If there is no next block, we're done + + BasicBlock* b2 = b1->bbNext; + if (b2 == nullptr) + { + break; + } + + // The next block must not be marked as BBF_DONT_REMOVE + if (b2->bbFlags & BBF_DONT_REMOVE) + { + continue; + } + + OptBoolsDsc optBoolsDsc(b1, b2, this); + + // The next block needs to be a condition or return block. 
+ + if (b2->bbJumpKind == BBJ_COND) + { + if ((b1->bbJumpDest != b2->bbJumpDest) && (b1->bbJumpDest != b2->bbNext)) + { + continue; + } + + // When it is conditional jumps + + if (optBoolsDsc.optOptimizeBoolsCondBlock()) + { + change = true; + numCond++; + } +#ifdef TARGET_ARM64 + else if (optBoolsDsc.optOptimizeCompareChainCondBlock()) + { + change = true; + numCond++; + } +#endif + } + else if (b2->bbJumpKind == BBJ_RETURN) + { + // Set b3 to b1 jump destination + BasicBlock* b3 = b1->bbJumpDest; + + // b3 must not be marked as BBF_DONT_REMOVE + + if (b3->bbFlags & BBF_DONT_REMOVE) + { + continue; + } + + // b3 must be RETURN type + + if (b3->bbJumpKind != BBJ_RETURN) + { + continue; + } + + if (optBoolsDsc.optOptimizeBoolsReturnBlock(b3)) + { + change = true; + numReturn++; + } + } + else + { +#ifdef DEBUG + optBoolsDsc.optOptimizeBoolsGcStress(); + stress = true; +#endif + } + } + } while (change); + + JITDUMP("\noptimized %u BBJ_COND cases, %u BBJ_RETURN cases in %u passes\n", numCond, numReturn, numPasses); + + const bool modified = stress || ((numCond + numReturn) > 0); + return modified ? 
PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; +} + typedef JitHashTable, unsigned> LclVarRefCounts; //------------------------------------------------------------------------------------------ From 13508495bfc92f93f7fc3f6caf5119a6af51b16b Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 13 Mar 2023 15:09:16 +0000 Subject: [PATCH 20/31] Forward iterate through the blocks --- src/coreclr/jit/optimizer.cpp | 61 +++++++++++++++++------------------ 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index a3e47922c52211..ad0ba9ee08004e 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9367,7 +9367,7 @@ inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondit // We are allowing for the first operand of the not be a valid chain, as this would require // a full recursive search through the children. - if (condOp1->OperIs(GT_AND) && condOp1->gtGetOp2()->OperIsCmpCompare()) + if (condOp1->OperIs(GT_AND, GT_OR) && condOp1->gtGetOp2()->OperIsCmpCompare()) { return true; } @@ -9468,7 +9468,17 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() assert(m_b1 != nullptr && m_b2 != nullptr && m_b3 == nullptr); m_t3 = nullptr; - if (!(m_b1->bbNext == m_b2 && m_b1->bbJumpDest == m_b2->bbNext && m_b1->bbJumpDest->bbNext == m_b2->bbJumpDest)) + bool foundStartOfIfConditions = false; + bool foundEndOfIfConditions = false; + if (m_b1->bbNext == m_b2 && m_b1->bbJumpDest == m_b2->bbJumpDest) + { + foundStartOfIfConditions = true; + } + else if (m_b1->bbNext == m_b2 && m_b1->bbJumpDest == m_b2->bbNext && m_b1->bbJumpDest->bbNext == m_b2->bbJumpDest) + { + foundEndOfIfConditions = true; + } + else { return false; } @@ -9545,40 +9555,28 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() GenTree* testcondition = nullptr; - // If a previous optimize bools happened for op2, then reuse the test condition. 
- // Cannot reuse for op1, as the condition needs reversing. - if (op2IsCondChain) - { - testcondition = cond2; - cond2 = cond2->gtGetOp1(); - } - // Remove the first JTRUE statement. constexpr bool isUnlink = true; m_comp->fgRemoveStmt(m_b1, s1 DEBUGARG(isUnlink)); - // Invert the first condition. - GenTree* revCond = m_comp->gtReverseCond(cond1); - assert(cond1 == revCond); // Ensure `gtReverseCond` did not create a new node. + // Invert the condition. + if (foundEndOfIfConditions) + { + GenTree* revCond = m_comp->gtReverseCond(cond1); + assert(cond1 == revCond); // Ensure `gtReverseCond` did not create a new node. + } - // AND the two conditions together - GenTree* andconds = m_comp->gtNewOperNode(GT_AND, TYP_INT, cond1, cond2); - andconds->AsOp()->gtFlags |= (cond1->gtFlags & GTF_ALL_EFFECT); - andconds->AsOp()->gtFlags |= (cond2->gtFlags & GTF_ALL_EFFECT); + // Join the two conditions together + genTreeOps chainedOper = foundStartOfIfConditions? GT_OR : GT_AND; + GenTree* chainedConditions = m_comp->gtNewOperNode(chainedOper, TYP_INT, cond1, cond2); + chainedConditions->AsOp()->gtFlags |= (cond1->gtFlags & GTF_ALL_EFFECT); + chainedConditions->AsOp()->gtFlags |= (cond2->gtFlags & GTF_ALL_EFFECT); cond1->gtFlags &= ~GTF_RELOP_JMP_USED; cond2->gtFlags &= ~GTF_RELOP_JMP_USED; - andconds->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); + chainedConditions->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); - // Add a test condition onto the front of the AND (or resuse an exisiting one). 
- if (op2IsCondChain) - { - testcondition->AsOp()->gtOp1 = andconds; - testcondition->AsOp()->gtFlags |= (andconds->gtFlags & GTF_ALL_EFFECT); - } - else - { - testcondition = m_comp->gtNewOperNode(GT_NE, TYP_INT, andconds, m_comp->gtNewZeroConNode(TYP_INT)); - } + // Add a test condition onto the front of the chain + testcondition = m_comp->gtNewOperNode(GT_NE, TYP_INT, chainedConditions, m_comp->gtNewZeroConNode(TYP_INT)); // Wire the chain into the second block m_testInfo2.testTree->AsOp()->gtOp1 = testcondition; @@ -9602,9 +9600,9 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() #ifdef DEBUG if (m_comp->verbose) { - printf("\nCombined conditions " FMT_BB " and " FMT_BB " into AND chain :\n", m_b1->bbNum, m_b2->bbNum); + JITDUMP("\nCombined conditions " FMT_BB " and " FMT_BB " into AND chain :\n", m_b1->bbNum, m_b2->bbNum); m_comp->fgDumpBlock(m_b1); - printf("\n"); + JITDUMP("\n"); } #endif @@ -10362,8 +10360,7 @@ PhaseStatus Compiler::optOptimizeBools() numPasses++; change = false; - // Reverse iterate through the blocks. - for (BasicBlock* b1 = fgLastBB; b1 != nullptr; b1 = b1->bbPrev) + for (BasicBlock* const b1 : Blocks()) { // We're only interested in conditional jumps here From f0567655edd665d0a99c96258b93f6cccaf3a156 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 13 Mar 2023 16:31:00 +0000 Subject: [PATCH 21/31] Fixup comment block --- src/coreclr/jit/optimizer.cpp | 82 ++++++++++++++--------------------- 1 file changed, 32 insertions(+), 50 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index ad0ba9ee08004e..54a210c9203fe4 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9379,7 +9379,7 @@ inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondit } //----------------------------------------------------------------------------- -// optOptimizeCompareChainCondBlock: Create AND chain when when both m_b1 and m_b2 are BBJ_COND. 
+// optOptimizeCompareChainCondBlock: Create a chain when both m_b1 and m_b2 are BBJ_COND. // // Returns: // true if chain optimization is done and m_b1 and m_b2 are folded into m_b1, else false. @@ -9390,47 +9390,39 @@ inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondit // Notes: // // This aims to reduced the number of conditional jumps by joining cases when multiple -// conditions gate the execution of a block. For example: +// conditions gate the execution of a block. +// +// Example 1: // If ( a > b || c == d) { x = y; } -// Will become the following. Note that the second condition is inverted. +// +// Will be represented in IR as: // // ------------ BB01 -> BB03 (cond), succs={BB02,BB03} -// * JTRUE -// \--* GT a,b +// * JTRUE (GT a,b) // // ------------ BB02 -> BB04 (cond), preds={BB01} succs={BB03,BB04} -// * JTRUE -// \--* NE c,d +// * JTRUE (NE c,d) // // ------------ BB03, preds={BB01, BB02} succs={BB04} -// * ASG x,y +// * ASG (x,y) // // These operands will be combined into a single AND in the first block (with the first -// condition inverted), wrapped by the test condition (NE(...,0)). +// condition inverted), wrapped by the test condition (NE(...,0)). Giving: // // ------------ BB01 -> BB03 (cond), succs={BB03,BB04} -// * JTRUE -// \--* NE -// +--* AND -// | +--* LE a,b -// | \--* NE c,d -// \--* CNS_INT 0 +// * JTRUE (NE (AND (LE a,b), (NE c,d)), 0) // // ------------ BB03, preds={BB01} succs={BB04} // * ASG x,y // // -// This will also work for statements with else cases: -// If ( a > b || c == d) { x = y; } else { x = z; } -// Here BB04 will contain the else ASG. Both BB04 and BB05 will unconditionally jump to BB05. +// Example 2: +// If ( a > b && c == d) { x = y; } else { x = z; } +// +// Here the && conditions are connected via an OR. 
After the pass: // // ------------ BB01 -> BB03 (cond), succs={BB03,BB04} -// * JTRUE -// \--* NE -// +--* AND -// | +--* LE a,b -// | \--* NE c,d -// \--* CNS_INT 0 +// * JTRUE (NE (OR (LE a,b), (NE c,d)), 0) // // ------------ BB03, preds={BB01} succs={BB05} // * ASG x,y @@ -9439,46 +9431,36 @@ inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondit // * ASG x,z // // -// Multiple conditions can be chained together. This is due to the optimization reverse -// iterating through the blocks. For example: +// Example 3: // If ( a > b || c == d || e < f ) { x = y; } -// The first pass will combine "c == d" and "e < f" into a chain. The second pass will then -// combine the "a > b" with the earlier chain. Where possible, the new condition is placed -// within the test condition (NE(...,0)). +// The first pass of the optimization will combine two of the conditions. The +// second pass will then combine the remaining condition with the earlier chain. // // ------------ BB01 -> BB03 (cond), succs={BB03,BB04} -// * JTRUE -// \--* NE -// +--* AND -// | +--* AND -// | | +--* NE c,d -// | | \--* GE e,f -// | \--* LT a,b -// \--* CNS_INT 0 +// * JTRUE (NE (OR ((NE (OR (NE c,d), (GE e,f)), 0), (LE a,b))), 0) // // ------------ BB03, preds={BB01} succs={BB04} // * ASG x,y // // -// Conditions connected by && are not yet checked for. For example: -// If ( a > b && c == d ) { x = y; } +// This optimization means that every condition within the IF statement is always evaluated, +// as opposed to stopping at the first positive match. +// Theoretically there is no maximum limit on the size of the generated chain. Therefore cost +// checking is used to limit the maximum number of conditions that can be chained together. 
// bool OptBoolsDsc::optOptimizeCompareChainCondBlock() { assert(m_b1 != nullptr && m_b2 != nullptr && m_b3 == nullptr); m_t3 = nullptr; - bool foundStartOfIfConditions = false; - bool foundEndOfIfConditions = false; - if (m_b1->bbNext == m_b2 && m_b1->bbJumpDest == m_b2->bbJumpDest) + bool foundEndOfOrConditions = false; + if (m_b1->bbNext == m_b2 && m_b1->bbJumpDest == m_b2->bbNext && m_b1->bbJumpDest->bbNext == m_b2->bbJumpDest) { - foundStartOfIfConditions = true; + // Found the end of two (or more) conditions being ORed together. + // The final condition has been inverted. + foundEndOfOrConditions = true; } - else if (m_b1->bbNext == m_b2 && m_b1->bbJumpDest == m_b2->bbNext && m_b1->bbJumpDest->bbNext == m_b2->bbJumpDest) - { - foundEndOfIfConditions = true; - } - else + else if (!(m_b1->bbNext == m_b2 && m_b1->bbJumpDest == m_b2->bbJumpDest)) { return false; } @@ -9560,14 +9542,14 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() m_comp->fgRemoveStmt(m_b1, s1 DEBUGARG(isUnlink)); // Invert the condition. - if (foundEndOfIfConditions) + if (foundEndOfOrConditions) { GenTree* revCond = m_comp->gtReverseCond(cond1); assert(cond1 == revCond); // Ensure `gtReverseCond` did not create a new node. } // Join the two conditions together - genTreeOps chainedOper = foundStartOfIfConditions? GT_OR : GT_AND; + genTreeOps chainedOper = foundEndOfOrConditions? 
GT_AND : GT_OR; GenTree* chainedConditions = m_comp->gtNewOperNode(chainedOper, TYP_INT, cond1, cond2); chainedConditions->AsOp()->gtFlags |= (cond1->gtFlags & GTF_ALL_EFFECT); chainedConditions->AsOp()->gtFlags |= (cond2->gtFlags & GTF_ALL_EFFECT); From a690be659f88268d1d69f0b13e6988eac5afa09e Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 13 Mar 2023 16:47:09 +0000 Subject: [PATCH 22/31] Improve scanning for existing chains --- src/coreclr/jit/optimizer.cpp | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 54a210c9203fe4..3f63bfaa889326 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9362,14 +9362,21 @@ inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondit { // Found a test condition. Does it contain a compare chain? - // Only test that the second operand of AND ends with a compare operation, as this will be - // the condition the new link in the chain will connect with. - // We are allowing for the first operand of the not be a valid chain, as this would require - // a full recursive search through the children. - - if (condOp1->OperIs(GT_AND, GT_OR) && condOp1->gtGetOp2()->OperIsCmpCompare()) + if (condOp1->OperIs(GT_AND, GT_OR) && varTypeIsIntegralOrI(condOp1->gtGetOp1()) + && varTypeIsIntegralOrI(condOp1->gtGetOp2())) { - return true; + // Check that the second operand of AND ends with a compare operation, as this will be + // the condition the new link in the chain will connect with. + if (condOp1->gtGetOp2()->OperIsCmpCompare()) + { + return true; + } + if (condOp1->gtGetOp2()->OperIsCmpCompare()) + { + // Recursive check the inner condition. 
+ bool innerTestCondition; + return FindCompareChain(condOp1->gtGetOp2(), &innerTestCondition); + } } *isTestCondition = true; @@ -9501,16 +9508,10 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() bool op2IsTestCond; bool op1IsCondChain = FindCompareChain(cond1, &op1IsTestCond); bool op2IsCondChain = FindCompareChain(cond2, &op2IsTestCond); - // Don't support combining multiple chains. Allowing this would give minimal benefit, as - // costing checks would disallow most instances. - if (op1IsCondChain && op2IsCondChain) - { - return false; - } // Specifically for Arm64, avoid cases where optimizations in lowering will produce better // code than optimizing here. Specificially: - // * cmp(and(...), 0) will be turned into a TEST_ opcode. + // * CMP(AND(...), 0) will be turned into a TEST_ opcode. // * Compares against zero will be optimized with cbz. if (op1IsTestCond || op2IsTestCond) { @@ -9582,7 +9583,8 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() #ifdef DEBUG if (m_comp->verbose) { - JITDUMP("\nCombined conditions " FMT_BB " and " FMT_BB " into AND chain :\n", m_b1->bbNum, m_b2->bbNum); + JITDUMP("\nCombined conditions " FMT_BB " and " FMT_BB " into %s chain :\n", m_b1->bbNum, m_b2->bbNum, + GenTree::OpName(chainedOper)); m_comp->fgDumpBlock(m_b1); JITDUMP("\n"); } From 02bbafee4f107d14e455078620d2ad416ab13ca9 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 13 Mar 2023 17:27:52 +0000 Subject: [PATCH 23/31] Fix formatting --- src/coreclr/jit/optimizer.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 3f63bfaa889326..ba274b2eeca135 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9362,8 +9362,8 @@ inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondit { // Found a test condition. Does it contain a compare chain? 
- if (condOp1->OperIs(GT_AND, GT_OR) && varTypeIsIntegralOrI(condOp1->gtGetOp1()) - && varTypeIsIntegralOrI(condOp1->gtGetOp2())) + if (condOp1->OperIs(GT_AND, GT_OR) && varTypeIsIntegralOrI(condOp1->gtGetOp1()) && + varTypeIsIntegralOrI(condOp1->gtGetOp2())) { // Check that the second operand of AND ends with a compare operation, as this will be // the condition the new link in the chain will connect with. @@ -9550,8 +9550,8 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() } // Join the two conditions together - genTreeOps chainedOper = foundEndOfOrConditions? GT_AND : GT_OR; - GenTree* chainedConditions = m_comp->gtNewOperNode(chainedOper, TYP_INT, cond1, cond2); + genTreeOps chainedOper = foundEndOfOrConditions ? GT_AND : GT_OR; + GenTree* chainedConditions = m_comp->gtNewOperNode(chainedOper, TYP_INT, cond1, cond2); chainedConditions->AsOp()->gtFlags |= (cond1->gtFlags & GTF_ALL_EFFECT); chainedConditions->AsOp()->gtFlags |= (cond2->gtFlags & GTF_ALL_EFFECT); cond1->gtFlags &= ~GTF_RELOP_JMP_USED; From 8fcef6714b69f9c4a3f868ddf13051e094890cfc Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 14 Mar 2023 15:35:38 +0000 Subject: [PATCH 24/31] Allow main optimize bools loop to run again --- src/coreclr/jit/optimizer.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index ba274b2eeca135..f25552e7c352a9 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -10334,6 +10334,7 @@ PhaseStatus Compiler::optOptimizeBools() } #endif bool change = false; + bool retry = false; unsigned numCond = 0; unsigned numReturn = 0; unsigned numPasses = 0; @@ -10344,8 +10345,10 @@ PhaseStatus Compiler::optOptimizeBools() numPasses++; change = false; - for (BasicBlock* const b1 : Blocks()) + for (BasicBlock* b1 = fgFirstBB; b1 != nullptr; b1 = retry ? 
b1 : b1->bbNext) { + retry = false; + // We're only interested in conditional jumps here if (b1->bbJumpKind != BBJ_COND) @@ -10388,7 +10391,10 @@ PhaseStatus Compiler::optOptimizeBools() #ifdef TARGET_ARM64 else if (optBoolsDsc.optOptimizeCompareChainCondBlock()) { + // The optimization will have merged b1 and b2. Retry the loop so that + // b1 and b2->bbNext can be tested. change = true; + retry = true; numCond++; } #endif From e7f43698932a782fa8de0d78fe014ca6ae27f157 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 15 Mar 2023 11:25:07 +0000 Subject: [PATCH 25/31] Check for tbz conditions generated from pow2 values --- src/coreclr/jit/optimizer.cpp | 52 +++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index f25552e7c352a9..e55aa7b4cccfc5 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9342,7 +9342,7 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() // // Arguments: // condition: Condition to check. -// isTestCondition: Returns true if condition is a EQ/NE(AND(...),0) but is not a compare chain. +// isTestCondition: Returns true if condition is a test condition (one that lowering optimizes to cbz/cbnz or tbz/tbnz) but is not a compare chain. // // Returns: // true if chain optimization is a compare chain. @@ -9358,28 +9358,41 @@ inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondit *isTestCondition = false; - if (condition->OperIs(GT_EQ, GT_NE) && condOp2->IsIntegralConst() && condOp2->AsIntCon()->IconValue() == 0) + if (condition->OperIs(GT_EQ, GT_NE) && condOp2->IsIntegralConst()) { - // Found a test condition. Does it contain a compare chain? 
+ ssize_t condOp2Value = condOp2->AsIntCon()->IconValue(); - if (condOp1->OperIs(GT_AND, GT_OR) && varTypeIsIntegralOrI(condOp1->gtGetOp1()) && - varTypeIsIntegralOrI(condOp1->gtGetOp2())) + if (condOp2Value == 0) { - // Check that the second operand of AND ends with a compare operation, as this will be - // the condition the new link in the chain will connect with. - if (condOp1->gtGetOp2()->OperIsCmpCompare()) - { - return true; - } - if (condOp1->gtGetOp2()->OperIsCmpCompare()) + // Found a EQ/NE(...,0). Does it contain a compare chain (ie - conditions that have + // previously been combined by optOptimizeCompareChainCondBlock) or is it a test condition + // that will be optimised to cbz/cbnz during lowering? + + if (condOp1->OperIs(GT_AND, GT_OR) && + varTypeIsIntegralOrI(condOp1->gtGetOp1()) && varTypeIsIntegralOrI(condOp1->gtGetOp2())) { - // Recursive check the inner condition. - bool innerTestCondition; - return FindCompareChain(condOp1->gtGetOp2(), &innerTestCondition); + // Check that the second operand of AND ends with a compare operation, as this will be + // the condition the new link in the chain will connect with. + if (condOp1->gtGetOp2()->OperIsCmpCompare()) + { + return true; + } + if (condOp1->gtGetOp2()->OperIsCmpCompare()) + { + // Recursive check the inner condition. + bool innerTestCondition; + return FindCompareChain(condOp1->gtGetOp2(), &innerTestCondition); + } } - } - *isTestCondition = true; + *isTestCondition = true; + } + else if (condOp1->OperIs(GT_AND) && isPow2(static_cast(condOp2Value)) && + condOp1->gtGetOp2()->IsIntegralConst(condOp2Value)) + { + // Found a EQ/NE(AND(...,n),n) which will be optimized to tbz/tbnz during lowering. 
+ *isTestCondition = true; + } } return false; @@ -9509,10 +9522,7 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() bool op1IsCondChain = FindCompareChain(cond1, &op1IsTestCond); bool op2IsCondChain = FindCompareChain(cond2, &op2IsTestCond); - // Specifically for Arm64, avoid cases where optimizations in lowering will produce better - // code than optimizing here. Specificially: - // * CMP(AND(...), 0) will be turned into a TEST_ opcode. - // * Compares against zero will be optimized with cbz. + // Avoid cases where optimizations in lowering will produce better code than optimizing here. if (op1IsTestCond || op2IsTestCond) { return false; From 7f9fdb8ced21e79ca15e7ae28998ff0df49d7cd2 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 15 Mar 2023 12:02:07 +0000 Subject: [PATCH 26/31] Fix and expand test cases --- .../JIT/opt/Compares/compareAnd2Chains.cs | 24 ++++++------ .../JIT/opt/Compares/compareAnd3Chains.cs | 37 ++++++++++++++++++- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/src/tests/JIT/opt/Compares/compareAnd2Chains.cs b/src/tests/JIT/opt/Compares/compareAnd2Chains.cs index ba6b355778bafa..ff6eebda589791 100644 --- a/src/tests/JIT/opt/Compares/compareAnd2Chains.cs +++ b/src/tests/JIT/opt/Compares/compareAnd2Chains.cs @@ -195,9 +195,9 @@ public static void Lt_byte_2_consume(byte a1, byte a2) { [MethodImpl(MethodImplOptions.NoInlining)] public static void Le_short_2_consume(short a1, short a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #10 - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #12, nzc, {{gt|le}} + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #12, 0, {{gt|le}} //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{gt|le}} - if (a1 <= 10 || a2 <= 12) { a1 = 10; } + if (a1 <= 10 && a2 <= 12) { a1 = 10; } consume(a1, a2); } @@ -213,9 +213,9 @@ public static void Gt_int_2_consume(int a1, int a2) { [MethodImpl(MethodImplOptions.NoInlining)] public static void Ge_long_2_consume(long a1, long a2) { //ARM64-FULL-LINE: cmp 
{{x[0-9]+}}, #10 - //ARM64-FULL-LINE-NEXT: ccmp {{x[0-9]+}}, #14, z, {{lt|ge}} + //ARM64-FULL-LINE-NEXT: ccmp {{x[0-9]+}}, #14, nc, {{lt|ge}} //ARM64-FULL-LINE-NEXT: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{lt|ge}} - if (a1 >= 10 || a2 >= 14) { a1 = 10; } + if (a1 >= 10 && a2 >= 14) { a1 = 10; } consume(a1, a2); } @@ -231,9 +231,9 @@ public static void Eq_ushort_2_consume(ushort a1, ushort a2) { [MethodImpl(MethodImplOptions.NoInlining)] public static void Ne_uint_2_consume(uint a1, uint a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #10 - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #16, 0, {{eq|ne}} + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #16, z, {{eq|ne}} //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{eq|ne}} - if (a1 != 10 || a2 != 16) { a1 = 10; } + if (a1 != 10 && a2 != 16) { a1 = 10; } consume(a1, a2); } @@ -252,9 +252,9 @@ public static void Le_else_byte_2_consume(byte a1, byte a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Gt_else_short_2_consume(short a1, short a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #11 - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #23, 0, {{le|gt}} + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #23, nzc, {{le|gt}} //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{le|gt}} - if (a1 > 11 || a2 > 23) { a1 = 20; } else { a1 = 200; } + if (a1 > 11 && a2 > 23) { a1 = 20; } else { a1 = 200; } consume(a1, a2); } @@ -270,9 +270,9 @@ public static void Ge_else_int_2_consume(int a1, int a2) { [MethodImpl(MethodImplOptions.NoInlining)] public static void Eq_else_long_2_consume(long a1, long a2) { //ARM64-FULL-LINE: cmp {{x[0-9]+}}, #11 - //ARM64-FULL-LINE-NEXT: ccmp {{x[0-9]+}}, #25, z, {{ne|eq}} + //ARM64-FULL-LINE-NEXT: ccmp {{x[0-9]+}}, #25, 0, {{ne|eq}} //ARM64-FULL-LINE-NEXT: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{ne|eq}} - if (a1 == 11 || a2 == 25) { a1 = 20; } else { a1 = 200; } + if (a1 == 11 && a2 == 25) { a1 = 20; } else { a1 = 200; } consume(a1, a2); } @@ -288,9 +288,9 @@ 
public static void Ne_else_ushort_2_consume(ushort a1, ushort a2) { [MethodImpl(MethodImplOptions.NoInlining)] public static void Lt_else_uint_2_consume(uint a1, uint a2) { //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #11 - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #27, 0, {{hs|lo}} + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #27, c, {{hs|lo}} //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{hs|lo}} - if (a1 < 11 || a2 < 27) { a1 = 20; } else { a1 = 200; } + if (a1 < 11 && a2 < 27) { a1 = 20; } else { a1 = 200; } consume(a1, a2); } diff --git a/src/tests/JIT/opt/Compares/compareAnd3Chains.cs b/src/tests/JIT/opt/Compares/compareAnd3Chains.cs index 04e0f513eaf368..e23e18d55d4500 100644 --- a/src/tests/JIT/opt/Compares/compareAnd3Chains.cs +++ b/src/tests/JIT/opt/Compares/compareAnd3Chains.cs @@ -185,14 +185,44 @@ public static void consume(T a1, T a2, T a3) {} [MethodImpl(MethodImplOptions.NoInlining)] public static void Le_byte_3_consume(byte a1, byte a2, byte a3) { - //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #11 + //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #10 + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #11, nzc, {{gt|le}} //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #12, nzc, {{gt|le}} - //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #10, nzc, {{gt|le}} //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{gt|le}} if (a1 <= 10 || a2 <= 11 || a3 <= 12) { a1 = 10; } consume(a1, a2, a3); } + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Gt_short_3_consume(short a1, short a2, short a3) { + //ARM64-FULL-LINE: cmp {{w[0-9]+}}, #13 + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #14, 0, {{gt|le}} + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #15, 0, {{gt|le}} + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{gt|le}} + if (a1 <= 13 && a2 <= 14 && a3 <= 15) { a1 = 10; } + consume(a1, a2, a3); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Ge_int_3_consume(int a1, int a2, int a3) { + //ARM64-FULL-LINE: cmp 
{{w[0-9]+}}, #16 + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #17, 0, {{gt|le}} + //ARM64-FULL-LINE-NEXT: ccmp {{w[0-9]+}}, #18, nzc, {{gt|le}} + //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{gt|le}} + if (a1 <= 16 && a2 <= 17 || a3 <= 18) { a1 = 10; } + consume(a1, a2, a3); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Eq_else_long_3_consume(long a1, long a2, long a3) { + //ARM64-FULL-LINE: cmp {{x[0-9]+}}, #20 + //ARM64-FULL-LINE-NEXT: ccmp {{x[0-9]+}}, #21, 0, {{eq|ne}} + //ARM64-FULL-LINE-NEXT: ccmp {{x[0-9]+}}, #19, z, {{eq|ne}} + //ARM64-FULL-LINE-NEXT: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{eq|ne}} + if (a1 == 19 || a2 == 20 && a3 == 21) { a1 = 10; } else { a1 = 11; } + consume(a1, a2, a3); + } + [MethodImpl(MethodImplOptions.NoInlining)] public static int Main() { @@ -473,6 +503,9 @@ public static int Main() } Le_byte_3_consume(101, 102, 103); + Gt_short_3_consume(104, 105, 106); + Ge_int_3_consume(107, 108, 109); + Eq_else_long_3_consume(110, 111, 112); Console.WriteLine("PASSED"); return 100; From 3d8ed4f239b7cd95246f87d25f2294435b73a65c Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 15 Mar 2023 12:25:16 +0000 Subject: [PATCH 27/31] Minor fixups --- src/coreclr/jit/optimizer.cpp | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index e55aa7b4cccfc5..1fa9b3aaa51768 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9368,21 +9368,14 @@ inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondit // previously been combined by optOptimizeCompareChainCondBlock) or is it a test condition // that will be optimised to cbz/cbnz during lowering? 
- if (condOp1->OperIs(GT_AND, GT_OR) && - varTypeIsIntegralOrI(condOp1->gtGetOp1()) && varTypeIsIntegralOrI(condOp1->gtGetOp2())) + if (condOp1->OperIs(GT_AND, GT_OR)) { - // Check that the second operand of AND ends with a compare operation, as this will be + // Check that the second operand of AND/OR ends with a compare operation, as this will be // the condition the new link in the chain will connect with. - if (condOp1->gtGetOp2()->OperIsCmpCompare()) + if (condOp1->gtGetOp2()->OperIsCmpCompare() && varTypeIsIntegralOrI(condOp1->gtGetOp2()->gtGetOp1())) { return true; } - if (condOp1->gtGetOp2()->OperIsCmpCompare()) - { - // Recursive check the inner condition. - bool innerTestCondition; - return FindCompareChain(condOp1->gtGetOp2(), &innerTestCondition); - } } *isTestCondition = true; @@ -9474,7 +9467,7 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() m_t3 = nullptr; bool foundEndOfOrConditions = false; - if (m_b1->bbNext == m_b2 && m_b1->bbJumpDest == m_b2->bbNext && m_b1->bbJumpDest->bbNext == m_b2->bbJumpDest) + if ((m_b1->bbNext == m_b2) && (m_b1->bbJumpDest == m_b2->bbNext) && (m_b1->bbJumpDest->bbNext == m_b2->bbJumpDest)) { // Found the end of two (or more) conditions being ORed together. // The final condition has been inverted. 
@@ -9511,7 +9504,7 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() } // Integer compares only for now (until support for Arm64 fccmp instruction is added) - if (varTypeIsFloating(cond1->gtGetOp1()->TypeGet()) || varTypeIsFloating(cond2->gtGetOp1()->TypeGet())) + if (varTypeIsFloating(cond1->gtGetOp1()) || varTypeIsFloating(cond2->gtGetOp1())) { return false; } From 191a65043f98a3dfc8aca240e5039527401732ac Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 16 Mar 2023 10:11:21 +0000 Subject: [PATCH 28/31] Allow wider range of conditions --- src/coreclr/jit/optimizer.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 1fa9b3aaa51768..f7bc8d6ff4d248 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9463,17 +9463,21 @@ inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondit // bool OptBoolsDsc::optOptimizeCompareChainCondBlock() { - assert(m_b1 != nullptr && m_b2 != nullptr && m_b3 == nullptr); + assert((m_b1 != nullptr) && (m_b2 != nullptr) && (m_b3 == nullptr)); m_t3 = nullptr; bool foundEndOfOrConditions = false; - if ((m_b1->bbNext == m_b2) && (m_b1->bbJumpDest == m_b2->bbNext) && (m_b1->bbJumpDest->bbNext == m_b2->bbJumpDest)) + if ((m_b1->bbNext == m_b2) && (m_b1->bbJumpDest == m_b2->bbNext)) { // Found the end of two (or more) conditions being ORed together. // The final condition has been inverted. foundEndOfOrConditions = true; } - else if (!(m_b1->bbNext == m_b2 && m_b1->bbJumpDest == m_b2->bbJumpDest)) + else if ((m_b1->bbNext == m_b2) && (m_b1->bbJumpDest == m_b2->bbJumpDest)) + { + // Found two conditions connected together. 
+ } + else { return false; } From 6cf9cd36350b6341e8b3d10606e3ed7cf21db0a6 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 17 Mar 2023 12:22:47 +0000 Subject: [PATCH 29/31] Minor cleanups --- src/coreclr/jit/optimizer.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index f7bc8d6ff4d248..aabedf84df5bf5 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9111,7 +9111,7 @@ class OptBoolsDsc GenTree* optIsBoolComp(OptTestInfo* pOptTest); bool optOptimizeBoolsChkTypeCostCond(); void optOptimizeBoolsUpdateTrees(); - inline bool FindCompareChain(GenTree* condition, bool* isTestCondition); + bool FindCompareChain(GenTree* condition, bool* isTestCondition); }; //----------------------------------------------------------------------------- @@ -9351,7 +9351,7 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock() // m_b1 and m_b2 are set on entry. // -inline bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondition) +bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondition) { GenTree* condOp1 = condition->gtGetOp1(); GenTree* condOp2 = condition->gtGetOp2(); @@ -9543,8 +9543,6 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() } } - GenTree* testcondition = nullptr; - // Remove the first JTRUE statement. constexpr bool isUnlink = true; m_comp->fgRemoveStmt(m_b1, s1 DEBUGARG(isUnlink)); @@ -9559,14 +9557,12 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() // Join the two conditions together genTreeOps chainedOper = foundEndOfOrConditions ? 
GT_AND : GT_OR; GenTree* chainedConditions = m_comp->gtNewOperNode(chainedOper, TYP_INT, cond1, cond2); - chainedConditions->AsOp()->gtFlags |= (cond1->gtFlags & GTF_ALL_EFFECT); - chainedConditions->AsOp()->gtFlags |= (cond2->gtFlags & GTF_ALL_EFFECT); cond1->gtFlags &= ~GTF_RELOP_JMP_USED; cond2->gtFlags &= ~GTF_RELOP_JMP_USED; chainedConditions->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); // Add a test condition onto the front of the chain - testcondition = m_comp->gtNewOperNode(GT_NE, TYP_INT, chainedConditions, m_comp->gtNewZeroConNode(TYP_INT)); + GenTree* testcondition = m_comp->gtNewOperNode(GT_NE, TYP_INT, chainedConditions, m_comp->gtNewZeroConNode(TYP_INT)); // Wire the chain into the second block m_testInfo2.testTree->AsOp()->gtOp1 = testcondition; From 18ea05c574f0d506f2d5e0e8b534e97a53e2f8e7 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 17 Mar 2023 12:36:35 +0000 Subject: [PATCH 30/31] Reduce max allowed cost --- src/coreclr/jit/optimizer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index aabedf84df5bf5..dfbd2984c957a4 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9531,8 +9531,9 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() { int op1Cost = cond1->GetCostEx(); int op2Cost = cond2->GetCostEx(); - int maxOp1Cost = op1IsCondChain ? 35 : 7; - int maxOp2Cost = op2IsCondChain ? 35 : 7; + // The cost of combing three simple conditions is 32. + int maxOp1Cost = op1IsCondChain ? 31 : 7; + int maxOp2Cost = op2IsCondChain ? 31 : 7; // Cost to allow for chain size of three. 
if (op1Cost > maxOp1Cost || op2Cost > maxOp2Cost) From 238d81063a83b2066b67b606c45cec944863d214 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 17 Mar 2023 13:32:41 +0000 Subject: [PATCH 31/31] Fix formatting --- src/coreclr/jit/optimizer.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index dfbd2984c957a4..819138f165affd 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9109,8 +9109,8 @@ class OptBoolsDsc private: Statement* optOptimizeBoolsChkBlkCond(); GenTree* optIsBoolComp(OptTestInfo* pOptTest); - bool optOptimizeBoolsChkTypeCostCond(); - void optOptimizeBoolsUpdateTrees(); + bool optOptimizeBoolsChkTypeCostCond(); + void optOptimizeBoolsUpdateTrees(); bool FindCompareChain(GenTree* condition, bool* isTestCondition); }; @@ -9529,8 +9529,8 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() // Put a limit on the max size that can be combined. if (!m_comp->compStressCompile(Compiler::STRESS_OPT_BOOLS_COMPARE_CHAIN_COST, 25)) { - int op1Cost = cond1->GetCostEx(); - int op2Cost = cond2->GetCostEx(); + int op1Cost = cond1->GetCostEx(); + int op2Cost = cond2->GetCostEx(); // The cost of combing three simple conditions is 32. int maxOp1Cost = op1IsCondChain ? 31 : 7; int maxOp2Cost = op2IsCondChain ? 31 : 7; @@ -9563,7 +9563,8 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock() chainedConditions->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); // Add a test condition onto the front of the chain - GenTree* testcondition = m_comp->gtNewOperNode(GT_NE, TYP_INT, chainedConditions, m_comp->gtNewZeroConNode(TYP_INT)); + GenTree* testcondition = + m_comp->gtNewOperNode(GT_NE, TYP_INT, chainedConditions, m_comp->gtNewZeroConNode(TYP_INT)); // Wire the chain into the second block m_testInfo2.testTree->AsOp()->gtOp1 = testcondition;