From 648e4e69963a2d21427f489be8a1bbe32b9a5e60 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Thu, 9 Jul 2020 14:08:47 -0700 Subject: [PATCH 1/3] Prerequisite work item for the CSE of GT_CNS_INT work item (zero code diffs in the framework libraries) Mark nodes that use the division by constant optimization with GTF_DIV_BY_CNS_OPT Don't perform const prop on expressions marked with GTF_DONT_CSE, as this would undo a constant CSE Fix for bug in AssertionProp where we assign the wrong value number when folding a conditional When dumping the BasicBlocks print hascall when the block is marked with BBF_HAS_CALL Call CheckDivideByConstOptimized when early prop inserts a constant node added methods: UsesDivideByConstOptimized, CheckDivideByConstOptimized and MarkDivideByConstant Propagate any side effect flags in the gtCallAddr field of an indirect call node Call CheckDivideByConstOptimized when morphing a divide or remainder nodes Don't allow changing a floating point GT_DIV into a GT_MUL in fgMorph after the global morph phase In loop hoisting, set BBF_HAS_CALL if we hoist a tree that contains a call When hoisting something that requires a physical register, clear that requirement in the hoisted copy --- src/coreclr/src/jit/assertionprop.cpp | 22 ++- src/coreclr/src/jit/block.cpp | 4 + src/coreclr/src/jit/earlyprop.cpp | 8 ++ src/coreclr/src/jit/gentree.cpp | 200 ++++++++++++++++++++++++-- src/coreclr/src/jit/gentree.h | 21 +++ src/coreclr/src/jit/lower.cpp | 4 +- src/coreclr/src/jit/morph.cpp | 12 +- src/coreclr/src/jit/optimizer.cpp | 9 ++ 8 files changed, 266 insertions(+), 14 deletions(-) diff --git a/src/coreclr/src/jit/assertionprop.cpp b/src/coreclr/src/jit/assertionprop.cpp index 987388a39259dc..7bac5584bf92c9 100644 --- a/src/coreclr/src/jit/assertionprop.cpp +++ b/src/coreclr/src/jit/assertionprop.cpp @@ -3236,7 +3236,8 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen return nullptr; } - AssertionDsc* curAssertion = optGetAssertion(index); + AssertionDsc* curAssertion = optGetAssertion(index); + bool assertionKindIsEqual = (curAssertion->assertionKind == OAK_EQUAL); // Allow or not to reverse condition for OAK_NOT_EQUAL assertions. bool allowReverse = true; @@ -3251,7 +3252,7 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen printf("\nVN relop based constant assertion prop in " FMT_BB ":\n", compCurBB->bbNum); printf("Assertion index=#%02u: ", index); printTreeID(op1); - printf(" %s ", (curAssertion->assertionKind == OAK_EQUAL) ? "==" : "!="); + printf(" %s ", assertionKindIsEqual ? "==" : "!="); if (genActualType(op1->TypeGet()) == TYP_INT) { printf("%d\n", vnStore->ConstantValue(vnCns)); @@ -3336,8 +3337,15 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen op1->gtVNPair.SetBoth(vnCns); // Preserve the ValueNumPair, as ChangeOperConst/SetOper will clear it. - // Also set the value number on the relop. - if (curAssertion->assertionKind == OAK_EQUAL) + // set foldResult to either 0 or 1 + bool foldResult = assertionKindIsEqual; + if (tree->gtOper == GT_NE) + { + foldResult = !foldResult; + } + + // Set the value number on the relop to 1 (true) or 0 (false) + if (foldResult) { tree->gtVNPair.SetBoth(vnStore->VNOneForType(TYP_INT)); } @@ -4947,6 +4955,12 @@ GenTree* Compiler::optVNConstantPropOnJTrue(BasicBlock* block, GenTree* test) // Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, Statement* stmt, GenTree* tree) { + // Don't perform const prop on expressions marked with GTF_DONT_CSE + if (!tree->CanCSE()) + { + return WALK_CONTINUE; + } + // Don't propagate floating-point constants into a TYP_STRUCT LclVar // This can occur for HFA return values (see hfa_sf3E_r.exe) if (tree->TypeGet() == TYP_STRUCT) diff --git a/src/coreclr/src/jit/block.cpp b/src/coreclr/src/jit/block.cpp index 9064caaaa38e4b..d8ca31c6662c24 100644 --- a/src/coreclr/src/jit/block.cpp +++ b/src/coreclr/src/jit/block.cpp @@ -309,6 +309,10 @@ void BasicBlock::dspFlags() { printf("jmp "); } + if (bbFlags & BBF_HAS_CALL) + { + printf("hascall "); + } if (bbFlags & BBF_GC_SAFE_POINT) { printf("gcsafe "); diff --git a/src/coreclr/src/jit/earlyprop.cpp b/src/coreclr/src/jit/earlyprop.cpp index 6c8a521fc75023..4a520599882748 100644 --- a/src/coreclr/src/jit/earlyprop.cpp +++ b/src/coreclr/src/jit/earlyprop.cpp @@ -445,6 +445,14 @@ GenTree* Compiler::optEarlyPropRewriteTree(GenTree* tree, LocalNumberToNullCheck // actualValClone has small tree node size, it is safe to use CopyFrom here. tree->ReplaceWith(actualValClone, this); + // Propagating a constant may create an opportunity to use a division by constant optimization + // + if ((tree->gtNext != nullptr) && tree->gtNext->OperIsBinary()) + { + // We need to mark the parent divide/mod operation when this occurs + tree->gtNext->AsOp()->CheckDivideByConstOptimized(this); + } + #ifdef DEBUG if (verbose) { diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index 25d4bc58054858..1a8aace4ccab29 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -615,10 +615,6 @@ void GenTree::CopyReg(GenTree* from) // GT_COPY/GT_RELOAD is considered having a reg if it // has a reg assigned to any of its positions. // -// Assumption: -// In order for this to work properly, gtClearReg must be called -// prior to setting the register value. -// bool GenTree::gtHasReg() const { bool hasReg = false; @@ -6674,6 +6670,168 @@ void GenTreeIntCon::FixupInitBlkValue(var_types asgType) } } +//---------------------------------------------------------------------------- +// UsesDivideByConstOptimized: +// returns true if rationalize will use the division by constant +// optimization for this node. +// +// Arguments: +// this - a GenTreeOp node +// comp - the compiler instance +// +// Return Value: +// Return true iff the node is a GT_DIV,GT_UDIV, GT_MOD or GT_UMOD with +// an integer constant and we can perform the division operation using +// a reciprocal multiply or a shift operation. +// +bool GenTreeOp::UsesDivideByConstOptimized(Compiler* comp) +{ + if (!OperIs(GT_DIV, GT_MOD, GT_UDIV, GT_UMOD)) + { + return false; + } +#if defined(TARGET_ARM64) + if (OperIs(GT_MOD, GT_UMOD)) + { + // MOD, UMOD not supported for ARM64 + return false; + } +#endif // TARGET_ARM64 + + bool isSignedDivide = OperIs(GT_DIV, GT_MOD); + GenTree* dividend = gtGetOp1()->gtEffectiveVal(/*commaOnly*/ true); + GenTree* divisor = gtGetOp2()->gtEffectiveVal(/*commaOnly*/ true); + +#if !defined(TARGET_64BIT) + if (dividend->OperIs(GT_LONG)) + { + return false; + } +#endif + + if (dividend->IsCnsIntOrI()) + { + // We shouldn't see a divmod with constant operands here but if we do then it's likely + // because optimizations are disabled or it's a case that's supposed to throw an exception. + // Don't optimize this. + return false; + } + + ssize_t divisorValue; + if (divisor->IsCnsIntOrI()) + { + divisorValue = static_cast(divisor->AsIntCon()->IconValue()); + } + else + { + ValueNum vn = divisor->gtVNPair.GetLiberal(); + if (comp->vnStore->IsVNConstant(vn)) + { + divisorValue = comp->vnStore->CoercedConstantValue(vn); + } + else + { + return false; + } + } + + const var_types divType = TypeGet(); + + if (divisorValue == 0) + { + // x / 0 and x % 0 can't be optimized because they are required to throw an exception. + return false; + } + else if (isSignedDivide) + { + if (divisorValue == -1) + { + // x / -1 can't be optimized because INT_MIN / -1 is required to throw an exception. + return false; + } + else if (isPow2(divisorValue)) + { + return true; + } + } + else // unsigned divide + { + if (divType == TYP_INT) + { + // Clear up the upper 32 bits of the value, they may be set to 1 because constants + // are treated as signed and stored in ssize_t which is 64 bit in size on 64 bit targets. + divisorValue &= UINT32_MAX; + } + + size_t unsignedDivisorValue = (size_t)divisorValue; + if (isPow2(unsignedDivisorValue)) + { + return true; + } + } + + const bool isDiv = OperIs(GT_DIV, GT_UDIV); + + if (isDiv) + { + if (isSignedDivide) + { + // If the divisor is the minimum representable integer value then the result is either 0 or 1 + if ((divType == TYP_INT && divisorValue == INT_MIN) || (divType == TYP_LONG && divisorValue == INT64_MIN)) + { + return true; + } + } + else + { + // If the divisor is greater or equal than 2^(N - 1) then the result is either 0 or 1 + if (((divType == TYP_INT) && (divisorValue > (UINT32_MAX / 2))) || + ((divType == TYP_LONG) && (divisorValue > (UINT64_MAX / 2)))) + { + return true; + } + } + } + +// TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32 +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) + if (!comp->opts.MinOpts() && ((divisorValue >= 3) || !isSignedDivide)) + { + // All checks pass we can perform the division operation using a reciprocal multiply. + return true; + } +#endif + + return false; +} + +//------------------------------------------------------------------------ +// CheckDivideByConstOptimized: +// Checks if we will can use the division by constant optimization +// on this node +// and if so sets the flag GTF_DIV_BY_CNS_OPT and +// set GTF_DONT_CSE on the constant node +// +// Arguments: +// this - a GenTreeOp node +// comp - the compiler instance +// +void GenTreeOp::CheckDivideByConstOptimized(Compiler* comp) +{ + if (UsesDivideByConstOptimized(comp)) + { + gtFlags |= GTF_DIV_BY_CNS_OPT; + + // Now set DONT_CSE on the GT_CNS_INT divisor, note that + // with ValueNumbering we can have a non GT_CNS_INT divisior + GenTree* divisor = gtGetOp2()->gtEffectiveVal(/*commaOnly*/ true); + if (divisor->OperIs(GT_CNS_INT)) + { + divisor->gtFlags |= GTF_DONT_CSE; + } + } +} + // //------------------------------------------------------------------------ // gtBlockOpInit: Initializes a BlkOp GenTree @@ -9899,6 +10057,18 @@ void Compiler::gtDispNode(GenTree* tree, IndentStack* indentStack, __in __in_z _ } goto DASH; + case GT_DIV: + case GT_MOD: + case GT_UDIV: + case GT_UMOD: + if (tree->gtFlags & GTF_DIV_BY_CNS_OPT) + { + printf("M"); // We will use a Multiply by reciprical + --msgLength; + break; + } + goto DASH; + case GT_LCL_FLD: case GT_LCL_VAR: case GT_LCL_VAR_ADDR: @@ -10566,16 +10736,30 @@ void Compiler::gtDispConst(GenTree* tree) else if ((tree->AsIntCon()->gtIconVal > -1000) && (tree->AsIntCon()->gtIconVal < 1000)) { printf(" %ld", dspIconVal); -#ifdef TARGET_64BIT } +#ifdef TARGET_64BIT else if ((tree->AsIntCon()->gtIconVal & 0xFFFFFFFF00000000LL) != 0) { - printf(" 0x%llx", dspIconVal); -#endif + if (dspIconVal >= 0) + { + printf(" 0x%llx", dspIconVal); + } + else + { + printf(" -0x%llx", -dspIconVal); + } } +#endif else { - printf(" 0x%X", dspIconVal); + if (dspIconVal >= 0) + { + printf(" 0x%X", dspIconVal); + } + else + { + printf(" -0x%X", -dspIconVal); + } } if (tree->IsIconHandle()) diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index 835b7e9089ad8f..4f61fa7ef75bd2 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -633,6 +633,12 @@ struct GenTree assert(_gtRegNum == reg); } + void ClearRegNum() + { + _gtRegNum = REG_NA; + INDEBUG(gtRegTag = GT_REGTAG_NONE;) + } + // Copy the _gtRegNum/gtRegTag fields void CopyReg(GenTree* from); bool gtHasReg() const; @@ -922,6 +928,8 @@ struct GenTree #define GTF_OVERFLOW 0x10000000 // Supported for: GT_ADD, GT_SUB, GT_MUL and GT_CAST. // Requires an overflow check. Use gtOverflow(Ex)() to check this flag. +#define GTF_DIV_BY_CNS_OPT 0x80000000 // GT_DIV -- Uses the division by constant optimization to compute this division + #define GTF_ARR_BOUND_INBND 0x80000000 // GT_ARR_BOUNDS_CHECK -- have proved this check is always in-bounds #define GTF_ARRLEN_ARR_IDX 0x80000000 // GT_ARR_LENGTH -- Length which feeds into an array index expression @@ -2853,6 +2861,19 @@ struct GenTreeOp : public GenTreeUnOp assert(oper == GT_NOP || oper == GT_RETURN || oper == GT_RETFILT || OperIsBlk(oper)); } + // returns true if we will use the division by constant optimization for this node. + bool UsesDivideByConstOptimized(Compiler* comp); + + // checks if we will use the division by constant optimization this node + // then sets the flag GTF_DIV_BY_CNS_OPT and GTF_DONT_CSE on the constant + void CheckDivideByConstOptimized(Compiler* comp); + + // True if this node is marked as using the division by constant optimization + bool MarkedDivideByConstOptimized() const + { + return (gtFlags & GTF_DIV_BY_CNS_OPT) != 0; + } + #if DEBUGGABLE_GENTREE GenTreeOp() : GenTreeUnOp(), gtOp2(nullptr) { diff --git a/src/coreclr/src/jit/lower.cpp b/src/coreclr/src/jit/lower.cpp index 3ee8b05a89f1e0..98881d08590343 100644 --- a/src/coreclr/src/jit/lower.cpp +++ b/src/coreclr/src/jit/lower.cpp @@ -5098,6 +5098,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) divMod->SetOper(newOper); divisor->AsIntCon()->SetIconValue(divisorValue); ContainCheckNode(divMod); + assert(divMod->MarkedDivideByConstOptimized()); return true; } if (isDiv) @@ -5110,6 +5111,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) divMod->SetOper(GT_GE); divMod->gtFlags |= GTF_UNSIGNED; ContainCheckNode(divMod); + assert(divMod->MarkedDivideByConstOptimized()); return true; } } @@ -5134,6 +5136,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) unreached(); #endif } + assert(divMod->MarkedDivideByConstOptimized()); // Depending on the "add" flag returned by GetUnsignedMagicNumberForDivide we need to generate: // add == false (when divisor == 3 for example): @@ -5207,7 +5210,6 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) BlockRange().InsertBefore(divMod, div, divisor, mul, dividend); } ContainCheckRange(firstNode, divMod); - return true; } #endif diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp index 61ed33e644f27f..8fd2c69a441323 100644 --- a/src/coreclr/src/jit/morph.cpp +++ b/src/coreclr/src/jit/morph.cpp @@ -3982,6 +3982,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) if (call->gtCallType == CT_INDIRECT) { call->gtCallAddr = fgMorphTree(call->gtCallAddr); + // Const CSE may create an assignment node here + flagsSummary |= call->gtCallAddr->gtFlags; } #if FEATURE_FIXED_OUT_ARGS @@ -11685,7 +11687,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) // Replace "val / dcon" with "val * (1.0 / dcon)" if dcon is a power of two. // Powers of two within range are always exactly represented, // so multiplication by the reciprocal is safe in this scenario - if (op2->IsCnsFltOrDbl()) + if (fgGlobalMorph && op2->IsCnsFltOrDbl()) { double divisor = op2->AsDblCon()->gtDconVal; if (((typ == TYP_DOUBLE) && FloatingPointUtils::hasPreciseReciprocal(divisor)) || @@ -11829,6 +11831,8 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) { tree = gtFoldExpr(tree); } + + tree->AsOp()->CheckDivideByConstOptimized(this); return tree; } } @@ -14537,7 +14541,11 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) DEBUG_DESTROY_NODE(tree); return op1; } + break; + case GT_UDIV: + case GT_UMOD: + tree->CheckDivideByConstOptimized(this); break; case GT_LSH: @@ -14690,6 +14698,8 @@ GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree) sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; #endif + tree->CheckDivideByConstOptimized(this); + return sub; } diff --git a/src/coreclr/src/jit/optimizer.cpp b/src/coreclr/src/jit/optimizer.cpp index adc874b27887c5..ac56ff3ed4afb3 100644 --- a/src/coreclr/src/jit/optimizer.cpp +++ b/src/coreclr/src/jit/optimizer.cpp @@ -4265,6 +4265,11 @@ void Compiler::fgOptWhileLoop(BasicBlock* block) copyOfCondStmt->SetCompilerAdded(); + if (condTree->gtFlags & GTF_CALL) + { + block->bbFlags |= BBF_HAS_CALL; // Record that the block has a call + } + if (opts.compDbgInfo) { copyOfCondStmt->SetILOffsetX(condStmt->GetILOffsetX()); @@ -6198,6 +6203,10 @@ void Compiler::optPerformHoistExpr(GenTree* origExpr, unsigned lnum) // Create a copy of the expression and mark it for CSE's. GenTree* hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE); + // The hoist Expr does not have to computed into a specific register, + // so clear the RegNum if it was set in the original expression + hoistExpr->ClearRegNum(); + // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag assert(hoistExpr != origExpr); assert(hoistExpr->gtFlags & GTF_MAKE_CSE); From f76b605eba02cb82d6e3f99fee145c55efb56e19 Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Thu, 9 Jul 2020 16:38:53 -0700 Subject: [PATCH 2/3] Code review feedback --- src/coreclr/src/jit/gentree.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index 1a8aace4ccab29..98bea0eabc9c58 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -6676,8 +6676,8 @@ void GenTreeIntCon::FixupInitBlkValue(var_types asgType) // optimization for this node. // // Arguments: -// this - a GenTreeOp node -// comp - the compiler instance +// this - a GenTreeOp node +// comp - the compiler instance // // Return Value: // Return true iff the node is a GT_DIV,GT_UDIV, GT_MOD or GT_UMOD with @@ -6686,6 +6686,11 @@ void GenTreeIntCon::FixupInitBlkValue(var_types asgType) // bool GenTreeOp::UsesDivideByConstOptimized(Compiler* comp) { + if (!comp->opts.OptimizationEnabled()) + { + return false; + } + if (!OperIs(GT_DIV, GT_MOD, GT_UDIV, GT_UMOD)) { return false; @@ -6807,7 +6812,7 @@ bool GenTreeOp::UsesDivideByConstOptimized(Compiler* comp) //------------------------------------------------------------------------ // CheckDivideByConstOptimized: -// Checks if we will can use the division by constant optimization +// Checks if we can use the division by constant optimization // on this node // and if so sets the flag GTF_DIV_BY_CNS_OPT and // set GTF_DONT_CSE on the constant node From e8f534a135f96a4588a5542cba52a72b599b376c Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Thu, 9 Jul 2020 19:38:41 -0700 Subject: [PATCH 3/3] Remove two asserts in lower because it will always make an optimization for UDIV and UMOD with a power of two divisor. --- src/coreclr/src/jit/lower.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/src/jit/lower.cpp b/src/coreclr/src/jit/lower.cpp index 98881d08590343..395062a4014204 100644 --- a/src/coreclr/src/jit/lower.cpp +++ b/src/coreclr/src/jit/lower.cpp @@ -5098,7 +5098,6 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) divMod->SetOper(newOper); divisor->AsIntCon()->SetIconValue(divisorValue); ContainCheckNode(divMod); - assert(divMod->MarkedDivideByConstOptimized()); return true; } if (isDiv) @@ -5111,7 +5110,6 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) divMod->SetOper(GT_GE); divMod->gtFlags |= GTF_UNSIGNED; ContainCheckNode(divMod); - assert(divMod->MarkedDivideByConstOptimized()); return true; } }