From de14015fc21a0d408b62ae7a5380a74deac00c3e Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 17 Jan 2024 04:06:03 +0100 Subject: [PATCH 01/15] Late expansion of casts --- src/coreclr/jit/compiler.cpp | 3 + src/coreclr/jit/compiler.h | 3 + src/coreclr/jit/gentree.h | 1 + src/coreclr/jit/helperexpansion.cpp | 260 ++++++++++++++++++++++++++++ src/coreclr/jit/importer.cpp | 10 +- 5 files changed, 276 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index f01eb2b74bf0ef..94c42162e27f73 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5055,6 +5055,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // Expand thread local access DoPhase(this, PHASE_EXPAND_TLS, &Compiler::fgExpandThreadLocalAccess); + // Expand casts + DoPhase(this, PHASE_EXPAND_STATIC_INIT, &Compiler::fgLateCastExpansion); + // Insert GC Polls DoPhase(this, PHASE_INSERT_GC_POLLS, &Compiler::fgInsertGCPolls); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 521aa32777b4be..5ba56480f91720 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5870,6 +5870,9 @@ class Compiler bool fgVNBasedIntrinsicExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call); bool fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call); + PhaseStatus fgLateCastExpansion(); + bool fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call); + PhaseStatus fgInsertGCPolls(); BasicBlock* fgCreateGCPoll(GCPollType pollType, BasicBlock* block); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index f06b0cd4386716..86b05db38e88c7 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4079,6 +4079,7 @@ enum GenTreeCallFlags : unsigned int GTF_CALL_M_EXPANDED_EARLY = 0x00800000, // the Virtual Call target address is expanded and placed in gtControlExpr in Morph rather than in Lower GTF_CALL_M_HAS_LATE_DEVIRT_INFO = 0x01000000, // this call has late devirtualzation info GTF_CALL_M_LDVIRTFTN_INTERFACE = 0x02000000, // ldvirtftn on an interface type + GTF_CALL_M_CAST_CAN_BE_EXPANDED = 0x04000000, // this cast (helper call) can be expanded if it's profitable }; inline constexpr GenTreeCallFlags operator ~(GenTreeCallFlags a) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index bcc6982d8e1b87..8481af984560f8 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1553,3 +1553,263 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, JITDUMP("ReadUtf8: succesfully expanded!\n") return true; } + +PhaseStatus Compiler::fgLateCastExpansion() +{ + if (!opts.IsOptimizedWithProfile()) + { + return PhaseStatus::MODIFIED_NOTHING; + } + + const bool preferSize = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT); + if (preferSize) + { + // The optimization comes with a codegen size increase + JITDUMP("Optimized for size - bail out.\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + return fgExpandHelper<&Compiler::fgLateCastExpansionForCall>(true); +} + +bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) +{ + if (!call->IsHelperCall() || !impIsCastHelperMayHaveProfileData(call->GetHelperNum())) + { + // Not a cast helper we're interested in + return false; + } + + if ((call->gtCallMoreFlags & GTF_CALL_M_CAST_CAN_BE_EXPANDED) == 0) + { + // We either already expanded this cast helper or it's not eligible for expansion + return false; + } + + JITDUMP("Attempting to expand cast helper call [%06d] in " FMT_BB "...\n", dspTreeID(call), (*pBlock)->bbNum) + + BasicBlock* block = *pBlock; + + // Currently, we only expand "isinst" and only using profile data. The long-term plan is to + // move cast expansion logic here from the importer completely. + const int maxLikelyClasses = 8; + LikelyClassMethodRecord likelyClasses[maxLikelyClasses]; + unsigned likelyClassCount = getLikelyClasses(likelyClasses, maxLikelyClasses, fgPgoSchema, fgPgoSchemaCount, + fgPgoData, (int)call->gtRawILOffset); + + if (likelyClassCount == 0) + { + JITDUMP("No likely classes found - bail out.\n") + return false; + } + +#ifdef DEBUG + // Print all the candidates and their likelihoods to the log + for (UINT32 i = 0; i < likelyClassCount; i++) + { + const char* className = eeGetClassName((CORINFO_CLASS_HANDLE)likelyClasses[i].handle); + JITDUMP(" %u) %p (%s) [likelihood:%u%%]\n", i + 1, likelyClasses[i].handle, className, + likelyClasses[i].likelihood); + } + + // Optional stress mode to pick a random known class, rather than + // the most likely known class. + if (JitConfig.JitRandomGuardedDevirtualization() != 0) + { + // Reuse the random inliner's random state. + CLRRandom* const random = + impInlineRoot()->m_inlineStrategy->GetRandom(JitConfig.JitRandomGuardedDevirtualization()); + + unsigned index = static_cast(random->Next(static_cast(likelyClassCount))); + likelyClasses[0].handle = likelyClasses[index].handle; + likelyClasses[0].likelihood = 100; + likelyClassCount = 1; + } +#endif + + LikelyClassMethodRecord likelyClass = likelyClasses[0]; + CORINFO_CLASS_HANDLE likelyCls = (CORINFO_CLASS_HANDLE)likelyClass.handle; + + if (likelyCls == NO_CLASS_HANDLE) + { + // TODO: make null significant, so it could mean our object is likely just null + JITDUMP("Likely class is null - bail out.\n") + return false; + } + + // if there is a dominating candidate with >= 40% likelihood, use it + const unsigned likelihoodMinThreshold = 40; + if (likelyClass.likelihood < likelihoodMinThreshold) + { + JITDUMP("Likely class likelihood is below %u%% - bail out.\n", likelihoodMinThreshold) + return false; + } + + // E.g. "call CORINFO_HELP_ISINSTANCEOFCLASS(class, obj)" + GenTree* clsArg = call->gtArgs.GetUserArgByIndex(0)->GetNode(); + CORINFO_CLASS_HANDLE expectedCls = gtGetHelperArgClassHandle(clsArg); + if (expectedCls == NO_CLASS_HANDLE) + { + // clsArg doesn't represent a class handle - bail out + // TODO: use VN if it's available (depends on when this phase is executed) + JITDUMP("clsArg is not a constant handle - bail out.\n") + return false; + } + + TypeCompareState castResult = info.compCompHnd->compareTypesForCast(likelyCls, expectedCls); + if (castResult != TypeCompareState::Must) + { + JITDUMP("Likely class can't be casted to the requested class.\n"); + // TODO: support MustNot - if we know for sure that the likely class always fail the cast + // we can optimize it like "if (obj == null || obj->pMT == likelyCls) return null;" + return false; + } + + if ((info.compCompHnd->getClassAttribs(likelyCls) & (CORINFO_FLG_INTERFACE | CORINFO_FLG_ABSTRACT)) != 0) + { + // Possible scenario: someone changed Foo to be an interface, + // but static profile data still report it as a normal likely class. + JITDUMP("Likely class is abstract/interface - bail out (stale PGO data?).\n"); + return false; + } + + DebugInfo debugInfo = stmt->GetDebugInfo(); + + // Split block right before the call tree (this is a standard pattern we use in helperexpansion.cpp) + BasicBlock* prevBb = block; + GenTree** callUse = nullptr; + Statement* newFirstStmt = nullptr; + block = fgSplitBlockBeforeTree(block, stmt, call, &newFirstStmt, &callUse); + assert(prevBb != nullptr && block != nullptr); + *pBlock = block; + // Block ops inserted by the split need to be morphed here since we are after morph. + // We cannot morph stmt yet as we may modify it further below, and the morphing + // could invalidate callUse + while ((newFirstStmt != nullptr) && (newFirstStmt != stmt)) + { + fgMorphStmtBlockOps(block, newFirstStmt); + newFirstStmt = newFirstStmt->GetNextStmt(); + } + + // Reload the arguments after the split + clsArg = call->gtArgs.GetUserArgByIndex(0)->GetNode(); + GenTree* objArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); + + // Make sure we don't try to expand this call again + call->gtCallMoreFlags &= ~GTF_CALL_M_CAST_CAN_BE_EXPANDED; + + // Grab a temp to store the result. + const unsigned tmpNum = lvaGrabTemp(true DEBUGARG("local for result")); + lvaTable[tmpNum].lvType = call->TypeGet(); + lvaSetClass(tmpNum, expectedCls); + + // Replace the original call with that temp + *callUse = gtNewLclvNode(tmpNum, call->TypeGet()); + + fgMorphStmtBlockOps(block, stmt); + gtUpdateStmtSideEffects(stmt); + + // We're going to expand this "isinst" like this: + // + // prevBb: [weight: 1.0] + // ... + // + // nullcheckBb (BBJ_COND): [weight: 1.0] + // tmp = obj; + // if (tmp == null) goto block; + // + // typeCheckBb (BBJ_COND): [weight: 0.5] + // if (tmp->pMT == likelyClass) goto block; + // + // fallbackBb (BBJ_ALWAYS): [weight: 0.25] + // tmp = helper_call(cls, tmp); + // + // block (BBJ_any): [weight: 1.0] + // use(tmp); + // + + // Block 1: nullcheckBb + // TODO: assertionprop should leave us a mark that objArg is never null, so we can omit this check + // it's too late to rely on upstream phases to do this for us (unless we do optRepeat). + GenTree* objTmp = gtNewLclVarNode(tmpNum); + GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, objTmp, gtNewNull()); + nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED; + BasicBlock* nullcheckBb = + fgNewBBFromTreeAfter(BBJ_COND, prevBb, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), debugInfo, block, true); + + // Insert statements to spill call's arguments (preserving the evaluation order) + const unsigned clsTmp = lvaGrabTemp(true DEBUGARG("local for cls")); + lvaTable[clsTmp].lvType = clsArg->TypeGet(); + Statement* storeObjStmt = fgNewStmtAtBeg(nullcheckBb, gtNewTempStore(tmpNum, objArg), debugInfo); + Statement* storeClsStmt = fgNewStmtAtBeg(nullcheckBb, gtNewTempStore(clsTmp, clsArg), debugInfo); + gtSetStmtInfo(storeObjStmt); + gtSetStmtInfo(storeClsStmt); + fgSetStmtSeq(storeObjStmt); + fgSetStmtSeq(storeClsStmt); + gtUpdateStmtSideEffects(storeObjStmt); + gtUpdateStmtSideEffects(storeClsStmt); + + // if likelyCls == clsArg, we can just use clsArg that we've just spilled to a temp + // it's a sort of manual CSE. + GenTree* likelyClsNode = likelyCls == expectedCls ? gtNewLclVarNode(clsTmp) : gtNewIconEmbClsHndNode(likelyCls); + + // Block 2: typeCheckBb + GenTree* mtCheck = gtNewOperNode(GT_EQ, TYP_INT, gtNewMethodTableLookup(gtCloneExpr(objTmp)), likelyClsNode); + mtCheck->gtFlags |= GTF_RELOP_JMP_USED; + GenTree* jtrue = gtNewOperNode(GT_JTRUE, TYP_VOID, mtCheck); + BasicBlock* typeCheckBb = fgNewBBFromTreeAfter(BBJ_COND, nullcheckBb, jtrue, debugInfo, block, true); + + // Block 3: fallbackBb + // NOTE: we spilled call's arguments above, we need to re-create it here (we can't just modify call's args) + GenTree* fallbackCall = + gtNewHelperCallNode(call->GetHelperNum(), TYP_REF, gtNewLclVarNode(clsTmp), gtNewLclVarNode(tmpNum)); + fallbackCall = fgMorphCall(fallbackCall->AsCall()); + gtSetEvalOrder(fallbackCall); + BasicBlock* fallbackBb = + fgNewBBFromTreeAfter(BBJ_ALWAYS, typeCheckBb, gtNewTempStore(tmpNum, fallbackCall), debugInfo, block); + + // + // Wire up the blocks + // + prevBb->SetTarget(nullcheckBb); + nullcheckBb->SetTrueTarget(fallbackBb); + nullcheckBb->SetFalseTarget(typeCheckBb); + typeCheckBb->SetTrueTarget(block); + typeCheckBb->SetFalseTarget(fallbackBb); + fallbackBb->SetTarget(block); + fgRemoveRefPred(block, prevBb); + fgAddRefPred(nullcheckBb, prevBb); + fgAddRefPred(typeCheckBb, nullcheckBb); + fgAddRefPred(fallbackBb, nullcheckBb); + fgAddRefPred(fallbackBb, typeCheckBb); + fgAddRefPred(block, typeCheckBb); + fgAddRefPred(block, fallbackBb); + + // + // Re-distribute weights + // We assume obj is 50%/50% null/not-null, and if it's not null, + // it's 50%/50% of being of the expected type. TODO: rely on profile data + // + nullcheckBb->inheritWeight(prevBb); + fallbackBb->inheritWeightPercentage(nullcheckBb, 50); + typeCheckBb->inheritWeightPercentage(fallbackBb, 50); + block->inheritWeight(prevBb); + + // + // Update bbNatLoopNum for all new blocks and validate EH regions + // + nullcheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; + fallbackBb->bbNatLoopNum = prevBb->bbNatLoopNum; + typeCheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; + assert(BasicBlock::sameEHRegion(prevBb, block)); + assert(BasicBlock::sameEHRegion(prevBb, nullcheckBb)); + assert(BasicBlock::sameEHRegion(prevBb, fallbackBb)); + assert(BasicBlock::sameEHRegion(prevBb, typeCheckBb)); + + // Bonus step: merge prevBb with nullcheckBb as they are likely to be mergeable + if (fgCanCompactBlocks(prevBb, nullcheckBb)) + { + fgCompactBlocks(prevBb, nullcheckBb); + } + + return true; +} diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 2ba0643887987d..02db5a77192694 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -5500,7 +5500,9 @@ GenTree* Compiler::impCastClassOrIsInstToTree( } // Check if this cast helper have some profile data - if (impIsCastHelperMayHaveProfileData(helper)) + // "isinst" with profile data is moved to a late phase. + // The long-term plan is to move all non-trivial expansions there. + if (impIsCastHelperMayHaveProfileData(helper) && isCastClass) { const int maxLikelyClasses = 32; LikelyClassMethodRecord likelyClasses[maxLikelyClasses]; @@ -5599,6 +5601,12 @@ GenTree* Compiler::impCastClassOrIsInstToTree( compCurBB->SetFlags(BBF_HAS_HISTOGRAM_PROFILE); } } + else + { + // Maybe the late-cast-expand phase can have a better luck expanding this cast. + call->gtCallMoreFlags |= GTF_CALL_M_CAST_CAN_BE_EXPANDED; + call->gtRawILOffset = ilOffset; + } return call; } From 2c72dea30277bc4f91b56a050b8e00b985de9ff7 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 17 Jan 2024 14:53:26 +0100 Subject: [PATCH 02/15] Fix build --- src/coreclr/jit/gentree.h | 5 +++-- src/coreclr/jit/helperexpansion.cpp | 10 +++++----- src/coreclr/jit/importer.cpp | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 86b05db38e88c7..5e29184b992d2a 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -5588,8 +5588,9 @@ struct GenTreeCall final : public GenTree CORINFO_CLASS_HANDLE gtRetClsHnd; // The return type handle of the call if it is a struct; always available union { - void* gtStubCallStubAddr; // GTF_CALL_VIRT_STUB - these are never inlined - CORINFO_CLASS_HANDLE gtInitClsHnd; // Used by static init helpers, represents a class they init + void* gtStubCallStubAddr; // GTF_CALL_VIRT_STUB - these are never inlined + CORINFO_CLASS_HANDLE gtInitClsHnd; // Used by static init helpers, represents a class they init + IL_OFFSET gtCastHelperILOffset; // Used by cast helpers to save corresponding IL offset }; union { diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 8481af984560f8..f30a8c01e7a3d3 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1594,7 +1594,7 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, const int maxLikelyClasses = 8; LikelyClassMethodRecord likelyClasses[maxLikelyClasses]; unsigned likelyClassCount = getLikelyClasses(likelyClasses, maxLikelyClasses, fgPgoSchema, fgPgoSchemaCount, - fgPgoData, (int)call->gtRawILOffset); + fgPgoData, call->gtCastHelperILOffset); if (likelyClassCount == 0) { @@ -1720,7 +1720,7 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, // typeCheckBb (BBJ_COND): [weight: 0.5] // if (tmp->pMT == likelyClass) goto block; // - // fallbackBb (BBJ_ALWAYS): [weight: 0.25] + // fallbackBb (BBJ_ALWAYS): [weight: ] // tmp = helper_call(cls, tmp); // // block (BBJ_any): [weight: 1.0] @@ -1786,12 +1786,12 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, // // Re-distribute weights - // We assume obj is 50%/50% null/not-null, and if it's not null, - // it's 50%/50% of being of the expected type. TODO: rely on profile data + // We assume obj is 50%/50% null/not-null (TODO: use profile data) + // and rely on profile for the slow path. // nullcheckBb->inheritWeight(prevBb); fallbackBb->inheritWeightPercentage(nullcheckBb, 50); - typeCheckBb->inheritWeightPercentage(fallbackBb, 50); + typeCheckBb->inheritWeightPercentage(fallbackBb, 100 - likelyClass.likelihood); block->inheritWeight(prevBb); // diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 02db5a77192694..3b85ed8d43d51d 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -5603,9 +5603,9 @@ GenTree* Compiler::impCastClassOrIsInstToTree( } else { - // Maybe the late-cast-expand phase can have a better luck expanding this cast. + // Maybe the late-cast-expand phase will have a better luck expanding this cast. call->gtCallMoreFlags |= GTF_CALL_M_CAST_CAN_BE_EXPANDED; - call->gtRawILOffset = ilOffset; + call->gtCastHelperILOffset = ilOffset; } return call; } From a5345fa9ba60868f7fc862bfec8a29a4e441cac2 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 17 Jan 2024 15:20:27 +0100 Subject: [PATCH 03/15] Address feedback --- src/coreclr/jit/helperexpansion.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index f30a8c01e7a3d3..b570d4b84691d8 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1581,7 +1581,8 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, if ((call->gtCallMoreFlags & GTF_CALL_M_CAST_CAN_BE_EXPANDED) == 0) { - // We either already expanded this cast helper or it's not eligible for expansion + // It's not eligible for expansion (already expanded in importer) + // To be removed once we move cast expansion here completely. return false; } @@ -1694,9 +1695,6 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, clsArg = call->gtArgs.GetUserArgByIndex(0)->GetNode(); GenTree* objArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); - // Make sure we don't try to expand this call again - call->gtCallMoreFlags &= ~GTF_CALL_M_CAST_CAN_BE_EXPANDED; - // Grab a temp to store the result. const unsigned tmpNum = lvaGrabTemp(true DEBUGARG("local for result")); lvaTable[tmpNum].lvType = call->TypeGet(); @@ -1789,9 +1787,10 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, // We assume obj is 50%/50% null/not-null (TODO: use profile data) // and rely on profile for the slow path. // + assert(likelyClass.likelihood <= 100); nullcheckBb->inheritWeight(prevBb); - fallbackBb->inheritWeightPercentage(nullcheckBb, 50); - typeCheckBb->inheritWeightPercentage(fallbackBb, 100 - likelyClass.likelihood); + typeCheckBb->inheritWeightPercentage(nullcheckBb, 50); + fallbackBb->inheritWeightPercentage(typeCheckBb, 100 - likelyClass.likelihood); block->inheritWeight(prevBb); // From efdeae6416211b38f3c82974e4d3680fc84d5736 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 17 Jan 2024 15:30:10 +0100 Subject: [PATCH 04/15] Clean up --- src/coreclr/jit/importer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 3b85ed8d43d51d..d82e2662495544 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -5601,9 +5601,10 @@ GenTree* Compiler::impCastClassOrIsInstToTree( compCurBB->SetFlags(BBF_HAS_HISTOGRAM_PROFILE); } } - else + else if (!isCastClass && impIsCastHelperMayHaveProfileData(helper)) { // Maybe the late-cast-expand phase will have a better luck expanding this cast. + // TODO: enable for cast-class as well. call->gtCallMoreFlags |= GTF_CALL_M_CAST_CAN_BE_EXPANDED; call->gtCastHelperILOffset = ilOffset; } From 587c66ef369bc063ab1c3f16d6a928bff3112830 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 17 Jan 2024 15:33:43 +0100 Subject: [PATCH 05/15] Improve TP --- src/coreclr/jit/compiler.h | 11 +++++++++++ src/coreclr/jit/helperexpansion.cpp | 7 +++++++ src/coreclr/jit/importer.cpp | 1 + 3 files changed, 19 insertions(+) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 5ba56480f91720..7068c7e3dcdb09 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7558,6 +7558,7 @@ class Compiler #define OMF_HAS_TLS_FIELD 0x00010000 // Method contains TLS field access #define OMF_HAS_SPECIAL_INTRINSICS 0x00020000 // Method contains special intrinsics expanded in late phases #define OMF_HAS_RECURSIVE_TAILCALL 0x00040000 // Method contains recursive tail call +#define OMF_HAS_EXPANDABLE_CAST 0x00080000 // Method contains casts eligible for late expansion // clang-format on @@ -7598,6 +7599,16 @@ class Compiler optMethodFlags |= OMF_HAS_STATIC_INIT; } + bool doesMethodHaveExpandableCasts() + { + return (optMethodFlags & OMF_HAS_EXPANDABLE_CAST) != 0; + } + + void setMethodHasExpandableCasts() + { + optMethodFlags |= OMF_HAS_EXPANDABLE_CAST; + } + bool doesMethodHaveGuardedDevirtualization() const { return (optMethodFlags & OMF_HAS_GUARDEDDEVIRT) != 0; diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index b570d4b84691d8..06ff7046fcf59c 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1556,8 +1556,15 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, PhaseStatus Compiler::fgLateCastExpansion() { + if (!doesMethodHaveExpandableCasts()) + { + // Nothing to expand in the current method + return PhaseStatus::MODIFIED_NOTHING; + } + if (!opts.IsOptimizedWithProfile()) { + // Currently, we're only interested in expanding cast helpers using profile data return PhaseStatus::MODIFIED_NOTHING; } diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index d82e2662495544..b584ad5c85ca60 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -5607,6 +5607,7 @@ GenTree* Compiler::impCastClassOrIsInstToTree( // TODO: enable for cast-class as well. call->gtCallMoreFlags |= GTF_CALL_M_CAST_CAN_BE_EXPANDED; call->gtCastHelperILOffset = ilOffset; + setMethodHasExpandableCasts(); } return call; } From a72014f38437ab85a1d999180b79de9d19564e26 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 17 Jan 2024 19:22:23 +0100 Subject: [PATCH 06/15] Handle MustNot case --- src/coreclr/jit/helperexpansion.cpp | 48 ++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 06ff7046fcf59c..4019c562468b81 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1664,11 +1664,9 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, } TypeCompareState castResult = info.compCompHnd->compareTypesForCast(likelyCls, expectedCls); - if (castResult != TypeCompareState::Must) + if (castResult == TypeCompareState::May) { - JITDUMP("Likely class can't be casted to the requested class.\n"); - // TODO: support MustNot - if we know for sure that the likely class always fail the cast - // we can optimize it like "if (obj == null || obj->pMT == likelyCls) return null;" + JITDUMP("compareTypesForCast returned May for this candidate\n"); return false; } @@ -1811,6 +1809,48 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, assert(BasicBlock::sameEHRegion(prevBb, fallbackBb)); assert(BasicBlock::sameEHRegion(prevBb, typeCheckBb)); + // Rare case: if profile points to a class that never passes the type check + // we can also optimize it like this: + // + // prevBb: [weight: 1.0] + // ... + // + // nullcheckBb (BBJ_COND): [weight: 1.0] + // tmp = obj; + // if (tmp == null) goto block; + // + // typeCheckBb (BBJ_COND): [weight: 0.5] + // if (tmp->pMT == likelyClass) goto assignNullBb; + // + // fallbackBb (BBJ_ALWAYS): [weight: ] + // tmp = helper_call(cls, tmp); + // goto block; + // + // assignNullBb (BBJ_ALWAYS): [weight: ] + // tmp = null; + // + // block (BBJ_any): [weight: 1.0] + // use(tmp); + // + + if (castResult == TypeCompareState::MustNot) + { + // Block 4: assignNullBb + // + // Let's do everything here instead of adding ifs in the previous blocks to make them simpler. + BasicBlock* assignNullBb = + fgNewBBFromTreeAfter(BBJ_ALWAYS, fallbackBb, gtNewTempStore(tmpNum, gtNewNull()), debugInfo, block); + + // typeCheckBb now jumps to assignNullBb on true + typeCheckBb->SetTrueTarget(assignNullBb); + // typeCheckBb is no longer a predecessor of block + fgRemoveRefPred(block, typeCheckBb); + fgAddRefPred(assignNullBb, typeCheckBb); + fgAddRefPred(block, assignNullBb); + assignNullBb->inheritWeightPercentage(typeCheckBb, likelyClass.likelihood); + assignNullBb->bbNatLoopNum = prevBb->bbNatLoopNum; + } + // Bonus step: merge prevBb with nullcheckBb as they are likely to be mergeable if (fgCanCompactBlocks(prevBb, nullcheckBb)) { From b147e67c117493660cd1edcda5e1a7e80c38bba1 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 18 Jan 2024 00:12:08 +0100 Subject: [PATCH 07/15] Final clean up --- src/coreclr/jit/assertionprop.cpp | 24 +++++++++++--------- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/compphases.h | 1 + src/coreclr/jit/gentree.h | 1 + src/coreclr/jit/helperexpansion.cpp | 35 +++++++++++++++++++++++++++-- 5 files changed, 49 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index 034bd7264bdd54..1e1db6c577f72c 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -4764,17 +4764,14 @@ GenTree* Compiler::optAssertionProp_Call(ASSERT_VALARG_TP assertions, GenTreeCal { return optAssertionProp_Update(call, call, stmt); } - else if (!optLocalAssertionProp && (call->gtCallType == CT_HELPER)) - { - if (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFINTERFACE) || - call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFARRAY) || - call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFCLASS) || - call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFANY) || - call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTINTERFACE) || - call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTARRAY) || - call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTCLASS) || - call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTANY) || - call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTCLASS_SPECIAL)) + else if (!optLocalAssertionProp && call->IsHelperCall()) + { + const CorInfoHelpFunc helper = eeGetHelperNum(call->gtCallMethHnd); + if ((helper == CORINFO_HELP_ISINSTANCEOFINTERFACE) || (helper == CORINFO_HELP_ISINSTANCEOFARRAY) || + (helper == CORINFO_HELP_ISINSTANCEOFCLASS) || (helper == CORINFO_HELP_ISINSTANCEOFANY) || + (helper == CORINFO_HELP_CHKCASTINTERFACE) || (helper == CORINFO_HELP_CHKCASTARRAY) || + (helper == CORINFO_HELP_CHKCASTCLASS) || (helper == CORINFO_HELP_CHKCASTANY) || + (helper == CORINFO_HELP_CHKCASTCLASS_SPECIAL)) { GenTree* arg1 = call->gtArgs.GetArgByIndex(1)->GetNode(); if (arg1->gtOper != GT_LCL_VAR) @@ -4804,6 +4801,11 @@ GenTree* Compiler::optAssertionProp_Call(ASSERT_VALARG_TP assertions, GenTreeCal return optAssertionProp_Update(arg1, call, stmt); } + + // TODO: check optAssertionIsNonNull for the object argument and replace + // the helper with its nonnull version, e.g.: + // CORINFO_HELP_ISINSTANCEOFANY -> CORINFO_HELP_ISINSTANCEOFANY_NONNULL + // so then fgLateCastExpansion can skip the null check. } } diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 94c42162e27f73..240dcb5d153582 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5056,7 +5056,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl DoPhase(this, PHASE_EXPAND_TLS, &Compiler::fgExpandThreadLocalAccess); // Expand casts - DoPhase(this, PHASE_EXPAND_STATIC_INIT, &Compiler::fgLateCastExpansion); + DoPhase(this, PHASE_EXPAND_CASTS, &Compiler::fgLateCastExpansion); // Insert GC Polls DoPhase(this, PHASE_INSERT_GC_POLLS, &Compiler::fgInsertGCPolls); diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index a51ab4adc4f88e..23930985319769 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -98,6 +98,7 @@ CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS2, "Compute edge weights (2, f CompPhaseNameMacro(PHASE_STRESS_SPLIT_TREE, "Stress gtSplitTree", false, -1, false) CompPhaseNameMacro(PHASE_EXPAND_RTLOOKUPS, "Expand runtime lookups", false, -1, true) CompPhaseNameMacro(PHASE_EXPAND_STATIC_INIT, "Expand static init", false, -1, true) +CompPhaseNameMacro(PHASE_EXPAND_CASTS, "Expand casts", false, -1, true) CompPhaseNameMacro(PHASE_EXPAND_TLS, "Expand TLS access", false, -1, true) CompPhaseNameMacro(PHASE_INSERT_GC_POLLS, "Insert GC Polls", false, -1, true) CompPhaseNameMacro(PHASE_CREATE_THROW_HELPERS, "Create throw helper blocks", false, -1, true) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 5e29184b992d2a..83ab6cc9611019 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4080,6 +4080,7 @@ enum GenTreeCallFlags : unsigned int GTF_CALL_M_HAS_LATE_DEVIRT_INFO = 0x01000000, // this call has late devirtualzation info GTF_CALL_M_LDVIRTFTN_INTERFACE = 0x02000000, // ldvirtftn on an interface type GTF_CALL_M_CAST_CAN_BE_EXPANDED = 0x04000000, // this cast (helper call) can be expanded if it's profitable + GTF_CALL_M_CAST_NON_NULL = 0x08000000, // this cast (helper call) can be expanded if it's profitable }; inline constexpr GenTreeCallFlags operator ~(GenTreeCallFlags a) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 4019c562468b81..e7b5ef7d335cbc 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1554,6 +1554,25 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, return true; } +//------------------------------------------------------------------------------ +// fgLateCastExpansion: Partially inline various cast helpers, e.g.: +// +// tmp = CORINFO_HELP_ISINSTANCEOFINTERFACE(clsHandle, obj); +// +// into: +// +// tmp = obj; +// if ((obj != null) && (obj->pMT != likelyClassHandle)) +// { +// tmp = CORINFO_HELP_ISINSTANCEOFINTERFACE(clsHandle, obj); +// } +// +// The goal is to move cast expansion logic from the importer to this phase, for now, +// this phase only supports "isinst" and for profiled casts only. +// +// Returns: +// PhaseStatus indicating what, if anything, was changed. +// PhaseStatus Compiler::fgLateCastExpansion() { if (!doesMethodHaveExpandableCasts()) @@ -1578,6 +1597,18 @@ PhaseStatus Compiler::fgLateCastExpansion() return fgExpandHelper<&Compiler::fgLateCastExpansionForCall>(true); } +//------------------------------------------------------------------------------ +// fgLateCastExpansionForCall : Expand specific cast helper, see +// fgLateCastExpansion's comments. +// +// Arguments: +// block - Block containing the cast helper to expand +// stmt - Statement containing the cast helper +// call - The cast helper +// +// Returns: +// True if expanded, false otherwise. +// bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) { if (!call->IsHelperCall() || !impIsCastHelperMayHaveProfileData(call->GetHelperNum())) @@ -1774,7 +1805,7 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, // Wire up the blocks // prevBb->SetTarget(nullcheckBb); - nullcheckBb->SetTrueTarget(fallbackBb); + nullcheckBb->SetTrueTarget(block); nullcheckBb->SetFalseTarget(typeCheckBb); typeCheckBb->SetTrueTarget(block); typeCheckBb->SetFalseTarget(fallbackBb); @@ -1782,7 +1813,7 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, fgRemoveRefPred(block, prevBb); fgAddRefPred(nullcheckBb, prevBb); fgAddRefPred(typeCheckBb, nullcheckBb); - fgAddRefPred(fallbackBb, nullcheckBb); + fgAddRefPred(block, nullcheckBb); fgAddRefPred(fallbackBb, typeCheckBb); fgAddRefPred(block, typeCheckBb); fgAddRefPred(block, fallbackBb); From aee96f2a47d78184e26dd8e37f5777d9e41c5c2b Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 18 Jan 2024 03:31:17 +0100 Subject: [PATCH 08/15] No, this is the final clean up --- src/coreclr/jit/assertionprop.cpp | 2 +- src/coreclr/jit/gentree.h | 3 +- src/coreclr/jit/helperexpansion.cpp | 235 ++++++++++++++-------------- 3 files changed, 120 insertions(+), 120 deletions(-) diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index 1e1db6c577f72c..195e28ee845edb 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -4802,7 +4802,7 @@ GenTree* Compiler::optAssertionProp_Call(ASSERT_VALARG_TP assertions, GenTreeCal return optAssertionProp_Update(arg1, call, stmt); } - // TODO: check optAssertionIsNonNull for the object argument and replace + // TODO-InlineCast: check optAssertionIsNonNull for the object argument and replace // the helper with its nonnull version, e.g.: // CORINFO_HELP_ISINSTANCEOFANY -> CORINFO_HELP_ISINSTANCEOFANY_NONNULL // so then fgLateCastExpansion can skip the null check. diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 83ab6cc9611019..356ac49d9a0cd4 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4079,8 +4079,7 @@ enum GenTreeCallFlags : unsigned int GTF_CALL_M_EXPANDED_EARLY = 0x00800000, // the Virtual Call target address is expanded and placed in gtControlExpr in Morph rather than in Lower GTF_CALL_M_HAS_LATE_DEVIRT_INFO = 0x01000000, // this call has late devirtualzation info GTF_CALL_M_LDVIRTFTN_INTERFACE = 0x02000000, // ldvirtftn on an interface type - GTF_CALL_M_CAST_CAN_BE_EXPANDED = 0x04000000, // this cast (helper call) can be expanded if it's profitable - GTF_CALL_M_CAST_NON_NULL = 0x08000000, // this cast (helper call) can be expanded if it's profitable + GTF_CALL_M_CAST_CAN_BE_EXPANDED = 0x04000000, // this cast (helper call) can be expanded if it's profitable. To be removed. }; inline constexpr GenTreeCallFlags operator ~(GenTreeCallFlags a) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index e7b5ef7d335cbc..aeb1c8a6e410d3 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1598,54 +1598,35 @@ PhaseStatus Compiler::fgLateCastExpansion() } //------------------------------------------------------------------------------ -// fgLateCastExpansionForCall : Expand specific cast helper, see -// fgLateCastExpansion's comments. +// PickLikelyClass: picks a likely class handle corresponding to the given IL offset // // Arguments: -// block - Block containing the cast helper to expand -// stmt - Statement containing the cast helper -// call - The cast helper +// comp - Compiler instance +// offset - IL offset +// likelihood - [out] likelihood of the returned class // // Returns: -// True if expanded, false otherwise. +// Likely class handle or NO_CLASS_HANDLE // -bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) +CORINFO_CLASS_HANDLE PickLikelyClass(Compiler* comp, IL_OFFSET offset, unsigned* likelihood) { - if (!call->IsHelperCall() || !impIsCastHelperMayHaveProfileData(call->GetHelperNum())) - { - // Not a cast helper we're interested in - return false; - } - - if ((call->gtCallMoreFlags & GTF_CALL_M_CAST_CAN_BE_EXPANDED) == 0) - { - // It's not eligible for expansion (already expanded in importer) - // To be removed once we move cast expansion here completely. - return false; - } - - JITDUMP("Attempting to expand cast helper call [%06d] in " FMT_BB "...\n", dspTreeID(call), (*pBlock)->bbNum) + // TODO-InlineCast: consider merging this helper with pickGDV - BasicBlock* block = *pBlock; - - // Currently, we only expand "isinst" and only using profile data. The long-term plan is to - // move cast expansion logic here from the importer completely. const int maxLikelyClasses = 8; LikelyClassMethodRecord likelyClasses[maxLikelyClasses]; - unsigned likelyClassCount = getLikelyClasses(likelyClasses, maxLikelyClasses, fgPgoSchema, fgPgoSchemaCount, - fgPgoData, call->gtCastHelperILOffset); + unsigned likelyClassCount = getLikelyClasses(likelyClasses, maxLikelyClasses, comp->fgPgoSchema, + comp->fgPgoSchemaCount, comp->fgPgoData, (int)offset); if (likelyClassCount == 0) { - JITDUMP("No likely classes found - bail out.\n") - return false; + return NO_CLASS_HANDLE; } #ifdef DEBUG // Print all the candidates and their likelihoods to the log for (UINT32 i = 0; i < likelyClassCount; i++) { - const char* className = eeGetClassName((CORINFO_CLASS_HANDLE)likelyClasses[i].handle); + const char* className = comp->eeGetClassName((CORINFO_CLASS_HANDLE)likelyClasses[i].handle); JITDUMP(" %u) %p (%s) [likelihood:%u%%]\n", i + 1, likelyClasses[i].handle, className, likelyClasses[i].likelihood); } @@ -1656,30 +1637,64 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, { // Reuse the random inliner's random state. CLRRandom* const random = - impInlineRoot()->m_inlineStrategy->GetRandom(JitConfig.JitRandomGuardedDevirtualization()); - - unsigned index = static_cast(random->Next(static_cast(likelyClassCount))); - likelyClasses[0].handle = likelyClasses[index].handle; - likelyClasses[0].likelihood = 100; - likelyClassCount = 1; + comp->impInlineRoot()->m_inlineStrategy->GetRandom(JitConfig.JitRandomGuardedDevirtualization()); + unsigned index = static_cast(random->Next(static_cast(likelyClassCount))); + *likelihood = 100; + return (CORINFO_CLASS_HANDLE)likelyClasses[index].handle; } #endif + *likelihood = likelyClasses[0].likelihood; + return (CORINFO_CLASS_HANDLE)likelyClasses[0].handle; +} + +//------------------------------------------------------------------------------ +// fgLateCastExpansionForCall : Expand specific cast helper, see +// fgLateCastExpansion's comments. +// +// Arguments: +// block - Block containing the cast helper to expand +// stmt - Statement containing the cast helper +// call - The cast helper +// +// Returns: +// True if expanded, false otherwise. +// +bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call) +{ + if (!call->IsHelperCall() || !impIsCastHelperMayHaveProfileData(call->GetHelperNum())) + { + // Not a cast helper we're interested in + return false; + } + + if ((call->gtCallMoreFlags & GTF_CALL_M_CAST_CAN_BE_EXPANDED) == 0) + { + // It's not eligible for expansion (already expanded in importer) + // To be removed once we move cast expansion here completely. + return false; + } - LikelyClassMethodRecord likelyClass = likelyClasses[0]; - CORINFO_CLASS_HANDLE likelyCls = (CORINFO_CLASS_HANDLE)likelyClass.handle; + BasicBlock* lastBlock = *pBlock; + JITDUMP("Attempting to expand a cast helper call in " FMT_BB "...\n", lastBlock->bbNum); + DISPTREE(call); + JITDUMP("\n"); + // Currently, we only expand "isinst" and only using profile data. The long-term plan is to + // move cast expansion logic here from the importer completely. + unsigned likelihood = 100; + CORINFO_CLASS_HANDLE likelyCls = PickLikelyClass(this, call->gtCastHelperILOffset, &likelihood); if (likelyCls == NO_CLASS_HANDLE) { // TODO: make null significant, so it could mean our object is likely just null - JITDUMP("Likely class is null - bail out.\n") + JITDUMP("Likely class is null - bail out.\n"); return false; } - // if there is a dominating candidate with >= 40% likelihood, use it - const unsigned likelihoodMinThreshold = 40; - if (likelyClass.likelihood < likelihoodMinThreshold) + // if there is a dominating candidate with >= 50% likelihood, use it + const unsigned likelihoodMinThreshold = 50; + if (likelihood < likelihoodMinThreshold) { - JITDUMP("Likely class likelihood is below %u%% - bail out.\n", likelihoodMinThreshold) + JITDUMP("Likely class likelihood is below %u%% - bail out.\n", likelihoodMinThreshold); return false; } @@ -1689,12 +1704,12 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, if (expectedCls == NO_CLASS_HANDLE) { // clsArg doesn't represent a class handle - bail out - // TODO: use VN if it's available (depends on when this phase is executed) - JITDUMP("clsArg is not a constant handle - bail out.\n") + // TODO-InlineCast: use VN if it's available (depends on when this phase is executed) + JITDUMP("clsArg is not a constant handle - bail out.\n"); return false; } - TypeCompareState castResult = info.compCompHnd->compareTypesForCast(likelyCls, expectedCls); + const TypeCompareState castResult = info.compCompHnd->compareTypesForCast(likelyCls, expectedCls); if (castResult == TypeCompareState::May) { JITDUMP("compareTypesForCast returned May for this candidate\n"); @@ -1712,18 +1727,18 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, DebugInfo debugInfo = stmt->GetDebugInfo(); // Split block right before the call tree (this is a standard pattern we use in helperexpansion.cpp) - BasicBlock* prevBb = block; + BasicBlock* prevBb = lastBlock; GenTree** callUse = nullptr; Statement* newFirstStmt = nullptr; - block = fgSplitBlockBeforeTree(block, stmt, call, &newFirstStmt, &callUse); - assert(prevBb != nullptr && block != nullptr); - *pBlock = block; + lastBlock = fgSplitBlockBeforeTree(lastBlock, stmt, call, &newFirstStmt, &callUse); + assert(prevBb != nullptr && lastBlock != nullptr); + *pBlock = lastBlock; // Block ops inserted by the split need to be morphed here since we are after morph. // We cannot morph stmt yet as we may modify it further below, and the morphing // could invalidate callUse while ((newFirstStmt != nullptr) && (newFirstStmt != stmt)) { - fgMorphStmtBlockOps(block, newFirstStmt); + fgMorphStmtBlockOps(lastBlock, newFirstStmt); newFirstStmt = newFirstStmt->GetNextStmt(); } @@ -1739,7 +1754,7 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, // Replace the original call with that temp *callUse = gtNewLclvNode(tmpNum, call->TypeGet()); - fgMorphStmtBlockOps(block, stmt); + fgMorphStmtBlockOps(lastBlock, stmt); gtUpdateStmtSideEffects(stmt); // We're going to expand this "isinst" like this: @@ -1749,28 +1764,38 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, // // nullcheckBb (BBJ_COND): [weight: 1.0] // tmp = obj; - // if (tmp == null) goto block; + // if (tmp == null) + // goto lastBlock; + // fallthrough; // // typeCheckBb (BBJ_COND): [weight: 0.5] - // if (tmp->pMT == likelyClass) goto block; + // if (tmp->pMT == likelyClass) + // goto typeCheckSucceedBb; + // fallthrough; // // fallbackBb (BBJ_ALWAYS): [weight: ] // tmp = helper_call(cls, tmp); + // goto lastBlock; // - // block (BBJ_any): [weight: 1.0] + // typeCheckSucceedBb (BBJ_ALWAYS): [weight: ] + // nop (or tmp = null) + // fallthrough; + // + // lastBlock (BBJ_any): [weight: 1.0] // use(tmp); // // Block 1: nullcheckBb - // TODO: assertionprop should leave us a mark that objArg is never null, so we can omit this check + // TODO-InlineCast: assertionprop should leave us a mark that objArg is never null, so we can omit this check // it's too late to rely on upstream phases to do this for us (unless we do optRepeat). GenTree* objTmp = gtNewLclVarNode(tmpNum); GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, objTmp, gtNewNull()); nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED; - BasicBlock* nullcheckBb = - fgNewBBFromTreeAfter(BBJ_COND, prevBb, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), debugInfo, block, true); + BasicBlock* nullcheckBb = fgNewBBFromTreeAfter(BBJ_COND, prevBb, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), + debugInfo, lastBlock, true); // Insert statements to spill call's arguments (preserving the evaluation order) + // TODO-InlineCast: don't spill cls if possible, we only need it after the nullcheck const unsigned clsTmp = lvaGrabTemp(true DEBUGARG("local for cls")); lvaTable[clsTmp].lvType = clsArg->TypeGet(); Statement* storeObjStmt = fgNewStmtAtBeg(nullcheckBb, gtNewTempStore(tmpNum, objArg), debugInfo); @@ -1790,7 +1815,7 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTree* mtCheck = gtNewOperNode(GT_EQ, TYP_INT, gtNewMethodTableLookup(gtCloneExpr(objTmp)), likelyClsNode); mtCheck->gtFlags |= GTF_RELOP_JMP_USED; GenTree* jtrue = gtNewOperNode(GT_JTRUE, TYP_VOID, mtCheck); - BasicBlock* typeCheckBb = fgNewBBFromTreeAfter(BBJ_COND, nullcheckBb, jtrue, debugInfo, block, true); + BasicBlock* typeCheckBb = fgNewBBFromTreeAfter(BBJ_COND, nullcheckBb, jtrue, debugInfo, lastBlock, true); // Block 3: fallbackBb // NOTE: we spilled call's arguments above, we need to re-create it here (we can't just modify call's args) @@ -1798,90 +1823,66 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, gtNewHelperCallNode(call->GetHelperNum(), TYP_REF, gtNewLclVarNode(clsTmp), gtNewLclVarNode(tmpNum)); fallbackCall = fgMorphCall(fallbackCall->AsCall()); gtSetEvalOrder(fallbackCall); - BasicBlock* fallbackBb = - fgNewBBFromTreeAfter(BBJ_ALWAYS, typeCheckBb, gtNewTempStore(tmpNum, fallbackCall), debugInfo, block); + GenTree* fallbackTree = gtNewTempStore(tmpNum, fallbackCall); + BasicBlock* fallbackBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, typeCheckBb, fallbackTree, debugInfo, lastBlock, true); + + // Block 4: typeCheckSucceedBb + GenTree* typeCheckSucceedTree; + if (castResult == TypeCompareState::MustNot) + { + // With TypeCompareState::MustNot it means our likely class never passes the type check. + // it means we just check obj's type for being likelyclass and return null if it's true. + typeCheckSucceedTree = gtNewTempStore(tmpNum, gtNewNull()); + } + else + { + // Otherwise, no-op (for simplicity, some upstream phase will collect this block) + typeCheckSucceedTree = gtNewNothingNode(); + } + BasicBlock* typeCheckSucceedBb = + fgNewBBFromTreeAfter(BBJ_ALWAYS, fallbackBb, typeCheckSucceedTree, debugInfo, lastBlock); // // Wire up the blocks // prevBb->SetTarget(nullcheckBb); - nullcheckBb->SetTrueTarget(block); + nullcheckBb->SetTrueTarget(lastBlock); nullcheckBb->SetFalseTarget(typeCheckBb); - typeCheckBb->SetTrueTarget(block); + typeCheckBb->SetTrueTarget(typeCheckSucceedBb); typeCheckBb->SetFalseTarget(fallbackBb); - fallbackBb->SetTarget(block); - fgRemoveRefPred(block, prevBb); + fallbackBb->SetTarget(lastBlock); + fgRemoveRefPred(lastBlock, prevBb); fgAddRefPred(nullcheckBb, prevBb); fgAddRefPred(typeCheckBb, nullcheckBb); - fgAddRefPred(block, nullcheckBb); + fgAddRefPred(lastBlock, nullcheckBb); fgAddRefPred(fallbackBb, typeCheckBb); - fgAddRefPred(block, typeCheckBb); - fgAddRefPred(block, fallbackBb); + fgAddRefPred(lastBlock, typeCheckSucceedBb); + fgAddRefPred(typeCheckSucceedBb, typeCheckBb); + fgAddRefPred(lastBlock, fallbackBb); // // Re-distribute weights // We assume obj is 50%/50% null/not-null (TODO: use profile data) // and rely on profile for the slow path. // - assert(likelyClass.likelihood <= 100); nullcheckBb->inheritWeight(prevBb); typeCheckBb->inheritWeightPercentage(nullcheckBb, 50); - fallbackBb->inheritWeightPercentage(typeCheckBb, 100 - likelyClass.likelihood); - block->inheritWeight(prevBb); + fallbackBb->inheritWeightPercentage(typeCheckBb, likelihood); + typeCheckSucceedBb->inheritWeightPercentage(typeCheckBb, 100 - likelihood); + lastBlock->inheritWeight(prevBb); // // Update bbNatLoopNum for all new blocks and validate EH regions // - nullcheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; - fallbackBb->bbNatLoopNum = prevBb->bbNatLoopNum; - typeCheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; - assert(BasicBlock::sameEHRegion(prevBb, block)); + nullcheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; + fallbackBb->bbNatLoopNum = prevBb->bbNatLoopNum; + typeCheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; + typeCheckSucceedBb->bbNatLoopNum = prevBb->bbNatLoopNum; + assert(BasicBlock::sameEHRegion(prevBb, lastBlock)); assert(BasicBlock::sameEHRegion(prevBb, nullcheckBb)); assert(BasicBlock::sameEHRegion(prevBb, fallbackBb)); assert(BasicBlock::sameEHRegion(prevBb, typeCheckBb)); - // Rare case: if profile points to a class that never passes the type check - // we can also optimize it like this: - // - // prevBb: [weight: 1.0] - // ... - // - // nullcheckBb (BBJ_COND): [weight: 1.0] - // tmp = obj; - // if (tmp == null) goto block; - // - // typeCheckBb (BBJ_COND): [weight: 0.5] - // if (tmp->pMT == likelyClass) goto assignNullBb; - // - // fallbackBb (BBJ_ALWAYS): [weight: ] - // tmp = helper_call(cls, tmp); - // goto block; - // - // assignNullBb (BBJ_ALWAYS): [weight: ] - // tmp = null; - // - // block (BBJ_any): [weight: 1.0] - // use(tmp); - // - - if (castResult == TypeCompareState::MustNot) - { - // Block 4: assignNullBb - // - // Let's do everything here instead of adding ifs in the previous blocks to make them simpler. - BasicBlock* assignNullBb = - fgNewBBFromTreeAfter(BBJ_ALWAYS, fallbackBb, gtNewTempStore(tmpNum, gtNewNull()), debugInfo, block); - - // typeCheckBb now jumps to assignNullBb on true - typeCheckBb->SetTrueTarget(assignNullBb); - // typeCheckBb is no longer a predecessor of block - fgRemoveRefPred(block, typeCheckBb); - fgAddRefPred(assignNullBb, typeCheckBb); - fgAddRefPred(block, assignNullBb); - assignNullBb->inheritWeightPercentage(typeCheckBb, likelyClass.likelihood); - assignNullBb->bbNatLoopNum = prevBb->bbNatLoopNum; - } - // Bonus step: merge prevBb with nullcheckBb as they are likely to be mergeable if (fgCanCompactBlocks(prevBb, nullcheckBb)) { From 2e6795c9266262b7cbb86751a760fee4a3ada6a9 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 18 Jan 2024 04:38:11 +0100 Subject: [PATCH 09/15] Clean up --- src/coreclr/jit/helperexpansion.cpp | 141 ++++++++++++++++------------ 1 file changed, 83 insertions(+), 58 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index aeb1c8a6e410d3..1447ce602e3d8f 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -33,6 +33,55 @@ static GenTree* SpillExpression(Compiler* comp, GenTree* expr, BasicBlock* exprB return comp->gtNewLclVarNode(tmpNum); }; +//------------------------------------------------------------------------------ +// SplitAtTreeAndReplaceItWithLocal : Split block at the given tree and replace it with a local +// See comments in gtSplitTree and fgSplitBlockBeforeTree +// TODO: use this function in more places in this file. +// +// Arguments: +// comp - Compiler instance +// block - Block to split +// stmt - Statement containing the tree to split at +// tree - Tree to split at +// topBlock - [out] Top block after the split +// bottomBlock - [out] Bottom block after the split +// +// Return Value: +// Number of the local that replaces the tree +// +unsigned SplitAtTreeAndReplaceItWithLocal( + Compiler* comp, BasicBlock* block, Statement* stmt, GenTree* tree, BasicBlock** topBlock, BasicBlock** bottomBlock) +{ + BasicBlock* prevBb = block; + GenTree** callUse = nullptr; + Statement* newFirstStmt = nullptr; + block = comp->fgSplitBlockBeforeTree(block, stmt, tree, &newFirstStmt, &callUse); + assert(prevBb != nullptr && block != nullptr); + + // Block ops inserted by the split need to be morphed here since we are after morph. + // We cannot morph stmt yet as we may modify it further below, and the morphing + // could invalidate callUse + while ((newFirstStmt != nullptr) && (newFirstStmt != stmt)) + { + comp->fgMorphStmtBlockOps(block, newFirstStmt); + newFirstStmt = newFirstStmt->GetNextStmt(); + } + + // Grab a temp to store the result. + const unsigned tmpNum = comp->lvaGrabTemp(true DEBUGARG("replacement local")); + comp->lvaTable[tmpNum].lvType = tree->TypeGet(); + + // Replace the original call with that temp + *callUse = comp->gtNewLclvNode(tmpNum, tree->TypeGet()); + + comp->fgMorphStmtBlockOps(block, stmt); + comp->gtUpdateStmtSideEffects(stmt); + + *topBlock = prevBb; + *bottomBlock = block; + return tmpNum; +} + //------------------------------------------------------------------------------ // gtNewRuntimeLookupHelperCallNode : Helper to create a runtime lookup call helper node. // @@ -1674,8 +1723,8 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, return false; } - BasicBlock* lastBlock = *pBlock; - JITDUMP("Attempting to expand a cast helper call in " FMT_BB "...\n", lastBlock->bbNum); + BasicBlock* block = *pBlock; + JITDUMP("Attempting to expand a cast helper call in " FMT_BB "...\n", block->bbNum); DISPTREE(call); JITDUMP("\n"); @@ -1726,36 +1775,15 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, DebugInfo debugInfo = stmt->GetDebugInfo(); - // Split block right before the call tree (this is a standard pattern we use in helperexpansion.cpp) - BasicBlock* prevBb = lastBlock; - GenTree** callUse = nullptr; - Statement* newFirstStmt = nullptr; - lastBlock = fgSplitBlockBeforeTree(lastBlock, stmt, call, &newFirstStmt, &callUse); - assert(prevBb != nullptr && lastBlock != nullptr); - *pBlock = lastBlock; - // Block ops inserted by the split need to be morphed here since we are after morph. - // We cannot morph stmt yet as we may modify it further below, and the morphing - // could invalidate callUse - while ((newFirstStmt != nullptr) && (newFirstStmt != stmt)) - { - fgMorphStmtBlockOps(lastBlock, newFirstStmt); - newFirstStmt = newFirstStmt->GetNextStmt(); - } + BasicBlock* firstBb; + BasicBlock* lastBb; + const unsigned tmpNum = SplitAtTreeAndReplaceItWithLocal(this, block, stmt, call, &firstBb, &lastBb); + lvaSetClass(tmpNum, expectedCls); // Reload the arguments after the split clsArg = call->gtArgs.GetUserArgByIndex(0)->GetNode(); GenTree* objArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); - - // Grab a temp to store the result. - const unsigned tmpNum = lvaGrabTemp(true DEBUGARG("local for result")); - lvaTable[tmpNum].lvType = call->TypeGet(); - lvaSetClass(tmpNum, expectedCls); - - // Replace the original call with that temp - *callUse = gtNewLclvNode(tmpNum, call->TypeGet()); - - fgMorphStmtBlockOps(lastBlock, stmt); - gtUpdateStmtSideEffects(stmt); + *pBlock = lastBb; // We're going to expand this "isinst" like this: // @@ -1766,12 +1794,10 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, // tmp = obj; // if (tmp == null) // goto lastBlock; - // fallthrough; // // typeCheckBb (BBJ_COND): [weight: 0.5] // if (tmp->pMT == likelyClass) // goto typeCheckSucceedBb; - // fallthrough; // // fallbackBb (BBJ_ALWAYS): [weight: ] // tmp = helper_call(cls, tmp); @@ -1779,7 +1805,6 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, // // typeCheckSucceedBb (BBJ_ALWAYS): [weight: ] // nop (or tmp = null) - // fallthrough; // // lastBlock (BBJ_any): [weight: 1.0] // use(tmp); @@ -1791,8 +1816,8 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTree* objTmp = gtNewLclVarNode(tmpNum); GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, objTmp, gtNewNull()); nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED; - BasicBlock* nullcheckBb = fgNewBBFromTreeAfter(BBJ_COND, prevBb, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), - debugInfo, lastBlock, true); + BasicBlock* nullcheckBb = fgNewBBFromTreeAfter(BBJ_COND, firstBb, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), + debugInfo, lastBb, true); // Insert statements to spill call's arguments (preserving the evaluation order) // TODO-InlineCast: don't spill cls if possible, we only need it after the nullcheck @@ -1815,7 +1840,7 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTree* mtCheck = gtNewOperNode(GT_EQ, TYP_INT, gtNewMethodTableLookup(gtCloneExpr(objTmp)), likelyClsNode); mtCheck->gtFlags |= GTF_RELOP_JMP_USED; GenTree* jtrue = gtNewOperNode(GT_JTRUE, TYP_VOID, mtCheck); - BasicBlock* typeCheckBb = fgNewBBFromTreeAfter(BBJ_COND, nullcheckBb, jtrue, debugInfo, lastBlock, true); + BasicBlock* typeCheckBb = fgNewBBFromTreeAfter(BBJ_COND, nullcheckBb, jtrue, debugInfo, lastBb, true); // Block 3: fallbackBb // NOTE: we spilled call's arguments above, we need to re-create it here (we can't just modify call's args) @@ -1824,7 +1849,7 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, fallbackCall = fgMorphCall(fallbackCall->AsCall()); gtSetEvalOrder(fallbackCall); GenTree* fallbackTree = gtNewTempStore(tmpNum, fallbackCall); - BasicBlock* fallbackBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, typeCheckBb, fallbackTree, debugInfo, lastBlock, true); + BasicBlock* fallbackBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, typeCheckBb, fallbackTree, debugInfo, lastBb, true); // Block 4: typeCheckSucceedBb GenTree* typeCheckSucceedTree; @@ -1840,53 +1865,53 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, typeCheckSucceedTree = gtNewNothingNode(); } BasicBlock* typeCheckSucceedBb = - fgNewBBFromTreeAfter(BBJ_ALWAYS, fallbackBb, typeCheckSucceedTree, debugInfo, lastBlock); + fgNewBBFromTreeAfter(BBJ_ALWAYS, fallbackBb, typeCheckSucceedTree, debugInfo, lastBb); // // Wire up the blocks // - prevBb->SetTarget(nullcheckBb); - nullcheckBb->SetTrueTarget(lastBlock); + firstBb->SetTarget(nullcheckBb); + nullcheckBb->SetTrueTarget(lastBb); nullcheckBb->SetFalseTarget(typeCheckBb); typeCheckBb->SetTrueTarget(typeCheckSucceedBb); typeCheckBb->SetFalseTarget(fallbackBb); - fallbackBb->SetTarget(lastBlock); - fgRemoveRefPred(lastBlock, prevBb); - fgAddRefPred(nullcheckBb, prevBb); + fallbackBb->SetTarget(lastBb); + fgRemoveRefPred(lastBb, firstBb); + fgAddRefPred(nullcheckBb, firstBb); fgAddRefPred(typeCheckBb, nullcheckBb); - fgAddRefPred(lastBlock, nullcheckBb); + fgAddRefPred(lastBb, nullcheckBb); fgAddRefPred(fallbackBb, typeCheckBb); - fgAddRefPred(lastBlock, typeCheckSucceedBb); + fgAddRefPred(lastBb, typeCheckSucceedBb); fgAddRefPred(typeCheckSucceedBb, typeCheckBb); - fgAddRefPred(lastBlock, fallbackBb); + fgAddRefPred(lastBb, fallbackBb); // // Re-distribute weights // We assume obj is 50%/50% null/not-null (TODO: use profile data) // and rely on profile for the slow path. // - nullcheckBb->inheritWeight(prevBb); + nullcheckBb->inheritWeight(firstBb); typeCheckBb->inheritWeightPercentage(nullcheckBb, 50); - fallbackBb->inheritWeightPercentage(typeCheckBb, likelihood); - typeCheckSucceedBb->inheritWeightPercentage(typeCheckBb, 100 - likelihood); - lastBlock->inheritWeight(prevBb); + fallbackBb->inheritWeightPercentage(typeCheckBb, 100 - likelihood); + typeCheckSucceedBb->inheritWeightPercentage(typeCheckBb, likelihood); + lastBb->inheritWeight(firstBb); // // Update bbNatLoopNum for all new blocks and validate EH regions // - nullcheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; - fallbackBb->bbNatLoopNum = prevBb->bbNatLoopNum; - typeCheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; - typeCheckSucceedBb->bbNatLoopNum = prevBb->bbNatLoopNum; - assert(BasicBlock::sameEHRegion(prevBb, lastBlock)); - assert(BasicBlock::sameEHRegion(prevBb, nullcheckBb)); - assert(BasicBlock::sameEHRegion(prevBb, fallbackBb)); - assert(BasicBlock::sameEHRegion(prevBb, typeCheckBb)); + nullcheckBb->bbNatLoopNum = firstBb->bbNatLoopNum; + fallbackBb->bbNatLoopNum = firstBb->bbNatLoopNum; + typeCheckBb->bbNatLoopNum = firstBb->bbNatLoopNum; + typeCheckSucceedBb->bbNatLoopNum = firstBb->bbNatLoopNum; + assert(BasicBlock::sameEHRegion(firstBb, lastBb)); + assert(BasicBlock::sameEHRegion(firstBb, nullcheckBb)); + assert(BasicBlock::sameEHRegion(firstBb, fallbackBb)); + assert(BasicBlock::sameEHRegion(firstBb, typeCheckBb)); // Bonus step: merge prevBb with nullcheckBb as they are likely to be mergeable - if (fgCanCompactBlocks(prevBb, nullcheckBb)) + if (fgCanCompactBlocks(firstBb, nullcheckBb)) { - fgCompactBlocks(prevBb, nullcheckBb); + fgCompactBlocks(firstBb, nullcheckBb); } return true; From a5f70795fb00f5bf2d2533284c11233ecd6d3f8d Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Fri, 19 Jan 2024 00:33:44 +0100 Subject: [PATCH 10/15] Apply suggestions from code review Co-authored-by: Jakob Botsch Nielsen --- src/coreclr/jit/helperexpansion.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 1447ce602e3d8f..8af7de67b91555 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -49,7 +49,7 @@ static GenTree* SpillExpression(Compiler* comp, GenTree* expr, BasicBlock* exprB // Return Value: // Number of the local that replaces the tree // -unsigned SplitAtTreeAndReplaceItWithLocal( +static unsigned SplitAtTreeAndReplaceItWithLocal( Compiler* comp, BasicBlock* block, Statement* stmt, GenTree* tree, BasicBlock** topBlock, BasicBlock** bottomBlock) { BasicBlock* prevBb = block; @@ -1657,7 +1657,7 @@ PhaseStatus Compiler::fgLateCastExpansion() // Returns: // Likely class handle or NO_CLASS_HANDLE // -CORINFO_CLASS_HANDLE PickLikelyClass(Compiler* comp, IL_OFFSET offset, unsigned* likelihood) +static CORINFO_CLASS_HANDLE PickLikelyClass(Compiler* comp, IL_OFFSET offset, unsigned* likelihood) { // TODO-InlineCast: consider merging this helper with pickGDV From 34523d210fd59e18ca75ec14eaeb8776b7c3b55a Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 19 Jan 2024 00:53:10 +0100 Subject: [PATCH 11/15] Address feedback --- src/coreclr/jit/helperexpansion.cpp | 24 +++++------------------- src/coreclr/jit/importer.cpp | 1 - src/coreclr/jit/morph.cpp | 14 +++++++++++--- 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 1447ce602e3d8f..923b0243fad2c5 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1781,7 +1781,6 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, lvaSetClass(tmpNum, expectedCls); // Reload the arguments after the split - clsArg = call->gtArgs.GetUserArgByIndex(0)->GetNode(); GenTree* objArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); *pBlock = lastBb; @@ -1819,22 +1818,14 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, BasicBlock* nullcheckBb = fgNewBBFromTreeAfter(BBJ_COND, firstBb, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), debugInfo, lastBb, true); - // Insert statements to spill call's arguments (preserving the evaluation order) - // TODO-InlineCast: don't spill cls if possible, we only need it after the nullcheck - const unsigned clsTmp = lvaGrabTemp(true DEBUGARG("local for cls")); - lvaTable[clsTmp].lvType = clsArg->TypeGet(); - Statement* storeObjStmt = fgNewStmtAtBeg(nullcheckBb, gtNewTempStore(tmpNum, objArg), debugInfo); - Statement* storeClsStmt = fgNewStmtAtBeg(nullcheckBb, gtNewTempStore(clsTmp, clsArg), debugInfo); + // Set tmp's value to obj by default (before the nullcheck) + Statement* storeObjStmt = fgNewStmtAtBeg(nullcheckBb, gtNewTempStore(tmpNum, gtCloneExpr(objArg)), debugInfo); gtSetStmtInfo(storeObjStmt); - gtSetStmtInfo(storeClsStmt); fgSetStmtSeq(storeObjStmt); - fgSetStmtSeq(storeClsStmt); - gtUpdateStmtSideEffects(storeObjStmt); - gtUpdateStmtSideEffects(storeClsStmt); // if likelyCls == clsArg, we can just use clsArg that we've just spilled to a temp // it's a sort of manual CSE. - GenTree* likelyClsNode = likelyCls == expectedCls ? gtNewLclVarNode(clsTmp) : gtNewIconEmbClsHndNode(likelyCls); + GenTree* likelyClsNode = gtNewIconEmbClsHndNode(likelyCls); // Block 2: typeCheckBb GenTree* mtCheck = gtNewOperNode(GT_EQ, TYP_INT, gtNewMethodTableLookup(gtCloneExpr(objTmp)), likelyClsNode); @@ -1843,12 +1834,7 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, BasicBlock* typeCheckBb = fgNewBBFromTreeAfter(BBJ_COND, nullcheckBb, jtrue, debugInfo, lastBb, true); // Block 3: fallbackBb - // NOTE: we spilled call's arguments above, we need to re-create it here (we can't just modify call's args) - GenTree* fallbackCall = - gtNewHelperCallNode(call->GetHelperNum(), TYP_REF, gtNewLclVarNode(clsTmp), gtNewLclVarNode(tmpNum)); - fallbackCall = fgMorphCall(fallbackCall->AsCall()); - gtSetEvalOrder(fallbackCall); - GenTree* fallbackTree = gtNewTempStore(tmpNum, fallbackCall); + GenTree* fallbackTree = gtNewTempStore(tmpNum, call); BasicBlock* fallbackBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, typeCheckBb, fallbackTree, debugInfo, lastBb, true); // Block 4: typeCheckSucceedBb @@ -1861,7 +1847,7 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, } else { - // Otherwise, no-op (for simplicity, some upstream phase will collect this block) + // Otherwise, no-op (for simplicity, some downstream phase will collect this block) typeCheckSucceedTree = gtNewNothingNode(); } BasicBlock* typeCheckSucceedBb = diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index b584ad5c85ca60..d82e2662495544 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -5607,7 +5607,6 @@ GenTree* Compiler::impCastClassOrIsInstToTree( // TODO: enable for cast-class as well. call->gtCallMoreFlags |= GTF_CALL_M_CAST_CAN_BE_EXPANDED; call->gtCastHelperILOffset = ilOffset; - setMethodHasExpandableCasts(); } return call; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 73a65b96474aec..485a5b439dbd78 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -7697,10 +7697,18 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) optMethodFlags |= OMF_NEEDS_GCPOLLS; } - if (fgGlobalMorph && IsStaticHelperEligibleForExpansion(call)) + if (fgGlobalMorph) { - // Current method has potential candidates for fgExpandStaticInit phase - setMethodHasStaticInit(); + if (IsStaticHelperEligibleForExpansion(call)) + { + // Current method has potential candidates for fgExpandStaticInit phase + setMethodHasStaticInit(); + } + else if ((call->gtCallMoreFlags & GTF_CALL_M_CAST_CAN_BE_EXPANDED) != 0) + { + // Current method has potential candidates for fgLateCastExpansion phase + setMethodHasExpandableCasts(); + } } // Morph Type.op_Equality, Type.op_Inequality, and Enum.HasFlag From c189b8004e49d35207de14bcbcb3c71b0957469e Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 19 Jan 2024 00:59:45 +0100 Subject: [PATCH 12/15] remove more code --- src/coreclr/jit/helperexpansion.cpp | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 991a583f8a5dcc..de95e3854b4154 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1790,20 +1790,19 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, // ... // // nullcheckBb (BBJ_COND): [weight: 1.0] - // tmp = obj; // if (tmp == null) // goto lastBlock; // // typeCheckBb (BBJ_COND): [weight: 0.5] - // if (tmp->pMT == likelyClass) + // if (obj->pMT == likelyClass) // goto typeCheckSucceedBb; // // fallbackBb (BBJ_ALWAYS): [weight: ] - // tmp = helper_call(cls, tmp); + // tmp = helper_call(cls, obj); // goto lastBlock; // // typeCheckSucceedBb (BBJ_ALWAYS): [weight: ] - // nop (or tmp = null) + // tmp = obj; (or tmp = null in case of 'MustNot') // // lastBlock (BBJ_any): [weight: 1.0] // use(tmp); @@ -1812,23 +1811,17 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, // Block 1: nullcheckBb // TODO-InlineCast: assertionprop should leave us a mark that objArg is never null, so we can omit this check // it's too late to rely on upstream phases to do this for us (unless we do optRepeat). - GenTree* objTmp = gtNewLclVarNode(tmpNum); - GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, objTmp, gtNewNull()); + GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, gtCloneExpr(objArg), gtNewNull()); nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED; BasicBlock* nullcheckBb = fgNewBBFromTreeAfter(BBJ_COND, firstBb, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), debugInfo, lastBb, true); - // Set tmp's value to obj by default (before the nullcheck) - Statement* storeObjStmt = fgNewStmtAtBeg(nullcheckBb, gtNewTempStore(tmpNum, gtCloneExpr(objArg)), debugInfo); - gtSetStmtInfo(storeObjStmt); - fgSetStmtSeq(storeObjStmt); - // if likelyCls == clsArg, we can just use clsArg that we've just spilled to a temp // it's a sort of manual CSE. GenTree* likelyClsNode = gtNewIconEmbClsHndNode(likelyCls); // Block 2: typeCheckBb - GenTree* mtCheck = gtNewOperNode(GT_EQ, TYP_INT, gtNewMethodTableLookup(gtCloneExpr(objTmp)), likelyClsNode); + GenTree* mtCheck = gtNewOperNode(GT_EQ, TYP_INT, gtNewMethodTableLookup(gtCloneExpr(objArg)), likelyClsNode); mtCheck->gtFlags |= GTF_RELOP_JMP_USED; GenTree* jtrue = gtNewOperNode(GT_JTRUE, TYP_VOID, mtCheck); BasicBlock* typeCheckBb = fgNewBBFromTreeAfter(BBJ_COND, nullcheckBb, jtrue, debugInfo, lastBb, true); @@ -1847,8 +1840,8 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, } else { - // Otherwise, no-op (for simplicity, some downstream phase will collect this block) - typeCheckSucceedTree = gtNewNothingNode(); + // Just return the original obj (assign to tmpNum) + typeCheckSucceedTree = gtNewTempStore(tmpNum, gtCloneExpr(objArg)); } BasicBlock* typeCheckSucceedBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, fallbackBb, typeCheckSucceedTree, debugInfo, lastBb); From 668456303d3ec6573217cc67598c0091cc119413 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 19 Jan 2024 01:04:34 +0100 Subject: [PATCH 13/15] clean up --- src/coreclr/jit/helperexpansion.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index de95e3854b4154..1bb7c77c42d488 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1790,19 +1790,19 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, // ... // // nullcheckBb (BBJ_COND): [weight: 1.0] - // if (tmp == null) + // if (obj == null) // goto lastBlock; // // typeCheckBb (BBJ_COND): [weight: 0.5] - // if (obj->pMT == likelyClass) + // if (obj->pMT == likelyCls) // goto typeCheckSucceedBb; // // fallbackBb (BBJ_ALWAYS): [weight: ] - // tmp = helper_call(cls, obj); + // tmp = helper_call(expectedCls, obj); // goto lastBlock; // // typeCheckSucceedBb (BBJ_ALWAYS): [weight: ] - // tmp = obj; (or tmp = null in case of 'MustNot') + // tmp = obj; (or tmp = null; in case of 'MustNot') // // lastBlock (BBJ_any): [weight: 1.0] // use(tmp); @@ -1816,12 +1816,10 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, BasicBlock* nullcheckBb = fgNewBBFromTreeAfter(BBJ_COND, firstBb, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), debugInfo, lastBb, true); - // if likelyCls == clsArg, we can just use clsArg that we've just spilled to a temp - // it's a sort of manual CSE. - GenTree* likelyClsNode = gtNewIconEmbClsHndNode(likelyCls); - // Block 2: typeCheckBb - GenTree* mtCheck = gtNewOperNode(GT_EQ, TYP_INT, gtNewMethodTableLookup(gtCloneExpr(objArg)), likelyClsNode); + // TODO-InlineCast: if likelyCls == expectedCls we can consider saving to a local to re-use. + GenTree* likelyClsNode = gtNewIconEmbClsHndNode(likelyCls); + GenTree* mtCheck = gtNewOperNode(GT_EQ, TYP_INT, gtNewMethodTableLookup(gtCloneExpr(objArg)), likelyClsNode); mtCheck->gtFlags |= GTF_RELOP_JMP_USED; GenTree* jtrue = gtNewOperNode(GT_JTRUE, TYP_VOID, mtCheck); BasicBlock* typeCheckBb = fgNewBBFromTreeAfter(BBJ_COND, nullcheckBb, jtrue, debugInfo, lastBb, true); From cdb7b47071109e1598b9f327d90f05daa7c50484 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 19 Jan 2024 18:46:01 +0100 Subject: [PATCH 14/15] Fix control flow --- src/coreclr/jit/helperexpansion.cpp | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index dfbaca64f59bb7..e1125df99d4d4c 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1848,6 +1848,11 @@ PhaseStatus Compiler::fgLateCastExpansion() return PhaseStatus::MODIFIED_NOTHING; } + if (JitConfig.JitConsumeProfileForCasts() == 0) + { + return PhaseStatus::MODIFIED_NOTHING; + } + const bool preferSize = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT); if (preferSize) { @@ -1935,6 +1940,9 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, return false; } + // Helper calls are never tail calls + assert(!call->IsTailCall()); + BasicBlock* block = *pBlock; JITDUMP("Attempting to expand a cast helper call in " FMT_BB "...\n", block->bbNum); DISPTREE(call); @@ -2002,6 +2010,7 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, // ... // // nullcheckBb (BBJ_COND): [weight: 1.0] + // tmp = obj; // if (obj == null) // goto lastBlock; // @@ -2014,7 +2023,7 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, // goto lastBlock; // // typeCheckSucceedBb (BBJ_ALWAYS): [weight: ] - // tmp = obj; (or tmp = null; in case of 'MustNot') + // no-op (or tmp = null; in case of 'MustNot') // // lastBlock (BBJ_any): [weight: 1.0] // use(tmp); @@ -2028,6 +2037,12 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, BasicBlock* nullcheckBb = fgNewBBFromTreeAfter(BBJ_COND, firstBb, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), debugInfo, lastBb, true); + // The very first statement in the whole expansion is to assign obj to tmp. + // We assume it's the value we're going to return in most cases. + Statement* assignTmp = fgNewStmtAtBeg(nullcheckBb, gtNewTempStore(tmpNum, gtCloneExpr(objArg)), debugInfo); + gtSetStmtInfo(assignTmp); + fgSetStmtSeq(assignTmp); + // Block 2: typeCheckBb // TODO-InlineCast: if likelyCls == expectedCls we can consider saving to a local to re-use. GenTree* likelyClsNode = gtNewIconEmbClsHndNode(likelyCls); @@ -2050,8 +2065,9 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, } else { - // Just return the original obj (assign to tmpNum) - typeCheckSucceedTree = gtNewTempStore(tmpNum, gtCloneExpr(objArg)); + // tmp is already assigned to obj, so we don't need to do anything here + // some downstream phase will collect this block. It's done for simplicity. + typeCheckSucceedTree = gtNewNothingNode(); } BasicBlock* typeCheckSucceedBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, fallbackBb, typeCheckSucceedTree, debugInfo, lastBb); From e1122c8f7a2e15d07313af55eb03965d81f30bf5 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 19 Jan 2024 18:57:46 +0100 Subject: [PATCH 15/15] Clean up --- src/coreclr/jit/helperexpansion.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index e1125df99d4d4c..ac2a65b33ef95c 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1999,10 +1999,8 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, BasicBlock* lastBb; const unsigned tmpNum = SplitAtTreeAndReplaceItWithLocal(this, block, stmt, call, &firstBb, &lastBb); lvaSetClass(tmpNum, expectedCls); - - // Reload the arguments after the split - GenTree* objArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); - *pBlock = lastBb; + GenTree* tmpNode = gtNewLclvNode(tmpNum, call->TypeGet()); + *pBlock = lastBb; // We're going to expand this "isinst" like this: // @@ -2011,11 +2009,11 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, // // nullcheckBb (BBJ_COND): [weight: 1.0] // tmp = obj; - // if (obj == null) + // if (tmp == null) // goto lastBlock; // // typeCheckBb (BBJ_COND): [weight: 0.5] - // if (obj->pMT == likelyCls) + // if (tmp->pMT == likelyCls) // goto typeCheckSucceedBb; // // fallbackBb (BBJ_ALWAYS): [weight: ] @@ -2032,21 +2030,22 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, // Block 1: nullcheckBb // TODO-InlineCast: assertionprop should leave us a mark that objArg is never null, so we can omit this check // it's too late to rely on upstream phases to do this for us (unless we do optRepeat). - GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, gtCloneExpr(objArg), gtNewNull()); + GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, tmpNode, gtNewNull()); nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED; BasicBlock* nullcheckBb = fgNewBBFromTreeAfter(BBJ_COND, firstBb, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), debugInfo, lastBb, true); // The very first statement in the whole expansion is to assign obj to tmp. // We assume it's the value we're going to return in most cases. - Statement* assignTmp = fgNewStmtAtBeg(nullcheckBb, gtNewTempStore(tmpNum, gtCloneExpr(objArg)), debugInfo); + GenTree* originalObj = gtCloneExpr(call->gtArgs.GetUserArgByIndex(1)->GetNode()); + Statement* assignTmp = fgNewStmtAtBeg(nullcheckBb, gtNewTempStore(tmpNum, originalObj), debugInfo); gtSetStmtInfo(assignTmp); fgSetStmtSeq(assignTmp); // Block 2: typeCheckBb // TODO-InlineCast: if likelyCls == expectedCls we can consider saving to a local to re-use. GenTree* likelyClsNode = gtNewIconEmbClsHndNode(likelyCls); - GenTree* mtCheck = gtNewOperNode(GT_EQ, TYP_INT, gtNewMethodTableLookup(gtCloneExpr(objArg)), likelyClsNode); + GenTree* mtCheck = gtNewOperNode(GT_EQ, TYP_INT, gtNewMethodTableLookup(gtCloneExpr(tmpNode)), likelyClsNode); mtCheck->gtFlags |= GTF_RELOP_JMP_USED; GenTree* jtrue = gtNewOperNode(GT_JTRUE, TYP_VOID, mtCheck); BasicBlock* typeCheckBb = fgNewBBFromTreeAfter(BBJ_COND, nullcheckBb, jtrue, debugInfo, lastBb, true);