diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index 94f6b0e646675f..f0fcce4dace184 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -618,8 +618,8 @@ void DefaultValueAnalysis::Run() return; } - #endif + ComputePerBlockMutatedVars(); ComputeInterBlockDefaultValues(); } @@ -658,6 +658,36 @@ static bool IsDefaultValue(GenTree* node) return node->IsIntegralConst(0) || node->IsFloatPositiveZero() || node->IsVectorZero(); } +//------------------------------------------------------------------------ +// MarkMutatedVarDsc: +// Mark a VarDsc (or its promoted fields) in the specified varset. +// +// Parameters: +// compiler - The compiler instance. +// varDsc - The var. +// mutated - [in/out] The set to update. +// +static void MarkMutatedVarDsc(Compiler* compiler, LclVarDsc* varDsc, VARSET_TP& mutated) +{ + if (varDsc->lvTracked) + { + VarSetOps::AddElemD(compiler, mutated, varDsc->lvVarIndex); + return; + } + + if (varDsc->lvPromoted) + { + for (unsigned i = 0; i < varDsc->lvFieldCnt; i++) + { + LclVarDsc* fieldDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + i); + if (fieldDsc->lvTracked) + { + VarSetOps::AddElemD(compiler, mutated, fieldDsc->lvVarIndex); + } + } + } +} + //------------------------------------------------------------------------ // UpdateMutatedLocal: // If the given node is a local store or LCL_ADDR, and the local is tracked, @@ -696,28 +726,32 @@ static void UpdateMutatedLocal(Compiler* compiler, GenTree* node, VARSET_TP& mut } LclVarDsc* varDsc = compiler->lvaGetDesc(node->AsLclVarCommon()); + MarkMutatedVarDsc(compiler, varDsc, mutated); +} - if (varDsc->lvTracked) +#ifdef DEBUG +//------------------------------------------------------------------------ +// PrintVarSet: +// Print a varset as a space-separated list of locals. +// +// Parameters: +// comp - Compiler instance +// set - The varset to print. +// +static void PrintVarSet(Compiler* comp, VARSET_VALARG_TP set) +{ + VarSetOps::Iter iter(comp, set); + unsigned varIndex = 0; + const char* sep = ""; + while (iter.NextElem(&varIndex)) { - VarSetOps::AddElemD(compiler, mutated, varDsc->lvVarIndex); - return; - } - - // For promoted structs the parent may not be tracked but the field locals - // are. When the parent is mutated, all tracked fields must be marked as - // mutated as well. - if (varDsc->lvPromoted) - { - for (unsigned i = 0; i < varDsc->lvFieldCnt; i++) - { - LclVarDsc* fieldDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + i); - if (fieldDsc->lvTracked) - { - VarSetOps::AddElemD(compiler, mutated, fieldDsc->lvVarIndex); - } - } + unsigned lclNum = comp->lvaTrackedToVarNum[varIndex]; + printf("%sV%02u", sep, lclNum); + sep = " "; } + printf("\n"); } +#endif //------------------------------------------------------------------------ // DefaultValueAnalysis::ComputePerBlockMutatedVars: @@ -751,7 +785,7 @@ void DefaultValueAnalysis::ComputePerBlockMutatedVars() } JITDUMP("Default value analysis: per-block mutated vars\n"); - DBEXEC(m_compiler->verbose, DumpMutatedVars()); + JITDUMPEXEC(DumpMutatedVars()); } //------------------------------------------------------------------------ @@ -790,7 +824,7 @@ void DefaultValueAnalysis::ComputeInterBlockDefaultValues() flow.ForwardAnalysis(callback); JITDUMP("Default value analysis: per-block mutated vars on entry\n"); - DBEXEC(m_compiler->verbose, DumpMutatedVarsIn()); + JITDUMPEXEC(DumpMutatedVarsIn()); } #ifdef DEBUG @@ -807,16 +841,7 @@ void DefaultValueAnalysis::DumpMutatedVars() if (!VarSetOps::IsEmpty(m_compiler, m_mutatedVars[block->bbNum])) { printf(" " FMT_BB " mutated: ", block->bbNum); - VarSetOps::Iter iter(m_compiler, m_mutatedVars[block->bbNum]); - unsigned varIndex = 0; - const char* sep = ""; - while (iter.NextElem(&varIndex)) - { - unsigned lclNum = m_compiler->lvaTrackedToVarNum[varIndex]; - printf("%sV%02u", sep, lclNum); - sep = " "; - } - printf("\n"); + PrintVarSet(m_compiler, m_mutatedVars[block->bbNum]); } } } @@ -835,40 +860,465 @@ void DefaultValueAnalysis::DumpMutatedVarsIn() if (VarSetOps::IsEmpty(m_compiler, m_mutatedVarsIn[block->bbNum])) { - printf(""); + printf("\n"); } else { - VarSetOps::Iter iter(m_compiler, m_mutatedVarsIn[block->bbNum]); - unsigned varIndex = 0; - const char* sep = ""; - while (iter.NextElem(&varIndex)) + PrintVarSet(m_compiler, m_mutatedVarsIn[block->bbNum]); + } + } +} +#endif + +//------------------------------------------------------------------------ +// MarkMutatedLocal: +// If the given node is a local store or LCL_ADDR, and the local is tracked, +// mark it as mutated in the provided set. Unlike UpdateMutatedLocal, all +// stores count as mutations (including stores of default values). +// +// Parameters: +// compiler - The compiler instance. +// node - The IR node to check. +// mutated - [in/out] The set to update. +// +static void MarkMutatedLocal(Compiler* compiler, GenTree* node, VARSET_TP& mutated) +{ + if (node->IsCall()) + { + auto visitDef = [&](GenTreeLclVarCommon* lcl) { + MarkMutatedVarDsc(compiler, compiler->lvaGetDesc(lcl), mutated); + return GenTree::VisitResult::Continue; + }; + node->VisitLocalDefNodes(compiler, visitDef); + } + else if (node->OperIsLocalStore() || node->OperIs(GT_LCL_ADDR)) + { + MarkMutatedVarDsc(compiler, compiler->lvaGetDesc(node->AsLclVarCommon()), mutated); + } + else + { + return; + } +} + +//------------------------------------------------------------------------ +// PreservedValueAnalysis: +// Computes which tracked locals may have been mutated since the previous +// resumption point. A local that has not been mutated since the last +// resumption does not need to be stored into a reused continuation, because +// the continuation already holds the correct value. +// +// The analysis proceeds in several steps: +// +// 0. Identify blocks that contain awaits and compute which blocks are +// reachable after resumption. +// +// 1. Per-block: compute which tracked locals are mutated (assigned any +// value or have their address taken) in each block. Only mutations +// reachable after resumption need to be taken into account. +// +// 2. Inter-block: forward dataflow to compute for each block the set of +// tracked locals that have been mutated since the previous resumption. +// At merge points the sets are unioned (a local is mutated if it is +// mutated on any incoming path). +// +class PreservedValueAnalysis +{ + Compiler* m_compiler; + + BitVecTraits m_blockTraits; + + // Blocks that have awaits in them. + BitVec m_awaitBlocks; + + // Blocks that may be entered after we resumed. + BitVec m_resumeReachableBlocks; + + // Per-block set of locals that may be mutated by each block after a resumption. + VARSET_TP* m_mutatedVars; + + // Per-block incoming set of locals possibly mutated since previous resumption. + VARSET_TP* m_mutatedVarsIn; + + // DataFlow::ForwardAnalysis callback used in Phase 2. + class DataFlowCallback + { + PreservedValueAnalysis& m_analysis; + Compiler* m_compiler; + VARSET_TP m_preMergeIn; + + public: + DataFlowCallback(PreservedValueAnalysis& analysis, Compiler* compiler) + : m_analysis(analysis) + , m_compiler(compiler) + , m_preMergeIn(VarSetOps::UninitVal()) + { + } + + void StartMerge(BasicBlock* block) + { + VarSetOps::Assign(m_compiler, m_preMergeIn, m_analysis.m_mutatedVarsIn[block->bbNum]); + } + + void Merge(BasicBlock* block, BasicBlock* predBlock, unsigned dupCount) + { + // Normal predecessor: mutatedOut = mutatedIn | mutated. + VarSetOps::UnionD(m_compiler, m_analysis.m_mutatedVarsIn[block->bbNum], + m_analysis.m_mutatedVarsIn[predBlock->bbNum]); + VarSetOps::UnionD(m_compiler, m_analysis.m_mutatedVarsIn[block->bbNum], + m_analysis.m_mutatedVars[predBlock->bbNum]); + } + + void MergeHandler(BasicBlock* block, BasicBlock* firstTryBlock, BasicBlock* lastTryBlock) + { + for (BasicBlock* tryBlock = firstTryBlock; tryBlock != lastTryBlock->Next(); tryBlock = tryBlock->Next()) + { + VarSetOps::UnionD(m_compiler, m_analysis.m_mutatedVarsIn[block->bbNum], + m_analysis.m_mutatedVarsIn[tryBlock->bbNum]); + VarSetOps::UnionD(m_compiler, m_analysis.m_mutatedVarsIn[block->bbNum], + m_analysis.m_mutatedVars[tryBlock->bbNum]); + } + } + + bool EndMerge(BasicBlock* block) + { + return !VarSetOps::Equal(m_compiler, m_preMergeIn, m_analysis.m_mutatedVarsIn[block->bbNum]); + } + }; + +public: + PreservedValueAnalysis(Compiler* compiler) + : m_compiler(compiler) + , m_blockTraits(compiler->fgBBNumMax + 1, compiler) + , m_awaitBlocks(BitVecOps::UninitVal()) + , m_resumeReachableBlocks(BitVecOps::UninitVal()) + , m_mutatedVars(nullptr) + , m_mutatedVarsIn(nullptr) + { + } + + void Run(ArrayStack& awaitBlocks); + const VARSET_TP& GetMutatedVarsIn(BasicBlock* block) const; + bool IsResumeReachable(BasicBlock* block); + +private: + void ComputeResumeReachableBlocks(ArrayStack& awaitBlocks); + void ComputePerBlockMutatedVars(); + void ComputeInterBlockMutatedVars(); + +#ifdef DEBUG + void DumpAwaitBlocks(); + void DumpResumeReachableBlocks(); + void DumpMutatedVars(); + void DumpMutatedVarsIn(); +#endif +}; + +//------------------------------------------------------------------------ +// PreservedValueAnalysis::Run: +// Run the preserved value analysis: identify await blocks, compute +// resume-reachable blocks, then compute per-block and inter-block +// mutation sets relative to resumption points. +// +// Parameters: +// awaitblocks - Blocks containing async calls +// +void PreservedValueAnalysis::Run(ArrayStack& awaitBlocks) +{ +#ifdef DEBUG + static ConfigMethodRange s_range; + s_range.EnsureInit(JitConfig.JitAsyncPreservedValueAnalysisRange()); + + if (!s_range.Contains(m_compiler->info.compMethodHash())) + { + JITDUMP("Preserved value analysis disabled because of method range\n"); + m_mutatedVarsIn = m_compiler->fgAllocateTypeForEachBlk(CMK_Async); + m_mutatedVars = m_compiler->fgAllocateTypeForEachBlk(CMK_Async); + for (BasicBlock* block : m_compiler->Blocks()) + { + VarSetOps::AssignNoCopy(m_compiler, m_mutatedVarsIn[block->bbNum], VarSetOps::MakeFull(m_compiler)); + VarSetOps::AssignNoCopy(m_compiler, m_mutatedVars[block->bbNum], VarSetOps::MakeFull(m_compiler)); + } + + m_resumeReachableBlocks = BitVecOps::MakeFull(&m_blockTraits); + m_awaitBlocks = BitVecOps::MakeFull(&m_blockTraits); + + return; + } +#endif + + ComputeResumeReachableBlocks(awaitBlocks); + ComputePerBlockMutatedVars(); + ComputeInterBlockMutatedVars(); +} + +//------------------------------------------------------------------------ +// PreservedValueAnalysis::GetMutatedVarsIn: +// Get the set of tracked locals that may have been mutated since the +// previous resumption on entry to the specified block. +// +// Parameters: +// block - The basic block. +// +// Returns: +// The VARSET_TP of tracked locals that may have been mutated since last +// resumption. A tracked local NOT in this set has a preserved value in the +// continuation and does not need to be re-stored when reusing it. +// +const VARSET_TP& PreservedValueAnalysis::GetMutatedVarsIn(BasicBlock* block) const +{ + assert(m_mutatedVarsIn != nullptr); + return m_mutatedVarsIn[block->bbNum]; +} + +//------------------------------------------------------------------------ +// PreservedValueAnalysis::IsResumeReachable: +// Check if the specified basic block is reachable after a previous resumption. +// +// Parameters: +// block - The basic block. +// +// Returns: +// True if so. Blocks that are not resume-reachable will never be able to +// reuse a continuation. Also, mutations of locals that are not +// resume-reachable do not need to be considered for preserved value +// analysis. +// +bool PreservedValueAnalysis::IsResumeReachable(BasicBlock* block) +{ + return BitVecOps::IsMember(&m_blockTraits, m_resumeReachableBlocks, block->bbNum); +} + +//------------------------------------------------------------------------ +// PreservedValueAnalysis::ComputeResumeReachableBlocks: +// Phase 0: Identify blocks containing awaits, then compute the set of +// blocks reachable after any resumption via a DFS starting from await blocks. +// +void PreservedValueAnalysis::ComputeResumeReachableBlocks(ArrayStack& awaitBlocks) +{ + m_awaitBlocks = BitVecOps::MakeEmpty(&m_blockTraits); + m_resumeReachableBlocks = BitVecOps::MakeEmpty(&m_blockTraits); + + ArrayStack worklist(m_compiler->getAllocator(CMK_Async)); + // Find all blocks that contain awaits. + for (BasicBlock* awaitBlock : awaitBlocks.BottomUpOrder()) + { + BitVecOps::AddElemD(&m_blockTraits, m_awaitBlocks, awaitBlock->bbNum); + worklist.Push(awaitBlock); + } + + JITDUMP("Preserved value analysis: blocks containing awaits\n"); + JITDUMPEXEC(DumpAwaitBlocks()); + + // DFS from those blocks. + while (!worklist.Empty()) + { + BasicBlock* block = worklist.Pop(); + + block->VisitAllSuccs(m_compiler, [&](BasicBlock* succ) { + if (BitVecOps::TryAddElemD(&m_blockTraits, m_resumeReachableBlocks, succ->bbNum)) + { + worklist.Push(succ); + } + return BasicBlockVisit::Continue; + }); + } + + JITDUMP("Preserved value analysis: blocks reachable after resuming\n"); + JITDUMPEXEC(DumpResumeReachableBlocks()); +} + +//------------------------------------------------------------------------ +// PreservedValueAnalysis::ComputePerBlockMutatedVars: +// Phase 1: For each reachable basic block compute the set of tracked locals +// that are mutated. +// +// For blocks that are reachable after resumption the full set of mutations +// in the block is recorded. +// +// For blocks that are NOT reachable after resumption but contain an await, +// only mutations that occur after the first suspension point are recorded, +// because mutations before the first suspension are not relevant to +// preserved values (the continuation did not exist yet or was not being +// reused at that point). +// +void PreservedValueAnalysis::ComputePerBlockMutatedVars() +{ + m_mutatedVars = m_compiler->fgAllocateTypeForEachBlk(CMK_Async); + + for (unsigned i = 0; i <= m_compiler->fgBBNumMax; i++) + { + VarSetOps::AssignNoCopy(m_compiler, m_mutatedVars[i], VarSetOps::MakeEmpty(m_compiler)); + } + + for (BasicBlock* block : m_compiler->Blocks()) + { + VARSET_TP& mutated = m_mutatedVars[block->bbNum]; + + bool isAwaitBlock = BitVecOps::IsMember(&m_blockTraits, m_awaitBlocks, block->bbNum); + bool isResumeReachable = BitVecOps::IsMember(&m_blockTraits, m_resumeReachableBlocks, block->bbNum); + + if (!isResumeReachable && !isAwaitBlock) + { + continue; + } + + GenTree* node = block->GetFirstLIRNode(); + + if (!isResumeReachable) + { + while (!node->IsCall() || !node->AsCall()->IsAsync()) { - unsigned lclNum = m_compiler->lvaTrackedToVarNum[varIndex]; - printf("%sV%02u", sep, lclNum); - sep = " "; + node = node->gtNext; + assert(node != nullptr); } } - printf("\n"); + + while (node != nullptr) + { + MarkMutatedLocal(m_compiler, node, mutated); + node = node->gtNext; + } + } + + JITDUMP("Preserved value analysis: per-block mutated vars after resumption\n"); + JITDUMPEXEC(DumpMutatedVars()); +} + +//------------------------------------------------------------------------ +// PreservedValueAnalysis::ComputeInterBlockMutatedVars: +// Phase 2: Forward dataflow to compute for each block the set of tracked +// locals that have been mutated since the previous resumption on entry. +// +// Transfer function: mutatedOut[B] = mutatedIn[B] | mutated[B] +// Merge: mutatedIn[B] = union of mutatedOut[pred] for all preds +// +// At method entry no locals are considered mutated (not reachable from a resumption). +// +void PreservedValueAnalysis::ComputeInterBlockMutatedVars() +{ + m_mutatedVarsIn = m_compiler->fgAllocateTypeForEachBlk(CMK_Async); + + for (unsigned i = 0; i <= m_compiler->fgBBNumMax; i++) + { + VarSetOps::AssignNoCopy(m_compiler, m_mutatedVarsIn[i], VarSetOps::MakeEmpty(m_compiler)); + } + + DataFlowCallback callback(*this, m_compiler); + DataFlow flow(m_compiler); + flow.ForwardAnalysis(callback); + + JITDUMP("Preserved value analysis: per-block mutated vars on entry\n"); + JITDUMPEXEC(DumpMutatedVarsIn()); +} + +#ifdef DEBUG +//------------------------------------------------------------------------ +// PreservedValueAnalysis::DumpAwaitBlocks: +// Debug helper to print the set of blocks containing awaits. +// +void PreservedValueAnalysis::DumpAwaitBlocks() +{ + printf(" Await blocks:"); + const char* sep = " "; + for (BasicBlock* block : m_compiler->Blocks()) + { + if (BitVecOps::IsMember(&m_blockTraits, m_awaitBlocks, block->bbNum)) + { + printf("%s" FMT_BB, sep, block->bbNum); + sep = ", "; + } + } + printf("\n"); +} + +//------------------------------------------------------------------------ +// PreservedValueAnalysis::DumpResumeReachableBlocks: +// Debug helper to print the set of resume-reachable blocks. +// +void PreservedValueAnalysis::DumpResumeReachableBlocks() +{ + printf(" Resume-reachable blocks:"); + const char* sep = " "; + for (BasicBlock* block : m_compiler->Blocks()) + { + if (BitVecOps::IsMember(&m_blockTraits, m_resumeReachableBlocks, block->bbNum)) + { + printf("%s" FMT_BB, sep, block->bbNum); + sep = ", "; + } + } + printf("\n"); +} + +//------------------------------------------------------------------------ +// PreservedValueAnalysis::DumpMutatedVars: +// Debug helper to print the per-block mutated variable sets. +// +void PreservedValueAnalysis::DumpMutatedVars() +{ + const FlowGraphDfsTree* dfsTree = m_compiler->m_dfsTree; + for (unsigned i = 0; i < dfsTree->GetPostOrderCount(); i++) + { + BasicBlock* block = dfsTree->GetPostOrder(i); + if (!VarSetOps::IsEmpty(m_compiler, m_mutatedVars[block->bbNum])) + { + printf(" " FMT_BB " mutated: ", block->bbNum); + PrintVarSet(m_compiler, m_mutatedVars[block->bbNum]); + } + } +} + +//------------------------------------------------------------------------ +// PreservedValueAnalysis::DumpMutatedVarsIn: +// Debug helper to print the per-block mutated-on-entry variable sets. +// +void PreservedValueAnalysis::DumpMutatedVarsIn() +{ + const FlowGraphDfsTree* dfsTree = m_compiler->m_dfsTree; + for (unsigned i = dfsTree->GetPostOrderCount(); i > 0; i--) + { + BasicBlock* block = dfsTree->GetPostOrder(i - 1); + printf(" " FMT_BB " mutated since resumption on entry: ", block->bbNum); + + if (VarSetOps::IsEmpty(m_compiler, m_mutatedVarsIn[block->bbNum])) + { + printf("\n"); + } + else if (VarSetOps::Equal(m_compiler, m_mutatedVarsIn[block->bbNum], VarSetOps::MakeFull(m_compiler))) + { + printf("\n"); + } + else + { + PrintVarSet(m_compiler, m_mutatedVarsIn[block->bbNum]); + } } } #endif class AsyncLiveness { - Compiler* m_compiler; - TreeLifeUpdater m_updater; - unsigned m_numVars; - DefaultValueAnalysis& m_defaultValueAnalysis; - VARSET_TP m_mutatedValues; + Compiler* m_compiler; + TreeLifeUpdater m_updater; + unsigned m_numVars; + DefaultValueAnalysis& m_defaultValueAnalysis; + PreservedValueAnalysis& m_preservedValueAnalysis; + VARSET_TP m_mutatedValues; + bool m_resumeReachable = false; + VARSET_TP m_mutatedSinceResumption; public: - AsyncLiveness(Compiler* comp, DefaultValueAnalysis& defaultValueAnalysis) + AsyncLiveness(Compiler* comp, + DefaultValueAnalysis& defaultValueAnalysis, + PreservedValueAnalysis& preservedValueAnalysis) : m_compiler(comp) , m_updater(comp) , m_numVars(comp->lvaCount) , m_defaultValueAnalysis(defaultValueAnalysis) + , m_preservedValueAnalysis(preservedValueAnalysis) , m_mutatedValues(VarSetOps::MakeEmpty(comp)) + , m_mutatedSinceResumption(VarSetOps::MakeEmpty(comp)) { } @@ -878,6 +1328,15 @@ class AsyncLiveness template void GetLiveLocals(ContinuationLayoutBuilder* layoutBuilder, Functor includeLocal); + bool IsResumeReachable() const + { + return m_resumeReachable; + } + + VARSET_TP GetMutatedSinceResumption() const + { + return m_mutatedSinceResumption; + } private: bool IsLocalCaptureUnnecessary(unsigned lclNum); }; @@ -885,7 +1344,7 @@ class AsyncLiveness //------------------------------------------------------------------------ // AsyncLiveness::StartBlock: // Indicate that we are now starting a new block, and do relevant liveness -// updates for it. +// and other analysis updates for it. // // Parameters: // block - The block that we are starting. @@ -894,6 +1353,8 @@ void AsyncLiveness::StartBlock(BasicBlock* block) { VarSetOps::Assign(m_compiler, m_compiler->compCurLife, block->bbLiveIn); VarSetOps::Assign(m_compiler, m_mutatedValues, m_defaultValueAnalysis.GetMutatedVarsIn(block)); + VarSetOps::Assign(m_compiler, m_mutatedSinceResumption, m_preservedValueAnalysis.GetMutatedVarsIn(block)); + m_resumeReachable = m_preservedValueAnalysis.IsResumeReachable(block); } //------------------------------------------------------------------------ @@ -908,6 +1369,14 @@ void AsyncLiveness::Update(GenTree* node) { m_updater.UpdateLife(node); UpdateMutatedLocal(m_compiler, node, m_mutatedValues); + + // If this is an async call then we can reach defs after resumption now. + // Make sure defs happening as part of the call are included as mutated since resumption. + m_resumeReachable |= node->IsCall() && node->AsCall()->IsAsync(); + if (m_resumeReachable) + { + MarkMutatedLocal(m_compiler, node, m_mutatedSinceResumption); + } } //------------------------------------------------------------------------ @@ -1182,7 +1651,9 @@ PhaseStatus AsyncTransformation::Run() DefaultValueAnalysis defaultValues(m_compiler); defaultValues.Run(); - AsyncLiveness liveness(m_compiler, defaultValues); + PreservedValueAnalysis preservedValues(m_compiler); + preservedValues.Run(worklist); + AsyncLiveness liveness(m_compiler, defaultValues, preservedValues); // Now walk the IR for all the blocks that contain async calls. Keep track // of liveness and outstanding LIR edges as we go; the LIR edges that cross @@ -1398,6 +1869,16 @@ void AsyncTransformation::Transform( } #endif + bool resumeReachable = life.IsResumeReachable(); + VARSET_TP mutatedSinceResumption(VarSetOps::MakeCopy(m_compiler, life.GetMutatedSinceResumption())); + + JITDUMP(" This suspension point is%s resume-reachable\n", resumeReachable ? "" : " NOT"); + if (resumeReachable) + { + JITDUMP(" Locals mutated since previous resumption: "); + JITDUMPEXEC(PrintVarSet(m_compiler, mutatedSinceResumption)); + } + ContinuationLayoutBuilder* layoutBuilder = new (m_compiler, CMK_Async) ContinuationLayoutBuilder(m_compiler); CreateLiveSetForSuspension(block, call, defs, life, layoutBuilder); @@ -1415,7 +1896,8 @@ void AsyncTransformation::Transform( BasicBlock* resumeBB = CreateResumptionBlock(*remainder, stateNum); - m_states.push_back(AsyncState(stateNum, layoutBuilder, block, call, callDefInfo, suspendBB, resumeBB)); + m_states.push_back(AsyncState(stateNum, layoutBuilder, block, call, callDefInfo, suspendBB, resumeBB, + resumeReachable, mutatedSinceResumption)); JITDUMP("\n"); } @@ -1904,7 +2386,7 @@ ContinuationLayout* ContinuationLayoutBuilder::Create() layout->ContinuationContextOffset = allocLayout(TARGET_POINTER_SIZE, TARGET_POINTER_SIZE); } - // Now allocate all returns + // Now allocate all returns for (ReturnInfo& ret : layout->Returns) { // All returns must be pointer aligned because of the offset encoding in Continuation::Flags. @@ -2153,13 +2635,18 @@ BasicBlock* AsyncTransformation::CreateSuspensionBlock(BasicBlock* block, unsign // stateNum - State number assigned to this suspension point. // layout - Layout information for the continuation object. // subLayout - Per-call layout builder indicating which fields are needed. +// resumeReachable - Whether or not this suspension point is reachable from a previous resumption +// mutatedSinceResumption - If this suspension point is reachable from a previous resumption +// then this indicates the set of tracked variables that may have been mutated since then. // void AsyncTransformation::CreateSuspension(BasicBlock* callBlock, GenTreeCall* call, BasicBlock* suspendBB, unsigned stateNum, const ContinuationLayout& layout, - const ContinuationLayoutBuilder& subLayout) + const ContinuationLayoutBuilder& subLayout, + bool resumeReachable, + VARSET_VALARG_TP mutatedSinceResumption) { GenTreeILOffset* ilOffsetNode = m_compiler->gtNewILOffsetNode(call->GetAsyncInfo().CallAsyncDebugInfo DEBUGARG(BAD_IL_OFFSET)); @@ -2185,7 +2672,8 @@ void AsyncTransformation::CreateSuspension(BasicBlock* call GenTree* storeNewContinuation = m_compiler->gtNewStoreLclVarNode(newContinuationVar, allocContinuation); LIR::AsRange(suspendBB).InsertAtEnd(storeNewContinuation); - if (ReuseContinuations()) + SaveSet tailSaveSet = SaveSet::All; + if (ReuseContinuations() && resumeReachable) { // Split suspendBB into suspendBB -> [reuse continuation with Next store] -> [allocNewBlock with allocation // call] -> suspendBBTail [empty] @@ -2225,6 +2713,14 @@ void AsyncTransformation::CreateSuspension(BasicBlock* call GenTree* storeNext = StoreAtOffset(returnedContinuation, nextOffset, newContinuation, TYP_REF); LIR::AsRange(reuseContinuationBB).InsertAtEnd(LIR::SeqTree(m_compiler, storeNext)); + // In the path where we allocated a new continuation we save only locals that we know to be unmutated since the + // last resumption. + FillInDataOnSuspension(call, layout, subLayout, allocNewBB, mutatedSinceResumption, SaveSet::UnmutatedLocals); + + // We can skip saving unmutated locals in the shared path -- we only need to save locals that may have been + // mutated since the last resumption. + tailSaveSet = SaveSet::MutatedLocals; + suspendBB = suspendTailBB; } @@ -2282,10 +2778,7 @@ void AsyncTransformation::CreateSuspension(BasicBlock* call GenTree* storeFlags = StoreAtOffset(newContinuation, flagsOffset, flagsNode, TYP_INT); LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_compiler, storeFlags)); - if (layout.Size > 0) - { - FillInDataOnSuspension(call, layout, subLayout, suspendBB); - } + FillInDataOnSuspension(call, layout, subLayout, suspendBB, mutatedSinceResumption, tailSaveSet); RestoreContexts(callBlock, call, suspendBB); @@ -2350,9 +2843,11 @@ GenTreeCall* AsyncTransformation::CreateAllocContinuationCall(bool void AsyncTransformation::FillInDataOnSuspension(GenTreeCall* call, const ContinuationLayout& layout, const ContinuationLayoutBuilder& subLayout, - BasicBlock* suspendBB) + BasicBlock* suspendBB, + VARSET_VALARG_TP mutatedSinceResumption, + SaveSet saveSet) { - if (m_compiler->doesMethodHavePatchpoints() || m_compiler->opts.IsOSR()) + if ((saveSet != SaveSet::MutatedLocals) && (m_compiler->doesMethodHavePatchpoints() || m_compiler->opts.IsOSR())) { GenTree* ilOffsetToStore; if (m_compiler->doesMethodHavePatchpoints()) @@ -2379,6 +2874,11 @@ void AsyncTransformation::FillInDataOnSuspension(GenTreeCall* LclVarDsc* dsc = m_compiler->lvaGetDesc(inf.LclNum); + if ((saveSet != SaveSet::All) && (GetLocalSaveSet(dsc, mutatedSinceResumption) != saveSet)) + { + continue; + } + GenTree* newContinuation = m_compiler->gtNewLclvNode(GetNewContinuationVar(), TYP_REF); unsigned offset = OFFSETOF__CORINFO_Continuation__data + inf.Offset; @@ -2416,7 +2916,7 @@ void AsyncTransformation::FillInDataOnSuspension(GenTreeCall* m_compiler->compLongUsed |= dsc->TypeIs(TYP_LONG); } - if (subLayout.NeedsContinuationContext()) + if ((saveSet != SaveSet::UnmutatedLocals) && subLayout.NeedsContinuationContext()) { // Insert call // AsyncHelpers.CaptureContinuationContext( @@ -2467,7 +2967,7 @@ void AsyncTransformation::FillInDataOnSuspension(GenTreeCall* LIR::AsRange(suspendBB).Remove(flagsPlaceholder); } - if (subLayout.NeedsExecutionContext()) + if ((saveSet != SaveSet::UnmutatedLocals) && subLayout.NeedsExecutionContext()) { GenTreeCall* captureExecContext = m_compiler->gtNewCallNode(CT_USER_FUNC, m_asyncInfo->captureExecutionContextMethHnd, TYP_REF); @@ -2483,6 +2983,45 @@ void AsyncTransformation::FillInDataOnSuspension(GenTreeCall* } } +//------------------------------------------------------------------------ +// AsyncTransformation::GetLocalSaveSet: +// Get the save set that a local should be saved as part of. +// +// Parameters: +// dsc - The local +// mutatedSinceResumption - Set of locals that may have been mutated since a resumption +// +// Returns: +// The set to save the local as part of. +// +SaveSet AsyncTransformation::GetLocalSaveSet(const LclVarDsc* dsc, VARSET_VALARG_TP mutatedSinceResumption) +{ + if (dsc->lvPromoted) + { + for (unsigned i = 0; i < dsc->lvFieldCnt; i++) + { + LclVarDsc* fieldDsc = m_compiler->lvaGetDesc(dsc->lvFieldLclStart + i); + if (!fieldDsc->lvTracked || VarSetOps::IsMember(m_compiler, mutatedSinceResumption, fieldDsc->lvVarIndex)) + { + return SaveSet::MutatedLocals; + } + } + + return SaveSet::UnmutatedLocals; + } + + // We should only see struct fields for independently promoted structs + assert(!dsc->lvIsStructField || + (m_compiler->lvaGetPromotionType(dsc->lvParentLcl) == Compiler::PROMOTION_TYPE_INDEPENDENT)); + + if (!dsc->lvTracked || VarSetOps::IsMember(m_compiler, mutatedSinceResumption, dsc->lvVarIndex)) + { + return SaveSet::MutatedLocals; + } + + return SaveSet::UnmutatedLocals; +} + //------------------------------------------------------------------------ // AsyncTransformation::RestoreContexts: // Create IR to restore contexts on suspension. @@ -3227,7 +3766,8 @@ void AsyncTransformation::CreateResumptionsAndSuspensions() JITDUMP("State %u suspend @ " FMT_BB ", resume @ " FMT_BB "\n", state.Number, state.SuspensionBB->bbNum, state.ResumptionBB->bbNum); ContinuationLayout* layout = sharedLayout == nullptr ? state.Layout->Create() : sharedLayout; - CreateSuspension(state.CallBlock, state.Call, state.SuspensionBB, state.Number, *layout, *state.Layout); + CreateSuspension(state.CallBlock, state.Call, state.SuspensionBB, state.Number, *layout, *state.Layout, + state.ResumeReachable, state.MutatedSincePreviousResumption); CreateResumption(state.CallBlock, state.Call, state.ResumptionBB, state.CallDefInfo, *layout, *state.Layout); CreateDebugInfoForSuspensionPoint(*layout, *state.Layout); diff --git a/src/coreclr/jit/async.h b/src/coreclr/jit/async.h index c054f75006b1e0..15f84777b6c0e6 100644 --- a/src/coreclr/jit/async.h +++ b/src/coreclr/jit/async.h @@ -166,7 +166,9 @@ struct AsyncState GenTreeCall* call, CallDefinitionInfo callDefInfo, BasicBlock* suspensionBB, - BasicBlock* resumptionBB) + BasicBlock* resumptionBB, + bool resumeReachable, + VARSET_TP mutatedSincePreviousResumption) : Number(number) , Layout(layout) , CallBlock(callBlock) @@ -174,6 +176,8 @@ struct AsyncState , CallDefInfo(callDefInfo) , SuspensionBB(suspensionBB) , ResumptionBB(resumptionBB) + , ResumeReachable(resumeReachable) + , MutatedSincePreviousResumption(mutatedSincePreviousResumption) { } @@ -184,6 +188,17 @@ struct AsyncState CallDefinitionInfo CallDefInfo; BasicBlock* SuspensionBB; BasicBlock* ResumptionBB; + // Is this suspension point reachable after a previous resumption? + bool ResumeReachable; + // Set of variables that may have been mutated since the previous resumption. + VARSET_TP MutatedSincePreviousResumption; +}; + +enum class SaveSet +{ + All, + UnmutatedLocals, + MutatedLocals, }; class AsyncTransformation @@ -241,28 +256,34 @@ class AsyncTransformation BasicBlock* suspendBB, unsigned stateNum, const ContinuationLayout& layout, - const ContinuationLayoutBuilder& subLayout); + const ContinuationLayoutBuilder& subLayout, + bool resumeReachable, + VARSET_VALARG_TP mutatedSinceResumption); GenTreeCall* CreateAllocContinuationCall(bool hasKeepAlive, GenTree* prevContinuation, const ContinuationLayout& layout); - void FillInDataOnSuspension(GenTreeCall* call, - const ContinuationLayout& layout, - const ContinuationLayoutBuilder& subLayout, - BasicBlock* suspendBB); - void RestoreContexts(BasicBlock* block, GenTreeCall* call, BasicBlock* insertionBB); - void CreateCheckAndSuspendAfterCall(BasicBlock* block, - GenTreeCall* call, - const CallDefinitionInfo& callDefInfo, - BasicBlock* suspendBB, - BasicBlock** remainder); - BasicBlock* CreateResumptionBlock(BasicBlock* remainder, unsigned stateNum); - void CreateResumption(BasicBlock* callBlock, - GenTreeCall* call, - BasicBlock* resumeBB, - const CallDefinitionInfo& callDefInfo, - const ContinuationLayout& layout, - const ContinuationLayoutBuilder& subLayout); + + void FillInDataOnSuspension(GenTreeCall* call, + const ContinuationLayout& layout, + const ContinuationLayoutBuilder& subLayout, + BasicBlock* suspendBB, + VARSET_VALARG_TP mutatedSinceResumption, + SaveSet saveSet); + SaveSet GetLocalSaveSet(const LclVarDsc* dsc, VARSET_VALARG_TP mutatedSinceResumption); + void RestoreContexts(BasicBlock* block, GenTreeCall* call, BasicBlock* insertionBB); + void CreateCheckAndSuspendAfterCall(BasicBlock* block, + GenTreeCall* call, + const CallDefinitionInfo& callDefInfo, + BasicBlock* suspendBB, + BasicBlock** remainder); + BasicBlock* CreateResumptionBlock(BasicBlock* remainder, unsigned stateNum); + void CreateResumption(BasicBlock* callBlock, + GenTreeCall* call, + BasicBlock* resumeBB, + const CallDefinitionInfo& callDefInfo, + const ContinuationLayout& layout, + const ContinuationLayoutBuilder& subLayout); void RestoreFromDataOnResumption(const ContinuationLayout& layout, const ContinuationLayoutBuilder& subLayout, diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 7c7dbfe17fc1d0..9e6fa8883e0ee1 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -592,6 +592,13 @@ OPT_CONFIG_INTEGER(JitDoOptimizeMaskConversions, "JitDoOptimizeMaskConversions", OPT_CONFIG_INTEGER(JitOptimizeAwait, "JitOptimizeAwait", 1) // Perform optimization of Await intrinsics OPT_CONFIG_STRING(JitAsyncDefaultValueAnalysisRange, "JitAsyncDefaultValueAnalysisRange") // Enable async default value analysis based on method hash range + +// Enable async preserved value analysis based on method hash range. This +// analysis computes state that is guaranteed to not have been changed since +// the last time suspension happened, and skips storing them in the case where +// a continuation is being reused. +OPT_CONFIG_STRING(JitAsyncPreservedValueAnalysisRange, "JitAsyncPreservedValueAnalysisRange") + // Save and reuse continuation instances in runtime async functions. Also // implies use of shared continuation layouts for all suspension points. RELEASE_CONFIG_INTEGER(JitAsyncReuseContinuations, "JitAsyncReuseContinuations", 1)