-
Notifications
You must be signed in to change notification settings - Fork 30
External/preemptive wait benefit formula + CPU:Elapsed adjustment (#215 C6, C7) #259
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| using PlanViewer.Core.Models; | ||
| using PlanViewer.Core.Services; | ||
|
|
||
| namespace PlanViewer.Core.Output; | ||
|
|
||
|
|
@@ -111,10 +112,18 @@ private static StatementResult MapStatement(PlanStatement stmt) | |
| // Query time (actual plans) | ||
| if (stmt.QueryTimeStats != null) | ||
| { | ||
| long externalWaitMs = 0; | ||
| foreach (var w in stmt.WaitStats) | ||
| { | ||
| if (BenefitScorer.IsExternalWait(w.WaitType)) | ||
| externalWaitMs += w.WaitTimeMs; | ||
| } | ||
|
|
||
| result.QueryTime = new QueryTimeResult | ||
| { | ||
| CpuTimeMs = stmt.QueryTimeStats.CpuTimeMs, | ||
| ElapsedTimeMs = stmt.QueryTimeStats.ElapsedTimeMs | ||
| ElapsedTimeMs = stmt.QueryTimeStats.ElapsedTimeMs, | ||
| ExternalWaitMs = externalWaitMs | ||
|
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No test coverage landing with this PR.
Would catch any future drift in either Generated by Claude Code |
||
| }; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -494,12 +494,11 @@ private static void ScoreWaitStats(PlanStatement stmt) | |
| var isParallel = stmt.DegreeOfParallelism > 1 && stmt.RootNode != null; | ||
|
|
||
| // Collect all operators with per-thread stats for parallel benefit calculation | ||
| List<OperatorWaitProfile>? operatorProfiles = null; | ||
| if (isParallel) | ||
| { | ||
| operatorProfiles = new List<OperatorWaitProfile>(); | ||
| CollectOperatorWaitProfiles(stmt.RootNode!, operatorProfiles); | ||
| } | ||
| // Collect operator profiles even for serial plans — the external-wait formula | ||
| // uses sum-of-max-thread-cpu across operators and works for both. | ||
| var operatorProfiles = new List<OperatorWaitProfile>(); | ||
| if (stmt.RootNode != null) | ||
| CollectOperatorWaitProfiles(stmt.RootNode, operatorProfiles); | ||
|
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously the tree walk was skipped for serial plans. Now Generated by Claude Code |
||
|
|
||
| foreach (var wait in stmt.WaitStats) | ||
| { | ||
|
|
@@ -508,7 +507,15 @@ private static void ScoreWaitStats(PlanStatement stmt) | |
| var category = ClassifyWaitType(wait.WaitType); | ||
| double benefitPct; | ||
|
|
||
| if (category == "Parallelism" && isParallel) | ||
| if (IsExternalWait(wait.WaitType) && operatorProfiles.Count > 0) | ||
| { | ||
| // External / preemptive waits (MEMORY_ALLOCATION_*, PREEMPTIVE_*): the worker | ||
| // is CPU-busy in kernel, so operator elapsed ≈ operator cpu and the wait | ||
| // barely shows in the per-thread (elapsed - cpu) calculation. Joe's formula: | ||
| // benefit = (wait_ms / total_cpu_ms) * Σ max_thread_cpu_per_operator / elapsed | ||
| benefitPct = CalculateExternalWaitBenefit(wait, operatorProfiles, stmt.QueryTimeStats!.CpuTimeMs, elapsedMs); | ||
|
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The external-wait path is taken for serial plans too, not just parallel. On a serial plan with per-operator CPU stats, Generated by Claude Code |
||
| } | ||
| else if (category == "Parallelism" && isParallel) | ||
| { | ||
| // CXPACKET/CXCONSUMER/CXSYNC: benefit is the parallelism efficiency gap, | ||
| // not the raw wait time. Threads waiting for other threads is a symptom | ||
|
|
@@ -525,7 +532,7 @@ private static void ScoreWaitStats(PlanStatement stmt) | |
| benefitPct = (double)wait.WaitTimeMs / elapsedMs * 100; | ||
| } | ||
| } | ||
| else if (!isParallel || operatorProfiles == null || operatorProfiles.Count == 0) | ||
| else if (!isParallel || operatorProfiles.Count == 0) | ||
| { | ||
| // Serial plan or no operator data: simple ratio | ||
| benefitPct = (double)wait.WaitTimeMs / elapsedMs * 100; | ||
|
|
@@ -585,6 +592,48 @@ private static double CalculateParallelWaitBenefit( | |
| return benefitMs / stmtElapsedMs * 100; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Joe's formula for external/preemptive waits where the worker is CPU-busy in kernel | ||
| /// (MEMORY_ALLOCATION_*, PREEMPTIVE_*). The standard (elapsed-cpu) per-thread | ||
| /// wait accounting misses these because elapsed ≈ cpu for those threads. Use the | ||
| /// wait's share of total CPU, scaled by the plan's critical-path CPU. | ||
| /// wait_cpu_share = wait_ms / total_cpu_ms | ||
| /// sum_max_cpu = Σ max_thread_cpu across operators | ||
| /// benefit_ms = wait_cpu_share * sum_max_cpu | ||
| /// Then convert to % of statement elapsed. | ||
| /// </summary> | ||
| private static double CalculateExternalWaitBenefit( | ||
| WaitStatInfo wait, List<OperatorWaitProfile> profiles, | ||
| long stmtCpuMs, long stmtElapsedMs) | ||
| { | ||
| if (stmtCpuMs <= 0 || stmtElapsedMs <= 0) | ||
| return (double)wait.WaitTimeMs / Math.Max(1, stmtElapsedMs) * 100; | ||
|
|
||
| long sumMaxCpu = 0; | ||
| foreach (var p in profiles) | ||
| sumMaxCpu += p.MaxThreadCpuMs; | ||
|
|
||
| if (sumMaxCpu <= 0) | ||
| return (double)wait.WaitTimeMs / stmtElapsedMs * 100; | ||
|
|
||
| var waitCpuShare = (double)wait.WaitTimeMs / stmtCpuMs; | ||
| var benefitMs = waitCpuShare * sumMaxCpu; | ||
| return benefitMs / stmtElapsedMs * 100; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// External / preemptive waits where the worker is CPU-busy in kernel rather than | ||
| /// descheduled. Their wait time counts toward the query's CPU time, so the usual | ||
| /// (elapsed - cpu) per-thread wait math misses them entirely. | ||
| /// </summary> | ||
| public static bool IsExternalWait(string waitType) | ||
| { | ||
| if (string.IsNullOrEmpty(waitType)) return false; | ||
| var wt = waitType.ToUpperInvariant(); | ||
| return wt.Contains("MEMORY_ALLOCATION") | ||
| || wt.StartsWith("PREEMPTIVE_"); | ||
|
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
return wt == "MEMORY_ALLOCATION_EXT"
|| wt == "RESERVED_MEMORY_ALLOCATION_EXT"
|| wt.StartsWith("PREEMPTIVE_");Not a bug today, just a robustness nit. Same classifier is reused from Generated by Claude Code |
||
| } | ||
|
|
||
| /// <summary> | ||
| /// Determines if an operator is relevant for a given wait category. | ||
| /// </summary> | ||
|
|
@@ -624,13 +673,18 @@ private static void CollectOperatorWaitProfiles(PlanNode node, List<OperatorWait | |
| maxThreadWait = threadWait; | ||
| } | ||
|
|
||
| if (totalWait > 0 || maxThreadWait > 0) | ||
| // Max per-thread SELF CPU (non-cumulative) — critical-path CPU contribution | ||
| // from this operator. Used by the external-wait formula. | ||
| var maxThreadCpu = PlanAnalyzer.GetOperatorMaxThreadOwnCpuMs(node); | ||
|
|
||
| if (totalWait > 0 || maxThreadWait > 0 || maxThreadCpu > 0) | ||
| { | ||
| profiles.Add(new OperatorWaitProfile | ||
| { | ||
| Node = node, | ||
| MaxThreadWaitMs = maxThreadWait, | ||
| TotalWaitMs = totalWait, | ||
| MaxThreadCpuMs = maxThreadCpu, | ||
| HasPhysicalReads = node.ActualPhysicalReads > 0, | ||
| HasCpuWork = node.ActualCPUMs > 0, | ||
| IsExchange = node.PhysicalOp == "Parallelism", | ||
|
|
@@ -681,6 +735,8 @@ private sealed class OperatorWaitProfile | |
| public PlanNode Node { get; init; } = null!; | ||
| public long MaxThreadWaitMs { get; init; } | ||
| public long TotalWaitMs { get; init; } | ||
| /// <summary>Max CPU time among this operator's threads (critical-path CPU for external-wait formula).</summary> | ||
| public long MaxThreadCpuMs { get; init; } | ||
| public bool HasPhysicalReads { get; init; } | ||
| public bool HasCpuWork { get; init; } | ||
| public bool IsExchange { get; init; } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -353,7 +353,7 @@ | |
|
|
||
| if (unsnifffedParams.Count > 0) | ||
| { | ||
| var hasRecompile = stmt.StatementText.Contains("RECOMPILE", StringComparison.OrdinalIgnoreCase); | ||
|
Check warning on line 356 in src/PlanViewer.Core/Services/PlanAnalyzer.cs
|
||
| if (!hasRecompile) | ||
| { | ||
| var names = string.Join(", ", unsnifffedParams.Select(p => p.Name)); | ||
|
|
@@ -1204,7 +1204,7 @@ | |
| // Rule 28: Row Count Spool — NOT IN with nullable column | ||
| // Pattern: Row Count Spool with high rewinds, child scan has IS NULL predicate, | ||
| // and statement text contains NOT IN | ||
| if (!cfg.IsRuleDisabled(28) && node.PhysicalOp.Contains("Row Count Spool")) | ||
|
Check warning on line 1207 in src/PlanViewer.Core/Services/PlanAnalyzer.cs
|
||
| { | ||
| var rewinds = node.HasActualStats ? (double)node.ActualRewinds : node.EstimateRewinds; | ||
| if (rewinds > 10000 && HasNotInPattern(node, stmt)) | ||
|
|
@@ -1592,6 +1592,66 @@ | |
| return maxSelf; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Max per-thread self-CPU for this operator. | ||
| /// Parallel: for each thread, self_cpu = thread_cpu - Σ same-thread child cpu; take max. | ||
| /// Serial / single-thread: operator_cpu - Σ effective child cpu. | ||
| /// Needed for external-wait benefit scoring (Joe's formula). | ||
| /// </summary> | ||
| internal static long GetOperatorMaxThreadOwnCpuMs(PlanNode node) | ||
| { | ||
| if (!node.HasActualStats || node.ActualCPUMs <= 0) return 0; | ||
|
|
||
| if (node.PerThreadStats.Count > 1) | ||
| { | ||
| var parentByThread = new Dictionary<int, long>(); | ||
| foreach (var ts in node.PerThreadStats) | ||
| parentByThread[ts.ThreadId] = ts.ActualCPUMs; | ||
|
|
||
| var childSumByThread = new Dictionary<int, long>(); | ||
| foreach (var child in node.Children) | ||
| { | ||
| var childNode = child; | ||
| if (child.PhysicalOp == "Parallelism" && child.Children.Count > 0) | ||
| childNode = child.Children.OrderByDescending(c => c.ActualCPUMs).First(); | ||
|
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Exchange look-through mirrors Generated by Claude Code |
||
| foreach (var ts in childNode.PerThreadStats) | ||
| { | ||
| childSumByThread.TryGetValue(ts.ThreadId, out var existing); | ||
| childSumByThread[ts.ThreadId] = existing + ts.ActualCPUMs; | ||
| } | ||
| } | ||
|
|
||
| var maxSelf = 0L; | ||
| foreach (var (threadId, parentCpu) in parentByThread) | ||
| { | ||
| childSumByThread.TryGetValue(threadId, out var childCpu); | ||
| var self = Math.Max(0, parentCpu - childCpu); | ||
| if (self > maxSelf) maxSelf = self; | ||
| } | ||
| return maxSelf; | ||
| } | ||
|
|
||
| // Serial: operator_cpu - Σ effective child cpu | ||
| var totalChildCpu = 0L; | ||
| foreach (var child in node.Children) | ||
| totalChildCpu += GetEffectiveChildCpuMs(child); | ||
| return Math.Max(0, node.ActualCPUMs - totalChildCpu); | ||
| } | ||
|
|
||
| private static long GetEffectiveChildCpuMs(PlanNode child) | ||
| { | ||
| if (child.PhysicalOp == "Parallelism" && child.Children.Count > 0) | ||
| return child.Children.Max(GetEffectiveChildCpuMs); | ||
| if (child.ActualCPUMs > 0) | ||
| return child.ActualCPUMs; | ||
| if (child.Children.Count == 0) | ||
| return 0; | ||
| var sum = 0L; | ||
| foreach (var grandchild in child.Children) | ||
| sum += GetEffectiveChildCpuMs(grandchild); | ||
| return sum; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Serial row mode self-time: subtract all direct children's effective elapsed. | ||
| /// Pass-through operators (Compute Scalar, etc.) don't carry runtime stats — | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ResultMapperalready derivesQueryTimeResult.ExternalWaitMsusing the same classifier (ResultMapper.cs:115-119). This code-behind works against the rawPlanStatement, not the mappedStatementResult, so the re-sum is necessary — just flagging that the classifier (BenefitScorer.IsExternalWait) is the single source of truth across all three sites (here,ResultMapper,HtmlExporter/Index.razorvia the mapped field). If the classifier ever drifts, this site would need to drift with it.Generated by Claude Code