Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions src/PlanViewer.App/Controls/PlanViewerControl.axaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2839,8 +2839,15 @@ static string EfficiencyColor(double pct) => pct >= 80 ? "#E4E6EB"
statement.QueryTimeStats.CpuTimeMs > 0 &&
statement.DegreeOfParallelism > 1)
{
// Speedup ratio: CPU/elapsed = 1.0 means serial, = DOP means perfect parallelism
var speedup = (double)statement.QueryTimeStats.CpuTimeMs / statement.QueryTimeStats.ElapsedTimeMs;
// Speedup ratio: CPU/elapsed = 1.0 means serial, = DOP means perfect parallelism.
// Subtract external/preemptive wait time from CPU — those waits are CPU-busy
// in kernel and inflate the ratio without representing real query work.
long externalWaitMs = 0;
foreach (var w in statement.WaitStats)
if (BenefitScorer.IsExternalWait(w.WaitType))
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ResultMapper already derives QueryTimeResult.ExternalWaitMs using the same classifier (ResultMapper.cs:115-119). This code-behind works against the raw PlanStatement, not the mapped StatementResult, so the re-sum is necessary — just flagging that the classifier (BenefitScorer.IsExternalWait) is the single source of truth across all three sites (here, ResultMapper, HtmlExporter/Index.razor via the mapped field). If the classifier ever drifts, this site would need to drift with it.


Generated by Claude Code

externalWaitMs += w.WaitTimeMs;
var effectiveCpu = Math.Max(0, statement.QueryTimeStats.CpuTimeMs - externalWaitMs);
var speedup = (double)effectiveCpu / statement.QueryTimeStats.ElapsedTimeMs;
var efficiency = Math.Min(100.0, (speedup - 1.0) / (statement.DegreeOfParallelism - 1.0) * 100.0);
efficiency = Math.Max(0.0, efficiency);
dopText += $" ({efficiency:N0}% efficient)";
Expand Down
2 changes: 1 addition & 1 deletion src/PlanViewer.App/PlanViewer.App.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<ApplicationManifest>app.manifest</ApplicationManifest>
<ApplicationIcon>EDD.ico</ApplicationIcon>
<AvaloniaUseCompiledBindingsByDefault>true</AvaloniaUseCompiledBindingsByDefault>
<Version>1.7.3</Version>
<Version>1.7.4</Version>
<Authors>Erik Darling</Authors>
<Company>Darling Data LLC</Company>
<Product>Performance Studio</Product>
Expand Down
8 changes: 8 additions & 0 deletions src/PlanViewer.Core/Output/AnalysisResult.cs
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,14 @@ public class QueryTimeResult

[JsonPropertyName("elapsed_time_ms")]
public long ElapsedTimeMs { get; set; }

/// <summary>
/// Sum of external/preemptive wait time (MEMORY_ALLOCATION_*, PREEMPTIVE_*) —
/// these waits are CPU-busy in kernel and inflate CpuTimeMs vs real query CPU.
/// Subtract from CpuTimeMs for a truer CPU:Elapsed ratio.
/// </summary>
[JsonPropertyName("external_wait_ms")]
public long ExternalWaitMs { get; set; }
}

public class ParameterResult
Expand Down
3 changes: 2 additions & 1 deletion src/PlanViewer.Core/Output/HtmlExporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,8 @@ private static void WriteRuntimeCard(StringBuilder sb, StatementResult stmt)
WriteRow(sb, "CPU", $"{stmt.QueryTime.CpuTimeMs:N0} ms");
if (stmt.QueryTime.ElapsedTimeMs > 0)
{
var ratio = (double)stmt.QueryTime.CpuTimeMs / stmt.QueryTime.ElapsedTimeMs;
var effectiveCpu = Math.Max(0, stmt.QueryTime.CpuTimeMs - stmt.QueryTime.ExternalWaitMs);
var ratio = (double)effectiveCpu / stmt.QueryTime.ElapsedTimeMs;
WriteRow(sb, "CPU:Elapsed", ratio.ToString("N2"));
}
}
Expand Down
11 changes: 10 additions & 1 deletion src/PlanViewer.Core/Output/ResultMapper.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using PlanViewer.Core.Models;
using PlanViewer.Core.Services;

namespace PlanViewer.Core.Output;

Expand Down Expand Up @@ -111,10 +112,18 @@ private static StatementResult MapStatement(PlanStatement stmt)
// Query time (actual plans)
if (stmt.QueryTimeStats != null)
{
long externalWaitMs = 0;
foreach (var w in stmt.WaitStats)
{
if (BenefitScorer.IsExternalWait(w.WaitType))
externalWaitMs += w.WaitTimeMs;
}

result.QueryTime = new QueryTimeResult
{
CpuTimeMs = stmt.QueryTimeStats.CpuTimeMs,
ElapsedTimeMs = stmt.QueryTimeStats.ElapsedTimeMs
ElapsedTimeMs = stmt.QueryTimeStats.ElapsedTimeMs,
ExternalWaitMs = externalWaitMs
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No test coverage landing with this PR. tests/PlanViewer.Core.Tests/PlanAnalyzerTests.cs has wait-stat tests (Rule 11 severity gating, CPU vs I/O waits) but nothing asserting on benefit % values. A small fixture covering:

  • serial plan with a RESERVED_MEMORY_ALLOCATION_EXT wait → asserts ExternalWaitMs is populated and benefit% is reasonable;
  • a plan matching Joe's 48.3% worked example → locks that math in so a future refactor can't silently move the number.

Would catch any future drift in either IsExternalWait or the formula. Not blocking, but flagging per the review checklist.


Generated by Claude Code

};
}

Expand Down
74 changes: 65 additions & 9 deletions src/PlanViewer.Core/Services/BenefitScorer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -494,12 +494,11 @@ private static void ScoreWaitStats(PlanStatement stmt)
var isParallel = stmt.DegreeOfParallelism > 1 && stmt.RootNode != null;

// Collect all operators with per-thread stats for parallel benefit calculation
List<OperatorWaitProfile>? operatorProfiles = null;
if (isParallel)
{
operatorProfiles = new List<OperatorWaitProfile>();
CollectOperatorWaitProfiles(stmt.RootNode!, operatorProfiles);
}
// Collect operator profiles even for serial plans — the external-wait formula
// uses sum-of-max-thread-cpu across operators and works for both.
var operatorProfiles = new List<OperatorWaitProfile>();
if (stmt.RootNode != null)
CollectOperatorWaitProfiles(stmt.RootNode, operatorProfiles);
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously the tree walk was skipped for serial plans. Now CollectOperatorWaitProfiles runs on every statement that has a root node, including serial plans that have no external waits. For statements with many operators and no MEMORY_ALLOCATION_*/PREEMPTIVE_* waits this is pure overhead. If it shows up in CLI/bulk-scan profiling, gate the walk on stmt.WaitStats.Any(w => IsExternalWait(w.WaitType)) || isParallel.


Generated by Claude Code


foreach (var wait in stmt.WaitStats)
{
Expand All @@ -508,7 +507,15 @@ private static void ScoreWaitStats(PlanStatement stmt)
var category = ClassifyWaitType(wait.WaitType);
double benefitPct;

if (category == "Parallelism" && isParallel)
if (IsExternalWait(wait.WaitType) && operatorProfiles.Count > 0)
{
// External / preemptive waits (MEMORY_ALLOCATION_*, PREEMPTIVE_*): the worker
// is CPU-busy in kernel, so operator elapsed ≈ operator cpu and the wait
// barely shows in the per-thread (elapsed - cpu) calculation. Joe's formula:
// benefit = (wait_ms / total_cpu_ms) * Σ max_thread_cpu_per_operator / elapsed
benefitPct = CalculateExternalWaitBenefit(wait, operatorProfiles, stmt.QueryTimeStats!.CpuTimeMs, elapsedMs);
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The external-wait path is taken for serial plans too, not just parallel. On a serial plan with per-operator CPU stats, sumMaxCpu ≈ stmt.CpuTimeMs and the formula collapses to wait_ms / elapsed_ms — which is exactly the old serial behavior, so no regression (matches your c1-c5.sqlplan verification). Worth noting in the doc-comment above (Σ max_thread_cpu_per_operator reads as parallel-only at a glance).


Generated by Claude Code

}
else if (category == "Parallelism" && isParallel)
{
// CXPACKET/CXCONSUMER/CXSYNC: benefit is the parallelism efficiency gap,
// not the raw wait time. Threads waiting for other threads is a symptom
Expand All @@ -525,7 +532,7 @@ private static void ScoreWaitStats(PlanStatement stmt)
benefitPct = (double)wait.WaitTimeMs / elapsedMs * 100;
}
}
else if (!isParallel || operatorProfiles == null || operatorProfiles.Count == 0)
else if (!isParallel || operatorProfiles.Count == 0)
{
// Serial plan or no operator data: simple ratio
benefitPct = (double)wait.WaitTimeMs / elapsedMs * 100;
Expand Down Expand Up @@ -585,6 +592,48 @@ private static double CalculateParallelWaitBenefit(
return benefitMs / stmtElapsedMs * 100;
}

/// <summary>
/// Joe's formula for external/preemptive waits where the worker is CPU-busy in kernel
/// (MEMORY_ALLOCATION_*, PREEMPTIVE_*). The standard (elapsed-cpu) per-thread
/// wait accounting misses these because elapsed ≈ cpu for those threads. Use the
/// wait's share of total CPU, scaled by the plan's critical-path CPU.
/// wait_cpu_share = wait_ms / total_cpu_ms
/// sum_max_cpu = Σ max_thread_cpu across operators
/// benefit_ms = wait_cpu_share * sum_max_cpu
/// Then convert to % of statement elapsed.
/// </summary>
private static double CalculateExternalWaitBenefit(
WaitStatInfo wait, List<OperatorWaitProfile> profiles,
long stmtCpuMs, long stmtElapsedMs)
{
if (stmtCpuMs <= 0 || stmtElapsedMs <= 0)
return (double)wait.WaitTimeMs / Math.Max(1, stmtElapsedMs) * 100;

long sumMaxCpu = 0;
foreach (var p in profiles)
sumMaxCpu += p.MaxThreadCpuMs;

if (sumMaxCpu <= 0)
return (double)wait.WaitTimeMs / stmtElapsedMs * 100;

var waitCpuShare = (double)wait.WaitTimeMs / stmtCpuMs;
var benefitMs = waitCpuShare * sumMaxCpu;
return benefitMs / stmtElapsedMs * 100;
}

/// <summary>
/// External / preemptive waits where the worker is CPU-busy in kernel rather than
/// descheduled. Their wait time counts toward the query's CPU time, so the usual
/// (elapsed - cpu) per-thread wait math misses them entirely.
/// </summary>
public static bool IsExternalWait(string waitType)
{
if (string.IsNullOrEmpty(waitType)) return false;
var wt = waitType.ToUpperInvariant();
return wt.Contains("MEMORY_ALLOCATION")
|| wt.StartsWith("PREEMPTIVE_");
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Contains("MEMORY_ALLOCATION") catches both MEMORY_ALLOCATION_EXT and RESERVED_MEMORY_ALLOCATION_EXT — that's the intent — but it's substring-wide, so any future wait type embedding that token anywhere in the name would be (mis)classified as CPU-busy. Tighter alternative:

return wt == "MEMORY_ALLOCATION_EXT"
    || wt == "RESERVED_MEMORY_ALLOCATION_EXT"
    || wt.StartsWith("PREEMPTIVE_");

Not a bug today, just a robustness nit. Same classifier is reused from ResultMapper and PlanViewerControl, so whatever shape it takes here is authoritative.


Generated by Claude Code

}

/// <summary>
/// Determines if an operator is relevant for a given wait category.
/// </summary>
Expand Down Expand Up @@ -624,13 +673,18 @@ private static void CollectOperatorWaitProfiles(PlanNode node, List<OperatorWait
maxThreadWait = threadWait;
}

if (totalWait > 0 || maxThreadWait > 0)
// Max per-thread SELF CPU (non-cumulative) — critical-path CPU contribution
// from this operator. Used by the external-wait formula.
var maxThreadCpu = PlanAnalyzer.GetOperatorMaxThreadOwnCpuMs(node);

if (totalWait > 0 || maxThreadWait > 0 || maxThreadCpu > 0)
{
profiles.Add(new OperatorWaitProfile
{
Node = node,
MaxThreadWaitMs = maxThreadWait,
TotalWaitMs = totalWait,
MaxThreadCpuMs = maxThreadCpu,
HasPhysicalReads = node.ActualPhysicalReads > 0,
HasCpuWork = node.ActualCPUMs > 0,
IsExchange = node.PhysicalOp == "Parallelism",
Expand Down Expand Up @@ -681,6 +735,8 @@ private sealed class OperatorWaitProfile
public PlanNode Node { get; init; } = null!;
public long MaxThreadWaitMs { get; init; }
public long TotalWaitMs { get; init; }
/// <summary>Max CPU time among this operator's threads (critical-path CPU for external-wait formula).</summary>
public long MaxThreadCpuMs { get; init; }
public bool HasPhysicalReads { get; init; }
public bool HasCpuWork { get; init; }
public bool IsExchange { get; init; }
Expand Down
60 changes: 60 additions & 0 deletions src/PlanViewer.Core/Services/PlanAnalyzer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@

if (unsnifffedParams.Count > 0)
{
var hasRecompile = stmt.StatementText.Contains("RECOMPILE", StringComparison.OrdinalIgnoreCase);

Check warning on line 356 in src/PlanViewer.Core/Services/PlanAnalyzer.cs

View workflow job for this annotation

GitHub Actions / build-and-test

Dereference of a possibly null reference.

Check warning on line 356 in src/PlanViewer.Core/Services/PlanAnalyzer.cs

View workflow job for this annotation

GitHub Actions / build-and-test

Dereference of a possibly null reference.
if (!hasRecompile)
{
var names = string.Join(", ", unsnifffedParams.Select(p => p.Name));
Expand Down Expand Up @@ -1204,7 +1204,7 @@
// Rule 28: Row Count Spool — NOT IN with nullable column
// Pattern: Row Count Spool with high rewinds, child scan has IS NULL predicate,
// and statement text contains NOT IN
if (!cfg.IsRuleDisabled(28) && node.PhysicalOp.Contains("Row Count Spool"))

Check warning on line 1207 in src/PlanViewer.Core/Services/PlanAnalyzer.cs

View workflow job for this annotation

GitHub Actions / build-and-test

Dereference of a possibly null reference.

Check warning on line 1207 in src/PlanViewer.Core/Services/PlanAnalyzer.cs

View workflow job for this annotation

GitHub Actions / build-and-test

Dereference of a possibly null reference.
{
var rewinds = node.HasActualStats ? (double)node.ActualRewinds : node.EstimateRewinds;
if (rewinds > 10000 && HasNotInPattern(node, stmt))
Expand Down Expand Up @@ -1592,6 +1592,66 @@
return maxSelf;
}

/// <summary>
/// Max per-thread self-CPU for this operator.
/// Parallel: for each thread, self_cpu = thread_cpu - Σ same-thread child cpu; take max.
/// Serial / single-thread: operator_cpu - Σ effective child cpu.
/// Needed for external-wait benefit scoring (Joe's formula).
/// </summary>
internal static long GetOperatorMaxThreadOwnCpuMs(PlanNode node)
{
if (!node.HasActualStats || node.ActualCPUMs <= 0) return 0;

if (node.PerThreadStats.Count > 1)
{
var parentByThread = new Dictionary<int, long>();
foreach (var ts in node.PerThreadStats)
parentByThread[ts.ThreadId] = ts.ActualCPUMs;

var childSumByThread = new Dictionary<int, long>();
foreach (var child in node.Children)
{
var childNode = child;
if (child.PhysicalOp == "Parallelism" && child.Children.Count > 0)
childNode = child.Children.OrderByDescending(c => c.ActualCPUMs).First();
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exchange look-through mirrors GetPerThreadOwnElapsed at 1573-1574 — good, pattern is consistent. One subtle thing: on the parent side (parentByThread), thread IDs come from the operator itself; on the child side (childSumByThread), they come from the grandchild when the direct child is Parallelism. Producer-side threads below a parallelism exchange are a different thread set from consumer-side threads above it, so childSumByThread lookups keyed by parent's thread IDs will often miss and contribute 0. That's actually the right answer (you don't want to subtract producer CPU from consumer thread self-CPU), but it means in those cases this collapses to "parent thread cpu" as self. Worth a one-line comment noting that's intentional.


Generated by Claude Code

foreach (var ts in childNode.PerThreadStats)
{
childSumByThread.TryGetValue(ts.ThreadId, out var existing);
childSumByThread[ts.ThreadId] = existing + ts.ActualCPUMs;
}
}

var maxSelf = 0L;
foreach (var (threadId, parentCpu) in parentByThread)
{
childSumByThread.TryGetValue(threadId, out var childCpu);
var self = Math.Max(0, parentCpu - childCpu);
if (self > maxSelf) maxSelf = self;
}
return maxSelf;
}

// Serial: operator_cpu - Σ effective child cpu
var totalChildCpu = 0L;
foreach (var child in node.Children)
totalChildCpu += GetEffectiveChildCpuMs(child);
return Math.Max(0, node.ActualCPUMs - totalChildCpu);
}

private static long GetEffectiveChildCpuMs(PlanNode child)
{
if (child.PhysicalOp == "Parallelism" && child.Children.Count > 0)
return child.Children.Max(GetEffectiveChildCpuMs);
if (child.ActualCPUMs > 0)
return child.ActualCPUMs;
if (child.Children.Count == 0)
return 0;
var sum = 0L;
foreach (var grandchild in child.Children)
sum += GetEffectiveChildCpuMs(grandchild);
return sum;
}

/// <summary>
/// Serial row mode self-time: subtract all direct children's effective elapsed.
/// Pass-through operators (Compute Scalar, etc.) don't carry runtime stats —
Expand Down
3 changes: 2 additions & 1 deletion src/PlanViewer.Web/Pages/Index.razor
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ else
</div>
@if (ActiveStmt!.QueryTime.ElapsedTimeMs > 0)
{
var ratio = (double)ActiveStmt!.QueryTime.CpuTimeMs / ActiveStmt!.QueryTime.ElapsedTimeMs;
var effectiveCpu = Math.Max(0L, ActiveStmt!.QueryTime.CpuTimeMs - ActiveStmt!.QueryTime.ExternalWaitMs);
var ratio = (double)effectiveCpu / ActiveStmt!.QueryTime.ElapsedTimeMs;
<div class="insight-row">
<span class="insight-label">CPU:Elapsed</span>
<span class="insight-value">@ratio.ToString("N2")</span>
Expand Down
Loading