From 2d2011fb01b7b4c57f2675fb1292b2e56fd1f666 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Fri, 10 Apr 2026 14:54:59 -0400 Subject: [PATCH 1/4] Add wait stats as cross-cutting analysis signal in PlanAnalyzer Surface statement-level wait stats across Rules 1, 5, 11, and 16 so the analyzer uses actual execution wait evidence to enrich warnings. The dominant wait type (SOS_SCHEDULER_YIELD, CXPACKET, HTBUILD, PAGEIOLATCH, LCK_M_S, etc.) is appended to each warning message. Rule 11 (Scan with Predicate) additionally elevates to Critical when I/O waits confirm the scan is hitting disk at >= 50% of plan cost. Expand GetWaitStatsAdvice (Rules 25/31) with specific explanations for all 21 wait types found in real plan files: SOS_SCHEDULER_YIELD, CXSYNC_PORT, MEMORY_ALLOCATION_EXT, HTBUILD/HTDELETE/HTREPARTITION, BPSORT, BMPBUILD, PAGELATCH, LOGBUFFER, ASYNC_NETWORK_IO, SOS_PHYS_PAGE_CACHE, and others. Remove dead RESOURCE_SEMAPHORE branch (pre-execution wait, never appears in statement-level plan wait stats). Co-Authored-By: Claude Opus 4.6 (1M context) --- src/PlanViewer.Core/Services/PlanAnalyzer.cs | 137 ++++++- .../PlanAnalyzerTests.cs | 380 ++++++++++++++++++ 2 files changed, 509 insertions(+), 8 deletions(-) diff --git a/src/PlanViewer.Core/Services/PlanAnalyzer.cs b/src/PlanViewer.Core/Services/PlanAnalyzer.cs index f2e052d..3a66d47 100644 --- a/src/PlanViewer.Core/Services/PlanAnalyzer.cs +++ b/src/PlanViewer.Core/Services/PlanAnalyzer.cs @@ -548,9 +548,9 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt, AnalyzerConfi { // Gate: skip trivial filters based on actual stats or estimated cost bool isTrivial; + long childReads = 0; if (node.HasActualStats) { - long childReads = 0; foreach (var child in node.Children) childReads += SumSubtreeReads(child); var childElapsed = node.Children.Max(c => c.ActualElapsedMs); @@ -571,6 +571,14 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt, AnalyzerConfi message += $"\n{impact}"; message += $"\nPredicate: {predicate}"; + // Wait stats add context — rows burned CPU/I/O/waits just to be discarded + if (childReads >= 1000) + { + var waitContext = GetTopWaitContext(stmt.WaitStats); + if (waitContext != null) + message += $"\n{waitContext}"; + } + node.Warnings.Add(new PlanWarning { WarningType = "Filter Operator", @@ -647,10 +655,16 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt, AnalyzerConfi var actualDisplay = executions > 1 ? $"Actual {node.ActualRows:N0} ({actualPerExec:N0} rows x {executions:N0} executions)" : $"Actual {node.ActualRows:N0}"; + var message = $"Estimated {node.EstimateRows:N0} vs {actualDisplay} — {factor:F0}x {direction}. {harm}"; + + var waitContext = GetTopWaitContext(stmt.WaitStats); + if (waitContext != null) + message += $" {waitContext}"; + node.Warnings.Add(new PlanWarning { WarningType = "Row Estimate Mismatch", - Message = $"Estimated {node.EstimateRows:N0} vs {actualDisplay} — {factor:F0}x {direction}. {harm}", + Message = message, Severity = factor >= 100 ? PlanWarningSeverity.Critical : PlanWarningSeverity.Warning }); } @@ -833,6 +847,16 @@ _ when nonSargableReason.StartsWith("Function call") => if (!string.IsNullOrEmpty(details.Summary)) message += $" {details.Summary}"; message += " Check that you have appropriate indexes."; + + var waitContext = GetTopWaitContext(stmt.WaitStats); + if (waitContext != null) + message += $" {waitContext}"; + + // I/O waits specifically confirm the scan is hitting disk — elevate + if (HasSignificantIoWaits(stmt.WaitStats) && details.CostPct >= 50 + && severity != PlanWarningSeverity.Critical) + severity = PlanWarningSeverity.Critical; + message += $"\nPredicate: {Truncate(displayPredicate, 200)}"; node.Warnings.Add(new PlanWarning @@ -1023,6 +1047,10 @@ _ when nonSargableReason.StartsWith("Function call") => else details.Add("Consider whether a hash or merge join would be more appropriate for this row count."); + var waitContext = GetTopWaitContext(stmt.WaitStats); + if (waitContext != null) + details.Add(waitContext); + node.Warnings.Add(new PlanWarning { WarningType = "Nested Loops High Executions", @@ -1693,24 +1721,117 @@ private static ScanImpact BuildScanImpactDetails(PlanNode node, PlanStatement st var waitType = top.WaitType.ToUpperInvariant(); var advice = waitType switch { + // I/O: reading/writing data pages from disk _ when waitType.StartsWith("PAGEIOLATCH") => $"I/O bound — {topPct:N0}% of wait time is {top.WaitType}. Data is being read from disk rather than memory. Consider adding indexes to reduce I/O, or investigate memory pressure.", + _ when waitType.Contains("IO_COMPLETION") => + $"I/O bound — {topPct:N0}% of wait time is {top.WaitType}. Non-buffer I/O such as sort/hash spills to TempDB or eager writes.", + + // CPU: thread yielding its scheduler quantum + _ when waitType == "SOS_SCHEDULER_YIELD" => + $"CPU bound — {topPct:N0}% of wait time is {top.WaitType}. The query is consuming significant CPU. Look for expensive operators (scans, sorts, hash builds) that could be eliminated or reduced.", + + // Parallelism: exchange and synchronization waits + _ when waitType.StartsWith("CXPACKET") || waitType.StartsWith("CXCONSUMER") => + $"Parallel thread skew — {topPct:N0}% of wait time is {top.WaitType}. Work is unevenly distributed across parallel threads.", + _ when waitType.StartsWith("CXSYNC") => + $"Parallel synchronization — {topPct:N0}% of wait time is {top.WaitType}. Threads are waiting at exchange operators to synchronize parallel execution.", + + // Hash operations + _ when waitType.StartsWith("HT") => + $"Hash operation — {topPct:N0}% of wait time is {top.WaitType}. Time spent building, repartitioning, or cleaning up hash tables. Large hash builds may indicate missing indexes or bad row estimates.", + + // Sort/bitmap batch operations + _ when waitType == "BPSORT" => + $"Batch sort — {topPct:N0}% of wait time is {top.WaitType}. Time spent in batch-mode sort operations.", + _ when waitType == "BMPBUILD" => + $"Bitmap build — {topPct:N0}% of wait time is {top.WaitType}. Time spent building bitmap filters for hash joins.", + + // Memory allocation + _ when waitType.Contains("MEMORY_ALLOCATION_EXT") => + $"Memory allocation — {topPct:N0}% of wait time is {top.WaitType}. Frequent memory allocations during query execution.", + + // Latch contention (non-I/O) + _ when waitType.StartsWith("PAGELATCH") => + $"Page latch contention — {topPct:N0}% of wait time is {top.WaitType}. In-memory page contention, often on TempDB or hot pages.", _ when waitType.StartsWith("LATCH_") => $"Latch contention — {topPct:N0}% of wait time is {top.WaitType}.", + + // Lock contention _ when waitType.StartsWith("LCK_") => $"Lock contention — {topPct:N0}% of wait time is {top.WaitType}. Other sessions are holding locks that this query needs.", - _ when waitType.StartsWith("CXPACKET") || waitType.StartsWith("CXCONSUMER") => - $"Parallel thread skew — {topPct:N0}% of wait time is {top.WaitType}. Work is unevenly distributed across parallel threads.", - _ when waitType.Contains("IO_COMPLETION") => - $"I/O bound — {topPct:N0}% of wait time is {top.WaitType}.", - _ when waitType.StartsWith("RESOURCE_SEMAPHORE") => - $"Memory grant wait — {topPct:N0}% of wait time is {top.WaitType}. The query had to wait for a memory grant.", + + // Log writes + _ when waitType == "LOGBUFFER" => + $"Log write — {topPct:N0}% of wait time is {top.WaitType}. Waiting for transaction log buffer flushes, typically from data modifications.", + + // Network + _ when waitType == "ASYNC_NETWORK_IO" => + $"Network bound — {topPct:N0}% of wait time is {top.WaitType}. The client application is not consuming results fast enough.", + + // Physical page cache + _ when waitType == "SOS_PHYS_PAGE_CACHE" => + $"Physical page cache — {topPct:N0}% of wait time is {top.WaitType}. Contention on the physical memory page allocator.", + _ => $"Dominant wait is {top.WaitType} ({topPct:N0}% of wait time)." }; return advice; } + /// + /// Returns true if the statement has significant I/O waits (PAGEIOLATCH_*, IO_COMPLETION). + /// Used for severity elevation decisions where I/O specifically indicates disk access. + /// Thresholds: I/O waits >= 20% of total wait time AND >= 100ms absolute. + /// + private static bool HasSignificantIoWaits(List waits) + { + if (waits.Count == 0) + return false; + + var totalMs = waits.Sum(w => w.WaitTimeMs); + if (totalMs == 0) + return false; + + long ioMs = 0; + foreach (var w in waits) + { + var wt = w.WaitType.ToUpperInvariant(); + if (wt.StartsWith("PAGEIOLATCH") || wt.Contains("IO_COMPLETION")) + ioMs += w.WaitTimeMs; + } + + var pct = (double)ioMs / totalMs * 100; + return ioMs >= 100 && pct >= 20; + } + + /// + /// Returns a terse sentence describing the dominant wait type for appending + /// to an existing warning message, or null if waits are negligible. + /// Surfaces whatever wait type is dominant — PAGEIOLATCH, SOS_SCHEDULER_YIELD, + /// CXPACKET, LCK_*, HTBUILD, EXECSYNC, IO_COMPLETION, etc. + /// Threshold: top wait >= 100ms and >= 20% of total wait time. + /// + private static string? GetTopWaitContext(List waits) + { + if (waits.Count == 0) + return null; + + var totalMs = waits.Sum(w => w.WaitTimeMs); + if (totalMs == 0) + return null; + + var top = waits.OrderByDescending(w => w.WaitTimeMs).First(); + if (top.WaitTimeMs < 100) + return null; + + var pct = (double)top.WaitTimeMs / totalMs * 100; + if (pct < 20) + return null; + + return $"Dominant wait: {top.WaitType} ({top.WaitTimeMs:N0}ms, {pct:N0}% of total wait time)."; + } + private static bool AllocatesResources(PlanNode node) { // Operators that get memory grants or allocate structures based on row estimates. diff --git a/tests/PlanViewer.Core.Tests/PlanAnalyzerTests.cs b/tests/PlanViewer.Core.Tests/PlanAnalyzerTests.cs index d0656aa..e6f450f 100644 --- a/tests/PlanViewer.Core.Tests/PlanAnalyzerTests.cs +++ b/tests/PlanViewer.Core.Tests/PlanAnalyzerTests.cs @@ -1,4 +1,5 @@ using PlanViewer.Core.Models; +using PlanViewer.Core.Services; namespace PlanViewer.Core.Tests; @@ -844,4 +845,383 @@ public void Issue178_7_FilterSuppressedOnTrivialChildIO() Assert.Empty(filterWarnings); } + + // --------------------------------------------------------------- + // Wait stats cross-cutting integration + // --------------------------------------------------------------- + + private static List IoWaits(long ms = 2000) => new() + { + new WaitStatInfo { WaitType = "PAGEIOLATCH_SH", WaitTimeMs = ms, WaitCount = 50 }, + new WaitStatInfo { WaitType = "ASYNC_NETWORK_IO", WaitTimeMs = 100, WaitCount = 10 } + }; + + private static List LockWaits(long ms = 3000) => new() + { + new WaitStatInfo { WaitType = "LCK_M_S", WaitTimeMs = ms, WaitCount = 200 }, + new WaitStatInfo { WaitType = "ASYNC_NETWORK_IO", WaitTimeMs = 100, WaitCount = 10 } + }; + + private static List CpuWaits(long ms = 4000) => new() + { + new WaitStatInfo { WaitType = "SOS_SCHEDULER_YIELD", WaitTimeMs = ms, WaitCount = 5000 }, + new WaitStatInfo { WaitType = "ASYNC_NETWORK_IO", WaitTimeMs = 100, WaitCount = 10 } + }; + + private static List CxWaits(long ms = 3000) => new() + { + new WaitStatInfo { WaitType = "CXPACKET", WaitTimeMs = ms, WaitCount = 400 }, + new WaitStatInfo { WaitType = "SOS_SCHEDULER_YIELD", WaitTimeMs = 200, WaitCount = 100 } + }; + + private static List HashWaits(long ms = 2500) => new() + { + new WaitStatInfo { WaitType = "HTBUILD", WaitTimeMs = ms, WaitCount = 80 }, + new WaitStatInfo { WaitType = "SOS_SCHEDULER_YIELD", WaitTimeMs = 200, WaitCount = 100 } + }; + + private static ParsedPlan BuildSyntheticPlan(PlanStatement stmt) + { + var plan = new ParsedPlan(); + plan.Batches.Add(new PlanBatch { Statements = { stmt } }); + return plan; + } + + [Fact] + public void WaitStats_Rule01_FilterAppendsIoContext() + { + // Filter with >1000 child reads + I/O waits → message includes wait context + var child = new PlanNode + { + NodeId = 1, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", + HasActualStats = true, ActualRows = 100000, ActualLogicalReads = 5000, ActualElapsedMs = 500 + }; + var filter = new PlanNode + { + NodeId = 0, PhysicalOp = "Filter", LogicalOp = "Filter", + Predicate = "[col1] > 100", + HasActualStats = true, ActualRows = 10, ActualElapsedMs = 500, + Children = { child } + }; + child.Parent = filter; + + var stmt = new PlanStatement + { + RootNode = filter, + WaitStats = IoWaits() + }; + var plan = BuildSyntheticPlan(stmt); + PlanAnalyzer.Analyze(plan); + + var warnings = PlanTestHelper.AllNodeWarnings(stmt) + .Where(w => w.WarningType == "Filter Operator").ToList(); + Assert.NotEmpty(warnings); + Assert.Contains(warnings, w => w.Message.Contains("PAGEIOLATCH_SH")); + } + + [Fact] + public void WaitStats_Rule01_FilterNoContextWithoutWaits() + { + // Same filter but no wait stats → no wait context in message + var child = new PlanNode + { + NodeId = 1, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", + HasActualStats = true, ActualRows = 100000, ActualLogicalReads = 5000, ActualElapsedMs = 500 + }; + var filter = new PlanNode + { + NodeId = 0, PhysicalOp = "Filter", LogicalOp = "Filter", + Predicate = "[col1] > 100", + HasActualStats = true, ActualRows = 10, ActualElapsedMs = 500, + Children = { child } + }; + child.Parent = filter; + + var stmt = new PlanStatement { RootNode = filter }; + var plan = BuildSyntheticPlan(stmt); + PlanAnalyzer.Analyze(plan); + + var warnings = PlanTestHelper.AllNodeWarnings(stmt) + .Where(w => w.WarningType == "Filter Operator").ToList(); + Assert.NotEmpty(warnings); + Assert.DoesNotContain(warnings, w => w.Message.Contains("Wait stats")); + } + + [Fact] + public void WaitStats_Rule05_SpillWithIoWaitsAppendsContext() + { + // Row mismatch causing a spill + I/O waits → I/O context appended + // Sort needs a spill warning so AssessEstimateHarm returns non-null, + // and must have a non-root parent (NodeId != -1). + var sortNode = new PlanNode + { + NodeId = 2, PhysicalOp = "Sort", LogicalOp = "Sort", + HasActualStats = true, + EstimateRows = 100, ActualRows = 100000, ActualExecutions = 1, + Warnings = { new PlanWarning + { + WarningType = "Sort Spill", + Message = "Sort spilled", + Severity = PlanWarningSeverity.Warning, + SpillDetails = new SpillDetail { SpillType = "Sort", WritesToTempDb = 1000 } + }} + }; + var selectNode = new PlanNode + { + NodeId = 1, PhysicalOp = "Compute Scalar", LogicalOp = "Compute Scalar", + Children = { sortNode } + }; + sortNode.Parent = selectNode; + var root = new PlanNode + { + NodeId = -1, PhysicalOp = "SELECT", LogicalOp = "SELECT", + Children = { selectNode } + }; + selectNode.Parent = root; + + var stmt = new PlanStatement + { + RootNode = root, + WaitStats = IoWaits() + }; + var plan = BuildSyntheticPlan(stmt); + PlanAnalyzer.Analyze(plan); + + var warnings = PlanTestHelper.AllNodeWarnings(stmt) + .Where(w => w.WarningType == "Row Estimate Mismatch").ToList(); + Assert.NotEmpty(warnings); + Assert.Contains(warnings, w => w.Message.Contains("PAGEIOLATCH_SH")); + } + + [Fact] + public void WaitStats_Rule11_ScanWithIoWaitsElevatedToCritical() + { + // Scan at 60% cost + I/O waits → elevated to Critical (normally needs 90%) + var scan = new PlanNode + { + NodeId = 0, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", + Predicate = "[col1] = @p1", + EstimatedTotalSubtreeCost = 6.0 + }; + var stmt = new PlanStatement + { + RootNode = scan, + StatementSubTreeCost = 10.0, + WaitStats = IoWaits() + }; + var plan = BuildSyntheticPlan(stmt); + PlanAnalyzer.Analyze(plan); + + var warnings = PlanTestHelper.AllNodeWarnings(stmt) + .Where(w => w.WarningType == "Scan With Predicate").ToList(); + Assert.NotEmpty(warnings); + Assert.Contains(warnings, w => w.Severity == PlanWarningSeverity.Critical); + Assert.Contains(warnings, w => w.Message.Contains("PAGEIOLATCH_SH")); + } + + [Fact] + public void WaitStats_Rule11_ScanWithoutIoWaitsStaysWarning() + { + // Same scan at 60% cost but no I/O waits → stays Warning + var scan = new PlanNode + { + NodeId = 0, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", + Predicate = "[col1] = @p1", + EstimatedTotalSubtreeCost = 6.0 + }; + var stmt = new PlanStatement + { + RootNode = scan, + StatementSubTreeCost = 10.0 + }; + var plan = BuildSyntheticPlan(stmt); + PlanAnalyzer.Analyze(plan); + + var warnings = PlanTestHelper.AllNodeWarnings(stmt) + .Where(w => w.WarningType == "Scan With Predicate").ToList(); + Assert.NotEmpty(warnings); + Assert.All(warnings, w => Assert.Equal(PlanWarningSeverity.Warning, w.Severity)); + } + + [Fact] + public void WaitStats_Rule16_NestedLoopsAppendsLockContext() + { + // Nested Loops with >100K inner executions + lock waits → lock context appended + var outer = new PlanNode + { + NodeId = 1, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", + HasActualStats = true, ActualRows = 500000, ActualExecutions = 1, EstimateRows = 500000 + }; + var inner = new PlanNode + { + NodeId = 2, PhysicalOp = "Index Seek", LogicalOp = "Index Seek", + HasActualStats = true, ActualRows = 500000, ActualExecutions = 500000 + }; + var nl = new PlanNode + { + NodeId = 0, PhysicalOp = "Nested Loops", LogicalOp = "Inner Join", + HasActualStats = true, ActualRows = 500000, + Children = { outer, inner } + }; + outer.Parent = nl; + inner.Parent = nl; + + var stmt = new PlanStatement + { + RootNode = nl, + WaitStats = LockWaits() + }; + var plan = BuildSyntheticPlan(stmt); + PlanAnalyzer.Analyze(plan); + + var warnings = PlanTestHelper.AllNodeWarnings(stmt) + .Where(w => w.WarningType == "Nested Loops High Executions").ToList(); + Assert.NotEmpty(warnings); + Assert.Contains(warnings, w => w.Message.Contains("LCK_M_S")); + } + + [Fact] + public void WaitStats_Rule16_NestedLoopsSurfacesIoWaits() + { + // Nested Loops with >100K inner executions + I/O waits → I/O context + var outer = new PlanNode + { + NodeId = 1, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", + HasActualStats = true, ActualRows = 500000, ActualExecutions = 1, EstimateRows = 500000 + }; + var inner = new PlanNode + { + NodeId = 2, PhysicalOp = "Index Seek", LogicalOp = "Index Seek", + HasActualStats = true, ActualRows = 500000, ActualExecutions = 500000 + }; + var nl = new PlanNode + { + NodeId = 0, PhysicalOp = "Nested Loops", LogicalOp = "Inner Join", + HasActualStats = true, ActualRows = 500000, + Children = { outer, inner } + }; + outer.Parent = nl; + inner.Parent = nl; + + var stmt = new PlanStatement + { + RootNode = nl, + WaitStats = IoWaits() + }; + var plan = BuildSyntheticPlan(stmt); + PlanAnalyzer.Analyze(plan); + + var warnings = PlanTestHelper.AllNodeWarnings(stmt) + .Where(w => w.WarningType == "Nested Loops High Executions").ToList(); + Assert.NotEmpty(warnings); + Assert.Contains(warnings, w => w.Message.Contains("PAGEIOLATCH_SH")); + } + + [Fact] + public void WaitStats_SosSchedulerYield_SurfacedOnScan() + { + // SOS_SCHEDULER_YIELD as dominant wait on a scan → surfaces in message + var scan = new PlanNode + { + NodeId = 0, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", + Predicate = "[col1] = @p1", + EstimatedTotalSubtreeCost = 6.0 + }; + var stmt = new PlanStatement + { + RootNode = scan, + StatementSubTreeCost = 10.0, + WaitStats = CpuWaits() + }; + var plan = BuildSyntheticPlan(stmt); + PlanAnalyzer.Analyze(plan); + + var warnings = PlanTestHelper.AllNodeWarnings(stmt) + .Where(w => w.WarningType == "Scan With Predicate").ToList(); + Assert.NotEmpty(warnings); + Assert.Contains(warnings, w => w.Message.Contains("SOS_SCHEDULER_YIELD")); + // CPU waits are not I/O — should NOT elevate severity (stays Warning at 60% cost) + Assert.All(warnings, w => Assert.Equal(PlanWarningSeverity.Warning, w.Severity)); + } + + [Fact] + public void WaitStats_CxPacket_SurfacedOnNestedLoops() + { + // CXPACKET as dominant wait on Nested Loops → surfaces in message + var outer = new PlanNode + { + NodeId = 1, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", + HasActualStats = true, ActualRows = 500000, ActualExecutions = 1, EstimateRows = 500000 + }; + var inner = new PlanNode + { + NodeId = 2, PhysicalOp = "Index Seek", LogicalOp = "Index Seek", + HasActualStats = true, ActualRows = 500000, ActualExecutions = 500000 + }; + var nl = new PlanNode + { + NodeId = 0, PhysicalOp = "Nested Loops", LogicalOp = "Inner Join", + HasActualStats = true, ActualRows = 500000, + Children = { outer, inner } + }; + outer.Parent = nl; + inner.Parent = nl; + + var stmt = new PlanStatement + { + RootNode = nl, + WaitStats = CxWaits() + }; + var plan = BuildSyntheticPlan(stmt); + PlanAnalyzer.Analyze(plan); + + var warnings = PlanTestHelper.AllNodeWarnings(stmt) + .Where(w => w.WarningType == "Nested Loops High Executions").ToList(); + Assert.NotEmpty(warnings); + Assert.Contains(warnings, w => w.Message.Contains("CXPACKET")); + } + + [Fact] + public void WaitStats_HtBuild_SurfacedOnEstimateMismatch() + { + // HTBUILD as dominant wait on a hash join with bad estimates → surfaces in message + var hashNode = new PlanNode + { + NodeId = 2, PhysicalOp = "Hash Match", LogicalOp = "Inner Join", + HasActualStats = true, + EstimateRows = 100, ActualRows = 100000, ActualExecutions = 1, + Warnings = { new PlanWarning + { + WarningType = "Hash Spill", + Message = "Hash spilled", + Severity = PlanWarningSeverity.Warning, + SpillDetails = new SpillDetail { SpillType = "Hash", WritesToTempDb = 500 } + }} + }; + var parent = new PlanNode + { + NodeId = 1, PhysicalOp = "Compute Scalar", LogicalOp = "Compute Scalar", + Children = { hashNode } + }; + hashNode.Parent = parent; + var root = new PlanNode + { + NodeId = -1, PhysicalOp = "SELECT", LogicalOp = "SELECT", + Children = { parent } + }; + parent.Parent = root; + + var stmt = new PlanStatement + { + RootNode = root, + WaitStats = HashWaits() + }; + var plan = BuildSyntheticPlan(stmt); + PlanAnalyzer.Analyze(plan); + + var warnings = PlanTestHelper.AllNodeWarnings(stmt) + .Where(w => w.WarningType == "Row Estimate Mismatch").ToList(); + Assert.NotEmpty(warnings); + Assert.Contains(warnings, w => w.Message.Contains("HTBUILD")); + } } From e9655311760a1c253659238ccf7580870cdad5b8 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Fri, 10 Apr 2026 15:31:33 -0400 Subject: [PATCH 2/4] Add inline wait type labels and expand wait stats coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Annotate each wait stat line in the advice panel with a short description (e.g., "I/O — reading data from disk", "CPU — scheduler yielding") so users can immediately understand what each wait type means without leaving the panel. Labels render inline in muted text via GetWaitLabel(). Extract DescribeWaitType() from GetWaitStatsAdvice() so the wait type knowledge is reusable. Remove dead RESOURCE_SEMAPHORE branch (pre-execution wait, never appears in plan wait stats). Drop the separate summary line that had formatting issues in favor of per-line inline labels. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../Services/AdviceContentBuilder.cs | 7 ++ src/PlanViewer.Core/Services/PlanAnalyzer.cs | 78 +++++++++++++------ 2 files changed, 63 insertions(+), 22 deletions(-) diff --git a/src/PlanViewer.App/Services/AdviceContentBuilder.cs b/src/PlanViewer.App/Services/AdviceContentBuilder.cs index 2c96e54..525fb9d 100644 --- a/src/PlanViewer.App/Services/AdviceContentBuilder.cs +++ b/src/PlanViewer.App/Services/AdviceContentBuilder.cs @@ -7,6 +7,7 @@ using Avalonia.Layout; using Avalonia.Media; using PlanViewer.Core.Output; +using PlanViewer.Core.Services; namespace PlanViewer.App.Services; @@ -994,6 +995,12 @@ private static StackPanel CreateWaitStatLine(string waitName, string waitValue, var waitBrush = GetWaitCategoryBrush(waitName); tb.Inlines!.Add(new Run(waitName) { Foreground = waitBrush }); tb.Inlines.Add(new Run(": " + waitValue) { Foreground = ValueBrush }); + + // Inline description label for the wait type + var label = PlanAnalyzer.GetWaitLabel(waitName); + if (!string.IsNullOrEmpty(label)) + tb.Inlines.Add(new Run(" " + label) { Foreground = MutedBrush, FontSize = 11 }); + wrapper.Children.Add(tb); // Proportional bar scaled to max wait in group diff --git a/src/PlanViewer.Core/Services/PlanAnalyzer.cs b/src/PlanViewer.Core/Services/PlanAnalyzer.cs index 3a66d47..d656279 100644 --- a/src/PlanViewer.Core/Services/PlanAnalyzer.cs +++ b/src/PlanViewer.Core/Services/PlanAnalyzer.cs @@ -1699,9 +1699,36 @@ private static ScanImpact BuildScanImpactDetails(PlanNode node, PlanStatement st /// - A parent Sort/Hash spilled (downstream estimate caused bad grant) /// /// - /// Returns targeted advice based on statement-level wait stats, or null if no waits. - /// When the dominant wait type is clear, gives specific guidance instead of generic advice. + /// Returns a short label describing what a wait type means (e.g., "I/O — reading from disk"). + /// Public for use by UI components that annotate wait stats inline. /// + public static string GetWaitLabel(string waitType) + { + var wt = waitType.ToUpperInvariant(); + return wt switch + { + _ when wt.StartsWith("PAGEIOLATCH") => "I/O — reading data from disk", + _ when wt.Contains("IO_COMPLETION") => "I/O — spills to TempDB or eager writes", + _ when wt == "SOS_SCHEDULER_YIELD" => "CPU — scheduler yielding", + _ when wt.StartsWith("CXPACKET") || wt.StartsWith("CXCONSUMER") => "parallelism — thread skew", + _ when wt.StartsWith("CXSYNC") => "parallelism — exchange synchronization", + _ when wt == "HTBUILD" => "hash — building hash table", + _ when wt == "HTDELETE" => "hash — cleaning up hash table", + _ when wt == "HTREPARTITION" => "hash — repartitioning", + _ when wt.StartsWith("HT") => "hash operation", + _ when wt == "BPSORT" => "batch sort", + _ when wt == "BMPBUILD" => "bitmap filter build", + _ when wt.Contains("MEMORY_ALLOCATION_EXT") => "memory allocation", + _ when wt.StartsWith("PAGELATCH") => "page latch — in-memory contention", + _ when wt.StartsWith("LATCH_") => "latch contention", + _ when wt.StartsWith("LCK_") => "lock contention", + _ when wt == "LOGBUFFER" => "transaction log writes", + _ when wt == "ASYNC_NETWORK_IO" => "network — client not consuming results", + _ when wt == "SOS_PHYS_PAGE_CACHE" => "physical page cache contention", + _ => "" + }; + } + private static string? GetWaitStatsAdvice(List waits) { if (waits.Count == 0) @@ -1718,65 +1745,72 @@ private static ScanImpact BuildScanImpactDetails(PlanNode node, PlanStatement st if (topPct < 80) return null; - var waitType = top.WaitType.ToUpperInvariant(); - var advice = waitType switch + return DescribeWaitType(top.WaitType, topPct); + } + + /// + /// Maps a wait type to a human-readable description with percentage context. + /// Covers all wait types observed in real execution plan files. + /// + private static string DescribeWaitType(string rawWaitType, double topPct) + { + var waitType = rawWaitType.ToUpperInvariant(); + return waitType switch { // I/O: reading/writing data pages from disk _ when waitType.StartsWith("PAGEIOLATCH") => - $"I/O bound — {topPct:N0}% of wait time is {top.WaitType}. Data is being read from disk rather than memory. Consider adding indexes to reduce I/O, or investigate memory pressure.", + $"I/O bound — {topPct:N0}% of wait time is {rawWaitType}. Data is being read from disk rather than memory. Consider adding indexes to reduce I/O, or investigate memory pressure.", _ when waitType.Contains("IO_COMPLETION") => - $"I/O bound — {topPct:N0}% of wait time is {top.WaitType}. Non-buffer I/O such as sort/hash spills to TempDB or eager writes.", + $"I/O bound — {topPct:N0}% of wait time is {rawWaitType}. Non-buffer I/O such as sort/hash spills to TempDB or eager writes.", // CPU: thread yielding its scheduler quantum _ when waitType == "SOS_SCHEDULER_YIELD" => - $"CPU bound — {topPct:N0}% of wait time is {top.WaitType}. The query is consuming significant CPU. Look for expensive operators (scans, sorts, hash builds) that could be eliminated or reduced.", + $"CPU bound — {topPct:N0}% of wait time is {rawWaitType}. The query is consuming significant CPU. Look for expensive operators (scans, sorts, hash builds) that could be eliminated or reduced.", // Parallelism: exchange and synchronization waits _ when waitType.StartsWith("CXPACKET") || waitType.StartsWith("CXCONSUMER") => - $"Parallel thread skew — {topPct:N0}% of wait time is {top.WaitType}. Work is unevenly distributed across parallel threads.", + $"Parallel thread skew — {topPct:N0}% of wait time is {rawWaitType}. Work is unevenly distributed across parallel threads.", _ when waitType.StartsWith("CXSYNC") => - $"Parallel synchronization — {topPct:N0}% of wait time is {top.WaitType}. Threads are waiting at exchange operators to synchronize parallel execution.", + $"Parallel synchronization — {topPct:N0}% of wait time is {rawWaitType}. Threads are waiting at exchange operators to synchronize parallel execution.", // Hash operations _ when waitType.StartsWith("HT") => - $"Hash operation — {topPct:N0}% of wait time is {top.WaitType}. Time spent building, repartitioning, or cleaning up hash tables. Large hash builds may indicate missing indexes or bad row estimates.", + $"Hash operation — {topPct:N0}% of wait time is {rawWaitType}. Time spent building, repartitioning, or cleaning up hash tables. Large hash builds may indicate missing indexes or bad row estimates.", // Sort/bitmap batch operations _ when waitType == "BPSORT" => - $"Batch sort — {topPct:N0}% of wait time is {top.WaitType}. Time spent in batch-mode sort operations.", + $"Batch sort — {topPct:N0}% of wait time is {rawWaitType}. Time spent in batch-mode sort operations.", _ when waitType == "BMPBUILD" => - $"Bitmap build — {topPct:N0}% of wait time is {top.WaitType}. Time spent building bitmap filters for hash joins.", + $"Bitmap build — {topPct:N0}% of wait time is {rawWaitType}. Time spent building bitmap filters for hash joins.", // Memory allocation _ when waitType.Contains("MEMORY_ALLOCATION_EXT") => - $"Memory allocation — {topPct:N0}% of wait time is {top.WaitType}. Frequent memory allocations during query execution.", + $"Memory allocation — {topPct:N0}% of wait time is {rawWaitType}. Frequent memory allocations during query execution.", // Latch contention (non-I/O) _ when waitType.StartsWith("PAGELATCH") => - $"Page latch contention — {topPct:N0}% of wait time is {top.WaitType}. In-memory page contention, often on TempDB or hot pages.", + $"Page latch contention — {topPct:N0}% of wait time is {rawWaitType}. In-memory page contention, often on TempDB or hot pages.", _ when waitType.StartsWith("LATCH_") => - $"Latch contention — {topPct:N0}% of wait time is {top.WaitType}.", + $"Latch contention — {topPct:N0}% of wait time is {rawWaitType}.", // Lock contention _ when waitType.StartsWith("LCK_") => - $"Lock contention — {topPct:N0}% of wait time is {top.WaitType}. Other sessions are holding locks that this query needs.", + $"Lock contention — {topPct:N0}% of wait time is {rawWaitType}. Other sessions are holding locks that this query needs.", // Log writes _ when waitType == "LOGBUFFER" => - $"Log write — {topPct:N0}% of wait time is {top.WaitType}. Waiting for transaction log buffer flushes, typically from data modifications.", + $"Log write — {topPct:N0}% of wait time is {rawWaitType}. Waiting for transaction log buffer flushes, typically from data modifications.", // Network _ when waitType == "ASYNC_NETWORK_IO" => - $"Network bound — {topPct:N0}% of wait time is {top.WaitType}. The client application is not consuming results fast enough.", + $"Network bound — {topPct:N0}% of wait time is {rawWaitType}. The client application is not consuming results fast enough.", // Physical page cache _ when waitType == "SOS_PHYS_PAGE_CACHE" => - $"Physical page cache — {topPct:N0}% of wait time is {top.WaitType}. Contention on the physical memory page allocator.", + $"Physical page cache — {topPct:N0}% of wait time is {rawWaitType}. Contention on the physical memory page allocator.", - _ => $"Dominant wait is {top.WaitType} ({topPct:N0}% of wait time)." + _ => $"Dominant wait is {rawWaitType} ({topPct:N0}% of wait time)." }; - - return advice; } /// From c65d86c8a453f7145824ee675aa4ee4c45448ea7 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Fri, 10 Apr 2026 15:56:42 -0400 Subject: [PATCH 3/4] Strip operator-level wait context, add query classification to triage card MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove GetTopWaitContext calls from Rules 1, 5, 11, and 16 — statement-level waits can't be attributed to individual operators, so appending the dominant wait type to every warning was noise rather than analysis. Keep Rule 11 I/O severity elevation (scan + PAGEIOLATCH at 50%+ cost → Critical) since a dominant scan with I/O waits is a defensible correlation. Add wait profile classification to the triage summary card — categorizes queries as "I/O bound", "CPU bound", "parallelism bound", or "lock contention bound" based on wait category percentages (>= 30% threshold). Co-Authored-By: Claude Opus 4.6 (1M context) --- .../Services/AdviceContentBuilder.cs | 35 +++ src/PlanViewer.Core/Services/PlanAnalyzer.cs | 53 +--- .../PlanAnalyzerTests.cs | 286 +----------------- 3 files changed, 39 insertions(+), 335 deletions(-) diff --git a/src/PlanViewer.App/Services/AdviceContentBuilder.cs b/src/PlanViewer.App/Services/AdviceContentBuilder.cs index 525fb9d..875644b 100644 --- a/src/PlanViewer.App/Services/AdviceContentBuilder.cs +++ b/src/PlanViewer.App/Services/AdviceContentBuilder.cs @@ -1126,6 +1126,41 @@ private static SolidColorBrush GetWaitCategoryBrush(string waitType) items.Add(($"Memory grant: {grantedMB:F1} MB ({usedPct:F0}% used)", memBrush)); } + // Wait profile classification + if (stmt.WaitStats.Count > 0) + { + var totalMs = stmt.WaitStats.Sum(w => w.WaitTimeMs); + if (totalMs > 0) + { + long ioMs = 0, cpuMs = 0, parallelMs = 0, lockMs = 0; + foreach (var w in stmt.WaitStats) + { + var wt = w.WaitType.ToUpperInvariant(); + if (wt.StartsWith("PAGEIOLATCH") || wt.Contains("IO_COMPLETION")) + ioMs += w.WaitTimeMs; + else if (wt == "SOS_SCHEDULER_YIELD") + cpuMs += w.WaitTimeMs; + else if (wt.StartsWith("CX")) + parallelMs += w.WaitTimeMs; + else if (wt.StartsWith("LCK_")) + lockMs += w.WaitTimeMs; + } + + // Pick the dominant category (>= 30% of total) + var categories = new List<(string label, long ms)>(); + if (ioMs * 100 / totalMs >= 30) categories.Add(("I/O", ioMs)); + if (cpuMs * 100 / totalMs >= 30) categories.Add(("CPU", cpuMs)); + if (parallelMs * 100 / totalMs >= 30) categories.Add(("parallelism", parallelMs)); + if (lockMs * 100 / totalMs >= 30) categories.Add(("lock contention", lockMs)); + + if (categories.Count > 0) + { + var label = string.Join(" + ", categories.Select(c => c.label)); + items.Add(($"{label} bound ({totalMs:N0}ms total wait time)", InfoBrush)); + } + } + } + // Warning counts by severity var criticalCount = stmt.Warnings.Count(w => w.Severity.Equals("Critical", StringComparison.OrdinalIgnoreCase)); diff --git a/src/PlanViewer.Core/Services/PlanAnalyzer.cs b/src/PlanViewer.Core/Services/PlanAnalyzer.cs index d656279..5e18d0a 100644 --- a/src/PlanViewer.Core/Services/PlanAnalyzer.cs +++ b/src/PlanViewer.Core/Services/PlanAnalyzer.cs @@ -548,9 +548,9 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt, AnalyzerConfi { // Gate: skip trivial filters based on actual stats or estimated cost bool isTrivial; - long childReads = 0; if (node.HasActualStats) { + long childReads = 0; foreach (var child in node.Children) childReads += SumSubtreeReads(child); var childElapsed = node.Children.Max(c => c.ActualElapsedMs); @@ -571,14 +571,6 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt, AnalyzerConfi message += $"\n{impact}"; message += $"\nPredicate: {predicate}"; - // Wait stats add context — rows burned CPU/I/O/waits just to be discarded - if (childReads >= 1000) - { - var waitContext = GetTopWaitContext(stmt.WaitStats); - if (waitContext != null) - message += $"\n{waitContext}"; - } - node.Warnings.Add(new PlanWarning { WarningType = "Filter Operator", @@ -655,16 +647,10 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt, AnalyzerConfi var actualDisplay = executions > 1 ? $"Actual {node.ActualRows:N0} ({actualPerExec:N0} rows x {executions:N0} executions)" : $"Actual {node.ActualRows:N0}"; - var message = $"Estimated {node.EstimateRows:N0} vs {actualDisplay} — {factor:F0}x {direction}. {harm}"; - - var waitContext = GetTopWaitContext(stmt.WaitStats); - if (waitContext != null) - message += $" {waitContext}"; - node.Warnings.Add(new PlanWarning { WarningType = "Row Estimate Mismatch", - Message = message, + Message = $"Estimated {node.EstimateRows:N0} vs {actualDisplay} — {factor:F0}x {direction}. {harm}", Severity = factor >= 100 ? PlanWarningSeverity.Critical : PlanWarningSeverity.Warning }); } @@ -848,10 +834,6 @@ _ when nonSargableReason.StartsWith("Function call") => message += $" {details.Summary}"; message += " Check that you have appropriate indexes."; - var waitContext = GetTopWaitContext(stmt.WaitStats); - if (waitContext != null) - message += $" {waitContext}"; - // I/O waits specifically confirm the scan is hitting disk — elevate if (HasSignificantIoWaits(stmt.WaitStats) && details.CostPct >= 50 && severity != PlanWarningSeverity.Critical) @@ -1047,10 +1029,6 @@ _ when nonSargableReason.StartsWith("Function call") => else details.Add("Consider whether a hash or merge join would be more appropriate for this row count."); - var waitContext = GetTopWaitContext(stmt.WaitStats); - if (waitContext != null) - details.Add(waitContext); - node.Warnings.Add(new PlanWarning { WarningType = "Nested Loops High Executions", @@ -1839,33 +1817,6 @@ private static bool HasSignificantIoWaits(List waits) return ioMs >= 100 && pct >= 20; } - /// - /// Returns a terse sentence describing the dominant wait type for appending - /// to an existing warning message, or null if waits are negligible. - /// Surfaces whatever wait type is dominant — PAGEIOLATCH, SOS_SCHEDULER_YIELD, - /// CXPACKET, LCK_*, HTBUILD, EXECSYNC, IO_COMPLETION, etc. - /// Threshold: top wait >= 100ms and >= 20% of total wait time. - /// - private static string? GetTopWaitContext(List waits) - { - if (waits.Count == 0) - return null; - - var totalMs = waits.Sum(w => w.WaitTimeMs); - if (totalMs == 0) - return null; - - var top = waits.OrderByDescending(w => w.WaitTimeMs).First(); - if (top.WaitTimeMs < 100) - return null; - - var pct = (double)top.WaitTimeMs / totalMs * 100; - if (pct < 20) - return null; - - return $"Dominant wait: {top.WaitType} ({top.WaitTimeMs:N0}ms, {pct:N0}% of total wait time)."; - } - private static bool AllocatesResources(PlanNode node) { // Operators that get memory grants or allocate structures based on row estimates. diff --git a/tests/PlanViewer.Core.Tests/PlanAnalyzerTests.cs b/tests/PlanViewer.Core.Tests/PlanAnalyzerTests.cs index e6f450f..5ca3373 100644 --- a/tests/PlanViewer.Core.Tests/PlanAnalyzerTests.cs +++ b/tests/PlanViewer.Core.Tests/PlanAnalyzerTests.cs @@ -856,30 +856,12 @@ public void Issue178_7_FilterSuppressedOnTrivialChildIO() new WaitStatInfo { WaitType = "ASYNC_NETWORK_IO", WaitTimeMs = 100, WaitCount = 10 } }; - private static List LockWaits(long ms = 3000) => new() - { - new WaitStatInfo { WaitType = "LCK_M_S", WaitTimeMs = ms, WaitCount = 200 }, - new WaitStatInfo { WaitType = "ASYNC_NETWORK_IO", WaitTimeMs = 100, WaitCount = 10 } - }; - private static List CpuWaits(long ms = 4000) => new() { new WaitStatInfo { WaitType = "SOS_SCHEDULER_YIELD", WaitTimeMs = ms, WaitCount = 5000 }, new WaitStatInfo { WaitType = "ASYNC_NETWORK_IO", WaitTimeMs = 100, WaitCount = 10 } }; - private static List CxWaits(long ms = 3000) => new() - { - new WaitStatInfo { WaitType = "CXPACKET", WaitTimeMs = ms, WaitCount = 400 }, - new WaitStatInfo { WaitType = "SOS_SCHEDULER_YIELD", WaitTimeMs = 200, WaitCount = 100 } - }; - - private static List HashWaits(long ms = 2500) => new() - { - new WaitStatInfo { WaitType = "HTBUILD", WaitTimeMs = ms, WaitCount = 80 }, - new WaitStatInfo { WaitType = "SOS_SCHEDULER_YIELD", WaitTimeMs = 200, WaitCount = 100 } - }; - private static ParsedPlan BuildSyntheticPlan(PlanStatement stmt) { var plan = new ParsedPlan(); @@ -887,112 +869,6 @@ private static ParsedPlan BuildSyntheticPlan(PlanStatement stmt) return plan; } - [Fact] - public void WaitStats_Rule01_FilterAppendsIoContext() - { - // Filter with >1000 child reads + I/O waits → message includes wait context - var child = new PlanNode - { - NodeId = 1, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", - HasActualStats = true, ActualRows = 100000, ActualLogicalReads = 5000, ActualElapsedMs = 500 - }; - var filter = new PlanNode - { - NodeId = 0, PhysicalOp = "Filter", LogicalOp = "Filter", - Predicate = "[col1] > 100", - HasActualStats = true, ActualRows = 10, ActualElapsedMs = 500, - Children = { child } - }; - child.Parent = filter; - - var stmt = new PlanStatement - { - RootNode = filter, - WaitStats = IoWaits() - }; - var plan = BuildSyntheticPlan(stmt); - PlanAnalyzer.Analyze(plan); - - var warnings = PlanTestHelper.AllNodeWarnings(stmt) - .Where(w => w.WarningType == "Filter Operator").ToList(); - Assert.NotEmpty(warnings); - Assert.Contains(warnings, w => w.Message.Contains("PAGEIOLATCH_SH")); - } - - [Fact] - public void WaitStats_Rule01_FilterNoContextWithoutWaits() - { - // Same filter but no wait stats → no wait context in message - var child = new PlanNode - { - NodeId = 1, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", - HasActualStats = true, ActualRows = 100000, ActualLogicalReads = 5000, ActualElapsedMs = 500 - }; - var filter = new PlanNode - { - NodeId = 0, PhysicalOp = "Filter", LogicalOp = "Filter", - Predicate = "[col1] > 100", - HasActualStats = true, ActualRows = 10, ActualElapsedMs = 500, - Children = { child } - }; - child.Parent = filter; - - var stmt = new PlanStatement { RootNode = filter }; - var plan = BuildSyntheticPlan(stmt); - PlanAnalyzer.Analyze(plan); - - var warnings = PlanTestHelper.AllNodeWarnings(stmt) - .Where(w => w.WarningType == "Filter Operator").ToList(); - Assert.NotEmpty(warnings); - Assert.DoesNotContain(warnings, w => w.Message.Contains("Wait stats")); - } - - [Fact] - public void WaitStats_Rule05_SpillWithIoWaitsAppendsContext() - { - // Row mismatch causing a spill + I/O waits → I/O context appended - // Sort needs a spill warning so AssessEstimateHarm returns non-null, - // and must have a non-root parent (NodeId != -1). - var sortNode = new PlanNode - { - NodeId = 2, PhysicalOp = "Sort", LogicalOp = "Sort", - HasActualStats = true, - EstimateRows = 100, ActualRows = 100000, ActualExecutions = 1, - Warnings = { new PlanWarning - { - WarningType = "Sort Spill", - Message = "Sort spilled", - Severity = PlanWarningSeverity.Warning, - SpillDetails = new SpillDetail { SpillType = "Sort", WritesToTempDb = 1000 } - }} - }; - var selectNode = new PlanNode - { - NodeId = 1, PhysicalOp = "Compute Scalar", LogicalOp = "Compute Scalar", - Children = { sortNode } - }; - sortNode.Parent = selectNode; - var root = new PlanNode - { - NodeId = -1, PhysicalOp = "SELECT", LogicalOp = "SELECT", - Children = { selectNode } - }; - selectNode.Parent = root; - - var stmt = new PlanStatement - { - RootNode = root, - WaitStats = IoWaits() - }; - var plan = BuildSyntheticPlan(stmt); - PlanAnalyzer.Analyze(plan); - - var warnings = PlanTestHelper.AllNodeWarnings(stmt) - .Where(w => w.WarningType == "Row Estimate Mismatch").ToList(); - Assert.NotEmpty(warnings); - Assert.Contains(warnings, w => w.Message.Contains("PAGEIOLATCH_SH")); - } - [Fact] public void WaitStats_Rule11_ScanWithIoWaitsElevatedToCritical() { @@ -1016,7 +892,6 @@ public void WaitStats_Rule11_ScanWithIoWaitsElevatedToCritical() .Where(w => w.WarningType == "Scan With Predicate").ToList(); Assert.NotEmpty(warnings); Assert.Contains(warnings, w => w.Severity == PlanWarningSeverity.Critical); - Assert.Contains(warnings, w => w.Message.Contains("PAGEIOLATCH_SH")); } [Fact] @@ -1044,83 +919,9 @@ public void WaitStats_Rule11_ScanWithoutIoWaitsStaysWarning() } [Fact] - public void WaitStats_Rule16_NestedLoopsAppendsLockContext() + public void WaitStats_Rule11_CpuWaitsDoNotElevateSeverity() { - // Nested Loops with >100K inner executions + lock waits → lock context appended - var outer = new PlanNode - { - NodeId = 1, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", - HasActualStats = true, ActualRows = 500000, ActualExecutions = 1, EstimateRows = 500000 - }; - var inner = new PlanNode - { - NodeId = 2, PhysicalOp = "Index Seek", LogicalOp = "Index Seek", - HasActualStats = true, ActualRows = 500000, ActualExecutions = 500000 - }; - var nl = new PlanNode - { - NodeId = 0, PhysicalOp = "Nested Loops", LogicalOp = "Inner Join", - HasActualStats = true, ActualRows = 500000, - Children = { outer, inner } - }; - outer.Parent = nl; - inner.Parent = nl; - - var stmt = new PlanStatement - { - RootNode = nl, - WaitStats = LockWaits() - }; - var plan = BuildSyntheticPlan(stmt); - PlanAnalyzer.Analyze(plan); - - var warnings = PlanTestHelper.AllNodeWarnings(stmt) - .Where(w => w.WarningType == "Nested Loops High Executions").ToList(); - Assert.NotEmpty(warnings); - Assert.Contains(warnings, w => w.Message.Contains("LCK_M_S")); - } - - [Fact] - public void WaitStats_Rule16_NestedLoopsSurfacesIoWaits() - { - // Nested Loops with >100K inner executions + I/O waits → I/O context - var outer = new PlanNode - { - NodeId = 1, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", - HasActualStats = true, ActualRows = 500000, ActualExecutions = 1, EstimateRows = 500000 - }; - var inner = new PlanNode - { - NodeId = 2, PhysicalOp = "Index Seek", LogicalOp = "Index Seek", - HasActualStats = true, ActualRows = 500000, ActualExecutions = 500000 - }; - var nl = new PlanNode - { - NodeId = 0, PhysicalOp = "Nested Loops", LogicalOp = "Inner Join", - HasActualStats = true, ActualRows = 500000, - Children = { outer, inner } - }; - outer.Parent = nl; - inner.Parent = nl; - - var stmt = new PlanStatement - { - RootNode = nl, - WaitStats = IoWaits() - }; - var plan = BuildSyntheticPlan(stmt); - PlanAnalyzer.Analyze(plan); - - var warnings = PlanTestHelper.AllNodeWarnings(stmt) - .Where(w => w.WarningType == "Nested Loops High Executions").ToList(); - Assert.NotEmpty(warnings); - Assert.Contains(warnings, w => w.Message.Contains("PAGEIOLATCH_SH")); - } - - [Fact] - public void WaitStats_SosSchedulerYield_SurfacedOnScan() - { - // SOS_SCHEDULER_YIELD as dominant wait on a scan → surfaces in message + // CPU waits (SOS_SCHEDULER_YIELD) are not I/O — should NOT elevate at 60% cost var scan = new PlanNode { NodeId = 0, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", @@ -1139,89 +940,6 @@ public void WaitStats_SosSchedulerYield_SurfacedOnScan() var warnings = PlanTestHelper.AllNodeWarnings(stmt) .Where(w => w.WarningType == "Scan With Predicate").ToList(); Assert.NotEmpty(warnings); - Assert.Contains(warnings, w => w.Message.Contains("SOS_SCHEDULER_YIELD")); - // CPU waits are not I/O — should NOT elevate severity (stays Warning at 60% cost) Assert.All(warnings, w => Assert.Equal(PlanWarningSeverity.Warning, w.Severity)); } - - [Fact] - public void WaitStats_CxPacket_SurfacedOnNestedLoops() - { - // CXPACKET as dominant wait on Nested Loops → surfaces in message - var outer = new PlanNode - { - NodeId = 1, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", - HasActualStats = true, ActualRows = 500000, ActualExecutions = 1, EstimateRows = 500000 - }; - var inner = new PlanNode - { - NodeId = 2, PhysicalOp = "Index Seek", LogicalOp = "Index Seek", - HasActualStats = true, ActualRows = 500000, ActualExecutions = 500000 - }; - var nl = new PlanNode - { - NodeId = 0, PhysicalOp = "Nested Loops", LogicalOp = "Inner Join", - HasActualStats = true, ActualRows = 500000, - Children = { outer, inner } - }; - outer.Parent = nl; - inner.Parent = nl; - - var stmt = new PlanStatement - { - RootNode = nl, - WaitStats = CxWaits() - }; - var plan = BuildSyntheticPlan(stmt); - PlanAnalyzer.Analyze(plan); - - var warnings = PlanTestHelper.AllNodeWarnings(stmt) - .Where(w => w.WarningType == "Nested Loops High Executions").ToList(); - Assert.NotEmpty(warnings); - Assert.Contains(warnings, w => w.Message.Contains("CXPACKET")); - } - - [Fact] - public void WaitStats_HtBuild_SurfacedOnEstimateMismatch() - { - // HTBUILD as dominant wait on a hash join with bad estimates → surfaces in message - var hashNode = new PlanNode - { - NodeId = 2, PhysicalOp = "Hash Match", LogicalOp = "Inner Join", - HasActualStats = true, - EstimateRows = 100, ActualRows = 100000, ActualExecutions = 1, - Warnings = { new PlanWarning - { - WarningType = "Hash Spill", - Message = "Hash spilled", - Severity = PlanWarningSeverity.Warning, - SpillDetails = new SpillDetail { SpillType = "Hash", WritesToTempDb = 500 } - }} - }; - var parent = new PlanNode - { - NodeId = 1, PhysicalOp = "Compute Scalar", LogicalOp = "Compute Scalar", - Children = { hashNode } - }; - hashNode.Parent = parent; - var root = new PlanNode - { - NodeId = -1, PhysicalOp = "SELECT", LogicalOp = "SELECT", - Children = { parent } - }; - parent.Parent = root; - - var stmt = new PlanStatement - { - RootNode = root, - WaitStats = HashWaits() - }; - var plan = BuildSyntheticPlan(stmt); - PlanAnalyzer.Analyze(plan); - - var warnings = PlanTestHelper.AllNodeWarnings(stmt) - .Where(w => w.WarningType == "Row Estimate Mismatch").ToList(); - Assert.NotEmpty(warnings); - Assert.Contains(warnings, w => w.Message.Contains("HTBUILD")); - } } From 65170fd6ddb94364ccd8150634bd9a6800aaed5e Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:10:14 -0400 Subject: [PATCH 4/4] Add maximum benefit scoring for plan analysis findings (#215) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce BenefitScorer — a second-pass analysis that calculates the maximum percentage of elapsed time each finding could save if addressed. Findings are now sorted by benefit descending across CLI text/HTML output, JSON API, and the desktop app's Properties panel and tooltips. This reduces noise on large plans by surfacing high-impact findings first. Stage 1 of the maximum benefit system scores operator-level rules (Filter, Spill, Key Lookup, Scan, UDF, Nested Loops) using actual operator self-time, with CostPercent fallback for estimated plans. Bump version to 1.5.0. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../Controls/PlanViewerControl.axaml.cs | 36 +- src/PlanViewer.App/Mcp/McpQueryStoreTools.cs | 1 + src/PlanViewer.App/PlanViewer.App.csproj | 2 +- src/PlanViewer.Cli/Commands/AnalyzeCommand.cs | 2 + .../Commands/QueryStoreCommand.cs | 1 + src/PlanViewer.Core/Models/PlanModels.cs | 11 + src/PlanViewer.Core/Output/AnalysisResult.cs | 6 + src/PlanViewer.Core/Output/HtmlExporter.cs | 11 +- src/PlanViewer.Core/Output/ResultMapper.cs | 8 +- src/PlanViewer.Core/Output/TextFormatter.cs | 39 +- src/PlanViewer.Core/Services/BenefitScorer.cs | 379 ++++++++++++++++++ src/PlanViewer.Web/Pages/Index.razor | 1 + src/PlanViewer.Web/PlanViewer.Web.csproj | 1 + tests/PlanViewer.Core.Tests/PlanTestHelper.cs | 2 + 14 files changed, 475 insertions(+), 25 deletions(-) create mode 100644 src/PlanViewer.Core/Services/BenefitScorer.cs diff --git a/src/PlanViewer.App/Controls/PlanViewerControl.axaml.cs b/src/PlanViewer.App/Controls/PlanViewerControl.axaml.cs index 0f5872e..fc8f3a9 100644 --- a/src/PlanViewer.App/Controls/PlanViewerControl.axaml.cs +++ b/src/PlanViewer.App/Controls/PlanViewerControl.axaml.cs @@ -275,6 +275,7 @@ public void LoadPlan(string planXml, string label, string? queryText = null) _currentPlan = ShowPlanParser.Parse(planXml); PlanAnalyzer.Analyze(_currentPlan, ConfigLoader.Load()); + BenefitScorer.Score(_currentPlan); var allStatements = _currentPlan.Batches .SelectMany(b => b.Statements) @@ -1725,14 +1726,21 @@ private void ShowPropertiesPanel(PlanNode node) if (s.PlanWarnings.Count > 0) { var planWarningsPanel = new StackPanel(); - foreach (var w in s.PlanWarnings) + var sortedPlanWarnings = s.PlanWarnings + .OrderByDescending(w => w.MaxBenefitPercent ?? -1) + .ThenByDescending(w => w.Severity) + .ThenBy(w => w.WarningType); + foreach (var w in sortedPlanWarnings) { var warnColor = w.Severity == PlanWarningSeverity.Critical ? "#E57373" : w.Severity == PlanWarningSeverity.Warning ? "#FFB347" : "#6BB5FF"; var warnPanel = new StackPanel { Margin = new Thickness(10, 2, 10, 2) }; + var planWarnHeader = w.MaxBenefitPercent.HasValue + ? $"\u26A0 {w.WarningType} \u2014 up to {w.MaxBenefitPercent:N0}% benefit" + : $"\u26A0 {w.WarningType}"; warnPanel.Children.Add(new TextBlock { - Text = $"\u26A0 {w.WarningType}", + Text = planWarnHeader, FontWeight = FontWeight.SemiBold, FontSize = 11, Foreground = new SolidColorBrush(Color.Parse(warnColor)) @@ -1788,14 +1796,21 @@ private void ShowPropertiesPanel(PlanNode node) if (node.HasWarnings) { var warningsPanel = new StackPanel(); - foreach (var w in node.Warnings) + var sortedNodeWarnings = node.Warnings + .OrderByDescending(w => w.MaxBenefitPercent ?? -1) + .ThenByDescending(w => w.Severity) + .ThenBy(w => w.WarningType); + foreach (var w in sortedNodeWarnings) { var warnColor = w.Severity == PlanWarningSeverity.Critical ? "#E57373" : w.Severity == PlanWarningSeverity.Warning ? "#FFB347" : "#6BB5FF"; var warnPanel = new StackPanel { Margin = new Thickness(10, 2, 10, 2) }; + var nodeWarnHeader = w.MaxBenefitPercent.HasValue + ? $"\u26A0 {w.WarningType} \u2014 up to {w.MaxBenefitPercent:N0}% benefit" + : $"\u26A0 {w.WarningType}"; warnPanel.Children.Add(new TextBlock { - Text = $"\u26A0 {w.WarningType}", + Text = nodeWarnHeader, FontWeight = FontWeight.SemiBold, FontSize = 11, Foreground = new SolidColorBrush(Color.Parse(warnColor)) @@ -2140,18 +2155,21 @@ private object BuildNodeTooltipContent(PlanNode node, List? allWarn if (allWarnings != null) { - // Root node: show distinct warning type names only + // Root node: show distinct warning type names only, sorted by max benefit var distinct = warnings .GroupBy(w => w.WarningType) - .Select(g => (Type: g.Key, MaxSeverity: g.Max(w => w.Severity), Count: g.Count())) - .OrderByDescending(g => g.MaxSeverity) + .Select(g => (Type: g.Key, MaxSeverity: g.Max(w => w.Severity), Count: g.Count(), + MaxBenefit: g.Max(w => w.MaxBenefitPercent ?? -1))) + .OrderByDescending(g => g.MaxBenefit) + .ThenByDescending(g => g.MaxSeverity) .ThenBy(g => g.Type); - foreach (var (type, severity, count) in distinct) + foreach (var (type, severity, count, maxBenefit) in distinct) { var warnColor = severity == PlanWarningSeverity.Critical ? "#E57373" : severity == PlanWarningSeverity.Warning ? "#FFB347" : "#6BB5FF"; - var label = count > 1 ? $"\u26A0 {type} ({count})" : $"\u26A0 {type}"; + var benefitSuffix = maxBenefit >= 0 ? $" \u2014 up to {maxBenefit:N0}%" : ""; + var label = count > 1 ? $"\u26A0 {type} ({count}){benefitSuffix}" : $"\u26A0 {type}{benefitSuffix}"; stack.Children.Add(new TextBlock { Text = label, diff --git a/src/PlanViewer.App/Mcp/McpQueryStoreTools.cs b/src/PlanViewer.App/Mcp/McpQueryStoreTools.cs index 22919e9..e23d949 100644 --- a/src/PlanViewer.App/Mcp/McpQueryStoreTools.cs +++ b/src/PlanViewer.App/Mcp/McpQueryStoreTools.cs @@ -124,6 +124,7 @@ public static async Task GetQueryStoreTop( .Replace("encoding=\"utf-16\"", "encoding=\"utf-8\""); var parsed = ShowPlanParser.Parse(xml); PlanAnalyzer.Analyze(parsed); + BenefitScorer.Score(parsed); var allStatements = parsed.Batches.SelectMany(b => b.Statements).ToList(); diff --git a/src/PlanViewer.App/PlanViewer.App.csproj b/src/PlanViewer.App/PlanViewer.App.csproj index f59bb64..51b3a7a 100644 --- a/src/PlanViewer.App/PlanViewer.App.csproj +++ b/src/PlanViewer.App/PlanViewer.App.csproj @@ -6,7 +6,7 @@ app.manifest EDD.ico true - 1.4.3 + 1.5.0 Erik Darling Darling Data LLC Performance Studio diff --git a/src/PlanViewer.Cli/Commands/AnalyzeCommand.cs b/src/PlanViewer.Cli/Commands/AnalyzeCommand.cs index c522f9d..a1f2ae3 100644 --- a/src/PlanViewer.Cli/Commands/AnalyzeCommand.cs +++ b/src/PlanViewer.Cli/Commands/AnalyzeCommand.cs @@ -207,6 +207,7 @@ private static async Task RunAsync(FileInfo? file, bool stdin, string output, bo var plan = ShowPlanParser.Parse(planXml); PlanAnalyzer.Analyze(plan, analyzerConfig); + BenefitScorer.Score(plan); if (plan.Batches.Count == 0) { @@ -400,6 +401,7 @@ private static async Task RunLiveAsync( // Parse, analyze, map result var plan = ShowPlanParser.Parse(planXml); PlanAnalyzer.Analyze(plan, analyzerConfig); + BenefitScorer.Score(plan); var result = ResultMapper.Map(plan, $"{name}.sql"); if (warningsOnly) diff --git a/src/PlanViewer.Cli/Commands/QueryStoreCommand.cs b/src/PlanViewer.Cli/Commands/QueryStoreCommand.cs index 18ff36b..d805a05 100644 --- a/src/PlanViewer.Cli/Commands/QueryStoreCommand.cs +++ b/src/PlanViewer.Cli/Commands/QueryStoreCommand.cs @@ -273,6 +273,7 @@ private static async Task RunAsync( // Parse, analyze, map var plan = ShowPlanParser.Parse(qsPlan.PlanXml); PlanAnalyzer.Analyze(plan, analyzerConfig); + BenefitScorer.Score(plan); var result = ResultMapper.Map(plan, $"{label}.sqlplan"); if (warningsOnly) diff --git a/src/PlanViewer.Core/Models/PlanModels.cs b/src/PlanViewer.Core/Models/PlanModels.cs index ec0201b..3950ba6 100644 --- a/src/PlanViewer.Core/Models/PlanModels.cs +++ b/src/PlanViewer.Core/Models/PlanModels.cs @@ -373,6 +373,17 @@ public class PlanWarning public string Message { get; set; } = ""; public PlanWarningSeverity Severity { get; set; } public SpillDetail? SpillDetails { get; set; } + + /// + /// Maximum percentage of elapsed time that could be saved by addressing this finding. + /// null = not quantifiable, 0 = calculated as negligible. + /// + public double? MaxBenefitPercent { get; set; } + + /// + /// Short actionable fix suggestion (e.g., "Add INCLUDE (columns) to index"). + /// + public string? ActionableFix { get; set; } } public enum PlanWarningSeverity { Info, Warning, Critical } diff --git a/src/PlanViewer.Core/Output/AnalysisResult.cs b/src/PlanViewer.Core/Output/AnalysisResult.cs index 0a0a5af..60168a1 100644 --- a/src/PlanViewer.Core/Output/AnalysisResult.cs +++ b/src/PlanViewer.Core/Output/AnalysisResult.cs @@ -208,6 +208,12 @@ public class WarningResult [JsonPropertyName("node_id")] public int? NodeId { get; set; } + + [JsonPropertyName("max_benefit_percent")] + public double? MaxBenefitPercent { get; set; } + + [JsonPropertyName("actionable_fix")] + public string? ActionableFix { get; set; } } public class MissingIndexResult diff --git a/src/PlanViewer.Core/Output/HtmlExporter.cs b/src/PlanViewer.Core/Output/HtmlExporter.cs index d633038..4757268 100644 --- a/src/PlanViewer.Core/Output/HtmlExporter.cs +++ b/src/PlanViewer.Core/Output/HtmlExporter.cs @@ -185,6 +185,7 @@ .card h3 { .sev-info { color: var(--info); } .warn-op { font-size: 0.75rem; font-weight: 500; color: var(--text-secondary); } .warn-type { font-size: 0.75rem; font-weight: 600; } +.warn-benefit { font-size: 0.7rem; font-weight: 600; color: var(--text-muted); padding: 0.05rem 0.3rem; border-radius: 3px; background: rgba(0,0,0,0.04); } .warn-msg { font-size: 0.8rem; color: var(--text); flex-basis: 100%; } /* Query text */ @@ -428,7 +429,13 @@ private static void WriteWarnings(StringBuilder sb, StatementResult stmt) if (infoCount > 0) sb.Append($" {infoCount}"); sb.AppendLine(""); - foreach (var w in allWarnings) + // Sort by benefit descending (nulls last), then severity, then type + var sorted = allWarnings + .OrderByDescending(w => w.MaxBenefitPercent ?? -1) + .ThenBy(w => w.Severity switch { "Critical" => 0, "Warning" => 1, _ => 2 }) + .ThenBy(w => w.Type); + + foreach (var w in sorted) { var sevLower = w.Severity.ToLower(); sb.AppendLine($"
"); @@ -436,6 +443,8 @@ private static void WriteWarnings(StringBuilder sb, StatementResult stmt) if (w.Operator != null) sb.AppendLine($"{Encode(w.Operator)}"); sb.AppendLine($"{Encode(w.Type)}"); + if (w.MaxBenefitPercent.HasValue) + sb.AppendLine($"up to {w.MaxBenefitPercent:N0}% benefit"); sb.AppendLine($"{Encode(w.Message)}"); sb.AppendLine("
"); } diff --git a/src/PlanViewer.Core/Output/ResultMapper.cs b/src/PlanViewer.Core/Output/ResultMapper.cs index ce45ab6..d61d44e 100644 --- a/src/PlanViewer.Core/Output/ResultMapper.cs +++ b/src/PlanViewer.Core/Output/ResultMapper.cs @@ -158,7 +158,9 @@ private static StatementResult MapStatement(PlanStatement stmt) { Type = w.WarningType, Severity = w.Severity.ToString(), - Message = w.Message + Message = w.Message, + MaxBenefitPercent = w.MaxBenefitPercent, + ActionableFix = w.ActionableFix }); } @@ -259,7 +261,9 @@ private static OperatorResult MapNode(PlanNode node) Severity = w.Severity.ToString(), Message = w.Message, Operator = $"{node.PhysicalOp} (Node {node.NodeId})", - NodeId = node.NodeId + NodeId = node.NodeId, + MaxBenefitPercent = w.MaxBenefitPercent, + ActionableFix = w.ActionableFix }); } diff --git a/src/PlanViewer.Core/Output/TextFormatter.cs b/src/PlanViewer.Core/Output/TextFormatter.cs index 04ccc80..02f77a5 100644 --- a/src/PlanViewer.Core/Output/TextFormatter.cs +++ b/src/PlanViewer.Core/Output/TextFormatter.cs @@ -151,12 +151,17 @@ public static void WriteText(AnalysisResult result, TextWriter writer) writer.WriteLine("Plan warnings:"); var hasDetailedMemoryGrant = stmt.Warnings.Any(w => w.Type == "Excessive Memory Grant" || w.Type == "Large Memory Grant"); - foreach (var w in stmt.Warnings) + var sortedWarnings = stmt.Warnings + .Where(w => !(w.Type == "Memory Grant" && hasDetailedMemoryGrant)) + .OrderByDescending(w => w.MaxBenefitPercent ?? -1) + .ThenBy(w => w.Severity switch { "Critical" => 0, "Warning" => 1, _ => 2 }) + .ThenBy(w => w.Type); + foreach (var w in sortedWarnings) { - // Skip raw XML "Memory Grant" when analyzer provides better context - if (w.Type == "Memory Grant" && hasDetailedMemoryGrant) - continue; - writer.WriteLine($" [{w.Severity}] {w.Type}: {EscapeNewlines(w.Message)}"); + var benefitTag = w.MaxBenefitPercent.HasValue + ? $" (up to {w.MaxBenefitPercent:N0}% benefit)" + : ""; + writer.WriteLine($" [{w.Severity}] {w.Type}{benefitTag}: {EscapeNewlines(w.Message)}"); } } @@ -272,10 +277,17 @@ private static void CollectNodeWarnings(OperatorResult node, List private static void WriteGroupedOperatorWarnings(List warnings, TextWriter writer) { + // Sort by benefit descending (nulls last), then severity, then type + var sorted = warnings + .OrderByDescending(w => w.MaxBenefitPercent ?? -1) + .ThenBy(w => w.Severity switch { "Critical" => 0, "Warning" => 1, _ => 2 }) + .ThenBy(w => w.Type) + .ToList(); + // Split each message into "data | explanation" at the last sentence boundary // that starts with "The " (the harm assessment). Group by shared explanation. - var entries = new List<(string Severity, string Operator, string Data, string? Explanation)>(); - foreach (var w in warnings) + var entries = new List<(string Severity, string Operator, string Data, string? Explanation, double? Benefit)>(); + foreach (var w in sorted) { var msg = w.Message; string data; @@ -293,14 +305,13 @@ private static void WriteGroupedOperatorWarnings(List warnings, T data = msg; } - entries.Add((w.Severity, w.Operator ?? "?", data, explanation)); + entries.Add((w.Severity, w.Operator ?? "?", data, explanation, w.MaxBenefitPercent)); } // Group entries that share the same severity, type, and explanation - // Sort criticals before warnings before info + // Preserve the pre-sorted order (benefit desc, severity, type) var grouped = entries .GroupBy(e => (e.Severity, e.Explanation ?? "")) - .OrderBy(g => g.Key.Item1 switch { "Critical" => 0, "Warning" => 1, _ => 2 }) .ToList(); foreach (var group in grouped) @@ -310,7 +321,10 @@ private static void WriteGroupedOperatorWarnings(List warnings, T { // Multiple operators with the same explanation — list compactly foreach (var item in items) - writer.WriteLine($" [{item.Severity}] {item.Operator}: {EscapeNewlines(item.Data)}"); + { + var benefitTag = item.Benefit.HasValue ? $" (up to {item.Benefit:N0}% benefit)" : ""; + writer.WriteLine($" [{item.Severity}] {item.Operator}{benefitTag}: {EscapeNewlines(item.Data)}"); + } writer.WriteLine($" -> {group.Key.Item2}"); } else @@ -319,7 +333,8 @@ private static void WriteGroupedOperatorWarnings(List warnings, T foreach (var item in items) { var full = item.Explanation != null ? $"{item.Data}. {item.Explanation}" : item.Data; - writer.WriteLine($" [{item.Severity}] {item.Operator}: {EscapeNewlines(full)}"); + var benefitTag = item.Benefit.HasValue ? $" (up to {item.Benefit:N0}% benefit)" : ""; + writer.WriteLine($" [{item.Severity}] {item.Operator}{benefitTag}: {EscapeNewlines(full)}"); } } } diff --git a/src/PlanViewer.Core/Services/BenefitScorer.cs b/src/PlanViewer.Core/Services/BenefitScorer.cs new file mode 100644 index 0000000..7508294 --- /dev/null +++ b/src/PlanViewer.Core/Services/BenefitScorer.cs @@ -0,0 +1,379 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using PlanViewer.Core.Models; + +namespace PlanViewer.Core.Services; + +/// +/// Second-pass analysis that calculates MaxBenefitPercent for each PlanWarning. +/// Runs after PlanAnalyzer.Analyze() — the analyzer creates findings, the scorer quantifies them. +/// Benefit = maximum % of elapsed time that could be saved by addressing the finding. +/// +public static class BenefitScorer +{ + // Warning types that map to specific scoring strategies + private static readonly HashSet OperatorTimeRules = new(StringComparer.OrdinalIgnoreCase) + { + "Filter Operator", // Rule 1 + "Eager Index Spool", // Rule 2 + "Spill", // Rule 7 + "Key Lookup", // Rule 10 + "RID Lookup", // Rule 10 variant + "Scan With Predicate", // Rule 11 + "Non-SARGable Predicate", // Rule 12 + "Scan Cardinality Misestimate", // Rule 32 + }; + + public static void Score(ParsedPlan plan) + { + foreach (var batch in plan.Batches) + { + foreach (var stmt in batch.Statements) + { + ScoreStatementWarnings(stmt); + + if (stmt.RootNode != null) + ScoreNodeTree(stmt.RootNode, stmt); + } + } + } + + private static void ScoreStatementWarnings(PlanStatement stmt) + { + var elapsedMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0; + + foreach (var warning in stmt.PlanWarnings) + { + switch (warning.WarningType) + { + case "Ineffective Parallelism": // Rule 25 + case "Parallel Wait Bottleneck": // Rule 31 + // These are meta-findings about parallelism efficiency. + // The benefit is the gap between actual and ideal elapsed time. + if (elapsedMs > 0 && stmt.QueryTimeStats != null) + { + var cpu = stmt.QueryTimeStats.CpuTimeMs; + var dop = stmt.DegreeOfParallelism; + if (dop > 1 && cpu > 0) + { + // Ideal elapsed = CPU / DOP. Benefit = (actual - ideal) / actual + var idealElapsed = (double)cpu / dop; + var benefit = Math.Max(0, (elapsedMs - idealElapsed) / elapsedMs * 100); + warning.MaxBenefitPercent = Math.Min(100, Math.Round(benefit, 1)); + } + } + break; + + case "Serial Plan": // Rule 3 + // Can't know how fast a parallel plan would be, but estimate: + // CPU-bound: benefit up to (1 - 1/maxDOP) * 100% + if (elapsedMs > 0 && stmt.QueryTimeStats != null) + { + var cpu = stmt.QueryTimeStats.CpuTimeMs; + // Assume server max DOP — use a conservative 4 if unknown + var potentialDop = 4; + if (cpu >= elapsedMs) + { + // CPU-bound: parallelism could help significantly + var benefit = (1.0 - 1.0 / potentialDop) * 100; + warning.MaxBenefitPercent = Math.Round(benefit, 1); + } + else + { + // Not CPU-bound: parallelism helps less + var cpuRatio = (double)cpu / elapsedMs; + var benefit = cpuRatio * (1.0 - 1.0 / potentialDop) * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(50, benefit), 1); + } + } + break; + + case "Memory Grant": // Rule 9 + // Grant wait is the only part that affects this query's elapsed time + if (elapsedMs > 0 && stmt.MemoryGrant?.GrantWaitTimeMs > 0) + { + var benefit = (double)stmt.MemoryGrant.GrantWaitTimeMs / elapsedMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + break; + + case "High Compile CPU": // Rule 19 + if (elapsedMs > 0 && stmt.CompileCPUMs > 0) + { + var benefit = (double)stmt.CompileCPUMs / elapsedMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + break; + + // Rules that cannot be quantified: leave MaxBenefitPercent as null + // Rule 18 (Compile Memory Exceeded), Rule 20 (Local Variables), + // Rule 27 (Optimize For Unknown) + } + } + } + + private static void ScoreNodeTree(PlanNode node, PlanStatement stmt) + { + ScoreNodeWarnings(node, stmt); + + foreach (var child in node.Children) + ScoreNodeTree(child, stmt); + } + + private static void ScoreNodeWarnings(PlanNode node, PlanStatement stmt) + { + var elapsedMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0; + + foreach (var warning in node.Warnings) + { + // Already scored (e.g., by a different pass) + if (warning.MaxBenefitPercent != null) + continue; + + if (warning.WarningType == "UDF Execution") // Rule 4 + { + ScoreUdfWarning(warning, node, elapsedMs); + } + else if (warning.WarningType == "Filter Operator") // Rule 1 + { + ScoreFilterWarning(warning, node, stmt); + } + else if (warning.WarningType == "Nested Loops High Executions") // Rule 16 + { + ScoreNestedLoopsWarning(warning, node, stmt); + } + else if (warning.SpillDetails != null) // Rule 7 + { + ScoreSpillWarning(warning, node, stmt); + } + else if (OperatorTimeRules.Contains(warning.WarningType)) + { + ScoreByOperatorTime(warning, node, stmt); + } + else if (warning.WarningType == "Row Estimate Mismatch") // Rule 5 + { + ScoreEstimateMismatchWarning(warning, node, stmt); + } + // Rules that stay null: Scalar UDF (Rule 6, informational reference), + // Parallel Skew (Rule 8), Data Type Mismatch (Rule 13), + // Lazy Spool Ineffective (Rule 14), Join OR Clause (Rule 15), + // Many-to-Many Merge Join (Rule 17), CTE Multiple References (Rule 21), + // Table Variable (Rule 22), Table-Valued Function (Rule 23), + // Top Above Scan (Rule 24), Row Goal (Rule 26), + // NOT IN with Nullable Column (Rule 28), Implicit Conversion (Rule 29), + // Wide Index Suggestion (Rule 30), Estimated Plan CE Guess (Rule 33) + } + } + + /// + /// Rule 4: UDF Execution — benefit is UDF elapsed time / statement elapsed. + /// + private static void ScoreUdfWarning(PlanWarning warning, PlanNode node, long stmtElapsedMs) + { + if (stmtElapsedMs > 0 && node.UdfElapsedTimeMs > 0) + { + var benefit = (double)node.UdfElapsedTimeMs / stmtElapsedMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Rule 1: Filter Operator — benefit is child subtree elapsed / statement elapsed. + /// The filter discards rows late; eliminating it means the child subtree work was unnecessary. + /// + private static void ScoreFilterWarning(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0; + + if (node.HasActualStats && stmtMs > 0 && node.Children.Count > 0) + { + var childElapsed = node.Children.Max(c => c.ActualElapsedMs); + var benefit = (double)childElapsed / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + else if (!node.HasActualStats && stmt.StatementSubTreeCost > 0 && node.Children.Count > 0) + { + // Estimated plan fallback: child subtree cost / statement cost + var childCost = node.Children.Sum(c => c.EstimatedTotalSubtreeCost); + var benefit = childCost / stmt.StatementSubTreeCost * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Rule 16: Nested Loops High Executions — benefit is inner-side elapsed / statement elapsed. + /// + private static void ScoreNestedLoopsWarning(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0; + + if (node.Children.Count >= 2) + { + var innerChild = node.Children[1]; + + if (innerChild.HasActualStats && stmtMs > 0 && innerChild.ActualElapsedMs > 0) + { + var benefit = (double)innerChild.ActualElapsedMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + else if (!innerChild.HasActualStats && stmt.StatementSubTreeCost > 0) + { + var benefit = innerChild.EstimatedTotalSubtreeCost / stmt.StatementSubTreeCost * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + } + + /// + /// Rule 7: Spill — benefit is the spilling operator's self-time / statement elapsed. + /// Exchange spills use the parallelism operator time (unreliable but best we have). + /// + private static void ScoreSpillWarning(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0; + if (stmtMs <= 0) return; + + long operatorMs; + if (warning.SpillDetails?.SpillType == "Exchange") + operatorMs = GetParallelismOperatorElapsedMs(node); + else + operatorMs = PlanAnalyzer.GetOperatorOwnElapsedMs(node); + + if (operatorMs > 0) + { + var benefit = (double)operatorMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Generic operator-time scoring for rules where the fix would eliminate or + /// drastically reduce the operator's work: Key Lookup, RID Lookup, + /// Scan With Predicate, Non-SARGable Predicate, Eager Index Spool, + /// Scan Cardinality Misestimate. + /// + private static void ScoreByOperatorTime(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0; + + if (node.HasActualStats && stmtMs > 0) + { + var operatorMs = PlanAnalyzer.GetOperatorOwnElapsedMs(node); + if (operatorMs > 0) + { + var benefit = (double)operatorMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + else + { + // Operator self-time is 0 — negligible benefit + warning.MaxBenefitPercent = 0; + } + } + else if (!node.HasActualStats && stmt.StatementSubTreeCost > 0) + { + // Estimated plan fallback: use operator cost percentage + var benefit = (double)node.CostPercent; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Rule 5: Row Estimate Mismatch — benefit is the harmed operator's time. + /// If the mismatch caused a spill, benefit = spilling operator time. + /// If it caused a bad join choice, benefit = join operator time. + /// Otherwise, benefit is the misestimated operator's own time (conservative). + /// + private static void ScoreEstimateMismatchWarning(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0; + if (stmtMs <= 0 || !node.HasActualStats) return; + + // Walk up to find the harmed operator (same logic as AssessEstimateHarm) + var harmedNode = FindHarmedOperator(node); + if (harmedNode != null) + { + var operatorMs = PlanAnalyzer.GetOperatorOwnElapsedMs(harmedNode); + if (operatorMs > 0) + { + var benefit = (double)operatorMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + return; + } + } + + // Fallback: use the misestimated node's own time + var ownMs = PlanAnalyzer.GetOperatorOwnElapsedMs(node); + if (ownMs > 0) + { + var benefit = (double)ownMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Walks up from a node with a bad row estimate to find the operator that was + /// harmed by it (spilling sort/hash, or join that chose the wrong strategy). + /// Returns null if no specific harm can be attributed. + /// + private static PlanNode? FindHarmedOperator(PlanNode node) + { + // The node itself has a spill — it harmed itself + if (node.Warnings.Any(w => w.SpillDetails != null)) + return node; + + // Walk up through transparent operators + var ancestor = node.Parent; + while (ancestor != null) + { + if (ancestor.PhysicalOp == "Parallelism" || + ancestor.PhysicalOp == "Compute Scalar" || + ancestor.PhysicalOp == "Segment" || + ancestor.PhysicalOp == "Sequence Project" || + ancestor.PhysicalOp == "Top" || + ancestor.PhysicalOp == "Filter") + { + ancestor = ancestor.Parent; + continue; + } + + // Parent join — bad row count from below caused wrong join choice + if (ancestor.LogicalOp.Contains("Join", StringComparison.OrdinalIgnoreCase)) + { + if (ancestor.IsAdaptive) + return null; // Adaptive join self-corrects + return ancestor; + } + + // Parent Sort/Hash that spilled + if (ancestor.Warnings.Any(w => w.SpillDetails != null)) + return ancestor; + + // Parent Sort/Hash with no spill — benign + if (ancestor.PhysicalOp.Contains("Sort", StringComparison.OrdinalIgnoreCase) || + ancestor.PhysicalOp.Contains("Hash", StringComparison.OrdinalIgnoreCase)) + return null; + + break; + } + + return null; + } + + /// + /// Calculates a Parallelism (exchange) operator's own elapsed time. + /// Mirrors PlanAnalyzer.GetParallelismOperatorElapsedMs but accessible here. + /// + private static long GetParallelismOperatorElapsedMs(PlanNode node) + { + if (node.Children.Count == 0) + return node.ActualElapsedMs; + + if (node.PerThreadStats.Count > 1) + return PlanAnalyzer.GetOperatorOwnElapsedMs(node); + + var maxChildElapsed = node.Children.Max(c => c.ActualElapsedMs); + return Math.Max(0, node.ActualElapsedMs - maxChildElapsed); + } +} diff --git a/src/PlanViewer.Web/Pages/Index.razor b/src/PlanViewer.Web/Pages/Index.razor index 3cd70ae..669efb5 100644 --- a/src/PlanViewer.Web/Pages/Index.razor +++ b/src/PlanViewer.Web/Pages/Index.razor @@ -1713,6 +1713,7 @@ else } PlanAnalyzer.Analyze(parsedPlan); + BenefitScorer.Score(parsedPlan); foreach (var batch in parsedPlan.Batches) { diff --git a/src/PlanViewer.Web/PlanViewer.Web.csproj b/src/PlanViewer.Web/PlanViewer.Web.csproj index 0ae249b..97b159f 100644 --- a/src/PlanViewer.Web/PlanViewer.Web.csproj +++ b/src/PlanViewer.Web/PlanViewer.Web.csproj @@ -25,6 +25,7 @@ + diff --git a/tests/PlanViewer.Core.Tests/PlanTestHelper.cs b/tests/PlanViewer.Core.Tests/PlanTestHelper.cs index 1e5e85a..44e6fe5 100644 --- a/tests/PlanViewer.Core.Tests/PlanTestHelper.cs +++ b/tests/PlanViewer.Core.Tests/PlanTestHelper.cs @@ -24,6 +24,7 @@ public static ParsedPlan LoadAndAnalyze(string planFileName) xml = xml.Replace("encoding=\"utf-16\"", "encoding=\"utf-8\""); var plan = ShowPlanParser.Parse(xml); PlanAnalyzer.Analyze(plan); + BenefitScorer.Score(plan); return plan; } @@ -99,6 +100,7 @@ public static PlanStatement FirstStatement(ParsedPlan plan) xml = xml.Replace("encoding=\"utf-16\"", "encoding=\"utf-8\""); var plan = ShowPlanParser.Parse(xml); PlanAnalyzer.Analyze(plan); + BenefitScorer.Score(plan); return plan; }