From c65d86c8a453f7145824ee675aa4ee4c45448ea7 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Fri, 10 Apr 2026 15:56:42 -0400 Subject: [PATCH] Strip operator-level wait context, add query classification to triage card MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove GetTopWaitContext calls from Rules 1, 5, 11, and 16 — statement-level waits can't be attributed to individual operators, so appending the dominant wait type to every warning was noise rather than analysis. Keep Rule 11 I/O severity elevation (scan + PAGEIOLATCH at 50%+ cost → Critical) since a dominant scan with I/O waits is a defensible correlation. Add wait profile classification to the triage summary card — categorizes queries as "I/O bound", "CPU bound", "parallelism bound", or "lock contention bound" based on wait category percentages (>= 30% threshold). Co-Authored-By: Claude Opus 4.6 (1M context) --- .../Services/AdviceContentBuilder.cs | 35 +++ src/PlanViewer.Core/Services/PlanAnalyzer.cs | 53 +--- .../PlanAnalyzerTests.cs | 286 +----------------- 3 files changed, 39 insertions(+), 335 deletions(-) diff --git a/src/PlanViewer.App/Services/AdviceContentBuilder.cs b/src/PlanViewer.App/Services/AdviceContentBuilder.cs index 525fb9d..875644b 100644 --- a/src/PlanViewer.App/Services/AdviceContentBuilder.cs +++ b/src/PlanViewer.App/Services/AdviceContentBuilder.cs @@ -1126,6 +1126,41 @@ private static SolidColorBrush GetWaitCategoryBrush(string waitType) items.Add(($"Memory grant: {grantedMB:F1} MB ({usedPct:F0}% used)", memBrush)); } + // Wait profile classification + if (stmt.WaitStats.Count > 0) + { + var totalMs = stmt.WaitStats.Sum(w => w.WaitTimeMs); + if (totalMs > 0) + { + long ioMs = 0, cpuMs = 0, parallelMs = 0, lockMs = 0; + foreach (var w in stmt.WaitStats) + { + var wt = w.WaitType.ToUpperInvariant(); + if (wt.StartsWith("PAGEIOLATCH") || wt.Contains("IO_COMPLETION")) + ioMs += w.WaitTimeMs; + else if (wt == "SOS_SCHEDULER_YIELD") + cpuMs += w.WaitTimeMs; + else if (wt.StartsWith("CX")) + parallelMs += w.WaitTimeMs; + else if (wt.StartsWith("LCK_")) + lockMs += w.WaitTimeMs; + } + + // Pick the dominant category (>= 30% of total) + var categories = new List<(string label, long ms)>(); + if (ioMs * 100 / totalMs >= 30) categories.Add(("I/O", ioMs)); + if (cpuMs * 100 / totalMs >= 30) categories.Add(("CPU", cpuMs)); + if (parallelMs * 100 / totalMs >= 30) categories.Add(("parallelism", parallelMs)); + if (lockMs * 100 / totalMs >= 30) categories.Add(("lock contention", lockMs)); + + if (categories.Count > 0) + { + var label = string.Join(" + ", categories.Select(c => c.label)); + items.Add(($"{label} bound ({totalMs:N0}ms total wait time)", InfoBrush)); + } + } + } + // Warning counts by severity var criticalCount = stmt.Warnings.Count(w => w.Severity.Equals("Critical", StringComparison.OrdinalIgnoreCase)); diff --git a/src/PlanViewer.Core/Services/PlanAnalyzer.cs b/src/PlanViewer.Core/Services/PlanAnalyzer.cs index d656279..5e18d0a 100644 --- a/src/PlanViewer.Core/Services/PlanAnalyzer.cs +++ b/src/PlanViewer.Core/Services/PlanAnalyzer.cs @@ -548,9 +548,9 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt, AnalyzerConfi { // Gate: skip trivial filters based on actual stats or estimated cost bool isTrivial; - long childReads = 0; if (node.HasActualStats) { + long childReads = 0; foreach (var child in node.Children) childReads += SumSubtreeReads(child); var childElapsed = node.Children.Max(c => c.ActualElapsedMs); @@ -571,14 +571,6 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt, AnalyzerConfi message += $"\n{impact}"; message += $"\nPredicate: {predicate}"; - // Wait stats add context — rows burned CPU/I/O/waits just to be discarded - if (childReads >= 1000) - { - var waitContext = GetTopWaitContext(stmt.WaitStats); - if (waitContext != null) - message += $"\n{waitContext}"; - } - node.Warnings.Add(new PlanWarning { WarningType = "Filter Operator", @@ -655,16 +647,10 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt, AnalyzerConfi var actualDisplay = executions > 1 ? $"Actual {node.ActualRows:N0} ({actualPerExec:N0} rows x {executions:N0} executions)" : $"Actual {node.ActualRows:N0}"; - var message = $"Estimated {node.EstimateRows:N0} vs {actualDisplay} — {factor:F0}x {direction}. {harm}"; - - var waitContext = GetTopWaitContext(stmt.WaitStats); - if (waitContext != null) - message += $" {waitContext}"; - node.Warnings.Add(new PlanWarning { WarningType = "Row Estimate Mismatch", - Message = message, + Message = $"Estimated {node.EstimateRows:N0} vs {actualDisplay} — {factor:F0}x {direction}. {harm}", Severity = factor >= 100 ? PlanWarningSeverity.Critical : PlanWarningSeverity.Warning }); } @@ -848,10 +834,6 @@ _ when nonSargableReason.StartsWith("Function call") => message += $" {details.Summary}"; message += " Check that you have appropriate indexes."; - var waitContext = GetTopWaitContext(stmt.WaitStats); - if (waitContext != null) - message += $" {waitContext}"; - // I/O waits specifically confirm the scan is hitting disk — elevate if (HasSignificantIoWaits(stmt.WaitStats) && details.CostPct >= 50 && severity != PlanWarningSeverity.Critical) @@ -1047,10 +1029,6 @@ _ when nonSargableReason.StartsWith("Function call") => else details.Add("Consider whether a hash or merge join would be more appropriate for this row count."); - var waitContext = GetTopWaitContext(stmt.WaitStats); - if (waitContext != null) - details.Add(waitContext); - node.Warnings.Add(new PlanWarning { WarningType = "Nested Loops High Executions", @@ -1839,33 +1817,6 @@ private static bool HasSignificantIoWaits(List waits) return ioMs >= 100 && pct >= 20; } - /// - /// Returns a terse sentence describing the dominant wait type for appending - /// to an existing warning message, or null if waits are negligible. - /// Surfaces whatever wait type is dominant — PAGEIOLATCH, SOS_SCHEDULER_YIELD, - /// CXPACKET, LCK_*, HTBUILD, EXECSYNC, IO_COMPLETION, etc. - /// Threshold: top wait >= 100ms and >= 20% of total wait time. - /// - private static string? GetTopWaitContext(List waits) - { - if (waits.Count == 0) - return null; - - var totalMs = waits.Sum(w => w.WaitTimeMs); - if (totalMs == 0) - return null; - - var top = waits.OrderByDescending(w => w.WaitTimeMs).First(); - if (top.WaitTimeMs < 100) - return null; - - var pct = (double)top.WaitTimeMs / totalMs * 100; - if (pct < 20) - return null; - - return $"Dominant wait: {top.WaitType} ({top.WaitTimeMs:N0}ms, {pct:N0}% of total wait time)."; - } - private static bool AllocatesResources(PlanNode node) { // Operators that get memory grants or allocate structures based on row estimates. diff --git a/tests/PlanViewer.Core.Tests/PlanAnalyzerTests.cs b/tests/PlanViewer.Core.Tests/PlanAnalyzerTests.cs index e6f450f..5ca3373 100644 --- a/tests/PlanViewer.Core.Tests/PlanAnalyzerTests.cs +++ b/tests/PlanViewer.Core.Tests/PlanAnalyzerTests.cs @@ -856,30 +856,12 @@ public void Issue178_7_FilterSuppressedOnTrivialChildIO() new WaitStatInfo { WaitType = "ASYNC_NETWORK_IO", WaitTimeMs = 100, WaitCount = 10 } }; - private static List LockWaits(long ms = 3000) => new() - { - new WaitStatInfo { WaitType = "LCK_M_S", WaitTimeMs = ms, WaitCount = 200 }, - new WaitStatInfo { WaitType = "ASYNC_NETWORK_IO", WaitTimeMs = 100, WaitCount = 10 } - }; - private static List CpuWaits(long ms = 4000) => new() { new WaitStatInfo { WaitType = "SOS_SCHEDULER_YIELD", WaitTimeMs = ms, WaitCount = 5000 }, new WaitStatInfo { WaitType = "ASYNC_NETWORK_IO", WaitTimeMs = 100, WaitCount = 10 } }; - private static List CxWaits(long ms = 3000) => new() - { - new WaitStatInfo { WaitType = "CXPACKET", WaitTimeMs = ms, WaitCount = 400 }, - new WaitStatInfo { WaitType = "SOS_SCHEDULER_YIELD", WaitTimeMs = 200, WaitCount = 100 } - }; - - private static List HashWaits(long ms = 2500) => new() - { - new WaitStatInfo { WaitType = "HTBUILD", WaitTimeMs = ms, WaitCount = 80 }, - new WaitStatInfo { WaitType = "SOS_SCHEDULER_YIELD", WaitTimeMs = 200, WaitCount = 100 } - }; - private static ParsedPlan BuildSyntheticPlan(PlanStatement stmt) { var plan = new ParsedPlan(); @@ -887,112 +869,6 @@ private static ParsedPlan BuildSyntheticPlan(PlanStatement stmt) return plan; } - [Fact] - public void WaitStats_Rule01_FilterAppendsIoContext() - { - // Filter with >1000 child reads + I/O waits → message includes wait context - var child = new PlanNode - { - NodeId = 1, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", - HasActualStats = true, ActualRows = 100000, ActualLogicalReads = 5000, ActualElapsedMs = 500 - }; - var filter = new PlanNode - { - NodeId = 0, PhysicalOp = "Filter", LogicalOp = "Filter", - Predicate = "[col1] > 100", - HasActualStats = true, ActualRows = 10, ActualElapsedMs = 500, - Children = { child } - }; - child.Parent = filter; - - var stmt = new PlanStatement - { - RootNode = filter, - WaitStats = IoWaits() - }; - var plan = BuildSyntheticPlan(stmt); - PlanAnalyzer.Analyze(plan); - - var warnings = PlanTestHelper.AllNodeWarnings(stmt) - .Where(w => w.WarningType == "Filter Operator").ToList(); - Assert.NotEmpty(warnings); - Assert.Contains(warnings, w => w.Message.Contains("PAGEIOLATCH_SH")); - } - - [Fact] - public void WaitStats_Rule01_FilterNoContextWithoutWaits() - { - // Same filter but no wait stats → no wait context in message - var child = new PlanNode - { - NodeId = 1, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", - HasActualStats = true, ActualRows = 100000, ActualLogicalReads = 5000, ActualElapsedMs = 500 - }; - var filter = new PlanNode - { - NodeId = 0, PhysicalOp = "Filter", LogicalOp = "Filter", - Predicate = "[col1] > 100", - HasActualStats = true, ActualRows = 10, ActualElapsedMs = 500, - Children = { child } - }; - child.Parent = filter; - - var stmt = new PlanStatement { RootNode = filter }; - var plan = BuildSyntheticPlan(stmt); - PlanAnalyzer.Analyze(plan); - - var warnings = PlanTestHelper.AllNodeWarnings(stmt) - .Where(w => w.WarningType == "Filter Operator").ToList(); - Assert.NotEmpty(warnings); - Assert.DoesNotContain(warnings, w => w.Message.Contains("Wait stats")); - } - - [Fact] - public void WaitStats_Rule05_SpillWithIoWaitsAppendsContext() - { - // Row mismatch causing a spill + I/O waits → I/O context appended - // Sort needs a spill warning so AssessEstimateHarm returns non-null, - // and must have a non-root parent (NodeId != -1). - var sortNode = new PlanNode - { - NodeId = 2, PhysicalOp = "Sort", LogicalOp = "Sort", - HasActualStats = true, - EstimateRows = 100, ActualRows = 100000, ActualExecutions = 1, - Warnings = { new PlanWarning - { - WarningType = "Sort Spill", - Message = "Sort spilled", - Severity = PlanWarningSeverity.Warning, - SpillDetails = new SpillDetail { SpillType = "Sort", WritesToTempDb = 1000 } - }} - }; - var selectNode = new PlanNode - { - NodeId = 1, PhysicalOp = "Compute Scalar", LogicalOp = "Compute Scalar", - Children = { sortNode } - }; - sortNode.Parent = selectNode; - var root = new PlanNode - { - NodeId = -1, PhysicalOp = "SELECT", LogicalOp = "SELECT", - Children = { selectNode } - }; - selectNode.Parent = root; - - var stmt = new PlanStatement - { - RootNode = root, - WaitStats = IoWaits() - }; - var plan = BuildSyntheticPlan(stmt); - PlanAnalyzer.Analyze(plan); - - var warnings = PlanTestHelper.AllNodeWarnings(stmt) - .Where(w => w.WarningType == "Row Estimate Mismatch").ToList(); - Assert.NotEmpty(warnings); - Assert.Contains(warnings, w => w.Message.Contains("PAGEIOLATCH_SH")); - } - [Fact] public void WaitStats_Rule11_ScanWithIoWaitsElevatedToCritical() { @@ -1016,7 +892,6 @@ public void WaitStats_Rule11_ScanWithIoWaitsElevatedToCritical() .Where(w => w.WarningType == "Scan With Predicate").ToList(); Assert.NotEmpty(warnings); Assert.Contains(warnings, w => w.Severity == PlanWarningSeverity.Critical); - Assert.Contains(warnings, w => w.Message.Contains("PAGEIOLATCH_SH")); } [Fact] @@ -1044,83 +919,9 @@ public void WaitStats_Rule11_ScanWithoutIoWaitsStaysWarning() } [Fact] - public void WaitStats_Rule16_NestedLoopsAppendsLockContext() + public void WaitStats_Rule11_CpuWaitsDoNotElevateSeverity() { - // Nested Loops with >100K inner executions + lock waits → lock context appended - var outer = new PlanNode - { - NodeId = 1, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", - HasActualStats = true, ActualRows = 500000, ActualExecutions = 1, EstimateRows = 500000 - }; - var inner = new PlanNode - { - NodeId = 2, PhysicalOp = "Index Seek", LogicalOp = "Index Seek", - HasActualStats = true, ActualRows = 500000, ActualExecutions = 500000 - }; - var nl = new PlanNode - { - NodeId = 0, PhysicalOp = "Nested Loops", LogicalOp = "Inner Join", - HasActualStats = true, ActualRows = 500000, - Children = { outer, inner } - }; - outer.Parent = nl; - inner.Parent = nl; - - var stmt = new PlanStatement - { - RootNode = nl, - WaitStats = LockWaits() - }; - var plan = BuildSyntheticPlan(stmt); - PlanAnalyzer.Analyze(plan); - - var warnings = PlanTestHelper.AllNodeWarnings(stmt) - .Where(w => w.WarningType == "Nested Loops High Executions").ToList(); - Assert.NotEmpty(warnings); - Assert.Contains(warnings, w => w.Message.Contains("LCK_M_S")); - } - - [Fact] - public void WaitStats_Rule16_NestedLoopsSurfacesIoWaits() - { - // Nested Loops with >100K inner executions + I/O waits → I/O context - var outer = new PlanNode - { - NodeId = 1, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", - HasActualStats = true, ActualRows = 500000, ActualExecutions = 1, EstimateRows = 500000 - }; - var inner = new PlanNode - { - NodeId = 2, PhysicalOp = "Index Seek", LogicalOp = "Index Seek", - HasActualStats = true, ActualRows = 500000, ActualExecutions = 500000 - }; - var nl = new PlanNode - { - NodeId = 0, PhysicalOp = "Nested Loops", LogicalOp = "Inner Join", - HasActualStats = true, ActualRows = 500000, - Children = { outer, inner } - }; - outer.Parent = nl; - inner.Parent = nl; - - var stmt = new PlanStatement - { - RootNode = nl, - WaitStats = IoWaits() - }; - var plan = BuildSyntheticPlan(stmt); - PlanAnalyzer.Analyze(plan); - - var warnings = PlanTestHelper.AllNodeWarnings(stmt) - .Where(w => w.WarningType == "Nested Loops High Executions").ToList(); - Assert.NotEmpty(warnings); - Assert.Contains(warnings, w => w.Message.Contains("PAGEIOLATCH_SH")); - } - - [Fact] - public void WaitStats_SosSchedulerYield_SurfacedOnScan() - { - // SOS_SCHEDULER_YIELD as dominant wait on a scan → surfaces in message + // CPU waits (SOS_SCHEDULER_YIELD) are not I/O — should NOT elevate at 60% cost var scan = new PlanNode { NodeId = 0, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", @@ -1139,89 +940,6 @@ public void WaitStats_SosSchedulerYield_SurfacedOnScan() var warnings = PlanTestHelper.AllNodeWarnings(stmt) .Where(w => w.WarningType == "Scan With Predicate").ToList(); Assert.NotEmpty(warnings); - Assert.Contains(warnings, w => w.Message.Contains("SOS_SCHEDULER_YIELD")); - // CPU waits are not I/O — should NOT elevate severity (stays Warning at 60% cost) Assert.All(warnings, w => Assert.Equal(PlanWarningSeverity.Warning, w.Severity)); } - - [Fact] - public void WaitStats_CxPacket_SurfacedOnNestedLoops() - { - // CXPACKET as dominant wait on Nested Loops → surfaces in message - var outer = new PlanNode - { - NodeId = 1, PhysicalOp = "Clustered Index Scan", LogicalOp = "Clustered Index Scan", - HasActualStats = true, ActualRows = 500000, ActualExecutions = 1, EstimateRows = 500000 - }; - var inner = new PlanNode - { - NodeId = 2, PhysicalOp = "Index Seek", LogicalOp = "Index Seek", - HasActualStats = true, ActualRows = 500000, ActualExecutions = 500000 - }; - var nl = new PlanNode - { - NodeId = 0, PhysicalOp = "Nested Loops", LogicalOp = "Inner Join", - HasActualStats = true, ActualRows = 500000, - Children = { outer, inner } - }; - outer.Parent = nl; - inner.Parent = nl; - - var stmt = new PlanStatement - { - RootNode = nl, - WaitStats = CxWaits() - }; - var plan = BuildSyntheticPlan(stmt); - PlanAnalyzer.Analyze(plan); - - var warnings = PlanTestHelper.AllNodeWarnings(stmt) - .Where(w => w.WarningType == "Nested Loops High Executions").ToList(); - Assert.NotEmpty(warnings); - Assert.Contains(warnings, w => w.Message.Contains("CXPACKET")); - } - - [Fact] - public void WaitStats_HtBuild_SurfacedOnEstimateMismatch() - { - // HTBUILD as dominant wait on a hash join with bad estimates → surfaces in message - var hashNode = new PlanNode - { - NodeId = 2, PhysicalOp = "Hash Match", LogicalOp = "Inner Join", - HasActualStats = true, - EstimateRows = 100, ActualRows = 100000, ActualExecutions = 1, - Warnings = { new PlanWarning - { - WarningType = "Hash Spill", - Message = "Hash spilled", - Severity = PlanWarningSeverity.Warning, - SpillDetails = new SpillDetail { SpillType = "Hash", WritesToTempDb = 500 } - }} - }; - var parent = new PlanNode - { - NodeId = 1, PhysicalOp = "Compute Scalar", LogicalOp = "Compute Scalar", - Children = { hashNode } - }; - hashNode.Parent = parent; - var root = new PlanNode - { - NodeId = -1, PhysicalOp = "SELECT", LogicalOp = "SELECT", - Children = { parent } - }; - parent.Parent = root; - - var stmt = new PlanStatement - { - RootNode = root, - WaitStats = HashWaits() - }; - var plan = BuildSyntheticPlan(stmt); - PlanAnalyzer.Analyze(plan); - - var warnings = PlanTestHelper.AllNodeWarnings(stmt) - .Where(w => w.WarningType == "Row Estimate Mismatch").ToList(); - Assert.NotEmpty(warnings); - Assert.Contains(warnings, w => w.Message.Contains("HTBUILD")); - } }