From 6a6993056edf9badc88abf766fc19f4a73f87106 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Fri, 27 Feb 2026 05:47:27 -0500 Subject: [PATCH] Rule 7: Replace arbitrary spill write-count threshold with time-based analysis Spill severity is now based on what percentage of statement elapsed time the spilling operator accounts for (Warning >= 10%, Critical >= 50%). Uses QueryTimeStats.ElapsedTimeMs for statement time and calculates operator-own time by subtracting child elapsed (row mode) or using elapsed directly (batch mode). Skips Parallelism exchange operators when subtracting child time due to known timing bugs. Co-Authored-By: Claude Opus 4.6 --- Dashboard/Services/PlanAnalyzer.cs | 49 ++++++++++++++++++++++++++++-- Lite/Services/PlanAnalyzer.cs | 49 ++++++++++++++++++++++++++++-- 2 files changed, 92 insertions(+), 6 deletions(-) diff --git a/Dashboard/Services/PlanAnalyzer.cs b/Dashboard/Services/PlanAnalyzer.cs index 2a32df34..6c3bdc23 100644 --- a/Dashboard/Services/PlanAnalyzer.cs +++ b/Dashboard/Services/PlanAnalyzer.cs @@ -272,11 +272,26 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt) }); } - // Rule 7: Spill detection — promote severity for large spills + // Rule 7: Spill detection — calculate operator time and set severity + // based on what percentage of statement elapsed time the spill accounts for foreach (var w in node.Warnings.ToList()) { - if (w.SpillDetails != null && w.SpillDetails.WritesToTempDb > 1000) - w.Severity = PlanWarningSeverity.Critical; + if (w.SpillDetails != null && node.ActualElapsedMs > 0) + { + var operatorMs = GetOperatorOwnElapsedMs(node); + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0; + + if (stmtMs > 0) + { + var pct = (double)operatorMs / stmtMs; + w.Message += $" Operator time: {operatorMs:N0}ms ({pct:P0} of statement)."; + + if (pct >= 0.5) + w.Severity = PlanWarningSeverity.Critical; + else if (pct >= 0.1) + w.Severity = PlanWarningSeverity.Warning; + } + } } // Rule 8: Parallel thread skew (actual plans with per-thread stats) @@ -626,6 +641,34 @@ private static void FindMemoryConsumers(PlanNode node, List consumers) FindMemoryConsumers(child, consumers); } + /// + /// Calculates an operator's own elapsed time by subtracting child time. + /// In batch mode, operator times are self-contained. In row mode, times are + /// cumulative (include children), so we subtract the dominant child's time. + /// Parallelism (exchange) operators are skipped because they have timing bugs. + /// + private static long GetOperatorOwnElapsedMs(PlanNode node) + { + if (node.ActualExecutionMode == "Batch") + return node.ActualElapsedMs; + + // Row mode: subtract the dominant child's elapsed time + var maxChildElapsed = 0L; + foreach (var child in node.Children) + { + var childElapsed = child.ActualElapsedMs; + + // Exchange operators have timing bugs — skip to their child + if (child.PhysicalOp == "Parallelism" && child.Children.Count > 0) + childElapsed = child.Children.Max(c => c.ActualElapsedMs); + + if (childElapsed > maxChildElapsed) + maxChildElapsed = childElapsed; + } + + return Math.Max(0, node.ActualElapsedMs - maxChildElapsed); + } + private static string Truncate(string value, int maxLength) { return value.Length <= maxLength ? value : value[..maxLength] + "..."; diff --git a/Lite/Services/PlanAnalyzer.cs b/Lite/Services/PlanAnalyzer.cs index 4fa41a5d..9db6f722 100644 --- a/Lite/Services/PlanAnalyzer.cs +++ b/Lite/Services/PlanAnalyzer.cs @@ -272,11 +272,26 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt) }); } - // Rule 7: Spill detection — promote severity for large spills + // Rule 7: Spill detection — calculate operator time and set severity + // based on what percentage of statement elapsed time the spill accounts for foreach (var w in node.Warnings.ToList()) { - if (w.SpillDetails != null && w.SpillDetails.WritesToTempDb > 1000) - w.Severity = PlanWarningSeverity.Critical; + if (w.SpillDetails != null && node.ActualElapsedMs > 0) + { + var operatorMs = GetOperatorOwnElapsedMs(node); + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0; + + if (stmtMs > 0) + { + var pct = (double)operatorMs / stmtMs; + w.Message += $" Operator time: {operatorMs:N0}ms ({pct:P0} of statement)."; + + if (pct >= 0.5) + w.Severity = PlanWarningSeverity.Critical; + else if (pct >= 0.1) + w.Severity = PlanWarningSeverity.Warning; + } + } } // Rule 8: Parallel thread skew (actual plans with per-thread stats) @@ -626,6 +641,34 @@ private static void FindMemoryConsumers(PlanNode node, List consumers) FindMemoryConsumers(child, consumers); } + /// + /// Calculates an operator's own elapsed time by subtracting child time. + /// In batch mode, operator times are self-contained. In row mode, times are + /// cumulative (include children), so we subtract the dominant child's time. + /// Parallelism (exchange) operators are skipped because they have timing bugs. + /// + private static long GetOperatorOwnElapsedMs(PlanNode node) + { + if (node.ActualExecutionMode == "Batch") + return node.ActualElapsedMs; + + // Row mode: subtract the dominant child's elapsed time + var maxChildElapsed = 0L; + foreach (var child in node.Children) + { + var childElapsed = child.ActualElapsedMs; + + // Exchange operators have timing bugs — skip to their child + if (child.PhysicalOp == "Parallelism" && child.Children.Count > 0) + childElapsed = child.Children.Max(c => c.ActualElapsedMs); + + if (childElapsed > maxChildElapsed) + maxChildElapsed = childElapsed; + } + + return Math.Max(0, node.ActualElapsedMs - maxChildElapsed); + } + private static string Truncate(string value, int maxLength) { return value.Length <= maxLength ? value : value[..maxLength] + "...";