From e668b7627c2ce1843e4a4371b92fbb2f85a95098 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Thu, 16 Apr 2026 14:06:13 -0400 Subject: [PATCH] Sync PlanAnalyzer and BenefitScorer from PerformanceStudio (Apr 9-16) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port PS PRs #216, #217, #219, #224, #229, #230, #231 to PM. PlanAnalyzer changes: - Rule 5: Suppress for Key Lookups (point lookups mislead per-execution estimates) - Rule 8: Enhanced parallel skew with batch mode sort detection and practical context - Rule 9: Large memory grant shows top 3 consumers sorted by row count - Rule 10: Key lookup overhaul — show output columns, check predicate filtering, softer advice - Rules 11/12/29: Suppress on 0-execution nodes (operator never ran) - Rule 11: I/O wait severity elevation when scan hits disk - Rule 24: FormatNodeRef helper includes object name for data access operators - Rule 26: Suppress when row goal prediction was correct, specific cause detection - Wait stats: DescribeWaitType with full wait type coverage, multi-wait summary - New helpers: GetWaitLabel, HasSignificantIoWaits, IdentifyRowGoalCause, FormatNodeRef - GetOperatorOwnElapsedMs changed to internal for BenefitScorer access BenefitScorer (new file): - Stage 1: MaxBenefitPercent for operator-level rules (filter, spill, lookup, etc.) 
- Stage 2: Wait stats benefit scoring with parallel allocation (Joe's formula) PlanModels additions: - MaxBenefitPercent and ActionableFix on PlanWarning - WaitBenefit class and WaitBenefits list on PlanStatement Co-Authored-By: Claude Opus 4.6 (1M context) --- Dashboard/Models/PlanModels.cs | 19 + Dashboard/Services/BenefitScorer.cs | 653 ++++++++++++++++++++++++++++ Dashboard/Services/PlanAnalyzer.cs | 315 ++++++++++++-- Lite/Models/PlanModels.cs | 19 + Lite/Services/BenefitScorer.cs | 653 ++++++++++++++++++++++++++++ Lite/Services/PlanAnalyzer.cs | 318 ++++++++++++-- 6 files changed, 1900 insertions(+), 77 deletions(-) create mode 100644 Dashboard/Services/BenefitScorer.cs create mode 100644 Lite/Services/BenefitScorer.cs diff --git a/Dashboard/Models/PlanModels.cs b/Dashboard/Models/PlanModels.cs index d3c75277..d645f73b 100644 --- a/Dashboard/Models/PlanModels.cs +++ b/Dashboard/Models/PlanModels.cs @@ -61,6 +61,7 @@ public class PlanStatement public SetOptionsInfo? SetOptions { get; set; } public List Parameters { get; set; } = new(); public List WaitStats { get; set; } = new(); + public List WaitBenefits { get; set; } = new(); public QueryTimeInfo? QueryTimeStats { get; set; } // Wave 2: MaxQueryMemory + QueryPlan-level warnings @@ -370,6 +371,17 @@ public class PlanWarning public string Message { get; set; } = ""; public PlanWarningSeverity Severity { get; set; } public SpillDetail? SpillDetails { get; set; } + + /// + /// Maximum percentage of elapsed time that could be saved by addressing this finding. + /// null = not quantifiable, 0 = calculated as negligible. + /// + public double? MaxBenefitPercent { get; set; } + + /// + /// Short actionable fix suggestion (e.g., "Add INCLUDE (columns) to index"). + /// + public string? ActionableFix { get; set; } } public enum PlanWarningSeverity { Info, Warning, Critical } @@ -433,6 +445,13 @@ public class PlanParameter public string? 
RuntimeValue { get; set; } } +public class WaitBenefit +{ + public string WaitType { get; set; } = ""; + public double MaxBenefitPercent { get; set; } + public string Category { get; set; } = ""; +} + public class WaitStatInfo { public string WaitType { get; set; } = ""; diff --git a/Dashboard/Services/BenefitScorer.cs b/Dashboard/Services/BenefitScorer.cs new file mode 100644 index 00000000..1acf26cf --- /dev/null +++ b/Dashboard/Services/BenefitScorer.cs @@ -0,0 +1,653 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using PerformanceMonitorDashboard.Models; + +namespace PerformanceMonitorDashboard.Services; + +/// +/// Second-pass analysis that calculates MaxBenefitPercent for each PlanWarning. +/// Runs after PlanAnalyzer.Analyze() — the analyzer creates findings, the scorer quantifies them. +/// Benefit = maximum % of elapsed time that could be saved by addressing the finding. +/// +public static class BenefitScorer +{ + // Warning types that map to specific scoring strategies + private static readonly HashSet OperatorTimeRules = new(StringComparer.OrdinalIgnoreCase) + { + "Filter Operator", // Rule 1 + "Eager Index Spool", // Rule 2 + "Spill", // Rule 7 + // Key Lookup / RID Lookup (Rule 10) handled separately by ScoreKeyLookupWarning + "Scan With Predicate", // Rule 11 + "Non-SARGable Predicate", // Rule 12 + "Scan Cardinality Misestimate", // Rule 32 + }; + + public static void Score(ParsedPlan plan) + { + foreach (var batch in plan.Batches) + { + foreach (var stmt in batch.Statements) + { + ScoreStatementWarnings(stmt); + + if (stmt.RootNode != null) + ScoreNodeTree(stmt.RootNode, stmt); + + if (stmt.WaitStats.Count > 0 && stmt.QueryTimeStats != null) + ScoreWaitStats(stmt); + } + } + } + + private static void ScoreStatementWarnings(PlanStatement stmt) + { + var elapsedMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 
0; + + foreach (var warning in stmt.PlanWarnings) + { + switch (warning.WarningType) + { + case "Ineffective Parallelism": // Rule 25 + case "Parallel Wait Bottleneck": // Rule 31 + // These are meta-findings about parallelism efficiency. + // The benefit is the gap between actual and ideal elapsed time. + if (elapsedMs > 0 && stmt.QueryTimeStats != null) + { + var cpu = stmt.QueryTimeStats.CpuTimeMs; + var dop = stmt.DegreeOfParallelism; + if (dop > 1 && cpu > 0) + { + // Ideal elapsed = CPU / DOP. Benefit = (actual - ideal) / actual + var idealElapsed = (double)cpu / dop; + var benefit = Math.Max(0, (elapsedMs - idealElapsed) / elapsedMs * 100); + warning.MaxBenefitPercent = Math.Min(100, Math.Round(benefit, 1)); + } + } + break; + + case "Serial Plan": // Rule 3 + // Can't know how fast a parallel plan would be, but estimate: + // CPU-bound: benefit up to (1 - 1/maxDOP) * 100% + if (elapsedMs > 0 && stmt.QueryTimeStats != null) + { + var cpu = stmt.QueryTimeStats.CpuTimeMs; + // Assume server max DOP — use a conservative 4 if unknown + var potentialDop = 4; + if (cpu >= elapsedMs) + { + // CPU-bound: parallelism could help significantly + var benefit = (1.0 - 1.0 / potentialDop) * 100; + warning.MaxBenefitPercent = Math.Round(benefit, 1); + } + else + { + // Not CPU-bound: parallelism helps less + var cpuRatio = (double)cpu / elapsedMs; + var benefit = cpuRatio * (1.0 - 1.0 / potentialDop) * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(50, benefit), 1); + } + } + break; + + case "Memory Grant": // Rule 9 + // Grant wait is the only part that affects this query's elapsed time + if (elapsedMs > 0 && stmt.MemoryGrant?.GrantWaitTimeMs > 0) + { + var benefit = (double)stmt.MemoryGrant.GrantWaitTimeMs / elapsedMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + break; + + case "High Compile CPU": // Rule 19 + if (elapsedMs > 0 && stmt.CompileCPUMs > 0) + { + var benefit = (double)stmt.CompileCPUMs / elapsedMs * 100; + 
warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + break; + + // Rules that cannot be quantified: leave MaxBenefitPercent as null + // Rule 18 (Compile Memory Exceeded), Rule 20 (Local Variables), + // Rule 27 (Optimize For Unknown) + } + } + } + + private static void ScoreNodeTree(PlanNode node, PlanStatement stmt) + { + ScoreNodeWarnings(node, stmt); + + foreach (var child in node.Children) + ScoreNodeTree(child, stmt); + } + + private static void ScoreNodeWarnings(PlanNode node, PlanStatement stmt) + { + var elapsedMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0; + + foreach (var warning in node.Warnings) + { + // Already scored (e.g., by a different pass) + if (warning.MaxBenefitPercent != null) + continue; + + if (warning.WarningType == "UDF Execution") // Rule 4 + { + ScoreUdfWarning(warning, node, elapsedMs); + } + else if (warning.WarningType == "Filter Operator") // Rule 1 + { + ScoreFilterWarning(warning, node, stmt); + } + else if (warning.WarningType == "Nested Loops High Executions") // Rule 16 + { + ScoreNestedLoopsWarning(warning, node, stmt); + } + else if (warning.SpillDetails != null) // Rule 7 + { + ScoreSpillWarning(warning, node, stmt); + } + else if (warning.WarningType is "Key Lookup" or "RID Lookup") // Rule 10 + { + ScoreKeyLookupWarning(warning, node, stmt); + } + else if (OperatorTimeRules.Contains(warning.WarningType)) + { + ScoreByOperatorTime(warning, node, stmt); + } + else if (warning.WarningType == "Row Estimate Mismatch") // Rule 5 + { + ScoreEstimateMismatchWarning(warning, node, stmt); + } + // Rules that stay null: Scalar UDF (Rule 6, informational reference), + // Parallel Skew (Rule 8 — will be integrated per-operator later), + // Data Type Mismatch (Rule 13), + // Lazy Spool Ineffective (Rule 14), Join OR Clause (Rule 15), + // Many-to-Many Merge Join (Rule 17), CTE Multiple References (Rule 21), + // Table Variable (Rule 22), Table-Valued Function (Rule 23), + // Top Above Scan (Rule 24), Row Goal (Rule 
26), + // NOT IN with Nullable Column (Rule 28), Implicit Conversion (Rule 29), + // Wide Index Suggestion (Rule 30), Estimated Plan CE Guess (Rule 33) + } + } + + /// + /// Rule 4: UDF Execution — benefit is UDF elapsed time / statement elapsed. + /// + private static void ScoreUdfWarning(PlanWarning warning, PlanNode node, long stmtElapsedMs) + { + if (stmtElapsedMs > 0 && node.UdfElapsedTimeMs > 0) + { + var benefit = (double)node.UdfElapsedTimeMs / stmtElapsedMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Rule 1: Filter Operator — benefit is child subtree elapsed / statement elapsed. + /// The filter discards rows late; eliminating it means the child subtree work was unnecessary. + /// + private static void ScoreFilterWarning(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0; + + if (node.HasActualStats && stmtMs > 0 && node.Children.Count > 0) + { + var childElapsed = node.Children.Max(c => c.ActualElapsedMs); + var benefit = (double)childElapsed / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + else if (!node.HasActualStats && stmt.StatementSubTreeCost > 0 && node.Children.Count > 0) + { + // Estimated plan fallback: child subtree cost / statement cost + var childCost = node.Children.Sum(c => c.EstimatedTotalSubtreeCost); + var benefit = childCost / stmt.StatementSubTreeCost * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Rule 16: Nested Loops High Executions — benefit is inner-side elapsed / statement elapsed. + /// + private static void ScoreNestedLoopsWarning(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 
0; + + if (node.Children.Count >= 2) + { + var innerChild = node.Children[1]; + + if (innerChild.HasActualStats && stmtMs > 0 && innerChild.ActualElapsedMs > 0) + { + var benefit = (double)innerChild.ActualElapsedMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + else if (!innerChild.HasActualStats && stmt.StatementSubTreeCost > 0) + { + var benefit = innerChild.EstimatedTotalSubtreeCost / stmt.StatementSubTreeCost * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + } + + /// + /// Rule 7: Spill — benefit is the spilling operator's self-time / statement elapsed. + /// Exchange spills use the parallelism operator time (unreliable but best we have). + /// + private static void ScoreSpillWarning(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0; + if (stmtMs <= 0) return; + + long operatorMs; + if (warning.SpillDetails?.SpillType == "Exchange") + operatorMs = GetParallelismOperatorElapsedMs(node); + else + operatorMs = PlanAnalyzer.GetOperatorOwnElapsedMs(node); + + if (operatorMs > 0) + { + var benefit = (double)operatorMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Generic operator-time scoring for rules where the fix would eliminate or + /// drastically reduce the operator's work: Key Lookup, RID Lookup, + /// Scan With Predicate, Non-SARGable Predicate, Eager Index Spool, + /// Scan Cardinality Misestimate. + /// + private static void ScoreByOperatorTime(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 
0; + + if (node.HasActualStats && stmtMs > 0) + { + var operatorMs = PlanAnalyzer.GetOperatorOwnElapsedMs(node); + if (operatorMs > 0) + { + var benefit = (double)operatorMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + else + { + // Operator self-time is 0 — negligible benefit + warning.MaxBenefitPercent = 0; + } + } + else if (!node.HasActualStats && stmt.StatementSubTreeCost > 0) + { + // Estimated plan fallback: use operator cost percentage + var benefit = (double)node.CostPercent; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Rule 10: Key Lookup / RID Lookup — benefit includes the lookup operator's time, + /// plus the parent Nested Loops join when the NL only exists to drive the lookup + /// (inner child is the lookup, outer child is a seek/scan with no subtree). + /// + private static void ScoreKeyLookupWarning(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 
0; + + if (node.HasActualStats && stmtMs > 0) + { + var operatorMs = PlanAnalyzer.GetOperatorOwnElapsedMs(node); + + // Check if the parent NL join is purely a lookup driver: + // - Parent is Nested Loops + // - Has exactly 2 children + // - This node (the lookup) is the inner child (index 1) + // - The outer child (index 0) is a simple seek/scan with no children + var parent = node.Parent; + if (parent != null + && parent.PhysicalOp == "Nested Loops" + && parent.Children.Count == 2 + && parent.Children[1] == node + && parent.Children[0].Children.Count == 0) + { + operatorMs += PlanAnalyzer.GetOperatorOwnElapsedMs(parent); + } + + if (operatorMs > 0) + { + var benefit = (double)operatorMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + else + { + warning.MaxBenefitPercent = 0; + } + } + else if (!node.HasActualStats && stmt.StatementSubTreeCost > 0) + { + var benefit = (double)node.CostPercent; + // Same parent-NL logic for estimated plans + var parent = node.Parent; + if (parent != null + && parent.PhysicalOp == "Nested Loops" + && parent.Children.Count == 2 + && parent.Children[1] == node + && parent.Children[0].Children.Count == 0) + { + benefit += parent.CostPercent; + } + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Rule 5: Row Estimate Mismatch — benefit is the harmed operator's time. + /// If the mismatch caused a spill, benefit = spilling operator time. + /// If it caused a bad join choice, benefit = join operator time. + /// Otherwise, benefit is the misestimated operator's own time (conservative). + /// + private static void ScoreEstimateMismatchWarning(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 
0; + if (stmtMs <= 0 || !node.HasActualStats) return; + + // Walk up to find the harmed operator (same logic as AssessEstimateHarm) + var harmedNode = FindHarmedOperator(node); + if (harmedNode != null) + { + var operatorMs = PlanAnalyzer.GetOperatorOwnElapsedMs(harmedNode); + if (operatorMs > 0) + { + var benefit = (double)operatorMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + return; + } + } + + // Fallback: use the misestimated node's own time + var ownMs = PlanAnalyzer.GetOperatorOwnElapsedMs(node); + if (ownMs > 0) + { + var benefit = (double)ownMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Walks up from a node with a bad row estimate to find the operator that was + /// harmed by it (spilling sort/hash, or join that chose the wrong strategy). + /// Returns null if no specific harm can be attributed. + /// + private static PlanNode? FindHarmedOperator(PlanNode node) + { + // The node itself has a spill — it harmed itself + if (node.Warnings.Any(w => w.SpillDetails != null)) + return node; + + // Walk up through transparent operators + var ancestor = node.Parent; + while (ancestor != null) + { + if (ancestor.PhysicalOp == "Parallelism" || + ancestor.PhysicalOp == "Compute Scalar" || + ancestor.PhysicalOp == "Segment" || + ancestor.PhysicalOp == "Sequence Project" || + ancestor.PhysicalOp == "Top" || + ancestor.PhysicalOp == "Filter") + { + ancestor = ancestor.Parent; + continue; + } + + // Parent join — bad row count from below caused wrong join choice + if (ancestor.LogicalOp.Contains("Join", StringComparison.OrdinalIgnoreCase)) + { + if (ancestor.IsAdaptive) + return null; // Adaptive join self-corrects + return ancestor; + } + + // Parent Sort/Hash that spilled + if (ancestor.Warnings.Any(w => w.SpillDetails != null)) + return ancestor; + + // Parent Sort/Hash with no spill — benign + if (ancestor.PhysicalOp.Contains("Sort", 
StringComparison.OrdinalIgnoreCase) || + ancestor.PhysicalOp.Contains("Hash", StringComparison.OrdinalIgnoreCase)) + return null; + + break; + } + + return null; + } + + /// + /// Calculates a Parallelism (exchange) operator's own elapsed time. + /// Mirrors PlanAnalyzer.GetParallelismOperatorElapsedMs but accessible here. + /// + private static long GetParallelismOperatorElapsedMs(PlanNode node) + { + if (node.Children.Count == 0) + return node.ActualElapsedMs; + + if (node.PerThreadStats.Count > 1) + return PlanAnalyzer.GetOperatorOwnElapsedMs(node); + + var maxChildElapsed = node.Children.Max(c => c.ActualElapsedMs); + return Math.Max(0, node.ActualElapsedMs - maxChildElapsed); + } + + // --------------------------------------------------------------- + // Stage 2: Wait Stats Benefit + // --------------------------------------------------------------- + + /// + /// Calculates MaxBenefitPercent for each wait type in the statement's wait stats. + /// Serial plans: simple ratio of wait time to elapsed time. + /// Parallel plans: proportional allocation across relevant operators (Joe's formula). + /// + private static void ScoreWaitStats(PlanStatement stmt) + { + var elapsedMs = stmt.QueryTimeStats!.ElapsedTimeMs; + if (elapsedMs <= 0) return; + + var isParallel = stmt.DegreeOfParallelism > 1 && stmt.RootNode != null; + + // Collect all operators with per-thread stats for parallel benefit calculation + List? operatorProfiles = null; + if (isParallel) + { + operatorProfiles = new List(); + CollectOperatorWaitProfiles(stmt.RootNode!, operatorProfiles); + } + + foreach (var wait in stmt.WaitStats) + { + if (wait.WaitTimeMs <= 0) continue; + + var category = ClassifyWaitType(wait.WaitType); + double benefitPct; + + if (category == "Parallelism" && isParallel) + { + // CXPACKET/CXCONSUMER/CXSYNC: benefit is the parallelism efficiency gap, + // not the raw wait time. 
Threads waiting for other threads is a symptom + // of imperfect parallelism, not directly addressable time. + var cpu = stmt.QueryTimeStats!.CpuTimeMs; + var dop = stmt.DegreeOfParallelism; + if (cpu > 0 && dop > 1) + { + var idealElapsed = (double)cpu / dop; + benefitPct = Math.Max(0, (elapsedMs - idealElapsed) / elapsedMs * 100); + } + else + { + benefitPct = (double)wait.WaitTimeMs / elapsedMs * 100; + } + } + else if (!isParallel || operatorProfiles == null || operatorProfiles.Count == 0) + { + // Serial plan or no operator data: simple ratio + benefitPct = (double)wait.WaitTimeMs / elapsedMs * 100; + } + else + { + // Parallel plan: proportional allocation across relevant operators + benefitPct = CalculateParallelWaitBenefit(wait, category, operatorProfiles, elapsedMs); + } + + stmt.WaitBenefits.Add(new WaitBenefit + { + WaitType = wait.WaitType, + MaxBenefitPercent = Math.Round(Math.Min(100, Math.Max(0, benefitPct)), 1), + Category = category + }); + } + } + + /// + /// Parallel wait benefit using Joe's formula: + /// benefit = (SUM relevant operator max waits) * (total_wait_for_type) / (SUM relevant operator total waits) + /// Then convert to % of statement elapsed time. 
+ /// + private static double CalculateParallelWaitBenefit( + WaitStatInfo wait, string category, + List profiles, long stmtElapsedMs) + { + // Filter to operators relevant for this wait category + var relevant = new List(); + foreach (var p in profiles) + { + if (IsOperatorRelevantForCategory(p, category)) + relevant.Add(p); + } + + // If no operators match, fall back to simple ratio + if (relevant.Count == 0) + return (double)wait.WaitTimeMs / stmtElapsedMs * 100; + + // Joe's formula: + // sum_max = SUM of each relevant operator's max per-thread wait time + // sum_total = SUM of each relevant operator's total wait time across all threads + // benefit_ms = sum_max * wait.WaitTimeMs / sum_total + double sumMax = 0; + double sumTotal = 0; + foreach (var p in relevant) + { + sumMax += p.MaxThreadWaitMs; + sumTotal += p.TotalWaitMs; + } + + if (sumTotal <= 0) + return (double)wait.WaitTimeMs / stmtElapsedMs * 100; + + var benefitMs = sumMax * wait.WaitTimeMs / sumTotal; + return benefitMs / stmtElapsedMs * 100; + } + + /// + /// Determines if an operator is relevant for a given wait category. + /// + private static bool IsOperatorRelevantForCategory(OperatorWaitProfile profile, string category) + { + return category switch + { + "I/O" => profile.HasPhysicalReads, + "CPU" => profile.HasCpuWork, + "Parallelism" => profile.IsExchange, + "Hash" => profile.IsHashOperator, + "Sort" => profile.IsSortOperator, + "Latch" => profile.HasTempDbActivity, + "Lock" => true, // any operator can be blocked by locks + "Network" => false, // ASYNC_NETWORK_IO is client-side, not attributable to operators + "Memory" => false, // memory waits are statement-level + _ => true, // unknown category: include all operators + }; + } + + /// + /// Walks the operator tree and collects wait time profiles for each operator. + /// Wait time per thread = max(0, elapsed - cpu) for that thread. 
+ /// + private static void CollectOperatorWaitProfiles(PlanNode node, List profiles) + { + if (node.HasActualStats && node.PerThreadStats.Count > 0) + { + long maxThreadWait = 0; + long totalWait = 0; + + foreach (var ts in node.PerThreadStats) + { + var threadWait = Math.Max(0, ts.ActualElapsedMs - ts.ActualCPUMs); + totalWait += threadWait; + if (threadWait > maxThreadWait) + maxThreadWait = threadWait; + } + + if (totalWait > 0 || maxThreadWait > 0) + { + profiles.Add(new OperatorWaitProfile + { + Node = node, + MaxThreadWaitMs = maxThreadWait, + TotalWaitMs = totalWait, + HasPhysicalReads = node.ActualPhysicalReads > 0, + HasCpuWork = node.ActualCPUMs > 0, + IsExchange = node.PhysicalOp == "Parallelism", + IsHashOperator = node.PhysicalOp.StartsWith("Hash", StringComparison.OrdinalIgnoreCase), + IsSortOperator = node.PhysicalOp.StartsWith("Sort", StringComparison.OrdinalIgnoreCase), + HasTempDbActivity = node.Warnings.Any(w => w.SpillDetails != null) + || node.PhysicalOp.Contains("Spool", StringComparison.OrdinalIgnoreCase) + }); + } + } + + foreach (var child in node.Children) + CollectOperatorWaitProfiles(child, profiles); + } + + /// + /// Classifies a wait type into a category for operator-to-wait mapping. 
+ /// + internal static string ClassifyWaitType(string waitType) + { + var wt = waitType.ToUpperInvariant(); + return wt switch + { + _ when wt.StartsWith("PAGEIOLATCH") => "I/O", + _ when wt.Contains("IO_COMPLETION") => "I/O", + _ when wt.StartsWith("WRITELOG") => "I/O", + _ when wt == "SOS_SCHEDULER_YIELD" => "CPU", + _ when wt.StartsWith("CXPACKET") || wt.StartsWith("CXCONSUMER") => "Parallelism", + _ when wt.StartsWith("CXSYNC") => "Parallelism", + _ when wt.StartsWith("HT") => "Hash", + _ when wt == "BPSORT" => "Sort", + _ when wt == "BMPBUILD" => "Hash", + _ when wt.StartsWith("PAGELATCH") => "Latch", + _ when wt.StartsWith("LATCH_") => "Latch", + _ when wt.StartsWith("LCK_") => "Lock", + _ when wt == "ASYNC_NETWORK_IO" => "Network", + _ when wt.Contains("MEMORY_ALLOCATION") => "Memory", + _ when wt == "SOS_PHYS_PAGE_CACHE" => "Memory", + _ => "Other" + }; + } + + /// + /// Per-operator wait time profile used for parallel benefit allocation. + /// + private sealed class OperatorWaitProfile + { + public PlanNode Node { get; init; } = null!; + public long MaxThreadWaitMs { get; init; } + public long TotalWaitMs { get; init; } + public bool HasPhysicalReads { get; init; } + public bool HasCpuWork { get; init; } + public bool IsExchange { get; init; } + public bool IsHashOperator { get; init; } + public bool IsSortOperator { get; init; } + public bool HasTempDbActivity { get; init; } + } +} diff --git a/Dashboard/Services/PlanAnalyzer.cs b/Dashboard/Services/PlanAnalyzer.cs index befa192c..254246d5 100644 --- a/Dashboard/Services/PlanAnalyzer.cs +++ b/Dashboard/Services/PlanAnalyzer.cs @@ -179,16 +179,24 @@ private static void AnalyzeStatement(PlanStatement stmt) }); } - // Large memory grant with sort/hash guidance + // Large memory grant with top consumers if (grant.GrantedMemoryKB >= 1048576 && stmt.RootNode != null) { var consumers = new List(); FindMemoryConsumers(stmt.RootNode, consumers); var grantMB = grant.GrantedMemoryKB / 1024.0; - var guidance = 
consumers.Count > 0 - ? $" Memory consumers: {string.Join(", ", consumers)}. Check whether these operators are processing more rows than necessary." - : ""; + var guidance = ""; + if (consumers.Count > 0) + { + // Show only the top 3 consumers — listing 20+ is noise + var shown = consumers.Take(3); + var remaining = consumers.Count - 3; + guidance = $" Largest consumers: {string.Join(", ", shown)}"; + if (remaining > 0) + guidance += $", and {remaining} more"; + guidance += "."; + } stmt.PlanWarnings.Add(new PlanWarning { @@ -517,7 +525,8 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt) // - A parent join may have chosen the wrong strategy // - Root nodes with no parent to harm are skipped // - Nodes whose only parents are Parallelism/Top/Sort (no spill) are skipped - if (node.HasActualStats && node.EstimateRows > 0) + if (node.HasActualStats && node.EstimateRows > 0 + && !node.Lookup) // Key lookups are point lookups (1 row per execution) — per-execution estimate is misleading { if (node.ActualRows == 0) { @@ -642,11 +651,29 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt) var skewThreshold = workerThreads.Count <= 2 ? 0.80 : 0.50; if (skewRatio >= skewThreshold) { + var message = $"Thread {maxThread.ThreadId} processed {skewRatio:P0} of rows ({maxThread.ActualRows:N0}/{totalRows:N0}). Work is heavily skewed to one thread, so parallelism isn't helping much."; + var severity = PlanWarningSeverity.Warning; + + // Batch mode sorts produce all output on a single thread by design + // unless their parent is a batch mode Window Aggregate + if (node.PhysicalOp == "Sort" + && (node.ActualExecutionMode ?? 
node.ExecutionMode) == "Batch" + && node.Parent?.PhysicalOp != "Window Aggregate") + { + message += " Batch mode sorts produce all output rows on a single thread by design, unless feeding a batch mode Window Aggregate."; + severity = PlanWarningSeverity.Info; + } + else + { + // Add practical context — skew is often hard to fix + message += " Common causes: uneven data distribution across partitions or hash buckets, or a scan/seek whose predicate sends most rows to one range. Reducing DOP or rewriting the query to avoid the skewed operation may help."; + } + node.Warnings.Add(new PlanWarning { WarningType = "Parallel Skew", - Message = $"Thread {maxThread.ThreadId} processed {skewRatio:P0} of rows ({maxThread.ActualRows:N0}/{totalRows:N0}). Work is heavily skewed to one thread, so parallelism isn't helping much.", - Severity = PlanWarningSeverity.Warning + Message = message, + Severity = severity }); } } @@ -667,18 +694,37 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt) Severity = PlanWarningSeverity.Warning }); } - else if (node.Lookup && !string.IsNullOrEmpty(node.Predicate)) + else if (node.Lookup) { + var lookupMsg = "Key Lookup — SQL Server found rows via a nonclustered index but had to go back to the clustered index for additional columns."; + + // Show what columns the lookup is fetching + if (!string.IsNullOrEmpty(node.OutputColumns)) + lookupMsg += $"\nColumns fetched: {Truncate(node.OutputColumns, 200)}"; + + // Only call out the predicate if it actually filters rows + if (!string.IsNullOrEmpty(node.Predicate)) + { + var predicateFilters = node.HasActualStats && node.ActualExecutions > 0 + && node.ActualRows < node.ActualExecutions; + if (predicateFilters) + lookupMsg += $"\nResidual predicate (filtered {node.ActualExecutions - node.ActualRows:N0} rows): {Truncate(node.Predicate, 200)}"; + } + + lookupMsg += "\nTo eliminate the lookup, consider adding the needed columns as INCLUDE columns on the nonclustered index. 
This widens the index, so weigh the read benefit against write and storage overhead."; + node.Warnings.Add(new PlanWarning { WarningType = "Key Lookup", - Message = $"Key Lookup — SQL Server found rows via a nonclustered index but had to go back to the clustered index for additional columns. Alter the nonclustered index to add the predicate column as a key column or as an INCLUDE column.\nPredicate: {Truncate(node.Predicate, 200)}", + Message = lookupMsg, Severity = PlanWarningSeverity.Critical }); } // Rule 12: Non-SARGable predicate on scan - var nonSargableReason = DetectNonSargablePredicate(node); + // Skip for 0-execution nodes — the operator never ran, so the warning is academic + var nonSargableReason = (node.HasActualStats && node.ActualExecutions == 0) + ? null : DetectNonSargablePredicate(node); if (nonSargableReason != null) { var nonSargableAdvice = nonSargableReason switch @@ -707,19 +753,30 @@ _ when nonSargableReason.StartsWith("Function call", StringComparison.OrdinalIgn // Rule 11: Scan with residual predicate (skip if non-SARGable already flagged) // A PROBE() alone is just a bitmap filter — not a real residual predicate. 
+ // Skip for 0-execution nodes — the operator never ran if (nonSargableReason == null && IsRowstoreScan(node) && !string.IsNullOrEmpty(node.Predicate) && - !IsProbeOnly(node.Predicate)) + !IsProbeOnly(node.Predicate) && !(node.HasActualStats && node.ActualExecutions == 0)) { var displayPredicate = StripProbeExpressions(node.Predicate); var details = BuildScanImpactDetails(node, stmt); var severity = PlanWarningSeverity.Warning; + + // Elevate to Critical if the scan dominates the plan if (details.CostPct >= 90 || details.ElapsedPct >= 90) severity = PlanWarningSeverity.Critical; + var message = "Scan with residual predicate — SQL Server is reading every row and filtering after the fact."; if (!string.IsNullOrEmpty(details.Summary)) message += $" {details.Summary}"; message += " Check that you have appropriate indexes."; + + // I/O waits specifically confirm the scan is hitting disk — elevate + if (HasSignificantIoWaits(stmt.WaitStats) && details.CostPct >= 50 + && severity != PlanWarningSeverity.Critical) + severity = PlanWarningSeverity.Critical; + message += $"\nPredicate: {Truncate(displayPredicate, 200)}"; + node.Warnings.Add(new PlanWarning { WarningType = "Scan With Predicate", @@ -995,7 +1052,7 @@ _ when nonSargableReason.StartsWith("Function call", StringComparison.OrdinalIgn node.Warnings.Add(new PlanWarning { WarningType = "Top Above Scan", - Message = $"{topLabel} reads from {scanCandidate.PhysicalOp} (Node {scanCandidate.NodeId}).{innerNote}{predInfo} An index on the ORDER BY columns could eliminate the scan and sort entirely.", + Message = $"{topLabel} reads from {FormatNodeRef(scanCandidate)}.{innerNote}{predInfo} An index on the ORDER BY columns could eliminate the scan and sort entirely.", Severity = onInner ? 
PlanWarningSeverity.Critical : PlanWarningSeverity.Warning }); } @@ -1014,12 +1071,28 @@ _ when nonSargableReason.StartsWith("Function call", StringComparison.OrdinalIgn // tiny floating-point differences that display identically are noise if (reduction >= 2.0) { - node.Warnings.Add(new PlanWarning + // If we have actual stats, check whether the row goal prediction was correct. + // When actual rows <= the row goal estimate, the optimizer stopped early as planned — benign. + var rowGoalWorked = false; + if (node.HasActualStats) { - WarningType = "Row Goal", - Message = $"Row goal active: estimate reduced from {node.EstimateRowsWithoutRowGoal:N0} to {node.EstimateRows:N0} ({reduction:N0}x reduction) due to TOP, EXISTS, IN, or FAST hint. The optimizer chose this plan shape expecting to stop reading early. If the query reads all rows anyway, the plan choice may be suboptimal.", - Severity = PlanWarningSeverity.Info - }); + var executions = node.ActualExecutions > 0 ? node.ActualExecutions : 1; + var actualPerExec = (double)node.ActualRows / executions; + rowGoalWorked = actualPerExec <= node.EstimateRows; + } + + if (!rowGoalWorked) + { + // Try to identify the specific row goal cause from the statement text + var cause = IdentifyRowGoalCause(stmt.StatementText); + + node.Warnings.Add(new PlanWarning + { + WarningType = "Row Goal", + Message = $"Row goal active: estimate reduced from {node.EstimateRowsWithoutRowGoal:N0} to {node.EstimateRows:N0} ({reduction:N0}x reduction) due to {cause}. The optimizer chose this plan shape expecting to stop reading early. 
If the query reads all rows anyway, the plan choice may be suboptimal.", + Severity = PlanWarningSeverity.Info + }); + } } } @@ -1041,9 +1114,11 @@ _ when nonSargableReason.StartsWith("Function call", StringComparison.OrdinalIgn } // Rule 29: Enhance implicit conversion warnings — Seek Plan is more severe + // Skip for 0-execution nodes — the operator never ran + if (!(node.HasActualStats && node.ActualExecutions == 0)) foreach (var w in node.Warnings.ToList()) { - if (w.WarningType == "Implicit Conversion" && w.Message.StartsWith("Seek Plan", StringComparison.Ordinal)) + if (w.WarningType == "Implicit Conversion" && w.Message.StartsWith("Seek Plan", StringComparison.Ordinal)) { w.Severity = PlanWarningSeverity.Critical; w.Message = $"Implicit conversion prevented an index seek, forcing a scan instead. Fix the data type mismatch: ensure the parameter or variable type matches the column type exactly. {w.Message}"; @@ -1268,25 +1343,37 @@ private static bool IsOrExpansionChain(PlanNode concatenationNode) /// Finds Sort and Hash Match operators in the tree that consume memory. /// private static void FindMemoryConsumers(PlanNode node, List consumers) + { + // Collect all consumers first, then sort by row count descending + var raw = new List<(string Label, double Rows)>(); + FindMemoryConsumersRecursive(node, raw); + + foreach (var (label, _) in raw.OrderByDescending(c => c.Rows)) + consumers.Add(label); + } + + private static void FindMemoryConsumersRecursive(PlanNode node, List<(string Label, double Rows)> consumers) { if (node.PhysicalOp.Contains("Sort", StringComparison.OrdinalIgnoreCase) && !node.PhysicalOp.Contains("Spool", StringComparison.OrdinalIgnoreCase)) { + var rowCount = node.HasActualStats ? node.ActualRows : node.EstimateRows; var rows = node.HasActualStats ? 
$"{node.ActualRows:N0} actual rows" : $"{node.EstimateRows:N0} estimated rows"; - consumers.Add($"Sort (Node {node.NodeId}, {rows})"); + consumers.Add(($"Sort (Node {node.NodeId}, {rows})", rowCount)); } else if (node.PhysicalOp.Contains("Hash", StringComparison.OrdinalIgnoreCase)) { + var rowCount = node.HasActualStats ? node.ActualRows : node.EstimateRows; var rows = node.HasActualStats ? $"{node.ActualRows:N0} actual rows" : $"{node.EstimateRows:N0} estimated rows"; - consumers.Add($"Hash Match (Node {node.NodeId}, {rows})"); + consumers.Add(($"Hash Match (Node {node.NodeId}, {rows})", rowCount)); } foreach (var child in node.Children) - FindMemoryConsumers(child, consumers); + FindMemoryConsumersRecursive(child, consumers); } /// @@ -1298,7 +1385,7 @@ private static void FindMemoryConsumers(PlanNode node, List consumers) /// Exchange operators accumulate downstream wait time (e.g. from spilling /// children) so their self-time is unreliable — see sql.kiwi/2021/03. /// - private static long GetOperatorOwnElapsedMs(PlanNode node) + internal static long GetOperatorOwnElapsedMs(PlanNode node) { if (node.ActualExecutionMode == "Batch") return node.ActualElapsedMs; @@ -1536,6 +1623,37 @@ private static string Truncate(string value, int maxLength) return value.Length <= maxLength ? value : value[..maxLength] + "..."; } + /// + /// Returns a short label describing what a wait type means (e.g., "I/O — reading from disk"). + /// Public for use by UI components that annotate wait stats inline. 
+ /// + public static string GetWaitLabel(string waitType) + { + var wt = waitType.ToUpperInvariant(); + return wt switch + { + _ when wt.StartsWith("PAGEIOLATCH", StringComparison.Ordinal) => "I/O — reading data from disk", + _ when wt.Contains("IO_COMPLETION", StringComparison.Ordinal) => "I/O — spills to TempDB or eager writes", + _ when wt == "SOS_SCHEDULER_YIELD" => "CPU — scheduler yielding", + _ when wt.StartsWith("CXPACKET", StringComparison.Ordinal) || wt.StartsWith("CXCONSUMER", StringComparison.Ordinal) => "parallelism — thread skew", + _ when wt.StartsWith("CXSYNC", StringComparison.Ordinal) => "parallelism — exchange synchronization", + _ when wt == "HTBUILD" => "hash — building hash table", + _ when wt == "HTDELETE" => "hash — cleaning up hash table", + _ when wt == "HTREPARTITION" => "hash — repartitioning", + _ when wt.StartsWith("HT", StringComparison.Ordinal) => "hash operation", + _ when wt == "BPSORT" => "batch sort", + _ when wt == "BMPBUILD" => "bitmap filter build", + _ when wt.Contains("MEMORY_ALLOCATION_EXT", StringComparison.Ordinal) => "memory allocation", + _ when wt.StartsWith("PAGELATCH", StringComparison.Ordinal) => "page latch — in-memory contention", + _ when wt.StartsWith("LATCH_", StringComparison.Ordinal) => "latch contention", + _ when wt.StartsWith("LCK_", StringComparison.Ordinal) => "lock contention", + _ when wt == "LOGBUFFER" => "transaction log writes", + _ when wt == "ASYNC_NETWORK_IO" => "network — client not consuming results", + _ when wt == "SOS_PHYS_PAGE_CACHE" => "physical page cache contention", + _ => "" + }; + } + /// /// Returns targeted advice based on statement-level wait stats, or null if no waits. /// When the dominant wait type is clear, gives specific guidance instead of generic advice. 
@@ -1552,29 +1670,150 @@ private static string Truncate(string value, int maxLength) var top = waits.OrderByDescending(w => w.WaitTimeMs).First(); var topPct = (double)top.WaitTimeMs / totalMs * 100; - // Only give targeted advice if the dominant wait is >= 80% of total wait time - if (topPct < 80) - return null; + // Single dominant wait — give targeted advice + if (topPct >= 80) + return DescribeWaitType(top.WaitType, topPct); + + // Multiple waits — summarize the top contributors instead of guessing + var topWaits = waits.OrderByDescending(w => w.WaitTimeMs).Take(3) + .Select(w => $"{w.WaitType} ({(double)w.WaitTimeMs / totalMs * 100:N0}%)") + .ToList(); + return $"Top waits: {string.Join(", ", topWaits)}."; + } - var waitType = top.WaitType.ToUpperInvariant(); - var advice = waitType switch + /// + /// Maps a wait type to a human-readable description with percentage context. + /// Covers all wait types observed in real execution plan files. + /// + private static string DescribeWaitType(string rawWaitType, double topPct) + { + var waitType = rawWaitType.ToUpperInvariant(); + return waitType switch { + // I/O: reading/writing data pages from disk _ when waitType.StartsWith("PAGEIOLATCH", StringComparison.Ordinal) => - $"I/O bound — {topPct:N0}% of wait time is {top.WaitType}. Data is being read from disk rather than memory. Consider adding indexes to reduce I/O, or investigate memory pressure.", + $"I/O bound — {topPct:N0}% of wait time is {rawWaitType}. Data is being read from disk rather than memory. Consider adding indexes to reduce I/O, or investigate memory pressure.", + _ when waitType.Contains("IO_COMPLETION", StringComparison.Ordinal) => + $"I/O bound — {topPct:N0}% of wait time is {rawWaitType}. Non-buffer I/O such as sort/hash spills to TempDB or eager writes.", + + // CPU: thread yielding its scheduler quantum + _ when waitType == "SOS_SCHEDULER_YIELD" => + $"CPU bound — {topPct:N0}% of wait time is {rawWaitType}. 
The query is consuming significant CPU. Look for expensive operators (scans, sorts, hash builds) that could be eliminated or reduced.", + + // Parallelism: exchange and synchronization waits + _ when waitType.StartsWith("CXPACKET", StringComparison.Ordinal) || waitType.StartsWith("CXCONSUMER", StringComparison.Ordinal) => + $"Parallel thread skew — {topPct:N0}% of wait time is {rawWaitType}. Work is unevenly distributed across parallel threads.", + _ when waitType.StartsWith("CXSYNC", StringComparison.Ordinal) => + $"Parallel synchronization — {topPct:N0}% of wait time is {rawWaitType}. Threads are waiting at exchange operators to synchronize parallel execution.", + + // Hash operations + _ when waitType.StartsWith("HT", StringComparison.Ordinal) => + $"Hash operation — {topPct:N0}% of wait time is {rawWaitType}. Time spent building, repartitioning, or cleaning up hash tables. Large hash builds may indicate missing indexes or bad row estimates.", + + // Sort/bitmap batch operations + _ when waitType == "BPSORT" => + $"Batch sort — {topPct:N0}% of wait time is {rawWaitType}. Time spent in batch-mode sort operations.", + _ when waitType == "BMPBUILD" => + $"Bitmap build — {topPct:N0}% of wait time is {rawWaitType}. Time spent building bitmap filters for hash joins.", + + // Memory allocation + _ when waitType.Contains("MEMORY_ALLOCATION_EXT", StringComparison.Ordinal) => + $"Memory allocation — {topPct:N0}% of wait time is {rawWaitType}. Frequent memory allocations during query execution.", + + // Latch contention (non-I/O) + _ when waitType.StartsWith("PAGELATCH", StringComparison.Ordinal) => + $"Page latch contention — {topPct:N0}% of wait time is {rawWaitType}. 
In-memory page contention, often on TempDB or hot pages.", _ when waitType.StartsWith("LATCH_", StringComparison.Ordinal) => - $"Latch contention — {topPct:N0}% of wait time is {top.WaitType}.", + $"Latch contention — {topPct:N0}% of wait time is {rawWaitType}.", + + // Lock contention _ when waitType.StartsWith("LCK_", StringComparison.Ordinal) => - $"Lock contention — {topPct:N0}% of wait time is {top.WaitType}. Other sessions are holding locks that this query needs.", - _ when waitType.StartsWith("CXPACKET", StringComparison.Ordinal) || waitType.StartsWith("CXCONSUMER", StringComparison.Ordinal) => - $"Parallel thread skew — {topPct:N0}% of wait time is {top.WaitType}. Work is unevenly distributed across parallel threads.", - _ when waitType.Contains("IO_COMPLETION", StringComparison.Ordinal) => - $"I/O bound — {topPct:N0}% of wait time is {top.WaitType}.", - _ when waitType.StartsWith("RESOURCE_SEMAPHORE", StringComparison.Ordinal) => - $"Memory grant wait — {topPct:N0}% of wait time is {top.WaitType}. The query had to wait for a memory grant.", - _ => $"Dominant wait is {top.WaitType} ({topPct:N0}% of wait time)." + $"Lock contention — {topPct:N0}% of wait time is {rawWaitType}. Other sessions are holding locks that this query needs.", + + // Log writes + _ when waitType == "LOGBUFFER" => + $"Log write — {topPct:N0}% of wait time is {rawWaitType}. Waiting for transaction log buffer flushes, typically from data modifications.", + + // Network + _ when waitType == "ASYNC_NETWORK_IO" => + $"Network bound — {topPct:N0}% of wait time is {rawWaitType}. The client application is not consuming results fast enough.", + + // Physical page cache + _ when waitType == "SOS_PHYS_PAGE_CACHE" => + $"Physical page cache — {topPct:N0}% of wait time is {rawWaitType}. Contention on the physical memory page allocator.", + + _ => $"Dominant wait is {rawWaitType} ({topPct:N0}% of wait time)." 
}; + } + + /// + /// Returns true if the statement has significant I/O waits (PAGEIOLATCH_*, IO_COMPLETION). + /// Used for severity elevation decisions where I/O specifically indicates disk access. + /// Thresholds: I/O waits >= 20% of total wait time AND >= 100ms absolute. + /// + private static bool HasSignificantIoWaits(List waits) + { + if (waits.Count == 0) + return false; + + var totalMs = waits.Sum(w => w.WaitTimeMs); + if (totalMs == 0) + return false; + + long ioMs = 0; + foreach (var w in waits) + { + var wt = w.WaitType.ToUpperInvariant(); + if (wt.StartsWith("PAGEIOLATCH", StringComparison.Ordinal) || wt.Contains("IO_COMPLETION", StringComparison.Ordinal)) + ioMs += w.WaitTimeMs; + } + + var pct = (double)ioMs / totalMs * 100; + return ioMs >= 100 && pct >= 20; + } - return advice; + /// + /// Formats a node reference for use in warning messages. Includes object name + /// for data access operators where it helps identify which table is involved. + /// + private static string FormatNodeRef(PlanNode node) + { + if (!string.IsNullOrEmpty(node.ObjectName)) + { + var objRef = !string.IsNullOrEmpty(node.DatabaseName) + ? $"{node.DatabaseName}.{node.ObjectName}" + : node.ObjectName; + return $"{node.PhysicalOp} on {objRef} (Node {node.NodeId})"; + } + + return $"{node.PhysicalOp} (Node {node.NodeId})"; + } + + /// + /// Identifies the specific cause of a row goal from the statement text. + /// Returns a specific cause when detectable, or a generic list as fallback. 
+ /// + private static string IdentifyRowGoalCause(string stmtText) + { + if (string.IsNullOrEmpty(stmtText)) + return "TOP, EXISTS, IN, or FAST hint"; + + var text = stmtText.ToUpperInvariant(); + var causes = new List(4); + + if (Regex.IsMatch(text, @"\bTOP\b")) + causes.Add("TOP"); + if (Regex.IsMatch(text, @"\bEXISTS\b")) + causes.Add("EXISTS"); + // IN with subquery — bare "IN (" followed by SELECT, not just "IN (1,2,3)" + if (Regex.IsMatch(text, @"\bIN\s*\(\s*SELECT\b")) + causes.Add("IN (subquery)"); + if (Regex.IsMatch(text, @"\bFAST\b")) + causes.Add("FAST hint"); + + return causes.Count > 0 + ? string.Join(", ", causes) + : "TOP, EXISTS, IN, or FAST hint"; } /// diff --git a/Lite/Models/PlanModels.cs b/Lite/Models/PlanModels.cs index 50a33927..beac9c79 100644 --- a/Lite/Models/PlanModels.cs +++ b/Lite/Models/PlanModels.cs @@ -61,6 +61,7 @@ public class PlanStatement public SetOptionsInfo? SetOptions { get; set; } public List Parameters { get; set; } = new(); public List WaitStats { get; set; } = new(); + public List WaitBenefits { get; set; } = new(); public QueryTimeInfo? QueryTimeStats { get; set; } // Wave 2: MaxQueryMemory + QueryPlan-level warnings @@ -370,6 +371,17 @@ public class PlanWarning public string Message { get; set; } = ""; public PlanWarningSeverity Severity { get; set; } public SpillDetail? SpillDetails { get; set; } + + /// + /// Maximum percentage of elapsed time that could be saved by addressing this finding. + /// null = not quantifiable, 0 = calculated as negligible. + /// + public double? MaxBenefitPercent { get; set; } + + /// + /// Short actionable fix suggestion (e.g., "Add INCLUDE (columns) to index"). + /// + public string? ActionableFix { get; set; } } public enum PlanWarningSeverity { Info, Warning, Critical } @@ -433,6 +445,13 @@ public class PlanParameter public string? 
RuntimeValue { get; set; } } +public class WaitBenefit +{ + public string WaitType { get; set; } = ""; + public double MaxBenefitPercent { get; set; } + public string Category { get; set; } = ""; +} + public class WaitStatInfo { public string WaitType { get; set; } = ""; diff --git a/Lite/Services/BenefitScorer.cs b/Lite/Services/BenefitScorer.cs new file mode 100644 index 00000000..a922c9b7 --- /dev/null +++ b/Lite/Services/BenefitScorer.cs @@ -0,0 +1,653 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using PerformanceMonitorLite.Models; + +namespace PerformanceMonitorLite.Services; + +/// +/// Second-pass analysis that calculates MaxBenefitPercent for each PlanWarning. +/// Runs after PlanAnalyzer.Analyze() — the analyzer creates findings, the scorer quantifies them. +/// Benefit = maximum % of elapsed time that could be saved by addressing the finding. +/// +public static class BenefitScorer +{ + // Warning types that map to specific scoring strategies + private static readonly HashSet OperatorTimeRules = new(StringComparer.OrdinalIgnoreCase) + { + "Filter Operator", // Rule 1 + "Eager Index Spool", // Rule 2 + "Spill", // Rule 7 + // Key Lookup / RID Lookup (Rule 10) handled separately by ScoreKeyLookupWarning + "Scan With Predicate", // Rule 11 + "Non-SARGable Predicate", // Rule 12 + "Scan Cardinality Misestimate", // Rule 32 + }; + + public static void Score(ParsedPlan plan) + { + foreach (var batch in plan.Batches) + { + foreach (var stmt in batch.Statements) + { + ScoreStatementWarnings(stmt); + + if (stmt.RootNode != null) + ScoreNodeTree(stmt.RootNode, stmt); + + if (stmt.WaitStats.Count > 0 && stmt.QueryTimeStats != null) + ScoreWaitStats(stmt); + } + } + } + + private static void ScoreStatementWarnings(PlanStatement stmt) + { + var elapsedMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 
0; + + foreach (var warning in stmt.PlanWarnings) + { + switch (warning.WarningType) + { + case "Ineffective Parallelism": // Rule 25 + case "Parallel Wait Bottleneck": // Rule 31 + // These are meta-findings about parallelism efficiency. + // The benefit is the gap between actual and ideal elapsed time. + if (elapsedMs > 0 && stmt.QueryTimeStats != null) + { + var cpu = stmt.QueryTimeStats.CpuTimeMs; + var dop = stmt.DegreeOfParallelism; + if (dop > 1 && cpu > 0) + { + // Ideal elapsed = CPU / DOP. Benefit = (actual - ideal) / actual + var idealElapsed = (double)cpu / dop; + var benefit = Math.Max(0, (elapsedMs - idealElapsed) / elapsedMs * 100); + warning.MaxBenefitPercent = Math.Min(100, Math.Round(benefit, 1)); + } + } + break; + + case "Serial Plan": // Rule 3 + // Can't know how fast a parallel plan would be, but estimate: + // CPU-bound: benefit up to (1 - 1/maxDOP) * 100% + if (elapsedMs > 0 && stmt.QueryTimeStats != null) + { + var cpu = stmt.QueryTimeStats.CpuTimeMs; + // Assume server max DOP — use a conservative 4 if unknown + var potentialDop = 4; + if (cpu >= elapsedMs) + { + // CPU-bound: parallelism could help significantly + var benefit = (1.0 - 1.0 / potentialDop) * 100; + warning.MaxBenefitPercent = Math.Round(benefit, 1); + } + else + { + // Not CPU-bound: parallelism helps less + var cpuRatio = (double)cpu / elapsedMs; + var benefit = cpuRatio * (1.0 - 1.0 / potentialDop) * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(50, benefit), 1); + } + } + break; + + case "Memory Grant": // Rule 9 + // Grant wait is the only part that affects this query's elapsed time + if (elapsedMs > 0 && stmt.MemoryGrant?.GrantWaitTimeMs > 0) + { + var benefit = (double)stmt.MemoryGrant.GrantWaitTimeMs / elapsedMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + break; + + case "High Compile CPU": // Rule 19 + if (elapsedMs > 0 && stmt.CompileCPUMs > 0) + { + var benefit = (double)stmt.CompileCPUMs / elapsedMs * 100; + 
warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + break; + + // Rules that cannot be quantified: leave MaxBenefitPercent as null + // Rule 18 (Compile Memory Exceeded), Rule 20 (Local Variables), + // Rule 27 (Optimize For Unknown) + } + } + } + + private static void ScoreNodeTree(PlanNode node, PlanStatement stmt) + { + ScoreNodeWarnings(node, stmt); + + foreach (var child in node.Children) + ScoreNodeTree(child, stmt); + } + + private static void ScoreNodeWarnings(PlanNode node, PlanStatement stmt) + { + var elapsedMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0; + + foreach (var warning in node.Warnings) + { + // Already scored (e.g., by a different pass) + if (warning.MaxBenefitPercent != null) + continue; + + if (warning.WarningType == "UDF Execution") // Rule 4 + { + ScoreUdfWarning(warning, node, elapsedMs); + } + else if (warning.WarningType == "Filter Operator") // Rule 1 + { + ScoreFilterWarning(warning, node, stmt); + } + else if (warning.WarningType == "Nested Loops High Executions") // Rule 16 + { + ScoreNestedLoopsWarning(warning, node, stmt); + } + else if (warning.SpillDetails != null) // Rule 7 + { + ScoreSpillWarning(warning, node, stmt); + } + else if (warning.WarningType is "Key Lookup" or "RID Lookup") // Rule 10 + { + ScoreKeyLookupWarning(warning, node, stmt); + } + else if (OperatorTimeRules.Contains(warning.WarningType)) + { + ScoreByOperatorTime(warning, node, stmt); + } + else if (warning.WarningType == "Row Estimate Mismatch") // Rule 5 + { + ScoreEstimateMismatchWarning(warning, node, stmt); + } + // Rules that stay null: Scalar UDF (Rule 6, informational reference), + // Parallel Skew (Rule 8 — will be integrated per-operator later), + // Data Type Mismatch (Rule 13), + // Lazy Spool Ineffective (Rule 14), Join OR Clause (Rule 15), + // Many-to-Many Merge Join (Rule 17), CTE Multiple References (Rule 21), + // Table Variable (Rule 22), Table-Valued Function (Rule 23), + // Top Above Scan (Rule 24), Row Goal (Rule 
26), + // NOT IN with Nullable Column (Rule 28), Implicit Conversion (Rule 29), + // Wide Index Suggestion (Rule 30), Estimated Plan CE Guess (Rule 33) + } + } + + /// + /// Rule 4: UDF Execution — benefit is UDF elapsed time / statement elapsed. + /// + private static void ScoreUdfWarning(PlanWarning warning, PlanNode node, long stmtElapsedMs) + { + if (stmtElapsedMs > 0 && node.UdfElapsedTimeMs > 0) + { + var benefit = (double)node.UdfElapsedTimeMs / stmtElapsedMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Rule 1: Filter Operator — benefit is child subtree elapsed / statement elapsed. + /// The filter discards rows late; eliminating it means the child subtree work was unnecessary. + /// + private static void ScoreFilterWarning(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0; + + if (node.HasActualStats && stmtMs > 0 && node.Children.Count > 0) + { + var childElapsed = node.Children.Max(c => c.ActualElapsedMs); + var benefit = (double)childElapsed / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + else if (!node.HasActualStats && stmt.StatementSubTreeCost > 0 && node.Children.Count > 0) + { + // Estimated plan fallback: child subtree cost / statement cost + var childCost = node.Children.Sum(c => c.EstimatedTotalSubtreeCost); + var benefit = childCost / stmt.StatementSubTreeCost * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Rule 16: Nested Loops High Executions — benefit is inner-side elapsed / statement elapsed. + /// + private static void ScoreNestedLoopsWarning(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 
0; + + if (node.Children.Count >= 2) + { + var innerChild = node.Children[1]; + + if (innerChild.HasActualStats && stmtMs > 0 && innerChild.ActualElapsedMs > 0) + { + var benefit = (double)innerChild.ActualElapsedMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + else if (!innerChild.HasActualStats && stmt.StatementSubTreeCost > 0) + { + var benefit = innerChild.EstimatedTotalSubtreeCost / stmt.StatementSubTreeCost * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + } + + /// + /// Rule 7: Spill — benefit is the spilling operator's self-time / statement elapsed. + /// Exchange spills use the parallelism operator time (unreliable but best we have). + /// + private static void ScoreSpillWarning(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 0; + if (stmtMs <= 0) return; + + long operatorMs; + if (warning.SpillDetails?.SpillType == "Exchange") + operatorMs = GetParallelismOperatorElapsedMs(node); + else + operatorMs = PlanAnalyzer.GetOperatorOwnElapsedMs(node); + + if (operatorMs > 0) + { + var benefit = (double)operatorMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Generic operator-time scoring for rules where the fix would eliminate or + /// drastically reduce the operator's work: Eager Index Spool, + /// Scan With Predicate, Non-SARGable Predicate, Scan Cardinality Misestimate. + /// Key Lookup / RID Lookup are scored by ScoreKeyLookupWarning instead. + /// + private static void ScoreByOperatorTime(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 
0; + + if (node.HasActualStats && stmtMs > 0) + { + var operatorMs = PlanAnalyzer.GetOperatorOwnElapsedMs(node); + if (operatorMs > 0) + { + var benefit = (double)operatorMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + else + { + // Operator self-time is 0 — negligible benefit + warning.MaxBenefitPercent = 0; + } + } + else if (!node.HasActualStats && stmt.StatementSubTreeCost > 0) + { + // Estimated plan fallback: use operator cost percentage + var benefit = (double)node.CostPercent; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Rule 10: Key Lookup / RID Lookup — benefit includes the lookup operator's time, + /// plus the parent Nested Loops join when the NL only exists to drive the lookup + /// (inner child is the lookup, outer child is a seek/scan with no subtree). + /// + private static void ScoreKeyLookupWarning(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 
0; + + if (node.HasActualStats && stmtMs > 0) + { + var operatorMs = PlanAnalyzer.GetOperatorOwnElapsedMs(node); + + // Check if the parent NL join is purely a lookup driver: + // - Parent is Nested Loops + // - Has exactly 2 children + // - This node (the lookup) is the inner child (index 1) + // - The outer child (index 0) is a simple seek/scan with no children + var parent = node.Parent; + if (parent != null + && parent.PhysicalOp == "Nested Loops" + && parent.Children.Count == 2 + && parent.Children[1] == node + && parent.Children[0].Children.Count == 0) + { + operatorMs += PlanAnalyzer.GetOperatorOwnElapsedMs(parent); + } + + if (operatorMs > 0) + { + var benefit = (double)operatorMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + else + { + warning.MaxBenefitPercent = 0; + } + } + else if (!node.HasActualStats && stmt.StatementSubTreeCost > 0) + { + var benefit = (double)node.CostPercent; + // Same parent-NL logic for estimated plans + var parent = node.Parent; + if (parent != null + && parent.PhysicalOp == "Nested Loops" + && parent.Children.Count == 2 + && parent.Children[1] == node + && parent.Children[0].Children.Count == 0) + { + benefit += parent.CostPercent; + } + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Rule 5: Row Estimate Mismatch — benefit is the harmed operator's time. + /// If the mismatch caused a spill, benefit = spilling operator time. + /// If it caused a bad join choice, benefit = join operator time. + /// Otherwise, benefit is the misestimated operator's own time (conservative). + /// + private static void ScoreEstimateMismatchWarning(PlanWarning warning, PlanNode node, PlanStatement stmt) + { + var stmtMs = stmt.QueryTimeStats?.ElapsedTimeMs ?? 
0; + if (stmtMs <= 0 || !node.HasActualStats) return; + + // Walk up to find the harmed operator (same logic as AssessEstimateHarm) + var harmedNode = FindHarmedOperator(node); + if (harmedNode != null) + { + var operatorMs = PlanAnalyzer.GetOperatorOwnElapsedMs(harmedNode); + if (operatorMs > 0) + { + var benefit = (double)operatorMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + return; + } + } + + // Fallback: use the misestimated node's own time + var ownMs = PlanAnalyzer.GetOperatorOwnElapsedMs(node); + if (ownMs > 0) + { + var benefit = (double)ownMs / stmtMs * 100; + warning.MaxBenefitPercent = Math.Round(Math.Min(100, benefit), 1); + } + } + + /// + /// Walks up from a node with a bad row estimate to find the operator that was + /// harmed by it (spilling sort/hash, or join that chose the wrong strategy). + /// Returns null if no specific harm can be attributed. + /// + private static PlanNode? FindHarmedOperator(PlanNode node) + { + // The node itself has a spill — it harmed itself + if (node.Warnings.Any(w => w.SpillDetails != null)) + return node; + + // Walk up through transparent operators + var ancestor = node.Parent; + while (ancestor != null) + { + if (ancestor.PhysicalOp == "Parallelism" || + ancestor.PhysicalOp == "Compute Scalar" || + ancestor.PhysicalOp == "Segment" || + ancestor.PhysicalOp == "Sequence Project" || + ancestor.PhysicalOp == "Top" || + ancestor.PhysicalOp == "Filter") + { + ancestor = ancestor.Parent; + continue; + } + + // Parent join — bad row count from below caused wrong join choice + if (ancestor.LogicalOp.Contains("Join", StringComparison.OrdinalIgnoreCase)) + { + if (ancestor.IsAdaptive) + return null; // Adaptive join self-corrects + return ancestor; + } + + // Parent Sort/Hash that spilled + if (ancestor.Warnings.Any(w => w.SpillDetails != null)) + return ancestor; + + // Parent Sort/Hash with no spill — benign + if (ancestor.PhysicalOp.Contains("Sort", 
StringComparison.OrdinalIgnoreCase) || + ancestor.PhysicalOp.Contains("Hash", StringComparison.OrdinalIgnoreCase)) + return null; + + break; + } + + return null; + } + + /// + /// Calculates a Parallelism (exchange) operator's own elapsed time. + /// Mirrors PlanAnalyzer.GetParallelismOperatorElapsedMs but accessible here. + /// + private static long GetParallelismOperatorElapsedMs(PlanNode node) + { + if (node.Children.Count == 0) + return node.ActualElapsedMs; + + if (node.PerThreadStats.Count > 1) + return PlanAnalyzer.GetOperatorOwnElapsedMs(node); + + var maxChildElapsed = node.Children.Max(c => c.ActualElapsedMs); + return Math.Max(0, node.ActualElapsedMs - maxChildElapsed); + } + + // --------------------------------------------------------------- + // Stage 2: Wait Stats Benefit + // --------------------------------------------------------------- + + /// + /// Calculates MaxBenefitPercent for each wait type in the statement's wait stats. + /// Serial plans: simple ratio of wait time to elapsed time. + /// Parallel plans: proportional allocation across relevant operators (Joe's formula). + /// + private static void ScoreWaitStats(PlanStatement stmt) + { + var elapsedMs = stmt.QueryTimeStats!.ElapsedTimeMs; + if (elapsedMs <= 0) return; + + var isParallel = stmt.DegreeOfParallelism > 1 && stmt.RootNode != null; + + // Collect all operators with per-thread stats for parallel benefit calculation + List? operatorProfiles = null; + if (isParallel) + { + operatorProfiles = new List(); + CollectOperatorWaitProfiles(stmt.RootNode!, operatorProfiles); + } + + foreach (var wait in stmt.WaitStats) + { + if (wait.WaitTimeMs <= 0) continue; + + var category = ClassifyWaitType(wait.WaitType); + double benefitPct; + + if (category == "Parallelism" && isParallel) + { + // CXPACKET/CXCONSUMER/CXSYNC: benefit is the parallelism efficiency gap, + // not the raw wait time. 
Threads waiting for other threads is a symptom + // of imperfect parallelism, not directly addressable time. + var cpu = stmt.QueryTimeStats!.CpuTimeMs; + var dop = stmt.DegreeOfParallelism; + if (cpu > 0 && dop > 1) + { + var idealElapsed = (double)cpu / dop; + benefitPct = Math.Max(0, (elapsedMs - idealElapsed) / elapsedMs * 100); + } + else + { + benefitPct = (double)wait.WaitTimeMs / elapsedMs * 100; + } + } + else if (!isParallel || operatorProfiles == null || operatorProfiles.Count == 0) + { + // Serial plan or no operator data: simple ratio + benefitPct = (double)wait.WaitTimeMs / elapsedMs * 100; + } + else + { + // Parallel plan: proportional allocation across relevant operators + benefitPct = CalculateParallelWaitBenefit(wait, category, operatorProfiles, elapsedMs); + } + + stmt.WaitBenefits.Add(new WaitBenefit + { + WaitType = wait.WaitType, + MaxBenefitPercent = Math.Round(Math.Min(100, Math.Max(0, benefitPct)), 1), + Category = category + }); + } + } + + /// + /// Parallel wait benefit using Joe's formula: + /// benefit = (SUM relevant operator max waits) * (total_wait_for_type) / (SUM relevant operator total waits) + /// Then convert to % of statement elapsed time. 
+ /// + private static double CalculateParallelWaitBenefit( + WaitStatInfo wait, string category, + List profiles, long stmtElapsedMs) + { + // Filter to operators relevant for this wait category + var relevant = new List(); + foreach (var p in profiles) + { + if (IsOperatorRelevantForCategory(p, category)) + relevant.Add(p); + } + + // If no operators match, fall back to simple ratio + if (relevant.Count == 0) + return (double)wait.WaitTimeMs / stmtElapsedMs * 100; + + // Joe's formula: + // sum_max = SUM of each relevant operator's max per-thread wait time + // sum_total = SUM of each relevant operator's total wait time across all threads + // benefit_ms = sum_max * wait.WaitTimeMs / sum_total + double sumMax = 0; + double sumTotal = 0; + foreach (var p in relevant) + { + sumMax += p.MaxThreadWaitMs; + sumTotal += p.TotalWaitMs; + } + + if (sumTotal <= 0) + return (double)wait.WaitTimeMs / stmtElapsedMs * 100; + + var benefitMs = sumMax * wait.WaitTimeMs / sumTotal; + return benefitMs / stmtElapsedMs * 100; + } + + /// + /// Determines if an operator is relevant for a given wait category. + /// + private static bool IsOperatorRelevantForCategory(OperatorWaitProfile profile, string category) + { + return category switch + { + "I/O" => profile.HasPhysicalReads, + "CPU" => profile.HasCpuWork, + "Parallelism" => profile.IsExchange, + "Hash" => profile.IsHashOperator, + "Sort" => profile.IsSortOperator, + "Latch" => profile.HasTempDbActivity, + "Lock" => true, // any operator can be blocked by locks + "Network" => false, // ASYNC_NETWORK_IO is client-side, not attributable to operators + "Memory" => false, // memory waits are statement-level + _ => true, // unknown category: include all operators + }; + } + + /// + /// Walks the operator tree and collects wait time profiles for each operator. + /// Wait time per thread = max(0, elapsed - cpu) for that thread. 
+ /// + private static void CollectOperatorWaitProfiles(PlanNode node, List profiles) + { + if (node.HasActualStats && node.PerThreadStats.Count > 0) + { + long maxThreadWait = 0; + long totalWait = 0; + + foreach (var ts in node.PerThreadStats) + { + var threadWait = Math.Max(0, ts.ActualElapsedMs - ts.ActualCPUMs); + totalWait += threadWait; + if (threadWait > maxThreadWait) + maxThreadWait = threadWait; + } + + if (totalWait > 0 || maxThreadWait > 0) + { + profiles.Add(new OperatorWaitProfile + { + Node = node, + MaxThreadWaitMs = maxThreadWait, + TotalWaitMs = totalWait, + HasPhysicalReads = node.ActualPhysicalReads > 0, + HasCpuWork = node.ActualCPUMs > 0, + IsExchange = node.PhysicalOp == "Parallelism", + IsHashOperator = node.PhysicalOp.StartsWith("Hash", StringComparison.OrdinalIgnoreCase), + IsSortOperator = node.PhysicalOp.StartsWith("Sort", StringComparison.OrdinalIgnoreCase), + HasTempDbActivity = node.Warnings.Any(w => w.SpillDetails != null) + || node.PhysicalOp.Contains("Spool", StringComparison.OrdinalIgnoreCase) + }); + } + } + + foreach (var child in node.Children) + CollectOperatorWaitProfiles(child, profiles); + } + + /// + /// Classifies a wait type into a category for operator-to-wait mapping. 
+ /// + internal static string ClassifyWaitType(string waitType) + { + var wt = waitType.ToUpperInvariant(); + return wt switch + { + _ when wt.StartsWith("PAGEIOLATCH") => "I/O", + _ when wt.Contains("IO_COMPLETION") => "I/O", + _ when wt.StartsWith("WRITELOG") => "I/O", + _ when wt == "SOS_SCHEDULER_YIELD" => "CPU", + _ when wt.StartsWith("CXPACKET") || wt.StartsWith("CXCONSUMER") => "Parallelism", + _ when wt.StartsWith("CXSYNC") => "Parallelism", + _ when wt.StartsWith("HT") => "Hash", + _ when wt == "BPSORT" => "Sort", + _ when wt == "BMPBUILD" => "Hash", + _ when wt.StartsWith("PAGELATCH") => "Latch", + _ when wt.StartsWith("LATCH_") => "Latch", + _ when wt.StartsWith("LCK_") => "Lock", + _ when wt == "ASYNC_NETWORK_IO" => "Network", + _ when wt.Contains("MEMORY_ALLOCATION") => "Memory", + _ when wt == "SOS_PHYS_PAGE_CACHE" => "Memory", + _ => "Other" + }; + } + + /// + /// Per-operator wait time profile used for parallel benefit allocation. + /// + private sealed class OperatorWaitProfile + { + public PlanNode Node { get; init; } = null!; + public long MaxThreadWaitMs { get; init; } + public long TotalWaitMs { get; init; } + public bool HasPhysicalReads { get; init; } + public bool HasCpuWork { get; init; } + public bool IsExchange { get; init; } + public bool IsHashOperator { get; init; } + public bool IsSortOperator { get; init; } + public bool HasTempDbActivity { get; init; } + } +} diff --git a/Lite/Services/PlanAnalyzer.cs b/Lite/Services/PlanAnalyzer.cs index 866def3b..29d0a3d6 100644 --- a/Lite/Services/PlanAnalyzer.cs +++ b/Lite/Services/PlanAnalyzer.cs @@ -179,16 +179,24 @@ private static void AnalyzeStatement(PlanStatement stmt) }); } - // Large memory grant with sort/hash guidance + // Large memory grant with top consumers if (grant.GrantedMemoryKB >= 1048576 && stmt.RootNode != null) { var consumers = new List(); FindMemoryConsumers(stmt.RootNode, consumers); var grantMB = grant.GrantedMemoryKB / 1024.0; - var guidance = consumers.Count > 0 - ? 
$" Memory consumers: {string.Join(", ", consumers)}. Check whether these operators are processing more rows than necessary." - : ""; + var guidance = ""; + if (consumers.Count > 0) + { + // Show only the top 3 consumers — listing 20+ is noise + var shown = consumers.Take(3); + var remaining = consumers.Count - 3; + guidance = $" Largest consumers: {string.Join(", ", shown)}"; + if (remaining > 0) + guidance += $", and {remaining} more"; + guidance += "."; + } stmt.PlanWarnings.Add(new PlanWarning { @@ -517,7 +525,8 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt) // - A parent join may have chosen the wrong strategy // - Root nodes with no parent to harm are skipped // - Nodes whose only parents are Parallelism/Top/Sort (no spill) are skipped - if (node.HasActualStats && node.EstimateRows > 0) + if (node.HasActualStats && node.EstimateRows > 0 + && !node.Lookup) // Key lookups are point lookups (1 row per execution) — per-execution estimate is misleading { if (node.ActualRows == 0) { @@ -642,11 +651,29 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt) var skewThreshold = workerThreads.Count <= 2 ? 0.80 : 0.50; if (skewRatio >= skewThreshold) { + var message = $"Thread {maxThread.ThreadId} processed {skewRatio:P0} of rows ({maxThread.ActualRows:N0}/{totalRows:N0}). Work is heavily skewed to one thread, so parallelism isn't helping much."; + var severity = PlanWarningSeverity.Warning; + + // Batch mode sorts produce all output on a single thread by design + // unless their parent is a batch mode Window Aggregate + if (node.PhysicalOp == "Sort" + && (node.ActualExecutionMode ?? 
node.ExecutionMode) == "Batch" + && node.Parent?.PhysicalOp != "Window Aggregate") + { + message += " Batch mode sorts produce all output rows on a single thread by design, unless feeding a batch mode Window Aggregate."; + severity = PlanWarningSeverity.Info; + } + else + { + // Add practical context — skew is often hard to fix + message += " Common causes: uneven data distribution across partitions or hash buckets, or a scan/seek whose predicate sends most rows to one range. Reducing DOP or rewriting the query to avoid the skewed operation may help."; + } + node.Warnings.Add(new PlanWarning { WarningType = "Parallel Skew", - Message = $"Thread {maxThread.ThreadId} processed {skewRatio:P0} of rows ({maxThread.ActualRows:N0}/{totalRows:N0}). Work is heavily skewed to one thread, so parallelism isn't helping much.", - Severity = PlanWarningSeverity.Warning + Message = message, + Severity = severity }); } } @@ -667,18 +694,37 @@ private static void AnalyzeNode(PlanNode node, PlanStatement stmt) Severity = PlanWarningSeverity.Warning }); } - else if (node.Lookup && !string.IsNullOrEmpty(node.Predicate)) + else if (node.Lookup) { + var lookupMsg = "Key Lookup — SQL Server found rows via a nonclustered index but had to go back to the clustered index for additional columns."; + + // Show what columns the lookup is fetching + if (!string.IsNullOrEmpty(node.OutputColumns)) + lookupMsg += $"\nColumns fetched: {Truncate(node.OutputColumns, 200)}"; + + // Only call out the predicate if it actually filters rows + if (!string.IsNullOrEmpty(node.Predicate)) + { + var predicateFilters = node.HasActualStats && node.ActualExecutions > 0 + && node.ActualRows < node.ActualExecutions; + if (predicateFilters) + lookupMsg += $"\nResidual predicate (filtered {node.ActualExecutions - node.ActualRows:N0} rows): {Truncate(node.Predicate, 200)}"; + } + + lookupMsg += "\nTo eliminate the lookup, consider adding the needed columns as INCLUDE columns on the nonclustered index. 
This widens the index, so weigh the read benefit against write and storage overhead."; + node.Warnings.Add(new PlanWarning { WarningType = "Key Lookup", - Message = $"Key Lookup — SQL Server found rows via a nonclustered index but had to go back to the clustered index for additional columns. Alter the nonclustered index to add the predicate column as a key column or as an INCLUDE column.\nPredicate: {Truncate(node.Predicate, 200)}", + Message = lookupMsg, Severity = PlanWarningSeverity.Critical }); } // Rule 12: Non-SARGable predicate on scan - var nonSargableReason = DetectNonSargablePredicate(node); + // Skip for 0-execution nodes — the operator never ran, so the warning is academic + var nonSargableReason = (node.HasActualStats && node.ActualExecutions == 0) + ? null : DetectNonSargablePredicate(node); if (nonSargableReason != null) { var nonSargableAdvice = nonSargableReason switch @@ -707,19 +753,30 @@ _ when nonSargableReason.StartsWith("Function call", StringComparison.OrdinalIgn // Rule 11: Scan with residual predicate (skip if non-SARGable already flagged) // A PROBE() alone is just a bitmap filter — not a real residual predicate. 
+ // Skip for 0-execution nodes — the operator never ran if (nonSargableReason == null && IsRowstoreScan(node) && !string.IsNullOrEmpty(node.Predicate) && - !IsProbeOnly(node.Predicate)) + !IsProbeOnly(node.Predicate) && !(node.HasActualStats && node.ActualExecutions == 0)) { var displayPredicate = StripProbeExpressions(node.Predicate); var details = BuildScanImpactDetails(node, stmt); var severity = PlanWarningSeverity.Warning; + + // Elevate to Critical if the scan dominates the plan if (details.CostPct >= 90 || details.ElapsedPct >= 90) severity = PlanWarningSeverity.Critical; + var message = "Scan with residual predicate — SQL Server is reading every row and filtering after the fact."; if (!string.IsNullOrEmpty(details.Summary)) message += $" {details.Summary}"; message += " Check that you have appropriate indexes."; + + // I/O waits specifically confirm the scan is hitting disk — elevate + if (HasSignificantIoWaits(stmt.WaitStats) && details.CostPct >= 50 + && severity != PlanWarningSeverity.Critical) + severity = PlanWarningSeverity.Critical; + message += $"\nPredicate: {Truncate(displayPredicate, 200)}"; + node.Warnings.Add(new PlanWarning { WarningType = "Scan With Predicate", @@ -804,7 +861,8 @@ _ when nonSargableReason.StartsWith("Function call", StringComparison.OrdinalIgn // Rule 14: Lazy Table Spool unfavorable rebind/rewind ratio // Rebinds = cache misses (child re-executes), rewinds = cache hits (reuse cached result) - if (node.LogicalOp == "Lazy Spool" && !node.PhysicalOp.Contains("Index", StringComparison.OrdinalIgnoreCase)) + if (node.LogicalOp == "Lazy Spool" + && !node.PhysicalOp.Contains("Index", StringComparison.OrdinalIgnoreCase)) { var rebinds = node.HasActualStats ? (double)node.ActualRebinds : node.EstimateRebinds; var rewinds = node.HasActualStats ? 
(double)node.ActualRewinds : node.EstimateRewinds; @@ -994,7 +1052,7 @@ _ when nonSargableReason.StartsWith("Function call", StringComparison.OrdinalIgn node.Warnings.Add(new PlanWarning { WarningType = "Top Above Scan", - Message = $"{topLabel} reads from {scanCandidate.PhysicalOp} (Node {scanCandidate.NodeId}).{innerNote}{predInfo} An index on the ORDER BY columns could eliminate the scan and sort entirely.", + Message = $"{topLabel} reads from {FormatNodeRef(scanCandidate)}.{innerNote}{predInfo} An index on the ORDER BY columns could eliminate the scan and sort entirely.", Severity = onInner ? PlanWarningSeverity.Critical : PlanWarningSeverity.Warning }); } @@ -1013,12 +1071,28 @@ _ when nonSargableReason.StartsWith("Function call", StringComparison.OrdinalIgn // tiny floating-point differences that display identically are noise if (reduction >= 2.0) { - node.Warnings.Add(new PlanWarning + // If we have actual stats, check whether the row goal prediction was correct. + // When actual rows <= the row goal estimate, the optimizer stopped early as planned — benign. + var rowGoalWorked = false; + if (node.HasActualStats) { - WarningType = "Row Goal", - Message = $"Row goal active: estimate reduced from {node.EstimateRowsWithoutRowGoal:N0} to {node.EstimateRows:N0} ({reduction:N0}x reduction) due to TOP, EXISTS, IN, or FAST hint. The optimizer chose this plan shape expecting to stop reading early. If the query reads all rows anyway, the plan choice may be suboptimal.", - Severity = PlanWarningSeverity.Info - }); + var executions = node.ActualExecutions > 0 ? 
node.ActualExecutions : 1; + var actualPerExec = (double)node.ActualRows / executions; + rowGoalWorked = actualPerExec <= node.EstimateRows; + } + + if (!rowGoalWorked) + { + // Try to identify the specific row goal cause from the statement text + var cause = IdentifyRowGoalCause(stmt.StatementText); + + node.Warnings.Add(new PlanWarning + { + WarningType = "Row Goal", + Message = $"Row goal active: estimate reduced from {node.EstimateRowsWithoutRowGoal:N0} to {node.EstimateRows:N0} ({reduction:N0}x reduction) due to {cause}. The optimizer chose this plan shape expecting to stop reading early. If the query reads all rows anyway, the plan choice may be suboptimal.", + Severity = PlanWarningSeverity.Info + }); + } } } @@ -1040,9 +1114,11 @@ _ when nonSargableReason.StartsWith("Function call", StringComparison.OrdinalIgn } // Rule 29: Enhance implicit conversion warnings — Seek Plan is more severe + // Skip for 0-execution nodes — the operator never ran + if (!(node.HasActualStats && node.ActualExecutions == 0)) foreach (var w in node.Warnings.ToList()) { - if (w.WarningType == "Implicit Conversion" && w.Message.StartsWith("Seek Plan", StringComparison.Ordinal)) + if (w.WarningType == "Implicit Conversion" && w.Message.StartsWith("Seek Plan")) { w.Severity = PlanWarningSeverity.Critical; w.Message = $"Implicit conversion prevented an index seek, forcing a scan instead. Fix the data type mismatch: ensure the parameter or variable type matches the column type exactly. {w.Message}"; @@ -1267,25 +1343,37 @@ private static bool IsOrExpansionChain(PlanNode concatenationNode) /// Finds Sort and Hash Match operators in the tree that consume memory. 
/// private static void FindMemoryConsumers(PlanNode node, List consumers) + { + // Collect all consumers first, then sort by row count descending + var raw = new List<(string Label, double Rows)>(); + FindMemoryConsumersRecursive(node, raw); + + foreach (var (label, _) in raw.OrderByDescending(c => c.Rows)) + consumers.Add(label); + } + + private static void FindMemoryConsumersRecursive(PlanNode node, List<(string Label, double Rows)> consumers) { if (node.PhysicalOp.Contains("Sort", StringComparison.OrdinalIgnoreCase) && !node.PhysicalOp.Contains("Spool", StringComparison.OrdinalIgnoreCase)) { + var rowCount = node.HasActualStats ? node.ActualRows : node.EstimateRows; var rows = node.HasActualStats ? $"{node.ActualRows:N0} actual rows" : $"{node.EstimateRows:N0} estimated rows"; - consumers.Add($"Sort (Node {node.NodeId}, {rows})"); + consumers.Add(($"Sort (Node {node.NodeId}, {rows})", rowCount)); } else if (node.PhysicalOp.Contains("Hash", StringComparison.OrdinalIgnoreCase)) { + var rowCount = node.HasActualStats ? node.ActualRows : node.EstimateRows; var rows = node.HasActualStats ? $"{node.ActualRows:N0} actual rows" : $"{node.EstimateRows:N0} estimated rows"; - consumers.Add($"Hash Match (Node {node.NodeId}, {rows})"); + consumers.Add(($"Hash Match (Node {node.NodeId}, {rows})", rowCount)); } foreach (var child in node.Children) - FindMemoryConsumers(child, consumers); + FindMemoryConsumersRecursive(child, consumers); } /// @@ -1297,7 +1385,7 @@ private static void FindMemoryConsumers(PlanNode node, List consumers) /// Exchange operators accumulate downstream wait time (e.g. from spilling /// children) so their self-time is unreliable — see sql.kiwi/2021/03. 
/// - private static long GetOperatorOwnElapsedMs(PlanNode node) + internal static long GetOperatorOwnElapsedMs(PlanNode node) { if (node.ActualExecutionMode == "Batch") return node.ActualElapsedMs; @@ -1535,6 +1623,37 @@ private static string Truncate(string value, int maxLength) return value.Length <= maxLength ? value : value[..maxLength] + "..."; } + /// + /// Returns a short label describing what a wait type means (e.g., "I/O — reading from disk"). + /// Public for use by UI components that annotate wait stats inline. + /// + public static string GetWaitLabel(string waitType) + { + var wt = waitType.ToUpperInvariant(); + return wt switch + { + _ when wt.StartsWith("PAGEIOLATCH", StringComparison.Ordinal) => "I/O — reading data from disk", + _ when wt.Contains("IO_COMPLETION", StringComparison.Ordinal) => "I/O — spills to TempDB or eager writes", + _ when wt == "SOS_SCHEDULER_YIELD" => "CPU — scheduler yielding", + _ when wt.StartsWith("CXPACKET", StringComparison.Ordinal) || wt.StartsWith("CXCONSUMER", StringComparison.Ordinal) => "parallelism — thread skew", + _ when wt.StartsWith("CXSYNC", StringComparison.Ordinal) => "parallelism — exchange synchronization", + _ when wt == "HTBUILD" => "hash — building hash table", + _ when wt == "HTDELETE" => "hash — cleaning up hash table", + _ when wt == "HTREPARTITION" => "hash — repartitioning", + _ when wt.StartsWith("HT", StringComparison.Ordinal) => "hash operation", + _ when wt == "BPSORT" => "batch sort", + _ when wt == "BMPBUILD" => "bitmap filter build", + _ when wt.Contains("MEMORY_ALLOCATION_EXT", StringComparison.Ordinal) => "memory allocation", + _ when wt.StartsWith("PAGELATCH", StringComparison.Ordinal) => "page latch — in-memory contention", + _ when wt.StartsWith("LATCH_", StringComparison.Ordinal) => "latch contention", + _ when wt.StartsWith("LCK_", StringComparison.Ordinal) => "lock contention", + _ when wt == "LOGBUFFER" => "transaction log writes", + _ when wt == "ASYNC_NETWORK_IO" => "network — 
client not consuming results", + _ when wt == "SOS_PHYS_PAGE_CACHE" => "physical page cache contention", + _ => "" + }; + } + /// /// Returns targeted advice based on statement-level wait stats, or null if no waits. /// When the dominant wait type is clear, gives specific guidance instead of generic advice. @@ -1551,29 +1670,150 @@ private static string Truncate(string value, int maxLength) var top = waits.OrderByDescending(w => w.WaitTimeMs).First(); var topPct = (double)top.WaitTimeMs / totalMs * 100; - // Only give targeted advice if the dominant wait is >= 80% of total wait time - if (topPct < 80) - return null; + // Single dominant wait — give targeted advice + if (topPct >= 80) + return DescribeWaitType(top.WaitType, topPct); + + // Multiple waits — summarize the top contributors instead of guessing + var topWaits = waits.OrderByDescending(w => w.WaitTimeMs).Take(3) + .Select(w => $"{w.WaitType} ({(double)w.WaitTimeMs / totalMs * 100:N0}%)") + .ToList(); + return $"Top waits: {string.Join(", ", topWaits)}."; + } - var waitType = top.WaitType.ToUpperInvariant(); - var advice = waitType switch + /// + /// Maps a wait type to a human-readable description with percentage context. + /// Covers all wait types observed in real execution plan files. + /// + private static string DescribeWaitType(string rawWaitType, double topPct) + { + var waitType = rawWaitType.ToUpperInvariant(); + return waitType switch { + // I/O: reading/writing data pages from disk _ when waitType.StartsWith("PAGEIOLATCH", StringComparison.Ordinal) => - $"I/O bound — {topPct:N0}% of wait time is {top.WaitType}. Data is being read from disk rather than memory. Consider adding indexes to reduce I/O, or investigate memory pressure.", + $"I/O bound — {topPct:N0}% of wait time is {rawWaitType}. Data is being read from disk rather than memory. 
Consider adding indexes to reduce I/O, or investigate memory pressure.", + _ when waitType.Contains("IO_COMPLETION", StringComparison.Ordinal) => + $"I/O bound — {topPct:N0}% of wait time is {rawWaitType}. Non-buffer I/O such as sort/hash spills to TempDB or eager writes.", + + // CPU: thread yielding its scheduler quantum + _ when waitType == "SOS_SCHEDULER_YIELD" => + $"CPU bound — {topPct:N0}% of wait time is {rawWaitType}. The query is consuming significant CPU. Look for expensive operators (scans, sorts, hash builds) that could be eliminated or reduced.", + + // Parallelism: exchange and synchronization waits + _ when waitType.StartsWith("CXPACKET", StringComparison.Ordinal) || waitType.StartsWith("CXCONSUMER", StringComparison.Ordinal) => + $"Parallel thread skew — {topPct:N0}% of wait time is {rawWaitType}. Work is unevenly distributed across parallel threads.", + _ when waitType.StartsWith("CXSYNC", StringComparison.Ordinal) => + $"Parallel synchronization — {topPct:N0}% of wait time is {rawWaitType}. Threads are waiting at exchange operators to synchronize parallel execution.", + + // Hash operations + _ when waitType.StartsWith("HT", StringComparison.Ordinal) => + $"Hash operation — {topPct:N0}% of wait time is {rawWaitType}. Time spent building, repartitioning, or cleaning up hash tables. Large hash builds may indicate missing indexes or bad row estimates.", + + // Sort/bitmap batch operations + _ when waitType == "BPSORT" => + $"Batch sort — {topPct:N0}% of wait time is {rawWaitType}. Time spent in batch-mode sort operations.", + _ when waitType == "BMPBUILD" => + $"Bitmap build — {topPct:N0}% of wait time is {rawWaitType}. Time spent building bitmap filters for hash joins.", + + // Memory allocation + _ when waitType.Contains("MEMORY_ALLOCATION_EXT", StringComparison.Ordinal) => + $"Memory allocation — {topPct:N0}% of wait time is {rawWaitType}. 
Frequent memory allocations during query execution.", + + // Latch contention (non-I/O) + _ when waitType.StartsWith("PAGELATCH", StringComparison.Ordinal) => + $"Page latch contention — {topPct:N0}% of wait time is {rawWaitType}. In-memory page contention, often on TempDB or hot pages.", _ when waitType.StartsWith("LATCH_", StringComparison.Ordinal) => - $"Latch contention — {topPct:N0}% of wait time is {top.WaitType}.", + $"Latch contention — {topPct:N0}% of wait time is {rawWaitType}.", + + // Lock contention _ when waitType.StartsWith("LCK_", StringComparison.Ordinal) => - $"Lock contention — {topPct:N0}% of wait time is {top.WaitType}. Other sessions are holding locks that this query needs.", - _ when waitType.StartsWith("CXPACKET", StringComparison.Ordinal) || waitType.StartsWith("CXCONSUMER", StringComparison.Ordinal) => - $"Parallel thread skew — {topPct:N0}% of wait time is {top.WaitType}. Work is unevenly distributed across parallel threads.", - _ when waitType.Contains("IO_COMPLETION", StringComparison.Ordinal) => - $"I/O bound — {topPct:N0}% of wait time is {top.WaitType}.", - _ when waitType.StartsWith("RESOURCE_SEMAPHORE", StringComparison.Ordinal) => - $"Memory grant wait — {topPct:N0}% of wait time is {top.WaitType}. The query had to wait for a memory grant.", - _ => $"Dominant wait is {top.WaitType} ({topPct:N0}% of wait time)." + $"Lock contention — {topPct:N0}% of wait time is {rawWaitType}. Other sessions are holding locks that this query needs.", + + // Log writes + _ when waitType == "LOGBUFFER" => + $"Log write — {topPct:N0}% of wait time is {rawWaitType}. Waiting for transaction log buffer flushes, typically from data modifications.", + + // Network + _ when waitType == "ASYNC_NETWORK_IO" => + $"Network bound — {topPct:N0}% of wait time is {rawWaitType}. 
The client application is not consuming results fast enough.", + + // Physical page cache + _ when waitType == "SOS_PHYS_PAGE_CACHE" => + $"Physical page cache — {topPct:N0}% of wait time is {rawWaitType}. Contention on the physical memory page allocator.", + + _ => $"Dominant wait is {rawWaitType} ({topPct:N0}% of wait time)." }; + } + + /// + /// Returns true if the statement has significant I/O waits (PAGEIOLATCH_*, IO_COMPLETION). + /// Used for severity elevation decisions where I/O specifically indicates disk access. + /// Thresholds: I/O waits >= 20% of total wait time AND >= 100ms absolute. + /// + private static bool HasSignificantIoWaits(List waits) + { + if (waits.Count == 0) + return false; + + var totalMs = waits.Sum(w => w.WaitTimeMs); + if (totalMs == 0) + return false; + + long ioMs = 0; + foreach (var w in waits) + { + var wt = w.WaitType.ToUpperInvariant(); + if (wt.StartsWith("PAGEIOLATCH", StringComparison.Ordinal) || wt.Contains("IO_COMPLETION", StringComparison.Ordinal)) + ioMs += w.WaitTimeMs; + } + + var pct = (double)ioMs / totalMs * 100; + return ioMs >= 100 && pct >= 20; + } - return advice; + /// + /// Formats a node reference for use in warning messages. Includes object name + /// for data access operators where it helps identify which table is involved. + /// + private static string FormatNodeRef(PlanNode node) + { + if (!string.IsNullOrEmpty(node.ObjectName)) + { + var objRef = !string.IsNullOrEmpty(node.DatabaseName) + ? $"{node.DatabaseName}.{node.ObjectName}" + : node.ObjectName; + return $"{node.PhysicalOp} on {objRef} (Node {node.NodeId})"; + } + + return $"{node.PhysicalOp} (Node {node.NodeId})"; + } + + /// + /// Identifies the specific cause of a row goal from the statement text. + /// Returns a specific cause when detectable, or a generic list as fallback. 
+ /// + private static string IdentifyRowGoalCause(string stmtText) + { + if (string.IsNullOrEmpty(stmtText)) + return "TOP, EXISTS, IN, or FAST hint"; + + var text = stmtText.ToUpperInvariant(); + var causes = new List(4); + + if (Regex.IsMatch(text, @"\bTOP\b")) + causes.Add("TOP"); + if (Regex.IsMatch(text, @"\bEXISTS\b")) + causes.Add("EXISTS"); + // IN with subquery — bare "IN (" followed by SELECT, not just "IN (1,2,3)" + if (Regex.IsMatch(text, @"\bIN\s*\(\s*SELECT\b")) + causes.Add("IN (subquery)"); + if (Regex.IsMatch(text, @"\bFAST\b")) + causes.Add("FAST hint"); + + return causes.Count > 0 + ? string.Join(", ", causes) + : "TOP, EXISTS, IN, or FAST hint"; } ///