From e6e5d69a72cbcfbeee40d6191987f97524c913ba Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Fri, 24 Apr 2026 09:59:05 -0400 Subject: [PATCH] Batch-mode subtree elapsed sums (not maxes) when computing parent self-time (#215 D1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Joe's plan 5GQmlu6m7W has a Parallelism (Gather Streams) above a batch-mode zone of three operators: Compute Scalar (9ms) / Hash Match Aggregate (10,294ms) / Clustered Index Scan (13,761ms). The exchange itself has elapsed 21,177ms and was being reported with 21,168ms of self-time because GetEffectiveChildElapsedMs only subtracted the direct child (the 9ms Compute Scalar). Batch mode pipelines operators — each operator's elapsed is standalone wall time for that operator, not cumulative of descendants the way row mode is. So for a row-mode parent above a batch-mode subtree, the correct "effective child elapsed" is the sum across the whole batch pipeline, not just the direct child. New SumBatchSubtreeElapsedMs walks a contiguous batch-mode zone (stopping at Parallelism boundaries) and sums ActualElapsedMs. GetEffectiveChildElapsedMs now routes batch-mode children with actual stats through that helper. Result on 5GQmlu6m7W: Parallelism self-time goes from 21,168ms (~56% of statement) to 0ms (clamped — 21,177ms - 24,064ms is negative, matching Joe's math). No more bogus "Expensive Operator" warning on the gather-streams. Regression check: - c1-c5.sqlplan (serial row mode) — all warnings unchanged - 20260415_1.sqlplan (parallel batch-mode) — all wait benefits and operator warnings unchanged (the batch subtree rule only fires when computing self-time of a row-mode parent above a batch child) D2 (unhelpful CXPACKET warning) was already addressed by v1.7.7's WaitStatsKnowledge content strip — the CXPACKET item now shows only the raw wait ms/count, no speculative fix text. 
Joe's deeper concern about CXPACKET double-counting other waits is a separate future refinement. Version bump 1.7.7 -> 1.7.8. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/PlanViewer.App/PlanViewer.App.csproj | 2 +- src/PlanViewer.Core/Services/PlanAnalyzer.cs | 32 ++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/PlanViewer.App/PlanViewer.App.csproj b/src/PlanViewer.App/PlanViewer.App.csproj index 6aa36a1..3064690 100644 --- a/src/PlanViewer.App/PlanViewer.App.csproj +++ b/src/PlanViewer.App/PlanViewer.App.csproj @@ -6,7 +6,7 @@ app.manifest EDD.ico true - 1.7.7 + 1.7.8 Erik Darling Darling Data LLC Performance Studio diff --git a/src/PlanViewer.Core/Services/PlanAnalyzer.cs b/src/PlanViewer.Core/Services/PlanAnalyzer.cs index dd5739f..69ada1b 100644 --- a/src/PlanViewer.Core/Services/PlanAnalyzer.cs +++ b/src/PlanViewer.Core/Services/PlanAnalyzer.cs @@ -1731,6 +1731,14 @@ private static long GetEffectiveChildElapsedMs(PlanNode child) if (child.PhysicalOp == "Parallelism" && child.Children.Count > 0) return child.Children.Max(GetEffectiveChildElapsedMs); + // Batch mode pipelines — each operator's elapsed stands alone rather than + // rolling up its descendants the way row-mode does. For a parent computing + // self-time above a batch-mode subtree, subtract the whole pipeline's time + // (Joe #215 D1: Parallelism gather-streams above three batch operators). + var mode = child.ActualExecutionMode ?? child.ExecutionMode; + if (mode == "Batch" && child.HasActualStats) + return SumBatchSubtreeElapsedMs(child); + // Child has its own stats: use them if (child.ActualElapsedMs > 0) return child.ActualElapsedMs; @@ -1745,6 +1753,30 @@ private static long GetEffectiveChildElapsedMs(PlanNode child) return sum; } + /// + /// Sums ActualElapsedMs across a contiguous batch-mode subtree (stops at + /// Parallelism exchange zone boundaries). 
Batch operators pipeline — elapsed + /// times are standalone, not cumulative — so summing gives the total work the + /// zone did, which is what a row-mode parent above the zone should subtract + /// to get its own self-time. + /// + private static long SumBatchSubtreeElapsedMs(PlanNode node) + { + long sum = node.ActualElapsedMs; + foreach (var child in node.Children) + { + // Zone boundary — stop summing + if (child.PhysicalOp == "Parallelism") continue; + + var childMode = child.ActualExecutionMode ?? child.ExecutionMode; + if (childMode == "Batch" && child.HasActualStats) + sum += SumBatchSubtreeElapsedMs(child); + else + sum += GetEffectiveChildElapsedMs(child); + } + return sum; + } + /// /// Calculates a Parallelism (exchange) operator's own elapsed time. /// Exchange times are unreliable — they accumulate wait time caused by