From ef4ed0cb0b4986da3c065d066e3c13a6e5520b7a Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Wed, 25 Feb 2026 12:00:34 -0500 Subject: [PATCH 1/2] Issue #281 Gap 2: File I/O throughput charts, file-level latency, queued I/O overlay **New features (both apps):** - Add "File I/O Throughput" sub-tab with read/write MB/s charts per file - Restructure "File I/O" tab into "File I/O Latency" and "File I/O Throughput" sub-tabs - Add queued I/O overlay (dashed lines) to latency charts showing OS queue wait time **Lite-specific:** - Switch File I/O latency charts from database-level to file-level (top 10 by activity) - Add io_stall_queued_read/write columns to DuckDB schema (v15 migration) - Update collector to collect queued stall data from sys.dm_io_virtual_file_stats - Use DuckDB LAG() window function for throughput MB/s interval calculation **Dashboard-specific:** - Fix double-brace interpolation bug in GetFileIoThroughputTimeSeriesAsync ({{dbFilter}}/{{dateFilter}} produced literal text instead of interpolating variables) **CI fix:** - Update 50_configuration_issues_analyzer.sql and 97_test_procedures.sql to remove references to warning columns dropped from collect.memory_grant_stats in Gap 1 Co-Authored-By: Claude Sonnet 4.6 --- .../Controls/ResourceMetricsContent.xaml | 85 ++++++++--- .../Controls/ResourceMetricsContent.xaml.cs | 54 ++++++- .../Models/FileIoLatencyTimeSeriesItem.cs | 4 + .../DatabaseService.ResourceMetrics.cs | 133 ++++++++++++++++- Lite/Controls/ServerTab.xaml | 43 ++++-- Lite/Controls/ServerTab.xaml.cs | 105 +++++++++++++- Lite/Database/DuckDbInitializer.cs | 11 +- Lite/Database/Schema.cs | 6 +- Lite/Services/LocalDataService.FileIo.cs | 135 ++++++++++++++++-- .../Services/RemoteCollectorService.FileIo.cs | 19 ++- install/50_configuration_issues_analyzer.sql | 21 +-- install/97_test_procedures.sql | 16 +-- 12 files changed, 550 insertions(+), 82 deletions(-) diff --git 
a/Dashboard/Controls/ResourceMetricsContent.xaml b/Dashboard/Controls/ResourceMetricsContent.xaml index be2771a1..63ad4e7e 100644 --- a/Dashboard/Controls/ResourceMetricsContent.xaml +++ b/Dashboard/Controls/ResourceMetricsContent.xaml @@ -201,38 +201,77 @@ - - - - - - - - - - - + + + + + + - + - - + + + + + + + + + + + + + + + + + + + + + + + + - + - - - + + + - + - - + + + + + + + + + + + + + + + + + + + + + + + + - - + + diff --git a/Dashboard/Controls/ResourceMetricsContent.xaml.cs b/Dashboard/Controls/ResourceMetricsContent.xaml.cs index 54b14bb4..88bb7d28 100644 --- a/Dashboard/Controls/ResourceMetricsContent.xaml.cs +++ b/Dashboard/Controls/ResourceMetricsContent.xaml.cs @@ -86,6 +86,8 @@ public partial class ResourceMetricsContent : UserControl private Helpers.ChartHoverHelper? _spinlockStatsHover; private Helpers.ChartHoverHelper? _fileIoReadHover; private Helpers.ChartHoverHelper? _fileIoWriteHover; + private Helpers.ChartHoverHelper? _fileIoReadThroughputHover; + private Helpers.ChartHoverHelper? _fileIoWriteThroughputHover; private Helpers.ChartHoverHelper? _perfmonHover; private Helpers.ChartHoverHelper? _waitStatsHover; private Helpers.ChartHoverHelper? 
_tempdbStatsHover; @@ -111,6 +113,8 @@ public ResourceMetricsContent() _spinlockStatsHover = new Helpers.ChartHoverHelper(SpinlockStatsChart, "collisions/sec"); _fileIoReadHover = new Helpers.ChartHoverHelper(UserDbReadLatencyChart, "ms"); _fileIoWriteHover = new Helpers.ChartHoverHelper(UserDbWriteLatencyChart, "ms"); + _fileIoReadThroughputHover = new Helpers.ChartHoverHelper(FileIoReadThroughputChart, "MB/s"); + _fileIoWriteThroughputHover = new Helpers.ChartHoverHelper(FileIoWriteThroughputChart, "MB/s"); _perfmonHover = new Helpers.ChartHoverHelper(PerfmonCountersChart, ""); _waitStatsHover = new Helpers.ChartHoverHelper(WaitStatsDetailChart, "ms/sec"); _tempdbStatsHover = new Helpers.ChartHoverHelper(TempdbStatsChart, "MB"); @@ -146,6 +150,10 @@ private void SetupChartContextMenus() // File I/O Latency charts TabHelpers.SetupChartContextMenu(UserDbReadLatencyChart, "UserDB_Read_Latency", "collect.file_io_stats"); TabHelpers.SetupChartContextMenu(UserDbWriteLatencyChart, "UserDB_Write_Latency", "collect.file_io_stats"); + + // File I/O Throughput charts + TabHelpers.SetupChartContextMenu(FileIoReadThroughputChart, "UserDB_Read_Throughput", "collect.file_io_stats"); + TabHelpers.SetupChartContextMenu(FileIoWriteThroughputChart, "UserDB_Write_Throughput", "collect.file_io_stats"); TabHelpers.SetupChartContextMenu(TempDbLatencyChart, "TempDB_Latency", "collect.file_io_stats"); // Server Utilization Trends charts @@ -225,6 +233,7 @@ await Task.WhenAll( RefreshTempdbStatsAsync(), RefreshSessionStatsAsync(), LoadFileIoLatencyChartsAsync(), + LoadFileIoThroughputChartsAsync(), RefreshServerTrendsAsync(), RefreshPerfmonCountersTabAsync(), RefreshWaitStatsDetailTabAsync() @@ -837,11 +846,11 @@ private async Task LoadFileIoLatencyChartsAsync() // Load User DB data only - TempDB latency moved to TempDB Stats tab var userDbData = await _databaseService.GetFileIoLatencyTimeSeriesAsync(isTempDb: false, _fileIoHoursBack, _fileIoFromDate, _fileIoToDate); - 
LoadFileIoChart(UserDbReadLatencyChart, userDbData, d => d.ReadLatencyMs, "Read Latency (ms)", colors, xMin, xMax, _fileIoReadHover); - LoadFileIoChart(UserDbWriteLatencyChart, userDbData, d => d.WriteLatencyMs, "Write Latency (ms)", colors, xMin, xMax, _fileIoWriteHover); + LoadFileIoChart(UserDbReadLatencyChart, userDbData, d => d.ReadLatencyMs, "Read Latency (ms)", colors, xMin, xMax, _fileIoReadHover, d => d.ReadQueuedLatencyMs); + LoadFileIoChart(UserDbWriteLatencyChart, userDbData, d => d.WriteLatencyMs, "Write Latency (ms)", colors, xMin, xMax, _fileIoWriteHover, d => d.WriteQueuedLatencyMs); } - private void LoadFileIoChart(ScottPlot.WPF.WpfPlot chart, List data, Func latencySelector, string yLabel, ScottPlot.Color[] colors, double xMin, double xMax, Helpers.ChartHoverHelper? hover = null) + private void LoadFileIoChart(ScottPlot.WPF.WpfPlot chart, List data, Func latencySelector, string yLabel, ScottPlot.Color[] colors, double xMin, double xMax, Helpers.ChartHoverHelper? hover = null, Func? 
queuedSelector = null) { DateTime rangeStart = DateTime.FromOADate(xMin); DateTime rangeEnd = DateTime.FromOADate(xMax); @@ -856,6 +865,9 @@ private void LoadFileIoChart(ScottPlot.WPF.WpfPlot chart, List queuedSelector(d) > 0); + if (data != null && data.Count > 0) { // Get all unique time points for gap filling @@ -879,13 +891,31 @@ private void LoadFileIoChart(ScottPlot.WPF.WpfPlot chart, List (double)queuedSelector!(d)); + if (queuedValues.Any(v => v > 0)) + { + var (qxs, qys) = TabHelpers.FillTimeSeriesGaps(timePoints, queuedValues); + var queuedScatter = chart.Plot.Add.Scatter(qxs, qys); + queuedScatter.LineWidth = 2; + queuedScatter.MarkerSize = 0; + queuedScatter.Color = color; + queuedScatter.LinePattern = ScottPlot.LinePattern.Dashed; + queuedScatter.LegendText = $"{fileName} (queued)"; + hover?.Add(queuedScatter, $"{fileName} (queued)"); + } + } + colorIndex++; } } @@ -913,6 +943,22 @@ private void LoadFileIoChart(ScottPlot.WPF.WpfPlot chart, List d.ReadThroughputMbPerSec, "Read Throughput (MB/s)", colors, xMin, xMax, _fileIoReadThroughputHover); + LoadFileIoChart(FileIoWriteThroughputChart, throughputData, d => d.WriteThroughputMbPerSec, "Write Throughput (MB/s)", colors, xMin, xMax, _fileIoWriteThroughputHover); + } + #endregion #region Server Trends Tab diff --git a/Dashboard/Models/FileIoLatencyTimeSeriesItem.cs b/Dashboard/Models/FileIoLatencyTimeSeriesItem.cs index 62828332..982c2350 100644 --- a/Dashboard/Models/FileIoLatencyTimeSeriesItem.cs +++ b/Dashboard/Models/FileIoLatencyTimeSeriesItem.cs @@ -20,5 +20,9 @@ public class FileIoLatencyTimeSeriesItem public decimal WriteLatencyMs { get; set; } public long ReadCount { get; set; } public long WriteCount { get; set; } + public decimal ReadQueuedLatencyMs { get; set; } + public decimal WriteQueuedLatencyMs { get; set; } + public decimal ReadThroughputMbPerSec { get; set; } + public decimal WriteThroughputMbPerSec { get; set; } } } diff --git a/Dashboard/Services/DatabaseService.ResourceMetrics.cs 
b/Dashboard/Services/DatabaseService.ResourceMetrics.cs index 5bead59f..a6b2bc23 100644 --- a/Dashboard/Services/DatabaseService.ResourceMetrics.cs +++ b/Dashboard/Services/DatabaseService.ResourceMetrics.cs @@ -1379,6 +1379,18 @@ WHEN ISNULL(fio.num_of_writes_delta, 0) > 0 THEN CONVERT(decimal(19,2), fio.io_stall_write_ms_delta * 1.0 / fio.num_of_writes_delta) ELSE 0 END, + read_queued_latency_ms = + CASE + WHEN ISNULL(fio.num_of_reads_delta, 0) > 0 + THEN CONVERT(decimal(19,2), ISNULL(fio.io_stall_queued_read_ms_delta, 0) * 1.0 / fio.num_of_reads_delta) + ELSE 0 + END, + write_queued_latency_ms = + CASE + WHEN ISNULL(fio.num_of_writes_delta, 0) > 0 + THEN CONVERT(decimal(19,2), ISNULL(fio.io_stall_queued_write_ms_delta, 0) * 1.0 / fio.num_of_writes_delta) + ELSE 0 + END, read_count = ISNULL(fio.num_of_reads_delta, 0), write_count = ISNULL(fio.num_of_writes_delta, 0) FROM collect.file_io_stats AS fio @@ -1392,10 +1404,10 @@ ORDER BY fio.collection_time, fio.database_name, fio.file_name;"; - + using var command = new SqlCommand(query, connection); command.CommandTimeout = 120; - + if (fromDate.HasValue && toDate.HasValue) { command.Parameters.Add(new SqlParameter("@fromDate", SqlDbType.DateTime2) { Value = fromDate.Value }); @@ -1405,7 +1417,7 @@ ORDER BY { command.Parameters.Add(new SqlParameter("@hoursBack", SqlDbType.Int) { Value = hoursBack }); } - + using var reader = await command.ExecuteReaderAsync(); while (await reader.ReadAsync()) { @@ -1417,11 +1429,124 @@ ORDER BY FileType = reader.IsDBNull(3) ? string.Empty : reader.GetString(3), ReadLatencyMs = reader.IsDBNull(4) ? 0m : reader.GetDecimal(4), WriteLatencyMs = reader.IsDBNull(5) ? 0m : reader.GetDecimal(5), + ReadQueuedLatencyMs = reader.IsDBNull(6) ? 0m : reader.GetDecimal(6), + WriteQueuedLatencyMs = reader.IsDBNull(7) ? 0m : reader.GetDecimal(7), + ReadCount = reader.IsDBNull(8) ? 0 : reader.GetInt64(8), + WriteCount = reader.IsDBNull(9) ? 
0 : reader.GetInt64(9) + }); + } + + return items; + } + + public async Task> GetFileIoThroughputTimeSeriesAsync(bool isTempDb, int hoursBack = 24, DateTime? fromDate = null, DateTime? toDate = null) + { + var items = new List(); + + await using var tc = await OpenThrottledConnectionAsync(); + var connection = tc.Connection; + + string dateFilter = fromDate.HasValue && toDate.HasValue + ? "AND fio.collection_time >= @fromDate AND fio.collection_time <= @toDate" + : "AND fio.collection_time >= DATEADD(HOUR, -@hoursBack, SYSDATETIME())"; + + string dbFilter = isTempDb + ? "AND fio.database_name = N'tempdb'" + : "AND fio.database_name <> N'tempdb'"; + + string query = $@" + SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + + WITH + file_avg_throughput AS + ( + SELECT + fio.database_name, + fio.file_name, + fio.file_type_desc, + total_bytes = SUM(ISNULL(fio.num_of_bytes_read_delta, 0)) + SUM(ISNULL(fio.num_of_bytes_written_delta, 0)), + total_io = SUM(ISNULL(fio.num_of_reads_delta, 0)) + SUM(ISNULL(fio.num_of_writes_delta, 0)) + FROM collect.file_io_stats AS fio + WHERE fio.database_name IS NOT NULL + {dbFilter} + {dateFilter} + GROUP BY + fio.database_name, + fio.file_name, + fio.file_type_desc + HAVING + SUM(ISNULL(fio.num_of_bytes_read_delta, 0)) + SUM(ISNULL(fio.num_of_bytes_written_delta, 0)) > 0 + ), + top_files AS + ( + SELECT TOP (10) + fat.database_name, + fat.file_name, + fat.file_type_desc + FROM file_avg_throughput AS fat + ORDER BY + fat.total_bytes DESC + ) + SELECT + fio.collection_time, + fio.database_name, + fio.file_name, + fio.file_type_desc, + read_throughput_mb_per_sec = + CASE + WHEN ISNULL(fio.sample_ms_delta, 0) > 0 + THEN CONVERT(decimal(19,4), fio.num_of_bytes_read_delta * 1000.0 / fio.sample_ms_delta / 1048576.0) + ELSE 0 + END, + write_throughput_mb_per_sec = + CASE + WHEN ISNULL(fio.sample_ms_delta, 0) > 0 + THEN CONVERT(decimal(19,4), fio.num_of_bytes_written_delta * 1000.0 / fio.sample_ms_delta / 1048576.0) + ELSE 0 + END, + read_count 
= ISNULL(fio.num_of_reads_delta, 0), + write_count = ISNULL(fio.num_of_writes_delta, 0) + FROM collect.file_io_stats AS fio + JOIN top_files AS tf + ON tf.database_name = fio.database_name + AND tf.file_name = fio.file_name + WHERE fio.database_name IS NOT NULL + {dbFilter} + {dateFilter} + ORDER BY + fio.collection_time, + fio.database_name, + fio.file_name;"; + + using var command = new SqlCommand(query, connection); + command.CommandTimeout = 120; + + if (fromDate.HasValue && toDate.HasValue) + { + command.Parameters.Add(new SqlParameter("@fromDate", SqlDbType.DateTime2) { Value = fromDate.Value }); + command.Parameters.Add(new SqlParameter("@toDate", SqlDbType.DateTime2) { Value = toDate.Value }); + } + else + { + command.Parameters.Add(new SqlParameter("@hoursBack", SqlDbType.Int) { Value = hoursBack }); + } + + using var reader = await command.ExecuteReaderAsync(); + while (await reader.ReadAsync()) + { + items.Add(new FileIoLatencyTimeSeriesItem + { + CollectionTime = reader.GetDateTime(0), + DatabaseName = reader.IsDBNull(1) ? string.Empty : reader.GetString(1), + FileName = reader.IsDBNull(2) ? string.Empty : reader.GetString(2), + FileType = reader.IsDBNull(3) ? string.Empty : reader.GetString(3), + ReadThroughputMbPerSec = reader.IsDBNull(4) ? 0m : reader.GetDecimal(4), + WriteThroughputMbPerSec = reader.IsDBNull(5) ? 0m : reader.GetDecimal(5), ReadCount = reader.IsDBNull(6) ? 0 : reader.GetInt64(6), WriteCount = reader.IsDBNull(7) ? 
0 : reader.GetInt64(7) }); } - + return items; } diff --git a/Lite/Controls/ServerTab.xaml b/Lite/Controls/ServerTab.xaml index d77dbb59..3a70620a 100644 --- a/Lite/Controls/ServerTab.xaml +++ b/Lite/Controls/ServerTab.xaml @@ -716,18 +716,39 @@ - - - - - - - - + + + + + + + + + + + + + + + + - - - + + + + + + + + + + + + + + + + diff --git a/Lite/Controls/ServerTab.xaml.cs b/Lite/Controls/ServerTab.xaml.cs index 4700c556..a8801442 100644 --- a/Lite/Controls/ServerTab.xaml.cs +++ b/Lite/Controls/ServerTab.xaml.cs @@ -49,6 +49,8 @@ public partial class ServerTab : UserControl private Helpers.ChartHoverHelper? _tempDbFileIoHover; private Helpers.ChartHoverHelper? _fileIoReadHover; private Helpers.ChartHoverHelper? _fileIoWriteHover; + private Helpers.ChartHoverHelper? _fileIoReadThroughputHover; + private Helpers.ChartHoverHelper? _fileIoWriteThroughputHover; private Helpers.ChartHoverHelper? _collectorDurationHover; private Helpers.ChartHoverHelper? _queryDurationTrendHover; private Helpers.ChartHoverHelper? 
_procDurationTrendHover; @@ -163,6 +165,8 @@ public ServerTab(ServerConnection server, DuckDbInitializer duckDb, CredentialSe _tempDbFileIoHover = new Helpers.ChartHoverHelper(TempDbFileIoChart, "ms"); _fileIoReadHover = new Helpers.ChartHoverHelper(FileIoReadChart, "ms"); _fileIoWriteHover = new Helpers.ChartHoverHelper(FileIoWriteChart, "ms"); + _fileIoReadThroughputHover = new Helpers.ChartHoverHelper(FileIoReadThroughputChart, "MB/s"); + _fileIoWriteThroughputHover = new Helpers.ChartHoverHelper(FileIoWriteThroughputChart, "MB/s"); _collectorDurationHover = new Helpers.ChartHoverHelper(CollectorDurationChart, "ms"); _queryDurationTrendHover = new Helpers.ChartHoverHelper(QueryDurationTrendChart, "ms/sec"); _procDurationTrendHover = new Helpers.ChartHoverHelper(ProcDurationTrendChart, "ms/sec"); @@ -475,6 +479,7 @@ private async System.Threading.Tasks.Task RefreshAllDataAsync() var procStatsTask = _dataService.GetTopProceduresByCpuAsync(_serverId, hoursBack, 50, fromDate, toDate, UtcOffsetMinutes); var fileIoTask = _dataService.GetLatestFileIoStatsAsync(_serverId); var fileIoTrendTask = _dataService.GetFileIoLatencyTrendAsync(_serverId, hoursBack, fromDate, toDate); + var fileIoThroughputTask = _dataService.GetFileIoThroughputTrendAsync(_serverId, hoursBack, fromDate, toDate); var tempDbTask = _dataService.GetTempDbTrendAsync(_serverId, hoursBack, fromDate, toDate); var tempDbFileIoTask = _dataService.GetTempDbFileIoTrendAsync(_serverId, hoursBack, fromDate, toDate); var deadlockTask = _dataService.GetRecentDeadlocksAsync(_serverId, hoursBack, fromDate, toDate); @@ -496,7 +501,7 @@ private async System.Threading.Tasks.Task RefreshAllDataAsync() /* Core data tasks */ await System.Threading.Tasks.Task.WhenAll( snapshotsTask, cpuTask, memoryTask, memoryTrendTask, - queryStatsTask, procStatsTask, fileIoTask, fileIoTrendTask, tempDbTask, tempDbFileIoTask, + queryStatsTask, procStatsTask, fileIoTask, fileIoTrendTask, fileIoThroughputTask, tempDbTask, tempDbFileIoTask, 
deadlockTask, blockedProcessTask, waitTypesTask, memoryClerkTypesTask, perfmonCountersTask, queryStoreTask, memoryGrantTrendTask, memoryGrantChartTask, serverConfigTask, databaseConfigTask, databaseScopedConfigTask, traceFlagsTask, @@ -560,6 +565,7 @@ await System.Threading.Tasks.Task.WhenAll( UpdateTempDbChart(tempDbTask.Result); UpdateTempDbFileIoChart(tempDbFileIoTask.Result); UpdateFileIoCharts(fileIoTrendTask.Result); + UpdateFileIoThroughputCharts(fileIoThroughputTask.Result); UpdateLockWaitTrendChart(lockWaitTrendTask.Result, hoursBack, fromDate, toDate); UpdateBlockingTrendChart(blockingTrendTask.Result, hoursBack, fromDate, toDate); UpdateDeadlockTrendChart(deadlockTrendTask.Result, hoursBack, fromDate, toDate); @@ -917,16 +923,18 @@ private void UpdateFileIoCharts(List data) if (data.Count == 0) { FileIoReadChart.Refresh(); FileIoWriteChart.Refresh(); return; } - /* Group by database, limit to top 12 by total stall */ + /* Group by file, limit to top 10 by total stall */ var databases = data - .GroupBy(d => d.DatabaseName) + .GroupBy(d => $"{d.DatabaseName}.{d.FileName}") .OrderByDescending(g => g.Sum(d => d.AvgReadLatencyMs + d.AvgWriteLatencyMs)) - .Take(12) + .Take(10) .ToList(); double readMax = 0, writeMax = 0; int colorIdx = 0; + bool hasQueuedData = data.Any(d => d.AvgQueuedReadLatencyMs > 0 || d.AvgQueuedWriteLatencyMs > 0); + foreach (var dbGroup in databases) { var points = dbGroup.OrderBy(d => d.CollectionTime).ToList(); @@ -953,6 +961,31 @@ private void UpdateFileIoCharts(List data) _fileIoWriteHover?.Add(writePlot, dbGroup.Key); writeMax = Math.Max(writeMax, writeLatency.Max()); } + + /* Queued I/O overlay — dashed lines showing queue wait portion of latency */ + if (hasQueuedData) + { + var queuedReadLatency = points.Select(d => d.AvgQueuedReadLatencyMs).ToArray(); + var queuedWriteLatency = points.Select(d => d.AvgQueuedWriteLatencyMs).ToArray(); + + if (queuedReadLatency.Any(v => v > 0)) + { + var qReadPlot = 
FileIoReadChart.Plot.Add.Scatter(times, queuedReadLatency); + qReadPlot.LegendText = $"{dbGroup.Key} (queued)"; + qReadPlot.Color = color; + qReadPlot.LinePattern = ScottPlot.LinePattern.Dashed; + _fileIoReadHover?.Add(qReadPlot, $"{dbGroup.Key} (queued)"); + } + + if (queuedWriteLatency.Any(v => v > 0)) + { + var qWritePlot = FileIoWriteChart.Plot.Add.Scatter(times, queuedWriteLatency); + qWritePlot.LegendText = $"{dbGroup.Key} (queued)"; + qWritePlot.Color = color; + qWritePlot.LinePattern = ScottPlot.LinePattern.Dashed; + _fileIoWriteHover?.Add(qWritePlot, $"{dbGroup.Key} (queued)"); + } + } } FileIoReadChart.Plot.Axes.DateTimeTicksBottom(); @@ -970,6 +1003,70 @@ private void UpdateFileIoCharts(List data) FileIoWriteChart.Refresh(); } + private void UpdateFileIoThroughputCharts(List data) + { + ClearChart(FileIoReadThroughputChart); + ClearChart(FileIoWriteThroughputChart); + _fileIoReadThroughputHover?.Clear(); + _fileIoWriteThroughputHover?.Clear(); + ApplyDarkTheme(FileIoReadThroughputChart); + ApplyDarkTheme(FileIoWriteThroughputChart); + + if (data.Count == 0) { FileIoReadThroughputChart.Refresh(); FileIoWriteThroughputChart.Refresh(); return; } + + /* Group by file label, limit to top 10 by total throughput */ + var files = data + .GroupBy(d => d.FileLabel) + .OrderByDescending(g => g.Sum(d => d.ReadMbPerSec + d.WriteMbPerSec)) + .Take(10) + .ToList(); + + double readMax = 0, writeMax = 0; + int colorIdx = 0; + + foreach (var fileGroup in files) + { + var points = fileGroup.OrderBy(d => d.CollectionTime).ToList(); + var times = points.Select(d => d.CollectionTime.AddMinutes(UtcOffsetMinutes).ToOADate()).ToArray(); + var readThroughput = points.Select(d => d.ReadMbPerSec).ToArray(); + var writeThroughput = points.Select(d => d.WriteMbPerSec).ToArray(); + var color = ScottPlot.Color.FromHex(SeriesColors[colorIdx % SeriesColors.Length]); + colorIdx++; + + if (readThroughput.Length > 0) + { + var readPlot = FileIoReadThroughputChart.Plot.Add.Scatter(times, 
readThroughput); + readPlot.LegendText = fileGroup.Key; + readPlot.Color = color; + _fileIoReadThroughputHover?.Add(readPlot, fileGroup.Key); + readMax = Math.Max(readMax, readThroughput.Max()); + } + + if (writeThroughput.Length > 0) + { + var writePlot = FileIoWriteThroughputChart.Plot.Add.Scatter(times, writeThroughput); + writePlot.LegendText = fileGroup.Key; + writePlot.Color = color; + _fileIoWriteThroughputHover?.Add(writePlot, fileGroup.Key); + writeMax = Math.Max(writeMax, writeThroughput.Max()); + } + } + + FileIoReadThroughputChart.Plot.Axes.DateTimeTicksBottom(); + ReapplyAxisColors(FileIoReadThroughputChart); + FileIoReadThroughputChart.Plot.YLabel("Read Throughput (MB/s)"); + SetChartYLimitsWithLegendPadding(FileIoReadThroughputChart, 0, readMax > 0 ? readMax : 1); + ShowChartLegend(FileIoReadThroughputChart); + FileIoReadThroughputChart.Refresh(); + + FileIoWriteThroughputChart.Plot.Axes.DateTimeTicksBottom(); + ReapplyAxisColors(FileIoWriteThroughputChart); + FileIoWriteThroughputChart.Plot.YLabel("Write Throughput (MB/s)"); + SetChartYLimitsWithLegendPadding(FileIoWriteThroughputChart, 0, writeMax > 0 ? writeMax : 1); + ShowChartLegend(FileIoWriteThroughputChart); + FileIoWriteThroughputChart.Refresh(); + } + /* ========== Blocking/Deadlock Trend Charts ========== */ private void UpdateLockWaitTrendChart(List data, int hoursBack, DateTime? fromDate, DateTime? toDate) diff --git a/Lite/Database/DuckDbInitializer.cs b/Lite/Database/DuckDbInitializer.cs index ac064de5..57121939 100644 --- a/Lite/Database/DuckDbInitializer.cs +++ b/Lite/Database/DuckDbInitializer.cs @@ -68,7 +68,7 @@ public void Dispose() /// /// Current schema version. Increment this when schema changes require table rebuilds. /// - internal const int CurrentSchemaVersion = 14; + internal const int CurrentSchemaVersion = 15; private readonly string _archivePath; @@ -463,6 +463,15 @@ Must drop/recreate because column layout is completely different. 
*/ _logger?.LogInformation("Running migration to v14: rebuilding memory_grant_stats for resource semaphore schema"); await ExecuteNonQueryAsync(connection, "DROP TABLE IF EXISTS memory_grant_stats"); } + + if (fromVersion < 15) + { + /* v15: Added queued I/O columns (io_stall_queued_read_ms, io_stall_queued_write_ms) + and their delta counterparts to file_io_stats for latency overlay charts. + Must drop/recreate because DuckDB appender writes by position. */ + _logger?.LogInformation("Running migration to v15: rebuilding file_io_stats for queued I/O columns"); + await ExecuteNonQueryAsync(connection, "DROP TABLE IF EXISTS file_io_stats"); + } } /// diff --git a/Lite/Database/Schema.cs b/Lite/Database/Schema.cs index 2723e099..af4f3912 100644 --- a/Lite/Database/Schema.cs +++ b/Lite/Database/Schema.cs @@ -148,12 +148,16 @@ size_mb DECIMAL(18,2), write_bytes BIGINT, io_stall_read_ms BIGINT, io_stall_write_ms BIGINT, + io_stall_queued_read_ms BIGINT, + io_stall_queued_write_ms BIGINT, delta_reads BIGINT, delta_writes BIGINT, delta_read_bytes BIGINT, delta_write_bytes BIGINT, delta_stall_read_ms BIGINT, - delta_stall_write_ms BIGINT + delta_stall_write_ms BIGINT, + delta_stall_queued_read_ms BIGINT, + delta_stall_queued_write_ms BIGINT )"; public const string CreateMemoryStatsTable = @" diff --git a/Lite/Services/LocalDataService.FileIo.cs b/Lite/Services/LocalDataService.FileIo.cs index 3b1dfbe0..15cd4109 100644 --- a/Lite/Services/LocalDataService.FileIo.cs +++ b/Lite/Services/LocalDataService.FileIo.cs @@ -66,7 +66,7 @@ FROM v_file_io_stats } /// - /// Gets file I/O latency trend data broken down by database for charting. + /// Gets file I/O latency trend data broken down by file for charting (top 10 files by I/O activity). /// public async Task> GetFileIoLatencyTrendAsync(int serverId, int hoursBack = 24, DateTime? fromDate = null, DateTime? 
toDate = null) { @@ -76,17 +76,40 @@ public async Task> GetFileIoLatencyTrendAsync(int serverI var (startTime, endTime) = GetTimeRange(hoursBack, fromDate, toDate); command.CommandText = @" +WITH top_files AS ( + SELECT database_name, file_name + FROM v_file_io_stats + WHERE server_id = $1 + AND collection_time >= $2 + AND collection_time <= $3 + AND (delta_reads > 0 OR delta_writes > 0) + GROUP BY database_name, file_name + ORDER BY SUM(delta_reads + delta_writes) DESC + LIMIT 10 +) SELECT - collection_time, - database_name, - CASE WHEN SUM(delta_reads) > 0 THEN SUM(CAST(delta_stall_read_ms AS DOUBLE)) / SUM(delta_reads) ELSE 0 END AS avg_read_latency_ms, - CASE WHEN SUM(delta_writes) > 0 THEN SUM(CAST(delta_stall_write_ms AS DOUBLE)) / SUM(delta_writes) ELSE 0 END AS avg_write_latency_ms -FROM v_file_io_stats -WHERE server_id = $1 -AND collection_time >= $2 -AND collection_time <= $3 -GROUP BY collection_time, database_name -ORDER BY collection_time, database_name"; + f.collection_time, + f.database_name, + f.file_name, + CASE WHEN SUM(f.delta_reads) > 0 + THEN SUM(CAST(f.delta_stall_read_ms AS DOUBLE)) / SUM(f.delta_reads) + ELSE 0 END AS avg_read_latency_ms, + CASE WHEN SUM(f.delta_writes) > 0 + THEN SUM(CAST(f.delta_stall_write_ms AS DOUBLE)) / SUM(f.delta_writes) + ELSE 0 END AS avg_write_latency_ms, + CASE WHEN SUM(f.delta_reads) > 0 + THEN SUM(CAST(COALESCE(f.delta_stall_queued_read_ms, 0) AS DOUBLE)) / SUM(f.delta_reads) + ELSE 0 END AS avg_queued_read_latency_ms, + CASE WHEN SUM(f.delta_writes) > 0 + THEN SUM(CAST(COALESCE(f.delta_stall_queued_write_ms, 0) AS DOUBLE)) / SUM(f.delta_writes) + ELSE 0 END AS avg_queued_write_latency_ms +FROM v_file_io_stats f +JOIN top_files tf ON tf.database_name = f.database_name AND tf.file_name = f.file_name +WHERE f.server_id = $1 +AND f.collection_time >= $2 +AND f.collection_time <= $3 +GROUP BY f.collection_time, f.database_name, f.file_name +ORDER BY f.collection_time, f.database_name, f.file_name"; 
command.Parameters.Add(new DuckDBParameter { Value = serverId }); command.Parameters.Add(new DuckDBParameter { Value = startTime }); @@ -100,8 +123,83 @@ FROM v_file_io_stats { CollectionTime = reader.GetDateTime(0), DatabaseName = reader.IsDBNull(1) ? "" : reader.GetString(1), - AvgReadLatencyMs = reader.IsDBNull(2) ? 0 : ToDouble(reader.GetValue(2)), - AvgWriteLatencyMs = reader.IsDBNull(3) ? 0 : ToDouble(reader.GetValue(3)) + FileName = reader.IsDBNull(2) ? "" : reader.GetString(2), + AvgReadLatencyMs = reader.IsDBNull(3) ? 0 : ToDouble(reader.GetValue(3)), + AvgWriteLatencyMs = reader.IsDBNull(4) ? 0 : ToDouble(reader.GetValue(4)), + AvgQueuedReadLatencyMs = reader.IsDBNull(5) ? 0 : ToDouble(reader.GetValue(5)), + AvgQueuedWriteLatencyMs = reader.IsDBNull(6) ? 0 : ToDouble(reader.GetValue(6)) + }); + } + + return items; + } + + /// + /// Gets file I/O throughput trend data (MB/s) broken down by file for charting. + /// Uses LAG() window function to compute collection interval for per-second calculation. + /// + public async Task> GetFileIoThroughputTrendAsync(int serverId, int hoursBack = 24, DateTime? fromDate = null, DateTime? toDate = null) + { + using var connection = await OpenConnectionAsync(); + using var command = connection.CreateCommand(); + + var (startTime, endTime) = GetTimeRange(hoursBack, fromDate, toDate); + + command.CommandText = @" +WITH top_files AS ( + SELECT database_name, file_name + FROM v_file_io_stats + WHERE server_id = $1 + AND collection_time >= $2 + AND collection_time <= $3 + AND (delta_read_bytes > 0 OR delta_write_bytes > 0) + GROUP BY database_name, file_name + ORDER BY SUM(delta_read_bytes + delta_write_bytes) DESC + LIMIT 10 +), +with_interval AS ( + SELECT + f.collection_time, + f.database_name || '.' 
|| f.file_name AS file_label, + f.delta_read_bytes, + f.delta_write_bytes, + EXTRACT(EPOCH FROM (f.collection_time - LAG(f.collection_time) OVER ( + PARTITION BY f.server_id, f.database_name, f.file_name + ORDER BY f.collection_time + ))) AS interval_seconds + FROM v_file_io_stats f + JOIN top_files tf ON tf.database_name = f.database_name AND tf.file_name = f.file_name + WHERE f.server_id = $1 + AND f.collection_time >= $2 + AND f.collection_time <= $3 +) +SELECT + collection_time, + file_label, + CASE WHEN interval_seconds > 0 + THEN CAST(delta_read_bytes AS DOUBLE) / interval_seconds / 1048576.0 + ELSE 0 END AS read_mb_per_sec, + CASE WHEN interval_seconds > 0 + THEN CAST(delta_write_bytes AS DOUBLE) / interval_seconds / 1048576.0 + ELSE 0 END AS write_mb_per_sec +FROM with_interval +WHERE interval_seconds IS NOT NULL AND interval_seconds > 0 +ORDER BY collection_time, file_label"; + + command.Parameters.Add(new DuckDBParameter { Value = serverId }); + command.Parameters.Add(new DuckDBParameter { Value = startTime }); + command.Parameters.Add(new DuckDBParameter { Value = endTime }); + + var items = new List(); + using var reader = await command.ExecuteReaderAsync(); + while (await reader.ReadAsync()) + { + items.Add(new FileIoThroughputPoint + { + CollectionTime = reader.GetDateTime(0), + FileLabel = reader.IsDBNull(1) ? "" : reader.GetString(1), + ReadMbPerSec = reader.IsDBNull(2) ? 0 : ToDouble(reader.GetValue(2)), + WriteMbPerSec = reader.IsDBNull(3) ? 
0 : ToDouble(reader.GetValue(3)) }); } @@ -157,8 +255,19 @@ public class FileIoTrendPoint { public DateTime CollectionTime { get; set; } public string DatabaseName { get; set; } = ""; + public string FileName { get; set; } = ""; public double AvgReadLatencyMs { get; set; } public double AvgWriteLatencyMs { get; set; } + public double AvgQueuedReadLatencyMs { get; set; } + public double AvgQueuedWriteLatencyMs { get; set; } +} + +public class FileIoThroughputPoint +{ + public DateTime CollectionTime { get; set; } + public string FileLabel { get; set; } = ""; + public double ReadMbPerSec { get; set; } + public double WriteMbPerSec { get; set; } } public class FileIoRow diff --git a/Lite/Services/RemoteCollectorService.FileIo.cs b/Lite/Services/RemoteCollectorService.FileIo.cs index 1681624a..f7438627 100644 --- a/Lite/Services/RemoteCollectorService.FileIo.cs +++ b/Lite/Services/RemoteCollectorService.FileIo.cs @@ -47,6 +47,8 @@ Azure MI (edition 8) HAS sys.master_files and behaves like on-prem. 
*/ write_bytes = vfs.num_of_bytes_written, io_stall_read_ms = vfs.io_stall_read_ms, io_stall_write_ms = vfs.io_stall_write_ms, + io_stall_queued_read_ms = vfs.io_stall_queued_read_ms, + io_stall_queued_write_ms = vfs.io_stall_queued_write_ms, database_id = vfs.database_id, file_id = vfs.file_id FROM sys.dm_io_virtual_file_stats(DB_ID(), NULL) AS vfs @@ -68,6 +70,8 @@ LEFT JOIN sys.database_files AS df write_bytes = vfs.num_of_bytes_written, io_stall_read_ms = vfs.io_stall_read_ms, io_stall_write_ms = vfs.io_stall_write_ms, + io_stall_queued_read_ms = vfs.io_stall_queued_read_ms, + io_stall_queued_write_ms = vfs.io_stall_queued_write_ms, database_id = vfs.database_id, file_id = vfs.file_id FROM sys.dm_io_virtual_file_stats(NULL, NULL) AS vfs @@ -96,7 +100,8 @@ AND vfs.database_id < 32761 var fileStats = new List<( string DatabaseName, string FileName, string FileType, string PhysicalName, decimal SizeMb, long NumOfReads, long NumOfWrites, long ReadBytes, long WriteBytes, - long IoStallReadMs, long IoStallWriteMs, int DatabaseId, int FileId)>(); + long IoStallReadMs, long IoStallWriteMs, long IoStallQueuedReadMs, long IoStallQueuedWriteMs, + int DatabaseId, int FileId)>(); while (await reader.ReadAsync(cancellationToken)) { @@ -112,8 +117,10 @@ AND vfs.database_id < 32761 WriteBytes: reader.IsDBNull(8) ? 0L : reader.GetInt64(8), IoStallReadMs: reader.IsDBNull(9) ? 0L : reader.GetInt64(9), IoStallWriteMs: reader.IsDBNull(10) ? 0L : reader.GetInt64(10), - DatabaseId: reader.IsDBNull(11) ? 0 : Convert.ToInt32(reader.GetValue(11)), - FileId: reader.IsDBNull(12) ? 0 : Convert.ToInt32(reader.GetValue(12)) + IoStallQueuedReadMs: reader.IsDBNull(11) ? 0L : reader.GetInt64(11), + IoStallQueuedWriteMs: reader.IsDBNull(12) ? 0L : reader.GetInt64(12), + DatabaseId: reader.IsDBNull(13) ? 0 : Convert.ToInt32(reader.GetValue(13)), + FileId: reader.IsDBNull(14) ? 
0 : Convert.ToInt32(reader.GetValue(14)) )); } sqlSw.Stop(); @@ -136,6 +143,8 @@ AND vfs.database_id < 32761 var deltaWriteBytes = _deltaCalculator.CalculateDelta(serverId, "file_io_write_bytes", deltaKey, stat.WriteBytes); var deltaStallReadMs = _deltaCalculator.CalculateDelta(serverId, "file_io_stall_read", deltaKey, stat.IoStallReadMs); var deltaStallWriteMs = _deltaCalculator.CalculateDelta(serverId, "file_io_stall_write", deltaKey, stat.IoStallWriteMs); + var deltaStallQueuedReadMs = _deltaCalculator.CalculateDelta(serverId, "file_io_stall_queued_read", deltaKey, stat.IoStallQueuedReadMs); + var deltaStallQueuedWriteMs = _deltaCalculator.CalculateDelta(serverId, "file_io_stall_queued_write", deltaKey, stat.IoStallQueuedWriteMs); var row = appender.CreateRow(); row.AppendValue(GenerateCollectionId()) @@ -153,12 +162,16 @@ AND vfs.database_id < 32761 .AppendValue(stat.WriteBytes) .AppendValue(stat.IoStallReadMs) .AppendValue(stat.IoStallWriteMs) + .AppendValue(stat.IoStallQueuedReadMs) + .AppendValue(stat.IoStallQueuedWriteMs) .AppendValue(deltaReads) .AppendValue(deltaWrites) .AppendValue(deltaReadBytes) .AppendValue(deltaWriteBytes) .AppendValue(deltaStallReadMs) .AppendValue(deltaStallWriteMs) + .AppendValue(deltaStallQueuedReadMs) + .AppendValue(deltaStallQueuedWriteMs) .EndRow(); rowsCollected++; diff --git a/install/50_configuration_issues_analyzer.sql b/install/50_configuration_issues_analyzer.sql index ae114a8a..5b9e6de6 100644 --- a/install/50_configuration_issues_analyzer.sql +++ b/install/50_configuration_issues_analyzer.sql @@ -385,7 +385,7 @@ BEGIN message, investigate_query ) - /*Available memory pressure warning*/ + /*Available memory pressure warning — low available memory for grants (<100MB)*/ SELECT severity = N'WARNING', problem_area = N'Memory Grant Pressure', @@ -397,7 +397,8 @@ BEGIN N'. 
Available: ' + CONVERT(nvarchar(20), mgs.available_memory_mb) + N' MB.', investigate_query = N'SELECT * FROM collect.memory_grant_stats WHERE collection_time >= DATEADD(HOUR, -1, SYSDATETIME()) ORDER BY collection_time DESC;' FROM collect.memory_grant_stats AS mgs - WHERE mgs.available_memory_pressure_warning = 1 + WHERE mgs.available_memory_mb < 100 + AND mgs.granted_memory_mb > 0 AND mgs.collection_time >= DATEADD(MINUTE, -5, SYSDATETIME()) AND NOT EXISTS ( @@ -412,7 +413,7 @@ BEGIN UNION ALL - /*Waiter count warning*/ + /*Waiter count warning — high number of queries waiting for memory grants (>10)*/ SELECT severity = N'WARNING', problem_area = N'Memory Grant Pressure', @@ -424,7 +425,7 @@ BEGIN N'. Waiters: ' + CONVERT(nvarchar(10), mgs.waiter_count) + N'.', investigate_query = N'SELECT * FROM collect.memory_grant_stats WHERE collection_time >= DATEADD(HOUR, -1, SYSDATETIME()) ORDER BY collection_time DESC;' FROM collect.memory_grant_stats AS mgs - WHERE mgs.waiter_count_warning = 1 + WHERE mgs.waiter_count > 10 AND mgs.collection_time >= DATEADD(MINUTE, -5, SYSDATETIME()) AND NOT EXISTS ( @@ -439,7 +440,7 @@ BEGIN UNION ALL - /*Timeout error warning - CRITICAL severity*/ + /*Timeout error warning — queries timing out waiting for memory (delta > 0), CRITICAL severity*/ SELECT severity = N'CRITICAL', problem_area = N'Memory Grant Pressure', @@ -448,10 +449,10 @@ BEGIN message = N'CRITICAL: Queries timing out waiting for memory grants at ' + CONVERT(nvarchar(30), mgs.collection_time, 121) + - N'. Timeout errors: ' + CONVERT(nvarchar(10), mgs.timeout_error_count) + N'.', + N'. 
Timeout errors: ' + CONVERT(nvarchar(10), ISNULL(mgs.timeout_error_count_delta, 0)) + N'.', investigate_query = N'SELECT * FROM collect.memory_grant_stats WHERE collection_time >= DATEADD(HOUR, -1, SYSDATETIME()) ORDER BY collection_time DESC;' FROM collect.memory_grant_stats AS mgs - WHERE mgs.timeout_error_warning = 1 + WHERE ISNULL(mgs.timeout_error_count_delta, 0) > 0 AND mgs.collection_time >= DATEADD(MINUTE, -5, SYSDATETIME()) AND NOT EXISTS ( @@ -466,7 +467,7 @@ BEGIN UNION ALL - /*Forced grant warning*/ + /*Forced grant warning — queries forced to run with insufficient memory (delta > 0)*/ SELECT severity = N'WARNING', problem_area = N'Memory Grant Pressure', @@ -475,11 +476,11 @@ BEGIN message = N'Queries forced to run with insufficient memory at ' + CONVERT(nvarchar(30), mgs.collection_time, 121) + - N'. Forced grants: ' + CONVERT(nvarchar(10), mgs.forced_grant_count) + + N'. Forced grants: ' + CONVERT(nvarchar(10), ISNULL(mgs.forced_grant_count_delta, 0)) + N'. These queries will spill to tempdb.', investigate_query = N'SELECT * FROM collect.memory_grant_stats WHERE collection_time >= DATEADD(HOUR, -1, SYSDATETIME()) ORDER BY collection_time DESC;' FROM collect.memory_grant_stats AS mgs - WHERE mgs.forced_grant_warning = 1 + WHERE ISNULL(mgs.forced_grant_count_delta, 0) > 0 AND mgs.collection_time >= DATEADD(MINUTE, -5, SYSDATETIME()) AND NOT EXISTS ( diff --git a/install/97_test_procedures.sql b/install/97_test_procedures.sql index e7d25c9f..fac3b155 100644 --- a/install/97_test_procedures.sql +++ b/install/97_test_procedures.sql @@ -278,10 +278,10 @@ INSERT INTO waiter_count, timeout_error_count, forced_grant_count, - available_memory_pressure_warning, - waiter_count_warning, - timeout_error_warning, - forced_grant_warning + server_start_time, + timeout_error_count_delta, + forced_grant_count_delta, + sample_interval_seconds ) VALUES ( @@ -298,10 +298,10 @@ VALUES 25, /* High waiter count */ 10, /* Timeouts occurring */ 5, /* Forced grants */ - 1, /* 
Memory pressure warning */ - 1, /* Waiter warning */ - 1, /* Timeout warning */ - 1 /* Forced grant warning */ + @test_collection_time, /* server_start_time */ + 10, /* timeout_error_count_delta */ + 5, /* forced_grant_count_delta */ + 30 /* sample_interval_seconds */ ); PRINT '✓ Test 5 data created: Memory grant pressure > 20% threshold'; From 02296a4d2a1d9a4c95d5d156346bf784f407e6d2 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Wed, 25 Feb 2026 12:28:54 -0500 Subject: [PATCH 2/2] Issue #281 Gap 4: Remove session_wait_stats (zero UI, full removal) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove collect.session_wait_stats entirely — table, collector procedure, reporting view, schedule entries, schema metadata, CI checks, and all references. This collector had 12 columns and zero UI in either app. Adds cleanup DROPs to 02_create_tables.sql so existing installations get cleaned up on upgrade (drops view, procedure, table, schedule row). 
Co-Authored-By: Claude Opus 4.6 --- .github/sql/ci_validate_installation.sql | 6 +- Dashboard/schema/tables.json | 21 -- install/02_create_tables.sql | 52 +---- install/03_create_config_tables.sql | 1 - install/04_create_schedule_table.sql | 1 - install/06_ensure_collection_table.sql | 34 +-- install/40_collect_session_wait_stats.sql | 243 ---------------------- install/41_schedule_management.sql | 3 - install/42_scheduled_master_collector.sql | 4 - install/47_create_reporting_views.sql | 113 ---------- install/98_validate_installation.sql | 24 --- install/99_user_troubleshooting.sql | 24 --- 12 files changed, 12 insertions(+), 514 deletions(-) delete mode 100644 install/40_collect_session_wait_stats.sql diff --git a/.github/sql/ci_validate_installation.sql b/.github/sql/ci_validate_installation.sql index 77187200..0761f80a 100644 --- a/.github/sql/ci_validate_installation.sql +++ b/.github/sql/ci_validate_installation.sql @@ -30,7 +30,7 @@ IF SCHEMA_ID(N'report') IS NULL BEGIN SET @missing += 1; PRINT ' MISSING: sche PRINT ''; /* -Procedures in collect schema (37) +Procedures in collect schema (36) */ PRINT 'Checking collect procedures...'; @@ -65,7 +65,6 @@ IF OBJECT_ID(N'collect.tempdb_stats_collector', N'P') IS NULL BEGIN SE IF OBJECT_ID(N'collect.plan_cache_stats_collector', N'P') IS NULL BEGIN SET @missing += 1; PRINT ' MISSING: collect.plan_cache_stats_collector'; END; SET @checked += 1; IF OBJECT_ID(N'collect.session_stats_collector', N'P') IS NULL BEGIN SET @missing += 1; PRINT ' MISSING: collect.session_stats_collector'; END; SET @checked += 1; IF OBJECT_ID(N'collect.waiting_tasks_collector', N'P') IS NULL BEGIN SET @missing += 1; PRINT ' MISSING: collect.waiting_tasks_collector'; END; SET @checked += 1; -IF OBJECT_ID(N'collect.session_wait_stats_collector', N'P') IS NULL BEGIN SET @missing += 1; PRINT ' MISSING: collect.session_wait_stats_collector'; END; SET @checked += 1; IF OBJECT_ID(N'collect.server_configuration_collector', N'P') IS NULL BEGIN SET 
@missing += 1; PRINT ' MISSING: collect.server_configuration_collector'; END; SET @checked += 1; IF OBJECT_ID(N'collect.database_configuration_collector', N'P') IS NULL BEGIN SET @missing += 1; PRINT ' MISSING: collect.database_configuration_collector'; END; SET @checked += 1; IF OBJECT_ID(N'collect.configuration_issues_analyzer', N'P') IS NULL BEGIN SET @missing += 1; PRINT ' MISSING: collect.configuration_issues_analyzer'; END; SET @checked += 1; @@ -142,7 +141,6 @@ IF OBJECT_ID(N'report.blocking_chain_analysis', N'V') IS NULL BEGIN IF OBJECT_ID(N'report.tempdb_contention_analysis', N'V') IS NULL BEGIN SET @missing += 1; PRINT ' MISSING: report.tempdb_contention_analysis'; END; SET @checked += 1; IF OBJECT_ID(N'report.parameter_sensitivity_detection', N'V') IS NULL BEGIN SET @missing += 1; PRINT ' MISSING: report.parameter_sensitivity_detection'; END; SET @checked += 1; IF OBJECT_ID(N'report.scheduler_cpu_analysis', N'V') IS NULL BEGIN SET @missing += 1; PRINT ' MISSING: report.scheduler_cpu_analysis'; END; SET @checked += 1; -IF OBJECT_ID(N'report.session_wait_analysis', N'V') IS NULL BEGIN SET @missing += 1; PRINT ' MISSING: report.session_wait_analysis'; END; SET @checked += 1; IF OBJECT_ID(N'report.critical_issues', N'V') IS NULL BEGIN SET @missing += 1; PRINT ' MISSING: report.critical_issues'; END; SET @checked += 1; IF OBJECT_ID(N'report.memory_usage_trends', N'V') IS NULL BEGIN SET @missing += 1; PRINT ' MISSING: report.memory_usage_trends'; END; SET @checked += 1; IF OBJECT_ID(N'report.running_jobs', N'V') IS NULL BEGIN SET @missing += 1; PRINT ' MISSING: report.running_jobs'; END; SET @checked += 1; @@ -182,7 +180,7 @@ WHERE OBJECT_SCHEMA_NAME(t.object_id) = N'config'; PRINT ' collect schema tables: ' + CONVERT(varchar(10), @collect_tables); PRINT ' config schema tables: ' + CONVERT(varchar(10), @config_tables); -IF @collect_tables < 20 BEGIN SET @missing += 1; PRINT ' MISSING: expected >= 20 collect tables, found ' + CONVERT(varchar(10), 
@collect_tables); END; SET @checked += 1; +IF @collect_tables < 19 BEGIN SET @missing += 1; PRINT ' MISSING: expected >= 19 collect tables, found ' + CONVERT(varchar(10), @collect_tables); END; SET @checked += 1; IF @config_tables < 5 BEGIN SET @missing += 1; PRINT ' MISSING: expected >= 5 config tables, found ' + CONVERT(varchar(10), @config_tables); END; SET @checked += 1; PRINT ''; diff --git a/Dashboard/schema/tables.json b/Dashboard/schema/tables.json index bbd88287..af9d0dbd 100644 --- a/Dashboard/schema/tables.json +++ b/Dashboard/schema/tables.json @@ -677,26 +677,5 @@ "row_count": { "type": "bigint", "nullable": true } }, "primary_key": ["collection_id"] - }, - "collect.session_wait_stats": { - "description": "Per-session wait statistics from sys.dm_exec_session_wait_stats (SQL 2016 SP1+)", - "columns": { - "collection_id": { "type": "bigint", "nullable": false, "identity": true }, - "collection_time": { "type": "datetime2(7)", "nullable": false }, - "session_id": { "type": "integer", "nullable": false }, - "wait_type": { "type": "nvarchar(60)", "nullable": false }, - "waiting_tasks_count": { "type": "bigint", "nullable": false }, - "wait_time_ms": { "type": "bigint", "nullable": false }, - "max_wait_time_ms": { "type": "bigint", "nullable": false }, - "signal_wait_time_ms": { "type": "bigint", "nullable": false }, - "database_id": { "type": "integer", "nullable": true }, - "database_name": { "type": "sysname", "nullable": true }, - "login_name": { "type": "nvarchar(128)", "nullable": true }, - "host_name": { "type": "nvarchar(128)", "nullable": true }, - "program_name": { "type": "nvarchar(128)", "nullable": true }, - "sql_handle": { "type": "varbinary(64)", "nullable": true }, - "query_text": { "type": "nvarchar(max)", "nullable": true } - }, - "primary_key": ["collection_id"] } } diff --git a/install/02_create_tables.sql b/install/02_create_tables.sql index 05c4b492..9907bc09 100644 --- a/install/02_create_tables.sql +++ b/install/02_create_tables.sql 
@@ -18,6 +18,15 @@ GO USE PerformanceMonitor; GO +/* +Cleanup: session_wait_stats removed in v1.4 +*/ +IF OBJECT_ID(N'report.session_wait_analysis', N'V') IS NOT NULL DROP VIEW report.session_wait_analysis; +IF OBJECT_ID(N'collect.session_wait_stats_collector', N'P') IS NOT NULL DROP PROCEDURE collect.session_wait_stats_collector; +IF OBJECT_ID(N'collect.session_wait_stats', N'U') IS NOT NULL DROP TABLE collect.session_wait_stats; +IF OBJECT_ID(N'config.collection_schedule', N'U') IS NOT NULL DELETE FROM config.collection_schedule WHERE collector_name = N'session_wait_stats_collector'; +GO + /* Collection tables for the 7 core collectors */ @@ -1277,49 +1286,6 @@ BEGIN PRINT 'Created collect.waiting_tasks table'; END; -/* -Table: collect.session_wait_stats -Purpose: Captures per-session wait statistics from sys.dm_exec_session_wait_stats -Collection Frequency: Every 60 seconds (alongside query snapshots) -Type: Snapshot (resets on session end) -Dependencies: None -Notes: Correlates with query_snapshots to show waits for specific sessions - Requires SQL Server 2016 SP1 or later -*/ -IF OBJECT_ID(N'collect.session_wait_stats', N'U') IS NULL -BEGIN - CREATE TABLE - collect.session_wait_stats - ( - collection_id bigint IDENTITY NOT NULL, - collection_time datetime2(7) NOT NULL - DEFAULT SYSDATETIME(), - session_id integer NOT NULL, - wait_type nvarchar(60) NOT NULL, - waiting_tasks_count bigint NOT NULL, - wait_time_ms bigint NOT NULL, - max_wait_time_ms bigint NOT NULL, - signal_wait_time_ms bigint NOT NULL, - /*Session context for correlation*/ - database_id integer NULL, - database_name sysname NULL, - login_name nvarchar(128) NULL, - host_name nvarchar(128) NULL, - program_name nvarchar(128) NULL, - /*Query context if executing*/ - sql_handle varbinary(64) NULL, - query_text nvarchar(max) NULL, - CONSTRAINT - PK_session_wait_stats - PRIMARY KEY CLUSTERED - (collection_time, collection_id) - WITH - (DATA_COMPRESSION = PAGE) - ); - - PRINT 'Created 
collect.session_wait_stats table'; -END; - /* Running Jobs Monitor (Point-in-Time Snapshot) Captures currently running SQL Agent jobs with historical duration comparison diff --git a/install/03_create_config_tables.sql b/install/03_create_config_tables.sql index aec89f1f..6fdb40d4 100644 --- a/install/03_create_config_tables.sql +++ b/install/03_create_config_tables.sql @@ -202,7 +202,6 @@ BEGIN (N'plan_cache_stats_collector', 1, 60, 5, 30, N'Plan cache composition statistics - single-use plans and plan cache bloat detection'), (N'session_stats_collector', 1, 5, 2, 30, N'Session and connection statistics - connection leaks and application patterns'), (N'waiting_tasks_collector', 1, 5, 2, 30, N'Currently waiting tasks - blocking chains and wait analysis'), - (N'session_wait_stats_collector', 1, 1, 2, 30, N'Per-session wait statistics - correlates waits with specific sessions/queries (requires SQL Server 2016 SP1+)'), (N'running_jobs_collector', 1, 5, 2, 7, N'Currently running SQL Agent jobs with historical duration comparison'); /* diff --git a/install/04_create_schedule_table.sql b/install/04_create_schedule_table.sql index d73c13b4..fa4e6390 100644 --- a/install/04_create_schedule_table.sql +++ b/install/04_create_schedule_table.sql @@ -74,7 +74,6 @@ FROM (N'plan_cache_stats_collector', 1, 5, 5, 30, N'Plan cache composition statistics - single-use plans and plan cache bloat detection'), (N'session_stats_collector', 1, 1, 2, 30, N'Session and connection statistics - connection leaks and application patterns'), (N'waiting_tasks_collector', 1, 1, 2, 30, N'Currently waiting tasks - blocking chains and wait analysis'), - (N'session_wait_stats_collector', 1, 1, 2, 30, N'Per-session wait statistics - correlates waits with specific sessions/queries (requires SQL Server 2016 SP1+)'), (N'running_jobs_collector', 1, 1, 2, 7, N'Currently running SQL Agent jobs with historical duration comparison') ) AS v (collector_name, enabled, frequency_minutes, max_duration_minutes, 
retention_days, description) WHERE NOT EXISTS diff --git a/install/06_ensure_collection_table.sql b/install/06_ensure_collection_table.sql index 905e62ef..fd0a2c8a 100644 --- a/install/06_ensure_collection_table.sql +++ b/install/06_ensure_collection_table.sql @@ -1054,38 +1054,6 @@ BEGIN (DATA_COMPRESSION = PAGE) ); - END; - ELSE IF @table_name = N'session_wait_stats' - BEGIN - CREATE TABLE - collect.session_wait_stats - ( - collection_id bigint IDENTITY NOT NULL, - collection_time datetime2(7) NOT NULL - DEFAULT SYSDATETIME(), - session_id integer NOT NULL, - wait_type nvarchar(60) NOT NULL, - waiting_tasks_count bigint NOT NULL, - wait_time_ms bigint NOT NULL, - max_wait_time_ms bigint NOT NULL, - signal_wait_time_ms bigint NOT NULL, - /*Session context for correlation*/ - database_id integer NULL, - database_name sysname NULL, - login_name nvarchar(128) NULL, - host_name nvarchar(128) NULL, - program_name nvarchar(128) NULL, - /*Query context if executing*/ - sql_handle varbinary(64) NULL, - query_text nvarchar(max) NULL, - CONSTRAINT - PK_session_wait_stats - PRIMARY KEY CLUSTERED - (collection_time, collection_id) - WITH - (DATA_COMPRESSION = PAGE) - ); - END; ELSE IF @table_name = N'running_jobs' BEGIN @@ -1117,7 +1085,7 @@ BEGIN END; ELSE BEGIN - SET @error_message = N'Unknown table name: ' + @table_name + N'. Valid table names are: wait_stats, query_stats, memory_stats, memory_pressure_events, deadlock_xml, blocked_process_xml, procedure_stats, query_snapshots, query_store_data, trace_analysis, default_trace_events, file_io_stats, memory_grant_stats, cpu_scheduler_stats, memory_clerks_stats, perfmon_stats, cpu_utilization_stats, blocking_deadlock_stats, latch_stats, spinlock_stats, tempdb_stats, plan_cache_stats, session_stats, waiting_tasks, session_wait_stats, running_jobs'; + SET @error_message = N'Unknown table name: ' + @table_name + N'. 
Valid table names are: wait_stats, query_stats, memory_stats, memory_pressure_events, deadlock_xml, blocked_process_xml, procedure_stats, query_snapshots, query_store_data, trace_analysis, default_trace_events, file_io_stats, memory_grant_stats, cpu_scheduler_stats, memory_clerks_stats, perfmon_stats, cpu_utilization_stats, blocking_deadlock_stats, latch_stats, spinlock_stats, tempdb_stats, plan_cache_stats, session_stats, waiting_tasks, running_jobs'; RAISERROR(@error_message, 16, 1); RETURN; END; diff --git a/install/40_collect_session_wait_stats.sql b/install/40_collect_session_wait_stats.sql deleted file mode 100644 index de667e7f..00000000 --- a/install/40_collect_session_wait_stats.sql +++ /dev/null @@ -1,243 +0,0 @@ -/* -Copyright 2026 Darling Data, LLC -https://www.erikdarling.com/ - -*/ - -SET ANSI_NULLS ON; -SET ANSI_PADDING ON; -SET ANSI_WARNINGS ON; -SET ARITHABORT ON; -SET CONCAT_NULL_YIELDS_NULL ON; -SET QUOTED_IDENTIFIER ON; -SET NUMERIC_ROUNDABORT OFF; -SET IMPLICIT_TRANSACTIONS OFF; -SET STATISTICS TIME, IO OFF; -GO - -USE PerformanceMonitor; -GO - -/* -Session Wait Stats Collector -Collects per-session wait statistics from sys.dm_exec_session_wait_stats -Allows correlation of waits to specific sessions/queries -Requires SQL Server 2016 SP1 or later -*/ - -IF OBJECT_ID(N'collect.session_wait_stats_collector', N'P') IS NULL -BEGIN - EXECUTE(N'CREATE PROCEDURE collect.session_wait_stats_collector AS RETURN 138;'); -END; -GO - -ALTER PROCEDURE - collect.session_wait_stats_collector -( - @min_wait_time_ms integer = 100, /*Minimum wait time to capture (reduces noise)*/ - @debug bit = 0 /*Print debugging information*/ -) -WITH RECOMPILE -AS -BEGIN - SET NOCOUNT ON; - SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; - - DECLARE - @rows_collected bigint = 0, - @start_time datetime2(7) = SYSDATETIME(), - @error_message nvarchar(4000), - @sql_version integer; - - /* - Check SQL Server version - dm_exec_session_wait_stats requires 2016 SP1+ - */ - SELECT - 
@sql_version = CONVERT(integer, SERVERPROPERTY(N'ProductMajorVersion')); - - IF @sql_version < 13 - BEGIN - IF @debug = 1 - BEGIN - RAISERROR(N'session_wait_stats_collector requires SQL Server 2016 or later (current: %d)', 0, 1, @sql_version) WITH NOWAIT; - END; - - INSERT INTO - config.collection_log - ( - collector_name, - collection_status, - rows_collected, - duration_ms, - error_message - ) - VALUES - ( - N'session_wait_stats_collector', - N'SKIPPED', - 0, - DATEDIFF(MILLISECOND, @start_time, SYSDATETIME()), - N'Requires SQL Server 2016 SP1 or later' - ); - - RETURN; - END; - - BEGIN TRY - /* - Ensure target table exists - */ - IF OBJECT_ID(N'collect.session_wait_stats', N'U') IS NULL - BEGIN - INSERT INTO - config.collection_log - ( - collection_time, - collector_name, - collection_status, - rows_collected, - duration_ms, - error_message - ) - VALUES - ( - @start_time, - N'session_wait_stats_collector', - N'TABLE_MISSING', - 0, - 0, - N'Table collect.session_wait_stats does not exist' - ); - - RAISERROR(N'Table collect.session_wait_stats does not exist. 
Please run 02_create_tables.sql', 16, 1); - RETURN; - END; - - /* - Ensure config.ignored_wait_types exists - */ - IF OBJECT_ID(N'config.ignored_wait_types', N'U') IS NULL - OR NOT EXISTS (SELECT 1/0 FROM config.ignored_wait_types WHERE is_enabled = 1) - BEGIN - IF @debug = 1 - BEGIN - RAISERROR(N'config.ignored_wait_types table missing or empty - calling ensure_config_tables', 0, 1) WITH NOWAIT; - END; - - EXECUTE config.ensure_config_tables - @debug = @debug; - END; - - /* - Collect session wait statistics - Joins with sys.dm_exec_sessions for context - Joins with sys.dm_exec_requests for current query info - Filters out ignored wait types and low wait times - */ - INSERT INTO - collect.session_wait_stats - ( - session_id, - wait_type, - waiting_tasks_count, - wait_time_ms, - max_wait_time_ms, - signal_wait_time_ms, - database_id, - database_name, - login_name, - host_name, - program_name, - sql_handle, - query_text - ) - SELECT - session_id = sws.session_id, - wait_type = sws.wait_type, - waiting_tasks_count = sws.waiting_tasks_count, - wait_time_ms = sws.wait_time_ms, - max_wait_time_ms = sws.max_wait_time_ms, - signal_wait_time_ms = sws.signal_wait_time_ms, - database_id = s.database_id, - database_name = DB_NAME(s.database_id), - login_name = s.login_name, - host_name = s.host_name, - program_name = s.program_name, - sql_handle = r.sql_handle, - query_text = st.text - FROM sys.dm_exec_session_wait_stats AS sws - JOIN sys.dm_exec_sessions AS s - ON s.session_id = sws.session_id - LEFT JOIN sys.dm_exec_requests AS r - ON r.session_id = sws.session_id - OUTER APPLY sys.dm_exec_sql_text(r.sql_handle) AS st - WHERE sws.wait_time_ms >= @min_wait_time_ms - AND s.is_user_process = 1 - AND NOT EXISTS - ( - SELECT - 1/0 - FROM config.ignored_wait_types AS iwt - WHERE iwt.wait_type = sws.wait_type - AND iwt.is_enabled = 1 - ) - OPTION(RECOMPILE); - - SET @rows_collected = ROWCOUNT_BIG(); - - /* - Log successful collection - */ - INSERT INTO - config.collection_log - ( - 
collector_name, - collection_status, - rows_collected, - duration_ms - ) - VALUES - ( - N'session_wait_stats_collector', - N'SUCCESS', - @rows_collected, - DATEDIFF(MILLISECOND, @start_time, SYSDATETIME()) - ); - - IF @debug = 1 - BEGIN - RAISERROR(N'Collected %d session wait stats rows', 0, 1, @rows_collected) WITH NOWAIT; - END; - - END TRY - BEGIN CATCH - SET @error_message = ERROR_MESSAGE(); - - /* - Log the error - */ - INSERT INTO - config.collection_log - ( - collector_name, - collection_status, - duration_ms, - error_message - ) - VALUES - ( - N'session_wait_stats_collector', - N'ERROR', - DATEDIFF(MILLISECOND, @start_time, SYSDATETIME()), - @error_message - ); - - RAISERROR(N'Error in session_wait_stats_collector: %s', 16, 1, @error_message); - END CATCH; -END; -GO - -PRINT 'Session wait stats collector created successfully'; -PRINT 'Use collect.session_wait_stats_collector to collect per-session wait statistics'; -PRINT 'Requires SQL Server 2016 SP1 or later'; -GO diff --git a/install/41_schedule_management.sql b/install/41_schedule_management.sql index e46333ff..80799b43 100644 --- a/install/41_schedule_management.sql +++ b/install/41_schedule_management.sql @@ -160,7 +160,6 @@ BEGIN BEGIN TRY /*High frequency for real-time dashboard*/ EXECUTE config.update_collector_frequency N'query_snapshots_collector', 1, 1; - EXECUTE config.update_collector_frequency N'session_wait_stats_collector', 1, 1; EXECUTE config.update_collector_frequency N'wait_stats_collector', 1, 1; EXECUTE config.update_collector_frequency N'query_stats_collector', 1, 1; EXECUTE config.update_collector_frequency N'procedure_stats_collector', 2, 1; @@ -218,7 +217,6 @@ BEGIN BEGIN TRY /*Balanced frequencies for consulting work*/ EXECUTE config.update_collector_frequency N'query_snapshots_collector', 1, 1; - EXECUTE config.update_collector_frequency N'session_wait_stats_collector', 1, 1; EXECUTE config.update_collector_frequency N'wait_stats_collector', 5, 1; EXECUTE 
config.update_collector_frequency N'query_stats_collector', 5, 1; EXECUTE config.update_collector_frequency N'procedure_stats_collector', 5, 1; @@ -285,7 +283,6 @@ BEGIN /*Disable high-frequency collectors*/ EXECUTE config.set_collector_enabled N'query_snapshots_collector', 0; - EXECUTE config.set_collector_enabled N'session_wait_stats_collector', 0; PRINT 'Baseline monitoring profile enabled'; PRINT 'All collectors at 5-minute intervals, snapshots disabled'; diff --git a/install/42_scheduled_master_collector.sql b/install/42_scheduled_master_collector.sql index 791a4953..a192a90e 100644 --- a/install/42_scheduled_master_collector.sql +++ b/install/42_scheduled_master_collector.sql @@ -311,10 +311,6 @@ BEGIN BEGIN EXECUTE collect.waiting_tasks_collector @debug = @debug; END; - ELSE IF @collector_name = N'session_wait_stats_collector' - BEGIN - EXECUTE collect.session_wait_stats_collector @debug = @debug; - END; ELSE IF @collector_name = N'running_jobs_collector' BEGIN EXECUTE collect.running_jobs_collector @debug = @debug; diff --git a/install/47_create_reporting_views.sql b/install/47_create_reporting_views.sql index 016e64ad..fc320108 100644 --- a/install/47_create_reporting_views.sql +++ b/install/47_create_reporting_views.sql @@ -2676,118 +2676,6 @@ PRINT ' - report.query_store_regressions (performance regressions)'; PRINT ' - report.long_running_query_patterns (trace analysis)'; GO -/* -============================================================================= -SESSION WAIT ANALYSIS -Shows per-session wait patterns with query context from session_wait_stats -Requires SQL Server 2016 SP1+ (collector skips gracefully on older versions) -============================================================================= -*/ -CREATE OR ALTER VIEW - report.session_wait_analysis -AS -WITH - recent_session_waits AS -( - SELECT - sws.session_id, - sws.database_name, - sws.login_name, - sws.host_name, - sws.program_name, - sws.wait_type, - sws.wait_time_ms, - 
sws.waiting_tasks_count, - sws.max_wait_time_ms, - sws.signal_wait_time_ms, - sws.query_text, - sws.collection_time, - /*Rank by total wait time per session*/ - session_wait_rank = ROW_NUMBER() OVER - ( - PARTITION BY - sws.session_id - ORDER BY - sws.wait_time_ms DESC - ) - FROM collect.session_wait_stats AS sws - WHERE sws.collection_time >= DATEADD(HOUR, -1, SYSDATETIME()) -), - session_totals AS -( - SELECT - session_id, - total_wait_time_ms = SUM(wait_time_ms), - total_waiting_tasks = SUM(waiting_tasks_count), - distinct_wait_types = COUNT_BIG(DISTINCT wait_type) - FROM recent_session_waits - GROUP BY - session_id -) -SELECT TOP (100) - rsw.session_id, - rsw.database_name, - rsw.login_name, - rsw.host_name, - rsw.program_name, - /*Top wait for this session*/ - top_wait_type = rsw.wait_type, - top_wait_time_ms = rsw.wait_time_ms, - top_wait_tasks = rsw.waiting_tasks_count, - top_wait_max_ms = rsw.max_wait_time_ms, - top_wait_signal_ms = rsw.signal_wait_time_ms, - /*Session totals*/ - st.total_wait_time_ms, - st.total_waiting_tasks, - st.distinct_wait_types, - /*Wait time breakdown*/ - resource_wait_ms = rsw.wait_time_ms - rsw.signal_wait_time_ms, - signal_wait_percent = - CASE - WHEN rsw.wait_time_ms > 0 - THEN CONVERT(decimal(5,2), rsw.signal_wait_time_ms * 100.0 / rsw.wait_time_ms) - ELSE 0 - END, - /*Assessment*/ - wait_concern = - CASE - WHEN rsw.wait_time_ms > 60000 THEN N'CRITICAL - > 1 minute wait' - WHEN rsw.wait_time_ms > 30000 THEN N'HIGH - > 30 second wait' - WHEN rsw.wait_time_ms > 10000 THEN N'MEDIUM - > 10 second wait' - WHEN rsw.signal_wait_time_ms * 100.0 / NULLIF(rsw.wait_time_ms, 0) > 25 - THEN N'MEDIUM - High signal wait (CPU pressure)' - ELSE N'LOW' - END, - recommendation = - CASE - WHEN rsw.wait_type LIKE N'LCK_M_%' - THEN N'Lock wait - check for blocking, review transaction scope' - WHEN rsw.wait_type LIKE N'PAGEIOLATCH_%' - THEN N'I/O wait - check storage latency, consider adding memory' - WHEN rsw.wait_type LIKE N'PAGELATCH_%' - THEN 
N'Page latch - check for tempdb contention or hot pages' - WHEN rsw.wait_type = N'CXPACKET' - THEN N'Parallelism wait - review MAXDOP settings, check for skewed parallelism' - WHEN rsw.wait_type = N'SOS_SCHEDULER_YIELD' - THEN N'CPU pressure - query is CPU-bound' - WHEN rsw.wait_type LIKE N'ASYNC_NETWORK_IO%' - THEN N'Network wait - client not consuming results fast enough' - WHEN rsw.wait_type = N'RESOURCE_SEMAPHORE' - THEN N'Memory grant wait - query needs large memory grant' - WHEN rsw.wait_type = N'WRITELOG' - THEN N'Transaction log write - check log disk performance' - ELSE N'Review wait type documentation' - END, - query_text = CONVERT(nvarchar(500), rsw.query_text), - last_seen = rsw.collection_time -FROM recent_session_waits AS rsw -JOIN session_totals AS st - ON st.session_id = rsw.session_id -WHERE rsw.session_wait_rank = 1 -ORDER BY - rsw.wait_time_ms DESC; -GO - /* ============================================================================= CRITICAL ISSUES @@ -2856,7 +2744,6 @@ PRINT ' - report.blocking_chain_analysis (blocking hierarchies with query plans PRINT ' - report.tempdb_contention_analysis (tempdb + PFS/GAM waits + sessions)'; PRINT ' - report.parameter_sensitivity_detection (same query_hash, different plans)'; PRINT ' - report.scheduler_cpu_analysis (scheduler health + runnable task trends)'; -PRINT ' - report.session_wait_analysis (per-session waits with query context)'; PRINT ' - report.critical_issues (configuration problems with recommendations)'; PRINT ''; PRINT 'Quick health check:'; diff --git a/install/98_validate_installation.sql b/install/98_validate_installation.sql index 11c82700..77fa7a20 100644 --- a/install/98_validate_installation.sql +++ b/install/98_validate_installation.sql @@ -294,30 +294,6 @@ BEGIN HAVING COUNT_BIG(*) > 0; END; -/* -Session wait stats - session_id and wait_type should never be NULL -*/ -IF OBJECT_ID(N'collect.session_wait_stats', N'U') IS NOT NULL -BEGIN - INSERT INTO @null_checks - SELECT - table_name = 
N'session_wait_stats', - column_name = N'session_id', - null_count = COUNT_BIG(*) - FROM collect.session_wait_stats - WHERE session_id IS NULL - HAVING COUNT_BIG(*) > 0; - - INSERT INTO @null_checks - SELECT - table_name = N'session_wait_stats', - column_name = N'wait_type', - null_count = COUNT_BIG(*) - FROM collect.session_wait_stats - WHERE wait_type IS NULL - HAVING COUNT_BIG(*) > 0; -END; - IF EXISTS (SELECT 1/0 FROM @null_checks) BEGIN PRINT '*** NULL VALUES FOUND IN REQUIRED COLUMNS ***'; diff --git a/install/99_user_troubleshooting.sql b/install/99_user_troubleshooting.sql index e89e952c..c00585cb 100644 --- a/install/99_user_troubleshooting.sql +++ b/install/99_user_troubleshooting.sql @@ -379,15 +379,6 @@ EXECUTE collect.waiting_tasks_collector @debug = 1; GO -/* -32. Session Wait Stats Collector -Collects per-session wait statistics from sys.dm_exec_session_wait_stats -Requires SQL Server 2016 SP1 or later - will skip gracefully on older versions -*/ -EXECUTE collect.session_wait_stats_collector - @debug = 1; -GO - /* =============================================================================== UTILITY COMMANDS @@ -572,13 +563,6 @@ SELECT row_count = COUNT_BIG(*) FROM collect.waiting_tasks -UNION ALL - -SELECT - table_name = N'session_wait_stats', - row_count = COUNT_BIG(*) -FROM collect.session_wait_stats - ORDER BY table_name; GO @@ -762,14 +746,6 @@ SELECT max_collection_id = MAX(wt.collection_id) FROM collect.waiting_tasks AS wt -UNION ALL - -SELECT - table_name = N'session_wait_stats', - max_collection_time = MAX(sws.collection_time), - max_collection_id = MAX(sws.collection_id) -FROM collect.session_wait_stats AS sws - ORDER BY table_name; GO