diff --git a/Dashboard/MainWindow.xaml.cs b/Dashboard/MainWindow.xaml.cs index 6358cfd5..275917f0 100644 --- a/Dashboard/MainWindow.xaml.cs +++ b/Dashboard/MainWindow.xaml.cs @@ -1065,7 +1065,7 @@ private async Task CheckAllServerAlertsAsync() var connectionString = server.GetConnectionString(_credentialService); var databaseService = new DatabaseService(connectionString); var connStatus = _serverManager.GetConnectionStatus(server.Id); - var health = await databaseService.GetAlertHealthAsync(connStatus.SqlEngineEdition, prefs.LongRunningQueryThresholdMinutes, prefs.LongRunningJobMultiplier); + var health = await databaseService.GetAlertHealthAsync(connStatus.SqlEngineEdition, prefs.LongRunningQueryThresholdMinutes, prefs.LongRunningJobMultiplier, prefs.LongRunningQueryMaxResults, prefs.LongRunningQueryExcludeSpServerDiagnostics, prefs.LongRunningQueryExcludeWaitFor, prefs.LongRunningQueryExcludeBackups, prefs.LongRunningQueryExcludeMiscWaits); if (health.IsOnline) { diff --git a/Dashboard/Models/UserPreferences.cs b/Dashboard/Models/UserPreferences.cs index 01e5dd70..17e3ecb8 100644 --- a/Dashboard/Models/UserPreferences.cs +++ b/Dashboard/Models/UserPreferences.cs @@ -85,6 +85,11 @@ public class UserPreferences public int PoisonWaitThresholdMs { get; set; } = 500; // Alert when avg ms per wait > X public bool NotifyOnLongRunningQueries { get; set; } = true; public int LongRunningQueryThresholdMinutes { get; set; } = 30; // Alert when query runs > X minutes + public int LongRunningQueryMaxResults { get; set; } = 5; // Max number of long-running queries returned per check + public bool LongRunningQueryExcludeSpServerDiagnostics { get; set; } = true; + public bool LongRunningQueryExcludeWaitFor { get; set; } = true; + public bool LongRunningQueryExcludeBackups { get; set; } = true; + public bool LongRunningQueryExcludeMiscWaits { get; set; } = true; public bool NotifyOnTempDbSpace { get; set; } = true; public int TempDbSpaceThresholdPercent { get; set; } = 80; // Alert when TempDB used > X% public bool NotifyOnLongRunningJobs { get; set; } = true; diff --git a/Dashboard/Services/DatabaseService.NocHealth.cs b/Dashboard/Services/DatabaseService.NocHealth.cs index 4e711ef4..54b8a6ff 100644 --- a/Dashboard/Services/DatabaseService.NocHealth.cs +++ b/Dashboard/Services/DatabaseService.NocHealth.cs @@ -121,7 +121,15 @@ public async Task RefreshNocHealthStatusAsync(ServerHealthStatus status, int eng /// Lightweight alert-only health check. Runs 3 queries instead of 9. /// Used by MainWindow's independent alert timer. /// - public async Task GetAlertHealthAsync(int engineEdition = 0, int longRunningQueryThresholdMinutes = 30, int longRunningJobMultiplier = 3) + public async Task GetAlertHealthAsync( + int engineEdition = 0, + int longRunningQueryThresholdMinutes = 30, + int longRunningJobMultiplier = 3, + int longRunningQueryMaxResults = 5, + bool excludeSpServerDiagnostics = true, + bool excludeWaitFor = true, + bool excludeBackups = true, + bool excludeMiscWaits = true) { var result = new AlertHealthResult(); @@ -136,7 +144,7 @@ public async Task GetAlertHealthAsync(int engineEdition = 0, var blockingTask = GetBlockingValuesAsync(connection); var deadlockTask = GetDeadlockCountAsync(connection); var poisonWaitTask = GetPoisonWaitDeltasAsync(connection); - var longRunningTask = GetLongRunningQueriesAsync(connection, longRunningQueryThresholdMinutes); + var longRunningTask = GetLongRunningQueriesAsync(connection, longRunningQueryThresholdMinutes, longRunningQueryMaxResults, excludeSpServerDiagnostics, excludeWaitFor, excludeBackups, excludeMiscWaits); var tempDbTask = GetTempDbSpaceAsync(connection); var anomalousJobTask = GetAnomalousJobsAsync(connection, longRunningJobMultiplier); @@ -603,24 +611,29 @@ ORDER BY collection_time DESC /// Gets currently running queries that exceed the duration threshold. /// Uses live DMV data (sys.dm_exec_requests) for immediate detection. /// - private async Task> GetLongRunningQueriesAsync(SqlConnection connection, int thresholdMinutes) + private async Task> GetLongRunningQueriesAsync( + SqlConnection connection, + int thresholdMinutes, + int maxResults = 5, + bool excludeSpServerDiagnostics = true, + bool excludeWaitFor = true, + bool excludeBackups = true, + bool excludeMiscWaits = true) { + maxResults = Math.Clamp(maxResults, 1, int.MaxValue); - // Exclude internal SP_SERVER_DIAGNOSTICS queries by default, as they often run long and aren't actionable. - string spServerDiagnosticsFilter = "AND r.wait_type NOT LIKE N'%SP_SERVER_DIAGNOSTICS%'"; - - // Exclude WAITFOR queries by default, as they can run indefinitely and may not indicate a problem. - string waitForFilter = "AND r.wait_type NOT IN (N'WAITFOR', N'BROKER_RECEIVE_WAITFOR')"; - - // Exclude backup waits if specified, as they can run long and aren't typically actionable in this context. - string backupsFilter = "AND r.wait_type NOT IN (N'BACKUPTHREAD', N'BACKUPIO')"; - - // Exclude miscellaneous wait type that aren't typically actionable - string miscWaitsFilter = "AND r.wait_type NOT IN (N'XE_LIVE_TARGET_TVF')"; + string spServerDiagnosticsFilter = excludeSpServerDiagnostics + ? "AND r.wait_type NOT LIKE N'%SP_SERVER_DIAGNOSTICS%'" : ""; + string waitForFilter = excludeWaitFor + ? "AND r.wait_type NOT IN (N'WAITFOR', N'BROKER_RECEIVE_WAITFOR')" : ""; + string backupsFilter = excludeBackups + ? "AND r.wait_type NOT IN (N'BACKUPTHREAD', N'BACKUPIO')" : ""; + string miscWaitsFilter = excludeMiscWaits + ? "AND r.wait_type NOT IN (N'XE_LIVE_TARGET_TVF')" : ""; string query = @$"SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; - SELECT TOP(5) + SELECT TOP({maxResults}) r.session_id, DB_NAME(r.database_id) AS database_name, SUBSTRING(t.text, 1, 300) AS query_text, diff --git a/Dashboard/SettingsWindow.xaml b/Dashboard/SettingsWindow.xaml index f955eff0..5930e733 100644 --- a/Dashboard/SettingsWindow.xaml +++ b/Dashboard/SettingsWindow.xaml @@ -217,8 +217,28 @@ Margin="8,0,8,0" VerticalAlignment="Center" TextAlignment="Center"/> - + + + + + + + + + + = 1 && lrqMaxResults <= int.MaxValue) + { + prefs.LongRunningQueryMaxResults = lrqMaxResults; + } + else + { + validationErrors.Add($"Long-running query max results must be between 1 and {int.MaxValue}"); + } + + prefs.LongRunningQueryExcludeSpServerDiagnostics = LrqExcludeSpServerDiagnosticsCheckBox.IsChecked == true; + prefs.LongRunningQueryExcludeWaitFor = LrqExcludeWaitForCheckBox.IsChecked == true; + prefs.LongRunningQueryExcludeBackups = LrqExcludeBackupsCheckBox.IsChecked == true; + prefs.LongRunningQueryExcludeMiscWaits = LrqExcludeMiscWaitsCheckBox.IsChecked == true; + prefs.NotifyOnTempDbSpace = NotifyOnTempDbSpaceCheckBox.IsChecked == true; if (int.TryParse(TempDbSpaceThresholdTextBox.Text, out int tempDbThreshold) && tempDbThreshold > 0 && tempDbThreshold <= 100) prefs.TempDbSpaceThresholdPercent = tempDbThreshold; diff --git a/Lite/App.xaml.cs b/Lite/App.xaml.cs index e775a91e..76484d57 100644 --- a/Lite/App.xaml.cs +++ b/Lite/App.xaml.cs @@ -64,6 +64,11 @@ public partial class App : Application public static int AlertPoisonWaitThresholdMs { get; set; } = 500; public static bool AlertLongRunningQueryEnabled { get; set; } = true; public static int AlertLongRunningQueryThresholdMinutes { get; set; } = 30; + public static int AlertLongRunningQueryMaxResults { get; set; } = 5; + public static bool AlertLongRunningQueryExcludeSpServerDiagnostics { get; set; } = true; + public static bool AlertLongRunningQueryExcludeWaitFor { get; set; } = true; + public static bool AlertLongRunningQueryExcludeBackups { get; set; } = true; + public static bool AlertLongRunningQueryExcludeMiscWaits { get; set; } = true; public static bool AlertTempDbSpaceEnabled { get; set; } = true; public static int AlertTempDbSpaceThresholdPercent { get; set; } = 80; public static bool AlertLongRunningJobEnabled { get; set; } = true; @@ -242,6 +247,11 @@ public static void LoadAlertSettings() if (root.TryGetProperty("alert_poison_wait_threshold_ms", out v)) AlertPoisonWaitThresholdMs = v.GetInt32(); if (root.TryGetProperty("alert_long_running_query_enabled", out v)) AlertLongRunningQueryEnabled = v.GetBoolean(); if (root.TryGetProperty("alert_long_running_query_threshold_minutes", out v)) AlertLongRunningQueryThresholdMinutes = v.GetInt32(); + if (root.TryGetProperty("alert_long_running_query_max_results", out v)) AlertLongRunningQueryMaxResults = (int)Math.Clamp(v.GetInt64(), 1, int.MaxValue); + if (root.TryGetProperty("alert_long_running_query_exclude_sp_server_diagnostics", out v)) AlertLongRunningQueryExcludeSpServerDiagnostics = v.GetBoolean(); + if (root.TryGetProperty("alert_long_running_query_exclude_waitfor", out v)) AlertLongRunningQueryExcludeWaitFor = v.GetBoolean(); + if (root.TryGetProperty("alert_long_running_query_exclude_backups", out v)) AlertLongRunningQueryExcludeBackups = v.GetBoolean(); + if (root.TryGetProperty("alert_long_running_query_exclude_misc_waits", out v)) AlertLongRunningQueryExcludeMiscWaits = v.GetBoolean(); if (root.TryGetProperty("alert_tempdb_space_enabled", out v)) AlertTempDbSpaceEnabled = v.GetBoolean(); if (root.TryGetProperty("alert_tempdb_space_threshold_percent", out v)) AlertTempDbSpaceThresholdPercent = v.GetInt32(); if (root.TryGetProperty("alert_long_running_job_enabled", out v)) AlertLongRunningJobEnabled = v.GetBoolean(); diff --git a/Lite/MainWindow.xaml.cs b/Lite/MainWindow.xaml.cs index 2f537d2f..7e16095a 100644 --- a/Lite/MainWindow.xaml.cs +++ b/Lite/MainWindow.xaml.cs @@ -1122,7 +1122,7 @@ await _emailAlertService.TrySendAlertEmailAsync( { try { - var longRunning = await _dataService.GetLongRunningQueriesAsync(summary.ServerId, App.AlertLongRunningQueryThresholdMinutes); + var longRunning = await _dataService.GetLongRunningQueriesAsync(summary.ServerId, App.AlertLongRunningQueryThresholdMinutes, App.AlertLongRunningQueryMaxResults, App.AlertLongRunningQueryExcludeSpServerDiagnostics, App.AlertLongRunningQueryExcludeWaitFor, App.AlertLongRunningQueryExcludeBackups, App.AlertLongRunningQueryExcludeMiscWaits); if (longRunning.Count > 0) { diff --git a/Lite/Services/LocalDataService.WaitStats.cs b/Lite/Services/LocalDataService.WaitStats.cs index e18b4f5e..bfd57c55 100644 --- a/Lite/Services/LocalDataService.WaitStats.cs +++ b/Lite/Services/LocalDataService.WaitStats.cs @@ -195,24 +195,31 @@ ORDER BY collection_time DESC /// Gets long-running queries from the latest collection snapshot. /// Returns sessions whose total elapsed time exceeds the given threshold. /// - public async Task> GetLongRunningQueriesAsync(int serverId, int thresholdMinutes) + public async Task> GetLongRunningQueriesAsync( + int serverId, + int thresholdMinutes, + int maxResults = 5, + bool excludeSpServerDiagnostics = true, + bool excludeWaitFor = true, + bool excludeBackups = true, + bool excludeMiscWaits = true) { + maxResults = Math.Clamp(maxResults, 1, int.MaxValue); + using var connection = await OpenConnectionAsync(); using var command = connection.CreateCommand(); var thresholdMs = (long)thresholdMinutes * 60 * 1000; - // Exclude internal SP_SERVER_DIAGNOSTICS queries by default, as they often run long and aren't actionable. - string spServerDiagnosticsFilter = "AND r.wait_type NOT LIKE N'%SP_SERVER_DIAGNOSTICS%'"; - - // Exclude WAITFOR queries by default, as they can run indefinitely and may not indicate a problem. - string waitForFilter = "AND r.wait_type NOT IN (N'WAITFOR', N'BROKER_RECEIVE_WAITFOR')"; - - // Exclude backup waits if specified, as they can run long and aren't typically actionable in this context. - string backupsFilter = "AND r.wait_type NOT IN (N'BACKUPTHREAD', N'BACKUPIO')"; + string spServerDiagnosticsFilter = excludeSpServerDiagnostics + ? "AND r.wait_type NOT LIKE N'%SP_SERVER_DIAGNOSTICS%'" : ""; + string waitForFilter = excludeWaitFor + ? "AND r.wait_type NOT IN (N'WAITFOR', N'BROKER_RECEIVE_WAITFOR')" : ""; + string backupsFilter = excludeBackups + ? "AND r.wait_type NOT IN (N'BACKUPTHREAD', N'BACKUPIO')" : ""; + string miscWaitsFilter = excludeMiscWaits + ? "AND r.wait_type NOT IN (N'XE_LIVE_TARGET_TVF')" : ""; - // Exclude miscellaneous wait type that aren't typically actionable - string miscWaitsFilter = "AND r.wait_type NOT IN (N'XE_LIVE_TARGET_TVF')"; command.CommandText = @$" SELECT @@ -235,7 +242,7 @@ AND r.session_id > 50 {miscWaitsFilter} AND r.total_elapsed_time_ms >= $2 ORDER BY r.total_elapsed_time_ms DESC - LIMIT 5;"; + LIMIT {maxResults};"; command.Parameters.Add(new DuckDBParameter { Value = serverId }); command.Parameters.Add(new DuckDBParameter { Value = thresholdMs }); diff --git a/Lite/Windows/SettingsWindow.xaml b/Lite/Windows/SettingsWindow.xaml index 4412f023..098555b7 100644 --- a/Lite/Windows/SettingsWindow.xaml +++ b/Lite/Windows/SettingsWindow.xaml @@ -181,9 +181,29 @@ - + + + + + + + + + diff --git a/Lite/Windows/SettingsWindow.xaml.cs b/Lite/Windows/SettingsWindow.xaml.cs index 8876420d..350458a5 100644 --- a/Lite/Windows/SettingsWindow.xaml.cs +++ b/Lite/Windows/SettingsWindow.xaml.cs @@ -436,6 +436,11 @@ private void LoadAlertSettings() AlertPoisonWaitThresholdBox.Text = App.AlertPoisonWaitThresholdMs.ToString(); AlertLongRunningQueryCheckBox.IsChecked = App.AlertLongRunningQueryEnabled; AlertLongRunningQueryThresholdBox.Text = App.AlertLongRunningQueryThresholdMinutes.ToString(); + AlertLongRunningQueryMaxResultsBox.Text = App.AlertLongRunningQueryMaxResults.ToString(); + LrqExcludeSpServerDiagnosticsCheckBox.IsChecked = App.AlertLongRunningQueryExcludeSpServerDiagnostics; + LrqExcludeWaitForCheckBox.IsChecked = App.AlertLongRunningQueryExcludeWaitFor; + LrqExcludeBackupsCheckBox.IsChecked = App.AlertLongRunningQueryExcludeBackups; + LrqExcludeMiscWaitsCheckBox.IsChecked = App.AlertLongRunningQueryExcludeMiscWaits; AlertTempDbSpaceCheckBox.IsChecked = App.AlertTempDbSpaceEnabled; AlertTempDbSpaceThresholdBox.Text = App.AlertTempDbSpaceThresholdPercent.ToString(); AlertLongRunningJobCheckBox.IsChecked = App.AlertLongRunningJobEnabled; @@ -463,6 +468,12 @@ private void SaveAlertSettings() App.AlertLongRunningQueryEnabled = AlertLongRunningQueryCheckBox.IsChecked == true; if (int.TryParse(AlertLongRunningQueryThresholdBox.Text, out var lrq) && lrq > 0) App.AlertLongRunningQueryThresholdMinutes = lrq; + if (int.TryParse(AlertLongRunningQueryMaxResultsBox.Text, out var lrqMax) && lrqMax >= 1 && lrqMax <= int.MaxValue) + App.AlertLongRunningQueryMaxResults = lrqMax; + App.AlertLongRunningQueryExcludeSpServerDiagnostics = LrqExcludeSpServerDiagnosticsCheckBox.IsChecked == true; + App.AlertLongRunningQueryExcludeWaitFor = LrqExcludeWaitForCheckBox.IsChecked == true; + App.AlertLongRunningQueryExcludeBackups = LrqExcludeBackupsCheckBox.IsChecked == true; + App.AlertLongRunningQueryExcludeMiscWaits = LrqExcludeMiscWaitsCheckBox.IsChecked == true; App.AlertTempDbSpaceEnabled = AlertTempDbSpaceCheckBox.IsChecked == true; if (int.TryParse(AlertTempDbSpaceThresholdBox.Text, out var tempDb) && tempDb > 0 && tempDb <= 100) App.AlertTempDbSpaceThresholdPercent = tempDb; @@ -497,6 +508,11 @@ private void SaveAlertSettings() root["alert_poison_wait_threshold_ms"] = App.AlertPoisonWaitThresholdMs; root["alert_long_running_query_enabled"] = App.AlertLongRunningQueryEnabled; root["alert_long_running_query_threshold_minutes"] = App.AlertLongRunningQueryThresholdMinutes; + root["alert_long_running_query_max_results"] = App.AlertLongRunningQueryMaxResults; + root["alert_long_running_query_exclude_sp_server_diagnostics"] = App.AlertLongRunningQueryExcludeSpServerDiagnostics; + root["alert_long_running_query_exclude_waitfor"] = App.AlertLongRunningQueryExcludeWaitFor; + root["alert_long_running_query_exclude_backups"] = App.AlertLongRunningQueryExcludeBackups; + root["alert_long_running_query_exclude_misc_waits"] = App.AlertLongRunningQueryExcludeMiscWaits; root["alert_tempdb_space_enabled"] = App.AlertTempDbSpaceEnabled; root["alert_tempdb_space_threshold_percent"] = App.AlertTempDbSpaceThresholdPercent; root["alert_long_running_job_enabled"] = App.AlertLongRunningJobEnabled;