diff --git a/Dashboard/Analysis/AnalysisService.cs b/Dashboard/Analysis/AnalysisService.cs index a0d7b2fc..e982c19f 100644 --- a/Dashboard/Analysis/AnalysisService.cs +++ b/Dashboard/Analysis/AnalysisService.cs @@ -23,6 +23,7 @@ public class AnalysisService private readonly InferenceEngine _engine; private readonly SqlServerDrillDownCollector _drillDown; private readonly SqlServerAnomalyDetector _anomalyDetector; + private readonly SqlServerBaselineProvider _baselineProvider; /// /// Minimum hours of collected data required before analysis will run. @@ -60,7 +61,8 @@ public AnalysisService(string connectionString, IPlanFetcher? planFetcher = null _graph = new RelationshipGraph(); _engine = new InferenceEngine(_graph); _drillDown = new SqlServerDrillDownCollector(connectionString, planFetcher); - _anomalyDetector = new SqlServerAnomalyDetector(connectionString); + _baselineProvider = new SqlServerBaselineProvider(connectionString); + _anomalyDetector = new SqlServerAnomalyDetector(connectionString, _baselineProvider); } /// diff --git a/Dashboard/Analysis/FactScorer.cs b/Dashboard/Analysis/FactScorer.cs index 78347953..5605a88b 100644 --- a/Dashboard/Analysis/FactScorer.cs +++ b/Dashboard/Analysis/FactScorer.cs @@ -308,9 +308,12 @@ private static double ScoreBadActorFact(Fact fact) /// private static double ScoreAnomalyFact(Fact fact) { - if ( fact.Key.StartsWith("ANOMALY_CPU_SPIKE" , StringComparison.OrdinalIgnoreCase) - || fact.Key.StartsWith("ANOMALY_READ_LATENCY" , StringComparison.OrdinalIgnoreCase) - || fact.Key.StartsWith("ANOMALY_WRITE_LATENCY", StringComparison.OrdinalIgnoreCase) + if ( fact.Key.StartsWith("ANOMALY_CPU_SPIKE" , StringComparison.OrdinalIgnoreCase) + || fact.Key.StartsWith("ANOMALY_READ_LATENCY" , StringComparison.OrdinalIgnoreCase) + || fact.Key.StartsWith("ANOMALY_WRITE_LATENCY" , StringComparison.OrdinalIgnoreCase) + || fact.Key.StartsWith("ANOMALY_BATCH_REQUESTS", StringComparison.OrdinalIgnoreCase) + || fact.Key.StartsWith("ANOMALY_SESSION_SPIKE" , StringComparison.OrdinalIgnoreCase) + || fact.Key.StartsWith("ANOMALY_QUERY_DURATION", StringComparison.OrdinalIgnoreCase) ) { // Deviation-based scoring: 2σ = 0.5, 4σ = 1.0 diff --git a/Dashboard/Analysis/SqlServerAnomalyDetector.cs b/Dashboard/Analysis/SqlServerAnomalyDetector.cs index bdf6664a..82d26294 100644 --- a/Dashboard/Analysis/SqlServerAnomalyDetector.cs +++ b/Dashboard/Analysis/SqlServerAnomalyDetector.cs @@ -1,543 +1,708 @@ -using System; -using System.Collections.Generic; -using System.Threading.Tasks; -using Microsoft.Data.SqlClient; -using PerformanceMonitorDashboard.Helpers; - -namespace PerformanceMonitorDashboard.Analysis; - -/// -/// Detects anomalies by comparing the analysis window's metrics against a -/// baseline period. When a metric deviates significantly from baseline -/// (mean + standard deviation), an ANOMALY fact is emitted. -/// -/// This is the "oh shit" mode -- detecting acute deviations that don't show -/// up in aggregate analysis because they're brief. A 5-minute CPU spike -/// that averages out over 4 hours is invisible to aggregate scoring but -/// obvious when compared against "what was this metric doing before?" -/// -/// Baseline selection: uses the 24 hours preceding the analysis window. -/// If less data is available, uses whatever exists with lower confidence. -/// -/// Port of Lite's AnomalyDetector -- uses SQL Server collect.* tables instead of DuckDB views. -/// No server_id filtering -- Dashboard monitors one server per database. -/// -public class SqlServerAnomalyDetector -{ - private readonly string _connectionString; - - /// - /// Minimum number of baseline samples needed for reliable detection. - /// Below this, anomalies are still detected but with reduced confidence. - /// - private const int MinBaselineSamples = 10; - - /// - /// Number of standard deviations above baseline mean to flag as anomalous. - /// - private const double DeviationThreshold = 2.0; - - public SqlServerAnomalyDetector(string connectionString) - { - _connectionString = connectionString; - } - - /// - /// Detects anomalies by comparing the analysis window against a baseline period. - /// Returns anomaly facts to be merged into the main fact list. - /// - public async Task> DetectAnomaliesAsync(AnalysisContext context) - { - var anomalies = new List(); - - // Baseline: 24 hours preceding the analysis window - var baselineEnd = context.TimeRangeStart; - var baselineStart = baselineEnd.AddHours(-24); - - // Check if baseline period has any data at all -- if not, skip all anomaly detection. - // Without baseline data, everything looks anomalous. - if (!await HasBaselineDataAsync(baselineStart, baselineEnd)) - return anomalies; - - await DetectCpuAnomalies(context, baselineStart, baselineEnd, anomalies); - await DetectWaitAnomalies(context, baselineStart, baselineEnd, anomalies); - await DetectBlockingAnomalies(context, baselineStart, baselineEnd, anomalies); - await DetectIoAnomalies(context, baselineStart, baselineEnd, anomalies); - - return anomalies; - } - - /// - /// Checks if the baseline period has any collected data. - /// Uses wait_stats as canary -- if waits are collected, other data is too. - /// - private async Task HasBaselineDataAsync(DateTime baselineStart, DateTime baselineEnd) - { - try - { - using var connection = new SqlConnection(_connectionString); - await connection.OpenAsync(); - - using var cmd = connection.CreateCommand(); - cmd.CommandText = @" -SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; - -SELECT - (SELECT COUNT(*) FROM collect.wait_stats - WHERE collection_time >= @baselineStart AND collection_time < @baselineEnd) - + (SELECT COUNT(*) FROM collect.cpu_utilization_stats - WHERE collection_time >= @baselineStart AND collection_time < @baselineEnd);"; - - cmd.Parameters.Add(new SqlParameter("@baselineStart", baselineStart)); - cmd.Parameters.Add(new SqlParameter("@baselineEnd", baselineEnd)); - - var count = Convert.ToInt64(await cmd.ExecuteScalarAsync() ?? 0); - return count > 0; - } - catch { return false; } - } - - /// - /// Detects CPU utilization anomalies by comparing per-sample values - /// against the baseline distribution. - /// - private async Task DetectCpuAnomalies(AnalysisContext context, - DateTime baselineStart, DateTime baselineEnd, List anomalies) - { - try - { - using var connection = new SqlConnection(_connectionString); - await connection.OpenAsync(); - - // Get baseline stats - using var baselineCmd = connection.CreateCommand(); - baselineCmd.CommandText = @" -SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; - -SELECT - AVG(CAST(sqlserver_cpu_utilization AS FLOAT)) AS mean_cpu, - STDEV(CAST(sqlserver_cpu_utilization AS FLOAT)) AS stddev_cpu, - COUNT(*) AS sample_count -FROM collect.cpu_utilization_stats -WHERE collection_time >= @baselineStart -AND collection_time < @baselineEnd;"; - - baselineCmd.Parameters.Add(new SqlParameter("@baselineStart", baselineStart)); - baselineCmd.Parameters.Add(new SqlParameter("@baselineEnd", baselineEnd)); - - double baselineMean = 0, baselineStdDev = 0; - long baselineSamples = 0; - - using (var reader = await baselineCmd.ExecuteReaderAsync()) - { - if (await reader.ReadAsync()) - { - baselineMean = reader.IsDBNull(0) ? 0 : Convert.ToDouble(reader.GetValue(0)); - baselineStdDev = reader.IsDBNull(1) ? 0 : Convert.ToDouble(reader.GetValue(1)); - baselineSamples = reader.IsDBNull(2) ? 0 : Convert.ToInt64(reader.GetValue(2)); - } - } - - if (baselineSamples < 3 || baselineStdDev <= 0) return; - - // Get peak and average in the analysis window - using var windowCmd = connection.CreateCommand(); - windowCmd.CommandText = @" -SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; - -SELECT - MAX(sqlserver_cpu_utilization) AS peak_cpu, - AVG(CAST(sqlserver_cpu_utilization AS FLOAT)) AS avg_cpu, - COUNT(*) AS sample_count, - (SELECT TOP 1 collection_time FROM collect.cpu_utilization_stats - WHERE collection_time >= @windowStart AND collection_time < @windowEnd - ORDER BY sqlserver_cpu_utilization DESC) AS peak_time -FROM collect.cpu_utilization_stats -WHERE collection_time >= @windowStart -AND collection_time < @windowEnd;"; - - windowCmd.Parameters.Add(new SqlParameter("@windowStart", context.TimeRangeStart)); - windowCmd.Parameters.Add(new SqlParameter("@windowEnd", context.TimeRangeEnd)); - - using var windowReader = await windowCmd.ExecuteReaderAsync(); - if (!await windowReader.ReadAsync()) return; - - var peakCpu = windowReader.IsDBNull(0) ? 0.0 : Convert.ToDouble(windowReader.GetValue(0)); - var avgCpu = windowReader.IsDBNull(1) ? 0.0 : Convert.ToDouble(windowReader.GetValue(1)); - var windowSamples = windowReader.IsDBNull(2) ? 0L : Convert.ToInt64(windowReader.GetValue(2)); - var peakTime = windowReader.IsDBNull(3) ? (DateTime?)null : windowReader.GetDateTime(3); - - if (windowSamples == 0) return; - - // Check if peak deviates significantly from baseline - var deviation = (peakCpu - baselineMean) / baselineStdDev; - if (deviation < DeviationThreshold || peakCpu < 50) return; // Don't flag low absolute values - - var confidence = baselineSamples >= MinBaselineSamples ? 1.0 : (double)baselineSamples / MinBaselineSamples; - - anomalies.Add(new Fact - { - Source = "anomaly", - Key = "ANOMALY_CPU_SPIKE", - Value = peakCpu, - ServerId = context.ServerId, - Metadata = new Dictionary - { - ["peak_cpu"] = peakCpu, - ["avg_cpu_in_window"] = avgCpu, - ["baseline_mean"] = baselineMean, - ["baseline_stddev"] = baselineStdDev, - ["deviation_sigma"] = deviation, - ["baseline_samples"] = baselineSamples, - ["window_samples"] = windowSamples, - ["confidence"] = confidence, - ["peak_time_ticks"] = peakTime?.Ticks ?? 0 - } - }); - } - catch (Exception ex) - { - Logger.Error($"[SqlServerAnomalyDetector] CPU anomaly detection failed: {ex.Message}"); - } - } - - /// - /// Detects wait stat anomalies -- significant waits in the analysis window - /// that were absent or much lower in the baseline. - /// - private async Task DetectWaitAnomalies(AnalysisContext context, - DateTime baselineStart, DateTime baselineEnd, List anomalies) - { - try - { - using var connection = new SqlConnection(_connectionString); - await connection.OpenAsync(); - - // Check if baseline has any wait data at all -- if not, skip - using var checkCmd = connection.CreateCommand(); - checkCmd.CommandText = @" -SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; - -SELECT COUNT(*) FROM collect.wait_stats -WHERE collection_time >= @baselineStart AND collection_time < @baselineEnd;"; - - checkCmd.Parameters.Add(new SqlParameter("@baselineStart", baselineStart)); - checkCmd.Parameters.Add(new SqlParameter("@baselineEnd", baselineEnd)); - - var baselineCount = Convert.ToInt64(await checkCmd.ExecuteScalarAsync() ?? 0); - if (baselineCount == 0) return; - - // Get per-wait-type totals in both windows - using var cmd = connection.CreateCommand(); - cmd.CommandText = @" -SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; - -;WITH baseline AS ( - SELECT wait_type, - CAST(SUM(wait_time_ms_delta) AS BIGINT) AS total_ms - FROM collect.wait_stats - WHERE collection_time >= @baselineStart AND collection_time < @baselineEnd - AND wait_time_ms_delta > 0 - GROUP BY wait_type -), -current_window AS ( - SELECT wait_type, - CAST(SUM(wait_time_ms_delta) AS BIGINT) AS total_ms - FROM collect.wait_stats - WHERE collection_time >= @windowStart AND collection_time <= @windowEnd - AND wait_time_ms_delta > 0 - GROUP BY wait_type -) -SELECT TOP 10 - c.wait_type, - c.total_ms AS current_ms, - COALESCE(b.total_ms, 0) AS baseline_ms -FROM current_window c -LEFT JOIN baseline b ON c.wait_type = b.wait_type -WHERE c.total_ms > 10000 -- At least 10 seconds of wait time -ORDER BY c.total_ms DESC;"; - - cmd.Parameters.Add(new SqlParameter("@baselineStart", baselineStart)); - cmd.Parameters.Add(new SqlParameter("@baselineEnd", baselineEnd)); - cmd.Parameters.Add(new SqlParameter("@windowStart", context.TimeRangeStart)); - cmd.Parameters.Add(new SqlParameter("@windowEnd", context.TimeRangeEnd)); - - using var reader = await cmd.ExecuteReaderAsync(); - while (await reader.ReadAsync()) - { - var waitType = reader.GetString(0); - var currentMs = Convert.ToInt64(reader.GetValue(1)); - var baselineMs = Convert.ToInt64(reader.GetValue(2)); - - // Normalize to per-hour rates before comparing (windows are different lengths) - var baselineHours = (baselineEnd - baselineStart).TotalHours; - var currentHours = (context.TimeRangeEnd - context.TimeRangeStart).TotalHours; - if (baselineHours <= 0) baselineHours = 1; - if (currentHours <= 0) currentHours = 1; - - double ratio; - string anomalyType; - - if (baselineMs == 0) - { - ratio = currentMs > 60_000 ? 100.0 : 0; // Only flag if > 1 minute total - anomalyType = "new"; - } - else - { - var baselineRate = baselineMs / baselineHours; - var currentRate = currentMs / currentHours; - ratio = baselineRate > 0 ? currentRate / baselineRate : 100.0; - anomalyType = "spike"; - } - - if (ratio < 5.0) continue; // Need at least 5x increase - - anomalies.Add(new Fact - { - Source = "anomaly", - Key = $"ANOMALY_WAIT_{waitType}", - Value = currentMs, - ServerId = context.ServerId, - Metadata = new Dictionary - { - ["current_ms"] = currentMs, - ["baseline_ms"] = baselineMs, - ["ratio"] = ratio, - ["is_new"] = anomalyType == "new" ? 1 : 0 - } - }); - } - } - catch (Exception ex) - { - Logger.Error($"[SqlServerAnomalyDetector] Wait anomaly detection failed: {ex.Message}"); - } - } - - /// - /// Detects blocking/deadlock anomalies -- events in the analysis window - /// that are significantly above baseline rates. - /// - private async Task DetectBlockingAnomalies(AnalysisContext context, - DateTime baselineStart, DateTime baselineEnd, List anomalies) - { - try - { - using var connection = new SqlConnection(_connectionString); - await connection.OpenAsync(); - - // Check if baseline period has any data at all - using var checkCmd = connection.CreateCommand(); - checkCmd.CommandText = @" -SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; - -SELECT - (SELECT COUNT(*) FROM collect.blocking_BlockedProcessReport - WHERE collection_time >= @baselineStart AND collection_time < @baselineEnd) - + (SELECT COUNT(*) FROM collect.deadlocks - WHERE collection_time >= @baselineStart AND collection_time < @baselineEnd) - + (SELECT COUNT(*) FROM collect.wait_stats - WHERE collection_time >= @baselineStart AND collection_time < @baselineEnd);"; - - checkCmd.Parameters.Add(new SqlParameter("@baselineStart", baselineStart)); - checkCmd.Parameters.Add(new SqlParameter("@baselineEnd", baselineEnd)); - - var baselineDataCount = Convert.ToInt64(await checkCmd.ExecuteScalarAsync() ?? 0); - if (baselineDataCount == 0) return; // No baseline data = can't detect anomaly - - using var cmd = connection.CreateCommand(); - cmd.CommandText = @" -SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; - -SELECT - (SELECT COUNT(*) FROM collect.blocking_BlockedProcessReport - WHERE collection_time >= @baselineStart AND collection_time < @baselineEnd) AS baseline_blocking, - (SELECT COUNT(*) FROM collect.blocking_BlockedProcessReport - WHERE collection_time >= @windowStart AND collection_time <= @windowEnd) AS current_blocking, - (SELECT COUNT(*) FROM collect.deadlocks - WHERE collection_time >= @baselineStart AND collection_time < @baselineEnd) AS baseline_deadlocks, - (SELECT COUNT(*) FROM collect.deadlocks - WHERE collection_time >= @windowStart AND collection_time <= @windowEnd) AS current_deadlocks;"; - - cmd.Parameters.Add(new SqlParameter("@baselineStart", baselineStart)); - cmd.Parameters.Add(new SqlParameter("@baselineEnd", baselineEnd)); - cmd.Parameters.Add(new SqlParameter("@windowStart", context.TimeRangeStart)); - cmd.Parameters.Add(new SqlParameter("@windowEnd", context.TimeRangeEnd)); - - using var reader = await cmd.ExecuteReaderAsync(); - if (!await reader.ReadAsync()) return; - - var baselineBlocking = Convert.ToInt64(reader.GetValue(0)); - var currentBlocking = Convert.ToInt64(reader.GetValue(1)); - var baselineDeadlocks = Convert.ToInt64(reader.GetValue(2)); - var currentDeadlocks = Convert.ToInt64(reader.GetValue(3)); - - // Normalize to per-hour rates (windows are different lengths) - var baselineHours = (baselineEnd - baselineStart).TotalHours; - var currentHours = (context.TimeRangeEnd - context.TimeRangeStart).TotalHours; - if (baselineHours <= 0) baselineHours = 1; - if (currentHours <= 0) currentHours = 1; - - var baselineBlockingRate = baselineBlocking / baselineHours; - var currentBlockingRate = currentBlocking / currentHours; - var blockingRatio = baselineBlocking > 0 ? currentBlockingRate / baselineBlockingRate : 100.0; - - var baselineDeadlockRate = baselineDeadlocks / baselineHours; - var currentDeadlockRate = currentDeadlocks / currentHours; - var deadlockRatio = baselineDeadlocks > 0 ? currentDeadlockRate / baselineDeadlockRate : 100.0; - - // Blocking spike: at least 5 events AND 3x baseline rate (or new) - if (currentBlocking >= 5 && (baselineBlocking == 0 || blockingRatio >= 3)) - { - anomalies.Add(new Fact - { - Source = "anomaly", - Key = "ANOMALY_BLOCKING_SPIKE", - Value = currentBlocking, - ServerId = context.ServerId, - Metadata = new Dictionary - { - ["current_count"] = currentBlocking, - ["baseline_count"] = baselineBlocking, - ["ratio"] = blockingRatio - } - }); - } - - // Deadlock spike: at least 3 events AND 3x baseline rate (or new) - if (currentDeadlocks >= 3 && (baselineDeadlocks == 0 || deadlockRatio >= 3)) - { - anomalies.Add(new Fact - { - Source = "anomaly", - Key = "ANOMALY_DEADLOCK_SPIKE", - Value = currentDeadlocks, - ServerId = context.ServerId, - Metadata = new Dictionary - { - ["current_count"] = currentDeadlocks, - ["baseline_count"] = baselineDeadlocks, - ["ratio"] = deadlockRatio - } - }); - } - } - catch (Exception ex) - { - Logger.Error($"[SqlServerAnomalyDetector] Blocking anomaly detection failed: {ex.Message}"); - } - } - - /// - /// Detects I/O latency anomalies -- significant increase in read/write latency - /// compared to baseline. - /// - private async Task DetectIoAnomalies(AnalysisContext context, - DateTime baselineStart, DateTime baselineEnd, List anomalies) - { - try - { - using var connection = new SqlConnection(_connectionString); - await connection.OpenAsync(); - - using var cmd = connection.CreateCommand(); - cmd.CommandText = @" -SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; - -;WITH baseline AS ( - SELECT - AVG(io_stall_read_ms_delta * 1.0 / NULLIF(num_of_reads_delta, 0)) AS avg_read_lat, - AVG(io_stall_write_ms_delta * 1.0 / NULLIF(num_of_writes_delta, 0)) AS avg_write_lat, - STDEV(io_stall_read_ms_delta * 1.0 / NULLIF(num_of_reads_delta, 0)) AS stddev_read, - STDEV(io_stall_write_ms_delta * 1.0 / NULLIF(num_of_writes_delta, 0)) AS stddev_write, - COUNT(*) AS samples - FROM collect.file_io_stats - WHERE collection_time >= @baselineStart AND collection_time < @baselineEnd - AND (num_of_reads_delta > 0 OR num_of_writes_delta > 0) -), -current_window AS ( - SELECT - AVG(io_stall_read_ms_delta * 1.0 / NULLIF(num_of_reads_delta, 0)) AS avg_read_lat, - AVG(io_stall_write_ms_delta * 1.0 / NULLIF(num_of_writes_delta, 0)) AS avg_write_lat - FROM collect.file_io_stats - WHERE collection_time >= @windowStart AND collection_time <= @windowEnd - AND (num_of_reads_delta > 0 OR num_of_writes_delta > 0) -) -SELECT b.avg_read_lat, b.stddev_read, c.avg_read_lat, - b.avg_write_lat, b.stddev_write, c.avg_write_lat, - b.samples -FROM baseline b, current_window c;"; - - cmd.Parameters.Add(new SqlParameter("@baselineStart", baselineStart)); - cmd.Parameters.Add(new SqlParameter("@baselineEnd", baselineEnd)); - cmd.Parameters.Add(new SqlParameter("@windowStart", context.TimeRangeStart)); - cmd.Parameters.Add(new SqlParameter("@windowEnd", context.TimeRangeEnd)); - - using var reader = await cmd.ExecuteReaderAsync(); - if (!await reader.ReadAsync()) return; - - var baselineReadLat = reader.IsDBNull(0) ? 0.0 : Convert.ToDouble(reader.GetValue(0)); - var stddevRead = reader.IsDBNull(1) ? 0.0 : Convert.ToDouble(reader.GetValue(1)); - var currentReadLat = reader.IsDBNull(2) ? 0.0 : Convert.ToDouble(reader.GetValue(2)); - var baselineWriteLat = reader.IsDBNull(3) ? 0.0 : Convert.ToDouble(reader.GetValue(3)); - var stddevWrite = reader.IsDBNull(4) ? 0.0 : Convert.ToDouble(reader.GetValue(4)); - var currentWriteLat = reader.IsDBNull(5) ? 0.0 : Convert.ToDouble(reader.GetValue(5)); - var samples = reader.IsDBNull(6) ? 0L : Convert.ToInt64(reader.GetValue(6)); - - if (samples < 3) return; - - // Read latency anomaly - if (stddevRead > 0 && currentReadLat > 10) // At least 10ms to matter - { - var readDeviation = (currentReadLat - baselineReadLat) / stddevRead; - if (readDeviation >= DeviationThreshold) - { - anomalies.Add(new Fact - { - Source = "anomaly", - Key = "ANOMALY_READ_LATENCY", - Value = currentReadLat, - ServerId = context.ServerId, - Metadata = new Dictionary - { - ["current_latency_ms"] = currentReadLat, - ["baseline_mean_ms"] = baselineReadLat, - ["baseline_stddev_ms"] = stddevRead, - ["deviation_sigma"] = readDeviation, - ["baseline_samples"] = samples - } - }); - } - } - - // Write latency anomaly - if (stddevWrite > 0 && currentWriteLat > 5) // At least 5ms to matter - { - var writeDeviation = (currentWriteLat - baselineWriteLat) / stddevWrite; - if (writeDeviation >= DeviationThreshold) - { - anomalies.Add(new Fact - { - Source = "anomaly", - Key = "ANOMALY_WRITE_LATENCY", - Value = currentWriteLat, - ServerId = context.ServerId, - Metadata = new Dictionary - { - ["current_latency_ms"] = currentWriteLat, - ["baseline_mean_ms"] = baselineWriteLat, - ["baseline_stddev_ms"] = stddevWrite, - ["deviation_sigma"] = writeDeviation, - ["baseline_samples"] = samples - } - }); - } - } - } - catch (Exception ex) - { - Logger.Error($"[SqlServerAnomalyDetector] I/O anomaly detection failed: {ex.Message}"); - } - } -} +using System; +using System.Collections.Generic; +using System.Threading.Tasks; +using Microsoft.Data.SqlClient; +using PerformanceMonitorDashboard.Helpers; + +namespace PerformanceMonitorDashboard.Analysis; + +/// +/// Detects anomalies by comparing the analysis window's metrics against +/// time-bucketed baselines (hour-of-day x day-of-week, 30-day rolling window). +/// +/// Two detection patterns: +/// - Z-score: (observed - mean) / stddev — used for continuous metrics +/// (CPU, batch requests, I/O latency, session counts, query duration) +/// - Ratio: currentRate / baselineRate — used for rate/event metrics +/// (wait stats, blocking, deadlocks) +/// +/// Baseline computation and caching are handled by SqlServerBaselineProvider. +/// +/// Port of Lite's AnomalyDetector — uses SQL Server collect.* tables instead of DuckDB views. +/// No server_id filtering — Dashboard monitors one server per database. +/// No memory metric — Dashboard doesn't collect memory stats. +/// +public class SqlServerAnomalyDetector +{ + private readonly string _connectionString; + private readonly SqlServerBaselineProvider _baselineProvider; + + /// + /// Default number of standard deviations above baseline mean to flag as anomalous. + /// + private const double DefaultDeviationThreshold = 2.0; + + /// + /// Default ratio threshold for rate-based anomaly detection (wait stats). + /// + private const double DefaultRatioThreshold = 5.0; + + /// + /// Default ratio threshold for event-based anomaly detection (blocking/deadlocks). + /// + private const double DefaultEventRatioThreshold = 3.0; + + /// + /// Per-metric deviation thresholds. Metrics not listed use DefaultDeviationThreshold. + /// + private readonly Dictionary _deviationThresholds = new(); + + public SqlServerAnomalyDetector(string connectionString, SqlServerBaselineProvider baselineProvider) + { + _connectionString = connectionString; + _baselineProvider = baselineProvider; + } + + /// + /// Sets a custom deviation threshold for a specific metric. + /// + public void SetDeviationThreshold(string metricName, double threshold) + { + _deviationThresholds[metricName] = threshold; + } + + private double GetDeviationThreshold(string metricName) + { + return _deviationThresholds.TryGetValue(metricName, out var threshold) + ? threshold + : DefaultDeviationThreshold; + } + + /// + /// Adds baseline context metadata to an anomaly fact's metadata dictionary. + /// + private static void AddBaselineContext(Dictionary metadata, BaselineBucket baseline) + { + metadata["baseline_hour"] = baseline.HourOfDay; + metadata["baseline_dow"] = baseline.DayOfWeek; + metadata["baseline_tier"] = (double)baseline.Tier; + } + + /// + /// Detects anomalies by comparing the analysis window against time-bucketed baselines. + /// Returns anomaly facts to be merged into the main fact list. + /// + public async Task> DetectAnomaliesAsync(AnalysisContext context) + { + var anomalies = new List(); + + // Check if baseline period has any data at all — if not, skip all anomaly detection. + if (!await HasBaselineDataAsync()) + return anomalies; + + // Existing detection methods (upgraded to time-bucketed baselines) + await DetectCpuAnomalies(context, anomalies); + await DetectWaitAnomalies(context, anomalies); + await DetectBlockingAnomalies(context, anomalies); + await DetectIoAnomalies(context, anomalies); + + // New detection methods + await DetectBatchRequestAnomalies(context, anomalies); + await DetectSessionAnomalies(context, anomalies); + await DetectQueryDurationAnomalies(context, anomalies); + + return anomalies; + } + + /// + /// Checks if the server has enough historical data for meaningful baselines. + /// Uses wait_stats and cpu_utilization_stats as canary. + /// + private async Task HasBaselineDataAsync() + { + try + { + using var connection = new SqlConnection(_connectionString); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + +SELECT + (SELECT COUNT(*) FROM collect.wait_stats + WHERE collection_time >= @cutoff) + + (SELECT COUNT(*) FROM collect.cpu_utilization_stats + WHERE collection_time >= @cutoff);"; + + cmd.Parameters.Add(new SqlParameter("@cutoff", DateTime.UtcNow.AddDays(-30))); + + var count = Convert.ToInt64(await cmd.ExecuteScalarAsync() ?? 0); + return count > 0; + } + catch { return false; } + } + + /// + /// Detects CPU utilization anomalies using z-score against time-bucketed baseline. + /// + private async Task DetectCpuAnomalies(AnalysisContext context, List anomalies) + { + try + { + var baseline = await _baselineProvider.GetBaselineAsync( + SqlServerMetricNames.Cpu, context.TimeRangeStart); + + if (baseline.SampleCount == 0) return; + var effectiveStdDev = baseline.EffectiveStdDev; + if (effectiveStdDev <= 0) return; + + using var connection = new SqlConnection(_connectionString); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + +SELECT + MAX(sqlserver_cpu_utilization) AS peak_cpu, + AVG(CAST(sqlserver_cpu_utilization AS FLOAT)) AS avg_cpu, + COUNT(*) AS sample_count, + (SELECT TOP 1 collection_time FROM collect.cpu_utilization_stats + WHERE collection_time >= @windowStart AND collection_time < @windowEnd + ORDER BY sqlserver_cpu_utilization DESC) AS peak_time +FROM collect.cpu_utilization_stats +WHERE collection_time >= @windowStart +AND collection_time < @windowEnd;"; + + cmd.Parameters.Add(new SqlParameter("@windowStart", context.TimeRangeStart)); + cmd.Parameters.Add(new SqlParameter("@windowEnd", context.TimeRangeEnd)); + + using var reader = await cmd.ExecuteReaderAsync(); + if (!await reader.ReadAsync()) return; + + var peakCpu = reader.IsDBNull(0) ? 0.0 : Convert.ToDouble(reader.GetValue(0)); + var avgCpu = reader.IsDBNull(1) ? 0.0 : Convert.ToDouble(reader.GetValue(1)); + var windowSamples = reader.IsDBNull(2) ? 0L : Convert.ToInt64(reader.GetValue(2)); + var peakTime = reader.IsDBNull(3) ? (DateTime?)null : reader.GetDateTime(3); + + if (windowSamples == 0) return; + + var deviation = (peakCpu - baseline.Mean) / effectiveStdDev; + if (deviation < GetDeviationThreshold(SqlServerMetricNames.Cpu) || peakCpu < 50) return; + + var metadata = new Dictionary + { + ["peak_cpu"] = peakCpu, + ["avg_cpu_in_window"] = avgCpu, + ["baseline_mean"] = baseline.Mean, + ["baseline_stddev"] = effectiveStdDev, + ["deviation_sigma"] = deviation, + ["baseline_samples"] = baseline.SampleCount, + ["window_samples"] = windowSamples, + ["confidence"] = 1.0, + ["peak_time_ticks"] = peakTime?.Ticks ?? 0 + }; + AddBaselineContext(metadata, baseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_CPU_SPIKE", + Value = peakCpu, + ServerId = context.ServerId, + Metadata = metadata + }); + } + catch (Exception ex) + { + Logger.Error($"[SqlServerAnomalyDetector] CPU anomaly detection failed: {ex.Message}"); + } + } + + /// + /// Detects wait stat anomalies — total wait time significantly above + /// baseline rate for this time bucket. Uses ratio-based scoring. + /// + private async Task DetectWaitAnomalies(AnalysisContext context, List anomalies) + { + try + { + var baseline = await _baselineProvider.GetBaselineAsync( + SqlServerMetricNames.WaitStats, context.TimeRangeStart); + + if (baseline.SampleCount == 0) return; + + using var connection = new SqlConnection(_connectionString); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + +SELECT TOP 10 + wait_type, + CAST(SUM(wait_time_ms_delta) AS BIGINT) AS total_ms +FROM collect.wait_stats +WHERE collection_time >= @windowStart AND collection_time <= @windowEnd +AND wait_time_ms_delta > 0 +GROUP BY wait_type +HAVING SUM(wait_time_ms_delta) > 10000 +ORDER BY total_ms DESC;"; + + cmd.Parameters.Add(new SqlParameter("@windowStart", context.TimeRangeStart)); + cmd.Parameters.Add(new SqlParameter("@windowEnd", context.TimeRangeEnd)); + + var currentHours = (context.TimeRangeEnd - context.TimeRangeStart).TotalHours; + if (currentHours <= 0) currentHours = 1; + + var baselineRate = baseline.SampleCount > 0 ? baseline.Mean : 0; + + using var reader = await cmd.ExecuteReaderAsync(); + while (await reader.ReadAsync()) + { + var waitType = reader.GetString(0); + var currentMs = Convert.ToInt64(reader.GetValue(1)); + var currentRate = currentMs / currentHours; + + double ratio; + string anomalyType; + + if (baselineRate <= 0 || baseline.SampleCount == 0) + { + ratio = currentMs > 60_000 ? 100.0 : 0; + anomalyType = "new"; + } + else + { + ratio = currentRate / baselineRate; + anomalyType = "spike"; + } + + if (ratio < DefaultRatioThreshold) continue; + + var metadata = new Dictionary + { + ["current_ms"] = currentMs, + ["baseline_mean"] = baseline.Mean, + ["ratio"] = ratio, + ["is_new"] = anomalyType == "new" ? 1 : 0 + }; + AddBaselineContext(metadata, baseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = $"ANOMALY_WAIT_{waitType}", + Value = currentMs, + ServerId = context.ServerId, + Metadata = metadata + }); + } + } + catch (Exception ex) + { + Logger.Error($"[SqlServerAnomalyDetector] Wait anomaly detection failed: {ex.Message}"); + } + } + + /// + /// Detects blocking/deadlock anomalies — event rates significantly above + /// baseline for this time bucket. Uses ratio-based scoring. + /// + private async Task DetectBlockingAnomalies(AnalysisContext context, List anomalies) + { + try + { + var blockingBaseline = await _baselineProvider.GetBaselineAsync( + SqlServerMetricNames.Blocking, context.TimeRangeStart); + var deadlockBaseline = await _baselineProvider.GetBaselineAsync( + SqlServerMetricNames.Deadlock, context.TimeRangeStart); + + using var connection = new SqlConnection(_connectionString); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + +SELECT + (SELECT COUNT(*) FROM collect.blocking_BlockedProcessReport + WHERE collection_time >= @windowStart AND collection_time <= @windowEnd) AS current_blocking, + (SELECT COUNT(*) FROM collect.deadlocks + WHERE collection_time >= @windowStart AND collection_time <= @windowEnd) AS current_deadlocks;"; + + cmd.Parameters.Add(new SqlParameter("@windowStart", context.TimeRangeStart)); + cmd.Parameters.Add(new SqlParameter("@windowEnd", context.TimeRangeEnd)); + + using var reader = await cmd.ExecuteReaderAsync(); + if (!await reader.ReadAsync()) return; + + var currentBlocking = Convert.ToInt64(reader.GetValue(0)); + var currentDeadlocks = Convert.ToInt64(reader.GetValue(1)); + + var baselineBlockingRate = blockingBaseline.SampleCount > 0 ? blockingBaseline.Mean : 0; + var baselineDeadlockRate = deadlockBaseline.SampleCount > 0 ? deadlockBaseline.Mean : 0; + + // Blocking spike: at least 5 events AND 3x baseline rate (or no baseline) + if (currentBlocking >= 5 && (baselineBlockingRate <= 0 || currentBlocking / Math.Max(baselineBlockingRate, 1) >= DefaultEventRatioThreshold)) + { + var metadata = new Dictionary + { + ["current_count"] = currentBlocking, + ["baseline_rate"] = baselineBlockingRate, + ["ratio"] = baselineBlockingRate > 0 ? currentBlocking / baselineBlockingRate : 100.0 + }; + AddBaselineContext(metadata, blockingBaseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_BLOCKING_SPIKE", + Value = currentBlocking, + ServerId = context.ServerId, + Metadata = metadata + }); + } + + // Deadlock spike: at least 3 events AND 3x baseline rate (or no baseline) + if (currentDeadlocks >= 3 && (baselineDeadlockRate <= 0 || currentDeadlocks / Math.Max(baselineDeadlockRate, 1) >= DefaultEventRatioThreshold)) + { + var metadata = new Dictionary + { + ["current_count"] = currentDeadlocks, + ["baseline_rate"] = baselineDeadlockRate, + ["ratio"] = baselineDeadlockRate > 0 ? currentDeadlocks / baselineDeadlockRate : 100.0 + }; + AddBaselineContext(metadata, deadlockBaseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_DEADLOCK_SPIKE", + Value = currentDeadlocks, + ServerId = context.ServerId, + Metadata = metadata + }); + } + } + catch (Exception ex) + { + Logger.Error($"[SqlServerAnomalyDetector] Blocking anomaly detection failed: {ex.Message}"); + } + } + + /// + /// Detects I/O latency anomalies using z-score against time-bucketed baseline. + /// + private async Task DetectIoAnomalies(AnalysisContext context, List anomalies) + { + try + { + var baseline = await _baselineProvider.GetBaselineAsync( + SqlServerMetricNames.IoLatency, context.TimeRangeStart); + + if (baseline.SampleCount == 0) return; + var effectiveStdDev = baseline.EffectiveStdDev; + if (effectiveStdDev <= 0) return; + + using var connection = new SqlConnection(_connectionString); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + +SELECT + AVG(io_stall_read_ms_delta * 1.0 / NULLIF(num_of_reads_delta, 0)) AS avg_read_lat, + AVG(io_stall_write_ms_delta * 1.0 / NULLIF(num_of_writes_delta, 0)) AS avg_write_lat +FROM collect.file_io_stats +WHERE collection_time >= @windowStart AND collection_time <= @windowEnd +AND (num_of_reads_delta > 0 OR num_of_writes_delta > 0);"; + + cmd.Parameters.Add(new SqlParameter("@windowStart", context.TimeRangeStart)); + cmd.Parameters.Add(new SqlParameter("@windowEnd", context.TimeRangeEnd)); + + using var reader = await cmd.ExecuteReaderAsync(); + if (!await reader.ReadAsync()) return; + + var currentReadLat = reader.IsDBNull(0) ? 0.0 : Convert.ToDouble(reader.GetValue(0)); + var currentWriteLat = reader.IsDBNull(1) ? 0.0 : Convert.ToDouble(reader.GetValue(1)); + + var ioThreshold = GetDeviationThreshold(SqlServerMetricNames.IoLatency); + + // Read latency anomaly + if (currentReadLat > 10) + { + var readDeviation = (currentReadLat - baseline.Mean) / effectiveStdDev; + if (readDeviation >= ioThreshold) + { + var metadata = new Dictionary + { + ["current_latency_ms"] = currentReadLat, + ["baseline_mean_ms"] = baseline.Mean, + ["baseline_stddev_ms"] = effectiveStdDev, + ["deviation_sigma"] = readDeviation, + ["baseline_samples"] = baseline.SampleCount + }; + AddBaselineContext(metadata, baseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_READ_LATENCY", + Value = currentReadLat, + ServerId = context.ServerId, + Metadata = metadata + }); + } + } + + // Write latency anomaly + if (currentWriteLat > 5) + { + var writeDeviation = (currentWriteLat - baseline.Mean) / effectiveStdDev; + if (writeDeviation >= ioThreshold) + { + var metadata = new Dictionary + { + ["current_latency_ms"] = currentWriteLat, + ["baseline_mean_ms"] = baseline.Mean, + ["baseline_stddev_ms"] = effectiveStdDev, + ["deviation_sigma"] = writeDeviation, + ["baseline_samples"] = baseline.SampleCount + }; + AddBaselineContext(metadata, baseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_WRITE_LATENCY", + Value = currentWriteLat, + ServerId = context.ServerId, + Metadata = metadata + }); + } + } + } + catch (Exception ex) + { + Logger.Error($"[SqlServerAnomalyDetector] I/O anomaly detection failed: {ex.Message}"); + } + } + + /// + /// Detects batch requests/sec anomalies using z-score against time-bucketed baseline. + /// + private async Task DetectBatchRequestAnomalies(AnalysisContext context, List anomalies) + { + try + { + var baseline = await _baselineProvider.GetBaselineAsync( + SqlServerMetricNames.BatchRequests, context.TimeRangeStart); + + if (baseline.SampleCount == 0) return; + var effectiveStdDev = baseline.EffectiveStdDev; + if (effectiveStdDev <= 0) return; + + using var connection = new SqlConnection(_connectionString); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + +SELECT + AVG(cntr_value_delta) AS avg_batch, + MAX(cntr_value_delta) AS peak_batch, + COUNT(*) AS sample_count +FROM collect.perfmon_stats +WHERE collection_time >= @windowStart AND collection_time <= @windowEnd +AND counter_name = 'Batch Requests/sec' +AND cntr_value_delta >= 0;"; + + cmd.Parameters.Add(new SqlParameter("@windowStart", context.TimeRangeStart)); + cmd.Parameters.Add(new SqlParameter("@windowEnd", context.TimeRangeEnd)); + + using var reader = await cmd.ExecuteReaderAsync(); + if (!await reader.ReadAsync()) return; + + var avgBatch = reader.IsDBNull(0) ? 0.0 : Convert.ToDouble(reader.GetValue(0)); + var peakBatch = reader.IsDBNull(1) ? 0.0 : Convert.ToDouble(reader.GetValue(1)); + var windowSamples = reader.IsDBNull(2) ? 0L : Convert.ToInt64(reader.GetValue(2)); + + if (windowSamples == 0) return; + + var deviation = (peakBatch - baseline.Mean) / effectiveStdDev; + if (deviation < GetDeviationThreshold(SqlServerMetricNames.BatchRequests)) return; + + var metadata = new Dictionary + { + ["peak_batch_requests"] = peakBatch, + ["avg_batch_requests"] = avgBatch, + ["baseline_mean"] = baseline.Mean, + ["baseline_stddev"] = effectiveStdDev, + ["deviation_sigma"] = deviation, + ["baseline_samples"] = baseline.SampleCount, + ["window_samples"] = windowSamples + }; + AddBaselineContext(metadata, baseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_BATCH_REQUESTS", + Value = peakBatch, + ServerId = context.ServerId, + Metadata = metadata + }); + } + catch (Exception ex) + { + Logger.Error($"[SqlServerAnomalyDetector] Batch request anomaly detection failed: {ex.Message}"); + } + } + + /// + /// Detects session/connection count anomalies using z-score against time-bucketed baseline. + /// + private async Task DetectSessionAnomalies(AnalysisContext context, List anomalies) + { + try + { + var baseline = await _baselineProvider.GetBaselineAsync( + SqlServerMetricNames.SessionCount, context.TimeRangeStart); + + if (baseline.SampleCount == 0) return; + var effectiveStdDev = baseline.EffectiveStdDev; + if (effectiveStdDev <= 0) return; + + using var connection = new SqlConnection(_connectionString); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + +;WITH per_collection AS ( + SELECT collection_time, + SUM(total_sessions) AS total_connections + FROM collect.session_stats + WHERE collection_time >= @windowStart AND collection_time <= @windowEnd + GROUP BY collection_time +) +SELECT AVG(CAST(total_connections AS FLOAT)) AS avg_connections, + MAX(total_connections) AS peak_connections, + COUNT(*) AS sample_count +FROM per_collection;"; + + cmd.Parameters.Add(new SqlParameter("@windowStart", context.TimeRangeStart)); + cmd.Parameters.Add(new SqlParameter("@windowEnd", context.TimeRangeEnd)); + + using var reader = await cmd.ExecuteReaderAsync(); + if (!await reader.ReadAsync()) return; + + var avgConnections = reader.IsDBNull(0) ? 0.0 : Convert.ToDouble(reader.GetValue(0)); + var peakConnections = reader.IsDBNull(1) ? 0.0 : Convert.ToDouble(reader.GetValue(1)); + var windowSamples = reader.IsDBNull(2) ? 0L : Convert.ToInt64(reader.GetValue(2)); + + if (windowSamples == 0) return; + + var deviation = (peakConnections - baseline.Mean) / effectiveStdDev; + if (deviation < GetDeviationThreshold(SqlServerMetricNames.SessionCount)) return; + + var metadata = new Dictionary + { + ["peak_connections"] = peakConnections, + ["avg_connections"] = avgConnections, + ["baseline_mean"] = baseline.Mean, + ["baseline_stddev"] = effectiveStdDev, + ["deviation_sigma"] = deviation, + ["baseline_samples"] = baseline.SampleCount, + ["window_samples"] = windowSamples + }; + AddBaselineContext(metadata, baseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_SESSION_SPIKE", + Value = peakConnections, + ServerId = context.ServerId, + Metadata = metadata + }); + } + catch (Exception ex) + { + Logger.Error($"[SqlServerAnomalyDetector] Session anomaly detection failed: {ex.Message}"); + } + } + + /// + /// Detects query duration aggregate anomalies using z-score against time-bucketed baseline. + /// Measures total elapsed time across all queries per collection interval. + /// + private async Task DetectQueryDurationAnomalies(AnalysisContext context, List anomalies) + { + try + { + var baseline = await _baselineProvider.GetBaselineAsync( + SqlServerMetricNames.QueryDuration, context.TimeRangeStart); + + if (baseline.SampleCount == 0) return; + var effectiveStdDev = baseline.EffectiveStdDev; + if (effectiveStdDev <= 0) return; + + using var connection = new SqlConnection(_connectionString); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + +;WITH per_collection AS ( + SELECT collection_time, + SUM(total_elapsed_time_delta) AS total_elapsed + FROM collect.query_stats + WHERE collection_time >= @windowStart AND collection_time <= @windowEnd + AND execution_count_delta > 0 + AND total_elapsed_time_delta >= 0 + GROUP BY collection_time +) +SELECT AVG(CAST(total_elapsed AS FLOAT)) AS avg_elapsed, + MAX(total_elapsed) AS peak_elapsed, + COUNT(*) AS sample_count +FROM per_collection;"; + + cmd.Parameters.Add(new SqlParameter("@windowStart", context.TimeRangeStart)); + cmd.Parameters.Add(new SqlParameter("@windowEnd", context.TimeRangeEnd)); + + using var reader = await cmd.ExecuteReaderAsync(); + if (!await reader.ReadAsync()) return; + + var avgElapsed = reader.IsDBNull(0) ? 0.0 : Convert.ToDouble(reader.GetValue(0)); + var peakElapsed = reader.IsDBNull(1) ? 0.0 : Convert.ToDouble(reader.GetValue(1)); + var windowSamples = reader.IsDBNull(2) ? 0L : Convert.ToInt64(reader.GetValue(2)); + + if (windowSamples == 0) return; + + var deviation = (peakElapsed - baseline.Mean) / effectiveStdDev; + if (deviation < GetDeviationThreshold(SqlServerMetricNames.QueryDuration)) return; + + var metadata = new Dictionary + { + ["peak_total_elapsed_us"] = peakElapsed, + ["avg_total_elapsed_us"] = avgElapsed, + ["baseline_mean"] = baseline.Mean, + ["baseline_stddev"] = effectiveStdDev, + ["deviation_sigma"] = deviation, + ["baseline_samples"] = baseline.SampleCount, + ["window_samples"] = windowSamples + }; + AddBaselineContext(metadata, baseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_QUERY_DURATION", + Value = peakElapsed, + ServerId = context.ServerId, + Metadata = metadata + }); + } + catch (Exception ex) + { + Logger.Error($"[SqlServerAnomalyDetector] Query duration anomaly detection failed: {ex.Message}"); + } + } +} diff --git a/Dashboard/Analysis/SqlServerBaselineProvider.cs b/Dashboard/Analysis/SqlServerBaselineProvider.cs new file mode 100644 index 00000000..1746028c --- /dev/null +++ b/Dashboard/Analysis/SqlServerBaselineProvider.cs @@ -0,0 +1,525 @@ +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.Data.SqlClient; +using PerformanceMonitorDashboard.Helpers; + +namespace PerformanceMonitorDashboard.Analysis; + +/// +/// Provides time-bucketed baselines (hour-of-day x day-of-week) computed from +/// 30-day rolling history in SQL Server collect.* tables. +/// +/// Port of Lite's BaselineProvider — uses SQL Server instead of DuckDB. +/// No server_id filtering — Dashboard monitors one server per database. +/// +/// Each baseline bucket contains mean, stddev, and sample count for a metric +/// at a specific (hour, day-of-week) combination. When a bucket has insufficient +/// samples, the provider collapses to less-specific tiers: +/// Full (hour+dow) -> Hour-only -> Flat (global mean/stddev) +/// +/// Baselines are cached in memory with a 1-hour TTL to avoid redundant +/// recomputation during rapid re-analysis. +/// +public class SqlServerBaselineProvider +{ + private readonly string _connectionString; + + /// Rolling window for baseline computation. + private const int BaselineWindowDays = 30; + + /// Collapse to hour-only when full bucket has fewer than this many samples. + private const int CollapseThreshold = 10; + + /// Restore to full bucket when sample count reaches this level (hysteresis). + private const int RestoreThreshold = 15; + + /// Cache TTL — baselines are recomputed after this interval. + public static TimeSpan CacheTtl { get; set; } = TimeSpan.FromHours(1); + + private readonly ConcurrentDictionary _cache = new(); + + public SqlServerBaselineProvider(string connectionString) + { + _connectionString = connectionString; + } + + /// + /// Gets the baseline for a specific metric and time bucket. + /// Returns the most specific bucket available, collapsing as needed. + /// + public async Task GetBaselineAsync(string metricName, DateTime analysisTime) + { + var hourOfDay = analysisTime.Hour; + var dayOfWeek = (int)analysisTime.DayOfWeek; // Sunday=0 + + var baselines = await GetOrComputeBaselinesAsync(metricName, analysisTime); + if (baselines == null || baselines.Count == 0) + return BaselineBucket.Empty; + + // Try full bucket (hour + day-of-week) + var fullKey = (hourOfDay, dayOfWeek); + if (baselines.TryGetValue(fullKey, out var fullBucket) && fullBucket.SampleCount >= RestoreThreshold) + return fullBucket; + + // If full bucket exists but below restore threshold, check if it's above collapse threshold + // (hysteresis: don't collapse if we're between 10-14 samples and were previously using full) + if (fullBucket != null && fullBucket.SampleCount >= CollapseThreshold) + return fullBucket; + + // Collapse to hour-only: aggregate all days for this hour + var hourBuckets = baselines + .Where(kvp => kvp.Key.HourOfDay == hourOfDay) + .Select(kvp => kvp.Value) + .ToList(); + + if (hourBuckets.Count > 0) + { + var collapsed = CollapseToHourOnly(hourBuckets); + if (collapsed.SampleCount >= CollapseThreshold) + return collapsed; + } + + // Collapse to flat: aggregate everything + var allBuckets = baselines.Values.ToList(); + if (allBuckets.Count > 0) + { + var flat = CollapseToFlat(allBuckets); + if (flat.SampleCount >= 3) // Minimum viable baseline + return flat; + } + + return BaselineBucket.Empty; + } + + /// + /// Gets all baseline buckets for a metric. Used by UI for rendering + /// expected-range bands across all time slots. + /// + public async Task?> GetAllBaselinesAsync( + string metricName, DateTime analysisTime) + { + return await GetOrComputeBaselinesAsync(metricName, analysisTime); + } + + /// Forces full cache clear — used during testing. + public void ClearCache() => _cache.Clear(); + + private async Task?> GetOrComputeBaselinesAsync( + string metricName, DateTime analysisTime) + { + var cacheKey = metricName; + var roundedHour = new DateTime(analysisTime.Year, analysisTime.Month, analysisTime.Day, analysisTime.Hour, 0, 0); + + if (_cache.TryGetValue(cacheKey, out var cached) && + cached.ComputedAt == roundedHour && + (DateTime.UtcNow - cached.RealTime) < CacheTtl) + { + return cached.Buckets; + } + + var buckets = await ComputeBaselinesAsync(metricName, analysisTime); + + _cache[cacheKey] = new CachedBaseline + { + ComputedAt = roundedHour, + RealTime = DateTime.UtcNow, + Buckets = buckets + }; + + return buckets; + } + + private async Task?> ComputeBaselinesAsync( + string metricName, DateTime analysisTime) + { + var query = GetBaselineQuery(metricName); + if (query == null) return null; + + var windowStart = analysisTime.AddDays(-BaselineWindowDays); + + try + { + using var connection = new SqlConnection(_connectionString); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = query; + cmd.Parameters.Add(new SqlParameter("@windowStart", windowStart)); + cmd.Parameters.Add(new SqlParameter("@windowEnd", analysisTime)); + + var buckets = new Dictionary<(int, int), BaselineBucket>(); + + using var reader = await cmd.ExecuteReaderAsync(); + while (await reader.ReadAsync()) + { + var hour = Convert.ToInt32(reader.GetValue(0)); + var dow = Convert.ToInt32(reader.GetValue(1)); + var mean = reader.IsDBNull(2) ? 0.0 : Convert.ToDouble(reader.GetValue(2)); + var stddev = reader.IsDBNull(3) ? 0.0 : Convert.ToDouble(reader.GetValue(3)); + var count = reader.IsDBNull(4) ? 0L : Convert.ToInt64(reader.GetValue(4)); + + buckets[(hour, dow)] = new BaselineBucket + { + HourOfDay = hour, + DayOfWeek = dow, + Mean = mean, + StdDev = stddev, + SampleCount = count, + Tier = count >= RestoreThreshold ? BaselineTier.Full + : count >= CollapseThreshold ? BaselineTier.Full + : BaselineTier.HourOnly + }; + } + + return buckets; + } + catch (Exception ex) + { + Logger.Error($"[SqlServerBaselineProvider] Failed to compute baselines for {metricName}: {ex.Message}"); + return null; + } + } + + private static string? GetBaselineQuery(string metricName) + { + // All queries return: hour_of_day, day_of_week, mean_val, stddev_val, sample_count + // Day-of-week normalization: (DATEPART(weekday, x) + @@DATEFIRST - 1) % 7 gives Sunday=0 + // Cumulative metrics use CTEs for restart poisoning exclusion — exclude samples where + // value drops near-zero when the prior sample was significantly higher. + // SQL Server has no QUALIFY — use ROW_NUMBER() in CTEs instead. + return metricName switch + { + // Point-in-time metric — no restart exclusion needed + SqlServerMetricNames.Cpu => @" +SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + +SELECT DATEPART(HOUR, collection_time) AS hour_of_day, + (DATEPART(WEEKDAY, collection_time) + @@DATEFIRST - 1) % 7 AS day_of_week, + AVG(CAST(sqlserver_cpu_utilization AS FLOAT)) AS mean_val, + STDEV(CAST(sqlserver_cpu_utilization AS FLOAT)) AS stddev_val, + COUNT(*) AS sample_count +FROM collect.cpu_utilization_stats +WHERE collection_time >= @windowStart AND collection_time < @windowEnd +GROUP BY DATEPART(HOUR, collection_time), + (DATEPART(WEEKDAY, collection_time) + @@DATEFIRST - 1) % 7;", + + // Cumulative counter — restart exclusion via CTE with LAG. + // server_start_time is inline in collect.perfmon_stats. + // Exclude samples within 5 min of a detected restart. + SqlServerMetricNames.BatchRequests => @" +SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + +;WITH filtered AS ( + SELECT collection_time, cntr_value_delta, + LAG(cntr_value_delta) OVER (ORDER BY collection_time) AS prev_value + FROM collect.perfmon_stats + WHERE collection_time >= @windowStart AND collection_time < @windowEnd + AND counter_name = 'Batch Requests/sec' + AND cntr_value_delta >= 0 +) +SELECT DATEPART(HOUR, collection_time) AS hour_of_day, + (DATEPART(WEEKDAY, collection_time) + @@DATEFIRST - 1) % 7 AS day_of_week, + AVG(cntr_value_delta) AS mean_val, + STDEV(cntr_value_delta) AS stddev_val, + COUNT(*) AS sample_count +FROM filtered +WHERE NOT (cntr_value_delta = 0 AND ISNULL(prev_value, 0) > 1000) +GROUP BY DATEPART(HOUR, collection_time), + (DATEPART(WEEKDAY, collection_time) + @@DATEFIRST - 1) % 7;", + + // Cumulative counter, multiple rows per collection (per wait type) — + // aggregate to total wait ms per collection first, then filter restart poisoning + SqlServerMetricNames.WaitStats => @" +SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + +;WITH per_collection AS ( + SELECT collection_time, + SUM(wait_time_ms_delta) AS total_wait_ms + FROM collect.wait_stats + WHERE collection_time >= @windowStart AND collection_time < @windowEnd + AND wait_time_ms_delta >= 0 + GROUP BY collection_time +), +with_lag AS ( + SELECT collection_time, total_wait_ms, + LAG(total_wait_ms) OVER (ORDER BY collection_time) AS prev_value + FROM per_collection +) +SELECT DATEPART(HOUR, collection_time) AS hour_of_day, + (DATEPART(WEEKDAY, collection_time) + @@DATEFIRST - 1) % 7 AS day_of_week, + AVG(CAST(total_wait_ms AS FLOAT)) AS mean_val, + STDEV(CAST(total_wait_ms AS FLOAT)) AS stddev_val, + COUNT(*) AS sample_count +FROM with_lag +WHERE NOT (total_wait_ms = 0 AND ISNULL(prev_value, 0) > 10000) +GROUP BY DATEPART(HOUR, collection_time), + (DATEPART(WEEKDAY, collection_time) + @@DATEFIRST - 1) % 7;", + + // Point-in-time, multiple rows per collection (per program_name) — + // aggregate to total connections per collection first. + // collect.session_stats does NOT have server_start_time — not needed. + SqlServerMetricNames.SessionCount => @" +SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + +;WITH per_collection AS ( + SELECT collection_time, + SUM(total_sessions) AS total_connections + FROM collect.session_stats + WHERE collection_time >= @windowStart AND collection_time < @windowEnd + GROUP BY collection_time +) +SELECT DATEPART(HOUR, collection_time) AS hour_of_day, + (DATEPART(WEEKDAY, collection_time) + @@DATEFIRST - 1) % 7 AS day_of_week, + AVG(CAST(total_connections AS FLOAT)) AS mean_val, + STDEV(CAST(total_connections AS FLOAT)) AS stddev_val, + COUNT(*) AS sample_count +FROM per_collection +GROUP BY DATEPART(HOUR, collection_time), + (DATEPART(WEEKDAY, collection_time) + @@DATEFIRST - 1) % 7;", + + // Cumulative (plan cache), multiple rows per collection (per query) — + // use delta columns, aggregate total elapsed per collection, filter restart poisoning. + // server_start_time is inline in collect.query_stats. + SqlServerMetricNames.QueryDuration => @" +SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + +;WITH per_collection AS ( + SELECT collection_time, + SUM(total_elapsed_time_delta) AS total_elapsed + FROM collect.query_stats + WHERE collection_time >= @windowStart AND collection_time < @windowEnd + AND execution_count_delta > 0 + AND total_elapsed_time_delta >= 0 + GROUP BY collection_time +), +with_lag AS ( + SELECT collection_time, total_elapsed, + LAG(total_elapsed) OVER (ORDER BY collection_time) AS prev_value + FROM per_collection +) +SELECT DATEPART(HOUR, collection_time) AS hour_of_day, + (DATEPART(WEEKDAY, collection_time) + @@DATEFIRST - 1) % 7 AS day_of_week, + AVG(CAST(total_elapsed AS FLOAT)) AS mean_val, + STDEV(CAST(total_elapsed AS FLOAT)) AS stddev_val, + COUNT(*) AS sample_count +FROM with_lag +WHERE NOT (total_elapsed = 0 AND ISNULL(prev_value, 0) > 100000) +GROUP BY DATEPART(HOUR, collection_time), + (DATEPART(WEEKDAY, collection_time) + @@DATEFIRST - 1) % 7;", + + // Point-in-time metric — no restart exclusion needed + SqlServerMetricNames.IoLatency => @" +SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + +SELECT DATEPART(HOUR, collection_time) AS hour_of_day, + (DATEPART(WEEKDAY, collection_time) + @@DATEFIRST - 1) % 7 AS day_of_week, + AVG(io_stall_read_ms_delta * 1.0 / NULLIF(num_of_reads_delta, 0)) AS mean_val, + STDEV(io_stall_read_ms_delta * 1.0 / NULLIF(num_of_reads_delta, 0)) AS stddev_val, + COUNT(*) AS sample_count +FROM collect.file_io_stats +WHERE collection_time >= @windowStart AND collection_time < @windowEnd +AND (num_of_reads_delta > 0 OR num_of_writes_delta > 0) +GROUP BY DATEPART(HOUR, collection_time), + (DATEPART(WEEKDAY, collection_time) + @@DATEFIRST - 1) % 7;", + + // Event-based — use wait_stats collection intervals as time spine (bucketed to minute), + // LEFT JOIN event counts so intervals with zero events are included in the baseline. + // Without this, the baseline only reflects storm periods (when events exist). + SqlServerMetricNames.Blocking => @" +SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + +;WITH spine AS ( + SELECT DISTINCT DATEADD(MINUTE, DATEDIFF(MINUTE, 0, collection_time), 0) AS minute_bucket + FROM collect.wait_stats + WHERE collection_time >= @windowStart AND collection_time < @windowEnd +), +event_counts AS ( + SELECT DATEADD(MINUTE, DATEDIFF(MINUTE, 0, collection_time), 0) AS minute_bucket, + CAST(COUNT(*) AS FLOAT) AS cnt + FROM collect.blocking_BlockedProcessReport + WHERE collection_time >= @windowStart AND collection_time < @windowEnd + GROUP BY DATEADD(MINUTE, DATEDIFF(MINUTE, 0, collection_time), 0) +), +per_interval AS ( + SELECT s.minute_bucket, ISNULL(e.cnt, 0) AS event_count + FROM spine s + LEFT JOIN event_counts e ON s.minute_bucket = e.minute_bucket +) +SELECT DATEPART(HOUR, minute_bucket) AS hour_of_day, + (DATEPART(WEEKDAY, minute_bucket) + @@DATEFIRST - 1) % 7 AS day_of_week, + AVG(event_count) AS mean_val, + STDEV(event_count) AS stddev_val, + COUNT(*) AS sample_count +FROM per_interval +GROUP BY DATEPART(HOUR, minute_bucket), + (DATEPART(WEEKDAY, minute_bucket) + @@DATEFIRST - 1) % 7;", + + // Event-based — same spine approach as blocking + SqlServerMetricNames.Deadlock => @" +SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; + +;WITH spine AS ( + SELECT DISTINCT DATEADD(MINUTE, DATEDIFF(MINUTE, 0, collection_time), 0) AS minute_bucket + FROM collect.wait_stats + WHERE collection_time >= @windowStart AND collection_time < @windowEnd +), +event_counts AS ( + SELECT DATEADD(MINUTE, DATEDIFF(MINUTE, 0, collection_time), 0) AS minute_bucket, + CAST(COUNT(*) AS FLOAT) AS cnt + FROM collect.deadlocks + WHERE collection_time >= @windowStart AND collection_time < @windowEnd + GROUP BY DATEADD(MINUTE, DATEDIFF(MINUTE, 0, collection_time), 0) +), +per_interval AS ( + SELECT s.minute_bucket, ISNULL(e.cnt, 0) AS event_count + FROM spine s + LEFT JOIN event_counts e ON s.minute_bucket = e.minute_bucket +) +SELECT DATEPART(HOUR, minute_bucket) AS hour_of_day, + (DATEPART(WEEKDAY, minute_bucket) + @@DATEFIRST - 1) % 7 AS day_of_week, + AVG(event_count) AS mean_val, + STDEV(event_count) AS stddev_val, + COUNT(*) AS sample_count +FROM per_interval +GROUP BY DATEPART(HOUR, minute_bucket), + (DATEPART(WEEKDAY, minute_bucket) + @@DATEFIRST - 1) % 7;", + + _ => null + }; + } + + /// + /// Collapses multiple day-of-week buckets for the same hour into a single + /// hour-only bucket using pooled statistics. + /// + private static BaselineBucket CollapseToHourOnly(List hourBuckets) + { + var totalSamples = hourBuckets.Sum(b => b.SampleCount); + if (totalSamples == 0) + return BaselineBucket.Empty; + + // Weighted mean across all day-of-week buckets for this hour + var weightedMean = hourBuckets.Sum(b => b.Mean * b.SampleCount) / totalSamples; + + // Pooled standard deviation + var pooledVariance = PoolVariance(hourBuckets, weightedMean); + + return new BaselineBucket + { + HourOfDay = hourBuckets[0].HourOfDay, + DayOfWeek = -1, // Indicates hour-only + Mean = weightedMean, + StdDev = Math.Sqrt(pooledVariance), + SampleCount = totalSamples, + Tier = BaselineTier.HourOnly + }; + } + + /// + /// Collapses all buckets into a single flat baseline (equivalent to old 24h behavior). + /// + private static BaselineBucket CollapseToFlat(List allBuckets) + { + var totalSamples = allBuckets.Sum(b => b.SampleCount); + if (totalSamples == 0) + return BaselineBucket.Empty; + + var weightedMean = allBuckets.Sum(b => b.Mean * b.SampleCount) / totalSamples; + var pooledVariance = PoolVariance(allBuckets, weightedMean); + + return new BaselineBucket + { + HourOfDay = -1, + DayOfWeek = -1, + Mean = weightedMean, + StdDev = Math.Sqrt(pooledVariance), + SampleCount = totalSamples, + Tier = BaselineTier.Flat + }; + } + + /// + /// Computes pooled variance from multiple buckets, accounting for both + /// within-bucket variance and between-bucket mean differences. + /// + private static double PoolVariance(List buckets, double grandMean) + { + var totalSamples = buckets.Sum(b => b.SampleCount); + if (totalSamples <= 1) return 0; + + double totalSumSq = 0; + foreach (var b in buckets) + { + if (b.SampleCount <= 0) continue; + // Within-bucket variance contribution + totalSumSq += (b.StdDev * b.StdDev) * (b.SampleCount - 1); + // Between-bucket mean difference contribution + totalSumSq += b.SampleCount * (b.Mean - grandMean) * (b.Mean - grandMean); + } + + return totalSumSq / (totalSamples - 1); + } + + private class CachedBaseline + { + public DateTime ComputedAt { get; init; } + public DateTime RealTime { get; init; } + public Dictionary<(int HourOfDay, int DayOfWeek), BaselineBucket>? Buckets { get; init; } + } +} + +/// +/// Represents the computed baseline statistics for a single time bucket. +/// +public class BaselineBucket +{ + public int HourOfDay { get; init; } + public int DayOfWeek { get; init; } + public double Mean { get; init; } + public double StdDev { get; init; } + public long SampleCount { get; init; } + public BaselineTier Tier { get; init; } + + public static BaselineBucket Empty => new() + { + HourOfDay = -1, DayOfWeek = -1, Mean = 0, StdDev = 0, + SampleCount = 0, Tier = BaselineTier.Flat + }; + + /// + /// Returns the effective stddev with a proportional minimum floor to prevent + /// division-by-zero in z-score calculations. When both mean and stddev are 0 + /// (zero activity), returns 0 — callers should skip scoring. + /// + public double EffectiveStdDev + { + get + { + if (Mean == 0 && StdDev <= 0) return 0; // Zero activity — skip scoring + return Math.Max(StdDev, Mean * 0.01); + } + } +} + +public enum BaselineTier +{ + Full, // hour + day-of-week (168 buckets) + HourOnly, // hour only (24 buckets) + Flat // global mean/stddev +} + +/// Metric name constants used as baseline cache keys. +public static class SqlServerMetricNames +{ + public const string Cpu = "cpu"; + public const string BatchRequests = "batch_requests"; + public const string WaitStats = "wait_stats"; + public const string SessionCount = "session_count"; + public const string QueryDuration = "query_duration"; + public const string IoLatency = "io_latency"; + public const string Blocking = "blocking"; + public const string Deadlock = "deadlock"; +} diff --git a/Dashboard/Controls/CorrelatedTimelineLanesControl.xaml b/Dashboard/Controls/CorrelatedTimelineLanesControl.xaml index 46b68159..0d58d368 100644 --- a/Dashboard/Controls/CorrelatedTimelineLanesControl.xaml +++ b/Dashboard/Controls/CorrelatedTimelineLanesControl.xaml @@ -18,15 +18,11 @@ - - @@ -34,15 +30,11 @@ - - @@ -50,7 +42,6 @@ - - - + - - - @@ -86,15 +70,11 @@ - - diff --git a/Dashboard/Controls/CorrelatedTimelineLanesControl.xaml.cs b/Dashboard/Controls/CorrelatedTimelineLanesControl.xaml.cs index 94aa4291..9af24f3f 100644 --- a/Dashboard/Controls/CorrelatedTimelineLanesControl.xaml.cs +++ b/Dashboard/Controls/CorrelatedTimelineLanesControl.xaml.cs @@ -16,6 +16,7 @@ using System.Threading.Tasks; using System.Windows; using System.Windows.Controls; +using PerformanceMonitorDashboard.Analysis; using PerformanceMonitorDashboard.Helpers; using PerformanceMonitorDashboard.Services; @@ -24,6 +25,7 @@ namespace PerformanceMonitorDashboard.Controls; public partial class CorrelatedTimelineLanesControl : UserControl { private DatabaseService? _dataService; + private SqlServerBaselineProvider? _baselineProvider; private CorrelatedCrosshairManager? _crosshairManager; private bool _isRefreshing; @@ -34,12 +36,13 @@ public CorrelatedTimelineLanesControl() } /// - /// Initializes the control with the data service. + /// Initializes the control with the data service and optional baseline provider. /// Must be called before RefreshAsync. /// - public void Initialize(DatabaseService dataService) + public void Initialize(DatabaseService dataService, SqlServerBaselineProvider? baselineProvider = null) { _dataService = dataService; + _baselineProvider = baselineProvider; var charts = new[] { CpuChart, WaitStatsChart, BlockingChart, MemoryChart, FileIoChart }; foreach (var chart in charts) @@ -50,17 +53,18 @@ public void Initialize(DatabaseService dataService) } _crosshairManager = new CorrelatedCrosshairManager(); - _crosshairManager.AddLane(CpuChart, "CPU", "%", CpuValueLabel); - _crosshairManager.AddLane(WaitStatsChart, "Wait Stats", "ms/sec", WaitStatsValueLabel); - _crosshairManager.AddLane(BlockingChart, "Blocking", "events", BlockingValueLabel); - _crosshairManager.AddLane(MemoryChart, "Memory", "MB", MemoryValueLabel); - _crosshairManager.AddLane(FileIoChart, "I/O Latency", "ms", FileIoValueLabel); + _crosshairManager.AddLane(CpuChart, "CPU", "%"); + _crosshairManager.AddLane(WaitStatsChart, "Wait Stats", "ms/sec"); + _crosshairManager.AddLane(BlockingChart, "Blocking", "events"); + _crosshairManager.AddLane(MemoryChart, "Buffer Pool", "MB"); + _crosshairManager.AddLane(FileIoChart, "I/O Latency", "ms"); } /// /// Refreshes all lane data for the given time range. /// - public async Task RefreshAsync(int hoursBack, DateTime? fromDate, DateTime? toDate) + public async Task RefreshAsync(int hoursBack, DateTime? fromDate, DateTime? toDate, + (DateTime From, DateTime To)? comparisonRange = null) { if (_dataService == null || _isRefreshing) return; _isRefreshing = true; @@ -76,26 +80,59 @@ public async Task RefreshAsync(int hoursBack, DateTime? fromDate, DateTime? toDa var memoryTask = _dataService.GetMemoryStatsAsync(hoursBack, fromDate, toDate); var fileIoTask = _dataService.GetFileIoLatencyTimeSeriesAsync(false, hoursBack, fromDate, toDate); + // Fetch baselines for band rendering if provider is available + var referenceTime = fromDate ?? DateTime.UtcNow.AddHours(-hoursBack); + Task? cpuBaselineTask = null; + Task? waitBaselineTask = null; + Task? ioBaselineTask = null; + Task? blockingBaselineTask = null; + Task? deadlockBaselineTask = null; + + if (_baselineProvider != null) + { + cpuBaselineTask = GetBaselineAsync(SqlServerMetricNames.Cpu, referenceTime); + waitBaselineTask = GetBaselineAsync(SqlServerMetricNames.WaitStats, referenceTime); + ioBaselineTask = GetBaselineAsync(SqlServerMetricNames.IoLatency, referenceTime); + blockingBaselineTask = GetBaselineAsync(SqlServerMetricNames.Blocking, referenceTime); + deadlockBaselineTask = GetBaselineAsync(SqlServerMetricNames.Deadlock, referenceTime); + } + try { - await Task.WhenAll(cpuTask, waitTask, blockingTask, deadlockTask, memoryTask, fileIoTask); + var tasks = new List { cpuTask, waitTask, blockingTask, deadlockTask, memoryTask, fileIoTask }; + if (cpuBaselineTask != null) tasks.Add(cpuBaselineTask); + if (waitBaselineTask != null) tasks.Add(waitBaselineTask); + if (ioBaselineTask != null) tasks.Add(ioBaselineTask); + if (blockingBaselineTask != null) tasks.Add(blockingBaselineTask); + if (deadlockBaselineTask != null) tasks.Add(deadlockBaselineTask); + await Task.WhenAll(tasks); } catch (Exception ex) { Debug.WriteLine($"CorrelatedLanes: Data fetch failed: {ex.Message}"); } + var cpuBaseline = cpuBaselineTask is { IsCompletedSuccessfully: true } ? cpuBaselineTask.Result : null; + var waitBaseline = waitBaselineTask is { IsCompletedSuccessfully: true } ? waitBaselineTask.Result : null; + var ioBaseline = ioBaselineTask is { IsCompletedSuccessfully: true } ? ioBaselineTask.Result : null; + var blockingBaseline = blockingBaselineTask is { IsCompletedSuccessfully: true } ? blockingBaselineTask.Result : null; + var deadlockBaseline = deadlockBaselineTask is { IsCompletedSuccessfully: true } ? deadlockBaselineTask.Result : null; + var blockingLaneBaseline = blockingBaseline ?? deadlockBaseline; + + // minAnomalyValue: absolute floor below which dots/arrows are suppressed even if outside band. + // Prevents "1% CPU above 0.5% baseline" false alarms on idle servers. if (cpuTask.IsCompletedSuccessfully) UpdateLane(CpuChart, "CPU %", - cpuTask.Result.Select(d => (d.SampleTime.ToOADate(), (double)d.SqlServerCpuUtilization)).ToList(), - "#4FC3F7", 0, 105); + cpuTask.Result.OrderBy(d => d.SampleTime) + .Select(d => (d.SampleTime.ToOADate(), (double)d.SqlServerCpuUtilization)).ToList(), + "#4FC3F7", 0, 105, cpuBaseline, minAnomalyValue: 10); else ShowEmpty(CpuChart, "CPU %"); if (waitTask.IsCompletedSuccessfully) UpdateLane(WaitStatsChart, "Wait ms/sec", waitTask.Result.Select(d => (d.CollectionTime.ToOADate(), (double)d.WaitTimeMsPerSecond)).ToList(), - "#FFB74D"); + "#FFB74D", baseline: waitBaseline, minAnomalyValue: 100); else ShowEmpty(WaitStatsChart, "Wait ms/sec"); @@ -113,7 +150,7 @@ public async Task RefreshAsync(int hoursBack, DateTime? fromDate, DateTime? toDa .Select(d => (d.CollectionTime.ToOADate(), (double)d.BlockedCount)) .ToList() : new List<(double, double)>(); - UpdateBlockingLane(blockingData, deadlockData); + UpdateBlockingLane(blockingData, deadlockData, blockingLaneBaseline); } catch (Exception ex) { @@ -122,11 +159,11 @@ public async Task RefreshAsync(int hoursBack, DateTime? fromDate, DateTime? toDa } if (memoryTask.IsCompletedSuccessfully) - UpdateLane(MemoryChart, "Memory MB", + UpdateLane(MemoryChart, "Buffer Pool MB", memoryTask.Result.Select(d => (d.CollectionTime.ToOADate(), (double)d.TotalMemoryMb)).ToList(), "#CE93D8"); else - ShowEmpty(MemoryChart, "Memory MB"); + ShowEmpty(MemoryChart, "Buffer Pool MB"); if (fileIoTask.IsCompletedSuccessfully) { @@ -135,11 +172,63 @@ public async Task RefreshAsync(int hoursBack, DateTime? fromDate, DateTime? toDa .OrderBy(g => g.Key) .Select(g => (g.Key.ToOADate(), (double)g.Average(x => x.ReadLatencyMs))) .ToList(); - UpdateLane(FileIoChart, "I/O ms", ioGrouped, "#81C784"); + UpdateLane(FileIoChart, "I/O ms", ioGrouped, "#81C784", baseline: ioBaseline, minAnomalyValue: 2); } else ShowEmpty(FileIoChart, "I/O ms"); + // Comparison overlay — fetch reference period data and render as ghost lines + if (comparisonRange.HasValue) + { + var refFrom = comparisonRange.Value.From; + var refTo = comparisonRange.Value.To; + var timeShift = (fromDate ?? DateTime.UtcNow.AddHours(-hoursBack)) - refFrom; + + var refCpuTask = _dataService.GetCpuUtilizationAsync(0, refFrom, refTo); + var refWaitTask = _dataService.GetTotalWaitStatsTrendAsync(0, refFrom, refTo); + var refBlockingTask = _dataService.GetBlockedSessionTrendAsync(0, refFrom, refTo); + var refMemoryTask = _dataService.GetMemoryStatsAsync(0, refFrom, refTo); + var refIoTask = _dataService.GetFileIoLatencyTimeSeriesAsync(false, 0, refFrom, refTo); + + try { await Task.WhenAll(refCpuTask, refWaitTask, refBlockingTask, refMemoryTask, refIoTask); } + catch (Exception ex) { Debug.WriteLine($"CorrelatedLanes: Comparison fetch failed: {ex.Message}"); } + + if (refCpuTask.IsCompletedSuccessfully) + AddGhostLine(CpuChart, refCpuTask.Result + .Select(d => (d.SampleTime.Add(timeShift).ToOADate(), (double)d.SqlServerCpuUtilization)).ToList(), "#4FC3F7"); + + if (refWaitTask.IsCompletedSuccessfully) + AddGhostLine(WaitStatsChart, refWaitTask.Result + .Select(d => (d.CollectionTime.Add(timeShift).ToOADate(), (double)d.WaitTimeMsPerSecond)).ToList(), "#FFB74D"); + + if (refBlockingTask.IsCompletedSuccessfully) + { + var refBlocking = refBlockingTask.Result + .GroupBy(d => d.CollectionTime) + .OrderBy(g => g.Key) + .Select(g => (g.Key.Add(timeShift).ToOADate(), (double)g.Sum(x => x.BlockedCount))) + .ToList(); + if (refBlocking.Count > 0) + AddGhostLine(BlockingChart, refBlocking, "#E57373"); + } + + if (refMemoryTask.IsCompletedSuccessfully) + AddGhostLine(MemoryChart, refMemoryTask.Result + .Select(d => (d.CollectionTime.Add(timeShift).ToOADate(), (double)d.TotalMemoryMb)).ToList(), "#CE93D8"); + + if (refIoTask.IsCompletedSuccessfully) + { + var refIo = refIoTask.Result + .GroupBy(d => d.CollectionTime) + .OrderBy(g => g.Key) + .Select(g => (g.Key.Add(timeShift).ToOADate(), (double)g.Average(x => x.ReadLatencyMs))) + .ToList(); + AddGhostLine(FileIoChart, refIo, "#81C784"); + } + + _crosshairManager?.SetComparisonLabel(ComparisonLabel(comparisonRange.Value, fromDate, hoursBack)); + } + _crosshairManager?.ReattachVLines(); SyncXAxes(hoursBack, fromDate, toDate); } @@ -149,21 +238,32 @@ public async Task RefreshAsync(int hoursBack, DateTime? fromDate, DateTime? toDa } } + /// + /// Fetches a baseline bucket from the provider, wrapping in a nullable task. + /// + private async Task GetBaselineAsync(string metricName, DateTime referenceTime) + { + if (_baselineProvider == null) return null; + try + { + var bucket = await _baselineProvider.GetBaselineAsync(metricName, referenceTime); + return bucket.SampleCount > 0 ? bucket : null; + } + catch { return null; } + } + private void UpdateBlockingLane(List<(double Time, double Value)> blockingData, - List<(double Time, double Value)> deadlockData) + List<(double Time, double Value)> deadlockData, BaselineBucket? baseline = null) { ClearChart(BlockingChart); TabHelpers.ApplyThemeToChart(BlockingChart); - // Register blocking and deadlock as separate named series for the tooltip var blockTimes = blockingData.Select(d => d.Time).ToArray(); var blockValues = blockingData.Select(d => d.Value).ToArray(); var deadTimes = deadlockData.Select(d => d.Time).ToArray(); var deadValues = deadlockData.Select(d => d.Value).ToArray(); - // First series clears any previous data _crosshairManager?.SetLaneData(BlockingChart, blockTimes, blockValues, isEventBased: true); - // Rename the auto-created series and add the second _crosshairManager?.AddLaneSeries(BlockingChart, "Deadlocks", "events", deadTimes, deadValues, isEventBased: true); @@ -176,7 +276,6 @@ private void UpdateBlockingLane(List<(double Time, double Value)> blockingData, double barWidth = 30.0 / 86400.0; double maxCount = 0; - // Blocking bars — red if (blockingData.Count > 0) { var bars = blockingData.Select(d => new ScottPlot.Bar @@ -191,7 +290,6 @@ private void UpdateBlockingLane(List<(double Time, double Value)> blockingData, maxCount = Math.Max(maxCount, blockingData.Max(d => d.Value)); } - // Deadlock bars — yellow/amber, slightly narrower so both are visible if (deadlockData.Count > 0) { var bars = deadlockData.Select(d => new ScottPlot.Bar @@ -206,6 +304,31 @@ private void UpdateBlockingLane(List<(double Time, double Value)> blockingData, maxCount = Math.Max(maxCount, deadlockData.Max(d => d.Value)); } + // Baseline for blocking — event-based metrics where zero is normal. + // Even if EffectiveStdDev is 0 (all-zero baseline), still register the baseline + // so the event-based indicator check (mean < 1 → any event is ▲) works. + if (baseline != null && baseline.SampleCount > 0) + { + var effectiveStdDev = Math.Max(baseline.EffectiveStdDev, 0.01); + var upper = baseline.Mean + 2 * effectiveStdDev; + var lower = Math.Max(0, baseline.Mean - 2 * effectiveStdDev); + + _crosshairManager?.SetLaneBaseline(BlockingChart, lower, upper, isEventBased: true); + + // Only render the visual band if there's meaningful variance + if (baseline.EffectiveStdDev > 0) + { + var band = BlockingChart.Plot.Add.HorizontalSpan(lower, upper); + band.FillStyle.Color = ScottPlot.Color.FromHex("#E57373").WithAlpha(25); + band.LineStyle.Width = 0; + + var meanLine = BlockingChart.Plot.Add.HorizontalLine(baseline.Mean); + meanLine.Color = ScottPlot.Color.FromHex("#E57373").WithAlpha(60); + meanLine.LinePattern = ScottPlot.LinePattern.Dashed; + meanLine.LineWidth = 1; + } + } + BlockingChart.Plot.Axes.DateTimeTicksBottom(); BlockingChart.Plot.Axes.Bottom.TickLabelStyle.IsVisible = false; TabHelpers.ReapplyAxisColors(BlockingChart); @@ -215,13 +338,12 @@ private void UpdateBlockingLane(List<(double Time, double Value)> blockingData, BlockingChart.Plot.Legend.IsVisible = false; BlockingChart.Plot.Axes.Margins(bottom: 0); BlockingChart.Plot.Axes.SetLimitsY(0, Math.Max(maxCount * 1.3, 2)); - - BlockingChart.Refresh(); } private void UpdateLane(ScottPlot.WPF.WpfPlot chart, string title, List<(double Time, double Value)> data, string colorHex, - double? yMin = null, double? yMax = null) + double? yMin = null, double? yMax = null, BaselineBucket? baseline = null, + double minAnomalyValue = 0) { ClearChart(chart); TabHelpers.ApplyThemeToChart(chart); @@ -235,6 +357,43 @@ private void UpdateLane(ScottPlot.WPF.WpfPlot chart, string title, var times = data.Select(d => d.Time).ToArray(); var values = data.Select(d => d.Value).ToArray(); + // Render baseline band FIRST (behind the data line) + if (baseline != null && baseline.SampleCount > 0 && baseline.EffectiveStdDev > 0) + { + var upper = baseline.Mean + 2 * baseline.EffectiveStdDev; + var lower = Math.Max(0, baseline.Mean - 2 * baseline.EffectiveStdDev); + + _crosshairManager?.SetLaneBaseline(chart, lower, upper, minAnomalyValue); + + var band = chart.Plot.Add.HorizontalSpan(lower, upper); + band.FillStyle.Color = ScottPlot.Color.FromHex(colorHex).WithAlpha(25); + band.LineStyle.Width = 0; + + var meanLine = chart.Plot.Add.HorizontalLine(baseline.Mean); + meanLine.Color = ScottPlot.Color.FromHex(colorHex).WithAlpha(60); + meanLine.LinePattern = ScottPlot.LinePattern.Dashed; + meanLine.LineWidth = 1; + + // Highlight anomalous points (outside ± 2σ band AND above absolute minimum) + var anomalyIndices = new List(); + for (int i = 0; i < values.Length; i++) + { + if ((values[i] > upper && values[i] >= minAnomalyValue) || values[i] < lower) + anomalyIndices.Add(i); + } + + if (anomalyIndices.Count > 0) + { + var anomalyTimes = anomalyIndices.Select(i => times[i]).ToArray(); + var anomalyValues = anomalyIndices.Select(i => values[i]).ToArray(); + var anomalyScatter = chart.Plot.Add.Scatter(anomalyTimes, anomalyValues); + anomalyScatter.Color = ScottPlot.Color.FromHex("#FF5252"); + anomalyScatter.MarkerSize = 6; + anomalyScatter.MarkerShape = ScottPlot.MarkerShape.FilledCircle; + anomalyScatter.LineWidth = 0; + } + } + var scatter = chart.Plot.Add.Scatter(times, values); scatter.Color = ScottPlot.Color.FromHex(colorHex); scatter.MarkerSize = 0; @@ -245,13 +404,11 @@ private void UpdateLane(ScottPlot.WPF.WpfPlot chart, string title, _crosshairManager?.SetLaneData(chart, times, values); chart.Plot.Axes.DateTimeTicksBottom(); - // Hide bottom tick labels on all lanes except the last (File I/O) if (chart != FileIoChart) chart.Plot.Axes.Bottom.TickLabelStyle.IsVisible = false; TabHelpers.ReapplyAxisColors(chart); - // Compact layout: hide Y label, minimize title, no legend chart.Plot.Title(""); chart.Plot.YLabel(""); chart.Plot.Legend.IsVisible = false; @@ -266,13 +423,8 @@ private void UpdateLane(ScottPlot.WPF.WpfPlot chart, string title, var padding = Math.Max((maxVal - minVal) * 0.1, 1); chart.Plot.Axes.SetLimitsY(Math.Max(0, minVal - padding), maxVal + padding); } - - chart.Refresh(); } - /// - /// Sets identical X-axis limits across all lanes. - /// private void SyncXAxes(int hoursBack, DateTime? fromDate, DateTime? toDate) { DateTime xStart, xEnd; @@ -298,9 +450,34 @@ private void SyncXAxes(int hoursBack, DateTime? fromDate, DateTime? toDate) } } + private static void AddGhostLine(ScottPlot.WPF.WpfPlot chart, + List<(double Time, double Value)> data, string colorHex) + { + if (data.Count == 0) return; + + var times = data.Select(d => d.Time).ToArray(); + var values = data.Select(d => d.Value).ToArray(); + + var scatter = chart.Plot.Add.Scatter(times, values); + scatter.Color = ScottPlot.Colors.White.WithAlpha(140); + scatter.MarkerSize = 0; + scatter.LineWidth = 1.5f; + scatter.LinePattern = ScottPlot.LinePattern.Dashed; + } + + private static string ComparisonLabel((DateTime From, DateTime To) range, + DateTime? fromDate, int hoursBack) + { + var currentStart = fromDate ?? DateTime.UtcNow.AddHours(-hoursBack); + var daysBack = (currentStart - range.From).TotalDays; + + if (Math.Abs(daysBack - 1) < 0.5) return "yesterday"; + if (Math.Abs(daysBack - 7) < 0.5) return "last week"; + return $"{daysBack:N0}d ago"; + } + private static void ClearChart(ScottPlot.WPF.WpfPlot chart) { - chart.Reset(); chart.Plot.Clear(); } @@ -317,7 +494,6 @@ private static void ShowEmpty(ScottPlot.WPF.WpfPlot chart, string title) chart.Plot.Axes.Bottom.TickGenerator = new ScottPlot.TickGenerators.EmptyTickGenerator(); chart.Plot.Axes.Left.TickGenerator = new ScottPlot.TickGenerators.EmptyTickGenerator(); chart.Plot.Legend.IsVisible = false; - chart.Refresh(); } /// diff --git a/Dashboard/Controls/ResourceMetricsContent.xaml b/Dashboard/Controls/ResourceMetricsContent.xaml index 4067d08f..36850412 100644 --- a/Dashboard/Controls/ResourceMetricsContent.xaml +++ b/Dashboard/Controls/ResourceMetricsContent.xaml @@ -28,7 +28,21 @@ - + + + + + + + + + + + + diff --git a/Dashboard/Controls/ResourceMetricsContent.xaml.cs b/Dashboard/Controls/ResourceMetricsContent.xaml.cs index 4884c6cd..7faa537f 100644 --- a/Dashboard/Controls/ResourceMetricsContent.xaml.cs +++ b/Dashboard/Controls/ResourceMetricsContent.xaml.cs @@ -214,10 +214,11 @@ private void SetupChartContextMenus() /// /// Initializes the control with required dependencies. /// - public void Initialize(DatabaseService databaseService) + public void Initialize(DatabaseService databaseService, + Analysis.SqlServerBaselineProvider? baselineProvider = null) { _databaseService = databaseService ?? throw new ArgumentNullException(nameof(databaseService)); - CorrelatedLanes.Initialize(databaseService); + CorrelatedLanes.Initialize(databaseService, baselineProvider); } /// @@ -1024,12 +1025,41 @@ private async Task LoadFileIoThroughputChartsAsync() #region Server Trends Tab + private async void CompareToCombo_SelectionChanged(object sender, SelectionChangedEventArgs e) + { + if (!IsLoaded) return; + ComparisonRange = GetComparisonRange(); + await RefreshServerTrendsAsync(); + } + + private (DateTime From, DateTime To)? ComparisonRange { get; set; } + + /// + /// Computes the reference time range for the comparison overlay. + /// Returns null if "None" is selected. + /// + private (DateTime From, DateTime To)? GetComparisonRange() + { + if (CompareToCombo == null || CompareToCombo.SelectedIndex <= 0) return null; + + var currentEnd = _serverTrendsToDate ?? DateTime.UtcNow; + var currentStart = _serverTrendsFromDate ?? currentEnd.AddHours(-_serverTrendsHoursBack); + + return CompareToCombo.SelectedIndex switch + { + 1 => (currentStart.AddDays(-1), currentEnd.AddDays(-1)), // Yesterday + 2 => (currentStart.AddDays(-7), currentEnd.AddDays(-7)), // Last week + 3 => (currentStart.AddDays(-7), currentEnd.AddDays(-7)), // Same day last week + _ => null + }; + } + private async Task RefreshServerTrendsAsync() { if (_databaseService == null) return; try { - await CorrelatedLanes.RefreshAsync(_serverTrendsHoursBack, _serverTrendsFromDate, _serverTrendsToDate); + await CorrelatedLanes.RefreshAsync(_serverTrendsHoursBack, _serverTrendsFromDate, _serverTrendsToDate, ComparisonRange); } catch (Exception ex) { diff --git a/Dashboard/Controls/TimeRangeSlicerControl.xaml.cs b/Dashboard/Controls/TimeRangeSlicerControl.xaml.cs index 9d924862..c67c0b25 100644 --- a/Dashboard/Controls/TimeRangeSlicerControl.xaml.cs +++ b/Dashboard/Controls/TimeRangeSlicerControl.xaml.cs @@ -213,7 +213,7 @@ public void Redraw() SlicerCanvas.Children.Add(new Path { Data = lineGeo, Stroke = lineBrush, StrokeThickness = 1.5 }); // X-axis labels — evenly spaced by TIME across the full range, skip if too close - var labelBrush = FindBrush("SlicerLabelBrush", "#99E4E6EB"); + var labelBrush = FindBrush("SlicerLabelBrush", "#E4E6EB"); const double minLabelSpacingPx = 90; double lastLabelX = -minLabelSpacingPx; int targetLabels = Math.Max(2, (int)(w / minLabelSpacingPx)); diff --git a/Dashboard/Helpers/CorrelatedCrosshairManager.cs b/Dashboard/Helpers/CorrelatedCrosshairManager.cs index 7da54004..c49b0a7b 100644 --- a/Dashboard/Helpers/CorrelatedCrosshairManager.cs +++ b/Dashboard/Helpers/CorrelatedCrosshairManager.cs @@ -1,7 +1,7 @@ /* * Copyright (c) 2026 Erik Darling, Darling Data LLC * - * This file is part of the SQL Server Performance Monitor Lite. + * This file is part of the SQL Server Performance Monitor Dashboard. * * Licensed under the MIT License. See LICENSE file in the project root for full license information. * @@ -15,6 +15,7 @@ using System.Windows; using System.Windows.Controls; using System.Windows.Controls.Primitives; +using System.Windows.Documents; using System.Windows.Input; using System.Windows.Media; using PerformanceMonitorDashboard.Services; @@ -62,14 +63,13 @@ public CorrelatedCrosshairManager() /// /// Registers a chart lane for crosshair synchronization. /// - public void AddLane(ScottPlot.WPF.WpfPlot chart, string label, string unit, TextBlock valueLabel) + public void AddLane(ScottPlot.WPF.WpfPlot chart, string label, string unit) { var lane = new LaneInfo { Chart = chart, Label = label, - Unit = unit, - ValueLabel = valueLabel + Unit = unit }; chart.MouseMove += (s, e) => OnMouseMove(lane, e); @@ -78,6 +78,21 @@ public void AddLane(ScottPlot.WPF.WpfPlot chart, string label, string unit, Text _lanes.Add(lane); } + /// + /// Sets the expected baseline range for a lane (upper/lower bounds). + /// Values outside this range get ▲/▼ indicators in the tooltip. + /// + public void SetLaneBaseline(ScottPlot.WPF.WpfPlot chart, double lower, double upper, + double minAnomalyValue = 0, bool isEventBased = false) + { + var lane = _lanes.Find(l => l.Chart == chart); + if (lane == null) return; + lane.BaselineLower = lower; + lane.BaselineUpper = upper; + lane.MinAnomalyValue = minAnomalyValue; + lane.IsEventBased = isEventBased; + } + /// /// Sets a single data series for a lane (most lanes have one series). /// @@ -117,6 +132,16 @@ public void AddLaneSeries(ScottPlot.WPF.WpfPlot chart, string name, string unit, }); } + /// + /// Sets the label shown in the tooltip for comparison data (e.g., "yesterday"). + /// + public void SetComparisonLabel(string label) + { + _comparisonLabel = label; + } + + private string? _comparisonLabel; + /// /// Clears data and VLines. Call before re-populating charts. /// @@ -124,10 +149,14 @@ public void PrepareForRefresh() { _isRefreshing = true; _tooltip.IsOpen = false; + _comparisonLabel = null; foreach (var lane in _lanes) { lane.Series.Clear(); lane.VLine = null; + lane.BaselineUpper = null; + lane.BaselineLower = null; + lane.MinAnomalyValue = 0; } } @@ -165,10 +194,14 @@ private void OnMouseMove(LaneInfo sourceLane, MouseEventArgs e) var mouseCoords = sourceLane.Chart.Plot.GetCoordinates(pixel); double xValue = mouseCoords.X; - var tooltipLines = new List(); + _tooltipText.Inlines.Clear(); var time = DateTime.FromOADate(xValue); var displayTime = ServerTimeHelper.ConvertForDisplay(time, ServerTimeHelper.CurrentDisplayMode); - tooltipLines.Add(displayTime.ToString("yyyy-MM-dd HH:mm:ss")); + _tooltipText.Inlines.Add(new Run(displayTime.ToString("yyyy-MM-dd HH:mm:ss"))); + if (_comparisonLabel != null) + _tooltipText.Inlines.Add(new Run($" (dashed = {_comparisonLabel})") { Foreground = DimBrush }); + + var defaultBrush = new SolidColorBrush(Color.FromRgb(0xE0, 0xE0, 0xE0)); foreach (var lane in _lanes) { @@ -179,51 +212,49 @@ private void OnMouseMove(LaneInfo sourceLane, MouseEventArgs e) if (lane.Series.Count == 1) { - // Single series — use lane label and unit var series = lane.Series[0]; double? value = FindNearestValue(series, xValue); if (value.HasValue) { - lane.ValueLabel.Text = $"{value.Value:N1} {lane.Unit}"; - tooltipLines.Add($"{lane.Label}: {value.Value:N1} {lane.Unit}"); + var indicator = GetBaselineIndicator(lane, value.Value); + + // Tooltip: value + arrow + "30d avg" context + _tooltipText.Inlines.Add(new Run($"\n{lane.Label}: {value.Value:N1} {lane.Unit}") { Foreground = defaultBrush }); + if (indicator != null) + { + _tooltipText.Inlines.Add(new Run($" {indicator.Value.Symbol}") { Foreground = indicator.Value.Brush }); + } } else { - lane.ValueLabel.Text = ""; - tooltipLines.Add($"{lane.Label}: —"); + _tooltipText.Inlines.Add(new Run($"\n{lane.Label}: —") { Foreground = defaultBrush }); } } else if (lane.Series.Count > 1) { - // Multiple series — show each with its own name - var valueParts = new List(); foreach (var series in lane.Series) { double? value = FindNearestValue(series, xValue); string unit = series.Unit ?? lane.Unit; if (value.HasValue) { - valueParts.Add($"{value.Value:N0}"); - tooltipLines.Add($"{series.Name}: {value.Value:N0} {unit}"); + _tooltipText.Inlines.Add(new Run($"\n{series.Name}: {value.Value:N0} {unit}") { Foreground = defaultBrush }); + var indicator = GetBaselineIndicator(lane, value.Value); + if (indicator != null) + _tooltipText.Inlines.Add(new Run($" {indicator.Value.Symbol}") { Foreground = indicator.Value.Brush }); } else - { - tooltipLines.Add($"{series.Name}: —"); - } + _tooltipText.Inlines.Add(new Run($"\n{series.Name}: —") { Foreground = defaultBrush }); } - lane.ValueLabel.Text = valueParts.Count > 0 ? string.Join("/", valueParts) : ""; } else { - lane.ValueLabel.Text = ""; - tooltipLines.Add($"{lane.Label}: —"); + _tooltipText.Inlines.Add(new Run($"\n{lane.Label}: —") { Foreground = defaultBrush }); } lane.Chart.Refresh(); } - - _tooltipText.Text = string.Join("\n", tooltipLines); _tooltip.PlacementTarget = sourceLane.Chart; _tooltip.HorizontalOffset = pos.X + 15; _tooltip.VerticalOffset = pos.Y + 15; @@ -265,6 +296,38 @@ private void OnMouseMove(LaneInfo sourceLane, MouseEventArgs e) return val; } + private static readonly SolidColorBrush RedBrush = new(Color.FromRgb(0xFF, 0x52, 0x52)); + private static readonly SolidColorBrush GreenBrush = new(Color.FromRgb(0x69, 0xF0, 0x69)); + private static readonly SolidColorBrush DimBrush = new(Color.FromRgb(0x90, 0x96, 0xA0)); + + private record struct BaselineIndicator(string Symbol, SolidColorBrush Brush); + + private static string? FormatBaselineContext(LaneInfo lane) + { + if (lane.BaselineUpper == null || lane.BaselineLower == null) return null; + var mean = (lane.BaselineUpper.Value + lane.BaselineLower.Value) / 2.0; + var formatted = mean >= 1000 ? $"{mean:N0}" : mean >= 10 ? $"{mean:N1}" : $"{mean:N2}"; + return $"30d avg: ~{formatted}"; + } + + private static BaselineIndicator? GetBaselineIndicator(LaneInfo lane, double value) + { + if (lane.BaselineUpper == null || lane.BaselineLower == null) return null; + // For event-based metrics (blocking/deadlocks): value significantly above + // the baseline mean is a spike, even if within the wide ± 2σ band. + // Uses 3x mean as threshold — if you normally see ~5 events and now see 20, that's a spike. + var mean = (lane.BaselineUpper.Value + lane.BaselineLower.Value) / 2.0; + if (lane.IsEventBased && value >= 1.0 && (mean < 1.0 || value > mean * 3)) + return new BaselineIndicator("▲", RedBrush); + // ▲ requires both: outside band AND above absolute minimum (prevents 1% CPU false alarms) + if (value > lane.BaselineUpper.Value && value >= lane.MinAnomalyValue) + return new BaselineIndicator("▲", RedBrush); + // ▼ always shown when below band (drops are always interesting — tuning feedback) + if (value < lane.BaselineLower.Value) + return new BaselineIndicator("▼", GreenBrush); + return null; + } + private void OnMouseLeave() { _tooltip.IsOpen = false; @@ -272,7 +335,6 @@ private void OnMouseLeave() { if (lane.VLine != null) lane.VLine.IsVisible = false; - lane.ValueLabel.Text = ""; lane.Chart.Refresh(); } } @@ -303,7 +365,10 @@ private class LaneInfo public string Label { get; set; } = ""; public string Unit { get; set; } = ""; public ScottPlot.Plottables.VerticalLine? VLine { get; set; } - public TextBlock ValueLabel { get; set; } = null!; public List Series { get; set; } = new(); + public double? BaselineUpper { get; set; } + public double? BaselineLower { get; set; } + public double MinAnomalyValue { get; set; } + public bool IsEventBased { get; set; } } } diff --git a/Dashboard/Helpers/TabHelpers.cs b/Dashboard/Helpers/TabHelpers.cs index 6a5a7361..70f6533d 100644 --- a/Dashboard/Helpers/TabHelpers.cs +++ b/Dashboard/Helpers/TabHelpers.cs @@ -157,7 +157,7 @@ public static void ApplyThemeToChart(WpfPlot chart) { figureBackground = ScottPlot.Color.FromHex("#EEF4FA"); dataBackground = ScottPlot.Color.FromHex("#DAE6F0"); - textColor = ScottPlot.Color.FromHex("#364D61"); + textColor = ScottPlot.Color.FromHex("#1A2A3A"); gridColor = ScottPlot.Color.FromHex("#A8BDD0").WithAlpha(120); legendBg = ScottPlot.Color.FromHex("#EEF4FA"); legendFg = ScottPlot.Color.FromHex("#1A2A3A"); @@ -167,7 +167,7 @@ public static void ApplyThemeToChart(WpfPlot chart) { figureBackground = ScottPlot.Color.FromHex("#FFFFFF"); dataBackground = ScottPlot.Color.FromHex("#F5F7FA"); - textColor = ScottPlot.Color.FromHex("#4A5568"); + textColor = ScottPlot.Color.FromHex("#1A1D23"); gridColor = ScottPlot.Colors.Black.WithAlpha(20); legendBg = ScottPlot.Color.FromHex("#FFFFFF"); legendFg = ScottPlot.Color.FromHex("#1A1D23"); @@ -177,7 +177,7 @@ public static void ApplyThemeToChart(WpfPlot chart) { figureBackground = ScottPlot.Color.FromHex("#22252b"); dataBackground = ScottPlot.Color.FromHex("#111217"); - textColor = ScottPlot.Color.FromHex("#9DA5B4"); + textColor = ScottPlot.Color.FromHex("#E4E6EB"); gridColor = ScottPlot.Colors.White.WithAlpha(40); legendBg = ScottPlot.Color.FromHex("#22252b"); legendFg = ScottPlot.Color.FromHex("#E4E6EB"); @@ -226,10 +226,10 @@ private static void HandleChartFirstLoaded(object sender, RoutedEventArgs e) public static void ReapplyAxisColors(WpfPlot chart) { var textColor = ThemeManager.CurrentTheme == "CoolBreeze" - ? ScottPlot.Color.FromHex("#364D61") + ? ScottPlot.Color.FromHex("#1A2A3A") : ThemeManager.HasLightBackground - ? ScottPlot.Color.FromHex("#4A5568") - : ScottPlot.Color.FromHex("#9DA5B4"); + ? ScottPlot.Color.FromHex("#1A1D23") + : ScottPlot.Color.FromHex("#E4E6EB"); chart.Plot.Axes.Bottom.TickLabelStyle.ForeColor = textColor; chart.Plot.Axes.Left.TickLabelStyle.ForeColor = textColor; chart.Plot.Axes.Bottom.Label.ForeColor = textColor; diff --git a/Dashboard/ServerTab.xaml b/Dashboard/ServerTab.xaml index 9ff26d3c..e1d831b0 100644 --- a/Dashboard/ServerTab.xaml +++ b/Dashboard/ServerTab.xaml @@ -429,7 +429,7 @@ diff --git a/Dashboard/ServerTab.xaml.cs b/Dashboard/ServerTab.xaml.cs index b3e2872b..c207bad9 100644 --- a/Dashboard/ServerTab.xaml.cs +++ b/Dashboard/ServerTab.xaml.cs @@ -158,7 +158,8 @@ public ServerTab(ServerConnection serverConnection, int utcOffsetMinutes = 0) SetDrillDownGlobalRange(from, to); }; SystemEventsContent.Initialize(_databaseService); - ResourceMetricsContent.Initialize(_databaseService); + var baselineProvider = new Analysis.SqlServerBaselineProvider(_databaseService.ConnectionString); + ResourceMetricsContent.Initialize(_databaseService, baselineProvider); ResourceMetricsContent.ChartDrillDownRequested += OnChildChartDrillDown; // Set default time range on UserControls based on user preferences diff --git a/Dashboard/Services/EmailTemplateBuilder.cs b/Dashboard/Services/EmailTemplateBuilder.cs index c2b9efbb..c76ceff1 100644 --- a/Dashboard/Services/EmailTemplateBuilder.cs +++ b/Dashboard/Services/EmailTemplateBuilder.cs @@ -116,7 +116,7 @@ private static string BuildHtmlBody( sb.Append($""); sb.Append("
"); sb.Append($"SQL Server Performance Monitor
"); - sb.Append($"{WebUtility.HtmlEncode(EditionName)}"); + sb.Append($"{WebUtility.HtmlEncode(EditionName)}"); sb.Append("
"); sb.Append(""); @@ -167,7 +167,7 @@ private static string BuildHtmlBody( /* Footer */ sb.Append("
 
"); sb.Append(""); - sb.Append($""); + sb.Append($""); sb.Append($"Sent by {WebUtility.HtmlEncode(EditionName)}"); if (!isTest) { @@ -200,7 +200,7 @@ private static void AppendDetailSection(StringBuilder sb, AlertContext context) /* Separator + heading */ sb.Append("
 
"); sb.Append(""); - sb.Append($"RECENT EVENTS"); + sb.Append($"RECENT EVENTS"); sb.Append(""); foreach (var item in context.Details) diff --git a/Dashboard/Themes/CoolBreezeTheme.xaml b/Dashboard/Themes/CoolBreezeTheme.xaml index ab50dea2..4eb70025 100644 --- a/Dashboard/Themes/CoolBreezeTheme.xaml +++ b/Dashboard/Themes/CoolBreezeTheme.xaml @@ -22,7 +22,7 @@ #1A2A3A - #364D61 + #1A2A3A #5B7A90 diff --git a/Dashboard/Themes/DarkTheme.xaml b/Dashboard/Themes/DarkTheme.xaml index 44519117..73cc58cb 100644 --- a/Dashboard/Themes/DarkTheme.xaml +++ b/Dashboard/Themes/DarkTheme.xaml @@ -22,7 +22,7 @@ #E4E6EB - #9DA5B4 + #E4E6EB #6B7280 @@ -1265,7 +1265,7 @@ - + diff --git a/Dashboard/Themes/LightTheme.xaml b/Dashboard/Themes/LightTheme.xaml index 882619fd..b2fff248 100644 --- a/Dashboard/Themes/LightTheme.xaml +++ b/Dashboard/Themes/LightTheme.xaml @@ -22,7 +22,7 @@ #1A1D23 - #4A5568 + #1A1D23 #718096 diff --git a/InstallerGui/App.xaml b/InstallerGui/App.xaml index 7c9deaae..f28634d7 100644 --- a/InstallerGui/App.xaml +++ b/InstallerGui/App.xaml @@ -14,7 +14,7 @@ - + diff --git a/InstallerGui/Themes/DarkTheme.xaml b/InstallerGui/Themes/DarkTheme.xaml index 2b9092e5..7e8d0268 100644 --- a/InstallerGui/Themes/DarkTheme.xaml +++ b/InstallerGui/Themes/DarkTheme.xaml @@ -17,7 +17,7 @@ - + diff --git a/Lite.Tests/AnomalyDetectorTests.cs b/Lite.Tests/AnomalyDetectorTests.cs new file mode 100644 index 00000000..1efdab95 --- /dev/null +++ b/Lite.Tests/AnomalyDetectorTests.cs @@ -0,0 +1,493 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Threading.Tasks; +using DuckDB.NET.Data; +using PerformanceMonitorLite.Analysis; +using PerformanceMonitorLite.Database; +using Xunit; + +namespace PerformanceMonitorLite.Tests; + +/// +/// Tests for the upgraded AnomalyDetector: time-bucketed baselines, new detection +/// methods (batch requests, sessions, query duration, memory), per-metric thresholds, +/// and baseline context metadata. +/// +public class AnomalyDetectorTests : IDisposable +{ + private readonly string _tempDir; + private readonly DuckDbInitializer _duckDb; + private readonly BaselineProvider _baselineProvider; + private readonly AnomalyDetector _detector; + + private const int ServerId = -999; + private const string ServerName = "TestServer"; + + // Fixed timestamps for deterministic testing + private static readonly DateTime _now = DateTime.UtcNow; + private static readonly DateTime _analysisEnd = _now; + private static readonly DateTime _analysisStart = _now.AddHours(-4); + + private long _nextId = -1; + + public AnomalyDetectorTests() + { + _tempDir = Path.Combine(Path.GetTempPath(), "AnomalyTests_" + Guid.NewGuid().ToString("N")[..8]); + Directory.CreateDirectory(_tempDir); + var dbPath = Path.Combine(_tempDir, "test.duckdb"); + _duckDb = new DuckDbInitializer(dbPath); + _baselineProvider = new BaselineProvider(_duckDb); + _detector = new AnomalyDetector(_duckDb, _baselineProvider); + BaselineProvider.CacheTtl = TimeSpan.FromMilliseconds(1); + } + + public void Dispose() + { + try + { + if (Directory.Exists(_tempDir)) + Directory.Delete(_tempDir, recursive: true); + } + catch { } + } + + private AnalysisContext CreateContext() => new() + { + ServerId = ServerId, + ServerName = ServerName, + TimeRangeStart = _analysisStart, + TimeRangeEnd = _analysisEnd + }; + + // ── Batch Requests ── + + [Fact] + public async Task DetectBatchRequestAnomalies_Spike_DetectsAnomaly() + { + await _duckDb.InitializeAsync(); + + // Baseline: normal batch requests (~5000) + await SeedBaselinePerfmon("Batch Requests/sec", 5000, variance: 200); + + // Analysis window: spike to 15000 + for (int i = 0; i < 16; i++) + await SeedPerfmonAsync(_analysisStart.AddMinutes(i * 15), "Batch Requests/sec", 15000); + + // Need wait/cpu data for HasBaselineDataAsync + await SeedBaselineCpu(10, variance: 2); + + var anomalies = await _detector.DetectAnomaliesAsync(CreateContext()); + + Assert.Contains(anomalies, f => f.Key == "ANOMALY_BATCH_REQUESTS"); + var fact = anomalies.First(f => f.Key == "ANOMALY_BATCH_REQUESTS"); + Assert.True(fact.Metadata["deviation_sigma"] >= 2.0); + Assert.True(fact.Metadata.ContainsKey("baseline_hour")); + Assert.True(fact.Metadata.ContainsKey("baseline_dow")); + Assert.True(fact.Metadata.ContainsKey("baseline_tier")); + } + + [Fact] + public async Task DetectBatchRequestAnomalies_Normal_NoAnomaly() + { + await _duckDb.InitializeAsync(); + + await SeedBaselinePerfmon("Batch Requests/sec", 5000, variance: 200); + + // Analysis window: same as baseline + for (int i = 0; i < 16; i++) + await SeedPerfmonAsync(_analysisStart.AddMinutes(i * 15), "Batch Requests/sec", 5000); + + await SeedBaselineCpu(10, variance: 2); + + var anomalies = await _detector.DetectAnomaliesAsync(CreateContext()); + + Assert.DoesNotContain(anomalies, f => f.Key == "ANOMALY_BATCH_REQUESTS"); + } + + // ── Session Count ── + + [Fact] + public async Task DetectSessionAnomalies_Spike_DetectsAnomaly() + { + await _duckDb.InitializeAsync(); + + // Baseline: ~20 connections + await SeedBaselineSessions(20, variance: 2); + + // Analysis window: spike to 200 connections + for (int i = 0; i < 16; i++) + { + var t = _analysisStart.AddMinutes(i * 15); + await SeedSessionStatAsync(t, "App1", 150); + await SeedSessionStatAsync(t, "App2", 50); + } + + await SeedBaselineCpu(10, variance: 2); + // CPU data in analysis window (needed for HasBaselineDataAsync and CPU detector to not exit early) + for (int i = 0; i < 4; i++) + await SeedCpuAsync(_analysisStart.AddMinutes(i * 15), 10); + + var anomalies = await _detector.DetectAnomaliesAsync(CreateContext()); + + Assert.Contains(anomalies, f => f.Key == "ANOMALY_SESSION_SPIKE"); + } + + [Fact] + public async Task DetectSessionAnomalies_Normal_NoAnomaly() + { + await _duckDb.InitializeAsync(); + + await SeedBaselineSessions(20, variance: 2); + + // Analysis window: same as baseline + for (int i = 0; i < 16; i++) + { + var t = _analysisStart.AddMinutes(i * 15); + await SeedSessionStatAsync(t, "App1", 15); + await SeedSessionStatAsync(t, "App2", 5); + } + + await SeedBaselineCpu(10, variance: 2); + + var anomalies = await _detector.DetectAnomaliesAsync(CreateContext()); + + Assert.DoesNotContain(anomalies, f => f.Key == "ANOMALY_SESSION_SPIKE"); + } + + // ── Query Duration ── + + [Fact] + public async Task DetectQueryDurationAnomalies_Spike_DetectsAnomaly() + { + await _duckDb.InitializeAsync(); + + // Baseline: ~10000 microseconds total elapsed per collection + await SeedBaselineQueryStats(10_000, variance: 1000); + + // Analysis window: spike to 500000 microseconds + for (int i = 0; i < 16; i++) + await SeedQueryStatAsync(_analysisStart.AddMinutes(i * 15), 500_000, 100); + + await SeedBaselineCpu(10, variance: 2); + await SeedBaselineWaits(); + + var anomalies = await _detector.DetectAnomaliesAsync(CreateContext()); + + Assert.Contains(anomalies, f => f.Key == "ANOMALY_QUERY_DURATION"); + } + + // ── Memory Pressure ── + + [Fact] + public async Task DetectMemoryAnomalies_HighPressure_DetectsAnomaly() + { + await _duckDb.InitializeAsync(); + + // Baseline: ~70% memory pressure + await SeedBaselineMemory(70_000, 100_000); + + // Analysis window: spike to 99% + for (int i = 0; i < 16; i++) + await SeedMemoryStatAsync(_analysisStart.AddMinutes(i * 15), 99_000, 100_000); + + await SeedBaselineCpu(10, variance: 2); + + var anomalies = await _detector.DetectAnomaliesAsync(CreateContext()); + + Assert.Contains(anomalies, f => f.Key == "ANOMALY_MEMORY_PRESSURE"); + } + + [Fact] + public async Task DetectMemoryAnomalies_Normal_NoAnomaly() + { + await _duckDb.InitializeAsync(); + + await SeedBaselineMemory(70_000, 100_000); + + // Analysis window: same as baseline + for (int i = 0; i < 16; i++) + await SeedMemoryStatAsync(_analysisStart.AddMinutes(i * 15), 70_000, 100_000); + + await SeedBaselineCpu(10, variance: 2); + + var anomalies = await _detector.DetectAnomaliesAsync(CreateContext()); + + Assert.DoesNotContain(anomalies, f => f.Key == "ANOMALY_MEMORY_PRESSURE"); + } + + // ── Per-metric threshold ── + + [Fact] + public async Task SetDeviationThreshold_HigherThreshold_SuppressesAnomaly() + { + await _duckDb.InitializeAsync(); + + // Baseline: CPU ~10% + await SeedBaselineCpu(10, variance: 2); + + // Analysis window: CPU spike to 60% (would normally be >2σ) + for (int i = 0; i < 16; i++) + await SeedCpuAsync(_analysisStart.AddMinutes(i * 15), 60); + + // Default threshold (2σ) should detect it + var anomalies1 = await _detector.DetectAnomaliesAsync(CreateContext()); + var hasCpu1 = anomalies1.Any(f => f.Key == "ANOMALY_CPU_SPIKE"); + + // Set very high threshold — should suppress it + _detector.SetDeviationThreshold(MetricNames.Cpu, 100.0); + _baselineProvider.ClearCache(); + var anomalies2 = await _detector.DetectAnomaliesAsync(CreateContext()); + var hasCpu2 = anomalies2.Any(f => f.Key == "ANOMALY_CPU_SPIKE"); + + // Reset + _detector.SetDeviationThreshold(MetricNames.Cpu, 2.0); + + Assert.False(hasCpu2, "High threshold should suppress CPU anomaly"); + } + + // ── Baseline context metadata ── + + [Fact] + public async Task AnomalyFacts_ContainBaselineContextMetadata() + { + await _duckDb.InitializeAsync(); + + await SeedBaselineCpu(10, variance: 2); + + // Spike to trigger anomaly + for (int i = 0; i < 16; i++) + await SeedCpuAsync(_analysisStart.AddMinutes(i * 15), 90); + + var anomalies = await _detector.DetectAnomaliesAsync(CreateContext()); + var cpuAnomaly = anomalies.FirstOrDefault(f => f.Key == "ANOMALY_CPU_SPIKE"); + + if (cpuAnomaly != null) + { + Assert.True(cpuAnomaly.Metadata.ContainsKey("baseline_hour"), "Missing baseline_hour"); + Assert.True(cpuAnomaly.Metadata.ContainsKey("baseline_dow"), "Missing baseline_dow"); + Assert.True(cpuAnomaly.Metadata.ContainsKey("baseline_tier"), "Missing baseline_tier"); + Assert.True(cpuAnomaly.Metadata.ContainsKey("baseline_mean"), "Missing baseline_mean"); + Assert.True(cpuAnomaly.Metadata.ContainsKey("deviation_sigma"), "Missing deviation_sigma"); + } + } + + // ── No baseline = no anomalies ── + + [Fact] + public async Task DetectAnomalies_NoBaselineData_ReturnsEmpty() + { + await _duckDb.InitializeAsync(); + + // Only analysis window data, no baseline + for (int i = 0; i < 16; i++) + await SeedCpuAsync(_analysisStart.AddMinutes(i * 15), 90); + + var anomalies = await _detector.DetectAnomaliesAsync(CreateContext()); + + // Should not fire — no baseline to compare against + Assert.Empty(anomalies); + } + + // ── Helpers: seed baseline data in the 30-day window before analysis ── + + /// + /// Seeds baseline data across 14 days, keeping all samples within the same hour + /// as the analysis start so they land in the same time bucket. Uses 3-minute + /// intervals to stay within one hour (14 days × 4 samples = 56 total, enough + /// for flat/hour-only collapse). + /// + private async Task SeedBaselineCpu(int avgCpu, int variance) + { + var rng = new Random(42); + for (int day = 1; day <= 14; day++) + { + var baseDay = _analysisStart.AddDays(-day); + for (int i = 0; i < 4; i++) + { + var cpu = Math.Clamp(avgCpu + rng.Next(-variance, variance + 1), 0, 100); + await SeedCpuAsync(baseDay.AddMinutes(i * 3), cpu); + } + } + } + + private async Task SeedBaselinePerfmon(string counterName, long avgValue, int variance) + { + var rng = new Random(42); + for (int day = 1; day <= 14; day++) + { + var baseDay = _analysisStart.AddDays(-day); + for (int i = 0; i < 4; i++) + { + var value = Math.Max(0, avgValue + rng.Next(-variance, variance + 1)); + await SeedPerfmonAsync(baseDay.AddMinutes(i * 3), counterName, value); + } + } + } + + private async Task SeedBaselineSessions(int avgConnections, int variance) + { + var rng = new Random(42); + for (int day = 1; day <= 14; day++) + { + var baseDay = _analysisStart.AddDays(-day); + for (int i = 0; i < 4; i++) + { + var count = Math.Max(1, avgConnections + rng.Next(-variance, variance + 1)); + await SeedSessionStatAsync(baseDay.AddMinutes(i * 3), "App1", count); + } + } + } + + private async Task SeedBaselineQueryStats(long avgElapsed, int variance) + { + var rng = new Random(42); + for (int day = 1; day <= 14; day++) + { + var baseDay = _analysisStart.AddDays(-day); + for (int i = 0; i < 4; i++) + { + var elapsed = Math.Max(0, avgElapsed + rng.Next(-variance, variance + 1)); + await SeedQueryStatAsync(baseDay.AddMinutes(i * 3), elapsed, 100); + } + } + } + + private async Task SeedBaselineWaits() + { + for (int day = 1; day <= 14; day++) + { + var baseDay = _analysisStart.AddDays(-day); + for (int i = 0; i < 4; i++) + await SeedWaitStatAsync(baseDay.AddMinutes(i * 3), "SOS_SCHEDULER_YIELD", 100); + } + } + + private async Task SeedBaselineMemory(double avgTotalServerMb, double targetMb) + { + for (int day = 1; day <= 14; day++) + { + var baseDay = _analysisStart.AddDays(-day); + for (int i = 0; i < 4; i++) + await SeedMemoryStatAsync(baseDay.AddMinutes(i * 3), avgTotalServerMb, targetMb); + } + } + + // ── Helpers: seed individual rows ── + + private async Task SeedCpuAsync(DateTime time, int cpuValue) + { + using var readLock = _duckDb.AcquireReadLock(); + using var conn = _duckDb.CreateConnection(); + await conn.OpenAsync(); + using var cmd = conn.CreateCommand(); + cmd.CommandText = @"INSERT INTO cpu_utilization_stats + (collection_id, collection_time, server_id, server_name, sample_time, + sqlserver_cpu_utilization, other_process_cpu_utilization) + VALUES ($1, $2, $3, 'TestServer', $4, $5, 2)"; + cmd.Parameters.Add(new DuckDBParameter { Value = _nextId-- }); + cmd.Parameters.Add(new DuckDBParameter { Value = time }); + cmd.Parameters.Add(new DuckDBParameter { Value = ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = time }); + cmd.Parameters.Add(new DuckDBParameter { Value = cpuValue }); + await cmd.ExecuteNonQueryAsync(); + } + + private async Task SeedPerfmonAsync(DateTime time, string counterName, long deltaValue) + { + using var readLock = _duckDb.AcquireReadLock(); + using var conn = _duckDb.CreateConnection(); + await conn.OpenAsync(); + using var cmd = conn.CreateCommand(); + cmd.CommandText = @"INSERT INTO perfmon_stats + (collection_id, collection_time, server_id, server_name, + object_name, counter_name, instance_name, cntr_value, delta_cntr_value, sample_interval_seconds) + VALUES ($1, $2, $3, 'TestServer', 'SQLServer:SQL Statistics', $4, '', $5, $5, 10)"; + cmd.Parameters.Add(new DuckDBParameter { Value = _nextId-- }); + cmd.Parameters.Add(new DuckDBParameter { Value = time }); + cmd.Parameters.Add(new DuckDBParameter { Value = ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = counterName }); + cmd.Parameters.Add(new DuckDBParameter { Value = deltaValue }); + await cmd.ExecuteNonQueryAsync(); + } + + private async Task SeedWaitStatAsync(DateTime time, string waitType, long deltaWaitMs) + { + using var readLock = _duckDb.AcquireReadLock(); + using var conn = _duckDb.CreateConnection(); + await conn.OpenAsync(); + using var cmd = conn.CreateCommand(); + cmd.CommandText = @"INSERT INTO wait_stats + (collection_id, collection_time, server_id, server_name, wait_type, + waiting_tasks_count, wait_time_ms, signal_wait_time_ms, + delta_waiting_tasks, delta_wait_time_ms, delta_signal_wait_time_ms) + VALUES ($1, $2, $3, 'TestServer', $4, 0, 0, 0, 0, $5, 0)"; + cmd.Parameters.Add(new DuckDBParameter { Value = _nextId-- }); + cmd.Parameters.Add(new DuckDBParameter { Value = time }); + cmd.Parameters.Add(new DuckDBParameter { Value = ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = waitType }); + cmd.Parameters.Add(new DuckDBParameter { Value = deltaWaitMs }); + await cmd.ExecuteNonQueryAsync(); + } + + private async Task SeedSessionStatAsync(DateTime time, string programName, long connectionCount) + { + using var readLock = _duckDb.AcquireReadLock(); + using var conn = _duckDb.CreateConnection(); + await conn.OpenAsync(); + using var cmd = conn.CreateCommand(); + cmd.CommandText = @"INSERT INTO session_stats + (collection_id, collection_time, server_id, server_name, program_name, + connection_count, running_count, sleeping_count, dormant_count) + VALUES ($1, $2, $3, 'TestServer', $4, $5, 0, 0, 0)"; + cmd.Parameters.Add(new DuckDBParameter { Value = _nextId-- }); + cmd.Parameters.Add(new DuckDBParameter { Value = time }); + cmd.Parameters.Add(new DuckDBParameter { Value = ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = programName }); + cmd.Parameters.Add(new DuckDBParameter { Value = connectionCount }); + await cmd.ExecuteNonQueryAsync(); + } + + private async Task SeedQueryStatAsync(DateTime time, long deltaElapsed, long deltaExecCount) + { + using var readLock = _duckDb.AcquireReadLock(); + using var conn = _duckDb.CreateConnection(); + await conn.OpenAsync(); + using var cmd = conn.CreateCommand(); + cmd.CommandText = @"INSERT INTO query_stats + (collection_id, collection_time, server_id, server_name, + execution_count, total_elapsed_time, total_worker_time, + total_logical_reads, total_logical_writes, total_physical_reads, + delta_execution_count, delta_elapsed_time, delta_worker_time, + delta_logical_reads, delta_logical_writes, delta_physical_reads, delta_rows, delta_spills) + VALUES ($1, $2, $3, 'TestServer', $4, $5, 0, 0, 0, 0, $4, $5, 0, 0, 0, 0, 0, 0)"; + cmd.Parameters.Add(new DuckDBParameter { Value = _nextId-- }); + cmd.Parameters.Add(new DuckDBParameter { Value = time }); + cmd.Parameters.Add(new DuckDBParameter { Value = ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = deltaExecCount }); + cmd.Parameters.Add(new DuckDBParameter { Value = deltaElapsed }); + await cmd.ExecuteNonQueryAsync(); + } + + private async Task SeedMemoryStatAsync(DateTime time, double totalServerMb, double targetMb) + { + using var readLock = _duckDb.AcquireReadLock(); + using var conn = _duckDb.CreateConnection(); + await conn.OpenAsync(); + using var cmd = conn.CreateCommand(); + cmd.CommandText = @"INSERT INTO memory_stats + (collection_id, collection_time, server_id, server_name, + total_physical_memory_mb, available_physical_memory_mb, + target_server_memory_mb, total_server_memory_mb, buffer_pool_mb) + VALUES ($1, $2, $3, 'TestServer', $4, $5, $6, $7, $7)"; + cmd.Parameters.Add(new DuckDBParameter { Value = _nextId-- }); + cmd.Parameters.Add(new DuckDBParameter { Value = time }); + cmd.Parameters.Add(new DuckDBParameter { Value = ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = targetMb * 1.2 }); + cmd.Parameters.Add(new DuckDBParameter { Value = targetMb * 0.2 }); + cmd.Parameters.Add(new DuckDBParameter { Value = targetMb }); + cmd.Parameters.Add(new DuckDBParameter { Value = totalServerMb }); + await cmd.ExecuteNonQueryAsync(); + } +} diff --git a/Lite.Tests/BaselineProviderTests.cs b/Lite.Tests/BaselineProviderTests.cs new file mode 100644 index 00000000..cdf9cad8 --- /dev/null +++ b/Lite.Tests/BaselineProviderTests.cs @@ -0,0 +1,481 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Threading.Tasks; +using DuckDB.NET.Data; +using PerformanceMonitorLite.Analysis; +using PerformanceMonitorLite.Database; +using Xunit; + +namespace PerformanceMonitorLite.Tests; + +/// +/// Tests for BaselineProvider: time-bucketed baseline computation, bucket collapse +/// with hysteresis, restart poisoning exclusion, and division-by-zero handling. +/// +public class BaselineProviderTests : IDisposable +{ + private readonly string _tempDir; + private readonly DuckDbInitializer _duckDb; + private readonly BaselineProvider _provider; + + private const int ServerId = -999; + + // Analysis time is pinned to a known hour+dow for deterministic bucket matching. + // Wednesday 14:00 UTC (dow=3 in DuckDB where Sunday=0) + private static readonly DateTime AnalysisTime = new(2026, 4, 1, 14, 0, 0, DateTimeKind.Utc); + + private long _nextId = -1; + + public BaselineProviderTests() + { + _tempDir = Path.Combine(Path.GetTempPath(), "BaselineTests_" + Guid.NewGuid().ToString("N")[..8]); + Directory.CreateDirectory(_tempDir); + var dbPath = Path.Combine(_tempDir, "test.duckdb"); + _duckDb = new DuckDbInitializer(dbPath); + _provider = new BaselineProvider(_duckDb); + // Use very short TTL so cache doesn't interfere between tests + BaselineProvider.CacheTtl = TimeSpan.FromMilliseconds(1); + } + + public void Dispose() + { + try + { + if (Directory.Exists(_tempDir)) + Directory.Delete(_tempDir, recursive: true); + } + catch { /* Best-effort cleanup */ } + } + + // ── Full bucket: enough samples in one hour+dow ── + + [Fact] + public async Task GetBaseline_FullBucket_ReturnsMeanAndStdDev() + { + await _duckDb.InitializeAsync(); + + // Seed 20 CPU samples on Wednesdays at 14:xx over 4 weeks (well above RestoreThreshold=15) + for (int week = 0; week < 4; week++) + { + var wednesday = AnalysisTime.AddDays(-7 * (week + 1)); // Previous Wednesdays + for (int i = 0; i < 5; i++) + { + await SeedCpuAsync(wednesday.AddMinutes(i * 10), 50 + i * 2); // 50,52,54,56,58 + } + } + + var baseline = await _provider.GetBaselineAsync(ServerId, MetricNames.Cpu, AnalysisTime); + + Assert.True(baseline.SampleCount >= 15); // Full bucket + Assert.Equal(BaselineTier.Full, baseline.Tier); + Assert.InRange(baseline.Mean, 50, 58); // Mean of 50,52,54,56,58 repeated + Assert.True(baseline.StdDev > 0); + } + + // ── Bucket collapse: hour-only fallback ── + + [Fact] + public async Task GetBaseline_SparseBucket_CollapsesToHourOnly() + { + await _duckDb.InitializeAsync(); + + // Seed only 5 samples on Wednesday 14:xx (below CollapseThreshold=10) + var wednesday = AnalysisTime.AddDays(-7); + for (int i = 0; i < 5; i++) + await SeedCpuAsync(wednesday.AddMinutes(i * 10), 40 + i); + + // Seed 15 samples on other days at 14:xx (enough for hour-only) + for (int dow = 0; dow < 3; dow++) // Sun, Mon, Tue + { + var day = AnalysisTime.AddDays(-7 - dow - 4); // Different days, same hour + for (int i = 0; i < 5; i++) + await SeedCpuAsync(day.AddMinutes(i * 10), 60 + i); + } + + _provider.ClearCache(); + var baseline = await _provider.GetBaselineAsync(ServerId, MetricNames.Cpu, AnalysisTime); + + Assert.True(baseline.SampleCount >= 10); + Assert.Equal(BaselineTier.HourOnly, baseline.Tier); + Assert.Equal(-1, baseline.DayOfWeek); // Indicates hour-only + } + + // ── Bucket collapse: flat fallback ── + + [Fact] + public async Task GetBaseline_VerySparseBucket_CollapsesToFlat() + { + await _duckDb.InitializeAsync(); + + // Seed only 2 samples at 14:xx (below threshold for hour-only) + var day = AnalysisTime.AddDays(-7); + await SeedCpuAsync(day.AddMinutes(0), 30); + await SeedCpuAsync(day.AddMinutes(15), 35); + + // Seed 5 samples at other hours (enough for flat but not hour-only) + for (int h = 0; h < 5; h++) + await SeedCpuAsync(day.AddHours(-h - 1), 50 + h); + + _provider.ClearCache(); + var baseline = await _provider.GetBaselineAsync(ServerId, MetricNames.Cpu, AnalysisTime); + + // Should fall through to flat (7 samples total, >= 3 minimum viable) + Assert.True(baseline.SampleCount >= 3); + Assert.Equal(BaselineTier.Flat, baseline.Tier); + } + + // ── Empty baseline ── + + [Fact] + public async Task GetBaseline_NoData_ReturnsEmpty() + { + await _duckDb.InitializeAsync(); + + var baseline = await _provider.GetBaselineAsync(ServerId, MetricNames.Cpu, AnalysisTime); + + Assert.Equal(0, baseline.SampleCount); + } + + // ── Hysteresis: between collapse and restore thresholds ── + + [Fact] + public async Task GetBaseline_BetweenThresholds_UsesFullBucket() + { + await _duckDb.InitializeAsync(); + + // Seed exactly 12 samples on Wednesday 14:xx (between 10 and 15) + for (int week = 0; week < 3; week++) + { + var wednesday = AnalysisTime.AddDays(-7 * (week + 1)); + for (int i = 0; i < 4; i++) + await SeedCpuAsync(wednesday.AddMinutes(i * 10), 45 + i); + } + + _provider.ClearCache(); + var baseline = await _provider.GetBaselineAsync(ServerId, MetricNames.Cpu, AnalysisTime); + + // 12 samples >= CollapseThreshold(10), so full bucket is used (hysteresis) + Assert.Equal(12, baseline.SampleCount); + Assert.Equal(BaselineTier.Full, baseline.Tier); + } + + // ── Division by zero: proportional floor ── + + [Fact] + public void EffectiveStdDev_ZeroStdDev_UsesProportionalFloor() + { + // All identical values → stddev = 0, mean = 50 + var bucket = new BaselineBucket + { + HourOfDay = 14, DayOfWeek = 3, + Mean = 50.0, StdDev = 0.0, SampleCount = 20, + Tier = BaselineTier.Full + }; + + // Should be max(0, 50 * 0.01) = 0.5 + Assert.Equal(0.5, bucket.EffectiveStdDev); + } + + [Fact] + public void EffectiveStdDev_ZeroMeanAndZeroStdDev_ReturnsZero() + { + // Zero activity → skip scoring + var bucket = new BaselineBucket + { + HourOfDay = 14, DayOfWeek = 3, + Mean = 0.0, StdDev = 0.0, SampleCount = 20, + Tier = BaselineTier.Full + }; + + Assert.Equal(0.0, bucket.EffectiveStdDev); + } + + [Fact] + public void EffectiveStdDev_NormalStdDev_ReturnsActual() + { + var bucket = new BaselineBucket + { + HourOfDay = 14, DayOfWeek = 3, + Mean = 50.0, StdDev = 5.0, SampleCount = 20, + Tier = BaselineTier.Full + }; + + // StdDev (5.0) > Mean * 0.01 (0.5), so return actual + Assert.Equal(5.0, bucket.EffectiveStdDev); + } + + // ── Restart poisoning: cumulative counter drop excluded ── + + [Fact] + public async Task GetBaseline_BatchRequests_ExcludesRestartDrop() + { + await _duckDb.InitializeAsync(); + + // Seed batch requests with a restart-shaped drop in the middle + var baseDay = AnalysisTime.AddDays(-7); + var normalValues = new[] { 5000, 5100, 4900, 5200, 5050, 4950 }; + + for (int i = 0; i < normalValues.Length; i++) + await SeedPerfmonAsync(baseDay.AddMinutes(i * 10), "Batch Requests/sec", normalValues[i]); + + // Restart drop: value falls to 0 then recovers + await SeedPerfmonAsync(baseDay.AddMinutes(60), "Batch Requests/sec", 0); // Restart + await SeedPerfmonAsync(baseDay.AddMinutes(70), "Batch Requests/sec", 5100); // Recovery + + // Add enough more samples on other days to reach threshold + for (int d = 2; d <= 4; d++) + { + var day = AnalysisTime.AddDays(-7 * d); + for (int i = 0; i < 5; i++) + await SeedPerfmonAsync(day.AddMinutes(i * 10), "Batch Requests/sec", 5000 + i * 50); + } + + _provider.ClearCache(); + var baseline = await _provider.GetBaselineAsync(ServerId, MetricNames.BatchRequests, AnalysisTime); + + // The restart drop (0) should be excluded, so mean should be near 5000, not pulled toward 0 + Assert.True(baseline.Mean > 4000, $"Mean {baseline.Mean} should not be poisoned by restart drop"); + } + + // ── Wait stats: per-collection aggregation ── + + [Fact] + public async Task GetBaseline_WaitStats_AggregatesPerCollection() + { + await _duckDb.InitializeAsync(); + + // Seed multiple wait types at each collection time — baseline should aggregate to total + for (int week = 0; week < 4; week++) + { + var day = AnalysisTime.AddDays(-7 * (week + 1)); + for (int i = 0; i < 5; i++) + { + var t = day.AddMinutes(i * 10); + await SeedWaitStatAsync(t, "SOS_SCHEDULER_YIELD", 100); + await SeedWaitStatAsync(t, "WRITELOG", 50); + await SeedWaitStatAsync(t, "PAGEIOLATCH_SH", 30); + } + } + + _provider.ClearCache(); + var baseline = await _provider.GetBaselineAsync(ServerId, MetricNames.WaitStats, AnalysisTime); + + Assert.True(baseline.SampleCount > 0); + // Mean should be ~180 (100+50+30 per collection) + Assert.InRange(baseline.Mean, 150, 210); + } + + // ── Session count: per-collection aggregation ── + + [Fact] + public async Task GetBaseline_SessionCount_AggregatesPerCollection() + { + await _duckDb.InitializeAsync(); + + // Seed multiple program_name rows per collection + for (int week = 0; week < 4; week++) + { + var day = AnalysisTime.AddDays(-7 * (week + 1)); + for (int i = 0; i < 5; i++) + { + var t = day.AddMinutes(i * 10); + await SeedSessionStatAsync(t, "App1", 10); + await SeedSessionStatAsync(t, "App2", 5); + } + } + + _provider.ClearCache(); + var baseline = await _provider.GetBaselineAsync(ServerId, MetricNames.SessionCount, AnalysisTime); + + Assert.True(baseline.SampleCount > 0); + // Mean should be ~15 (10+5 per collection) + Assert.InRange(baseline.Mean, 12, 18); + } + + // ── Cache behavior ── + + [Fact] + public async Task GetBaseline_CacheHit_ReturnsSameResult() + { + await _duckDb.InitializeAsync(); + + for (int i = 0; i < 20; i++) + await SeedCpuAsync(AnalysisTime.AddDays(-7).AddMinutes(i * 10), 50); + + BaselineProvider.CacheTtl = TimeSpan.FromMinutes(5); + _provider.ClearCache(); + + var first = await _provider.GetBaselineAsync(ServerId, MetricNames.Cpu, AnalysisTime); + var second = await _provider.GetBaselineAsync(ServerId, MetricNames.Cpu, AnalysisTime); + + Assert.Equal(first.Mean, second.Mean); + Assert.Equal(first.SampleCount, second.SampleCount); + + // Restore short TTL + BaselineProvider.CacheTtl = TimeSpan.FromMilliseconds(1); + } + + [Fact] + public async Task InvalidateCache_ClearsServerEntries() + { + await _duckDb.InitializeAsync(); + + for (int i = 0; i < 20; i++) + await SeedCpuAsync(AnalysisTime.AddDays(-7).AddMinutes(i * 10), 50); + + BaselineProvider.CacheTtl = TimeSpan.FromMinutes(5); + _provider.ClearCache(); + + await _provider.GetBaselineAsync(ServerId, MetricNames.Cpu, AnalysisTime); + _provider.InvalidateCache(ServerId); + + // After invalidation, should recompute (no error, same result) + var after = await _provider.GetBaselineAsync(ServerId, MetricNames.Cpu, AnalysisTime); + Assert.True(after.SampleCount > 0); + + BaselineProvider.CacheTtl = TimeSpan.FromMilliseconds(1); + } + + // ── Server isolation: no cross-contamination ── + + [Fact] + public async Task GetBaseline_DifferentServers_NoCrossContamination() + { + await _duckDb.InitializeAsync(); + + int server1 = -998, server2 = -997; + + // Seed different CPU values for two servers + for (int i = 0; i < 20; i++) + { + await SeedCpuAsync(AnalysisTime.AddDays(-7).AddMinutes(i * 10), 80, server1); + await SeedCpuAsync(AnalysisTime.AddDays(-7).AddMinutes(i * 10), 20, server2); + } + + _provider.ClearCache(); + var baseline1 = await _provider.GetBaselineAsync(server1, MetricNames.Cpu, AnalysisTime); + var baseline2 = await _provider.GetBaselineAsync(server2, MetricNames.Cpu, AnalysisTime); + + Assert.InRange(baseline1.Mean, 75, 85); + Assert.InRange(baseline2.Mean, 15, 25); + } + + // ── Memory metric (Lite-only) ── + + [Fact] + public async Task GetBaseline_Memory_ComputesPressurePercent() + { + await _duckDb.InitializeAsync(); + + // 80% memory pressure: 80GB used of 100GB target + for (int week = 0; week < 4; week++) + { + var day = AnalysisTime.AddDays(-7 * (week + 1)); + for (int i = 0; i < 5; i++) + await SeedMemoryStatAsync(day.AddMinutes(i * 10), totalServerMb: 80_000, targetMb: 100_000); + } + + _provider.ClearCache(); + var baseline = await _provider.GetBaselineAsync(ServerId, MetricNames.Memory, AnalysisTime); + + Assert.True(baseline.SampleCount > 0); + Assert.InRange(baseline.Mean, 78, 82); // ~80% + } + + // ── Helpers ── + + private async Task SeedCpuAsync(DateTime time, int cpuValue, int serverId = ServerId) + { + using var readLock = _duckDb.AcquireReadLock(); + using var conn = _duckDb.CreateConnection(); + await conn.OpenAsync(); + using var cmd = conn.CreateCommand(); + cmd.CommandText = @"INSERT INTO cpu_utilization_stats + (collection_id, collection_time, server_id, server_name, sample_time, + sqlserver_cpu_utilization, other_process_cpu_utilization) + VALUES ($1, $2, $3, 'TestServer', $4, $5, 2)"; + cmd.Parameters.Add(new DuckDBParameter { Value = _nextId-- }); + cmd.Parameters.Add(new DuckDBParameter { Value = time }); + cmd.Parameters.Add(new DuckDBParameter { Value = serverId }); + cmd.Parameters.Add(new DuckDBParameter { Value = time }); + cmd.Parameters.Add(new DuckDBParameter { Value = cpuValue }); + await cmd.ExecuteNonQueryAsync(); + } + + private async Task SeedPerfmonAsync(DateTime time, string counterName, long deltaValue) + { + using var readLock = _duckDb.AcquireReadLock(); + using var conn = _duckDb.CreateConnection(); + await conn.OpenAsync(); + using var cmd = conn.CreateCommand(); + cmd.CommandText = @"INSERT INTO perfmon_stats + (collection_id, collection_time, server_id, server_name, + object_name, counter_name, instance_name, cntr_value, delta_cntr_value, sample_interval_seconds) + VALUES ($1, $2, $3, 'TestServer', 'SQLServer:SQL Statistics', $4, '', $5, $5, 10)"; + cmd.Parameters.Add(new DuckDBParameter { Value = _nextId-- }); + cmd.Parameters.Add(new DuckDBParameter { Value = time }); + cmd.Parameters.Add(new DuckDBParameter { Value = ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = counterName }); + cmd.Parameters.Add(new DuckDBParameter { Value = deltaValue }); + await cmd.ExecuteNonQueryAsync(); + } + + private async Task SeedWaitStatAsync(DateTime time, string waitType, long deltaWaitMs) + { + using var readLock = _duckDb.AcquireReadLock(); + using var conn = _duckDb.CreateConnection(); + await conn.OpenAsync(); + using var cmd = conn.CreateCommand(); + cmd.CommandText = @"INSERT INTO wait_stats + (collection_id, collection_time, server_id, server_name, wait_type, + waiting_tasks_count, wait_time_ms, signal_wait_time_ms, + delta_waiting_tasks, delta_wait_time_ms, delta_signal_wait_time_ms) + VALUES ($1, $2, $3, 'TestServer', $4, 0, 0, 0, 0, $5, 0)"; + cmd.Parameters.Add(new DuckDBParameter { Value = _nextId-- }); + cmd.Parameters.Add(new DuckDBParameter { Value = time }); + cmd.Parameters.Add(new DuckDBParameter { Value = ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = waitType }); + cmd.Parameters.Add(new DuckDBParameter { Value = deltaWaitMs }); + await cmd.ExecuteNonQueryAsync(); + } + + private async Task SeedSessionStatAsync(DateTime time, string programName, long connectionCount) + { + using var readLock = _duckDb.AcquireReadLock(); + using var conn = _duckDb.CreateConnection(); + await conn.OpenAsync(); + using var cmd = conn.CreateCommand(); + cmd.CommandText = @"INSERT INTO session_stats + (collection_id, collection_time, server_id, server_name, program_name, + connection_count, running_count, sleeping_count, dormant_count) + VALUES ($1, $2, $3, 'TestServer', $4, $5, 0, 0, 0)"; + cmd.Parameters.Add(new DuckDBParameter { Value = _nextId-- }); + cmd.Parameters.Add(new DuckDBParameter { Value = time }); + cmd.Parameters.Add(new DuckDBParameter { Value = ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = programName }); + cmd.Parameters.Add(new DuckDBParameter { Value = connectionCount }); + await cmd.ExecuteNonQueryAsync(); + } + + private async Task SeedMemoryStatAsync(DateTime time, double totalServerMb, double targetMb) + { + using var readLock = _duckDb.AcquireReadLock(); + using var conn = _duckDb.CreateConnection(); + await conn.OpenAsync(); + using var cmd = conn.CreateCommand(); + cmd.CommandText = @"INSERT INTO memory_stats + (collection_id, collection_time, server_id, server_name, + total_physical_memory_mb, available_physical_memory_mb, + target_server_memory_mb, total_server_memory_mb, buffer_pool_mb) + VALUES ($1, $2, $3, 'TestServer', $4, $5, $6, $7, $7)"; + cmd.Parameters.Add(new DuckDBParameter { Value = _nextId-- }); + cmd.Parameters.Add(new DuckDBParameter { Value = time }); + cmd.Parameters.Add(new DuckDBParameter { Value = ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = targetMb * 1.2 }); // total physical > target + cmd.Parameters.Add(new DuckDBParameter { Value = targetMb * 0.2 }); // some available + cmd.Parameters.Add(new DuckDBParameter { Value = targetMb }); + cmd.Parameters.Add(new DuckDBParameter { Value = totalServerMb }); + await cmd.ExecuteNonQueryAsync(); + } +} diff --git a/Lite.Tests/ScenarioTests.cs b/Lite.Tests/ScenarioTests.cs index dcadf1af..bc764814 100644 --- a/Lite.Tests/ScenarioTests.cs +++ b/Lite.Tests/ScenarioTests.cs @@ -445,7 +445,7 @@ public async Task WaitSpikeAnomaly_HighRatio() var facts = await collector.CollectFactsAsync(context); // Run anomaly detection (compares analysis window against baseline) - var anomalyDetector = new AnomalyDetector(_duckDb); + var anomalyDetector = new AnomalyDetector(_duckDb, new BaselineProvider(_duckDb)); var anomalies = await anomalyDetector.DetectAnomaliesAsync(context); facts.AddRange(anomalies); diff --git a/Lite/Analysis/AnalysisModels.cs b/Lite/Analysis/AnalysisModels.cs index 022bfd78..3fb6fd21 100644 --- a/Lite/Analysis/AnalysisModels.cs +++ b/Lite/Analysis/AnalysisModels.cs @@ -72,6 +72,7 @@ public class AnalysisStory public double? LeafFactValue { get; set; } public int FactCount { get; set; } public bool IsAbsolution { get; set; } + public Dictionary? RootFactMetadata { get; set; } } /// @@ -104,6 +105,12 @@ public class AnalysisFinding /// Contains supporting detail keyed by category (e.g., "top_deadlocks", "queries_at_spike"). /// public Dictionary? DrillDown { get; set; } + + /// + /// Root fact metadata from anomaly detection. Ephemeral — not persisted to DuckDB. + /// Contains baseline context (mean, stddev, tier, hour, dow) for anomaly findings. + /// + public Dictionary? RootFactMetadata { get; set; } } /// diff --git a/Lite/Analysis/AnalysisService.cs b/Lite/Analysis/AnalysisService.cs index 43fd5084..83fa23d8 100644 --- a/Lite/Analysis/AnalysisService.cs +++ b/Lite/Analysis/AnalysisService.cs @@ -23,13 +23,14 @@ public class AnalysisService private readonly InferenceEngine _engine; private readonly DrillDownCollector _drillDown; private readonly AnomalyDetector _anomalyDetector; + private readonly BaselineProvider _baselineProvider; /// /// Minimum hours of collected data required before analysis will run. /// Short collection windows distort fraction-of-period calculations — /// 5 seconds of THREADPOOL looks alarming in a 16-minute window. /// Production: 72. Dev/testing: 0.5 (raise before release). /// - internal double MinimumDataHours { get; set; } = 72; + internal double MinimumDataHours { get; set; } = 24; // TODO: restore to 72 before release /// /// Raised after each analysis run completes, providing the findings for UI display. @@ -60,7 +61,8 @@ public AnalysisService(DuckDbInitializer duckDb, IPlanFetcher? planFetcher = nul _graph = new RelationshipGraph(); _engine = new InferenceEngine(_graph); _drillDown = new DrillDownCollector(duckDb, planFetcher); - _anomalyDetector = new AnomalyDetector(duckDb); + _baselineProvider = new BaselineProvider(duckDb); + _anomalyDetector = new AnomalyDetector(duckDb, _baselineProvider); } /// diff --git a/Lite/Analysis/AnomalyDetector.cs b/Lite/Analysis/AnomalyDetector.cs index ab520fd9..1398b0e2 100644 --- a/Lite/Analysis/AnomalyDetector.cs +++ b/Lite/Analysis/AnomalyDetector.cs @@ -1,528 +1,780 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Threading.Tasks; -using DuckDB.NET.Data; -using PerformanceMonitorLite.Database; -using PerformanceMonitorLite.Services; - -namespace PerformanceMonitorLite.Analysis; - -/// -/// Detects anomalies by comparing the analysis window's metrics against a -/// baseline period. When a metric deviates significantly from baseline -/// (mean + standard deviation), an ANOMALY fact is emitted. -/// -/// This is the "oh shit" mode — detecting acute deviations that don't show -/// up in aggregate analysis because they're brief. A 5-minute CPU spike -/// that averages out over 4 hours is invisible to aggregate scoring but -/// obvious when compared against "what was this metric doing before?" -/// -/// Baseline selection: uses the 24 hours preceding the analysis window. -/// If less data is available, uses whatever exists with lower confidence. -/// -public class AnomalyDetector -{ - private readonly DuckDbInitializer _duckDb; - - /// - /// Minimum number of baseline samples needed for reliable detection. - /// Below this, anomalies are still detected but with reduced confidence. - /// - private const int MinBaselineSamples = 10; - - /// - /// Number of standard deviations above baseline mean to flag as anomalous. - /// - private const double DeviationThreshold = 2.0; - - public AnomalyDetector(DuckDbInitializer duckDb) - { - _duckDb = duckDb; - } - - /// - /// Detects anomalies by comparing the analysis window against a baseline period. - /// Returns anomaly facts to be merged into the main fact list. - /// - public async Task> DetectAnomaliesAsync(AnalysisContext context) - { - var anomalies = new List(); - - // Baseline: 24 hours preceding the analysis window - var baselineEnd = context.TimeRangeStart; - var baselineStart = baselineEnd.AddHours(-24); - - // Check if baseline period has any data at all — if not, skip all anomaly detection. - // Without baseline data, everything looks anomalous. - if (!await HasBaselineDataAsync(context.ServerId, baselineStart, baselineEnd)) - return anomalies; - - await DetectCpuAnomalies(context, baselineStart, baselineEnd, anomalies); - await DetectWaitAnomalies(context, baselineStart, baselineEnd, anomalies); - await DetectBlockingAnomalies(context, baselineStart, baselineEnd, anomalies); - await DetectIoAnomalies(context, baselineStart, baselineEnd, anomalies); - - return anomalies; - } - - /// - /// Checks if the baseline period has any collected data. - /// Uses wait_stats as canary — if waits are collected, other data is too. - /// - private async Task HasBaselineDataAsync(int serverId, DateTime baselineStart, DateTime baselineEnd) - { - try - { - using var readLock = _duckDb.AcquireReadLock(); - using var connection = _duckDb.CreateConnection(); - await connection.OpenAsync(); - - using var cmd = connection.CreateCommand(); - cmd.CommandText = @" -SELECT (SELECT COUNT(*) FROM v_wait_stats - WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3) - + (SELECT COUNT(*) FROM v_cpu_utilization_stats - WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3)"; - cmd.Parameters.Add(new DuckDBParameter { Value = serverId }); - cmd.Parameters.Add(new DuckDBParameter { Value = baselineStart }); - cmd.Parameters.Add(new DuckDBParameter { Value = baselineEnd }); - - var count = Convert.ToInt64(await cmd.ExecuteScalarAsync() ?? 0); - return count > 0; - } - catch { return false; } - } - - /// - /// Detects CPU utilization anomalies by comparing per-sample values - /// against the baseline distribution. - /// - private async Task DetectCpuAnomalies(AnalysisContext context, - DateTime baselineStart, DateTime baselineEnd, List anomalies) - { - try - { - using var readLock = _duckDb.AcquireReadLock(); - using var connection = _duckDb.CreateConnection(); - await connection.OpenAsync(); - - // Get baseline stats - using var baselineCmd = connection.CreateCommand(); - baselineCmd.CommandText = @" -SELECT AVG(sqlserver_cpu_utilization) AS mean_cpu, - STDDEV_SAMP(sqlserver_cpu_utilization) AS stddev_cpu, - COUNT(*) AS sample_count -FROM v_cpu_utilization_stats -WHERE server_id = $1 -AND collection_time >= $2 AND collection_time < $3"; - - baselineCmd.Parameters.Add(new DuckDBParameter { Value = context.ServerId }); - baselineCmd.Parameters.Add(new DuckDBParameter { Value = baselineStart }); - baselineCmd.Parameters.Add(new DuckDBParameter { Value = baselineEnd }); - - double baselineMean = 0, baselineStdDev = 0; - long baselineSamples = 0; - - using (var reader = await baselineCmd.ExecuteReaderAsync()) - { - if (await reader.ReadAsync()) - { - baselineMean = reader.IsDBNull(0) ? 0 : Convert.ToDouble(reader.GetValue(0)); - baselineStdDev = reader.IsDBNull(1) ? 0 : Convert.ToDouble(reader.GetValue(1)); - baselineSamples = reader.IsDBNull(2) ? 0 : Convert.ToInt64(reader.GetValue(2)); - } - } - - if (baselineSamples < 3 || baselineStdDev <= 0) return; - - // Get peak and average in the analysis window - using var windowCmd = connection.CreateCommand(); - windowCmd.CommandText = @" -SELECT MAX(sqlserver_cpu_utilization) AS peak_cpu, - AVG(sqlserver_cpu_utilization) AS avg_cpu, - COUNT(*) AS sample_count, - (SELECT collection_time FROM v_cpu_utilization_stats - WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3 - ORDER BY sqlserver_cpu_utilization DESC LIMIT 1) AS peak_time -FROM v_cpu_utilization_stats -WHERE server_id = $1 -AND collection_time >= $2 AND collection_time < $3"; - - windowCmd.Parameters.Add(new DuckDBParameter { Value = context.ServerId }); - windowCmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeStart }); - windowCmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeEnd }); - - using var windowReader = await windowCmd.ExecuteReaderAsync(); - if (!await windowReader.ReadAsync()) return; - - var peakCpu = windowReader.IsDBNull(0) ? 0.0 : Convert.ToDouble(windowReader.GetValue(0)); - var avgCpu = windowReader.IsDBNull(1) ? 0.0 : Convert.ToDouble(windowReader.GetValue(1)); - var windowSamples = windowReader.IsDBNull(2) ? 0L : Convert.ToInt64(windowReader.GetValue(2)); - var peakTime = windowReader.IsDBNull(3) ? (DateTime?)null : windowReader.GetDateTime(3); - - if (windowSamples == 0) return; - - // Check if peak deviates significantly from baseline - var deviation = (peakCpu - baselineMean) / baselineStdDev; - if (deviation < DeviationThreshold || peakCpu < 50) return; // Don't flag low absolute values - - var confidence = baselineSamples >= MinBaselineSamples ? 1.0 : (double)baselineSamples / MinBaselineSamples; - - anomalies.Add(new Fact - { - Source = "anomaly", - Key = "ANOMALY_CPU_SPIKE", - Value = peakCpu, - ServerId = context.ServerId, - Metadata = new Dictionary - { - ["peak_cpu"] = peakCpu, - ["avg_cpu_in_window"] = avgCpu, - ["baseline_mean"] = baselineMean, - ["baseline_stddev"] = baselineStdDev, - ["deviation_sigma"] = deviation, - ["baseline_samples"] = baselineSamples, - ["window_samples"] = windowSamples, - ["confidence"] = confidence, - ["peak_time_ticks"] = peakTime?.Ticks ?? 0 - } - }); - } - catch (Exception ex) - { - AppLogger.Error("AnomalyDetector", $"CPU anomaly detection failed: {ex.Message}"); - } - } - - /// - /// Detects wait stat anomalies — significant waits in the analysis window - /// that were absent or much lower in the baseline. - /// - private async Task DetectWaitAnomalies(AnalysisContext context, - DateTime baselineStart, DateTime baselineEnd, List anomalies) - { - try - { - using var readLock = _duckDb.AcquireReadLock(); - using var connection = _duckDb.CreateConnection(); - await connection.OpenAsync(); - - // Check if baseline has any wait data at all — if not, skip - using var checkCmd = connection.CreateCommand(); - checkCmd.CommandText = @" -SELECT COUNT(*) FROM v_wait_stats -WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3"; - checkCmd.Parameters.Add(new DuckDBParameter { Value = context.ServerId }); - checkCmd.Parameters.Add(new DuckDBParameter { Value = baselineStart }); - checkCmd.Parameters.Add(new DuckDBParameter { Value = baselineEnd }); - var baselineCount = Convert.ToInt64(await checkCmd.ExecuteScalarAsync() ?? 0); - if (baselineCount == 0) return; - - // Get per-wait-type totals in both windows - using var cmd = connection.CreateCommand(); - cmd.CommandText = @" -WITH baseline AS ( - SELECT wait_type, - SUM(delta_wait_time_ms)::BIGINT AS total_ms - FROM v_wait_stats - WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3 - AND delta_wait_time_ms > 0 - GROUP BY wait_type -), -current_window AS ( - SELECT wait_type, - SUM(delta_wait_time_ms)::BIGINT AS total_ms - FROM v_wait_stats - WHERE server_id = $1 AND collection_time >= $4 AND collection_time <= $5 - AND delta_wait_time_ms > 0 - GROUP BY wait_type -) -SELECT c.wait_type, - c.total_ms AS current_ms, - COALESCE(b.total_ms, 0) AS baseline_ms -FROM current_window c -LEFT JOIN baseline b ON c.wait_type = b.wait_type -WHERE c.total_ms > 10000 -- At least 10 seconds of wait time -ORDER BY c.total_ms DESC -LIMIT 10"; - - cmd.Parameters.Add(new DuckDBParameter { Value = context.ServerId }); - cmd.Parameters.Add(new DuckDBParameter { Value = baselineStart }); - cmd.Parameters.Add(new DuckDBParameter { Value = baselineEnd }); - cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeStart }); - cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeEnd }); - - using var reader = await cmd.ExecuteReaderAsync(); - while (await reader.ReadAsync()) - { - var waitType = reader.GetString(0); - var currentMs = Convert.ToInt64(reader.GetValue(1)); - var baselineMs = Convert.ToInt64(reader.GetValue(2)); - - // Normalize to per-hour rates before comparing (windows are different lengths) - var baselineHours = (baselineEnd - baselineStart).TotalHours; - var currentHours = (context.TimeRangeEnd - context.TimeRangeStart).TotalHours; - if (baselineHours <= 0) baselineHours = 1; - if (currentHours <= 0) currentHours = 1; - - double ratio; - string anomalyType; - - if (baselineMs == 0) - { - ratio = currentMs > 60_000 ? 100.0 : 0; // Only flag if > 1 minute total - anomalyType = "new"; - } - else - { - var baselineRate = baselineMs / baselineHours; - var currentRate = currentMs / currentHours; - ratio = baselineRate > 0 ? currentRate / baselineRate : 100.0; - anomalyType = "spike"; - } - - if (ratio < 5.0) continue; // Need at least 5x increase - - anomalies.Add(new Fact - { - Source = "anomaly", - Key = $"ANOMALY_WAIT_{waitType}", - Value = currentMs, - ServerId = context.ServerId, - Metadata = new Dictionary - { - ["current_ms"] = currentMs, - ["baseline_ms"] = baselineMs, - ["ratio"] = ratio, - ["is_new"] = anomalyType == "new" ? 1 : 0 - } - }); - } - } - catch (Exception ex) - { - AppLogger.Error("AnomalyDetector", $"Wait anomaly detection failed: {ex.Message}"); - } - } - - /// - /// Detects blocking/deadlock anomalies — events in the analysis window - /// that are significantly above baseline rates. - /// - private async Task DetectBlockingAnomalies(AnalysisContext context, - DateTime baselineStart, DateTime baselineEnd, List anomalies) - { - try - { - using var readLock = _duckDb.AcquireReadLock(); - using var connection = _duckDb.CreateConnection(); - await connection.OpenAsync(); - - // Check if baseline period has any data at all - using var checkCmd = connection.CreateCommand(); - checkCmd.CommandText = @" -SELECT (SELECT COUNT(*) FROM v_blocked_process_reports - WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3) - + (SELECT COUNT(*) FROM v_deadlocks - WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3) - + (SELECT COUNT(*) FROM v_wait_stats - WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3)"; - checkCmd.Parameters.Add(new DuckDBParameter { Value = context.ServerId }); - checkCmd.Parameters.Add(new DuckDBParameter { Value = baselineStart }); - checkCmd.Parameters.Add(new DuckDBParameter { Value = baselineEnd }); - var baselineDataCount = Convert.ToInt64(await checkCmd.ExecuteScalarAsync() ?? 0); - if (baselineDataCount == 0) return; // No baseline data = can't detect anomaly - - using var cmd = connection.CreateCommand(); - cmd.CommandText = @" -SELECT - (SELECT COUNT(*) FROM v_blocked_process_reports - WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3) AS baseline_blocking, - (SELECT COUNT(*) FROM v_blocked_process_reports - WHERE server_id = $1 AND collection_time >= $4 AND collection_time <= $5) AS current_blocking, - (SELECT COUNT(*) FROM v_deadlocks - WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3) AS baseline_deadlocks, - (SELECT COUNT(*) FROM v_deadlocks - WHERE server_id = $1 AND collection_time >= $4 AND collection_time <= $5) AS current_deadlocks"; - - cmd.Parameters.Add(new DuckDBParameter { Value = context.ServerId }); - cmd.Parameters.Add(new DuckDBParameter { Value = baselineStart }); - cmd.Parameters.Add(new DuckDBParameter { Value = baselineEnd }); - cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeStart }); - cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeEnd }); - - using var reader = await cmd.ExecuteReaderAsync(); - if (!await reader.ReadAsync()) return; - - var baselineBlocking = Convert.ToInt64(reader.GetValue(0)); - var currentBlocking = Convert.ToInt64(reader.GetValue(1)); - var baselineDeadlocks = Convert.ToInt64(reader.GetValue(2)); - var currentDeadlocks = Convert.ToInt64(reader.GetValue(3)); - - // Normalize to per-hour rates (windows are different lengths) - var baselineHours = (baselineEnd - baselineStart).TotalHours; - var currentHours = (context.TimeRangeEnd - context.TimeRangeStart).TotalHours; - if (baselineHours <= 0) baselineHours = 1; - if (currentHours <= 0) currentHours = 1; - - var baselineBlockingRate = baselineBlocking / baselineHours; - var currentBlockingRate = currentBlocking / currentHours; - var blockingRatio = baselineBlocking > 0 ? currentBlockingRate / baselineBlockingRate : 100.0; - - var baselineDeadlockRate = baselineDeadlocks / baselineHours; - var currentDeadlockRate = currentDeadlocks / currentHours; - var deadlockRatio = baselineDeadlocks > 0 ? currentDeadlockRate / baselineDeadlockRate : 100.0; - - // Blocking spike: at least 5 events AND 3x baseline rate (or new) - if (currentBlocking >= 5 && (baselineBlocking == 0 || blockingRatio >= 3)) - { - anomalies.Add(new Fact - { - Source = "anomaly", - Key = "ANOMALY_BLOCKING_SPIKE", - Value = currentBlocking, - ServerId = context.ServerId, - Metadata = new Dictionary - { - ["current_count"] = currentBlocking, - ["baseline_count"] = baselineBlocking, - ["ratio"] = blockingRatio - } - }); - } - - // Deadlock spike: at least 3 events AND 3x baseline rate (or new) - if (currentDeadlocks >= 3 && (baselineDeadlocks == 0 || deadlockRatio >= 3)) - { - anomalies.Add(new Fact - { - Source = "anomaly", - Key = "ANOMALY_DEADLOCK_SPIKE", - Value = currentDeadlocks, - ServerId = context.ServerId, - Metadata = new Dictionary - { - ["current_count"] = currentDeadlocks, - ["baseline_count"] = baselineDeadlocks, - ["ratio"] = deadlockRatio - } - }); - } - } - catch (Exception ex) - { - AppLogger.Error("AnomalyDetector", $"Blocking anomaly detection failed: {ex.Message}"); - } - } - - /// - /// Detects I/O latency anomalies — significant increase in read/write latency - /// compared to baseline. - /// - private async Task DetectIoAnomalies(AnalysisContext context, - DateTime baselineStart, DateTime baselineEnd, List anomalies) - { - try - { - using var readLock = _duckDb.AcquireReadLock(); - using var connection = _duckDb.CreateConnection(); - await connection.OpenAsync(); - - using var cmd = connection.CreateCommand(); - cmd.CommandText = @" -WITH baseline AS ( - SELECT AVG(delta_stall_read_ms * 1.0 / NULLIF(delta_reads, 0)) AS avg_read_lat, - AVG(delta_stall_write_ms * 1.0 / NULLIF(delta_writes, 0)) AS avg_write_lat, - STDDEV_SAMP(delta_stall_read_ms * 1.0 / NULLIF(delta_reads, 0)) AS stddev_read, - STDDEV_SAMP(delta_stall_write_ms * 1.0 / NULLIF(delta_writes, 0)) AS stddev_write, - COUNT(*) AS samples - FROM v_file_io_stats - WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3 - AND (delta_reads > 0 OR delta_writes > 0) -), -current_window AS ( - SELECT AVG(delta_stall_read_ms * 1.0 / NULLIF(delta_reads, 0)) AS avg_read_lat, - AVG(delta_stall_write_ms * 1.0 / NULLIF(delta_writes, 0)) AS avg_write_lat - FROM v_file_io_stats - WHERE server_id = $1 AND collection_time >= $4 AND collection_time <= $5 - AND (delta_reads > 0 OR delta_writes > 0) -) -SELECT b.avg_read_lat, b.stddev_read, c.avg_read_lat, - b.avg_write_lat, b.stddev_write, c.avg_write_lat, - b.samples -FROM baseline b, current_window c"; - - cmd.Parameters.Add(new DuckDBParameter { Value = context.ServerId }); - cmd.Parameters.Add(new DuckDBParameter { Value = baselineStart }); - cmd.Parameters.Add(new DuckDBParameter { Value = baselineEnd }); - cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeStart }); - cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeEnd }); - - using var reader = await cmd.ExecuteReaderAsync(); - if (!await reader.ReadAsync()) return; - - var baselineReadLat = reader.IsDBNull(0) ? 0.0 : Convert.ToDouble(reader.GetValue(0)); - var stddevRead = reader.IsDBNull(1) ? 0.0 : Convert.ToDouble(reader.GetValue(1)); - var currentReadLat = reader.IsDBNull(2) ? 0.0 : Convert.ToDouble(reader.GetValue(2)); - var baselineWriteLat = reader.IsDBNull(3) ? 0.0 : Convert.ToDouble(reader.GetValue(3)); - var stddevWrite = reader.IsDBNull(4) ? 0.0 : Convert.ToDouble(reader.GetValue(4)); - var currentWriteLat = reader.IsDBNull(5) ? 0.0 : Convert.ToDouble(reader.GetValue(5)); - var samples = reader.IsDBNull(6) ? 0L : Convert.ToInt64(reader.GetValue(6)); - - if (samples < 3) return; - - // Read latency anomaly - if (stddevRead > 0 && currentReadLat > 10) // At least 10ms to matter - { - var readDeviation = (currentReadLat - baselineReadLat) / stddevRead; - if (readDeviation >= DeviationThreshold) - { - anomalies.Add(new Fact - { - Source = "anomaly", - Key = "ANOMALY_READ_LATENCY", - Value = currentReadLat, - ServerId = context.ServerId, - Metadata = new Dictionary - { - ["current_latency_ms"] = currentReadLat, - ["baseline_mean_ms"] = baselineReadLat, - ["baseline_stddev_ms"] = stddevRead, - ["deviation_sigma"] = readDeviation, - ["baseline_samples"] = samples - } - }); - } - } - - // Write latency anomaly - if (stddevWrite > 0 && currentWriteLat > 5) // At least 5ms to matter - { - var writeDeviation = (currentWriteLat - baselineWriteLat) / stddevWrite; - if (writeDeviation >= DeviationThreshold) - { - anomalies.Add(new Fact - { - Source = "anomaly", - Key = "ANOMALY_WRITE_LATENCY", - Value = currentWriteLat, - ServerId = context.ServerId, - Metadata = new Dictionary - { - ["current_latency_ms"] = currentWriteLat, - ["baseline_mean_ms"] = baselineWriteLat, - ["baseline_stddev_ms"] = stddevWrite, - ["deviation_sigma"] = writeDeviation, - ["baseline_samples"] = samples - } - }); - } - } - } - catch (Exception ex) - { - AppLogger.Error("AnomalyDetector", $"I/O anomaly detection failed: {ex.Message}"); - } - } -} +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using DuckDB.NET.Data; +using PerformanceMonitorLite.Database; +using PerformanceMonitorLite.Services; + +namespace PerformanceMonitorLite.Analysis; + +/// +/// Detects anomalies by comparing the analysis window's metrics against +/// time-bucketed baselines (hour-of-day x day-of-week, 30-day rolling window). +/// +/// Two detection patterns: +/// - Z-score: (observed - mean) / stddev — used for continuous metrics +/// (CPU, batch requests, I/O latency, session counts, query duration, memory) +/// - Ratio: currentRate / baselineRate — used for rate/event metrics +/// (wait stats, blocking, deadlocks) +/// +/// Baseline computation and caching are handled by BaselineProvider. +/// +public class AnomalyDetector +{ + private readonly DuckDbInitializer _duckDb; + private readonly BaselineProvider _baselineProvider; + + /// + /// Default number of standard deviations above baseline mean to flag as anomalous. + /// + private const double DefaultDeviationThreshold = 2.0; + + /// + /// Default ratio threshold for rate-based anomaly detection (wait stats). + /// + private const double DefaultRatioThreshold = 5.0; + + /// + /// Default ratio threshold for event-based anomaly detection (blocking/deadlocks). + /// + private const double DefaultEventRatioThreshold = 3.0; + + /// + /// Per-metric deviation thresholds. Metrics not listed use DefaultDeviationThreshold. + /// + private readonly Dictionary _deviationThresholds = new(); + + public AnomalyDetector(DuckDbInitializer duckDb, BaselineProvider baselineProvider) + { + _duckDb = duckDb; + _baselineProvider = baselineProvider; + } + + /// + /// Sets a custom deviation threshold for a specific metric. + /// + public void SetDeviationThreshold(string metricName, double threshold) + { + _deviationThresholds[metricName] = threshold; + } + + private double GetDeviationThreshold(string metricName) + { + return _deviationThresholds.TryGetValue(metricName, out var threshold) + ? threshold + : DefaultDeviationThreshold; + } + + /// + /// Adds baseline context metadata to an anomaly fact's metadata dictionary. + /// + private static void AddBaselineContext(Dictionary metadata, BaselineBucket baseline) + { + metadata["baseline_hour"] = baseline.HourOfDay; + metadata["baseline_dow"] = baseline.DayOfWeek; + metadata["baseline_tier"] = (double)baseline.Tier; + } + + /// + /// Detects anomalies by comparing the analysis window against time-bucketed baselines. + /// Returns anomaly facts to be merged into the main fact list. + /// + public async Task> DetectAnomaliesAsync(AnalysisContext context) + { + var anomalies = new List(); + + // Check if baseline period has any data at all — if not, skip all anomaly detection. + if (!await HasBaselineDataAsync(context.ServerId)) + return anomalies; + + // Existing detection methods (upgraded to time-bucketed baselines) + await DetectCpuAnomalies(context, anomalies); + await DetectWaitAnomalies(context, anomalies); + await DetectBlockingAnomalies(context, anomalies); + await DetectIoAnomalies(context, anomalies); + + // New detection methods + await DetectBatchRequestAnomalies(context, anomalies); + await DetectSessionAnomalies(context, anomalies); + await DetectQueryDurationAnomalies(context, anomalies); + await DetectMemoryAnomalies(context, anomalies); + + return anomalies; + } + + /// + /// Checks if the server has enough historical data for meaningful baselines. + /// Uses wait_stats as canary — if waits are collected, other data is too. + /// + private async Task HasBaselineDataAsync(int serverId) + { + try + { + using var readLock = _duckDb.AcquireReadLock(); + using var connection = _duckDb.CreateConnection(); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +SELECT (SELECT COUNT(*) FROM v_wait_stats + WHERE server_id = $1 AND collection_time >= $2) + + (SELECT COUNT(*) FROM v_cpu_utilization_stats + WHERE server_id = $1 AND collection_time >= $2)"; + cmd.Parameters.Add(new DuckDBParameter { Value = serverId }); + cmd.Parameters.Add(new DuckDBParameter { Value = DateTime.UtcNow.AddDays(-30) }); + + var count = Convert.ToInt64(await cmd.ExecuteScalarAsync() ?? 0); + return count > 0; + } + catch { return false; } + } + + /// + /// Detects CPU utilization anomalies using z-score against time-bucketed baseline. + /// + private async Task DetectCpuAnomalies(AnalysisContext context, List anomalies) + { + try + { + var baseline = await _baselineProvider.GetBaselineAsync( + context.ServerId, MetricNames.Cpu, context.TimeRangeStart); + + if (baseline.SampleCount == 0) return; + var effectiveStdDev = baseline.EffectiveStdDev; + if (effectiveStdDev <= 0) return; // Zero mean + zero stddev — skip + + using var readLock = _duckDb.AcquireReadLock(); + using var connection = _duckDb.CreateConnection(); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +SELECT MAX(sqlserver_cpu_utilization) AS peak_cpu, + AVG(sqlserver_cpu_utilization) AS avg_cpu, + COUNT(*) AS sample_count, + (SELECT collection_time FROM v_cpu_utilization_stats + WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3 + ORDER BY sqlserver_cpu_utilization DESC LIMIT 1) AS peak_time +FROM v_cpu_utilization_stats +WHERE server_id = $1 +AND collection_time >= $2 AND collection_time < $3"; + + cmd.Parameters.Add(new DuckDBParameter { Value = context.ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeStart }); + cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeEnd }); + + using var reader = await cmd.ExecuteReaderAsync(); + if (!await reader.ReadAsync()) return; + + var peakCpu = reader.IsDBNull(0) ? 0.0 : Convert.ToDouble(reader.GetValue(0)); + var avgCpu = reader.IsDBNull(1) ? 0.0 : Convert.ToDouble(reader.GetValue(1)); + var windowSamples = reader.IsDBNull(2) ? 0L : Convert.ToInt64(reader.GetValue(2)); + var peakTime = reader.IsDBNull(3) ? (DateTime?)null : reader.GetDateTime(3); + + if (windowSamples == 0) return; + + var deviation = (peakCpu - baseline.Mean) / effectiveStdDev; + if (deviation < GetDeviationThreshold(MetricNames.Cpu) || peakCpu < 50) return; + + var metadata = new Dictionary + { + ["peak_cpu"] = peakCpu, + ["avg_cpu_in_window"] = avgCpu, + ["baseline_mean"] = baseline.Mean, + ["baseline_stddev"] = effectiveStdDev, + ["deviation_sigma"] = deviation, + ["baseline_samples"] = baseline.SampleCount, + ["window_samples"] = windowSamples, + ["confidence"] = 1.0, + ["peak_time_ticks"] = peakTime?.Ticks ?? 0 + }; + AddBaselineContext(metadata, baseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_CPU_SPIKE", + Value = peakCpu, + ServerId = context.ServerId, + Metadata = metadata + }); + } + catch (Exception ex) + { + AppLogger.Error("AnomalyDetector", $"CPU anomaly detection failed: {ex.Message}"); + } + } + + /// + /// Detects wait stat anomalies — total wait time significantly above + /// baseline rate for this time bucket. Uses ratio-based scoring. + /// + private async Task DetectWaitAnomalies(AnalysisContext context, List anomalies) + { + try + { + var baseline = await _baselineProvider.GetBaselineAsync( + context.ServerId, MetricNames.WaitStats, context.TimeRangeStart); + + // No baseline data at all — can't distinguish "new" waits from "always present." + // Skip rather than flagging everything as anomalous. + if (baseline.SampleCount == 0) return; + + using var readLock = _duckDb.AcquireReadLock(); + using var connection = _duckDb.CreateConnection(); + await connection.OpenAsync(); + + // Get per-wait-type totals in the analysis window + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +SELECT wait_type, + SUM(delta_wait_time_ms)::BIGINT AS total_ms +FROM v_wait_stats +WHERE server_id = $1 AND collection_time >= $2 AND collection_time <= $3 +AND delta_wait_time_ms > 0 +GROUP BY wait_type +HAVING SUM(delta_wait_time_ms) > 10000 +ORDER BY total_ms DESC +LIMIT 10"; + + cmd.Parameters.Add(new DuckDBParameter { Value = context.ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeStart }); + cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeEnd }); + + var currentHours = (context.TimeRangeEnd - context.TimeRangeStart).TotalHours; + if (currentHours <= 0) currentHours = 1; + + // Baseline mean is total wait ms per collection interval for this time bucket. + // If no baseline, use ratio=100 for significant new waits. + var baselineRate = baseline.SampleCount > 0 ? baseline.Mean : 0; + + using var reader = await cmd.ExecuteReaderAsync(); + while (await reader.ReadAsync()) + { + var waitType = reader.GetString(0); + var currentMs = Convert.ToInt64(reader.GetValue(1)); + var currentRate = currentMs / currentHours; + + double ratio; + string anomalyType; + + if (baselineRate <= 0 || baseline.SampleCount == 0) + { + ratio = currentMs > 60_000 ? 100.0 : 0; + anomalyType = "new"; + } + else + { + ratio = currentRate / baselineRate; + anomalyType = "spike"; + } + + if (ratio < DefaultRatioThreshold) continue; + + var metadata = new Dictionary + { + ["current_ms"] = currentMs, + ["baseline_mean"] = baseline.Mean, + ["ratio"] = ratio, + ["is_new"] = anomalyType == "new" ? 1 : 0 + }; + AddBaselineContext(metadata, baseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = $"ANOMALY_WAIT_{waitType}", + Value = currentMs, + ServerId = context.ServerId, + Metadata = metadata + }); + } + } + catch (Exception ex) + { + AppLogger.Error("AnomalyDetector", $"Wait anomaly detection failed: {ex.Message}"); + } + } + + /// + /// Detects blocking/deadlock anomalies — event rates significantly above + /// baseline for this time bucket. Uses ratio-based scoring. + /// + private async Task DetectBlockingAnomalies(AnalysisContext context, List anomalies) + { + try + { + var blockingBaseline = await _baselineProvider.GetBaselineAsync( + context.ServerId, MetricNames.Blocking, context.TimeRangeStart); + var deadlockBaseline = await _baselineProvider.GetBaselineAsync( + context.ServerId, MetricNames.Deadlock, context.TimeRangeStart); + + using var readLock = _duckDb.AcquireReadLock(); + using var connection = _duckDb.CreateConnection(); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +SELECT + (SELECT COUNT(*) FROM v_blocked_process_reports + WHERE server_id = $1 AND collection_time >= $2 AND collection_time <= $3) AS current_blocking, + (SELECT COUNT(*) FROM v_deadlocks + WHERE server_id = $1 AND collection_time >= $2 AND collection_time <= $3) AS current_deadlocks"; + + cmd.Parameters.Add(new DuckDBParameter { Value = context.ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeStart }); + cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeEnd }); + + using var reader = await cmd.ExecuteReaderAsync(); + if (!await reader.ReadAsync()) return; + + var currentBlocking = Convert.ToInt64(reader.GetValue(0)); + var currentDeadlocks = Convert.ToInt64(reader.GetValue(1)); + + // Baseline mean = events per day for this hour+dow bucket + var baselineBlockingRate = blockingBaseline.SampleCount > 0 ? blockingBaseline.Mean : 0; + var baselineDeadlockRate = deadlockBaseline.SampleCount > 0 ? deadlockBaseline.Mean : 0; + + // Blocking spike: at least 5 events AND 3x baseline rate (or no baseline) + if (currentBlocking >= 5 && (baselineBlockingRate <= 0 || currentBlocking / Math.Max(baselineBlockingRate, 1) >= DefaultEventRatioThreshold)) + { + var metadata = new Dictionary + { + ["current_count"] = currentBlocking, + ["baseline_rate"] = baselineBlockingRate, + ["ratio"] = baselineBlockingRate > 0 ? currentBlocking / baselineBlockingRate : 100.0 + }; + AddBaselineContext(metadata, blockingBaseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_BLOCKING_SPIKE", + Value = currentBlocking, + ServerId = context.ServerId, + Metadata = metadata + }); + } + + // Deadlock spike: at least 3 events AND 3x baseline rate (or no baseline) + if (currentDeadlocks >= 3 && (baselineDeadlockRate <= 0 || currentDeadlocks / Math.Max(baselineDeadlockRate, 1) >= DefaultEventRatioThreshold)) + { + var metadata = new Dictionary + { + ["current_count"] = currentDeadlocks, + ["baseline_rate"] = baselineDeadlockRate, + ["ratio"] = baselineDeadlockRate > 0 ? currentDeadlocks / baselineDeadlockRate : 100.0 + }; + AddBaselineContext(metadata, deadlockBaseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_DEADLOCK_SPIKE", + Value = currentDeadlocks, + ServerId = context.ServerId, + Metadata = metadata + }); + } + } + catch (Exception ex) + { + AppLogger.Error("AnomalyDetector", $"Blocking anomaly detection failed: {ex.Message}"); + } + } + + /// + /// Detects I/O latency anomalies using z-score against time-bucketed baseline. + /// + private async Task DetectIoAnomalies(AnalysisContext context, List anomalies) + { + try + { + var baseline = await _baselineProvider.GetBaselineAsync( + context.ServerId, MetricNames.IoLatency, context.TimeRangeStart); + + if (baseline.SampleCount == 0) return; + var effectiveStdDev = baseline.EffectiveStdDev; + if (effectiveStdDev <= 0) return; + + using var readLock = _duckDb.AcquireReadLock(); + using var connection = _duckDb.CreateConnection(); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +SELECT AVG(delta_stall_read_ms * 1.0 / NULLIF(delta_reads, 0)) AS avg_read_lat, + AVG(delta_stall_write_ms * 1.0 / NULLIF(delta_writes, 0)) AS avg_write_lat +FROM v_file_io_stats +WHERE server_id = $1 AND collection_time >= $2 AND collection_time <= $3 +AND (delta_reads > 0 OR delta_writes > 0)"; + + cmd.Parameters.Add(new DuckDBParameter { Value = context.ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeStart }); + cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeEnd }); + + using var reader = await cmd.ExecuteReaderAsync(); + if (!await reader.ReadAsync()) return; + + var currentReadLat = reader.IsDBNull(0) ? 0.0 : Convert.ToDouble(reader.GetValue(0)); + var currentWriteLat = reader.IsDBNull(1) ? 0.0 : Convert.ToDouble(reader.GetValue(1)); + + var ioThreshold = GetDeviationThreshold(MetricNames.IoLatency); + + // Read latency anomaly + if (currentReadLat > 10) + { + var readDeviation = (currentReadLat - baseline.Mean) / effectiveStdDev; + if (readDeviation >= ioThreshold) + { + var metadata = new Dictionary + { + ["current_latency_ms"] = currentReadLat, + ["baseline_mean_ms"] = baseline.Mean, + ["baseline_stddev_ms"] = effectiveStdDev, + ["deviation_sigma"] = readDeviation, + ["baseline_samples"] = baseline.SampleCount + }; + AddBaselineContext(metadata, baseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_READ_LATENCY", + Value = currentReadLat, + ServerId = context.ServerId, + Metadata = metadata + }); + } + } + + // Write latency anomaly + if (currentWriteLat > 5) + { + var writeDeviation = (currentWriteLat - baseline.Mean) / effectiveStdDev; + if (writeDeviation >= ioThreshold) + { + var metadata = new Dictionary + { + ["current_latency_ms"] = currentWriteLat, + ["baseline_mean_ms"] = baseline.Mean, + ["baseline_stddev_ms"] = effectiveStdDev, + ["deviation_sigma"] = writeDeviation, + ["baseline_samples"] = baseline.SampleCount + }; + AddBaselineContext(metadata, baseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_WRITE_LATENCY", + Value = currentWriteLat, + ServerId = context.ServerId, + Metadata = metadata + }); + } + } + } + catch (Exception ex) + { + AppLogger.Error("AnomalyDetector", $"I/O anomaly detection failed: {ex.Message}"); + } + } + + /// + /// Detects batch requests/sec anomalies using z-score against time-bucketed baseline. + /// + private async Task DetectBatchRequestAnomalies(AnalysisContext context, List anomalies) + { + try + { + var baseline = await _baselineProvider.GetBaselineAsync( + context.ServerId, MetricNames.BatchRequests, context.TimeRangeStart); + + if (baseline.SampleCount == 0) return; + var effectiveStdDev = baseline.EffectiveStdDev; + if (effectiveStdDev <= 0) return; + + using var readLock = _duckDb.AcquireReadLock(); + using var connection = _duckDb.CreateConnection(); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +SELECT AVG(delta_cntr_value) AS avg_batch, + MAX(delta_cntr_value) AS peak_batch, + COUNT(*) AS sample_count +FROM v_perfmon_stats +WHERE server_id = $1 AND collection_time >= $2 AND collection_time <= $3 +AND counter_name = 'Batch Requests/sec' +AND delta_cntr_value >= 0"; + + cmd.Parameters.Add(new DuckDBParameter { Value = context.ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeStart }); + cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeEnd }); + + using var reader = await cmd.ExecuteReaderAsync(); + if (!await reader.ReadAsync()) return; + + var avgBatch = reader.IsDBNull(0) ? 0.0 : Convert.ToDouble(reader.GetValue(0)); + var peakBatch = reader.IsDBNull(1) ? 0.0 : Convert.ToDouble(reader.GetValue(1)); + var windowSamples = reader.IsDBNull(2) ? 0L : Convert.ToInt64(reader.GetValue(2)); + + if (windowSamples == 0) return; + + var deviation = (peakBatch - baseline.Mean) / effectiveStdDev; + if (deviation < GetDeviationThreshold(MetricNames.BatchRequests)) return; + + var metadata = new Dictionary + { + ["peak_batch_requests"] = peakBatch, + ["avg_batch_requests"] = avgBatch, + ["baseline_mean"] = baseline.Mean, + ["baseline_stddev"] = effectiveStdDev, + ["deviation_sigma"] = deviation, + ["baseline_samples"] = baseline.SampleCount, + ["window_samples"] = windowSamples + }; + AddBaselineContext(metadata, baseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_BATCH_REQUESTS", + Value = peakBatch, + ServerId = context.ServerId, + Metadata = metadata + }); + } + catch (Exception ex) + { + AppLogger.Error("AnomalyDetector", $"Batch request anomaly detection failed: {ex.Message}"); + } + } + + /// + /// Detects session/connection count anomalies using z-score against time-bucketed baseline. + /// + private async Task DetectSessionAnomalies(AnalysisContext context, List anomalies) + { + try + { + var baseline = await _baselineProvider.GetBaselineAsync( + context.ServerId, MetricNames.SessionCount, context.TimeRangeStart); + + if (baseline.SampleCount == 0) return; + var effectiveStdDev = baseline.EffectiveStdDev; + if (effectiveStdDev <= 0) return; + + using var readLock = _duckDb.AcquireReadLock(); + using var connection = _duckDb.CreateConnection(); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +WITH per_collection AS ( + SELECT collection_time, + SUM(connection_count)::DOUBLE AS total_connections + FROM v_session_stats + WHERE server_id = $1 AND collection_time >= $2 AND collection_time <= $3 + GROUP BY collection_time +) +SELECT AVG(total_connections) AS avg_connections, + MAX(total_connections) AS peak_connections, + COUNT(*) AS sample_count +FROM per_collection"; + + cmd.Parameters.Add(new DuckDBParameter { Value = context.ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeStart }); + cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeEnd }); + + using var reader = await cmd.ExecuteReaderAsync(); + if (!await reader.ReadAsync()) return; + + var avgConnections = reader.IsDBNull(0) ? 0.0 : Convert.ToDouble(reader.GetValue(0)); + var peakConnections = reader.IsDBNull(1) ? 0.0 : Convert.ToDouble(reader.GetValue(1)); + var windowSamples = reader.IsDBNull(2) ? 0L : Convert.ToInt64(reader.GetValue(2)); + + if (windowSamples == 0) return; + + var deviation = (peakConnections - baseline.Mean) / effectiveStdDev; + if (deviation < GetDeviationThreshold(MetricNames.SessionCount)) return; + + var metadata = new Dictionary + { + ["peak_connections"] = peakConnections, + ["avg_connections"] = avgConnections, + ["baseline_mean"] = baseline.Mean, + ["baseline_stddev"] = effectiveStdDev, + ["deviation_sigma"] = deviation, + ["baseline_samples"] = baseline.SampleCount, + ["window_samples"] = windowSamples + }; + AddBaselineContext(metadata, baseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_SESSION_SPIKE", + Value = peakConnections, + ServerId = context.ServerId, + Metadata = metadata + }); + } + catch (Exception ex) + { + AppLogger.Error("AnomalyDetector", $"Session anomaly detection failed: {ex.Message}"); + } + } + + /// + /// Detects query duration aggregate anomalies using z-score against time-bucketed baseline. + /// Measures total elapsed time across all queries per collection interval. + /// + private async Task DetectQueryDurationAnomalies(AnalysisContext context, List anomalies) + { + try + { + var baseline = await _baselineProvider.GetBaselineAsync( + context.ServerId, MetricNames.QueryDuration, context.TimeRangeStart); + + if (baseline.SampleCount == 0) return; + var effectiveStdDev = baseline.EffectiveStdDev; + if (effectiveStdDev <= 0) return; + + using var readLock = _duckDb.AcquireReadLock(); + using var connection = _duckDb.CreateConnection(); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +WITH per_collection AS ( + SELECT collection_time, + SUM(delta_elapsed_time)::DOUBLE AS total_elapsed + FROM v_query_stats + WHERE server_id = $1 AND collection_time >= $2 AND collection_time <= $3 + AND delta_execution_count > 0 + AND delta_elapsed_time >= 0 + GROUP BY collection_time +) +SELECT AVG(total_elapsed) AS avg_elapsed, + MAX(total_elapsed) AS peak_elapsed, + COUNT(*) AS sample_count +FROM per_collection"; + + cmd.Parameters.Add(new DuckDBParameter { Value = context.ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeStart }); + cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeEnd }); + + using var reader = await cmd.ExecuteReaderAsync(); + if (!await reader.ReadAsync()) return; + + var avgElapsed = reader.IsDBNull(0) ? 0.0 : Convert.ToDouble(reader.GetValue(0)); + var peakElapsed = reader.IsDBNull(1) ? 0.0 : Convert.ToDouble(reader.GetValue(1)); + var windowSamples = reader.IsDBNull(2) ? 0L : Convert.ToInt64(reader.GetValue(2)); + + if (windowSamples == 0) return; + + var deviation = (peakElapsed - baseline.Mean) / effectiveStdDev; + if (deviation < GetDeviationThreshold(MetricNames.QueryDuration)) return; + + var metadata = new Dictionary + { + ["peak_total_elapsed_us"] = peakElapsed, + ["avg_total_elapsed_us"] = avgElapsed, + ["baseline_mean"] = baseline.Mean, + ["baseline_stddev"] = effectiveStdDev, + ["deviation_sigma"] = deviation, + ["baseline_samples"] = baseline.SampleCount, + ["window_samples"] = windowSamples + }; + AddBaselineContext(metadata, baseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_QUERY_DURATION", + Value = peakElapsed, + ServerId = context.ServerId, + Metadata = metadata + }); + } + catch (Exception ex) + { + AppLogger.Error("AnomalyDetector", $"Query duration anomaly detection failed: {ex.Message}"); + } + } + + /// + /// Detects memory utilization anomalies using z-score against time-bucketed baseline. + /// Lite-only — Dashboard does not collect memory metrics. + /// Measures total_server_memory_mb / target_server_memory_mb as memory pressure %. + /// + private async Task DetectMemoryAnomalies(AnalysisContext context, List anomalies) + { + try + { + var baseline = await _baselineProvider.GetBaselineAsync( + context.ServerId, MetricNames.Memory, context.TimeRangeStart); + + if (baseline.SampleCount == 0) return; + var effectiveStdDev = baseline.EffectiveStdDev; + if (effectiveStdDev <= 0) return; + + using var readLock = _duckDb.AcquireReadLock(); + using var connection = _duckDb.CreateConnection(); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +SELECT AVG(total_server_memory_mb::DOUBLE / NULLIF(target_server_memory_mb::DOUBLE, 0) * 100) AS avg_pressure, + MAX(total_server_memory_mb::DOUBLE / NULLIF(target_server_memory_mb::DOUBLE, 0) * 100) AS peak_pressure, + COUNT(*) AS sample_count +FROM v_memory_stats +WHERE server_id = $1 AND collection_time >= $2 AND collection_time <= $3 +AND target_server_memory_mb > 0"; + + cmd.Parameters.Add(new DuckDBParameter { Value = context.ServerId }); + cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeStart }); + cmd.Parameters.Add(new DuckDBParameter { Value = context.TimeRangeEnd }); + + using var reader = await cmd.ExecuteReaderAsync(); + if (!await reader.ReadAsync()) return; + + var avgPressure = reader.IsDBNull(0) ? 0.0 : Convert.ToDouble(reader.GetValue(0)); + var peakPressure = reader.IsDBNull(1) ? 0.0 : Convert.ToDouble(reader.GetValue(1)); + var windowSamples = reader.IsDBNull(2) ? 0L : Convert.ToInt64(reader.GetValue(2)); + + if (windowSamples == 0) return; + + var deviation = (peakPressure - baseline.Mean) / effectiveStdDev; + if (deviation < GetDeviationThreshold(MetricNames.Memory)) return; + + var metadata = new Dictionary + { + ["peak_memory_pressure_pct"] = peakPressure, + ["avg_memory_pressure_pct"] = avgPressure, + ["baseline_mean"] = baseline.Mean, + ["baseline_stddev"] = effectiveStdDev, + ["deviation_sigma"] = deviation, + ["baseline_samples"] = baseline.SampleCount, + ["window_samples"] = windowSamples + }; + AddBaselineContext(metadata, baseline); + + anomalies.Add(new Fact + { + Source = "anomaly", + Key = "ANOMALY_MEMORY_PRESSURE", + Value = peakPressure, + ServerId = context.ServerId, + Metadata = metadata + }); + } + catch (Exception ex) + { + AppLogger.Error("AnomalyDetector", $"Memory anomaly detection failed: {ex.Message}"); + } + } +} diff --git a/Lite/Analysis/BaselineProvider.cs b/Lite/Analysis/BaselineProvider.cs new file mode 100644 index 00000000..fc40d2be --- /dev/null +++ b/Lite/Analysis/BaselineProvider.cs @@ -0,0 +1,544 @@ +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using DuckDB.NET.Data; +using PerformanceMonitorLite.Database; +using PerformanceMonitorLite.Services; + +namespace PerformanceMonitorLite.Analysis; + +/// +/// Provides time-bucketed baselines (hour-of-day x day-of-week) computed from +/// 30-day rolling history in DuckDB. Replaces the flat 24-hour lookback used +/// by the previous anomaly detection implementation. +/// +/// Each baseline bucket contains mean, stddev, and sample count for a metric +/// at a specific (hour, day-of-week) combination. When a bucket has insufficient +/// samples, the provider collapses to less-specific tiers: +/// Full (hour+dow) -> Hour-only -> Flat (global mean/stddev) +/// +/// Baselines are cached in memory with a 1-hour TTL to avoid redundant +/// recomputation during rapid re-analysis. +/// +public class BaselineProvider +{ + private readonly DuckDbInitializer _duckDb; + + /// Rolling window for baseline computation. + private const int BaselineWindowDays = 30; + + /// Collapse to hour-only when full bucket has fewer than this many samples. + private const int CollapseThreshold = 10; + + /// Restore to full bucket when sample count reaches this level (hysteresis). + private const int RestoreThreshold = 15; + + /// Cache TTL — baselines are recomputed after this interval. + public static TimeSpan CacheTtl { get; set; } = TimeSpan.FromHours(1); + + private readonly ConcurrentDictionary _cache = new(); + + public BaselineProvider(DuckDbInitializer duckDb) + { + _duckDb = duckDb; + } + + /// + /// Gets the baseline for a specific metric, server, and time bucket. + /// Returns the most specific bucket available, collapsing as needed. + /// + public async Task GetBaselineAsync( + int serverId, string metricName, DateTime analysisTime) + { + var hourOfDay = analysisTime.Hour; + var dayOfWeek = (int)analysisTime.DayOfWeek; // Sunday=0 + + var baselines = await GetOrComputeBaselinesAsync(serverId, metricName, analysisTime); + if (baselines == null || baselines.Count == 0) + return BaselineBucket.Empty; + + // Try full bucket (hour + day-of-week) + var fullKey = (hourOfDay, dayOfWeek); + if (baselines.TryGetValue(fullKey, out var fullBucket) && fullBucket.SampleCount >= RestoreThreshold) + return fullBucket; + + // If full bucket exists but below restore threshold, check if it's above collapse threshold + // (hysteresis: don't collapse if we're between 10-14 samples and were previously using full) + if (fullBucket != null && fullBucket.SampleCount >= CollapseThreshold) + return fullBucket; + + // Collapse to hour-only: aggregate all days for this hour + var hourBuckets = baselines + .Where(kvp => kvp.Key.HourOfDay == hourOfDay) + .Select(kvp => kvp.Value) + .ToList(); + + if (hourBuckets.Count > 0) + { + var collapsed = CollapseToHourOnly(hourBuckets); + if (collapsed.SampleCount >= CollapseThreshold) + return collapsed; + } + + // Collapse to flat: aggregate everything + var allBuckets = baselines.Values.ToList(); + if (allBuckets.Count > 0) + { + var flat = CollapseToFlat(allBuckets); + if (flat.SampleCount >= 3) // Minimum viable baseline + return flat; + } + + return BaselineBucket.Empty; + } + + /// + /// Gets all baseline buckets for a metric/server. Used by UI for rendering + /// expected-range bands across all time slots. + /// + public async Task?> GetAllBaselinesAsync( + int serverId, string metricName, DateTime analysisTime) + { + return await GetOrComputeBaselinesAsync(serverId, metricName, analysisTime); + } + + /// Forces cache eviction for a server — used during testing. + public void InvalidateCache(int serverId) + { + var keysToRemove = _cache.Keys.Where(k => k.StartsWith($"{serverId}:", StringComparison.Ordinal)).ToList(); + foreach (var key in keysToRemove) + _cache.TryRemove(key, out _); + } + + /// Forces full cache clear — used during testing. + public void ClearCache() => _cache.Clear(); + + private async Task?> GetOrComputeBaselinesAsync( + int serverId, string metricName, DateTime analysisTime) + { + var cacheKey = $"{serverId}:{metricName}"; + var roundedHour = new DateTime(analysisTime.Year, analysisTime.Month, analysisTime.Day, analysisTime.Hour, 0, 0); + + if (_cache.TryGetValue(cacheKey, out var cached) && + cached.ComputedAt == roundedHour && + (DateTime.UtcNow - cached.RealTime) < CacheTtl) + { + return cached.Buckets; + } + + var buckets = await ComputeBaselinesAsync(serverId, metricName, analysisTime); + + _cache[cacheKey] = new CachedBaseline + { + ComputedAt = roundedHour, + RealTime = DateTime.UtcNow, + Buckets = buckets + }; + + return buckets; + } + + private async Task?> ComputeBaselinesAsync( + int serverId, string metricName, DateTime analysisTime) + { + var query = GetBaselineQuery(metricName); + if (query == null) return null; + + var windowStart = analysisTime.AddDays(-BaselineWindowDays); + + try + { + using var readLock = _duckDb.AcquireReadLock(); + using var connection = _duckDb.CreateConnection(); + await connection.OpenAsync(); + + using var cmd = connection.CreateCommand(); + cmd.CommandText = query; + cmd.Parameters.Add(new DuckDBParameter { Value = serverId }); + cmd.Parameters.Add(new DuckDBParameter { Value = windowStart }); + cmd.Parameters.Add(new DuckDBParameter { Value = analysisTime }); + + var buckets = new Dictionary<(int, int), BaselineBucket>(); + + using var reader = await cmd.ExecuteReaderAsync(); + while (await reader.ReadAsync()) + { + var hour = Convert.ToInt32(reader.GetValue(0)); + var dow = Convert.ToInt32(reader.GetValue(1)); + var mean = reader.IsDBNull(2) ? 0.0 : Convert.ToDouble(reader.GetValue(2)); + var stddev = reader.IsDBNull(3) ? 0.0 : Convert.ToDouble(reader.GetValue(3)); + var count = reader.IsDBNull(4) ? 0L : Convert.ToInt64(reader.GetValue(4)); + + buckets[(hour, dow)] = new BaselineBucket + { + HourOfDay = hour, + DayOfWeek = dow, + Mean = mean, + StdDev = stddev, + SampleCount = count, + Tier = count >= RestoreThreshold ? BaselineTier.Full + : count >= CollapseThreshold ? BaselineTier.Full + : BaselineTier.HourOnly + }; + } + + return buckets; + } + catch (Exception ex) + { + AppLogger.Error("BaselineProvider", $"Failed to compute baselines for {metricName}: {ex.Message}"); + return null; + } + } + + private static string? GetBaselineQuery(string metricName) + { + // All queries return: hour_of_day, day_of_week, mean_val, stddev_val, sample_count + // Cumulative metrics (batch requests, wait stats, query duration) use CTEs for + // restart poisoning exclusion — exclude samples where value drops to near-zero + // when the prior sample was significantly higher. + // Multi-row-per-collection metrics (waits, sessions, queries) aggregate per + // collection_time first, then bucket by hour+dow. + return metricName switch + { + // Point-in-time metric — no restart exclusion needed + MetricNames.Cpu => @" +SELECT EXTRACT(HOUR FROM collection_time)::INT AS hour_of_day, + EXTRACT(DOW FROM collection_time)::INT AS day_of_week, + AVG(sqlserver_cpu_utilization) AS mean_val, + STDDEV_SAMP(sqlserver_cpu_utilization) AS stddev_val, + COUNT(*) AS sample_count +FROM v_cpu_utilization_stats +WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3 +GROUP BY hour_of_day, day_of_week", + + // Cumulative counter — restart exclusion via subquery with QUALIFY. + // Excludes samples where delta drops to 0 when prior sample was > 1000 + // (restart signature for cumulative counters). + MetricNames.BatchRequests => @" +SELECT EXTRACT(HOUR FROM collection_time)::INT AS hour_of_day, + EXTRACT(DOW FROM collection_time)::INT AS day_of_week, + AVG(delta_cntr_value) AS mean_val, + STDDEV_SAMP(delta_cntr_value) AS stddev_val, + COUNT(*) AS sample_count +FROM ( + SELECT collection_time, delta_cntr_value + FROM v_perfmon_stats + WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3 + AND counter_name = 'Batch Requests/sec' + AND delta_cntr_value >= 0 + QUALIFY NOT (delta_cntr_value = 0 + AND COALESCE(LAG(delta_cntr_value) OVER (ORDER BY collection_time), 0) > 1000) +) +GROUP BY hour_of_day, day_of_week", + + // Cumulative counter, multiple rows per collection (per wait type) — + // aggregate to total wait ms per collection first, then QUALIFY for restart exclusion + MetricNames.WaitStats => @" +WITH per_collection AS ( + SELECT collection_time, + SUM(delta_wait_time_ms) AS total_wait_ms + FROM v_wait_stats + WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3 + AND delta_wait_time_ms >= 0 + GROUP BY collection_time + QUALIFY NOT (total_wait_ms = 0 + AND COALESCE(LAG(total_wait_ms) OVER (ORDER BY collection_time), 0) > 10000) +) +SELECT EXTRACT(HOUR FROM collection_time)::INT AS hour_of_day, + EXTRACT(DOW FROM collection_time)::INT AS day_of_week, + AVG(total_wait_ms) AS mean_val, + STDDEV_SAMP(total_wait_ms) AS stddev_val, + COUNT(*) AS sample_count +FROM per_collection +GROUP BY hour_of_day, day_of_week", + + // Point-in-time, multiple rows per collection (per program_name) — + // aggregate to total connections per collection first + MetricNames.SessionCount => @" +WITH per_collection AS ( + SELECT collection_time, + SUM(connection_count) AS total_connections + FROM v_session_stats + WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3 + GROUP BY collection_time +) +SELECT EXTRACT(HOUR FROM collection_time)::INT AS hour_of_day, + EXTRACT(DOW FROM collection_time)::INT AS day_of_week, + AVG(total_connections) AS mean_val, + STDDEV_SAMP(total_connections) AS stddev_val, + COUNT(*) AS sample_count +FROM per_collection +GROUP BY hour_of_day, day_of_week", + + // Cumulative (plan cache), multiple rows per collection (per query) — + // use delta columns, aggregate total elapsed per collection, QUALIFY for restart exclusion + MetricNames.QueryDuration => @" +WITH per_collection AS ( + SELECT collection_time, + SUM(delta_elapsed_time) AS total_elapsed + FROM v_query_stats + WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3 + AND delta_execution_count > 0 + AND delta_elapsed_time >= 0 + GROUP BY collection_time + QUALIFY NOT (total_elapsed = 0 + AND COALESCE(LAG(total_elapsed) OVER (ORDER BY collection_time), 0) > 100000) +) +SELECT EXTRACT(HOUR FROM collection_time)::INT AS hour_of_day, + EXTRACT(DOW FROM collection_time)::INT AS day_of_week, + AVG(total_elapsed) AS mean_val, + STDDEV_SAMP(total_elapsed) AS stddev_val, + COUNT(*) AS sample_count +FROM per_collection +GROUP BY hour_of_day, day_of_week", + + // Point-in-time metric — no restart exclusion needed + MetricNames.IoLatency => @" +SELECT EXTRACT(HOUR FROM collection_time)::INT AS hour_of_day, + EXTRACT(DOW FROM collection_time)::INT AS day_of_week, + AVG(delta_stall_read_ms * 1.0 / NULLIF(delta_reads, 0)) AS mean_val, + STDDEV_SAMP(delta_stall_read_ms * 1.0 / NULLIF(delta_reads, 0)) AS stddev_val, + COUNT(*) AS sample_count +FROM v_file_io_stats +WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3 +AND (delta_reads > 0 OR delta_writes > 0) +GROUP BY hour_of_day, day_of_week", + + // Event-based — mean = events per day for this bucket, sample_count = distinct days observed. + // No restart exclusion needed (event counts, not cumulative). + MetricNames.Blocking => @" +SELECT EXTRACT(HOUR FROM collection_time)::INT AS hour_of_day, + EXTRACT(DOW FROM collection_time)::INT AS day_of_week, + COUNT(*)::DOUBLE / GREATEST(COUNT(DISTINCT collection_time::DATE), 1) AS mean_val, + 0::DOUBLE AS stddev_val, + COUNT(DISTINCT collection_time::DATE) AS sample_count +FROM v_blocked_process_reports +WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3 +GROUP BY hour_of_day, day_of_week", + + // Event-based — same approach as blocking + MetricNames.Deadlock => @" +SELECT EXTRACT(HOUR FROM collection_time)::INT AS hour_of_day, + EXTRACT(DOW FROM collection_time)::INT AS day_of_week, + COUNT(*)::DOUBLE / GREATEST(COUNT(DISTINCT collection_time::DATE), 1) AS mean_val, + 0::DOUBLE AS stddev_val, + COUNT(DISTINCT collection_time::DATE) AS sample_count +FROM v_deadlocks +WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3 +GROUP BY hour_of_day, day_of_week", + + // Point-in-time metric (memory pressure %) — no restart exclusion needed + MetricNames.Memory => @" +SELECT EXTRACT(HOUR FROM collection_time)::INT AS hour_of_day, + EXTRACT(DOW FROM collection_time)::INT AS day_of_week, + AVG(total_server_memory_mb::DOUBLE / NULLIF(target_server_memory_mb::DOUBLE, 0) * 100) AS mean_val, + STDDEV_SAMP(total_server_memory_mb::DOUBLE / NULLIF(target_server_memory_mb::DOUBLE, 0) * 100) AS stddev_val, + COUNT(*) AS sample_count +FROM v_memory_stats +WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3 +AND target_server_memory_mb > 0 +GROUP BY hour_of_day, day_of_week", + + // ── Chart-unit baselines (for UI bands — units match what the chart displays) ── + + // Buffer pool MB (chart shows this, not pressure %) + MetricNames.MemoryBufferPoolMb => @" +SELECT EXTRACT(HOUR FROM collection_time)::INT AS hour_of_day, + EXTRACT(DOW FROM collection_time)::INT AS day_of_week, + AVG(buffer_pool_mb::DOUBLE) AS mean_val, + STDDEV_SAMP(buffer_pool_mb::DOUBLE) AS stddev_val, + COUNT(*) AS sample_count +FROM v_memory_stats +WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3 +AND buffer_pool_mb > 0 +GROUP BY hour_of_day, day_of_week", + + // Wait ms per second (chart shows this, not total ms per collection) + MetricNames.WaitMsPerSec => @" +WITH per_collection AS ( + SELECT collection_time, + SUM(delta_wait_time_ms)::DOUBLE AS total_wait_ms, + date_diff('second', LAG(collection_time) OVER (ORDER BY collection_time), collection_time) AS interval_sec + FROM v_wait_stats + WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3 + AND delta_wait_time_ms >= 0 + GROUP BY collection_time +), +with_rate AS ( + SELECT collection_time, + CASE WHEN interval_sec > 0 THEN total_wait_ms / interval_sec ELSE 0 END AS ms_per_sec + FROM per_collection + WHERE interval_sec IS NOT NULL + QUALIFY NOT (ms_per_sec = 0 + AND COALESCE(LAG(ms_per_sec) OVER (ORDER BY collection_time), 0) > 100) +) +SELECT EXTRACT(HOUR FROM collection_time)::INT AS hour_of_day, + EXTRACT(DOW FROM collection_time)::INT AS day_of_week, + AVG(ms_per_sec) AS mean_val, + STDDEV_SAMP(ms_per_sec) AS stddev_val, + COUNT(*) AS sample_count +FROM with_rate +GROUP BY hour_of_day, day_of_week", + + // Blocking events per minute (chart shows event bars bucketed by minute) + MetricNames.BlockingPerMinute => @" +WITH per_minute AS ( + SELECT DATE_TRUNC('minute', collection_time) AS minute_bucket, + COUNT(*)::DOUBLE AS event_count + FROM v_blocked_process_reports + WHERE server_id = $1 AND collection_time >= $2 AND collection_time < $3 + GROUP BY minute_bucket +) +SELECT EXTRACT(HOUR FROM minute_bucket)::INT AS hour_of_day, + EXTRACT(DOW FROM minute_bucket)::INT AS day_of_week, + AVG(event_count) AS mean_val, + STDDEV_SAMP(event_count) AS stddev_val, + COUNT(*) AS sample_count +FROM per_minute +GROUP BY hour_of_day, day_of_week", + + _ => null + }; + } + + /// + /// Collapses multiple day-of-week buckets for the same hour into a single + /// hour-only bucket using pooled statistics. + /// + private static BaselineBucket CollapseToHourOnly(List hourBuckets) + { + var totalSamples = hourBuckets.Sum(b => b.SampleCount); + if (totalSamples == 0) + return BaselineBucket.Empty; + + // Weighted mean across all day-of-week buckets for this hour + var weightedMean = hourBuckets.Sum(b => b.Mean * b.SampleCount) / totalSamples; + + // Pooled standard deviation + var pooledVariance = PoolVariance(hourBuckets, weightedMean); + + return new BaselineBucket + { + HourOfDay = hourBuckets[0].HourOfDay, + DayOfWeek = -1, // Indicates hour-only + Mean = weightedMean, + StdDev = Math.Sqrt(pooledVariance), + SampleCount = totalSamples, + Tier = BaselineTier.HourOnly + }; + } + + /// + /// Collapses all buckets into a single flat baseline (equivalent to old 24h behavior). + /// + private static BaselineBucket CollapseToFlat(List allBuckets) + { + var totalSamples = allBuckets.Sum(b => b.SampleCount); + if (totalSamples == 0) + return BaselineBucket.Empty; + + var weightedMean = allBuckets.Sum(b => b.Mean * b.SampleCount) / totalSamples; + var pooledVariance = PoolVariance(allBuckets, weightedMean); + + return new BaselineBucket + { + HourOfDay = -1, + DayOfWeek = -1, + Mean = weightedMean, + StdDev = Math.Sqrt(pooledVariance), + SampleCount = totalSamples, + Tier = BaselineTier.Flat + }; + } + + /// + /// Computes pooled variance from multiple buckets, accounting for both + /// within-bucket variance and between-bucket mean differences. + /// + private static double PoolVariance(List buckets, double grandMean) + { + var totalSamples = buckets.Sum(b => b.SampleCount); + if (totalSamples <= 1) return 0; + + double totalSumSq = 0; + foreach (var b in buckets) + { + if (b.SampleCount <= 0) continue; + // Within-bucket variance contribution + totalSumSq += (b.StdDev * b.StdDev) * (b.SampleCount - 1); + // Between-bucket mean difference contribution + totalSumSq += b.SampleCount * (b.Mean - grandMean) * (b.Mean - grandMean); + } + + return totalSumSq / (totalSamples - 1); + } + + private class CachedBaseline + { + public DateTime ComputedAt { get; init; } + public DateTime RealTime { get; init; } + public Dictionary<(int HourOfDay, int DayOfWeek), BaselineBucket>? Buckets { get; init; } + } +} + +/// +/// Represents the computed baseline statistics for a single time bucket. +/// +public class BaselineBucket +{ + public int HourOfDay { get; init; } + public int DayOfWeek { get; init; } + public double Mean { get; init; } + public double StdDev { get; init; } + public long SampleCount { get; init; } + public BaselineTier Tier { get; init; } + + public static BaselineBucket Empty => new() + { + HourOfDay = -1, DayOfWeek = -1, Mean = 0, StdDev = 0, + SampleCount = 0, Tier = BaselineTier.Flat + }; + + /// + /// Returns the effective stddev with a proportional minimum floor to prevent + /// division-by-zero in z-score calculations. When both mean and stddev are 0 + /// (zero activity), returns 0 — callers should skip scoring. + /// + public double EffectiveStdDev + { + get + { + if (Mean == 0 && StdDev <= 0) return 0; // Zero activity — skip scoring + return Math.Max(StdDev, Mean * 0.01); + } + } +} + +public enum BaselineTier +{ + Full, // hour + day-of-week (168 buckets) + HourOnly, // hour only (24 buckets) + Flat // global mean/stddev +} + +/// Metric name constants used as baseline cache keys. +public static class MetricNames +{ + public const string Cpu = "cpu"; + public const string BatchRequests = "batch_requests"; + public const string WaitStats = "wait_stats"; + public const string SessionCount = "session_count"; + public const string QueryDuration = "query_duration"; + public const string IoLatency = "io_latency"; + public const string Blocking = "blocking"; + public const string Deadlock = "deadlock"; + public const string Memory = "memory"; + + // Chart-unit metrics (for UI bands — units match what the chart displays) + public const string MemoryBufferPoolMb = "memory_buffer_pool_mb"; + public const string WaitMsPerSec = "wait_ms_per_sec"; + public const string BlockingPerMinute = "blocking_per_minute"; +} diff --git a/Lite/Analysis/FactScorer.cs b/Lite/Analysis/FactScorer.cs index 1ec47f7a..e42b4e9d 100644 --- a/Lite/Analysis/FactScorer.cs +++ b/Lite/Analysis/FactScorer.cs @@ -308,8 +308,13 @@ private static double ScoreBadActorFact(Fact fact) /// private static double ScoreAnomalyFact(Fact fact) { - if (fact.Key.StartsWith("ANOMALY_CPU_SPIKE", StringComparison.OrdinalIgnoreCase) || fact.Key.StartsWith("ANOMALY_READ_LATENCY", StringComparison.OrdinalIgnoreCase) - || fact.Key.StartsWith("ANOMALY_WRITE_LATENCY", StringComparison.OrdinalIgnoreCase)) + if (fact.Key.StartsWith("ANOMALY_CPU_SPIKE", StringComparison.OrdinalIgnoreCase) + || fact.Key.StartsWith("ANOMALY_READ_LATENCY", StringComparison.OrdinalIgnoreCase) + || fact.Key.StartsWith("ANOMALY_WRITE_LATENCY", StringComparison.OrdinalIgnoreCase) + || fact.Key.StartsWith("ANOMALY_BATCH_REQUESTS", StringComparison.OrdinalIgnoreCase) + || fact.Key.StartsWith("ANOMALY_SESSION_SPIKE", StringComparison.OrdinalIgnoreCase) + || fact.Key.StartsWith("ANOMALY_QUERY_DURATION", StringComparison.OrdinalIgnoreCase) + || fact.Key.StartsWith("ANOMALY_MEMORY_PRESSURE", StringComparison.OrdinalIgnoreCase)) { // Deviation-based scoring: 2σ = 0.5, 4σ = 1.0 var deviation = fact.Metadata.GetValueOrDefault("deviation_sigma"); diff --git a/Lite/Analysis/FindingStore.cs b/Lite/Analysis/FindingStore.cs index 6724445c..7611e512 100644 --- a/Lite/Analysis/FindingStore.cs +++ b/Lite/Analysis/FindingStore.cs @@ -60,7 +60,8 @@ public async Task> SaveFindingsAsync( RootFactValue = story.RootFactValue, LeafFactKey = story.LeafFactKey, LeafFactValue = story.LeafFactValue, - FactCount = story.FactCount + FactCount = story.FactCount, + RootFactMetadata = story.RootFactMetadata }; await InsertFindingAsync(finding); diff --git a/Lite/Analysis/InferenceEngine.cs b/Lite/Analysis/InferenceEngine.cs index 4ef4dc89..fdc7985b 100644 --- a/Lite/Analysis/InferenceEngine.cs +++ b/Lite/Analysis/InferenceEngine.cs @@ -150,7 +150,8 @@ private static AnalysisStory BuildStory(List path, Dictionary - - @@ -34,15 +30,11 @@ - - @@ -50,7 +42,6 @@ - - - + - - - @@ -86,15 +70,11 @@ - - diff --git a/Lite/Controls/CorrelatedTimelineLanesControl.xaml.cs b/Lite/Controls/CorrelatedTimelineLanesControl.xaml.cs index 5f4e01d8..d39775af 100644 --- a/Lite/Controls/CorrelatedTimelineLanesControl.xaml.cs +++ b/Lite/Controls/CorrelatedTimelineLanesControl.xaml.cs @@ -15,6 +15,7 @@ using System.Threading.Tasks; using System.Windows; using System.Windows.Controls; +using PerformanceMonitorLite.Analysis; using PerformanceMonitorLite.Helpers; using PerformanceMonitorLite.Services; @@ -51,17 +52,18 @@ public void Initialize(LocalDataService dataService, int serverId) } _crosshairManager = new CorrelatedCrosshairManager(); - _crosshairManager.AddLane(CpuChart, "CPU", "%", CpuValueLabel); - _crosshairManager.AddLane(WaitStatsChart, "Wait Stats", "ms/sec", WaitStatsValueLabel); - _crosshairManager.AddLane(BlockingChart, "Blocking", "events", BlockingValueLabel); - _crosshairManager.AddLane(MemoryChart, "Memory", "MB", MemoryValueLabel); - _crosshairManager.AddLane(FileIoChart, "I/O Latency", "ms", FileIoValueLabel); + _crosshairManager.AddLane(CpuChart, "CPU", "%"); + _crosshairManager.AddLane(WaitStatsChart, "Wait Stats", "ms/sec"); + _crosshairManager.AddLane(BlockingChart, "Blocking", "events"); + _crosshairManager.AddLane(MemoryChart, "Buffer Pool", "MB"); + _crosshairManager.AddLane(FileIoChart, "I/O Latency", "ms"); } /// /// Refreshes all lane data for the given time range. /// - public async Task RefreshAsync(int hoursBack, DateTime? fromDate, DateTime? toDate) + public async Task RefreshAsync(int hoursBack, DateTime? fromDate, DateTime? toDate, + (DateTime From, DateTime To)? comparisonRange = null) { if (_dataService == null || _isRefreshing) return; _isRefreshing = true; @@ -77,28 +79,43 @@ public async Task RefreshAsync(int hoursBack, DateTime? fromDate, DateTime? toDa var memoryTask = _dataService.GetMemoryTrendAsync(_serverId, hoursBack, fromDate, toDate); var fileIoTask = _dataService.GetFileIoLatencyTrendAsync(_serverId, hoursBack, fromDate, toDate); + // Fetch baselines for band rendering — chart-unit-matched metrics + var referenceTime = fromDate ?? DateTime.UtcNow.AddHours(-hoursBack); + var cpuBaselineTask = _dataService.GetBaselineForLaneAsync(_serverId, MetricNames.Cpu, referenceTime); + var waitBaselineTask = _dataService.GetBaselineForLaneAsync(_serverId, MetricNames.WaitMsPerSec, referenceTime); + var ioBaselineTask = _dataService.GetBaselineForLaneAsync(_serverId, MetricNames.IoLatency, referenceTime); + var blockingBaselineTask = _dataService.GetBaselineForLaneAsync(_serverId, MetricNames.BlockingPerMinute, referenceTime); + try { - await Task.WhenAll(cpuTask, waitTask, blockingTask, deadlockTask, memoryTask, fileIoTask); + await Task.WhenAll(cpuTask, waitTask, blockingTask, deadlockTask, memoryTask, fileIoTask, + cpuBaselineTask, waitBaselineTask, ioBaselineTask, blockingBaselineTask); } catch (Exception ex) { AppLogger.Info("CorrelatedLanes", $"Data fetch failed: {ex.Message}"); } + var cpuBaseline = cpuBaselineTask.IsCompletedSuccessfully ? cpuBaselineTask.Result : null; + var waitBaseline = waitBaselineTask.IsCompletedSuccessfully ? waitBaselineTask.Result : null; + var ioBaseline = ioBaselineTask.IsCompletedSuccessfully ? ioBaselineTask.Result : null; + var blockingBaseline = blockingBaselineTask.IsCompletedSuccessfully ? blockingBaselineTask.Result : null; + var utcOffset = ServerTimeHelper.UtcOffsetMinutes; + // minAnomalyValue: absolute floor below which dots/arrows are suppressed even if outside band. + // Prevents "1% CPU above 0.5% baseline" false alarms on idle servers. if (cpuTask.IsCompletedSuccessfully) UpdateLane(CpuChart, "CPU %", cpuTask.Result.Select(d => (d.SampleTime.ToOADate(), (double)d.SqlServerCpu)).ToList(), - "#4FC3F7", 0, 105); + "#4FC3F7", 0, 105, cpuBaseline, minAnomalyValue: 10); else ShowEmpty(CpuChart, "CPU %"); if (waitTask.IsCompletedSuccessfully) UpdateLane(WaitStatsChart, "Wait ms/sec", waitTask.Result.Select(d => (d.CollectionTime.AddMinutes(utcOffset).ToOADate(), d.WaitTimeMsPerSecond)).ToList(), - "#FFB74D"); + "#FFB74D", baseline: waitBaseline, minAnomalyValue: 100); else ShowEmpty(WaitStatsChart, "Wait ms/sec"); @@ -109,11 +126,11 @@ public async Task RefreshAsync(int hoursBack, DateTime? fromDate, DateTime? toDa var deadlockData = deadlockTask.IsCompletedSuccessfully ? deadlockTask.Result.Select(d => (d.Time.AddMinutes(utcOffset).ToOADate(), (double)d.Count)).ToList() : new List<(double, double)>(); - UpdateBlockingLane(blockingData, deadlockData); + UpdateBlockingLane(blockingData, deadlockData, blockingBaseline); } if (memoryTask.IsCompletedSuccessfully) - UpdateLane(MemoryChart, "Memory MB", + UpdateLane(MemoryChart, "Buffer Pool MB", memoryTask.Result.Select(d => (d.CollectionTime.AddMinutes(utcOffset).ToOADate(), d.BufferPoolMb)).ToList(), "#CE93D8"); else @@ -126,11 +143,66 @@ public async Task RefreshAsync(int hoursBack, DateTime? fromDate, DateTime? toDa .OrderBy(g => g.Key) .Select(g => (g.Key.AddMinutes(utcOffset).ToOADate(), g.Average(x => x.AvgReadLatencyMs))) .ToList(); - UpdateLane(FileIoChart, "I/O ms", ioGrouped, "#81C784"); + UpdateLane(FileIoChart, "I/O ms", ioGrouped, "#81C784", baseline: ioBaseline, minAnomalyValue: 2); } else ShowEmpty(FileIoChart, "I/O ms"); + // Comparison overlay — fetch reference period data and render as ghost lines + if (comparisonRange.HasValue) + { + var refFrom = comparisonRange.Value.From; + var refTo = comparisonRange.Value.To; + // Time shift: offset to align reference data with current chart X axis + var timeShift = (fromDate ?? DateTime.UtcNow.AddHours(-hoursBack)) - refFrom; + + var refCpuTask = _dataService.GetCpuUtilizationAsync(_serverId, 0, refFrom, refTo); + var refWaitTask = _dataService.GetTotalWaitTrendAsync(_serverId, 0, refFrom, refTo); + var refBlockingTask = _dataService.GetBlockingTrendAsync(_serverId, 0, refFrom, refTo); + var refMemoryTask = _dataService.GetMemoryTrendAsync(_serverId, 0, refFrom, refTo); + var refIoTask = _dataService.GetFileIoLatencyTrendAsync(_serverId, 0, refFrom, refTo); + + try { await Task.WhenAll(refCpuTask, refWaitTask, refBlockingTask, refMemoryTask, refIoTask); } + catch (Exception ex) { AppLogger.Info("CorrelatedLanes", $"Comparison fetch failed: {ex.Message}"); } + + AppLogger.Info("CorrelatedLanes", + $"Comparison: refFrom={refFrom:o}, refTo={refTo:o}, shift={timeShift.TotalHours:F1}h, " + + $"cpuRows={refCpuTask.Result?.Count ?? 0}, waitRows={refWaitTask.Result?.Count ?? 0}"); + + if (refCpuTask.IsCompletedSuccessfully) + AddGhostLine(CpuChart, refCpuTask.Result + .Select(d => (d.SampleTime.Add(timeShift).ToOADate(), (double)d.SqlServerCpu)).ToList(), "#4FC3F7"); + + if (refWaitTask.IsCompletedSuccessfully) + AddGhostLine(WaitStatsChart, refWaitTask.Result + .Select(d => (d.CollectionTime.AddMinutes(utcOffset).Add(timeShift).ToOADate(), d.WaitTimeMsPerSecond)).ToList(), "#FFB74D"); + + if (refBlockingTask.IsCompletedSuccessfully) + { + var refBlocking = refBlockingTask.Result + .Select(d => (d.Time.AddMinutes(utcOffset).Add(timeShift).ToOADate(), (double)d.Count)).ToList(); + if (refBlocking.Count > 0) + AddGhostLine(BlockingChart, refBlocking, "#E57373"); + } + + if (refMemoryTask.IsCompletedSuccessfully) + AddGhostLine(MemoryChart, refMemoryTask.Result + .Select(d => (d.CollectionTime.AddMinutes(utcOffset).Add(timeShift).ToOADate(), d.BufferPoolMb)).ToList(), "#CE93D8"); + + if (refIoTask.IsCompletedSuccessfully) + { + var refIo = refIoTask.Result + .GroupBy(d => d.CollectionTime) + .OrderBy(g => g.Key) + .Select(g => (g.Key.AddMinutes(utcOffset).Add(timeShift).ToOADate(), g.Average(x => x.AvgReadLatencyMs))) + .ToList(); + AddGhostLine(FileIoChart, refIo, "#81C784"); + } + + // Register reference data with crosshair manager for tooltip + _crosshairManager?.SetComparisonLabel(ComparisonLabel(comparisonRange.Value, fromDate, hoursBack)); + } + _crosshairManager?.ReattachVLines(); SyncXAxes(hoursBack, fromDate, toDate, utcOffset); } @@ -141,7 +213,7 @@ public async Task RefreshAsync(int hoursBack, DateTime? fromDate, DateTime? toDa } private void UpdateBlockingLane(List<(double Time, double Value)> blockingData, - List<(double Time, double Value)> deadlockData) + List<(double Time, double Value)> deadlockData, BaselineBucket? baseline = null) { ClearChart(BlockingChart); ApplyTheme(BlockingChart); @@ -197,6 +269,24 @@ private void UpdateBlockingLane(List<(double Time, double Value)> blockingData, maxCount = Math.Max(maxCount, deadlockData.Max(d => d.Value)); } + // Baseline band for blocking + if (baseline != null && baseline.SampleCount > 0 && baseline.EffectiveStdDev > 0) + { + var upper = baseline.Mean + 2 * baseline.EffectiveStdDev; + var lower = Math.Max(0, baseline.Mean - 2 * baseline.EffectiveStdDev); + + _crosshairManager?.SetLaneBaseline(BlockingChart, lower, upper, isEventBased: true); + + var band = BlockingChart.Plot.Add.HorizontalSpan(lower, upper); + band.FillStyle.Color = ScottPlot.Color.FromHex("#E57373").WithAlpha(25); + band.LineStyle.Width = 0; + + var meanLine = BlockingChart.Plot.Add.HorizontalLine(baseline.Mean); + meanLine.Color = ScottPlot.Color.FromHex("#E57373").WithAlpha(60); + meanLine.LinePattern = ScottPlot.LinePattern.Dashed; + meanLine.LineWidth = 1; + } + BlockingChart.Plot.Axes.DateTimeTicksBottom(); BlockingChart.Plot.Axes.Bottom.TickLabelStyle.IsVisible = false; ReapplyAxisColors(BlockingChart); @@ -212,7 +302,8 @@ private void UpdateBlockingLane(List<(double Time, double Value)> blockingData, private void UpdateLane(ScottPlot.WPF.WpfPlot chart, string title, List<(double Time, double Value)> data, string colorHex, - double? yMin = null, double? yMax = null) + double? yMin = null, double? yMax = null, BaselineBucket? baseline = null, + double minAnomalyValue = 0) { ClearChart(chart); ApplyTheme(chart); @@ -226,6 +317,43 @@ private void UpdateLane(ScottPlot.WPF.WpfPlot chart, string title, var times = data.Select(d => d.Time).ToArray(); var values = data.Select(d => d.Value).ToArray(); + // Render baseline band FIRST (behind the data line) + if (baseline != null && baseline.SampleCount > 0 && baseline.EffectiveStdDev > 0) + { + var upper = baseline.Mean + 2 * baseline.EffectiveStdDev; + var lower = Math.Max(0, baseline.Mean - 2 * baseline.EffectiveStdDev); + + _crosshairManager?.SetLaneBaseline(chart, lower, upper, minAnomalyValue); + + var band = chart.Plot.Add.HorizontalSpan(lower, upper); + band.FillStyle.Color = ScottPlot.Color.FromHex(colorHex).WithAlpha(25); + band.LineStyle.Width = 0; + + var meanLine = chart.Plot.Add.HorizontalLine(baseline.Mean); + meanLine.Color = ScottPlot.Color.FromHex(colorHex).WithAlpha(60); + meanLine.LinePattern = ScottPlot.LinePattern.Dashed; + meanLine.LineWidth = 1; + + // Highlight anomalous points (outside ± 2σ band AND above absolute minimum) + var anomalyIndices = new List(); + for (int i = 0; i < values.Length; i++) + { + if ((values[i] > upper && values[i] >= minAnomalyValue) || values[i] < lower) + anomalyIndices.Add(i); + } + + if (anomalyIndices.Count > 0) + { + var anomalyTimes = anomalyIndices.Select(i => times[i]).ToArray(); + var anomalyValues = anomalyIndices.Select(i => values[i]).ToArray(); + var anomalyScatter = chart.Plot.Add.Scatter(anomalyTimes, anomalyValues); + anomalyScatter.Color = ScottPlot.Color.FromHex("#FF5252"); + anomalyScatter.MarkerSize = 6; + anomalyScatter.MarkerShape = ScottPlot.MarkerShape.FilledCircle; + anomalyScatter.LineWidth = 0; + } + } + var scatter = chart.Plot.Add.Scatter(times, values); scatter.Color = ScottPlot.Color.FromHex(colorHex); scatter.MarkerSize = 0; @@ -289,6 +417,38 @@ private void SyncXAxes(int hoursBack, DateTime? fromDate, DateTime? toDate, doub } } + /// + /// Renders a semi-transparent dashed ghost line for comparison overlay. + /// + private static void AddGhostLine(ScottPlot.WPF.WpfPlot chart, + List<(double Time, double Value)> data, string colorHex) + { + if (data.Count == 0) return; + + var times = data.Select(d => d.Time).ToArray(); + var values = data.Select(d => d.Value).ToArray(); + + var scatter = chart.Plot.Add.Scatter(times, values); + // White-ish ghost line — distinct from the primary colored line + scatter.Color = ScottPlot.Colors.White.WithAlpha(140); + scatter.MarkerSize = 0; + scatter.LineWidth = 1.5f; + scatter.LinePattern = ScottPlot.LinePattern.Dashed; + + chart.Refresh(); + } + + private static string ComparisonLabel((DateTime From, DateTime To) range, + DateTime? fromDate, int hoursBack) + { + var currentStart = fromDate ?? DateTime.UtcNow.AddHours(-hoursBack); + var daysBack = (currentStart - range.From).TotalDays; + + if (Math.Abs(daysBack - 1) < 0.5) return "yesterday"; + if (Math.Abs(daysBack - 7) < 0.5) return "last week"; + return $"{daysBack:N0}d ago"; + } + private static void ClearChart(ScottPlot.WPF.WpfPlot chart) { chart.Reset(); @@ -332,21 +492,21 @@ private static void ApplyTheme(ScottPlot.WPF.WpfPlot chart) { figureBackground = ScottPlot.Color.FromHex("#EEF4FA"); dataBackground = ScottPlot.Color.FromHex("#DAE6F0"); - textColor = ScottPlot.Color.FromHex("#364D61"); + textColor = ScottPlot.Color.FromHex("#1A2A3A"); gridColor = ScottPlot.Color.FromHex("#A8BDD0").WithAlpha(120); } else if (ThemeManager.HasLightBackground) { figureBackground = ScottPlot.Color.FromHex("#FFFFFF"); dataBackground = ScottPlot.Color.FromHex("#F5F7FA"); - textColor = ScottPlot.Color.FromHex("#4A5568"); + textColor = ScottPlot.Color.FromHex("#1A1D23"); gridColor = ScottPlot.Colors.Black.WithAlpha(20); } else { figureBackground = ScottPlot.Color.FromHex("#22252b"); dataBackground = ScottPlot.Color.FromHex("#111217"); - textColor = ScottPlot.Color.FromHex("#9DA5B4"); + textColor = ScottPlot.Color.FromHex("#E4E6EB"); gridColor = ScottPlot.Colors.White.WithAlpha(40); } @@ -368,10 +528,10 @@ private static void ApplyTheme(ScottPlot.WPF.WpfPlot chart) private static void ReapplyAxisColors(ScottPlot.WPF.WpfPlot chart) { var textColor = ThemeManager.CurrentTheme == "CoolBreeze" - ? ScottPlot.Color.FromHex("#364D61") + ? ScottPlot.Color.FromHex("#1A2A3A") : ThemeManager.HasLightBackground - ? ScottPlot.Color.FromHex("#4A5568") - : ScottPlot.Color.FromHex("#9DA5B4"); + ? ScottPlot.Color.FromHex("#1A1D23") + : ScottPlot.Color.FromHex("#E4E6EB"); chart.Plot.Axes.Bottom.TickLabelStyle.ForeColor = textColor; chart.Plot.Axes.Left.TickLabelStyle.ForeColor = textColor; chart.Plot.Axes.Bottom.Label.ForeColor = textColor; diff --git a/Lite/Controls/ServerTab.xaml b/Lite/Controls/ServerTab.xaml index 37e079c1..6202f649 100644 --- a/Lite/Controls/ServerTab.xaml +++ b/Lite/Controls/ServerTab.xaml @@ -103,6 +103,16 @@ SelectionChanged="CustomTimeCombo_Changed" ToolTip="Minute"/>