From 4f3722b54cd1abdd8d2fa7668cb7aa955848f1e4 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Sat, 21 Feb 2026 15:29:26 -0500 Subject: [PATCH 01/53] Fix DuckDB file corruption during maintenance, overhaul maintenance system (#218) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: archival DELETEs + CHECKPOINT reorganized/truncated the DuckDB file while UI connections had stale file offsets, causing "Reached the end of the file" crashes after ~1 hour of uptime. Fix: ReaderWriterLockSlim coordinates UI readers with maintenance writers. UI queries hold read locks (unlimited concurrency) via LockedConnection wrapper. CHECKPOINT and archive DELETEs hold exclusive write locks (<1s duration). Maintenance overhaul: - Replaced compaction cycle (File.Replace race condition) with archive-all-and-reset - Size-based trigger at 512MB archives ALL data to parquet, deletes .duckdb, reinits - Tested: 515MB → 19MB in <1s, 65K rows to ~400KB ZSTD parquet per cycle - Per-reset timestamped parquet naming (20260221_1925_table.parquet) eliminates merge logic and simplifies 90-day retention (delete by date prefix) - RetentionService handles both new timestamped and legacy monthly formats - Wired AppLoggerAdapter for all 8 services that had null loggers - Removed dead CompactAsync method (~140 lines) Tested with 4 SQL Servers under HammerDB load, 3 successful archive+reset cycles, zero errors, archive views correctly serve data from all parquet file sets. Co-Authored-By: Claude Opus 4.6 --- Lite/Database/DuckDbInitializer.cs | 199 ++++++------------- Lite/Database/LockedConnection.cs | 53 +++++ Lite/MainWindow.xaml.cs | 19 +- Lite/Services/AppLoggerAdapter.cs | 50 +++++ Lite/Services/ArchiveService.cs | 156 ++++++++++----- Lite/Services/CollectionBackgroundService.cs | 72 +++---- Lite/Services/LocalDataService.cs | 21 +- Lite/Services/RemoteCollectorService.cs | 5 +- Lite/Services/RetentionService.cs | 24 ++- 9 files changed, 353 insertions(+), 246 deletions(-) create mode 100644 Lite/Database/LockedConnection.cs create mode 100644 Lite/Services/AppLoggerAdapter.cs diff --git a/Lite/Database/DuckDbInitializer.cs b/Lite/Database/DuckDbInitializer.cs index 6f912df8..103555ea 100644 --- a/Lite/Database/DuckDbInitializer.cs +++ b/Lite/Database/DuckDbInitializer.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using System.Threading; using System.Threading.Tasks; using DuckDB.NET.Data; using Microsoft.Extensions.Logging; @@ -16,6 +17,54 @@ public class DuckDbInitializer private readonly string _databasePath; private readonly ILogger? _logger; + /// + /// Coordinates UI readers with maintenance writers (CHECKPOINT, archive DELETEs, compaction). + /// Read locks allow unlimited concurrent UI queries. Write locks are exclusive and wait + /// for all readers to finish before proceeding. + /// + private static readonly ReaderWriterLockSlim s_dbLock = new(LockRecursionPolicy.NoRecursion); + + /// + /// Acquires a read lock on the database. Multiple readers can hold this concurrently. + /// Dispose the returned object to release the lock. + /// + public IDisposable AcquireReadLock() + { + s_dbLock.EnterReadLock(); + return new LockReleaser(s_dbLock, write: false); + } + + /// + /// Acquires an exclusive write lock on the database. Blocks until all readers finish. + /// Dispose the returned object to release the lock. + /// + public IDisposable AcquireWriteLock() + { + s_dbLock.EnterWriteLock(); + return new LockReleaser(s_dbLock, write: true); + } + + private sealed class LockReleaser : IDisposable + { + private readonly ReaderWriterLockSlim _lock; + private readonly bool _write; + private bool _disposed; + + public LockReleaser(ReaderWriterLockSlim rwLock, bool write) + { + _lock = rwLock; + _write = write; + } + + public void Dispose() + { + if (_disposed) return; + _disposed = true; + if (_write) _lock.ExitWriteLock(); + else _lock.ExitReadLock(); + } + } + /// /// Current schema version. Increment this when schema changes require table rebuilds. /// @@ -491,6 +540,7 @@ public async Task CreateArchiveViewsAsync() /// public async Task CheckpointAsync() { + using var writeLock = AcquireWriteLock(); try { using var connection = CreateConnection(); @@ -546,147 +596,24 @@ public double GetDatabaseSizeMb() } /// - /// Compacts the database by exporting all tables to a fresh file and swapping. - /// DuckDB VACUUM does not reclaim space from append-fragmented files — only - /// export/reimport eliminates bloat. Typically takes 2-5 seconds for a 300MB database. + /// Deletes the database and WAL files, then reinitializes with fresh empty tables + /// and archive views pointing at the parquet files. + /// Acquires its own write lock — caller must NOT already hold the lock. /// - /// True if compaction was performed, false if skipped or failed. - public async Task CompactAsync() + public async Task ResetDatabaseAsync() { - if (!DatabaseExists()) - { - return false; - } - - var sizeBefore = GetDatabaseSizeMb(); - var tempPath = _databasePath + ".compact"; - var backupPath = _databasePath + ".precompact"; - - _logger?.LogInformation("Starting database compaction ({SizeMb:F0} MB)", sizeBefore); - - try - { - /* Export all data to a fresh database via ATTACH + CREATE TABLE AS */ - if (File.Exists(tempPath)) File.Delete(tempPath); - - using (var connection = CreateConnection()) - { - await connection.OpenAsync(); - - /* Checkpoint first to flush WAL */ - using (var cmd = connection.CreateCommand()) - { - cmd.CommandText = "CHECKPOINT"; - await cmd.ExecuteNonQueryAsync(); - } - - /* Attach the new database and copy all tables */ - using (var cmd = connection.CreateCommand()) - { - cmd.CommandText = $"ATTACH '{tempPath.Replace("\\", "/")}' AS compact_db"; - await cmd.ExecuteNonQueryAsync(); - } - - /* Get all table names (exclude views) */ - var tableNames = new List(); - using (var cmd = connection.CreateCommand()) - { - cmd.CommandText = "SELECT table_name FROM information_schema.tables WHERE table_schema = 'main' AND table_type = 'BASE TABLE'"; - using var reader = await cmd.ExecuteReaderAsync(); - while (await reader.ReadAsync()) - { - tableNames.Add(reader.GetString(0)); - } - } - - foreach (var table in tableNames) - { - using var cmd = connection.CreateCommand(); - cmd.CommandText = $"CREATE TABLE compact_db.{table} AS SELECT * FROM main.{table}"; - await cmd.ExecuteNonQueryAsync(); - } - - using (var cmd = connection.CreateCommand()) - { - cmd.CommandText = "DETACH compact_db"; - await cmd.ExecuteNonQueryAsync(); - } - } - - /* Delete WAL files before swap — the old WAL belongs to the pre-compaction - database and would confuse the fresh compacted file on next open */ - var walPath = _databasePath + ".wal"; - if (File.Exists(walPath)) File.Delete(walPath); - - var tempWalPath = tempPath + ".wal"; - if (File.Exists(tempWalPath)) File.Delete(tempWalPath); - - /* Atomically replace the database file with the compacted version. - File.Replace swaps in a single OS operation, eliminating any window - where _databasePath doesn't exist (unlike two separate File.Move calls). - Retry briefly if a UI connection still has the file open. */ - if (File.Exists(backupPath)) File.Delete(backupPath); - - const int maxSwapAttempts = 3; - for (int attempt = 1; attempt <= maxSwapAttempts; attempt++) - { - try - { - File.Replace(tempPath, _databasePath, backupPath); - break; - } - catch (IOException) when (attempt < maxSwapAttempts) - { - _logger?.LogDebug("Compaction file swap attempt {Attempt}/{Max} failed (file in use), retrying in 500ms", - attempt, maxSwapAttempts); - await Task.Delay(500); - } - } - - /* Recreate indexes and views on the fresh database */ - using (var connection = CreateConnection()) - { - await connection.OpenAsync(); - - foreach (var indexStatement in Schema.GetAllIndexStatements()) - { - try - { - using var cmd = connection.CreateCommand(); - cmd.CommandText = indexStatement; - await cmd.ExecuteNonQueryAsync(); - } - catch { /* Index may already exist from CREATE TABLE AS */ } - } - } - - await CreateArchiveViewsAsync(); - - /* Clean up backup */ - File.Delete(backupPath); + using var writeLock = AcquireWriteLock(); - var sizeAfter = GetDatabaseSizeMb(); - _logger?.LogInformation("Compaction complete: {Before:F0} MB -> {After:F0} MB ({Saved:F0} MB reclaimed)", - sizeBefore, sizeAfter, sizeBefore - sizeAfter); - - return true; - } - catch (Exception ex) - { - _logger?.LogError(ex, "Database compaction failed"); - - /* Restore from backup if the primary file was moved */ - if (!File.Exists(_databasePath) && File.Exists(backupPath)) - { - File.Move(backupPath, _databasePath); - _logger?.LogInformation("Restored database from pre-compaction backup"); - } + if (File.Exists(_databasePath)) + File.Delete(_databasePath); - /* Clean up temp file */ - if (File.Exists(tempPath)) File.Delete(tempPath); + var walPath = _databasePath + ".wal"; + if (File.Exists(walPath)) + File.Delete(walPath); - return false; - } + _logger?.LogInformation("Database files deleted, reinitializing"); + await InitializeAsync(); } + } diff --git a/Lite/Database/LockedConnection.cs b/Lite/Database/LockedConnection.cs new file mode 100644 index 00000000..44e67769 --- /dev/null +++ b/Lite/Database/LockedConnection.cs @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2026 Erik Darling, Darling Data LLC + * + * This file is part of the SQL Server Performance Monitor Lite. + * + * Licensed under the MIT License. See LICENSE file in the project root for full license information. + */ + +using System; +using System.Threading.Tasks; +using DuckDB.NET.Data; + +namespace PerformanceMonitorLite.Database; + +/// +/// Wraps a DuckDBConnection with a read lock that is released when the connection is disposed. +/// Ensures UI reads hold the lock for their entire duration, preventing CHECKPOINT or compaction +/// from reorganizing the database file while a reader has stale file offsets. +/// +public sealed class LockedConnection : IDisposable, IAsyncDisposable +{ + private readonly DuckDBConnection _connection; + private readonly IDisposable _readLock; + private bool _disposed; + + public LockedConnection(DuckDBConnection connection, IDisposable readLock) + { + _connection = connection; + _readLock = readLock; + } + + /// + /// Creates a command on the underlying connection. + /// This is the only method callers need — all 50 call sites use CreateCommand() exclusively. + /// + public DuckDBCommand CreateCommand() => _connection.CreateCommand(); + + public void Dispose() + { + if (_disposed) return; + _disposed = true; + _connection.Dispose(); + _readLock.Dispose(); + } + + public async ValueTask DisposeAsync() + { + if (_disposed) return; + _disposed = true; + await _connection.DisposeAsync(); + _readLock.Dispose(); + } +} diff --git a/Lite/MainWindow.xaml.cs b/Lite/MainWindow.xaml.cs index e1aed234..e7ef039f 100644 --- a/Lite/MainWindow.xaml.cs +++ b/Lite/MainWindow.xaml.cs @@ -62,10 +62,10 @@ public MainWindow() { InitializeComponent(); - // Initialize services - _databaseInitializer = new DuckDbInitializer(App.DatabasePath); + // Initialize services (with loggers wired to AppLogger) + _databaseInitializer = new DuckDbInitializer(App.DatabasePath, new AppLoggerAdapter()); _emailAlertService = new EmailAlertService(_databaseInitializer); - _serverManager = new ServerManager(App.ConfigDirectory); + _serverManager = new ServerManager(App.ConfigDirectory, logger: new AppLoggerAdapter()); _scheduleManager = new ScheduleManager(App.ConfigDirectory); // Status bar update timer @@ -96,16 +96,19 @@ private async void MainWindow_Loaded(object sender, RoutedEventArgs e) // Initialize the DuckDB database await _databaseInitializer.InitializeAsync(); - // Initialize the collection engine + // Initialize the collection engine (with loggers wired to AppLogger) _collectorService = new RemoteCollectorService( _databaseInitializer, _serverManager, - _scheduleManager); + _scheduleManager, + new AppLoggerAdapter()); - var archiveService = new ArchiveService(_databaseInitializer, App.ArchiveDirectory); - var retentionService = new RetentionService(App.ArchiveDirectory); + var archiveService = new ArchiveService(_databaseInitializer, App.ArchiveDirectory, new AppLoggerAdapter()); + var retentionService = new RetentionService(App.ArchiveDirectory, new AppLoggerAdapter()); - _backgroundService = new CollectionBackgroundService(_collectorService, _databaseInitializer, archiveService, retentionService, _serverManager); + _backgroundService = new CollectionBackgroundService( + _collectorService, _databaseInitializer, archiveService, retentionService, _serverManager, + new AppLoggerAdapter()); // Start background collection _backgroundCts = new CancellationTokenSource(); diff --git a/Lite/Services/AppLoggerAdapter.cs b/Lite/Services/AppLoggerAdapter.cs new file mode 100644 index 00000000..2bd1c79d --- /dev/null +++ b/Lite/Services/AppLoggerAdapter.cs @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2026 Erik Darling, Darling Data LLC + * + * This file is part of the SQL Server Performance Monitor Lite. + * + * Licensed under the MIT License. See LICENSE file in the project root for full license information. + */ + +using System; +using Microsoft.Extensions.Logging; + +namespace PerformanceMonitorLite.Services; + +/// +/// Bridges the static AppLogger to the ILogger<T> interface so services +/// that accept ILogger<T> can log to the same file as the rest of the app. +/// +public sealed class AppLoggerAdapter : ILogger +{ + private readonly string _categoryName = typeof(T).Name; + + public IDisposable? BeginScope(TState state) where TState : notnull => null; + + public bool IsEnabled(LogLevel logLevel) => logLevel >= LogLevel.Debug; + + public void Log(LogLevel logLevel, EventId eventId, TState state, Exception? exception, Func formatter) + { + if (!IsEnabled(logLevel)) return; + + var message = formatter(state, exception); + + switch (logLevel) + { + case LogLevel.Trace: + case LogLevel.Debug: + AppLogger.Debug(_categoryName, message); + break; + case LogLevel.Information: + AppLogger.Info(_categoryName, message); + break; + case LogLevel.Warning: + AppLogger.Warn(_categoryName, message); + break; + case LogLevel.Error: + case LogLevel.Critical: + AppLogger.Error(_categoryName, message, exception); + break; + } + } +} diff --git a/Lite/Services/ArchiveService.cs b/Lite/Services/ArchiveService.cs index 5b71fac1..34ddb761 100644 --- a/Lite/Services/ArchiveService.cs +++ b/Lite/Services/ArchiveService.cs @@ -58,10 +58,12 @@ public ArchiveService(DuckDbInitializer duckDb, string archivePath, ILogger - /// Archives data older than the specified number of days to Parquet files, + /// Archives data older than the specified cutoff to Parquet files, /// then deletes the archived rows from the hot tables. + /// Use hotDataDays for scheduled archival (default 7), or hotDataHours + /// for size-triggered archival when the database is under space pressure. /// - public async Task ArchiveOldDataAsync(int hotDataDays = 7) + public async Task ArchiveOldDataAsync(int hotDataDays = 7, int? hotDataHours = null) { if (!await s_archiveLock.WaitAsync(TimeSpan.Zero)) { @@ -71,65 +73,55 @@ public async Task ArchiveOldDataAsync(int hotDataDays = 7) try { - var cutoffDate = DateTime.UtcNow.AddDays(-hotDataDays); - var archiveMonth = cutoffDate.ToString("yyyy-MM"); + var cutoffDate = hotDataHours.HasValue + ? DateTime.UtcNow.AddHours(-hotDataHours.Value) + : DateTime.UtcNow.AddDays(-hotDataDays); + var timestamp = DateTime.UtcNow.ToString("yyyyMMdd_HHmm"); - _logger?.LogInformation("Archiving data older than {CutoffDate} to Parquet (month: {Month})", cutoffDate, archiveMonth); + _logger?.LogInformation("Archiving data older than {CutoffDate} to Parquet (prefix: {Timestamp})", cutoffDate, timestamp); - using var connection = _duckDb.CreateConnection(); - await connection.OpenAsync(); - - foreach (var (table, timeColumn) in ArchivableTables) + /* Write lock covers export + DELETE. The DELETEs modify table data, and the + next CHECKPOINT will reorganize the file — readers must not be mid-query + when that happens or they get "Reached the end of the file" errors. */ + using (_duckDb.AcquireWriteLock()) { - try + using var connection = _duckDb.CreateConnection(); + await connection.OpenAsync(); + + foreach (var (table, timeColumn) in ArchivableTables) { - /* Check if there are rows to archive */ - var rowCount = await GetRowCountBeforeCutoff(connection, table, timeColumn, cutoffDate); - if (rowCount == 0) + try { - continue; - } + /* Check if there are rows to archive */ + var rowCount = await GetRowCountBeforeCutoff(connection, table, timeColumn, cutoffDate); + if (rowCount == 0) + { + continue; + } + + /* Export to a uniquely-named parquet file — no merging needed. + Each archival cycle produces a new file with a timestamp prefix. + Archive views use glob (*_table.parquet) to pick up all files. */ + var parquetPath = Path.Combine(_archivePath, $"{timestamp}_{table}.parquet") + .Replace("\\", "/"); - /* Export to Parquet (append mode - UNION if file exists) */ - var parquetPath = Path.Combine(_archivePath, $"{archiveMonth}_{table}.parquet") - .Replace("\\", "/"); + await ExportToParquet(connection, table, timeColumn, cutoffDate, parquetPath); - if (File.Exists(parquetPath)) - { - /* Append: write to temp, then UNION with existing */ - var tempPath = parquetPath + ".tmp"; - await ExportToParquet(connection, table, timeColumn, cutoffDate, tempPath); + /* Delete archived rows from hot table */ + using var deleteCmd = connection.CreateCommand(); + deleteCmd.CommandText = $"DELETE FROM {table} WHERE {timeColumn} < '{cutoffDate:yyyy-MM-dd HH:mm:ss}'"; + await deleteCmd.ExecuteNonQueryAsync(); - using var mergeCmd = connection.CreateCommand(); - mergeCmd.CommandText = $@" -COPY ( - SELECT * FROM read_parquet('{parquetPath}') - UNION ALL - SELECT * FROM read_parquet('{tempPath}') -) TO '{parquetPath}' (FORMAT PARQUET, COMPRESSION ZSTD)"; - await mergeCmd.ExecuteNonQueryAsync(); - - File.Delete(tempPath); + _logger?.LogInformation("Archived {Count} rows from {Table} to {Path}", rowCount, table, parquetPath); } - else + catch (Exception ex) { - await ExportToParquet(connection, table, timeColumn, cutoffDate, parquetPath); + _logger?.LogError(ex, "Failed to archive table {Table}", table); } - - /* Delete archived rows from hot table */ - using var deleteCmd = connection.CreateCommand(); - deleteCmd.CommandText = $"DELETE FROM {table} WHERE {timeColumn} < '{cutoffDate:yyyy-MM-dd HH:mm:ss}'"; - await deleteCmd.ExecuteNonQueryAsync(); - - _logger?.LogInformation("Archived {Count} rows from {Table} to {Path}", rowCount, table, parquetPath); - } - catch (Exception ex) - { - _logger?.LogError(ex, "Failed to archive table {Table}", table); } } - /* Refresh archive views so newly archived parquet files are queryable */ + /* Refresh archive views outside write lock — view creation is fast and safe */ await _duckDb.CreateArchiveViewsAsync(); } finally @@ -155,4 +147,74 @@ private static async Task ExportToParquet(DuckDBConnection connection, string ta ) TO '{filePath}' (FORMAT PARQUET, COMPRESSION ZSTD)"; await cmd.ExecuteNonQueryAsync(); } + + /// + /// Archives ALL data from every table to parquet, then deletes and reinitializes the database. + /// Called when the database exceeds the size threshold. Data remains queryable through archive views. + /// + public async Task ArchiveAllAndResetAsync() + { + if (!await s_archiveLock.WaitAsync(TimeSpan.Zero)) + { + _logger?.LogDebug("Archive operation already in progress, skipping"); + return; + } + + try + { + var timestamp = DateTime.UtcNow.ToString("yyyyMMdd_HHmm"); + + _logger?.LogInformation("Archiving ALL data to Parquet (prefix: {Timestamp}) and resetting database", timestamp); + + /* Export everything under write lock */ + using (_duckDb.AcquireWriteLock()) + { + using var connection = _duckDb.CreateConnection(); + await connection.OpenAsync(); + + foreach (var (table, _) in ArchivableTables) + { + try + { + /* Check row count */ + using var countCmd = connection.CreateCommand(); + countCmd.CommandText = $"SELECT COUNT(*) FROM {table}"; + var rowCount = Convert.ToInt64(await countCmd.ExecuteScalarAsync()); + if (rowCount == 0) continue; + + /* Export all rows to a uniquely-named parquet file. + No merging needed — each reset produces a new file. + Archive views use glob (*_table.parquet) to pick up all files. */ + var parquetPath = Path.Combine(_archivePath, $"{timestamp}_{table}.parquet") + .Replace("\\", "/"); + + using var exportCmd = connection.CreateCommand(); + exportCmd.CommandText = $"COPY (SELECT * FROM {table}) TO '{parquetPath}' (FORMAT PARQUET, COMPRESSION ZSTD)"; + await exportCmd.ExecuteNonQueryAsync(); + + _logger?.LogInformation("Archived {Count} rows from {Table}", rowCount, table); + } + catch (Exception ex) + { + _logger?.LogError(ex, "Failed to archive table {Table}", table); + } + } + } + + /* Nuke and reinitialize outside the using-connection scope so all handles are closed */ + _logger?.LogInformation("Deleting and reinitializing database"); + await _duckDb.ResetDatabaseAsync(); + + _logger?.LogInformation("Database reset complete — archive views now serve all historical data from Parquet"); + } + catch (Exception ex) + { + _logger?.LogError(ex, "Archive-all-and-reset failed"); + } + finally + { + s_archiveLock.Release(); + } + } + } diff --git a/Lite/Services/CollectionBackgroundService.cs b/Lite/Services/CollectionBackgroundService.cs index 1459c936..4641dd61 100644 --- a/Lite/Services/CollectionBackgroundService.cs +++ b/Lite/Services/CollectionBackgroundService.cs @@ -29,19 +29,19 @@ public class CollectionBackgroundService : BackgroundService private readonly ILogger? _logger; private static readonly TimeSpan CollectionInterval = TimeSpan.FromMinutes(1); - /* Start at UtcNow so maintenance tasks don't all fire on the very first cycle. - Archival runs after 1 hour, retention + compaction after 24 hours of uptime. */ + /* Start at UtcNow so maintenance tasks don't all fire on the very first cycle. */ private DateTime _lastArchiveTime = DateTime.UtcNow; private DateTime _lastRetentionTime = DateTime.UtcNow; - private DateTime _lastCompactionTime = DateTime.UtcNow; - /* Archive every hour, retention + compaction once per day */ + /* Archive every hour, retention once per day */ private static readonly TimeSpan ArchiveInterval = TimeSpan.FromHours(1); private static readonly TimeSpan RetentionInterval = TimeSpan.FromHours(24); - private static readonly TimeSpan CompactionInterval = TimeSpan.FromHours(24); - /* Warn if database exceeds this size between compaction cycles */ - private const double SizeWarningThresholdMb = 1024; + /* Size-based trigger — when the database exceeds this size, archive ALL data + to parquet and reset the database. INSERT performance degrades badly with + large tables (33x slower at 667MB in testing). Data remains fully queryable + through the archive views (hot UNION parquet). */ + private const double ArchiveSizeThresholdMb = 512; public bool IsPaused { get; set; } public DateTime? LastCollectionTime { get; private set; } @@ -109,14 +109,11 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken) IsCollecting = false; } - /* Periodic archival */ + /* Periodic archival (time-based or size-based) */ await RunArchivalIfDueAsync(); /* Periodic retention cleanup */ RunRetentionIfDue(); - - /* Periodic database compaction to prevent bloat */ - await RunCompactionIfDueAsync(); } try @@ -134,14 +131,31 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken) private async Task RunArchivalIfDueAsync() { - if (_archiveService == null || DateTime.UtcNow - _lastArchiveTime < ArchiveInterval) + if (_archiveService == null) + { + return; + } + + var timeDue = DateTime.UtcNow - _lastArchiveTime >= ArchiveInterval; + var sizeDue = _duckDb != null && _duckDb.GetDatabaseSizeMb() >= ArchiveSizeThresholdMb; + + if (!timeDue && !sizeDue) { return; } try { - await _archiveService.ArchiveOldDataAsync(hotDataDays: 7); + if (sizeDue) + { + _logger?.LogInformation("Database size ({SizeMb:F0} MB) exceeds {Threshold} MB — archiving all data and resetting database", + _duckDb!.GetDatabaseSizeMb(), ArchiveSizeThresholdMb); + await _archiveService.ArchiveAllAndResetAsync(); + } + else + { + await _archiveService.ArchiveOldDataAsync(hotDataDays: 7); + } _lastArchiveTime = DateTime.UtcNow; } catch (Exception ex) @@ -168,36 +182,4 @@ private void RunRetentionIfDue() } } - private async Task RunCompactionIfDueAsync() - { - if (_duckDb == null || DateTime.UtcNow - _lastCompactionTime < CompactionInterval) - { - /* Size watchdog: warn if database is large even between compaction cycles */ - if (_duckDb != null) - { - var sizeMb = _duckDb.GetDatabaseSizeMb(); - if (sizeMb > SizeWarningThresholdMb) - { - _logger?.LogWarning("Database size is {SizeMb:F0} MB (threshold: {Threshold} MB) — compaction will run at next scheduled interval", - sizeMb, SizeWarningThresholdMb); - } - } - return; - } - - try - { - IsPaused = true; - await _duckDb.CompactAsync(); - _lastCompactionTime = DateTime.UtcNow; - } - catch (Exception ex) - { - _logger?.LogError(ex, "Database compaction failed"); - } - finally - { - IsPaused = false; - } - } } diff --git a/Lite/Services/LocalDataService.cs b/Lite/Services/LocalDataService.cs index 6e407acc..ca65309b 100644 --- a/Lite/Services/LocalDataService.cs +++ b/Lite/Services/LocalDataService.cs @@ -32,13 +32,24 @@ public LocalDataService(DuckDbInitializer duckDb) } /// - /// Creates and opens a DuckDB connection. + /// Creates and opens a DuckDB connection wrapped in a read lock. + /// The lock prevents CHECKPOINT and compaction from reorganizing the database file + /// while this connection is reading from it. /// - internal async Task OpenConnectionAsync() + internal async Task OpenConnectionAsync() { - var connection = _duckDb.CreateConnection(); - await connection.OpenAsync(); - return connection; + var readLock = _duckDb.AcquireReadLock(); + try + { + var connection = _duckDb.CreateConnection(); + await connection.OpenAsync(); + return new LockedConnection(connection, readLock); + } + catch + { + readLock.Dispose(); + throw; + } } /// diff --git a/Lite/Services/RemoteCollectorService.cs b/Lite/Services/RemoteCollectorService.cs index b2d9ad39..565159ac 100644 --- a/Lite/Services/RemoteCollectorService.cs +++ b/Lite/Services/RemoteCollectorService.cs @@ -239,10 +239,11 @@ public async Task RunDueCollectorsAsync(CancellationToken cancellationToken = de await Task.WhenAll(serverTasks); /* Run CHECKPOINT here after all collector connections are closed. - This avoids opening a separate DuckDB instance that could conflict - with concurrent UI connections via OS file locks. */ + Write lock ensures no UI readers have stale file offsets when + CHECKPOINT reorganizes/truncates the database file. */ try { + using var writeLock = _duckDb.AcquireWriteLock(); using var conn = _duckDb.CreateConnection(); await conn.OpenAsync(cancellationToken); using var cmd = conn.CreateCommand(); diff --git a/Lite/Services/RetentionService.cs b/Lite/Services/RetentionService.cs index 12ba612f..743c8346 100644 --- a/Lite/Services/RetentionService.cs +++ b/Lite/Services/RetentionService.cs @@ -29,7 +29,9 @@ public RetentionService(string archivePath, ILogger? logger = /// /// Deletes Parquet files older than the specified retention period. - /// Files are named like "2025-01_wait_stats.parquet" where the prefix is the archive month. + /// Supports two naming formats: + /// - Timestamped: "20260221_1328_wait_stats.parquet" (yyyyMMdd prefix) + /// - Legacy monthly: "2026-02_wait_stats.parquet" (yyyy-MM prefix) /// public void CleanupOldArchives(int retentionDays = 90) { @@ -45,8 +47,24 @@ public void CleanupOldArchives(int retentionDays = 90) try { var fileName = Path.GetFileNameWithoutExtension(file); - /* Parse month from filename: "2025-01_wait_stats" -> "2025-01" */ - if (fileName.Length >= 7 && + + /* Try timestamped format first: "20260221_1328_wait_stats" -> "20260221" */ + if (fileName.Length >= 8 && + DateTime.TryParseExact( + fileName[..8], + "yyyyMMdd", + CultureInfo.InvariantCulture, + DateTimeStyles.None, + out var fileDate)) + { + if (fileDate < cutoffDate) + { + File.Delete(file); + _logger?.LogInformation("Deleted expired archive: {File}", file); + } + } + /* Fall back to legacy monthly format: "2026-02_wait_stats" -> "2026-02" */ + else if (fileName.Length >= 7 && DateTime.TryParseExact( fileName[..7], "yyyy-MM", From 1a8985bfb9dd40ec4ca7e023577c075754b2fb31 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Sun, 22 Feb 2026 09:29:11 -0500 Subject: [PATCH 02/53] v1.4.0 feature work: config tab consolidation, DB drop on remove, DuckDB-first plans, lock wait trend, clipboard fix, procedure stats parity Issues addressed: - #185: Consolidate Server/Database/Trace Flag config change tabs into single ConfigChangesContent UserControl with sub-tabs (Dashboard) - #201: Add option to drop PerformanceMonitor database when removing a server, with confirmation dialog (Dashboard) - #219: Try DuckDB cache first for query plan downloads before falling back to live server (Lite) - #141: Add lock waits trend chart to Blocking > Trends tab with multi-series per LCK% wait type (Lite) Additional fixes: - Fix clipboard copy: "Copy All Rows" and "Export to CSV" context menu items now extract TextBlock text from StackPanel column headers instead of showing "System.Windows.Controls.StackPanel" (both apps, 22 call sites) - Add missing columns to Lite procedure stats grid: Total Spills, Min/Max CPU, Min/Max Duration, Plan Handle, and Download Plan button with DuckDB-first lookup - Filter out zero-activity rows from query stats and procedure stats grids (HAVING clause on delta sums) - Make query stats grid plan download DuckDB-first (was live-server only) - Fix plan download button text resetting to "Save" instead of "Download" Co-Authored-By: Claude Opus 4.6 --- .../Controls/AlertsHistoryContent.xaml.cs | 4 +- Dashboard/Controls/ConfigChangesContent.xaml | 282 +++++++++++++ .../Controls/ConfigChangesContent.xaml.cs | 384 ++++++++++++++++++ .../Controls/CriticalIssuesContent.xaml.cs | 4 +- .../Controls/DailySummaryContent.xaml.cs | 4 +- Dashboard/Controls/MemoryContent.xaml.cs | 4 +- .../Controls/QueryPerformanceContent.xaml.cs | 4 +- .../Controls/ResourceMetricsContent.xaml.cs | 4 +- .../Controls/SystemEventsContent.xaml.cs | 4 +- .../Helpers/DataGridClipboardBehavior.cs | 74 ++++ Dashboard/MainWindow.xaml.cs | 32 +- Dashboard/ManageServersWindow.xaml.cs | 29 +- Dashboard/ProcedureHistoryWindow.xaml.cs | 4 +- Dashboard/QueryExecutionHistoryWindow.xaml.cs | 4 +- Dashboard/QueryStatsHistoryWindow.xaml.cs | 4 +- Dashboard/RemoveServerDialog.xaml | 39 ++ Dashboard/RemoveServerDialog.xaml.cs | 33 ++ Dashboard/ServerTab.xaml | 276 +------------ Dashboard/ServerTab.xaml.cs | 338 +-------------- Dashboard/Services/ServerManager.cs | 25 ++ Dashboard/Themes/DarkTheme.xaml | 4 +- Lite/Controls/ServerTab.xaml | 33 +- Lite/Controls/ServerTab.xaml.cs | 165 +++++++- Lite/Helpers/DataGridClipboardBehavior.cs | 74 ++++ Lite/Services/LocalDataService.Blocking.cs | 57 +++ Lite/Services/LocalDataService.QueryStats.cs | 48 +++ Lite/Themes/DarkTheme.xaml | 4 +- Lite/Windows/QueryStatsHistoryWindow.xaml.cs | 30 +- README.md | 2 +- 29 files changed, 1315 insertions(+), 654 deletions(-) create mode 100644 Dashboard/Controls/ConfigChangesContent.xaml create mode 100644 Dashboard/Controls/ConfigChangesContent.xaml.cs create mode 100644 Dashboard/Helpers/DataGridClipboardBehavior.cs create mode 100644 Dashboard/RemoveServerDialog.xaml create mode 100644 Dashboard/RemoveServerDialog.xaml.cs create mode 100644 Lite/Helpers/DataGridClipboardBehavior.cs diff --git a/Dashboard/Controls/AlertsHistoryContent.xaml.cs b/Dashboard/Controls/AlertsHistoryContent.xaml.cs index 132cb1d8..ef85d722 100644 --- a/Dashboard/Controls/AlertsHistoryContent.xaml.cs +++ b/Dashboard/Controls/AlertsHistoryContent.xaml.cs @@ -373,7 +373,7 @@ private void CopyAllRows_Click(object sender, RoutedEventArgs e) var sb = new StringBuilder(); var headers = dataGrid.Columns .OfType() - .Select(c => TabHelpers.GetColumnHeader(c)) + .Select(c => Helpers.DataGridClipboardBehavior.GetHeaderText(c)) .ToList(); sb.AppendLine(string.Join("\t", headers)); @@ -406,7 +406,7 @@ private void ExportToCsv_Click(object sender, RoutedEventArgs e) var sb = new StringBuilder(); var headers = dataGrid.Columns .OfType() - .Select(c => TabHelpers.EscapeCsvField(TabHelpers.GetColumnHeader(c))) + .Select(c => TabHelpers.EscapeCsvField(Helpers.DataGridClipboardBehavior.GetHeaderText(c))) .ToList(); sb.AppendLine(string.Join(",", headers)); diff --git a/Dashboard/Controls/ConfigChangesContent.xaml b/Dashboard/Controls/ConfigChangesContent.xaml new file mode 100644 index 00000000..e5aeb648 --- /dev/null +++ b/Dashboard/Controls/ConfigChangesContent.xaml @@ -0,0 +1,282 @@ + + + + + + + + + + +