From baf2db6888fafbd7c2053423869a6b5ee700811f Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Mon, 4 May 2026 20:07:59 -0400 Subject: [PATCH 01/27] CI: bump GitHub Actions to Node 24-compatible majors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GitHub will force JS actions to Node 24 on June 2nd, 2026, and remove Node 20 from runners on September 16th, 2026. Bumps the four actions called out in the runner deprecation warning: - actions/checkout v4 → v5 - actions/setup-dotnet v4 → v5 - actions/upload-artifact v4 → v6 - signpath/github-action-submit-signing-request v1 → v2 Matches the bump PerformanceStudio just made for the same reason. dorny/paths-filter@v3 not in the warning, left as-is. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/build.yml | 16 ++++++++-------- .github/workflows/check-version-bump.yml | 4 ++-- .github/workflows/nightly.yml | 6 +++--- .github/workflows/sql-validation.yml | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3c5b35c9..7b4eb8a4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -18,7 +18,7 @@ jobs: runs-on: windows-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Detect changed paths id: filter @@ -46,7 +46,7 @@ jobs: - 'upgrades/**' - name: Setup .NET 8.0 - uses: actions/setup-dotnet@v4 + uses: actions/setup-dotnet@v5 with: dotnet-version: 8.0.x cache: true @@ -132,7 +132,7 @@ jobs: - name: Upload Dashboard for signing if: github.event_name == 'release' id: upload-dashboard - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: Dashboard-unsigned path: publish/Dashboard/ @@ -140,7 +140,7 @@ jobs: - name: Upload Lite for signing if: github.event_name == 'release' id: upload-lite - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 
with: name: Lite-unsigned path: publish/Lite/ @@ -148,14 +148,14 @@ jobs: - name: Upload Installer for signing if: github.event_name == 'release' id: upload-installer - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: Installer-unsigned path: publish/Installer/ - name: Sign Dashboard if: github.event_name == 'release' - uses: signpath/github-action-submit-signing-request@v1 + uses: signpath/github-action-submit-signing-request@v2 with: api-token: '${{ secrets.SIGNPATH_API_TOKEN }}' organization-id: '7969f8b6-d946-4a74-9bac-a55856d8b8e0' @@ -168,7 +168,7 @@ jobs: - name: Sign Lite if: github.event_name == 'release' - uses: signpath/github-action-submit-signing-request@v1 + uses: signpath/github-action-submit-signing-request@v2 with: api-token: '${{ secrets.SIGNPATH_API_TOKEN }}' organization-id: '7969f8b6-d946-4a74-9bac-a55856d8b8e0' @@ -181,7 +181,7 @@ jobs: - name: Sign Installer if: github.event_name == 'release' - uses: signpath/github-action-submit-signing-request@v1 + uses: signpath/github-action-submit-signing-request@v2 with: api-token: '${{ secrets.SIGNPATH_API_TOKEN }}' organization-id: '7969f8b6-d946-4a74-9bac-a55856d8b8e0' diff --git a/.github/workflows/check-version-bump.yml b/.github/workflows/check-version-bump.yml index 19bca8d5..680664f7 100644 --- a/.github/workflows/check-version-bump.yml +++ b/.github/workflows/check-version-bump.yml @@ -10,7 +10,7 @@ jobs: steps: - name: Checkout PR branch - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Get PR version id: pr @@ -21,7 +21,7 @@ jobs: Write-Host "PR version: $version" - name: Checkout main - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: ref: main path: main-branch diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index ef0ef391..4263afdf 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -15,7 +15,7 @@ jobs: outputs: has_changes: ${{ steps.check.outputs.has_changes }} steps: - - 
uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: ref: dev fetch-depth: 0 @@ -38,12 +38,12 @@ jobs: runs-on: windows-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: ref: dev - name: Setup .NET 8.0 - uses: actions/setup-dotnet@v4 + uses: actions/setup-dotnet@v5 with: dotnet-version: 8.0.x cache: true diff --git a/.github/workflows/sql-validation.yml b/.github/workflows/sql-validation.yml index e4f266d3..84c817e5 100644 --- a/.github/workflows/sql-validation.yml +++ b/.github/workflows/sql-validation.yml @@ -41,7 +41,7 @@ jobs: --health-retries 15 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Install sqlcmd run: | From 6d389de64c3142dfc40bd7f4a55c7f50ebe67bf9 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Tue, 5 May 2026 11:20:15 -0400 Subject: [PATCH 02/27] Honor collector_database_exclusions in trace collectors (#887 follow-up) PR #905 wired config.collector_database_exclusions into the 8 collectors that iterate sys.databases, but missed default_trace_collector and trace_analysis_collector because they filter trace events by DatabaseID/DatabaseName rather than looping sys.databases. - 29_collect_default_trace.sql: NOT EXISTS against config.collector_database_exclusions joined to sys.databases to translate the exclusion list (by name) into ids matching ft.DatabaseID. - 31_collect_trace_analysis.sql: NOT EXISTS directly on trc.DatabaseName (trace exposes the name here). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- install/29_collect_default_trace.sql | 9 +++++++++ install/31_collect_trace_analysis.sql | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/install/29_collect_default_trace.sql b/install/29_collect_default_trace.sql index 9760fab6..b23f8b5e 100644 --- a/install/29_collect_default_trace.sql +++ b/install/29_collect_default_trace.sql @@ -369,6 +369,15 @@ BEGIN AND ft.StartTime >= @cutoff_time AND ISNULL(ft.DatabaseID, 0) NOT IN (DB_ID(N'PerformanceMonitor'), 1, 3, 4) AND ISNULL(ft.DatabaseID, 0) < 32761 /*exclude contained AG system databases*/ + AND NOT EXISTS + ( + SELECT + 1/0 + FROM config.collector_database_exclusions AS e + JOIN sys.databases AS d + ON d.name = e.database_name + WHERE d.database_id = ISNULL(ft.DatabaseID, 0) + ) /* Filter for useful system events, excluding login failures */ diff --git a/install/31_collect_trace_analysis.sql b/install/31_collect_trace_analysis.sql index 58b998ec..f934b32e 100644 --- a/install/31_collect_trace_analysis.sql +++ b/install/31_collect_trace_analysis.sql @@ -284,6 +284,13 @@ BEGIN AND trc.DatabaseName NOT IN (N'master', N'msdb', N'model', N'tempdb', N'PerformanceMonitor') AND trc.DatabaseName NOT LIKE N'%[_]master' /*exclude contained AG system databases*/ AND trc.DatabaseName NOT LIKE N'%[_]msdb' /*exclude contained AG system databases*/ + AND NOT EXISTS + ( + SELECT + 1/0 + FROM config.collector_database_exclusions AS e + WHERE e.database_name = trc.DatabaseName + ) ORDER BY trc.StartTime DESC; From 5d83e46684c9a2d383aff1f6ed0f31edc4130a86 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Tue, 5 May 2026 15:44:46 -0400 Subject: [PATCH 03/27] Lite: fix compaction OOM by setting DuckDB temp_directory (#933) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The in-memory DuckDB connections used for parquet compaction had a 4 GB memory_limit pragma but no temp_directory, so 
the cap acted as a hard wall — DuckDB had nowhere to spill and OOM'd the moment it was hit. Co-locate the spill dir with the archive folder so the writes land on the same volume as the parquet files. Verified end-to-end: 4-server HammerDB load, second 512 MB reset triggered ArchiveAllAndResetAsync, all 21 groups went through the multi-file pair-merge path with two sources each, completed in ~3.5s with no OOM. Co-Authored-By: Claude Opus 4.7 (1M context) --- Lite/Services/ArchiveService.cs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Lite/Services/ArchiveService.cs b/Lite/Services/ArchiveService.cs index d7789e5d..ec691210 100644 --- a/Lite/Services/ArchiveService.cs +++ b/Lite/Services/ArchiveService.cs @@ -322,6 +322,14 @@ Each group gets its own DuckDB connection so memory is fully released between gr var totalMerged = 0; var totalRemoved = 0; + /* Spill directory for the in-memory compaction connections. Without this, + the memory_limit pragma is a hard wall — DuckDB has nowhere to spill and + OOMs the moment the cap is hit. Co-locating with the archive keeps the + write on the same volume the parquet files already live on. */ + var spillDir = Path.Combine(_archivePath, "duckdb_tmp"); + Directory.CreateDirectory(spillDir); + var spillDirSql = spillDir.Replace("\\", "/"); + foreach (var ((month, table), files) in groups) { /* If there's exactly one file and it's already in monthly format, skip */ @@ -376,7 +384,7 @@ Each group gets its own DuckDB connection so memory is fully released between gr con.Open(); using (var pragma = con.CreateCommand()) { - pragma.CommandText = "SET memory_limit = '4GB'; SET preserve_insertion_order = false;"; + pragma.CommandText = $"SET memory_limit = '4GB'; SET preserve_insertion_order = false; SET temp_directory = '{EscapeSqlPath(spillDirSql)}';"; pragma.ExecuteNonQuery(); } @@ -407,7 +415,7 @@ Sort smallest-first so early merges are cheap. 
*/ con.Open(); using (var pragma = con.CreateCommand()) { - pragma.CommandText = "SET memory_limit = '4GB'; SET preserve_insertion_order = false;"; + pragma.CommandText = $"SET memory_limit = '4GB'; SET preserve_insertion_order = false; SET temp_directory = '{EscapeSqlPath(spillDirSql)}';"; pragma.ExecuteNonQuery(); } From 069398363c737f18e9dfa5df42316026cd5540f2 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Wed, 6 May 2026 05:33:16 -0400 Subject: [PATCH 04/27] Re-run installation_history widen for pre-v2.7.0 no-op upgrades (#828) (#936) Servers that crossed 2.4 -> 2.5 before PR #828's USE-statement fix shipped in v2.7.0 ran the widen script as a no-op against master and advanced their installer_version past 2.5. The upgrade filter (ToVersion > current) then permanently skipped the now-fixed script, leaving sql_server_version and sql_server_edition stuck at nvarchar(255) -- which truncates SQL 2022+ @@VERSION strings (~260 chars) on every install attempt. Re-applies the same idempotent widen in 2.10.0-to-2.11.0 so any server upgrading to the next release picks it up. The IF EXISTS guards on max_length = 510 mean already-widened servers skip both ALTERs. Co-authored-by: Claude Opus 4.7 (1M context) --- .../01_rewiden_version_columns.sql | 68 +++++++++++++++++++ upgrades/2.10.0-to-2.11.0/upgrade.txt | 1 + 2 files changed, 69 insertions(+) create mode 100644 upgrades/2.10.0-to-2.11.0/01_rewiden_version_columns.sql create mode 100644 upgrades/2.10.0-to-2.11.0/upgrade.txt diff --git a/upgrades/2.10.0-to-2.11.0/01_rewiden_version_columns.sql b/upgrades/2.10.0-to-2.11.0/01_rewiden_version_columns.sql new file mode 100644 index 00000000..107d6e2b --- /dev/null +++ b/upgrades/2.10.0-to-2.11.0/01_rewiden_version_columns.sql @@ -0,0 +1,68 @@ +/* +Re-run installation_history version-column widening from the 2.4.0-to-2.5.0 upgrade. 
+ +Scope (release-notes guidance — keep this quiet): + Servers that crossed 2.4 -> 2.5 BEFORE PR #828's fix shipped in v2.7.0 ran the + widen script as a no-op (it executed against master, not PerformanceMonitor) + and then advanced their installer_version past 2.5. The upgrade filter only + selects folders where ToVersion > currentVersion, so the now-fixed script is + never re-applied on those servers and their sql_server_version / + sql_server_edition columns stay at nvarchar(255) -- which truncates SQL 2022+ + @@VERSION strings (~260 chars) on every install attempt. + + This is a silent remediation for that long tail. The body is identical to + upgrades/2.4.0-to-2.5.0/01_widen_version_columns.sql and is idempotent: the + IF EXISTS guard checks max_length = 510 (nvarchar(255) in bytes) so servers + that are already widened, or that ran the fixed 2.4->2.5 upgrade cleanly, + skip both ALTERs. + + For changelog purposes this is a one-line "fix": "Re-run installation_history + column widening for servers that hit the no-op pre-v2.7.0 upgrade (#828)." + Do not promote it as a new feature. 
+*/ + +SET ANSI_NULLS ON; +SET ANSI_PADDING ON; +SET ANSI_WARNINGS ON; +SET ARITHABORT ON; +SET CONCAT_NULL_YIELDS_NULL ON; +SET QUOTED_IDENTIFIER ON; +SET NUMERIC_ROUNDABORT OFF; +SET IMPLICIT_TRANSACTIONS OFF; +SET STATISTICS TIME, IO OFF; +GO + +USE PerformanceMonitor; +GO + +IF EXISTS +( + SELECT + 1/0 + FROM sys.columns AS c + WHERE c.object_id = OBJECT_ID(N'config.installation_history') + AND c.name = N'sql_server_version' + AND c.max_length = 510 /* nvarchar(255) = 510 bytes */ +) +BEGIN + ALTER TABLE config.installation_history + ALTER COLUMN sql_server_version nvarchar(512) NOT NULL; + + PRINT 'Widened config.installation_history.sql_server_version to nvarchar(512)'; +END; + +IF EXISTS +( + SELECT + 1/0 + FROM sys.columns AS c + WHERE c.object_id = OBJECT_ID(N'config.installation_history') + AND c.name = N'sql_server_edition' + AND c.max_length = 510 +) +BEGIN + ALTER TABLE config.installation_history + ALTER COLUMN sql_server_edition nvarchar(512) NOT NULL; + + PRINT 'Widened config.installation_history.sql_server_edition to nvarchar(512)'; +END; diff --git a/upgrades/2.10.0-to-2.11.0/upgrade.txt b/upgrades/2.10.0-to-2.11.0/upgrade.txt new file mode 100644 index 00000000..ef4c2ba4 --- /dev/null +++ b/upgrades/2.10.0-to-2.11.0/upgrade.txt @@ -0,0 +1 @@ +01_rewiden_version_columns.sql From 659eedd9be6e87236af782b1727a9bc6a6d4db81 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Wed, 6 May 2026 05:33:28 -0400 Subject: [PATCH 05/27] =?UTF-8?q?Fix=20#916=20=E2=80=94=20chart=20tooltips?= =?UTF-8?q?=20break=20after=20tab=20switch=20(real=20root=20cause)=20(#937?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #919 / #921 / #922 addressed the WPF Popup wedge but missed the underlying bug: MemoryContent, ResourceMetricsContent, and QueryPerformanceContent all called DisposeChartHelpers() from their per-control Unloaded event handler. 
WPF fires Unloaded on every TabControl tab switch, not just on destruction, so switching away from the Memory tab unsubscribed every chart's MouseMove handlers — which were never re-registered when the user came back. The popup-wedge fixes were running inside helpers that had already torn themselves down. ServerTab_Unloaded had the same comment warning future maintainers, but the inner UserControls didn't follow that rule. Changes: - MemoryContent / ResourceMetricsContent / QueryPerformanceContent: drop DisposeChartHelpers() (and ThemeManager unsubscribe) from the tab-switch Unloaded handler. Move the ThemeManager unsubscribe into DisposeChartHelpers() so it runs only on real cleanup. - SystemEventsContent: same pattern — add a DisposeChartHelpers() method that disposes the 19 hover helpers, unsubscribes filter-popup events, and unsubscribes ThemeManager. Empty out OnUnloaded. - ServerTab.CleanupOnClose: add SystemEventsContent.DisposeChartHelpers() to the cleanup chain (its hovers leaked on tab close before this). Final disposal still happens correctly via ServerTab.CleanupOnClose, which only fires when a server tab is actually removed. Lite is unaffected — its Unloaded handler never disposed hovers. 
Co-authored-by: Claude Opus 4.7 (1M context) --- Dashboard/Controls/MemoryContent.xaml.cs | 10 +++--- .../Controls/QueryPerformanceContent.xaml.cs | 7 ++-- .../Controls/ResourceMetricsContent.xaml.cs | 10 +++--- .../Controls/SystemEventsContent.xaml.cs | 36 ++++++++++++++----- Dashboard/ServerTab.xaml.cs | 1 + 5 files changed, 44 insertions(+), 20 deletions(-) diff --git a/Dashboard/Controls/MemoryContent.xaml.cs b/Dashboard/Controls/MemoryContent.xaml.cs index 1ce108ca..691cbee6 100644 --- a/Dashboard/Controls/MemoryContent.xaml.cs +++ b/Dashboard/Controls/MemoryContent.xaml.cs @@ -106,11 +106,10 @@ public MemoryContent() SetupChartContextMenus(); Loaded += OnLoaded; Helpers.ThemeManager.ThemeChanged += OnThemeChanged; - Unloaded += (_, _) => - { - Helpers.ThemeManager.ThemeChanged -= OnThemeChanged; - DisposeChartHelpers(); - }; + /* WPF fires Unloaded on every TabControl tab switch, not just on destruction. + Tearing down chart hover helpers here unsubscribes their MouseMove handlers + and they are never re-registered when the user returns — this is the + root cause of #916. Final disposal happens via ServerTab.CleanupOnClose. 
*/ // Apply dark theme immediately so charts don't flash white before data loads TabHelpers.ApplyThemeToChart(MemoryStatsOverviewChart); @@ -136,6 +135,7 @@ public void DisposeChartHelpers() _memoryClerksHover?.Dispose(); _planCacheHover?.Dispose(); _memoryPressureEventsHover?.Dispose(); + Helpers.ThemeManager.ThemeChanged -= OnThemeChanged; } private void OnLoaded(object sender, RoutedEventArgs e) diff --git a/Dashboard/Controls/QueryPerformanceContent.xaml.cs b/Dashboard/Controls/QueryPerformanceContent.xaml.cs index 3d998fde..e500bf88 100644 --- a/Dashboard/Controls/QueryPerformanceContent.xaml.cs +++ b/Dashboard/Controls/QueryPerformanceContent.xaml.cs @@ -241,8 +241,10 @@ private void OnUnloaded(object sender, RoutedEventArgs e) _qsRegressionsUnfilteredData = null; _lrqPatternsUnfilteredData = null; - DisposeChartHelpers(); - Helpers.ThemeManager.ThemeChanged -= OnThemeChanged; + /* WPF fires Unloaded on every TabControl tab switch, not just on destruction. + Tearing down chart hover helpers or unsubscribing ThemeManager here breaks + tooltips and theme refresh after a tab switch (#916). Final cleanup happens + via ServerTab.CleanupOnClose → DisposeChartHelpers. 
*/ } public void DisposeChartHelpers() @@ -251,6 +253,7 @@ public void DisposeChartHelpers() _procDurationHover?.Dispose(); _qsDurationHover?.Dispose(); _execTrendsHover?.Dispose(); + Helpers.ThemeManager.ThemeChanged -= OnThemeChanged; } private void OnThemeChanged(string _) diff --git a/Dashboard/Controls/ResourceMetricsContent.xaml.cs b/Dashboard/Controls/ResourceMetricsContent.xaml.cs index 3d4c3afa..45dc3ca0 100644 --- a/Dashboard/Controls/ResourceMetricsContent.xaml.cs +++ b/Dashboard/Controls/ResourceMetricsContent.xaml.cs @@ -130,11 +130,10 @@ public ResourceMetricsContent() SetupChartContextMenus(); Loaded += OnLoaded; Helpers.ThemeManager.ThemeChanged += OnThemeChanged; - Unloaded += (_, _) => - { - Helpers.ThemeManager.ThemeChanged -= OnThemeChanged; - DisposeChartHelpers(); - }; + /* WPF fires Unloaded on every TabControl tab switch, not just on destruction. + Tearing down chart hover helpers here unsubscribes their MouseMove handlers + and they are never re-registered when the user returns — this is the + root cause of #916. Final disposal happens via ServerTab.CleanupOnClose. */ // Apply dark theme immediately so charts don't flash white before data loads TabHelpers.ApplyThemeToChart(LatchStatsChart); @@ -175,6 +174,7 @@ public void DisposeChartHelpers() _waitStatsHover?.Dispose(); _tempdbStatsHover?.Dispose(); _tempDbLatencyHover?.Dispose(); + Helpers.ThemeManager.ThemeChanged -= OnThemeChanged; } private void OnLoaded(object sender, RoutedEventArgs e) diff --git a/Dashboard/Controls/SystemEventsContent.xaml.cs b/Dashboard/Controls/SystemEventsContent.xaml.cs index 726f3d2d..2ddb8af2 100644 --- a/Dashboard/Controls/SystemEventsContent.xaml.cs +++ b/Dashboard/Controls/SystemEventsContent.xaml.cs @@ -175,20 +175,40 @@ public SystemEventsContent() private void OnUnloaded(object sender, RoutedEventArgs e) { - /* Unsubscribe from filter popup events to prevent memory leaks */ + /* WPF fires Unloaded on every TabControl tab switch, not just on destruction. 
+ Unsubscribing ThemeManager or filter-popup events here breaks them on + return to the tab (#916 family). Final cleanup happens via + ServerTab.CleanupOnClose → DisposeChartHelpers. */ + } + + public void DisposeChartHelpers() + { + _badPagesHover?.Dispose(); + _dumpRequestsHover?.Dispose(); + _accessViolationsHover?.Dispose(); + _writeAccessViolationsHover?.Dispose(); + _nonYieldingTasksHover?.Dispose(); + _latchWarningsHover?.Dispose(); + _sickSpinlocksHover?.Dispose(); + _cpuComparisonHover?.Dispose(); + _severeErrorsHover?.Dispose(); + _ioIssuesHover?.Dispose(); + _longestPendingIoHover?.Dispose(); + _schedulerIssuesHover?.Dispose(); + _memoryConditionsHover?.Dispose(); + _cpuTasksHover?.Dispose(); + _memoryBrokerHover?.Dispose(); + _memoryBrokerRatioHover?.Dispose(); + _memoryNodeOomHover?.Dispose(); + _memoryNodeOomUtilHover?.Dispose(); + _memoryNodeOomMemoryHover?.Dispose(); + if (_filterPopupContent != null) { _filterPopupContent.FilterApplied -= FilterPopup_FilterApplied; _filterPopupContent.FilterCleared -= FilterPopup_FilterCleared; } - /* Clear large data collections to free memory */ - _systemHealthUnfilteredData = null; - _severeErrorsUnfilteredData = null; - _ioIssuesUnfilteredData = null; - _memoryBrokerUnfilteredData = null; - _memoryNodeOOMUnfilteredData = null; - Helpers.ThemeManager.ThemeChanged -= OnThemeChanged; } diff --git a/Dashboard/ServerTab.xaml.cs b/Dashboard/ServerTab.xaml.cs index 335954b6..3c6974f8 100644 --- a/Dashboard/ServerTab.xaml.cs +++ b/Dashboard/ServerTab.xaml.cs @@ -275,6 +275,7 @@ public void DisposeChartHelpers() MemoryTab.DisposeChartHelpers(); ResourceMetricsContent.DisposeChartHelpers(); PerformanceTab.DisposeChartHelpers(); + SystemEventsContent.DisposeChartHelpers(); } public void RefreshAutoRefreshSettings() From 4ec88f452b33d90c5e65af49bec38ca92e07a809 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Thu, 7 May 2026 12:55:58 -0500 Subject: [PATCH 06/27] 
=?UTF-8?q?Fix=20#938=20=E2=80=94=20preserve=20mute?= =?UTF-8?q?=20rules=20across=20size-triggered=20DB=20reset?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When monitor.duckdb exceeds 512 MB, ArchiveAllAndResetAsync deletes the file and reinitializes empty tables. config_mute_rules was not in ArchivableTables, so all mute rules — including permanent rules with expires_at_utc = NULL — were silently lost. Export config_mute_rules and dismissed_archive_alerts to a temp Parquet dir before the reset and re-import after. Parquet roundtrip keeps this schema-agnostic. On restore failure, the temp dir is retained for manual recovery. Co-Authored-By: Claude Opus 4.7 (1M context) --- Lite.Tests/MuteRulesSurviveResetTests.cs | 153 +++++++++++++++++++++++ Lite/Services/ArchiveService.cs | 87 ++++++++++++- 2 files changed, 239 insertions(+), 1 deletion(-) create mode 100644 Lite.Tests/MuteRulesSurviveResetTests.cs diff --git a/Lite.Tests/MuteRulesSurviveResetTests.cs b/Lite.Tests/MuteRulesSurviveResetTests.cs new file mode 100644 index 00000000..b8a1eb2b --- /dev/null +++ b/Lite.Tests/MuteRulesSurviveResetTests.cs @@ -0,0 +1,153 @@ +using System; +using System.IO; +using System.Threading.Tasks; +using DuckDB.NET.Data; +using PerformanceMonitorLite.Database; +using PerformanceMonitorLite.Services; +using Xunit; + +namespace PerformanceMonitorLite.Tests; + +/// +/// Issue #938 — mute rules (especially expires_at_utc = NULL "permanent" rules) +/// were silently lost when ArchiveAllAndResetAsync fired due to the 512 MB size threshold. +/// The reset deletes monitor.duckdb outright, and config_mute_rules was not preserved. 
+/// +public class MuteRulesSurviveResetTests : IDisposable +{ + private readonly string _tempDir; + private readonly string _dbPath; + private readonly string _archiveDir; + + public MuteRulesSurviveResetTests() + { + _tempDir = Path.Combine(Path.GetTempPath(), "LiteTests_" + Guid.NewGuid().ToString("N")[..8]); + Directory.CreateDirectory(_tempDir); + _dbPath = Path.Combine(_tempDir, "test.duckdb"); + _archiveDir = Path.Combine(_tempDir, "archive"); + Directory.CreateDirectory(_archiveDir); + } + + public void Dispose() + { + try + { + if (Directory.Exists(_tempDir)) + Directory.Delete(_tempDir, recursive: true); + } + catch + { + /* Best-effort cleanup */ + } + } + + [Fact] + public async Task PermanentMuteRule_SurvivesArchiveAllAndReset() + { + var initializer = new DuckDbInitializer(_dbPath); + await initializer.InitializeAsync(); + + var ruleId = Guid.NewGuid().ToString(); + var createdAt = new DateTime(2026, 5, 1, 12, 0, 0, DateTimeKind.Utc); + + await InsertMuteRuleAsync(ruleId, createdAt, expiresAtUtc: null, + serverName: "ProdSql01", metricName: "Blocking Detected"); + + var archiveService = new ArchiveService(initializer, _archiveDir); + await archiveService.ArchiveAllAndResetAsync(); + + var (count, expiresIsNull, serverName) = await ReadMuteRuleAsync(ruleId); + + Assert.Equal(1, count); + Assert.True(expiresIsNull); + Assert.Equal("ProdSql01", serverName); + } + + [Fact] + public async Task ExpiringMuteRule_SurvivesArchiveAllAndReset() + { + var initializer = new DuckDbInitializer(_dbPath); + await initializer.InitializeAsync(); + + var ruleId = Guid.NewGuid().ToString(); + var createdAt = DateTime.UtcNow; + var expiresAt = createdAt.AddDays(7); + + await InsertMuteRuleAsync(ruleId, createdAt, expiresAt, + serverName: "ProdSql02", metricName: "Long-Running Job"); + + var archiveService = new ArchiveService(initializer, _archiveDir); + await archiveService.ArchiveAllAndResetAsync(); + + var (count, expiresIsNull, serverName) = await 
ReadMuteRuleAsync(ruleId); + + Assert.Equal(1, count); + Assert.False(expiresIsNull); + Assert.Equal("ProdSql02", serverName); + } + + [Fact] + public async Task EmptyMuteRulesTable_DoesNotBreakReset() + { + var initializer = new DuckDbInitializer(_dbPath); + await initializer.InitializeAsync(); + + var archiveService = new ArchiveService(initializer, _archiveDir); + await archiveService.ArchiveAllAndResetAsync(); + + using var connection = new DuckDBConnection($"Data Source={_dbPath}"); + await connection.OpenAsync(TestContext.Current.CancellationToken); + using var cmd = connection.CreateCommand(); + cmd.CommandText = "SELECT COUNT(*) FROM config_mute_rules"; + var count = Convert.ToInt64(await cmd.ExecuteScalarAsync(TestContext.Current.CancellationToken)); + + Assert.Equal(0, count); + } + + private async Task InsertMuteRuleAsync( + string id, + DateTime createdAt, + DateTime? expiresAtUtc, + string serverName, + string metricName) + { + using var connection = new DuckDBConnection($"Data Source={_dbPath}"); + await connection.OpenAsync(TestContext.Current.CancellationToken); + using var cmd = connection.CreateCommand(); + cmd.CommandText = @" +INSERT INTO config_mute_rules + (id, enabled, created_at_utc, expires_at_utc, reason, + server_name, metric_name, database_pattern, + query_text_pattern, wait_type_pattern, job_name_pattern) +VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)"; + cmd.Parameters.Add(new DuckDBParameter { Value = id }); + cmd.Parameters.Add(new DuckDBParameter { Value = true }); + cmd.Parameters.Add(new DuckDBParameter { Value = createdAt }); + cmd.Parameters.Add(new DuckDBParameter { Value = (object?)expiresAtUtc ?? 
DBNull.Value }); + cmd.Parameters.Add(new DuckDBParameter { Value = "test rule" }); + cmd.Parameters.Add(new DuckDBParameter { Value = serverName }); + cmd.Parameters.Add(new DuckDBParameter { Value = metricName }); + cmd.Parameters.Add(new DuckDBParameter { Value = DBNull.Value }); + cmd.Parameters.Add(new DuckDBParameter { Value = DBNull.Value }); + cmd.Parameters.Add(new DuckDBParameter { Value = DBNull.Value }); + cmd.Parameters.Add(new DuckDBParameter { Value = DBNull.Value }); + await cmd.ExecuteNonQueryAsync(TestContext.Current.CancellationToken); + } + + private async Task<(int Count, bool ExpiresIsNull, string ServerName)> ReadMuteRuleAsync(string id) + { + using var connection = new DuckDBConnection($"Data Source={_dbPath}"); + await connection.OpenAsync(TestContext.Current.CancellationToken); + using var cmd = connection.CreateCommand(); + cmd.CommandText = "SELECT expires_at_utc, server_name FROM config_mute_rules WHERE id = $1"; + cmd.Parameters.Add(new DuckDBParameter { Value = id }); + + using var reader = await cmd.ExecuteReaderAsync(TestContext.Current.CancellationToken); + if (!await reader.ReadAsync(TestContext.Current.CancellationToken)) + return (0, false, ""); + + var expiresIsNull = reader.IsDBNull(0); + var serverName = reader.GetString(1); + return (1, expiresIsNull, serverName); + } +} diff --git a/Lite/Services/ArchiveService.cs b/Lite/Services/ArchiveService.cs index ec691210..f0f55cc7 100644 --- a/Lite/Services/ArchiveService.cs +++ b/Lite/Services/ArchiveService.cs @@ -42,6 +42,16 @@ public static bool IsArchiving private set => s_isArchiving = value; } + /* Config tables that must be preserved through ArchiveAllAndResetAsync. + These hold user configuration (not time-series) and must survive when the + size threshold trips a database reset. Issue #938 — permanent mute rules + were silently lost because ResetDatabaseAsync deletes monitor.duckdb. 
*/ + private static readonly string[] PreservedConfigTables = + [ + "config_mute_rules", + "dismissed_archive_alerts" + ]; + /* Tables eligible for archival with their time column. IMPORTANT: Every table with time-series data must be listed here, or it will grow unbounded and push the DB past the 512 MB reset threshold. */ @@ -502,12 +512,16 @@ public async Task ArchiveAllAndResetAsync() } IsArchiving = true; + var preserveDir = Path.Combine(Path.GetTempPath(), $"pm_preserve_{Guid.NewGuid():N}"); + var preservedFiles = new Dictionary(); try { var timestamp = DateTime.UtcNow.ToString("yyyyMMdd_HHmm"); _logger?.LogInformation("Archiving ALL data to Parquet (prefix: {Timestamp}) and resetting database", timestamp); + Directory.CreateDirectory(preserveDir); + /* Export everything under write lock */ using (_duckDb.AcquireWriteLock()) { @@ -541,6 +555,32 @@ Archive views use glob (*_table.parquet) to pick up all files. */ _logger?.LogError(ex, "Failed to archive table {Table}", table); } } + + /* Preserve config tables that must survive the reset (issue #938). + Written to a temp dir, not the archive dir — these are restored + into the new database, not exposed via archive views. 
*/ + foreach (var table in PreservedConfigTables) + { + try + { + using var countCmd = connection.CreateCommand(); + countCmd.CommandText = $"SELECT COUNT(*) FROM {table}"; + var rowCount = Convert.ToInt64(await countCmd.ExecuteScalarAsync()); + if (rowCount == 0) continue; + + var preservePath = Path.Combine(preserveDir, $"{table}.parquet").Replace("\\", "/"); + using var exportCmd = connection.CreateCommand(); + exportCmd.CommandText = $"COPY (SELECT * FROM {table}) TO '{EscapeSqlPath(preservePath)}' (FORMAT PARQUET)"; + await exportCmd.ExecuteNonQueryAsync(); + preservedFiles[table] = preservePath; + + _logger?.LogInformation("Preserved {Count} rows from {Table} for restoration after reset", rowCount, table); + } + catch (Exception ex) + { + _logger?.LogError(ex, "Failed to preserve {Table} before reset — rows will be lost", table); + } + } } /* Compact per-cycle files into monthly parquet files before reset. @@ -553,11 +593,56 @@ and only touches filesystem files — no contention with collectors. */ _logger?.LogInformation("Deleting and reinitializing database"); await _duckDb.ResetDatabaseAsync(); + /* Restore preserved config rows into the freshly initialized tables. 
*/ + var allRestoresSucceeded = true; + if (preservedFiles.Count > 0) + { + using (_duckDb.AcquireWriteLock()) + { + using var connection = _duckDb.CreateConnection(); + await connection.OpenAsync(); + foreach (var (table, path) in preservedFiles) + { + try + { + using var insertCmd = connection.CreateCommand(); + insertCmd.CommandText = $"INSERT INTO {table} SELECT * FROM read_parquet('{EscapeSqlPath(path)}')"; + await insertCmd.ExecuteNonQueryAsync(); + _logger?.LogInformation("Restored rows to {Table} after database reset", table); + } + catch (Exception ex) + { + allRestoresSucceeded = false; + _logger?.LogError(ex, "Failed to restore {Table} from {Path} — preservation files retained for manual recovery", table, path); + } + } + } + } + _logger?.LogInformation("Database reset complete — archive views now serve all historical data from Parquet"); + + /* Clean up temp preservation dir only if every restore succeeded. + On failure, leave the parquet files so the user can recover manually. 
*/ + if (allRestoresSucceeded) + { + try + { + if (Directory.Exists(preserveDir)) + Directory.Delete(preserveDir, recursive: true); + } + catch (Exception ex) + { + _logger?.LogWarning(ex, "Could not clean up preservation temp dir {Dir}", preserveDir); + } + } + else + { + _logger?.LogWarning("Preservation files retained at {Dir} for manual recovery", preserveDir); + } } catch (Exception ex) { - _logger?.LogError(ex, "Archive-all-and-reset failed"); + _logger?.LogError(ex, "Archive-all-and-reset failed — preservation files (if any) retained at {Dir}", preserveDir); } finally { From 3bb16e43d972702adecef4882c2d291921292a74 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Thu, 7 May 2026 13:17:24 -0500 Subject: [PATCH 07/27] =?UTF-8?q?Fix=20#933=20=E2=80=94=20bound=20compacti?= =?UTF-8?q?on=20memory=20so=20wide-row=20tables=20don't=20OOM?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #935 added temp_directory so DuckDB could spill, but on wider workloads the working set still blew past the 4 GB cap before spill caught up (reporter saw OOM at 3.7 GiB compacting 15 query_snapshots files). Three knobs combined to feed that: - memory_limit = 4 GB was too high — DuckDB held off spilling until late - threads defaulted to N cores, multiplying per-thread row-group buffers - ROW_GROUP_SIZE 122880 buffered up to 122k wide-VARCHAR rows per group Drop memory_limit to 1 GB, cap threads to 2, and shrink ROW_GROUP_SIZE to 8192. On 1.7 M rows of real query_stats data this drops peak working set from 1236 MB → 166 MB (87% reduction) at a 31% wall-time cost. Memory now plateaus instead of growing with row count, which is the load-bearing change for issue #933. Adds tools/CompactionRepro — a standalone reproducer that splits a real monthly parquet file into N per-cycle-shaped chunks and runs the same pair-merge logic with the tuning knobs exposed on the command line. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- Lite/Services/ArchiveService.cs | 8 +- tools/CompactionRepro/CompactionRepro.csproj | 17 ++ tools/CompactionRepro/Program.cs | 240 +++++++++++++++++++ 3 files changed, 261 insertions(+), 4 deletions(-) create mode 100644 tools/CompactionRepro/CompactionRepro.csproj create mode 100644 tools/CompactionRepro/Program.cs diff --git a/Lite/Services/ArchiveService.cs b/Lite/Services/ArchiveService.cs index f0f55cc7..f5f68e90 100644 --- a/Lite/Services/ArchiveService.cs +++ b/Lite/Services/ArchiveService.cs @@ -394,14 +394,14 @@ write on the same volume the parquet files already live on. */ con.Open(); using (var pragma = con.CreateCommand()) { - pragma.CommandText = $"SET memory_limit = '4GB'; SET preserve_insertion_order = false; SET temp_directory = '{EscapeSqlPath(spillDirSql)}';"; + pragma.CommandText = $"SET memory_limit = '1GB'; SET threads = 2; SET preserve_insertion_order = false; SET temp_directory = '{EscapeSqlPath(spillDirSql)}';"; pragma.ExecuteNonQuery(); } var pathList = string.Join(", ", sourcePaths.Select(p => $"'{EscapeSqlPath(p)}'")); using var cmd = con.CreateCommand(); cmd.CommandText = $"COPY (SELECT {selectClause} FROM read_parquet([{pathList}], union_by_name=true)) " + - $"TO '{EscapeSqlPath(tempPath)}' (FORMAT PARQUET, COMPRESSION ZSTD, ROW_GROUP_SIZE 122880)"; + $"TO '{EscapeSqlPath(tempPath)}' (FORMAT PARQUET, COMPRESSION ZSTD, ROW_GROUP_SIZE 8192)"; cmd.ExecuteNonQuery(); } else @@ -425,14 +425,14 @@ Sort smallest-first so early merges are cheap. 
*/ con.Open(); using (var pragma = con.CreateCommand()) { - pragma.CommandText = $"SET memory_limit = '4GB'; SET preserve_insertion_order = false; SET temp_directory = '{EscapeSqlPath(spillDirSql)}';"; + pragma.CommandText = $"SET memory_limit = '1GB'; SET threads = 2; SET preserve_insertion_order = false; SET temp_directory = '{EscapeSqlPath(spillDirSql)}';"; pragma.ExecuteNonQuery(); } var pairList = $"'{EscapeSqlPath(currentPath)}', '{EscapeSqlPath(sorted[i])}'"; using var cmd = con.CreateCommand(); cmd.CommandText = $"COPY (SELECT {selectClause} FROM read_parquet([{pairList}], union_by_name=true)) " + - $"TO '{EscapeSqlPath(stepOutput)}' (FORMAT PARQUET, COMPRESSION ZSTD, ROW_GROUP_SIZE 122880)"; + $"TO '{EscapeSqlPath(stepOutput)}' (FORMAT PARQUET, COMPRESSION ZSTD, ROW_GROUP_SIZE 8192)"; cmd.ExecuteNonQuery(); /* Clean up previous intermediate file */ diff --git a/tools/CompactionRepro/CompactionRepro.csproj b/tools/CompactionRepro/CompactionRepro.csproj new file mode 100644 index 00000000..50173e0a --- /dev/null +++ b/tools/CompactionRepro/CompactionRepro.csproj @@ -0,0 +1,17 @@ + + + Exe + net8.0 + enable + enable + latest + CompactionRepro + CompactionRepro + false + + + + + + + diff --git a/tools/CompactionRepro/Program.cs b/tools/CompactionRepro/Program.cs new file mode 100644 index 00000000..399ced8d --- /dev/null +++ b/tools/CompactionRepro/Program.cs @@ -0,0 +1,240 @@ +using System.Diagnostics; +using DuckDB.NET.Data; + +/* + * CompactionRepro — standalone reproducer for issue #933. + * + * Splits an existing monthly parquet file (like 202604_query_snapshots.parquet) + * into N per-cycle-shaped chunks, then runs the same pair-merge compaction + * logic ArchiveService.CompactParquetFiles uses, with knobs you can flip on + * the command line. The split chunks have the exact row shape that caused + * the user's OOM in #933. 
+ * + * Compare OLD vs NEW tuning by running the same data shape twice with + * different --memory-limit / --threads / --row-group-size values. + * + * Usage: + * dotnet run -- --source-file <path> [options] + * + * Options (defaults match the proposed NEW tuning): + * --source-file Required. Path to a monthly parquet file to split & merge. + * --memory-limit DuckDB memory_limit (e.g. "1GB", "4GB"). Default: 1GB + * --threads DuckDB threads. 0 = DuckDB default. Default: 2 + * --row-group-size Output ROW_GROUP_SIZE. Default: 8192 + * --num-files Number of split chunks. Default: 15 + * --keep Don't delete temp dir after run (for inspection) + * + * Examples: + * # NEW tuning (the proposed fix) on real query_snapshots data + * dotnet run -- --source-file "$LOCALAPPDATA/PerformanceMonitorLite/archive/202604_query_snapshots.parquet" \ + * --memory-limit 1GB --threads 2 --row-group-size 8192 + * + * # OLD tuning (current production) — should reproduce the OOM + * dotnet run -- --source-file "$LOCALAPPDATA/PerformanceMonitorLite/archive/202604_query_snapshots.parquet" \ + * --memory-limit 4GB --threads 0 --row-group-size 122880 + */ + +var sourceFile = GetArg(args, "--source-file", ""); +if (string.IsNullOrEmpty(sourceFile)) +{ + Console.Error.WriteLine("error: --source-file is required"); + Console.Error.WriteLine("Try: --source-file \"$LOCALAPPDATA/PerformanceMonitorLite/archive/202604_query_snapshots.parquet\""); + return 2; +} +if (!File.Exists(sourceFile)) +{ + Console.Error.WriteLine($"error: source file not found: {sourceFile}"); + return 2; +} + +var memoryLimit = GetArg(args, "--memory-limit", "1GB"); +var threads = int.Parse(GetArg(args, "--threads", "2")); +var rowGroupSize = int.Parse(GetArg(args, "--row-group-size", "8192")); +var numFiles = int.Parse(GetArg(args, "--num-files", "15")); +var keep = args.Contains("--keep"); + +var tempDir = Path.Combine(Path.GetTempPath(), $"CompactionRepro_{Guid.NewGuid():N}"); +Directory.CreateDirectory(tempDir); + 
+Console.WriteLine($"Source: {sourceFile} ({new FileInfo(sourceFile).Length / 1024.0 / 1024.0:F1} MB)"); +Console.WriteLine($"Temp dir: {tempDir}"); +Console.WriteLine($"Settings: memory_limit={memoryLimit}, threads={threads}, ROW_GROUP_SIZE={rowGroupSize}"); +Console.WriteLine($"Splitting source into {numFiles} chunks"); +Console.WriteLine(); + +try +{ + Console.WriteLine($"[1/3] Splitting source file into {numFiles} chunks..."); + var sw = Stopwatch.StartNew(); + var sourcePaths = SplitSourceFile(sourceFile, tempDir, numFiles); + sw.Stop(); + var totalSourceBytes = sourcePaths.Sum(p => new FileInfo(p).Length); + Console.WriteLine($" Wrote {sourcePaths.Count} files, {totalSourceBytes / 1024.0 / 1024.0:F1} MB total in {sw.ElapsedMilliseconds} ms"); + Console.WriteLine(); + + Console.WriteLine("[2/3] Running pair-merge compaction (mirrors ArchiveService.CompactParquetFiles)..."); + var spillDir = Path.Combine(tempDir, "duckdb_tmp").Replace("\\", "/"); + Directory.CreateDirectory(spillDir); + + var targetPath = Path.Combine(tempDir, "compacted.parquet").Replace("\\", "/"); + var process = Process.GetCurrentProcess(); + var startBytes = GC.GetTotalMemory(forceFullCollection: true); + var startWorkingSet = process.WorkingSet64; + + var compactionSw = Stopwatch.StartNew(); + var peakWorkingSet = startWorkingSet; + long compactedFileBytes = 0; + var success = false; + string? failureMessage = null; + + try + { + /* Sort smallest-first like ArchiveService does */ + var sorted = sourcePaths + .OrderBy(p => new FileInfo(p).Length) + .ToList(); + + var currentPath = sorted[0]; + var intermediateFiles = new List(); + + for (var i = 1; i < sorted.Count; i++) + { + var stepOutput = i < sorted.Count - 1 + ? targetPath + $".step{i}.tmp" + : targetPath; + + using var con = new DuckDBConnection("DataSource=:memory:"); + con.Open(); + using (var pragma = con.CreateCommand()) + { + var threadsClause = threads > 0 ? 
$"SET threads = {threads}; " : ""; + pragma.CommandText = + $"SET memory_limit = '{memoryLimit}'; " + + $"SET preserve_insertion_order = false; " + + $"SET temp_directory = '{spillDir.Replace("'", "''")}'; " + + threadsClause; + pragma.ExecuteNonQuery(); + } + + var pairList = $"'{currentPath.Replace("'", "''")}', '{sorted[i].Replace("'", "''")}'"; + using var cmd = con.CreateCommand(); + cmd.CommandText = + $"COPY (SELECT * FROM read_parquet([{pairList}], union_by_name=true)) " + + $"TO '{stepOutput.Replace("'", "''")}' " + + $"(FORMAT PARQUET, COMPRESSION ZSTD, ROW_GROUP_SIZE {rowGroupSize})"; + cmd.ExecuteNonQuery(); + + process.Refresh(); + if (process.WorkingSet64 > peakWorkingSet) peakWorkingSet = process.WorkingSet64; + + if (intermediateFiles.Count > 0) + { + var prev = intermediateFiles[^1]; + try { File.Delete(prev); } catch { } + } + + intermediateFiles.Add(stepOutput); + currentPath = stepOutput; + + Console.WriteLine($" step {i}/{sorted.Count - 1}: peak working set {peakWorkingSet / 1024.0 / 1024.0:F0} MB"); + } + + compactedFileBytes = new FileInfo(targetPath).Length; + success = true; + } + catch (Exception ex) + { + failureMessage = ex.Message; + } + compactionSw.Stop(); + + process.Refresh(); + if (process.WorkingSet64 > peakWorkingSet) peakWorkingSet = process.WorkingSet64; + + Console.WriteLine(); + Console.WriteLine("[3/3] Result:"); + Console.WriteLine($" Status: {(success ? 
"SUCCESS" : "FAILURE")}"); + Console.WriteLine($" Wall time: {compactionSw.Elapsed.TotalSeconds:F2}s"); + Console.WriteLine($" Peak working set: {peakWorkingSet / 1024.0 / 1024.0:F0} MB"); + if (success) + { + Console.WriteLine($" Output size: {compactedFileBytes / 1024.0 / 1024.0:F1} MB"); + + /* Sanity check: row count round-trip — output must match source */ + using var verifyCon = new DuckDBConnection("DataSource=:memory:"); + verifyCon.Open(); + using var verifyCmd = verifyCon.CreateCommand(); + verifyCmd.CommandText = + $"SELECT (SELECT COUNT(*) FROM read_parquet('{targetPath.Replace("'", "''")}')) AS out_rows, " + + $" (SELECT COUNT(*) FROM read_parquet('{sourceFile.Replace("'", "''").Replace("\\", "/")}')) AS src_rows"; + using var verifyReader = verifyCmd.ExecuteReader(); + verifyReader.Read(); + var actualRows = verifyReader.GetInt64(0); + var expectedRows = verifyReader.GetInt64(1); + Console.WriteLine($" Row count: {actualRows} (expected {expectedRows}) {(actualRows == expectedRows ? "OK" : "MISMATCH")}"); + } + else + { + Console.WriteLine($" Failure: {failureMessage}"); + } + + /* Spill dir size — non-zero means DuckDB spilled */ + var spillBytes = Directory.Exists(spillDir) + ? Directory.GetFiles(spillDir, "*", SearchOption.AllDirectories).Sum(f => new FileInfo(f).Length) + : 0; + Console.WriteLine($" Spill on disk: {spillBytes / 1024.0 / 1024.0:F1} MB ({(spillBytes > 0 ? "spilled" : "did not spill")})"); + + return success ? 0 : 1; +} +finally +{ + if (!keep) + { + try { Directory.Delete(tempDir, recursive: true); } catch { } + } + else + { + Console.WriteLine(); + Console.WriteLine($"Temp dir retained: {tempDir}"); + } +} + +static List SplitSourceFile(string sourceFile, string outDir, int numChunks) +{ + /* Split a real monthly parquet into N chunks by bucketing rows on collection_id modulo N. + Each chunk is written as ZSTD parquet (matching the production format). + Chunks whose output file is empty (zero bytes) are skipped. 
*/ + var sourceSql = sourceFile.Replace("'", "''").Replace("\\", "/"); + + using var con = new DuckDBConnection("DataSource=:memory:"); + con.Open(); + + long totalRows; + using (var countCmd = con.CreateCommand()) + { + countCmd.CommandText = $"SELECT COUNT(*) FROM read_parquet('{sourceSql}')"; + totalRows = Convert.ToInt64(countCmd.ExecuteScalar()); + } + Console.WriteLine($" Source has {totalRows} rows; splitting into {numChunks} chunks"); + + var paths = new List(); + for (var i = 0; i < numChunks; i++) + { + var path = Path.Combine(outDir, $"src_{i:D3}.parquet").Replace("\\", "/"); + using var cmd = con.CreateCommand(); + cmd.CommandText = + $"COPY (SELECT * FROM read_parquet('{sourceSql}') " + + $" WHERE (collection_id % {numChunks}) = {i}) " + + $"TO '{path.Replace("'", "''")}' (FORMAT PARQUET, COMPRESSION ZSTD, ROW_GROUP_SIZE 122880)"; + cmd.ExecuteNonQuery(); + if (new FileInfo(path).Length > 0) paths.Add(path); + } + return paths; +} + +static string GetArg(string[] args, string key, string defaultValue) +{ + for (var i = 0; i < args.Length - 1; i++) + if (args[i] == key) return args[i + 1]; + return defaultValue; +} From 6669ddde94b72841ed9e87f3f28c49638bb1a49a Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Fri, 8 May 2026 09:49:50 -0500 Subject: [PATCH 08/27] =?UTF-8?q?Fix=20#944=20=E2=80=94=20one-click=20snoo?= =?UTF-8?q?ze=20from=20alert=20tray=20popup?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the standard non-interactive Windows toast for the seven fired alerts (CPU, Blocking, Deadlocks, Poison Wait, Long-Running Query, TempDB Space, Long-Running Job) with a custom WPF balloon that has "Snooze 15m / 1h / 4h" + "Dismiss" buttons. Snoozing creates a temporary mute rule scoped to ServerName + MetricName with the chosen expiration, which the existing alert-fire path already honors for both email and Teams/Slack webhook delivery. 
Status notifications (Online/Offline + 7 Cleared/Resolved) keep using the standard non-interactive balloon since there's nothing to snooze. Addresses the firefighting workflow described in #943. Co-Authored-By: Claude Opus 4.7 (1M context) --- Lite/Controls/SnoozeBalloon.xaml | 60 ++++++++++++++++ Lite/Controls/SnoozeBalloon.xaml.cs | 106 ++++++++++++++++++++++++++++ Lite/MainWindow.xaml.cs | 49 +++++++++---- Lite/Services/SystemTrayService.cs | 21 ++++++ 4 files changed, 222 insertions(+), 14 deletions(-) create mode 100644 Lite/Controls/SnoozeBalloon.xaml create mode 100644 Lite/Controls/SnoozeBalloon.xaml.cs diff --git a/Lite/Controls/SnoozeBalloon.xaml b/Lite/Controls/SnoozeBalloon.xaml new file mode 100644 index 00000000..2a595225 --- /dev/null +++ b/Lite/Controls/SnoozeBalloon.xaml @@ -0,0 +1,60 @@ + + + + + + + + + + + +