From e7031859d7e6c743c078e4c3722a0f0e4f8c7e4b Mon Sep 17 00:00:00 2001
From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com>
Date: Mon, 23 Feb 2026 08:07:50 -0500
Subject: [PATCH] Use per-collector retention from config.collection_schedule
 in data retention (#237)

The data retention procedure now reads retention_days from
config.collection_schedule for each table instead of using a hardcoded
30-day value. Direct name matching handles most tables; special mappings
cover HealthParser, blocking_BlockedProcessReport, and deadlocks tables.
The @retention_days parameter remains as a fallback for unmatched tables.

The SQL Agent job step no longer passes @retention_days = 30, letting
the procedure use per-collector settings by default.

Tested on sql2022: query_snapshots used 10-day retention, running_jobs
used 7-day retention, batch deletion confirmed working with
@batch_size = 1000.

Closes #237

Co-Authored-By: Claude Opus 4.6
---
Review notes (below the "---" marker, so not part of the commit message):

* The direct-match UPDATE now escapes underscores in the LIKE prefix,
  ignores collector names that reduce to an empty prefix, and picks the
  longest matching prefix via TOP (1), so a table matched by several
  collectors gets a deterministic retention value.
* Both UPDATEs skip schedule rows whose retention_days is NULL, zero or
  negative, leaving the @retention_days fallback in place for that table.
* These matching changes were made in review and are NOT covered by the
  "Tested on sql2022" statement above; re-test before merging.
* Whitespace in this patch was reconstructed. If context indentation
  differs from the target files, apply with --ignore-whitespace. The index
  line still carries the blob ids of the originally committed version.

 install/43_data_retention.sql    | 75 +++++++++++++++++++++++++++++---
 install/45_create_agent_jobs.sql |  2 +-
 2 files changed, 70 insertions(+), 7 deletions(-)

diff --git a/install/43_data_retention.sql b/install/43_data_retention.sql
index dc310837..dc47775c 100644
--- a/install/43_data_retention.sql
+++ b/install/43_data_retention.sql
@@ -20,8 +20,10 @@ GO
 
 /*
 Data retention procedure for performance monitoring system
-Automatically purges old data from ALL collection tables based on configurable retention periods
-DYNAMIC VERSION - automatically discovers tables in collect schema with collection_time column
+Automatically purges old data from ALL collection tables
+Uses per-collector retention from config.collection_schedule when available,
+falls back to @retention_days parameter for unmatched tables
+DYNAMIC VERSION - automatically discovers tables in collect schema with time columns
 */
 
 IF OBJECT_ID(N'config.data_retention', N'P') IS NULL
@@ -33,7 +35,7 @@ GO
 ALTER PROCEDURE
     config.data_retention
 (
-    @retention_days integer = 30, /*Number of days to retain collected data*/
+    @retention_days integer = 30, /*Fallback retention for tables without a collection_schedule entry*/
     @batch_size integer = 10000, /*Number of rows to delete per batch to avoid blocking*/
     @debug bit = 0 /*Print debugging information*/
 )
@@ -194,6 +196,66 @@ BEGIN
             WHERE ttc.table_name = t.name
         );
 
+        /*
+        Override retention_date per-collector from config.collection_schedule.
+        Direct match: strip _collector/_analyzer from the collector name and
+        prefix-match the table name. Underscores are escaped so LIKE treats them
+        literally; when several collectors match one table the longest prefix wins
+        so the result is deterministic; rows without a positive retention_days are
+        skipped so the @retention_days fallback stays in effect for that table.
+        */
+        UPDATE ttc
+        SET ttc.retention_date = DATEADD(DAY, -best.retention_days, SYSDATETIME())
+        FROM #tables_to_clean AS ttc
+        CROSS APPLY
+        (
+            SELECT TOP (1)
+                cs.retention_days
+            FROM config.collection_schedule AS cs
+            CROSS APPLY
+            (
+                SELECT
+                    base_name =
+                        REPLACE(REPLACE(cs.collector_name, N'_collector', N''), N'_analyzer', N'')
+            ) AS b
+            WHERE cs.retention_days > 0
+            AND   b.base_name <> N''
+            AND   ttc.table_name LIKE REPLACE(b.base_name, N'_', N'[_]') + N'%'
+            ORDER BY
+                LEN(b.base_name) DESC,
+                cs.collector_name
+        ) AS best;
+
+        /*
+        Special mappings for tables whose names don't match their collector:
+        - HealthParser_* tables -> system_health_collector
+        - blocking_BlockedProcessReport -> process_blocked_process_xml
+        - deadlocks (sp_BlitzLock output) -> process_deadlock_xml
+        Rows without a positive retention_days are skipped (fallback stays in effect).
+        */
+        UPDATE ttc
+        SET ttc.retention_date = DATEADD(DAY, -cs.retention_days, SYSDATETIME())
+        FROM #tables_to_clean AS ttc
+        CROSS JOIN config.collection_schedule AS cs
+        WHERE cs.retention_days > 0
+        AND
+        (
+            (
+                ttc.table_name LIKE N'HealthParser%'
+            AND cs.collector_name = N'system_health_collector'
+            )
+            OR
+            (
+                ttc.table_name = N'blocking_BlockedProcessReport'
+            AND cs.collector_name = N'process_blocked_process_xml'
+            )
+            OR
+            (
+                ttc.table_name = N'deadlocks'
+            AND cs.collector_name = N'process_deadlock_xml'
+            )
+        );
+
         /*
         Special handling for config.collection_log - keep 2x longer
         */
@@ -399,12 +461,13 @@ GO
 
 PRINT 'Data retention procedure created successfully (DYNAMIC VERSION)';
 PRINT 'Use config.data_retention to automatically purge old monitoring data';
-PRINT 'AUTOMATICALLY discovers ALL collect schema tables with time columns';
+PRINT 'Uses per-collector retention from config.collection_schedule when available';
+PRINT 'Falls back to @retention_days parameter for unmatched tables';
 PRINT '';
 PRINT 'Examples:';
-PRINT ' -- Keep 30 days (default)';
+PRINT ' -- Use per-collector retention (default)';
 PRINT ' EXECUTE config.data_retention @debug = 1;';
 PRINT '';
-PRINT ' -- Keep 90 days';
+PRINT ' -- Override fallback to 90 days for tables without schedule entries';
 PRINT ' EXECUTE config.data_retention @retention_days = 90;';
 GO
diff --git a/install/45_create_agent_jobs.sql b/install/45_create_agent_jobs.sql
index d9ba1085..e7ce07cf 100644
--- a/install/45_create_agent_jobs.sql
+++ b/install/45_create_agent_jobs.sql
@@ -164,7 +164,7 @@ EXECUTE msdb.dbo.sp_add_jobstep
     @step_name = N'Run Data Retention',
     @subsystem = N'TSQL',
     @database_name = N'PerformanceMonitor',
-    @command = N'EXECUTE config.data_retention @retention_days = 30, @debug = 1;',
+    @command = N'EXECUTE config.data_retention @debug = 1;',
     @retry_attempts = 0,
     @on_success_action = 1; /*Quit with success*/
 