-
Notifications
You must be signed in to change notification settings - Fork 537
[phase-31 4/4] PostgreSQL metastore — migration + compaction columns #6245
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d892f23
4e928fe
d8d71ed
ad115bf
d0a995e
35c3942
33c4070
7a5979f
440631b
5eae799
5b2304c
4d42fd9
b6eb595
76b703a
ff605b9
723168f
9ca263d
73a20ef
75c15a0
ef21859
b4dac46
db51a96
605708e
f21fff5
f790519
759c2ca
6454f1d
4d8b6b2
1e67900
85fcb2d
06a6bf0
4481bef
64c5d5f
a8bf948
caa9c3e
93e1cc7
b968085
f7c89bf
1c99ddd
112f290
e8c6be4
f50f8f6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| -- Reverse Phase 31: Remove the compaction metadata columns, the compaction scope index, and the publish-timestamp trigger together with its trigger function. | ||
| DROP TRIGGER IF EXISTS set_publish_timestamp_on_metrics_split_publish ON metrics_splits CASCADE; | ||
| DROP FUNCTION IF EXISTS set_publish_timestamp_for_metrics_split(); | ||
| DROP INDEX IF EXISTS idx_metrics_splits_compaction_scope; | ||
| ALTER TABLE metrics_splits DROP COLUMN IF EXISTS node_id; | ||
| ALTER TABLE metrics_splits DROP COLUMN IF EXISTS delete_opstamp; | ||
| ALTER TABLE metrics_splits DROP COLUMN IF EXISTS maturity_timestamp; | ||
| ALTER TABLE metrics_splits DROP COLUMN IF EXISTS zonemap_regexes; | ||
| ALTER TABLE metrics_splits DROP COLUMN IF EXISTS row_keys; | ||
| ALTER TABLE metrics_splits DROP COLUMN IF EXISTS num_merge_ops; | ||
| ALTER TABLE metrics_splits DROP COLUMN IF EXISTS sort_fields; | ||
| ALTER TABLE metrics_splits DROP COLUMN IF EXISTS window_duration_secs; | ||
| ALTER TABLE metrics_splits DROP COLUMN IF EXISTS window_start; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| -- Phase 31: Add compaction metadata columns to metrics_splits. | ||
| -- These columns support time-windowed compaction planning and execution. | ||
| ALTER TABLE metrics_splits ADD COLUMN IF NOT EXISTS window_start BIGINT; | ||
| ALTER TABLE metrics_splits ADD COLUMN IF NOT EXISTS window_duration_secs INTEGER; | ||
| ALTER TABLE metrics_splits ADD COLUMN IF NOT EXISTS sort_fields TEXT NOT NULL DEFAULT ''; | ||
| ALTER TABLE metrics_splits ADD COLUMN IF NOT EXISTS num_merge_ops INTEGER NOT NULL DEFAULT 0; | ||
| ALTER TABLE metrics_splits ADD COLUMN IF NOT EXISTS row_keys BYTEA; | ||
| ALTER TABLE metrics_splits ADD COLUMN IF NOT EXISTS zonemap_regexes JSONB NOT NULL DEFAULT '{}'; | ||
|
|
||
| -- Columns present on the `splits` table that were missing from `metrics_splits`. | ||
| -- maturity_timestamp: compaction planner needs this to restrict candidates to | ||
| -- Published-and-immature splits, matching the logic the log-side merge planner uses. | ||
| ALTER TABLE metrics_splits ADD COLUMN IF NOT EXISTS maturity_timestamp TIMESTAMP DEFAULT TO_TIMESTAMP(0); | ||
| -- delete_opstamp: tracks which delete tasks have been applied to a split. | ||
| ALTER TABLE metrics_splits ADD COLUMN IF NOT EXISTS delete_opstamp BIGINT CHECK (delete_opstamp >= 0) DEFAULT 0; | ||
| -- node_id: identifies which node produced the split. | ||
| ALTER TABLE metrics_splits ADD COLUMN IF NOT EXISTS node_id VARCHAR(253); | ||
|
|
||
| -- Auto-set publish_timestamp when a split transitions Staged → Published, | ||
| -- matching the trigger on the `splits` table (migration 3). | ||
| CREATE OR REPLACE FUNCTION set_publish_timestamp_for_metrics_split() RETURNS trigger AS $$ | ||
| BEGIN | ||
| IF (TG_OP = 'UPDATE') AND (NEW.split_state = 'Published') AND (OLD.split_state = 'Staged') THEN | ||
| NEW.publish_timestamp := (CURRENT_TIMESTAMP AT TIME ZONE 'UTC'); | ||
| END IF; | ||
| RETURN NEW; | ||
| END; | ||
| $$ LANGUAGE plpgsql; | ||
|
|
||
| DROP TRIGGER IF EXISTS set_publish_timestamp_on_metrics_split_publish ON metrics_splits CASCADE; | ||
| CREATE TRIGGER set_publish_timestamp_on_metrics_split_publish | ||
| BEFORE UPDATE ON metrics_splits | ||
| FOR EACH ROW | ||
| EXECUTE PROCEDURE set_publish_timestamp_for_metrics_split(); | ||
|
|
||
| -- Compaction scope index: supports the compaction planner's primary query pattern | ||
| -- "give me all Published splits for a given (index_uid, sort_fields, window_start) triple." | ||
| CREATE INDEX IF NOT EXISTS idx_metrics_splits_compaction_scope | ||
| ON metrics_splits (index_uid, sort_fields, window_start) | ||
| WHERE split_state = 'Published'; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -56,6 +56,20 @@ pub(crate) struct StoredMetricsSplit { | |
| pub state: MetricsSplitState, | ||
| /// Update timestamp (Unix epoch seconds). | ||
| pub update_timestamp: i64, | ||
| /// Create timestamp (Unix epoch seconds). | ||
| #[serde(default)] | ||
| pub create_timestamp: i64, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Useful? React with 👍 / 👎.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Addressed. |
||
| /// Node that produced this split. | ||
| #[serde(default)] | ||
| pub node_id: String, | ||
| /// Delete opstamp. | ||
| #[serde(default)] | ||
| pub delete_opstamp: u64, | ||
| /// Maturity timestamp (Unix epoch seconds). Splits with | ||
| /// maturity_timestamp <= now are considered mature. | ||
| /// Defaults to 0 (epoch), meaning mature immediately. | ||
| #[serde(default)] | ||
| pub maturity_timestamp: i64, | ||
| } | ||
|
|
||
| /// A `FileBackedIndex` object carries an index metadata and its split metadata. | ||
|
|
@@ -759,6 +773,10 @@ impl FileBackedIndex { | |
| metadata, | ||
| state: MetricsSplitState::Staged, | ||
| update_timestamp: now, | ||
| create_timestamp: now, | ||
| node_id: String::new(), | ||
| delete_opstamp: 0, | ||
| maturity_timestamp: 0, | ||
| }; | ||
| self.metrics_splits.insert(split_id, stored); | ||
| } | ||
|
|
@@ -907,21 +925,37 @@ fn metrics_split_matches_query(split: &StoredMetricsSplit, query: &ListMetricsSp | |
| // Filter by state | ||
| if !query.split_states.is_empty() { | ||
| let state_str = split.state.as_str(); | ||
| if !query.split_states.iter().any(|s| s == state_str) { | ||
| if !query.split_states.iter().any(|s| s.as_str() == state_str) { | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| // Filter by time range | ||
| if let Some(start) = query.time_range_start | ||
| && (split.metadata.time_range.end_secs as i64) < start | ||
| { | ||
| return false; | ||
| } | ||
| if let Some(end) = query.time_range_end | ||
| && (split.metadata.time_range.start_secs as i64) > end | ||
| { | ||
| return false; | ||
| // Filter by time range. | ||
| // When sort_fields is set this is a compaction query and time_range | ||
| // refers to the compaction window; otherwise it refers to the data | ||
| // time range. Both use intersection semantics via FilterRange. | ||
| if !query.time_range.is_unbounded() { | ||
| if query.sort_fields.is_some() { | ||
| // Compaction path: intersect against the split's window. | ||
| let split_start = split.metadata.window_start(); | ||
| let split_duration = split.metadata.window_duration_secs() as i64; | ||
| match split_start { | ||
| Some(split_start) if split_duration > 0 => { | ||
| let split_end = split_start + split_duration - 1; | ||
| if !query.time_range.overlaps_with(split_start..=split_end) { | ||
| return false; | ||
| } | ||
| } | ||
| _ => return false, | ||
| } | ||
| } else { | ||
| // Read path: intersect against the split's data time range. | ||
| let data_range = split.metadata.time_range.start_secs as i64 | ||
| ..=split.metadata.time_range.end_secs as i64; | ||
| if !query.time_range.overlaps_with(data_range) { | ||
| return false; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Filter by metric names | ||
|
|
@@ -979,6 +1013,44 @@ fn metrics_split_matches_query(split: &StoredMetricsSplit, query: &ListMetricsSp | |
| } | ||
| } | ||
|
|
||
| if let Some(ref sort_fields) = query.sort_fields | ||
| && split.metadata.sort_fields != *sort_fields | ||
| { | ||
| return false; | ||
| } | ||
|
|
||
| if let Some(node_id) = &query.node_id | ||
| && split.node_id != *node_id | ||
| { | ||
| return false; | ||
| } | ||
|
|
||
| if !query.delete_opstamp.contains(&split.delete_opstamp) { | ||
| return false; | ||
| } | ||
|
|
||
| if !query.update_timestamp.contains(&split.update_timestamp) { | ||
| return false; | ||
| } | ||
|
|
||
| if !query.create_timestamp.contains(&split.create_timestamp) { | ||
| return false; | ||
| } | ||
|
|
||
| match &query.mature { | ||
| Bound::Included(evaluation_datetime) => { | ||
| if split.maturity_timestamp > evaluation_datetime.unix_timestamp() { | ||
| return false; | ||
| } | ||
| } | ||
| Bound::Excluded(evaluation_datetime) => { | ||
| if split.maturity_timestamp <= evaluation_datetime.unix_timestamp() { | ||
| return false; | ||
| } | ||
| } | ||
| Bound::Unbounded => {} | ||
| } | ||
|
|
||
| true | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.