From 3eb4ccecfd7dc6b67813c10bb0eb1d02f94c9f10 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Wed, 8 Apr 2026 12:17:10 +0300 Subject: [PATCH 1/5] perf(encoding): rebase hc positions past u32 boundary - store HC table indexes relative to a moving position base - rebase and rebuild live chain/hash entries before u32 overflow - add regression test for post-u32 boundary insertion path - remove obsolete 4 GiB limitation notes for HC levels Closes #51 --- zstd/src/encoding/match_generator.rs | 80 +++++++++++++++++++++++----- zstd/src/encoding/mod.rs | 21 -------- 2 files changed, 67 insertions(+), 34 deletions(-) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index f223b820..12eb4ef3 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -30,8 +30,8 @@ const HC_CHAIN_LOG: usize = 19; const HC_SEARCH_DEPTH: usize = 16; const HC_MIN_MATCH_LEN: usize = 5; const HC_TARGET_LEN: usize = 48; -// Positions are stored as (abs_pos + 1) so that 0 is a safe empty sentinel -// that can never collide with any valid position, even at the 4 GiB boundary. +// Positions are stored as (relative_pos + 1) so that 0 is a safe empty +// sentinel that can never collide with any valid position. const HC_EMPTY: u32 = 0; // Maximum search depth across all HC-based levels. Used to size the @@ -1535,6 +1535,7 @@ struct HcMatchGenerator { history: Vec, history_start: usize, history_abs_start: usize, + position_base: usize, offset_hist: [u32; 3], hash_table: Vec, chain_table: Vec, @@ -1554,6 +1555,7 @@ impl HcMatchGenerator { history: Vec::new(), history_start: 0, history_abs_start: 0, + position_base: 0, offset_hist: [1, 4, 8], hash_table: Vec::new(), chain_table: Vec::new(), @@ -1583,6 +1585,7 @@ impl HcMatchGenerator { self.history.clear(); self.history_start = 0; self.history_abs_start = 0; + self.position_base = 0; self.offset_hist = [1, 4, 8]; if !self.hash_table.is_empty() { self.hash_table.fill(HC_EMPTY); @@ -1734,23 +1737,50 @@ impl HcMatchGenerator { ((value.wrapping_mul(PRIME)) >> (64 - self.hash_log)) as usize } + fn relative_position(&self, abs_pos: usize) -> Option { + let rel = abs_pos.checked_sub(self.position_base)?; + let rel_u32 = u32::try_from(rel).ok()?; + // Stored as (relative_pos + 1), so u32::MAX cannot be represented. + (rel_u32 < u32::MAX).then_some(rel_u32) + } + + fn maybe_rebase_positions(&mut self, abs_pos: usize) { + let needs_rebase = self + .relative_position(abs_pos) + .is_none_or(|relative| relative >= u32::MAX - 1); + if !needs_rebase { + return; + } + + // Keep all live history addressable after rebase. + self.position_base = self.history_abs_start; + self.hash_table.fill(HC_EMPTY); + self.chain_table.fill(HC_EMPTY); + + let history_start = self.history_abs_start; + let history_end = self.history_abs_end(); + for pos in history_start..history_end { + self.insert_position_no_rebase(pos); + } + } + fn insert_position(&mut self, abs_pos: usize) { + self.maybe_rebase_positions(abs_pos); + self.insert_position_no_rebase(abs_pos); + } + + fn insert_position_no_rebase(&mut self, abs_pos: usize) { let idx = abs_pos - self.history_abs_start; let concat = self.live_history(); if idx + 4 > concat.len() { return; } let hash = self.hash_position(&concat[idx..]); - // Store as (abs_pos + 1) so HC_EMPTY (0) never collides with a valid - // position. Guard on usize before cast to avoid silent u32 truncation. - // Streams >4 GiB stop inserting; matches degrade to repcodes-only. - // TODO(#51): rebase table positions to avoid 4 GiB cutoff - if abs_pos >= u32::MAX as usize { + let Some(relative_pos) = self.relative_position(abs_pos) else { return; - } - let pos_u32 = abs_pos as u32; - let stored = pos_u32 + 1; - let chain_idx = pos_u32 as usize & ((1 << self.chain_log) - 1); + }; + let stored = relative_pos + 1; + let chain_idx = relative_pos as usize & ((1 << self.chain_log) - 1); let prev = self.hash_table[hash]; self.chain_table[chain_idx] = prev; self.hash_table[hash] = stored; @@ -1788,8 +1818,9 @@ impl HcMatchGenerator { if cur == HC_EMPTY { break; } - let candidate_abs = cur.wrapping_sub(1) as usize; - let next = self.chain_table[candidate_abs & chain_mask]; + let candidate_rel = cur.wrapping_sub(1) as usize; + let candidate_abs = self.position_base + candidate_rel; + let next = self.chain_table[candidate_rel & chain_mask]; steps += 1; if next == cur { // Self-loop: two positions share chain_idx, stop to avoid @@ -2664,6 +2695,29 @@ fn prime_with_dictionary_budget_shrinks_after_hc_eviction() { ); } +#[test] +fn hc_rebases_positions_after_u32_boundary() { + let mut matcher = HcMatchGenerator::new(64); + matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {}); + matcher.ensure_tables(); + + // Simulate a long-running stream where absolute history positions crossed + // the u32 range. Before #51 this disabled HC inserts entirely. + matcher.history_abs_start = u32::MAX as usize + 64; + matcher.position_base = 0; + + matcher.skip_matching(); + + assert_eq!( + matcher.position_base, matcher.history_abs_start, + "rebase should anchor to the oldest live absolute position" + ); + assert!( + matcher.hash_table.iter().any(|entry| *entry != HC_EMPTY), + "HC hash table should still be populated after crossing u32 boundary" + ); +} + #[test] fn suffix_store_with_single_slot_does_not_panic_on_keying() { let mut suffixes = SuffixStore::with_capacity(1); diff --git a/zstd/src/encoding/mod.rs b/zstd/src/encoding/mod.rs index 1fa89360..5210ebf2 100644 --- a/zstd/src/encoding/mod.rs +++ b/zstd/src/encoding/mod.rs @@ -64,13 +64,6 @@ pub enum CompressionLevel { /// Uses the hash-chain matcher with a lazy2 matching strategy: the encoder /// evaluates up to two positions ahead before committing to a match, /// trading speed for a better compression ratio than [`CompressionLevel::Default`]. - /// - /// **Limitation:** hash-chain tables use 32-bit positions. For single-frame - /// inputs exceeding ~4 GiB, matches can still be found for roughly one - /// window past that point; once all in-window positions exceed `u32::MAX` - /// (≈4 GiB + window size), matching becomes effectively repcode-only. - /// Prefer [`CompressionLevel::Default`] for very large single-frame streams - /// until table rebasing is implemented. Better, /// This level is roughly equivalent to Zstd level 11. /// @@ -80,13 +73,6 @@ pub enum CompressionLevel { /// a deeper search (32 candidates vs 16), and a higher target match /// length (128 vs 48), trading speed for the best compression ratio /// available in this crate. - /// - /// **Limitation:** hash-chain tables use 32-bit positions. For single-frame - /// inputs exceeding ~4 GiB, matches can still be found for roughly one - /// window past that point; once all in-window positions exceed `u32::MAX` - /// (≈4 GiB + window size), matching becomes effectively repcode-only. - /// Prefer [`CompressionLevel::Default`] for very large single-frame - /// streams until table rebasing is implemented. Best, /// Numeric compression level. /// @@ -108,13 +94,6 @@ pub enum CompressionLevel { /// this crate has not yet implemented (btopt, btultra) are approximated /// with the closest available matcher. /// - /// **Limitation:** large hash-chain levels still use 32-bit positions. - /// For single-frame inputs exceeding ~4 GiB, matches can still be found - /// for roughly one window past that point; once all in-window positions - /// exceed `u32::MAX` (≈4 GiB + window size), matching becomes effectively - /// repcode-only. Prefer [`CompressionLevel::Default`] for very large - /// single-frame streams until table rebasing is implemented. - /// /// Semver note: this variant was added after the initial enum shape and /// is a breaking API change for downstream crates that exhaustively /// `match` on [`CompressionLevel`] without a wildcard arm. From f2c167c3c44366a5bef829d75cafacbad8033003 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Wed, 8 Apr 2026 12:24:24 +0300 Subject: [PATCH 2/5] test(encoding): avoid 32-bit overflow in hc rebase regression - make hc_rebases_positions_after_u32_boundary platform-safe - keep 64-bit >u32 scenario, use boundary path on 32-bit - prevent const arithmetic overflow on i686 CI --- zstd/src/encoding/match_generator.rs | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index 12eb4ef3..109f52b3 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -2700,18 +2700,24 @@ fn hc_rebases_positions_after_u32_boundary() { let mut matcher = HcMatchGenerator::new(64); matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {}); matcher.ensure_tables(); - - // Simulate a long-running stream where absolute history positions crossed - // the u32 range. Before #51 this disabled HC inserts entirely. - matcher.history_abs_start = u32::MAX as usize + 64; matcher.position_base = 0; + if usize::BITS > 32 { + // Simulate a long-running stream where absolute history positions + // crossed the u32 range. Before #51 this disabled HC inserts entirely. + matcher.history_abs_start = (u32::MAX as usize) + .checked_add(64) + .expect("64-bit usize should represent u32::MAX + 64"); + matcher.skip_matching(); + assert_eq!( + matcher.position_base, matcher.history_abs_start, + "rebase should anchor to the oldest live absolute position" + ); + } else { + // 32-bit targets cannot represent positions above u32::MAX. Verify + // that the rebase path still handles boundary insertion requests. + matcher.maybe_rebase_positions(u32::MAX as usize); + } - matcher.skip_matching(); - - assert_eq!( - matcher.position_base, matcher.history_abs_start, - "rebase should anchor to the oldest live absolute position" - ); assert!( matcher.hash_table.iter().any(|entry| *entry != HC_EMPTY), "HC hash table should still be populated after crossing u32 boundary" From c1fe8c7212f4a7f00a3cfe080d3d8fb5a998dc52 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Wed, 8 Apr 2026 12:27:20 +0300 Subject: [PATCH 3/5] test(encoding): make hc u32-boundary regression 32-bit safe - avoid const overflow in i686 by using fallible conversion - clarify relative-position and chain-candidate storage comments --- zstd/src/encoding/match_generator.rs | 35 ++++++++++++++-------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index 109f52b3..ec08b38e 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -1740,7 +1740,9 @@ impl HcMatchGenerator { fn relative_position(&self, abs_pos: usize) -> Option { let rel = abs_pos.checked_sub(self.position_base)?; let rel_u32 = u32::try_from(rel).ok()?; - // Stored as (relative_pos + 1), so u32::MAX cannot be represented. + // Positions are stored as (relative_pos + 1), with 0 reserved as the + // empty sentinel. So the raw relative position itself must stay + // strictly below u32::MAX. (rel_u32 < u32::MAX).then_some(rel_u32) } @@ -1808,7 +1810,8 @@ impl HcMatchGenerator { let mut filled = 0; // Follow chain up to search_depth valid candidates, skipping stale // entries (evicted from window) instead of stopping at them. - // Stored values are (abs_pos + 1); decode with wrapping_sub(1). + // Stored values are (relative_pos + 1); decode with wrapping_sub(1) + // and recover absolute position via position_base + relative. // Break on self-loops (masked chain_idx collision at periodicity). // Cap total steps at 4x search depth to bound time spent skipping // stale entries while still finding valid candidates deeper in chain. @@ -2701,22 +2704,18 @@ fn hc_rebases_positions_after_u32_boundary() { matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {}); matcher.ensure_tables(); matcher.position_base = 0; - if usize::BITS > 32 { - // Simulate a long-running stream where absolute history positions - // crossed the u32 range. Before #51 this disabled HC inserts entirely. - matcher.history_abs_start = (u32::MAX as usize) - .checked_add(64) - .expect("64-bit usize should represent u32::MAX + 64"); - matcher.skip_matching(); - assert_eq!( - matcher.position_base, matcher.history_abs_start, - "rebase should anchor to the oldest live absolute position" - ); - } else { - // 32-bit targets cannot represent positions above u32::MAX. Verify - // that the rebase path still handles boundary insertion requests. - matcher.maybe_rebase_positions(u32::MAX as usize); - } + let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() { + Ok(value) => value, + Err(_) => return, + }; + // Simulate a long-running stream where absolute history positions crossed + // the u32 range. Before #51 this disabled HC inserts entirely. + matcher.history_abs_start = history_abs_start; + matcher.skip_matching(); + assert_eq!( + matcher.position_base, matcher.history_abs_start, + "rebase should anchor to the oldest live absolute position" + ); assert!( matcher.hash_table.iter().any(|entry| *entry != HC_EMPTY), From a9a14b013a5ec207237057abe938df3ad1f60fa7 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Wed, 8 Apr 2026 15:56:02 +0300 Subject: [PATCH 4/5] test(encoding): verify hc chain candidates after rebase - extend u32-boundary regression to assert chain_candidates returns valid entries - addresses CodeRabbit nitpick on post-rebase matchability --- zstd/src/encoding/match_generator.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index ec08b38e..6c4ecc4e 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -2721,6 +2721,14 @@ fn hc_rebases_positions_after_u32_boundary() { matcher.hash_table.iter().any(|entry| *entry != HC_EMPTY), "HC hash table should still be populated after crossing u32 boundary" ); + + // Verify rebasing preserves candidate lookup, not just table population. + let abs_pos = matcher.history_abs_start + 10; + let candidates = matcher.chain_candidates(abs_pos); + assert!( + candidates.iter().any(|candidate| *candidate != usize::MAX), + "chain_candidates should return valid matches after rebase" + ); } #[test] From cd89f8d40907a5ead6de641c1470946f459160ce Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Wed, 8 Apr 2026 16:12:24 +0300 Subject: [PATCH 5/5] fix(encoding): rebuild hc rebase tables only for seen prefix --- zstd/src/encoding/match_generator.rs | 45 ++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index 6c4ecc4e..e95b4326 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -1760,8 +1760,9 @@ impl HcMatchGenerator { self.chain_table.fill(HC_EMPTY); let history_start = self.history_abs_start; - let history_end = self.history_abs_end(); - for pos in history_start..history_end { + // Rebuild only the already-inserted prefix. The caller inserts abs_pos + // immediately after this, and later positions are added in-order. + for pos in history_start..abs_pos { self.insert_position_no_rebase(pos); } } @@ -2731,6 +2732,46 @@ fn hc_rebases_positions_after_u32_boundary() { ); } +#[test] +fn hc_rebase_rebuilds_only_inserted_prefix() { + let mut matcher = HcMatchGenerator::new(64); + matcher.add_data(b"abcdeabcdeabcde".to_vec(), |_| {}); + matcher.ensure_tables(); + matcher.position_base = 0; + let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() { + Ok(value) => value, + Err(_) => return, + }; + matcher.history_abs_start = history_abs_start; + let abs_pos = matcher.history_abs_start + 6; + + let mut expected = HcMatchGenerator::new(64); + expected.add_data(b"abcdeabcdeabcde".to_vec(), |_| {}); + expected.ensure_tables(); + expected.history_abs_start = history_abs_start; + expected.position_base = expected.history_abs_start; + expected.hash_table.fill(HC_EMPTY); + expected.chain_table.fill(HC_EMPTY); + for pos in expected.history_abs_start..abs_pos { + expected.insert_position_no_rebase(pos); + } + + matcher.maybe_rebase_positions(abs_pos); + + assert_eq!( + matcher.position_base, matcher.history_abs_start, + "rebase should still anchor to the oldest live absolute position" + ); + assert_eq!( + matcher.hash_table, expected.hash_table, + "rebase must rebuild only positions already inserted before abs_pos" + ); + assert_eq!( + matcher.chain_table, expected.chain_table, + "future positions must not be pre-seeded into HC chains during rebase" + ); +} + #[test] fn suffix_store_with_single_slot_does_not_panic_on_keying() { let mut suffixes = SuffixStore::with_capacity(1);