From 7bc9969da3402c9910b883d64ffc360b34883555 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Thu, 26 Mar 2026 02:19:11 +0200 Subject: [PATCH 1/9] fix(encoding): implement default compression level - route CompressionLevel::Default through the encoder - configure matcher window for default-level frames - add roundtrip and ffi regression coverage Closes #5 --- zstd/src/encoding/frame_compressor.rs | 2 +- zstd/src/encoding/match_generator.rs | 13 ++++++++++++- zstd/src/encoding/mod.rs | 2 -- zstd/src/tests/roundtrip_integrity.rs | 15 +++++++++++++++ zstd/tests/cross_validation.rs | 8 ++++++++ 5 files changed, 36 insertions(+), 4 deletions(-) diff --git a/zstd/src/encoding/frame_compressor.rs b/zstd/src/encoding/frame_compressor.rs index a50d392b..1fd1478c 100644 --- a/zstd/src/encoding/frame_compressor.rs +++ b/zstd/src/encoding/frame_compressor.rs @@ -222,7 +222,7 @@ impl FrameCompressor { header.serialize(output); output.extend_from_slice(&uncompressed_data); } - CompressionLevel::Fastest => { + CompressionLevel::Fastest | CompressionLevel::Default => { compress_fastest(&mut self.state, last_block, uncompressed_data, output) } _ => { diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index 456867be..bf2ad4f9 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -13,6 +13,8 @@ use super::Matcher; use super::Sequence; const MIN_MATCH_LEN: usize = 5; +const DEFAULT_SLICE_SIZE: usize = 1024 * 128; +const DEFAULT_LEVEL_WINDOW_SIZE: usize = 1024 * 128 * 32; /// This is the default implementation of the `Matcher` trait. It allocates and reuses the buffers when possible. 
pub struct MatchGeneratorDriver { @@ -33,10 +35,19 @@ impl MatchGeneratorDriver { slice_size, } } + + fn configure_for_level(&mut self, level: CompressionLevel) { + self.slice_size = DEFAULT_SLICE_SIZE; + self.match_generator.max_window_size = match level { + CompressionLevel::Default => DEFAULT_LEVEL_WINDOW_SIZE, + _ => self.slice_size, + }; + } } impl Matcher for MatchGeneratorDriver { - fn reset(&mut self, _level: CompressionLevel) { + fn reset(&mut self, level: CompressionLevel) { + self.configure_for_level(level); let vec_pool = &mut self.vec_pool; let suffix_pool = &mut self.suffix_pool; diff --git a/zstd/src/encoding/mod.rs b/zstd/src/encoding/mod.rs index 17a4a6f0..09aa9b0f 100644 --- a/zstd/src/encoding/mod.rs +++ b/zstd/src/encoding/mod.rs @@ -53,8 +53,6 @@ pub enum CompressionLevel { /// This level is roughly equivalent to Zstd level 3, /// or the one used by the official compressor when no level /// is specified. - /// - /// UNIMPLEMENTED Default, /// This level is roughly equivalent to Zstd level 7. /// diff --git a/zstd/src/tests/roundtrip_integrity.rs b/zstd/src/tests/roundtrip_integrity.rs index 7912fa10..bd1b6181 100644 --- a/zstd/src/tests/roundtrip_integrity.rs +++ b/zstd/src/tests/roundtrip_integrity.rs @@ -64,6 +64,15 @@ fn roundtrip_streaming(data: &[u8]) -> Vec { result } +/// Roundtrip using compress_to_vec with the default compression level. +fn roundtrip_default(data: &[u8]) -> Vec { + let compressed = compress_to_vec(data, CompressionLevel::Default); + let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap(); + let mut result = Vec::new(); + decoder.read_to_end(&mut result).unwrap(); + result +} + /// Generate data with limited alphabet for better Huffman compressibility /// but enough variety to avoid RLE path. 
fn generate_huffman_friendly(seed: u64, len: usize, alphabet_size: u8) -> Vec { @@ -337,3 +346,9 @@ fn roundtrip_reused_frame_compressor_across_frames() { "Second reused-frame roundtrip failed" ); } + +#[test] +fn roundtrip_default_level_regression() { + let data = generate_compressible(777, 64 * 1024); + assert_eq!(roundtrip_default(&data), data); +} diff --git a/zstd/tests/cross_validation.rs b/zstd/tests/cross_validation.rs index e60db61e..5743236a 100644 --- a/zstd/tests/cross_validation.rs +++ b/zstd/tests/cross_validation.rs @@ -196,3 +196,11 @@ fn cross_ffi_compress_rust_decompress_repeat_offsets() { "ffi→rust multi-block repeat offset roundtrip failed" ); } + +#[test] +fn cross_rust_default_compress_ffi_decompress_regression() { + let data = generate_huffman_friendly(900, 64 * 1024, 32); + let compressed = compress_to_vec(&data[..], CompressionLevel::Default); + let result = zstd::decode_all(compressed.as_slice()).unwrap(); + assert_eq!(data, result, "rust default→ffi roundtrip failed"); +} From 6e659becbd02e91af26a45da23a3701bf2b33786 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Thu, 26 Mar 2026 08:59:53 +0200 Subject: [PATCH 2/9] fix(encoding): tighten default matcher review follow-ups - preserve matcher constructor baselines across reset - document Default as matching Fastest behavior today - add multi-block default roundtrip regression Closes #5 --- zstd/src/encoding/match_generator.rs | 27 +++++++++++++++++---------- zstd/src/encoding/mod.rs | 9 ++++++--- zstd/src/tests/roundtrip_integrity.rs | 6 ++++++ 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index bf2ad4f9..14f74e8f 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -13,8 +13,6 @@ use super::Matcher; use super::Sequence; const MIN_MATCH_LEN: usize = 5; -const DEFAULT_SLICE_SIZE: usize = 1024 * 128; -const DEFAULT_LEVEL_WINDOW_SIZE: usize = 1024 * 128 
* 32; /// This is the default implementation of the `Matcher` trait. It allocates and reuses the buffers when possible. pub struct MatchGeneratorDriver { @@ -22,32 +20,41 @@ pub struct MatchGeneratorDriver { suffix_pool: Vec, match_generator: MatchGenerator, slice_size: usize, + base_slice_size: usize, + base_window_size: usize, } impl MatchGeneratorDriver { /// slice_size says how big the slices should be that are allocated to work with /// max_slices_in_window says how many slices should at most be used while looking for matches pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self { + let max_window_size = max_slices_in_window * slice_size; Self { vec_pool: Vec::new(), suffix_pool: Vec::new(), - match_generator: MatchGenerator::new(max_slices_in_window * slice_size), + match_generator: MatchGenerator::new(max_window_size), slice_size, + base_slice_size: slice_size, + base_window_size: max_window_size, } } - fn configure_for_level(&mut self, level: CompressionLevel) { - self.slice_size = DEFAULT_SLICE_SIZE; - self.match_generator.max_window_size = match level { - CompressionLevel::Default => DEFAULT_LEVEL_WINDOW_SIZE, - _ => self.slice_size, - }; + fn level_config(&self, level: CompressionLevel) -> (usize, usize) { + match level { + CompressionLevel::Uncompressed => (self.base_slice_size, self.base_window_size), + CompressionLevel::Fastest => (self.base_slice_size, self.base_window_size), + CompressionLevel::Default => (self.base_slice_size, self.base_window_size), + CompressionLevel::Better => (self.base_slice_size, self.base_window_size), + CompressionLevel::Best => (self.base_slice_size, self.base_window_size), + } } } impl Matcher for MatchGeneratorDriver { fn reset(&mut self, level: CompressionLevel) { - self.configure_for_level(level); + let (slice_size, max_window_size) = self.level_config(level); + self.slice_size = slice_size; + self.match_generator.max_window_size = max_window_size; let vec_pool = &mut self.vec_pool; let suffix_pool 
= &mut self.suffix_pool; diff --git a/zstd/src/encoding/mod.rs b/zstd/src/encoding/mod.rs index 09aa9b0f..10ee3577 100644 --- a/zstd/src/encoding/mod.rs +++ b/zstd/src/encoding/mod.rs @@ -50,9 +50,12 @@ pub enum CompressionLevel { Uncompressed, /// This level is roughly equivalent to Zstd compression level 1 Fastest, - /// This level is roughly equivalent to Zstd level 3, - /// or the one used by the official compressor when no level - /// is specified. + /// This level currently uses the same encoder path and matcher + /// configuration as [`CompressionLevel::Fastest`]. + /// + /// It represents this crate's "default" compression setting and may + /// change in future versions to reflect a different tradeoff between + /// speed and compression ratio. Default, /// This level is roughly equivalent to Zstd level 7. /// diff --git a/zstd/src/tests/roundtrip_integrity.rs b/zstd/src/tests/roundtrip_integrity.rs index bd1b6181..6e1f7df9 100644 --- a/zstd/src/tests/roundtrip_integrity.rs +++ b/zstd/src/tests/roundtrip_integrity.rs @@ -352,3 +352,9 @@ fn roundtrip_default_level_regression() { let data = generate_compressible(777, 64 * 1024); assert_eq!(roundtrip_default(&data), data); } + +#[test] +fn roundtrip_default_level_multi_block_regression() { + let data = generate_compressible(1337, 512 * 1024); + assert_eq!(roundtrip_default(&data), data); +} From e302fa2dcfa8fcf995c26f383c11cb8a4df577a4 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Thu, 26 Mar 2026 09:19:12 +0200 Subject: [PATCH 3/9] docs(encoding): clarify explicit level config arms - explain why level_config keeps explicit match arms - document that current levels share the baseline matcher config Closes #5 --- zstd/src/encoding/match_generator.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index 14f74e8f..46a64aa7 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -40,6 
+40,8 @@ impl MatchGeneratorDriver { } fn level_config(&self, level: CompressionLevel) -> (usize, usize) { + // All levels currently use the same baseline configuration. + // Explicit arms keep future compression-level changes intentional. match level { CompressionLevel::Uncompressed => (self.base_slice_size, self.base_window_size), CompressionLevel::Fastest => (self.base_slice_size, self.base_window_size), From f4bdbc2c7a7daadf32db0b601364d96584f24587 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Thu, 26 Mar 2026 17:42:40 +0200 Subject: [PATCH 4/9] feat(encoding): implement dfast default level --- zstd/src/encoding/blocks/compressed.rs | 17 +- zstd/src/encoding/match_generator.rs | 499 +++++++++++++++++++++++-- zstd/tests/cross_validation.rs | 28 ++ 3 files changed, 513 insertions(+), 31 deletions(-) diff --git a/zstd/src/encoding/blocks/compressed.rs b/zstd/src/encoding/blocks/compressed.rs index 54fae524..bcd09d6a 100644 --- a/zstd/src/encoding/blocks/compressed.rs +++ b/zstd/src/encoding/blocks/compressed.rs @@ -437,7 +437,7 @@ fn encode_match_len(len: u32) -> (u8, u32, usize) { 8195..=16386 => (49, len - 8195, 13), 16387..=32770 => (50, len - 16387, 14), 32771..=65538 => (51, len - 32771, 15), - 65539..=131074 => (52, len - 32771, 16), + 65539..=131074 => (52, len - 65539, 16), 131075.. => unreachable!(), } } @@ -446,7 +446,11 @@ fn encode_match_len(len: u32) -> (u8, u32, usize) { /// history per RFC 8878 §3.1.2.5. Updates `offset_hist` in place. /// /// Encoded offset codes: 1/2/3 = repeat offsets, N+3 = new absolute offset N. 
-fn encode_offset_with_history(actual_offset: u32, lit_len: u32, offset_hist: &mut [u32; 3]) -> u32 { +pub(crate) fn encode_offset_with_history( + actual_offset: u32, + lit_len: u32, + offset_hist: &mut [u32; 3], +) -> u32 { let encoded = if lit_len > 0 { if actual_offset == offset_hist[0] { 1 @@ -603,7 +607,7 @@ mod tests { use alloc::boxed::Box; use super::{ - FseTableMode, choose_table, encode_offset_with_history, previous_table, + FseTableMode, choose_table, encode_match_len, encode_offset_with_history, previous_table, remember_last_used_tables, }; use crate::encoding::frame_compressor::{FseTables, PreviousFseTable}; @@ -645,6 +649,13 @@ mod tests { assert_eq!(hist, [9, 10, 20]); } + #[test] + fn encode_match_len_uses_correct_upper_range_base() { + assert_eq!(encode_match_len(65539), (52, 0, 16)); + assert_eq!(encode_match_len(65540), (52, 1, 16)); + assert_eq!(encode_match_len(131074), (52, 65535, 16)); + } + #[test] fn remember_last_used_tables_keeps_predefined_and_repeat_modes() { let mut fse_tables = FseTables::new(); diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index 46a64aa7..2bcc007b 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -6,19 +6,35 @@ //! The task here is to efficiently find matches in the already encoded data for the current suffix of the not yet encoded data. 
use alloc::vec::Vec; +use core::convert::TryInto; use core::num::NonZeroUsize; use super::CompressionLevel; use super::Matcher; use super::Sequence; +use super::blocks::encode_offset_with_history; const MIN_MATCH_LEN: usize = 5; +const DFAST_MIN_MATCH_LEN: usize = 6; +const DFAST_TARGET_LEN: usize = 48; +const DFAST_HASH_BITS: usize = 20; +const DFAST_SEARCH_DEPTH: usize = 4; +const DFAST_DEFAULT_WINDOW_SIZE: usize = 1 << 22; +const DFAST_EMPTY_SLOT: usize = usize::MAX; + +#[derive(Copy, Clone)] +enum MatcherBackend { + Simple, + Dfast, +} /// This is the default implementation of the `Matcher` trait. It allocates and reuses the buffers when possible. pub struct MatchGeneratorDriver { vec_pool: Vec>, suffix_pool: Vec, match_generator: MatchGenerator, + dfast_match_generator: DfastMatchGenerator, + active_backend: MatcherBackend, slice_size: usize, base_slice_size: usize, base_window_size: usize, @@ -33,30 +49,50 @@ impl MatchGeneratorDriver { vec_pool: Vec::new(), suffix_pool: Vec::new(), match_generator: MatchGenerator::new(max_window_size), + dfast_match_generator: DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE), + active_backend: MatcherBackend::Simple, slice_size, base_slice_size: slice_size, base_window_size: max_window_size, } } - fn level_config(&self, level: CompressionLevel) -> (usize, usize) { - // All levels currently use the same baseline configuration. - // Explicit arms keep future compression-level changes intentional. 
+ fn level_config(&self, level: CompressionLevel) -> (MatcherBackend, usize, usize) { match level { - CompressionLevel::Uncompressed => (self.base_slice_size, self.base_window_size), - CompressionLevel::Fastest => (self.base_slice_size, self.base_window_size), - CompressionLevel::Default => (self.base_slice_size, self.base_window_size), - CompressionLevel::Better => (self.base_slice_size, self.base_window_size), - CompressionLevel::Best => (self.base_slice_size, self.base_window_size), + CompressionLevel::Uncompressed => ( + MatcherBackend::Simple, + self.base_slice_size, + self.base_window_size, + ), + CompressionLevel::Fastest => ( + MatcherBackend::Simple, + self.base_slice_size, + self.base_window_size, + ), + CompressionLevel::Default => ( + MatcherBackend::Dfast, + self.base_slice_size, + DFAST_DEFAULT_WINDOW_SIZE, + ), + CompressionLevel::Better => ( + MatcherBackend::Simple, + self.base_slice_size, + self.base_window_size, + ), + CompressionLevel::Best => ( + MatcherBackend::Simple, + self.base_slice_size, + self.base_window_size, + ), } } } impl Matcher for MatchGeneratorDriver { fn reset(&mut self, level: CompressionLevel) { - let (slice_size, max_window_size) = self.level_config(level); + let (backend, slice_size, max_window_size) = self.level_config(level); + self.active_backend = backend; self.slice_size = slice_size; - self.match_generator.max_window_size = max_window_size; let vec_pool = &mut self.vec_pool; let suffix_pool = &mut self.suffix_pool; @@ -67,10 +103,19 @@ impl Matcher for MatchGeneratorDriver { suffixes.slots.resize(suffixes.slots.capacity(), None); suffix_pool.push(suffixes); }); + self.match_generator.max_window_size = max_window_size; + self.dfast_match_generator.max_window_size = max_window_size; + self.dfast_match_generator.reset(|mut data| { + data.resize(data.capacity(), 0); + vec_pool.push(data); + }); } fn window_size(&self) -> u64 { - self.match_generator.max_window_size as u64 + match self.active_backend { + 
MatcherBackend::Simple => self.match_generator.max_window_size as u64, + MatcherBackend::Dfast => self.dfast_match_generator.max_window_size as u64, + } } fn get_next_space(&mut self) -> Vec { @@ -82,31 +127,55 @@ impl Matcher for MatchGeneratorDriver { } fn get_last_space(&mut self) -> &[u8] { - self.match_generator.window.last().unwrap().data.as_slice() + match self.active_backend { + MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(), + MatcherBackend::Dfast => self.dfast_match_generator.get_last_space(), + } } fn commit_space(&mut self, space: Vec) { - let vec_pool = &mut self.vec_pool; - let suffixes = self - .suffix_pool - .pop() - .unwrap_or_else(|| SuffixStore::with_capacity(space.len())); - let suffix_pool = &mut self.suffix_pool; - self.match_generator - .add_data(space, suffixes, |mut data, mut suffixes| { - data.resize(data.capacity(), 0); - vec_pool.push(data); - suffixes.slots.clear(); - suffixes.slots.resize(suffixes.slots.capacity(), None); - suffix_pool.push(suffixes); - }); + match self.active_backend { + MatcherBackend::Simple => { + let vec_pool = &mut self.vec_pool; + let suffixes = self + .suffix_pool + .pop() + .unwrap_or_else(|| SuffixStore::with_capacity(space.len())); + let suffix_pool = &mut self.suffix_pool; + self.match_generator + .add_data(space, suffixes, |mut data, mut suffixes| { + data.resize(data.capacity(), 0); + vec_pool.push(data); + suffixes.slots.clear(); + suffixes.slots.resize(suffixes.slots.capacity(), None); + suffix_pool.push(suffixes); + }); + } + MatcherBackend::Dfast => { + let vec_pool = &mut self.vec_pool; + self.dfast_match_generator.add_data(space, |mut data| { + data.resize(data.capacity(), 0); + vec_pool.push(data); + }); + } + } } fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) { - while self.match_generator.next_sequence(&mut handle_sequence) {} + match self.active_backend { + MatcherBackend::Simple => { + while 
self.match_generator.next_sequence(&mut handle_sequence) {} + } + MatcherBackend::Dfast => self + .dfast_match_generator + .start_matching(&mut handle_sequence), + } } fn skip_matching(&mut self) { - self.match_generator.skip_matching(); + match self.active_backend { + MatcherBackend::Simple => self.match_generator.skip_matching(), + MatcherBackend::Dfast => self.dfast_match_generator.skip_matching(), + } } } @@ -417,6 +486,345 @@ impl MatchGenerator { } } +struct DfastMatchGenerator { + max_window_size: usize, + window: Vec>, + window_size: usize, + offset_hist: [u32; 3], + short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>, + long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>, +} + +#[derive(Copy, Clone)] +struct MatchCandidate { + start: usize, + offset: usize, + match_len: usize, +} + +impl DfastMatchGenerator { + fn new(max_window_size: usize) -> Self { + Self { + max_window_size, + window: Vec::new(), + window_size: 0, + offset_hist: [1, 4, 8], + short_hash: alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS], + long_hash: alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS], + } + } + + fn reset(&mut self, mut reuse_space: impl FnMut(Vec)) { + self.window_size = 0; + self.offset_hist = [1, 4, 8]; + self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]); + self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]); + for mut data in self.window.drain(..) 
{ + data.resize(data.capacity(), 0); + reuse_space(data); + } + } + + fn get_last_space(&self) -> &[u8] { + self.window.last().unwrap().as_slice() + } + + fn add_data(&mut self, data: Vec, mut reuse_space: impl FnMut(Vec)) { + assert!(data.len() <= self.max_window_size); + while self.window_size + data.len() > self.max_window_size { + let mut removed = self.window.remove(0); + self.window_size -= removed.len(); + removed.resize(removed.capacity(), 0); + reuse_space(removed); + } + self.window_size += data.len(); + self.window.push(data); + } + + fn skip_matching(&mut self) {} + + fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) { + let current_len = self.window.last().unwrap().len(); + if current_len == 0 { + return; + } + + let total_len: usize = self.window.iter().map(Vec::len).sum(); + let current_abs_start = total_len - current_len; + let mut concat = Vec::with_capacity(total_len); + for block in &self.window { + concat.extend_from_slice(block); + } + + self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]); + self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]); + self.seed_history_tables(&concat, current_abs_start); + + let mut pos = 0usize; + let mut literals_start = 0usize; + while pos + DFAST_MIN_MATCH_LEN <= current_len { + let abs_pos = current_abs_start + pos; + let lit_len = pos - literals_start; + + let best = self.best_match(&concat, current_abs_start, abs_pos, lit_len); + if let Some(candidate) = + self.pick_lazy_match(&concat, current_abs_start, abs_pos, lit_len, best) + { + self.insert_positions(&concat, abs_pos, candidate.start + candidate.match_len); + let current = self.window.last().unwrap().as_slice(); + let start = candidate.start - current_abs_start; + let literals = &current[literals_start..start]; + handle_sequence(Sequence::Triple { + literals, + offset: candidate.offset, + match_len: candidate.match_len, + }); + let _ = encode_offset_with_history( + candidate.offset as u32, + literals.len() as 
u32, + &mut self.offset_hist, + ); + pos = start + candidate.match_len; + literals_start = pos; + } else { + self.insert_position(&concat, abs_pos); + pos += 1; + } + } + + if literals_start < current_len { + let current = self.window.last().unwrap().as_slice(); + handle_sequence(Sequence::Literals { + literals: &current[literals_start..], + }); + } + } + + fn seed_history_tables(&mut self, concat: &[u8], current_abs_start: usize) { + if current_abs_start < 4 { + return; + } + for pos in 0..current_abs_start { + self.insert_position(concat, pos); + } + } + + fn best_match( + &self, + concat: &[u8], + current_abs_start: usize, + abs_pos: usize, + lit_len: usize, + ) -> Option { + let rep = self.repcode_candidate(concat, current_abs_start, abs_pos, lit_len); + let hash = self.hash_candidate(concat, current_abs_start, abs_pos, lit_len); + Self::better_candidate(rep, hash) + } + + fn pick_lazy_match( + &self, + concat: &[u8], + current_abs_start: usize, + abs_pos: usize, + lit_len: usize, + best: Option, + ) -> Option { + let best = best?; + if best.match_len >= DFAST_TARGET_LEN || abs_pos + 1 + DFAST_MIN_MATCH_LEN > concat.len() { + return Some(best); + } + + let next = self.best_match(concat, current_abs_start, abs_pos + 1, lit_len + 1); + match next { + Some(next) + if next.match_len > best.match_len + || (next.match_len == best.match_len && next.offset < best.offset) => + { + None + } + _ => Some(best), + } + } + + fn repcode_candidate( + &self, + concat: &[u8], + _current_abs_start: usize, + abs_pos: usize, + lit_len: usize, + ) -> Option { + let reps = if lit_len == 0 { + [ + Some(self.offset_hist[1] as usize), + Some(self.offset_hist[2] as usize), + (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize), + ] + } else { + [ + Some(self.offset_hist[0] as usize), + Some(self.offset_hist[1] as usize), + Some(self.offset_hist[2] as usize), + ] + }; + + let mut best = None; + for rep in reps.into_iter().flatten() { + if rep == 0 || rep > abs_pos { + 
continue; + } + let candidate_pos = abs_pos - rep; + if candidate_pos + DFAST_MIN_MATCH_LEN > concat.len() { + continue; + } + let match_len = + MatchGenerator::common_prefix_len(&concat[candidate_pos..], &concat[abs_pos..]); + if match_len >= DFAST_MIN_MATCH_LEN { + let candidate = + self.extend_backwards(concat, candidate_pos, abs_pos, match_len, lit_len); + best = Self::better_candidate(best, Some(candidate)); + } + } + best + } + + fn hash_candidate( + &self, + concat: &[u8], + _current_abs_start: usize, + abs_pos: usize, + lit_len: usize, + ) -> Option { + let mut best = None; + for candidate_pos in self.long_candidates(concat, abs_pos) { + let match_len = + MatchGenerator::common_prefix_len(&concat[candidate_pos..], &concat[abs_pos..]); + if match_len >= DFAST_MIN_MATCH_LEN { + let candidate = + self.extend_backwards(concat, candidate_pos, abs_pos, match_len, lit_len); + best = Self::better_candidate(best, Some(candidate)); + if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) { + return best; + } + } + } + + for candidate_pos in self.short_candidates(concat, abs_pos) { + if candidate_pos >= abs_pos { + continue; + } + let match_len = + MatchGenerator::common_prefix_len(&concat[candidate_pos..], &concat[abs_pos..]); + if match_len >= DFAST_MIN_MATCH_LEN { + let candidate = + self.extend_backwards(concat, candidate_pos, abs_pos, match_len, lit_len); + best = Self::better_candidate(best, Some(candidate)); + if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) { + return best; + } + } + } + best + } + + fn extend_backwards( + &self, + concat: &[u8], + mut candidate_pos: usize, + mut abs_pos: usize, + mut match_len: usize, + lit_len: usize, + ) -> MatchCandidate { + let min_abs_pos = abs_pos - lit_len; + while abs_pos > min_abs_pos + && candidate_pos > 0 + && concat[candidate_pos - 1] == concat[abs_pos - 1] + { + candidate_pos -= 1; + abs_pos -= 1; + match_len += 1; + } + MatchCandidate { + start: abs_pos, + offset: abs_pos - candidate_pos, + 
match_len, + } + } + + fn better_candidate( + lhs: Option, + rhs: Option, + ) -> Option { + match (lhs, rhs) { + (None, other) | (other, None) => other, + (Some(lhs), Some(rhs)) => { + if rhs.match_len > lhs.match_len + || (rhs.match_len == lhs.match_len && rhs.offset < lhs.offset) + { + Some(rhs) + } else { + Some(lhs) + } + } + } + } + + fn insert_positions(&mut self, concat: &[u8], start: usize, end: usize) { + for pos in start..end { + self.insert_position(concat, pos); + } + } + + fn insert_position(&mut self, concat: &[u8], pos: usize) { + if pos + 4 <= concat.len() { + let short = Self::hash4(&concat[pos..]); + let bucket = &mut self.short_hash[short]; + if bucket[0] != pos { + bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1); + bucket[0] = pos; + } + } + if pos + 8 <= concat.len() { + let long = Self::hash8(&concat[pos..]); + let bucket = &mut self.long_hash[long]; + if bucket[0] != pos { + bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1); + bucket[0] = pos; + } + } + } + + fn short_candidates(&self, concat: &[u8], pos: usize) -> impl Iterator + '_ { + (pos + 4 <= concat.len()) + .then(|| self.short_hash[Self::hash4(&concat[pos..])]) + .into_iter() + .flatten() + .filter(|candidate| *candidate != DFAST_EMPTY_SLOT) + } + + fn long_candidates(&self, concat: &[u8], pos: usize) -> impl Iterator + '_ { + (pos + 8 <= concat.len()) + .then(|| self.long_hash[Self::hash8(&concat[pos..])]) + .into_iter() + .flatten() + .filter(|candidate| *candidate != DFAST_EMPTY_SLOT) + } + + fn hash4(data: &[u8]) -> usize { + let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64; + Self::hash_bits(value) + } + + fn hash8(data: &[u8]) -> usize { + let value = u64::from_le_bytes(data[..8].try_into().unwrap()); + Self::hash_bits(value) + } + + fn hash_bits(value: u64) -> usize { + const PRIME: u64 = 0x9E37_79B1_85EB_CA87; + ((value.wrapping_mul(PRIME)) >> (64 - DFAST_HASH_BITS)) as usize + } +} + #[test] fn matches() { let mut matcher = MatchGenerator::new(1000); @@ 
-639,3 +1047,38 @@ fn matches() { assert_eq!(reconstructed, original_data); } + +#[test] +fn dfast_matches_roundtrip_multi_block_pattern() { + let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251]; + let first_block: Vec = pattern.iter().copied().cycle().take(128 * 1024).collect(); + let second_block: Vec = pattern.iter().copied().cycle().take(128 * 1024).collect(); + + let mut matcher = DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE); + let replay_sequence = |decoded: &mut Vec, seq: Sequence<'_>| match seq { + Sequence::Literals { literals } => decoded.extend_from_slice(literals), + Sequence::Triple { + literals, + offset, + match_len, + } => { + decoded.extend_from_slice(literals); + let start = decoded.len() - offset; + for i in 0..match_len { + let byte = decoded[start + i]; + decoded.push(byte); + } + } + }; + + matcher.add_data(first_block.clone(), |_| {}); + let mut history = Vec::new(); + matcher.start_matching(|seq| replay_sequence(&mut history, seq)); + assert_eq!(history, first_block); + + matcher.add_data(second_block.clone(), |_| {}); + let prefix_len = history.len(); + matcher.start_matching(|seq| replay_sequence(&mut history, seq)); + + assert_eq!(&history[prefix_len..], second_block.as_slice()); +} diff --git a/zstd/tests/cross_validation.rs b/zstd/tests/cross_validation.rs index 5743236a..5c8aa3f4 100644 --- a/zstd/tests/cross_validation.rs +++ b/zstd/tests/cross_validation.rs @@ -204,3 +204,31 @@ fn cross_rust_default_compress_ffi_decompress_regression() { let result = zstd::decode_all(compressed.as_slice()).unwrap(); assert_eq!(data, result, "rust default→ffi roundtrip failed"); } + +#[test] +fn default_level_beats_fastest_on_corpus_proxy() { + let data = include_bytes!("../decodecorpus_files/z000033"); + let fastest = compress_to_vec(data.as_slice(), CompressionLevel::Fastest); + let default = compress_to_vec(data.as_slice(), CompressionLevel::Default); + + assert!( + default.len() < fastest.len(), + "Default should compress better than 
Fastest on corpus proxy. default={} fastest={}", + default.len(), + fastest.len() + ); +} + +#[test] +fn default_level_stays_within_ten_percent_of_ffi_level3_on_corpus_proxy() { + let data = include_bytes!("../decodecorpus_files/z000033"); + let default = compress_to_vec(data.as_slice(), CompressionLevel::Default); + let ffi_level3 = zstd::encode_all(data.as_slice(), 3).unwrap(); + + assert!( + (default.len() as u64) * 10 <= (ffi_level3.len() as u64) * 11, + "Default should stay within 10% of zstd level 3 on corpus proxy. default={} ffi_l3={}", + default.len(), + ffi_level3.len() + ); +} From 09855af8137afddd0e067d0057ec4c3a3fbfc246 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Thu, 26 Mar 2026 18:27:14 +0200 Subject: [PATCH 5/9] perf(encoding): incrementalize dfast matcher --- zstd/src/encoding/match_generator.rs | 312 +++++++++++++++++---------- zstd/src/encoding/mod.rs | 8 +- zstd/tests/cross_validation.rs | 4 + 3 files changed, 205 insertions(+), 119 deletions(-) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index 2bcc007b..01eddae1 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -5,6 +5,7 @@ //! //! The task here is to efficiently find matches in the already encoded data for the current suffix of the not yet encoded data. 
+use alloc::collections::VecDeque; use alloc::vec::Vec; use core::convert::TryInto; use core::num::NonZeroUsize; @@ -22,7 +23,7 @@ const DFAST_SEARCH_DEPTH: usize = 4; const DFAST_DEFAULT_WINDOW_SIZE: usize = 1 << 22; const DFAST_EMPTY_SLOT: usize = usize::MAX; -#[derive(Copy, Clone)] +#[derive(Copy, Clone, PartialEq, Eq)] enum MatcherBackend { Simple, Dfast, @@ -33,7 +34,7 @@ pub struct MatchGeneratorDriver { vec_pool: Vec>, suffix_pool: Vec, match_generator: MatchGenerator, - dfast_match_generator: DfastMatchGenerator, + dfast_match_generator: Option, active_backend: MatcherBackend, slice_size: usize, base_slice_size: usize, @@ -49,7 +50,7 @@ impl MatchGeneratorDriver { vec_pool: Vec::new(), suffix_pool: Vec::new(), match_generator: MatchGenerator::new(max_window_size), - dfast_match_generator: DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE), + dfast_match_generator: None, active_backend: MatcherBackend::Simple, slice_size, base_slice_size: slice_size, @@ -91,30 +92,66 @@ impl MatchGeneratorDriver { impl Matcher for MatchGeneratorDriver { fn reset(&mut self, level: CompressionLevel) { let (backend, slice_size, max_window_size) = self.level_config(level); + if self.active_backend != backend { + match self.active_backend { + MatcherBackend::Simple => { + let vec_pool = &mut self.vec_pool; + let suffix_pool = &mut self.suffix_pool; + self.match_generator.reset(|mut data, mut suffixes| { + data.resize(data.capacity(), 0); + vec_pool.push(data); + suffixes.slots.clear(); + suffixes.slots.resize(suffixes.slots.capacity(), None); + suffix_pool.push(suffixes); + }); + } + MatcherBackend::Dfast => { + if let Some(dfast) = self.dfast_match_generator.as_mut() { + let vec_pool = &mut self.vec_pool; + dfast.reset(|mut data| { + data.resize(data.capacity(), 0); + vec_pool.push(data); + }); + } + } + } + } + self.active_backend = backend; self.slice_size = slice_size; - let vec_pool = &mut self.vec_pool; - let suffix_pool = &mut self.suffix_pool; - - 
self.match_generator.reset(|mut data, mut suffixes| { - data.resize(data.capacity(), 0); - vec_pool.push(data); - suffixes.slots.clear(); - suffixes.slots.resize(suffixes.slots.capacity(), None); - suffix_pool.push(suffixes); - }); - self.match_generator.max_window_size = max_window_size; - self.dfast_match_generator.max_window_size = max_window_size; - self.dfast_match_generator.reset(|mut data| { - data.resize(data.capacity(), 0); - vec_pool.push(data); - }); + match self.active_backend { + MatcherBackend::Simple => { + let vec_pool = &mut self.vec_pool; + let suffix_pool = &mut self.suffix_pool; + self.match_generator.max_window_size = max_window_size; + self.match_generator.reset(|mut data, mut suffixes| { + data.resize(data.capacity(), 0); + vec_pool.push(data); + suffixes.slots.clear(); + suffixes.slots.resize(suffixes.slots.capacity(), None); + suffix_pool.push(suffixes); + }); + } + MatcherBackend::Dfast => { + let dfast = self + .dfast_match_generator + .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size)); + dfast.max_window_size = max_window_size; + let vec_pool = &mut self.vec_pool; + dfast.reset(|mut data| { + data.resize(data.capacity(), 0); + vec_pool.push(data); + }); + } + } } fn window_size(&self) -> u64 { match self.active_backend { MatcherBackend::Simple => self.match_generator.max_window_size as u64, - MatcherBackend::Dfast => self.dfast_match_generator.max_window_size as u64, + MatcherBackend::Dfast => { + self.dfast_match_generator.as_ref().unwrap().max_window_size as u64 + } } } @@ -129,7 +166,11 @@ impl Matcher for MatchGeneratorDriver { fn get_last_space(&mut self) -> &[u8] { match self.active_backend { MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(), - MatcherBackend::Dfast => self.dfast_match_generator.get_last_space(), + MatcherBackend::Dfast => self + .dfast_match_generator + .as_ref() + .unwrap() + .get_last_space(), } } @@ -153,10 +194,13 @@ impl Matcher for MatchGeneratorDriver { } 
MatcherBackend::Dfast => { let vec_pool = &mut self.vec_pool; - self.dfast_match_generator.add_data(space, |mut data| { - data.resize(data.capacity(), 0); - vec_pool.push(data); - }); + self.dfast_match_generator + .as_mut() + .unwrap() + .add_data(space, |mut data| { + data.resize(data.capacity(), 0); + vec_pool.push(data); + }); } } } @@ -168,13 +212,15 @@ impl Matcher for MatchGeneratorDriver { } MatcherBackend::Dfast => self .dfast_match_generator + .as_mut() + .unwrap() .start_matching(&mut handle_sequence), } } fn skip_matching(&mut self) { match self.active_backend { MatcherBackend::Simple => self.match_generator.skip_matching(), - MatcherBackend::Dfast => self.dfast_match_generator.skip_matching(), + MatcherBackend::Dfast => self.dfast_match_generator.as_mut().unwrap().skip_matching(), } } } @@ -488,8 +534,11 @@ impl MatchGenerator { struct DfastMatchGenerator { max_window_size: usize, - window: Vec>, + window: VecDeque>, window_size: usize, + history: Vec, + history_start: usize, + history_abs_start: usize, offset_hist: [u32; 3], short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>, long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>, @@ -506,19 +555,27 @@ impl DfastMatchGenerator { fn new(max_window_size: usize) -> Self { Self { max_window_size, - window: Vec::new(), + window: VecDeque::new(), window_size: 0, + history: Vec::new(), + history_start: 0, + history_abs_start: 0, offset_hist: [1, 4, 8], - short_hash: alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS], - long_hash: alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS], + short_hash: Vec::new(), + long_hash: Vec::new(), } } fn reset(&mut self, mut reuse_space: impl FnMut(Vec)) { self.window_size = 0; + self.history.clear(); + self.history_start = 0; + self.history_abs_start = 0; self.offset_hist = [1, 4, 8]; - self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]); - self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]); + if !self.short_hash.is_empty() { + 
self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]); + self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]); + } for mut data in self.window.drain(..) { data.resize(data.capacity(), 0); reuse_space(data); @@ -526,39 +583,41 @@ impl DfastMatchGenerator { } fn get_last_space(&self) -> &[u8] { - self.window.last().unwrap().as_slice() + self.window.back().unwrap().as_slice() } fn add_data(&mut self, data: Vec, mut reuse_space: impl FnMut(Vec)) { assert!(data.len() <= self.max_window_size); while self.window_size + data.len() > self.max_window_size { - let mut removed = self.window.remove(0); + let mut removed = self.window.pop_front().unwrap(); self.window_size -= removed.len(); + self.history_start += removed.len(); + self.history_abs_start += removed.len(); removed.resize(removed.capacity(), 0); reuse_space(removed); } + self.compact_history(); + self.history.extend_from_slice(&data); self.window_size += data.len(); - self.window.push(data); + self.window.push_back(data); } - fn skip_matching(&mut self) {} + fn skip_matching(&mut self) { + self.ensure_hash_tables(); + let current_len = self.window.back().unwrap().len(); + let current_abs_start = self.history_abs_start + self.window_size - current_len; + self.insert_positions(current_abs_start, current_abs_start + current_len); + } fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) { - let current_len = self.window.last().unwrap().len(); + self.ensure_hash_tables(); + + let current_len = self.window.back().unwrap().len(); if current_len == 0 { return; } - let total_len: usize = self.window.iter().map(Vec::len).sum(); - let current_abs_start = total_len - current_len; - let mut concat = Vec::with_capacity(total_len); - for block in &self.window { - concat.extend_from_slice(block); - } - - self.short_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]); - self.long_hash.fill([DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]); - self.seed_history_tables(&concat, current_abs_start); + 
let current_abs_start = self.history_abs_start + self.window_size - current_len; let mut pos = 0usize; let mut literals_start = 0usize; @@ -566,12 +625,10 @@ impl DfastMatchGenerator { let abs_pos = current_abs_start + pos; let lit_len = pos - literals_start; - let best = self.best_match(&concat, current_abs_start, abs_pos, lit_len); - if let Some(candidate) = - self.pick_lazy_match(&concat, current_abs_start, abs_pos, lit_len, best) - { - self.insert_positions(&concat, abs_pos, candidate.start + candidate.match_len); - let current = self.window.last().unwrap().as_slice(); + let best = self.best_match(abs_pos, lit_len); + if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) { + self.insert_positions(abs_pos, candidate.start + candidate.match_len); + let current = self.window.back().unwrap().as_slice(); let start = candidate.start - current_abs_start; let literals = &current[literals_start..start]; handle_sequence(Sequence::Triple { @@ -587,54 +644,68 @@ impl DfastMatchGenerator { pos = start + candidate.match_len; literals_start = pos; } else { - self.insert_position(&concat, abs_pos); + self.insert_position(abs_pos); pos += 1; } } if literals_start < current_len { - let current = self.window.last().unwrap().as_slice(); + let current = self.window.back().unwrap().as_slice(); handle_sequence(Sequence::Literals { literals: &current[literals_start..], }); } } - fn seed_history_tables(&mut self, concat: &[u8], current_abs_start: usize) { - if current_abs_start < 4 { + fn ensure_hash_tables(&mut self) { + if self.short_hash.is_empty() { + self.short_hash = + alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS]; + self.long_hash = + alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS]; + } + } + + fn compact_history(&mut self) { + if self.history_start == 0 { return; } - for pos in 0..current_abs_start { - self.insert_position(concat, pos); + if self.history_start >= self.max_window_size + || self.history_start * 2 >= 
self.history.len() + { + self.history.drain(..self.history_start); + self.history_start = 0; } } - fn best_match( - &self, - concat: &[u8], - current_abs_start: usize, - abs_pos: usize, - lit_len: usize, - ) -> Option { - let rep = self.repcode_candidate(concat, current_abs_start, abs_pos, lit_len); - let hash = self.hash_candidate(concat, current_abs_start, abs_pos, lit_len); + fn live_history(&self) -> &[u8] { + &self.history[self.history_start..] + } + + fn history_abs_end(&self) -> usize { + self.history_abs_start + self.live_history().len() + } + + fn best_match(&self, abs_pos: usize, lit_len: usize) -> Option { + let rep = self.repcode_candidate(abs_pos, lit_len); + let hash = self.hash_candidate(abs_pos, lit_len); Self::better_candidate(rep, hash) } fn pick_lazy_match( &self, - concat: &[u8], - current_abs_start: usize, abs_pos: usize, lit_len: usize, best: Option, ) -> Option { let best = best?; - if best.match_len >= DFAST_TARGET_LEN || abs_pos + 1 + DFAST_MIN_MATCH_LEN > concat.len() { + if best.match_len >= DFAST_TARGET_LEN + || abs_pos + 1 + DFAST_MIN_MATCH_LEN > self.history_abs_end() + { return Some(best); } - let next = self.best_match(concat, current_abs_start, abs_pos + 1, lit_len + 1); + let next = self.best_match(abs_pos + 1, lit_len + 1); match next { Some(next) if next.match_len > best.match_len @@ -646,13 +717,7 @@ impl DfastMatchGenerator { } } - fn repcode_candidate( - &self, - concat: &[u8], - _current_abs_start: usize, - abs_pos: usize, - lit_len: usize, - ) -> Option { + fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option { let reps = if lit_len == 0 { [ Some(self.offset_hist[1] as usize), @@ -673,34 +738,38 @@ impl DfastMatchGenerator { continue; } let candidate_pos = abs_pos - rep; - if candidate_pos + DFAST_MIN_MATCH_LEN > concat.len() { + if candidate_pos < self.history_abs_start { + continue; + } + let concat = self.live_history(); + let candidate_idx = candidate_pos - self.history_abs_start; + let current_idx = 
abs_pos - self.history_abs_start; + if current_idx + DFAST_MIN_MATCH_LEN > concat.len() { continue; } let match_len = - MatchGenerator::common_prefix_len(&concat[candidate_pos..], &concat[abs_pos..]); + MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]); if match_len >= DFAST_MIN_MATCH_LEN { - let candidate = - self.extend_backwards(concat, candidate_pos, abs_pos, match_len, lit_len); + let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len); best = Self::better_candidate(best, Some(candidate)); } } best } - fn hash_candidate( - &self, - concat: &[u8], - _current_abs_start: usize, - abs_pos: usize, - lit_len: usize, - ) -> Option { + fn hash_candidate(&self, abs_pos: usize, lit_len: usize) -> Option { + let concat = self.live_history(); + let current_idx = abs_pos - self.history_abs_start; let mut best = None; - for candidate_pos in self.long_candidates(concat, abs_pos) { + for candidate_pos in self.long_candidates(abs_pos) { + if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos { + continue; + } + let candidate_idx = candidate_pos - self.history_abs_start; let match_len = - MatchGenerator::common_prefix_len(&concat[candidate_pos..], &concat[abs_pos..]); + MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]); if match_len >= DFAST_MIN_MATCH_LEN { - let candidate = - self.extend_backwards(concat, candidate_pos, abs_pos, match_len, lit_len); + let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len); best = Self::better_candidate(best, Some(candidate)); if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) { return best; @@ -708,15 +777,15 @@ impl DfastMatchGenerator { } } - for candidate_pos in self.short_candidates(concat, abs_pos) { - if candidate_pos >= abs_pos { + for candidate_pos in self.short_candidates(abs_pos) { + if candidate_pos < self.history_abs_start || candidate_pos >= abs_pos { continue; } + let candidate_idx = 
candidate_pos - self.history_abs_start; let match_len = - MatchGenerator::common_prefix_len(&concat[candidate_pos..], &concat[abs_pos..]); + MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]); if match_len >= DFAST_MIN_MATCH_LEN { - let candidate = - self.extend_backwards(concat, candidate_pos, abs_pos, match_len, lit_len); + let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len); best = Self::better_candidate(best, Some(candidate)); if best.is_some_and(|best| best.match_len >= DFAST_TARGET_LEN) { return best; @@ -728,16 +797,17 @@ impl DfastMatchGenerator { fn extend_backwards( &self, - concat: &[u8], mut candidate_pos: usize, mut abs_pos: usize, mut match_len: usize, lit_len: usize, ) -> MatchCandidate { + let concat = self.live_history(); let min_abs_pos = abs_pos - lit_len; while abs_pos > min_abs_pos - && candidate_pos > 0 - && concat[candidate_pos - 1] == concat[abs_pos - 1] + && candidate_pos > self.history_abs_start + && concat[candidate_pos - self.history_abs_start - 1] + == concat[abs_pos - self.history_abs_start - 1] { candidate_pos -= 1; abs_pos -= 1; @@ -768,23 +838,31 @@ impl DfastMatchGenerator { } } - fn insert_positions(&mut self, concat: &[u8], start: usize, end: usize) { + fn insert_positions(&mut self, start: usize, end: usize) { for pos in start..end { - self.insert_position(concat, pos); + self.insert_position(pos); } } - fn insert_position(&mut self, concat: &[u8], pos: usize) { - if pos + 4 <= concat.len() { - let short = Self::hash4(&concat[pos..]); + fn insert_position(&mut self, pos: usize) { + let idx = pos - self.history_abs_start; + let short = { + let concat = self.live_history(); + (idx + 4 <= concat.len()).then(|| Self::hash4(&concat[idx..])) + }; + if let Some(short) = short { let bucket = &mut self.short_hash[short]; if bucket[0] != pos { bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1); bucket[0] = pos; } } - if pos + 8 <= concat.len() { - let long = 
Self::hash8(&concat[pos..]); + + let long = { + let concat = self.live_history(); + (idx + 8 <= concat.len()).then(|| Self::hash8(&concat[idx..])) + }; + if let Some(long) = long { let bucket = &mut self.long_hash[long]; if bucket[0] != pos { bucket.copy_within(0..DFAST_SEARCH_DEPTH - 1, 1); @@ -793,17 +871,21 @@ impl DfastMatchGenerator { } } - fn short_candidates(&self, concat: &[u8], pos: usize) -> impl Iterator + '_ { - (pos + 4 <= concat.len()) - .then(|| self.short_hash[Self::hash4(&concat[pos..])]) + fn short_candidates(&self, pos: usize) -> impl Iterator + '_ { + let concat = self.live_history(); + let idx = pos - self.history_abs_start; + (idx + 4 <= concat.len()) + .then(|| self.short_hash[Self::hash4(&concat[idx..])]) .into_iter() .flatten() .filter(|candidate| *candidate != DFAST_EMPTY_SLOT) } - fn long_candidates(&self, concat: &[u8], pos: usize) -> impl Iterator + '_ { - (pos + 8 <= concat.len()) - .then(|| self.long_hash[Self::hash8(&concat[pos..])]) + fn long_candidates(&self, pos: usize) -> impl Iterator + '_ { + let concat = self.live_history(); + let idx = pos - self.history_abs_start; + (idx + 8 <= concat.len()) + .then(|| self.long_hash[Self::hash8(&concat[idx..])]) .into_iter() .flatten() .filter(|candidate| *candidate != DFAST_EMPTY_SLOT) diff --git a/zstd/src/encoding/mod.rs b/zstd/src/encoding/mod.rs index 10ee3577..aa640f66 100644 --- a/zstd/src/encoding/mod.rs +++ b/zstd/src/encoding/mod.rs @@ -50,12 +50,12 @@ pub enum CompressionLevel { Uncompressed, /// This level is roughly equivalent to Zstd compression level 1 Fastest, - /// This level currently uses the same encoder path and matcher - /// configuration as [`CompressionLevel::Fastest`]. + /// This level uses the crate's dedicated `dfast`-style matcher to + /// target a better speed/ratio tradeoff than [`CompressionLevel::Fastest`]. 
/// /// It represents this crate's "default" compression setting and may - /// change in future versions to reflect a different tradeoff between - /// speed and compression ratio. + /// evolve in future versions as the implementation moves closer to + /// reference zstd level 3 behavior. Default, /// This level is roughly equivalent to Zstd level 7. /// diff --git a/zstd/tests/cross_validation.rs b/zstd/tests/cross_validation.rs index 5c8aa3f4..8eff6ff7 100644 --- a/zstd/tests/cross_validation.rs +++ b/zstd/tests/cross_validation.rs @@ -207,6 +207,8 @@ fn cross_rust_default_compress_ffi_decompress_regression() { #[test] fn default_level_beats_fastest_on_corpus_proxy() { + // Keep this strict: issue #5 requires Default to be a real step up from Fastest, + // not just an alias that happens to roundtrip. let data = include_bytes!("../decodecorpus_files/z000033"); let fastest = compress_to_vec(data.as_slice(), CompressionLevel::Fastest); let default = compress_to_vec(data.as_slice(), CompressionLevel::Default); @@ -221,6 +223,8 @@ fn default_level_beats_fastest_on_corpus_proxy() { #[test] fn default_level_stays_within_ten_percent_of_ffi_level3_on_corpus_proxy() { + // This corpus-proxy check is the in-repo acceptance test for the issue's + // level-3 ratio target until the full Silesia corpus is vendored. 
let data = include_bytes!("../decodecorpus_files/z000033"); let default = compress_to_vec(data.as_slice(), CompressionLevel::Default); let ffi_level3 = zstd::encode_all(data.as_slice(), 3).unwrap(); From 715252527dac23c60848a490b7477b91e582b8ca Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Thu, 26 Mar 2026 19:12:00 +0200 Subject: [PATCH 6/9] test(encoding): cover dfast matcher dispatch --- zstd/src/encoding/match_generator.rs | 103 +++++++++++++++++++++++---- 1 file changed, 90 insertions(+), 13 deletions(-) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index 01eddae1..e03bd4bb 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -87,6 +87,18 @@ impl MatchGeneratorDriver { ), } } + + fn dfast_matcher(&self) -> &DfastMatchGenerator { + self.dfast_match_generator + .as_ref() + .expect("dfast backend must be initialized by reset() before use") + } + + fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator { + self.dfast_match_generator + .as_mut() + .expect("dfast backend must be initialized by reset() before use") + } } impl Matcher for MatchGeneratorDriver { @@ -149,9 +161,7 @@ impl Matcher for MatchGeneratorDriver { fn window_size(&self) -> u64 { match self.active_backend { MatcherBackend::Simple => self.match_generator.max_window_size as u64, - MatcherBackend::Dfast => { - self.dfast_match_generator.as_ref().unwrap().max_window_size as u64 - } + MatcherBackend::Dfast => self.dfast_matcher().max_window_size as u64, } } @@ -166,11 +176,7 @@ impl Matcher for MatchGeneratorDriver { fn get_last_space(&mut self) -> &[u8] { match self.active_backend { MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(), - MatcherBackend::Dfast => self - .dfast_match_generator - .as_ref() - .unwrap() - .get_last_space(), + MatcherBackend::Dfast => self.dfast_matcher().get_last_space(), } } @@ -196,7 +202,7 @@ impl Matcher for MatchGeneratorDriver { let 
vec_pool = &mut self.vec_pool; self.dfast_match_generator .as_mut() - .unwrap() + .expect("dfast backend must be initialized by reset() before use") .add_data(space, |mut data| { data.resize(data.capacity(), 0); vec_pool.push(data); @@ -211,16 +217,14 @@ impl Matcher for MatchGeneratorDriver { while self.match_generator.next_sequence(&mut handle_sequence) {} } MatcherBackend::Dfast => self - .dfast_match_generator - .as_mut() - .unwrap() + .dfast_matcher_mut() .start_matching(&mut handle_sequence), } } fn skip_matching(&mut self) { match self.active_backend { MatcherBackend::Simple => self.match_generator.skip_matching(), - MatcherBackend::Dfast => self.dfast_match_generator.as_mut().unwrap().skip_matching(), + MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(), } } } @@ -636,6 +640,8 @@ impl DfastMatchGenerator { offset: candidate.offset, match_len: candidate.match_len, }); + // The encoded offset value is irrelevant here; we only need the + // side effect on offset history for future rep-code matching. 
let _ = encode_offset_with_history( candidate.offset as u32, literals.len() as u32, @@ -1164,3 +1170,74 @@ fn dfast_matches_roundtrip_multi_block_pattern() { assert_eq!(&history[prefix_len..], second_block.as_slice()); } + +#[test] +fn driver_switches_backends_and_initializes_dfast_via_reset() { + let mut driver = MatchGeneratorDriver::new(32, 2); + + driver.reset(CompressionLevel::Default); + assert_eq!(driver.window_size(), DFAST_DEFAULT_WINDOW_SIZE as u64); + + let mut first = driver.get_next_space(); + first[..12].copy_from_slice(b"abcabcabcabc"); + first.truncate(12); + driver.commit_space(first); + assert_eq!(driver.get_last_space(), b"abcabcabcabc"); + driver.skip_matching(); + + let mut second = driver.get_next_space(); + second[..12].copy_from_slice(b"abcabcabcabc"); + second.truncate(12); + driver.commit_space(second); + + let mut reconstructed = b"abcabcabcabc".to_vec(); + driver.start_matching(|seq| match seq { + Sequence::Literals { literals } => reconstructed.extend_from_slice(literals), + Sequence::Triple { + literals, + offset, + match_len, + } => { + reconstructed.extend_from_slice(literals); + let start = reconstructed.len() - offset; + for i in 0..match_len { + let byte = reconstructed[start + i]; + reconstructed.push(byte); + } + } + }); + assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc"); + + driver.reset(CompressionLevel::Fastest); + assert_eq!(driver.window_size(), 64); +} + +#[test] +fn dfast_skip_matching_handles_window_eviction() { + let mut matcher = DfastMatchGenerator::new(16); + + matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {}); + matcher.skip_matching(); + matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {}); + matcher.skip_matching(); + matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {}); + + let mut reconstructed = alloc::vec![7, 8, 9, 10, 11, 12]; + matcher.start_matching(|seq| match seq { + Sequence::Literals { literals } => reconstructed.extend_from_slice(literals), + Sequence::Triple { + literals, + 
offset, + match_len, + } => { + reconstructed.extend_from_slice(literals); + let start = reconstructed.len() - offset; + for i in 0..match_len { + let byte = reconstructed[start + i]; + reconstructed.push(byte); + } + } + }); + + assert_eq!(reconstructed, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]); +} From bbb487b463b6850f52b0d7ac7cbbb0d6b188d239 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Thu, 26 Mar 2026 19:15:50 +0200 Subject: [PATCH 7/9] test(encoding): document dfast matcher tradeoffs --- zstd/src/encoding/match_generator.rs | 39 ++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index e03bd4bb..3d6e2c3c 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -18,6 +18,8 @@ use super::blocks::encode_offset_with_history; const MIN_MATCH_LEN: usize = 5; const DFAST_MIN_MATCH_LEN: usize = 6; const DFAST_TARGET_LEN: usize = 48; +// Keep these aligned with the issue's zstd level-3/dfast target unless ratio +// measurements show we can shrink them without regressing acceptance tests. const DFAST_HASH_BITS: usize = 20; const DFAST_SEARCH_DEPTH: usize = 4; const DFAST_DEFAULT_WINDOW_SIZE: usize = 1 << 22; @@ -540,6 +542,8 @@ struct DfastMatchGenerator { max_window_size: usize, window: VecDeque>, window_size: usize, + // We keep a contiguous searchable history to avoid rebuilding and reseeding + // the matcher state from disjoint block buffers on every block. 
history: Vec, history_start: usize, history_abs_start: usize, @@ -1241,3 +1245,38 @@ fn dfast_skip_matching_handles_window_eviction() { assert_eq!(reconstructed, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]); } + +#[test] +fn dfast_inserts_tail_positions_for_next_block_matching() { + let mut matcher = DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE); + + matcher.add_data(b"012345bcdea".to_vec(), |_| {}); + let mut history = Vec::new(); + matcher.start_matching(|seq| match seq { + Sequence::Literals { literals } => history.extend_from_slice(literals), + Sequence::Triple { .. } => unreachable!("first block should not match history"), + }); + assert_eq!(history, b"012345bcdea"); + + matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {}); + let mut saw_triple = false; + matcher.start_matching(|seq| match seq { + Sequence::Literals { literals } => history.extend_from_slice(literals), + Sequence::Triple { + literals, + offset, + match_len, + } => { + saw_triple = true; + history.extend_from_slice(literals); + let start = history.len() - offset; + for i in 0..match_len { + let byte = history[start + i]; + history.push(byte); + } + } + }); + + assert!(saw_triple, "expected tail-anchored cross-block match"); + assert_eq!(history, b"012345bcdeabcdeabcdeab"); +} From 3595e8fb5e49eb4e47581ce793dfa2098f20d4ad Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Thu, 26 Mar 2026 21:35:21 +0200 Subject: [PATCH 8/9] docs(encoding): clarify dfast review invariants --- zstd/src/encoding/frame_compressor.rs | 2 ++ zstd/src/encoding/match_generator.rs | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/zstd/src/encoding/frame_compressor.rs b/zstd/src/encoding/frame_compressor.rs index 1fd1478c..c87806b7 100644 --- a/zstd/src/encoding/frame_compressor.rs +++ b/zstd/src/encoding/frame_compressor.rs @@ -223,6 +223,8 @@ impl FrameCompressor { output.extend_from_slice(&uncompressed_data); } CompressionLevel::Fastest | CompressionLevel::Default => { + // Default shares this fast 
block-encoding pipeline, but it + // remains a distinct level via the matcher's dfast backend. compress_fastest(&mut self.state, last_block, uncompressed_data, output) } _ => { diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index 3d6e2c3c..28424da5 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -660,6 +660,10 @@ impl DfastMatchGenerator { } if literals_start < current_len { + // We stop inserting once fewer than DFAST_MIN_MATCH_LEN bytes remain. + // Those tail positions cannot produce a fresh dfast candidate on their + // own, and the cross-block overlap case is covered by + // dfast_inserts_tail_positions_for_next_block_matching(). let current = self.window.back().unwrap().as_slice(); handle_sequence(Sequence::Literals { literals: &current[literals_start..], @@ -669,6 +673,9 @@ impl DfastMatchGenerator { fn ensure_hash_tables(&mut self) { if self.short_hash.is_empty() { + // This is intentionally lazy so Fastest/Uncompressed never pay the + // ~dfast-level memory cost. The current size tracks the issue's + // zstd level-3 style parameters rather than a generic low-memory preset. self.short_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS]; self.long_hash = From 7bb70c85a8e2043e77b670ca667e49413412ed7e Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Thu, 26 Mar 2026 23:07:29 +0200 Subject: [PATCH 9/9] fix(encoding): tighten review follow-ups --- zstd/src/encoding/blocks/compressed.rs | 2 +- zstd/src/encoding/match_generator.rs | 48 ++++++++++++++++---------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/zstd/src/encoding/blocks/compressed.rs b/zstd/src/encoding/blocks/compressed.rs index bcd09d6a..9708e4f1 100644 --- a/zstd/src/encoding/blocks/compressed.rs +++ b/zstd/src/encoding/blocks/compressed.rs @@ -446,7 +446,7 @@ fn encode_match_len(len: u32) -> (u8, u32, usize) { /// history per RFC 8878 §3.1.2.5. 
Updates `offset_hist` in place. /// /// Encoded offset codes: 1/2/3 = repeat offsets, N+3 = new absolute offset N. -pub(crate) fn encode_offset_with_history( +pub(in crate::encoding) fn encode_offset_with_history( actual_offset: u32, lit_len: u32, offset_hist: &mut [u32; 3], diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index 28424da5..a34cc494 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -661,9 +661,11 @@ impl DfastMatchGenerator { if literals_start < current_len { // We stop inserting once fewer than DFAST_MIN_MATCH_LEN bytes remain. - // Those tail positions cannot produce a fresh dfast candidate on their - // own, and the cross-block overlap case is covered by - // dfast_inserts_tail_positions_for_next_block_matching(). + // The last boundary-spanning start that can seed a dfast hash is + // still inserted by the loop above; `dfast_inserts_tail_positions_ + // for_next_block_matching()` asserts that the next block can match + // immediately at the boundary without eagerly seeding the final + // DFAST_MIN_MATCH_LEN - 1 bytes here. 
let current = self.window.back().unwrap().as_slice(); handle_sequence(Sequence::Literals { literals: &current[literals_start..], }); @@ -1266,24 +1268,34 @@ fn dfast_inserts_tail_positions_for_next_block_matching() { assert_eq!(history, b"012345bcdea"); matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {}); - let mut saw_triple = false; - matcher.start_matching(|seq| match seq { - Sequence::Literals { literals } => history.extend_from_slice(literals), - Sequence::Triple { - literals, - offset, - match_len, - } => { - saw_triple = true; - history.extend_from_slice(literals); - let start = history.len() - offset; - for i in 0..match_len { - let byte = history[start + i]; - history.push(byte); + let mut saw_first_sequence = false; + matcher.start_matching(|seq| { + assert!(!saw_first_sequence, "expected a single cross-block match"); + saw_first_sequence = true; + match seq { + Sequence::Literals { .. } => { + panic!("expected tail-anchored cross-block match before any literals") + } + Sequence::Triple { + literals, + offset, + match_len, + } => { + assert_eq!(literals, b""); + assert_eq!(offset, 5); + assert_eq!(match_len, 11); + let start = history.len() - offset; + for i in 0..match_len { + let byte = history[start + i]; + history.push(byte); } } }); - assert!(saw_triple, "expected tail-anchored cross-block match"); + assert!( + saw_first_sequence, + "expected tail-anchored cross-block match" + ); assert_eq!(history, b"012345bcdeabcdeabcdeab"); }