diff --git a/BENCHMARKS.md b/BENCHMARKS.md index 82f639bb..2035d3f8 100644 --- a/BENCHMARKS.md +++ b/BENCHMARKS.md @@ -35,8 +35,9 @@ encoder: - `structured-zstd::Fastest` vs `zstd` level `1` - `structured-zstd::Default` vs `zstd` level `3` +- `structured-zstd::Better` vs `zstd` level `7` -`Better` and `Best` are intentionally excluded until the encoder implements them. +`Best` is intentionally excluded until the encoder implements it. Dictionary benchmarks are tracked separately with C FFI `with_dict` vs `without_dict` runs, using a dictionary trained from scenario samples. Pure Rust dictionary compression is still pending and is diff --git a/README.md b/README.md index 2c96c875..d8d6ca9e 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ This is a **maintained fork** of [KillingSpark/zstd-rs](https://github.com/Killi **Fork goals:** - Dictionary compression improvements (critical for per-label trained dictionaries in LSM-tree) - Performance parity with C zstd for decompression (currently 1.4-3.5x slower) -- Additional compression levels (Default/Better/Best — currently only Fastest is implemented) +- Additional compression levels (Best still pending — Fastest, Default, and Better are implemented) - No FFI — pure `cargo build`, no cmake/system libraries (ADR-013 compliance) **Upstream relationship:** We periodically sync with upstream but maintain an independent development trajectory focused on CoordiNode requirements. @@ -44,7 +44,7 @@ Complete RFC 8878 implementation. Performance: ~1.4-3.5x slower than C zstd depe - [x] Uncompressed blocks - [x] Fastest (roughly level 1) - [x] Default (roughly level 3) -- [ ] Better (roughly level 7) +- [x] Better (roughly level 7) - [ ] Best (roughly level 11) - [x] Checksums - [x] Dictionary compression diff --git a/cli/src/main.rs b/cli/src/main.rs index 7655deca..9d81f4b6 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -40,14 +40,15 @@ enum Commands { /// - 0: Uncompressed /// - 1: Fastest /// - 2: Default + /// - 3: Better (lazy2, ~zstd level 7) /// - /// Streaming mode currently supports only levels 0..=2. + /// Streaming mode currently supports only levels 0..=3. #[arg( short, long, value_name = "COMPRESSION_LEVEL", default_value_t = 2, - value_parser = clap::value_parser!(u8).range(0..=2), + value_parser = clap::value_parser!(u8).range(0..=3), verbatim_doc_comment )] level: u8, @@ -107,6 +108,7 @@ fn compress(input: PathBuf, output: PathBuf, level: u8) -> color_eyre::Result<() 0 => CompressionLevel::Uncompressed, 1 => CompressionLevel::Fastest, 2 => CompressionLevel::Default, + 3 => CompressionLevel::Better, _ => return Err(eyre!("unsupported compression level: {level}")), }; ensure_distinct_paths(&input, &output)?; @@ -400,7 +402,7 @@ mod tests { #[test] fn cli_rejects_unsupported_compression_level_at_parse_time() { - let parse = Cli::try_parse_from(["structured-zstd", "compress", "in.bin", "--level", "3"]); + let parse = Cli::try_parse_from(["structured-zstd", "compress", "in.bin", "--level", "4"]); assert!(parse.is_err()); } diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index c8906822..b19729d0 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -70,7 +70,7 @@ pub(crate) fn benchmark_scenarios() -> Vec { scenarios } -pub(crate) fn supported_levels() -> [LevelConfig; 2] { +pub(crate) fn supported_levels() -> [LevelConfig; 3] { [ LevelConfig { name: "fastest", @@ -82,6 +82,11 @@ pub(crate) fn supported_levels() -> [LevelConfig; 2] { rust_level: CompressionLevel::Default, ffi_level: 3, }, + LevelConfig { + name: "better", + rust_level: CompressionLevel::Better, + ffi_level: 7, + }, ] } diff --git a/zstd/src/encoding/frame_compressor.rs b/zstd/src/encoding/frame_compressor.rs index ed4c2973..589be2f8 100644 --- a/zstd/src/encoding/frame_compressor.rs +++ b/zstd/src/encoding/frame_compressor.rs @@ -277,10 +277,12 @@ impl FrameCompressor { header.serialize(output); output.extend_from_slice(&uncompressed_data); } - CompressionLevel::Fastest | CompressionLevel::Default => { - // Default shares this fast block-encoding pipeline, but it - // remains a distinct level via the matcher's dfast backend. - compress_fastest(&mut self.state, last_block, uncompressed_data, output) + CompressionLevel::Fastest + | CompressionLevel::Default + | CompressionLevel::Better => { + // All compressed levels share this block-encoding pipeline; + // they differ only in the matcher backend and its parameters. + compress_block_encoded(&mut self.state, last_block, uncompressed_data, output) } _ => { unimplemented!(); diff --git a/zstd/src/encoding/levels/fastest.rs b/zstd/src/encoding/levels/fastest.rs index dc3e4e8b..5f30a767 100644 --- a/zstd/src/encoding/levels/fastest.rs +++ b/zstd/src/encoding/levels/fastest.rs @@ -6,7 +6,11 @@ use crate::{ }; use alloc::vec::Vec; -/// Compresses a single block at [`crate::encoding::CompressionLevel::Fastest`]. +/// Compresses a single block using the shared compressed-block pipeline. +/// +/// Used by all compressed levels (Fastest, Default, Better). The actual +/// compression quality is determined by the matcher backend in `state`, +/// not by this function. /// /// # Parameters /// - `state`: [`CompressState`] so the compressor can refer to data before @@ -17,7 +21,7 @@ use alloc::vec::Vec; /// larger input /// - `output`: As `uncompressed_data` is compressed, it's appended to `output`. #[inline] -pub fn compress_fastest( +pub fn compress_block_encoded( state: &mut CompressState, last_block: bool, uncompressed_data: Vec, diff --git a/zstd/src/encoding/levels/mod.rs b/zstd/src/encoding/levels/mod.rs index fb39caaf..9a934029 100644 --- a/zstd/src/encoding/levels/mod.rs +++ b/zstd/src/encoding/levels/mod.rs @@ -1,2 +1,2 @@ mod fastest; -pub use fastest::compress_fastest; +pub use fastest::compress_block_encoded; diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index e15905e3..87fc80d8 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -24,12 +24,23 @@ const DFAST_TARGET_LEN: usize = 48; const DFAST_HASH_BITS: usize = 20; const DFAST_SEARCH_DEPTH: usize = 4; const DFAST_DEFAULT_WINDOW_SIZE: usize = 1 << 22; +const BETTER_DEFAULT_WINDOW_SIZE: usize = 1 << 23; const DFAST_EMPTY_SLOT: usize = usize::MAX; -#[derive(Copy, Clone, PartialEq, Eq)] +const HC_HASH_LOG: usize = 20; +const HC_CHAIN_LOG: usize = 19; +const HC_SEARCH_DEPTH: usize = 16; +const HC_MIN_MATCH_LEN: usize = 5; +const HC_TARGET_LEN: usize = 48; +// Positions are stored as (abs_pos + 1) so that 0 is a safe empty sentinel +// that can never collide with any valid position, even at the 4 GiB boundary. +const HC_EMPTY: u32 = 0; + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] enum MatcherBackend { Simple, Dfast, + HashChain, } /// This is the default implementation of the `Matcher` trait. It allocates and reuses the buffers when possible. @@ -38,6 +49,7 @@ pub struct MatchGeneratorDriver { suffix_pool: Vec, match_generator: MatchGenerator, dfast_match_generator: Option, + hc_match_generator: Option, active_backend: MatcherBackend, slice_size: usize, base_slice_size: usize, @@ -60,6 +72,7 @@ impl MatchGeneratorDriver { suffix_pool: Vec::new(), match_generator: MatchGenerator::new(max_window_size), dfast_match_generator: None, + hc_match_generator: None, active_backend: MatcherBackend::Simple, slice_size, base_slice_size: slice_size, @@ -90,9 +103,9 @@ impl MatchGeneratorDriver { 1, ), CompressionLevel::Better => ( - MatcherBackend::Simple, + MatcherBackend::HashChain, self.base_slice_size, - self.base_window_size, + BETTER_DEFAULT_WINDOW_SIZE, 1, ), CompressionLevel::Best => ( @@ -116,6 +129,18 @@ impl MatchGeneratorDriver { .expect("dfast backend must be initialized by reset() before use") } + fn hc_matcher(&self) -> &HcMatchGenerator { + self.hc_match_generator + .as_ref() + .expect("hash chain backend must be initialized by reset() before use") + } + + fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator { + self.hc_match_generator + .as_mut() + .expect("hash chain backend must be initialized by reset() before use") + } + fn retire_dictionary_budget(&mut self, evicted_bytes: usize) { let reclaimed = evicted_bytes.min(self.dictionary_retained_budget); if reclaimed == 0 { @@ -133,6 +158,10 @@ impl MatchGeneratorDriver { let matcher = self.dfast_matcher_mut(); matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed); } + MatcherBackend::HashChain => { + let matcher = self.hc_matcher_mut(); + matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed); + } } } @@ -163,6 +192,17 @@ impl MatchGeneratorDriver { self.vec_pool.push(data); } } + MatcherBackend::HashChain => { + let mut retired = Vec::new(); + self.hc_matcher_mut().trim_to_window(|data| { + evicted_bytes += data.len(); + retired.push(data); + }); + for mut data in retired { + data.resize(data.capacity(), 0); + self.vec_pool.push(data); + } + } } if evicted_bytes == 0 { break; @@ -202,6 +242,15 @@ impl Matcher for MatchGeneratorDriver { }); } } + MatcherBackend::HashChain => { + if let Some(hc) = self.hc_match_generator.as_mut() { + let vec_pool = &mut self.vec_pool; + hc.reset(|mut data| { + data.resize(data.capacity(), 0); + vec_pool.push(data); + }); + } + } } } @@ -227,12 +276,25 @@ impl Matcher for MatchGeneratorDriver { .dfast_match_generator .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size)); dfast.max_window_size = max_window_size; + dfast.lazy_depth = 1; let vec_pool = &mut self.vec_pool; dfast.reset(|mut data| { data.resize(data.capacity(), 0); vec_pool.push(data); }); } + MatcherBackend::HashChain => { + let hc = self + .hc_match_generator + .get_or_insert_with(|| HcMatchGenerator::new(max_window_size)); + hc.max_window_size = max_window_size; + hc.lazy_depth = 2; + let vec_pool = &mut self.vec_pool; + hc.reset(|mut data| { + data.resize(data.capacity(), 0); + vec_pool.push(data); + }); + } } } @@ -240,6 +302,7 @@ impl Matcher for MatchGeneratorDriver { match self.active_backend { MatcherBackend::Simple => self.match_generator.offset_hist = offset_hist, MatcherBackend::Dfast => self.dfast_matcher_mut().offset_hist = offset_hist, + MatcherBackend::HashChain => self.hc_matcher_mut().offset_hist = offset_hist, } if dict_content.is_empty() { @@ -261,13 +324,21 @@ impl Matcher for MatchGeneratorDriver { matcher.max_window_size = matcher.max_window_size.saturating_add(retained_dict_budget); } + MatcherBackend::HashChain => { + let matcher = self.hc_matcher_mut(); + matcher.max_window_size = + matcher.max_window_size.saturating_add(retained_dict_budget); + } } let mut start = 0usize; let mut committed_dict_budget = 0usize; + // insert_position needs 4 bytes of lookahead for hashing; + // backfill_boundary_positions re-visits tail positions once the + // next slice extends history, but cannot hash <4 byte fragments. let min_primed_tail = match self.active_backend { MatcherBackend::Simple => MIN_MATCH_LEN, - MatcherBackend::Dfast => 4, + MatcherBackend::Dfast | MatcherBackend::HashChain => 4, }; while start < dict_content.len() { let end = (start + self.slice_size).min(dict_content.len()); @@ -298,6 +369,12 @@ impl Matcher for MatchGeneratorDriver { .max_window_size .saturating_sub(uncommitted_tail_budget); } + MatcherBackend::HashChain => { + let matcher = self.hc_matcher_mut(); + matcher.max_window_size = matcher + .max_window_size + .saturating_sub(uncommitted_tail_budget); + } } } if committed_dict_budget > 0 { @@ -323,6 +400,7 @@ impl Matcher for MatchGeneratorDriver { match self.active_backend { MatcherBackend::Simple => self.match_generator.window.last().unwrap().data.as_slice(), MatcherBackend::Dfast => self.dfast_matcher().get_last_space(), + MatcherBackend::HashChain => self.hc_matcher().get_last_space(), } } @@ -362,6 +440,20 @@ impl Matcher for MatchGeneratorDriver { self.retire_dictionary_budget(evicted_bytes); self.trim_after_budget_retire(); } + MatcherBackend::HashChain => { + let vec_pool = &mut self.vec_pool; + let mut evicted_bytes = 0usize; + self.hc_match_generator + .as_mut() + .expect("hash chain backend must be initialized by reset() before use") + .add_data(space, |mut data| { + evicted_bytes += data.len(); + data.resize(data.capacity(), 0); + vec_pool.push(data); + }); + self.retire_dictionary_budget(evicted_bytes); + self.trim_after_budget_retire(); + } } } @@ -373,12 +465,14 @@ impl Matcher for MatchGeneratorDriver { MatcherBackend::Dfast => self .dfast_matcher_mut() .start_matching(&mut handle_sequence), + MatcherBackend::HashChain => self.hc_matcher_mut().start_matching(&mut handle_sequence), } } fn skip_matching(&mut self) { match self.active_backend { MatcherBackend::Simple => self.match_generator.skip_matching(), MatcherBackend::Dfast => self.dfast_matcher_mut().skip_matching(), + MatcherBackend::HashChain => self.hc_matcher_mut().skip_matching(), } } } @@ -870,6 +964,8 @@ struct DfastMatchGenerator { offset_hist: [u32; 3], short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>, long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>, + // Lazy match lookahead depth (internal tuning parameter). + lazy_depth: u8, } #[derive(Copy, Clone)] @@ -891,6 +987,7 @@ impl DfastMatchGenerator { offset_hist: [1, 4, 8], short_hash: Vec::new(), long_hash: Vec::new(), + lazy_depth: 1, } } @@ -1053,16 +1150,26 @@ impl DfastMatchGenerator { return Some(best); } + // Lazy check: evaluate pos+1 let next = self.best_match(abs_pos + 1, lit_len + 1); - match next { - Some(next) - if next.match_len > best.match_len - || (next.match_len == best.match_len && next.offset < best.offset) => + if let Some(next) = next + && (next.match_len > best.match_len + || (next.match_len == best.match_len && next.offset < best.offset)) + { + return None; + } + + // Lazy2 check: also evaluate pos+2 + if self.lazy_depth >= 2 && abs_pos + 2 + DFAST_MIN_MATCH_LEN <= self.history_abs_end() { + let next2 = self.best_match(abs_pos + 2, lit_len + 2); + if let Some(next2) = next2 + && next2.match_len > best.match_len + 1 { - None + return None; } - _ => Some(best), } + + Some(best) } fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option { @@ -1255,6 +1362,432 @@ impl DfastMatchGenerator { } } +struct HcMatchGenerator { + max_window_size: usize, + window: VecDeque>, + window_size: usize, + history: Vec, + history_start: usize, + history_abs_start: usize, + offset_hist: [u32; 3], + hash_table: Vec, + chain_table: Vec, + lazy_depth: u8, +} + +impl HcMatchGenerator { + fn new(max_window_size: usize) -> Self { + Self { + max_window_size, + window: VecDeque::new(), + window_size: 0, + history: Vec::new(), + history_start: 0, + history_abs_start: 0, + offset_hist: [1, 4, 8], + hash_table: Vec::new(), + chain_table: Vec::new(), + lazy_depth: 2, + } + } + + fn reset(&mut self, mut reuse_space: impl FnMut(Vec)) { + self.window_size = 0; + self.history.clear(); + self.history_start = 0; + self.history_abs_start = 0; + self.offset_hist = [1, 4, 8]; + if !self.hash_table.is_empty() { + self.hash_table.fill(HC_EMPTY); + self.chain_table.fill(HC_EMPTY); + } + for mut data in self.window.drain(..) { + data.resize(data.capacity(), 0); + reuse_space(data); + } + } + + fn get_last_space(&self) -> &[u8] { + self.window.back().unwrap().as_slice() + } + + // History duplicates window data for O(1) contiguous access during match + // finding (common_prefix_len, extend_backwards). Same pattern as + // DfastMatchGenerator. Peak: ~2x window size for data buffers + 6 MB tables. + fn add_data(&mut self, data: Vec, mut reuse_space: impl FnMut(Vec)) { + assert!(data.len() <= self.max_window_size); + while self.window_size + data.len() > self.max_window_size { + let removed = self.window.pop_front().unwrap(); + self.window_size -= removed.len(); + self.history_start += removed.len(); + self.history_abs_start += removed.len(); + reuse_space(removed); + } + self.compact_history(); + self.history.extend_from_slice(&data); + self.window_size += data.len(); + self.window.push_back(data); + } + + fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec)) { + while self.window_size > self.max_window_size { + let removed = self.window.pop_front().unwrap(); + self.window_size -= removed.len(); + self.history_start += removed.len(); + self.history_abs_start += removed.len(); + reuse_space(removed); + } + } + + /// Backfill positions from the tail of the previous slice that couldn't be + /// hashed at the time (insert_position needs 4 bytes of lookahead). + fn backfill_boundary_positions(&mut self, current_abs_start: usize) { + let backfill_start = current_abs_start + .saturating_sub(3) + .max(self.history_abs_start); + if backfill_start < current_abs_start { + self.insert_positions(backfill_start, current_abs_start); + } + } + + fn skip_matching(&mut self) { + self.ensure_tables(); + let current_len = self.window.back().unwrap().len(); + let current_abs_start = self.history_abs_start + self.window_size - current_len; + self.backfill_boundary_positions(current_abs_start); + self.insert_positions(current_abs_start, current_abs_start + current_len); + } + + fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) { + self.ensure_tables(); + + let current_len = self.window.back().unwrap().len(); + if current_len == 0 { + return; + } + + let current_abs_start = self.history_abs_start + self.window_size - current_len; + self.backfill_boundary_positions(current_abs_start); + + let mut pos = 0usize; + let mut literals_start = 0usize; + while pos + HC_MIN_MATCH_LEN <= current_len { + let abs_pos = current_abs_start + pos; + let lit_len = pos - literals_start; + + let best = self.find_best_match(abs_pos, lit_len); + if let Some(candidate) = self.pick_lazy_match(abs_pos, lit_len, best) { + self.insert_positions(abs_pos, candidate.start + candidate.match_len); + let current = self.window.back().unwrap().as_slice(); + let start = candidate.start - current_abs_start; + let literals = ¤t[literals_start..start]; + handle_sequence(Sequence::Triple { + literals, + offset: candidate.offset, + match_len: candidate.match_len, + }); + let _ = encode_offset_with_history( + candidate.offset as u32, + literals.len() as u32, + &mut self.offset_hist, + ); + pos = start + candidate.match_len; + literals_start = pos; + } else { + self.insert_position(abs_pos); + pos += 1; + } + } + + // Insert remaining hashable positions in the tail (the matching loop + // stops at HC_MIN_MATCH_LEN but insert_position only needs 4 bytes). + while pos + 4 <= current_len { + self.insert_position(current_abs_start + pos); + pos += 1; + } + + if literals_start < current_len { + let current = self.window.back().unwrap().as_slice(); + handle_sequence(Sequence::Literals { + literals: ¤t[literals_start..], + }); + } + } + + fn ensure_tables(&mut self) { + if self.hash_table.is_empty() { + self.hash_table = alloc::vec![HC_EMPTY; 1 << HC_HASH_LOG]; + self.chain_table = alloc::vec![HC_EMPTY; 1 << HC_CHAIN_LOG]; + } + } + + fn compact_history(&mut self) { + if self.history_start == 0 { + return; + } + if self.history_start >= self.max_window_size + || self.history_start * 2 >= self.history.len() + { + self.history.drain(..self.history_start); + self.history_start = 0; + } + } + + fn live_history(&self) -> &[u8] { + &self.history[self.history_start..] + } + + fn history_abs_end(&self) -> usize { + self.history_abs_start + self.live_history().len() + } + + fn hash_position(&self, data: &[u8]) -> usize { + let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64; + const PRIME: u64 = 0x9E37_79B1_85EB_CA87; + ((value.wrapping_mul(PRIME)) >> (64 - HC_HASH_LOG)) as usize + } + + fn insert_position(&mut self, abs_pos: usize) { + let idx = abs_pos - self.history_abs_start; + let concat = self.live_history(); + if idx + 4 > concat.len() { + return; + } + let hash = self.hash_position(&concat[idx..]); + // Store as (abs_pos + 1) so HC_EMPTY (0) never collides with a valid + // position. Guard on usize before cast to avoid silent u32 truncation. + // Streams >4 GiB stop inserting; matches degrade to repcodes-only. + // TODO(#51): rebase table positions to avoid 4 GiB cutoff + if abs_pos >= u32::MAX as usize { + return; + } + let pos_u32 = abs_pos as u32; + let stored = pos_u32 + 1; + let chain_idx = pos_u32 as usize & ((1 << HC_CHAIN_LOG) - 1); + let prev = self.hash_table[hash]; + self.chain_table[chain_idx] = prev; + self.hash_table[hash] = stored; + } + + fn insert_positions(&mut self, start: usize, end: usize) { + for pos in start..end { + self.insert_position(pos); + } + } + + fn chain_candidates(&self, abs_pos: usize) -> [usize; HC_SEARCH_DEPTH] { + let mut buf = [usize::MAX; HC_SEARCH_DEPTH]; + let idx = abs_pos - self.history_abs_start; + let concat = self.live_history(); + if idx + 4 > concat.len() { + return buf; + } + let hash = self.hash_position(&concat[idx..]); + let chain_mask = (1 << HC_CHAIN_LOG) - 1; + + let mut cur = self.hash_table[hash]; + let mut filled = 0; + // Follow chain up to HC_SEARCH_DEPTH valid candidates, skipping stale + // entries (evicted from window) instead of stopping at them. + // Stored values are (abs_pos + 1); decode with wrapping_sub(1). + // Break on self-loops (masked chain_idx collision at 512K periodicity). + // Cap total steps at 4x search depth to bound time spent skipping + // stale entries while still finding valid candidates deeper in chain. + let mut steps = 0; + const MAX_CHAIN_STEPS: usize = HC_SEARCH_DEPTH * 4; + while filled < HC_SEARCH_DEPTH && steps < MAX_CHAIN_STEPS { + if cur == HC_EMPTY { + break; + } + let candidate_abs = cur.wrapping_sub(1) as usize; + let next = self.chain_table[candidate_abs & chain_mask]; + steps += 1; + if next == cur { + // Self-loop: two positions share chain_idx, stop to avoid + // spinning on the same candidate forever. + if candidate_abs >= self.history_abs_start && candidate_abs < abs_pos { + buf[filled] = candidate_abs; + } + break; + } + cur = next; + if candidate_abs < self.history_abs_start || candidate_abs >= abs_pos { + continue; + } + buf[filled] = candidate_abs; + filled += 1; + } + buf + } + + fn find_best_match(&self, abs_pos: usize, lit_len: usize) -> Option { + let rep = self.repcode_candidate(abs_pos, lit_len); + let hash = self.hash_chain_candidate(abs_pos, lit_len); + Self::better_candidate(rep, hash) + } + + fn hash_chain_candidate(&self, abs_pos: usize, lit_len: usize) -> Option { + let concat = self.live_history(); + let current_idx = abs_pos - self.history_abs_start; + if current_idx + HC_MIN_MATCH_LEN > concat.len() { + return None; + } + + let mut best: Option = None; + for candidate_abs in self.chain_candidates(abs_pos) { + if candidate_abs == usize::MAX { + break; + } + let candidate_idx = candidate_abs - self.history_abs_start; + let match_len = + MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]); + if match_len >= HC_MIN_MATCH_LEN { + let candidate = self.extend_backwards(candidate_abs, abs_pos, match_len, lit_len); + best = Self::better_candidate(best, Some(candidate)); + if best.is_some_and(|b| b.match_len >= HC_TARGET_LEN) { + return best; + } + } + } + best + } + + fn repcode_candidate(&self, abs_pos: usize, lit_len: usize) -> Option { + let reps = if lit_len == 0 { + [ + Some(self.offset_hist[1] as usize), + Some(self.offset_hist[2] as usize), + (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize), + ] + } else { + [ + Some(self.offset_hist[0] as usize), + Some(self.offset_hist[1] as usize), + Some(self.offset_hist[2] as usize), + ] + }; + + let concat = self.live_history(); + let current_idx = abs_pos - self.history_abs_start; + if current_idx + HC_MIN_MATCH_LEN > concat.len() { + return None; + } + + let mut best = None; + for rep in reps.into_iter().flatten() { + if rep == 0 || rep > abs_pos { + continue; + } + let candidate_pos = abs_pos - rep; + if candidate_pos < self.history_abs_start { + continue; + } + let candidate_idx = candidate_pos - self.history_abs_start; + let match_len = + MatchGenerator::common_prefix_len(&concat[candidate_idx..], &concat[current_idx..]); + if match_len >= HC_MIN_MATCH_LEN { + let candidate = self.extend_backwards(candidate_pos, abs_pos, match_len, lit_len); + best = Self::better_candidate(best, Some(candidate)); + } + } + best + } + + fn extend_backwards( + &self, + mut candidate_pos: usize, + mut abs_pos: usize, + mut match_len: usize, + lit_len: usize, + ) -> MatchCandidate { + let concat = self.live_history(); + let min_abs_pos = abs_pos - lit_len; + while abs_pos > min_abs_pos + && candidate_pos > self.history_abs_start + && concat[candidate_pos - self.history_abs_start - 1] + == concat[abs_pos - self.history_abs_start - 1] + { + candidate_pos -= 1; + abs_pos -= 1; + match_len += 1; + } + MatchCandidate { + start: abs_pos, + offset: abs_pos - candidate_pos, + match_len, + } + } + + fn better_candidate( + lhs: Option, + rhs: Option, + ) -> Option { + match (lhs, rhs) { + (None, other) | (other, None) => other, + (Some(lhs), Some(rhs)) => { + let lhs_gain = Self::match_gain(lhs.match_len, lhs.offset); + let rhs_gain = Self::match_gain(rhs.match_len, rhs.offset); + if rhs_gain > lhs_gain { + Some(rhs) + } else { + Some(lhs) + } + } + } + } + + fn match_gain(match_len: usize, offset: usize) -> i32 { + debug_assert!( + offset > 0, + "zstd offsets are 1-indexed, offset=0 is invalid" + ); + let offset_bits = 32 - (offset as u32).leading_zeros() as i32; + (match_len as i32) * 4 - offset_bits + } + + // Lazy lookahead queries pos+1/pos+2 before they are inserted into hash + // tables — matching C zstd behavior. Seeding before comparing would let a + // position match against itself, changing semantics. + fn pick_lazy_match( + &self, + abs_pos: usize, + lit_len: usize, + best: Option, + ) -> Option { + let best = best?; + if best.match_len >= HC_TARGET_LEN + || abs_pos + 1 + HC_MIN_MATCH_LEN > self.history_abs_end() + { + return Some(best); + } + + let current_gain = Self::match_gain(best.match_len, best.offset) + 4; + + // Lazy check: evaluate pos+1 + let next = self.find_best_match(abs_pos + 1, lit_len + 1); + if let Some(next) = next { + let next_gain = Self::match_gain(next.match_len, next.offset); + if next_gain > current_gain { + return None; + } + } + + // Lazy2 check: also evaluate pos+2 + if self.lazy_depth >= 2 && abs_pos + 2 + HC_MIN_MATCH_LEN <= self.history_abs_end() { + let next2 = self.find_best_match(abs_pos + 2, lit_len + 2); + if let Some(next2) = next2 { + let next2_gain = Self::match_gain(next2.match_len, next2.offset); + // Must beat current gain + extra literal cost + if next2_gain > current_gain + 4 { + return None; + } + } + } + + Some(best) + } +} + #[test] fn matches() { let mut matcher = MatchGenerator::new(1000); @@ -1686,6 +2219,72 @@ fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() { ); } +#[test] +fn hc_prime_with_dictionary_preserves_history_for_first_full_block() { + let mut driver = MatchGeneratorDriver::new(8, 1); + driver.reset(CompressionLevel::Better); + + driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]); + + let mut space = driver.get_next_space(); + space.clear(); + // Repeat the dictionary content so the HC matcher can find it. + // HC_MIN_MATCH_LEN is 5, so an 8-byte match is well above threshold. + space.extend_from_slice(b"abcdefgh"); + driver.commit_space(space); + + let mut saw_match = false; + driver.start_matching(|seq| { + if let Sequence::Triple { + literals, + offset, + match_len, + } = seq + && literals.is_empty() + && offset == 8 + && match_len >= HC_MIN_MATCH_LEN + { + saw_match = true; + } + }); + + assert!( + saw_match, + "hash-chain backend should match dictionary-primed history in first full block" + ); +} + +#[test] +fn prime_with_dictionary_budget_shrinks_after_hc_eviction() { + let mut driver = MatchGeneratorDriver::new(8, 1); + driver.reset(CompressionLevel::Better); + // Use a small live window so dictionary-primed slices are evicted quickly. + driver.hc_matcher_mut().max_window_size = 8; + driver.reported_window_size = 8; + + let base_window = driver.hc_matcher().max_window_size; + driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]); + assert_eq!(driver.hc_matcher().max_window_size, base_window + 24); + + for block in [b"AAAAAAAA", b"BBBBBBBB"] { + let mut space = driver.get_next_space(); + space.clear(); + space.extend_from_slice(block); + driver.commit_space(space); + driver.skip_matching(); + } + + assert_eq!( + driver.dictionary_retained_budget, 0, + "dictionary budget should be fully retired once primed dict slices are evicted" + ); + assert_eq!( + driver.hc_matcher().max_window_size, + base_window, + "retired dictionary budget must not remain reusable for live history" + ); +} + #[test] fn suffix_store_with_single_slot_does_not_panic_on_keying() { let mut suffixes = SuffixStore::with_capacity(1); @@ -1704,8 +2303,12 @@ fn fastest_reset_uses_interleaved_hash_fill_step() { driver.reset(CompressionLevel::Fastest); assert_eq!(driver.match_generator.hash_fill_step, FAST_HASH_FILL_STEP); + // Better uses the HashChain backend with lazy2; verify that the backend switch + // happened and the lazy_depth is configured correctly. driver.reset(CompressionLevel::Better); - assert_eq!(driver.match_generator.hash_fill_step, 1); + assert_eq!(driver.active_backend, MatcherBackend::HashChain); + assert_eq!(driver.window_size(), BETTER_DEFAULT_WINDOW_SIZE as u64); + assert_eq!(driver.hc_matcher().lazy_depth, 2); } #[test] diff --git a/zstd/src/encoding/mod.rs b/zstd/src/encoding/mod.rs index f001449f..837fab2b 100644 --- a/zstd/src/encoding/mod.rs +++ b/zstd/src/encoding/mod.rs @@ -61,7 +61,16 @@ pub enum CompressionLevel { Default, /// This level is roughly equivalent to Zstd level 7. /// - /// UNIMPLEMENTED + /// Uses the hash-chain matcher with a lazy2 matching strategy: the encoder + /// evaluates up to two positions ahead before committing to a match, + /// trading speed for a better compression ratio than [`CompressionLevel::Default`]. + /// + /// **Limitation:** hash-chain tables use 32-bit positions. For single-frame + /// inputs exceeding ~4 GiB, matches can still be found for roughly one + /// window past that point; once all in-window positions exceed `u32::MAX` + /// (≈4 GiB + window size), matching becomes effectively repcode-only. + /// Prefer [`CompressionLevel::Default`] for very large single-frame streams + /// until table rebasing is implemented. Better, /// This level is roughly equivalent to Zstd level 11. /// diff --git a/zstd/src/encoding/streaming_encoder.rs b/zstd/src/encoding/streaming_encoder.rs index 12a0172f..f50ba097 100644 --- a/zstd/src/encoding/streaming_encoder.rs +++ b/zstd/src/encoding/streaming_encoder.rs @@ -9,7 +9,7 @@ use core::hash::Hasher; #[cfg(feature = "hash")] use twox_hash::XxHash64; -use crate::encoding::levels::compress_fastest; +use crate::encoding::levels::compress_block_encoded; use crate::encoding::{ CompressionLevel, MatchGeneratorDriver, Matcher, block_header::BlockHeader, frame_compressor::CompressState, frame_compressor::FseTables, frame_header::FrameHeader, @@ -208,7 +208,7 @@ impl StreamingEncoder { fn allocate_pending_space(&mut self, block_capacity: usize) -> Vec { let mut space = match self.compression_level { - CompressionLevel::Fastest | CompressionLevel::Default => { + CompressionLevel::Fastest | CompressionLevel::Default | CompressionLevel::Better => { self.state.matcher.get_next_space() } _ => Vec::new(), @@ -262,9 +262,10 @@ impl StreamingEncoder { match self.compression_level { CompressionLevel::Uncompressed | CompressionLevel::Fastest - | CompressionLevel::Default => Ok(()), + | CompressionLevel::Default + | CompressionLevel::Better => Ok(()), _ => Err(invalid_input_error( - "streaming encoder currently supports Uncompressed/Fastest/Default only", + "streaming encoder currently supports Uncompressed/Fastest/Default/Better only", )), } } @@ -297,16 +298,18 @@ impl StreamingEncoder { header.serialize(&mut encoded); encoded.extend_from_slice(block); } - CompressionLevel::Fastest | CompressionLevel::Default => { + CompressionLevel::Fastest + | CompressionLevel::Default + | CompressionLevel::Better => { let block = raw_block.take().expect("raw block missing"); debug_assert!(!block.is_empty(), "empty blocks handled above"); - compress_fastest(&mut self.state, last_block, block, &mut encoded); + compress_block_encoded(&mut self.state, last_block, block, &mut encoded); moved_into_matcher = true; } _ => { return Err(( invalid_input_error( - "streaming encoder currently supports Uncompressed/Fastest/Default only", + "streaming encoder currently supports Uncompressed/Fastest/Default/Better only", ), raw_block.unwrap_or_default(), )); @@ -729,11 +732,17 @@ mod tests { } #[test] - fn better_level_returns_unsupported_error() { + fn better_level_streaming_roundtrip() { + let payload = b"better-level-streaming-test".repeat(256); let mut encoder = StreamingEncoder::new(Vec::new(), CompressionLevel::Better); - let err = encoder.write_all(b"payload").unwrap_err(); - assert_eq!(err.kind(), ErrorKind::InvalidInput); - assert!(encoder.finish().is_err()); + for chunk in payload.chunks(53) { + encoder.write_all(chunk).unwrap(); + } + let compressed = encoder.finish().unwrap(); + let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap(); + let mut decoded = Vec::new(); + decoder.read_to_end(&mut decoded).unwrap(); + assert_eq!(decoded, payload); } #[test] @@ -749,7 +758,7 @@ mod tests { #[test] fn unsupported_level_write_fails_before_emitting_frame_header() { - let mut encoder = StreamingEncoder::new(Vec::new(), CompressionLevel::Better); + let mut encoder = StreamingEncoder::new(Vec::new(), CompressionLevel::Best); assert!(encoder.write_all(b"payload").is_err()); assert_eq!(encoder.get_ref().len(), 0); } diff --git a/zstd/src/tests/roundtrip_integrity.rs b/zstd/src/tests/roundtrip_integrity.rs index 6e1f7df9..b10a5ff8 100644 --- a/zstd/src/tests/roundtrip_integrity.rs +++ b/zstd/src/tests/roundtrip_integrity.rs @@ -37,15 +37,19 @@ fn generate_compressible(seed: u64, len: usize) -> Vec { data } -/// Roundtrip using compress_to_vec (simple API). -fn roundtrip_simple(data: &[u8]) -> Vec { - let compressed = compress_to_vec(data, CompressionLevel::Fastest); +/// Roundtrip using compress_to_vec at the given level. +fn roundtrip_at_level(data: &[u8], level: CompressionLevel) -> Vec { + let compressed = compress_to_vec(data, level); let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap(); let mut result = Vec::new(); decoder.read_to_end(&mut result).unwrap(); result } +fn roundtrip_simple(data: &[u8]) -> Vec { + roundtrip_at_level(data, CompressionLevel::Fastest) +} + fn compress_streaming(data: &[u8]) -> Vec { let mut compressed = Vec::new(); let mut compressor = FrameCompressor::new(CompressionLevel::Fastest); @@ -55,22 +59,33 @@ fn compress_streaming(data: &[u8]) -> Vec { compressed } -/// Roundtrip using FrameCompressor (streaming API). -fn roundtrip_streaming(data: &[u8]) -> Vec { - let compressed = compress_streaming(data); +/// Roundtrip using FrameCompressor at the given level. +fn roundtrip_streaming_at_level(data: &[u8], level: CompressionLevel) -> Vec { + let mut compressed = Vec::new(); + let mut compressor = FrameCompressor::new(level); + compressor.set_source(data); + compressor.set_drain(&mut compressed); + compressor.compress(); let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap(); let mut result = Vec::new(); decoder.read_to_end(&mut result).unwrap(); result } -/// Roundtrip using compress_to_vec with the default compression level. +fn roundtrip_streaming(data: &[u8]) -> Vec { + roundtrip_streaming_at_level(data, CompressionLevel::Fastest) +} + fn roundtrip_default(data: &[u8]) -> Vec { - let compressed = compress_to_vec(data, CompressionLevel::Default); - let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap(); - let mut result = Vec::new(); - decoder.read_to_end(&mut result).unwrap(); - result + roundtrip_at_level(data, CompressionLevel::Default) +} + +fn roundtrip_better(data: &[u8]) -> Vec { + roundtrip_at_level(data, CompressionLevel::Better) +} + +fn roundtrip_better_streaming(data: &[u8]) -> Vec { + roundtrip_streaming_at_level(data, CompressionLevel::Better) } /// Generate data with limited alphabet for better Huffman compressibility @@ -358,3 +373,97 @@ fn roundtrip_default_level_multi_block_regression() { let data = generate_compressible(1337, 512 * 1024); assert_eq!(roundtrip_default(&data), data); } + +#[test] +fn roundtrip_better_level_compressible() { + let data = generate_compressible(888, 64 * 1024); + assert_eq!(roundtrip_better(&data), data); +} + +#[test] +fn roundtrip_better_level_random() { + let data = generate_data(999, 64 * 1024); + assert_eq!(roundtrip_better(&data), data); +} + +#[test] +fn roundtrip_better_level_multi_block() { + let data = generate_compressible(2001, 512 * 1024); + assert_eq!(roundtrip_better(&data), data); +} + +#[test] +fn roundtrip_better_level_streaming() { + let data = generate_compressible(3003, 64 * 1024); + assert_eq!(roundtrip_better_streaming(&data), data); +} + +#[test] +fn roundtrip_better_level_edge_cases() { + assert_eq!(roundtrip_better(&[]), Vec::::new()); + assert_eq!(roundtrip_better(&[0x42]), vec![0x42]); + let zeros = vec![0u8; 100_000]; + assert_eq!(roundtrip_better(&zeros), zeros); + let ascending: Vec = (0..=255u8).cycle().take(100_000).collect(); + assert_eq!(roundtrip_better(&ascending), ascending); +} + +#[test] +fn roundtrip_better_level_repeat_offsets() { + let data = repeat_offset_fixture(b"ABCDE12345", 10_000); + assert_eq!(roundtrip_better(&data), data); +} + +#[test] +fn roundtrip_better_level_large_literals() { + let data = generate_huffman_friendly(200, 128 * 1024, 64); + assert_eq!(roundtrip_better(&data), data); +} + +/// Better (lazy2) should compress close to or better than Default (lazy) on +/// structured, compressible data. Lazy2 may be marginally worse on some inputs +/// due to skipping otherwise-adequate matches while looking further ahead. +#[test] +fn better_level_compresses_close_to_default() { + let data = repeat_offset_fixture(b"HelloWorld", (256 * 1024) / 12 + 1); + let compressed_default = compress_to_vec(&data[..], CompressionLevel::Default); + let compressed_better = compress_to_vec(&data[..], CompressionLevel::Better); + // Allow up to 1% regression; lazy2 optimizes for broader data patterns. + assert!( + (compressed_better.len() as u64) * 100 <= (compressed_default.len() as u64) * 101, + "Better level should stay within 1% of Default. \ + better={} bytes, default={} bytes", + compressed_better.len(), + compressed_default.len(), + ); +} + +/// Exercise the 8 MiB window: place a repeated pattern beyond Default's +/// 4 MiB window so only Better (8 MiB) can match it. +#[test] +fn roundtrip_better_level_large_window() { + // Two identical 256 KiB regions separated by a 4.5 MiB compressible gap. + // The gap uses a different seed so it doesn't share patterns with the + // regions, but being compressible means hash chains aren't fully + // destroyed by random noise. Better's 8 MiB window can still reach the + // first region; Default's 4 MiB window cannot. + let region = generate_compressible(42, 256 * 1024); + let gap = generate_compressible(9999, 4 * 1024 * 1024 + 512 * 1024); + let mut data = Vec::with_capacity(region.len() + gap.len() + region.len()); + data.extend_from_slice(®ion); + data.extend_from_slice(&gap); + data.extend_from_slice(®ion); + + assert_eq!(roundtrip_better(&data), data); + + // Better should compress the duplicated region; Default cannot reach it. + let compressed_better = compress_to_vec(&data[..], CompressionLevel::Better); + let compressed_default = compress_to_vec(&data[..], CompressionLevel::Default); + assert!( + compressed_better.len() < compressed_default.len(), + "Better (8 MiB window) should beat Default (4 MiB) across 4.5 MiB gap. \ + better={} default={}", + compressed_better.len(), + compressed_default.len(), + ); +} diff --git a/zstd/tests/cross_validation.rs b/zstd/tests/cross_validation.rs index 8eff6ff7..a2fe0801 100644 --- a/zstd/tests/cross_validation.rs +++ b/zstd/tests/cross_validation.rs @@ -236,3 +236,32 @@ fn default_level_stays_within_ten_percent_of_ffi_level3_on_corpus_proxy() { ffi_level3.len() ); } + +#[test] +fn cross_rust_better_compress_ffi_decompress_regression() { + let data = include_bytes!("../decodecorpus_files/z000033"); + let compressed = compress_to_vec(data.as_slice(), CompressionLevel::Better); + let result = zstd::decode_all(compressed.as_slice()).unwrap(); + assert_eq!( + data.as_slice(), + result.as_slice(), + "rust better→ffi roundtrip failed" + ); +} + +/// Verify that Better compresses better than Default on the corpus proxy. +/// The hash-chain matcher with lazy2 should find longer matches than Dfast on +/// this reference input. +#[test] +fn better_level_beats_default_on_corpus_proxy() { + let data = include_bytes!("../decodecorpus_files/z000033"); + let default = compress_to_vec(data.as_slice(), CompressionLevel::Default); + let better = compress_to_vec(data.as_slice(), CompressionLevel::Better); + + assert!( + better.len() < default.len(), + "Better should compress better than Default on corpus proxy. better={} default={}", + better.len(), + default.len() + ); +}