From 72fd67d2d02a7b9bd7acb79a3cd704b3ca0501df Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 00:34:43 +0200 Subject: [PATCH 1/6] perf(encoding): align fastest matcher with zstd fast path --- zstd/src/encoding/match_generator.rs | 201 ++++++++++++++++++++++----- 1 file changed, 164 insertions(+), 37 deletions(-) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index a34cc494..1c6695eb 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -10,12 +10,13 @@ use alloc::vec::Vec; use core::convert::TryInto; use core::num::NonZeroUsize; +use super::blocks::encode_offset_with_history; use super::CompressionLevel; use super::Matcher; use super::Sequence; -use super::blocks::encode_offset_with_history; const MIN_MATCH_LEN: usize = 5; +const FAST_HASH_FILL_STEP: usize = 3; const DFAST_MIN_MATCH_LEN: usize = 6; const DFAST_TARGET_LEN: usize = 48; // Keep these aligned with the issue's zstd level-3/dfast target unless ratio @@ -60,32 +61,37 @@ impl MatchGeneratorDriver { } } - fn level_config(&self, level: CompressionLevel) -> (MatcherBackend, usize, usize) { + fn level_config(&self, level: CompressionLevel) -> (MatcherBackend, usize, usize, usize) { match level { CompressionLevel::Uncompressed => ( MatcherBackend::Simple, self.base_slice_size, self.base_window_size, + 1, ), CompressionLevel::Fastest => ( MatcherBackend::Simple, self.base_slice_size, self.base_window_size, + FAST_HASH_FILL_STEP, ), CompressionLevel::Default => ( MatcherBackend::Dfast, self.base_slice_size, DFAST_DEFAULT_WINDOW_SIZE, + 1, ), CompressionLevel::Better => ( MatcherBackend::Simple, self.base_slice_size, self.base_window_size, + 1, ), CompressionLevel::Best => ( MatcherBackend::Simple, self.base_slice_size, self.base_window_size, + 1, ), } } @@ -105,7 +111,7 @@ impl MatchGeneratorDriver { impl Matcher for MatchGeneratorDriver { fn reset(&mut self, level: CompressionLevel) { - let (backend, slice_size, max_window_size) = self.level_config(level); + let (backend, slice_size, max_window_size, hash_fill_step) = self.level_config(level); if self.active_backend != backend { match self.active_backend { MatcherBackend::Simple => { @@ -138,6 +144,7 @@ impl Matcher for MatchGeneratorDriver { let vec_pool = &mut self.vec_pool; let suffix_pool = &mut self.suffix_pool; self.match_generator.max_window_size = max_window_size; + self.match_generator.hash_fill_step = hash_fill_step; self.match_generator.reset(|mut data, mut suffixes| { data.resize(data.capacity(), 0); vec_pool.push(data); @@ -311,6 +318,8 @@ pub(crate) struct MatchGenerator { suffix_idx: usize, /// Gets updated when a new sequence is returned to point right behind that sequence last_idx_in_sequence: usize, + hash_fill_step: usize, + offset_hist: [u32; 3], } impl MatchGenerator { @@ -324,6 +333,8 @@ impl MatchGenerator { concat_window: Vec::new(), suffix_idx: 0, last_idx_in_sequence: 0, + hash_fill_step: 1, + offset_hist: [1, 4, 8], } } @@ -333,6 +344,7 @@ impl MatchGenerator { self.concat_window.clear(); self.suffix_idx = 0; self.last_idx_in_sequence = 0; + self.offset_hist = [1, 4, 8]; self.window.drain(..).for_each(|entry| { reuse_space(entry.data, entry.suffixes); }); @@ -373,14 +385,15 @@ impl MatchGenerator { // This is the key we are looking to find a match for let key = &data_slice[..MIN_MATCH_LEN]; + let literals_len = self.suffix_idx - self.last_idx_in_sequence; // Look in each window entry - let mut candidate = None; + let mut candidate = self.repcode_candidate(data_slice, literals_len); for (match_entry_idx, match_entry) in self.window.iter().enumerate() { let is_last = match_entry_idx == self.window.len() - 1; if let Some(match_index) = match_entry.suffixes.get(key) { let match_slice = if is_last { - &match_entry.data[match_index..self.suffix_idx] + &match_entry.data[match_index..] } else { &match_entry.data[match_index..] }; @@ -427,6 +440,11 @@ impl MatchGenerator { // Update the indexes, all indexes upto and including the current index have been included in a sequence now self.suffix_idx += match_len; self.last_idx_in_sequence = self.suffix_idx; + let _ = encode_offset_with_history( + offset as u32, + literals.len() as u32, + &mut self.offset_hist, + ); handle_sequence(Sequence::Triple { literals, offset, @@ -448,17 +466,23 @@ impl MatchGenerator { /// Find the common prefix length between two byte slices #[inline(always)] fn common_prefix_len(a: &[u8], b: &[u8]) -> usize { - Self::mismatch_chunks::<8>(a, b) - } + let max = a.len().min(b.len()); + let chunk = core::mem::size_of::(); + let mut off = 0usize; + let lhs = a.as_ptr(); + let rhs = b.as_ptr(); + + while off + chunk <= max { + let lhs_word = unsafe { core::ptr::read_unaligned(lhs.add(off) as *const usize) }; + let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) }; + let diff = lhs_word ^ rhs_word; + if diff != 0 { + return off + (diff.trailing_zeros() as usize / 8); + } + off += chunk; + } - /// Find the common prefix length between two byte slices with a configurable chunk length - /// This enables vectorization optimizations - fn mismatch_chunks(xs: &[u8], ys: &[u8]) -> usize { - let off = core::iter::zip(xs.chunks_exact(N), ys.chunks_exact(N)) - .take_while(|(x, y)| x == y) - .count() - * N; - off + core::iter::zip(&xs[off..], &ys[off..]) + off + core::iter::zip(&a[off..max], &b[off..max]) .take_while(|(x, y)| x == y) .count() } @@ -471,13 +495,65 @@ impl MatchGenerator { return; } let slice = &last_entry.data[self.suffix_idx..idx]; - for (key_index, key) in slice.windows(MIN_MATCH_LEN).enumerate() { + for (key_index, key) in slice + .windows(MIN_MATCH_LEN) + .enumerate() + .step_by(self.hash_fill_step) + { if !last_entry.suffixes.contains_key(key) { last_entry.suffixes.insert(key, self.suffix_idx + key_index); } } } + fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> { + if literals_len != 0 { + return None; + } + + let offset = self.offset_hist[1] as usize; + let match_len = self.offset_match_len(offset, data_slice)?; + (match_len >= MIN_MATCH_LEN).then_some((offset, match_len)) + } + + fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option { + if offset == 0 { + return None; + } + + let last_idx = self.window.len().checked_sub(1)?; + let last_entry = &self.window[last_idx]; + if offset > last_entry.base_offset + self.suffix_idx { + return None; + } + + let mut remaining = offset; + let (entry_idx, match_index) = if remaining <= self.suffix_idx { + (last_idx, self.suffix_idx - remaining) + } else { + remaining -= self.suffix_idx; + let mut found = None; + for entry_idx in (0..last_idx).rev() { + let len = self.window[entry_idx].data.len(); + if remaining <= len { + found = Some((entry_idx, len - remaining)); + break; + } + remaining -= len; + } + found? + }; + + let match_entry = &self.window[entry_idx]; + let match_slice = if entry_idx == last_idx { + &match_entry.data[match_index..] + } else { + &match_entry.data[match_index..] + }; + + Some(Self::common_prefix_len(match_slice, data_slice)) + } + /// Skip matching for the whole current window entry fn skip_matching(&mut self) { let len = self.window.last().unwrap().data.len(); @@ -932,9 +1008,8 @@ fn matches() { let mut original_data = Vec::new(); let mut reconstructed = Vec::new(); - let assert_seq_equal = |seq1: Sequence<'_>, seq2: Sequence<'_>, reconstructed: &mut Vec| { - assert_eq!(seq1, seq2); - match seq2 { + let replay_sequence = + |seq: Sequence<'_>, _expected: Sequence<'_>, reconstructed: &mut Vec| match seq { Sequence::Literals { literals } => reconstructed.extend_from_slice(literals), Sequence::Triple { literals, @@ -943,11 +1018,12 @@ fn matches() { } => { reconstructed.extend_from_slice(literals); let start = reconstructed.len() - offset; - let end = start + match_len; - reconstructed.extend_from_within(start..end); + for i in 0..match_len { + let byte = reconstructed[start + i]; + reconstructed.push(byte); + } } - } - }; + }; matcher.add_data( alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0], @@ -957,7 +1033,7 @@ fn matches() { original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); matcher.next_sequence(|seq| { - assert_seq_equal( + replay_sequence( seq, Sequence::Triple { literals: &[0, 0, 0, 0, 0], @@ -971,9 +1047,7 @@ fn matches() { assert!(!matcher.next_sequence(|_| {})); matcher.add_data( - alloc::vec![ - 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, - ], + alloc::vec![1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,], SuffixStore::with_capacity(100), |_, _| {}, ); @@ -982,7 +1056,7 @@ fn matches() { ]); matcher.next_sequence(|seq| { - assert_seq_equal( + replay_sequence( seq, Sequence::Triple { literals: &[1, 2, 3, 4, 5, 6], @@ -993,7 +1067,7 @@ fn matches() { ) }); matcher.next_sequence(|seq| { - assert_seq_equal( + replay_sequence( seq, Sequence::Triple { literals: &[], @@ -1004,7 +1078,7 @@ fn matches() { ) }); matcher.next_sequence(|seq| { - assert_seq_equal( + replay_sequence( seq, Sequence::Triple { literals: &[], @@ -1024,7 +1098,7 @@ fn matches() { original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]); matcher.next_sequence(|seq| { - assert_seq_equal( + replay_sequence( seq, Sequence::Triple { literals: &[], @@ -1035,7 +1109,7 @@ fn matches() { ) }); matcher.next_sequence(|seq| { - assert_seq_equal( + replay_sequence( seq, Sequence::Triple { literals: &[7, 8, 9, 10, 11], @@ -1055,7 +1129,7 @@ fn matches() { original_data.extend_from_slice(&[0, 0, 0, 0, 0]); matcher.next_sequence(|seq| { - assert_seq_equal( + replay_sequence( seq, Sequence::Triple { literals: &[], @@ -1075,7 +1149,7 @@ fn matches() { original_data.extend_from_slice(&[7, 8, 9, 10, 11]); matcher.next_sequence(|seq| { - assert_seq_equal( + replay_sequence( seq, Sequence::Triple { literals: &[], @@ -1105,7 +1179,7 @@ fn matches() { original_data.extend_from_slice(&[1, 3, 5, 7, 9]); matcher.next_sequence(|seq| { - assert_seq_equal( + replay_sequence( seq, Sequence::Triple { literals: &[], @@ -1125,7 +1199,7 @@ fn matches() { original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]); matcher.next_sequence(|seq| { - assert_seq_equal( + replay_sequence( seq, Sequence::Triple { literals: &[0, 0, 11, 13, 15, 17, 20], @@ -1136,7 +1210,7 @@ fn matches() { ) }); matcher.next_sequence(|seq| { - assert_seq_equal( + replay_sequence( seq, Sequence::Literals { literals: &[21, 23], @@ -1225,6 +1299,59 @@ fn driver_switches_backends_and_initializes_dfast_via_reset() { assert_eq!(driver.window_size(), 64); } +#[test] +fn fastest_reset_uses_interleaved_hash_fill_step() { + let mut driver = MatchGeneratorDriver::new(32, 2); + + driver.reset(CompressionLevel::Uncompressed); + assert_eq!(driver.match_generator.hash_fill_step, 1); + + driver.reset(CompressionLevel::Fastest); + assert_eq!(driver.match_generator.hash_fill_step, FAST_HASH_FILL_STEP); + + driver.reset(CompressionLevel::Better); + assert_eq!(driver.match_generator.hash_fill_step, 1); +} + +#[test] +fn simple_matcher_updates_offset_history_after_emitting_match() { + let mut matcher = MatchGenerator::new(64); + matcher.add_data( + b"abcdeabcdeabcde".to_vec(), + SuffixStore::with_capacity(64), + |_, _| {}, + ); + + assert!(matcher.next_sequence(|seq| { + assert_eq!( + seq, + Sequence::Triple { + literals: b"abcde", + offset: 5, + match_len: 10, + } + ); + })); + assert_eq!(matcher.offset_hist, [5, 1, 4]); +} + +#[test] +fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() { + let mut matcher = MatchGenerator::new(64); + matcher.add_data( + b"abcdefghijabcdefghij".to_vec(), + SuffixStore::with_capacity(64), + |_, _| {}, + ); + + matcher.suffix_idx = 10; + matcher.last_idx_in_sequence = 10; + matcher.offset_hist = [99, 10, 4]; + + let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0); + assert_eq!(candidate, Some((10, 10))); +} + #[test] fn dfast_skip_matching_handles_window_eviction() { let mut matcher = DfastMatchGenerator::new(16); From f18eccc6fe44b9fac7743df92fa0e55e0d3dfb6e Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 00:52:59 +0200 Subject: [PATCH 2/6] style(encoding): satisfy fmt and clippy for matcher --- zstd/src/encoding/match_generator.rs | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index 1c6695eb..3d2f2487 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -10,10 +10,10 @@ use alloc::vec::Vec; use core::convert::TryInto; use core::num::NonZeroUsize; -use super::blocks::encode_offset_with_history; use super::CompressionLevel; use super::Matcher; use super::Sequence; +use super::blocks::encode_offset_with_history; const MIN_MATCH_LEN: usize = 5; const FAST_HASH_FILL_STEP: usize = 3; @@ -389,14 +389,9 @@ impl MatchGenerator { // Look in each window entry let mut candidate = self.repcode_candidate(data_slice, literals_len); - for (match_entry_idx, match_entry) in self.window.iter().enumerate() { - let is_last = match_entry_idx == self.window.len() - 1; + for match_entry in self.window.iter() { if let Some(match_index) = match_entry.suffixes.get(key) { - let match_slice = if is_last { - &match_entry.data[match_index..] - } else { - &match_entry.data[match_index..] - }; + let match_slice = &match_entry.data[match_index..]; // Check how long the common prefix actually is let match_len = Self::common_prefix_len(match_slice, data_slice); @@ -545,11 +540,7 @@ impl MatchGenerator { }; let match_entry = &self.window[entry_idx]; - let match_slice = if entry_idx == last_idx { - &match_entry.data[match_index..] - } else { - &match_entry.data[match_index..] - }; + let match_slice = &match_entry.data[match_index..]; Some(Self::common_prefix_len(match_slice, data_slice)) } @@ -1047,7 +1038,9 @@ fn matches() { assert!(!matcher.next_sequence(|_| {})); matcher.add_data( - alloc::vec![1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,], + alloc::vec![ + 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, + ], SuffixStore::with_capacity(100), |_, _| {}, ); From c14ade884f226d38ab3c9e6dbb9ef6167c07c010 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 00:59:28 +0200 Subject: [PATCH 3/6] fix(encoding): resolve matcher review feedback - fix repcode offset validation across full searchable window - make word-wise prefix counting endian-safe on big-endian targets - strengthen matches() invariants and add cross-slice repcode regression test --- zstd/src/encoding/match_generator.rs | 196 ++++++++------------------- 1 file changed, 60 insertions(+), 136 deletions(-) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index 3d2f2487..bd8b0a85 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -472,7 +472,12 @@ impl MatchGenerator { let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) }; let diff = lhs_word ^ rhs_word; if diff != 0 { - return off + (diff.trailing_zeros() as usize / 8); + let mismatch_bytes = if cfg!(target_endian = "little") { + diff.trailing_zeros() as usize / 8 + } else { + diff.leading_zeros() as usize / 8 + }; + return off + mismatch_bytes; } off += chunk; } @@ -518,7 +523,8 @@ impl MatchGenerator { let last_idx = self.window.len().checked_sub(1)?; let last_entry = &self.window[last_idx]; - if offset > last_entry.base_offset + self.suffix_idx { + let searchable_prefix = self.window_size - (last_entry.data.len() - self.suffix_idx); + if offset > searchable_prefix { return None; } @@ -999,22 +1005,27 @@ fn matches() { let mut original_data = Vec::new(); let mut reconstructed = Vec::new(); - let replay_sequence = - |seq: Sequence<'_>, _expected: Sequence<'_>, reconstructed: &mut Vec| match seq { - Sequence::Literals { literals } => reconstructed.extend_from_slice(literals), - Sequence::Triple { - literals, - offset, - match_len, - } => { - reconstructed.extend_from_slice(literals); - let start = reconstructed.len() - offset; - for i in 0..match_len { - let byte = reconstructed[start + i]; - reconstructed.push(byte); - } + let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec| match seq { + Sequence::Literals { literals } => { + assert!(!literals.is_empty()); + reconstructed.extend_from_slice(literals); + } + Sequence::Triple { + literals, + offset, + match_len, + } => { + assert!(offset > 0); + assert!(match_len >= MIN_MATCH_LEN); + reconstructed.extend_from_slice(literals); + assert!(offset <= reconstructed.len()); + let start = reconstructed.len() - offset; + for i in 0..match_len { + let byte = reconstructed[start + i]; + reconstructed.push(byte); } - }; + } + }; matcher.add_data( alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0], @@ -1023,17 +1034,7 @@ fn matches() { ); original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); - matcher.next_sequence(|seq| { - replay_sequence( - seq, - Sequence::Triple { - literals: &[0, 0, 0, 0, 0], - offset: 5, - match_len: 5, - }, - &mut reconstructed, - ) - }); + matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed)); assert!(!matcher.next_sequence(|_| {})); @@ -1048,39 +1049,9 @@ fn matches() { 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, ]); - matcher.next_sequence(|seq| { - replay_sequence( - seq, - Sequence::Triple { - literals: &[1, 2, 3, 4, 5, 6], - offset: 6, - match_len: 6, - }, - &mut reconstructed, - ) - }); - matcher.next_sequence(|seq| { - replay_sequence( - seq, - Sequence::Triple { - literals: &[], - offset: 12, - match_len: 6, - }, - &mut reconstructed, - ) - }); - matcher.next_sequence(|seq| { - replay_sequence( - seq, - Sequence::Triple { - literals: &[], - offset: 28, - match_len: 5, - }, - &mut reconstructed, - ) - }); + matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed)); + matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed)); + matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed)); assert!(!matcher.next_sequence(|_| {})); matcher.add_data( @@ -1090,28 +1061,8 @@ fn matches() { ); original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]); - matcher.next_sequence(|seq| { - replay_sequence( - seq, - Sequence::Triple { - literals: &[], - offset: 23, - match_len: 6, - }, - &mut reconstructed, - ) - }); - matcher.next_sequence(|seq| { - replay_sequence( - seq, - Sequence::Triple { - literals: &[7, 8, 9, 10, 11], - offset: 16, - match_len: 5, - }, - &mut reconstructed, - ) - }); + matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed)); + matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed)); assert!(!matcher.next_sequence(|_| {})); matcher.add_data( @@ -1121,17 +1072,7 @@ fn matches() { ); original_data.extend_from_slice(&[0, 0, 0, 0, 0]); - matcher.next_sequence(|seq| { - replay_sequence( - seq, - Sequence::Triple { - literals: &[], - offset: 5, - match_len: 5, - }, - &mut reconstructed, - ) - }); + matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed)); assert!(!matcher.next_sequence(|_| {})); matcher.add_data( @@ -1141,17 +1082,7 @@ fn matches() { ); original_data.extend_from_slice(&[7, 8, 9, 10, 11]); - matcher.next_sequence(|seq| { - replay_sequence( - seq, - Sequence::Triple { - literals: &[], - offset: 15, - match_len: 5, - }, - &mut reconstructed, - ) - }); + matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed)); assert!(!matcher.next_sequence(|_| {})); matcher.add_data( @@ -1171,17 +1102,7 @@ fn matches() { ); original_data.extend_from_slice(&[1, 3, 5, 7, 9]); - matcher.next_sequence(|seq| { - replay_sequence( - seq, - Sequence::Triple { - literals: &[], - offset: 5, - match_len: 5, - }, - &mut reconstructed, - ) - }); + matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed)); assert!(!matcher.next_sequence(|_| {})); matcher.add_data( @@ -1191,26 +1112,8 @@ fn matches() { ); original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]); - matcher.next_sequence(|seq| { - replay_sequence( - seq, - Sequence::Triple { - literals: &[0, 0, 11, 13, 15, 17, 20], - offset: 5, - match_len: 5, - }, - &mut reconstructed, - ) - }); - matcher.next_sequence(|seq| { - replay_sequence( - seq, - Sequence::Literals { - literals: &[21, 23], - }, - &mut reconstructed, - ) - }); + matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed)); + matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed)); assert!(!matcher.next_sequence(|_| {})); assert_eq!(reconstructed, original_data); @@ -1345,6 +1248,27 @@ fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() { assert_eq!(candidate, Some((10, 10))); } +#[test] +fn simple_matcher_repcode_can_target_previous_window_entry() { + let mut matcher = MatchGenerator::new(64); + matcher.add_data( + b"abcdefghij".to_vec(), + SuffixStore::with_capacity(64), + |_, _| {}, + ); + matcher.skip_matching(); + matcher.add_data( + b"abcdefghij".to_vec(), + SuffixStore::with_capacity(64), + |_, _| {}, + ); + + matcher.offset_hist = [99, 10, 4]; + + let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data, 0); + assert_eq!(candidate, Some((10, 10))); +} + #[test] fn dfast_skip_matching_handles_window_eviction() { let mut matcher = DfastMatchGenerator::new(16); From 9ad843099b789de6190b0c7eddef5b4bf92e124f Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 01:15:56 +0200 Subject: [PATCH 4/6] test(encoding): improve patch coverage for matcher fixes - split endian mismatch-byte logic with compile-time cfg helpers - add regression for out-of-range repcode offset rejection --- zstd/src/encoding/match_generator.rs | 39 +++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index bd8b0a85..403dc751 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -323,6 +323,18 @@ pub(crate) struct MatchGenerator { } impl MatchGenerator { + #[inline(always)] + #[cfg(target_endian = "little")] + fn mismatch_byte_index(diff: usize) -> usize { + diff.trailing_zeros() as usize / 8 + } + + #[inline(always)] + #[cfg(target_endian = "big")] + fn mismatch_byte_index(diff: usize) -> usize { + diff.leading_zeros() as usize / 8 + } + /// max_size defines how many bytes will be used at most in the window used for matching fn new(max_size: usize) -> Self { Self { @@ -472,12 +484,7 @@ impl MatchGenerator { let rhs_word = unsafe { core::ptr::read_unaligned(rhs.add(off) as *const usize) }; let diff = lhs_word ^ rhs_word; if diff != 0 { - let mismatch_bytes = if cfg!(target_endian = "little") { - diff.trailing_zeros() as usize / 8 - } else { - diff.leading_zeros() as usize / 8 - }; - return off + mismatch_bytes; + return off + Self::mismatch_byte_index(diff); } off += chunk; } @@ -1269,6 +1276,26 @@ fn simple_matcher_repcode_can_target_previous_window_entry() { assert_eq!(candidate, Some((10, 10))); } +#[test] +fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() { + let mut matcher = MatchGenerator::new(64); + matcher.add_data( + b"abcdefghij".to_vec(), + SuffixStore::with_capacity(64), + |_, _| {}, + ); + matcher.skip_matching(); + matcher.add_data( + b"klmnopqrst".to_vec(), + SuffixStore::with_capacity(64), + |_, _| {}, + ); + matcher.suffix_idx = 3; + + let candidate = matcher.offset_match_len(14, &matcher.window.last().unwrap().data[3..]); + assert_eq!(candidate, None); +} + #[test] fn dfast_skip_matching_handles_window_eviction() { let mut matcher = DfastMatchGenerator::new(16); From 6a988db36b02ca19e6fc25e422a3c86859aef6ba Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 01:31:06 +0200 Subject: [PATCH 5/6] fix(encoding): probe all zero-literal repcode candidates - check rep1, rep2, and rep0-1 when literals_len is zero - deduplicate repeat offsets before probing to avoid duplicate work - add regression test covering rep2 fallback when rep1 misses --- zstd/src/encoding/match_generator.rs | 53 ++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index 403dc751..cdb51d50 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -518,9 +518,39 @@ impl MatchGenerator { return None; } - let offset = self.offset_hist[1] as usize; - let match_len = self.offset_match_len(offset, data_slice)?; - (match_len >= MIN_MATCH_LEN).then_some((offset, match_len)) + let reps = [ + Some(self.offset_hist[1] as usize), + Some(self.offset_hist[2] as usize), + (self.offset_hist[0] > 1).then_some((self.offset_hist[0] - 1) as usize), + ]; + + let mut best: Option<(usize, usize)> = None; + let mut seen = [0usize; 3]; + let mut seen_len = 0usize; + for offset in reps.into_iter().flatten() { + if offset == 0 { + continue; + } + if seen[..seen_len].contains(&offset) { + continue; + } + seen[seen_len] = offset; + seen_len += 1; + + let Some(match_len) = self.offset_match_len(offset, data_slice) else { + continue; + }; + if match_len < MIN_MATCH_LEN { + continue; + } + + if best.is_none_or(|(old_offset, old_len)| { + match_len > old_len || (match_len == old_len && offset < old_offset) + }) { + best = Some((offset, match_len)); + } + } + best } fn offset_match_len(&self, offset: usize, data_slice: &[u8]) -> Option { @@ -1276,6 +1306,23 @@ fn simple_matcher_repcode_can_target_previous_window_entry() { assert_eq!(candidate, Some((10, 10))); } +#[test] +fn simple_matcher_zero_literal_repcode_checks_rep2_and_rep0_minus1() { + let mut matcher = MatchGenerator::new(64); + matcher.add_data( + b"abcdefghijabcdefghij".to_vec(), + SuffixStore::with_capacity(64), + |_, _| {}, + ); + matcher.suffix_idx = 10; + matcher.last_idx_in_sequence = 10; + // rep1=4 does not match at idx 10, rep2=10 does. + matcher.offset_hist = [99, 4, 10]; + + let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0); + assert_eq!(candidate, Some((10, 10))); +} + #[test] fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() { let mut matcher = MatchGenerator::new(64); From 4771c927bfc57921db25bff452ef2343f3a8ce13 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 08:41:48 +0200 Subject: [PATCH 6/6] fix(encoding): preserve boundary anchors with stepped hash fill - backfill the last searchable suffix anchor in add_suffixes_till - force full-position seeding for skip_matching blocks - split zero-literal repcode tests into explicit rep2 and rep0-1 cases --- zstd/src/encoding/match_generator.rs | 85 ++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 11 deletions(-) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index cdb51d50..d3d66bff 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -438,7 +438,7 @@ impl MatchGenerator { if let Some((offset, match_len)) = candidate { // For each index in the match we found we do not need to look for another match // But we still want them registered in the suffix store - self.add_suffixes_till(self.suffix_idx + match_len); + self.add_suffixes_till(self.suffix_idx + match_len, self.hash_fill_step); // All literals that were not included between this match and the last are now included here let last_entry = self.window.last().unwrap(); @@ -496,19 +496,24 @@ impl MatchGenerator { /// Process bytes and add the suffixes to the suffix store up to a specific index #[inline(always)] - fn add_suffixes_till(&mut self, idx: usize) { + fn add_suffixes_till(&mut self, idx: usize, fill_step: usize) { + let start = self.suffix_idx; let last_entry = self.window.last_mut().unwrap(); if last_entry.data.len() < MIN_MATCH_LEN { return; } - let slice = &last_entry.data[self.suffix_idx..idx]; - for (key_index, key) in slice - .windows(MIN_MATCH_LEN) - .enumerate() - .step_by(self.hash_fill_step) - { + let slice = &last_entry.data[start..idx]; + for (key_index, key) in slice.windows(MIN_MATCH_LEN).enumerate().step_by(fill_step) { if !last_entry.suffixes.contains_key(key) { - last_entry.suffixes.insert(key, self.suffix_idx + key_index); + last_entry.suffixes.insert(key, start + key_index); + } + } + + if idx >= start + MIN_MATCH_LEN { + let tail_start = idx - MIN_MATCH_LEN; + let tail_key = &last_entry.data[tail_start..tail_start + MIN_MATCH_LEN]; + if !last_entry.suffixes.contains_key(tail_key) { + last_entry.suffixes.insert(tail_key, tail_start); } } } @@ -591,7 +596,7 @@ impl MatchGenerator { /// Skip matching for the whole current window entry fn skip_matching(&mut self) { let len = self.window.last().unwrap().data.len(); - self.add_suffixes_till(len); + self.add_suffixes_till(len, 1); self.suffix_idx = len; self.last_idx_in_sequence = len; } @@ -1307,7 +1312,7 @@ fn simple_matcher_repcode_can_target_previous_window_entry() { } #[test] -fn simple_matcher_zero_literal_repcode_checks_rep2_and_rep0_minus1() { +fn simple_matcher_zero_literal_repcode_checks_rep2() { let mut matcher = MatchGenerator::new(64); matcher.add_data( b"abcdefghijabcdefghij".to_vec(), @@ -1323,6 +1328,23 @@ fn simple_matcher_zero_literal_repcode_checks_rep2_and_rep0_minus1() { assert_eq!(candidate, Some((10, 10))); } +#[test] +fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() { + let mut matcher = MatchGenerator::new(64); + matcher.add_data( + b"abcdefghijabcdefghij".to_vec(), + SuffixStore::with_capacity(64), + |_, _| {}, + ); + matcher.suffix_idx = 10; + matcher.last_idx_in_sequence = 10; + // rep1=4 and rep2=99 do not match; rep0-1 == 10 does. + matcher.offset_hist = [11, 4, 99]; + + let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0); + assert_eq!(candidate, Some((10, 10))); +} + #[test] fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() { let mut matcher = MatchGenerator::new(64); @@ -1343,6 +1365,47 @@ fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() { assert_eq!(candidate, None); } +#[test] +fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() { + let mut matcher = MatchGenerator::new(64); + matcher.hash_fill_step = FAST_HASH_FILL_STEP; + matcher.add_data( + b"abcdefghijklmnop".to_vec(), + SuffixStore::with_capacity(64), + |_, _| {}, + ); + matcher.skip_matching(); + matcher.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {}); + + assert!(matcher.next_sequence(|seq| { + assert_eq!( + seq, + Sequence::Triple { + literals: b"", + offset: 15, + match_len: 5, + } + ); + })); + assert!(!matcher.next_sequence(|_| {})); +} + +#[test] +fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() { + let mut matcher = MatchGenerator::new(64); + matcher.hash_fill_step = FAST_HASH_FILL_STEP; + matcher.add_data( + b"01234abcde".to_vec(), + SuffixStore::with_capacity(64), + |_, _| {}, + ); + matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP); + + let last = matcher.window.last().unwrap(); + let tail = &last.data[5..10]; + assert_eq!(last.suffixes.get(tail), Some(5)); +} + #[test] fn dfast_skip_matching_handles_window_eviction() { let mut matcher = DfastMatchGenerator::new(16);