From 93fcfc14f11f4f63c483c34fd65fb10f3109c1b8 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 15:34:33 +0200 Subject: [PATCH 1/4] perf(encoding): interleave fastest hash fill insertion - add interleaved suffix insertion path for fill-step=3 in matcher - keep tail-anchor backfill behavior unchanged --- zstd/src/encoding/match_generator.rs | 56 ++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index d3d66bff..a5ea5ad7 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -502,10 +502,18 @@ impl MatchGenerator { if last_entry.data.len() < MIN_MATCH_LEN { return; } - let slice = &last_entry.data[start..idx]; - for (key_index, key) in slice.windows(MIN_MATCH_LEN).enumerate().step_by(fill_step) { - if !last_entry.suffixes.contains_key(key) { - last_entry.suffixes.insert(key, start + key_index); + let insert_limit = idx.saturating_sub(MIN_MATCH_LEN).saturating_add(1); + if insert_limit > start { + let data = last_entry.data.as_slice(); + let suffixes = &mut last_entry.suffixes; + if fill_step == FAST_HASH_FILL_STEP { + Self::add_suffixes_interleaved_fast(data, suffixes, start, insert_limit); + } else { + let mut pos = start; + while pos < insert_limit { + Self::insert_suffix_if_absent(data, suffixes, pos); + pos += fill_step; + } } } @@ -518,6 +526,46 @@ impl MatchGenerator { } } + #[inline(always)] + fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) { + let key = &data[pos..pos + MIN_MATCH_LEN]; + if !suffixes.contains_key(key) { + suffixes.insert(key, pos); + } + } + + #[inline(always)] + fn add_suffixes_interleaved_fast( + data: &[u8], + suffixes: &mut SuffixStore, + start: usize, + insert_limit: usize, + ) { + let lane = FAST_HASH_FILL_STEP; + let mut pos = start; + + // Pipeline-ish fill: compute and retire several hash positions per loop + // so the fastest path keeps multiple independent hash lookups in flight. + while pos + lane * 3 < insert_limit { + let p0 = pos; + let p1 = pos + lane; + let p2 = pos + lane * 2; + let p3 = pos + lane * 3; + + Self::insert_suffix_if_absent(data, suffixes, p0); + Self::insert_suffix_if_absent(data, suffixes, p1); + Self::insert_suffix_if_absent(data, suffixes, p2); + Self::insert_suffix_if_absent(data, suffixes, p3); + + pos += lane * 4; + } + + while pos < insert_limit { + Self::insert_suffix_if_absent(data, suffixes, pos); + pos += lane; + } + } + fn repcode_candidate(&self, data_slice: &[u8], literals_len: usize) -> Option<(usize, usize)> { if literals_len != 0 { return None; From 82de7e152698551c10370ddd97e621e764cdb041 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 15:54:23 +0200 Subject: [PATCH 2/4] chore(repo): ignore generated benchmark reports - add benchmark-report.md and benchmark-results.json to gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index d2a3666c..a51c840f 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,5 @@ benchmark-results.json benchmark-report.md fuzz/corpus .idea +benchmark-results.json +benchmark-report.md From 5f64b7c44cece2a31c707f9ca49568d10283b4d5 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 16:56:54 +0200 Subject: [PATCH 3/4] fix(review): dedupe ignores and guard suffix bounds - remove duplicate benchmark artifact entries in .gitignore - add debug_assert for insert_suffix_if_absent slice bound invariant --- .gitignore | 2 -- zstd/src/encoding/match_generator.rs | 7 +++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index a51c840f..d2a3666c 100644 --- a/.gitignore +++ b/.gitignore @@ -10,5 +10,3 @@ benchmark-results.json benchmark-report.md fuzz/corpus .idea -benchmark-results.json -benchmark-report.md diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index a5ea5ad7..6436cde5 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -528,6 +528,13 @@ impl MatchGenerator { #[inline(always)] fn insert_suffix_if_absent(data: &[u8], suffixes: &mut SuffixStore, pos: usize) { + debug_assert!( + pos + MIN_MATCH_LEN <= data.len(), + "insert_suffix_if_absent: pos {} + MIN_MATCH_LEN {} exceeds data.len() {}", + pos, + MIN_MATCH_LEN, + data.len() + ); let key = &data[pos..pos + MIN_MATCH_LEN]; if !suffixes.contains_key(key) { suffixes.insert(key, pos); From d2e90a4ac520c12635f46c8a5098a81e3a052162 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 17:16:46 +0200 Subject: [PATCH 4/4] test(matcher): cover interleaved fill boundaries - add regression for idx < MIN_MATCH_LEN in add_suffixes_till - add focused interleaved-position registration test - fix insert_limit saturation to preserve original windows() behavior --- zstd/src/encoding/match_generator.rs | 42 +++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs index 6436cde5..d0c91245 100644 --- a/zstd/src/encoding/match_generator.rs +++ b/zstd/src/encoding/match_generator.rs @@ -502,7 +502,7 @@ impl MatchGenerator { if last_entry.data.len() < MIN_MATCH_LEN { return; } - let insert_limit = idx.saturating_sub(MIN_MATCH_LEN).saturating_add(1); + let insert_limit = idx.saturating_sub(MIN_MATCH_LEN - 1); if insert_limit > start { let data = last_entry.data.as_slice(); let suffixes = &mut last_entry.suffixes; @@ -1461,6 +1461,46 @@ fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() { assert_eq!(last.suffixes.get(tail), Some(5)); } +#[test] +fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() { + let mut matcher = MatchGenerator::new(128); + matcher.hash_fill_step = FAST_HASH_FILL_STEP; + matcher.add_data( + b"abcdefghijklmnopqrstuvwxyz".to_vec(), + SuffixStore::with_capacity(1 << 16), + |_, _| {}, + ); + + matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP); + + let last = matcher.window.last().unwrap(); + let first_key = &last.data[..MIN_MATCH_LEN]; + assert_eq!(last.suffixes.get(first_key), None); +} + +#[test] +fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() { + let mut matcher = MatchGenerator::new(128); + matcher.hash_fill_step = FAST_HASH_FILL_STEP; + matcher.add_data( + b"abcdefghijklmnopqrstuvwxyz".to_vec(), + SuffixStore::with_capacity(1 << 16), + |_, _| {}, + ); + + matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP); + + let last = matcher.window.last().unwrap(); + for pos in [0usize, 3, 6, 9, 12] { + let key = &last.data[pos..pos + MIN_MATCH_LEN]; + assert_eq!( + last.suffixes.get(key), + Some(pos), + "expected interleaved suffix registration at pos {pos}" + ); + } +} + #[test] fn dfast_skip_matching_handles_window_eviction() { let mut matcher = DfastMatchGenerator::new(16);