From a40e31729598772513dfebdf770d0a2fb50964b8 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 14:04:47 +0300
Subject: [PATCH 01/18] feat(encoding): numeric compression levels (1-22) API

- Add CompressionLevel::Level(i32) variant and from_level() constructor
- Port C zstd level parameter table (levels 1-22) with per-level
  window size, hash/chain config, search depth, and lazy depth
- Negative levels (-1..-131072) select ultra-fast mode via Simple
  backend with progressively coarser hash insertion
- Level 0 maps to default (level 3) in the library API, matching C zstd
  semantics; the CLI keeps `--level 0` as Uncompressed
- Named variants map exactly to their numeric equivalents:
  Fastest=1, Default=3, Better=7, Best=11
- Refactor MatchGeneratorDriver::reset() to use centralized LevelParams
  instead of per-variant hardcoded constants
- Update CLI to accept zstd-compatible numeric levels (-5..22)
- Add 11 roundtrip tests covering equivalence, all 22 levels,
  negative levels, monotonic ratio, streaming, and clamping

Closes #21
---
 README.md                              |   7 +-
 cli/src/main.rs                        |  56 +++++----
 zstd/src/encoding/frame_compressor.rs  |  15 ++-
 zstd/src/encoding/match_generator.rs   | 167 ++++++++++++++++---------
 zstd/src/encoding/mod.rs               |  37 +++++-
 zstd/src/encoding/streaming_encoder.rs |  11 +-
 zstd/src/tests/roundtrip_integrity.rs  | 126 +++++++++++++++++++
 7 files changed, 326 insertions(+), 93 deletions(-)
diff --git a/README.md b/README.md
index 3bb97f6a..f6578fdf 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ This is a **maintained fork** of [KillingSpark/zstd-rs](https://github.com/Killi
 **Fork goals:**
 - Dictionary compression improvements (critical for per-label trained dictionaries in LSM-tree)
 - Performance parity with C zstd for decompression (currently 1.4-3.5x slower)
-- Additional compression levels (Fastest, Default, Better, and Best are all implemented)
+- Full numeric compression levels (1–22 plus negative ultra-fast, C zstd compatible)
 - No FFI — pure `cargo build`, no cmake/system libraries (ADR-013 compliance)
 
 **Upstream relationship:** We periodically sync with upstream but maintain an independent development trajectory focused on CoordiNode requirements.
@@ -46,6 +46,8 @@ Complete RFC 8878 implementation. Performance: ~1.4-3.5x slower than C zstd depe
 - [x] Default (roughly level 3)
 - [x] Better (roughly level 7)
 - [x] Best (roughly level 11)
+- [x] Numeric levels 1–22 via `CompressionLevel::from_level(n)` (C zstd compatible numbering)
+- [x] Negative levels for ultra-fast compression
 - [x] Checksums
 - [x] Frame Content Size — `FrameCompressor` writes FCS automatically; `StreamingEncoder` requires `set_pledged_content_size()` before first write
 - [x] Dictionary compression
@@ -67,7 +69,10 @@ Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). The suite compares
 use structured_zstd::encoding::{compress, compress_to_vec, CompressionLevel};
 
 let data: &[u8] = b"hello world";
+// Named level
 let compressed = compress_to_vec(data, CompressionLevel::Fastest);
+// Numeric level (C zstd compatible: 1-22, negative for ultra-fast)
+let compressed = compress_to_vec(data, CompressionLevel::from_level(7));
 ```
 
 ```rust,no_run
diff --git a/cli/src/main.rs b/cli/src/main.rs
index 1562da3c..cfd652d1 100644
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@@ -34,23 +34,25 @@ enum Commands {
         /// Where the compressed file is written
         /// [default: <INPUT_FILE>.zst]
         output_file: Option<PathBuf>,
-        /// How thoroughly the file should be compressed. A higher level will take
-        /// more time to compress but result in a smaller file, and vice versa.
+        /// Compression level using C zstd numbering (higher = smaller, slower).
         ///
-        /// - 0: Uncompressed
-        /// - 1: Fastest
-        /// - 2: Default
-        /// - 3: Better (lazy2, ~zstd level 7)
-        /// - 4: Best  (deep lazy2, ~zstd level 11)
+        /// -  0: Uncompressed (no compression, raw zstd frame)
+        /// -  1: Fastest (fast hash, ~zstd level 1)
+        /// -  3: Default (dfast, ~zstd level 3)
+        /// -  7: Better  (lazy2, ~zstd level 7)
+        /// - 11: Best    (deep lazy2, ~zstd level 11)
+        /// - Negative: ultra-fast modes (less compression, more speed)
+        /// - 12-22: progressively higher ratio (capped at lazy2 backend)
         #[arg(
             short,
             long,
-            value_name = "COMPRESSION_LEVEL",
-            default_value_t = 2,
-            value_parser = clap::value_parser!(u8).range(0..=4),
-            verbatim_doc_comment
+            value_name = "LEVEL",
+            default_value_t = 3,
+            value_parser = clap::value_parser!(i32).range(-5..=22),
+            verbatim_doc_comment,
+            allow_hyphen_values = true,
         )]
-        level: u8,
+        level: i32,
     },
     Decompress {
         /// .zst archive to decompress
@@ -101,15 +103,11 @@ fn main() -> color_eyre::Result<()> {
     Ok(())
 }
 
-fn compress(input: PathBuf, output: PathBuf, level: u8) -> color_eyre::Result<()> {
+fn compress(input: PathBuf, output: PathBuf, level: i32) -> color_eyre::Result<()> {
     info!("compressing {input:?} to {output:?}");
     let compression_level: structured_zstd::encoding::CompressionLevel = match level {
         0 => CompressionLevel::Uncompressed,
-        1 => CompressionLevel::Fastest,
-        2 => CompressionLevel::Default,
-        3 => CompressionLevel::Better,
-        4 => CompressionLevel::Best,
-        _ => return Err(eyre!("unsupported compression level: {level}")),
+        n => CompressionLevel::from_level(n),
     };
     ensure_distinct_paths(&input, &output)?;
     ensure_regular_output_destination(&output)?;
@@ -402,7 +400,19 @@ mod tests {
 
     #[test]
     fn cli_rejects_unsupported_compression_level_at_parse_time() {
-        let parse = Cli::try_parse_from(["structured-zstd", "compress", "in.bin", "--level", "5"]);
+        let parse = Cli::try_parse_from(["structured-zstd", "compress", "in.bin", "--level", "23"]);
+        assert!(parse.is_err());
+    }
+
+    #[test]
+    fn cli_accepts_negative_compression_level() {
+        let parse = Cli::try_parse_from(["structured-zstd", "compress", "in.bin", "--level", "-3"]);
+        assert!(parse.is_ok());
+    }
+
+    #[test]
+    fn cli_rejects_too_negative_compression_level() {
+        let parse = Cli::try_parse_from(["structured-zstd", "compress", "in.bin", "--level", "-6"]);
         assert!(parse.is_err());
     }
 
@@ -415,7 +425,7 @@ mod tests {
         let input = std::env::temp_dir().join(format!("structured-zstd-cli-alias-{unique}.txt"));
         fs::write(&input, b"streaming-cli-alias-check").unwrap();
 
-        let err = compress(input.clone(), input.clone(), 2).unwrap_err();
+        let err = compress(input.clone(), input.clone(), 3).unwrap_err();
         let message = format!("{err:#}");
         assert!(
             message.contains("input and output"),
@@ -434,7 +444,7 @@ mod tests {
         fs::write(&input, b"streaming-cli-hardlink-check").unwrap();
         fs::hard_link(&input, &output).unwrap();
 
-        let err = compress(input.clone(), output.clone(), 2).unwrap_err();
+        let err = compress(input.clone(), output.clone(), 3).unwrap_err();
         let message = format!("{err:#}");
         assert!(
             message.contains("input and output"),
@@ -455,7 +465,7 @@ mod tests {
         let output =
             std::env::temp_dir().join(format!("structured-zstd-cli-missing-output-{unique}.zst"));
 
-        let err = compress(missing_input, output.clone(), 2).unwrap_err();
+        let err = compress(missing_input, output.clone(), 3).unwrap_err();
         let message = format!("{err:#}");
         assert!(
             message.contains("failed to open input file"),
@@ -473,7 +483,7 @@ mod tests {
         let output = dir.join("existing-dir");
         fs::create_dir(&output).unwrap();
 
-        let err = compress(input, output.clone(), 2).unwrap_err();
+        let err = compress(input, output.clone(), 3).unwrap_err();
         let message = format!("{err:#}");
         assert!(
             message.contains("not a regular file"),
diff --git a/zstd/src/encoding/frame_compressor.rs b/zstd/src/encoding/frame_compressor.rs
index 42c941b9..c4cdc951 100644
--- a/zstd/src/encoding/frame_compressor.rs
+++ b/zstd/src/encoding/frame_compressor.rs
@@ -274,7 +274,8 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
                 CompressionLevel::Fastest
                 | CompressionLevel::Default
                 | CompressionLevel::Better
-                | CompressionLevel::Best => compress_block_encoded(
+                | CompressionLevel::Best
+                | CompressionLevel::Level(_) => compress_block_encoded(
                     &mut self.state,
                     last_block,
                     uncompressed_data,
@@ -476,7 +477,7 @@ mod tests {
                     data.len() as u64,
                     "FCS mismatch for len={} level={:?}",
                     data.len(),
-                    level as u8,
+                    level,
                 );
                 // Confirm the FCS field is actually present in the header
                 // (not just the decoder returning 0 for absent FCS).
@@ -485,7 +486,7 @@ mod tests {
                     0,
                     "FCS field must be present for len={} level={:?}",
                     data.len(),
-                    level as u8,
+                    level,
                 );
                 // Verify C zstd can decompress
                 let mut decoded = Vec::new();
@@ -883,8 +884,10 @@ mod tests {
             crate::decoding::Dictionary::from_raw_content(dict_id, b"abcdefgh".to_vec())
                 .expect("raw dictionary should be valid");
 
-        let payload = b"abcdefgh".repeat(512);
-        let matcher = MatchGeneratorDriver::new(8, 1);
+        // Payload must exceed the encoder's advertised window (128 KiB for
+        // Fastest) so the test actually exercises cross-window-boundary behavior.
+        let payload = b"abcdefgh".repeat(128 * 1024 / 8 + 64);
+        let matcher = MatchGeneratorDriver::new(1024, 1);
 
         let mut no_dict_output = Vec::new();
         let mut no_dict_compressor =
@@ -900,7 +903,7 @@ mod tests {
             .expect("window size should be present");
 
         let mut output = Vec::new();
-        let matcher = MatchGeneratorDriver::new(8, 1);
+        let matcher = MatchGeneratorDriver::new(1024, 1);
         let mut compressor =
             FrameCompressor::new_with_matcher(matcher, super::CompressionLevel::Fastest);
         compressor
diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs
index 7eaae2a5..d3e1cb52 100644
--- a/zstd/src/encoding/match_generator.rs
+++ b/zstd/src/encoding/match_generator.rs
@@ -23,8 +23,6 @@ const DFAST_TARGET_LEN: usize = 48;
 // measurements show we can shrink them without regressing acceptance tests.
 const DFAST_HASH_BITS: usize = 20;
 const DFAST_SEARCH_DEPTH: usize = 4;
-const DFAST_DEFAULT_WINDOW_SIZE: usize = 1 << 22;
-const BETTER_DEFAULT_WINDOW_SIZE: usize = 1 << 23;
 const DFAST_EMPTY_SLOT: usize = usize::MAX;
 
 const HC_HASH_LOG: usize = 20;
@@ -36,7 +34,6 @@ const HC_TARGET_LEN: usize = 48;
 // that can never collide with any valid position, even at the 4 GiB boundary.
 const HC_EMPTY: u32 = 0;
 
-const BEST_DEFAULT_WINDOW_SIZE: usize = 1 << 24;
 // Maximum search depth across all HC-based levels. Used to size the
 // fixed-length candidate array returned by chain_candidates().
 const MAX_HC_SEARCH_DEPTH: usize = 32;
@@ -66,6 +63,91 @@ const BEST_HC_CONFIG: HcConfig = HcConfig {
     target_len: 128,
 };
 
+/// Resolved tuning parameters for a compression level.
+#[derive(Copy, Clone)]
+struct LevelParams {
+    backend: MatcherBackend, // matcher implementation the driver activates on reset
+    window_log: u8,          // log2 of the window size; reset() uses `1 << window_log`
+    hash_fill_step: usize,   // forwarded to the Simple matcher's `hash_fill_step`
+    lazy_depth: u8,          // forwarded to the Dfast / HashChain matcher's `lazy_depth`
+    hc: HcConfig,            // passed to `HcMatchGenerator::configure` for HashChain
+}
+
+/// Parameter table for numeric compression levels 1–22.
+///
+/// Each entry maps a zstd compression level to the best-available matcher
+/// backend and tuning knobs.  Levels that require strategies this crate does
+/// not implement (greedy, btopt, btultra) are approximated with the closest
+/// available backend; levels 17–22 currently share identical parameters.
+///
+/// Index 0 = level 1, index 21 = level 22 (i.e. `LEVEL_TABLE[level - 1]`).
+#[rustfmt::skip]
+const LEVEL_TABLE: [LevelParams; 22] = [
+    // Lvl  Strategy       wlog  step  lazy  HC config
+    // ---  -------------- ----  ----  ----  ------------------------------------------
+    /* 1 */ LevelParams { backend: MatcherBackend::Simple,    window_log: 17, hash_fill_step: 3, lazy_depth: 0, hc: HC_CONFIG },
+    /* 2 */ LevelParams { backend: MatcherBackend::Dfast,     window_log: 19, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG },
+    /* 3 */ LevelParams { backend: MatcherBackend::Dfast,     window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG },
+    /* 4 */ LevelParams { backend: MatcherBackend::Dfast,     window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG },
+    /* 5 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4,  target_len: 32  } },
+    /* 6 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8,  target_len: 48  } },
+    /* 7 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48  } },
+    /* 8 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64  } },
+    /* 9 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 23, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64  } },
+    /*10 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96  } },
+    /*11 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: BEST_HC_CONFIG },
+    /*12 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128 } },
+    /*13 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160 } },
+    /*14 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192 } },
+    /*15 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192 } },
+    /*16 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 256 } },
+    /*17 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
+    /*18 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
+    /*19 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
+    /*20 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
+    /*21 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
+    /*22 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
+];
+
+/// Resolve a [`CompressionLevel`] to internal tuning parameters (out-of-range levels clamp).
+fn resolve_level_params(level: CompressionLevel) -> LevelParams {
+    match level {
+        CompressionLevel::Uncompressed => LevelParams {
+            backend: MatcherBackend::Simple,
+            window_log: 17,
+            hash_fill_step: 1,
+            lazy_depth: 0,
+            hc: HC_CONFIG,
+        },
+        CompressionLevel::Fastest => LEVEL_TABLE[0],
+        CompressionLevel::Default => LEVEL_TABLE[2],
+        CompressionLevel::Better => LEVEL_TABLE[6],
+        CompressionLevel::Best => LEVEL_TABLE[10],
+        CompressionLevel::Level(n) => {
+            if n > 0 {
+                let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
+                LEVEL_TABLE[idx]
+            } else if n == 0 {
+                // Level 0 = default, matching C zstd semantics.
+                LEVEL_TABLE[2]
+            } else {
+                // Negative levels: ultra-fast with the Simple backend; a larger
+                // magnitude means a larger hash_fill_step (fewer positions indexed).
+                // `unsigned_abs` avoids the overflow that `-n` hits at `i32::MIN`.
+                let acceleration = (n.unsigned_abs() as usize).min(131072);
+                let step = (acceleration + 3).min(128);
+                LevelParams {
+                    backend: MatcherBackend::Simple,
+                    window_log: 17,
+                    hash_fill_step: step,
+                    lazy_depth: 0,
+                    hc: HC_CONFIG,
+                }
+            }
+        }
+    }
+}
+
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 enum MatcherBackend {
     Simple,
@@ -83,7 +165,6 @@ pub struct MatchGeneratorDriver {
     active_backend: MatcherBackend,
     slice_size: usize,
     base_slice_size: usize,
-    base_window_size: usize,
     // Frame header window size must stay at the configured live-window budget.
     // Dictionary retention expands internal matcher capacity only.
     reported_window_size: usize,
@@ -106,45 +187,13 @@ impl MatchGeneratorDriver {
             active_backend: MatcherBackend::Simple,
             slice_size,
             base_slice_size: slice_size,
-            base_window_size: max_window_size,
             reported_window_size: max_window_size,
             dictionary_retained_budget: 0,
         }
     }
 
-    fn level_config(&self, level: CompressionLevel) -> (MatcherBackend, usize, usize, usize) {
-        match level {
-            CompressionLevel::Uncompressed => (
-                MatcherBackend::Simple,
-                self.base_slice_size,
-                self.base_window_size,
-                1,
-            ),
-            CompressionLevel::Fastest => (
-                MatcherBackend::Simple,
-                self.base_slice_size,
-                self.base_window_size,
-                FAST_HASH_FILL_STEP,
-            ),
-            CompressionLevel::Default => (
-                MatcherBackend::Dfast,
-                self.base_slice_size,
-                DFAST_DEFAULT_WINDOW_SIZE,
-                1,
-            ),
-            CompressionLevel::Better => (
-                MatcherBackend::HashChain,
-                self.base_slice_size,
-                BETTER_DEFAULT_WINDOW_SIZE,
-                1,
-            ),
-            CompressionLevel::Best => (
-                MatcherBackend::HashChain,
-                self.base_slice_size,
-                BEST_DEFAULT_WINDOW_SIZE,
-                1,
-            ),
-        }
+    fn level_params(level: CompressionLevel) -> LevelParams {
+        resolve_level_params(level)
     }
 
     fn dfast_matcher(&self) -> &DfastMatchGenerator {
@@ -248,9 +297,10 @@ impl Matcher for MatchGeneratorDriver {
     }
 
     fn reset(&mut self, level: CompressionLevel) {
-        let (backend, slice_size, max_window_size, hash_fill_step) = self.level_config(level);
+        let params = Self::level_params(level);
+        let max_window_size = 1usize << params.window_log;
         self.dictionary_retained_budget = 0;
-        if self.active_backend != backend {
+        if self.active_backend != params.backend {
             match self.active_backend {
                 MatcherBackend::Simple => {
                     let vec_pool = &mut self.vec_pool;
@@ -288,15 +338,15 @@ impl Matcher for MatchGeneratorDriver {
             }
         }
 
-        self.active_backend = backend;
-        self.slice_size = slice_size;
+        self.active_backend = params.backend;
+        self.slice_size = self.base_slice_size;
         self.reported_window_size = max_window_size;
         match self.active_backend {
             MatcherBackend::Simple => {
                 let vec_pool = &mut self.vec_pool;
                 let suffix_pool = &mut self.suffix_pool;
                 self.match_generator.max_window_size = max_window_size;
-                self.match_generator.hash_fill_step = hash_fill_step;
+                self.match_generator.hash_fill_step = params.hash_fill_step;
                 self.match_generator.reset(|mut data, mut suffixes| {
                     data.resize(data.capacity(), 0);
                     vec_pool.push(data);
@@ -310,7 +360,7 @@ impl Matcher for MatchGeneratorDriver {
                     .dfast_match_generator
                     .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
                 dfast.max_window_size = max_window_size;
-                dfast.lazy_depth = 1;
+                dfast.lazy_depth = params.lazy_depth;
                 let vec_pool = &mut self.vec_pool;
                 dfast.reset(|mut data| {
                     data.resize(data.capacity(), 0);
@@ -322,11 +372,8 @@ impl Matcher for MatchGeneratorDriver {
                     .hc_match_generator
                     .get_or_insert_with(|| HcMatchGenerator::new(max_window_size));
                 hc.max_window_size = max_window_size;
-                hc.lazy_depth = 2;
-                match level {
-                    CompressionLevel::Best => hc.configure(BEST_HC_CONFIG),
-                    _ => hc.configure(HC_CONFIG),
-                }
+                hc.lazy_depth = params.lazy_depth;
+                hc.configure(params.hc);
                 let vec_pool = &mut self.vec_pool;
                 hc.reset(|mut data| {
                     data.resize(data.capacity(), 0);
@@ -1975,7 +2022,7 @@ fn dfast_matches_roundtrip_multi_block_pattern() {
     let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
     let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
 
-    let mut matcher = DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE);
+    let mut matcher = DfastMatchGenerator::new(1 << 22);
     let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
         Sequence::Literals { literals } => decoded.extend_from_slice(literals),
         Sequence::Triple {
@@ -2009,7 +2056,7 @@ fn driver_switches_backends_and_initializes_dfast_via_reset() {
     let mut driver = MatchGeneratorDriver::new(32, 2);
 
     driver.reset(CompressionLevel::Default);
-    assert_eq!(driver.window_size(), DFAST_DEFAULT_WINDOW_SIZE as u64);
+    assert_eq!(driver.window_size(), (1u64 << 22));
 
     let mut first = driver.get_next_space();
     first[..12].copy_from_slice(b"abcabcabcabc");
@@ -2042,7 +2089,7 @@ fn driver_switches_backends_and_initializes_dfast_via_reset() {
     assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");
 
     driver.reset(CompressionLevel::Fastest);
-    assert_eq!(driver.window_size(), 64);
+    assert_eq!(driver.window_size(), (1u64 << 17));
 }
 
 #[test]
@@ -2051,7 +2098,7 @@ fn driver_best_to_fastest_releases_oversized_hc_tables() {
 
     // Initialize at Best — allocates large HC tables (2M hash, 1M chain).
     driver.reset(CompressionLevel::Best);
-    assert_eq!(driver.window_size(), BEST_DEFAULT_WINDOW_SIZE as u64);
+    assert_eq!(driver.window_size(), (1u64 << 24));
 
     // Feed data so tables are actually allocated via ensure_tables().
     let mut space = driver.get_next_space();
@@ -2062,7 +2109,7 @@ fn driver_best_to_fastest_releases_oversized_hc_tables() {
 
     // Switch to Fastest — must release HC tables.
     driver.reset(CompressionLevel::Fastest);
-    assert_eq!(driver.window_size(), 64);
+    assert_eq!(driver.window_size(), (1u64 << 17));
 
     // HC matcher should have empty tables after backend switch.
     let hc = driver.hc_match_generator.as_ref().unwrap();
@@ -2082,7 +2129,7 @@ fn driver_better_to_best_resizes_hc_tables() {
 
     // Initialize at Better — allocates small HC tables (1M hash, 512K chain).
     driver.reset(CompressionLevel::Better);
-    assert_eq!(driver.window_size(), BETTER_DEFAULT_WINDOW_SIZE as u64);
+    assert_eq!(driver.window_size(), (1u64 << 23));
 
     let mut space = driver.get_next_space();
     space[..12].copy_from_slice(b"abcabcabcabc");
@@ -2096,7 +2143,7 @@ fn driver_better_to_best_resizes_hc_tables() {
 
     // Switch to Best — must resize to larger tables.
     driver.reset(CompressionLevel::Best);
-    assert_eq!(driver.window_size(), BEST_DEFAULT_WINDOW_SIZE as u64);
+    assert_eq!(driver.window_size(), (1u64 << 24));
 
     // Feed data to trigger ensure_tables with new sizes.
     let mut space = driver.get_next_space();
@@ -2300,6 +2347,10 @@ fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
 fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
     let mut driver = MatchGeneratorDriver::new(8, 1);
     driver.reset(CompressionLevel::Fastest);
+    // Use a small live window so dictionary-primed slices are evicted
+    // quickly and budget retirement can be asserted deterministically.
+    driver.match_generator.max_window_size = 8;
+    driver.reported_window_size = 8;
 
     let base_window = driver.match_generator.max_window_size;
     driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
@@ -2443,7 +2494,7 @@ fn fastest_reset_uses_interleaved_hash_fill_step() {
     // happened and the lazy_depth is configured correctly.
     driver.reset(CompressionLevel::Better);
     assert_eq!(driver.active_backend, MatcherBackend::HashChain);
-    assert_eq!(driver.window_size(), BETTER_DEFAULT_WINDOW_SIZE as u64);
+    assert_eq!(driver.window_size(), (1u64 << 23));
     assert_eq!(driver.hc_matcher().lazy_depth, 2);
 }
 
@@ -2723,7 +2774,7 @@ fn dfast_trim_to_window_callback_reports_evicted_len_not_capacity() {
 
 #[test]
 fn dfast_inserts_tail_positions_for_next_block_matching() {
-    let mut matcher = DfastMatchGenerator::new(DFAST_DEFAULT_WINDOW_SIZE);
+    let mut matcher = DfastMatchGenerator::new(1 << 22);
 
     matcher.add_data(b"012345bcdea".to_vec(), |_| {});
     let mut history = Vec::new();
diff --git a/zstd/src/encoding/mod.rs b/zstd/src/encoding/mod.rs
index d075b9bc..ee8fd941 100644
--- a/zstd/src/encoding/mod.rs
+++ b/zstd/src/encoding/mod.rs
@@ -45,7 +45,7 @@ pub fn compress_to_vec<R: Read>(source: R, level: CompressionLevel) -> Vec<u8> {
 /// The compression mode used impacts the speed of compression,
 /// and resulting compression ratios. Faster compression will result
 /// in worse compression ratios, and vice versa.
-#[derive(Copy, Clone)]
+#[derive(Copy, Clone, Debug)]
 pub enum CompressionLevel {
     /// This level does not compress the data at all, and simply wraps
     /// it in a Zstandard frame.
@@ -88,6 +88,41 @@ pub enum CompressionLevel {
     /// Prefer [`CompressionLevel::Default`] for very large single-frame
     /// streams until table rebasing is implemented.
     Best,
+    /// Numeric compression level.
+    ///
+    /// Levels 1–22 correspond to the C zstd level numbering.  Higher values
+    /// produce smaller output at the cost of more CPU time.  Negative values
+    /// select ultra-fast modes that trade ratio for speed.  Level 0 is
+    /// treated as [`DEFAULT_LEVEL`](Self::DEFAULT_LEVEL), matching C zstd
+    /// semantics.
+    ///
+    /// Named variants map to specific numeric levels:
+    /// [`Fastest`](Self::Fastest) = 1, [`Default`](Self::Default) = 3,
+    /// [`Better`](Self::Better) = 7, [`Best`](Self::Best) = 11.
+    ///
+    /// Levels above 11 use progressively larger windows and deeper search
+    /// with the lazy2 hash-chain backend.  Levels that require strategies
+    /// this crate has not yet implemented (btopt, btultra) are approximated
+    /// with the closest available matcher.
+    Level(i32),
+}
+
+impl CompressionLevel {
+    /// The minimum supported numeric compression level (ultra-fast mode).
+    pub const MIN_LEVEL: i32 = -131072;
+    /// The maximum supported numeric compression level.
+    pub const MAX_LEVEL: i32 = 22;
+    /// The default numeric compression level (equivalent to [`Default`](Self::Default)).
+    pub const DEFAULT_LEVEL: i32 = 3;
+
+    /// Create a compression level from a numeric value.
+    ///
+    /// Wraps the raw integer in [`Level`](Self::Level) without validating it;
+    /// values outside [`MIN_LEVEL`](Self::MIN_LEVEL)..=[`MAX_LEVEL`](Self::MAX_LEVEL)
+    /// are clamped, and 0 selects the default, when the level is later resolved.
+    pub const fn from_level(level: i32) -> Self {
+        CompressionLevel::Level(level)
+    }
 }
 
 /// Trait used by the encoder that users can use to extend the matching facilities with their own algorithm
diff --git a/zstd/src/encoding/streaming_encoder.rs b/zstd/src/encoding/streaming_encoder.rs
index 6f2e3f0a..d2541339 100644
--- a/zstd/src/encoding/streaming_encoder.rs
+++ b/zstd/src/encoding/streaming_encoder.rs
@@ -246,8 +246,9 @@ impl<W: Write, M: Matcher> StreamingEncoder<W, M> {
             CompressionLevel::Fastest
             | CompressionLevel::Default
             | CompressionLevel::Better
-            | CompressionLevel::Best => self.state.matcher.get_next_space(),
-            _ => Vec::new(),
+            | CompressionLevel::Best
+            | CompressionLevel::Level(_) => self.state.matcher.get_next_space(),
+            CompressionLevel::Uncompressed => Vec::new(),
         };
         space.clear();
         if space.capacity() > block_capacity {
@@ -303,7 +304,8 @@ impl<W: Write, M: Matcher> StreamingEncoder<W, M> {
             | CompressionLevel::Fastest
             | CompressionLevel::Default
             | CompressionLevel::Better
-            | CompressionLevel::Best => Ok(()),
+            | CompressionLevel::Best
+            | CompressionLevel::Level(_) => Ok(()),
         }
     }
 
@@ -338,7 +340,8 @@ impl<W: Write, M: Matcher> StreamingEncoder<W, M> {
                 CompressionLevel::Fastest
                 | CompressionLevel::Default
                 | CompressionLevel::Better
-                | CompressionLevel::Best => {
+                | CompressionLevel::Best
+                | CompressionLevel::Level(_) => {
                     let block = raw_block.take().expect("raw block missing");
                     debug_assert!(!block.is_empty(), "empty blocks handled above");
                     compress_block_encoded(&mut self.state, last_block, block, &mut encoded);
diff --git a/zstd/src/tests/roundtrip_integrity.rs b/zstd/src/tests/roundtrip_integrity.rs
index 29e9d8f3..0aedc16c 100644
--- a/zstd/src/tests/roundtrip_integrity.rs
+++ b/zstd/src/tests/roundtrip_integrity.rs
@@ -544,3 +544,129 @@ fn roundtrip_best_level_streaming_multi_block() {
     let data = generate_compressible(5555, 512 * 1024);
     assert_eq!(roundtrip_best_streaming(&data), data);
 }
+
+// ─── Numeric compression levels (CompressionLevel::Level) ─────────
+
+/// `from_level(3)` must be equivalent to `Default` — same compressed output.
+#[test]
+fn numeric_level_3_matches_default() {
+    let data = generate_compressible(9000, 64 * 1024);
+    let default = compress_to_vec(&data[..], CompressionLevel::Default);
+    let level_3 = compress_to_vec(&data[..], CompressionLevel::from_level(3));
+    assert_eq!(
+        default, level_3,
+        "Level(3) output must be identical to Default"
+    );
+}
+
+/// `from_level(1)` must be equivalent to `Fastest`.
+#[test]
+fn numeric_level_1_matches_fastest() {
+    let data = generate_compressible(9001, 64 * 1024);
+    let fastest = compress_to_vec(&data[..], CompressionLevel::Fastest);
+    let level_1 = compress_to_vec(&data[..], CompressionLevel::from_level(1));
+    assert_eq!(
+        fastest, level_1,
+        "Level(1) output must be identical to Fastest"
+    );
+}
+
+/// `from_level(7)` must be equivalent to `Better`.
+#[test]
+fn numeric_level_7_matches_better() {
+    let data = generate_compressible(9002, 64 * 1024);
+    let better = compress_to_vec(&data[..], CompressionLevel::Better);
+    let level_7 = compress_to_vec(&data[..], CompressionLevel::from_level(7));
+    assert_eq!(
+        better, level_7,
+        "Level(7) output must be identical to Better"
+    );
+}
+
+/// `from_level(11)` must be equivalent to `Best`.
+#[test]
+fn numeric_level_11_matches_best() {
+    let data = generate_compressible(9003, 64 * 1024);
+    let best = compress_to_vec(&data[..], CompressionLevel::Best);
+    let level_11 = compress_to_vec(&data[..], CompressionLevel::from_level(11));
+    assert_eq!(best, level_11, "Level(11) output must be identical to Best");
+}
+
+/// `from_level(0)` maps to default compression (level 3), matching C zstd.
+#[test]
+fn numeric_level_0_is_default_compression() {
+    let data = generate_compressible(9004, 64 * 1024);
+    let level_0 = compress_to_vec(&data[..], CompressionLevel::from_level(0));
+    let level_3 = compress_to_vec(&data[..], CompressionLevel::from_level(3));
+    assert_eq!(level_0, level_3, "Level(0) should map to default (level 3)");
+}
+
+/// All 22 positive levels produce valid output that round-trips correctly.
+#[test]
+fn all_22_levels_roundtrip() {
+    let data = generate_compressible(9100, 32 * 1024);
+    for level in 1..=22 {
+        let result = roundtrip_at_level(&data, CompressionLevel::from_level(level));
+        assert_eq!(data, result, "Roundtrip failed for Level({level})");
+    }
+}
+
+/// Negative levels produce valid compressed output (ultra-fast mode).
+#[test]
+fn negative_levels_roundtrip() {
+    let data = generate_compressible(9200, 32 * 1024);
+    for level in [-1, -2, -3, -5] {
+        let result = roundtrip_at_level(&data, CompressionLevel::from_level(level));
+        assert_eq!(data, result, "Roundtrip failed for Level({level})");
+    }
+}
+
+/// Higher levels should generally not produce *larger* output than lower levels
+/// on reasonably compressible data.
+#[test]
+fn levels_monotonic_compression_ratio() {
+    let data = generate_compressible(9300, 64 * 1024);
+    let mut prev_size = usize::MAX;
+    for level in [1, 3, 7, 11] {
+        let compressed = compress_to_vec(&data[..], CompressionLevel::from_level(level));
+        assert!(
+            compressed.len() <= prev_size,
+            "Level {level} produced larger output ({}) than a lower level ({prev_size})",
+            compressed.len(),
+        );
+        prev_size = compressed.len();
+    }
+}
+
+/// Numeric levels work with the streaming encoder.
+#[test]
+fn numeric_level_streaming_roundtrip() {
+    use crate::encoding::StreamingEncoder;
+    use crate::io::Write;
+
+    let data = generate_compressible(9400, 200 * 1024);
+    for level in [1, 3, 5, 7, 9, 11, -1] {
+        let mut encoder = StreamingEncoder::new(Vec::new(), CompressionLevel::from_level(level));
+        for chunk in data.chunks(4096) {
+            encoder.write_all(chunk).unwrap();
+        }
+        let compressed = encoder.finish().unwrap();
+        let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap();
+        let mut result = Vec::new();
+        decoder.read_to_end(&mut result).unwrap();
+        assert_eq!(
+            data, result,
+            "Streaming roundtrip failed for Level({level})"
+        );
+    }
+}
+
+/// Values beyond MAX_LEVEL are clamped — they must still produce valid output.
+#[test]
+fn out_of_range_level_clamped() {
+    let data = generate_compressible(9500, 16 * 1024);
+    let result = roundtrip_at_level(&data, CompressionLevel::from_level(100));
+    assert_eq!(data, result, "Clamped Level(100) must still roundtrip");
+    let result = roundtrip_at_level(&data, CompressionLevel::from_level(-200000));
+    assert_eq!(data, result, "Clamped Level(-200000) must still roundtrip");
+}

From bd152e0772bcad0e11a1b50c2964cbcbdf0e1240 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 14:13:37 +0300
Subject: [PATCH 02/18] fix(cli): align level 0 with library API and prevent
 i32::MIN overflow

- Use saturating_abs() instead of (-n) cast to prevent overflow
  when n == i32::MIN in negative level resolution
- Use CompressionLevel::MIN_LEVEL constant instead of hardcoded 131072
- CLI level 0 now maps to default compression (level 3), consistent
  with CompressionLevel::from_level(0) and C zstd semantics
- Add --store flag for uncompressed zstd frames (replaces old level 0
  = Uncompressed behavior)
- Remove "C zstd numbering" claim from CLI help text since the CLI
  extends standard numbering with --store
---
 cli/src/main.rs                      | 35 +++++++++++++++++++---------
 zstd/src/encoding/match_generator.rs |  3 ++-
 2 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/cli/src/main.rs b/cli/src/main.rs
index cfd652d1..812f1578 100644
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@@ -34,15 +34,20 @@ enum Commands {
         /// Where the compressed file is written
         /// [default: <INPUT_FILE>.zst]
         output_file: Option<PathBuf>,
-        /// Compression level using C zstd numbering (higher = smaller, slower).
+        /// Compression level (higher = smaller, slower).
         ///
-        /// -  0: Uncompressed (no compression, raw zstd frame)
+        /// Numeric levels follow the zstd convention where 0 means
+        /// "use the default level" (currently 3).
+        ///
+        /// -  0: Default (same as 3)
         /// -  1: Fastest (fast hash, ~zstd level 1)
         /// -  3: Default (dfast, ~zstd level 3)
         /// -  7: Better  (lazy2, ~zstd level 7)
         /// - 11: Best    (deep lazy2, ~zstd level 11)
         /// - Negative: ultra-fast modes (less compression, more speed)
         /// - 12-22: progressively higher ratio (capped at lazy2 backend)
+        ///
+        /// Use --store to write an uncompressed zstd frame.
         #[arg(
             short,
             long,
@@ -53,6 +58,12 @@ enum Commands {
             allow_hyphen_values = true,
         )]
         level: i32,
+        /// Write an uncompressed zstd frame (no compression).
+        ///
+        /// When set, --level is ignored and the input is wrapped in a
+        /// raw zstd frame without any compression.
+        #[arg(long)]
+        store: bool,
     },
     Decompress {
         /// .zst archive to decompress
@@ -83,9 +94,10 @@ fn main() -> color_eyre::Result<()> {
             input_file,
             output_file,
             level,
+            store,
         } => {
             let output_file = output_file.unwrap_or_else(|| add_extension(&input_file, ".zst"));
-            compress(input_file, output_file, level)?;
+            compress(input_file, output_file, level, store)?;
         }
         Commands::Decompress {
             input_file,
@@ -103,11 +115,12 @@ fn main() -> color_eyre::Result<()> {
     Ok(())
 }
 
-fn compress(input: PathBuf, output: PathBuf, level: i32) -> color_eyre::Result<()> {
+fn compress(input: PathBuf, output: PathBuf, level: i32, store: bool) -> color_eyre::Result<()> {
     info!("compressing {input:?} to {output:?}");
-    let compression_level: structured_zstd::encoding::CompressionLevel = match level {
-        0 => CompressionLevel::Uncompressed,
-        n => CompressionLevel::from_level(n),
+    let compression_level = if store {
+        CompressionLevel::Uncompressed
+    } else {
+        CompressionLevel::from_level(level)
     };
     ensure_distinct_paths(&input, &output)?;
     ensure_regular_output_destination(&output)?;
@@ -425,7 +438,7 @@ mod tests {
         let input = std::env::temp_dir().join(format!("structured-zstd-cli-alias-{unique}.txt"));
         fs::write(&input, b"streaming-cli-alias-check").unwrap();
 
-        let err = compress(input.clone(), input.clone(), 3).unwrap_err();
+        let err = compress(input.clone(), input.clone(), 3, false).unwrap_err();
         let message = format!("{err:#}");
         assert!(
             message.contains("input and output"),
@@ -444,7 +457,7 @@ mod tests {
         fs::write(&input, b"streaming-cli-hardlink-check").unwrap();
         fs::hard_link(&input, &output).unwrap();
 
-        let err = compress(input.clone(), output.clone(), 3).unwrap_err();
+        let err = compress(input.clone(), output.clone(), 3, false).unwrap_err();
         let message = format!("{err:#}");
         assert!(
             message.contains("input and output"),
@@ -465,7 +478,7 @@ mod tests {
         let output =
             std::env::temp_dir().join(format!("structured-zstd-cli-missing-output-{unique}.zst"));
 
-        let err = compress(missing_input, output.clone(), 3).unwrap_err();
+        let err = compress(missing_input, output.clone(), 3, false).unwrap_err();
         let message = format!("{err:#}");
         assert!(
             message.contains("failed to open input file"),
@@ -483,7 +496,7 @@ mod tests {
         let output = dir.join("existing-dir");
         fs::create_dir(&output).unwrap();
 
-        let err = compress(input, output.clone(), 3).unwrap_err();
+        let err = compress(input, output.clone(), 3, false).unwrap_err();
         let message = format!("{err:#}");
         assert!(
             message.contains("not a regular file"),
diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs
index d3e1cb52..ef594a6e 100644
--- a/zstd/src/encoding/match_generator.rs
+++ b/zstd/src/encoding/match_generator.rs
@@ -134,7 +134,8 @@ fn resolve_level_params(level: CompressionLevel) -> LevelParams {
                 // Negative levels: ultra-fast with the Simple backend.
                 // Acceleration grows with magnitude, expressed as larger
                 // hash_fill_step (fewer positions indexed).
-                let acceleration = ((-n) as usize).min(131072);
+                let acceleration =
+                    (n.saturating_abs() as usize).min((-CompressionLevel::MIN_LEVEL) as usize);
                 let step = (acceleration + 3).min(128);
                 LevelParams {
                     backend: MatcherBackend::Simple,

From e20949590f34d35dae0707fcbc367c123dd05bba Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 14:36:52 +0300
Subject: [PATCH 03/18] feat(encoding): source-size-aware parameter selection
 and review fixes

Source-size-aware selection (issue #21 requirement):
- Add Matcher::set_source_size_hint() trait method (default no-op)
- MatchGeneratorDriver stores hint and adjusts window_log + HC table
  sizes for small inputs, following C zstd source-size-class behavior
- FrameCompressor::set_source_size_hint() passes through to matcher
- StreamingEncoder::set_pledged_content_size() also sets the hint
- StreamingEncoder::set_source_size_hint() for hint without enforcement
- For small inputs, window and hash tables are capped proportionally
  to source size, avoiding multi-MB allocations for tiny payloads

Review fixes:
- Derive Level(0) index from DEFAULT_LEVEL constant (Copilot #5)
- Tighten test doc comment to match strict assertion (Copilot #6)
- Widen CLI level range to MIN_LEVEL..=MAX_LEVEL (CodeRabbit #7)
- Use saturating_abs() for negative level overflow (Copilot #1, prev)
---
 cli/src/main.rs                        |  13 ++-
 zstd/src/encoding/frame_compressor.rs  |   9 +++
 zstd/src/encoding/match_generator.rs   |  62 +++++++++++++--
 zstd/src/encoding/mod.rs               |   9 +++
 zstd/src/encoding/streaming_encoder.rs |  20 +++++
 zstd/src/tests/roundtrip_integrity.rs  | 105 ++++++++++++++++++++++++-
 6 files changed, 207 insertions(+), 11 deletions(-)

diff --git a/cli/src/main.rs b/cli/src/main.rs
index 812f1578..9f191657 100644
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@@ -53,7 +53,9 @@ enum Commands {
             long,
             value_name = "LEVEL",
             default_value_t = 3,
-            value_parser = clap::value_parser!(i32).range(-5..=22),
+            value_parser = clap::value_parser!(i32).range(
+                (CompressionLevel::MIN_LEVEL as i64)..=(CompressionLevel::MAX_LEVEL as i64)
+            ),
             verbatim_doc_comment,
             allow_hyphen_values = true,
         )]
@@ -425,7 +427,14 @@ mod tests {
 
     #[test]
     fn cli_rejects_too_negative_compression_level() {
-        let parse = Cli::try_parse_from(["structured-zstd", "compress", "in.bin", "--level", "-6"]);
+        // MIN_LEVEL is -131072; anything below that should be rejected
+        let parse = Cli::try_parse_from([
+            "structured-zstd",
+            "compress",
+            "in.bin",
+            "--level",
+            "-131073",
+        ]);
         assert!(parse.is_err());
     }
 
diff --git a/zstd/src/encoding/frame_compressor.rs b/zstd/src/encoding/frame_compressor.rs
index c4cdc951..bd5ad612 100644
--- a/zstd/src/encoding/frame_compressor.rs
+++ b/zstd/src/encoding/frame_compressor.rs
@@ -157,6 +157,15 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
         self.compressed_data.replace(compressed_data)
     }
 
+    /// Provide a hint about the total uncompressed size for the next frame.
+    ///
+    /// When set, the encoder selects smaller hash tables and windows for
+    /// small inputs, matching the C zstd source-size-class behavior.
+    /// Must be called before [`compress`](Self::compress).
+    pub fn set_source_size_hint(&mut self, size: u64) {
+        self.state.matcher.set_source_size_hint(size);
+    }
+
     /// Compress the uncompressed data from the provided source as one Zstd frame and write it to the provided drain
     ///
     /// This will repeatedly call [Read::read] on the source to fill up blocks until the source returns 0 on the read call.
diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs
index ef594a6e..5dba03a7 100644
--- a/zstd/src/encoding/match_generator.rs
+++ b/zstd/src/encoding/match_generator.rs
@@ -109,9 +109,44 @@ const LEVEL_TABLE: [LevelParams; 22] = [
     /*22 */ LevelParams { backend: MatcherBackend::HashChain, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 23, search_depth: 32, target_len: 256 } },
 ];
 
-/// Resolve a [`CompressionLevel`] to internal tuning parameters.
-fn resolve_level_params(level: CompressionLevel) -> LevelParams {
-    match level {
+/// Smallest window_log the encoder will use regardless of source size.
+const MIN_WINDOW_LOG: u8 = 10;
+
+/// Adjust level parameters for a known source size.
+///
+/// Follows the C zstd `clevels.h` approach: for small inputs, cap
+/// window_log (and hash/chain for HC) so the encoder doesn't allocate
+/// oversized tables.  The four C size classes are:
+///   >256 KiB (default table), ≤256 KiB, ≤128 KiB, ≤16 KiB.
+fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
+    if src_size == 0 {
+        return params;
+    }
+    // Cap window_log so the window doesn't exceed the source.
+    // ceil_log2(src_size): the smallest k such that 2^k >= src_size.
+    let src_log = 64 - (src_size - 1).leading_zeros(); // ceil_log2
+    let src_log = (src_log as u8).max(MIN_WINDOW_LOG);
+    if src_log < params.window_log {
+        params.window_log = src_log;
+    }
+    // For HC backend: also cap hash_log and chain_log so tables are
+    // proportional to the source, avoiding multi-MB allocations for
+    // tiny inputs.
+    if params.backend == MatcherBackend::HashChain {
+        if (src_log + 2) < params.hc.hash_log as u8 {
+            params.hc.hash_log = (src_log + 2) as usize;
+        }
+        if (src_log + 1) < params.hc.chain_log as u8 {
+            params.hc.chain_log = (src_log + 1) as usize;
+        }
+    }
+    params
+}
+
+/// Resolve a [`CompressionLevel`] to internal tuning parameters,
+/// optionally adjusted for a known source size.
+fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
+    let params = match level {
         CompressionLevel::Uncompressed => LevelParams {
             backend: MatcherBackend::Simple,
             window_log: 17,
@@ -129,7 +164,7 @@ fn resolve_level_params(level: CompressionLevel) -> LevelParams {
                 LEVEL_TABLE[idx]
             } else if n == 0 {
                 // Level 0 = default, matching C zstd semantics.
-                LEVEL_TABLE[2]
+                LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
             } else {
                 // Negative levels: ultra-fast with the Simple backend.
                 // Acceleration grows with magnitude, expressed as larger
@@ -146,6 +181,11 @@ fn resolve_level_params(level: CompressionLevel) -> LevelParams {
                 }
             }
         }
+    };
+    if let Some(size) = source_size {
+        adjust_params_for_source_size(params, size)
+    } else {
+        params
     }
 }
 
@@ -172,6 +212,8 @@ pub struct MatchGeneratorDriver {
     // Tracks currently retained bytes that originated from primed dictionary
     // history and have not been evicted yet.
     dictionary_retained_budget: usize,
+    // Source size hint for next frame (set via set_source_size_hint, cleared on reset).
+    source_size_hint: Option<u64>,
 }
 
 impl MatchGeneratorDriver {
@@ -190,11 +232,12 @@ impl MatchGeneratorDriver {
             base_slice_size: slice_size,
             reported_window_size: max_window_size,
             dictionary_retained_budget: 0,
+            source_size_hint: None,
         }
     }
 
-    fn level_params(level: CompressionLevel) -> LevelParams {
-        resolve_level_params(level)
+    fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
+        resolve_level_params(level, source_size)
     }
 
     fn dfast_matcher(&self) -> &DfastMatchGenerator {
@@ -297,8 +340,13 @@ impl Matcher for MatchGeneratorDriver {
         true
     }
 
+    fn set_source_size_hint(&mut self, size: u64) {
+        self.source_size_hint = Some(size);
+    }
+
     fn reset(&mut self, level: CompressionLevel) {
-        let params = Self::level_params(level);
+        let hint = self.source_size_hint.take();
+        let params = Self::level_params(level, hint);
         let max_window_size = 1usize << params.window_log;
         self.dictionary_retained_budget = 0;
         if self.active_backend != params.backend {
diff --git a/zstd/src/encoding/mod.rs b/zstd/src/encoding/mod.rs
index ee8fd941..18bd7f74 100644
--- a/zstd/src/encoding/mod.rs
+++ b/zstd/src/encoding/mod.rs
@@ -153,6 +153,15 @@ pub trait Matcher {
     fn start_matching(&mut self, handle_sequence: impl for<'a> FnMut(Sequence<'a>));
     /// Reset this matcher so it can be used for the next new frame
     fn reset(&mut self, level: CompressionLevel);
+    /// Provide a hint about the total uncompressed size for the next frame.
+    ///
+    /// Implementations may use this to select smaller hash tables and windows
+    /// for small inputs, matching the C zstd source-size-class behavior.
+    /// Called before [`reset`](Self::reset) when the caller knows the input
+    /// size (e.g. from pledged content size or file metadata).
+    ///
+    /// The default implementation is a no-op for custom matchers.
+    fn set_source_size_hint(&mut self, _size: u64) {}
     /// Prime matcher state with dictionary history before compressing the next frame.
     /// Default implementation is a no-op for custom matchers that do not support this.
     fn prime_with_dictionary(&mut self, _dict_content: &[u8], _offset_hist: [u32; 3]) {}
diff --git a/zstd/src/encoding/streaming_encoder.rs b/zstd/src/encoding/streaming_encoder.rs
index d2541339..eeebe419 100644
--- a/zstd/src/encoding/streaming_encoder.rs
+++ b/zstd/src/encoding/streaming_encoder.rs
@@ -93,6 +93,26 @@ impl<W: Write, M: Matcher> StreamingEncoder<W, M> {
             ));
         }
         self.pledged_content_size = Some(size);
+        // Also use pledged size as source-size hint so the matcher
+        // can select smaller tables for small inputs.
+        self.state.matcher.set_source_size_hint(size);
+        Ok(())
+    }
+
+    /// Provide a hint about the total uncompressed size for the next frame.
+    ///
+    /// Unlike [`set_pledged_content_size`](Self::set_pledged_content_size),
+    /// this does **not** enforce that exactly `size` bytes are written; it
+    /// only optimises matcher parameters for small inputs.  Must be called
+    /// before the first [`write`](Write::write).
+    pub fn set_source_size_hint(&mut self, size: u64) -> Result<(), Error> {
+        self.ensure_open()?;
+        if self.frame_started {
+            return Err(invalid_input_error(
+                "source size hint must be set before the first write",
+            ));
+        }
+        self.state.matcher.set_source_size_hint(size);
         Ok(())
     }
 
diff --git a/zstd/src/tests/roundtrip_integrity.rs b/zstd/src/tests/roundtrip_integrity.rs
index 0aedc16c..61204b21 100644
--- a/zstd/src/tests/roundtrip_integrity.rs
+++ b/zstd/src/tests/roundtrip_integrity.rs
@@ -621,8 +621,8 @@ fn negative_levels_roundtrip() {
     }
 }
 
-/// Higher levels should generally not produce *larger* output than lower levels
-/// on reasonably compressible data.
+/// For this reasonably compressible fixture, the sampled higher levels are
+/// expected not to produce larger output than the lower sampled levels.
 #[test]
 fn levels_monotonic_compression_ratio() {
     let data = generate_compressible(9300, 64 * 1024);
@@ -670,3 +670,104 @@ fn out_of_range_level_clamped() {
     let result = roundtrip_at_level(&data, CompressionLevel::from_level(-200000));
     assert_eq!(data, result, "Clamped Level(-200000) must still roundtrip");
 }
+
+// ─── Source-size-aware selection ───────────────────────────────────
+
+/// Small input with source size hint should produce valid output.
+#[test]
+fn source_size_hint_small_input_roundtrip() {
+    let data = generate_compressible(9600, 4 * 1024); // 4 KiB
+    let compressed = {
+        let mut compressor = FrameCompressor::new(CompressionLevel::from_level(7));
+        compressor.set_source_size_hint(data.len() as u64);
+        compressor.set_source(data.as_slice());
+        let mut out = Vec::new();
+        compressor.set_drain(&mut out);
+        compressor.compress();
+        out
+    };
+    let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap();
+    let mut result = Vec::new();
+    decoder.read_to_end(&mut result).unwrap();
+    assert_eq!(data, result, "Small input with size hint must roundtrip");
+}
+
+/// Source size hint should reduce compressed output overhead for small inputs
+/// by avoiding oversized windows/tables.
+#[test]
+fn source_size_hint_reduces_window_for_small_input() {
+    let data = generate_compressible(9601, 1024); // 1 KiB
+    // Without hint: uses full level-11 window (16 MiB)
+    let no_hint = compress_to_vec(&data[..], CompressionLevel::from_level(11));
+    // With hint: should use smaller window
+    let with_hint = {
+        let mut compressor = FrameCompressor::new(CompressionLevel::from_level(11));
+        compressor.set_source_size_hint(data.len() as u64);
+        compressor.set_source(data.as_slice());
+        let mut out = Vec::new();
+        compressor.set_drain(&mut out);
+        compressor.compress();
+        out
+    };
+    // Both must decompress correctly
+    let mut decoder = StreamingDecoder::new(no_hint.as_slice()).unwrap();
+    let mut r = Vec::new();
+    decoder.read_to_end(&mut r).unwrap();
+    assert_eq!(data, r);
+
+    let mut decoder = StreamingDecoder::new(with_hint.as_slice()).unwrap();
+    let mut r = Vec::new();
+    decoder.read_to_end(&mut r).unwrap();
+    assert_eq!(data, r);
+
+    // With hint should produce output no larger than without
+    // (smaller window descriptor in frame header, similar or identical blocks)
+    assert!(
+        with_hint.len() <= no_hint.len(),
+        "Size hint should not produce larger output: hint={} no_hint={}",
+        with_hint.len(),
+        no_hint.len(),
+    );
+}
+
+/// Streaming encoder with pledged content size automatically uses source size hint.
+#[test]
+fn streaming_pledged_size_uses_source_hint() {
+    use crate::encoding::StreamingEncoder;
+    use crate::io::Write;
+
+    let data = generate_compressible(9602, 2 * 1024); // 2 KiB
+    let mut encoder = StreamingEncoder::new(Vec::new(), CompressionLevel::from_level(11));
+    encoder.set_pledged_content_size(data.len() as u64).unwrap();
+    encoder.write_all(&data).unwrap();
+    let compressed = encoder.finish().unwrap();
+
+    let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap();
+    let mut result = Vec::new();
+    decoder.read_to_end(&mut result).unwrap();
+    assert_eq!(data, result, "Pledged-size streaming must roundtrip");
+}
+
+/// All 22 levels produce valid output for a tiny (256 byte) input with size hint.
+#[test]
+fn all_levels_tiny_input_with_hint() {
+    let data = generate_compressible(9603, 256);
+    for level in 1..=22 {
+        let compressed = {
+            let mut compressor = FrameCompressor::new(CompressionLevel::from_level(level));
+            compressor.set_source_size_hint(data.len() as u64);
+            compressor.set_source(data.as_slice());
+            let mut out = Vec::new();
+            compressor.set_drain(&mut out);
+            compressor.compress();
+            out
+        };
+        let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap();
+        let mut result = Vec::new();
+        decoder.read_to_end(&mut result).unwrap();
+        assert_eq!(
+            data, result,
+            "Tiny input with hint failed for Level({level})"
+        );
+    }
+}

From 625c1f09eb4b61df227084e57195f47de8ff0506 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 16:59:28 +0300
Subject: [PATCH 04/18] fix(encoding): honor source-size hints in matcher and
 cli

- pass source size from CLI into StreamingEncoder before writing
- resize dfast hash tables based on hinted window and clamp slice size to window
- strengthen roundtrip tests for i32::MIN clamping and hinted frame window assertions
---
 cli/src/main.rs                       |  3 +
 zstd/src/encoding/match_generator.rs  | 98 ++++++++++++++++++++++-----
 zstd/src/tests/roundtrip_integrity.rs | 27 ++++++--
 3 files changed, 104 insertions(+), 24 deletions(-)

diff --git a/cli/src/main.rs b/cli/src/main.rs
index 9f191657..cbfbc8e5 100644
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@@ -141,6 +141,9 @@ fn compress(input: PathBuf, output: PathBuf, level: i32, store: bool) -> color_e
     let compression_result: color_eyre::Result<File> = (|| {
         let mut encoder =
             structured_zstd::encoding::StreamingEncoder::new(temporary_output, compression_level);
+        encoder
+            .set_source_size_hint(source_size as u64)
+            .wrap_err("failed to configure source size hint")?;
         std::io::copy(&mut encoder_input, &mut encoder).wrap_err("streaming compression failed")?;
         encoder.finish().wrap_err("failed to finalize zstd frame")
     })();
diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs
index 5dba03a7..8d40070a 100644
--- a/zstd/src/encoding/match_generator.rs
+++ b/zstd/src/encoding/match_generator.rs
@@ -73,6 +73,11 @@ struct LevelParams {
     hc: HcConfig,
 }
 
+fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
+    let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
+    window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
+}
+
 /// Parameter table for numeric compression levels 1–22.
 ///
 /// Each entry maps a zstd compression level to the best-available matcher
@@ -217,8 +222,10 @@ pub struct MatchGeneratorDriver {
 }
 
 impl MatchGeneratorDriver {
-    /// slice_size says how big the slices should be that are allocated to work with
-    /// max_slices_in_window says how many slices should at most be used while looking for matches
+    /// `slice_size` sets the base block allocation size used for matcher input chunks.
+    /// `max_slices_in_window` determines the initial window capacity at construction
+    /// time. Effective window sizing is recalculated on every [`reset`](Self::reset)
+    /// from the resolved compression level and optional source-size hint.
     pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
         let max_window_size = max_slices_in_window * slice_size;
         Self {
@@ -388,7 +395,7 @@ impl Matcher for MatchGeneratorDriver {
         }
 
         self.active_backend = params.backend;
-        self.slice_size = self.base_slice_size;
+        self.slice_size = self.base_slice_size.min(max_window_size);
         self.reported_window_size = max_window_size;
         match self.active_backend {
             MatcherBackend::Simple => {
@@ -410,6 +417,7 @@ impl Matcher for MatchGeneratorDriver {
                     .get_or_insert_with(|| DfastMatchGenerator::new(max_window_size));
                 dfast.max_window_size = max_window_size;
                 dfast.lazy_depth = params.lazy_depth;
+                dfast.set_hash_bits(dfast_hash_bits_for_window(max_window_size));
                 let vec_pool = &mut self.vec_pool;
                 dfast.reset(|mut data| {
                     data.resize(data.capacity(), 0);
@@ -1098,6 +1106,7 @@ struct DfastMatchGenerator {
     offset_hist: [u32; 3],
     short_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
     long_hash: Vec<[usize; DFAST_SEARCH_DEPTH]>,
+    hash_bits: usize,
     // Lazy match lookahead depth (internal tuning parameter).
     lazy_depth: u8,
 }
@@ -1121,10 +1130,20 @@ impl DfastMatchGenerator {
             offset_hist: [1, 4, 8],
             short_hash: Vec::new(),
             long_hash: Vec::new(),
+            hash_bits: DFAST_HASH_BITS,
             lazy_depth: 1,
         }
     }
 
+    fn set_hash_bits(&mut self, bits: usize) {
+        let clamped = bits.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS);
+        if self.hash_bits != clamped {
+            self.hash_bits = clamped;
+            self.short_hash = Vec::new();
+            self.long_hash = Vec::new();
+        }
+    }
+
     fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
         self.window_size = 0;
         self.history.clear();
@@ -1234,14 +1253,13 @@ impl DfastMatchGenerator {
     }
 
     fn ensure_hash_tables(&mut self) {
-        if self.short_hash.is_empty() {
+        let table_len = 1usize << self.hash_bits;
+        if self.short_hash.len() != table_len {
             // This is intentionally lazy so Fastest/Uncompressed never pay the
             // ~dfast-level memory cost. The current size tracks the issue's
             // zstd level-3 style parameters rather than a generic low-memory preset.
-            self.short_hash =
-                alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS];
-            self.long_hash =
-                alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; 1 << DFAST_HASH_BITS];
+            self.short_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
+            self.long_hash = alloc::vec![[DFAST_EMPTY_SLOT; DFAST_SEARCH_DEPTH]; table_len];
         }
     }
 
@@ -1437,7 +1455,7 @@ impl DfastMatchGenerator {
         let idx = pos - self.history_abs_start;
         let short = {
             let concat = self.live_history();
-            (idx + 4 <= concat.len()).then(|| Self::hash4(&concat[idx..]))
+            (idx + 4 <= concat.len()).then(|| self.hash4(&concat[idx..]))
         };
         if let Some(short) = short {
             let bucket = &mut self.short_hash[short];
@@ -1449,7 +1467,7 @@ impl DfastMatchGenerator {
 
         let long = {
             let concat = self.live_history();
-            (idx + 8 <= concat.len()).then(|| Self::hash8(&concat[idx..]))
+            (idx + 8 <= concat.len()).then(|| self.hash8(&concat[idx..]))
         };
         if let Some(long) = long {
             let bucket = &mut self.long_hash[long];
@@ -1464,7 +1482,7 @@ impl DfastMatchGenerator {
         let concat = self.live_history();
         let idx = pos - self.history_abs_start;
         (idx + 4 <= concat.len())
-            .then(|| self.short_hash[Self::hash4(&concat[idx..])])
+            .then(|| self.short_hash[self.hash4(&concat[idx..])])
             .into_iter()
             .flatten()
             .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
@@ -1474,25 +1492,25 @@ impl DfastMatchGenerator {
         let concat = self.live_history();
         let idx = pos - self.history_abs_start;
         (idx + 8 <= concat.len())
-            .then(|| self.long_hash[Self::hash8(&concat[idx..])])
+            .then(|| self.long_hash[self.hash8(&concat[idx..])])
             .into_iter()
             .flatten()
             .filter(|candidate| *candidate != DFAST_EMPTY_SLOT)
     }
 
-    fn hash4(data: &[u8]) -> usize {
+    fn hash4(&self, data: &[u8]) -> usize {
         let value = u32::from_le_bytes(data[..4].try_into().unwrap()) as u64;
-        Self::hash_bits(value)
+        self.hash_bits(value)
     }
 
-    fn hash8(data: &[u8]) -> usize {
+    fn hash8(&self, data: &[u8]) -> usize {
         let value = u64::from_le_bytes(data[..8].try_into().unwrap());
-        Self::hash_bits(value)
+        self.hash_bits(value)
     }
 
-    fn hash_bits(value: u64) -> usize {
+    fn hash_bits(&self, value: u64) -> usize {
         const PRIME: u64 = 0x9E37_79B1_85EB_CA87;
-        ((value.wrapping_mul(PRIME)) >> (64 - DFAST_HASH_BITS)) as usize
+        ((value.wrapping_mul(PRIME)) >> (64 - self.hash_bits)) as usize
     }
 }
 
@@ -2141,6 +2159,50 @@ fn driver_switches_backends_and_initializes_dfast_via_reset() {
     assert_eq!(driver.window_size(), (1u64 << 17));
 }
 
+#[test]
+fn driver_small_source_hint_shrinks_dfast_hash_tables() {
+    let mut driver = MatchGeneratorDriver::new(32, 2);
+
+    driver.reset(CompressionLevel::Default);
+    let mut space = driver.get_next_space();
+    space[..12].copy_from_slice(b"abcabcabcabc");
+    space.truncate(12);
+    driver.commit_space(space);
+    driver.skip_matching();
+    let full_tables = driver.dfast_matcher().short_hash.len();
+    assert_eq!(full_tables, 1 << DFAST_HASH_BITS);
+
+    driver.set_source_size_hint(1024);
+    driver.reset(CompressionLevel::Default);
+    let mut space = driver.get_next_space();
+    space[..12].copy_from_slice(b"xyzxyzxyzxyz");
+    space.truncate(12);
+    driver.commit_space(space);
+    driver.skip_matching();
+    let hinted_tables = driver.dfast_matcher().short_hash.len();
+
+    assert_eq!(driver.window_size(), 1 << MIN_WINDOW_LOG);
+    assert!(
+        hinted_tables < full_tables,
+        "tiny source hint should reduce dfast table footprint"
+    );
+}
+
+#[test]
+fn source_hint_clamps_driver_slice_size_to_window() {
+    let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
+    driver.set_source_size_hint(1024);
+    driver.reset(CompressionLevel::Default);
+
+    let window = driver.window_size() as usize;
+    assert_eq!(window, 1024);
+    assert_eq!(driver.slice_size, window);
+
+    let space = driver.get_next_space();
+    assert_eq!(space.len(), window);
+    driver.commit_space(space);
+}
+
 #[test]
 fn driver_best_to_fastest_releases_oversized_hc_tables() {
     let mut driver = MatchGeneratorDriver::new(32, 2);
diff --git a/zstd/src/tests/roundtrip_integrity.rs b/zstd/src/tests/roundtrip_integrity.rs
index 61204b21..08ff2918 100644
--- a/zstd/src/tests/roundtrip_integrity.rs
+++ b/zstd/src/tests/roundtrip_integrity.rs
@@ -669,6 +669,8 @@ fn out_of_range_level_clamped() {
     assert_eq!(data, result, "Clamped Level(100) must still roundtrip");
     let result = roundtrip_at_level(&data, CompressionLevel::from_level(-200000));
     assert_eq!(data, result, "Clamped Level(-200000) must still roundtrip");
+    let result = roundtrip_at_level(&data, CompressionLevel::from_level(i32::MIN));
+    assert_eq!(data, result, "Clamped Level(i32::MIN) must still roundtrip");
 }
 
 // ─── Source-size-aware selection ───────────────────────────────────
@@ -699,6 +701,11 @@ fn source_size_hint_reduces_window_for_small_input() {
     let data = generate_compressible(9601, 1024); // 1 KiB
     // Without hint: uses full level-11 window (16 MiB)
     let no_hint = compress_to_vec(&data[..], CompressionLevel::from_level(11));
+    let no_hint_header = crate::decoding::frame::read_frame_header(no_hint.as_slice())
+        .unwrap()
+        .0
+        .window_size()
+        .unwrap();
     // With hint: should use smaller window
     let with_hint = {
         let mut compressor = FrameCompressor::new(CompressionLevel::from_level(11));
@@ -709,6 +716,11 @@ fn source_size_hint_reduces_window_for_small_input() {
         compressor.compress();
         out
     };
+    let with_hint_header = crate::decoding::frame::read_frame_header(with_hint.as_slice())
+        .unwrap()
+        .0
+        .window_size()
+        .unwrap();
     // Both must decompress correctly
     let mut decoder = StreamingDecoder::new(no_hint.as_slice()).unwrap();
     let mut r = Vec::new();
@@ -720,13 +732,16 @@ fn source_size_hint_reduces_window_for_small_input() {
     decoder.read_to_end(&mut r).unwrap();
     assert_eq!(data, r);
 
-    // With hint should produce output no larger than without
-    // (smaller window descriptor in frame header, similar or identical blocks)
     assert!(
-        with_hint.len() <= no_hint.len(),
-        "Size hint should not produce larger output: hint={} no_hint={}",
-        with_hint.len(),
-        no_hint.len(),
+        with_hint_header <= no_hint_header,
+        "size hint should not increase frame window size: hint={} no_hint={}",
+        with_hint_header,
+        no_hint_header
+    );
+    assert!(
+        with_hint_header < (16 * 1024 * 1024),
+        "hinted level-11 frame should advertise smaller-than-default window, got {}",
+        with_hint_header
     );
 }
 

From ec9a61a68263ff181765bda69bae8e8b977b3daf Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 17:36:31 +0300
Subject: [PATCH 05/18] test(cli): clarify --store level validation behavior

- make pledged-size streaming test assert observable window-size effect
- clarify CLI help: --store skips compression but parse-time level validation remains
- add regression test for --store with out-of-range --level
---
 cli/src/main.rs                       | 17 +++++++++++++++--
 zstd/src/tests/roundtrip_integrity.rs | 23 +++++++++++++++++++++++
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/cli/src/main.rs b/cli/src/main.rs
index cbfbc8e5..aa197a8f 100644
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@@ -62,8 +62,8 @@ enum Commands {
         level: i32,
         /// Write an uncompressed zstd frame (no compression).
         ///
-        /// When set, --level is ignored and the input is wrapped in a
-        /// raw zstd frame without any compression.
+        /// When set, compression itself ignores `--level` and writes a raw
+        /// zstd frame. The CLI still validates `--level` range at parse time.
         #[arg(long)]
         store: bool,
     },
@@ -441,6 +441,19 @@ mod tests {
         assert!(parse.is_err());
     }
 
+    #[test]
+    fn cli_store_still_validates_level_range_at_parse_time() {
+        let parse = Cli::try_parse_from([
+            "structured-zstd",
+            "compress",
+            "in.bin",
+            "--store",
+            "--level",
+            "23",
+        ]);
+        assert!(parse.is_err());
+    }
+
     #[test]
     fn compress_rejects_same_input_and_output_paths() {
         let unique = SystemTime::now()
diff --git a/zstd/src/tests/roundtrip_integrity.rs b/zstd/src/tests/roundtrip_integrity.rs
index 08ff2918..f1d35bda 100644
--- a/zstd/src/tests/roundtrip_integrity.rs
+++ b/zstd/src/tests/roundtrip_integrity.rs
@@ -752,15 +752,38 @@ fn streaming_pledged_size_uses_source_hint() {
     use crate::io::Write;
 
     let data = generate_compressible(9602, 2 * 1024); // 2 KiB
+    let no_hint = compress_to_vec(&data[..], CompressionLevel::from_level(11));
+    let no_hint_header = crate::decoding::frame::read_frame_header(no_hint.as_slice())
+        .unwrap()
+        .0
+        .window_size()
+        .unwrap();
+
     let mut encoder = StreamingEncoder::new(Vec::new(), CompressionLevel::from_level(11));
     encoder.set_pledged_content_size(data.len() as u64).unwrap();
     encoder.write_all(&data).unwrap();
     let compressed = encoder.finish().unwrap();
+    let hinted_header = crate::decoding::frame::read_frame_header(compressed.as_slice())
+        .unwrap()
+        .0
+        .window_size()
+        .unwrap();
 
     let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap();
     let mut result = Vec::new();
     decoder.read_to_end(&mut result).unwrap();
     assert_eq!(data, result, "Pledged-size streaming must roundtrip");
+    assert!(
+        hinted_header <= no_hint_header,
+        "pledged source hint should not increase window size: hinted={} no_hint={}",
+        hinted_header,
+        no_hint_header
+    );
+    assert!(
+        hinted_header < (16 * 1024 * 1024),
+        "pledged source hint should reduce level-11 advertised window, got {}",
+        hinted_header
+    );
 }
 
 /// All 22 levels produce valid output for a tiny (256 byte) input with size hint.

From 70dd7f626bd9d27ecf37ff732eecdd424ec28d1f Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 18:02:34 +0300
Subject: [PATCH 06/18] test(cli): derive parse boundary inputs from level
 constants

---
 cli/src/main.rs | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/cli/src/main.rs b/cli/src/main.rs
index aa197a8f..a67924a2 100644
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@@ -418,7 +418,15 @@ mod tests {
 
     #[test]
     fn cli_rejects_unsupported_compression_level_at_parse_time() {
-        let parse = Cli::try_parse_from(["structured-zstd", "compress", "in.bin", "--level", "23"]);
+        let too_high =
+            (structured_zstd::encoding::CompressionLevel::MAX_LEVEL as i64 + 1).to_string();
+        let parse = Cli::try_parse_from([
+            "structured-zstd",
+            "compress",
+            "in.bin",
+            "--level",
+            too_high.as_str(),
+        ]);
         assert!(parse.is_err());
     }
 
@@ -430,26 +438,29 @@ mod tests {
 
     #[test]
     fn cli_rejects_too_negative_compression_level() {
-        // MIN_LEVEL is -131072; anything below that should be rejected
+        let too_low =
+            (structured_zstd::encoding::CompressionLevel::MIN_LEVEL as i64 - 1).to_string();
         let parse = Cli::try_parse_from([
             "structured-zstd",
             "compress",
             "in.bin",
             "--level",
-            "-131073",
+            too_low.as_str(),
         ]);
         assert!(parse.is_err());
     }
 
     #[test]
     fn cli_store_still_validates_level_range_at_parse_time() {
+        let too_high =
+            (structured_zstd::encoding::CompressionLevel::MAX_LEVEL as i64 + 1).to_string();
         let parse = Cli::try_parse_from([
             "structured-zstd",
             "compress",
             "in.bin",
             "--store",
             "--level",
-            "23",
+            too_high.as_str(),
         ]);
         assert!(parse.is_err());
     }

From eba6cbb76f4c0a062cd1e8807eb656122fb938b4 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 19:00:51 +0300
Subject: [PATCH 07/18] docs(encoding): align source-size docs with runtime
 behavior

---
 zstd/src/encoding/match_generator.rs   | 14 ++++++++------
 zstd/src/encoding/streaming_encoder.rs |  2 ++
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs
index 8d40070a..cc17335a 100644
--- a/zstd/src/encoding/match_generator.rs
+++ b/zstd/src/encoding/match_generator.rs
@@ -119,16 +119,18 @@ const MIN_WINDOW_LOG: u8 = 10;
 
 /// Adjust level parameters for a known source size.
 ///
-/// Follows the C zstd `clevels.h` approach: for small inputs, cap
-/// window_log (and hash/chain for HC) so the encoder doesn't allocate
-/// oversized tables.  The four C size classes are:
-///   >256 KiB (default table), ≤256 KiB, ≤128 KiB, ≤16 KiB.
+/// For non-empty inputs, this derives a cap from `ceil(log2(src_size))`,
+/// then clamps it to [`MIN_WINDOW_LOG`]. This keeps tables bounded for
+/// small inputs while preserving the encoder's minimum supported window.
+/// For the HC backend, `hash_log` and `chain_log` are reduced
+/// proportionally.
 fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
     if src_size == 0 {
         return params;
     }
-    // Cap window_log so the window doesn't exceed the source.
-    // ceil_log2(src_size): the minimum number of bits to represent src_size.
+    // Derive a source-size-based cap from ceil(log2(src_size)), then
+    // clamp to MIN_WINDOW_LOG. For inputs smaller than 1 KiB we keep the
+    // 1 KiB minimum window instead of shrinking below that floor.
     let src_log = 64 - (src_size - 1).leading_zeros(); // ceil_log2
     let src_log = (src_log as u8).max(MIN_WINDOW_LOG);
     if src_log < params.window_log {
diff --git a/zstd/src/encoding/streaming_encoder.rs b/zstd/src/encoding/streaming_encoder.rs
index eeebe419..d5c4eb2c 100644
--- a/zstd/src/encoding/streaming_encoder.rs
+++ b/zstd/src/encoding/streaming_encoder.rs
@@ -81,6 +81,8 @@ impl<W: Write, M: Matcher> StreamingEncoder<W, M> {
     ///
     /// When set, the frame header will include a `Frame_Content_Size` field.
     /// This enables decoders to pre-allocate output buffers.
+    /// The pledged size is also forwarded as a source-size hint to the
+    /// matcher so small inputs can use smaller matching tables.
     ///
     /// Must be called **before** the first [`write`](Write::write) call;
     /// calling it after the frame header has already been emitted returns an

From 42e6ecf383df20ff4ed2dd07d2361a1cab74476a Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 19:09:00 +0300
Subject: [PATCH 08/18] test(roundtrip): replace brittle monotonic ratio
 assertion

---
 zstd/src/tests/roundtrip_integrity.rs | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/zstd/src/tests/roundtrip_integrity.rs b/zstd/src/tests/roundtrip_integrity.rs
index f1d35bda..f7715f30 100644
--- a/zstd/src/tests/roundtrip_integrity.rs
+++ b/zstd/src/tests/roundtrip_integrity.rs
@@ -621,20 +621,24 @@ fn negative_levels_roundtrip() {
     }
 }
 
-/// For this reasonably compressible fixture, the sampled higher levels are
-/// expected not to produce larger output than the lower sampled levels.
+/// Sampled numeric levels should produce valid compressed output and preserve
+/// data through a full compress/decompress roundtrip.
 #[test]
 fn levels_monotonic_compression_ratio() {
     let data = generate_compressible(9300, 64 * 1024);
-    let mut prev_size = usize::MAX;
     for level in [1, 3, 7, 11] {
         let compressed = compress_to_vec(&data[..], CompressionLevel::from_level(level));
         assert!(
-            compressed.len() <= prev_size,
-            "Level {level} produced larger output ({}) than a lower level ({prev_size})",
-            compressed.len(),
+            !compressed.is_empty(),
+            "Level {level} produced empty compressed output"
+        );
+        let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap();
+        let mut result = Vec::new();
+        decoder.read_to_end(&mut result).unwrap();
+        assert_eq!(
+            data, result,
+            "Roundtrip failed for sampled compression level {level}"
         );
-        prev_size = compressed.len();
     }
 }
 

From d7f0a79e8c98deacc5d59ea43b89a2e1f068cc43 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 19:23:32 +0300
Subject: [PATCH 09/18] fix(encoding): clamp zero source-size hint to minimum
 window

---
 zstd/src/encoding/match_generator.rs | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs
index cc17335a..956df2f1 100644
--- a/zstd/src/encoding/match_generator.rs
+++ b/zstd/src/encoding/match_generator.rs
@@ -119,20 +119,22 @@ const MIN_WINDOW_LOG: u8 = 10;
 
 /// Adjust level parameters for a known source size.
 ///
-/// For non-empty inputs, this derives a cap from `ceil(log2(src_size))`,
-/// then clamps it to [`MIN_WINDOW_LOG`]. This keeps tables bounded for
+/// This derives a cap from `ceil(log2(src_size))`, then clamps it to
+/// [`MIN_WINDOW_LOG`]. A zero-byte size hint is treated as
+/// [`MIN_WINDOW_LOG`]. This keeps tables bounded for
 /// small inputs while preserving the encoder's minimum supported window.
 /// For the HC backend, `hash_log` and `chain_log` are reduced
 /// proportionally.
 fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
-    if src_size == 0 {
-        return params;
-    }
     // Derive a source-size-based cap from ceil(log2(src_size)), then
-    // clamp to MIN_WINDOW_LOG. For inputs smaller than 1 KiB we keep the
+    // clamp to MIN_WINDOW_LOG. For inputs smaller than 1 KiB (or zero) we keep the
     // 1 KiB minimum window instead of shrinking below that floor.
-    let src_log = 64 - (src_size - 1).leading_zeros(); // ceil_log2
-    let src_log = (src_log as u8).max(MIN_WINDOW_LOG);
+    let src_log = if src_size == 0 {
+        MIN_WINDOW_LOG
+    } else {
+        (64 - (src_size - 1).leading_zeros()) as u8 // ceil_log2
+    };
+    let src_log = src_log.max(MIN_WINDOW_LOG);
     if src_log < params.window_log {
         params.window_log = src_log;
     }

From 8bb19c8cfa761699225a3dd92c998e142310a55f Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 19:25:23 +0300
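Subject: [PATCH 10/18] test(encoding): tighten level roundtrip checks and pool
 sizing

- get_next_space() now resizes a reused pooled buffer to slice_size (shrinking
  excess capacity first) instead of returning it at whatever length it had
- all_22_levels_roundtrip compresses with a source-size hint so the
  hint-adjusted parameters are exercised at every level
- rename levels_monotonic_compression_ratio to sampled_levels_roundtrip_validity
  to match what the test asserts since the ratio check was removed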

---
 zstd/src/encoding/match_generator.rs  | 14 +++++++++-----
 zstd/src/tests/roundtrip_integrity.rs | 15 +++++++++++++--
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs
index 956df2f1..3f46e45a 100644
--- a/zstd/src/encoding/match_generator.rs
+++ b/zstd/src/encoding/match_generator.rs
@@ -535,11 +535,15 @@ impl Matcher for MatchGeneratorDriver {
     }
 
     fn get_next_space(&mut self) -> Vec<u8> {
-        self.vec_pool.pop().unwrap_or_else(|| {
-            let mut space = alloc::vec![0; self.slice_size];
-            space.resize(space.capacity(), 0);
-            space
-        })
+        if let Some(mut space) = self.vec_pool.pop() {
+            space.clear();
+            if space.capacity() > self.slice_size {
+                space.shrink_to(self.slice_size);
+            }
+            space.resize(self.slice_size, 0);
+            return space;
+        }
+        alloc::vec![0; self.slice_size]
     }
 
     fn get_last_space(&mut self) -> &[u8] {
diff --git a/zstd/src/tests/roundtrip_integrity.rs b/zstd/src/tests/roundtrip_integrity.rs
index f7715f30..1e62ffa8 100644
--- a/zstd/src/tests/roundtrip_integrity.rs
+++ b/zstd/src/tests/roundtrip_integrity.rs
@@ -606,7 +606,18 @@ fn numeric_level_0_is_default_compression() {
 fn all_22_levels_roundtrip() {
     let data = generate_compressible(9100, 32 * 1024);
     for level in 1..=22 {
-        let result = roundtrip_at_level(&data, CompressionLevel::from_level(level));
+        let compressed = {
+            let mut compressor = FrameCompressor::new(CompressionLevel::from_level(level));
+            compressor.set_source_size_hint(data.len() as u64);
+            compressor.set_source(data.as_slice());
+            let mut out = Vec::new();
+            compressor.set_drain(&mut out);
+            compressor.compress();
+            out
+        };
+        let mut decoder = StreamingDecoder::new(compressed.as_slice()).unwrap();
+        let mut result = Vec::new();
+        decoder.read_to_end(&mut result).unwrap();
         assert_eq!(data, result, "Roundtrip failed for Level({level})");
     }
 }
@@ -624,7 +635,7 @@ fn negative_levels_roundtrip() {
 /// Sampled numeric levels should produce valid compressed output and preserve
 /// data through a full compress/decompress roundtrip.
 #[test]
-fn levels_monotonic_compression_ratio() {
+fn sampled_levels_roundtrip_validity() {
     let data = generate_compressible(9300, 64 * 1024);
     for level in [1, 3, 7, 11] {
         let compressed = compress_to_vec(&data[..], CompressionLevel::from_level(level));

From 899a8f07922e871bb66d24daeeb7c09cc5e210b7 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 19:34:17 +0300
Subject: [PATCH 11/18] docs(cli): sync numeric defaults and source-hint
 coverage

---
 README.md                              |  6 ++---
 cli/src/main.rs                        |  2 +-
 zstd/src/encoding/streaming_encoder.rs | 35 ++++++++++++++++++++++++++
 3 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index f6578fdf..3f5e783b 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ This is a **maintained fork** of [KillingSpark/zstd-rs](https://github.com/Killi
 **Fork goals:**
 - Dictionary compression improvements (critical for per-label trained dictionaries in LSM-tree)
 - Performance parity with C zstd for decompression (currently 1.4-3.5x slower)
-- Full numeric compression levels (1–22 plus negative ultra-fast, C zstd compatible)
+- Full numeric compression levels (0 = default, 1–22 plus negative ultra-fast, C zstd compatible)
 - No FFI — pure `cargo build`, no cmake/system libraries (ADR-013 compliance)
 
 **Upstream relationship:** We periodically sync with upstream but maintain an independent development trajectory focused on CoordiNode requirements.
@@ -46,7 +46,7 @@ Complete RFC 8878 implementation. Performance: ~1.4-3.5x slower than C zstd depe
 - [x] Default (roughly level 3)
 - [x] Better (roughly level 7)
 - [x] Best (roughly level 11)
-- [x] Numeric levels 1–22 via `CompressionLevel::from_level(n)` (C zstd compatible numbering)
+- [x] Numeric levels `0` (default), `1–22`, and negative ultra-fast levels via `CompressionLevel::from_level(n)` (C zstd compatible numbering)
 - [x] Negative levels for ultra-fast compression
 - [x] Checksums
 - [x] Frame Content Size — `FrameCompressor` writes FCS automatically; `StreamingEncoder` requires `set_pledged_content_size()` before first write
@@ -71,7 +71,7 @@ use structured_zstd::encoding::{compress, compress_to_vec, CompressionLevel};
 let data: &[u8] = b"hello world";
 // Named level
 let compressed = compress_to_vec(data, CompressionLevel::Fastest);
-// Numeric level (C zstd compatible: 1-22, negative for ultra-fast)
+// Numeric level (C zstd compatible: 0 = default, 1-22, negative for ultra-fast)
 let compressed = compress_to_vec(data, CompressionLevel::from_level(7));
 ```
 
diff --git a/cli/src/main.rs b/cli/src/main.rs
index a67924a2..945f30e2 100644
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@@ -52,7 +52,7 @@ enum Commands {
             short,
             long,
             value_name = "LEVEL",
-            default_value_t = 3,
+            default_value_t = CompressionLevel::DEFAULT_LEVEL,
             value_parser = clap::value_parser!(i32).range(
                 (CompressionLevel::MIN_LEVEL as i64)..=(CompressionLevel::MAX_LEVEL as i64)
             ),
diff --git a/zstd/src/encoding/streaming_encoder.rs b/zstd/src/encoding/streaming_encoder.rs
index d5c4eb2c..afd3db60 100644
--- a/zstd/src/encoding/streaming_encoder.rs
+++ b/zstd/src/encoding/streaming_encoder.rs
@@ -1016,6 +1016,41 @@ mod tests {
         assert_eq!(err.kind(), ErrorKind::InvalidInput);
     }
 
+    #[test]
+    fn source_size_hint_directly_reduces_window_header() {
+        let payload = b"streaming-source-size-hint".repeat(64);
+
+        let mut no_hint = StreamingEncoder::new(Vec::new(), CompressionLevel::from_level(11));
+        no_hint.write_all(payload.as_slice()).unwrap();
+        let no_hint_frame = no_hint.finish().unwrap();
+        let no_hint_header = crate::decoding::frame::read_frame_header(no_hint_frame.as_slice())
+            .unwrap()
+            .0;
+        let no_hint_window = no_hint_header.window_size().unwrap();
+
+        let mut with_hint = StreamingEncoder::new(Vec::new(), CompressionLevel::from_level(11));
+        with_hint
+            .set_source_size_hint(payload.len() as u64)
+            .unwrap();
+        with_hint.write_all(payload.as_slice()).unwrap();
+        let with_hint_frame = with_hint.finish().unwrap();
+        let with_hint_header =
+            crate::decoding::frame::read_frame_header(with_hint_frame.as_slice())
+                .unwrap()
+                .0;
+        let with_hint_window = with_hint_header.window_size().unwrap();
+
+        assert!(
+            with_hint_window <= no_hint_window,
+            "source size hint should not increase advertised window"
+        );
+
+        let mut decoder = StreamingDecoder::new(with_hint_frame.as_slice()).unwrap();
+        let mut decoded = Vec::new();
+        decoder.read_to_end(&mut decoded).unwrap();
+        assert_eq!(decoded, payload);
+    }
+
     #[cfg(feature = "std")]
     #[test]
     fn pledged_content_size_c_zstd_compatible() {

From c37f6cfa5010913990f6869193b8ee7f7a8c2218 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 20:56:30 +0300
Subject: [PATCH 12/18] fix(cli): document clap i64 range bounds for level
 parser

---
 cli/src/main.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cli/src/main.rs b/cli/src/main.rs
index 945f30e2..239074d6 100644
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@@ -53,6 +53,8 @@ enum Commands {
             long,
             value_name = "LEVEL",
             default_value_t = CompressionLevel::DEFAULT_LEVEL,
+            // clap's ranged parser expects i64 bounds here (RangedI64ValueParser),
+            // even though the target value type is i32.
             value_parser = clap::value_parser!(i32).range(
                 (CompressionLevel::MIN_LEVEL as i64)..=(CompressionLevel::MAX_LEVEL as i64)
             ),

From 6aa76184c676e628918c85f3c9d8cbf7fe47ad86 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 22:12:33 +0300
Subject: [PATCH 13/18] docs(encoding): note semver impact of level variant

---
 zstd/src/encoding/mod.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/zstd/src/encoding/mod.rs b/zstd/src/encoding/mod.rs
index 18bd7f74..0d797c1c 100644
--- a/zstd/src/encoding/mod.rs
+++ b/zstd/src/encoding/mod.rs
@@ -104,6 +104,10 @@ pub enum CompressionLevel {
     /// with the lazy2 hash-chain backend.  Levels that require strategies
     /// this crate has not yet implemented (btopt, btultra) are approximated
     /// with the closest available matcher.
+    ///
+    /// Semver note: this variant was added after the initial enum shape and
+    /// is a breaking API change for downstream crates that exhaustively
+    /// `match` on [`CompressionLevel`] without a wildcard arm.
     Level(i32),
 }
 

From 672ac260962c9c9f40a7d492c003fcb927331f90 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 22:23:03 +0300
Subject: [PATCH 14/18] fix(encoding): canonicalize named levels in from_level

---
 zstd/src/encoding/mod.rs              | 16 +++++++++++----
 zstd/src/tests/roundtrip_integrity.rs | 29 +++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/zstd/src/encoding/mod.rs b/zstd/src/encoding/mod.rs
index 0d797c1c..1f83313e 100644
--- a/zstd/src/encoding/mod.rs
+++ b/zstd/src/encoding/mod.rs
@@ -121,11 +121,19 @@ impl CompressionLevel {
 
     /// Create a compression level from a numeric value.
     ///
-    /// Wraps the raw integer in [`Level`](Self::Level).  Values outside
-    /// [`MIN_LEVEL`](Self::MIN_LEVEL)..=[`MAX_LEVEL`](Self::MAX_LEVEL) are
-    /// silently clamped during parameter resolution.
+    /// Returns named variants for canonical levels (`0`/`3`, `1`, `7`, `11`)
+    /// and [`Level`](Self::Level) for all other values.
+    ///
+    /// Values outside [`MIN_LEVEL`](Self::MIN_LEVEL)..=[`MAX_LEVEL`](Self::MAX_LEVEL)
+    /// are silently clamped during parameter resolution.
     pub const fn from_level(level: i32) -> Self {
-        CompressionLevel::Level(level)
+        match level {
+            0 | Self::DEFAULT_LEVEL => Self::Default,
+            1 => Self::Fastest,
+            7 => Self::Better,
+            11 => Self::Best,
+            _ => Self::Level(level),
+        }
     }
 }
 
diff --git a/zstd/src/tests/roundtrip_integrity.rs b/zstd/src/tests/roundtrip_integrity.rs
index 1e62ffa8..1c498bfc 100644
--- a/zstd/src/tests/roundtrip_integrity.rs
+++ b/zstd/src/tests/roundtrip_integrity.rs
@@ -547,6 +547,35 @@ fn roundtrip_best_level_streaming_multi_block() {
 
 // ─── Numeric compression levels (CompressionLevel::Level) ─────────
 
+/// Canonical numeric levels should map to named enum variants for pattern/equality checks.
+#[test]
+fn numeric_levels_map_to_named_variants() {
+    assert!(matches!(
+        CompressionLevel::from_level(0),
+        CompressionLevel::Default
+    ));
+    assert!(matches!(
+        CompressionLevel::from_level(3),
+        CompressionLevel::Default
+    ));
+    assert!(matches!(
+        CompressionLevel::from_level(1),
+        CompressionLevel::Fastest
+    ));
+    assert!(matches!(
+        CompressionLevel::from_level(7),
+        CompressionLevel::Better
+    ));
+    assert!(matches!(
+        CompressionLevel::from_level(11),
+        CompressionLevel::Best
+    ));
+    assert!(matches!(
+        CompressionLevel::from_level(2),
+        CompressionLevel::Level(2)
+    ));
+}
+
 /// `from_level(3)` must be equivalent to `Default` — same compressed output.
 #[test]
 fn numeric_level_3_matches_default() {

From fec2b3f4f34eb4f73c886a58513ae8b034dc697c Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 22:36:17 +0300
Subject: [PATCH 15/18] test(encoding): cover direct Level(0/3) default
 equivalence

---
 zstd/src/tests/roundtrip_integrity.rs | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/zstd/src/tests/roundtrip_integrity.rs b/zstd/src/tests/roundtrip_integrity.rs
index 1c498bfc..e2e39a0a 100644
--- a/zstd/src/tests/roundtrip_integrity.rs
+++ b/zstd/src/tests/roundtrip_integrity.rs
@@ -576,15 +576,20 @@ fn numeric_levels_map_to_named_variants() {
     ));
 }
 
-/// `from_level(3)` must be equivalent to `Default` — same compressed output.
+/// `from_level(3)` and direct `Level(3)` must be equivalent to `Default`.
 #[test]
 fn numeric_level_3_matches_default() {
     let data = generate_compressible(9000, 64 * 1024);
     let default = compress_to_vec(&data[..], CompressionLevel::Default);
-    let level_3 = compress_to_vec(&data[..], CompressionLevel::from_level(3));
+    let from_level_3 = compress_to_vec(&data[..], CompressionLevel::from_level(3));
+    let direct_level_3 = compress_to_vec(&data[..], CompressionLevel::Level(3));
+    assert_eq!(
+        default, from_level_3,
+        "from_level(3) output must be identical to Default"
+    );
     assert_eq!(
-        default, level_3,
-        "Level(3) output must be identical to Default"
+        default, direct_level_3,
+        "direct Level(3) output must be identical to Default"
     );
 }
 
@@ -621,13 +626,21 @@ fn numeric_level_11_matches_best() {
     assert_eq!(best, level_11, "Level(11) output must be identical to Best");
 }
 
-/// `from_level(0)` maps to default compression (level 3), matching C zstd.
+/// `from_level(0)` and direct `Level(0)` map to default compression (level 3).
 #[test]
 fn numeric_level_0_is_default_compression() {
     let data = generate_compressible(9004, 64 * 1024);
-    let level_0 = compress_to_vec(&data[..], CompressionLevel::from_level(0));
+    let from_level_0 = compress_to_vec(&data[..], CompressionLevel::from_level(0));
+    let direct_level_0 = compress_to_vec(&data[..], CompressionLevel::Level(0));
     let level_3 = compress_to_vec(&data[..], CompressionLevel::from_level(3));
-    assert_eq!(level_0, level_3, "Level(0) should map to default (level 3)");
+    assert_eq!(
+        from_level_0, level_3,
+        "from_level(0) should map to default (level 3)"
+    );
+    assert_eq!(
+        direct_level_0, level_3,
+        "direct Level(0) should map to default (level 3)"
+    );
 }
 
 /// All 22 positive levels produce valid output that round-trips correctly.

From 5d8703d30727623a3792fa2914fce999fa2c2a9f Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 22:54:49 +0300
Subject: [PATCH 16/18] docs(encoding): scope clamping guarantee to default
 matcher

---
 zstd/src/encoding/mod.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/zstd/src/encoding/mod.rs b/zstd/src/encoding/mod.rs
index 1f83313e..9fc5750a 100644
--- a/zstd/src/encoding/mod.rs
+++ b/zstd/src/encoding/mod.rs
@@ -124,8 +124,9 @@ impl CompressionLevel {
     /// Returns named variants for canonical levels (`0`/`3`, `1`, `7`, `11`)
     /// and [`Level`](Self::Level) for all other values.
     ///
-    /// Values outside [`MIN_LEVEL`](Self::MIN_LEVEL)..=[`MAX_LEVEL`](Self::MAX_LEVEL)
-    /// are silently clamped during parameter resolution.
+    /// With the default matcher backend (`MatchGeneratorDriver`), values
+    /// outside [`MIN_LEVEL`](Self::MIN_LEVEL)..=[`MAX_LEVEL`](Self::MAX_LEVEL)
+    /// are silently clamped during built-in level parameter resolution.
     pub const fn from_level(level: i32) -> Self {
         match level {
             0 | Self::DEFAULT_LEVEL => Self::Default,

From dcbdbd14857edae029b0c0dbbc9894fd61beef0d Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 23:27:44 +0300
Subject: [PATCH 17/18] perf(encoding): avoid eager zero-fill in pooled block
 buffers

---
 zstd/src/encoding/match_generator.rs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/zstd/src/encoding/match_generator.rs b/zstd/src/encoding/match_generator.rs
index 3f46e45a..e303160c 100644
--- a/zstd/src/encoding/match_generator.rs
+++ b/zstd/src/encoding/match_generator.rs
@@ -536,11 +536,15 @@ impl Matcher for MatchGeneratorDriver {
 
     fn get_next_space(&mut self) -> Vec<u8> {
         if let Some(mut space) = self.vec_pool.pop() {
-            space.clear();
+            if space.len() > self.slice_size {
+                space.truncate(self.slice_size);
+            }
             if space.capacity() > self.slice_size {
                 space.shrink_to(self.slice_size);
             }
-            space.resize(self.slice_size, 0);
+            if space.len() < self.slice_size {
+                space.resize(self.slice_size, 0);
+            }
             return space;
         }
         alloc::vec![0; self.slice_size]

From 734f7489c5becb37c2df90fe72f063f9949f2630 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Sat, 4 Apr 2026 23:44:27 +0300
Subject: [PATCH 18/18] docs(encoding): clarify source-size hint scope

---
 zstd/src/encoding/mod.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/zstd/src/encoding/mod.rs b/zstd/src/encoding/mod.rs
index 9fc5750a..976d28fd 100644
--- a/zstd/src/encoding/mod.rs
+++ b/zstd/src/encoding/mod.rs
@@ -173,7 +173,9 @@ pub trait Matcher {
     /// Called before [`reset`](Self::reset) when the caller knows the input
     /// size (e.g. from pledged content size or file metadata).
     ///
-    /// The default implementation is a no-op for custom matchers.
+    /// The default implementation is a no-op for custom matchers and
+    /// test stubs. The built-in runtime matcher (`MatchGeneratorDriver`)
+    /// overrides this hook and applies the hint during level resolution.
     fn set_source_size_hint(&mut self, _size: u64) {}
     /// Prime matcher state with dictionary history before compressing the next frame.
     /// Default implementation is a no-op for custom matchers that do not support this.