From 5138d7a11842747d6c042346892a2a87e04f441b Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 00:23:59 +0200 Subject: [PATCH 01/30] test(bench): expand zstd benchmark suite - add scenario-based Criterion matrix against C zstd - generate benchmark JSON and markdown reports for CI - document benchmark workflows and add flamegraph helper Refs #24 --- .github/scripts/run-benchmarks.sh | 100 ++++++++++++--- .gitignore | 2 + BENCHMARKS.md | 67 ++++++++++ README.md | 4 + scripts/bench-flamegraph.sh | 19 +++ zstd/benches/compare_ffi.rs | 166 +++++++++++++++++-------- zstd/benches/support/mod.rs | 199 ++++++++++++++++++++++++++++++ 7 files changed, 483 insertions(+), 74 deletions(-) create mode 100644 BENCHMARKS.md create mode 100755 scripts/bench-flamegraph.sh create mode 100644 zstd/benches/support/mod.rs diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index 1ad2086d..885897d1 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -1,43 +1,103 @@ #!/bin/bash -# Run compare_ffi benchmarks and produce github-action-benchmark JSON. -# Output: benchmark-results.json (customSmallerIsBetter format — lower time = better) +# Run the Criterion benchmark matrix and produce: +# - benchmark-results.json for github-action-benchmark +# - benchmark-report.md for human review +# +# Output format note: +# - benchmark JSON uses customSmallerIsBetter (lower ms/iter is better) +# - report markdown also includes per-scenario compression size + ratio summaries set -eo pipefail -echo "Running benchmarks..." >&2 +echo "Running benchmark matrix..." >&2 + +export STRUCTURED_ZSTD_BENCH_LARGE_BYTES="${STRUCTURED_ZSTD_BENCH_LARGE_BYTES:-16777216}" -# Run criterion benchmarks, capture output cargo bench --bench compare_ffi -p structured-zstd -- --output-format bencher | tee /tmp/bench-raw.txt echo "Parsing results..." >&2 -# Parse criterion bencher output into github-action-benchmark JSON -# Format: "test ... 
bench: ns/iter (+/- )" python3 - <<'PYEOF' -import json, re, sys +import json +import re +import sys + +BENCH_RE = re.compile(r"test (\S+)\s+\.\.\. bench:\s+([\d,]+) ns/iter") +REPORT_RE = re.compile( + r'^REPORT scenario=(\S+) label="([^"]+)" level=(\S+) input_bytes=(\d+) rust_bytes=(\d+) ffi_bytes=(\d+) rust_ratio=([0-9.]+) ffi_ratio=([0-9.]+)$' +) + +benchmark_results = [] +timings = [] +ratios = [] -results = [] with open("/tmp/bench-raw.txt") as f: - for line in f: - m = re.match(r"test (\S+)\s+\.\.\. bench:\s+([\d,]+) ns/iter", line) - if m: - name = m.group(1) - ns = int(m.group(2).replace(",", "")) - # Convert ns to ms for readability + for raw_line in f: + line = raw_line.strip() + + bench_match = BENCH_RE.match(line) + if bench_match: + name = bench_match.group(1) + ns = int(bench_match.group(2).replace(",", "")) ms = ns / 1_000_000 - results.append({ + benchmark_results.append({ "name": name, "unit": "ms", "value": round(ms, 3), }) + timings.append((name, ms)) + continue + + report_match = REPORT_RE.match(line) + if report_match: + scenario, label, level, input_bytes, rust_bytes, ffi_bytes, rust_ratio, ffi_ratio = report_match.groups() + ratios.append({ + "scenario": scenario, + "label": label, + "level": level, + "input_bytes": int(input_bytes), + "rust_bytes": int(rust_bytes), + "ffi_bytes": int(ffi_bytes), + "rust_ratio": float(rust_ratio), + "ffi_ratio": float(ffi_ratio), + }) -if not results: +if not benchmark_results: print("ERROR: No benchmark results parsed!", file=sys.stderr) sys.exit(1) with open("benchmark-results.json", "w") as f: - json.dump(results, f, indent=2) + json.dump(benchmark_results, f, indent=2) + +lines = [ + "# Benchmark Report", + "", + "Generated by `.github/scripts/run-benchmarks.sh` from `cargo bench --bench compare_ffi`.", + "", + "## Compression Ratios", + "", + "| Scenario | Level | Input bytes | Rust bytes | C bytes | Rust ratio | C ratio |", + "| --- | --- | ---: | ---: | ---: | ---: | ---: |", +] + +for row in 
sorted(ratios, key=lambda item: (item["scenario"], item["level"])): + lines.append( + f'| {row["label"]} | {row["level"]} | {row["input_bytes"]} | {row["rust_bytes"]} | {row["ffi_bytes"]} | {row["rust_ratio"]:.4f} | {row["ffi_ratio"]:.4f} |' + ) + +lines.extend([ + "", + "## Timing Metrics", + "", + "| Benchmark | ms/iter |", + "| --- | ---: |", +]) + +for name, ms in sorted(timings): + lines.append(f"| `{name}` | {ms:.3f} |") + +with open("benchmark-report.md", "w") as f: + f.write("\n".join(lines) + "\n") -print(f"Wrote {len(results)} benchmark results to benchmark-results.json", file=sys.stderr) -for r in results: - print(f" {r['name']}: {r['value']} {r['unit']}", file=sys.stderr) +print(f"Wrote {len(benchmark_results)} timing results to benchmark-results.json", file=sys.stderr) +print(f"Wrote {len(ratios)} ratio rows to benchmark-report.md", file=sys.stderr) PYEOF diff --git a/.gitignore b/.gitignore index c9fa3b69..d2a3666c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,5 +6,7 @@ Cargo.lock /orig-zstd fuzz_decodecorpus perf.data* +benchmark-results.json +benchmark-report.md fuzz/corpus .idea diff --git a/BENCHMARKS.md b/BENCHMARKS.md new file mode 100644 index 00000000..621b3f72 --- /dev/null +++ b/BENCHMARKS.md @@ -0,0 +1,67 @@ +# Benchmark Suite + +`structured-zstd` keeps its compression/decompression performance tracking in the Criterion bench +matrix at `zstd/benches/compare_ffi.rs`. + +## Scenarios + +The current matrix covers: + +- small random payloads (`1 KiB`, `10 KiB`) +- a small structured log payload (`4 KiB`) +- a repository corpus fixture (`decodecorpus_files/z000033`) +- high entropy random payloads (`1 MiB`) +- low entropy repeated payloads (`1 MiB`) +- a large structured stream (`100 MiB`) +- optional Silesia corpus files when `STRUCTURED_ZSTD_SILESIA_DIR=/path/to/silesia` is set + +The local default for the large scenario is `100 MiB`. 
CI can override it with +`STRUCTURED_ZSTD_BENCH_LARGE_BYTES` to keep regression runs bounded while still exercising the +same code path. + +## Level Mapping + +The benchmark suite only compares levels that are currently implemented end-to-end in the pure Rust +encoder: + +- `structured-zstd::Fastest` vs `zstd` level `1` +- `structured-zstd::Default` vs `zstd` level `3` + +`Better` and `Best` are intentionally excluded until the encoder implements them. Dictionary +compression is also excluded from the timing matrix because the crate currently exposes dictionary +training, but not dictionary-based compression. + +## Commands + +Run the full Criterion matrix: + +```bash +cargo bench --bench compare_ffi -p structured-zstd -- --output-format bencher +``` + +Generate the CI-style JSON and markdown report locally: + +```bash +bash .github/scripts/run-benchmarks.sh +``` + +Generate a flamegraph for a hot path: + +```bash +bash scripts/bench-flamegraph.sh +``` + +Override the benchmark targeted by the flamegraph script: + +```bash +bash scripts/bench-flamegraph.sh decompress/default/decodecorpus-z000033/matrix/pure_rust +``` + +## Outputs + +`run-benchmarks.sh` writes: + +- `benchmark-results.json` for GitHub regression tracking +- `benchmark-report.md` with scenario-by-scenario compression ratios and timing rows + +Criterion also writes its usual detailed estimates under `target/criterion/`. diff --git a/README.md b/README.md index 076ad3a0..8d563ec2 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,10 @@ Complete RFC 8878 implementation. Performance: ~1.4-3.5x slower than C zstd depe When the `dict_builder` feature is enabled, the `dictionary` module can create raw content dictionaries. Within 0.2% of the official implementation on the `github-users` sample set. +## Benchmarking + +Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). 
The suite compares `structured-zstd` against the C reference across small payloads, entropy extremes, a `100 MiB` large-stream scenario, repository corpus fixtures, and optional local Silesia corpora. + ## Usage ### Compression diff --git a/scripts/bench-flamegraph.sh b/scripts/bench-flamegraph.sh new file mode 100755 index 00000000..1bfb7b70 --- /dev/null +++ b/scripts/bench-flamegraph.sh @@ -0,0 +1,19 @@ +#!/bin/bash +set -euo pipefail + +BENCH_FILTER="${1:-compress/default/large-log-stream/matrix/pure_rust}" +OUTPUT_DIR="${2:-target/flamegraph}" + +mkdir -p "$OUTPUT_DIR" + +echo "Generating flamegraph for benchmark filter: $BENCH_FILTER" >&2 +echo "Output directory: $OUTPUT_DIR" >&2 + +cargo flamegraph \ + --bench compare_ffi \ + -p structured-zstd \ + --root \ + --output "$OUTPUT_DIR/${BENCH_FILTER//\//_}.svg" \ + -- \ + --bench \ + "$BENCH_FILTER" diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 2660702f..6bae35e3 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -1,72 +1,130 @@ -//! Comparison benchmark: structured-zstd (pure Rust) vs zstd (C FFI). +//! Comparison benchmark matrix: structured-zstd (pure Rust) vs zstd (C FFI). //! -//! Five variations: decompress (pure Rust/C FFI), compress (pure Rust/C FFI L1/L3). -//! Both decompress benchmarks allocate per-iteration for symmetric comparison. +//! The suite covers: +//! - small payloads (1-10 KiB) +//! - high entropy and low entropy payloads +//! - a large 100 MiB structured stream +//! - the repository decode corpus fixture +//! - optional Silesia corpus files via `STRUCTURED_ZSTD_SILESIA_DIR` +//! +//! Each run prints `REPORT ...` metadata lines that CI scripts can turn into a markdown report. -use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main}; +mod support; -/// Compressed corpus for decompression benchmarks. 
-const COMPRESSED_CORPUS: &[u8] = include_bytes!("../decodecorpus_files/z000033.zst"); +use criterion::{Criterion, SamplingMode, Throughput, black_box, criterion_group, criterion_main}; +use std::time::Duration; +use structured_zstd::decoding::FrameDecoder; +use support::{LevelConfig, Scenario, ScenarioClass, benchmark_scenarios, supported_levels}; -fn bench_decompress(c: &mut Criterion) { - let mut group = c.benchmark_group("decompress"); +fn bench_compress(c: &mut Criterion) { + for scenario in benchmark_scenarios() { + for level in supported_levels() { + let rust_compressed = + structured_zstd::encoding::compress_to_vec(&scenario.bytes[..], level.rust_level); + let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); + emit_report_line(&scenario, level, &rust_compressed, &ffi_compressed); - // Pre-compute expected output length for assertions. - let expected_len = zstd::decode_all(COMPRESSED_CORPUS).unwrap().len(); + let benchmark_name = format!("compress/{}/{}/{}", level.name, scenario.id, "matrix"); + let mut group = c.benchmark_group(benchmark_name); + configure_group(&mut group, &scenario); + group.throughput(Throughput::Bytes(scenario.throughput_bytes())); - // Pure Rust decompression — allocate per-iteration (symmetric with C FFI). - group.bench_function("pure_rust", |b| { - b.iter(|| { - let mut fr = structured_zstd::decoding::FrameDecoder::new(); - let mut target = vec![0u8; expected_len]; - let written = fr.decode_all(COMPRESSED_CORPUS, &mut target).unwrap(); - assert_eq!(written, expected_len); - }) - }); + group.bench_function("pure_rust", |b| { + b.iter(|| { + black_box(structured_zstd::encoding::compress_to_vec( + &scenario.bytes[..], + level.rust_level, + )) + }) + }); - // C FFI decompression — allocates per-iteration via decode_all. 
- group.bench_function("c_ffi", |b| { - b.iter(|| { - let out = zstd::decode_all(COMPRESSED_CORPUS).unwrap(); - assert_eq!(out.len(), expected_len); - }) - }); + group.bench_function("c_ffi", |b| { + b.iter(|| { + black_box(zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap()) + }) + }); - group.finish(); + group.finish(); + } + } } -fn bench_compress(c: &mut Criterion) { - // Get raw data by decompressing the corpus. - let raw_data = zstd::decode_all(COMPRESSED_CORPUS).unwrap(); +fn bench_decompress(c: &mut Criterion) { + for scenario in benchmark_scenarios() { + for level in supported_levels() { + let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); + let expected_len = scenario.len(); + let benchmark_name = format!("decompress/{}/{}/{}", level.name, scenario.id, "matrix"); + let mut group = c.benchmark_group(benchmark_name); + configure_group(&mut group, &scenario); + group.throughput(Throughput::Bytes(scenario.throughput_bytes())); - let mut group = c.benchmark_group("compress"); + group.bench_function("pure_rust", |b| { + b.iter(|| { + let mut decoder = FrameDecoder::new(); + let mut target = vec![0u8; expected_len]; + let written = decoder.decode_all(&ffi_compressed, &mut target).unwrap(); + assert_eq!(written, expected_len); + }) + }); - // Pure Rust compression (Fastest level) - group.bench_with_input( - BenchmarkId::new("pure_rust", "fastest"), - &raw_data, - |b, data| { - b.iter(|| { - black_box(structured_zstd::encoding::compress_to_vec( - &data[..], - structured_zstd::encoding::CompressionLevel::Fastest, - )) - }) - }, - ); + group.bench_function("c_ffi", |b| { + b.iter(|| { + let output = zstd::decode_all(&ffi_compressed[..]).unwrap(); + assert_eq!(output.len(), expected_len); + }) + }); - // C FFI compression (level 1 ≈ fastest) - group.bench_with_input(BenchmarkId::new("c_ffi", "level1"), &raw_data, |b, data| { - b.iter(|| black_box(zstd::encode_all(&data[..], 1).unwrap())) - }); + group.finish(); + } + } 
+} - // C FFI compression (level 3 ≈ default) - group.bench_with_input(BenchmarkId::new("c_ffi", "level3"), &raw_data, |b, data| { - b.iter(|| black_box(zstd::encode_all(&data[..], 3).unwrap())) - }); +fn configure_group( + group: &mut criterion::BenchmarkGroup<'_, M>, + scenario: &Scenario, +) { + match scenario.class { + ScenarioClass::Small => { + group.sample_size(30); + group.measurement_time(Duration::from_secs(3)); + group.sampling_mode(SamplingMode::Flat); + } + ScenarioClass::Corpus | ScenarioClass::Entropy => { + group.sample_size(10); + group.measurement_time(Duration::from_secs(4)); + group.sampling_mode(SamplingMode::Flat); + } + ScenarioClass::Large | ScenarioClass::Silesia => { + group.sample_size(10); + group.measurement_time(Duration::from_secs(2)); + group.warm_up_time(Duration::from_millis(500)); + group.sampling_mode(SamplingMode::Flat); + } + } +} - group.finish(); +fn emit_report_line( + scenario: &Scenario, + level: LevelConfig, + rust_compressed: &[u8], + ffi_compressed: &[u8], +) { + let input_len = scenario.len() as f64; + let rust_ratio = rust_compressed.len() as f64 / input_len; + let ffi_ratio = ffi_compressed.len() as f64 / input_len; + println!( + "REPORT scenario={} label=\"{}\" level={} input_bytes={} rust_bytes={} ffi_bytes={} rust_ratio={:.6} ffi_ratio={:.6}", + scenario.id, + scenario.label, + level.name, + scenario.len(), + rust_compressed.len(), + ffi_compressed.len(), + rust_ratio, + ffi_ratio + ); } -criterion_group!(benches, bench_decompress, bench_compress); +criterion_group!(benches, bench_compress, bench_decompress); criterion_main!(benches); diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs new file mode 100644 index 00000000..79ffcb9c --- /dev/null +++ b/zstd/benches/support/mod.rs @@ -0,0 +1,199 @@ +use rand::{RngCore, SeedableRng, rngs::SmallRng}; +use std::{env, fs, path::Path}; +use structured_zstd::encoding::CompressionLevel; + +pub(crate) struct Scenario { + pub(crate) id: String, + pub(crate) 
label: String, + pub(crate) bytes: Vec, + pub(crate) class: ScenarioClass, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum ScenarioClass { + Small, + Corpus, + Entropy, + Large, + Silesia, +} + +#[derive(Clone, Copy)] +pub(crate) struct LevelConfig { + pub(crate) name: &'static str, + pub(crate) rust_level: CompressionLevel, + pub(crate) ffi_level: i32, +} + +pub(crate) fn benchmark_scenarios() -> Vec { + let mut scenarios = vec![ + Scenario::new( + "small-1k-random", + "Small random payload (1 KiB)", + random_bytes(1024, 0x5EED_1000), + ScenarioClass::Small, + ), + Scenario::new( + "small-10k-random", + "Small random payload (10 KiB)", + random_bytes(10 * 1024, 0x0005_EED1_0000), + ScenarioClass::Small, + ), + Scenario::new( + "small-4k-log-lines", + "Small structured log lines (4 KiB)", + repeated_log_lines(4 * 1024), + ScenarioClass::Small, + ), + Scenario::new( + "decodecorpus-z000033", + "Repo decode corpus sample", + include_bytes!("../../decodecorpus_files/z000033").to_vec(), + ScenarioClass::Corpus, + ), + Scenario::new( + "high-entropy-1m", + "High entropy random payload (1 MiB)", + random_bytes(1024 * 1024, 0xC0FF_EE11), + ScenarioClass::Entropy, + ), + Scenario::new( + "low-entropy-1m", + "Low entropy patterned payload (1 MiB)", + repeated_pattern_bytes(1024 * 1024), + ScenarioClass::Entropy, + ), + Scenario::new( + "large-log-stream", + "Large structured stream", + repeated_log_lines(large_stream_len()), + ScenarioClass::Large, + ), + ]; + + scenarios.extend(load_silesia_from_env()); + scenarios +} + +pub(crate) fn supported_levels() -> [LevelConfig; 2] { + [ + LevelConfig { + name: "fastest", + rust_level: CompressionLevel::Fastest, + ffi_level: 1, + }, + LevelConfig { + name: "default", + rust_level: CompressionLevel::Default, + ffi_level: 3, + }, + ] +} + +impl Scenario { + fn new( + id: impl Into, + label: impl Into, + bytes: Vec, + class: ScenarioClass, + ) -> Self { + Self { + id: id.into(), + label: label.into(), + bytes, + 
class, + } + } + + pub(crate) fn len(&self) -> usize { + self.bytes.len() + } + + pub(crate) fn throughput_bytes(&self) -> u64 { + self.bytes.len() as u64 + } +} + +fn random_bytes(len: usize, seed: u64) -> Vec { + let mut rng = SmallRng::seed_from_u64(seed); + let mut bytes = vec![0u8; len]; + rng.fill_bytes(&mut bytes); + bytes +} + +fn repeated_pattern_bytes(len: usize) -> Vec { + let pattern = b"coordinode:segment:0001|tenant=demo|label=orders|"; + let mut bytes = Vec::with_capacity(len); + while bytes.len() < len { + let remaining = len - bytes.len(); + bytes.extend_from_slice(&pattern[..pattern.len().min(remaining)]); + } + bytes +} + +fn repeated_log_lines(len: usize) -> Vec { + const LINES: &[&str] = &[ + "ts=2026-03-26T21:39:28Z level=INFO msg=\"flush memtable\" tenant=demo table=orders region=eu-west\n", + "ts=2026-03-26T21:39:29Z level=INFO msg=\"rotate segment\" tenant=demo table=orders region=eu-west\n", + "ts=2026-03-26T21:39:30Z level=INFO msg=\"compact level\" tenant=demo table=orders region=eu-west\n", + "ts=2026-03-26T21:39:31Z level=INFO msg=\"write block\" tenant=demo table=orders region=eu-west\n", + ]; + + let mut bytes = Vec::with_capacity(len); + while bytes.len() < len { + for line in LINES { + if bytes.len() == len { + break; + } + let remaining = len - bytes.len(); + bytes.extend_from_slice(&line.as_bytes()[..line.len().min(remaining)]); + } + } + bytes +} + +fn load_silesia_from_env() -> Vec { + let Some(dir) = env::var_os("STRUCTURED_ZSTD_SILESIA_DIR") else { + return Vec::new(); + }; + + let Ok(entries) = fs::read_dir(Path::new(&dir)) else { + eprintln!("BENCH_WARN failed to read STRUCTURED_ZSTD_SILESIA_DIR={dir:?}"); + return Vec::new(); + }; + + let mut scenarios = Vec::new(); + for entry in entries.flatten() { + let path = entry.path(); + if !path.is_file() { + continue; + } + let Ok(bytes) = fs::read(&path) else { + eprintln!( + "BENCH_WARN failed to read Silesia fixture {}", + path.display() + ); + continue; + }; + let Some(stem) 
= path.file_stem().and_then(|stem| stem.to_str()) else { + continue; + }; + scenarios.push(Scenario::new( + format!("silesia-{stem}"), + format!("Silesia corpus: {stem}"), + bytes, + ScenarioClass::Silesia, + )); + } + + scenarios.sort_by(|left, right| left.id.cmp(&right.id)); + scenarios +} + +fn large_stream_len() -> usize { + env::var("STRUCTURED_ZSTD_BENCH_LARGE_BYTES") + .ok() + .and_then(|value| value.parse::().ok()) + .filter(|value| *value > 0) + .unwrap_or(100 * 1024 * 1024) +} From 3a419fa936a4a266f391ea19fc87af91d2cb62c2 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 09:58:33 +0200 Subject: [PATCH 02/30] docs(readme): add benchmark dashboard link --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 8d563ec2..0cea48ab 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,14 @@ Pure Rust zstd implementation — managed fork of [ruzstd](https://github.com/Ki [![docs.rs](https://docs.rs/structured-zstd/badge.svg)](https://docs.rs/structured-zstd) [![License: Apache-2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](LICENSE) +## Benchmarks Dashboard + +Historical benchmark charts are published to GitHub Pages: + +- [Performance dashboard](https://structured-world.github.io/structured-zstd/dev/bench/) + +Note: the root Pages URL can be empty; benchmark charts live under `/dev/bench/`. + ## Managed Fork This is a **maintained fork** of [KillingSpark/zstd-rs](https://github.com/KillingSpark/zstd-rs) (ruzstd) by [Structured World Foundation](https://sw.foundation). We maintain additional features and hardening for the [CoordiNode](https://github.com/structured-world/coordinode) database engine. 
From 15a51bd252af05a4301cbb341d1626f84f506cd8 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 10:13:51 +0200 Subject: [PATCH 03/30] fix(bench): harden matrix scripts and edge scenarios - guard ratio reporting for zero-length inputs - skip empty Silesia fixtures with BENCH_WARN - use mktemp + trap for raw bench output parsing - make flamegraph --root opt-in via BENCH_FLAMEGRAPH_USE_ROOT --- .github/scripts/run-benchmarks.sh | 10 +++++++--- scripts/bench-flamegraph.sh | 26 +++++++++++++++++++++++--- zstd/benches/compare_ffi.rs | 10 ++++++++-- zstd/benches/support/mod.rs | 7 +++++++ 4 files changed, 45 insertions(+), 8 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index 885897d1..aff1ccd9 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -11,13 +11,16 @@ set -eo pipefail echo "Running benchmark matrix..." >&2 export STRUCTURED_ZSTD_BENCH_LARGE_BYTES="${STRUCTURED_ZSTD_BENCH_LARGE_BYTES:-16777216}" +BENCH_RAW_FILE="$(mktemp -t structured-zstd-bench-raw.XXXXXX)" +trap 'rm -f "$BENCH_RAW_FILE"' EXIT -cargo bench --bench compare_ffi -p structured-zstd -- --output-format bencher | tee /tmp/bench-raw.txt +cargo bench --bench compare_ffi -p structured-zstd -- --output-format bencher | tee "$BENCH_RAW_FILE" echo "Parsing results..." 
>&2 -python3 - <<'PYEOF' +BENCH_RAW_FILE="$BENCH_RAW_FILE" python3 - <<'PYEOF' import json +import os import re import sys @@ -29,8 +32,9 @@ REPORT_RE = re.compile( benchmark_results = [] timings = [] ratios = [] +raw_path = os.environ["BENCH_RAW_FILE"] -with open("/tmp/bench-raw.txt") as f: +with open(raw_path) as f: for raw_line in f: line = raw_line.strip() diff --git a/scripts/bench-flamegraph.sh b/scripts/bench-flamegraph.sh index 1bfb7b70..66b41f95 100755 --- a/scripts/bench-flamegraph.sh +++ b/scripts/bench-flamegraph.sh @@ -9,11 +9,31 @@ mkdir -p "$OUTPUT_DIR" echo "Generating flamegraph for benchmark filter: $BENCH_FILTER" >&2 echo "Output directory: $OUTPUT_DIR" >&2 -cargo flamegraph \ +# Use BENCH_FLAMEGRAPH_USE_ROOT=1 to opt into running cargo flamegraph with --root. +EXTRA_FLAMEGRAPH_ARGS=() +if [[ "${BENCH_FLAMEGRAPH_USE_ROOT:-}" == "1" ]]; then + EXTRA_FLAMEGRAPH_ARGS+=(--root) +fi + +if ! cargo flamegraph \ --bench compare_ffi \ -p structured-zstd \ - --root \ + "${EXTRA_FLAMEGRAPH_ARGS[@]}" \ --output "$OUTPUT_DIR/${BENCH_FILTER//\//_}.svg" \ -- \ --bench \ - "$BENCH_FILTER" + "$BENCH_FILTER"; then + status=$? + if [[ "${BENCH_FLAMEGRAPH_USE_ROOT:-}" != "1" ]]; then + cat >&2 <<'EOF' +cargo flamegraph failed. This may be due to insufficient permissions for perf. + +If you see a "Permission denied" or "not allowed to access CPU" error, try re-running with: + + BENCH_FLAMEGRAPH_USE_ROOT=1 sudo -E scripts/bench-flamegraph.sh "" "" + +or otherwise ensure perf has sufficient permissions. 
+EOF + fi + exit "$status" +fi diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 6bae35e3..8f4334a0 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -111,8 +111,14 @@ fn emit_report_line( ffi_compressed: &[u8], ) { let input_len = scenario.len() as f64; - let rust_ratio = rust_compressed.len() as f64 / input_len; - let ffi_ratio = ffi_compressed.len() as f64 / input_len; + let (rust_ratio, ffi_ratio) = if input_len > 0.0 { + ( + rust_compressed.len() as f64 / input_len, + ffi_compressed.len() as f64 / input_len, + ) + } else { + (0.0, 0.0) + }; println!( "REPORT scenario={} label=\"{}\" level={} input_bytes={} rust_bytes={} ffi_bytes={} rust_ratio={:.6} ffi_ratio={:.6}", scenario.id, diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index 79ffcb9c..f97aeca3 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -175,6 +175,13 @@ fn load_silesia_from_env() -> Vec { ); continue; }; + if bytes.is_empty() { + eprintln!( + "BENCH_WARN skipping empty Silesia fixture {}", + path.display() + ); + continue; + } let Some(stem) = path.file_stem().and_then(|stem| stem.to_str()) else { continue; }; From 480a307901d2e7a79ef9cae67d8ea9edaf3a1911 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 12:09:22 +0200 Subject: [PATCH 04/30] fix(bench): tighten flamegraph and decode benchmarks --- scripts/bench-flamegraph.sh | 6 ++++-- zstd/benches/compare_ffi.rs | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/bench-flamegraph.sh b/scripts/bench-flamegraph.sh index 66b41f95..47cd196e 100755 --- a/scripts/bench-flamegraph.sh +++ b/scripts/bench-flamegraph.sh @@ -15,14 +15,16 @@ if [[ "${BENCH_FLAMEGRAPH_USE_ROOT:-}" == "1" ]]; then EXTRA_FLAMEGRAPH_ARGS+=(--root) fi -if ! 
cargo flamegraph \ +if cargo flamegraph \ --bench compare_ffi \ -p structured-zstd \ - "${EXTRA_FLAMEGRAPH_ARGS[@]}" \ + ${EXTRA_FLAMEGRAPH_ARGS[@]+"${EXTRA_FLAMEGRAPH_ARGS[@]}"} \ --output "$OUTPUT_DIR/${BENCH_FILTER//\//_}.svg" \ -- \ --bench \ "$BENCH_FILTER"; then + : +else status=$? if [[ "${BENCH_FLAMEGRAPH_USE_ROOT:-}" != "1" ]]; then cat >&2 <<'EOF' diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 8f4334a0..6df08633 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -60,9 +60,10 @@ fn bench_decompress(c: &mut Criterion) { group.throughput(Throughput::Bytes(scenario.throughput_bytes())); group.bench_function("pure_rust", |b| { + let mut target = vec![0u8; expected_len]; b.iter(|| { let mut decoder = FrameDecoder::new(); - let mut target = vec![0u8; expected_len]; + target.fill(0); let written = decoder.decode_all(&ffi_compressed, &mut target).unwrap(); assert_eq!(written, expected_len); }) From 11f779bcc8919dc68eeb90e4a022e52addc36d23 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 14:54:09 +0200 Subject: [PATCH 05/30] docs(bench): clarify decode benchmark asymmetry rationale --- zstd/benches/compare_ffi.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 6df08633..2724467b 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -71,6 +71,8 @@ fn bench_decompress(c: &mut Criterion) { group.bench_function("c_ffi", |b| { b.iter(|| { + // Intentional: zstd::decode_all represents the common high-level FFI path and + // includes allocation cost, while pure_rust isolates decode throughput. 
let output = zstd::decode_all(&ffi_compressed[..]).unwrap(); assert_eq!(output.len(), expected_len); }) From 65cda3a1cc962d5df4a352207e8eaa9b24ad4ba3 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 15:24:52 +0200 Subject: [PATCH 06/30] perf(bench): remove redundant decode buffer fill --- zstd/benches/compare_ffi.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 2724467b..a0581bd0 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -63,7 +63,6 @@ fn bench_decompress(c: &mut Criterion) { let mut target = vec![0u8; expected_len]; b.iter(|| { let mut decoder = FrameDecoder::new(); - target.fill(0); let written = decoder.decode_all(&ffi_compressed, &mut target).unwrap(); assert_eq!(written, expected_len); }) From aa774de9cd2e308592f414cfcc15436968215b4d Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 17:09:15 +0200 Subject: [PATCH 07/30] fix(bench): scope large default to CI and enforce ratio rows --- .github/scripts/run-benchmarks.sh | 11 ++++++++++- BENCHMARKS.md | 6 +++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index aff1ccd9..98354488 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -10,7 +10,9 @@ set -eo pipefail echo "Running benchmark matrix..." 
>&2 -export STRUCTURED_ZSTD_BENCH_LARGE_BYTES="${STRUCTURED_ZSTD_BENCH_LARGE_BYTES:-16777216}" +if [ -n "${GITHUB_ACTIONS:-}" ] && [ -z "${STRUCTURED_ZSTD_BENCH_LARGE_BYTES:-}" ]; then + export STRUCTURED_ZSTD_BENCH_LARGE_BYTES=16777216 +fi BENCH_RAW_FILE="$(mktemp -t structured-zstd-bench-raw.XXXXXX)" trap 'rm -f "$BENCH_RAW_FILE"' EXIT @@ -69,6 +71,13 @@ if not benchmark_results: print("ERROR: No benchmark results parsed!", file=sys.stderr) sys.exit(1) +if not ratios: + print( + "ERROR: No REPORT ratio lines parsed; benchmark-report.md would have an empty ratio section.", + file=sys.stderr, + ) + sys.exit(1) + with open("benchmark-results.json", "w") as f: json.dump(benchmark_results, f, indent=2) diff --git a/BENCHMARKS.md b/BENCHMARKS.md index 621b3f72..1e0f3be0 100644 --- a/BENCHMARKS.md +++ b/BENCHMARKS.md @@ -15,9 +15,9 @@ The current matrix covers: - a large structured stream (`100 MiB`) - optional Silesia corpus files when `STRUCTURED_ZSTD_SILESIA_DIR=/path/to/silesia` is set -The local default for the large scenario is `100 MiB`. CI can override it with -`STRUCTURED_ZSTD_BENCH_LARGE_BYTES` to keep regression runs bounded while still exercising the -same code path. +The local default for the large scenario is `100 MiB`. In GitHub Actions, when +`STRUCTURED_ZSTD_BENCH_LARGE_BYTES` is unset, `.github/scripts/run-benchmarks.sh` defaults it to +`16 MiB` to keep CI regression runs bounded while still exercising the same code path. 
## Level Mapping From 54b3dd4bd3061555cc3ce14f25973d740faebacc Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 17:48:28 +0200 Subject: [PATCH 08/30] feat(bench): add memory and dictionary benchmark reporting --- .github/scripts/run-benchmarks.sh | 83 +++++++++++++++++ BENCHMARKS.md | 14 ++- README.md | 2 +- zstd/Cargo.toml | 3 +- zstd/benches/compare_ffi.rs | 143 +++++++++++++++++++++++++++++- 5 files changed, 237 insertions(+), 8 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index 98354488..e86a9fe7 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -30,10 +30,18 @@ BENCH_RE = re.compile(r"test (\S+)\s+\.\.\. bench:\s+([\d,]+) ns/iter") REPORT_RE = re.compile( r'^REPORT scenario=(\S+) label="([^"]+)" level=(\S+) input_bytes=(\d+) rust_bytes=(\d+) ffi_bytes=(\d+) rust_ratio=([0-9.]+) ffi_ratio=([0-9.]+)$' ) +MEM_RE = re.compile( + r'^REPORT_MEM scenario=(\S+) label="([^"]+)" level=(\S+) stage=(\S+) rust_peak_bytes=(\d+) ffi_peak_bytes=(\d+)$' +) +DICT_RE = re.compile( + r'^REPORT_DICT scenario=(\S+) label="([^"]+)" level=(\S+) dict_bytes=(\d+) train_ms=([0-9.]+) ffi_no_dict_bytes=(\d+) ffi_with_dict_bytes=(\d+) ffi_no_dict_ratio=([0-9.]+) ffi_with_dict_ratio=([0-9.]+)$' +) benchmark_results = [] timings = [] ratios = [] +memory_rows = [] +dictionary_rows = [] raw_path = os.environ["BENCH_RAW_FILE"] with open(raw_path) as f: @@ -66,6 +74,45 @@ with open(raw_path) as f: "rust_ratio": float(rust_ratio), "ffi_ratio": float(ffi_ratio), }) + continue + + mem_match = MEM_RE.match(line) + if mem_match: + scenario, label, level, stage, rust_peak_bytes, ffi_peak_bytes = mem_match.groups() + memory_rows.append({ + "scenario": scenario, + "label": label, + "level": level, + "stage": stage, + "rust_peak_bytes": int(rust_peak_bytes), + "ffi_peak_bytes": int(ffi_peak_bytes), + }) + continue + + dict_match = DICT_RE.match(line) + if dict_match: + ( + scenario, + label, + 
level, + dict_bytes, + train_ms, + ffi_no_dict_bytes, + ffi_with_dict_bytes, + ffi_no_dict_ratio, + ffi_with_dict_ratio, + ) = dict_match.groups() + dictionary_rows.append({ + "scenario": scenario, + "label": label, + "level": level, + "dict_bytes": int(dict_bytes), + "train_ms": float(train_ms), + "ffi_no_dict_bytes": int(ffi_no_dict_bytes), + "ffi_with_dict_bytes": int(ffi_with_dict_bytes), + "ffi_no_dict_ratio": float(ffi_no_dict_ratio), + "ffi_with_dict_ratio": float(ffi_with_dict_ratio), + }) if not benchmark_results: print("ERROR: No benchmark results parsed!", file=sys.stderr) @@ -78,6 +125,14 @@ if not ratios: ) sys.exit(1) +if not memory_rows: + print("ERROR: No REPORT_MEM lines parsed; memory section would be empty.", file=sys.stderr) + sys.exit(1) + +if not dictionary_rows: + print("ERROR: No REPORT_DICT lines parsed; dictionary section would be empty.", file=sys.stderr) + sys.exit(1) + with open("benchmark-results.json", "w") as f: json.dump(benchmark_results, f, indent=2) @@ -97,6 +152,32 @@ for row in sorted(ratios, key=lambda item: (item["scenario"], item["level"])): f'| {row["label"]} | {row["level"]} | {row["input_bytes"]} | {row["rust_bytes"]} | {row["ffi_bytes"]} | {row["rust_ratio"]:.4f} | {row["ffi_ratio"]:.4f} |' ) +lines.extend([ + "", + "## Peak Memory Estimates", + "", + "| Scenario | Level | Stage | Rust peak bytes | C peak bytes |", + "| --- | --- | --- | ---: | ---: |", +]) + +for row in sorted(memory_rows, key=lambda item: (item["scenario"], item["level"], item["stage"])): + lines.append( + f'| {row["label"]} | {row["level"]} | {row["stage"]} | {row["rust_peak_bytes"]} | {row["ffi_peak_bytes"]} |' + ) + +lines.extend([ + "", + "## Dictionary Compression (C FFI)", + "", + "| Scenario | Level | Dict bytes | Train ms | C bytes (no dict) | C bytes (with dict) | C ratio (no dict) | C ratio (with dict) |", + "| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |", +]) + +for row in sorted(dictionary_rows, key=lambda item: (item["scenario"], 
item["level"])): + lines.append( + f'| {row["label"]} | {row["level"]} | {row["dict_bytes"]} | {row["train_ms"]:.3f} | {row["ffi_no_dict_bytes"]} | {row["ffi_with_dict_bytes"]} | {row["ffi_no_dict_ratio"]:.4f} | {row["ffi_with_dict_ratio"]:.4f} |' + ) + lines.extend([ "", "## Timing Metrics", @@ -113,4 +194,6 @@ with open("benchmark-report.md", "w") as f: print(f"Wrote {len(benchmark_results)} timing results to benchmark-results.json", file=sys.stderr) print(f"Wrote {len(ratios)} ratio rows to benchmark-report.md", file=sys.stderr) +print(f"Wrote {len(memory_rows)} memory rows to benchmark-report.md", file=sys.stderr) +print(f"Wrote {len(dictionary_rows)} dictionary rows to benchmark-report.md", file=sys.stderr) PYEOF diff --git a/BENCHMARKS.md b/BENCHMARKS.md index 1e0f3be0..6d30dd03 100644 --- a/BENCHMARKS.md +++ b/BENCHMARKS.md @@ -27,9 +27,11 @@ encoder: - `structured-zstd::Fastest` vs `zstd` level `1` - `structured-zstd::Default` vs `zstd` level `3` -`Better` and `Best` are intentionally excluded until the encoder implements them. Dictionary -compression is also excluded from the timing matrix because the crate currently exposes dictionary -training, but not dictionary-based compression. +`Better` and `Best` are intentionally excluded until the encoder implements them. + +Dictionary benchmarks are tracked separately with C FFI `with_dict` vs `without_dict` runs, using a +dictionary trained from scenario samples. Pure Rust dictionary compression is still pending and is +therefore not part of the pure-Rust-vs-C timing matrix yet. 
## Commands @@ -62,6 +64,10 @@ bash scripts/bench-flamegraph.sh decompress/default/decodecorpus-z000033/matrix/ `run-benchmarks.sh` writes: - `benchmark-results.json` for GitHub regression tracking -- `benchmark-report.md` with scenario-by-scenario compression ratios and timing rows +- `benchmark-report.md` with: + - compression ratio tables (`REPORT`) + - peak memory estimate tables (`REPORT_MEM`) + - dictionary compression tables (`REPORT_DICT`) + - timing rows for all benchmark functions Criterion also writes its usual detailed estimates under `target/criterion/`. diff --git a/README.md b/README.md index 0cea48ab..acc117b7 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ When the `dict_builder` feature is enabled, the `dictionary` module can create r ## Benchmarking -Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). The suite compares `structured-zstd` against the C reference across small payloads, entropy extremes, a `100 MiB` large-stream scenario, repository corpus fixtures, and optional local Silesia corpora. +Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). The suite compares `structured-zstd` against the C reference across small payloads, entropy extremes, a `100 MiB` large-stream scenario, repository corpus fixtures, and optional local Silesia corpora. Reports now include compression ratios, peak memory estimates, and C FFI dictionary compression (with/without dictionary) for small and corpus scenarios. 
## Usage diff --git a/zstd/Cargo.toml b/zstd/Cargo.toml index 51a5140c..f80c947c 100644 --- a/zstd/Cargo.toml +++ b/zstd/Cargo.toml @@ -31,7 +31,8 @@ alloc = { version = "1.0.0", optional = true, package = "rustc-std-workspace-all [dev-dependencies] criterion = "0.5" rand = { version = "0.8.5", features = ["small_rng"] } -zstd = "0.13.2" +stats_alloc = "0.1" +zstd = { version = "0.13.2", features = ["zdict_builder"] } [features] default = ["hash", "std"] diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index a0581bd0..f422fa56 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -12,7 +12,7 @@ mod support; use criterion::{Criterion, SamplingMode, Throughput, black_box, criterion_group, criterion_main}; -use std::time::Duration; +use std::time::{Duration, Instant}; use structured_zstd::decoding::FrameDecoder; use support::{LevelConfig, Scenario, ScenarioClass, benchmark_scenarios, supported_levels}; @@ -23,6 +23,13 @@ fn bench_compress(c: &mut Criterion) { structured_zstd::encoding::compress_to_vec(&scenario.bytes[..], level.rust_level); let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); emit_report_line(&scenario, level, &rust_compressed, &ffi_compressed); + emit_memory_report( + &scenario, + level, + "compress", + scenario.len() + rust_compressed.len(), + scenario.len() + ffi_compressed.len(), + ); let benchmark_name = format!("compress/{}/{}/{}", level.name, scenario.id, "matrix"); let mut group = c.benchmark_group(benchmark_name); @@ -54,6 +61,13 @@ fn bench_decompress(c: &mut Criterion) { for level in supported_levels() { let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); let expected_len = scenario.len(); + emit_memory_report( + &scenario, + level, + "decompress", + ffi_compressed.len() + expected_len, + ffi_compressed.len() + expected_len, + ); let benchmark_name = format!("decompress/{}/{}/{}", level.name, scenario.id, "matrix"); let mut group 
= c.benchmark_group(benchmark_name); configure_group(&mut group, &scenario); @@ -82,6 +96,68 @@ fn bench_decompress(c: &mut Criterion) { } } +fn bench_dictionary(c: &mut Criterion) { + for scenario in benchmark_scenarios() { + if !matches!(scenario.class, ScenarioClass::Small | ScenarioClass::Corpus) { + continue; + } + + let training_samples = split_training_samples(&scenario.bytes); + let sample_refs: Vec<&[u8]> = training_samples.iter().map(Vec::as_slice).collect(); + let total_training_bytes = sample_refs.iter().map(|sample| sample.len()).sum::(); + let dict_size = dictionary_size_for(scenario.len()) + .min(total_training_bytes.saturating_sub(64)) + .max(256); + let train_started = Instant::now(); + let Ok(dictionary) = zstd::dict::from_samples(&sample_refs, dict_size) else { + eprintln!( + "BENCH_WARN skipping dictionary benchmark for {} (samples={}, total_training_bytes={}, dict_size={})", + scenario.id, + sample_refs.len(), + total_training_bytes, + dict_size + ); + continue; + }; + let train_ms = train_started.elapsed().as_secs_f64() * 1_000.0; + + for level in supported_levels() { + let mut no_dict = zstd::bulk::Compressor::new(level.ffi_level).unwrap(); + let mut with_dict = + zstd::bulk::Compressor::with_dictionary(level.ffi_level, &dictionary).unwrap(); + let no_dict_bytes = no_dict.compress(&scenario.bytes).unwrap(); + let with_dict_bytes = with_dict.compress(&scenario.bytes).unwrap(); + emit_dictionary_report( + &scenario, + level, + dictionary.len(), + train_ms, + &no_dict_bytes, + &with_dict_bytes, + ); + + let benchmark_name = + format!("compress-dict/{}/{}/{}", level.name, scenario.id, "matrix"); + let mut group = c.benchmark_group(benchmark_name); + configure_group(&mut group, &scenario); + group.throughput(Throughput::Bytes(scenario.throughput_bytes())); + + group.bench_function("c_ffi_without_dict", |b| { + let mut compressor = zstd::bulk::Compressor::new(level.ffi_level).unwrap(); + b.iter(|| 
black_box(compressor.compress(&scenario.bytes).unwrap())) + }); + + group.bench_function("c_ffi_with_dict", |b| { + let mut compressor = + zstd::bulk::Compressor::with_dictionary(level.ffi_level, &dictionary).unwrap(); + b.iter(|| black_box(compressor.compress(&scenario.bytes).unwrap())) + }); + + group.finish(); + } + } +} + fn configure_group( group: &mut criterion::BenchmarkGroup<'_, M>, scenario: &Scenario, @@ -106,6 +182,19 @@ fn configure_group( } } +fn emit_memory_report( + scenario: &Scenario, + level: LevelConfig, + stage: &'static str, + rust_peak_bytes: usize, + ffi_peak_bytes: usize, +) { + println!( + "REPORT_MEM scenario={} label=\"{}\" level={} stage={} rust_peak_bytes={} ffi_peak_bytes={}", + scenario.id, scenario.label, level.name, stage, rust_peak_bytes, ffi_peak_bytes + ); +} + fn emit_report_line( scenario: &Scenario, level: LevelConfig, @@ -134,5 +223,55 @@ fn emit_report_line( ); } -criterion_group!(benches, bench_compress, bench_decompress); +fn emit_dictionary_report( + scenario: &Scenario, + level: LevelConfig, + dict_bytes: usize, + train_ms: f64, + no_dict_bytes: &[u8], + with_dict_bytes: &[u8], +) { + let input_len = scenario.len() as f64; + let no_dict_ratio = no_dict_bytes.len() as f64 / input_len; + let with_dict_ratio = with_dict_bytes.len() as f64 / input_len; + println!( + "REPORT_DICT scenario={} label=\"{}\" level={} dict_bytes={} train_ms={:.3} ffi_no_dict_bytes={} ffi_with_dict_bytes={} ffi_no_dict_ratio={:.6} ffi_with_dict_ratio={:.6}", + scenario.id, + scenario.label, + level.name, + dict_bytes, + train_ms, + no_dict_bytes.len(), + with_dict_bytes.len(), + no_dict_ratio, + with_dict_ratio + ); +} + +fn split_training_samples(source: &[u8]) -> Vec> { + let sample_size = source.len().div_ceil(16).clamp(256, 8192); + let mut samples: Vec> = source + .chunks(sample_size) + .take(64) + .filter(|chunk| chunk.len() >= 64) + .map(|chunk| chunk.to_vec()) + .collect(); + if samples.len() < 2 { + let midpoint = source.len() / 2; + let 
left = &source[..midpoint]; + let right = &source[midpoint..]; + if left.len() >= 64 && right.len() >= 64 { + samples = vec![left.to_vec(), right.to_vec()]; + } else { + samples = vec![source.to_vec(), source.to_vec()]; + } + } + samples +} + +fn dictionary_size_for(input_len: usize) -> usize { + input_len.div_ceil(8).clamp(256, 16 * 1024) +} + +criterion_group!(benches, bench_compress, bench_decompress, bench_dictionary); criterion_main!(benches); From 7e032ba15fa167542c392ec068c23162efa93ad6 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 21:20:26 +0200 Subject: [PATCH 09/30] test(bench): align decompression benchmark paths - include scenario id in ratio report markdown table - reuse decoders and buffers in decompression benchmark loops - keep throughput comparison focused on decode work Refs #24 --- .github/scripts/run-benchmarks.sh | 6 +++--- zstd/benches/compare_ffi.rs | 12 ++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index e86a9fe7..bc24452f 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -143,13 +143,13 @@ lines = [ "", "## Compression Ratios", "", - "| Scenario | Level | Input bytes | Rust bytes | C bytes | Rust ratio | C ratio |", - "| --- | --- | ---: | ---: | ---: | ---: | ---: |", + "| Scenario | Label | Level | Input bytes | Rust bytes | C bytes | Rust ratio | C ratio |", + "| --- | --- | --- | ---: | ---: | ---: | ---: | ---: |", ] for row in sorted(ratios, key=lambda item: (item["scenario"], item["level"])): lines.append( - f'| {row["label"]} | {row["level"]} | {row["input_bytes"]} | {row["rust_bytes"]} | {row["ffi_bytes"]} | {row["rust_ratio"]:.4f} | {row["ffi_ratio"]:.4f} |' + f'| {row["scenario"]} | {row["label"]} | {row["level"]} | {row["input_bytes"]} | {row["rust_bytes"]} | {row["ffi_bytes"]} | {row["rust_ratio"]:.4f} | {row["ffi_ratio"]:.4f} |' ) lines.extend([ diff --git 
a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index f422fa56..539b44d9 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -75,18 +75,22 @@ fn bench_decompress(c: &mut Criterion) { group.bench_function("pure_rust", |b| { let mut target = vec![0u8; expected_len]; + let mut decoder = FrameDecoder::new(); b.iter(|| { - let mut decoder = FrameDecoder::new(); let written = decoder.decode_all(&ffi_compressed, &mut target).unwrap(); assert_eq!(written, expected_len); }) }); group.bench_function("c_ffi", |b| { + let mut decoder = zstd::bulk::Decompressor::new().unwrap(); + let mut output = Vec::with_capacity(expected_len); b.iter(|| { - // Intentional: zstd::decode_all represents the common high-level FFI path and - // includes allocation cost, while pure_rust isolates decode throughput. - let output = zstd::decode_all(&ffi_compressed[..]).unwrap(); + output.clear(); + let written = decoder + .decompress_to_buffer(&ffi_compressed[..], &mut output) + .unwrap(); + assert_eq!(written, expected_len); assert_eq!(output.len(), expected_len); }) }); From bc3bc2fc1b8b02069cff4cc345757e166e00d958 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 00:31:38 +0200 Subject: [PATCH 10/30] test(bench): include scenario ids in report tables - add explicit Label columns for memory and dictionary sections - render stable scenario ids instead of labels in Scenario column Refs #24 --- .github/scripts/run-benchmarks.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index bc24452f..2dcd3422 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -156,26 +156,26 @@ lines.extend([ "", "## Peak Memory Estimates", "", - "| Scenario | Level | Stage | Rust peak bytes | C peak bytes |", - "| --- | --- | --- | ---: | ---: |", + "| Scenario | Label | Level | Stage | Rust peak bytes | C peak bytes |", + "| --- | 
--- | --- | --- | ---: | ---: |", ]) for row in sorted(memory_rows, key=lambda item: (item["scenario"], item["level"], item["stage"])): lines.append( - f'| {row["label"]} | {row["level"]} | {row["stage"]} | {row["rust_peak_bytes"]} | {row["ffi_peak_bytes"]} |' + f'| {row["scenario"]} | {row["label"]} | {row["level"]} | {row["stage"]} | {row["rust_peak_bytes"]} | {row["ffi_peak_bytes"]} |' ) lines.extend([ "", "## Dictionary Compression (C FFI)", "", - "| Scenario | Level | Dict bytes | Train ms | C bytes (no dict) | C bytes (with dict) | C ratio (no dict) | C ratio (with dict) |", - "| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |", + "| Scenario | Label | Level | Dict bytes | Train ms | C bytes (no dict) | C bytes (with dict) | C ratio (no dict) | C ratio (with dict) |", + "| --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |", ]) for row in sorted(dictionary_rows, key=lambda item: (item["scenario"], item["level"])): lines.append( - f'| {row["label"]} | {row["level"]} | {row["dict_bytes"]} | {row["train_ms"]:.3f} | {row["ffi_no_dict_bytes"]} | {row["ffi_with_dict_bytes"]} | {row["ffi_no_dict_ratio"]:.4f} | {row["ffi_with_dict_ratio"]:.4f} |' + f'| {row["scenario"]} | {row["label"]} | {row["level"]} | {row["dict_bytes"]} | {row["train_ms"]:.3f} | {row["ffi_no_dict_bytes"]} | {row["ffi_with_dict_bytes"]} | {row["ffi_no_dict_ratio"]:.4f} | {row["ffi_with_dict_ratio"]:.4f} |' ) lines.extend([ From 007d5231f2814711272d95b4c2fc075a9f2981ed Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 00:32:10 +0200 Subject: [PATCH 11/30] fix(bench): guard dictionary ratio division - handle zero-length scenario inputs in emit_dictionary_report - emit 0.0 ratios instead of inf/NaN for empty payload edge cases Refs #24 --- zstd/benches/compare_ffi.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 539b44d9..d3ea7ebf 100644 --- a/zstd/benches/compare_ffi.rs 
+++ b/zstd/benches/compare_ffi.rs @@ -236,8 +236,14 @@ fn emit_dictionary_report( with_dict_bytes: &[u8], ) { let input_len = scenario.len() as f64; - let no_dict_ratio = no_dict_bytes.len() as f64 / input_len; - let with_dict_ratio = with_dict_bytes.len() as f64 / input_len; + let (no_dict_ratio, with_dict_ratio) = if input_len > 0.0 { + ( + no_dict_bytes.len() as f64 / input_len, + with_dict_bytes.len() as f64 / input_len, + ) + } else { + (0.0, 0.0) + }; println!( "REPORT_DICT scenario={} label=\"{}\" level={} dict_bytes={} train_ms={:.3} ffi_no_dict_bytes={} ffi_with_dict_bytes={} ffi_no_dict_ratio={:.6} ffi_with_dict_ratio={:.6}", scenario.id, From c4e58d1b186265bd6b6824e56e2e4ec35c54e379 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 00:32:42 +0200 Subject: [PATCH 12/30] fix(bench): bound Silesia fixture loading - cap loaded fixture count and file size for predictable startup - support STRUCTURED_ZSTD_SILESIA_MAX_FILES and _MAX_FILE_BYTES overrides - emit BENCH_WARN diagnostics when limits are applied Refs #24 --- zstd/benches/support/mod.rs | 48 ++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index f97aeca3..64653e7a 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -153,21 +153,63 @@ fn repeated_log_lines(len: usize) -> Vec { } fn load_silesia_from_env() -> Vec { + const DEFAULT_MAX_FILES: usize = 12; + const DEFAULT_MAX_FILE_BYTES: usize = 64 * 1024 * 1024; let Some(dir) = env::var_os("STRUCTURED_ZSTD_SILESIA_DIR") else { return Vec::new(); }; + let max_files = env::var("STRUCTURED_ZSTD_SILESIA_MAX_FILES") + .ok() + .and_then(|value| value.parse::().ok()) + .filter(|value| *value > 0) + .unwrap_or(DEFAULT_MAX_FILES); + let max_file_bytes = env::var("STRUCTURED_ZSTD_SILESIA_MAX_FILE_BYTES") + .ok() + .and_then(|value| value.parse::().ok()) + .filter(|value| *value > 0) + 
.unwrap_or(DEFAULT_MAX_FILE_BYTES); let Ok(entries) = fs::read_dir(Path::new(&dir)) else { eprintln!("BENCH_WARN failed to read STRUCTURED_ZSTD_SILESIA_DIR={dir:?}"); return Vec::new(); }; + let mut paths: Vec<_> = entries + .flatten() + .map(|entry| entry.path()) + .filter(|path| path.is_file()) + .collect(); + paths.sort(); + if paths.len() > max_files { + eprintln!( + "BENCH_WARN limiting Silesia fixtures to first {} files from {} entries in {}", + max_files, + paths.len(), + Path::new(&dir).display() + ); + paths.truncate(max_files); + } + let mut scenarios = Vec::new(); - for entry in entries.flatten() { - let path = entry.path(); - if !path.is_file() { + for path in paths { + let Ok(metadata) = fs::metadata(&path) else { + eprintln!( + "BENCH_WARN failed to stat Silesia fixture {}", + path.display() + ); + continue; + }; + let file_len = metadata.len() as usize; + if file_len > max_file_bytes { + eprintln!( + "BENCH_WARN skipping Silesia fixture {} ({} bytes > max {} bytes)", + path.display(), + file_len, + max_file_bytes + ); continue; } + let Ok(bytes) = fs::read(&path) else { eprintln!( "BENCH_WARN failed to read Silesia fixture {}", From e5cdee1c65e1b64109a782f747929c4f569fd4f1 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 00:33:41 +0200 Subject: [PATCH 13/30] docs(bench): clarify memory estimates in reports - rename REPORT_MEM fields to buffer-bytes estimate names - update report section/columns to explicit input+output estimates - sync README and BENCHMARKS wording with new semantics Refs #24 --- .github/scripts/run-benchmarks.sh | 21 ++++++++++++++------- BENCHMARKS.md | 4 +++- README.md | 2 +- zstd/benches/compare_ffi.rs | 13 +++++++++---- 4 files changed, 27 insertions(+), 13 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index 2dcd3422..27a8fa4f 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -31,7 +31,7 @@ REPORT_RE = re.compile( 
r'^REPORT scenario=(\S+) label="([^"]+)" level=(\S+) input_bytes=(\d+) rust_bytes=(\d+) ffi_bytes=(\d+) rust_ratio=([0-9.]+) ffi_ratio=([0-9.]+)$' ) MEM_RE = re.compile( - r'^REPORT_MEM scenario=(\S+) label="([^"]+)" level=(\S+) stage=(\S+) rust_peak_bytes=(\d+) ffi_peak_bytes=(\d+)$' + r'^REPORT_MEM scenario=(\S+) label="([^"]+)" level=(\S+) stage=(\S+) rust_buffer_bytes_estimate=(\d+) ffi_buffer_bytes_estimate=(\d+)$' ) DICT_RE = re.compile( r'^REPORT_DICT scenario=(\S+) label="([^"]+)" level=(\S+) dict_bytes=(\d+) train_ms=([0-9.]+) ffi_no_dict_bytes=(\d+) ffi_with_dict_bytes=(\d+) ffi_no_dict_ratio=([0-9.]+) ffi_with_dict_ratio=([0-9.]+)$' @@ -78,14 +78,21 @@ with open(raw_path) as f: mem_match = MEM_RE.match(line) if mem_match: - scenario, label, level, stage, rust_peak_bytes, ffi_peak_bytes = mem_match.groups() + ( + scenario, + label, + level, + stage, + rust_buffer_bytes_estimate, + ffi_buffer_bytes_estimate, + ) = mem_match.groups() memory_rows.append({ "scenario": scenario, "label": label, "level": level, "stage": stage, - "rust_peak_bytes": int(rust_peak_bytes), - "ffi_peak_bytes": int(ffi_peak_bytes), + "rust_buffer_bytes_estimate": int(rust_buffer_bytes_estimate), + "ffi_buffer_bytes_estimate": int(ffi_buffer_bytes_estimate), }) continue @@ -154,15 +161,15 @@ for row in sorted(ratios, key=lambda item: (item["scenario"], item["level"])): lines.extend([ "", - "## Peak Memory Estimates", + "## Buffer Size Estimates (Input + Output)", "", - "| Scenario | Label | Level | Stage | Rust peak bytes | C peak bytes |", + "| Scenario | Label | Level | Stage | Rust buffer bytes (estimate) | C buffer bytes (estimate) |", "| --- | --- | --- | --- | ---: | ---: |", ]) for row in sorted(memory_rows, key=lambda item: (item["scenario"], item["level"], item["stage"])): lines.append( - f'| {row["scenario"]} | {row["label"]} | {row["level"]} | {row["stage"]} | {row["rust_peak_bytes"]} | {row["ffi_peak_bytes"]} |' + f'| {row["scenario"]} | {row["label"]} | {row["level"]} | 
{row["stage"]} | {row["rust_buffer_bytes_estimate"]} | {row["ffi_buffer_bytes_estimate"]} |' ) lines.extend([ diff --git a/BENCHMARKS.md b/BENCHMARKS.md index 6d30dd03..c603cc1c 100644 --- a/BENCHMARKS.md +++ b/BENCHMARKS.md @@ -14,6 +14,8 @@ The current matrix covers: - low entropy repeated payloads (`1 MiB`) - a large structured stream (`100 MiB`) - optional Silesia corpus files when `STRUCTURED_ZSTD_SILESIA_DIR=/path/to/silesia` is set + - load is bounded by `STRUCTURED_ZSTD_SILESIA_MAX_FILES` (default `12`) and + `STRUCTURED_ZSTD_SILESIA_MAX_FILE_BYTES` (default `67108864`) The local default for the large scenario is `100 MiB`. In GitHub Actions, when `STRUCTURED_ZSTD_BENCH_LARGE_BYTES` is unset, `.github/scripts/run-benchmarks.sh` defaults it to @@ -66,7 +68,7 @@ bash scripts/bench-flamegraph.sh decompress/default/decodecorpus-z000033/matrix/ - `benchmark-results.json` for GitHub regression tracking - `benchmark-report.md` with: - compression ratio tables (`REPORT`) - - peak memory estimate tables (`REPORT_MEM`) + - input+output buffer size estimate tables (`REPORT_MEM`) - dictionary compression tables (`REPORT_DICT`) - timing rows for all benchmark functions diff --git a/README.md b/README.md index acc117b7..63beea4c 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ When the `dict_builder` feature is enabled, the `dictionary` module can create r ## Benchmarking -Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). The suite compares `structured-zstd` against the C reference across small payloads, entropy extremes, a `100 MiB` large-stream scenario, repository corpus fixtures, and optional local Silesia corpora. Reports now include compression ratios, peak memory estimates, and C FFI dictionary compression (with/without dictionary) for small and corpus scenarios. +Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). 
The suite compares `structured-zstd` against the C reference across small payloads, entropy extremes, a `100 MiB` large-stream scenario, repository corpus fixtures, and optional local Silesia corpora. Reports now include compression ratios, input+output buffer size estimates, and C FFI dictionary compression (with/without dictionary) for small and corpus scenarios. ## Usage diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index d3ea7ebf..1e5e90d0 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -190,12 +190,17 @@ fn emit_memory_report( scenario: &Scenario, level: LevelConfig, stage: &'static str, - rust_peak_bytes: usize, - ffi_peak_bytes: usize, + rust_buffer_bytes_estimate: usize, + ffi_buffer_bytes_estimate: usize, ) { println!( - "REPORT_MEM scenario={} label=\"{}\" level={} stage={} rust_peak_bytes={} ffi_peak_bytes={}", - scenario.id, scenario.label, level.name, stage, rust_peak_bytes, ffi_peak_bytes + "REPORT_MEM scenario={} label=\"{}\" level={} stage={} rust_buffer_bytes_estimate={} ffi_buffer_bytes_estimate={}", + scenario.id, + scenario.label, + level.name, + stage, + rust_buffer_bytes_estimate, + ffi_buffer_bytes_estimate ); } From 4fd6c1160542ee5768b43beb1d216ac137ff11c3 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 09:59:39 +0200 Subject: [PATCH 14/30] perf(bench): cache benchmark scenario generation - build scenario inputs once via OnceLock - reuse cached slice across compress/decompress/dictionary benches Refs #24 --- zstd/benches/compare_ffi.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 1e5e90d0..f8384ace 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -12,12 +12,19 @@ mod support; use criterion::{Criterion, SamplingMode, Throughput, black_box, criterion_group, criterion_main}; +use std::sync::OnceLock; use std::time::{Duration, Instant}; use 
structured_zstd::decoding::FrameDecoder; use support::{LevelConfig, Scenario, ScenarioClass, benchmark_scenarios, supported_levels}; +static BENCHMARK_SCENARIOS: OnceLock> = OnceLock::new(); + +fn benchmark_scenarios_cached() -> &'static [Scenario] { + BENCHMARK_SCENARIOS.get_or_init(benchmark_scenarios) +} + fn bench_compress(c: &mut Criterion) { - for scenario in benchmark_scenarios() { + for scenario in benchmark_scenarios_cached().iter() { for level in supported_levels() { let rust_compressed = structured_zstd::encoding::compress_to_vec(&scenario.bytes[..], level.rust_level); @@ -57,7 +64,7 @@ fn bench_compress(c: &mut Criterion) { } fn bench_decompress(c: &mut Criterion) { - for scenario in benchmark_scenarios() { + for scenario in benchmark_scenarios_cached().iter() { for level in supported_levels() { let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); let expected_len = scenario.len(); @@ -101,7 +108,7 @@ fn bench_decompress(c: &mut Criterion) { } fn bench_dictionary(c: &mut Criterion) { - for scenario in benchmark_scenarios() { + for scenario in benchmark_scenarios_cached().iter() { if !matches!(scenario.class, ScenarioClass::Small | ScenarioClass::Corpus) { continue; } From 0b2813bdd978dc4f30981ec6dd5dc7b9d1593906 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 10:00:14 +0200 Subject: [PATCH 15/30] chore(bench): drop unused stats_alloc dep - remove unused dev-dependency from zstd/Cargo.toml Refs #24 --- zstd/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/zstd/Cargo.toml b/zstd/Cargo.toml index f80c947c..4d80c255 100644 --- a/zstd/Cargo.toml +++ b/zstd/Cargo.toml @@ -31,7 +31,6 @@ alloc = { version = "1.0.0", optional = true, package = "rustc-std-workspace-all [dev-dependencies] criterion = "0.5" rand = { version = "0.8.5", features = ["small_rng"] } -stats_alloc = "0.1" zstd = { version = "0.13.2", features = ["zdict_builder"] } [features] From 86f27c8bb0c8fe20fda21a95007d8fffc2434646 
Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 10:00:52 +0200 Subject: [PATCH 16/30] fix(bench): allow filtered runs without dict rows - downgrade missing REPORT_DICT from error to warning Refs #24 --- .github/scripts/run-benchmarks.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index 27a8fa4f..c786e438 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -137,8 +137,7 @@ if not memory_rows: sys.exit(1) if not dictionary_rows: - print("ERROR: No REPORT_DICT lines parsed; dictionary section would be empty.", file=sys.stderr) - sys.exit(1) + print("WARN: No REPORT_DICT lines parsed; dictionary section will be empty.", file=sys.stderr) with open("benchmark-results.json", "w") as f: json.dump(benchmark_results, f, indent=2) From 42cfc4648ba63bc76a4b1f708dd91e7e1999c723 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 10:01:39 +0200 Subject: [PATCH 17/30] fix(bench): avoid duplicate dict fallback samples - use single-sample fallback for tiny dictionary training inputs - emit BENCH_WARN when fallback path is used Refs #24 --- zstd/benches/compare_ffi.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index f8384ace..3a283626 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -285,7 +285,11 @@ fn split_training_samples(source: &[u8]) -> Vec<Vec<u8>> { if left.len() >= 64 && right.len() >= 64 { samples = vec![left.to_vec(), right.to_vec()]; } else { - samples = vec![source.to_vec(), source.to_vec()]; + eprintln!( + "BENCH_WARN tiny dictionary training input ({} bytes), using a single sample fallback", + source.len() + ); + samples = vec![source.to_vec()]; } } samples From bf8bba5b9d54fd8dbba18993df0a677aafa1460c Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 10:02:25 +0200 
Subject: [PATCH 18/30] style(bench): add is_empty for Scenario - satisfy len_without_is_empty expectations for bench helper type Refs #24 --- zstd/benches/support/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index 64653e7a..7260a7b5 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -109,6 +109,11 @@ impl Scenario { self.bytes.len() } + #[allow(dead_code)] + pub(crate) fn is_empty(&self) -> bool { + self.bytes.is_empty() + } + pub(crate) fn throughput_bytes(&self) -> u64 { self.bytes.len() as u64 } From a3a54a4e018b60f26f6a6c7b61346f07066890be Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 10:03:44 +0200 Subject: [PATCH 19/30] fix(bench): remove needless borrows in scenario loops - pass cached scenario references directly in helper calls - keep clippy clean with OnceLock-backed scenario cache Refs #24 --- zstd/benches/compare_ffi.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 3a283626..24f40d94 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -29,9 +29,9 @@ fn bench_compress(c: &mut Criterion) { let rust_compressed = structured_zstd::encoding::compress_to_vec(&scenario.bytes[..], level.rust_level); let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); - emit_report_line(&scenario, level, &rust_compressed, &ffi_compressed); + emit_report_line(scenario, level, &rust_compressed, &ffi_compressed); emit_memory_report( - &scenario, + scenario, level, "compress", scenario.len() + rust_compressed.len(), @@ -40,7 +40,7 @@ fn bench_compress(c: &mut Criterion) { let benchmark_name = format!("compress/{}/{}/{}", level.name, scenario.id, "matrix"); let mut group = c.benchmark_group(benchmark_name); - configure_group(&mut group, &scenario); + configure_group(&mut group, scenario); 
group.throughput(Throughput::Bytes(scenario.throughput_bytes())); group.bench_function("pure_rust", |b| { @@ -69,7 +69,7 @@ fn bench_decompress(c: &mut Criterion) { let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); let expected_len = scenario.len(); emit_memory_report( - &scenario, + scenario, level, "decompress", ffi_compressed.len() + expected_len, @@ -77,7 +77,7 @@ fn bench_decompress(c: &mut Criterion) { ); let benchmark_name = format!("decompress/{}/{}/{}", level.name, scenario.id, "matrix"); let mut group = c.benchmark_group(benchmark_name); - configure_group(&mut group, &scenario); + configure_group(&mut group, scenario); group.throughput(Throughput::Bytes(scenario.throughput_bytes())); group.bench_function("pure_rust", |b| { @@ -139,7 +139,7 @@ fn bench_dictionary(c: &mut Criterion) { let no_dict_bytes = no_dict.compress(&scenario.bytes).unwrap(); let with_dict_bytes = with_dict.compress(&scenario.bytes).unwrap(); emit_dictionary_report( - &scenario, + scenario, level, dictionary.len(), train_ms, @@ -150,7 +150,7 @@ fn bench_dictionary(c: &mut Criterion) { let benchmark_name = format!("compress-dict/{}/{}/{}", level.name, scenario.id, "matrix"); let mut group = c.benchmark_group(benchmark_name); - configure_group(&mut group, &scenario); + configure_group(&mut group, scenario); group.throughput(Throughput::Bytes(scenario.throughput_bytes())); group.bench_function("c_ffi_without_dict", |b| { From c99d33bb692f0d82607b175c24f01c291686a149 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 10:38:05 +0200 Subject: [PATCH 20/30] fix(bench): sanitize Silesia scenario report fields - normalize Silesia-derived scenario ids to safe ASCII tokens - escape report labels before emitting REPORT/REPORT_MEM/REPORT_DICT lines Refs #24 --- zstd/benches/compare_ffi.rs | 13 ++++++++++--- zstd/benches/support/mod.rs | 19 ++++++++++++++++++- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git 
a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 24f40d94..91f9c598 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -200,10 +200,11 @@ fn emit_memory_report( rust_buffer_bytes_estimate: usize, ffi_buffer_bytes_estimate: usize, ) { + let escaped_label = escape_report_label(&scenario.label); println!( "REPORT_MEM scenario={} label=\"{}\" level={} stage={} rust_buffer_bytes_estimate={} ffi_buffer_bytes_estimate={}", scenario.id, - scenario.label, + escaped_label, level.name, stage, rust_buffer_bytes_estimate, @@ -218,6 +219,7 @@ fn emit_report_line( ffi_compressed: &[u8], ) { let input_len = scenario.len() as f64; + let escaped_label = escape_report_label(&scenario.label); let (rust_ratio, ffi_ratio) = if input_len > 0.0 { ( rust_compressed.len() as f64 / input_len, @@ -229,7 +231,7 @@ fn emit_report_line( println!( "REPORT scenario={} label=\"{}\" level={} input_bytes={} rust_bytes={} ffi_bytes={} rust_ratio={:.6} ffi_ratio={:.6}", scenario.id, - scenario.label, + escaped_label, level.name, scenario.len(), rust_compressed.len(), @@ -248,6 +250,7 @@ fn emit_dictionary_report( with_dict_bytes: &[u8], ) { let input_len = scenario.len() as f64; + let escaped_label = escape_report_label(&scenario.label); let (no_dict_ratio, with_dict_ratio) = if input_len > 0.0 { ( no_dict_bytes.len() as f64 / input_len, @@ -259,7 +262,7 @@ fn emit_dictionary_report( println!( "REPORT_DICT scenario={} label=\"{}\" level={} dict_bytes={} train_ms={:.3} ffi_no_dict_bytes={} ffi_with_dict_bytes={} ffi_no_dict_ratio={:.6} ffi_with_dict_ratio={:.6}", scenario.id, - scenario.label, + escaped_label, level.name, dict_bytes, train_ms, @@ -299,5 +302,9 @@ fn dictionary_size_for(input_len: usize) -> usize { input_len.div_ceil(8).clamp(256, 16 * 1024) } +fn escape_report_label(label: &str) -> String { + label.replace('\\', "\\\\").replace('\"', "\\\"") +} + criterion_group!(benches, bench_compress, bench_decompress, bench_dictionary); 
criterion_main!(benches); diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index 7260a7b5..f0fc695d 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -232,8 +232,9 @@ fn load_silesia_from_env() -> Vec { let Some(stem) = path.file_stem().and_then(|stem| stem.to_str()) else { continue; }; + let scenario_stem = sanitize_scenario_stem(stem); scenarios.push(Scenario::new( - format!("silesia-{stem}"), + format!("silesia-{scenario_stem}"), format!("Silesia corpus: {stem}"), bytes, ScenarioClass::Silesia, @@ -251,3 +252,19 @@ fn large_stream_len() -> usize { .filter(|value| *value > 0) .unwrap_or(100 * 1024 * 1024) } + +fn sanitize_scenario_stem(stem: &str) -> String { + let mut sanitized = String::with_capacity(stem.len()); + for ch in stem.chars() { + if ch.is_ascii_alphanumeric() || matches!(ch, '.' | '_' | '-') { + sanitized.push(ch); + } else { + sanitized.push('_'); + } + } + if sanitized.is_empty() { + "unnamed".to_string() + } else { + sanitized + } +} From c9639b5d4b758d258a3784749b377e7b1ea5e08a Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 10:38:45 +0200 Subject: [PATCH 21/30] perf(bench): bound Silesia dir walk by max_files - stop collecting file paths once fixture limit is reached - keep deterministic ordering by sorting only the bounded subset Refs #24 --- zstd/benches/support/mod.rs | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index f0fc695d..872a53b6 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -179,20 +179,26 @@ fn load_silesia_from_env() -> Vec { return Vec::new(); }; - let mut paths: Vec<_> = entries - .flatten() - .map(|entry| entry.path()) - .filter(|path| path.is_file()) - .collect(); + let mut paths = Vec::with_capacity(max_files); + let mut hit_limit = false; + for entry in entries.flatten() { + let path = entry.path(); + if !path.is_file() 
{ + continue; + } + if paths.len() >= max_files { + hit_limit = true; + break; + } + paths.push(path); + } paths.sort(); - if paths.len() > max_files { + if hit_limit { eprintln!( - "BENCH_WARN limiting Silesia fixtures to first {} files from {} entries in {}", + "BENCH_WARN limiting Silesia fixtures to first {} discovered files in {}", max_files, - paths.len(), Path::new(&dir).display() ); - paths.truncate(max_files); } let mut scenarios = Vec::new(); From d63a2b8b9b822472adaa80973610a6307de98846 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 10:39:29 +0200 Subject: [PATCH 22/30] build(bench): ship decode corpus fixture in crate - keep include_bytes corpus scenario available in packaged bench sources - remove decodecorpus_files from zstd crate exclude list Refs #24 --- zstd/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zstd/Cargo.toml b/zstd/Cargo.toml index 4d80c255..82a63edb 100644 --- a/zstd/Cargo.toml +++ b/zstd/Cargo.toml @@ -11,7 +11,7 @@ license = "Apache-2.0" homepage = "https://github.com/structured-world/structured-zstd" repository = "https://github.com/structured-world/structured-zstd" description = "Pure Rust zstd implementation — managed fork of ruzstd. Dictionary decompression, no FFI." -exclude = ["decodecorpus_files/*", "dict_tests/*", "fuzz_decodecorpus/*"] +exclude = ["dict_tests/*", "fuzz_decodecorpus/*"] # Package metadata points at a crate-local symlink so the packaged crate and repo root README stay in sync. 
readme = "README.md" keywords = ["zstd", "zstandard", "decompression", "compression", "pure-rust"] From 60c4ec41bdd168649006e4d51df887faa1f4b003 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 11:08:26 +0200 Subject: [PATCH 23/30] fix(bench): avoid packaging decode corpus fixtures - restore decodecorpus_files exclusion for crate packaging size - load corpus sample at runtime with synthetic fallback when fixture is absent Refs #24 --- zstd/Cargo.toml | 2 +- zstd/benches/support/mod.rs | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/zstd/Cargo.toml b/zstd/Cargo.toml index 82a63edb..0d435fc7 100644 --- a/zstd/Cargo.toml +++ b/zstd/Cargo.toml @@ -11,7 +11,7 @@ license = "Apache-2.0" homepage = "https://github.com/structured-world/structured-zstd" repository = "https://github.com/structured-world/structured-zstd" description = "Pure Rust zstd implementation — managed fork of ruzstd. Dictionary decompression, no FFI." -exclude = ["dict_tests/*", "fuzz_decodecorpus/*"] +exclude = ["dict_tests/*", "fuzz_decodecorpus/*", "decodecorpus_files/*"] # Package metadata points at a crate-local symlink so the packaged crate and repo root README stay in sync. 
readme = "README.md" keywords = ["zstd", "zstandard", "decompression", "compression", "pure-rust"] diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index 872a53b6..ae51498f 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -48,7 +48,7 @@ pub(crate) fn benchmark_scenarios() -> Vec { Scenario::new( "decodecorpus-z000033", "Repo decode corpus sample", - include_bytes!("../../decodecorpus_files/z000033").to_vec(), + load_decode_corpus_sample(), ScenarioClass::Corpus, ), Scenario::new( @@ -259,6 +259,38 @@ fn large_stream_len() -> usize { .unwrap_or(100 * 1024 * 1024) } +fn load_decode_corpus_sample() -> Vec { + let manifest_dir = env::var("CARGO_MANIFEST_DIR").ok(); + let fixture_path = manifest_dir + .as_deref() + .map(Path::new) + .map(|dir| dir.join("decodecorpus_files/z000033")); + + if let Some(path) = fixture_path { + match fs::read(&path) { + Ok(bytes) if !bytes.is_empty() => return bytes, + Ok(_) => { + eprintln!( + "BENCH_WARN decode corpus fixture is empty at {}, using synthetic fallback", + path.display() + ); + } + Err(err) => { + eprintln!( + "BENCH_WARN failed to read decode corpus fixture at {}: {}. Using synthetic fallback", + path.display(), + err + ); + } + } + } else { + eprintln!("BENCH_WARN CARGO_MANIFEST_DIR is not set, using synthetic decode corpus fallback"); + } + + // Keep the benchmark matrix runnable from packaged sources where fixture files may be omitted. 
+ repeated_log_lines(1024 * 1024) +} + fn sanitize_scenario_stem(stem: &str) -> String { let mut sanitized = String::with_capacity(stem.len()); for ch in stem.chars() { From 26ad87b7f4a12066b876f62377610e0d91ba6dee Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 11:09:06 +0200 Subject: [PATCH 24/30] fix(bench): parse escaped labels in report script - accept backslash-escaped quotes in REPORT label regexes - unescape parsed labels before markdown rendering Refs #24 --- .github/scripts/run-benchmarks.sh | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index c786e438..ea8097bb 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -28,15 +28,28 @@ import sys BENCH_RE = re.compile(r"test (\S+)\s+\.\.\. bench:\s+([\d,]+) ns/iter") REPORT_RE = re.compile( - r'^REPORT scenario=(\S+) label="([^"]+)" level=(\S+) input_bytes=(\d+) rust_bytes=(\d+) ffi_bytes=(\d+) rust_ratio=([0-9.]+) ffi_ratio=([0-9.]+)$' + r'^REPORT scenario=(\S+) label="((?:[^"\\]|\\.)+)" level=(\S+) input_bytes=(\d+) rust_bytes=(\d+) ffi_bytes=(\d+) rust_ratio=([0-9.]+) ffi_ratio=([0-9.]+)$' ) MEM_RE = re.compile( - r'^REPORT_MEM scenario=(\S+) label="([^"]+)" level=(\S+) stage=(\S+) rust_buffer_bytes_estimate=(\d+) ffi_buffer_bytes_estimate=(\d+)$' + r'^REPORT_MEM scenario=(\S+) label="((?:[^"\\]|\\.)+)" level=(\S+) stage=(\S+) rust_buffer_bytes_estimate=(\d+) ffi_buffer_bytes_estimate=(\d+)$' ) DICT_RE = re.compile( - r'^REPORT_DICT scenario=(\S+) label="([^"]+)" level=(\S+) dict_bytes=(\d+) train_ms=([0-9.]+) ffi_no_dict_bytes=(\d+) ffi_with_dict_bytes=(\d+) ffi_no_dict_ratio=([0-9.]+) ffi_with_dict_ratio=([0-9.]+)$' + r'^REPORT_DICT scenario=(\S+) label="((?:[^"\\]|\\.)+)" level=(\S+) dict_bytes=(\d+) train_ms=([0-9.]+) ffi_no_dict_bytes=(\d+) ffi_with_dict_bytes=(\d+) ffi_no_dict_ratio=([0-9.]+) ffi_with_dict_ratio=([0-9.]+)$' ) +def 
unescape_report_label(value): + output = [] + i = 0 + while i < len(value): + ch = value[i] + if ch == "\\" and i + 1 < len(value): + i += 1 + output.append(value[i]) + else: + output.append(ch) + i += 1 + return "".join(output) + benchmark_results = [] timings = [] ratios = [] @@ -64,6 +77,7 @@ with open(raw_path) as f: report_match = REPORT_RE.match(line) if report_match: scenario, label, level, input_bytes, rust_bytes, ffi_bytes, rust_ratio, ffi_ratio = report_match.groups() + label = unescape_report_label(label) ratios.append({ "scenario": scenario, "label": label, @@ -86,6 +100,7 @@ with open(raw_path) as f: rust_buffer_bytes_estimate, ffi_buffer_bytes_estimate, ) = mem_match.groups() + label = unescape_report_label(label) memory_rows.append({ "scenario": scenario, "label": label, @@ -109,6 +124,7 @@ with open(raw_path) as f: ffi_no_dict_ratio, ffi_with_dict_ratio, ) = dict_match.groups() + label = unescape_report_label(label) dictionary_rows.append({ "scenario": scenario, "label": label, From 6994c8af1b88747f2d537f8d923bbfa6ba5123e5 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 11:09:33 +0200 Subject: [PATCH 25/30] fix(bench): pass criterion filter correctly to flamegraph - remove unsupported --bench flag from benchmark binary arguments Refs #24 --- scripts/bench-flamegraph.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/bench-flamegraph.sh b/scripts/bench-flamegraph.sh index 47cd196e..fc48e475 100755 --- a/scripts/bench-flamegraph.sh +++ b/scripts/bench-flamegraph.sh @@ -21,7 +21,6 @@ if cargo flamegraph \ ${EXTRA_FLAMEGRAPH_ARGS[@]+"${EXTRA_FLAMEGRAPH_ARGS[@]}"} \ --output "$OUTPUT_DIR/${BENCH_FILTER//\//_}.svg" \ -- \ - --bench \ "$BENCH_FILTER"; then : else From c42f34c1beb8c3ee25926d04506a404b7b1694ba Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 11:10:17 +0200 Subject: [PATCH 26/30] style(bench): format runtime corpus loader - apply rustfmt after decodecorpus runtime load changes Refs #24 --- 
zstd/benches/support/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index ae51498f..46475adf 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -284,7 +284,9 @@ fn load_decode_corpus_sample() -> Vec { } } } else { - eprintln!("BENCH_WARN CARGO_MANIFEST_DIR is not set, using synthetic decode corpus fallback"); + eprintln!( + "BENCH_WARN CARGO_MANIFEST_DIR is not set, using synthetic decode corpus fallback" + ); } // Keep the benchmark matrix runnable from packaged sources where fixture files may be omitted. From 63144cc8767e5f7d04a67dccdab9306740345fef Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 14:24:39 +0200 Subject: [PATCH 27/30] fix(bench): stabilize corpus fallback scenarios - use distinct scenario id/label when decode corpus fixture is unavailable - collect, sort, and truncate Silesia files deterministically --- zstd/benches/support/mod.rs | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index 46475adf..8307cef9 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -45,12 +45,7 @@ pub(crate) fn benchmark_scenarios() -> Vec { repeated_log_lines(4 * 1024), ScenarioClass::Small, ), - Scenario::new( - "decodecorpus-z000033", - "Repo decode corpus sample", - load_decode_corpus_sample(), - ScenarioClass::Corpus, - ), + load_decode_corpus_scenario(), Scenario::new( "high-entropy-1m", "High entropy random payload (1 MiB)", @@ -179,26 +174,22 @@ fn load_silesia_from_env() -> Vec { return Vec::new(); }; - let mut paths = Vec::with_capacity(max_files); - let mut hit_limit = false; + let mut paths = Vec::new(); for entry in entries.flatten() { let path = entry.path(); if !path.is_file() { continue; } - if paths.len() >= max_files { - hit_limit = true; - break; - } paths.push(path); } paths.sort(); 
- if hit_limit { + if paths.len() > max_files { eprintln!( - "BENCH_WARN limiting Silesia fixtures to first {} discovered files in {}", + "BENCH_WARN limiting Silesia fixtures to first {} sorted files in {}", max_files, Path::new(&dir).display() ); + paths.truncate(max_files); } let mut scenarios = Vec::new(); @@ -259,7 +250,12 @@ fn large_stream_len() -> usize { .unwrap_or(100 * 1024 * 1024) } -fn load_decode_corpus_sample() -> Vec { +fn load_decode_corpus_scenario() -> Scenario { + const REAL_ID: &str = "decodecorpus-z000033"; + const REAL_LABEL: &str = "Repo decode corpus sample"; + const FALLBACK_ID: &str = "decodecorpus-synthetic-1m"; + const FALLBACK_LABEL: &str = "Synthetic decode corpus fallback (1 MiB)"; + let manifest_dir = env::var("CARGO_MANIFEST_DIR").ok(); let fixture_path = manifest_dir .as_deref() @@ -268,7 +264,9 @@ fn load_decode_corpus_sample() -> Vec { if let Some(path) = fixture_path { match fs::read(&path) { - Ok(bytes) if !bytes.is_empty() => return bytes, + Ok(bytes) if !bytes.is_empty() => { + return Scenario::new(REAL_ID, REAL_LABEL, bytes, ScenarioClass::Corpus); + } Ok(_) => { eprintln!( "BENCH_WARN decode corpus fixture is empty at {}, using synthetic fallback", @@ -290,7 +288,12 @@ fn load_decode_corpus_sample() -> Vec { } // Keep the benchmark matrix runnable from packaged sources where fixture files may be omitted. 
- repeated_log_lines(1024 * 1024) + Scenario::new( + FALLBACK_ID, + FALLBACK_LABEL, + repeated_log_lines(1024 * 1024), + ScenarioClass::Corpus, + ) } fn sanitize_scenario_stem(stem: &str) -> String { From dc11bd62c7fc745e96f61ddba7cc1d55bc137369 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 14:36:57 +0200 Subject: [PATCH 28/30] fix(bench): gate report precompute and escape labels - emit REPORT* lines only when STRUCTURED_ZSTD_EMIT_REPORT is enabled - set report env var in run-benchmarks workflow - escape markdown table cell labels in benchmark-report.md generation --- .github/scripts/run-benchmarks.sh | 15 +++++-- zstd/benches/compare_ffi.rs | 70 +++++++++++++++++++------------ 2 files changed, 56 insertions(+), 29 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index ea8097bb..4374e3a4 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -16,6 +16,7 @@ fi BENCH_RAW_FILE="$(mktemp -t structured-zstd-bench-raw.XXXXXX)" trap 'rm -f "$BENCH_RAW_FILE"' EXIT +export STRUCTURED_ZSTD_EMIT_REPORT=1 cargo bench --bench compare_ffi -p structured-zstd -- --output-format bencher | tee "$BENCH_RAW_FILE" echo "Parsing results..." >&2 @@ -50,6 +51,11 @@ def unescape_report_label(value): i += 1 return "".join(output) +def markdown_table_escape(value): + escaped = value.replace("\\", "\\\\") + escaped = escaped.replace("|", "\\|") + return escaped.replace("\n", "<br>
") + benchmark_results = [] timings = [] ratios = [] @@ -170,8 +176,9 @@ lines = [ ] for row in sorted(ratios, key=lambda item: (item["scenario"], item["level"])): + label = markdown_table_escape(row["label"]) lines.append( - f'| {row["scenario"]} | {row["label"]} | {row["level"]} | {row["input_bytes"]} | {row["rust_bytes"]} | {row["ffi_bytes"]} | {row["rust_ratio"]:.4f} | {row["ffi_ratio"]:.4f} |' + f'| {row["scenario"]} | {label} | {row["level"]} | {row["input_bytes"]} | {row["rust_bytes"]} | {row["ffi_bytes"]} | {row["rust_ratio"]:.4f} | {row["ffi_ratio"]:.4f} |' ) lines.extend([ @@ -183,8 +190,9 @@ lines.extend([ ]) for row in sorted(memory_rows, key=lambda item: (item["scenario"], item["level"], item["stage"])): + label = markdown_table_escape(row["label"]) lines.append( - f'| {row["scenario"]} | {row["label"]} | {row["level"]} | {row["stage"]} | {row["rust_buffer_bytes_estimate"]} | {row["ffi_buffer_bytes_estimate"]} |' + f'| {row["scenario"]} | {label} | {row["level"]} | {row["stage"]} | {row["rust_buffer_bytes_estimate"]} | {row["ffi_buffer_bytes_estimate"]} |' ) lines.extend([ @@ -196,8 +204,9 @@ lines.extend([ ]) for row in sorted(dictionary_rows, key=lambda item: (item["scenario"], item["level"])): + label = markdown_table_escape(row["label"]) lines.append( - f'| {row["scenario"]} | {row["label"]} | {row["level"]} | {row["dict_bytes"]} | {row["train_ms"]:.3f} | {row["ffi_no_dict_bytes"]} | {row["ffi_with_dict_bytes"]} | {row["ffi_no_dict_ratio"]:.4f} | {row["ffi_with_dict_ratio"]:.4f} |' + f'| {row["scenario"]} | {label} | {row["level"]} | {row["dict_bytes"]} | {row["train_ms"]:.3f} | {row["ffi_no_dict_bytes"]} | {row["ffi_with_dict_bytes"]} | {row["ffi_no_dict_ratio"]:.4f} | {row["ffi_with_dict_ratio"]:.4f} |' ) lines.extend([ diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 91f9c598..36aa06b0 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -23,20 +23,32 @@ fn benchmark_scenarios_cached() -> 
&'static [Scenario] { BENCHMARK_SCENARIOS.get_or_init(benchmark_scenarios) } +fn emit_reports_enabled() -> bool { + std::env::var("STRUCTURED_ZSTD_EMIT_REPORT") + .map(|value| matches!(value.as_str(), "1" | "true" | "TRUE")) + .unwrap_or(false) +} + fn bench_compress(c: &mut Criterion) { + let emit_reports = emit_reports_enabled(); for scenario in benchmark_scenarios_cached().iter() { for level in supported_levels() { - let rust_compressed = - structured_zstd::encoding::compress_to_vec(&scenario.bytes[..], level.rust_level); - let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); - emit_report_line(scenario, level, &rust_compressed, &ffi_compressed); - emit_memory_report( - scenario, - level, - "compress", - scenario.len() + rust_compressed.len(), - scenario.len() + ffi_compressed.len(), - ); + if emit_reports { + let rust_compressed = structured_zstd::encoding::compress_to_vec( + &scenario.bytes[..], + level.rust_level, + ); + let ffi_compressed = + zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); + emit_report_line(scenario, level, &rust_compressed, &ffi_compressed); + emit_memory_report( + scenario, + level, + "compress", + scenario.len() + rust_compressed.len(), + scenario.len() + ffi_compressed.len(), + ); + } let benchmark_name = format!("compress/{}/{}/{}", level.name, scenario.id, "matrix"); let mut group = c.benchmark_group(benchmark_name); @@ -64,17 +76,20 @@ fn bench_compress(c: &mut Criterion) { } fn bench_decompress(c: &mut Criterion) { + let emit_reports = emit_reports_enabled(); for scenario in benchmark_scenarios_cached().iter() { for level in supported_levels() { let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); let expected_len = scenario.len(); - emit_memory_report( - scenario, - level, - "decompress", - ffi_compressed.len() + expected_len, - ffi_compressed.len() + expected_len, - ); + if emit_reports { + emit_memory_report( + scenario, + level, + "decompress", + 
ffi_compressed.len() + expected_len, + ffi_compressed.len() + expected_len, + ); + } let benchmark_name = format!("decompress/{}/{}/{}", level.name, scenario.id, "matrix"); let mut group = c.benchmark_group(benchmark_name); configure_group(&mut group, scenario); @@ -108,6 +123,7 @@ fn bench_decompress(c: &mut Criterion) { } fn bench_dictionary(c: &mut Criterion) { + let emit_reports = emit_reports_enabled(); for scenario in benchmark_scenarios_cached().iter() { if !matches!(scenario.class, ScenarioClass::Small | ScenarioClass::Corpus) { continue; @@ -138,14 +154,16 @@ fn bench_dictionary(c: &mut Criterion) { zstd::bulk::Compressor::with_dictionary(level.ffi_level, &dictionary).unwrap(); let no_dict_bytes = no_dict.compress(&scenario.bytes).unwrap(); let with_dict_bytes = with_dict.compress(&scenario.bytes).unwrap(); - emit_dictionary_report( - scenario, - level, - dictionary.len(), - train_ms, - &no_dict_bytes, - &with_dict_bytes, - ); + if emit_reports { + emit_dictionary_report( + scenario, + level, + dictionary.len(), + train_ms, + &no_dict_bytes, + &with_dict_bytes, + ); + } let benchmark_name = format!("compress-dict/{}/{}/{}", level.name, scenario.id, "matrix"); From 94f1c6df66979919e3c27b660b8618fbacc1a02e Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 14:38:33 +0200 Subject: [PATCH 29/30] fix(bench): harden silesia fixture identity and size checks - compare metadata.len() against max size in u64 space - derive Silesia scenario ids from full file names - append stable numeric suffix on id collisions --- zstd/benches/support/mod.rs | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index 8307cef9..1a1cf316 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -1,5 +1,5 @@ use rand::{RngCore, SeedableRng, rngs::SmallRng}; -use std::{env, fs, path::Path}; +use std::{collections::HashSet, env, fs, path::Path}; 
use structured_zstd::encoding::CompressionLevel; pub(crate) struct Scenario { @@ -193,6 +193,7 @@ fn load_silesia_from_env() -> Vec { } let mut scenarios = Vec::new(); + let mut seen_silesia_ids = HashSet::new(); for path in paths { let Ok(metadata) = fs::metadata(&path) else { eprintln!( @@ -201,8 +202,8 @@ fn load_silesia_from_env() -> Vec { ); continue; }; - let file_len = metadata.len() as usize; - if file_len > max_file_bytes { + let file_len = metadata.len(); + if file_len > max_file_bytes as u64 { eprintln!( "BENCH_WARN skipping Silesia fixture {} ({} bytes > max {} bytes)", path.display(), @@ -226,13 +227,15 @@ fn load_silesia_from_env() -> Vec { ); continue; } - let Some(stem) = path.file_stem().and_then(|stem| stem.to_str()) else { + let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else { continue; }; - let scenario_stem = sanitize_scenario_stem(stem); + let scenario_stem = sanitize_scenario_stem(file_name); + let scenario_id = + dedupe_scenario_id(format!("silesia-{scenario_stem}"), &mut seen_silesia_ids); scenarios.push(Scenario::new( - format!("silesia-{scenario_stem}"), - format!("Silesia corpus: {stem}"), + scenario_id, + format!("Silesia corpus: {file_name}"), bytes, ScenarioClass::Silesia, )); @@ -311,3 +314,18 @@ fn sanitize_scenario_stem(stem: &str) -> String { sanitized } } + +fn dedupe_scenario_id(base_id: String, seen_ids: &mut HashSet) -> String { + if seen_ids.insert(base_id.clone()) { + return base_id; + } + + let mut suffix = 2usize; + loop { + let candidate = format!("{base_id}-{suffix}"); + if seen_ids.insert(candidate.clone()) { + return candidate; + } + suffix += 1; + } +} From c2a298893b12fd2d325a40bc3101ce2cb70b630e Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 15:17:08 +0200 Subject: [PATCH 30/30] fix(bench): tighten label escaping and id dedupe guard - expand markdown table escaping for benchmark labels - bound scenario id suffix search and panic deterministically on exhaustion --- 
.github/scripts/run-benchmarks.sh | 11 ++++++++++- zstd/benches/support/mod.rs | 11 ++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index 4374e3a4..0d235420 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -52,8 +52,17 @@ def unescape_report_label(value): return "".join(output) def markdown_table_escape(value): - escaped = value.replace("\\", "\\\\") + escaped = value.strip() + escaped = escaped.replace("\\", "\\\\") escaped = escaped.replace("|", "\\|") + escaped = escaped.replace("`", "\\`") + escaped = escaped.replace("[", "\\[") + escaped = escaped.replace("]", "\\]") + escaped = escaped.replace("*", "\\*") + escaped = escaped.replace("_", "\\_") + escaped = escaped.replace("<", "&lt;") + escaped = escaped.replace(">", "&gt;") + escaped = escaped.replace("%", "&#37;") return escaped.replace("\n", "<br>
") benchmark_results = [] diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index 1a1cf316..c8906822 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -316,16 +316,21 @@ fn sanitize_scenario_stem(stem: &str) -> String { } fn dedupe_scenario_id(base_id: String, seen_ids: &mut HashSet<String>) -> String { + const MAX_SUFFIX: usize = 1_000_000; + if seen_ids.insert(base_id.clone()) { return base_id; } - let mut suffix = 2usize; - loop { + for suffix in 2..=MAX_SUFFIX { let candidate = format!("{base_id}-{suffix}"); if seen_ids.insert(candidate.clone()) { return candidate; } - suffix += 1; } + + panic!( + "failed to allocate unique scenario id for base '{}' after {} attempts", + base_id, MAX_SUFFIX + ); }