From 5138d7a11842747d6c042346892a2a87e04f441b Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 00:23:59 +0200 Subject: [PATCH 01/30] test(bench): expand zstd benchmark suite - add scenario-based Criterion matrix against C zstd - generate benchmark JSON and markdown reports for CI - document benchmark workflows and add flamegraph helper Refs #24 --- .github/scripts/run-benchmarks.sh | 100 ++++++++++++--- .gitignore | 2 + BENCHMARKS.md | 67 ++++++++++ README.md | 4 + scripts/bench-flamegraph.sh | 19 +++ zstd/benches/compare_ffi.rs | 166 +++++++++++++++++-------- zstd/benches/support/mod.rs | 199 ++++++++++++++++++++++++++++++ 7 files changed, 483 insertions(+), 74 deletions(-) create mode 100644 BENCHMARKS.md create mode 100755 scripts/bench-flamegraph.sh create mode 100644 zstd/benches/support/mod.rs diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index 1ad2086d..885897d1 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -1,43 +1,103 @@ #!/bin/bash -# Run compare_ffi benchmarks and produce github-action-benchmark JSON. -# Output: benchmark-results.json (customSmallerIsBetter format — lower time = better) +# Run the Criterion benchmark matrix and produce: +# - benchmark-results.json for github-action-benchmark +# - benchmark-report.md for human review +# +# Output format note: +# - benchmark JSON uses customSmallerIsBetter (lower ms/iter is better) +# - report markdown also includes per-scenario compression size + ratio summaries set -eo pipefail -echo "Running benchmarks..." >&2 +echo "Running benchmark matrix..." >&2 + +export STRUCTURED_ZSTD_BENCH_LARGE_BYTES="${STRUCTURED_ZSTD_BENCH_LARGE_BYTES:-16777216}" -# Run criterion benchmarks, capture output cargo bench --bench compare_ffi -p structured-zstd -- --output-format bencher | tee /tmp/bench-raw.txt echo "Parsing results..." >&2 -# Parse criterion bencher output into github-action-benchmark JSON -# Format: "test ... 
bench: ns/iter (+/- )" python3 - <<'PYEOF' -import json, re, sys +import json +import re +import sys + +BENCH_RE = re.compile(r"test (\S+)\s+\.\.\. bench:\s+([\d,]+) ns/iter") +REPORT_RE = re.compile( + r'^REPORT scenario=(\S+) label="([^"]+)" level=(\S+) input_bytes=(\d+) rust_bytes=(\d+) ffi_bytes=(\d+) rust_ratio=([0-9.]+) ffi_ratio=([0-9.]+)$' +) + +benchmark_results = [] +timings = [] +ratios = [] -results = [] with open("/tmp/bench-raw.txt") as f: - for line in f: - m = re.match(r"test (\S+)\s+\.\.\. bench:\s+([\d,]+) ns/iter", line) - if m: - name = m.group(1) - ns = int(m.group(2).replace(",", "")) - # Convert ns to ms for readability + for raw_line in f: + line = raw_line.strip() + + bench_match = BENCH_RE.match(line) + if bench_match: + name = bench_match.group(1) + ns = int(bench_match.group(2).replace(",", "")) ms = ns / 1_000_000 - results.append({ + benchmark_results.append({ "name": name, "unit": "ms", "value": round(ms, 3), }) + timings.append((name, ms)) + continue + + report_match = REPORT_RE.match(line) + if report_match: + scenario, label, level, input_bytes, rust_bytes, ffi_bytes, rust_ratio, ffi_ratio = report_match.groups() + ratios.append({ + "scenario": scenario, + "label": label, + "level": level, + "input_bytes": int(input_bytes), + "rust_bytes": int(rust_bytes), + "ffi_bytes": int(ffi_bytes), + "rust_ratio": float(rust_ratio), + "ffi_ratio": float(ffi_ratio), + }) -if not results: +if not benchmark_results: print("ERROR: No benchmark results parsed!", file=sys.stderr) sys.exit(1) with open("benchmark-results.json", "w") as f: - json.dump(results, f, indent=2) + json.dump(benchmark_results, f, indent=2) + +lines = [ + "# Benchmark Report", + "", + "Generated by `.github/scripts/run-benchmarks.sh` from `cargo bench --bench compare_ffi`.", + "", + "## Compression Ratios", + "", + "| Scenario | Level | Input bytes | Rust bytes | C bytes | Rust ratio | C ratio |", + "| --- | --- | ---: | ---: | ---: | ---: | ---: |", +] + +for row in 
sorted(ratios, key=lambda item: (item["scenario"], item["level"])): + lines.append( + f'| {row["label"]} | {row["level"]} | {row["input_bytes"]} | {row["rust_bytes"]} | {row["ffi_bytes"]} | {row["rust_ratio"]:.4f} | {row["ffi_ratio"]:.4f} |' + ) + +lines.extend([ + "", + "## Timing Metrics", + "", + "| Benchmark | ms/iter |", + "| --- | ---: |", +]) + +for name, ms in sorted(timings): + lines.append(f"| `{name}` | {ms:.3f} |") + +with open("benchmark-report.md", "w") as f: + f.write("\n".join(lines) + "\n") -print(f"Wrote {len(results)} benchmark results to benchmark-results.json", file=sys.stderr) -for r in results: - print(f" {r['name']}: {r['value']} {r['unit']}", file=sys.stderr) +print(f"Wrote {len(benchmark_results)} timing results to benchmark-results.json", file=sys.stderr) +print(f"Wrote {len(ratios)} ratio rows to benchmark-report.md", file=sys.stderr) PYEOF diff --git a/.gitignore b/.gitignore index c9fa3b69..d2a3666c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,5 +6,7 @@ Cargo.lock /orig-zstd fuzz_decodecorpus perf.data* +benchmark-results.json +benchmark-report.md fuzz/corpus .idea diff --git a/BENCHMARKS.md b/BENCHMARKS.md new file mode 100644 index 00000000..621b3f72 --- /dev/null +++ b/BENCHMARKS.md @@ -0,0 +1,67 @@ +# Benchmark Suite + +`structured-zstd` keeps its compression/decompression performance tracking in the Criterion bench +matrix at `zstd/benches/compare_ffi.rs`. + +## Scenarios + +The current matrix covers: + +- small random payloads (`1 KiB`, `10 KiB`) +- a small structured log payload (`4 KiB`) +- a repository corpus fixture (`decodecorpus_files/z000033`) +- high entropy random payloads (`1 MiB`) +- low entropy repeated payloads (`1 MiB`) +- a large structured stream (`100 MiB`) +- optional Silesia corpus files when `STRUCTURED_ZSTD_SILESIA_DIR=/path/to/silesia` is set + +The local default for the large scenario is `100 MiB`. 
CI can override it with +`STRUCTURED_ZSTD_BENCH_LARGE_BYTES` to keep regression runs bounded while still exercising the +same code path. + +## Level Mapping + +The benchmark suite only compares levels that are currently implemented end-to-end in the pure Rust +encoder: + +- `structured-zstd::Fastest` vs `zstd` level `1` +- `structured-zstd::Default` vs `zstd` level `3` + +`Better` and `Best` are intentionally excluded until the encoder implements them. Dictionary +compression is also excluded from the timing matrix because the crate currently exposes dictionary +training, but not dictionary-based compression. + +## Commands + +Run the full Criterion matrix: + +```bash +cargo bench --bench compare_ffi -p structured-zstd -- --output-format bencher +``` + +Generate the CI-style JSON and markdown report locally: + +```bash +bash .github/scripts/run-benchmarks.sh +``` + +Generate a flamegraph for a hot path: + +```bash +bash scripts/bench-flamegraph.sh +``` + +Override the benchmark targeted by the flamegraph script: + +```bash +bash scripts/bench-flamegraph.sh decompress/default/decodecorpus-z000033/matrix/pure_rust +``` + +## Outputs + +`run-benchmarks.sh` writes: + +- `benchmark-results.json` for GitHub regression tracking +- `benchmark-report.md` with scenario-by-scenario compression ratios and timing rows + +Criterion also writes its usual detailed estimates under `target/criterion/`. diff --git a/README.md b/README.md index 076ad3a0..8d563ec2 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,10 @@ Complete RFC 8878 implementation. Performance: ~1.4-3.5x slower than C zstd depe When the `dict_builder` feature is enabled, the `dictionary` module can create raw content dictionaries. Within 0.2% of the official implementation on the `github-users` sample set. +## Benchmarking + +Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). 
The suite compares `structured-zstd` against the C reference across small payloads, entropy extremes, a `100 MiB` large-stream scenario, repository corpus fixtures, and optional local Silesia corpora. + ## Usage ### Compression diff --git a/scripts/bench-flamegraph.sh b/scripts/bench-flamegraph.sh new file mode 100755 index 00000000..1bfb7b70 --- /dev/null +++ b/scripts/bench-flamegraph.sh @@ -0,0 +1,19 @@ +#!/bin/bash +set -euo pipefail + +BENCH_FILTER="${1:-compress/default/large-log-stream/matrix/pure_rust}" +OUTPUT_DIR="${2:-target/flamegraph}" + +mkdir -p "$OUTPUT_DIR" + +echo "Generating flamegraph for benchmark filter: $BENCH_FILTER" >&2 +echo "Output directory: $OUTPUT_DIR" >&2 + +cargo flamegraph \ + --bench compare_ffi \ + -p structured-zstd \ + --root \ + --output "$OUTPUT_DIR/${BENCH_FILTER//\//_}.svg" \ + -- \ + --bench \ + "$BENCH_FILTER" diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 2660702f..6bae35e3 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -1,72 +1,130 @@ -//! Comparison benchmark: structured-zstd (pure Rust) vs zstd (C FFI). +//! Comparison benchmark matrix: structured-zstd (pure Rust) vs zstd (C FFI). //! -//! Five variations: decompress (pure Rust/C FFI), compress (pure Rust/C FFI L1/L3). -//! Both decompress benchmarks allocate per-iteration for symmetric comparison. +//! The suite covers: +//! - small payloads (1-10 KiB) +//! - high entropy and low entropy payloads +//! - a large 100 MiB structured stream +//! - the repository decode corpus fixture +//! - optional Silesia corpus files via `STRUCTURED_ZSTD_SILESIA_DIR` +//! +//! Each run prints `REPORT ...` metadata lines that CI scripts can turn into a markdown report. -use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main}; +mod support; -/// Compressed corpus for decompression benchmarks. 
-const COMPRESSED_CORPUS: &[u8] = include_bytes!("../decodecorpus_files/z000033.zst"); +use criterion::{Criterion, SamplingMode, Throughput, black_box, criterion_group, criterion_main}; +use std::time::Duration; +use structured_zstd::decoding::FrameDecoder; +use support::{LevelConfig, Scenario, ScenarioClass, benchmark_scenarios, supported_levels}; -fn bench_decompress(c: &mut Criterion) { - let mut group = c.benchmark_group("decompress"); +fn bench_compress(c: &mut Criterion) { + for scenario in benchmark_scenarios() { + for level in supported_levels() { + let rust_compressed = + structured_zstd::encoding::compress_to_vec(&scenario.bytes[..], level.rust_level); + let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); + emit_report_line(&scenario, level, &rust_compressed, &ffi_compressed); - // Pre-compute expected output length for assertions. - let expected_len = zstd::decode_all(COMPRESSED_CORPUS).unwrap().len(); + let benchmark_name = format!("compress/{}/{}/{}", level.name, scenario.id, "matrix"); + let mut group = c.benchmark_group(benchmark_name); + configure_group(&mut group, &scenario); + group.throughput(Throughput::Bytes(scenario.throughput_bytes())); - // Pure Rust decompression — allocate per-iteration (symmetric with C FFI). - group.bench_function("pure_rust", |b| { - b.iter(|| { - let mut fr = structured_zstd::decoding::FrameDecoder::new(); - let mut target = vec![0u8; expected_len]; - let written = fr.decode_all(COMPRESSED_CORPUS, &mut target).unwrap(); - assert_eq!(written, expected_len); - }) - }); + group.bench_function("pure_rust", |b| { + b.iter(|| { + black_box(structured_zstd::encoding::compress_to_vec( + &scenario.bytes[..], + level.rust_level, + )) + }) + }); - // C FFI decompression — allocates per-iteration via decode_all. 
- group.bench_function("c_ffi", |b| { - b.iter(|| { - let out = zstd::decode_all(COMPRESSED_CORPUS).unwrap(); - assert_eq!(out.len(), expected_len); - }) - }); + group.bench_function("c_ffi", |b| { + b.iter(|| { + black_box(zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap()) + }) + }); - group.finish(); + group.finish(); + } + } } -fn bench_compress(c: &mut Criterion) { - // Get raw data by decompressing the corpus. - let raw_data = zstd::decode_all(COMPRESSED_CORPUS).unwrap(); +fn bench_decompress(c: &mut Criterion) { + for scenario in benchmark_scenarios() { + for level in supported_levels() { + let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); + let expected_len = scenario.len(); + let benchmark_name = format!("decompress/{}/{}/{}", level.name, scenario.id, "matrix"); + let mut group = c.benchmark_group(benchmark_name); + configure_group(&mut group, &scenario); + group.throughput(Throughput::Bytes(scenario.throughput_bytes())); - let mut group = c.benchmark_group("compress"); + group.bench_function("pure_rust", |b| { + b.iter(|| { + let mut decoder = FrameDecoder::new(); + let mut target = vec![0u8; expected_len]; + let written = decoder.decode_all(&ffi_compressed, &mut target).unwrap(); + assert_eq!(written, expected_len); + }) + }); - // Pure Rust compression (Fastest level) - group.bench_with_input( - BenchmarkId::new("pure_rust", "fastest"), - &raw_data, - |b, data| { - b.iter(|| { - black_box(structured_zstd::encoding::compress_to_vec( - &data[..], - structured_zstd::encoding::CompressionLevel::Fastest, - )) - }) - }, - ); + group.bench_function("c_ffi", |b| { + b.iter(|| { + let output = zstd::decode_all(&ffi_compressed[..]).unwrap(); + assert_eq!(output.len(), expected_len); + }) + }); - // C FFI compression (level 1 ≈ fastest) - group.bench_with_input(BenchmarkId::new("c_ffi", "level1"), &raw_data, |b, data| { - b.iter(|| black_box(zstd::encode_all(&data[..], 1).unwrap())) - }); + group.finish(); + } + } 
+} - // C FFI compression (level 3 ≈ default) - group.bench_with_input(BenchmarkId::new("c_ffi", "level3"), &raw_data, |b, data| { - b.iter(|| black_box(zstd::encode_all(&data[..], 3).unwrap())) - }); +fn configure_group( + group: &mut criterion::BenchmarkGroup<'_, M>, + scenario: &Scenario, +) { + match scenario.class { + ScenarioClass::Small => { + group.sample_size(30); + group.measurement_time(Duration::from_secs(3)); + group.sampling_mode(SamplingMode::Flat); + } + ScenarioClass::Corpus | ScenarioClass::Entropy => { + group.sample_size(10); + group.measurement_time(Duration::from_secs(4)); + group.sampling_mode(SamplingMode::Flat); + } + ScenarioClass::Large | ScenarioClass::Silesia => { + group.sample_size(10); + group.measurement_time(Duration::from_secs(2)); + group.warm_up_time(Duration::from_millis(500)); + group.sampling_mode(SamplingMode::Flat); + } + } +} - group.finish(); +fn emit_report_line( + scenario: &Scenario, + level: LevelConfig, + rust_compressed: &[u8], + ffi_compressed: &[u8], +) { + let input_len = scenario.len() as f64; + let rust_ratio = rust_compressed.len() as f64 / input_len; + let ffi_ratio = ffi_compressed.len() as f64 / input_len; + println!( + "REPORT scenario={} label=\"{}\" level={} input_bytes={} rust_bytes={} ffi_bytes={} rust_ratio={:.6} ffi_ratio={:.6}", + scenario.id, + scenario.label, + level.name, + scenario.len(), + rust_compressed.len(), + ffi_compressed.len(), + rust_ratio, + ffi_ratio + ); } -criterion_group!(benches, bench_decompress, bench_compress); +criterion_group!(benches, bench_compress, bench_decompress); criterion_main!(benches); diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs new file mode 100644 index 00000000..79ffcb9c --- /dev/null +++ b/zstd/benches/support/mod.rs @@ -0,0 +1,199 @@ +use rand::{RngCore, SeedableRng, rngs::SmallRng}; +use std::{env, fs, path::Path}; +use structured_zstd::encoding::CompressionLevel; + +pub(crate) struct Scenario { + pub(crate) id: String, + pub(crate) 
label: String, + pub(crate) bytes: Vec, + pub(crate) class: ScenarioClass, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum ScenarioClass { + Small, + Corpus, + Entropy, + Large, + Silesia, +} + +#[derive(Clone, Copy)] +pub(crate) struct LevelConfig { + pub(crate) name: &'static str, + pub(crate) rust_level: CompressionLevel, + pub(crate) ffi_level: i32, +} + +pub(crate) fn benchmark_scenarios() -> Vec { + let mut scenarios = vec![ + Scenario::new( + "small-1k-random", + "Small random payload (1 KiB)", + random_bytes(1024, 0x5EED_1000), + ScenarioClass::Small, + ), + Scenario::new( + "small-10k-random", + "Small random payload (10 KiB)", + random_bytes(10 * 1024, 0x0005_EED1_0000), + ScenarioClass::Small, + ), + Scenario::new( + "small-4k-log-lines", + "Small structured log lines (4 KiB)", + repeated_log_lines(4 * 1024), + ScenarioClass::Small, + ), + Scenario::new( + "decodecorpus-z000033", + "Repo decode corpus sample", + include_bytes!("../../decodecorpus_files/z000033").to_vec(), + ScenarioClass::Corpus, + ), + Scenario::new( + "high-entropy-1m", + "High entropy random payload (1 MiB)", + random_bytes(1024 * 1024, 0xC0FF_EE11), + ScenarioClass::Entropy, + ), + Scenario::new( + "low-entropy-1m", + "Low entropy patterned payload (1 MiB)", + repeated_pattern_bytes(1024 * 1024), + ScenarioClass::Entropy, + ), + Scenario::new( + "large-log-stream", + "Large structured stream", + repeated_log_lines(large_stream_len()), + ScenarioClass::Large, + ), + ]; + + scenarios.extend(load_silesia_from_env()); + scenarios +} + +pub(crate) fn supported_levels() -> [LevelConfig; 2] { + [ + LevelConfig { + name: "fastest", + rust_level: CompressionLevel::Fastest, + ffi_level: 1, + }, + LevelConfig { + name: "default", + rust_level: CompressionLevel::Default, + ffi_level: 3, + }, + ] +} + +impl Scenario { + fn new( + id: impl Into, + label: impl Into, + bytes: Vec, + class: ScenarioClass, + ) -> Self { + Self { + id: id.into(), + label: label.into(), + bytes, + 
class, + } + } + + pub(crate) fn len(&self) -> usize { + self.bytes.len() + } + + pub(crate) fn throughput_bytes(&self) -> u64 { + self.bytes.len() as u64 + } +} + +fn random_bytes(len: usize, seed: u64) -> Vec { + let mut rng = SmallRng::seed_from_u64(seed); + let mut bytes = vec![0u8; len]; + rng.fill_bytes(&mut bytes); + bytes +} + +fn repeated_pattern_bytes(len: usize) -> Vec { + let pattern = b"coordinode:segment:0001|tenant=demo|label=orders|"; + let mut bytes = Vec::with_capacity(len); + while bytes.len() < len { + let remaining = len - bytes.len(); + bytes.extend_from_slice(&pattern[..pattern.len().min(remaining)]); + } + bytes +} + +fn repeated_log_lines(len: usize) -> Vec { + const LINES: &[&str] = &[ + "ts=2026-03-26T21:39:28Z level=INFO msg=\"flush memtable\" tenant=demo table=orders region=eu-west\n", + "ts=2026-03-26T21:39:29Z level=INFO msg=\"rotate segment\" tenant=demo table=orders region=eu-west\n", + "ts=2026-03-26T21:39:30Z level=INFO msg=\"compact level\" tenant=demo table=orders region=eu-west\n", + "ts=2026-03-26T21:39:31Z level=INFO msg=\"write block\" tenant=demo table=orders region=eu-west\n", + ]; + + let mut bytes = Vec::with_capacity(len); + while bytes.len() < len { + for line in LINES { + if bytes.len() == len { + break; + } + let remaining = len - bytes.len(); + bytes.extend_from_slice(&line.as_bytes()[..line.len().min(remaining)]); + } + } + bytes +} + +fn load_silesia_from_env() -> Vec { + let Some(dir) = env::var_os("STRUCTURED_ZSTD_SILESIA_DIR") else { + return Vec::new(); + }; + + let Ok(entries) = fs::read_dir(Path::new(&dir)) else { + eprintln!("BENCH_WARN failed to read STRUCTURED_ZSTD_SILESIA_DIR={dir:?}"); + return Vec::new(); + }; + + let mut scenarios = Vec::new(); + for entry in entries.flatten() { + let path = entry.path(); + if !path.is_file() { + continue; + } + let Ok(bytes) = fs::read(&path) else { + eprintln!( + "BENCH_WARN failed to read Silesia fixture {}", + path.display() + ); + continue; + }; + let Some(stem) 
= path.file_stem().and_then(|stem| stem.to_str()) else { + continue; + }; + scenarios.push(Scenario::new( + format!("silesia-{stem}"), + format!("Silesia corpus: {stem}"), + bytes, + ScenarioClass::Silesia, + )); + } + + scenarios.sort_by(|left, right| left.id.cmp(&right.id)); + scenarios +} + +fn large_stream_len() -> usize { + env::var("STRUCTURED_ZSTD_BENCH_LARGE_BYTES") + .ok() + .and_then(|value| value.parse::().ok()) + .filter(|value| *value > 0) + .unwrap_or(100 * 1024 * 1024) +} From 3a419fa936a4a266f391ea19fc87af91d2cb62c2 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 09:58:33 +0200 Subject: [PATCH 02/30] docs(readme): add benchmark dashboard link --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 8d563ec2..0cea48ab 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,14 @@ Pure Rust zstd implementation — managed fork of [ruzstd](https://github.com/Ki [![docs.rs](https://docs.rs/structured-zstd/badge.svg)](https://docs.rs/structured-zstd) [![License: Apache-2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](LICENSE) +## Benchmarks Dashboard + +Historical benchmark charts are published to GitHub Pages: + +- [Performance dashboard](https://structured-world.github.io/structured-zstd/dev/bench/) + +Note: the root Pages URL can be empty; benchmark charts live under `/dev/bench/`. + ## Managed Fork This is a **maintained fork** of [KillingSpark/zstd-rs](https://github.com/KillingSpark/zstd-rs) (ruzstd) by [Structured World Foundation](https://sw.foundation). We maintain additional features and hardening for the [CoordiNode](https://github.com/structured-world/coordinode) database engine. 
From 15a51bd252af05a4301cbb341d1626f84f506cd8 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 10:13:51 +0200 Subject: [PATCH 03/30] fix(bench): harden matrix scripts and edge scenarios - guard ratio reporting for zero-length inputs - skip empty Silesia fixtures with BENCH_WARN - use mktemp + trap for raw bench output parsing - make flamegraph --root opt-in via BENCH_FLAMEGRAPH_USE_ROOT --- .github/scripts/run-benchmarks.sh | 10 +++++++--- scripts/bench-flamegraph.sh | 26 +++++++++++++++++++++++--- zstd/benches/compare_ffi.rs | 10 ++++++++-- zstd/benches/support/mod.rs | 7 +++++++ 4 files changed, 45 insertions(+), 8 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index 885897d1..aff1ccd9 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -11,13 +11,16 @@ set -eo pipefail echo "Running benchmark matrix..." >&2 export STRUCTURED_ZSTD_BENCH_LARGE_BYTES="${STRUCTURED_ZSTD_BENCH_LARGE_BYTES:-16777216}" +BENCH_RAW_FILE="$(mktemp -t structured-zstd-bench-raw.XXXXXX)" +trap 'rm -f "$BENCH_RAW_FILE"' EXIT -cargo bench --bench compare_ffi -p structured-zstd -- --output-format bencher | tee /tmp/bench-raw.txt +cargo bench --bench compare_ffi -p structured-zstd -- --output-format bencher | tee "$BENCH_RAW_FILE" echo "Parsing results..." 
>&2 -python3 - <<'PYEOF' +BENCH_RAW_FILE="$BENCH_RAW_FILE" python3 - <<'PYEOF' import json +import os import re import sys @@ -29,8 +32,9 @@ REPORT_RE = re.compile( benchmark_results = [] timings = [] ratios = [] +raw_path = os.environ["BENCH_RAW_FILE"] -with open("/tmp/bench-raw.txt") as f: +with open(raw_path) as f: for raw_line in f: line = raw_line.strip() diff --git a/scripts/bench-flamegraph.sh b/scripts/bench-flamegraph.sh index 1bfb7b70..66b41f95 100755 --- a/scripts/bench-flamegraph.sh +++ b/scripts/bench-flamegraph.sh @@ -9,11 +9,31 @@ mkdir -p "$OUTPUT_DIR" echo "Generating flamegraph for benchmark filter: $BENCH_FILTER" >&2 echo "Output directory: $OUTPUT_DIR" >&2 -cargo flamegraph \ +# Use BENCH_FLAMEGRAPH_USE_ROOT=1 to opt into running cargo flamegraph with --root. +EXTRA_FLAMEGRAPH_ARGS=() +if [[ "${BENCH_FLAMEGRAPH_USE_ROOT:-}" == "1" ]]; then + EXTRA_FLAMEGRAPH_ARGS+=(--root) +fi + +if ! cargo flamegraph \ --bench compare_ffi \ -p structured-zstd \ - --root \ + "${EXTRA_FLAMEGRAPH_ARGS[@]}" \ --output "$OUTPUT_DIR/${BENCH_FILTER//\//_}.svg" \ -- \ --bench \ - "$BENCH_FILTER" + "$BENCH_FILTER"; then + status=$? + if [[ "${BENCH_FLAMEGRAPH_USE_ROOT:-}" != "1" ]]; then + cat >&2 <<'EOF' +cargo flamegraph failed. This may be due to insufficient permissions for perf. + +If you see a "Permission denied" or "not allowed to access CPU" error, try re-running with: + + BENCH_FLAMEGRAPH_USE_ROOT=1 sudo -E scripts/bench-flamegraph.sh "" "" + +or otherwise ensure perf has sufficient permissions. 
+EOF + fi + exit "$status" +fi diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 6bae35e3..8f4334a0 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -111,8 +111,14 @@ fn emit_report_line( ffi_compressed: &[u8], ) { let input_len = scenario.len() as f64; - let rust_ratio = rust_compressed.len() as f64 / input_len; - let ffi_ratio = ffi_compressed.len() as f64 / input_len; + let (rust_ratio, ffi_ratio) = if input_len > 0.0 { + ( + rust_compressed.len() as f64 / input_len, + ffi_compressed.len() as f64 / input_len, + ) + } else { + (0.0, 0.0) + }; println!( "REPORT scenario={} label=\"{}\" level={} input_bytes={} rust_bytes={} ffi_bytes={} rust_ratio={:.6} ffi_ratio={:.6}", scenario.id, diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index 79ffcb9c..f97aeca3 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -175,6 +175,13 @@ fn load_silesia_from_env() -> Vec { ); continue; }; + if bytes.is_empty() { + eprintln!( + "BENCH_WARN skipping empty Silesia fixture {}", + path.display() + ); + continue; + } let Some(stem) = path.file_stem().and_then(|stem| stem.to_str()) else { continue; }; From 480a307901d2e7a79ef9cae67d8ea9edaf3a1911 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 12:09:22 +0200 Subject: [PATCH 04/30] fix(bench): tighten flamegraph and decode benchmarks --- scripts/bench-flamegraph.sh | 6 ++++-- zstd/benches/compare_ffi.rs | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/bench-flamegraph.sh b/scripts/bench-flamegraph.sh index 66b41f95..47cd196e 100755 --- a/scripts/bench-flamegraph.sh +++ b/scripts/bench-flamegraph.sh @@ -15,14 +15,16 @@ if [[ "${BENCH_FLAMEGRAPH_USE_ROOT:-}" == "1" ]]; then EXTRA_FLAMEGRAPH_ARGS+=(--root) fi -if ! 
cargo flamegraph \ +if cargo flamegraph \ --bench compare_ffi \ -p structured-zstd \ - "${EXTRA_FLAMEGRAPH_ARGS[@]}" \ + ${EXTRA_FLAMEGRAPH_ARGS[@]+"${EXTRA_FLAMEGRAPH_ARGS[@]}"} \ --output "$OUTPUT_DIR/${BENCH_FILTER//\//_}.svg" \ -- \ --bench \ "$BENCH_FILTER"; then + : +else status=$? if [[ "${BENCH_FLAMEGRAPH_USE_ROOT:-}" != "1" ]]; then cat >&2 <<'EOF' diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 8f4334a0..6df08633 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -60,9 +60,10 @@ fn bench_decompress(c: &mut Criterion) { group.throughput(Throughput::Bytes(scenario.throughput_bytes())); group.bench_function("pure_rust", |b| { + let mut target = vec![0u8; expected_len]; b.iter(|| { let mut decoder = FrameDecoder::new(); - let mut target = vec![0u8; expected_len]; + target.fill(0); let written = decoder.decode_all(&ffi_compressed, &mut target).unwrap(); assert_eq!(written, expected_len); }) From 11f779bcc8919dc68eeb90e4a022e52addc36d23 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 14:54:09 +0200 Subject: [PATCH 05/30] docs(bench): clarify decode benchmark asymmetry rationale --- zstd/benches/compare_ffi.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 6df08633..2724467b 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -71,6 +71,8 @@ fn bench_decompress(c: &mut Criterion) { group.bench_function("c_ffi", |b| { b.iter(|| { + // Intentional: zstd::decode_all represents the common high-level FFI path and + // includes allocation cost, while pure_rust isolates decode throughput. 
let output = zstd::decode_all(&ffi_compressed[..]).unwrap(); assert_eq!(output.len(), expected_len); }) From 65cda3a1cc962d5df4a352207e8eaa9b24ad4ba3 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 15:24:52 +0200 Subject: [PATCH 06/30] perf(bench): remove redundant decode buffer fill --- zstd/benches/compare_ffi.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 2724467b..a0581bd0 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -63,7 +63,6 @@ fn bench_decompress(c: &mut Criterion) { let mut target = vec![0u8; expected_len]; b.iter(|| { let mut decoder = FrameDecoder::new(); - target.fill(0); let written = decoder.decode_all(&ffi_compressed, &mut target).unwrap(); assert_eq!(written, expected_len); }) From aa774de9cd2e308592f414cfcc15436968215b4d Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 17:09:15 +0200 Subject: [PATCH 07/30] fix(bench): scope large default to CI and enforce ratio rows --- .github/scripts/run-benchmarks.sh | 11 ++++++++++- BENCHMARKS.md | 6 +++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index aff1ccd9..98354488 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -10,7 +10,9 @@ set -eo pipefail echo "Running benchmark matrix..." 
>&2 -export STRUCTURED_ZSTD_BENCH_LARGE_BYTES="${STRUCTURED_ZSTD_BENCH_LARGE_BYTES:-16777216}" +if [ -n "${GITHUB_ACTIONS:-}" ] && [ -z "${STRUCTURED_ZSTD_BENCH_LARGE_BYTES:-}" ]; then + export STRUCTURED_ZSTD_BENCH_LARGE_BYTES=16777216 +fi BENCH_RAW_FILE="$(mktemp -t structured-zstd-bench-raw.XXXXXX)" trap 'rm -f "$BENCH_RAW_FILE"' EXIT @@ -69,6 +71,13 @@ if not benchmark_results: print("ERROR: No benchmark results parsed!", file=sys.stderr) sys.exit(1) +if not ratios: + print( + "ERROR: No REPORT ratio lines parsed; benchmark-report.md would have an empty ratio section.", + file=sys.stderr, + ) + sys.exit(1) + with open("benchmark-results.json", "w") as f: json.dump(benchmark_results, f, indent=2) diff --git a/BENCHMARKS.md b/BENCHMARKS.md index 621b3f72..1e0f3be0 100644 --- a/BENCHMARKS.md +++ b/BENCHMARKS.md @@ -15,9 +15,9 @@ The current matrix covers: - a large structured stream (`100 MiB`) - optional Silesia corpus files when `STRUCTURED_ZSTD_SILESIA_DIR=/path/to/silesia` is set -The local default for the large scenario is `100 MiB`. CI can override it with -`STRUCTURED_ZSTD_BENCH_LARGE_BYTES` to keep regression runs bounded while still exercising the -same code path. +The local default for the large scenario is `100 MiB`. In GitHub Actions, when +`STRUCTURED_ZSTD_BENCH_LARGE_BYTES` is unset, `.github/scripts/run-benchmarks.sh` defaults it to +`16 MiB` to keep CI regression runs bounded while still exercising the same code path. 
## Level Mapping From 54b3dd4bd3061555cc3ce14f25973d740faebacc Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 17:48:28 +0200 Subject: [PATCH 08/30] feat(bench): add memory and dictionary benchmark reporting --- .github/scripts/run-benchmarks.sh | 83 +++++++++++++++++ BENCHMARKS.md | 14 ++- README.md | 2 +- zstd/Cargo.toml | 3 +- zstd/benches/compare_ffi.rs | 143 +++++++++++++++++++++++++++++- 5 files changed, 237 insertions(+), 8 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index 98354488..e86a9fe7 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -30,10 +30,18 @@ BENCH_RE = re.compile(r"test (\S+)\s+\.\.\. bench:\s+([\d,]+) ns/iter") REPORT_RE = re.compile( r'^REPORT scenario=(\S+) label="([^"]+)" level=(\S+) input_bytes=(\d+) rust_bytes=(\d+) ffi_bytes=(\d+) rust_ratio=([0-9.]+) ffi_ratio=([0-9.]+)$' ) +MEM_RE = re.compile( + r'^REPORT_MEM scenario=(\S+) label="([^"]+)" level=(\S+) stage=(\S+) rust_peak_bytes=(\d+) ffi_peak_bytes=(\d+)$' +) +DICT_RE = re.compile( + r'^REPORT_DICT scenario=(\S+) label="([^"]+)" level=(\S+) dict_bytes=(\d+) train_ms=([0-9.]+) ffi_no_dict_bytes=(\d+) ffi_with_dict_bytes=(\d+) ffi_no_dict_ratio=([0-9.]+) ffi_with_dict_ratio=([0-9.]+)$' +) benchmark_results = [] timings = [] ratios = [] +memory_rows = [] +dictionary_rows = [] raw_path = os.environ["BENCH_RAW_FILE"] with open(raw_path) as f: @@ -66,6 +74,45 @@ with open(raw_path) as f: "rust_ratio": float(rust_ratio), "ffi_ratio": float(ffi_ratio), }) + continue + + mem_match = MEM_RE.match(line) + if mem_match: + scenario, label, level, stage, rust_peak_bytes, ffi_peak_bytes = mem_match.groups() + memory_rows.append({ + "scenario": scenario, + "label": label, + "level": level, + "stage": stage, + "rust_peak_bytes": int(rust_peak_bytes), + "ffi_peak_bytes": int(ffi_peak_bytes), + }) + continue + + dict_match = DICT_RE.match(line) + if dict_match: + ( + scenario, + label, + 
level, + dict_bytes, + train_ms, + ffi_no_dict_bytes, + ffi_with_dict_bytes, + ffi_no_dict_ratio, + ffi_with_dict_ratio, + ) = dict_match.groups() + dictionary_rows.append({ + "scenario": scenario, + "label": label, + "level": level, + "dict_bytes": int(dict_bytes), + "train_ms": float(train_ms), + "ffi_no_dict_bytes": int(ffi_no_dict_bytes), + "ffi_with_dict_bytes": int(ffi_with_dict_bytes), + "ffi_no_dict_ratio": float(ffi_no_dict_ratio), + "ffi_with_dict_ratio": float(ffi_with_dict_ratio), + }) if not benchmark_results: print("ERROR: No benchmark results parsed!", file=sys.stderr) @@ -78,6 +125,14 @@ if not ratios: ) sys.exit(1) +if not memory_rows: + print("ERROR: No REPORT_MEM lines parsed; memory section would be empty.", file=sys.stderr) + sys.exit(1) + +if not dictionary_rows: + print("ERROR: No REPORT_DICT lines parsed; dictionary section would be empty.", file=sys.stderr) + sys.exit(1) + with open("benchmark-results.json", "w") as f: json.dump(benchmark_results, f, indent=2) @@ -97,6 +152,32 @@ for row in sorted(ratios, key=lambda item: (item["scenario"], item["level"])): f'| {row["label"]} | {row["level"]} | {row["input_bytes"]} | {row["rust_bytes"]} | {row["ffi_bytes"]} | {row["rust_ratio"]:.4f} | {row["ffi_ratio"]:.4f} |' ) +lines.extend([ + "", + "## Peak Memory Estimates", + "", + "| Scenario | Level | Stage | Rust peak bytes | C peak bytes |", + "| --- | --- | --- | ---: | ---: |", +]) + +for row in sorted(memory_rows, key=lambda item: (item["scenario"], item["level"], item["stage"])): + lines.append( + f'| {row["label"]} | {row["level"]} | {row["stage"]} | {row["rust_peak_bytes"]} | {row["ffi_peak_bytes"]} |' + ) + +lines.extend([ + "", + "## Dictionary Compression (C FFI)", + "", + "| Scenario | Level | Dict bytes | Train ms | C bytes (no dict) | C bytes (with dict) | C ratio (no dict) | C ratio (with dict) |", + "| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |", +]) + +for row in sorted(dictionary_rows, key=lambda item: (item["scenario"], 
item["level"])): + lines.append( + f'| {row["label"]} | {row["level"]} | {row["dict_bytes"]} | {row["train_ms"]:.3f} | {row["ffi_no_dict_bytes"]} | {row["ffi_with_dict_bytes"]} | {row["ffi_no_dict_ratio"]:.4f} | {row["ffi_with_dict_ratio"]:.4f} |' + ) + lines.extend([ "", "## Timing Metrics", @@ -113,4 +194,6 @@ with open("benchmark-report.md", "w") as f: print(f"Wrote {len(benchmark_results)} timing results to benchmark-results.json", file=sys.stderr) print(f"Wrote {len(ratios)} ratio rows to benchmark-report.md", file=sys.stderr) +print(f"Wrote {len(memory_rows)} memory rows to benchmark-report.md", file=sys.stderr) +print(f"Wrote {len(dictionary_rows)} dictionary rows to benchmark-report.md", file=sys.stderr) PYEOF diff --git a/BENCHMARKS.md b/BENCHMARKS.md index 1e0f3be0..6d30dd03 100644 --- a/BENCHMARKS.md +++ b/BENCHMARKS.md @@ -27,9 +27,11 @@ encoder: - `structured-zstd::Fastest` vs `zstd` level `1` - `structured-zstd::Default` vs `zstd` level `3` -`Better` and `Best` are intentionally excluded until the encoder implements them. Dictionary -compression is also excluded from the timing matrix because the crate currently exposes dictionary -training, but not dictionary-based compression. +`Better` and `Best` are intentionally excluded until the encoder implements them. + +Dictionary benchmarks are tracked separately with C FFI `with_dict` vs `without_dict` runs, using a +dictionary trained from scenario samples. Pure Rust dictionary compression is still pending and is +therefore not part of the pure-Rust-vs-C timing matrix yet. 
## Commands @@ -62,6 +64,10 @@ bash scripts/bench-flamegraph.sh decompress/default/decodecorpus-z000033/matrix/ `run-benchmarks.sh` writes: - `benchmark-results.json` for GitHub regression tracking -- `benchmark-report.md` with scenario-by-scenario compression ratios and timing rows +- `benchmark-report.md` with: + - compression ratio tables (`REPORT`) + - peak memory estimate tables (`REPORT_MEM`) + - dictionary compression tables (`REPORT_DICT`) + - timing rows for all benchmark functions Criterion also writes its usual detailed estimates under `target/criterion/`. diff --git a/README.md b/README.md index 0cea48ab..acc117b7 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ When the `dict_builder` feature is enabled, the `dictionary` module can create r ## Benchmarking -Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). The suite compares `structured-zstd` against the C reference across small payloads, entropy extremes, a `100 MiB` large-stream scenario, repository corpus fixtures, and optional local Silesia corpora. +Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). The suite compares `structured-zstd` against the C reference across small payloads, entropy extremes, a `100 MiB` large-stream scenario, repository corpus fixtures, and optional local Silesia corpora. Reports now include compression ratios, peak memory estimates, and C FFI dictionary compression (with/without dictionary) for small and corpus scenarios. 
## Usage diff --git a/zstd/Cargo.toml b/zstd/Cargo.toml index 51a5140c..f80c947c 100644 --- a/zstd/Cargo.toml +++ b/zstd/Cargo.toml @@ -31,7 +31,8 @@ alloc = { version = "1.0.0", optional = true, package = "rustc-std-workspace-all [dev-dependencies] criterion = "0.5" rand = { version = "0.8.5", features = ["small_rng"] } -zstd = "0.13.2" +stats_alloc = "0.1" +zstd = { version = "0.13.2", features = ["zdict_builder"] } [features] default = ["hash", "std"] diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index a0581bd0..f422fa56 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -12,7 +12,7 @@ mod support; use criterion::{Criterion, SamplingMode, Throughput, black_box, criterion_group, criterion_main}; -use std::time::Duration; +use std::time::{Duration, Instant}; use structured_zstd::decoding::FrameDecoder; use support::{LevelConfig, Scenario, ScenarioClass, benchmark_scenarios, supported_levels}; @@ -23,6 +23,13 @@ fn bench_compress(c: &mut Criterion) { structured_zstd::encoding::compress_to_vec(&scenario.bytes[..], level.rust_level); let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); emit_report_line(&scenario, level, &rust_compressed, &ffi_compressed); + emit_memory_report( + &scenario, + level, + "compress", + scenario.len() + rust_compressed.len(), + scenario.len() + ffi_compressed.len(), + ); let benchmark_name = format!("compress/{}/{}/{}", level.name, scenario.id, "matrix"); let mut group = c.benchmark_group(benchmark_name); @@ -54,6 +61,13 @@ fn bench_decompress(c: &mut Criterion) { for level in supported_levels() { let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); let expected_len = scenario.len(); + emit_memory_report( + &scenario, + level, + "decompress", + ffi_compressed.len() + expected_len, + ffi_compressed.len() + expected_len, + ); let benchmark_name = format!("decompress/{}/{}/{}", level.name, scenario.id, "matrix"); let mut group 
= c.benchmark_group(benchmark_name); configure_group(&mut group, &scenario); @@ -82,6 +96,68 @@ fn bench_decompress(c: &mut Criterion) { } } +fn bench_dictionary(c: &mut Criterion) { + for scenario in benchmark_scenarios() { + if !matches!(scenario.class, ScenarioClass::Small | ScenarioClass::Corpus) { + continue; + } + + let training_samples = split_training_samples(&scenario.bytes); + let sample_refs: Vec<&[u8]> = training_samples.iter().map(Vec::as_slice).collect(); + let total_training_bytes = sample_refs.iter().map(|sample| sample.len()).sum::(); + let dict_size = dictionary_size_for(scenario.len()) + .min(total_training_bytes.saturating_sub(64)) + .max(256); + let train_started = Instant::now(); + let Ok(dictionary) = zstd::dict::from_samples(&sample_refs, dict_size) else { + eprintln!( + "BENCH_WARN skipping dictionary benchmark for {} (samples={}, total_training_bytes={}, dict_size={})", + scenario.id, + sample_refs.len(), + total_training_bytes, + dict_size + ); + continue; + }; + let train_ms = train_started.elapsed().as_secs_f64() * 1_000.0; + + for level in supported_levels() { + let mut no_dict = zstd::bulk::Compressor::new(level.ffi_level).unwrap(); + let mut with_dict = + zstd::bulk::Compressor::with_dictionary(level.ffi_level, &dictionary).unwrap(); + let no_dict_bytes = no_dict.compress(&scenario.bytes).unwrap(); + let with_dict_bytes = with_dict.compress(&scenario.bytes).unwrap(); + emit_dictionary_report( + &scenario, + level, + dictionary.len(), + train_ms, + &no_dict_bytes, + &with_dict_bytes, + ); + + let benchmark_name = + format!("compress-dict/{}/{}/{}", level.name, scenario.id, "matrix"); + let mut group = c.benchmark_group(benchmark_name); + configure_group(&mut group, &scenario); + group.throughput(Throughput::Bytes(scenario.throughput_bytes())); + + group.bench_function("c_ffi_without_dict", |b| { + let mut compressor = zstd::bulk::Compressor::new(level.ffi_level).unwrap(); + b.iter(|| 
black_box(compressor.compress(&scenario.bytes).unwrap())) + }); + + group.bench_function("c_ffi_with_dict", |b| { + let mut compressor = + zstd::bulk::Compressor::with_dictionary(level.ffi_level, &dictionary).unwrap(); + b.iter(|| black_box(compressor.compress(&scenario.bytes).unwrap())) + }); + + group.finish(); + } + } +} + fn configure_group( group: &mut criterion::BenchmarkGroup<'_, M>, scenario: &Scenario, @@ -106,6 +182,19 @@ fn configure_group( } } +fn emit_memory_report( + scenario: &Scenario, + level: LevelConfig, + stage: &'static str, + rust_peak_bytes: usize, + ffi_peak_bytes: usize, +) { + println!( + "REPORT_MEM scenario={} label=\"{}\" level={} stage={} rust_peak_bytes={} ffi_peak_bytes={}", + scenario.id, scenario.label, level.name, stage, rust_peak_bytes, ffi_peak_bytes + ); +} + fn emit_report_line( scenario: &Scenario, level: LevelConfig, @@ -134,5 +223,55 @@ fn emit_report_line( ); } -criterion_group!(benches, bench_compress, bench_decompress); +fn emit_dictionary_report( + scenario: &Scenario, + level: LevelConfig, + dict_bytes: usize, + train_ms: f64, + no_dict_bytes: &[u8], + with_dict_bytes: &[u8], +) { + let input_len = scenario.len() as f64; + let no_dict_ratio = no_dict_bytes.len() as f64 / input_len; + let with_dict_ratio = with_dict_bytes.len() as f64 / input_len; + println!( + "REPORT_DICT scenario={} label=\"{}\" level={} dict_bytes={} train_ms={:.3} ffi_no_dict_bytes={} ffi_with_dict_bytes={} ffi_no_dict_ratio={:.6} ffi_with_dict_ratio={:.6}", + scenario.id, + scenario.label, + level.name, + dict_bytes, + train_ms, + no_dict_bytes.len(), + with_dict_bytes.len(), + no_dict_ratio, + with_dict_ratio + ); +} + +fn split_training_samples(source: &[u8]) -> Vec> { + let sample_size = source.len().div_ceil(16).clamp(256, 8192); + let mut samples: Vec> = source + .chunks(sample_size) + .take(64) + .filter(|chunk| chunk.len() >= 64) + .map(|chunk| chunk.to_vec()) + .collect(); + if samples.len() < 2 { + let midpoint = source.len() / 2; + let 
left = &source[..midpoint]; + let right = &source[midpoint..]; + if left.len() >= 64 && right.len() >= 64 { + samples = vec![left.to_vec(), right.to_vec()]; + } else { + samples = vec![source.to_vec(), source.to_vec()]; + } + } + samples +} + +fn dictionary_size_for(input_len: usize) -> usize { + input_len.div_ceil(8).clamp(256, 16 * 1024) +} + +criterion_group!(benches, bench_compress, bench_decompress, bench_dictionary); criterion_main!(benches); From 7e032ba15fa167542c392ec068c23162efa93ad6 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Fri, 27 Mar 2026 21:20:26 +0200 Subject: [PATCH 09/30] test(bench): align decompression benchmark paths - include scenario id in ratio report markdown table - reuse decoders and buffers in decompression benchmark loops - keep throughput comparison focused on decode work Refs #24 --- .github/scripts/run-benchmarks.sh | 6 +++--- zstd/benches/compare_ffi.rs | 12 ++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index e86a9fe7..bc24452f 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -143,13 +143,13 @@ lines = [ "", "## Compression Ratios", "", - "| Scenario | Level | Input bytes | Rust bytes | C bytes | Rust ratio | C ratio |", - "| --- | --- | ---: | ---: | ---: | ---: | ---: |", + "| Scenario | Label | Level | Input bytes | Rust bytes | C bytes | Rust ratio | C ratio |", + "| --- | --- | --- | ---: | ---: | ---: | ---: | ---: |", ] for row in sorted(ratios, key=lambda item: (item["scenario"], item["level"])): lines.append( - f'| {row["label"]} | {row["level"]} | {row["input_bytes"]} | {row["rust_bytes"]} | {row["ffi_bytes"]} | {row["rust_ratio"]:.4f} | {row["ffi_ratio"]:.4f} |' + f'| {row["scenario"]} | {row["label"]} | {row["level"]} | {row["input_bytes"]} | {row["rust_bytes"]} | {row["ffi_bytes"]} | {row["rust_ratio"]:.4f} | {row["ffi_ratio"]:.4f} |' ) lines.extend([ diff --git 
a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index f422fa56..539b44d9 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -75,18 +75,22 @@ fn bench_decompress(c: &mut Criterion) { group.bench_function("pure_rust", |b| { let mut target = vec![0u8; expected_len]; + let mut decoder = FrameDecoder::new(); b.iter(|| { - let mut decoder = FrameDecoder::new(); let written = decoder.decode_all(&ffi_compressed, &mut target).unwrap(); assert_eq!(written, expected_len); }) }); group.bench_function("c_ffi", |b| { + let mut decoder = zstd::bulk::Decompressor::new().unwrap(); + let mut output = Vec::with_capacity(expected_len); b.iter(|| { - // Intentional: zstd::decode_all represents the common high-level FFI path and - // includes allocation cost, while pure_rust isolates decode throughput. - let output = zstd::decode_all(&ffi_compressed[..]).unwrap(); + output.clear(); + let written = decoder + .decompress_to_buffer(&ffi_compressed[..], &mut output) + .unwrap(); + assert_eq!(written, expected_len); assert_eq!(output.len(), expected_len); }) }); From bc3bc2fc1b8b02069cff4cc345757e166e00d958 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 00:31:38 +0200 Subject: [PATCH 10/30] test(bench): include scenario ids in report tables - add explicit Label columns for memory and dictionary sections - render stable scenario ids instead of labels in Scenario column Refs #24 --- .github/scripts/run-benchmarks.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index bc24452f..2dcd3422 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -156,26 +156,26 @@ lines.extend([ "", "## Peak Memory Estimates", "", - "| Scenario | Level | Stage | Rust peak bytes | C peak bytes |", - "| --- | --- | --- | ---: | ---: |", + "| Scenario | Label | Level | Stage | Rust peak bytes | C peak bytes |", + "| --- | 
--- | --- | --- | ---: | ---: |", ]) for row in sorted(memory_rows, key=lambda item: (item["scenario"], item["level"], item["stage"])): lines.append( - f'| {row["label"]} | {row["level"]} | {row["stage"]} | {row["rust_peak_bytes"]} | {row["ffi_peak_bytes"]} |' + f'| {row["scenario"]} | {row["label"]} | {row["level"]} | {row["stage"]} | {row["rust_peak_bytes"]} | {row["ffi_peak_bytes"]} |' ) lines.extend([ "", "## Dictionary Compression (C FFI)", "", - "| Scenario | Level | Dict bytes | Train ms | C bytes (no dict) | C bytes (with dict) | C ratio (no dict) | C ratio (with dict) |", - "| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |", + "| Scenario | Label | Level | Dict bytes | Train ms | C bytes (no dict) | C bytes (with dict) | C ratio (no dict) | C ratio (with dict) |", + "| --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |", ]) for row in sorted(dictionary_rows, key=lambda item: (item["scenario"], item["level"])): lines.append( - f'| {row["label"]} | {row["level"]} | {row["dict_bytes"]} | {row["train_ms"]:.3f} | {row["ffi_no_dict_bytes"]} | {row["ffi_with_dict_bytes"]} | {row["ffi_no_dict_ratio"]:.4f} | {row["ffi_with_dict_ratio"]:.4f} |' + f'| {row["scenario"]} | {row["label"]} | {row["level"]} | {row["dict_bytes"]} | {row["train_ms"]:.3f} | {row["ffi_no_dict_bytes"]} | {row["ffi_with_dict_bytes"]} | {row["ffi_no_dict_ratio"]:.4f} | {row["ffi_with_dict_ratio"]:.4f} |' ) lines.extend([ From 007d5231f2814711272d95b4c2fc075a9f2981ed Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 00:32:10 +0200 Subject: [PATCH 11/30] fix(bench): guard dictionary ratio division - handle zero-length scenario inputs in emit_dictionary_report - emit 0.0 ratios instead of inf/NaN for empty payload edge cases Refs #24 --- zstd/benches/compare_ffi.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 539b44d9..d3ea7ebf 100644 --- a/zstd/benches/compare_ffi.rs 
+++ b/zstd/benches/compare_ffi.rs @@ -236,8 +236,14 @@ fn emit_dictionary_report( with_dict_bytes: &[u8], ) { let input_len = scenario.len() as f64; - let no_dict_ratio = no_dict_bytes.len() as f64 / input_len; - let with_dict_ratio = with_dict_bytes.len() as f64 / input_len; + let (no_dict_ratio, with_dict_ratio) = if input_len > 0.0 { + ( + no_dict_bytes.len() as f64 / input_len, + with_dict_bytes.len() as f64 / input_len, + ) + } else { + (0.0, 0.0) + }; println!( "REPORT_DICT scenario={} label=\"{}\" level={} dict_bytes={} train_ms={:.3} ffi_no_dict_bytes={} ffi_with_dict_bytes={} ffi_no_dict_ratio={:.6} ffi_with_dict_ratio={:.6}", scenario.id, From c4e58d1b186265bd6b6824e56e2e4ec35c54e379 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 00:32:42 +0200 Subject: [PATCH 12/30] fix(bench): bound Silesia fixture loading - cap loaded fixture count and file size for predictable startup - support STRUCTURED_ZSTD_SILESIA_MAX_FILES and _MAX_FILE_BYTES overrides - emit BENCH_WARN diagnostics when limits are applied Refs #24 --- zstd/benches/support/mod.rs | 48 ++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index f97aeca3..64653e7a 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -153,21 +153,63 @@ fn repeated_log_lines(len: usize) -> Vec { } fn load_silesia_from_env() -> Vec { + const DEFAULT_MAX_FILES: usize = 12; + const DEFAULT_MAX_FILE_BYTES: usize = 64 * 1024 * 1024; let Some(dir) = env::var_os("STRUCTURED_ZSTD_SILESIA_DIR") else { return Vec::new(); }; + let max_files = env::var("STRUCTURED_ZSTD_SILESIA_MAX_FILES") + .ok() + .and_then(|value| value.parse::().ok()) + .filter(|value| *value > 0) + .unwrap_or(DEFAULT_MAX_FILES); + let max_file_bytes = env::var("STRUCTURED_ZSTD_SILESIA_MAX_FILE_BYTES") + .ok() + .and_then(|value| value.parse::().ok()) + .filter(|value| *value > 0) + 
.unwrap_or(DEFAULT_MAX_FILE_BYTES); let Ok(entries) = fs::read_dir(Path::new(&dir)) else { eprintln!("BENCH_WARN failed to read STRUCTURED_ZSTD_SILESIA_DIR={dir:?}"); return Vec::new(); }; + let mut paths: Vec<_> = entries + .flatten() + .map(|entry| entry.path()) + .filter(|path| path.is_file()) + .collect(); + paths.sort(); + if paths.len() > max_files { + eprintln!( + "BENCH_WARN limiting Silesia fixtures to first {} files from {} entries in {}", + max_files, + paths.len(), + Path::new(&dir).display() + ); + paths.truncate(max_files); + } + let mut scenarios = Vec::new(); - for entry in entries.flatten() { - let path = entry.path(); - if !path.is_file() { + for path in paths { + let Ok(metadata) = fs::metadata(&path) else { + eprintln!( + "BENCH_WARN failed to stat Silesia fixture {}", + path.display() + ); + continue; + }; + let file_len = metadata.len() as usize; + if file_len > max_file_bytes { + eprintln!( + "BENCH_WARN skipping Silesia fixture {} ({} bytes > max {} bytes)", + path.display(), + file_len, + max_file_bytes + ); continue; } + let Ok(bytes) = fs::read(&path) else { eprintln!( "BENCH_WARN failed to read Silesia fixture {}", From e5cdee1c65e1b64109a782f747929c4f569fd4f1 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 00:33:41 +0200 Subject: [PATCH 13/30] docs(bench): clarify memory estimates in reports - rename REPORT_MEM fields to buffer-bytes estimate names - update report section/columns to explicit input+output estimates - sync README and BENCHMARKS wording with new semantics Refs #24 --- .github/scripts/run-benchmarks.sh | 21 ++++++++++++++------- BENCHMARKS.md | 4 +++- README.md | 2 +- zstd/benches/compare_ffi.rs | 13 +++++++++---- 4 files changed, 27 insertions(+), 13 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index 2dcd3422..27a8fa4f 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -31,7 +31,7 @@ REPORT_RE = re.compile( 
r'^REPORT scenario=(\S+) label="([^"]+)" level=(\S+) input_bytes=(\d+) rust_bytes=(\d+) ffi_bytes=(\d+) rust_ratio=([0-9.]+) ffi_ratio=([0-9.]+)$' ) MEM_RE = re.compile( - r'^REPORT_MEM scenario=(\S+) label="([^"]+)" level=(\S+) stage=(\S+) rust_peak_bytes=(\d+) ffi_peak_bytes=(\d+)$' + r'^REPORT_MEM scenario=(\S+) label="([^"]+)" level=(\S+) stage=(\S+) rust_buffer_bytes_estimate=(\d+) ffi_buffer_bytes_estimate=(\d+)$' ) DICT_RE = re.compile( r'^REPORT_DICT scenario=(\S+) label="([^"]+)" level=(\S+) dict_bytes=(\d+) train_ms=([0-9.]+) ffi_no_dict_bytes=(\d+) ffi_with_dict_bytes=(\d+) ffi_no_dict_ratio=([0-9.]+) ffi_with_dict_ratio=([0-9.]+)$' @@ -78,14 +78,21 @@ with open(raw_path) as f: mem_match = MEM_RE.match(line) if mem_match: - scenario, label, level, stage, rust_peak_bytes, ffi_peak_bytes = mem_match.groups() + ( + scenario, + label, + level, + stage, + rust_buffer_bytes_estimate, + ffi_buffer_bytes_estimate, + ) = mem_match.groups() memory_rows.append({ "scenario": scenario, "label": label, "level": level, "stage": stage, - "rust_peak_bytes": int(rust_peak_bytes), - "ffi_peak_bytes": int(ffi_peak_bytes), + "rust_buffer_bytes_estimate": int(rust_buffer_bytes_estimate), + "ffi_buffer_bytes_estimate": int(ffi_buffer_bytes_estimate), }) continue @@ -154,15 +161,15 @@ for row in sorted(ratios, key=lambda item: (item["scenario"], item["level"])): lines.extend([ "", - "## Peak Memory Estimates", + "## Buffer Size Estimates (Input + Output)", "", - "| Scenario | Label | Level | Stage | Rust peak bytes | C peak bytes |", + "| Scenario | Label | Level | Stage | Rust buffer bytes (estimate) | C buffer bytes (estimate) |", "| --- | --- | --- | --- | ---: | ---: |", ]) for row in sorted(memory_rows, key=lambda item: (item["scenario"], item["level"], item["stage"])): lines.append( - f'| {row["scenario"]} | {row["label"]} | {row["level"]} | {row["stage"]} | {row["rust_peak_bytes"]} | {row["ffi_peak_bytes"]} |' + f'| {row["scenario"]} | {row["label"]} | {row["level"]} | 
{row["stage"]} | {row["rust_buffer_bytes_estimate"]} | {row["ffi_buffer_bytes_estimate"]} |' ) lines.extend([ diff --git a/BENCHMARKS.md b/BENCHMARKS.md index 6d30dd03..c603cc1c 100644 --- a/BENCHMARKS.md +++ b/BENCHMARKS.md @@ -14,6 +14,8 @@ The current matrix covers: - low entropy repeated payloads (`1 MiB`) - a large structured stream (`100 MiB`) - optional Silesia corpus files when `STRUCTURED_ZSTD_SILESIA_DIR=/path/to/silesia` is set + - load is bounded by `STRUCTURED_ZSTD_SILESIA_MAX_FILES` (default `12`) and + `STRUCTURED_ZSTD_SILESIA_MAX_FILE_BYTES` (default `67108864`) The local default for the large scenario is `100 MiB`. In GitHub Actions, when `STRUCTURED_ZSTD_BENCH_LARGE_BYTES` is unset, `.github/scripts/run-benchmarks.sh` defaults it to @@ -66,7 +68,7 @@ bash scripts/bench-flamegraph.sh decompress/default/decodecorpus-z000033/matrix/ - `benchmark-results.json` for GitHub regression tracking - `benchmark-report.md` with: - compression ratio tables (`REPORT`) - - peak memory estimate tables (`REPORT_MEM`) + - input+output buffer size estimate tables (`REPORT_MEM`) - dictionary compression tables (`REPORT_DICT`) - timing rows for all benchmark functions diff --git a/README.md b/README.md index acc117b7..63beea4c 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ When the `dict_builder` feature is enabled, the `dictionary` module can create r ## Benchmarking -Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). The suite compares `structured-zstd` against the C reference across small payloads, entropy extremes, a `100 MiB` large-stream scenario, repository corpus fixtures, and optional local Silesia corpora. Reports now include compression ratios, peak memory estimates, and C FFI dictionary compression (with/without dictionary) for small and corpus scenarios. +Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). 
The suite compares `structured-zstd` against the C reference across small payloads, entropy extremes, a `100 MiB` large-stream scenario, repository corpus fixtures, and optional local Silesia corpora. Reports now include compression ratios, input+output buffer size estimates, and C FFI dictionary compression (with/without dictionary) for small and corpus scenarios. ## Usage diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index d3ea7ebf..1e5e90d0 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -190,12 +190,17 @@ fn emit_memory_report( scenario: &Scenario, level: LevelConfig, stage: &'static str, - rust_peak_bytes: usize, - ffi_peak_bytes: usize, + rust_buffer_bytes_estimate: usize, + ffi_buffer_bytes_estimate: usize, ) { println!( - "REPORT_MEM scenario={} label=\"{}\" level={} stage={} rust_peak_bytes={} ffi_peak_bytes={}", - scenario.id, scenario.label, level.name, stage, rust_peak_bytes, ffi_peak_bytes + "REPORT_MEM scenario={} label=\"{}\" level={} stage={} rust_buffer_bytes_estimate={} ffi_buffer_bytes_estimate={}", + scenario.id, + scenario.label, + level.name, + stage, + rust_buffer_bytes_estimate, + ffi_buffer_bytes_estimate ); } From 4fd6c1160542ee5768b43beb1d216ac137ff11c3 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 09:59:39 +0200 Subject: [PATCH 14/30] perf(bench): cache benchmark scenario generation - build scenario inputs once via OnceLock - reuse cached slice across compress/decompress/dictionary benches Refs #24 --- zstd/benches/compare_ffi.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 1e5e90d0..f8384ace 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -12,12 +12,19 @@ mod support; use criterion::{Criterion, SamplingMode, Throughput, black_box, criterion_group, criterion_main}; +use std::sync::OnceLock; use std::time::{Duration, Instant}; use 
structured_zstd::decoding::FrameDecoder; use support::{LevelConfig, Scenario, ScenarioClass, benchmark_scenarios, supported_levels}; +static BENCHMARK_SCENARIOS: OnceLock> = OnceLock::new(); + +fn benchmark_scenarios_cached() -> &'static [Scenario] { + BENCHMARK_SCENARIOS.get_or_init(benchmark_scenarios) +} + fn bench_compress(c: &mut Criterion) { - for scenario in benchmark_scenarios() { + for scenario in benchmark_scenarios_cached().iter() { for level in supported_levels() { let rust_compressed = structured_zstd::encoding::compress_to_vec(&scenario.bytes[..], level.rust_level); @@ -57,7 +64,7 @@ fn bench_compress(c: &mut Criterion) { } fn bench_decompress(c: &mut Criterion) { - for scenario in benchmark_scenarios() { + for scenario in benchmark_scenarios_cached().iter() { for level in supported_levels() { let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); let expected_len = scenario.len(); @@ -101,7 +108,7 @@ fn bench_decompress(c: &mut Criterion) { } fn bench_dictionary(c: &mut Criterion) { - for scenario in benchmark_scenarios() { + for scenario in benchmark_scenarios_cached().iter() { if !matches!(scenario.class, ScenarioClass::Small | ScenarioClass::Corpus) { continue; } From 0b2813bdd978dc4f30981ec6dd5dc7b9d1593906 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 10:00:14 +0200 Subject: [PATCH 15/30] chore(bench): drop unused stats_alloc dep - remove unused dev-dependency from zstd/Cargo.toml Refs #24 --- zstd/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/zstd/Cargo.toml b/zstd/Cargo.toml index f80c947c..4d80c255 100644 --- a/zstd/Cargo.toml +++ b/zstd/Cargo.toml @@ -31,7 +31,6 @@ alloc = { version = "1.0.0", optional = true, package = "rustc-std-workspace-all [dev-dependencies] criterion = "0.5" rand = { version = "0.8.5", features = ["small_rng"] } -stats_alloc = "0.1" zstd = { version = "0.13.2", features = ["zdict_builder"] } [features] From 86f27c8bb0c8fe20fda21a95007d8fffc2434646 
Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 10:00:52 +0200 Subject: [PATCH 16/30] fix(bench): allow filtered runs without dict rows - downgrade missing REPORT_DICT from error to warning Refs #24 --- .github/scripts/run-benchmarks.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index 27a8fa4f..c786e438 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -137,8 +137,7 @@ if not memory_rows: sys.exit(1) if not dictionary_rows: - print("ERROR: No REPORT_DICT lines parsed; dictionary section would be empty.", file=sys.stderr) - sys.exit(1) + print("WARN: No REPORT_DICT lines parsed; dictionary section will be empty.", file=sys.stderr) with open("benchmark-results.json", "w") as f: json.dump(benchmark_results, f, indent=2) From 42cfc4648ba63bc76a4b1f708dd91e7e1999c723 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 10:01:39 +0200 Subject: [PATCH 17/30] fix(bench): avoid duplicate dict fallback samples - use single-sample fallback for tiny dictionary training inputs - emit BENCH_WARN when fallback path is used Refs #24 --- zstd/benches/compare_ffi.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index f8384ace..3a283626 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -285,7 +285,11 @@ fn split_training_samples(source: &[u8]) -> Vec<Vec<u8>> { if left.len() >= 64 && right.len() >= 64 { samples = vec![left.to_vec(), right.to_vec()]; } else { - samples = vec![source.to_vec(), source.to_vec()]; + eprintln!( + "BENCH_WARN tiny dictionary training input ({} bytes), using a single sample fallback", + source.len() + ); + samples = vec![source.to_vec()]; } } samples From bf8bba5b9d54fd8dbba18993df0a677aafa1460c Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 10:02:25 +0200 
Subject: [PATCH 18/30] style(bench): add is_empty for Scenario - satisfy len_without_is_empty expectations for bench helper type Refs #24 --- zstd/benches/support/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index 64653e7a..7260a7b5 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -109,6 +109,11 @@ impl Scenario { self.bytes.len() } + #[allow(dead_code)] + pub(crate) fn is_empty(&self) -> bool { + self.bytes.is_empty() + } + pub(crate) fn throughput_bytes(&self) -> u64 { self.bytes.len() as u64 } From a3a54a4e018b60f26f6a6c7b61346f07066890be Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 10:03:44 +0200 Subject: [PATCH 19/30] fix(bench): remove needless borrows in scenario loops - pass cached scenario references directly in helper calls - keep clippy clean with OnceLock-backed scenario cache Refs #24 --- zstd/benches/compare_ffi.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 3a283626..24f40d94 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -29,9 +29,9 @@ fn bench_compress(c: &mut Criterion) { let rust_compressed = structured_zstd::encoding::compress_to_vec(&scenario.bytes[..], level.rust_level); let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); - emit_report_line(&scenario, level, &rust_compressed, &ffi_compressed); + emit_report_line(scenario, level, &rust_compressed, &ffi_compressed); emit_memory_report( - &scenario, + scenario, level, "compress", scenario.len() + rust_compressed.len(), @@ -40,7 +40,7 @@ fn bench_compress(c: &mut Criterion) { let benchmark_name = format!("compress/{}/{}/{}", level.name, scenario.id, "matrix"); let mut group = c.benchmark_group(benchmark_name); - configure_group(&mut group, &scenario); + configure_group(&mut group, scenario); 
group.throughput(Throughput::Bytes(scenario.throughput_bytes())); group.bench_function("pure_rust", |b| { @@ -69,7 +69,7 @@ fn bench_decompress(c: &mut Criterion) { let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); let expected_len = scenario.len(); emit_memory_report( - &scenario, + scenario, level, "decompress", ffi_compressed.len() + expected_len, @@ -77,7 +77,7 @@ fn bench_decompress(c: &mut Criterion) { ); let benchmark_name = format!("decompress/{}/{}/{}", level.name, scenario.id, "matrix"); let mut group = c.benchmark_group(benchmark_name); - configure_group(&mut group, &scenario); + configure_group(&mut group, scenario); group.throughput(Throughput::Bytes(scenario.throughput_bytes())); group.bench_function("pure_rust", |b| { @@ -139,7 +139,7 @@ fn bench_dictionary(c: &mut Criterion) { let no_dict_bytes = no_dict.compress(&scenario.bytes).unwrap(); let with_dict_bytes = with_dict.compress(&scenario.bytes).unwrap(); emit_dictionary_report( - &scenario, + scenario, level, dictionary.len(), train_ms, @@ -150,7 +150,7 @@ fn bench_dictionary(c: &mut Criterion) { let benchmark_name = format!("compress-dict/{}/{}/{}", level.name, scenario.id, "matrix"); let mut group = c.benchmark_group(benchmark_name); - configure_group(&mut group, &scenario); + configure_group(&mut group, scenario); group.throughput(Throughput::Bytes(scenario.throughput_bytes())); group.bench_function("c_ffi_without_dict", |b| { From c99d33bb692f0d82607b175c24f01c291686a149 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 10:38:05 +0200 Subject: [PATCH 20/30] fix(bench): sanitize Silesia scenario report fields - normalize Silesia-derived scenario ids to safe ASCII tokens - escape report labels before emitting REPORT/REPORT_MEM/REPORT_DICT lines Refs #24 --- zstd/benches/compare_ffi.rs | 13 ++++++++++--- zstd/benches/support/mod.rs | 19 ++++++++++++++++++- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git 
a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 24f40d94..91f9c598 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -200,10 +200,11 @@ fn emit_memory_report( rust_buffer_bytes_estimate: usize, ffi_buffer_bytes_estimate: usize, ) { + let escaped_label = escape_report_label(&scenario.label); println!( "REPORT_MEM scenario={} label=\"{}\" level={} stage={} rust_buffer_bytes_estimate={} ffi_buffer_bytes_estimate={}", scenario.id, - scenario.label, + escaped_label, level.name, stage, rust_buffer_bytes_estimate, @@ -218,6 +219,7 @@ fn emit_report_line( ffi_compressed: &[u8], ) { let input_len = scenario.len() as f64; + let escaped_label = escape_report_label(&scenario.label); let (rust_ratio, ffi_ratio) = if input_len > 0.0 { ( rust_compressed.len() as f64 / input_len, @@ -229,7 +231,7 @@ fn emit_report_line( println!( "REPORT scenario={} label=\"{}\" level={} input_bytes={} rust_bytes={} ffi_bytes={} rust_ratio={:.6} ffi_ratio={:.6}", scenario.id, - scenario.label, + escaped_label, level.name, scenario.len(), rust_compressed.len(), @@ -248,6 +250,7 @@ fn emit_dictionary_report( with_dict_bytes: &[u8], ) { let input_len = scenario.len() as f64; + let escaped_label = escape_report_label(&scenario.label); let (no_dict_ratio, with_dict_ratio) = if input_len > 0.0 { ( no_dict_bytes.len() as f64 / input_len, @@ -259,7 +262,7 @@ fn emit_dictionary_report( println!( "REPORT_DICT scenario={} label=\"{}\" level={} dict_bytes={} train_ms={:.3} ffi_no_dict_bytes={} ffi_with_dict_bytes={} ffi_no_dict_ratio={:.6} ffi_with_dict_ratio={:.6}", scenario.id, - scenario.label, + escaped_label, level.name, dict_bytes, train_ms, @@ -299,5 +302,9 @@ fn dictionary_size_for(input_len: usize) -> usize { input_len.div_ceil(8).clamp(256, 16 * 1024) } +fn escape_report_label(label: &str) -> String { + label.replace('\\', "\\\\").replace('\"', "\\\"") +} + criterion_group!(benches, bench_compress, bench_decompress, bench_dictionary); 
criterion_main!(benches); diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index 7260a7b5..f0fc695d 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -232,8 +232,9 @@ fn load_silesia_from_env() -> Vec { let Some(stem) = path.file_stem().and_then(|stem| stem.to_str()) else { continue; }; + let scenario_stem = sanitize_scenario_stem(stem); scenarios.push(Scenario::new( - format!("silesia-{stem}"), + format!("silesia-{scenario_stem}"), format!("Silesia corpus: {stem}"), bytes, ScenarioClass::Silesia, @@ -251,3 +252,19 @@ fn large_stream_len() -> usize { .filter(|value| *value > 0) .unwrap_or(100 * 1024 * 1024) } + +fn sanitize_scenario_stem(stem: &str) -> String { + let mut sanitized = String::with_capacity(stem.len()); + for ch in stem.chars() { + if ch.is_ascii_alphanumeric() || matches!(ch, '.' | '_' | '-') { + sanitized.push(ch); + } else { + sanitized.push('_'); + } + } + if sanitized.is_empty() { + "unnamed".to_string() + } else { + sanitized + } +} From c9639b5d4b758d258a3784749b377e7b1ea5e08a Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 10:38:45 +0200 Subject: [PATCH 21/30] perf(bench): bound Silesia dir walk by max_files - stop collecting file paths once fixture limit is reached - keep deterministic ordering by sorting only the bounded subset Refs #24 --- zstd/benches/support/mod.rs | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index f0fc695d..872a53b6 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -179,20 +179,26 @@ fn load_silesia_from_env() -> Vec { return Vec::new(); }; - let mut paths: Vec<_> = entries - .flatten() - .map(|entry| entry.path()) - .filter(|path| path.is_file()) - .collect(); + let mut paths = Vec::with_capacity(max_files); + let mut hit_limit = false; + for entry in entries.flatten() { + let path = entry.path(); + if !path.is_file() 
{ + continue; + } + if paths.len() >= max_files { + hit_limit = true; + break; + } + paths.push(path); + } paths.sort(); - if paths.len() > max_files { + if hit_limit { eprintln!( - "BENCH_WARN limiting Silesia fixtures to first {} files from {} entries in {}", + "BENCH_WARN limiting Silesia fixtures to first {} discovered files in {}", max_files, - paths.len(), Path::new(&dir).display() ); - paths.truncate(max_files); } let mut scenarios = Vec::new(); From d63a2b8b9b822472adaa80973610a6307de98846 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 10:39:29 +0200 Subject: [PATCH 22/30] build(bench): ship decode corpus fixture in crate - keep include_bytes corpus scenario available in packaged bench sources - remove decodecorpus_files from zstd crate exclude list Refs #24 --- zstd/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zstd/Cargo.toml b/zstd/Cargo.toml index 4d80c255..82a63edb 100644 --- a/zstd/Cargo.toml +++ b/zstd/Cargo.toml @@ -11,7 +11,7 @@ license = "Apache-2.0" homepage = "https://github.com/structured-world/structured-zstd" repository = "https://github.com/structured-world/structured-zstd" description = "Pure Rust zstd implementation — managed fork of ruzstd. Dictionary decompression, no FFI." -exclude = ["decodecorpus_files/*", "dict_tests/*", "fuzz_decodecorpus/*"] +exclude = ["dict_tests/*", "fuzz_decodecorpus/*"] # Package metadata points at a crate-local symlink so the packaged crate and repo root README stay in sync. 
readme = "README.md" keywords = ["zstd", "zstandard", "decompression", "compression", "pure-rust"] From 60c4ec41bdd168649006e4d51df887faa1f4b003 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 11:08:26 +0200 Subject: [PATCH 23/30] fix(bench): avoid packaging decode corpus fixtures - restore decodecorpus_files exclusion for crate packaging size - load corpus sample at runtime with synthetic fallback when fixture is absent Refs #24 --- zstd/Cargo.toml | 2 +- zstd/benches/support/mod.rs | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/zstd/Cargo.toml b/zstd/Cargo.toml index 82a63edb..0d435fc7 100644 --- a/zstd/Cargo.toml +++ b/zstd/Cargo.toml @@ -11,7 +11,7 @@ license = "Apache-2.0" homepage = "https://github.com/structured-world/structured-zstd" repository = "https://github.com/structured-world/structured-zstd" description = "Pure Rust zstd implementation — managed fork of ruzstd. Dictionary decompression, no FFI." -exclude = ["dict_tests/*", "fuzz_decodecorpus/*"] +exclude = ["dict_tests/*", "fuzz_decodecorpus/*", "decodecorpus_files/*"] # Package metadata points at a crate-local symlink so the packaged crate and repo root README stay in sync. 
readme = "README.md" keywords = ["zstd", "zstandard", "decompression", "compression", "pure-rust"] diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index 872a53b6..ae51498f 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -48,7 +48,7 @@ pub(crate) fn benchmark_scenarios() -> Vec { Scenario::new( "decodecorpus-z000033", "Repo decode corpus sample", - include_bytes!("../../decodecorpus_files/z000033").to_vec(), + load_decode_corpus_sample(), ScenarioClass::Corpus, ), Scenario::new( @@ -259,6 +259,38 @@ fn large_stream_len() -> usize { .unwrap_or(100 * 1024 * 1024) } +fn load_decode_corpus_sample() -> Vec { + let manifest_dir = env::var("CARGO_MANIFEST_DIR").ok(); + let fixture_path = manifest_dir + .as_deref() + .map(Path::new) + .map(|dir| dir.join("decodecorpus_files/z000033")); + + if let Some(path) = fixture_path { + match fs::read(&path) { + Ok(bytes) if !bytes.is_empty() => return bytes, + Ok(_) => { + eprintln!( + "BENCH_WARN decode corpus fixture is empty at {}, using synthetic fallback", + path.display() + ); + } + Err(err) => { + eprintln!( + "BENCH_WARN failed to read decode corpus fixture at {}: {}. Using synthetic fallback", + path.display(), + err + ); + } + } + } else { + eprintln!("BENCH_WARN CARGO_MANIFEST_DIR is not set, using synthetic decode corpus fallback"); + } + + // Keep the benchmark matrix runnable from packaged sources where fixture files may be omitted. 
+ repeated_log_lines(1024 * 1024) +} + fn sanitize_scenario_stem(stem: &str) -> String { let mut sanitized = String::with_capacity(stem.len()); for ch in stem.chars() { From 26ad87b7f4a12066b876f62377610e0d91ba6dee Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 11:09:06 +0200 Subject: [PATCH 24/30] fix(bench): parse escaped labels in report script - accept backslash-escaped quotes in REPORT label regexes - unescape parsed labels before markdown rendering Refs #24 --- .github/scripts/run-benchmarks.sh | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index c786e438..ea8097bb 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -28,15 +28,28 @@ import sys BENCH_RE = re.compile(r"test (\S+)\s+\.\.\. bench:\s+([\d,]+) ns/iter") REPORT_RE = re.compile( - r'^REPORT scenario=(\S+) label="([^"]+)" level=(\S+) input_bytes=(\d+) rust_bytes=(\d+) ffi_bytes=(\d+) rust_ratio=([0-9.]+) ffi_ratio=([0-9.]+)$' + r'^REPORT scenario=(\S+) label="((?:[^"\\]|\\.)+)" level=(\S+) input_bytes=(\d+) rust_bytes=(\d+) ffi_bytes=(\d+) rust_ratio=([0-9.]+) ffi_ratio=([0-9.]+)$' ) MEM_RE = re.compile( - r'^REPORT_MEM scenario=(\S+) label="([^"]+)" level=(\S+) stage=(\S+) rust_buffer_bytes_estimate=(\d+) ffi_buffer_bytes_estimate=(\d+)$' + r'^REPORT_MEM scenario=(\S+) label="((?:[^"\\]|\\.)+)" level=(\S+) stage=(\S+) rust_buffer_bytes_estimate=(\d+) ffi_buffer_bytes_estimate=(\d+)$' ) DICT_RE = re.compile( - r'^REPORT_DICT scenario=(\S+) label="([^"]+)" level=(\S+) dict_bytes=(\d+) train_ms=([0-9.]+) ffi_no_dict_bytes=(\d+) ffi_with_dict_bytes=(\d+) ffi_no_dict_ratio=([0-9.]+) ffi_with_dict_ratio=([0-9.]+)$' + r'^REPORT_DICT scenario=(\S+) label="((?:[^"\\]|\\.)+)" level=(\S+) dict_bytes=(\d+) train_ms=([0-9.]+) ffi_no_dict_bytes=(\d+) ffi_with_dict_bytes=(\d+) ffi_no_dict_ratio=([0-9.]+) ffi_with_dict_ratio=([0-9.]+)$' ) +def 
unescape_report_label(value): + output = [] + i = 0 + while i < len(value): + ch = value[i] + if ch == "\\" and i + 1 < len(value): + i += 1 + output.append(value[i]) + else: + output.append(ch) + i += 1 + return "".join(output) + benchmark_results = [] timings = [] ratios = [] @@ -64,6 +77,7 @@ with open(raw_path) as f: report_match = REPORT_RE.match(line) if report_match: scenario, label, level, input_bytes, rust_bytes, ffi_bytes, rust_ratio, ffi_ratio = report_match.groups() + label = unescape_report_label(label) ratios.append({ "scenario": scenario, "label": label, @@ -86,6 +100,7 @@ with open(raw_path) as f: rust_buffer_bytes_estimate, ffi_buffer_bytes_estimate, ) = mem_match.groups() + label = unescape_report_label(label) memory_rows.append({ "scenario": scenario, "label": label, @@ -109,6 +124,7 @@ with open(raw_path) as f: ffi_no_dict_ratio, ffi_with_dict_ratio, ) = dict_match.groups() + label = unescape_report_label(label) dictionary_rows.append({ "scenario": scenario, "label": label, From 6994c8af1b88747f2d537f8d923bbfa6ba5123e5 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 11:09:33 +0200 Subject: [PATCH 25/30] fix(bench): pass criterion filter correctly to flamegraph - remove unsupported --bench flag from benchmark binary arguments Refs #24 --- scripts/bench-flamegraph.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/bench-flamegraph.sh b/scripts/bench-flamegraph.sh index 47cd196e..fc48e475 100755 --- a/scripts/bench-flamegraph.sh +++ b/scripts/bench-flamegraph.sh @@ -21,7 +21,6 @@ if cargo flamegraph \ ${EXTRA_FLAMEGRAPH_ARGS[@]+"${EXTRA_FLAMEGRAPH_ARGS[@]}"} \ --output "$OUTPUT_DIR/${BENCH_FILTER//\//_}.svg" \ -- \ - --bench \ "$BENCH_FILTER"; then : else From c42f34c1beb8c3ee25926d04506a404b7b1694ba Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 11:10:17 +0200 Subject: [PATCH 26/30] style(bench): format runtime corpus loader - apply rustfmt after decodecorpus runtime load changes Refs #24 --- 
zstd/benches/support/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index ae51498f..46475adf 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -284,7 +284,9 @@ fn load_decode_corpus_sample() -> Vec { } } } else { - eprintln!("BENCH_WARN CARGO_MANIFEST_DIR is not set, using synthetic decode corpus fallback"); + eprintln!( + "BENCH_WARN CARGO_MANIFEST_DIR is not set, using synthetic decode corpus fallback" + ); } // Keep the benchmark matrix runnable from packaged sources where fixture files may be omitted. From 63144cc8767e5f7d04a67dccdab9306740345fef Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 14:24:39 +0200 Subject: [PATCH 27/30] fix(bench): stabilize corpus fallback scenarios - use distinct scenario id/label when decode corpus fixture is unavailable - collect, sort, and truncate Silesia files deterministically --- zstd/benches/support/mod.rs | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index 46475adf..8307cef9 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -45,12 +45,7 @@ pub(crate) fn benchmark_scenarios() -> Vec { repeated_log_lines(4 * 1024), ScenarioClass::Small, ), - Scenario::new( - "decodecorpus-z000033", - "Repo decode corpus sample", - load_decode_corpus_sample(), - ScenarioClass::Corpus, - ), + load_decode_corpus_scenario(), Scenario::new( "high-entropy-1m", "High entropy random payload (1 MiB)", @@ -179,26 +174,22 @@ fn load_silesia_from_env() -> Vec { return Vec::new(); }; - let mut paths = Vec::with_capacity(max_files); - let mut hit_limit = false; + let mut paths = Vec::new(); for entry in entries.flatten() { let path = entry.path(); if !path.is_file() { continue; } - if paths.len() >= max_files { - hit_limit = true; - break; - } paths.push(path); } paths.sort(); 
- if hit_limit { + if paths.len() > max_files { eprintln!( - "BENCH_WARN limiting Silesia fixtures to first {} discovered files in {}", + "BENCH_WARN limiting Silesia fixtures to first {} sorted files in {}", max_files, Path::new(&dir).display() ); + paths.truncate(max_files); } let mut scenarios = Vec::new(); @@ -259,7 +250,12 @@ fn large_stream_len() -> usize { .unwrap_or(100 * 1024 * 1024) } -fn load_decode_corpus_sample() -> Vec { +fn load_decode_corpus_scenario() -> Scenario { + const REAL_ID: &str = "decodecorpus-z000033"; + const REAL_LABEL: &str = "Repo decode corpus sample"; + const FALLBACK_ID: &str = "decodecorpus-synthetic-1m"; + const FALLBACK_LABEL: &str = "Synthetic decode corpus fallback (1 MiB)"; + let manifest_dir = env::var("CARGO_MANIFEST_DIR").ok(); let fixture_path = manifest_dir .as_deref() @@ -268,7 +264,9 @@ fn load_decode_corpus_sample() -> Vec { if let Some(path) = fixture_path { match fs::read(&path) { - Ok(bytes) if !bytes.is_empty() => return bytes, + Ok(bytes) if !bytes.is_empty() => { + return Scenario::new(REAL_ID, REAL_LABEL, bytes, ScenarioClass::Corpus); + } Ok(_) => { eprintln!( "BENCH_WARN decode corpus fixture is empty at {}, using synthetic fallback", @@ -290,7 +288,12 @@ fn load_decode_corpus_sample() -> Vec { } // Keep the benchmark matrix runnable from packaged sources where fixture files may be omitted. 
- repeated_log_lines(1024 * 1024) + Scenario::new( + FALLBACK_ID, + FALLBACK_LABEL, + repeated_log_lines(1024 * 1024), + ScenarioClass::Corpus, + ) } fn sanitize_scenario_stem(stem: &str) -> String { From dc11bd62c7fc745e96f61ddba7cc1d55bc137369 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 14:36:57 +0200 Subject: [PATCH 28/30] fix(bench): gate report precompute and escape labels - emit REPORT* lines only when STRUCTURED_ZSTD_EMIT_REPORT is enabled - set report env var in run-benchmarks workflow - escape markdown table cell labels in benchmark-report.md generation --- .github/scripts/run-benchmarks.sh | 15 +++++-- zstd/benches/compare_ffi.rs | 70 +++++++++++++++++++------------ 2 files changed, 56 insertions(+), 29 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index ea8097bb..4374e3a4 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -16,6 +16,7 @@ fi BENCH_RAW_FILE="$(mktemp -t structured-zstd-bench-raw.XXXXXX)" trap 'rm -f "$BENCH_RAW_FILE"' EXIT +export STRUCTURED_ZSTD_EMIT_REPORT=1 cargo bench --bench compare_ffi -p structured-zstd -- --output-format bencher | tee "$BENCH_RAW_FILE" echo "Parsing results..." >&2 @@ -50,6 +51,11 @@ def unescape_report_label(value): i += 1 return "".join(output) +def markdown_table_escape(value): + escaped = value.replace("\\", "\\\\") + escaped = escaped.replace("|", "\\|") + return escaped.replace("\n", "<br>
") + benchmark_results = [] timings = [] ratios = [] @@ -170,8 +176,9 @@ lines = [ ] for row in sorted(ratios, key=lambda item: (item["scenario"], item["level"])): + label = markdown_table_escape(row["label"]) lines.append( - f'| {row["scenario"]} | {row["label"]} | {row["level"]} | {row["input_bytes"]} | {row["rust_bytes"]} | {row["ffi_bytes"]} | {row["rust_ratio"]:.4f} | {row["ffi_ratio"]:.4f} |' + f'| {row["scenario"]} | {label} | {row["level"]} | {row["input_bytes"]} | {row["rust_bytes"]} | {row["ffi_bytes"]} | {row["rust_ratio"]:.4f} | {row["ffi_ratio"]:.4f} |' ) lines.extend([ @@ -183,8 +190,9 @@ lines.extend([ ]) for row in sorted(memory_rows, key=lambda item: (item["scenario"], item["level"], item["stage"])): + label = markdown_table_escape(row["label"]) lines.append( - f'| {row["scenario"]} | {row["label"]} | {row["level"]} | {row["stage"]} | {row["rust_buffer_bytes_estimate"]} | {row["ffi_buffer_bytes_estimate"]} |' + f'| {row["scenario"]} | {label} | {row["level"]} | {row["stage"]} | {row["rust_buffer_bytes_estimate"]} | {row["ffi_buffer_bytes_estimate"]} |' ) lines.extend([ @@ -196,8 +204,9 @@ lines.extend([ ]) for row in sorted(dictionary_rows, key=lambda item: (item["scenario"], item["level"])): + label = markdown_table_escape(row["label"]) lines.append( - f'| {row["scenario"]} | {row["label"]} | {row["level"]} | {row["dict_bytes"]} | {row["train_ms"]:.3f} | {row["ffi_no_dict_bytes"]} | {row["ffi_with_dict_bytes"]} | {row["ffi_no_dict_ratio"]:.4f} | {row["ffi_with_dict_ratio"]:.4f} |' + f'| {row["scenario"]} | {label} | {row["level"]} | {row["dict_bytes"]} | {row["train_ms"]:.3f} | {row["ffi_no_dict_bytes"]} | {row["ffi_with_dict_bytes"]} | {row["ffi_no_dict_ratio"]:.4f} | {row["ffi_with_dict_ratio"]:.4f} |' ) lines.extend([ diff --git a/zstd/benches/compare_ffi.rs b/zstd/benches/compare_ffi.rs index 91f9c598..36aa06b0 100644 --- a/zstd/benches/compare_ffi.rs +++ b/zstd/benches/compare_ffi.rs @@ -23,20 +23,32 @@ fn benchmark_scenarios_cached() -> 
&'static [Scenario] { BENCHMARK_SCENARIOS.get_or_init(benchmark_scenarios) } +fn emit_reports_enabled() -> bool { + std::env::var("STRUCTURED_ZSTD_EMIT_REPORT") + .map(|value| matches!(value.as_str(), "1" | "true" | "TRUE")) + .unwrap_or(false) +} + fn bench_compress(c: &mut Criterion) { + let emit_reports = emit_reports_enabled(); for scenario in benchmark_scenarios_cached().iter() { for level in supported_levels() { - let rust_compressed = - structured_zstd::encoding::compress_to_vec(&scenario.bytes[..], level.rust_level); - let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); - emit_report_line(scenario, level, &rust_compressed, &ffi_compressed); - emit_memory_report( - scenario, - level, - "compress", - scenario.len() + rust_compressed.len(), - scenario.len() + ffi_compressed.len(), - ); + if emit_reports { + let rust_compressed = structured_zstd::encoding::compress_to_vec( + &scenario.bytes[..], + level.rust_level, + ); + let ffi_compressed = + zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); + emit_report_line(scenario, level, &rust_compressed, &ffi_compressed); + emit_memory_report( + scenario, + level, + "compress", + scenario.len() + rust_compressed.len(), + scenario.len() + ffi_compressed.len(), + ); + } let benchmark_name = format!("compress/{}/{}/{}", level.name, scenario.id, "matrix"); let mut group = c.benchmark_group(benchmark_name); @@ -64,17 +76,20 @@ fn bench_compress(c: &mut Criterion) { } fn bench_decompress(c: &mut Criterion) { + let emit_reports = emit_reports_enabled(); for scenario in benchmark_scenarios_cached().iter() { for level in supported_levels() { let ffi_compressed = zstd::encode_all(&scenario.bytes[..], level.ffi_level).unwrap(); let expected_len = scenario.len(); - emit_memory_report( - scenario, - level, - "decompress", - ffi_compressed.len() + expected_len, - ffi_compressed.len() + expected_len, - ); + if emit_reports { + emit_memory_report( + scenario, + level, + "decompress", + 
ffi_compressed.len() + expected_len, + ffi_compressed.len() + expected_len, + ); + } let benchmark_name = format!("decompress/{}/{}/{}", level.name, scenario.id, "matrix"); let mut group = c.benchmark_group(benchmark_name); configure_group(&mut group, scenario); @@ -108,6 +123,7 @@ fn bench_decompress(c: &mut Criterion) { } fn bench_dictionary(c: &mut Criterion) { + let emit_reports = emit_reports_enabled(); for scenario in benchmark_scenarios_cached().iter() { if !matches!(scenario.class, ScenarioClass::Small | ScenarioClass::Corpus) { continue; @@ -138,14 +154,16 @@ fn bench_dictionary(c: &mut Criterion) { zstd::bulk::Compressor::with_dictionary(level.ffi_level, &dictionary).unwrap(); let no_dict_bytes = no_dict.compress(&scenario.bytes).unwrap(); let with_dict_bytes = with_dict.compress(&scenario.bytes).unwrap(); - emit_dictionary_report( - scenario, - level, - dictionary.len(), - train_ms, - &no_dict_bytes, - &with_dict_bytes, - ); + if emit_reports { + emit_dictionary_report( + scenario, + level, + dictionary.len(), + train_ms, + &no_dict_bytes, + &with_dict_bytes, + ); + } let benchmark_name = format!("compress-dict/{}/{}/{}", level.name, scenario.id, "matrix"); From 94f1c6df66979919e3c27b660b8618fbacc1a02e Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 14:38:33 +0200 Subject: [PATCH 29/30] fix(bench): harden silesia fixture identity and size checks - compare metadata.len() against max size in u64 space - derive Silesia scenario ids from full file names - append stable numeric suffix on id collisions --- zstd/benches/support/mod.rs | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index 8307cef9..1a1cf316 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -1,5 +1,5 @@ use rand::{RngCore, SeedableRng, rngs::SmallRng}; -use std::{env, fs, path::Path}; +use std::{collections::HashSet, env, fs, path::Path}; 
use structured_zstd::encoding::CompressionLevel; pub(crate) struct Scenario { @@ -193,6 +193,7 @@ fn load_silesia_from_env() -> Vec { } let mut scenarios = Vec::new(); + let mut seen_silesia_ids = HashSet::new(); for path in paths { let Ok(metadata) = fs::metadata(&path) else { eprintln!( @@ -201,8 +202,8 @@ fn load_silesia_from_env() -> Vec { ); continue; }; - let file_len = metadata.len() as usize; - if file_len > max_file_bytes { + let file_len = metadata.len(); + if file_len > max_file_bytes as u64 { eprintln!( "BENCH_WARN skipping Silesia fixture {} ({} bytes > max {} bytes)", path.display(), @@ -226,13 +227,15 @@ fn load_silesia_from_env() -> Vec { ); continue; } - let Some(stem) = path.file_stem().and_then(|stem| stem.to_str()) else { + let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else { continue; }; - let scenario_stem = sanitize_scenario_stem(stem); + let scenario_stem = sanitize_scenario_stem(file_name); + let scenario_id = + dedupe_scenario_id(format!("silesia-{scenario_stem}"), &mut seen_silesia_ids); scenarios.push(Scenario::new( - format!("silesia-{scenario_stem}"), - format!("Silesia corpus: {stem}"), + scenario_id, + format!("Silesia corpus: {file_name}"), bytes, ScenarioClass::Silesia, )); @@ -311,3 +314,18 @@ fn sanitize_scenario_stem(stem: &str) -> String { sanitized } } + +fn dedupe_scenario_id(base_id: String, seen_ids: &mut HashSet) -> String { + if seen_ids.insert(base_id.clone()) { + return base_id; + } + + let mut suffix = 2usize; + loop { + let candidate = format!("{base_id}-{suffix}"); + if seen_ids.insert(candidate.clone()) { + return candidate; + } + suffix += 1; + } +} From c2a298893b12fd2d325a40bc3101ce2cb70b630e Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 28 Mar 2026 15:17:08 +0200 Subject: [PATCH 30/30] fix(bench): tighten label escaping and id dedupe guard - expand markdown table escaping for benchmark labels - bound scenario id suffix search and panic deterministically on exhaustion --- 
.github/scripts/run-benchmarks.sh | 11 ++++++++++- zstd/benches/support/mod.rs | 11 ++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh index 4374e3a4..0d235420 100755 --- a/.github/scripts/run-benchmarks.sh +++ b/.github/scripts/run-benchmarks.sh @@ -52,8 +52,17 @@ def unescape_report_label(value): return "".join(output) def markdown_table_escape(value): - escaped = value.replace("\\", "\\\\") + escaped = value.strip() + escaped = escaped.replace("\\", "\\\\") escaped = escaped.replace("|", "\\|") + escaped = escaped.replace("`", "\\`") + escaped = escaped.replace("[", "\\[") + escaped = escaped.replace("]", "\\]") + escaped = escaped.replace("*", "\\*") + escaped = escaped.replace("_", "\\_") + escaped = escaped.replace("<", "&lt;") + escaped = escaped.replace(">", "&gt;") + escaped = escaped.replace("%", "&#37;") return escaped.replace("\n", "<br>
") benchmark_results = [] diff --git a/zstd/benches/support/mod.rs b/zstd/benches/support/mod.rs index 1a1cf316..c8906822 100644 --- a/zstd/benches/support/mod.rs +++ b/zstd/benches/support/mod.rs @@ -316,16 +316,21 @@ fn sanitize_scenario_stem(stem: &str) -> String { } fn dedupe_scenario_id(base_id: String, seen_ids: &mut HashSet<String>) -> String { + const MAX_SUFFIX: usize = 1_000_000; + if seen_ids.insert(base_id.clone()) { return base_id; } - let mut suffix = 2usize; - loop { + for suffix in 2..=MAX_SUFFIX { let candidate = format!("{base_id}-{suffix}"); if seen_ids.insert(candidate.clone()) { return candidate; } - suffix += 1; } + + panic!( + "failed to allocate unique scenario id for base '{}' after {} attempts", + base_id, MAX_SUFFIX + ); }