diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh
index 0d235420..a467b2a1 100755
--- a/.github/scripts/run-benchmarks.sh
+++ b/.github/scripts/run-benchmarks.sh
@@ -26,6 +26,7 @@ import json
 import os
 import re
 import sys
+from collections import defaultdict
 
 BENCH_RE = re.compile(r"test (\S+)\s+\.\.\. bench:\s+([\d,]+) ns/iter")
 REPORT_RE = re.compile(
@@ -70,8 +71,84 @@ timings = []
 ratios = []
 memory_rows = []
 dictionary_rows = []
+timing_rows = []
+scenario_input_bytes = {}
 raw_path = os.environ["BENCH_RAW_FILE"]
 
+# Same-run Rust/FFI deltas inside [DELTA_LOW, DELTA_HIGH] are labelled near_parity.
+DELTA_LOW = 0.99
+DELTA_HIGH = 1.05
+
+def parse_benchmark_name(name):
+    """Split a matrix benchmark name into stage/level/scenario/source/implementation.
+
+    Accepted shapes are ``compress/<level>/<scenario>/matrix/<impl>``,
+    ``decompress/<level>/<scenario>/<source>/matrix/<impl>`` and
+    ``compress-dict/<level>/<scenario>/matrix/<impl>``; anything else raises
+    ValueError.
+    """
+    parts = name.split("/")
+    if len(parts) == 5 and parts[0] == "compress" and parts[3] == "matrix":
+        return {
+            "stage": "compress",
+            "level": parts[1],
+            "scenario": parts[2],
+            "source": None,
+            "implementation": parts[4],
+        }
+    if len(parts) == 6 and parts[0] == "decompress" and parts[4] == "matrix":
+        return {
+            "stage": "decompress",
+            "level": parts[1],
+            "scenario": parts[2],
+            "source": parts[3],
+            "implementation": parts[5],
+        }
+    if len(parts) == 5 and parts[0] == "compress-dict" and parts[3] == "matrix":
+        return {
+            "stage": "compress-dict",
+            "level": parts[1],
+            "scenario": parts[2],
+            "source": None,
+            "implementation": parts[4],
+        }
+    raise ValueError(f"Unsupported benchmark name format: {name} (parts={parts})")
+
+def canonical_key(stage, scenario, level, source):
+    """Build the ``<scenario> + stage=..., level=...[, source=...]`` row key."""
+    params = [f"stage={stage}", f"level={level}"]
+    if source:
+        params.append(f"source={source}")
+    return f"{scenario} + {', '.join(params)}"
+
+def normalize_impl(impl):
+    """Map ``pure_rust``/``c_ffi`` to ``rust``/``ffi``; return other labels unchanged."""
+    if impl == "pure_rust":
+        return "rust"
+    if impl == "c_ffi":
+        return "ffi"
+    return impl
+
+def classify_ratio_delta(delta):
+    """Label a rust/ffi compression-ratio delta (below the band is better for Rust)."""
+    if delta is None:
+        return "insufficient-data"
+    if delta < DELTA_LOW:
+        return "rust_better_smaller"
+    if delta <= DELTA_HIGH:
+        return "near_parity"
+    return "rust_worse_larger"
+
+def classify_speed_delta(delta):
+    """Label a rust/ffi throughput delta (above the band is better for Rust)."""
+    if delta is None:
+        return "insufficient-data"
+    if delta < DELTA_LOW:
+        return "rust_slower"
+    if delta <= DELTA_HIGH:
+        return "near_parity"
+    return "rust_faster"
+
 with open(raw_path) as f:
     for raw_line in f:
         line = raw_line.strip()
@@ -87,6 +164,16 @@ with open(raw_path) as f:
                 "value": round(ms, 3),
             })
             timings.append((name, ms))
+            parsed = parse_benchmark_name(name)
+            timing_rows.append({
+                "name": name,
+                "stage": parsed["stage"],
+                "level": parsed["level"],
+                "scenario": parsed["scenario"],
+                "source": parsed["source"],
+                "implementation": normalize_impl(parsed["implementation"]),
+                "ms_per_iter": ms,
+            })
             continue
 
         report_match = REPORT_RE.match(line)
@@ -103,6 +190,7 @@ with open(raw_path) as f:
                 "rust_ratio": float(rust_ratio),
                 "ffi_ratio": float(ffi_ratio),
             })
+            scenario_input_bytes[scenario] = int(input_bytes)
             continue
 
         mem_match = MEM_RE.match(line)
@@ -173,6 +261,139 @@ if not dictionary_rows:
 with open("benchmark-results.json", "w") as f:
     json.dump(benchmark_results, f, indent=2)
 
+# Index compression ratios by canonical key; ratio rows are keyed under the compress stage.
+ratio_index = {}
+for row in ratios:
+    key = canonical_key("compress", row["scenario"], row["level"], None)
+    ratio_delta = None
+    if row["ffi_ratio"] > 0.0:
+        ratio_delta = row["rust_ratio"] / row["ffi_ratio"]
+    ratio_index[key] = {
+        "meta": {
+            "stage": "compress",
+            "scenario": row["scenario"],
+            "level": row["level"],
+            "source": None,
+        },
+        "rust_ratio": row["rust_ratio"],
+        "ffi_ratio": row["ffi_ratio"],
+        "delta": ratio_delta,
+        "status": classify_ratio_delta(ratio_delta),
+    }
+
+# Group timings by canonical key, one entry per implementation.
+speed_index = defaultdict(dict)
+key_meta = {}
+for row in timing_rows:
+    key = canonical_key(row["stage"], row["scenario"], row["level"], row["source"])
+    key_meta[key] = {
+        "stage": row["stage"],
+        "scenario": row["scenario"],
+        "level": row["level"],
+        "source": row["source"],
+    }
+    impl = row["implementation"]
+    speed_index[key][impl] = {
+        "name": row["name"],
+        "ms_per_iter": row["ms_per_iter"],
+    }
+
+# Join both indexes; keep a key only if it has ratio data or both Rust and FFI timings.
+delta_rows = []
+all_keys = sorted(set(key_meta.keys()) | set(ratio_index.keys()))
+for key in all_keys:
+    ratio_pack = ratio_index.get(
+        key,
+        {
+            "meta": None,
+            "rust_ratio": None,
+            "ffi_ratio": None,
+            "delta": None,
+            "status": "insufficient-data",
+        },
+    )
+    meta = key_meta.get(key) or ratio_pack["meta"]
+    stage = meta["stage"] if meta else "compress"
+    scenario = meta["scenario"] if meta else key.split(" + ")[0]
+    level = meta["level"] if meta else "unknown"
+    source = meta["source"] if meta else None
+    input_bytes = scenario_input_bytes.get(scenario)
+
+    speed_series = {}
+    for impl_name, impl_row in speed_index.get(key, {}).items():
+        ms_value = impl_row["ms_per_iter"]
+        bps_value = None
+        if input_bytes is not None and ms_value is not None and ms_value > 0.0:
+            bps_value = input_bytes / (ms_value / 1000.0)
+        speed_series[impl_name] = {
+            "benchmark_name": impl_row["name"],
+            "ms_per_iter": ms_value,
+            "bytes_per_sec": bps_value,
+        }
+
+    rust_timing = speed_series.get("rust")
+    ffi_timing = speed_series.get("ffi")
+    rust_ms = rust_timing["ms_per_iter"] if rust_timing else None
+    ffi_ms = ffi_timing["ms_per_iter"] if ffi_timing else None
+    rust_bps = rust_timing["bytes_per_sec"] if rust_timing else None
+    ffi_bps = ffi_timing["bytes_per_sec"] if ffi_timing else None
+    # Both implementations process the same input, so rust_bps / ffi_bps equals
+    # ffi_ms / rust_ms. Deriving the delta from the timings keeps it available
+    # even when no REPORT line supplied input_bytes for this scenario.
+    speed_delta = (
+        ffi_ms / rust_ms
+        if (rust_ms is not None and ffi_ms is not None and rust_ms > 0.0 and ffi_ms > 0.0)
+        else None
+    )
+
+    has_comparable_ratio = (
+        ratio_pack["rust_ratio"] is not None and ratio_pack["ffi_ratio"] is not None
+    )
+    has_comparable_speed = rust_timing is not None and ffi_timing is not None
+    if not has_comparable_ratio and not has_comparable_speed:
+        continue
+
+    delta_rows.append(
+        {
+            "key": key,
+            "scenario": scenario,
+            "params": {
+                "stage": stage,
+                "level": level,
+                "source": source,
+            },
+            "input_bytes": input_bytes,
+            "ratio": {
+                "rust": ratio_pack["rust_ratio"],
+                "ffi": ratio_pack["ffi_ratio"],
+                "delta_rust_over_ffi": ratio_pack["delta"],
+                "status": ratio_pack["status"],
+                "reference_band": {
+                    "delta_low": DELTA_LOW,
+                    "delta_high": DELTA_HIGH,
+                },
+                "interpretation": "delta<1 means Rust compressed output smaller than FFI; delta>1 means larger",
+            },
+            "speed": {
+                "series": speed_series,
+                "rust_ms_per_iter": rust_ms,
+                "ffi_ms_per_iter": ffi_ms,
+                "rust_bytes_per_sec": rust_bps,
+                "ffi_bytes_per_sec": ffi_bps,
+                "delta_rust_over_ffi": speed_delta,
+                "status": classify_speed_delta(speed_delta),
+                "reference_band": {
+                    "delta_low": DELTA_LOW,
+                    "delta_high": DELTA_HIGH,
+                },
+                "interpretation": "delta>1 means Rust faster than FFI; delta<1 means slower",
+            },
+        }
+    )
+
+with open("benchmark-delta.json", "w") as f:
+    json.dump(delta_rows, f, indent=2)
+
 lines = [
     "# Benchmark Report",
     "",
@@ -232,8 +453,137 @@ for name, ms in sorted(timings):
 with open("benchmark-report.md", "w") as f:
     f.write("\n".join(lines) + "\n")
 
+delta_lines = [
+    "# Benchmark Delta Report",
+    "",
+    "Generated by `.github/scripts/run-benchmarks.sh` from `cargo bench --bench compare_ffi`.",
+    "",
+    "## Ratio pack",
+    "",
+    "Interpretation: lower ratio is better (smaller compressed output).",
+    "",
+    "### Rust compression ratio",
+    "",
+    "| Key | Rust ratio |",
+    "| --- | ---: |",
+]
+
+def format_ratio(value):
+    """Render a ratio with up to six significant digits."""
+    return f"{value:.6g}"
+
+for row in delta_rows:
+    key = markdown_table_escape(row["key"])
+    rust_ratio = row["ratio"]["rust"]
+    if rust_ratio is None:
+        continue
+    delta_lines.append(f"| {key} | {format_ratio(rust_ratio)} |")
+
+delta_lines.extend(
+    [
+        "",
+        "### FFI compression ratio",
+        "",
+        "| Key | FFI ratio |",
+        "| --- | ---: |",
+    ]
+)
+
+for row in delta_rows:
+    key = markdown_table_escape(row["key"])
+    ffi_ratio = row["ratio"]["ffi"]
+    if ffi_ratio is None:
+        continue
+    delta_lines.append(f"| {key} | {format_ratio(ffi_ratio)} |")
+
+delta_lines.extend(
+    [
+        "",
+        "### Rust/FFI ratio delta",
+        "",
+        f"Reference band: `{DELTA_LOW:.2f}–{DELTA_HIGH:.2f}` (near parity).",
+        "",
+        "| Key | Delta | Status |",
+        "| --- | ---: | --- |",
+    ]
+)
+
+for row in delta_rows:
+    key = markdown_table_escape(row["key"])
+    delta = row["ratio"]["delta_rust_over_ffi"]
+    if delta is None:
+        continue
+    status = row["ratio"]["status"]
+    delta_lines.append(f"| {key} | {delta:.4f} | {status} |")
+
+delta_lines.extend(
+    [
+        "",
+        "## Speed pack",
+        "",
+        "Interpretation: higher speed is better (`rust_bytes_per_sec / ffi_bytes_per_sec`).",
+        "",
+        "### Rust speed",
+        "",
+        "| Key | Rust bytes/sec | Rust ms/iter |",
+        "| --- | ---: | ---: |",
+    ]
+)
+
+for row in delta_rows:
+    key = markdown_table_escape(row["key"])
+    bps = row["speed"]["rust_bytes_per_sec"]
+    ms = row["speed"]["rust_ms_per_iter"]
+    if bps is None or ms is None:
+        continue
+    delta_lines.append(f"| {key} | {bps:.2f} | {ms:.3f} |")
+
+delta_lines.extend(
+    [
+        "",
+        "### FFI speed",
+        "",
+        "| Key | FFI bytes/sec | FFI ms/iter |",
+        "| --- | ---: | ---: |",
+    ]
+)
+
+for row in delta_rows:
+    key = markdown_table_escape(row["key"])
+    bps = row["speed"]["ffi_bytes_per_sec"]
+    ms = row["speed"]["ffi_ms_per_iter"]
+    if bps is None or ms is None:
+        continue
+    delta_lines.append(f"| {key} | {bps:.2f} | {ms:.3f} |")
+
+delta_lines.extend(
+    [
+        "",
+        "### Rust/FFI speed delta",
+        "",
+        f"Reference band: `{DELTA_LOW:.2f}–{DELTA_HIGH:.2f}` (near parity).",
+        "",
+        "| Key | Delta | Status |",
+        "| --- | ---: | --- |",
+    ]
+)
+
+for row in delta_rows:
+    key = markdown_table_escape(row["key"])
+    delta = row["speed"]["delta_rust_over_ffi"]
+    if delta is None:
+        continue
+    status = row["speed"]["status"]
+    delta_lines.append(f"| {key} | {delta:.4f} | {status} |")
+
+# The report contains an en dash, so do not rely on the locale's default encoding.
+with open("benchmark-delta.md", "w", encoding="utf-8") as f:
+    f.write("\n".join(delta_lines) + "\n")
+
 print(f"Wrote {len(benchmark_results)} timing results to benchmark-results.json", file=sys.stderr)
 print(f"Wrote {len(ratios)} ratio rows to benchmark-report.md", file=sys.stderr)
 print(f"Wrote {len(memory_rows)} memory rows to benchmark-report.md", file=sys.stderr)
 print(f"Wrote {len(dictionary_rows)} dictionary rows to benchmark-report.md", file=sys.stderr)
+print(f"Wrote {len(delta_rows)} canonical rows to benchmark-delta.json", file=sys.stderr)
+print(f"Wrote {len(delta_rows)} canonical rows to benchmark-delta.md", file=sys.stderr) PYEOF diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dcfbc6d8..db351195 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -133,6 +133,15 @@ jobs: - name: Run benchmarks run: bash .github/scripts/run-benchmarks.sh + - name: Upload benchmark delta artifacts + uses: actions/upload-artifact@v4 + with: + name: benchmark-delta + path: | + benchmark-delta.json + benchmark-delta.md + if-no-files-found: error + - name: Store benchmark results if: steps.bot-token.outputs.token != '' uses: benchmark-action/github-action-benchmark@v1 diff --git a/.gitignore b/.gitignore index 0003a24b..2640e9a4 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ fuzz_decodecorpus perf.data* benchmark-results.json benchmark-report.md +benchmark-delta.json +benchmark-delta.md fuzz/corpus .idea /= diff --git a/BENCHMARKS.md b/BENCHMARKS.md index 1d732c3e..cd6a6093 100644 --- a/BENCHMARKS.md +++ b/BENCHMARKS.md @@ -86,5 +86,22 @@ bash scripts/bench-flamegraph.sh decompress/default/decodecorpus-z000033/rust_st - input+output buffer size estimate tables (`REPORT_MEM`) - dictionary compression tables (`REPORT_DICT`) - timing rows for all benchmark functions +- `benchmark-delta.json` with canonical `(scenario + params)` rows including: + - raw Rust/FFI ratio values and `rust/ffi` ratio delta + - raw Rust/FFI speed values (`bytes/sec`) and `rust/ffi` speed delta +- `benchmark-delta.md` with two packs: + - Ratio pack: Rust ratio, FFI ratio, Rust/FFI ratio delta + - Speed pack: Rust speed, FFI speed, Rust/FFI speed delta + +Delta interpretation (direct same-run comparison on the same environment): + +- **Ratio delta** (`rust_ratio / ffi_ratio`): lower is better for Rust +- **Speed delta** (`rust_bytes_per_sec / ffi_bytes_per_sec`): higher is better for Rust + +Status labels in `benchmark-delta` are derived directly from the same-run deltas (no environment 
+calibration/pre-test coefficients): + +- **ratio status**: `rust_better_smaller` when `< 0.99`, `near_parity` when `0.99..=1.05`, `rust_worse_larger` when `> 1.05` +- **speed status**: `rust_faster` when `> 1.05`, `near_parity` when `0.99..=1.05`, `rust_slower` when `< 0.99` Criterion also writes its usual detailed estimates under `target/criterion/`. diff --git a/README.md b/README.md index 8dcad430..606164d9 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,9 @@ When the `dict_builder` feature is enabled, the `dictionary` module can create r ## Benchmarking -Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). The suite compares `structured-zstd` against the C reference across small payloads, entropy extremes, a `100 MiB` large-stream scenario, repository corpus fixtures, and optional local Silesia corpora. Reports now include compression ratios, input+output buffer size estimates, and C FFI dictionary compression (with/without dictionary) for small and corpus scenarios. +Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). The suite compares `structured-zstd` against the C reference across small payloads, entropy extremes, a `100 MiB` large-stream scenario, repository corpus fixtures, and optional local Silesia corpora. Reports include compression ratios, input+output buffer size estimates, and C FFI dictionary compression (with/without dictionary) for small and corpus scenarios, plus Rust-vs-FFI delta packs (`benchmark-delta.json`, `benchmark-delta.md`) grouped by canonical `(scenario + params)` keys. + +Benchmark report files are generated by `.github/scripts/run-benchmarks.sh` and are kept as ignored local/CI artifacts rather than tracked files in this repository. ## Usage