diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh
index 0d235420..a467b2a1 100755
--- a/.github/scripts/run-benchmarks.sh
+++ b/.github/scripts/run-benchmarks.sh
@@ -26,6 +26,7 @@ import json
 import os
 import re
 import sys
+from collections import defaultdict
 
 BENCH_RE = re.compile(r"test (\S+)\s+\.\.\. bench:\s+([\d,]+) ns/iter")
 REPORT_RE = re.compile(
@@ -70,8 +71,72 @@ timings = []
 ratios = []
 memory_rows = []
 dictionary_rows = []
+timing_rows = []  # one dict per parsed bench line: name, stage, level, scenario, source, implementation, ms_per_iter
+scenario_input_bytes = {}  # scenario -> input size in bytes, filled from matched report lines
 raw_path = os.environ["BENCH_RAW_FILE"]
 
+DELTA_LOW = 0.99  # inclusive lower edge of the rust/ffi delta band labelled "near_parity"
+DELTA_HIGH = 1.05  # inclusive upper edge of the rust/ffi delta band labelled "near_parity"
+
+def parse_benchmark_name(name):
+    """Split a slash-separated matrix benchmark name into its labelled fields.
+
+    Accepted layouts (anything else raises ValueError):
+      compress/<level>/<scenario>/matrix/<implementation>
+      compress-dict/<level>/<scenario>/matrix/<implementation>
+      decompress/<level>/<scenario>/<source>/matrix/<implementation>
+    The returned dict has keys stage, level, scenario, source, implementation;
+    source is None for both compress stages.
+    """
+    parts = name.split("/")
+    stage = parts[0]
+    # Both compress stages share the five-part layout; only decompress has a source.
+    if stage in ("compress", "compress-dict") and len(parts) == 5 and parts[3] == "matrix":
+        _, level, scenario, _, implementation = parts
+        source = None
+    elif stage == "decompress" and len(parts) == 6 and parts[4] == "matrix":
+        _, level, scenario, source, _, implementation = parts
+    else:
+        raise ValueError(f"Unsupported benchmark name format: {name} (parts={parts})")
+    return {
+        "stage": stage,
+        "level": level,
+        "scenario": scenario,
+        "source": source,
+        "implementation": implementation,
+    }
+
+def canonical_key(stage, scenario, level, source):
+    """Return "<scenario> + stage=<stage>, level=<level>", with ", source=<source>"
+    appended only when source is truthy."""
+    source_suffix = f", source={source}" if source else ""
+    return f"{scenario} + stage={stage}, level={level}{source_suffix}"
+
+def normalize_impl(impl):
+    """Shorten a benchmark implementation label for the delta report.
+
+    "pure_rust" becomes "rust" and "c_ffi" becomes "ffi"; any other label is returned unchanged.
+    """
+    return {"pure_rust": "rust", "c_ffi": "ffi"}.get(impl, impl)
+
+def classify_ratio_delta(delta):
+    """Label a rust/ffi compression-ratio delta against the DELTA_LOW..DELTA_HIGH band."""
+    if delta is None:
+        return "insufficient-data"
+    # Inside the inclusive band is parity; below it Rust output is smaller, above it larger.
+    if DELTA_LOW <= delta <= DELTA_HIGH:
+        return "near_parity"
+    return "rust_better_smaller" if delta < DELTA_LOW else "rust_worse_larger"
+
+def classify_speed_delta(delta):
+    """Label a rust/ffi speed delta against the DELTA_LOW..DELTA_HIGH band."""
+    if delta is None:
+        return "insufficient-data"
+    # Inside the inclusive band is parity; below it Rust is slower, above it faster.
+    if DELTA_LOW <= delta <= DELTA_HIGH:
+        return "near_parity"
+    return "rust_slower" if delta < DELTA_LOW else "rust_faster"
+
 with open(raw_path) as f:
     for raw_line in f:
         line = raw_line.strip()
@@ -87,6 +152,16 @@ with open(raw_path) as f:
                 "value": round(ms, 3),
             })
             timings.append((name, ms))
+            parsed = parse_benchmark_name(name)
+            timing_rows.append({
+                "name": name,
+                "stage": parsed["stage"],
+                "level": parsed["level"],
+                "scenario": parsed["scenario"],
+                "source": parsed["source"],
+                "implementation": normalize_impl(parsed["implementation"]),
+                "ms_per_iter": ms,
+            })
             continue
 
         report_match = REPORT_RE.match(line)
@@ -103,6 +178,7 @@ with open(raw_path) as f:
                 "rust_ratio": float(rust_ratio),
                 "ffi_ratio": float(ffi_ratio),
             })
+            scenario_input_bytes[scenario] = int(input_bytes)
             continue
 
         mem_match = MEM_RE.match(line)
@@ -173,6 +249,133 @@ if not dictionary_rows:
 with open("benchmark-results.json", "w") as f:
     json.dump(benchmark_results, f, indent=2)
 
+ratio_index = {}  # canonical key -> ratio values, rust/ffi delta and status
+for row in ratios:
+    key = canonical_key("compress", row["scenario"], row["level"], None)  # ratio rows are keyed without a source
+    ratio_delta = None
+    if row["ffi_ratio"] > 0.0:  # non-positive FFI ratio: leave the delta as None
+        ratio_delta = row["rust_ratio"] / row["ffi_ratio"]
+    ratio_index[key] = {
+        "meta": {
+            "stage": "compress",
+            "scenario": row["scenario"],
+            "level": row["level"],
+            "source": None,
+        },
+        "rust_ratio": row["rust_ratio"],
+        "ffi_ratio": row["ffi_ratio"],
+        "delta": ratio_delta,
+        "status": classify_ratio_delta(ratio_delta),
+    }
+
+speed_index = defaultdict(dict)  # canonical key -> {implementation: timing entry}
+key_meta = {}  # canonical key -> stage/scenario/level/source of the timing row
+for row in timing_rows:
+    key = canonical_key(row["stage"], row["scenario"], row["level"], row["source"])
+    key_meta[key] = {
+        "stage": row["stage"],
+        "scenario": row["scenario"],
+        "level": row["level"],
+        "source": row["source"],
+    }
+    impl = row["implementation"]
+    speed_index[key][impl] = {  # a repeated (key, implementation) pair keeps the last row
+        "name": row["name"],
+        "ms_per_iter": row["ms_per_iter"],
+    }
+
+delta_rows = []  # one entry per canonical key that has a ratio pair and/or both timings
+all_keys = sorted(set(key_meta) | set(ratio_index))
+for key in all_keys:
+    ratio_pack = ratio_index.get(
+        key,
+        {
+            "meta": None,
+            "rust_ratio": None,
+            "ffi_ratio": None,
+            "delta": None,
+            "status": "insufficient-data",
+        },
+    )
+    meta = key_meta.get(key) or ratio_pack["meta"]
+    stage = meta["stage"] if meta else "compress"  # defensive default; keys come from key_meta or ratio_index
+    scenario = meta["scenario"] if meta else key.split(" + ")[0]
+    level = meta["level"] if meta else "unknown"
+    source = meta["source"] if meta else None
+    input_bytes = scenario_input_bytes.get(scenario)  # None when no size was recorded for the scenario
+
+    speed_series = {}
+    for impl_name, impl_row in speed_index.get(key, {}).items():
+        ms_value = impl_row["ms_per_iter"]
+        bps_value = None
+        if input_bytes is not None and ms_value is not None and ms_value > 0.0:
+            bps_value = input_bytes / (ms_value / 1000.0)  # ms per iteration -> bytes per second
+        speed_series[impl_name] = {
+            "benchmark_name": impl_row["name"],
+            "ms_per_iter": ms_value,
+            "bytes_per_sec": bps_value,
+        }
+
+    rust_timing = speed_series.get("rust")
+    ffi_timing = speed_series.get("ffi")
+    rust_ms = rust_timing["ms_per_iter"] if rust_timing else None
+    ffi_ms = ffi_timing["ms_per_iter"] if ffi_timing else None
+    rust_bps = rust_timing["bytes_per_sec"] if rust_timing else None
+    ffi_bps = ffi_timing["bytes_per_sec"] if ffi_timing else None
+    # Throughput ratio equals the inverse time ratio (the input size cancels), so
+    # compute it from the timings; it stays defined when input_bytes is unknown.
+    speed_delta = None
+    if rust_ms is not None and ffi_ms is not None and rust_ms > 0.0 and ffi_ms > 0.0:
+        speed_delta = ffi_ms / rust_ms
+
+    has_comparable_ratio = (
+        ratio_pack["rust_ratio"] is not None and ratio_pack["ffi_ratio"] is not None
+    )
+    has_comparable_speed = rust_timing is not None and ffi_timing is not None
+    if not has_comparable_ratio and not has_comparable_speed:  # nothing to compare for this key
+        continue
+
+    delta_rows.append(
+        {
+            "key": key,
+            "scenario": scenario,
+            "params": {
+                "stage": stage,
+                "level": level,
+                "source": source,
+            },
+            "input_bytes": input_bytes,
+            "ratio": {
+                "rust": ratio_pack["rust_ratio"],
+                "ffi": ratio_pack["ffi_ratio"],
+                "delta_rust_over_ffi": ratio_pack["delta"],
+                "status": ratio_pack["status"],
+                "reference_band": {
+                    "delta_low": DELTA_LOW,
+                    "delta_high": DELTA_HIGH,
+                },
+                "interpretation": "delta<1 means Rust compressed output smaller than FFI; delta>1 means larger",
+            },
+            "speed": {
+                "series": speed_series,
+                "rust_ms_per_iter": rust_ms,
+                "ffi_ms_per_iter": ffi_ms,
+                "rust_bytes_per_sec": rust_bps,
+                "ffi_bytes_per_sec": ffi_bps,
+                "delta_rust_over_ffi": speed_delta,
+                "status": classify_speed_delta(speed_delta),
+                "reference_band": {
+                    "delta_low": DELTA_LOW,
+                    "delta_high": DELTA_HIGH,
+                },
+                "interpretation": "delta>1 means Rust faster than FFI; delta<1 means slower",
+            },
+        }
+    )
+
+with open("benchmark-delta.json", "w") as f:
+    json.dump(delta_rows, f, indent=2)
+
 lines = [
     "# Benchmark Report",
     "",
@@ -232,8 +435,135 @@ for name, ms in sorted(timings):
 with open("benchmark-report.md", "w") as f:
     f.write("\n".join(lines) + "\n")
 
+delta_lines = [  # markdown lines of benchmark-delta.md; tables are appended section by section
+    "# Benchmark Delta Report",
+    "",
+    "Generated by `.github/scripts/run-benchmarks.sh` from `cargo bench --bench compare_ffi`.",
+    "",
+    "## Ratio pack",
+    "",
+    "Interpretation: lower ratio is better (smaller compressed output).",
+    "",
+    "### Rust compression ratio",
+    "",
+    "| Key | Rust ratio |",
+    "| --- | ---: |",
+]
+
+def format_ratio(value):
+    return format(value, ".6g")  # general format, at most six significant digits
+
+for row in delta_rows:  # Rust ratio table
+    key = markdown_table_escape(row["key"])  # helper defined outside this hunk
+    rust_ratio = row["ratio"]["rust"]
+    if rust_ratio is None:  # row has no ratio data; leave it out of this table
+        continue
+    delta_lines.append(f"| {key} | {format_ratio(rust_ratio)} |")
+
+delta_lines.extend(
+    [
+        "",
+        "### FFI compression ratio",
+        "",
+        "| Key | FFI ratio |",
+        "| --- | ---: |",
+    ]
+)
+
+for row in delta_rows:  # FFI ratio table
+    key = markdown_table_escape(row["key"])
+    ffi_ratio = row["ratio"]["ffi"]
+    if ffi_ratio is None:
+        continue
+    delta_lines.append(f"| {key} | {format_ratio(ffi_ratio)} |")
+
+delta_lines.extend(
+    [
+        "",
+        "### Rust/FFI ratio delta",
+        "",
+        f"Reference band: `{DELTA_LOW:.2f}–{DELTA_HIGH:.2f}` (near parity).",
+        "",
+        "| Key | Delta | Status |",
+        "| --- | ---: | --- |",
+    ]
+)
+
+for row in delta_rows:  # ratio delta table; rows without a delta are skipped
+    key = markdown_table_escape(row["key"])
+    delta = row["ratio"]["delta_rust_over_ffi"]
+    if delta is None:
+        continue
+    status = row["ratio"]["status"]
+    delta_lines.append(f"| {key} | {delta:.4f} | {status} |")
+
+delta_lines.extend(
+    [
+        "",
+        "## Speed pack",
+        "",
+        "Interpretation: higher speed is better (`rust_bytes_per_sec / ffi_bytes_per_sec`).",
+        "",
+        "### Rust speed",
+        "",
+        "| Key | Rust bytes/sec | Rust ms/iter |",
+        "| --- | ---: | ---: |",
+    ]
+)
+
+for row in delta_rows:  # Rust speed table
+    key = markdown_table_escape(row["key"])
+    bps = row["speed"]["rust_bytes_per_sec"]
+    ms = row["speed"]["rust_ms_per_iter"]
+    if bps is None or ms is None:  # needs both a timing and a bytes/sec value
+        continue
+    delta_lines.append(f"| {key} | {bps:.2f} | {ms:.3f} |")
+
+delta_lines.extend(
+    [
+        "",
+        "### FFI speed",
+        "",
+        "| Key | FFI bytes/sec | FFI ms/iter |",
+        "| --- | ---: | ---: |",
+    ]
+)
+
+for row in delta_rows:  # FFI speed table
+    key = markdown_table_escape(row["key"])
+    bps = row["speed"]["ffi_bytes_per_sec"]
+    ms = row["speed"]["ffi_ms_per_iter"]
+    if bps is None or ms is None:
+        continue
+    delta_lines.append(f"| {key} | {bps:.2f} | {ms:.3f} |")
+
+delta_lines.extend(
+    [
+        "",
+        "### Rust/FFI speed delta",
+        "",
+        f"Reference band: `{DELTA_LOW:.2f}–{DELTA_HIGH:.2f}` (near parity).",
+        "",
+        "| Key | Delta | Status |",
+        "| --- | ---: | --- |",
+    ]
+)
+
+for row in delta_rows:  # speed delta table; rows without a delta are skipped
+    key = markdown_table_escape(row["key"])
+    delta = row["speed"]["delta_rust_over_ffi"]
+    if delta is None:
+        continue
+    status = row["speed"]["status"]
+    delta_lines.append(f"| {key} | {delta:.4f} | {status} |")
+
+with open("benchmark-delta.md", "w", encoding="utf-8") as f:  # lines contain an en dash; do not rely on the locale encoding
+    f.write("\n".join(delta_lines) + "\n")
+
 print(f"Wrote {len(benchmark_results)} timing results to benchmark-results.json", file=sys.stderr)
 print(f"Wrote {len(ratios)} ratio rows to benchmark-report.md", file=sys.stderr)
 print(f"Wrote {len(memory_rows)} memory rows to benchmark-report.md", file=sys.stderr)
 print(f"Wrote {len(dictionary_rows)} dictionary rows to benchmark-report.md", file=sys.stderr)
+print(f"Wrote {len(delta_rows)} canonical rows to benchmark-delta.json", file=sys.stderr)
+print(f"Wrote {len(delta_rows)} canonical rows to benchmark-delta.md", file=sys.stderr)
 PYEOF
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index dcfbc6d8..db351195 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -133,6 +133,15 @@ jobs:
       - name: Run benchmarks
         run: bash .github/scripts/run-benchmarks.sh
 
+      - name: Upload benchmark delta artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-delta
+          path: |
+            benchmark-delta.json
+            benchmark-delta.md
+          if-no-files-found: error
+
       - name: Store benchmark results
         if: steps.bot-token.outputs.token != ''
         uses: benchmark-action/github-action-benchmark@v1
diff --git a/.gitignore b/.gitignore
index 0003a24b..2640e9a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,8 @@ fuzz_decodecorpus
 perf.data*
 benchmark-results.json
 benchmark-report.md
+benchmark-delta.json
+benchmark-delta.md
 fuzz/corpus
 .idea
 /=
diff --git a/BENCHMARKS.md b/BENCHMARKS.md
index 1d732c3e..cd6a6093 100644
--- a/BENCHMARKS.md
+++ b/BENCHMARKS.md
@@ -86,5 +86,22 @@ bash scripts/bench-flamegraph.sh decompress/default/decodecorpus-z000033/rust_st
   - input+output buffer size estimate tables (`REPORT_MEM`)
   - dictionary compression tables (`REPORT_DICT`)
   - timing rows for all benchmark functions
+- `benchmark-delta.json` with canonical `(scenario + params)` rows including:
+  - raw Rust/FFI ratio values and `rust/ffi` ratio delta
+  - raw Rust/FFI speed values (`bytes/sec`) and `rust/ffi` speed delta
+- `benchmark-delta.md` with two packs:
+  - Ratio pack: Rust ratio, FFI ratio, Rust/FFI ratio delta
+  - Speed pack: Rust speed, FFI speed, Rust/FFI speed delta
+
+Delta interpretation (direct same-run comparison on the same environment):
+
+- **Ratio delta** (`rust_ratio / ffi_ratio`): lower is better for Rust
+- **Speed delta** (`rust_bytes_per_sec / ffi_bytes_per_sec`): higher is better for Rust
+
+Status labels in `benchmark-delta` are derived directly from the same-run deltas (no environment
+calibration/pre-test coefficients):
+
+- **ratio status**: `rust_better_smaller` when `< 0.99`, `near_parity` when `0.99..=1.05`, `rust_worse_larger` when `> 1.05`
+- **speed status**: `rust_faster` when `> 1.05`, `near_parity` when `0.99..=1.05`, `rust_slower` when `< 0.99`
 
 Criterion also writes its usual detailed estimates under `target/criterion/`.
diff --git a/README.md b/README.md
index 8dcad430..606164d9 100644
--- a/README.md
+++ b/README.md
@@ -58,7 +58,9 @@ When the `dict_builder` feature is enabled, the `dictionary` module can create r
 
 ## Benchmarking
 
-Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). The suite compares `structured-zstd` against the C reference across small payloads, entropy extremes, a `100 MiB` large-stream scenario, repository corpus fixtures, and optional local Silesia corpora. Reports now include compression ratios, input+output buffer size estimates, and C FFI dictionary compression (with/without dictionary) for small and corpus scenarios.
+Performance tracking lives in [BENCHMARKS.md](BENCHMARKS.md). The suite compares `structured-zstd` against the C reference across small payloads, entropy extremes, a `100 MiB` large-stream scenario, repository corpus fixtures, and optional local Silesia corpora. Reports include compression ratios, input+output buffer size estimates, and C FFI dictionary compression (with/without dictionary) for small and corpus scenarios, plus Rust-vs-FFI delta packs (`benchmark-delta.json`, `benchmark-delta.md`) grouped by canonical `(scenario + params)` keys.
+
+Benchmark report files are generated by `.github/scripts/run-benchmarks.sh` and are kept as ignored local/CI artifacts rather than tracked files in this repository.
 
 ## Usage