diff --git a/.github/scripts/build_csp_pr_comment.py b/.github/scripts/build_csp_pr_comment.py
new file mode 100644
index 000000000..31e9670f9
--- /dev/null
+++ b/.github/scripts/build_csp_pr_comment.py
@@ -0,0 +1,290 @@
+#!/usr/bin/env python3
+"""Build a sticky PR comment for the CSP benchmarks workflow.
+
+Reads the CSV emitted by ``scripts/run_csp_benchmarks.sh`` (one row per
+circuit) and renders it as a markdown table with human-readable units. If
+``--baseline-csv`` is given, each metric cell appends a percentage delta
+versus the baseline value (last successful CSP-benchmarks run on main).
+"""
+
+from __future__ import annotations
+
+import argparse
+import csv
+from pathlib import Path
+
+# Hidden HTML-comment marker used to find and update the sticky PR comment;
+# the exact string is arbitrary, but it must match `marker` in
+# csp-benchmarks.yml.
+MARKER = "<!-- csp-benchmarks-sticky-comment -->"
+MAX_COMMENT_CHARS = 62000
+
+# Metric columns we render with a delta. Order matches the table header.
+METRIC_COLUMNS: tuple[tuple[str, str], ...] = (
+ ("num_constraints", "int"),
+ ("num_witnesses", "int"),
+ ("prover_time_ms", "ms"),
+ ("prover_peak_rss_kb", "kb"),
+ ("prover_heap_peak_bytes", "bytes"),
+ ("verifier_time_ms", "ms"),
+ ("proof_size_bytes", "bytes"),
+ ("pkp_size_bytes", "bytes"),
+)
+
+
+def fmt_bytes(value: float) -> str:
+ if value <= 0:
+ return "—"
+ units = ("B", "KB", "MB", "GB", "TB")
+ idx = 0
+ while value >= 1024 and idx < len(units) - 1:
+ value /= 1024.0
+ idx += 1
+ if value >= 100 or idx == 0:
+ return f"{value:.0f} {units[idx]}"
+ return f"{value:.2f} {units[idx]}"
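+# Illustrative behavior: fmt_bytes(512) == "512 B", fmt_bytes(1536) == "1.50 KB",
+# and non-positive values render as "—".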
+
+
+def fmt_kb_to_bytes(rss_kb: float) -> str:
+ return fmt_bytes(rss_kb * 1024.0)
+
+
+def fmt_ms(ms: float) -> str:
+ if ms <= 0:
+ return "—"
+ if ms < 1000:
+ return f"{ms:.0f} ms"
+ return f"{ms / 1000.0:.2f} s"
+
+
+def fmt_int(value: float) -> str:
+ if value <= 0:
+ return "—"
+ return f"{int(round(value)):,}"
+
+
+def fmt_value(unit: str, value: float) -> str:
+ if unit == "ms":
+ return fmt_ms(value)
+ if unit == "kb":
+ return fmt_kb_to_bytes(value)
+ if unit == "int":
+ return fmt_int(value)
+ return fmt_bytes(value)
+
+
+def fmt_delta(current: float, baseline: float | None) -> str:
+    """Return a compact delta-vs-baseline annotation, or the empty string.
+
+    - Returns "" when the baseline value is missing or non-positive (the
+      caller renders "(new)" for circuits absent from the baseline).
+    - Returns " (±0.0%)" / " (+1.2%)" / " (-3.4%)" otherwise.
+    """
+ if baseline is None:
+ return ""
+ if baseline <= 0:
+ # Baseline collected zero (e.g., older CSV without this metric).
+ # Don't show a misleading divide-by-zero ratio.
+ return ""
+ if current <= 0:
+ return ""
+ delta_pct = (current - baseline) / baseline * 100.0
+ if abs(delta_pct) < 0.05:
+ return " (±0.0%)"
+ sign = "+" if delta_pct > 0 else ""
+ return f" ({sign}{delta_pct:.1f}%)"
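+# Illustrative: fmt_delta(110.0, 100.0) == " (+10.0%)", fmt_delta(100.0, 100.0)
+# == " (±0.0%)", and fmt_delta(5.0, None) == "" (no baseline at all).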
+
+
+def status_with_icon(status: str) -> str:
+ normalized = (status or "unknown").strip().lower()
+ labels = {
+ "success": "[PASS]",
+ "failure": "[FAIL]",
+ "cancelled": "[CANCELLED]",
+ "skipped": "[SKIPPED]",
+ }
+ return f"{labels.get(normalized, '[INFO]')} {normalized}"
+
+
+def read_rows(csv_path: Path) -> list[dict[str, str]]:
+ if not csv_path.is_file():
+ return []
+ with csv_path.open(newline="") as f:
+ return list(csv.DictReader(f))
+
+
+def index_baseline(rows: list[dict[str, str]]) -> dict[str, dict[str, float]]:
+ """Index baseline rows by circuit name with float metric values."""
+ out: dict[str, dict[str, float]] = {}
+ for row in rows:
+ circuit = (row.get("circuit") or "").strip()
+ if not circuit:
+ continue
+ metrics: dict[str, float] = {}
+ for metric, _unit in METRIC_COLUMNS:
+ try:
+ metrics[metric] = float(row.get(metric) or 0)
+ except ValueError:
+ metrics[metric] = 0.0
+ out[circuit] = metrics
+ return out
+
+
+def render_table(
+ rows: list[dict[str, str]],
+ baseline: dict[str, dict[str, float]],
+ has_baseline_file: bool,
+) -> str:
+ if not rows:
+ return "_No benchmark results were produced._"
+
+ header = (
+ "| Circuit | Constraints | Witnesses | Prover time | Peak RSS | "
+ "Peak heap | Verifier time | Proof size | PKP size |"
+ )
+ sep = "|---|---:|---:|---:|---:|---:|---:|---:|---:|"
+ lines = [header, sep]
+
+ for row in sorted(rows, key=lambda r: r.get("circuit", "")):
+ circuit = row.get("circuit", "")
+ baseline_metrics = baseline.get(circuit)
+
+ cells = [f"`{circuit}`"]
+ for metric, unit in METRIC_COLUMNS:
+ try:
+ value = float(row.get(metric) or 0)
+ except ValueError:
+ value = 0.0
+
+ value_str = fmt_value(unit, value)
+
+ if has_baseline_file and value_str != "—":
+ if baseline_metrics is None:
+ delta = " (new)"
+ else:
+ delta = fmt_delta(value, baseline_metrics.get(metric))
+ cells.append(f"{value_str}{delta}")
+ else:
+ cells.append(value_str)
+ lines.append("| " + " | ".join(cells) + " |")
+
+ return "\n".join(lines)
+
+
+def compose_comment(
+ rows: list[dict[str, str]],
+ baseline: dict[str, dict[str, float]],
+ baseline_run_id: str,
+ has_baseline_file: bool,
+ run_id: str,
+ run_url: str,
+ sha: str,
+ status: str,
+ runs_per_circuit: str,
+) -> str:
+ short_sha = sha[:12] if sha else "unknown"
+ table = render_table(rows, baseline, has_baseline_file)
+
+ if has_baseline_file:
+ if baseline_run_id:
+ baseline_note = (
+ f"Each metric cell shows the current value followed by the "
+ f"percentage delta against the latest successful "
+ f"[`main` run #{baseline_run_id}](https://github.com/worldfnd/provekit/actions/runs/{baseline_run_id}). "
+ f"`(new)` marks circuits absent from the baseline."
+ )
+ else:
+ baseline_note = (
+ "Each metric cell shows the current value followed by the "
+ "percentage delta against the latest successful `main` run. "
+ "`(new)` marks circuits absent from the baseline."
+ )
+ else:
+ baseline_note = (
+ "_No baseline available yet — deltas will appear once this "
+ "workflow has produced at least one successful `main` run._"
+ )
+
+ lines = [
+ MARKER,
+ "## CSP benchmarks",
+ "",
+ "| Metric | Value |",
+ "|--------|-------|",
+ f"| Workflow status | {status_with_icon(status)} |",
+ f"| Commit | `{short_sha}` |",
+ f"| Run | [#{run_id}]({run_url}) |",
+ f"| Circuits benchmarked | {len(rows)} |",
+        f"| Iterations averaged per circuit | {runs_per_circuit} |",
+        "",
+        "Prover time, peak RSS, peak heap, and verifier time are arithmetic means "
+        "across the iterations. Peak heap is the largest "
+        "`peak memory` entry in `provekit-cli prove`'s tracing output; peak RSS "
+        "is `%M` (maximum resident set size, KiB) from `/usr/bin/time -f '%e %M'`.",
+ "",
+        baseline_note,
+        "",
+        "<details>",
+        "<summary>Results</summary>",
+        "",
+        table,
+        "",
+        "</details>",
+        "",
+ ]
+ return "\n".join(lines)
+
+
+def parse_args() -> argparse.Namespace:
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument("--results-csv", required=True, type=Path)
+ parser.add_argument(
+ "--baseline-csv",
+ type=Path,
+ default=None,
+ help="Optional CSV from the latest successful main run.",
+ )
+ parser.add_argument(
+ "--baseline-run-id",
+ default="",
+ help="Optional Actions run id of the baseline (for the link in the comment).",
+ )
+ parser.add_argument("--output", required=True, type=Path)
+ parser.add_argument("--run-id", required=True)
+ parser.add_argument("--run-url", required=True)
+ parser.add_argument("--sha", required=True)
+ parser.add_argument("--status", required=True)
+ parser.add_argument("--runs-per-circuit", required=True)
+ return parser.parse_args()
+
+
+def main() -> None:
+ args = parse_args()
+ rows = read_rows(args.results_csv)
+
+ has_baseline_file = bool(
+ args.baseline_csv and args.baseline_csv.is_file()
+ )
+ baseline_rows = read_rows(args.baseline_csv) if has_baseline_file else []
+ baseline = index_baseline(baseline_rows)
+
+ body = compose_comment(
+ rows=rows,
+ baseline=baseline,
+ baseline_run_id=args.baseline_run_id,
+ has_baseline_file=has_baseline_file,
+ run_id=args.run_id,
+ run_url=args.run_url,
+ sha=args.sha,
+ status=args.status,
+ runs_per_circuit=args.runs_per_circuit,
+ )
+ if len(body) > MAX_COMMENT_CHARS:
+ cut = body[: MAX_COMMENT_CHARS - 80].rstrip()
+ body = f"{cut}\n\n_Comment truncated due to GitHub size limits._\n"
+
+ args.output.parent.mkdir(parents=True, exist_ok=True)
+ args.output.write_text(body, encoding="utf-8")
+ print(f"Wrote PR comment body to {args.output} ({len(body)} chars)")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/.github/scripts/build_noir_pr_comment.py b/.github/scripts/build_noir_pr_comment.py
new file mode 100644
index 000000000..f98bc7723
--- /dev/null
+++ b/.github/scripts/build_noir_pr_comment.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python3
+"""Build a sticky PR comment for noir execution_success workflow runs."""
+
+from __future__ import annotations
+
+import argparse
+import re
+from pathlib import Path
+
+# Hidden HTML-comment marker used to find and update the sticky PR comment;
+# the exact string is arbitrary, but it must match `marker` in
+# noir-execution-success.yml.
+MARKER = "<!-- noir-execution-success-sticky-comment -->"
+MAX_COMMENT_CHARS = 62000
+
+
+def read_report(path: Path, display_name: str) -> str:
+ if not path.is_file():
+ return f"(missing: {display_name})"
+
+ text = path.read_text(encoding="utf-8", errors="replace").strip()
+ if not text:
+ return f"(empty: {display_name})"
+ return text
+
+
+def parse_grouped_counts(grouped_report_text: str) -> dict[str, str]:
+ counts: dict[str, str] = {}
+ for key in ("PASS", "FAIL", "SKIP"):
+ match = re.search(rf"^{key}=(\d+)$", grouped_report_text, flags=re.MULTILINE)
+ counts[key] = match.group(1) if match else "n/a"
+ return counts
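+# grouped_error_report.txt starts with "KEY=value" lines, e.g. "PASS=170",
+# "FAIL=2", "SKIP=21"; a key absent from the report renders as "n/a".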
+
+
+def parse_failing_circuits(grouped_report_text: str) -> list[str]:
+ """Extract the flat sorted list of failing circuits from the [stages] section.
+
+ The grouped report's [stages] section only contains failing tests (skipped
+ tests are routed to [grouped] instead). Each line looks like:
+        <stage>\\t<count>\\t<circuit>, <circuit>, ...
+ """
+ match = re.search(
+ r"^\[stages\]\n(.*?)(?:\n\[|\Z)",
+ grouped_report_text,
+ flags=re.DOTALL | re.MULTILINE,
+ )
+ if not match:
+ return []
+
+ names: set[str] = set()
+ for line in match.group(1).splitlines():
+ line = line.strip()
+ if not line:
+ continue
+ parts = line.split("\t")
+ if len(parts) < 3:
+ continue
+ for raw in parts[2].split(","):
+ name = raw.strip()
+ if name:
+ names.add(name)
+ return sorted(names)
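+# Illustrative [stages] line (tab-separated, as written by
+# noir_execution_helpers.build_grouped_report):
+#   provekit-cli prepare\t2\tbar_circuit, foo_circuit
+# which this function turns into ["bar_circuit", "foo_circuit"].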
+
+
+def status_with_icon(status: str) -> str:
+ normalized = (status or "unknown").strip().lower()
+ labels = {
+ "success": "[PASS]",
+ "failure": "[FAIL]",
+ "cancelled": "[CANCELLED]",
+ "skipped": "[SKIPPED]",
+ }
+ return f"{labels.get(normalized, '[INFO]')} {normalized}"
+
+
+def compose_comment(
+ grouped_report_text: str,
+ run_id: str,
+ run_url: str,
+ sha: str,
+ noir_ref: str,
+ status: str,
+) -> str:
+ counts = parse_grouped_counts(grouped_report_text)
+ short_sha = sha[:12] if sha else "unknown"
+
+ failing_circuits = parse_failing_circuits(grouped_report_text)
+ if failing_circuits:
+ failing_body = "\n".join(f"- `{name}`" for name in failing_circuits)
+ failing_summary = f"Failing circuits ({len(failing_circuits)})"
+ else:
+ failing_body = "_No failing circuits._"
+ failing_summary = "Failing circuits (0)"
+
+ lines = [
+ MARKER,
+ "## Noir execution_success report",
+ "",
+ "| Metric | Value |",
+ "|--------|-------|",
+ f"| Workflow status | {status_with_icon(status)} |",
+ f"| Noir ref | `{noir_ref}` |",
+ f"| Commit | `{short_sha}` |",
+ f"| Run | [#{run_id}]({run_url}) |",
+ f"| PASS | {counts['PASS']} |",
+ f"| FAIL | {counts['FAIL']} |",
+        f"| SKIP | {counts['SKIP']} |",
+        "",
+        "<details>",
+        f"<summary>{failing_summary}</summary>",
+        "",
+        failing_body,
+        "",
+        "</details>",
+        "",
+ ]
+
+ return "\n".join(lines)
+
+
+def parse_args() -> argparse.Namespace:
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument("--grouped-report", required=True, type=Path)
+ parser.add_argument("--output", required=True, type=Path)
+ parser.add_argument("--run-id", required=True)
+ parser.add_argument("--run-url", required=True)
+ parser.add_argument("--sha", required=True)
+ parser.add_argument("--noir-ref", required=True)
+ parser.add_argument("--status", required=True)
+ return parser.parse_args()
+
+
+def main() -> None:
+ args = parse_args()
+
+ grouped_report_text = read_report(args.grouped_report, "grouped_error_report.txt")
+
+ body = compose_comment(
+ grouped_report_text=grouped_report_text,
+ run_id=args.run_id,
+ run_url=args.run_url,
+ sha=args.sha,
+ noir_ref=args.noir_ref,
+ status=args.status,
+ )
+
+ if len(body) > MAX_COMMENT_CHARS:
+ cut = body[: MAX_COMMENT_CHARS - 80].rstrip()
+ body = f"{cut}\n\n_Comment truncated due to GitHub size limits._\n"
+
+ args.output.parent.mkdir(parents=True, exist_ok=True)
+ args.output.write_text(body, encoding="utf-8")
+ print(f"Wrote PR comment body to {args.output} ({len(body)} chars)")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/.github/workflows/csp-benchmarks.yml b/.github/workflows/csp-benchmarks.yml
new file mode 100644
index 000000000..3e17f6368
--- /dev/null
+++ b/.github/workflows/csp-benchmarks.yml
@@ -0,0 +1,168 @@
+name: CSP Benchmarks
+
+on:
+ pull_request:
+ push:
+ branches: [main]
+ workflow_dispatch:
+ inputs:
+ bench_runs:
+ description: "Iterations per circuit (default: 3)"
+ required: false
+ default: "3"
+
+permissions:
+ contents: read
+ pull-requests: write
+ issues: write
+ # Needed to read artifacts from previous successful main runs so we can
+ # render percentage deltas in the PR comment.
+ actions: read
+
+env:
+ CARGO_TERM_COLOR: always
+  # `github.event.inputs` is empty outside workflow_dispatch, so this falls
+  # back to 3 on pull_request/push events as well as on a blank input.
+  BENCH_RUNS: ${{ github.event.inputs.bench_runs || '3' }}
+ REQUIRED_NARGO_VERSION: "1.0.0-beta.19"
+
+concurrency:
+ group: csp-benchmarks-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ csp-benchmarks:
+ # Block fork PRs from running our heavy bench script on the runner.
+ # Push to main always runs so the artifact becomes a baseline for PRs.
+ if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
+    name: CSP benchmarks (avg over ${{ github.event.inputs.bench_runs || '3' }} runs)
+ runs-on: ubuntu-24.04-arm
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Rust toolchain
+ uses: moonrepo/setup-rust@v1
+ with:
+ channel: nightly-2026-03-04
+ cache-base: main
+
+ - name: Build provekit-cli (release)
+ run: cargo build --release --bin provekit-cli
+
+ - name: Setup Noir toolchain
+ uses: noir-lang/noirup@v0.1.2
+ with:
+ toolchain: ${{ env.REQUIRED_NARGO_VERSION }}
+
+ - name: Run CSP benchmarks
+ env:
+ PROVEKIT_BIN: ${{ github.workspace }}/target/release/provekit-cli
+ BENCH_DIR: ${{ github.workspace }}/csp-bench-logs
+ BENCH_RUNS: ${{ env.BENCH_RUNS }}
+ run: |
+ bash scripts/run_csp_benchmarks.sh
+
+ - name: Upload bench artifacts
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: csp-bench-logs-${{ github.run_id }}
+ path: csp-bench-logs/
+ retention-days: 7
+
+ - name: Fetch baseline from latest successful main run
+ if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+ continue-on-error: true
+ env:
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ GH_REPO: ${{ github.repository }}
+ run: |
+ set -e
+ BASELINE_RUN_ID="$(gh run list \
+ --workflow csp-benchmarks.yml \
+ --branch main \
+ --status success \
+ --limit 1 \
+ --json databaseId \
+ --jq '.[0].databaseId // empty')"
+ if [[ -z "${BASELINE_RUN_ID}" ]]; then
+ echo "No successful main run found yet; deltas will not be shown."
+ exit 0
+ fi
+ echo "Baseline run id: ${BASELINE_RUN_ID}"
+ mkdir -p csp-bench-logs/baseline
+ if gh run download "${BASELINE_RUN_ID}" \
+ --name "csp-bench-logs-${BASELINE_RUN_ID}" \
+ --dir csp-bench-logs/baseline; then
+ echo "BASELINE_RUN_ID=${BASELINE_RUN_ID}" >> "$GITHUB_ENV"
+ else
+ echo "Baseline artifact not retrievable; deltas will not be shown."
+ fi
+
+ - name: Build sticky PR comment body
+ if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+ continue-on-error: true
+ run: |
+ python3 .github/scripts/build_csp_pr_comment.py \
+ --results-csv "csp-bench-logs/results.csv" \
+ --baseline-csv "csp-bench-logs/baseline/results.csv" \
+ --baseline-run-id "${BASELINE_RUN_ID:-}" \
+ --output "csp-bench-logs/pr_comment.md" \
+ --run-id "${{ github.run_id }}" \
+ --run-url "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \
+ --sha "${{ github.sha }}" \
+ --status "${{ job.status }}" \
+ --runs-per-circuit "${{ env.BENCH_RUNS }}"
+
+ - name: Upsert sticky CSP benchmarks comment
+ if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+ continue-on-error: true
+ uses: actions/github-script@v7
+ with:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ script: |
+ const fs = require('fs');
+          const marker = '<!-- csp-benchmarks-sticky-comment -->';
+ const bodyPath = 'csp-bench-logs/pr_comment.md';
+ const fallbackBody = [
+ marker,
+ '## CSP benchmarks',
+ '',
+ 'Unable to generate the detailed report body for this run.',
+ '',
+ 'Run: [#${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})',
+ ].join('\n');
+ const body = fs.existsSync(bodyPath) ? fs.readFileSync(bodyPath, 'utf8') : fallbackBody;
+
+ const { owner, repo } = context.repo;
+ const issue_number = context.issue.number;
+ const comments = await github.paginate(github.rest.issues.listComments, {
+ owner,
+ repo,
+ issue_number,
+ per_page: 100,
+ });
+
+ const existing = comments.find((comment) =>
+ comment.user &&
+ comment.user.login === 'github-actions[bot]' &&
+ comment.body &&
+ comment.body.includes(marker)
+ );
+
+ if (existing) {
+ await github.rest.issues.updateComment({
+ owner,
+ repo,
+ comment_id: existing.id,
+ body,
+ });
+ core.info(`Updated existing CSP benchmarks comment (id=${existing.id}).`);
+ } else {
+ const created = await github.rest.issues.createComment({
+ owner,
+ repo,
+ issue_number,
+ body,
+ });
+ core.info(`Created new CSP benchmarks comment (id=${created.data.id}).`);
+ }
diff --git a/.github/workflows/noir-execution-success.yml b/.github/workflows/noir-execution-success.yml
new file mode 100644
index 000000000..8aeafafbb
--- /dev/null
+++ b/.github/workflows/noir-execution-success.yml
@@ -0,0 +1,160 @@
+name: Noir Execution Success Tests
+
+# Provide a noir_ref to test against any Noir release.
+on:
+ pull_request:
+ workflow_dispatch:
+ inputs:
+ noir_ref:
+ description: "noir-lang/noir release tag (e.g. v1.0.0-beta.19)"
+ required: false
+ default: "v1.0.0-beta.19"
+
+permissions:
+ contents: read
+ pull-requests: write
+ issues: write
+
+env:
+ CARGO_TERM_COLOR: always
+ NOIR_REF: ${{ github.event.inputs.noir_ref || 'v1.0.0-beta.19' }}
+
+# Cancel any in-progress run on the same branch when a new one is triggered.
+concurrency:
+ group: noir-exec-success-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ noir-execution-success:
+ # Block fork PRs from executing arbitrary build scripts on the self-hosted runner.
+ if: ${{ github.event_name == 'workflow_dispatch' || github.event.pull_request.head.repo.full_name == github.repository }}
+    # The `env` context is not available in a job-level `name`, so the default
+    # is repeated here instead of referencing env.NOIR_REF.
+    name: Noir execution_success suite (${{ github.event.inputs.noir_ref || 'v1.0.0-beta.19' }})
+ runs-on: ubuntu-24.04-arm
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Rust toolchain
+ uses: moonrepo/setup-rust@v1
+ with:
+ channel: nightly-2026-03-04
+ cache-base: main
+
+ - name: Build provekit-cli
+ run: cargo build --release --bin provekit-cli
+
+ - name: Setup Noir toolchain
+ uses: noir-lang/noirup@v0.1.2
+ with:
+ toolchain: ${{ env.NOIR_REF }}
+
+ # Sparse checkout — only fetch test_programs/, not the full noir repo.
+ - name: Fetch noir test programs (sparse checkout)
+ run: |
+ tmpdir=$(mktemp -d)
+ # Export the parent so the cleanup step can remove it entirely.
+ echo "NOIR_TMPDIR=${tmpdir}" >> "$GITHUB_ENV"
+ echo "NOIR_REPO_DIR=${tmpdir}/noir" >> "$GITHUB_ENV"
+ git clone \
+ --depth 1 \
+ --filter=blob:none \
+ --sparse \
+ --branch "$NOIR_REF" \
+ https://github.com/noir-lang/noir.git "${tmpdir}/noir"
+ git -C "${tmpdir}/noir" sparse-checkout set \
+ test_programs/execution_success \
+ test_programs/test_libraries
+ echo "Cloned noir @ $(git -C "${tmpdir}/noir" rev-parse HEAD)"
+
+ - name: Run execution_success suite
+ env:
+ PROVEKIT_BIN: ${{ github.workspace }}/target/release/provekit-cli
+ LOG_DIR: ${{ github.workspace }}/noir-execution-logs
+ # NOIR_REPO_DIR is set by the previous step via $GITHUB_ENV
+ run: |
+ bash scripts/run_noir_execution_success.sh
+
+ # Upload logs on every run (pass or fail) for 7 days.
+ - name: Upload test logs
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: noir-execution-logs-${{ github.run_id }}
+ path: noir-execution-logs/
+ retention-days: 7
+
+ - name: Build sticky PR comment body
+ if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+ continue-on-error: true
+ run: |
+ python3 .github/scripts/build_noir_pr_comment.py \
+ --grouped-report "noir-execution-logs/grouped_error_report.txt" \
+ --output "noir-execution-logs/pr_comment.md" \
+ --run-id "${{ github.run_id }}" \
+ --run-url "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \
+ --sha "${{ github.sha }}" \
+ --noir-ref "${{ env.NOIR_REF }}" \
+ --status "${{ job.status }}"
+
+ - name: Upsert sticky PR report comment
+ if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+ continue-on-error: true
+ uses: actions/github-script@v7
+ with:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ script: |
+ const fs = require('fs');
+          const marker = '<!-- noir-execution-success-sticky-comment -->';
+ const bodyPath = 'noir-execution-logs/pr_comment.md';
+ const fallbackBody = [
+ marker,
+ '## Noir execution_success report',
+ '',
+ 'Unable to generate the detailed report body for this run.',
+ '',
+ 'Run: [#${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})',
+ ].join('\n');
+ const body = fs.existsSync(bodyPath) ? fs.readFileSync(bodyPath, 'utf8') : fallbackBody;
+
+ const { owner, repo } = context.repo;
+ const issue_number = context.issue.number;
+ const comments = await github.paginate(github.rest.issues.listComments, {
+ owner,
+ repo,
+ issue_number,
+ per_page: 100,
+ });
+
+ const existing = comments.find((comment) =>
+ comment.user &&
+ comment.user.login === 'github-actions[bot]' &&
+ comment.body &&
+ comment.body.includes(marker)
+ );
+
+ if (existing) {
+ await github.rest.issues.updateComment({
+ owner,
+ repo,
+ comment_id: existing.id,
+ body,
+ });
+ core.info(`Updated existing noir sticky comment (id=${existing.id}).`);
+ } else {
+ const created = await github.rest.issues.createComment({
+ owner,
+ repo,
+ issue_number,
+ body,
+ });
+ core.info(`Created new noir sticky comment (id=${created.data.id}).`);
+ }
+
+ # Always clean up the temp clone, even if the test step failed.
+ - name: Cleanup noir clone
+ if: always()
+ run: |
+ if [[ -n "${NOIR_TMPDIR:-}" && -d "${NOIR_TMPDIR}" ]]; then
+ rm -rf "${NOIR_TMPDIR}"
+ echo "Cleaned up ${NOIR_TMPDIR}"
+ fi
diff --git a/scripts/csp_benchmark_helpers.py b/scripts/csp_benchmark_helpers.py
new file mode 100755
index 000000000..3bd141a0f
--- /dev/null
+++ b/scripts/csp_benchmark_helpers.py
@@ -0,0 +1,213 @@
+#!/usr/bin/env python3
+"""Helpers for scripts/run_csp_benchmarks.sh.
+
+Subcommands:
+    parse-runs <bench_dir> <circuit>    Aggregate per-run measurements for one
+                                        circuit and emit a single CSV row to
+                                        stdout.
+    human-to-bytes <value>              Convert a human-formatted byte string
+                                        from the prover trace ("1.23 GB",
+                                        "456 MB", etc.) to an integer byte
+                                        count. Used by tests.
+
+Bench layout produced by run_csp_benchmarks.sh::
+
+    <BENCH_DIR>/per_circuit/<circuit>/
+        prove_<i>.time      # `/usr/bin/time -f '%e %M'` output
+        prove_<i>.stderr    # provekit-cli prove stderr (span_stats trace)
+        verify_<i>.time
+        verify_<i>.stderr
+        meta.txt            # key=value: pkp_size_bytes, proof_size_bytes
+
+The "peak heap" comes from the largest "peak memory" value emitted by
+``tooling/cli/src/span_stats.rs`` over the prove invocation's trace. We strip
+ANSI escapes and walk every span-close line; the outermost span propagates
+its children's peak via ``data.peak_memory = max(...)`` so any of them is a
+sufficient upper bound, but we keep the max for safety.
+"""
+
+from __future__ import annotations
+
+import argparse
+import re
+import sys
+from pathlib import Path
+from statistics import mean
+
+ANSI_RE = re.compile(r"\x1b\[[0-9;]*m")
+# Suffix table from provekit_common::utils::human (BN254 utils). The middle
+# entry is a regular ASCII space (no SI prefix). Order matters: we use it to
+# look up the multiplier from a captured suffix character.
+SI_SUFFIXES = "qryzafpnμm kMGTPEZYRQ"
+SI_BASE_INDEX = SI_SUFFIXES.index(" ") # power 0 lives at index 10
+# The separator between number and SI suffix is U+202F NARROW NO-BREAK SPACE
+# unless `{:#}` (alternate) is used. We accept either form.
+NARROW_NBSP = "\u202f"
+PEAK_MEMORY_RE = re.compile(
+ rf"([0-9]+(?:\.[0-9]+)?)[{NARROW_NBSP} ]?([qryzafpnμmkMGTPEZYRQ])?B"
+ r"\s+peak\s+memory",
+)
+# Matches the `info!(constraints, witnesses, "Read Noir proof scheme")` line
+# emitted by `tooling/cli/src/cmd/prove.rs` on every prove invocation.
+SCHEME_SIZE_RE = re.compile(
+ r"Read Noir proof scheme\b.*?\bconstraints=(\d+)\b.*?\bwitnesses=(\d+)\b"
+)
+
+
+def human_to_bytes(value: str) -> int:
+ """Convert a "1.23 GB"-style string from the trace to an integer byte count.
+
+ Accepts either a regular ASCII space or U+202F as the separator. Suffixes
+ follow ``provekit_common::utils::human`` (q…Q). A literal "B" with no SI
+ prefix returns the integer/float value rounded down.
+ """
+ cleaned = ANSI_RE.sub("", value).strip()
+ if not cleaned.endswith("B"):
+ raise ValueError(f"not a byte-formatted value: {value!r}")
+ cleaned = cleaned[:-1].rstrip() # drop trailing 'B'
+ if cleaned and cleaned[-1] in SI_SUFFIXES and cleaned[-1] != " ":
+ suffix = cleaned[-1]
+ number_part = cleaned[:-1].rstrip()
+ else:
+ suffix = " "
+ number_part = cleaned
+ number_part = number_part.replace(NARROW_NBSP, "").strip()
+ multiplier = 10 ** ((SI_SUFFIXES.index(suffix) - SI_BASE_INDEX) * 3)
+ return int(float(number_part) * multiplier)
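+# Illustrative: human_to_bytes("456 MB") == 456_000_000 and
+# human_to_bytes("87 B") == 87. Suffixes are decimal powers of 1000
+# (matching the Rust-side human formatter), not binary KiB/MiB.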
+
+
+def parse_peak_heap_bytes(stderr_path: Path) -> int:
+ """Return the largest "peak memory" value (bytes) found in the trace."""
+ if not stderr_path.is_file():
+ return 0
+ text = ANSI_RE.sub("", stderr_path.read_text(encoding="utf-8", errors="replace"))
+ peak = 0
+ for match in PEAK_MEMORY_RE.finditer(text):
+ number = float(match.group(1))
+ suffix = match.group(2) or " "
+ bytes_value = int(number * 10 ** ((SI_SUFFIXES.index(suffix) - SI_BASE_INDEX) * 3))
+ peak = max(peak, bytes_value)
+ return peak
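+# Assumed span-close line shape from the span_stats trace: text ending in
+# something like "1.50 GB peak memory"; only the value directly before
+# "peak memory" is read, and the maximum across all matches is returned.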
+
+
+def parse_scheme_sizes(stderr_path: Path) -> tuple[int, int]:
+ """Return (num_constraints, num_witnesses) from a prove stderr; (0, 0) if absent."""
+ if not stderr_path.is_file():
+ return 0, 0
+ text = ANSI_RE.sub("", stderr_path.read_text(encoding="utf-8", errors="replace"))
+ match = SCHEME_SIZE_RE.search(text)
+ if not match:
+ return 0, 0
+ return int(match.group(1)), int(match.group(2))
+
+
+def parse_time_file(time_path: Path) -> tuple[float, int]:
+ """Read `/usr/bin/time -f '%e %M'` output: (wall_seconds, max_rss_kb).
+
+ Returns (0.0, 0) if the file is missing or unparsable.
+ """
+ if not time_path.is_file():
+ return 0.0, 0
+ raw = time_path.read_text(encoding="utf-8", errors="replace").strip().splitlines()
+ if not raw:
+ return 0.0, 0
+ parts = raw[-1].split()
+ if len(parts) < 2:
+ return 0.0, 0
+ try:
+ return float(parts[0]), int(parts[1])
+ except ValueError:
+ return 0.0, 0
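+# With `-f '%e %M'` GNU time writes one line like "12.34 456789" (elapsed
+# seconds, max RSS in KiB). We parse the last line because GNU time may
+# prepend "Command exited with non-zero status N" when the command fails.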
+
+
+def read_meta(meta_path: Path) -> dict[str, str]:
+ out: dict[str, str] = {}
+ if not meta_path.is_file():
+ return out
+ for line in meta_path.read_text(encoding="utf-8").splitlines():
+ if "=" in line:
+ key, _, val = line.partition("=")
+ out[key.strip()] = val.strip()
+ return out
+
+
+def parse_runs(bench_dir: Path, circuit: str) -> str:
+ circuit_dir = bench_dir / "per_circuit" / circuit
+ meta = read_meta(circuit_dir / "meta.txt")
+
+ prove_runs: list[tuple[float, int, int]] = []
+ verify_runs: list[tuple[float, int]] = []
+
+ i = 1
+ while True:
+ time_path = circuit_dir / f"prove_{i}.time"
+ if not time_path.is_file():
+ break
+ wall, rss_kb = parse_time_file(time_path)
+ heap_bytes = parse_peak_heap_bytes(circuit_dir / f"prove_{i}.stderr")
+ prove_runs.append((wall, rss_kb, heap_bytes))
+ i += 1
+
+ # Constraint and witness counts are deterministic per circuit, so reading
+ # them from the first prove run is sufficient.
+ num_constraints, num_witnesses = parse_scheme_sizes(circuit_dir / "prove_1.stderr")
+
+ j = 1
+ while True:
+ time_path = circuit_dir / f"verify_{j}.time"
+ if not time_path.is_file():
+ break
+ wall, _rss = parse_time_file(time_path)
+ verify_runs.append((wall, _rss))
+ j += 1
+
+ if not prove_runs:
+ return ""
+
+ prove_time_ms = mean(r[0] for r in prove_runs) * 1000.0
+ prover_rss_kb = mean(r[1] for r in prove_runs)
+ prover_heap_bytes = mean(r[2] for r in prove_runs)
+ verifier_time_ms = mean(r[0] for r in verify_runs) * 1000.0 if verify_runs else 0.0
+
+ pkp_size = meta.get("pkp_size_bytes", "0")
+ proof_size = meta.get("proof_size_bytes", "0")
+
+ return ",".join(
+ [
+ circuit,
+ str(num_constraints),
+ str(num_witnesses),
+ f"{prove_time_ms:.1f}",
+ f"{prover_rss_kb:.0f}",
+ f"{prover_heap_bytes:.0f}",
+ f"{verifier_time_ms:.1f}",
+ proof_size,
+ pkp_size,
+ str(len(prove_runs)),
+ ]
+ )
+
+
+def main() -> int:
+ parser = argparse.ArgumentParser(description=__doc__)
+ sub = parser.add_subparsers(dest="cmd", required=True)
+
+ p = sub.add_parser("parse-runs")
+ p.add_argument("bench_dir", type=Path)
+ p.add_argument("circuit")
+
+ p = sub.add_parser("human-to-bytes")
+ p.add_argument("value")
+
+ args = parser.parse_args()
+
+ if args.cmd == "parse-runs":
+ row = parse_runs(args.bench_dir, args.circuit)
+ if row:
+ print(row)
+ elif args.cmd == "human-to-bytes":
+ print(human_to_bytes(args.value))
+
+ return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/scripts/generate_provekit_witness_report.py b/scripts/generate_provekit_witness_report.py
new file mode 100755
index 000000000..e8bafc982
--- /dev/null
+++ b/scripts/generate_provekit_witness_report.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+"""Generate a ProveKit-only witness count report.
+
+Usage: python3 generate_provekit_witness_report.py <witness_csv> <out_dir>
+
+Reads a CSV of post-GE constraint and witness counts produced by
+scripts/run_noir_execution_success.sh and writes provekit_witness_report.md
+to <out_dir>.
+"""
+
+from __future__ import annotations
+
+import csv
+import sys
+from pathlib import Path
+
+SKIP_LIST = Path(__file__).resolve().parent / "noir_skip_tests.txt"
+
+
+def load_skip_tests() -> set[str]:
+ if not SKIP_LIST.is_file():
+ return set()
+ skip: set[str] = set()
+ for raw in SKIP_LIST.read_text().splitlines():
+ line = raw.strip()
+ if not line or line.startswith("#"):
+ continue
+ skip.add(line)
+ return skip
+
+
+def main(csv_path: Path, out_dir: Path) -> None:
+ skip_tests = load_skip_tests()
+
+ rows: dict[str, tuple[int | None, int | None]] = {}
+ with csv_path.open() as f:
+ reader = csv.DictReader(f)
+ for row in reader:
+ leaf = row["test_name"].split("/")[-1]
+ if leaf in skip_tests:
+ continue
+
+ def _parse(key: str) -> int | None:
+ val = row.get(key, "")
+ try:
+ return int(val)
+ except (TypeError, ValueError):
+ return None
+
+ rows[leaf] = (_parse("provekit_constraints"), _parse("provekit_witnesses"))
+
+ lines = [
+ "# ProveKit Witness Counts",
+ "",
+ f"Captured post-GE constraint and witness counts for {len(rows)} circuits.",
+ "",
+ "| Test | Constraints (post-GE) | Witnesses (post-GE) |",
+ "|------|------------------------|----------------------|",
+ ]
+ for name in sorted(rows):
+ constraints, witnesses = rows[name]
+ c = "-" if constraints is None else str(constraints)
+ w = "-" if witnesses is None else str(witnesses)
+ lines.append(f"| {name} | {c} | {w} |")
+
+ out_path = out_dir / "provekit_witness_report.md"
+ out_path.write_text("\n".join(lines) + "\n")
+ print(f"Wrote {out_path} ({len(rows)} circuits)")
+
+
+if __name__ == "__main__":
+ if len(sys.argv) != 3:
+        print(f"Usage: {sys.argv[0]} <witness_csv> <out_dir>", file=sys.stderr)
+ sys.exit(1)
+ main(Path(sys.argv[1]), Path(sys.argv[2]))
diff --git a/scripts/noir_execution_helpers.py b/scripts/noir_execution_helpers.py
new file mode 100644
index 000000000..06ffb52d1
--- /dev/null
+++ b/scripts/noir_execution_helpers.py
@@ -0,0 +1,263 @@
+#!/usr/bin/env python3
+"""Helpers for scripts/run_noir_execution_success.sh.
+
+Subcommands:
+ discover — list runnable test dirs
+ resolve-prover-toml — find Prover.toml for a package
+ package-name — read [package].name from Nargo.toml
+ build-report — write grouped_error_report.txt
+ skip-tests — print the skip list (one per line)
+
+The skip list lives in scripts/noir_skip_tests.txt and is the single source
+of truth shared with scripts/generate_provekit_witness_report.py.
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import re
+import sys
+import tomllib
+from collections import defaultdict
+from pathlib import Path
+
+SKIP_LIST_FILE = Path(__file__).with_name("noir_skip_tests.txt")
+
+
+def load_skip_tests() -> set[str]:
+ """Return the skip list parsed from noir_skip_tests.txt.
+
+ Blank lines and lines starting with `#` are ignored. Inline `#` comments
+ are stripped. Returns an empty set if the file is missing.
+ """
+ if not SKIP_LIST_FILE.is_file():
+ return set()
+ names: set[str] = set()
+ for raw in SKIP_LIST_FILE.read_text().splitlines():
+ line = raw.split("#", 1)[0].strip()
+ if line:
+ names.add(line)
+ return names
+
+
+def discover_tests(root: Path) -> list[str]:
+ """Return candidate test project paths relative to ``root``.
+
+ Mirrors the legacy shell heredoc: a path is a candidate if it is a
+ workspace default-member, or if it has both a `[package]` entry in its
+ Nargo.toml and a sibling Prover.toml. Nested projects under a workspace
+ default-member are suppressed.
+ """
+ nargo_data: dict[str, dict] = {}
+ for nargo in root.rglob("Nargo.toml"):
+ rel = nargo.parent.relative_to(root).as_posix()
+ try:
+ data = tomllib.loads(nargo.read_text())
+ except Exception:
+ data = {}
+ nargo_data[rel] = data
+
+ workspace_default_roots: set[str] = set()
+ for rel, data in nargo_data.items():
+ ws = data.get("workspace")
+ if isinstance(ws, dict) and "default-member" in ws:
+ workspace_default_roots.add(rel)
+
+ suppressed: set[str] = set()
+ for ws_rel in workspace_default_roots:
+ ws_path = Path(ws_rel) if ws_rel != "." else Path()
+ for rel in nargo_data:
+ rel_path = Path(rel) if rel != "." else Path()
+ if rel_path != ws_path and ws_path in rel_path.parents:
+ suppressed.add(rel)
+
+ candidates: set[str] = set(workspace_default_roots)
+ for rel, data in nargo_data.items():
+ if rel in suppressed:
+ continue
+ pkg = data.get("package")
+ if isinstance(pkg, dict) and "name" in pkg:
+ if (root / rel / "Prover.toml").is_file():
+ candidates.add(rel)
+
+ return sorted(candidates)
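+# Illustrative qualifying layouts (paths relative to the corpus root):
+#   workspace_test/   Nargo.toml with [workspace] and default-member
+#   plain_test/       Nargo.toml with [package] plus a sibling Prover.toml
+# A package nested under a qualifying workspace root is suppressed.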
+
+
+def resolve_prover_toml(project_dir: Path, package_name: str) -> str:
+ """Return Prover.toml path (relative to ``project_dir``) for ``package_name``.
+
+ Prefers a Prover.toml located next to the Nargo.toml whose package name
+ matches. Falls back to a root-level Prover.toml, then to the sole
+ Prover.toml under the project when unambiguous. Returns "" otherwise.
+ """
+ matches: list[str] = []
+ for nargo in sorted(project_dir.rglob("Nargo.toml")):
+ try:
+ data = tomllib.loads(nargo.read_text())
+ except Exception:
+ continue
+ pkg = data.get("package")
+ if not isinstance(pkg, dict) or pkg.get("name") != package_name:
+ continue
+ prover = nargo.parent / "Prover.toml"
+ if prover.is_file():
+ matches.append(prover.relative_to(project_dir).as_posix())
+
+ if matches:
+ matches.sort(key=lambda p: (p.count("/"), p))
+ return matches[0]
+
+ root_prover = project_dir / "Prover.toml"
+ if root_prover.is_file():
+ return "Prover.toml"
+
+ all_provers = sorted(project_dir.rglob("Prover.toml"))
+ if len(all_provers) == 1:
+ return all_provers[0].relative_to(project_dir).as_posix()
+
+ return ""
+
+
+def read_package_name(project_dir: Path) -> str:
+ """Return [package].name from ``project_dir/Nargo.toml`` or ""."""
+ nargo = project_dir / "Nargo.toml"
+ if not nargo.is_file():
+ return ""
+ try:
+ data = tomllib.loads(nargo.read_text())
+ except Exception:
+ return ""
+ pkg = data.get("package")
+ if isinstance(pkg, dict):
+ return str(pkg.get("name", ""))
+ return ""
+
+
+_BLACKBOX_RE = re.compile(
+ r"not implemented: Other black box function: BLACKBOX::([A-Z0-9_]+)"
+)
+_PANIC_RE = re.compile(r"panicked at [^\n]*:\n([^\n]+)")
+_SOLVE_RE = re.compile(r"Failed to solve program: '([^']+)'")
+_COMPILE_ERR_RE = re.compile(r"^error:\s*([^\n]+)", flags=re.M)
+_COMPILE_BUG_RE = re.compile(r"^bug:\s*([^\n]+)", flags=re.M)
+_GENERIC_ERR_RE = re.compile(r"^Error:\s*([^\n]+)", flags=re.M)
+_FAIL_STAGE_RE = re.compile(r"FAIL: ([^\n]+)")
+_SKIP_REASON_RE = re.compile(r"SKIP: ([^\n]+)")
+
+
+def _classify_failure(text: str, stage: str) -> str:
+ blackbox = _BLACKBOX_RE.search(text)
+ if blackbox:
+ return f"Not implemented blackbox: {blackbox.group(1)} ({stage})"
+ if "Program must have one entry point." in text:
+ return f"Program must have one entry point ({stage})"
+ panic = _PANIC_RE.search(text)
+ if panic:
+ return f"Panic: {panic.group(1).strip()} ({stage})"
+ solve = _SOLVE_RE.search(text)
+ if solve:
+ return f"Failed to solve program: {solve.group(1)} ({stage})"
+ if "Failed assertion" in text:
+ return f"Failed assertion ({stage})"
+ compile_error = _COMPILE_ERR_RE.search(text)
+ if compile_error:
+ return f"Compile error: {compile_error.group(1).strip()} ({stage})"
+ compile_bug = _COMPILE_BUG_RE.search(text)
+ if compile_bug:
+ return f"Compile bug: {compile_bug.group(1).strip()} ({stage})"
+ generic = _GENERIC_ERR_RE.search(text)
+ if generic:
+ return f"Error: {generic.group(1).strip()} ({stage})"
+ return f"Unknown failure ({stage})"
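+# Illustrative: a log containing "not implemented: Other black box function:
+# BLACKBOX::BLAKE3" whose last stage marker is "provekit-cli prepare" is
+# classified as "Not implemented blackbox: BLAKE3 (provekit-cli prepare)".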
+
+
+def build_grouped_report(log_dir: Path, passed: int, failed: int, skipped: int) -> None:
+ """Scan ``log_dir/per_test/*.log`` and write ``log_dir/grouped_error_report.txt``.
+
+ PASS/FAIL/SKIP totals come from the shell runner — it has the authoritative
+ counts (including blackbox skips, which don't produce per-test logs). Logs
+ are consulted only for the ``[stages]`` and ``[grouped]`` sections.
+ """
+ per_test_dir = log_dir / "per_test"
+ report_file = log_dir / "grouped_error_report.txt"
+
+ logs = sorted(per_test_dir.glob("*.log"))
+ grouped: dict[str, list[str]] = defaultdict(list)
+ stage_groups: dict[str, list[str]] = defaultdict(list)
+
+ for fp in logs:
+ text = fp.read_text(errors="replace")
+ name = fp.stem
+
+ if "SKIP:" in text:
+ skip_match = _SKIP_REASON_RE.search(text)
+ reason = skip_match.group(1).strip() if skip_match else "unknown"
+ grouped[f"SKIP: {reason}"].append(name)
+ continue
+
+ fail_stages = _FAIL_STAGE_RE.findall(text)
+ stage = fail_stages[-1].strip() if fail_stages else "unknown stage"
+ stage_groups[stage].append(name)
+ grouped[_classify_failure(text, stage)].append(name)
+
+ with report_file.open("w") as f:
+ f.write(f"logs={len(logs)}\n")
+ f.write(f"PASS={passed}\n")
+ f.write(f"FAIL={failed}\n")
+ f.write(f"SKIP={skipped}\n")
+ f.write("\n[stages]\n")
+ for stage, tests in sorted(stage_groups.items(), key=lambda kv: (-len(kv[1]), kv[0])):
+ f.write(f"{stage}\t{len(tests)}\t{', '.join(tests)}\n")
+ f.write("\n[grouped]\n")
+ for key, tests in sorted(grouped.items(), key=lambda kv: (-len(kv[1]), kv[0])):
+ f.write(f"{len(tests)}\t{key}\t{', '.join(tests)}\n")
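+# Illustrative report shape ("\t" marks real tab characters):
+#   logs=2
+#   PASS=170
+#   FAIL=2
+#   SKIP=21
+#
+#   [stages]
+#   provekit-cli prepare\t2\tbar, foo
+#
+#   [grouped]
+#   2\tNot implemented blackbox: BLAKE3 (provekit-cli prepare)\tbar, foo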
+
+
+def main() -> int:
+ parser = argparse.ArgumentParser(description=__doc__)
+ sub = parser.add_subparsers(dest="cmd", required=True)
+
+    p = sub.add_parser("discover", help="list runnable test dirs under <test_root>")
+ p.add_argument("test_root", type=Path)
+
+ p = sub.add_parser("resolve-prover-toml")
+ p.add_argument("project_dir", type=Path)
+ p.add_argument("package_name")
+
+ p = sub.add_parser("package-name")
+ p.add_argument("project_dir", type=Path)
+
+ p = sub.add_parser("build-report")
+ p.add_argument("log_dir", type=Path)
+ p.add_argument("passed_count", type=int)
+ p.add_argument("failed_count", type=int)
+ p.add_argument("skipped_count", type=int)
+
+ sub.add_parser("skip-tests", help="print the skip list, one name per line")
+
+ args = parser.parse_args()
+
+ if args.cmd == "discover":
+ for name in discover_tests(args.test_root):
+ print(name)
+ elif args.cmd == "resolve-prover-toml":
+ print(resolve_prover_toml(args.project_dir, args.package_name))
+ elif args.cmd == "package-name":
+ print(read_package_name(args.project_dir))
+ elif args.cmd == "build-report":
+ build_grouped_report(
+ args.log_dir,
+ args.passed_count,
+ args.failed_count,
+ args.skipped_count,
+ )
+ elif args.cmd == "skip-tests":
+ for name in sorted(load_skip_tests()):
+ print(name)
+
+ return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/scripts/noir_skip_tests.txt b/scripts/noir_skip_tests.txt
new file mode 100644
index 000000000..575dd9d0c
--- /dev/null
+++ b/scripts/noir_skip_tests.txt
@@ -0,0 +1,38 @@
+# Tests that use blackbox functions not yet supported by provekit.
+# Counted as SKIP (not FAIL) by scripts/run_noir_execution_success.sh
+# and excluded from scripts/generate_provekit_witness_report.py.
+# Remove entries here once the corresponding blackbox is supported.
+#
+# Format: one bare test name per line. Blank lines and `#` comments are ignored.
+
+# BLAKE3
+a_6
+array_dynamic_blackbox_input
+array_dynamic_nested_blackbox_input
+blake3
+conditional_1
+conditional_regression_short_circuit
+regression_4449
+
+# ECDSA_SECP256K1
+bench_ecdsa_secp256k1
+ecdsa_secp256k1
+ecdsa_secp256k1_invalid_inputs
+ecdsa_secp256k1_invalid_pub_key_in_inactive_branch
+
+# ECDSA_SECP256R1
+ecdsa_secp256r1
+ecdsa_secp256r1_3x
+ecdsa_secp256r1_invalid_pub_key_in_inactive_branch
+ecdsa_secp256r1_msg_equals_order
+
+# EMBEDDED_CURVE_ADD
+embedded_curve_ops
+regression_5045
+regression_7744
+
+# AES128_ENCRYPT
+aes128_encrypt
+
+# BLAKE2S
+a_7
diff --git a/scripts/run_csp_benchmarks.sh b/scripts/run_csp_benchmarks.sh
new file mode 100755
index 000000000..e099c636c
--- /dev/null
+++ b/scripts/run_csp_benchmarks.sh
@@ -0,0 +1,235 @@
+#!/usr/bin/env bash
+# run_csp_benchmarks.sh
+#
+# Run prove/verify benchmarks for noir-examples/csp-benchmarks/*. Each circuit
+# is compiled and prepared once, then prove + verify are each invoked
+# BENCH_RUNS times so the helper can average wall time, peak RSS, and
+# heap-peak bytes (parsed from the prover's tracing output).
+#
+# Environment variables (all optional):
+# PROVEKIT_BIN Path to provekit-cli (default: target/release/provekit-cli)
+# BENCH_ROOT Path to csp-benchmarks (default: noir-examples/csp-benchmarks)
+# BENCH_DIR Output directory (default: csp-bench-logs)
+# BENCH_RUNS Iterations to average (default: 3)
+# TEST_FILTER Regex on circuit name
+# MAX_TESTS Cap on circuits (0 = unlimited)
+#
+# Output: BENCH_DIR/results.csv with one row per circuit:
+# circuit,num_constraints,num_witnesses,prover_time_ms,prover_peak_rss_kb,
+# prover_heap_peak_bytes,verifier_time_ms,proof_size_bytes,pkp_size_bytes,
+# runs
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+HELPER="${SCRIPT_DIR}/csp_benchmark_helpers.py"
+
+PROVEKIT_BIN="${PROVEKIT_BIN:-${REPO_ROOT}/target/release/provekit-cli}"
+BENCH_ROOT="${BENCH_ROOT:-${REPO_ROOT}/noir-examples/csp-benchmarks}"
+BENCH_DIR="${BENCH_DIR:-${REPO_ROOT}/csp-bench-logs}"
+BENCH_RUNS="${BENCH_RUNS:-3}"
+TEST_FILTER="${TEST_FILTER:-}"
+MAX_TESTS="${MAX_TESTS:-0}"
+
+if [[ "${BENCH_DIR}" != /* ]]; then
+ BENCH_DIR="${REPO_ROOT}/${BENCH_DIR}"
+fi
+
+if [[ ! -x "${PROVEKIT_BIN}" ]]; then
+ echo "ERROR: provekit-cli binary not found at ${PROVEKIT_BIN}" >&2
+ echo "Build it first: cargo build --release --bin provekit-cli" >&2
+ exit 1
+fi
+
+if [[ ! -d "${BENCH_ROOT}" ]]; then
+ echo "ERROR: csp-benchmarks not found at ${BENCH_ROOT}" >&2
+ exit 1
+fi
+
+if ! command -v nargo >/dev/null 2>&1; then
+ echo "ERROR: nargo is required but not in PATH" >&2
+ exit 1
+fi
+
+if ! python3 -c "import tomllib" 2>/dev/null; then
+ echo "ERROR: python3.11+ is required (tomllib not found)." >&2
+ echo "Current: $(python3 --version 2>&1)" >&2
+ exit 1
+fi
+
+# `/usr/bin/time` is the GNU-style binary; macOS ships a different `time` shell
+# builtin so users may need `gtime` from `brew install gnu-time`. CI runs on
+# ubuntu-24.04-arm where /usr/bin/time is GNU.
+TIME_BIN=""
+if [[ -x /usr/bin/time ]]; then
+ TIME_BIN=/usr/bin/time
+elif command -v gtime >/dev/null 2>&1; then
+ TIME_BIN="$(command -v gtime)"
+else
+ echo "ERROR: GNU /usr/bin/time not found (try: brew install gnu-time)" >&2
+ exit 1
+fi
+
+mkdir -p "${BENCH_DIR}/per_circuit"
+RESULTS_CSV="${BENCH_DIR}/results.csv"
+echo "circuit,num_constraints,num_witnesses,prover_time_ms,prover_peak_rss_kb,prover_heap_peak_bytes,verifier_time_ms,proof_size_bytes,pkp_size_bytes,runs" > "${RESULTS_CSV}"
+
+shopt -s nullglob
+
+# Discover circuits: any direct subdir of csp-benchmarks/ that has both a
+# Nargo.toml and a Prover.toml at its root. This filters out keccak_lib/.
+discover_circuits() {
+ for dir in "${BENCH_ROOT}"/*/; do
+ if [[ -f "${dir}Nargo.toml" && -f "${dir}Prover.toml" ]]; then
+ basename "${dir%/}"
+ fi
+ done
+}
+
+mapfile -t circuits < <(discover_circuits | sort)
+if [[ "${#circuits[@]}" -eq 0 ]]; then
+ echo "ERROR: no circuits discovered under ${BENCH_ROOT}" >&2
+ exit 1
+fi
+
+echo "Discovered ${#circuits[@]} circuits"
+
+# Read [package].name from a Nargo.toml; fall back to directory basename.
+read_package_name() {
+ local dir="$1"
+ python3 - "$dir" <<'PY'
+import sys, tomllib, pathlib
+nargo = pathlib.Path(sys.argv[1]) / "Nargo.toml"
+try:
+ data = tomllib.loads(nargo.read_text())
+ print(data.get("package", {}).get("name", ""))
+except Exception:
+ pass
+PY
+}
+
+attempted=0
+succeeded=0
+failed=0
+
+for circuit in "${circuits[@]}"; do
+ if [[ -n "${TEST_FILTER}" && ! "${circuit}" =~ ${TEST_FILTER} ]]; then
+ continue
+ fi
+ (( attempted += 1 ))
+ if [[ "${MAX_TESTS}" -gt 0 && "${attempted}" -gt "${MAX_TESTS}" ]]; then
+ break
+ fi
+
+ workdir="${BENCH_ROOT}/${circuit}"
+ out_dir="${BENCH_DIR}/per_circuit/${circuit}"
+ mkdir -p "${out_dir}"
+
+ echo ""
+ echo "==> [${attempted}/${#circuits[@]}] ${circuit}"
+
+ pkg_name="$(read_package_name "${workdir}")"
+ if [[ -z "${pkg_name}" ]]; then
+ pkg_name="${circuit}"
+ fi
+
+ # 1) compile
+ if ! (cd "${workdir}" && nargo compile > "${out_dir}/compile.log" 2>&1); then
+ echo "FAIL: nargo compile (${circuit})"
+ (( failed += 1 ))
+ continue
+ fi
+
+ circuit_json="${workdir}/target/${pkg_name}.json"
+ if [[ ! -f "${circuit_json}" ]]; then
+ # Fallback: pick the first json under target/.
+ candidate=("${workdir}"/target/*.json)
+ if [[ "${#candidate[@]}" -gt 0 ]]; then
+ circuit_json="${candidate[0]}"
+ else
+ echo "FAIL: no compiled JSON in ${workdir}/target/"
+ (( failed += 1 ))
+ continue
+ fi
+ fi
+
+ pkp_path="${out_dir}/prover.pkp"
+ pkv_path="${out_dir}/verifier.pkv"
+ proof_path="${out_dir}/proof.np"
+
+ # 2) prepare
+ if ! (cd "${workdir}" && "${PROVEKIT_BIN}" prepare "${circuit_json}" \
+ --pkp "${pkp_path}" --pkv "${pkv_path}") > "${out_dir}/prepare.log" 2>&1; then
+ echo "FAIL: provekit-cli prepare (${circuit})"
+ (( failed += 1 ))
+ continue
+ fi
+
+ pkp_size_bytes="$(stat -c '%s' "${pkp_path}" 2>/dev/null || stat -f '%z' "${pkp_path}")"
+
+ # 3) prove × BENCH_RUNS — write each run's stderr separately so the helper
+ # can parse the tracing output's "peak memory" lines.
+ prove_ok=1
+ for ((i=1; i<=BENCH_RUNS; i++)); do
+ if ! (cd "${workdir}" && "${TIME_BIN}" -f '%e %M' \
+ -o "${out_dir}/prove_${i}.time" \
+ "${PROVEKIT_BIN}" prove "${pkp_path}" "${workdir}/Prover.toml" \
+ -o "${proof_path}") 2> "${out_dir}/prove_${i}.stderr"; then
+ echo "FAIL: provekit-cli prove run ${i} (${circuit})"
+ prove_ok=0
+ break
+ fi
+ done
+ if [[ "${prove_ok}" -ne 1 ]]; then
+ (( failed += 1 ))
+ continue
+ fi
+
+ proof_size_bytes="$(stat -c '%s' "${proof_path}" 2>/dev/null || stat -f '%z' "${proof_path}")"
+
+ # 4) verify × BENCH_RUNS
+ verify_ok=1
+ for ((i=1; i<=BENCH_RUNS; i++)); do
+ if ! (cd "${workdir}" && "${TIME_BIN}" -f '%e %M' \
+ -o "${out_dir}/verify_${i}.time" \
+ "${PROVEKIT_BIN}" verify "${pkv_path}" "${proof_path}") \
+ 2> "${out_dir}/verify_${i}.stderr"; then
+ echo "FAIL: provekit-cli verify run ${i} (${circuit})"
+ verify_ok=0
+ break
+ fi
+ done
+ if [[ "${verify_ok}" -ne 1 ]]; then
+ (( failed += 1 ))
+ continue
+ fi
+
+    cat > "${out_dir}/meta.txt" <<EOF
+pkp_size_bytes=${pkp_size_bytes}
+proof_size_bytes=${proof_size_bytes}
+EOF
+
+    # 5) aggregate the runs into a single CSV row via the Python helper
+    if row="$(python3 "${HELPER}" parse-runs "${BENCH_DIR}" "${circuit}")" && [[ -n "${row}" ]]; then
+        echo "${row}" >> "${RESULTS_CSV}"
+        echo "OK: ${row}"
+        (( succeeded += 1 ))
+    else
+ echo "FAIL: helper produced no row for ${circuit}"
+ (( failed += 1 ))
+ fi
+done
+
+echo ""
+echo "----- csp-benchmarks summary -----"
+echo "Discovered : ${#circuits[@]}"
+echo "Attempted : ${attempted}"
+echo "Succeeded : ${succeeded}"
+echo "Failed : ${failed}"
+echo "Results : ${RESULTS_CSV}"
+
+if [[ "${failed}" -gt 0 ]]; then
+ exit 1
+fi
+exit 0
diff --git a/scripts/run_noir_execution_success.sh b/scripts/run_noir_execution_success.sh
new file mode 100755
index 000000000..514f46938
--- /dev/null
+++ b/scripts/run_noir_execution_success.sh
@@ -0,0 +1,434 @@
+#!/usr/bin/env bash
+# run_noir_execution_success.sh
+#
+# Run the Noir execution_success test suite through provekit-cli.
+#
+# Environment variables (all optional):
+# NOIR_REPO_DIR Path to a cloned noir-lang/noir repo root.
+# When set, tests come from
+# NOIR_REPO_DIR/test_programs/{execution_success,test_libraries}.
+# When unset, falls back to the vendored path
+# REPO_ROOT/test-programs/noir/.
+# PROVEKIT_BIN Path to provekit-cli binary (default: target/release/provekit-cli)
+# LOG_DIR Directory for per-test logs and summary
+# MAX_TESTS Cap the number of tests (0 = unlimited)
+# TEST_FILTER Regex filter on test name
+# REQUIRED_NARGO_VERSION Nargo version string to require (default 1.0.0-beta.19)
+# ENABLE_ENUMS_FALLBACK Retry compile with -Zenums on 'enums' feature error (0/1, default 1)
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+HELPER="${SCRIPT_DIR}/noir_execution_helpers.py"
+SKIP_LIST_FILE="${SCRIPT_DIR}/noir_skip_tests.txt"
+
+# ---------------------------------------------------------------------------
+# Resolve test corpus root (CI clone vs. local vendored copy)
+# ---------------------------------------------------------------------------
+if [[ -n "${NOIR_REPO_DIR:-}" ]]; then
+ TEST_ROOT="${NOIR_REPO_DIR}/test_programs/execution_success"
+ TEST_LIB_ROOT="${NOIR_REPO_DIR}/test_programs/test_libraries"
+else
+ NOIR_ROOT="${REPO_ROOT}/test-programs/noir"
+ TEST_ROOT="${NOIR_ROOT}/execution_success"
+ TEST_LIB_ROOT="${NOIR_ROOT}/test_libraries"
+fi
+
+PROVEKIT_BIN="${PROVEKIT_BIN:-${REPO_ROOT}/target/release/provekit-cli}"
+MAX_TESTS="${MAX_TESTS:-0}"
+REQUIRED_NARGO_VERSION="${REQUIRED_NARGO_VERSION:-1.0.0-beta.19}"
+ENABLE_ENUMS_FALLBACK="${ENABLE_ENUMS_FALLBACK:-1}"
+TEST_FILTER="${TEST_FILTER:-}"
+RUN_ID="$(date -u +%Y%m%dT%H%M%SZ)"
+LOG_DIR="${LOG_DIR:-${REPO_ROOT}/scripts/noir_execution_logs/${RUN_ID}}"
+
+if [[ "${LOG_DIR}" != /* ]]; then
+ LOG_DIR="${REPO_ROOT}/${LOG_DIR}"
+fi
+
+# ---------------------------------------------------------------------------
+# Unimplemented-blackbox skip list
+# Single source of truth: scripts/noir_skip_tests.txt (shared with
+# scripts/generate_provekit_witness_report.py). Counted as SKIP (not FAIL).
+# ---------------------------------------------------------------------------
+SKIP_TESTS=()
+declare -A SKIP_SET
+if [[ -f "${SKIP_LIST_FILE}" ]]; then
+ while IFS= read -r _raw || [[ -n "${_raw}" ]]; do
+ _name="${_raw%%#*}"
+ _name="${_name#"${_name%%[![:space:]]*}"}"
+ _name="${_name%"${_name##*[![:space:]]}"}"
+ if [[ -n "${_name}" ]]; then
+ SKIP_TESTS+=("${_name}")
+ SKIP_SET["${_name}"]=1
+ fi
+ done < "${SKIP_LIST_FILE}"
+else
+ echo "WARNING: skip list ${SKIP_LIST_FILE} not found; no tests will be skipped." >&2
+fi
+
+if [[ ! -d "${TEST_ROOT}" ]]; then
+ echo "ERROR: Missing test corpus at ${TEST_ROOT}"
+ if [[ -z "${NOIR_REPO_DIR:-}" ]]; then
+ echo "Hint: run scripts/vendor_noir_execution_success.sh first, or set NOIR_REPO_DIR."
+ else
+ echo "Hint: check that NOIR_REPO_DIR (${NOIR_REPO_DIR}) contains test_programs/execution_success."
+ fi
+ exit 1
+fi
+
+if [[ ! -x "${PROVEKIT_BIN}" ]]; then
+ echo "Missing provekit-cli binary at ${PROVEKIT_BIN}"
+ echo "Build it first: cargo build --release --bin provekit-cli"
+ exit 1
+fi
+
+if ! command -v nargo >/dev/null 2>&1; then
+ echo "nargo is required but was not found in PATH."
+ echo "Install with noirup and set version: noirup --version v1.0.0-beta.19"
+ exit 1
+fi
+
+nargo_version="$(nargo --version)"
+if [[ "${nargo_version}" != *"${REQUIRED_NARGO_VERSION}"* ]]; then
+ echo "Unsupported nargo version: ${nargo_version}"
+ echo "Expected version containing: ${REQUIRED_NARGO_VERSION}"
+ echo "Switch with: noirup --version ${REQUIRED_NARGO_VERSION}"
+ exit 1
+fi
+
+if ! python3 -c "import tomllib" 2>/dev/null; then
+ echo "ERROR: python3.11+ is required (tomllib not found)."
+ echo "Current: $(python3 --version 2>&1)"
+ exit 1
+fi
+
+mkdir -p "${LOG_DIR}/per_test"
+GROUPED_REPORT_FILE="${LOG_DIR}/grouped_error_report.txt"
+WITNESS_CSV="${LOG_DIR}/provekit_witness_counts.csv"
+echo "test_name,provekit_constraints,provekit_witnesses" > "${WITNESS_CSV}"
+
+shopt -s nullglob globstar
+
+# Python helpers live in scripts/noir_execution_helpers.py; these are thin
+# shell wrappers so the main loop reads naturally.
+discover_test_dirs() {
+ python3 "${HELPER}" discover "${TEST_ROOT}"
+}
+
+resolve_prover_toml() {
+ python3 "${HELPER}" resolve-prover-toml "$1" "$2"
+}
+
+read_workdir_package_name() {
+ python3 "${HELPER}" package-name "$1"
+}
+
+relative_path() {
+ python3 -c 'import os, sys; print(os.path.relpath(sys.argv[2], sys.argv[1]))' "$1" "$2"
+}
+
+append_stage_marker() {
+ local log_file="$1"
+ local stage_name="$2"
+ local stage_status="$3"
+ printf '\n[%s] %s: %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "${stage_status}" "${stage_name}" >> "${log_file}"
+}
+
+mapfile -t test_dirs < <(discover_test_dirs)
+
+if [[ "${#test_dirs[@]}" -eq 0 ]]; then
+ echo "No runnable test programs found under ${TEST_ROOT}"
+ exit 1
+fi
+
+total=0
+passed=0
+failed=0
+skipped=0
+
+# Clean up the active test sandbox if the script exits unexpectedly (SIGINT, error).
+_current_sandbox=""
+_cleanup_sandbox() {
+ if [[ -n "${_current_sandbox:-}" && -d "${_current_sandbox}" ]]; then
+ rm -rf "${_current_sandbox}"
+ fi
+}
+trap _cleanup_sandbox EXIT INT TERM
+
+if [[ ! -d "${TEST_LIB_ROOT}" ]]; then
+ echo "WARNING: missing ${TEST_LIB_ROOT}; path-based dependency tests may fail."
+ echo "Run scripts/vendor_noir_execution_success.sh to vendor test_libraries as well."
+fi
+
+for test_name in "${test_dirs[@]}"; do
+ if [[ -n "${TEST_FILTER}" && ! "${test_name}" =~ ${TEST_FILTER} ]]; then
+ continue
+ fi
+
+    # First path component (the fixture directory) is what we key on in the
+    # skip set; for top-level tests it equals the test name itself.
+    leaf_name="${test_name%%/*}"
+
+ # --- Unimplemented blackbox skip list: no log, no noise ---
+ # Skip BEFORE incrementing `total` so MAX_TESTS caps only attempted tests.
+ if [[ "${SKIP_SET["${leaf_name}"]:-}" == "1" ]]; then
+ echo "SKIP (blackbox): ${test_name}"
+ (( skipped += 1 ))
+ continue
+ fi
+
+ (( total += 1 ))
+
+ if [[ "${MAX_TESTS}" -gt 0 && "${total}" -gt "${MAX_TESTS}" ]]; then
+ break
+ fi
+
+ test_dir="${TEST_ROOT}/${test_name}"
+ safe_test_name="${test_name//\//__}"
+
+ test_log="${LOG_DIR}/per_test/${safe_test_name}.log"
+
+ echo ""
+ echo "==> [${total}] ${test_name}"
+
+ : > "${test_log}"
+ {
+ echo "test_name=${test_name}"
+ echo "test_dir=${test_dir}"
+ echo "run_id=${RUN_ID}"
+ echo "nargo_version=${nargo_version}"
+ } >> "${test_log}"
+
+ if [[ ! -f "${test_dir}/Nargo.toml" ]]; then
+ echo "SKIP: missing Nargo.toml"
+ append_stage_marker "${test_log}" "test" "SKIP"
+ echo "SKIP: missing Nargo.toml" >> "${test_log}"
+ (( skipped += 1 ))
+ continue
+ fi
+
+ if [[ ! -d "${TEST_LIB_ROOT}" ]] && grep -q 'test_libraries' "${test_dir}"/Nargo.toml 2>/dev/null; then
+ echo "SKIP: missing test_libraries for relative path dependency"
+ append_stage_marker "${test_log}" "test" "SKIP"
+ echo "SKIP: missing test_libraries for relative path dependency" >> "${test_log}"
+ (( skipped += 1 ))
+ continue
+ fi
+
+ sandbox_root="$(mktemp -d)"
+ _current_sandbox="${sandbox_root}"
+ sandbox_noir_root="${sandbox_root}/test-programs/noir"
+ sandbox_exec_root="${sandbox_noir_root}/execution_success"
+ fixture_name="${test_name%%/*}"
+ fixture_src="${TEST_ROOT}/${fixture_name}"
+ fixture_dst="${sandbox_exec_root}/${fixture_name}"
+
+ mkdir -p "${sandbox_exec_root}"
+ cp -R "${fixture_src}" "${fixture_dst}"
+
+ if [[ -d "${TEST_LIB_ROOT}" ]]; then
+ mkdir -p "${sandbox_noir_root}"
+ ln -s "${TEST_LIB_ROOT}" "${sandbox_noir_root}/test_libraries"
+ fi
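+
+  # Sandbox layout at this point (test name illustrative):
+  #   ${sandbox_root}/test-programs/noir/execution_success/fold_basic/  # copied fixture
+  #   ${sandbox_root}/test-programs/noir/test_libraries                 # symlink, if vendored
+  # This mirrors the repo layout, so relative `path = ...` dependencies in
+  # Nargo.toml should presumably resolve unchanged.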
+
+ workdir="${sandbox_exec_root}/${test_name}"
+ echo "sandbox_root=${sandbox_root}" >> "${test_log}"
+ echo "workdir=${workdir}" >> "${test_log}"
+
+ append_stage_marker "${test_log}" "nargo compile" "START"
+ compile_ok=0
+
+ if (cd "${workdir}" && nargo compile >> "${test_log}" 2>&1); then
+ compile_ok=1
+ elif [[ "${ENABLE_ENUMS_FALLBACK}" -eq 1 ]] && grep -q "unstable feature 'enums'" "${test_log}"; then
+ append_stage_marker "${test_log}" "nargo compile -Zenums" "RETRY"
+ if (cd "${workdir}" && nargo compile -Zenums >> "${test_log}" 2>&1); then
+ compile_ok=1
+ fi
+ fi
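+  # NB: the -Zenums retry keys off the exact substring "unstable feature
+  # 'enums'" in nargo's diagnostic; an upstream rewording would silently
+  # disable the fallback.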
+
+ if [[ "${compile_ok}" -ne 1 ]]; then
+ append_stage_marker "${test_log}" "nargo compile" "FAIL"
+ echo "FAIL: nargo compile"
+ echo "FAIL: nargo compile" >> "${test_log}"
+ (( failed += 1 ))
+ rm -rf "${sandbox_root}"
+ continue
+ fi
+
+ append_stage_marker "${test_log}" "nargo compile" "PASS"
+
+ compiled_jsons=("${workdir}"/target/*.json)
+ if [[ "${#compiled_jsons[@]}" -eq 0 ]]; then
+ compiled_jsons=("${sandbox_exec_root}/${fixture_name}"/target/*.json)
+ fi
+ if [[ "${#compiled_jsons[@]}" -eq 0 ]]; then
+ compiled_jsons=("${sandbox_exec_root}/${fixture_name}"/**/target/*.json)
+ fi
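+  # These globs rely on `nullglob` (set above), so a miss yields an empty
+  # array rather than the literal pattern; the `**` fallback additionally
+  # needs `globstar` to recurse into nested package layouts.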
+ if [[ "${#compiled_jsons[@]}" -eq 0 ]]; then
+ append_stage_marker "${test_log}" "compile output check" "FAIL"
+ echo "FAIL: missing compiled target JSON after nargo compile"
+ echo "FAIL: missing compiled target JSON after nargo compile" >> "${test_log}"
+ (( failed += 1 ))
+ rm -rf "${sandbox_root}"
+ continue
+ fi
+
+ workdir_package_name="$(read_workdir_package_name "${workdir}")"
+ circuit_json_abs=""
+ if [[ -n "${workdir_package_name}" ]]; then
+ for candidate_json in "${compiled_jsons[@]}"; do
+ if [[ "$(basename "${candidate_json}" .json)" == "${workdir_package_name}" ]]; then
+ circuit_json_abs="${candidate_json}"
+ break
+ fi
+ done
+ fi
+ if [[ -z "${circuit_json_abs}" ]]; then
+ circuit_json_abs="${compiled_jsons[0]}"
+ fi
+
+ circuit_json="$(relative_path "${workdir}" "${circuit_json_abs}")"
+ package_name="$(basename "${circuit_json_abs}" .json)"
+ prover_toml_rel="$(resolve_prover_toml "${workdir}" "${package_name}")"
+
+ if [[ -z "${prover_toml_rel}" || ! -f "${workdir}/${prover_toml_rel}" ]]; then
+ append_stage_marker "${test_log}" "resolve prover.toml" "FAIL"
+ echo "FAIL: could not locate Prover.toml for compiled package ${package_name}"
+ echo "FAIL: could not locate Prover.toml for compiled package ${package_name}" >> "${test_log}"
+ (( failed += 1 ))
+ rm -rf "${sandbox_root}"
+ continue
+ fi
+
+ echo "circuit_json=${circuit_json}" >> "${test_log}"
+ echo "prover_toml=${prover_toml_rel}" >> "${test_log}"
+
+ append_stage_marker "${test_log}" "provekit-cli prepare" "START"
+ if ! (cd "${workdir}" && "${PROVEKIT_BIN}" prepare "./${circuit_json}" --pkp "./prover.pkp" --pkv "./verifier.pkv" >> "${test_log}" 2>&1); then
+ append_stage_marker "${test_log}" "provekit-cli prepare" "FAIL"
+ echo "FAIL: provekit-cli prepare"
+ echo "FAIL: provekit-cli prepare" >> "${test_log}"
+ (( failed += 1 ))
+ rm -rf "${sandbox_root}"
+ continue
+ fi
+ append_stage_marker "${test_log}" "provekit-cli prepare" "PASS"
+
+ # Extract ProveKit post-GE constraint and witness counts before the log is deleted on success.
+  # Keep this non-fatal under `set -euo pipefail`: if the line is absent or its
+  # wording changes, we simply skip the CSV row for this test.
+ _ge_line="$(grep -o 'After GE optimization: [0-9]* constraints, [0-9]* witnesses' "${test_log}" | tail -1 || true)"
+ _pk_constraints=""
+ _pk_witnesses=""
+ if [[ "${_ge_line}" =~ ([0-9]+)\ constraints,\ ([0-9]+)\ witnesses$ ]]; then
+ _pk_constraints="${BASH_REMATCH[1]}"
+ _pk_witnesses="${BASH_REMATCH[2]}"
+ fi
+ if [[ -n "${_pk_witnesses}" ]]; then
+ echo "${test_name},${_pk_constraints},${_pk_witnesses}" >> "${WITNESS_CSV}"
+ fi
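+  # Matched log line and emitted CSV row (numbers illustrative):
+  #   After GE optimization: 1234 constraints, 5678 witnesses
+  #   fold_basic/nested_pkg,1234,5678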
+
+ append_stage_marker "${test_log}" "provekit-cli prove" "START"
+ if ! (cd "${workdir}" && "${PROVEKIT_BIN}" prove "./prover.pkp" "./${prover_toml_rel}" -o "./proof.np" >> "${test_log}" 2>&1); then
+ append_stage_marker "${test_log}" "provekit-cli prove" "FAIL"
+ echo "FAIL: provekit-cli prove"
+ echo "FAIL: provekit-cli prove" >> "${test_log}"
+ (( failed += 1 ))
+ rm -rf "${sandbox_root}"
+ continue
+ fi
+ append_stage_marker "${test_log}" "provekit-cli prove" "PASS"
+
+ append_stage_marker "${test_log}" "provekit-cli verify" "START"
+ if ! (cd "${workdir}" && "${PROVEKIT_BIN}" verify "./verifier.pkv" "./proof.np" >> "${test_log}" 2>&1); then
+ append_stage_marker "${test_log}" "provekit-cli verify" "FAIL"
+ echo "FAIL: provekit-cli verify"
+ echo "FAIL: provekit-cli verify" >> "${test_log}"
+ (( failed += 1 ))
+ rm -rf "${sandbox_root}"
+ continue
+ fi
+ append_stage_marker "${test_log}" "provekit-cli verify" "PASS"
+
+ echo "PASS"
+ (( passed += 1 ))
+ rm -rf "${sandbox_root}"
+ # Remove per-test log for passing tests to keep artifacts lean
+ rm -f "${test_log}"
+done
+
+# Blackbox skips bump `skipped` without bumping `total` (see the skip block
+# above), while the in-loop skips bump both; summing passed+failed+skipped
+# would therefore overstate the attempt count by the number of blackbox skips.
+attempted=${total}
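+# e.g. 10 discovered, 2 blackbox-skipped, 1 skipped in-loop, 5 passed, 2 failed
+# -> attempted=8, skipped=3 (numbers illustrative).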
+
+echo ""
+echo "----- execution_success summary -----"
+echo "Total discovered : ${#test_dirs[@]}"
+if [[ -n "${TEST_FILTER}" ]]; then
+  echo "Test filter      : ${TEST_FILTER}"
+fi
+if [[ "${MAX_TESTS}" -gt 0 ]]; then
+  echo "Attempted limit  : ${MAX_TESTS}"
+else
+  echo "Attempted limit  : all"
+fi
+echo "Attempted        : ${attempted}"
+echo "Passed           : ${passed}"
+echo "Failed           : ${failed}"
+echo "Skipped          : ${skipped} (skip list: ${#SKIP_TESTS[@]} unimplemented-blackbox tests)"
+echo "Log directory    : ${LOG_DIR}"
+
+python3 "${HELPER}" build-report "${LOG_DIR}" "${passed}" "${failed}" "${skipped}"
+
+# Emit GitHub Step Summary when running inside Actions
+# (must be after the Python report generator so grouped_error_report.txt exists)
+if [[ -n "${GITHUB_STEP_SUMMARY:-}" ]]; then
+ {
+ echo "## Noir execution_success — ${RUN_ID}"
+ echo ""
+ echo "| Metric | Count |"
+ echo "|--------|------|"
+ echo "| Discovered | ${#test_dirs[@]} |"
+ echo "| Attempted | ${attempted} |"
+ echo "| ✅ Passed | ${passed} |"
+ echo "| ❌ Failed | ${failed} |"
+    echo "| ⏭️ Skipped | ${skipped} (skip list: ${#SKIP_TESTS[@]} unimplemented blackboxes) |"
+ if [[ ${failed} -gt 0 ]]; then
+ echo ""
+ echo "### Failure groups"
+ echo '```'
+ cat "${GROUPED_REPORT_FILE}" 2>/dev/null || echo "(no grouped report)"
+ echo '```'
+ fi
+ } >> "${GITHUB_STEP_SUMMARY}"
+fi
+
+echo "Grouped report   : ${GROUPED_REPORT_FILE}"
+
+# Generate ProveKit witness count report
+if [[ -f "${WITNESS_CSV}" ]] && python3 "${SCRIPT_DIR}/generate_provekit_witness_report.py" "${WITNESS_CSV}" "${LOG_DIR}"; then
+ echo "ProveKit witness report: ${LOG_DIR}/provekit_witness_report.md"
+ if [[ -n "${GITHUB_STEP_SUMMARY:-}" ]]; then
+ {
+ echo ""
+ echo "## ProveKit Witness Counts"
+ head -4 "${LOG_DIR}/provekit_witness_report.md"
+ echo ""
+ echo "_Full table available in artifact: \`provekit_witness_report.md\`_"
+ } >> "${GITHUB_STEP_SUMMARY}"
+ fi
+fi
+
+# Circuit failures are surfaced via the PR sticky comment and the grouped
+# error report. The workflow should not fail just because some circuits
+# don't compile through provekit-cli today — the report is the source of
+# truth for which circuits pass. Set STRICT_FAIL=1 to opt into the old
+# "exit 1 on any failure" behaviour for local CI gates.
+if [[ "${STRICT_FAIL:-0}" == "1" && "${failed}" -gt 0 ]]; then
+ exit 1
+fi
+
+exit 0
diff --git a/scripts/vendor_noir_execution_success.sh b/scripts/vendor_noir_execution_success.sh
new file mode 100755
index 000000000..9bdfae962
--- /dev/null
+++ b/scripts/vendor_noir_execution_success.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+DEST_EXEC_DIR="${REPO_ROOT}/test-programs/noir/execution_success"
+DEST_LIB_DIR="${REPO_ROOT}/test-programs/noir/test_libraries"
+NOIR_REF="${NOIR_REF:-v1.0.0-beta.19}"
+
+tmpdir="$(mktemp -d)"
+cleanup() {
+ rm -rf "${tmpdir}"
+}
+trap cleanup EXIT
+
+echo "Vendoring noir-lang/noir:test_programs/{execution_success,test_libraries} (ref: ${NOIR_REF})"
+
+git clone --depth 1 --filter=blob:none --sparse --branch "${NOIR_REF}" \
+ "https://github.com/noir-lang/noir.git" "${tmpdir}/noir"
+git -C "${tmpdir}/noir" sparse-checkout set \
+ "test_programs/execution_success" \
+ "test_programs/test_libraries"
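+
+# `--filter=blob:none` defers blob downloads and `--sparse` starts from a
+# minimal checkout; the sparse-checkout set above then materialises only the
+# two test_programs subtrees (fetching just their blobs).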
+
+mkdir -p "$(dirname "${DEST_EXEC_DIR}")"
+rm -rf "${DEST_EXEC_DIR}" "${DEST_LIB_DIR}"
+cp -R "${tmpdir}/noir/test_programs/execution_success" "${DEST_EXEC_DIR}"
+cp -R "${tmpdir}/noir/test_programs/test_libraries" "${DEST_LIB_DIR}"
+
+source_commit="$(git -C "${tmpdir}/noir" rev-parse HEAD)"
+generated_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
+
+cat > "${REPO_ROOT}/test-programs/noir/execution_success.SOURCE" <