diff --git a/.github/scripts/build_csp_pr_comment.py b/.github/scripts/build_csp_pr_comment.py
new file mode 100644
index 000000000..31e9670f9
--- /dev/null
+++ b/.github/scripts/build_csp_pr_comment.py
@@ -0,0 +1,290 @@
+#!/usr/bin/env python3
+"""Build a sticky PR comment for the CSP benchmarks workflow.
+
+Reads the CSV emitted by ``scripts/run_csp_benchmarks.sh`` (one row per
+circuit) and renders it as a markdown table with human-readable units. If
+``--baseline-csv`` is given, each metric cell appends a percentage delta
+versus the baseline value (last successful CSP-benchmarks run on main).
+"""
+
+from __future__ import annotations
+
+import argparse
+import csv
+from pathlib import Path
+
+# Hidden HTML-comment marker used to find and update the sticky PR comment;
+# the exact string is arbitrary, but it must match `marker` in
+# csp-benchmarks.yml.
+MARKER = "<!-- csp-benchmarks-sticky-comment -->"
+MAX_COMMENT_CHARS = 62000
+
+# Metric columns we render with a delta. Order matches the table header.
+METRIC_COLUMNS: tuple[tuple[str, str], ...] = (
+ ("num_constraints", "int"),
+ ("num_witnesses", "int"),
+ ("prover_time_ms", "ms"),
+ ("prover_peak_rss_kb", "kb"),
+ ("prover_heap_peak_bytes", "bytes"),
+ ("verifier_time_ms", "ms"),
+ ("proof_size_bytes", "bytes"),
+ ("pkp_size_bytes", "bytes"),
+)
+
+
+def fmt_bytes(value: float) -> str:
+ if value <= 0:
+ return "—"
+ units = ("B", "KB", "MB", "GB", "TB")
+ idx = 0
+ while value >= 1024 and idx < len(units) - 1:
+ value /= 1024.0
+ idx += 1
+ if value >= 100 or idx == 0:
+ return f"{value:.0f} {units[idx]}"
+ return f"{value:.2f} {units[idx]}"
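+# Illustrative behavior: fmt_bytes(512) == "512 B", fmt_bytes(1536) == "1.50 KB",
+# and non-positive values render as "—".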
+
+
+def fmt_kb_to_bytes(rss_kb: float) -> str:
+ return fmt_bytes(rss_kb * 1024.0)
+
+
+def fmt_ms(ms: float) -> str:
+ if ms <= 0:
+ return "—"
+ if ms < 1000:
+ return f"{ms:.0f} ms"
+ return f"{ms / 1000.0:.2f} s"
+
+
+def fmt_int(value: float) -> str:
+ if value <= 0:
+ return "—"
+ return f"{int(round(value)):,}"
+
+
+def fmt_value(unit: str, value: float) -> str:
+ if unit == "ms":
+ return fmt_ms(value)
+ if unit == "kb":
+ return fmt_kb_to_bytes(value)
+ if unit == "int":
+ return fmt_int(value)
+ return fmt_bytes(value)
+
+
+def fmt_delta(current: float, baseline: float | None) -> str:
+    """Return a compact delta-vs-baseline annotation, or the empty string.
+
+    - Returns "" when the baseline value is missing or non-positive (the
+      caller renders "(new)" for circuits absent from the baseline).
+    - Returns " (±0.0%)" / " (+1.2%)" / " (-3.4%)" otherwise.
+    """
+ if baseline is None:
+ return ""
+ if baseline <= 0:
+ # Baseline collected zero (e.g., older CSV without this metric).
+ # Don't show a misleading divide-by-zero ratio.
+ return ""
+ if current <= 0:
+ return ""
+ delta_pct = (current - baseline) / baseline * 100.0
+ if abs(delta_pct) < 0.05:
+ return " (±0.0%)"
+ sign = "+" if delta_pct > 0 else ""
+ return f" ({sign}{delta_pct:.1f}%)"
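+# Illustrative: fmt_delta(110.0, 100.0) == " (+10.0%)", fmt_delta(100.0, 100.0)
+# == " (±0.0%)", and fmt_delta(5.0, None) == "" (no baseline at all).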
+
+
+def status_with_icon(status: str) -> str:
+ normalized = (status or "unknown").strip().lower()
+ labels = {
+ "success": "[PASS]",
+ "failure": "[FAIL]",
+ "cancelled": "[CANCELLED]",
+ "skipped": "[SKIPPED]",
+ }
+ return f"{labels.get(normalized, '[INFO]')} {normalized}"
+
+
+def read_rows(csv_path: Path) -> list[dict[str, str]]:
+ if not csv_path.is_file():
+ return []
+ with csv_path.open(newline="") as f:
+ return list(csv.DictReader(f))
+
+
+def index_baseline(rows: list[dict[str, str]]) -> dict[str, dict[str, float]]:
+ """Index baseline rows by circuit name with float metric values."""
+ out: dict[str, dict[str, float]] = {}
+ for row in rows:
+ circuit = (row.get("circuit") or "").strip()
+ if not circuit:
+ continue
+ metrics: dict[str, float] = {}
+ for metric, _unit in METRIC_COLUMNS:
+ try:
+ metrics[metric] = float(row.get(metric) or 0)
+ except ValueError:
+ metrics[metric] = 0.0
+ out[circuit] = metrics
+ return out
+
+
+def render_table(
+ rows: list[dict[str, str]],
+ baseline: dict[str, dict[str, float]],
+ has_baseline_file: bool,
+) -> str:
+ if not rows:
+ return "_No benchmark results were produced._"
+
+ header = (
+ "| Circuit | Constraints | Witnesses | Prover time | Peak RSS | "
+ "Peak heap | Verifier time | Proof size | PKP size |"
+ )
+ sep = "|---|---:|---:|---:|---:|---:|---:|---:|---:|"
+ lines = [header, sep]
+
+ for row in sorted(rows, key=lambda r: r.get("circuit", "")):
+ circuit = row.get("circuit", "")
+ baseline_metrics = baseline.get(circuit)
+
+ cells = [f"`{circuit}`"]
+ for metric, unit in METRIC_COLUMNS:
+ try:
+ value = float(row.get(metric) or 0)
+ except ValueError:
+ value = 0.0
+
+ value_str = fmt_value(unit, value)
+
+ if has_baseline_file and value_str != "—":
+ if baseline_metrics is None:
+ delta = " (new)"
+ else:
+ delta = fmt_delta(value, baseline_metrics.get(metric))
+ cells.append(f"{value_str}{delta}")
+ else:
+ cells.append(value_str)
+ lines.append("| " + " | ".join(cells) + " |")
+
+ return "\n".join(lines)
+
+
+def compose_comment(
+ rows: list[dict[str, str]],
+ baseline: dict[str, dict[str, float]],
+ baseline_run_id: str,
+ has_baseline_file: bool,
+ run_id: str,
+ run_url: str,
+ sha: str,
+ status: str,
+ runs_per_circuit: str,
+) -> str:
+ short_sha = sha[:12] if sha else "unknown"
+ table = render_table(rows, baseline, has_baseline_file)
+
+ if has_baseline_file:
+ if baseline_run_id:
+ baseline_note = (
+ f"Each metric cell shows the current value followed by the "
+ f"percentage delta against the latest successful "
+ f"[`main` run #{baseline_run_id}](https://github.com/worldfnd/provekit/actions/runs/{baseline_run_id}). "
+ f"`(new)` marks circuits absent from the baseline."
+ )
+ else:
+ baseline_note = (
+ "Each metric cell shows the current value followed by the "
+ "percentage delta against the latest successful `main` run. "
+ "`(new)` marks circuits absent from the baseline."
+ )
+ else:
+ baseline_note = (
+ "_No baseline available yet — deltas will appear once this "
+ "workflow has produced at least one successful `main` run._"
+ )
+
+ lines = [
+ MARKER,
+ "## CSP benchmarks",
+ "",
+ "| Metric | Value |",
+ "|--------|-------|",
+ f"| Workflow status | {status_with_icon(status)} |",
+ f"| Commit | `{short_sha}` |",
+ f"| Run | [#{run_id}]({run_url}) |",
+ f"| Circuits benchmarked | {len(rows)} |",
+        f"| Iterations averaged per circuit | {runs_per_circuit} |",
+        "",
+        "Prover time, peak RSS, peak heap, and verifier time are arithmetic means "
+        "across the iterations. Peak heap is the largest "
+        "`peak memory` entry in `provekit-cli prove`'s tracing output; peak RSS "
+        "is `%M` (maximum resident set size, KiB) from `/usr/bin/time -f '%e %M'`.",
+ "",
+        baseline_note,
+        "",
+        "<details>",
+        "<summary>Results</summary>",
+        "",
+        table,
+        "",
+        "</details>",
+        "",
+ ]
+ return "\n".join(lines)
+
+
+def parse_args() -> argparse.Namespace:
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument("--results-csv", required=True, type=Path)
+ parser.add_argument(
+ "--baseline-csv",
+ type=Path,
+ default=None,
+ help="Optional CSV from the latest successful main run.",
+ )
+ parser.add_argument(
+ "--baseline-run-id",
+ default="",
+ help="Optional Actions run id of the baseline (for the link in the comment).",
+ )
+ parser.add_argument("--output", required=True, type=Path)
+ parser.add_argument("--run-id", required=True)
+ parser.add_argument("--run-url", required=True)
+ parser.add_argument("--sha", required=True)
+ parser.add_argument("--status", required=True)
+ parser.add_argument("--runs-per-circuit", required=True)
+ return parser.parse_args()
+
+
+def main() -> None:
+ args = parse_args()
+ rows = read_rows(args.results_csv)
+
+ has_baseline_file = bool(
+ args.baseline_csv and args.baseline_csv.is_file()
+ )
+ baseline_rows = read_rows(args.baseline_csv) if has_baseline_file else []
+ baseline = index_baseline(baseline_rows)
+
+ body = compose_comment(
+ rows=rows,
+ baseline=baseline,
+ baseline_run_id=args.baseline_run_id,
+ has_baseline_file=has_baseline_file,
+ run_id=args.run_id,
+ run_url=args.run_url,
+ sha=args.sha,
+ status=args.status,
+ runs_per_circuit=args.runs_per_circuit,
+ )
+ if len(body) > MAX_COMMENT_CHARS:
+ cut = body[: MAX_COMMENT_CHARS - 80].rstrip()
+ body = f"{cut}\n\n_Comment truncated due to GitHub size limits._\n"
+
+ args.output.parent.mkdir(parents=True, exist_ok=True)
+ args.output.write_text(body, encoding="utf-8")
+ print(f"Wrote PR comment body to {args.output} ({len(body)} chars)")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/.github/scripts/build_noir_pr_comment.py b/.github/scripts/build_noir_pr_comment.py
new file mode 100644
index 000000000..f98bc7723
--- /dev/null
+++ b/.github/scripts/build_noir_pr_comment.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python3
+"""Build a sticky PR comment for noir execution_success workflow runs."""
+
+from __future__ import annotations
+
+import argparse
+import re
+from pathlib import Path
+
+# Hidden HTML-comment marker used to find and update the sticky PR comment;
+# the exact string is arbitrary, but it must match `marker` in
+# noir-execution-success.yml.
+MARKER = "<!-- noir-execution-success-sticky-comment -->"
+MAX_COMMENT_CHARS = 62000
+
+
+def read_report(path: Path, display_name: str) -> str:
+ if not path.is_file():
+ return f"(missing: {display_name})"
+
+ text = path.read_text(encoding="utf-8", errors="replace").strip()
+ if not text:
+ return f"(empty: {display_name})"
+ return text
+
+
+def parse_grouped_counts(grouped_report_text: str) -> dict[str, str]:
+ counts: dict[str, str] = {}
+ for key in ("PASS", "FAIL", "SKIP"):
+ match = re.search(rf"^{key}=(\d+)$", grouped_report_text, flags=re.MULTILINE)
+ counts[key] = match.group(1) if match else "n/a"
+ return counts
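+# grouped_error_report.txt starts with "KEY=value" lines, e.g. "PASS=170",
+# "FAIL=2", "SKIP=21"; a key absent from the report renders as "n/a".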
+
+
+def parse_failing_circuits(grouped_report_text: str) -> list[str]:
+ """Extract the flat sorted list of failing circuits from the [stages] section.
+
+ The grouped report's [stages] section only contains failing tests (skipped
+ tests are routed to [grouped] instead). Each line looks like:
+        <stage>\\t<count>\\t<circuit>, <circuit>, ...
+ """
+ match = re.search(
+ r"^\[stages\]\n(.*?)(?:\n\[|\Z)",
+ grouped_report_text,
+ flags=re.DOTALL | re.MULTILINE,
+ )
+ if not match:
+ return []
+
+ names: set[str] = set()
+ for line in match.group(1).splitlines():
+ line = line.strip()
+ if not line:
+ continue
+ parts = line.split("\t")
+ if len(parts) < 3:
+ continue
+ for raw in parts[2].split(","):
+ name = raw.strip()
+ if name:
+ names.add(name)
+ return sorted(names)
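+# Illustrative [stages] line (tab-separated, as written by
+# noir_execution_helpers.build_grouped_report):
+#   provekit-cli prepare\t2\tbar_circuit, foo_circuit
+# which this function turns into ["bar_circuit", "foo_circuit"].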
+
+
+def status_with_icon(status: str) -> str:
+ normalized = (status or "unknown").strip().lower()
+ labels = {
+ "success": "[PASS]",
+ "failure": "[FAIL]",
+ "cancelled": "[CANCELLED]",
+ "skipped": "[SKIPPED]",
+ }
+ return f"{labels.get(normalized, '[INFO]')} {normalized}"
+
+
+def compose_comment(
+ grouped_report_text: str,
+ run_id: str,
+ run_url: str,
+ sha: str,
+ noir_ref: str,
+ status: str,
+) -> str:
+ counts = parse_grouped_counts(grouped_report_text)
+ short_sha = sha[:12] if sha else "unknown"
+
+ failing_circuits = parse_failing_circuits(grouped_report_text)
+ if failing_circuits:
+ failing_body = "\n".join(f"- `{name}`" for name in failing_circuits)
+ failing_summary = f"Failing circuits ({len(failing_circuits)})"
+ else:
+ failing_body = "_No failing circuits._"
+ failing_summary = "Failing circuits (0)"
+
+ lines = [
+ MARKER,
+ "## Noir execution_success report",
+ "",
+ "| Metric | Value |",
+ "|--------|-------|",
+ f"| Workflow status | {status_with_icon(status)} |",
+ f"| Noir ref | `{noir_ref}` |",
+ f"| Commit | `{short_sha}` |",
+ f"| Run | [#{run_id}]({run_url}) |",
+ f"| PASS | {counts['PASS']} |",
+ f"| FAIL | {counts['FAIL']} |",
+        f"| SKIP | {counts['SKIP']} |",
+        "",
+        "<details>",
+        f"<summary>{failing_summary}</summary>",
+        "",
+        failing_body,
+        "",
+        "</details>",
+        "",
+ ]
+
+ return "\n".join(lines)
+
+
+def parse_args() -> argparse.Namespace:
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument("--grouped-report", required=True, type=Path)
+ parser.add_argument("--output", required=True, type=Path)
+ parser.add_argument("--run-id", required=True)
+ parser.add_argument("--run-url", required=True)
+ parser.add_argument("--sha", required=True)
+ parser.add_argument("--noir-ref", required=True)
+ parser.add_argument("--status", required=True)
+ return parser.parse_args()
+
+
+def main() -> None:
+ args = parse_args()
+
+ grouped_report_text = read_report(args.grouped_report, "grouped_error_report.txt")
+
+ body = compose_comment(
+ grouped_report_text=grouped_report_text,
+ run_id=args.run_id,
+ run_url=args.run_url,
+ sha=args.sha,
+ noir_ref=args.noir_ref,
+ status=args.status,
+ )
+
+ if len(body) > MAX_COMMENT_CHARS:
+ cut = body[: MAX_COMMENT_CHARS - 80].rstrip()
+ body = f"{cut}\n\n_Comment truncated due to GitHub size limits._\n"
+
+ args.output.parent.mkdir(parents=True, exist_ok=True)
+ args.output.write_text(body, encoding="utf-8")
+ print(f"Wrote PR comment body to {args.output} ({len(body)} chars)")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/.github/workflows/csp-benchmarks.yml b/.github/workflows/csp-benchmarks.yml
new file mode 100644
index 000000000..3e17f6368
--- /dev/null
+++ b/.github/workflows/csp-benchmarks.yml
@@ -0,0 +1,168 @@
+name: CSP Benchmarks
+
+on:
+ pull_request:
+ push:
+ branches: [main]
+ workflow_dispatch:
+ inputs:
+ bench_runs:
+ description: "Iterations per circuit (default: 3)"
+ required: false
+ default: "3"
+
+permissions:
+ contents: read
+ pull-requests: write
+ issues: write
+ # Needed to read artifacts from previous successful main runs so we can
+ # render percentage deltas in the PR comment.
+ actions: read
+
+env:
+ CARGO_TERM_COLOR: always
+  # `github.event.inputs` is empty outside workflow_dispatch, so this falls
+  # back to 3 on pull_request/push events as well as on a blank input.
+  BENCH_RUNS: ${{ github.event.inputs.bench_runs || '3' }}
+ REQUIRED_NARGO_VERSION: "1.0.0-beta.19"
+
+concurrency:
+ group: csp-benchmarks-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ csp-benchmarks:
+ # Block fork PRs from running our heavy bench script on the runner.
+ # Push to main always runs so the artifact becomes a baseline for PRs.
+ if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
+    name: CSP benchmarks (avg over ${{ github.event.inputs.bench_runs || '3' }} runs)
+ runs-on: ubuntu-24.04-arm
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Rust toolchain
+ uses: moonrepo/setup-rust@v1
+ with:
+ channel: nightly-2026-03-04
+ cache-base: main
+
+ - name: Build provekit-cli (release)
+ run: cargo build --release --bin provekit-cli
+
+ - name: Setup Noir toolchain
+ uses: noir-lang/noirup@v0.1.2
+ with:
+ toolchain: ${{ env.REQUIRED_NARGO_VERSION }}
+
+ - name: Run CSP benchmarks
+ env:
+ PROVEKIT_BIN: ${{ github.workspace }}/target/release/provekit-cli
+ BENCH_DIR: ${{ github.workspace }}/csp-bench-logs
+ BENCH_RUNS: ${{ env.BENCH_RUNS }}
+ run: |
+ bash scripts/run_csp_benchmarks.sh
+
+ - name: Upload bench artifacts
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: csp-bench-logs-${{ github.run_id }}
+ path: csp-bench-logs/
+ retention-days: 7
+
+ - name: Fetch baseline from latest successful main run
+ if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+ continue-on-error: true
+ env:
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ GH_REPO: ${{ github.repository }}
+ run: |
+ set -e
+ BASELINE_RUN_ID="$(gh run list \
+ --workflow csp-benchmarks.yml \
+ --branch main \
+ --status success \
+ --limit 1 \
+ --json databaseId \
+ --jq '.[0].databaseId // empty')"
+ if [[ -z "${BASELINE_RUN_ID}" ]]; then
+ echo "No successful main run found yet; deltas will not be shown."
+ exit 0
+ fi
+ echo "Baseline run id: ${BASELINE_RUN_ID}"
+ mkdir -p csp-bench-logs/baseline
+ if gh run download "${BASELINE_RUN_ID}" \
+ --name "csp-bench-logs-${BASELINE_RUN_ID}" \
+ --dir csp-bench-logs/baseline; then
+ echo "BASELINE_RUN_ID=${BASELINE_RUN_ID}" >> "$GITHUB_ENV"
+ else
+ echo "Baseline artifact not retrievable; deltas will not be shown."
+ fi
+
+ - name: Build sticky PR comment body
+ if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+ continue-on-error: true
+ run: |
+ python3 .github/scripts/build_csp_pr_comment.py \
+ --results-csv "csp-bench-logs/results.csv" \
+ --baseline-csv "csp-bench-logs/baseline/results.csv" \
+ --baseline-run-id "${BASELINE_RUN_ID:-}" \
+ --output "csp-bench-logs/pr_comment.md" \
+ --run-id "${{ github.run_id }}" \
+ --run-url "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \
+ --sha "${{ github.sha }}" \
+ --status "${{ job.status }}" \
+ --runs-per-circuit "${{ env.BENCH_RUNS }}"
+
+ - name: Upsert sticky CSP benchmarks comment
+ if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+ continue-on-error: true
+ uses: actions/github-script@v7
+ with:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ script: |
+ const fs = require('fs');
+          const marker = '<!-- csp-benchmarks-sticky-comment -->';
+ const bodyPath = 'csp-bench-logs/pr_comment.md';
+ const fallbackBody = [
+ marker,
+ '## CSP benchmarks',
+ '',
+ 'Unable to generate the detailed report body for this run.',
+ '',
+ 'Run: [#${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})',
+ ].join('\n');
+ const body = fs.existsSync(bodyPath) ? fs.readFileSync(bodyPath, 'utf8') : fallbackBody;
+
+ const { owner, repo } = context.repo;
+ const issue_number = context.issue.number;
+ const comments = await github.paginate(github.rest.issues.listComments, {
+ owner,
+ repo,
+ issue_number,
+ per_page: 100,
+ });
+
+ const existing = comments.find((comment) =>
+ comment.user &&
+ comment.user.login === 'github-actions[bot]' &&
+ comment.body &&
+ comment.body.includes(marker)
+ );
+
+ if (existing) {
+ await github.rest.issues.updateComment({
+ owner,
+ repo,
+ comment_id: existing.id,
+ body,
+ });
+ core.info(`Updated existing CSP benchmarks comment (id=${existing.id}).`);
+ } else {
+ const created = await github.rest.issues.createComment({
+ owner,
+ repo,
+ issue_number,
+ body,
+ });
+ core.info(`Created new CSP benchmarks comment (id=${created.data.id}).`);
+ }
diff --git a/.github/workflows/noir-execution-success.yml b/.github/workflows/noir-execution-success.yml
new file mode 100644
index 000000000..8aeafafbb
--- /dev/null
+++ b/.github/workflows/noir-execution-success.yml
@@ -0,0 +1,160 @@
+name: Noir Execution Success Tests
+
+# Provide a noir_ref to test against any Noir release.
+on:
+ pull_request:
+ workflow_dispatch:
+ inputs:
+ noir_ref:
+ description: "noir-lang/noir release tag (e.g. v1.0.0-beta.19)"
+ required: false
+ default: "v1.0.0-beta.19"
+
+permissions:
+ contents: read
+ pull-requests: write
+ issues: write
+
+env:
+ CARGO_TERM_COLOR: always
+ NOIR_REF: ${{ github.event.inputs.noir_ref || 'v1.0.0-beta.19' }}
+
+# Cancel any in-progress run on the same branch when a new one is triggered.
+concurrency:
+ group: noir-exec-success-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ noir-execution-success:
+ # Block fork PRs from executing arbitrary build scripts on the self-hosted runner.
+ if: ${{ github.event_name == 'workflow_dispatch' || github.event.pull_request.head.repo.full_name == github.repository }}
+    # The `env` context is not available in a job-level `name`, so the default
+    # is repeated here instead of referencing env.NOIR_REF.
+    name: Noir execution_success suite (${{ github.event.inputs.noir_ref || 'v1.0.0-beta.19' }})
+ runs-on: ubuntu-24.04-arm
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Rust toolchain
+ uses: moonrepo/setup-rust@v1
+ with:
+ channel: nightly-2026-03-04
+ cache-base: main
+
+ - name: Build provekit-cli
+ run: cargo build --release --bin provekit-cli
+
+ - name: Setup Noir toolchain
+ uses: noir-lang/noirup@v0.1.2
+ with:
+ toolchain: ${{ env.NOIR_REF }}
+
+ # Sparse checkout — only fetch test_programs/, not the full noir repo.
+ - name: Fetch noir test programs (sparse checkout)
+ run: |
+ tmpdir=$(mktemp -d)
+ # Export the parent so the cleanup step can remove it entirely.
+ echo "NOIR_TMPDIR=${tmpdir}" >> "$GITHUB_ENV"
+ echo "NOIR_REPO_DIR=${tmpdir}/noir" >> "$GITHUB_ENV"
+ git clone \
+ --depth 1 \
+ --filter=blob:none \
+ --sparse \
+ --branch "$NOIR_REF" \
+ https://github.com/noir-lang/noir.git "${tmpdir}/noir"
+ git -C "${tmpdir}/noir" sparse-checkout set \
+ test_programs/execution_success \
+ test_programs/test_libraries
+ echo "Cloned noir @ $(git -C "${tmpdir}/noir" rev-parse HEAD)"
+
+ - name: Run execution_success suite
+ env:
+ PROVEKIT_BIN: ${{ github.workspace }}/target/release/provekit-cli
+ LOG_DIR: ${{ github.workspace }}/noir-execution-logs
+ # NOIR_REPO_DIR is set by the previous step via $GITHUB_ENV
+ run: |
+ bash scripts/run_noir_execution_success.sh
+
+ # Upload logs on every run (pass or fail) for 7 days.
+ - name: Upload test logs
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: noir-execution-logs-${{ github.run_id }}
+ path: noir-execution-logs/
+ retention-days: 7
+
+ - name: Build sticky PR comment body
+ if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+ continue-on-error: true
+ run: |
+ python3 .github/scripts/build_noir_pr_comment.py \
+ --grouped-report "noir-execution-logs/grouped_error_report.txt" \
+ --output "noir-execution-logs/pr_comment.md" \
+ --run-id "${{ github.run_id }}" \
+ --run-url "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \
+ --sha "${{ github.sha }}" \
+ --noir-ref "${{ env.NOIR_REF }}" \
+ --status "${{ job.status }}"
+
+ - name: Upsert sticky PR report comment
+ if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+ continue-on-error: true
+ uses: actions/github-script@v7
+ with:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ script: |
+ const fs = require('fs');
+          const marker = '<!-- noir-execution-success-sticky-comment -->';
+ const bodyPath = 'noir-execution-logs/pr_comment.md';
+ const fallbackBody = [
+ marker,
+ '## Noir execution_success report',
+ '',
+ 'Unable to generate the detailed report body for this run.',
+ '',
+ 'Run: [#${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})',
+ ].join('\n');
+ const body = fs.existsSync(bodyPath) ? fs.readFileSync(bodyPath, 'utf8') : fallbackBody;
+
+ const { owner, repo } = context.repo;
+ const issue_number = context.issue.number;
+ const comments = await github.paginate(github.rest.issues.listComments, {
+ owner,
+ repo,
+ issue_number,
+ per_page: 100,
+ });
+
+ const existing = comments.find((comment) =>
+ comment.user &&
+ comment.user.login === 'github-actions[bot]' &&
+ comment.body &&
+ comment.body.includes(marker)
+ );
+
+ if (existing) {
+ await github.rest.issues.updateComment({
+ owner,
+ repo,
+ comment_id: existing.id,
+ body,
+ });
+ core.info(`Updated existing noir sticky comment (id=${existing.id}).`);
+ } else {
+ const created = await github.rest.issues.createComment({
+ owner,
+ repo,
+ issue_number,
+ body,
+ });
+ core.info(`Created new noir sticky comment (id=${created.data.id}).`);
+ }
+
+ # Always clean up the temp clone, even if the test step failed.
+ - name: Cleanup noir clone
+ if: always()
+ run: |
+ if [[ -n "${NOIR_TMPDIR:-}" && -d "${NOIR_TMPDIR}" ]]; then
+ rm -rf "${NOIR_TMPDIR}"
+ echo "Cleaned up ${NOIR_TMPDIR}"
+ fi
diff --git a/scripts/csp_benchmark_helpers.py b/scripts/csp_benchmark_helpers.py
new file mode 100755
index 000000000..3bd141a0f
--- /dev/null
+++ b/scripts/csp_benchmark_helpers.py
@@ -0,0 +1,213 @@
+#!/usr/bin/env python3
+"""Helpers for scripts/run_csp_benchmarks.sh.
+
+Subcommands:
+    parse-runs <bench_dir> <circuit>    Aggregate per-run measurements for one
+                                        circuit and emit a single CSV row to
+                                        stdout.
+    human-to-bytes <value>              Convert a human-formatted byte string
+                                        from the prover trace ("1.23 GB",
+                                        "456 MB", etc.) to an integer byte
+                                        count. Used by tests.
+
+Bench layout produced by run_csp_benchmarks.sh::
+
+    <BENCH_DIR>/per_circuit/<circuit>/
+        prove_<i>.time      # `/usr/bin/time -f '%e %M'` output
+        prove_<i>.stderr    # provekit-cli prove stderr (span_stats trace)
+        verify_<i>.time
+        verify_<i>.stderr
+        meta.txt            # key=value: pkp_size_bytes, proof_size_bytes
+
+The "peak heap" comes from the largest "peak memory" value emitted by
+``tooling/cli/src/span_stats.rs`` over the prove invocation's trace. We strip
+ANSI escapes and walk every span-close line; the outermost span propagates
+its children's peak via ``data.peak_memory = max(...)`` so any of them is a
+sufficient upper bound, but we keep the max for safety.
+"""
+
+from __future__ import annotations
+
+import argparse
+import re
+import sys
+from pathlib import Path
+from statistics import mean
+
+ANSI_RE = re.compile(r"\x1b\[[0-9;]*m")
+# Suffix table from provekit_common::utils::human (BN254 utils). The middle
+# entry is a regular ASCII space (no SI prefix). Order matters: we use it to
+# look up the multiplier from a captured suffix character.
+SI_SUFFIXES = "qryzafpnμm kMGTPEZYRQ"
+SI_BASE_INDEX = SI_SUFFIXES.index(" ") # power 0 lives at index 10
+# The separator between number and SI suffix is U+202F NARROW NO-BREAK SPACE
+# unless `{:#}` (alternate) is used. We accept either form.
+NARROW_NBSP = "\u202f"
+PEAK_MEMORY_RE = re.compile(
+ rf"([0-9]+(?:\.[0-9]+)?)[{NARROW_NBSP} ]?([qryzafpnμmkMGTPEZYRQ])?B"
+ r"\s+peak\s+memory",
+)
+# Matches the `info!(constraints, witnesses, "Read Noir proof scheme")` line
+# emitted by `tooling/cli/src/cmd/prove.rs` on every prove invocation.
+SCHEME_SIZE_RE = re.compile(
+ r"Read Noir proof scheme\b.*?\bconstraints=(\d+)\b.*?\bwitnesses=(\d+)\b"
+)
+
+
+def human_to_bytes(value: str) -> int:
+ """Convert a "1.23 GB"-style string from the trace to an integer byte count.
+
+ Accepts either a regular ASCII space or U+202F as the separator. Suffixes
+ follow ``provekit_common::utils::human`` (q…Q). A literal "B" with no SI
+ prefix returns the integer/float value rounded down.
+ """
+ cleaned = ANSI_RE.sub("", value).strip()
+ if not cleaned.endswith("B"):
+ raise ValueError(f"not a byte-formatted value: {value!r}")
+ cleaned = cleaned[:-1].rstrip() # drop trailing 'B'
+ if cleaned and cleaned[-1] in SI_SUFFIXES and cleaned[-1] != " ":
+ suffix = cleaned[-1]
+ number_part = cleaned[:-1].rstrip()
+ else:
+ suffix = " "
+ number_part = cleaned
+ number_part = number_part.replace(NARROW_NBSP, "").strip()
+ multiplier = 10 ** ((SI_SUFFIXES.index(suffix) - SI_BASE_INDEX) * 3)
+ return int(float(number_part) * multiplier)
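+# Illustrative: human_to_bytes("456 MB") == 456_000_000 and
+# human_to_bytes("87 B") == 87. Suffixes are decimal powers of 1000
+# (matching the Rust-side human formatter), not binary KiB/MiB.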
+
+
+def parse_peak_heap_bytes(stderr_path: Path) -> int:
+ """Return the largest "peak memory" value (bytes) found in the trace."""
+ if not stderr_path.is_file():
+ return 0
+ text = ANSI_RE.sub("", stderr_path.read_text(encoding="utf-8", errors="replace"))
+ peak = 0
+ for match in PEAK_MEMORY_RE.finditer(text):
+ number = float(match.group(1))
+ suffix = match.group(2) or " "
+ bytes_value = int(number * 10 ** ((SI_SUFFIXES.index(suffix) - SI_BASE_INDEX) * 3))
+ peak = max(peak, bytes_value)
+ return peak
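+# Assumed span-close line shape from the span_stats trace: text ending in
+# something like "1.50 GB peak memory"; only the value directly before
+# "peak memory" is read, and the maximum across all matches is returned.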
+
+
+def parse_scheme_sizes(stderr_path: Path) -> tuple[int, int]:
+ """Return (num_constraints, num_witnesses) from a prove stderr; (0, 0) if absent."""
+ if not stderr_path.is_file():
+ return 0, 0
+ text = ANSI_RE.sub("", stderr_path.read_text(encoding="utf-8", errors="replace"))
+ match = SCHEME_SIZE_RE.search(text)
+ if not match:
+ return 0, 0
+ return int(match.group(1)), int(match.group(2))
+
+
+def parse_time_file(time_path: Path) -> tuple[float, int]:
+ """Read `/usr/bin/time -f '%e %M'` output: (wall_seconds, max_rss_kb).
+
+ Returns (0.0, 0) if the file is missing or unparsable.
+ """
+ if not time_path.is_file():
+ return 0.0, 0
+ raw = time_path.read_text(encoding="utf-8", errors="replace").strip().splitlines()
+ if not raw:
+ return 0.0, 0
+ parts = raw[-1].split()
+ if len(parts) < 2:
+ return 0.0, 0
+ try:
+ return float(parts[0]), int(parts[1])
+ except ValueError:
+ return 0.0, 0
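+# With `-f '%e %M'` GNU time writes one line like "12.34 456789" (elapsed
+# seconds, max RSS in KiB). We parse the last line because GNU time may
+# prepend "Command exited with non-zero status N" when the command fails.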
+
+
+def read_meta(meta_path: Path) -> dict[str, str]:
+ out: dict[str, str] = {}
+ if not meta_path.is_file():
+ return out
+ for line in meta_path.read_text(encoding="utf-8").splitlines():
+ if "=" in line:
+ key, _, val = line.partition("=")
+ out[key.strip()] = val.strip()
+ return out
+
+
+def parse_runs(bench_dir: Path, circuit: str) -> str:
+ circuit_dir = bench_dir / "per_circuit" / circuit
+ meta = read_meta(circuit_dir / "meta.txt")
+
+ prove_runs: list[tuple[float, int, int]] = []
+ verify_runs: list[tuple[float, int]] = []
+
+ i = 1
+ while True:
+ time_path = circuit_dir / f"prove_{i}.time"
+ if not time_path.is_file():
+ break
+ wall, rss_kb = parse_time_file(time_path)
+ heap_bytes = parse_peak_heap_bytes(circuit_dir / f"prove_{i}.stderr")
+ prove_runs.append((wall, rss_kb, heap_bytes))
+ i += 1
+
+ # Constraint and witness counts are deterministic per circuit, so reading
+ # them from the first prove run is sufficient.
+ num_constraints, num_witnesses = parse_scheme_sizes(circuit_dir / "prove_1.stderr")
+
+ j = 1
+ while True:
+ time_path = circuit_dir / f"verify_{j}.time"
+ if not time_path.is_file():
+ break
+ wall, _rss = parse_time_file(time_path)
+ verify_runs.append((wall, _rss))
+ j += 1
+
+ if not prove_runs:
+ return ""
+
+ prove_time_ms = mean(r[0] for r in prove_runs) * 1000.0
+ prover_rss_kb = mean(r[1] for r in prove_runs)
+ prover_heap_bytes = mean(r[2] for r in prove_runs)
+ verifier_time_ms = mean(r[0] for r in verify_runs) * 1000.0 if verify_runs else 0.0
+
+ pkp_size = meta.get("pkp_size_bytes", "0")
+ proof_size = meta.get("proof_size_bytes", "0")
+
+ return ",".join(
+ [
+ circuit,
+ str(num_constraints),
+ str(num_witnesses),
+ f"{prove_time_ms:.1f}",
+ f"{prover_rss_kb:.0f}",
+ f"{prover_heap_bytes:.0f}",
+ f"{verifier_time_ms:.1f}",
+ proof_size,
+ pkp_size,
+ str(len(prove_runs)),
+ ]
+ )
+
+
+def main() -> int:
+ parser = argparse.ArgumentParser(description=__doc__)
+ sub = parser.add_subparsers(dest="cmd", required=True)
+
+ p = sub.add_parser("parse-runs")
+ p.add_argument("bench_dir", type=Path)
+ p.add_argument("circuit")
+
+ p = sub.add_parser("human-to-bytes")
+ p.add_argument("value")
+
+ args = parser.parse_args()
+
+ if args.cmd == "parse-runs":
+ row = parse_runs(args.bench_dir, args.circuit)
+ if row:
+ print(row)
+ elif args.cmd == "human-to-bytes":
+ print(human_to_bytes(args.value))
+
+ return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/scripts/generate_provekit_witness_report.py b/scripts/generate_provekit_witness_report.py
new file mode 100755
index 000000000..e8bafc982
--- /dev/null
+++ b/scripts/generate_provekit_witness_report.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+"""Generate a ProveKit-only witness count report.
+
+Usage: python3 generate_provekit_witness_report.py <witness_csv> <out_dir>
+
+Reads a CSV of post-GE constraint and witness counts produced by
+scripts/run_noir_execution_success.sh and writes provekit_witness_report.md
+to <out_dir>.
+"""
+
+from __future__ import annotations
+
+import csv
+import sys
+from pathlib import Path
+
+SKIP_LIST = Path(__file__).resolve().parent / "noir_skip_tests.txt"
+
+
+def load_skip_tests() -> set[str]:
+ if not SKIP_LIST.is_file():
+ return set()
+ skip: set[str] = set()
+ for raw in SKIP_LIST.read_text().splitlines():
+ line = raw.strip()
+ if not line or line.startswith("#"):
+ continue
+ skip.add(line)
+ return skip
+
+
+def main(csv_path: Path, out_dir: Path) -> None:
+ skip_tests = load_skip_tests()
+
+ rows: dict[str, tuple[int | None, int | None]] = {}
+ with csv_path.open() as f:
+ reader = csv.DictReader(f)
+ for row in reader:
+ leaf = row["test_name"].split("/")[-1]
+ if leaf in skip_tests:
+ continue
+
+ def _parse(key: str) -> int | None:
+ val = row.get(key, "")
+ try:
+ return int(val)
+ except (TypeError, ValueError):
+ return None
+
+ rows[leaf] = (_parse("provekit_constraints"), _parse("provekit_witnesses"))
+
+ lines = [
+ "# ProveKit Witness Counts",
+ "",
+ f"Captured post-GE constraint and witness counts for {len(rows)} circuits.",
+ "",
+ "| Test | Constraints (post-GE) | Witnesses (post-GE) |",
+ "|------|------------------------|----------------------|",
+ ]
+ for name in sorted(rows):
+ constraints, witnesses = rows[name]
+ c = "-" if constraints is None else str(constraints)
+ w = "-" if witnesses is None else str(witnesses)
+ lines.append(f"| {name} | {c} | {w} |")
+
+ out_path = out_dir / "provekit_witness_report.md"
+ out_path.write_text("\n".join(lines) + "\n")
+ print(f"Wrote {out_path} ({len(rows)} circuits)")
+
+
+if __name__ == "__main__":
+ if len(sys.argv) != 3:
+        print(f"Usage: {sys.argv[0]} <witness_csv> <out_dir>", file=sys.stderr)
+ sys.exit(1)
+ main(Path(sys.argv[1]), Path(sys.argv[2]))
diff --git a/scripts/noir_execution_helpers.py b/scripts/noir_execution_helpers.py
new file mode 100644
index 000000000..06ffb52d1
--- /dev/null
+++ b/scripts/noir_execution_helpers.py
@@ -0,0 +1,263 @@
+#!/usr/bin/env python3
+"""Helpers for scripts/run_noir_execution_success.sh.
+
+Subcommands:
+ discover — list runnable test dirs
+ resolve-prover-toml — find Prover.toml for a package
+ package-name — read [package].name from Nargo.toml
+ build-report — write grouped_error_report.txt
+ skip-tests — print the skip list (one per line)
+
+The skip list lives in scripts/noir_skip_tests.txt and is the single source
+of truth shared with scripts/generate_provekit_witness_report.py.
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import re
+import sys
+import tomllib
+from collections import defaultdict
+from pathlib import Path
+
+SKIP_LIST_FILE = Path(__file__).with_name("noir_skip_tests.txt")
+
+
+def load_skip_tests() -> set[str]:
+ """Return the skip list parsed from noir_skip_tests.txt.
+
+ Blank lines and lines starting with `#` are ignored. Inline `#` comments
+ are stripped. Returns an empty set if the file is missing.
+ """
+ if not SKIP_LIST_FILE.is_file():
+ return set()
+ names: set[str] = set()
+ for raw in SKIP_LIST_FILE.read_text().splitlines():
+ line = raw.split("#", 1)[0].strip()
+ if line:
+ names.add(line)
+ return names
+
+
+def discover_tests(root: Path) -> list[str]:
+ """Return candidate test project paths relative to ``root``.
+
+ Mirrors the legacy shell heredoc: a path is a candidate if it is a
+ workspace default-member, or if it has both a `[package]` entry in its
+ Nargo.toml and a sibling Prover.toml. Nested projects under a workspace
+ default-member are suppressed.
+ """
+ nargo_data: dict[str, dict] = {}
+ for nargo in root.rglob("Nargo.toml"):
+ rel = nargo.parent.relative_to(root).as_posix()
+ try:
+ data = tomllib.loads(nargo.read_text())
+ except Exception:
+ data = {}
+ nargo_data[rel] = data
+
+ workspace_default_roots: set[str] = set()
+ for rel, data in nargo_data.items():
+ ws = data.get("workspace")
+ if isinstance(ws, dict) and "default-member" in ws:
+ workspace_default_roots.add(rel)
+
+ suppressed: set[str] = set()
+ for ws_rel in workspace_default_roots:
+ ws_path = Path(ws_rel) if ws_rel != "." else Path()
+ for rel in nargo_data:
+ rel_path = Path(rel) if rel != "." else Path()
+ if rel_path != ws_path and ws_path in rel_path.parents:
+ suppressed.add(rel)
+
+ candidates: set[str] = set(workspace_default_roots)
+ for rel, data in nargo_data.items():
+ if rel in suppressed:
+ continue
+ pkg = data.get("package")
+ if isinstance(pkg, dict) and "name" in pkg:
+ if (root / rel / "Prover.toml").is_file():
+ candidates.add(rel)
+
+ return sorted(candidates)
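+# Illustrative qualifying layouts (paths relative to the corpus root):
+#   workspace_test/   Nargo.toml with [workspace] and default-member
+#   plain_test/       Nargo.toml with [package] plus a sibling Prover.toml
+# A package nested under a qualifying workspace root is suppressed.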
+
+
+def resolve_prover_toml(project_dir: Path, package_name: str) -> str:
+ """Return Prover.toml path (relative to ``project_dir``) for ``package_name``.
+
+ Prefers a Prover.toml located next to the Nargo.toml whose package name
+ matches. Falls back to a root-level Prover.toml, then to the sole
+ Prover.toml under the project when unambiguous. Returns "" otherwise.
+ """
+ matches: list[str] = []
+ for nargo in sorted(project_dir.rglob("Nargo.toml")):
+ try:
+ data = tomllib.loads(nargo.read_text())
+ except Exception:
+ continue
+ pkg = data.get("package")
+ if not isinstance(pkg, dict) or pkg.get("name") != package_name:
+ continue
+ prover = nargo.parent / "Prover.toml"
+ if prover.is_file():
+ matches.append(prover.relative_to(project_dir).as_posix())
+
+ if matches:
+ matches.sort(key=lambda p: (p.count("/"), p))
+ return matches[0]
+
+ root_prover = project_dir / "Prover.toml"
+ if root_prover.is_file():
+ return "Prover.toml"
+
+ all_provers = sorted(project_dir.rglob("Prover.toml"))
+ if len(all_provers) == 1:
+ return all_provers[0].relative_to(project_dir).as_posix()
+
+ return ""
+
+
+def read_package_name(project_dir: Path) -> str:
+ """Return [package].name from ``project_dir/Nargo.toml`` or ""."""
+ nargo = project_dir / "Nargo.toml"
+ if not nargo.is_file():
+ return ""
+ try:
+ data = tomllib.loads(nargo.read_text())
+ except Exception:
+ return ""
+ pkg = data.get("package")
+ if isinstance(pkg, dict):
+ return str(pkg.get("name", ""))
+ return ""
+
+
+_BLACKBOX_RE = re.compile(
+ r"not implemented: Other black box function: BLACKBOX::([A-Z0-9_]+)"
+)
+_PANIC_RE = re.compile(r"panicked at [^\n]*:\n([^\n]+)")
+_SOLVE_RE = re.compile(r"Failed to solve program: '([^']+)'")
+_COMPILE_ERR_RE = re.compile(r"^error:\s*([^\n]+)", flags=re.M)
+_COMPILE_BUG_RE = re.compile(r"^bug:\s*([^\n]+)", flags=re.M)
+_GENERIC_ERR_RE = re.compile(r"^Error:\s*([^\n]+)", flags=re.M)
+_FAIL_STAGE_RE = re.compile(r"FAIL: ([^\n]+)")
+_SKIP_REASON_RE = re.compile(r"SKIP: ([^\n]+)")
+
+
+def _classify_failure(text: str, stage: str) -> str:
+ blackbox = _BLACKBOX_RE.search(text)
+ if blackbox:
+ return f"Not implemented blackbox: {blackbox.group(1)} ({stage})"
+ if "Program must have one entry point." in text:
+ return f"Program must have one entry point ({stage})"
+ panic = _PANIC_RE.search(text)
+ if panic:
+ return f"Panic: {panic.group(1).strip()} ({stage})"
+ solve = _SOLVE_RE.search(text)
+ if solve:
+ return f"Failed to solve program: {solve.group(1)} ({stage})"
+ if "Failed assertion" in text:
+ return f"Failed assertion ({stage})"
+ compile_error = _COMPILE_ERR_RE.search(text)
+ if compile_error:
+ return f"Compile error: {compile_error.group(1).strip()} ({stage})"
+ compile_bug = _COMPILE_BUG_RE.search(text)
+ if compile_bug:
+ return f"Compile bug: {compile_bug.group(1).strip()} ({stage})"
+ generic = _GENERIC_ERR_RE.search(text)
+ if generic:
+ return f"Error: {generic.group(1).strip()} ({stage})"
+ return f"Unknown failure ({stage})"
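+# Illustrative: a log containing "not implemented: Other black box function:
+# BLACKBOX::BLAKE3" whose last stage marker is "provekit-cli prepare" is
+# classified as "Not implemented blackbox: BLAKE3 (provekit-cli prepare)".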
+
+
+def build_grouped_report(log_dir: Path, passed: int, failed: int, skipped: int) -> None:
+ """Scan ``log_dir/per_test/*.log`` and write ``log_dir/grouped_error_report.txt``.
+
+ PASS/FAIL/SKIP totals come from the shell runner — it has the authoritative
+ counts (including blackbox skips, which don't produce per-test logs). Logs
+ are consulted only for the ``[stages]`` and ``[grouped]`` sections.
+ """
+ per_test_dir = log_dir / "per_test"
+ report_file = log_dir / "grouped_error_report.txt"
+
+ logs = sorted(per_test_dir.glob("*.log"))
+ grouped: dict[str, list[str]] = defaultdict(list)
+ stage_groups: dict[str, list[str]] = defaultdict(list)
+
+ for fp in logs:
+ text = fp.read_text(errors="replace")
+ name = fp.stem
+
+ if "SKIP:" in text:
+ skip_match = _SKIP_REASON_RE.search(text)
+ reason = skip_match.group(1).strip() if skip_match else "unknown"
+ grouped[f"SKIP: {reason}"].append(name)
+ continue
+
+ fail_stages = _FAIL_STAGE_RE.findall(text)
+ stage = fail_stages[-1].strip() if fail_stages else "unknown stage"
+ stage_groups[stage].append(name)
+ grouped[_classify_failure(text, stage)].append(name)
+
+ with report_file.open("w") as f:
+ f.write(f"logs={len(logs)}\n")
+ f.write(f"PASS={passed}\n")
+ f.write(f"FAIL={failed}\n")
+ f.write(f"SKIP={skipped}\n")
+ f.write("\n[stages]\n")
+ for stage, tests in sorted(stage_groups.items(), key=lambda kv: (-len(kv[1]), kv[0])):
+ f.write(f"{stage}\t{len(tests)}\t{', '.join(tests)}\n")
+ f.write("\n[grouped]\n")
+ for key, tests in sorted(grouped.items(), key=lambda kv: (-len(kv[1]), kv[0])):
+ f.write(f"{len(tests)}\t{key}\t{', '.join(tests)}\n")
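+# Illustrative report shape ("\t" marks real tab characters):
+#   logs=2
+#   PASS=170
+#   FAIL=2
+#   SKIP=21
+#
+#   [stages]
+#   provekit-cli prepare\t2\tbar, foo
+#
+#   [grouped]
+#   2\tNot implemented blackbox: BLAKE3 (provekit-cli prepare)\tbar, foo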
+
+
+def main() -> int:
+ parser = argparse.ArgumentParser(description=__doc__)
+ sub = parser.add_subparsers(dest="cmd", required=True)
+
+    p = sub.add_parser("discover", help="list runnable test dirs under <test_root>")
+ p.add_argument("test_root", type=Path)
+
+ p = sub.add_parser("resolve-prover-toml")
+ p.add_argument("project_dir", type=Path)
+ p.add_argument("package_name")
+
+ p = sub.add_parser("package-name")
+ p.add_argument("project_dir", type=Path)
+
+ p = sub.add_parser("build-report")
+ p.add_argument("log_dir", type=Path)
+ p.add_argument("passed_count", type=int)
+ p.add_argument("failed_count", type=int)
+ p.add_argument("skipped_count", type=int)
+
+ sub.add_parser("skip-tests", help="print the skip list, one name per line")
+
+ args = parser.parse_args()
+
+ if args.cmd == "discover":
+ for name in discover_tests(args.test_root):
+ print(name)
+ elif args.cmd == "resolve-prover-toml":
+ print(resolve_prover_toml(args.project_dir, args.package_name))
+ elif args.cmd == "package-name":
+ print(read_package_name(args.project_dir))
+ elif args.cmd == "build-report":
+ build_grouped_report(
+ args.log_dir,
+ args.passed_count,
+ args.failed_count,
+ args.skipped_count,
+ )
+ elif args.cmd == "skip-tests":
+ for name in sorted(load_skip_tests()):
+ print(name)
+
+ return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/scripts/noir_skip_tests.txt b/scripts/noir_skip_tests.txt
new file mode 100644
index 000000000..575dd9d0c
--- /dev/null
+++ b/scripts/noir_skip_tests.txt
@@ -0,0 +1,38 @@
+# Tests that use blackbox functions not yet supported by provekit.
+# Counted as SKIP (not FAIL) by scripts/run_noir_execution_success.sh
+# and excluded from scripts/generate_provekit_witness_report.py.
+# Remove entries here once the corresponding blackbox is supported.
+#
+# Format: one bare test name per line. Blank lines and `#` comments are ignored.
+
+# BLAKE3
+a_6
+array_dynamic_blackbox_input
+array_dynamic_nested_blackbox_input
+blake3
+conditional_1
+conditional_regression_short_circuit
+regression_4449
+
+# ECDSA_SECP256K1
+bench_ecdsa_secp256k1
+ecdsa_secp256k1
+ecdsa_secp256k1_invalid_inputs
+ecdsa_secp256k1_invalid_pub_key_in_inactive_branch
+
+# ECDSA_SECP256R1
+ecdsa_secp256r1
+ecdsa_secp256r1_3x
+ecdsa_secp256r1_invalid_pub_key_in_inactive_branch
+ecdsa_secp256r1_msg_equals_order
+
+# EMBEDDED_CURVE_ADD
+embedded_curve_ops
+regression_5045
+regression_7744
+
+# AES128_ENCRYPT
+aes128_encrypt
+
+# BLAKE2S
+a_7
diff --git a/scripts/run_csp_benchmarks.sh b/scripts/run_csp_benchmarks.sh
new file mode 100755
index 000000000..e099c636c
--- /dev/null
+++ b/scripts/run_csp_benchmarks.sh
@@ -0,0 +1,235 @@
+#!/usr/bin/env bash
+# run_csp_benchmarks.sh
+#
+# Run prove/verify benchmarks for noir-examples/csp-benchmarks/*. Each circuit
+# is compiled and prepared once, then prove + verify are each invoked
+# BENCH_RUNS times so the helper can average wall time, peak RSS, and
+# heap-peak bytes (parsed from the prover's tracing output).
+#
+# Environment variables (all optional):
+# PROVEKIT_BIN Path to provekit-cli (default: target/release/provekit-cli)
+# BENCH_ROOT Path to csp-benchmarks (default: noir-examples/csp-benchmarks)
+# BENCH_DIR Output directory (default: csp-bench-logs)
+# BENCH_RUNS Iterations to average (default: 3)
+# TEST_FILTER Regex on circuit name
+# MAX_TESTS Cap on circuits (0 = unlimited)
+#
+# Output: BENCH_DIR/results.csv with one row per circuit:
+# circuit,num_constraints,num_witnesses,prover_time_ms,prover_peak_rss_kb,
+# prover_heap_peak_bytes,verifier_time_ms,proof_size_bytes,pkp_size_bytes,
+# runs
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+HELPER="${SCRIPT_DIR}/csp_benchmark_helpers.py"
+
+PROVEKIT_BIN="${PROVEKIT_BIN:-${REPO_ROOT}/target/release/provekit-cli}"
+BENCH_ROOT="${BENCH_ROOT:-${REPO_ROOT}/noir-examples/csp-benchmarks}"
+BENCH_DIR="${BENCH_DIR:-${REPO_ROOT}/csp-bench-logs}"
+BENCH_RUNS="${BENCH_RUNS:-3}"
+TEST_FILTER="${TEST_FILTER:-}"
+MAX_TESTS="${MAX_TESTS:-0}"
+
+if [[ "${BENCH_DIR}" != /* ]]; then
+ BENCH_DIR="${REPO_ROOT}/${BENCH_DIR}"
+fi
+
+if [[ ! -x "${PROVEKIT_BIN}" ]]; then
+ echo "ERROR: provekit-cli binary not found at ${PROVEKIT_BIN}" >&2
+ echo "Build it first: cargo build --release --bin provekit-cli" >&2
+ exit 1
+fi
+
+if [[ ! -d "${BENCH_ROOT}" ]]; then
+ echo "ERROR: csp-benchmarks not found at ${BENCH_ROOT}" >&2
+ exit 1
+fi
+
+if ! command -v nargo >/dev/null 2>&1; then
+ echo "ERROR: nargo is required but not in PATH" >&2
+ exit 1
+fi
+
+if ! python3 -c "import tomllib" 2>/dev/null; then
+ echo "ERROR: python3.11+ is required (tomllib not found)." >&2
+ echo "Current: $(python3 --version 2>&1)" >&2
+ exit 1
+fi
+
+# `/usr/bin/time` is the GNU-style binary; macOS ships a different `time` shell
+# builtin so users may need `gtime` from `brew install gnu-time`. CI runs on
+# ubuntu-24.04-arm where /usr/bin/time is GNU.
+TIME_BIN=""
+if [[ -x /usr/bin/time ]]; then
+ TIME_BIN=/usr/bin/time
+elif command -v gtime >/dev/null 2>&1; then
+ TIME_BIN="$(command -v gtime)"
+else
+ echo "ERROR: GNU /usr/bin/time not found (try: brew install gnu-time)" >&2
+ exit 1
+fi
+
+mkdir -p "${BENCH_DIR}/per_circuit"
+RESULTS_CSV="${BENCH_DIR}/results.csv"
+echo "circuit,num_constraints,num_witnesses,prover_time_ms,prover_peak_rss_kb,prover_heap_peak_bytes,verifier_time_ms,proof_size_bytes,pkp_size_bytes,runs" > "${RESULTS_CSV}"
+
+shopt -s nullglob
+
+# Discover circuits: any direct subdir of csp-benchmarks/ that has both a
+# Nargo.toml and a Prover.toml at its root. This filters out keccak_lib/.
+discover_circuits() {
+ for dir in "${BENCH_ROOT}"/*/; do
+ if [[ -f "${dir}Nargo.toml" && -f "${dir}Prover.toml" ]]; then
+ basename "${dir%/}"
+ fi
+ done
+}
+
+mapfile -t circuits < <(discover_circuits | sort)
+if [[ "${#circuits[@]}" -eq 0 ]]; then
+ echo "ERROR: no circuits discovered under ${BENCH_ROOT}" >&2
+ exit 1
+fi
+
+echo "Discovered ${#circuits[@]} circuits"
+
+# Read [package].name from a Nargo.toml; fall back to directory basename.
+read_package_name() {
+ local dir="$1"
+ python3 - "$dir" <<'PY'
+import sys, tomllib, pathlib
+nargo = pathlib.Path(sys.argv[1]) / "Nargo.toml"
+try:
+ data = tomllib.loads(nargo.read_text())
+ print(data.get("package", {}).get("name", ""))
+except Exception:
+ pass
+PY
+}
+
+attempted=0
+succeeded=0
+failed=0
+
+for circuit in "${circuits[@]}"; do
+ if [[ -n "${TEST_FILTER}" && ! "${circuit}" =~ ${TEST_FILTER} ]]; then
+ continue
+ fi
+ (( attempted += 1 ))
+ if [[ "${MAX_TESTS}" -gt 0 && "${attempted}" -gt "${MAX_TESTS}" ]]; then
+ break
+ fi
+
+ workdir="${BENCH_ROOT}/${circuit}"
+ out_dir="${BENCH_DIR}/per_circuit/${circuit}"
+ mkdir -p "${out_dir}"
+
+ echo ""
+ echo "==> [${attempted}/${#circuits[@]}] ${circuit}"
+
+ pkg_name="$(read_package_name "${workdir}")"
+ if [[ -z "${pkg_name}" ]]; then
+ pkg_name="${circuit}"
+ fi
+
+ # 1) compile
+ if ! (cd "${workdir}" && nargo compile > "${out_dir}/compile.log" 2>&1); then
+ echo "FAIL: nargo compile (${circuit})"
+ (( failed += 1 ))
+ continue
+ fi
+
+ circuit_json="${workdir}/target/${pkg_name}.json"
+ if [[ ! -f "${circuit_json}" ]]; then
+ # Fallback: pick the first json under target/.
+ candidate=("${workdir}"/target/*.json)
+ if [[ "${#candidate[@]}" -gt 0 ]]; then
+ circuit_json="${candidate[0]}"
+ else
+ echo "FAIL: no compiled JSON in ${workdir}/target/"
+ (( failed += 1 ))
+ continue
+ fi
+ fi
+
+ pkp_path="${out_dir}/prover.pkp"
+ pkv_path="${out_dir}/verifier.pkv"
+ proof_path="${out_dir}/proof.np"
+
+ # 2) prepare
+ if ! (cd "${workdir}" && "${PROVEKIT_BIN}" prepare "${circuit_json}" \
+ --pkp "${pkp_path}" --pkv "${pkv_path}") > "${out_dir}/prepare.log" 2>&1; then
+ echo "FAIL: provekit-cli prepare (${circuit})"
+ (( failed += 1 ))
+ continue
+ fi
+
+ pkp_size_bytes="$(stat -c '%s' "${pkp_path}" 2>/dev/null || stat -f '%z' "${pkp_path}")"
+
+ # 3) prove × BENCH_RUNS — write each run's stderr separately so the helper
+ # can parse the tracing output's "peak memory" lines.
+ prove_ok=1
+ for ((i=1; i<=BENCH_RUNS; i++)); do
+ if ! (cd "${workdir}" && "${TIME_BIN}" -f '%e %M' \
+ -o "${out_dir}/prove_${i}.time" \
+ "${PROVEKIT_BIN}" prove "${pkp_path}" "${workdir}/Prover.toml" \
+ -o "${proof_path}") 2> "${out_dir}/prove_${i}.stderr"; then
+ echo "FAIL: provekit-cli prove run ${i} (${circuit})"
+ prove_ok=0
+ break
+ fi
+ done
+ if [[ "${prove_ok}" -ne 1 ]]; then
+ (( failed += 1 ))
+ continue
+ fi
+
+ proof_size_bytes="$(stat -c '%s' "${proof_path}" 2>/dev/null || stat -f '%z' "${proof_path}")"
+
+ # 4) verify × BENCH_RUNS
+ verify_ok=1
+ for ((i=1; i<=BENCH_RUNS; i++)); do
+ if ! (cd "${workdir}" && "${TIME_BIN}" -f '%e %M' \
+ -o "${out_dir}/verify_${i}.time" \
+ "${PROVEKIT_BIN}" verify "${pkv_path}" "${proof_path}") \
+ 2> "${out_dir}/verify_${i}.stderr"; then
+ echo "FAIL: provekit-cli verify run ${i} (${circuit})"
+ verify_ok=0
+ break
+ fi
+ done
+ if [[ "${verify_ok}" -ne 1 ]]; then
+ (( failed += 1 ))
+ continue
+ fi
+
+    cat > "${out_dir}/meta.txt" <<EOF
+pkp_size_bytes=${pkp_size_bytes}
+proof_size_bytes=${proof_size_bytes}
+EOF
+
+    # 5) aggregate the runs into a single CSV row via the Python helper
+    if row="$(python3 "${HELPER}" parse-runs "${BENCH_DIR}" "${circuit}")" && [[ -n "${row}" ]]; then
+        echo "${row}" >> "${RESULTS_CSV}"
+        echo "OK: ${row}"
+        (( succeeded += 1 ))
+    else
+ echo "FAIL: helper produced no row for ${circuit}"
+ (( failed += 1 ))
+ fi
+done
+
+echo ""
+echo "----- csp-benchmarks summary -----"
+echo "Discovered : ${#circuits[@]}"
+echo "Attempted : ${attempted}"
+echo "Succeeded : ${succeeded}"
+echo "Failed : ${failed}"
+echo "Results : ${RESULTS_CSV}"
+
+if [[ "${failed}" -gt 0 ]]; then
+ exit 1
+fi
+exit 0
diff --git a/scripts/run_noir_execution_success.sh b/scripts/run_noir_execution_success.sh
new file mode 100755
index 000000000..514f46938
--- /dev/null
+++ b/scripts/run_noir_execution_success.sh
@@ -0,0 +1,434 @@
+#!/usr/bin/env bash
+# run_noir_execution_success.sh
+#
+# Run the Noir execution_success test suite through provekit-cli.
+#
+# Environment variables (all optional):
+# NOIR_REPO_DIR Path to a cloned noir-lang/noir repo root.
+# When set, tests come from
+# NOIR_REPO_DIR/test_programs/{execution_success,test_libraries}.
+# When unset, falls back to the vendored path
+# REPO_ROOT/test-programs/noir/.
+# PROVEKIT_BIN Path to provekit-cli binary (default: target/release/provekit-cli)
+# LOG_DIR Directory for per-test logs and summary
+# MAX_TESTS Cap the number of tests (0 = unlimited)
+# TEST_FILTER Regex filter on test name
+# REQUIRED_NARGO_VERSION Nargo version string to require (default 1.0.0-beta.19)
+# ENABLE_ENUMS_FALLBACK Retry compile with -Zenums on 'enums' feature error (0/1, default 1)
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+HELPER="${SCRIPT_DIR}/noir_execution_helpers.py"
+SKIP_LIST_FILE="${SCRIPT_DIR}/noir_skip_tests.txt"
+
+# ---------------------------------------------------------------------------
+# Resolve test corpus root (CI clone vs. local vendored copy)
+# ---------------------------------------------------------------------------
+if [[ -n "${NOIR_REPO_DIR:-}" ]]; then
+ TEST_ROOT="${NOIR_REPO_DIR}/test_programs/execution_success"
+ TEST_LIB_ROOT="${NOIR_REPO_DIR}/test_programs/test_libraries"
+else
+ NOIR_ROOT="${REPO_ROOT}/test-programs/noir"
+ TEST_ROOT="${NOIR_ROOT}/execution_success"
+ TEST_LIB_ROOT="${NOIR_ROOT}/test_libraries"
+fi
+
+PROVEKIT_BIN="${PROVEKIT_BIN:-${REPO_ROOT}/target/release/provekit-cli}"
+MAX_TESTS="${MAX_TESTS:-0}"
+REQUIRED_NARGO_VERSION="${REQUIRED_NARGO_VERSION:-1.0.0-beta.19}"
+ENABLE_ENUMS_FALLBACK="${ENABLE_ENUMS_FALLBACK:-1}"
+TEST_FILTER="${TEST_FILTER:-}"
+RUN_ID="$(date -u +%Y%m%dT%H%M%SZ)"
+LOG_DIR="${LOG_DIR:-${REPO_ROOT}/scripts/noir_execution_logs/${RUN_ID}}"
+
+if [[ "${LOG_DIR}" != /* ]]; then
+ LOG_DIR="${REPO_ROOT}/${LOG_DIR}"
+fi
+
+# ---------------------------------------------------------------------------
+# Unimplemented-blackbox skip list
+# Single source of truth: scripts/noir_skip_tests.txt (shared with
+# scripts/generate_provekit_witness_report.py). Counted as SKIP (not FAIL).
+# ---------------------------------------------------------------------------
+SKIP_TESTS=()
+declare -A SKIP_SET
+if [[ -f "${SKIP_LIST_FILE}" ]]; then
+ while IFS= read -r _raw || [[ -n "${_raw}" ]]; do
+ _name="${_raw%%#*}"
+ _name="${_name#"${_name%%[![:space:]]*}"}"
+ _name="${_name%"${_name##*[![:space:]]}"}"
+ if [[ -n "${_name}" ]]; then
+ SKIP_TESTS+=("${_name}")
+ SKIP_SET["${_name}"]=1
+ fi
+ done < "${SKIP_LIST_FILE}"
+else
+ echo "WARNING: skip list ${SKIP_LIST_FILE} not found; no tests will be skipped." >&2
+fi
+
+if [[ ! -d "${TEST_ROOT}" ]]; then
+ echo "ERROR: Missing test corpus at ${TEST_ROOT}"
+ if [[ -z "${NOIR_REPO_DIR:-}" ]]; then
+ echo "Hint: run scripts/vendor_noir_execution_success.sh first, or set NOIR_REPO_DIR."
+ else
+ echo "Hint: check that NOIR_REPO_DIR (${NOIR_REPO_DIR}) contains test_programs/execution_success."
+ fi
+ exit 1
+fi
+
+if [[ ! -x "${PROVEKIT_BIN}" ]]; then
+ echo "Missing provekit-cli binary at ${PROVEKIT_BIN}"
+ echo "Build it first: cargo build --release --bin provekit-cli"
+ exit 1
+fi
+
+if ! command -v nargo >/dev/null 2>&1; then
+ echo "nargo is required but was not found in PATH."
+ echo "Install with noirup and set version: noirup --version v1.0.0-beta.19"
+ exit 1
+fi
+
+nargo_version="$(nargo --version)"
+if [[ "${nargo_version}" != *"${REQUIRED_NARGO_VERSION}"* ]]; then
+ echo "Unsupported nargo version: ${nargo_version}"
+ echo "Expected version containing: ${REQUIRED_NARGO_VERSION}"
+ echo "Switch with: noirup --version ${REQUIRED_NARGO_VERSION}"
+ exit 1
+fi
+
+if ! python3 -c "import tomllib" 2>/dev/null; then
+ echo "ERROR: python3.11+ is required (tomllib not found)."
+ echo "Current: $(python3 --version 2>&1)"
+ exit 1
+fi
+
+mkdir -p "${LOG_DIR}/per_test"
+GROUPED_REPORT_FILE="${LOG_DIR}/grouped_error_report.txt"
+WITNESS_CSV="${LOG_DIR}/provekit_witness_counts.csv"
+echo "test_name,provekit_constraints,provekit_witnesses" > "${WITNESS_CSV}"
+
+shopt -s nullglob globstar
+
+# Python helpers live in scripts/noir_execution_helpers.py; these are thin
+# shell wrappers so the main loop reads naturally.
+discover_test_dirs() {
+ python3 "${HELPER}" discover "${TEST_ROOT}"
+}
+
+resolve_prover_toml() {
+ python3 "${HELPER}" resolve-prover-toml "$1" "$2"
+}
+
+read_workdir_package_name() {
+ python3 "${HELPER}" package-name "$1"
+}
+
+relative_path() {
+ python3 -c 'import os, sys; print(os.path.relpath(sys.argv[2], sys.argv[1]))' "$1" "$2"
+}
+
+append_stage_marker() {
+ local log_file="$1"
+ local stage_name="$2"
+ local stage_status="$3"
+ printf '\n[%s] %s: %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "${stage_status}" "${stage_name}" >> "${log_file}"
+}
+
+mapfile -t test_dirs < <(discover_test_dirs)
+
+if [[ "${#test_dirs[@]}" -eq 0 ]]; then
+ echo "No runnable test programs found under ${TEST_ROOT}"
+ exit 1
+fi
+
+total=0
+passed=0
+failed=0
+skipped=0
+
+# Clean up the active test sandbox if the script exits unexpectedly (SIGINT, error).
+_current_sandbox=""
+_cleanup_sandbox() {
+ if [[ -n "${_current_sandbox:-}" && -d "${_current_sandbox}" ]]; then
+ rm -rf "${_current_sandbox}"
+ fi
+}
+trap _cleanup_sandbox EXIT INT TERM
+
+if [[ ! -d "${TEST_LIB_ROOT}" ]]; then
+ echo "WARNING: missing ${TEST_LIB_ROOT}; path-based dependency tests may fail."
+ echo "Run scripts/vendor_noir_execution_success.sh to vendor test_libraries as well."
+fi
+
+for test_name in "${test_dirs[@]}"; do
+ if [[ -n "${TEST_FILTER}" && ! "${test_name}" =~ ${TEST_FILTER} ]]; then
+ continue
+ fi
+
+    # First path component (the fixture directory) is what we key on in the
+    # skip set; for top-level tests it equals the test name itself.
+    leaf_name="${test_name%%/*}"
+
+ # --- Unimplemented blackbox skip list: no log, no noise ---
+ # Skip BEFORE incrementing `total` so MAX_TESTS caps only attempted tests.
+ if [[ "${SKIP_SET["${leaf_name}"]:-}" == "1" ]]; then
+ echo "SKIP (blackbox): ${test_name}"
+ (( skipped += 1 ))
+ continue
+ fi
+
+ (( total += 1 ))
+
+ if [[ "${MAX_TESTS}" -gt 0 && "${total}" -gt "${MAX_TESTS}" ]]; then
+ break
+ fi
+
+ test_dir="${TEST_ROOT}/${test_name}"
+ safe_test_name="${test_name//\//__}"
+
+ test_log="${LOG_DIR}/per_test/${safe_test_name}.log"
+
+ echo ""
+ echo "==> [${total}] ${test_name}"
+
+ : > "${test_log}"
+ {
+ echo "test_name=${test_name}"
+ echo "test_dir=${test_dir}"
+ echo "run_id=${RUN_ID}"
+ echo "nargo_version=${nargo_version}"
+ } >> "${test_log}"
+
+ if [[ ! -f "${test_dir}/Nargo.toml" ]]; then
+ echo "SKIP: missing Nargo.toml"
+ append_stage_marker "${test_log}" "test" "SKIP"
+ echo "SKIP: missing Nargo.toml" >> "${test_log}"
+ (( skipped += 1 ))
+ continue
+ fi
+
+ if [[ ! -d "${TEST_LIB_ROOT}" ]] && grep -q 'test_libraries' "${test_dir}"/Nargo.toml 2>/dev/null; then
+ echo "SKIP: missing test_libraries for relative path dependency"
+ append_stage_marker "${test_log}" "test" "SKIP"
+ echo "SKIP: missing test_libraries for relative path dependency" >> "${test_log}"
+ (( skipped += 1 ))
+ continue
+ fi
+
+ sandbox_root="$(mktemp -d)"
+ _current_sandbox="${sandbox_root}"
+ sandbox_noir_root="${sandbox_root}/test-programs/noir"
+ sandbox_exec_root="${sandbox_noir_root}/execution_success"
+ fixture_name="${test_name%%/*}"
+ fixture_src="${TEST_ROOT}/${fixture_name}"
+ fixture_dst="${sandbox_exec_root}/${fixture_name}"
+
+ mkdir -p "${sandbox_exec_root}"
+ cp -R "${fixture_src}" "${fixture_dst}"
+
+ if [[ -d "${TEST_LIB_ROOT}" ]]; then
+ mkdir -p "${sandbox_noir_root}"
+ ln -s "${TEST_LIB_ROOT}" "${sandbox_noir_root}/test_libraries"
+ fi
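+
+  # Sandbox layout at this point (test name illustrative):
+  #   ${sandbox_root}/test-programs/noir/execution_success/fold_basic/  # copied fixture
+  #   ${sandbox_root}/test-programs/noir/test_libraries                 # symlink, if vendored
+  # This mirrors the repo layout, so relative `path = ...` dependencies in
+  # Nargo.toml should presumably resolve unchanged.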
+
+ workdir="${sandbox_exec_root}/${test_name}"
+ echo "sandbox_root=${sandbox_root}" >> "${test_log}"
+ echo "workdir=${workdir}" >> "${test_log}"
+
+ append_stage_marker "${test_log}" "nargo compile" "START"
+ compile_ok=0
+
+ if (cd "${workdir}" && nargo compile >> "${test_log}" 2>&1); then
+ compile_ok=1
+ elif [[ "${ENABLE_ENUMS_FALLBACK}" -eq 1 ]] && grep -q "unstable feature 'enums'" "${test_log}"; then
+ append_stage_marker "${test_log}" "nargo compile -Zenums" "RETRY"
+ if (cd "${workdir}" && nargo compile -Zenums >> "${test_log}" 2>&1); then
+ compile_ok=1
+ fi
+ fi
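+  # NB: the -Zenums retry keys off the exact substring "unstable feature
+  # 'enums'" in nargo's diagnostic; an upstream rewording would silently
+  # disable the fallback.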
+
+ if [[ "${compile_ok}" -ne 1 ]]; then
+ append_stage_marker "${test_log}" "nargo compile" "FAIL"
+ echo "FAIL: nargo compile"
+ echo "FAIL: nargo compile" >> "${test_log}"
+ (( failed += 1 ))
+ rm -rf "${sandbox_root}"
+ continue
+ fi
+
+ append_stage_marker "${test_log}" "nargo compile" "PASS"
+
+ compiled_jsons=("${workdir}"/target/*.json)
+ if [[ "${#compiled_jsons[@]}" -eq 0 ]]; then
+ compiled_jsons=("${sandbox_exec_root}/${fixture_name}"/target/*.json)
+ fi
+ if [[ "${#compiled_jsons[@]}" -eq 0 ]]; then
+ compiled_jsons=("${sandbox_exec_root}/${fixture_name}"/**/target/*.json)
+ fi
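+  # These globs rely on `nullglob` (set above), so a miss yields an empty
+  # array rather than the literal pattern; the `**` fallback additionally
+  # needs `globstar` to recurse into nested package layouts.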
+ if [[ "${#compiled_jsons[@]}" -eq 0 ]]; then
+ append_stage_marker "${test_log}" "compile output check" "FAIL"
+ echo "FAIL: missing compiled target JSON after nargo compile"
+ echo "FAIL: missing compiled target JSON after nargo compile" >> "${test_log}"
+ (( failed += 1 ))
+ rm -rf "${sandbox_root}"
+ continue
+ fi
+
+ workdir_package_name="$(read_workdir_package_name "${workdir}")"
+ circuit_json_abs=""
+ if [[ -n "${workdir_package_name}" ]]; then
+ for candidate_json in "${compiled_jsons[@]}"; do
+ if [[ "$(basename "${candidate_json}" .json)" == "${workdir_package_name}" ]]; then
+ circuit_json_abs="${candidate_json}"
+ break
+ fi
+ done
+ fi
+ if [[ -z "${circuit_json_abs}" ]]; then
+ circuit_json_abs="${compiled_jsons[0]}"
+ fi
+
+ circuit_json="$(relative_path "${workdir}" "${circuit_json_abs}")"
+ package_name="$(basename "${circuit_json_abs}" .json)"
+ prover_toml_rel="$(resolve_prover_toml "${workdir}" "${package_name}")"
+
+ if [[ -z "${prover_toml_rel}" || ! -f "${workdir}/${prover_toml_rel}" ]]; then
+ append_stage_marker "${test_log}" "resolve prover.toml" "FAIL"
+ echo "FAIL: could not locate Prover.toml for compiled package ${package_name}"
+ echo "FAIL: could not locate Prover.toml for compiled package ${package_name}" >> "${test_log}"
+ (( failed += 1 ))
+ rm -rf "${sandbox_root}"
+ continue
+ fi
+
+ echo "circuit_json=${circuit_json}" >> "${test_log}"
+ echo "prover_toml=${prover_toml_rel}" >> "${test_log}"
+
+ append_stage_marker "${test_log}" "provekit-cli prepare" "START"
+ if ! (cd "${workdir}" && "${PROVEKIT_BIN}" prepare "./${circuit_json}" --pkp "./prover.pkp" --pkv "./verifier.pkv" >> "${test_log}" 2>&1); then
+ append_stage_marker "${test_log}" "provekit-cli prepare" "FAIL"
+ echo "FAIL: provekit-cli prepare"
+ echo "FAIL: provekit-cli prepare" >> "${test_log}"
+ (( failed += 1 ))
+ rm -rf "${sandbox_root}"
+ continue
+ fi
+ append_stage_marker "${test_log}" "provekit-cli prepare" "PASS"
+
+ # Extract ProveKit post-GE constraint and witness counts before the log is deleted on success.
+  # Keep this non-fatal under `set -euo pipefail`: if the line is absent or its
+  # wording changes, we simply skip the CSV row for this test.
+ _ge_line="$(grep -o 'After GE optimization: [0-9]* constraints, [0-9]* witnesses' "${test_log}" | tail -1 || true)"
+ _pk_constraints=""
+ _pk_witnesses=""
+ if [[ "${_ge_line}" =~ ([0-9]+)\ constraints,\ ([0-9]+)\ witnesses$ ]]; then
+ _pk_constraints="${BASH_REMATCH[1]}"
+ _pk_witnesses="${BASH_REMATCH[2]}"
+ fi
+ if [[ -n "${_pk_witnesses}" ]]; then
+ echo "${test_name},${_pk_constraints},${_pk_witnesses}" >> "${WITNESS_CSV}"
+ fi
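+  # Matched log line and emitted CSV row (numbers illustrative):
+  #   After GE optimization: 1234 constraints, 5678 witnesses
+  #   fold_basic/nested_pkg,1234,5678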
+
+ append_stage_marker "${test_log}" "provekit-cli prove" "START"
+ if ! (cd "${workdir}" && "${PROVEKIT_BIN}" prove "./prover.pkp" "./${prover_toml_rel}" -o "./proof.np" >> "${test_log}" 2>&1); then
+ append_stage_marker "${test_log}" "provekit-cli prove" "FAIL"
+ echo "FAIL: provekit-cli prove"
+ echo "FAIL: provekit-cli prove" >> "${test_log}"
+ (( failed += 1 ))
+ rm -rf "${sandbox_root}"
+ continue
+ fi
+ append_stage_marker "${test_log}" "provekit-cli prove" "PASS"
+
+ append_stage_marker "${test_log}" "provekit-cli verify" "START"
+ if ! (cd "${workdir}" && "${PROVEKIT_BIN}" verify "./verifier.pkv" "./proof.np" >> "${test_log}" 2>&1); then
+ append_stage_marker "${test_log}" "provekit-cli verify" "FAIL"
+ echo "FAIL: provekit-cli verify"
+ echo "FAIL: provekit-cli verify" >> "${test_log}"
+ (( failed += 1 ))
+ rm -rf "${sandbox_root}"
+ continue
+ fi
+ append_stage_marker "${test_log}" "provekit-cli verify" "PASS"
+
+ echo "PASS"
+ (( passed += 1 ))
+ rm -rf "${sandbox_root}"
+ # Remove per-test log for passing tests to keep artifacts lean
+ rm -f "${test_log}"
+done
+
+# Blackbox skips bump `skipped` without bumping `total` (see the skip block
+# above), while the in-loop skips bump both; summing passed+failed+skipped
+# would therefore overstate the attempt count by the number of blackbox skips.
+attempted=${total}
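+# e.g. 10 discovered, 2 blackbox-skipped, 1 skipped in-loop, 5 passed, 2 failed
+# -> attempted=8, skipped=3 (numbers illustrative).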
+
+echo ""
+echo "----- execution_success summary -----"
+echo "Total discovered : ${#test_dirs[@]}"
+if [[ -n "${TEST_FILTER}" ]]; then
+  echo "Test filter      : ${TEST_FILTER}"
+fi
+if [[ "${MAX_TESTS}" -gt 0 ]]; then
+  echo "Attempted limit  : ${MAX_TESTS}"
+else
+  echo "Attempted limit  : all"
+fi
+echo "Attempted        : ${attempted}"
+echo "Passed           : ${passed}"
+echo "Failed           : ${failed}"
+echo "Skipped          : ${skipped} (skip list: ${#SKIP_TESTS[@]} unimplemented-blackbox tests)"
+echo "Log directory    : ${LOG_DIR}"
+
+python3 "${HELPER}" build-report "${LOG_DIR}" "${passed}" "${failed}" "${skipped}"
+
+# Emit GitHub Step Summary when running inside Actions
+# (must be after the Python report generator so grouped_error_report.txt exists)
+if [[ -n "${GITHUB_STEP_SUMMARY:-}" ]]; then
+ {
+ echo "## Noir execution_success — ${RUN_ID}"
+ echo ""
+ echo "| Metric | Count |"
+ echo "|--------|------|"
+ echo "| Discovered | ${#test_dirs[@]} |"
+ echo "| Attempted | ${attempted} |"
+ echo "| ✅ Passed | ${passed} |"
+ echo "| ❌ Failed | ${failed} |"
+    echo "| ⏭️ Skipped | ${skipped} (skip list: ${#SKIP_TESTS[@]} unimplemented blackboxes) |"
+ if [[ ${failed} -gt 0 ]]; then
+ echo ""
+ echo "### Failure groups"
+ echo '```'
+ cat "${GROUPED_REPORT_FILE}" 2>/dev/null || echo "(no grouped report)"
+ echo '```'
+ fi
+ } >> "${GITHUB_STEP_SUMMARY}"
+fi
+
+echo "Grouped report   : ${GROUPED_REPORT_FILE}"
+
+# Generate ProveKit witness count report
+if [[ -f "${WITNESS_CSV}" ]] && python3 "${SCRIPT_DIR}/generate_provekit_witness_report.py" "${WITNESS_CSV}" "${LOG_DIR}"; then
+ echo "ProveKit witness report: ${LOG_DIR}/provekit_witness_report.md"
+ if [[ -n "${GITHUB_STEP_SUMMARY:-}" ]]; then
+ {
+ echo ""
+ echo "## ProveKit Witness Counts"
+ head -4 "${LOG_DIR}/provekit_witness_report.md"
+ echo ""
+ echo "_Full table available in artifact: \`provekit_witness_report.md\`_"
+ } >> "${GITHUB_STEP_SUMMARY}"
+ fi
+fi
+
+# Circuit failures are surfaced via the PR sticky comment and the grouped
+# error report. The workflow should not fail just because some circuits
+# don't compile through provekit-cli today — the report is the source of
+# truth for which circuits pass. Set STRICT_FAIL=1 to opt into the old
+# "exit 1 on any failure" behaviour for local CI gates.
+if [[ "${STRICT_FAIL:-0}" == "1" && "${failed}" -gt 0 ]]; then
+ exit 1
+fi
+
+exit 0
diff --git a/scripts/vendor_noir_execution_success.sh b/scripts/vendor_noir_execution_success.sh
new file mode 100755
index 000000000..9bdfae962
--- /dev/null
+++ b/scripts/vendor_noir_execution_success.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+DEST_EXEC_DIR="${REPO_ROOT}/test-programs/noir/execution_success"
+DEST_LIB_DIR="${REPO_ROOT}/test-programs/noir/test_libraries"
+NOIR_REF="${NOIR_REF:-v1.0.0-beta.19}"
+
+tmpdir="$(mktemp -d)"
+cleanup() {
+ rm -rf "${tmpdir}"
+}
+trap cleanup EXIT
+
+echo "Vendoring noir-lang/noir:test_programs/{execution_success,test_libraries} (ref: ${NOIR_REF})"
+
+git clone --depth 1 --filter=blob:none --sparse --branch "${NOIR_REF}" \
+ "https://github.com/noir-lang/noir.git" "${tmpdir}/noir"
+git -C "${tmpdir}/noir" sparse-checkout set \
+ "test_programs/execution_success" \
+ "test_programs/test_libraries"
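+
+# `--filter=blob:none` defers blob downloads and `--sparse` starts from a
+# minimal checkout; the sparse-checkout set above then materialises only the
+# two test_programs subtrees (fetching just their blobs).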
+
+mkdir -p "$(dirname "${DEST_EXEC_DIR}")"
+rm -rf "${DEST_EXEC_DIR}" "${DEST_LIB_DIR}"
+cp -R "${tmpdir}/noir/test_programs/execution_success" "${DEST_EXEC_DIR}"
+cp -R "${tmpdir}/noir/test_programs/test_libraries" "${DEST_LIB_DIR}"
+
+source_commit="$(git -C "${tmpdir}/noir" rev-parse HEAD)"
+generated_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
+
+cat > "${REPO_ROOT}/test-programs/noir/execution_success.SOURCE" <