From c031e6d19d7762de834b6d7109da4f4f0127aa6f Mon Sep 17 00:00:00 2001 From: x-senpai-x Date: Wed, 15 Apr 2026 23:47:48 +0530 Subject: [PATCH 01/16] noir_execution_success test suite in workflow --- .github/workflows/noir-execution-success.yml | 83 +++ scripts/run_noir_execution_success.sh | 594 +++++++++++++++++++ scripts/vendor_noir_execution_success.sh | 41 ++ 3 files changed, 718 insertions(+) create mode 100644 .github/workflows/noir-execution-success.yml create mode 100755 scripts/run_noir_execution_success.sh create mode 100755 scripts/vendor_noir_execution_success.sh diff --git a/.github/workflows/noir-execution-success.yml b/.github/workflows/noir-execution-success.yml new file mode 100644 index 000000000..2209672c2 --- /dev/null +++ b/.github/workflows/noir-execution-success.yml @@ -0,0 +1,83 @@ +name: Noir Execution Success Tests + +# Provide a noir_ref to test against any Noir release. +on: + workflow_dispatch: + inputs: + noir_ref: + description: "noir-lang/noir git ref (tag / branch / SHA)" + required: false + default: "v1.0.0-beta.19" + +env: + CARGO_TERM_COLOR: always + NOIR_REF: ${{ inputs.noir_ref || 'v1.0.0-beta.19' }} + +# Cancel any in-progress run on the same branch when a new one is triggered. +concurrency: + group: noir-exec-success-${{ github.ref }} + cancel-in-progress: true + +jobs: + noir-execution-success: + name: Noir execution_success suite (${{ inputs.noir_ref || 'v1.0.0-beta.19' }}) + runs-on: [self-hosted, Linux, ARM64, provekit-build] + + steps: + - uses: actions/checkout@v4 + + - name: Setup Rust toolchain + uses: moonrepo/setup-rust@v1 + with: + channel: nightly-2026-03-04 + cache-base: main + + - name: Build provekit-cli + run: cargo build --release --bin provekit-cli + + - name: Setup Noir toolchain + uses: noir-lang/noirup@v0.1.2 + with: + toolchain: ${{ env.NOIR_REF }} + + # Sparse checkout — only fetch test_programs/, not the full noir repo. 
+ - name: Fetch noir test programs (sparse checkout) + run: | + tmpdir=$(mktemp -d) + echo "NOIR_REPO_DIR=${tmpdir}/noir" >> "$GITHUB_ENV" + git clone \ + --depth 1 \ + --filter=blob:none \ + --sparse \ + --branch "$NOIR_REF" \ + https://github.com/noir-lang/noir.git "${tmpdir}/noir" + git -C "${tmpdir}/noir" sparse-checkout set \ + test_programs/execution_success \ + test_programs/test_libraries + echo "Cloned noir @ $(git -C ${tmpdir}/noir rev-parse HEAD)" + + - name: Run execution_success suite + env: + PROVEKIT_BIN: ${{ github.workspace }}/target/release/provekit-cli + LOG_DIR: ${{ github.workspace }}/noir-execution-logs + # NOIR_REPO_DIR is set by the previous step via $GITHUB_ENV + run: | + bash scripts/run_noir_execution_success.sh + + # Upload logs on every run (pass or fail) for 7 days. + - name: Upload test logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: noir-execution-logs-${{ github.run_id }} + path: noir-execution-logs/ + retention-days: 7 + + # Always clean up the temp clone, even if the test step failed. + - name: Cleanup noir clone + if: always() + run: | + if [[ -n "${NOIR_REPO_DIR:-}" && -d "${NOIR_REPO_DIR}" ]]; then + rm -rf "${NOIR_REPO_DIR}" + echo "Cleaned up ${NOIR_REPO_DIR}" + fi diff --git a/scripts/run_noir_execution_success.sh b/scripts/run_noir_execution_success.sh new file mode 100755 index 000000000..461ffbb9a --- /dev/null +++ b/scripts/run_noir_execution_success.sh @@ -0,0 +1,594 @@ +#!/usr/bin/env bash +# run_noir_execution_success.sh +# +# Run the Noir execution_success test suite through provekit-cli. +# +# Environment variables (all optional): +# NOIR_REPO_DIR Path to a cloned noir-lang/noir repo root. +# When set, tests come from +# NOIR_REPO_DIR/test_programs/{execution_success,test_libraries}. +# When unset, falls back to the vendored path +# REPO_ROOT/test-programs/noir/. 
+# PROVEKIT_BIN Path to provekit-cli binary (default: target/release/provekit-cli) +# LOG_DIR Directory for per-test logs and summary +# MAX_TESTS Cap the number of tests (0 = unlimited) +# TEST_FILTER Regex filter on test name +# REQUIRED_NARGO_VERSION Nargo version string to require (default 1.0.0-beta.19) +# ENABLE_ENUMS_FALLBACK Retry compile with -Zenums on 'enums' feature error (0/1, default 1) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" + +# --------------------------------------------------------------------------- +# Resolve test corpus root (CI clone vs. local vendored copy) +# --------------------------------------------------------------------------- +if [[ -n "${NOIR_REPO_DIR:-}" ]]; then + TEST_ROOT="${NOIR_REPO_DIR}/test_programs/execution_success" + TEST_LIB_ROOT="${NOIR_REPO_DIR}/test_programs/test_libraries" +else + NOIR_ROOT="${REPO_ROOT}/test-programs/noir" + TEST_ROOT="${NOIR_ROOT}/execution_success" + TEST_LIB_ROOT="${NOIR_ROOT}/test_libraries" +fi + +PROVEKIT_BIN="${PROVEKIT_BIN:-${REPO_ROOT}/target/release/provekit-cli}" +MAX_TESTS="${MAX_TESTS:-0}" +REQUIRED_NARGO_VERSION="${REQUIRED_NARGO_VERSION:-1.0.0-beta.19}" +ENABLE_ENUMS_FALLBACK="${ENABLE_ENUMS_FALLBACK:-1}" +TEST_FILTER="${TEST_FILTER:-}" +RUN_ID="$(date -u +%Y%m%dT%H%M%SZ)" +LOG_DIR="${LOG_DIR:-${REPO_ROOT}/scripts/noir_execution_logs/${RUN_ID}}" + +if [[ "${LOG_DIR}" != /* ]]; then + LOG_DIR="${REPO_ROOT}/${LOG_DIR}" +fi + +# --------------------------------------------------------------------------- +# Unimplemented-blackbox skip list +# These tests use blackbox functions not yet supported by provekit. +# They are counted as SKIP (not FAIL) and will be added back once supported. 
+# --------------------------------------------------------------------------- +SKIP_TESTS=( + # BLAKE3 + a_6 + array_dynamic_blackbox_input + array_dynamic_nested_blackbox_input + blake3 + conditional_1 + conditional_regression_short_circuit + regression_4449 + # ECDSA_SECP256K1 + bench_ecdsa_secp256k1 + ecdsa_secp256k1 + ecdsa_secp256k1_invalid_inputs + ecdsa_secp256k1_invalid_pub_key_in_inactive_branch + # ECDSA_SECP256R1 + ecdsa_secp256r1 + ecdsa_secp256r1_3x + ecdsa_secp256r1_invalid_pub_key_in_inactive_branch + ecdsa_secp256r1_msg_equals_order + # EMBEDDED_CURVE_ADD + embedded_curve_ops + regression_5045 + regression_7744 + # AES128_ENCRYPT + aes128_encrypt + # BLAKE2S + a_7 +) + +# Build a fast associative-array lookup +declare -A SKIP_SET +for _t in "${SKIP_TESTS[@]}"; do + SKIP_SET["${_t}"]=1 +done + +if [[ ! -d "${TEST_ROOT}" ]]; then + echo "ERROR: Missing test corpus at ${TEST_ROOT}" + if [[ -z "${NOIR_REPO_DIR:-}" ]]; then + echo "Hint: run scripts/vendor_noir_execution_success.sh first, or set NOIR_REPO_DIR." + else + echo "Hint: check that NOIR_REPO_DIR (${NOIR_REPO_DIR}) contains test_programs/execution_success." + fi + exit 1 +fi + +if [[ ! -x "${PROVEKIT_BIN}" ]]; then + echo "Missing provekit-cli binary at ${PROVEKIT_BIN}" + echo "Build it first: cargo build --release --bin provekit-cli" + exit 1 +fi + +if ! command -v nargo >/dev/null 2>&1; then + echo "nargo is required but was not found in PATH." 
+ echo "Install with noirup and set version: noirup --version v1.0.0-beta.19" + exit 1 +fi + +nargo_version="$(nargo --version)" +if [[ "${nargo_version}" != *"${REQUIRED_NARGO_VERSION}"* ]]; then + echo "Unsupported nargo version: ${nargo_version}" + echo "Expected version containing: ${REQUIRED_NARGO_VERSION}" + echo "Switch with: noirup --version ${REQUIRED_NARGO_VERSION}" + exit 1 +fi + +mkdir -p "${LOG_DIR}/per_test" +GROUPED_REPORT_FILE="${LOG_DIR}/grouped_error_report.txt" + +shopt -s nullglob globstar + +discover_test_dirs() { + TEST_ROOT="${TEST_ROOT}" python3 - <<'PY' +from pathlib import Path +import tomllib +import os + +root = Path(os.environ["TEST_ROOT"]) +nargo_data = {} + +for nargo in root.rglob("Nargo.toml"): + rel = nargo.parent.relative_to(root).as_posix() + try: + data = tomllib.loads(nargo.read_text()) + except Exception: + data = {} + nargo_data[rel] = data + +workspace_default_roots = set() +for rel, data in nargo_data.items(): + ws = data.get("workspace") + if isinstance(ws, dict) and "default-member" in ws: + workspace_default_roots.add(rel) + +suppressed = set() +for ws_rel in workspace_default_roots: + ws_path = Path(ws_rel) if ws_rel != "." else Path() + for rel in nargo_data: + rel_path = Path(rel) if rel != "." 
else Path() + if rel_path != ws_path and ws_path in rel_path.parents: + suppressed.add(rel) + +candidates = set(workspace_default_roots) +for rel, data in nargo_data.items(): + if rel in suppressed: + continue + + pkg = data.get("package") + if isinstance(pkg, dict) and "name" in pkg: + if (root / rel / "Prover.toml").is_file(): + candidates.add(rel) + +for rel in sorted(candidates): + print(rel) +PY +} + +resolve_prover_toml() { + local project_dir="$1" + local package_name="$2" + + PROJECT_DIR="${project_dir}" PACKAGE_NAME="${package_name}" python3 - <<'PY' +from pathlib import Path +import tomllib +import os + +project_dir = Path(os.environ["PROJECT_DIR"]) +package_name = os.environ["PACKAGE_NAME"] + +candidates = [] +for nargo in sorted(project_dir.rglob("Nargo.toml")): + try: + data = tomllib.loads(nargo.read_text()) + except Exception: + continue + + pkg = data.get("package") + if not isinstance(pkg, dict): + continue + + if pkg.get("name") != package_name: + continue + + prover = nargo.parent / "Prover.toml" + if prover.is_file(): + candidates.append(prover.relative_to(project_dir).as_posix()) + +if candidates: + candidates.sort(key=lambda p: (p.count("/"), p)) + print(candidates[0]) + raise SystemExit(0) + +root_prover = project_dir / "Prover.toml" +if root_prover.is_file(): + print("Prover.toml") + raise SystemExit(0) + +all_provers = sorted(project_dir.rglob("Prover.toml")) +if len(all_provers) == 1: + print(all_provers[0].relative_to(project_dir).as_posix()) + raise SystemExit(0) + +print("") +PY +} + +read_workdir_package_name() { + local project_dir="$1" + PROJECT_DIR="${project_dir}" python3 - <<'PY' +from pathlib import Path +import tomllib +import os + +nargo = Path(os.environ["PROJECT_DIR"]) / "Nargo.toml" +if not nargo.is_file(): + print("") + raise SystemExit(0) + +try: + data = tomllib.loads(nargo.read_text()) +except Exception: + print("") + raise SystemExit(0) + +pkg = data.get("package") +if isinstance(pkg, dict): + print(pkg.get("name", "")) 
+else: + print("") +PY +} + +relative_path() { + local from_dir="$1" + local to_path="$2" + FROM_DIR="${from_dir}" TO_PATH="${to_path}" python3 - <<'PY' +import os +print(os.path.relpath(os.environ["TO_PATH"], os.environ["FROM_DIR"])) +PY +} + + + +append_stage_marker() { + local log_file="$1" + local stage_name="$2" + local stage_status="$3" + printf '\n[%s] %s: %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "${stage_status}" "${stage_name}" >> "${log_file}" +} + +mapfile -t test_dirs < <(discover_test_dirs) + +if [[ "${#test_dirs[@]}" -eq 0 ]]; then + echo "No runnable test programs found under ${TEST_ROOT}" + exit 1 +fi + +total=0 +passed=0 +failed=0 +skipped=0 + +if [[ ! -d "${TEST_LIB_ROOT}" ]]; then + echo "WARNING: missing ${TEST_LIB_ROOT}; path-based dependency tests may fail." + echo "Run scripts/vendor_noir_execution_success.sh to vendor test_libraries as well." +fi + +for test_name in "${test_dirs[@]}"; do + if [[ -n "${TEST_FILTER}" && ! "${test_name}" =~ ${TEST_FILTER} ]]; then + continue + fi + + (( total += 1 )) + + if [[ "${MAX_TESTS}" -gt 0 && "${total}" -gt "${MAX_TESTS}" ]]; then + break + fi + + # leaf name (no sub-path) is what we key on in the skip set + leaf_name="${test_name%%/*}" + test_dir="${TEST_ROOT}/${test_name}" + safe_test_name="${test_name//\//__}" + # --- Unimplemented blackbox skip list: no log, no noise --- + if [[ "${SKIP_SET["${leaf_name}"]:-}" == "1" ]]; then + echo "SKIP (blackbox): ${test_name}" + (( skipped += 1 )) + continue + fi + + test_log="${LOG_DIR}/per_test/${safe_test_name}.log" + + echo "" + echo "==> [${total}] ${test_name}" + + : > "${test_log}" + { + echo "test_name=${test_name}" + echo "test_dir=${test_dir}" + echo "run_id=${RUN_ID}" + echo "nargo_version=${nargo_version}" + } >> "${test_log}" + + if [[ ! 
-f "${test_dir}/Nargo.toml" ]]; then + echo "SKIP: missing Nargo.toml" + append_stage_marker "${test_log}" "test" "SKIP" + echo "SKIP: missing Nargo.toml" >> "${test_log}" + (( skipped += 1 )) + continue + fi + + if [[ ! -d "${TEST_LIB_ROOT}" ]] && grep -qr 'test_libraries' "${test_dir}"/Nargo.toml 2>/dev/null; then + echo "SKIP: missing test_libraries for relative path dependency" + append_stage_marker "${test_log}" "test" "SKIP" + echo "SKIP: missing test_libraries for relative path dependency" >> "${test_log}" + (( skipped += 1 )) + continue + fi + + sandbox_root="$(mktemp -d)" + sandbox_noir_root="${sandbox_root}/test-programs/noir" + sandbox_exec_root="${sandbox_noir_root}/execution_success" + fixture_name="${test_name%%/*}" + fixture_src="${TEST_ROOT}/${fixture_name}" + fixture_dst="${sandbox_exec_root}/${fixture_name}" + + mkdir -p "${sandbox_exec_root}" + cp -R "${fixture_src}" "${fixture_dst}" + + if [[ -d "${TEST_LIB_ROOT}" ]]; then + mkdir -p "${sandbox_noir_root}" + ln -s "${TEST_LIB_ROOT}" "${sandbox_noir_root}/test_libraries" + fi + + workdir="${sandbox_exec_root}/${test_name}" + echo "sandbox_root=${sandbox_root}" >> "${test_log}" + echo "workdir=${workdir}" >> "${test_log}" + + append_stage_marker "${test_log}" "nargo compile" "START" + compile_ok=0 + + if (cd "${workdir}" && nargo compile >> "${test_log}" 2>&1); then + compile_ok=1 + elif [[ "${ENABLE_ENUMS_FALLBACK}" -eq 1 ]] && grep -q "unstable feature 'enums'" "${test_log}"; then + append_stage_marker "${test_log}" "nargo compile -Zenums" "RETRY" + if (cd "${workdir}" && nargo compile -Zenums >> "${test_log}" 2>&1); then + compile_ok=1 + fi + fi + + if [[ "${compile_ok}" -ne 1 ]]; then + append_stage_marker "${test_log}" "nargo compile" "FAIL" + echo "FAIL: nargo compile" + echo "FAIL: nargo compile" >> "${test_log}" + (( failed += 1 )) + rm -rf "${sandbox_root}" + continue + fi + + append_stage_marker "${test_log}" "nargo compile" "PASS" + + compiled_jsons=("${workdir}"/target/*.json) + if [[ 
"${#compiled_jsons[@]}" -eq 0 ]]; then + compiled_jsons=("${sandbox_exec_root}/${fixture_name}"/target/*.json) + fi + if [[ "${#compiled_jsons[@]}" -eq 0 ]]; then + compiled_jsons=("${sandbox_exec_root}/${fixture_name}"/**/target/*.json) + fi + if [[ "${#compiled_jsons[@]}" -eq 0 ]]; then + append_stage_marker "${test_log}" "compile output check" "FAIL" + echo "FAIL: missing compiled target JSON after nargo compile" + echo "FAIL: missing compiled target JSON after nargo compile" >> "${test_log}" + (( failed += 1 )) + rm -rf "${sandbox_root}" + continue + fi + + workdir_package_name="$(read_workdir_package_name "${workdir}")" + circuit_json_abs="" + if [[ -n "${workdir_package_name}" ]]; then + for candidate_json in "${compiled_jsons[@]}"; do + if [[ "$(basename "${candidate_json}" .json)" == "${workdir_package_name}" ]]; then + circuit_json_abs="${candidate_json}" + break + fi + done + fi + if [[ -z "${circuit_json_abs}" ]]; then + circuit_json_abs="${compiled_jsons[0]}" + fi + + circuit_json="$(relative_path "${workdir}" "${circuit_json_abs}")" + package_name="$(basename "${circuit_json_abs}" .json)" + prover_toml_rel="$(resolve_prover_toml "${workdir}" "${package_name}")" + + if [[ -z "${prover_toml_rel}" || ! -f "${workdir}/${prover_toml_rel}" ]]; then + append_stage_marker "${test_log}" "resolve prover.toml" "FAIL" + echo "FAIL: could not locate Prover.toml for compiled package ${package_name}" + echo "FAIL: could not locate Prover.toml for compiled package ${package_name}" >> "${test_log}" + (( failed += 1 )) + rm -rf "${sandbox_root}" + continue + fi + + echo "circuit_json=${circuit_json}" >> "${test_log}" + echo "prover_toml=${prover_toml_rel}" >> "${test_log}" + + append_stage_marker "${test_log}" "provekit-cli prepare" "START" + if ! 
(cd "${workdir}" && "${PROVEKIT_BIN}" prepare "./${circuit_json}" --pkp "./prover.pkp" --pkv "./verifier.pkv" >> "${test_log}" 2>&1); then + append_stage_marker "${test_log}" "provekit-cli prepare" "FAIL" + echo "FAIL: provekit-cli prepare" + echo "FAIL: provekit-cli prepare" >> "${test_log}" + (( failed += 1 )) + rm -rf "${sandbox_root}" + continue + fi + append_stage_marker "${test_log}" "provekit-cli prepare" "PASS" + + append_stage_marker "${test_log}" "provekit-cli prove" "START" + if ! (cd "${workdir}" && "${PROVEKIT_BIN}" prove "./prover.pkp" "./${prover_toml_rel}" -o "./proof.np" >> "${test_log}" 2>&1); then + append_stage_marker "${test_log}" "provekit-cli prove" "FAIL" + echo "FAIL: provekit-cli prove" + echo "FAIL: provekit-cli prove" >> "${test_log}" + (( failed += 1 )) + rm -rf "${sandbox_root}" + continue + fi + append_stage_marker "${test_log}" "provekit-cli prove" "PASS" + + append_stage_marker "${test_log}" "provekit-cli verify" "START" + if ! (cd "${workdir}" && "${PROVEKIT_BIN}" verify "./verifier.pkv" "./proof.np" >> "${test_log}" 2>&1); then + append_stage_marker "${test_log}" "provekit-cli verify" "FAIL" + echo "FAIL: provekit-cli verify" + echo "FAIL: provekit-cli verify" >> "${test_log}" + (( failed += 1 )) + rm -rf "${sandbox_root}" + continue + fi + append_stage_marker "${test_log}" "provekit-cli verify" "PASS" + + echo "PASS" + (( passed += 1 )) + rm -rf "${sandbox_root}" + # Remove per-test log for passing tests to keep artifacts lean + rm -f "${test_log}" +done + +attempted=$((passed + failed + skipped)) + +echo "" +echo "----- execution_success summary -----" +echo "Total discovered : ${#test_dirs[@]}" +if [[ -n "${TEST_FILTER}" ]]; then + echo "Test filter : ${TEST_FILTER}" +fi +if [[ "${MAX_TESTS}" -gt 0 ]]; then + echo "Attempted limit : ${MAX_TESTS}" +else + echo "Attempted limit : all" +fi +echo "Attempted : ${attempted}" +echo "Passed : ${passed}" +echo "Failed : ${failed}" +echo "Skipped : ${skipped} (${#SKIP_TESTS[@]} 
unimplemented-blackbox tests)" +echo "Log directory : ${LOG_DIR}" + +LOG_DIR="${LOG_DIR}" PASSED_COUNT="${passed}" python3 - <<'PY' +from pathlib import Path +import re +from collections import defaultdict +import os + +log_dir = Path(os.environ["LOG_DIR"]) +per_test_dir = log_dir / "per_test" +report_file = log_dir / "grouped_error_report.txt" + +logs = sorted(per_test_dir.glob("*.log")) +# PASS logs are deleted after each successful test run; read the count from the shell instead. +status_counts = {"PASS": int(os.environ.get("PASSED_COUNT", "0")), "FAIL": 0, "SKIP": 0} +grouped = defaultdict(list) +stage_groups = defaultdict(list) + +for fp in logs: + text = fp.read_text(errors="replace") + name = fp.stem + + if "SKIP:" in text: + status_counts["SKIP"] += 1 + skip_reason = re.search(r"SKIP: ([^\n]+)", text) + reason = skip_reason.group(1).strip() if skip_reason else "unknown" + grouped[f"SKIP: {reason}"].append(name) + continue + + status_counts["FAIL"] += 1 + fail_stage_match = re.findall(r"FAIL: ([^\n]+)", text) + stage = fail_stage_match[-1].strip() if fail_stage_match else "unknown stage" + stage_groups[stage].append(name) + + blackbox = re.search(r"not implemented: Other black box function: BLACKBOX::([A-Z0-9_]+)", text) + if blackbox: + grouped[f"Not implemented blackbox: {blackbox.group(1)} ({stage})"].append(name) + continue + + if "Program must have one entry point." 
in text: + grouped[f"Program must have one entry point ({stage})"].append(name) + continue + + panic = re.search(r"panicked at [^\n]*:\n([^\n]+)", text) + if panic: + grouped[f"Panic: {panic.group(1).strip()} ({stage})"].append(name) + continue + + solve = re.search(r"Failed to solve program: '([^']+)'", text) + if solve: + grouped[f"Failed to solve program: {solve.group(1)} ({stage})"].append(name) + continue + + assertion = re.search(r"Failed assertion", text) + if assertion: + grouped[f"Failed assertion ({stage})"].append(name) + continue + + compile_error = re.search(r"^error:\s*([^\n]+)", text, flags=re.M) + if compile_error: + grouped[f"Compile error: {compile_error.group(1).strip()} ({stage})"].append(name) + continue + + compile_bug = re.search(r"^bug:\s*([^\n]+)", text, flags=re.M) + if compile_bug: + grouped[f"Compile bug: {compile_bug.group(1).strip()} ({stage})"].append(name) + continue + + generic_error = re.search(r"^Error:\s*([^\n]+)", text, flags=re.M) + if generic_error: + grouped[f"Error: {generic_error.group(1).strip()} ({stage})"].append(name) + continue + + grouped[f"Unknown failure ({stage})"].append(name) + +with report_file.open("w") as f: + f.write(f"logs={len(logs)}\n") + f.write(f"PASS={status_counts['PASS']}\n") + f.write(f"FAIL={status_counts['FAIL']}\n") + f.write(f"SKIP={status_counts['SKIP']}\n") + f.write("\n[stages]\n") + for stage, tests in sorted(stage_groups.items(), key=lambda kv: (-len(kv[1]), kv[0])): + f.write(f"{stage}\t{len(tests)}\t{', '.join(tests)}") + f.write("\n") + f.write("\n[grouped]\n") + for key, tests in sorted(grouped.items(), key=lambda kv: (-len(kv[1]), kv[0])): + f.write(f"{len(tests)}\t{key}\t{', '.join(tests)}") + f.write("\n") +PY + +# Emit GitHub Step Summary when running inside Actions +# (must be after the Python report generator so grouped_error_report.txt exists) +if [[ -n "${GITHUB_STEP_SUMMARY:-}" ]]; then + { + echo "## Noir execution_success — ${RUN_ID}" + echo "" + echo "| Metric | Count |" + 
echo "|--------|------|" + echo "| Discovered | ${#test_dirs[@]} |" + echo "| Attempted | ${attempted} |" + echo "| ✅ Passed | ${passed} |" + echo "| ❌ Failed | ${failed} |" + echo "| ⏭️ Skipped | ${skipped} (${#SKIP_TESTS[@]} unimplemented blackboxes) |" + if [[ ${failed} -gt 0 ]]; then + echo "" + echo "### Failure groups" + echo '```' + cat "${GROUPED_REPORT_FILE}" 2>/dev/null || echo "(no grouped report)" + echo '```' + fi + } >> "${GITHUB_STEP_SUMMARY}" +fi + +echo "Grouped report : ${GROUPED_REPORT_FILE}" + +if [[ "${failed}" -gt 0 ]]; then + exit 1 +fi + +exit 0 diff --git a/scripts/vendor_noir_execution_success.sh b/scripts/vendor_noir_execution_success.sh new file mode 100755 index 000000000..2fd2dddcc --- /dev/null +++ b/scripts/vendor_noir_execution_success.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" +DEST_EXEC_DIR="${REPO_ROOT}/test-programs/noir/execution_success" +DEST_LIB_DIR="${REPO_ROOT}/test-programs/noir/test_libraries" +NOIR_REF="${NOIR_REF:-master}" + +tmpdir="$(mktemp -d)" +cleanup() { + rm -rf "${tmpdir}" +} +trap cleanup EXIT + +echo "Vendoring noir-lang/noir:test_programs/{execution_success,test_libraries} (ref: ${NOIR_REF})" + +git clone --depth 1 --filter=blob:none --sparse --branch "${NOIR_REF}" \ + "https://github.com/noir-lang/noir.git" "${tmpdir}/noir" +git -C "${tmpdir}/noir" sparse-checkout set \ + "test_programs/execution_success" \ + "test_programs/test_libraries" + +mkdir -p "$(dirname "${DEST_EXEC_DIR}")" +rm -rf "${DEST_EXEC_DIR}" "${DEST_LIB_DIR}" +cp -R "${tmpdir}/noir/test_programs/execution_success" "${DEST_EXEC_DIR}" +cp -R "${tmpdir}/noir/test_programs/test_libraries" "${DEST_LIB_DIR}" + +source_commit="$(git -C "${tmpdir}/noir" rev-parse HEAD)" +generated_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")" + +cat > "${REPO_ROOT}/test-programs/noir/execution_success.SOURCE" < Date: Thu, 16 Apr 2026 12:33:12 +0530 
Subject: [PATCH 02/16] copilot issues addressed --- .github/workflows/noir-execution-success.yml | 12 +++++++----- scripts/run_noir_execution_success.sh | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/.github/workflows/noir-execution-success.yml b/.github/workflows/noir-execution-success.yml index 2209672c2..3e6cd267b 100644 --- a/.github/workflows/noir-execution-success.yml +++ b/.github/workflows/noir-execution-success.yml @@ -5,7 +5,7 @@ on: workflow_dispatch: inputs: noir_ref: - description: "noir-lang/noir git ref (tag / branch / SHA)" + description: "noir-lang/noir release tag (e.g. v1.0.0-beta.19)" required: false default: "v1.0.0-beta.19" @@ -44,6 +44,8 @@ jobs: - name: Fetch noir test programs (sparse checkout) run: | tmpdir=$(mktemp -d) + # Export the parent so the cleanup step can remove it entirely. + echo "NOIR_TMPDIR=${tmpdir}" >> "$GITHUB_ENV" echo "NOIR_REPO_DIR=${tmpdir}/noir" >> "$GITHUB_ENV" git clone \ --depth 1 \ @@ -54,7 +56,7 @@ jobs: git -C "${tmpdir}/noir" sparse-checkout set \ test_programs/execution_success \ test_programs/test_libraries - echo "Cloned noir @ $(git -C ${tmpdir}/noir rev-parse HEAD)" + echo "Cloned noir @ $(git -C "${tmpdir}/noir" rev-parse HEAD)" - name: Run execution_success suite env: @@ -77,7 +79,7 @@ jobs: - name: Cleanup noir clone if: always() run: | - if [[ -n "${NOIR_REPO_DIR:-}" && -d "${NOIR_REPO_DIR}" ]]; then - rm -rf "${NOIR_REPO_DIR}" - echo "Cleaned up ${NOIR_REPO_DIR}" + if [[ -n "${NOIR_TMPDIR:-}" && -d "${NOIR_TMPDIR}" ]]; then + rm -rf "${NOIR_TMPDIR}" + echo "Cleaned up ${NOIR_TMPDIR}" fi diff --git a/scripts/run_noir_execution_success.sh b/scripts/run_noir_execution_success.sh index 461ffbb9a..09b7deca0 100755 --- a/scripts/run_noir_execution_success.sh +++ b/scripts/run_noir_execution_success.sh @@ -115,6 +115,12 @@ if [[ "${nargo_version}" != *"${REQUIRED_NARGO_VERSION}"* ]]; then exit 1 fi +if ! 
python3 -c "import tomllib" 2>/dev/null; then + echo "ERROR: python3.11+ is required (tomllib not found)." + echo "Current: $(python3 --version 2>&1)" + exit 1 +fi + mkdir -p "${LOG_DIR}/per_test" GROUPED_REPORT_FILE="${LOG_DIR}/grouped_error_report.txt" @@ -271,6 +277,15 @@ passed=0 failed=0 skipped=0 +# Clean up the active test sandbox if the script exits unexpectedly (SIGINT, error). +_current_sandbox="" +_cleanup_sandbox() { + if [[ -n "${_current_sandbox:-}" && -d "${_current_sandbox}" ]]; then + rm -rf "${_current_sandbox}" + fi +} +trap _cleanup_sandbox EXIT INT TERM + if [[ ! -d "${TEST_LIB_ROOT}" ]]; then echo "WARNING: missing ${TEST_LIB_ROOT}; path-based dependency tests may fail." echo "Run scripts/vendor_noir_execution_success.sh to vendor test_libraries as well." @@ -328,6 +343,7 @@ for test_name in "${test_dirs[@]}"; do fi sandbox_root="$(mktemp -d)" + _current_sandbox="${sandbox_root}" sandbox_noir_root="${sandbox_root}/test-programs/noir" sandbox_exec_root="${sandbox_noir_root}/execution_success" fixture_name="${test_name%%/*}" From 3541e6c80d9a8ee3f622e91379660221a467afe5 Mon Sep 17 00:00:00 2001 From: x-senpai-x Date: Tue, 21 Apr 2026 11:41:47 +0530 Subject: [PATCH 03/16] added mavros comparison --- scripts/generate_witness_comparison.py | 319 +++++++++++++++++++++++++ scripts/run_noir_execution_success.sh | 23 ++ 2 files changed, 342 insertions(+) create mode 100644 scripts/generate_witness_comparison.py diff --git a/scripts/generate_witness_comparison.py b/scripts/generate_witness_comparison.py new file mode 100644 index 000000000..fdbb6ada0 --- /dev/null +++ b/scripts/generate_witness_comparison.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python3 +"""Generate Mavros vs ProveKit witness count comparison table. 
+ +Usage: python3 generate_witness_comparison.py + +Reads provekit_witness_counts.csv produced by run_noir_execution_success.sh, +joins against Mavros Cols from the live reilabs/mavros STATUS.md (with +hardcoded fallback for any entries absent from the live file), and writes +witness_comparison.md to . +""" + +import csv +import sys +import urllib.request +from pathlib import Path + +# Mavros Cols — extracted from reilabs/mavros STATUS.md (column "Cols"), +# noir/test_programs/execution_success/* rows only. Keys are bare circuit names. +MAVROS_COLS: dict[str, int] = { + "a_1327_concrete_in_generic": 4, + "a_1_mul": 577, + "a_2_div": 610, + "a_3_add": 567, + "a_4_sub": 670, + "a_5_over": 798, + "a_6_array": 5619, + "arithmetic_binary_operations": 654, + "array_dedup_regression": 523, + "array_eq": 161, + "array_neq": 161, + "array_oob_regression_7965": 551, + "array_oob_regression_7975": 6, + "array_rc_regression_7842": 1, + "array_with_refs_from_param": 3, + "as_witness": 3, + "assert": 4, + "assert_statement": 3, + "assign_ex": 3, + "bit_and": 1840, + "bit_not": 523, + "bool_not": 2, + "bool_or": 5, + "break_and_continue": 1, + "brillig_acir_as_brillig": 541, + "brillig_array_ifelse": 8, + "brillig_arrays": 4, + "brillig_block_parameter_liveness": 14119, + "brillig_calls": 541, + "brillig_calls_array": 550, + "brillig_calls_conditionals": 577, + "brillig_conditional": 5, + "brillig_constant_reference_regression": 532, + "brillig_cow": 543, + "brillig_cow_assign": 1, + "brillig_fns_as_values": 593, + "brillig_identity_function": 8, + "brillig_loop_size_regression": 3, + "brillig_nested_arrays": 533, + "brillig_not": 9, + "brillig_recursion": 532, + "brillig_recursive_main": 525, + "brillig_recursive_main_indirect": 525, + "brillig_uninitialized_arrays": 790, + "cast_bool": 5, + "cast_to_i8_regression_7776": 648, + "cast_to_u64_regression_7776": 662, + "cast_to_u8_regression_7776": 648, + "comptime_println": 1, + "comptime_println_fmtstr_with_quoted": 1, + 
"comptime_variable_at_runtime": 1, + "conditional_regression_547": 3, + "conditional_regression_underflow": 517, + "custom_entry": 2, + "databus": 610, + "databus_two_calldata_simple": 619, + "debug_logs": 3, + "diamond_deps_0": 4, + "division_by_max": 528, + "do_not_capture_comptime_locals": 1, + "double_neg_cond_bool_input": 4, + "double_neg_cond_global_var": 809, + "dual_constrained_lambdas": 3, + "field_attribute": 533, + "fmtstr_with_global": 1, + "fold_after_inlined_calls": 539, + "fold_basic": 9, + "fold_basic_nested_call": 7, + "fold_distinct_return": 540, + "generics": 3, + "global_consts": 175, + "global_nested_array_regression_9270": 526, + "global_var_entry_point_used_in_another_entry": 3, + "global_var_func_with_multiple_entry_points": 3, + "global_var_multiple_entry_points_nested": 3, + "inline_never_basic": 5, + "integer_array_indexing": 527, + "lambda_taking_lambda_with_variant": 548, + "last_uses_regression_8935": 524, + "loop": 524, + "loop_break_regression_8319": 1, + "loop_invariant_nested_deep": 2, + "loop_invariant_regression_8586": 2, + "loop_small_break": 2, + "main_return": 3, + "modules": 5, + "modules_more": 5, + "modulus": 864, + "mutate_array_copy": 1, + "negated_jmpif_condition": 5, + "negative_associated_constants": 1, + "nested_array_with_refs": 2, + "nested_array_with_refs_from_param": 3, + "nested_arrays_from_brillig": 19, + "nested_fmtstr": 1, + "no_predicates_basic": 5, + "no_predicates_brillig": 532, + "poseidon_bn254_hash_width_3": 552, + "poseidonsponge_x5_254": 608, + "pred_eq": 5, + "prelude": 1, + "reference_alias_in_array": 1, + "regression_10197": 523, + "regression_10307": 531, + "regression_10466": 1, + "regression_10516": 1, + "regression_10690": 1, + "regression_10917": 3, + "regression_10923": 5, + "regression_2660": 572, + "regression_3051": 1, + "regression_3394": 1, + "regression_3607": 596, + "regression_3889": 5, + "regression_4088": 2, + "regression_4124": 2, + "regression_4202": 550, + "regression_4663": 1, + 
"regression_5435": 3, + "regression_5615": 1, + "regression_6451": 11, + "regression_6674_1": 1, + "regression_6674_2": 1, + "regression_6734": 1, + "regression_6990": 1, + "regression_7143": 532, + "regression_7195": 12, + "regression_7451": 1304, + "regression_7962": 1461, + "regression_8174": 569, + "regression_8212": 524, + "regression_8235": 4, + "regression_8329": 7, + "regression_8519": 677, + "regression_8558": 657, + "regression_8739": 1, + "regression_8761": 2, + "regression_8874": 524, + "regression_8890": 6, + "regression_8926": 532, + "regression_8975": 2, + "regression_9037": 3, + "regression_9047": 521, + "regression_9102": 5, + "regression_9116": 1, + "regression_9160": 3, + "regression_9193": 2, + "regression_9206": 532, + "regression_9243": 1, + "regression_9294": 1, + "regression_9329": 523, + "regression_9546": 529, + "regression_9657": 523, + "regression_9725_1": 1, + "regression_9725_2": 2, + "regression_9907": 3, + "regression_method_cannot_be_found": 1, + "return_twice": 5, + "shift_right_overflow": 517, + "shl_signed_regression_9661": 520, + "signed_bitshift": 1, + "signed_overflow_in_else_regression_8617": 660, + "signed_truncation": 918, + "simple_2d_array": 13, + "simple_add_and_ret_arr": 3, + "simple_array_param": 4, + "simple_bitwise": 1355, + "simple_comparison": 784, + "simple_mut": 3, + "simple_not": 3, + "simple_print": 3, + "simple_program_addition": 3, + "struct": 5, + "struct_array_inputs": 8, + "struct_fields_ordering": 524, + "submodules": 4, + "trait_as_return_type": 523, + "trait_associated_constant": 1, + "trait_impl_base_type": 523, + "traits_in_crates_1": 3, + "traits_in_crates_2": 3, + "tuple_inputs": 845, + "tuples": 657, + "type_aliases": 5, + "unsafe_range_constraint": 527, + "unsigned_to_signed_cast": 918, + "while_loop_break_regression_8521": 541, + "wildcard_type": 7, + "witness_compression": 6, + "workspace_default_member": 3, + "wrapping_operations": 908, + "xor": 1367, +} + + +_MAVROS_STATUS_URL = ( + 
"https://raw.githubusercontent.com/reilabs/mavros/main/STATUS.md" +) +_EXEC_SUCCESS_PREFIX = "noir/test_programs/execution_success/" + + +def _fetch_live_mavros_cols() -> dict[str, int]: + """Parse Mavros Cols from the live reilabs/mavros STATUS.md. + + Returns an empty dict on any network or parse failure so the caller + can fall back gracefully to the hardcoded table. + """ + try: + with urllib.request.urlopen(_MAVROS_STATUS_URL, timeout=15) as resp: + content = resp.read().decode("utf-8") + except Exception as exc: + print( + f"Warning: could not fetch live Mavros STATUS.md ({exc}); " + "falling back to hardcoded data.", + file=sys.stderr, + ) + return {} + + result: dict[str, int] = {} + for line in content.splitlines(): + if _EXEC_SUCCESS_PREFIX not in line: + continue + parts = line.split("|") + # Table row: | test | Compiled | R1CS | Rows | Cols | ... + # After split: parts[1]=test, parts[5]=Cols (1-indexed, 0 is empty) + if len(parts) < 6: + continue + name_field = parts[1].strip() + cols_field = parts[5].strip() + if _EXEC_SUCCESS_PREFIX not in name_field: + continue + circuit = name_field.split(_EXEC_SUCCESS_PREFIX, 1)[1].strip() + if circuit and cols_field.isdigit(): + result[circuit] = int(cols_field) + + if result: + print(f"Fetched {len(result)} Mavros entries from live STATUS.md.") + else: + print("Warning: live STATUS.md parsed 0 entries; using hardcoded data.", file=sys.stderr) + return result + + +def main(csv_path: Path, out_dir: Path) -> None: + # Live data takes precedence; hardcoded fills any gaps. 
+ live = _fetch_live_mavros_cols() + mavros_cols = {**MAVROS_COLS, **live} + + provekit: dict[str, int] = {} + with csv_path.open() as f: + for row in csv.DictReader(f): + leaf = row["test_name"].split("/")[-1] + try: + provekit[leaf] = int(row["provekit_witnesses"]) + except (ValueError, KeyError): + continue + + comparable = [ + (name, mavros, provekit[name]) + for name, mavros in sorted(mavros_cols.items()) + if name in provekit + ] + + equal = sum(1 for _, m, p in comparable if m == p) + mavros_better = sum(1 for _, m, p in comparable if m < p) + provekit_better = sum(1 for _, m, p in comparable if p < m) + + lines = [ + "# Mavros vs Provekit Witnesses Count", + "", + f"Comparable {len(comparable)} circuits: {equal} equal, " + f"{mavros_better} Mavros better, {provekit_better} Provekit better.", + "", + "| Test | Mavros Cols | Provekit Post-GE | Delta | Better | Factor |", + "|------|-------------|------------------|-------|--------|--------|", + ] + + for name, mavros, pk in comparable: + delta = pk - mavros + delta_str = f"+{delta}" if delta > 0 else str(delta) + if mavros == pk: + better = "equal" + factor = "1.00x" + elif pk < mavros: + better = "provekit" + factor = f"{mavros / pk:.2f}x" + else: + better = "mavros" + factor = f"{pk / mavros:.2f}x" + lines.append(f"| {name} | {mavros} | {pk} | {delta_str} | {better} | {factor} |") + + out_path = out_dir / "witness_comparison.md" + out_path.write_text("\n".join(lines) + "\n") + print(f"Wrote {out_path} ({len(comparable)} circuits compared)") + + +if __name__ == "__main__": + if len(sys.argv) != 3: + print(f"Usage: {sys.argv[0]} ", file=sys.stderr) + sys.exit(1) + main(Path(sys.argv[1]), Path(sys.argv[2])) diff --git a/scripts/run_noir_execution_success.sh b/scripts/run_noir_execution_success.sh index 09b7deca0..a58c4ff49 100755 --- a/scripts/run_noir_execution_success.sh +++ b/scripts/run_noir_execution_success.sh @@ -123,6 +123,8 @@ fi mkdir -p "${LOG_DIR}/per_test" 
GROUPED_REPORT_FILE="${LOG_DIR}/grouped_error_report.txt" +WITNESS_CSV="${LOG_DIR}/provekit_witness_counts.csv" +echo "test_name,provekit_witnesses" > "${WITNESS_CSV}" shopt -s nullglob globstar @@ -442,6 +444,13 @@ for test_name in "${test_dirs[@]}"; do fi append_stage_marker "${test_log}" "provekit-cli prepare" "PASS" + # Extract ProveKit post-GE witness count before the log is deleted on success + _ge_line=$(grep -o 'After GE optimization: [0-9]* constraints, [0-9]* witnesses' "${test_log}" | tail -1) + _pk_witnesses=$(echo "${_ge_line}" | grep -o '[0-9]* witnesses' | grep -o '^[0-9]*') + if [[ -n "${_pk_witnesses}" ]]; then + echo "${test_name},${_pk_witnesses}" >> "${WITNESS_CSV}" + fi + append_stage_marker "${test_log}" "provekit-cli prove" "START" if ! (cd "${workdir}" && "${PROVEKIT_BIN}" prove "./prover.pkp" "./${prover_toml_rel}" -o "./proof.np" >> "${test_log}" 2>&1); then append_stage_marker "${test_log}" "provekit-cli prove" "FAIL" @@ -603,6 +612,20 @@ fi echo "Grouped report : ${GROUPED_REPORT_FILE}" +# Generate Mavros vs ProveKit witness comparison table +if [[ -f "${WITNESS_CSV}" ]] && python3 "${SCRIPT_DIR}/generate_witness_comparison.py" "${WITNESS_CSV}" "${LOG_DIR}"; then + echo "Witness comparison: ${LOG_DIR}/witness_comparison.md" + if [[ -n "${GITHUB_STEP_SUMMARY:-}" ]]; then + { + echo "" + echo "## Mavros vs ProveKit Witness Count" + head -4 "${LOG_DIR}/witness_comparison.md" + echo "" + echo "_Full table available in artifact: \`witness_comparison.md\`_" + } >> "${GITHUB_STEP_SUMMARY}" + fi +fi + if [[ "${failed}" -gt 0 ]]; then exit 1 fi From 688d761926a6785c417f3b88a446089f38d7844b Mon Sep 17 00:00:00 2001 From: x-senpai-x Date: Tue, 21 Apr 2026 11:55:46 +0530 Subject: [PATCH 04/16] made v19 defauilt in vendor script --- scripts/run_noir_execution_success.sh | 10 +++++++--- scripts/vendor_noir_execution_success.sh | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/run_noir_execution_success.sh 
b/scripts/run_noir_execution_success.sh index a58c4ff49..71dc4b34d 100755 --- a/scripts/run_noir_execution_success.sh +++ b/scripts/run_noir_execution_success.sh @@ -444,9 +444,13 @@ for test_name in "${test_dirs[@]}"; do fi append_stage_marker "${test_log}" "provekit-cli prepare" "PASS" - # Extract ProveKit post-GE witness count before the log is deleted on success - _ge_line=$(grep -o 'After GE optimization: [0-9]* constraints, [0-9]* witnesses' "${test_log}" | tail -1) - _pk_witnesses=$(echo "${_ge_line}" | grep -o '[0-9]* witnesses' | grep -o '^[0-9]*') + # Extract ProveKit post-GE witness count before the log is deleted on success. + # Keep this non-fatal under `set -euo pipefail` if the log format changes/misses. + _ge_line="$(grep -o 'After GE optimization: [0-9]* constraints, [0-9]* witnesses' "${test_log}" | tail -1 || true)" + _pk_witnesses="" + if [[ "${_ge_line}" =~ ([0-9]+)\ witnesses$ ]]; then + _pk_witnesses="${BASH_REMATCH[1]}" + fi if [[ -n "${_pk_witnesses}" ]]; then echo "${test_name},${_pk_witnesses}" >> "${WITNESS_CSV}" fi diff --git a/scripts/vendor_noir_execution_success.sh b/scripts/vendor_noir_execution_success.sh index 2fd2dddcc..9bdfae962 100755 --- a/scripts/vendor_noir_execution_success.sh +++ b/scripts/vendor_noir_execution_success.sh @@ -5,7 +5,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" DEST_EXEC_DIR="${REPO_ROOT}/test-programs/noir/execution_success" DEST_LIB_DIR="${REPO_ROOT}/test-programs/noir/test_libraries" -NOIR_REF="${NOIR_REF:-master}" +NOIR_REF="${NOIR_REF:-v1.0.0-beta.19}" tmpdir="$(mktemp -d)" cleanup() { From 4d24b89bc1a741647af5ec7398dbb5c822048035 Mon Sep 17 00:00:00 2001 From: x-senpai-x Date: Tue, 21 Apr 2026 12:55:57 +0530 Subject: [PATCH 05/16] added missing tests in one of them as well --- scripts/generate_witness_comparison.py | 76 ++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 10 deletions(-) diff --git a/scripts/generate_witness_comparison.py b/scripts/generate_witness_comparison.py index fdbb6ada0..e3619d4ad 100644 --- a/scripts/generate_witness_comparison.py +++ b/scripts/generate_witness_comparison.py @@ -217,6 +217,37 @@ ) _EXEC_SUCCESS_PREFIX = "noir/test_programs/execution_success/" +# Keep in sync with SKIP_TESTS in scripts/run_noir_execution_success.sh. +# These are intentionally excluded from both sides of the witness comparison. +SKIP_TESTS: set[str] = { + # BLAKE3 + "a_6", + "array_dynamic_blackbox_input", + "array_dynamic_nested_blackbox_input", + "blake3", + "conditional_1", + "conditional_regression_short_circuit", + "regression_4449", + # ECDSA_SECP256K1 + "bench_ecdsa_secp256k1", + "ecdsa_secp256k1", + "ecdsa_secp256k1_invalid_inputs", + "ecdsa_secp256k1_invalid_pub_key_in_inactive_branch", + # ECDSA_SECP256R1 + "ecdsa_secp256r1", + "ecdsa_secp256r1_3x", + "ecdsa_secp256r1_invalid_pub_key_in_inactive_branch", + "ecdsa_secp256r1_msg_equals_order", + # EMBEDDED_CURVE_ADD + "embedded_curve_ops", + "regression_5045", + "regression_7744", + # AES128_ENCRYPT + "aes128_encrypt", + # BLAKE2S + "a_7", +} + def _fetch_live_mavros_cols() -> dict[str, int]: """Parse Mavros Cols from the live reilabs/mavros STATUS.md. @@ -262,22 +293,31 @@ def _fetch_live_mavros_cols() -> dict[str, int]: def main(csv_path: Path, out_dir: Path) -> None: # Live data takes precedence; hardcoded fills any gaps. 
live = _fetch_live_mavros_cols() - mavros_cols = {**MAVROS_COLS, **live} + mavros_cols = { + name: cols + for name, cols in {**MAVROS_COLS, **live}.items() + if name not in SKIP_TESTS + } provekit: dict[str, int] = {} with csv_path.open() as f: for row in csv.DictReader(f): leaf = row["test_name"].split("/")[-1] + if leaf in SKIP_TESTS: + continue try: provekit[leaf] = int(row["provekit_witnesses"]) except (ValueError, KeyError): continue + all_names = sorted(set(mavros_cols) | set(provekit)) comparable = [ - (name, mavros, provekit[name]) - for name, mavros in sorted(mavros_cols.items()) - if name in provekit + (name, mavros_cols[name], provekit[name]) + for name in all_names + if name in mavros_cols and name in provekit ] + missing_in_provekit = sum(1 for name in all_names if name in mavros_cols and name not in provekit) + missing_in_mavros = sum(1 for name in all_names if name in provekit and name not in mavros_cols) equal = sum(1 for _, m, p in comparable if m == p) mavros_better = sum(1 for _, m, p in comparable if m < p) @@ -286,14 +326,27 @@ def main(csv_path: Path, out_dir: Path) -> None: lines = [ "# Mavros vs Provekit Witnesses Count", "", - f"Comparable {len(comparable)} circuits: {equal} equal, " - f"{mavros_better} Mavros better, {provekit_better} Provekit better.", + f"Union {len(all_names)} circuits: {len(comparable)} comparable, " + f"{missing_in_provekit} missing in Provekit, {missing_in_mavros} missing in Mavros.", + f"Among comparable: {equal} equal, {mavros_better} Mavros better, " + f"{provekit_better} Provekit better.", "", "| Test | Mavros Cols | Provekit Post-GE | Delta | Better | Factor |", "|------|-------------|------------------|-------|--------|--------|", ] - for name, mavros, pk in comparable: + for name in all_names: + mavros = mavros_cols.get(name) + pk = provekit.get(name) + + if mavros is None: + lines.append(f"| {name} | - | {pk} | - | missing_mavros | - |") + continue + + if pk is None: + lines.append(f"| {name} | {mavros} | - | 
- | missing_provekit | - |") + continue + delta = pk - mavros delta_str = f"+{delta}" if delta > 0 else str(delta) if mavros == pk: @@ -301,15 +354,18 @@ def main(csv_path: Path, out_dir: Path) -> None: factor = "1.00x" elif pk < mavros: better = "provekit" - factor = f"{mavros / pk:.2f}x" + factor = "inf" if pk == 0 else f"{mavros / pk:.2f}x" else: better = "mavros" - factor = f"{pk / mavros:.2f}x" + factor = "inf" if mavros == 0 else f"{pk / mavros:.2f}x" lines.append(f"| {name} | {mavros} | {pk} | {delta_str} | {better} | {factor} |") out_path = out_dir / "witness_comparison.md" out_path.write_text("\n".join(lines) + "\n") - print(f"Wrote {out_path} ({len(comparable)} circuits compared)") + print( + f"Wrote {out_path} " + f"({len(all_names)} total circuits, {len(comparable)} comparable)" + ) if __name__ == "__main__": From fe608a9ab533ebb8ed0689bcdb8a1f4188d5e885 Mon Sep 17 00:00:00 2001 From: x-senpai-x Date: Tue, 21 Apr 2026 23:56:32 +0530 Subject: [PATCH 06/16] refactored shell script --- .github/scripts/build_noir_pr_comment.py | 221 ++++++++++++++++ .github/workflows/noir-execution-success.yml | 77 +++++- scripts/generate_witness_comparison.py | 36 +-- scripts/noir_execution_helpers.py | 258 ++++++++++++++++++ scripts/noir_skip_tests.txt | 38 +++ scripts/run_noir_execution_success.sh | 265 ++----------------- 6 files changed, 623 insertions(+), 272 deletions(-) create mode 100644 .github/scripts/build_noir_pr_comment.py create mode 100644 scripts/noir_execution_helpers.py create mode 100644 scripts/noir_skip_tests.txt diff --git a/.github/scripts/build_noir_pr_comment.py b/.github/scripts/build_noir_pr_comment.py new file mode 100644 index 000000000..bd760a354 --- /dev/null +++ b/.github/scripts/build_noir_pr_comment.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 +"""Build a sticky PR comment for noir execution_success workflow runs.""" + +from __future__ import annotations + +import argparse +import re +from pathlib import Path + +MARKER = "" 
+MAX_COMMENT_CHARS = 62000 +MIN_SECTION_CHARS = 1500 + + +def read_report(path: Path, display_name: str) -> str: + if not path.is_file(): + return f"(missing: {display_name})" + + text = path.read_text(encoding="utf-8", errors="replace").strip() + if not text: + return f"(empty: {display_name})" + return text + + +def parse_grouped_counts(grouped_report_text: str) -> dict[str, str]: + counts: dict[str, str] = {} + for key in ("PASS", "FAIL", "SKIP"): + match = re.search(rf"^{key}=(\d+)$", grouped_report_text, flags=re.MULTILINE) + counts[key] = match.group(1) if match else "n/a" + return counts + + +def status_with_icon(status: str) -> str: + normalized = (status or "unknown").strip().lower() + labels = { + "success": "[PASS]", + "failure": "[FAIL]", + "cancelled": "[CANCELLED]", + "skipped": "[SKIPPED]", + } + return f"{labels.get(normalized, '[INFO]')} {normalized}" + + +def sanitize_code_fence(text: str) -> str: + return text.replace("```", "``\\`") + + +def compose_comment( + grouped_report_text: str, + witness_report_text: str, + grouped_truncated: bool, + witness_truncated: bool, + run_id: str, + run_url: str, + sha: str, + noir_ref: str, + status: str, +) -> str: + counts = parse_grouped_counts(grouped_report_text) + short_sha = sha[:12] if sha else "unknown" + + grouped_truncated_note = ( + "\n_Grouped report truncated to fit GitHub comment size limits._\n" + if grouped_truncated + else "" + ) + witness_truncated_note = ( + "\n_Witness comparison truncated to fit GitHub comment size limits._\n" + if witness_truncated + else "" + ) + + lines = [ + MARKER, + "## Noir execution_success report", + "", + "| Metric | Value |", + "|--------|-------|", + f"| Workflow status | {status_with_icon(status)} |", + f"| Noir ref | `{noir_ref}` |", + f"| Commit | `{short_sha}` |", + f"| Run | [#{run_id}]({run_url}) |", + f"| PASS | {counts['PASS']} |", + f"| FAIL | {counts['FAIL']} |", + f"| SKIP | {counts['SKIP']} |", + "", + "
", + "grouped_error_report.txt", + "", + "```text", + sanitize_code_fence(grouped_report_text), + "```", + grouped_truncated_note, + "
", + "", + "
", + "witness_comparison.md", + "", + witness_report_text, + witness_truncated_note, + "
", + "", + "_This comment is automatically updated by the Noir Execution Success workflow._", + "", + ] + + return "\n".join(lines) + + +def clip_tail(text: str, min_chars: int, excess: int, label: str) -> tuple[str, bool]: + if len(text) <= min_chars or excess <= 0: + return text, False + + reduction = min(len(text) - min_chars, excess + 1024) + kept = text[: len(text) - reduction].rstrip() + omitted = len(text) - len(kept) + clipped = f"{kept}\n\n[... truncated {omitted} characters from {label} ...]" + return clipped, True + + +def build_with_truncation( + grouped_report_text: str, + witness_report_text: str, + run_id: str, + run_url: str, + sha: str, + noir_ref: str, + status: str, +) -> str: + grouped_work = grouped_report_text + witness_work = witness_report_text + grouped_truncated = False + witness_truncated = False + + for _ in range(128): + comment = compose_comment( + grouped_work, + witness_work, + grouped_truncated=grouped_truncated, + witness_truncated=witness_truncated, + run_id=run_id, + run_url=run_url, + sha=sha, + noir_ref=noir_ref, + status=status, + ) + if len(comment) <= MAX_COMMENT_CHARS: + return comment + + excess = len(comment) - MAX_COMMENT_CHARS + witness_work, witness_changed = clip_tail( + witness_work, MIN_SECTION_CHARS, excess, "witness_comparison.md" + ) + witness_truncated = witness_truncated or witness_changed + if witness_changed: + continue + + grouped_work, grouped_changed = clip_tail( + grouped_work, MIN_SECTION_CHARS, excess, "grouped_error_report.txt" + ) + grouped_truncated = grouped_truncated or grouped_changed + if grouped_changed: + continue + + break + + # Final hard guard if both reports are already near minimum length. 
+ fallback = compose_comment( + grouped_work, + witness_work, + grouped_truncated=True, + witness_truncated=True, + run_id=run_id, + run_url=run_url, + sha=sha, + noir_ref=noir_ref, + status=status, + ) + if len(fallback) <= MAX_COMMENT_CHARS: + return fallback + + hard_cut = fallback[: MAX_COMMENT_CHARS - 120].rstrip() + return f"{hard_cut}\n\n_Comment truncated due to GitHub size limits._\n" + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--grouped-report", required=True, type=Path) + parser.add_argument("--witness-report", required=True, type=Path) + parser.add_argument("--output", required=True, type=Path) + parser.add_argument("--run-id", required=True) + parser.add_argument("--run-url", required=True) + parser.add_argument("--sha", required=True) + parser.add_argument("--noir-ref", required=True) + parser.add_argument("--status", required=True) + return parser.parse_args() + + +def main() -> None: + args = parse_args() + + grouped_report_text = read_report(args.grouped_report, "grouped_error_report.txt") + witness_report_text = read_report(args.witness_report, "witness_comparison.md") + + body = build_with_truncation( + grouped_report_text=grouped_report_text, + witness_report_text=witness_report_text, + run_id=args.run_id, + run_url=args.run_url, + sha=args.sha, + noir_ref=args.noir_ref, + status=args.status, + ) + + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text(body, encoding="utf-8") + print(f"Wrote PR comment body to {args.output} ({len(body)} chars)") + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/noir-execution-success.yml b/.github/workflows/noir-execution-success.yml index 3e6cd267b..5def5ac96 100644 --- a/.github/workflows/noir-execution-success.yml +++ b/.github/workflows/noir-execution-success.yml @@ -2,6 +2,7 @@ name: Noir Execution Success Tests # Provide a noir_ref to test against any Noir release. 
on: + pull_request: workflow_dispatch: inputs: noir_ref: @@ -9,9 +10,13 @@ on: required: false default: "v1.0.0-beta.19" +permissions: + contents: read + pull-requests: write + env: CARGO_TERM_COLOR: always - NOIR_REF: ${{ inputs.noir_ref || 'v1.0.0-beta.19' }} + NOIR_REF: ${{ github.event_name == 'workflow_dispatch' && (github.event.inputs.noir_ref != '' && github.event.inputs.noir_ref || 'v1.0.0-beta.19') || 'v1.0.0-beta.19' }} # Cancel any in-progress run on the same branch when a new one is triggered. concurrency: @@ -20,7 +25,7 @@ concurrency: jobs: noir-execution-success: - name: Noir execution_success suite (${{ inputs.noir_ref || 'v1.0.0-beta.19' }}) + name: Noir execution_success suite (${{ github.event_name == 'workflow_dispatch' && (github.event.inputs.noir_ref != '' && github.event.inputs.noir_ref || 'v1.0.0-beta.19') || 'v1.0.0-beta.19' }}) runs-on: [self-hosted, Linux, ARM64, provekit-build] steps: @@ -75,6 +80,74 @@ jobs: path: noir-execution-logs/ retention-days: 7 + - name: Build sticky PR comment body + if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository + continue-on-error: true + run: | + python3 .github/scripts/build_noir_pr_comment.py \ + --grouped-report "noir-execution-logs/grouped_error_report.txt" \ + --witness-report "noir-execution-logs/witness_comparison.md" \ + --output "noir-execution-logs/pr_comment.md" \ + --run-id "${{ github.run_id }}" \ + --run-url "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \ + --sha "${{ github.sha }}" \ + --noir-ref "${{ env.NOIR_REF }}" \ + --status "${{ job.status }}" + + - name: Upsert sticky PR report comment + if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository + continue-on-error: true + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const fs = require('fs'); + const marker = 
''; + const bodyPath = 'noir-execution-logs/pr_comment.md'; + const fallbackBody = [ + marker, + '## Noir execution_success report', + '', + 'Unable to generate the detailed report body for this run.', + '', + 'Run: [#${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})', + ].join('\n'); + const body = fs.existsSync(bodyPath) ? fs.readFileSync(bodyPath, 'utf8') : fallbackBody; + + const { owner, repo } = context.repo; + const issue_number = context.issue.number; + const comments = await github.paginate(github.rest.issues.listComments, { + owner, + repo, + issue_number, + per_page: 100, + }); + + const existing = comments.find((comment) => + comment.user && + comment.user.login === 'github-actions[bot]' && + comment.body && + comment.body.includes(marker) + ); + + if (existing) { + await github.rest.issues.updateComment({ + owner, + repo, + comment_id: existing.id, + body, + }); + core.info(`Updated existing noir sticky comment (id=${existing.id}).`); + } else { + const created = await github.rest.issues.createComment({ + owner, + repo, + issue_number, + body, + }); + core.info(`Created new noir sticky comment (id=${created.data.id}).`); + } + # Always clean up the temp clone, even if the test step failed. - name: Cleanup noir clone if: always() diff --git a/scripts/generate_witness_comparison.py b/scripts/generate_witness_comparison.py index e3619d4ad..0eda181e8 100644 --- a/scripts/generate_witness_comparison.py +++ b/scripts/generate_witness_comparison.py @@ -14,6 +14,8 @@ import urllib.request from pathlib import Path +from noir_execution_helpers import load_skip_tests + # Mavros Cols — extracted from reilabs/mavros STATUS.md (column "Cols"), # noir/test_programs/execution_success/* rows only. Keys are bare circuit names. 
MAVROS_COLS: dict[str, int] = { @@ -217,36 +219,10 @@ ) _EXEC_SUCCESS_PREFIX = "noir/test_programs/execution_success/" -# Keep in sync with SKIP_TESTS in scripts/run_noir_execution_success.sh. -# These are intentionally excluded from both sides of the witness comparison. -SKIP_TESTS: set[str] = { - # BLAKE3 - "a_6", - "array_dynamic_blackbox_input", - "array_dynamic_nested_blackbox_input", - "blake3", - "conditional_1", - "conditional_regression_short_circuit", - "regression_4449", - # ECDSA_SECP256K1 - "bench_ecdsa_secp256k1", - "ecdsa_secp256k1", - "ecdsa_secp256k1_invalid_inputs", - "ecdsa_secp256k1_invalid_pub_key_in_inactive_branch", - # ECDSA_SECP256R1 - "ecdsa_secp256r1", - "ecdsa_secp256r1_3x", - "ecdsa_secp256r1_invalid_pub_key_in_inactive_branch", - "ecdsa_secp256r1_msg_equals_order", - # EMBEDDED_CURVE_ADD - "embedded_curve_ops", - "regression_5045", - "regression_7744", - # AES128_ENCRYPT - "aes128_encrypt", - # BLAKE2S - "a_7", -} +# Skip list is shared with scripts/run_noir_execution_success.sh via +# scripts/noir_skip_tests.txt; these circuits are excluded from both sides +# of the witness comparison. +SKIP_TESTS: set[str] = load_skip_tests() def _fetch_live_mavros_cols() -> dict[str, int]: diff --git a/scripts/noir_execution_helpers.py b/scripts/noir_execution_helpers.py new file mode 100644 index 000000000..c18288a88 --- /dev/null +++ b/scripts/noir_execution_helpers.py @@ -0,0 +1,258 @@ +#!/usr/bin/env python3 +"""Helpers for scripts/run_noir_execution_success.sh. + +Subcommands: + discover — list runnable test dirs + resolve-prover-toml — find Prover.toml for a package + package-name — read [package].name from Nargo.toml + build-report — write grouped_error_report.txt + skip-tests — print the skip list (one per line) + +The skip list lives in scripts/noir_skip_tests.txt and is the single source +of truth shared with scripts/generate_witness_comparison.py. 
+""" + +from __future__ import annotations + +import argparse +import os +import re +import sys +import tomllib +from collections import defaultdict +from pathlib import Path + +SKIP_LIST_FILE = Path(__file__).with_name("noir_skip_tests.txt") + + +def load_skip_tests() -> set[str]: + """Return the skip list parsed from noir_skip_tests.txt. + + Blank lines and lines starting with `#` are ignored. Inline `#` comments + are stripped. Returns an empty set if the file is missing. + """ + if not SKIP_LIST_FILE.is_file(): + return set() + names: set[str] = set() + for raw in SKIP_LIST_FILE.read_text().splitlines(): + line = raw.split("#", 1)[0].strip() + if line: + names.add(line) + return names + + +def discover_tests(root: Path) -> list[str]: + """Return candidate test project paths relative to ``root``. + + Mirrors the legacy shell heredoc: a path is a candidate if it is a + workspace default-member, or if it has both a `[package]` entry in its + Nargo.toml and a sibling Prover.toml. Nested projects under a workspace + default-member are suppressed. + """ + nargo_data: dict[str, dict] = {} + for nargo in root.rglob("Nargo.toml"): + rel = nargo.parent.relative_to(root).as_posix() + try: + data = tomllib.loads(nargo.read_text()) + except Exception: + data = {} + nargo_data[rel] = data + + workspace_default_roots: set[str] = set() + for rel, data in nargo_data.items(): + ws = data.get("workspace") + if isinstance(ws, dict) and "default-member" in ws: + workspace_default_roots.add(rel) + + suppressed: set[str] = set() + for ws_rel in workspace_default_roots: + ws_path = Path(ws_rel) if ws_rel != "." else Path() + for rel in nargo_data: + rel_path = Path(rel) if rel != "." 
else Path() + if rel_path != ws_path and ws_path in rel_path.parents: + suppressed.add(rel) + + candidates: set[str] = set(workspace_default_roots) + for rel, data in nargo_data.items(): + if rel in suppressed: + continue + pkg = data.get("package") + if isinstance(pkg, dict) and "name" in pkg: + if (root / rel / "Prover.toml").is_file(): + candidates.add(rel) + + return sorted(candidates) + + +def resolve_prover_toml(project_dir: Path, package_name: str) -> str: + """Return Prover.toml path (relative to ``project_dir``) for ``package_name``. + + Prefers a Prover.toml located next to the Nargo.toml whose package name + matches. Falls back to a root-level Prover.toml, then to the sole + Prover.toml under the project when unambiguous. Returns "" otherwise. + """ + matches: list[str] = [] + for nargo in sorted(project_dir.rglob("Nargo.toml")): + try: + data = tomllib.loads(nargo.read_text()) + except Exception: + continue + pkg = data.get("package") + if not isinstance(pkg, dict) or pkg.get("name") != package_name: + continue + prover = nargo.parent / "Prover.toml" + if prover.is_file(): + matches.append(prover.relative_to(project_dir).as_posix()) + + if matches: + matches.sort(key=lambda p: (p.count("/"), p)) + return matches[0] + + root_prover = project_dir / "Prover.toml" + if root_prover.is_file(): + return "Prover.toml" + + all_provers = sorted(project_dir.rglob("Prover.toml")) + if len(all_provers) == 1: + return all_provers[0].relative_to(project_dir).as_posix() + + return "" + + +def read_package_name(project_dir: Path) -> str: + """Return [package].name from ``project_dir/Nargo.toml`` or "".""" + nargo = project_dir / "Nargo.toml" + if not nargo.is_file(): + return "" + try: + data = tomllib.loads(nargo.read_text()) + except Exception: + return "" + pkg = data.get("package") + if isinstance(pkg, dict): + return str(pkg.get("name", "")) + return "" + + +_BLACKBOX_RE = re.compile( + r"not implemented: Other black box function: BLACKBOX::([A-Z0-9_]+)" +) 
+_PANIC_RE = re.compile(r"panicked at [^\n]*:\n([^\n]+)") +_SOLVE_RE = re.compile(r"Failed to solve program: '([^']+)'") +_COMPILE_ERR_RE = re.compile(r"^error:\s*([^\n]+)", flags=re.M) +_COMPILE_BUG_RE = re.compile(r"^bug:\s*([^\n]+)", flags=re.M) +_GENERIC_ERR_RE = re.compile(r"^Error:\s*([^\n]+)", flags=re.M) +_FAIL_STAGE_RE = re.compile(r"FAIL: ([^\n]+)") +_SKIP_REASON_RE = re.compile(r"SKIP: ([^\n]+)") + + +def _classify_failure(text: str, stage: str) -> str: + blackbox = _BLACKBOX_RE.search(text) + if blackbox: + return f"Not implemented blackbox: {blackbox.group(1)} ({stage})" + if "Program must have one entry point." in text: + return f"Program must have one entry point ({stage})" + panic = _PANIC_RE.search(text) + if panic: + return f"Panic: {panic.group(1).strip()} ({stage})" + solve = _SOLVE_RE.search(text) + if solve: + return f"Failed to solve program: {solve.group(1)} ({stage})" + if "Failed assertion" in text: + return f"Failed assertion ({stage})" + compile_error = _COMPILE_ERR_RE.search(text) + if compile_error: + return f"Compile error: {compile_error.group(1).strip()} ({stage})" + compile_bug = _COMPILE_BUG_RE.search(text) + if compile_bug: + return f"Compile bug: {compile_bug.group(1).strip()} ({stage})" + generic = _GENERIC_ERR_RE.search(text) + if generic: + return f"Error: {generic.group(1).strip()} ({stage})" + return f"Unknown failure ({stage})" + + +def build_grouped_report(log_dir: Path, passed_count: int) -> None: + """Scan ``log_dir/per_test/*.log`` and write ``log_dir/grouped_error_report.txt``. + + PASS logs are deleted by the shell runner after each successful test, so + the PASS count is threaded in as ``passed_count`` rather than inferred. 
+ """ + per_test_dir = log_dir / "per_test" + report_file = log_dir / "grouped_error_report.txt" + + logs = sorted(per_test_dir.glob("*.log")) + status_counts = {"PASS": passed_count, "FAIL": 0, "SKIP": 0} + grouped: dict[str, list[str]] = defaultdict(list) + stage_groups: dict[str, list[str]] = defaultdict(list) + + for fp in logs: + text = fp.read_text(errors="replace") + name = fp.stem + + if "SKIP:" in text: + status_counts["SKIP"] += 1 + skip_match = _SKIP_REASON_RE.search(text) + reason = skip_match.group(1).strip() if skip_match else "unknown" + grouped[f"SKIP: {reason}"].append(name) + continue + + status_counts["FAIL"] += 1 + fail_stages = _FAIL_STAGE_RE.findall(text) + stage = fail_stages[-1].strip() if fail_stages else "unknown stage" + stage_groups[stage].append(name) + grouped[_classify_failure(text, stage)].append(name) + + with report_file.open("w") as f: + f.write(f"logs={len(logs)}\n") + f.write(f"PASS={status_counts['PASS']}\n") + f.write(f"FAIL={status_counts['FAIL']}\n") + f.write(f"SKIP={status_counts['SKIP']}\n") + f.write("\n[stages]\n") + for stage, tests in sorted(stage_groups.items(), key=lambda kv: (-len(kv[1]), kv[0])): + f.write(f"{stage}\t{len(tests)}\t{', '.join(tests)}\n") + f.write("\n[grouped]\n") + for key, tests in sorted(grouped.items(), key=lambda kv: (-len(kv[1]), kv[0])): + f.write(f"{len(tests)}\t{key}\t{', '.join(tests)}\n") + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + sub = parser.add_subparsers(dest="cmd", required=True) + + p = sub.add_parser("discover", help="list runnable test dirs under ") + p.add_argument("test_root", type=Path) + + p = sub.add_parser("resolve-prover-toml") + p.add_argument("project_dir", type=Path) + p.add_argument("package_name") + + p = sub.add_parser("package-name") + p.add_argument("project_dir", type=Path) + + p = sub.add_parser("build-report") + p.add_argument("log_dir", type=Path) + p.add_argument("passed_count", type=int) + + sub.add_parser("skip-tests", 
help="print the skip list, one name per line") + + args = parser.parse_args() + + if args.cmd == "discover": + for name in discover_tests(args.test_root): + print(name) + elif args.cmd == "resolve-prover-toml": + print(resolve_prover_toml(args.project_dir, args.package_name)) + elif args.cmd == "package-name": + print(read_package_name(args.project_dir)) + elif args.cmd == "build-report": + build_grouped_report(args.log_dir, args.passed_count) + elif args.cmd == "skip-tests": + for name in sorted(load_skip_tests()): + print(name) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/noir_skip_tests.txt b/scripts/noir_skip_tests.txt new file mode 100644 index 000000000..31b26c337 --- /dev/null +++ b/scripts/noir_skip_tests.txt @@ -0,0 +1,38 @@ +# Tests that use blackbox functions not yet supported by provekit. +# Counted as SKIP (not FAIL) by scripts/run_noir_execution_success.sh +# and excluded from scripts/generate_witness_comparison.py. +# Remove entries here once the corresponding blackbox is supported. +# +# Format: one bare test name per line. Blank lines and `#` comments are ignored. 
+ +# BLAKE3 +a_6 +array_dynamic_blackbox_input +array_dynamic_nested_blackbox_input +blake3 +conditional_1 +conditional_regression_short_circuit +regression_4449 + +# ECDSA_SECP256K1 +bench_ecdsa_secp256k1 +ecdsa_secp256k1 +ecdsa_secp256k1_invalid_inputs +ecdsa_secp256k1_invalid_pub_key_in_inactive_branch + +# ECDSA_SECP256R1 +ecdsa_secp256r1 +ecdsa_secp256r1_3x +ecdsa_secp256r1_invalid_pub_key_in_inactive_branch +ecdsa_secp256r1_msg_equals_order + +# EMBEDDED_CURVE_ADD +embedded_curve_ops +regression_5045 +regression_7744 + +# AES128_ENCRYPT +aes128_encrypt + +# BLAKE2S +a_7 diff --git a/scripts/run_noir_execution_success.sh b/scripts/run_noir_execution_success.sh index 71dc4b34d..930d606fb 100755 --- a/scripts/run_noir_execution_success.sh +++ b/scripts/run_noir_execution_success.sh @@ -20,6 +20,8 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" +HELPER="${SCRIPT_DIR}/noir_execution_helpers.py" +SKIP_LIST_FILE="${SCRIPT_DIR}/noir_skip_tests.txt" # --------------------------------------------------------------------------- # Resolve test corpus root (CI clone vs. local vendored copy) @@ -47,43 +49,24 @@ fi # --------------------------------------------------------------------------- # Unimplemented-blackbox skip list -# These tests use blackbox functions not yet supported by provekit. -# They are counted as SKIP (not FAIL) and will be added back once supported. +# Single source of truth: scripts/noir_skip_tests.txt (shared with +# scripts/generate_witness_comparison.py). Counted as SKIP (not FAIL). 
# --------------------------------------------------------------------------- -SKIP_TESTS=( - # BLAKE3 - a_6 - array_dynamic_blackbox_input - array_dynamic_nested_blackbox_input - blake3 - conditional_1 - conditional_regression_short_circuit - regression_4449 - # ECDSA_SECP256K1 - bench_ecdsa_secp256k1 - ecdsa_secp256k1 - ecdsa_secp256k1_invalid_inputs - ecdsa_secp256k1_invalid_pub_key_in_inactive_branch - # ECDSA_SECP256R1 - ecdsa_secp256r1 - ecdsa_secp256r1_3x - ecdsa_secp256r1_invalid_pub_key_in_inactive_branch - ecdsa_secp256r1_msg_equals_order - # EMBEDDED_CURVE_ADD - embedded_curve_ops - regression_5045 - regression_7744 - # AES128_ENCRYPT - aes128_encrypt - # BLAKE2S - a_7 -) - -# Build a fast associative-array lookup +SKIP_TESTS=() declare -A SKIP_SET -for _t in "${SKIP_TESTS[@]}"; do - SKIP_SET["${_t}"]=1 -done +if [[ -f "${SKIP_LIST_FILE}" ]]; then + while IFS= read -r _raw || [[ -n "${_raw}" ]]; do + _name="${_raw%%#*}" + _name="${_name#"${_name%%[![:space:]]*}"}" + _name="${_name%"${_name##*[![:space:]]}"}" + if [[ -n "${_name}" ]]; then + SKIP_TESTS+=("${_name}") + SKIP_SET["${_name}"]=1 + fi + done < "${SKIP_LIST_FILE}" +else + echo "WARNING: skip list ${SKIP_LIST_FILE} not found; no tests will be skipped." >&2 +fi if [[ ! -d "${TEST_ROOT}" ]]; then echo "ERROR: Missing test corpus at ${TEST_ROOT}" @@ -128,134 +111,22 @@ echo "test_name,provekit_witnesses" > "${WITNESS_CSV}" shopt -s nullglob globstar +# Python helpers live in scripts/noir_execution_helpers.py; these are thin +# shell wrappers so the main loop reads naturally. 
discover_test_dirs() { - TEST_ROOT="${TEST_ROOT}" python3 - <<'PY' -from pathlib import Path -import tomllib -import os - -root = Path(os.environ["TEST_ROOT"]) -nargo_data = {} - -for nargo in root.rglob("Nargo.toml"): - rel = nargo.parent.relative_to(root).as_posix() - try: - data = tomllib.loads(nargo.read_text()) - except Exception: - data = {} - nargo_data[rel] = data - -workspace_default_roots = set() -for rel, data in nargo_data.items(): - ws = data.get("workspace") - if isinstance(ws, dict) and "default-member" in ws: - workspace_default_roots.add(rel) - -suppressed = set() -for ws_rel in workspace_default_roots: - ws_path = Path(ws_rel) if ws_rel != "." else Path() - for rel in nargo_data: - rel_path = Path(rel) if rel != "." else Path() - if rel_path != ws_path and ws_path in rel_path.parents: - suppressed.add(rel) - -candidates = set(workspace_default_roots) -for rel, data in nargo_data.items(): - if rel in suppressed: - continue - - pkg = data.get("package") - if isinstance(pkg, dict) and "name" in pkg: - if (root / rel / "Prover.toml").is_file(): - candidates.add(rel) - -for rel in sorted(candidates): - print(rel) -PY + python3 "${HELPER}" discover "${TEST_ROOT}" } resolve_prover_toml() { - local project_dir="$1" - local package_name="$2" - - PROJECT_DIR="${project_dir}" PACKAGE_NAME="${package_name}" python3 - <<'PY' -from pathlib import Path -import tomllib -import os - -project_dir = Path(os.environ["PROJECT_DIR"]) -package_name = os.environ["PACKAGE_NAME"] - -candidates = [] -for nargo in sorted(project_dir.rglob("Nargo.toml")): - try: - data = tomllib.loads(nargo.read_text()) - except Exception: - continue - - pkg = data.get("package") - if not isinstance(pkg, dict): - continue - - if pkg.get("name") != package_name: - continue - - prover = nargo.parent / "Prover.toml" - if prover.is_file(): - candidates.append(prover.relative_to(project_dir).as_posix()) - -if candidates: - candidates.sort(key=lambda p: (p.count("/"), p)) - print(candidates[0]) - 
raise SystemExit(0) - -root_prover = project_dir / "Prover.toml" -if root_prover.is_file(): - print("Prover.toml") - raise SystemExit(0) - -all_provers = sorted(project_dir.rglob("Prover.toml")) -if len(all_provers) == 1: - print(all_provers[0].relative_to(project_dir).as_posix()) - raise SystemExit(0) - -print("") -PY + python3 "${HELPER}" resolve-prover-toml "$1" "$2" } read_workdir_package_name() { - local project_dir="$1" - PROJECT_DIR="${project_dir}" python3 - <<'PY' -from pathlib import Path -import tomllib -import os - -nargo = Path(os.environ["PROJECT_DIR"]) / "Nargo.toml" -if not nargo.is_file(): - print("") - raise SystemExit(0) - -try: - data = tomllib.loads(nargo.read_text()) -except Exception: - print("") - raise SystemExit(0) - -pkg = data.get("package") -if isinstance(pkg, dict): - print(pkg.get("name", "")) -else: - print("") -PY + python3 "${HELPER}" package-name "$1" } relative_path() { - local from_dir="$1" - local to_path="$2" - FROM_DIR="${from_dir}" TO_PATH="${to_path}" python3 - <<'PY' -import os -print(os.path.relpath(os.environ["TO_PATH"], os.environ["FROM_DIR"])) -PY + python3 -c 'import os, sys; print(os.path.relpath(sys.argv[2], sys.argv[1]))' "$1" "$2" } @@ -503,93 +374,7 @@ echo "Failed : ${failed}" echo "Skipped : ${skipped} (${#SKIP_TESTS[@]} unimplemented-blackbox tests)" echo "Log directory : ${LOG_DIR}" -LOG_DIR="${LOG_DIR}" PASSED_COUNT="${passed}" python3 - <<'PY' -from pathlib import Path -import re -from collections import defaultdict -import os - -log_dir = Path(os.environ["LOG_DIR"]) -per_test_dir = log_dir / "per_test" -report_file = log_dir / "grouped_error_report.txt" - -logs = sorted(per_test_dir.glob("*.log")) -# PASS logs are deleted after each successful test run; read the count from the shell instead. 
-status_counts = {"PASS": int(os.environ.get("PASSED_COUNT", "0")), "FAIL": 0, "SKIP": 0} -grouped = defaultdict(list) -stage_groups = defaultdict(list) - -for fp in logs: - text = fp.read_text(errors="replace") - name = fp.stem - - if "SKIP:" in text: - status_counts["SKIP"] += 1 - skip_reason = re.search(r"SKIP: ([^\n]+)", text) - reason = skip_reason.group(1).strip() if skip_reason else "unknown" - grouped[f"SKIP: {reason}"].append(name) - continue - - status_counts["FAIL"] += 1 - fail_stage_match = re.findall(r"FAIL: ([^\n]+)", text) - stage = fail_stage_match[-1].strip() if fail_stage_match else "unknown stage" - stage_groups[stage].append(name) - - blackbox = re.search(r"not implemented: Other black box function: BLACKBOX::([A-Z0-9_]+)", text) - if blackbox: - grouped[f"Not implemented blackbox: {blackbox.group(1)} ({stage})"].append(name) - continue - - if "Program must have one entry point." in text: - grouped[f"Program must have one entry point ({stage})"].append(name) - continue - - panic = re.search(r"panicked at [^\n]*:\n([^\n]+)", text) - if panic: - grouped[f"Panic: {panic.group(1).strip()} ({stage})"].append(name) - continue - - solve = re.search(r"Failed to solve program: '([^']+)'", text) - if solve: - grouped[f"Failed to solve program: {solve.group(1)} ({stage})"].append(name) - continue - - assertion = re.search(r"Failed assertion", text) - if assertion: - grouped[f"Failed assertion ({stage})"].append(name) - continue - - compile_error = re.search(r"^error:\s*([^\n]+)", text, flags=re.M) - if compile_error: - grouped[f"Compile error: {compile_error.group(1).strip()} ({stage})"].append(name) - continue - - compile_bug = re.search(r"^bug:\s*([^\n]+)", text, flags=re.M) - if compile_bug: - grouped[f"Compile bug: {compile_bug.group(1).strip()} ({stage})"].append(name) - continue - - generic_error = re.search(r"^Error:\s*([^\n]+)", text, flags=re.M) - if generic_error: - grouped[f"Error: {generic_error.group(1).strip()} ({stage})"].append(name) - 
continue - - grouped[f"Unknown failure ({stage})"].append(name) - -with report_file.open("w") as f: - f.write(f"logs={len(logs)}\n") - f.write(f"PASS={status_counts['PASS']}\n") - f.write(f"FAIL={status_counts['FAIL']}\n") - f.write(f"SKIP={status_counts['SKIP']}\n") - f.write("\n[stages]\n") - for stage, tests in sorted(stage_groups.items(), key=lambda kv: (-len(kv[1]), kv[0])): - f.write(f"{stage}\t{len(tests)}\t{', '.join(tests)}") - f.write("\n") - f.write("\n[grouped]\n") - for key, tests in sorted(grouped.items(), key=lambda kv: (-len(kv[1]), kv[0])): - f.write(f"{len(tests)}\t{key}\t{', '.join(tests)}") - f.write("\n") -PY +python3 "${HELPER}" build-report "${LOG_DIR}" "${passed}" # Emit GitHub Step Summary when running inside Actions # (must be after the Python report generator so grouped_error_report.txt exists) From 65fd51a291f2446eb6e84eec97bcd4efe1091f06 Mon Sep 17 00:00:00 2001 From: x-senpai-x Date: Wed, 22 Apr 2026 00:34:00 +0530 Subject: [PATCH 07/16] minor nits --- .github/workflows/noir-execution-success.yml | 3 +++ scripts/generate_witness_comparison.py | 8 +++---- scripts/noir_execution_helpers.py | 25 ++++++++++++-------- scripts/run_noir_execution_success.sh | 23 ++++++++++-------- 4 files changed, 35 insertions(+), 24 deletions(-) diff --git a/.github/workflows/noir-execution-success.yml b/.github/workflows/noir-execution-success.yml index 5def5ac96..999247592 100644 --- a/.github/workflows/noir-execution-success.yml +++ b/.github/workflows/noir-execution-success.yml @@ -13,6 +13,7 @@ on: permissions: contents: read pull-requests: write + issues: write env: CARGO_TERM_COLOR: always @@ -25,6 +26,8 @@ concurrency: jobs: noir-execution-success: + # Block fork PRs from executing arbitrary build scripts on the self-hosted runner. 
+ if: ${{ github.event_name == 'workflow_dispatch' || github.event.pull_request.head.repo.full_name == github.repository }} name: Noir execution_success suite (${{ github.event_name == 'workflow_dispatch' && (github.event.inputs.noir_ref != '' && github.event.inputs.noir_ref || 'v1.0.0-beta.19') || 'v1.0.0-beta.19' }}) runs-on: [self-hosted, Linux, ARM64, provekit-build] diff --git a/scripts/generate_witness_comparison.py b/scripts/generate_witness_comparison.py index 0eda181e8..75f16dfc9 100644 --- a/scripts/generate_witness_comparison.py +++ b/scripts/generate_witness_comparison.py @@ -300,14 +300,14 @@ def main(csv_path: Path, out_dir: Path) -> None: provekit_better = sum(1 for _, m, p in comparable if p < m) lines = [ - "# Mavros vs Provekit Witnesses Count", + "# Mavros vs ProveKit Witnesses Count", "", f"Union {len(all_names)} circuits: {len(comparable)} comparable, " - f"{missing_in_provekit} missing in Provekit, {missing_in_mavros} missing in Mavros.", + f"{missing_in_provekit} missing in ProveKit, {missing_in_mavros} missing in Mavros.", f"Among comparable: {equal} equal, {mavros_better} Mavros better, " - f"{provekit_better} Provekit better.", + f"{provekit_better} ProveKit better.", "", - "| Test | Mavros Cols | Provekit Post-GE | Delta | Better | Factor |", + "| Test | Mavros Cols | ProveKit Post-GE | Delta | Better | Factor |", "|------|-------------|------------------|-------|--------|--------|", ] diff --git a/scripts/noir_execution_helpers.py b/scripts/noir_execution_helpers.py index c18288a88..080676245 100644 --- a/scripts/noir_execution_helpers.py +++ b/scripts/noir_execution_helpers.py @@ -172,17 +172,17 @@ def _classify_failure(text: str, stage: str) -> str: return f"Unknown failure ({stage})" -def build_grouped_report(log_dir: Path, passed_count: int) -> None: +def build_grouped_report(log_dir: Path, passed: int, failed: int, skipped: int) -> None: """Scan ``log_dir/per_test/*.log`` and write ``log_dir/grouped_error_report.txt``. 
- PASS logs are deleted by the shell runner after each successful test, so - the PASS count is threaded in as ``passed_count`` rather than inferred. + PASS/FAIL/SKIP totals come from the shell runner — it has the authoritative + counts (including blackbox skips, which don't produce per-test logs). Logs + are consulted only for the ``[stages]`` and ``[grouped]`` sections. """ per_test_dir = log_dir / "per_test" report_file = log_dir / "grouped_error_report.txt" logs = sorted(per_test_dir.glob("*.log")) - status_counts = {"PASS": passed_count, "FAIL": 0, "SKIP": 0} grouped: dict[str, list[str]] = defaultdict(list) stage_groups: dict[str, list[str]] = defaultdict(list) @@ -191,13 +191,11 @@ def build_grouped_report(log_dir: Path, passed_count: int) -> None: name = fp.stem if "SKIP:" in text: - status_counts["SKIP"] += 1 skip_match = _SKIP_REASON_RE.search(text) reason = skip_match.group(1).strip() if skip_match else "unknown" grouped[f"SKIP: {reason}"].append(name) continue - status_counts["FAIL"] += 1 fail_stages = _FAIL_STAGE_RE.findall(text) stage = fail_stages[-1].strip() if fail_stages else "unknown stage" stage_groups[stage].append(name) @@ -205,9 +203,9 @@ def build_grouped_report(log_dir: Path, passed_count: int) -> None: with report_file.open("w") as f: f.write(f"logs={len(logs)}\n") - f.write(f"PASS={status_counts['PASS']}\n") - f.write(f"FAIL={status_counts['FAIL']}\n") - f.write(f"SKIP={status_counts['SKIP']}\n") + f.write(f"PASS={passed}\n") + f.write(f"FAIL={failed}\n") + f.write(f"SKIP={skipped}\n") f.write("\n[stages]\n") for stage, tests in sorted(stage_groups.items(), key=lambda kv: (-len(kv[1]), kv[0])): f.write(f"{stage}\t{len(tests)}\t{', '.join(tests)}\n") @@ -233,6 +231,8 @@ def main() -> int: p = sub.add_parser("build-report") p.add_argument("log_dir", type=Path) p.add_argument("passed_count", type=int) + p.add_argument("failed_count", type=int) + p.add_argument("skipped_count", type=int) sub.add_parser("skip-tests", help="print the skip list, 
one name per line") @@ -246,7 +246,12 @@ def main() -> int: elif args.cmd == "package-name": print(read_package_name(args.project_dir)) elif args.cmd == "build-report": - build_grouped_report(args.log_dir, args.passed_count) + build_grouped_report( + args.log_dir, + args.passed_count, + args.failed_count, + args.skipped_count, + ) elif args.cmd == "skip-tests": for name in sorted(load_skip_tests()): print(name) diff --git a/scripts/run_noir_execution_success.sh b/scripts/run_noir_execution_success.sh index 930d606fb..47baad3cd 100755 --- a/scripts/run_noir_execution_success.sh +++ b/scripts/run_noir_execution_success.sh @@ -169,23 +169,26 @@ for test_name in "${test_dirs[@]}"; do continue fi - (( total += 1 )) - - if [[ "${MAX_TESTS}" -gt 0 && "${total}" -gt "${MAX_TESTS}" ]]; then - break - fi - # leaf name (no sub-path) is what we key on in the skip set leaf_name="${test_name%%/*}" - test_dir="${TEST_ROOT}/${test_name}" - safe_test_name="${test_name//\//__}" + # --- Unimplemented blackbox skip list: no log, no noise --- + # Skip BEFORE incrementing `total` so MAX_TESTS caps only attempted tests. if [[ "${SKIP_SET["${leaf_name}"]:-}" == "1" ]]; then echo "SKIP (blackbox): ${test_name}" (( skipped += 1 )) continue fi + (( total += 1 )) + + if [[ "${MAX_TESTS}" -gt 0 && "${total}" -gt "${MAX_TESTS}" ]]; then + break + fi + + test_dir="${TEST_ROOT}/${test_name}" + safe_test_name="${test_name//\//__}" + test_log="${LOG_DIR}/per_test/${safe_test_name}.log" echo "" @@ -207,7 +210,7 @@ for test_name in "${test_dirs[@]}"; do continue fi - if [[ ! -d "${TEST_LIB_ROOT}" ]] && grep -qr 'test_libraries' "${test_dir}"/Nargo.toml 2>/dev/null; then + if [[ ! 
-d "${TEST_LIB_ROOT}" ]] && grep -q 'test_libraries' "${test_dir}"/Nargo.toml 2>/dev/null; then echo "SKIP: missing test_libraries for relative path dependency" append_stage_marker "${test_log}" "test" "SKIP" echo "SKIP: missing test_libraries for relative path dependency" >> "${test_log}" @@ -374,7 +377,7 @@ echo "Failed : ${failed}" echo "Skipped : ${skipped} (${#SKIP_TESTS[@]} unimplemented-blackbox tests)" echo "Log directory : ${LOG_DIR}" -python3 "${HELPER}" build-report "${LOG_DIR}" "${passed}" +python3 "${HELPER}" build-report "${LOG_DIR}" "${passed}" "${failed}" "${skipped}" # Emit GitHub Step Summary when running inside Actions # (must be after the Python report generator so grouped_error_report.txt exists) From 8e011067406449ffb8eb0d050ff3ff879679a334 Mon Sep 17 00:00:00 2001 From: x-senpai-x Date: Wed, 22 Apr 2026 00:36:18 +0530 Subject: [PATCH 08/16] switched runner to ubuntu-24.04-arm --- .github/workflows/noir-execution-success.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/noir-execution-success.yml b/.github/workflows/noir-execution-success.yml index 999247592..898764a5e 100644 --- a/.github/workflows/noir-execution-success.yml +++ b/.github/workflows/noir-execution-success.yml @@ -29,7 +29,7 @@ jobs: # Block fork PRs from executing arbitrary build scripts on the self-hosted runner. 
if: ${{ github.event_name == 'workflow_dispatch' || github.event.pull_request.head.repo.full_name == github.repository }} name: Noir execution_success suite (${{ github.event_name == 'workflow_dispatch' && (github.event.inputs.noir_ref != '' && github.event.inputs.noir_ref || 'v1.0.0-beta.19') || 'v1.0.0-beta.19' }}) - runs-on: [self-hosted, Linux, ARM64, provekit-build] + runs-on: ubuntu-24.04-arm steps: - uses: actions/checkout@v4 From d7eca9901db291e10e4d45138b2d780c6e884829 Mon Sep 17 00:00:00 2001 From: x-senpai-x Date: Thu, 23 Apr 2026 02:49:19 +0530 Subject: [PATCH 09/16] removed mavros comparison --- .github/scripts/build_noir_pr_comment.py | 8 +- .github/workflows/noir-execution-success.yml | 2 +- scripts/generate_provekit_witness_report.py | 75 ++++ scripts/generate_witness_comparison.py | 351 ------------------- scripts/noir_execution_helpers.py | 2 +- scripts/noir_skip_tests.txt | 2 +- scripts/run_noir_execution_success.sh | 26 +- 7 files changed, 96 insertions(+), 370 deletions(-) create mode 100755 scripts/generate_provekit_witness_report.py delete mode 100644 scripts/generate_witness_comparison.py diff --git a/.github/scripts/build_noir_pr_comment.py b/.github/scripts/build_noir_pr_comment.py index bd760a354..168af9e2b 100644 --- a/.github/scripts/build_noir_pr_comment.py +++ b/.github/scripts/build_noir_pr_comment.py @@ -65,7 +65,7 @@ def compose_comment( else "" ) witness_truncated_note = ( - "\n_Witness comparison truncated to fit GitHub comment size limits._\n" + "\n_ProveKit witness report truncated to fit GitHub comment size limits._\n" if witness_truncated else "" ) @@ -94,7 +94,7 @@ def compose_comment( "", "", "
", - "witness_comparison.md", + "provekit_witness_report.md", "", witness_report_text, witness_truncated_note, @@ -149,7 +149,7 @@ def build_with_truncation( excess = len(comment) - MAX_COMMENT_CHARS witness_work, witness_changed = clip_tail( - witness_work, MIN_SECTION_CHARS, excess, "witness_comparison.md" + witness_work, MIN_SECTION_CHARS, excess, "provekit_witness_report.md" ) witness_truncated = witness_truncated or witness_changed if witness_changed: @@ -200,7 +200,7 @@ def main() -> None: args = parse_args() grouped_report_text = read_report(args.grouped_report, "grouped_error_report.txt") - witness_report_text = read_report(args.witness_report, "witness_comparison.md") + witness_report_text = read_report(args.witness_report, "provekit_witness_report.md") body = build_with_truncation( grouped_report_text=grouped_report_text, diff --git a/.github/workflows/noir-execution-success.yml b/.github/workflows/noir-execution-success.yml index 898764a5e..c69195907 100644 --- a/.github/workflows/noir-execution-success.yml +++ b/.github/workflows/noir-execution-success.yml @@ -89,7 +89,7 @@ jobs: run: | python3 .github/scripts/build_noir_pr_comment.py \ --grouped-report "noir-execution-logs/grouped_error_report.txt" \ - --witness-report "noir-execution-logs/witness_comparison.md" \ + --witness-report "noir-execution-logs/provekit_witness_report.md" \ --output "noir-execution-logs/pr_comment.md" \ --run-id "${{ github.run_id }}" \ --run-url "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \ diff --git a/scripts/generate_provekit_witness_report.py b/scripts/generate_provekit_witness_report.py new file mode 100755 index 000000000..e8bafc982 --- /dev/null +++ b/scripts/generate_provekit_witness_report.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +"""Generate a ProveKit-only witness count report. 
+ +Usage: python3 generate_provekit_witness_report.py + +Reads a CSV of post-GE constraint and witness counts produced by +scripts/run_noir_execution_success.sh and writes provekit_witness_report.md +to . +""" + +from __future__ import annotations + +import csv +import sys +from pathlib import Path + +SKIP_LIST = Path(__file__).resolve().parent / "noir_skip_tests.txt" + + +def load_skip_tests() -> set[str]: + if not SKIP_LIST.is_file(): + return set() + skip: set[str] = set() + for raw in SKIP_LIST.read_text().splitlines(): + line = raw.strip() + if not line or line.startswith("#"): + continue + skip.add(line) + return skip + + +def main(csv_path: Path, out_dir: Path) -> None: + skip_tests = load_skip_tests() + + rows: dict[str, tuple[int | None, int | None]] = {} + with csv_path.open() as f: + reader = csv.DictReader(f) + for row in reader: + leaf = row["test_name"].split("/")[-1] + if leaf in skip_tests: + continue + + def _parse(key: str) -> int | None: + val = row.get(key, "") + try: + return int(val) + except (TypeError, ValueError): + return None + + rows[leaf] = (_parse("provekit_constraints"), _parse("provekit_witnesses")) + + lines = [ + "# ProveKit Witness Counts", + "", + f"Captured post-GE constraint and witness counts for {len(rows)} circuits.", + "", + "| Test | Constraints (post-GE) | Witnesses (post-GE) |", + "|------|------------------------|----------------------|", + ] + for name in sorted(rows): + constraints, witnesses = rows[name] + c = "-" if constraints is None else str(constraints) + w = "-" if witnesses is None else str(witnesses) + lines.append(f"| {name} | {c} | {w} |") + + out_path = out_dir / "provekit_witness_report.md" + out_path.write_text("\n".join(lines) + "\n") + print(f"Wrote {out_path} ({len(rows)} circuits)") + + +if __name__ == "__main__": + if len(sys.argv) != 3: + print(f"Usage: {sys.argv[0]} ", file=sys.stderr) + sys.exit(1) + main(Path(sys.argv[1]), Path(sys.argv[2])) diff --git a/scripts/generate_witness_comparison.py 
b/scripts/generate_witness_comparison.py deleted file mode 100644 index 75f16dfc9..000000000 --- a/scripts/generate_witness_comparison.py +++ /dev/null @@ -1,351 +0,0 @@ -#!/usr/bin/env python3 -"""Generate Mavros vs ProveKit witness count comparison table. - -Usage: python3 generate_witness_comparison.py - -Reads provekit_witness_counts.csv produced by run_noir_execution_success.sh, -joins against Mavros Cols from the live reilabs/mavros STATUS.md (with -hardcoded fallback for any entries absent from the live file), and writes -witness_comparison.md to . -""" - -import csv -import sys -import urllib.request -from pathlib import Path - -from noir_execution_helpers import load_skip_tests - -# Mavros Cols — extracted from reilabs/mavros STATUS.md (column "Cols"), -# noir/test_programs/execution_success/* rows only. Keys are bare circuit names. -MAVROS_COLS: dict[str, int] = { - "a_1327_concrete_in_generic": 4, - "a_1_mul": 577, - "a_2_div": 610, - "a_3_add": 567, - "a_4_sub": 670, - "a_5_over": 798, - "a_6_array": 5619, - "arithmetic_binary_operations": 654, - "array_dedup_regression": 523, - "array_eq": 161, - "array_neq": 161, - "array_oob_regression_7965": 551, - "array_oob_regression_7975": 6, - "array_rc_regression_7842": 1, - "array_with_refs_from_param": 3, - "as_witness": 3, - "assert": 4, - "assert_statement": 3, - "assign_ex": 3, - "bit_and": 1840, - "bit_not": 523, - "bool_not": 2, - "bool_or": 5, - "break_and_continue": 1, - "brillig_acir_as_brillig": 541, - "brillig_array_ifelse": 8, - "brillig_arrays": 4, - "brillig_block_parameter_liveness": 14119, - "brillig_calls": 541, - "brillig_calls_array": 550, - "brillig_calls_conditionals": 577, - "brillig_conditional": 5, - "brillig_constant_reference_regression": 532, - "brillig_cow": 543, - "brillig_cow_assign": 1, - "brillig_fns_as_values": 593, - "brillig_identity_function": 8, - "brillig_loop_size_regression": 3, - "brillig_nested_arrays": 533, - "brillig_not": 9, - "brillig_recursion": 532, - 
"brillig_recursive_main": 525, - "brillig_recursive_main_indirect": 525, - "brillig_uninitialized_arrays": 790, - "cast_bool": 5, - "cast_to_i8_regression_7776": 648, - "cast_to_u64_regression_7776": 662, - "cast_to_u8_regression_7776": 648, - "comptime_println": 1, - "comptime_println_fmtstr_with_quoted": 1, - "comptime_variable_at_runtime": 1, - "conditional_regression_547": 3, - "conditional_regression_underflow": 517, - "custom_entry": 2, - "databus": 610, - "databus_two_calldata_simple": 619, - "debug_logs": 3, - "diamond_deps_0": 4, - "division_by_max": 528, - "do_not_capture_comptime_locals": 1, - "double_neg_cond_bool_input": 4, - "double_neg_cond_global_var": 809, - "dual_constrained_lambdas": 3, - "field_attribute": 533, - "fmtstr_with_global": 1, - "fold_after_inlined_calls": 539, - "fold_basic": 9, - "fold_basic_nested_call": 7, - "fold_distinct_return": 540, - "generics": 3, - "global_consts": 175, - "global_nested_array_regression_9270": 526, - "global_var_entry_point_used_in_another_entry": 3, - "global_var_func_with_multiple_entry_points": 3, - "global_var_multiple_entry_points_nested": 3, - "inline_never_basic": 5, - "integer_array_indexing": 527, - "lambda_taking_lambda_with_variant": 548, - "last_uses_regression_8935": 524, - "loop": 524, - "loop_break_regression_8319": 1, - "loop_invariant_nested_deep": 2, - "loop_invariant_regression_8586": 2, - "loop_small_break": 2, - "main_return": 3, - "modules": 5, - "modules_more": 5, - "modulus": 864, - "mutate_array_copy": 1, - "negated_jmpif_condition": 5, - "negative_associated_constants": 1, - "nested_array_with_refs": 2, - "nested_array_with_refs_from_param": 3, - "nested_arrays_from_brillig": 19, - "nested_fmtstr": 1, - "no_predicates_basic": 5, - "no_predicates_brillig": 532, - "poseidon_bn254_hash_width_3": 552, - "poseidonsponge_x5_254": 608, - "pred_eq": 5, - "prelude": 1, - "reference_alias_in_array": 1, - "regression_10197": 523, - "regression_10307": 531, - "regression_10466": 1, - 
"regression_10516": 1, - "regression_10690": 1, - "regression_10917": 3, - "regression_10923": 5, - "regression_2660": 572, - "regression_3051": 1, - "regression_3394": 1, - "regression_3607": 596, - "regression_3889": 5, - "regression_4088": 2, - "regression_4124": 2, - "regression_4202": 550, - "regression_4663": 1, - "regression_5435": 3, - "regression_5615": 1, - "regression_6451": 11, - "regression_6674_1": 1, - "regression_6674_2": 1, - "regression_6734": 1, - "regression_6990": 1, - "regression_7143": 532, - "regression_7195": 12, - "regression_7451": 1304, - "regression_7962": 1461, - "regression_8174": 569, - "regression_8212": 524, - "regression_8235": 4, - "regression_8329": 7, - "regression_8519": 677, - "regression_8558": 657, - "regression_8739": 1, - "regression_8761": 2, - "regression_8874": 524, - "regression_8890": 6, - "regression_8926": 532, - "regression_8975": 2, - "regression_9037": 3, - "regression_9047": 521, - "regression_9102": 5, - "regression_9116": 1, - "regression_9160": 3, - "regression_9193": 2, - "regression_9206": 532, - "regression_9243": 1, - "regression_9294": 1, - "regression_9329": 523, - "regression_9546": 529, - "regression_9657": 523, - "regression_9725_1": 1, - "regression_9725_2": 2, - "regression_9907": 3, - "regression_method_cannot_be_found": 1, - "return_twice": 5, - "shift_right_overflow": 517, - "shl_signed_regression_9661": 520, - "signed_bitshift": 1, - "signed_overflow_in_else_regression_8617": 660, - "signed_truncation": 918, - "simple_2d_array": 13, - "simple_add_and_ret_arr": 3, - "simple_array_param": 4, - "simple_bitwise": 1355, - "simple_comparison": 784, - "simple_mut": 3, - "simple_not": 3, - "simple_print": 3, - "simple_program_addition": 3, - "struct": 5, - "struct_array_inputs": 8, - "struct_fields_ordering": 524, - "submodules": 4, - "trait_as_return_type": 523, - "trait_associated_constant": 1, - "trait_impl_base_type": 523, - "traits_in_crates_1": 3, - "traits_in_crates_2": 3, - "tuple_inputs": 
845, - "tuples": 657, - "type_aliases": 5, - "unsafe_range_constraint": 527, - "unsigned_to_signed_cast": 918, - "while_loop_break_regression_8521": 541, - "wildcard_type": 7, - "witness_compression": 6, - "workspace_default_member": 3, - "wrapping_operations": 908, - "xor": 1367, -} - - -_MAVROS_STATUS_URL = ( - "https://raw.githubusercontent.com/reilabs/mavros/main/STATUS.md" -) -_EXEC_SUCCESS_PREFIX = "noir/test_programs/execution_success/" - -# Skip list is shared with scripts/run_noir_execution_success.sh via -# scripts/noir_skip_tests.txt; these circuits are excluded from both sides -# of the witness comparison. -SKIP_TESTS: set[str] = load_skip_tests() - - -def _fetch_live_mavros_cols() -> dict[str, int]: - """Parse Mavros Cols from the live reilabs/mavros STATUS.md. - - Returns an empty dict on any network or parse failure so the caller - can fall back gracefully to the hardcoded table. - """ - try: - with urllib.request.urlopen(_MAVROS_STATUS_URL, timeout=15) as resp: - content = resp.read().decode("utf-8") - except Exception as exc: - print( - f"Warning: could not fetch live Mavros STATUS.md ({exc}); " - "falling back to hardcoded data.", - file=sys.stderr, - ) - return {} - - result: dict[str, int] = {} - for line in content.splitlines(): - if _EXEC_SUCCESS_PREFIX not in line: - continue - parts = line.split("|") - # Table row: | test | Compiled | R1CS | Rows | Cols | ... 
- # After split: parts[1]=test, parts[5]=Cols (1-indexed, 0 is empty) - if len(parts) < 6: - continue - name_field = parts[1].strip() - cols_field = parts[5].strip() - if _EXEC_SUCCESS_PREFIX not in name_field: - continue - circuit = name_field.split(_EXEC_SUCCESS_PREFIX, 1)[1].strip() - if circuit and cols_field.isdigit(): - result[circuit] = int(cols_field) - - if result: - print(f"Fetched {len(result)} Mavros entries from live STATUS.md.") - else: - print("Warning: live STATUS.md parsed 0 entries; using hardcoded data.", file=sys.stderr) - return result - - -def main(csv_path: Path, out_dir: Path) -> None: - # Live data takes precedence; hardcoded fills any gaps. - live = _fetch_live_mavros_cols() - mavros_cols = { - name: cols - for name, cols in {**MAVROS_COLS, **live}.items() - if name not in SKIP_TESTS - } - - provekit: dict[str, int] = {} - with csv_path.open() as f: - for row in csv.DictReader(f): - leaf = row["test_name"].split("/")[-1] - if leaf in SKIP_TESTS: - continue - try: - provekit[leaf] = int(row["provekit_witnesses"]) - except (ValueError, KeyError): - continue - - all_names = sorted(set(mavros_cols) | set(provekit)) - comparable = [ - (name, mavros_cols[name], provekit[name]) - for name in all_names - if name in mavros_cols and name in provekit - ] - missing_in_provekit = sum(1 for name in all_names if name in mavros_cols and name not in provekit) - missing_in_mavros = sum(1 for name in all_names if name in provekit and name not in mavros_cols) - - equal = sum(1 for _, m, p in comparable if m == p) - mavros_better = sum(1 for _, m, p in comparable if m < p) - provekit_better = sum(1 for _, m, p in comparable if p < m) - - lines = [ - "# Mavros vs ProveKit Witnesses Count", - "", - f"Union {len(all_names)} circuits: {len(comparable)} comparable, " - f"{missing_in_provekit} missing in ProveKit, {missing_in_mavros} missing in Mavros.", - f"Among comparable: {equal} equal, {mavros_better} Mavros better, " - f"{provekit_better} ProveKit better.", - 
"", - "| Test | Mavros Cols | ProveKit Post-GE | Delta | Better | Factor |", - "|------|-------------|------------------|-------|--------|--------|", - ] - - for name in all_names: - mavros = mavros_cols.get(name) - pk = provekit.get(name) - - if mavros is None: - lines.append(f"| {name} | - | {pk} | - | missing_mavros | - |") - continue - - if pk is None: - lines.append(f"| {name} | {mavros} | - | - | missing_provekit | - |") - continue - - delta = pk - mavros - delta_str = f"+{delta}" if delta > 0 else str(delta) - if mavros == pk: - better = "equal" - factor = "1.00x" - elif pk < mavros: - better = "provekit" - factor = "inf" if pk == 0 else f"{mavros / pk:.2f}x" - else: - better = "mavros" - factor = "inf" if mavros == 0 else f"{pk / mavros:.2f}x" - lines.append(f"| {name} | {mavros} | {pk} | {delta_str} | {better} | {factor} |") - - out_path = out_dir / "witness_comparison.md" - out_path.write_text("\n".join(lines) + "\n") - print( - f"Wrote {out_path} " - f"({len(all_names)} total circuits, {len(comparable)} comparable)" - ) - - -if __name__ == "__main__": - if len(sys.argv) != 3: - print(f"Usage: {sys.argv[0]} ", file=sys.stderr) - sys.exit(1) - main(Path(sys.argv[1]), Path(sys.argv[2])) diff --git a/scripts/noir_execution_helpers.py b/scripts/noir_execution_helpers.py index 080676245..06ffb52d1 100644 --- a/scripts/noir_execution_helpers.py +++ b/scripts/noir_execution_helpers.py @@ -9,7 +9,7 @@ skip-tests — print the skip list (one per line) The skip list lives in scripts/noir_skip_tests.txt and is the single source -of truth shared with scripts/generate_witness_comparison.py. +of truth shared with scripts/generate_provekit_witness_report.py. """ from __future__ import annotations diff --git a/scripts/noir_skip_tests.txt b/scripts/noir_skip_tests.txt index 31b26c337..575dd9d0c 100644 --- a/scripts/noir_skip_tests.txt +++ b/scripts/noir_skip_tests.txt @@ -1,6 +1,6 @@ # Tests that use blackbox functions not yet supported by provekit. 
# Counted as SKIP (not FAIL) by scripts/run_noir_execution_success.sh -# and excluded from scripts/generate_witness_comparison.py. +# and excluded from scripts/generate_provekit_witness_report.py. # Remove entries here once the corresponding blackbox is supported. # # Format: one bare test name per line. Blank lines and `#` comments are ignored. diff --git a/scripts/run_noir_execution_success.sh b/scripts/run_noir_execution_success.sh index 47baad3cd..50848f466 100755 --- a/scripts/run_noir_execution_success.sh +++ b/scripts/run_noir_execution_success.sh @@ -50,7 +50,7 @@ fi # --------------------------------------------------------------------------- # Unimplemented-blackbox skip list # Single source of truth: scripts/noir_skip_tests.txt (shared with -# scripts/generate_witness_comparison.py). Counted as SKIP (not FAIL). +# scripts/generate_provekit_witness_report.py). Counted as SKIP (not FAIL). # --------------------------------------------------------------------------- SKIP_TESTS=() declare -A SKIP_SET @@ -107,7 +107,7 @@ fi mkdir -p "${LOG_DIR}/per_test" GROUPED_REPORT_FILE="${LOG_DIR}/grouped_error_report.txt" WITNESS_CSV="${LOG_DIR}/provekit_witness_counts.csv" -echo "test_name,provekit_witnesses" > "${WITNESS_CSV}" +echo "test_name,provekit_constraints,provekit_witnesses" > "${WITNESS_CSV}" shopt -s nullglob globstar @@ -318,15 +318,17 @@ for test_name in "${test_dirs[@]}"; do fi append_stage_marker "${test_log}" "provekit-cli prepare" "PASS" - # Extract ProveKit post-GE witness count before the log is deleted on success. + # Extract ProveKit post-GE constraint and witness counts before the log is deleted on success. # Keep this non-fatal under `set -euo pipefail` if the log format changes/misses. 
_ge_line="$(grep -o 'After GE optimization: [0-9]* constraints, [0-9]* witnesses' "${test_log}" | tail -1 || true)" + _pk_constraints="" _pk_witnesses="" - if [[ "${_ge_line}" =~ ([0-9]+)\ witnesses$ ]]; then - _pk_witnesses="${BASH_REMATCH[1]}" + if [[ "${_ge_line}" =~ ([0-9]+)\ constraints,\ ([0-9]+)\ witnesses$ ]]; then + _pk_constraints="${BASH_REMATCH[1]}" + _pk_witnesses="${BASH_REMATCH[2]}" fi if [[ -n "${_pk_witnesses}" ]]; then - echo "${test_name},${_pk_witnesses}" >> "${WITNESS_CSV}" + echo "${test_name},${_pk_constraints},${_pk_witnesses}" >> "${WITNESS_CSV}" fi append_stage_marker "${test_log}" "provekit-cli prove" "START" @@ -404,16 +406,16 @@ fi echo "Grouped report : ${GROUPED_REPORT_FILE}" -# Generate Mavros vs ProveKit witness comparison table -if [[ -f "${WITNESS_CSV}" ]] && python3 "${SCRIPT_DIR}/generate_witness_comparison.py" "${WITNESS_CSV}" "${LOG_DIR}"; then - echo "Witness comparison: ${LOG_DIR}/witness_comparison.md" +# Generate ProveKit witness count report +if [[ -f "${WITNESS_CSV}" ]] && python3 "${SCRIPT_DIR}/generate_provekit_witness_report.py" "${WITNESS_CSV}" "${LOG_DIR}"; then + echo "ProveKit witness report: ${LOG_DIR}/provekit_witness_report.md" if [[ -n "${GITHUB_STEP_SUMMARY:-}" ]]; then { echo "" - echo "## Mavros vs ProveKit Witness Count" - head -4 "${LOG_DIR}/witness_comparison.md" + echo "## ProveKit Witness Counts" + head -4 "${LOG_DIR}/provekit_witness_report.md" echo "" - echo "_Full table available in artifact: \`witness_comparison.md\`_" + echo "_Full table available in artifact: \`provekit_witness_report.md\`_" } >> "${GITHUB_STEP_SUMMARY}" fi fi From 6390e92768574ceb3fa2266f9848a6d64cdb59c8 Mon Sep 17 00:00:00 2001 From: Aditya Bisht Date: Tue, 28 Apr 2026 13:40:58 +0530 Subject: [PATCH 10/16] ci(noir): replace witness report in PR comment with failing-circuits list Drop the provekit_witness_report.md table from the sticky PR comment and add a "Failing circuits (N)" details section parsed from the [stages] block of 
grouped_error_report.txt, alongside the grouped report itself. --- .github/scripts/build_noir_pr_comment.py | 77 ++++++++++++-------- .github/workflows/noir-execution-success.yml | 1 - 2 files changed, 45 insertions(+), 33 deletions(-) diff --git a/.github/scripts/build_noir_pr_comment.py b/.github/scripts/build_noir_pr_comment.py index 168af9e2b..5959ea185 100644 --- a/.github/scripts/build_noir_pr_comment.py +++ b/.github/scripts/build_noir_pr_comment.py @@ -30,6 +30,36 @@ def parse_grouped_counts(grouped_report_text: str) -> dict[str, str]: return counts +def parse_failing_circuits(grouped_report_text: str) -> list[str]: + """Extract the flat sorted list of failing circuits from the [stages] section. + + The grouped report's [stages] section only contains failing tests (skipped + tests are routed to [grouped] instead). Each line looks like: + \\t\\t, , ... + """ + match = re.search( + r"^\[stages\]\n(.*?)(?:\n\[|\Z)", + grouped_report_text, + flags=re.DOTALL | re.MULTILINE, + ) + if not match: + return [] + + names: set[str] = set() + for line in match.group(1).splitlines(): + line = line.strip() + if not line: + continue + parts = line.split("\t") + if len(parts) < 3: + continue + for raw in parts[2].split(","): + name = raw.strip() + if name: + names.add(name) + return sorted(names) + + def status_with_icon(status: str) -> str: normalized = (status or "unknown").strip().lower() labels = { @@ -47,9 +77,7 @@ def sanitize_code_fence(text: str) -> str: def compose_comment( grouped_report_text: str, - witness_report_text: str, grouped_truncated: bool, - witness_truncated: bool, run_id: str, run_url: str, sha: str, @@ -64,11 +92,14 @@ def compose_comment( if grouped_truncated else "" ) - witness_truncated_note = ( - "\n_ProveKit witness report truncated to fit GitHub comment size limits._\n" - if witness_truncated - else "" - ) + + failing_circuits = parse_failing_circuits(grouped_report_text) + if failing_circuits: + failing_body = "\n".join(f"- `{name}`" for name 
in failing_circuits) + failing_summary = f"Failing circuits ({len(failing_circuits)})" + else: + failing_body = "_No failing circuits._" + failing_summary = "Failing circuits (0)" lines = [ MARKER, @@ -85,6 +116,13 @@ def compose_comment( f"| SKIP | {counts['SKIP']} |", "", "
", + f"{failing_summary}", + "", + failing_body, + "", + "
", + "", + "
", "grouped_error_report.txt", "", "```text", @@ -93,13 +131,6 @@ def compose_comment( grouped_truncated_note, "
", "", - "
", - "provekit_witness_report.md", - "", - witness_report_text, - witness_truncated_note, - "
", - "", "_This comment is automatically updated by the Noir Execution Success workflow._", "", ] @@ -120,7 +151,6 @@ def clip_tail(text: str, min_chars: int, excess: int, label: str) -> tuple[str, def build_with_truncation( grouped_report_text: str, - witness_report_text: str, run_id: str, run_url: str, sha: str, @@ -128,16 +158,12 @@ def build_with_truncation( status: str, ) -> str: grouped_work = grouped_report_text - witness_work = witness_report_text grouped_truncated = False - witness_truncated = False for _ in range(128): comment = compose_comment( grouped_work, - witness_work, grouped_truncated=grouped_truncated, - witness_truncated=witness_truncated, run_id=run_id, run_url=run_url, sha=sha, @@ -148,13 +174,6 @@ def build_with_truncation( return comment excess = len(comment) - MAX_COMMENT_CHARS - witness_work, witness_changed = clip_tail( - witness_work, MIN_SECTION_CHARS, excess, "provekit_witness_report.md" - ) - witness_truncated = witness_truncated or witness_changed - if witness_changed: - continue - grouped_work, grouped_changed = clip_tail( grouped_work, MIN_SECTION_CHARS, excess, "grouped_error_report.txt" ) @@ -164,12 +183,9 @@ def build_with_truncation( break - # Final hard guard if both reports are already near minimum length. 
fallback = compose_comment( grouped_work, - witness_work, grouped_truncated=True, - witness_truncated=True, run_id=run_id, run_url=run_url, sha=sha, @@ -186,7 +202,6 @@ def build_with_truncation( def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--grouped-report", required=True, type=Path) - parser.add_argument("--witness-report", required=True, type=Path) parser.add_argument("--output", required=True, type=Path) parser.add_argument("--run-id", required=True) parser.add_argument("--run-url", required=True) @@ -200,11 +215,9 @@ def main() -> None: args = parse_args() grouped_report_text = read_report(args.grouped_report, "grouped_error_report.txt") - witness_report_text = read_report(args.witness_report, "provekit_witness_report.md") body = build_with_truncation( grouped_report_text=grouped_report_text, - witness_report_text=witness_report_text, run_id=args.run_id, run_url=args.run_url, sha=args.sha, diff --git a/.github/workflows/noir-execution-success.yml b/.github/workflows/noir-execution-success.yml index c69195907..6cf7a64f7 100644 --- a/.github/workflows/noir-execution-success.yml +++ b/.github/workflows/noir-execution-success.yml @@ -89,7 +89,6 @@ jobs: run: | python3 .github/scripts/build_noir_pr_comment.py \ --grouped-report "noir-execution-logs/grouped_error_report.txt" \ - --witness-report "noir-execution-logs/provekit_witness_report.md" \ --output "noir-execution-logs/pr_comment.md" \ --run-id "${{ github.run_id }}" \ --run-url "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \ From 5f0a31907a5716861c28cd5cd1d320f529bf7edd Mon Sep 17 00:00:00 2001 From: Aditya Bisht Date: Tue, 28 Apr 2026 13:52:46 +0530 Subject: [PATCH 11/16] ci(bench): add CSP benchmarks workflow with sticky PR comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a separate Noir-CSP benchmark suite that runs prepare/prove/verify on every 
circuit under noir-examples/csp-benchmarks/ and posts a sticky PR comment with prover time, peak RSS, peak heap, verifier time, proof size, and PKP size — averaged over BENCH_RUNS (default 3) iterations. - scripts/run_csp_benchmarks.sh: bench runner. Per circuit: nargo compile + provekit-cli prepare once, then prove and verify each three times under /usr/bin/time -f '%e %M', capturing prover stderr so the helper can extract the largest "peak memory" entry from the span_stats trace. - scripts/csp_benchmark_helpers.py: parses /usr/bin/time output and the SI-formatted peak-memory entries (with U+202F separator), converts to bytes, and emits a single averaged CSV row per circuit. - .github/scripts/build_csp_pr_comment.py: renders results.csv as a markdown table and wraps it with a marker so it lives in its own sticky comment (no race with the noir-execution-success workflow). - .github/workflows/csp-benchmarks.yml: ubuntu-24.04-arm, runs on PRs from this repo and on workflow_dispatch (with a configurable bench_runs override). --- .github/scripts/build_csp_pr_comment.py | 164 +++++++++++++++++ .github/workflows/csp-benchmarks.yml | 131 +++++++++++++ scripts/csp_benchmark_helpers.py | 191 +++++++++++++++++++ scripts/run_csp_benchmarks.sh | 234 ++++++++++++++++++++++++ 4 files changed, 720 insertions(+) create mode 100644 .github/scripts/build_csp_pr_comment.py create mode 100644 .github/workflows/csp-benchmarks.yml create mode 100755 scripts/csp_benchmark_helpers.py create mode 100755 scripts/run_csp_benchmarks.sh diff --git a/.github/scripts/build_csp_pr_comment.py b/.github/scripts/build_csp_pr_comment.py new file mode 100644 index 000000000..c291080db --- /dev/null +++ b/.github/scripts/build_csp_pr_comment.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +"""Build a sticky PR comment for the CSP benchmarks workflow. + +Reads the CSV emitted by ``scripts/run_csp_benchmarks.sh`` (one row per +circuit) and renders it as a markdown table with human-readable units. 
+""" + +from __future__ import annotations + +import argparse +import csv +from pathlib import Path + +MARKER = "" +MAX_COMMENT_CHARS = 62000 + + +def fmt_bytes(value: float) -> str: + if value <= 0: + return "—" + units = ("B", "KB", "MB", "GB", "TB") + idx = 0 + while value >= 1024 and idx < len(units) - 1: + value /= 1024.0 + idx += 1 + if value >= 100 or idx == 0: + return f"{value:.0f} {units[idx]}" + return f"{value:.2f} {units[idx]}" + + +def fmt_kb_to_bytes(rss_kb: float) -> str: + return fmt_bytes(rss_kb * 1024.0) + + +def fmt_ms(ms: float) -> str: + if ms <= 0: + return "—" + if ms < 1000: + return f"{ms:.0f} ms" + return f"{ms / 1000.0:.2f} s" + + +def status_with_icon(status: str) -> str: + normalized = (status or "unknown").strip().lower() + labels = { + "success": "[PASS]", + "failure": "[FAIL]", + "cancelled": "[CANCELLED]", + "skipped": "[SKIPPED]", + } + return f"{labels.get(normalized, '[INFO]')} {normalized}" + + +def read_rows(csv_path: Path) -> list[dict[str, str]]: + if not csv_path.is_file(): + return [] + with csv_path.open(newline="") as f: + return list(csv.DictReader(f)) + + +def render_table(rows: list[dict[str, str]]) -> str: + if not rows: + return "_No benchmark results were produced._" + + header = ( + "| Circuit | Prover time | Peak RSS | Peak heap | Verifier time | " + "Proof size | PKP size | Runs |" + ) + sep = "|---|---:|---:|---:|---:|---:|---:|---:|" + lines = [header, sep] + for row in sorted(rows, key=lambda r: r.get("circuit", "")): + lines.append( + "| " + + " | ".join( + [ + f"`{row['circuit']}`", + fmt_ms(float(row.get("prover_time_ms", 0) or 0)), + fmt_kb_to_bytes(float(row.get("prover_peak_rss_kb", 0) or 0)), + fmt_bytes(float(row.get("prover_heap_peak_bytes", 0) or 0)), + fmt_ms(float(row.get("verifier_time_ms", 0) or 0)), + fmt_bytes(float(row.get("proof_size_bytes", 0) or 0)), + fmt_bytes(float(row.get("pkp_size_bytes", 0) or 0)), + row.get("runs", "—"), + ] + ) + + " |" + ) + return "\n".join(lines) + + +def 
compose_comment( + rows: list[dict[str, str]], + run_id: str, + run_url: str, + sha: str, + status: str, + runs_per_circuit: str, +) -> str: + short_sha = sha[:12] if sha else "unknown" + table = render_table(rows) + lines = [ + MARKER, + "## CSP benchmarks", + "", + "| Metric | Value |", + "|--------|-------|", + f"| Workflow status | {status_with_icon(status)} |", + f"| Commit | `{short_sha}` |", + f"| Run | [#{run_id}]({run_url}) |", + f"| Circuits benchmarked | {len(rows)} |", + f"| Iterations averaged per circuit | {runs_per_circuit} |", + "", + "Prover time, peak RSS, peak heap, and verifier time are arithmetic means " + "across the iterations. Peak heap comes from the largest " + "`peak memory` entry in `provekit-cli prove`'s tracing output; peak RSS " + "is reported by `/usr/bin/time -v` (max-resident-set-size).", + "", + "
", + "Results", + "", + table, + "", + "
", + "", + "_This comment is automatically updated by the CSP Benchmarks workflow._", + "", + ] + return "\n".join(lines) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--results-csv", required=True, type=Path) + parser.add_argument("--output", required=True, type=Path) + parser.add_argument("--run-id", required=True) + parser.add_argument("--run-url", required=True) + parser.add_argument("--sha", required=True) + parser.add_argument("--status", required=True) + parser.add_argument("--runs-per-circuit", required=True) + return parser.parse_args() + + +def main() -> None: + args = parse_args() + rows = read_rows(args.results_csv) + body = compose_comment( + rows=rows, + run_id=args.run_id, + run_url=args.run_url, + sha=args.sha, + status=args.status, + runs_per_circuit=args.runs_per_circuit, + ) + if len(body) > MAX_COMMENT_CHARS: + cut = body[: MAX_COMMENT_CHARS - 80].rstrip() + body = f"{cut}\n\n_Comment truncated due to GitHub size limits._\n" + + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text(body, encoding="utf-8") + print(f"Wrote PR comment body to {args.output} ({len(body)} chars)") + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/csp-benchmarks.yml b/.github/workflows/csp-benchmarks.yml new file mode 100644 index 000000000..8dc95147e --- /dev/null +++ b/.github/workflows/csp-benchmarks.yml @@ -0,0 +1,131 @@ +name: CSP Benchmarks + +on: + pull_request: + workflow_dispatch: + inputs: + bench_runs: + description: "Iterations per circuit (default: 3)" + required: false + default: "3" + +permissions: + contents: read + pull-requests: write + issues: write + +env: + CARGO_TERM_COLOR: always + BENCH_RUNS: ${{ github.event_name == 'workflow_dispatch' && (github.event.inputs.bench_runs != '' && github.event.inputs.bench_runs || '3') || '3' }} + REQUIRED_NARGO_VERSION: "1.0.0-beta.19" + +concurrency: + group: csp-benchmarks-${{ github.ref 
}} + cancel-in-progress: true + +jobs: + csp-benchmarks: + # Block fork PRs from running our heavy bench script on the runner. + if: ${{ github.event_name == 'workflow_dispatch' || github.event.pull_request.head.repo.full_name == github.repository }} + name: CSP benchmarks (avg over ${{ github.event_name == 'workflow_dispatch' && (github.event.inputs.bench_runs != '' && github.event.inputs.bench_runs || '3') || '3' }} runs) + runs-on: ubuntu-24.04-arm + + steps: + - uses: actions/checkout@v4 + + - name: Setup Rust toolchain + uses: moonrepo/setup-rust@v1 + with: + channel: nightly-2026-03-04 + cache-base: main + + - name: Build provekit-cli (release) + run: cargo build --release --bin provekit-cli + + - name: Setup Noir toolchain + uses: noir-lang/noirup@v0.1.2 + with: + toolchain: ${{ env.REQUIRED_NARGO_VERSION }} + + - name: Run CSP benchmarks + env: + PROVEKIT_BIN: ${{ github.workspace }}/target/release/provekit-cli + BENCH_DIR: ${{ github.workspace }}/csp-bench-logs + BENCH_RUNS: ${{ env.BENCH_RUNS }} + run: | + bash scripts/run_csp_benchmarks.sh + + - name: Upload bench artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: csp-bench-logs-${{ github.run_id }} + path: csp-bench-logs/ + retention-days: 7 + + - name: Build sticky PR comment body + if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository + continue-on-error: true + run: | + python3 .github/scripts/build_csp_pr_comment.py \ + --results-csv "csp-bench-logs/results.csv" \ + --output "csp-bench-logs/pr_comment.md" \ + --run-id "${{ github.run_id }}" \ + --run-url "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \ + --sha "${{ github.sha }}" \ + --status "${{ job.status }}" \ + --runs-per-circuit "${{ env.BENCH_RUNS }}" + + - name: Upsert sticky CSP benchmarks comment + if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == 
github.repository + continue-on-error: true + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const fs = require('fs'); + const marker = ''; + const bodyPath = 'csp-bench-logs/pr_comment.md'; + const fallbackBody = [ + marker, + '## CSP benchmarks', + '', + 'Unable to generate the detailed report body for this run.', + '', + 'Run: [#${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})', + ].join('\n'); + const body = fs.existsSync(bodyPath) ? fs.readFileSync(bodyPath, 'utf8') : fallbackBody; + + const { owner, repo } = context.repo; + const issue_number = context.issue.number; + const comments = await github.paginate(github.rest.issues.listComments, { + owner, + repo, + issue_number, + per_page: 100, + }); + + const existing = comments.find((comment) => + comment.user && + comment.user.login === 'github-actions[bot]' && + comment.body && + comment.body.includes(marker) + ); + + if (existing) { + await github.rest.issues.updateComment({ + owner, + repo, + comment_id: existing.id, + body, + }); + core.info(`Updated existing CSP benchmarks comment (id=${existing.id}).`); + } else { + const created = await github.rest.issues.createComment({ + owner, + repo, + issue_number, + body, + }); + core.info(`Created new CSP benchmarks comment (id=${created.data.id}).`); + } diff --git a/scripts/csp_benchmark_helpers.py b/scripts/csp_benchmark_helpers.py new file mode 100755 index 000000000..5333ece79 --- /dev/null +++ b/scripts/csp_benchmark_helpers.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +"""Helpers for scripts/run_csp_benchmarks.sh. + +Subcommands: + parse-runs Aggregate per-run measurements for one + circuit and emit a single CSV row to stdout. + human-to-bytes Convert a human-formatted byte string from + the prover trace ("1.23 GB", "456 MB", etc.) + to an integer byte count. Used by tests. 
+ +Bench layout produced by run_csp_benchmarks.sh:: + + /per_circuit// + prove_.time # `/usr/bin/time -f '%e %M'` output + prove_.stderr # provekit-cli prove stderr (span_stats trace) + verify_.time + verify_.stderr + meta.txt # key=value: pkp_size, proof_size + +The "peak heap" comes from the largest "peak memory: B" entry emitted by +``tooling/cli/src/span_stats.rs`` over the prove invocation's trace. We strip +ANSI escapes and walk every span-close line; the outermost span propagates +its children's peak via ``data.peak_memory = max(...)`` so any of them is a +sufficient upper bound, but we keep the max for safety. +""" + +from __future__ import annotations + +import argparse +import re +import sys +from pathlib import Path +from statistics import mean + +ANSI_RE = re.compile(r"\x1b\[[0-9;]*m") +# Suffix table from provekit_common::utils::human (BN254 utils). The middle +# entry is a regular ASCII space (no SI prefix). Order matters: we use it to +# look up the multiplier from a captured suffix character. +SI_SUFFIXES = "qryzafpnμm kMGTPEZYRQ" +SI_BASE_INDEX = SI_SUFFIXES.index(" ") # power 0 lives at index 10 +# The separator between number and SI suffix is U+202F NARROW NO-BREAK SPACE +# unless `{:#}` (alternate) is used. We accept either form. +NARROW_NBSP = " " +PEAK_MEMORY_RE = re.compile( + rf"([0-9]+(?:\.[0-9]+)?)[{NARROW_NBSP} ]?([qryzafpnμmkMGTPEZYRQ])?B" + r"\s+peak\s+memory", +) + + +def human_to_bytes(value: str) -> int: + """Convert a "1.23 GB"-style string from the trace to an integer byte count. + + Accepts either a regular ASCII space or U+202F as the separator. Suffixes + follow ``provekit_common::utils::human`` (q…Q). A literal "B" with no SI + prefix returns the integer/float value rounded down. 
+ """ + cleaned = ANSI_RE.sub("", value).strip() + if not cleaned.endswith("B"): + raise ValueError(f"not a byte-formatted value: {value!r}") + cleaned = cleaned[:-1].rstrip() # drop trailing 'B' + if cleaned and cleaned[-1] in SI_SUFFIXES and cleaned[-1] != " ": + suffix = cleaned[-1] + number_part = cleaned[:-1].rstrip() + else: + suffix = " " + number_part = cleaned + number_part = number_part.replace(NARROW_NBSP, "").strip() + multiplier = 10 ** ((SI_SUFFIXES.index(suffix) - SI_BASE_INDEX) * 3) + return int(float(number_part) * multiplier) + + +def parse_peak_heap_bytes(stderr_path: Path) -> int: + """Return the largest "peak memory" value (bytes) found in the trace.""" + if not stderr_path.is_file(): + return 0 + text = ANSI_RE.sub("", stderr_path.read_text(encoding="utf-8", errors="replace")) + peak = 0 + for match in PEAK_MEMORY_RE.finditer(text): + number = float(match.group(1)) + suffix = match.group(2) or " " + bytes_value = int(number * 10 ** ((SI_SUFFIXES.index(suffix) - SI_BASE_INDEX) * 3)) + peak = max(peak, bytes_value) + return peak + + +def parse_time_file(time_path: Path) -> tuple[float, int]: + """Read `/usr/bin/time -f '%e %M'` output: (wall_seconds, max_rss_kb). + + Returns (0.0, 0) if the file is missing or unparseable. 
+ """ + if not time_path.is_file(): + return 0.0, 0 + raw = time_path.read_text(encoding="utf-8", errors="replace").strip().splitlines() + if not raw: + return 0.0, 0 + parts = raw[-1].split() + if len(parts) < 2: + return 0.0, 0 + try: + return float(parts[0]), int(parts[1]) + except ValueError: + return 0.0, 0 + + +def read_meta(meta_path: Path) -> dict[str, str]: + out: dict[str, str] = {} + if not meta_path.is_file(): + return out + for line in meta_path.read_text(encoding="utf-8").splitlines(): + if "=" in line: + key, _, val = line.partition("=") + out[key.strip()] = val.strip() + return out + + +def parse_runs(bench_dir: Path, circuit: str) -> str: + circuit_dir = bench_dir / "per_circuit" / circuit + meta = read_meta(circuit_dir / "meta.txt") + + prove_runs: list[tuple[float, int, int]] = [] + verify_runs: list[tuple[float, int]] = [] + + i = 1 + while True: + time_path = circuit_dir / f"prove_{i}.time" + if not time_path.is_file(): + break + wall, rss_kb = parse_time_file(time_path) + heap_bytes = parse_peak_heap_bytes(circuit_dir / f"prove_{i}.stderr") + prove_runs.append((wall, rss_kb, heap_bytes)) + i += 1 + + j = 1 + while True: + time_path = circuit_dir / f"verify_{j}.time" + if not time_path.is_file(): + break + wall, _rss = parse_time_file(time_path) + verify_runs.append((wall, _rss)) + j += 1 + + if not prove_runs: + return "" + + prove_time_ms = mean(r[0] for r in prove_runs) * 1000.0 + prover_rss_kb = mean(r[1] for r in prove_runs) + prover_heap_bytes = mean(r[2] for r in prove_runs) + verifier_time_ms = mean(r[0] for r in verify_runs) * 1000.0 if verify_runs else 0.0 + + pkp_size = meta.get("pkp_size_bytes", "0") + proof_size = meta.get("proof_size_bytes", "0") + + return ",".join( + [ + circuit, + f"{prove_time_ms:.1f}", + f"{prover_rss_kb:.0f}", + f"{prover_heap_bytes:.0f}", + f"{verifier_time_ms:.1f}", + proof_size, + pkp_size, + str(len(prove_runs)), + ] + ) + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + 
sub = parser.add_subparsers(dest="cmd", required=True) + + p = sub.add_parser("parse-runs") + p.add_argument("bench_dir", type=Path) + p.add_argument("circuit") + + p = sub.add_parser("human-to-bytes") + p.add_argument("value") + + args = parser.parse_args() + + if args.cmd == "parse-runs": + row = parse_runs(args.bench_dir, args.circuit) + if row: + print(row) + elif args.cmd == "human-to-bytes": + print(human_to_bytes(args.value)) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/run_csp_benchmarks.sh b/scripts/run_csp_benchmarks.sh new file mode 100755 index 000000000..e7382a34a --- /dev/null +++ b/scripts/run_csp_benchmarks.sh @@ -0,0 +1,234 @@ +#!/usr/bin/env bash +# run_csp_benchmarks.sh +# +# Run prove/verify benchmarks for noir-examples/csp-benchmarks/*. Each circuit +# is compiled and prepared once, then prove + verify are each invoked +# BENCH_RUNS times so the helper can average wall time, peak RSS, and +# heap-peak bytes (parsed from the prover's tracing output). +# +# Environment variables (all optional): +# PROVEKIT_BIN Path to provekit-cli (default: target/release/provekit-cli) +# BENCH_ROOT Path to csp-benchmarks (default: noir-examples/csp-benchmarks) +# BENCH_DIR Output directory (default: csp-bench-logs) +# BENCH_RUNS Iterations to average (default: 3) +# TEST_FILTER Regex on circuit name +# MAX_TESTS Cap on circuits (0 = unlimited) +# +# Output: BENCH_DIR/results.csv with one row per circuit: +# circuit,prover_time_ms,prover_peak_rss_kb,prover_heap_peak_bytes, +# verifier_time_ms,proof_size_bytes,pkp_size_bytes,runs + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" +HELPER="${SCRIPT_DIR}/csp_benchmark_helpers.py" + +PROVEKIT_BIN="${PROVEKIT_BIN:-${REPO_ROOT}/target/release/provekit-cli}" +BENCH_ROOT="${BENCH_ROOT:-${REPO_ROOT}/noir-examples/csp-benchmarks}" +BENCH_DIR="${BENCH_DIR:-${REPO_ROOT}/csp-bench-logs}" +BENCH_RUNS="${BENCH_RUNS:-3}" +TEST_FILTER="${TEST_FILTER:-}" +MAX_TESTS="${MAX_TESTS:-0}" + +if [[ "${BENCH_DIR}" != /* ]]; then + BENCH_DIR="${REPO_ROOT}/${BENCH_DIR}" +fi + +if [[ ! -x "${PROVEKIT_BIN}" ]]; then + echo "ERROR: provekit-cli binary not found at ${PROVEKIT_BIN}" >&2 + echo "Build it first: cargo build --release --bin provekit-cli" >&2 + exit 1 +fi + +if [[ ! -d "${BENCH_ROOT}" ]]; then + echo "ERROR: csp-benchmarks not found at ${BENCH_ROOT}" >&2 + exit 1 +fi + +if ! command -v nargo >/dev/null 2>&1; then + echo "ERROR: nargo is required but not in PATH" >&2 + exit 1 +fi + +if ! python3 -c "import tomllib" 2>/dev/null; then + echo "ERROR: python3.11+ is required (tomllib not found)." >&2 + echo "Current: $(python3 --version 2>&1)" >&2 + exit 1 +fi + +# `/usr/bin/time` is the GNU-style binary; macOS ships a different `time` shell +# builtin so users may need `gtime` from `brew install gnu-time`. CI runs on +# ubuntu-24.04-arm where /usr/bin/time is GNU. +TIME_BIN="" +if [[ -x /usr/bin/time ]]; then + TIME_BIN=/usr/bin/time +elif command -v gtime >/dev/null 2>&1; then + TIME_BIN="$(command -v gtime)" +else + echo "ERROR: GNU /usr/bin/time not found (try: brew install gnu-time)" >&2 + exit 1 +fi + +mkdir -p "${BENCH_DIR}/per_circuit" +RESULTS_CSV="${BENCH_DIR}/results.csv" +echo "circuit,prover_time_ms,prover_peak_rss_kb,prover_heap_peak_bytes,verifier_time_ms,proof_size_bytes,pkp_size_bytes,runs" > "${RESULTS_CSV}" + +shopt -s nullglob + +# Discover circuits: any direct subdir of csp-benchmarks/ that has both a +# Nargo.toml and a Prover.toml at its root. This filters out keccak_lib/. 
+discover_circuits() { + for dir in "${BENCH_ROOT}"/*/; do + if [[ -f "${dir}Nargo.toml" && -f "${dir}Prover.toml" ]]; then + basename "${dir%/}" + fi + done +} + +mapfile -t circuits < <(discover_circuits | sort) +if [[ "${#circuits[@]}" -eq 0 ]]; then + echo "ERROR: no circuits discovered under ${BENCH_ROOT}" >&2 + exit 1 +fi + +echo "Discovered ${#circuits[@]} circuits" + +# Read [package].name from a Nargo.toml; fall back to directory basename. +read_package_name() { + local dir="$1" + python3 - "$dir" <<'PY' +import sys, tomllib, pathlib +nargo = pathlib.Path(sys.argv[1]) / "Nargo.toml" +try: + data = tomllib.loads(nargo.read_text()) + print(data.get("package", {}).get("name", "")) +except Exception: + pass +PY +} + +attempted=0 +succeeded=0 +failed=0 + +for circuit in "${circuits[@]}"; do + if [[ -n "${TEST_FILTER}" && ! "${circuit}" =~ ${TEST_FILTER} ]]; then + continue + fi + (( attempted += 1 )) + if [[ "${MAX_TESTS}" -gt 0 && "${attempted}" -gt "${MAX_TESTS}" ]]; then + break + fi + + workdir="${BENCH_ROOT}/${circuit}" + out_dir="${BENCH_DIR}/per_circuit/${circuit}" + mkdir -p "${out_dir}" + + echo "" + echo "==> [${attempted}/${#circuits[@]}] ${circuit}" + + pkg_name="$(read_package_name "${workdir}")" + if [[ -z "${pkg_name}" ]]; then + pkg_name="${circuit}" + fi + + # 1) compile + if ! (cd "${workdir}" && nargo compile > "${out_dir}/compile.log" 2>&1); then + echo "FAIL: nargo compile (${circuit})" + (( failed += 1 )) + continue + fi + + circuit_json="${workdir}/target/${pkg_name}.json" + if [[ ! -f "${circuit_json}" ]]; then + # Fallback: pick the first json under target/. + candidate=("${workdir}"/target/*.json) + if [[ "${#candidate[@]}" -gt 0 ]]; then + circuit_json="${candidate[0]}" + else + echo "FAIL: no compiled JSON in ${workdir}/target/" + (( failed += 1 )) + continue + fi + fi + + pkp_path="${out_dir}/prover.pkp" + pkv_path="${out_dir}/verifier.pkv" + proof_path="${out_dir}/proof.np" + + # 2) prepare + if ! 
(cd "${workdir}" && "${PROVEKIT_BIN}" prepare "${circuit_json}" \ + --pkp "${pkp_path}" --pkv "${pkv_path}") > "${out_dir}/prepare.log" 2>&1; then + echo "FAIL: provekit-cli prepare (${circuit})" + (( failed += 1 )) + continue + fi + + pkp_size_bytes="$(stat -c '%s' "${pkp_path}" 2>/dev/null || stat -f '%z' "${pkp_path}")" + + # 3) prove × BENCH_RUNS — write each run's stderr separately so the helper + # can parse the tracing output's "peak memory" lines. + prove_ok=1 + for ((i=1; i<=BENCH_RUNS; i++)); do + if ! (cd "${workdir}" && "${TIME_BIN}" -f '%e %M' \ + -o "${out_dir}/prove_${i}.time" \ + "${PROVEKIT_BIN}" prove "${pkp_path}" "${workdir}/Prover.toml" \ + -o "${proof_path}") 2> "${out_dir}/prove_${i}.stderr"; then + echo "FAIL: provekit-cli prove run ${i} (${circuit})" + prove_ok=0 + break + fi + done + if [[ "${prove_ok}" -ne 1 ]]; then + (( failed += 1 )) + continue + fi + + proof_size_bytes="$(stat -c '%s' "${proof_path}" 2>/dev/null || stat -f '%z' "${proof_path}")" + + # 4) verify × BENCH_RUNS + verify_ok=1 + for ((i=1; i<=BENCH_RUNS; i++)); do + if ! 
(cd "${workdir}" && "${TIME_BIN}" -f '%e %M' \ + -o "${out_dir}/verify_${i}.time" \ + "${PROVEKIT_BIN}" verify "${pkv_path}" "${proof_path}") \ + 2> "${out_dir}/verify_${i}.stderr"; then + echo "FAIL: provekit-cli verify run ${i} (${circuit})" + verify_ok=0 + break + fi + done + if [[ "${verify_ok}" -ne 1 ]]; then + (( failed += 1 )) + continue + fi + + cat > "${out_dir}/meta.txt" <> "${RESULTS_CSV}" + echo "OK: ${row}" + (( succeeded += 1 )) + else + echo "FAIL: helper produced no row for ${circuit}" + (( failed += 1 )) + fi +done + +echo "" +echo "----- csp-benchmarks summary -----" +echo "Discovered : ${#circuits[@]}" +echo "Attempted : ${attempted}" +echo "Succeeded : ${succeeded}" +echo "Failed : ${failed}" +echo "Results : ${RESULTS_CSV}" + +if [[ "${failed}" -gt 0 ]]; then + exit 1 +fi +exit 0 From 40fd266de1af4c64ac8da12a6ad623f7afc0c995 Mon Sep 17 00:00:00 2001 From: Aditya Bisht Date: Tue, 28 Apr 2026 14:54:31 +0530 Subject: [PATCH 12/16] fix(ci): unblock noir-execution-success suite and fix typo - scripts/csp_benchmark_helpers.py: spell "unparsable" the way crate-ci/typos expects, fixing the Spell check job that was failing on this PR. - scripts/run_noir_execution_success.sh: don't exit 1 when only some circuits fail. The PR sticky comment surfaces failing circuits, so the workflow shouldn't block PRs on known provekit-cli limitations. Local callers that want the old behaviour can opt back in via STRICT_FAIL=1. --- scripts/csp_benchmark_helpers.py | 2 +- scripts/run_noir_execution_success.sh | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/csp_benchmark_helpers.py b/scripts/csp_benchmark_helpers.py index 5333ece79..f03b85797 100755 --- a/scripts/csp_benchmark_helpers.py +++ b/scripts/csp_benchmark_helpers.py @@ -86,7 +86,7 @@ def parse_peak_heap_bytes(stderr_path: Path) -> int: def parse_time_file(time_path: Path) -> tuple[float, int]: """Read `/usr/bin/time -f '%e %M'` output: (wall_seconds, max_rss_kb). 
- Returns (0.0, 0) if the file is missing or unparseable. + Returns (0.0, 0) if the file is missing or unparsable. """ if not time_path.is_file(): return 0.0, 0 diff --git a/scripts/run_noir_execution_success.sh b/scripts/run_noir_execution_success.sh index 50848f466..c7a899768 100755 --- a/scripts/run_noir_execution_success.sh +++ b/scripts/run_noir_execution_success.sh @@ -420,7 +420,12 @@ if [[ -f "${WITNESS_CSV}" ]] && python3 "${SCRIPT_DIR}/generate_provekit_witness fi fi -if [[ "${failed}" -gt 0 ]]; then +# Circuit failures are surfaced via the PR sticky comment and the grouped +# error report. The workflow should not fail just because some circuits +# don't compile through provekit-cli today — the report is the source of +# truth for which circuits pass. Set STRICT_FAIL=1 to opt into the old +# "exit 1 on any failure" behaviour for local CI gates. +if [[ "${STRICT_FAIL:-0}" == "1" && "${failed}" -gt 0 ]]; then exit 1 fi From fb27ab516b14058b4b017c641f92880c3d5378d3 Mon Sep 17 00:00:00 2001 From: Aditya Bisht Date: Tue, 28 Apr 2026 14:54:40 +0530 Subject: [PATCH 13/16] feat(bench): show deltas vs. main baseline in CSP PR comment - build_csp_pr_comment.py: drop the "Runs" column (the iteration count is already shown in the metric metadata table) and accept an optional --baseline-csv. When present, each metric cell shows a percentage delta against the latest successful CSP-benchmarks run on main. Circuits absent from the baseline are flagged "(new)". - csp-benchmarks.yml: also run on push to main (so each main commit publishes a new baseline artifact) and add a best-effort step that uses gh to look up the latest successful main run, download its results.csv into csp-bench-logs/baseline/, and pass it through to the renderer along with the baseline run id for the comment link. - Add actions:read so the workflow can list runs and download artifacts from previous runs. 
--- .github/scripts/build_csp_pr_comment.py | 160 ++++++++++++++++++++---- .github/workflows/csp-benchmarks.yml | 39 +++++- 2 files changed, 177 insertions(+), 22 deletions(-) diff --git a/.github/scripts/build_csp_pr_comment.py b/.github/scripts/build_csp_pr_comment.py index c291080db..bff8ce8cb 100644 --- a/.github/scripts/build_csp_pr_comment.py +++ b/.github/scripts/build_csp_pr_comment.py @@ -2,7 +2,9 @@ """Build a sticky PR comment for the CSP benchmarks workflow. Reads the CSV emitted by ``scripts/run_csp_benchmarks.sh`` (one row per -circuit) and renders it as a markdown table with human-readable units. +circuit) and renders it as a markdown table with human-readable units. If +``--baseline-csv`` is given, each metric cell appends a percentage delta +versus the baseline value (last successful CSP-benchmarks run on main). """ from __future__ import annotations @@ -14,6 +16,16 @@ MARKER = "" MAX_COMMENT_CHARS = 62000 +# Metric columns we render with a delta. Order matches the table header. +METRIC_COLUMNS: tuple[tuple[str, str], ...] = ( + ("prover_time_ms", "ms"), + ("prover_peak_rss_kb", "kb"), + ("prover_heap_peak_bytes", "bytes"), + ("verifier_time_ms", "ms"), + ("proof_size_bytes", "bytes"), + ("pkp_size_bytes", "bytes"), +) + def fmt_bytes(value: float) -> str: if value <= 0: @@ -40,6 +52,37 @@ def fmt_ms(ms: float) -> str: return f"{ms / 1000.0:.2f} s" +def fmt_value(unit: str, value: float) -> str: + if unit == "ms": + return fmt_ms(value) + if unit == "kb": + return fmt_kb_to_bytes(value) + return fmt_bytes(value) + + +def fmt_delta(current: float, baseline: float | None) -> str: + """Return a compact delta-vs-baseline annotation, or empty string. + + - Returns "" when no baseline is available. + - Returns "(new)" when current is present but baseline is missing + for this circuit. + - Returns "(±0.0%)" / "(+1.2%)" / "(-3.4%)" otherwise. 
+ """ + if baseline is None: + return "" + if baseline <= 0: + # Baseline collected zero (e.g., older CSV without this metric). + # Don't show a misleading divide-by-zero ratio. + return "" + if current <= 0: + return "" + delta_pct = (current - baseline) / baseline * 100.0 + if abs(delta_pct) < 0.05: + return " (±0.0%)" + sign = "+" if delta_pct > 0 else "" + return f" ({sign}{delta_pct:.1f}%)" + + def status_with_icon(status: str) -> str: normalized = (status or "unknown").strip().lower() labels = { @@ -58,38 +101,69 @@ def read_rows(csv_path: Path) -> list[dict[str, str]]: return list(csv.DictReader(f)) -def render_table(rows: list[dict[str, str]]) -> str: +def index_baseline(rows: list[dict[str, str]]) -> dict[str, dict[str, float]]: + """Index baseline rows by circuit name with float metric values.""" + out: dict[str, dict[str, float]] = {} + for row in rows: + circuit = (row.get("circuit") or "").strip() + if not circuit: + continue + metrics: dict[str, float] = {} + for metric, _unit in METRIC_COLUMNS: + try: + metrics[metric] = float(row.get(metric) or 0) + except ValueError: + metrics[metric] = 0.0 + out[circuit] = metrics + return out + + +def render_table( + rows: list[dict[str, str]], + baseline: dict[str, dict[str, float]], + has_baseline_file: bool, +) -> str: if not rows: return "_No benchmark results were produced._" header = ( "| Circuit | Prover time | Peak RSS | Peak heap | Verifier time | " - "Proof size | PKP size | Runs |" + "Proof size | PKP size |" ) - sep = "|---|---:|---:|---:|---:|---:|---:|---:|" + sep = "|---|---:|---:|---:|---:|---:|---:|" lines = [header, sep] + for row in sorted(rows, key=lambda r: r.get("circuit", "")): - lines.append( - "| " - + " | ".join( - [ - f"`{row['circuit']}`", - fmt_ms(float(row.get("prover_time_ms", 0) or 0)), - fmt_kb_to_bytes(float(row.get("prover_peak_rss_kb", 0) or 0)), - fmt_bytes(float(row.get("prover_heap_peak_bytes", 0) or 0)), - fmt_ms(float(row.get("verifier_time_ms", 0) or 0)), - 
fmt_bytes(float(row.get("proof_size_bytes", 0) or 0)), - fmt_bytes(float(row.get("pkp_size_bytes", 0) or 0)), - row.get("runs", "—"), - ] - ) - + " |" - ) + circuit = row.get("circuit", "") + baseline_metrics = baseline.get(circuit) + + cells = [f"`{circuit}`"] + for metric, unit in METRIC_COLUMNS: + try: + value = float(row.get(metric) or 0) + except ValueError: + value = 0.0 + + value_str = fmt_value(unit, value) + + if has_baseline_file and value_str != "—": + if baseline_metrics is None: + delta = " (new)" + else: + delta = fmt_delta(value, baseline_metrics.get(metric)) + cells.append(f"{value_str}{delta}") + else: + cells.append(value_str) + lines.append("| " + " | ".join(cells) + " |") + return "\n".join(lines) def compose_comment( rows: list[dict[str, str]], + baseline: dict[str, dict[str, float]], + baseline_run_id: str, + has_baseline_file: bool, run_id: str, run_url: str, sha: str, @@ -97,7 +171,28 @@ def compose_comment( runs_per_circuit: str, ) -> str: short_sha = sha[:12] if sha else "unknown" - table = render_table(rows) + table = render_table(rows, baseline, has_baseline_file) + + if has_baseline_file: + if baseline_run_id: + baseline_note = ( + f"Each metric cell shows the current value followed by the " + f"percentage delta against the latest successful " + f"[`main` run #{baseline_run_id}](https://github.com/worldfnd/provekit/actions/runs/{baseline_run_id}). " + f"`(new)` marks circuits absent from the baseline." + ) + else: + baseline_note = ( + "Each metric cell shows the current value followed by the " + "percentage delta against the latest successful `main` run. " + "`(new)` marks circuits absent from the baseline." 
+ ) + else: + baseline_note = ( + "_No baseline available yet — deltas will appear once this " + "workflow has produced at least one successful `main` run._" + ) + lines = [ MARKER, "## CSP benchmarks", @@ -115,6 +210,8 @@ def compose_comment( "`peak memory` entry in `provekit-cli prove`'s tracing output; peak RSS " "is reported by `/usr/bin/time -v` (max-resident-set-size).", "", + baseline_note, + "", "
", "Results", "", @@ -131,6 +228,17 @@ def compose_comment( def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--results-csv", required=True, type=Path) + parser.add_argument( + "--baseline-csv", + type=Path, + default=None, + help="Optional CSV from the latest successful main run.", + ) + parser.add_argument( + "--baseline-run-id", + default="", + help="Optional Actions run id of the baseline (for the link in the comment).", + ) parser.add_argument("--output", required=True, type=Path) parser.add_argument("--run-id", required=True) parser.add_argument("--run-url", required=True) @@ -143,8 +251,18 @@ def parse_args() -> argparse.Namespace: def main() -> None: args = parse_args() rows = read_rows(args.results_csv) + + has_baseline_file = bool( + args.baseline_csv and args.baseline_csv.is_file() + ) + baseline_rows = read_rows(args.baseline_csv) if has_baseline_file else [] + baseline = index_baseline(baseline_rows) + body = compose_comment( rows=rows, + baseline=baseline, + baseline_run_id=args.baseline_run_id, + has_baseline_file=has_baseline_file, run_id=args.run_id, run_url=args.run_url, sha=args.sha, diff --git a/.github/workflows/csp-benchmarks.yml b/.github/workflows/csp-benchmarks.yml index 8dc95147e..3e17f6368 100644 --- a/.github/workflows/csp-benchmarks.yml +++ b/.github/workflows/csp-benchmarks.yml @@ -2,6 +2,8 @@ name: CSP Benchmarks on: pull_request: + push: + branches: [main] workflow_dispatch: inputs: bench_runs: @@ -13,6 +15,9 @@ permissions: contents: read pull-requests: write issues: write + # Needed to read artifacts from previous successful main runs so we can + # render percentage deltas in the PR comment. + actions: read env: CARGO_TERM_COLOR: always @@ -26,7 +31,8 @@ concurrency: jobs: csp-benchmarks: # Block fork PRs from running our heavy bench script on the runner. 
- if: ${{ github.event_name == 'workflow_dispatch' || github.event.pull_request.head.repo.full_name == github.repository }} + # Push to main always runs so the artifact becomes a baseline for PRs. + if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }} name: CSP benchmarks (avg over ${{ github.event_name == 'workflow_dispatch' && (github.event.inputs.bench_runs != '' && github.event.inputs.bench_runs || '3') || '3' }} runs) runs-on: ubuntu-24.04-arm @@ -63,12 +69,43 @@ jobs: path: csp-bench-logs/ retention-days: 7 + - name: Fetch baseline from latest successful main run + if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository + continue-on-error: true + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_REPO: ${{ github.repository }} + run: | + set -e + BASELINE_RUN_ID="$(gh run list \ + --workflow csp-benchmarks.yml \ + --branch main \ + --status success \ + --limit 1 \ + --json databaseId \ + --jq '.[0].databaseId // empty')" + if [[ -z "${BASELINE_RUN_ID}" ]]; then + echo "No successful main run found yet; deltas will not be shown." + exit 0 + fi + echo "Baseline run id: ${BASELINE_RUN_ID}" + mkdir -p csp-bench-logs/baseline + if gh run download "${BASELINE_RUN_ID}" \ + --name "csp-bench-logs-${BASELINE_RUN_ID}" \ + --dir csp-bench-logs/baseline; then + echo "BASELINE_RUN_ID=${BASELINE_RUN_ID}" >> "$GITHUB_ENV" + else + echo "Baseline artifact not retrievable; deltas will not be shown." 
+ fi + - name: Build sticky PR comment body if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository continue-on-error: true run: | python3 .github/scripts/build_csp_pr_comment.py \ --results-csv "csp-bench-logs/results.csv" \ + --baseline-csv "csp-bench-logs/baseline/results.csv" \ + --baseline-run-id "${BASELINE_RUN_ID:-}" \ --output "csp-bench-logs/pr_comment.md" \ --run-id "${{ github.run_id }}" \ --run-url "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \ From c0b171ecbbc2c5a0c68d0dc520d5b2f054a84a6c Mon Sep 17 00:00:00 2001 From: Aditya Bisht Date: Tue, 28 Apr 2026 15:37:56 +0530 Subject: [PATCH 14/16] ci(comments): drop grouped report block and auto-update footer - build_noir_pr_comment.py: stop rendering the raw grouped_error_report.txt details block. The failing-circuits list and PASS/FAIL/SKIP counts are parsed from the report and shown directly, so the raw dump is redundant. Truncation infrastructure (clip_tail, build_with_truncation, the sanitize_code_fence helper) is removed since the comment is now small enough to fit comfortably; a single hard-cut guard remains. - build_csp_pr_comment.py: remove the trailing "_This comment is automatically updated_" footer. --- .github/scripts/build_csp_pr_comment.py | 2 - .github/scripts/build_noir_pr_comment.py | 90 ++---------------------- 2 files changed, 5 insertions(+), 87 deletions(-) diff --git a/.github/scripts/build_csp_pr_comment.py b/.github/scripts/build_csp_pr_comment.py index bff8ce8cb..2f23107cd 100644 --- a/.github/scripts/build_csp_pr_comment.py +++ b/.github/scripts/build_csp_pr_comment.py @@ -219,8 +219,6 @@ def compose_comment( "", "
", "", - "_This comment is automatically updated by the CSP Benchmarks workflow._", - "", ] return "\n".join(lines) diff --git a/.github/scripts/build_noir_pr_comment.py b/.github/scripts/build_noir_pr_comment.py index 5959ea185..f98bc7723 100644 --- a/.github/scripts/build_noir_pr_comment.py +++ b/.github/scripts/build_noir_pr_comment.py @@ -9,7 +9,6 @@ MARKER = "" MAX_COMMENT_CHARS = 62000 -MIN_SECTION_CHARS = 1500 def read_report(path: Path, display_name: str) -> str: @@ -71,13 +70,8 @@ def status_with_icon(status: str) -> str: return f"{labels.get(normalized, '[INFO]')} {normalized}" -def sanitize_code_fence(text: str) -> str: - return text.replace("```", "``\\`") - - def compose_comment( grouped_report_text: str, - grouped_truncated: bool, run_id: str, run_url: str, sha: str, @@ -87,12 +81,6 @@ def compose_comment( counts = parse_grouped_counts(grouped_report_text) short_sha = sha[:12] if sha else "unknown" - grouped_truncated_note = ( - "\n_Grouped report truncated to fit GitHub comment size limits._\n" - if grouped_truncated - else "" - ) - failing_circuits = parse_failing_circuits(grouped_report_text) if failing_circuits: failing_body = "\n".join(f"- `{name}`" for name in failing_circuits) @@ -122,83 +110,11 @@ def compose_comment( "", "
", "", - "
", - "grouped_error_report.txt", - "", - "```text", - sanitize_code_fence(grouped_report_text), - "```", - grouped_truncated_note, - "
", - "", - "_This comment is automatically updated by the Noir Execution Success workflow._", - "", ] return "\n".join(lines) -def clip_tail(text: str, min_chars: int, excess: int, label: str) -> tuple[str, bool]: - if len(text) <= min_chars or excess <= 0: - return text, False - - reduction = min(len(text) - min_chars, excess + 1024) - kept = text[: len(text) - reduction].rstrip() - omitted = len(text) - len(kept) - clipped = f"{kept}\n\n[... truncated {omitted} characters from {label} ...]" - return clipped, True - - -def build_with_truncation( - grouped_report_text: str, - run_id: str, - run_url: str, - sha: str, - noir_ref: str, - status: str, -) -> str: - grouped_work = grouped_report_text - grouped_truncated = False - - for _ in range(128): - comment = compose_comment( - grouped_work, - grouped_truncated=grouped_truncated, - run_id=run_id, - run_url=run_url, - sha=sha, - noir_ref=noir_ref, - status=status, - ) - if len(comment) <= MAX_COMMENT_CHARS: - return comment - - excess = len(comment) - MAX_COMMENT_CHARS - grouped_work, grouped_changed = clip_tail( - grouped_work, MIN_SECTION_CHARS, excess, "grouped_error_report.txt" - ) - grouped_truncated = grouped_truncated or grouped_changed - if grouped_changed: - continue - - break - - fallback = compose_comment( - grouped_work, - grouped_truncated=True, - run_id=run_id, - run_url=run_url, - sha=sha, - noir_ref=noir_ref, - status=status, - ) - if len(fallback) <= MAX_COMMENT_CHARS: - return fallback - - hard_cut = fallback[: MAX_COMMENT_CHARS - 120].rstrip() - return f"{hard_cut}\n\n_Comment truncated due to GitHub size limits._\n" - - def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--grouped-report", required=True, type=Path) @@ -216,7 +132,7 @@ def main() -> None: grouped_report_text = read_report(args.grouped_report, "grouped_error_report.txt") - body = build_with_truncation( + body = compose_comment( grouped_report_text=grouped_report_text, 
run_id=args.run_id, run_url=args.run_url, @@ -225,6 +141,10 @@ def main() -> None: status=args.status, ) + if len(body) > MAX_COMMENT_CHARS: + cut = body[: MAX_COMMENT_CHARS - 80].rstrip() + body = f"{cut}\n\n_Comment truncated due to GitHub size limits._\n" + args.output.parent.mkdir(parents=True, exist_ok=True) args.output.write_text(body, encoding="utf-8") print(f"Wrote PR comment body to {args.output} ({len(body)} chars)") From f775db8068d0491f640e4df27ce8abecfb62be26 Mon Sep 17 00:00:00 2001 From: Aditya Bisht Date: Tue, 28 Apr 2026 16:16:20 +0530 Subject: [PATCH 15/16] feat(bench): add constraints and witnesses columns to CSP PR comment Reads the counts from `provekit-cli prove`'s `Read Noir proof scheme` log line (already captured per run); no CLI changes required. Renders both as integer columns with thousands separators and the same delta-vs-main treatment as the perf metrics. Backwards-compatible with older baseline CSVs lacking the columns. --- .github/scripts/build_csp_pr_comment.py | 16 +++++++++++++--- scripts/csp_benchmark_helpers.py | 22 ++++++++++++++++++++++ scripts/run_csp_benchmarks.sh | 7 ++++--- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/.github/scripts/build_csp_pr_comment.py b/.github/scripts/build_csp_pr_comment.py index 2f23107cd..31e9670f9 100644 --- a/.github/scripts/build_csp_pr_comment.py +++ b/.github/scripts/build_csp_pr_comment.py @@ -18,6 +18,8 @@ # Metric columns we render with a delta. Order matches the table header. METRIC_COLUMNS: tuple[tuple[str, str], ...] 
= ( + ("num_constraints", "int"), + ("num_witnesses", "int"), ("prover_time_ms", "ms"), ("prover_peak_rss_kb", "kb"), ("prover_heap_peak_bytes", "bytes"), @@ -52,11 +54,19 @@ def fmt_ms(ms: float) -> str: return f"{ms / 1000.0:.2f} s" +def fmt_int(value: float) -> str: + if value <= 0: + return "—" + return f"{int(round(value)):,}" + + def fmt_value(unit: str, value: float) -> str: if unit == "ms": return fmt_ms(value) if unit == "kb": return fmt_kb_to_bytes(value) + if unit == "int": + return fmt_int(value) return fmt_bytes(value) @@ -127,10 +137,10 @@ def render_table( return "_No benchmark results were produced._" header = ( - "| Circuit | Prover time | Peak RSS | Peak heap | Verifier time | " - "Proof size | PKP size |" + "| Circuit | Constraints | Witnesses | Prover time | Peak RSS | " + "Peak heap | Verifier time | Proof size | PKP size |" ) - sep = "|---|---:|---:|---:|---:|---:|---:|" + sep = "|---|---:|---:|---:|---:|---:|---:|---:|---:|" lines = [header, sep] for row in sorted(rows, key=lambda r: r.get("circuit", "")): diff --git a/scripts/csp_benchmark_helpers.py b/scripts/csp_benchmark_helpers.py index f03b85797..3bd141a0f 100755 --- a/scripts/csp_benchmark_helpers.py +++ b/scripts/csp_benchmark_helpers.py @@ -45,6 +45,11 @@ rf"([0-9]+(?:\.[0-9]+)?)[{NARROW_NBSP} ]?([qryzafpnμmkMGTPEZYRQ])?B" r"\s+peak\s+memory", ) +# Matches the `info!(constraints, witnesses, "Read Noir proof scheme")` line +# emitted by `tooling/cli/src/cmd/prove.rs` on every prove invocation. 
+SCHEME_SIZE_RE = re.compile( + r"Read Noir proof scheme\b.*?\bconstraints=(\d+)\b.*?\bwitnesses=(\d+)\b" +) def human_to_bytes(value: str) -> int: @@ -83,6 +88,17 @@ def parse_peak_heap_bytes(stderr_path: Path) -> int: return peak +def parse_scheme_sizes(stderr_path: Path) -> tuple[int, int]: + """Return (num_constraints, num_witnesses) from a prove stderr; (0, 0) if absent.""" + if not stderr_path.is_file(): + return 0, 0 + text = ANSI_RE.sub("", stderr_path.read_text(encoding="utf-8", errors="replace")) + match = SCHEME_SIZE_RE.search(text) + if not match: + return 0, 0 + return int(match.group(1)), int(match.group(2)) + + def parse_time_file(time_path: Path) -> tuple[float, int]: """Read `/usr/bin/time -f '%e %M'` output: (wall_seconds, max_rss_kb). @@ -130,6 +146,10 @@ def parse_runs(bench_dir: Path, circuit: str) -> str: prove_runs.append((wall, rss_kb, heap_bytes)) i += 1 + # Constraint and witness counts are deterministic per circuit, so reading + # them from the first prove run is sufficient. 
+ num_constraints, num_witnesses = parse_scheme_sizes(circuit_dir / "prove_1.stderr") + j = 1 while True: time_path = circuit_dir / f"verify_{j}.time" @@ -153,6 +173,8 @@ def parse_runs(bench_dir: Path, circuit: str) -> str: return ",".join( [ circuit, + str(num_constraints), + str(num_witnesses), f"{prove_time_ms:.1f}", f"{prover_rss_kb:.0f}", f"{prover_heap_bytes:.0f}", diff --git a/scripts/run_csp_benchmarks.sh b/scripts/run_csp_benchmarks.sh index e7382a34a..e099c636c 100755 --- a/scripts/run_csp_benchmarks.sh +++ b/scripts/run_csp_benchmarks.sh @@ -15,8 +15,9 @@ # MAX_TESTS Cap on circuits (0 = unlimited) # # Output: BENCH_DIR/results.csv with one row per circuit: -# circuit,prover_time_ms,prover_peak_rss_kb,prover_heap_peak_bytes, -# verifier_time_ms,proof_size_bytes,pkp_size_bytes,runs +# circuit,num_constraints,num_witnesses,prover_time_ms,prover_peak_rss_kb, +# prover_heap_peak_bytes,verifier_time_ms,proof_size_bytes,pkp_size_bytes, +# runs set -euo pipefail @@ -72,7 +73,7 @@ fi mkdir -p "${BENCH_DIR}/per_circuit" RESULTS_CSV="${BENCH_DIR}/results.csv" -echo "circuit,prover_time_ms,prover_peak_rss_kb,prover_heap_peak_bytes,verifier_time_ms,proof_size_bytes,pkp_size_bytes,runs" > "${RESULTS_CSV}" +echo "circuit,num_constraints,num_witnesses,prover_time_ms,prover_peak_rss_kb,prover_heap_peak_bytes,verifier_time_ms,proof_size_bytes,pkp_size_bytes,runs" > "${RESULTS_CSV}" shopt -s nullglob From ffbdd9f29144f229e93ed6a24557e6930c894ff3 Mon Sep 17 00:00:00 2001 From: Aditya Bisht Date: Tue, 28 Apr 2026 16:16:27 +0530 Subject: [PATCH 16/16] fix(ci): address noir-execution-success review feedback - Deduplicate NOIR_REF: collapse the nested ternary in env to a single `||` fallback and have the job name read `${{ env.NOIR_REF }}` so the literal version string only needs to bump in two places. - Fix attempted overcount: blackbox skips bump `skipped` without bumping `total`, so summing passed+failed+skipped double-counts them. Track `attempted=${total}` instead. 
Observed 332 PASS / 15 FAIL / 20 SKIP run now reports 347 (was 367).
---
 .github/workflows/noir-execution-success.yml | 4 ++--
 scripts/run_noir_execution_success.sh        | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/noir-execution-success.yml b/.github/workflows/noir-execution-success.yml
index 6cf7a64f7..8aeafafbb 100644
--- a/.github/workflows/noir-execution-success.yml
+++ b/.github/workflows/noir-execution-success.yml
@@ -17,7 +17,7 @@ permissions:
 
 env:
   CARGO_TERM_COLOR: always
-  NOIR_REF: ${{ github.event_name == 'workflow_dispatch' && (github.event.inputs.noir_ref != '' && github.event.inputs.noir_ref || 'v1.0.0-beta.19') || 'v1.0.0-beta.19' }}
+  NOIR_REF: ${{ github.event.inputs.noir_ref || 'v1.0.0-beta.19' }}
 
 # Cancel any in-progress run on the same branch when a new one is triggered.
 concurrency:
@@ -28,7 +28,7 @@ jobs:
   noir-execution-success:
     # Block fork PRs from executing arbitrary build scripts on the self-hosted runner.
     if: ${{ github.event_name == 'workflow_dispatch' || github.event.pull_request.head.repo.full_name == github.repository }}
-    name: Noir execution_success suite (${{ github.event_name == 'workflow_dispatch' && (github.event.inputs.noir_ref != '' && github.event.inputs.noir_ref || 'v1.0.0-beta.19') || 'v1.0.0-beta.19' }})
+    name: Noir execution_success suite (${{ github.event.inputs.noir_ref || 'v1.0.0-beta.19' }})
     runs-on: ubuntu-24.04-arm
 
     steps:
diff --git a/scripts/run_noir_execution_success.sh b/scripts/run_noir_execution_success.sh
index c7a899768..514f46938 100755
--- a/scripts/run_noir_execution_success.sh
+++ b/scripts/run_noir_execution_success.sh
@@ -360,7 +360,9 @@ for test_name in "${test_dirs[@]}"; do
     rm -f "${test_log}"
 done
 
-attempted=$((passed + failed + skipped))
+# Blackbox skips bump `skipped` without bumping `total` (see the skip block
+# above), so summing passed+failed+skipped would double-count them.
+attempted=${total}
 
 echo ""
 echo "----- execution_success summary -----"